pat_rep.c

Go to the documentation of this file.
00001 /*-
00002  * Copyright (c) 1992 Keith Muller.
00003  * Copyright (c) 1992, 1993
00004  *      The Regents of the University of California.  All rights reserved.
00005  *
00006  * This code is derived from software contributed to Berkeley by
00007  * Keith Muller of the University of California, San Diego.
00008  *
00009  * Redistribution and use in source and binary forms, with or without
00010  * modification, are permitted provided that the following conditions
00011  * are met:
00012  * 1. Redistributions of source code must retain the above copyright
00013  *    notice, this list of conditions and the following disclaimer.
00014  * 2. Redistributions in binary form must reproduce the above copyright
00015  *    notice, this list of conditions and the following disclaimer in the
00016  *    documentation and/or other materials provided with the distribution.
00017  * 4. Neither the name of the University nor the names of its contributors
00018  *    may be used to endorse or promote products derived from this software
00019  *    without specific prior written permission.
00020  *
00021  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
00022  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00023  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
00024  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
00025  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
00026  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
00027  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
00028  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
00029  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
00030  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
00031  * SUCH DAMAGE.
00032  */
00033 
00034 #ifndef lint
00035 #if 0
00036 static char sccsid[] = "@(#)pat_rep.c   8.2 (Berkeley) 4/18/94";
00037 #endif
00038 #endif /* not lint */
00039 
00040 #include <sys/types.h>
00041 #include <sys/stat.h>
00042 #include <stdio.h>
00043 #include <string.h>
00044 #include <unistd.h>
00045 #include <stdlib.h>
00046 #include <errno.h>
00047 #ifdef NET2_REGEX
00048 #include <regexp.h>
00049 #else
00050 #include <regex.h>
00051 #endif
00052 #include "pax.h"
00053 #include "pat_rep.h"
00054 #include "extern.h"
00055 
00056 /*
00057  * routines to handle pattern matching, name modification (regular expression
00058  * substitution and interactive renames), and destination name modification for
00059  * copy (-rw). Both file name and link names are adjusted as required in these
00060  * routines.
00061  */
00062 
00063 #define MAXSUBEXP       10              /* max subexpressions, DO NOT CHANGE */
00064 static PATTERN *pathead = NULL;         /* file pattern match list head */
00065 static PATTERN *pattail = NULL;         /* file pattern match list tail */
00066 static REPLACE *rephead = NULL;         /* replacement string list head */
00067 static REPLACE *reptail = NULL;         /* replacement string list tail */
00068 
00069 static int rep_name(char *, int *, int);
00070 static int tty_rename(ARCHD *);
00071 static int fix_path(char *, int *, char *, int);
00072 static int fn_match(char *, char *, char **);
00073 static char * range_match(char *, int);
00074 #ifdef NET2_REGEX
00075 static int resub(regexp *, char *, char *, char *);
00076 #else
00077 static int resub(regex_t *, regmatch_t *, char *, char *, char *);
00078 #endif
00079 
00080 /*
00081  * rep_add()
00082  *      parses the -s replacement string; compiles the regular expression
00083  *      and stores the compiled value and it's replacement string together in
00084  *      replacement string list. Input to this function is of the form:
00085  *              /old/new/pg
00086  *      The first char in the string specifies the delimiter used by this
00087  *      replacement string. "Old" is a regular expression in "ed" format which
00088  *      is compiled by regcomp() and is applied to filenames. "new" is the
00089  *      substitution string; p and g are options flags for printing and global
00090  *      replacement (over the single filename)
00091  * Return:
00092  *      0 if a proper replacement string and regular expression was added to
00093  *      the list of replacement patterns; -1 otherwise.
00094  */
00095 
00096 int
00097 rep_add(char *str)
00098 {
00099         char *pt1;
00100         char *pt2;
00101         REPLACE *rep;
00102 #       ifndef NET2_REGEX
00103         int res;
00104         char rebuf[BUFSIZ];
00105 #       endif
00106 
00107         /*
00108          * throw out the bad parameters
00109          */
00110         if ((str == NULL) || (*str == '\0')) {
00111                 paxwarn(1, "Empty replacement string");
00112                 return(-1);
00113         }
00114 
00115         /*
00116          * first character in the string specifies what the delimiter is for
00117          * this expression
00118          */
00119         if ((pt1 = strchr(str+1, *str)) == NULL) {
00120                 paxwarn(1, "Invalid replacement string %s", str);
00121                 return(-1);
00122         }
00123 
00124         /*
00125          * allocate space for the node that handles this replacement pattern
00126          * and split out the regular expression and try to compile it
00127          */
00128         if ((rep = (REPLACE *)malloc(sizeof(REPLACE))) == NULL) {
00129                 paxwarn(1, "Unable to allocate memory for replacement string");
00130                 return(-1);
00131         }
00132 
00133         *pt1 = '\0';
00134 #       ifdef NET2_REGEX
00135         if ((rep->rcmp = regcomp(str+1)) == NULL) {
00136 #       else
00137         if ((res = regcomp(&(rep->rcmp), str+1, 0)) != 0) {
00138                 regerror(res, &(rep->rcmp), rebuf, sizeof(rebuf));
00139                 paxwarn(1, "%s while compiling regular expression %s", rebuf, str);
00140 #       endif
00141                 (void)free((char *)rep);
00142                 return(-1);
00143         }
00144 
00145         /*
00146          * put the delimiter back in case we need an error message and
00147          * locate the delimiter at the end of the replacement string
00148          * we then point the node at the new substitution string
00149          */
00150         *pt1++ = *str;
00151         if ((pt2 = strchr(pt1, *str)) == NULL) {
00152 #               ifdef NET2_REGEX
00153                 (void)free((char *)rep->rcmp);
00154 #               else
00155                 regfree(&(rep->rcmp));
00156 #               endif
00157                 (void)free((char *)rep);
00158                 paxwarn(1, "Invalid replacement string %s", str);
00159                 return(-1);
00160         }
00161 
00162         *pt2 = '\0';
00163         rep->nstr = pt1;
00164         pt1 = pt2++;
00165         rep->flgs = 0;
00166 
00167         /*
00168          * set the options if any
00169          */
00170         while (*pt2 != '\0') {
00171                 switch(*pt2) {
00172                 case 'g':
00173                 case 'G':
00174                         rep->flgs  |= GLOB;
00175                         break;
00176                 case 'p':
00177                 case 'P':
00178                         rep->flgs  |= PRNT;
00179                         break;
00180                 default:
00181 #                       ifdef NET2_REGEX
00182                         (void)free((char *)rep->rcmp);
00183 #                       else
00184                         regfree(&(rep->rcmp));
00185 #                       endif
00186                         (void)free((char *)rep);
00187                         *pt1 = *str;
00188                         paxwarn(1, "Invalid replacement string option %s", str);
00189                         return(-1);
00190                 }
00191                 ++pt2;
00192         }
00193 
00194         /*
00195          * all done, link it in at the end
00196          */
00197         rep->fow = NULL;
00198         if (rephead == NULL) {
00199                 reptail = rephead = rep;
00200                 return(0);
00201         }
00202         reptail->fow = rep;
00203         reptail = rep;
00204         return(0);
00205 }
00206 
00207 /*
00208  * pat_add()
00209  *      add a pattern match to the pattern match list. Pattern matches are used
00210  *      to select which archive members are extracted. (They appear as
00211  *      arguments to pax in the list and read modes). If no patterns are
00212  *      supplied to pax, all members in the archive will be selected (and the
00213  *      pattern match list is empty).
00214  * Return:
00215  *      0 if the pattern was added to the list, -1 otherwise
00216  */
00217 
00218 int
00219 pat_add(char *str, char *chdnam)
00220 {
00221         PATTERN *pt;
00222 
00223         /*
00224          * throw out the junk
00225          */
00226         if ((str == NULL) || (*str == '\0')) {
00227                 paxwarn(1, "Empty pattern string");
00228                 return(-1);
00229         }
00230 
00231         /*
00232          * allocate space for the pattern and store the pattern. the pattern is
00233          * part of argv so do not bother to copy it, just point at it. Add the
00234          * node to the end of the pattern list
00235          */
00236         if ((pt = (PATTERN *)malloc(sizeof(PATTERN))) == NULL) {
00237                 paxwarn(1, "Unable to allocate memory for pattern string");
00238                 return(-1);
00239         }
00240 
00241         pt->pstr = str;
00242         pt->pend = NULL;
00243         pt->plen = strlen(str);
00244         pt->fow = NULL;
00245         pt->flgs = 0;
00246         pt->chdname = chdnam;
00247 
00248         if (pathead == NULL) {
00249                 pattail = pathead = pt;
00250                 return(0);
00251         }
00252         pattail->fow = pt;
00253         pattail = pt;
00254         return(0);
00255 }
00256 
00257 /*
00258  * pat_chk()
00259  *      complain if any the user supplied pattern did not result in a match to
00260  *      a selected archive member.
00261  */
00262 
00263 void
00264 pat_chk(void)
00265 {
00266         PATTERN *pt;
00267         int wban = 0;
00268 
00269         /*
00270          * walk down the list checking the flags to make sure MTCH was set,
00271          * if not complain
00272          */
00273         for (pt = pathead; pt != NULL; pt = pt->fow) {
00274                 if (pt->flgs & MTCH)
00275                         continue;
00276                 if (!wban) {
00277                         paxwarn(1, "WARNING! These patterns were not matched:");
00278                         ++wban;
00279                 }
00280                 (void)fprintf(stderr, "%s\n", pt->pstr);
00281         }
00282 }
00283 
00284 /*
00285  * pat_sel()
00286  *      the archive member which matches a pattern was selected. Mark the
00287  *      pattern as having selected an archive member. arcn->pat points at the
00288  *      pattern that was matched. arcn->pat is set in pat_match()
00289  *
00290  *      NOTE: When the -c option is used, we are called when there was no match
00291  *      by pat_match() (that means we did match before the inverted sense of
00292  *      the logic). Now this seems really strange at first, but with -c  we
00293  *      need to keep track of those patterns that cause an archive member to NOT
00294  *      be selected (it found an archive member with a specified pattern)
00295  * Return:
00296  *      0 if the pattern pointed at by arcn->pat was tagged as creating a
00297  *      match, -1 otherwise.
00298  */
00299 
00300 int
00301 pat_sel(ARCHD *arcn)
00302 {
00303         PATTERN *pt;
00304         PATTERN **ppt;
00305         int len;
00306 
00307         /*
00308          * if no patterns just return
00309          */
00310         if ((pathead == NULL) || ((pt = arcn->pat) == NULL))
00311                 return(0);
00312 
00313         /*
00314          * when we are NOT limited to a single match per pattern mark the
00315          * pattern and return
00316          */
00317         if (!nflag) {
00318                 pt->flgs |= MTCH;
00319                 return(0);
00320         }
00321 
00322         /*
00323          * we reach this point only when we allow a single selected match per
00324          * pattern, if the pattern matches a directory and we do not have -d
00325          * (dflag) we are done with this pattern. We may also be handed a file
00326          * in the subtree of a directory. in that case when we are operating
00327          * with -d, this pattern was already selected and we are done
00328          */
00329         if (pt->flgs & DIR_MTCH)
00330                 return(0);
00331 
00332         if (!dflag && ((pt->pend != NULL) || (arcn->type == PAX_DIR))) {
00333                 /*
00334                  * ok we matched a directory and we are allowing
00335                  * subtree matches but because of the -n only its children will
00336                  * match. This is tagged as a DIR_MTCH type.
00337                  * WATCH IT, the code assumes that pt->pend points
00338                  * into arcn->name and arcn->name has not been modified.
00339                  * If not we will have a big mess. Yup this is another kludge
00340                  */
00341 
00342                 /*
00343                  * if this was a prefix match, remove trailing part of path
00344                  * so we can copy it. Future matches will be exact prefix match
00345                  */
00346                 if (pt->pend != NULL)
00347                         *pt->pend = '\0';
00348 
00349                 if ((pt->pstr = strdup(arcn->name)) == NULL) {
00350                         paxwarn(1, "Pattern select out of memory");
00351                         if (pt->pend != NULL)
00352                                 *pt->pend = '/';
00353                         pt->pend = NULL;
00354                         return(-1);
00355                 }
00356 
00357                 /*
00358                  * put the trailing / back in the source string
00359                  */
00360                 if (pt->pend != NULL) {
00361                         *pt->pend = '/';
00362                         pt->pend = NULL;
00363                 }
00364                 pt->plen = strlen(pt->pstr);
00365 
00366                 /*
00367                  * strip off any trailing /, this should really never happen
00368                  */
00369                 len = pt->plen - 1;
00370                 if (*(pt->pstr + len) == '/') {
00371                         *(pt->pstr + len) = '\0';
00372                         pt->plen = len;
00373                 }
00374                 pt->flgs = DIR_MTCH | MTCH;
00375                 arcn->pat = pt;
00376                 return(0);
00377         }
00378 
00379         /*
00380          * we are then done with this pattern, so we delete it from the list
00381          * because it can never be used for another match.
00382          * Seems kind of strange to do for a -c, but the pax spec is really
00383          * vague on the interaction of -c -n and -d. We assume that when -c
00384          * and the pattern rejects a member (i.e. it matched it) it is done.
00385          * In effect we place the order of the flags as having -c last.
00386          */
00387         pt = pathead;
00388         ppt = &pathead;
00389         while ((pt != NULL) && (pt != arcn->pat)) {
00390                 ppt = &(pt->fow);
00391                 pt = pt->fow;
00392         }
00393 
00394         if (pt == NULL) {
00395                 /*
00396                  * should never happen....
00397                  */
00398                 paxwarn(1, "Pattern list inconsistant");
00399                 return(-1);
00400         }
00401         *ppt = pt->fow;
00402         (void)free((char *)pt);
00403         arcn->pat = NULL;
00404         return(0);
00405 }
00406 
00407 /*
00408  * pat_match()
00409  *      see if this archive member matches any supplied pattern, if a match
00410  *      is found, arcn->pat is set to point at the potential pattern. Later if
00411  *      this archive member is "selected" we process and mark the pattern as
00412  *      one which matched a selected archive member (see pat_sel())
00413  * Return:
00414  *      0 if this archive member should be processed, 1 if it should be
00415  *      skipped and -1 if we are done with all patterns (and pax should quit
00416  *      looking for more members)
00417  */
00418 
00419 int
00420 pat_match(ARCHD *arcn)
00421 {
00422         PATTERN *pt;
00423 
00424         arcn->pat = NULL;
00425 
00426         /*
00427          * if there are no more patterns and we have -n (and not -c) we are
00428          * done. otherwise with no patterns to match, matches all
00429          */
00430         if (pathead == NULL) {
00431                 if (nflag && !cflag)
00432                         return(-1);
00433                 return(0);
00434         }
00435 
00436         /*
00437          * have to search down the list one at a time looking for a match.
00438          */
00439         pt = pathead;
00440         while (pt != NULL) {
00441                 /*
00442                  * check for a file name match unless we have DIR_MTCH set in
00443                  * this pattern then we want a prefix match
00444                  */
00445                 if (pt->flgs & DIR_MTCH) {
00446                         /*
00447                          * this pattern was matched before to a directory
00448                          * as we must have -n set for this (but not -d). We can
00449                          * only match CHILDREN of that directory so we must use
00450                          * an exact prefix match (no wildcards).
00451                          */
00452                         if ((arcn->name[pt->plen] == '/') &&
00453                             (strncmp(pt->pstr, arcn->name, pt->plen) == 0))
00454                                 break;
00455                 } else if (fn_match(pt->pstr, arcn->name, &pt->pend) == 0)
00456                         break;
00457                 pt = pt->fow;
00458         }
00459 
00460         /*
00461          * return the result, remember that cflag (-c) inverts the sense of a
00462          * match
00463          */
00464         if (pt == NULL)
00465                 return(cflag ? 0 : 1);
00466 
00467         /*
00468          * We had a match, now when we invert the sense (-c) we reject this
00469          * member. However we have to tag the pattern a being successful, (in a
00470          * match, not in selecting an archive member) so we call pat_sel() here.
00471          */
00472         arcn->pat = pt;
00473         if (!cflag)
00474                 return(0);
00475 
00476         if (pat_sel(arcn) < 0)
00477                 return(-1);
00478         arcn->pat = NULL;
00479         return(1);
00480 }
00481 
00482 /*
00483  * fn_match()
00484  * Return:
00485  *      0 if this archive member should be processed, 1 if it should be
00486  *      skipped and -1 if we are done with all patterns (and pax should quit
00487  *      looking for more members)
00488  *      Note: *pend may be changed to show where the prefix ends.
00489  */
00490 
00491 static int
00492 fn_match(char *pattern, char *string, char **pend)
00493 {
00494         char c;
00495         char test;
00496 
00497         *pend = NULL;
00498         for (;;) {
00499                 switch (c = *pattern++) {
00500                 case '\0':
00501                         /*
00502                          * Ok we found an exact match
00503                          */
00504                         if (*string == '\0')
00505                                 return(0);
00506 
00507                         /*
00508                          * Check if it is a prefix match
00509                          */
00510                         if ((dflag == 1) || (*string != '/'))
00511                                 return(-1);
00512 
00513                         /*
00514                          * It is a prefix match, remember where the trailing
00515                          * / is located
00516                          */
00517                         *pend = string;
00518                         return(0);
00519                 case '?':
00520                         if ((test = *string++) == '\0')
00521                                 return (-1);
00522                         break;
00523                 case '*':
00524                         c = *pattern;
00525                         /*
00526                          * Collapse multiple *'s.
00527                          */
00528                         while (c == '*')
00529                                 c = *++pattern;
00530 
00531                         /*
00532                          * Optimized hack for pattern with a * at the end
00533                          */
00534                         if (c == '\0')
00535                                 return (0);
00536 
00537                         /*
00538                          * General case, use recursion.
00539                          */
00540                         while ((test = *string) != '\0') {
00541                                 if (!fn_match(pattern, string, pend))
00542                                         return (0);
00543                                 ++string;
00544                         }
00545                         return (-1);
00546                 case '[':
00547                         /*
00548                          * range match
00549                          */
00550                         if (((test = *string++) == '\0') ||
00551                             ((pattern = range_match(pattern, test)) == NULL))
00552                                 return (-1);
00553                         break;
00554                 case '\\':
00555                 default:
00556                         if (c != *string++)
00557                                 return (-1);
00558                         break;
00559                 }
00560         }
00561         /* NOTREACHED */
00562 }
00563 
/*
 * range_match()
 *	Test one character against a bracket expression. pattern points just
 *	behind the opening '['. A leading '!' inverts the sense of the
 *	expression. "a-z" inside the brackets is a range as long as the '-'
 *	is followed by something other than ']' or the end of the pattern;
 *	otherwise the characters are taken one by one.
 * Return:
 *	a pointer just behind the closing ']' if test is accepted, NULL if it
 *	is rejected or if the closing ']' is missing.
 */

static char *
range_match(char *pattern, int test)
{
	char lo;
	char hi;
	int invert = 0;
	int hit = 0;

	if (*pattern == '!') {
		invert = 1;
		++pattern;
	}

	for (;;) {
		lo = *pattern++;
		if (lo == ']')
			break;

		/*
		 * Illegal pattern, the bracket was never closed
		 */
		if (lo == '\0')
			return (NULL);

		if ((*pattern == '-') && (pattern[1] != '\0') &&
		    (pattern[1] != ']')) {
			hi = pattern[1];
			if ((lo <= test) && (test <= hi))
				hit = 1;
			pattern += 2;
			continue;
		}

		if (lo == test)
			hit = 1;
	}

	if (hit == invert)
		return (NULL);
	return (pattern);
}
00592 
00593 /*
00594  * mod_name()
00595  *      modify a selected file name. first attempt to apply replacement string
00596  *      expressions, then apply interactive file rename. We apply replacement
00597  *      string expressions to both filenames and file links (if we didn't the
00598  *      links would point to the wrong place, and we could never be able to
00599  *      move an archive that has a file link in it). When we rename files
00600  *      interactively, we store that mapping (old name to user input name) so
00601  *      if we spot any file links to the old file name in the future, we will
00602  *      know exactly how to fix the file link.
00603  * Return:
00604  *      0 continue to  process file, 1 skip this file, -1 pax is finished
00605  */
00606 
00607 int
00608 mod_name(ARCHD *arcn)
00609 {
00610         int res = 0;
00611 
00612         /*
00613          * Strip off leading '/' if appropriate.
00614          * Currently, this option is only set for the tar format.
00615          */
00616         if (rmleadslash && arcn->name[0] == '/') {
00617                 if (arcn->name[1] == '\0') {
00618                         arcn->name[0] = '.';
00619                 } else {
00620                         (void)memmove(arcn->name, &arcn->name[1],
00621                             strlen(arcn->name));
00622                         arcn->nlen--;
00623                 }
00624                 if (rmleadslash < 2) {
00625                         rmleadslash = 2;
00626                         paxwarn(0, "Removing leading / from absolute path names in the archive");
00627                 }
00628         }
00629         if (rmleadslash && arcn->ln_name[0] == '/' &&
00630             (arcn->type == PAX_HLK || arcn->type == PAX_HRG)) {
00631                 if (arcn->ln_name[1] == '\0') {
00632                         arcn->ln_name[0] = '.';
00633                 } else {
00634                         (void)memmove(arcn->ln_name, &arcn->ln_name[1],
00635                             strlen(arcn->ln_name));
00636                         arcn->ln_nlen--;
00637                 }
00638                 if (rmleadslash < 2) {
00639                         rmleadslash = 2;
00640                         paxwarn(0, "Removing leading / from absolute path names in the archive");
00641                 }
00642         }
00643 
00644         /*
00645          * IMPORTANT: We have a problem. what do we do with symlinks?
00646          * Modifying a hard link name makes sense, as we know the file it
00647          * points at should have been seen already in the archive (and if it
00648          * wasn't seen because of a read error or a bad archive, we lose
00649          * anyway). But there are no such requirements for symlinks. On one
00650          * hand the symlink that refers to a file in the archive will have to
00651          * be modified to so it will still work at its new location in the
00652          * file system. On the other hand a symlink that points elsewhere (and
00653          * should continue to do so) should not be modified. There is clearly
00654          * no perfect solution here. So we handle them like hardlinks. Clearly
00655          * a replacement made by the interactive rename mapping is very likely
00656          * to be correct since it applies to a single file and is an exact
00657          * match. The regular expression replacements are a little harder to
00658          * justify though. We claim that the symlink name is only likely
00659          * to be replaced when it points within the file tree being moved and
00660          * in that case it should be modified. what we really need to do is to
00661          * call an oracle here. :)
00662          */
00663         if (rephead != NULL) {
00664                 /*
00665                  * we have replacement strings, modify the name and the link
00666                  * name if any.
00667                  */
00668                 if ((res = rep_name(arcn->name, &(arcn->nlen), 1)) != 0)
00669                         return(res);
00670 
00671                 if (((arcn->type == PAX_SLK) || (arcn->type == PAX_HLK) ||
00672                     (arcn->type == PAX_HRG)) &&
00673                     ((res = rep_name(arcn->ln_name, &(arcn->ln_nlen), 0)) != 0))
00674                         return(res);
00675         }
00676 
00677         if (iflag) {
00678                 /*
00679                  * perform interactive file rename, then map the link if any
00680                  */
00681                 if ((res = tty_rename(arcn)) != 0)
00682                         return(res);
00683                 if ((arcn->type == PAX_SLK) || (arcn->type == PAX_HLK) ||
00684                     (arcn->type == PAX_HRG))
00685                         sub_name(arcn->ln_name, &(arcn->ln_nlen), sizeof(arcn->ln_name));
00686         }
00687         return(res);
00688 }
00689 
00690 /*
00691  * tty_rename()
00692  *      Prompt the user for a replacement file name. A "." keeps the old name,
00693  *      a empty line skips the file, and an EOF on reading the tty, will cause
00694  *      pax to stop processing and exit. Otherwise the file name input, replaces
00695  *      the old one.
00696  * Return:
00697  *      0 process this file, 1 skip this file, -1 we need to exit pax
00698  */
00699 
00700 static int
00701 tty_rename(ARCHD *arcn)
00702 {
00703         char tmpname[PAXPATHLEN+2];
00704         int res;
00705 
00706         /*
00707          * prompt user for the replacement name for a file, keep trying until
00708          * we get some reasonable input. Archives may have more than one file
00709          * on them with the same name (from updates etc). We print verbose info
00710          * on the file so the user knows what is up.
00711          */
00712         tty_prnt("\nATTENTION: %s interactive file rename operation.\n", argv0);
00713 
00714         for (;;) {
00715                 ls_tty(arcn);
00716                 tty_prnt("Input new name, or a \".\" to keep the old name, ");
00717                 tty_prnt("or a \"return\" to skip this file.\n");
00718                 tty_prnt("Input > ");
00719                 if (tty_read(tmpname, sizeof(tmpname)) < 0)
00720                         return(-1);
00721                 if (strcmp(tmpname, "..") == 0) {
00722                         tty_prnt("Try again, illegal file name: ..\n");
00723                         continue;
00724                 }
00725                 if (strlen(tmpname) > PAXPATHLEN) {
00726                         tty_prnt("Try again, file name too long\n");
00727                         continue;
00728                 }
00729                 break;
00730         }
00731 
00732         /*
00733          * empty file name, skips this file. a "." leaves it alone
00734          */
00735         if (tmpname[0] == '\0') {
00736                 tty_prnt("Skipping file.\n");
00737                 return(1);
00738         }
00739         if ((tmpname[0] == '.') && (tmpname[1] == '\0')) {
00740                 tty_prnt("Processing continues, name unchanged.\n");
00741                 return(0);
00742         }
00743 
00744         /*
00745          * ok the name changed. We may run into links that point at this
00746          * file later. we have to remember where the user sent the file
00747          * in order to repair any links.
00748          */
00749         tty_prnt("Processing continues, name changed to: %s\n", tmpname);
00750         res = add_name(arcn->name, arcn->nlen, tmpname);
00751         arcn->nlen = l_strncpy(arcn->name, tmpname, sizeof(arcn->name) - 1);
00752         arcn->name[arcn->nlen] = '\0';
00753         if (res < 0)
00754                 return(-1);
00755         return(0);
00756 }
00757 
00758 /*
00759  * set_dest()
00760  *      fix up the file name and the link name (if any) so this file will land
00761  *      in the destination directory (used during copy() -rw).
00762  * Return:
00763  *      0 if ok, -1 if failure (name too long)
00764  */
00765 
00766 int
00767 set_dest(ARCHD *arcn, char *dest_dir, int dir_len)
00768 {
00769         if (fix_path(arcn->name, &(arcn->nlen), dest_dir, dir_len) < 0)
00770                 return(-1);
00771 
00772         /*
00773          * It is really hard to deal with symlinks here, we cannot be sure
00774          * if the name they point was moved (or will be moved). It is best to
00775          * leave them alone.
00776          */
00777         if ((arcn->type != PAX_HLK) && (arcn->type != PAX_HRG))
00778                 return(0);
00779 
00780         if (fix_path(arcn->ln_name, &(arcn->ln_nlen), dest_dir, dir_len) < 0)
00781                 return(-1);
00782         return(0);
00783 }
00784 
00785 /*
00786  * fix_path
00787  *      concatenate dir_name and or_name and store the result in or_name (if
00788  *      it fits). This is one ugly function.
00789  * Return:
00790  *      0 if ok, -1 if the final name is too long
00791  */
00792 
00793 static int
00794 fix_path( char *or_name, int *or_len, char *dir_name, int dir_len)
00795 {
00796         char *src;
00797         char *dest;
00798         char *start;
00799         int len;
00800 
00801         /*
00802          * we shift the or_name to the right enough to tack in the dir_name
00803          * at the front. We make sure we have enough space for it all before
00804          * we start. since dest always ends in a slash, we skip of or_name
00805          * if it also starts with one.
00806          */
00807         start = or_name;
00808         src = start + *or_len;
00809         dest = src + dir_len;
00810         if (*start == '/') {
00811                 ++start;
00812                 --dest;
00813         }
00814         if ((len = dest - or_name) > PAXPATHLEN) {
00815                 paxwarn(1, "File name %s/%s, too long", dir_name, start);
00816                 return(-1);
00817         }
00818         *or_len = len;
00819 
00820         /*
00821          * enough space, shift
00822          */
00823         while (src >= start)
00824                 *dest-- = *src--;
00825         src = dir_name + dir_len - 1;
00826 
00827         /*
00828          * splice in the destination directory name
00829          */
00830         while (src >= dir_name)
00831                 *dest-- = *src--;
00832 
00833         *(or_name + len) = '\0';
00834         return(0);
00835 }
00836 
00837 /*
00838  * rep_name()
00839  *      walk down the list of replacement strings applying each one in order.
00840  *      when we find one with a successful substitution, we modify the name
00841  *      as specified. if required, we print the results. if the resulting name
00842  *      is empty, we will skip this archive member. We use the regexp(3)
00843  *      routines (regexp() ought to win a prize as having the most cryptic
00844  *      library function manual page).
00845  *      --Parameters--
00846  *      name is the file name we are going to apply the regular expressions to
00847  *      (and may be modified)
00848  *      nlen is the length of this name (and is modified to hold the length of
00849  *      the final string).
00850  *      prnt is a flag that says whether to print the final result.
00851  * Return:
00852  *      0 if substitution was successful, 1 if we are to skip the file (the name
00853  *      ended up empty)
00854  */
00855 
00856 static int
00857 rep_name(char *name, int *nlen, int prnt)
00858 {
00859         REPLACE *pt;
00860         char *inpt;
00861         char *outpt;
00862         char *endpt;
00863         char *rpt;
00864         int found = 0;
00865         int res;
00866 #       ifndef NET2_REGEX
00867         regmatch_t pm[MAXSUBEXP];
00868 #       endif
00869         char nname[PAXPATHLEN+1];       /* final result of all replacements */
00870         char buf1[PAXPATHLEN+1];        /* where we work on the name */
00871 
00872         /*
00873          * copy the name into buf1, where we will work on it. We need to keep
00874          * the orig string around so we can print out the result of the final
00875          * replacement. We build up the final result in nname. inpt points at
00876          * the string we apply the regular expression to. prnt is used to
00877          * suppress printing when we handle replacements on the link field
00878          * (the user already saw that substitution go by)
00879          */
00880         pt = rephead;
00881         (void)strcpy(buf1, name);
00882         inpt = buf1;
00883         outpt = nname;
00884         endpt = outpt + PAXPATHLEN;
00885 
00886         /*
00887          * try each replacement string in order
00888          */
00889         while (pt != NULL) {
00890                 do {
00891                         /*
00892                          * check for a successful substitution, if not go to
00893                          * the next pattern, or cleanup if we were global
00894                          */
00895 #                       ifdef NET2_REGEX
00896                         if (regexec(pt->rcmp, inpt) == 0)
00897 #                       else
00898                         if (regexec(&(pt->rcmp), inpt, MAXSUBEXP, pm, 0) != 0)
00899 #                       endif
00900                                 break;
00901 
00902                         /*
00903                          * ok we found one. We have three parts, the prefix
00904                          * which did not match, the section that did and the
00905                          * tail (that also did not match). Copy the prefix to
00906                          * the final output buffer (watching to make sure we
00907                          * do not create a string too long).
00908                          */
00909                         found = 1;
00910 #                       ifdef NET2_REGEX
00911                         rpt = pt->rcmp->startp[0];
00912 #                       else
00913                         rpt = inpt + pm[0].rm_so;
00914 #                       endif
00915 
00916                         while ((inpt < rpt) && (outpt < endpt))
00917                                 *outpt++ = *inpt++;
00918                         if (outpt == endpt)
00919                                 break;
00920 
00921                         /*
00922                          * for the second part (which matched the regular
00923                          * expression) apply the substitution using the
00924                          * replacement string and place it the prefix in the
00925                          * final output. If we have problems, skip it.
00926                          */
00927 #                       ifdef NET2_REGEX
00928                         if ((res = resub(pt->rcmp,pt->nstr,outpt,endpt)) < 0) {
00929 #                       else
00930                         if ((res = resub(&(pt->rcmp),pm,pt->nstr,outpt,endpt))
00931                             < 0) {
00932 #                       endif
00933                                 if (prnt)
00934                                         paxwarn(1, "Replacement name error %s",
00935                                             name);
00936                                 return(1);
00937                         }
00938                         outpt += res;
00939 
00940                         /*
00941                          * we set up to look again starting at the first
00942                          * character in the tail (of the input string right
00943                          * after the last character matched by the regular
00944                          * expression (inpt always points at the first char in
00945                          * the string to process). If we are not doing a global
00946                          * substitution, we will use inpt to copy the tail to
00947                          * the final result. Make sure we do not overrun the
00948                          * output buffer
00949                          */
00950 #                       ifdef NET2_REGEX
00951                         inpt = pt->rcmp->endp[0];
00952 #                       else
00953                         inpt += pm[0].rm_eo - pm[0].rm_so;
00954 #                       endif
00955 
00956                         if ((outpt == endpt) || (*inpt == '\0'))
00957                                 break;
00958 
00959                         /*
00960                          * if the user wants global we keep trying to
00961                          * substitute until it fails, then we are done.
00962                          */
00963                 } while (pt->flgs & GLOB);
00964 
00965                 if (found)
00966                         break;
00967 
00968                 /*
00969                  * a successful substitution did NOT occur, try the next one
00970                  */
00971                 pt = pt->fow;
00972         }
00973 
00974         if (found) {
00975                 /*
00976                  * we had a substitution, copy the last tail piece (if there is
00977                  * room) to the final result
00978                  */
00979                 while ((outpt < endpt) && (*inpt != '\0'))
00980                         *outpt++ = *inpt++;
00981 
00982                 *outpt = '\0';
00983                 if ((outpt == endpt) && (*inpt != '\0')) {
00984                         if (prnt)
00985                                 paxwarn(1,"Replacement name too long %s >> %s",
00986                                     name, nname);
00987                         return(1);
00988                 }
00989 
00990                 /*
00991                  * inform the user of the result if wanted
00992                  */
00993                 if (prnt && (pt->flgs & PRNT)) {
00994                         if (*nname == '\0')
00995                                 (void)fprintf(stderr,"%s >> <empty string>\n",
00996                                     name);
00997                         else
00998                                 (void)fprintf(stderr,"%s >> %s\n", name, nname);
00999                 }
01000 
01001                 /*
01002                  * if empty inform the caller this file is to be skipped
01003                  * otherwise copy the new name over the orig name and return
01004                  */
01005                 if (*nname == '\0')
01006                         return(1);
01007                 *nlen = l_strncpy(name, nname, PAXPATHLEN + 1);
01008                 name[PAXPATHLEN] = '\0';
01009         }
01010         return(0);
01011 }
01012 
01013 #ifdef NET2_REGEX
01014 /*
01015  * resub()
01016  *      apply the replacement to the matched expression. expand out the old
01017  *      style ed(1) subexpression expansion.
01018  * Return:
01019  *      -1 if error, or the number of characters added to the destination.
01020  */
01021 
01022 static int
01023 resub(regexp *prog, char *src, char *dest, char *destend)
01024 {
01025         char *spt;
01026         char *dpt;
01027         char c;
01028         int no;
01029         int len;
01030 
01031         spt = src;
01032         dpt = dest;
01033         while ((dpt < destend) && ((c = *spt++) != '\0')) {
01034                 if (c == '&')
01035                         no = 0;
01036                 else if ((c == '\\') && (*spt >= '0') && (*spt <= '9'))
01037                         no = *spt++ - '0';
01038                 else {
01039                         if ((c == '\\') && ((*spt == '\\') || (*spt == '&')))
01040                                 c = *spt++;
01041                         *dpt++ = c;
01042                         continue;
01043                 }
01044                 if ((prog->startp[no] == NULL) || (prog->endp[no] == NULL) ||
01045                     ((len = prog->endp[no] - prog->startp[no]) <= 0))
01046                         continue;
01047 
01048                 /*
01049                  * copy the subexpression to the destination.
01050                  * fail if we run out of space or the match string is damaged
01051                  */
01052                 if (len > (destend - dpt))
01053                         len = destend - dpt;
01054                 if (l_strncpy(dpt, prog->startp[no], len) != len)
01055                         return(-1);
01056                 dpt += len;
01057         }
01058         return(dpt - dest);
01059 }
01060 
01061 #else
01062 
01063 /*
01064  * resub()
01065  *      apply the replacement to the matched expression. expand out the old
01066  *      style ed(1) subexpression expansion.
01067  * Return:
01068  *      -1 if error, or the number of characters added to the destination.
01069  */
01070 
01071 static int
01072 resub(regex_t *rp, regmatch_t *pm, char *src, char *dest,
01073         char *destend)
01074 {
01075         char *spt;
01076         char *dpt;
01077         char c;
01078         regmatch_t *pmpt;
01079         int len;
01080         int subexcnt;
01081 
01082         spt =  src;
01083         dpt = dest;
01084         subexcnt = rp->re_nsub;
01085         while ((dpt < destend) && ((c = *spt++) != '\0')) {
01086                 /*
01087                  * see if we just have an ordinary replacement character
01088                  * or we refer to a subexpression.
01089                  */
01090                 if (c == '&') {
01091                         pmpt = pm;
01092                 } else if ((c == '\\') && (*spt >= '0') && (*spt <= '9')) {
01093                         /*
01094                          * make sure there is a subexpression as specified
01095                          */
01096                         if ((len = *spt++ - '0') > subexcnt)
01097                                 return(-1);
01098                         pmpt = pm + len;
01099                 } else {
01100                         /*
01101                          * Ordinary character, just copy it
01102                          */
01103                         if ((c == '\\') && ((*spt == '\\') || (*spt == '&')))
01104                                 c = *spt++;
01105                         *dpt++ = c;
01106                         continue;
01107                 }
01108 
01109                 /*
01110                  * continue if the subexpression is bogus
01111                  */
01112                 if ((pmpt->rm_so < 0) || (pmpt->rm_eo < 0) ||
01113                     ((len = pmpt->rm_eo - pmpt->rm_so) <= 0))
01114                         continue;
01115 
01116                 /*
01117                  * copy the subexpression to the destination.
01118                  * fail if we run out of space or the match string is damaged
01119                  */
01120                 if (len > (destend - dpt))
01121                         len = destend - dpt;
01122                 if (l_strncpy(dpt, src + pmpt->rm_so, len) != len)
01123                         return(-1);
01124                 dpt += len;
01125         }
01126         return(dpt - dest);
01127 }
01128 #endif

Generated on Fri Apr 14 22:57:01 2006 for minix by  doxygen 1.4.6