sed.c

Go to the documentation of this file.
00001 /* sed - stream editor          Author: Eric S. Raymond */
00002 
00003 /* This used to be three different files with the following makefile:
00004  * (Note the chmem).
00005 
00006 CFLAGS= -F -T.
00007 
00008 OBJS=   sedcomp.s sedexec.s
00009 
00010 sed:    $(OBJS)
00011         cc -T. -o sed $(OBJS)
00012   @chmem =13312 sed
00013 
00014 $(OBJS):        sed.h
00015 
00016  * If you want longer lines: increase MAXBUF.
00017  * If you want scripts with more text: increase POOLSIZE.
00018  * If you want more commands per script: increase MAXCMDS.
00019  */
00020 
00021 #include <ctype.h>
00022 #include <sys/types.h>
00023 #include <stdlib.h>
00024 #include <string.h>
00025 #include <unistd.h>
00026 #include <stdio.h>
00027 
00028 /*+++++++++++++++*/
00029 
00030 /* Sed.h -- types and constants for the stream editor */
00031 
00032 /* Data area sizes used by both modules */
00033 #define MAXBUF          4000    /* current line buffer size */
00034 #define MAXAPPENDS      20      /* maximum number of appends */
00035 #define MAXTAGS         9       /* tagged patterns are \1 to \9 */
00036 
00037 /* Constants for compiled-command representation */
00038 #define EQCMD   0x01            /* = -- print current line number        */
00039 #define ACMD    0x02            /* a -- append text after current line   */
00040 #define BCMD    0x03            /* b -- branch to label                  */
00041 #define CCMD    0x04            /* c -- change current line              */
00042 #define DCMD    0x05            /* d -- delete all of pattern space */
00043 #define CDCMD   0x06            /* D -- delete first line of pattern space */
00044 #define GCMD    0x07            /* g -- copy hold space to pattern space */
00045 #define CGCMD   0x08            /* G -- append hold space to pattern space */
00046 #define HCMD    0x09            /* h -- copy pattern space to hold space */
00047 #define CHCMD   0x0A            /* H -- append pattern space to hold space */
00048 #define ICMD    0x0B            /* i -- insert text before current line  */
00049 #define LCMD    0x0C            /* l -- print pattern space in escaped form */
00050 #define NCMD    0x0D            /* n -- get next line into pattern space */
00051 #define CNCMD   0x0E            /* N -- append next line to pattern space */
00052 #define PCMD    0x0F            /* p -- print pattern space to output    */
00053 #define CPCMD   0x10            /* P -- print first line of pattern space */
00054 #define QCMD    0x11            /* q -- exit the stream editor           */
00055 #define RCMD    0x12            /* r -- read in a file after current line */
00056 #define SCMD    0x13            /* s -- regular-expression substitute    */
00057 #define TCMD    0x14            /* t -- branch on any substitute successful */
00058 #define CTCMD   0x15            /* T -- branch on any substitute failed  */
00059 #define WCMD    0x16            /* w -- write pattern space to file      */
00060 #define CWCMD   0x17            /* W -- write first line of pattern space */
00061 #define XCMD    0x18            /* x -- exchange pattern and hold spaces */
00062 #define YCMD    0x19            /* y -- transliterate text               */
00063 
00064 struct cmd_t {                  /* one compiled command; cmds[] is an array of these */
00065   char *addr1;                  /* first address (compiled, in pool); NULL if none */
00066   char *addr2;                  /* second address; NULL if no range was given */
00067   union {
00068         char *lhs;              /* compiled RE for s; also a/i/c/r text and y table */
00069         struct cmd_t *link;     /* branch or {}-group target; also chains pending branches */
00070   } u;
00071   char command;                 /* command code (one of the *CMD constants) */
00072   char *rhs;                    /* s command replacement string */
00073   FILE *fout;                   /* output stream for w/W and for the w flag of s */
00074   struct {
00075         char allbut;            /* was negation (!) specified? inverted for '{' */
00076         char global;            /* nonzero if g postfix or -g option was given */
00077         char print;             /* print postfix on s: 1 for p, 2 for P */
00078         char inrange;           /* in an address range? */
00079   } flags;
00080 };
00081 typedef struct cmd_t sedcmd;    /* use this name for declarations */
00082 
00083 #define BAD     ((char *) -1)   /* guaranteed not a string ptr */
00084 
00085 
00086 
00087 /* Address and regular expression compiled-form markers */
00088 #define STAR    1               /* marker for Kleene star */
00089 #define CCHR    2               /* non-newline character to be matched
00090                          * follows */
00091 #define CDOT    4               /* dot wild-card marker */
00092 #define CCL     6               /* character class follows */
00093 #define CNL     8               /* match line start */
00094 #define CDOL    10              /* match line end */
00095 #define CBRA    12              /* tagged pattern start marker */
00096 #define CKET    14              /* tagged pattern end marker */
00097 #define CBACK   16              /* backslash-digit pair marker */
00098 #define CLNUM   18              /* numeric-address index follows */
00099 #define CEND    20              /* symbol for end-of-source */
00100 #define CEOF    22              /* end-of-field mark */
00101 
00102 /* Sed.h ends here */
00103 
00104 #ifndef CMASK
00105 #define CMASK  0xFF             /* some char type should have been unsigned
00106                          * char? */
00107 #endif
00108 
00109 /*+++++++++++++++*/
00110 
00111 /* Sed - stream editor          Author: Eric S. Raymond */
00112 
00113 /*
00114    The stream editor compiles its command input  (from files or -e options)
00115    into an internal form using compile() then executes the compiled form using
00116    execute(). Main() just initializes data structures, interprets command line
00117    options, and calls compile() and execute() in appropriate sequence.
00118 
00119    The data structure produced by compile() is an array of compiled-command
00120    structures (type sedcmd).  These contain several pointers into pool[], the
00121    regular-expression and text-data pool, plus a command code and g & p flags.
00122    In the special case that the command is a label the struct  will hold a ptr
00123    into the labels array labels[] during most of the compile,  until resolve()
00124    resolves references at the end.
00125 
00126    The operation of execute() is described in its source module.
00127 */
00128 
00129 /* #include <stdio.h> */
00130 /* #include "sed.h"   */
00131 
00132 /* Imported functions */
00133 
00134 /***** public stuff ******/
00135 
00136 #define MAXCMDS         500     /* maximum number of compiled commands */
00137 #define MAXLINES        256     /* max # numeric addresses to compile */
00138 
00139 /* Main data areas */
00140 char linebuf[MAXBUF + 1];       /* current-line buffer */
00141 sedcmd cmds[MAXCMDS + 1];       /* hold compiled commands */
00142 long linenum[MAXLINES];         /* numeric-addresses table */
00143 
00144 /* Miscellaneous shared variables */
00145 int nflag;                      /* -n option flag */
00146 int eargc;                      /* scratch copy of argument count */
00147 char **eargv;                   /* scratch copy of argument list */
00148 char bits[] = {1, 2, 4, 8, 16, 32, 64, 128};
00149 
00150 /***** module common stuff *****/
00151 
00152 #define POOLSIZE        20000   /* size of string-pool space */
00153 #define WFILES          10      /* max # w output files that can be compiled */
00154 #define RELIMIT         256     /* max chars in compiled RE */
00155 #define MAXDEPTH        20      /* maximum {}-nesting level */
00156 #define MAXLABS         50      /* max # of labels that can be handled */
00157 
00158 #define SKIPWS(pc)      while ((*pc==' ') || (*pc=='\t')) pc++
00159 #define ABORT(msg)      (fprintf(stderr, msg, linebuf), quit(2))
00160 #define IFEQ(x, v)      if (*x == v) x++ ,      /* do expression */
00161 
00162 /* Error messages */
00163 static char AGMSG[] = "sed: garbled address %s\n";
00164 static char CGMSG[] = "sed: garbled command %s\n";
00165 static char TMTXT[] = "sed: too much text: %s\n";
00166 static char AD1NG[] = "sed: no addresses allowed for %s\n";
00167 static char AD2NG[] = "sed: only one address allowed for %s\n";
00168 static char TMCDS[] = "sed: too many commands, last was %s\n";
00169 static char COCFI[] = "sed: cannot open command-file %s\n";
00170 static char UFLAG[] = "sed: unknown flag %c\n";
00171 static char CCOFI[] = "sed: cannot create %s\n";
00172 static char ULABL[] = "sed: undefined label %s\n";
00173 static char TMLBR[] = "sed: too many {'s\n";
00174 static char FRENL[] = "sed: first RE must be non-null\n";
00175 static char NSCAX[] = "sed: no such command as %s\n";
00176 static char TMRBR[] = "sed: too many }'s\n";
00177 static char DLABL[] = "sed: duplicate label %s\n";
00178 static char TMLAB[] = "sed: too many labels: %s\n";
00179 static char TMWFI[] = "sed: too many w files\n";
00180 static char REITL[] = "sed: RE too long: %s\n";
00181 static char TMLNR[] = "sed: too many line numbers\n";
00182 static char TRAIL[] = "sed: command \"%s\" has trailing garbage\n";
00183 
00184 typedef struct {                /* one entry of the label table */
00185   char *name;                   /* the label name (stored in pool) */
00186   sedcmd *last;                 /* first branch cmd still waiting for this label; NULL if none */
00187   sedcmd *address;              /* the cmd it labels; NULL until the label is declared */
00188 }
00189 
00190  label;
00191 
00192 /* Label handling */
00193 static label labels[MAXLABS];   /* here's the label table */
00194 static label *lab = labels + 1; /* pointer to current label */
00195 static label *lablst = labels;  /* header for search list */
00196 
00197 /* String pool for regular expressions, append text, etc. etc. */
00198 static char pool[POOLSIZE];     /* the pool */
00199 static char *fp = pool;         /* current pool pointer */
00200 static char *poolend = pool + POOLSIZE; /* pointer past pool end */
00201 
00202 /* Compilation state */
00203 static FILE *cmdf = NULL;       /* current command source */
00204 static char *cp = linebuf;      /* compile pointer */
00205 static sedcmd *cmdp = cmds;     /* current compiled-cmd ptr */
00206 static char *lastre = NULL;     /* old RE pointer */
00207 static int bdepth = 0;          /* current {}-nesting level */
00208 static int bcount = 0;          /* # tagged patterns in current RE */
00209 
00210 /* Compilation flags */
00211 static int eflag;               /* -e option flag */
00212 static int gflag;               /* -g option flag */
00213 
00214 _PROTOTYPE(int main, (int argc, char **argv));
00215 _PROTOTYPE(static void compile, (void));
00216 _PROTOTYPE(static int cmdcomp, (int cchar));
00217 _PROTOTYPE(static char *rhscomp, (char *rhsp, int delim));
00218 _PROTOTYPE(static char *recomp, (char *expbuf, int redelim));
00219 _PROTOTYPE(static int cmdline, (char *cbuf));
00220 _PROTOTYPE(static char *address, (char *expbuf));
00221 _PROTOTYPE(static char *gettext, (char *txp));
00222 _PROTOTYPE(static label *search, (label *ptr));
00223 _PROTOTYPE(static void resolve, (void));
00224 _PROTOTYPE(static char *ycomp, (char *ep, int delim));
00225 _PROTOTYPE(void quit, (int n));
00226 _PROTOTYPE(void execute, (void));
00227 _PROTOTYPE(static int selected, (sedcmd *ipc));
00228 _PROTOTYPE(static int match, (char *expbuf, int gf));
00229 _PROTOTYPE(static int advance, (char *lp, char *ep));
00230 _PROTOTYPE(static int substitute, (sedcmd *ipc));
00231 _PROTOTYPE(static void dosub, (char *rhsbuf));
00232 _PROTOTYPE(static char *place, (char *asp, char *al1, char *al2));
00233 _PROTOTYPE(static void listto, (char *p1, FILE *fp));
00234 _PROTOTYPE(static void truncated, (int h));
00235 _PROTOTYPE(static void command, (sedcmd *ipc));
00236 _PROTOTYPE(static void openfile, (char *file));
00237 _PROTOTYPE(static void get, (void));
00238 _PROTOTYPE(static void initget, (void));
00239 _PROTOTYPE(static char *getline, (char *buf));
00240 _PROTOTYPE(static int Memcmp, (char *a, char *b, int count));
00241 _PROTOTYPE(static void readout, (void));
00242 
00243 int main(argc, argv)
00244 /* Main sequence of the stream editor */
00245 int argc;
00246 char *argv[];
00247 {
00248   eargc = argc;                 /* set local copy of argument count */
00249   eargv = argv;                 /* set local copy of argument list */
00250   cmdp->addr1 = pool;           /* 1st addr expand will be at pool start */
00251   if (eargc == 1) quit(0);      /* exit immediately if no arguments */
00252   /* Scan through the arguments, interpreting each one */
00253   while ((--eargc > 0) && (**++eargv == '-')) switch (eargv[0][1]) {
00254             case 'e':
00255                 eflag++;
00256                 compile();      /* compile with e flag on */
00257                 eflag = 0;
00258                 continue;       /* get another argument */
00259             case 'f':
00260                 if (eargc-- <= 0)       /* barf if no -f file */
00261                         quit(2);
00262                 if ((cmdf = fopen(*++eargv, "r")) == NULL) {
00263                         fprintf(stderr, COCFI, *eargv);
00264                         quit(2);
00265                 }
00266                 compile();      /* file is O.K., compile it */
00267                 fclose(cmdf);
00268                 continue;       /* go back for another argument */
00269             case 'g':
00270                 gflag++;        /* set global flag on all s cmds */
00271                 continue;
00272             case 'n':
00273                 nflag++;        /* no print except on p flag or w */
00274                 continue;
00275             default:
00276                 fprintf(stdout, UFLAG, eargv[0][1]);
00277                 continue;
00278         }
00279 
00280 
00281   if (cmdp == cmds) {           /* no commands have been compiled */
00282         eargv--;
00283         eargc++;
00284         eflag++;
00285         compile();
00286         eflag = 0;
00287         eargv++;
00288         eargc--;
00289   }
00290   if (bdepth)                   /* we have unbalanced squigglies */
00291         ABORT(TMLBR);
00292 
00293   lablst->address = cmdp;       /* set up header of label linked list */
00294   resolve();                    /* resolve label table indirections */
00295   execute();                    /* execute commands */
00296   quit(0);                      /* everything was O.K. if we got here */
00297   return(0);
00298 }
00299 
00300 
00301 #define H       0x80            /* 128 bit: set when cmdcomp() must run for
00302                          * the command at compile time */
00303 #define LOWCMD  56              /* = '8', lowest char indexed in cmdmask */
00304 
00305 /* Indexed by (command char - LOWCMD); a zero entry means no such command.
         * The low 7 bits are the internal command code, the H bit is described above. */
00306 static char cmdmask[] =
00307 {
00308  0, 0, H, 0, 0, H + EQCMD, 0, 0,                  /* '8'..'?'  (':' and '=') */
00309  0, 0, 0, 0, H + CDCMD, 0, 0, CGCMD,              /* '@'..'G' */
00310  CHCMD, 0, 0, 0, 0, 0, CNCMD, 0,                  /* 'H'..'O' */
00311  CPCMD, 0, 0, 0, H + CTCMD, 0, 0, H + CWCMD,      /* 'P'..'W' */
00312  0, 0, 0, 0, 0, 0, 0, 0,                          /* 'X'..'_' */
00313  0, H + ACMD, H + BCMD, H + CCMD, DCMD, 0, 0, GCMD,       /* '`'..'g' */
00314  HCMD, H + ICMD, 0, 0, H + LCMD, 0, NCMD, 0,              /* 'h'..'o' */
00315  PCMD, H + QCMD, H + RCMD, H + SCMD, H + TCMD, 0, 0, H + WCMD,    /* 'p'..'w' */
00316  XCMD, H + YCMD, 0, H + BCMD, 0, H, 0, 0,         /* 'x'..DEL; '{' is stored as a branch */
00317 };
00318 
00319 static void compile()
00320 /* Precompile sed commands out of a file */
00321 {
00322   char ccode;
00323 
00324 
00325   for (;;) {                    /* main compilation loop */
00326         if (*cp == '\0') {      /* get a new command line */
00327                 *linebuf = '\0';        /* K.H */
00328                 if (cmdline(cp = linebuf) < 0) break;
00329         }
00330         SKIPWS(cp);
00331         if (*cp == '\0')        /* empty */
00332                 continue;
00333         if (*cp == '#') {       /* comment */
00334                 while (*cp) ++cp;
00335                 continue;
00336         }
00337         if (*cp == ';') {       /* ; separates cmds */
00338                 cp++;
00339                 continue;
00340         }
00341 
00342         /* Compile first address */
00343         if (fp > poolend)
00344                 ABORT(TMTXT);
00345         else if ((fp = address(cmdp->addr1 = fp)) == BAD)
00346                 ABORT(AGMSG);
00347 
00348         if (fp == cmdp->addr1) {/* if empty RE was found */
00349                 if (lastre)     /* if there was previous RE */
00350                         cmdp->addr1 = lastre;   /* use it */
00351                 else
00352                         ABORT(FRENL);
00353         } else if (fp == NULL) {/* if fp was NULL */
00354                 fp = cmdp->addr1;       /* use current pool location */
00355                 cmdp->addr1 = NULL;
00356         } else {
00357                 lastre = cmdp->addr1;
00358                 if (*cp == ',' || *cp == ';') { /* there's 2nd addr */
00359                         cp++;
00360                         if (fp > poolend) ABORT(TMTXT);
00361                         fp = address(cmdp->addr2 = fp);
00362                         if (fp == BAD || fp == NULL) ABORT(AGMSG);
00363                         if (fp == cmdp->addr2)
00364                                 cmdp->addr2 = lastre;
00365                         else
00366                                 lastre = cmdp->addr2;
00367                 } else
00368                         cmdp->addr2 = NULL;     /* no 2nd address */
00369         }
00370         if (fp > poolend) ABORT(TMTXT);
00371 
00372         SKIPWS(cp);             /* discard whitespace after address */
00373         IFEQ(cp, '!') cmdp->flags.allbut = 1;
00374 
00375         SKIPWS(cp);             /* get cmd char, range-check it */
00376         if ((*cp < LOWCMD) || (*cp > '~')
00377             || ((ccode = cmdmask[*cp - LOWCMD]) == 0))
00378                 ABORT(NSCAX);
00379 
00380         cmdp->command = ccode & ~H;     /* fill in command value */
00381         if ((ccode & H) == 0)   /* if no compile-time code */
00382                 cp++;           /* discard command char */
00383         else if (cmdcomp(*cp++))/* execute it; if ret = 1 */
00384                 continue;       /* skip next line read */
00385 
00386         if (++cmdp >= cmds + MAXCMDS) ABORT(TMCDS);
00387 
00388         SKIPWS(cp);             /* look for trailing stuff */
00389         if (*cp != '\0' && *cp != ';' && *cp != '#') ABORT(TRAIL);
00390   }
00391 }
00392 
00393 static int cmdcomp(cchar)
00394 /* Compile a single command */
00395 register char cchar;            /* character name of command */
00396 {
00397   static sedcmd **cmpstk[MAXDEPTH];     /* current cmd stack for {} */
00398   static char *fname[WFILES];   /* w file name pointers */
00399   static FILE *fout[WFILES];    /* w file file ptrs */
00400   static int nwfiles = 1;       /* count of open w files */
00401   int i;                        /* indexing dummy used in w */
00402   sedcmd *sp1, *sp2;            /* temps for label searches */
00403   label *lpt;
00404   char redelim;                 /* current RE delimiter */
00405 
00406   fout[0] = stdout;
00407   switch (cchar) {
00408       case '{':                 /* start command group */
00409         cmdp->flags.allbut = !cmdp->flags.allbut;
00410         cmpstk[bdepth++] = &(cmdp->u.link);
00411         if (++cmdp >= cmds + MAXCMDS) ABORT(TMCDS);
00412         return(1);
00413 
00414       case '}':                 /* end command group */
00415         if (cmdp->addr1) ABORT(AD1NG);  /* no addresses allowed */
00416         if (--bdepth < 0) ABORT(TMRBR); /* too many right braces */
00417         *cmpstk[bdepth] = cmdp; /* set the jump address */
00418         return(1);
00419 
00420       case '=':                 /* print current source line number */
00421       case 'q':                 /* exit the stream editor */
00422         if (cmdp->addr2) ABORT(AD2NG);
00423         break;
00424 
00425       case ':':                 /* label declaration */
00426         if (cmdp->addr1) ABORT(AD1NG);  /* no addresses allowed */
00427         fp = gettext(lab->name = fp);   /* get the label name */
00428         if (lpt = search(lab)) {/* does it have a double? */
00429                 if (lpt->address) ABORT(DLABL); /* yes, abort */
00430         } else {                /* check that it doesn't overflow label table */
00431                 lab->last = NULL;
00432                 lpt = lab;
00433                 if (++lab >= labels + MAXLABS) ABORT(TMLAB);
00434         }
00435         lpt->address = cmdp;
00436         return(1);
00437 
00438       case 'b':                 /* branch command */
00439       case 't':                 /* branch-on-succeed command */
00440       case 'T':                 /* branch-on-fail command */
00441         SKIPWS(cp);
00442         if (*cp == '\0') {      /* if branch is to start of cmds... */
00443                 /* Add current command to end of label last */
00444                 if (sp1 = lablst->last) {
00445                         while (sp2 = sp1->u.link) sp1 = sp2;
00446                         sp1->u.link = cmdp;
00447                 } else          /* lablst->last == NULL */
00448                         lablst->last = cmdp;
00449                 break;
00450         }
00451         fp = gettext(lab->name = fp);   /* else get label into pool */
00452         if (lpt = search(lab)) {/* enter branch to it */
00453                 if (lpt->address)
00454                         cmdp->u.link = lpt->address;
00455                 else {
00456                         sp1 = lpt->last;
00457                         while (sp2 = sp1->u.link) sp1 = sp2;
00458                         sp1->u.link = cmdp;
00459                 }
00460         } else {                /* matching named label not found */
00461                 lab->last = cmdp;       /* add the new label */
00462                 lab->address = NULL;    /* it's forward of here */
00463                 if (++lab >= labels + MAXLABS)  /* overflow if last */
00464                         ABORT(TMLAB);
00465         }
00466         break;
00467 
00468       case 'a':                 /* append text */
00469       case 'i':                 /* insert text */
00470       case 'r':                 /* read file into stream */
00471         if (cmdp->addr2) ABORT(AD2NG);
00472       case 'c':                 /* change text */
00473         if ((*cp == '\\') && (*++cp == '\n')) cp++;
00474         fp = gettext(cmdp->u.lhs = fp);
00475         break;
00476 
00477       case 'D':                 /* delete current line in hold space */
00478         cmdp->u.link = cmds;
00479         break;
00480 
00481       case 's':                 /* substitute regular expression */
00482         redelim = *cp++;        /* get delimiter from 1st ch */
00483         if ((fp = recomp(cmdp->u.lhs = fp, redelim)) == BAD) ABORT(CGMSG);
00484         if (fp == cmdp->u.lhs)  /* if compiled RE zero len */
00485                 cmdp->u.lhs = lastre;   /* use the previous one */
00486         else                    /* otherwise */
00487                 lastre = cmdp->u.lhs;   /* save the one just found */
00488         if ((cmdp->rhs = fp) > poolend) ABORT(TMTXT);
00489         if ((fp = rhscomp(cmdp->rhs, redelim)) == BAD) ABORT(CGMSG);
00490         if (gflag) cmdp->flags.global ++;
00491         while (*cp == 'g' || *cp == 'p' || *cp == 'P') {
00492                 IFEQ(cp, 'g') cmdp->flags.global ++;
00493                 IFEQ(cp, 'p') cmdp->flags.print = 1;
00494                 IFEQ(cp, 'P') cmdp->flags.print = 2;
00495         }
00496 
00497       case 'l':                 /* list pattern space */
00498         if (*cp == 'w')
00499                 cp++;           /* and execute a w command! */
00500         else
00501                 break;          /* s or l is done */
00502 
00503       case 'w':                 /* write-pattern-space command */
00504       case 'W':                 /* write-first-line command */
00505         if (nwfiles >= WFILES) ABORT(TMWFI);
00506         fp = gettext(fname[nwfiles] = fp);      /* filename will be in pool */
00507         for (i = nwfiles - 1; i >= 0; i--)      /* match it in table */
00508                 if ((fname[i] != NULL) &&
00509                     (strcmp(fname[nwfiles], fname[i]) == 0)) {
00510                         cmdp->fout = fout[i];
00511                         return(0);
00512                 }
00513 
00514         /* If didn't find one, open new out file */
00515         if ((cmdp->fout = fopen(fname[nwfiles], "w")) == NULL) {
00516                 fprintf(stderr, CCOFI, fname[nwfiles]);
00517                 quit(2);
00518         }
00519         fout[nwfiles++] = cmdp->fout;
00520         break;
00521 
00522       case 'y':                 /* transliterate text */
00523         fp = ycomp(cmdp->u.lhs = fp, *cp++);    /* compile translit */
00524         if (fp == BAD) ABORT(CGMSG);    /* fail on bad form */
00525         if (fp > poolend) ABORT(TMTXT); /* fail on overflow */
00526         break;
00527   }
00528   return(0);                    /* succeeded in interpreting one command */
00529 }
00530 
00531 static char *rhscomp(rhsp, delim)       /* uses bcount */
00532  /* Generate replacement string for substitute command right hand side */
00533 register char *rhsp;            /* place to compile expression to */
00534 register char delim;            /* regular-expression end-mark to look for */
00535 {
00536   register char *p = cp;        /* strictly for speed */
00537 
00538   for (;;)
00539         if ((*rhsp = *p++) == '\\') {   /* copy; if it's a \, */
00540                 *rhsp = *p++;   /* copy escaped char */
00541                 /* Check validity of pattern tag */
00542                 if (*rhsp > bcount + '0' && *rhsp <= '9') return(BAD);
00543                 *rhsp++ |= 0x80;/* mark the good ones */
00544                 continue;
00545         } else if (*rhsp == delim) {    /* found RE end, hooray... */
00546                 *rhsp++ = '\0'; /* cap the expression string */
00547                 cp = p;
00548                 return(rhsp);   /* pt at 1 past the RE */
00549         } else if (*rhsp++ == '\0')     /* last ch not RE end, help! */
00550                 return(BAD);
00551 }
00552 
00553 static char *recomp(expbuf, redelim)    /* uses cp, bcount */
00554  /* Compile a regular expression to internal form */
00555 char *expbuf;                   /* place to compile it to */
00556 char redelim;                   /* RE end-marker to look for */
00557 {
00558   register char *ep = expbuf;   /* current-compiled-char pointer */
00559   register char *sp = cp;       /* source-character ptr */
00560   register int c;               /* current-character pointer */
00561   char negclass;                /* all-but flag */
00562   char *lastep;                 /* ptr to last expr compiled */
00563   char *svclass;                /* start of current char class */
00564   char brnest[MAXTAGS];         /* bracket-nesting array */
00565   char *brnestp;                /* ptr to current bracket-nest */
00566   int classct;                  /* class element count */
00567   int tags;                     /* # of closed tags */
00568 
00569   if (*cp == redelim)           /* if first char is RE endmarker */
00570         return(cp++, expbuf);   /* leave existing RE unchanged */
00571 
00572   lastep = NULL;                /* there's no previous RE */
00573   brnestp = brnest;             /* initialize ptr to brnest array */
00574   tags = bcount = 0;            /* initialize counters */
00575 
00576   if (*ep++ = (*sp == '^'))     /* check for start-of-line syntax */
00577         sp++;
00578 
00579   for (;;) {
00580         if (ep >= expbuf + RELIMIT)     /* match is too large */
00581                 return(cp = sp, BAD);
00582         if ((c = *sp++) == redelim) {   /* found the end of the RE */
00583                 cp = sp;
00584                 if (brnestp != brnest)  /* \(, \) unbalanced */
00585                         return(BAD);
00586                 *ep++ = CEOF;   /* write end-of-pattern mark */
00587                 return(ep);     /* return ptr to compiled RE */
00588         }
00589         if (c != '*')           /* if we're a postfix op */
00590                 lastep = ep;    /* get ready to match last */
00591 
00592         switch (c) {
00593             case '\\':
00594                 if ((c = *sp++) == '(') {       /* start tagged section */
00595                         if (bcount >= MAXTAGS) return(cp = sp, BAD);
00596                         *brnestp++ = bcount;    /* update tag stack */
00597                         *ep++ = CBRA;   /* enter tag-start */
00598                         *ep++ = bcount++;       /* bump tag count */
00599                         continue;
00600                 } else if (c == ')') {  /* end tagged section */
00601                         if (brnestp <= brnest)  /* extra \) */
00602                                 return(cp = sp, BAD);
00603                         *ep++ = CKET;   /* enter end-of-tag */
00604                         *ep++ = *--brnestp;     /* pop tag stack */
00605                         tags++; /* count closed tags */
00606                         continue;
00607                 } else if (c >= '1' && c <= '9') {      /* tag use */
00608                         if ((c -= '1') >= tags) /* too few */
00609                                 return(BAD);
00610                         *ep++ = CBACK;  /* enter tag mark */
00611                         *ep++ = c;      /* and the number */
00612                         continue;
00613                 } else if (c == '\n')   /* escaped newline no good */
00614                         return(cp = sp, BAD);
00615                 else if (c == 'n')      /* match a newline */
00616                         c = '\n';
00617                 else if (c == 't')      /* match a tab */
00618                         c = '\t';
00619                 else if (c == 'r')      /* match a return */
00620                         c = '\r';
00621                 goto defchar;
00622 
00623             case '\0':          /* ignore nuls */
00624                 continue;
00625 
00626             case '\n':          /* trailing pattern delimiter is missing */
00627                 return(cp = sp, BAD);
00628 
00629             case '.':           /* match any char except newline */
00630                 *ep++ = CDOT;
00631                 continue;
00632             case '*':           /* 0..n repeats of previous pattern */
00633                 if (lastep == NULL)     /* if * isn't first on line */
00634                         goto defchar;   /* match a literal * */
00635                 if (*lastep == CKET)    /* can't iterate a tag */
00636                         return(cp = sp, BAD);
00637                 *lastep |= STAR;/* flag previous pattern */
00638                 continue;
00639 
00640             case '$':           /* match only end-of-line */
00641                 if (*sp != redelim)     /* if we're not at end of RE */
00642                         goto defchar;   /* match a literal $ */
00643                 *ep++ = CDOL;   /* insert end-symbol mark */
00644                 continue;
00645 
00646             case '[':           /* begin character set pattern */
00647                 if (ep + 17 >= expbuf + RELIMIT) ABORT(REITL);
00648                 *ep++ = CCL;    /* insert class mark */
00649                 if (negclass = ((c = *sp++) == '^')) c = *sp++;
00650                 svclass = sp;   /* save ptr to class start */
00651                 do {
00652                         if (c == '\0') ABORT(CGMSG);
00653 
00654                         /* Handle character ranges */
00655                         if (c == '-' && sp > svclass && *sp != ']')
00656                                 for (c = sp[-2]; c < *sp; c++)
00657                                         ep[c >> 3] |= bits[c & 7];
00658 
00659                         /* Handle escape sequences in sets */
00660                         if (c == '\\')
00661                                 if ((c = *sp++) == 'n')
00662                                         c = '\n';
00663                                 else if (c == 't')
00664                                         c = '\t';
00665                                 else if (c == 'r')
00666                                         c = '\r';
00667 
00668                         /* Enter (possibly translated) char in set */
00669                         ep[c >> 3] |= bits[c & 7];
00670                 } while
00671                         ((c = *sp++) != ']');
00672 
00673                 /* Invert the bitmask if all-but was specified */
00674                 if (negclass) for (classct = 0; classct < 16; classct++)
00675                                 ep[classct] ^= 0xFF;
00676                 ep[0] &= 0xFE;  /* never match ASCII 0 */
00677                 ep += 16;       /* advance ep past set mask */
00678                 continue;
00679 
00680   defchar:                      /* match literal character */
00681             default:            /* which is what we'd do by default */
00682                 *ep++ = CCHR;   /* insert character mark */
00683                 *ep++ = c;
00684         }
00685   }
00686 }
00687 
00688 static int cmdline(cbuf)        /* uses eflag, eargc, cmdf */
00689  /* Read next command from -e argument or command file */
00690 register char *cbuf;
00691 {
00692   register int inc;             /* not char because must hold EOF */
00693 
00694   *cbuf-- = 0;                  /* so pre-increment points us at cbuf */
00695 
00696   /* E command flag is on */
00697   if (eflag) {
00698         register char *p;       /* ptr to current -e argument */
00699         static char *savep;     /* saves previous value of p */
00700 
00701         if (eflag > 0) {        /* there are pending -e arguments */
00702                 eflag = -1;
00703                 if (eargc-- <= 0) quit(2);      /* if no arguments, barf */
00704 
00705                 /* Else transcribe next e argument into cbuf */
00706                 p = *++eargv;
00707                 while (*++cbuf = *p++)
00708                         if (*cbuf == '\\') {
00709                                 if ((*++cbuf = *p++) == '\0')
00710                                         return(savep = NULL, -1);
00711                                 else
00712                                         continue;
00713                         } else if (*cbuf == '\n') {     /* end of 1 cmd line */
00714                                 *cbuf = '\0';
00715                                 return(savep = p, 1);
00716                                 /* We'll be back for the rest... */
00717                         }
00718 
00719                 /* Found end-of-string; can advance to next argument */
00720                 return(savep = NULL, 1);
00721         }
00722         if ((p = savep) == NULL) return(-1);
00723 
00724         while (*++cbuf = *p++)
00725                 if (*cbuf == '\\') {
00726                         if ((*++cbuf = *p++) == '0')
00727                                 return(savep = NULL, -1);
00728                         else
00729                                 continue;
00730                 } else if (*cbuf == '\n') {
00731                         *cbuf = '\0';
00732                         return(savep = p, 1);
00733                 }
00734         return(savep = NULL, 1);
00735   }
00736 
00737   /* If no -e flag read from command file descriptor */
00738   while ((inc = getc(cmdf)) != EOF)     /* get next char */
00739         if ((*++cbuf = inc) == '\\')    /* if it's escape */
00740                 *++cbuf = inc = getc(cmdf);     /* get next char */
00741         else if (*cbuf == '\n') /* end on newline */
00742                 return(*cbuf = '\0', 1);        /* cap the string */
00743 
00744   return(*++cbuf = '\0', -1);   /* end-of-file, no more chars */
00745 }
00746 
00747 static char *address(expbuf)    /* uses cp, linenum */
00748  /* Expand an address at *cp... into expbuf, return ptr at following char */
00749 register char *expbuf;
00750 {
00751   static int numl = 0;          /* current ind in addr-number table */
00752   register char *rcp;           /* temp compile ptr for forwd look */
00753   long lno;                     /* computed value of numeric address */
00754 
00755   if (*cp == '$') {             /* end-of-source address */
00756         *expbuf++ = CEND;       /* write symbolic end address */
00757         *expbuf++ = CEOF;       /* and the end-of-address mark (!) */
00758         cp++;                   /* go to next source character */
00759         return(expbuf); /* we're done */
00760   }
00761   if (*cp == '/' || *cp == '\\') { /* start of regular-expression match */
00762         if (*cp == '\\') cp++;
00763         return(recomp(expbuf, *cp++));  /* compile the RE */
00764   }
00765 
00766   rcp = cp;
00767   lno = 0;                      /* now handle a numeric address */
00768   while (*rcp >= '0' && *rcp <= '9')    /* collect digits */
00769         lno = lno * 10 + *rcp++ - '0';  /* compute their value */
00770 
00771   if (rcp > cp) {               /* if we caught a number... */
00772         *expbuf++ = CLNUM;      /* put a numeric-address marker */
00773         *expbuf++ = numl;       /* and the address table index */
00774         linenum[numl++] = lno;  /* and set the table entry */
00775         if (numl >= MAXLINES)   /* oh-oh, address table overflow */
00776                 ABORT(TMLNR);   /* abort with error message */
00777         *expbuf++ = CEOF;       /* write the end-of-address marker */
00778         cp = rcp;               /* point compile past the address */
00779         return(expbuf); /* we're done */
00780   }
00781   return(NULL);                 /* no legal address was found */
00782 }
00783 
00784 static char *gettext(txp)       /* uses global cp */
00785  /* Accept multiline input from *cp..., discarding leading whitespace */
00786 register char *txp;             /* where to put the text */
00787 {
00788   register char *p = cp;        /* this is for speed */
00789 
00790   SKIPWS(p);                    /* discard whitespace */
00791   do {
00792         if ((*txp = *p++) == '\\')      /* handle escapes */
00793                 *txp = *p++;
00794         if (*txp == '\0')       /* we're at end of input */
00795                 return(cp = --p, ++txp);
00796         else if (*txp == '\n')  /* also SKIPWS after newline */
00797                 SKIPWS(p);
00798   } while
00799         (txp++);                /* keep going till we find that nul */
00800   return(txp);
00801 }
00802 
00803 static label *search(ptr)       /* uses global lablst */
00804  /* Find the label matching *ptr, return NULL if none */
00805 register label *ptr;
00806 {
00807   register label *rp;
00808   for (rp = lablst; rp < ptr; rp++)
00809         if ((rp->name != NULL) && (strcmp(rp->name, ptr->name) == 0))
00810                 return(rp);
00811   return(NULL);
00812 }
00813 
00814 static void resolve()
00815 {                               /* uses global lablst */
00816   /* Write label links into the compiled-command space */
00817   register label *lptr;
00818   register sedcmd *rptr, *trptr;
00819 
00820   /* Loop through the label table */
00821   for (lptr = lablst; lptr < lab; lptr++)
00822         if (lptr->address == NULL) {    /* barf if not defined */
00823                 fprintf(stderr, ULABL, lptr->name);
00824                 quit(2);
00825         } else if (lptr->last) {/* if last is non-null */
00826                 rptr = lptr->last;      /* chase it */
00827                 while (trptr = rptr->u.link) {  /* resolve refs */
00828                         rptr->u.link = lptr->address;
00829                         rptr = trptr;
00830                 }
00831                 rptr->u.link = lptr->address;
00832         }
00833 }
00834 
00835 static char *ycomp(ep, delim)
00836 /* Compile a y (transliterate) command */
00837 register char *ep;              /* where to compile to */
00838 char delim;                     /* end delimiter to look for */
00839 {
00840   register char *tp, *sp;
00841   register int c;
00842 
00843   /* Scan the 'from' section for invalid chars */
00844   for (sp = tp = cp; *tp != delim; tp++) {
00845         if (*tp == '\\') tp++;
00846         if ((*tp == '\n') || (*tp == '\0')) return (BAD);
00847   }
00848   tp++;                         /* tp now points at first char of 'to'
00849                          * section */
00850 
00851   /* Now rescan the 'from' section */
00852   while ((c = *sp++ & 0x7F) != delim) {
00853         if (c == '\\' && *sp == 'n') {
00854                 sp++;
00855                 c = '\n';
00856         }
00857         if ((ep[c] = *tp++) == '\\' && *tp == 'n') {
00858                 ep[c] = '\n';
00859                 tp++;
00860         }
00861         if ((ep[c] == delim) || (ep[c] == '\0')) return(BAD);
00862   }
00863 
00864   if (*tp != delim)             /* 'to', 'from' parts have unequal lengths */
00865         return(BAD);
00866 
00867   cp = ++tp;                    /* point compile ptr past translit */
00868 
00869   for (c = 0; c < 128; c++)     /* fill in self-map entries in table */
00870         if (ep[c] == 0) ep[c] = c;
00871 
00872   return(ep + 0x80);            /* return first free location past table end */
00873 }
00874 
00875 void quit(n)
00876 int n;
00877 {
00878 /* Terminate the program with exit status n.  This once flushed the output
00879  * buffers itself; it is now a historical relic that simply calls exit() and
00880  * relies on it to flush the buffers.  */
00881   exit(n);
00882 }
00883 
00884 /*+++++++++++++++*/
00885 
00886 /*
00887    sedexec.c -- execute compiled form of stream editor commands
00888 
00889    The single entry point of this module is the function execute(). This
00890    note once described a string argument (a file name, or NULL to filter
00891    standard input), but execute() as defined below takes no arguments. It
00892    executes the compiled commands in cmds[] on each line in turn.
00893 
00894    The function command() does most of the work. Match() and advance()
00895    are used for matching text against precompiled regular expressions and
00896    dosub() does right-hand-side substitution.  Getline() does text input;
00897    readout() and Memcmp() are output and string-comparison utilities.
00898 */
00899 
00900 /* #include <stdio.h>   */
00901 /* #include <ctype.h>   */
00902 /* #include "sed.h"     */
00903 
00904 /***** shared variables imported from the main ******/
00905 
00906 /* Main data areas */
00907 extern char linebuf[];          /* current-line buffer (the pattern space) */
00908 extern sedcmd cmds[];           /* hold compiled commands */
00909 extern long linenum[];          /* numeric-addresses table */
00910 
00911 /* Miscellaneous shared variables */
00912 extern int nflag;               /* -n option flag */
00913 extern int eargc;               /* scratch copy of argument count */
00914 extern char **eargv;            /* scratch copy of argument list */
00915 extern char bits[];             /* the bits table: bits[i] masks bit i of a set byte */
00916 
00917 /***** end of imported stuff *****/
00918 
00919 #define MAXHOLD  MAXBUF         /* size of the hold space */
00920 #define GENSIZ   MAXBUF         /* maximum genbuf size */
00921 
00922 #define TRUE     1
00923 #define FALSE    0
00924 
00925 static char LTLMSG[] = "sed: line too long\n";
00926 
00927 static char *spend;             /* points at the NUL that ends the text in linebuf */
00928 static long lnum = 0L;          /* current source line number */
00929 
00930 /* Append buffer maintenance */
00931 static sedcmd *appends[MAXAPPENDS];     /* a and r commands queued for readout() */
00932 static sedcmd **aptr = appends; /* next free slot in appends[] */
00933 
00934 /* Genbuf and its pointers */
00935 static char genbuf[GENSIZ];     /* scratch line: dosub() builds its result here */
00936 static char *loc1;              /* start of the most recent match in linebuf */
00937 static char *loc2;              /* end of the most recent match (set at CEOF) */
00938 static char *locs;              /* restart point of a repeated match, else 0 */
00939 
00940 /* Command-logic flags */
00941 static int lastline;            /* nonzero lets the `$' address select the line */
00942 static int jump;                /* jump to cmd's link address if set */
00943 static int delete;              /* set to end the cycle and skip the default output */
00944 
00945 /* Tagged-pattern tracking */
00946 static char *bracend[MAXTAGS];  /* tagged pattern end pointers (set at CKET) */
00947 static char *brastart[MAXTAGS]; /* tagged pattern start pointers (set at CBRA) */
00948 
00949 static int anysub;              /* true if any s on current line succeeded */
00950 
00951 
00952 void execute()
00953 /* Execute the compiled commands in cmds[] */
00954 {
00955   register char *p1;            /* dummy copy ptrs */
00956   register sedcmd *ipc;         /* ptr to current command */
00957   char *execp;                  /* ptr to source */
00958 
00959 
00960   initget();
00961 
00962   /* Here's the main command-execution loop */
00963   for (;;) {
00964 
00965         /* Get next line to filter */
00966         if ((execp = getline(linebuf)) == BAD) return;
00967         spend = execp;
00968         anysub = FALSE;
00969 
00970         /* Loop through compiled commands, executing them */
00971         for (ipc = cmds; ipc->command;) {
00972                 if (!selected(ipc)) {
00973                         ipc++;
00974                         continue;
00975                 }
00976                 command(ipc);   /* execute the command pointed at */
00977 
00978                 if (delete)     /* if delete flag is set */
00979                         break;  /* don't exec rest of compiled cmds */
00980 
00981                 if (jump) {     /* if jump set, follow cmd's link */
00982                         jump = FALSE;
00983                         if ((ipc = ipc->u.link) == 0) {
00984                                 ipc = cmds;
00985                                 break;
00986                         }
00987                 } else          /* normal goto next command */
00988                         ipc++;
00989         }
00990 
00991         /* We've now done all modification commands on the line */
00992 
00993         /* Here's where the transformed line is output */
00994         if (!nflag && !delete) {
00995                 for (p1 = linebuf; p1 < spend; p1++) putc(*p1, stdout);
00996                 putc('\n', stdout);
00997         }
00998 
00999         /* If we've been set up for append, emit the text from it */
01000         if (aptr > appends) readout();
01001 
01002         delete = FALSE;         /* clear delete flag; about to get next cmd */
01003   }
01004 }
01005 
01006 static int selected(ipc)
01007 /* Is current command selected */
01008 sedcmd *ipc;
01009 {
01010   register char *p1 = ipc->addr1;       /* point p1 at first address */
01011   register char *p2 = ipc->addr2;       /* and p2 at second */
01012   int c;
01013   int sel = TRUE;               /* select by default */
01014 
01015   if (!p1)                      /* No addresses: always selected */
01016         ;
01017   else if (ipc->flags.inrange) {
01018         if (*p2 == CEND);
01019         else if (*p2 == CLNUM) {
01020                 c = p2[1] & CMASK;
01021                 if (lnum >= linenum[c]) {
01022                         ipc->flags.inrange = FALSE;
01023                         if (lnum > linenum[c]) sel = FALSE;
01024                 }
01025         } else if (match(p2, 0))
01026                 ipc->flags.inrange = FALSE;
01027   } else if (*p1 == CEND) {
01028         if (!lastline) sel = FALSE;
01029   } else if (*p1 == CLNUM) {
01030         c = p1[1] & CMASK;
01031         if (lnum != linenum[c])
01032                 sel = FALSE;
01033         else if (p2)
01034                 ipc->flags.inrange = TRUE;
01035   } else if (match(p1, 0)) {
01036         if (p2) ipc->flags.inrange = TRUE;
01037   } else
01038         sel = FALSE;
01039 
01040   return ipc->flags.allbut ? !sel : sel;
01041 }
01042 
01043 static int match(expbuf, gf)    /* uses genbuf */
01044  /* Match RE at expbuf against linebuf; if gf set, copy linebuf from genbuf */
01045 char *expbuf;
01046 int gf;
01047 {
01048   register char *p1, *p2, c;
01049 
01050   if (gf) {
01051         if (*expbuf) return(FALSE);
01052         p1 = linebuf;
01053         p2 = genbuf;
01054         while (*p1++ = *p2++);
01055         locs = p1 = loc2;
01056   } else {
01057         p1 = linebuf;
01058         locs = FALSE;
01059   }
01060 
01061   p2 = expbuf;
01062   if (*p2++) {
01063         loc1 = p1;
01064         if (*p2 == CCHR && p2[1] != *p1)        /* 1st char is wrong */
01065                 return(FALSE);  /* so fail */
01066         return(advance(p1, p2));/* else try to match rest */
01067   }
01068 
01069   /* Quick check for 1st character if it's literal */
01070   if (*p2 == CCHR) {
01071         c = p2[1];              /* pull out character to search for */
01072         do {
01073                 if (*p1 != c) continue; /* scan the source string */
01074                 if (advance(p1, p2))    /* found it, match the rest */
01075                         return(loc1 = p1, 1);
01076         } while
01077                 (*p1++);
01078         return(FALSE);          /* didn't find that first char */
01079   }
01080 
01081   /* Else try for unanchored match of the pattern */
01082   do {
01083         if (advance(p1, p2)) return(loc1 = p1, 1);
01084   } while
01085         (*p1++);
01086 
01087   /* If got here, didn't match either way */
01088   return(FALSE);
01089 }
01090 
01091 static int advance(lp, ep)
01092 /* Attempt to advance match pointer by one pattern element */
01093 register char *lp;              /* source (linebuf) ptr */
01094 register char *ep;              /* regular expression element ptr */
01095 {
01096   register char *curlp;         /* save ptr for closures */
01097   char c;                       /* scratch character holder */
01098   char *bbeg;
01099   int ct;
01100 
01101   for (;;) switch (*ep++) {
01102             case CCHR:          /* literal character */
01103                 if (*ep++ == *lp++)     /* if chars are equal */
01104                         continue;       /* matched */
01105                 return(FALSE);  /* else return false */
01106 
01107             case CDOT:          /* anything but newline */
01108                 if (*lp++)      /* first NUL is at EOL */
01109                         continue;       /* keep going if didn't find */
01110                 return(FALSE);  /* else return false */
01111 
01112             case CNL:           /* start-of-line */
01113             case CDOL:          /* end-of-line */
01114                 if (*lp == 0)   /* found that first NUL? */
01115                         continue;       /* yes, keep going */
01116                 return(FALSE);  /* else return false */
01117 
01118             case CEOF:          /* end-of-address mark */
01119                 loc2 = lp;      /* set second loc */
01120                 return(TRUE);   /* return true */
01121 
01122             case CCL:           /* a closure */
01123                 c = *lp++ & 0177;
01124                 if (ep[c >> 3] & bits[c & 07]) {        /* is char in set? */
01125                         ep += 16;       /* then skip rest of bitmask */
01126                         continue;       /* and keep going */
01127                 }
01128                 return(FALSE);  /* else return false */
01129 
01130             case CBRA:          /* start of tagged pattern */
01131                 brastart[*ep++] = lp;   /* mark it */
01132                 continue;       /* and go */
01133 
01134             case CKET:          /* end of tagged pattern */
01135                 bracend[*ep++] = lp;    /* mark it */
01136                 continue;       /* and go */
01137 
01138             case CBACK:
01139                 bbeg = brastart[*ep];
01140                 ct = bracend[*ep++] - bbeg;
01141 
01142                 if (Memcmp(bbeg, lp, ct)) {
01143                         lp += ct;
01144                         continue;
01145                 }
01146                 return(FALSE);
01147 
01148             case CBACK | STAR:
01149                 bbeg = brastart[*ep];
01150                 ct = bracend[*ep++] - bbeg;
01151                 curlp = lp;
01152                 while (Memcmp(bbeg, lp, ct)) lp += ct;
01153 
01154                 while (lp >= curlp) {
01155                         if (advance(lp, ep)) return(TRUE);
01156                         lp -= ct;
01157                 }
01158                 return(FALSE);
01159 
01160 
01161             case CDOT | STAR:   /* match .* */
01162                 curlp = lp;     /* save closure start loc */
01163                 while (*lp++);  /* match anything */
01164                 goto star;      /* now look for followers */
01165 
01166             case CCHR | STAR:   /* match <literal char>* */
01167                 curlp = lp;     /* save closure start loc */
01168                 while (*lp++ == *ep);   /* match many of that char */
01169                 ep++;           /* to start of next element */
01170                 goto star;      /* match it and followers */
01171 
01172             case CCL | STAR:    /* match [...]* */
01173                 curlp = lp;     /* save closure start loc */
01174                 do {
01175                         c = *lp++ & 0x7F;       /* match any in set */
01176                 } while
01177                         (ep[c >> 3] & bits[c & 07]);
01178                 ep += 16;       /* skip past the set */
01179                 goto star;      /* match followers */
01180 
01181   star:                         /* the recursion part of a * or + match */
01182                 if (--lp == curlp)      /* 0 matches */
01183                         continue;
01184 
01185                 if (*ep == CCHR) {
01186                         c = ep[1];
01187                         do {
01188                                 if (*lp != c) continue;
01189                                 if (advance(lp, ep)) return (TRUE);
01190                         } while
01191                                 (lp-- > curlp);
01192                         return(FALSE);
01193                 }
01194                 if (*ep == CBACK) {
01195                         c = *(brastart[ep[1]]);
01196                         do {
01197                                 if (*lp != c) continue;
01198                                 if (advance(lp, ep)) return (TRUE);
01199                         } while
01200                                 (lp-- > curlp);
01201                         return(FALSE);
01202                 }
01203                 do {
01204                         if (lp == locs) break;
01205                         if (advance(lp, ep)) return (TRUE);
01206                 } while
01207                         (lp-- > curlp);
01208                 return(FALSE);
01209 
01210             default:
01211                 fprintf(stderr, "sed: RE error, %o\n", *--ep);
01212                 quit(2);
01213         }
01214 }
01215 
01216 static int substitute(ipc)
01217 /* Perform s command */
01218 sedcmd *ipc;                    /* ptr to s command struct */
01219 {
01220   int nullmatch;
01221 
01222   if (match(ipc->u.lhs, 0)) {   /* if no match */
01223         nullmatch = (loc1 == loc2);
01224         dosub(ipc->rhs);        /* perform it once */
01225   } else
01226         return(FALSE);          /* command fails */
01227 
01228   if (ipc->flags.global)        /* if global flag enabled */
01229         while (*loc2) {         /* cycle through possibles */
01230                 if (nullmatch) loc2++;
01231                 if (match(ipc->u.lhs, 1)) {     /* found another */
01232                         nullmatch = (loc1 == loc2);
01233                         dosub(ipc->rhs);        /* so substitute */
01234                 } else          /* otherwise, */
01235                         break;  /* we're done */
01236         }
01237   return(TRUE);                 /* we succeeded */
01238 }
01239 
01240 static void dosub(rhsbuf)       /* uses linebuf, genbuf, spend */
01241  /* Generate substituted right-hand side (of s command) */
01242 char *rhsbuf;                   /* where to put the result */
01243 {
01244   register char *lp, *sp, *rp;
01245   int c;
01246 
01247   /* Copy linebuf to genbuf up to location  1 */
01248   lp = linebuf;
01249   sp = genbuf;
01250   while (lp < loc1) *sp++ = *lp++;
01251 
01252   for (rp = rhsbuf; c = *rp++;) {
01253         if (c == '&') {
01254                 sp = place(sp, loc1, loc2);
01255                 continue;
01256         } else if (c & 0200 && (c &= 0177) >= '1' && c < MAXTAGS + '1') {
01257                 sp = place(sp, brastart[c - '1'], bracend[c - '1']);
01258                 continue;
01259         }
01260         *sp++ = c & 0177;
01261         if (sp >= genbuf + MAXBUF) fprintf(stderr, LTLMSG);
01262   }
01263   lp = loc2;
01264   loc2 = sp - genbuf + linebuf;
01265   while (*sp++ = *lp++)
01266         if (sp >= genbuf + MAXBUF) fprintf(stderr, LTLMSG);
01267   lp = linebuf;
01268   sp = genbuf;
01269   while (*lp++ = *sp++);
01270   spend = lp - 1;
01271 }
01272 
01273 static char *place(asp, al1, al2)       /* uses genbuf */
01274  /* Place chars at *al1...*(al1 - 1) at asp... in genbuf[] */
01275 register char *asp, *al1, *al2;
01276 {
01277   while (al1 < al2) {
01278         *asp++ = *al1++;
01279         if (asp >= genbuf + MAXBUF) fprintf(stderr, LTLMSG);
01280   }
01281   return(asp);
01282 }
01283 
static void listto(p1, fp)
/* Write the string at p1 to fp in an unambiguous form, followed by a
 * newline.  Printable characters are passed through; every other character
 * is written as a backslash followed by b, t, n, r or e (for BS, TAB, NL,
 * CR and ESC) or by two hex digits.  The terminating NUL is not printed.
 */
register char *p1;              /* the source */
FILE *fp;                       /* output stream to write to */
{
  register int ch;              /* current char as an unsigned value */

  for (; *p1 != '\0'; p1++) {
        ch = *p1 & 0xFF;        /* isprint() needs a non-negative value */
        if (isprint(ch)) {
                putc(ch, fp);   /* pass it through */
                continue;
        }
        putc('\\', fp);         /* emit a backslash */
        switch (ch) {
            case '\b':
                putc('b', fp);
                break;          /* BS */
            case '\t':
                putc('t', fp);
                break;          /* TAB */
            case '\n':
                putc('n', fp);
                break;          /* NL */
            case '\r':
                putc('r', fp);
                break;          /* CR */
            case '\33':
                putc('e', fp);
                break;          /* ESC */
            default:
                fprintf(fp, "%02x", ch);
        }
  }
  putc('\n', fp);
}
01317 
01318 static void truncated(h)
01319 int h;
01320 {
01321   static long last = 0L;
01322 
01323   if (lnum == last) return;
01324   last = lnum;
01325 
01326   fprintf(stderr, "sed: ");
01327   fprintf(stderr, h ? "hold space" : "line %ld", lnum);
01328   fprintf(stderr, " truncated to %d characters\n", MAXBUF);
01329 }
01330 
01331 static void command(ipc)
01332 /* Execute compiled command pointed at by ipc */
01333 sedcmd *ipc;
01334 {
01335   static char holdsp[MAXHOLD + 1];      /* the hold space */
01336   static char *hspend = holdsp; /* hold space end pointer */
01337   register char *p1, *p2;
01338   char *execp;
01339   int didsub;                   /* true if last s succeeded */
01340 
01341   switch (ipc->command) {
01342       case ACMD:                /* append */
01343         *aptr++ = ipc;
01344         if (aptr >= appends + MAXAPPENDS) fprintf(stderr,
01345                         "sed: too many appends after line %ld\n",
01346                         lnum);
01347         *aptr = 0;
01348         break;
01349 
01350       case CCMD:                /* change pattern space */
01351         delete = TRUE;
01352         if (!ipc->flags.inrange || lastline) printf("%s\n", ipc->u.lhs);
01353         break;
01354 
01355       case DCMD:                /* delete pattern space */
01356         delete++;
01357         break;
01358 
01359       case CDCMD:               /* delete a line in hold space */
01360         p1 = p2 = linebuf;
01361         while (*p1 != '\n')
01362                 if (delete = (*p1++ == 0)) return;
01363         p1++;
01364         while (*p2++ = *p1++) continue;
01365         spend = p2 - 1;
01366         jump++;
01367         break;
01368 
01369       case EQCMD:               /* show current line number */
01370         fprintf(stdout, "%ld\n", lnum);
01371         break;
01372 
01373       case GCMD:                /* copy hold space to pattern space */
01374         p1 = linebuf;
01375         p2 = holdsp;
01376         while (*p1++ = *p2++);
01377         spend = p1 - 1;
01378         break;
01379 
01380       case CGCMD:               /* append hold space to pattern space */
01381         *spend++ = '\n';
01382         p1 = spend;
01383         p2 = holdsp;
01384         do
01385                 if (p1 > linebuf + MAXBUF) {
01386                         truncated(0);
01387                         p1[-1] = 0;
01388                         break;
01389                 }
01390         while (*p1++ = *p2++);
01391 
01392         spend = p1 - 1;
01393         break;
01394 
01395       case HCMD:                /* copy pattern space to hold space */
01396         p1 = holdsp;
01397         p2 = linebuf;
01398         while (*p1++ = *p2++);
01399         hspend = p1 - 1;
01400         break;
01401 
01402       case CHCMD:               /* append pattern space to hold space */
01403         *hspend++ = '\n';
01404         p1 = hspend;
01405         p2 = linebuf;
01406         do
01407                 if (p1 > holdsp + MAXBUF) {
01408                         truncated(1);
01409                         p1[-1] = 0;
01410                         break;
01411                 }
01412         while (*p1++ = *p2++);
01413 
01414         hspend = p1 - 1;
01415         break;
01416 
01417       case ICMD:                /* insert text */
01418         printf("%s\n", ipc->u.lhs);
01419         break;
01420 
01421       case BCMD:                /* branch to label */
01422         jump = TRUE;
01423         break;
01424 
01425       case LCMD:                /* list text */
01426         listto(linebuf, (ipc->fout != NULL) ? ipc->fout : stdout);
01427         break;
01428 
01429       case NCMD:                /* read next line into pattern space */
01430         if (!nflag) puts(linebuf);      /* flush out the current line */
01431         if (aptr > appends) readout();  /* do pending a, r commands */
01432         if ((execp = getline(linebuf)) == BAD) {
01433                 delete = TRUE;
01434                 break;
01435         }
01436         spend = execp;
01437         anysub = FALSE;
01438         break;
01439 
01440       case CNCMD:               /* append next line to pattern space */
01441         if (aptr > appends) readout();
01442         *spend++ = '\n';
01443         if ((execp = getline(spend)) == BAD) {
01444                 *--spend = 0;
01445                 break;
01446         }
01447         spend = execp;
01448         anysub = FALSE;
01449         break;
01450 
01451       case PCMD:                /* print pattern space */
01452         puts(linebuf);
01453         break;
01454 
01455       case CPCMD:               /* print one line from pattern space */
01456 cpcom:                          /* so s command can jump here */
01457         for (p1 = linebuf; *p1 != '\n' && *p1 != '\0';) putc(*p1++, stdout);
01458         putc('\n', stdout);
01459         break;
01460 
01461       case QCMD:                /* quit the stream editor */
01462         if (!nflag) puts(linebuf);      /* flush out the current line */
01463         if (aptr > appends)
01464                 readout();      /* do any pending a and r commands */
01465         quit(0);
01466 
01467       case RCMD:                /* read a file into the stream */
01468         *aptr++ = ipc;
01469         if (aptr >= appends + MAXAPPENDS) fprintf(stderr,
01470                         "sed: too many reads after line %ld\n",
01471                         lnum);
01472         *aptr = 0;
01473         break;
01474 
01475       case SCMD:                /* substitute RE */
01476         didsub = substitute(ipc);
01477         if (didsub) anysub = TRUE;
01478         if (ipc->flags.print && didsub)
01479                 if (ipc->flags.print == TRUE)
01480                         puts(linebuf);
01481                 else
01482                         goto cpcom;
01483         if (didsub && ipc->fout) fprintf(ipc->fout, "%s\n", linebuf);
01484         break;
01485 
01486       case TCMD:                /* branch on any s successful */
01487       case CTCMD:               /* branch on any s failed */
01488         if (anysub == (ipc->command == CTCMD))
01489                 break;          /* no branch if any s failed, else */
01490         anysub = FALSE;
01491         jump = TRUE;            /* set up to jump to assoc'd label */
01492         break;
01493 
01494       case CWCMD:               /* write one line from pattern space */
01495         for (p1 = linebuf; *p1 != '\n' && *p1 != '\0';)
01496                 putc(*p1++, ipc->fout);
01497         putc('\n', ipc->fout);
01498         break;
01499 
01500       case WCMD:                /* write pattern space to file */
01501         fprintf(ipc->fout, "%s\n", linebuf);
01502         break;
01503 
01504       case XCMD:                /* exchange pattern and hold spaces */
01505         p1 = linebuf;
01506         p2 = genbuf;
01507         while (*p2++ = *p1++) continue;
01508         p1 = holdsp;
01509         p2 = linebuf;
01510         while (*p2++ = *p1++) continue;
01511         spend = p2 - 1;
01512         p1 = genbuf;
01513         p2 = holdsp;
01514         while (*p2++ = *p1++) continue;
01515         hspend = p2 - 1;
01516         break;
01517 
01518       case YCMD:
01519         p1 = linebuf;
01520         p2 = ipc->u.lhs;
01521         while (*p1 = p2[*p1]) p1++;
01522         break;
01523   }
01524 }
01525 
/* Re-point stdin at the named file, opened for reading.
 * On failure a diagnostic naming the file is written to stderr and
 * quit(1) is called.
 */
static void openfile(file)
char *file;
{
  if (freopen(file, "r", stdin) != NULL) return;        /* stdin now reads file */

  fprintf(stderr, "sed: can't open %s\n", file);
  quit(1);
}
01535 
01536 static int c;                   /* Will be the next char to read, a kind of
01537                          * lookahead */
01538 
01539 static void get()
01540 /* Read next character into c treating all argument files as run through cat */
01541 {
01542   while ((c = getchar()) == EOF && --eargc >= 0) openfile(*eargv++);
01543 }
01544 
01545 static void initget()
01546 /* Initialise character input */
01547 {
01548   if (--eargc >= 0) openfile(*eargv++); /* else input == stdin */
01549   get();
01550 }
01551 
01552 static char *getline(buf)
01553 /* Get next line of text to be edited, return pointer to end */
01554 register char *buf;             /* where to send the input */
01555 {
01556   if (c == EOF) return BAD;
01557 
01558   lnum++;                       /* we can read a new line */
01559 
01560   do {
01561         if (c == '\n') {
01562                 get();
01563                 break;
01564         }
01565         if (buf <= linebuf + MAXBUF) *buf++ = c;
01566         get();
01567   } while (c != EOF);
01568 
01569   if (c == EOF) lastline = TRUE;
01570 
01571   if (buf > linebuf + MAXBUF) {
01572         truncated(0);
01573         --buf;
01574   }
01575   *buf = 0;
01576   return buf;
01577 }
01578 
01579 static int Memcmp(a, b, count)
01580 /* Return TRUE if *a... == *b... for count chars, FALSE otherwise */
01581 register char *a, *b;
01582 int count;
01583 {
01584   while (count--)               /* look at count characters */
01585         if (*a++ != *b++)       /* if any are nonequal   */
01586                 return(FALSE);  /* return FALSE for false */
01587   return(TRUE);                 /* compare succeeded */
01588 }
01589 
01590 static void readout()
01591 /* Write file indicated by r command to output */
01592 {
01593   register int t;               /* hold input char or EOF */
01594   FILE *fi;                     /* ptr to file to be read */
01595 
01596   aptr = appends - 1;           /* arrange for pre-increment to work right */
01597   while (*++aptr)
01598         if ((*aptr)->command == ACMD)   /* process "a" cmd */
01599                 printf("%s\n", (*aptr)->u.lhs);
01600         else {                  /* process "r" cmd */
01601                 if ((fi = fopen((*aptr)->u.lhs, "r")) == NULL) {
01602                         fprintf(stderr, "sed: can't open %s\n",
01603                                 (*aptr)->u.lhs);
01604                         continue;
01605                 }
01606                 while ((t = getc(fi)) != EOF) putc((char) t, stdout);
01607                 fclose(fi);
01608         }
01609   aptr = appends;               /* reset the append ptr */
01610   *aptr = 0;
01611 }
01612 
01613 /* Sedexec.c ends here */

Generated on Fri Apr 14 22:57:11 2006 for minix by  doxygen 1.4.6