l.c

Go to the documentation of this file.
00001 /*
00002  * a small awk clone
00003  *
00004  * (C) 1989 Saeko Hirabauashi & Kouichi Hirabayashi
00005  *
00006  * Absolutely no warranty. Use this software with your own risk.
00007  *
00008  * Permission to use, copy, modify and distribute this software for any
00009  * purpose and without fee is hereby granted, provided that the above
00010  * copyright and disclaimer notice.
00011  *
00012  * This program was written to fit into 64K+64K memory of the Minix 1.2.
00013  */
00014 
00015 
00016 #include <stdio.h>
00017 #include <ctype.h>
00018 #include "awk.h"
00019 
00020 extern char *srcprg;    /* inline program text; when non-null, Getc() reads from it instead of pfp */
00021 extern FILE *pfp;       /* program file, read by Getc() when srcprg is null */
00022 
00023 int sym;        /* lexical token most recently returned by lex() */
00024 int sym1;       /* auxiliary lexical token: builtin token from isbuiltin() or the value of isarg() */
00025 int regexflg;   /* set by parser (y.c) to indicate parsing REGEXPR; lex() then calls scanreg() */
00026 int funflg;     /* set by parser (y.c) to indicate parsing FUNCTION; lex() then checks names with isarg() */
00027 int printflg;   /* set by parser (y.c) to indicate parsing PRINT; '|', '>' and '>>' become R_POUT, R_OUT, R_APD */
00028 int getlineflg; /* set by parser (y.c) to indicate parsing GETLINE; '<' becomes R_IN */
00029 char text[BUFSIZ];      /* lexical word: spelling of the current identifier, number, string or regexp */
00030 char line[BUFSIZ];      /* program line for error message (ring buffer); Getc() stores every character here */
00031 char *linep = line;     /* line pointer: next slot of line[] that Getc() will write */
00032 char funnam[128];       /* function name for error message */
00033 int lineno = 1;         /* current program line number; lex() increments it on each newline */
00034 
00035 lex()
00036 {
00037   int c, d;
00038   char *s;
00039 
00040   if (regexflg)
00041         return sym = scanreg();
00042 next:
00043   while ((c = Getc()) == ' ' || c == '\t')
00044         ;
00045   while (c == '#')
00046         for (c = Getc(); c != '\n'; c = Getc())
00047                 ;
00048   switch (c) {
00049   case '\\':
00050         if ((c = Getc()) == '\n') {
00051                 lineno++;
00052                 goto next;
00053         }
00054         break;
00055   case '\n':
00056         lineno++;
00057         break;
00058   }
00059   switch (c) {
00060   case EOF:     return sym = 0;
00061   case '+':     return sym = follow2('=', '+', ADDEQ, INC, ADD);
00062   case '-':     return sym = follow2('=', '-', SUBEQ, DEC, SUB);
00063   case '*':     return sym = follow('=', MULTEQ, MULT);
00064   case '/':     return sym = follow('=', DIVEQ, DIV);
00065   case '%':     return sym = follow('=', MODEQ, MOD);
00066   case '^':     return sym = follow('=', POWEQ, POWER);
00067   case '=':     return sym = follow('=', EQ, ASSIGN);
00068   case '!':     return sym = follow2('=', '~', NE, NOMATCH, NOT);
00069   case '&':     return sym = follow('&', AND, BINAND);
00070   case '|':     sym = follow('|', OR, BINOR);
00071                 if (printflg && sym == BINOR)
00072                         sym = R_POUT;
00073                 return sym;
00074   case '<':     sym = follow2('=', '<', LE, SHIFTL, LT);
00075                 if (getlineflg && sym == LT)
00076                         sym = R_IN;
00077                 return sym;
00078   case '>':     sym = follow2('=', '>', GE, SHIFTR, GT);
00079                 if (printflg) {
00080                         switch (sym) {
00081                         case GT: sym = R_OUT; break;
00082                         case SHIFTR: sym = R_APD; break;
00083                         }
00084                 }
00085                 return sym;
00086   case '~':     return sym = MATCH; break;
00087   case ';': case '\n':  return sym = EOL;
00088   }
00089   if (isalpha(c) || c == '_') {
00090         for (s = text; isalnum(c) || c == '_'; ) {
00091                 *s++ = c; c = Getc();
00092         }
00093         Ungetc(c);
00094         *s = '\0';
00095         if ((d = iskeywd(text)) == 0 &&
00096                 (d = isbuiltin(text, &sym1)) == 0) {
00097                         if (c == '(')
00098                                 return sym = CALL;
00099                         else if (funflg) {
00100                                 if ((sym1 = isarg(text)) != -1)
00101                                         return sym = ARG;
00102                         }
00103         }
00104         return sym = d ? d : IDENT;
00105   }
00106   else if (c == '.' || (isdigit(c))) {
00107         Ungetc(c);
00108         return sym = scannum(text);     /* NUMBER */
00109   }
00110   else if (c == '"')
00111         return sym = scanstr(text);     /* STRING */
00112   return sym = c;
00113 }
00114 
/*
 * follow - decide between a one and a two character operator.
 * Reads one character: if it equals c1 it is consumed and r1 is returned;
 * otherwise it is pushed back and r2 is returned.
 */
static
follow(c1, r1, r2)
{
  register int nextc = Getc();

  if (nextc != c1) {
	Ungetc(nextc);
	return r2;
  }
  return r1;
}
00127 
/*
 * follow2 - like follow(), but with two possible second characters.
 * Reads one character: c1 selects r1, c2 selects r2 (c1 is tested first);
 * anything else is pushed back and r3 is returned.
 */
static
follow2(c1, c2, r1, r2, r3)
{
  register int nextc = Getc();

  if (nextc == c1)
	return r1;
  if (nextc == c2)
	return r2;
  Ungetc(nextc);
  return r3;
}
00142 
00143 static
00144 iskeywd(s) char *s;
00145 {
00146   static struct { char *kw; int token; } tab[] = {
00147         "BEGIN", BEGIN,
00148         "END", END,
00149         "break", BREAK,
00150         "continue", CONTIN,
00151         "delete", DELETE,
00152         "do", DO,
00153         "else", ELSE,
00154         "exit", EXIT,
00155         "for", FOR,
00156         "func", FUNC,
00157         "function", FUNC,
00158         "getline", GETLINE,
00159         "if", IF,
00160         "in", IN,
00161         "next", NEXT,
00162         "print", PRINT,
00163         "printf", PRINTF,
00164         "return", RETURN,
00165         "sprint", SPRINT,
00166         "sprintf", SPRINTF,
00167         "while", WHILE,
00168         "", 0, 0
00169   };
00170   register int i;
00171 
00172   for (i = 0; tab[i].token; i++)
00173         if (strcmp(tab[i].kw, s) == 0)
00174                 break;
00175   return tab[i].token;
00176 }
00177 
00178 static
00179 isbuiltin(s, p) char *s; int *p;
00180 {
00181   static struct { char *kw; int type; int token; } tab[] = {
00182         "atan2", MATHFUN, ATAN2,
00183         "close", STRFUN, CLOSE,
00184         "cos", MATHFUN, COS,
00185         "exp", MATHFUN, EXP,
00186         "gsub", SUBST, RGSUB,
00187         "index", STRFUN, INDEX,
00188         "int", MATHFUN, INT,
00189         "length", STRFUN, LENGTH,
00190         "log", MATHFUN, LOG,
00191         "match", STRFUN, RMATCH,
00192         "sin", MATHFUN, SIN,
00193         "sqrt", MATHFUN, SQRT,
00194         "rand", MATHFUN, RAND,
00195         "srand", MATHFUN, SRAND,
00196         "split", STRFUN, SPLIT,
00197         "sub", SUBST, RSUB,
00198         "substr", STRFUN, SUBSTR,
00199         "system", STRFUN, SYSTEM,
00200         "", 0, 0
00201   };
00202   register int i;
00203 
00204   for (i = 0; tab[i].token; i++)
00205         if (strcmp(tab[i].kw, s) == 0)
00206                 break;
00207   *p = tab[i].token;
00208   return tab[i].type;
00209 }
00210 
00211 static
00212 scannum(s) char *s;
00213 {
00214   register int c;
00215   char *strchr();
00216 
00217   if ((c = Getc()) && strchr("+-", c) != NULL) {
00218         *s++ = c; c = Getc();
00219   }
00220   while (isdigit(c)) {
00221         *s++ = c; c = Getc();
00222   }
00223   if (c == '.') {
00224         *s++ = c; c = Getc();
00225         while (isdigit(c)) {
00226                 *s++ = c; c = Getc();
00227         }
00228   }
00229   if (c && strchr("eE", c) != NULL) {
00230         *s++ = c; c = Getc();
00231         if (c && strchr("+-", c) != NULL) {
00232                 *s++ = c; c = Getc();
00233         }
00234         while (isdigit(c)) {
00235                 *s++ = c; c = Getc();
00236         }
00237   }
00238   *s = '\0';
00239   Ungetc(c);
00240   return NUMBER;
00241 }
00242 
00243 static
00244 scanstr(s) char *s;
00245 {
00246   register int c, i, j;
00247 
00248   for (c = Getc(); c != EOF & c != '"'; ) {
00249         if (c == '\\') {
00250                 switch (c = Getc()) {
00251                 case 'b': c = '\b'; break;
00252                 case 'f': c = '\f'; break;
00253                 case 'n': c = '\n'; break;
00254                 case 'r': c = '\r'; break;
00255                 case 't': c = '\t'; break;
00256                 default:
00257                 if (isdigit(c)) {
00258                         for (i = j = 0; i < 3 && isdigit(c); c = Getc(), i++)
00259                                 j = j * 8 + c - '0';
00260                         Ungetc(c);
00261                         c = j;
00262                 }
00263                 break;
00264                 }
00265         }
00266         *s++ = c;
00267         if (isKanji(c))
00268                 *s++ = Getc();
00269         c = Getc();
00270   }
00271   *s = '\0';
00272   return STRING;
00273 }
00274 
00275 static
00276 scanreg()
00277 {
00278   register int c;
00279   register char *s;
00280 
00281   for (s = text; (c = Getc()) != '/'; )
00282         if (c == '\n')
00283                 error("newline in regular expression");
00284         else {
00285                 if (isKanji(c) || c == '\\') {
00286                         *s++ = c; c = Getc();
00287                 }
00288                 *s++ = c;
00289         }
00290   *s = '\0';
00291   return REGEXP;
00292 }
00293 
00294 static int c0;
00295 
00296 Ungetc(c)
00297 {
00298   c0 = c;
00299 
00300   if (linep > line) {
00301         if (--linep < line)
00302                 linep == line + BUFSIZ - 1;
00303   }
00304 }
00305 
00306 Getc()
00307 {
00308   register int c;
00309   char *s, *t;
00310 
00311   if (c0) {
00312         c = c0; c0 = 0;
00313   }     
00314   else if (srcprg)
00315         c = *srcprg ? *srcprg++ : EOF;
00316   else
00317         c = fgetc(pfp);
00318 
00319 #if 0
00320   if (linep - line == BUFSIZ) {
00321 printf("!!!\n");
00322         for (s = line; *s != '\n' && ((s - line) <BUFSIZ); s++)
00323                 ;
00324 printf("***(%d)***\n", *s);
00325         for (t = line; s < linep; )
00326                 *t++ = *++s;
00327   }
00328 #endif
00329   *linep++ = c;
00330   if ((linep - line) == BUFSIZ)
00331         linep = line;
00332   return c;
00333 }

Generated on Fri Apr 14 22:56:41 2006 for minix by  doxygen 1.4.6