00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016 #include <stdio.h>
00017 #include <ctype.h>
00018 #include "awk.h"
00019
/*
 * Program source, defined elsewhere.  Getc() reads from the string
 * srcprg when it is non-null and from the stream pfp otherwise.
 */
extern char *srcprg;
extern FILE *pfp;

/*
 * sym  - token code most recently returned by lex().
 * sym1 - companion value: the function code stored by isbuiltin(), or
 *        the value returned by isarg().
 */
int sym, sym1;

int regexflg,		/* nonzero: lex() scans a regular expression */
    funflg,		/* nonzero: lex() tries isarg() on plain identifiers */
    printflg,		/* nonzero: '|', '>', '>>' become R_POUT, R_OUT, R_APD */
    getlineflg;		/* nonzero: '<' becomes R_IN */

char text[BUFSIZ],	/* spelling of the token being scanned */
     line[BUFSIZ];	/* ring of the characters handed out by Getc() */
char *linep = line;	/* next slot of line[] that Getc() will fill */
char funnam[128];	/* not referenced by the code in this part of the file */
int lineno = 1;		/* bumped by lex() for every newline it consumes */
00034
00035 lex()
00036 {
00037 int c, d;
00038 char *s;
00039
00040 if (regexflg)
00041 return sym = scanreg();
00042 next:
00043 while ((c = Getc()) == ' ' || c == '\t')
00044 ;
00045 while (c == '#')
00046 for (c = Getc(); c != '\n'; c = Getc())
00047 ;
00048 switch (c) {
00049 case '\\':
00050 if ((c = Getc()) == '\n') {
00051 lineno++;
00052 goto next;
00053 }
00054 break;
00055 case '\n':
00056 lineno++;
00057 break;
00058 }
00059 switch (c) {
00060 case EOF: return sym = 0;
00061 case '+': return sym = follow2('=', '+', ADDEQ, INC, ADD);
00062 case '-': return sym = follow2('=', '-', SUBEQ, DEC, SUB);
00063 case '*': return sym = follow('=', MULTEQ, MULT);
00064 case '/': return sym = follow('=', DIVEQ, DIV);
00065 case '%': return sym = follow('=', MODEQ, MOD);
00066 case '^': return sym = follow('=', POWEQ, POWER);
00067 case '=': return sym = follow('=', EQ, ASSIGN);
00068 case '!': return sym = follow2('=', '~', NE, NOMATCH, NOT);
00069 case '&': return sym = follow('&', AND, BINAND);
00070 case '|': sym = follow('|', OR, BINOR);
00071 if (printflg && sym == BINOR)
00072 sym = R_POUT;
00073 return sym;
00074 case '<': sym = follow2('=', '<', LE, SHIFTL, LT);
00075 if (getlineflg && sym == LT)
00076 sym = R_IN;
00077 return sym;
00078 case '>': sym = follow2('=', '>', GE, SHIFTR, GT);
00079 if (printflg) {
00080 switch (sym) {
00081 case GT: sym = R_OUT; break;
00082 case SHIFTR: sym = R_APD; break;
00083 }
00084 }
00085 return sym;
00086 case '~': return sym = MATCH; break;
00087 case ';': case '\n': return sym = EOL;
00088 }
00089 if (isalpha(c) || c == '_') {
00090 for (s = text; isalnum(c) || c == '_'; ) {
00091 *s++ = c; c = Getc();
00092 }
00093 Ungetc(c);
00094 *s = '\0';
00095 if ((d = iskeywd(text)) == 0 &&
00096 (d = isbuiltin(text, &sym1)) == 0) {
00097 if (c == '(')
00098 return sym = CALL;
00099 else if (funflg) {
00100 if ((sym1 = isarg(text)) != -1)
00101 return sym = ARG;
00102 }
00103 }
00104 return sym = d ? d : IDENT;
00105 }
00106 else if (c == '.' || (isdigit(c))) {
00107 Ungetc(c);
00108 return sym = scannum(text);
00109 }
00110 else if (c == '"')
00111 return sym = scanstr(text);
00112 return sym = c;
00113 }
00114
/*
 * follow - decide between a one- and a two-character operator.
 *
 * Reads one more character.  If it is c1 the character is consumed and
 * r1 is returned; otherwise it is pushed back and r2 is returned.
 */
static int
follow(c1, r1, r2)
int c1, r1, r2;
{
	int ahead = Getc();

	if (ahead != c1) {
		Ungetc(ahead);
		return r2;
	}
	return r1;
}
00127
/*
 * follow2 - like follow(), but with two possible second characters.
 *
 * Reads one more character.  c1 selects r1 and c2 selects r2 (c1 is
 * tested first); any other character is pushed back and r3 is returned.
 */
static int
follow2(c1, c2, r1, r2, r3)
int c1, c2, r1, r2, r3;
{
	int ahead = Getc();

	if (ahead == c1)
		return r1;
	if (ahead == c2)
		return r2;
	Ungetc(ahead);
	return r3;
}
00142
00143 static
00144 iskeywd(s) char *s;
00145 {
00146 static struct { char *kw; int token; } tab[] = {
00147 "BEGIN", BEGIN,
00148 "END", END,
00149 "break", BREAK,
00150 "continue", CONTIN,
00151 "delete", DELETE,
00152 "do", DO,
00153 "else", ELSE,
00154 "exit", EXIT,
00155 "for", FOR,
00156 "func", FUNC,
00157 "function", FUNC,
00158 "getline", GETLINE,
00159 "if", IF,
00160 "in", IN,
00161 "next", NEXT,
00162 "print", PRINT,
00163 "printf", PRINTF,
00164 "return", RETURN,
00165 "sprint", SPRINT,
00166 "sprintf", SPRINTF,
00167 "while", WHILE,
00168 "", 0, 0
00169 };
00170 register int i;
00171
00172 for (i = 0; tab[i].token; i++)
00173 if (strcmp(tab[i].kw, s) == 0)
00174 break;
00175 return tab[i].token;
00176 }
00177
00178 static
00179 isbuiltin(s, p) char *s; int *p;
00180 {
00181 static struct { char *kw; int type; int token; } tab[] = {
00182 "atan2", MATHFUN, ATAN2,
00183 "close", STRFUN, CLOSE,
00184 "cos", MATHFUN, COS,
00185 "exp", MATHFUN, EXP,
00186 "gsub", SUBST, RGSUB,
00187 "index", STRFUN, INDEX,
00188 "int", MATHFUN, INT,
00189 "length", STRFUN, LENGTH,
00190 "log", MATHFUN, LOG,
00191 "match", STRFUN, RMATCH,
00192 "sin", MATHFUN, SIN,
00193 "sqrt", MATHFUN, SQRT,
00194 "rand", MATHFUN, RAND,
00195 "srand", MATHFUN, SRAND,
00196 "split", STRFUN, SPLIT,
00197 "sub", SUBST, RSUB,
00198 "substr", STRFUN, SUBSTR,
00199 "system", STRFUN, SYSTEM,
00200 "", 0, 0
00201 };
00202 register int i;
00203
00204 for (i = 0; tab[i].token; i++)
00205 if (strcmp(tab[i].kw, s) == 0)
00206 break;
00207 *p = tab[i].token;
00208 return tab[i].type;
00209 }
00210
00211 static
00212 scannum(s) char *s;
00213 {
00214 register int c;
00215 char *strchr();
00216
00217 if ((c = Getc()) && strchr("+-", c) != NULL) {
00218 *s++ = c; c = Getc();
00219 }
00220 while (isdigit(c)) {
00221 *s++ = c; c = Getc();
00222 }
00223 if (c == '.') {
00224 *s++ = c; c = Getc();
00225 while (isdigit(c)) {
00226 *s++ = c; c = Getc();
00227 }
00228 }
00229 if (c && strchr("eE", c) != NULL) {
00230 *s++ = c; c = Getc();
00231 if (c && strchr("+-", c) != NULL) {
00232 *s++ = c; c = Getc();
00233 }
00234 while (isdigit(c)) {
00235 *s++ = c; c = Getc();
00236 }
00237 }
00238 *s = '\0';
00239 Ungetc(c);
00240 return NUMBER;
00241 }
00242
00243 static
00244 scanstr(s) char *s;
00245 {
00246 register int c, i, j;
00247
00248 for (c = Getc(); c != EOF & c != '"'; ) {
00249 if (c == '\\') {
00250 switch (c = Getc()) {
00251 case 'b': c = '\b'; break;
00252 case 'f': c = '\f'; break;
00253 case 'n': c = '\n'; break;
00254 case 'r': c = '\r'; break;
00255 case 't': c = '\t'; break;
00256 default:
00257 if (isdigit(c)) {
00258 for (i = j = 0; i < 3 && isdigit(c); c = Getc(), i++)
00259 j = j * 8 + c - '0';
00260 Ungetc(c);
00261 c = j;
00262 }
00263 break;
00264 }
00265 }
00266 *s++ = c;
00267 if (isKanji(c))
00268 *s++ = Getc();
00269 c = Getc();
00270 }
00271 *s = '\0';
00272 return STRING;
00273 }
00274
00275 static
00276 scanreg()
00277 {
00278 register int c;
00279 register char *s;
00280
00281 for (s = text; (c = Getc()) != '/'; )
00282 if (c == '\n')
00283 error("newline in regular expression");
00284 else {
00285 if (isKanji(c) || c == '\\') {
00286 *s++ = c; c = Getc();
00287 }
00288 *s++ = c;
00289 }
00290 *s = '\0';
00291 return REGEXP;
00292 }
00293
00294 static int c0;
00295
00296 Ungetc(c)
00297 {
00298 c0 = c;
00299
00300 if (linep > line) {
00301 if (--linep < line)
00302 linep == line + BUFSIZ - 1;
00303 }
00304 }
00305
00306 Getc()
00307 {
00308 register int c;
00309 char *s, *t;
00310
00311 if (c0) {
00312 c = c0; c0 = 0;
00313 }
00314 else if (srcprg)
00315 c = *srcprg ? *srcprg++ : EOF;
00316 else
00317 c = fgetc(pfp);
00318
00319 #if 0
00320 if (linep - line == BUFSIZ) {
00321 printf("!!!\n");
00322 for (s = line; *s != '\n' && ((s - line) <BUFSIZ); s++)
00323 ;
00324 printf("***(%d)***\n", *s);
00325 for (t = line; s < linep; )
00326 *t++ = *++s;
00327 }
00328 #endif
00329 *linep++ = c;
00330 if ((linep - line) == BUFSIZ)
00331 linep = line;
00332 return c;
00333 }