reader.c

Go to the documentation of this file.
00001 #include "defs.h"
00002 
00003 /*  The line size must be a positive integer.  One hundred was chosen   */
00004 /*  because few lines in Yacc input grammars exceed 100 characters.     */
00005 /*  Note that if a line exceeds LINESIZE characters, the line buffer    */
00006 /*  will be expanded to accommodate it.                                 */
00007 
00008 #define LINESIZE 100
00009 
/*  File-scope state shared by the reader routines below.  The notes on  */
/*  each variable describe only what the functions in this part of the   */
/*  file do with it.                                                     */
00010 char *cache;                /* scratch buffer appended to by cachec() */
00011 int cinc, cache_size;       /* next write index into cache; bytes allocated for cache */
00012 
00013 int ntags, tagmax;          /* tags stored in tag_table; slots allocated (grown by 16 in get_tag) */
00014 char **tag_table;           /* distinct <tag> strings returned by get_tag() */
00015 
00016 char saw_eof, unionized;    /* set by get_line() at end of input; set by copy_union() on first use */
00017 char *cptr, *line;          /* scan position within line; current input line, ends in '\n', 0 at EOF */
00018 int linesize;               /* bytes allocated for line */
00019 
00020 bucket *goal;               /* start symbol, set by declare_start() or advance_to_start() */
00021 int prec;                   /* incremented by declare_tokens() for each non-TOKEN declaration */
00022 int gensym;                 /* counter used by insert_empty_rule() to build "$$n" names */
00023 char last_was_action;       /* nonzero when the most recent rule element was an action */
00024 
00025 int maxitems;               /* slots allocated for pitem */
00026 bucket **pitem;             /* rule items; a 0 entry terminates each right-hand side */
00027 
00028 int maxrules;               /* slots allocated for plhs, rprec and rassoc */
00029 bucket **plhs;              /* left-hand-side symbol of each rule */
00030 
00031 int name_pool_size;         /* not referenced in this part of the file */
00032 char *name_pool;            /* not referenced in this part of the file */
00033 
00034 char line_format[] = "#line %d \"%s\"\n";   /* fprintf format: line number, file name */
00035 
00036 
00037 cachec(c)
00038 int c;
00039 {
00040     assert(cinc >= 0);
00041     if (cinc >= cache_size)
00042     {
00043         cache_size += 256;
00044         cache = REALLOC(cache, cache_size);
00045         if (cache == 0) no_space();
00046     }
00047     cache[cinc] = c;
00048     ++cinc;
00049 }
00050 
00051 
00052 get_line()
00053 {
00054     register FILE *f = input_file;
00055     register int c;
00056     register int i;
00057 
00058     if (saw_eof || (c = getc(f)) == EOF)
00059     {
00060         if (line) { FREE(line); line = 0; }
00061         cptr = 0;
00062         saw_eof = 1;
00063         return;
00064     }
00065 
00066     if (line == 0 || linesize != (LINESIZE + 1))
00067     {
00068         if (line) FREE(line);
00069         linesize = LINESIZE + 1;
00070         line = MALLOC(linesize);
00071         if (line == 0) no_space();
00072     }
00073 
00074     i = 0;
00075     ++lineno;
00076     for (;;)
00077     {
00078         line[i]  =  c;
00079         if (c == '\n') { cptr = line; return; }
00080         if (++i >= linesize)
00081         {
00082             linesize += LINESIZE;
00083             line = REALLOC(line, linesize);
00084             if (line ==  0) no_space();
00085         }
00086         c = getc(f);
00087         if (c ==  EOF)
00088         {
00089             line[i] = '\n';
00090             saw_eof = 1;
00091             cptr = line;
00092             return;
00093         }
00094     }
00095 }
00096 
00097 
00098 char *
00099 dup_line()
00100 {
00101     register char *p, *s, *t;
00102 
00103     if (line == 0) return (0);
00104     s = line;
00105     while (*s != '\n') ++s;
00106     p = MALLOC(s - line + 1);
00107     if (p == 0) no_space();
00108 
00109     s = line;
00110     t = p;
00111     while ((*t++ = *s++) != '\n') continue;
00112     return (p);
00113 }
00114 
00115 
00116 skip_comment()
00117 {
00118     register char *s;
00119 
00120     int st_lineno = lineno;
00121     char *st_line = dup_line();
00122     char *st_cptr = st_line + (cptr - line);
00123 
00124     s = cptr + 2;
00125     for (;;)
00126     {
00127         if (*s == '*' && s[1] == '/')
00128         {
00129             cptr = s + 2;
00130             FREE(st_line);
00131             return;
00132         }
00133         if (*s == '\n')
00134         {
00135             get_line();
00136             if (line == 0)
00137                 unterminated_comment(st_lineno, st_line, st_cptr);
00138             s = cptr;
00139         }
00140         else
00141             ++s;
00142     }
00143 }
00144 
00145 
00146 int
00147 nextc()
00148 {
00149     register char *s;
00150 
00151     if (line == 0)
00152     {
00153         get_line();
00154         if (line == 0)
00155             return (EOF);
00156     }
00157 
00158     s = cptr;
00159     for (;;)
00160     {
00161         switch (*s)
00162         {
00163         case '\n':
00164             get_line();
00165             if (line == 0) return (EOF);
00166             s = cptr;
00167             break;
00168 
00169         case ' ':
00170         case '\t':
00171         case '\f':
00172         case '\r':
00173         case '\v':
00174         case ',':
00175         case ';':
00176             ++s;
00177             break;
00178 
00179         case '\\':
00180             cptr = s;
00181             return ('%');
00182 
00183         case '/':
00184             if (s[1] == '*')
00185             {
00186                 cptr = s;
00187                 skip_comment();
00188                 s = cptr;
00189                 break;
00190             }
00191             else if (s[1] == '/')
00192             {
00193                 get_line();
00194                 if (line == 0) return (EOF);
00195                 s = cptr;
00196                 break;
00197             }
00198             /* fall through */
00199 
00200         default:
00201             cptr = s;
00202             return (*s);
00203         }
00204     }
00205 }
00206 
00207 
00208 int
00209 keyword()
00210 {
00211     register int c;
00212     char *t_cptr = cptr;
00213 
00214     c = *++cptr;
00215     if (isalpha(c))
00216     {
00217         cinc = 0;
00218         for (;;)
00219         {
00220             if (isalpha(c))
00221             {
00222                 if (isupper(c)) c = tolower(c);
00223                 cachec(c);
00224             }
00225             else if (isdigit(c) || c == '_' || c == '.' || c == '$')
00226                 cachec(c);
00227             else
00228                 break;
00229             c = *++cptr;
00230         }
00231         cachec(NUL);
00232 
00233         if (strcmp(cache, "token") == 0 || strcmp(cache, "term") == 0)
00234             return (TOKEN);
00235         if (strcmp(cache, "type") == 0)
00236             return (TYPE);
00237         if (strcmp(cache, "left") == 0)
00238             return (LEFT);
00239         if (strcmp(cache, "right") == 0)
00240             return (RIGHT);
00241         if (strcmp(cache, "nonassoc") == 0 || strcmp(cache, "binary") == 0)
00242             return (NONASSOC);
00243         if (strcmp(cache, "start") == 0)
00244             return (START);
00245         if (strcmp(cache, "union") == 0)
00246             return (UNION);
00247         if (strcmp(cache, "ident") == 0)
00248             return (IDENT);
00249     }
00250     else
00251     {
00252         ++cptr;
00253         if (c == '{')
00254             return (TEXT);
00255         if (c == '%' || c == '\\')
00256             return (MARK);
00257         if (c == '<')
00258             return (LEFT);
00259         if (c == '>')
00260             return (RIGHT);
00261         if (c == '0')
00262             return (TOKEN);
00263         if (c == '2')
00264             return (NONASSOC);
00265     }
00266     syntax_error(lineno, line, t_cptr);
00267     /*NOTREACHED*/
00268 }
00269 
00270 
00271 copy_ident()
00272 {
00273     register int c;
00274     register FILE *f = output_file;
00275 
00276     c = nextc();
00277     if (c == EOF) unexpected_EOF();
00278     if (c != '"') syntax_error(lineno, line, cptr);
00279     ++outline;
00280     fprintf(f, "#ident \"");
00281     for (;;)
00282     {
00283         c = *++cptr;
00284         if (c == '\n')
00285         {
00286             fprintf(f, "\"\n");
00287             return;
00288         }
00289         putc(c, f);
00290         if (c == '"')
00291         {
00292             putc('\n', f);
00293             ++cptr;
00294             return;
00295         }
00296     }
00297 }
00298 
00299 
/*
 *  copy_text -- copy a literal code section from the input to text_file.
 *
 *  Copying starts at cptr and stops at the first "%}" or "\}" found
 *  outside a string, character constant or comment; cptr is left just
 *  past it.  Unless lflag is set, a #line directive is written first.
 *  Quoted strings and C comments are copied verbatim and may continue
 *  onto further input lines.  A "//" comment is rewritten as a C
 *  comment, with any "*" followed by "/" inside it written as "* /".
 *  Running out of input is reported through unterminated_text(),
 *  unterminated_string() or unterminated_comment(), using a saved copy
 *  of the line on which the construct began.
 */
00300 copy_text()
00301 {
00302     register int c;
00303     int quote;
00304     register FILE *f = text_file;
00305     int need_newline = 0;    /* nonzero => write '\n' before returning (see the "%}" case) */
00306     int t_lineno = lineno;
00307     char *t_line = dup_line();
00308     char *t_cptr = t_line + (cptr - line - 2);    /* keyword() leaves cptr 2 past the introducing '%' */
00309 
00310     if (*cptr == '\n')
00311     {
00312         get_line();
00313         if (line == 0)
00314             unterminated_text(t_lineno, t_line, t_cptr);
00315     }
00316     if (!lflag) fprintf(f, line_format, lineno, input_file_name);
00317 
00318 loop:
00319     c = *cptr++;
00320     switch (c)
00321     {
00322     case '\n':
00323     next_line:    /* also entered from the "//" comment branch below */
00324         putc('\n', f);
00325         need_newline = 0;
00326         get_line();
00327         if (line) goto loop;
00328         unterminated_text(t_lineno, t_line, t_cptr);
00329 
00330     case '\'':
00331     case '"':
        /* String or character constant: copy through the matching quote. */
00332         {
00333             int s_lineno = lineno;
00334             char *s_line = dup_line();
00335             char *s_cptr = s_line + (cptr - line - 1);    /* position of the opening quote */
00336 
00337             quote = c;
00338             putc(c, f);
00339             for (;;)
00340             {
00341                 c = *cptr++;
00342                 putc(c, f);
00343                 if (c == quote)
00344                 {
00345                     need_newline = 1;
00346                     FREE(s_line);
00347                     goto loop;
00348                 }
00349                 if (c == '\n')
00350                     unterminated_string(s_lineno, s_line, s_cptr);
00351                 if (c == '\\')
00352                 {
                    /* Copy the escaped character too; backslash-newline continues on the next line. */
00353                     c = *cptr++;
00354                     putc(c, f);
00355                     if (c == '\n')
00356                     {
00357                         get_line();
00358                         if (line == 0)
00359                             unterminated_string(s_lineno, s_line, s_cptr);
00360                     }
00361                 }
00362             }
00363         }
00364 
00365     case '/':
00366         putc(c, f);
00367         need_newline = 1;
00368         c = *cptr;
00369         if (c == '/')
00370         {
            /* "//" comment: emitted as a C comment ending at the end of the line. */
00371             putc('*', f);
00372             while ((c = *++cptr) != '\n')
00373             {
00374                 if (c == '*' && cptr[1] == '/')
00375                     fprintf(f, "* ");    /* keeps the text from closing the emitted comment */
00376                 else
00377                     putc(c, f);
00378             }
00379             fprintf(f, "*/");
00380             goto next_line;
00381         }
00382         if (c == '*')
00383         {
            /* C comment: copied verbatim, possibly across several lines. */
00384             int c_lineno = lineno;
00385             char *c_line = dup_line();
00386             char *c_cptr = c_line + (cptr - line - 1);    /* position of the opening '/' */
00387 
00388             putc('*', f);
00389             ++cptr;
00390             for (;;)
00391             {
00392                 c = *cptr++;
00393                 putc(c, f);
00394                 if (c == '*' && *cptr == '/')
00395                 {
00396                     putc('/', f);
00397                     ++cptr;
00398                     FREE(c_line);
00399                     goto loop;
00400                 }
00401                 if (c == '\n')
00402                 {
00403                     get_line();
00404                     if (line == 0)
00405                         unterminated_comment(c_lineno, c_line, c_cptr);
00406                 }
00407             }
00408         }
00409         need_newline = 1;
00410         goto loop;
00411 
00412     case '%':
00413     case '\\':
00414         if (*cptr == '}')
00415         {
            /* End of the section: finish the current output line if needed. */
00416             if (need_newline) putc('\n', f);
00417             ++cptr;
00418             FREE(t_line);
00419             return;
00420         }
00421         /* fall through */
00422 
00423     default:
00424         putc(c, f);
00425         need_newline = 1;
00426         goto loop;
00427     }
00428 }
00429 
00430 
/*
 *  copy_union -- copy a %union body to text_file as "typedef union ... YYSTYPE;".
 *
 *  When dflag is set the same text, apart from the trailing
 *  " YYSTYPE;\n", is also written to union_file.  A second %union is
 *  reported through over_unionized().  Unless lflag is set, a #line
 *  directive is written to text_file first.  Copying starts at cptr and
 *  ends at the '}' that brings the brace depth back to zero.  Braces
 *  inside strings, character constants and comments are not counted.
 *  A "//" comment is rewritten as a C comment.  Running out of input is
 *  reported through unterminated_union(), unterminated_string() or
 *  unterminated_comment().
 */
00431 copy_union()
00432 {
00433     register int c;
00434     int quote;
00435     int depth;
00436     int u_lineno = lineno;
00437     char *u_line = dup_line();
00438     char *u_cptr = u_line + (cptr - line - 6);    /* keyword() leaves cptr 6 past the '%' of "%union" */
00439 
00440     if (unionized) over_unionized(cptr - 6);
00441     unionized = 1;
00442 
00443     if (!lflag)
00444         fprintf(text_file, line_format, lineno, input_file_name);
00445 
00446     fprintf(text_file, "typedef union");
00447     if (dflag) fprintf(union_file, "typedef union");
00448 
00449     depth = 0;
00450 loop:
    /* Every character is echoed before it is examined. */
00451     c = *cptr++;
00452     putc(c, text_file);
00453     if (dflag) putc(c, union_file);
00454     switch (c)
00455     {
00456     case '\n':
00457     next_line:    /* also entered from the "//" comment branch below */
00458         get_line();
00459         if (line == 0) unterminated_union(u_lineno, u_line, u_cptr);
00460         goto loop;
00461 
00462     case '{':
00463         ++depth;
00464         goto loop;
00465 
00466     case '}':
00467         if (--depth == 0)
00468         {
            /* Outermost brace closed: name the type and stop. */
00469             fprintf(text_file, " YYSTYPE;\n");
00470             FREE(u_line);
00471             return;
00472         }
00473         goto loop;
00474 
00475     case '\'':
00476     case '"':
        /* String or character constant: copy through the matching quote. */
00477         {
00478             int s_lineno = lineno;
00479             char *s_line = dup_line();
00480             char *s_cptr = s_line + (cptr - line - 1);    /* position of the opening quote */
00481 
00482             quote = c;
00483             for (;;)
00484             {
00485                 c = *cptr++;
00486                 putc(c, text_file);
00487                 if (dflag) putc(c, union_file);
00488                 if (c == quote)
00489                 {
00490                     FREE(s_line);
00491                     goto loop;
00492                 }
00493                 if (c == '\n')
00494                     unterminated_string(s_lineno, s_line, s_cptr);
00495                 if (c == '\\')
00496                 {
                    /* Copy the escaped character too; backslash-newline continues on the next line. */
00497                     c = *cptr++;
00498                     putc(c, text_file);
00499                     if (dflag) putc(c, union_file);
00500                     if (c == '\n')
00501                     {
00502                         get_line();
00503                         if (line == 0)
00504                             unterminated_string(s_lineno, s_line, s_cptr);
00505                     }
00506                 }
00507             }
00508         }
00509 
00510     case '/':
00511         c = *cptr;
00512         if (c == '/')
00513         {
            /* "//" comment: emitted as a C comment ending at the end of the line. */
00514             putc('*', text_file);
00515             if (dflag) putc('*', union_file);
00516             while ((c = *++cptr) != '\n')
00517             {
00518                 if (c == '*' && cptr[1] == '/')
00519                 {
00520                     fprintf(text_file, "* ");    /* keeps the text from closing the emitted comment */
00521                     if (dflag) fprintf(union_file, "* ");
00522                 }
00523                 else
00524                 {
00525                     putc(c, text_file);
00526                     if (dflag) putc(c, union_file);
00527                 }
00528             }
00529             fprintf(text_file, "*/\n");
00530             if (dflag) fprintf(union_file, "*/\n");
00531             goto next_line;
00532         }
00533         if (c == '*')
00534         {
            /* C comment: copied verbatim, possibly across several lines. */
00535             int c_lineno = lineno;
00536             char *c_line = dup_line();
00537             char *c_cptr = c_line + (cptr - line - 1);    /* position of the opening '/' */
00538 
00539             putc('*', text_file);
00540             if (dflag) putc('*', union_file);
00541             ++cptr;
00542             for (;;)
00543             {
00544                 c = *cptr++;
00545                 putc(c, text_file);
00546                 if (dflag) putc(c, union_file);
00547                 if (c == '*' && *cptr == '/')
00548                 {
00549                     putc('/', text_file);
00550                     if (dflag) putc('/', union_file);
00551                     ++cptr;
00552                     FREE(c_line);
00553                     goto loop;
00554                 }
00555                 if (c == '\n')
00556                 {
00557                     get_line();
00558                     if (line == 0)
00559                         unterminated_comment(c_lineno, c_line, c_cptr);
00560                 }
00561             }
00562         }
00563         goto loop;
00564 
00565     default:
00566         goto loop;
00567     }
00568 }
00569 
00570 
/*
 *  hexval -- numeric value of a hexadecimal digit character.
 *
 *  Returns 0..15 for '0'-'9', 'A'-'F' and 'a'-'f', and -1 for any
 *  other value of c.
 */
int
hexval(c)
int c;
{
    return ('0' <= c && c <= '9') ? (c - '0')
         : ('A' <= c && c <= 'F') ? (c - 'A' + 10)
         : ('a' <= c && c <= 'f') ? (c - 'a' + 10)
         : -1;
}
00583 
00584 
00585 bucket *
00586 get_literal()
00587 {
00588     register int c, quote;
00589     register int i;
00590     register int n;
00591     register char *s;
00592     register bucket *bp;
00593     int s_lineno = lineno;
00594     char *s_line = dup_line();
00595     char *s_cptr = s_line + (cptr - line);
00596 
00597     quote = *cptr++;
00598     cinc = 0;
00599     for (;;)
00600     {
00601         c = *cptr++;
00602         if (c == quote) break;
00603         if (c == '\n') unterminated_string(s_lineno, s_line, s_cptr);
00604         if (c == '\\')
00605         {
00606             char *c_cptr = cptr - 1;
00607 
00608             c = *cptr++;
00609             switch (c)
00610             {
00611             case '\n':
00612                 get_line();
00613                 if (line == 0) unterminated_string(s_lineno, s_line, s_cptr);
00614                 continue;
00615 
00616             case '0': case '1': case '2': case '3':
00617             case '4': case '5': case '6': case '7':
00618                 n = c - '0';
00619                 c = *cptr;
00620                 if (IS_OCTAL(c))
00621                 {
00622                     n = (n << 3) + (c - '0');
00623                     c = *++cptr;
00624                     if (IS_OCTAL(c))
00625                     {
00626                         n = (n << 3) + (c - '0');
00627                         ++cptr;
00628                     }
00629                 }
00630                 if (n > MAXCHAR) illegal_character(c_cptr);
00631                 c = n;
00632                 break;
00633 
00634             case 'x':
00635                 c = *cptr++;
00636                 n = hexval(c);
00637                 if (n < 0 || n >= 16)
00638                     illegal_character(c_cptr);
00639                 for (;;)
00640                 {
00641                     c = *cptr;
00642                     i = hexval(c);
00643                     if (i < 0 || i >= 16) break;
00644                     ++cptr;
00645                     n = (n << 4) + i;
00646                     if (n > MAXCHAR) illegal_character(c_cptr);
00647                 }
00648                 c = n;
00649                 break;
00650 
00651             case 'a': c = 7; break;
00652             case 'b': c = '\b'; break;
00653             case 'f': c = '\f'; break;
00654             case 'n': c = '\n'; break;
00655             case 'r': c = '\r'; break;
00656             case 't': c = '\t'; break;
00657             case 'v': c = '\v'; break;
00658             }
00659         }
00660         cachec(c);
00661     }
00662     FREE(s_line);
00663 
00664     n = cinc;
00665     s = MALLOC(n);
00666     if (s == 0) no_space();
00667     
00668     for (i = 0; i < n; ++i)
00669         s[i] = cache[i];
00670 
00671     cinc = 0;
00672     if (n == 1)
00673         cachec('\'');
00674     else
00675         cachec('"');
00676 
00677     for (i = 0; i < n; ++i)
00678     {
00679         c = ((unsigned char *)s)[i];
00680         if (c == '\\' || c == cache[0])
00681         {
00682             cachec('\\');
00683             cachec(c);
00684         }
00685         else if (isprint(c))
00686             cachec(c);
00687         else
00688         {
00689             cachec('\\');
00690             switch (c)
00691             {
00692             case 7: cachec('a'); break;
00693             case '\b': cachec('b'); break;
00694             case '\f': cachec('f'); break;
00695             case '\n': cachec('n'); break;
00696             case '\r': cachec('r'); break;
00697             case '\t': cachec('t'); break;
00698             case '\v': cachec('v'); break;
00699             default:
00700                 cachec(((c >> 6) & 7) + '0');
00701                 cachec(((c >> 3) & 7) + '0');
00702                 cachec((c & 7) + '0');
00703                 break;
00704             }
00705         }
00706     }
00707 
00708     if (n == 1)
00709         cachec('\'');
00710     else
00711         cachec('"');
00712 
00713     cachec(NUL);
00714     bp = lookup(cache);
00715     bp->class = TERM;
00716     if (n == 1 && bp->value == UNDEFINED)
00717         bp->value = *(unsigned char *)s;
00718     FREE(s);
00719 
00720     return (bp);
00721 }
00722 
00723 
00724 int
00725 is_reserved(name)
00726 char *name;
00727 {
00728     char *s;
00729 
00730     if (strcmp(name, ".") == 0 ||
00731             strcmp(name, "$accept") == 0 ||
00732             strcmp(name, "$end") == 0)
00733         return (1);
00734 
00735     if (name[0] == '$' && name[1] == '$' && isdigit(name[2]))
00736     {
00737         s = name + 3;
00738         while (isdigit(*s)) ++s;
00739         if (*s == NUL) return (1);
00740     }
00741 
00742     return (0);
00743 }
00744 
00745 
00746 bucket *
00747 get_name()
00748 {
00749     register int c;
00750 
00751     cinc = 0;
00752     for (c = *cptr; IS_IDENT(c); c = *++cptr)
00753         cachec(c);
00754     cachec(NUL);
00755 
00756     if (is_reserved(cache)) used_reserved(cache);
00757 
00758     return (lookup(cache));
00759 }
00760 
00761 
00762 int
00763 get_number()
00764 {
00765     register int c;
00766     register int n;
00767 
00768     n = 0;
00769     for (c = *cptr; isdigit(c); c = *++cptr)
00770         n = 10*n + (c - '0');
00771 
00772     return (n);
00773 }
00774 
00775 
00776 char *
00777 get_tag()
00778 {
00779     register int c;
00780     register int i;
00781     register char *s;
00782     int t_lineno = lineno;
00783     char *t_line = dup_line();
00784     char *t_cptr = t_line + (cptr - line);
00785 
00786     ++cptr;
00787     c = nextc();
00788     if (c == EOF) unexpected_EOF();
00789     if (!isalpha(c) && c != '_' && c != '$')
00790         illegal_tag(t_lineno, t_line, t_cptr);
00791 
00792     cinc = 0;
00793     do { cachec(c); c = *++cptr; } while (IS_IDENT(c));
00794     cachec(NUL);
00795 
00796     c = nextc();
00797     if (c == EOF) unexpected_EOF();
00798     if (c != '>')
00799         illegal_tag(t_lineno, t_line, t_cptr);
00800     ++cptr;
00801 
00802     for (i = 0; i < ntags; ++i)
00803     {
00804         if (strcmp(cache, tag_table[i]) == 0)
00805             return (tag_table[i]);
00806     }
00807 
00808     if (ntags >= tagmax)
00809     {
00810         tagmax += 16;
00811         tag_table = (char **)
00812                         (tag_table ? REALLOC(tag_table, tagmax*sizeof(char *))
00813                                    : MALLOC(tagmax*sizeof(char *)));
00814         if (tag_table == 0) no_space();
00815     }
00816 
00817     s = MALLOC(cinc);
00818     if  (s == 0) no_space();
00819     strcpy(s, cache);
00820     tag_table[ntags] = s;
00821     ++ntags;
00822     FREE(t_line);
00823     return (s);
00824 }
00825 
00826 
00827 declare_tokens(assoc)
00828 int assoc;
00829 {
00830     register int c;
00831     register bucket *bp;
00832     int value;
00833     char *tag = 0;
00834 
00835     if (assoc != TOKEN) ++prec;
00836 
00837     c = nextc();
00838     if (c == EOF) unexpected_EOF();
00839     if (c == '<')
00840     {
00841         tag = get_tag();
00842         c = nextc();
00843         if (c == EOF) unexpected_EOF();
00844     }
00845 
00846     for (;;)
00847     {
00848         if (isalpha(c) || c == '_' || c == '.' || c == '$')
00849             bp = get_name();
00850         else if (c == '\'' || c == '"')
00851             bp = get_literal();
00852         else
00853             return;
00854 
00855         if (bp == goal) tokenized_start(bp->name);
00856         bp->class = TERM;
00857 
00858         if (tag)
00859         {
00860             if (bp->tag && tag != bp->tag)
00861                 retyped_warning(bp->name);
00862             bp->tag = tag;
00863         }
00864 
00865         if (assoc != TOKEN)
00866         {
00867             if (bp->prec && prec != bp->prec)
00868                 reprec_warning(bp->name);
00869             bp->assoc = assoc;
00870             bp->prec = prec;
00871         }
00872 
00873         c = nextc();
00874         if (c == EOF) unexpected_EOF();
00875         value = UNDEFINED;
00876         if (isdigit(c))
00877         {
00878             value = get_number();
00879             if (bp->value != UNDEFINED && value != bp->value)
00880                 revalued_warning(bp->name);
00881             bp->value = value;
00882             c = nextc();
00883             if (c == EOF) unexpected_EOF();
00884         }
00885     }
00886 }
00887 
00888 
00889 declare_types()
00890 {
00891     register int c;
00892     register bucket *bp;
00893     char *tag;
00894 
00895     c = nextc();
00896     if (c == EOF) unexpected_EOF();
00897     if (c != '<') syntax_error(lineno, line, cptr);
00898     tag = get_tag();
00899 
00900     for (;;)
00901     {
00902         c = nextc();
00903         if (isalpha(c) || c == '_' || c == '.' || c == '$')
00904             bp = get_name();
00905         else if (c == '\'' || c == '"')
00906             bp = get_literal();
00907         else
00908             return;
00909 
00910         if (bp->tag && tag != bp->tag)
00911             retyped_warning(bp->name);
00912         bp->tag = tag;
00913     }
00914 }
00915 
00916 
00917 declare_start()
00918 {
00919     register int c;
00920     register bucket *bp;
00921 
00922     c = nextc();
00923     if (c == EOF) unexpected_EOF();
00924     if (!isalpha(c) && c != '_' && c != '.' && c != '$')
00925         syntax_error(lineno, line, cptr);
00926     bp = get_name();
00927     if (bp->class == TERM)
00928         terminal_start(bp->name);
00929     if (goal && goal != bp)
00930         restarted_warning();
00931     goal = bp;
00932 }
00933 
00934 
00935 read_declarations()
00936 {
00937     register int c, k;
00938 
00939     cache_size = 256;
00940     cache = MALLOC(cache_size);
00941     if (cache == 0) no_space();
00942 
00943     for (;;)
00944     {
00945         c = nextc();
00946         if (c == EOF) unexpected_EOF();
00947         if (c != '%') syntax_error(lineno, line, cptr);
00948         switch (k = keyword())
00949         {
00950         case MARK:
00951             return;
00952 
00953         case IDENT:
00954             copy_ident();
00955             break;
00956 
00957         case TEXT:
00958             copy_text();
00959             break;
00960 
00961         case UNION:
00962             copy_union();
00963             break;
00964 
00965         case TOKEN:
00966         case LEFT:
00967         case RIGHT:
00968         case NONASSOC:
00969             declare_tokens(k);
00970             break;
00971 
00972         case TYPE:
00973             declare_types();
00974             break;
00975 
00976         case START:
00977             declare_start();
00978             break;
00979         }
00980     }
00981 }
00982 
00983 
00984 initialize_grammar()
00985 {
00986     nitems = 4;
00987     maxitems = 300;
00988     pitem = (bucket **) MALLOC(maxitems*sizeof(bucket *));
00989     if (pitem == 0) no_space();
00990     pitem[0] = 0;
00991     pitem[1] = 0;
00992     pitem[2] = 0;
00993     pitem[3] = 0;
00994 
00995     nrules = 3;
00996     maxrules = 100;
00997     plhs = (bucket **) MALLOC(maxrules*sizeof(bucket *));
00998     if (plhs == 0) no_space();
00999     plhs[0] = 0;
01000     plhs[1] = 0;
01001     plhs[2] = 0;
01002     rprec = (short *) MALLOC(maxrules*sizeof(short));
01003     if (rprec == 0) no_space();
01004     rprec[0] = 0;
01005     rprec[1] = 0;
01006     rprec[2] = 0;
01007     rassoc = (char *) MALLOC(maxrules*sizeof(char));
01008     if (rassoc == 0) no_space();
01009     rassoc[0] = TOKEN;
01010     rassoc[1] = TOKEN;
01011     rassoc[2] = TOKEN;
01012 }
01013 
01014 
01015 expand_items()
01016 {
01017     maxitems += 300;
01018     pitem = (bucket **) REALLOC(pitem, maxitems*sizeof(bucket *));
01019     if (pitem == 0) no_space();
01020 }
01021 
01022 
01023 expand_rules()
01024 {
01025     maxrules += 100;
01026     plhs = (bucket **) REALLOC(plhs, maxrules*sizeof(bucket *));
01027     if (plhs == 0) no_space();
01028     rprec = (short *) REALLOC(rprec, maxrules*sizeof(short));
01029     if (rprec == 0) no_space();
01030     rassoc = (char *) REALLOC(rassoc, maxrules*sizeof(char));
01031     if (rassoc == 0) no_space();
01032 }
01033 
01034 
01035 advance_to_start()
01036 {
01037     register int c;
01038     register bucket *bp;
01039     char *s_cptr;
01040     int s_lineno;
01041 
01042     for (;;)
01043     {
01044         c = nextc();
01045         if (c != '%') break;
01046         s_cptr = cptr;
01047         switch (keyword())
01048         {
01049         case MARK:
01050             no_grammar();
01051 
01052         case TEXT:
01053             copy_text();
01054             break;
01055 
01056         case START:
01057             declare_start();
01058             break;
01059 
01060         default:
01061             syntax_error(lineno, line, s_cptr);
01062         }
01063     }
01064 
01065     c = nextc();
01066     if (!isalpha(c) && c != '_' && c != '.' && c != '_')
01067         syntax_error(lineno, line, cptr);
01068     bp = get_name();
01069     if (goal == 0)
01070     {
01071         if (bp->class == TERM)
01072             terminal_start(bp->name);
01073         goal = bp;
01074     }
01075 
01076     s_lineno = lineno;
01077     c = nextc();
01078     if (c == EOF) unexpected_EOF();
01079     if (c != ':') syntax_error(lineno, line, cptr);
01080     start_rule(bp, s_lineno);
01081     ++cptr;
01082 }
01083 
01084 
01085 start_rule(bp, s_lineno)
01086 register bucket *bp;
01087 int s_lineno;
01088 {
01089     if (bp->class == TERM)
01090         terminal_lhs(s_lineno);
01091     bp->class = NONTERM;
01092     if (nrules >= maxrules)
01093         expand_rules();
01094     plhs[nrules] = bp;
01095     rprec[nrules] = UNDEFINED;
01096     rassoc[nrules] = TOKEN;
01097 }
01098 
01099 
01100 end_rule()
01101 {
01102     register int i;
01103 
01104     if (!last_was_action && plhs[nrules]->tag)
01105     {
01106         for (i = nitems - 1; pitem[i]; --i) continue;
01107         if (pitem[i+1] == 0 || pitem[i+1]->tag != plhs[nrules]->tag)
01108             default_action_warning();
01109     }
01110 
01111     last_was_action = 0;
01112     if (nitems >= maxitems) expand_items();
01113     pitem[nitems] = 0;
01114     ++nitems;
01115     ++nrules;
01116 }
01117 
01118 
01119 insert_empty_rule()
01120 {
01121     register bucket *bp, **bpp;
01122 
01123     assert(cache);
01124     sprintf(cache, "$$%d", ++gensym);
01125     bp = make_bucket(cache);
01126     last_symbol->next = bp;
01127     last_symbol = bp;
01128     bp->tag = plhs[nrules]->tag;
01129     bp->class = NONTERM;
01130 
01131     if ((nitems += 2) > maxitems)
01132         expand_items();
01133     bpp = pitem + nitems - 1;
01134     *bpp-- = bp;
01135     while (bpp[0] = bpp[-1]) --bpp;
01136 
01137     if (++nrules >= maxrules)
01138         expand_rules();
01139     plhs[nrules] = plhs[nrules-1];
01140     plhs[nrules-1] = bp;
01141     rprec[nrules] = rprec[nrules-1];
01142     rprec[nrules-1] = 0;
01143     rassoc[nrules] = rassoc[nrules-1];
01144     rassoc[nrules-1] = TOKEN;
01145 }
01146 
01147 
01148 add_symbol()
01149 {
01150     register int c;
01151     register bucket *bp;
01152     int s_lineno = lineno;
01153 
01154     c = *cptr;
01155     if (c == '\'' || c == '"')
01156         bp = get_literal();
01157     else
01158         bp = get_name();
01159 
01160     c = nextc();
01161     if (c == ':')
01162     {
01163         end_rule();
01164         start_rule(bp, s_lineno);
01165         ++cptr;
01166         return;
01167     }
01168 
01169     if (last_was_action)
01170         insert_empty_rule();
01171     last_was_action = 0;
01172 
01173     if (++nitems > maxitems)
01174         expand_items();
01175     pitem[nitems-1] = bp;
01176 }
01177 
01178 
01179 copy_action()
01180 {
01181     register int c;
01182     register int i, n;
01183     int depth;
01184     int quote;
01185     char *tag;
01186     register FILE *f = action_file;
01187     int a_lineno = lineno;
01188     char *a_line = dup_line();
01189     char *a_cptr = a_line + (cptr - line);
01190 
01191     if (last_was_action)
01192         insert_empty_rule();
01193     last_was_action = 1;
01194 
01195     fprintf(f, "case %d:\n", nrules - 2);
01196     if (!lflag)
01197         fprintf(f, line_format, lineno, input_file_name);
01198     if (*cptr == '=') ++cptr;
01199 
01200     n = 0;
01201     for (i = nitems - 1; pitem[i]; --i) ++n;
01202 
01203     depth = 0;
01204 loop:
01205     c = *cptr;
01206     if (c == '$')
01207     {
01208         if (cptr[1] == '<')
01209         {
01210             int d_lineno = lineno;
01211             char *d_line = dup_line();
01212             char *d_cptr = d_line + (cptr - line);
01213 
01214             ++cptr;
01215             tag = get_tag();
01216             c = *cptr;
01217             if (c == '$')
01218             {
01219                 fprintf(f, "yyval.%s", tag);
01220                 ++cptr;
01221                 FREE(d_line);
01222                 goto loop;
01223             }
01224             else if (isdigit(c))
01225             {
01226                 i = get_number();
01227                 if (i > n) dollar_warning(d_lineno, i);
01228                 fprintf(f, "yyvsp[%d].%s", i - n, tag);
01229                 FREE(d_line);
01230                 goto loop;
01231             }
01232             else if (c == '-' && isdigit(cptr[1]))
01233             {
01234                 ++cptr;
01235                 i = -get_number() - n;
01236                 fprintf(f, "yyvsp[%d].%s", i, tag);
01237                 FREE(d_line);
01238                 goto loop;
01239             }
01240             else
01241                 dollar_error(d_lineno, d_line, d_cptr);
01242         }
01243         else if (cptr[1] == '$')
01244         {
01245             if (ntags)
01246             {
01247                 tag = plhs[nrules]->tag;
01248                 if (tag == 0) untyped_lhs();
01249                 fprintf(f, "yyval.%s", tag);
01250             }
01251             else
01252                 fprintf(f, "yyval");
01253             cptr += 2;
01254             goto loop;
01255         }
01256         else if (isdigit(cptr[1]))
01257         {
01258             ++cptr;
01259             i = get_number();
01260             if (ntags)
01261             {
01262                 if (i <= 0 || i > n)
01263                     unknown_rhs(i);
01264                 tag = pitem[nitems + i - n - 1]->tag;
01265                 if (tag == 0) untyped_rhs(i, pitem[nitems + i - n - 1]->name);
01266                 fprintf(f, "yyvsp[%d].%s", i - n, tag);
01267             }
01268             else
01269             {
01270                 if (i > n)
01271                     dollar_warning(lineno, i);
01272                 fprintf(f, "yyvsp[%d]", i - n);
01273             }
01274             goto loop;
01275         }
01276         else if (cptr[1] == '-')
01277         {
01278             cptr += 2;
01279             i = get_number();
01280             if (ntags)
01281                 unknown_rhs(-i);
01282             fprintf(f, "yyvsp[%d]", -i - n);
01283             goto loop;
01284         }
01285     }
01286     if (isalpha(c) || c == '_' || c == '$')
01287     {
01288         do
01289         {
01290             putc(c, f);
01291             c = *++cptr;
01292         } while (isalnum(c) || c == '_' || c == '$');
01293         goto loop;
01294     }
01295     putc(c, f);
01296     ++cptr;
01297     switch (c)
01298     {
01299     case '\n':
01300     next_line:
01301         get_line();
01302         if (line) goto loop;
01303         unterminated_action(a_lineno, a_line, a_cptr);
01304 
01305     case ';':
01306         if (depth > 0) goto loop;
01307         fprintf(f, "\nbreak;\n");
01308         return;
01309 
01310     case '{':
01311         ++depth;
01312         goto loop;
01313 
01314     case '}':
01315         if (--depth > 0) goto loop;
01316         fprintf(f, "\nbreak;\n");
01317         return;
01318 
01319     case '\'':
01320     case '"':
01321         {
01322             int s_lineno = lineno;
01323             char *s_line = dup_line();
01324             char *s_cptr = s_line + (cptr - line - 1);
01325 
01326             quote = c;
01327             for (;;)
01328             {
01329                 c = *cptr++;
01330                 putc(c, f);
01331                 if (c == quote)
01332                 {
01333                     FREE(s_line);
01334                     goto loop;
01335                 }
01336                 if (c == '\n')
01337                     unterminated_string(s_lineno, s_line, s_cptr);
01338                 if (c == '\\')
01339                 {
01340                     c = *cptr++;
01341                     putc(c, f);
01342                     if (c == '\n')
01343                     {
01344                         get_line();
01345                         if (line == 0)
01346                             unterminated_string(s_lineno, s_line, s_cptr);
01347                     }
01348                 }
01349             }
01350         }
01351 
01352     case '/':
01353         c = *cptr;
01354         if (c == '/')
01355         {
01356             putc('*', f);
01357             while ((c = *++cptr) != '\n')
01358             {
01359                 if (c == '*' && cptr[1] == '/')
01360                     fprintf(f, "* ");
01361                 else
01362                     putc(c, f);
01363             }
01364             fprintf(f, "*/\n");
01365             goto next_line;
01366         }
01367         if (c == '*')
01368         {
01369             int c_lineno = lineno;
01370             char *c_line = dup_line();
01371             char *c_cptr = c_line + (cptr - line - 1);
01372 
01373             putc('*', f);
01374             ++cptr;
01375             for (;;)
01376             {
01377                 c = *cptr++;
01378                 putc(c, f);
01379                 if (c == '*' && *cptr == '/')
01380                 {
01381                     putc('/', f);
01382                     ++cptr;
01383                     FREE(c_line);
01384                     goto loop;
01385                 }
01386                 if (c == '\n')
01387                 {
01388                     get_line();
01389                     if (line == 0)
01390                         unterminated_comment(c_lineno, c_line, c_cptr);
01391                 }
01392             }
01393         }
01394         goto loop;
01395 
01396     default:
01397         goto loop;
01398     }
01399 }
01400 
01401 
01402 int
01403 mark_symbol()
01404 {
01405     register int c;
01406     register bucket *bp;
01407 
01408     c = cptr[1];
01409     if (c == '%' || c == '\\')
01410     {
01411         cptr += 2;
01412         return (1);
01413     }
01414 
01415     if (c == '=')
01416         cptr += 2;
01417     else if ((c == 'p' || c == 'P') &&
01418              ((c = cptr[2]) == 'r' || c == 'R') &&
01419              ((c = cptr[3]) == 'e' || c == 'E') &&
01420              ((c = cptr[4]) == 'c' || c == 'C') &&
01421              ((c = cptr[5], !IS_IDENT(c))))
01422         cptr += 5;
01423     else
01424         syntax_error(lineno, line, cptr);
01425 
01426     c = nextc();
01427     if (isalpha(c) || c == '_' || c == '.' || c == '$')
01428         bp = get_name();
01429     else if (c == '\'' || c == '"')
01430         bp = get_literal();
01431     else
01432     {
01433         syntax_error(lineno, line, cptr);
01434         /*NOTREACHED*/
01435     }
01436 
01437     if (rprec[nrules] != UNDEFINED && bp->prec != rprec[nrules])
01438         prec_redeclared();
01439 
01440     rprec[nrules] = bp->prec;
01441     rassoc[nrules] = bp->assoc;
01442     return (0);
01443 }
01444 
01445 
01446 read_grammar()
01447 {
01448     register int c;
01449 
01450     initialize_grammar();
01451     advance_to_start();
01452 
01453     for (;;)
01454     {
01455         c = nextc();
01456         if (c == EOF) break;
01457         if (isalpha(c) || c == '_' || c == '.' || c == '$' || c == '\'' ||
01458                 c == '"')
01459             add_symbol();
01460         else if (c == '{' || c == '=')
01461             copy_action();
01462         else if (c == '|')
01463         {
01464             end_rule();
01465             start_rule(plhs[nrules-1], 0);
01466             ++cptr;
01467         }
01468         else if (c == '%')
01469         {
01470             if (mark_symbol()) break;
01471         }
01472         else
01473             syntax_error(lineno, line, cptr);
01474     }
01475     end_rule();
01476 }
01477 
01478 
01479 free_tags()
01480 {
01481     register int i;
01482 
01483     if (tag_table == 0) return;
01484 
01485     for (i = 0; i < ntags; ++i)
01486     {
01487         assert(tag_table[i]);
01488         FREE(tag_table[i]);
01489     }
01490     FREE(tag_table);
01491 }
01492 
01493 
01494 pack_names()
01495 {
01496     register bucket *bp;
01497     register char *p, *s, *t;
01498 
01499     name_pool_size = 13;  /* 13 == sizeof("$end") + sizeof("$accept") */
01500     for (bp = first_symbol; bp; bp = bp->next)
01501         name_pool_size += strlen(bp->name) + 1;
01502     name_pool = MALLOC(name_pool_size);
01503     if (name_pool == 0) no_space();
01504 
01505     strcpy(name_pool, "$accept");
01506     strcpy(name_pool+8, "$end");
01507     t = name_pool + 13;
01508     for (bp = first_symbol; bp; bp = bp->next)
01509     {
01510         p = t;
01511         s = bp->name;
01512         while (*t++ = *s++) continue;
01513         FREE(bp->name);
01514         bp->name = p;
01515     }
01516 }
01517 
01518 
01519 check_symbols()
01520 {
01521     register bucket *bp;
01522 
01523     if (goal->class == UNKNOWN)
01524         undefined_goal(goal->name);
01525 
01526     for (bp = first_symbol; bp; bp = bp->next)
01527     {
01528         if (bp->class == UNKNOWN)
01529         {
01530             undefined_symbol_warning(bp->name);
01531             bp->class = TERM;
01532         }
01533     }
01534 }
01535 
01536 
01537 pack_symbols()
01538 {
01539     register bucket *bp;
01540     register bucket **v;
01541     register int i, j, k, n;
01542 
01543     nsyms = 2;
01544     ntokens = 1;
01545     for (bp = first_symbol; bp; bp = bp->next)
01546     {
01547         ++nsyms;
01548         if (bp->class == TERM) ++ntokens;
01549     }
01550     start_symbol = ntokens;
01551     nvars = nsyms - ntokens;
01552 
01553     symbol_name = (char **) MALLOC(nsyms*sizeof(char *));
01554     if (symbol_name == 0) no_space();
01555     symbol_value = (short *) MALLOC(nsyms*sizeof(short));
01556     if (symbol_value == 0) no_space();
01557     symbol_prec = (short *) MALLOC(nsyms*sizeof(short));
01558     if (symbol_prec == 0) no_space();
01559     symbol_assoc = MALLOC(nsyms);
01560     if (symbol_assoc == 0) no_space();
01561 
01562     v = (bucket **) MALLOC(nsyms*sizeof(bucket *));
01563     if (v == 0) no_space();
01564 
01565     v[0] = 0;
01566     v[start_symbol] = 0;
01567 
01568     i = 1;
01569     j = start_symbol + 1;
01570     for (bp = first_symbol; bp; bp = bp->next)
01571     {
01572         if (bp->class == TERM)
01573             v[i++] = bp;
01574         else
01575             v[j++] = bp;
01576     }
01577     assert(i == ntokens && j == nsyms);
01578 
01579     for (i = 1; i < ntokens; ++i)
01580         v[i]->index = i;
01581 
01582     goal->index = start_symbol + 1;
01583     k = start_symbol + 2;
01584     while (++i < nsyms)
01585         if (v[i] != goal)
01586         {
01587             v[i]->index = k;
01588             ++k;
01589         }
01590 
01591     goal->value = 0;
01592     k = 1;
01593     for (i = start_symbol + 1; i < nsyms; ++i)
01594     {
01595         if (v[i] != goal)
01596         {
01597             v[i]->value = k;
01598             ++k;
01599         }
01600     }
01601 
01602     k = 0;
01603     for (i = 1; i < ntokens; ++i)
01604     {
01605         n = v[i]->value;
01606         if (n > 256)
01607         {
01608             for (j = k++; j > 0 && symbol_value[j-1] > n; --j)
01609                 symbol_value[j] = symbol_value[j-1];
01610             symbol_value[j] = n;
01611         }
01612     }
01613 
01614     if (v[1]->value == UNDEFINED)
01615         v[1]->value = 256;
01616 
01617     j = 0;
01618     n = 257;
01619     for (i = 2; i < ntokens; ++i)
01620     {
01621         if (v[i]->value == UNDEFINED)
01622         {
01623             while (j < k && n == symbol_value[j])
01624             {
01625                 while (++j < k && n == symbol_value[j]) continue;
01626                 ++n;
01627             }
01628             v[i]->value = n;
01629             ++n;
01630         }
01631     }
01632 
01633     symbol_name[0] = name_pool + 8;
01634     symbol_value[0] = 0;
01635     symbol_prec[0] = 0;
01636     symbol_assoc[0] = TOKEN;
01637     for (i = 1; i < ntokens; ++i)
01638     {
01639         symbol_name[i] = v[i]->name;
01640         symbol_value[i] = v[i]->value;
01641         symbol_prec[i] = v[i]->prec;
01642         symbol_assoc[i] = v[i]->assoc;
01643     }
01644     symbol_name[start_symbol] = name_pool;
01645     symbol_value[start_symbol] = -1;
01646     symbol_prec[start_symbol] = 0;
01647     symbol_assoc[start_symbol] = TOKEN;
01648     for (++i; i < nsyms; ++i)
01649     {
01650         k = v[i]->index;
01651         symbol_name[k] = v[i]->name;
01652         symbol_value[k] = v[i]->value;
01653         symbol_prec[k] = v[i]->prec;
01654         symbol_assoc[k] = v[i]->assoc;
01655     }
01656 
01657     FREE(v);
01658 }
01659 
01660 
01661 pack_grammar()
01662 {
01663     register int i, j;
01664     int assoc, prec;
01665 
01666     ritem = (short *) MALLOC(nitems*sizeof(short));
01667     if (ritem == 0) no_space();
01668     rlhs = (short *) MALLOC(nrules*sizeof(short));
01669     if (rlhs == 0) no_space();
01670     rrhs = (short *) MALLOC((nrules+1)*sizeof(short));
01671     if (rrhs == 0) no_space();
01672     rprec = (short *) REALLOC(rprec, nrules*sizeof(short));
01673     if (rprec == 0) no_space();
01674     rassoc = REALLOC(rassoc, nrules);
01675     if (rassoc == 0) no_space();
01676 
01677     ritem[0] = -1;
01678     ritem[1] = goal->index;
01679     ritem[2] = 0;
01680     ritem[3] = -2;
01681     rlhs[0] = 0;
01682     rlhs[1] = 0;
01683     rlhs[2] = start_symbol;
01684     rrhs[0] = 0;
01685     rrhs[1] = 0;
01686     rrhs[2] = 1;
01687 
01688     j = 4;
01689     for (i = 3; i < nrules; ++i)
01690     {
01691         rlhs[i] = plhs[i]->index;
01692         rrhs[i] = j;
01693         assoc = TOKEN;
01694         prec = 0;
01695         while (pitem[j])
01696         {
01697             ritem[j] = pitem[j]->index;
01698             if (pitem[j]->class == TERM)
01699             {
01700                 prec = pitem[j]->prec;
01701                 assoc = pitem[j]->assoc;
01702             }
01703             ++j;
01704         }
01705         ritem[j] = -i;
01706         ++j;
01707         if (rprec[i] == UNDEFINED)
01708         {
01709             rprec[i] = prec;
01710             rassoc[i] = assoc;
01711         }
01712     }
01713     rrhs[i] = j;
01714 
01715     FREE(plhs);
01716     FREE(pitem);
01717 }
01718 
01719 
01720 print_grammar()
01721 {
01722     register int i, j, k;
01723     int spacing;
01724     register FILE *f = verbose_file;
01725 
01726     if (!vflag) return;
01727 
01728     k = 1;
01729     for (i = 2; i < nrules; ++i)
01730     {
01731         if (rlhs[i] != rlhs[i-1])
01732         {
01733             if (i != 2) fprintf(f, "\n");
01734             fprintf(f, "%4d  %s :", i - 2, symbol_name[rlhs[i]]);
01735             spacing = strlen(symbol_name[rlhs[i]]) + 1;
01736         }
01737         else
01738         {
01739             fprintf(f, "%4d  ", i - 2);
01740             j = spacing;
01741             while (--j >= 0) putc(' ', f);
01742             putc('|', f);
01743         }
01744 
01745         while (ritem[k] >= 0)
01746         {
01747             fprintf(f, " %s", symbol_name[ritem[k]]);
01748             ++k;
01749         }
01750         ++k;
01751         putc('\n', f);
01752     }
01753 }
01754 
01755 
01756 reader()
01757 {
01758     write_section(banner);
01759     create_symbol_table();
01760     read_declarations();
01761     read_grammar();
01762     free_symbol_table();
01763     free_tags();
01764     pack_names();
01765     check_symbols();
01766     pack_symbols();
01767     pack_grammar();
01768     free_symbols();
01769     print_grammar();
01770 }

Generated on Fri Apr 14 22:56:45 2006 for minix by  doxygen 1.4.6