tokenize.c

Go to the documentation of this file.
00001 /*      tokenize.c - split input into tokens            Author: Kees J. Bot
00002  *                                                              13 Dec 1993
00003  */
00004 #define nil 0
00005 #include <stdio.h>
00006 #include <stdarg.h>
00007 #include <stdlib.h>
00008 #include <string.h>
00009 #include <assert.h>
00010 #include "asmconv.h"
00011 #include "token.h"
00012 
00013 static FILE *tf;                /* Input stream being tokenized, opened by tok_init(). */
00014 static char *tfile;             /* Current file name as kept by set_file(); printed by parse_err(). */
00015 static char *orig_tfile;        /* File name given to tok_init(); reported on a read error. */
00016 static int tcomment;            /* Comment character given to tok_init(), see iscomment(). */
00017 static int tc;                  /* Current input character, or EOF; filled in by readtc(). */
00018 static long tline;              /* Current line number; set by set_file(), advanced by readtc(). */
00019 static token_t *tq;             /* Head of the queue of tokens read but not yet skipped. */
00020 
00021 static void readtc(void)
00022 /* Read one character from the input file and put it in the global 'tc'. */
00023 {
00024         static int nl= 0;
00025 
00026         if (nl) tline++;
00027         if ((tc= getc(tf)) == EOF && ferror(tf)) fatal(orig_tfile);
00028         nl= (tc == '\n');
00029 }
00030 
00031 void set_file(char *file, long line)
00032 /* Set file name and line number, changed by a preprocessor trick. */
00033 {
00034         deallocate(tfile);
00035         tfile= allocate(nil, (strlen(file) + 1) * sizeof(tfile[0]));
00036         strcpy(tfile, file);
00037         tline= line;
00038 }
00039 
00040 void get_file(char **file, long *line)
00041 /* Get file name and line number. */
00042 {
00043         *file= tfile;
00044         *line= tline;
00045 }
00046 
00047 void parse_err(int err, token_t *t, const char *fmt, ...)
00048 /* Report a parsing error. */
00049 {
00050         va_list ap;
00051 
00052         fprintf(stderr, "\"%s\", line %ld: ", tfile,
00053                                                 t == nil ? tline : t->line);
00054         va_start(ap, fmt);
00055         vfprintf(stderr, fmt, ap);
00056         va_end(ap);
00057         if (err) set_error();
00058 }
00059 
00060 void tok_init(char *file, int comment)
00061 /* Open the file to tokenize and initialize the tokenizer. */
00062 {
00063         if (file == nil) {
00064                 file= "stdin";
00065                 tf= stdin;
00066         } else {
00067                 if ((tf= fopen(file, "r")) == nil) fatal(file);
00068         }
00069         orig_tfile= file;
00070         set_file(file, 1);
00071         readtc();
00072         tcomment= comment;
00073 }
00074 
static int isspace(int c)
/* Is 'c' white space to the tokenizer?  Newline is never white space here,
 * get_token() turns it into a token.  Otherwise the answer is whatever
 * between('\0', c, ' ') says (presumably an inclusive range test, see its
 * definition).
 */
{
	if (c == '\n') return 0;

	return between('\0', c, ' ') != 0;
}
00079 
00080 #define iscomment(c)    ((c) == tcomment)       /* Is c the comment character given to tok_init()? */
00081 
static int isidentchar(int c)
/* Can 'c' be part of a word?  Words are made of letters, digits, '.' and
 * '_'.  Returns 1 if so, 0 otherwise.
 */
{
	if (c == '.' || c == '_') return 1;
	if (between('a', c, 'z')) return 1;
	if (between('A', c, 'Z')) return 1;
	if (between('0', c, '9')) return 1;

	return 0;
}
00091 
00092 static token_t *new_token(void)
00093 {
00094         token_t *new;
00095 
00096         new= allocate(nil, sizeof(*new));
00097         new->next= nil;
00098         new->line= tline;
00099         new->name= nil;
00100         new->symbol= -1;
00101         return new;
00102 }
00103 
00104 static token_t *get_word(void)
00105 /* Read one word, an identifier, a number, a label, or a mnemonic. */
00106 {
00107         token_t *w;
00108         char *name;
00109         size_t i, len;
00110 
00111         i= 0;
00112         len= 16;
00113         name= allocate(nil, len * sizeof(name[0]));
00114 
00115         while (isidentchar(tc)) {
00116                 name[i++]= tc;
00117                 readtc();
00118                 if (i == len) name= allocate(name, (len*= 2) * sizeof(name[0]));
00119         }
00120         name[i]= 0;
00121         name= allocate(name, (i+1) * sizeof(name[0]));
00122         w= new_token();
00123         w->type= T_WORD;
00124         w->name= name;
00125         w->len= i;
00126         return w;
00127 }
00128 
00129 static token_t *get_string(void)
00130 /* Read a single or double quotes delimited string. */
00131 {
00132         token_t *s;
00133         int quote;
00134         char *str;
00135         size_t i, len;
00136         int n, j;
00137         int seen;
00138 
00139         quote= tc;
00140         readtc();
00141 
00142         i= 0;
00143         len= 16;
00144         str= allocate(nil, len * sizeof(str[0]));
00145 
00146         while (tc != quote && tc != '\n' && tc != EOF) {
00147                 seen= -1;
00148                 if (tc == '\\') {
00149                         readtc();
00150                         if (tc == '\n' || tc == EOF) break;
00151 
00152                         switch (tc) {
00153                         case 'a':       tc= '\a'; break;
00154                         case 'b':       tc= '\b'; break;
00155                         case 'f':       tc= '\f'; break;
00156                         case 'n':       tc= '\n'; break;
00157                         case 'r':       tc= '\r'; break;
00158                         case 't':       tc= '\t'; break;
00159                         case 'v':       tc= '\v'; break;
00160                         case 'x':
00161                                 n= 0;
00162                                 for (j= 0; j < 3; j++) {
00163                                         readtc();
00164                                         if (between('0', tc, '9'))
00165                                                 tc-= '0' + 0x0;
00166                                         else
00167                                         if (between('A', tc, 'A'))
00168                                                 tc-= 'A' + 0xA;
00169                                         else
00170                                         if (between('a', tc, 'a'))
00171                                                 tc-= 'a' + 0xa;
00172                                         else {
00173                                                 seen= tc;
00174                                                 break;
00175                                         }
00176                                         n= n*0x10 + tc;
00177                                 }
00178                                 tc= n;
00179                                 break;
00180                         default:
00181                                 if (!between('0', tc, '9')) break;
00182                                 n= 0;
00183                                 for (j= 0; j < 3; j++) {
00184                                         if (between('0', tc, '9'))
00185                                                 tc-= '0';
00186                                         else {
00187                                                 seen= tc;
00188                                                 break;
00189                                         }
00190                                         n= n*010 + tc;
00191                                         readtc();
00192                                 }
00193                                 tc= n;
00194                         }
00195                 }
00196                 str[i++]= tc;
00197                 if (i == len) str= allocate(str, (len*= 2) * sizeof(str[0]));
00198 
00199                 if (seen < 0) readtc(); else tc= seen;
00200         }
00201 
00202         if (tc == quote) {
00203                 readtc();
00204         } else {
00205                 parse_err(1, nil, "string contains newline\n");
00206         }
00207         str[i]= 0;
00208         str= allocate(str, (i+1) * sizeof(str[0]));
00209         s= new_token();
00210         s->type= T_STRING;
00211         s->name= str;
00212         s->len= i;
00213         return s;
00214 }
00215 
00216 static int old_n= 0;            /* To speed up n, n+1, n+2, ... accesses. */
00217 static token_t **old_ptq= &tq;
00218 
00219 token_t *get_token(int n)
00220 /* Return the n-th token on the input queue. */
00221 {
00222         token_t *t, **ptq;
00223 
00224         assert(n >= 0);
00225 
00226         if (0 && n >= old_n) {
00227                 /* Go forward from the previous point. */
00228                 n-= old_n;
00229                 old_n+= n;
00230                 ptq= old_ptq;
00231         } else {
00232                 /* Restart from the head of the queue. */
00233                 old_n= n;
00234                 ptq= &tq;
00235         }
00236 
00237         for (;;) {
00238                 if ((t= *ptq) == nil) {
00239                         /* Token queue doesn't have element <n>, read a
00240                          * new token from the input stream.
00241                          */
00242                         while (isspace(tc) || iscomment(tc)) {
00243                                 if (iscomment(tc)) {
00244                                         while (tc != '\n' && tc != EOF)
00245                                                 readtc();
00246                                 } else {
00247                                         readtc();
00248                                 }
00249                         }
00250 
00251                         if (tc == EOF) {
00252                                 t= new_token();
00253                                 t->type= T_EOF;
00254                         } else
00255                         if (isidentchar(tc)) {
00256                                 t= get_word();
00257                         } else
00258                         if (tc == '\'' || tc == '"') {
00259                                 t= get_string();
00260                         } else {
00261                                 if (tc == '\n') tc= ';';
00262                                 t= new_token();
00263                                 t->type= T_CHAR;
00264                                 t->symbol= tc;
00265                                 readtc();
00266                                 if (t->symbol == '<' && tc == '<') {
00267                                         t->symbol= S_LEFTSHIFT;
00268                                         readtc();
00269                                 } else
00270                                 if (t->symbol == '>' && tc == '>') {
00271                                         t->symbol= S_RIGHTSHIFT;
00272                                         readtc();
00273                                 }
00274                         }
00275                         *ptq= t;
00276                 }
00277                 if (n == 0) break;
00278                 n--;
00279                 ptq= &t->next;
00280         }
00281         old_ptq= ptq;
00282         return t;
00283 }
00284 
00285 void skip_token(int n)
00286 /* Remove n tokens from the input queue.  One is not allowed to skip unread
00287  * tokens.
00288  */
00289 {
00290         token_t *junk;
00291 
00292         assert(n >= 0);
00293 
00294         while (n > 0) {
00295                 assert(tq != nil);
00296 
00297                 junk= tq;
00298                 tq= tq->next;
00299                 deallocate(junk->name);
00300                 deallocate(junk);
00301                 n--;
00302         }
00303         /* Reset the old reference. */
00304         old_n= 0;
00305         old_ptq= &tq;
00306 }

Generated on Fri Apr 14 22:56:55 2006 for minix by  doxygen 1.4.6