00001
00002
00003
00004 #define nil 0
00005 #include <stdio.h>
00006 #include <stdarg.h>
00007 #include <stdlib.h>
00008 #include <string.h>
00009 #include <assert.h>
00010 #include "asmconv.h"
00011 #include "token.h"
00012
00013 static FILE *tf;
00014 static char *tfile;
00015 static char *orig_tfile;
00016 static int tcomment;
00017 static int tc;
00018 static long tline;
00019 static token_t *tq;
00020
00021 static void readtc(void)
00022
00023 {
00024 static int nl= 0;
00025
00026 if (nl) tline++;
00027 if ((tc= getc(tf)) == EOF && ferror(tf)) fatal(orig_tfile);
00028 nl= (tc == '\n');
00029 }
00030
00031 void set_file(char *file, long line)
00032
00033 {
00034 deallocate(tfile);
00035 tfile= allocate(nil, (strlen(file) + 1) * sizeof(tfile[0]));
00036 strcpy(tfile, file);
00037 tline= line;
00038 }
00039
00040 void get_file(char **file, long *line)
00041
00042 {
00043 *file= tfile;
00044 *line= tline;
00045 }
00046
00047 void parse_err(int err, token_t *t, const char *fmt, ...)
00048
00049 {
00050 va_list ap;
00051
00052 fprintf(stderr, "\"%s\", line %ld: ", tfile,
00053 t == nil ? tline : t->line);
00054 va_start(ap, fmt);
00055 vfprintf(stderr, fmt, ap);
00056 va_end(ap);
00057 if (err) set_error();
00058 }
00059
00060 void tok_init(char *file, int comment)
00061
00062 {
00063 if (file == nil) {
00064 file= "stdin";
00065 tf= stdin;
00066 } else {
00067 if ((tf= fopen(file, "r")) == nil) fatal(file);
00068 }
00069 orig_tfile= file;
00070 set_file(file, 1);
00071 readtc();
00072 tcomment= comment;
00073 }
00074
static int isspace(int c)
/* White space for the tokenizer: anything between() accepts for the range
 * '\0' to ' ', except newline, which is never white space here.
 */
{
	if (c == '\n') return 0;
	return between('\0', c, ' ') ? 1 : 0;
}
00079
/* True if 'c' is the comment start character handed to tok_init(). */
#define iscomment(c)	(tcomment == (c))
00081
static int isidentchar(int c)
/* True (1) if 'c' may be part of a word: a letter, a digit, '.' or '_'. */
{
	if (c == '.' || c == '_') return 1;
	if (between('0', c, '9')) return 1;
	if (between('a', c, 'z')) return 1;
	if (between('A', c, 'Z')) return 1;
	return 0;
}
00091
00092 static token_t *new_token(void)
00093 {
00094 token_t *new;
00095
00096 new= allocate(nil, sizeof(*new));
00097 new->next= nil;
00098 new->line= tline;
00099 new->name= nil;
00100 new->symbol= -1;
00101 return new;
00102 }
00103
00104 static token_t *get_word(void)
00105
00106 {
00107 token_t *w;
00108 char *name;
00109 size_t i, len;
00110
00111 i= 0;
00112 len= 16;
00113 name= allocate(nil, len * sizeof(name[0]));
00114
00115 while (isidentchar(tc)) {
00116 name[i++]= tc;
00117 readtc();
00118 if (i == len) name= allocate(name, (len*= 2) * sizeof(name[0]));
00119 }
00120 name[i]= 0;
00121 name= allocate(name, (i+1) * sizeof(name[0]));
00122 w= new_token();
00123 w->type= T_WORD;
00124 w->name= name;
00125 w->len= i;
00126 return w;
00127 }
00128
00129 static token_t *get_string(void)
00130
00131 {
00132 token_t *s;
00133 int quote;
00134 char *str;
00135 size_t i, len;
00136 int n, j;
00137 int seen;
00138
00139 quote= tc;
00140 readtc();
00141
00142 i= 0;
00143 len= 16;
00144 str= allocate(nil, len * sizeof(str[0]));
00145
00146 while (tc != quote && tc != '\n' && tc != EOF) {
00147 seen= -1;
00148 if (tc == '\\') {
00149 readtc();
00150 if (tc == '\n' || tc == EOF) break;
00151
00152 switch (tc) {
00153 case 'a': tc= '\a'; break;
00154 case 'b': tc= '\b'; break;
00155 case 'f': tc= '\f'; break;
00156 case 'n': tc= '\n'; break;
00157 case 'r': tc= '\r'; break;
00158 case 't': tc= '\t'; break;
00159 case 'v': tc= '\v'; break;
00160 case 'x':
00161 n= 0;
00162 for (j= 0; j < 3; j++) {
00163 readtc();
00164 if (between('0', tc, '9'))
00165 tc-= '0' + 0x0;
00166 else
00167 if (between('A', tc, 'A'))
00168 tc-= 'A' + 0xA;
00169 else
00170 if (between('a', tc, 'a'))
00171 tc-= 'a' + 0xa;
00172 else {
00173 seen= tc;
00174 break;
00175 }
00176 n= n*0x10 + tc;
00177 }
00178 tc= n;
00179 break;
00180 default:
00181 if (!between('0', tc, '9')) break;
00182 n= 0;
00183 for (j= 0; j < 3; j++) {
00184 if (between('0', tc, '9'))
00185 tc-= '0';
00186 else {
00187 seen= tc;
00188 break;
00189 }
00190 n= n*010 + tc;
00191 readtc();
00192 }
00193 tc= n;
00194 }
00195 }
00196 str[i++]= tc;
00197 if (i == len) str= allocate(str, (len*= 2) * sizeof(str[0]));
00198
00199 if (seen < 0) readtc(); else tc= seen;
00200 }
00201
00202 if (tc == quote) {
00203 readtc();
00204 } else {
00205 parse_err(1, nil, "string contains newline\n");
00206 }
00207 str[i]= 0;
00208 str= allocate(str, (i+1) * sizeof(str[0]));
00209 s= new_token();
00210 s->type= T_STRING;
00211 s->name= str;
00212 s->len= i;
00213 return s;
00214 }
00215
00216 static int old_n= 0;
00217 static token_t **old_ptq= &tq;
00218
00219 token_t *get_token(int n)
00220
00221 {
00222 token_t *t, **ptq;
00223
00224 assert(n >= 0);
00225
00226 if (0 && n >= old_n) {
00227
00228 n-= old_n;
00229 old_n+= n;
00230 ptq= old_ptq;
00231 } else {
00232
00233 old_n= n;
00234 ptq= &tq;
00235 }
00236
00237 for (;;) {
00238 if ((t= *ptq) == nil) {
00239
00240
00241
00242 while (isspace(tc) || iscomment(tc)) {
00243 if (iscomment(tc)) {
00244 while (tc != '\n' && tc != EOF)
00245 readtc();
00246 } else {
00247 readtc();
00248 }
00249 }
00250
00251 if (tc == EOF) {
00252 t= new_token();
00253 t->type= T_EOF;
00254 } else
00255 if (isidentchar(tc)) {
00256 t= get_word();
00257 } else
00258 if (tc == '\'' || tc == '"') {
00259 t= get_string();
00260 } else {
00261 if (tc == '\n') tc= ';';
00262 t= new_token();
00263 t->type= T_CHAR;
00264 t->symbol= tc;
00265 readtc();
00266 if (t->symbol == '<' && tc == '<') {
00267 t->symbol= S_LEFTSHIFT;
00268 readtc();
00269 } else
00270 if (t->symbol == '>' && tc == '>') {
00271 t->symbol= S_RIGHTSHIFT;
00272 readtc();
00273 }
00274 }
00275 *ptq= t;
00276 }
00277 if (n == 0) break;
00278 n--;
00279 ptq= &t->next;
00280 }
00281 old_ptq= ptq;
00282 return t;
00283 }
00284
00285 void skip_token(int n)
00286
00287
00288
00289 {
00290 token_t *junk;
00291
00292 assert(n >= 0);
00293
00294 while (n > 0) {
00295 assert(tq != nil);
00296
00297 junk= tq;
00298 tq= tq->next;
00299 deallocate(junk->name);
00300 deallocate(junk);
00301 n--;
00302 }
00303
00304 old_n= 0;
00305 old_ptq= &tq;
00306 }