00001
00002
00003
00004 #define nil 0
00005 #include <stdio.h>
00006 #include <stdlib.h>
00007 #include <stdarg.h>
00008 #include <string.h>
00009 #include <assert.h>
00010 #include "asmconv.h"
00011 #include "token.h"
00012 #include "asm86.h"
00013 #include "languages.h"
00014
00015 typedef struct mnemonic {
00016 opcode_t opcode;
00017 char *name;
00018 } mnemonic_t;
00019
00020 static mnemonic_t mnemtab[] = {
00021 { AAA, "aaa" },
00022 { AAD, "aad" },
00023 { AAM, "aam" },
00024 { AAS, "aas" },
00025 { ADC, "adc%" },
00026 { ADD, "add%" },
00027 { AND, "and%" },
00028 { ARPL, "arpl" },
00029 { BOUND, "bound%" },
00030 { BSF, "bsf%" },
00031 { BSR, "bsr%" },
00032 { BSWAP, "bswap" },
00033 { BT, "bt%" },
00034 { BTC, "btc%" },
00035 { BTR, "btr%" },
00036 { BTS, "bts%" },
00037 { CALL, "call" },
00038 { CALLF, "lcall" },
00039 { CBW, "cbtw" },
00040 { CLC, "clc" },
00041 { CLD, "cld" },
00042 { CLI, "cli" },
00043 { CLTS, "clts" },
00044 { CMC, "cmc" },
00045 { CMP, "cmp%" },
00046 { CMPS, "cmps%" },
00047 { CMPXCHG, "cmpxchg" },
00048 { CWD, "cwtd" },
00049 { DAA, "daa" },
00050 { DAS, "das" },
00051 { DEC, "dec%" },
00052 { DIV, "div%" },
00053 { DOT_ALIGN, ".align" },
00054 { DOT_ASCII, ".ascii" },
00055 { DOT_ASCIZ, ".asciz" },
00056 { DOT_ASSERT, ".assert" },
00057 { DOT_BASE, ".base" },
00058 { DOT_BSS, ".bss" },
00059 { DOT_COMM, ".comm" },
00060 { DOT_DATA, ".data" },
00061 { DOT_DATA1, ".byte" },
00062 { DOT_DATA2, ".short" },
00063 { DOT_DATA4, ".long" },
00064 { DOT_DEFINE, ".globl" },
00065 { DOT_EXTERN, ".globl" },
00066 { DOT_FILE, ".file" },
00067 { DOT_LCOMM, ".lcomm" },
00068 { DOT_LINE, ".line" },
00069 { DOT_LIST, ".list" },
00070 { DOT_NOLIST, ".nolist" },
00071 { DOT_ROM, ".data" },
00072 { DOT_SPACE, ".space" },
00073 { DOT_SYMB, ".symb" },
00074 { DOT_TEXT, ".text" },
00075 { DOT_USE16, ".use16" },
00076 { DOT_USE32, ".use32" },
00077 { ENTER, "enter" },
00078 { F2XM1, "f2xm1" },
00079 { FABS, "fabs" },
00080 { FADD, "fadd" },
00081 { FADDD, "faddl" },
00082 { FADDP, "faddp" },
00083 { FADDS, "fadds" },
00084 { FBLD, "fbld" },
00085 { FBSTP, "fbstp" },
00086 { FCHS, "fchs" },
00087 { FCLEX, "fnclex" },
00088 { FCOMD, "fcoml" },
00089 { FCOMPD, "fcompl" },
00090 { FCOMPP, "fcompp" },
00091 { FCOMPS, "fcomps" },
00092 { FCOMS, "fcoms" },
00093 { FCOS, "fcos" },
00094 { FDECSTP, "fdecstp" },
00095 { FDIVD, "fdivl" },
00096 { FDIVP, "fdivp" },
00097 { FDIVRD, "fdivrl" },
00098 { FDIVRP, "fdivrp" },
00099 { FDIVRS, "fdivrs" },
00100 { FDIVS, "fdivs" },
00101 { FFREE, "ffree" },
00102 { FIADDL, "fiaddl" },
00103 { FIADDS, "fiadds" },
00104 { FICOM, "ficom" },
00105 { FICOMP, "ficomp" },
00106 { FIDIVL, "fidivl" },
00107 { FIDIVRL, "fidivrl" },
00108 { FIDIVRS, "fidivrs" },
00109 { FIDIVS, "fidivs" },
00110 { FILDL, "fildl" },
00111 { FILDQ, "fildq" },
00112 { FILDS, "filds" },
00113 { FIMULL, "fimull" },
00114 { FIMULS, "fimuls" },
00115 { FINCSTP, "fincstp" },
00116 { FINIT, "fninit" },
00117 { FISTL, "fistl" },
00118 { FISTP, "fistp" },
00119 { FISTS, "fists" },
00120 { FISUBL, "fisubl" },
00121 { FISUBRL, "fisubrl" },
00122 { FISUBRS, "fisubrs" },
00123 { FISUBS, "fisubs" },
00124 { FLD1, "fld1" },
00125 { FLDCW, "fldcw" },
00126 { FLDD, "fldl" },
00127 { FLDENV, "fldenv" },
00128 { FLDL2E, "fldl2e" },
00129 { FLDL2T, "fldl2t" },
00130 { FLDLG2, "fldlg2" },
00131 { FLDLN2, "fldln2" },
00132 { FLDPI, "fldpi" },
00133 { FLDS, "flds" },
00134 { FLDX, "fldt" },
00135 { FLDZ, "fldz" },
00136 { FMULD, "fmull" },
00137 { FMULP, "fmulp" },
00138 { FMULS, "fmuls" },
00139 { FNOP, "fnop" },
00140 { FPATAN, "fpatan" },
00141 { FPREM, "fprem" },
00142 { FPREM1, "fprem1" },
00143 { FPTAN, "fptan" },
00144 { FRNDINT, "frndint" },
00145 { FRSTOR, "frstor" },
00146 { FSAVE, "fnsave" },
00147 { FSCALE, "fscale" },
00148 { FSIN, "fsin" },
00149 { FSINCOS, "fsincos" },
00150 { FSQRT, "fsqrt" },
00151 { FSTCW, "fnstcw" },
00152 { FSTD, "fstl" },
00153 { FSTENV, "fnstenv" },
00154 { FSTPD, "fstpl" },
00155 { FSTPS, "fstps" },
00156 { FSTPX, "fstpt" },
00157 { FSTS, "fsts" },
00158 { FSTSW, "fstsw" },
00159 { FSUBD, "fsubl" },
00160 { FSUBP, "fsubp" },
00161 { FSUBPR, "fsubpr" },
00162 { FSUBRD, "fsubrl" },
00163 { FSUBRS, "fsubrs" },
00164 { FSUBS, "fsubs" },
00165 { FTST, "ftst" },
00166 { FUCOM, "fucom" },
00167 { FUCOMP, "fucomp" },
00168 { FUCOMPP, "fucompp" },
00169 { FXAM, "fxam" },
00170 { FXCH, "fxch" },
00171 { FXTRACT, "fxtract" },
00172 { FYL2X, "fyl2x" },
00173 { FYL2XP1, "fyl2xp1" },
00174 { HLT, "hlt" },
00175 { IDIV, "idiv%" },
00176 { IMUL, "imul%" },
00177 { IN, "in%" },
00178 { INC, "inc%" },
00179 { INS, "ins%" },
00180 { INT, "int" },
00181 { INTO, "into" },
00182 { INVD, "invd" },
00183 { INVLPG, "invlpg" },
00184 { IRET, "iret" },
00185 { IRETD, "iret" },
00186 { JA, "ja" },
00187 { JAE, "jae" },
00188 { JB, "jb" },
00189 { JBE, "jbe" },
00190 { JCXZ, "jcxz" },
00191 { JE, "je" },
00192 { JG, "jg" },
00193 { JGE, "jge" },
00194 { JL, "jl" },
00195 { JLE, "jle" },
00196 { JMP, "jmp" },
00197 { JMPF, "ljmp" },
00198 { JNE, "jne" },
00199 { JNO, "jno" },
00200 { JNP, "jnp" },
00201 { JNS, "jns" },
00202 { JO, "jo" },
00203 { JP, "jp" },
00204 { JS, "js" },
00205 { LAHF, "lahf" },
00206 { LAR, "lar" },
00207 { LDS, "lds" },
00208 { LEA, "lea%" },
00209 { LEAVE, "leave" },
00210 { LES, "les" },
00211 { LFS, "lfs" },
00212 { LGDT, "lgdt" },
00213 { LGS, "lgs" },
00214 { LIDT, "lidt" },
00215 { LLDT, "lldt" },
00216 { LMSW, "lmsw" },
00217 { LOCK, "lock" },
00218 { LODS, "lods%" },
00219 { LOOP, "loop" },
00220 { LOOPE, "loope" },
00221 { LOOPNE, "loopne" },
00222 { LSL, "lsl" },
00223 { LSS, "lss" },
00224 { LTR, "ltr" },
00225 { MOV, "mov%" },
00226 { MOVS, "movs%" },
00227 { MOVSX, "movswl" },
00228 { MOVSXB, "movsb%" },
00229 { MOVZX, "movzwl" },
00230 { MOVZXB, "movzb%" },
00231 { MUL, "mul%" },
00232 { NEG, "neg%" },
00233 { NOP, "nop" },
00234 { NOT, "not%" },
00235 { OR, "or%" },
00236 { OUT, "out%" },
00237 { OUTS, "outs%" },
00238 { POP, "pop%" },
00239 { POPA, "popa%" },
00240 { POPF, "popf%" },
00241 { PUSH, "push%" },
00242 { PUSHA, "pusha%" },
00243 { PUSHF, "pushf%" },
00244 { RCL, "rcl%" },
00245 { RCR, "rcr%" },
00246 { RET, "ret" },
00247 { RETF, "lret" },
00248 { ROL, "rol%" },
00249 { ROR, "ror%" },
00250 { SAHF, "sahf" },
00251 { SAL, "sal%" },
00252 { SAR, "sar%" },
00253 { SBB, "sbb%" },
00254 { SCAS, "scas%" },
00255 { SETA, "setab" },
00256 { SETAE, "setaeb" },
00257 { SETB, "setbb" },
00258 { SETBE, "setbeb" },
00259 { SETE, "seteb" },
00260 { SETG, "setgb" },
00261 { SETGE, "setgeb" },
00262 { SETL, "setlb" },
00263 { SETLE, "setleb" },
00264 { SETNE, "setneb" },
00265 { SETNO, "setnob" },
00266 { SETNP, "setnpb" },
00267 { SETNS, "setnsb" },
00268 { SETO, "setob" },
00269 { SETP, "setpb" },
00270 { SETS, "setsb" },
00271 { SGDT, "sgdt" },
00272 { SHL, "shl%" },
00273 { SHLD, "shld%" },
00274 { SHR, "shr%" },
00275 { SHRD, "shrd%" },
00276 { SIDT, "sidt" },
00277 { SLDT, "sldt" },
00278 { SMSW, "smsw" },
00279 { STC, "stc" },
00280 { STD, "std" },
00281 { STI, "sti" },
00282 { STOS, "stos%" },
00283 { STR, "str" },
00284 { SUB, "sub%" },
00285 { TEST, "test%" },
00286 { VERR, "verr" },
00287 { VERW, "verw" },
00288 { WAIT, "wait" },
00289 { WBINVD, "wbinvd" },
00290 { XADD, "xadd" },
00291 { XCHG, "xchg%" },
00292 { XLAT, "xlat" },
00293 { XOR, "xor%" },
00294 };
00295
00296 static FILE *ef;
00297 static long eline= 1;
00298 static char *efile;
00299 static char *orig_efile;
00300 static char *opcode2name_tab[N_OPCODES];
00301
00302 static void gnu_putchar(int c)
00303
00304
00305
00306 {
00307 if (putc(c, ef) == EOF) fatal(orig_efile);
00308 }
00309
00310 static void gnu_printf(const char *fmt, ...)
00311 {
00312 va_list ap;
00313
00314 va_start(ap, fmt);
00315 if (vfprintf(ef, fmt, ap) == EOF) fatal(orig_efile);
00316 va_end(ap);
00317 }
00318
00319 void gnu_emit_init(char *file, const char *banner)
00320
00321 {
00322 mnemonic_t *mp;
00323
00324 if (file == nil) {
00325 file= "stdout";
00326 ef= stdout;
00327 } else {
00328 if ((ef= fopen(file, "w")) == nil) fatal(file);
00329 }
00330 orig_efile= file;
00331 efile= file;
00332 gnu_printf("/ %s", banner);
00333
00334
00335 for (mp= mnemtab; mp < arraylimit(mnemtab); mp++) {
00336 assert(opcode2name_tab[mp->opcode] == nil);
00337 opcode2name_tab[mp->opcode]= mp->name;
00338 }
00339 }
00340
/* Look up the mnemonic for an opcode.  The "+ 0" turns the table element
 * into a plain value, so the macro cannot be used to assign to the table.
 */
#define opcode2name(op)		(opcode2name_tab[(op)] + 0)
00342
/* Write the n bytes at s as the inside of a double-quoted string, quoting
 * what needs quoting:
 *   - bytes below ' ' or above 0177 become a three digit octal escape,
 *   - '"' and '\' are preceded by a backslash,
 *   - everything else is written as is.
 *
 * Each byte is read as an unsigned char.  Reading it as a plain (possibly
 * signed) char would sign-extend bytes >= 0200 to a negative int, which
 * "%03o" prints as an eleven digit number instead of three digits.
 */
static void gnu_put_string(const char *s, size_t n)
{
	while (n > 0) {
		int c= (unsigned char) *s;

		if (c < ' ' || c > 0177) {
			gnu_printf("\\%03o", c);
		} else
		if (c == '"' || c == '\\') {
			gnu_printf("\\%c", c);
		} else {
			gnu_putchar(c);
		}
		s++;
		n--;
	}
}
00361
00362 static void gnu_put_expression(asm86_t *a, expression_t *e, int deref)
00363
00364
00365
00366 {
00367 assert(e != nil);
00368
00369 switch (e->operator) {
00370 case ',':
00371 if (is_pseudo(a->opcode)) {
00372
00373 gnu_put_expression(a, e->left, deref);
00374 gnu_printf(", ");
00375 gnu_put_expression(a, e->right, deref);
00376 } else {
00377
00378
00379
00380
00381
00382
00383 gnu_put_expression(a, e->right, deref);
00384 gnu_printf(", ");
00385 gnu_put_expression(a, e->left, deref);
00386 }
00387 break;
00388 case 'O':
00389 if (deref && a->optype == JUMP) gnu_putchar('*');
00390 if (e->left != nil) gnu_put_expression(a, e->left, 0);
00391 gnu_putchar('(');
00392 if (e->middle != nil) gnu_put_expression(a, e->middle, 0);
00393 if (e->right != nil) {
00394 gnu_putchar(',');
00395 gnu_put_expression(a, e->right, 0);
00396 }
00397 gnu_putchar(')');
00398 break;
00399 case '(':
00400 if (!deref) gnu_putchar('(');
00401 if (deref && a->optype == JUMP) gnu_putchar('*');
00402 gnu_put_expression(a, e->middle, 0);
00403 if (!deref) gnu_putchar(')');
00404 break;
00405 case 'B':
00406 gnu_printf("%%%s", e->name);
00407 break;
00408 case '1':
00409 case '2':
00410 case '4':
00411 case '8':
00412 gnu_printf("%%%s,%c", e->name, e->operator);
00413 break;
00414 case '+':
00415 case '-':
00416 case '~':
00417 if (e->middle != nil) {
00418 if (deref && a->optype >= BYTE) gnu_putchar('$');
00419 gnu_putchar(e->operator);
00420 gnu_put_expression(a, e->middle, 0);
00421 break;
00422 }
00423
00424 case '*':
00425 case '/':
00426 case '%':
00427 case '&':
00428 case '|':
00429 case '^':
00430 case S_LEFTSHIFT:
00431 case S_RIGHTSHIFT:
00432 if (deref && a->optype >= BYTE) gnu_putchar('$');
00433 gnu_put_expression(a, e->left, 0);
00434 if (e->operator == S_LEFTSHIFT) {
00435 gnu_printf("<<");
00436 } else
00437 if (e->operator == S_RIGHTSHIFT) {
00438 gnu_printf(">>");
00439 } else {
00440 gnu_putchar(e->operator);
00441 }
00442 gnu_put_expression(a, e->right, 0);
00443 break;
00444 case '[':
00445 if (deref && a->optype >= BYTE) gnu_putchar('$');
00446 gnu_putchar('(');
00447 gnu_put_expression(a, e->middle, 0);
00448 gnu_putchar(')');
00449 break;
00450 case 'W':
00451 if (isregister(e->name)) {
00452 if (a->optype == JUMP) gnu_putchar('*');
00453 gnu_printf("%%%s", e->name);
00454 } else {
00455 if (deref && a->optype >= BYTE) gnu_putchar('$');
00456 gnu_printf("%s", e->name);
00457 }
00458 break;
00459 case 'S':
00460 gnu_putchar('"');
00461 gnu_put_string(e->name, e->len);
00462 gnu_putchar('"');
00463 break;
00464 default:
00465 fprintf(stderr,
00466 "asmconv: internal error, unknown expression operator '%d'\n",
00467 e->operator);
00468 exit(EXIT_FAILURE);
00469 }
00470 }
00471
00472 void gnu_emit_instruction(asm86_t *a)
00473
00474 {
00475 int same= 0;
00476 char *p;
00477
00478 if (a == nil) {
00479
00480 gnu_putchar('\n');
00481 return;
00482 }
00483
00484 if (use16()) {
00485 fprintf(stderr,
00486 "asmconv: the GNU assembler can't translate 8086 code\n");
00487 exit(EXIT_FAILURE);
00488 }
00489
00490
00491 if ((a->file != efile && strcmp(a->file, efile) != 0)
00492 || a->line < eline || a->line > eline+10) {
00493 gnu_putchar('\n');
00494 gnu_printf("# %ld \"%s\"\n", a->line, a->file);
00495 efile= a->file;
00496 eline= a->line;
00497 } else {
00498 if (a->line == eline) {
00499 gnu_printf("; ");
00500 same= 1;
00501 }
00502 while (eline < a->line) {
00503 gnu_putchar('\n');
00504 eline++;
00505 }
00506 }
00507
00508 if (a->opcode == DOT_LABEL) {
00509 assert(a->args->operator == ':');
00510 gnu_printf("%s:", a->args->name);
00511 } else
00512 if (a->opcode == DOT_EQU) {
00513 assert(a->args->operator == '=');
00514 gnu_printf("\t%s = ", a->args->name);
00515 gnu_put_expression(a, a->args->middle, 0);
00516 } else
00517 if (a->opcode == DOT_ALIGN) {
00518
00519 unsigned long n;
00520 unsigned s;
00521
00522 assert(a->args->operator == 'W' && isanumber(a->args->name));
00523 n= strtoul(a->args->name, nil, 0);
00524 for (s= 0; s <= 4 && (1 << s) < n; s++) {}
00525 gnu_printf(".align\t%u", s);
00526 } else
00527 if ((p= opcode2name(a->opcode)) != nil) {
00528 if (!is_pseudo(a->opcode) && !same) gnu_putchar('\t');
00529
00530 switch (a->rep) {
00531 case ONCE: break;
00532 case REP: gnu_printf("rep; "); break;
00533 case REPE: gnu_printf("repe; "); break;
00534 case REPNE: gnu_printf("repne; "); break;
00535 default: assert(0);
00536 }
00537 switch (a->seg) {
00538
00539 case DEFSEG: break;
00540 case CSEG: gnu_printf(".byte 0x2e; "); break;
00541 case DSEG: gnu_printf(".byte 0x3e; "); break;
00542 case ESEG: gnu_printf(".byte 0x26; "); break;
00543 case FSEG: gnu_printf(".byte 0x64; "); break;
00544 case GSEG: gnu_printf(".byte 0x65; "); break;
00545 case SSEG: gnu_printf(".byte 0x36; "); break;
00546 default: assert(0);
00547 }
00548
00549
00550 if (a->opcode == CBW) {
00551 if (!(a->oaz & OPZ)) p= "cwtl";
00552 a->oaz&= ~OPZ;
00553 }
00554 if (a->opcode == CWD) {
00555 if (!(a->oaz & OPZ)) p= "cltd";
00556 a->oaz&= ~OPZ;
00557 }
00558
00559 if (a->opcode == RET || a->opcode == RETF) {
00560
00561 a->optype= WORD;
00562 }
00563
00564 if (a->opcode == MUL && a->args != nil
00565 && a->args->operator == ',') {
00566
00567 p="imul%";
00568 }
00569
00570
00571 if (a->oaz & ADZ) gnu_printf(".byte 0x67; ");
00572 if (a->oaz & OPZ && strchr(p, '%') == nil)
00573 gnu_printf(".byte 0x66; ");
00574
00575
00576 if (a->opcode == JMPF && a->args != nil
00577 && a->args->operator == ',') {
00578
00579 gnu_printf(".byte 0xEA; .long ");
00580 gnu_put_expression(a, a->args->right, 0);
00581 gnu_printf("; .short ");
00582 gnu_put_expression(a, a->args->left, 0);
00583 return;
00584 }
00585 if (a->opcode == JMPF && a->args != nil
00586 && a->args->operator == 'O'
00587 && a->args->left != nil
00588 && a->args->right == nil
00589 && a->args->middle != nil
00590 && a->args->middle->operator == 'B'
00591 && strcmp(a->args->middle->name, "esp") == 0
00592 ) {
00593
00594 gnu_printf(".byte 0xFF,0x6C,0x24,");
00595 gnu_put_expression(a, a->args->left, 0);
00596 return;
00597 }
00598 if (a->opcode == MOV && a->args != nil
00599 && a->args->operator == ','
00600 && a->args->left != nil
00601 && a->args->left->operator == 'W'
00602 && (strcmp(a->args->left->name, "ds") == 0
00603 || strcmp(a->args->left->name, "es") == 0)
00604 && a->args->right->operator == 'O'
00605 && a->args->right->left != nil
00606 && a->args->right->right == nil
00607 && a->args->right->middle != nil
00608 && a->args->right->middle->operator == 'B'
00609 && strcmp(a->args->right->middle->name, "esp") == 0
00610 ) {
00611
00612 gnu_printf(".byte 0x8E,0x%02X,0x24,",
00613 a->args->left->name[0] == 'd' ? 0x5C : 0x44);
00614 gnu_put_expression(a, a->args->right->left, 0);
00615 return;
00616 }
00617 if (a->opcode == MOV && a->args != nil
00618 && a->args->operator == ','
00619 && a->args->left != nil
00620 && a->args->left->operator == 'W'
00621 && (strcmp(a->args->left->name, "ds") == 0
00622 || strcmp(a->args->left->name, "es") == 0)
00623 && a->args->right->operator == '('
00624 && a->args->right->middle != nil
00625 ) {
00626
00627 gnu_printf(".byte 0x8E,0x%02X; .long ",
00628 a->args->left->name[0] == 'd' ? 0x1D : 0x05);
00629 gnu_put_expression(a, a->args->right->middle, 0);
00630 return;
00631 }
00632
00633 while (*p != 0) {
00634 if (*p == '%') {
00635 if (a->optype == BYTE) {
00636 gnu_putchar('b');
00637 } else
00638 if (a->optype == WORD) {
00639 gnu_putchar((a->oaz & OPZ) ? 'w' : 'l');
00640 } else {
00641 assert(0);
00642 }
00643 } else {
00644 gnu_putchar(*p);
00645 }
00646 p++;
00647 }
00648
00649 if (a->args != nil) {
00650 static char *aregs[] = { "al", "ax", "eax" };
00651
00652 gnu_putchar('\t');
00653 switch (a->opcode) {
00654 case IN:
00655 gnu_put_expression(a, a->args, 1);
00656 gnu_printf(", %%%s", aregs[a->optype - BYTE]);
00657 break;
00658 case OUT:
00659 gnu_printf("%%%s, ", aregs[a->optype - BYTE]);
00660 gnu_put_expression(a, a->args, 1);
00661 break;
00662 default:
00663 gnu_put_expression(a, a->args, 1);
00664 }
00665 }
00666 if (a->opcode == DOT_USE16) set_use16();
00667 if (a->opcode == DOT_USE32) set_use32();
00668 } else {
00669 fprintf(stderr,
00670 "asmconv: internal error, unknown opcode '%d'\n",
00671 a->opcode);
00672 exit(EXIT_FAILURE);
00673 }
00674 }