00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031 #include <stdio.h>
00032 #include <ctype.h>
00033 #include "regexp.h"
00034
00035 #define regerror(a) error("regular expression error: %s", a)
00036
00037 int r_start, r_length;
00038
00039
00040
00041
00042
00043 #define MAGIC 0234
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072
00073
00074
00075
00076
00077
00078
00079
00080
00081
00082 #define END 0
00083 #define BOL 1
00084 #define EOL 2
00085 #define ANY 3
00086 #define ANYOF 4
00087 #define ANYBUT 5
00088 #define BRANCH 6
00089 #define BACK 7
00090 #define EXACTLY 8
00091 #define NOTHING 9
00092 #define STAR 10
00093 #define PLUS 11
00094 #define OPEN 20
00095
00096 #define CLOSE 30
00097
00098
00099
00100
00101
00102
00103
00104
00105
00106
00107
00108
00109
00110
00111
00112
00113
00114
00115
00116
00117
00118
00119
00120
00121
00122
00123
00124
00125
00126
00127
00128
00129
00130 #define OP(p) (*(p))
00131 #define NEXT(p) (((*((p)+1)&0377)<<8) + *((p)+2)&0377)
00132 #define OPERAND(p) ((p) + 3)
00133
00134
00135
00136
00137
00138
00139 #ifndef CHARBITS
00140 #define UCHARAT(p) ((int)*(ushort *)(p))
00141 #else
00142 #define UCHARAT(p) ((int)*(p)&CHARBITS)
00143 #endif
00144
00145 #define FAIL(m) { regerror(m); return(NULL); }
00146 #define ISMULT(c) ((c) == '*' || (c) == '+' || (c) == '?')
00147 #define META "^$.[()|?+*\\"
00148
00149
00150
00151
00152 #define HASWIDTH 01
00153 #define SIMPLE 02
00154 #define SPSTART 04
00155 #define WORST 0
00156
00157
00158
00159
00160 static ushort *regparse;
00161 static int regnpar;
00162 static ushort regdummy;
00163 static ushort *regcode;
00164 static long regsize;
00165
00166
00167
00168
00169 #ifndef STATIC
00170 #define STATIC static
00171 #endif
00172 STATIC ushort *reg();
00173 STATIC ushort *regbranch();
00174 STATIC ushort *regpiece();
00175 STATIC ushort *regatom();
00176 STATIC ushort *regnode();
00177 STATIC ushort *regnext();
00178 STATIC void regc();
00179 STATIC void reginsert();
00180 STATIC void regtail();
00181 STATIC void regoptail();
00182 STATIC int Strcspn();
00183
00184
00185
00186
00187
00188
00189
00190
00191
00192
00193
00194
00195
00196
00197
00198
00199 regexp *
00200 regcomp(exp)
00201 ushort *exp;
00202 {
00203 register regexp *r;
00204 register ushort *scan;
00205 register ushort *longest;
00206 register int len;
00207 int flags;
00208 extern char *emalloc();
00209
00210 if (exp == NULL)
00211 FAIL("NULL argument");
00212
00213
00214 regparse = exp;
00215 regnpar = 1;
00216 regsize = 0L;
00217 regcode = ®dummy;
00218 regc((ushort) MAGIC);
00219 if (reg(0, &flags) == NULL)
00220 return(NULL);
00221
00222
00223 if (regsize >= 32767L)
00224 FAIL("regexp too big");
00225
00226
00227 r = (regexp *)emalloc(sizeof(regexp) + (unsigned)regsize * sizeof(ushort));
00228
00229
00230 regparse = exp;
00231 regnpar = 1;
00232 regcode = r->program;
00233 regc((ushort) MAGIC);
00234 if (reg(0, &flags) == NULL)
00235 return(NULL);
00236
00237
00238 r->regstart = '\0';
00239 r->reganch = 0;
00240 r->regmust = NULL;
00241 r->regmlen = 0;
00242 scan = r->program+1;
00243 if (OP(regnext(scan)) == END) {
00244 scan = OPERAND(scan);
00245
00246
00247 if (OP(scan) == EXACTLY)
00248 r->regstart = *OPERAND(scan);
00249 else if (OP(scan) == BOL)
00250 r->reganch++;
00251
00252
00253
00254
00255
00256
00257
00258
00259
00260 if (flags&SPSTART) {
00261 longest = NULL;
00262 len = 0;
00263 for (; scan != NULL; scan = regnext(scan))
00264 if (OP(scan) == EXACTLY && Strlen(OPERAND(scan)) >= len) {
00265 longest = OPERAND(scan);
00266 len = Strlen(OPERAND(scan));
00267 }
00268 r->regmust = longest;
00269 r->regmlen = len;
00270 }
00271 }
00272
00273 return(r);
00274 }
00275
00276
00277
00278
00279
00280
00281
00282
00283
00284
00285 static ushort *
00286 reg(paren, flagp)
00287 int paren;
00288 int *flagp;
00289 {
00290 register ushort *ret;
00291 register ushort *br;
00292 register ushort *ender;
00293 register int parno;
00294 int flags;
00295
00296 *flagp = HASWIDTH;
00297
00298
00299 if (paren) {
00300 if (regnpar >= NSUBEXP)
00301 FAIL("too many ()");
00302 parno = regnpar;
00303 regnpar++;
00304 ret = regnode(OPEN+parno);
00305 } else
00306 ret = NULL;
00307
00308
00309 br = regbranch(&flags);
00310 if (br == NULL)
00311 return(NULL);
00312 if (ret != NULL)
00313 regtail(ret, br);
00314 else
00315 ret = br;
00316 if (!(flags&HASWIDTH))
00317 *flagp &= ~HASWIDTH;
00318 *flagp |= flags&SPSTART;
00319 while (*regparse == '|') {
00320 regparse++;
00321 br = regbranch(&flags);
00322 if (br == NULL)
00323 return(NULL);
00324 regtail(ret, br);
00325 if (!(flags&HASWIDTH))
00326 *flagp &= ~HASWIDTH;
00327 *flagp |= flags&SPSTART;
00328 }
00329
00330
00331 ender = regnode((paren) ? CLOSE+parno : END);
00332 regtail(ret, ender);
00333
00334
00335 for (br = ret; br != NULL; br = regnext(br))
00336 regoptail(br, ender);
00337
00338
00339 if (paren && *regparse++ != ')') {
00340 FAIL("unmatched ()");
00341 } else if (!paren && *regparse != '\0') {
00342 if (*regparse == ')') {
00343 FAIL("unmatched ()");
00344 } else
00345 FAIL("junk on end");
00346
00347 }
00348
00349 return(ret);
00350 }
00351
00352
00353
00354
00355
00356
00357 static ushort *
00358 regbranch(flagp)
00359 int *flagp;
00360 {
00361 register ushort *ret;
00362 register ushort *chain;
00363 register ushort *latest;
00364 int flags;
00365
00366 *flagp = WORST;
00367
00368 ret = regnode(BRANCH);
00369 chain = NULL;
00370 while (*regparse != '\0' && *regparse != '|' && *regparse != ')') {
00371 latest = regpiece(&flags);
00372 if (latest == NULL)
00373 return(NULL);
00374 *flagp |= flags&HASWIDTH;
00375 if (chain == NULL)
00376 *flagp |= flags&SPSTART;
00377 else
00378 regtail(chain, latest);
00379 chain = latest;
00380 }
00381 if (chain == NULL)
00382 (void) regnode(NOTHING);
00383
00384 return(ret);
00385 }
00386
00387
00388
00389
00390
00391
00392
00393
00394
00395
00396 static ushort *
00397 regpiece(flagp)
00398 int *flagp;
00399 {
00400 register ushort *ret;
00401 register ushort op;
00402 register ushort *next;
00403 int flags;
00404
00405 ret = regatom(&flags);
00406 if (ret == NULL)
00407 return(NULL);
00408
00409 op = *regparse;
00410 if (!ISMULT(op)) {
00411 *flagp = flags;
00412 return(ret);
00413 }
00414
00415 if (!(flags&HASWIDTH) && op != '?')
00416 FAIL("*+ operand could be empty");
00417 *flagp = (op != '+') ? (WORST|SPSTART) : (WORST|HASWIDTH);
00418
00419 if (op == '*' && (flags&SIMPLE))
00420 reginsert(STAR, ret);
00421 else if (op == '*') {
00422
00423 reginsert(BRANCH, ret);
00424 regoptail(ret, regnode(BACK));
00425 regoptail(ret, ret);
00426 regtail(ret, regnode(BRANCH));
00427 regtail(ret, regnode(NOTHING));
00428 } else if (op == '+' && (flags&SIMPLE))
00429 reginsert(PLUS, ret);
00430 else if (op == '+') {
00431
00432 next = regnode(BRANCH);
00433 regtail(ret, next);
00434 regtail(regnode(BACK), ret);
00435 regtail(next, regnode(BRANCH));
00436 regtail(ret, regnode(NOTHING));
00437 } else if (op == '?') {
00438
00439 reginsert(BRANCH, ret);
00440 regtail(ret, regnode(BRANCH));
00441 next = regnode(NOTHING);
00442 regtail(ret, next);
00443 regoptail(ret, next);
00444 }
00445 regparse++;
00446 if (ISMULT(*regparse))
00447 FAIL("nested *?+");
00448
00449 return(ret);
00450 }
00451
00452
00453
00454
00455
00456
00457
00458
00459
00460 static ushort *
00461 regatom(flagp)
00462 int *flagp;
00463 {
00464 register ushort *ret;
00465 int flags;
00466 ushort c, d;
00467
00468 *flagp = WORST;
00469
00470 switch ((int) *regparse++) {
00471 case '^':
00472 ret = regnode(BOL);
00473 break;
00474 case '$':
00475 ret = regnode(EOL);
00476 break;
00477 case '.':
00478 ret = regnode(ANY);
00479 *flagp |= HASWIDTH|SIMPLE;
00480 break;
00481 case '[': {
00482 register int class;
00483 register int classend;
00484 register int c;
00485
00486 if (*regparse == '^') {
00487 ret = regnode(ANYBUT);
00488 regparse++;
00489 } else
00490 ret = regnode(ANYOF);
00491 if (*regparse == ']' || *regparse == '-')
00492 regc(*regparse++);
00493 while (*regparse != '\0' && *regparse != ']') {
00494 if (*regparse == '-') {
00495 regparse++;
00496 if (*regparse == ']' || *regparse == '\0')
00497 regc((ushort) '-');
00498 else {
00499 class = UCHARAT(regparse-2)+1;
00500 classend = UCHARAT(regparse);
00501 if (class > classend+1)
00502 FAIL("invalid [] range");
00503 regc((ushort) 0xffff);
00504 regc((ushort) class);
00505 regc((ushort) classend);
00506 regparse++;
00507 }
00508 } else {
00509 if ((c = *regparse++) == '\\') {
00510 if ((c = *regparse++) == 'n')
00511 c = '\n';
00512 else if (c == 't')
00513 c = '\t';
00514 }
00515 regc(c);
00516 }
00517 }
00518 regc((ushort) 0);
00519 if (*regparse != ']')
00520 FAIL("unmatched []");
00521 regparse++;
00522 *flagp |= HASWIDTH|SIMPLE;
00523 }
00524 break;
00525 case '(':
00526 ret = reg(1, &flags);
00527 if (ret == NULL)
00528 return(NULL);
00529 *flagp |= flags&(HASWIDTH|SPSTART);
00530 break;
00531 case '\0':
00532 case '|':
00533 case ')':
00534 FAIL("internal urp");
00535 break;
00536 case '?':
00537 case '+':
00538 case '*':
00539 FAIL("?+* follows nothing");
00540 break;
00541 case '\\':
00542 if (*regparse == '\0')
00543 FAIL("trailing \\");
00544 ret = regnode(EXACTLY);
00545
00546
00547
00548 c = *regparse++;
00549 if (c == 'n')
00550 c = '\n';
00551 else if (c == 't')
00552 c = '\t';
00553 else if (c == 'f')
00554 c = '\f';
00555 else if (c == '\r')
00556 c = '\r';
00557 else if (c == '\b')
00558 c = '\b';
00559 else if (IsDigid(c)) {
00560 d = c - '0';
00561 if (IsDigid(*regparse)) {
00562 d = d * 8 + *regparse++ - '0';
00563 if (IsDigid(*regparse))
00564 d = d * 8 + *regparse++ - '0';
00565 }
00566 c = d;
00567 }
00568 regc(c);
00569 regc((ushort) 0);
00570 *flagp |= HASWIDTH|SIMPLE;
00571 break;
00572 default: {
00573 register int len;
00574 register char ender;
00575
00576 regparse--;
00577 len = Strcspn(regparse, META);
00578 if (len <= 0)
00579 FAIL("internal disaster");
00580 ender = *(regparse+len);
00581 if (len > 1 && ISMULT(ender))
00582 len--;
00583 *flagp |= HASWIDTH;
00584 if (len == 1)
00585 *flagp |= SIMPLE;
00586 ret = regnode(EXACTLY);
00587 while (len > 0) {
00588 regc(*regparse++);
00589 len--;
00590 }
00591 regc((ushort) 0);
00592 }
00593 break;
00594 }
00595
00596 return(ret);
00597 }
00598
00599 IsDigid(c) ushort c;
00600 {
00601 return '0' <= c && c <= '9';
00602 }
00603
00604
00605
00606
00607 static ushort *
00608 regnode(op)
00609 ushort op;
00610 {
00611 register ushort *ret;
00612 register ushort *ptr;
00613
00614 ret = regcode;
00615 if (ret == ®dummy) {
00616 regsize += 3;
00617 return(ret);
00618 }
00619
00620 ptr = ret;
00621 *ptr++ = op;
00622 *ptr++ = '\0';
00623 *ptr++ = '\0';
00624 regcode = ptr;
00625
00626 return(ret);
00627 }
00628
00629
00630
00631
00632 static void
00633 regc(b)
00634 ushort b;
00635 {
00636 if (regcode != ®dummy)
00637 *regcode++ = b;
00638 else
00639 regsize++;
00640 }
00641
00642
00643
00644
00645
00646
00647 static void
00648 reginsert(op, opnd)
00649 ushort op;
00650 ushort *opnd;
00651 {
00652 register ushort *src;
00653 register ushort *dst;
00654 register ushort *place;
00655
00656 if (regcode == ®dummy) {
00657 regsize += 3;
00658 return;
00659 }
00660
00661 src = regcode;
00662 regcode += 3;
00663 dst = regcode;
00664 while (src > opnd)
00665 *--dst = *--src;
00666
00667 place = opnd;
00668 *place++ = op;
00669 *place++ = '\0';
00670 *place++ = '\0';
00671 }
00672
00673
00674
00675
00676 static void
00677 regtail(p, val)
00678 ushort *p;
00679 ushort *val;
00680 {
00681 register ushort *scan;
00682 register ushort *temp;
00683 register int offset;
00684
00685 if (p == ®dummy)
00686 return;
00687
00688
00689 scan = p;
00690 for (;;) {
00691 temp = regnext(scan);
00692 if (temp == NULL)
00693 break;
00694 scan = temp;
00695 }
00696
00697 if (OP(scan) == BACK)
00698 offset = scan - val;
00699 else
00700 offset = val - scan;
00701 *(scan+1) = (offset>>8)&0377;
00702 *(scan+2) = offset&0377;
00703 }
00704
00705
00706
00707
00708 static void
00709 regoptail(p, val)
00710 ushort *p;
00711 ushort *val;
00712 {
00713
00714 if (p == NULL || p == ®dummy || OP(p) != BRANCH)
00715 return;
00716 regtail(OPERAND(p), val);
00717 }
00718
00719
00720
00721
00722
00723
00724
00725
00726 static ushort *reginput;
00727 static ushort *regbol;
00728 static ushort **regstartp;
00729 static ushort **regendp;
00730
00731
00732
00733
00734 STATIC int regtry();
00735 STATIC int regmatch();
00736 STATIC int regrepeat();
00737
00738 #ifdef DEBUG
00739 int regnarrate = 0;
00740 void regdump();
00741 STATIC char *regprop();
00742 #endif
00743
00744
00745
00746
00747 int
00748 regexec(prog, string, bolflag)
00749 register regexp *prog;
00750 register ushort *string;
00751 int bolflag;
00752 {
00753 register ushort *s;
00754 extern ushort *Strchr();
00755
00756
00757 if (prog == NULL || string == NULL) {
00758 regerror("NULL parameter");
00759 return(0);
00760 }
00761
00762
00763 if (prog->program[0] != MAGIC) {
00764 regerror("corrupted program");
00765 return(0);
00766 }
00767
00768
00769 if (prog->regmust != NULL) {
00770 s = string;
00771 while ((s = Strchr(s, prog->regmust[0])) != NULL) {
00772 if (Strncmp(s, prog->regmust, prog->regmlen) == 0)
00773 break;
00774 s++;
00775 }
00776 if (s == NULL)
00777 return(0);
00778 }
00779
00780
00781 if(bolflag)
00782 regbol = string;
00783 else
00784 regbol = NULL;
00785
00786
00787 if (prog->reganch)
00788 return(regtry(prog, string));
00789
00790
00791 s = string;
00792 if (prog->regstart != '\0') {
00793
00794 while ((s = Strchr(s, prog->regstart)) != NULL) {
00795 if (regtry(prog, s))
00796 return(1);
00797 s++;
00798 }
00799 }
00800 else
00801
00802 do {
00803 if (regtry(prog, s))
00804 return(1);
00805 } while (*s++ != '\0');
00806
00807
00808 return(0);
00809 }
00810
00811
00812
00813
00814 static int
00815 regtry(prog, string)
00816 regexp *prog;
00817 ushort *string;
00818 {
00819 register int i;
00820 register ushort **sp;
00821 register ushort **ep;
00822
00823 reginput = string;
00824 regstartp = prog->startp;
00825 regendp = prog->endp;
00826
00827 sp = prog->startp;
00828 ep = prog->endp;
00829 for (i = NSUBEXP; i > 0; i--) {
00830 *sp++ = NULL;
00831 *ep++ = NULL;
00832 }
00833 if (regmatch(prog->program + 1)) {
00834 prog->startp[0] = string;
00835 prog->endp[0] = reginput;
00836 return(1);
00837 } else
00838 return(0);
00839 }
00840
00841
00842
00843
00844
00845
00846
00847
00848
00849
00850
00851 static int
00852 regmatch(prog)
00853 ushort *prog;
00854 {
00855 register ushort *scan;
00856 ushort *next;
00857 extern ushort *Strchr();
00858
00859 scan = prog;
00860 #ifdef DEBUG
00861 if (scan != NULL && regnarrate)
00862 fprintf(stderr, "%s(\n", regprop(scan));
00863 #endif
00864 while (scan != NULL) {
00865 #ifdef DEBUG
00866 if (regnarrate)
00867 fprintf(stderr, "%s...\n", regprop(scan));
00868 #endif
00869 next = regnext(scan);
00870
00871 switch ((int) OP(scan)) {
00872 case BOL:
00873 if (reginput != regbol)
00874 return(0);
00875 break;
00876 case EOL:
00877 if (*reginput != '\0')
00878 return(0);
00879 break;
00880 case ANY:
00881 if (*reginput == '\0')
00882 return(0);
00883 reginput++;
00884 break;
00885 case EXACTLY: {
00886 register int len;
00887 register ushort *opnd;
00888
00889 opnd = OPERAND(scan);
00890
00891 if (*opnd != *reginput)
00892 return(0);
00893 len = Strlen(opnd);
00894 if (len > 1 && Strncmp(opnd, reginput, len) != 0)
00895 return(0);
00896 reginput += len;
00897 }
00898 break;
00899 case ANYOF:
00900 if (*reginput == '\0' || isthere(OPERAND(scan), *reginput) == 0)
00901 return(0);
00902 reginput++;
00903 break;
00904 case ANYBUT:
00905 if (*reginput == '\0' || isthere(OPERAND(scan), *reginput) != 0)
00906 return(0);
00907 reginput++;
00908 break;
00909 case NOTHING:
00910 break;
00911 case BACK:
00912 break;
00913 case OPEN+1:
00914 case OPEN+2:
00915 case OPEN+3:
00916 case OPEN+4:
00917 case OPEN+5:
00918 case OPEN+6:
00919 case OPEN+7:
00920 case OPEN+8:
00921 case OPEN+9: {
00922 register int no;
00923 register ushort *save;
00924
00925 no = OP(scan) - OPEN;
00926 save = reginput;
00927
00928 if (regmatch(next)) {
00929
00930
00931
00932
00933
00934 if (regstartp[no] == NULL)
00935 regstartp[no] = save;
00936 return(1);
00937 } else
00938 return(0);
00939 }
00940 break;
00941 case CLOSE+1:
00942 case CLOSE+2:
00943 case CLOSE+3:
00944 case CLOSE+4:
00945 case CLOSE+5:
00946 case CLOSE+6:
00947 case CLOSE+7:
00948 case CLOSE+8:
00949 case CLOSE+9: {
00950 register int no;
00951 register ushort *save;
00952
00953 no = OP(scan) - CLOSE;
00954 save = reginput;
00955
00956 if (regmatch(next)) {
00957
00958
00959
00960
00961
00962 if (regendp[no] == NULL)
00963 regendp[no] = save;
00964 return(1);
00965 } else
00966 return(0);
00967 }
00968 break;
00969 case BRANCH: {
00970 register ushort *save;
00971
00972 if (OP(next) != BRANCH)
00973 next = OPERAND(scan);
00974 else {
00975 do {
00976 save = reginput;
00977 if (regmatch(OPERAND(scan)))
00978 return(1);
00979 reginput = save;
00980 scan = regnext(scan);
00981 } while (scan != NULL && OP(scan) == BRANCH);
00982 return(0);
00983
00984 }
00985 }
00986 break;
00987 case STAR:
00988 case PLUS: {
00989 register ushort nextch;
00990 register int no;
00991 register ushort *save;
00992 register int min;
00993
00994
00995
00996
00997
00998 nextch = '\0';
00999 if (OP(next) == EXACTLY)
01000 nextch = *OPERAND(next);
01001 min = (OP(scan) == STAR) ? 0 : 1;
01002 save = reginput;
01003 no = regrepeat(OPERAND(scan));
01004 while (no >= min) {
01005
01006 if (nextch == '\0' || *reginput == nextch)
01007 if (regmatch(next))
01008 return(1);
01009
01010 no--;
01011 reginput = save + no;
01012 }
01013 return(0);
01014 }
01015 break;
01016 case END:
01017 return(1);
01018 break;
01019 default:
01020 regerror("memory corruption");
01021 return(0);
01022 break;
01023 }
01024
01025 scan = next;
01026 }
01027
01028
01029
01030
01031
01032 regerror("corrupted pointers");
01033 return(0);
01034 }
01035
01036
01037
01038
01039 static int
01040 regrepeat(p)
01041 ushort *p;
01042 {
01043 register int count = 0;
01044 register ushort *scan;
01045 register ushort *opnd;
01046
01047 scan = reginput;
01048 opnd = OPERAND(p);
01049 switch (OP(p)) {
01050 case ANY:
01051 count = Strlen(scan);
01052 scan += count;
01053 break;
01054 case EXACTLY:
01055 while (*opnd == *scan) {
01056 count++;
01057 scan++;
01058 }
01059 break;
01060 case ANYOF:
01061 while (*scan != '\0' && isthere(opnd, *scan) != 0) {
01062 count++;
01063 scan++;
01064 }
01065 break;
01066 case ANYBUT:
01067 while (*scan != '\0' && isthere(opnd, *scan) == 0) {
01068 count++;
01069 scan++;
01070 }
01071 break;
01072 default:
01073 regerror("internal foulup");
01074 count = 0;
01075 break;
01076 }
01077 reginput = scan;
01078
01079 return(count);
01080 }
01081
01082
01083
01084
01085 static ushort *
01086 regnext(p)
01087 register ushort *p;
01088 {
01089 register int offset;
01090
01091 if (p == ®dummy)
01092 return(NULL);
01093
01094 offset = NEXT(p);
01095 if (offset == 0)
01096 return(NULL);
01097
01098 if (OP(p) == BACK)
01099 return(p-offset);
01100 else
01101 return(p+offset);
01102 }
01103
01104 #ifdef DEBUG
01105
01106 STATIC char *regprop();
01107
01108
01109
01110
01111 void
01112 regdump(r)
01113 regexp *r;
01114 {
01115 register ushort *s;
01116 register ushort op = EXACTLY;
01117 register ushort *next;
01118
01119
01120 s = r->program + 1;
01121 while (op != END) {
01122 op = OP(s);
01123 printf("%2d%s", s-r->program, regprop(s));
01124 next = regnext(s);
01125 if (next == NULL)
01126 printf("(0)");
01127 else
01128 printf("(%d)", (s-r->program)+(next-s));
01129 s += 3;
01130 if (op == ANYOF || op == ANYBUT || op == EXACTLY) {
01131
01132 while (*s != '\0') {
01133 if (*s == 0xffff) {
01134 kputchar(*++s); putchar('-'); ++s;
01135 }
01136 kputchar(*s++);
01137 }
01138 s++;
01139 }
01140 putchar('\n');
01141 }
01142
01143
01144 if (r->regstart != '\0') {
01145 fputs("start `", stdout); kputchar(r->regstart); fputs("' ", stdout);
01146 }
01147 if (r->reganch)
01148 printf("anchored ");
01149 if (r->regmust != NULL) {
01150 fputs("must have \"", stdout); kputs(r->regmust); putchar('"');
01151 }
01152 printf("\n");
01153 }
01154
01155 kputchar(c) ushort c;
01156 {
01157 if (c & 0xff00)
01158 putchar(c >> 8);
01159 putchar(c & 0xff);
01160 }
01161
01162 kputs(s) ushort *s;
01163 {
01164 while (*s)
01165 kputchar(*s++);
01166 }
01167
01168
01169
01170
01171 static char *
01172 regprop(op)
01173 ushort *op;
01174 {
01175 register char *p;
01176 static char buf[50];
01177
01178 (void) strcpy(buf, ":");
01179
01180 switch ((int) OP(op)) {
01181 case BOL:
01182 p = "BOL";
01183 break;
01184 case EOL:
01185 p = "EOL";
01186 break;
01187 case ANY:
01188 p = "ANY";
01189 break;
01190 case ANYOF:
01191 p = "ANYOF";
01192 break;
01193 case ANYBUT:
01194 p = "ANYBUT";
01195 break;
01196 case BRANCH:
01197 p = "BRANCH";
01198 break;
01199 case EXACTLY:
01200 p = "EXACTLY";
01201 break;
01202 case NOTHING:
01203 p = "NOTHING";
01204 break;
01205 case BACK:
01206 p = "BACK";
01207 break;
01208 case END:
01209 p = "END";
01210 break;
01211 case OPEN+1:
01212 case OPEN+2:
01213 case OPEN+3:
01214 case OPEN+4:
01215 case OPEN+5:
01216 case OPEN+6:
01217 case OPEN+7:
01218 case OPEN+8:
01219 case OPEN+9:
01220 sprintf(buf+strlen(buf), "OPEN%d", OP(op)-OPEN);
01221 p = NULL;
01222 break;
01223 case CLOSE+1:
01224 case CLOSE+2:
01225 case CLOSE+3:
01226 case CLOSE+4:
01227 case CLOSE+5:
01228 case CLOSE+6:
01229 case CLOSE+7:
01230 case CLOSE+8:
01231 case CLOSE+9:
01232 sprintf(buf+strlen(buf), "CLOSE%d", OP(op)-CLOSE);
01233 p = NULL;
01234 break;
01235 case STAR:
01236 p = "STAR";
01237 break;
01238 case PLUS:
01239 p = "PLUS";
01240 break;
01241 default:
01242 regerror("corrupted opcode");
01243 break;
01244 }
01245 if (p != NULL)
01246 (void) strcat(buf, p);
01247 return(buf);
01248 }
01249 #endif
01250
01251
01252
01253
01254
01255
01256
01257
01258
01259
01260
01261
01262 static int
01263 Strcspn(s1, s2)
01264 ushort *s1;
01265 unsigned char *s2;
01266 {
01267 register ushort *scan1;
01268 register unsigned char *scan2;
01269 register int count;
01270
01271 count = 0;
01272 for (scan1 = s1; *scan1 != '\0'; scan1++) {
01273 for (scan2 = s2; *scan2 != '\0';)
01274 if (*scan1 == *scan2++)
01275 return(count);
01276 count++;
01277 }
01278 return(count);
01279 }
01280
01281 isthere(s, c) ushort *s, c;
01282 {
01283 register unsigned int c1, c2;
01284
01285 for ( ; *s; s++) {
01286 if (*s == 0xffff) {
01287 c1 = *++s; c2 = *++s;
01288 if (c1 <= c && c <= c2)
01289 return 1;
01290 }
01291 else if (*s == c)
01292 return 1;
01293 }
01294 return 0;
01295 }
01296
01297 ushort *
01298 Strchr(s, c) ushort *s, c;
01299 {
01300 for ( ; *s; s++)
01301 if (*s == c)
01302 return s;
01303 return NULL;
01304 }
01305
01306 Strncmp(s, t, n) ushort *s, *t;
01307 {
01308 for ( ; --n > 0 && *s == *t; s++, t++)
01309 ;
01310 return *s - *t;
01311 }
01312
01313 Strlen(s) ushort *s;
01314 {
01315 int i;
01316
01317 for (i = 0; *s; i++, s++)
01318 ;
01319 return i;
01320 }
01321
01322 ushort kbuf[BUFSIZ];
01323
01324 char *
01325 mkpat(s) char *s;
01326 {
01327 sjtok(kbuf, s);
01328 return (char *) regcomp(kbuf);
01329 }
01330
01331 match(p, s) regexp *p; char *s;
01332 {
01333 register int i;
01334
01335 sjtok(kbuf, s);
01336 if (i = regexec(p, kbuf, 1)) {
01337 r_start = p->startp[0] - kbuf + 1;
01338 r_length = p->endp[0] - p->startp[0];
01339 }
01340 else
01341 r_start = r_length = 0;
01342 return i;
01343 }
01344
01345 sjtok(s, t) ushort *s; unsigned char *t;
01346 {
01347 register c;
01348
01349 for ( ; *t; t++) {
01350 if (isKanji(c = *t))
01351 c = (c << 8) | (*++t & 0xff);
01352 *s++ = c;
01353 }
01354 *s = 0;
01355 }
01356
01357 ktosj(s, t) unsigned char *s; ushort *t;
01358 {
01359 register c;
01360
01361 while (*t) {
01362 if ((c = *t++) & 0xff00)
01363 *s++ = c >> 8;
01364 *s++ = c & 0xff;
01365 }
01366 *s = '\0';
01367 }
01368
01369 regsub(dst, exp, src, pat, pos) ushort *dst, *src, *pat; regexp *exp;
01370 {
01371 register int c, i;
01372 register ushort *loc1, *loc2, *s, *t, *u;
01373 register int n = 0;
01374
01375 if (exp->program[0] != MAGIC) {
01376 regerror("damaged regexp fed to regsub");
01377 return 0;
01378 }
01379 while (*src) {
01380 next:
01381 if (regexec(exp, src, 1) == 0)
01382 break;
01383 loc1 = exp->startp[0]; loc2 = exp->endp[0];
01384 if (pos-- > 1) {
01385 while (src < loc2)
01386 *dst++ = *src++;
01387 goto next;
01388 }
01389 while (src < loc1)
01390 *dst++ = *src++;
01391 for (s = pat; c = *s++; ) {
01392 if (c == '&')
01393 i = 0;
01394 else if (c == '\\' && '0' <= *s && *s <= '9')
01395 i = *s++ - '0';
01396 else {
01397 if (c == '\\' && (*s == '\\' || *s == '&'))
01398 c = *s++;
01399 *dst++ = c;
01400 continue;
01401 }
01402 if ((t = exp->startp[i]) != NULL
01403 && (u = exp->endp[i]) != NULL) {
01404 while (t < u)
01405 *dst++ = *t++;
01406 }
01407 }
01408 src = loc2;
01409 n++;
01410 if (pos == 0)
01411 break;
01412 }
01413 while (*src)
01414 *dst++ = *src++;
01415 *dst++ = 0;
01416 return n;
01417 }
01418
01419 static ushort kbuf1[BUFSIZ], kbuf2[BUFSIZ];
01420
01421 Sub(u, exp, str, s, t, pos) char *exp; char *s, *t, *u;
01422 {
01423 register int i;
01424 regexp *r;
01425
01426 if (str) {
01427 sjtok(kbuf, exp);
01428 r = regcomp(kbuf);
01429 }
01430 else
01431 r = (regexp *) exp;
01432 sjtok(kbuf, s);
01433 sjtok(kbuf1, t);
01434 i = regsub(kbuf2, r, kbuf1, kbuf, pos);
01435 ktosj(u, kbuf2);
01436 if (str)
01437 sfree(r);
01438
01439 return i;
01440 }