join.c

Go to the documentation of this file.
00001 /* join - relation data base operator   Author:  Saeko Hirabayashi */
00002 
00003 /* Written by Saeko Hirabayashi, 1989.
00004  * 1992-01-28 Modified by Kouichi Hirabayashi to add some POSIX1003.2 options.
00005  *
00006  * This is a free program.
00007  */
00008 
00009 #include <string.h>
00010 #include <stdio.h>
00011 
00012 #define MAXFLD  200             /* maximum # of fields to accept */
00013 
00014 _PROTOTYPE(void main, (int argc, char **argv));
00015 _PROTOTYPE(void error, (char *s, char *t));
00016 _PROTOTYPE(void usage, (void));
00017 _PROTOTYPE(void match, (void));
00018 _PROTOTYPE(void f1_only, (void));
00019 _PROTOTYPE(void f2_only, (void));
00020 _PROTOTYPE(void output, (int flag));
00021 _PROTOTYPE(void outfld, (int file));
00022 _PROTOTYPE(void outputf, (int flag));
00023 _PROTOTYPE(int compare, (void));
00024 _PROTOTYPE(int get1, (void));
00025 _PROTOTYPE(int get2, (int back));
00026 _PROTOTYPE(int getrec, (int file));
00027 _PROTOTYPE(int split, (int file));
00028 _PROTOTYPE(int atoi, (char *str));
00029 _PROTOTYPE(int exit, (int val));
00030 _PROTOTYPE(FILE * efopen, (char *file, char *mode));
00031 _PROTOTYPE(void (*outfun), (int file)); /* output func: output() or outputf()*/
00032 
/* Bit flags naming file1, file2, or (or-ed together) both. */
#define F1      1
#define F2      2

/* Separator written between output fields: the -t character when one was
 * given, otherwise a blank.
 */
#define SEP     (sep ? sep : ' ')

/* Per-file input state; index 0 is file1 and index 1 is file2. */
FILE *fp[2];                    /* open input streams */
char buf[2][BUFSIZ];            /* current input line of each file */
char *fld[2][MAXFLD];           /* fields of the current line (point into buf) */
int nfld[2];                    /* number of fields in the current line */
int kpos[2];                    /* key field position, counted from 0 */

/* Bookkeeping for re-reading a group of equal keys in file2. */
long head;                      /* file2 offset where the current equal-key
                                 * group starts */
char oldkey[BUFSIZ];            /* key of the previous file1 record */

/* Output list built from the -o option. */
struct {
  int o_file;                   /* file #: 0 or 1 */
  int o_field;                  /* field #: 0, 1, 2, .. */
} olist[MAXFLD];
int nout;                       /* number of entries used in olist */

/* Option settings. */
int aflag;                      /* n for '-an': F1 or F2 or both */
int vflag;                      /* n for '-vn': F1 or F2 or both */
char *es;                       /* s for '-e s' */
char sep;                       /* c for '-tc': field separator */
char *cmd;                      /* name of this program */
00060 
00061 void main(argc, argv)
00062 int argc;
00063 char **argv;
00064 {
00065   register char *s;
00066   int c, i, j;
00067 
00068   cmd = argv[0];
00069   outfun = output;              /* default output form */
00070 
00071   while (--argc > 0 && (*++argv)[0] == '-' && (*argv)[1]) {
00072         /* "-" is a file name (stdin) */
00073         s = argv[0] + 1;
00074         if ((c = *s) == '-' && !s[1]) {
00075                 ++argv;
00076                 --argc;
00077                 break;          /* -- */
00078         }
00079         if (*++s == '\0') {
00080                 s = *++argv;
00081                 --argc;
00082         }
00083         switch (c) {
00084             case 'a':           /* add unpairable line to output */
00085                 vflag = 0;
00086                 switch (*s) {
00087                     case '1':   aflag |= F1;    break;
00088                     case '2':   aflag |= F2;    break;
00089                     default:    aflag |= (F1 | F2);     break;
00090                 }
00091                 break;
00092 
00093             case 'e':           /* replace empty field by es */
00094                 es = s;
00095                 break;
00096 
00097             case 'j':           /* key field (obsolute) */
00098                 c = *s++;
00099                 if (*s == '\0') {
00100                         s = *++argv;
00101                         --argc;
00102                 }
00103 
00104             case '1':           /* key field of file1 */
00105             case '2':           /* key field of file2 */
00106                 i = atoi(s) - 1;
00107 
00108                 switch (c) {
00109                     case '1':   kpos[0] = i;    break;
00110                     case '2':   kpos[1] = i;    break;
00111                     default:    kpos[0] = kpos[1] = i;
00112                                 break;
00113                 }
00114                 break;
00115 
00116             case 'o':           /* specify output format */
00117                 do {
00118                         i = j = 0;
00119                         sscanf(s, "%d.%d", &i, &j);
00120                         if (i < 1 || j < 1 || i > 2) usage();
00121                         olist[nout].o_file = i - 1;
00122                         olist[nout].o_field = j - 1;
00123                         nout++;
00124                         if ((s = strchr(s, ',')) != (char *) 0)
00125                                 s++;
00126                         else {
00127                                 s = *++argv;
00128                                 --argc;
00129                         }
00130                 } while (argc > 2 && *s != '-');
00131                 ++argc;
00132                 --argv;         /* compensation */
00133                 outfun = outputf;
00134                 break;
00135 
00136             case 't':           /* tab char */
00137                 sep = *s;
00138                 break;
00139 
00140             case 'v':           /* output unpairable line only */
00141                 aflag = 0;
00142                 switch (*s) {
00143                     case '1':   vflag |= F1;    break;
00144                     case '2':   vflag |= F2;    break;
00145                     default:    vflag |= (F1 | F2);     break;
00146                 }
00147                 break;
00148 
00149             default:    usage();
00150         }
00151   }
00152   if (argc != 2) usage();
00153 
00154   fp[0] = strcmp(argv[0], "-") ? efopen(argv[0], "r") : stdin;
00155   fp[1] = efopen(argv[1], "r");
00156 
00157   nfld[0] = get1();             /* read file1 */
00158   nfld[1] = get2(0);            /* read file2 */
00159 
00160   while (nfld[0] || nfld[1]) {
00161         if ((i = compare()) == 0)
00162                 match();
00163         else if (i < 0)
00164                 f1_only();
00165         else
00166                 f2_only();
00167   }
00168   fflush(stdout);
00169 
00170   exit(0);
00171 }
00172 
00173 void usage()
00174 {
00175   fprintf(stderr,
00176     "Usage: %s [-an|-vn] [-e str] [-o list] [-tc] [-1 f] [-2 f] file1 file2\n",
00177     cmd);
00178   exit(1);
00179 }
00180 
00181 int compare()
00182 {                               /* compare key field */
00183   register int r;
00184 
00185   if (nfld[1] == 0)             /* file2 EOF */
00186         r = -1;
00187   else if (nfld[0] == 0)        /* file1 EOF */
00188         r = 1;
00189   else {
00190         if (nfld[0] <= kpos[0])
00191                 error("missing key field in file1", (char *) 0);
00192         if (nfld[1] <= kpos[1])
00193                 error("missing key field in file2", (char *) 0);
00194 
00195         r = strcmp(fld[0][kpos[0]], fld[1][kpos[1]]);
00196   }
00197   return r;
00198 }
00199 
00200 void match()
00201 {
00202   long p;
00203 
00204   if (!vflag) (*outfun) (F1 | F2);
00205 
00206   p = ftell(fp[1]);
00207   nfld[1] = get2(0);            /* check key order */
00208   if (nfld[1] == 0 || strcmp(fld[0][kpos[0]], fld[1][kpos[1]])) {
00209         nfld[0] = get1();
00210         if (strcmp(fld[0][kpos[0]], oldkey) == 0) {
00211                 fseek(fp[1], head, 0);  /* re-do from head */
00212                 nfld[1] = get2(1);      /* don't check key order */
00213         } else
00214                 head = p;       /* mark here */
00215   }
00216 }
00217 
00218 void f1_only()
00219 {
00220   if ((aflag & F1) || (vflag & F1)) (*outfun) (F1);
00221   nfld[0] = get1();
00222 }
00223 
00224 void f2_only()
00225 {
00226   if ((aflag & F2) || (vflag & F2)) (*outfun) (F2);
00227   head = ftell(fp[1]);          /* mark */
00228   nfld[1] = get2(0);            /* check key order */
00229 }
00230 
00231 void output(f)
00232 {                               /* default output form */
00233   if (f & F1)
00234         fputs(fld[0][kpos[0]], stdout);
00235   else
00236         fputs(fld[1][kpos[1]], stdout);
00237   if (f & F1) outfld(0);
00238   if (f & F2) outfld(1);
00239   fputc('\n', stdout);
00240 }
00241 
00242 void outfld(file)
00243 {                               /* output all fields except key_field */
00244   register int i;
00245   int k, n;
00246 
00247   k = kpos[file];
00248   n = nfld[file];
00249   for (i = 0; i < n; i++)
00250         if (i != k) {
00251                 fputc(SEP, stdout);
00252                 fputs(fld[file][i], stdout);
00253         }
00254 }
00255 
00256 void outputf(f)
00257 {                               /* output by '-o list' */
00258   int i, j, k;
00259   register char *s;
00260 
00261   for (i = k = 0; i < nout; i++) {
00262         j = olist[i].o_file;
00263         if ((f & (j + 1)) && (olist[i].o_field < nfld[j]))
00264                 s = fld[j][olist[i].o_field];
00265         else
00266                 s = es;
00267         if (s) {
00268                 if (k++) fputc(SEP, stdout);
00269                 fputs(s, stdout);
00270         }
00271   }
00272   fputc('\n', stdout);
00273 }
00274 
00275 int get1()
00276 {                               /* read file1 */
00277   int r;
00278   static char oldkey1[BUFSIZ];
00279 
00280   if (fld[0][kpos[0]]) {
00281         strcpy(oldkey, fld[0][kpos[0]]);  /* save previous key for control */
00282   }
00283   r = getrec(0);
00284 
00285   if (r) {
00286         if (strcmp(oldkey1, fld[0][kpos[0]]) > 0)
00287               error("file1 is not sorted", (char *) 0);
00288         strcpy(oldkey1, fld[0][kpos[0]]);  /* save prev key for sort check */
00289   }
00290   return r;
00291 }
00292 
00293 int get2(back)
00294 {                               /* read file2 */
00295   static char oldkey2[BUFSIZ];
00296   int r;
00297 
00298   r = getrec(1);
00299 
00300   if (r) {
00301         if (!back && strcmp(oldkey2, fld[1][kpos[1]]) > 0)
00302               error("file2 is not sorted", (char *) 0);
00303         strcpy(oldkey2, fld[1][kpos[1]]);  /* save prev key for sort check */
00304   }
00305   return r;
00306 }
00307 
00308 int getrec(file)
00309 {                               /* read one line to split it */
00310   if (fgets(buf[file], BUFSIZ, fp[file]) == (char *) 0)
00311         *buf[file] = '\0';
00312   else if (*buf[file] == '\n' || *buf[file] == '\r')
00313         error("null line in file%s", file ? "1" : "0");
00314 
00315   return split(file);
00316 }
00317 
00318 int split(file)
00319 {                               /* setup fields */
00320   register int n;
00321   register char *s, *t;
00322 
00323   for (n = 0, s = buf[file]; *s && *s != '\n' && *s != '\r';) {
00324         if (sep) {
00325                 for (t = s; *s && *s != sep && *s != '\n' && *s != '\r'; s++);
00326         } else {
00327                 while (*s == ' ' || *s == '\t')
00328                         s++;    /* skip leading white space */
00329                 for (t = s; *s && *s != ' ' && *s != '\t'
00330                      && *s != '\n' && *s != '\r'; s++);
00331                 /* We will treat trailing white space as NULL field */
00332         }
00333         if (*s) *s++ = '\0';
00334         fld[file][n++] = t;
00335         if (n == MAXFLD) error("too many filed in file%s", file ? "1" : "0");
00336   }
00337   fld[file][n] = (char *) 0;
00338 
00339   return n;
00340 }
00341 
/* fopen() wrapper: returns the opened stream, or reports "can't open" with
 * the file name through error() when the open fails.
 */
FILE *efopen(char *file, char *mode)
{
  FILE *stream = fopen(file, mode);

  if (stream == (FILE *) 0) {
        error("can't open %s", file);
  }
  return stream;
}
00351 
00352 void error(s, t)
00353 char *s, *t;
00354 {
00355   fprintf(stderr, "%s: ", cmd);
00356   fprintf(stderr, s, t);
00357   fprintf(stderr, "\n");
00358 
00359   exit(1);
00360 }

Generated on Fri Apr 14 22:57:08 2006 for minix by  doxygen 1.4.6