uniq.c

Go to the documentation of this file.
00001 /* uniq - compact repeated lines                Author: John Woods */
00002 /* Uniq [-udc] [-n] [+n] [infile [outfile]]
00003  *
00004  *      Written 02/08/86 by John Woods, placed into public domain.  Enjoy.
00005  *
00006  */
00007 
00008 /* If the symbol WRITE_ERROR is defined, uniq will exit(1) if it gets a
00009  * write error on the output.  This is not (of course) how V7 uniq does it,
00010  * so undefine the symbol if you want to lose your output to a full disk.
00011  */
00012 
00013 #define WRITE_ERROR 1
00014 #include <ctype.h>
00015 #include <errno.h>
00016 #include <string.h>
00017 #include <stdlib.h>
00018 #include <stdio.h>
00019 
/* Buffer that main() installs on stdout with setbuf(). */
char buffer[BUFSIZ];

/* Selection flags.  Both start out set, so unique and repeated lines are
 * both printed; -u and -d each set their own flag and clear the other.
 */
int uflag = 1, dflag = 1;

/* Set by -c: prefix every output line with its repeat count. */
int cflag = 0;

/* Comparison offsets: -n stores n in fields, +n stores n in chars. */
int fields = 0, chars = 0;
00026 
/* Forward declarations for every function defined in this file. */
int main(int argc, char **argv);
FILE *xfopen(char *fn, char *mode);
char *skip(char *s);
int equal(char *s1, char *s2);
void show(char *line, int count);
int uniq(void);
void usage(void);
int getline(char *buf, int count);
00035 
/* Open file `fn' in the given fopen() mode and return the stream.
 * Never returns NULL: on failure the error is reported with perror(),
 * stdout is flushed and the program exits with status 1.
 */
FILE *xfopen(char *fn, char *mode)
{
  FILE *stream = fopen(fn, mode);

  if (stream != NULL)
        return stream;

  perror("uniq");
  fflush(stdout);
  exit(1);
}
00048 
00049 int main(argc, argv)
00050 int argc;
00051 char *argv[];
00052 {
00053   char *p;
00054   int inf = -1, outf;
00055 
00056   setbuf(stdout, buffer);
00057   for (--argc, ++argv; argc > 0 && (**argv == '-' || **argv == '+');
00058        --argc, ++argv) {
00059         if (**argv == '+')
00060                 chars = atoi(*argv + 1);
00061         else if (isdigit(argv[0][1]))
00062                 fields = atoi(*argv + 1);
00063         else if (argv[0][1] == '\0')
00064                 inf = 0;        /* - is stdin */
00065         else
00066                 for (p = *argv + 1; *p; p++) {
00067                         switch (*p) {
00068                             case 'd':
00069                                 dflag = 1;
00070                                 uflag = 0;
00071                                 break;
00072                             case 'u':
00073                                 uflag = 1;
00074                                 dflag = 0;
00075                                 break;
00076                             case 'c':   cflag = 1;      break;
00077                             default:    usage();
00078                         }
00079                 }
00080   }
00081 
00082   /* Input file */
00083   if (argc == 0)
00084         inf = 0;
00085   else if (inf == -1) {         /* if - was not given */
00086         fclose(stdin);
00087         xfopen(*argv++, "r");
00088         argc--;
00089   }
00090   if (argc == 0)
00091         outf = 1;
00092   else {
00093         fclose(stdout);
00094         xfopen(*argv++, "w");
00095         argc--;
00096   }
00097 
00098   uniq();
00099   fflush(stdout);
00100   return(0);
00101 }
00102 
00103 char *skip(s)
00104 char *s;
00105 {
00106   int n;
00107 
00108   /* Skip fields */
00109   for (n = fields; n > 0; --n) {
00110         /* Skip blanks */
00111         while (*s && (*s == ' ' || *s == '\t')) s++;
00112         if (!*s) return s;
00113         while (*s && (*s != ' ' && *s != '\t')) s++;
00114         if (!*s) return s;
00115   }
00116 
00117   /* Skip characters */
00118   for (n = chars; n > 0; --n) {
00119         if (!*s) return s;
00120         s++;
00121   }
00122   return s;
00123 }
00124 
/* Return nonzero when s1 and s2 are identical once the leading part
 * selected by skip() has been removed from each, zero otherwise.
 */
int equal(char *s1, char *s2)
{
  char *key1 = skip(s1);
  char *key2 = skip(s2);

  return strcmp(key1, key2) == 0;
}
00130 
00131 void show(line, count)
00132 char *line;
00133 int count;
00134 {
00135   if (cflag)
00136         printf("%4d %s", count, line);
00137   else {
00138         if ((uflag && count == 1) || (dflag && count != 1))
00139                 printf("%s", line);
00140   }
00141 }
00142 
/* The meat of the whole affair */

/* Size of each of the two line buffers. */
enum { LINESIZE = 1024 };

char *nowline, *prevline, buf1[LINESIZE], buf2[LINESIZE];

/* Read lines from stdin with getline() and pass each run of lines that
 * compare equal() to show() once, together with the length of the run.
 * Always returns 0; on empty input nothing is shown.
 *
 * One byte of each buffer is held back from getline() so that a line of
 * maximum length still leaves room for the terminating NUL.
 */
int uniq(void)
{
  char *tmp;
  int seen;

  /* Setup: the first line becomes the start of the first run */
  prevline = buf1;
  if (getline(prevline, LINESIZE - 1) < 0) return(0);
  seen = 1;
  nowline = buf2;

  /* Read the next line.  If it differs from the previous one, show the
   * finished run and swap the buffers so the new line starts the next
   * run; otherwise just count it.
   */
  while (getline(nowline, LINESIZE - 1) > 0) {
        if (equal(prevline, nowline)) {
                seen += 1;
                continue;
        }
        show(prevline, seen);
        seen = 1;
        tmp = nowline;
        nowline = prevline;
        prevline = tmp;
  }

  /* The last run is still pending */
  show(prevline, seen);
  return 0;
}
00172 
/* Print the one-line synopsis on stderr.  Does not exit. */
void usage(void)
{
  fputs("Usage: uniq [-udc] [+n] [-n] [input [output]]\n", stderr);
}
00177 
/* Read one line from stdin into `buf', which holds `count' bytes.
 *
 * The stored line always ends in '\n' followed by a NUL, and never
 * occupies more than `count' bytes:
 *   - a line too long for the buffer is truncated to count - 2
 *     characters plus the newline; the rest of it is read and dropped;
 *   - a final line that lacks a newline is returned with one appended
 *     instead of being lost.
 *
 * Returns the number of characters stored, counting the newline but not
 * the NUL, or -1 at end of input (or if `count' is too small to hold
 * even an empty line).
 */
int getline(char *buf, int count)
{
  int c;
  int len = 0;

  if (count < 2) return(-1);    /* no room for '\n' and NUL */

  while ((c = getc(stdin)) != EOF && c != '\n') {
        /* Keep two bytes free for the newline and the NUL */
        if (len < count - 2) buf[len++] = c;
  }
  if (c == EOF && len == 0) return(-1);

  buf[len++] = '\n';
  buf[len] = '\0';
  return(len);
}

Generated on Fri Apr 14 22:57:12 2006 for minix by  doxygen 1.4.6