decomp16.c

Go to the documentation of this file.
00001 /* decomp16: decompress 16bit compressed files on a 16bit Intel processor
00002  *
00003  * Version 1.3 of 25 Mar 92.
00004  *
00005  * This was written by John N. White on 6/30/91 and is Public Domain.
00006  * Patched to run under news by Will Rose, Feb 92.
00007  * J N White's (earlier) patches added by Will Rose, 20 Feb 92.
00008  * Unsigned int increment/wrap bug fixed by Will Rose, 24 Mar 92.
00009  * Argument bug fixed, stdio generalised by Will Rose, 25 Mar 92.
00010  *
00011  * decomp16 can use as as little as 512 bytes of stack; since it forks
00012  * four additional copies, it's probably worth using minimum stack rather
00013  * than the 8192 byte Minix default.  To reduce memory still further,
00014  * change BUFSZ below to 256; it is currently set to 1024 for speed.  The
00015  * minimal decomp16 needs about 280k to run in pipe mode (56k per copy).
00016  *
00017  * This program acts as a filter:
00018  *    decomp16 < compressed_file > decompressed_file
00019  * The arguments -0 to -4 run only the corresponding pass.
00020  * Thus:
00021  *    decomp16 -4 < compressed_file > 3;
00022  *    decomp16 -3 < 3 > 2;
00023  *    decomp16 -2 < 2 > 1;
00024  *    decomp16 -1 < 1 > 0;
00025  *    decomp16 -0 < 0 > decompressed_file
00026  * will also work, as will connecting the passes by explicit pipes if
00027  * there is enough memory to do so.  File name arguments can also be
00028  * given directly on the command line.
00029  *
00030  * Compress uses a modified LZW compression algorithm. A compressed file
00031  * is a set of indices into a dictionary of strings. The number of bits
00032  * used to store each index depends on the number of entries currently
00033  * in the dictionary. If there are between 257 and 512 entries, 9 bits
00034  * are used. With 513 entries, 10 bits are used, etc. The initial dictionary
00035  * consists of 0-255 (which are the corresponding chars) and 256 (which
00036  * is a special CLEAR code). As each index in the compressed file is read,
00037  * a new entry is added to the dictionary consisting of the current string
00038  * with the first char of the next string appended. When the dictionary
00039  * is full, no further entries are added. If a CLEAR code is received,
00040  * the dictionary will be completely reset. The first two bytes of the
00041  * compressed file are a magic number, and the third byte indicates the
00042  * maximum number of bits, and whether the CLEAR code is used (older versions
00043  * of compress didn't have CLEAR).
00044  *
00045  * This program works by forking four more copies of itself. The five
00046  * programs form a pipeline. Copy 0 writes to stdout, and forks copy 1
00047  * to supply its input, which in turn forks and reads from copy 2, etc.
00048  * This sequence is used so that when the program exits, all writes
00049  * are completed and a program that has exec'd uncompress (such as news)
00050  * can immediately use the uncompressed data when the wait() call returns.
00051  *
00052  * If given a switch -#, where # is a digit from 0 to 4 (example: -2), the
00053  * program will run as that copy, reading from stdin and writing to stdout.
00054  * This allows decompressing with very limited RAM because only one of the
00055  * five passes is in memory at a time.
00056  *
00057  * The compressed data is a series of string indices (and a header at
00058  * the beginning and an occasional CLEAR code). As these indices flow
00059  * through the pipes, each program decodes the ones it can. The result
00060  * of each decoding will be indices that the following programs can handle.
00061  *
00062  * Each of the 65536 strings in the dictionary is an earlier string with
00063  * some character added to the end (except for the the 256 predefined
00064  * single char strings). When new entries are made to the dictionary,
00065  * the string index part will just be the last index to pass through.
00066  * But the char part is the first char of the next string, which isn't
00067  * known yet. So the string can be stored as a pair of indices. When
00068  * this string is specified, it is converted to this pair of indices,
00069  * which are flagged so that the first will be decoded in full while
00070  * the second will be decoded to its first char. The dictionary takes
00071  * 256k to store (64k strings of 2 indices of 2 bytes each). This is
00072  * too big for a 64k data segment, so it is divided into 5 equal parts.
00073  * Copy 4 of the program maintains the high part and copy 0 holds the
00074  * low part.
00075  */
00076 
00077 #include <sys/types.h>
00078 #include <fcntl.h>
00079 #include <stdlib.h>
00080 #include <unistd.h>
00081 
00082 #define BUFSZ           1024    /* size of i/o buffers */
00083 #define BUFSZ_2         (BUFSZ/2)       /* # of unsigned shorts in i/o bufs */
00084 #define DICTSZ          (unsigned)13056 /* # of local dictionary entries */
00085 #define EOF_INDEX       (unsigned short)0xFFFF  /* EOF flag for pipeline */
00086 #define FALSE           0
00087 #define TRUE            ~FALSE
00088 
00089 int fdin, fdout, fderr;         /* input, output, and error file descriptors */
00090 int ibufstart, obufind, ibufend;/* i/o buffer indices */
00091 int ipbufind = BUFSZ_2;         /* pipe buffer indices */
00092 int opbufind = 0;
00093 int pnum = -1;                  /* ID of this copy */
00094 unsigned short ipbuf[BUFSZ_2];  /* for buffering input */
00095 unsigned short opbuf[BUFSZ_2];  /* for buffering output */
00096 unsigned char *ibuf = (unsigned char *) ipbuf;
00097 unsigned char *obuf = (unsigned char *) opbuf;
00098 
00099 unsigned short dindex[DICTSZ];  /* dictionary: index to substring */
00100 unsigned short dchar[DICTSZ];   /* dictionary: last char of string */
00101 unsigned iindex, tindex, tindex2;       /* holds index being processed */
00102 unsigned base;                  /* where in global dict local dict starts */
00103 unsigned tbase;
00104 unsigned locend;                /* where in global dict local dict ends */
00105 unsigned curend = 256;          /* current end of global dict */
00106 unsigned maxend;                /* max end of global dict */
00107 int dcharp;                     /* ptr to dchar that needs next index entry */
00108 int curbits;                    /* number of bits for getbits() to read */
00109 int maxbits;                    /* limit on number of bits */
00110 int clearflg;                   /* if set, allow CLEAR */
00111 int inmod;                      /* mod 8 for getbits() */
00112 
00113 _PROTOTYPE(int main, (int argc, char **argv));
00114 _PROTOTYPE(void ffork, (void));
00115 _PROTOTYPE(void die, (char *s));
00116 _PROTOTYPE(void myputc, (unsigned c));
00117 _PROTOTYPE(unsigned mygetc, (void));
00118 _PROTOTYPE(void getbits, (void));
00119 _PROTOTYPE(void getpipe, (void));
00120 _PROTOTYPE(void putpipe, (unsigned u, int flag));
00121 
00122 int main(argc, argv)
00123 int argc;
00124 char **argv;
00125 {
00126   char c, *cp;
00127   int j, k, fdtmp;
00128   unsigned int len;
00129 
00130   /* Find the program name */
00131   j = 0;
00132   while (argv[0][j] != '\0') j++;
00133   len = (unsigned int) j;
00134   while (j--)
00135         if (argv[0][j] == '/') break;
00136   if (argv[0][j] == '/') j++;
00137   cp = argv[0] + j;
00138   len -= j;
00139 
00140   /* Sort out the flags */
00141   for (k = 1; k < argc; k++) {
00142         if (argv[k][0] == '-') {
00143                 c = argv[k][1];
00144                 switch (c) {
00145                     case '0':   /* pass numbers */
00146                     case '1':
00147                     case '2':
00148                     case '3':
00149                     case '4':   pnum = c - '0'; break;
00150                     case 'd':   /* used by news */
00151                         break;
00152                     default:
00153                         (void) write(1, "Usage: ", 7);
00154                         (void) write(1, cp, len);
00155                         (void) write(1, " [-#] [in] [out]\n", 17);
00156                         exit(0);
00157                         break;
00158                 }
00159 
00160                 /* Once it's checked, lose it anyway */
00161                 for (j = k; j < argc; j++) argv[j] = argv[j + 1];
00162                 argc--;
00163                 k--;
00164         }
00165   }
00166 
00167   /* Default i/o settings */
00168   fdin = 0;
00169   fdout = 1;
00170   fderr = 2;
00171 
00172   /* Try to open specific files and connect them to stdin/stdout */
00173   if (argc > 1) {
00174         if ((fdtmp = open(argv[1], 0)) == -1) die("input open failed");
00175         (void) close(0);
00176         if ((fdin = dup(fdtmp)) == -1) die("input dup failed\n");
00177         (void) close(fdtmp);
00178   }
00179   if (argc > 2) {
00180         (void) unlink(argv[2]);
00181         if ((fdtmp = creat(argv[2], 0666)) == -1) die("output creat failed");
00182         (void) close(1);
00183         if ((fdout = dup(fdtmp)) == -1) die("output dup failed\n");
00184         (void) close(fdtmp);
00185   }
00186 
00187   /* Sort out type of compression */
00188   if (pnum == -1 || pnum == 4) {/* if this is pass 4 */
00189         /* Check header of compressed file */
00190         if (mygetc() != 0x1F || mygetc() != 0x9D)      /* check magic number */
00191                 die("not a compressed file\n");
00192         iindex = mygetc();      /* get compression style */
00193   } else
00194         getpipe();              /* get compression style */
00195 
00196   maxbits = iindex & 0x1F;
00197   clearflg = ((iindex & 0x80) != 0) ? TRUE : FALSE;
00198   if (maxbits < 9 || maxbits > 16)      /* check for valid maxbits */
00199         die("can't decompress\n");
00200   if (pnum != -1 && pnum != 0)
00201         putpipe(iindex, 0);     /* pass style to next copy */
00202 
00203   /* Fork off an ancestor if necessary - ffork() increments pnum */
00204   if (pnum == -1) {
00205         pnum = 0;
00206         if (pnum == 0) ffork();
00207         if (pnum == 1) ffork();
00208         if (pnum == 2) ffork();
00209         if (pnum == 3) ffork();
00210   }
00211 
00212   /* Preliminary inits. Note: end/maxend/curend are highest, not
00213    * highest + 1 */
00214   base = DICTSZ * pnum + 256;
00215   locend = base + DICTSZ - 1;
00216   maxend = (1 << maxbits) - 1;
00217   if (maxend > locend) maxend = locend;
00218 
00219   while (TRUE) {
00220         curend = 255 + (clearflg ? 1 : 0);      /* init dictionary */
00221         dcharp = DICTSZ;        /* flag for none needed */
00222         curbits = 9;            /* init curbits (for copy 0) */
00223         while (TRUE) {          /* for each index in input */
00224                 if (pnum == 4) {/* get index using getbits() */
00225                         if (curbits < maxbits && (1 << curbits) <= curend) {
00226                                 /* Curbits needs to be increased */
00227                                 /* Due to uglyness in compress, these
00228                                  * indices in the compressed file are
00229                                  * wasted */
00230                                 while (inmod) getbits();
00231                                 curbits++;
00232                         }
00233                         getbits();
00234                 } else
00235                         getpipe();      /* get next index */
00236 
00237                 if (iindex == 256 && clearflg) {
00238                         if (pnum > 0) putpipe(iindex, 0);
00239                         /* Due to uglyness in compress, these indices
00240                          * in the compressed file are wasted */
00241                         while (inmod) getbits();
00242                         break;
00243                 }
00244                 tindex = iindex;
00245                 /* Convert the index part, ignoring spawned chars */
00246                 while (tindex >= base) tindex = dindex[tindex - base];
00247                 /* Pass on the index */
00248                 putpipe(tindex, 0);
00249                 /* Save the char of the last added entry, if any */
00250                 if (dcharp < DICTSZ) dchar[dcharp++] = tindex;
00251                 if (curend < maxend && ++curend > (base - 1))
00252                         dindex[dcharp = (curend - base)] = iindex;
00253 
00254                 /* Do spawned chars. They are naturally produced in
00255                  * the wrong order. To get them in the right order
00256                  * without using memory, a series of passes,
00257                  * progressively less deep, are used */
00258                 tbase = base;
00259                 while ((tindex = iindex) >= tbase) {/* for each char to spawn*/
00260                         while ((tindex2 = dindex[tindex - base]) >= tbase)
00261                                 tindex = tindex2;    /* scan to desired char */
00262                         putpipe(dchar[tindex-base], 1); /* put it to the pipe*/
00263                         tbase = tindex + 1;
00264                         if (tbase == 0) break;  /* it's a wrap */
00265                 }
00266         }
00267   }
00268 }
00269 
00270 
00271 /* F f o r k
00272  *
00273  * Fork off the previous pass - the parent reads from the child.
00274  */
00275 void ffork()
00276 {
00277   int j, pfd[2];
00278 
00279   if (pipe(pfd) == -1) die("pipe() error\n");
00280   if ((j = fork()) == -1) die("fork() error\n");
00281   if (j == 0) {                 /* this is the child */
00282         if (close(1) == -1) die("close(1) error\n");
00283         if (dup(pfd[1]) != 1) die("dup(1) error\n");
00284         (void) close(pfd[0]);
00285         pnum++;
00286   } else {                      /* this is the parent */
00287         if (close(0) == -1) die("close(0) error\n");
00288         if (dup(pfd[0]) != 0) die("dup(0) error\n");
00289         (void) close(pfd[1]);
00290   }
00291 }
00292 
00293 
00294 /* D i e
00295  *
00296  * If s is a message, write it to stderr. Flush buffers if needed. Then exit.
00297  */
00298 void die(s)
00299 char *s;
00300 {
00301   /* Flush stdout buffer if needed */
00302   if (obufind != 0) {
00303         if (write(fdout, (char *) obuf, (unsigned) obufind) != obufind)
00304                 s = "bad stdout write\n";
00305         obufind = 0;
00306   }
00307 
00308   /* Flush pipe if needed */
00309   do
00310         putpipe(EOF_INDEX, 0);
00311   while (opbufind);
00312   /* Write any error message */
00313   if (s != (char *) NULL) {
00314         while (*s) (void) write(fderr, s++, 1);
00315   }
00316   exit((s == (char *) NULL) ? 0 : 1);
00317 }
00318 
00319 
00320 /* M p u t c
00321  *
00322  * Put a char to stdout.
00323  */
00324 void myputc(c)
00325 unsigned c;
00326 {
00327   obuf[obufind++] = c;
00328   if (obufind >= BUFSZ) {       /* if stdout buffer full */
00329         if (write(fdout, (char *) obuf, BUFSZ) != BUFSZ)        /* flush to stdout */
00330                 die("bad stdout write\n");
00331         obufind = 0;
00332   }
00333 }
00334 
00335 
00336 /* M y g e t c
00337  *
00338  * Get a char from stdin. If EOF, then die() and exit.
00339  */
00340 unsigned mygetc()
00341 {
00342   if (ibufstart >= ibufend) {   /* if stdin buffer empty */
00343         if ((ibufend = read(fdin, (char *) ibuf, BUFSZ)) <= 0)
00344                 die((char *) NULL);     /* if EOF, do normal exit */
00345         ibufstart = 0;
00346   }
00347   return(ibuf[ibufstart++] & 0xff);
00348 }
00349 
00350 
00351 /* G e t b i t s
00352  *
00353  * Put curbits bits into index from stdin. Note: only copy 4 uses this.
00354  * The bits within a byte are in the correct order. But when the bits
00355  * cross a byte boundry, the lowest bits will be in the higher part of
00356  * the current byte, and the higher bits will be in the lower part of
00357  * the next byte.
00358  */
00359 void getbits()
00360 {
00361   int have;
00362   static unsigned curbyte;      /* byte having bits extracted from it */
00363   static int left;              /* how many bits are left in curbyte */
00364 
00365   inmod = (inmod + 1) & 7;      /* count input mod 8 */
00366   iindex = curbyte;
00367   have = left;
00368   if (curbits - have > 8) {
00369         iindex |= mygetc() << have;
00370         have += 8;
00371   }
00372   iindex |= ((curbyte = mygetc()) << have) & ~((unsigned) 0xFFFF << curbits);
00373   curbyte >>= curbits - have;
00374   left = 8 - (curbits - have);
00375 }
00376 
00377 
00378 /* G e t p i p e
00379  *
00380  * Get an index from the pipeline. If flagged firstonly, handle it here.
00381  */
00382 void getpipe()
00383 {
00384   static short flags;
00385   static int n = 0;             /* number of flags in flags */
00386 
00387   while (TRUE) {                /* while index with firstonly flag set */
00388         if (n <= 0) {
00389                 if (ipbufind >= BUFSZ_2) {      /* if pipe input buffer
00390                                                  * empty */
00391                         if (read(fdin, (char *) ipbuf, BUFSZ) != BUFSZ)
00392                                 die("bad pipe read\n");
00393                         ipbufind = 0;
00394                 }
00395                 flags = ipbuf[ipbufind++];
00396                 n = 15;
00397         }
00398         iindex = ipbuf[ipbufind++];
00399         if (iindex > curend)
00400                 die((iindex == EOF_INDEX) ? (char *) NULL : "invalid data\n");
00401         flags <<= 1;
00402         n--;
00403         /* Assume flags < 0 if highest remaining flag is set */
00404         if (flags < 0) {        /* if firstonly flag for index is not set */
00405                 while (iindex >= base) iindex = dindex[iindex - base];
00406                 putpipe(iindex, 1);
00407         } else
00408                 return;         /* return with valid non-firstonly index */
00409   }
00410 }
00411 
00412 
00413 /* P u t p i p e
00414  *
00415  * put an index into the pipeline.
00416  */
00417 void putpipe(u, flag)
00418 unsigned u;
00419 int flag;
00420 {
00421   static unsigned short flags, *flagp;
00422   static int n = 0;             /* number of flags in flags */
00423 
00424   if (pnum == 0) {              /* if we should write to stdout */
00425         myputc(u);              /* index will be the char value */
00426         return;
00427   }
00428   if (n == 0) {                 /* if we need to reserve a flag entry */
00429         flags = 0;
00430         flagp = opbuf + opbufind;
00431         opbufind++;
00432   }
00433   opbuf[opbufind++] = u;        /* add index to buffer */
00434   flags = (flags << 1) | flag;  /* add firstonly flag */
00435   if (++n >= 15) {              /* if block of 15 indices */
00436         n = 0;
00437         *flagp = flags;         /* insert flags entry */
00438         if (opbufind >= BUFSZ_2) {      /* if pipe out buffer full */
00439                 opbufind = 0;
00440                 if (write(fdout, (char *) opbuf, BUFSZ) != BUFSZ)
00441                         die("bad pipe write\n");
00442         }
00443   }
00444 }

Generated on Fri Apr 14 22:57:05 2006 for minix by  doxygen 1.4.6