00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034 #include <sys/types.h>
00035 #include <sys/stat.h>
00036 #include <fcntl.h>
00037 #include <signal.h>
00038 #include <unistd.h>
00039 #include <stdlib.h>
00040 #include <string.h>
00041 #include <stdio.h>
00042 #include <limits.h>
00043
00044 #define OPEN_FILES (OPEN_MAX-4)
00045 #if __minix_vmd
00046 #define MEMORY_SIZE (1024 * 1024)
00047 #else
00048 #define MEMORY_SIZE ((10 * sizeof(int)) * 1024)
00049 #endif
00050
00051 #define LINE_SIZE (1024 >> 1)
00052 #define IO_SIZE (2 * 1024)
00053 #define STD_OUT 1
00054
00055
00056 #define OK 0
00057 #define ERROR -1
00058 #define NIL_PTR ((char *) 0)
00059
00060
00061 #define LOWER -1
00062 #define SAME 0
00063 #define HIGHER 1
00064
00065
00066 #define DICT 0x001
00067 #define ASCII 0x002
00068 #define BLANK 0x004
00069 #define DIGIT 0x008
00070 #define UPPER 0x010
00071
00072 typedef int BOOL;
00073
00074 #define FALSE 0
00075 #define TRUE 1
00076
00077 typedef struct {
00078 int fd;
00079 char *buffer;
00080 int read_chars;
00081 int cnt;
00082 char *line;
00083 } MERGE;
00084
00085 #define NIL_MERGE ((MERGE *) 0)
00086 MERGE merge_f[OPEN_FILES];
00087 int buf_size;
00088
00089 #define FIELDS_LIMIT 10
00090 #define GLOBAL 0
00091
00092 typedef struct {
00093 int beg_field, beg_pos;
00094 int end_field, end_pos;
00095 BOOL reverse;
00096 BOOL blanks;
00097 BOOL dictionary;
00098 BOOL fold_case;
00099 BOOL ascii;
00100 BOOL numeric;
00101 } FIELD;
00102
00103
00104 FIELD fields[FIELDS_LIMIT];
00105 int field_cnt;
00106
00107
00108 BOOL check = FALSE;
00109 BOOL only_merge = FALSE;
00110 BOOL uniq = FALSE;
00111
00112 char *mem_top;
00113 char *cur_pos;
00114 char **line_table;
00115 BOOL in_core = TRUE;
00116
00117
00118 char temp_files[] = "/tmp/sort.XXXXX.XX";
00119 char *output_file;
00120 int out_fd;
00121 char out_buffer[IO_SIZE];
00122
00123 char **argptr;
00124 int args_offset;
00125 int args_limit;
00126
00127 char separator;
00128 int nr_of_files = 0;
00129 int disabled;
00130
00131 char USAGE[] = "Usage: sort [-funbirdcmt'x'] [+beg_pos [-end_pos]] [-o outfile] [file] ..";
00132
00133
00134 _PROTOTYPE(int main, (int argc, char **argv));
00135 _PROTOTYPE(void get_opts, (char *ptr, FIELD * field));
00136 _PROTOTYPE(void new_field, (FIELD * field, int *offset, BOOL beg_fl));
00137 _PROTOTYPE(void adjust_options, (FIELD * field));
00138 _PROTOTYPE(void error, (BOOL quit, char *message, char *arg));
00139 _PROTOTYPE(void open_outfile, (void));
00140 _PROTOTYPE(void get_file, (int fd, off_t size));
00141 _PROTOTYPE(int last_line, (void));
00142 _PROTOTYPE(void print_table, (int fd));
00143 _PROTOTYPE(char *file_name, (int nr));
00144 _PROTOTYPE(void mread, (int fd, char *address, int bytes));
00145 _PROTOTYPE(void mwrite, (int fd, char *address, int bytes));
00146 _PROTOTYPE(void sort, (void));
00147 _PROTOTYPE(void sort_table, (int nel));
00148 _PROTOTYPE(void incr, (int si, int ei));
00149 _PROTOTYPE(int cmp_fields, (char *el1, char *el2));
00150 _PROTOTYPE(void build_field, (char *dest, FIELD * field, char *src));
00151 _PROTOTYPE(char *skip_fields, (char *str, int nf));
00152 _PROTOTYPE(int compare, (char *el1, char *el2));
00153 _PROTOTYPE(int cmp, (unsigned char *el1, unsigned char *el2, FIELD * field));
00154 _PROTOTYPE(int digits, (char *str1, char *str2, BOOL check_sign));
00155 _PROTOTYPE(void files_merge, (int file_cnt));
00156 _PROTOTYPE(void merge, (int start_file, int limit_file));
00157 _PROTOTYPE(void put_line, (char *line));
00158 _PROTOTYPE(MERGE * print, (MERGE * merg, int file_cnt));
00159 _PROTOTYPE(int read_line, (MERGE * merg));
00160 _PROTOTYPE(MERGE * skip_lines, (MERGE * smallest, int file_cnt));
00161 _PROTOTYPE(void uniq_lines, (MERGE * merg));
00162 _PROTOTYPE(void check_file, (int fd, char *file));
00163 _PROTOTYPE(int length, (char *line));
00164 _PROTOTYPE(void copy, (char *dest, char *src));
00165 _PROTOTYPE(char *msbrk, (int size));
00166 _PROTOTYPE(void mbrk, (char *address));
00167 _PROTOTYPE(void catch, (int dummy));
00168
00169
00170 char table[256] = {
00171
00172 0, 0, 0, 0, 0, 0, 0, 0,
00173 0, BLANK | DICT, 0, 0, 0, 0, 0, 0,
00174 0, 0, 0, 0, 0, 0, 0, 0,
00175 0, 0, 0, 0, 0, 0, 0, 0,
00176
00177
00178 BLANK | DICT | ASCII, ASCII, ASCII, ASCII, ASCII, ASCII, ASCII,
00179 ASCII, ASCII, ASCII, ASCII, ASCII, ASCII, ASCII,
00180 ASCII, ASCII,
00181
00182
00183 DIGIT | DICT | ASCII, DIGIT | DICT | ASCII, DIGIT | DICT | ASCII,
00184 DIGIT | DICT | ASCII, DIGIT | DICT | ASCII, DIGIT | DICT | ASCII,
00185 DIGIT | DICT | ASCII, DIGIT | DICT | ASCII, DIGIT | DICT | ASCII,
00186 DIGIT | DICT | ASCII,
00187
00188
00189 ASCII, ASCII, ASCII, ASCII, ASCII, ASCII, ASCII,
00190
00191
00192 UPPER | DICT | ASCII, UPPER | DICT | ASCII, UPPER | DICT | ASCII,
00193 UPPER | DICT | ASCII, UPPER | DICT | ASCII, UPPER | DICT | ASCII,
00194 UPPER | DICT | ASCII, UPPER | DICT | ASCII, UPPER | DICT | ASCII,
00195 UPPER | DICT | ASCII, UPPER | DICT | ASCII, UPPER | DICT | ASCII,
00196 UPPER | DICT | ASCII, UPPER | DICT | ASCII, UPPER | DICT | ASCII,
00197 UPPER | DICT | ASCII, UPPER | DICT | ASCII, UPPER | DICT | ASCII,
00198 UPPER | DICT | ASCII, UPPER | DICT | ASCII, UPPER | DICT | ASCII,
00199 UPPER | DICT | ASCII, UPPER | DICT | ASCII, UPPER | DICT | ASCII,
00200 UPPER | DICT | ASCII, UPPER | DICT | ASCII,
00201
00202
00203 ASCII, ASCII, ASCII, ASCII, ASCII, ASCII,
00204
00205
00206 DICT | ASCII, DICT | ASCII, DICT | ASCII, DICT | ASCII,
00207 DICT | ASCII, DICT | ASCII, DICT | ASCII, DICT | ASCII,
00208 DICT | ASCII, DICT | ASCII, DICT | ASCII, DICT | ASCII,
00209 DICT | ASCII, DICT | ASCII, DICT | ASCII, DICT | ASCII,
00210 DICT | ASCII, DICT | ASCII, DICT | ASCII, DICT | ASCII,
00211 DICT | ASCII, DICT | ASCII, DICT | ASCII, DICT | ASCII,
00212 DICT | ASCII, DICT | ASCII,
00213
00214
00215 ASCII, ASCII, ASCII, ASCII,
00216
00217
00218 0, 0, 0, 0, 0, 0, 0, 0, 0,
00219 0, 0, 0, 0, 0, 0, 0, 0, 0,
00220 0, 0, 0, 0, 0, 0, 0, 0, 0,
00221 0, 0, 0, 0, 0, 0, 0, 0, 0,
00222 0, 0, 0, 0, 0, 0, 0, 0, 0,
00223 0, 0, 0, 0, 0, 0, 0, 0, 0,
00224 0, 0, 0, 0, 0, 0, 0, 0, 0,
00225 0, 0, 0, 0, 0, 0, 0, 0, 0,
00226 0, 0, 0, 0, 0, 0, 0
00227 };
00228
00229
00230
00231
00232
00233
00234 void get_opts(ptr, field)
00235 register char *ptr;
00236 register FIELD *field;
00237 {
00238 switch (*ptr) {
00239 case 'b':
00240 field->blanks = TRUE;
00241 break;
00242 case 'd':
00243 field->dictionary = TRUE;
00244 break;
00245 case 'f':
00246 field->fold_case = TRUE;
00247 break;
00248 case 'i':
00249 field->ascii = TRUE;
00250 break;
00251 case 'n':
00252 field->numeric = TRUE;
00253 field->blanks = TRUE;
00254 break;
00255 case 'r':
00256 field->reverse = TRUE;
00257 break;
00258 default:
00259 error(TRUE, USAGE, NIL_PTR);
00260 }
00261 }
00262
00263
00264
00265
00266
00267
00268 void new_field(field, offset, beg_fl)
00269 register FIELD *field;
00270 int *offset;
00271 BOOL beg_fl;
00272 {
00273 register char *ptr;
00274
00275 ptr = argptr[*offset];
00276 *offset += 1;
00277 ptr++;
00278
00279 if (beg_fl)
00280 field->beg_field = atoi(ptr);
00281 else
00282 field->end_field = atoi(ptr);
00283
00284 while (table[*ptr] & DIGIT)
00285 ptr++;
00286
00287 if (*ptr == '.') {
00288 ptr++;
00289 if (beg_fl)
00290 field->beg_pos = atoi(ptr);
00291 else
00292 field->end_pos = atoi(ptr);
00293 while (table[*ptr] & DIGIT)
00294 ptr++;
00295 }
00296 if (beg_fl) {
00297 while (*ptr != '\0')
00298 get_opts(ptr++, field);
00299 }
00300 if (beg_fl) {
00301 ptr = argptr[*offset];
00302 if (ptr && *ptr == '-' && ((table[*(ptr + 1)] & DIGIT) || *(ptr + 1) == '.')) {
00303 new_field(field, offset, FALSE);
00304 if (field->beg_field > field->end_field)
00305 error(TRUE, "End field is before start field!", NIL_PTR);
00306 } else
00307 field->end_field = ERROR;
00308 }
00309 }
00310
00311 int main(argc, argv)
00312 int argc;
00313 char *argv[];
00314 {
00315 int arg_count = 1;
00316 struct stat st;
00317 register char *ptr;
00318 register int fd;
00319 int pid, pow;
00320
00321 argptr = argv;
00322 cur_pos = mem_top = msbrk(MEMORY_SIZE);
00323
00324 while (arg_count < argc && ((ptr = argv[arg_count])[0] == '-' || *ptr == '+')) {
00325 if (*ptr == '-' && *(ptr + 1) == '\0')
00326 break;
00327 if (*ptr == '+') {
00328 if (++field_cnt == FIELDS_LIMIT)
00329 error(TRUE, "Too many fields", NIL_PTR);
00330 new_field(&fields[field_cnt], &arg_count, TRUE);
00331 } else {
00332 while (*++ptr) {
00333 switch (*ptr) {
00334 case 'c':
00335 check = TRUE;
00336 break;
00337 case 'm':
00338 only_merge = TRUE;
00339 break;
00340 case 'u':
00341 uniq = TRUE;
00342 break;
00343 case 'o':
00344 output_file = argv[++arg_count];
00345 break;
00346 case 't':
00347 ptr++;
00348 separator = *ptr;
00349 break;
00350 default:
00351 get_opts(ptr, &fields[GLOBAL]);
00352 }
00353 }
00354 arg_count++;
00355 }
00356 }
00357
00358 for (fd = 1; fd <= field_cnt; fd++) adjust_options(&fields[fd]);
00359
00360
00361 ptr = &temp_files[10];
00362 pid = getpid();
00363 pow = 10000;
00364 while (pow != 0) {
00365 *ptr++ = pid / pow + '0';
00366 pid %= pow;
00367 pow /= 10;
00368 }
00369
00370 signal(SIGINT, catch);
00371
00372
00373 if (only_merge) {
00374 args_limit = args_offset = arg_count;
00375 while (argv[args_limit] != NIL_PTR)
00376 args_limit++;
00377 files_merge(args_limit - arg_count);
00378 exit(0);
00379 }
00380 if (arg_count == argc) {
00381 if (check)
00382 check_file(0, NIL_PTR);
00383 else
00384 get_file(0, (off_t) 0);
00385 } else
00386 while (arg_count < argc) {
00387 if (strcmp(argv[arg_count], "-") == 0)
00388 fd = 0;
00389 else if (stat(argv[arg_count], &st) < 0) {
00390 error(FALSE, "Cannot find ", argv[arg_count++]);
00391 continue;
00392 }
00393
00394
00395 else if ((fd = open(argv[arg_count], O_RDONLY)) < 0) {
00396 error(FALSE, "Cannot open ", argv[arg_count++]);
00397 continue;
00398 }
00399 if (check)
00400 check_file(fd, argv[arg_count]);
00401 else
00402 get_file(fd, st.st_size);
00403 arg_count++;
00404 }
00405
00406 if (check) exit(0);
00407
00408 sort();
00409
00410 if (nr_of_files == 1)
00411 exit(0);
00412
00413 files_merge(nr_of_files);
00414 return(0);
00415 }
00416
00417
00418
00419
00420 void adjust_options(field)
00421 register FIELD *field;
00422 {
00423 register FIELD *gfield = &fields[GLOBAL];
00424
00425 if (gfield->reverse) field->reverse = TRUE;
00426 if (gfield->blanks) field->blanks = TRUE;
00427 if (gfield->dictionary) field->dictionary = TRUE;
00428 if (gfield->fold_case) field->fold_case = TRUE;
00429 if (gfield->ascii) field->ascii = TRUE;
00430 if (gfield->numeric) field->numeric = TRUE;
00431 }
00432
00433
00434 void error(quit, message, arg)
00435 register BOOL quit;
00436 register char *message, *arg;
00437 {
00438 write(2, message, strlen(message));
00439 if (arg != NIL_PTR) write(2, arg, strlen(arg));
00440 perror(" ");
00441 if (quit) exit(1);
00442 }
00443
00444
00445
00446
00447 void open_outfile()
00448 {
00449 if (output_file == NIL_PTR)
00450 out_fd = STD_OUT;
00451 else if ((out_fd = creat(output_file, 0644)) < 0)
00452 error(TRUE, "Cannot creat ", output_file);
00453 }
00454
00455
00456
00457
00458 void get_file(fd, size)
00459 int fd;
00460 register off_t size;
00461 {
00462 register int i;
00463 int rest;
00464 char save_ch;
00465
00466 rest = MEMORY_SIZE - (cur_pos - mem_top);
00467 if (fd == 0) {
00468 while ((i = read(0, cur_pos, rest)) > 0) {
00469 if ((cur_pos - mem_top) + i == MEMORY_SIZE) {
00470 in_core = FALSE;
00471 i = last_line();
00472 save_ch = mem_top[i];
00473 mem_top[i] = '\0';
00474 sort();
00475 mem_top[i] = save_ch;
00476
00477 for (rest = 0; i + rest != MEMORY_SIZE; rest++)
00478 mem_top[rest] = mem_top[i + rest];
00479
00480 cur_pos = &mem_top[rest];
00481 } else {
00482 cur_pos = cur_pos + i;
00483 *cur_pos = '\0';
00484 }
00485
00486
00487 rest = MEMORY_SIZE - (cur_pos - mem_top);
00488 }
00489 }
00490
00491
00492 else if (size > rest) {
00493 mread(fd, cur_pos, rest);
00494 in_core = FALSE;
00495 i = last_line();
00496 mem_top[i] = '\0';
00497 (void) lseek(fd, (off_t) (i - MEMORY_SIZE), SEEK_CUR);
00498 size = size - rest - i + MEMORY_SIZE;
00499 cur_pos = mem_top;
00500 sort();
00501 get_file(fd, size);
00502 } else {
00503 rest = size;
00504 mread(fd, cur_pos, rest);
00505 cur_pos = cur_pos + rest;
00506 *cur_pos = '\0';
00507 (void) close(fd);
00508 }
00509 }
00510
00511
00512
00513
00514 int last_line()
00515 {
00516 register int i;
00517
00518 for (i = MEMORY_SIZE - 2; i > 0; i--)
00519 if (mem_top[i] == '\n') break;
00520 return i + 1;
00521 }
00522
00523
00524
00525
00526 void print_table(fd)
00527 int fd;
00528 {
00529 register char **line_ptr;
00530 register char *ptr;
00531 int index = 0;
00532
00533 if (fd == ERROR) {
00534 if ((fd = creat(file_name(nr_of_files), 0644)) < 0)
00535 error(TRUE, "Cannot creat ", file_name(nr_of_files));
00536 }
00537 for (line_ptr = line_table; *line_ptr != NIL_PTR; line_ptr++) {
00538 ptr = *line_ptr;
00539
00540 if (uniq && *(line_ptr + 1) != NIL_PTR) {
00541 if (compare(ptr, *(line_ptr + 1)) == SAME) continue;
00542 }
00543 do {
00544 out_buffer[index++] = *ptr;
00545 if (index == IO_SIZE) {
00546 mwrite(fd, out_buffer, IO_SIZE);
00547 index = 0;
00548 }
00549 } while (*ptr++ != '\n');
00550 }
00551 mwrite(fd, out_buffer, index);
00552 (void) close(fd);
00553 nr_of_files++;
00554 }
00555
00556
00557
00558
00559 char *file_name(nr)
00560 register int nr;
00561 {
00562 if (only_merge) {
00563 if (args_offset + nr < args_limit) return argptr[args_offset + nr];
00564 }
00565 temp_files[16] = nr / 26 + 'a';
00566 temp_files[17] = nr % 26 + 'a';
00567
00568 return temp_files;
00569 }
00570
00571
00572 void mread(fd, address, bytes)
00573 int fd;
00574 char *address;
00575 register int bytes;
00576 {
00577 if (read(fd, address, bytes) < 0 && bytes != 0)
00578 error(TRUE, "Read error", NIL_PTR);
00579 }
00580
00581
00582 void mwrite(fd, address, bytes)
00583 int fd;
00584 char *address;
00585 register int bytes;
00586 {
00587 if (write(fd, address, bytes) != bytes && bytes != 0)
00588 error(TRUE, "Write error", NIL_PTR);
00589 }
00590
00591
00592 void sort()
00593 {
00594 register char *ptr = mem_top;
00595 register int count = 0;
00596
00597
00598 while (*ptr) {
00599 if (*ptr++ == '\n') count++;
00600 }
00601
00602
00603 line_table = (char **) msbrk(count * sizeof(char *) + sizeof(char *));
00604
00605 count = 1;
00606 ptr = line_table[0] = mem_top;
00607 while (*ptr) {
00608 if (*ptr++ == '\n') line_table[count++] = ptr;
00609 }
00610
00611 line_table[count - 1] = NIL_PTR;
00612
00613
00614 sort_table(count - 1);
00615
00616
00617 if (in_core) {
00618 open_outfile();
00619 print_table(out_fd);
00620 } else
00621 print_table(ERROR);
00622
00623
00624 mbrk((char *) line_table);
00625 }
00626
00627
00628 void sort_table(nel)
00629 register int nel;
00630 {
00631 char *tmp;
00632 register int i;
00633
00634
00635 for (i = (nel >> 1); i >= 1; i--) incr(i, nel);
00636
00637
00638 for (i = nel; i > 1; i--) {
00639 tmp = line_table[0];
00640 line_table[0] = line_table[i - 1];
00641 line_table[i - 1] = tmp;
00642 incr(1, i - 1);
00643 }
00644 }
00645
00646
00647 void incr(si, ei)
00648 register int si, ei;
00649 {
00650 char *tmp;
00651
00652 while (si <= (ei >> 1)) {
00653 si <<= 1;
00654 if (si + 1 <= ei && compare(line_table[si - 1], line_table[si]) <= 0)
00655 si++;
00656 if (compare(line_table[(si >> 1) - 1], line_table[si - 1]) >= 0)
00657 return;
00658 tmp = line_table[(si >> 1) - 1];
00659 line_table[(si >> 1) - 1] = line_table[si - 1];
00660 line_table[si - 1] = tmp;
00661 }
00662 }
00663
00664
00665
00666
00667
00668 int cmp_fields(el1, el2)
00669 register char *el1, *el2;
00670 {
00671 int i, ret;
00672 char line1[LINE_SIZE], line2[LINE_SIZE];
00673
00674 for (i = 0; i < field_cnt; i++) {
00675 build_field(line1, &fields[i + 1], el1);
00676 build_field(line2, &fields[i + 1], el2);
00677 if ((ret = cmp((unsigned char *) line1, (unsigned char *) line2,
00678 &fields[i + 1])) != SAME)
00679 break;
00680 }
00681
00682
00683 if (i != field_cnt && fields[i + 1].reverse) return -ret;
00684
00685
00686 return ret;
00687 }
00688
00689
00690
00691
00692 void build_field(dest, field, src)
00693 char *dest;
00694 register FIELD *field;
00695 register char *src;
00696 {
00697 char *begin = src;
00698 char *last;
00699 int i;
00700
00701
00702 src = skip_fields(src, field->beg_field);
00703
00704
00705 for (i = 0; i < field->beg_pos && *src != '\n'; i++) src++;
00706
00707
00708 copy(dest, src);
00709
00710
00711 if (field->end_field != ERROR) {
00712 last = skip_fields(begin, field->end_field);
00713
00714 for (i = 0; i < field->end_pos && *last != '\n'; i++) last++;
00715 dest[last - src] = '\n';
00716 }
00717 }
00718
00719
00720 char *skip_fields(str, nf)
00721 register char *str;
00722 int nf;
00723 {
00724 while (nf-- > 0) {
00725 if (separator == '\0') {
00726 while (*str != ' ' && *str != '\t' && *str != '\n') str++;
00727 while (table[*str] & BLANK) str++;
00728 } else {
00729 while (*str != separator && *str != '\n') str++;
00730 if (*str == separator) str++;
00731 }
00732 }
00733 return str;
00734 }
00735
00736
00737
00738
00739
00740 int compare(el1, el2)
00741 register char *el1, *el2;
00742 {
00743 int ret;
00744
00745 if (field_cnt > GLOBAL) return cmp_fields(el1, el2);
00746
00747 ret = cmp((unsigned char *) el1, (unsigned char *) el2, &fields[GLOBAL]);
00748 return(fields[GLOBAL].reverse) ? -ret : ret;
00749 }
00750
00751
00752
00753
00754 int cmp(el1, el2, field)
00755 register unsigned char *el1, *el2;
00756 FIELD *field;
00757 {
00758 int c1, c2;
00759
00760 if (field->blanks) {
00761 while (table[*el1] & BLANK) el1++;
00762 while (table[*el2] & BLANK) el2++;
00763 }
00764 if (field->numeric)
00765 return digits((char *) el1, (char *) el2, TRUE);
00766
00767 for (;;) {
00768 while (*el1 == *el2) {
00769 if (*el1++ == '\n')
00770 return SAME;
00771 el2++;
00772 }
00773 if (*el1 == '\n')
00774 return LOWER;
00775 if (*el2 == '\n') return HIGHER;
00776 if (field->ascii) {
00777 if ((table[*el1] & ASCII) == 0) {
00778 do {
00779 el1++;
00780 } while ((table[*el1] & ASCII) == 0);
00781 continue;
00782 }
00783 if ((table[*el2] & ASCII) == 0) {
00784 do {
00785 el2++;
00786 } while ((table[*el2] & ASCII) == 0);
00787 continue;
00788 }
00789 }
00790 if (field->dictionary) {
00791 if ((table[*el1] & DICT) == 0) {
00792 do {
00793 el1++;
00794 } while ((table[*el1] & DICT) == 0);
00795 continue;
00796 }
00797 if ((table[*el2] & DICT) == 0) {
00798 do {
00799 el2++;
00800 } while ((table[*el2] & DICT) == 0);
00801 continue;
00802 }
00803 }
00804 if (field->fold_case) {
00805 if (table[c1 = *el1++] & UPPER) c1 += 'a' - 'A';
00806 if (table[c2 = *el2++] & UPPER) c2 += 'a' - 'A';
00807 if (c1 == c2) continue;
00808 return c1 - c2;
00809 }
00810 return *el1 - *el2;
00811 }
00812
00813
00814 }
00815
00816
00817
00818
00819
00820 int digits(str1, str2, check_sign)
00821 register char *str1, *str2;
00822 BOOL check_sign;
00823 {
00824 BOOL negative = FALSE;
00825 int diff, pow, ret;
00826
00827
00828 if (check_sign) {
00829 if (*str1 == '-') {
00830 negative = TRUE;
00831 str1++;
00832 } else if (*str1 == '+')
00833 str1++;
00834
00835 if (*str2 == '-') {
00836 if (negative == FALSE) return HIGHER;
00837 str2++;
00838 } else if (negative)
00839 return LOWER;
00840 else if (*str2 == '+')
00841 str2++;
00842 }
00843
00844
00845 while ((table[*str1] & DIGIT) && table[*str2] & DIGIT) {
00846 if (*str1 != *str2) break;
00847 str1++;
00848 str2++;
00849 }
00850
00851
00852 if (*str1 == '.' || *str2 == '.') {
00853 if (*str1 == '.') {
00854 if (*str2 == '.')
00855 ret = digits(str1 + 1, str2 + 1, FALSE);
00856 else
00857 ret = (table[*str2] & DIGIT) ? LOWER : HIGHER;
00858 } else
00859 ret = (table[*str1] & DIGIT) ? HIGHER : LOWER;
00860 }
00861
00862
00863 else if ((table[*str1] & DIGIT) && (table[*str2] & DIGIT)) {
00864 diff = *str1 - *str2;
00865 pow = 0;
00866 while (table[*str1++] & DIGIT) pow++;
00867 while (table[*str2++] & DIGIT) pow--;
00868 ret = (pow == 0) ? diff : pow;
00869 }
00870
00871
00872 else {
00873 if ((table[*str1] & DIGIT) == 0)
00874 ret = (table[*str2] & DIGIT) ? LOWER : SAME;
00875 else
00876 ret = HIGHER;
00877 }
00878
00879
00880 return(negative) ? -ret : ret;
00881 }
00882
00883
00884
00885
00886 void files_merge(file_cnt)
00887 register int file_cnt;
00888 {
00889 register int i;
00890 int limit;
00891
00892 for (i = 0; i < file_cnt; i += OPEN_FILES) {
00893
00894 if ((limit = i + OPEN_FILES) >= file_cnt) {
00895 open_outfile();
00896 limit = file_cnt;
00897 } else {
00898
00899 temp_files[16] = file_cnt / 26 + 'a';
00900 temp_files[17] = file_cnt % 26 + 'a';
00901 if ((out_fd = creat(temp_files, 0644)) < 0)
00902 error(TRUE, "Cannot creat ", temp_files);
00903 file_cnt++;
00904 }
00905 merge(i, limit);
00906 }
00907
00908
00909 i = (only_merge) ? args_limit - args_offset : 0;
00910 while (i < file_cnt) (void) unlink(file_name(i++));
00911 }
00912
00913
00914 void merge(start_file, limit_file)
00915 int start_file, limit_file;
00916 {
00917 register MERGE *smallest;
00918 register int i;
00919 int file_cnt = limit_file - start_file;
00920
00921
00922 buf_size = MEMORY_SIZE / file_cnt - LINE_SIZE;
00923
00924 mbrk(mem_top);
00925 disabled = 0;
00926
00927
00928 for (i = start_file; i < limit_file; i++) {
00929 smallest = &merge_f[i - start_file];
00930 if (!strcmp(file_name(i), "-"))
00931 smallest->fd = 0;
00932 else if ((smallest->fd = open(file_name(i), O_RDONLY)) < 0) {
00933 smallest->fd = ERROR;
00934 error(FALSE, "Cannot open ", file_name(i));
00935 disabled++;
00936 continue;
00937 }
00938 smallest->buffer = msbrk(buf_size);
00939 smallest->line = msbrk(LINE_SIZE);
00940 smallest->cnt = smallest->read_chars = 0;
00941 (void) read_line(smallest);
00942 }
00943
00944 if (disabled == file_cnt) {
00945 (void) close(out_fd);
00946 return;
00947 }
00948
00949
00950 for (i = 0; i < file_cnt; i++) {
00951 if (merge_f[i].fd != ERROR) {
00952 smallest = &merge_f[i];
00953 break;
00954 }
00955 }
00956
00957
00958 while (disabled < file_cnt - 1) {
00959 if (uniq)
00960 smallest = skip_lines(smallest, file_cnt);
00961 else {
00962 for (i = 0; i < file_cnt; i++) {
00963 if (merge_f[i].fd == ERROR)
00964 continue;
00965 if (compare(merge_f[i].line, smallest->line) < 0)
00966 smallest = &merge_f[i];
00967 }
00968 }
00969 smallest = print(smallest, file_cnt);
00970 }
00971
00972 if (only_merge && uniq)
00973 uniq_lines(smallest);
00974 else
00975 while (print(smallest, file_cnt) != NIL_MERGE);
00976
00977 put_line(NIL_PTR);
00978 }
00979
00980
00981
00982
00983 void put_line(line)
00984 register char *line;
00985 {
00986 static int index = 0;
00987
00988 if (line == NIL_PTR) {
00989 mwrite(out_fd, out_buffer, index);
00990 index = 0;
00991 (void) close(out_fd);
00992 return;
00993 }
00994 do {
00995 out_buffer[index++] = *line;
00996 if (index == IO_SIZE) {
00997 mwrite(out_fd, out_buffer, IO_SIZE);
00998 index = 0;
00999 }
01000 } while (*line++ != '\n');
01001 }
01002
01003
01004
01005
01006
01007
01008 MERGE *print(merg, file_cnt)
01009 register MERGE *merg;
01010 int file_cnt;
01011 {
01012 register int i;
01013
01014 put_line(merg->line);
01015
01016 if (read_line(merg) == ERROR) {
01017 for (i = 0; i < file_cnt; i++) {
01018 if (merge_f[i].fd != ERROR) {
01019 merg = &merge_f[i];
01020 break;
01021 }
01022 }
01023 if (i == file_cnt)
01024 return NIL_MERGE;
01025 }
01026 return merg;
01027 }
01028
01029
01030
01031
01032
01033
01034 int read_line(merg)
01035 register MERGE *merg;
01036 {
01037 register char *ptr = merg->line - 1;
01038
01039 do {
01040 ptr++;
01041 if (merg->cnt == merg->read_chars) {
01042 if ((merg->read_chars =
01043 read(merg->fd, merg->buffer, buf_size)) <= 0) {
01044 (void) close(merg->fd);
01045 merg->fd = ERROR;
01046 disabled++;
01047 return ERROR;
01048 }
01049 merg->cnt = 0;
01050 }
01051 *ptr = merg->buffer[merg->cnt++];
01052 if (ptr - merg->line == LINE_SIZE - 1)
01053 *ptr = '\n';
01054 } while (*ptr != '\n' && *ptr != '\0');
01055
01056 if (*ptr == '\0')
01057 *ptr = '\n';
01058 *++ptr = '\0';
01059 return OK;
01060 }
01061
01062
01063
01064
01065 MERGE *skip_lines(smallest, file_cnt)
01066 register MERGE *smallest;
01067 int file_cnt;
01068 {
01069 register int i;
01070 int ret;
01071
01072 if (disabled == file_cnt - 1)
01073 return smallest;
01074
01075 for (i = 0; i < file_cnt; i++) {
01076 if (merge_f[i].fd == ERROR || smallest == &merge_f[i])
01077 continue;
01078 while ((ret = compare(merge_f[i].line, smallest->line)) == 0) {
01079 if (read_line(&merge_f[i]) == ERROR) break;
01080 }
01081 if (ret < 0)
01082 return skip_lines(&merge_f[i], file_cnt);
01083 }
01084 return smallest;
01085 }
01086
01087
01088 void uniq_lines(merg)
01089 register MERGE *merg;
01090 {
01091 char lastline[LINE_SIZE];
01092
01093 for (;;) {
01094 put_line(merg->line);
01095 copy(lastline, merg->line);
01096 if (read_line(merg) == ERROR)
01097 return;
01098
01099 while (compare(lastline, merg->line) == SAME)
01100 if (read_line(merg) == ERROR) return;
01101 }
01102
01103
01104 }
01105
01106
01107
01108
01109
01110 void check_file(fd, file)
01111 int fd;
01112 char *file;
01113 {
01114 register MERGE *merg;
01115 char lastline[LINE_SIZE];
01116 register int ret;
01117
01118 if (fd == 0) file = "stdin";
01119 merg = (MERGE *) mem_top;
01120 merg->buffer = mem_top + sizeof(MERGE);
01121 merg->line = msbrk(LINE_SIZE);
01122 merg->cnt = merg->read_chars = 0;
01123 merg->fd = fd;
01124 buf_size = MEMORY_SIZE - sizeof(MERGE);
01125
01126 if (read_line(merg) == ERROR)
01127 return;
01128 copy(lastline, merg->line);
01129
01130 for (;;) {
01131 if (read_line(merg) == ERROR)
01132 break;
01133 if ((ret = compare(lastline, merg->line)) > 0) {
01134 error(FALSE, "Disorder in file ", file);
01135 write(2, merg->line, length(merg->line));
01136 break;
01137 } else if (ret < 0)
01138 copy(lastline, merg->line);
01139 else if (uniq) {
01140 error(FALSE, "Non uniq line in file ", file);
01141 write(2, merg->line, length(merg->line));
01142 break;
01143 }
01144 }
01145
01146 mbrk(mem_top);
01147 }
01148
01149
01150 int length(line)
01151 register char *line;
01152 {
01153 register int i = 1;
01154
01155 while (*line++ != '\n') i++;
01156 return i;
01157 }
01158
01159
01160 void copy(dest, src)
01161 register char *dest, *src;
01162 {
01163 while ((*dest++ = *src++) != '\n');
01164 }
01165
01166
01167 char *msbrk(size)
01168 register int size;
01169 {
01170 register char *address;
01171
01172 if ((address = sbrk(size)) == (char *) -1)
01173 error(TRUE, "Not enough memory. Use chmem to allocate more", NIL_PTR);
01174 return address;
01175 }
01176
01177
01178 void mbrk(address)
01179 char *address;
01180 {
01181 if (brk(address) == -1) error(TRUE, "Cannot reset memory", NIL_PTR);
01182 }
01183
01184 void catch(dummy)
01185 int dummy;
01186 {
01187 register int i;
01188
01189 signal(SIGINT, SIG_IGN);
01190 only_merge = FALSE;
01191 for (i = 0; i < 26; i++) (void) unlink(file_name(i));
01192 exit(2);
01193 }