/* * lzcomp [-options] infile outfile */ #ifdef DOCUMENTATION title lzcomp File Compression index File compression synopsis .s.nf lzcomp [-options] [infile [outfile]] .s.f description lzcomp implements the Lempel-Ziv file compression algorithm. (Files compressed by lzcomp are uncompressed by lzdcmp.) It essentially finds common substrings and replaces them with a variable-size code. This is deterministic, and can be done with a single pass over the file. Thus, the decompression procedure needs no input table, but can track the way the table was built. Options may be given in either case. .lm +8 .p -8 -B Input file is "binary", not "human readable text". This is necessary on Dec operating systems, such as VMS, that treat these files differently. (Note that binary support is rudimentary and probably insufficient as yet.) .p -8 -C Supress block compression for compatiblity with past versions of compress. .p -8 -M bits Write using the specified number of bits in the code -- useful for big machines making files for little machines. .p -8 -N Write in a format compatible with old compress. .p -8 -Q Quiet -- don't write out status messages. .p -8 -D val Debug (if compiled in) (value is debug level). -V Verbose (if compiled in), for debugging. .s.lm -16 The other two arguments are the input and output filenames respectively. Redirection is supported, however, the output should be a disk/tape file. The file format is almost identical to the original Unix implementation of compress (V3.0). Files written by Unix compress should be readable by lzdcmp. Authors The algorithm is from "A Technique for High Performance Data Compression." Terry A. Welch. IEEE Computer Vol 17, No. 6 (June 1984), pp 8-19. This revision is by Martin Minow. Unix Compress authors are as follows: .s.nf Spencer W. Thomas (decvax!harpo!utah-cs!utah-gr!thomas) Jim McKie (decvax!mcvax!jim) Steve Davies (decvax!vax135!petsd!peora!srd) Ken Turkowski (decvax!decwrl!turtlevax!ken) James A. Woods (decvax!ihnp4!ames!jaw) Joe Orost (decvax!vax135!petsd!joe) .s.f #endif /* * Compatible with compress.c, v3.0 84/11/27 */ /*)BUILD * $(PROGRAM) = lzcomp * $(INCLUDE) = lz.h * $(FILES) = { lzcmp1.c lzcmp2.c lzio.c } */ #include "lz.h" #ifndef decus #include extern int abort(); #endif #ifdef DEBUG #ifdef unix #include #include #else #include #endif static struct timeb start_time; static struct timeb end_time; #endif /* * These global parameters are written to the compressed file. * The decompressor needs them. */ short maxbits = BITS; /* settable max # bits/code */ short block_compress = BLOCK_MASK; code_int maxmaxcode = 1 << BITS; static char_type magic_header[] = /* First 2 bytes of compressed */ { HEAD1_MAGIC, HEAD2_MAGIC, 0 }; /* data file. */ code_int hsize = HSIZE; /* Actual hash table size */ flag binary = FALSE; /* Read text if false */ flag nomagic = FALSE; /* No magic header if TRUE */ flag quiet = FALSE; /* don't talk about compression */ #ifdef DEBUG flag debug = 0; #endif long fsize; /* Input file size in bytes */ char *infilename = NULL; /* For error printouts */ char *outfilename = NULL; /* For openoutput and errors */ main(argc, argv) int argc; char *argv[]; /* * Compress mainline */ { #ifdef DEBUG int msec; extern long in_count; #endif setup(argc, argv); getsize(); openoutput(); #ifndef decus signal(SIGINT, abort); #endif #ifdef DEBUG ftime(&start_time); #endif if (maxbits < INIT_BITS) /* maxbits is set by */ maxbits = INIT_BITS; /* the -M option. Make */ if (maxbits > BITS) /* sure it's within a */ maxbits = BITS; /* reasonable range */ maxmaxcode = 1 << maxbits; /* Truly biggest code */ #ifndef COMPATIBLE if (!nomagic) { PUT(magic_header[0]); PUT(magic_header[1]); PUT((char) (maxbits | block_compress)); } #endif if (!compress()) { fprintf(stderr, "Warning, compression unsuccessful.\n"); /* * Hook here to delete output file. */ } #ifdef DEBUG if (!quiet) { ftime(&end_time); end_time.time -= start_time.time; msec = end_time.millitm - start_time.millitm; if (msec < 0) { msec += 1000; end_time.time--; } fprintf(stderr, "%ld.%03d seconds for compression.", end_time.time, msec); if (msec >= 500) /* Roundoff */ end_time.time++; start_time.time = (in_count * 100) / end_time.time; fprintf(stderr, " %ld.%02ld input bytes per second.\n", start_time.time / 100, start_time.time % 100); } #endif exit(IO_SUCCESS); } typedef struct TUNETAB { long fsize; code_int hsize; } TUNETAB; static TUNETAB tunetab[] = { #if HSIZE > 5003 { 1 << 12, 5003 }, #endif #if HSIZE > 9001 { 1 << 13, 9001 }, #endif #if HSIZE > 18013 { 1 << 14, 18013 }, #endif #if HSIZE > 35023 { 1 << 15, 35023 }, { 47000, 50021 }, #endif { 0, 0 }, }; #ifdef unix #include #include #endif #ifdef vms #if L_cuserid >= 16 /* * VMS Version 4.1 or later (by inspection -- defined in */ #include #include #endif #endif getsize() /* * Tune the hash table parameters for small files. * We don't have a good way to find the file size on vms. * fsize is set to zero if we can't find it. */ { register TUNETAB *tunep; #ifdef rsx extern char f_efbk; /* F.EFBK -- highest block in file */ #define fdb(p,offset) (stdin->io_fdb[((int) &p + offset)] & 0xFF) #define efbk(offset) fdb(f_efbk, offset) extern char f_rtyp; /* F.RTYP -- Record type */ extern char f_ratt; /* F.RATT -- Record attributes */ /* * Note: Block number is stored high-order word first. */ fsize = efbk(2) + (efbk(3) << 8) + (efbk(0) << 16) + (efbk(1) << 24); fsize *= 512; #endif #ifdef rt11 fsize = stdin->io_size; /* Set by Decus C */ fsize *= 512; #endif #ifdef vms #if L_cuserid >= 16 /* * VMS Version 4 -- see note above. */ struct stat statbuf; fsize = 0; if (fstat(fileno(stdin), &statbuf) == 0) fsize = (long) statbuf.st_size; #else fsize = 0; #endif #endif #ifdef unix struct stat statbuf; fsize = 0; if (fstat(fileno(stdin), &statbuf) == 0) fsize = (long) statbuf.st_size; #endif hsize = HSIZE; if (fsize > 0) { for (tunep = tunetab; tunep->fsize != 0; tunep++) { if (fsize < tunep->fsize) { hsize = tunep->hsize; break; } } } #ifdef DEBUG if (!quiet) { if (fsize > 0) { fprintf(stderr, "%s: size %ld bytes, hash size %d\n", infilename, fsize, hsize); } else { fprintf(stderr, "%s: size unknown, hash size %d\n", infilename, hsize); } } #endif } static setup(argc, argv) int argc; char *argv[]; /* * Get parameters and open files. Exit fatally on errors. */ { register char *ap; register int c; int i, j; char *arg; #ifndef unix char filename[80]; #endif #ifdef vms argc = getredirection(argc, argv); #endif for (i = j = 1; i < argc; i++) { arg = ap = argv[i]; if (*ap++ != '-' || *ap == EOS) /* Filename? */ argv[j++] = argv[i]; /* Just copy it */ else { c = *ap++; /* Option byte */ if (islower(c)) c = toupper(c); switch (c) { case 'B': binary = TRUE; break; case 'C': block_compress = 0; break; case 'M': if (*ap == EOS) { ap = argv[++i]; if (i >= argc) { fprintf(stderr, "-M needs a value\n"); goto usage; } } maxbits = atoi(ap); goto nextarg; #ifdef COMPATIBLE case 'N': nomagic = TRUE; break; #endif case 'Q': quiet = TRUE; break; #ifdef DEBUG case 'D': if (isdigit(*ap)) { debug = atoi(ap); goto nextarg; } debug = 1; break; #endif default: fprintf(stderr, "Unknown option \"%s\"\n", arg); usage: fprintf(stderr, "The following options are valid:\n\ -B\tBinary file (important on VMS/RSX, ignored on Unix)\n\ -C\tNo block compress (compatible with compress 2.0)\n\ -M val\tExplicitly set the maximum number of code bits\n"); #ifdef COMPATIBLE fprintf(stderr, "\ -N\tNo header (file is readable by old compress)\n"); #endif fprintf(stderr, "\ -Q\tNo output to stderr, unless error.\n"); #ifdef DEBUG fprintf(stderr, "-Dn\tDebug (n == level)\n"); #endif exit(IO_ERROR); } /* Switch on options */ } /* If -option */ nextarg: ; /* Go to next argv[] */ } /* For all argc's */ /* outfilename = NULL; */ /* Set "stdout" signal */ switch (j) { /* Any file arguments? */ case 3: /* both files given */ if (!streq(argv[2], "-")) /* But - means stdout */ outfilename = argv[2]; case 2: /* Input file given */ if (!streq(argv[1], "-")) { #ifdef decus if (freopen(argv[1], (binary) ? "rn" : "r", stdin) == NULL) { perror(argv[1]); exit(IO_ERROR); } #else /* * Special case for vms too? */ if (freopen(argv[1], "r", stdin) == NULL) { perror(argv[1]); exit(IO_ERROR); } #endif infilename = argv[1]; break; } /* Else "-": explicitly read from stdin. */ case 0: /* None! */ case 1: /* No file arguments */ #ifdef vms fgetname(stdin, filename); infilename = malloc(strlen(filename) + 1); strcpy(infilename, filename); #else #ifdef decus fgetname(stdin, filename); infilename = malloc(strlen(filename) + 1); strcpy(infilename, filename); #else infilename = "stdin"; #endif #endif break; default: fprintf(stderr, "Too many file arguments\n"); exit(IO_ERROR); } } openoutput() /* * Open the output file (after the input file has been opened). * if outfilename == NULL, it's already open on stdout. */ { #ifndef unix char filename[80]; #endif if (outfilename == NULL) { #ifdef vms fgetname(stdout, filename); outfilename = malloc(strlen(filename) + 1); strcpy(outfilename, filename); #else #ifdef decus fgetname(stdout, filename); outfilename = malloc(strlen(filename) + 1); strcpy(outfilename, filename); #else outfilename = ""; #endif #endif } else { #ifdef decus if (freopen(outfilename, "wn", stdout) == NULL) { perror(outfilename); exit(IO_ERROR); } #else if (freopen(outfilename, "w", stdout) == NULL) { perror(outfilename); exit(IO_ERROR); } #endif } if (isatty(fileno(stdout))) { fprintf(stderr, "%s: is a terminal. We object.\n", outfilename); exit(IO_ERROR); } }