Doxygen

lzx.c
Go to the documentation of this file.
00001 /***************************************************************************
00002  *                        lzx.c - LZX decompression routines               *
00003  *                           -------------------                           *
00004  *                                                                         *
00005  *  maintainer: Jed Wing <jedwin@ugcs.caltech.edu>                         *
00006  *  source:     modified lzx.c from cabextract v0.5                        *
00007  *  notes:      This file was taken from cabextract v0.5, which was,       *
00008  *              itself, a modified version of the lzx decompression code   *
00009  *              from unlzx.                                                *
00010  *                                                                         *
00011  *  platforms:  In its current incarnation, this file has been tested on   *
00012  *              two different Linux platforms (one, redhat-based, with a   *
00013  *              2.1.2 glibc and gcc 2.95.x, and the other, Debian, with    *
00014  *              2.2.4 glibc and both gcc 2.95.4 and gcc 3.0.2).  Both were *
00015  *              Intel x86 compatible machines.                             *
00016  ***************************************************************************/
00017 
00018 /***************************************************************************
00019  *
00020  *   Copyright(C) Stuart Caie
00021  *
00022  * This library is free software; you can redistribute it and/or
00023  * modify it under the terms of the GNU Lesser General Public
00024  * License as published by the Free Software Foundation; either
00025  * version 2.1 of the License, or (at your option) any later version.
00026  *
00027  * This library is distributed in the hope that it will be useful,
00028  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00029  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00030  * Lesser General Public License for more details.
00031  *
00032  * You should have received a copy of the GNU Lesser General Public
00033  * License along with this library; if not, write to the Free Software
00034  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
00035  *
00036  ***************************************************************************/
00037 
00038 #include "lzx.h"
00039 #include <stdarg.h>
00040 //#include <stdio.h>
00041 //#include <stdlib.h>
00042 //#include <string.h>
00043 
00044 #include <windef.h>
00045 #include <winbase.h>
00046 
00047 /* sized types */
00048 typedef unsigned char  UBYTE; /* 8 bits exactly    */
00049 typedef unsigned short UWORD; /* 16 bits (or more) */
00050 
00051 /* some constants defined by the LZX specification */
00052 #define LZX_MIN_MATCH                (2)
00053 #define LZX_MAX_MATCH                (257)
00054 #define LZX_NUM_CHARS                (256)
00055 #define LZX_BLOCKTYPE_INVALID        (0)   /* also blocktypes 4-7 invalid */
00056 #define LZX_BLOCKTYPE_VERBATIM       (1)
00057 #define LZX_BLOCKTYPE_ALIGNED        (2)
00058 #define LZX_BLOCKTYPE_UNCOMPRESSED   (3)
00059 #define LZX_PRETREE_NUM_ELEMENTS     (20)
00060 #define LZX_ALIGNED_NUM_ELEMENTS     (8)   /* aligned offset tree #elements */
00061 #define LZX_NUM_PRIMARY_LENGTHS      (7)   /* this one missing from spec! */
00062 #define LZX_NUM_SECONDARY_LENGTHS    (249) /* length tree #elements */
00063 
00064 /* LZX huffman defines: tweak tablebits as desired */
00065 #define LZX_PRETREE_MAXSYMBOLS  (LZX_PRETREE_NUM_ELEMENTS)
00066 #define LZX_PRETREE_TABLEBITS   (6)
00067 #define LZX_MAINTREE_MAXSYMBOLS (LZX_NUM_CHARS + 50*8)
00068 #define LZX_MAINTREE_TABLEBITS  (12)
00069 #define LZX_LENGTH_MAXSYMBOLS   (LZX_NUM_SECONDARY_LENGTHS+1)
00070 #define LZX_LENGTH_TABLEBITS    (12)
00071 #define LZX_ALIGNED_MAXSYMBOLS  (LZX_ALIGNED_NUM_ELEMENTS)
00072 #define LZX_ALIGNED_TABLEBITS   (7)
00073 
00074 #define LZX_LENTABLE_SAFETY (64) /* we allow length table decoding overruns */
00075 
00076 #define LZX_DECLARE_TABLE(tbl) \
00077   UWORD tbl##_table[(1<<LZX_##tbl##_TABLEBITS) + (LZX_##tbl##_MAXSYMBOLS<<1)];\
00078   UBYTE tbl##_len  [LZX_##tbl##_MAXSYMBOLS + LZX_LENTABLE_SAFETY]
00079 
00080 struct LZXstate
00081 {
00082     UBYTE *window;         /* the actual decoding window              */
00083     ULONG window_size;     /* window size (32Kb through 2Mb)          */
00084     ULONG actual_size;     /* window size when it was first allocated */
00085     ULONG window_posn;     /* current offset within the window        */
00086     ULONG R0, R1, R2;      /* for the LRU offset system               */
00087     UWORD main_elements;   /* number of main tree elements            */
00088     int   header_read;     /* have we started decoding at all yet?    */
00089     UWORD block_type;      /* type of this block                      */
00090     ULONG block_length;    /* uncompressed length of this block       */
00091     ULONG block_remaining; /* uncompressed bytes still left to decode */
00092     ULONG frames_read;     /* the number of CFDATA blocks processed   */
00093     LONG  intel_filesize;  /* magic header value used for transform   */
00094     LONG  intel_curpos;    /* current offset in transform space       */
00095     int   intel_started;   /* have we seen any translatable data yet? */
00096 
00097     LZX_DECLARE_TABLE(PRETREE);
00098     LZX_DECLARE_TABLE(MAINTREE);
00099     LZX_DECLARE_TABLE(LENGTH);
00100     LZX_DECLARE_TABLE(ALIGNED);
00101 };
00102 
00103 /* LZX decruncher */
00104 
00105 /* Microsoft's LZX document and their implementation of the
00106  * com.ms.util.cab Java package do not concur.
00107  *
00108  * In the LZX document, there is a table showing the correlation between
00109  * window size and the number of position slots. It states that the 1MB
00110  * window = 40 slots and the 2MB window = 42 slots. In the implementation,
00111  * 1MB = 42 slots, 2MB = 50 slots. The actual calculation is 'find the
00112  * first slot whose position base is equal to or more than the required
00113  * window size'. This would explain why other tables in the document refer
00114  * to 50 slots rather than 42.
00115  *
00116  * The constant NUM_PRIMARY_LENGTHS used in the decompression pseudocode
00117  * is not defined in the specification.
00118  *
00119  * The LZX document does not state the uncompressed block has an
00120  * uncompressed length field. Where does this length field come from, so
00121  * we can know how large the block is? The implementation has it as the 24
00122  * bits following after the 3 blocktype bits, before the alignment
00123  * padding.
00124  *
00125  * The LZX document states that aligned offset blocks have their aligned
00126  * offset huffman tree AFTER the main and length trees. The implementation
00127  * suggests that the aligned offset tree is BEFORE the main and length
00128  * trees.
00129  *
00130  * The LZX document decoding algorithm states that, in an aligned offset
00131  * block, if an extra_bits value is 1, 2 or 3, then that number of bits
00132  * should be read and the result added to the match offset. This is
00133  * correct for 1 and 2, but not 3, where just a huffman symbol (using the
00134  * aligned tree) should be read.
00135  *
00136  * Regarding the E8 preprocessing, the LZX document states 'No translation
00137  * may be performed on the last 6 bytes of the input block'. This is
00138  * correct.  However, the pseudocode provided checks for the *E8 leader*
00139  * up to the last 6 bytes. If the leader appears between -10 and -7 bytes
00140  * from the end, this would cause the next four bytes to be modified, at
00141  * least one of which would be in the last 6 bytes, which is not allowed
00142  * according to the spec.
00143  *
00144  * The specification states that the huffman trees must always contain at
00145  * least one element. However, many CAB files contain blocks where the
00146  * length tree is completely empty (because there are no matches), and
00147  * this is expected to succeed.
00148  */
00149 
00150 
00151 /* LZX uses what it calls 'position slots' to represent match offsets.
00152  * What this means is that a small 'position slot' number and a small
00153  * offset from that slot are encoded instead of one large offset for
00154  * every match.
00155  * - position_base is an index to the position slot bases
00156  * - extra_bits states how many bits of offset-from-base data is needed.
00157  */
00158 static const UBYTE extra_bits[51] = {
00159      0,  0,  0,  0,  1,  1,  2,  2,  3,  3,  4,  4,  5,  5,  6,  6,
00160      7,  7,  8,  8,  9,  9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14,
00161     15, 15, 16, 16, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
00162     17, 17, 17
00163 };
00164 
00165 static const ULONG position_base[51] = {
00166           0,       1,       2,      3,      4,      6,      8,     12,     16,     24,     32,       48,      64,      96,     128,     192,
00167         256,     384,     512,    768,   1024,   1536,   2048,   3072,   4096,   6144,   8192,    12288,   16384,   24576,   32768,   49152,
00168       65536,   98304,  131072, 196608, 262144, 393216, 524288, 655360, 786432, 917504, 1048576, 1179648, 1310720, 1441792, 1572864, 1703936,
00169     1835008, 1966080, 2097152
00170 };
00171 
00172 struct LZXstate *LZXinit(int window)
00173 {
00174     struct LZXstate *pState=NULL;
00175     ULONG wndsize = 1 << window;
00176     int i, posn_slots;
00177 
00178     /* LZX supports window sizes of 2^15 (32Kb) through 2^21 (2Mb) */
00179     /* if a previously allocated window is big enough, keep it     */
00180     if (window < 15 || window > 21) return NULL;
00181 
00182     /* allocate state and associated window */
00183     pState = HeapAlloc(GetProcessHeap(), 0, sizeof(struct LZXstate));
00184     if (!(pState->window = HeapAlloc(GetProcessHeap(), 0, wndsize)))
00185     {
00186         HeapFree(GetProcessHeap(), 0, pState);
00187         return NULL;
00188     }
00189     pState->actual_size = wndsize;
00190     pState->window_size = wndsize;
00191 
00192     /* calculate required position slots */
00193     if (window == 20) posn_slots = 42;
00194     else if (window == 21) posn_slots = 50;
00195     else posn_slots = window << 1;
00196 
00198     /* posn_slots=i=0; while (i < wndsize) i += 1 << extra_bits[posn_slots++]; */
00199 
00200     /* initialize other state */
00201     pState->R0  =  pState->R1  = pState->R2 = 1;
00202     pState->main_elements   = LZX_NUM_CHARS + (posn_slots << 3);
00203     pState->header_read     = 0;
00204     pState->frames_read     = 0;
00205     pState->block_remaining = 0;
00206     pState->block_type      = LZX_BLOCKTYPE_INVALID;
00207     pState->intel_curpos    = 0;
00208     pState->intel_started   = 0;
00209     pState->window_posn     = 0;
00210 
00211     /* initialise tables to 0 (because deltas will be applied to them) */
00212     for (i = 0; i < LZX_MAINTREE_MAXSYMBOLS; i++) pState->MAINTREE_len[i] = 0;
00213     for (i = 0; i < LZX_LENGTH_MAXSYMBOLS; i++)   pState->LENGTH_len[i]   = 0;
00214 
00215     return pState;
00216 }
00217 
00218 void LZXteardown(struct LZXstate *pState)
00219 {
00220     if (pState)
00221     {
00222         HeapFree(GetProcessHeap(), 0, pState->window);
00223         HeapFree(GetProcessHeap(), 0, pState);
00224     }
00225 }
00226 
00227 int LZXreset(struct LZXstate *pState)
00228 {
00229     int i;
00230 
00231     pState->R0  =  pState->R1  = pState->R2 = 1;
00232     pState->header_read     = 0;
00233     pState->frames_read     = 0;
00234     pState->block_remaining = 0;
00235     pState->block_type      = LZX_BLOCKTYPE_INVALID;
00236     pState->intel_curpos    = 0;
00237     pState->intel_started   = 0;
00238     pState->window_posn     = 0;
00239 
00240     for (i = 0; i < LZX_MAINTREE_MAXSYMBOLS + LZX_LENTABLE_SAFETY; i++) pState->MAINTREE_len[i] = 0;
00241     for (i = 0; i < LZX_LENGTH_MAXSYMBOLS + LZX_LENTABLE_SAFETY; i++)   pState->LENGTH_len[i]   = 0;
00242 
00243     return DECR_OK;
00244 }
00245 
00246 
00247 /* Bitstream reading macros:
00248  *
00249  * INIT_BITSTREAM    should be used first to set up the system
00250  * READ_BITS(var,n)  takes N bits from the buffer and puts them in var
00251  *
00252  * ENSURE_BITS(n)    ensures there are at least N bits in the bit buffer
00253  * PEEK_BITS(n)      extracts (without removing) N bits from the bit buffer
00254  * REMOVE_BITS(n)    removes N bits from the bit buffer
00255  *
00256  * These bit access routines work by using the area beyond the MSB and the
00257  * LSB as a free source of zeroes. This avoids having to mask any bits.
00258  * So we have to know the bit width of the bitbuffer variable. This is
00259  * sizeof(ULONG) * 8, also defined as ULONG_BITS
00260  */
00261 
00262 /* number of bits in ULONG. Note: This must be at multiple of 16, and at
00263  * least 32 for the bitbuffer code to work (ie, it must be able to ensure
00264  * up to 17 bits - that's adding 16 bits when there's one bit left, or
00265  * adding 32 bits when there are no bits left. The code should work fine
00266  * for machines where ULONG >= 32 bits.
00267  */
00268 #define ULONG_BITS (sizeof(ULONG)<<3)
00269 
00270 #define INIT_BITSTREAM do { bitsleft = 0; bitbuf = 0; } while (0)
00271 
00272 #define ENSURE_BITS(n)                          \
00273   while (bitsleft < (n)) {                      \
00274     bitbuf |= ((inpos[1]<<8)|inpos[0]) << (ULONG_BITS-16 - bitsleft);   \
00275     bitsleft += 16; inpos+=2;                       \
00276   }
00277 
00278 #define PEEK_BITS(n)   (bitbuf >> (ULONG_BITS - (n)))
00279 #define REMOVE_BITS(n) ((bitbuf <<= (n)), (bitsleft -= (n)))
00280 
00281 #define READ_BITS(v,n) do {                     \
00282   ENSURE_BITS(n);                           \
00283   (v) = PEEK_BITS(n);                           \
00284   REMOVE_BITS(n);                           \
00285 } while (0)
00286 
00287 
00288 /* Huffman macros */
00289 
00290 #define TABLEBITS(tbl)   (LZX_##tbl##_TABLEBITS)
00291 #define MAXSYMBOLS(tbl)  (LZX_##tbl##_MAXSYMBOLS)
00292 #define SYMTABLE(tbl)    (pState->tbl##_table)
00293 #define LENTABLE(tbl)    (pState->tbl##_len)
00294 
00295 /* BUILD_TABLE(tablename) builds a huffman lookup table from code lengths.
00296  * In reality, it just calls make_decode_table() with the appropriate
00297  * values - they're all fixed by some #defines anyway, so there's no point
00298  * writing each call out in full by hand.
00299  */
00300 #define BUILD_TABLE(tbl)                        \
00301   if (make_decode_table(                        \
00302     MAXSYMBOLS(tbl), TABLEBITS(tbl), LENTABLE(tbl), SYMTABLE(tbl)   \
00303   )) { return DECR_ILLEGALDATA; }
00304 
00305 
00306 /* READ_HUFFSYM(tablename, var) decodes one huffman symbol from the
00307  * bitstream using the stated table and puts it in var.
00308  */
00309 #define READ_HUFFSYM(tbl,var) do {                  \
00310   ENSURE_BITS(16);                          \
00311   hufftbl = SYMTABLE(tbl);                      \
00312   if ((i = hufftbl[PEEK_BITS(TABLEBITS(tbl))]) >= MAXSYMBOLS(tbl)) {    \
00313     j = 1 << (ULONG_BITS - TABLEBITS(tbl));             \
00314     do {                                \
00315       j >>= 1; i <<= 1; i |= (bitbuf & j) ? 1 : 0;          \
00316       if (!j) { return DECR_ILLEGALDATA; }                          \
00317     } while ((i = hufftbl[i]) >= MAXSYMBOLS(tbl));          \
00318   }                                 \
00319   j = LENTABLE(tbl)[(var) = i];                     \
00320   REMOVE_BITS(j);                           \
00321 } while (0)
00322 
00323 
00324 /* READ_LENGTHS(tablename, first, last) reads in code lengths for symbols
00325  * first to last in the given table. The code lengths are stored in their
00326  * own special LZX way.
00327  */
00328 #define READ_LENGTHS(tbl,first,last) do { \
00329   lb.bb = bitbuf; lb.bl = bitsleft; lb.ip = inpos; \
00330   if (lzx_read_lens(pState, LENTABLE(tbl),(first),(last),&lb)) { \
00331     return DECR_ILLEGALDATA; \
00332   } \
00333   bitbuf = lb.bb; bitsleft = lb.bl; inpos = lb.ip; \
00334 } while (0)
00335 
00336 
00337 /* make_decode_table(nsyms, nbits, length[], table[])
00338  *
00339  * This function was coded by David Tritscher. It builds a fast huffman
00340  * decoding table out of just a canonical huffman code lengths table.
00341  *
00342  * nsyms  = total number of symbols in this huffman tree.
00343  * nbits  = any symbols with a code length of nbits or less can be decoded
00344  *          in one lookup of the table.
00345  * length = A table to get code lengths from [0 to syms-1]
00346  * table  = The table to fill up with decoded symbols and pointers.
00347  *
00348  * Returns 0 for OK or 1 for error
00349  */
00350 
00351 static int make_decode_table(ULONG nsyms, ULONG nbits, UBYTE *length, UWORD *table) {
00352     register UWORD sym;
00353     register ULONG leaf;
00354     register UBYTE bit_num = 1;
00355     ULONG fill;
00356     ULONG pos         = 0; /* the current position in the decode table */
00357     ULONG table_mask  = 1 << nbits;
00358     ULONG bit_mask    = table_mask >> 1; /* don't do 0 length codes */
00359     ULONG next_symbol = bit_mask; /* base of allocation for long codes */
00360 
00361     /* fill entries for codes short enough for a direct mapping */
00362     while (bit_num <= nbits) {
00363         for (sym = 0; sym < nsyms; sym++) {
00364             if (length[sym] == bit_num) {
00365                 leaf = pos;
00366 
00367                 if((pos += bit_mask) > table_mask) return 1; /* table overrun */
00368 
00369                 /* fill all possible lookups of this symbol with the symbol itself */
00370                 fill = bit_mask;
00371                 while (fill-- > 0) table[leaf++] = sym;
00372             }
00373         }
00374         bit_mask >>= 1;
00375         bit_num++;
00376     }
00377 
00378     /* if there are any codes longer than nbits */
00379     if (pos != table_mask) {
00380         /* clear the remainder of the table */
00381         for (sym = pos; sym < table_mask; sym++) table[sym] = 0;
00382 
00383         /* give ourselves room for codes to grow by up to 16 more bits */
00384         pos <<= 16;
00385         table_mask <<= 16;
00386         bit_mask = 1 << 15;
00387 
00388         while (bit_num <= 16) {
00389             for (sym = 0; sym < nsyms; sym++) {
00390                 if (length[sym] == bit_num) {
00391                     leaf = pos >> 16;
00392                     for (fill = 0; fill < bit_num - nbits; fill++) {
00393                         /* if this path hasn't been taken yet, 'allocate' two entries */
00394                         if (table[leaf] == 0) {
00395                             table[(next_symbol << 1)] = 0;
00396                             table[(next_symbol << 1) + 1] = 0;
00397                             table[leaf] = next_symbol++;
00398                         }
00399                         /* follow the path and select either left or right for next bit */
00400                         leaf = table[leaf] << 1;
00401                         if ((pos >> (15-fill)) & 1) leaf++;
00402                     }
00403                     table[leaf] = sym;
00404 
00405                     if ((pos += bit_mask) > table_mask) return 1; /* table overflow */
00406                 }
00407             }
00408             bit_mask >>= 1;
00409             bit_num++;
00410         }
00411     }
00412 
00413     /* full table? */
00414     if (pos == table_mask) return 0;
00415 
00416     /* either erroneous table, or all elements are 0 - let's find out. */
00417     for (sym = 0; sym < nsyms; sym++) if (length[sym]) return 1;
00418     return 0;
00419 }
00420 
00421 struct lzx_bits {
00422   ULONG bb;
00423   int bl;
00424   UBYTE *ip;
00425 };
00426 
00427 static int lzx_read_lens(struct LZXstate *pState, UBYTE *lens, ULONG first, ULONG last, struct lzx_bits *lb) {
00428     ULONG i,j, x,y;
00429     int z;
00430 
00431     register ULONG bitbuf = lb->bb;
00432     register int bitsleft = lb->bl;
00433     UBYTE *inpos = lb->ip;
00434     UWORD *hufftbl;
00435 
00436     for (x = 0; x < 20; x++) {
00437         READ_BITS(y, 4);
00438         LENTABLE(PRETREE)[x] = y;
00439     }
00440     BUILD_TABLE(PRETREE);
00441 
00442     for (x = first; x < last; ) {
00443         READ_HUFFSYM(PRETREE, z);
00444         if (z == 17) {
00445             READ_BITS(y, 4); y += 4;
00446             while (y--) lens[x++] = 0;
00447         }
00448         else if (z == 18) {
00449             READ_BITS(y, 5); y += 20;
00450             while (y--) lens[x++] = 0;
00451         }
00452         else if (z == 19) {
00453             READ_BITS(y, 1); y += 4;
00454             READ_HUFFSYM(PRETREE, z);
00455             z = lens[x] - z; if (z < 0) z += 17;
00456             while (y--) lens[x++] = z;
00457         }
00458         else {
00459             z = lens[x] - z; if (z < 0) z += 17;
00460             lens[x++] = z;
00461         }
00462     }
00463 
00464     lb->bb = bitbuf;
00465     lb->bl = bitsleft;
00466     lb->ip = inpos;
00467     return 0;
00468 }
00469 
00470 int LZXdecompress(struct LZXstate *pState, unsigned char *inpos, unsigned char *outpos, int inlen, int outlen) {
00471     UBYTE *endinp = inpos + inlen;
00472     UBYTE *window = pState->window;
00473     UBYTE *runsrc, *rundest;
00474     UWORD *hufftbl; /* used in READ_HUFFSYM macro as chosen decoding table */
00475 
00476     ULONG window_posn = pState->window_posn;
00477     ULONG window_size = pState->window_size;
00478     ULONG R0 = pState->R0;
00479     ULONG R1 = pState->R1;
00480     ULONG R2 = pState->R2;
00481 
00482     register ULONG bitbuf;
00483     register int bitsleft;
00484     ULONG match_offset, i,j,k; /* ijk used in READ_HUFFSYM macro */
00485     struct lzx_bits lb; /* used in READ_LENGTHS macro */
00486 
00487     int togo = outlen, this_run, main_element, aligned_bits;
00488     int match_length, length_footer, extra, verbatim_bits;
00489     int copy_length;
00490 
00491     INIT_BITSTREAM;
00492 
00493     /* read header if necessary */
00494     if (!pState->header_read) {
00495         i = j = 0;
00496         READ_BITS(k, 1); if (k) { READ_BITS(i,16); READ_BITS(j,16); }
00497         pState->intel_filesize = (i << 16) | j; /* or 0 if not encoded */
00498         pState->header_read = 1;
00499     }
00500 
00501     /* main decoding loop */
00502     while (togo > 0) {
00503         /* last block finished, new block expected */
00504         if (pState->block_remaining == 0) {
00505             if (pState->block_type == LZX_BLOCKTYPE_UNCOMPRESSED) {
00506                 if (pState->block_length & 1) inpos++; /* realign bitstream to word */
00507                 INIT_BITSTREAM;
00508             }
00509 
00510             READ_BITS(pState->block_type, 3);
00511             READ_BITS(i, 16);
00512             READ_BITS(j, 8);
00513             pState->block_remaining = pState->block_length = (i << 8) | j;
00514 
00515             switch (pState->block_type) {
00516                 case LZX_BLOCKTYPE_ALIGNED:
00517                     for (i = 0; i < 8; i++) { READ_BITS(j, 3); LENTABLE(ALIGNED)[i] = j; }
00518                     BUILD_TABLE(ALIGNED);
00519                     /* rest of aligned header is same as verbatim */
00520 
00521                 case LZX_BLOCKTYPE_VERBATIM:
00522                     READ_LENGTHS(MAINTREE, 0, 256);
00523                     READ_LENGTHS(MAINTREE, 256, pState->main_elements);
00524                     BUILD_TABLE(MAINTREE);
00525                     if (LENTABLE(MAINTREE)[0xE8] != 0) pState->intel_started = 1;
00526 
00527                     READ_LENGTHS(LENGTH, 0, LZX_NUM_SECONDARY_LENGTHS);
00528                     BUILD_TABLE(LENGTH);
00529                     break;
00530 
00531                 case LZX_BLOCKTYPE_UNCOMPRESSED:
00532                     pState->intel_started = 1; /* because we can't assume otherwise */
00533                     ENSURE_BITS(16); /* get up to 16 pad bits into the buffer */
00534                     if (bitsleft > 16) inpos -= 2; /* and align the bitstream! */
00535                     R0 = inpos[0]|(inpos[1]<<8)|(inpos[2]<<16)|(inpos[3]<<24);inpos+=4;
00536                     R1 = inpos[0]|(inpos[1]<<8)|(inpos[2]<<16)|(inpos[3]<<24);inpos+=4;
00537                     R2 = inpos[0]|(inpos[1]<<8)|(inpos[2]<<16)|(inpos[3]<<24);inpos+=4;
00538                     break;
00539 
00540                 default:
00541                     return DECR_ILLEGALDATA;
00542             }
00543         }
00544 
00545         /* buffer exhaustion check */
00546         if (inpos > endinp) {
00547             /* it's possible to have a file where the next run is less than
00548              * 16 bits in size. In this case, the READ_HUFFSYM() macro used
00549              * in building the tables will exhaust the buffer, so we should
00550              * allow for this, but not allow those accidentally read bits to
00551              * be used (so we check that there are at least 16 bits
00552              * remaining - in this boundary case they aren't really part of
00553              * the compressed data)
00554              */
00555             if (inpos > (endinp+2) || bitsleft < 16) return DECR_ILLEGALDATA;
00556         }
00557 
00558         while ((this_run = pState->block_remaining) > 0 && togo > 0) {
00559             if (this_run > togo) this_run = togo;
00560             togo -= this_run;
00561             pState->block_remaining -= this_run;
00562 
00563             /* apply 2^x-1 mask */
00564             window_posn &= window_size - 1;
00565             /* runs can't straddle the window wraparound */
00566             if ((window_posn + this_run) > window_size)
00567                 return DECR_DATAFORMAT;
00568 
00569             switch (pState->block_type) {
00570 
00571                 case LZX_BLOCKTYPE_VERBATIM:
00572                     while (this_run > 0) {
00573                         READ_HUFFSYM(MAINTREE, main_element);
00574 
00575                         if (main_element < LZX_NUM_CHARS) {
00576                             /* literal: 0 to LZX_NUM_CHARS-1 */
00577                             window[window_posn++] = main_element;
00578                             this_run--;
00579                         }
00580                         else {
00581                             /* match: LZX_NUM_CHARS + ((slot<<3) | length_header (3 bits)) */
00582                             main_element -= LZX_NUM_CHARS;
00583 
00584                             match_length = main_element & LZX_NUM_PRIMARY_LENGTHS;
00585                             if (match_length == LZX_NUM_PRIMARY_LENGTHS) {
00586                                 READ_HUFFSYM(LENGTH, length_footer);
00587                                 match_length += length_footer;
00588                             }
00589                             match_length += LZX_MIN_MATCH;
00590 
00591                             match_offset = main_element >> 3;
00592 
00593                             if (match_offset > 2) {
00594                                 /* not repeated offset */
00595                                 if (match_offset != 3) {
00596                                     extra = extra_bits[match_offset];
00597                                     READ_BITS(verbatim_bits, extra);
00598                                     match_offset = position_base[match_offset] - 2 + verbatim_bits;
00599                                 }
00600                                 else {
00601                                     match_offset = 1;
00602                                 }
00603 
00604                                 /* update repeated offset LRU queue */
00605                                 R2 = R1; R1 = R0; R0 = match_offset;
00606                             }
00607                             else if (match_offset == 0) {
00608                                 match_offset = R0;
00609                             }
00610                             else if (match_offset == 1) {
00611                                 match_offset = R1;
00612                                 R1 = R0; R0 = match_offset;
00613                             }
00614                             else /* match_offset == 2 */ {
00615                                 match_offset = R2;
00616                                 R2 = R0; R0 = match_offset;
00617                             }
00618 
00619                             rundest = window + window_posn;
00620                             this_run -= match_length;
00621 
00622                             /* copy any wrapped around source data */
00623                             if (window_posn >= match_offset) {
00624                                 /* no wrap */
00625                                  runsrc = rundest - match_offset;
00626                             } else {
00627                                 runsrc = rundest + (window_size - match_offset);
00628                                 copy_length = match_offset - window_posn;
00629                                 if (copy_length < match_length) {
00630                                      match_length -= copy_length;
00631                                      window_posn += copy_length;
00632                                      while (copy_length-- > 0) *rundest++ = *runsrc++;
00633                                      runsrc = window;
00634                                 }
00635                             }
00636                             window_posn += match_length;
00637  
00638                             /* copy match data - no worries about destination wraps */
00639                             while (match_length-- > 0) *rundest++ = *runsrc++;
00640 
00641                         }
00642                     }
00643                     break;
00644 
00645                 case LZX_BLOCKTYPE_ALIGNED:
00646                     while (this_run > 0) {
00647                         READ_HUFFSYM(MAINTREE, main_element);
00648 
00649                         if (main_element < LZX_NUM_CHARS) {
00650                             /* literal: 0 to LZX_NUM_CHARS-1 */
00651                             window[window_posn++] = main_element;
00652                             this_run--;
00653                         }
00654                         else {
00655                             /* match: LZX_NUM_CHARS + ((slot<<3) | length_header (3 bits)) */
00656                             main_element -= LZX_NUM_CHARS;
00657 
00658                             match_length = main_element & LZX_NUM_PRIMARY_LENGTHS;
00659                             if (match_length == LZX_NUM_PRIMARY_LENGTHS) {
00660                                 READ_HUFFSYM(LENGTH, length_footer);
00661                                 match_length += length_footer;
00662                             }
00663                             match_length += LZX_MIN_MATCH;
00664 
00665                             match_offset = main_element >> 3;
00666 
00667                             if (match_offset > 2) {
00668                                 /* not repeated offset */
00669                                 extra = extra_bits[match_offset];
00670                                 match_offset = position_base[match_offset] - 2;
00671                                 if (extra > 3) {
00672                                     /* verbatim and aligned bits */
00673                                     extra -= 3;
00674                                     READ_BITS(verbatim_bits, extra);
00675                                     match_offset += (verbatim_bits << 3);
00676                                     READ_HUFFSYM(ALIGNED, aligned_bits);
00677                                     match_offset += aligned_bits;
00678                                 }
00679                                 else if (extra == 3) {
00680                                     /* aligned bits only */
00681                                     READ_HUFFSYM(ALIGNED, aligned_bits);
00682                                     match_offset += aligned_bits;
00683                                 }
00684                                 else if (extra > 0) { /* extra==1, extra==2 */
00685                                     /* verbatim bits only */
00686                                     READ_BITS(verbatim_bits, extra);
00687                                     match_offset += verbatim_bits;
00688                                 }
00689                                 else /* extra == 0 */ {
00690                                     /* ??? */
00691                                     match_offset = 1;
00692                                 }
00693 
00694                                 /* update repeated offset LRU queue */
00695                                 R2 = R1; R1 = R0; R0 = match_offset;
00696                             }
00697                             else if (match_offset == 0) {
00698                                 match_offset = R0;
00699                             }
00700                             else if (match_offset == 1) {
00701                                 match_offset = R1;
00702                                 R1 = R0; R0 = match_offset;
00703                             }
00704                             else /* match_offset == 2 */ {
00705                                 match_offset = R2;
00706                                 R2 = R0; R0 = match_offset;
00707                             }
00708 
00709                             rundest = window + window_posn;
00710                             this_run -= match_length;
00711 
00712                             /* copy any wrapped around source data */
00713                             if (window_posn >= match_offset) {
00714                                 /* no wrap */
00715                                  runsrc = rundest - match_offset;
00716                             } else {
00717                                 runsrc = rundest + (window_size - match_offset);
00718                                 copy_length = match_offset - window_posn;
00719                                 if (copy_length < match_length) {
00720                                      match_length -= copy_length;
00721                                      window_posn += copy_length;
00722                                      while (copy_length-- > 0) *rundest++ = *runsrc++;
00723                                      runsrc = window;
00724                                 }
00725                             }
00726                             window_posn += match_length;
00727  
00728                             /* copy match data - no worries about destination wraps */
00729                             while (match_length-- > 0) *rundest++ = *runsrc++;
00730 
00731                         }
00732                     }
00733                     break;
00734 
00735                 case LZX_BLOCKTYPE_UNCOMPRESSED:
00736                     if ((inpos + this_run) > endinp) return DECR_ILLEGALDATA;
00737                     memcpy(window + window_posn, inpos, (size_t) this_run);
00738                     inpos += this_run; window_posn += this_run;
00739                     break;
00740 
00741                 default:
00742                     return DECR_ILLEGALDATA; /* might as well */
00743             }
00744 
00745         }
00746     }
00747 
00748     if (togo != 0) return DECR_ILLEGALDATA;
00749     memcpy(outpos, window + ((!window_posn) ? window_size : window_posn) - outlen, (size_t) outlen);
00750 
00751     pState->window_posn = window_posn;
00752     pState->R0 = R0;
00753     pState->R1 = R1;
00754     pState->R2 = R2;
00755 
00756     /* intel E8 decoding */
00757     if ((pState->frames_read++ < 32768) && pState->intel_filesize != 0) {
00758         if (outlen <= 6 || !pState->intel_started) {
00759             pState->intel_curpos += outlen;
00760         }
00761         else {
00762             UBYTE *data    = outpos;
00763             UBYTE *dataend = data + outlen - 10;
00764             LONG curpos    = pState->intel_curpos;
00765             LONG filesize  = pState->intel_filesize;
00766             LONG abs_off, rel_off;
00767 
00768             pState->intel_curpos = curpos + outlen;
00769 
00770             while (data < dataend) {
00771                 if (*data++ != 0xE8) { curpos++; continue; }
00772                 abs_off = data[0] | (data[1]<<8) | (data[2]<<16) | (data[3]<<24);
00773                 if ((abs_off >= -curpos) && (abs_off < filesize)) {
00774                     rel_off = (abs_off >= 0) ? abs_off - curpos : abs_off + filesize;
00775                     data[0] = (UBYTE) rel_off;
00776                     data[1] = (UBYTE) (rel_off >> 8);
00777                     data[2] = (UBYTE) (rel_off >> 16);
00778                     data[3] = (UBYTE) (rel_off >> 24);
00779                 }
00780                 data += 4;
00781                 curpos += 5;
00782             }
00783         }
00784     }
00785     return DECR_OK;
00786 }
00787 
00788 #ifdef LZX_CHM_TESTDRIVER
00789 int main(int c, char **v)
00790 {
00791     FILE *fin, *fout;
00792     struct LZXstate state;
00793     UBYTE ibuf[16384];
00794     UBYTE obuf[32768];
00795     int ilen, olen;
00796     int status;
00797     int i;
00798     int count=0;
00799     int w = atoi(v[1]);
00800     LZXinit(&state, w);
00801     fout = fopen(v[2], "wb");
00802     for (i=3; i<c; i++)
00803     {
00804         fin = fopen(v[i], "rb");
00805         ilen = fread(ibuf, 1, 16384, fin);
00806         status = LZXdecompress(&state, ibuf, obuf, ilen, 32768);
00807         switch (status)
00808         {
00809             case DECR_OK:
00810                 printf("ok\n");
00811                 fwrite(obuf, 1, 32768, fout);
00812                 break;
00813             case DECR_DATAFORMAT:
00814                 printf("bad format\n");
00815                 break;
00816             case DECR_ILLEGALDATA:
00817                 printf("illegal data\n");
00818                 break;
00819             case DECR_NOMEMORY:
00820                 printf("no memory\n");
00821                 break;
00822             default:
00823                 break;
00824         }
00825         fclose(fin);
00826         if (++count == 2)
00827         {
00828             count = 0;
00829             LZXreset(&state);
00830         }
00831     }
00832     fclose(fout);
00833 }
00834 #endif