Home | Info | Community | Development | myReactOS | Contact Us
ReactOS Development > Doxygenlzx.c
Go to the documentation of this file.
00001 /*************************************************************************** 00002 * lzx.c - LZX decompression routines * 00003 * ------------------- * 00004 * * 00005 * maintainer: Jed Wing <jedwin@ugcs.caltech.edu> * 00006 * source: modified lzx.c from cabextract v0.5 * 00007 * notes: This file was taken from cabextract v0.5, which was, * 00008 * itself, a modified version of the lzx decompression code * 00009 * from unlzx. * 00010 * * 00011 * platforms: In its current incarnation, this file has been tested on * 00012 * two different Linux platforms (one, redhat-based, with a * 00013 * 2.1.2 glibc and gcc 2.95.x, and the other, Debian, with * 00014 * 2.2.4 glibc and both gcc 2.95.4 and gcc 3.0.2). Both were * 00015 * Intel x86 compatible machines. * 00016 ***************************************************************************/ 00017 00018 /*************************************************************************** 00019 * 00020 * Copyright(C) Stuart Caie 00021 * 00022 * This library is free software; you can redistribute it and/or 00023 * modify it under the terms of the GNU Lesser General Public 00024 * License as published by the Free Software Foundation; either 00025 * version 2.1 of the License, or (at your option) any later version. 00026 * 00027 * This library is distributed in the hope that it will be useful, 00028 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00029 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00030 * Lesser General Public License for more details. 00031 * 00032 * You should have received a copy of the GNU Lesser General Public 00033 * License along with this library; if not, write to the Free Software 00034 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA 00035 * 00036 ***************************************************************************/ 00037 00038 #include "lzx.h" 00039 #include <stdarg.h> 00040 #include <stdio.h> 00041 #include <stdlib.h> 00042 #include <string.h> 00043 00044 #include "windef.h" 00045 #include "winbase.h" 00046 00047 /* sized types */ 00048 typedef unsigned char UBYTE; /* 8 bits exactly */ 00049 typedef unsigned short UWORD; /* 16 bits (or more) */ 00050 00051 /* some constants defined by the LZX specification */ 00052 #define LZX_MIN_MATCH (2) 00053 #define LZX_MAX_MATCH (257) 00054 #define LZX_NUM_CHARS (256) 00055 #define LZX_BLOCKTYPE_INVALID (0) /* also blocktypes 4-7 invalid */ 00056 #define LZX_BLOCKTYPE_VERBATIM (1) 00057 #define LZX_BLOCKTYPE_ALIGNED (2) 00058 #define LZX_BLOCKTYPE_UNCOMPRESSED (3) 00059 #define LZX_PRETREE_NUM_ELEMENTS (20) 00060 #define LZX_ALIGNED_NUM_ELEMENTS (8) /* aligned offset tree #elements */ 00061 #define LZX_NUM_PRIMARY_LENGTHS (7) /* this one missing from spec! */ 00062 #define LZX_NUM_SECONDARY_LENGTHS (249) /* length tree #elements */ 00063 00064 /* LZX huffman defines: tweak tablebits as desired */ 00065 #define LZX_PRETREE_MAXSYMBOLS (LZX_PRETREE_NUM_ELEMENTS) 00066 #define LZX_PRETREE_TABLEBITS (6) 00067 #define LZX_MAINTREE_MAXSYMBOLS (LZX_NUM_CHARS + 50*8) 00068 #define LZX_MAINTREE_TABLEBITS (12) 00069 #define LZX_LENGTH_MAXSYMBOLS (LZX_NUM_SECONDARY_LENGTHS+1) 00070 #define LZX_LENGTH_TABLEBITS (12) 00071 #define LZX_ALIGNED_MAXSYMBOLS (LZX_ALIGNED_NUM_ELEMENTS) 00072 #define LZX_ALIGNED_TABLEBITS (7) 00073 00074 #define LZX_LENTABLE_SAFETY (64) /* we allow length table decoding overruns */ 00075 00076 #define LZX_DECLARE_TABLE(tbl) \ 00077 UWORD tbl##_table[(1<<LZX_##tbl##_TABLEBITS) + (LZX_##tbl##_MAXSYMBOLS<<1)];\ 00078 UBYTE tbl##_len [LZX_##tbl##_MAXSYMBOLS + LZX_LENTABLE_SAFETY] 00079 00080 struct LZXstate 00081 { 00082 UBYTE *window; /* the actual decoding window */ 00083 ULONG window_size; /* window size (32Kb through 2Mb) */ 00084 ULONG actual_size; /* window size when it was first allocated */ 00085 ULONG window_posn; /* current offset within the window */ 00086 ULONG R0, R1, R2; /* for the LRU offset system */ 00087 UWORD main_elements; /* number of main tree elements */ 00088 int header_read; /* have we started decoding at all yet? */ 00089 UWORD block_type; /* type of this block */ 00090 ULONG block_length; /* uncompressed length of this block */ 00091 ULONG block_remaining; /* uncompressed bytes still left to decode */ 00092 ULONG frames_read; /* the number of CFDATA blocks processed */ 00093 LONG intel_filesize; /* magic header value used for transform */ 00094 LONG intel_curpos; /* current offset in transform space */ 00095 int intel_started; /* have we seen any translatable data yet? */ 00096 00097 LZX_DECLARE_TABLE(PRETREE); 00098 LZX_DECLARE_TABLE(MAINTREE); 00099 LZX_DECLARE_TABLE(LENGTH); 00100 LZX_DECLARE_TABLE(ALIGNED); 00101 }; 00102 00103 /* LZX decruncher */ 00104 00105 /* Microsoft's LZX document and their implementation of the 00106 * com.ms.util.cab Java package do not concur. 00107 * 00108 * In the LZX document, there is a table showing the correlation between 00109 * window size and the number of position slots. It states that the 1MB 00110 * window = 40 slots and the 2MB window = 42 slots. In the implementation, 00111 * 1MB = 42 slots, 2MB = 50 slots. The actual calculation is 'find the 00112 * first slot whose position base is equal to or more than the required 00113 * window size'. This would explain why other tables in the document refer 00114 * to 50 slots rather than 42. 00115 * 00116 * The constant NUM_PRIMARY_LENGTHS used in the decompression pseudocode 00117 * is not defined in the specification. 00118 * 00119 * The LZX document does not state the uncompressed block has an 00120 * uncompressed length field. Where does this length field come from, so 00121 * we can know how large the block is? The implementation has it as the 24 00122 * bits following after the 3 blocktype bits, before the alignment 00123 * padding. 00124 * 00125 * The LZX document states that aligned offset blocks have their aligned 00126 * offset huffman tree AFTER the main and length trees. The implementation 00127 * suggests that the aligned offset tree is BEFORE the main and length 00128 * trees. 00129 * 00130 * The LZX document decoding algorithm states that, in an aligned offset 00131 * block, if an extra_bits value is 1, 2 or 3, then that number of bits 00132 * should be read and the result added to the match offset. This is 00133 * correct for 1 and 2, but not 3, where just a huffman symbol (using the 00134 * aligned tree) should be read. 00135 * 00136 * Regarding the E8 preprocessing, the LZX document states 'No translation 00137 * may be performed on the last 6 bytes of the input block'. This is 00138 * correct. However, the pseudocode provided checks for the *E8 leader* 00139 * up to the last 6 bytes. If the leader appears between -10 and -7 bytes 00140 * from the end, this would cause the next four bytes to be modified, at 00141 * least one of which would be in the last 6 bytes, which is not allowed 00142 * according to the spec. 00143 * 00144 * The specification states that the huffman trees must always contain at 00145 * least one element. However, many CAB files contain blocks where the 00146 * length tree is completely empty (because there are no matches), and 00147 * this is expected to succeed. 00148 */ 00149 00150 00151 /* LZX uses what it calls 'position slots' to represent match offsets. 00152 * What this means is that a small 'position slot' number and a small 00153 * offset from that slot are encoded instead of one large offset for 00154 * every match. 00155 * - position_base is an index to the position slot bases 00156 * - extra_bits states how many bits of offset-from-base data is needed. 00157 */ 00158 static const UBYTE extra_bits[51] = { 00159 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 00160 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 00161 15, 15, 16, 16, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 00162 17, 17, 17 00163 }; 00164 00165 static const ULONG position_base[51] = { 00166 0, 1, 2, 3, 4, 6, 8, 12, 16, 24, 32, 48, 64, 96, 128, 192, 00167 256, 384, 512, 768, 1024, 1536, 2048, 3072, 4096, 6144, 8192, 12288, 16384, 24576, 32768, 49152, 00168 65536, 98304, 131072, 196608, 262144, 393216, 524288, 655360, 786432, 917504, 1048576, 1179648, 1310720, 1441792, 1572864, 1703936, 00169 1835008, 1966080, 2097152 00170 }; 00171 00172 struct LZXstate *LZXinit(int window) 00173 { 00174 struct LZXstate *pState=NULL; 00175 ULONG wndsize = 1 << window; 00176 int i, posn_slots; 00177 00178 /* LZX supports window sizes of 2^15 (32Kb) through 2^21 (2Mb) */ 00179 /* if a previously allocated window is big enough, keep it */ 00180 if (window < 15 || window > 21) return NULL; 00181 00182 /* allocate state and associated window */ 00183 pState = HeapAlloc(GetProcessHeap(), 0, sizeof(struct LZXstate)); 00184 if (!(pState->window = HeapAlloc(GetProcessHeap(), 0, wndsize))) 00185 { 00186 HeapFree(GetProcessHeap(), 0, pState); 00187 return NULL; 00188 } 00189 pState->actual_size = wndsize; 00190 pState->window_size = wndsize; 00191 00192 /* calculate required position slots */ 00193 if (window == 20) posn_slots = 42; 00194 else if (window == 21) posn_slots = 50; 00195 else posn_slots = window << 1; 00196 00198 /* posn_slots=i=0; while (i < wndsize) i += 1 << extra_bits[posn_slots++]; */ 00199 00200 /* initialize other state */ 00201 pState->R0 = pState->R1 = pState->R2 = 1; 00202 pState->main_elements = LZX_NUM_CHARS + (posn_slots << 3); 00203 pState->header_read = 0; 00204 pState->frames_read = 0; 00205 pState->block_remaining = 0; 00206 pState->block_type = LZX_BLOCKTYPE_INVALID; 00207 pState->intel_curpos = 0; 00208 pState->intel_started = 0; 00209 pState->window_posn = 0; 00210 00211 /* initialise tables to 0 (because deltas will be applied to them) */ 00212 for (i = 0; i < LZX_MAINTREE_MAXSYMBOLS; i++) pState->MAINTREE_len[i] = 0; 00213 for (i = 0; i < LZX_LENGTH_MAXSYMBOLS; i++) pState->LENGTH_len[i] = 0; 00214 00215 return pState; 00216 } 00217 00218 void LZXteardown(struct LZXstate *pState) 00219 { 00220 if (pState) 00221 { 00222 HeapFree(GetProcessHeap(), 0, pState->window); 00223 HeapFree(GetProcessHeap(), 0, pState); 00224 } 00225 } 00226 00227 int LZXreset(struct LZXstate *pState) 00228 { 00229 int i; 00230 00231 pState->R0 = pState->R1 = pState->R2 = 1; 00232 pState->header_read = 0; 00233 pState->frames_read = 0; 00234 pState->block_remaining = 0; 00235 pState->block_type = LZX_BLOCKTYPE_INVALID; 00236 pState->intel_curpos = 0; 00237 pState->intel_started = 0; 00238 pState->window_posn = 0; 00239 00240 for (i = 0; i < LZX_MAINTREE_MAXSYMBOLS + LZX_LENTABLE_SAFETY; i++) pState->MAINTREE_len[i] = 0; 00241 for (i = 0; i < LZX_LENGTH_MAXSYMBOLS + LZX_LENTABLE_SAFETY; i++) pState->LENGTH_len[i] = 0; 00242 00243 return DECR_OK; 00244 } 00245 00246 00247 /* Bitstream reading macros: 00248 * 00249 * INIT_BITSTREAM should be used first to set up the system 00250 * READ_BITS(var,n) takes N bits from the buffer and puts them in var 00251 * 00252 * ENSURE_BITS(n) ensures there are at least N bits in the bit buffer 00253 * PEEK_BITS(n) extracts (without removing) N bits from the bit buffer 00254 * REMOVE_BITS(n) removes N bits from the bit buffer 00255 * 00256 * These bit access routines work by using the area beyond the MSB and the 00257 * LSB as a free source of zeroes. This avoids having to mask any bits. 00258 * So we have to know the bit width of the bitbuffer variable. This is 00259 * sizeof(ULONG) * 8, also defined as ULONG_BITS 00260 */ 00261 00262 /* number of bits in ULONG. Note: This must be at multiple of 16, and at 00263 * least 32 for the bitbuffer code to work (ie, it must be able to ensure 00264 * up to 17 bits - that's adding 16 bits when there's one bit left, or 00265 * adding 32 bits when there are no bits left. The code should work fine 00266 * for machines where ULONG >= 32 bits. 00267 */ 00268 #define ULONG_BITS (sizeof(ULONG)<<3) 00269 00270 #define INIT_BITSTREAM do { bitsleft = 0; bitbuf = 0; } while (0) 00271 00272 #define ENSURE_BITS(n) \ 00273 while (bitsleft < (n)) { \ 00274 bitbuf |= ((inpos[1]<<8)|inpos[0]) << (ULONG_BITS-16 - bitsleft); \ 00275 bitsleft += 16; inpos+=2; \ 00276 } 00277 00278 #define PEEK_BITS(n) (bitbuf >> (ULONG_BITS - (n))) 00279 #define REMOVE_BITS(n) ((bitbuf <<= (n)), (bitsleft -= (n))) 00280 00281 #define READ_BITS(v,n) do { \ 00282 ENSURE_BITS(n); \ 00283 (v) = PEEK_BITS(n); \ 00284 REMOVE_BITS(n); \ 00285 } while (0) 00286 00287 00288 /* Huffman macros */ 00289 00290 #define TABLEBITS(tbl) (LZX_##tbl##_TABLEBITS) 00291 #define MAXSYMBOLS(tbl) (LZX_##tbl##_MAXSYMBOLS) 00292 #define SYMTABLE(tbl) (pState->tbl##_table) 00293 #define LENTABLE(tbl) (pState->tbl##_len) 00294 00295 /* BUILD_TABLE(tablename) builds a huffman lookup table from code lengths. 00296 * In reality, it just calls make_decode_table() with the appropriate 00297 * values - they're all fixed by some #defines anyway, so there's no point 00298 * writing each call out in full by hand. 00299 */ 00300 #define BUILD_TABLE(tbl) \ 00301 if (make_decode_table( \ 00302 MAXSYMBOLS(tbl), TABLEBITS(tbl), LENTABLE(tbl), SYMTABLE(tbl) \ 00303 )) { return DECR_ILLEGALDATA; } 00304 00305 00306 /* READ_HUFFSYM(tablename, var) decodes one huffman symbol from the 00307 * bitstream using the stated table and puts it in var. 00308 */ 00309 #define READ_HUFFSYM(tbl,var) do { \ 00310 ENSURE_BITS(16); \ 00311 hufftbl = SYMTABLE(tbl); \ 00312 if ((i = hufftbl[PEEK_BITS(TABLEBITS(tbl))]) >= MAXSYMBOLS(tbl)) { \ 00313 j = 1 << (ULONG_BITS - TABLEBITS(tbl)); \ 00314 do { \ 00315 j >>= 1; i <<= 1; i |= (bitbuf & j) ? 1 : 0; \ 00316 if (!j) { return DECR_ILLEGALDATA; } \ 00317 } while ((i = hufftbl[i]) >= MAXSYMBOLS(tbl)); \ 00318 } \ 00319 j = LENTABLE(tbl)[(var) = i]; \ 00320 REMOVE_BITS(j); \ 00321 } while (0) 00322 00323 00324 /* READ_LENGTHS(tablename, first, last) reads in code lengths for symbols 00325 * first to last in the given table. The code lengths are stored in their 00326 * own special LZX way. 00327 */ 00328 #define READ_LENGTHS(tbl,first,last) do { \ 00329 lb.bb = bitbuf; lb.bl = bitsleft; lb.ip = inpos; \ 00330 if (lzx_read_lens(pState, LENTABLE(tbl),(first),(last),&lb)) { \ 00331 return DECR_ILLEGALDATA; \ 00332 } \ 00333 bitbuf = lb.bb; bitsleft = lb.bl; inpos = lb.ip; \ 00334 } while (0) 00335 00336 00337 /* make_decode_table(nsyms, nbits, length[], table[]) 00338 * 00339 * This function was coded by David Tritscher. It builds a fast huffman 00340 * decoding table out of just a canonical huffman code lengths table. 00341 * 00342 * nsyms = total number of symbols in this huffman tree. 00343 * nbits = any symbols with a code length of nbits or less can be decoded 00344 * in one lookup of the table. 00345 * length = A table to get code lengths from [0 to syms-1] 00346 * table = The table to fill up with decoded symbols and pointers. 00347 * 00348 * Returns 0 for OK or 1 for error 00349 */ 00350 00351 static int make_decode_table(ULONG nsyms, ULONG nbits, UBYTE *length, UWORD *table) { 00352 register UWORD sym; 00353 register ULONG leaf; 00354 register UBYTE bit_num = 1; 00355 ULONG fill; 00356 ULONG pos = 0; /* the current position in the decode table */ 00357 ULONG table_mask = 1 << nbits; 00358 ULONG bit_mask = table_mask >> 1; /* don't do 0 length codes */ 00359 ULONG next_symbol = bit_mask; /* base of allocation for long codes */ 00360 00361 /* fill entries for codes short enough for a direct mapping */ 00362 while (bit_num <= nbits) { 00363 for (sym = 0; sym < nsyms; sym++) { 00364 if (length[sym] == bit_num) { 00365 leaf = pos; 00366 00367 if((pos += bit_mask) > table_mask) return 1; /* table overrun */ 00368 00369 /* fill all possible lookups of this symbol with the symbol itself */ 00370 fill = bit_mask; 00371 while (fill-- > 0) table[leaf++] = sym; 00372 } 00373 } 00374 bit_mask >>= 1; 00375 bit_num++; 00376 } 00377 00378 /* if there are any codes longer than nbits */ 00379 if (pos != table_mask) { 00380 /* clear the remainder of the table */ 00381 for (sym = pos; sym < table_mask; sym++) table[sym] = 0; 00382 00383 /* give ourselves room for codes to grow by up to 16 more bits */ 00384 pos <<= 16; 00385 table_mask <<= 16; 00386 bit_mask = 1 << 15; 00387 00388 while (bit_num <= 16) { 00389 for (sym = 0; sym < nsyms; sym++) { 00390 if (length[sym] == bit_num) { 00391 leaf = pos >> 16; 00392 for (fill = 0; fill < bit_num - nbits; fill++) { 00393 /* if this path hasn't been taken yet, 'allocate' two entries */ 00394 if (table[leaf] == 0) { 00395 table[(next_symbol << 1)] = 0; 00396 table[(next_symbol << 1) + 1] = 0; 00397 table[leaf] = next_symbol++; 00398 } 00399 /* follow the path and select either left or right for next bit */ 00400 leaf = table[leaf] << 1; 00401 if ((pos >> (15-fill)) & 1) leaf++; 00402 } 00403 table[leaf] = sym; 00404 00405 if ((pos += bit_mask) > table_mask) return 1; /* table overflow */ 00406 } 00407 } 00408 bit_mask >>= 1; 00409 bit_num++; 00410 } 00411 } 00412 00413 /* full table? */ 00414 if (pos == table_mask) return 0; 00415 00416 /* either erroneous table, or all elements are 0 - let's find out. */ 00417 for (sym = 0; sym < nsyms; sym++) if (length[sym]) return 1; 00418 return 0; 00419 } 00420 00421 struct lzx_bits { 00422 ULONG bb; 00423 int bl; 00424 UBYTE *ip; 00425 }; 00426 00427 static int lzx_read_lens(struct LZXstate *pState, UBYTE *lens, ULONG first, ULONG last, struct lzx_bits *lb) { 00428 ULONG i,j, x,y; 00429 int z; 00430 00431 register ULONG bitbuf = lb->bb; 00432 register int bitsleft = lb->bl; 00433 UBYTE *inpos = lb->ip; 00434 UWORD *hufftbl; 00435 00436 for (x = 0; x < 20; x++) { 00437 READ_BITS(y, 4); 00438 LENTABLE(PRETREE)[x] = y; 00439 } 00440 BUILD_TABLE(PRETREE); 00441 00442 for (x = first; x < last; ) { 00443 READ_HUFFSYM(PRETREE, z); 00444 if (z == 17) { 00445 READ_BITS(y, 4); y += 4; 00446 while (y--) lens[x++] = 0; 00447 } 00448 else if (z == 18) { 00449 READ_BITS(y, 5); y += 20; 00450 while (y--) lens[x++] = 0; 00451 } 00452 else if (z == 19) { 00453 READ_BITS(y, 1); y += 4; 00454 READ_HUFFSYM(PRETREE, z); 00455 z = lens[x] - z; if (z < 0) z += 17; 00456 while (y--) lens[x++] = z; 00457 } 00458 else { 00459 z = lens[x] - z; if (z < 0) z += 17; 00460 lens[x++] = z; 00461 } 00462 } 00463 00464 lb->bb = bitbuf; 00465 lb->bl = bitsleft; 00466 lb->ip = inpos; 00467 return 0; 00468 } 00469 00470 int LZXdecompress(struct LZXstate *pState, unsigned char *inpos, unsigned char *outpos, int inlen, int outlen) { 00471 UBYTE *endinp = inpos + inlen; 00472 UBYTE *window = pState->window; 00473 UBYTE *runsrc, *rundest; 00474 UWORD *hufftbl; /* used in READ_HUFFSYM macro as chosen decoding table */ 00475 00476 ULONG window_posn = pState->window_posn; 00477 ULONG window_size = pState->window_size; 00478 ULONG R0 = pState->R0; 00479 ULONG R1 = pState->R1; 00480 ULONG R2 = pState->R2; 00481 00482 register ULONG bitbuf; 00483 register int bitsleft; 00484 ULONG match_offset, i,j,k; /* ijk used in READ_HUFFSYM macro */ 00485 struct lzx_bits lb; /* used in READ_LENGTHS macro */ 00486 00487 int togo = outlen, this_run, main_element, aligned_bits; 00488 int match_length, length_footer, extra, verbatim_bits; 00489 int copy_length; 00490 00491 INIT_BITSTREAM; 00492 00493 /* read header if necessary */ 00494 if (!pState->header_read) { 00495 i = j = 0; 00496 READ_BITS(k, 1); if (k) { READ_BITS(i,16); READ_BITS(j,16); } 00497 pState->intel_filesize = (i << 16) | j; /* or 0 if not encoded */ 00498 pState->header_read = 1; 00499 } 00500 00501 /* main decoding loop */ 00502 while (togo > 0) { 00503 /* last block finished, new block expected */ 00504 if (pState->block_remaining == 0) { 00505 if (pState->block_type == LZX_BLOCKTYPE_UNCOMPRESSED) { 00506 if (pState->block_length & 1) inpos++; /* realign bitstream to word */ 00507 INIT_BITSTREAM; 00508 } 00509 00510 READ_BITS(pState->block_type, 3); 00511 READ_BITS(i, 16); 00512 READ_BITS(j, 8); 00513 pState->block_remaining = pState->block_length = (i << 8) | j; 00514 00515 switch (pState->block_type) { 00516 case LZX_BLOCKTYPE_ALIGNED: 00517 for (i = 0; i < 8; i++) { READ_BITS(j, 3); LENTABLE(ALIGNED)[i] = j; } 00518 BUILD_TABLE(ALIGNED); 00519 /* rest of aligned header is same as verbatim */ 00520 00521 case LZX_BLOCKTYPE_VERBATIM: 00522 READ_LENGTHS(MAINTREE, 0, 256); 00523 READ_LENGTHS(MAINTREE, 256, pState->main_elements); 00524 BUILD_TABLE(MAINTREE); 00525 if (LENTABLE(MAINTREE)[0xE8] != 0) pState->intel_started = 1; 00526 00527 READ_LENGTHS(LENGTH, 0, LZX_NUM_SECONDARY_LENGTHS); 00528 BUILD_TABLE(LENGTH); 00529 break; 00530 00531 case LZX_BLOCKTYPE_UNCOMPRESSED: 00532 pState->intel_started = 1; /* because we can't assume otherwise */ 00533 ENSURE_BITS(16); /* get up to 16 pad bits into the buffer */ 00534 if (bitsleft > 16) inpos -= 2; /* and align the bitstream! */ 00535 R0 = inpos[0]|(inpos[1]<<8)|(inpos[2]<<16)|(inpos[3]<<24);inpos+=4; 00536 R1 = inpos[0]|(inpos[1]<<8)|(inpos[2]<<16)|(inpos[3]<<24);inpos+=4; 00537 R2 = inpos[0]|(inpos[1]<<8)|(inpos[2]<<16)|(inpos[3]<<24);inpos+=4; 00538 break; 00539 00540 default: 00541 return DECR_ILLEGALDATA; 00542 } 00543 } 00544 00545 /* buffer exhaustion check */ 00546 if (inpos > endinp) { 00547 /* it's possible to have a file where the next run is less than 00548 * 16 bits in size. In this case, the READ_HUFFSYM() macro used 00549 * in building the tables will exhaust the buffer, so we should 00550 * allow for this, but not allow those accidentally read bits to 00551 * be used (so we check that there are at least 16 bits 00552 * remaining - in this boundary case they aren't really part of 00553 * the compressed data) 00554 */ 00555 if (inpos > (endinp+2) || bitsleft < 16) return DECR_ILLEGALDATA; 00556 } 00557 00558 while ((this_run = pState->block_remaining) > 0 && togo > 0) { 00559 if (this_run > togo) this_run = togo; 00560 togo -= this_run; 00561 pState->block_remaining -= this_run; 00562 00563 /* apply 2^x-1 mask */ 00564 window_posn &= window_size - 1; 00565 /* runs can't straddle the window wraparound */ 00566 if ((window_posn + this_run) > window_size) 00567 return DECR_DATAFORMAT; 00568 00569 switch (pState->block_type) { 00570 00571 case LZX_BLOCKTYPE_VERBATIM: 00572 while (this_run > 0) { 00573 READ_HUFFSYM(MAINTREE, main_element); 00574 00575 if (main_element < LZX_NUM_CHARS) { 00576 /* literal: 0 to LZX_NUM_CHARS-1 */ 00577 window[window_posn++] = main_element; 00578 this_run--; 00579 } 00580 else { 00581 /* match: LZX_NUM_CHARS + ((slot<<3) | length_header (3 bits)) */ 00582 main_element -= LZX_NUM_CHARS; 00583 00584 match_length = main_element & LZX_NUM_PRIMARY_LENGTHS; 00585 if (match_length == LZX_NUM_PRIMARY_LENGTHS) { 00586 READ_HUFFSYM(LENGTH, length_footer); 00587 match_length += length_footer; 00588 } 00589 match_length += LZX_MIN_MATCH; 00590 00591 match_offset = main_element >> 3; 00592 00593 if (match_offset > 2) { 00594 /* not repeated offset */ 00595 if (match_offset != 3) { 00596 extra = extra_bits[match_offset]; 00597 READ_BITS(verbatim_bits, extra); 00598 match_offset = position_base[match_offset] - 2 + verbatim_bits; 00599 } 00600 else { 00601 match_offset = 1; 00602 } 00603 00604 /* update repeated offset LRU queue */ 00605 R2 = R1; R1 = R0; R0 = match_offset; 00606 } 00607 else if (match_offset == 0) { 00608 match_offset = R0; 00609 } 00610 else if (match_offset == 1) { 00611 match_offset = R1; 00612 R1 = R0; R0 = match_offset; 00613 } 00614 else /* match_offset == 2 */ { 00615 match_offset = R2; 00616 R2 = R0; R0 = match_offset; 00617 } 00618 00619 rundest = window + window_posn; 00620 this_run -= match_length; 00621 00622 /* copy any wrapped around source data */ 00623 if (window_posn >= match_offset) { 00624 /* no wrap */ 00625 runsrc = rundest - match_offset; 00626 } else { 00627 runsrc = rundest + (window_size - match_offset); 00628 copy_length = match_offset - window_posn; 00629 if (copy_length < match_length) { 00630 match_length -= copy_length; 00631 window_posn += copy_length; 00632 while (copy_length-- > 0) *rundest++ = *runsrc++; 00633 runsrc = window; 00634 } 00635 } 00636 window_posn += match_length; 00637 00638 /* copy match data - no worries about destination wraps */ 00639 while (match_length-- > 0) *rundest++ = *runsrc++; 00640 00641 } 00642 } 00643 break; 00644 00645 case LZX_BLOCKTYPE_ALIGNED: 00646 while (this_run > 0) { 00647 READ_HUFFSYM(MAINTREE, main_element); 00648 00649 if (main_element < LZX_NUM_CHARS) { 00650 /* literal: 0 to LZX_NUM_CHARS-1 */ 00651 window[window_posn++] = main_element; 00652 this_run--; 00653 } 00654 else { 00655 /* match: LZX_NUM_CHARS + ((slot<<3) | length_header (3 bits)) */ 00656 main_element -= LZX_NUM_CHARS; 00657 00658 match_length = main_element & LZX_NUM_PRIMARY_LENGTHS; 00659 if (match_length == LZX_NUM_PRIMARY_LENGTHS) { 00660 READ_HUFFSYM(LENGTH, length_footer); 00661 match_length += length_footer; 00662 } 00663 match_length += LZX_MIN_MATCH; 00664 00665 match_offset = main_element >> 3; 00666 00667 if (match_offset > 2) { 00668 /* not repeated offset */ 00669 extra = extra_bits[match_offset]; 00670 match_offset = position_base[match_offset] - 2; 00671 if (extra > 3) { 00672 /* verbatim and aligned bits */ 00673 extra -= 3; 00674 READ_BITS(verbatim_bits, extra); 00675 match_offset += (verbatim_bits << 3); 00676 READ_HUFFSYM(ALIGNED, aligned_bits); 00677 match_offset += aligned_bits; 00678 } 00679 else if (extra == 3) { 00680 /* aligned bits only */ 00681 READ_HUFFSYM(ALIGNED, aligned_bits); 00682 match_offset += aligned_bits; 00683 } 00684 else if (extra > 0) { /* extra==1, extra==2 */ 00685 /* verbatim bits only */ 00686 READ_BITS(verbatim_bits, extra); 00687 match_offset += verbatim_bits; 00688 } 00689 else /* extra == 0 */ { 00690 /* ??? */ 00691 match_offset = 1; 00692 } 00693 00694 /* update repeated offset LRU queue */ 00695 R2 = R1; R1 = R0; R0 = match_offset; 00696 } 00697 else if (match_offset == 0) { 00698 match_offset = R0; 00699 } 00700 else if (match_offset == 1) { 00701 match_offset = R1; 00702 R1 = R0; R0 = match_offset; 00703 } 00704 else /* match_offset == 2 */ { 00705 match_offset = R2; 00706 R2 = R0; R0 = match_offset; 00707 } 00708 00709 rundest = window + window_posn; 00710 this_run -= match_length; 00711 00712 /* copy any wrapped around source data */ 00713 if (window_posn >= match_offset) { 00714 /* no wrap */ 00715 runsrc = rundest - match_offset; 00716 } else { 00717 runsrc = rundest + (window_size - match_offset); 00718 copy_length = match_offset - window_posn; 00719 if (copy_length < match_length) { 00720 match_length -= copy_length; 00721 window_posn += copy_length; 00722 while (copy_length-- > 0) *rundest++ = *runsrc++; 00723 runsrc = window; 00724 } 00725 } 00726 window_posn += match_length; 00727 00728 /* copy match data - no worries about destination wraps */ 00729 while (match_length-- > 0) *rundest++ = *runsrc++; 00730 00731 } 00732 } 00733 break; 00734 00735 case LZX_BLOCKTYPE_UNCOMPRESSED: 00736 if ((inpos + this_run) > endinp) return DECR_ILLEGALDATA; 00737 memcpy(window + window_posn, inpos, (size_t) this_run); 00738 inpos += this_run; window_posn += this_run; 00739 break; 00740 00741 default: 00742 return DECR_ILLEGALDATA; /* might as well */ 00743 } 00744 00745 } 00746 } 00747 00748 if (togo != 0) return DECR_ILLEGALDATA; 00749 memcpy(outpos, window + ((!window_posn) ? window_size : window_posn) - outlen, (size_t) outlen); 00750 00751 pState->window_posn = window_posn; 00752 pState->R0 = R0; 00753 pState->R1 = R1; 00754 pState->R2 = R2; 00755 00756 /* intel E8 decoding */ 00757 if ((pState->frames_read++ < 32768) && pState->intel_filesize != 0) { 00758 if (outlen <= 6 || !pState->intel_started) { 00759 pState->intel_curpos += outlen; 00760 } 00761 else { 00762 UBYTE *data = outpos; 00763 UBYTE *dataend = data + outlen - 10; 00764 LONG curpos = pState->intel_curpos; 00765 LONG filesize = pState->intel_filesize; 00766 LONG abs_off, rel_off; 00767 00768 pState->intel_curpos = curpos + outlen; 00769 00770 while (data < dataend) { 00771 if (*data++ != 0xE8) { curpos++; continue; } 00772 abs_off = data[0] | (data[1]<<8) | (data[2]<<16) | (data[3]<<24); 00773 if ((abs_off >= -curpos) && (abs_off < filesize)) { 00774 rel_off = (abs_off >= 0) ? abs_off - curpos : abs_off + filesize; 00775 data[0] = (UBYTE) rel_off; 00776 data[1] = (UBYTE) (rel_off >> 8); 00777 data[2] = (UBYTE) (rel_off >> 16); 00778 data[3] = (UBYTE) (rel_off >> 24); 00779 } 00780 data += 4; 00781 curpos += 5; 00782 } 00783 } 00784 } 00785 return DECR_OK; 00786 } 00787 00788 #ifdef LZX_CHM_TESTDRIVER 00789 int main(int c, char **v) 00790 { 00791 FILE *fin, *fout; 00792 struct LZXstate state; 00793 UBYTE ibuf[16384]; 00794 UBYTE obuf[32768]; 00795 int ilen, olen; 00796 int status; 00797 int i; 00798 int count=0; 00799 int w = atoi(v[1]); 00800 LZXinit(&state, w); 00801 fout = fopen(v[2], "wb"); 00802 for (i=3; i<c; i++) 00803 { 00804 fin = fopen(v[i], "rb"); 00805 ilen = fread(ibuf, 1, 16384, fin); 00806 status = LZXdecompress(&state, ibuf, obuf, ilen, 32768); 00807 switch (status) 00808 { 00809 case DECR_OK: 00810 printf("ok\n"); 00811 fwrite(obuf, 1, 32768, fout); 00812 break; 00813 case DECR_DATAFORMAT: 00814 printf("bad format\n"); 00815 break; 00816 case DECR_ILLEGALDATA: 00817 printf("illegal data\n"); 00818 break; 00819 case DECR_NOMEMORY: 00820 printf("no memory\n"); 00821 break; 00822 default: 00823 break; 00824 } 00825 fclose(fin); 00826 if (++count == 2) 00827 { 00828 count = 0; 00829 LZXreset(&state); 00830 } 00831 } 00832 fclose(fout); 00833 } 00834 #endif Generated on Sun May 27 2012 04:24:14 for ReactOS by
1.7.6.1
|