Home | Info | Community | Development | myReactOS | Contact Us
ReactOS Development > Doxygenbzip2recover.c
Go to the documentation of this file.
/*-----------------------------------------------------------*/
/*--- Block recoverer program for bzip2                   ---*/
/*---                                      bzip2recover.c ---*/
/*-----------------------------------------------------------*/

/* ------------------------------------------------------------------
   This file is part of bzip2/libbzip2, a program and library for
   lossless, block-sorting data compression.

   bzip2/libbzip2 version 1.0.6 of 6 September 2010
   Copyright (C) 1996-2010 Julian Seward <jseward@bzip.org>

   Please read the WARNING, DISCLAIMER and PATENTS sections in the
   README file.

   This program is released under the terms of the license contained
   in the file LICENSE.
   ------------------------------------------------------------------ */

/* This program is a complete hack and should be rewritten properly.
   It isn't very complicated. */

#include <stdio.h>
#include <errno.h>
#include <stdlib.h>
#include <string.h>


/* This program records bit locations in the file to be recovered.
   That means that if 64-bit ints are not supported, we will not
   be able to recover .bz2 files over 512MB (2^32 bits) long.
   On GNU supported platforms, we take advantage of the 64-bit
   int support to circumvent this problem.  Ditto MSVC.

   This change occurred in version 1.0.2; all prior versions have
   the 512MB limitation.
*/
#ifdef __GNUC__
   typedef  unsigned long long int  MaybeUInt64;
#  define MaybeUInt64_FMT "%Lu"
#else
#ifdef _MSC_VER
   typedef  unsigned __int64  MaybeUInt64;
#  define MaybeUInt64_FMT "%I64u"
#else
   typedef  unsigned int   MaybeUInt64;
#  define MaybeUInt64_FMT "%u"
#endif
#endif

typedef  unsigned int   UInt32;
typedef  int            Int32;
typedef  unsigned char  UChar;
typedef  char           Char;
typedef  unsigned char  Bool;
#define True    ((Bool)1)
#define False   ((Bool)0)


#define BZ_MAX_FILENAME 2000

Char inFileName[BZ_MAX_FILENAME];
Char outFileName[BZ_MAX_FILENAME];
Char progName[BZ_MAX_FILENAME];

MaybeUInt64 bytesOut = 0;
MaybeUInt64 bytesIn  = 0;


/*---------------------------------------------------*/
/*--- Header bytes                                ---*/
/*---------------------------------------------------*/

#define BZ_HDR_B 0x42                         /* 'B' */
#define BZ_HDR_Z 0x5a                         /* 'Z' */
#define BZ_HDR_h 0x68                         /* 'h' */
#define BZ_HDR_0 0x30                         /* '0' */


/*---------------------------------------------------*/
/*--- I/O errors                                  ---*/
/*---------------------------------------------------*/

/*---------------------------------------------*/
/*--
   Report a failure while reading inFileName (perror supplies the
   system reason) and terminate with exit status 1.
--*/
static void readError ( void )
{
   fprintf ( stderr,
             "%s: I/O error reading `%s', possible reason follows.\n",
            progName, inFileName );
   perror ( progName );
   fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n",
             progName );
   exit ( 1 );
}


/*---------------------------------------------*/
/*--
   Report a failure while writing outFileName (perror supplies the
   system reason) and terminate with exit status 1.
--*/
static void writeError ( void )
{
   /* The failing stream is the output file, so name it (and the
      direction of the transfer) correctly in the diagnostic. */
   fprintf ( stderr,
             "%s: I/O error writing `%s', possible reason follows.\n",
            progName, outFileName );
   perror ( progName );
   fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n",
             progName );
   exit ( 1 );
}


/*---------------------------------------------*/
/*--
   Report that an allocation of n bytes failed and terminate with
   exit status 1.
--*/
static void mallocFail ( Int32 n )
{
   fprintf ( stderr,
             "%s: malloc failed on request for %d bytes.\n",
            progName, n );
   fprintf ( stderr, "%s: warning: output file(s) may be incomplete.\n",
             progName );
   exit ( 1 );
}


/*---------------------------------------------*/
/*--
   Report that the input holds more blocks than the fixed-size
   block tables can record, and terminate with exit status 1.
--*/
static void tooManyBlocks ( Int32 max_handled_blocks )
{
   fprintf ( stderr,
             "%s: `%s' appears to contain more than %d blocks\n",
            progName, inFileName, max_handled_blocks );
   fprintf ( stderr,
             "%s: and cannot be handled. To fix, increase\n",
             progName );
   fprintf ( stderr,
             "%s: BZ_MAX_HANDLED_BLOCKS in bzip2recover.c, and recompile.\n",
             progName );
   exit ( 1 );
}



/*---------------------------------------------------*/
/*--- Bit stream I/O                              ---*/
/*---------------------------------------------------*/

/*--
   A FILE wrapped for bit-at-a-time access.
      handle   -- underlying stdio stream
      buffer   -- bits of the byte currently being read or assembled
      buffLive -- number of bits in buffer still to read (mode 'r')
                  or already accumulated (mode 'w')
      mode     -- 'r' or 'w'
--*/
typedef
   struct {
      FILE*  handle;
      Int32  buffer;
      Int32  buffLive;
      Char   mode;
   }
   BitStream;


/*---------------------------------------------*/
/*--
   Allocate a BitStream reading from stream.  Exits via mallocFail
   if the allocation fails.
--*/
static BitStream* bsOpenReadStream ( FILE* stream )
{
   BitStream *bs = malloc ( sizeof(BitStream) );
   if (bs == NULL) mallocFail ( sizeof(BitStream) );
   bs->handle = stream;
   bs->buffer = 0;
   bs->buffLive = 0;
   bs->mode = 'r';
   return bs;
}


/*---------------------------------------------*/
/*--
   Allocate a BitStream writing to stream.  Exits via mallocFail
   if the allocation fails.
--*/
static BitStream* bsOpenWriteStream ( FILE* stream )
{
   BitStream *bs = malloc ( sizeof(BitStream) );
   if (bs == NULL) mallocFail ( sizeof(BitStream) );
   bs->handle = stream;
   bs->buffer = 0;
   bs->buffLive = 0;
   bs->mode = 'w';
   return bs;
}


/*---------------------------------------------*/
/*--
   Append the low bit of `bit' to the stream.  A byte is emitted
   only when a ninth bit arrives, so the final (possibly partial)
   byte is left for bsClose to flush.
--*/
static void bsPutBit ( BitStream* bs, Int32 bit )
{
   if (bs->buffLive == 8) {
      Int32 retVal = putc ( (UChar) bs->buffer, bs->handle );
      if (retVal == EOF) writeError();
      bytesOut++;
      bs->buffLive = 1;
      bs->buffer = bit & 0x1;
   } else {
      bs->buffer = ( (bs->buffer << 1) | (bit & 0x1) );
      bs->buffLive++;
   }
}


/*---------------------------------------------*/
/*--
   Returns 0 or 1, or 2 to indicate EOF.
   Bits are delivered most-significant first.  A read error
   terminates the program via readError.
--*/
static Int32 bsGetBit ( BitStream* bs )
{
   if (bs->buffLive > 0) {
      bs->buffLive --;
      return ( ((bs->buffer) >> (bs->buffLive)) & 0x1 );
   } else {
      Int32 retVal = getc ( bs->handle );
      if ( retVal == EOF ) {
         /* Ask the stream itself whether this is an error; errno is
            never cleared by this program and may hold a stale value
            from an unrelated earlier call. */
         if (ferror ( bs->handle )) readError();
         return 2;
      }
      bs->buffLive = 7;
      bs->buffer = retVal;
      return ( ((bs->buffer) >> 7) & 0x1 );
   }
}


/*---------------------------------------------*/
/*--
   Close the stream and free bs.  For a write stream the pending
   byte is first padded on the right with zero bits, written, and
   the handle flushed.  Any failure terminates the program via
   writeError / readError.
--*/
static void bsClose ( BitStream* bs )
{
   Int32 retVal;

   if ( bs->mode == 'w' ) {
      while ( bs->buffLive < 8 ) {
         bs->buffLive++;
         bs->buffer <<= 1;
      }
      retVal = putc ( (UChar) (bs->buffer), bs->handle );
      if (retVal == EOF) writeError();
      bytesOut++;
      retVal = fflush ( bs->handle );
      if (retVal == EOF) writeError();
   }
   retVal = fclose ( bs->handle );
   if (retVal == EOF) {
      if (bs->mode == 'w') writeError(); else readError();
   }
   free ( bs );
}


/*---------------------------------------------*/
/*-- Write the 8 bits of c, most significant bit first. --*/
static void bsPutUChar ( BitStream* bs, UChar c )
{
   Int32 i;
   for (i = 7; i >= 0; i--)
      bsPutBit ( bs, (((UInt32) c) >> i) & 0x1 );
}


/*---------------------------------------------*/
/*-- Write the 32 bits of c, most significant bit first. --*/
static void bsPutUInt32 ( BitStream* bs, UInt32 c )
{
   Int32 i;

   for (i = 31; i >= 0; i--)
      bsPutBit ( bs, (c >> i) & 0x1 );
}


/*---------------------------------------------*/
/*--
   True iff name is longer than four characters and ends in ".bz2".
--*/
static Bool endsInBz2 ( Char* name )
{
   Int32 n = strlen ( name );
   if (n <= 4) return False;
   return
      (name[n-4] == '.' &&
       name[n-3] == 'b' &&
       name[n-2] == 'z' &&
       name[n-1] == '2');
}


/*---------------------------------------------------*/
/*---                                             ---*/
/*---------------------------------------------------*/

/* This logic isn't really right when it comes to Cygwin. */
#ifdef _WIN32
#  define  BZ_SPLIT_SYM  '\\'  /* path splitter on Windows platform */
#else
#  define  BZ_SPLIT_SYM  '/'   /* path splitter on Unix platform */
#endif

#define BLOCK_HEADER_HI  0x00003141UL
#define BLOCK_HEADER_LO  0x59265359UL

#define BLOCK_ENDMARK_HI 0x00001772UL
#define BLOCK_ENDMARK_LO 0x45385090UL

/* Increase if necessary.  However, a .bz2 file with > 50000 blocks
   would have an uncompressed size of at least 40GB, so the chances
   are low you'll need to up this.
*/
#define BZ_MAX_HANDLED_BLOCKS 50000

MaybeUInt64 bStart [BZ_MAX_HANDLED_BLOCKS];
MaybeUInt64 bEnd   [BZ_MAX_HANDLED_BLOCKS];
MaybeUInt64 rbStart[BZ_MAX_HANDLED_BLOCKS];
MaybeUInt64 rbEnd  [BZ_MAX_HANDLED_BLOCKS];

/*--
   Entry point.  Takes exactly one argument, the damaged file.
   Pass 1 scans the file bit by bit for the 48-bit block-header and
   end-of-stream patterns and records the bit range of each block.
   Pass 2 re-reads the file and writes every recorded block to its
   own recNNNNN<name>.bz2 file, wrapped in a fresh stream header and
   end marker.  Returns 0 on success; every failure path calls
   exit(1).
--*/
Int32 main ( Int32 argc, Char** argv )
{
   FILE*       inFile;
   FILE*       outFile;
   BitStream*  bsIn, *bsWr;
   Int32       b, wrBlock, currBlock, rbCtr;
   MaybeUInt64 bitsRead;

   UInt32      buffHi, buffLo, blockCRC;
   Char*       p;

   /* argv[0] is caller-controlled; bound the copy so that an
      over-long program name cannot overrun progName. */
   strncpy ( progName, argv[0], BZ_MAX_FILENAME-1 );
   progName[BZ_MAX_FILENAME-1] = '\0';
   inFileName[0] = outFileName[0] = 0;

   fprintf ( stderr,
             "bzip2recover 1.0.6: extracts blocks from damaged .bz2 files.\n" );

   if (argc != 2) {
      fprintf ( stderr, "%s: usage is `%s damaged_file_name'.\n",
                        progName, progName );
      switch (sizeof(MaybeUInt64)) {
         case 8:
            fprintf(stderr,
                    "\trestrictions on size of recovered file: None\n");
            break;
         case 4:
            fprintf(stderr,
                    "\trestrictions on size of recovered file: 512 MB\n");
            fprintf(stderr,
                    "\tto circumvent, recompile with MaybeUInt64 as an\n"
                    "\tunsigned 64-bit int.\n");
            break;
         default:
            fprintf(stderr,
                    "\tsizeof(MaybeUInt64) is not 4 or 8 -- "
                    "configuration error.\n");
            break;
      }
      exit(1);
   }

   /* The 20 bytes of slack leave room for the "recNNNNN" prefix and
      ".bz2" suffix added when output names are built below. */
   if (strlen(argv[1]) >= BZ_MAX_FILENAME-20) {
      fprintf ( stderr,
                "%s: supplied filename is suspiciously (>= %d chars) long. Bye!\n",
                progName, (int)strlen(argv[1]) );
      exit(1);
   }

   strcpy ( inFileName, argv[1] );

   inFile = fopen ( inFileName, "rb" );
   if (inFile == NULL) {
      fprintf ( stderr, "%s: can't read `%s'\n", progName, inFileName );
      exit(1);
   }

   bsIn = bsOpenReadStream ( inFile );
   fprintf ( stderr, "%s: searching for block boundaries ...\n", progName );

   bitsRead = 0;
   buffHi = buffLo = 0;
   currBlock = 0;
   bStart[currBlock] = 0;

   rbCtr = 0;

   /*-- pass 1: locate block boundaries --*/
   while (True) {
      b = bsGetBit ( bsIn );
      bitsRead++;
      if (b == 2) {
         if (bitsRead >= bStart[currBlock] &&
             (bitsRead - bStart[currBlock]) >= 40) {
            bEnd[currBlock] = bitsRead-1;
            if (currBlock > 0)
               fprintf ( stderr, " block %d runs from " MaybeUInt64_FMT
                                 " to " MaybeUInt64_FMT " (incomplete)\n",
                         currBlock,  bStart[currBlock], bEnd[currBlock] );
         } else
            currBlock--;
         break;
      }
      /* buffHi:buffLo is a 64-bit shift register of the most recent
         bits; its low 48 bits are compared against the two magic
         patterns. */
      buffHi = (buffHi << 1) | (buffLo >> 31);
      buffLo = (buffLo << 1) | (b & 1);
      if ( ( (buffHi & 0x0000ffff) == BLOCK_HEADER_HI
             && buffLo == BLOCK_HEADER_LO)
           ||
           ( (buffHi & 0x0000ffff) == BLOCK_ENDMARK_HI
             && buffLo == BLOCK_ENDMARK_LO)
         ) {
         if (bitsRead > 49) {
            bEnd[currBlock] = bitsRead-49;
         } else {
            bEnd[currBlock] = 0;
         }
         if (currBlock > 0 &&
             (bEnd[currBlock] - bStart[currBlock]) >= 130) {
            fprintf ( stderr, " block %d runs from " MaybeUInt64_FMT
                              " to " MaybeUInt64_FMT "\n",
                      rbCtr+1,  bStart[currBlock], bEnd[currBlock] );
            rbStart[rbCtr] = bStart[currBlock];
            rbEnd[rbCtr] = bEnd[currBlock];
            rbCtr++;
         }
         /* currBlock is about to be incremented and then used as an
            index, so the last usable value before the increment is
            BZ_MAX_HANDLED_BLOCKS-2. */
         if (currBlock >= BZ_MAX_HANDLED_BLOCKS - 1)
            tooManyBlocks(BZ_MAX_HANDLED_BLOCKS);
         currBlock++;

         bStart[currBlock] = bitsRead;
      }
   }

   bsClose ( bsIn );

   /*-- identified blocks run from 1 to rbCtr inclusive. --*/

   if (rbCtr < 1) {
      fprintf ( stderr,
                "%s: sorry, I couldn't find any block boundaries.\n",
                progName );
      exit(1);
   }

   fprintf ( stderr, "%s: splitting into blocks\n", progName );

   inFile = fopen ( inFileName, "rb" );
   if (inFile == NULL) {
      fprintf ( stderr, "%s: can't open `%s'\n", progName, inFileName );
      exit(1);
   }
   bsIn = bsOpenReadStream ( inFile );

   /*-- placate gcc's dataflow analyser --*/
   blockCRC = 0; bsWr = 0;

   bitsRead = 0;
   outFile = NULL;
   wrBlock = 0;

   /*-- pass 2: copy each recorded block to its own file --*/
   while (True) {
      b = bsGetBit(bsIn);
      if (b == 2) break;
      buffHi = (buffHi << 1) | (buffLo >> 31);
      buffLo = (buffLo << 1) | (b & 1);
      /* 48 bits into the block the shift register holds the 32-bit
         value that follows the 16 bits before it; keep it so it can
         be re-emitted after the end marker. */
      if (bitsRead == 47+rbStart[wrBlock])
         blockCRC = (buffHi << 16) | (buffLo >> 16);

      if (outFile != NULL && bitsRead >= rbStart[wrBlock]
                          && bitsRead <= rbEnd[wrBlock]) {
         bsPutBit ( bsWr, b );
      }

      bitsRead++;

      if (bitsRead == rbEnd[wrBlock]+1) {
         if (outFile != NULL) {
            bsPutUChar ( bsWr, 0x17 ); bsPutUChar ( bsWr, 0x72 );
            bsPutUChar ( bsWr, 0x45 ); bsPutUChar ( bsWr, 0x38 );
            bsPutUChar ( bsWr, 0x50 ); bsPutUChar ( bsWr, 0x90 );
            bsPutUInt32 ( bsWr, blockCRC );
            bsClose ( bsWr );
            /* bsClose has closed the FILE and freed bsWr.  Forget
               both so that no later iteration can write to, or
               close, the dead stream. */
            outFile = NULL;
            bsWr = NULL;
         }
         if (wrBlock >= rbCtr) break;
         wrBlock++;
      } else
      if (bitsRead == rbStart[wrBlock]) {
         /* Create the output file name, correctly handling leading paths.
            (31.10.2001 by Sergey E. Kusikov) */
         Char* split;
         Int32 ofs, k;
         for (k = 0; k < BZ_MAX_FILENAME; k++)
            outFileName[k] = 0;
         strcpy (outFileName, inFileName);
         split = strrchr (outFileName, BZ_SPLIT_SYM);
         if (split == NULL) {
            split = outFileName;
         } else {
            ++split;
         }
         /* Now split points to the start of the basename. */
         ofs  = split - outFileName;
         sprintf (split, "rec%5d", wrBlock+1);
         /* %5d pads with spaces; turn the padding into zeros. */
         for (p = split; *p != 0; p++) if (*p == ' ') *p = '0';
         strcat (outFileName, inFileName + ofs);

         if ( !endsInBz2(outFileName)) strcat ( outFileName, ".bz2" );

         fprintf ( stderr, " writing block %d to `%s' ...\n",
                           wrBlock+1, outFileName );

         outFile = fopen ( outFileName, "wb" );
         if (outFile == NULL) {
            fprintf ( stderr, "%s: can't write `%s'\n",
                      progName, outFileName );
            exit(1);
         }
         bsWr = bsOpenWriteStream ( outFile );
         bsPutUChar ( bsWr, BZ_HDR_B );
         bsPutUChar ( bsWr, BZ_HDR_Z );
         bsPutUChar ( bsWr, BZ_HDR_h );
         bsPutUChar ( bsWr, BZ_HDR_0 + 9 );
         bsPutUChar ( bsWr, 0x31 ); bsPutUChar ( bsWr, 0x41 );
         bsPutUChar ( bsWr, 0x59 ); bsPutUChar ( bsWr, 0x26 );
         bsPutUChar ( bsWr, 0x53 ); bsPutUChar ( bsWr, 0x59 );
      }
   }

   /* Release the second-pass input stream. */
   bsClose ( bsIn );

   fprintf ( stderr, "%s: finished\n", progName );
   return 0;
}



/*-----------------------------------------------------------*/
/*--- end                                  bzip2recover.c ---*/
/*-----------------------------------------------------------*/

/* Generated on Sat May 26 2012 04:32:18 for ReactOS by */
1.7.6.1
|