ReactOS  0.4.14-dev-77-gd9e7c48
blocksort.c
Go to the documentation of this file.
1 
2 /*-------------------------------------------------------------*/
3 /*--- Block sorting machinery ---*/
4 /*--- blocksort.c ---*/
5 /*-------------------------------------------------------------*/
6 
7 /* ------------------------------------------------------------------
8  This file is part of bzip2/libbzip2, a program and library for
9  lossless, block-sorting data compression.
10 
11  bzip2/libbzip2 version 1.0.6 of 6 September 2010
12  Copyright (C) 1996-2010 Julian Seward <jseward@bzip.org>
13 
14  Please read the WARNING, DISCLAIMER and PATENTS sections in the
15  README file.
16 
17  This program is released under the terms of the license contained
18  in the file LICENSE.
19  ------------------------------------------------------------------ */
20 
21 
22 #include "bzlib_private.h"
23 
24 /*---------------------------------------------*/
25 /*--- Fallback O(N log(N)^2) sorting ---*/
26 /*--- algorithm, for repetitive blocks ---*/
27 /*---------------------------------------------*/
28 
29 /*---------------------------------------------*/
30 static
32 void fallbackSimpleSort ( UInt32* fmap,
33  UInt32* eclass,
34  Int32 lo,
35  Int32 hi )
36 {
37  Int32 i, j, tmp;
38  UInt32 ec_tmp;
39 
40  if (lo == hi) return;
41 
42  if (hi - lo > 3) {
43  for ( i = hi-4; i >= lo; i-- ) {
44  tmp = fmap[i];
45  ec_tmp = eclass[tmp];
46  for ( j = i+4; j <= hi && ec_tmp > eclass[fmap[j]]; j += 4 )
47  fmap[j-4] = fmap[j];
48  fmap[j-4] = tmp;
49  }
50  }
51 
52  for ( i = hi-1; i >= lo; i-- ) {
53  tmp = fmap[i];
54  ec_tmp = eclass[tmp];
55  for ( j = i+1; j <= hi && ec_tmp > eclass[fmap[j]]; j++ )
56  fmap[j-1] = fmap[j];
57  fmap[j-1] = tmp;
58  }
59 }
60 
61 
62 /*---------------------------------------------*/
63 #define fswap(zz1, zz2) \
64  { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; }
65 
66 #define fvswap(zzp1, zzp2, zzn) \
67 { \
68  Int32 yyp1 = (zzp1); \
69  Int32 yyp2 = (zzp2); \
70  Int32 yyn = (zzn); \
71  while (yyn > 0) { \
72  fswap(fmap[yyp1], fmap[yyp2]); \
73  yyp1++; yyp2++; yyn--; \
74  } \
75 }
76 
77 
78 #define fmin(a,b) ((a) < (b)) ? (a) : (b)
79 
80 #define fpush(lz,hz) { stackLo[sp] = lz; \
81  stackHi[sp] = hz; \
82  sp++; }
83 
84 #define fpop(lz,hz) { sp--; \
85  lz = stackLo[sp]; \
86  hz = stackHi[sp]; }
87 
88 #define FALLBACK_QSORT_SMALL_THRESH 10
89 #define FALLBACK_QSORT_STACK_SIZE 100
90 
91 
92 static
93 void fallbackQSort3 ( UInt32* fmap,
94  UInt32* eclass,
95  Int32 loSt,
96  Int32 hiSt )
97 {
98  Int32 unLo, unHi, ltLo, gtHi, n, m;
99  Int32 sp, lo, hi;
100  UInt32 med, r, r3;
103 
104  r = 0;
105 
106  sp = 0;
107  fpush ( loSt, hiSt );
108 
109  while (sp > 0) {
110 
111  AssertH ( sp < FALLBACK_QSORT_STACK_SIZE - 1, 1004 );
112 
113  fpop ( lo, hi );
114  if (hi - lo < FALLBACK_QSORT_SMALL_THRESH) {
115  fallbackSimpleSort ( fmap, eclass, lo, hi );
116  continue;
117  }
118 
119  /* Random partitioning. Median of 3 sometimes fails to
120  avoid bad cases. Median of 9 seems to help but
121  looks rather expensive. This too seems to work but
122  is cheaper. Guidance for the magic constants
123  7621 and 32768 is taken from Sedgewick's algorithms
124  book, chapter 35.
125  */
126  r = ((r * 7621) + 1) % 32768;
127  r3 = r % 3;
128  if (r3 == 0) med = eclass[fmap[lo]]; else
129  if (r3 == 1) med = eclass[fmap[(lo+hi)>>1]]; else
130  med = eclass[fmap[hi]];
131 
132  unLo = ltLo = lo;
133  unHi = gtHi = hi;
134 
135  while (1) {
136  while (1) {
137  if (unLo > unHi) break;
138  n = (Int32)eclass[fmap[unLo]] - (Int32)med;
139  if (n == 0) {
140  fswap(fmap[unLo], fmap[ltLo]);
141  ltLo++; unLo++;
142  continue;
143  };
144  if (n > 0) break;
145  unLo++;
146  }
147  while (1) {
148  if (unLo > unHi) break;
149  n = (Int32)eclass[fmap[unHi]] - (Int32)med;
150  if (n == 0) {
151  fswap(fmap[unHi], fmap[gtHi]);
152  gtHi--; unHi--;
153  continue;
154  };
155  if (n < 0) break;
156  unHi--;
157  }
158  if (unLo > unHi) break;
159  fswap(fmap[unLo], fmap[unHi]); unLo++; unHi--;
160  }
161 
162  AssertD ( unHi == unLo-1, "fallbackQSort3(2)" );
163 
164  if (gtHi < ltLo) continue;
165 
166  n = fmin(ltLo-lo, unLo-ltLo); fvswap(lo, unLo-n, n);
167  m = fmin(hi-gtHi, gtHi-unHi); fvswap(unLo, hi-m+1, m);
168 
169  n = lo + unLo - ltLo - 1;
170  m = hi - (gtHi - unHi) + 1;
171 
172  if (n - lo > hi - m) {
173  fpush ( lo, n );
174  fpush ( m, hi );
175  } else {
176  fpush ( m, hi );
177  fpush ( lo, n );
178  }
179  }
180 }
181 
182 #undef fmin
183 #undef fpush
184 #undef fpop
185 #undef fswap
186 #undef fvswap
187 #undef FALLBACK_QSORT_SMALL_THRESH
188 #undef FALLBACK_QSORT_STACK_SIZE
189 
190 
191 /*---------------------------------------------*/
192 /* Pre:
193  nblock > 0
194  eclass exists for [0 .. nblock-1]
195  ((UChar*)eclass) [0 .. nblock-1] holds block
196  ptr exists for [0 .. nblock-1]
197 
198  Post:
199  ((UChar*)eclass) [0 .. nblock-1] holds block
200  All other areas of eclass destroyed
201  fmap [0 .. nblock-1] holds sorted order
202  bhtab [ 0 .. 2+(nblock/32) ] destroyed
203 */
204 
205 #define SET_BH(zz) bhtab[(zz) >> 5] |= (1 << ((zz) & 31))
206 #define CLEAR_BH(zz) bhtab[(zz) >> 5] &= ~(1 << ((zz) & 31))
207 #define ISSET_BH(zz) (bhtab[(zz) >> 5] & (1 << ((zz) & 31)))
208 #define WORD_BH(zz) bhtab[(zz) >> 5]
209 #define UNALIGNED_BH(zz) ((zz) & 0x01f)
210 
211 static
212 void fallbackSort ( UInt32* fmap,
213  UInt32* eclass,
214  UInt32* bhtab,
215  Int32 nblock,
216  Int32 verb )
217 {
218  Int32 ftab[257];
219  Int32 ftabCopy[256];
220  Int32 H, i, j, k, l, r, cc, cc1;
221  Int32 nNotDone;
222  Int32 nBhtab;
223  UChar* eclass8 = (UChar*)eclass;
224 
225  /*--
226  Initial 1-char radix sort to generate
227  initial fmap and initial BH bits.
228  --*/
229  if (verb >= 4)
230  VPrintf0 ( " bucket sorting ...\n" );
231  for (i = 0; i < 257; i++) ftab[i] = 0;
232  for (i = 0; i < nblock; i++) ftab[eclass8[i]]++;
233  for (i = 0; i < 256; i++) ftabCopy[i] = ftab[i];
234  for (i = 1; i < 257; i++) ftab[i] += ftab[i-1];
235 
236  for (i = 0; i < nblock; i++) {
237  j = eclass8[i];
238  k = ftab[j] - 1;
239  ftab[j] = k;
240  fmap[k] = i;
241  }
242 
243  nBhtab = 2 + (nblock / 32);
244  for (i = 0; i < nBhtab; i++) bhtab[i] = 0;
245  for (i = 0; i < 256; i++) SET_BH(ftab[i]);
246 
247  /*--
248  Inductively refine the buckets. Kind-of an
249  "exponential radix sort" (!), inspired by the
250  Manber-Myers suffix array construction algorithm.
251  --*/
252 
253  /*-- set sentinel bits for block-end detection --*/
254  for (i = 0; i < 32; i++) {
255  SET_BH(nblock + 2*i);
256  CLEAR_BH(nblock + 2*i + 1);
257  }
258 
259  /*-- the log(N) loop --*/
260  H = 1;
261  while (1) {
262 
263  if (verb >= 4)
264  VPrintf1 ( " depth %6d has ", H );
265 
266  j = 0;
267  for (i = 0; i < nblock; i++) {
268  if (ISSET_BH(i)) j = i;
269  k = fmap[i] - H; if (k < 0) k += nblock;
270  eclass[k] = j;
271  }
272 
273  nNotDone = 0;
274  r = -1;
275  while (1) {
276 
277  /*-- find the next non-singleton bucket --*/
278  k = r + 1;
279  while (ISSET_BH(k) && UNALIGNED_BH(k)) k++;
280  if (ISSET_BH(k)) {
281  while (WORD_BH(k) == 0xffffffff) k += 32;
282  while (ISSET_BH(k)) k++;
283  }
284  l = k - 1;
285  if (l >= nblock) break;
286  while (!ISSET_BH(k) && UNALIGNED_BH(k)) k++;
287  if (!ISSET_BH(k)) {
288  while (WORD_BH(k) == 0x00000000) k += 32;
289  while (!ISSET_BH(k)) k++;
290  }
291  r = k - 1;
292  if (r >= nblock) break;
293 
294  /*-- now [l, r] bracket current bucket --*/
295  if (r > l) {
296  nNotDone += (r - l + 1);
297  fallbackQSort3 ( fmap, eclass, l, r );
298 
299  /*-- scan bucket and generate header bits-- */
300  cc = -1;
301  for (i = l; i <= r; i++) {
302  cc1 = eclass[fmap[i]];
303  if (cc != cc1) { SET_BH(i); cc = cc1; };
304  }
305  }
306  }
307 
308  if (verb >= 4)
309  VPrintf1 ( "%6d unresolved strings\n", nNotDone );
310 
311  H *= 2;
312  if (H > nblock || nNotDone == 0) break;
313  }
314 
315  /*--
316  Reconstruct the original block in
317  eclass8 [0 .. nblock-1], since the
318  previous phase destroyed it.
319  --*/
320  if (verb >= 4)
321  VPrintf0 ( " reconstructing block ...\n" );
322  j = 0;
323  for (i = 0; i < nblock; i++) {
324  while (ftabCopy[j] == 0) j++;
325  ftabCopy[j]--;
326  eclass8[fmap[i]] = (UChar)j;
327  }
328  AssertH ( j < 256, 1005 );
329 }
330 
331 #undef SET_BH
332 #undef CLEAR_BH
333 #undef ISSET_BH
334 #undef WORD_BH
335 #undef UNALIGNED_BH
336 
337 
338 /*---------------------------------------------*/
339 /*--- The main, O(N^2 log(N)) sorting ---*/
340 /*--- algorithm. Faster for "normal" ---*/
341 /*--- non-repetitive blocks. ---*/
342 /*---------------------------------------------*/
343 
344 /*---------------------------------------------*/
345 static
348  UInt32 i2,
349  UChar* block,
350  UInt16* quadrant,
351  UInt32 nblock,
352  Int32* budget )
353 {
354  Int32 k;
355  UChar c1, c2;
356  UInt16 s1, s2;
357 
358  AssertD ( i1 != i2, "mainGtU" );
359  /* 1 */
360  c1 = block[i1]; c2 = block[i2];
361  if (c1 != c2) return (c1 > c2);
362  i1++; i2++;
363  /* 2 */
364  c1 = block[i1]; c2 = block[i2];
365  if (c1 != c2) return (c1 > c2);
366  i1++; i2++;
367  /* 3 */
368  c1 = block[i1]; c2 = block[i2];
369  if (c1 != c2) return (c1 > c2);
370  i1++; i2++;
371  /* 4 */
372  c1 = block[i1]; c2 = block[i2];
373  if (c1 != c2) return (c1 > c2);
374  i1++; i2++;
375  /* 5 */
376  c1 = block[i1]; c2 = block[i2];
377  if (c1 != c2) return (c1 > c2);
378  i1++; i2++;
379  /* 6 */
380  c1 = block[i1]; c2 = block[i2];
381  if (c1 != c2) return (c1 > c2);
382  i1++; i2++;
383  /* 7 */
384  c1 = block[i1]; c2 = block[i2];
385  if (c1 != c2) return (c1 > c2);
386  i1++; i2++;
387  /* 8 */
388  c1 = block[i1]; c2 = block[i2];
389  if (c1 != c2) return (c1 > c2);
390  i1++; i2++;
391  /* 9 */
392  c1 = block[i1]; c2 = block[i2];
393  if (c1 != c2) return (c1 > c2);
394  i1++; i2++;
395  /* 10 */
396  c1 = block[i1]; c2 = block[i2];
397  if (c1 != c2) return (c1 > c2);
398  i1++; i2++;
399  /* 11 */
400  c1 = block[i1]; c2 = block[i2];
401  if (c1 != c2) return (c1 > c2);
402  i1++; i2++;
403  /* 12 */
404  c1 = block[i1]; c2 = block[i2];
405  if (c1 != c2) return (c1 > c2);
406  i1++; i2++;
407 
408  k = nblock + 8;
409 
410  do {
411  /* 1 */
412  c1 = block[i1]; c2 = block[i2];
413  if (c1 != c2) return (c1 > c2);
414  s1 = quadrant[i1]; s2 = quadrant[i2];
415  if (s1 != s2) return (s1 > s2);
416  i1++; i2++;
417  /* 2 */
418  c1 = block[i1]; c2 = block[i2];
419  if (c1 != c2) return (c1 > c2);
420  s1 = quadrant[i1]; s2 = quadrant[i2];
421  if (s1 != s2) return (s1 > s2);
422  i1++; i2++;
423  /* 3 */
424  c1 = block[i1]; c2 = block[i2];
425  if (c1 != c2) return (c1 > c2);
426  s1 = quadrant[i1]; s2 = quadrant[i2];
427  if (s1 != s2) return (s1 > s2);
428  i1++; i2++;
429  /* 4 */
430  c1 = block[i1]; c2 = block[i2];
431  if (c1 != c2) return (c1 > c2);
432  s1 = quadrant[i1]; s2 = quadrant[i2];
433  if (s1 != s2) return (s1 > s2);
434  i1++; i2++;
435  /* 5 */
436  c1 = block[i1]; c2 = block[i2];
437  if (c1 != c2) return (c1 > c2);
438  s1 = quadrant[i1]; s2 = quadrant[i2];
439  if (s1 != s2) return (s1 > s2);
440  i1++; i2++;
441  /* 6 */
442  c1 = block[i1]; c2 = block[i2];
443  if (c1 != c2) return (c1 > c2);
444  s1 = quadrant[i1]; s2 = quadrant[i2];
445  if (s1 != s2) return (s1 > s2);
446  i1++; i2++;
447  /* 7 */
448  c1 = block[i1]; c2 = block[i2];
449  if (c1 != c2) return (c1 > c2);
450  s1 = quadrant[i1]; s2 = quadrant[i2];
451  if (s1 != s2) return (s1 > s2);
452  i1++; i2++;
453  /* 8 */
454  c1 = block[i1]; c2 = block[i2];
455  if (c1 != c2) return (c1 > c2);
456  s1 = quadrant[i1]; s2 = quadrant[i2];
457  if (s1 != s2) return (s1 > s2);
458  i1++; i2++;
459 
460  if (i1 >= nblock) i1 -= nblock;
461  if (i2 >= nblock) i2 -= nblock;
462 
463  k -= 8;
464  (*budget)--;
465  }
466  while (k >= 0);
467 
468  return False;
469 }
470 
471 
472 /*---------------------------------------------*/
473 /*--
474  Knuth's increments seem to work better
475  than Incerpi-Sedgewick here. Possibly
476  because the number of elems to sort is
477  usually small, typically <= 20.
478 --*/
479 static
480 Int32 incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280,
481  9841, 29524, 88573, 265720,
482  797161, 2391484 };
483 
484 static
486  UChar* block,
487  UInt16* quadrant,
488  Int32 nblock,
489  Int32 lo,
490  Int32 hi,
491  Int32 d,
492  Int32* budget )
493 {
494  Int32 i, j, h, bigN, hp;
495  UInt32 v;
496 
497  bigN = hi - lo + 1;
498  if (bigN < 2) return;
499 
500  hp = 0;
501  while (incs[hp] < bigN) hp++;
502  hp--;
503 
504  for (; hp >= 0; hp--) {
505  h = incs[hp];
506 
507  i = lo + h;
508  while (True) {
509 
510  /*-- copy 1 --*/
511  if (i > hi) break;
512  v = ptr[i];
513  j = i;
514  while ( mainGtU (
515  ptr[j-h]+d, v+d, block, quadrant, nblock, budget
516  ) ) {
517  ptr[j] = ptr[j-h];
518  j = j - h;
519  if (j <= (lo + h - 1)) break;
520  }
521  ptr[j] = v;
522  i++;
523 
524  /*-- copy 2 --*/
525  if (i > hi) break;
526  v = ptr[i];
527  j = i;
528  while ( mainGtU (
529  ptr[j-h]+d, v+d, block, quadrant, nblock, budget
530  ) ) {
531  ptr[j] = ptr[j-h];
532  j = j - h;
533  if (j <= (lo + h - 1)) break;
534  }
535  ptr[j] = v;
536  i++;
537 
538  /*-- copy 3 --*/
539  if (i > hi) break;
540  v = ptr[i];
541  j = i;
542  while ( mainGtU (
543  ptr[j-h]+d, v+d, block, quadrant, nblock, budget
544  ) ) {
545  ptr[j] = ptr[j-h];
546  j = j - h;
547  if (j <= (lo + h - 1)) break;
548  }
549  ptr[j] = v;
550  i++;
551 
552  if (*budget < 0) return;
553  }
554  }
555 }
556 
557 
558 /*---------------------------------------------*/
559 /*--
560  The following is an implementation of
561  an elegant 3-way quicksort for strings,
562  described in a paper "Fast Algorithms for
563  Sorting and Searching Strings", by Robert
564  Sedgewick and Jon L. Bentley.
565 --*/
566 
567 #define mswap(zz1, zz2) \
568  { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; }
569 
570 #define mvswap(zzp1, zzp2, zzn) \
571 { \
572  Int32 yyp1 = (zzp1); \
573  Int32 yyp2 = (zzp2); \
574  Int32 yyn = (zzn); \
575  while (yyn > 0) { \
576  mswap(ptr[yyp1], ptr[yyp2]); \
577  yyp1++; yyp2++; yyn--; \
578  } \
579 }
580 
581 static
584 {
585  UChar t;
586  if (a > b) { t = a; a = b; b = t; };
587  if (b > c) {
588  b = c;
589  if (a > b) b = a;
590  }
591  return b;
592 }
593 
594 #define mmin(a,b) ((a) < (b)) ? (a) : (b)
595 
596 #define mpush(lz,hz,dz) { stackLo[sp] = lz; \
597  stackHi[sp] = hz; \
598  stackD [sp] = dz; \
599  sp++; }
600 
601 #define mpop(lz,hz,dz) { sp--; \
602  lz = stackLo[sp]; \
603  hz = stackHi[sp]; \
604  dz = stackD [sp]; }
605 
606 
607 #define mnextsize(az) (nextHi[az]-nextLo[az])
608 
609 #define mnextswap(az,bz) \
610  { Int32 tz; \
611  tz = nextLo[az]; nextLo[az] = nextLo[bz]; nextLo[bz] = tz; \
612  tz = nextHi[az]; nextHi[az] = nextHi[bz]; nextHi[bz] = tz; \
613  tz = nextD [az]; nextD [az] = nextD [bz]; nextD [bz] = tz; }
614 
615 
616 #define MAIN_QSORT_SMALL_THRESH 20
617 #define MAIN_QSORT_DEPTH_THRESH (BZ_N_RADIX + BZ_N_QSORT)
618 #define MAIN_QSORT_STACK_SIZE 100
619 
620 static
622  UChar* block,
623  UInt16* quadrant,
624  Int32 nblock,
625  Int32 loSt,
626  Int32 hiSt,
627  Int32 dSt,
628  Int32* budget )
629 {
630  Int32 unLo, unHi, ltLo, gtHi, n, m, med;
631  Int32 sp, lo, hi, d;
632 
633  Int32 stackLo[MAIN_QSORT_STACK_SIZE];
634  Int32 stackHi[MAIN_QSORT_STACK_SIZE];
635  Int32 stackD [MAIN_QSORT_STACK_SIZE];
636 
637  Int32 nextLo[3];
638  Int32 nextHi[3];
639  Int32 nextD [3];
640 
641  sp = 0;
642  mpush ( loSt, hiSt, dSt );
643 
644  while (sp > 0) {
645 
646  AssertH ( sp < MAIN_QSORT_STACK_SIZE - 2, 1001 );
647 
648  mpop ( lo, hi, d );
649  if (hi - lo < MAIN_QSORT_SMALL_THRESH ||
651  mainSimpleSort ( ptr, block, quadrant, nblock, lo, hi, d, budget );
652  if (*budget < 0) return;
653  continue;
654  }
655 
656  med = (Int32)
657  mmed3 ( block[ptr[ lo ]+d],
658  block[ptr[ hi ]+d],
659  block[ptr[ (lo+hi)>>1 ]+d] );
660 
661  unLo = ltLo = lo;
662  unHi = gtHi = hi;
663 
664  while (True) {
665  while (True) {
666  if (unLo > unHi) break;
667  n = ((Int32)block[ptr[unLo]+d]) - med;
668  if (n == 0) {
669  mswap(ptr[unLo], ptr[ltLo]);
670  ltLo++; unLo++; continue;
671  };
672  if (n > 0) break;
673  unLo++;
674  }
675  while (True) {
676  if (unLo > unHi) break;
677  n = ((Int32)block[ptr[unHi]+d]) - med;
678  if (n == 0) {
679  mswap(ptr[unHi], ptr[gtHi]);
680  gtHi--; unHi--; continue;
681  };
682  if (n < 0) break;
683  unHi--;
684  }
685  if (unLo > unHi) break;
686  mswap(ptr[unLo], ptr[unHi]); unLo++; unHi--;
687  }
688 
689  AssertD ( unHi == unLo-1, "mainQSort3(2)" );
690 
691  if (gtHi < ltLo) {
692  mpush(lo, hi, d+1 );
693  continue;
694  }
695 
696  n = mmin(ltLo-lo, unLo-ltLo); mvswap(lo, unLo-n, n);
697  m = mmin(hi-gtHi, gtHi-unHi); mvswap(unLo, hi-m+1, m);
698 
699  n = lo + unLo - ltLo - 1;
700  m = hi - (gtHi - unHi) + 1;
701 
702  nextLo[0] = lo; nextHi[0] = n; nextD[0] = d;
703  nextLo[1] = m; nextHi[1] = hi; nextD[1] = d;
704  nextLo[2] = n+1; nextHi[2] = m-1; nextD[2] = d+1;
705 
706  if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
707  if (mnextsize(1) < mnextsize(2)) mnextswap(1,2);
708  if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
709 
710  AssertD (mnextsize(0) >= mnextsize(1), "mainQSort3(8)" );
711  AssertD (mnextsize(1) >= mnextsize(2), "mainQSort3(9)" );
712 
713  mpush (nextLo[0], nextHi[0], nextD[0]);
714  mpush (nextLo[1], nextHi[1], nextD[1]);
715  mpush (nextLo[2], nextHi[2], nextD[2]);
716  }
717 }
718 
719 #undef mswap
720 #undef mvswap
721 #undef mpush
722 #undef mpop
723 #undef mmin
724 #undef mnextsize
725 #undef mnextswap
726 #undef MAIN_QSORT_SMALL_THRESH
727 #undef MAIN_QSORT_DEPTH_THRESH
728 #undef MAIN_QSORT_STACK_SIZE
729 
730 
731 /*---------------------------------------------*/
732 /* Pre:
733  nblock > N_OVERSHOOT
734  block32 exists for [0 .. nblock-1 +N_OVERSHOOT]
735  ((UChar*)block32) [0 .. nblock-1] holds block
736  ptr exists for [0 .. nblock-1]
737 
738  Post:
739  ((UChar*)block32) [0 .. nblock-1] holds block
740  All other areas of block32 destroyed
741  ftab [0 .. 65536 ] destroyed
742  ptr [0 .. nblock-1] holds sorted order
743  if (*budget < 0), sorting was abandoned
744 */
745 
746 #define BIGFREQ(b) (ftab[((b)+1) << 8] - ftab[(b) << 8])
747 #define SETMASK (1 << 21)
748 #define CLEARMASK (~(SETMASK))
749 
750 static
751 void mainSort ( UInt32* ptr,
752  UChar* block,
753  UInt16* quadrant,
754  UInt32* ftab,
755  Int32 nblock,
756  Int32 verb,
757  Int32* budget )
758 {
759  Int32 i, j, k, ss, sb;
760  Int32 runningOrder[256];
761  Bool bigDone[256];
762  Int32 copyStart[256];
763  Int32 copyEnd [256];
764  UChar c1;
765  Int32 numQSorted;
766  UInt16 s;
767  if (verb >= 4) VPrintf0 ( " main sort initialise ...\n" );
768 
769  /*-- set up the 2-byte frequency table --*/
770  for (i = 65536; i >= 0; i--) ftab[i] = 0;
771 
772  j = block[0] << 8;
773  i = nblock-1;
774  for (; i >= 3; i -= 4) {
775  quadrant[i] = 0;
776  j = (j >> 8) | ( ((UInt16)block[i]) << 8);
777  ftab[j]++;
778  quadrant[i-1] = 0;
779  j = (j >> 8) | ( ((UInt16)block[i-1]) << 8);
780  ftab[j]++;
781  quadrant[i-2] = 0;
782  j = (j >> 8) | ( ((UInt16)block[i-2]) << 8);
783  ftab[j]++;
784  quadrant[i-3] = 0;
785  j = (j >> 8) | ( ((UInt16)block[i-3]) << 8);
786  ftab[j]++;
787  }
788  for (; i >= 0; i--) {
789  quadrant[i] = 0;
790  j = (j >> 8) | ( ((UInt16)block[i]) << 8);
791  ftab[j]++;
792  }
793 
794  /*-- (emphasises close relationship of block & quadrant) --*/
795  for (i = 0; i < BZ_N_OVERSHOOT; i++) {
796  block [nblock+i] = block[i];
797  quadrant[nblock+i] = 0;
798  }
799 
800  if (verb >= 4) VPrintf0 ( " bucket sorting ...\n" );
801 
802  /*-- Complete the initial radix sort --*/
803  for (i = 1; i <= 65536; i++) ftab[i] += ftab[i-1];
804 
805  s = block[0] << 8;
806  i = nblock-1;
807  for (; i >= 3; i -= 4) {
808  s = (s >> 8) | (block[i] << 8);
809  j = ftab[s] -1;
810  ftab[s] = j;
811  ptr[j] = i;
812  s = (s >> 8) | (block[i-1] << 8);
813  j = ftab[s] -1;
814  ftab[s] = j;
815  ptr[j] = i-1;
816  s = (s >> 8) | (block[i-2] << 8);
817  j = ftab[s] -1;
818  ftab[s] = j;
819  ptr[j] = i-2;
820  s = (s >> 8) | (block[i-3] << 8);
821  j = ftab[s] -1;
822  ftab[s] = j;
823  ptr[j] = i-3;
824  }
825  for (; i >= 0; i--) {
826  s = (s >> 8) | (block[i] << 8);
827  j = ftab[s] -1;
828  ftab[s] = j;
829  ptr[j] = i;
830  }
831 
832  /*--
833  Now ftab contains the first loc of every small bucket.
834  Calculate the running order, from smallest to largest
835  big bucket.
836  --*/
837  for (i = 0; i <= 255; i++) {
838  bigDone [i] = False;
839  runningOrder[i] = i;
840  }
841 
842  {
843  Int32 vv;
844  Int32 h = 1;
845  do h = 3 * h + 1; while (h <= 256);
846  do {
847  h = h / 3;
848  for (i = h; i <= 255; i++) {
849  vv = runningOrder[i];
850  j = i;
851  while ( BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv) ) {
852  runningOrder[j] = runningOrder[j-h];
853  j = j - h;
854  if (j <= (h - 1)) goto zero;
855  }
856  zero:
857  runningOrder[j] = vv;
858  }
859  } while (h != 1);
860  }
861 
862  /*--
863  The main sorting loop.
864  --*/
865 
866  numQSorted = 0;
867 
868  for (i = 0; i <= 255; i++) {
869 
870  /*--
871  Process big buckets, starting with the least full.
872  Basically this is a 3-step process in which we call
873  mainQSort3 to sort the small buckets [ss, j], but
874  also make a big effort to avoid the calls if we can.
875  --*/
876  ss = runningOrder[i];
877 
878  /*--
879  Step 1:
880  Complete the big bucket [ss] by quicksorting
881  any unsorted small buckets [ss, j], for j != ss.
882  Hopefully previous pointer-scanning phases have already
883  completed many of the small buckets [ss, j], so
884  we don't have to sort them at all.
885  --*/
886  for (j = 0; j <= 255; j++) {
887  if (j != ss) {
888  sb = (ss << 8) + j;
889  if ( ! (ftab[sb] & SETMASK) ) {
890  Int32 lo = ftab[sb] & CLEARMASK;
891  Int32 hi = (ftab[sb+1] & CLEARMASK) - 1;
892  if (hi > lo) {
893  if (verb >= 4)
894  VPrintf4 ( " qsort [0x%x, 0x%x] "
895  "done %d this %d\n",
896  ss, j, numQSorted, hi - lo + 1 );
897  mainQSort3 (
898  ptr, block, quadrant, nblock,
899  lo, hi, BZ_N_RADIX, budget
900  );
901  numQSorted += (hi - lo + 1);
902  if (*budget < 0) return;
903  }
904  }
905  ftab[sb] |= SETMASK;
906  }
907  }
908 
909  AssertH ( !bigDone[ss], 1006 );
910 
911  /*--
912  Step 2:
913  Now scan this big bucket [ss] so as to synthesise the
914  sorted order for small buckets [t, ss] for all t,
915  including, magically, the bucket [ss,ss] too.
916  This will avoid doing Real Work in subsequent Step 1's.
917  --*/
918  {
919  for (j = 0; j <= 255; j++) {
920  copyStart[j] = ftab[(j << 8) + ss] & CLEARMASK;
921  copyEnd [j] = (ftab[(j << 8) + ss + 1] & CLEARMASK) - 1;
922  }
923  for (j = ftab[ss << 8] & CLEARMASK; j < copyStart[ss]; j++) {
924  k = ptr[j]-1; if (k < 0) k += nblock;
925  c1 = block[k];
926  if (!bigDone[c1])
927  ptr[ copyStart[c1]++ ] = k;
928  }
929  for (j = (ftab[(ss+1) << 8] & CLEARMASK) - 1; j > copyEnd[ss]; j--) {
930  k = ptr[j]-1; if (k < 0) k += nblock;
931  c1 = block[k];
932  if (!bigDone[c1])
933  ptr[ copyEnd[c1]-- ] = k;
934  }
935  }
936 
937  AssertH ( (copyStart[ss]-1 == copyEnd[ss])
938  ||
939  /* Extremely rare case missing in bzip2-1.0.0 and 1.0.1.
940  Necessity for this case is demonstrated by compressing
941  a sequence of approximately 48.5 million of character
942  251; 1.0.0/1.0.1 will then die here. */
943  (copyStart[ss] == 0 && copyEnd[ss] == nblock-1),
944  1007 )
945 
946  for (j = 0; j <= 255; j++) ftab[(j << 8) + ss] |= SETMASK;
947 
948  /*--
949  Step 3:
950  The [ss] big bucket is now done. Record this fact,
951  and update the quadrant descriptors. Remember to
952  update quadrants in the overshoot area too, if
953  necessary. The "if (i < 255)" test merely skips
954  this updating for the last bucket processed, since
955  updating for the last bucket is pointless.
956 
957  The quadrant array provides a way to incrementally
958  cache sort orderings, as they appear, so as to
959  make subsequent comparisons in fullGtU() complete
960  faster. For repetitive blocks this makes a big
961  difference (but not big enough to be able to avoid
962  the fallback sorting mechanism, exponential radix sort).
963 
964  The precise meaning is: at all times:
965 
966  for 0 <= i < nblock and 0 <= j <= nblock
967 
968  if block[i] != block[j],
969 
970  then the relative values of quadrant[i] and
971  quadrant[j] are meaningless.
972 
973  else {
974  if quadrant[i] < quadrant[j]
975  then the string starting at i lexicographically
976  precedes the string starting at j
977 
978  else if quadrant[i] > quadrant[j]
979  then the string starting at j lexicographically
980  precedes the string starting at i
981 
982  else
983  the relative ordering of the strings starting
984  at i and j has not yet been determined.
985  }
986  --*/
987  bigDone[ss] = True;
988 
989  if (i < 255) {
990  Int32 bbStart = ftab[ss << 8] & CLEARMASK;
991  Int32 bbSize = (ftab[(ss+1) << 8] & CLEARMASK) - bbStart;
992  Int32 shifts = 0;
993 
994  while ((bbSize >> shifts) > 65534) shifts++;
995 
996  for (j = bbSize-1; j >= 0; j--) {
997  Int32 a2update = ptr[bbStart + j];
998  UInt16 qVal = (UInt16)(j >> shifts);
999  quadrant[a2update] = qVal;
1000  if (a2update < BZ_N_OVERSHOOT)
1001  quadrant[a2update + nblock] = qVal;
1002  }
1003  AssertH ( ((bbSize-1) >> shifts) <= 65535, 1002 );
1004  }
1005 
1006  }
1007 
1008  if (verb >= 4)
1009  VPrintf3 ( " %d pointers, %d sorted, %d scanned\n",
1010  nblock, numQSorted, nblock - numQSorted );
1011 }
1012 
1013 #undef BIGFREQ
1014 #undef SETMASK
1015 #undef CLEARMASK
1016 
1017 
1018 /*---------------------------------------------*/
1019 /* Pre:
1020  nblock > 0
1021  arr2 exists for [0 .. nblock-1 +N_OVERSHOOT]
1022  ((UChar*)arr2) [0 .. nblock-1] holds block
1023  arr1 exists for [0 .. nblock-1]
1024 
1025  Post:
1026  ((UChar*)arr2) [0 .. nblock-1] holds block
1027  All other areas of block destroyed
1028  ftab [ 0 .. 65536 ] destroyed
1029  arr1 [0 .. nblock-1] holds sorted order
1030 */
1032 {
1033  UInt32* ptr = s->ptr;
1034  UChar* block = s->block;
1035  UInt32* ftab = s->ftab;
1036  Int32 nblock = s->nblock;
1037  Int32 verb = s->verbosity;
1038  Int32 wfact = s->workFactor;
1039  UInt16* quadrant;
1040  Int32 budget;
1041  Int32 budgetInit;
1042  Int32 i;
1043 
1044  if (nblock < 10000) {
1045  fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb );
1046  } else {
1047  /* Calculate the location for quadrant, remembering to get
1048  the alignment right. Assumes that &(block[0]) is at least
1049  2-byte aligned -- this should be ok since block is really
1050  the first section of arr2.
1051  */
1052  i = nblock+BZ_N_OVERSHOOT;
1053  if (i & 1) i++;
1054  quadrant = (UInt16*)(&(block[i]));
1055 
1056  /* (wfact-1) / 3 puts the default-factor-30
1057  transition point at very roughly the same place as
1058  with v0.1 and v0.9.0.
1059  Not that it particularly matters any more, since the
1060  resulting compressed stream is now the same regardless
1061  of whether or not we use the main sort or fallback sort.
1062  */
1063  if (wfact < 1 ) wfact = 1;
1064  if (wfact > 100) wfact = 100;
1065  budgetInit = nblock * ((wfact-1) / 3);
1066  budget = budgetInit;
1067 
1068  mainSort ( ptr, block, quadrant, ftab, nblock, verb, &budget );
1069  if (verb >= 3)
1070  VPrintf3 ( " %d work, %d block, ratio %5.2f\n",
1071  budgetInit - budget,
1072  nblock,
1073  (float)(budgetInit - budget) /
1074  (float)(nblock==0 ? 1 : nblock) );
1075  if (budget < 0) {
1076  if (verb >= 2)
1077  VPrintf0 ( " too repetitive; using fallback"
1078  " sorting algorithm\n" );
1079  fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb );
1080  }
1081  }
1082 
1083  s->origPtr = -1;
1084  for (i = 0; i < s->nblock; i++)
1085  if (ptr[i] == 0)
1086  { s->origPtr = i; break; };
1087 
1088  AssertH( s->origPtr != -1, 1003 );
1089 }
1090 
1091 
1092 /*-------------------------------------------------------------*/
1093 /*--- end blocksort.c ---*/
1094 /*-------------------------------------------------------------*/
static unsigned int block
Definition: xmlmemory.c:118
#define fpush(lz, hz)
Definition: blocksort.c:80
#define BIGFREQ(b)
Definition: blocksort.c:746
struct S2 s2
static DNS_RECORDW r3
Definition: record.c:39
#define ISSET_BH(zz)
Definition: blocksort.c:207
superblock * sb
Definition: btrfs.c:4162
GLdouble GLdouble GLdouble r
Definition: gl.h:2055
DWORD UInt32
Definition: chm_lib.c:106
#define Int32
Definition: interfaces.hpp:76
#define UNALIGNED_BH(zz)
Definition: blocksort.c:209
GLdouble n
Definition: glext.h:7729
GLdouble GLdouble t
Definition: gl.h:2047
#define FALLBACK_QSORT_STACK_SIZE
Definition: blocksort.c:89
#define VPrintf1(zf, za1)
Definition: bzlib_private.h:75
#define fpop(lz, hz)
Definition: blocksort.c:84
#define fmin(a, b)
Definition: blocksort.c:78
BYTE UChar
Definition: chm_lib.c:102
USHORT UInt16
Definition: chm_lib.c:104
const GLfloat * m
Definition: glext.h:10848
GLfloat GLfloat GLfloat GLfloat h
Definition: glext.h:7723
#define SET_BH(zz)
Definition: blocksort.c:205
#define MAIN_QSORT_SMALL_THRESH
Definition: blocksort.c:616
#define mswap(zz1, zz2)
Definition: blocksort.c:567
#define BZ_N_OVERSHOOT
GLsizei GLenum const GLvoid GLsizei GLenum GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLint GLint GLint GLshort GLshort GLshort GLubyte GLubyte GLubyte GLuint GLuint GLuint GLushort GLushort GLushort GLbyte GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLfloat GLint GLint GLint GLint GLshort GLshort GLshort GLshort GLubyte GLubyte GLubyte GLubyte GLuint GLuint GLuint GLuint GLushort GLushort GLushort GLushort GLboolean const GLdouble const GLfloat const GLint const GLshort const GLbyte const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLdouble const GLfloat const GLfloat const GLint const GLint const GLshort const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort GLenum GLenum GLenum GLfloat GLenum GLint GLenum GLenum GLenum GLfloat GLenum GLenum GLint GLenum GLfloat GLenum GLint GLint GLushort GLenum GLenum GLfloat GLenum GLenum GLint GLfloat const GLubyte GLenum GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLint GLint GLsizei GLsizei GLint GLenum GLenum const GLvoid GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLenum const GLdouble GLenum GLenum const GLfloat GLenum GLenum const GLint GLsizei GLuint GLfloat GLuint GLbitfield GLfloat GLint GLuint GLboolean GLenum GLfloat GLenum GLbitfield GLenum GLfloat GLfloat GLint GLint const GLfloat GLenum GLfloat GLfloat GLint GLint GLfloat GLfloat GLint GLint const GLfloat GLint GLfloat GLfloat GLint GLfloat GLfloat GLint GLfloat GLfloat const GLdouble const GLfloat const GLdouble const GLfloat GLint i
Definition: glfuncs.h:248
#define SETMASK
Definition: blocksort.c:747
#define BZ_N_RADIX
#define a
Definition: ke_i.h:78
#define AssertH(cond, errcode)
Definition: bzlib_private.h:59
#define mpop(lz, hz, dz)
Definition: blocksort.c:601
static PVOID ptr
Definition: dispmode.c:27
#define mnextswap(az, bz)
Definition: blocksort.c:609
unsigned char Bool
Definition: bzip2.c:162
#define b
Definition: ke_i.h:79
static __inline__ UChar mmed3(UChar a, UChar b, UChar c)
Definition: blocksort.c:583
void BZ2_blockSort(EState *s)
Definition: blocksort.c:1031
GLsizei GLenum const GLvoid GLsizei GLenum GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLint GLint GLint GLshort GLshort GLshort GLubyte GLubyte GLubyte GLuint GLuint GLuint GLushort GLushort GLushort GLbyte GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLfloat GLint GLint GLint GLint GLshort GLshort GLshort GLshort GLubyte GLubyte GLubyte GLubyte GLuint GLuint GLuint GLuint GLushort GLushort GLushort GLushort GLboolean const GLdouble const GLfloat const GLint const GLshort const GLbyte const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLdouble const GLfloat const GLfloat const GLint const GLint const GLshort const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort GLenum GLenum GLenum GLfloat GLenum GLint GLenum GLenum GLenum GLfloat GLenum GLenum GLint GLenum GLfloat GLenum GLint GLint GLushort GLenum GLenum GLfloat GLenum GLenum GLint GLfloat const GLubyte GLenum GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLint GLint GLsizei GLsizei GLint GLenum GLenum const GLvoid GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLenum const GLdouble GLenum GLenum const GLfloat GLenum GLenum const GLint GLsizei GLuint GLfloat GLuint GLbitfield GLfloat GLint GLuint GLboolean GLenum GLfloat GLenum GLbitfield GLenum GLfloat GLfloat GLint GLint const GLfloat GLenum GLfloat GLfloat GLint GLint GLfloat GLfloat GLint GLint const GLfloat GLint GLfloat GLfloat GLint GLfloat GLfloat GLint GLfloat GLfloat const GLdouble const GLfloat const GLdouble const GLfloat GLint GLint GLint j
Definition: glfuncs.h:250
#define fswap(zz1, zz2)
Definition: blocksort.c:63
r l[0]
Definition: byte_order.h:167
#define True
Definition: types.h:24
#define mvswap(zzp1, zzp2, zzn)
Definition: blocksort.c:570
GLboolean GLboolean GLboolean b
Definition: glext.h:6204
#define False
Definition: types.h:25
#define d
Definition: ke_i.h:81
static void mainSimpleSort(UInt32 *ptr, UChar *block, UInt16 *quadrant, Int32 nblock, Int32 lo, Int32 hi, Int32 d, Int32 *budget)
Definition: blocksort.c:485
struct S1 s1
#define UInt16
Definition: interfaces.hpp:74
const GLubyte * c
Definition: glext.h:8905
#define mmin(a, b)
Definition: blocksort.c:594
static double zero
Definition: j0_y0.c:96
#define fvswap(zzp1, zzp2, zzn)
Definition: blocksort.c:66
static __inline__ void fallbackSimpleSort(UInt32 *fmap, UInt32 *eclass, Int32 lo, Int32 hi)
Definition: blocksort.c:32
#define FALLBACK_QSORT_SMALL_THRESH
Definition: blocksort.c:88
#define WORD_BH(zz)
Definition: blocksort.c:208
GLdouble s
Definition: gl.h:2039
#define MAIN_QSORT_DEPTH_THRESH
Definition: blocksort.c:617
#define CLEAR_BH(zz)
Definition: blocksort.c:206
uint32_t cc
Definition: isohybrid.c:75
#define VPrintf0(zf)
Definition: bzlib_private.h:73
const GLdouble * v
Definition: gl.h:2040
#define VPrintf3(zf, za1, za2, za3)
Definition: bzlib_private.h:79
static void fallbackSort(UInt32 *fmap, UInt32 *eclass, UInt32 *bhtab, Int32 nblock, Int32 verb)
Definition: blocksort.c:212
#define CLEARMASK
Definition: blocksort.c:748
#define mpush(lz, hz, dz)
Definition: blocksort.c:596
#define __inline__
Definition: bzlib_private.h:53
#define AssertD(cond, msg)
Definition: bzlib_private.h:70
static void fallbackQSort3(UInt32 *fmap, UInt32 *eclass, Int32 loSt, Int32 hiSt)
Definition: blocksort.c:93
LONG Int32
Definition: chm_lib.c:105
#define MAIN_QSORT_STACK_SIZE
Definition: blocksort.c:618
#define c
Definition: ke_i.h:80
#define mnextsize(az)
Definition: blocksort.c:607
static const WCHAR sp[]
Definition: suminfo.c:288
#define VPrintf4(zf, za1, za2, za3, za4)
Definition: bzlib_private.h:81
GLboolean GLboolean GLboolean GLboolean a
Definition: glext.h:6204
static void mainQSort3(UInt32 *ptr, UChar *block, UInt16 *quadrant, Int32 nblock, Int32 loSt, Int32 hiSt, Int32 dSt, Int32 *budget)
Definition: blocksort.c:621
#define ss
Definition: i386-dis.c:432
static Int32 incs[14]
Definition: blocksort.c:480
int k
Definition: mpi.c:3369
static __inline__ Bool mainGtU(UInt32 i1, UInt32 i2, UChar *block, UInt16 *quadrant, UInt32 nblock, Int32 *budget)
Definition: blocksort.c:347
#define H
static void mainSort(UInt32 *ptr, UChar *block, UInt16 *quadrant, UInt32 *ftab, Int32 nblock, Int32 verb, Int32 *budget)
Definition: blocksort.c:751