Home | Info | Community | Development | myReactOS | Contact Us
ReactOS Development > Doxygenmpi.c
Go to the documentation of this file.
00001 /* 00002 * dlls/rsaenh/mpi.c 00003 * Multi Precision Integer functions 00004 * 00005 * Copyright 2004 Michael Jung 00006 * Based on public domain code by Tom St Denis (tomstdenis@iahu.ca) 00007 * 00008 * This library is free software; you can redistribute it and/or 00009 * modify it under the terms of the GNU Lesser General Public 00010 * License as published by the Free Software Foundation; either 00011 * version 2.1 of the License, or (at your option) any later version. 00012 * 00013 * This library is distributed in the hope that it will be useful, 00014 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00016 * Lesser General Public License for more details. 00017 * 00018 * You should have received a copy of the GNU Lesser General Public 00019 * License along with this library; if not, write to the Free Software 00020 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA 00021 */ 00022 00023 /* 00024 * This file contains code from the LibTomCrypt cryptographic 00025 * library written by Tom St Denis (tomstdenis@iahu.ca). LibTomCrypt 00026 * is in the public domain. The code in this file is tailored to 00027 * special requirements. Take a look at http://libtomcrypt.org for the 00028 * original version. 00029 */ 00030 00031 #include <stdarg.h> 00032 00033 #include "windef.h" 00034 #include "winbase.h" 00035 #include "tomcrypt.h" 00036 00037 /* Known optimal configurations 00038 CPU /Compiler /MUL CUTOFF/SQR CUTOFF 00039 ------------------------------------------------------------- 00040 Intel P4 Northwood /GCC v3.4.1 / 88/ 128/LTM 0.32 ;-) 00041 */ 00042 static const int KARATSUBA_MUL_CUTOFF = 88, /* Min. number of digits before Karatsuba multiplication is used. */ 00043 KARATSUBA_SQR_CUTOFF = 128; /* Min. number of digits before Karatsuba squaring is used. */ 00044 00045 00046 /* trim unused digits */ 00047 static void mp_clamp(mp_int *a); 00048 00049 /* compare |a| to |b| */ 00050 static int mp_cmp_mag(const mp_int *a, const mp_int *b); 00051 00052 /* Counts the number of lsbs which are zero before the first zero bit */ 00053 static int mp_cnt_lsb(const mp_int *a); 00054 00055 /* computes a = B**n mod b without division or multiplication useful for 00056 * normalizing numbers in a Montgomery system. 00057 */ 00058 static int mp_montgomery_calc_normalization(mp_int *a, const mp_int *b); 00059 00060 /* computes x/R == x (mod N) via Montgomery Reduction */ 00061 static int mp_montgomery_reduce(mp_int *a, const mp_int *m, mp_digit mp); 00062 00063 /* setups the montgomery reduction */ 00064 static int mp_montgomery_setup(const mp_int *a, mp_digit *mp); 00065 00066 /* Barrett Reduction, computes a (mod b) with a precomputed value c 00067 * 00068 * Assumes that 0 < a <= b*b, note if 0 > a > -(b*b) then you can merely 00069 * compute the reduction as -1 * mp_reduce(mp_abs(a)) [pseudo code]. 00070 */ 00071 static int mp_reduce(mp_int *a, const mp_int *b, const mp_int *c); 00072 00073 /* reduces a modulo b where b is of the form 2**p - k [0 <= a] */ 00074 static int mp_reduce_2k(mp_int *a, const mp_int *n, mp_digit d); 00075 00076 /* determines k value for 2k reduction */ 00077 static int mp_reduce_2k_setup(const mp_int *a, mp_digit *d); 00078 00079 /* used to setup the Barrett reduction for a given modulus b */ 00080 static int mp_reduce_setup(mp_int *a, const mp_int *b); 00081 00082 /* set to a digit */ 00083 static void mp_set(mp_int *a, mp_digit b); 00084 00085 /* b = a*a */ 00086 static int mp_sqr(const mp_int *a, mp_int *b); 00087 00088 /* c = a * a (mod b) */ 00089 static int mp_sqrmod(const mp_int *a, mp_int *b, mp_int *c); 00090 00091 00092 static void bn_reverse(unsigned char *s, int len); 00093 static int s_mp_add(mp_int *a, mp_int *b, mp_int *c); 00094 static int s_mp_exptmod (const mp_int * G, const mp_int * X, mp_int * P, mp_int * Y); 00095 #define s_mp_mul(a, b, c) s_mp_mul_digs(a, b, c, (a)->used + (b)->used + 1) 00096 static int s_mp_mul_digs(const mp_int *a, const mp_int *b, mp_int *c, int digs); 00097 static int s_mp_mul_high_digs(const mp_int *a, const mp_int *b, mp_int *c, int digs); 00098 static int s_mp_sqr(const mp_int *a, mp_int *b); 00099 static int s_mp_sub(const mp_int *a, const mp_int *b, mp_int *c); 00100 static int mp_exptmod_fast(const mp_int *G, const mp_int *X, mp_int *P, mp_int *Y, int mode); 00101 static int mp_invmod_slow (const mp_int * a, mp_int * b, mp_int * c); 00102 static int mp_karatsuba_mul(const mp_int *a, const mp_int *b, mp_int *c); 00103 static int mp_karatsuba_sqr(const mp_int *a, mp_int *b); 00104 00105 /* grow as required */ 00106 static int mp_grow (mp_int * a, int size) 00107 { 00108 int i; 00109 mp_digit *tmp; 00110 00111 /* if the alloc size is smaller alloc more ram */ 00112 if (a->alloc < size) { 00113 /* ensure there are always at least MP_PREC digits extra on top */ 00114 size += (MP_PREC * 2) - (size % MP_PREC); 00115 00116 /* reallocate the array a->dp 00117 * 00118 * We store the return in a temporary variable 00119 * in case the operation failed we don't want 00120 * to overwrite the dp member of a. 00121 */ 00122 tmp = HeapReAlloc(GetProcessHeap(), 0, a->dp, sizeof (mp_digit) * size); 00123 if (tmp == NULL) { 00124 /* reallocation failed but "a" is still valid [can be freed] */ 00125 return MP_MEM; 00126 } 00127 00128 /* reallocation succeeded so set a->dp */ 00129 a->dp = tmp; 00130 00131 /* zero excess digits */ 00132 i = a->alloc; 00133 a->alloc = size; 00134 for (; i < a->alloc; i++) { 00135 a->dp[i] = 0; 00136 } 00137 } 00138 return MP_OKAY; 00139 } 00140 00141 /* b = a/2 */ 00142 static int mp_div_2(const mp_int * a, mp_int * b) 00143 { 00144 int x, res, oldused; 00145 00146 /* copy */ 00147 if (b->alloc < a->used) { 00148 if ((res = mp_grow (b, a->used)) != MP_OKAY) { 00149 return res; 00150 } 00151 } 00152 00153 oldused = b->used; 00154 b->used = a->used; 00155 { 00156 register mp_digit r, rr, *tmpa, *tmpb; 00157 00158 /* source alias */ 00159 tmpa = a->dp + b->used - 1; 00160 00161 /* dest alias */ 00162 tmpb = b->dp + b->used - 1; 00163 00164 /* carry */ 00165 r = 0; 00166 for (x = b->used - 1; x >= 0; x--) { 00167 /* get the carry for the next iteration */ 00168 rr = *tmpa & 1; 00169 00170 /* shift the current digit, add in carry and store */ 00171 *tmpb-- = (*tmpa-- >> 1) | (r << (DIGIT_BIT - 1)); 00172 00173 /* forward carry to next iteration */ 00174 r = rr; 00175 } 00176 00177 /* zero excess digits */ 00178 tmpb = b->dp + b->used; 00179 for (x = b->used; x < oldused; x++) { 00180 *tmpb++ = 0; 00181 } 00182 } 00183 b->sign = a->sign; 00184 mp_clamp (b); 00185 return MP_OKAY; 00186 } 00187 00188 /* swap the elements of two integers, for cases where you can't simply swap the 00189 * mp_int pointers around 00190 */ 00191 static void 00192 mp_exch (mp_int * a, mp_int * b) 00193 { 00194 mp_int t; 00195 00196 t = *a; 00197 *a = *b; 00198 *b = t; 00199 } 00200 00201 /* init a new mp_int */ 00202 static int mp_init (mp_int * a) 00203 { 00204 int i; 00205 00206 /* allocate memory required and clear it */ 00207 a->dp = HeapAlloc(GetProcessHeap(), 0, sizeof (mp_digit) * MP_PREC); 00208 if (a->dp == NULL) { 00209 return MP_MEM; 00210 } 00211 00212 /* set the digits to zero */ 00213 for (i = 0; i < MP_PREC; i++) { 00214 a->dp[i] = 0; 00215 } 00216 00217 /* set the used to zero, allocated digits to the default precision 00218 * and sign to positive */ 00219 a->used = 0; 00220 a->alloc = MP_PREC; 00221 a->sign = MP_ZPOS; 00222 00223 return MP_OKAY; 00224 } 00225 00226 /* init an mp_init for a given size */ 00227 static int mp_init_size (mp_int * a, int size) 00228 { 00229 int x; 00230 00231 /* pad size so there are always extra digits */ 00232 size += (MP_PREC * 2) - (size % MP_PREC); 00233 00234 /* alloc mem */ 00235 a->dp = HeapAlloc(GetProcessHeap(), 0, sizeof (mp_digit) * size); 00236 if (a->dp == NULL) { 00237 return MP_MEM; 00238 } 00239 00240 /* set the members */ 00241 a->used = 0; 00242 a->alloc = size; 00243 a->sign = MP_ZPOS; 00244 00245 /* zero the digits */ 00246 for (x = 0; x < size; x++) { 00247 a->dp[x] = 0; 00248 } 00249 00250 return MP_OKAY; 00251 } 00252 00253 /* clear one (frees) */ 00254 static void 00255 mp_clear (mp_int * a) 00256 { 00257 int i; 00258 00259 /* only do anything if a hasn't been freed previously */ 00260 if (a->dp != NULL) { 00261 /* first zero the digits */ 00262 for (i = 0; i < a->used; i++) { 00263 a->dp[i] = 0; 00264 } 00265 00266 /* free ram */ 00267 HeapFree(GetProcessHeap(), 0, a->dp); 00268 00269 /* reset members to make debugging easier */ 00270 a->dp = NULL; 00271 a->alloc = a->used = 0; 00272 a->sign = MP_ZPOS; 00273 } 00274 } 00275 00276 /* set to zero */ 00277 static void 00278 mp_zero (mp_int * a) 00279 { 00280 a->sign = MP_ZPOS; 00281 a->used = 0; 00282 memset (a->dp, 0, sizeof (mp_digit) * a->alloc); 00283 } 00284 00285 /* b = |a| 00286 * 00287 * Simple function copies the input and fixes the sign to positive 00288 */ 00289 static int 00290 mp_abs (const mp_int * a, mp_int * b) 00291 { 00292 int res; 00293 00294 /* copy a to b */ 00295 if (a != b) { 00296 if ((res = mp_copy (a, b)) != MP_OKAY) { 00297 return res; 00298 } 00299 } 00300 00301 /* force the sign of b to positive */ 00302 b->sign = MP_ZPOS; 00303 00304 return MP_OKAY; 00305 } 00306 00307 /* computes the modular inverse via binary extended euclidean algorithm, 00308 * that is c = 1/a mod b 00309 * 00310 * Based on slow invmod except this is optimized for the case where b is 00311 * odd as per HAC Note 14.64 on pp. 610 00312 */ 00313 static int 00314 fast_mp_invmod (const mp_int * a, mp_int * b, mp_int * c) 00315 { 00316 mp_int x, y, u, v, B, D; 00317 int res, neg; 00318 00319 /* 2. [modified] b must be odd */ 00320 if (mp_iseven (b) == 1) { 00321 return MP_VAL; 00322 } 00323 00324 /* init all our temps */ 00325 if ((res = mp_init_multi(&x, &y, &u, &v, &B, &D, NULL)) != MP_OKAY) { 00326 return res; 00327 } 00328 00329 /* x == modulus, y == value to invert */ 00330 if ((res = mp_copy (b, &x)) != MP_OKAY) { 00331 goto __ERR; 00332 } 00333 00334 /* we need y = |a| */ 00335 if ((res = mp_abs (a, &y)) != MP_OKAY) { 00336 goto __ERR; 00337 } 00338 00339 /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */ 00340 if ((res = mp_copy (&x, &u)) != MP_OKAY) { 00341 goto __ERR; 00342 } 00343 if ((res = mp_copy (&y, &v)) != MP_OKAY) { 00344 goto __ERR; 00345 } 00346 mp_set (&D, 1); 00347 00348 top: 00349 /* 4. while u is even do */ 00350 while (mp_iseven (&u) == 1) { 00351 /* 4.1 u = u/2 */ 00352 if ((res = mp_div_2 (&u, &u)) != MP_OKAY) { 00353 goto __ERR; 00354 } 00355 /* 4.2 if B is odd then */ 00356 if (mp_isodd (&B) == 1) { 00357 if ((res = mp_sub (&B, &x, &B)) != MP_OKAY) { 00358 goto __ERR; 00359 } 00360 } 00361 /* B = B/2 */ 00362 if ((res = mp_div_2 (&B, &B)) != MP_OKAY) { 00363 goto __ERR; 00364 } 00365 } 00366 00367 /* 5. while v is even do */ 00368 while (mp_iseven (&v) == 1) { 00369 /* 5.1 v = v/2 */ 00370 if ((res = mp_div_2 (&v, &v)) != MP_OKAY) { 00371 goto __ERR; 00372 } 00373 /* 5.2 if D is odd then */ 00374 if (mp_isodd (&D) == 1) { 00375 /* D = (D-x)/2 */ 00376 if ((res = mp_sub (&D, &x, &D)) != MP_OKAY) { 00377 goto __ERR; 00378 } 00379 } 00380 /* D = D/2 */ 00381 if ((res = mp_div_2 (&D, &D)) != MP_OKAY) { 00382 goto __ERR; 00383 } 00384 } 00385 00386 /* 6. if u >= v then */ 00387 if (mp_cmp (&u, &v) != MP_LT) { 00388 /* u = u - v, B = B - D */ 00389 if ((res = mp_sub (&u, &v, &u)) != MP_OKAY) { 00390 goto __ERR; 00391 } 00392 00393 if ((res = mp_sub (&B, &D, &B)) != MP_OKAY) { 00394 goto __ERR; 00395 } 00396 } else { 00397 /* v - v - u, D = D - B */ 00398 if ((res = mp_sub (&v, &u, &v)) != MP_OKAY) { 00399 goto __ERR; 00400 } 00401 00402 if ((res = mp_sub (&D, &B, &D)) != MP_OKAY) { 00403 goto __ERR; 00404 } 00405 } 00406 00407 /* if not zero goto step 4 */ 00408 if (mp_iszero (&u) == 0) { 00409 goto top; 00410 } 00411 00412 /* now a = C, b = D, gcd == g*v */ 00413 00414 /* if v != 1 then there is no inverse */ 00415 if (mp_cmp_d (&v, 1) != MP_EQ) { 00416 res = MP_VAL; 00417 goto __ERR; 00418 } 00419 00420 /* b is now the inverse */ 00421 neg = a->sign; 00422 while (D.sign == MP_NEG) { 00423 if ((res = mp_add (&D, b, &D)) != MP_OKAY) { 00424 goto __ERR; 00425 } 00426 } 00427 mp_exch (&D, c); 00428 c->sign = neg; 00429 res = MP_OKAY; 00430 00431 __ERR:mp_clear_multi (&x, &y, &u, &v, &B, &D, NULL); 00432 return res; 00433 } 00434 00435 /* computes xR**-1 == x (mod N) via Montgomery Reduction 00436 * 00437 * This is an optimized implementation of montgomery_reduce 00438 * which uses the comba method to quickly calculate the columns of the 00439 * reduction. 00440 * 00441 * Based on Algorithm 14.32 on pp.601 of HAC. 00442 */ 00443 static int 00444 fast_mp_montgomery_reduce (mp_int * x, const mp_int * n, mp_digit rho) 00445 { 00446 int ix, res, olduse; 00447 mp_word W[MP_WARRAY]; 00448 00449 /* get old used count */ 00450 olduse = x->used; 00451 00452 /* grow a as required */ 00453 if (x->alloc < n->used + 1) { 00454 if ((res = mp_grow (x, n->used + 1)) != MP_OKAY) { 00455 return res; 00456 } 00457 } 00458 00459 /* first we have to get the digits of the input into 00460 * an array of double precision words W[...] 00461 */ 00462 { 00463 register mp_word *_W; 00464 register mp_digit *tmpx; 00465 00466 /* alias for the W[] array */ 00467 _W = W; 00468 00469 /* alias for the digits of x*/ 00470 tmpx = x->dp; 00471 00472 /* copy the digits of a into W[0..a->used-1] */ 00473 for (ix = 0; ix < x->used; ix++) { 00474 *_W++ = *tmpx++; 00475 } 00476 00477 /* zero the high words of W[a->used..m->used*2] */ 00478 for (; ix < n->used * 2 + 1; ix++) { 00479 *_W++ = 0; 00480 } 00481 } 00482 00483 /* now we proceed to zero successive digits 00484 * from the least significant upwards 00485 */ 00486 for (ix = 0; ix < n->used; ix++) { 00487 /* mu = ai * m' mod b 00488 * 00489 * We avoid a double precision multiplication (which isn't required) 00490 * by casting the value down to a mp_digit. Note this requires 00491 * that W[ix-1] have the carry cleared (see after the inner loop) 00492 */ 00493 register mp_digit mu; 00494 mu = (mp_digit) (((W[ix] & MP_MASK) * rho) & MP_MASK); 00495 00496 /* a = a + mu * m * b**i 00497 * 00498 * This is computed in place and on the fly. The multiplication 00499 * by b**i is handled by offsetting which columns the results 00500 * are added to. 00501 * 00502 * Note the comba method normally doesn't handle carries in the 00503 * inner loop In this case we fix the carry from the previous 00504 * column since the Montgomery reduction requires digits of the 00505 * result (so far) [see above] to work. This is 00506 * handled by fixing up one carry after the inner loop. The 00507 * carry fixups are done in order so after these loops the 00508 * first m->used words of W[] have the carries fixed 00509 */ 00510 { 00511 register int iy; 00512 register mp_digit *tmpn; 00513 register mp_word *_W; 00514 00515 /* alias for the digits of the modulus */ 00516 tmpn = n->dp; 00517 00518 /* Alias for the columns set by an offset of ix */ 00519 _W = W + ix; 00520 00521 /* inner loop */ 00522 for (iy = 0; iy < n->used; iy++) { 00523 *_W++ += ((mp_word)mu) * ((mp_word)*tmpn++); 00524 } 00525 } 00526 00527 /* now fix carry for next digit, W[ix+1] */ 00528 W[ix + 1] += W[ix] >> ((mp_word) DIGIT_BIT); 00529 } 00530 00531 /* now we have to propagate the carries and 00532 * shift the words downward [all those least 00533 * significant digits we zeroed]. 00534 */ 00535 { 00536 register mp_digit *tmpx; 00537 register mp_word *_W, *_W1; 00538 00539 /* nox fix rest of carries */ 00540 00541 /* alias for current word */ 00542 _W1 = W + ix; 00543 00544 /* alias for next word, where the carry goes */ 00545 _W = W + ++ix; 00546 00547 for (; ix <= n->used * 2 + 1; ix++) { 00548 *_W++ += *_W1++ >> ((mp_word) DIGIT_BIT); 00549 } 00550 00551 /* copy out, A = A/b**n 00552 * 00553 * The result is A/b**n but instead of converting from an 00554 * array of mp_word to mp_digit than calling mp_rshd 00555 * we just copy them in the right order 00556 */ 00557 00558 /* alias for destination word */ 00559 tmpx = x->dp; 00560 00561 /* alias for shifted double precision result */ 00562 _W = W + n->used; 00563 00564 for (ix = 0; ix < n->used + 1; ix++) { 00565 *tmpx++ = (mp_digit)(*_W++ & ((mp_word) MP_MASK)); 00566 } 00567 00568 /* zero oldused digits, if the input a was larger than 00569 * m->used+1 we'll have to clear the digits 00570 */ 00571 for (; ix < olduse; ix++) { 00572 *tmpx++ = 0; 00573 } 00574 } 00575 00576 /* set the max used and clamp */ 00577 x->used = n->used + 1; 00578 mp_clamp (x); 00579 00580 /* if A >= m then A = A - m */ 00581 if (mp_cmp_mag (x, n) != MP_LT) { 00582 return s_mp_sub (x, n, x); 00583 } 00584 return MP_OKAY; 00585 } 00586 00587 /* Fast (comba) multiplier 00588 * 00589 * This is the fast column-array [comba] multiplier. It is 00590 * designed to compute the columns of the product first 00591 * then handle the carries afterwards. This has the effect 00592 * of making the nested loops that compute the columns very 00593 * simple and schedulable on super-scalar processors. 00594 * 00595 * This has been modified to produce a variable number of 00596 * digits of output so if say only a half-product is required 00597 * you don't have to compute the upper half (a feature 00598 * required for fast Barrett reduction). 00599 * 00600 * Based on Algorithm 14.12 on pp.595 of HAC. 00601 * 00602 */ 00603 static int 00604 fast_s_mp_mul_digs (const mp_int * a, const mp_int * b, mp_int * c, int digs) 00605 { 00606 int olduse, res, pa, ix, iz; 00607 mp_digit W[MP_WARRAY]; 00608 register mp_word _W; 00609 00610 /* grow the destination as required */ 00611 if (c->alloc < digs) { 00612 if ((res = mp_grow (c, digs)) != MP_OKAY) { 00613 return res; 00614 } 00615 } 00616 00617 /* number of output digits to produce */ 00618 pa = MIN(digs, a->used + b->used); 00619 00620 /* clear the carry */ 00621 _W = 0; 00622 for (ix = 0; ix <= pa; ix++) { 00623 int tx, ty; 00624 int iy; 00625 mp_digit *tmpx, *tmpy; 00626 00627 /* get offsets into the two bignums */ 00628 ty = MIN(b->used-1, ix); 00629 tx = ix - ty; 00630 00631 /* setup temp aliases */ 00632 tmpx = a->dp + tx; 00633 tmpy = b->dp + ty; 00634 00635 /* This is the number of times the loop will iterate, essentially it's 00636 while (tx++ < a->used && ty-- >= 0) { ... } 00637 */ 00638 iy = MIN(a->used-tx, ty+1); 00639 00640 /* execute loop */ 00641 for (iz = 0; iz < iy; ++iz) { 00642 _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--); 00643 } 00644 00645 /* store term */ 00646 W[ix] = ((mp_digit)_W) & MP_MASK; 00647 00648 /* make next carry */ 00649 _W = _W >> ((mp_word)DIGIT_BIT); 00650 } 00651 00652 /* setup dest */ 00653 olduse = c->used; 00654 c->used = digs; 00655 00656 { 00657 register mp_digit *tmpc; 00658 tmpc = c->dp; 00659 for (ix = 0; ix < digs; ix++) { 00660 /* now extract the previous digit [below the carry] */ 00661 *tmpc++ = W[ix]; 00662 } 00663 00664 /* clear unused digits [that existed in the old copy of c] */ 00665 for (; ix < olduse; ix++) { 00666 *tmpc++ = 0; 00667 } 00668 } 00669 mp_clamp (c); 00670 return MP_OKAY; 00671 } 00672 00673 /* this is a modified version of fast_s_mul_digs that only produces 00674 * output digits *above* digs. See the comments for fast_s_mul_digs 00675 * to see how it works. 00676 * 00677 * This is used in the Barrett reduction since for one of the multiplications 00678 * only the higher digits were needed. This essentially halves the work. 00679 * 00680 * Based on Algorithm 14.12 on pp.595 of HAC. 00681 */ 00682 static int 00683 fast_s_mp_mul_high_digs (const mp_int * a, const mp_int * b, mp_int * c, int digs) 00684 { 00685 int olduse, res, pa, ix, iz; 00686 mp_digit W[MP_WARRAY]; 00687 mp_word _W; 00688 00689 /* grow the destination as required */ 00690 pa = a->used + b->used; 00691 if (c->alloc < pa) { 00692 if ((res = mp_grow (c, pa)) != MP_OKAY) { 00693 return res; 00694 } 00695 } 00696 00697 /* number of output digits to produce */ 00698 pa = a->used + b->used; 00699 _W = 0; 00700 for (ix = digs; ix <= pa; ix++) { 00701 int tx, ty, iy; 00702 mp_digit *tmpx, *tmpy; 00703 00704 /* get offsets into the two bignums */ 00705 ty = MIN(b->used-1, ix); 00706 tx = ix - ty; 00707 00708 /* setup temp aliases */ 00709 tmpx = a->dp + tx; 00710 tmpy = b->dp + ty; 00711 00712 /* This is the number of times the loop will iterate, essentially it's 00713 while (tx++ < a->used && ty-- >= 0) { ... } 00714 */ 00715 iy = MIN(a->used-tx, ty+1); 00716 00717 /* execute loop */ 00718 for (iz = 0; iz < iy; iz++) { 00719 _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--); 00720 } 00721 00722 /* store term */ 00723 W[ix] = ((mp_digit)_W) & MP_MASK; 00724 00725 /* make next carry */ 00726 _W = _W >> ((mp_word)DIGIT_BIT); 00727 } 00728 00729 /* setup dest */ 00730 olduse = c->used; 00731 c->used = pa; 00732 00733 { 00734 register mp_digit *tmpc; 00735 00736 tmpc = c->dp + digs; 00737 for (ix = digs; ix <= pa; ix++) { 00738 /* now extract the previous digit [below the carry] */ 00739 *tmpc++ = W[ix]; 00740 } 00741 00742 /* clear unused digits [that existed in the old copy of c] */ 00743 for (; ix < olduse; ix++) { 00744 *tmpc++ = 0; 00745 } 00746 } 00747 mp_clamp (c); 00748 return MP_OKAY; 00749 } 00750 00751 /* fast squaring 00752 * 00753 * This is the comba method where the columns of the product 00754 * are computed first then the carries are computed. This 00755 * has the effect of making a very simple inner loop that 00756 * is executed the most 00757 * 00758 * W2 represents the outer products and W the inner. 00759 * 00760 * A further optimizations is made because the inner 00761 * products are of the form "A * B * 2". The *2 part does 00762 * not need to be computed until the end which is good 00763 * because 64-bit shifts are slow! 00764 * 00765 * Based on Algorithm 14.16 on pp.597 of HAC. 00766 * 00767 */ 00768 /* the jist of squaring... 00769 00770 you do like mult except the offset of the tmpx [one that starts closer to zero] 00771 can't equal the offset of tmpy. So basically you set up iy like before then you min it with 00772 (ty-tx) so that it never happens. You double all those you add in the inner loop 00773 00774 After that loop you do the squares and add them in. 00775 00776 Remove W2 and don't memset W 00777 00778 */ 00779 00780 static int fast_s_mp_sqr (const mp_int * a, mp_int * b) 00781 { 00782 int olduse, res, pa, ix, iz; 00783 mp_digit W[MP_WARRAY], *tmpx; 00784 mp_word W1; 00785 00786 /* grow the destination as required */ 00787 pa = a->used + a->used; 00788 if (b->alloc < pa) { 00789 if ((res = mp_grow (b, pa)) != MP_OKAY) { 00790 return res; 00791 } 00792 } 00793 00794 /* number of output digits to produce */ 00795 W1 = 0; 00796 for (ix = 0; ix <= pa; ix++) { 00797 int tx, ty, iy; 00798 mp_word _W; 00799 mp_digit *tmpy; 00800 00801 /* clear counter */ 00802 _W = 0; 00803 00804 /* get offsets into the two bignums */ 00805 ty = MIN(a->used-1, ix); 00806 tx = ix - ty; 00807 00808 /* setup temp aliases */ 00809 tmpx = a->dp + tx; 00810 tmpy = a->dp + ty; 00811 00812 /* This is the number of times the loop will iterate, essentially it's 00813 while (tx++ < a->used && ty-- >= 0) { ... } 00814 */ 00815 iy = MIN(a->used-tx, ty+1); 00816 00817 /* now for squaring tx can never equal ty 00818 * we halve the distance since they approach at a rate of 2x 00819 * and we have to round because odd cases need to be executed 00820 */ 00821 iy = MIN(iy, (ty-tx+1)>>1); 00822 00823 /* execute loop */ 00824 for (iz = 0; iz < iy; iz++) { 00825 _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--); 00826 } 00827 00828 /* double the inner product and add carry */ 00829 _W = _W + _W + W1; 00830 00831 /* even columns have the square term in them */ 00832 if ((ix&1) == 0) { 00833 _W += ((mp_word)a->dp[ix>>1])*((mp_word)a->dp[ix>>1]); 00834 } 00835 00836 /* store it */ 00837 W[ix] = _W; 00838 00839 /* make next carry */ 00840 W1 = _W >> ((mp_word)DIGIT_BIT); 00841 } 00842 00843 /* setup dest */ 00844 olduse = b->used; 00845 b->used = a->used+a->used; 00846 00847 { 00848 mp_digit *tmpb; 00849 tmpb = b->dp; 00850 for (ix = 0; ix < pa; ix++) { 00851 *tmpb++ = W[ix] & MP_MASK; 00852 } 00853 00854 /* clear unused digits [that existed in the old copy of c] */ 00855 for (; ix < olduse; ix++) { 00856 *tmpb++ = 0; 00857 } 00858 } 00859 mp_clamp (b); 00860 return MP_OKAY; 00861 } 00862 00863 /* computes a = 2**b 00864 * 00865 * Simple algorithm which zeroes the int, grows it then just sets one bit 00866 * as required. 00867 */ 00868 static int 00869 mp_2expt (mp_int * a, int b) 00870 { 00871 int res; 00872 00873 /* zero a as per default */ 00874 mp_zero (a); 00875 00876 /* grow a to accommodate the single bit */ 00877 if ((res = mp_grow (a, b / DIGIT_BIT + 1)) != MP_OKAY) { 00878 return res; 00879 } 00880 00881 /* set the used count of where the bit will go */ 00882 a->used = b / DIGIT_BIT + 1; 00883 00884 /* put the single bit in its place */ 00885 a->dp[b / DIGIT_BIT] = ((mp_digit)1) << (b % DIGIT_BIT); 00886 00887 return MP_OKAY; 00888 } 00889 00890 /* high level addition (handles signs) */ 00891 int mp_add (mp_int * a, mp_int * b, mp_int * c) 00892 { 00893 int sa, sb, res; 00894 00895 /* get sign of both inputs */ 00896 sa = a->sign; 00897 sb = b->sign; 00898 00899 /* handle two cases, not four */ 00900 if (sa == sb) { 00901 /* both positive or both negative */ 00902 /* add their magnitudes, copy the sign */ 00903 c->sign = sa; 00904 res = s_mp_add (a, b, c); 00905 } else { 00906 /* one positive, the other negative */ 00907 /* subtract the one with the greater magnitude from */ 00908 /* the one of the lesser magnitude. The result gets */ 00909 /* the sign of the one with the greater magnitude. */ 00910 if (mp_cmp_mag (a, b) == MP_LT) { 00911 c->sign = sb; 00912 res = s_mp_sub (b, a, c); 00913 } else { 00914 c->sign = sa; 00915 res = s_mp_sub (a, b, c); 00916 } 00917 } 00918 return res; 00919 } 00920 00921 00922 /* single digit addition */ 00923 static int 00924 mp_add_d (mp_int * a, mp_digit b, mp_int * c) 00925 { 00926 int res, ix, oldused; 00927 mp_digit *tmpa, *tmpc, mu; 00928 00929 /* grow c as required */ 00930 if (c->alloc < a->used + 1) { 00931 if ((res = mp_grow(c, a->used + 1)) != MP_OKAY) { 00932 return res; 00933 } 00934 } 00935 00936 /* if a is negative and |a| >= b, call c = |a| - b */ 00937 if (a->sign == MP_NEG && (a->used > 1 || a->dp[0] >= b)) { 00938 /* temporarily fix sign of a */ 00939 a->sign = MP_ZPOS; 00940 00941 /* c = |a| - b */ 00942 res = mp_sub_d(a, b, c); 00943 00944 /* fix sign */ 00945 a->sign = c->sign = MP_NEG; 00946 00947 return res; 00948 } 00949 00950 /* old number of used digits in c */ 00951 oldused = c->used; 00952 00953 /* sign always positive */ 00954 c->sign = MP_ZPOS; 00955 00956 /* source alias */ 00957 tmpa = a->dp; 00958 00959 /* destination alias */ 00960 tmpc = c->dp; 00961 00962 /* if a is positive */ 00963 if (a->sign == MP_ZPOS) { 00964 /* add digit, after this we're propagating 00965 * the carry. 00966 */ 00967 *tmpc = *tmpa++ + b; 00968 mu = *tmpc >> DIGIT_BIT; 00969 *tmpc++ &= MP_MASK; 00970 00971 /* now handle rest of the digits */ 00972 for (ix = 1; ix < a->used; ix++) { 00973 *tmpc = *tmpa++ + mu; 00974 mu = *tmpc >> DIGIT_BIT; 00975 *tmpc++ &= MP_MASK; 00976 } 00977 /* set final carry */ 00978 ix++; 00979 *tmpc++ = mu; 00980 00981 /* setup size */ 00982 c->used = a->used + 1; 00983 } else { 00984 /* a was negative and |a| < b */ 00985 c->used = 1; 00986 00987 /* the result is a single digit */ 00988 if (a->used == 1) { 00989 *tmpc++ = b - a->dp[0]; 00990 } else { 00991 *tmpc++ = b; 00992 } 00993 00994 /* setup count so the clearing of oldused 00995 * can fall through correctly 00996 */ 00997 ix = 1; 00998 } 00999 01000 /* now zero to oldused */ 01001 while (ix++ < oldused) { 01002 *tmpc++ = 0; 01003 } 01004 mp_clamp(c); 01005 01006 return MP_OKAY; 01007 } 01008 01009 /* trim unused digits 01010 * 01011 * This is used to ensure that leading zero digits are 01012 * trimed and the leading "used" digit will be non-zero 01013 * Typically very fast. Also fixes the sign if there 01014 * are no more leading digits 01015 */ 01016 void 01017 mp_clamp (mp_int * a) 01018 { 01019 /* decrease used while the most significant digit is 01020 * zero. 01021 */ 01022 while (a->used > 0 && a->dp[a->used - 1] == 0) { 01023 --(a->used); 01024 } 01025 01026 /* reset the sign flag if used == 0 */ 01027 if (a->used == 0) { 01028 a->sign = MP_ZPOS; 01029 } 01030 } 01031 01032 void mp_clear_multi(mp_int *mp, ...) 01033 { 01034 mp_int* next_mp = mp; 01035 va_list args; 01036 va_start(args, mp); 01037 while (next_mp != NULL) { 01038 mp_clear(next_mp); 01039 next_mp = va_arg(args, mp_int*); 01040 } 01041 va_end(args); 01042 } 01043 01044 /* compare two ints (signed)*/ 01045 int 01046 mp_cmp (const mp_int * a, const mp_int * b) 01047 { 01048 /* compare based on sign */ 01049 if (a->sign != b->sign) { 01050 if (a->sign == MP_NEG) { 01051 return MP_LT; 01052 } else { 01053 return MP_GT; 01054 } 01055 } 01056 01057 /* compare digits */ 01058 if (a->sign == MP_NEG) { 01059 /* if negative compare opposite direction */ 01060 return mp_cmp_mag(b, a); 01061 } else { 01062 return mp_cmp_mag(a, b); 01063 } 01064 } 01065 01066 /* compare a digit */ 01067 int mp_cmp_d(const mp_int * a, mp_digit b) 01068 { 01069 /* compare based on sign */ 01070 if (a->sign == MP_NEG) { 01071 return MP_LT; 01072 } 01073 01074 /* compare based on magnitude */ 01075 if (a->used > 1) { 01076 return MP_GT; 01077 } 01078 01079 /* compare the only digit of a to b */ 01080 if (a->dp[0] > b) { 01081 return MP_GT; 01082 } else if (a->dp[0] < b) { 01083 return MP_LT; 01084 } else { 01085 return MP_EQ; 01086 } 01087 } 01088 01089 /* compare maginitude of two ints (unsigned) */ 01090 int mp_cmp_mag (const mp_int * a, const mp_int * b) 01091 { 01092 int n; 01093 mp_digit *tmpa, *tmpb; 01094 01095 /* compare based on # of non-zero digits */ 01096 if (a->used > b->used) { 01097 return MP_GT; 01098 } 01099 01100 if (a->used < b->used) { 01101 return MP_LT; 01102 } 01103 01104 /* alias for a */ 01105 tmpa = a->dp + (a->used - 1); 01106 01107 /* alias for b */ 01108 tmpb = b->dp + (a->used - 1); 01109 01110 /* compare based on digits */ 01111 for (n = 0; n < a->used; ++n, --tmpa, --tmpb) { 01112 if (*tmpa > *tmpb) { 01113 return MP_GT; 01114 } 01115 01116 if (*tmpa < *tmpb) { 01117 return MP_LT; 01118 } 01119 } 01120 return MP_EQ; 01121 } 01122 01123 static const int lnz[16] = { 01124 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0 01125 }; 01126 01127 /* Counts the number of lsbs which are zero before the first zero bit */ 01128 int mp_cnt_lsb(const mp_int *a) 01129 { 01130 int x; 01131 mp_digit q, qq; 01132 01133 /* easy out */ 01134 if (mp_iszero(a) == 1) { 01135 return 0; 01136 } 01137 01138 /* scan lower digits until non-zero */ 01139 for (x = 0; x < a->used && a->dp[x] == 0; x++); 01140 q = a->dp[x]; 01141 x *= DIGIT_BIT; 01142 01143 /* now scan this digit until a 1 is found */ 01144 if ((q & 1) == 0) { 01145 do { 01146 qq = q & 15; 01147 x += lnz[qq]; 01148 q >>= 4; 01149 } while (qq == 0); 01150 } 01151 return x; 01152 } 01153 01154 /* copy, b = a */ 01155 int 01156 mp_copy (const mp_int * a, mp_int * b) 01157 { 01158 int res, n; 01159 01160 /* if dst == src do nothing */ 01161 if (a == b) { 01162 return MP_OKAY; 01163 } 01164 01165 /* grow dest */ 01166 if (b->alloc < a->used) { 01167 if ((res = mp_grow (b, a->used)) != MP_OKAY) { 01168 return res; 01169 } 01170 } 01171 01172 /* zero b and copy the parameters over */ 01173 { 01174 register mp_digit *tmpa, *tmpb; 01175 01176 /* pointer aliases */ 01177 01178 /* source */ 01179 tmpa = a->dp; 01180 01181 /* destination */ 01182 tmpb = b->dp; 01183 01184 /* copy all the digits */ 01185 for (n = 0; n < a->used; n++) { 01186 *tmpb++ = *tmpa++; 01187 } 01188 01189 /* clear high digits */ 01190 for (; n < b->used; n++) { 01191 *tmpb++ = 0; 01192 } 01193 } 01194 01195 /* copy used count and sign */ 01196 b->used = a->used; 01197 b->sign = a->sign; 01198 return MP_OKAY; 01199 } 01200 01201 /* returns the number of bits in an int */ 01202 int 01203 mp_count_bits (const mp_int * a) 01204 { 01205 int r; 01206 mp_digit q; 01207 01208 /* shortcut */ 01209 if (a->used == 0) { 01210 return 0; 01211 } 01212 01213 /* get number of digits and add that */ 01214 r = (a->used - 1) * DIGIT_BIT; 01215 01216 /* take the last digit and count the bits in it */ 01217 q = a->dp[a->used - 1]; 01218 while (q > 0) { 01219 ++r; 01220 q >>= ((mp_digit) 1); 01221 } 01222 return r; 01223 } 01224 01225 /* calc a value mod 2**b */ 01226 static int 01227 mp_mod_2d (const mp_int * a, int b, mp_int * c) 01228 { 01229 int x, res; 01230 01231 /* if b is <= 0 then zero the int */ 01232 if (b <= 0) { 01233 mp_zero (c); 01234 return MP_OKAY; 01235 } 01236 01237 /* if the modulus is larger than the value than return */ 01238 if (b > a->used * DIGIT_BIT) { 01239 res = mp_copy (a, c); 01240 return res; 01241 } 01242 01243 /* copy */ 01244 if ((res = mp_copy (a, c)) != MP_OKAY) { 01245 return res; 01246 } 01247 01248 /* zero digits above the last digit of the modulus */ 01249 for (x = (b / DIGIT_BIT) + ((b % DIGIT_BIT) == 0 ? 0 : 1); x < c->used; x++) { 01250 c->dp[x] = 0; 01251 } 01252 /* clear the digit that is not completely outside/inside the modulus */ 01253 c->dp[b / DIGIT_BIT] &= (1 << ((mp_digit)b % DIGIT_BIT)) - 1; 01254 mp_clamp (c); 01255 return MP_OKAY; 01256 } 01257 01258 /* shift right a certain amount of digits */ 01259 static void mp_rshd (mp_int * a, int b) 01260 { 01261 int x; 01262 01263 /* if b <= 0 then ignore it */ 01264 if (b <= 0) { 01265 return; 01266 } 01267 01268 /* if b > used then simply zero it and return */ 01269 if (a->used <= b) { 01270 mp_zero (a); 01271 return; 01272 } 01273 01274 { 01275 register mp_digit *bottom, *top; 01276 01277 /* shift the digits down */ 01278 01279 /* bottom */ 01280 bottom = a->dp; 01281 01282 /* top [offset into digits] */ 01283 top = a->dp + b; 01284 01285 /* this is implemented as a sliding window where 01286 * the window is b-digits long and digits from 01287 * the top of the window are copied to the bottom 01288 * 01289 * e.g. 01290 01291 b-2 | b-1 | b0 | b1 | b2 | ... | bb | ----> 01292 /\ | ----> 01293 \-------------------/ ----> 01294 */ 01295 for (x = 0; x < (a->used - b); x++) { 01296 *bottom++ = *top++; 01297 } 01298 01299 /* zero the top digits */ 01300 for (; x < a->used; x++) { 01301 *bottom++ = 0; 01302 } 01303 } 01304 01305 /* remove excess digits */ 01306 a->used -= b; 01307 } 01308 01309 /* shift right by a certain bit count (store quotient in c, optional remainder in d) */ 01310 static int mp_div_2d (const mp_int * a, int b, mp_int * c, mp_int * d) 01311 { 01312 mp_digit D, r, rr; 01313 int x, res; 01314 mp_int t; 01315 01316 01317 /* if the shift count is <= 0 then we do no work */ 01318 if (b <= 0) { 01319 res = mp_copy (a, c); 01320 if (d != NULL) { 01321 mp_zero (d); 01322 } 01323 return res; 01324 } 01325 01326 if ((res = mp_init (&t)) != MP_OKAY) { 01327 return res; 01328 } 01329 01330 /* get the remainder */ 01331 if (d != NULL) { 01332 if ((res = mp_mod_2d (a, b, &t)) != MP_OKAY) { 01333 mp_clear (&t); 01334 return res; 01335 } 01336 } 01337 01338 /* copy */ 01339 if ((res = mp_copy (a, c)) != MP_OKAY) { 01340 mp_clear (&t); 01341 return res; 01342 } 01343 01344 /* shift by as many digits in the bit count */ 01345 if (b >= DIGIT_BIT) { 01346 mp_rshd (c, b / DIGIT_BIT); 01347 } 01348 01349 /* shift any bit count < DIGIT_BIT */ 01350 D = (mp_digit) (b % DIGIT_BIT); 01351 if (D != 0) { 01352 register mp_digit *tmpc, mask, shift; 01353 01354 /* mask */ 01355 mask = (((mp_digit)1) << D) - 1; 01356 01357 /* shift for lsb */ 01358 shift = DIGIT_BIT - D; 01359 01360 /* alias */ 01361 tmpc = c->dp + (c->used - 1); 01362 01363 /* carry */ 01364 r = 0; 01365 for (x = c->used - 1; x >= 0; x--) { 01366 /* get the lower bits of this word in a temp */ 01367 rr = *tmpc & mask; 01368 01369 /* shift the current word and mix in the carry bits from the previous word */ 01370 *tmpc = (*tmpc >> D) | (r << shift); 01371 --tmpc; 01372 01373 /* set the carry to the carry bits of the current word found above */ 01374 r = rr; 01375 } 01376 } 01377 mp_clamp (c); 01378 if (d != NULL) { 01379 mp_exch (&t, d); 01380 } 01381 mp_clear (&t); 01382 return MP_OKAY; 01383 } 01384 01385 /* shift left a certain amount of digits */ 01386 static int mp_lshd (mp_int * a, int b) 01387 { 01388 int x, res; 01389 01390 /* if its less than zero return */ 01391 if (b <= 0) { 01392 return MP_OKAY; 01393 } 01394 01395 /* grow to fit the new digits */ 01396 if (a->alloc < a->used + b) { 01397 if ((res = mp_grow (a, a->used + b)) != MP_OKAY) { 01398 return res; 01399 } 01400 } 01401 01402 { 01403 register mp_digit *top, *bottom; 01404 01405 /* increment the used by the shift amount then copy upwards */ 01406 a->used += b; 01407 01408 /* top */ 01409 top = a->dp + a->used - 1; 01410 01411 /* base */ 01412 bottom = a->dp + a->used - 1 - b; 01413 01414 /* much like mp_rshd this is implemented using a sliding window 01415 * except the window goes the other way around. Copying from 01416 * the bottom to the top. see bn_mp_rshd.c for more info. 01417 */ 01418 for (x = a->used - 1; x >= b; x--) { 01419 *top-- = *bottom--; 01420 } 01421 01422 /* zero the lower digits */ 01423 top = a->dp; 01424 for (x = 0; x < b; x++) { 01425 *top++ = 0; 01426 } 01427 } 01428 return MP_OKAY; 01429 } 01430 01431 /* shift left by a certain bit count */ 01432 static int mp_mul_2d (const mp_int * a, int b, mp_int * c) 01433 { 01434 mp_digit d; 01435 int res; 01436 01437 /* copy */ 01438 if (a != c) { 01439 if ((res = mp_copy (a, c)) != MP_OKAY) { 01440 return res; 01441 } 01442 } 01443 01444 if (c->alloc < c->used + b/DIGIT_BIT + 1) { 01445 if ((res = mp_grow (c, c->used + b / DIGIT_BIT + 1)) != MP_OKAY) { 01446 return res; 01447 } 01448 } 01449 01450 /* shift by as many digits in the bit count */ 01451 if (b >= DIGIT_BIT) { 01452 if ((res = mp_lshd (c, b / DIGIT_BIT)) != MP_OKAY) { 01453 return res; 01454 } 01455 } 01456 01457 /* shift any bit count < DIGIT_BIT */ 01458 d = (mp_digit) (b % DIGIT_BIT); 01459 if (d != 0) { 01460 register mp_digit *tmpc, shift, mask, r, rr; 01461 register int x; 01462 01463 /* bitmask for carries */ 01464 mask = (((mp_digit)1) << d) - 1; 01465 01466 /* shift for msbs */ 01467 shift = DIGIT_BIT - d; 01468 01469 /* alias */ 01470 tmpc = c->dp; 01471 01472 /* carry */ 01473 r = 0; 01474 for (x = 0; x < c->used; x++) { 01475 /* get the higher bits of the current word */ 01476 rr = (*tmpc >> shift) & mask; 01477 01478 /* shift the current word and OR in the carry */ 01479 *tmpc = ((*tmpc << d) | r) & MP_MASK; 01480 ++tmpc; 01481 01482 /* set the carry to the carry bits of the current word */ 01483 r = rr; 01484 } 01485 01486 /* set final carry */ 01487 if (r != 0) { 01488 c->dp[(c->used)++] = r; 01489 } 01490 } 01491 mp_clamp (c); 01492 return MP_OKAY; 01493 } 01494 01495 /* multiply by a digit */ 01496 static int 01497 mp_mul_d (const mp_int * a, mp_digit b, mp_int * c) 01498 { 01499 mp_digit u, *tmpa, *tmpc; 01500 mp_word r; 01501 int ix, res, olduse; 01502 01503 /* make sure c is big enough to hold a*b */ 01504 if (c->alloc < a->used + 1) { 01505 if ((res = mp_grow (c, a->used + 1)) != MP_OKAY) { 01506 return res; 01507 } 01508 } 01509 01510 /* get the original destinations used count */ 01511 olduse = c->used; 01512 01513 /* set the sign */ 01514 c->sign = a->sign; 01515 01516 /* alias for a->dp [source] */ 01517 tmpa = a->dp; 01518 01519 /* alias for c->dp [dest] */ 01520 tmpc = c->dp; 01521 01522 /* zero carry */ 01523 u = 0; 01524 01525 /* compute columns */ 01526 for (ix = 0; ix < a->used; ix++) { 01527 /* compute product and carry sum for this term */ 01528 r = ((mp_word) u) + ((mp_word)*tmpa++) * ((mp_word)b); 01529 01530 /* mask off higher bits to get a single digit */ 01531 *tmpc++ = (mp_digit) (r & ((mp_word) MP_MASK)); 01532 01533 /* send carry into next iteration */ 01534 u = (mp_digit) (r >> ((mp_word) DIGIT_BIT)); 01535 } 01536 01537 /* store final carry [if any] */ 01538 *tmpc++ = u; 01539 01540 /* now zero digits above the top */ 01541 while (ix++ < olduse) { 01542 *tmpc++ = 0; 01543 } 01544 01545 /* set used count */ 01546 c->used = a->used + 1; 01547 mp_clamp(c); 01548 01549 return MP_OKAY; 01550 } 01551 01552 /* integer signed division. 01553 * c*b + d == a [e.g. a/b, c=quotient, d=remainder] 01554 * HAC pp.598 Algorithm 14.20 01555 * 01556 * Note that the description in HAC is horribly 01557 * incomplete. For example, it doesn't consider 01558 * the case where digits are removed from 'x' in 01559 * the inner loop. It also doesn't consider the 01560 * case that y has fewer than three digits, etc.. 01561 * 01562 * The overall algorithm is as described as 01563 * 14.20 from HAC but fixed to treat these cases. 01564 */ 01565 static int mp_div (const mp_int * a, const mp_int * b, mp_int * c, mp_int * d) 01566 { 01567 mp_int q, x, y, t1, t2; 01568 int res, n, t, i, norm, neg; 01569 01570 /* is divisor zero ? */ 01571 if (mp_iszero (b) == 1) { 01572 return MP_VAL; 01573 } 01574 01575 /* if a < b then q=0, r = a */ 01576 if (mp_cmp_mag (a, b) == MP_LT) { 01577 if (d != NULL) { 01578 res = mp_copy (a, d); 01579 } else { 01580 res = MP_OKAY; 01581 } 01582 if (c != NULL) { 01583 mp_zero (c); 01584 } 01585 return res; 01586 } 01587 01588 if ((res = mp_init_size (&q, a->used + 2)) != MP_OKAY) { 01589 return res; 01590 } 01591 q.used = a->used + 2; 01592 01593 if ((res = mp_init (&t1)) != MP_OKAY) { 01594 goto __Q; 01595 } 01596 01597 if ((res = mp_init (&t2)) != MP_OKAY) { 01598 goto __T1; 01599 } 01600 01601 if ((res = mp_init_copy (&x, a)) != MP_OKAY) { 01602 goto __T2; 01603 } 01604 01605 if ((res = mp_init_copy (&y, b)) != MP_OKAY) { 01606 goto __X; 01607 } 01608 01609 /* fix the sign */ 01610 neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG; 01611 x.sign = y.sign = MP_ZPOS; 01612 01613 /* normalize both x and y, ensure that y >= b/2, [b == 2**DIGIT_BIT] */ 01614 norm = mp_count_bits(&y) % DIGIT_BIT; 01615 if (norm < DIGIT_BIT-1) { 01616 norm = (DIGIT_BIT-1) - norm; 01617 if ((res = mp_mul_2d (&x, norm, &x)) != MP_OKAY) { 01618 goto __Y; 01619 } 01620 if ((res = mp_mul_2d (&y, norm, &y)) != MP_OKAY) { 01621 goto __Y; 01622 } 01623 } else { 01624 norm = 0; 01625 } 01626 01627 /* note hac does 0 based, so if used==5 then its 0,1,2,3,4, e.g. use 4 */ 01628 n = x.used - 1; 01629 t = y.used - 1; 01630 01631 /* while (x >= y*b**n-t) do { q[n-t] += 1; x -= y*b**{n-t} } */ 01632 if ((res = mp_lshd (&y, n - t)) != MP_OKAY) { /* y = y*b**{n-t} */ 01633 goto __Y; 01634 } 01635 01636 while (mp_cmp (&x, &y) != MP_LT) { 01637 ++(q.dp[n - t]); 01638 if ((res = mp_sub (&x, &y, &x)) != MP_OKAY) { 01639 goto __Y; 01640 } 01641 } 01642 01643 /* reset y by shifting it back down */ 01644 mp_rshd (&y, n - t); 01645 01646 /* step 3. for i from n down to (t + 1) */ 01647 for (i = n; i >= (t + 1); i--) { 01648 if (i > x.used) { 01649 continue; 01650 } 01651 01652 /* step 3.1 if xi == yt then set q{i-t-1} to b-1, 01653 * otherwise set q{i-t-1} to (xi*b + x{i-1})/yt */ 01654 if (x.dp[i] == y.dp[t]) { 01655 q.dp[i - t - 1] = ((((mp_digit)1) << DIGIT_BIT) - 1); 01656 } else { 01657 mp_word tmp; 01658 tmp = ((mp_word) x.dp[i]) << ((mp_word) DIGIT_BIT); 01659 tmp |= ((mp_word) x.dp[i - 1]); 01660 tmp /= ((mp_word) y.dp[t]); 01661 if (tmp > (mp_word) MP_MASK) 01662 tmp = MP_MASK; 01663 q.dp[i - t - 1] = (mp_digit) (tmp & (mp_word) (MP_MASK)); 01664 } 01665 01666 /* while (q{i-t-1} * (yt * b + y{t-1})) > 01667 xi * b**2 + xi-1 * b + xi-2 01668 01669 do q{i-t-1} -= 1; 01670 */ 01671 q.dp[i - t - 1] = (q.dp[i - t - 1] + 1) & MP_MASK; 01672 do { 01673 q.dp[i - t - 1] = (q.dp[i - t - 1] - 1) & MP_MASK; 01674 01675 /* find left hand */ 01676 mp_zero (&t1); 01677 t1.dp[0] = (t - 1 < 0) ? 0 : y.dp[t - 1]; 01678 t1.dp[1] = y.dp[t]; 01679 t1.used = 2; 01680 if ((res = mp_mul_d (&t1, q.dp[i - t - 1], &t1)) != MP_OKAY) { 01681 goto __Y; 01682 } 01683 01684 /* find right hand */ 01685 t2.dp[0] = (i - 2 < 0) ? 0 : x.dp[i - 2]; 01686 t2.dp[1] = (i - 1 < 0) ? 0 : x.dp[i - 1]; 01687 t2.dp[2] = x.dp[i]; 01688 t2.used = 3; 01689 } while (mp_cmp_mag(&t1, &t2) == MP_GT); 01690 01691 /* step 3.3 x = x - q{i-t-1} * y * b**{i-t-1} */ 01692 if ((res = mp_mul_d (&y, q.dp[i - t - 1], &t1)) != MP_OKAY) { 01693 goto __Y; 01694 } 01695 01696 if ((res = mp_lshd (&t1, i - t - 1)) != MP_OKAY) { 01697 goto __Y; 01698 } 01699 01700 if ((res = mp_sub (&x, &t1, &x)) != MP_OKAY) { 01701 goto __Y; 01702 } 01703 01704 /* if x < 0 then { x = x + y*b**{i-t-1}; q{i-t-1} -= 1; } */ 01705 if (x.sign == MP_NEG) { 01706 if ((res = mp_copy (&y, &t1)) != MP_OKAY) { 01707 goto __Y; 01708 } 01709 if ((res = mp_lshd (&t1, i - t - 1)) != MP_OKAY) { 01710 goto __Y; 01711 } 01712 if ((res = mp_add (&x, &t1, &x)) != MP_OKAY) { 01713 goto __Y; 01714 } 01715 01716 q.dp[i - t - 1] = (q.dp[i - t - 1] - 1UL) & MP_MASK; 01717 } 01718 } 01719 01720 /* now q is the quotient and x is the remainder 01721 * [which we have to normalize] 01722 */ 01723 01724 /* get sign before writing to c */ 01725 x.sign = x.used == 0 ? MP_ZPOS : a->sign; 01726 01727 if (c != NULL) { 01728 mp_clamp (&q); 01729 mp_exch (&q, c); 01730 c->sign = neg; 01731 } 01732 01733 if (d != NULL) { 01734 mp_div_2d (&x, norm, &x, NULL); 01735 mp_exch (&x, d); 01736 } 01737 01738 res = MP_OKAY; 01739 01740 __Y:mp_clear (&y); 01741 __X:mp_clear (&x); 01742 __T2:mp_clear (&t2); 01743 __T1:mp_clear (&t1); 01744 __Q:mp_clear (&q); 01745 return res; 01746 } 01747 01748 static int s_is_power_of_two(mp_digit b, int *p) 01749 { 01750 int x; 01751 01752 for (x = 1; x < DIGIT_BIT; x++) { 01753 if (b == (((mp_digit)1)<<x)) { 01754 *p = x; 01755 return 1; 01756 } 01757 } 01758 return 0; 01759 } 01760 01761 /* single digit division (based on routine from MPI) */ 01762 static int mp_div_d (const mp_int * a, mp_digit b, mp_int * c, mp_digit * d) 01763 { 01764 mp_int q; 01765 mp_word w; 01766 mp_digit t; 01767 int res, ix; 01768 01769 /* cannot divide by zero */ 01770 if (b == 0) { 01771 return MP_VAL; 01772 } 01773 01774 /* quick outs */ 01775 if (b == 1 || mp_iszero(a) == 1) { 01776 if (d != NULL) { 01777 *d = 0; 01778 } 01779 if (c != NULL) { 01780 return mp_copy(a, c); 01781 } 01782 return MP_OKAY; 01783 } 01784 01785 /* power of two ? */ 01786 if (s_is_power_of_two(b, &ix) == 1) { 01787 if (d != NULL) { 01788 *d = a->dp[0] & ((((mp_digit)1)<<ix) - 1); 01789 } 01790 if (c != NULL) { 01791 return mp_div_2d(a, ix, c, NULL); 01792 } 01793 return MP_OKAY; 01794 } 01795 01796 /* no easy answer [c'est la vie]. Just division */ 01797 if ((res = mp_init_size(&q, a->used)) != MP_OKAY) { 01798 return res; 01799 } 01800 01801 q.used = a->used; 01802 q.sign = a->sign; 01803 w = 0; 01804 for (ix = a->used - 1; ix >= 0; ix--) { 01805 w = (w << ((mp_word)DIGIT_BIT)) | ((mp_word)a->dp[ix]); 01806 01807 if (w >= b) { 01808 t = (mp_digit)(w / b); 01809 w -= ((mp_word)t) * ((mp_word)b); 01810 } else { 01811 t = 0; 01812 } 01813 q.dp[ix] = t; 01814 } 01815 01816 if (d != NULL) { 01817 *d = (mp_digit)w; 01818 } 01819 01820 if (c != NULL) { 01821 mp_clamp(&q); 01822 mp_exch(&q, c); 01823 } 01824 mp_clear(&q); 01825 01826 return res; 01827 } 01828 01829 /* reduce "x" in place modulo "n" using the Diminished Radix algorithm. 01830 * 01831 * Based on algorithm from the paper 01832 * 01833 * "Generating Efficient Primes for Discrete Log Cryptosystems" 01834 * Chae Hoon Lim, Pil Loong Lee, 01835 * POSTECH Information Research Laboratories 01836 * 01837 * The modulus must be of a special format [see manual] 01838 * 01839 * Has been modified to use algorithm 7.10 from the LTM book instead 01840 * 01841 * Input x must be in the range 0 <= x <= (n-1)**2 01842 */ 01843 static int 01844 mp_dr_reduce (mp_int * x, const mp_int * n, mp_digit k) 01845 { 01846 int err, i, m; 01847 mp_word r; 01848 mp_digit mu, *tmpx1, *tmpx2; 01849 01850 /* m = digits in modulus */ 01851 m = n->used; 01852 01853 /* ensure that "x" has at least 2m digits */ 01854 if (x->alloc < m + m) { 01855 if ((err = mp_grow (x, m + m)) != MP_OKAY) { 01856 return err; 01857 } 01858 } 01859 01860 /* top of loop, this is where the code resumes if 01861 * another reduction pass is required. 01862 */ 01863 top: 01864 /* aliases for digits */ 01865 /* alias for lower half of x */ 01866 tmpx1 = x->dp; 01867 01868 /* alias for upper half of x, or x/B**m */ 01869 tmpx2 = x->dp + m; 01870 01871 /* set carry to zero */ 01872 mu = 0; 01873 01874 /* compute (x mod B**m) + k * [x/B**m] inline and inplace */ 01875 for (i = 0; i < m; i++) { 01876 r = ((mp_word)*tmpx2++) * ((mp_word)k) + *tmpx1 + mu; 01877 *tmpx1++ = (mp_digit)(r & MP_MASK); 01878 mu = (mp_digit)(r >> ((mp_word)DIGIT_BIT)); 01879 } 01880 01881 /* set final carry */ 01882 *tmpx1++ = mu; 01883 01884 /* zero words above m */ 01885 for (i = m + 1; i < x->used; i++) { 01886 *tmpx1++ = 0; 01887 } 01888 01889 /* clamp, sub and return */ 01890 mp_clamp (x); 01891 01892 /* if x >= n then subtract and reduce again 01893 * Each successive "recursion" makes the input smaller and smaller. 01894 */ 01895 if (mp_cmp_mag (x, n) != MP_LT) { 01896 s_mp_sub(x, n, x); 01897 goto top; 01898 } 01899 return MP_OKAY; 01900 } 01901 01902 /* sets the value of "d" required for mp_dr_reduce */ 01903 static void mp_dr_setup(const mp_int *a, mp_digit *d) 01904 { 01905 /* the casts are required if DIGIT_BIT is one less than 01906 * the number of bits in a mp_digit [e.g. DIGIT_BIT==31] 01907 */ 01908 *d = (mp_digit)((((mp_word)1) << ((mp_word)DIGIT_BIT)) - 01909 ((mp_word)a->dp[0])); 01910 } 01911 01912 /* this is a shell function that calls either the normal or Montgomery 01913 * exptmod functions. Originally the call to the montgomery code was 01914 * embedded in the normal function but that wasted a lot of stack space 01915 * for nothing (since 99% of the time the Montgomery code would be called) 01916 */ 01917 int mp_exptmod (const mp_int * G, const mp_int * X, mp_int * P, mp_int * Y) 01918 { 01919 int dr; 01920 01921 /* modulus P must be positive */ 01922 if (P->sign == MP_NEG) { 01923 return MP_VAL; 01924 } 01925 01926 /* if exponent X is negative we have to recurse */ 01927 if (X->sign == MP_NEG) { 01928 mp_int tmpG, tmpX; 01929 int err; 01930 01931 /* first compute 1/G mod P */ 01932 if ((err = mp_init(&tmpG)) != MP_OKAY) { 01933 return err; 01934 } 01935 if ((err = mp_invmod(G, P, &tmpG)) != MP_OKAY) { 01936 mp_clear(&tmpG); 01937 return err; 01938 } 01939 01940 /* now get |X| */ 01941 if ((err = mp_init(&tmpX)) != MP_OKAY) { 01942 mp_clear(&tmpG); 01943 return err; 01944 } 01945 if ((err = mp_abs(X, &tmpX)) != MP_OKAY) { 01946 mp_clear_multi(&tmpG, &tmpX, NULL); 01947 return err; 01948 } 01949 01950 /* and now compute (1/G)**|X| instead of G**X [X < 0] */ 01951 err = mp_exptmod(&tmpG, &tmpX, P, Y); 01952 mp_clear_multi(&tmpG, &tmpX, NULL); 01953 return err; 01954 } 01955 01956 dr = 0; 01957 01958 /* if the modulus is odd or dr != 0 use the fast method */ 01959 if (mp_isodd (P) == 1 || dr != 0) { 01960 return mp_exptmod_fast (G, X, P, Y, dr); 01961 } else { 01962 /* otherwise use the generic Barrett reduction technique */ 01963 return s_mp_exptmod (G, X, P, Y); 01964 } 01965 } 01966 01967 /* computes Y == G**X mod P, HAC pp.616, Algorithm 14.85 01968 * 01969 * Uses a left-to-right k-ary sliding window to compute the modular exponentiation. 01970 * The value of k changes based on the size of the exponent. 01971 * 01972 * Uses Montgomery or Diminished Radix reduction [whichever appropriate] 01973 */ 01974 01975 int 01976 mp_exptmod_fast (const mp_int * G, const mp_int * X, mp_int * P, mp_int * Y, int redmode) 01977 { 01978 mp_int M[256], res; 01979 mp_digit buf, mp; 01980 int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize; 01981 01982 /* use a pointer to the reduction algorithm. This allows us to use 01983 * one of many reduction algorithms without modding the guts of 01984 * the code with if statements everywhere. 01985 */ 01986 int (*redux)(mp_int*,const mp_int*,mp_digit); 01987 01988 /* find window size */ 01989 x = mp_count_bits (X); 01990 if (x <= 7) { 01991 winsize = 2; 01992 } else if (x <= 36) { 01993 winsize = 3; 01994 } else if (x <= 140) { 01995 winsize = 4; 01996 } else if (x <= 450) { 01997 winsize = 5; 01998 } else if (x <= 1303) { 01999 winsize = 6; 02000 } else if (x <= 3529) { 02001 winsize = 7; 02002 } else { 02003 winsize = 8; 02004 } 02005 02006 /* init M array */ 02007 /* init first cell */ 02008 if ((err = mp_init(&M[1])) != MP_OKAY) { 02009 return err; 02010 } 02011 02012 /* now init the second half of the array */ 02013 for (x = 1<<(winsize-1); x < (1 << winsize); x++) { 02014 if ((err = mp_init(&M[x])) != MP_OKAY) { 02015 for (y = 1<<(winsize-1); y < x; y++) { 02016 mp_clear (&M[y]); 02017 } 02018 mp_clear(&M[1]); 02019 return err; 02020 } 02021 } 02022 02023 /* determine and setup reduction code */ 02024 if (redmode == 0) { 02025 /* now setup montgomery */ 02026 if ((err = mp_montgomery_setup (P, &mp)) != MP_OKAY) { 02027 goto __M; 02028 } 02029 02030 /* automatically pick the comba one if available (saves quite a few calls/ifs) */ 02031 if (((P->used * 2 + 1) < MP_WARRAY) && 02032 P->used < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) { 02033 redux = fast_mp_montgomery_reduce; 02034 } else { 02035 /* use slower baseline Montgomery method */ 02036 redux = mp_montgomery_reduce; 02037 } 02038 } else if (redmode == 1) { 02039 /* setup DR reduction for moduli of the form B**k - b */ 02040 mp_dr_setup(P, &mp); 02041 redux = mp_dr_reduce; 02042 } else { 02043 /* setup DR reduction for moduli of the form 2**k - b */ 02044 if ((err = mp_reduce_2k_setup(P, &mp)) != MP_OKAY) { 02045 goto __M; 02046 } 02047 redux = mp_reduce_2k; 02048 } 02049 02050 /* setup result */ 02051 if ((err = mp_init (&res)) != MP_OKAY) { 02052 goto __M; 02053 } 02054 02055 /* create M table 02056 * 02057 02058 * 02059 * The first half of the table is not computed though accept for M[0] and M[1] 02060 */ 02061 02062 if (redmode == 0) { 02063 /* now we need R mod m */ 02064 if ((err = mp_montgomery_calc_normalization (&res, P)) != MP_OKAY) { 02065 goto __RES; 02066 } 02067 02068 /* now set M[1] to G * R mod m */ 02069 if ((err = mp_mulmod (G, &res, P, &M[1])) != MP_OKAY) { 02070 goto __RES; 02071 } 02072 } else { 02073 mp_set(&res, 1); 02074 if ((err = mp_mod(G, P, &M[1])) != MP_OKAY) { 02075 goto __RES; 02076 } 02077 } 02078 02079 /* compute the value at M[1<<(winsize-1)] by squaring M[1] (winsize-1) times */ 02080 if ((err = mp_copy (&M[1], &M[1 << (winsize - 1)])) != MP_OKAY) { 02081 goto __RES; 02082 } 02083 02084 for (x = 0; x < (winsize - 1); x++) { 02085 if ((err = mp_sqr (&M[1 << (winsize - 1)], &M[1 << (winsize - 1)])) != MP_OKAY) { 02086 goto __RES; 02087 } 02088 if ((err = redux (&M[1 << (winsize - 1)], P, mp)) != MP_OKAY) { 02089 goto __RES; 02090 } 02091 } 02092 02093 /* create upper table */ 02094 for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) { 02095 if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) { 02096 goto __RES; 02097 } 02098 if ((err = redux (&M[x], P, mp)) != MP_OKAY) { 02099 goto __RES; 02100 } 02101 } 02102 02103 /* set initial mode and bit cnt */ 02104 mode = 0; 02105 bitcnt = 1; 02106 buf = 0; 02107 digidx = X->used - 1; 02108 bitcpy = 0; 02109 bitbuf = 0; 02110 02111 for (;;) { 02112 /* grab next digit as required */ 02113 if (--bitcnt == 0) { 02114 /* if digidx == -1 we are out of digits so break */ 02115 if (digidx == -1) { 02116 break; 02117 } 02118 /* read next digit and reset bitcnt */ 02119 buf = X->dp[digidx--]; 02120 bitcnt = DIGIT_BIT; 02121 } 02122 02123 /* grab the next msb from the exponent */ 02124 y = (buf >> (DIGIT_BIT - 1)) & 1; 02125 buf <<= (mp_digit)1; 02126 02127 /* if the bit is zero and mode == 0 then we ignore it 02128 * These represent the leading zero bits before the first 1 bit 02129 * in the exponent. Technically this opt is not required but it 02130 * does lower the # of trivial squaring/reductions used 02131 */ 02132 if (mode == 0 && y == 0) { 02133 continue; 02134 } 02135 02136 /* if the bit is zero and mode == 1 then we square */ 02137 if (mode == 1 && y == 0) { 02138 if ((err = mp_sqr (&res, &res)) != MP_OKAY) { 02139 goto __RES; 02140 } 02141 if ((err = redux (&res, P, mp)) != MP_OKAY) { 02142 goto __RES; 02143 } 02144 continue; 02145 } 02146 02147 /* else we add it to the window */ 02148 bitbuf |= (y << (winsize - ++bitcpy)); 02149 mode = 2; 02150 02151 if (bitcpy == winsize) { 02152 /* ok window is filled so square as required and multiply */ 02153 /* square first */ 02154 for (x = 0; x < winsize; x++) { 02155 if ((err = mp_sqr (&res, &res)) != MP_OKAY) { 02156 goto __RES; 02157 } 02158 if ((err = redux (&res, P, mp)) != MP_OKAY) { 02159 goto __RES; 02160 } 02161 } 02162 02163 /* then multiply */ 02164 if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) { 02165 goto __RES; 02166 } 02167 if ((err = redux (&res, P, mp)) != MP_OKAY) { 02168 goto __RES; 02169 } 02170 02171 /* empty window and reset */ 02172 bitcpy = 0; 02173 bitbuf = 0; 02174 mode = 1; 02175 } 02176 } 02177 02178 /* if bits remain then square/multiply */ 02179 if (mode == 2 && bitcpy > 0) { 02180 /* square then multiply if the bit is set */ 02181 for (x = 0; x < bitcpy; x++) { 02182 if ((err = mp_sqr (&res, &res)) != MP_OKAY) { 02183 goto __RES; 02184 } 02185 if ((err = redux (&res, P, mp)) != MP_OKAY) { 02186 goto __RES; 02187 } 02188 02189 /* get next bit of the window */ 02190 bitbuf <<= 1; 02191 if ((bitbuf & (1 << winsize)) != 0) { 02192 /* then multiply */ 02193 if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) { 02194 goto __RES; 02195 } 02196 if ((err = redux (&res, P, mp)) != MP_OKAY) { 02197 goto __RES; 02198 } 02199 } 02200 } 02201 } 02202 02203 if (redmode == 0) { 02204 /* fixup result if Montgomery reduction is used 02205 * recall that any value in a Montgomery system is 02206 * actually multiplied by R mod n. So we have 02207 * to reduce one more time to cancel out the factor 02208 * of R. 02209 */ 02210 if ((err = redux(&res, P, mp)) != MP_OKAY) { 02211 goto __RES; 02212 } 02213 } 02214 02215 /* swap res with Y */ 02216 mp_exch (&res, Y); 02217 err = MP_OKAY; 02218 __RES:mp_clear (&res); 02219 __M: 02220 mp_clear(&M[1]); 02221 for (x = 1<<(winsize-1); x < (1 << winsize); x++) { 02222 mp_clear (&M[x]); 02223 } 02224 return err; 02225 } 02226 02227 /* Greatest Common Divisor using the binary method */ 02228 int mp_gcd (const mp_int * a, const mp_int * b, mp_int * c) 02229 { 02230 mp_int u, v; 02231 int k, u_lsb, v_lsb, res; 02232 02233 /* either zero than gcd is the largest */ 02234 if (mp_iszero (a) == 1 && mp_iszero (b) == 0) { 02235 return mp_abs (b, c); 02236 } 02237 if (mp_iszero (a) == 0 && mp_iszero (b) == 1) { 02238 return mp_abs (a, c); 02239 } 02240 02241 /* optimized. At this point if a == 0 then 02242 * b must equal zero too 02243 */ 02244 if (mp_iszero (a) == 1) { 02245 mp_zero(c); 02246 return MP_OKAY; 02247 } 02248 02249 /* get copies of a and b we can modify */ 02250 if ((res = mp_init_copy (&u, a)) != MP_OKAY) { 02251 return res; 02252 } 02253 02254 if ((res = mp_init_copy (&v, b)) != MP_OKAY) { 02255 goto __U; 02256 } 02257 02258 /* must be positive for the remainder of the algorithm */ 02259 u.sign = v.sign = MP_ZPOS; 02260 02261 /* B1. Find the common power of two for u and v */ 02262 u_lsb = mp_cnt_lsb(&u); 02263 v_lsb = mp_cnt_lsb(&v); 02264 k = MIN(u_lsb, v_lsb); 02265 02266 if (k > 0) { 02267 /* divide the power of two out */ 02268 if ((res = mp_div_2d(&u, k, &u, NULL)) != MP_OKAY) { 02269 goto __V; 02270 } 02271 02272 if ((res = mp_div_2d(&v, k, &v, NULL)) != MP_OKAY) { 02273 goto __V; 02274 } 02275 } 02276 02277 /* divide any remaining factors of two out */ 02278 if (u_lsb != k) { 02279 if ((res = mp_div_2d(&u, u_lsb - k, &u, NULL)) != MP_OKAY) { 02280 goto __V; 02281 } 02282 } 02283 02284 if (v_lsb != k) { 02285 if ((res = mp_div_2d(&v, v_lsb - k, &v, NULL)) != MP_OKAY) { 02286 goto __V; 02287 } 02288 } 02289 02290 while (mp_iszero(&v) == 0) { 02291 /* make sure v is the largest */ 02292 if (mp_cmp_mag(&u, &v) == MP_GT) { 02293 /* swap u and v to make sure v is >= u */ 02294 mp_exch(&u, &v); 02295 } 02296 02297 /* subtract smallest from largest */ 02298 if ((res = s_mp_sub(&v, &u, &v)) != MP_OKAY) { 02299 goto __V; 02300 } 02301 02302 /* Divide out all factors of two */ 02303 if ((res = mp_div_2d(&v, mp_cnt_lsb(&v), &v, NULL)) != MP_OKAY) { 02304 goto __V; 02305 } 02306 } 02307 02308 /* multiply by 2**k which we divided out at the beginning */ 02309 if ((res = mp_mul_2d (&u, k, c)) != MP_OKAY) { 02310 goto __V; 02311 } 02312 c->sign = MP_ZPOS; 02313 res = MP_OKAY; 02314 __V:mp_clear (&u); 02315 __U:mp_clear (&v); 02316 return res; 02317 } 02318 02319 /* get the lower 32-bits of an mp_int */ 02320 unsigned long mp_get_int(const mp_int * a) 02321 { 02322 int i; 02323 unsigned long res; 02324 02325 if (a->used == 0) { 02326 return 0; 02327 } 02328 02329 /* get number of digits of the lsb we have to read */ 02330 i = MIN(a->used,(int)((sizeof(unsigned long)*CHAR_BIT+DIGIT_BIT-1)/DIGIT_BIT))-1; 02331 02332 /* get most significant digit of result */ 02333 res = DIGIT(a,i); 02334 02335 while (--i >= 0) { 02336 res = (res << DIGIT_BIT) | DIGIT(a,i); 02337 } 02338 02339 /* force result to 32-bits always so it is consistent on non 32-bit platforms */ 02340 return res & 0xFFFFFFFFUL; 02341 } 02342 02343 /* creates "a" then copies b into it */ 02344 int mp_init_copy (mp_int * a, const mp_int * b) 02345 { 02346 int res; 02347 02348 if ((res = mp_init (a)) != MP_OKAY) { 02349 return res; 02350 } 02351 return mp_copy (b, a); 02352 } 02353 02354 int mp_init_multi(mp_int *mp, ...) 02355 { 02356 mp_err res = MP_OKAY; /* Assume ok until proven otherwise */ 02357 int n = 0; /* Number of ok inits */ 02358 mp_int* cur_arg = mp; 02359 va_list args; 02360 02361 va_start(args, mp); /* init args to next argument from caller */ 02362 while (cur_arg != NULL) { 02363 if (mp_init(cur_arg) != MP_OKAY) { 02364 /* Oops - error! Back-track and mp_clear what we already 02365 succeeded in init-ing, then return error. 02366 */ 02367 va_list clean_args; 02368 02369 /* end the current list */ 02370 va_end(args); 02371 02372 /* now start cleaning up */ 02373 cur_arg = mp; 02374 va_start(clean_args, mp); 02375 while (n--) { 02376 mp_clear(cur_arg); 02377 cur_arg = va_arg(clean_args, mp_int*); 02378 } 02379 va_end(clean_args); 02380 res = MP_MEM; 02381 break; 02382 } 02383 n++; 02384 cur_arg = va_arg(args, mp_int*); 02385 } 02386 va_end(args); 02387 return res; /* Assumed ok, if error flagged above. */ 02388 } 02389 02390 /* hac 14.61, pp608 */ 02391 int mp_invmod (const mp_int * a, mp_int * b, mp_int * c) 02392 { 02393 /* b cannot be negative */ 02394 if (b->sign == MP_NEG || mp_iszero(b) == 1) { 02395 return MP_VAL; 02396 } 02397 02398 /* if the modulus is odd we can use a faster routine instead */ 02399 if (mp_isodd (b) == 1) { 02400 return fast_mp_invmod (a, b, c); 02401 } 02402 02403 return mp_invmod_slow(a, b, c); 02404 } 02405 02406 /* hac 14.61, pp608 */ 02407 int mp_invmod_slow (const mp_int * a, mp_int * b, mp_int * c) 02408 { 02409 mp_int x, y, u, v, A, B, C, D; 02410 int res; 02411 02412 /* b cannot be negative */ 02413 if (b->sign == MP_NEG || mp_iszero(b) == 1) { 02414 return MP_VAL; 02415 } 02416 02417 /* init temps */ 02418 if ((res = mp_init_multi(&x, &y, &u, &v, 02419 &A, &B, &C, &D, NULL)) != MP_OKAY) { 02420 return res; 02421 } 02422 02423 /* x = a, y = b */ 02424 if ((res = mp_copy (a, &x)) != MP_OKAY) { 02425 goto __ERR; 02426 } 02427 if ((res = mp_copy (b, &y)) != MP_OKAY) { 02428 goto __ERR; 02429 } 02430 02431 /* 2. [modified] if x,y are both even then return an error! */ 02432 if (mp_iseven (&x) == 1 && mp_iseven (&y) == 1) { 02433 res = MP_VAL; 02434 goto __ERR; 02435 } 02436 02437 /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */ 02438 if ((res = mp_copy (&x, &u)) != MP_OKAY) { 02439 goto __ERR; 02440 } 02441 if ((res = mp_copy (&y, &v)) != MP_OKAY) { 02442 goto __ERR; 02443 } 02444 mp_set (&A, 1); 02445 mp_set (&D, 1); 02446 02447 top: 02448 /* 4. while u is even do */ 02449 while (mp_iseven (&u) == 1) { 02450 /* 4.1 u = u/2 */ 02451 if ((res = mp_div_2 (&u, &u)) != MP_OKAY) { 02452 goto __ERR; 02453 } 02454 /* 4.2 if A or B is odd then */ 02455 if (mp_isodd (&A) == 1 || mp_isodd (&B) == 1) { 02456 /* A = (A+y)/2, B = (B-x)/2 */ 02457 if ((res = mp_add (&A, &y, &A)) != MP_OKAY) { 02458 goto __ERR; 02459 } 02460 if ((res = mp_sub (&B, &x, &B)) != MP_OKAY) { 02461 goto __ERR; 02462 } 02463 } 02464 /* A = A/2, B = B/2 */ 02465 if ((res = mp_div_2 (&A, &A)) != MP_OKAY) { 02466 goto __ERR; 02467 } 02468 if ((res = mp_div_2 (&B, &B)) != MP_OKAY) { 02469 goto __ERR; 02470 } 02471 } 02472 02473 /* 5. while v is even do */ 02474 while (mp_iseven (&v) == 1) { 02475 /* 5.1 v = v/2 */ 02476 if ((res = mp_div_2 (&v, &v)) != MP_OKAY) { 02477 goto __ERR; 02478 } 02479 /* 5.2 if C or D is odd then */ 02480 if (mp_isodd (&C) == 1 || mp_isodd (&D) == 1) { 02481 /* C = (C+y)/2, D = (D-x)/2 */ 02482 if ((res = mp_add (&C, &y, &C)) != MP_OKAY) { 02483 goto __ERR; 02484 } 02485 if ((res = mp_sub (&D, &x, &D)) != MP_OKAY) { 02486 goto __ERR; 02487 } 02488 } 02489 /* C = C/2, D = D/2 */ 02490 if ((res = mp_div_2 (&C, &C)) != MP_OKAY) { 02491 goto __ERR; 02492 } 02493 if ((res = mp_div_2 (&D, &D)) != MP_OKAY) { 02494 goto __ERR; 02495 } 02496 } 02497 02498 /* 6. if u >= v then */ 02499 if (mp_cmp (&u, &v) != MP_LT) { 02500 /* u = u - v, A = A - C, B = B - D */ 02501 if ((res = mp_sub (&u, &v, &u)) != MP_OKAY) { 02502 goto __ERR; 02503 } 02504 02505 if ((res = mp_sub (&A, &C, &A)) != MP_OKAY) { 02506 goto __ERR; 02507 } 02508 02509 if ((res = mp_sub (&B, &D, &B)) != MP_OKAY) { 02510 goto __ERR; 02511 } 02512 } else { 02513 /* v - v - u, C = C - A, D = D - B */ 02514 if ((res = mp_sub (&v, &u, &v)) != MP_OKAY) { 02515 goto __ERR; 02516 } 02517 02518 if ((res = mp_sub (&C, &A, &C)) != MP_OKAY) { 02519 goto __ERR; 02520 } 02521 02522 if ((res = mp_sub (&D, &B, &D)) != MP_OKAY) { 02523 goto __ERR; 02524 } 02525 } 02526 02527 /* if not zero goto step 4 */ 02528 if (mp_iszero (&u) == 0) 02529 goto top; 02530 02531 /* now a = C, b = D, gcd == g*v */ 02532 02533 /* if v != 1 then there is no inverse */ 02534 if (mp_cmp_d (&v, 1) != MP_EQ) { 02535 res = MP_VAL; 02536 goto __ERR; 02537 } 02538 02539 /* if its too low */ 02540 while (mp_cmp_d(&C, 0) == MP_LT) { 02541 if ((res = mp_add(&C, b, &C)) != MP_OKAY) { 02542 goto __ERR; 02543 } 02544 } 02545 02546 /* too big */ 02547 while (mp_cmp_mag(&C, b) != MP_LT) { 02548 if ((res = mp_sub(&C, b, &C)) != MP_OKAY) { 02549 goto __ERR; 02550 } 02551 } 02552 02553 /* C is now the inverse */ 02554 mp_exch (&C, c); 02555 res = MP_OKAY; 02556 __ERR:mp_clear_multi (&x, &y, &u, &v, &A, &B, &C, &D, NULL); 02557 return res; 02558 } 02559 02560 /* c = |a| * |b| using Karatsuba Multiplication using 02561 * three half size multiplications 02562 * 02563 * Let B represent the radix [e.g. 2**DIGIT_BIT] and 02564 * let n represent half of the number of digits in 02565 * the min(a,b) 02566 * 02567 * a = a1 * B**n + a0 02568 * b = b1 * B**n + b0 02569 * 02570 * Then, a * b => 02571 a1b1 * B**2n + ((a1 - a0)(b1 - b0) + a0b0 + a1b1) * B + a0b0 02572 * 02573 * Note that a1b1 and a0b0 are used twice and only need to be 02574 * computed once. So in total three half size (half # of 02575 * digit) multiplications are performed, a0b0, a1b1 and 02576 * (a1-b1)(a0-b0) 02577 * 02578 * Note that a multiplication of half the digits requires 02579 * 1/4th the number of single precision multiplications so in 02580 * total after one call 25% of the single precision multiplications 02581 * are saved. Note also that the call to mp_mul can end up back 02582 * in this function if the a0, a1, b0, or b1 are above the threshold. 02583 * This is known as divide-and-conquer and leads to the famous 02584 * O(N**lg(3)) or O(N**1.584) work which is asymptotically lower than 02585 * the standard O(N**2) that the baseline/comba methods use. 02586 * Generally though the overhead of this method doesn't pay off 02587 * until a certain size (N ~ 80) is reached. 02588 */ 02589 int mp_karatsuba_mul (const mp_int * a, const mp_int * b, mp_int * c) 02590 { 02591 mp_int x0, x1, y0, y1, t1, x0y0, x1y1; 02592 int B, err; 02593 02594 /* default the return code to an error */ 02595 err = MP_MEM; 02596 02597 /* min # of digits */ 02598 B = MIN (a->used, b->used); 02599 02600 /* now divide in two */ 02601 B = B >> 1; 02602 02603 /* init copy all the temps */ 02604 if (mp_init_size (&x0, B) != MP_OKAY) 02605 goto ERR; 02606 if (mp_init_size (&x1, a->used - B) != MP_OKAY) 02607 goto X0; 02608 if (mp_init_size (&y0, B) != MP_OKAY) 02609 goto X1; 02610 if (mp_init_size (&y1, b->used - B) != MP_OKAY) 02611 goto Y0; 02612 02613 /* init temps */ 02614 if (mp_init_size (&t1, B * 2) != MP_OKAY) 02615 goto Y1; 02616 if (mp_init_size (&x0y0, B * 2) != MP_OKAY) 02617 goto T1; 02618 if (mp_init_size (&x1y1, B * 2) != MP_OKAY) 02619 goto X0Y0; 02620 02621 /* now shift the digits */ 02622 x0.used = y0.used = B; 02623 x1.used = a->used - B; 02624 y1.used = b->used - B; 02625 02626 { 02627 register int x; 02628 register mp_digit *tmpa, *tmpb, *tmpx, *tmpy; 02629 02630 /* we copy the digits directly instead of using higher level functions 02631 * since we also need to shift the digits 02632 */ 02633 tmpa = a->dp; 02634 tmpb = b->dp; 02635 02636 tmpx = x0.dp; 02637 tmpy = y0.dp; 02638 for (x = 0; x < B; x++) { 02639 *tmpx++ = *tmpa++; 02640 *tmpy++ = *tmpb++; 02641 } 02642 02643 tmpx = x1.dp; 02644 for (x = B; x < a->used; x++) { 02645 *tmpx++ = *tmpa++; 02646 } 02647 02648 tmpy = y1.dp; 02649 for (x = B; x < b->used; x++) { 02650 *tmpy++ = *tmpb++; 02651 } 02652 } 02653 02654 /* only need to clamp the lower words since by definition the 02655 * upper words x1/y1 must have a known number of digits 02656 */ 02657 mp_clamp (&x0); 02658 mp_clamp (&y0); 02659 02660 /* now calc the products x0y0 and x1y1 */ 02661 /* after this x0 is no longer required, free temp [x0==t2]! */ 02662 if (mp_mul (&x0, &y0, &x0y0) != MP_OKAY) 02663 goto X1Y1; /* x0y0 = x0*y0 */ 02664 if (mp_mul (&x1, &y1, &x1y1) != MP_OKAY) 02665 goto X1Y1; /* x1y1 = x1*y1 */ 02666 02667 /* now calc x1-x0 and y1-y0 */ 02668 if (mp_sub (&x1, &x0, &t1) != MP_OKAY) 02669 goto X1Y1; /* t1 = x1 - x0 */ 02670 if (mp_sub (&y1, &y0, &x0) != MP_OKAY) 02671 goto X1Y1; /* t2 = y1 - y0 */ 02672 if (mp_mul (&t1, &x0, &t1) != MP_OKAY) 02673 goto X1Y1; /* t1 = (x1 - x0) * (y1 - y0) */ 02674 02675 /* add x0y0 */ 02676 if (mp_add (&x0y0, &x1y1, &x0) != MP_OKAY) 02677 goto X1Y1; /* t2 = x0y0 + x1y1 */ 02678 if (mp_sub (&x0, &t1, &t1) != MP_OKAY) 02679 goto X1Y1; /* t1 = x0y0 + x1y1 - (x1-x0)*(y1-y0) */ 02680 02681 /* shift by B */ 02682 if (mp_lshd (&t1, B) != MP_OKAY) 02683 goto X1Y1; /* t1 = (x0y0 + x1y1 - (x1-x0)*(y1-y0))<<B */ 02684 if (mp_lshd (&x1y1, B * 2) != MP_OKAY) 02685 goto X1Y1; /* x1y1 = x1y1 << 2*B */ 02686 02687 if (mp_add (&x0y0, &t1, &t1) != MP_OKAY) 02688 goto X1Y1; /* t1 = x0y0 + t1 */ 02689 if (mp_add (&t1, &x1y1, c) != MP_OKAY) 02690 goto X1Y1; /* t1 = x0y0 + t1 + x1y1 */ 02691 02692 /* Algorithm succeeded set the return code to MP_OKAY */ 02693 err = MP_OKAY; 02694 02695 X1Y1:mp_clear (&x1y1); 02696 X0Y0:mp_clear (&x0y0); 02697 T1:mp_clear (&t1); 02698 Y1:mp_clear (&y1); 02699 Y0:mp_clear (&y0); 02700 X1:mp_clear (&x1); 02701 X0:mp_clear (&x0); 02702 ERR: 02703 return err; 02704 } 02705 02706 /* Karatsuba squaring, computes b = a*a using three 02707 * half size squarings 02708 * 02709 * See comments of karatsuba_mul for details. It 02710 * is essentially the same algorithm but merely 02711 * tuned to perform recursive squarings. 02712 */ 02713 int mp_karatsuba_sqr (const mp_int * a, mp_int * b) 02714 { 02715 mp_int x0, x1, t1, t2, x0x0, x1x1; 02716 int B, err; 02717 02718 err = MP_MEM; 02719 02720 /* min # of digits */ 02721 B = a->used; 02722 02723 /* now divide in two */ 02724 B = B >> 1; 02725 02726 /* init copy all the temps */ 02727 if (mp_init_size (&x0, B) != MP_OKAY) 02728 goto ERR; 02729 if (mp_init_size (&x1, a->used - B) != MP_OKAY) 02730 goto X0; 02731 02732 /* init temps */ 02733 if (mp_init_size (&t1, a->used * 2) != MP_OKAY) 02734 goto X1; 02735 if (mp_init_size (&t2, a->used * 2) != MP_OKAY) 02736 goto T1; 02737 if (mp_init_size (&x0x0, B * 2) != MP_OKAY) 02738 goto T2; 02739 if (mp_init_size (&x1x1, (a->used - B) * 2) != MP_OKAY) 02740 goto X0X0; 02741 02742 { 02743 register int x; 02744 register mp_digit *dst, *src; 02745 02746 src = a->dp; 02747 02748 /* now shift the digits */ 02749 dst = x0.dp; 02750 for (x = 0; x < B; x++) { 02751 *dst++ = *src++; 02752 } 02753 02754 dst = x1.dp; 02755 for (x = B; x < a->used; x++) { 02756 *dst++ = *src++; 02757 } 02758 } 02759 02760 x0.used = B; 02761 x1.used = a->used - B; 02762 02763 mp_clamp (&x0); 02764 02765 /* now calc the products x0*x0 and x1*x1 */ 02766 if (mp_sqr (&x0, &x0x0) != MP_OKAY) 02767 goto X1X1; /* x0x0 = x0*x0 */ 02768 if (mp_sqr (&x1, &x1x1) != MP_OKAY) 02769 goto X1X1; /* x1x1 = x1*x1 */ 02770 02771 /* now calc (x1-x0)**2 */ 02772 if (mp_sub (&x1, &x0, &t1) != MP_OKAY) 02773 goto X1X1; /* t1 = x1 - x0 */ 02774 if (mp_sqr (&t1, &t1) != MP_OKAY) 02775 goto X1X1; /* t1 = (x1 - x0) * (x1 - x0) */ 02776 02777 /* add x0y0 */ 02778 if (s_mp_add (&x0x0, &x1x1, &t2) != MP_OKAY) 02779 goto X1X1; /* t2 = x0x0 + x1x1 */ 02780 if (mp_sub (&t2, &t1, &t1) != MP_OKAY) 02781 goto X1X1; /* t1 = x0x0 + x1x1 - (x1-x0)*(x1-x0) */ 02782 02783 /* shift by B */ 02784 if (mp_lshd (&t1, B) != MP_OKAY) 02785 goto X1X1; /* t1 = (x0x0 + x1x1 - (x1-x0)*(x1-x0))<<B */ 02786 if (mp_lshd (&x1x1, B * 2) != MP_OKAY) 02787 goto X1X1; /* x1x1 = x1x1 << 2*B */ 02788 02789 if (mp_add (&x0x0, &t1, &t1) != MP_OKAY) 02790 goto X1X1; /* t1 = x0x0 + t1 */ 02791 if (mp_add (&t1, &x1x1, b) != MP_OKAY) 02792 goto X1X1; /* t1 = x0x0 + t1 + x1x1 */ 02793 02794 err = MP_OKAY; 02795 02796 X1X1:mp_clear (&x1x1); 02797 X0X0:mp_clear (&x0x0); 02798 T2:mp_clear (&t2); 02799 T1:mp_clear (&t1); 02800 X1:mp_clear (&x1); 02801 X0:mp_clear (&x0); 02802 ERR: 02803 return err; 02804 } 02805 02806 /* computes least common multiple as |a*b|/(a, b) */ 02807 int mp_lcm (const mp_int * a, const mp_int * b, mp_int * c) 02808 { 02809 int res; 02810 mp_int t1, t2; 02811 02812 02813 if ((res = mp_init_multi (&t1, &t2, NULL)) != MP_OKAY) { 02814 return res; 02815 } 02816 02817 /* t1 = get the GCD of the two inputs */ 02818 if ((res = mp_gcd (a, b, &t1)) != MP_OKAY) { 02819 goto __T; 02820 } 02821 02822 /* divide the smallest by the GCD */ 02823 if (mp_cmp_mag(a, b) == MP_LT) { 02824 /* store quotient in t2 such that t2 * b is the LCM */ 02825 if ((res = mp_div(a, &t1, &t2, NULL)) != MP_OKAY) { 02826 goto __T; 02827 } 02828 res = mp_mul(b, &t2, c); 02829 } else { 02830 /* store quotient in t2 such that t2 * a is the LCM */ 02831 if ((res = mp_div(b, &t1, &t2, NULL)) != MP_OKAY) { 02832 goto __T; 02833 } 02834 res = mp_mul(a, &t2, c); 02835 } 02836 02837 /* fix the sign to positive */ 02838 c->sign = MP_ZPOS; 02839 02840 __T: 02841 mp_clear_multi (&t1, &t2, NULL); 02842 return res; 02843 } 02844 02845 /* c = a mod b, 0 <= c < b */ 02846 int 02847 mp_mod (const mp_int * a, mp_int * b, mp_int * c) 02848 { 02849 mp_int t; 02850 int res; 02851 02852 if ((res = mp_init (&t)) != MP_OKAY) { 02853 return res; 02854 } 02855 02856 if ((res = mp_div (a, b, NULL, &t)) != MP_OKAY) { 02857 mp_clear (&t); 02858 return res; 02859 } 02860 02861 if (t.sign != b->sign) { 02862 res = mp_add (b, &t, c); 02863 } else { 02864 res = MP_OKAY; 02865 mp_exch (&t, c); 02866 } 02867 02868 mp_clear (&t); 02869 return res; 02870 } 02871 02872 static int 02873 mp_mod_d (const mp_int * a, mp_digit b, mp_digit * c) 02874 { 02875 return mp_div_d(a, b, NULL, c); 02876 } 02877 02878 /* b = a*2 */ 02879 static int mp_mul_2(const mp_int * a, mp_int * b) 02880 { 02881 int x, res, oldused; 02882 02883 /* grow to accommodate result */ 02884 if (b->alloc < a->used + 1) { 02885 if ((res = mp_grow (b, a->used + 1)) != MP_OKAY) { 02886 return res; 02887 } 02888 } 02889 02890 oldused = b->used; 02891 b->used = a->used; 02892 02893 { 02894 register mp_digit r, rr, *tmpa, *tmpb; 02895 02896 /* alias for source */ 02897 tmpa = a->dp; 02898 02899 /* alias for dest */ 02900 tmpb = b->dp; 02901 02902 /* carry */ 02903 r = 0; 02904 for (x = 0; x < a->used; x++) { 02905 02906 /* get what will be the *next* carry bit from the 02907 * MSB of the current digit 02908 */ 02909 rr = *tmpa >> ((mp_digit)(DIGIT_BIT - 1)); 02910 02911 /* now shift up this digit, add in the carry [from the previous] */ 02912 *tmpb++ = ((*tmpa++ << ((mp_digit)1)) | r) & MP_MASK; 02913 02914 /* copy the carry that would be from the source 02915 * digit into the next iteration 02916 */ 02917 r = rr; 02918 } 02919 02920 /* new leading digit? */ 02921 if (r != 0) { 02922 /* add a MSB which is always 1 at this point */ 02923 *tmpb = 1; 02924 ++(b->used); 02925 } 02926 02927 /* now zero any excess digits on the destination 02928 * that we didn't write to 02929 */ 02930 tmpb = b->dp + b->used; 02931 for (x = b->used; x < oldused; x++) { 02932 *tmpb++ = 0; 02933 } 02934 } 02935 b->sign = a->sign; 02936 return MP_OKAY; 02937 } 02938 02939 /* 02940 * shifts with subtractions when the result is greater than b. 02941 * 02942 * The method is slightly modified to shift B unconditionally up to just under 02943 * the leading bit of b. This saves a lot of multiple precision shifting. 02944 */ 02945 int mp_montgomery_calc_normalization (mp_int * a, const mp_int * b) 02946 { 02947 int x, bits, res; 02948 02949 /* how many bits of last digit does b use */ 02950 bits = mp_count_bits (b) % DIGIT_BIT; 02951 02952 02953 if (b->used > 1) { 02954 if ((res = mp_2expt (a, (b->used - 1) * DIGIT_BIT + bits - 1)) != MP_OKAY) { 02955 return res; 02956 } 02957 } else { 02958 mp_set(a, 1); 02959 bits = 1; 02960 } 02961 02962 02963 /* now compute C = A * B mod b */ 02964 for (x = bits - 1; x < DIGIT_BIT; x++) { 02965 if ((res = mp_mul_2 (a, a)) != MP_OKAY) { 02966 return res; 02967 } 02968 if (mp_cmp_mag (a, b) != MP_LT) { 02969 if ((res = s_mp_sub (a, b, a)) != MP_OKAY) { 02970 return res; 02971 } 02972 } 02973 } 02974 02975 return MP_OKAY; 02976 } 02977 02978 /* computes xR**-1 == x (mod N) via Montgomery Reduction */ 02979 int 02980 mp_montgomery_reduce (mp_int * x, const mp_int * n, mp_digit rho) 02981 { 02982 int ix, res, digs; 02983 mp_digit mu; 02984 02985 /* can the fast reduction [comba] method be used? 02986 * 02987 * Note that unlike in mul you're safely allowed *less* 02988 * than the available columns [255 per default] since carries 02989 * are fixed up in the inner loop. 02990 */ 02991 digs = n->used * 2 + 1; 02992 if ((digs < MP_WARRAY) && 02993 n->used < 02994 (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) { 02995 return fast_mp_montgomery_reduce (x, n, rho); 02996 } 02997 02998 /* grow the input as required */ 02999 if (x->alloc < digs) { 03000 if ((res = mp_grow (x, digs)) != MP_OKAY) { 03001 return res; 03002 } 03003 } 03004 x->used = digs; 03005 03006 for (ix = 0; ix < n->used; ix++) { 03007 /* mu = ai * rho mod b 03008 * 03009 * The value of rho must be precalculated via 03010 * montgomery_setup() such that 03011 * it equals -1/n0 mod b this allows the 03012 * following inner loop to reduce the 03013 * input one digit at a time 03014 */ 03015 mu = (mp_digit) (((mp_word)x->dp[ix]) * ((mp_word)rho) & MP_MASK); 03016 03017 /* a = a + mu * m * b**i */ 03018 { 03019 register int iy; 03020 register mp_digit *tmpn, *tmpx, u; 03021 register mp_word r; 03022 03023 /* alias for digits of the modulus */ 03024 tmpn = n->dp; 03025 03026 /* alias for the digits of x [the input] */ 03027 tmpx = x->dp + ix; 03028 03029 /* set the carry to zero */ 03030 u = 0; 03031 03032 /* Multiply and add in place */ 03033 for (iy = 0; iy < n->used; iy++) { 03034 /* compute product and sum */ 03035 r = ((mp_word)mu) * ((mp_word)*tmpn++) + 03036 ((mp_word) u) + ((mp_word) * tmpx); 03037 03038 /* get carry */ 03039 u = (mp_digit)(r >> ((mp_word) DIGIT_BIT)); 03040 03041 /* fix digit */ 03042 *tmpx++ = (mp_digit)(r & ((mp_word) MP_MASK)); 03043 } 03044 /* At this point the ix'th digit of x should be zero */ 03045 03046 03047 /* propagate carries upwards as required*/ 03048 while (u) { 03049 *tmpx += u; 03050 u = *tmpx >> DIGIT_BIT; 03051 *tmpx++ &= MP_MASK; 03052 } 03053 } 03054 } 03055 03056 /* at this point the n.used'th least 03057 * significant digits of x are all zero 03058 * which means we can shift x to the 03059 * right by n.used digits and the 03060 * residue is unchanged. 03061 */ 03062 03063 /* x = x/b**n.used */ 03064 mp_clamp(x); 03065 mp_rshd (x, n->used); 03066 03067 /* if x >= n then x = x - n */ 03068 if (mp_cmp_mag (x, n) != MP_LT) { 03069 return s_mp_sub (x, n, x); 03070 } 03071 03072 return MP_OKAY; 03073 } 03074 03075 /* setups the montgomery reduction stuff */ 03076 int 03077 mp_montgomery_setup (const mp_int * n, mp_digit * rho) 03078 { 03079 mp_digit x, b; 03080 03081 /* fast inversion mod 2**k 03082 * 03083 * Based on the fact that 03084 * 03085 * XA = 1 (mod 2**n) => (X(2-XA)) A = 1 (mod 2**2n) 03086 * => 2*X*A - X*X*A*A = 1 03087 * => 2*(1) - (1) = 1 03088 */ 03089 b = n->dp[0]; 03090 03091 if ((b & 1) == 0) { 03092 return MP_VAL; 03093 } 03094 03095 x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */ 03096 x *= 2 - b * x; /* here x*a==1 mod 2**8 */ 03097 x *= 2 - b * x; /* here x*a==1 mod 2**16 */ 03098 x *= 2 - b * x; /* here x*a==1 mod 2**32 */ 03099 03100 /* rho = -1/m mod b */ 03101 *rho = (((mp_word)1 << ((mp_word) DIGIT_BIT)) - x) & MP_MASK; 03102 03103 return MP_OKAY; 03104 } 03105 03106 /* high level multiplication (handles sign) */ 03107 int mp_mul (const mp_int * a, const mp_int * b, mp_int * c) 03108 { 03109 int res, neg; 03110 neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG; 03111 03112 /* use Karatsuba? */ 03113 if (MIN (a->used, b->used) >= KARATSUBA_MUL_CUTOFF) { 03114 res = mp_karatsuba_mul (a, b, c); 03115 } else 03116 { 03117 /* can we use the fast multiplier? 03118 * 03119 * The fast multiplier can be used if the output will 03120 * have less than MP_WARRAY digits and the number of 03121 * digits won't affect carry propagation 03122 */ 03123 int digs = a->used + b->used + 1; 03124 03125 if ((digs < MP_WARRAY) && 03126 MIN(a->used, b->used) <= 03127 (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) { 03128 res = fast_s_mp_mul_digs (a, b, c, digs); 03129 } else 03130 res = s_mp_mul (a, b, c); /* uses s_mp_mul_digs */ 03131 } 03132 c->sign = (c->used > 0) ? neg : MP_ZPOS; 03133 return res; 03134 } 03135 03136 /* d = a * b (mod c) */ 03137 int 03138 mp_mulmod (const mp_int * a, const mp_int * b, mp_int * c, mp_int * d) 03139 { 03140 int res; 03141 mp_int t; 03142 03143 if ((res = mp_init (&t)) != MP_OKAY) { 03144 return res; 03145 } 03146 03147 if ((res = mp_mul (a, b, &t)) != MP_OKAY) { 03148 mp_clear (&t); 03149 return res; 03150 } 03151 res = mp_mod (&t, c, d); 03152 mp_clear (&t); 03153 return res; 03154 } 03155 03156 /* table of first PRIME_SIZE primes */ 03157 static const mp_digit __prime_tab[] = { 03158 0x0002, 0x0003, 0x0005, 0x0007, 0x000B, 0x000D, 0x0011, 0x0013, 03159 0x0017, 0x001D, 0x001F, 0x0025, 0x0029, 0x002B, 0x002F, 0x0035, 03160 0x003B, 0x003D, 0x0043, 0x0047, 0x0049, 0x004F, 0x0053, 0x0059, 03161 0x0061, 0x0065, 0x0067, 0x006B, 0x006D, 0x0071, 0x007F, 0x0083, 03162 0x0089, 0x008B, 0x0095, 0x0097, 0x009D, 0x00A3, 0x00A7, 0x00AD, 03163 0x00B3, 0x00B5, 0x00BF, 0x00C1, 0x00C5, 0x00C7, 0x00D3, 0x00DF, 03164 0x00E3, 0x00E5, 0x00E9, 0x00EF, 0x00F1, 0x00FB, 0x0101, 0x0107, 03165 0x010D, 0x010F, 0x0115, 0x0119, 0x011B, 0x0125, 0x0133, 0x0137, 03166 03167 0x0139, 0x013D, 0x014B, 0x0151, 0x015B, 0x015D, 0x0161, 0x0167, 03168 0x016F, 0x0175, 0x017B, 0x017F, 0x0185, 0x018D, 0x0191, 0x0199, 03169 0x01A3, 0x01A5, 0x01AF, 0x01B1, 0x01B7, 0x01BB, 0x01C1, 0x01C9, 03170 0x01CD, 0x01CF, 0x01D3, 0x01DF, 0x01E7, 0x01EB, 0x01F3, 0x01F7, 03171 0x01FD, 0x0209, 0x020B, 0x021D, 0x0223, 0x022D, 0x0233, 0x0239, 03172 0x023B, 0x0241, 0x024B, 0x0251, 0x0257, 0x0259, 0x025F, 0x0265, 03173 0x0269, 0x026B, 0x0277, 0x0281, 0x0283, 0x0287, 0x028D, 0x0293, 03174 0x0295, 0x02A1, 0x02A5, 0x02AB, 0x02B3, 0x02BD, 0x02C5, 0x02CF, 03175 03176 0x02D7, 0x02DD, 0x02E3, 0x02E7, 0x02EF, 0x02F5, 0x02F9, 0x0301, 03177 0x0305, 0x0313, 0x031D, 0x0329, 0x032B, 0x0335, 0x0337, 0x033B, 03178 0x033D, 0x0347, 0x0355, 0x0359, 0x035B, 0x035F, 0x036D, 0x0371, 03179 0x0373, 0x0377, 0x038B, 0x038F, 0x0397, 0x03A1, 0x03A9, 0x03AD, 03180 0x03B3, 0x03B9, 0x03C7, 0x03CB, 0x03D1, 0x03D7, 0x03DF, 0x03E5, 03181 0x03F1, 0x03F5, 0x03FB, 0x03FD, 0x0407, 0x0409, 0x040F, 0x0419, 03182 0x041B, 0x0425, 0x0427, 0x042D, 0x043F, 0x0443, 0x0445, 0x0449, 03183 0x044F, 0x0455, 0x045D, 0x0463, 0x0469, 0x047F, 0x0481, 0x048B, 03184 03185 0x0493, 0x049D, 0x04A3, 0x04A9, 0x04B1, 0x04BD, 0x04C1, 0x04C7, 03186 0x04CD, 0x04CF, 0x04D5, 0x04E1, 0x04EB, 0x04FD, 0x04FF, 0x0503, 03187 0x0509, 0x050B, 0x0511, 0x0515, 0x0517, 0x051B, 0x0527, 0x0529, 03188 0x052F, 0x0551, 0x0557, 0x055D, 0x0565, 0x0577, 0x0581, 0x058F, 03189 0x0593, 0x0595, 0x0599, 0x059F, 0x05A7, 0x05AB, 0x05AD, 0x05B3, 03190 0x05BF, 0x05C9, 0x05CB, 0x05CF, 0x05D1, 0x05D5, 0x05DB, 0x05E7, 03191 0x05F3, 0x05FB, 0x0607, 0x060D, 0x0611, 0x0617, 0x061F, 0x0623, 03192 0x062B, 0x062F, 0x063D, 0x0641, 0x0647, 0x0649, 0x064D, 0x0653 03193 }; 03194 03195 /* determines if an integers is divisible by one 03196 * of the first PRIME_SIZE primes or not 03197 * 03198 * sets result to 0 if not, 1 if yes 03199 */ 03200 static int mp_prime_is_divisible (const mp_int * a, int *result) 03201 { 03202 int err, ix; 03203 mp_digit res; 03204 03205 /* default to not */ 03206 *result = MP_NO; 03207 03208 for (ix = 0; ix < PRIME_SIZE; ix++) { 03209 /* what is a mod __prime_tab[ix] */ 03210 if ((err = mp_mod_d (a, __prime_tab[ix], &res)) != MP_OKAY) { 03211 return err; 03212 } 03213 03214 /* is the residue zero? */ 03215 if (res == 0) { 03216 *result = MP_YES; 03217 return MP_OKAY; 03218 } 03219 } 03220 03221 return MP_OKAY; 03222 } 03223 03224 /* Miller-Rabin test of "a" to the base of "b" as described in 03225 * HAC pp. 139 Algorithm 4.24 03226 * 03227 * Sets result to 0 if definitely composite or 1 if probably prime. 03228 * Randomly the chance of error is no more than 1/4 and often 03229 * very much lower. 03230 */ 03231 static int mp_prime_miller_rabin (mp_int * a, const mp_int * b, int *result) 03232 { 03233 mp_int n1, y, r; 03234 int s, j, err; 03235 03236 /* default */ 03237 *result = MP_NO; 03238 03239 /* ensure b > 1 */ 03240 if (mp_cmp_d(b, 1) != MP_GT) { 03241 return MP_VAL; 03242 } 03243 03244 /* get n1 = a - 1 */ 03245 if ((err = mp_init_copy (&n1, a)) != MP_OKAY) { 03246 return err; 03247 } 03248 if ((err = mp_sub_d (&n1, 1, &n1)) != MP_OKAY) { 03249 goto __N1; 03250 } 03251 03252 /* set 2**s * r = n1 */ 03253 if ((err = mp_init_copy (&r, &n1)) != MP_OKAY) { 03254 goto __N1; 03255 } 03256 03257 /* count the number of least significant bits 03258 * which are zero 03259 */ 03260 s = mp_cnt_lsb(&r); 03261 03262 /* now divide n - 1 by 2**s */ 03263 if ((err = mp_div_2d (&r, s, &r, NULL)) != MP_OKAY) { 03264 goto __R; 03265 } 03266 03267 /* compute y = b**r mod a */ 03268 if ((err = mp_init (&y)) != MP_OKAY) { 03269 goto __R; 03270 } 03271 if ((err = mp_exptmod (b, &r, a, &y)) != MP_OKAY) { 03272 goto __Y; 03273 } 03274 03275 /* if y != 1 and y != n1 do */ 03276 if (mp_cmp_d (&y, 1) != MP_EQ && mp_cmp (&y, &n1) != MP_EQ) { 03277 j = 1; 03278 /* while j <= s-1 and y != n1 */ 03279 while ((j <= (s - 1)) && mp_cmp (&y, &n1) != MP_EQ) { 03280 if ((err = mp_sqrmod (&y, a, &y)) != MP_OKAY) { 03281 goto __Y; 03282 } 03283 03284 /* if y == 1 then composite */ 03285 if (mp_cmp_d (&y, 1) == MP_EQ) { 03286 goto __Y; 03287 } 03288 03289 ++j; 03290 } 03291 03292 /* if y != n1 then composite */ 03293 if (mp_cmp (&y, &n1) != MP_EQ) { 03294 goto __Y; 03295 } 03296 } 03297 03298 /* probably prime now */ 03299 *result = MP_YES; 03300 __Y:mp_clear (&y); 03301 __R:mp_clear (&r); 03302 __N1:mp_clear (&n1); 03303 return err; 03304 } 03305 03306 /* performs a variable number of rounds of Miller-Rabin 03307 * 03308 * Probability of error after t rounds is no more than 03309 03310 * 03311 * Sets result to 1 if probably prime, 0 otherwise 03312 */ 03313 static int mp_prime_is_prime (mp_int * a, int t, int *result) 03314 { 03315 mp_int b; 03316 int ix, err, res; 03317 03318 /* default to no */ 03319 *result = MP_NO; 03320 03321 /* valid value of t? */ 03322 if (t <= 0 || t > PRIME_SIZE) { 03323 return MP_VAL; 03324 } 03325 03326 /* is the input equal to one of the primes in the table? */ 03327 for (ix = 0; ix < PRIME_SIZE; ix++) { 03328 if (mp_cmp_d(a, __prime_tab[ix]) == MP_EQ) { 03329 *result = 1; 03330 return MP_OKAY; 03331 } 03332 } 03333 03334 /* first perform trial division */ 03335 if ((err = mp_prime_is_divisible (a, &res)) != MP_OKAY) { 03336 return err; 03337 } 03338 03339 /* return if it was trivially divisible */ 03340 if (res == MP_YES) { 03341 return MP_OKAY; 03342 } 03343 03344 /* now perform the miller-rabin rounds */ 03345 if ((err = mp_init (&b)) != MP_OKAY) { 03346 return err; 03347 } 03348 03349 for (ix = 0; ix < t; ix++) { 03350 /* set the prime */ 03351 mp_set (&b, __prime_tab[ix]); 03352 03353 if ((err = mp_prime_miller_rabin (a, &b, &res)) != MP_OKAY) { 03354 goto __B; 03355 } 03356 03357 if (res == MP_NO) { 03358 goto __B; 03359 } 03360 } 03361 03362 /* passed the test */ 03363 *result = MP_YES; 03364 __B:mp_clear (&b); 03365 return err; 03366 } 03367 03368 static const struct { 03369 int k, t; 03370 } sizes[] = { 03371 { 128, 28 }, 03372 { 256, 16 }, 03373 { 384, 10 }, 03374 { 512, 7 }, 03375 { 640, 6 }, 03376 { 768, 5 }, 03377 { 896, 4 }, 03378 { 1024, 4 } 03379 }; 03380 03381 /* returns # of RM trials required for a given bit size */ 03382 int mp_prime_rabin_miller_trials(int size) 03383 { 03384 int x; 03385 03386 for (x = 0; x < (int)(sizeof(sizes)/(sizeof(sizes[0]))); x++) { 03387 if (sizes[x].k == size) { 03388 return sizes[x].t; 03389 } else if (sizes[x].k > size) { 03390 return (x == 0) ? sizes[0].t : sizes[x - 1].t; 03391 } 03392 } 03393 return sizes[x-1].t + 1; 03394 } 03395 03396 /* makes a truly random prime of a given size (bits), 03397 * 03398 * Flags are as follows: 03399 * 03400 * LTM_PRIME_BBS - make prime congruent to 3 mod 4 03401 * LTM_PRIME_SAFE - make sure (p-1)/2 is prime as well (implies LTM_PRIME_BBS) 03402 * LTM_PRIME_2MSB_OFF - make the 2nd highest bit zero 03403 * LTM_PRIME_2MSB_ON - make the 2nd highest bit one 03404 * 03405 * You have to supply a callback which fills in a buffer with random bytes. "dat" is a parameter you can 03406 * have passed to the callback (e.g. a state or something). This function doesn't use "dat" itself 03407 * so it can be NULL 03408 * 03409 */ 03410 03411 /* This is possibly the mother of all prime generation functions, muahahahahaha! */ 03412 int mp_prime_random_ex(mp_int *a, int t, int size, int flags, ltm_prime_callback cb, void *dat) 03413 { 03414 unsigned char *tmp, maskAND, maskOR_msb, maskOR_lsb; 03415 int res, err, bsize, maskOR_msb_offset; 03416 03417 /* sanity check the input */ 03418 if (size <= 1 || t <= 0) { 03419 return MP_VAL; 03420 } 03421 03422 /* LTM_PRIME_SAFE implies LTM_PRIME_BBS */ 03423 if (flags & LTM_PRIME_SAFE) { 03424 flags |= LTM_PRIME_BBS; 03425 } 03426 03427 /* calc the byte size */ 03428 bsize = (size>>3)+((size&7)?1:0); 03429 03430 /* we need a buffer of bsize bytes */ 03431 tmp = HeapAlloc(GetProcessHeap(), 0, bsize); 03432 if (tmp == NULL) { 03433 return MP_MEM; 03434 } 03435 03436 /* calc the maskAND value for the MSbyte*/ 03437 maskAND = ((size&7) == 0) ? 0xFF : (0xFF >> (8 - (size & 7))); 03438 03439 /* calc the maskOR_msb */ 03440 maskOR_msb = 0; 03441 maskOR_msb_offset = ((size & 7) == 1) ? 1 : 0; 03442 if (flags & LTM_PRIME_2MSB_ON) { 03443 maskOR_msb |= 1 << ((size - 2) & 7); 03444 } else if (flags & LTM_PRIME_2MSB_OFF) { 03445 maskAND &= ~(1 << ((size - 2) & 7)); 03446 } 03447 03448 /* get the maskOR_lsb */ 03449 maskOR_lsb = 0; 03450 if (flags & LTM_PRIME_BBS) { 03451 maskOR_lsb |= 3; 03452 } 03453 03454 do { 03455 /* read the bytes */ 03456 if (cb(tmp, bsize, dat) != bsize) { 03457 err = MP_VAL; 03458 goto error; 03459 } 03460 03461 /* work over the MSbyte */ 03462 tmp[0] &= maskAND; 03463 tmp[0] |= 1 << ((size - 1) & 7); 03464 03465 /* mix in the maskORs */ 03466 tmp[maskOR_msb_offset] |= maskOR_msb; 03467 tmp[bsize-1] |= maskOR_lsb; 03468 03469 /* read it in */ 03470 if ((err = mp_read_unsigned_bin(a, tmp, bsize)) != MP_OKAY) { goto error; } 03471 03472 /* is it prime? */ 03473 if ((err = mp_prime_is_prime(a, t, &res)) != MP_OKAY) { goto error; } 03474 if (res == MP_NO) { 03475 continue; 03476 } 03477 03478 if (flags & LTM_PRIME_SAFE) { 03479 /* see if (a-1)/2 is prime */ 03480 if ((err = mp_sub_d(a, 1, a)) != MP_OKAY) { goto error; } 03481 if ((err = mp_div_2(a, a)) != MP_OKAY) { goto error; } 03482 03483 /* is it prime? */ 03484 if ((err = mp_prime_is_prime(a, t, &res)) != MP_OKAY) { goto error; } 03485 } 03486 } while (res == MP_NO); 03487 03488 if (flags & LTM_PRIME_SAFE) { 03489 /* restore a to the original value */ 03490 if ((err = mp_mul_2(a, a)) != MP_OKAY) { goto error; } 03491 if ((err = mp_add_d(a, 1, a)) != MP_OKAY) { goto error; } 03492 } 03493 03494 err = MP_OKAY; 03495 error: 03496 HeapFree(GetProcessHeap(), 0, tmp); 03497 return err; 03498 } 03499 03500 /* reads an unsigned char array, assumes the msb is stored first [big endian] */ 03501 int 03502 mp_read_unsigned_bin (mp_int * a, const unsigned char *b, int c) 03503 { 03504 int res; 03505 03506 /* make sure there are at least two digits */ 03507 if (a->alloc < 2) { 03508 if ((res = mp_grow(a, 2)) != MP_OKAY) { 03509 return res; 03510 } 03511 } 03512 03513 /* zero the int */ 03514 mp_zero (a); 03515 03516 /* read the bytes in */ 03517 while (c-- > 0) { 03518 if ((res = mp_mul_2d (a, 8, a)) != MP_OKAY) { 03519 return res; 03520 } 03521 03522 a->dp[0] |= *b++; 03523 a->used += 1; 03524 } 03525 mp_clamp (a); 03526 return MP_OKAY; 03527 } 03528 03529 /* reduces x mod m, assumes 0 < x < m**2, mu is 03530 * precomputed via mp_reduce_setup. 03531 * From HAC pp.604 Algorithm 14.42 03532 */ 03533 int 03534 mp_reduce (mp_int * x, const mp_int * m, const mp_int * mu) 03535 { 03536 mp_int q; 03537 int res, um = m->used; 03538 03539 /* q = x */ 03540 if ((res = mp_init_copy (&q, x)) != MP_OKAY) { 03541 return res; 03542 } 03543 03544 /* q1 = x / b**(k-1) */ 03545 mp_rshd (&q, um - 1); 03546 03547 /* according to HAC this optimization is ok */ 03548 if (((unsigned long) um) > (((mp_digit)1) << (DIGIT_BIT - 1))) { 03549 if ((res = mp_mul (&q, mu, &q)) != MP_OKAY) { 03550 goto CLEANUP; 03551 } 03552 } else { 03553 if ((res = s_mp_mul_high_digs (&q, mu, &q, um - 1)) != MP_OKAY) { 03554 goto CLEANUP; 03555 } 03556 } 03557 03558 /* q3 = q2 / b**(k+1) */ 03559 mp_rshd (&q, um + 1); 03560 03561 /* x = x mod b**(k+1), quick (no division) */ 03562 if ((res = mp_mod_2d (x, DIGIT_BIT * (um + 1), x)) != MP_OKAY) { 03563 goto CLEANUP; 03564 } 03565 03566 /* q = q * m mod b**(k+1), quick (no division) */ 03567 if ((res = s_mp_mul_digs (&q, m, &q, um + 1)) != MP_OKAY) { 03568 goto CLEANUP; 03569 } 03570 03571 /* x = x - q */ 03572 if ((res = mp_sub (x, &q, x)) != MP_OKAY) { 03573 goto CLEANUP; 03574 } 03575 03576 /* If x < 0, add b**(k+1) to it */ 03577 if (mp_cmp_d (x, 0) == MP_LT) { 03578 mp_set (&q, 1); 03579 if ((res = mp_lshd (&q, um + 1)) != MP_OKAY) 03580 goto CLEANUP; 03581 if ((res = mp_add (x, &q, x)) != MP_OKAY) 03582 goto CLEANUP; 03583 } 03584 03585 /* Back off if it's too big */ 03586 while (mp_cmp (x, m) != MP_LT) { 03587 if ((res = s_mp_sub (x, m, x)) != MP_OKAY) { 03588 goto CLEANUP; 03589 } 03590 } 03591 03592 CLEANUP: 03593 mp_clear (&q); 03594 03595 return res; 03596 } 03597 03598 /* reduces a modulo n where n is of the form 2**p - d */ 03599 int 03600 mp_reduce_2k(mp_int *a, const mp_int *n, mp_digit d) 03601 { 03602 mp_int q; 03603 int p, res; 03604 03605 if ((res = mp_init(&q)) != MP_OKAY) { 03606 return res; 03607 } 03608 03609 p = mp_count_bits(n); 03610 top: 03611 /* q = a/2**p, a = a mod 2**p */ 03612 if ((res = mp_div_2d(a, p, &q, a)) != MP_OKAY) { 03613 goto ERR; 03614 } 03615 03616 if (d != 1) { 03617 /* q = q * d */ 03618 if ((res = mp_mul_d(&q, d, &q)) != MP_OKAY) { 03619 goto ERR; 03620 } 03621 } 03622 03623 /* a = a + q */ 03624 if ((res = s_mp_add(a, &q, a)) != MP_OKAY) { 03625 goto ERR; 03626 } 03627 03628 if (mp_cmp_mag(a, n) != MP_LT) { 03629 s_mp_sub(a, n, a); 03630 goto top; 03631 } 03632 03633 ERR: 03634 mp_clear(&q); 03635 return res; 03636 } 03637 03638 /* determines the setup value */ 03639 static int 03640 mp_reduce_2k_setup(const mp_int *a, mp_digit *d) 03641 { 03642 int res, p; 03643 mp_int tmp; 03644 03645 if ((res = mp_init(&tmp)) != MP_OKAY) { 03646 return res; 03647 } 03648 03649 p = mp_count_bits(a); 03650 if ((res = mp_2expt(&tmp, p)) != MP_OKAY) { 03651 mp_clear(&tmp); 03652 return res; 03653 } 03654 03655 if ((res = s_mp_sub(&tmp, a, &tmp)) != MP_OKAY) { 03656 mp_clear(&tmp); 03657 return res; 03658 } 03659 03660 *d = tmp.dp[0]; 03661 mp_clear(&tmp); 03662 return MP_OKAY; 03663 } 03664 03665 /* pre-calculate the value required for Barrett reduction 03666 * For a given modulus "b" it calculates the value required in "a" 03667 */ 03668 int mp_reduce_setup (mp_int * a, const mp_int * b) 03669 { 03670 int res; 03671 03672 if ((res = mp_2expt (a, b->used * 2 * DIGIT_BIT)) != MP_OKAY) { 03673 return res; 03674 } 03675 return mp_div (a, b, a, NULL); 03676 } 03677 03678 /* set to a digit */ 03679 void mp_set (mp_int * a, mp_digit b) 03680 { 03681 mp_zero (a); 03682 a->dp[0] = b & MP_MASK; 03683 a->used = (a->dp[0] != 0) ? 1 : 0; 03684 } 03685 03686 /* set a 32-bit const */ 03687 int mp_set_int (mp_int * a, unsigned long b) 03688 { 03689 int x, res; 03690 03691 mp_zero (a); 03692 03693 /* set four bits at a time */ 03694 for (x = 0; x < 8; x++) { 03695 /* shift the number up four bits */ 03696 if ((res = mp_mul_2d (a, 4, a)) != MP_OKAY) { 03697 return res; 03698 } 03699 03700 /* OR in the top four bits of the source */ 03701 a->dp[0] |= (b >> 28) & 15; 03702 03703 /* shift the source up to the next four bits */ 03704 b <<= 4; 03705 03706 /* ensure that digits are not clamped off */ 03707 a->used += 1; 03708 } 03709 mp_clamp (a); 03710 return MP_OKAY; 03711 } 03712 03713 /* shrink a bignum */ 03714 int mp_shrink (mp_int * a) 03715 { 03716 mp_digit *tmp; 03717 if (a->alloc != a->used && a->used > 0) { 03718 if ((tmp = HeapReAlloc(GetProcessHeap(), 0, a->dp, sizeof (mp_digit) * a->used)) == NULL) { 03719 return MP_MEM; 03720 } 03721 a->dp = tmp; 03722 a->alloc = a->used; 03723 } 03724 return MP_OKAY; 03725 } 03726 03727 /* computes b = a*a */ 03728 int 03729 mp_sqr (const mp_int * a, mp_int * b) 03730 { 03731 int res; 03732 03733 if (a->used >= KARATSUBA_SQR_CUTOFF) { 03734 res = mp_karatsuba_sqr (a, b); 03735 } else 03736 { 03737 /* can we use the fast comba multiplier? */ 03738 if ((a->used * 2 + 1) < MP_WARRAY && 03739 a->used < 03740 (1 << (sizeof(mp_word) * CHAR_BIT - 2*DIGIT_BIT - 1))) { 03741 res = fast_s_mp_sqr (a, b); 03742 } else 03743 res = s_mp_sqr (a, b); 03744 } 03745 b->sign = MP_ZPOS; 03746 return res; 03747 } 03748 03749 /* c = a * a (mod b) */ 03750 int 03751 mp_sqrmod (const mp_int * a, mp_int * b, mp_int * c) 03752 { 03753 int res; 03754 mp_int t; 03755 03756 if ((res = mp_init (&t)) != MP_OKAY) { 03757 return res; 03758 } 03759 03760 if ((res = mp_sqr (a, &t)) != MP_OKAY) { 03761 mp_clear (&t); 03762 return res; 03763 } 03764 res = mp_mod (&t, b, c); 03765 mp_clear (&t); 03766 return res; 03767 } 03768 03769 /* high level subtraction (handles signs) */ 03770 int 03771 mp_sub (mp_int * a, mp_int * b, mp_int * c) 03772 { 03773 int sa, sb, res; 03774 03775 sa = a->sign; 03776 sb = b->sign; 03777 03778 if (sa != sb) { 03779 /* subtract a negative from a positive, OR */ 03780 /* subtract a positive from a negative. */ 03781 /* In either case, ADD their magnitudes, */ 03782 /* and use the sign of the first number. */ 03783 c->sign = sa; 03784 res = s_mp_add (a, b, c); 03785 } else { 03786 /* subtract a positive from a positive, OR */ 03787 /* subtract a negative from a negative. */ 03788 /* First, take the difference between their */ 03789 /* magnitudes, then... */ 03790 if (mp_cmp_mag (a, b) != MP_LT) { 03791 /* Copy the sign from the first */ 03792 c->sign = sa; 03793 /* The first has a larger or equal magnitude */ 03794 res = s_mp_sub (a, b, c); 03795 } else { 03796 /* The result has the *opposite* sign from */ 03797 /* the first number. */ 03798 c->sign = (sa == MP_ZPOS) ? MP_NEG : MP_ZPOS; 03799 /* The second has a larger magnitude */ 03800 res = s_mp_sub (b, a, c); 03801 } 03802 } 03803 return res; 03804 } 03805 03806 /* single digit subtraction */ 03807 int 03808 mp_sub_d (mp_int * a, mp_digit b, mp_int * c) 03809 { 03810 mp_digit *tmpa, *tmpc, mu; 03811 int res, ix, oldused; 03812 03813 /* grow c as required */ 03814 if (c->alloc < a->used + 1) { 03815 if ((res = mp_grow(c, a->used + 1)) != MP_OKAY) { 03816 return res; 03817 } 03818 } 03819 03820 /* if a is negative just do an unsigned 03821 * addition [with fudged signs] 03822 */ 03823 if (a->sign == MP_NEG) { 03824 a->sign = MP_ZPOS; 03825 res = mp_add_d(a, b, c); 03826 a->sign = c->sign = MP_NEG; 03827 return res; 03828 } 03829 03830 /* setup regs */ 03831 oldused = c->used; 03832 tmpa = a->dp; 03833 tmpc = c->dp; 03834 03835 /* if a <= b simply fix the single digit */ 03836 if ((a->used == 1 && a->dp[0] <= b) || a->used == 0) { 03837 if (a->used == 1) { 03838 *tmpc++ = b - *tmpa; 03839 } else { 03840 *tmpc++ = b; 03841 } 03842 ix = 1; 03843 03844 /* negative/1digit */ 03845 c->sign = MP_NEG; 03846 c->used = 1; 03847 } else { 03848 /* positive/size */ 03849 c->sign = MP_ZPOS; 03850 c->used = a->used; 03851 03852 /* subtract first digit */ 03853 *tmpc = *tmpa++ - b; 03854 mu = *tmpc >> (sizeof(mp_digit) * CHAR_BIT - 1); 03855 *tmpc++ &= MP_MASK; 03856 03857 /* handle rest of the digits */ 03858 for (ix = 1; ix < a->used; ix++) { 03859 *tmpc = *tmpa++ - mu; 03860 mu = *tmpc >> (sizeof(mp_digit) * CHAR_BIT - 1); 03861 *tmpc++ &= MP_MASK; 03862 } 03863 } 03864 03865 /* zero excess digits */ 03866 while (ix++ < oldused) { 03867 *tmpc++ = 0; 03868 } 03869 mp_clamp(c); 03870 return MP_OKAY; 03871 } 03872 03873 /* store in unsigned [big endian] format */ 03874 int 03875 mp_to_unsigned_bin (const mp_int * a, unsigned char *b) 03876 { 03877 int x, res; 03878 mp_int t; 03879 03880 if ((res = mp_init_copy (&t, a)) != MP_OKAY) { 03881 return res; 03882 } 03883 03884 x = 0; 03885 while (mp_iszero (&t) == 0) { 03886 b[x++] = (unsigned char) (t.dp[0] & 255); 03887 if ((res = mp_div_2d (&t, 8, &t, NULL)) != MP_OKAY) { 03888 mp_clear (&t); 03889 return res; 03890 } 03891 } 03892 bn_reverse (b, x); 03893 mp_clear (&t); 03894 return MP_OKAY; 03895 } 03896 03897 /* get the size for an unsigned equivalent */ 03898 int 03899 mp_unsigned_bin_size (const mp_int * a) 03900 { 03901 int size = mp_count_bits (a); 03902 return (size / 8 + ((size & 7) != 0 ? 1 : 0)); 03903 } 03904 03905 /* reverse an array, used for radix code */ 03906 static void 03907 bn_reverse (unsigned char *s, int len) 03908 { 03909 int ix, iy; 03910 unsigned char t; 03911 03912 ix = 0; 03913 iy = len - 1; 03914 while (ix < iy) { 03915 t = s[ix]; 03916 s[ix] = s[iy]; 03917 s[iy] = t; 03918 ++ix; 03919 --iy; 03920 } 03921 } 03922 03923 /* low level addition, based on HAC pp.594, Algorithm 14.7 */ 03924 static int 03925 s_mp_add (mp_int * a, mp_int * b, mp_int * c) 03926 { 03927 mp_int *x; 03928 int olduse, res, min, max; 03929 03930 /* find sizes, we let |a| <= |b| which means we have to sort 03931 * them. "x" will point to the input with the most digits 03932 */ 03933 if (a->used > b->used) { 03934 min = b->used; 03935 max = a->used; 03936 x = a; 03937 } else { 03938 min = a->used; 03939 max = b->used; 03940 x = b; 03941 } 03942 03943 /* init result */ 03944 if (c->alloc < max + 1) { 03945 if ((res = mp_grow (c, max + 1)) != MP_OKAY) { 03946 return res; 03947 } 03948 } 03949 03950 /* get old used digit count and set new one */ 03951 olduse = c->used; 03952 c->used = max + 1; 03953 03954 { 03955 register mp_digit u, *tmpa, *tmpb, *tmpc; 03956 register int i; 03957 03958 /* alias for digit pointers */ 03959 03960 /* first input */ 03961 tmpa = a->dp; 03962 03963 /* second input */ 03964 tmpb = b->dp; 03965 03966 /* destination */ 03967 tmpc = c->dp; 03968 03969 /* zero the carry */ 03970 u = 0; 03971 for (i = 0; i < min; i++) { 03972 /* Compute the sum at one digit, T[i] = A[i] + B[i] + U */ 03973 *tmpc = *tmpa++ + *tmpb++ + u; 03974 03975 /* U = carry bit of T[i] */ 03976 u = *tmpc >> ((mp_digit)DIGIT_BIT); 03977 03978 /* take away carry bit from T[i] */ 03979 *tmpc++ &= MP_MASK; 03980 } 03981 03982 /* now copy higher words if any, that is in A+B 03983 * if A or B has more digits add those in 03984 */ 03985 if (min != max) { 03986 for (; i < max; i++) { 03987 /* T[i] = X[i] + U */ 03988 *tmpc = x->dp[i] + u; 03989 03990 /* U = carry bit of T[i] */ 03991 u = *tmpc >> ((mp_digit)DIGIT_BIT); 03992 03993 /* take away carry bit from T[i] */ 03994 *tmpc++ &= MP_MASK; 03995 } 03996 } 03997 03998 /* add carry */ 03999 *tmpc++ = u; 04000 04001 /* clear digits above oldused */ 04002 for (i = c->used; i < olduse; i++) { 04003 *tmpc++ = 0; 04004 } 04005 } 04006 04007 mp_clamp (c); 04008 return MP_OKAY; 04009 } 04010 04011 static int s_mp_exptmod (const mp_int * G, const mp_int * X, mp_int * P, mp_int * Y) 04012 { 04013 mp_int M[256], res, mu; 04014 mp_digit buf; 04015 int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize; 04016 04017 /* find window size */ 04018 x = mp_count_bits (X); 04019 if (x <= 7) { 04020 winsize = 2; 04021 } else if (x <= 36) { 04022 winsize = 3; 04023 } else if (x <= 140) { 04024 winsize = 4; 04025 } else if (x <= 450) { 04026 winsize = 5; 04027 } else if (x <= 1303) { 04028 winsize = 6; 04029 } else if (x <= 3529) { 04030 winsize = 7; 04031 } else { 04032 winsize = 8; 04033 } 04034 04035 /* init M array */ 04036 /* init first cell */ 04037 if ((err = mp_init(&M[1])) != MP_OKAY) { 04038 return err; 04039 } 04040 04041 /* now init the second half of the array */ 04042 for (x = 1<<(winsize-1); x < (1 << winsize); x++) { 04043 if ((err = mp_init(&M[x])) != MP_OKAY) { 04044 for (y = 1<<(winsize-1); y < x; y++) { 04045 mp_clear (&M[y]); 04046 } 04047 mp_clear(&M[1]); 04048 return err; 04049 } 04050 } 04051 04052 /* create mu, used for Barrett reduction */ 04053 if ((err = mp_init (&mu)) != MP_OKAY) { 04054 goto __M; 04055 } 04056 if ((err = mp_reduce_setup (&mu, P)) != MP_OKAY) { 04057 goto __MU; 04058 } 04059 04060 /* create M table 04061 * 04062 * The M table contains powers of the base, 04063 * e.g. M[x] = G**x mod P 04064 * 04065 * The first half of the table is not 04066 * computed though accept for M[0] and M[1] 04067 */ 04068 if ((err = mp_mod (G, P, &M[1])) != MP_OKAY) { 04069 goto __MU; 04070 } 04071 04072 /* compute the value at M[1<<(winsize-1)] by squaring 04073 * M[1] (winsize-1) times 04074 */ 04075 if ((err = mp_copy (&M[1], &M[1 << (winsize - 1)])) != MP_OKAY) { 04076 goto __MU; 04077 } 04078 04079 for (x = 0; x < (winsize - 1); x++) { 04080 if ((err = mp_sqr (&M[1 << (winsize - 1)], 04081 &M[1 << (winsize - 1)])) != MP_OKAY) { 04082 goto __MU; 04083 } 04084 if ((err = mp_reduce (&M[1 << (winsize - 1)], P, &mu)) != MP_OKAY) { 04085 goto __MU; 04086 } 04087 } 04088 04089 /* create upper table, that is M[x] = M[x-1] * M[1] (mod P) 04090 * for x = (2**(winsize - 1) + 1) to (2**winsize - 1) 04091 */ 04092 for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) { 04093 if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) { 04094 goto __MU; 04095 } 04096 if ((err = mp_reduce (&M[x], P, &mu)) != MP_OKAY) { 04097 goto __MU; 04098 } 04099 } 04100 04101 /* setup result */ 04102 if ((err = mp_init (&res)) != MP_OKAY) { 04103 goto __MU; 04104 } 04105 mp_set (&res, 1); 04106 04107 /* set initial mode and bit cnt */ 04108 mode = 0; 04109 bitcnt = 1; 04110 buf = 0; 04111 digidx = X->used - 1; 04112 bitcpy = 0; 04113 bitbuf = 0; 04114 04115 for (;;) { 04116 /* grab next digit as required */ 04117 if (--bitcnt == 0) { 04118 /* if digidx == -1 we are out of digits */ 04119 if (digidx == -1) { 04120 break; 04121 } 04122 /* read next digit and reset the bitcnt */ 04123 buf = X->dp[digidx--]; 04124 bitcnt = DIGIT_BIT; 04125 } 04126 04127 /* grab the next msb from the exponent */ 04128 y = (buf >> (mp_digit)(DIGIT_BIT - 1)) & 1; 04129 buf <<= (mp_digit)1; 04130 04131 /* if the bit is zero and mode == 0 then we ignore it 04132 * These represent the leading zero bits before the first 1 bit 04133 * in the exponent. Technically this opt is not required but it 04134 * does lower the # of trivial squaring/reductions used 04135 */ 04136 if (mode == 0 && y == 0) { 04137 continue; 04138 } 04139 04140 /* if the bit is zero and mode == 1 then we square */ 04141 if (mode == 1 && y == 0) { 04142 if ((err = mp_sqr (&res, &res)) != MP_OKAY) { 04143 goto __RES; 04144 } 04145 if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) { 04146 goto __RES; 04147 } 04148 continue; 04149 } 04150 04151 /* else we add it to the window */ 04152 bitbuf |= (y << (winsize - ++bitcpy)); 04153 mode = 2; 04154 04155 if (bitcpy == winsize) { 04156 /* ok window is filled so square as required and multiply */ 04157 /* square first */ 04158 for (x = 0; x < winsize; x++) { 04159 if ((err = mp_sqr (&res, &res)) != MP_OKAY) { 04160 goto __RES; 04161 } 04162 if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) { 04163 goto __RES; 04164 } 04165 } 04166 04167 /* then multiply */ 04168 if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) { 04169 goto __RES; 04170 } 04171 if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) { 04172 goto __RES; 04173 } 04174 04175 /* empty window and reset */ 04176 bitcpy = 0; 04177 bitbuf = 0; 04178 mode = 1; 04179 } 04180 } 04181 04182 /* if bits remain then square/multiply */ 04183 if (mode == 2 && bitcpy > 0) { 04184 /* square then multiply if the bit is set */ 04185 for (x = 0; x < bitcpy; x++) { 04186 if ((err = mp_sqr (&res, &res)) != MP_OKAY) { 04187 goto __RES; 04188 } 04189 if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) { 04190 goto __RES; 04191 } 04192 04193 bitbuf <<= 1; 04194 if ((bitbuf & (1 << winsize)) != 0) { 04195 /* then multiply */ 04196 if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) { 04197 goto __RES; 04198 } 04199 if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) { 04200 goto __RES; 04201 } 04202 } 04203 } 04204 } 04205 04206 mp_exch (&res, Y); 04207 err = MP_OKAY; 04208 __RES:mp_clear (&res); 04209 __MU:mp_clear (&mu); 04210 __M: 04211 mp_clear(&M[1]); 04212 for (x = 1<<(winsize-1); x < (1 << winsize); x++) { 04213 mp_clear (&M[x]); 04214 } 04215 return err; 04216 } 04217 04218 /* multiplies |a| * |b| and only computes up to digs digits of result 04219 * HAC pp. 595, Algorithm 14.12 Modified so you can control how 04220 * many digits of output are created. 04221 */ 04222 static int 04223 s_mp_mul_digs (const mp_int * a, const mp_int * b, mp_int * c, int digs) 04224 { 04225 mp_int t; 04226 int res, pa, pb, ix, iy; 04227 mp_digit u; 04228 mp_word r; 04229 mp_digit tmpx, *tmpt, *tmpy; 04230 04231 /* can we use the fast multiplier? */ 04232 if (((digs) < MP_WARRAY) && 04233 MIN (a->used, b->used) < 04234 (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) { 04235 return fast_s_mp_mul_digs (a, b, c, digs); 04236 } 04237 04238 if ((res = mp_init_size (&t, digs)) != MP_OKAY) { 04239 return res; 04240 } 04241 t.used = digs; 04242 04243 /* compute the digits of the product directly */ 04244 pa = a->used; 04245 for (ix = 0; ix < pa; ix++) { 04246 /* set the carry to zero */ 04247 u = 0; 04248 04249 /* limit ourselves to making digs digits of output */ 04250 pb = MIN (b->used, digs - ix); 04251 04252 /* setup some aliases */ 04253 /* copy of the digit from a used within the nested loop */ 04254 tmpx = a->dp[ix]; 04255 04256 /* an alias for the destination shifted ix places */ 04257 tmpt = t.dp + ix; 04258 04259 /* an alias for the digits of b */ 04260 tmpy = b->dp; 04261 04262 /* compute the columns of the output and propagate the carry */ 04263 for (iy = 0; iy < pb; iy++) { 04264 /* compute the column as a mp_word */ 04265 r = ((mp_word)*tmpt) + 04266 ((mp_word)tmpx) * ((mp_word)*tmpy++) + 04267 ((mp_word) u); 04268 04269 /* the new column is the lower part of the result */ 04270 *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK)); 04271 04272 /* get the carry word from the result */ 04273 u = (mp_digit) (r >> ((mp_word) DIGIT_BIT)); 04274 } 04275 /* set carry if it is placed below digs */ 04276 if (ix + iy < digs) { 04277 *tmpt = u; 04278 } 04279 } 04280 04281 mp_clamp (&t); 04282 mp_exch (&t, c); 04283 04284 mp_clear (&t); 04285 return MP_OKAY; 04286 } 04287 04288 /* multiplies |a| * |b| and does not compute the lower digs digits 04289 * [meant to get the higher part of the product] 04290 */ 04291 static int 04292 s_mp_mul_high_digs (const mp_int * a, const mp_int * b, mp_int * c, int digs) 04293 { 04294 mp_int t; 04295 int res, pa, pb, ix, iy; 04296 mp_digit u; 04297 mp_word r; 04298 mp_digit tmpx, *tmpt, *tmpy; 04299 04300 /* can we use the fast multiplier? */ 04301 if (((a->used + b->used + 1) < MP_WARRAY) 04302 && MIN (a->used, b->used) < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) { 04303 return fast_s_mp_mul_high_digs (a, b, c, digs); 04304 } 04305 04306 if ((res = mp_init_size (&t, a->used + b->used + 1)) != MP_OKAY) { 04307 return res; 04308 } 04309 t.used = a->used + b->used + 1; 04310 04311 pa = a->used; 04312 pb = b->used; 04313 for (ix = 0; ix < pa; ix++) { 04314 /* clear the carry */ 04315 u = 0; 04316 04317 /* left hand side of A[ix] * B[iy] */ 04318 tmpx = a->dp[ix]; 04319 04320 /* alias to the address of where the digits will be stored */ 04321 tmpt = &(t.dp[digs]); 04322 04323 /* alias for where to read the right hand side from */ 04324 tmpy = b->dp + (digs - ix); 04325 04326 for (iy = digs - ix; iy < pb; iy++) { 04327 /* calculate the double precision result */ 04328 r = ((mp_word)*tmpt) + 04329 ((mp_word)tmpx) * ((mp_word)*tmpy++) + 04330 ((mp_word) u); 04331 04332 /* get the lower part */ 04333 *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK)); 04334 04335 /* carry the carry */ 04336 u = (mp_digit) (r >> ((mp_word) DIGIT_BIT)); 04337 } 04338 *tmpt = u; 04339 } 04340 mp_clamp (&t); 04341 mp_exch (&t, c); 04342 mp_clear (&t); 04343 return MP_OKAY; 04344 } 04345 04346 /* low level squaring, b = a*a, HAC pp.596-597, Algorithm 14.16 */ 04347 static int 04348 s_mp_sqr (const mp_int * a, mp_int * b) 04349 { 04350 mp_int t; 04351 int res, ix, iy, pa; 04352 mp_word r; 04353 mp_digit u, tmpx, *tmpt; 04354 04355 pa = a->used; 04356 if ((res = mp_init_size (&t, 2*pa + 1)) != MP_OKAY) { 04357 return res; 04358 } 04359 04360 /* default used is maximum possible size */ 04361 t.used = 2*pa + 1; 04362 04363 for (ix = 0; ix < pa; ix++) { 04364 /* first calculate the digit at 2*ix */ 04365 /* calculate double precision result */ 04366 r = ((mp_word) t.dp[2*ix]) + 04367 ((mp_word)a->dp[ix])*((mp_word)a->dp[ix]); 04368 04369 /* store lower part in result */ 04370 t.dp[ix+ix] = (mp_digit) (r & ((mp_word) MP_MASK)); 04371 04372 /* get the carry */ 04373 u = (mp_digit)(r >> ((mp_word) DIGIT_BIT)); 04374 04375 /* left hand side of A[ix] * A[iy] */ 04376 tmpx = a->dp[ix]; 04377 04378 /* alias for where to store the results */ 04379 tmpt = t.dp + (2*ix + 1); 04380 04381 for (iy = ix + 1; iy < pa; iy++) { 04382 /* first calculate the product */ 04383 r = ((mp_word)tmpx) * ((mp_word)a->dp[iy]); 04384 04385 /* now calculate the double precision result, note we use 04386 * addition instead of *2 since it's easier to optimize 04387 */ 04388 r = ((mp_word) *tmpt) + r + r + ((mp_word) u); 04389 04390 /* store lower part */ 04391 *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK)); 04392 04393 /* get carry */ 04394 u = (mp_digit)(r >> ((mp_word) DIGIT_BIT)); 04395 } 04396 /* propagate upwards */ 04397 while (u != 0) { 04398 r = ((mp_word) *tmpt) + ((mp_word) u); 04399 *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK)); 04400 u = (mp_digit)(r >> ((mp_word) DIGIT_BIT)); 04401 } 04402 } 04403 04404 mp_clamp (&t); 04405 mp_exch (&t, b); 04406 mp_clear (&t); 04407 return MP_OKAY; 04408 } 04409 04410 /* low level subtraction (assumes |a| > |b|), HAC pp.595 Algorithm 14.9 */ 04411 int 04412 s_mp_sub (const mp_int * a, const mp_int * b, mp_int * c) 04413 { 04414 int olduse, res, min, max; 04415 04416 /* find sizes */ 04417 min = b->used; 04418 max = a->used; 04419 04420 /* init result */ 04421 if (c->alloc < max) { 04422 if ((res = mp_grow (c, max)) != MP_OKAY) { 04423 return res; 04424 } 04425 } 04426 olduse = c->used; 04427 c->used = max; 04428 04429 { 04430 register mp_digit u, *tmpa, *tmpb, *tmpc; 04431 register int i; 04432 04433 /* alias for digit pointers */ 04434 tmpa = a->dp; 04435 tmpb = b->dp; 04436 tmpc = c->dp; 04437 04438 /* set carry to zero */ 04439 u = 0; 04440 for (i = 0; i < min; i++) { 04441 /* T[i] = A[i] - B[i] - U */ 04442 *tmpc = *tmpa++ - *tmpb++ - u; 04443 04444 /* U = carry bit of T[i] 04445 * Note this saves performing an AND operation since 04446 * if a carry does occur it will propagate all the way to the 04447 * MSB. As a result a single shift is enough to get the carry 04448 */ 04449 u = *tmpc >> ((mp_digit)(CHAR_BIT * sizeof (mp_digit) - 1)); 04450 04451 /* Clear carry from T[i] */ 04452 *tmpc++ &= MP_MASK; 04453 } 04454 04455 /* now copy higher words if any, e.g. if A has more digits than B */ 04456 for (; i < max; i++) { 04457 /* T[i] = A[i] - U */ 04458 *tmpc = *tmpa++ - u; 04459 04460 /* U = carry bit of T[i] */ 04461 u = *tmpc >> ((mp_digit)(CHAR_BIT * sizeof (mp_digit) - 1)); 04462 04463 /* Clear carry from T[i] */ 04464 *tmpc++ &= MP_MASK; 04465 } 04466 04467 /* clear digits above used (since we may not have grown result above) */ 04468 for (i = c->used; i < olduse; i++) { 04469 *tmpc++ = 0; 04470 } 04471 } 04472 04473 mp_clamp (c); 04474 return MP_OKAY; 04475 } Generated on Sun May 27 2012 04:26:08 for ReactOS by
1.7.6.1
|