ReactOS  0.4.14-dev-52-g6116262
mpi.c
Go to the documentation of this file.
1 /*
2  * dlls/rsaenh/mpi.c
3  * Multi Precision Integer functions
4  *
5  * Copyright 2004 Michael Jung
6  * Based on public domain code by Tom St Denis (tomstdenis@iahu.ca)
7  *
8  * This library is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public
10  * License as published by the Free Software Foundation; either
11  * version 2.1 of the License, or (at your option) any later version.
12  *
13  * This library is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16  * Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public
19  * License along with this library; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
21  */
22 
23 /*
24  * This file contains code from the LibTomCrypt cryptographic
25  * library written by Tom St Denis (tomstdenis@iahu.ca). LibTomCrypt
26  * is in the public domain. The code in this file is tailored to
27  * special requirements. Take a look at http://libtomcrypt.org for the
28  * original version.
29  */
30 
31 #include <stdarg.h>
32 
33 #include <windef.h>
34 #include <winbase.h>
35 #include "tomcrypt.h"
36 
37 /* Known optimal configurations
38  CPU /Compiler /MUL CUTOFF/SQR CUTOFF
39 -------------------------------------------------------------
40  Intel P4 Northwood /GCC v3.4.1 / 88/ 128/LTM 0.32 ;-)
41 */
42 static const int KARATSUBA_MUL_CUTOFF = 88, /* Min. number of digits before Karatsuba multiplication is used. */
43  KARATSUBA_SQR_CUTOFF = 128; /* Min. number of digits before Karatsuba squaring is used. */
44 
45 
46 /* trim unused digits */
47 static void mp_clamp(mp_int *a);
48 
49 /* compare |a| to |b| */
50 static int mp_cmp_mag(const mp_int *a, const mp_int *b);
51 
52 /* Counts the number of lsbs which are zero before the first zero bit */
53 static int mp_cnt_lsb(const mp_int *a);
54 
55 /* computes a = B**n mod b without division or multiplication useful for
56  * normalizing numbers in a Montgomery system.
57  */
58 static int mp_montgomery_calc_normalization(mp_int *a, const mp_int *b);
59 
60 /* computes x/R == x (mod N) via Montgomery Reduction */
61 static int mp_montgomery_reduce(mp_int *a, const mp_int *m, mp_digit mp);
62 
63 /* setups the montgomery reduction */
64 static int mp_montgomery_setup(const mp_int *a, mp_digit *mp);
65 
66 /* Barrett Reduction, computes a (mod b) with a precomputed value c
67  *
68  * Assumes that 0 < a <= b*b, note if 0 > a > -(b*b) then you can merely
69  * compute the reduction as -1 * mp_reduce(mp_abs(a)) [pseudo code].
70  */
71 static int mp_reduce(mp_int *a, const mp_int *b, const mp_int *c);
72 
73 /* reduces a modulo b where b is of the form 2**p - k [0 <= a] */
74 static int mp_reduce_2k(mp_int *a, const mp_int *n, mp_digit d);
75 
76 /* determines k value for 2k reduction */
77 static int mp_reduce_2k_setup(const mp_int *a, mp_digit *d);
78 
79 /* used to setup the Barrett reduction for a given modulus b */
80 static int mp_reduce_setup(mp_int *a, const mp_int *b);
81 
82 /* set to a digit */
83 static void mp_set(mp_int *a, mp_digit b);
84 
85 /* b = a*a */
86 static int mp_sqr(const mp_int *a, mp_int *b);
87 
88 /* c = a * a (mod b) */
89 static int mp_sqrmod(const mp_int *a, mp_int *b, mp_int *c);
90 
91 
92 static void bn_reverse(unsigned char *s, int len);
93 static int s_mp_add(mp_int *a, mp_int *b, mp_int *c);
94 static int s_mp_exptmod (const mp_int * G, const mp_int * X, mp_int * P, mp_int * Y);
95 #define s_mp_mul(a, b, c) s_mp_mul_digs(a, b, c, (a)->used + (b)->used + 1)
96 static int s_mp_mul_digs(const mp_int *a, const mp_int *b, mp_int *c, int digs);
97 static int s_mp_mul_high_digs(const mp_int *a, const mp_int *b, mp_int *c, int digs);
98 static int s_mp_sqr(const mp_int *a, mp_int *b);
99 static int s_mp_sub(const mp_int *a, const mp_int *b, mp_int *c);
100 static int mp_exptmod_fast(const mp_int *G, const mp_int *X, mp_int *P, mp_int *Y, int mode);
101 static int mp_invmod_slow (const mp_int * a, mp_int * b, mp_int * c);
102 static int mp_karatsuba_mul(const mp_int *a, const mp_int *b, mp_int *c);
103 static int mp_karatsuba_sqr(const mp_int *a, mp_int *b);
104 
105 /* grow as required */
106 static int mp_grow (mp_int * a, int size)
107 {
108  int i;
109  mp_digit *tmp;
110 
111  /* if the alloc size is smaller alloc more ram */
112  if (a->alloc < size) {
113  /* ensure there are always at least MP_PREC digits extra on top */
114  size += (MP_PREC * 2) - (size % MP_PREC);
115 
116  /* reallocate the array a->dp
117  *
118  * We store the return in a temporary variable
119  * in case the operation failed we don't want
120  * to overwrite the dp member of a.
121  */
122  tmp = HeapReAlloc(GetProcessHeap(), 0, a->dp, sizeof (mp_digit) * size);
123  if (tmp == NULL) {
124  /* reallocation failed but "a" is still valid [can be freed] */
125  return MP_MEM;
126  }
127 
128  /* reallocation succeeded so set a->dp */
129  a->dp = tmp;
130 
131  /* zero excess digits */
132  i = a->alloc;
133  a->alloc = size;
134  for (; i < a->alloc; i++) {
135  a->dp[i] = 0;
136  }
137  }
138  return MP_OKAY;
139 }
140 
141 /* b = a/2 */
142 static int mp_div_2(const mp_int * a, mp_int * b)
143 {
144  int x, res, oldused;
145 
146  /* copy */
147  if (b->alloc < a->used) {
148  if ((res = mp_grow (b, a->used)) != MP_OKAY) {
149  return res;
150  }
151  }
152 
153  oldused = b->used;
154  b->used = a->used;
155  {
156  register mp_digit r, rr, *tmpa, *tmpb;
157 
158  /* source alias */
159  tmpa = a->dp + b->used - 1;
160 
161  /* dest alias */
162  tmpb = b->dp + b->used - 1;
163 
164  /* carry */
165  r = 0;
166  for (x = b->used - 1; x >= 0; x--) {
167  /* get the carry for the next iteration */
168  rr = *tmpa & 1;
169 
170  /* shift the current digit, add in carry and store */
171  *tmpb-- = (*tmpa-- >> 1) | (r << (DIGIT_BIT - 1));
172 
173  /* forward carry to next iteration */
174  r = rr;
175  }
176 
177  /* zero excess digits */
178  tmpb = b->dp + b->used;
179  for (x = b->used; x < oldused; x++) {
180  *tmpb++ = 0;
181  }
182  }
183  b->sign = a->sign;
184  mp_clamp (b);
185  return MP_OKAY;
186 }
187 
188 /* swap the elements of two integers, for cases where you can't simply swap the
189  * mp_int pointers around
190  */
191 static void
193 {
194  mp_int t;
195 
196  t = *a;
197  *a = *b;
198  *b = t;
199 }
200 
201 /* init a new mp_int */
202 static int mp_init (mp_int * a)
203 {
204  int i;
205 
206  /* allocate memory required and clear it */
207  a->dp = HeapAlloc(GetProcessHeap(), 0, sizeof (mp_digit) * MP_PREC);
208  if (a->dp == NULL) {
209  return MP_MEM;
210  }
211 
212  /* set the digits to zero */
213  for (i = 0; i < MP_PREC; i++) {
214  a->dp[i] = 0;
215  }
216 
217  /* set the used to zero, allocated digits to the default precision
218  * and sign to positive */
219  a->used = 0;
220  a->alloc = MP_PREC;
221  a->sign = MP_ZPOS;
222 
223  return MP_OKAY;
224 }
225 
226 /* init an mp_init for a given size */
227 static int mp_init_size (mp_int * a, int size)
228 {
229  int x;
230 
231  /* pad size so there are always extra digits */
232  size += (MP_PREC * 2) - (size % MP_PREC);
233 
234  /* alloc mem */
235  a->dp = HeapAlloc(GetProcessHeap(), 0, sizeof (mp_digit) * size);
236  if (a->dp == NULL) {
237  return MP_MEM;
238  }
239 
240  /* set the members */
241  a->used = 0;
242  a->alloc = size;
243  a->sign = MP_ZPOS;
244 
245  /* zero the digits */
246  for (x = 0; x < size; x++) {
247  a->dp[x] = 0;
248  }
249 
250  return MP_OKAY;
251 }
252 
253 /* clear one (frees) */
254 static void
256 {
257  int i;
258 
259  /* only do anything if a hasn't been freed previously */
260  if (a->dp != NULL) {
261  /* first zero the digits */
262  for (i = 0; i < a->used; i++) {
263  a->dp[i] = 0;
264  }
265 
266  /* free ram */
267  HeapFree(GetProcessHeap(), 0, a->dp);
268 
269  /* reset members to make debugging easier */
270  a->dp = NULL;
271  a->alloc = a->used = 0;
272  a->sign = MP_ZPOS;
273  }
274 }
275 
276 /* set to zero */
277 static void
279 {
280  a->sign = MP_ZPOS;
281  a->used = 0;
282  memset (a->dp, 0, sizeof (mp_digit) * a->alloc);
283 }
284 
285 /* b = |a|
286  *
287  * Simple function copies the input and fixes the sign to positive
288  */
289 static int
290 mp_abs (const mp_int * a, mp_int * b)
291 {
292  int res;
293 
294  /* copy a to b */
295  if (a != b) {
296  if ((res = mp_copy (a, b)) != MP_OKAY) {
297  return res;
298  }
299  }
300 
301  /* force the sign of b to positive */
302  b->sign = MP_ZPOS;
303 
304  return MP_OKAY;
305 }
306 
307 /* computes the modular inverse via binary extended euclidean algorithm,
308  * that is c = 1/a mod b
309  *
310  * Based on slow invmod except this is optimized for the case where b is
311  * odd as per HAC Note 14.64 on pp. 610
312  */
313 static int
315 {
316  mp_int x, y, u, v, B, D;
317  int res, neg;
318 
319  /* 2. [modified] b must be odd */
320  if (mp_iseven (b) == 1) {
321  return MP_VAL;
322  }
323 
324  /* init all our temps */
325  if ((res = mp_init_multi(&x, &y, &u, &v, &B, &D, NULL)) != MP_OKAY) {
326  return res;
327  }
328 
329  /* x == modulus, y == value to invert */
330  if ((res = mp_copy (b, &x)) != MP_OKAY) {
331  goto __ERR;
332  }
333 
334  /* we need y = |a| */
335  if ((res = mp_abs (a, &y)) != MP_OKAY) {
336  goto __ERR;
337  }
338 
339  /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
340  if ((res = mp_copy (&x, &u)) != MP_OKAY) {
341  goto __ERR;
342  }
343  if ((res = mp_copy (&y, &v)) != MP_OKAY) {
344  goto __ERR;
345  }
346  mp_set (&D, 1);
347 
348 top:
349  /* 4. while u is even do */
350  while (mp_iseven (&u) == 1) {
351  /* 4.1 u = u/2 */
352  if ((res = mp_div_2 (&u, &u)) != MP_OKAY) {
353  goto __ERR;
354  }
355  /* 4.2 if B is odd then */
356  if (mp_isodd (&B) == 1) {
357  if ((res = mp_sub (&B, &x, &B)) != MP_OKAY) {
358  goto __ERR;
359  }
360  }
361  /* B = B/2 */
362  if ((res = mp_div_2 (&B, &B)) != MP_OKAY) {
363  goto __ERR;
364  }
365  }
366 
367  /* 5. while v is even do */
368  while (mp_iseven (&v) == 1) {
369  /* 5.1 v = v/2 */
370  if ((res = mp_div_2 (&v, &v)) != MP_OKAY) {
371  goto __ERR;
372  }
373  /* 5.2 if D is odd then */
374  if (mp_isodd (&D) == 1) {
375  /* D = (D-x)/2 */
376  if ((res = mp_sub (&D, &x, &D)) != MP_OKAY) {
377  goto __ERR;
378  }
379  }
380  /* D = D/2 */
381  if ((res = mp_div_2 (&D, &D)) != MP_OKAY) {
382  goto __ERR;
383  }
384  }
385 
386  /* 6. if u >= v then */
387  if (mp_cmp (&u, &v) != MP_LT) {
388  /* u = u - v, B = B - D */
389  if ((res = mp_sub (&u, &v, &u)) != MP_OKAY) {
390  goto __ERR;
391  }
392 
393  if ((res = mp_sub (&B, &D, &B)) != MP_OKAY) {
394  goto __ERR;
395  }
396  } else {
397  /* v - v - u, D = D - B */
398  if ((res = mp_sub (&v, &u, &v)) != MP_OKAY) {
399  goto __ERR;
400  }
401 
402  if ((res = mp_sub (&D, &B, &D)) != MP_OKAY) {
403  goto __ERR;
404  }
405  }
406 
407  /* if not zero goto step 4 */
408  if (mp_iszero (&u) == 0) {
409  goto top;
410  }
411 
412  /* now a = C, b = D, gcd == g*v */
413 
414  /* if v != 1 then there is no inverse */
415  if (mp_cmp_d (&v, 1) != MP_EQ) {
416  res = MP_VAL;
417  goto __ERR;
418  }
419 
420  /* b is now the inverse */
421  neg = a->sign;
422  while (D.sign == MP_NEG) {
423  if ((res = mp_add (&D, b, &D)) != MP_OKAY) {
424  goto __ERR;
425  }
426  }
427  mp_exch (&D, c);
428  c->sign = neg;
429  res = MP_OKAY;
430 
431 __ERR:mp_clear_multi (&x, &y, &u, &v, &B, &D, NULL);
432  return res;
433 }
434 
435 /* computes xR**-1 == x (mod N) via Montgomery Reduction
436  *
437  * This is an optimized implementation of montgomery_reduce
438  * which uses the comba method to quickly calculate the columns of the
439  * reduction.
440  *
441  * Based on Algorithm 14.32 on pp.601 of HAC.
442 */
443 static int
445 {
446  int ix, res, olduse;
448 
449  /* get old used count */
450  olduse = x->used;
451 
452  /* grow a as required */
453  if (x->alloc < n->used + 1) {
454  if ((res = mp_grow (x, n->used + 1)) != MP_OKAY) {
455  return res;
456  }
457  }
458 
459  /* first we have to get the digits of the input into
460  * an array of double precision words W[...]
461  */
462  {
463  register mp_word *_W;
464  register mp_digit *tmpx;
465 
466  /* alias for the W[] array */
467  _W = W;
468 
469  /* alias for the digits of x*/
470  tmpx = x->dp;
471 
472  /* copy the digits of a into W[0..a->used-1] */
473  for (ix = 0; ix < x->used; ix++) {
474  *_W++ = *tmpx++;
475  }
476 
477  /* zero the high words of W[a->used..m->used*2] */
478  for (; ix < n->used * 2 + 1; ix++) {
479  *_W++ = 0;
480  }
481  }
482 
483  /* now we proceed to zero successive digits
484  * from the least significant upwards
485  */
486  for (ix = 0; ix < n->used; ix++) {
487  /* mu = ai * m' mod b
488  *
489  * We avoid a double precision multiplication (which isn't required)
490  * by casting the value down to a mp_digit. Note this requires
491  * that W[ix-1] have the carry cleared (see after the inner loop)
492  */
493  register mp_digit mu;
494  mu = (mp_digit) (((W[ix] & MP_MASK) * rho) & MP_MASK);
495 
496  /* a = a + mu * m * b**i
497  *
498  * This is computed in place and on the fly. The multiplication
499  * by b**i is handled by offsetting which columns the results
500  * are added to.
501  *
502  * Note the comba method normally doesn't handle carries in the
503  * inner loop In this case we fix the carry from the previous
504  * column since the Montgomery reduction requires digits of the
505  * result (so far) [see above] to work. This is
506  * handled by fixing up one carry after the inner loop. The
507  * carry fixups are done in order so after these loops the
508  * first m->used words of W[] have the carries fixed
509  */
510  {
511  register int iy;
512  register mp_digit *tmpn;
513  register mp_word *_W;
514 
515  /* alias for the digits of the modulus */
516  tmpn = n->dp;
517 
518  /* Alias for the columns set by an offset of ix */
519  _W = W + ix;
520 
521  /* inner loop */
522  for (iy = 0; iy < n->used; iy++) {
523  *_W++ += ((mp_word)mu) * ((mp_word)*tmpn++);
524  }
525  }
526 
527  /* now fix carry for next digit, W[ix+1] */
528  W[ix + 1] += W[ix] >> ((mp_word) DIGIT_BIT);
529  }
530 
531  /* now we have to propagate the carries and
532  * shift the words downward [all those least
533  * significant digits we zeroed].
534  */
535  {
536  register mp_digit *tmpx;
537  register mp_word *_W, *_W1;
538 
539  /* nox fix rest of carries */
540 
541  /* alias for current word */
542  _W1 = W + ix;
543 
544  /* alias for next word, where the carry goes */
545  _W = W + ++ix;
546 
547  for (; ix <= n->used * 2 + 1; ix++) {
548  *_W++ += *_W1++ >> ((mp_word) DIGIT_BIT);
549  }
550 
551  /* copy out, A = A/b**n
552  *
553  * The result is A/b**n but instead of converting from an
554  * array of mp_word to mp_digit than calling mp_rshd
555  * we just copy them in the right order
556  */
557 
558  /* alias for destination word */
559  tmpx = x->dp;
560 
561  /* alias for shifted double precision result */
562  _W = W + n->used;
563 
564  for (ix = 0; ix < n->used + 1; ix++) {
565  *tmpx++ = (mp_digit)(*_W++ & ((mp_word) MP_MASK));
566  }
567 
568  /* zero oldused digits, if the input a was larger than
569  * m->used+1 we'll have to clear the digits
570  */
571  for (; ix < olduse; ix++) {
572  *tmpx++ = 0;
573  }
574  }
575 
576  /* set the max used and clamp */
577  x->used = n->used + 1;
578  mp_clamp (x);
579 
580  /* if A >= m then A = A - m */
581  if (mp_cmp_mag (x, n) != MP_LT) {
582  return s_mp_sub (x, n, x);
583  }
584  return MP_OKAY;
585 }
586 
587 /* Fast (comba) multiplier
588  *
589  * This is the fast column-array [comba] multiplier. It is
590  * designed to compute the columns of the product first
591  * then handle the carries afterwards. This has the effect
592  * of making the nested loops that compute the columns very
593  * simple and schedulable on super-scalar processors.
594  *
595  * This has been modified to produce a variable number of
596  * digits of output so if say only a half-product is required
597  * you don't have to compute the upper half (a feature
598  * required for fast Barrett reduction).
599  *
600  * Based on Algorithm 14.12 on pp.595 of HAC.
601  *
602  */
603 static int
604 fast_s_mp_mul_digs (const mp_int * a, const mp_int * b, mp_int * c, int digs)
605 {
606  int olduse, res, pa, ix, iz;
608  register mp_word _W;
609 
610  /* grow the destination as required */
611  if (c->alloc < digs) {
612  if ((res = mp_grow (c, digs)) != MP_OKAY) {
613  return res;
614  }
615  }
616 
617  /* number of output digits to produce */
618  pa = MIN(digs, a->used + b->used);
619 
620  /* clear the carry */
621  _W = 0;
622  for (ix = 0; ix <= pa; ix++) {
623  int tx, ty;
624  int iy;
625  mp_digit *tmpx, *tmpy;
626 
627  /* get offsets into the two bignums */
628  ty = MIN(b->used-1, ix);
629  tx = ix - ty;
630 
631  /* setup temp aliases */
632  tmpx = a->dp + tx;
633  tmpy = b->dp + ty;
634 
635  /* This is the number of times the loop will iterate, essentially it's
636  while (tx++ < a->used && ty-- >= 0) { ... }
637  */
638  iy = MIN(a->used-tx, ty+1);
639 
640  /* execute loop */
641  for (iz = 0; iz < iy; ++iz) {
642  _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--);
643  }
644 
645  /* store term */
646  W[ix] = ((mp_digit)_W) & MP_MASK;
647 
648  /* make next carry */
649  _W = _W >> ((mp_word)DIGIT_BIT);
650  }
651 
652  /* setup dest */
653  olduse = c->used;
654  c->used = digs;
655 
656  {
657  register mp_digit *tmpc;
658  tmpc = c->dp;
659  for (ix = 0; ix < digs; ix++) {
660  /* now extract the previous digit [below the carry] */
661  *tmpc++ = W[ix];
662  }
663 
664  /* clear unused digits [that existed in the old copy of c] */
665  for (; ix < olduse; ix++) {
666  *tmpc++ = 0;
667  }
668  }
669  mp_clamp (c);
670  return MP_OKAY;
671 }
672 
673 /* this is a modified version of fast_s_mul_digs that only produces
674  * output digits *above* digs. See the comments for fast_s_mul_digs
675  * to see how it works.
676  *
677  * This is used in the Barrett reduction since for one of the multiplications
678  * only the higher digits were needed. This essentially halves the work.
679  *
680  * Based on Algorithm 14.12 on pp.595 of HAC.
681  */
682 static int
683 fast_s_mp_mul_high_digs (const mp_int * a, const mp_int * b, mp_int * c, int digs)
684 {
685  int olduse, res, pa, ix, iz;
687  mp_word _W;
688 
689  /* grow the destination as required */
690  pa = a->used + b->used;
691  if (c->alloc < pa) {
692  if ((res = mp_grow (c, pa)) != MP_OKAY) {
693  return res;
694  }
695  }
696 
697  /* number of output digits to produce */
698  pa = a->used + b->used;
699  _W = 0;
700  for (ix = digs; ix <= pa; ix++) {
701  int tx, ty, iy;
702  mp_digit *tmpx, *tmpy;
703 
704  /* get offsets into the two bignums */
705  ty = MIN(b->used-1, ix);
706  tx = ix - ty;
707 
708  /* setup temp aliases */
709  tmpx = a->dp + tx;
710  tmpy = b->dp + ty;
711 
712  /* This is the number of times the loop will iterate, essentially it's
713  while (tx++ < a->used && ty-- >= 0) { ... }
714  */
715  iy = MIN(a->used-tx, ty+1);
716 
717  /* execute loop */
718  for (iz = 0; iz < iy; iz++) {
719  _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--);
720  }
721 
722  /* store term */
723  W[ix] = ((mp_digit)_W) & MP_MASK;
724 
725  /* make next carry */
726  _W = _W >> ((mp_word)DIGIT_BIT);
727  }
728 
729  /* setup dest */
730  olduse = c->used;
731  c->used = pa;
732 
733  {
734  register mp_digit *tmpc;
735 
736  tmpc = c->dp + digs;
737  for (ix = digs; ix <= pa; ix++) {
738  /* now extract the previous digit [below the carry] */
739  *tmpc++ = W[ix];
740  }
741 
742  /* clear unused digits [that existed in the old copy of c] */
743  for (; ix < olduse; ix++) {
744  *tmpc++ = 0;
745  }
746  }
747  mp_clamp (c);
748  return MP_OKAY;
749 }
750 
751 /* fast squaring
752  *
753  * This is the comba method where the columns of the product
754  * are computed first then the carries are computed. This
755  * has the effect of making a very simple inner loop that
756  * is executed the most
757  *
758  * W2 represents the outer products and W the inner.
759  *
760  * A further optimizations is made because the inner
761  * products are of the form "A * B * 2". The *2 part does
762  * not need to be computed until the end which is good
763  * because 64-bit shifts are slow!
764  *
765  * Based on Algorithm 14.16 on pp.597 of HAC.
766  *
767  */
768 /* the jist of squaring...
769 
770 you do like mult except the offset of the tmpx [one that starts closer to zero]
771 can't equal the offset of tmpy. So basically you set up iy like before then you min it with
772 (ty-tx) so that it never happens. You double all those you add in the inner loop
773 
774 After that loop you do the squares and add them in.
775 
776 Remove W2 and don't memset W
777 
778 */
779 
780 static int fast_s_mp_sqr (const mp_int * a, mp_int * b)
781 {
782  int olduse, res, pa, ix, iz;
783  mp_digit W[MP_WARRAY], *tmpx;
784  mp_word W1;
785 
786  /* grow the destination as required */
787  pa = a->used + a->used;
788  if (b->alloc < pa) {
789  if ((res = mp_grow (b, pa)) != MP_OKAY) {
790  return res;
791  }
792  }
793 
794  /* number of output digits to produce */
795  W1 = 0;
796  for (ix = 0; ix <= pa; ix++) {
797  int tx, ty, iy;
798  mp_word _W;
799  mp_digit *tmpy;
800 
801  /* clear counter */
802  _W = 0;
803 
804  /* get offsets into the two bignums */
805  ty = MIN(a->used-1, ix);
806  tx = ix - ty;
807 
808  /* setup temp aliases */
809  tmpx = a->dp + tx;
810  tmpy = a->dp + ty;
811 
812  /* This is the number of times the loop will iterate, essentially it's
813  while (tx++ < a->used && ty-- >= 0) { ... }
814  */
815  iy = MIN(a->used-tx, ty+1);
816 
817  /* now for squaring tx can never equal ty
818  * we halve the distance since they approach at a rate of 2x
819  * and we have to round because odd cases need to be executed
820  */
821  iy = MIN(iy, (ty-tx+1)>>1);
822 
823  /* execute loop */
824  for (iz = 0; iz < iy; iz++) {
825  _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--);
826  }
827 
828  /* double the inner product and add carry */
829  _W = _W + _W + W1;
830 
831  /* even columns have the square term in them */
832  if ((ix&1) == 0) {
833  _W += ((mp_word)a->dp[ix>>1])*((mp_word)a->dp[ix>>1]);
834  }
835 
836  /* store it */
837  W[ix] = _W;
838 
839  /* make next carry */
840  W1 = _W >> ((mp_word)DIGIT_BIT);
841  }
842 
843  /* setup dest */
844  olduse = b->used;
845  b->used = a->used+a->used;
846 
847  {
848  mp_digit *tmpb;
849  tmpb = b->dp;
850  for (ix = 0; ix < pa; ix++) {
851  *tmpb++ = W[ix] & MP_MASK;
852  }
853 
854  /* clear unused digits [that existed in the old copy of c] */
855  for (; ix < olduse; ix++) {
856  *tmpb++ = 0;
857  }
858  }
859  mp_clamp (b);
860  return MP_OKAY;
861 }
862 
863 /* computes a = 2**b
864  *
865  * Simple algorithm which zeroes the int, grows it then just sets one bit
866  * as required.
867  */
868 static int
869 mp_2expt (mp_int * a, int b)
870 {
871  int res;
872 
873  /* zero a as per default */
874  mp_zero (a);
875 
876  /* grow a to accommodate the single bit */
877  if ((res = mp_grow (a, b / DIGIT_BIT + 1)) != MP_OKAY) {
878  return res;
879  }
880 
881  /* set the used count of where the bit will go */
882  a->used = b / DIGIT_BIT + 1;
883 
884  /* put the single bit in its place */
885  a->dp[b / DIGIT_BIT] = ((mp_digit)1) << (b % DIGIT_BIT);
886 
887  return MP_OKAY;
888 }
889 
890 /* high level addition (handles signs) */
891 int mp_add (mp_int * a, mp_int * b, mp_int * c)
892 {
893  int sa, sb, res;
894 
895  /* get sign of both inputs */
896  sa = a->sign;
897  sb = b->sign;
898 
899  /* handle two cases, not four */
900  if (sa == sb) {
901  /* both positive or both negative */
902  /* add their magnitudes, copy the sign */
903  c->sign = sa;
904  res = s_mp_add (a, b, c);
905  } else {
906  /* one positive, the other negative */
907  /* subtract the one with the greater magnitude from */
908  /* the one of the lesser magnitude. The result gets */
909  /* the sign of the one with the greater magnitude. */
910  if (mp_cmp_mag (a, b) == MP_LT) {
911  c->sign = sb;
912  res = s_mp_sub (b, a, c);
913  } else {
914  c->sign = sa;
915  res = s_mp_sub (a, b, c);
916  }
917  }
918  return res;
919 }
920 
921 
922 /* single digit addition */
923 static int
925 {
926  int res, ix, oldused;
927  mp_digit *tmpa, *tmpc, mu;
928 
929  /* grow c as required */
930  if (c->alloc < a->used + 1) {
931  if ((res = mp_grow(c, a->used + 1)) != MP_OKAY) {
932  return res;
933  }
934  }
935 
936  /* if a is negative and |a| >= b, call c = |a| - b */
937  if (a->sign == MP_NEG && (a->used > 1 || a->dp[0] >= b)) {
938  /* temporarily fix sign of a */
939  a->sign = MP_ZPOS;
940 
941  /* c = |a| - b */
942  res = mp_sub_d(a, b, c);
943 
944  /* fix sign */
945  a->sign = c->sign = MP_NEG;
946 
947  return res;
948  }
949 
950  /* old number of used digits in c */
951  oldused = c->used;
952 
953  /* sign always positive */
954  c->sign = MP_ZPOS;
955 
956  /* source alias */
957  tmpa = a->dp;
958 
959  /* destination alias */
960  tmpc = c->dp;
961 
962  /* if a is positive */
963  if (a->sign == MP_ZPOS) {
964  /* add digit, after this we're propagating
965  * the carry.
966  */
967  *tmpc = *tmpa++ + b;
968  mu = *tmpc >> DIGIT_BIT;
969  *tmpc++ &= MP_MASK;
970 
971  /* now handle rest of the digits */
972  for (ix = 1; ix < a->used; ix++) {
973  *tmpc = *tmpa++ + mu;
974  mu = *tmpc >> DIGIT_BIT;
975  *tmpc++ &= MP_MASK;
976  }
977  /* set final carry */
978  ix++;
979  *tmpc++ = mu;
980 
981  /* setup size */
982  c->used = a->used + 1;
983  } else {
984  /* a was negative and |a| < b */
985  c->used = 1;
986 
987  /* the result is a single digit */
988  if (a->used == 1) {
989  *tmpc++ = b - a->dp[0];
990  } else {
991  *tmpc++ = b;
992  }
993 
994  /* setup count so the clearing of oldused
995  * can fall through correctly
996  */
997  ix = 1;
998  }
999 
1000  /* now zero to oldused */
1001  while (ix++ < oldused) {
1002  *tmpc++ = 0;
1003  }
1004  mp_clamp(c);
1005 
1006  return MP_OKAY;
1007 }
1008 
1009 /* trim unused digits
1010  *
1011  * This is used to ensure that leading zero digits are
1012  * trimed and the leading "used" digit will be non-zero
1013  * Typically very fast. Also fixes the sign if there
1014  * are no more leading digits
1015  */
1016 void
1018 {
1019  /* decrease used while the most significant digit is
1020  * zero.
1021  */
1022  while (a->used > 0 && a->dp[a->used - 1] == 0) {
1023  --(a->used);
1024  }
1025 
1026  /* reset the sign flag if used == 0 */
1027  if (a->used == 0) {
1028  a->sign = MP_ZPOS;
1029  }
1030 }
1031 
1032 void mp_clear_multi(mp_int *mp, ...)
1033 {
1034  mp_int* next_mp = mp;
1035  va_list args;
1036  va_start(args, mp);
1037  while (next_mp != NULL) {
1038  mp_clear(next_mp);
1039  next_mp = va_arg(args, mp_int*);
1040  }
1041  va_end(args);
1042 }
1043 
1044 /* compare two ints (signed)*/
1045 int
1046 mp_cmp (const mp_int * a, const mp_int * b)
1047 {
1048  /* compare based on sign */
1049  if (a->sign != b->sign) {
1050  if (a->sign == MP_NEG) {
1051  return MP_LT;
1052  } else {
1053  return MP_GT;
1054  }
1055  }
1056 
1057  /* compare digits */
1058  if (a->sign == MP_NEG) {
1059  /* if negative compare opposite direction */
1060  return mp_cmp_mag(b, a);
1061  } else {
1062  return mp_cmp_mag(a, b);
1063  }
1064 }
1065 
1066 /* compare a digit */
1067 int mp_cmp_d(const mp_int * a, mp_digit b)
1068 {
1069  /* compare based on sign */
1070  if (a->sign == MP_NEG) {
1071  return MP_LT;
1072  }
1073 
1074  /* compare based on magnitude */
1075  if (a->used > 1) {
1076  return MP_GT;
1077  }
1078 
1079  /* compare the only digit of a to b */
1080  if (a->dp[0] > b) {
1081  return MP_GT;
1082  } else if (a->dp[0] < b) {
1083  return MP_LT;
1084  } else {
1085  return MP_EQ;
1086  }
1087 }
1088 
1089 /* compare maginitude of two ints (unsigned) */
1090 int mp_cmp_mag (const mp_int * a, const mp_int * b)
1091 {
1092  int n;
1093  mp_digit *tmpa, *tmpb;
1094 
1095  /* compare based on # of non-zero digits */
1096  if (a->used > b->used) {
1097  return MP_GT;
1098  }
1099 
1100  if (a->used < b->used) {
1101  return MP_LT;
1102  }
1103 
1104  /* alias for a */
1105  tmpa = a->dp + (a->used - 1);
1106 
1107  /* alias for b */
1108  tmpb = b->dp + (a->used - 1);
1109 
1110  /* compare based on digits */
1111  for (n = 0; n < a->used; ++n, --tmpa, --tmpb) {
1112  if (*tmpa > *tmpb) {
1113  return MP_GT;
1114  }
1115 
1116  if (*tmpa < *tmpb) {
1117  return MP_LT;
1118  }
1119  }
1120  return MP_EQ;
1121 }
1122 
1123 static const int lnz[16] = {
1124  4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
1125 };
1126 
1127 /* Counts the number of lsbs which are zero before the first zero bit */
1128 int mp_cnt_lsb(const mp_int *a)
1129 {
1130  int x;
1131  mp_digit q, qq;
1132 
1133  /* easy out */
1134  if (mp_iszero(a) == 1) {
1135  return 0;
1136  }
1137 
1138  /* scan lower digits until non-zero */
1139  for (x = 0; x < a->used && a->dp[x] == 0; x++);
1140  q = a->dp[x];
1141  x *= DIGIT_BIT;
1142 
1143  /* now scan this digit until a 1 is found */
1144  if ((q & 1) == 0) {
1145  do {
1146  qq = q & 15;
1147  x += lnz[qq];
1148  q >>= 4;
1149  } while (qq == 0);
1150  }
1151  return x;
1152 }
1153 
1154 /* copy, b = a */
1155 int
1156 mp_copy (const mp_int * a, mp_int * b)
1157 {
1158  int res, n;
1159 
1160  /* if dst == src do nothing */
1161  if (a == b) {
1162  return MP_OKAY;
1163  }
1164 
1165  /* grow dest */
1166  if (b->alloc < a->used) {
1167  if ((res = mp_grow (b, a->used)) != MP_OKAY) {
1168  return res;
1169  }
1170  }
1171 
1172  /* zero b and copy the parameters over */
1173  {
1174  register mp_digit *tmpa, *tmpb;
1175 
1176  /* pointer aliases */
1177 
1178  /* source */
1179  tmpa = a->dp;
1180 
1181  /* destination */
1182  tmpb = b->dp;
1183 
1184  /* copy all the digits */
1185  for (n = 0; n < a->used; n++) {
1186  *tmpb++ = *tmpa++;
1187  }
1188 
1189  /* clear high digits */
1190  for (; n < b->used; n++) {
1191  *tmpb++ = 0;
1192  }
1193  }
1194 
1195  /* copy used count and sign */
1196  b->used = a->used;
1197  b->sign = a->sign;
1198  return MP_OKAY;
1199 }
1200 
1201 /* returns the number of bits in an int */
1202 int
1204 {
1205  int r;
1206  mp_digit q;
1207 
1208  /* shortcut */
1209  if (a->used == 0) {
1210  return 0;
1211  }
1212 
1213  /* get number of digits and add that */
1214  r = (a->used - 1) * DIGIT_BIT;
1215 
1216  /* take the last digit and count the bits in it */
1217  q = a->dp[a->used - 1];
1218  while (q > 0) {
1219  ++r;
1220  q >>= ((mp_digit) 1);
1221  }
1222  return r;
1223 }
1224 
1225 /* calc a value mod 2**b */
1226 static int
1227 mp_mod_2d (const mp_int * a, int b, mp_int * c)
1228 {
1229  int x, res;
1230 
1231  /* if b is <= 0 then zero the int */
1232  if (b <= 0) {
1233  mp_zero (c);
1234  return MP_OKAY;
1235  }
1236 
1237  /* if the modulus is larger than the value than return */
1238  if (b > a->used * DIGIT_BIT) {
1239  res = mp_copy (a, c);
1240  return res;
1241  }
1242 
1243  /* copy */
1244  if ((res = mp_copy (a, c)) != MP_OKAY) {
1245  return res;
1246  }
1247 
1248  /* zero digits above the last digit of the modulus */
1249  for (x = (b / DIGIT_BIT) + ((b % DIGIT_BIT) == 0 ? 0 : 1); x < c->used; x++) {
1250  c->dp[x] = 0;
1251  }
1252  /* clear the digit that is not completely outside/inside the modulus */
1253  c->dp[b / DIGIT_BIT] &= (1 << ((mp_digit)b % DIGIT_BIT)) - 1;
1254  mp_clamp (c);
1255  return MP_OKAY;
1256 }
1257 
1258 /* shift right a certain amount of digits */
1259 static void mp_rshd (mp_int * a, int b)
1260 {
1261  int x;
1262 
1263  /* if b <= 0 then ignore it */
1264  if (b <= 0) {
1265  return;
1266  }
1267 
1268  /* if b > used then simply zero it and return */
1269  if (a->used <= b) {
1270  mp_zero (a);
1271  return;
1272  }
1273 
1274  {
1275  register mp_digit *bottom, *top;
1276 
1277  /* shift the digits down */
1278 
1279  /* bottom */
1280  bottom = a->dp;
1281 
1282  /* top [offset into digits] */
1283  top = a->dp + b;
1284 
1285  /* this is implemented as a sliding window where
1286  * the window is b-digits long and digits from
1287  * the top of the window are copied to the bottom
1288  *
1289  * e.g.
1290 
1291  b-2 | b-1 | b0 | b1 | b2 | ... | bb | ---->
1292  /\ | ---->
1293  \-------------------/ ---->
1294  */
1295  for (x = 0; x < (a->used - b); x++) {
1296  *bottom++ = *top++;
1297  }
1298 
1299  /* zero the top digits */
1300  for (; x < a->used; x++) {
1301  *bottom++ = 0;
1302  }
1303  }
1304 
1305  /* remove excess digits */
1306  a->used -= b;
1307 }
1308 
1309 /* shift right by a certain bit count (store quotient in c, optional remainder in d) */
1310 static int mp_div_2d (const mp_int * a, int b, mp_int * c, mp_int * d)
1311 {
1312  mp_digit D, r, rr;
1313  int x, res;
1314  mp_int t;
1315 
1316 
1317  /* if the shift count is <= 0 then we do no work */
1318  if (b <= 0) {
1319  res = mp_copy (a, c);
1320  if (d != NULL) {
1321  mp_zero (d);
1322  }
1323  return res;
1324  }
1325 
1326  if ((res = mp_init (&t)) != MP_OKAY) {
1327  return res;
1328  }
1329 
1330  /* get the remainder */
1331  if (d != NULL) {
1332  if ((res = mp_mod_2d (a, b, &t)) != MP_OKAY) {
1333  mp_clear (&t);
1334  return res;
1335  }
1336  }
1337 
1338  /* copy */
1339  if ((res = mp_copy (a, c)) != MP_OKAY) {
1340  mp_clear (&t);
1341  return res;
1342  }
1343 
1344  /* shift by as many digits in the bit count */
1345  if (b >= DIGIT_BIT) {
1346  mp_rshd (c, b / DIGIT_BIT);
1347  }
1348 
1349  /* shift any bit count < DIGIT_BIT */
1350  D = (mp_digit) (b % DIGIT_BIT);
1351  if (D != 0) {
1352  register mp_digit *tmpc, mask, shift;
1353 
1354  /* mask */
1355  mask = (((mp_digit)1) << D) - 1;
1356 
1357  /* shift for lsb */
1358  shift = DIGIT_BIT - D;
1359 
1360  /* alias */
1361  tmpc = c->dp + (c->used - 1);
1362 
1363  /* carry */
1364  r = 0;
1365  for (x = c->used - 1; x >= 0; x--) {
1366  /* get the lower bits of this word in a temp */
1367  rr = *tmpc & mask;
1368 
1369  /* shift the current word and mix in the carry bits from the previous word */
1370  *tmpc = (*tmpc >> D) | (r << shift);
1371  --tmpc;
1372 
1373  /* set the carry to the carry bits of the current word found above */
1374  r = rr;
1375  }
1376  }
1377  mp_clamp (c);
1378  if (d != NULL) {
1379  mp_exch (&t, d);
1380  }
1381  mp_clear (&t);
1382  return MP_OKAY;
1383 }
1384 
1385 /* shift left a certain amount of digits */
1386 static int mp_lshd (mp_int * a, int b)
1387 {
1388  int x, res;
1389 
1390  /* if it's less than zero return */
1391  if (b <= 0) {
1392  return MP_OKAY;
1393  }
1394 
1395  /* grow to fit the new digits */
1396  if (a->alloc < a->used + b) {
1397  if ((res = mp_grow (a, a->used + b)) != MP_OKAY) {
1398  return res;
1399  }
1400  }
1401 
1402  {
1403  register mp_digit *top, *bottom;
1404 
1405  /* increment the used by the shift amount then copy upwards */
1406  a->used += b;
1407 
1408  /* top */
1409  top = a->dp + a->used - 1;
1410 
1411  /* base */
1412  bottom = a->dp + a->used - 1 - b;
1413 
1414  /* much like mp_rshd this is implemented using a sliding window
1415  * except the window goes the other way around. Copying from
1416  * the bottom to the top. see bn_mp_rshd.c for more info.
1417  */
1418  for (x = a->used - 1; x >= b; x--) {
1419  *top-- = *bottom--;
1420  }
1421 
1422  /* zero the lower digits */
1423  top = a->dp;
1424  for (x = 0; x < b; x++) {
1425  *top++ = 0;
1426  }
1427  }
1428  return MP_OKAY;
1429 }
1430 
1431 /* shift left by a certain bit count */
1432 static int mp_mul_2d (const mp_int * a, int b, mp_int * c)
1433 {
1434  mp_digit d;
1435  int res;
1436 
1437  /* copy */
1438  if (a != c) {
1439  if ((res = mp_copy (a, c)) != MP_OKAY) {
1440  return res;
1441  }
1442  }
1443 
1444  if (c->alloc < c->used + b/DIGIT_BIT + 1) {
1445  if ((res = mp_grow (c, c->used + b / DIGIT_BIT + 1)) != MP_OKAY) {
1446  return res;
1447  }
1448  }
1449 
1450  /* shift by as many digits in the bit count */
1451  if (b >= DIGIT_BIT) {
1452  if ((res = mp_lshd (c, b / DIGIT_BIT)) != MP_OKAY) {
1453  return res;
1454  }
1455  }
1456 
1457  /* shift any bit count < DIGIT_BIT */
1458  d = (mp_digit) (b % DIGIT_BIT);
1459  if (d != 0) {
1460  register mp_digit *tmpc, shift, mask, r, rr;
1461  register int x;
1462 
1463  /* bitmask for carries */
1464  mask = (((mp_digit)1) << d) - 1;
1465 
1466  /* shift for msbs */
1467  shift = DIGIT_BIT - d;
1468 
1469  /* alias */
1470  tmpc = c->dp;
1471 
1472  /* carry */
1473  r = 0;
1474  for (x = 0; x < c->used; x++) {
1475  /* get the higher bits of the current word */
1476  rr = (*tmpc >> shift) & mask;
1477 
1478  /* shift the current word and OR in the carry */
1479  *tmpc = ((*tmpc << d) | r) & MP_MASK;
1480  ++tmpc;
1481 
1482  /* set the carry to the carry bits of the current word */
1483  r = rr;
1484  }
1485 
1486  /* set final carry */
1487  if (r != 0) {
1488  c->dp[(c->used)++] = r;
1489  }
1490  }
1491  mp_clamp (c);
1492  return MP_OKAY;
1493 }
1494 
1495 /* multiply by a digit */
1496 static int
1498 {
1499  mp_digit u, *tmpa, *tmpc;
1500  mp_word r;
1501  int ix, res, olduse;
1502 
1503  /* make sure c is big enough to hold a*b */
1504  if (c->alloc < a->used + 1) {
1505  if ((res = mp_grow (c, a->used + 1)) != MP_OKAY) {
1506  return res;
1507  }
1508  }
1509 
1510  /* get the original destinations used count */
1511  olduse = c->used;
1512 
1513  /* set the sign */
1514  c->sign = a->sign;
1515 
1516  /* alias for a->dp [source] */
1517  tmpa = a->dp;
1518 
1519  /* alias for c->dp [dest] */
1520  tmpc = c->dp;
1521 
1522  /* zero carry */
1523  u = 0;
1524 
1525  /* compute columns */
1526  for (ix = 0; ix < a->used; ix++) {
1527  /* compute product and carry sum for this term */
1528  r = ((mp_word) u) + ((mp_word)*tmpa++) * ((mp_word)b);
1529 
1530  /* mask off higher bits to get a single digit */
1531  *tmpc++ = (mp_digit) (r & ((mp_word) MP_MASK));
1532 
1533  /* send carry into next iteration */
1534  u = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
1535  }
1536 
1537  /* store final carry [if any] */
1538  *tmpc++ = u;
1539 
1540  /* now zero digits above the top */
1541  while (ix++ < olduse) {
1542  *tmpc++ = 0;
1543  }
1544 
1545  /* set used count */
1546  c->used = a->used + 1;
1547  mp_clamp(c);
1548 
1549  return MP_OKAY;
1550 }
1551 
1552 /* integer signed division.
1553  * c*b + d == a [e.g. a/b, c=quotient, d=remainder]
1554  * HAC pp.598 Algorithm 14.20
1555  *
1556  * Note that the description in HAC is horribly
1557  * incomplete. For example, it doesn't consider
1558  * the case where digits are removed from 'x' in
1559  * the inner loop. It also doesn't consider the
1560  * case that y has fewer than three digits, etc..
1561  *
1562  * The overall algorithm is as described as
1563  * 14.20 from HAC but fixed to treat these cases.
1564 */
1565 static int mp_div (const mp_int * a, const mp_int * b, mp_int * c, mp_int * d)
1566 {
1567  mp_int q, x, y, t1, t2;
1568  int res, n, t, i, norm, neg;
1569 
1570  /* is divisor zero ? */
1571  if (mp_iszero (b) == 1) {
1572  return MP_VAL;
1573  }
1574 
1575  /* if a < b then q=0, r = a */
1576  if (mp_cmp_mag (a, b) == MP_LT) {
1577  if (d != NULL) {
1578  res = mp_copy (a, d);
1579  } else {
1580  res = MP_OKAY;
1581  }
1582  if (c != NULL) {
1583  mp_zero (c);
1584  }
1585  return res;
1586  }
1587 
1588  if ((res = mp_init_size (&q, a->used + 2)) != MP_OKAY) {
1589  return res;
1590  }
1591  q.used = a->used + 2;
1592 
1593  if ((res = mp_init (&t1)) != MP_OKAY) {
1594  goto __Q;
1595  }
1596 
1597  if ((res = mp_init (&t2)) != MP_OKAY) {
1598  goto __T1;
1599  }
1600 
1601  if ((res = mp_init_copy (&x, a)) != MP_OKAY) {
1602  goto __T2;
1603  }
1604 
1605  if ((res = mp_init_copy (&y, b)) != MP_OKAY) {
1606  goto __X;
1607  }
1608 
1609  /* fix the sign */
1610  neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG;
1611  x.sign = y.sign = MP_ZPOS;
1612 
1613  /* normalize both x and y, ensure that y >= b/2, [b == 2**DIGIT_BIT] */
1614  norm = mp_count_bits(&y) % DIGIT_BIT;
1615  if (norm < DIGIT_BIT-1) {
1616  norm = (DIGIT_BIT-1) - norm;
1617  if ((res = mp_mul_2d (&x, norm, &x)) != MP_OKAY) {
1618  goto __Y;
1619  }
1620  if ((res = mp_mul_2d (&y, norm, &y)) != MP_OKAY) {
1621  goto __Y;
1622  }
1623  } else {
1624  norm = 0;
1625  }
1626 
1627  /* note hac does 0 based, so if used==5 then it's 0,1,2,3,4, e.g. use 4 */
1628  n = x.used - 1;
1629  t = y.used - 1;
1630 
1631  /* while (x >= y*b**n-t) do { q[n-t] += 1; x -= y*b**{n-t} } */
1632  if ((res = mp_lshd (&y, n - t)) != MP_OKAY) { /* y = y*b**{n-t} */
1633  goto __Y;
1634  }
1635 
1636  while (mp_cmp (&x, &y) != MP_LT) {
1637  ++(q.dp[n - t]);
1638  if ((res = mp_sub (&x, &y, &x)) != MP_OKAY) {
1639  goto __Y;
1640  }
1641  }
1642 
1643  /* reset y by shifting it back down */
1644  mp_rshd (&y, n - t);
1645 
1646  /* step 3. for i from n down to (t + 1) */
1647  for (i = n; i >= (t + 1); i--) {
1648  if (i > x.used) {
1649  continue;
1650  }
1651 
1652  /* step 3.1 if xi == yt then set q{i-t-1} to b-1,
1653  * otherwise set q{i-t-1} to (xi*b + x{i-1})/yt */
1654  if (x.dp[i] == y.dp[t]) {
1655  q.dp[i - t - 1] = ((((mp_digit)1) << DIGIT_BIT) - 1);
1656  } else {
1657  mp_word tmp;
1658  tmp = ((mp_word) x.dp[i]) << ((mp_word) DIGIT_BIT);
1659  tmp |= ((mp_word) x.dp[i - 1]);
1660  tmp /= ((mp_word) y.dp[t]);
1661  if (tmp > (mp_word) MP_MASK)
1662  tmp = MP_MASK;
1663  q.dp[i - t - 1] = (mp_digit) (tmp & (mp_word) (MP_MASK));
1664  }
1665 
1666  /* while (q{i-t-1} * (yt * b + y{t-1})) >
1667  xi * b**2 + xi-1 * b + xi-2
1668 
1669  do q{i-t-1} -= 1;
1670  */
1671  q.dp[i - t - 1] = (q.dp[i - t - 1] + 1) & MP_MASK;
1672  do {
1673  q.dp[i - t - 1] = (q.dp[i - t - 1] - 1) & MP_MASK;
1674 
1675  /* find left hand */
1676  mp_zero (&t1);
1677  t1.dp[0] = (t - 1 < 0) ? 0 : y.dp[t - 1];
1678  t1.dp[1] = y.dp[t];
1679  t1.used = 2;
1680  if ((res = mp_mul_d (&t1, q.dp[i - t - 1], &t1)) != MP_OKAY) {
1681  goto __Y;
1682  }
1683 
1684  /* find right hand */
1685  t2.dp[0] = (i - 2 < 0) ? 0 : x.dp[i - 2];
1686  t2.dp[1] = (i - 1 < 0) ? 0 : x.dp[i - 1];
1687  t2.dp[2] = x.dp[i];
1688  t2.used = 3;
1689  } while (mp_cmp_mag(&t1, &t2) == MP_GT);
1690 
1691  /* step 3.3 x = x - q{i-t-1} * y * b**{i-t-1} */
1692  if ((res = mp_mul_d (&y, q.dp[i - t - 1], &t1)) != MP_OKAY) {
1693  goto __Y;
1694  }
1695 
1696  if ((res = mp_lshd (&t1, i - t - 1)) != MP_OKAY) {
1697  goto __Y;
1698  }
1699 
1700  if ((res = mp_sub (&x, &t1, &x)) != MP_OKAY) {
1701  goto __Y;
1702  }
1703 
1704  /* if x < 0 then { x = x + y*b**{i-t-1}; q{i-t-1} -= 1; } */
1705  if (x.sign == MP_NEG) {
1706  if ((res = mp_copy (&y, &t1)) != MP_OKAY) {
1707  goto __Y;
1708  }
1709  if ((res = mp_lshd (&t1, i - t - 1)) != MP_OKAY) {
1710  goto __Y;
1711  }
1712  if ((res = mp_add (&x, &t1, &x)) != MP_OKAY) {
1713  goto __Y;
1714  }
1715 
1716  q.dp[i - t - 1] = (q.dp[i - t - 1] - 1UL) & MP_MASK;
1717  }
1718  }
1719 
1720  /* now q is the quotient and x is the remainder
1721  * [which we have to normalize]
1722  */
1723 
1724  /* get sign before writing to c */
1725  x.sign = x.used == 0 ? MP_ZPOS : a->sign;
1726 
1727  if (c != NULL) {
1728  mp_clamp (&q);
1729  mp_exch (&q, c);
1730  c->sign = neg;
1731  }
1732 
1733  if (d != NULL) {
1734  mp_div_2d (&x, norm, &x, NULL);
1735  mp_exch (&x, d);
1736  }
1737 
1738  res = MP_OKAY;
1739 
1740 __Y:mp_clear (&y);
1741 __X:mp_clear (&x);
1742 __T2:mp_clear (&t2);
1743 __T1:mp_clear (&t1);
1744 __Q:mp_clear (&q);
1745  return res;
1746 }
1747 
1749 {
1750  int x;
1751 
1752  for (x = 1; x < DIGIT_BIT; x++) {
1753  if (b == (((mp_digit)1)<<x)) {
1754  *p = x;
1755  return TRUE;
1756  }
1757  }
1758  return FALSE;
1759 }
1760 
1761 /* single digit division (based on routine from MPI) */
1762 static int mp_div_d (const mp_int * a, mp_digit b, mp_int * c, mp_digit * d)
1763 {
1764  mp_int q;
1765  mp_word w;
1766  mp_digit t;
1767  int res, ix;
1768 
1769  /* cannot divide by zero */
1770  if (b == 0) {
1771  return MP_VAL;
1772  }
1773 
1774  /* quick outs */
1775  if (b == 1 || mp_iszero(a) == 1) {
1776  if (d != NULL) {
1777  *d = 0;
1778  }
1779  if (c != NULL) {
1780  return mp_copy(a, c);
1781  }
1782  return MP_OKAY;
1783  }
1784 
1785  /* power of two ? */
1786  if (s_is_power_of_two(b, &ix)) {
1787  if (d != NULL) {
1788  *d = a->dp[0] & ((((mp_digit)1)<<ix) - 1);
1789  }
1790  if (c != NULL) {
1791  return mp_div_2d(a, ix, c, NULL);
1792  }
1793  return MP_OKAY;
1794  }
1795 
1796  /* no easy answer [c'est la vie]. Just division */
1797  if ((res = mp_init_size(&q, a->used)) != MP_OKAY) {
1798  return res;
1799  }
1800 
1801  q.used = a->used;
1802  q.sign = a->sign;
1803  w = 0;
1804  for (ix = a->used - 1; ix >= 0; ix--) {
1805  w = (w << ((mp_word)DIGIT_BIT)) | ((mp_word)a->dp[ix]);
1806 
1807  if (w >= b) {
1808  t = (mp_digit)(w / b);
1809  w -= ((mp_word)t) * ((mp_word)b);
1810  } else {
1811  t = 0;
1812  }
1813  q.dp[ix] = t;
1814  }
1815 
1816  if (d != NULL) {
1817  *d = (mp_digit)w;
1818  }
1819 
1820  if (c != NULL) {
1821  mp_clamp(&q);
1822  mp_exch(&q, c);
1823  }
1824  mp_clear(&q);
1825 
1826  return res;
1827 }
1828 
1829 /* reduce "x" in place modulo "n" using the Diminished Radix algorithm.
1830  *
1831  * Based on algorithm from the paper
1832  *
1833  * "Generating Efficient Primes for Discrete Log Cryptosystems"
1834  * Chae Hoon Lim, Pil Loong Lee,
1835  * POSTECH Information Research Laboratories
1836  *
1837  * The modulus must be of a special format [see manual]
1838  *
1839  * Has been modified to use algorithm 7.10 from the LTM book instead
1840  *
1841  * Input x must be in the range 0 <= x <= (n-1)**2
1842  */
1843 static int
1845 {
1846  int err, i, m;
1847  mp_word r;
1848  mp_digit mu, *tmpx1, *tmpx2;
1849 
1850  /* m = digits in modulus */
1851  m = n->used;
1852 
1853  /* ensure that "x" has at least 2m digits */
1854  if (x->alloc < m + m) {
1855  if ((err = mp_grow (x, m + m)) != MP_OKAY) {
1856  return err;
1857  }
1858  }
1859 
1860 /* top of loop, this is where the code resumes if
1861  * another reduction pass is required.
1862  */
1863 top:
1864  /* aliases for digits */
1865  /* alias for lower half of x */
1866  tmpx1 = x->dp;
1867 
1868  /* alias for upper half of x, or x/B**m */
1869  tmpx2 = x->dp + m;
1870 
1871  /* set carry to zero */
1872  mu = 0;
1873 
1874  /* compute (x mod B**m) + k * [x/B**m] inline and inplace */
1875  for (i = 0; i < m; i++) {
1876  r = ((mp_word)*tmpx2++) * ((mp_word)k) + *tmpx1 + mu;
1877  *tmpx1++ = (mp_digit)(r & MP_MASK);
1878  mu = (mp_digit)(r >> ((mp_word)DIGIT_BIT));
1879  }
1880 
1881  /* set final carry */
1882  *tmpx1++ = mu;
1883 
1884  /* zero words above m */
1885  for (i = m + 1; i < x->used; i++) {
1886  *tmpx1++ = 0;
1887  }
1888 
1889  /* clamp, sub and return */
1890  mp_clamp (x);
1891 
1892  /* if x >= n then subtract and reduce again
1893  * Each successive "recursion" makes the input smaller and smaller.
1894  */
1895  if (mp_cmp_mag (x, n) != MP_LT) {
1896  s_mp_sub(x, n, x);
1897  goto top;
1898  }
1899  return MP_OKAY;
1900 }
1901 
1902 /* sets the value of "d" required for mp_dr_reduce */
1903 static void mp_dr_setup(const mp_int *a, mp_digit *d)
1904 {
1905  /* the casts are required if DIGIT_BIT is one less than
1906  * the number of bits in a mp_digit [e.g. DIGIT_BIT==31]
1907  */
1908  *d = (mp_digit)((((mp_word)1) << ((mp_word)DIGIT_BIT)) -
1909  ((mp_word)a->dp[0]));
1910 }
1911 
1912 /* this is a shell function that calls either the normal or Montgomery
1913  * exptmod functions. Originally the call to the montgomery code was
1914  * embedded in the normal function but that wasted a lot of stack space
1915  * for nothing (since 99% of the time the Montgomery code would be called)
1916  */
1917 int mp_exptmod (const mp_int * G, const mp_int * X, mp_int * P, mp_int * Y)
1918 {
1919  int dr;
1920 
1921  /* modulus P must be positive */
1922  if (P->sign == MP_NEG) {
1923  return MP_VAL;
1924  }
1925 
1926  /* if exponent X is negative we have to recurse */
1927  if (X->sign == MP_NEG) {
1928  mp_int tmpG, tmpX;
1929  int err;
1930 
1931  /* first compute 1/G mod P */
1932  if ((err = mp_init(&tmpG)) != MP_OKAY) {
1933  return err;
1934  }
1935  if ((err = mp_invmod(G, P, &tmpG)) != MP_OKAY) {
1936  mp_clear(&tmpG);
1937  return err;
1938  }
1939 
1940  /* now get |X| */
1941  if ((err = mp_init(&tmpX)) != MP_OKAY) {
1942  mp_clear(&tmpG);
1943  return err;
1944  }
1945  if ((err = mp_abs(X, &tmpX)) != MP_OKAY) {
1946  mp_clear_multi(&tmpG, &tmpX, NULL);
1947  return err;
1948  }
1949 
1950  /* and now compute (1/G)**|X| instead of G**X [X < 0] */
1951  err = mp_exptmod(&tmpG, &tmpX, P, Y);
1952  mp_clear_multi(&tmpG, &tmpX, NULL);
1953  return err;
1954  }
1955 
1956  dr = 0;
1957 
1958  /* if the modulus is odd use the fast method */
1959  if (mp_isodd (P) == 1) {
1960  return mp_exptmod_fast (G, X, P, Y, dr);
1961  } else {
1962  /* otherwise use the generic Barrett reduction technique */
1963  return s_mp_exptmod (G, X, P, Y);
1964  }
1965 }
1966 
1967 /* computes Y == G**X mod P, HAC pp.616, Algorithm 14.85
1968  *
1969  * Uses a left-to-right k-ary sliding window to compute the modular exponentiation.
1970  * The value of k changes based on the size of the exponent.
1971  *
1972  * Uses Montgomery or Diminished Radix reduction [whichever appropriate]
1973  */
1974 
1975 int
1976 mp_exptmod_fast (const mp_int * G, const mp_int * X, mp_int * P, mp_int * Y, int redmode)
1977 {
1978  mp_int M[256], res;
1979  mp_digit buf, mp;
1980  int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
1981 
1982  /* use a pointer to the reduction algorithm. This allows us to use
1983  * one of many reduction algorithms without modding the guts of
1984  * the code with if statements everywhere.
1985  */
1986  int (*redux)(mp_int*,const mp_int*,mp_digit);
1987 
1988  /* find window size */
1989  x = mp_count_bits (X);
1990  if (x <= 7) {
1991  winsize = 2;
1992  } else if (x <= 36) {
1993  winsize = 3;
1994  } else if (x <= 140) {
1995  winsize = 4;
1996  } else if (x <= 450) {
1997  winsize = 5;
1998  } else if (x <= 1303) {
1999  winsize = 6;
2000  } else if (x <= 3529) {
2001  winsize = 7;
2002  } else {
2003  winsize = 8;
2004  }
2005 
2006  /* init M array */
2007  /* init first cell */
2008  if ((err = mp_init(&M[1])) != MP_OKAY) {
2009  return err;
2010  }
2011 
2012  /* now init the second half of the array */
2013  for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
2014  if ((err = mp_init(&M[x])) != MP_OKAY) {
2015  for (y = 1<<(winsize-1); y < x; y++) {
2016  mp_clear (&M[y]);
2017  }
2018  mp_clear(&M[1]);
2019  return err;
2020  }
2021  }
2022 
2023  /* determine and setup reduction code */
2024  if (redmode == 0) {
2025  /* now setup montgomery */
2026  if ((err = mp_montgomery_setup (P, &mp)) != MP_OKAY) {
2027  goto __M;
2028  }
2029 
2030  /* automatically pick the comba one if available (saves quite a few calls/ifs) */
2031  if (((P->used * 2 + 1) < MP_WARRAY) &&
2032  P->used < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
2033  redux = fast_mp_montgomery_reduce;
2034  } else {
2035  /* use slower baseline Montgomery method */
2036  redux = mp_montgomery_reduce;
2037  }
2038  } else if (redmode == 1) {
2039  /* setup DR reduction for moduli of the form B**k - b */
2040  mp_dr_setup(P, &mp);
2041  redux = mp_dr_reduce;
2042  } else {
2043  /* setup DR reduction for moduli of the form 2**k - b */
2044  if ((err = mp_reduce_2k_setup(P, &mp)) != MP_OKAY) {
2045  goto __M;
2046  }
2047  redux = mp_reduce_2k;
2048  }
2049 
2050  /* setup result */
2051  if ((err = mp_init (&res)) != MP_OKAY) {
2052  goto __M;
2053  }
2054 
2055  /* create M table
2056  *
2057 
2058  *
2059  * The first half of the table is not computed though accept for M[0] and M[1]
2060  */
2061 
2062  if (redmode == 0) {
2063  /* now we need R mod m */
2065  goto __RES;
2066  }
2067 
2068  /* now set M[1] to G * R mod m */
2069  if ((err = mp_mulmod (G, &res, P, &M[1])) != MP_OKAY) {
2070  goto __RES;
2071  }
2072  } else {
2073  mp_set(&res, 1);
2074  if ((err = mp_mod(G, P, &M[1])) != MP_OKAY) {
2075  goto __RES;
2076  }
2077  }
2078 
2079  /* compute the value at M[1<<(winsize-1)] by squaring M[1] (winsize-1) times */
2080  if ((err = mp_copy (&M[1], &M[1 << (winsize - 1)])) != MP_OKAY) {
2081  goto __RES;
2082  }
2083 
2084  for (x = 0; x < (winsize - 1); x++) {
2085  if ((err = mp_sqr (&M[1 << (winsize - 1)], &M[1 << (winsize - 1)])) != MP_OKAY) {
2086  goto __RES;
2087  }
2088  if ((err = redux (&M[1 << (winsize - 1)], P, mp)) != MP_OKAY) {
2089  goto __RES;
2090  }
2091  }
2092 
2093  /* create upper table */
2094  for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
2095  if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) {
2096  goto __RES;
2097  }
2098  if ((err = redux (&M[x], P, mp)) != MP_OKAY) {
2099  goto __RES;
2100  }
2101  }
2102 
2103  /* set initial mode and bit cnt */
2104  mode = 0;
2105  bitcnt = 1;
2106  buf = 0;
2107  digidx = X->used - 1;
2108  bitcpy = 0;
2109  bitbuf = 0;
2110 
2111  for (;;) {
2112  /* grab next digit as required */
2113  if (--bitcnt == 0) {
2114  /* if digidx == -1 we are out of digits so break */
2115  if (digidx == -1) {
2116  break;
2117  }
2118  /* read next digit and reset bitcnt */
2119  buf = X->dp[digidx--];
2120  bitcnt = DIGIT_BIT;
2121  }
2122 
2123  /* grab the next msb from the exponent */
2124  y = (buf >> (DIGIT_BIT - 1)) & 1;
2125  buf <<= (mp_digit)1;
2126 
2127  /* if the bit is zero and mode == 0 then we ignore it
2128  * These represent the leading zero bits before the first 1 bit
2129  * in the exponent. Technically this opt is not required but it
2130  * does lower the # of trivial squaring/reductions used
2131  */
2132  if (mode == 0 && y == 0) {
2133  continue;
2134  }
2135 
2136  /* if the bit is zero and mode == 1 then we square */
2137  if (mode == 1 && y == 0) {
2138  if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
2139  goto __RES;
2140  }
2141  if ((err = redux (&res, P, mp)) != MP_OKAY) {
2142  goto __RES;
2143  }
2144  continue;
2145  }
2146 
2147  /* else we add it to the window */
2148  bitbuf |= (y << (winsize - ++bitcpy));
2149  mode = 2;
2150 
2151  if (bitcpy == winsize) {
2152  /* ok window is filled so square as required and multiply */
2153  /* square first */
2154  for (x = 0; x < winsize; x++) {
2155  if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
2156  goto __RES;
2157  }
2158  if ((err = redux (&res, P, mp)) != MP_OKAY) {
2159  goto __RES;
2160  }
2161  }
2162 
2163  /* then multiply */
2164  if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) {
2165  goto __RES;
2166  }
2167  if ((err = redux (&res, P, mp)) != MP_OKAY) {
2168  goto __RES;
2169  }
2170 
2171  /* empty window and reset */
2172  bitcpy = 0;
2173  bitbuf = 0;
2174  mode = 1;
2175  }
2176  }
2177 
2178  /* if bits remain then square/multiply */
2179  if (mode == 2 && bitcpy > 0) {
2180  /* square then multiply if the bit is set */
2181  for (x = 0; x < bitcpy; x++) {
2182  if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
2183  goto __RES;
2184  }
2185  if ((err = redux (&res, P, mp)) != MP_OKAY) {
2186  goto __RES;
2187  }
2188 
2189  /* get next bit of the window */
2190  bitbuf <<= 1;
2191  if ((bitbuf & (1 << winsize)) != 0) {
2192  /* then multiply */
2193  if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) {
2194  goto __RES;
2195  }
2196  if ((err = redux (&res, P, mp)) != MP_OKAY) {
2197  goto __RES;
2198  }
2199  }
2200  }
2201  }
2202 
2203  if (redmode == 0) {
2204  /* fixup result if Montgomery reduction is used
2205  * recall that any value in a Montgomery system is
2206  * actually multiplied by R mod n. So we have
2207  * to reduce one more time to cancel out the factor
2208  * of R.
2209  */
2210  if ((err = redux(&res, P, mp)) != MP_OKAY) {
2211  goto __RES;
2212  }
2213  }
2214 
2215  /* swap res with Y */
2216  mp_exch (&res, Y);
2217  err = MP_OKAY;
2218 __RES:mp_clear (&res);
2219 __M:
2220  mp_clear(&M[1]);
2221  for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
2222  mp_clear (&M[x]);
2223  }
2224  return err;
2225 }
2226 
2227 /* Greatest Common Divisor using the binary method */
2228 int mp_gcd (const mp_int * a, const mp_int * b, mp_int * c)
2229 {
2230  mp_int u, v;
2231  int k, u_lsb, v_lsb, res;
2232 
2233  /* either zero than gcd is the largest */
2234  if (mp_iszero (a) == 1 && mp_iszero (b) == 0) {
2235  return mp_abs (b, c);
2236  }
2237  if (mp_iszero (a) == 0 && mp_iszero (b) == 1) {
2238  return mp_abs (a, c);
2239  }
2240 
2241  /* optimized. At this point if a == 0 then
2242  * b must equal zero too
2243  */
2244  if (mp_iszero (a) == 1) {
2245  mp_zero(c);
2246  return MP_OKAY;
2247  }
2248 
2249  /* get copies of a and b we can modify */
2250  if ((res = mp_init_copy (&u, a)) != MP_OKAY) {
2251  return res;
2252  }
2253 
2254  if ((res = mp_init_copy (&v, b)) != MP_OKAY) {
2255  goto __U;
2256  }
2257 
2258  /* must be positive for the remainder of the algorithm */
2259  u.sign = v.sign = MP_ZPOS;
2260 
2261  /* B1. Find the common power of two for u and v */
2262  u_lsb = mp_cnt_lsb(&u);
2263  v_lsb = mp_cnt_lsb(&v);
2264  k = MIN(u_lsb, v_lsb);
2265 
2266  if (k > 0) {
2267  /* divide the power of two out */
2268  if ((res = mp_div_2d(&u, k, &u, NULL)) != MP_OKAY) {
2269  goto __V;
2270  }
2271 
2272  if ((res = mp_div_2d(&v, k, &v, NULL)) != MP_OKAY) {
2273  goto __V;
2274  }
2275  }
2276 
2277  /* divide any remaining factors of two out */
2278  if (u_lsb != k) {
2279  if ((res = mp_div_2d(&u, u_lsb - k, &u, NULL)) != MP_OKAY) {
2280  goto __V;
2281  }
2282  }
2283 
2284  if (v_lsb != k) {
2285  if ((res = mp_div_2d(&v, v_lsb - k, &v, NULL)) != MP_OKAY) {
2286  goto __V;
2287  }
2288  }
2289 
2290  while (mp_iszero(&v) == 0) {
2291  /* make sure v is the largest */
2292  if (mp_cmp_mag(&u, &v) == MP_GT) {
2293  /* swap u and v to make sure v is >= u */
2294  mp_exch(&u, &v);
2295  }
2296 
2297  /* subtract smallest from largest */
2298  if ((res = s_mp_sub(&v, &u, &v)) != MP_OKAY) {
2299  goto __V;
2300  }
2301 
2302  /* Divide out all factors of two */
2303  if ((res = mp_div_2d(&v, mp_cnt_lsb(&v), &v, NULL)) != MP_OKAY) {
2304  goto __V;
2305  }
2306  }
2307 
2308  /* multiply by 2**k which we divided out at the beginning */
2309  if ((res = mp_mul_2d (&u, k, c)) != MP_OKAY) {
2310  goto __V;
2311  }
2312  c->sign = MP_ZPOS;
2313  res = MP_OKAY;
2314 __V:mp_clear (&u);
2315 __U:mp_clear (&v);
2316  return res;
2317 }
2318 
2319 /* get the lower 32-bits of an mp_int */
2320 unsigned long mp_get_int(const mp_int * a)
2321 {
2322  int i;
2323  unsigned long res;
2324 
2325  if (a->used == 0) {
2326  return 0;
2327  }
2328 
2329  /* get number of digits of the lsb we have to read */
2330  i = MIN(a->used,(int)((sizeof(unsigned long)*CHAR_BIT+DIGIT_BIT-1)/DIGIT_BIT))-1;
2331 
2332  /* get most significant digit of result */
2333  res = DIGIT(a,i);
2334 
2335  while (--i >= 0) {
2336  res = (res << DIGIT_BIT) | DIGIT(a,i);
2337  }
2338 
2339  /* force result to 32-bits always so it is consistent on non 32-bit platforms */
2340  return res & 0xFFFFFFFFUL;
2341 }
2342 
2343 /* creates "a" then copies b into it */
2344 int mp_init_copy (mp_int * a, const mp_int * b)
2345 {
2346  int res;
2347 
2348  if ((res = mp_init (a)) != MP_OKAY) {
2349  return res;
2350  }
2351  return mp_copy (b, a);
2352 }
2353 
2354 int mp_init_multi(mp_int *mp, ...)
2355 {
2356  mp_err res = MP_OKAY; /* Assume ok until proven otherwise */
2357  int n = 0; /* Number of ok inits */
2358  mp_int* cur_arg = mp;
2359  va_list args;
2360 
2361  va_start(args, mp); /* init args to next argument from caller */
2362  while (cur_arg != NULL) {
2363  if (mp_init(cur_arg) != MP_OKAY) {
2364  /* Oops - error! Back-track and mp_clear what we already
2365  succeeded in init-ing, then return error.
2366  */
2367  va_list clean_args;
2368 
2369  /* end the current list */
2370  va_end(args);
2371 
2372  /* now start cleaning up */
2373  cur_arg = mp;
2374  va_start(clean_args, mp);
2375  while (n--) {
2376  mp_clear(cur_arg);
2377  cur_arg = va_arg(clean_args, mp_int*);
2378  }
2379  va_end(clean_args);
2380  res = MP_MEM;
2381  break;
2382  }
2383  n++;
2384  cur_arg = va_arg(args, mp_int*);
2385  }
2386  va_end(args);
2387  return res; /* Assumed ok, if error flagged above. */
2388 }
2389 
2390 /* hac 14.61, pp608 */
2391 int mp_invmod (const mp_int * a, mp_int * b, mp_int * c)
2392 {
2393  /* b cannot be negative */
2394  if (b->sign == MP_NEG || mp_iszero(b) == 1) {
2395  return MP_VAL;
2396  }
2397 
2398  /* if the modulus is odd we can use a faster routine instead */
2399  if (mp_isodd (b) == 1) {
2400  return fast_mp_invmod (a, b, c);
2401  }
2402 
2403  return mp_invmod_slow(a, b, c);
2404 }
2405 
2406 /* hac 14.61, pp608 */
2407 int mp_invmod_slow (const mp_int * a, mp_int * b, mp_int * c)
2408 {
2409  mp_int x, y, u, v, A, B, C, D;
2410  int res;
2411 
2412  /* b cannot be negative */
2413  if (b->sign == MP_NEG || mp_iszero(b) == 1) {
2414  return MP_VAL;
2415  }
2416 
2417  /* init temps */
2418  if ((res = mp_init_multi(&x, &y, &u, &v,
2419  &A, &B, &C, &D, NULL)) != MP_OKAY) {
2420  return res;
2421  }
2422 
2423  /* x = a, y = b */
2424  if ((res = mp_copy (a, &x)) != MP_OKAY) {
2425  goto __ERR;
2426  }
2427  if ((res = mp_copy (b, &y)) != MP_OKAY) {
2428  goto __ERR;
2429  }
2430 
2431  /* 2. [modified] if x,y are both even then return an error! */
2432  if (mp_iseven (&x) == 1 && mp_iseven (&y) == 1) {
2433  res = MP_VAL;
2434  goto __ERR;
2435  }
2436 
2437  /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
2438  if ((res = mp_copy (&x, &u)) != MP_OKAY) {
2439  goto __ERR;
2440  }
2441  if ((res = mp_copy (&y, &v)) != MP_OKAY) {
2442  goto __ERR;
2443  }
2444  mp_set (&A, 1);
2445  mp_set (&D, 1);
2446 
2447 top:
2448  /* 4. while u is even do */
2449  while (mp_iseven (&u) == 1) {
2450  /* 4.1 u = u/2 */
2451  if ((res = mp_div_2 (&u, &u)) != MP_OKAY) {
2452  goto __ERR;
2453  }
2454  /* 4.2 if A or B is odd then */
2455  if (mp_isodd (&A) == 1 || mp_isodd (&B) == 1) {
2456  /* A = (A+y)/2, B = (B-x)/2 */
2457  if ((res = mp_add (&A, &y, &A)) != MP_OKAY) {
2458  goto __ERR;
2459  }
2460  if ((res = mp_sub (&B, &x, &B)) != MP_OKAY) {
2461  goto __ERR;
2462  }
2463  }
2464  /* A = A/2, B = B/2 */
2465  if ((res = mp_div_2 (&A, &A)) != MP_OKAY) {
2466  goto __ERR;
2467  }
2468  if ((res = mp_div_2 (&B, &B)) != MP_OKAY) {
2469  goto __ERR;
2470  }
2471  }
2472 
2473  /* 5. while v is even do */
2474  while (mp_iseven (&v) == 1) {
2475  /* 5.1 v = v/2 */
2476  if ((res = mp_div_2 (&v, &v)) != MP_OKAY) {
2477  goto __ERR;
2478  }
2479  /* 5.2 if C or D is odd then */
2480  if (mp_isodd (&C) == 1 || mp_isodd (&D) == 1) {
2481  /* C = (C+y)/2, D = (D-x)/2 */
2482  if ((res = mp_add (&C, &y, &C)) != MP_OKAY) {
2483  goto __ERR;
2484  }
2485  if ((res = mp_sub (&D, &x, &D)) != MP_OKAY) {
2486  goto __ERR;
2487  }
2488  }
2489  /* C = C/2, D = D/2 */
2490  if ((res = mp_div_2 (&C, &C)) != MP_OKAY) {
2491  goto __ERR;
2492  }
2493  if ((res = mp_div_2 (&D, &D)) != MP_OKAY) {
2494  goto __ERR;
2495  }
2496  }
2497 
2498  /* 6. if u >= v then */
2499  if (mp_cmp (&u, &v) != MP_LT) {
2500  /* u = u - v, A = A - C, B = B - D */
2501  if ((res = mp_sub (&u, &v, &u)) != MP_OKAY) {
2502  goto __ERR;
2503  }
2504 
2505  if ((res = mp_sub (&A, &C, &A)) != MP_OKAY) {
2506  goto __ERR;
2507  }
2508 
2509  if ((res = mp_sub (&B, &D, &B)) != MP_OKAY) {
2510  goto __ERR;
2511  }
2512  } else {
2513  /* v - v - u, C = C - A, D = D - B */
2514  if ((res = mp_sub (&v, &u, &v)) != MP_OKAY) {
2515  goto __ERR;
2516  }
2517 
2518  if ((res = mp_sub (&C, &A, &C)) != MP_OKAY) {
2519  goto __ERR;
2520  }
2521 
2522  if ((res = mp_sub (&D, &B, &D)) != MP_OKAY) {
2523  goto __ERR;
2524  }
2525  }
2526 
2527  /* if not zero goto step 4 */
2528  if (mp_iszero (&u) == 0)
2529  goto top;
2530 
2531  /* now a = C, b = D, gcd == g*v */
2532 
2533  /* if v != 1 then there is no inverse */
2534  if (mp_cmp_d (&v, 1) != MP_EQ) {
2535  res = MP_VAL;
2536  goto __ERR;
2537  }
2538 
2539  /* if it's too low */
2540  while (mp_cmp_d(&C, 0) == MP_LT) {
2541  if ((res = mp_add(&C, b, &C)) != MP_OKAY) {
2542  goto __ERR;
2543  }
2544  }
2545 
2546  /* too big */
2547  while (mp_cmp_mag(&C, b) != MP_LT) {
2548  if ((res = mp_sub(&C, b, &C)) != MP_OKAY) {
2549  goto __ERR;
2550  }
2551  }
2552 
2553  /* C is now the inverse */
2554  mp_exch (&C, c);
2555  res = MP_OKAY;
2556 __ERR:mp_clear_multi (&x, &y, &u, &v, &A, &B, &C, &D, NULL);
2557  return res;
2558 }
2559 
2560 /* c = |a| * |b| using Karatsuba Multiplication using
2561  * three half size multiplications
2562  *
2563  * Let B represent the radix [e.g. 2**DIGIT_BIT] and
2564  * let n represent half of the number of digits in
2565  * the min(a,b)
2566  *
2567  * a = a1 * B**n + a0
2568  * b = b1 * B**n + b0
2569  *
2570  * Then, a * b =>
2571  a1b1 * B**2n + ((a1 - a0)(b1 - b0) + a0b0 + a1b1) * B + a0b0
2572  *
2573  * Note that a1b1 and a0b0 are used twice and only need to be
2574  * computed once. So in total three half size (half # of
2575  * digit) multiplications are performed, a0b0, a1b1 and
2576  * (a1-b1)(a0-b0)
2577  *
2578  * Note that a multiplication of half the digits requires
2579  * 1/4th the number of single precision multiplications so in
2580  * total after one call 25% of the single precision multiplications
2581  * are saved. Note also that the call to mp_mul can end up back
2582  * in this function if the a0, a1, b0, or b1 are above the threshold.
2583  * This is known as divide-and-conquer and leads to the famous
2584  * O(N**lg(3)) or O(N**1.584) work which is asymptotically lower than
2585  * the standard O(N**2) that the baseline/comba methods use.
2586  * Generally though the overhead of this method doesn't pay off
2587  * until a certain size (N ~ 80) is reached.
2588  */
2589 int mp_karatsuba_mul (const mp_int * a, const mp_int * b, mp_int * c)
2590 {
2591  mp_int x0, x1, y0, y1, t1, x0y0, x1y1;
2592  int B, err;
2593 
2594  /* default the return code to an error */
2595  err = MP_MEM;
2596 
2597  /* min # of digits */
2598  B = MIN (a->used, b->used);
2599 
2600  /* now divide in two */
2601  B = B >> 1;
2602 
2603  /* init copy all the temps */
2604  if (mp_init_size (&x0, B) != MP_OKAY)
2605  goto ERR;
2606  if (mp_init_size (&x1, a->used - B) != MP_OKAY)
2607  goto X0;
2608  if (mp_init_size (&y0, B) != MP_OKAY)
2609  goto X1;
2610  if (mp_init_size (&y1, b->used - B) != MP_OKAY)
2611  goto Y0;
2612 
2613  /* init temps */
2614  if (mp_init_size (&t1, B * 2) != MP_OKAY)
2615  goto Y1;
2616  if (mp_init_size (&x0y0, B * 2) != MP_OKAY)
2617  goto T1;
2618  if (mp_init_size (&x1y1, B * 2) != MP_OKAY)
2619  goto X0Y0;
2620 
2621  /* now shift the digits */
2622  x0.used = y0.used = B;
2623  x1.used = a->used - B;
2624  y1.used = b->used - B;
2625 
2626  {
2627  register int x;
2628  register mp_digit *tmpa, *tmpb, *tmpx, *tmpy;
2629 
2630  /* we copy the digits directly instead of using higher level functions
2631  * since we also need to shift the digits
2632  */
2633  tmpa = a->dp;
2634  tmpb = b->dp;
2635 
2636  tmpx = x0.dp;
2637  tmpy = y0.dp;
2638  for (x = 0; x < B; x++) {
2639  *tmpx++ = *tmpa++;
2640  *tmpy++ = *tmpb++;
2641  }
2642 
2643  tmpx = x1.dp;
2644  for (x = B; x < a->used; x++) {
2645  *tmpx++ = *tmpa++;
2646  }
2647 
2648  tmpy = y1.dp;
2649  for (x = B; x < b->used; x++) {
2650  *tmpy++ = *tmpb++;
2651  }
2652  }
2653 
2654  /* only need to clamp the lower words since by definition the
2655  * upper words x1/y1 must have a known number of digits
2656  */
2657  mp_clamp (&x0);
2658  mp_clamp (&y0);
2659 
2660  /* now calc the products x0y0 and x1y1 */
2661  /* after this x0 is no longer required, free temp [x0==t2]! */
2662  if (mp_mul (&x0, &y0, &x0y0) != MP_OKAY)
2663  goto X1Y1; /* x0y0 = x0*y0 */
2664  if (mp_mul (&x1, &y1, &x1y1) != MP_OKAY)
2665  goto X1Y1; /* x1y1 = x1*y1 */
2666 
2667  /* now calc x1-x0 and y1-y0 */
2668  if (mp_sub (&x1, &x0, &t1) != MP_OKAY)
2669  goto X1Y1; /* t1 = x1 - x0 */
2670  if (mp_sub (&y1, &y0, &x0) != MP_OKAY)
2671  goto X1Y1; /* t2 = y1 - y0 */
2672  if (mp_mul (&t1, &x0, &t1) != MP_OKAY)
2673  goto X1Y1; /* t1 = (x1 - x0) * (y1 - y0) */
2674 
2675  /* add x0y0 */
2676  if (mp_add (&x0y0, &x1y1, &x0) != MP_OKAY)
2677  goto X1Y1; /* t2 = x0y0 + x1y1 */
2678  if (mp_sub (&x0, &t1, &t1) != MP_OKAY)
2679  goto X1Y1; /* t1 = x0y0 + x1y1 - (x1-x0)*(y1-y0) */
2680 
2681  /* shift by B */
2682  if (mp_lshd (&t1, B) != MP_OKAY)
2683  goto X1Y1; /* t1 = (x0y0 + x1y1 - (x1-x0)*(y1-y0))<<B */
2684  if (mp_lshd (&x1y1, B * 2) != MP_OKAY)
2685  goto X1Y1; /* x1y1 = x1y1 << 2*B */
2686 
2687  if (mp_add (&x0y0, &t1, &t1) != MP_OKAY)
2688  goto X1Y1; /* t1 = x0y0 + t1 */
2689  if (mp_add (&t1, &x1y1, c) != MP_OKAY)
2690  goto X1Y1; /* t1 = x0y0 + t1 + x1y1 */
2691 
2692  /* Algorithm succeeded set the return code to MP_OKAY */
2693  err = MP_OKAY;
2694 
2695 X1Y1:mp_clear (&x1y1);
2696 X0Y0:mp_clear (&x0y0);
2697 T1:mp_clear (&t1);
2698 Y1:mp_clear (&y1);
2699 Y0:mp_clear (&y0);
2700 X1:mp_clear (&x1);
2701 X0:mp_clear (&x0);
2702 ERR:
2703  return err;
2704 }
2705 
2706 /* Karatsuba squaring, computes b = a*a using three
2707  * half size squarings
2708  *
2709  * See comments of karatsuba_mul for details. It
2710  * is essentially the same algorithm but merely
2711  * tuned to perform recursive squarings.
2712  */
2714 {
2715  mp_int x0, x1, t1, t2, x0x0, x1x1;
2716  int B, err;
2717 
2718  err = MP_MEM;
2719 
2720  /* min # of digits */
2721  B = a->used;
2722 
2723  /* now divide in two */
2724  B = B >> 1;
2725 
2726  /* init copy all the temps */
2727  if (mp_init_size (&x0, B) != MP_OKAY)
2728  goto ERR;
2729  if (mp_init_size (&x1, a->used - B) != MP_OKAY)
2730  goto X0;
2731 
2732  /* init temps */
2733  if (mp_init_size (&t1, a->used * 2) != MP_OKAY)
2734  goto X1;
2735  if (mp_init_size (&t2, a->used * 2) != MP_OKAY)
2736  goto T1;
2737  if (mp_init_size (&x0x0, B * 2) != MP_OKAY)
2738  goto T2;
2739  if (mp_init_size (&x1x1, (a->used - B) * 2) != MP_OKAY)
2740  goto X0X0;
2741 
2742  {
2743  register int x;
2744  register mp_digit *dst, *src;
2745 
2746  src = a->dp;
2747 
2748  /* now shift the digits */
2749  dst = x0.dp;
2750  for (x = 0; x < B; x++) {
2751  *dst++ = *src++;
2752  }
2753 
2754  dst = x1.dp;
2755  for (x = B; x < a->used; x++) {
2756  *dst++ = *src++;
2757  }
2758  }
2759 
2760  x0.used = B;
2761  x1.used = a->used - B;
2762 
2763  mp_clamp (&x0);
2764 
2765  /* now calc the products x0*x0 and x1*x1 */
2766  if (mp_sqr (&x0, &x0x0) != MP_OKAY)
2767  goto X1X1; /* x0x0 = x0*x0 */
2768  if (mp_sqr (&x1, &x1x1) != MP_OKAY)
2769  goto X1X1; /* x1x1 = x1*x1 */
2770 
2771  /* now calc (x1-x0)**2 */
2772  if (mp_sub (&x1, &x0, &t1) != MP_OKAY)
2773  goto X1X1; /* t1 = x1 - x0 */
2774  if (mp_sqr (&t1, &t1) != MP_OKAY)
2775  goto X1X1; /* t1 = (x1 - x0) * (x1 - x0) */
2776 
2777  /* add x0y0 */
2778  if (s_mp_add (&x0x0, &x1x1, &t2) != MP_OKAY)
2779  goto X1X1; /* t2 = x0x0 + x1x1 */
2780  if (mp_sub (&t2, &t1, &t1) != MP_OKAY)
2781  goto X1X1; /* t1 = x0x0 + x1x1 - (x1-x0)*(x1-x0) */
2782 
2783  /* shift by B */
2784  if (mp_lshd (&t1, B) != MP_OKAY)
2785  goto X1X1; /* t1 = (x0x0 + x1x1 - (x1-x0)*(x1-x0))<<B */
2786  if (mp_lshd (&x1x1, B * 2) != MP_OKAY)
2787  goto X1X1; /* x1x1 = x1x1 << 2*B */
2788 
2789  if (mp_add (&x0x0, &t1, &t1) != MP_OKAY)
2790  goto X1X1; /* t1 = x0x0 + t1 */
2791  if (mp_add (&t1, &x1x1, b) != MP_OKAY)
2792  goto X1X1; /* t1 = x0x0 + t1 + x1x1 */
2793 
2794  err = MP_OKAY;
2795 
2796 X1X1:mp_clear (&x1x1);
2797 X0X0:mp_clear (&x0x0);
2798 T2:mp_clear (&t2);
2799 T1:mp_clear (&t1);
2800 X1:mp_clear (&x1);
2801 X0:mp_clear (&x0);
2802 ERR:
2803  return err;
2804 }
2805 
2806 /* computes least common multiple as |a*b|/(a, b) */
2807 int mp_lcm (const mp_int * a, const mp_int * b, mp_int * c)
2808 {
2809  int res;
2810  mp_int t1, t2;
2811 
2812 
2813  if ((res = mp_init_multi (&t1, &t2, NULL)) != MP_OKAY) {
2814  return res;
2815  }
2816 
2817  /* t1 = get the GCD of the two inputs */
2818  if ((res = mp_gcd (a, b, &t1)) != MP_OKAY) {
2819  goto __T;
2820  }
2821 
2822  /* divide the smallest by the GCD */
2823  if (mp_cmp_mag(a, b) == MP_LT) {
2824  /* store quotient in t2 so that t2 * b is the LCM */
2825  if ((res = mp_div(a, &t1, &t2, NULL)) != MP_OKAY) {
2826  goto __T;
2827  }
2828  res = mp_mul(b, &t2, c);
2829  } else {
2830  /* store quotient in t2 so that t2 * a is the LCM */
2831  if ((res = mp_div(b, &t1, &t2, NULL)) != MP_OKAY) {
2832  goto __T;
2833  }
2834  res = mp_mul(a, &t2, c);
2835  }
2836 
2837  /* fix the sign to positive */
2838  c->sign = MP_ZPOS;
2839 
2840 __T:
2841  mp_clear_multi (&t1, &t2, NULL);
2842  return res;
2843 }
2844 
2845 /* c = a mod b, 0 <= c < b */
2846 int
2847 mp_mod (const mp_int * a, mp_int * b, mp_int * c)
2848 {
2849  mp_int t;
2850  int res;
2851 
2852  if ((res = mp_init (&t)) != MP_OKAY) {
2853  return res;
2854  }
2855 
2856  if ((res = mp_div (a, b, NULL, &t)) != MP_OKAY) {
2857  mp_clear (&t);
2858  return res;
2859  }
2860 
2861  if (t.sign != b->sign) {
2862  res = mp_add (b, &t, c);
2863  } else {
2864  res = MP_OKAY;
2865  mp_exch (&t, c);
2866  }
2867 
2868  mp_clear (&t);
2869  return res;
2870 }
2871 
2872 static int
2874 {
2875  return mp_div_d(a, b, NULL, c);
2876 }
2877 
2878 /* b = a*2 */
2879 static int mp_mul_2(const mp_int * a, mp_int * b)
2880 {
2881  int x, res, oldused;
2882 
2883  /* grow to accommodate result */
2884  if (b->alloc < a->used + 1) {
2885  if ((res = mp_grow (b, a->used + 1)) != MP_OKAY) {
2886  return res;
2887  }
2888  }
2889 
2890  oldused = b->used;
2891  b->used = a->used;
2892 
2893  {
2894  register mp_digit r, rr, *tmpa, *tmpb;
2895 
2896  /* alias for source */
2897  tmpa = a->dp;
2898 
2899  /* alias for dest */
2900  tmpb = b->dp;
2901 
2902  /* carry */
2903  r = 0;
2904  for (x = 0; x < a->used; x++) {
2905 
2906  /* get what will be the *next* carry bit from the
2907  * MSB of the current digit
2908  */
2909  rr = *tmpa >> ((mp_digit)(DIGIT_BIT - 1));
2910 
2911  /* now shift up this digit, add in the carry [from the previous] */
2912  *tmpb++ = ((*tmpa++ << ((mp_digit)1)) | r) & MP_MASK;
2913 
2914  /* copy the carry that would be from the source
2915  * digit into the next iteration
2916  */
2917  r = rr;
2918  }
2919 
2920  /* new leading digit? */
2921  if (r != 0) {
2922  /* add a MSB which is always 1 at this point */
2923  *tmpb = 1;
2924  ++(b->used);
2925  }
2926 
2927  /* now zero any excess digits on the destination
2928  * that we didn't write to
2929  */
2930  tmpb = b->dp + b->used;
2931  for (x = b->used; x < oldused; x++) {
2932  *tmpb++ = 0;
2933  }
2934  }
2935  b->sign = a->sign;
2936  return MP_OKAY;
2937 }
2938 
2939 /*
2940  * shifts with subtractions when the result is greater than b.
2941  *
2942  * The method is slightly modified to shift B unconditionally up to just under
2943  * the leading bit of b. This saves a lot of multiple precision shifting.
2944  */
2946 {
2947  int x, bits, res;
2948 
2949  /* how many bits of last digit does b use */
2950  bits = mp_count_bits (b) % DIGIT_BIT;
2951 
2952 
2953  if (b->used > 1) {
2954  if ((res = mp_2expt (a, (b->used - 1) * DIGIT_BIT + bits - 1)) != MP_OKAY) {
2955  return res;
2956  }
2957  } else {
2958  mp_set(a, 1);
2959  bits = 1;
2960  }
2961 
2962 
2963  /* now compute C = A * B mod b */
2964  for (x = bits - 1; x < DIGIT_BIT; x++) {
2965  if ((res = mp_mul_2 (a, a)) != MP_OKAY) {
2966  return res;
2967  }
2968  if (mp_cmp_mag (a, b) != MP_LT) {
2969  if ((res = s_mp_sub (a, b, a)) != MP_OKAY) {
2970  return res;
2971  }
2972  }
2973  }
2974 
2975  return MP_OKAY;
2976 }
2977 
2978 /* computes xR**-1 == x (mod N) via Montgomery Reduction */
2979 int
2981 {
2982  int ix, res, digs;
2983  mp_digit mu;
2984 
2985  /* can the fast reduction [comba] method be used?
2986  *
2987  * Note that unlike in mul you're safely allowed *less*
2988  * than the available columns [255 per default] since carries
2989  * are fixed up in the inner loop.
2990  */
2991  digs = n->used * 2 + 1;
2992  if ((digs < MP_WARRAY) &&
2993  n->used <
2994  (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
2995  return fast_mp_montgomery_reduce (x, n, rho);
2996  }
2997 
2998  /* grow the input as required */
2999  if (x->alloc < digs) {
3000  if ((res = mp_grow (x, digs)) != MP_OKAY) {
3001  return res;
3002  }
3003  }
3004  x->used = digs;
3005 
3006  for (ix = 0; ix < n->used; ix++) {
3007  /* mu = ai * rho mod b
3008  *
3009  * The value of rho must be precalculated via
3010  * montgomery_setup() such that
3011  * it equals -1/n0 mod b this allows the
3012  * following inner loop to reduce the
3013  * input one digit at a time
3014  */
3015  mu = (mp_digit) (((mp_word)x->dp[ix]) * ((mp_word)rho) & MP_MASK);
3016 
3017  /* a = a + mu * m * b**i */
3018  {
3019  register int iy;
3020  register mp_digit *tmpn, *tmpx, u;
3021  register mp_word r;
3022 
3023  /* alias for digits of the modulus */
3024  tmpn = n->dp;
3025 
3026  /* alias for the digits of x [the input] */
3027  tmpx = x->dp + ix;
3028 
3029  /* set the carry to zero */
3030  u = 0;
3031 
3032  /* Multiply and add in place */
3033  for (iy = 0; iy < n->used; iy++) {
3034  /* compute product and sum */
3035  r = ((mp_word)mu) * ((mp_word)*tmpn++) +
3036  ((mp_word) u) + ((mp_word) * tmpx);
3037 
3038  /* get carry */
3039  u = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
3040 
3041  /* fix digit */
3042  *tmpx++ = (mp_digit)(r & ((mp_word) MP_MASK));
3043  }
3044  /* At this point the ix'th digit of x should be zero */
3045 
3046 
3047  /* propagate carries upwards as required*/
3048  while (u) {
3049  *tmpx += u;
3050  u = *tmpx >> DIGIT_BIT;
3051  *tmpx++ &= MP_MASK;
3052  }
3053  }
3054  }
3055 
3056  /* at this point the n.used'th least
3057  * significant digits of x are all zero
3058  * which means we can shift x to the
3059  * right by n.used digits and the
3060  * residue is unchanged.
3061  */
3062 
3063  /* x = x/b**n.used */
3064  mp_clamp(x);
3065  mp_rshd (x, n->used);
3066 
3067  /* if x >= n then x = x - n */
3068  if (mp_cmp_mag (x, n) != MP_LT) {
3069  return s_mp_sub (x, n, x);
3070  }
3071 
3072  return MP_OKAY;
3073 }
3074 
3075 /* setups the montgomery reduction stuff */
3076 int
3078 {
3079  mp_digit x, b;
3080 
3081 /* fast inversion mod 2**k
3082  *
3083  * Based on the fact that
3084  *
3085  * XA = 1 (mod 2**n) => (X(2-XA)) A = 1 (mod 2**2n)
3086  * => 2*X*A - X*X*A*A = 1
3087  * => 2*(1) - (1) = 1
3088  */
3089  b = n->dp[0];
3090 
3091  if ((b & 1) == 0) {
3092  return MP_VAL;
3093  }
3094 
3095  x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */
3096  x *= 2 - b * x; /* here x*a==1 mod 2**8 */
3097  x *= 2 - b * x; /* here x*a==1 mod 2**16 */
3098  x *= 2 - b * x; /* here x*a==1 mod 2**32 */
3099 
3100  /* rho = -1/m mod b */
3101  *rho = (((mp_word)1 << ((mp_word) DIGIT_BIT)) - x) & MP_MASK;
3102 
3103  return MP_OKAY;
3104 }
3105 
3106 /* high level multiplication (handles sign) */
3107 int mp_mul (const mp_int * a, const mp_int * b, mp_int * c)
3108 {
3109  int res, neg;
3110  neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG;
3111 
3112  /* use Karatsuba? */
3113  if (MIN (a->used, b->used) >= KARATSUBA_MUL_CUTOFF) {
3114  res = mp_karatsuba_mul (a, b, c);
3115  } else
3116  {
3117  /* can we use the fast multiplier?
3118  *
3119  * The fast multiplier can be used if the output will
3120  * have less than MP_WARRAY digits and the number of
3121  * digits won't affect carry propagation
3122  */
3123  int digs = a->used + b->used + 1;
3124 
3125  if ((digs < MP_WARRAY) &&
3126  MIN(a->used, b->used) <=
3127  (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
3128  res = fast_s_mp_mul_digs (a, b, c, digs);
3129  } else
3130  res = s_mp_mul (a, b, c); /* uses s_mp_mul_digs */
3131  }
3132  c->sign = (c->used > 0) ? neg : MP_ZPOS;
3133  return res;
3134 }
3135 
3136 /* d = a * b (mod c) */
3137 int
3138 mp_mulmod (const mp_int * a, const mp_int * b, mp_int * c, mp_int * d)
3139 {
3140  int res;
3141  mp_int t;
3142 
3143  if ((res = mp_init (&t)) != MP_OKAY) {
3144  return res;
3145  }
3146 
3147  if ((res = mp_mul (a, b, &t)) != MP_OKAY) {
3148  mp_clear (&t);
3149  return res;
3150  }
3151  res = mp_mod (&t, c, d);
3152  mp_clear (&t);
3153  return res;
3154 }
3155 
3156 /* table of first PRIME_SIZE primes */
3157 static const mp_digit __prime_tab[] = {
3158  0x0002, 0x0003, 0x0005, 0x0007, 0x000B, 0x000D, 0x0011, 0x0013,
3159  0x0017, 0x001D, 0x001F, 0x0025, 0x0029, 0x002B, 0x002F, 0x0035,
3160  0x003B, 0x003D, 0x0043, 0x0047, 0x0049, 0x004F, 0x0053, 0x0059,
3161  0x0061, 0x0065, 0x0067, 0x006B, 0x006D, 0x0071, 0x007F, 0x0083,
3162  0x0089, 0x008B, 0x0095, 0x0097, 0x009D, 0x00A3, 0x00A7, 0x00AD,
3163  0x00B3, 0x00B5, 0x00BF, 0x00C1, 0x00C5, 0x00C7, 0x00D3, 0x00DF,
3164  0x00E3, 0x00E5, 0x00E9, 0x00EF, 0x00F1, 0x00FB, 0x0101, 0x0107,
3165  0x010D, 0x010F, 0x0115, 0x0119, 0x011B, 0x0125, 0x0133, 0x0137,
3166 
3167  0x0139, 0x013D, 0x014B, 0x0151, 0x015B, 0x015D, 0x0161, 0x0167,
3168  0x016F, 0x0175, 0x017B, 0x017F, 0x0185, 0x018D, 0x0191, 0x0199,
3169  0x01A3, 0x01A5, 0x01AF, 0x01B1, 0x01B7, 0x01BB, 0x01C1, 0x01C9,
3170  0x01CD, 0x01CF, 0x01D3, 0x01DF, 0x01E7, 0x01EB, 0x01F3, 0x01F7,
3171  0x01FD, 0x0209, 0x020B, 0x021D, 0x0223, 0x022D, 0x0233, 0x0239,
3172  0x023B, 0x0241, 0x024B, 0x0251, 0x0257, 0x0259, 0x025F, 0x0265,
3173  0x0269, 0x026B, 0x0277, 0x0281, 0x0283, 0x0287, 0x028D, 0x0293,
3174  0x0295, 0x02A1, 0x02A5, 0x02AB, 0x02B3, 0x02BD, 0x02C5, 0x02CF,
3175 
3176  0x02D7, 0x02DD, 0x02E3, 0x02E7, 0x02EF, 0x02F5, 0x02F9, 0x0301,
3177  0x0305, 0x0313, 0x031D, 0x0329, 0x032B, 0x0335, 0x0337, 0x033B,
3178  0x033D, 0x0347, 0x0355, 0x0359, 0x035B, 0x035F, 0x036D, 0x0371,
3179  0x0373, 0x0377, 0x038B, 0x038F, 0x0397, 0x03A1, 0x03A9, 0x03AD,
3180  0x03B3, 0x03B9, 0x03C7, 0x03CB, 0x03D1, 0x03D7, 0x03DF, 0x03E5,
3181  0x03F1, 0x03F5, 0x03FB, 0x03FD, 0x0407, 0x0409, 0x040F, 0x0419,
3182  0x041B, 0x0425, 0x0427, 0x042D, 0x043F, 0x0443, 0x0445, 0x0449,
3183  0x044F, 0x0455, 0x045D, 0x0463, 0x0469, 0x047F, 0x0481, 0x048B,
3184 
3185  0x0493, 0x049D, 0x04A3, 0x04A9, 0x04B1, 0x04BD, 0x04C1, 0x04C7,
3186  0x04CD, 0x04CF, 0x04D5, 0x04E1, 0x04EB, 0x04FD, 0x04FF, 0x0503,
3187  0x0509, 0x050B, 0x0511, 0x0515, 0x0517, 0x051B, 0x0527, 0x0529,
3188  0x052F, 0x0551, 0x0557, 0x055D, 0x0565, 0x0577, 0x0581, 0x058F,
3189  0x0593, 0x0595, 0x0599, 0x059F, 0x05A7, 0x05AB, 0x05AD, 0x05B3,
3190  0x05BF, 0x05C9, 0x05CB, 0x05CF, 0x05D1, 0x05D5, 0x05DB, 0x05E7,
3191  0x05F3, 0x05FB, 0x0607, 0x060D, 0x0611, 0x0617, 0x061F, 0x0623,
3192  0x062B, 0x062F, 0x063D, 0x0641, 0x0647, 0x0649, 0x064D, 0x0653
3193 };
3194 
3195 /* determines if an integers is divisible by one
3196  * of the first PRIME_SIZE primes or not
3197  *
3198  * sets result to 0 if not, 1 if yes
3199  */
3200 static int mp_prime_is_divisible (const mp_int * a, int *result)
3201 {
3202  int err, ix;
3203  mp_digit res;
3204 
3205  /* default to not */
3206  *result = MP_NO;
3207 
3208  for (ix = 0; ix < PRIME_SIZE; ix++) {
3209  /* what is a mod __prime_tab[ix] */
3210  if ((err = mp_mod_d (a, __prime_tab[ix], &res)) != MP_OKAY) {
3211  return err;
3212  }
3213 
3214  /* is the residue zero? */
3215  if (res == 0) {
3216  *result = MP_YES;
3217  return MP_OKAY;
3218  }
3219  }
3220 
3221  return MP_OKAY;
3222 }
3223 
3224 /* Miller-Rabin test of "a" to the base of "b" as described in
3225  * HAC pp. 139 Algorithm 4.24
3226  *
3227  * Sets result to 0 if definitely composite or 1 if probably prime.
3228  * Randomly the chance of error is no more than 1/4 and often
3229  * very much lower.
3230  */
3231 static int mp_prime_miller_rabin (mp_int * a, const mp_int * b, int *result)
3232 {
3233  mp_int n1, y, r;
3234  int s, j, err;
3235 
3236  /* default */
3237  *result = MP_NO;
3238 
3239  /* ensure b > 1 */
3240  if (mp_cmp_d(b, 1) != MP_GT) {
3241  return MP_VAL;
3242  }
3243 
3244  /* get n1 = a - 1 */
3245  if ((err = mp_init_copy (&n1, a)) != MP_OKAY) {
3246  return err;
3247  }
3248  if ((err = mp_sub_d (&n1, 1, &n1)) != MP_OKAY) {
3249  goto __N1;
3250  }
3251 
3252  /* set 2**s * r = n1 */
3253  if ((err = mp_init_copy (&r, &n1)) != MP_OKAY) {
3254  goto __N1;
3255  }
3256 
3257  /* count the number of least significant bits
3258  * which are zero
3259  */
3260  s = mp_cnt_lsb(&r);
3261 
3262  /* now divide n - 1 by 2**s */
3263  if ((err = mp_div_2d (&r, s, &r, NULL)) != MP_OKAY) {
3264  goto __R;
3265  }
3266 
3267  /* compute y = b**r mod a */
3268  if ((err = mp_init (&y)) != MP_OKAY) {
3269  goto __R;
3270  }
3271  if ((err = mp_exptmod (b, &r, a, &y)) != MP_OKAY) {
3272  goto __Y;
3273  }
3274 
3275  /* if y != 1 and y != n1 do */
3276  if (mp_cmp_d (&y, 1) != MP_EQ && mp_cmp (&y, &n1) != MP_EQ) {
3277  j = 1;
3278  /* while j <= s-1 and y != n1 */
3279  while ((j <= (s - 1)) && mp_cmp (&y, &n1) != MP_EQ) {
3280  if ((err = mp_sqrmod (&y, a, &y)) != MP_OKAY) {
3281  goto __Y;
3282  }
3283 
3284  /* if y == 1 then composite */
3285  if (mp_cmp_d (&y, 1) == MP_EQ) {
3286  goto __Y;
3287  }
3288 
3289  ++j;
3290  }
3291 
3292  /* if y != n1 then composite */
3293  if (mp_cmp (&y, &n1) != MP_EQ) {
3294  goto __Y;
3295  }
3296  }
3297 
3298  /* probably prime now */
3299  *result = MP_YES;
3300 __Y:mp_clear (&y);
3301 __R:mp_clear (&r);
3302 __N1:mp_clear (&n1);
3303  return err;
3304 }
3305 
3306 /* performs a variable number of rounds of Miller-Rabin
3307  *
3308  * Probability of error after t rounds is no more than
3309 
3310  *
3311  * Sets result to 1 if probably prime, 0 otherwise
3312  */
3313 static int mp_prime_is_prime (mp_int * a, int t, int *result)
3314 {
3315  mp_int b;
3316  int ix, err, res;
3317 
3318  /* default to no */
3319  *result = MP_NO;
3320 
3321  /* valid value of t? */
3322  if (t <= 0 || t > PRIME_SIZE) {
3323  return MP_VAL;
3324  }
3325 
3326  /* is the input equal to one of the primes in the table? */
3327  for (ix = 0; ix < PRIME_SIZE; ix++) {
3328  if (mp_cmp_d(a, __prime_tab[ix]) == MP_EQ) {
3329  *result = 1;
3330  return MP_OKAY;
3331  }
3332  }
3333 
3334  /* first perform trial division */
3335  if ((err = mp_prime_is_divisible (a, &res)) != MP_OKAY) {
3336  return err;
3337  }
3338 
3339  /* return if it was trivially divisible */
3340  if (res == MP_YES) {
3341  return MP_OKAY;
3342  }
3343 
3344  /* now perform the miller-rabin rounds */
3345  if ((err = mp_init (&b)) != MP_OKAY) {
3346  return err;
3347  }
3348 
3349  for (ix = 0; ix < t; ix++) {
3350  /* set the prime */
3351  mp_set (&b, __prime_tab[ix]);
3352 
3353  if ((err = mp_prime_miller_rabin (a, &b, &res)) != MP_OKAY) {
3354  goto __B;
3355  }
3356 
3357  if (res == MP_NO) {
3358  goto __B;
3359  }
3360  }
3361 
3362  /* passed the test */
3363  *result = MP_YES;
3364 __B:mp_clear (&b);
3365  return err;
3366 }
3367 
3368 static const struct {
3369  int k, t;
3370 } sizes[] = {
3371 { 128, 28 },
3372 { 256, 16 },
3373 { 384, 10 },
3374 { 512, 7 },
3375 { 640, 6 },
3376 { 768, 5 },
3377 { 896, 4 },
3378 { 1024, 4 }
3379 };
3380 
3381 /* returns # of RM trials required for a given bit size */
3383 {
3384  int x;
3385 
3386  for (x = 0; x < (int)(sizeof(sizes)/(sizeof(sizes[0]))); x++) {
3387  if (sizes[x].k == size) {
3388  return sizes[x].t;
3389  } else if (sizes[x].k > size) {
3390  return (x == 0) ? sizes[0].t : sizes[x - 1].t;
3391  }
3392  }
3393  return sizes[x-1].t + 1;
3394 }
3395 
3396 /* makes a truly random prime of a given size (bits),
3397  *
3398  * Flags are as follows:
3399  *
3400  * LTM_PRIME_BBS - make prime congruent to 3 mod 4
3401  * LTM_PRIME_SAFE - make sure (p-1)/2 is prime as well (implies LTM_PRIME_BBS)
3402  * LTM_PRIME_2MSB_OFF - make the 2nd highest bit zero
3403  * LTM_PRIME_2MSB_ON - make the 2nd highest bit one
3404  *
3405  * You have to supply a callback which fills in a buffer with random bytes. "dat" is a parameter you can
3406  * have passed to the callback (e.g. a state or something). This function doesn't use "dat" itself
3407  * so it can be NULL
3408  *
3409  */
3410 
3411 /* This is possibly the mother of all prime generation functions, muahahahahaha! */
3412 int mp_prime_random_ex(mp_int *a, int t, int size, int flags, ltm_prime_callback cb, void *dat)
3413 {
3414  unsigned char *tmp, maskAND, maskOR_msb, maskOR_lsb;
3415  int res, err, bsize, maskOR_msb_offset;
3416 
3417  /* sanity check the input */
3418  if (size <= 1 || t <= 0) {
3419  return MP_VAL;
3420  }
3421 
3422  /* LTM_PRIME_SAFE implies LTM_PRIME_BBS */
3423  if (flags & LTM_PRIME_SAFE) {
3424  flags |= LTM_PRIME_BBS;
3425  }
3426 
3427  /* calc the byte size */
3428  bsize = (size>>3)+((size&7)?1:0);
3429 
3430  /* we need a buffer of bsize bytes */
3431  tmp = HeapAlloc(GetProcessHeap(), 0, bsize);
3432  if (tmp == NULL) {
3433  return MP_MEM;
3434  }
3435 
3436  /* calc the maskAND value for the MSbyte*/
3437  maskAND = ((size&7) == 0) ? 0xFF : (0xFF >> (8 - (size & 7)));
3438 
3439  /* calc the maskOR_msb */
3440  maskOR_msb = 0;
3441  maskOR_msb_offset = ((size & 7) == 1) ? 1 : 0;
3442  if (flags & LTM_PRIME_2MSB_ON) {
3443  maskOR_msb |= 1 << ((size - 2) & 7);
3444  } else if (flags & LTM_PRIME_2MSB_OFF) {
3445  maskAND &= ~(1 << ((size - 2) & 7));
3446  }
3447 
3448  /* get the maskOR_lsb */
3449  maskOR_lsb = 0;
3450  if (flags & LTM_PRIME_BBS) {
3451  maskOR_lsb |= 3;
3452  }
3453 
3454  do {
3455  /* read the bytes */
3456  if (cb(tmp, bsize, dat) != bsize) {
3457  err = MP_VAL;
3458  goto error;
3459  }
3460 
3461  /* work over the MSbyte */
3462  tmp[0] &= maskAND;
3463  tmp[0] |= 1 << ((size - 1) & 7);
3464 
3465  /* mix in the maskORs */
3466  tmp[maskOR_msb_offset] |= maskOR_msb;
3467  tmp[bsize-1] |= maskOR_lsb;
3468 
3469  /* read it in */
3470  if ((err = mp_read_unsigned_bin(a, tmp, bsize)) != MP_OKAY) { goto error; }
3471 
3472  /* is it prime? */
3473  if ((err = mp_prime_is_prime(a, t, &res)) != MP_OKAY) { goto error; }
3474  if (res == MP_NO) {
3475  continue;
3476  }
3477 
3478  if (flags & LTM_PRIME_SAFE) {
3479  /* see if (a-1)/2 is prime */
3480  if ((err = mp_sub_d(a, 1, a)) != MP_OKAY) { goto error; }
3481  if ((err = mp_div_2(a, a)) != MP_OKAY) { goto error; }
3482 
3483  /* is it prime? */
3484  if ((err = mp_prime_is_prime(a, t, &res)) != MP_OKAY) { goto error; }
3485  }
3486  } while (res == MP_NO);
3487 
3488  if (flags & LTM_PRIME_SAFE) {
3489  /* restore a to the original value */
3490  if ((err = mp_mul_2(a, a)) != MP_OKAY) { goto error; }
3491  if ((err = mp_add_d(a, 1, a)) != MP_OKAY) { goto error; }
3492  }
3493 
3494  err = MP_OKAY;
3495 error:
3496  HeapFree(GetProcessHeap(), 0, tmp);
3497  return err;
3498 }
3499 
3500 /* reads an unsigned char array, assumes the msb is stored first [big endian] */
3501 int
3502 mp_read_unsigned_bin (mp_int * a, const unsigned char *b, int c)
3503 {
3504  int res;
3505 
3506  /* make sure there are at least two digits */
3507  if (a->alloc < 2) {
3508  if ((res = mp_grow(a, 2)) != MP_OKAY) {
3509  return res;
3510  }
3511  }
3512 
3513  /* zero the int */
3514  mp_zero (a);
3515 
3516  /* read the bytes in */
3517  while (c-- > 0) {
3518  if ((res = mp_mul_2d (a, 8, a)) != MP_OKAY) {
3519  return res;
3520  }
3521 
3522  a->dp[0] |= *b++;
3523  a->used += 1;
3524  }
3525  mp_clamp (a);
3526  return MP_OKAY;
3527 }
3528 
3529 /* reduces x mod m, assumes 0 < x < m**2, mu is
3530  * precomputed via mp_reduce_setup.
3531  * From HAC pp.604 Algorithm 14.42
3532  */
3533 int
3534 mp_reduce (mp_int * x, const mp_int * m, const mp_int * mu)
3535 {
3536  mp_int q;
3537  int res, um = m->used;
3538 
3539  /* q = x */
3540  if ((res = mp_init_copy (&q, x)) != MP_OKAY) {
3541  return res;
3542  }
3543 
3544  /* q1 = x / b**(k-1) */
3545  mp_rshd (&q, um - 1);
3546 
3547  /* according to HAC this optimization is ok */
3548  if (((unsigned long) um) > (((mp_digit)1) << (DIGIT_BIT - 1))) {
3549  if ((res = mp_mul (&q, mu, &q)) != MP_OKAY) {
3550  goto CLEANUP;
3551  }
3552  } else {
3553  if ((res = s_mp_mul_high_digs (&q, mu, &q, um - 1)) != MP_OKAY) {
3554  goto CLEANUP;
3555  }
3556  }
3557 
3558  /* q3 = q2 / b**(k+1) */
3559  mp_rshd (&q, um + 1);
3560 
3561  /* x = x mod b**(k+1), quick (no division) */
3562  if ((res = mp_mod_2d (x, DIGIT_BIT * (um + 1), x)) != MP_OKAY) {
3563  goto CLEANUP;
3564  }
3565 
3566  /* q = q * m mod b**(k+1), quick (no division) */
3567  if ((res = s_mp_mul_digs (&q, m, &q, um + 1)) != MP_OKAY) {
3568  goto CLEANUP;
3569  }
3570 
3571  /* x = x - q */
3572  if ((res = mp_sub (x, &q, x)) != MP_OKAY) {
3573  goto CLEANUP;
3574  }
3575 
3576  /* If x < 0, add b**(k+1) to it */
3577  if (mp_cmp_d (x, 0) == MP_LT) {
3578  mp_set (&q, 1);
3579  if ((res = mp_lshd (&q, um + 1)) != MP_OKAY)
3580  goto CLEANUP;
3581  if ((res = mp_add (x, &q, x)) != MP_OKAY)
3582  goto CLEANUP;
3583  }
3584 
3585  /* Back off if it's too big */
3586  while (mp_cmp (x, m) != MP_LT) {
3587  if ((res = s_mp_sub (x, m, x)) != MP_OKAY) {
3588  goto CLEANUP;
3589  }
3590  }
3591 
3592 CLEANUP:
3593  mp_clear (&q);
3594 
3595  return res;
3596 }
3597 
3598 /* reduces a modulo n where n is of the form 2**p - d */
3599 int
3601 {
3602  mp_int q;
3603  int p, res;
3604 
3605  if ((res = mp_init(&q)) != MP_OKAY) {
3606  return res;
3607  }
3608 
3609  p = mp_count_bits(n);
3610 top:
3611  /* q = a/2**p, a = a mod 2**p */
3612  if ((res = mp_div_2d(a, p, &q, a)) != MP_OKAY) {
3613  goto ERR;
3614  }
3615 
3616  if (d != 1) {
3617  /* q = q * d */
3618  if ((res = mp_mul_d(&q, d, &q)) != MP_OKAY) {
3619  goto ERR;
3620  }
3621  }
3622 
3623  /* a = a + q */
3624  if ((res = s_mp_add(a, &q, a)) != MP_OKAY) {
3625  goto ERR;
3626  }
3627 
3628  if (mp_cmp_mag(a, n) != MP_LT) {
3629  s_mp_sub(a, n, a);
3630  goto top;
3631  }
3632 
3633 ERR:
3634  mp_clear(&q);
3635  return res;
3636 }
3637 
3638 /* determines the setup value */
3639 static int
3641 {
3642  int res, p;
3643  mp_int tmp;
3644 
3645  if ((res = mp_init(&tmp)) != MP_OKAY) {
3646  return res;
3647  }
3648 
3649  p = mp_count_bits(a);
3650  if ((res = mp_2expt(&tmp, p)) != MP_OKAY) {
3651  mp_clear(&tmp);
3652  return res;
3653  }
3654 
3655  if ((res = s_mp_sub(&tmp, a, &tmp)) != MP_OKAY) {
3656  mp_clear(&tmp);
3657  return res;
3658  }
3659 
3660  *d = tmp.dp[0];
3661  mp_clear(&tmp);
3662  return MP_OKAY;
3663 }
3664 
3665 /* pre-calculate the value required for Barrett reduction
3666  * For a given modulus "b" it calculates the value required in "a"
3667  */
3668 int mp_reduce_setup (mp_int * a, const mp_int * b)
3669 {
3670  int res;
3671 
3672  if ((res = mp_2expt (a, b->used * 2 * DIGIT_BIT)) != MP_OKAY) {
3673  return res;
3674  }
3675  return mp_div (a, b, a, NULL);
3676 }
3677 
3678 /* set to a digit */
3680 {
3681  mp_zero (a);
3682  a->dp[0] = b & MP_MASK;
3683  a->used = (a->dp[0] != 0) ? 1 : 0;
3684 }
3685 
3686 /* set a 32-bit const */
3687 int mp_set_int (mp_int * a, unsigned long b)
3688 {
3689  int x, res;
3690 
3691  mp_zero (a);
3692 
3693  /* set four bits at a time */
3694  for (x = 0; x < 8; x++) {
3695  /* shift the number up four bits */
3696  if ((res = mp_mul_2d (a, 4, a)) != MP_OKAY) {
3697  return res;
3698  }
3699 
3700  /* OR in the top four bits of the source */
3701  a->dp[0] |= (b >> 28) & 15;
3702 
3703  /* shift the source up to the next four bits */
3704  b <<= 4;
3705 
3706  /* ensure that digits are not clamped off */
3707  a->used += 1;
3708  }
3709  mp_clamp (a);
3710  return MP_OKAY;
3711 }
3712 
3713 /* shrink a bignum */
3715 {
3716  mp_digit *tmp;
3717  if (a->alloc != a->used && a->used > 0) {
3718  if ((tmp = HeapReAlloc(GetProcessHeap(), 0, a->dp, sizeof (mp_digit) * a->used)) == NULL) {
3719  return MP_MEM;
3720  }
3721  a->dp = tmp;
3722  a->alloc = a->used;
3723  }
3724  return MP_OKAY;
3725 }
3726 
3727 /* computes b = a*a */
3728 int
3729 mp_sqr (const mp_int * a, mp_int * b)
3730 {
3731  int res;
3732 
3733 if (a->used >= KARATSUBA_SQR_CUTOFF) {
3734  res = mp_karatsuba_sqr (a, b);
3735  } else
3736  {
3737  /* can we use the fast comba multiplier? */
3738  if ((a->used * 2 + 1) < MP_WARRAY &&
3739  a->used <
3740  (1 << (sizeof(mp_word) * CHAR_BIT - 2*DIGIT_BIT - 1))) {
3741  res = fast_s_mp_sqr (a, b);
3742  } else
3743  res = s_mp_sqr (a, b);
3744  }
3745  b->sign = MP_ZPOS;
3746  return res;
3747 }
3748 
3749 /* c = a * a (mod b) */
3750 int
3751 mp_sqrmod (const mp_int * a, mp_int * b, mp_int * c)
3752 {
3753  int res;
3754  mp_int t;
3755 
3756  if ((res = mp_init (&t)) != MP_OKAY) {
3757  return res;
3758  }
3759 
3760  if ((res = mp_sqr (a, &t)) != MP_OKAY) {
3761  mp_clear (&t);
3762  return res;
3763  }
3764  res = mp_mod (&t, b, c);
3765  mp_clear (&t);
3766  return res;
3767 }
3768 
3769 /* high level subtraction (handles signs) */
3770 int
3772 {
3773  int sa, sb, res;
3774 
3775  sa = a->sign;
3776  sb = b->sign;
3777 
3778  if (sa != sb) {
3779  /* subtract a negative from a positive, OR */
3780  /* subtract a positive from a negative. */
3781  /* In either case, ADD their magnitudes, */
3782  /* and use the sign of the first number. */
3783  c->sign = sa;
3784  res = s_mp_add (a, b, c);
3785  } else {
3786  /* subtract a positive from a positive, OR */
3787  /* subtract a negative from a negative. */
3788  /* First, take the difference between their */
3789  /* magnitudes, then... */
3790  if (mp_cmp_mag (a, b) != MP_LT) {
3791  /* Copy the sign from the first */
3792  c->sign = sa;
3793  /* The first has a larger or equal magnitude */
3794  res = s_mp_sub (a, b, c);
3795  } else {
3796  /* The result has the *opposite* sign from */
3797  /* the first number. */
3798  c->sign = (sa == MP_ZPOS) ? MP_NEG : MP_ZPOS;
3799  /* The second has a larger magnitude */
3800  res = s_mp_sub (b, a, c);
3801  }
3802  }
3803  return res;
3804 }
3805 
3806 /* single digit subtraction */
3807 int
3809 {
3810  mp_digit *tmpa, *tmpc, mu;
3811  int res, ix, oldused;
3812 
3813  /* grow c as required */
3814  if (c->alloc < a->used + 1) {
3815  if ((res = mp_grow(c, a->used + 1)) != MP_OKAY) {
3816  return res;
3817  }
3818  }
3819 
3820  /* if a is negative just do an unsigned
3821  * addition [with fudged signs]
3822  */
3823  if (a->sign == MP_NEG) {
3824  a->sign = MP_ZPOS;
3825  res = mp_add_d(a, b, c);
3826  a->sign = c->sign = MP_NEG;
3827  return res;
3828  }
3829 
3830  /* setup regs */
3831  oldused = c->used;
3832  tmpa = a->dp;
3833  tmpc = c->dp;
3834 
3835  /* if a <= b simply fix the single digit */
3836  if ((a->used == 1 && a->dp[0] <= b) || a->used == 0) {
3837  if (a->used == 1) {
3838  *tmpc++ = b - *tmpa;
3839  } else {
3840  *tmpc++ = b;
3841  }
3842  ix = 1;
3843 
3844  /* negative/1digit */
3845  c->sign = MP_NEG;
3846  c->used = 1;
3847  } else {
3848  /* positive/size */
3849  c->sign = MP_ZPOS;
3850  c->used = a->used;
3851 
3852  /* subtract first digit */
3853  *tmpc = *tmpa++ - b;
3854  mu = *tmpc >> (sizeof(mp_digit) * CHAR_BIT - 1);
3855  *tmpc++ &= MP_MASK;
3856 
3857  /* handle rest of the digits */
3858  for (ix = 1; ix < a->used; ix++) {
3859  *tmpc = *tmpa++ - mu;
3860  mu = *tmpc >> (sizeof(mp_digit) * CHAR_BIT - 1);
3861  *tmpc++ &= MP_MASK;
3862  }
3863  }
3864 
3865  /* zero excess digits */
3866  while (ix++ < oldused) {
3867  *tmpc++ = 0;
3868  }
3869  mp_clamp(c);
3870  return MP_OKAY;
3871 }
3872 
3873 /* store in unsigned [big endian] format */
3874 int
3875 mp_to_unsigned_bin (const mp_int * a, unsigned char *b)
3876 {
3877  int x, res;
3878  mp_int t;
3879 
3880  if ((res = mp_init_copy (&t, a)) != MP_OKAY) {
3881  return res;
3882  }
3883 
3884  x = 0;
3885  while (mp_iszero (&t) == 0) {
3886  b[x++] = (unsigned char) (t.dp[0] & 255);
3887  if ((res = mp_div_2d (&t, 8, &t, NULL)) != MP_OKAY) {
3888  mp_clear (&t);
3889  return res;
3890  }
3891  }
3892  bn_reverse (b, x);
3893  mp_clear (&t);
3894  return MP_OKAY;
3895 }
3896 
3897 /* get the size for an unsigned equivalent */
3898 int
3900 {
3901  int size = mp_count_bits (a);
3902  return (size / 8 + ((size & 7) != 0 ? 1 : 0));
3903 }
3904 
3905 /* reverse an array, used for radix code */
3906 static void
3907 bn_reverse (unsigned char *s, int len)
3908 {
3909  int ix, iy;
3910  unsigned char t;
3911 
3912  ix = 0;
3913  iy = len - 1;
3914  while (ix < iy) {
3915  t = s[ix];
3916  s[ix] = s[iy];
3917  s[iy] = t;
3918  ++ix;
3919  --iy;
3920  }
3921 }
3922 
3923 /* low level addition, based on HAC pp.594, Algorithm 14.7 */
3924 static int
3926 {
3927  mp_int *x;
3928  int olduse, res, min, max;
3929 
3930  /* find sizes, we let |a| <= |b| which means we have to sort
3931  * them. "x" will point to the input with the most digits
3932  */
3933  if (a->used > b->used) {
3934  min = b->used;
3935  max = a->used;
3936  x = a;
3937  } else {
3938  min = a->used;
3939  max = b->used;
3940  x = b;
3941  }
3942 
3943  /* init result */
3944  if (c->alloc < max + 1) {
3945  if ((res = mp_grow (c, max + 1)) != MP_OKAY) {
3946  return res;
3947  }
3948  }
3949 
3950  /* get old used digit count and set new one */
3951  olduse = c->used;
3952  c->used = max + 1;
3953 
3954  {
3955  register mp_digit u, *tmpa, *tmpb, *tmpc;
3956  register int i;
3957 
3958  /* alias for digit pointers */
3959 
3960  /* first input */
3961  tmpa = a->dp;
3962 
3963  /* second input */
3964  tmpb = b->dp;
3965 
3966  /* destination */
3967  tmpc = c->dp;
3968 
3969  /* zero the carry */
3970  u = 0;
3971  for (i = 0; i < min; i++) {
3972  /* Compute the sum at one digit, T[i] = A[i] + B[i] + U */
3973  *tmpc = *tmpa++ + *tmpb++ + u;
3974 
3975  /* U = carry bit of T[i] */
3976  u = *tmpc >> ((mp_digit)DIGIT_BIT);
3977 
3978  /* take away carry bit from T[i] */
3979  *tmpc++ &= MP_MASK;
3980  }
3981 
3982  /* now copy higher words if any, that is in A+B
3983  * if A or B has more digits add those in
3984  */
3985  if (min != max) {
3986  for (; i < max; i++) {
3987  /* T[i] = X[i] + U */
3988  *tmpc = x->dp[i] + u;
3989 
3990  /* U = carry bit of T[i] */
3991  u = *tmpc >> ((mp_digit)DIGIT_BIT);
3992 
3993  /* take away carry bit from T[i] */
3994  *tmpc++ &= MP_MASK;
3995  }
3996  }
3997 
3998  /* add carry */
3999  *tmpc++ = u;
4000 
4001  /* clear digits above oldused */
4002  for (i = c->used; i < olduse; i++) {
4003  *tmpc++ = 0;
4004  }
4005  }
4006 
4007  mp_clamp (c);
4008  return MP_OKAY;
4009 }
4010 
4011 static int s_mp_exptmod (const mp_int * G, const mp_int * X, mp_int * P, mp_int * Y)
4012 {
4013  mp_int M[256], res, mu;
4014  mp_digit buf;
4015  int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
4016 
4017  /* find window size */
4018  x = mp_count_bits (X);
4019  if (x <= 7) {
4020  winsize = 2;
4021  } else if (x <= 36) {
4022  winsize = 3;
4023  } else if (x <= 140) {
4024  winsize = 4;
4025  } else if (x <= 450) {
4026  winsize = 5;
4027  } else if (x <= 1303) {
4028  winsize = 6;
4029  } else if (x <= 3529) {
4030  winsize = 7;
4031  } else {
4032  winsize = 8;
4033  }
4034 
4035  /* init M array */
4036  /* init first cell */
4037  if ((err = mp_init(&M[1])) != MP_OKAY) {
4038  return err;
4039  }
4040 
4041  /* now init the second half of the array */
4042  for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
4043  if ((err = mp_init(&M[x])) != MP_OKAY) {
4044  for (y = 1<<(winsize-1); y < x; y++) {
4045  mp_clear (&M[y]);
4046  }
4047  mp_clear(&M[1]);
4048  return err;
4049  }
4050  }
4051 
4052  /* create mu, used for Barrett reduction */
4053  if ((err = mp_init (&mu)) != MP_OKAY) {
4054  goto __M;
4055  }
4056  if ((err = mp_reduce_setup (&mu, P)) != MP_OKAY) {
4057  goto __MU;
4058  }
4059 
4060  /* create M table
4061  *
4062  * The M table contains powers of the base,
4063  * e.g. M[x] = G**x mod P
4064  *
4065  * The first half of the table is not
4066  * computed though accept for M[0] and M[1]
4067  */
4068  if ((err = mp_mod (G, P, &M[1])) != MP_OKAY) {
4069  goto __MU;
4070  }
4071 
4072  /* compute the value at M[1<<(winsize-1)] by squaring
4073  * M[1] (winsize-1) times
4074  */
4075  if ((err = mp_copy (&M[1], &M[1 << (winsize - 1)])) != MP_OKAY) {
4076  goto __MU;
4077  }
4078 
4079  for (x = 0; x < (winsize - 1); x++) {
4080  if ((err = mp_sqr (&M[1 << (winsize - 1)],
4081  &M[1 << (winsize - 1)])) != MP_OKAY) {
4082  goto __MU;
4083  }
4084  if ((err = mp_reduce (&M[1 << (winsize - 1)], P, &mu)) != MP_OKAY) {
4085  goto __MU;
4086  }
4087  }
4088 
4089  /* create upper table, that is M[x] = M[x-1] * M[1] (mod P)
4090  * for x = (2**(winsize - 1) + 1) to (2**winsize - 1)
4091  */
4092  for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
4093  if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) {
4094  goto __MU;
4095  }
4096  if ((err = mp_reduce (&M[x], P, &mu)) != MP_OKAY) {
4097  goto __MU;
4098  }
4099  }
4100 
4101  /* setup result */
4102  if ((err = mp_init (&res)) != MP_OKAY) {
4103  goto __MU;
4104  }
4105  mp_set (&res, 1);
4106 
4107  /* set initial mode and bit cnt */
4108  mode = 0;
4109  bitcnt = 1;
4110  buf = 0;
4111  digidx = X->used - 1;
4112  bitcpy = 0;
4113  bitbuf = 0;
4114 
4115  for (;;) {
4116  /* grab next digit as required */
4117  if (--bitcnt == 0) {
4118  /* if digidx == -1 we are out of digits */
4119  if (digidx == -1) {
4120  break;
4121  }
4122  /* read next digit and reset the bitcnt */
4123  buf = X->dp[digidx--];
4124  bitcnt = DIGIT_BIT;
4125  }
4126 
4127  /* grab the next msb from the exponent */
4128  y = (buf >> (mp_digit)(DIGIT_BIT - 1)) & 1;
4129  buf <<= (mp_digit)1;
4130 
4131  /* if the bit is zero and mode == 0 then we ignore it
4132  * These represent the leading zero bits before the first 1 bit
4133  * in the exponent. Technically this opt is not required but it
4134  * does lower the # of trivial squaring/reductions used
4135  */
4136  if (mode == 0 && y == 0) {
4137  continue;
4138  }
4139 
4140  /* if the bit is zero and mode == 1 then we square */
4141  if (mode == 1 && y == 0) {
4142  if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
4143  goto __RES;
4144  }
4145  if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) {
4146  goto __RES;
4147  }
4148  continue;
4149  }
4150 
4151  /* else we add it to the window */
4152  bitbuf |= (y << (winsize - ++bitcpy));
4153  mode = 2;
4154 
4155  if (bitcpy == winsize) {
4156  /* ok window is filled so square as required and multiply */
4157  /* square first */
4158  for (x = 0; x < winsize; x++) {
4159  if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
4160  goto __RES;
4161  }
4162  if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) {
4163  goto __RES;
4164  }
4165  }
4166 
4167  /* then multiply */
4168  if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) {
4169  goto __RES;
4170  }
4171  if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) {
4172  goto __RES;
4173  }
4174 
4175  /* empty window and reset */
4176  bitcpy = 0;
4177  bitbuf = 0;
4178  mode = 1;
4179  }
4180  }
4181 
4182  /* if bits remain then square/multiply */
4183  if (mode == 2 && bitcpy > 0) {
4184  /* square then multiply if the bit is set */
4185  for (x = 0; x < bitcpy; x++) {
4186  if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
4187  goto __RES;
4188  }
4189  if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) {
4190  goto __RES;
4191  }
4192 
4193  bitbuf <<= 1;
4194  if ((bitbuf & (1 << winsize)) != 0) {
4195  /* then multiply */
4196  if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) {
4197  goto __RES;
4198  }
4199  if ((err = mp_reduce (&res, P, &mu)) != MP_OKAY) {
4200  goto __RES;
4201  }
4202  }
4203  }
4204  }
4205 
4206  mp_exch (&res, Y);
4207  err = MP_OKAY;
4208 __RES:mp_clear (&res);
4209 __MU:mp_clear (&mu);
4210 __M:
4211  mp_clear(&M[1]);
4212  for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
4213  mp_clear (&M[x]);
4214  }
4215  return err;
4216 }
4217 
4218 /* multiplies |a| * |b| and only computes up to digs digits of result
4219  * HAC pp. 595, Algorithm 14.12 Modified so you can control how
4220  * many digits of output are created.
4221  */
4222 static int
4223 s_mp_mul_digs (const mp_int * a, const mp_int * b, mp_int * c, int digs)
4224 {
4225  mp_int t;
4226  int res, pa, pb, ix, iy;
4227  mp_digit u;
4228  mp_word r;
4229  mp_digit tmpx, *tmpt, *tmpy;
4230 
4231  /* can we use the fast multiplier? */
4232  if (((digs) < MP_WARRAY) &&
4233  MIN (a->used, b->used) <
4234  (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
4235  return fast_s_mp_mul_digs (a, b, c, digs);
4236  }
4237 
4238  if ((res = mp_init_size (&t, digs)) != MP_OKAY) {
4239  return res;
4240  }
4241  t.used = digs;
4242 
4243  /* compute the digits of the product directly */
4244  pa = a->used;
4245  for (ix = 0; ix < pa; ix++) {
4246  /* set the carry to zero */
4247  u = 0;
4248 
4249  /* limit ourselves to making digs digits of output */
4250  pb = MIN (b->used, digs - ix);
4251 
4252  /* setup some aliases */
4253  /* copy of the digit from a used within the nested loop */
4254  tmpx = a->dp[ix];
4255 
4256  /* an alias for the destination shifted ix places */
4257  tmpt = t.dp + ix;
4258 
4259  /* an alias for the digits of b */
4260  tmpy = b->dp;
4261 
4262  /* compute the columns of the output and propagate the carry */
4263  for (iy = 0; iy < pb; iy++) {
4264  /* compute the column as a mp_word */
4265  r = ((mp_word)*tmpt) +
4266  ((mp_word)tmpx) * ((mp_word)*tmpy++) +
4267  ((mp_word) u);
4268 
4269  /* the new column is the lower part of the result */
4270  *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
4271 
4272  /* get the carry word from the result */
4273  u = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
4274  }
4275  /* set carry if it is placed below digs */
4276  if (ix + iy < digs) {
4277  *tmpt = u;
4278  }
4279  }
4280 
4281  mp_clamp (&t);
4282  mp_exch (&t, c);
4283 
4284  mp_clear (&t);
4285  return MP_OKAY;
4286 }
4287 
4288 /* multiplies |a| * |b| and does not compute the lower digs digits
4289  * [meant to get the higher part of the product]
4290  */
4291 static int
4292 s_mp_mul_high_digs (const mp_int * a, const mp_int * b, mp_int * c, int digs)
4293 {
4294  mp_int t;
4295  int res, pa, pb, ix, iy;
4296  mp_digit u;
4297  mp_word r;
4298  mp_digit tmpx, *tmpt, *tmpy;
4299 
4300  /* can we use the fast multiplier? */
4301  if (((a->used + b->used + 1) < MP_WARRAY)
4302  && MIN (a->used, b->used) < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
4303  return fast_s_mp_mul_high_digs (a, b, c, digs);
4304  }
4305 
4306  if ((res = mp_init_size (&t, a->used + b->used + 1)) != MP_OKAY) {
4307  return res;
4308  }
4309  t.used = a->used + b->used + 1;
4310 
4311  pa = a->used;
4312  pb = b->used;
4313  for (ix = 0; ix < pa; ix++) {
4314  /* clear the carry */
4315  u = 0;
4316 
4317  /* left hand side of A[ix] * B[iy] */
4318  tmpx = a->dp[ix];
4319 
4320  /* alias to the address of where the digits will be stored */
4321  tmpt = &(t.dp[digs]);
4322 
4323  /* alias for where to read the right hand side from */
4324  tmpy = b->dp + (digs - ix);
4325 
4326  for (iy = digs - ix; iy < pb; iy++) {
4327  /* calculate the double precision result */
4328  r = ((mp_word)*tmpt) +
4329  ((mp_word)tmpx) * ((mp_word)*tmpy++) +
4330  ((mp_word) u);
4331 
4332  /* get the lower part */
4333  *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
4334 
4335  /* carry the carry */
4336  u = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
4337  }
4338  *tmpt = u;
4339  }
4340  mp_clamp (&t);
4341  mp_exch (&t, c);
4342  mp_clear (&t);
4343  return MP_OKAY;
4344 }
4345 
4346 /* low level squaring, b = a*a, HAC pp.596-597, Algorithm 14.16 */
4347 static int
4348 s_mp_sqr (const mp_int * a, mp_int * b)
4349 {
4350  mp_int t;
4351  int res, ix, iy, pa;
4352  mp_word r;
4353  mp_digit u, tmpx, *tmpt;
4354 
4355  pa = a->used;
4356  if ((res = mp_init_size (&t, 2*pa + 1)) != MP_OKAY) {
4357  return res;
4358  }
4359 
4360  /* default used is maximum possible size */
4361  t.used = 2*pa + 1;
4362 
4363  for (ix = 0; ix < pa; ix++) {
4364  /* first calculate the digit at 2*ix */
4365  /* calculate double precision result */
4366  r = ((mp_word) t.dp[2*ix]) +
4367  ((mp_word)a->dp[ix])*((mp_word)a->dp[ix]);
4368 
4369  /* store lower part in result */
4370  t.dp[ix+ix] = (mp_digit) (r & ((mp_word) MP_MASK));
4371 
4372  /* get the carry */
4373  u = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
4374 
4375  /* left hand side of A[ix] * A[iy] */
4376  tmpx = a->dp[ix];
4377 
4378  /* alias for where to store the results */
4379  tmpt = t.dp + (2*ix + 1);
4380 
4381  for (iy = ix + 1; iy < pa; iy++) {
4382  /* first calculate the product */
4383  r = ((mp_word)tmpx) * ((mp_word)a->dp[iy]);
4384 
4385  /* now calculate the double precision result, note we use
4386  * addition instead of *2 since it's easier to optimize
4387  */
4388  r = ((mp_word) *tmpt) + r + r + ((mp_word) u);
4389 
4390  /* store lower part */
4391  *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
4392 
4393  /* get carry */
4394  u = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
4395  }
4396  /* propagate upwards */
4397  while (u != 0) {
4398  r = ((mp_word) *tmpt) + ((mp_word) u);
4399  *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
4400  u = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
4401  }
4402  }
4403 
4404  mp_clamp (&t);
4405  mp_exch (&t, b);
4406  mp_clear (&t);
4407  return MP_OKAY;
4408 }
4409 
4410 /* low level subtraction (assumes |a| > |b|), HAC pp.595 Algorithm 14.9 */
4411 int
4412 s_mp_sub (const mp_int * a, const mp_int * b, mp_int * c)
4413 {
4414  int olduse, res, min, max;
4415 
4416  /* find sizes */
4417  min = b->used;
4418  max = a->used;
4419 
4420  /* init result */
4421  if (c->alloc < max) {
4422  if ((res = mp_grow (c, max)) != MP_OKAY) {
4423  return res;
4424  }
4425  }
4426  olduse = c->used;
4427  c->used = max;
4428 
4429  {
4430  register mp_digit u, *tmpa, *tmpb, *tmpc;
4431  register int i;
4432 
4433  /* alias for digit pointers */
4434  tmpa = a->dp;
4435  tmpb = b->dp;
4436  tmpc = c->dp;
4437 
4438  /* set carry to zero */
4439  u = 0;
4440  for (i = 0; i < min; i++) {
4441  /* T[i] = A[i] - B[i] - U */
4442  *tmpc = *tmpa++ - *tmpb++ - u;
4443 
4444  /* U = carry bit of T[i]
4445  * Note this saves performing an AND operation since
4446  * if a carry does occur it will propagate all the way to the
4447  * MSB. As a result a single shift is enough to get the carry
4448  */
4449  u = *tmpc >> ((mp_digit)(CHAR_BIT * sizeof (mp_digit) - 1));
4450 
4451  /* Clear carry from T[i] */
4452  *tmpc++ &= MP_MASK;
4453  }
4454 
4455  /* now copy higher words if any, e.g. if A has more digits than B */
4456  for (; i < max; i++) {
4457  /* T[i] = A[i] - U */
4458  *tmpc = *tmpa++ - u;
4459 
4460  /* U = carry bit of T[i] */
4461  u = *tmpc >> ((mp_digit)(CHAR_BIT * sizeof (mp_digit) - 1));
4462 
4463  /* Clear carry from T[i] */
4464  *tmpc++ &= MP_MASK;
4465  }
4466 
4467  /* clear digits above used (since we may not have grown result above) */
4468  for (i = c->used; i < olduse; i++) {
4469  *tmpc++ = 0;
4470  }
4471  }
4472 
4473  mp_clamp (c);
4474  return MP_OKAY;
4475 }
GLsizei GLenum const GLvoid GLsizei GLenum GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLint GLint GLint GLshort GLshort GLshort GLubyte GLubyte GLubyte GLuint GLuint GLuint GLushort GLushort GLushort GLbyte GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLfloat GLint GLint GLint GLint GLshort GLshort GLshort GLshort GLubyte GLubyte GLubyte GLubyte GLuint GLuint GLuint GLuint GLushort GLushort GLushort GLushort GLboolean const GLdouble const GLfloat const GLint const GLshort const GLbyte const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLdouble const GLfloat const GLfloat const GLint const GLint const GLshort const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort GLenum GLenum GLenum GLfloat GLenum GLint GLenum GLenum GLenum GLfloat GLenum GLenum GLint GLenum GLfloat GLenum GLint GLint GLushort GLenum GLenum GLfloat GLenum GLenum GLint GLfloat const GLubyte GLenum GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLint GLint GLsizei GLsizei GLint GLenum GLenum const GLvoid GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLenum const GLdouble GLenum GLenum const GLfloat GLenum GLenum const GLint GLsizei GLuint GLfloat GLuint GLbitfield GLfloat GLint GLuint GLboolean GLenum GLfloat GLenum GLbitfield GLenum GLfloat GLfloat GLint GLint const GLfloat GLenum GLfloat GLfloat GLint GLint GLfloat GLfloat GLint GLint const GLfloat GLint GLfloat GLfloat GLint GLfloat GLfloat GLint GLfloat GLfloat const GLdouble * u
Definition: glfuncs.h:240
_In_ CLIPOBJ _In_ BRUSHOBJ _In_ LONG _In_ LONG y1
Definition: winddi.h:3706
static int mp_div_2d(const mp_int *a, int b, mp_int *c, mp_int *d)
Definition: mpi.c:1310
#define MP_NO
Definition: tomcrypt.h:206
int ltm_prime_callback(unsigned char *dst, int len, void *dat)
Definition: tomcrypt.h:231
int mp_add(mp_int *a, mp_int *b, mp_int *c)
Definition: mpi.c:891
#define C(name, bit)
Definition: cpu.h:208
#define max(a, b)
Definition: svc.c:63
static int mp_cnt_lsb(const mp_int *a)
Definition: mpi.c:1128
#define TRUE
Definition: types.h:120
int mp_set_int(mp_int *a, unsigned long b)
Definition: mpi.c:3687
#define shift
Definition: input.c:1668
void mp_clear_multi(mp_int *mp,...)
Definition: mpi.c:1032
GLubyte GLubyte GLubyte GLubyte w
Definition: glext.h:6102
static void mp_clamp(mp_int *a)
Definition: mpi.c:1017
#define CLEANUP
Definition: ntuser.h:5
static int mp_montgomery_calc_normalization(mp_int *a, const mp_int *b)
Definition: mpi.c:2945
static int mp_grow(mp_int *a, int size)
Definition: mpi.c:106
#define error(str)
Definition: mkdosfs.c:1605
GLint x0
Definition: linetemp.h:95
#define Y(I)
_Tp _STLP_CALL norm(const complex< _Tp > &__z)
Definition: _complex.h:741
ActualNumberDriverObjects * sizeof(PDRIVER_OBJECT)) PDRIVER_OBJECT *DriverObjectList
int mp_lcm(const mp_int *a, const mp_int *b, mp_int *c)
Definition: mpi.c:2807
GLenum GLuint GLenum GLsizei const GLchar * buf
Definition: glext.h:7751
static int mp_prime_miller_rabin(mp_int *a, const mp_int *b, int *result)
Definition: mpi.c:3231
int mp_prime_random_ex(mp_int *a, int t, int size, int flags, ltm_prime_callback cb, void *dat)
Definition: mpi.c:3412
superblock * sb
Definition: btrfs.c:4162
static const int lnz[16]
Definition: mpi.c:1123
static int s_mp_exptmod(const mp_int *G, const mp_int *X, mp_int *P, mp_int *Y)
Definition: mpi.c:4011
GLdouble GLdouble GLdouble r
Definition: gl.h:2055
int mp_shrink(mp_int *a)
Definition: mpi.c:3714
#define MP_PREC
Definition: tomcrypt.h:219
#define MP_MASK
Definition: tomcrypt.h:189
_In_ CLIPOBJ _In_ BRUSHOBJ _In_ LONG x1
Definition: winddi.h:3706
#define M(row, col)
static void mp_clear(mp_int *a)
Definition: mpi.c:255
int mp_mod(const mp_int *a, mp_int *b, mp_int *c)
Definition: mpi.c:2847
GLdouble n
Definition: glext.h:7729
int mp_count_bits(const mp_int *a)
Definition: mpi.c:1203
GLdouble GLdouble t
Definition: gl.h:2047
static int mp_dr_reduce(mp_int *x, const mp_int *n, mp_digit k)
Definition: mpi.c:1844
static int mp_add_d(mp_int *a, mp_digit b, mp_int *c)
Definition: mpi.c:924
GLint GLint GLint GLint GLint x
Definition: gl.h:1548
#define CHAR_BIT
Definition: urlcache.c:57
#define __T(x)
Definition: vfdio.h:17
static int s_mp_add(mp_int *a, mp_int *b, mp_int *c)
Definition: mpi.c:3925
#define LTM_PRIME_BBS
Definition: tomcrypt.h:209
int t
Definition: mpi.c:3369
static int mp_prime_is_divisible(const mp_int *a, int *result)
Definition: mpi.c:3200
int iy
Definition: tritemp.h:491
static void mp_dr_setup(const mp_int *a, mp_digit *d)
Definition: mpi.c:1903
static int mp_abs(const mp_int *a, mp_int *b)
Definition: mpi.c:290
int mp_cmp(const mp_int *a, const mp_int *b)
Definition: mpi.c:1046
Definition: match.c:390
static int mp_karatsuba_mul(const mp_int *a, const mp_int *b, mp_int *c)
Definition: mpi.c:2589
GLenum GLint GLenum GLsizei GLsizei GLsizei GLint GLsizei const GLvoid * bits
Definition: glext.h:10929
ulong64 mp_word
Definition: tomcrypt.h:185
#define DIGIT_BIT
Definition: tomcrypt.h:186
const GLfloat * m
Definition: glext.h:10848
static int mp_montgomery_reduce(mp_int *a, const mp_int *m, mp_digit mp)
Definition: mpi.c:2980
static int mp_reduce_2k_setup(const mp_int *a, mp_digit *d)
Definition: mpi.c:3640
T MIN(T a, T b)
Definition: polytest.cpp:79
#define DIGIT(m, k)
Definition: tomcrypt.h:233
static int mp_reduce_2k(mp_int *a, const mp_int *n, mp_digit d)
Definition: mpi.c:3600
#define A(row, col)
GLsizei GLenum const GLvoid GLsizei GLenum GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLint GLint GLint GLshort GLshort GLshort GLubyte GLubyte GLubyte GLuint GLuint GLuint GLushort GLushort GLushort GLbyte GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLfloat GLint GLint GLint GLint GLshort GLshort GLshort GLshort GLubyte GLubyte GLubyte GLubyte GLuint GLuint GLuint GLuint GLushort GLushort GLushort GLushort GLboolean const GLdouble const GLfloat const GLint const GLshort const GLbyte const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLdouble const GLfloat const GLfloat const GLint const GLint const GLshort const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort GLenum GLenum GLenum GLfloat GLenum GLint GLenum GLenum GLenum GLfloat GLenum GLenum GLint GLenum GLfloat GLenum GLint GLint GLushort GLenum GLenum GLfloat GLenum GLenum GLint GLfloat const GLubyte GLenum GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLint GLint GLsizei GLsizei GLint GLenum GLenum const GLvoid GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLenum const GLdouble GLenum GLenum const GLfloat GLenum GLenum const GLint GLsizei GLuint GLfloat GLuint GLbitfield GLfloat GLint GLuint GLboolean GLenum GLfloat GLenum GLbitfield GLenum GLfloat GLfloat GLint GLint const GLfloat GLenum GLfloat GLfloat GLint GLint GLfloat GLfloat GLint GLint const GLfloat GLint GLfloat GLfloat GLint GLfloat GLfloat GLint GLfloat GLfloat const GLdouble const GLfloat const GLdouble const GLfloat GLint i
Definition: glfuncs.h:248
static int mp_mul_d(const mp_int *a, mp_digit b, mp_int *c)
Definition: mpi.c:1497
GLenum GLint GLuint mask
Definition: glext.h:6028
#define MP_LT
Definition: tomcrypt.h:193
#define va_end(ap)
Definition: acmsvcex.h:90
int mp_prime_rabin_miller_trials(int size)
Definition: mpi.c:3382
static int mp_karatsuba_sqr(const mp_int *a, mp_int *b)
Definition: mpi.c:2713
static int mp_sqr(const mp_int *a, mp_int *b)
Definition: mpi.c:3729
unsigned int BOOL
Definition: ntddk_ex.h:94
static BOOL s_is_power_of_two(mp_digit b, int *p)
Definition: mpi.c:1748
#define a
Definition: ke_i.h:78
static int mp_lshd(mp_int *a, int b)
Definition: mpi.c:1386
static void mp_set(mp_int *a, mp_digit b)
Definition: mpi.c:3679
Definition: polytest.cpp:35
static const mp_digit __prime_tab[]
Definition: mpi.c:3157
int mp_cmp_d(const mp_int *a, mp_digit b)
Definition: mpi.c:1067
static int mp_invmod_slow(const mp_int *a, mp_int *b, mp_int *c)
Definition: mpi.c:2407
static int mp_reduce(mp_int *a, const mp_int *b, const mp_int *c)
Definition: mpi.c:3534
#define MP_MEM
Definition: tomcrypt.h:201
smooth NULL
Definition: ftsmooth.c:416
unsigned char
Definition: typeof.h:29
static int mp_init_size(mp_int *a, int size)
Definition: mpi.c:227
char * va_list
Definition: acmsvcex.h:78
GLint GLint bottom
Definition: glext.h:7726
int mp_mul(const mp_int *a, const mp_int *b, mp_int *c)
Definition: mpi.c:3107
static int mp_div_d(const mp_int *a, mp_digit b, mp_int *c, mp_digit *d)
Definition: mpi.c:1762
#define MP_EQ
Definition: tomcrypt.h:194
static int fast_s_mp_mul_digs(const mp_int *a, const mp_int *b, mp_int *c, int digs)
Definition: mpi.c:604
#define b
Definition: ke_i.h:79
GLbyte ty
Definition: glext.h:8756
GLsizei GLenum const GLvoid GLsizei GLenum GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLint GLint GLint GLshort GLshort GLshort GLubyte GLubyte GLubyte GLuint GLuint GLuint GLushort GLushort GLushort GLbyte GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLfloat GLint GLint GLint GLint GLshort GLshort GLshort GLshort GLubyte GLubyte GLubyte GLubyte GLuint GLuint GLuint GLuint GLushort GLushort GLushort GLushort GLboolean const GLdouble const GLfloat const GLint const GLshort const GLbyte const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLdouble const GLfloat const GLfloat const GLint const GLint const GLshort const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort GLenum GLenum GLenum GLfloat GLenum GLint GLenum GLenum GLenum GLfloat GLenum GLenum GLint GLenum GLfloat GLenum GLint GLint GLushort GLenum GLenum GLfloat GLenum GLenum GLint GLfloat const GLubyte GLenum GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLint GLint GLsizei GLsizei GLint GLenum GLenum const GLvoid GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLenum const GLdouble GLenum GLenum const GLfloat GLenum GLenum const GLint GLsizei GLuint GLfloat GLuint GLbitfield GLfloat GLint GLuint GLboolean GLenum GLfloat GLenum GLbitfield GLenum GLfloat GLfloat GLint GLint const GLfloat GLenum GLfloat GLfloat GLint GLint GLfloat GLfloat GLint GLint const GLfloat GLint GLfloat GLfloat GLint GLfloat GLfloat GLint GLfloat GLfloat const GLdouble const GLfloat const GLdouble const GLfloat GLint GLint GLint j
Definition: glfuncs.h:250
static int mp_init(mp_int *a)
Definition: mpi.c:202
static int fast_mp_montgomery_reduce(mp_int *x, const mp_int *n, mp_digit rho)
Definition: mpi.c:444
static const int KARATSUBA_SQR_CUTOFF
Definition: mpi.c:43
GLboolean GLboolean GLboolean b
Definition: glext.h:6204
static int s_mp_mul_high_digs(const mp_int *a, const mp_int *b, mp_int *c, int digs)
Definition: mpi.c:4292
int mp_init_copy(mp_int *a, const mp_int *b)
Definition: mpi.c:2344
#define s_mp_mul(a, b, c)
Definition: mpi.c:95
GLsizeiptr size
Definition: glext.h:5919
#define mp_isodd(a)
Definition: tomcrypt.h:250
#define GetProcessHeap()
Definition: compat.h:395
PVOID WINAPI HeapAlloc(HANDLE, DWORD, SIZE_T)
#define d
Definition: ke_i.h:81
static int mp_exptmod_fast(const mp_int *G, const mp_int *X, mp_int *P, mp_int *Y, int mode)
Definition: mpi.c:1976
if(!(yy_init))
Definition: macro.lex.yy.c:714
static int mp_div(const mp_int *a, const mp_int *b, mp_int *c, mp_int *d)
Definition: mpi.c:1565
static void mp_exch(mp_int *a, mp_int *b)
Definition: mpi.c:192
unsigned long mp_get_int(const mp_int *a)
Definition: mpi.c:2320
const GLubyte * c
Definition: glext.h:8905
static int mp_sqrmod(const mp_int *a, mp_int *b, mp_int *c)
Definition: mpi.c:3751
static int mp_reduce_setup(mp_int *a, const mp_int *b)
Definition: mpi.c:3668
static int s_mp_sub(const mp_int *a, const mp_int *b, mp_int *c)
Definition: mpi.c:4412
int mp_to_unsigned_bin(const mp_int *a, unsigned char *b)
Definition: mpi.c:3875
#define mp_iszero(a)
Definition: tomcrypt.h:248
int n1
Definition: dwarfget.c:148
GLdouble GLdouble GLdouble GLdouble q
Definition: gl.h:2063
#define PRIME_SIZE
Definition: tomcrypt.h:371
static DWORD cb
Definition: integrity.c:41
struct W W
GLbitfield flags
Definition: glext.h:7161
static int mp_2expt(mp_int *a, int b)
Definition: mpi.c:869
static int s_mp_sqr(const mp_int *a, mp_int *b)
Definition: mpi.c:4348
static void bn_reverse(unsigned char *s, int len)
Definition: mpi.c:3907
static const struct @514 sizes[]
Definition: ttei1.cpp:12
#define MP_GT
Definition: tomcrypt.h:195
#define MP_WARRAY
Definition: tomcrypt.h:222
static void mp_zero(mp_int *a)
Definition: mpi.c:278
GLenum GLsizei len
Definition: glext.h:6722
GLdouble s
Definition: gl.h:2039
static const int KARATSUBA_MUL_CUTOFF
Definition: mpi.c:42
#define MP_VAL
Definition: tomcrypt.h:202
#define MP_ZPOS
Definition: tomcrypt.h:197
GLenum src
Definition: glext.h:6340
#define err(...)
int mp_sub_d(mp_int *a, mp_digit b, mp_int *c)
Definition: mpi.c:3808
GLenum mode
Definition: glext.h:6217
#define P(row, col)
#define MP_OKAY
Definition: tomcrypt.h:200
#define mp_iseven(a)
Definition: tomcrypt.h:249
int mp_exptmod(const mp_int *G, const mp_int *X, mp_int *P, mp_int *Y)
Definition: mpi.c:1917
#define ERR(fmt,...)
Definition: debug.h:109
int mp_gcd(const mp_int *a, const mp_int *b, mp_int *c)
Definition: mpi.c:2228
#define G(x, y, z)
Definition: md5.c:52
#define va_arg(ap, T)
Definition: acmsvcex.h:89
#define D(name, bit)
const GLdouble * v
Definition: gl.h:2040
int mp_unsigned_bin_size(const mp_int *a)
Definition: mpi.c:3899
#define LTM_PRIME_2MSB_OFF
Definition: tomcrypt.h:211
#define MP_NEG
Definition: tomcrypt.h:198
#define HeapReAlloc
Definition: compat.h:393
GLenum GLenum dst
Definition: glext.h:6340
mp_digit * dp
Definition: tomcrypt.h:227
Definition: ttei6.cpp:27
GLint y0
Definition: linetemp.h:96
#define min(a, b)
Definition: monoChain.cc:55
#define va_start(ap, A)
Definition: acmsvcex.h:91
#define B(row, col)
static void mp_rshd(mp_int *a, int b)
Definition: mpi.c:1259
static int mp_mul_2(const mp_int *a, mp_int *b)
Definition: mpi.c:2879
#define MP_YES
Definition: tomcrypt.h:205
GLint GLint GLint GLint GLint GLint y
Definition: gl.h:1548
int mp_read_unsigned_bin(mp_int *a, const unsigned char *b, int c)
Definition: mpi.c:3502