1/*
2 * Copyright (c) 2007, 2016, Oracle and/or its affiliates. All rights reserved.
3 * Use is subject to license terms.
4 *
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
8 * version 2.1 of the License, or (at your option) any later version.
9 *
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
14 *
15 * You should have received a copy of the GNU Lesser General Public License
16 * along with this library; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 */
23
24/* *********************************************************************
25 *
26 * The Original Code is the MPI Arbitrary Precision Integer Arithmetic library.
27 *
28 * The Initial Developer of the Original Code is
29 * Michael J. Fromberger.
30 * Portions created by the Initial Developer are Copyright (C) 1998
31 * the Initial Developer. All Rights Reserved.
32 *
33 * Contributor(s):
34 * Netscape Communications Corporation
35 * Douglas Stebila <douglas@stebila.ca> of Sun Laboratories.
36 *
37 * Last Modified Date from the Original Code: Nov 2016
38 *********************************************************************** */
39
40/* Arbitrary precision integer arithmetic library */
41
42#include "mpi-priv.h"
43#if defined(OSF1)
44#include <c_asm.h>
45#endif
46
47#if MP_LOGTAB
48/*
49 A table of the logs of 2 for various bases (the 0 and 1 entries of
50 this table are meaningless and should not be referenced).
51
52 This table is used to compute output lengths for the mp_toradix()
53 function. Since a number n in radix r takes up about log_r(n)
54 digits, we estimate the output size by taking the least integer
55 greater than log_r(n), where:
56
57 log_r(n) = log_2(n) * log_r(2)
58
59 This table, therefore, is a table of log_r(2) for 2 <= r <= 36,
60 which are the output bases supported.
61 */
62#include "logtab.h"
63#endif
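
/*
  Illustrative example (not part of the library): the table turns a bit
  length into an output-digit estimate.  A 128-bit value printed in
  base 10 needs about

     128 * log_10(2) = 128 * 0.30103... ~= 38.5

  digits, so 39 characters plus room for a sign and a terminating NUL.
  mp_radix_size() below makes this estimate via s_mp_outlen().
*/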
64
65/* {{{ Constant strings */
66
67/* Constant strings returned by mp_strerror() */
68static const char *mp_err_string[] = {
69 "unknown result code", /* say what? */
70 "boolean true", /* MP_OKAY, MP_YES */
71 "boolean false", /* MP_NO */
72 "out of memory", /* MP_MEM */
73 "argument out of range", /* MP_RANGE */
74 "invalid input parameter", /* MP_BADARG */
75 "result is undefined" /* MP_UNDEF */
76};
77
78/* Value to digit maps for radix conversion */
79
80/* s_dmap_1 - standard digits and letters */
81static const char *s_dmap_1 =
82 "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+/";
83
84/* }}} */
85
86unsigned long mp_allocs;
87unsigned long mp_frees;
88unsigned long mp_copies;
89
90/* {{{ Default precision manipulation */
91
92/* Default precision for newly created mp_int's */
93static mp_size s_mp_defprec = MP_DEFPREC;
94
95mp_size mp_get_prec(void)
96{
97 return s_mp_defprec;
98
99} /* end mp_get_prec() */
100
101void mp_set_prec(mp_size prec)
102{
103 if(prec == 0)
104 s_mp_defprec = MP_DEFPREC;
105 else
106 s_mp_defprec = prec;
107
108} /* end mp_set_prec() */
109
110/* }}} */
111
112/*------------------------------------------------------------------------*/
113/* {{{ mp_init(mp, kmflag) */
114
115/*
116 mp_init(mp, kmflag)
117
118 Initialize a new zero-valued mp_int. Returns MP_OKAY if successful,
119 MP_MEM if memory could not be allocated for the structure.
120 */
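
/*
  Minimal usage sketch (illustrative only; assumes a build in which a
  kmflag of 0 selects the default allocator):

    mp_int a;
    if (mp_init(&a, 0) != MP_OKAY)
      return MP_MEM;
    mp_set(&a, 5);
    ...
    mp_clear(&a);

  mp_set() stores the single digit 5, and mp_clear() releases the digit
  storage once the value is no longer needed.
*/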
121
122mp_err mp_init(mp_int *mp, int kmflag)
123{
124 return mp_init_size(mp, s_mp_defprec, kmflag);
125
126} /* end mp_init() */
127
128/* }}} */
129
130/* {{{ mp_init_size(mp, prec, kmflag) */
131
132/*
133 mp_init_size(mp, prec, kmflag)
134
135 Initialize a new zero-valued mp_int with at least the given
136 precision; returns MP_OKAY if successful, or MP_MEM if memory could
137 not be allocated for the structure.
138 */
139
140mp_err mp_init_size(mp_int *mp, mp_size prec, int kmflag)
141{
142 ARGCHK(mp != NULL && prec > 0, MP_BADARG);
143
144 prec = MP_ROUNDUP(prec, s_mp_defprec);
145 if((DIGITS(mp) = s_mp_alloc(prec, sizeof(mp_digit), kmflag)) == NULL)
146 return MP_MEM;
147
148 SIGN(mp) = ZPOS;
149 USED(mp) = 1;
150 ALLOC(mp) = prec;
151
152 return MP_OKAY;
153
154} /* end mp_init_size() */
155
156/* }}} */
157
158/* {{{ mp_init_copy(mp, from) */
159
160/*
161 mp_init_copy(mp, from)
162
163 Initialize mp as an exact copy of from. Returns MP_OKAY if
164 successful, MP_MEM if memory could not be allocated for the new
165 structure.
166 */
167
168mp_err mp_init_copy(mp_int *mp, const mp_int *from)
169{
170 ARGCHK(mp != NULL && from != NULL, MP_BADARG);
171
172 if(mp == from)
173 return MP_OKAY;
174
175 if((DIGITS(mp) = s_mp_alloc(ALLOC(from), sizeof(mp_digit), FLAG(from))) == NULL)
176 return MP_MEM;
177
178 s_mp_copy(DIGITS(from), DIGITS(mp), USED(from));
179 USED(mp) = USED(from);
180 ALLOC(mp) = ALLOC(from);
181 SIGN(mp) = SIGN(from);
182
183#ifndef _WIN32
184 FLAG(mp) = FLAG(from);
185#endif /* _WIN32 */
186
187 return MP_OKAY;
188
189} /* end mp_init_copy() */
190
191/* }}} */
192
193/* {{{ mp_copy(from, to) */
194
195/*
196 mp_copy(from, to)
197
198 Copies the mp_int 'from' to the mp_int 'to'. It is presumed that
199 'to' has already been initialized (if not, use mp_init_copy()
200 instead). If 'from' and 'to' are identical, nothing happens.
201 */
202
203mp_err mp_copy(const mp_int *from, mp_int *to)
204{
205 ARGCHK(from != NULL && to != NULL, MP_BADARG);
206
207 if(from == to)
208 return MP_OKAY;
209
210 ++mp_copies;
211 { /* copy */
212 mp_digit *tmp;
213
214 /*
215 If the allocated buffer in 'to' already has enough space to hold
216 all the used digits of 'from', we'll re-use it to avoid hitting
      the memory allocator more than necessary; otherwise, we'd have
218 to grow anyway, so we just allocate a hunk and make the copy as
219 usual
220 */
221 if(ALLOC(to) >= USED(from)) {
222 s_mp_setz(DIGITS(to) + USED(from), ALLOC(to) - USED(from));
223 s_mp_copy(DIGITS(from), DIGITS(to), USED(from));
224
225 } else {
226 if((tmp = s_mp_alloc(ALLOC(from), sizeof(mp_digit), FLAG(from))) == NULL)
227 return MP_MEM;
228
229 s_mp_copy(DIGITS(from), tmp, USED(from));
230
231 if(DIGITS(to) != NULL) {
232#if MP_CRYPTO
233 s_mp_setz(DIGITS(to), ALLOC(to));
234#endif
235 s_mp_free(DIGITS(to), ALLOC(to));
236 }
237
238 DIGITS(to) = tmp;
239 ALLOC(to) = ALLOC(from);
240 }
241
242 /* Copy the precision and sign from the original */
243 USED(to) = USED(from);
244 SIGN(to) = SIGN(from);
245 } /* end copy */
246
247 return MP_OKAY;
248
249} /* end mp_copy() */
250
251/* }}} */
252
253/* {{{ mp_exch(mp1, mp2) */
254
255/*
256 mp_exch(mp1, mp2)
257
258 Exchange mp1 and mp2 without allocating any intermediate memory
259 (well, unless you count the stack space needed for this call and the
260 locals it creates...). This cannot fail.
261 */
262
263void mp_exch(mp_int *mp1, mp_int *mp2)
264{
265#if MP_ARGCHK == 2
266 assert(mp1 != NULL && mp2 != NULL);
267#else
268 if(mp1 == NULL || mp2 == NULL)
269 return;
270#endif
271
272 s_mp_exch(mp1, mp2);
273
274} /* end mp_exch() */
275
276/* }}} */
277
278/* {{{ mp_clear(mp) */
279
280/*
281 mp_clear(mp)
282
  Release the storage used by an mp_int, and void its fields so that
  a later call to mp_clear() on the same mp_int is harmless.
286 */
287
288void mp_clear(mp_int *mp)
289{
290 if(mp == NULL)
291 return;
292
293 if(DIGITS(mp) != NULL) {
294#if MP_CRYPTO
295 s_mp_setz(DIGITS(mp), ALLOC(mp));
296#endif
297 s_mp_free(DIGITS(mp), ALLOC(mp));
298 DIGITS(mp) = NULL;
299 }
300
301 USED(mp) = 0;
302 ALLOC(mp) = 0;
303
304} /* end mp_clear() */
305
306/* }}} */
307
308/* {{{ mp_zero(mp) */
309
310/*
311 mp_zero(mp)
312
313 Set mp to zero. Does not change the allocated size of the structure,
314 and therefore cannot fail (except on a bad argument, which we ignore)
315 */
316void mp_zero(mp_int *mp)
317{
318 if(mp == NULL)
319 return;
320
321 s_mp_setz(DIGITS(mp), ALLOC(mp));
322 USED(mp) = 1;
323 SIGN(mp) = ZPOS;
324
325} /* end mp_zero() */
326
327/* }}} */
328
329/* {{{ mp_set(mp, d) */
330
331void mp_set(mp_int *mp, mp_digit d)
332{
333 if(mp == NULL)
334 return;
335
336 mp_zero(mp);
337 DIGIT(mp, 0) = d;
338
339} /* end mp_set() */
340
341/* }}} */
342
343/* {{{ mp_set_int(mp, z) */
344
345mp_err mp_set_int(mp_int *mp, long z)
346{
347 int ix;
348 unsigned long v = labs(z);
349 mp_err res;
350
351 ARGCHK(mp != NULL, MP_BADARG);
352
353 mp_zero(mp);
354 if(z == 0)
355 return MP_OKAY; /* shortcut for zero */
356
357 if (sizeof v <= sizeof(mp_digit)) {
358 DIGIT(mp,0) = v;
359 } else {
360 for (ix = sizeof(long) - 1; ix >= 0; ix--) {
361 if ((res = s_mp_mul_d(mp, (UCHAR_MAX + 1))) != MP_OKAY)
362 return res;
363
364 res = s_mp_add_d(mp, (mp_digit)((v >> (ix * CHAR_BIT)) & UCHAR_MAX));
365 if (res != MP_OKAY)
366 return res;
367 }
368 }
369 if(z < 0)
370 SIGN(mp) = NEG;
371
372 return MP_OKAY;
373
374} /* end mp_set_int() */
375
376/* }}} */
377
378/* {{{ mp_set_ulong(mp, z) */
379
380mp_err mp_set_ulong(mp_int *mp, unsigned long z)
381{
382 int ix;
383 mp_err res;
384
385 ARGCHK(mp != NULL, MP_BADARG);
386
387 mp_zero(mp);
388 if(z == 0)
389 return MP_OKAY; /* shortcut for zero */
390
391 if (sizeof z <= sizeof(mp_digit)) {
392 DIGIT(mp,0) = z;
393 } else {
394 for (ix = sizeof(long) - 1; ix >= 0; ix--) {
395 if ((res = s_mp_mul_d(mp, (UCHAR_MAX + 1))) != MP_OKAY)
396 return res;
397
398 res = s_mp_add_d(mp, (mp_digit)((z >> (ix * CHAR_BIT)) & UCHAR_MAX));
399 if (res != MP_OKAY)
400 return res;
401 }
402 }
403 return MP_OKAY;
404} /* end mp_set_ulong() */
405
406/* }}} */
407
408/*------------------------------------------------------------------------*/
409/* {{{ Digit arithmetic */
410
411/* {{{ mp_add_d(a, d, b) */
412
413/*
414 mp_add_d(a, d, b)
415
416 Compute the sum b = a + d, for a single digit d. Respects the sign of
417 its primary addend (single digits are unsigned anyway).
418 */
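
/*
  Worked example of the sign handling below (illustrative): for a = -5,
  d = 3, the copy of a is negative and |a| >= d, so s_mp_sub_d() shrinks
  the magnitude, giving b = -2.  For a = -2, d = 5, the magnitude of a
  is smaller, so the value is negated and its low digit becomes
  d - |a| = 3, giving b = 3.
*/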
419
420mp_err mp_add_d(const mp_int *a, mp_digit d, mp_int *b)
421{
422 mp_int tmp;
423 mp_err res;
424
425 ARGCHK(a != NULL && b != NULL, MP_BADARG);
426
427 if((res = mp_init_copy(&tmp, a)) != MP_OKAY)
428 return res;
429
430 if(SIGN(&tmp) == ZPOS) {
431 if((res = s_mp_add_d(&tmp, d)) != MP_OKAY)
432 goto CLEANUP;
433 } else if(s_mp_cmp_d(&tmp, d) >= 0) {
434 if((res = s_mp_sub_d(&tmp, d)) != MP_OKAY)
435 goto CLEANUP;
436 } else {
437 mp_neg(&tmp, &tmp);
438
439 DIGIT(&tmp, 0) = d - DIGIT(&tmp, 0);
440 }
441
442 if(s_mp_cmp_d(&tmp, 0) == 0)
443 SIGN(&tmp) = ZPOS;
444
445 s_mp_exch(&tmp, b);
446
447CLEANUP:
448 mp_clear(&tmp);
449 return res;
450
451} /* end mp_add_d() */
452
453/* }}} */
454
455/* {{{ mp_sub_d(a, d, b) */
456
457/*
458 mp_sub_d(a, d, b)
459
  Compute the difference b = a - d, for a single digit d. Respects the
  sign of its minuend a (single digits are unsigned anyway).
462 */
463
464mp_err mp_sub_d(const mp_int *a, mp_digit d, mp_int *b)
465{
466 mp_int tmp;
467 mp_err res;
468
469 ARGCHK(a != NULL && b != NULL, MP_BADARG);
470
471 if((res = mp_init_copy(&tmp, a)) != MP_OKAY)
472 return res;
473
474 if(SIGN(&tmp) == NEG) {
475 if((res = s_mp_add_d(&tmp, d)) != MP_OKAY)
476 goto CLEANUP;
477 } else if(s_mp_cmp_d(&tmp, d) >= 0) {
478 if((res = s_mp_sub_d(&tmp, d)) != MP_OKAY)
479 goto CLEANUP;
480 } else {
481 mp_neg(&tmp, &tmp);
482
483 DIGIT(&tmp, 0) = d - DIGIT(&tmp, 0);
484 SIGN(&tmp) = NEG;
485 }
486
487 if(s_mp_cmp_d(&tmp, 0) == 0)
488 SIGN(&tmp) = ZPOS;
489
490 s_mp_exch(&tmp, b);
491
492CLEANUP:
493 mp_clear(&tmp);
494 return res;
495
496} /* end mp_sub_d() */
497
498/* }}} */
499
500/* {{{ mp_mul_d(a, d, b) */
501
502/*
503 mp_mul_d(a, d, b)
504
505 Compute the product b = a * d, for a single digit d. Respects the sign
506 of its multiplicand (single digits are unsigned anyway)
507 */
508
509mp_err mp_mul_d(const mp_int *a, mp_digit d, mp_int *b)
510{
511 mp_err res;
512
513 ARGCHK(a != NULL && b != NULL, MP_BADARG);
514
515 if(d == 0) {
516 mp_zero(b);
517 return MP_OKAY;
518 }
519
520 if((res = mp_copy(a, b)) != MP_OKAY)
521 return res;
522
523 res = s_mp_mul_d(b, d);
524
525 return res;
526
527} /* end mp_mul_d() */
528
529/* }}} */
530
531/* {{{ mp_mul_2(a, c) */
532
533mp_err mp_mul_2(const mp_int *a, mp_int *c)
534{
535 mp_err res;
536
537 ARGCHK(a != NULL && c != NULL, MP_BADARG);
538
539 if((res = mp_copy(a, c)) != MP_OKAY)
540 return res;
541
542 return s_mp_mul_2(c);
543
544} /* end mp_mul_2() */
545
546/* }}} */
547
548/* {{{ mp_div_d(a, d, q, r) */
549
550/*
551 mp_div_d(a, d, q, r)
552
  Compute the quotient q = a / d and remainder r = a mod d, for a
  single digit d. Respects the sign of its dividend a (single digits
  are unsigned anyway).
556 */
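
/*
  Usage sketch (illustrative only), given already-initialized a and q:

    mp_digit r;
    res = mp_div_d(&a, 10, &q, &r);

  computes q = a / 10 and r = a mod 10; either q or r may be NULL if
  only one result is wanted.  When d is a power of two the function
  takes the shift-and-mask shortcut below instead of calling
  s_mp_div_d().
*/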
557
558mp_err mp_div_d(const mp_int *a, mp_digit d, mp_int *q, mp_digit *r)
559{
560 mp_err res;
561 mp_int qp;
562 mp_digit rem;
563 int pow;
564
565 ARGCHK(a != NULL, MP_BADARG);
566
567 if(d == 0)
568 return MP_RANGE;
569
570 /* Shortcut for powers of two ... */
571 if((pow = s_mp_ispow2d(d)) >= 0) {
572 mp_digit mask;
573
574 mask = ((mp_digit)1 << pow) - 1;
575 rem = DIGIT(a, 0) & mask;
576
577 if(q) {
578 mp_copy(a, q);
579 s_mp_div_2d(q, pow);
580 }
581
582 if(r)
583 *r = rem;
584
585 return MP_OKAY;
586 }
587
588 if((res = mp_init_copy(&qp, a)) != MP_OKAY)
589 return res;
590
591 res = s_mp_div_d(&qp, d, &rem);
592
593 if(s_mp_cmp_d(&qp, 0) == 0)
594 SIGN(q) = ZPOS;
595
596 if(r)
597 *r = rem;
598
599 if(q)
600 s_mp_exch(&qp, q);
601
602 mp_clear(&qp);
603 return res;
604
605} /* end mp_div_d() */
606
607/* }}} */
608
609/* {{{ mp_div_2(a, c) */
610
611/*
612 mp_div_2(a, c)
613
614 Compute c = a / 2, disregarding the remainder.
615 */
616
617mp_err mp_div_2(const mp_int *a, mp_int *c)
618{
619 mp_err res;
620
621 ARGCHK(a != NULL && c != NULL, MP_BADARG);
622
623 if((res = mp_copy(a, c)) != MP_OKAY)
624 return res;
625
626 s_mp_div_2(c);
627
628 return MP_OKAY;
629
630} /* end mp_div_2() */
631
632/* }}} */
633
634/* {{{ mp_expt_d(a, d, b) */
635
636mp_err mp_expt_d(const mp_int *a, mp_digit d, mp_int *c)
637{
638 mp_int s, x;
639 mp_err res;
640
641 ARGCHK(a != NULL && c != NULL, MP_BADARG);
642
643 if((res = mp_init(&s, FLAG(a))) != MP_OKAY)
644 return res;
645 if((res = mp_init_copy(&x, a)) != MP_OKAY)
646 goto X;
647
648 DIGIT(&s, 0) = 1;
649
650 while(d != 0) {
651 if(d & 1) {
652 if((res = s_mp_mul(&s, &x)) != MP_OKAY)
653 goto CLEANUP;
654 }
655
656 d /= 2;
657
658 if((res = s_mp_sqr(&x)) != MP_OKAY)
659 goto CLEANUP;
660 }
661
662 s.flag = (mp_sign)0;
663 s_mp_exch(&s, c);
664
665CLEANUP:
666 mp_clear(&x);
667X:
668 mp_clear(&s);
669
670 return res;
671
672} /* end mp_expt_d() */
673
674/* }}} */
675
676/* }}} */
677
678/*------------------------------------------------------------------------*/
679/* {{{ Full arithmetic */
680
681/* {{{ mp_abs(a, b) */
682
683/*
684 mp_abs(a, b)
685
686 Compute b = |a|. 'a' and 'b' may be identical.
687 */
688
689mp_err mp_abs(const mp_int *a, mp_int *b)
690{
691 mp_err res;
692
693 ARGCHK(a != NULL && b != NULL, MP_BADARG);
694
695 if((res = mp_copy(a, b)) != MP_OKAY)
696 return res;
697
698 SIGN(b) = ZPOS;
699
700 return MP_OKAY;
701
702} /* end mp_abs() */
703
704/* }}} */
705
706/* {{{ mp_neg(a, b) */
707
708/*
709 mp_neg(a, b)
710
711 Compute b = -a. 'a' and 'b' may be identical.
712 */
713
714mp_err mp_neg(const mp_int *a, mp_int *b)
715{
716 mp_err res;
717
718 ARGCHK(a != NULL && b != NULL, MP_BADARG);
719
720 if((res = mp_copy(a, b)) != MP_OKAY)
721 return res;
722
723 if(s_mp_cmp_d(b, 0) == MP_EQ)
724 SIGN(b) = ZPOS;
725 else
726 SIGN(b) = (SIGN(b) == NEG) ? ZPOS : NEG;
727
728 return MP_OKAY;
729
730} /* end mp_neg() */
731
732/* }}} */
733
734/* {{{ mp_add(a, b, c) */
735
736/*
737 mp_add(a, b, c)
738
739 Compute c = a + b. All parameters may be identical.
740 */
741
742mp_err mp_add(const mp_int *a, const mp_int *b, mp_int *c)
743{
744 mp_err res;
745
746 ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
747
748 if(SIGN(a) == SIGN(b)) { /* same sign: add values, keep sign */
749 MP_CHECKOK( s_mp_add_3arg(a, b, c) );
750 } else if(s_mp_cmp(a, b) >= 0) { /* different sign: |a| >= |b| */
751 MP_CHECKOK( s_mp_sub_3arg(a, b, c) );
752 } else { /* different sign: |a| < |b| */
753 MP_CHECKOK( s_mp_sub_3arg(b, a, c) );
754 }
755
756 if (s_mp_cmp_d(c, 0) == MP_EQ)
757 SIGN(c) = ZPOS;
758
759CLEANUP:
760 return res;
761
762} /* end mp_add() */
763
764/* }}} */
765
766/* {{{ mp_sub(a, b, c) */
767
768/*
769 mp_sub(a, b, c)
770
771 Compute c = a - b. All parameters may be identical.
772 */
773
774mp_err mp_sub(const mp_int *a, const mp_int *b, mp_int *c)
775{
776 mp_err res;
777 int magDiff;
778
779 ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
780
781 if (a == b) {
782 mp_zero(c);
783 return MP_OKAY;
784 }
785
786 if (MP_SIGN(a) != MP_SIGN(b)) {
787 MP_CHECKOK( s_mp_add_3arg(a, b, c) );
788 } else if (!(magDiff = s_mp_cmp(a, b))) {
789 mp_zero(c);
790 res = MP_OKAY;
791 } else if (magDiff > 0) {
792 MP_CHECKOK( s_mp_sub_3arg(a, b, c) );
793 } else {
794 MP_CHECKOK( s_mp_sub_3arg(b, a, c) );
795 MP_SIGN(c) = !MP_SIGN(a);
796 }
797
798 if (s_mp_cmp_d(c, 0) == MP_EQ)
799 MP_SIGN(c) = MP_ZPOS;
800
801CLEANUP:
802 return res;
803
804} /* end mp_sub() */
805
806/* }}} */
807
808/* {{{ mp_mul(a, b, c) */
809
810/*
811 mp_mul(a, b, c)
812
813 Compute c = a * b. All parameters may be identical.
814 */
815mp_err mp_mul(const mp_int *a, const mp_int *b, mp_int * c)
816{
817 mp_digit *pb;
818 mp_int tmp;
819 mp_err res;
820 mp_size ib;
821 mp_size useda, usedb;
822
823 ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
824
825 if (a == c) {
826 if ((res = mp_init_copy(&tmp, a)) != MP_OKAY)
827 return res;
828 if (a == b)
829 b = &tmp;
830 a = &tmp;
831 } else if (b == c) {
832 if ((res = mp_init_copy(&tmp, b)) != MP_OKAY)
833 return res;
834 b = &tmp;
835 } else {
836 MP_DIGITS(&tmp) = 0;
837 }
838
839 if (MP_USED(a) < MP_USED(b)) {
840 const mp_int *xch = b; /* switch a and b, to do fewer outer loops */
841 b = a;
842 a = xch;
843 }
844
845 MP_USED(c) = 1; MP_DIGIT(c, 0) = 0;
846 if((res = s_mp_pad(c, USED(a) + USED(b))) != MP_OKAY)
847 goto CLEANUP;
848
849#ifdef NSS_USE_COMBA
850 if ((MP_USED(a) == MP_USED(b)) && IS_POWER_OF_2(MP_USED(b))) {
851 if (MP_USED(a) == 4) {
852 s_mp_mul_comba_4(a, b, c);
853 goto CLEANUP;
854 }
855 if (MP_USED(a) == 8) {
856 s_mp_mul_comba_8(a, b, c);
857 goto CLEANUP;
858 }
859 if (MP_USED(a) == 16) {
860 s_mp_mul_comba_16(a, b, c);
861 goto CLEANUP;
862 }
863 if (MP_USED(a) == 32) {
864 s_mp_mul_comba_32(a, b, c);
865 goto CLEANUP;
866 }
867 }
868#endif
869
870 pb = MP_DIGITS(b);
871 s_mpv_mul_d(MP_DIGITS(a), MP_USED(a), *pb++, MP_DIGITS(c));
872
873 /* Outer loop: Digits of b */
874 useda = MP_USED(a);
875 usedb = MP_USED(b);
876 for (ib = 1; ib < usedb; ib++) {
877 mp_digit b_i = *pb++;
878
879 /* Inner product: Digits of a */
880 if (b_i)
881 s_mpv_mul_d_add(MP_DIGITS(a), useda, b_i, MP_DIGITS(c) + ib);
882 else
883 MP_DIGIT(c, ib + useda) = b_i;
884 }
885
886 s_mp_clamp(c);
887
888 if(SIGN(a) == SIGN(b) || s_mp_cmp_d(c, 0) == MP_EQ)
889 SIGN(c) = ZPOS;
890 else
891 SIGN(c) = NEG;
892
893CLEANUP:
894 mp_clear(&tmp);
895 return res;
896} /* end mp_mul() */
897
898/* }}} */
899
900/* {{{ mp_sqr(a, sqr) */
901
902#if MP_SQUARE
903/*
904 Computes the square of a. This can be done more
905 efficiently than a general multiplication, because many of the
906 computation steps are redundant when squaring. The inner product
907 step is a bit more complicated, but we save a fair number of
908 iterations of the multiplication loop.
909 */
910
/* sqr = a^2;  Caller provides both a and sqr; */
912mp_err mp_sqr(const mp_int *a, mp_int *sqr)
913{
914 mp_digit *pa;
915 mp_digit d;
916 mp_err res;
917 mp_size ix;
918 mp_int tmp;
919 int count;
920
921 ARGCHK(a != NULL && sqr != NULL, MP_BADARG);
922
923 if (a == sqr) {
924 if((res = mp_init_copy(&tmp, a)) != MP_OKAY)
925 return res;
926 a = &tmp;
927 } else {
928 DIGITS(&tmp) = 0;
929 res = MP_OKAY;
930 }
931
932 ix = 2 * MP_USED(a);
933 if (ix > MP_ALLOC(sqr)) {
934 MP_USED(sqr) = 1;
935 MP_CHECKOK( s_mp_grow(sqr, ix) );
936 }
937 MP_USED(sqr) = ix;
938 MP_DIGIT(sqr, 0) = 0;
939
940#ifdef NSS_USE_COMBA
941 if (IS_POWER_OF_2(MP_USED(a))) {
942 if (MP_USED(a) == 4) {
943 s_mp_sqr_comba_4(a, sqr);
944 goto CLEANUP;
945 }
946 if (MP_USED(a) == 8) {
947 s_mp_sqr_comba_8(a, sqr);
948 goto CLEANUP;
949 }
950 if (MP_USED(a) == 16) {
951 s_mp_sqr_comba_16(a, sqr);
952 goto CLEANUP;
953 }
954 if (MP_USED(a) == 32) {
955 s_mp_sqr_comba_32(a, sqr);
956 goto CLEANUP;
957 }
958 }
959#endif
960
961 pa = MP_DIGITS(a);
962 count = MP_USED(a) - 1;
963 if (count > 0) {
964 d = *pa++;
965 s_mpv_mul_d(pa, count, d, MP_DIGITS(sqr) + 1);
966 for (ix = 3; --count > 0; ix += 2) {
967 d = *pa++;
968 s_mpv_mul_d_add(pa, count, d, MP_DIGITS(sqr) + ix);
969 } /* for(ix ...) */
970 MP_DIGIT(sqr, MP_USED(sqr)-1) = 0; /* above loop stopped short of this. */
971
972 /* now sqr *= 2 */
973 s_mp_mul_2(sqr);
974 } else {
975 MP_DIGIT(sqr, 1) = 0;
976 }
977
978 /* now add the squares of the digits of a to sqr. */
979 s_mpv_sqr_add_prop(MP_DIGITS(a), MP_USED(a), MP_DIGITS(sqr));
980
981 SIGN(sqr) = ZPOS;
982 s_mp_clamp(sqr);
983
984CLEANUP:
985 mp_clear(&tmp);
986 return res;
987
988} /* end mp_sqr() */
989#endif
990
991/* }}} */
992
993/* {{{ mp_div(a, b, q, r) */
994
995/*
996 mp_div(a, b, q, r)
997
998 Compute q = a / b and r = a mod b. Input parameters may be re-used
999 as output parameters. If q or r is NULL, that portion of the
1000 computation will be discarded (although it will still be computed)
1001 */
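
/*
  Usage sketch (illustrative only), given already-initialized operands:

    res = mp_div(&a, &b, &q, &r);      computes q = a / b and r = a mod b
    res = mp_div(&a, &b, NULL, &r);    computes the remainder only

  Per the sign fixups below, the remainder keeps the sign of a, and the
  quotient is negative only when a and b have opposite signs.
*/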
1002mp_err mp_div(const mp_int *a, const mp_int *b, mp_int *q, mp_int *r)
1003{
1004 mp_err res;
1005 mp_int *pQ, *pR;
1006 mp_int qtmp, rtmp, btmp;
1007 int cmp;
1008 mp_sign signA;
1009 mp_sign signB;
1010
1011 ARGCHK(a != NULL && b != NULL, MP_BADARG);
1012
1013 signA = MP_SIGN(a);
1014 signB = MP_SIGN(b);
1015
1016 if(mp_cmp_z(b) == MP_EQ)
1017 return MP_RANGE;
1018
1019 DIGITS(&qtmp) = 0;
1020 DIGITS(&rtmp) = 0;
1021 DIGITS(&btmp) = 0;
1022
1023 /* Set up some temporaries... */
1024 if (!r || r == a || r == b) {
1025 MP_CHECKOK( mp_init_copy(&rtmp, a) );
1026 pR = &rtmp;
1027 } else {
1028 MP_CHECKOK( mp_copy(a, r) );
1029 pR = r;
1030 }
1031
1032 if (!q || q == a || q == b) {
1033 MP_CHECKOK( mp_init_size(&qtmp, MP_USED(a), FLAG(a)) );
1034 pQ = &qtmp;
1035 } else {
1036 MP_CHECKOK( s_mp_pad(q, MP_USED(a)) );
1037 pQ = q;
1038 mp_zero(pQ);
1039 }
1040
1041 /*
1042 If |a| <= |b|, we can compute the solution without division;
1043 otherwise, we actually do the work required.
1044 */
1045 if ((cmp = s_mp_cmp(a, b)) <= 0) {
1046 if (cmp) {
1047 /* r was set to a above. */
1048 mp_zero(pQ);
1049 } else {
1050 mp_set(pQ, 1);
1051 mp_zero(pR);
1052 }
1053 } else {
1054 MP_CHECKOK( mp_init_copy(&btmp, b) );
1055 MP_CHECKOK( s_mp_div(pR, &btmp, pQ) );
1056 }
1057
1058 /* Compute the signs for the output */
1059 MP_SIGN(pR) = signA; /* Sr = Sa */
1060 /* Sq = ZPOS if Sa == Sb */ /* Sq = NEG if Sa != Sb */
1061 MP_SIGN(pQ) = (signA == signB) ? ZPOS : NEG;
1062
1063 if(s_mp_cmp_d(pQ, 0) == MP_EQ)
1064 SIGN(pQ) = ZPOS;
1065 if(s_mp_cmp_d(pR, 0) == MP_EQ)
1066 SIGN(pR) = ZPOS;
1067
1068 /* Copy output, if it is needed */
1069 if(q && q != pQ)
1070 s_mp_exch(pQ, q);
1071
1072 if(r && r != pR)
1073 s_mp_exch(pR, r);
1074
1075CLEANUP:
1076 mp_clear(&btmp);
1077 mp_clear(&rtmp);
1078 mp_clear(&qtmp);
1079
1080 return res;
1081
1082} /* end mp_div() */
1083
1084/* }}} */
1085
1086/* {{{ mp_div_2d(a, d, q, r) */
1087
1088mp_err mp_div_2d(const mp_int *a, mp_digit d, mp_int *q, mp_int *r)
1089{
1090 mp_err res;
1091
1092 ARGCHK(a != NULL, MP_BADARG);
1093
1094 if(q) {
1095 if((res = mp_copy(a, q)) != MP_OKAY)
1096 return res;
1097 }
1098 if(r) {
1099 if((res = mp_copy(a, r)) != MP_OKAY)
1100 return res;
1101 }
1102 if(q) {
1103 s_mp_div_2d(q, d);
1104 }
1105 if(r) {
1106 s_mp_mod_2d(r, d);
1107 }
1108
1109 return MP_OKAY;
1110
1111} /* end mp_div_2d() */
1112
1113/* }}} */
1114
1115/* {{{ mp_expt(a, b, c) */
1116
1117/*
1118 mp_expt(a, b, c)
1119
1120 Compute c = a ** b, that is, raise a to the b power. Uses a
1121 standard iterative square-and-multiply technique.
1122 */
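
/*
  Worked example (illustrative): for b = 13 = 1101 binary, the loop
  scans bits from least to most significant while x runs through
  a, a^2, a^4, a^8:

    bit 0 = 1:  s = 1 * a        = a
    bit 1 = 0:  s unchanged
    bit 2 = 1:  s = a * a^4      = a^5
    bit 3 = 1:  s = a^5 * a^8    = a^13

  so s accumulates exactly the powers whose exponent bits are set.
*/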
1123
1124mp_err mp_expt(mp_int *a, mp_int *b, mp_int *c)
1125{
1126 mp_int s, x;
1127 mp_err res;
1128 mp_digit d;
1129 unsigned int dig, bit;
1130
1131 ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
1132
1133 if(mp_cmp_z(b) < 0)
1134 return MP_RANGE;
1135
1136 if((res = mp_init(&s, FLAG(a))) != MP_OKAY)
1137 return res;
1138
1139 mp_set(&s, 1);
1140
1141 if((res = mp_init_copy(&x, a)) != MP_OKAY)
1142 goto X;
1143
1144 /* Loop over low-order digits in ascending order */
1145 for(dig = 0; dig < (USED(b) - 1); dig++) {
1146 d = DIGIT(b, dig);
1147
1148 /* Loop over bits of each non-maximal digit */
1149 for(bit = 0; bit < DIGIT_BIT; bit++) {
1150 if(d & 1) {
1151 if((res = s_mp_mul(&s, &x)) != MP_OKAY)
1152 goto CLEANUP;
1153 }
1154
1155 d >>= 1;
1156
1157 if((res = s_mp_sqr(&x)) != MP_OKAY)
1158 goto CLEANUP;
1159 }
1160 }
1161
1162 /* Consider now the last digit... */
1163 d = DIGIT(b, dig);
1164
1165 while(d) {
1166 if(d & 1) {
1167 if((res = s_mp_mul(&s, &x)) != MP_OKAY)
1168 goto CLEANUP;
1169 }
1170
1171 d >>= 1;
1172
1173 if((res = s_mp_sqr(&x)) != MP_OKAY)
1174 goto CLEANUP;
1175 }
1176
1177 if(mp_iseven(b))
1178 SIGN(&s) = SIGN(a);
1179
1180 res = mp_copy(&s, c);
1181
1182CLEANUP:
1183 mp_clear(&x);
1184X:
1185 mp_clear(&s);
1186
1187 return res;
1188
1189} /* end mp_expt() */
1190
1191/* }}} */
1192
1193/* {{{ mp_2expt(a, k) */
1194
1195/* Compute a = 2^k */
1196
1197mp_err mp_2expt(mp_int *a, mp_digit k)
1198{
1199 ARGCHK(a != NULL, MP_BADARG);
1200
1201 return s_mp_2expt(a, k);
1202
1203} /* end mp_2expt() */
1204
1205/* }}} */
1206
1207/* {{{ mp_mod(a, m, c) */
1208
1209/*
1210 mp_mod(a, m, c)
1211
1212 Compute c = a (mod m). Result will always be 0 <= c < m.
1213 */
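
/*
  Example (illustrative): mp_mod() returns the least non-negative
  residue, so for a = -7 and m = 5 the result is c = 3 rather than -2;
  the division leaves a remainder of -2, and m is added once to bring
  the value into [0, m).
*/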
1214
1215mp_err mp_mod(const mp_int *a, const mp_int *m, mp_int *c)
1216{
1217 mp_err res;
1218 int mag;
1219
1220 ARGCHK(a != NULL && m != NULL && c != NULL, MP_BADARG);
1221
1222 if(SIGN(m) == NEG)
1223 return MP_RANGE;
1224
1225 /*
1226 If |a| > m, we need to divide to get the remainder and take the
1227 absolute value.
1228
1229 If |a| < m, we don't need to do any division, just copy and adjust
1230 the sign (if a is negative).
1231
1232 If |a| == m, we can simply set the result to zero.
1233
1234 This order is intended to minimize the average path length of the
1235 comparison chain on common workloads -- the most frequent cases are
1236 that |a| != m, so we do those first.
1237 */
1238 if((mag = s_mp_cmp(a, m)) > 0) {
1239 if((res = mp_div(a, m, NULL, c)) != MP_OKAY)
1240 return res;
1241
1242 if(SIGN(c) == NEG) {
1243 if((res = mp_add(c, m, c)) != MP_OKAY)
1244 return res;
1245 }
1246
1247 } else if(mag < 0) {
1248 if((res = mp_copy(a, c)) != MP_OKAY)
1249 return res;
1250
1251 if(mp_cmp_z(a) < 0) {
1252 if((res = mp_add(c, m, c)) != MP_OKAY)
1253 return res;
1254
1255 }
1256
1257 } else {
1258 mp_zero(c);
1259
1260 }
1261
1262 return MP_OKAY;
1263
1264} /* end mp_mod() */
1265
1266/* }}} */
1267
1268/* {{{ mp_mod_d(a, d, c) */
1269
1270/*
1271 mp_mod_d(a, d, c)
1272
1273 Compute c = a (mod d). Result will always be 0 <= c < d
1274 */
1275mp_err mp_mod_d(const mp_int *a, mp_digit d, mp_digit *c)
1276{
1277 mp_err res;
1278 mp_digit rem;
1279
1280 ARGCHK(a != NULL && c != NULL, MP_BADARG);
1281
1282 if(s_mp_cmp_d(a, d) > 0) {
1283 if((res = mp_div_d(a, d, NULL, &rem)) != MP_OKAY)
1284 return res;
1285
1286 } else {
1287 if(SIGN(a) == NEG)
1288 rem = d - DIGIT(a, 0);
1289 else
1290 rem = DIGIT(a, 0);
1291 }
1292
1293 if(c)
1294 *c = rem;
1295
1296 return MP_OKAY;
1297
1298} /* end mp_mod_d() */
1299
1300/* }}} */
1301
1302/* {{{ mp_sqrt(a, b) */
1303
1304/*
1305 mp_sqrt(a, b)
1306
1307 Compute the integer square root of a, and store the result in b.
1308 Uses an integer-arithmetic version of Newton's iterative linear
1309 approximation technique to determine this value; the result has the
1310 following two properties:
1311
1312 b^2 <= a
  (b+1)^2 > a
1314
1315 It is a range error to pass a negative value.
1316 */
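
/*
  The loop below is the integer form of the Newton step for
  f(x) = x^2 - a:

     x' = x - (x^2 - a) / (2x)

  computed as t = x*x - a, then t = t / (2x), then x = x - t, stopping
  when the correction t reaches zero; the final subtraction of 1
  compensates for the iteration approaching the root from above, so
  that the bounds stated above hold.
*/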
1317mp_err mp_sqrt(const mp_int *a, mp_int *b)
1318{
1319 mp_int x, t;
1320 mp_err res;
1321 mp_size used;
1322
1323 ARGCHK(a != NULL && b != NULL, MP_BADARG);
1324
1325 /* Cannot take square root of a negative value */
1326 if(SIGN(a) == NEG)
1327 return MP_RANGE;
1328
1329 /* Special cases for zero and one, trivial */
1330 if(mp_cmp_d(a, 1) <= 0)
1331 return mp_copy(a, b);
1332
1333 /* Initialize the temporaries we'll use below */
1334 if((res = mp_init_size(&t, USED(a), FLAG(a))) != MP_OKAY)
1335 return res;
1336
1337 /* Compute an initial guess for the iteration as a itself */
1338 if((res = mp_init_copy(&x, a)) != MP_OKAY)
1339 goto X;
1340
1341 used = MP_USED(&x);
1342 if (used > 1) {
1343 s_mp_rshd(&x, used / 2);
1344 }
1345
1346 for(;;) {
1347 /* t = (x * x) - a */
1348 mp_copy(&x, &t); /* can't fail, t is big enough for original x */
1349 if((res = mp_sqr(&t, &t)) != MP_OKAY ||
1350 (res = mp_sub(&t, a, &t)) != MP_OKAY)
1351 goto CLEANUP;
1352
1353 /* t = t / 2x */
1354 s_mp_mul_2(&x);
1355 if((res = mp_div(&t, &x, &t, NULL)) != MP_OKAY)
1356 goto CLEANUP;
1357 s_mp_div_2(&x);
1358
1359 /* Terminate the loop, if the quotient is zero */
1360 if(mp_cmp_z(&t) == MP_EQ)
1361 break;
1362
1363 /* x = x - t */
1364 if((res = mp_sub(&x, &t, &x)) != MP_OKAY)
1365 goto CLEANUP;
1366
1367 }
1368
1369 /* Copy result to output parameter */
1370 mp_sub_d(&x, 1, &x);
1371 s_mp_exch(&x, b);
1372
1373 CLEANUP:
1374 mp_clear(&x);
1375 X:
1376 mp_clear(&t);
1377
1378 return res;
1379
1380} /* end mp_sqrt() */
1381
1382/* }}} */
1383
1384/* }}} */
1385
1386/*------------------------------------------------------------------------*/
1387/* {{{ Modular arithmetic */
1388
1389#if MP_MODARITH
1390/* {{{ mp_addmod(a, b, m, c) */
1391
1392/*
1393 mp_addmod(a, b, m, c)
1394
1395 Compute c = (a + b) mod m
1396 */
1397
1398mp_err mp_addmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c)
1399{
1400 mp_err res;
1401
1402 ARGCHK(a != NULL && b != NULL && m != NULL && c != NULL, MP_BADARG);
1403
1404 if((res = mp_add(a, b, c)) != MP_OKAY)
1405 return res;
1406 if((res = mp_mod(c, m, c)) != MP_OKAY)
1407 return res;
1408
1409 return MP_OKAY;
1410
1411}
1412
1413/* }}} */
1414
1415/* {{{ mp_submod(a, b, m, c) */
1416
1417/*
1418 mp_submod(a, b, m, c)
1419
1420 Compute c = (a - b) mod m
1421 */
1422
1423mp_err mp_submod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c)
1424{
1425 mp_err res;
1426
1427 ARGCHK(a != NULL && b != NULL && m != NULL && c != NULL, MP_BADARG);
1428
1429 if((res = mp_sub(a, b, c)) != MP_OKAY)
1430 return res;
1431 if((res = mp_mod(c, m, c)) != MP_OKAY)
1432 return res;
1433
1434 return MP_OKAY;
1435
1436}
1437
1438/* }}} */
1439
1440/* {{{ mp_mulmod(a, b, m, c) */
1441
1442/*
1443 mp_mulmod(a, b, m, c)
1444
1445 Compute c = (a * b) mod m
1446 */
1447
1448mp_err mp_mulmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c)
1449{
1450 mp_err res;
1451
1452 ARGCHK(a != NULL && b != NULL && m != NULL && c != NULL, MP_BADARG);
1453
1454 if((res = mp_mul(a, b, c)) != MP_OKAY)
1455 return res;
1456 if((res = mp_mod(c, m, c)) != MP_OKAY)
1457 return res;
1458
1459 return MP_OKAY;
1460
1461}
1462
1463/* }}} */
1464
1465/* {{{ mp_sqrmod(a, m, c) */
1466
1467#if MP_SQUARE
1468mp_err mp_sqrmod(const mp_int *a, const mp_int *m, mp_int *c)
1469{
1470 mp_err res;
1471
1472 ARGCHK(a != NULL && m != NULL && c != NULL, MP_BADARG);
1473
1474 if((res = mp_sqr(a, c)) != MP_OKAY)
1475 return res;
1476 if((res = mp_mod(c, m, c)) != MP_OKAY)
1477 return res;
1478
1479 return MP_OKAY;
1480
1481} /* end mp_sqrmod() */
1482#endif
1483
1484/* }}} */
1485
1486/* {{{ s_mp_exptmod(a, b, m, c) */
1487
1488/*
1489 s_mp_exptmod(a, b, m, c)
1490
1491 Compute c = (a ** b) mod m. Uses a standard square-and-multiply
1492 method with modular reductions at each step. (This is basically the
1493 same code as mp_expt(), except for the addition of the reductions)
1494
1495 The modular reductions are done using Barrett's algorithm (see
1496 s_mp_reduce() below for details)
1497 */
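
/*
  In Barrett's method (illustrative summary): with R = RADIX and
  k = USED(m), the precomputed constant is

     mu = floor( R^(2k) / m )

  and s_mp_reduce() uses mu to approximate the quotient of each
  intermediate value by m with shifts and multiplications only, so
  every square/multiply step stays reduced mod m without a full
  division.
*/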
1498
1499mp_err s_mp_exptmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c)
1500{
1501 mp_int s, x, mu;
1502 mp_err res;
1503 mp_digit d;
1504 unsigned int dig, bit;
1505
1506 ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
1507
1508 if(mp_cmp_z(b) < 0 || mp_cmp_z(m) <= 0)
1509 return MP_RANGE;
1510
1511 if((res = mp_init(&s, FLAG(a))) != MP_OKAY)
1512 return res;
1513 if((res = mp_init_copy(&x, a)) != MP_OKAY ||
1514 (res = mp_mod(&x, m, &x)) != MP_OKAY)
1515 goto X;
1516 if((res = mp_init(&mu, FLAG(a))) != MP_OKAY)
1517 goto MU;
1518
1519 mp_set(&s, 1);
1520
  /* mu = RADIX**(2k) / m, where k = USED(m) */
1522 s_mp_add_d(&mu, 1);
1523 s_mp_lshd(&mu, 2 * USED(m));
1524 if((res = mp_div(&mu, m, &mu, NULL)) != MP_OKAY)
1525 goto CLEANUP;
1526
1527 /* Loop over digits of b in ascending order, except highest order */
1528 for(dig = 0; dig < (USED(b) - 1); dig++) {
1529 d = DIGIT(b, dig);
1530
1531 /* Loop over the bits of the lower-order digits */
1532 for(bit = 0; bit < DIGIT_BIT; bit++) {
1533 if(d & 1) {
1534 if((res = s_mp_mul(&s, &x)) != MP_OKAY)
1535 goto CLEANUP;
1536 if((res = s_mp_reduce(&s, m, &mu)) != MP_OKAY)
1537 goto CLEANUP;
1538 }
1539
1540 d >>= 1;
1541
1542 if((res = s_mp_sqr(&x)) != MP_OKAY)
1543 goto CLEANUP;
1544 if((res = s_mp_reduce(&x, m, &mu)) != MP_OKAY)
1545 goto CLEANUP;
1546 }
1547 }
1548
1549 /* Now do the last digit... */
1550 d = DIGIT(b, dig);
1551
1552 while(d) {
1553 if(d & 1) {
1554 if((res = s_mp_mul(&s, &x)) != MP_OKAY)
1555 goto CLEANUP;
1556 if((res = s_mp_reduce(&s, m, &mu)) != MP_OKAY)
1557 goto CLEANUP;
1558 }
1559
1560 d >>= 1;
1561
1562 if((res = s_mp_sqr(&x)) != MP_OKAY)
1563 goto CLEANUP;
1564 if((res = s_mp_reduce(&x, m, &mu)) != MP_OKAY)
1565 goto CLEANUP;
1566 }
1567
1568 s_mp_exch(&s, c);
1569
1570 CLEANUP:
1571 mp_clear(&mu);
1572 MU:
1573 mp_clear(&x);
1574 X:
1575 mp_clear(&s);
1576
1577 return res;
1578
1579} /* end s_mp_exptmod() */
1580
1581/* }}} */
1582
1583/* {{{ mp_exptmod_d(a, d, m, c) */
1584
1585mp_err mp_exptmod_d(const mp_int *a, mp_digit d, const mp_int *m, mp_int *c)
1586{
1587 mp_int s, x;
1588 mp_err res;
1589
1590 ARGCHK(a != NULL && c != NULL, MP_BADARG);
1591
1592 if((res = mp_init(&s, FLAG(a))) != MP_OKAY)
1593 return res;
1594 if((res = mp_init_copy(&x, a)) != MP_OKAY)
1595 goto X;
1596
1597 mp_set(&s, 1);
1598
1599 while(d != 0) {
1600 if(d & 1) {
1601 if((res = s_mp_mul(&s, &x)) != MP_OKAY ||
1602 (res = mp_mod(&s, m, &s)) != MP_OKAY)
1603 goto CLEANUP;
1604 }
1605
1606 d /= 2;
1607
1608 if((res = s_mp_sqr(&x)) != MP_OKAY ||
1609 (res = mp_mod(&x, m, &x)) != MP_OKAY)
1610 goto CLEANUP;
1611 }
1612
1613 s.flag = (mp_sign)0;
1614 s_mp_exch(&s, c);
1615
1616CLEANUP:
1617 mp_clear(&x);
1618X:
1619 mp_clear(&s);
1620
1621 return res;
1622
1623} /* end mp_exptmod_d() */
1624
1625/* }}} */
1626#endif /* if MP_MODARITH */
1627
1628/* }}} */
1629
1630/*------------------------------------------------------------------------*/
1631/* {{{ Comparison functions */
1632
1633/* {{{ mp_cmp_z(a) */
1634
1635/*
1636 mp_cmp_z(a)
1637
1638 Compare a <=> 0. Returns <0 if a<0, 0 if a=0, >0 if a>0.
1639 */
1640
1641int mp_cmp_z(const mp_int *a)
1642{
1643 if(SIGN(a) == NEG)
1644 return MP_LT;
1645 else if(USED(a) == 1 && DIGIT(a, 0) == 0)
1646 return MP_EQ;
1647 else
1648 return MP_GT;
1649
1650} /* end mp_cmp_z() */
1651
1652/* }}} */
1653
1654/* {{{ mp_cmp_d(a, d) */
1655
1656/*
1657 mp_cmp_d(a, d)
1658
1659 Compare a <=> d. Returns <0 if a<d, 0 if a=d, >0 if a>d
1660 */
1661
1662int mp_cmp_d(const mp_int *a, mp_digit d)
1663{
1664 ARGCHK(a != NULL, MP_EQ);
1665
1666 if(SIGN(a) == NEG)
1667 return MP_LT;
1668
1669 return s_mp_cmp_d(a, d);
1670
1671} /* end mp_cmp_d() */
1672
1673/* }}} */
1674
1675/* {{{ mp_cmp(a, b) */
1676
1677int mp_cmp(const mp_int *a, const mp_int *b)
1678{
1679 ARGCHK(a != NULL && b != NULL, MP_EQ);
1680
1681 if(SIGN(a) == SIGN(b)) {
1682 int mag;
1683
1684 if((mag = s_mp_cmp(a, b)) == MP_EQ)
1685 return MP_EQ;
1686
1687 if(SIGN(a) == ZPOS)
1688 return mag;
1689 else
1690 return -mag;
1691
1692 } else if(SIGN(a) == ZPOS) {
1693 return MP_GT;
1694 } else {
1695 return MP_LT;
1696 }
1697
1698} /* end mp_cmp() */
1699
1700/* }}} */
1701
1702/* {{{ mp_cmp_mag(a, b) */
1703
1704/*
1705 mp_cmp_mag(a, b)
1706
1707 Compares |a| <=> |b|, and returns an appropriate comparison result
1708 */
1709
1710int mp_cmp_mag(mp_int *a, mp_int *b)
1711{
1712 ARGCHK(a != NULL && b != NULL, MP_EQ);
1713
1714 return s_mp_cmp(a, b);
1715
1716} /* end mp_cmp_mag() */
1717
1718/* }}} */
1719
1720/* {{{ mp_cmp_int(a, z, kmflag) */
1721
1722/*
1723 This just converts z to an mp_int, and uses the existing comparison
1724 routines. This is sort of inefficient, but it's not clear to me how
  frequently this will get used anyway. For small positive constants,
1726 you can always use mp_cmp_d(), and for zero, there is mp_cmp_z().
1727 */
1728int mp_cmp_int(const mp_int *a, long z, int kmflag)
1729{
1730 mp_int tmp;
1731 int out;
1732
1733 ARGCHK(a != NULL, MP_EQ);
1734
1735 mp_init(&tmp, kmflag); mp_set_int(&tmp, z);
1736 out = mp_cmp(a, &tmp);
1737 mp_clear(&tmp);
1738
1739 return out;
1740
1741} /* end mp_cmp_int() */
1742
1743/* }}} */
1744
1745/* {{{ mp_isodd(a) */
1746
1747/*
1748 mp_isodd(a)
1749
1750 Returns a true (non-zero) value if a is odd, false (zero) otherwise.
1751 */
1752int mp_isodd(const mp_int *a)
1753{
1754 ARGCHK(a != NULL, 0);
1755
1756 return (int)(DIGIT(a, 0) & 1);
1757
1758} /* end mp_isodd() */
1759
1760/* }}} */
1761
1762/* {{{ mp_iseven(a) */
1763
1764int mp_iseven(const mp_int *a)
1765{
1766 return !mp_isodd(a);
1767
1768} /* end mp_iseven() */
1769
1770/* }}} */
1771
1772/* }}} */
1773
1774/*------------------------------------------------------------------------*/
1775/* {{{ Number theoretic functions */
1776
1777#if MP_NUMTH
1778/* {{{ mp_gcd(a, b, c) */
1779
1780/*
1781 Like the old mp_gcd() function, except computes the GCD using the
1782 binary algorithm due to Josef Stein in 1961 (via Knuth).
1783 */
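
/*
  Worked example (illustrative): gcd(48, 18).  Both are even, so one
  common factor of 2 is removed (k = 1), leaving u = 24, v = 9.  Since
  u is even, t starts at 24 and is halved down to 3, which replaces u;
  the difference t = u - v = -6 is halved to -3, so v becomes 3; the
  next difference is 0, ending the loop with u = 3, and the result is
  3 * 2^1 = 6.
*/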
1784mp_err mp_gcd(mp_int *a, mp_int *b, mp_int *c)
1785{
1786 mp_err res;
1787 mp_int u, v, t;
1788 mp_size k = 0;
1789
1790 ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
1791
1792 if(mp_cmp_z(a) == MP_EQ && mp_cmp_z(b) == MP_EQ)
1793 return MP_RANGE;
1794 if(mp_cmp_z(a) == MP_EQ) {
1795 return mp_copy(b, c);
1796 } else if(mp_cmp_z(b) == MP_EQ) {
1797 return mp_copy(a, c);
1798 }
1799
1800 if((res = mp_init(&t, FLAG(a))) != MP_OKAY)
1801 return res;
1802 if((res = mp_init_copy(&u, a)) != MP_OKAY)
1803 goto U;
1804 if((res = mp_init_copy(&v, b)) != MP_OKAY)
1805 goto V;
1806
1807 SIGN(&u) = ZPOS;
1808 SIGN(&v) = ZPOS;
1809
  /* Divide out common factors of 2 until at least one of u, v is odd */
1811 while(mp_iseven(&u) && mp_iseven(&v)) {
1812 s_mp_div_2(&u);
1813 s_mp_div_2(&v);
1814 ++k;
1815 }
1816
1817 /* Initialize t */
1818 if(mp_isodd(&u)) {
1819 if((res = mp_copy(&v, &t)) != MP_OKAY)
1820 goto CLEANUP;
1821
1822 /* t = -v */
1823 if(SIGN(&v) == ZPOS)
1824 SIGN(&t) = NEG;
1825 else
1826 SIGN(&t) = ZPOS;
1827
1828 } else {
1829 if((res = mp_copy(&u, &t)) != MP_OKAY)
1830 goto CLEANUP;
1831
1832 }
1833
1834 for(;;) {
1835 while(mp_iseven(&t)) {
1836 s_mp_div_2(&t);
1837 }
1838
1839 if(mp_cmp_z(&t) == MP_GT) {
1840 if((res = mp_copy(&t, &u)) != MP_OKAY)
1841 goto CLEANUP;
1842
1843 } else {
1844 if((res = mp_copy(&t, &v)) != MP_OKAY)
1845 goto CLEANUP;
1846
1847 /* v = -t */
1848 if(SIGN(&t) == ZPOS)
1849 SIGN(&v) = NEG;
1850 else
1851 SIGN(&v) = ZPOS;
1852 }
1853
1854 if((res = mp_sub(&u, &v, &t)) != MP_OKAY)
1855 goto CLEANUP;
1856
1857 if(s_mp_cmp_d(&t, 0) == MP_EQ)
1858 break;
1859 }
1860
1861 s_mp_2expt(&v, k); /* v = 2^k */
1862 res = mp_mul(&u, &v, c); /* c = u * v */
1863
1864 CLEANUP:
1865 mp_clear(&v);
1866 V:
1867 mp_clear(&u);
1868 U:
1869 mp_clear(&t);
1870
1871 return res;
1872
1873} /* end mp_gcd() */
1874
1875/* }}} */
1876
1877/* {{{ mp_lcm(a, b, c) */
1878
1879/* We compute the least common multiple using the rule:
1880
1881 ab = [a, b](a, b)
1882
1883 ... by computing the product, and dividing out the gcd.
1884 */
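
/*
  Example (illustrative): for a = 4 and b = 6 the product is 24 and the
  gcd is 2, so the lcm computed below is 24 / 2 = 12.
*/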
1885
1886mp_err mp_lcm(mp_int *a, mp_int *b, mp_int *c)
1887{
1888 mp_int gcd, prod;
1889 mp_err res;
1890
1891 ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
1892
1893 /* Set up temporaries */
1894 if((res = mp_init(&gcd, FLAG(a))) != MP_OKAY)
1895 return res;
1896 if((res = mp_init(&prod, FLAG(a))) != MP_OKAY)
1897 goto GCD;
1898
1899 if((res = mp_mul(a, b, &prod)) != MP_OKAY)
1900 goto CLEANUP;
1901 if((res = mp_gcd(a, b, &gcd)) != MP_OKAY)
1902 goto CLEANUP;
1903
1904 res = mp_div(&prod, &gcd, c, NULL);
1905
1906 CLEANUP:
1907 mp_clear(&prod);
1908 GCD:
1909 mp_clear(&gcd);
1910
1911 return res;
1912
1913} /* end mp_lcm() */
1914
1915/* }}} */
1916
1917/* {{{ mp_xgcd(a, b, g, x, y) */
1918
1919/*
1920 mp_xgcd(a, b, g, x, y)
1921
1922 Compute g = (a, b) and values x and y satisfying Bezout's identity
1923 (that is, ax + by = g). This uses the binary extended GCD algorithm
  based on the Stein algorithm used for mp_gcd().
  See algorithm 14.61 in the Handbook of Applied Cryptography.
1926 */
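
/*
  Example of the identity (illustrative): for a = 240, b = 46 the gcd
  is g = 2, and one valid answer is x = -9, y = 47, since
  240*(-9) + 46*47 = -2160 + 2162 = 2.  Which particular (x, y) pair
  this routine returns depends on its internal reduction steps.
*/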
1927
1928mp_err mp_xgcd(const mp_int *a, const mp_int *b, mp_int *g, mp_int *x, mp_int *y)
1929{
1930 mp_int gx, xc, yc, u, v, A, B, C, D;
1931 mp_int *clean[9];
1932 mp_err res;
1933 int last = -1;
1934
1935 if(mp_cmp_z(b) == 0)
1936 return MP_RANGE;
1937
1938 /* Initialize all these variables we need */
1939 MP_CHECKOK( mp_init(&u, FLAG(a)) );
1940 clean[++last] = &u;
1941 MP_CHECKOK( mp_init(&v, FLAG(a)) );
1942 clean[++last] = &v;
1943 MP_CHECKOK( mp_init(&gx, FLAG(a)) );
1944 clean[++last] = &gx;
1945 MP_CHECKOK( mp_init(&A, FLAG(a)) );
1946 clean[++last] = &A;
1947 MP_CHECKOK( mp_init(&B, FLAG(a)) );
1948 clean[++last] = &B;
1949 MP_CHECKOK( mp_init(&C, FLAG(a)) );
1950 clean[++last] = &C;
1951 MP_CHECKOK( mp_init(&D, FLAG(a)) );
1952 clean[++last] = &D;
1953 MP_CHECKOK( mp_init_copy(&xc, a) );
1954 clean[++last] = &xc;
1955 mp_abs(&xc, &xc);
1956 MP_CHECKOK( mp_init_copy(&yc, b) );
1957 clean[++last] = &yc;
1958 mp_abs(&yc, &yc);
1959
1960 mp_set(&gx, 1);
1961
1962 /* Divide by two until at least one of them is odd */
1963 while(mp_iseven(&xc) && mp_iseven(&yc)) {
1964 mp_size nx = mp_trailing_zeros(&xc);
1965 mp_size ny = mp_trailing_zeros(&yc);
1966 mp_size n = MP_MIN(nx, ny);
1967 s_mp_div_2d(&xc,n);
1968 s_mp_div_2d(&yc,n);
1969 MP_CHECKOK( s_mp_mul_2d(&gx,n) );
1970 }
1971
1972 mp_copy(&xc, &u);
1973 mp_copy(&yc, &v);
1974 mp_set(&A, 1); mp_set(&D, 1);
1975
1976 /* Loop through binary GCD algorithm */
1977 do {
1978 while(mp_iseven(&u)) {
1979 s_mp_div_2(&u);
1980
1981 if(mp_iseven(&A) && mp_iseven(&B)) {
1982 s_mp_div_2(&A); s_mp_div_2(&B);
1983 } else {
1984 MP_CHECKOK( mp_add(&A, &yc, &A) );
1985 s_mp_div_2(&A);
1986 MP_CHECKOK( mp_sub(&B, &xc, &B) );
1987 s_mp_div_2(&B);
1988 }
1989 }
1990
1991 while(mp_iseven(&v)) {
1992 s_mp_div_2(&v);
1993
1994 if(mp_iseven(&C) && mp_iseven(&D)) {
1995 s_mp_div_2(&C); s_mp_div_2(&D);
1996 } else {
1997 MP_CHECKOK( mp_add(&C, &yc, &C) );
1998 s_mp_div_2(&C);
1999 MP_CHECKOK( mp_sub(&D, &xc, &D) );
2000 s_mp_div_2(&D);
2001 }
2002 }
2003
2004 if(mp_cmp(&u, &v) >= 0) {
2005 MP_CHECKOK( mp_sub(&u, &v, &u) );
2006 MP_CHECKOK( mp_sub(&A, &C, &A) );
2007 MP_CHECKOK( mp_sub(&B, &D, &B) );
2008 } else {
2009 MP_CHECKOK( mp_sub(&v, &u, &v) );
2010 MP_CHECKOK( mp_sub(&C, &A, &C) );
2011 MP_CHECKOK( mp_sub(&D, &B, &D) );
2012 }
2013 } while (mp_cmp_z(&u) != 0);
2014
2015 /* copy results to output */
2016 if(x)
2017 MP_CHECKOK( mp_copy(&C, x) );
2018
2019 if(y)
2020 MP_CHECKOK( mp_copy(&D, y) );
2021
2022 if(g)
2023 MP_CHECKOK( mp_mul(&gx, &v, g) );
2024
2025 CLEANUP:
2026 while(last >= 0)
2027 mp_clear(clean[last--]);
2028
2029 return res;
2030
2031} /* end mp_xgcd() */
2032
2033/* }}} */
2034
2035mp_size mp_trailing_zeros(const mp_int *mp)
2036{
2037 mp_digit d;
2038 mp_size n = 0;
2039 unsigned int ix;
2040
2041 if (!mp || !MP_DIGITS(mp) || !mp_cmp_z(mp))
2042 return n;
2043
2044 for (ix = 0; !(d = MP_DIGIT(mp,ix)) && (ix < MP_USED(mp)); ++ix)
2045 n += MP_DIGIT_BIT;
2046 if (!d)
2047 return 0; /* shouldn't happen, but ... */
2048#if !defined(MP_USE_UINT_DIGIT)
2049 if (!(d & 0xffffffffU)) {
2050 d >>= 32;
2051 n += 32;
2052 }
2053#endif
2054 if (!(d & 0xffffU)) {
2055 d >>= 16;
2056 n += 16;
2057 }
2058 if (!(d & 0xffU)) {
2059 d >>= 8;
2060 n += 8;
2061 }
2062 if (!(d & 0xfU)) {
2063 d >>= 4;
2064 n += 4;
2065 }
2066 if (!(d & 0x3U)) {
2067 d >>= 2;
2068 n += 2;
2069 }
2070 if (!(d & 0x1U)) {
2071 d >>= 1;
2072 n += 1;
2073 }
2074#if MP_ARGCHK == 2
2075 assert(0 != (d & 1));
2076#endif
2077 return n;
2078}
2079
2080/* Given a and prime p, computes c and k such that a*c == 2**k (mod p).
2081** Returns k (positive) or error (negative).
2082** This technique from the paper "Fast Modular Reciprocals" (unpublished)
2083** by Richard Schroeppel (a.k.a. Captain Nemo).
2084*/
2085mp_err s_mp_almost_inverse(const mp_int *a, const mp_int *p, mp_int *c)
2086{
2087 mp_err res;
2088 mp_err k = 0;
2089 mp_int d, f, g;
2090
2091 ARGCHK(a && p && c, MP_BADARG);
2092
2093 MP_DIGITS(&d) = 0;
2094 MP_DIGITS(&f) = 0;
2095 MP_DIGITS(&g) = 0;
2096 MP_CHECKOK( mp_init(&d, FLAG(a)) );
2097 MP_CHECKOK( mp_init_copy(&f, a) ); /* f = a */
2098 MP_CHECKOK( mp_init_copy(&g, p) ); /* g = p */
2099
2100 mp_set(c, 1);
2101 mp_zero(&d);
2102
2103 if (mp_cmp_z(&f) == 0) {
2104 res = MP_UNDEF;
2105 } else
2106 for (;;) {
2107 int diff_sign;
2108 while (mp_iseven(&f)) {
2109 mp_size n = mp_trailing_zeros(&f);
2110 if (!n) {
2111 res = MP_UNDEF;
2112 goto CLEANUP;
2113 }
2114 s_mp_div_2d(&f, n);
2115 MP_CHECKOK( s_mp_mul_2d(&d, n) );
2116 k += n;
2117 }
2118 if (mp_cmp_d(&f, 1) == MP_EQ) { /* f == 1 */
2119 res = k;
2120 break;
2121 }
2122 diff_sign = mp_cmp(&f, &g);
2123 if (diff_sign < 0) { /* f < g */
2124 s_mp_exch(&f, &g);
2125 s_mp_exch(c, &d);
2126 } else if (diff_sign == 0) { /* f == g */
2127 res = MP_UNDEF; /* a and p are not relatively prime */
2128 break;
2129 }
2130 if ((MP_DIGIT(&f,0) % 4) == (MP_DIGIT(&g,0) % 4)) {
2131 MP_CHECKOK( mp_sub(&f, &g, &f) ); /* f = f - g */
2132 MP_CHECKOK( mp_sub(c, &d, c) ); /* c = c - d */
2133 } else {
2134 MP_CHECKOK( mp_add(&f, &g, &f) ); /* f = f + g */
2135 MP_CHECKOK( mp_add(c, &d, c) ); /* c = c + d */
2136 }
2137 }
2138 if (res >= 0) {
2139 while (MP_SIGN(c) != MP_ZPOS) {
2140 MP_CHECKOK( mp_add(c, p, c) );
2141 }
2142 res = k;
2143 }
2144
2145CLEANUP:
2146 mp_clear(&d);
2147 mp_clear(&f);
2148 mp_clear(&g);
2149 return res;
2150}
2151
2152/* Compute T = (P ** -1) mod MP_RADIX. Also works for 16-bit mp_digits.
** This technique is from the paper "Fast Modular Reciprocals" (unpublished)
2154** by Richard Schroeppel (a.k.a. Captain Nemo).
2155*/
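/* Why a fixed number of steps suffices: each T *= 2 - P*T line is a
** Hensel/Newton step that doubles the number of correct low-order bits
** of the inverse.  For odd P, T = P is already correct mod 2**3, so
** four steps give at least 48 correct bits (enough for a 32-bit
** mp_digit), and the conditionally compiled extra steps extend this to
** larger digit sizes.
*/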
2156mp_digit s_mp_invmod_radix(mp_digit P)
2157{
2158 mp_digit T = P;
2159 T *= 2 - (P * T);
2160 T *= 2 - (P * T);
2161 T *= 2 - (P * T);
2162 T *= 2 - (P * T);
2163#if !defined(MP_USE_UINT_DIGIT)
2164 T *= 2 - (P * T);
2165 T *= 2 - (P * T);
2166#endif
2167 return T;
2168}
2169
2170/* Given c, k, and prime p, where a*c == 2**k (mod p),
2171** Compute x = (a ** -1) mod p. This is similar to Montgomery reduction.
** This technique is from the paper "Fast Modular Reciprocals" (unpublished)
2173** by Richard Schroeppel (a.k.a. Captain Nemo).
2174*/
2175mp_err s_mp_fixup_reciprocal(const mp_int *c, const mp_int *p, int k, mp_int *x)
2176{
2177 int k_orig = k;
2178 mp_digit r;
2179 mp_size ix;
2180 mp_err res;
2181
2182 if (mp_cmp_z(c) < 0) { /* c < 0 */
2183 MP_CHECKOK( mp_add(c, p, x) ); /* x = c + p */
2184 } else {
2185 MP_CHECKOK( mp_copy(c, x) ); /* x = c */
2186 }
2187
2188 /* make sure x is large enough */
2189 ix = MP_HOWMANY(k, MP_DIGIT_BIT) + MP_USED(p) + 1;
2190 ix = MP_MAX(ix, MP_USED(x));
2191 MP_CHECKOK( s_mp_pad(x, ix) );
2192
2193 r = 0 - s_mp_invmod_radix(MP_DIGIT(p,0));
2194
2195 for (ix = 0; k > 0; ix++) {
2196 int j = MP_MIN(k, MP_DIGIT_BIT);
2197 mp_digit v = r * MP_DIGIT(x, ix);
2198 if (j < MP_DIGIT_BIT) {
2199 v &= ((mp_digit)1 << j) - 1; /* v = v mod (2 ** j) */
2200 }
2201 s_mp_mul_d_add_offset(p, v, x, ix); /* x += p * v * (RADIX ** ix) */
2202 k -= j;
2203 }
2204 s_mp_clamp(x);
2205 s_mp_div_2d(x, k_orig);
2206 res = MP_OKAY;
2207
2208CLEANUP:
2209 return res;
2210}
2211
2212/* compute mod inverse using Schroeppel's method, only if m is odd */
2213mp_err s_mp_invmod_odd_m(const mp_int *a, const mp_int *m, mp_int *c)
2214{
2215 int k;
2216 mp_err res;
2217 mp_int x;
2218
2219 ARGCHK(a && m && c, MP_BADARG);
2220
2221 if(mp_cmp_z(a) == 0 || mp_cmp_z(m) == 0)
2222 return MP_RANGE;
2223 if (mp_iseven(m))
2224 return MP_UNDEF;
2225
2226 MP_DIGITS(&x) = 0;
2227
2228 if (a == c) {
2229 if ((res = mp_init_copy(&x, a)) != MP_OKAY)
2230 return res;
2231 if (a == m)
2232 m = &x;
2233 a = &x;
2234 } else if (m == c) {
2235 if ((res = mp_init_copy(&x, m)) != MP_OKAY)
2236 return res;
2237 m = &x;
2238 } else {
2239 MP_DIGITS(&x) = 0;
2240 }
2241
2242 MP_CHECKOK( s_mp_almost_inverse(a, m, c) );
2243 k = res;
2244 MP_CHECKOK( s_mp_fixup_reciprocal(c, m, k, c) );
2245CLEANUP:
2246 mp_clear(&x);
2247 return res;
2248}
2249
2250/* Known good algorithm for computing modular inverse. But slow. */
2251mp_err mp_invmod_xgcd(const mp_int *a, const mp_int *m, mp_int *c)
2252{
2253 mp_int g, x;
2254 mp_err res;
2255
2256 ARGCHK(a && m && c, MP_BADARG);
2257
2258 if(mp_cmp_z(a) == 0 || mp_cmp_z(m) == 0)
2259 return MP_RANGE;
2260
2261 MP_DIGITS(&g) = 0;
2262 MP_DIGITS(&x) = 0;
2263 MP_CHECKOK( mp_init(&x, FLAG(a)) );
2264 MP_CHECKOK( mp_init(&g, FLAG(a)) );
2265
2266 MP_CHECKOK( mp_xgcd(a, m, &g, &x, NULL) );
2267
2268 if (mp_cmp_d(&g, 1) != MP_EQ) {
2269 res = MP_UNDEF;
2270 goto CLEANUP;
2271 }
2272
2273 res = mp_mod(&x, m, c);
2274 SIGN(c) = SIGN(a);
2275
2276CLEANUP:
2277 mp_clear(&x);
2278 mp_clear(&g);
2279
2280 return res;
2281}
2282
2283/* modular inverse where modulus is 2**k. */
2284/* c = a**-1 mod 2**k */
2285mp_err s_mp_invmod_2d(const mp_int *a, mp_size k, mp_int *c)
2286{
2287 mp_err res;
2288 mp_size ix = k + 4;
2289 mp_int t0, t1, val, tmp, two2k;
2290
2291 static const mp_digit d2 = 2;
2292 static const mp_int two = { 0, MP_ZPOS, 1, 1, (mp_digit *)&d2 };
2293
2294 if (mp_iseven(a))
2295 return MP_UNDEF;
2296 if (k <= MP_DIGIT_BIT) {
2297 mp_digit i = s_mp_invmod_radix(MP_DIGIT(a,0));
2298 if (k < MP_DIGIT_BIT)
2299 i &= ((mp_digit)1 << k) - (mp_digit)1;
2300 mp_set(c, i);
2301 return MP_OKAY;
2302 }
2303 MP_DIGITS(&t0) = 0;
2304 MP_DIGITS(&t1) = 0;
2305 MP_DIGITS(&val) = 0;
2306 MP_DIGITS(&tmp) = 0;
2307 MP_DIGITS(&two2k) = 0;
2308 MP_CHECKOK( mp_init_copy(&val, a) );
2309 s_mp_mod_2d(&val, k);
2310 MP_CHECKOK( mp_init_copy(&t0, &val) );
2311 MP_CHECKOK( mp_init_copy(&t1, &t0) );
2312 MP_CHECKOK( mp_init(&tmp, FLAG(a)) );
2313 MP_CHECKOK( mp_init(&two2k, FLAG(a)) );
2314 MP_CHECKOK( s_mp_2expt(&two2k, k) );
2315 do {
2316 MP_CHECKOK( mp_mul(&val, &t1, &tmp) );
2317 MP_CHECKOK( mp_sub(&two, &tmp, &tmp) );
2318 MP_CHECKOK( mp_mul(&t1, &tmp, &t1) );
2319 s_mp_mod_2d(&t1, k);
2320 while (MP_SIGN(&t1) != MP_ZPOS) {
2321 MP_CHECKOK( mp_add(&t1, &two2k, &t1) );
2322 }
2323 if (mp_cmp(&t1, &t0) == MP_EQ)
2324 break;
2325 MP_CHECKOK( mp_copy(&t1, &t0) );
2326 } while (--ix > 0);
2327 if (!ix) {
2328 res = MP_UNDEF;
2329 } else {
2330 mp_exch(c, &t1);
2331 }
2332
2333CLEANUP:
2334 mp_clear(&t0);
2335 mp_clear(&t1);
2336 mp_clear(&val);
2337 mp_clear(&tmp);
2338 mp_clear(&two2k);
2339 return res;
2340}
2341
2342mp_err s_mp_invmod_even_m(const mp_int *a, const mp_int *m, mp_int *c)
2343{
2344 mp_err res;
2345 mp_size k;
2346 mp_int oddFactor, evenFactor; /* factors of the modulus */
2347 mp_int oddPart, evenPart; /* parts to combine via CRT. */
2348 mp_int C2, tmp1, tmp2;
2349
2350 /*static const mp_digit d1 = 1; */
2351 /*static const mp_int one = { MP_ZPOS, 1, 1, (mp_digit *)&d1 }; */
2352
2353 if ((res = s_mp_ispow2(m)) >= 0) {
2354 k = res;
2355 return s_mp_invmod_2d(a, k, c);
2356 }
2357 MP_DIGITS(&oddFactor) = 0;
2358 MP_DIGITS(&evenFactor) = 0;
2359 MP_DIGITS(&oddPart) = 0;
2360 MP_DIGITS(&evenPart) = 0;
2361 MP_DIGITS(&C2) = 0;
2362 MP_DIGITS(&tmp1) = 0;
2363 MP_DIGITS(&tmp2) = 0;
2364
2365 MP_CHECKOK( mp_init_copy(&oddFactor, m) ); /* oddFactor = m */
2366 MP_CHECKOK( mp_init(&evenFactor, FLAG(m)) );
2367 MP_CHECKOK( mp_init(&oddPart, FLAG(m)) );
2368 MP_CHECKOK( mp_init(&evenPart, FLAG(m)) );
2369 MP_CHECKOK( mp_init(&C2, FLAG(m)) );
2370 MP_CHECKOK( mp_init(&tmp1, FLAG(m)) );
2371 MP_CHECKOK( mp_init(&tmp2, FLAG(m)) );
2372
2373 k = mp_trailing_zeros(m);
2374 s_mp_div_2d(&oddFactor, k);
2375 MP_CHECKOK( s_mp_2expt(&evenFactor, k) );
2376
2377 /* compute a**-1 mod oddFactor. */
2378 MP_CHECKOK( s_mp_invmod_odd_m(a, &oddFactor, &oddPart) );
2379 /* compute a**-1 mod evenFactor, where evenFactor == 2**k. */
2380 MP_CHECKOK( s_mp_invmod_2d( a, k, &evenPart) );
2381
  /* Use the Chinese Remainder Theorem to compute a**-1 mod m. */
2383 /* let m1 = oddFactor, v1 = oddPart,
2384 * let m2 = evenFactor, v2 = evenPart.
2385 */
2386
2387 /* Compute C2 = m1**-1 mod m2. */
2388 MP_CHECKOK( s_mp_invmod_2d(&oddFactor, k, &C2) );
2389
2390 /* compute u = (v2 - v1)*C2 mod m2 */
2391 MP_CHECKOK( mp_sub(&evenPart, &oddPart, &tmp1) );
2392 MP_CHECKOK( mp_mul(&tmp1, &C2, &tmp2) );
2393 s_mp_mod_2d(&tmp2, k);
2394 while (MP_SIGN(&tmp2) != MP_ZPOS) {
2395 MP_CHECKOK( mp_add(&tmp2, &evenFactor, &tmp2) );
2396 }
2397
2398 /* compute answer = v1 + u*m1 */
2399 MP_CHECKOK( mp_mul(&tmp2, &oddFactor, c) );
2400 MP_CHECKOK( mp_add(&oddPart, c, c) );
2401 /* not sure this is necessary, but it's low cost if not. */
2402 MP_CHECKOK( mp_mod(c, m, c) );
2403
2404CLEANUP:
2405 mp_clear(&oddFactor);
2406 mp_clear(&evenFactor);
2407 mp_clear(&oddPart);
2408 mp_clear(&evenPart);
2409 mp_clear(&C2);
2410 mp_clear(&tmp1);
2411 mp_clear(&tmp2);
2412 return res;
2413}
2414
2415
2416/* {{{ mp_invmod(a, m, c) */
2417
2418/*
2419 mp_invmod(a, m, c)
2420
2421 Compute c = a^-1 (mod m), if there is an inverse for a (mod m).
  An inverse exists exactly when gcd(a, m) = 1; if it does not, MP_UNDEF
  is returned, and there is no inverse.
2424 */
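
/*
  Usage sketch (illustrative only), given already-initialized a, m and
  c: the inverse of 3 modulo 7 is 5, since 3 * 5 = 15 == 1 (mod 7), so

    mp_set(&a, 3);
    mp_set(&m, 7);
    res = mp_invmod(&a, &m, &c);

  leaves c = 5; res is MP_UNDEF when gcd(a, m) != 1.  Odd and even
  moduli are dispatched to the helper routines above.
*/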
2425
2426mp_err mp_invmod(const mp_int *a, const mp_int *m, mp_int *c)
2427{
2428
2429 ARGCHK(a && m && c, MP_BADARG);
2430
2431 if(mp_cmp_z(a) == 0 || mp_cmp_z(m) == 0)
2432 return MP_RANGE;
2433
2434 if (mp_isodd(m)) {
2435 return s_mp_invmod_odd_m(a, m, c);
2436 }
2437 if (mp_iseven(a))
    return MP_UNDEF; /* not invertible */
2439
2440 return s_mp_invmod_even_m(a, m, c);
2441
2442} /* end mp_invmod() */
2443
2444/* }}} */
2445#endif /* if MP_NUMTH */
2446
2447/* }}} */
2448
2449/*------------------------------------------------------------------------*/
2450/* {{{ mp_print(mp, ofp) */
2451
2452#if MP_IOFUNC
2453/*
2454 mp_print(mp, ofp)
2455
2456 Print a textual representation of the given mp_int on the output
2457 stream 'ofp'. Output is generated using the internal radix.
2458 */
2459
2460void mp_print(mp_int *mp, FILE *ofp)
2461{
2462 int ix;
2463
2464 if(mp == NULL || ofp == NULL)
2465 return;
2466
2467 fputc((SIGN(mp) == NEG) ? '-' : '+', ofp);
2468
2469 for(ix = USED(mp) - 1; ix >= 0; ix--) {
2470 fprintf(ofp, DIGIT_FMT, DIGIT(mp, ix));
2471 }
2472
2473} /* end mp_print() */
2474
2475#endif /* if MP_IOFUNC */
2476
2477/* }}} */
2478
2479/*------------------------------------------------------------------------*/
2480/* {{{ More I/O Functions */
2481
2482/* {{{ mp_read_raw(mp, str, len) */
2483
2484/*
2485 mp_read_raw(mp, str, len)
2486
2487 Read in a raw value (base 256) into the given mp_int
2488 */
2489
2490mp_err mp_read_raw(mp_int *mp, char *str, int len)
2491{
2492 int ix;
2493 mp_err res;
2494 unsigned char *ustr = (unsigned char *)str;
2495
2496 ARGCHK(mp != NULL && str != NULL && len > 0, MP_BADARG);
2497
2498 mp_zero(mp);
2499
2500 /* Get sign from first byte */
2501 if(ustr[0])
2502 SIGN(mp) = NEG;
2503 else
2504 SIGN(mp) = ZPOS;
2505
2506 /* Read the rest of the digits */
2507 for(ix = 1; ix < len; ix++) {
2508 if((res = mp_mul_d(mp, 256, mp)) != MP_OKAY)
2509 return res;
2510 if((res = mp_add_d(mp, ustr[ix], mp)) != MP_OKAY)
2511 return res;
2512 }
2513
2514 return MP_OKAY;
2515
2516} /* end mp_read_raw() */
2517
2518/* }}} */
2519
2520/* {{{ mp_raw_size(mp) */
2521
2522int mp_raw_size(mp_int *mp)
2523{
2524 ARGCHK(mp != NULL, 0);
2525
2526 return (USED(mp) * sizeof(mp_digit)) + 1;
2527
2528} /* end mp_raw_size() */
2529
2530/* }}} */
2531
2532/* {{{ mp_toraw(mp, str) */
2533
2534mp_err mp_toraw(mp_int *mp, char *str)
2535{
2536 int ix, jx, pos = 1;
2537
2538 ARGCHK(mp != NULL && str != NULL, MP_BADARG);
2539
2540 str[0] = (char)SIGN(mp);
2541
2542 /* Iterate over each digit... */
2543 for(ix = USED(mp) - 1; ix >= 0; ix--) {
2544 mp_digit d = DIGIT(mp, ix);
2545
2546 /* Unpack digit bytes, high order first */
2547 for(jx = sizeof(mp_digit) - 1; jx >= 0; jx--) {
2548 str[pos++] = (char)(d >> (jx * CHAR_BIT));
2549 }
2550 }
2551
2552 return MP_OKAY;
2553
2554} /* end mp_toraw() */
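
/*
  Usage sketch (illustrative only): serializing an initialized mp_int in
  the raw (base 256) format used by mp_toraw() and mp_read_raw().
  Assumes <stdlib.h> for malloc/free and a second initialized mp_int b;
  error handling is abbreviated.

    int   len = mp_raw_size(&a);
    char *buf = malloc(len);
    if (buf != NULL && mp_toraw(&a, buf) == MP_OKAY) {
      mp_read_raw(&b, buf, len);
    }
    free(buf);

  buf[0] carries the sign (zero for positive, nonzero for negative) and
  the remaining bytes hold the digits, most significant byte first.
 */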
2555
2556/* }}} */
2557
2558/* {{{ mp_read_radix(mp, str, radix) */
2559
2560/*
2561 mp_read_radix(mp, str, radix)
2562
  Read an integer from the given string, and set mp to the resulting
  value.  The input is presumed to be in the given radix.  Leading
  non-digit characters are ignored, and the function reads until a
  non-digit character or the end of the string.
2567 */
2568
2569mp_err mp_read_radix(mp_int *mp, const char *str, int radix)
2570{
2571 int ix = 0, val = 0;
2572 mp_err res;
2573 mp_sign sig = ZPOS;
2574
2575 ARGCHK(mp != NULL && str != NULL && radix >= 2 && radix <= MAX_RADIX,
2576 MP_BADARG);
2577
2578 mp_zero(mp);
2579
2580 /* Skip leading non-digit characters until a digit or '-' or '+' */
2581 while(str[ix] &&
2582 (s_mp_tovalue(str[ix], radix) < 0) &&
2583 str[ix] != '-' &&
2584 str[ix] != '+') {
2585 ++ix;
2586 }
2587
2588 if(str[ix] == '-') {
2589 sig = NEG;
2590 ++ix;
2591 } else if(str[ix] == '+') {
2592 sig = ZPOS; /* this is the default anyway... */
2593 ++ix;
2594 }
2595
2596 while((val = s_mp_tovalue(str[ix], radix)) >= 0) {
2597 if((res = s_mp_mul_d(mp, radix)) != MP_OKAY)
2598 return res;
2599 if((res = s_mp_add_d(mp, val)) != MP_OKAY)
2600 return res;
2601 ++ix;
2602 }
2603
2604 if(s_mp_cmp_d(mp, 0) == MP_EQ)
2605 SIGN(mp) = ZPOS;
2606 else
2607 SIGN(mp) = sig;
2608
2609 return MP_OKAY;
2610
2611} /* end mp_read_radix() */
2612
2613mp_err mp_read_variable_radix(mp_int *a, const char * str, int default_radix)
2614{
2615 int radix = default_radix;
2616 int cx;
2617 mp_sign sig = ZPOS;
2618 mp_err res;
2619
2620 /* Skip leading non-digit characters until a digit or '-' or '+' */
2621 while ((cx = *str) != 0 &&
2622 (s_mp_tovalue(cx, radix) < 0) &&
2623 cx != '-' &&
2624 cx != '+') {
2625 ++str;
2626 }
2627
2628 if (cx == '-') {
2629 sig = NEG;
2630 ++str;
2631 } else if (cx == '+') {
2632 sig = ZPOS; /* this is the default anyway... */
2633 ++str;
2634 }
2635
2636 if (str[0] == '0') {
2637 if ((str[1] | 0x20) == 'x') {
2638 radix = 16;
2639 str += 2;
2640 } else {
2641 radix = 8;
2642 str++;
2643 }
2644 }
2645 res = mp_read_radix(a, str, radix);
2646 if (res == MP_OKAY) {
2647 MP_SIGN(a) = (s_mp_cmp_d(a, 0) == MP_EQ) ? ZPOS : sig;
2648 }
2649 return res;
2650}
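
/*
  Usage sketch (illustrative only): parsing text with the two readers
  above.  Assumes a is an mp_int the caller has already initialized and
  that return codes are checked in real code.

    mp_read_radix(&a, "DEADBEEF", 16);
    mp_read_radix(&a, "-1011", 2);
    mp_read_variable_radix(&a, "0x1F", 10);
    mp_read_variable_radix(&a, "017", 10);

  The first two calls parse 0xDEADBEEF and -11.  In the last two, the
  "0x" prefix overrides the default radix (value 31), and a bare
  leading zero selects octal (value 15).
 */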
2651
2652/* }}} */
2653
2654/* {{{ mp_radix_size(mp, radix) */
2655
2656int mp_radix_size(mp_int *mp, int radix)
2657{
2658 int bits;
2659
2660 if(!mp || radix < 2 || radix > MAX_RADIX)
2661 return 0;
2662
2663 bits = USED(mp) * DIGIT_BIT - 1;
2664
2665 return s_mp_outlen(bits, radix);
2666
2667} /* end mp_radix_size() */
2668
2669/* }}} */
2670
2671/* {{{ mp_toradix(mp, str, radix) */
2672
2673mp_err mp_toradix(mp_int *mp, char *str, int radix)
2674{
2675 int ix, pos = 0;
2676
2677 ARGCHK(mp != NULL && str != NULL, MP_BADARG);
2678 ARGCHK(radix > 1 && radix <= MAX_RADIX, MP_RANGE);
2679
2680 if(mp_cmp_z(mp) == MP_EQ) {
2681 str[0] = '0';
2682 str[1] = '\0';
2683 } else {
2684 mp_err res;
2685 mp_int tmp;
2686 mp_sign sgn;
2687 mp_digit rem, rdx = (mp_digit)radix;
2688 char ch;
2689
2690 if((res = mp_init_copy(&tmp, mp)) != MP_OKAY)
2691 return res;
2692
2693 /* Save sign for later, and take absolute value */
2694 sgn = SIGN(&tmp); SIGN(&tmp) = ZPOS;
2695
2696 /* Generate output digits in reverse order */
2697 while(mp_cmp_z(&tmp) != 0) {
2698 if((res = mp_div_d(&tmp, rdx, &tmp, &rem)) != MP_OKAY) {
2699 mp_clear(&tmp);
2700 return res;
2701 }
2702
2703 /* Generate digits, use capital letters */
2704 ch = s_mp_todigit(rem, radix, 0);
2705
2706 str[pos++] = ch;
2707 }
2708
2709 /* Add - sign if original value was negative */
2710 if(sgn == NEG)
2711 str[pos++] = '-';
2712
2713 /* Add trailing NUL to end the string */
2714 str[pos--] = '\0';
2715
2716 /* Reverse the digits and sign indicator */
2717 ix = 0;
    while(ix < pos) {
      char t = str[ix];

      str[ix] = str[pos];
      str[pos] = t;
      ++ix;
      --pos;
    }
2726
2727 mp_clear(&tmp);
2728 }
2729
2730 return MP_OKAY;
2731
2732} /* end mp_toradix() */
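
/*
  Usage sketch (illustrative only): converting an initialized mp_int to
  a NUL-terminated decimal string.  The helper name is arbitrary and
  <stdlib.h> is assumed.  mp_radix_size() estimates the space needed,
  including the terminator; one extra byte of slack is added here for
  the '-' of negative values.

    static char *to_decimal(mp_int *v)
    {
      int   len = mp_radix_size(v, 10);
      char *buf = (len > 0) ? malloc((size_t)len + 1) : NULL;

      if (buf != NULL && mp_toradix(v, buf, 10) != MP_OKAY) {
        free(buf);
        buf = NULL;
      }
      return buf;
    }
 */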
2733
2734/* }}} */
2735
2736/* {{{ mp_tovalue(ch, r) */
2737
2738int mp_tovalue(char ch, int r)
2739{
2740 return s_mp_tovalue(ch, r);
2741
2742} /* end mp_tovalue() */
2743
2744/* }}} */
2745
2746/* }}} */
2747
2748/* {{{ mp_strerror(ec) */
2749
2750/*
2751 mp_strerror(ec)
2752
2753 Return a string describing the meaning of error code 'ec'. The
2754 string returned is allocated in static memory, so the caller should
2755 not attempt to modify or free the memory associated with this
2756 string.
2757 */
2758const char *mp_strerror(mp_err ec)
2759{
2760 int aec = (ec < 0) ? -ec : ec;
2761
  /* Error codes are zero or negative, so these comparisons go in the
     right direction */
2764 if(ec < MP_LAST_CODE || ec > MP_OKAY) {
2765 return mp_err_string[0]; /* unknown error code */
2766 } else {
2767 return mp_err_string[aec + 1];
2768 }
2769
2770} /* end mp_strerror() */
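
/*
  Usage sketch (illustrative only): reporting a failure.  Assumes
  <stdio.h>; the returned string is static and must not be freed.

    if ((res = mp_invmod(&a, &m, &inv)) != MP_OKAY)
      fprintf(stderr, "mp_invmod failed: %s\n", mp_strerror(res));
 */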
2771
2772/* }}} */
2773
2774/*========================================================================*/
2775/*------------------------------------------------------------------------*/
2776/* Static function definitions (internal use only) */
2777
2778/* {{{ Memory management */
2779
2780/* {{{ s_mp_grow(mp, min) */
2781
2782/* Make sure there are at least 'min' digits allocated to mp */
2783mp_err s_mp_grow(mp_int *mp, mp_size min)
2784{
2785 if(min > ALLOC(mp)) {
2786 mp_digit *tmp;
2787
2788 /* Set min to next nearest default precision block size */
2789 min = MP_ROUNDUP(min, s_mp_defprec);
2790
2791 if((tmp = s_mp_alloc(min, sizeof(mp_digit), FLAG(mp))) == NULL)
2792 return MP_MEM;
2793
2794 s_mp_copy(DIGITS(mp), tmp, USED(mp));
2795
2796#if MP_CRYPTO
2797 s_mp_setz(DIGITS(mp), ALLOC(mp));
2798#endif
2799 s_mp_free(DIGITS(mp), ALLOC(mp));
2800 DIGITS(mp) = tmp;
2801 ALLOC(mp) = min;
2802 }
2803
2804 return MP_OKAY;
2805
2806} /* end s_mp_grow() */
2807
2808/* }}} */
2809
2810/* {{{ s_mp_pad(mp, min) */
2811
2812/* Make sure the used size of mp is at least 'min', growing if needed */
2813mp_err s_mp_pad(mp_int *mp, mp_size min)
2814{
2815 if(min > USED(mp)) {
2816 mp_err res;
2817
2818 /* Make sure there is room to increase precision */
2819 if (min > ALLOC(mp)) {
2820 if ((res = s_mp_grow(mp, min)) != MP_OKAY)
2821 return res;
2822 } else {
2823 s_mp_setz(DIGITS(mp) + USED(mp), min - USED(mp));
2824 }
2825
2826 /* Increase precision; should already be 0-filled */
2827 USED(mp) = min;
2828 }
2829
2830 return MP_OKAY;
2831
2832} /* end s_mp_pad() */
2833
2834/* }}} */
2835
2836/* {{{ s_mp_setz(dp, count) */
2837
2838#if MP_MACRO == 0
2839/* Set 'count' digits pointed to by dp to be zeroes */
2840void s_mp_setz(mp_digit *dp, mp_size count)
2841{
2842#if MP_MEMSET == 0
2843 int ix;
2844
2845 for(ix = 0; ix < count; ix++)
2846 dp[ix] = 0;
2847#else
2848 memset(dp, 0, count * sizeof(mp_digit));
2849#endif
2850
2851} /* end s_mp_setz() */
2852#endif
2853
2854/* }}} */
2855
2856/* {{{ s_mp_copy(sp, dp, count) */
2857
2858#if MP_MACRO == 0
2859/* Copy 'count' digits from sp to dp */
2860void s_mp_copy(const mp_digit *sp, mp_digit *dp, mp_size count)
2861{
2862#if MP_MEMCPY == 0
2863 int ix;
2864
2865 for(ix = 0; ix < count; ix++)
2866 dp[ix] = sp[ix];
2867#else
2868 memcpy(dp, sp, count * sizeof(mp_digit));
2869#endif
2870
2871} /* end s_mp_copy() */
2872#endif
2873
2874/* }}} */
2875
2876/* {{{ s_mp_alloc(nb, ni, kmflag) */
2877
2878#if MP_MACRO == 0
/* Allocate nb records of ni bytes each, and return a pointer to that */
2880void *s_mp_alloc(size_t nb, size_t ni, int kmflag)
2881{
2882 ++mp_allocs;
2883#ifdef _KERNEL
2884 mp_int *mp;
2885 mp = kmem_zalloc(nb * ni, kmflag);
2886 if (mp != NULL)
2887 FLAG(mp) = kmflag;
2888 return (mp);
2889#else
2890 return calloc(nb, ni);
2891#endif
2892
2893} /* end s_mp_alloc() */
2894#endif
2895
2896/* }}} */
2897
2898/* {{{ s_mp_free(ptr) */
2899
2900#if MP_MACRO == 0
2901/* Free the memory pointed to by ptr */
2902void s_mp_free(void *ptr, mp_size alloc)
2903{
2904 if(ptr) {
2905 ++mp_frees;
2906#ifdef _KERNEL
2907 kmem_free(ptr, alloc * sizeof (mp_digit));
2908#else
2909 free(ptr);
2910#endif
2911 }
2912} /* end s_mp_free() */
2913#endif
2914
2915/* }}} */
2916
2917/* {{{ s_mp_clamp(mp) */
2918
2919#if MP_MACRO == 0
2920/* Remove leading zeroes from the given value */
2921void s_mp_clamp(mp_int *mp)
2922{
2923 mp_size used = MP_USED(mp);
2924 while (used > 1 && DIGIT(mp, used - 1) == 0)
2925 --used;
2926 MP_USED(mp) = used;
2927} /* end s_mp_clamp() */
2928#endif
2929
2930/* }}} */
2931
2932/* {{{ s_mp_exch(a, b) */
2933
2934/* Exchange the data for a and b; (b, a) = (a, b) */
2935void s_mp_exch(mp_int *a, mp_int *b)
2936{
2937 mp_int tmp;
2938
2939 tmp = *a;
2940 *a = *b;
2941 *b = tmp;
2942
2943} /* end s_mp_exch() */
2944
2945/* }}} */
2946
2947/* }}} */
2948
2949/* {{{ Arithmetic helpers */
2950
2951/* {{{ s_mp_lshd(mp, p) */
2952
2953/*
2954 Shift mp leftward by p digits, growing if needed, and zero-filling
2955 the in-shifted digits at the right end. This is a convenient
  alternative to multiplication by powers of the radix.  The used size
  of mp is updated here as needed; callers do not have to adjust
  USED(mp) beforehand.
2959 */
2960
2961mp_err s_mp_lshd(mp_int *mp, mp_size p)
2962{
2963 mp_err res;
2964 mp_size pos;
2965 int ix;
2966
2967 if(p == 0)
2968 return MP_OKAY;
2969
2970 if (MP_USED(mp) == 1 && MP_DIGIT(mp, 0) == 0)
2971 return MP_OKAY;
2972
2973 if((res = s_mp_pad(mp, USED(mp) + p)) != MP_OKAY)
2974 return res;
2975
2976 pos = USED(mp) - 1;
2977
2978 /* Shift all the significant figures over as needed */
2979 for(ix = pos - p; ix >= 0; ix--)
2980 DIGIT(mp, ix + p) = DIGIT(mp, ix);
2981
2982 /* Fill the bottom digits with zeroes */
2983 for(ix = 0; ix < p; ix++)
2984 DIGIT(mp, ix) = 0;
2985
2986 return MP_OKAY;
2987
2988} /* end s_mp_lshd() */
2989
2990/* }}} */
2991
2992/* {{{ s_mp_mul_2d(mp, d) */
2993
2994/*
2995 Multiply the integer by 2^d, where d is a number of bits. This
2996 amounts to a bitwise shift of the value.
2997 */
2998mp_err s_mp_mul_2d(mp_int *mp, mp_digit d)
2999{
3000 mp_err res;
3001 mp_digit dshift, bshift;
3002 mp_digit mask;
3003
3004 ARGCHK(mp != NULL, MP_BADARG);
3005
  dshift = d / MP_DIGIT_BIT;
  bshift = d % MP_DIGIT_BIT;
  /* bits to be shifted out of the top word; a shift count of
     MP_DIGIT_BIT would be undefined, so handle bshift == 0 separately */
  if (bshift) {
    mask  = ((mp_digit)~0 << (MP_DIGIT_BIT - bshift));
    mask &= MP_DIGIT(mp, MP_USED(mp) - 1);
  } else {
    mask = 0;
  }
3011
3012 if (MP_OKAY != (res = s_mp_pad(mp, MP_USED(mp) + dshift + (mask != 0) )))
3013 return res;
3014
3015 if (dshift && MP_OKAY != (res = s_mp_lshd(mp, dshift)))
3016 return res;
3017
3018 if (bshift) {
3019 mp_digit *pa = MP_DIGITS(mp);
3020 mp_digit *alim = pa + MP_USED(mp);
3021 mp_digit prev = 0;
3022
3023 for (pa += dshift; pa < alim; ) {
3024 mp_digit x = *pa;
3025 *pa++ = (x << bshift) | prev;
3026 prev = x >> (DIGIT_BIT - bshift);
3027 }
3028 }
3029
3030 s_mp_clamp(mp);
3031 return MP_OKAY;
3032} /* end s_mp_mul_2d() */
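
/*
  Worked example of the decomposition above: with MP_DIGIT_BIT == 32 and
  d == 40, dshift == 1 and bshift == 8, so the value is first shifted
  left by one whole digit and then each digit is shifted left 8 bits,
  the 8 bits falling off the top of each digit being carried into the
  bottom of the next higher digit.
 */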

/* }}} */

3034/* {{{ s_mp_rshd(mp, p) */
3035
3036/*
3037 Shift mp rightward by p digits. Maintains the invariant that
3038 digits above the precision are all zero. Digits shifted off the
3039 end are lost. Cannot fail.
3040 */
3041
3042void s_mp_rshd(mp_int *mp, mp_size p)
3043{
3044 mp_size ix;
3045 mp_digit *src, *dst;
3046
3047 if(p == 0)
3048 return;
3049
3050 /* Shortcut when all digits are to be shifted off */
3051 if(p >= USED(mp)) {
3052 s_mp_setz(DIGITS(mp), ALLOC(mp));
3053 USED(mp) = 1;
3054 SIGN(mp) = ZPOS;
3055 return;
3056 }
3057
3058 /* Shift all the significant figures over as needed */
3059 dst = MP_DIGITS(mp);
3060 src = dst + p;
3061 for (ix = USED(mp) - p; ix > 0; ix--)
3062 *dst++ = *src++;
3063
3064 MP_USED(mp) -= p;
3065 /* Fill the top digits with zeroes */
3066 while (p-- > 0)
3067 *dst++ = 0;
3068
3069#if 0
3070 /* Strip off any leading zeroes */
3071 s_mp_clamp(mp);
3072#endif
3073
3074} /* end s_mp_rshd() */
3075
3076/* }}} */
3077
3078/* {{{ s_mp_div_2(mp) */
3079
3080/* Divide by two -- take advantage of radix properties to do it fast */
3081void s_mp_div_2(mp_int *mp)
3082{
3083 s_mp_div_2d(mp, 1);
3084
3085} /* end s_mp_div_2() */
3086
3087/* }}} */
3088
3089/* {{{ s_mp_mul_2(mp) */
3090
3091mp_err s_mp_mul_2(mp_int *mp)
3092{
3093 mp_digit *pd;
3094 unsigned int ix, used;
3095 mp_digit kin = 0;
3096
3097 /* Shift digits leftward by 1 bit */
3098 used = MP_USED(mp);
3099 pd = MP_DIGITS(mp);
3100 for (ix = 0; ix < used; ix++) {
3101 mp_digit d = *pd;
3102 *pd++ = (d << 1) | kin;
3103 kin = (d >> (DIGIT_BIT - 1));
3104 }
3105
3106 /* Deal with rollover from last digit */
3107 if (kin) {
3108 if (ix >= ALLOC(mp)) {
3109 mp_err res;
3110 if((res = s_mp_grow(mp, ALLOC(mp) + 1)) != MP_OKAY)
3111 return res;
3112 }
3113
3114 DIGIT(mp, ix) = kin;
3115 USED(mp) += 1;
3116 }
3117
3118 return MP_OKAY;
3119
3120} /* end s_mp_mul_2() */
3121
3122/* }}} */
3123
3124/* {{{ s_mp_mod_2d(mp, d) */
3125
3126/*
  Reduce the integer modulo 2^d, where d is a number of bits.  This
  amounts to a bitwise AND of the value, and does not require the full
  division code.
3130 */
3131void s_mp_mod_2d(mp_int *mp, mp_digit d)
3132{
3133 mp_size ndig = (d / DIGIT_BIT), nbit = (d % DIGIT_BIT);
3134 mp_size ix;
3135 mp_digit dmask;
3136
3137 if(ndig >= USED(mp))
3138 return;
3139
3140 /* Flush all the bits above 2^d in its digit */
3141 dmask = ((mp_digit)1 << nbit) - 1;
3142 DIGIT(mp, ndig) &= dmask;
3143
3144 /* Flush all digits above the one with 2^d in it */
3145 for(ix = ndig + 1; ix < USED(mp); ix++)
3146 DIGIT(mp, ix) = 0;
3147
3148 s_mp_clamp(mp);
3149
3150} /* end s_mp_mod_2d() */
3151
3152/* }}} */
3153
3154/* {{{ s_mp_div_2d(mp, d) */
3155
3156/*
3157 Divide the integer by 2^d, where d is a number of bits. This
3158 amounts to a bitwise shift of the value, and does not require the
3159 full division code (used in Barrett reduction, see below)
3160 */
3161void s_mp_div_2d(mp_int *mp, mp_digit d)
3162{
3163 int ix;
3164 mp_digit save, next, mask;
3165
3166 s_mp_rshd(mp, d / DIGIT_BIT);
3167 d %= DIGIT_BIT;
3168 if (d) {
3169 mask = ((mp_digit)1 << d) - 1;
3170 save = 0;
3171 for(ix = USED(mp) - 1; ix >= 0; ix--) {
3172 next = DIGIT(mp, ix) & mask;
3173 DIGIT(mp, ix) = (DIGIT(mp, ix) >> d) | (save << (DIGIT_BIT - d));
3174 save = next;
3175 }
3176 }
3177 s_mp_clamp(mp);
3178
3179} /* end s_mp_div_2d() */
3180
3181/* }}} */
3182
3183/* {{{ s_mp_norm(a, b, *d) */
3184
3185/*
3186 s_mp_norm(a, b, *d)
3187
3188 Normalize a and b for division, where b is the divisor. In order
3189 that we might make good guesses for quotient digits, we want the
3190 leading digit of b to be at least half the radix, which we
3191 accomplish by multiplying a and b by a power of 2. The exponent
3192 (shift count) is placed in *pd, so that the remainder can be shifted
3193 back at the end of the division process.
3194 */
3195
3196mp_err s_mp_norm(mp_int *a, mp_int *b, mp_digit *pd)
3197{
3198 mp_digit d;
3199 mp_digit mask;
3200 mp_digit b_msd;
3201 mp_err res = MP_OKAY;
3202
3203 d = 0;
3204 mask = DIGIT_MAX & ~(DIGIT_MAX >> 1); /* mask is msb of digit */
3205 b_msd = DIGIT(b, USED(b) - 1);
3206 while (!(b_msd & mask)) {
3207 b_msd <<= 1;
3208 ++d;
3209 }
3210
3211 if (d) {
3212 MP_CHECKOK( s_mp_mul_2d(a, d) );
3213 MP_CHECKOK( s_mp_mul_2d(b, d) );
3214 }
3215
3216 *pd = d;
3217CLEANUP:
3218 return res;
3219
3220} /* end s_mp_norm() */
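
/*
  Worked example (illustrative only): with MP_DIGIT_BIT == 16, a divisor
  whose most significant digit is 0x00A7 has its top bit clear, so
  s_mp_norm shifts both operands left by d == 8 bits and the leading
  digit becomes 0xA700 >= RADIX/2.  With the divisor normalized this
  way, each trial quotient digit guessed in s_mp_div() is too large by
  at most two (Knuth, vol. 2, sec. 4.3.1, Algorithm D).
 */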
3221
3222/* }}} */
3223
3224/* }}} */
3225
3226/* {{{ Primitive digit arithmetic */
3227
3228/* {{{ s_mp_add_d(mp, d) */
3229
3230/* Add d to |mp| in place */
3231mp_err s_mp_add_d(mp_int *mp, mp_digit d) /* unsigned digit addition */
3232{
3233#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3234 mp_word w, k = 0;
3235 mp_size ix = 1;
3236
3237 w = (mp_word)DIGIT(mp, 0) + d;
3238 DIGIT(mp, 0) = ACCUM(w);
3239 k = CARRYOUT(w);
3240
3241 while(ix < USED(mp) && k) {
3242 w = (mp_word)DIGIT(mp, ix) + k;
3243 DIGIT(mp, ix) = ACCUM(w);
3244 k = CARRYOUT(w);
3245 ++ix;
3246 }
3247
3248 if(k != 0) {
3249 mp_err res;
3250
3251 if((res = s_mp_pad(mp, USED(mp) + 1)) != MP_OKAY)
3252 return res;
3253
3254 DIGIT(mp, ix) = (mp_digit)k;
3255 }
3256
3257 return MP_OKAY;
3258#else
3259 mp_digit * pmp = MP_DIGITS(mp);
3260 mp_digit sum, mp_i, carry = 0;
3261 mp_err res = MP_OKAY;
3262 int used = (int)MP_USED(mp);
3263
3264 mp_i = *pmp;
3265 *pmp++ = sum = d + mp_i;
3266 carry = (sum < d);
3267 while (carry && --used > 0) {
3268 mp_i = *pmp;
3269 *pmp++ = sum = carry + mp_i;
3270 carry = !sum;
3271 }
3272 if (carry && !used) {
3273 /* mp is growing */
3274 used = MP_USED(mp);
3275 MP_CHECKOK( s_mp_pad(mp, used + 1) );
3276 MP_DIGIT(mp, used) = carry;
3277 }
3278CLEANUP:
3279 return res;
3280#endif
3281} /* end s_mp_add_d() */
3282
3283/* }}} */
3284
3285/* {{{ s_mp_sub_d(mp, d) */
3286
3287/* Subtract d from |mp| in place, assumes |mp| > d */
3288mp_err s_mp_sub_d(mp_int *mp, mp_digit d) /* unsigned digit subtract */
3289{
3290#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
3291 mp_word w, b = 0;
3292 mp_size ix = 1;
3293
3294 /* Compute initial subtraction */
3295 w = (RADIX + (mp_word)DIGIT(mp, 0)) - d;
3296 b = CARRYOUT(w) ? 0 : 1;
3297 DIGIT(mp, 0) = ACCUM(w);
3298
3299 /* Propagate borrows leftward */
3300 while(b && ix < USED(mp)) {
3301 w = (RADIX + (mp_word)DIGIT(mp, ix)) - b;
3302 b = CARRYOUT(w) ? 0 : 1;
3303 DIGIT(mp, ix) = ACCUM(w);
3304 ++ix;
3305 }
3306
3307 /* Remove leading zeroes */
3308 s_mp_clamp(mp);
3309
3310 /* If we have a borrow out, it's a violation of the input invariant */
3311 if(b)
3312 return MP_RANGE;
3313 else
3314 return MP_OKAY;
3315#else
3316 mp_digit *pmp = MP_DIGITS(mp);
3317 mp_digit mp_i, diff, borrow;
3318 mp_size used = MP_USED(mp);
3319
3320 mp_i = *pmp;
3321 *pmp++ = diff = mp_i - d;
3322 borrow = (diff > mp_i);
3323 while (borrow && --used) {
3324 mp_i = *pmp;
3325 *pmp++ = diff = mp_i - borrow;
3326 borrow = (diff > mp_i);
3327 }
3328 s_mp_clamp(mp);
3329 return (borrow && !used) ? MP_RANGE : MP_OKAY;
3330#endif
3331} /* end s_mp_sub_d() */
3332
3333/* }}} */
3334
3335/* {{{ s_mp_mul_d(a, d) */
3336
3337/* Compute a = a * d, single digit multiplication */
3338mp_err s_mp_mul_d(mp_int *a, mp_digit d)
3339{
3340 mp_err res;
3341 mp_size used;
3342 int pow;
3343
3344 if (!d) {
3345 mp_zero(a);
3346 return MP_OKAY;
3347 }
3348 if (d == 1)
3349 return MP_OKAY;
3350 if (0 <= (pow = s_mp_ispow2d(d))) {
3351 return s_mp_mul_2d(a, (mp_digit)pow);
3352 }
3353
3354 used = MP_USED(a);
3355 MP_CHECKOK( s_mp_pad(a, used + 1) );
3356
3357 s_mpv_mul_d(MP_DIGITS(a), used, d, MP_DIGITS(a));
3358
3359 s_mp_clamp(a);
3360
3361CLEANUP:
3362 return res;
3363
3364} /* end s_mp_mul_d() */
3365
3366/* }}} */
3367
3368/* {{{ s_mp_div_d(mp, d, r) */
3369
3370/*
3371 s_mp_div_d(mp, d, r)
3372
3373 Compute the quotient mp = mp / d and remainder r = mp mod d, for a
3374 single digit d. If r is null, the remainder will be discarded.
3375 */
3376
3377mp_err s_mp_div_d(mp_int *mp, mp_digit d, mp_digit *r)
3378{
3379#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_DIV_WORD)
3380 mp_word w = 0, q;
3381#else
3382 mp_digit w = 0, q;
3383#endif
3384 int ix;
3385 mp_err res;
3386 mp_int quot;
3387 mp_int rem;
3388
3389 if(d == 0)
3390 return MP_RANGE;
3391 if (d == 1) {
3392 if (r)
3393 *r = 0;
3394 return MP_OKAY;
3395 }
3396 /* could check for power of 2 here, but mp_div_d does that. */
3397 if (MP_USED(mp) == 1) {
3398 mp_digit n = MP_DIGIT(mp,0);
3399 mp_digit rem;
3400
3401 q = n / d;
3402 rem = n % d;
3403 MP_DIGIT(mp,0) = q;
3404 if (r)
3405 *r = rem;
3406 return MP_OKAY;
3407 }
3408
3409 MP_DIGITS(&rem) = 0;
3410 MP_DIGITS(&quot) = 0;
3411 /* Make room for the quotient */
3412 MP_CHECKOK( mp_init_size(&quot, USED(mp), FLAG(mp)) );
3413
3414#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_DIV_WORD)
3415 for(ix = USED(mp) - 1; ix >= 0; ix--) {
3416 w = (w << DIGIT_BIT) | DIGIT(mp, ix);
3417
3418 if(w >= d) {
3419 q = w / d;
3420 w = w % d;
3421 } else {
3422 q = 0;
3423 }
3424
3425 s_mp_lshd(&quot, 1);
3426 DIGIT(&quot, 0) = (mp_digit)q;
3427 }
3428#else
3429 {
3430 mp_digit p;
3431#if !defined(MP_ASSEMBLY_DIV_2DX1D)
3432 mp_digit norm;
3433#endif
3434
3435 MP_CHECKOK( mp_init_copy(&rem, mp) );
3436
3437#if !defined(MP_ASSEMBLY_DIV_2DX1D)
3438 MP_DIGIT(&quot, 0) = d;
3439 MP_CHECKOK( s_mp_norm(&rem, &quot, &norm) );
3440 if (norm)
3441 d <<= norm;
3442 MP_DIGIT(&quot, 0) = 0;
3443#endif
3444
3445 p = 0;
3446 for (ix = USED(&rem) - 1; ix >= 0; ix--) {
3447 w = DIGIT(&rem, ix);
3448
3449 if (p) {
3450 MP_CHECKOK( s_mpv_div_2dx1d(p, w, d, &q, &w) );
3451 } else if (w >= d) {
3452 q = w / d;
3453 w = w % d;
3454 } else {
3455 q = 0;
3456 }
3457
3458 MP_CHECKOK( s_mp_lshd(&quot, 1) );
3459 DIGIT(&quot, 0) = q;
3460 p = w;
3461 }
3462#if !defined(MP_ASSEMBLY_DIV_2DX1D)
3463 if (norm)
3464 w >>= norm;
3465#endif
3466 }
3467#endif
3468
3469 /* Deliver the remainder, if desired */
3470 if(r)
3471 *r = (mp_digit)w;
3472
3473 s_mp_clamp(&quot);
3474 mp_exch(&quot, mp);
3475CLEANUP:
3476 mp_clear(&quot);
3477 mp_clear(&rem);
3478
3479 return res;
3480} /* end s_mp_div_d() */
3481
3482/* }}} */
3483
3484
3485/* }}} */
3486
3487/* {{{ Primitive full arithmetic */
3488
3489/* {{{ s_mp_add(a, b) */
3490
3491/* Compute a = |a| + |b| */
3492mp_err s_mp_add(mp_int *a, const mp_int *b) /* magnitude addition */
3493{
3494#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3495 mp_word w = 0;
3496#else
3497 mp_digit d, sum, carry = 0;
3498#endif
3499 mp_digit *pa, *pb;
3500 mp_size ix;
3501 mp_size used;
3502 mp_err res;
3503
3504 /* Make sure a has enough precision for the output value */
3505 if((USED(b) > USED(a)) && (res = s_mp_pad(a, USED(b))) != MP_OKAY)
3506 return res;
3507
3508 /*
3509 Add up all digits up to the precision of b. If b had initially
3510 the same precision as a, or greater, we took care of it by the
3511 padding step above, so there is no problem. If b had initially
3512 less precision, we'll have to make sure the carry out is duly
3513 propagated upward among the higher-order digits of the sum.
3514 */
3515 pa = MP_DIGITS(a);
3516 pb = MP_DIGITS(b);
3517 used = MP_USED(b);
3518 for(ix = 0; ix < used; ix++) {
3519#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3520 w = w + *pa + *pb++;
3521 *pa++ = ACCUM(w);
3522 w = CARRYOUT(w);
3523#else
3524 d = *pa;
3525 sum = d + *pb++;
3526 d = (sum < d); /* detect overflow */
3527 *pa++ = sum += carry;
3528 carry = d + (sum < carry); /* detect overflow */
3529#endif
3530 }
3531
3532 /* If we run out of 'b' digits before we're actually done, make
3533 sure the carries get propagated upward...
3534 */
3535 used = MP_USED(a);
3536#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3537 while (w && ix < used) {
3538 w = w + *pa;
3539 *pa++ = ACCUM(w);
3540 w = CARRYOUT(w);
3541 ++ix;
3542 }
3543#else
3544 while (carry && ix < used) {
3545 sum = carry + *pa;
3546 *pa++ = sum;
3547 carry = !sum;
3548 ++ix;
3549 }
3550#endif
3551
3552 /* If there's an overall carry out, increase precision and include
3553 it. We could have done this initially, but why touch the memory
3554 allocator unless we're sure we have to?
3555 */
3556#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3557 if (w) {
3558 if((res = s_mp_pad(a, used + 1)) != MP_OKAY)
3559 return res;
3560
3561 DIGIT(a, ix) = (mp_digit)w;
3562 }
3563#else
3564 if (carry) {
3565 if((res = s_mp_pad(a, used + 1)) != MP_OKAY)
3566 return res;
3567
3568 DIGIT(a, used) = carry;
3569 }
3570#endif
3571
3572 return MP_OKAY;
3573} /* end s_mp_add() */
3574
3575/* }}} */
3576
3577/* Compute c = |a| + |b| */ /* magnitude addition */
3578mp_err s_mp_add_3arg(const mp_int *a, const mp_int *b, mp_int *c)
3579{
3580 mp_digit *pa, *pb, *pc;
3581#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3582 mp_word w = 0;
3583#else
3584 mp_digit sum, carry = 0, d;
3585#endif
3586 mp_size ix;
3587 mp_size used;
3588 mp_err res;
3589
3590 MP_SIGN(c) = MP_SIGN(a);
3591 if (MP_USED(a) < MP_USED(b)) {
3592 const mp_int *xch = a;
3593 a = b;
3594 b = xch;
3595 }
3596
3597 /* Make sure a has enough precision for the output value */
3598 if (MP_OKAY != (res = s_mp_pad(c, MP_USED(a))))
3599 return res;
3600
3601 /*
3602 Add up all digits up to the precision of b. If b had initially
3603 the same precision as a, or greater, we took care of it by the
3604 exchange step above, so there is no problem. If b had initially
3605 less precision, we'll have to make sure the carry out is duly
3606 propagated upward among the higher-order digits of the sum.
3607 */
3608 pa = MP_DIGITS(a);
3609 pb = MP_DIGITS(b);
3610 pc = MP_DIGITS(c);
3611 used = MP_USED(b);
3612 for (ix = 0; ix < used; ix++) {
3613#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3614 w = w + *pa++ + *pb++;
3615 *pc++ = ACCUM(w);
3616 w = CARRYOUT(w);
3617#else
3618 d = *pa++;
3619 sum = d + *pb++;
3620 d = (sum < d); /* detect overflow */
3621 *pc++ = sum += carry;
3622 carry = d + (sum < carry); /* detect overflow */
3623#endif
3624 }
3625
3626 /* If we run out of 'b' digits before we're actually done, make
3627 sure the carries get propagated upward...
3628 */
3629 for (used = MP_USED(a); ix < used; ++ix) {
3630#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3631 w = w + *pa++;
3632 *pc++ = ACCUM(w);
3633 w = CARRYOUT(w);
3634#else
3635 *pc++ = sum = carry + *pa++;
3636 carry = (sum < carry);
3637#endif
3638 }
3639
3640 /* If there's an overall carry out, increase precision and include
3641 it. We could have done this initially, but why touch the memory
3642 allocator unless we're sure we have to?
3643 */
3644#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3645 if (w) {
3646 if((res = s_mp_pad(c, used + 1)) != MP_OKAY)
3647 return res;
3648
3649 DIGIT(c, used) = (mp_digit)w;
3650 ++used;
3651 }
3652#else
3653 if (carry) {
3654 if((res = s_mp_pad(c, used + 1)) != MP_OKAY)
3655 return res;
3656
3657 DIGIT(c, used) = carry;
3658 ++used;
3659 }
3660#endif
3661 MP_USED(c) = used;
3662 return MP_OKAY;
3663}
3664/* {{{ s_mp_add_offset(a, b, offset) */
3665
3666/* Compute a = |a| + ( |b| * (RADIX ** offset) ) */
3667mp_err s_mp_add_offset(mp_int *a, mp_int *b, mp_size offset)
3668{
3669#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3670 mp_word w, k = 0;
3671#else
3672 mp_digit d, sum, carry = 0;
3673#endif
3674 mp_size ib;
3675 mp_size ia;
3676 mp_size lim;
3677 mp_err res;
3678
3679 /* Make sure a has enough precision for the output value */
3680 lim = MP_USED(b) + offset;
3681 if((lim > USED(a)) && (res = s_mp_pad(a, lim)) != MP_OKAY)
3682 return res;
3683
3684 /*
3685 Add up all digits up to the precision of b. If b had initially
3686 the same precision as a, or greater, we took care of it by the
3687 padding step above, so there is no problem. If b had initially
3688 less precision, we'll have to make sure the carry out is duly
3689 propagated upward among the higher-order digits of the sum.
3690 */
3691 lim = USED(b);
3692 for(ib = 0, ia = offset; ib < lim; ib++, ia++) {
3693#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3694 w = (mp_word)DIGIT(a, ia) + DIGIT(b, ib) + k;
3695 DIGIT(a, ia) = ACCUM(w);
3696 k = CARRYOUT(w);
3697#else
3698 d = MP_DIGIT(a, ia);
3699 sum = d + MP_DIGIT(b, ib);
3700 d = (sum < d);
3701 MP_DIGIT(a,ia) = sum += carry;
3702 carry = d + (sum < carry);
3703#endif
3704 }
3705
3706 /* If we run out of 'b' digits before we're actually done, make
3707 sure the carries get propagated upward...
3708 */
3709#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3710 for (lim = MP_USED(a); k && (ia < lim); ++ia) {
3711 w = (mp_word)DIGIT(a, ia) + k;
3712 DIGIT(a, ia) = ACCUM(w);
3713 k = CARRYOUT(w);
3714 }
3715#else
3716 for (lim = MP_USED(a); carry && (ia < lim); ++ia) {
3717 d = MP_DIGIT(a, ia);
3718 MP_DIGIT(a,ia) = sum = d + carry;
3719 carry = (sum < d);
3720 }
3721#endif
3722
3723 /* If there's an overall carry out, increase precision and include
3724 it. We could have done this initially, but why touch the memory
3725 allocator unless we're sure we have to?
3726 */
3727#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
3728 if(k) {
3729 if((res = s_mp_pad(a, USED(a) + 1)) != MP_OKAY)
3730 return res;
3731
3732 DIGIT(a, ia) = (mp_digit)k;
3733 }
3734#else
3735 if (carry) {
3736 if((res = s_mp_pad(a, lim + 1)) != MP_OKAY)
3737 return res;
3738
3739 DIGIT(a, lim) = carry;
3740 }
3741#endif
3742 s_mp_clamp(a);
3743
3744 return MP_OKAY;
3745
3746} /* end s_mp_add_offset() */
3747
3748/* }}} */
3749
3750/* {{{ s_mp_sub(a, b) */
3751
3752/* Compute a = |a| - |b|, assumes |a| >= |b| */
3753mp_err s_mp_sub(mp_int *a, const mp_int *b) /* magnitude subtract */
3754{
3755 mp_digit *pa, *pb, *limit;
3756#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
3757 mp_sword w = 0;
3758#else
3759 mp_digit d, diff, borrow = 0;
3760#endif
3761
3762 /*
3763 Subtract and propagate borrow. Up to the precision of b, this
3764 accounts for the digits of b; after that, we just make sure the
3765 carries get to the right place. This saves having to pad b out to
3766 the precision of a just to make the loops work right...
3767 */
3768 pa = MP_DIGITS(a);
3769 pb = MP_DIGITS(b);
3770 limit = pb + MP_USED(b);
3771 while (pb < limit) {
3772#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
3773 w = w + *pa - *pb++;
3774 *pa++ = ACCUM(w);
3775 w >>= MP_DIGIT_BIT;
3776#else
3777 d = *pa;
3778 diff = d - *pb++;
3779 d = (diff > d); /* detect borrow */
3780 if (borrow && --diff == MP_DIGIT_MAX)
3781 ++d;
3782 *pa++ = diff;
3783 borrow = d;
3784#endif
3785 }
3786 limit = MP_DIGITS(a) + MP_USED(a);
3787#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
3788 while (w && pa < limit) {
3789 w = w + *pa;
3790 *pa++ = ACCUM(w);
3791 w >>= MP_DIGIT_BIT;
3792 }
3793#else
3794 while (borrow && pa < limit) {
3795 d = *pa;
3796 *pa++ = diff = d - borrow;
3797 borrow = (diff > d);
3798 }
3799#endif
3800
3801 /* Clobber any leading zeroes we created */
3802 s_mp_clamp(a);
3803
3804 /*
3805 If there was a borrow out, then |b| > |a| in violation
3806 of our input invariant. We've already done the work,
3807 but we'll at least complain about it...
3808 */
3809#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
3810 return w ? MP_RANGE : MP_OKAY;
3811#else
3812 return borrow ? MP_RANGE : MP_OKAY;
3813#endif
3814} /* end s_mp_sub() */
3815
3816/* }}} */
3817
3818/* Compute c = |a| - |b|, assumes |a| >= |b| */ /* magnitude subtract */
3819mp_err s_mp_sub_3arg(const mp_int *a, const mp_int *b, mp_int *c)
3820{
3821 mp_digit *pa, *pb, *pc;
3822#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
3823 mp_sword w = 0;
3824#else
3825 mp_digit d, diff, borrow = 0;
3826#endif
3827 int ix, limit;
3828 mp_err res;
3829
3830 MP_SIGN(c) = MP_SIGN(a);
3831
3832 /* Make sure a has enough precision for the output value */
3833 if (MP_OKAY != (res = s_mp_pad(c, MP_USED(a))))
3834 return res;
3835
3836 /*
3837 Subtract and propagate borrow. Up to the precision of b, this
3838 accounts for the digits of b; after that, we just make sure the
3839 carries get to the right place. This saves having to pad b out to
3840 the precision of a just to make the loops work right...
3841 */
3842 pa = MP_DIGITS(a);
3843 pb = MP_DIGITS(b);
3844 pc = MP_DIGITS(c);
3845 limit = MP_USED(b);
3846 for (ix = 0; ix < limit; ++ix) {
3847#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
3848 w = w + *pa++ - *pb++;
3849 *pc++ = ACCUM(w);
3850 w >>= MP_DIGIT_BIT;
3851#else
3852 d = *pa++;
3853 diff = d - *pb++;
3854 d = (diff > d);
3855 if (borrow && --diff == MP_DIGIT_MAX)
3856 ++d;
3857 *pc++ = diff;
3858 borrow = d;
3859#endif
3860 }
3861 for (limit = MP_USED(a); ix < limit; ++ix) {
3862#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
3863 w = w + *pa++;
3864 *pc++ = ACCUM(w);
3865 w >>= MP_DIGIT_BIT;
3866#else
3867 d = *pa++;
3868 *pc++ = diff = d - borrow;
3869 borrow = (diff > d);
3870#endif
3871 }
3872
3873 /* Clobber any leading zeroes we created */
3874 MP_USED(c) = ix;
3875 s_mp_clamp(c);
3876
3877 /*
3878 If there was a borrow out, then |b| > |a| in violation
3879 of our input invariant. We've already done the work,
3880 but we'll at least complain about it...
3881 */
3882#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
3883 return w ? MP_RANGE : MP_OKAY;
3884#else
3885 return borrow ? MP_RANGE : MP_OKAY;
3886#endif
3887}
3888/* {{{ s_mp_mul(a, b) */
3889
3890/* Compute a = |a| * |b| */
3891mp_err s_mp_mul(mp_int *a, const mp_int *b)
3892{
3893 return mp_mul(a, b, a);
3894} /* end s_mp_mul() */
3895
3896/* }}} */
3897
3898#if defined(MP_USE_UINT_DIGIT) && defined(MP_USE_LONG_LONG_MULTIPLY)
3899/* This trick works on Sparc V8 CPUs with the Workshop compilers. */
3900#define MP_MUL_DxD(a, b, Phi, Plo) \
3901 { unsigned long long product = (unsigned long long)a * b; \
3902 Plo = (mp_digit)product; \
3903 Phi = (mp_digit)(product >> MP_DIGIT_BIT); }
3904#elif defined(OSF1)
3905#define MP_MUL_DxD(a, b, Phi, Plo) \
3906 { Plo = asm ("mulq %a0, %a1, %v0", a, b);\
3907 Phi = asm ("umulh %a0, %a1, %v0", a, b); }
3908#else
3909#define MP_MUL_DxD(a, b, Phi, Plo) \
3910 { mp_digit a0b1, a1b0; \
3911 Plo = (a & MP_HALF_DIGIT_MAX) * (b & MP_HALF_DIGIT_MAX); \
3912 Phi = (a >> MP_HALF_DIGIT_BIT) * (b >> MP_HALF_DIGIT_BIT); \
3913 a0b1 = (a & MP_HALF_DIGIT_MAX) * (b >> MP_HALF_DIGIT_BIT); \
3914 a1b0 = (a >> MP_HALF_DIGIT_BIT) * (b & MP_HALF_DIGIT_MAX); \
3915 a1b0 += a0b1; \
3916 Phi += a1b0 >> MP_HALF_DIGIT_BIT; \
3917 if (a1b0 < a0b1) \
3918 Phi += MP_HALF_RADIX; \
3919 a1b0 <<= MP_HALF_DIGIT_BIT; \
3920 Plo += a1b0; \
3921 if (Plo < a1b0) \
3922 ++Phi; \
3923 }
3924#endif
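
/*
  The portable MP_MUL_DxD above builds the double-width product from
  half-digit pieces.  Writing H = 2^MP_HALF_DIGIT_BIT, a = a1*H + a0 and
  b = b1*H + b0, then

      a * b = a1*b1*H^2 + (a0*b1 + a1*b0)*H + a0*b0.

  a1*b1 and a0*b0 go directly into Phi and Plo; the two cross products
  are summed into a1b0, whose high half is added into Phi and whose low
  half, shifted up by MP_HALF_DIGIT_BIT, is added into Plo.  The test
  (a1b0 < a0b1) detects a carry out of the cross-product sum, worth
  MP_HALF_RADIX in Phi, and (Plo < a1b0) detects a carry out of Plo,
  worth one in Phi.
 */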
3925
3926#if !defined(MP_ASSEMBLY_MULTIPLY)
3927/* c = a * b */
3928void s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
3929{
3930#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD)
3931 mp_digit d = 0;
3932
3933 /* Inner product: Digits of a */
3934 while (a_len--) {
3935 mp_word w = ((mp_word)b * *a++) + d;
3936 *c++ = ACCUM(w);
3937 d = CARRYOUT(w);
3938 }
3939 *c = d;
3940#else
3941 mp_digit carry = 0;
3942 while (a_len--) {
3943 mp_digit a_i = *a++;
3944 mp_digit a0b0, a1b1;
3945
3946 MP_MUL_DxD(a_i, b, a1b1, a0b0);
3947
3948 a0b0 += carry;
3949 if (a0b0 < carry)
3950 ++a1b1;
3951 *c++ = a0b0;
3952 carry = a1b1;
3953 }
3954 *c = carry;
3955#endif
3956}
3957
3958/* c += a * b */
3959void s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b,
3960 mp_digit *c)
3961{
3962#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD)
3963 mp_digit d = 0;
3964
3965 /* Inner product: Digits of a */
3966 while (a_len--) {
3967 mp_word w = ((mp_word)b * *a++) + *c + d;
3968 *c++ = ACCUM(w);
3969 d = CARRYOUT(w);
3970 }
3971 *c = d;
3972#else
3973 mp_digit carry = 0;
3974 while (a_len--) {
3975 mp_digit a_i = *a++;
3976 mp_digit a0b0, a1b1;
3977
3978 MP_MUL_DxD(a_i, b, a1b1, a0b0);
3979
3980 a0b0 += carry;
3981 if (a0b0 < carry)
3982 ++a1b1;
3983 a0b0 += a_i = *c;
3984 if (a0b0 < a_i)
3985 ++a1b1;
3986 *c++ = a0b0;
3987 carry = a1b1;
3988 }
3989 *c = carry;
3990#endif
3991}
3992
3993/* Presently, this is only used by the Montgomery arithmetic code. */
3994/* c += a * b */
3995void s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
3996{
3997#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD)
3998 mp_digit d = 0;
3999
4000 /* Inner product: Digits of a */
4001 while (a_len--) {
4002 mp_word w = ((mp_word)b * *a++) + *c + d;
4003 *c++ = ACCUM(w);
4004 d = CARRYOUT(w);
4005 }
4006
4007 while (d) {
4008 mp_word w = (mp_word)*c + d;
4009 *c++ = ACCUM(w);
4010 d = CARRYOUT(w);
4011 }
4012#else
4013 mp_digit carry = 0;
4014 while (a_len--) {
4015 mp_digit a_i = *a++;
4016 mp_digit a0b0, a1b1;
4017
4018 MP_MUL_DxD(a_i, b, a1b1, a0b0);
4019
4020 a0b0 += carry;
4021 if (a0b0 < carry)
4022 ++a1b1;
4023
4024 a0b0 += a_i = *c;
4025 if (a0b0 < a_i)
4026 ++a1b1;
4027
4028 *c++ = a0b0;
4029 carry = a1b1;
4030 }
4031 while (carry) {
4032 mp_digit c_i = *c;
4033 carry += c_i;
4034 *c++ = carry;
4035 carry = carry < c_i;
4036 }
4037#endif
4038}
4039#endif
4040
4041#if defined(MP_USE_UINT_DIGIT) && defined(MP_USE_LONG_LONG_MULTIPLY)
4042/* This trick works on Sparc V8 CPUs with the Workshop compilers. */
4043#define MP_SQR_D(a, Phi, Plo) \
4044 { unsigned long long square = (unsigned long long)a * a; \
4045 Plo = (mp_digit)square; \
4046 Phi = (mp_digit)(square >> MP_DIGIT_BIT); }
4047#elif defined(OSF1)
4048#define MP_SQR_D(a, Phi, Plo) \
4049 { Plo = asm ("mulq %a0, %a0, %v0", a);\
4050 Phi = asm ("umulh %a0, %a0, %v0", a); }
4051#else
4052#define MP_SQR_D(a, Phi, Plo) \
4053 { mp_digit Pmid; \
4054 Plo = (a & MP_HALF_DIGIT_MAX) * (a & MP_HALF_DIGIT_MAX); \
4055 Phi = (a >> MP_HALF_DIGIT_BIT) * (a >> MP_HALF_DIGIT_BIT); \
4056 Pmid = (a & MP_HALF_DIGIT_MAX) * (a >> MP_HALF_DIGIT_BIT); \
4057 Phi += Pmid >> (MP_HALF_DIGIT_BIT - 1); \
4058 Pmid <<= (MP_HALF_DIGIT_BIT + 1); \
4059 Plo += Pmid; \
4060 if (Plo < Pmid) \
4061 ++Phi; \
4062 }
4063#endif
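
/*
  MP_SQR_D above is the squaring case of the same decomposition: with
  a = a1*H + a0 and H = 2^MP_HALF_DIGIT_BIT,

      a^2 = a1^2*H^2 + 2*a1*a0*H + a0^2,

  so only the single cross product Pmid = a0*a1 is needed.  The
  doubling is folded into the shift counts (MP_HALF_DIGIT_BIT - 1 for
  the part added to Phi, MP_HALF_DIGIT_BIT + 1 for the part added to
  Plo), and the final comparison catches the carry out of Plo.
 */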
4064
4065#if !defined(MP_ASSEMBLY_SQUARE)
/* Add the squares of the digits of a into the digits pointed to by ps. */
4067void s_mpv_sqr_add_prop(const mp_digit *pa, mp_size a_len, mp_digit *ps)
4068{
4069#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD)
4070 mp_word w;
4071 mp_digit d;
4072 mp_size ix;
4073
4074 w = 0;
4075#define ADD_SQUARE(n) \
4076 d = pa[n]; \
4077 w += (d * (mp_word)d) + ps[2*n]; \
4078 ps[2*n] = ACCUM(w); \
4079 w = (w >> DIGIT_BIT) + ps[2*n+1]; \
4080 ps[2*n+1] = ACCUM(w); \
4081 w = (w >> DIGIT_BIT)
4082
4083 for (ix = a_len; ix >= 4; ix -= 4) {
4084 ADD_SQUARE(0);
4085 ADD_SQUARE(1);
4086 ADD_SQUARE(2);
4087 ADD_SQUARE(3);
4088 pa += 4;
4089 ps += 8;
4090 }
4091 if (ix) {
4092 ps += 2*ix;
4093 pa += ix;
4094 switch (ix) {
4095 case 3: ADD_SQUARE(-3); /* FALLTHRU */
4096 case 2: ADD_SQUARE(-2); /* FALLTHRU */
4097 case 1: ADD_SQUARE(-1); /* FALLTHRU */
4098 case 0: break;
4099 }
4100 }
4101 while (w) {
4102 w += *ps;
4103 *ps++ = ACCUM(w);
4104 w = (w >> DIGIT_BIT);
4105 }
4106#else
4107 mp_digit carry = 0;
4108 while (a_len--) {
4109 mp_digit a_i = *pa++;
4110 mp_digit a0a0, a1a1;
4111
4112 MP_SQR_D(a_i, a1a1, a0a0);
4113
4114 /* here a1a1 and a0a0 constitute a_i ** 2 */
4115 a0a0 += carry;
4116 if (a0a0 < carry)
4117 ++a1a1;
4118
4119 /* now add to ps */
4120 a0a0 += a_i = *ps;
4121 if (a0a0 < a_i)
4122 ++a1a1;
4123 *ps++ = a0a0;
4124 a1a1 += a_i = *ps;
4125 carry = (a1a1 < a_i);
4126 *ps++ = a1a1;
4127 }
4128 while (carry) {
4129 mp_digit s_i = *ps;
4130 carry += s_i;
4131 *ps++ = carry;
4132 carry = carry < s_i;
4133 }
4134#endif
4135}
4136#endif
4137
4138#if (defined(MP_NO_MP_WORD) || defined(MP_NO_DIV_WORD)) \
4139&& !defined(MP_ASSEMBLY_DIV_2DX1D)
4140/*
4141** Divide 64-bit (Nhi,Nlo) by 32-bit divisor, which must be normalized
4142** so its high bit is 1. This code is from NSPR.
4143*/
4144mp_err s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor,
4145 mp_digit *qp, mp_digit *rp)
4146{
4147 mp_digit d1, d0, q1, q0;
4148 mp_digit r1, r0, m;
4149
4150 d1 = divisor >> MP_HALF_DIGIT_BIT;
4151 d0 = divisor & MP_HALF_DIGIT_MAX;
4152 r1 = Nhi % d1;
4153 q1 = Nhi / d1;
4154 m = q1 * d0;
4155 r1 = (r1 << MP_HALF_DIGIT_BIT) | (Nlo >> MP_HALF_DIGIT_BIT);
4156 if (r1 < m) {
4157 q1--, r1 += divisor;
4158 if (r1 >= divisor && r1 < m) {
4159 q1--, r1 += divisor;
4160 }
4161 }
4162 r1 -= m;
4163 r0 = r1 % d1;
4164 q0 = r1 / d1;
4165 m = q0 * d0;
4166 r0 = (r0 << MP_HALF_DIGIT_BIT) | (Nlo & MP_HALF_DIGIT_MAX);
4167 if (r0 < m) {
4168 q0--, r0 += divisor;
4169 if (r0 >= divisor && r0 < m) {
4170 q0--, r0 += divisor;
4171 }
4172 }
4173 if (qp)
4174 *qp = (q1 << MP_HALF_DIGIT_BIT) | q0;
4175 if (rp)
4176 *rp = r0 - m;
4177 return MP_OKAY;
4178}
4179#endif
4180
4181#if MP_SQUARE
4182/* {{{ s_mp_sqr(a) */
4183
4184mp_err s_mp_sqr(mp_int *a)
4185{
4186 mp_err res;
4187 mp_int tmp;
4188 tmp.flag = (mp_sign)0;
4189
4190 if((res = mp_init_size(&tmp, 2 * USED(a), FLAG(a))) != MP_OKAY)
4191 return res;
4192 res = mp_sqr(a, &tmp);
4193 if (res == MP_OKAY) {
4194 s_mp_exch(&tmp, a);
4195 }
4196 mp_clear(&tmp);
4197 return res;
4198}
4199
4200/* }}} */
4201#endif
4202
4203/* {{{ s_mp_div(a, b) */
4204
4205/*
  s_mp_div(rem, div, quot)

  Compute quot = rem / div and rem = rem mod div.  The dividend is
  passed in, and the remainder returned, through 'rem'; 'quot' must be
  passed in as zero.
4209 */
4210
4211mp_err s_mp_div(mp_int *rem, /* i: dividend, o: remainder */
4212 mp_int *div, /* i: divisor */
4213 mp_int *quot) /* i: 0; o: quotient */
4214{
4215 mp_int part, t;
4216#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_DIV_WORD)
4217 mp_word q_msd;
4218#else
4219 mp_digit q_msd;
4220#endif
4221 mp_err res;
4222 mp_digit d;
4223 mp_digit div_msd;
4224 int ix;
4225
4226 t.dp = (mp_digit *)NULL;
4227
4228 if(mp_cmp_z(div) == 0)
4229 return MP_RANGE;
4230
4231 /* Shortcut if divisor is power of two */
4232 if((ix = s_mp_ispow2(div)) >= 0) {
4233 MP_CHECKOK( mp_copy(rem, quot) );
4234 s_mp_div_2d(quot, (mp_digit)ix);
4235 s_mp_mod_2d(rem, (mp_digit)ix);
4236
4237 return MP_OKAY;
4238 }
4239
4240 DIGITS(&t) = 0;
4241 MP_SIGN(rem) = ZPOS;
4242 MP_SIGN(div) = ZPOS;
4243
4244 /* A working temporary for division */
4245 MP_CHECKOK( mp_init_size(&t, MP_ALLOC(rem), FLAG(rem)));
4246
4247 /* Normalize to optimize guessing */
4248 MP_CHECKOK( s_mp_norm(rem, div, &d) );
4249
4250 part = *rem;
4251
4252 /* Perform the division itself...woo! */
4253 MP_USED(quot) = MP_ALLOC(quot);
4254
4255 /* Find a partial substring of rem which is at least div */
4256 /* If we didn't find one, we're finished dividing */
4257 while (MP_USED(rem) > MP_USED(div) || s_mp_cmp(rem, div) >= 0) {
4258 int i;
4259 int unusedRem;
4260
4261 unusedRem = MP_USED(rem) - MP_USED(div);
4262 MP_DIGITS(&part) = MP_DIGITS(rem) + unusedRem;
4263 MP_ALLOC(&part) = MP_ALLOC(rem) - unusedRem;
4264 MP_USED(&part) = MP_USED(div);
4265 if (s_mp_cmp(&part, div) < 0) {
4266 -- unusedRem;
4267#if MP_ARGCHK == 2
4268 assert(unusedRem >= 0);
4269#endif
4270 -- MP_DIGITS(&part);
4271 ++ MP_USED(&part);
4272 ++ MP_ALLOC(&part);
4273 }
4274
4275 /* Compute a guess for the next quotient digit */
4276 q_msd = MP_DIGIT(&part, MP_USED(&part) - 1);
4277 div_msd = MP_DIGIT(div, MP_USED(div) - 1);
4278 if (q_msd >= div_msd) {
4279 q_msd = 1;
4280 } else if (MP_USED(&part) > 1) {
4281#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_DIV_WORD)
4282 q_msd = (q_msd << MP_DIGIT_BIT) | MP_DIGIT(&part, MP_USED(&part) - 2);
4283 q_msd /= div_msd;
4284 if (q_msd == RADIX)
4285 --q_msd;
4286#else
4287 mp_digit r;
4288 MP_CHECKOK( s_mpv_div_2dx1d(q_msd, MP_DIGIT(&part, MP_USED(&part) - 2),
4289 div_msd, &q_msd, &r) );
4290#endif
4291 } else {
4292 q_msd = 0;
4293 }
4294#if MP_ARGCHK == 2
4295 assert(q_msd > 0); /* This case should never occur any more. */
4296#endif
4297 if (q_msd <= 0)
4298 break;
4299
4300 /* See what that multiplies out to */
4301 mp_copy(div, &t);
4302 MP_CHECKOK( s_mp_mul_d(&t, (mp_digit)q_msd) );
4303
4304 /*
4305 If it's too big, back it off. We should not have to do this
4306 more than once, or, in rare cases, twice. Knuth describes a
4307 method by which this could be reduced to a maximum of once, but
4308 I didn't implement that here.
4309 * When using s_mpv_div_2dx1d, we may have to do this 3 times.
4310 */
4311 for (i = 4; s_mp_cmp(&t, &part) > 0 && i > 0; --i) {
4312 --q_msd;
4313 s_mp_sub(&t, div); /* t -= div */
4314 }
4315 if (i < 0) {
4316 res = MP_RANGE;
4317 goto CLEANUP;
4318 }
4319
4320 /* At this point, q_msd should be the right next digit */
4321 MP_CHECKOK( s_mp_sub(&part, &t) ); /* part -= t */
4322 s_mp_clamp(rem);
4323
4324 /*
4325 Include the digit in the quotient. We allocated enough memory
4326 for any quotient we could ever possibly get, so we should not
4327 have to check for failures here
4328 */
4329 MP_DIGIT(quot, unusedRem) = (mp_digit)q_msd;
4330 }
4331
4332 /* Denormalize remainder */
4333 if (d) {
4334 s_mp_div_2d(rem, d);
4335 }
4336
4337 s_mp_clamp(quot);
4338
4339CLEANUP:
4340 mp_clear(&t);
4341
4342 return res;
4343
4344} /* end s_mp_div() */
4345
4346
4347/* }}} */
4348
4349/* {{{ s_mp_2expt(a, k) */
4350
4351mp_err s_mp_2expt(mp_int *a, mp_digit k)
4352{
4353 mp_err res;
4354 mp_size dig, bit;
4355
4356 dig = k / DIGIT_BIT;
4357 bit = k % DIGIT_BIT;
4358
4359 mp_zero(a);
4360 if((res = s_mp_pad(a, dig + 1)) != MP_OKAY)
4361 return res;
4362
4363 DIGIT(a, dig) |= ((mp_digit)1 << bit);
4364
4365 return MP_OKAY;
4366
4367} /* end s_mp_2expt() */
4368
4369/* }}} */
4370
4371/* {{{ s_mp_reduce(x, m, mu) */
4372
4373/*
4374 Compute Barrett reduction, x (mod m), given a precomputed value for
4375 mu = b^2k / m, where b = RADIX and k = #digits(m). This should be
4376 faster than straight division, when many reductions by the same
4377 value of m are required (such as in modular exponentiation). This
4378 can nearly halve the time required to do modular exponentiation,
4379 as compared to using the full integer divide to reduce.
4380
4381 This algorithm was derived from the _Handbook of Applied
  Cryptography_ by Menezes, van Oorschot, and Vanstone, Ch. 14,
4383 pp. 603-604.
4384 */
4385
4386mp_err s_mp_reduce(mp_int *x, const mp_int *m, const mp_int *mu)
4387{
4388 mp_int q;
4389 mp_err res;
4390
4391 if((res = mp_init_copy(&q, x)) != MP_OKAY)
4392 return res;
4393
4394 s_mp_rshd(&q, USED(m) - 1); /* q1 = x / b^(k-1) */
4395 s_mp_mul(&q, mu); /* q2 = q1 * mu */
4396 s_mp_rshd(&q, USED(m) + 1); /* q3 = q2 / b^(k+1) */
4397
4398 /* x = x mod b^(k+1), quick (no division) */
4399 s_mp_mod_2d(x, DIGIT_BIT * (USED(m) + 1));
4400
4401 /* q = q * m mod b^(k+1), quick (no division) */
4402 s_mp_mul(&q, m);
4403 s_mp_mod_2d(&q, DIGIT_BIT * (USED(m) + 1));
4404
4405 /* x = x - q */
4406 if((res = mp_sub(x, &q, x)) != MP_OKAY)
4407 goto CLEANUP;
4408
4409 /* If x < 0, add b^(k+1) to it */
4410 if(mp_cmp_z(x) < 0) {
4411 mp_set(&q, 1);
4412 if((res = s_mp_lshd(&q, USED(m) + 1)) != MP_OKAY)
4413 goto CLEANUP;
4414 if((res = mp_add(x, &q, x)) != MP_OKAY)
4415 goto CLEANUP;
4416 }
4417
4418 /* Back off if it's too big */
4419 while(mp_cmp(x, m) >= 0) {
4420 if((res = s_mp_sub(x, m)) != MP_OKAY)
4421 break;
4422 }
4423
4424 CLEANUP:
4425 mp_clear(&q);
4426
4427 return res;
4428
4429} /* end s_mp_reduce() */
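
/*
  Usage sketch (illustrative only): precomputing mu for s_mp_reduce().
  Assumes x, m, mu and t are initialized mp_ints, that mp_set() and
  mp_div() have their usual mpi signatures, and that x has at most
  2 * USED(&m) digits (e.g. x is a product of two values already
  reduced mod m); error handling is abbreviated.

    mp_set(&t, 1);
    s_mp_lshd(&t, 2 * USED(&m));
    mp_div(&t, &m, &mu, NULL);
    s_mp_reduce(&x, &m, &mu);

  The first two lines form t = RADIX^(2k) with k = USED(&m); the divide
  then gives mu = RADIX^(2k) / m as required above.  Once computed, mu
  can be reused for any number of reductions by the same modulus, which
  is where the savings over a full divide come from.
 */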
4430
4431/* }}} */
4432
4433/* }}} */
4434
4435/* {{{ Primitive comparisons */
4436
4437/* {{{ s_mp_cmp(a, b) */
4438
4439/* Compare |a| <=> |b|, return 0 if equal, <0 if a<b, >0 if a>b */
4440int s_mp_cmp(const mp_int *a, const mp_int *b)
4441{
4442 mp_size used_a = MP_USED(a);
4443 {
4444 mp_size used_b = MP_USED(b);
4445
4446 if (used_a > used_b)
4447 goto IS_GT;
4448 if (used_a < used_b)
4449 goto IS_LT;
4450 }
4451 {
4452 mp_digit *pa, *pb;
4453 mp_digit da = 0, db = 0;
4454
4455#define CMP_AB(n) if ((da = pa[n]) != (db = pb[n])) goto done
4456
4457 pa = MP_DIGITS(a) + used_a;
4458 pb = MP_DIGITS(b) + used_a;
4459 while (used_a >= 4) {
4460 pa -= 4;
4461 pb -= 4;
4462 used_a -= 4;
4463 CMP_AB(3);
4464 CMP_AB(2);
4465 CMP_AB(1);
4466 CMP_AB(0);
4467 }
4468 while (used_a-- > 0 && ((da = *--pa) == (db = *--pb)))
4469 /* do nothing */;
4470done:
4471 if (da > db)
4472 goto IS_GT;
4473 if (da < db)
4474 goto IS_LT;
4475 }
4476 return MP_EQ;
4477IS_LT:
4478 return MP_LT;
4479IS_GT:
4480 return MP_GT;
4481} /* end s_mp_cmp() */
4482
4483/* }}} */
4484
4485/* {{{ s_mp_cmp_d(a, d) */
4486
4487/* Compare |a| <=> d, return 0 if equal, <0 if a<d, >0 if a>d */
4488int s_mp_cmp_d(const mp_int *a, mp_digit d)
4489{
4490 if(USED(a) > 1)
4491 return MP_GT;
4492
4493 if(DIGIT(a, 0) < d)
4494 return MP_LT;
4495 else if(DIGIT(a, 0) > d)
4496 return MP_GT;
4497 else
4498 return MP_EQ;
4499
4500} /* end s_mp_cmp_d() */
4501
4502/* }}} */
4503
4504/* {{{ s_mp_ispow2(v) */
4505
4506/*
4507 Returns -1 if the value is not a power of two; otherwise, it returns
4508 k such that v = 2^k, i.e. lg(v).
4509 */
4510int s_mp_ispow2(const mp_int *v)
4511{
4512 mp_digit d;
4513 int extra = 0, ix;
4514
4515 ix = MP_USED(v) - 1;
4516 d = MP_DIGIT(v, ix); /* most significant digit of v */
4517
4518 extra = s_mp_ispow2d(d);
4519 if (extra < 0 || ix == 0)
4520 return extra;
4521
4522 while (--ix >= 0) {
4523 if (DIGIT(v, ix) != 0)
4524 return -1; /* not a power of two */
4525 extra += MP_DIGIT_BIT;
4526 }
4527
4528 return extra;
4529
4530} /* end s_mp_ispow2() */
4531
4532/* }}} */
4533
4534/* {{{ s_mp_ispow2d(d) */
4535
4536int s_mp_ispow2d(mp_digit d)
4537{
4538 if ((d != 0) && ((d & (d-1)) == 0)) { /* d is a power of 2 */
4539 int pow = 0;
4540#if defined (MP_USE_UINT_DIGIT)
4541 if (d & 0xffff0000U)
4542 pow += 16;
4543 if (d & 0xff00ff00U)
4544 pow += 8;
4545 if (d & 0xf0f0f0f0U)
4546 pow += 4;
4547 if (d & 0xccccccccU)
4548 pow += 2;
4549 if (d & 0xaaaaaaaaU)
4550 pow += 1;
4551#elif defined(MP_USE_LONG_LONG_DIGIT)
4552 if (d & 0xffffffff00000000ULL)
4553 pow += 32;
4554 if (d & 0xffff0000ffff0000ULL)
4555 pow += 16;
4556 if (d & 0xff00ff00ff00ff00ULL)
4557 pow += 8;
4558 if (d & 0xf0f0f0f0f0f0f0f0ULL)
4559 pow += 4;
4560 if (d & 0xccccccccccccccccULL)
4561 pow += 2;
4562 if (d & 0xaaaaaaaaaaaaaaaaULL)
4563 pow += 1;
4564#elif defined(MP_USE_LONG_DIGIT)
4565 if (d & 0xffffffff00000000UL)
4566 pow += 32;
4567 if (d & 0xffff0000ffff0000UL)
4568 pow += 16;
4569 if (d & 0xff00ff00ff00ff00UL)
4570 pow += 8;
4571 if (d & 0xf0f0f0f0f0f0f0f0UL)
4572 pow += 4;
4573 if (d & 0xccccccccccccccccUL)
4574 pow += 2;
4575 if (d & 0xaaaaaaaaaaaaaaaaUL)
4576 pow += 1;
4577#else
4578#error "unknown type for mp_digit"
4579#endif
4580 return pow;
4581 }
4582 return -1;
4583
4584} /* end s_mp_ispow2d() */
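
/*
  Worked example: the masks above binary-search for the position of the
  single set bit.  With 32-bit digits and d = 0x00040000 (2^18),
  d & 0xffff0000 is nonzero (add 16), d & 0xff00ff00 and d & 0xf0f0f0f0
  are zero, d & 0xcccccccc is nonzero (add 2), and d & 0xaaaaaaaa is
  zero, giving pow == 18.
 */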
4585
4586/* }}} */
4587
4588/* }}} */
4589
4590/* {{{ Primitive I/O helpers */
4591
4592/* {{{ s_mp_tovalue(ch, r) */
4593
4594/*
4595 Convert the given character to its digit value, in the given radix.
4596 If the given character is not understood in the given radix, -1 is
4597 returned. Otherwise the digit's numeric value is returned.
4598
  The results will be odd if you use a radix < 2 or > 64; you are
4600 expected to know what you're up to.
4601 */
4602int s_mp_tovalue(char ch, int r)
4603{
4604 int val, xch;
4605
4606 if(r > 36)
4607 xch = ch;
4608 else
4609 xch = toupper(ch);
4610
4611 if(isdigit(xch))
4612 val = xch - '0';
4613 else if(isupper(xch))
4614 val = xch - 'A' + 10;
4615 else if(islower(xch))
4616 val = xch - 'a' + 36;
4617 else if(xch == '+')
4618 val = 62;
4619 else if(xch == '/')
4620 val = 63;
4621 else
4622 return -1;
4623
4624 if(val < 0 || val >= r)
4625 return -1;
4626
4627 return val;
4628
4629} /* end s_mp_tovalue() */
4630
4631/* }}} */
4632
4633/* {{{ s_mp_todigit(val, r, low) */
4634
4635/*
4636 Convert val to a radix-r digit, if possible. If val is out of range
4637 for r, returns zero. Otherwise, returns an ASCII character denoting
4638 the value in the given radix.
4639
  The results may be odd if you use a radix < 2 or > 64; you are
4641 expected to know what you're doing.
4642 */
4643
4644char s_mp_todigit(mp_digit val, int r, int low)
4645{
4646 char ch;
4647
4648 if(val >= (unsigned int)r)
4649 return 0;
4650
4651 ch = s_dmap_1[val];
4652
4653 if(r <= 36 && low)
4654 ch = tolower(ch);
4655
4656 return ch;
4657
4658} /* end s_mp_todigit() */
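
/*
  Worked examples (illustrative only): s_mp_tovalue('f', 16) and
  s_mp_tovalue('F', 16) both return 15, since case is folded for radix
  <= 36, while s_mp_tovalue('f', 62) returns 41, since lower case
  letters map to 36..61 once the radix is large enough to distinguish
  them.  Conversely, s_mp_todigit(15, 16, 0) returns 'F' and
  s_mp_todigit(15, 16, 1) returns 'f'.
 */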
4659
4660/* }}} */
4661
4662/* {{{ s_mp_outlen(bits, radix) */
4663
4664/*
4665 Return an estimate for how long a string is needed to hold a radix
4666 r representation of a number with 'bits' significant bits, plus an
4667 extra for a zero terminator (assuming C style strings here)
4668 */
4669int s_mp_outlen(int bits, int r)
4670{
4671 return (int)((double)bits * LOG_V_2(r) + 1.5) + 1;
4672
4673} /* end s_mp_outlen() */
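
/*
  Worked example: for a 1024-bit value in base 10, LOG_V_2(10) is about
  0.30103, so s_mp_outlen(1024, 10) returns (int)(1024 * 0.30103 + 1.5)
  + 1 == 310, enough for the 309 decimal digits of 2^1024 - 1 plus the
  terminating NUL.
 */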
4674
4675/* }}} */
4676
4677/* }}} */
4678
4679/* {{{ mp_read_unsigned_octets(mp, str, len) */
4680/* mp_read_unsigned_octets(mp, str, len)
4681 Read in a raw value (base 256) into the given mp_int
4682 No sign bit, number is positive. Leading zeros ignored.
4683 */
4684
4685mp_err
4686mp_read_unsigned_octets(mp_int *mp, const unsigned char *str, mp_size len)
4687{
4688 int count;
4689 mp_err res;
4690 mp_digit d;
4691
4692 ARGCHK(mp != NULL && str != NULL && len > 0, MP_BADARG);
4693
4694 mp_zero(mp);
4695
4696 count = len % sizeof(mp_digit);
4697 if (count) {
4698 for (d = 0; count-- > 0; --len) {
4699 d = (d << 8) | *str++;
4700 }
4701 MP_DIGIT(mp, 0) = d;
4702 }
4703
4704 /* Read the rest of the digits */
4705 for(; len > 0; len -= sizeof(mp_digit)) {
4706 for (d = 0, count = sizeof(mp_digit); count > 0; --count) {
4707 d = (d << 8) | *str++;
4708 }
4709 if (MP_EQ == mp_cmp_z(mp)) {
4710 if (!d)
4711 continue;
4712 } else {
4713 if((res = s_mp_lshd(mp, 1)) != MP_OKAY)
4714 return res;
4715 }
4716 MP_DIGIT(mp, 0) = d;
4717 }
4718 return MP_OKAY;
4719} /* end mp_read_unsigned_octets() */
4720/* }}} */
4721
4722/* {{{ mp_unsigned_octet_size(mp) */
4723int
4724mp_unsigned_octet_size(const mp_int *mp)
4725{
4726 int bytes;
4727 int ix;
4728 mp_digit d = 0;
4729
4730 ARGCHK(mp != NULL, MP_BADARG);
4731 ARGCHK(MP_ZPOS == SIGN(mp), MP_BADARG);
4732
4733 bytes = (USED(mp) * sizeof(mp_digit));
4734
4735 /* subtract leading zeros. */
4736 /* Iterate over each digit... */
4737 for(ix = USED(mp) - 1; ix >= 0; ix--) {
4738 d = DIGIT(mp, ix);
4739 if (d)
4740 break;
4741 bytes -= sizeof(d);
4742 }
4743 if (!bytes)
4744 return 1;
4745
4746 /* Have MSD, check digit bytes, high order first */
4747 for(ix = sizeof(mp_digit) - 1; ix >= 0; ix--) {
4748 unsigned char x = (unsigned char)(d >> (ix * CHAR_BIT));
4749 if (x)
4750 break;
4751 --bytes;
4752 }
4753 return bytes;
4754} /* end mp_unsigned_octet_size() */
4755/* }}} */
4756
4757/* {{{ mp_to_unsigned_octets(mp, str) */
/* output a buffer of big endian octets no longer than specified.
   Returns the number of octets written, or a (negative) error code. */
4759mp_err
4760mp_to_unsigned_octets(const mp_int *mp, unsigned char *str, mp_size maxlen)
4761{
4762 int ix, pos = 0;
4763 unsigned int bytes;
4764
4765 ARGCHK(mp != NULL && str != NULL && !SIGN(mp), MP_BADARG);
4766
4767 bytes = mp_unsigned_octet_size(mp);
4768 ARGCHK(bytes <= maxlen, MP_BADARG);
4769
4770 /* Iterate over each digit... */
4771 for(ix = USED(mp) - 1; ix >= 0; ix--) {
4772 mp_digit d = DIGIT(mp, ix);
4773 int jx;
4774
4775 /* Unpack digit bytes, high order first */
4776 for(jx = sizeof(mp_digit) - 1; jx >= 0; jx--) {
4777 unsigned char x = (unsigned char)(d >> (jx * CHAR_BIT));
4778 if (!pos && !x) /* suppress leading zeros */
4779 continue;
4780 str[pos++] = x;
4781 }
4782 }
4783 if (!pos)
4784 str[pos++] = 0;
4785 return pos;
4786} /* end mp_to_unsigned_octets() */
4787/* }}} */
4788
4789/* {{{ mp_to_signed_octets(mp, str) */
/* output a buffer of big endian octets no longer than specified.
   Returns the number of octets written, or a (negative) error code. */
4791mp_err
4792mp_to_signed_octets(const mp_int *mp, unsigned char *str, mp_size maxlen)
4793{
4794 int ix, pos = 0;
4795 unsigned int bytes;
4796
4797 ARGCHK(mp != NULL && str != NULL && !SIGN(mp), MP_BADARG);
4798
4799 bytes = mp_unsigned_octet_size(mp);
4800 ARGCHK(bytes <= maxlen, MP_BADARG);
4801
4802 /* Iterate over each digit... */
4803 for(ix = USED(mp) - 1; ix >= 0; ix--) {
4804 mp_digit d = DIGIT(mp, ix);
4805 int jx;
4806
4807 /* Unpack digit bytes, high order first */
4808 for(jx = sizeof(mp_digit) - 1; jx >= 0; jx--) {
4809 unsigned char x = (unsigned char)(d >> (jx * CHAR_BIT));
4810 if (!pos) {
4811 if (!x) /* suppress leading zeros */
4812 continue;
4813 if (x & 0x80) { /* add one leading zero to make output positive. */
4814 ARGCHK(bytes + 1 <= maxlen, MP_BADARG);
4815 if (bytes + 1 > maxlen)
4816 return MP_BADARG;
4817 str[pos++] = 0;
4818 }
4819 }
4820 str[pos++] = x;
4821 }
4822 }
4823 if (!pos)
4824 str[pos++] = 0;
4825 return pos;
4826} /* end mp_to_signed_octets() */
4827/* }}} */
4828
4829/* {{{ mp_to_fixlen_octets(mp, str) */
4830/* output a buffer of big endian octets exactly as long as requested. */
4831mp_err
4832mp_to_fixlen_octets(const mp_int *mp, unsigned char *str, mp_size length)
4833{
4834 int ix, pos = 0;
4835 unsigned int bytes;
4836
4837 ARGCHK(mp != NULL && str != NULL && !SIGN(mp), MP_BADARG);
4838
4839 bytes = mp_unsigned_octet_size(mp);
4840 ARGCHK(bytes <= length, MP_BADARG);
4841
4842 /* place any needed leading zeros */
4843 for (;length > bytes; --length) {
4844 *str++ = 0;
4845 }
4846
4847 /* Iterate over each digit... */
4848 for(ix = USED(mp) - 1; ix >= 0; ix--) {
4849 mp_digit d = DIGIT(mp, ix);
4850 int jx;
4851
4852 /* Unpack digit bytes, high order first */
4853 for(jx = sizeof(mp_digit) - 1; jx >= 0; jx--) {
4854 unsigned char x = (unsigned char)(d >> (jx * CHAR_BIT));
4855 if (!pos && !x) /* suppress leading zeros */
4856 continue;
4857 str[pos++] = x;
4858 }
4859 }
4860 if (!pos)
4861 str[pos++] = 0;
4862 return MP_OKAY;
4863} /* end mp_to_fixlen_octets() */
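
/*
  Usage sketch (illustrative only): exporting a non-negative mp_int as a
  fixed-width big-endian buffer, as wire formats typically require.  The
  constant KEY_BYTES and the mp_ints k and k2 are the caller's (k and k2
  already initialized, k non-negative); error handling is abbreviated.

    unsigned char out[KEY_BYTES];
    mp_err res = MP_RANGE;

    if (mp_unsigned_octet_size(&k) <= (int)sizeof out)
      res = mp_to_fixlen_octets(&k, out, sizeof out);
    if (res == MP_OKAY)
      res = mp_read_unsigned_octets(&k2, out, sizeof out);

  mp_to_fixlen_octets() pads with leading zeros and returns MP_OKAY,
  while mp_to_unsigned_octets() and mp_to_signed_octets() suppress
  leading zeros and return the number of octets written instead.
 */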
4864/* }}} */
4865
4866
4867/*------------------------------------------------------------------------*/
4868/* HERE THERE BE DRAGONS */
4869