GCC Code Coverage Report
Directory: ./ Exec Total Coverage
File: lib/libcrypto/crypto/../../libssl/src/crypto/bn/bn_exp.c Lines: 383 486 78.8 %
Date: 2016-12-06 Branches: 288 424 67.9 %

Line Branch Exec Source
1
/* $OpenBSD: bn_exp.c,v 1.23 2015/09/10 15:56:25 jsing Exp $ */
2
/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3
 * All rights reserved.
4
 *
5
 * This package is an SSL implementation written
6
 * by Eric Young (eay@cryptsoft.com).
7
 * The implementation was written so as to conform with Netscapes SSL.
8
 *
9
 * This library is free for commercial and non-commercial use as long as
10
 * the following conditions are aheared to.  The following conditions
11
 * apply to all code found in this distribution, be it the RC4, RSA,
12
 * lhash, DES, etc., code; not just the SSL code.  The SSL documentation
13
 * included with this distribution is covered by the same copyright terms
14
 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15
 *
16
 * Copyright remains Eric Young's, and as such any Copyright notices in
17
 * the code are not to be removed.
18
 * If this package is used in a product, Eric Young should be given attribution
19
 * as the author of the parts of the library used.
20
 * This can be in the form of a textual message at program startup or
21
 * in documentation (online or textual) provided with the package.
22
 *
23
 * Redistribution and use in source and binary forms, with or without
24
 * modification, are permitted provided that the following conditions
25
 * are met:
26
 * 1. Redistributions of source code must retain the copyright
27
 *    notice, this list of conditions and the following disclaimer.
28
 * 2. Redistributions in binary form must reproduce the above copyright
29
 *    notice, this list of conditions and the following disclaimer in the
30
 *    documentation and/or other materials provided with the distribution.
31
 * 3. All advertising materials mentioning features or use of this software
32
 *    must display the following acknowledgement:
33
 *    "This product includes cryptographic software written by
34
 *     Eric Young (eay@cryptsoft.com)"
35
 *    The word 'cryptographic' can be left out if the rouines from the library
36
 *    being used are not cryptographic related :-).
37
 * 4. If you include any Windows specific code (or a derivative thereof) from
38
 *    the apps directory (application code) you must include an acknowledgement:
39
 *    "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40
 *
41
 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51
 * SUCH DAMAGE.
52
 *
53
 * The licence and distribution terms for any publically available version or
54
 * derivative of this code cannot be changed.  i.e. this code cannot simply be
55
 * copied and put under another distribution licence
56
 * [including the GNU Public Licence.]
57
 */
58
/* ====================================================================
59
 * Copyright (c) 1998-2005 The OpenSSL Project.  All rights reserved.
60
 *
61
 * Redistribution and use in source and binary forms, with or without
62
 * modification, are permitted provided that the following conditions
63
 * are met:
64
 *
65
 * 1. Redistributions of source code must retain the above copyright
66
 *    notice, this list of conditions and the following disclaimer.
67
 *
68
 * 2. Redistributions in binary form must reproduce the above copyright
69
 *    notice, this list of conditions and the following disclaimer in
70
 *    the documentation and/or other materials provided with the
71
 *    distribution.
72
 *
73
 * 3. All advertising materials mentioning features or use of this
74
 *    software must display the following acknowledgment:
75
 *    "This product includes software developed by the OpenSSL Project
76
 *    for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
77
 *
78
 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
79
 *    endorse or promote products derived from this software without
80
 *    prior written permission. For written permission, please contact
81
 *    openssl-core@openssl.org.
82
 *
83
 * 5. Products derived from this software may not be called "OpenSSL"
84
 *    nor may "OpenSSL" appear in their names without prior written
85
 *    permission of the OpenSSL Project.
86
 *
87
 * 6. Redistributions of any form whatsoever must retain the following
88
 *    acknowledgment:
89
 *    "This product includes software developed by the OpenSSL Project
90
 *    for use in the OpenSSL Toolkit (http://www.openssl.org/)"
91
 *
92
 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
93
 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
94
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
95
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
96
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
97
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
98
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
99
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
100
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
101
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
102
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
103
 * OF THE POSSIBILITY OF SUCH DAMAGE.
104
 * ====================================================================
105
 *
106
 * This product includes cryptographic software written by Eric Young
107
 * (eay@cryptsoft.com).  This product includes software written by Tim
108
 * Hudson (tjh@cryptsoft.com).
109
 *
110
 */
111
112
#include <stdlib.h>
113
#include <string.h>
114
115
#include <openssl/err.h>
116
117
#include "bn_lcl.h"
118
119
/* maximum precomputation table size for *variable* sliding windows */
120
#define TABLE_SIZE	32
121
122
/* this one works - simple but works */
123
int
124
BN_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)
125
15
{
126
15
	int i, bits, ret = 0;
127
	BIGNUM *v, *rr;
128
129
15
	if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
130
		/* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
131
		BNerr(BN_F_BN_EXP, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
132
		return -1;
133
	}
134
135
15
	BN_CTX_start(ctx);
136
15
	if ((r == a) || (r == p))
137
		rr = BN_CTX_get(ctx);
138
	else
139
15
		rr = r;
140
15
	v = BN_CTX_get(ctx);
141
15
	if (rr == NULL || v == NULL)
142
		goto err;
143
144
15
	if (BN_copy(v, a) == NULL)
145
		goto err;
146
15
	bits = BN_num_bits(p);
147
148

15
	if (BN_is_odd(p)) {
149
7
		if (BN_copy(rr, a) == NULL)
150
			goto err;
151
	} else {
152
8
		if (!BN_one(rr))
153
			goto err;
154
	}
155
156
60
	for (i = 1; i < bits; i++) {
157
45
		if (!BN_sqr(v, v, ctx))
158
			goto err;
159
45
		if (BN_is_bit_set(p, i)) {
160
34
			if (!BN_mul(rr, rr, v, ctx))
161
				goto err;
162
		}
163
	}
164
15
	ret = 1;
165
166
15
err:
167
15
	if (r != rr && rr != NULL)
168
		BN_copy(r, rr);
169
15
	BN_CTX_end(ctx);
170
	bn_check_top(r);
171
15
	return (ret);
172
}
173
174
int
175
BN_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
176
    BN_CTX *ctx)
177
220
{
178
	int ret;
179
180
	bn_check_top(a);
181
	bn_check_top(p);
182
	bn_check_top(m);
183
184
	/* For even modulus  m = 2^k*m_odd,  it might make sense to compute
185
	 * a^p mod m_odd  and  a^p mod 2^k  separately (with Montgomery
186
	 * exponentiation for the odd part), using appropriate exponent
187
	 * reductions, and combine the results using the CRT.
188
	 *
189
	 * For now, we use Montgomery only if the modulus is odd; otherwise,
190
	 * exponentiation using the reciprocal-based quick remaindering
191
	 * algorithm is used.
192
	 *
193
	 * (Timing obtained with expspeed.c [computations  a^p mod m
194
	 * where  a, p, m  are of the same length: 256, 512, 1024, 2048,
195
	 * 4096, 8192 bits], compared to the running time of the
196
	 * standard algorithm:
197
	 *
198
	 *   BN_mod_exp_mont   33 .. 40 %  [AMD K6-2, Linux, debug configuration]
199
         *                     55 .. 77 %  [UltraSparc processor, but
200
	 *                                  debug-solaris-sparcv8-gcc conf.]
201
	 *
202
	 *   BN_mod_exp_recp   50 .. 70 %  [AMD K6-2, Linux, debug configuration]
203
	 *                     62 .. 118 % [UltraSparc, debug-solaris-sparcv8-gcc]
204
	 *
205
	 * On the Sparc, BN_mod_exp_recp was faster than BN_mod_exp_mont
206
	 * at 2048 and more bits, but at 512 and 1024 bits, it was
207
	 * slower even than the standard algorithm!
208
	 *
209
	 * "Real" timings [linux-elf, solaris-sparcv9-gcc configurations]
210
	 * should be obtained when the new Montgomery reduction code
211
	 * has been integrated into OpenSSL.)
212
	 */
213
214
#define MONT_MUL_MOD
215
#define MONT_EXP_WORD
216
#define RECP_MUL_MOD
217
218
#ifdef MONT_MUL_MOD
219
	/* I have finally been able to take out this pre-condition of
220
	 * the top bit being set.  It was caused by an error in BN_div
221
	 * with negatives.  There was also another problem when for a^b%m
222
	 * a >= m.  eay 07-May-97 */
223
/*	if ((m->d[m->top-1]&BN_TBIT) && BN_is_odd(m)) */
224
225

220
	if (BN_is_odd(m)) {
226
#  ifdef MONT_EXP_WORD
227

270
		if (a->top == 1 && !a->neg &&
228
		    (BN_get_flags(p, BN_FLG_CONSTTIME) == 0)) {
229
50
			BN_ULONG A = a->d[0];
230
50
			ret = BN_mod_exp_mont_word(r, A,p, m,ctx, NULL);
231
		} else
232
#  endif
233
170
			ret = BN_mod_exp_mont(r, a,p, m,ctx, NULL);
234
	} else
235
#endif
236
#ifdef RECP_MUL_MOD
237
	{
238
		ret = BN_mod_exp_recp(r, a,p, m, ctx);
239
	}
240
#else
241
	{
242
		ret = BN_mod_exp_simple(r, a,p, m, ctx);
243
	}
244
#endif
245
246
	bn_check_top(r);
247
220
	return (ret);
248
}
249
250
int
251
BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
252
    BN_CTX *ctx)
253
300
{
254
300
	int i, j, bits, ret = 0, wstart, wend, window, wvalue;
255
300
	int start = 1;
256
	BIGNUM *aa;
257
	/* Table of variables obtained from 'ctx' */
258
	BIGNUM *val[TABLE_SIZE];
259
	BN_RECP_CTX recp;
260
261
300
	if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
262
		/* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
263
		BNerr(BN_F_BN_MOD_EXP_RECP, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
264
		return -1;
265
	}
266
267
300
	bits = BN_num_bits(p);
268
269
300
	if (bits == 0) {
270
		ret = BN_one(r);
271
		return ret;
272
	}
273
274
300
	BN_CTX_start(ctx);
275
300
	if ((aa = BN_CTX_get(ctx)) == NULL)
276
		goto err;
277
300
	if ((val[0] = BN_CTX_get(ctx)) == NULL)
278
		goto err;
279
280
300
	BN_RECP_CTX_init(&recp);
281
300
	if (m->neg) {
282
		/* ignore sign of 'm' */
283
		if (!BN_copy(aa, m))
284
			goto err;
285
		aa->neg = 0;
286
		if (BN_RECP_CTX_set(&recp, aa, ctx) <= 0)
287
			goto err;
288
	} else {
289
300
		if (BN_RECP_CTX_set(&recp, m, ctx) <= 0)
290
			goto err;
291
	}
292
293
300
	if (!BN_nnmod(val[0], a, m, ctx))
294
		goto err;		/* 1 */
295
300
	if (BN_is_zero(val[0])) {
296
		BN_zero(r);
297
		ret = 1;
298
		goto err;
299
	}
300
301


300
	window = BN_window_bits_for_exponent_size(bits);
302
300
	if (window > 1) {
303
300
		if (!BN_mod_mul_reciprocal(aa, val[0], val[0], &recp, ctx))
304
			goto err;				/* 2 */
305
300
		j = 1 << (window - 1);
306
4800
		for (i = 1; i < j; i++) {
307

4500
			if (((val[i] = BN_CTX_get(ctx)) == NULL) ||
308
			    !BN_mod_mul_reciprocal(val[i], val[i - 1],
309
			    aa, &recp, ctx))
310
				goto err;
311
		}
312
	}
313
314
300
	start = 1;		/* This is used to avoid multiplication etc
315
				 * when there is only the value '1' in the
316
				 * buffer. */
317
300
	wvalue = 0;		/* The 'value' of the window */
318
300
	wstart = bits - 1;	/* The top bit of the window */
319
300
	wend = 0;		/* The bottom bit of the window */
320
321
300
	if (!BN_one(r))
322
		goto err;
323
324
	for (;;) {
325
57447
		if (BN_is_bit_set(p, wstart) == 0) {
326
37844
			if (!start)
327
37844
				if (!BN_mod_mul_reciprocal(r, r,r, &recp, ctx))
328
					goto err;
329
37844
			if (wstart == 0)
330
216
				break;
331
37628
			wstart--;
332
37628
			continue;
333
		}
334
		/* We now have wstart on a 'set' bit, we now need to work out
335
		 * how bit a window to do.  To do this we need to scan
336
		 * forward until the last set bit before the end of the
337
		 * window */
338
19603
		j = wstart;
339
19603
		wvalue = 1;
340
19603
		wend = 0;
341
97698
		for (i = 1; i < window; i++) {
342
78222
			if (wstart - i < 0)
343
127
				break;
344
78095
			if (BN_is_bit_set(p, wstart - i)) {
345
38915
				wvalue <<= (i - wend);
346
38915
				wvalue |= 1;
347
38915
				wend = i;
348
			}
349
		}
350
351
		/* wend is the size of the current window */
352
19603
		j = wend + 1;
353
		/* add the 'bytes above' */
354
19603
		if (!start)
355
96940
			for (i = 0; i < j; i++) {
356
77637
				if (!BN_mod_mul_reciprocal(r, r,r, &recp, ctx))
357
					goto err;
358
			}
359
360
		/* wvalue will be an odd number < 2^window */
361
19603
		if (!BN_mod_mul_reciprocal(r, r,val[wvalue >> 1], &recp, ctx))
362
			goto err;
363
364
		/* move the 'window' down further */
365
19603
		wstart -= wend + 1;
366
19603
		wvalue = 0;
367
19603
		start = 0;
368
19603
		if (wstart < 0)
369
84
			break;
370
	}
371
300
	ret = 1;
372
373
300
err:
374
300
	BN_CTX_end(ctx);
375
300
	BN_RECP_CTX_free(&recp);
376
	bn_check_top(r);
377
300
	return (ret);
378
}
379
380
int
381
BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
382
    BN_CTX *ctx, BN_MONT_CTX *in_mont)
383
2060
{
384
2060
	int i, j, bits, ret = 0, wstart, wend, window, wvalue;
385
2060
	int start = 1;
386
	BIGNUM *d, *r;
387
	const BIGNUM *aa;
388
	/* Table of variables obtained from 'ctx' */
389
	BIGNUM *val[TABLE_SIZE];
390
2060
	BN_MONT_CTX *mont = NULL;
391
392
2060
	if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
393
46
		return BN_mod_exp_mont_consttime(rr, a, p, m, ctx, in_mont);
394
	}
395
396
	bn_check_top(a);
397
	bn_check_top(p);
398
	bn_check_top(m);
399
400

2014
	if (!BN_is_odd(m)) {
401
		BNerr(BN_F_BN_MOD_EXP_MONT, BN_R_CALLED_WITH_EVEN_MODULUS);
402
		return (0);
403
	}
404
2014
	bits = BN_num_bits(p);
405
2014
	if (bits == 0) {
406
		ret = BN_one(rr);
407
		return ret;
408
	}
409
410
2014
	BN_CTX_start(ctx);
411
2014
	if ((d = BN_CTX_get(ctx)) == NULL)
412
		goto err;
413
2014
	if ((r = BN_CTX_get(ctx)) == NULL)
414
		goto err;
415
2014
	if ((val[0] = BN_CTX_get(ctx)) == NULL)
416
		goto err;
417
418
	/* If this is not done, things will break in the montgomery
419
	 * part */
420
421
2014
	if (in_mont != NULL)
422
1646
		mont = in_mont;
423
	else {
424
368
		if ((mont = BN_MONT_CTX_new()) == NULL)
425
			goto err;
426
368
		if (!BN_MONT_CTX_set(mont, m, ctx))
427
			goto err;
428
	}
429
430

2133
	if (a->neg || BN_ucmp(a, m) >= 0) {
431
119
		if (!BN_nnmod(val[0], a,m, ctx))
432
			goto err;
433
119
		aa = val[0];
434
	} else
435
1895
		aa = a;
436
2014
	if (BN_is_zero(aa)) {
437
3
		BN_zero(rr);
438
3
		ret = 1;
439
3
		goto err;
440
	}
441
2011
	if (!BN_to_montgomery(val[0], aa, mont, ctx))
442
		goto err; /* 1 */
443
444


2011
	window = BN_window_bits_for_exponent_size(bits);
445
2011
	if (window > 1) {
446
1971
		if (!BN_mod_mul_montgomery(d, val[0], val[0], mont, ctx))
447
			goto err; /* 2 */
448
1971
		j = 1 << (window - 1);
449
18228
		for (i = 1; i < j; i++) {
450

16257
			if (((val[i] = BN_CTX_get(ctx)) == NULL) ||
451
			    !BN_mod_mul_montgomery(val[i], val[i - 1],
452
			    d, mont, ctx))
453
				goto err;
454
		}
455
	}
456
457
2011
	start = 1;		/* This is used to avoid multiplication etc
458
				 * when there is only the value '1' in the
459
				 * buffer. */
460
2011
	wvalue = 0;		/* The 'value' of the window */
461
2011
	wstart = bits - 1;	/* The top bit of the window */
462
2011
	wend = 0;		/* The bottom bit of the window */
463
464
2011
	if (!BN_to_montgomery(r, BN_value_one(), mont, ctx))
465
		goto err;
466
	for (;;) {
467
172078
		if (BN_is_bit_set(p, wstart) == 0) {
468
109670
			if (!start) {
469
109670
				if (!BN_mod_mul_montgomery(r, r, r, mont, ctx))
470
					goto err;
471
			}
472
109670
			if (wstart == 0)
473
198
				break;
474
109472
			wstart--;
475
109472
			continue;
476
		}
477
		/* We now have wstart on a 'set' bit, we now need to work out
478
		 * how bit a window to do.  To do this we need to scan
479
		 * forward until the last set bit before the end of the
480
		 * window */
481
62408
		j = wstart;
482
62408
		wvalue = 1;
483
62408
		wend = 0;
484
287528
		for (i = 1; i < window; i++) {
485
226562
			if (wstart - i < 0)
486
1442
				break;
487
225120
			if (BN_is_bit_set(p, wstart - i)) {
488
119624
				wvalue <<= (i - wend);
489
119624
				wvalue |= 1;
490
119624
				wend = i;
491
			}
492
		}
493
494
		/* wend is the size of the current window */
495
62408
		j = wend + 1;
496
		/* add the 'bytes above' */
497
62408
		if (!start)
498
289331
			for (i = 0; i < j; i++) {
499
228934
				if (!BN_mod_mul_montgomery(r, r, r, mont, ctx))
500
					goto err;
501
			}
502
503
		/* wvalue will be an odd number < 2^window */
504
62408
		if (!BN_mod_mul_montgomery(r, r, val[wvalue >> 1], mont, ctx))
505
			goto err;
506
507
		/* move the 'window' down further */
508
62408
		wstart -= wend + 1;
509
62408
		wvalue = 0;
510
62408
		start = 0;
511
62408
		if (wstart < 0)
512
1813
			break;
513
	}
514
2011
	if (!BN_from_montgomery(rr, r,mont, ctx))
515
		goto err;
516
2011
	ret = 1;
517
518
2014
err:
519
2014
	if ((in_mont == NULL) && (mont != NULL))
520
368
		BN_MONT_CTX_free(mont);
521
2014
	BN_CTX_end(ctx);
522
	bn_check_top(rr);
523
2014
	return (ret);
524
}
525
526
527
/* BN_mod_exp_mont_consttime() stores the precomputed powers in a specific layout
528
 * so that accessing any of these table values shows the same access pattern as far
529
 * as cache lines are concerned.  The following functions are used to transfer a BIGNUM
530
 * from/to that table. */
531
532
static int
533
MOD_EXP_CTIME_COPY_TO_PREBUF(const BIGNUM *b, int top, unsigned char *buf,
534
    int idx, int width)
535
6858
{
536
	size_t i, j;
537
538
6858
	if (top > b->top)
539
24
		top = b->top; /* this works because 'buf' is explicitly zeroed */
540
149466
	for (i = 0, j = idx; i < top * sizeof b->d[0]; i++, j += width) {
541
142608
		buf[j] = ((unsigned char*)b->d)[i];
542
	}
543
544
6858
	return 1;
545
}
546
547
static int
548
MOD_EXP_CTIME_COPY_FROM_PREBUF(BIGNUM *b, int top, unsigned char *buf, int idx,
549
    int width)
550
27742
{
551
	size_t i, j;
552
553

27742
	if (bn_wexpand(b, top) == NULL)
554
		return 0;
555
556
628174
	for (i = 0, j = idx; i < top * sizeof b->d[0]; i++, j += width) {
557
600432
		((unsigned char*)b->d)[i] = buf[j];
558
	}
559
560
27742
	b->top = top;
561

27742
	bn_correct_top(b);
562
27742
	return 1;
563
}
564
565
/* Given a pointer value, compute the next address that is a cache line multiple. */
566
#define MOD_EXP_CTIME_ALIGN(x_) \
567
	((unsigned char*)(x_) + (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - (((size_t)(x_)) & (MOD_EXP_CTIME_MIN_CACHE_LINE_MASK))))
568
569
/* This variant of BN_mod_exp_mont() uses fixed windows and the special
570
 * precomputation memory layout to limit data-dependency to a minimum
571
 * to protect secret exponents (cf. the hyper-threading timing attacks
572
 * pointed out by Colin Percival,
573
 * http://www.daemonology.net/hyperthreading-considered-harmful/)
574
 */
575
int
576
BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
577
    const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont)
578
375
{
579
375
	int i, bits, ret = 0, window, wvalue;
580
	int top;
581
375
	BN_MONT_CTX *mont = NULL;
582
	int numPowers;
583
375
	unsigned char *powerbufFree = NULL;
584
375
	int powerbufLen = 0;
585
375
	unsigned char *powerbuf = NULL;
586
	BIGNUM tmp, am;
587
588
	bn_check_top(a);
589
	bn_check_top(p);
590
	bn_check_top(m);
591
592
375
	top = m->top;
593
594
375
	if (!(m->d[0] & 1)) {
595
		BNerr(BN_F_BN_MOD_EXP_MONT_CONSTTIME,
596
		    BN_R_CALLED_WITH_EVEN_MODULUS);
597
		return (0);
598
	}
599
375
	bits = BN_num_bits(p);
600
375
	if (bits == 0) {
601
		ret = BN_one(rr);
602
		return ret;
603
	}
604
605
375
	BN_CTX_start(ctx);
606
607
	/* Allocate a montgomery context if it was not supplied by the caller.
608
	 * If this is not done, things will break in the montgomery part.
609
 	 */
610
375
	if (in_mont != NULL)
611
168
		mont = in_mont;
612
	else {
613
207
		if ((mont = BN_MONT_CTX_new()) == NULL)
614
			goto err;
615
207
		if (!BN_MONT_CTX_set(mont, m, ctx))
616
			goto err;
617
	}
618
619
	/* Get the window size to use with size of p. */
620


375
	window = BN_window_bits_for_ctime_exponent_size(bits);
621
#if defined(OPENSSL_BN_ASM_MONT5)
622
375
	if (window == 6 && bits <= 1024)
623
20
		window = 5;	/* ~5% improvement of 2048-bit RSA sign */
624
#endif
625
626
	/* Allocate a buffer large enough to hold all of the pre-computed
627
	 * powers of am, am itself and tmp.
628
	 */
629
375
	numPowers = 1 << window;
630
375
	powerbufLen = sizeof(m->d[0]) * (top * numPowers +
631
	    ((2*top) > numPowers ? (2*top) : numPowers));
632
375
	if ((powerbufFree = malloc(powerbufLen +
633
	    MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH)) == NULL)
634
		goto err;
635
636
375
	powerbuf = MOD_EXP_CTIME_ALIGN(powerbufFree);
637
375
	memset(powerbuf, 0, powerbufLen);
638
639
	/* lay down tmp and am right after powers table */
640
375
	tmp.d = (BN_ULONG *)(powerbuf + sizeof(m->d[0]) * top * numPowers);
641
375
	am.d = tmp.d + top;
642
375
	tmp.top = am.top = 0;
643
375
	tmp.dmax = am.dmax = top;
644
375
	tmp.neg = am.neg = 0;
645
375
	tmp.flags = am.flags = BN_FLG_STATIC_DATA;
646
647
	/* prepare a^0 in Montgomery domain */
648
#if 1
649
375
	if (!BN_to_montgomery(&tmp, BN_value_one(), mont, ctx))
650
		goto err;
651
#else
652
	tmp.d[0] = (0 - m - >d[0]) & BN_MASK2;	/* 2^(top*BN_BITS2) - m */
653
	for (i = 1; i < top; i++)
654
		tmp.d[i] = (~m->d[i]) & BN_MASK2;
655
	tmp.top = top;
656
#endif
657
658
	/* prepare a^1 in Montgomery domain */
659

375
	if (a->neg || BN_ucmp(a, m) >= 0) {
660
125
		if (!BN_mod(&am, a,m, ctx))
661
			goto err;
662
125
		if (!BN_to_montgomery(&am, &am, mont, ctx))
663
			goto err;
664
250
	} else if (!BN_to_montgomery(&am, a,mont, ctx))
665
		goto err;
666
667
#if defined(OPENSSL_BN_ASM_MONT5)
668
	/* This optimization uses ideas from http://eprint.iacr.org/2011/239,
669
	 * specifically optimization of cache-timing attack countermeasures
670
	 * and pre-computation optimization. */
671
672
	/* Dedicated window==4 case improves 512-bit RSA sign by ~15%, but as
673
	 * 512-bit RSA is hardly relevant, we omit it to spare size... */
674
375
	if (window == 5 && top > 1) {
675
		void bn_mul_mont_gather5(BN_ULONG *rp, const BN_ULONG *ap,
676
		    const void *table, const BN_ULONG *np,
677
		    const BN_ULONG *n0, int num, int power);
678
		void bn_scatter5(const BN_ULONG *inp, size_t num,
679
		    void *table, size_t power);
680
		void bn_gather5(BN_ULONG *out, size_t num,
681
		    void *table, size_t power);
682
683
74
		BN_ULONG *np = mont->N.d, *n0 = mont->n0;
684
685
		/* BN_to_montgomery can contaminate words above .top
686
		 * [in BN_DEBUG[_DEBUG] build]... */
687
76
		for (i = am.top; i < top; i++)
688
2
			am.d[i] = 0;
689
78
		for (i = tmp.top; i < top; i++)
690
4
			tmp.d[i] = 0;
691
692
74
		bn_scatter5(tmp.d, top, powerbuf, 0);
693
74
		bn_scatter5(am.d, am.top, powerbuf, 1);
694
74
		bn_mul_mont(tmp.d, am.d, am.d, np, n0, top);
695
74
		bn_scatter5(tmp.d, top, powerbuf, 2);
696
697
#if 0
698
		for (i = 3; i < 32; i++) {
699
			/* Calculate a^i = a^(i-1) * a */
700
			bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np,
701
			    n0, top, i - 1);
702
			bn_scatter5(tmp.d, top, powerbuf, i);
703
		}
704
#else
705
		/* same as above, but uses squaring for 1/2 of operations */
706
296
		for (i = 4; i < 32; i*=2) {
707
222
			bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
708
222
			bn_scatter5(tmp.d, top, powerbuf, i);
709
		}
710
296
		for (i = 3; i < 8; i += 2) {
711
			int j;
712
222
			bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np,
713
			    n0, top, i - 1);
714
222
			bn_scatter5(tmp.d, top, powerbuf, i);
715
740
			for (j = 2 * i; j < 32; j *= 2) {
716
518
				bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
717
518
				bn_scatter5(tmp.d, top, powerbuf, j);
718
			}
719
		}
720
296
		for (; i < 16; i += 2) {
721
296
			bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np,
722
			    n0, top, i - 1);
723
296
			bn_scatter5(tmp.d, top, powerbuf, i);
724
296
			bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
725
296
			bn_scatter5(tmp.d, top, powerbuf, 2*i);
726
		}
727
592
		for (; i < 32; i += 2) {
728
592
			bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np,
729
			    n0, top, i - 1);
730
592
			bn_scatter5(tmp.d, top, powerbuf, i);
731
		}
732
#endif
733
74
		bits--;
734
315
		for (wvalue = 0, i = bits % 5; i >= 0; i--, bits--)
735
241
			wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
736
74
		bn_gather5(tmp.d, top, powerbuf, wvalue);
737
738
		/* Scan the exponent one window at a time starting from the most
739
		 * significant bits.
740
		 */
741
7126
		while (bits >= 0) {
742
41868
			for (wvalue = 0, i = 0; i < 5; i++, bits--)
743
34890
				wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
744
745
6978
			bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
746
6978
			bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
747
6978
			bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
748
6978
			bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
749
6978
			bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
750
6978
			bn_mul_mont_gather5(tmp.d, tmp.d, powerbuf, np, n0, top, wvalue);
751
		}
752
753
74
		tmp.top = top;
754

74
		bn_correct_top(&tmp);
755
	} else
756
#endif
757
	{
758
301
		if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, 0,
759
		    numPowers))
760
			goto err;
761
301
		if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&am,  top, powerbuf, 1,
762
		    numPowers))
763
			goto err;
764
765
		/* If the window size is greater than 1, then calculate
766
		 * val[i=2..2^winsize-1]. Powers are computed as a*a^(i-1)
767
		 * (even powers could instead be computed as (a^(i/2))^2
768
		 * to use the slight performance advantage of sqr over mul).
769
		 */
770
301
		if (window > 1) {
771
296
			if (!BN_mod_mul_montgomery(&tmp, &am, &am, mont, ctx))
772
				goto err;
773
296
			if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf,
774
			    2, numPowers))
775
				goto err;
776
6256
			for (i = 3; i < numPowers; i++) {
777
				/* Calculate a^i = a^(i-1) * a */
778
5960
				if (!BN_mod_mul_montgomery(&tmp, &am, &tmp,
779
				    mont, ctx))
780
					goto err;
781
5960
				if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top,
782
				    powerbuf, i, numPowers))
783
					goto err;
784
			}
785
		}
786
787
301
		bits--;
788
1098
		for (wvalue = 0, i = bits % window; i >= 0; i--, bits--)
789
797
			wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
790
301
		if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(&tmp, top, powerbuf,
791
		    wvalue, numPowers))
792
			goto err;
793
794
		/* Scan the exponent one window at a time starting from the most
795
		 * significant bits.
796
		 */
797
27742
		while (bits >= 0) {
798
27441
			wvalue = 0; /* The 'value' of the window */
799
800
			/* Scan the window, squaring the result as we go */
801
155079
			for (i = 0; i < window; i++, bits--) {
802
127638
				if (!BN_mod_mul_montgomery(&tmp, &tmp, &tmp,
803
				    mont, ctx))
804
					goto err;
805
127638
				wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
806
			}
807
808
			/* Fetch the appropriate pre-computed value from the pre-buf */
809
27441
			if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(&am, top, powerbuf,
810
			    wvalue, numPowers))
811
				goto err;
812
813
			/* Multiply the result into the intermediate result */
814
27441
			if (!BN_mod_mul_montgomery(&tmp, &tmp, &am, mont, ctx))
815
				goto err;
816
		}
817
	}
818
819
	/* Convert the final result from montgomery to standard format */
820
375
	if (!BN_from_montgomery(rr, &tmp, mont, ctx))
821
		goto err;
822
375
	ret = 1;
823
824
375
err:
825
375
	if ((in_mont == NULL) && (mont != NULL))
826
207
		BN_MONT_CTX_free(mont);
827
375
	if (powerbuf != NULL) {
828
375
		explicit_bzero(powerbuf, powerbufLen);
829
375
		free(powerbufFree);
830
	}
831
375
	BN_CTX_end(ctx);
832
375
	return (ret);
833
}
834
835
int
836
BN_mod_exp_mont_word(BIGNUM *rr, BN_ULONG a, const BIGNUM *p, const BIGNUM *m,
837
    BN_CTX *ctx, BN_MONT_CTX *in_mont)
838
50
{
839
50
	BN_MONT_CTX *mont = NULL;
840
50
	int b, bits, ret = 0;
841
	int r_is_one;
842
	BN_ULONG w, next_w;
843
	BIGNUM *d, *r, *t;
844
	BIGNUM *swap_tmp;
845
846
#define BN_MOD_MUL_WORD(r, w, m) \
847
		(BN_mul_word(r, (w)) && \
848
		(/* BN_ucmp(r, (m)) < 0 ? 1 :*/  \
849
			(BN_mod(t, r, m, ctx) && (swap_tmp = r, r = t, t = swap_tmp, 1))))
850
		/* BN_MOD_MUL_WORD is only used with 'w' large,
851
		 * so the BN_ucmp test is probably more overhead
852
		 * than always using BN_mod (which uses BN_copy if
853
		 * a similar test returns true). */
854
		/* We can use BN_mod and do not need BN_nnmod because our
855
		 * accumulator is never negative (the result of BN_mod does
856
		 * not depend on the sign of the modulus).
857
		 */
858
#define BN_TO_MONTGOMERY_WORD(r, w, mont) \
859
		(BN_set_word(r, (w)) && BN_to_montgomery(r, r, (mont), ctx))
860
861
50
	if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
862
		/* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
863
		BNerr(BN_F_BN_MOD_EXP_MONT_WORD,
864
		    ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
865
		return -1;
866
	}
867
868
	bn_check_top(p);
869
	bn_check_top(m);
870
871

50
	if (!BN_is_odd(m)) {
872
		BNerr(BN_F_BN_MOD_EXP_MONT_WORD, BN_R_CALLED_WITH_EVEN_MODULUS);
873
		return (0);
874
	}
875
50
	if (m->top == 1)
876
39
		a %= m->d[0]; /* make sure that 'a' is reduced */
877
878
50
	bits = BN_num_bits(p);
879
50
	if (bits == 0) {
880
4
		ret = BN_one(rr);
881
4
		return ret;
882
	}
883
46
	if (a == 0) {
884
		BN_zero(rr);
885
		ret = 1;
886
		return ret;
887
	}
888
889
46
	BN_CTX_start(ctx);
890
46
	if ((d = BN_CTX_get(ctx)) == NULL)
891
		goto err;
892
46
	if ((r = BN_CTX_get(ctx)) == NULL)
893
		goto err;
894
46
	if ((t = BN_CTX_get(ctx)) == NULL)
895
		goto err;
896
897
46
	if (in_mont != NULL)
898
		mont = in_mont;
899
	else {
900
46
		if ((mont = BN_MONT_CTX_new()) == NULL)
901
			goto err;
902
46
		if (!BN_MONT_CTX_set(mont, m, ctx))
903
			goto err;
904
	}
905
906
46
	r_is_one = 1; /* except for Montgomery factor */
907
908
	/* bits-1 >= 0 */
909
910
	/* The result is accumulated in the product r*w. */
911
46
	w = a; /* bit 'bits-1' of 'p' is always set */
912
2954
	for (b = bits - 2; b >= 0; b--) {
913
		/* First, square r*w. */
914
2908
		next_w = w * w;
915
2908
		if ((next_w / w) != w) /* overflow */
916
		{
917
470
			if (r_is_one) {
918

15
				if (!BN_TO_MONTGOMERY_WORD(r, w, mont))
919
					goto err;
920
15
				r_is_one = 0;
921
			} else {
922

455
				if (!BN_MOD_MUL_WORD(r, w, m))
923
					goto err;
924
			}
925
470
			next_w = 1;
926
		}
927
2908
		w = next_w;
928
2908
		if (!r_is_one) {
929
2832
			if (!BN_mod_mul_montgomery(r, r, r, mont, ctx))
930
				goto err;
931
		}
932
933
		/* Second, multiply r*w by 'a' if exponent bit is set. */
934
2908
		if (BN_is_bit_set(p, b)) {
935
1515
			next_w = w * a;
936
1515
			if ((next_w / a) != w) /* overflow */
937
			{
938
71
				if (r_is_one) {
939

5
					if (!BN_TO_MONTGOMERY_WORD(r, w, mont))
940
						goto err;
941
5
					r_is_one = 0;
942
				} else {
943

66
					if (!BN_MOD_MUL_WORD(r, w, m))
944
						goto err;
945
				}
946
71
				next_w = a;
947
			}
948
1515
			w = next_w;
949
		}
950
	}
951
952
	/* Finally, set r:=r*w. */
953
46
	if (w != 1) {
954
39
		if (r_is_one) {
955

22
			if (!BN_TO_MONTGOMERY_WORD(r, w, mont))
956
				goto err;
957
22
			r_is_one = 0;
958
		} else {
959

17
			if (!BN_MOD_MUL_WORD(r, w, m))
960
				goto err;
961
		}
962
	}
963
964
46
	if (r_is_one) /* can happen only if a == 1*/
965
	{
966
4
		if (!BN_one(rr))
967
			goto err;
968
	} else {
969
42
		if (!BN_from_montgomery(rr, r, mont, ctx))
970
			goto err;
971
	}
972
46
	ret = 1;
973
974
46
err:
975
46
	if ((in_mont == NULL) && (mont != NULL))
976
46
		BN_MONT_CTX_free(mont);
977
46
	BN_CTX_end(ctx);
978
	bn_check_top(rr);
979
46
	return (ret);
980
}
981
982
983
/* The old fallback, simple version :-) */
984
int
985
BN_mod_exp_simple(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
986
    BN_CTX *ctx)
987
200
{
988
200
	int i, j,bits, ret = 0, wstart, wend, window, wvalue;
989
200
	int start = 1;
990
	BIGNUM *d;
991
	/* Table of variables obtained from 'ctx' */
992
	BIGNUM *val[TABLE_SIZE];
993
994
200
	if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
995
		/* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
996
		BNerr(BN_F_BN_MOD_EXP_SIMPLE,
997
		    ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
998
		return -1;
999
	}
1000
1001
200
	bits = BN_num_bits(p);
1002
1003
200
	if (bits == 0) {
1004
		ret = BN_one(r);
1005
		return ret;
1006
	}
1007
1008
200
	BN_CTX_start(ctx);
1009
200
	if ((d = BN_CTX_get(ctx)) == NULL)
1010
		goto err;
1011
200
	if ((val[0] = BN_CTX_get(ctx)) == NULL)
1012
		goto err;
1013
1014
200
	if (!BN_nnmod(val[0],a,m,ctx))
1015
		goto err;		/* 1 */
1016
200
	if (BN_is_zero(val[0])) {
1017
		BN_zero(r);
1018
		ret = 1;
1019
		goto err;
1020
	}
1021
1022


200
	window = BN_window_bits_for_exponent_size(bits);
1023
200
	if (window > 1) {
1024
200
		if (!BN_mod_mul(d, val[0], val[0], m, ctx))
1025
			goto err;				/* 2 */
1026
200
		j = 1 << (window - 1);
1027
3200
		for (i = 1; i < j; i++) {
1028

3000
			if (((val[i] = BN_CTX_get(ctx)) == NULL) ||
1029
			    !BN_mod_mul(val[i], val[i - 1], d,m, ctx))
1030
				goto err;
1031
		}
1032
	}
1033
1034
200
	start = 1;		/* This is used to avoid multiplication etc
1035
				 * when there is only the value '1' in the
1036
				 * buffer. */
1037
200
	wvalue = 0;		/* The 'value' of the window */
1038
200
	wstart = bits - 1;	/* The top bit of the window */
1039
200
	wend = 0;		/* The bottom bit of the window */
1040
1041
200
	if (!BN_one(r))
1042
		goto err;
1043
1044
	for (;;) {
1045
32547
		if (BN_is_bit_set(p, wstart) == 0) {
1046
21544
			if (!start)
1047
21544
				if (!BN_mod_mul(r, r, r, m, ctx))
1048
					goto err;
1049
21544
			if (wstart == 0)
1050
116
				break;
1051
21428
			wstart--;
1052
21428
			continue;
1053
		}
1054
		/* We now have wstart on a 'set' bit, we now need to work out
1055
		 * how bit a window to do.  To do this we need to scan
1056
		 * forward until the last set bit before the end of the
1057
		 * window */
1058
11003
		j = wstart;
1059
11003
		wvalue = 1;
1060
11003
		wend = 0;
1061
54698
		for (i = 1; i < window; i++) {
1062
43822
			if (wstart - i < 0)
1063
127
				break;
1064
43695
			if (BN_is_bit_set(p, wstart - i)) {
1065
21815
				wvalue <<= (i - wend);
1066
21815
				wvalue |= 1;
1067
21815
				wend = i;
1068
			}
1069
		}
1070
1071
		/* wend is the size of the current window */
1072
11003
		j = wend + 1;
1073
		/* add the 'bytes above' */
1074
11003
		if (!start)
1075
54140
			for (i = 0; i < j; i++) {
1076
43337
				if (!BN_mod_mul(r, r, r, m, ctx))
1077
					goto err;
1078
			}
1079
1080
		/* wvalue will be an odd number < 2^window */
1081
11003
		if (!BN_mod_mul(r, r, val[wvalue >> 1], m, ctx))
1082
			goto err;
1083
1084
		/* move the 'window' down further */
1085
11003
		wstart -= wend + 1;
1086
11003
		wvalue = 0;
1087
11003
		start = 0;
1088
11003
		if (wstart < 0)
1089
84
			break;
1090
	}
1091
200
	ret = 1;
1092
1093
200
err:
1094
200
	BN_CTX_end(ctx);
1095
	bn_check_top(r);
1096
200
	return (ret);
1097
}