GCC Code Coverage Report
Directory: ./ Exec Total Coverage
File: lib/libcrypto/bn/bn_exp.c Lines: 382 416 91.8 %
Date: 2017-11-13 Branches: 308 452 68.1 %

Line Branch Exec Source
1
/* $OpenBSD: bn_exp.c,v 1.31 2017/05/02 03:59:44 deraadt Exp $ */
2
/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3
 * All rights reserved.
4
 *
5
 * This package is an SSL implementation written
6
 * by Eric Young (eay@cryptsoft.com).
7
 * The implementation was written so as to conform with Netscapes SSL.
8
 *
9
 * This library is free for commercial and non-commercial use as long as
10
 * the following conditions are aheared to.  The following conditions
11
 * apply to all code found in this distribution, be it the RC4, RSA,
12
 * lhash, DES, etc., code; not just the SSL code.  The SSL documentation
13
 * included with this distribution is covered by the same copyright terms
14
 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15
 *
16
 * Copyright remains Eric Young's, and as such any Copyright notices in
17
 * the code are not to be removed.
18
 * If this package is used in a product, Eric Young should be given attribution
19
 * as the author of the parts of the library used.
20
 * This can be in the form of a textual message at program startup or
21
 * in documentation (online or textual) provided with the package.
22
 *
23
 * Redistribution and use in source and binary forms, with or without
24
 * modification, are permitted provided that the following conditions
25
 * are met:
26
 * 1. Redistributions of source code must retain the copyright
27
 *    notice, this list of conditions and the following disclaimer.
28
 * 2. Redistributions in binary form must reproduce the above copyright
29
 *    notice, this list of conditions and the following disclaimer in the
30
 *    documentation and/or other materials provided with the distribution.
31
 * 3. All advertising materials mentioning features or use of this software
32
 *    must display the following acknowledgement:
33
 *    "This product includes cryptographic software written by
34
 *     Eric Young (eay@cryptsoft.com)"
35
 *    The word 'cryptographic' can be left out if the rouines from the library
36
 *    being used are not cryptographic related :-).
37
 * 4. If you include any Windows specific code (or a derivative thereof) from
38
 *    the apps directory (application code) you must include an acknowledgement:
39
 *    "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40
 *
41
 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51
 * SUCH DAMAGE.
52
 *
53
 * The licence and distribution terms for any publically available version or
54
 * derivative of this code cannot be changed.  i.e. this code cannot simply be
55
 * copied and put under another distribution licence
56
 * [including the GNU Public Licence.]
57
 */
58
/* ====================================================================
59
 * Copyright (c) 1998-2005 The OpenSSL Project.  All rights reserved.
60
 *
61
 * Redistribution and use in source and binary forms, with or without
62
 * modification, are permitted provided that the following conditions
63
 * are met:
64
 *
65
 * 1. Redistributions of source code must retain the above copyright
66
 *    notice, this list of conditions and the following disclaimer.
67
 *
68
 * 2. Redistributions in binary form must reproduce the above copyright
69
 *    notice, this list of conditions and the following disclaimer in
70
 *    the documentation and/or other materials provided with the
71
 *    distribution.
72
 *
73
 * 3. All advertising materials mentioning features or use of this
74
 *    software must display the following acknowledgment:
75
 *    "This product includes software developed by the OpenSSL Project
76
 *    for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
77
 *
78
 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
79
 *    endorse or promote products derived from this software without
80
 *    prior written permission. For written permission, please contact
81
 *    openssl-core@openssl.org.
82
 *
83
 * 5. Products derived from this software may not be called "OpenSSL"
84
 *    nor may "OpenSSL" appear in their names without prior written
85
 *    permission of the OpenSSL Project.
86
 *
87
 * 6. Redistributions of any form whatsoever must retain the following
88
 *    acknowledgment:
89
 *    "This product includes software developed by the OpenSSL Project
90
 *    for use in the OpenSSL Toolkit (http://www.openssl.org/)"
91
 *
92
 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
93
 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
94
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
95
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
96
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
97
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
98
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
99
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
100
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
101
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
102
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
103
 * OF THE POSSIBILITY OF SUCH DAMAGE.
104
 * ====================================================================
105
 *
106
 * This product includes cryptographic software written by Eric Young
107
 * (eay@cryptsoft.com).  This product includes software written by Tim
108
 * Hudson (tjh@cryptsoft.com).
109
 *
110
 */
111
112
#include <stdlib.h>
113
#include <string.h>
114
115
#include <openssl/err.h>
116
117
#include "bn_lcl.h"
118
#include "constant_time_locl.h"
119
120
/* maximum precomputation table size for *variable* sliding windows */
121
#define TABLE_SIZE	32
122
123
/* this one works - simple but works */
124
int
125
BN_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)
126
{
127
	int i, bits, ret = 0;
128
	BIGNUM *v, *rr;
129
130
134610
	if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
131
		/* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
132
		BNerror(ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
133
		return -1;
134
	}
135
136
67305
	BN_CTX_start(ctx);
137

67380
	if ((r == a) || (r == p))
138
67230
		rr = BN_CTX_get(ctx);
139
	else
140
		rr = r;
141
67305
	v = BN_CTX_get(ctx);
142
67305
	if (rr == NULL || v == NULL)
143
		goto err;
144
145
67305
	if (BN_copy(v, a) == NULL)
146
		goto err;
147
67305
	bits = BN_num_bits(p);
148
149

134610
	if (BN_is_odd(p)) {
150
28947
		if (BN_copy(rr, a) == NULL)
151
			goto err;
152
	} else {
153
38358
		if (!BN_one(rr))
154
			goto err;
155
	}
156
157
715044
	for (i = 1; i < bits; i++) {
158
290217
		if (!BN_sqr(v, v, ctx))
159
			goto err;
160
290217
		if (BN_is_bit_set(p, i)) {
161
185521
			if (!BN_mul(rr, rr, v, ctx))
162
				goto err;
163
		}
164
	}
165
67305
	ret = 1;
166
167
err:
168
67305
	if (r != rr && rr != NULL)
169
67230
		BN_copy(r, rr);
170
67305
	BN_CTX_end(ctx);
171
	bn_check_top(r);
172
67305
	return (ret);
173
67305
}
174
175
static int
176
BN_mod_exp_internal(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
177
    BN_CTX *ctx, int ct)
178
{
179
	int ret;
180
181
	bn_check_top(a);
182
	bn_check_top(p);
183
	bn_check_top(m);
184
185
	/* For even modulus  m = 2^k*m_odd,  it might make sense to compute
186
	 * a^p mod m_odd  and  a^p mod 2^k  separately (with Montgomery
187
	 * exponentiation for the odd part), using appropriate exponent
188
	 * reductions, and combine the results using the CRT.
189
	 *
190
	 * For now, we use Montgomery only if the modulus is odd; otherwise,
191
	 * exponentiation using the reciprocal-based quick remaindering
192
	 * algorithm is used.
193
	 *
194
	 * (Timing obtained with expspeed.c [computations  a^p mod m
195
	 * where  a, p, m  are of the same length: 256, 512, 1024, 2048,
196
	 * 4096, 8192 bits], compared to the running time of the
197
	 * standard algorithm:
198
	 *
199
	 *   BN_mod_exp_mont   33 .. 40 %  [AMD K6-2, Linux, debug configuration]
200
         *                     55 .. 77 %  [UltraSparc processor, but
201
	 *                                  debug-solaris-sparcv8-gcc conf.]
202
	 *
203
	 *   BN_mod_exp_recp   50 .. 70 %  [AMD K6-2, Linux, debug configuration]
204
	 *                     62 .. 118 % [UltraSparc, debug-solaris-sparcv8-gcc]
205
	 *
206
	 * On the Sparc, BN_mod_exp_recp was faster than BN_mod_exp_mont
207
	 * at 2048 and more bits, but at 512 and 1024 bits, it was
208
	 * slower even than the standard algorithm!
209
	 *
210
	 * "Real" timings [linux-elf, solaris-sparcv9-gcc configurations]
211
	 * should be obtained when the new Montgomery reduction code
212
	 * has been integrated into OpenSSL.)
213
	 */
214
215

5031
	if (BN_is_odd(m)) {
216

1863
		if (a->top == 1 && !a->neg && !ct) {
217
45
			BN_ULONG A = a->d[0];
218
45
			ret = BN_mod_exp_mont_word(r, A,p, m,ctx, NULL);
219
45
		} else
220
1626
			ret = BN_mod_exp_mont_ct(r, a,p, m,ctx, NULL);
221
	} else	{
222
9
		ret = BN_mod_exp_recp(r, a,p, m, ctx);
223
	}
224
225
	bn_check_top(r);
226
1680
	return (ret);
227
}
228
229
int
230
BN_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
231
    BN_CTX *ctx)
232
{
233
393
	return BN_mod_exp_internal(r, a, p, m, ctx,
234
786
	    (BN_get_flags(p, BN_FLG_CONSTTIME) != 0));
235
}
236
237
int
238
BN_mod_exp_ct(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
239
    BN_CTX *ctx)
240
{
241
2532
	return BN_mod_exp_internal(r, a, p, m, ctx, 1);
242
}
243
244
245
int
246
BN_mod_exp_nonct(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
247
    BN_CTX *ctx)
248
{
249
42
	return BN_mod_exp_internal(r, a, p, m, ctx, 0);
250
}
251
252
253
int
254
BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
255
    BN_CTX *ctx)
256
{
257
	int i, j, bits, ret = 0, wstart, wend, window, wvalue;
258
	int start = 1;
259
	BIGNUM *aa;
260
	/* Table of variables obtained from 'ctx' */
261
1824
	BIGNUM *val[TABLE_SIZE];
262
912
	BN_RECP_CTX recp;
263
264
912
	if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
265
		/* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
266
		BNerror(ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
267
		return -1;
268
	}
269
270
912
	bits = BN_num_bits(p);
271
912
	if (bits == 0) {
272
		/* x**0 mod 1 is still zero. */
273

9
		if (BN_is_one(m)) {
274
			ret = 1;
275
3
			BN_zero(r);
276
3
		} else
277
			ret = BN_one(r);
278
3
		return ret;
279
	}
280
281
909
	BN_CTX_start(ctx);
282
909
	if ((aa = BN_CTX_get(ctx)) == NULL)
283
		goto err;
284
909
	if ((val[0] = BN_CTX_get(ctx)) == NULL)
285
		goto err;
286
287
909
	BN_RECP_CTX_init(&recp);
288
909
	if (m->neg) {
289
		/* ignore sign of 'm' */
290
		if (!BN_copy(aa, m))
291
			goto err;
292
		aa->neg = 0;
293
		if (BN_RECP_CTX_set(&recp, aa, ctx) <= 0)
294
			goto err;
295
	} else {
296
909
		if (BN_RECP_CTX_set(&recp, m, ctx) <= 0)
297
			goto err;
298
	}
299
300
909
	if (!BN_nnmod(val[0], a, m, ctx))
301
		goto err;		/* 1 */
302
900
	if (BN_is_zero(val[0])) {
303
		BN_zero(r);
304
		ret = 1;
305
		goto err;
306
	}
307
308

2700
	window = BN_window_bits_for_exponent_size(bits);
309
900
	if (window > 1) {
310
900
		if (!BN_mod_mul_reciprocal(aa, val[0], val[0], &recp, ctx))
311
			goto err;				/* 2 */
312
900
		j = 1 << (window - 1);
313
28800
		for (i = 1; i < j; i++) {
314

27000
			if (((val[i] = BN_CTX_get(ctx)) == NULL) ||
315
13500
			    !BN_mod_mul_reciprocal(val[i], val[i - 1],
316
			    aa, &recp, ctx))
317
				goto err;
318
		}
319
	}
320
321
	start = 1;		/* This is used to avoid multiplication etc
322
				 * when there is only the value '1' in the
323
				 * buffer. */
324
	wvalue = 0;		/* The 'value' of the window */
325
900
	wstart = bits - 1;	/* The top bit of the window */
326
	wend = 0;		/* The bottom bit of the window */
327
328
900
	if (!BN_one(r))
329
		goto err;
330
331
59409
	for (;;) {
332
178174
		if (BN_is_bit_set(p, wstart) == 0) {
333
119353
			if (!start)
334
119353
				if (!BN_mod_mul_reciprocal(r, r,r, &recp, ctx))
335
					goto err;
336
119353
			if (wstart == 0)
337
				break;
338
118765
			wstart--;
339
118765
			continue;
340
		}
341
		/* We now have wstart on a 'set' bit, we now need to work out
342
		 * how bit a window to do.  To do this we need to scan
343
		 * forward until the last set bit before the end of the
344
		 * window */
345
		j = wstart;
346
		wvalue = 1;
347
		wend = 0;
348
585742
		for (i = 1; i < window; i++) {
349
234560
			if (wstart - i < 0)
350
				break;
351
234050
			if (BN_is_bit_set(p, wstart - i)) {
352
113328
				wvalue <<= (i - wend);
353
113328
				wvalue |= 1;
354
				wend = i;
355
113328
			}
356
		}
357
358
		/* wend is the size of the current window */
359
58821
		j = wend + 1;
360
		/* add the 'bytes above' */
361
58821
		if (!start)
362
577068
			for (i = 0; i < j; i++) {
363
230613
				if (!BN_mod_mul_reciprocal(r, r,r, &recp, ctx))
364
					goto err;
365
			}
366
367
		/* wvalue will be an odd number < 2^window */
368
58821
		if (!BN_mod_mul_reciprocal(r, r,val[wvalue >> 1], &recp, ctx))
369
			goto err;
370
371
		/* move the 'window' down further */
372
58821
		wstart -= wend + 1;
373
		wvalue = 0;
374
		start = 0;
375
58821
		if (wstart < 0)
376
			break;
377
	}
378
900
	ret = 1;
379
380
err:
381
909
	BN_CTX_end(ctx);
382
909
	BN_RECP_CTX_free(&recp);
383
	bn_check_top(r);
384
909
	return (ret);
385
912
}
386
387
static int
388
BN_mod_exp_mont_internal(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
389
    BN_CTX *ctx, BN_MONT_CTX *in_mont, int ct)
390
{
391
	int i, j, bits, ret = 0, wstart, wend, window, wvalue;
392
	int start = 1;
393
	BIGNUM *d, *r;
394
	const BIGNUM *aa;
395
	/* Table of variables obtained from 'ctx' */
396
882440
	BIGNUM *val[TABLE_SIZE];
397
	BN_MONT_CTX *mont = NULL;
398
399
441220
	if (ct) {
400
440014
		return BN_mod_exp_mont_consttime(rr, a, p, m, ctx, in_mont);
401
	}
402
403
	bn_check_top(a);
404
	bn_check_top(p);
405
	bn_check_top(m);
406
407

2412
	if (!BN_is_odd(m)) {
408
		BNerror(BN_R_CALLED_WITH_EVEN_MODULUS);
409
		return (0);
410
	}
411
412
1206
	bits = BN_num_bits(p);
413
1206
	if (bits == 0) {
414
		/* x**0 mod 1 is still zero. */
415

18
		if (BN_is_one(m)) {
416
			ret = 1;
417
6
			BN_zero(rr);
418
6
		} else
419
			ret = BN_one(rr);
420
6
		return ret;
421
	}
422
423
1200
	BN_CTX_start(ctx);
424
1200
	if ((d = BN_CTX_get(ctx)) == NULL)
425
		goto err;
426
1200
	if ((r = BN_CTX_get(ctx)) == NULL)
427
		goto err;
428
1200
	if ((val[0] = BN_CTX_get(ctx)) == NULL)
429
		goto err;
430
431
	/* If this is not done, things will break in the montgomery
432
	 * part */
433
434
1200
	if (in_mont != NULL)
435
		mont = in_mont;
436
	else {
437
1200
		if ((mont = BN_MONT_CTX_new()) == NULL)
438
			goto err;
439
1200
		if (!BN_MONT_CTX_set(mont, m, ctx))
440
			goto err;
441
	}
442
443

2400
	if (a->neg || BN_ucmp(a, m) >= 0) {
444
		if (!BN_nnmod(val[0], a,m, ctx))
445
			goto err;
446
		aa = val[0];
447
	} else
448
		aa = a;
449
1200
	if (BN_is_zero(aa)) {
450
		BN_zero(rr);
451
		ret = 1;
452
		goto err;
453
	}
454
1200
	if (!BN_to_montgomery(val[0], aa, mont, ctx))
455
		goto err; /* 1 */
456
457

3600
	window = BN_window_bits_for_exponent_size(bits);
458
1200
	if (window > 1) {
459
1200
		if (!BN_mod_mul_montgomery(d, val[0], val[0], mont, ctx))
460
			goto err; /* 2 */
461
1200
		j = 1 << (window - 1);
462
38400
		for (i = 1; i < j; i++) {
463

36000
			if (((val[i] = BN_CTX_get(ctx)) == NULL) ||
464
18000
			    !BN_mod_mul_montgomery(val[i], val[i - 1],
465
			    d, mont, ctx))
466
				goto err;
467
		}
468
	}
469
470
	start = 1;		/* This is used to avoid multiplication etc
471
				 * when there is only the value '1' in the
472
				 * buffer. */
473
	wvalue = 0;		/* The 'value' of the window */
474
1200
	wstart = bits - 1;	/* The top bit of the window */
475
	wend = 0;		/* The bottom bit of the window */
476
477
1200
	if (!BN_to_montgomery(r, BN_value_one(), mont, ctx))
478
		goto err;
479
67818
	for (;;) {
480
196548
		if (BN_is_bit_set(p, wstart) == 0) {
481
129306
			if (!start) {
482
129306
				if (!BN_mod_mul_montgomery(r, r, r, mont, ctx))
483
					goto err;
484
			}
485
129306
			if (wstart == 0)
486
				break;
487
128730
			wstart--;
488
128730
			continue;
489
		}
490
		/* We now have wstart on a 'set' bit, we now need to work out
491
		 * how bit a window to do.  To do this we need to scan
492
		 * forward until the last set bit before the end of the
493
		 * window */
494
		j = wstart;
495
		wvalue = 1;
496
		wend = 0;
497
668284
		for (i = 1; i < window; i++) {
498
267720
			if (wstart - i < 0)
499
				break;
500
266900
			if (BN_is_bit_set(p, wstart - i)) {
501
132856
				wvalue <<= (i - wend);
502
132856
				wvalue |= 1;
503
				wend = i;
504
132856
			}
505
		}
506
507
		/* wend is the size of the current window */
508
67242
		j = wend + 1;
509
		/* add the 'bytes above' */
510
67242
		if (!start)
511
665336
			for (i = 0; i < j; i++) {
512
266626
				if (!BN_mod_mul_montgomery(r, r, r, mont, ctx))
513
					goto err;
514
			}
515
516
		/* wvalue will be an odd number < 2^window */
517
67242
		if (!BN_mod_mul_montgomery(r, r, val[wvalue >> 1], mont, ctx))
518
			goto err;
519
520
		/* move the 'window' down further */
521
67242
		wstart -= wend + 1;
522
		wvalue = 0;
523
		start = 0;
524
67242
		if (wstart < 0)
525
			break;
526
	}
527
1200
	if (!BN_from_montgomery(rr, r,mont, ctx))
528
		goto err;
529
1200
	ret = 1;
530
531
err:
532
1200
	if ((in_mont == NULL) && (mont != NULL))
533
1200
		BN_MONT_CTX_free(mont);
534
1200
	BN_CTX_end(ctx);
535
	bn_check_top(rr);
536
1200
	return (ret);
537
441220
}
538
539
int
540
BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
541
    BN_CTX *ctx, BN_MONT_CTX *in_mont)
542
{
543
603
	return BN_mod_exp_mont_internal(rr, a, p, m, ctx, in_mont,
544
1206
	    (BN_get_flags(p, BN_FLG_CONSTTIME) != 0));
545
}
546
547
int
548
BN_mod_exp_mont_ct(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
549
    BN_CTX *ctx, BN_MONT_CTX *in_mont)
550
{
551
880028
	return BN_mod_exp_mont_internal(rr, a, p, m, ctx, in_mont, 1);
552
}
553
554
int
555
BN_mod_exp_mont_nonct(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
556
    BN_CTX *ctx, BN_MONT_CTX *in_mont)
557
{
558
1206
	return BN_mod_exp_mont_internal(rr, a, p, m, ctx, in_mont, 0);
559
}
560
561
/* BN_mod_exp_mont_consttime() stores the precomputed powers in a specific layout
562
 * so that accessing any of these table values shows the same access pattern as far
563
 * as cache lines are concerned.  The following functions are used to transfer a BIGNUM
564
 * from/to that table. */
565
566
static int
567
MOD_EXP_CTIME_COPY_TO_PREBUF(const BIGNUM *b, int top, unsigned char *buf,
568
    int idx, int window)
569
{
570
	int i, j;
571
2706804
	int width = 1 << window;
572
1353402
	BN_ULONG *table = (BN_ULONG *)buf;
573
574
1353402
	if (top > b->top)
575
1327
		top = b->top; /* this works because 'buf' is explicitly zeroed */
576
577
81637404
	for (i = 0, j = idx; i < top; i++, j += width) {
578
39465300
		table[j] = b->d[i];
579
	}
580
581
1353402
	return 1;
582
}
583
584
static int
585
MOD_EXP_CTIME_COPY_FROM_PREBUF(BIGNUM *b, int top, unsigned char *buf, int idx,
586
    int window)
587
{
588
	int i, j;
589
18505092
	int width = 1 << window;
590
9252546
	volatile BN_ULONG *table = (volatile BN_ULONG *)buf;
591
592

18505092
	if (bn_wexpand(b, top) == NULL)
593
		return 0;
594
595
9252546
	if (window <= 3) {
596
408116912
		for (i = 0; i < top; i++, table += width) {
597
		    BN_ULONG acc = 0;
598
599
1187466462
		    for (j = 0; j < width; j++) {
600
791811720
			acc |= table[j] &
601
395905860
			       ((BN_ULONG)0 - (constant_time_eq_int(j,idx)&1));
602
		    }
603
604
197827371
		    b->d[i] = acc;
605
		}
606
	} else {
607
3021461
		int xstride = 1 << (window - 2);
608
		BN_ULONG y0, y1, y2, y3;
609
610
3021461
		i = idx >> (window - 2);        /* equivalent of idx / xstride */
611
3021461
		idx &= xstride - 1;             /* equivalent of idx % xstride */
612
613
3021461
		y0 = (BN_ULONG)0 - (constant_time_eq_int(i,0)&1);
614
3021461
		y1 = (BN_ULONG)0 - (constant_time_eq_int(i,1)&1);
615
3021461
		y2 = (BN_ULONG)0 - (constant_time_eq_int(i,2)&1);
616
3021461
		y3 = (BN_ULONG)0 - (constant_time_eq_int(i,3)&1);
617
618
184165906
		for (i = 0; i < top; i++, table += width) {
619
		    BN_ULONG acc = 0;
620
621
2995215144
		    for (j = 0; j < xstride; j++) {
622
4225638240
			acc |= ( (table[j + 0 * xstride] & y0) |
623
2817092160
				 (table[j + 1 * xstride] & y1) |
624
2817092160
				 (table[j + 2 * xstride] & y2) |
625
1408546080
				 (table[j + 3 * xstride] & y3) )
626
1408546080
			       & ((BN_ULONG)0 - (constant_time_eq_int(j,idx)&1));
627
		    }
628
629
89061492
		    b->d[i] = acc;
630
		}
631
	}
632
9252546
	b->top = top;
633

46291553
	bn_correct_top(b);
634
9252546
	return 1;
635
9252546
}
636
637
/* Given a pointer value, compute the next address that is a cache line multiple. */
638
#define MOD_EXP_CTIME_ALIGN(x_) \
639
	((unsigned char*)(x_) + (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - (((size_t)(x_)) & (MOD_EXP_CTIME_MIN_CACHE_LINE_MASK))))
640
641
/* This variant of BN_mod_exp_mont() uses fixed windows and the special
642
 * precomputation memory layout to limit data-dependency to a minimum
643
 * to protect secret exponents (cf. the hyper-threading timing attacks
644
 * pointed out by Colin Percival,
645
 * http://www.daemonology.net/hyperthreading-considered-harmful/)
646
 */
647
int
648
BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
649
    const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont)
650
{
651
	int i, bits, ret = 0, window, wvalue;
652
	int top;
653
	BN_MONT_CTX *mont = NULL;
654
	int numPowers;
655
	unsigned char *powerbufFree = NULL;
656
	int powerbufLen = 0;
657
	unsigned char *powerbuf = NULL;
658
882044
	BIGNUM tmp, am;
659
660
	bn_check_top(a);
661
	bn_check_top(p);
662
	bn_check_top(m);
663
664

882041
	if (!BN_is_odd(m)) {
665
6
		BNerror(BN_R_CALLED_WITH_EVEN_MODULUS);
666
6
		return (0);
667
	}
668
669
	top = m->top;
670
671
441016
	bits = BN_num_bits(p);
672
441016
	if (bits == 0) {
673
		/* x**0 mod 1 is still zero. */
674

72
		if (BN_is_one(m)) {
675
			ret = 1;
676
15
			BN_zero(rr);
677
15
		} else
678
15
			ret = BN_one(rr);
679
30
		return ret;
680
	}
681
682
440986
	BN_CTX_start(ctx);
683
684
	/* Allocate a montgomery context if it was not supplied by the caller.
685
	 * If this is not done, things will break in the montgomery part.
686
 	 */
687
440986
	if (in_mont != NULL)
688
438157
		mont = in_mont;
689
	else {
690
2829
		if ((mont = BN_MONT_CTX_new()) == NULL)
691
			goto err;
692
2829
		if (!BN_MONT_CTX_set(mont, m, ctx))
693
			goto err;
694
	}
695
696
	/* Get the window size to use with size of p. */
697

2008019
	window = BN_window_bits_for_ctime_exponent_size(bits);
698
#if defined(OPENSSL_BN_ASM_MONT5)
699
440986
	if (window == 6 && bits <= 1024)
700
		window = 5;	/* ~5% improvement of 2048-bit RSA sign */
701
#endif
702
703
	/* Allocate a buffer large enough to hold all of the pre-computed
704
	 * powers of am, am itself and tmp.
705
	 */
706
440986
	numPowers = 1 << window;
707
881972
	powerbufLen = sizeof(m->d[0]) * (top * numPowers +
708
440986
	    ((2*top) > numPowers ? (2*top) : numPowers));
709
881972
	if ((powerbufFree = calloc(powerbufLen +
710
440986
	    MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH, 1)) == NULL)
711
		goto err;
712
440986
	powerbuf = MOD_EXP_CTIME_ALIGN(powerbufFree);
713
714
	/* lay down tmp and am right after powers table */
715
440986
	tmp.d = (BN_ULONG *)(powerbuf + sizeof(m->d[0]) * top * numPowers);
716
440986
	am.d = tmp.d + top;
717
440986
	tmp.top = am.top = 0;
718
440986
	tmp.dmax = am.dmax = top;
719
440986
	tmp.neg = am.neg = 0;
720
440986
	tmp.flags = am.flags = BN_FLG_STATIC_DATA;
721
722
	/* prepare a^0 in Montgomery domain */
723
#if 1
724
440986
	if (!BN_to_montgomery(&tmp, BN_value_one(), mont, ctx))
725
		goto err;
726
#else
727
	tmp.d[0] = (0 - m - >d[0]) & BN_MASK2;	/* 2^(top*BN_BITS2) - m */
728
	for (i = 1; i < top; i++)
729
		tmp.d[i] = (~m->d[i]) & BN_MASK2;
730
	tmp.top = top;
731
#endif
732
733
	/* prepare a^1 in Montgomery domain */
734

881972
	if (a->neg || BN_ucmp(a, m) >= 0) {
735
755
		if (!BN_mod_ct(&am, a,m, ctx))
736
			goto err;
737
755
		if (!BN_to_montgomery(&am, &am, mont, ctx))
738
			goto err;
739
440231
	} else if (!BN_to_montgomery(&am, a,mont, ctx))
740
		goto err;
741
742
#if defined(OPENSSL_BN_ASM_MONT5)
743
	/* This optimization uses ideas from http://eprint.iacr.org/2011/239,
744
	 * specifically optimization of cache-timing attack countermeasures
745
	 * and pre-computation optimization. */
746
747
	/* Dedicated window==4 case improves 512-bit RSA sign by ~15%, but as
748
	 * 512-bit RSA is hardly relevant, we omit it to spare size... */
749
440986
	if (window == 5 && top > 1) {
750
		void bn_mul_mont_gather5(BN_ULONG *rp, const BN_ULONG *ap,
751
		    const void *table, const BN_ULONG *np,
752
		    const BN_ULONG *n0, int num, int power);
753
		void bn_scatter5(const BN_ULONG *inp, size_t num,
754
		    void *table, size_t power);
755
		void bn_gather5(BN_ULONG *out, size_t num,
756
		    void *table, size_t power);
757
758
57425
		BN_ULONG *np = mont->N.d, *n0 = mont->n0;
759
760
		/* BN_to_montgomery can contaminate words above .top
761
		 * [in BN_DEBUG[_DEBUG] build]... */
762
115268
		for (i = am.top; i < top; i++)
763
209
			am.d[i] = 0;
764
115488
		for (i = tmp.top; i < top; i++)
765
319
			tmp.d[i] = 0;
766
767
57425
		bn_scatter5(tmp.d, top, powerbuf, 0);
768
57425
		bn_scatter5(am.d, am.top, powerbuf, 1);
769
57425
		bn_mul_mont(tmp.d, am.d, am.d, np, n0, top);
770
57425
		bn_scatter5(tmp.d, top, powerbuf, 2);
771
772
#if 0
773
		for (i = 3; i < 32; i++) {
774
			/* Calculate a^i = a^(i-1) * a */
775
			bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np,
776
			    n0, top, i - 1);
777
			bn_scatter5(tmp.d, top, powerbuf, i);
778
		}
779
#else
780
		/* same as above, but uses squaring for 1/2 of operations */
781
459400
		for (i = 4; i < 32; i*=2) {
782
172275
			bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
783
172275
			bn_scatter5(tmp.d, top, powerbuf, i);
784
		}
785
459400
		for (i = 3; i < 8; i += 2) {
786
			int j;
787
344550
			bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np,
788
172275
			    n0, top, i - 1);
789
172275
			bn_scatter5(tmp.d, top, powerbuf, i);
790
1148500
			for (j = 2 * i; j < 32; j *= 2) {
791
401975
				bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
792
401975
				bn_scatter5(tmp.d, top, powerbuf, j);
793
			}
794
		}
795
574250
		for (; i < 16; i += 2) {
796
459400
			bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np,
797
229700
			    n0, top, i - 1);
798
229700
			bn_scatter5(tmp.d, top, powerbuf, i);
799
229700
			bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
800
229700
			bn_scatter5(tmp.d, top, powerbuf, 2*i);
801
		}
802
1033650
		for (; i < 32; i += 2) {
803
918800
			bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np,
804
459400
			    n0, top, i - 1);
805
459400
			bn_scatter5(tmp.d, top, powerbuf, i);
806
		}
807
#endif
808
57425
		bits--;
809
605216
		for (wvalue = 0, i = bits % 5; i >= 0; i--, bits--)
810
245183
			wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
811
57425
		bn_gather5(tmp.d, top, powerbuf, wvalue);
812
813
		/* Scan the exponent one window at a time starting from the most
814
		 * significant bits.
815
		 */
816
22449032
		while (bits >= 0) {
817
134005092
			for (wvalue = 0, i = 0; i < 5; i++, bits--)
818
55835455
				wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
819
820
11167091
			bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
821
11167091
			bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
822
11167091
			bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
823
11167091
			bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
824
11167091
			bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
825
11167091
			bn_mul_mont_gather5(tmp.d, tmp.d, powerbuf, np, n0, top, wvalue);
826
		}
827
828
57425
		tmp.top = top;
829

287797
		bn_correct_top(&tmp);
830
57425
	} else
831
#endif
832
	{
833
383561
		if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, 0,
834
		    window))
835
			goto err;
836
383561
		if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&am,  top, powerbuf, 1,
837
		    window))
838
			goto err;
839
840
		/* If the window size is greater than 1, then calculate
841
		 * val[i=2..2^winsize-1]. Powers are computed as a*a^(i-1)
842
		 * (even powers could instead be computed as (a^(i/2))^2
843
		 * to use the slight performance advantage of sqr over mul).
844
		 */
845
383561
		if (window > 1) {
846
17076
			if (!BN_mod_mul_montgomery(&tmp, &am, &am, mont, ctx))
847
				goto err;
848
17076
			if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf,
849
			    2, window))
850
				goto err;
851
1172560
			for (i = 3; i < numPowers; i++) {
852
				/* Calculate a^i = a^(i-1) * a */
853
569204
				if (!BN_mod_mul_montgomery(&tmp, &am, &tmp,
854
				    mont, ctx))
855
					goto err;
856
569204
				if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top,
857
				    powerbuf, i, window))
858
					goto err;
859
			}
860
		}
861
862
383561
		bits--;
863
1565672
		for (wvalue = 0, i = bits % window; i >= 0; i--, bits--)
864
399275
			wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
865
383561
		if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(&tmp, top, powerbuf,
866
		    wvalue, window))
867
			goto err;
868
869
		/* Scan the exponent one window at a time starting from the most
870
		 * significant bits.
871
		 */
872
18505092
		while (bits >= 0) {
873
			wvalue = 0; /* The 'value' of the window */
874
875
			/* Scan the window, squaring the result as we go */
876
64333458
			for (i = 0; i < window; i++, bits--) {
877
23297744
				if (!BN_mod_mul_montgomery(&tmp, &tmp, &tmp,
878
				    mont, ctx))
879
					goto err;
880
23297744
				wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
881
			}
882
883
			/* Fetch the appropriate pre-computed value from the pre-buf */
884
8868985
			if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(&am, top, powerbuf,
885
			    wvalue, window))
886
				goto err;
887
888
			/* Multiply the result into the intermediate result */
889
8868985
			if (!BN_mod_mul_montgomery(&tmp, &tmp, &am, mont, ctx))
890
				goto err;
891
		}
892
	}
893
894
	/* Convert the final result from montgomery to standard format */
895
440986
	if (!BN_from_montgomery(rr, &tmp, mont, ctx))
896
		goto err;
897
440986
	ret = 1;
898
899
err:
900
440986
	if ((in_mont == NULL) && (mont != NULL))
901
2829
		BN_MONT_CTX_free(mont);
902
440986
	freezero(powerbufFree, powerbufLen + MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH);
903
440986
	BN_CTX_end(ctx);
904
440986
	return (ret);
905
441022
}
906
907
int
908
BN_mod_exp_mont_word(BIGNUM *rr, BN_ULONG a, const BIGNUM *p, const BIGNUM *m,
909
    BN_CTX *ctx, BN_MONT_CTX *in_mont)
910
{
911
	BN_MONT_CTX *mont = NULL;
912
	int b, bits, ret = 0;
913
	int r_is_one;
914
	BN_ULONG w, next_w;
915
	BIGNUM *d, *r, *t;
916
	BIGNUM *swap_tmp;
917
918
#define BN_MOD_MUL_WORD(r, w, m) \
919
		(BN_mul_word(r, (w)) && \
920
		(/* BN_ucmp(r, (m)) < 0 ? 1 :*/  \
921
			(BN_mod_ct(t, r, m, ctx) && (swap_tmp = r, r = t, t = swap_tmp, 1))))
922
		/* BN_MOD_MUL_WORD is only used with 'w' large,
923
		 * so the BN_ucmp test is probably more overhead
924
		 * than always using BN_mod (which uses BN_copy if
925
		 * a similar test returns true). */
926
		/* We can use BN_mod and do not need BN_nnmod because our
927
		 * accumulator is never negative (the result of BN_mod does
928
		 * not depend on the sign of the modulus).
929
		 */
930
#define BN_TO_MONTGOMERY_WORD(r, w, mont) \
931
		(BN_set_word(r, (w)) && BN_to_montgomery(r, r, (mont), ctx))
932
933
96
	if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
934
		/* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
935
		BNerror(ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
936
		return -1;
937
	}
938
939
	bn_check_top(p);
940
	bn_check_top(m);
941
942

96
	if (!BN_is_odd(m)) {
943
		BNerror(BN_R_CALLED_WITH_EVEN_MODULUS);
944
		return (0);
945
	}
946
48
	if (m->top == 1)
947
48
		a %= m->d[0]; /* make sure that 'a' is reduced */
948
949
48
	bits = BN_num_bits(p);
950
48
	if (bits == 0) {
951
		/* x**0 mod 1 is still zero. */
952

9
		if (BN_is_one(m)) {
953
			ret = 1;
954
3
			BN_zero(rr);
955
3
		} else
956
			ret = BN_one(rr);
957
3
		return ret;
958
	}
959
45
	if (a == 0) {
960
		BN_zero(rr);
961
		ret = 1;
962
		return ret;
963
	}
964
965
45
	BN_CTX_start(ctx);
966
45
	if ((d = BN_CTX_get(ctx)) == NULL)
967
		goto err;
968
45
	if ((r = BN_CTX_get(ctx)) == NULL)
969
		goto err;
970
45
	if ((t = BN_CTX_get(ctx)) == NULL)
971
		goto err;
972
973
45
	if (in_mont != NULL)
974
		mont = in_mont;
975
	else {
976
45
		if ((mont = BN_MONT_CTX_new()) == NULL)
977
			goto err;
978
45
		if (!BN_MONT_CTX_set(mont, m, ctx))
979
			goto err;
980
	}
981
982
	r_is_one = 1; /* except for Montgomery factor */
983
984
	/* bits-1 >= 0 */
985
986
	/* The result is accumulated in the product r*w. */
987
	w = a; /* bit 'bits-1' of 'p' is always set */
988
1652
	for (b = bits - 2; b >= 0; b--) {
989
		/* First, square r*w. */
990
781
		next_w = w * w;
991
781
		if ((next_w / w) != w) /* overflow */
992
		{
993
177
			if (r_is_one) {
994

30
				if (!BN_TO_MONTGOMERY_WORD(r, w, mont))
995
					goto err;
996
				r_is_one = 0;
997
15
			} else {
998

324
				if (!BN_MOD_MUL_WORD(r, w, m))
999
					goto err;
1000
			}
1001
			next_w = 1;
1002
177
		}
1003
		w = next_w;
1004
781
		if (!r_is_one) {
1005
735
			if (!BN_mod_mul_montgomery(r, r, r, mont, ctx))
1006
				goto err;
1007
		}
1008
1009
		/* Second, multiply r*w by 'a' if exponent bit is set. */
1010
781
		if (BN_is_bit_set(p, b)) {
1011
393
			next_w = w * a;
1012
393
			if ((next_w / a) != w) /* overflow */
1013
			{
1014
223
				if (r_is_one) {
1015

48
					if (!BN_TO_MONTGOMERY_WORD(r, w, mont))
1016
						goto err;
1017
					r_is_one = 0;
1018
24
				} else {
1019

398
					if (!BN_MOD_MUL_WORD(r, w, m))
1020
						goto err;
1021
				}
1022
				next_w = a;
1023
223
			}
1024
			w = next_w;
1025
393
		}
1026
	}
1027
1028
	/* Finally, set r:=r*w. */
1029
45
	if (w != 1) {
1030
34
		if (r_is_one) {
1031

12
			if (!BN_TO_MONTGOMERY_WORD(r, w, mont))
1032
				goto err;
1033
			r_is_one = 0;
1034
6
		} else {
1035

56
			if (!BN_MOD_MUL_WORD(r, w, m))
1036
				goto err;
1037
		}
1038
	}
1039
1040
45
	if (r_is_one) /* can happen only if a == 1*/
1041
	{
1042
		if (!BN_one(rr))
1043
			goto err;
1044
	} else {
1045
45
		if (!BN_from_montgomery(rr, r, mont, ctx))
1046
			goto err;
1047
	}
1048
45
	ret = 1;
1049
1050
err:
1051
45
	if ((in_mont == NULL) && (mont != NULL))
1052
45
		BN_MONT_CTX_free(mont);
1053
45
	BN_CTX_end(ctx);
1054
	bn_check_top(rr);
1055
45
	return (ret);
1056
48
}
1057
1058
1059
/* The old fallback, simple version :-) */
1060
int
1061
BN_mod_exp_simple(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
1062
    BN_CTX *ctx)
1063
{
1064
	int i, j, bits, ret = 0, wstart, wend, window, wvalue;
1065
	int start = 1;
1066
	BIGNUM *d;
1067
	/* Table of variables obtained from 'ctx' */
1068
1218
	BIGNUM *val[TABLE_SIZE];
1069
1070
609
	if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
1071
		/* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
1072
		BNerror(ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
1073
		return -1;
1074
	}
1075
1076
609
	bits = BN_num_bits(p);
1077
609
	if (bits == 0) {
1078
		/* x**0 mod 1 is still zero. */
1079

9
		if (BN_is_one(m)) {
1080
			ret = 1;
1081
3
			BN_zero(r);
1082
3
		} else
1083
			ret = BN_one(r);
1084
3
		return ret;
1085
	}
1086
1087
606
	BN_CTX_start(ctx);
1088
606
	if ((d = BN_CTX_get(ctx)) == NULL)
1089
		goto err;
1090
606
	if ((val[0] = BN_CTX_get(ctx)) == NULL)
1091
		goto err;
1092
1093
606
	if (!BN_nnmod(val[0],a,m,ctx))
1094
		goto err;		/* 1 */
1095
606
	if (BN_is_zero(val[0])) {
1096
		BN_zero(r);
1097
		ret = 1;
1098
		goto err;
1099
	}
1100
1101

1812
	window = BN_window_bits_for_exponent_size(bits);
1102
606
	if (window > 1) {
1103
606
		if (!BN_mod_mul(d, val[0], val[0], m, ctx))
1104
			goto err;				/* 2 */
1105
606
		j = 1 << (window - 1);
1106
19584
		for (i = 1; i < j; i++) {
1107

18372
			if (((val[i] = BN_CTX_get(ctx)) == NULL) ||
1108
9186
			    !BN_mod_mul(val[i], val[i - 1], d,m, ctx))
1109
				goto err;
1110
		}
1111
	}
1112
1113
	start = 1;		/* This is used to avoid multiplication etc
1114
				 * when there is only the value '1' in the
1115
				 * buffer. */
1116
	wvalue = 0;		/* The 'value' of the window */
1117
606
	wstart = bits - 1;	/* The top bit of the window */
1118
	wend = 0;		/* The bottom bit of the window */
1119
1120
606
	if (!BN_one(r))
1121
		goto err;
1122
1123
34543
	for (;;) {
1124
101782
		if (BN_is_bit_set(p, wstart) == 0) {
1125
67529
			if (!start)
1126
67529
				if (!BN_mod_mul(r, r, r, m, ctx))
1127
					goto err;
1128
67529
			if (wstart == 0)
1129
				break;
1130
67239
			wstart--;
1131
67239
			continue;
1132
		}
1133
		/* We now have wstart on a 'set' bit, we now need to work out
1134
		 * how bit a window to do.  To do this we need to scan
1135
		 * forward until the last set bit before the end of the
1136
		 * window */
1137
		j = wstart;
1138
		wvalue = 1;
1139
		wend = 0;
1140
341702
		for (i = 1; i < window; i++) {
1141
137012
			if (wstart - i < 0)
1142
				break;
1143
136598
			if (BN_is_bit_set(p, wstart - i)) {
1144
68698
				wvalue <<= (i - wend);
1145
68698
				wvalue |= 1;
1146
				wend = i;
1147
68698
			}
1148
		}
1149
1150
		/* wend is the size of the current window */
1151
34253
		j = wend + 1;
1152
		/* add the 'bytes above' */
1153
34253
		if (!start)
1154
340388
			for (i = 0; i < j; i++) {
1155
136547
				if (!BN_mod_mul(r, r, r, m, ctx))
1156
					goto err;
1157
			}
1158
1159
		/* wvalue will be an odd number < 2^window */
1160
34253
		if (!BN_mod_mul(r, r, val[wvalue >> 1], m, ctx))
1161
			goto err;
1162
1163
		/* move the 'window' down further */
1164
34253
		wstart -= wend + 1;
1165
		wvalue = 0;
1166
		start = 0;
1167
34253
		if (wstart < 0)
1168
			break;
1169
	}
1170
606
	ret = 1;
1171
1172
err:
1173
606
	BN_CTX_end(ctx);
1174
	bn_check_top(r);
1175
606
	return (ret);
1176
609
}