Head

GCC Code Coverage Report

Directory:	./		Exec	Total	Coverage
File:	lib/libcrypto/bn/asm/x86_64-gcc.c	Lines:	203	227	89.4 %
Date:	2017-11-07	Branches:	30	34	88.2 %

Line	Branch	Exec	Source
1			/* $OpenBSD: x86_64-gcc.c,v 1.6 2015/09/12 09:04:12 miod Exp $ */
2			#include "../bn_lcl.h"
3			/*
4			* x86_64 BIGNUM accelerator version 0.1, December 2002.
5			*
6			* Implemented by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
7			* project.
8			*
9			* Rights for redistribution and usage in source and binary forms are
10			* granted according to the OpenSSL license. Warranty of any kind is
11			* disclaimed.
12			*
13			* Q. Version 0.1? It doesn't sound like Andy, he used to assign real
14			* versions, like 1.0...
15			* A. Well, that's because this code is basically a quick-n-dirty
16			* proof-of-concept hack. As you can see it's implemented with
17			* inline assembler, which means that you're bound to GCC and that
18			* there might be enough room for further improvement.
19			*
20			* Q. Why inline assembler?
21			* A. x86_64 features own ABI which I'm not familiar with. This is
22			* why I decided to let the compiler take care of subroutine
23			* prologue/epilogue as well as register allocation. For reference.
24			* Win64 implements different ABI for AMD64, different from Linux.
25			*
26			* Q. How much faster does it get?
27			* A. 'apps/openssl speed rsa dsa' output with no-asm:
28			*
29			* sign verify sign/s verify/s
30			* rsa 512 bits 0.0006s 0.0001s 1683.8 18456.2
31			* rsa 1024 bits 0.0028s 0.0002s 356.0 6407.0
32			* rsa 2048 bits 0.0172s 0.0005s 58.0 1957.8
33			* rsa 4096 bits 0.1155s 0.0018s 8.7 555.6
34			* sign verify sign/s verify/s
35			* dsa 512 bits 0.0005s 0.0006s 2100.8 1768.3
36			* dsa 1024 bits 0.0014s 0.0018s 692.3 559.2
37			* dsa 2048 bits 0.0049s 0.0061s 204.7 165.0
38			*
39			* 'apps/openssl speed rsa dsa' output with this module:
40			*
41			* sign verify sign/s verify/s
42			* rsa 512 bits 0.0004s 0.0000s 2767.1 33297.9
43			* rsa 1024 bits 0.0012s 0.0001s 867.4 14674.7
44			* rsa 2048 bits 0.0061s 0.0002s 164.0 5270.0
45			* rsa 4096 bits 0.0384s 0.0006s 26.1 1650.8
46			* sign verify sign/s verify/s
47			* dsa 512 bits 0.0002s 0.0003s 4442.2 3786.3
48			* dsa 1024 bits 0.0005s 0.0007s 1835.1 1497.4
49			* dsa 2048 bits 0.0016s 0.0020s 620.4 504.6
50			*
51			* For the reference. IA-32 assembler implementation performs
52			* very much like 64-bit code compiled with no-asm on the same
53			* machine.
54			*/
55
56			#define BN_ULONG unsigned long
57
58			#undef mul
59			#undef mul_add
60			#undef sqr
61
62			/*
63			* "m"(a), "+m"(r) is the way to favor DirectPath µ-code;
64			* "g"(0) let the compiler to decide where does it
65			* want to keep the value of zero;
66			*/
67			#define mul_add(r,a,word,carry) do { \
68			BN_ULONG high,low; \
69			asm ("mulq %3" \
70			: "=a"(low),"=d"(high) \
71			: "a"(word),"m"(a) \
72			: "cc"); \
73			asm ("addq %2,%0; adcq %3,%1" \
74			: "+r"(carry),"+d"(high)\
75			: "a"(low),"g"(0) \
76			: "cc"); \
77			asm ("addq %2,%0; adcq %3,%1" \
78			: "+m"(r),"+d"(high) \
79			: "r"(carry),"g"(0) \
80			: "cc"); \
81			carry=high; \
82			} while (0)
83
84			#define mul(r,a,word,carry) do { \
85			BN_ULONG high,low; \
86			asm ("mulq %3" \
87			: "=a"(low),"=d"(high) \
88			: "a"(word),"g"(a) \
89			: "cc"); \
90			asm ("addq %2,%0; adcq %3,%1" \
91			: "+r"(carry),"+d"(high)\
92			: "a"(low),"g"(0) \
93			: "cc"); \
94			(r)=carry, carry=high; \
95			} while (0)
96
97			#define sqr(r0,r1,a) \
98			asm ("mulq %2" \
99			: "=a"(r0),"=d"(r1) \
100			: "a"(a) \
101			: "cc");
102
103			BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
104			{
105			BN_ULONG c1=0;
106
107	✗✓	70335644	if (num <= 0) return(c1);
108
109	✓✓	476419114	while (num&~3)
110			{
111		220625646	mul_add(rp[0],ap[0],w,c1);
112		220625646	mul_add(rp[1],ap[1],w,c1);
113		220625646	mul_add(rp[2],ap[2],w,c1);
114		220625646	mul_add(rp[3],ap[3],w,c1);
115		220625646	ap+=4; rp+=4; num-=4;
116			}
117	✓✓	35167822	if (num)
118			{
119	✓✓	14407276	mul_add(rp[0],ap[0],w,c1); if (--num==0) return c1;
120	✓✓	1719944	mul_add(rp[1],ap[1],w,c1); if (--num==0) return c1;
121		712892	mul_add(rp[2],ap[2],w,c1); return c1;
122			}
123
124		27355975	return(c1);
125		35167822	}
126
127			BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
128			{
129			BN_ULONG c1=0;
130
131	✗✓	50488078	if (num <= 0) return(c1);
132
133	✓✓	142730527	while (num&~3)
134			{
135		58743244	mul(rp[0],ap[0],w,c1);
136		58743244	mul(rp[1],ap[1],w,c1);
137		58743244	mul(rp[2],ap[2],w,c1);
138		58743244	mul(rp[3],ap[3],w,c1);
139		58743244	ap+=4; rp+=4; num-=4;
140			}
141	✓✓	25244039	if (num)
142			{
143	✓✓	34432601	mul(rp[0],ap[0],w,c1); if (--num == 0) return c1;
144	✓✓	7669543	mul(rp[1],ap[1],w,c1); if (--num == 0) return c1;
145		2453307	mul(rp[2],ap[2],w,c1);
146		2453307	}
147		7950333	return(c1);
148		25244039	}
149
150			void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n)
151			{
152	✓✗	3150038	if (n <= 0) return;
153
154	✓✓	2324687	while (n&~3)
155			{
156		374834	sqr(r[0],r[1],a[0]);
157		374834	sqr(r[2],r[3],a[1]);
158		374834	sqr(r[4],r[5],a[2]);
159		374834	sqr(r[6],r[7],a[3]);
160		374834	a+=4; r+=8; n-=4;
161			}
162	✓✓	1575019	if (n)
163			{
164	✓✓	1574983	sqr(r[0],r[1],a[0]); if (--n == 0) return;
165	✓✓	51623	sqr(r[2],r[3],a[1]); if (--n == 0) return;
166		43868	sqr(r[4],r[5],a[2]);
167		43868	}
168		1575019	}
169
170			BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d)
171			{ BN_ULONG ret,waste;
172
173		5667870	asm ("divq %4"
174			: "=a"(ret),"=d"(waste)
175			: "a"(l),"d"(h),"g"(d)
176			: "cc");
177
178		2833935	return ret;
179			}
180
181			BN_ULONG bn_add_words (BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,int n)
182			{ BN_ULONG ret=0,i=0;
183
184	✓✓	28980407	if (n <= 0) return 0;
185
186		14297317	asm (
187			" subq %2,%2 \n"
188			".p2align 4 \n"
189			"1: movq (%4,%2,8),%0 \n"
190			" adcq (%5,%2,8),%0 \n"
191			" movq %0,(%3,%2,8) \n"
192			" leaq 1(%2),%2 \n"
193			" loop 1b \n"
194			" sbbq %0,%0 \n"
195			: "=&a"(ret),"+c"(n),"=&r"(i)
196			: "r"(rp),"r"(ap),"r"(bp)
197			: "cc"
198			);
199
200		14297317	return ret&1;
201		14425908	}
202
203			BN_ULONG bn_sub_words (BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,int n)
204			{ BN_ULONG ret=0,i=0;
205
206	✗✓	44352562	if (n <= 0) return 0;
207
208		22176281	asm (
209			" subq %2,%2 \n"
210			".p2align 4 \n"
211			"1: movq (%4,%2,8),%0 \n"
212			" sbbq (%5,%2,8),%0 \n"
213			" movq %0,(%3,%2,8) \n"
214			" leaq 1(%2),%2 \n"
215			" loop 1b \n"
216			" sbbq %0,%0 \n"
217			: "=&a"(ret),"+c"(n),"=&r"(i)
218			: "r"(rp),"r"(ap),"r"(bp)
219			: "cc"
220			);
221
222		22176281	return ret&1;
223		22176281	}
224
225			/* mul_add_c(a,b,c0,c1,c2) -- c+=a*b for three word number c=(c2,c1,c0) */
226			/* mul_add_c2(a,b,c0,c1,c2) -- c+=2*a*b for three word number c=(c2,c1,c0) */
227			/* sqr_add_c(a,i,c0,c1,c2) -- c+=a[i]^2 for three word number c=(c2,c1,c0) */
228			/* sqr_add_c2(a,i,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0) */
229
230			/*
231			* Keep in mind that carrying into high part of multiplication result
232			* can not overflow, because it cannot be all-ones.
233			*/
234			#if 0
235			/* original macros are kept for reference purposes */
236			#define mul_add_c(a,b,c0,c1,c2) do { \
237			BN_ULONG ta = (a), tb = (b); \
238			BN_ULONG lo, hi; \
239			BN_UMULT_LOHI(lo,hi,ta,tb); \
240			c0 += lo; hi += (c0<lo)?1:0; \
241			c1 += hi; c2 += (c1<hi)?1:0; \
242			} while(0)
243
244			#define mul_add_c2(a,b,c0,c1,c2) do { \
245			BN_ULONG ta = (a), tb = (b); \
246			BN_ULONG lo, hi, tt; \
247			BN_UMULT_LOHI(lo,hi,ta,tb); \
248			c0 += lo; tt = hi+((c0<lo)?1:0); \
249			c1 += tt; c2 += (c1<tt)?1:0; \
250			c0 += lo; hi += (c0<lo)?1:0; \
251			c1 += hi; c2 += (c1<hi)?1:0; \
252			} while(0)
253
254			#define sqr_add_c(a,i,c0,c1,c2) do { \
255			BN_ULONG ta = (a)[i]; \
256			BN_ULONG lo, hi; \
257			BN_UMULT_LOHI(lo,hi,ta,ta); \
258			c0 += lo; hi += (c0<lo)?1:0; \
259			c1 += hi; c2 += (c1<hi)?1:0; \
260			} while(0)
261			#else
262			#define mul_add_c(a,b,c0,c1,c2) do { \
263			BN_ULONG t1,t2; \
264			asm ("mulq %3" \
265			: "=a"(t1),"=d"(t2) \
266			: "a"(a),"m"(b) \
267			: "cc"); \
268			asm ("addq %3,%0; adcq %4,%1; adcq %5,%2" \
269			: "+r"(c0),"+r"(c1),"+r"(c2) \
270			: "r"(t1),"r"(t2),"g"(0) \
271			: "cc"); \
272			} while (0)
273
274			#define sqr_add_c(a,i,c0,c1,c2) do { \
275			BN_ULONG t1,t2; \
276			asm ("mulq %2" \
277			: "=a"(t1),"=d"(t2) \
278			: "a"(a[i]) \
279			: "cc"); \
280			asm ("addq %3,%0; adcq %4,%1; adcq %5,%2" \
281			: "+r"(c0),"+r"(c1),"+r"(c2) \
282			: "r"(t1),"r"(t2),"g"(0) \
283			: "cc"); \
284			} while (0)
285
286			#define mul_add_c2(a,b,c0,c1,c2) do { \
287			BN_ULONG t1,t2; \
288			asm ("mulq %3" \
289			: "=a"(t1),"=d"(t2) \
290			: "a"(a),"m"(b) \
291			: "cc"); \
292			asm ("addq %3,%0; adcq %4,%1; adcq %5,%2" \
293			: "+r"(c0),"+r"(c1),"+r"(c2) \
294			: "r"(t1),"r"(t2),"g"(0) \
295			: "cc"); \
296			asm ("addq %3,%0; adcq %4,%1; adcq %5,%2" \
297			: "+r"(c0),"+r"(c1),"+r"(c2) \
298			: "r"(t1),"r"(t2),"g"(0) \
299			: "cc"); \
300			} while (0)
301			#endif
302
303			#define sqr_add_c2(a,i,j,c0,c1,c2) \
304			mul_add_c2((a)[i],(a)[j],c0,c1,c2)
305
306			void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
307			{
308			BN_ULONG c1,c2,c3;
309
310			c1=0;
311			c2=0;
312			c3=0;
313		3553478	mul_add_c(a[0],b[0],c1,c2,c3);
314		1776739	r[0]=c1;
315			c1=0;
316		1776739	mul_add_c(a[0],b[1],c2,c3,c1);
317		1776739	mul_add_c(a[1],b[0],c2,c3,c1);
318		1776739	r[1]=c2;
319			c2=0;
320		1776739	mul_add_c(a[2],b[0],c3,c1,c2);
321		1776739	mul_add_c(a[1],b[1],c3,c1,c2);
322		1776739	mul_add_c(a[0],b[2],c3,c1,c2);
323		1776739	r[2]=c3;
324			c3=0;
325		1776739	mul_add_c(a[0],b[3],c1,c2,c3);
326		1776739	mul_add_c(a[1],b[2],c1,c2,c3);
327		1776739	mul_add_c(a[2],b[1],c1,c2,c3);
328		1776739	mul_add_c(a[3],b[0],c1,c2,c3);
329		1776739	r[3]=c1;
330			c1=0;
331		1776739	mul_add_c(a[4],b[0],c2,c3,c1);
332		1776739	mul_add_c(a[3],b[1],c2,c3,c1);
333		1776739	mul_add_c(a[2],b[2],c2,c3,c1);
334		1776739	mul_add_c(a[1],b[3],c2,c3,c1);
335		1776739	mul_add_c(a[0],b[4],c2,c3,c1);
336		1776739	r[4]=c2;
337			c2=0;
338		1776739	mul_add_c(a[0],b[5],c3,c1,c2);
339		1776739	mul_add_c(a[1],b[4],c3,c1,c2);
340		1776739	mul_add_c(a[2],b[3],c3,c1,c2);
341		1776739	mul_add_c(a[3],b[2],c3,c1,c2);
342		1776739	mul_add_c(a[4],b[1],c3,c1,c2);
343		1776739	mul_add_c(a[5],b[0],c3,c1,c2);
344		1776739	r[5]=c3;
345			c3=0;
346		1776739	mul_add_c(a[6],b[0],c1,c2,c3);
347		1776739	mul_add_c(a[5],b[1],c1,c2,c3);
348		1776739	mul_add_c(a[4],b[2],c1,c2,c3);
349		1776739	mul_add_c(a[3],b[3],c1,c2,c3);
350		1776739	mul_add_c(a[2],b[4],c1,c2,c3);
351		1776739	mul_add_c(a[1],b[5],c1,c2,c3);
352		1776739	mul_add_c(a[0],b[6],c1,c2,c3);
353		1776739	r[6]=c1;
354			c1=0;
355		1776739	mul_add_c(a[0],b[7],c2,c3,c1);
356		1776739	mul_add_c(a[1],b[6],c2,c3,c1);
357		1776739	mul_add_c(a[2],b[5],c2,c3,c1);
358		1776739	mul_add_c(a[3],b[4],c2,c3,c1);
359		1776739	mul_add_c(a[4],b[3],c2,c3,c1);
360		1776739	mul_add_c(a[5],b[2],c2,c3,c1);
361		1776739	mul_add_c(a[6],b[1],c2,c3,c1);
362		1776739	mul_add_c(a[7],b[0],c2,c3,c1);
363		1776739	r[7]=c2;
364			c2=0;
365		1776739	mul_add_c(a[7],b[1],c3,c1,c2);
366		1776739	mul_add_c(a[6],b[2],c3,c1,c2);
367		1776739	mul_add_c(a[5],b[3],c3,c1,c2);
368		1776739	mul_add_c(a[4],b[4],c3,c1,c2);
369		1776739	mul_add_c(a[3],b[5],c3,c1,c2);
370		1776739	mul_add_c(a[2],b[6],c3,c1,c2);
371		1776739	mul_add_c(a[1],b[7],c3,c1,c2);
372		1776739	r[8]=c3;
373			c3=0;
374		1776739	mul_add_c(a[2],b[7],c1,c2,c3);
375		1776739	mul_add_c(a[3],b[6],c1,c2,c3);
376		1776739	mul_add_c(a[4],b[5],c1,c2,c3);
377		1776739	mul_add_c(a[5],b[4],c1,c2,c3);
378		1776739	mul_add_c(a[6],b[3],c1,c2,c3);
379		1776739	mul_add_c(a[7],b[2],c1,c2,c3);
380		1776739	r[9]=c1;
381			c1=0;
382		1776739	mul_add_c(a[7],b[3],c2,c3,c1);
383		1776739	mul_add_c(a[6],b[4],c2,c3,c1);
384		1776739	mul_add_c(a[5],b[5],c2,c3,c1);
385		1776739	mul_add_c(a[4],b[6],c2,c3,c1);
386		1776739	mul_add_c(a[3],b[7],c2,c3,c1);
387		1776739	r[10]=c2;
388			c2=0;
389		1776739	mul_add_c(a[4],b[7],c3,c1,c2);
390		1776739	mul_add_c(a[5],b[6],c3,c1,c2);
391		1776739	mul_add_c(a[6],b[5],c3,c1,c2);
392		1776739	mul_add_c(a[7],b[4],c3,c1,c2);
393		1776739	r[11]=c3;
394			c3=0;
395		1776739	mul_add_c(a[7],b[5],c1,c2,c3);
396		1776739	mul_add_c(a[6],b[6],c1,c2,c3);
397		1776739	mul_add_c(a[5],b[7],c1,c2,c3);
398		1776739	r[12]=c1;
399			c1=0;
400		1776739	mul_add_c(a[6],b[7],c2,c3,c1);
401		1776739	mul_add_c(a[7],b[6],c2,c3,c1);
402		1776739	r[13]=c2;
403			c2=0;
404		1776739	mul_add_c(a[7],b[7],c3,c1,c2);
405		1776739	r[14]=c3;
406		1776739	r[15]=c1;
407		1776739	}
408
409			void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
410			{
411			BN_ULONG c1,c2,c3;
412
413			c1=0;
414			c2=0;
415			c3=0;
416			mul_add_c(a[0],b[0],c1,c2,c3);
417			r[0]=c1;
418			c1=0;
419			mul_add_c(a[0],b[1],c2,c3,c1);
420			mul_add_c(a[1],b[0],c2,c3,c1);
421			r[1]=c2;
422			c2=0;
423			mul_add_c(a[2],b[0],c3,c1,c2);
424			mul_add_c(a[1],b[1],c3,c1,c2);
425			mul_add_c(a[0],b[2],c3,c1,c2);
426			r[2]=c3;
427			c3=0;
428			mul_add_c(a[0],b[3],c1,c2,c3);
429			mul_add_c(a[1],b[2],c1,c2,c3);
430			mul_add_c(a[2],b[1],c1,c2,c3);
431			mul_add_c(a[3],b[0],c1,c2,c3);
432			r[3]=c1;
433			c1=0;
434			mul_add_c(a[3],b[1],c2,c3,c1);
435			mul_add_c(a[2],b[2],c2,c3,c1);
436			mul_add_c(a[1],b[3],c2,c3,c1);
437			r[4]=c2;
438			c2=0;
439			mul_add_c(a[2],b[3],c3,c1,c2);
440			mul_add_c(a[3],b[2],c3,c1,c2);
441			r[5]=c3;
442			c3=0;
443			mul_add_c(a[3],b[3],c1,c2,c3);
444			r[6]=c1;
445			r[7]=c2;
446			}
447
448			void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a)
449			{
450			BN_ULONG c1,c2,c3;
451
452			c1=0;
453			c2=0;
454			c3=0;
455		2289900	sqr_add_c(a,0,c1,c2,c3);
456		1144950	r[0]=c1;
457			c1=0;
458		1144950	sqr_add_c2(a,1,0,c2,c3,c1);
459		1144950	r[1]=c2;
460			c2=0;
461		1144950	sqr_add_c(a,1,c3,c1,c2);
462		1144950	sqr_add_c2(a,2,0,c3,c1,c2);
463		1144950	r[2]=c3;
464			c3=0;
465		1144950	sqr_add_c2(a,3,0,c1,c2,c3);
466		1144950	sqr_add_c2(a,2,1,c1,c2,c3);
467		1144950	r[3]=c1;
468			c1=0;
469		1144950	sqr_add_c(a,2,c2,c3,c1);
470		1144950	sqr_add_c2(a,3,1,c2,c3,c1);
471		1144950	sqr_add_c2(a,4,0,c2,c3,c1);
472		1144950	r[4]=c2;
473			c2=0;
474		1144950	sqr_add_c2(a,5,0,c3,c1,c2);
475		1144950	sqr_add_c2(a,4,1,c3,c1,c2);
476		1144950	sqr_add_c2(a,3,2,c3,c1,c2);
477		1144950	r[5]=c3;
478			c3=0;
479		1144950	sqr_add_c(a,3,c1,c2,c3);
480		1144950	sqr_add_c2(a,4,2,c1,c2,c3);
481		1144950	sqr_add_c2(a,5,1,c1,c2,c3);
482		1144950	sqr_add_c2(a,6,0,c1,c2,c3);
483		1144950	r[6]=c1;
484			c1=0;
485		1144950	sqr_add_c2(a,7,0,c2,c3,c1);
486		1144950	sqr_add_c2(a,6,1,c2,c3,c1);
487		1144950	sqr_add_c2(a,5,2,c2,c3,c1);
488		1144950	sqr_add_c2(a,4,3,c2,c3,c1);
489		1144950	r[7]=c2;
490			c2=0;
491		1144950	sqr_add_c(a,4,c3,c1,c2);
492		1144950	sqr_add_c2(a,5,3,c3,c1,c2);
493		1144950	sqr_add_c2(a,6,2,c3,c1,c2);
494		1144950	sqr_add_c2(a,7,1,c3,c1,c2);
495		1144950	r[8]=c3;
496			c3=0;
497		1144950	sqr_add_c2(a,7,2,c1,c2,c3);
498		1144950	sqr_add_c2(a,6,3,c1,c2,c3);
499		1144950	sqr_add_c2(a,5,4,c1,c2,c3);
500		1144950	r[9]=c1;
501			c1=0;
502		1144950	sqr_add_c(a,5,c2,c3,c1);
503		1144950	sqr_add_c2(a,6,4,c2,c3,c1);
504		1144950	sqr_add_c2(a,7,3,c2,c3,c1);
505		1144950	r[10]=c2;
506			c2=0;
507		1144950	sqr_add_c2(a,7,4,c3,c1,c2);
508		1144950	sqr_add_c2(a,6,5,c3,c1,c2);
509		1144950	r[11]=c3;
510			c3=0;
511		1144950	sqr_add_c(a,6,c1,c2,c3);
512		1144950	sqr_add_c2(a,7,5,c1,c2,c3);
513		1144950	r[12]=c1;
514			c1=0;
515		1144950	sqr_add_c2(a,7,6,c2,c3,c1);
516		1144950	r[13]=c2;
517			c2=0;
518		1144950	sqr_add_c(a,7,c3,c1,c2);
519		1144950	r[14]=c3;
520		1144950	r[15]=c1;
521		1144950	}
522
523			void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a)
524			{
525			BN_ULONG c1,c2,c3;
526
527			c1=0;
528			c2=0;
529			c3=0;
530		117762	sqr_add_c(a,0,c1,c2,c3);
531		58881	r[0]=c1;
532			c1=0;
533		58881	sqr_add_c2(a,1,0,c2,c3,c1);
534		58881	r[1]=c2;
535			c2=0;
536		58881	sqr_add_c(a,1,c3,c1,c2);
537		58881	sqr_add_c2(a,2,0,c3,c1,c2);
538		58881	r[2]=c3;
539			c3=0;
540		58881	sqr_add_c2(a,3,0,c1,c2,c3);
541		58881	sqr_add_c2(a,2,1,c1,c2,c3);
542		58881	r[3]=c1;
543			c1=0;
544		58881	sqr_add_c(a,2,c2,c3,c1);
545		58881	sqr_add_c2(a,3,1,c2,c3,c1);
546		58881	r[4]=c2;
547			c2=0;
548		58881	sqr_add_c2(a,3,2,c3,c1,c2);
549		58881	r[5]=c3;
550			c3=0;
551		58881	sqr_add_c(a,3,c1,c2,c3);
552		58881	r[6]=c1;
553		58881	r[7]=c2;
554		58881	}


Generated by: GCOVR (Version 3.3)