GCC Code Coverage Report
Directory: ./                                       Exec    Total   Coverage
File: lib/libcrypto/modes/gcm128.c         Lines:    233      347     67.1 %
Date: 2017-11-07                        Branches:     78      134     58.2 %

Line Branch Exec Source
1
/* $OpenBSD: gcm128.c,v 1.20 2017/09/03 13:07:34 inoguchi Exp $ */
2
/* ====================================================================
3
 * Copyright (c) 2010 The OpenSSL Project.  All rights reserved.
4
 *
5
 * Redistribution and use in source and binary forms, with or without
6
 * modification, are permitted provided that the following conditions
7
 * are met:
8
 *
9
 * 1. Redistributions of source code must retain the above copyright
10
 *    notice, this list of conditions and the following disclaimer.
11
 *
12
 * 2. Redistributions in binary form must reproduce the above copyright
13
 *    notice, this list of conditions and the following disclaimer in
14
 *    the documentation and/or other materials provided with the
15
 *    distribution.
16
 *
17
 * 3. All advertising materials mentioning features or use of this
18
 *    software must display the following acknowledgment:
19
 *    "This product includes software developed by the OpenSSL Project
20
 *    for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
21
 *
22
 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
23
 *    endorse or promote products derived from this software without
24
 *    prior written permission. For written permission, please contact
25
 *    openssl-core@openssl.org.
26
 *
27
 * 5. Products derived from this software may not be called "OpenSSL"
28
 *    nor may "OpenSSL" appear in their names without prior written
29
 *    permission of the OpenSSL Project.
30
 *
31
 * 6. Redistributions of any form whatsoever must retain the following
32
 *    acknowledgment:
33
 *    "This product includes software developed by the OpenSSL Project
34
 *    for use in the OpenSSL Toolkit (http://www.openssl.org/)"
35
 *
36
 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
37
 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
38
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
39
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
40
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
41
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
42
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
43
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
44
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
45
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
46
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
47
 * OF THE POSSIBILITY OF SUCH DAMAGE.
48
 * ====================================================================
49
 */
50
51
#define OPENSSL_FIPSAPI
52
53
#include <openssl/crypto.h>
54
#include "modes_lcl.h"
55
#include <string.h>
56
57
#ifndef MODES_DEBUG
58
# ifndef NDEBUG
59
#  define NDEBUG
60
# endif
61
#endif
62
63
#if defined(BSWAP4) && defined(__STRICT_ALIGNMENT)
64
/* redefine, because alignment is ensured */
65
#undef	GETU32
66
#define	GETU32(p)	BSWAP4(*(const u32 *)(p))
67
#undef	PUTU32
68
#define	PUTU32(p,v)	*(u32 *)(p) = BSWAP4(v)
69
#endif
70
71
#define	PACK(s)		((size_t)(s)<<(sizeof(size_t)*8-16))
72
#define REDUCE1BIT(V)	\
73
	do { \
74
		if (sizeof(size_t)==8) { \
75
			u64 T = U64(0xe100000000000000) & (0-(V.lo&1)); \
76
			V.lo  = (V.hi<<63)|(V.lo>>1); \
77
			V.hi  = (V.hi>>1 )^T; \
78
		} else { \
79
			u32 T = 0xe1000000U & (0-(u32)(V.lo&1)); \
80
			V.lo  = (V.hi<<63)|(V.lo>>1); \
81
			V.hi  = (V.hi>>1 )^((u64)T<<32); \
82
		} \
83
	} while(0)
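/*
 * REDUCE1BIT(V) shifts the 128-bit value V right by one bit and, when the
 * bit shifted out was set, folds the constant 0xE1 into the top byte.
 * In GCM's bit-reflected representation this is multiplication by x in
 * GF(2^128), i.e. reduction modulo x^128 + x^7 + x^2 + x + 1.
 */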
84
85
/*
86
 * Even though permitted values for TABLE_BITS are 8, 4 and 1, it should
87
 * never be set to 8. 8 is effectively reserved for testing purposes.
88
 * TABLE_BITS>1 selects lookup-table-driven implementations referred to as
89
 * "Shoup's" in GCM specification. In other words OpenSSL does not cover
90
 * the whole spectrum of possible table-driven implementations. Why? In the
91
 * non-"Shoup's" case memory access pattern is segmented in such manner,
92
 * that it's trivial to see that cache timing information can reveal
93
 * a fair portion of the intermediate hash value. Given that the ciphertext is
94
 * always available to an attacker, it's possible for them to attempt to
95
 * deduce the secret parameter H and, if successful, tamper with messages
96
 * [which is nothing but trivial in CTR mode]. In "Shoup's" case it's
97
 * not as trivial, but there is no reason to believe that it's resistant
98
 * to cache-timing attacks. The thing about the "8-bit" implementation is
99
 * that it consumes 16 (sixteen) times more memory, 4KB per individual
100
 * key + 1KB shared. On the plus side, it should be twice as fast as the
101
 * "4-bit" version. And for gcc-generated x86[_64] code, "8-bit" version
102
 * was observed to run ~75% faster, closer to 100% for commercial
103
 * compilers... Yet the "4-bit" procedure is preferred, because it's
104
 * believed to provide a better security/performance balance and adequate
105
 * all-round performance. "All-round" refers to things like:
106
 *
107
 * - shorter setup time effectively improves overall timing for
108
 *   handling short messages;
109
 * - a larger table allocation can become unbearable because of VM
110
 *   subsystem penalties (for example on Windows a large enough free
111
 *   results in VM working set trimming, meaning that a subsequent
112
 *   malloc would immediately incur working set expansion);
113
 * - a larger table has a larger cache footprint, which can affect
114
 *   performance of other code paths (not necessarily even from the same
115
 *   thread in a Hyper-Threading world);
116
 *
117
 * A value of 1 is not appropriate for performance reasons.
118
 */
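/*
 * Concretely: with TABLE_BITS==4, Htable holds 16 u128 entries, i.e. 256
 * bytes per key; with TABLE_BITS==8 it holds 256 entries, i.e. 4KB per
 * key, plus the shared rem_8bit lookup table.
 */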
119
#if	TABLE_BITS==8
120
121
static void gcm_init_8bit(u128 Htable[256], u64 H[2])
122
{
123
	int  i, j;
124
	u128 V;
125
126
	Htable[0].hi = 0;
127
	Htable[0].lo = 0;
128
	V.hi = H[0];
129
	V.lo = H[1];
130
131
	for (Htable[128]=V, i=64; i>0; i>>=1) {
132
		REDUCE1BIT(V);
133
		Htable[i] = V;
134
	}
135
136
	for (i=2; i<256; i<<=1) {
137
		u128 *Hi = Htable+i, H0 = *Hi;
138
		for (j=1; j<i; ++j) {
139
			Hi[j].hi = H0.hi^Htable[j].hi;
140
			Hi[j].lo = H0.lo^Htable[j].lo;
141
		}
142
	}
143
}
144
145
static void gcm_gmult_8bit(u64 Xi[2], const u128 Htable[256])
146
{
147
	u128 Z = { 0, 0};
148
	const u8 *xi = (const u8 *)Xi+15;
149
	size_t rem, n = *xi;
150
	static const size_t rem_8bit[256] = {
151
		PACK(0x0000), PACK(0x01C2), PACK(0x0384), PACK(0x0246),
152
		PACK(0x0708), PACK(0x06CA), PACK(0x048C), PACK(0x054E),
153
		PACK(0x0E10), PACK(0x0FD2), PACK(0x0D94), PACK(0x0C56),
154
		PACK(0x0918), PACK(0x08DA), PACK(0x0A9C), PACK(0x0B5E),
155
		PACK(0x1C20), PACK(0x1DE2), PACK(0x1FA4), PACK(0x1E66),
156
		PACK(0x1B28), PACK(0x1AEA), PACK(0x18AC), PACK(0x196E),
157
		PACK(0x1230), PACK(0x13F2), PACK(0x11B4), PACK(0x1076),
158
		PACK(0x1538), PACK(0x14FA), PACK(0x16BC), PACK(0x177E),
159
		PACK(0x3840), PACK(0x3982), PACK(0x3BC4), PACK(0x3A06),
160
		PACK(0x3F48), PACK(0x3E8A), PACK(0x3CCC), PACK(0x3D0E),
161
		PACK(0x3650), PACK(0x3792), PACK(0x35D4), PACK(0x3416),
162
		PACK(0x3158), PACK(0x309A), PACK(0x32DC), PACK(0x331E),
163
		PACK(0x2460), PACK(0x25A2), PACK(0x27E4), PACK(0x2626),
164
		PACK(0x2368), PACK(0x22AA), PACK(0x20EC), PACK(0x212E),
165
		PACK(0x2A70), PACK(0x2BB2), PACK(0x29F4), PACK(0x2836),
166
		PACK(0x2D78), PACK(0x2CBA), PACK(0x2EFC), PACK(0x2F3E),
167
		PACK(0x7080), PACK(0x7142), PACK(0x7304), PACK(0x72C6),
168
		PACK(0x7788), PACK(0x764A), PACK(0x740C), PACK(0x75CE),
169
		PACK(0x7E90), PACK(0x7F52), PACK(0x7D14), PACK(0x7CD6),
170
		PACK(0x7998), PACK(0x785A), PACK(0x7A1C), PACK(0x7BDE),
171
		PACK(0x6CA0), PACK(0x6D62), PACK(0x6F24), PACK(0x6EE6),
172
		PACK(0x6BA8), PACK(0x6A6A), PACK(0x682C), PACK(0x69EE),
173
		PACK(0x62B0), PACK(0x6372), PACK(0x6134), PACK(0x60F6),
174
		PACK(0x65B8), PACK(0x647A), PACK(0x663C), PACK(0x67FE),
175
		PACK(0x48C0), PACK(0x4902), PACK(0x4B44), PACK(0x4A86),
176
		PACK(0x4FC8), PACK(0x4E0A), PACK(0x4C4C), PACK(0x4D8E),
177
		PACK(0x46D0), PACK(0x4712), PACK(0x4554), PACK(0x4496),
178
		PACK(0x41D8), PACK(0x401A), PACK(0x425C), PACK(0x439E),
179
		PACK(0x54E0), PACK(0x5522), PACK(0x5764), PACK(0x56A6),
180
		PACK(0x53E8), PACK(0x522A), PACK(0x506C), PACK(0x51AE),
181
		PACK(0x5AF0), PACK(0x5B32), PACK(0x5974), PACK(0x58B6),
182
		PACK(0x5DF8), PACK(0x5C3A), PACK(0x5E7C), PACK(0x5FBE),
183
		PACK(0xE100), PACK(0xE0C2), PACK(0xE284), PACK(0xE346),
184
		PACK(0xE608), PACK(0xE7CA), PACK(0xE58C), PACK(0xE44E),
185
		PACK(0xEF10), PACK(0xEED2), PACK(0xEC94), PACK(0xED56),
186
		PACK(0xE818), PACK(0xE9DA), PACK(0xEB9C), PACK(0xEA5E),
187
		PACK(0xFD20), PACK(0xFCE2), PACK(0xFEA4), PACK(0xFF66),
188
		PACK(0xFA28), PACK(0xFBEA), PACK(0xF9AC), PACK(0xF86E),
189
		PACK(0xF330), PACK(0xF2F2), PACK(0xF0B4), PACK(0xF176),
190
		PACK(0xF438), PACK(0xF5FA), PACK(0xF7BC), PACK(0xF67E),
191
		PACK(0xD940), PACK(0xD882), PACK(0xDAC4), PACK(0xDB06),
192
		PACK(0xDE48), PACK(0xDF8A), PACK(0xDDCC), PACK(0xDC0E),
193
		PACK(0xD750), PACK(0xD692), PACK(0xD4D4), PACK(0xD516),
194
		PACK(0xD058), PACK(0xD19A), PACK(0xD3DC), PACK(0xD21E),
195
		PACK(0xC560), PACK(0xC4A2), PACK(0xC6E4), PACK(0xC726),
196
		PACK(0xC268), PACK(0xC3AA), PACK(0xC1EC), PACK(0xC02E),
197
		PACK(0xCB70), PACK(0xCAB2), PACK(0xC8F4), PACK(0xC936),
198
		PACK(0xCC78), PACK(0xCDBA), PACK(0xCFFC), PACK(0xCE3E),
199
		PACK(0x9180), PACK(0x9042), PACK(0x9204), PACK(0x93C6),
200
		PACK(0x9688), PACK(0x974A), PACK(0x950C), PACK(0x94CE),
201
		PACK(0x9F90), PACK(0x9E52), PACK(0x9C14), PACK(0x9DD6),
202
		PACK(0x9898), PACK(0x995A), PACK(0x9B1C), PACK(0x9ADE),
203
		PACK(0x8DA0), PACK(0x8C62), PACK(0x8E24), PACK(0x8FE6),
204
		PACK(0x8AA8), PACK(0x8B6A), PACK(0x892C), PACK(0x88EE),
205
		PACK(0x83B0), PACK(0x8272), PACK(0x8034), PACK(0x81F6),
206
		PACK(0x84B8), PACK(0x857A), PACK(0x873C), PACK(0x86FE),
207
		PACK(0xA9C0), PACK(0xA802), PACK(0xAA44), PACK(0xAB86),
208
		PACK(0xAEC8), PACK(0xAF0A), PACK(0xAD4C), PACK(0xAC8E),
209
		PACK(0xA7D0), PACK(0xA612), PACK(0xA454), PACK(0xA596),
210
		PACK(0xA0D8), PACK(0xA11A), PACK(0xA35C), PACK(0xA29E),
211
		PACK(0xB5E0), PACK(0xB422), PACK(0xB664), PACK(0xB7A6),
212
		PACK(0xB2E8), PACK(0xB32A), PACK(0xB16C), PACK(0xB0AE),
213
		PACK(0xBBF0), PACK(0xBA32), PACK(0xB874), PACK(0xB9B6),
214
		PACK(0xBCF8), PACK(0xBD3A), PACK(0xBF7C), PACK(0xBEBE) };
215
216
	while (1) {
217
		Z.hi ^= Htable[n].hi;
218
		Z.lo ^= Htable[n].lo;
219
220
		if ((u8 *)Xi==xi)	break;
221
222
		n = *(--xi);
223
224
		rem  = (size_t)Z.lo&0xff;
225
		Z.lo = (Z.hi<<56)|(Z.lo>>8);
226
		Z.hi = (Z.hi>>8);
227
#if SIZE_MAX == 0xffffffffffffffff
228
		Z.hi ^= rem_8bit[rem];
229
#else
230
		Z.hi ^= (u64)rem_8bit[rem]<<32;
231
#endif
232
	}
233
234
#if BYTE_ORDER == LITTLE_ENDIAN
235
#ifdef BSWAP8
236
	Xi[0] = BSWAP8(Z.hi);
237
	Xi[1] = BSWAP8(Z.lo);
238
#else
239
	u8 *p = (u8 *)Xi;
240
	u32 v;
241
	v = (u32)(Z.hi>>32);	PUTU32(p,v);
242
	v = (u32)(Z.hi);	PUTU32(p+4,v);
243
	v = (u32)(Z.lo>>32);	PUTU32(p+8,v);
244
	v = (u32)(Z.lo);	PUTU32(p+12,v);
245
#endif
246
#else /* BIG_ENDIAN */
247
	Xi[0] = Z.hi;
248
	Xi[1] = Z.lo;
249
#endif
250
}
251
#define GCM_MUL(ctx,Xi)   gcm_gmult_8bit(ctx->Xi.u,ctx->Htable)
252
253
#elif	TABLE_BITS==4
254
255
static void gcm_init_4bit(u128 Htable[16], u64 H[2])
256
{
257
	u128 V;
258
#if defined(OPENSSL_SMALL_FOOTPRINT)
259
	int  i;
260
#endif
261
262
	Htable[0].hi = 0;
263
	Htable[0].lo = 0;
264
	V.hi = H[0];
265
	V.lo = H[1];
266
267
#if defined(OPENSSL_SMALL_FOOTPRINT)
268
	for (Htable[8]=V, i=4; i>0; i>>=1) {
269
		REDUCE1BIT(V);
270
		Htable[i] = V;
271
	}
272
273
	for (i=2; i<16; i<<=1) {
274
		u128 *Hi = Htable+i;
275
		int   j;
276
		for (V=*Hi, j=1; j<i; ++j) {
277
			Hi[j].hi = V.hi^Htable[j].hi;
278
			Hi[j].lo = V.lo^Htable[j].lo;
279
		}
280
	}
281
#else
282
	Htable[8] = V;
283
	REDUCE1BIT(V);
284
	Htable[4] = V;
285
	REDUCE1BIT(V);
286
	Htable[2] = V;
287
	REDUCE1BIT(V);
288
	Htable[1] = V;
289
	Htable[3].hi  = V.hi^Htable[2].hi, Htable[3].lo  = V.lo^Htable[2].lo;
290
	V=Htable[4];
291
	Htable[5].hi  = V.hi^Htable[1].hi, Htable[5].lo  = V.lo^Htable[1].lo;
292
	Htable[6].hi  = V.hi^Htable[2].hi, Htable[6].lo  = V.lo^Htable[2].lo;
293
	Htable[7].hi  = V.hi^Htable[3].hi, Htable[7].lo  = V.lo^Htable[3].lo;
294
	V=Htable[8];
295
	Htable[9].hi  = V.hi^Htable[1].hi, Htable[9].lo  = V.lo^Htable[1].lo;
296
	Htable[10].hi = V.hi^Htable[2].hi, Htable[10].lo = V.lo^Htable[2].lo;
297
	Htable[11].hi = V.hi^Htable[3].hi, Htable[11].lo = V.lo^Htable[3].lo;
298
	Htable[12].hi = V.hi^Htable[4].hi, Htable[12].lo = V.lo^Htable[4].lo;
299
	Htable[13].hi = V.hi^Htable[5].hi, Htable[13].lo = V.lo^Htable[5].lo;
300
	Htable[14].hi = V.hi^Htable[6].hi, Htable[14].lo = V.lo^Htable[6].lo;
301
	Htable[15].hi = V.hi^Htable[7].hi, Htable[15].lo = V.lo^Htable[7].lo;
302
#endif
303
#if defined(GHASH_ASM) && (defined(__arm__) || defined(__arm))
304
	/*
305
	 * ARM assembler expects specific dword order in Htable.
306
	 */
307
	{
308
		int j;
309
#if BYTE_ORDER == LITTLE_ENDIAN
310
		for (j=0;j<16;++j) {
311
			V = Htable[j];
312
			Htable[j].hi = V.lo;
313
			Htable[j].lo = V.hi;
314
		}
315
#else /* BIG_ENDIAN */
316
		for (j=0;j<16;++j) {
317
			V = Htable[j];
318
			Htable[j].hi = V.lo<<32|V.lo>>32;
319
			Htable[j].lo = V.hi<<32|V.hi>>32;
320
		}
321
#endif
322
	}
323
#endif
324
}
325
326
#ifndef GHASH_ASM
327
static const size_t rem_4bit[16] = {
328
	PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
329
	PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
330
	PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
331
	PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0) };
332
333
static void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
334
{
335
	u128 Z;
336
	int cnt = 15;
337
	size_t rem, nlo, nhi;
338
339
	nlo  = ((const u8 *)Xi)[15];
340
	nhi  = nlo>>4;
341
	nlo &= 0xf;
342
343
	Z.hi = Htable[nlo].hi;
344
	Z.lo = Htable[nlo].lo;
345
346
	while (1) {
347
		rem  = (size_t)Z.lo&0xf;
348
		Z.lo = (Z.hi<<60)|(Z.lo>>4);
349
		Z.hi = (Z.hi>>4);
350
#if SIZE_MAX == 0xffffffffffffffff
351
		Z.hi ^= rem_4bit[rem];
352
#else
353
		Z.hi ^= (u64)rem_4bit[rem]<<32;
354
#endif
355
		Z.hi ^= Htable[nhi].hi;
356
		Z.lo ^= Htable[nhi].lo;
357
358
		if (--cnt<0)		break;
359
360
		nlo  = ((const u8 *)Xi)[cnt];
361
		nhi  = nlo>>4;
362
		nlo &= 0xf;
363
364
		rem  = (size_t)Z.lo&0xf;
365
		Z.lo = (Z.hi<<60)|(Z.lo>>4);
366
		Z.hi = (Z.hi>>4);
367
#if SIZE_MAX == 0xffffffffffffffff
368
		Z.hi ^= rem_4bit[rem];
369
#else
370
		Z.hi ^= (u64)rem_4bit[rem]<<32;
371
#endif
372
		Z.hi ^= Htable[nlo].hi;
373
		Z.lo ^= Htable[nlo].lo;
374
	}
375
376
#if BYTE_ORDER == LITTLE_ENDIAN
377
#ifdef BSWAP8
378
	Xi[0] = BSWAP8(Z.hi);
379
	Xi[1] = BSWAP8(Z.lo);
380
#else
381
	u8 *p = (u8 *)Xi;
382
	u32 v;
383
	v = (u32)(Z.hi>>32);	PUTU32(p,v);
384
	v = (u32)(Z.hi);	PUTU32(p+4,v);
385
	v = (u32)(Z.lo>>32);	PUTU32(p+8,v);
386
	v = (u32)(Z.lo);	PUTU32(p+12,v);
387
#endif
388
#else /* BIG_ENDIAN */
389
	Xi[0] = Z.hi;
390
	Xi[1] = Z.lo;
391
#endif
392
}
393
394
#if !defined(OPENSSL_SMALL_FOOTPRINT)
395
/*
396
 * Streamed variant of gcm_gmult_4bit; see CRYPTO_gcm128_[en|de]crypt for
397
 * details... Compiler-generated code doesn't seem to give any
398
 * performance improvement, at least not on x86[_64]. It's here
399
 * mostly as reference and a placeholder for possible future
400
 * non-trivial optimization[s]...
401
 */
402
static void gcm_ghash_4bit(u64 Xi[2],const u128 Htable[16],
403
				const u8 *inp,size_t len)
404
{
405
    u128 Z;
406
    int cnt;
407
    size_t rem, nlo, nhi;
408
409
#if 1
410
    do {
411
	cnt  = 15;
412
	nlo  = ((const u8 *)Xi)[15];
413
	nlo ^= inp[15];
414
	nhi  = nlo>>4;
415
	nlo &= 0xf;
416
417
	Z.hi = Htable[nlo].hi;
418
	Z.lo = Htable[nlo].lo;
419
420
	while (1) {
421
		rem  = (size_t)Z.lo&0xf;
422
		Z.lo = (Z.hi<<60)|(Z.lo>>4);
423
		Z.hi = (Z.hi>>4);
424
#if SIZE_MAX == 0xffffffffffffffff
425
		Z.hi ^= rem_4bit[rem];
426
#else
427
		Z.hi ^= (u64)rem_4bit[rem]<<32;
428
#endif
429
		Z.hi ^= Htable[nhi].hi;
430
		Z.lo ^= Htable[nhi].lo;
431
432
		if (--cnt<0)		break;
433
434
		nlo  = ((const u8 *)Xi)[cnt];
435
		nlo ^= inp[cnt];
436
		nhi  = nlo>>4;
437
		nlo &= 0xf;
438
439
		rem  = (size_t)Z.lo&0xf;
440
		Z.lo = (Z.hi<<60)|(Z.lo>>4);
441
		Z.hi = (Z.hi>>4);
442
#if SIZE_MAX == 0xffffffffffffffff
443
		Z.hi ^= rem_4bit[rem];
444
#else
445
		Z.hi ^= (u64)rem_4bit[rem]<<32;
446
#endif
447
		Z.hi ^= Htable[nlo].hi;
448
		Z.lo ^= Htable[nlo].lo;
449
	}
450
#else
451
    /*
452
     * Extra 256+16 bytes per key plus 512 bytes of shared tables
453
     * [should] give ~50% improvement... One could have PACK()-ed
454
     * the rem_8bit even here, but the priority is to minimize
455
     * cache footprint...
456
     */
457
    u128 Hshr4[16];	/* Htable shifted right by 4 bits */
458
    u8   Hshl4[16];	/* Htable shifted left  by 4 bits */
459
    static const unsigned short rem_8bit[256] = {
460
	0x0000, 0x01C2, 0x0384, 0x0246, 0x0708, 0x06CA, 0x048C, 0x054E,
461
	0x0E10, 0x0FD2, 0x0D94, 0x0C56, 0x0918, 0x08DA, 0x0A9C, 0x0B5E,
462
	0x1C20, 0x1DE2, 0x1FA4, 0x1E66, 0x1B28, 0x1AEA, 0x18AC, 0x196E,
463
	0x1230, 0x13F2, 0x11B4, 0x1076, 0x1538, 0x14FA, 0x16BC, 0x177E,
464
	0x3840, 0x3982, 0x3BC4, 0x3A06, 0x3F48, 0x3E8A, 0x3CCC, 0x3D0E,
465
	0x3650, 0x3792, 0x35D4, 0x3416, 0x3158, 0x309A, 0x32DC, 0x331E,
466
	0x2460, 0x25A2, 0x27E4, 0x2626, 0x2368, 0x22AA, 0x20EC, 0x212E,
467
	0x2A70, 0x2BB2, 0x29F4, 0x2836, 0x2D78, 0x2CBA, 0x2EFC, 0x2F3E,
468
	0x7080, 0x7142, 0x7304, 0x72C6, 0x7788, 0x764A, 0x740C, 0x75CE,
469
	0x7E90, 0x7F52, 0x7D14, 0x7CD6, 0x7998, 0x785A, 0x7A1C, 0x7BDE,
470
	0x6CA0, 0x6D62, 0x6F24, 0x6EE6, 0x6BA8, 0x6A6A, 0x682C, 0x69EE,
471
	0x62B0, 0x6372, 0x6134, 0x60F6, 0x65B8, 0x647A, 0x663C, 0x67FE,
472
	0x48C0, 0x4902, 0x4B44, 0x4A86, 0x4FC8, 0x4E0A, 0x4C4C, 0x4D8E,
473
	0x46D0, 0x4712, 0x4554, 0x4496, 0x41D8, 0x401A, 0x425C, 0x439E,
474
	0x54E0, 0x5522, 0x5764, 0x56A6, 0x53E8, 0x522A, 0x506C, 0x51AE,
475
	0x5AF0, 0x5B32, 0x5974, 0x58B6, 0x5DF8, 0x5C3A, 0x5E7C, 0x5FBE,
476
	0xE100, 0xE0C2, 0xE284, 0xE346, 0xE608, 0xE7CA, 0xE58C, 0xE44E,
477
	0xEF10, 0xEED2, 0xEC94, 0xED56, 0xE818, 0xE9DA, 0xEB9C, 0xEA5E,
478
	0xFD20, 0xFCE2, 0xFEA4, 0xFF66, 0xFA28, 0xFBEA, 0xF9AC, 0xF86E,
479
	0xF330, 0xF2F2, 0xF0B4, 0xF176, 0xF438, 0xF5FA, 0xF7BC, 0xF67E,
480
	0xD940, 0xD882, 0xDAC4, 0xDB06, 0xDE48, 0xDF8A, 0xDDCC, 0xDC0E,
481
	0xD750, 0xD692, 0xD4D4, 0xD516, 0xD058, 0xD19A, 0xD3DC, 0xD21E,
482
	0xC560, 0xC4A2, 0xC6E4, 0xC726, 0xC268, 0xC3AA, 0xC1EC, 0xC02E,
483
	0xCB70, 0xCAB2, 0xC8F4, 0xC936, 0xCC78, 0xCDBA, 0xCFFC, 0xCE3E,
484
	0x9180, 0x9042, 0x9204, 0x93C6, 0x9688, 0x974A, 0x950C, 0x94CE,
485
	0x9F90, 0x9E52, 0x9C14, 0x9DD6, 0x9898, 0x995A, 0x9B1C, 0x9ADE,
486
	0x8DA0, 0x8C62, 0x8E24, 0x8FE6, 0x8AA8, 0x8B6A, 0x892C, 0x88EE,
487
	0x83B0, 0x8272, 0x8034, 0x81F6, 0x84B8, 0x857A, 0x873C, 0x86FE,
488
	0xA9C0, 0xA802, 0xAA44, 0xAB86, 0xAEC8, 0xAF0A, 0xAD4C, 0xAC8E,
489
	0xA7D0, 0xA612, 0xA454, 0xA596, 0xA0D8, 0xA11A, 0xA35C, 0xA29E,
490
	0xB5E0, 0xB422, 0xB664, 0xB7A6, 0xB2E8, 0xB32A, 0xB16C, 0xB0AE,
491
	0xBBF0, 0xBA32, 0xB874, 0xB9B6, 0xBCF8, 0xBD3A, 0xBF7C, 0xBEBE };
492
    /*
493
     * This pre-processing phase slows down the procedure by approximately the
494
     * same amount of time as it makes each loop spin faster. In other words,
495
     * single-block performance is approximately the same as the straightforward
496
     * "4-bit" implementation, and then it goes only faster...
497
     */
498
    for (cnt=0; cnt<16; ++cnt) {
499
	Z.hi = Htable[cnt].hi;
500
	Z.lo = Htable[cnt].lo;
501
	Hshr4[cnt].lo = (Z.hi<<60)|(Z.lo>>4);
502
	Hshr4[cnt].hi = (Z.hi>>4);
503
	Hshl4[cnt]    = (u8)(Z.lo<<4);
504
    }
505
506
    do {
507
	for (Z.lo=0, Z.hi=0, cnt=15; cnt; --cnt) {
508
		nlo  = ((const u8 *)Xi)[cnt];
509
		nlo ^= inp[cnt];
510
		nhi  = nlo>>4;
511
		nlo &= 0xf;
512
513
		Z.hi ^= Htable[nlo].hi;
514
		Z.lo ^= Htable[nlo].lo;
515
516
		rem = (size_t)Z.lo&0xff;
517
518
		Z.lo = (Z.hi<<56)|(Z.lo>>8);
519
		Z.hi = (Z.hi>>8);
520
521
		Z.hi ^= Hshr4[nhi].hi;
522
		Z.lo ^= Hshr4[nhi].lo;
523
		Z.hi ^= (u64)rem_8bit[rem^Hshl4[nhi]]<<48;
524
	}
525
526
	nlo  = ((const u8 *)Xi)[0];
527
	nlo ^= inp[0];
528
	nhi  = nlo>>4;
529
	nlo &= 0xf;
530
531
	Z.hi ^= Htable[nlo].hi;
532
	Z.lo ^= Htable[nlo].lo;
533
534
	rem = (size_t)Z.lo&0xf;
535
536
	Z.lo = (Z.hi<<60)|(Z.lo>>4);
537
	Z.hi = (Z.hi>>4);
538
539
	Z.hi ^= Htable[nhi].hi;
540
	Z.lo ^= Htable[nhi].lo;
541
	Z.hi ^= ((u64)rem_8bit[rem<<4])<<48;
542
#endif
543
544
#if BYTE_ORDER == LITTLE_ENDIAN
545
#ifdef BSWAP8
546
	Xi[0] = BSWAP8(Z.hi);
547
	Xi[1] = BSWAP8(Z.lo);
548
#else
549
	u8 *p = (u8 *)Xi;
550
	u32 v;
551
	v = (u32)(Z.hi>>32);	PUTU32(p,v);
552
	v = (u32)(Z.hi);	PUTU32(p+4,v);
553
	v = (u32)(Z.lo>>32);	PUTU32(p+8,v);
554
	v = (u32)(Z.lo);	PUTU32(p+12,v);
555
#endif
556
#else /* BIG_ENDIAN */
557
	Xi[0] = Z.hi;
558
	Xi[1] = Z.lo;
559
#endif
560
    } while (inp+=16, len-=16);
561
}
562
#endif
563
#else
564
void gcm_gmult_4bit(u64 Xi[2],const u128 Htable[16]);
565
void gcm_ghash_4bit(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
566
#endif
567
568
#define GCM_MUL(ctx,Xi)   gcm_gmult_4bit(ctx->Xi.u,ctx->Htable)
569
#if defined(GHASH_ASM) || !defined(OPENSSL_SMALL_FOOTPRINT)
570
#define GHASH(ctx,in,len) gcm_ghash_4bit((ctx)->Xi.u,(ctx)->Htable,in,len)
571
/* GHASH_CHUNK is a "stride parameter" meant to mitigate the cache-
572
 * thrashing effect. In other words, the idea is to hash data while it's
573
 * still in the L1 cache after the encryption pass... */
574
#define GHASH_CHUNK       (3*1024)
575
#endif
576
577
#else	/* TABLE_BITS */
578
579
static void gcm_gmult_1bit(u64 Xi[2],const u64 H[2])
580
{
581
	u128 V,Z = { 0,0 };
582
	long X;
583
	int  i,j;
584
	const long *xi = (const long *)Xi;
585
586
	V.hi = H[0];	/* H is in host byte order, no byte swapping */
587
	V.lo = H[1];
588
589
	for (j=0; j<16/sizeof(long); ++j) {
590
#if BYTE_ORDER == LITTLE_ENDIAN
591
#if SIZE_MAX == 0xffffffffffffffff
592
#ifdef BSWAP8
593
			X = (long)(BSWAP8(xi[j]));
594
#else
595
			const u8 *p = (const u8 *)(xi+j);
596
			X = (long)((u64)GETU32(p)<<32|GETU32(p+4));
597
#endif
598
#else
599
			const u8 *p = (const u8 *)(xi+j);
600
			X = (long)GETU32(p);
601
#endif
602
#else /* BIG_ENDIAN */
603
		X = xi[j];
604
#endif
605
606
		for (i=0; i<8*sizeof(long); ++i, X<<=1) {
607
			u64 M = (u64)(X>>(8*sizeof(long)-1));
608
			Z.hi ^= V.hi&M;
609
			Z.lo ^= V.lo&M;
610
611
			REDUCE1BIT(V);
612
		}
613
	}
614
615
#if BYTE_ORDER == LITTLE_ENDIAN
616
#ifdef BSWAP8
617
	Xi[0] = BSWAP8(Z.hi);
618
	Xi[1] = BSWAP8(Z.lo);
619
#else
620
	u8 *p = (u8 *)Xi;
621
	u32 v;
622
	v = (u32)(Z.hi>>32);	PUTU32(p,v);
623
	v = (u32)(Z.hi);	PUTU32(p+4,v);
624
	v = (u32)(Z.lo>>32);	PUTU32(p+8,v);
625
	v = (u32)(Z.lo);	PUTU32(p+12,v);
626
#endif
627
#else /* BIG_ENDIAN */
628
	Xi[0] = Z.hi;
629
	Xi[1] = Z.lo;
630
#endif
631
}
632
#define GCM_MUL(ctx,Xi)	  gcm_gmult_1bit(ctx->Xi.u,ctx->H.u)
633
634
#endif
635
636
#if	defined(GHASH_ASM) && \
637
	(defined(__i386)	|| defined(__i386__)	|| \
638
	 defined(__x86_64)	|| defined(__x86_64__)	|| \
639
	 defined(_M_IX86)	|| defined(_M_AMD64)	|| defined(_M_X64))
640
#include "x86_arch.h"
641
#endif
642
643
#if	TABLE_BITS==4 && defined(GHASH_ASM)
644
# if	(defined(__i386)	|| defined(__i386__)	|| \
645
	 defined(__x86_64)	|| defined(__x86_64__)	|| \
646
	 defined(_M_IX86)	|| defined(_M_AMD64)	|| defined(_M_X64))
647
#  define GHASH_ASM_X86_OR_64
648
#  define GCM_FUNCREF_4BIT
649
650
void gcm_init_clmul(u128 Htable[16],const u64 Xi[2]);
651
void gcm_gmult_clmul(u64 Xi[2],const u128 Htable[16]);
652
void gcm_ghash_clmul(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
653
654
#  if	defined(__i386) || defined(__i386__) || defined(_M_IX86)
655
#   define GHASH_ASM_X86
656
void gcm_gmult_4bit_mmx(u64 Xi[2],const u128 Htable[16]);
657
void gcm_ghash_4bit_mmx(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
658
659
void gcm_gmult_4bit_x86(u64 Xi[2],const u128 Htable[16]);
660
void gcm_ghash_4bit_x86(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
661
#  endif
662
# elif defined(__arm__) || defined(__arm)
663
#  include "arm_arch.h"
664
#  if __ARM_ARCH__>=7
665
#   define GHASH_ASM_ARM
666
#   define GCM_FUNCREF_4BIT
667
void gcm_gmult_neon(u64 Xi[2],const u128 Htable[16]);
668
void gcm_ghash_neon(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
669
#  endif
670
# endif
671
#endif
672
673
#ifdef GCM_FUNCREF_4BIT
674
# undef  GCM_MUL
675
# define GCM_MUL(ctx,Xi)	(*gcm_gmult_p)(ctx->Xi.u,ctx->Htable)
676
# ifdef GHASH
677
#  undef  GHASH
678
#  define GHASH(ctx,in,len)	(*gcm_ghash_p)(ctx->Xi.u,ctx->Htable,in,len)
679
# endif
680
#endif
681
682
void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx,void *key,block128_f block)
683
{
684
32644
	memset(ctx,0,sizeof(*ctx));
685
16322
	ctx->block = block;
686
16322
	ctx->key   = key;
687
688
16322
	(*block)(ctx->H.c,ctx->H.c,key);
689
690
#if BYTE_ORDER == LITTLE_ENDIAN
691
	/* H is stored in host byte order */
692
#ifdef BSWAP8
693
16322
	ctx->H.u[0] = BSWAP8(ctx->H.u[0]);
694
16322
	ctx->H.u[1] = BSWAP8(ctx->H.u[1]);
695
#else
696
	u8 *p = ctx->H.c;
697
	u64 hi,lo;
698
	hi = (u64)GETU32(p)  <<32|GETU32(p+4);
699
	lo = (u64)GETU32(p+8)<<32|GETU32(p+12);
700
	ctx->H.u[0] = hi;
701
	ctx->H.u[1] = lo;
702
#endif
703
#endif
704
705
#if	TABLE_BITS==8
706
	gcm_init_8bit(ctx->Htable,ctx->H.u);
707
#elif	TABLE_BITS==4
708
# if	defined(GHASH_ASM_X86_OR_64)
709
#  if	!defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2)
710
	/* check FXSR and PCLMULQDQ bits */
711
16322
	if ((OPENSSL_cpu_caps() & (CPUCAP_MASK_FXSR | CPUCAP_MASK_PCLMUL)) ==
712
	    (CPUCAP_MASK_FXSR | CPUCAP_MASK_PCLMUL)) {
713
16322
		gcm_init_clmul(ctx->Htable,ctx->H.u);
714
16322
		ctx->gmult = gcm_gmult_clmul;
715
		ctx->ghash = gcm_ghash_clmul;
716
16322
		return;
717
	}
718
#  endif
719
	gcm_init_4bit(ctx->Htable,ctx->H.u);
720
#  if	defined(GHASH_ASM_X86)			/* x86 only */
721
#   if	defined(OPENSSL_IA32_SSE2)
722
	if (OPENSSL_cpu_caps() & CPUCAP_MASK_SSE) {	/* check SSE bit */
723
#   else
724
	if (OPENSSL_cpu_caps() & CPUCAP_MASK_MMX) {	/* check MMX bit */
725
#   endif
726
		ctx->gmult = gcm_gmult_4bit_mmx;
727
		ctx->ghash = gcm_ghash_4bit_mmx;
728
	} else {
729
		ctx->gmult = gcm_gmult_4bit_x86;
730
		ctx->ghash = gcm_ghash_4bit_x86;
731
	}
732
#  else
733
	ctx->gmult = gcm_gmult_4bit;
734
	ctx->ghash = gcm_ghash_4bit;
735
#  endif
736
# elif	defined(GHASH_ASM_ARM)
737
	if (OPENSSL_armcap_P & ARMV7_NEON) {
738
		ctx->gmult = gcm_gmult_neon;
739
		ctx->ghash = gcm_ghash_neon;
740
	} else {
741
		gcm_init_4bit(ctx->Htable,ctx->H.u);
742
		ctx->gmult = gcm_gmult_4bit;
743
		ctx->ghash = gcm_ghash_4bit;
744
	}
745
# else
746
	gcm_init_4bit(ctx->Htable,ctx->H.u);
747
# endif
748
#endif
749
16322
}
750
751
void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx,const unsigned char *iv,size_t len)
752
{
753
	unsigned int ctr;
754
#ifdef GCM_FUNCREF_4BIT
755
33864
	void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16])	= ctx->gmult;
756
#endif
757
758
16932
	ctx->Yi.u[0]  = 0;
759
16932
	ctx->Yi.u[1]  = 0;
760
16932
	ctx->Xi.u[0]  = 0;
761
16932
	ctx->Xi.u[1]  = 0;
762
16932
	ctx->len.u[0] = 0;	/* AAD length */
763
16932
	ctx->len.u[1] = 0;	/* message length */
764
16932
	ctx->ares = 0;
765
16932
	ctx->mres = 0;
766
767
16932
	if (len==12) {
768
16848
		memcpy(ctx->Yi.c,iv,12);
769
16848
		ctx->Yi.c[15]=1;
770
		ctr=1;
771
16848
	}
772
	else {
773
		size_t i;
774
		u64 len0 = len;
775
776
480
		while (len>=16) {
777
5304
			for (i=0; i<16; ++i) ctx->Yi.c[i] ^= iv[i];
778
156
			GCM_MUL(ctx,Yi);
779
156
			iv += 16;
780
156
			len -= 16;
781
		}
782
84
		if (len) {
783
1584
			for (i=0; i<len; ++i) ctx->Yi.c[i] ^= iv[i];
784
72
			GCM_MUL(ctx,Yi);
785
72
		}
786
84
		len0 <<= 3;
787
#if BYTE_ORDER == LITTLE_ENDIAN
788
#ifdef BSWAP8
789
84
		ctx->Yi.u[1]  ^= BSWAP8(len0);
790
#else
791
		ctx->Yi.c[8]  ^= (u8)(len0>>56);
792
		ctx->Yi.c[9]  ^= (u8)(len0>>48);
793
		ctx->Yi.c[10] ^= (u8)(len0>>40);
794
		ctx->Yi.c[11] ^= (u8)(len0>>32);
795
		ctx->Yi.c[12] ^= (u8)(len0>>24);
796
		ctx->Yi.c[13] ^= (u8)(len0>>16);
797
		ctx->Yi.c[14] ^= (u8)(len0>>8);
798
		ctx->Yi.c[15] ^= (u8)(len0);
799
#endif
800
#else /* BIG_ENDIAN */
801
		ctx->Yi.u[1]  ^= len0;
802
#endif
803
804
84
		GCM_MUL(ctx,Yi);
805
806
#if BYTE_ORDER == LITTLE_ENDIAN
807
#ifdef BSWAP4
808
84
		ctr = BSWAP4(ctx->Yi.d[3]);
809
#else
810
		ctr = GETU32(ctx->Yi.c+12);
811
#endif
812
#else /* BIG_ENDIAN */
813
		ctr = ctx->Yi.d[3];
814
#endif
815
	}
816
817
16932
	(*ctx->block)(ctx->Yi.c,ctx->EK0.c,ctx->key);
818
16932
	++ctr;
819
#if BYTE_ORDER == LITTLE_ENDIAN
820
#ifdef BSWAP4
821
16932
	ctx->Yi.d[3] = BSWAP4(ctr);
822
#else
823
	PUTU32(ctx->Yi.c+12,ctr);
824
#endif
825
#else /* BIG_ENDIAN */
826
	ctx->Yi.d[3] = ctr;
827
#endif
828
16932
}
829
830
int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx,const unsigned char *aad,size_t len)
831
{
832
	size_t i;
833
	unsigned int n;
834
33624
	u64 alen = ctx->len.u[0];
835
#ifdef GCM_FUNCREF_4BIT
836
16812
	void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16])	= ctx->gmult;
837
# ifdef GHASH
838
	void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
839
16812
				const u8 *inp,size_t len)	= ctx->ghash;
840
# endif
841
#endif
842
843
16812
	if (ctx->len.u[1]) return -2;
844
845
16812
	alen += len;
846

33624
	if (alen>(U64(1)<<61) || (sizeof(len)==8 && alen<len))
847
		return -1;
848
16812
	ctx->len.u[0] = alen;
849
850
16812
	n = ctx->ares;
851
16812
	if (n) {
852
		while (n && len) {
853
			ctx->Xi.c[n] ^= *(aad++);
854
			--len;
855
			n = (n+1)%16;
856
		}
857
		if (n==0) GCM_MUL(ctx,Xi);
858
		else {
859
			ctx->ares = n;
860
			return 0;
861
		}
862
	}
863
864
#ifdef GHASH
865
16812
	if ((i = (len&(size_t)-16))) {
866
192
		GHASH(ctx,aad,i);
867
192
		aad += i;
868
192
		len -= i;
869
192
	}
870
#else
871
	while (len>=16) {
872
		for (i=0; i<16; ++i) ctx->Xi.c[i] ^= aad[i];
873
		GCM_MUL(ctx,Xi);
874
		aad += 16;
875
		len -= 16;
876
	}
877
#endif
878
16812
	if (len) {
879
16800
		n = (unsigned int)len;
880
466944
		for (i=0; i<len; ++i) ctx->Xi.c[i] ^= aad[i];
881
	}
882
883
16812
	ctx->ares = n;
884
16812
	return 0;
885
16812
}
886
887
int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
888
		const unsigned char *in, unsigned char *out,
889
		size_t len)
890
{
891
	unsigned int n, ctr;
892
	size_t i;
893
192
	u64        mlen  = ctx->len.u[1];
894
96
	block128_f block = ctx->block;
895
96
	void      *key   = ctx->key;
896
#ifdef GCM_FUNCREF_4BIT
897
96
	void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16])	= ctx->gmult;
898
# ifdef GHASH
899
	void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
900
96
				const u8 *inp,size_t len)	= ctx->ghash;
901
# endif
902
#endif
903
904
96
	mlen += len;
905

192
	if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
906
		return -1;
907
96
	ctx->len.u[1] = mlen;
908
909
96
	if (ctx->ares) {
910
		/* First call to encrypt finalizes GHASH(AAD) */
911
54
		GCM_MUL(ctx,Xi);
912
54
		ctx->ares = 0;
913
54
	}
914
915
#if BYTE_ORDER == LITTLE_ENDIAN
916
#ifdef BSWAP4
917
96
	ctr = BSWAP4(ctx->Yi.d[3]);
918
#else
919
	ctr = GETU32(ctx->Yi.c+12);
920
#endif
921
#else /* BIG_ENDIAN */
922
	ctr = ctx->Yi.d[3];
923
#endif
924
925
96
	n = ctx->mres;
926
#if !defined(OPENSSL_SMALL_FOOTPRINT)
927
	if (16%sizeof(size_t) == 0) do {	/* always true actually */
928
96
		if (n) {
929
			while (n && len) {
930
				ctx->Xi.c[n] ^= *(out++) = *(in++)^ctx->EKi.c[n];
931
				--len;
932
				n = (n+1)%16;
933
			}
934
			if (n==0) GCM_MUL(ctx,Xi);
935
			else {
936
				ctx->mres = n;
937
				return 0;
938
			}
939
		}
940
#ifdef __STRICT_ALIGNMENT
941
		if (((size_t)in|(size_t)out)%sizeof(size_t) != 0)
942
			break;
943
#endif
944
#if defined(GHASH) && defined(GHASH_CHUNK)
945
96
		while (len>=GHASH_CHUNK) {
946
		    size_t j=GHASH_CHUNK;
947
948
		    while (j) {
949
		    	size_t *out_t=(size_t *)out;
950
		    	const size_t *in_t=(const size_t *)in;
951
952
			(*block)(ctx->Yi.c,ctx->EKi.c,key);
953
			++ctr;
954
#if BYTE_ORDER == LITTLE_ENDIAN
955
#ifdef BSWAP4
956
			ctx->Yi.d[3] = BSWAP4(ctr);
957
#else
958
			PUTU32(ctx->Yi.c+12,ctr);
959
#endif
960
#else /* BIG_ENDIAN */
961
			ctx->Yi.d[3] = ctr;
962
#endif
963
			for (i=0; i<16/sizeof(size_t); ++i)
964
				out_t[i] = in_t[i] ^ ctx->EKi.t[i];
965
			out += 16;
966
			in  += 16;
967
			j   -= 16;
968
		    }
969
		    GHASH(ctx,out-GHASH_CHUNK,GHASH_CHUNK);
970
		    len -= GHASH_CHUNK;
971
		}
972
96
		if ((i = (len&(size_t)-16))) {
973
		    size_t j=i;
974
975
912
		    while (len>=16) {
976
360
		    	size_t *out_t=(size_t *)out;
977
360
		    	const size_t *in_t=(const size_t *)in;
978
979
360
			(*block)(ctx->Yi.c,ctx->EKi.c,key);
980
360
			++ctr;
981
#if BYTE_ORDER == LITTLE_ENDIAN
982
#ifdef BSWAP4
983
360
			ctx->Yi.d[3] = BSWAP4(ctr);
984
#else
985
			PUTU32(ctx->Yi.c+12,ctr);
986
#endif
987
#else /* BIG_ENDIAN */
988
			ctx->Yi.d[3] = ctr;
989
#endif
990
2160
			for (i=0; i<16/sizeof(size_t); ++i)
991
720
				out_t[i] = in_t[i] ^ ctx->EKi.t[i];
992
360
			out += 16;
993
360
			in  += 16;
994
360
			len -= 16;
995
		    }
996
96
		    GHASH(ctx,out-j,j);
997
96
		}
998
#else
999
		while (len>=16) {
1000
		    	size_t *out_t=(size_t *)out;
1001
		    	const size_t *in_t=(const size_t *)in;
1002
1003
			(*block)(ctx->Yi.c,ctx->EKi.c,key);
1004
			++ctr;
1005
#if BYTE_ORDER == LITTLE_ENDIAN
1006
#ifdef BSWAP4
1007
			ctx->Yi.d[3] = BSWAP4(ctr);
1008
#else
1009
			PUTU32(ctx->Yi.c+12,ctr);
1010
#endif
1011
#else /* BIG_ENDIAN */
1012
			ctx->Yi.d[3] = ctr;
1013
#endif
1014
			for (i=0; i<16/sizeof(size_t); ++i)
1015
				ctx->Xi.t[i] ^=
1016
				out_t[i] = in_t[i]^ctx->EKi.t[i];
1017
			GCM_MUL(ctx,Xi);
1018
			out += 16;
1019
			in  += 16;
1020
			len -= 16;
1021
		}
1022
#endif
1023
96
		if (len) {
1024
54
			(*block)(ctx->Yi.c,ctx->EKi.c,key);
1025
54
			++ctr;
1026
#if BYTE_ORDER == LITTLE_ENDIAN
1027
#ifdef BSWAP4
1028
54
			ctx->Yi.d[3] = BSWAP4(ctr);
1029
#else
1030
			PUTU32(ctx->Yi.c+12,ctr);
1031
#endif
1032
#else /* BIG_ENDIAN */
1033
			ctx->Yi.d[3] = ctr;
1034
#endif
1035
1404
			while (len--) {
1036
648
				ctx->Xi.c[n] ^= out[n] = in[n]^ctx->EKi.c[n];
1037
648
				++n;
1038
			}
1039
		}
1040
1041
96
		ctx->mres = n;
1042
96
		return 0;
1043
	} while(0);
1044
#endif
1045
	for (i=0;i<len;++i) {
1046
		if (n==0) {
1047
			(*block)(ctx->Yi.c,ctx->EKi.c,key);
1048
			++ctr;
1049
#if BYTE_ORDER == LITTLE_ENDIAN
1050
#ifdef BSWAP4
1051
			ctx->Yi.d[3] = BSWAP4(ctr);
1052
#else
1053
			PUTU32(ctx->Yi.c+12,ctr);
1054
#endif
1055
#else /* BIG_ENDIAN */
1056
			ctx->Yi.d[3] = ctr;
1057
#endif
1058
		}
1059
		ctx->Xi.c[n] ^= out[i] = in[i]^ctx->EKi.c[n];
1060
		n = (n+1)%16;
1061
		if (n==0)
1062
			GCM_MUL(ctx,Xi);
1063
	}
1064
1065
	ctx->mres = n;
1066
	return 0;
1067
96
}
1068
1069
int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
1070
		const unsigned char *in, unsigned char *out,
1071
		size_t len)
1072
{
1073
	unsigned int n, ctr;
1074
	size_t i;
1075
192
	u64        mlen  = ctx->len.u[1];
1076
96
	block128_f block = ctx->block;
1077
96
	void      *key   = ctx->key;
1078
#ifdef GCM_FUNCREF_4BIT
1079
96
	void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16])	= ctx->gmult;
1080
# ifdef GHASH
1081
	void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
1082
96
				const u8 *inp,size_t len)	= ctx->ghash;
1083
# endif
1084
#endif
1085
1086
96
	mlen += len;
1087

192
	if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
1088
		return -1;
1089
96
	ctx->len.u[1] = mlen;
1090
1091
96
	if (ctx->ares) {
1092
		/* First call to decrypt finalizes GHASH(AAD) */
1093
54
		GCM_MUL(ctx,Xi);
1094
54
		ctx->ares = 0;
1095
54
	}
1096
1097
#if BYTE_ORDER == LITTLE_ENDIAN
1098
#ifdef BSWAP4
1099
96
	ctr = BSWAP4(ctx->Yi.d[3]);
1100
#else
1101
	ctr = GETU32(ctx->Yi.c+12);
1102
#endif
1103
#else /* BIG_ENDIAN */
1104
	ctr = ctx->Yi.d[3];
1105
#endif
1106
1107
96
	n = ctx->mres;
1108
#if !defined(OPENSSL_SMALL_FOOTPRINT)
1109
	if (16%sizeof(size_t) == 0) do {	/* always true actually */
1110
96
		if (n) {
1111
			while (n && len) {
1112
				u8 c = *(in++);
1113
				*(out++) = c^ctx->EKi.c[n];
1114
				ctx->Xi.c[n] ^= c;
1115
				--len;
1116
				n = (n+1)%16;
1117
			}
1118
			if (n==0) GCM_MUL (ctx,Xi);
1119
			else {
1120
				ctx->mres = n;
1121
				return 0;
1122
			}
1123
		}
1124
#ifdef __STRICT_ALIGNMENT
1125
		if (((size_t)in|(size_t)out)%sizeof(size_t) != 0)
1126
			break;
1127
#endif
1128
#if defined(GHASH) && defined(GHASH_CHUNK)
1129
96
		while (len>=GHASH_CHUNK) {
1130
		    size_t j=GHASH_CHUNK;
1131
1132
		    GHASH(ctx,in,GHASH_CHUNK);
1133
		    while (j) {
1134
		    	size_t *out_t=(size_t *)out;
1135
		    	const size_t *in_t=(const size_t *)in;
1136
1137
			(*block)(ctx->Yi.c,ctx->EKi.c,key);
1138
			++ctr;
1139
#if BYTE_ORDER == LITTLE_ENDIAN
1140
#ifdef BSWAP4
1141
				ctx->Yi.d[3] = BSWAP4(ctr);
1142
#else
1143
				PUTU32(ctx->Yi.c+12,ctr);
1144
#endif
1145
#else /* BIG_ENDIAN */
1146
				ctx->Yi.d[3] = ctr;
1147
#endif
1148
			for (i=0; i<16/sizeof(size_t); ++i)
1149
				out_t[i] = in_t[i]^ctx->EKi.t[i];
1150
			out += 16;
1151
			in  += 16;
1152
			j   -= 16;
1153
		    }
1154
		    len -= GHASH_CHUNK;
1155
		}
1156
96
		if ((i = (len&(size_t)-16))) {
1157
96
		    GHASH(ctx,in,i);
1158
912
		    while (len>=16) {
1159
360
		    	size_t *out_t=(size_t *)out;
1160
360
		    	const size_t *in_t=(const size_t *)in;
1161
1162
360
			(*block)(ctx->Yi.c,ctx->EKi.c,key);
1163
360
			++ctr;
1164
#if BYTE_ORDER == LITTLE_ENDIAN
1165
#ifdef BSWAP4
1166
360
			ctx->Yi.d[3] = BSWAP4(ctr);
1167
#else
1168
			PUTU32(ctx->Yi.c+12,ctr);
1169
#endif
1170
#else /* BIG_ENDIAN */
1171
			ctx->Yi.d[3] = ctr;
1172
#endif
1173
2160
			for (i=0; i<16/sizeof(size_t); ++i)
1174
720
				out_t[i] = in_t[i]^ctx->EKi.t[i];
1175
360
			out += 16;
1176
360
			in  += 16;
1177
360
			len -= 16;
1178
		    }
1179
		}
1180
#else
1181
		while (len>=16) {
1182
		    	size_t *out_t=(size_t *)out;
1183
		    	const size_t *in_t=(const size_t *)in;
1184
1185
			(*block)(ctx->Yi.c,ctx->EKi.c,key);
1186
			++ctr;
1187
#if BYTE_ORDER == LITTLE_ENDIAN
1188
#ifdef BSWAP4
1189
			ctx->Yi.d[3] = BSWAP4(ctr);
1190
#else
1191
			PUTU32(ctx->Yi.c+12,ctr);
1192
#endif
1193
#else /* BIG_ENDIAN */
1194
			ctx->Yi.d[3] = ctr;
1195
#endif
1196
			for (i=0; i<16/sizeof(size_t); ++i) {
1197
				size_t c = in[i];
1198
				out[i] = c^ctx->EKi.t[i];
1199
				ctx->Xi.t[i] ^= c;
1200
			}
1201
			GCM_MUL(ctx,Xi);
1202
			out += 16;
1203
			in  += 16;
1204
			len -= 16;
1205
		}
1206
#endif
1207
96
		if (len) {
1208
54
			(*block)(ctx->Yi.c,ctx->EKi.c,key);
1209
54
			++ctr;
1210
#if BYTE_ORDER == LITTLE_ENDIAN
1211
#ifdef BSWAP4
1212
54
			ctx->Yi.d[3] = BSWAP4(ctr);
1213
#else
1214
			PUTU32(ctx->Yi.c+12,ctr);
1215
#endif
1216
#else /* BIG_ENDIAN */
1217
			ctx->Yi.d[3] = ctr;
1218
#endif
1219
1404
			while (len--) {
1220
648
				u8 c = in[n];
1221
648
				ctx->Xi.c[n] ^= c;
1222
648
				out[n] = c^ctx->EKi.c[n];
1223
648
				++n;
1224
			}
1225
		}
1226
1227
96
		ctx->mres = n;
1228
96
		return 0;
1229
	} while(0);
1230
#endif
1231
	for (i=0;i<len;++i) {
1232
		u8 c;
1233
		if (n==0) {
1234
			(*block)(ctx->Yi.c,ctx->EKi.c,key);
1235
			++ctr;
1236
#if BYTE_ORDER == LITTLE_ENDIAN
1237
#ifdef BSWAP4
1238
			ctx->Yi.d[3] = BSWAP4(ctr);
1239
#else
1240
			PUTU32(ctx->Yi.c+12,ctr);
1241
#endif
1242
#else /* BIG_ENDIAN */
1243
			ctx->Yi.d[3] = ctr;
1244
#endif
1245
		}
1246
		c = in[i];
1247
		out[i] = c^ctx->EKi.c[n];
1248
		ctx->Xi.c[n] ^= c;
1249
		n = (n+1)%16;
1250
		if (n==0)
1251
			GCM_MUL(ctx,Xi);
1252
	}
1253
1254
	ctx->mres = n;
1255
	return 0;
1256
96
}
1257
1258
int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
1259
		const unsigned char *in, unsigned char *out,
1260
		size_t len, ctr128_f stream)
1261
{
1262
	unsigned int n, ctr;
1263
	size_t i;
1264
16838
	u64   mlen = ctx->len.u[1];
1265
8419
	void *key  = ctx->key;
1266
#ifdef GCM_FUNCREF_4BIT
1267
8419
	void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16])	= ctx->gmult;
1268
# ifdef GHASH
1269
	void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
1270
8419
				const u8 *inp,size_t len)	= ctx->ghash;
1271
# endif
1272
#endif
1273
1274
8419
	mlen += len;
1275

16838
	if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
1276
		return -1;
1277
8419
	ctx->len.u[1] = mlen;
1278
1279
8419
	if (ctx->ares) {
1280
		/* First call to encrypt finalizes GHASH(AAD) */
1281
8419
		GCM_MUL(ctx,Xi);
1282
8419
		ctx->ares = 0;
1283
8419
	}
1284
1285
#if BYTE_ORDER == LITTLE_ENDIAN
1286
#ifdef BSWAP4
1287
8419
	ctr = BSWAP4(ctx->Yi.d[3]);
1288
#else
1289
	ctr = GETU32(ctx->Yi.c+12);
1290
#endif
1291
#else /* BIG_ENDIAN */
1292
	ctr = ctx->Yi.d[3];
1293
#endif
1294
1295
8419
	n = ctx->mres;
1296
8419
	if (n) {
1297
		while (n && len) {
1298
			ctx->Xi.c[n] ^= *(out++) = *(in++)^ctx->EKi.c[n];
1299
			--len;
1300
			n = (n+1)%16;
1301
		}
1302
		if (n==0) GCM_MUL(ctx,Xi);
1303
		else {
1304
			ctx->mres = n;
1305
			return 0;
1306
		}
1307
	}
1308
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1309
8419
	while (len>=GHASH_CHUNK) {
1310
		(*stream)(in,out,GHASH_CHUNK/16,key,ctx->Yi.c);
1311
		ctr += GHASH_CHUNK/16;
1312
#if BYTE_ORDER == LITTLE_ENDIAN
1313
#ifdef BSWAP4
1314
		ctx->Yi.d[3] = BSWAP4(ctr);
1315
#else
1316
		PUTU32(ctx->Yi.c+12,ctr);
1317
#endif
1318
#else /* BIG_ENDIAN */
1319
		ctx->Yi.d[3] = ctr;
1320
#endif
1321
		GHASH(ctx,out,GHASH_CHUNK);
1322
		out += GHASH_CHUNK;
1323
		in  += GHASH_CHUNK;
1324
		len -= GHASH_CHUNK;
1325
	}
1326
#endif
1327
8419
	if ((i = (len&(size_t)-16))) {
1328
8237
		size_t j=i/16;
1329
1330
8237
		(*stream)(in,out,j,key,ctx->Yi.c);
1331
8237
		ctr += (unsigned int)j;
1332
#if BYTE_ORDER == LITTLE_ENDIAN
1333
#ifdef BSWAP4
1334
8237
		ctx->Yi.d[3] = BSWAP4(ctr);
1335
#else
1336
		PUTU32(ctx->Yi.c+12,ctr);
1337
#endif
1338
#else /* BIG_ENDIAN */
1339
		ctx->Yi.d[3] = ctr;
1340
#endif
1341
8237
		in  += i;
1342
8237
		len -= i;
1343
#if defined(GHASH)
1344
8237
		GHASH(ctx,out,i);
1345
8237
		out += i;
1346
#else
1347
		while (j--) {
1348
			for (i=0;i<16;++i) ctx->Xi.c[i] ^= out[i];
1349
			GCM_MUL(ctx,Xi);
1350
			out += 16;
1351
		}
1352
#endif
1353
8237
	}
1354
8419
	if (len) {
1355
182
		(*ctx->block)(ctx->Yi.c,ctx->EKi.c,key);
1356
182
		++ctr;
1357
#if BYTE_ORDER == LITTLE_ENDIAN
1358
#ifdef BSWAP4
1359
182
		ctx->Yi.d[3] = BSWAP4(ctr);
1360
#else
1361
		PUTU32(ctx->Yi.c+12,ctr);
1362
#endif
1363
#else /* BIG_ENDIAN */
1364
		ctx->Yi.d[3] = ctr;
1365
#endif
1366
1404
		while (len--) {
1367
520
			ctx->Xi.c[n] ^= out[n] = in[n]^ctx->EKi.c[n];
1368
520
			++n;
1369
		}
1370
	}
1371
1372
8419
	ctx->mres = n;
1373
8419
	return 0;
1374
8419
}
1375
1376
int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
1377
		const unsigned char *in, unsigned char *out,
1378
		size_t len,ctr128_f stream)
1379
{
1380
	unsigned int n, ctr;
1381
	size_t i;
1382
16546
	u64   mlen = ctx->len.u[1];
1383
8273
	void *key  = ctx->key;
1384
#ifdef GCM_FUNCREF_4BIT
1385
8273
	void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16])	= ctx->gmult;
1386
# ifdef GHASH
1387
	void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
1388
8273
				const u8 *inp,size_t len)	= ctx->ghash;
1389
# endif
1390
#endif
1391
1392
8273
	mlen += len;
1393

16546
	if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
1394
		return -1;
1395
8273
	ctx->len.u[1] = mlen;
1396
1397
8273
	if (ctx->ares) {
1398
		/* First call to decrypt finalizes GHASH(AAD) */
1399
8273
		GCM_MUL(ctx,Xi);
1400
8273
		ctx->ares = 0;
1401
8273
	}
1402
1403
#if BYTE_ORDER == LITTLE_ENDIAN
1404
#ifdef BSWAP4
1405
8273
	ctr = BSWAP4(ctx->Yi.d[3]);
1406
#else
1407
	ctr = GETU32(ctx->Yi.c+12);
1408
#endif
1409
#else /* BIG_ENDIAN */
1410
	ctr = ctx->Yi.d[3];
1411
#endif
1412
1413
8273
	n = ctx->mres;
1414
8273
	if (n) {
1415
		while (n && len) {
1416
			u8 c = *(in++);
1417
			*(out++) = c^ctx->EKi.c[n];
1418
			ctx->Xi.c[n] ^= c;
1419
			--len;
1420
			n = (n+1)%16;
1421
		}
1422
		if (n==0) GCM_MUL (ctx,Xi);
1423
		else {
1424
			ctx->mres = n;
1425
			return 0;
1426
		}
1427
	}
1428
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1429
8273
	while (len>=GHASH_CHUNK) {
1430
		GHASH(ctx,in,GHASH_CHUNK);
1431
		(*stream)(in,out,GHASH_CHUNK/16,key,ctx->Yi.c);
1432
		ctr += GHASH_CHUNK/16;
1433
#if BYTE_ORDER == LITTLE_ENDIAN
1434
#ifdef BSWAP4
1435
		ctx->Yi.d[3] = BSWAP4(ctr);
1436
#else
1437
		PUTU32(ctx->Yi.c+12,ctr);
1438
#endif
1439
#else /* BIG_ENDIAN */
1440
		ctx->Yi.d[3] = ctr;
1441
#endif
1442
		out += GHASH_CHUNK;
1443
		in  += GHASH_CHUNK;
1444
		len -= GHASH_CHUNK;
1445
	}
1446
#endif
1447
8273
	if ((i = (len&(size_t)-16))) {
1448
8249
		size_t j=i/16;
1449
1450
#if defined(GHASH)
1451
8249
		GHASH(ctx,in,i);
1452
#else
1453
		while (j--) {
1454
			size_t k;
1455
			for (k=0;k<16;++k) ctx->Xi.c[k] ^= in[k];
1456
			GCM_MUL(ctx,Xi);
1457
			in += 16;
1458
		}
1459
		j   = i/16;
1460
		in -= i;
1461
#endif
1462
8249
		(*stream)(in,out,j,key,ctx->Yi.c);
1463
8249
		ctr += (unsigned int)j;
1464
#if BYTE_ORDER == LITTLE_ENDIAN
1465
#ifdef BSWAP4
1466
8249
		ctx->Yi.d[3] = BSWAP4(ctr);
1467
#else
1468
		PUTU32(ctx->Yi.c+12,ctr);
1469
#endif
1470
#else /* BIG_ENDIAN */
1471
		ctx->Yi.d[3] = ctr;
1472
#endif
1473
8249
		out += i;
1474
8249
		in  += i;
1475
8249
		len -= i;
1476
8249
	}
1477
8273
	if (len) {
1478
24
		(*ctx->block)(ctx->Yi.c,ctx->EKi.c,key);
1479
24
		++ctr;
1480
#if BYTE_ORDER == LITTLE_ENDIAN
1481
#ifdef BSWAP4
1482
24
		ctx->Yi.d[3] = BSWAP4(ctr);
1483
#else
1484
		PUTU32(ctx->Yi.c+12,ctr);
1485
#endif
1486
#else /* BIG_ENDIAN */
1487
		ctx->Yi.d[3] = ctr;
1488
#endif
1489
768
		while (len--) {
1490
360
			u8 c = in[n];
1491
360
			ctx->Xi.c[n] ^= c;
1492
360
			out[n] = c^ctx->EKi.c[n];
1493
360
			++n;
1494
		}
1495
	}
1496
1497
8273
	ctx->mres = n;
1498
8273
	return 0;
1499
8273
}
1500
1501
int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx,const unsigned char *tag,
1502
			size_t len)
1503
{
1504
33864
	u64 alen = ctx->len.u[0]<<3;
1505
16932
	u64 clen = ctx->len.u[1]<<3;
1506
#ifdef GCM_FUNCREF_4BIT
1507
16932
	void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16])	= ctx->gmult;
1508
#endif
1509
1510

33550
	if (ctx->mres || ctx->ares)
1511
314
		GCM_MUL(ctx,Xi);
1512
1513
#if BYTE_ORDER == LITTLE_ENDIAN
1514
#ifdef BSWAP8
1515
16932
	alen = BSWAP8(alen);
1516
16932
	clen = BSWAP8(clen);
1517
#else
1518
	u8 *p = ctx->len.c;
1519
1520
	ctx->len.u[0] = alen;
1521
	ctx->len.u[1] = clen;
1522
1523
	alen = (u64)GETU32(p)  <<32|GETU32(p+4);
1524
	clen = (u64)GETU32(p+8)<<32|GETU32(p+12);
1525
#endif
1526
#endif
1527
1528
16932
	ctx->Xi.u[0] ^= alen;
1529
16932
	ctx->Xi.u[1] ^= clen;
1530
16932
	GCM_MUL(ctx,Xi);
1531
1532
16932
	ctx->Xi.u[0] ^= ctx->EK0.u[0];
1533
16932
	ctx->Xi.u[1] ^= ctx->EK0.u[1];
1534
1535
16932
	if (tag && len<=sizeof(ctx->Xi))
1536
240
		return memcmp(ctx->Xi.c,tag,len);
1537
	else
1538
16692
		return -1;
1539
16932
}
1540
1541
void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len)
1542
{
1543
33384
	CRYPTO_gcm128_finish(ctx, NULL, 0);
1544
16692
	memcpy(tag, ctx->Xi.c, len<=sizeof(ctx->Xi.c)?len:sizeof(ctx->Xi.c));
1545
16692
}
1546
1547
GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block)
1548
{
1549
	GCM128_CONTEXT *ret;
1550
1551
	if ((ret = malloc(sizeof(GCM128_CONTEXT))))
1552
		CRYPTO_gcm128_init(ret,key,block);
1553
1554
	return ret;
1555
}
1556
1557
void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx)
1558
{
1559
	freezero(ctx, sizeof(*ctx));
1560
}
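
Example: a minimal usage sketch of the API above. It assumes the usual
<openssl/modes.h> declarations (block128_f, GCM128_CONTEXT, the
CRYPTO_gcm128_* prototypes) and the <openssl/aes.h>
AES_set_encrypt_key()/AES_encrypt() interface for the block cipher
callback; error handling is abbreviated.

#include <stddef.h>
#include <openssl/aes.h>
#include <openssl/modes.h>

/* AES-128-GCM seal: encrypts pt into ct and writes a 16-byte tag. */
static int
gcm_seal_sketch(const unsigned char key[16], const unsigned char iv[12],
    const unsigned char *aad, size_t aad_len,
    const unsigned char *pt, unsigned char *ct, size_t len,
    unsigned char tag[16])
{
	AES_KEY aes;
	GCM128_CONTEXT *gcm;
	int ret = -1;

	if (AES_set_encrypt_key(key, 128, &aes) != 0)
		return -1;
	if ((gcm = CRYPTO_gcm128_new(&aes, (block128_f)AES_encrypt)) == NULL)
		return -1;

	CRYPTO_gcm128_setiv(gcm, iv, 12);	/* 12-byte IV fast path */
	if (CRYPTO_gcm128_aad(gcm, aad, aad_len) != 0)
		goto done;
	if (CRYPTO_gcm128_encrypt(gcm, pt, ct, len) != 0)
		goto done;
	CRYPTO_gcm128_tag(gcm, tag, 16);	/* use CRYPTO_gcm128_finish() to verify */
	ret = 0;
 done:
	CRYPTO_gcm128_release(gcm);		/* zeroes and frees the context */
	return ret;
}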