1 |
|
|
/* $OpenBSD: gcm128.c,v 1.13 2015/09/10 15:56:25 jsing Exp $ */ |
2 |
|
|
/* ==================================================================== |
3 |
|
|
* Copyright (c) 2010 The OpenSSL Project. All rights reserved. |
4 |
|
|
* |
5 |
|
|
* Redistribution and use in source and binary forms, with or without |
6 |
|
|
* modification, are permitted provided that the following conditions |
7 |
|
|
* are met: |
8 |
|
|
* |
9 |
|
|
* 1. Redistributions of source code must retain the above copyright |
10 |
|
|
* notice, this list of conditions and the following disclaimer. |
11 |
|
|
* |
12 |
|
|
* 2. Redistributions in binary form must reproduce the above copyright |
13 |
|
|
* notice, this list of conditions and the following disclaimer in |
14 |
|
|
* the documentation and/or other materials provided with the |
15 |
|
|
* distribution. |
16 |
|
|
* |
17 |
|
|
* 3. All advertising materials mentioning features or use of this |
18 |
|
|
* software must display the following acknowledgment: |
19 |
|
|
* "This product includes software developed by the OpenSSL Project |
20 |
|
|
* for use in the OpenSSL Toolkit. (http://www.openssl.org/)" |
21 |
|
|
* |
22 |
|
|
* 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to |
23 |
|
|
* endorse or promote products derived from this software without |
24 |
|
|
* prior written permission. For written permission, please contact |
25 |
|
|
* openssl-core@openssl.org. |
26 |
|
|
* |
27 |
|
|
* 5. Products derived from this software may not be called "OpenSSL" |
28 |
|
|
* nor may "OpenSSL" appear in their names without prior written |
29 |
|
|
* permission of the OpenSSL Project. |
30 |
|
|
* |
31 |
|
|
* 6. Redistributions of any form whatsoever must retain the following |
32 |
|
|
* acknowledgment: |
33 |
|
|
* "This product includes software developed by the OpenSSL Project |
34 |
|
|
* for use in the OpenSSL Toolkit (http://www.openssl.org/)" |
35 |
|
|
* |
36 |
|
|
* THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY |
37 |
|
|
* EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
38 |
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
39 |
|
|
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR |
40 |
|
|
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
41 |
|
|
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
42 |
|
|
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
43 |
|
|
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
44 |
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, |
45 |
|
|
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
46 |
|
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED |
47 |
|
|
* OF THE POSSIBILITY OF SUCH DAMAGE. |
48 |
|
|
* ==================================================================== |
49 |
|
|
*/ |
50 |
|
|
|
51 |
|
|
#define OPENSSL_FIPSAPI |
52 |
|
|
|
53 |
|
|
#include <openssl/crypto.h> |
54 |
|
|
#include "modes_lcl.h" |
55 |
|
|
#include <string.h> |
56 |
|
|
|
57 |
|
|
#ifndef MODES_DEBUG |
58 |
|
|
# ifndef NDEBUG |
59 |
|
|
# define NDEBUG |
60 |
|
|
# endif |
61 |
|
|
#endif |
62 |
|
|
|
63 |
|
|
#if defined(BSWAP4) && defined(__STRICT_ALIGNMENT)
/*
 * Redefine the big-endian 32-bit load/store helpers as one word access
 * combined with BSWAP4.  The original note here reads "redefine, because
 * alignment is ensured", i.e. callers in this file are expected to pass
 * suitably aligned pointers.
 */
#undef  GETU32
#define GETU32(p)	BSWAP4(*(const u32 *)(p))
#undef  PUTU32
/* Parenthesized so the expansion is a single expression in any context. */
#define PUTU32(p,v)	(*(u32 *)(p) = BSWAP4(v))
#endif
70 |
|
|
|
71 |
|
|
/* Place a 16-bit constant in the 16 most significant bits of a size_t. */
#define	PACK(s)		((size_t)(s)<<(sizeof(size_t)*8-16))
/*
 * Shift the 128-bit value V (a struct with u64 fields .hi and .lo) right by
 * one bit.  When the bit shifted out of V.lo was set, 0xe1 is XORed into the
 * most significant byte of V.hi.  The mask is built without a branch on the
 * data: (0 - bit) is either all-zeros or all-ones.  The sizeof(size_t) test
 * merely selects between a 64-bit and a 32-bit way of forming that mask.
 *
 * V is parenthesized at every use so that any lvalue expression (for
 * example *ptr) can be passed, not only a plain identifier.
 */
#define REDUCE1BIT(V)	do { \
	if (sizeof(size_t)==8) { \
		u64 T = U64(0xe100000000000000) & (0-((V).lo&1)); \
		(V).lo  = ((V).hi<<63)|((V).lo>>1); \
		(V).hi  = ((V).hi>>1 )^T; \
	} \
	else { \
		u32 T = 0xe1000000U & (0-(u32)((V).lo&1)); \
		(V).lo  = ((V).hi<<63)|((V).lo>>1); \
		(V).hi  = ((V).hi>>1 )^((u64)T<<32); \
	} \
} while(0)
84 |
|
|
|
85 |
|
|
/* |
86 |
|
|
* Even though permitted values for TABLE_BITS are 8, 4 and 1, it should |
87 |
|
|
* never be set to 8. 8 is effectively reserved for testing purposes. |
88 |
|
|
* TABLE_BITS>1 are lookup-table-driven implementations referred to as |
89 |
|
|
* "Shoup's" in GCM specification. In other words OpenSSL does not cover |
90 |
|
|
* whole spectrum of possible table driven implementations. Why? In |
91 |
|
|
* non-"Shoup's" case memory access pattern is segmented in such manner, |
92 |
|
|
* that it's trivial to see that cache timing information can reveal |
93 |
|
|
* fair portion of intermediate hash value. Given that ciphertext is |
94 |
|
|
* always available to attacker, it's possible for him to attempt to |
95 |
|
|
* deduce secret parameter H and if successful, tamper with messages |
96 |
|
|
* [which is nothing but trivial in CTR mode]. In "Shoup's" case it's |
97 |
|
|
* not as trivial, but there is no reason to believe that it's resistant |
98 |
|
|
* to cache-timing attack. And the thing about "8-bit" implementation is |
99 |
|
|
* that it consumes 16 (sixteen) times more memory, 4KB per individual |
100 |
|
|
* key + 1KB shared. Well, on pros side it should be twice as fast as |
101 |
|
|
* "4-bit" version. And for gcc-generated x86[_64] code, "8-bit" version |
102 |
|
|
* was observed to run ~75% faster, closer to 100% for commercial |
103 |
|
|
* compilers... Yet "4-bit" procedure is preferred, because it's |
104 |
|
|
* believed to provide better security-performance balance and adequate |
105 |
|
|
* all-round performance. "All-round" refers to things like: |
106 |
|
|
* |
107 |
|
|
* - shorter setup time effectively improves overall timing for |
108 |
|
|
* handling short messages; |
109 |
|
|
* - larger table allocation can become unbearable because of VM |
110 |
|
|
* subsystem penalties (for example on Windows large enough free |
111 |
|
|
* results in VM working set trimming, meaning that consequent |
112 |
|
|
* malloc would immediately incur working set expansion); |
113 |
|
|
* - larger table has larger cache footprint, which can affect |
114 |
|
|
* performance of other code paths (not necessarily even from same |
115 |
|
|
* thread in Hyper-Threading world); |
116 |
|
|
* |
117 |
|
|
* Value of 1 is not appropriate for performance reasons. |
118 |
|
|
*/ |
119 |
|
|
#if TABLE_BITS==8 |
120 |
|
|
|
121 |
|
|
/*
 * Fill the 256-entry table used by gcm_gmult_8bit() from the hash key H
 * (H[0] is the high half, H[1] the low half).
 *
 * Entry 0 is zero and entry 128 is H itself.  Entries 64, 32, ..., 1 are
 * obtained by applying REDUCE1BIT repeatedly.  Every remaining entry
 * (2^k + j) is the XOR of entry 2^k and entry j.
 */
static void gcm_init_8bit(u128 Htable[256], u64 H[2])
{
	u128 cur;
	int  pow2, k;

	cur.hi = H[0];
	cur.lo = H[1];

	Htable[0].hi = 0;
	Htable[0].lo = 0;

	/* power-of-two slots: 128, 64, ..., 1 */
	Htable[128] = cur;
	pow2 = 64;
	while (pow2 > 0) {
		REDUCE1BIT(cur);
		Htable[pow2] = cur;
		pow2 >>= 1;
	}

	/* all other slots are XOR combinations of already filled ones */
	for (pow2 = 2; pow2 < 256; pow2 <<= 1) {
		const u128 base = Htable[pow2];

		for (k = 1; k < pow2; ++k) {
			Htable[pow2 + k].hi = base.hi ^ Htable[k].hi;
			Htable[pow2 + k].lo = base.lo ^ Htable[k].lo;
		}
	}
}
144 |
|
|
|
145 |
|
|
/*
 * Byte-at-a-time table multiplication step.
 *
 * The 16 bytes of Xi are consumed from the last byte to the first.  Each
 * byte selects an Htable entry that is XORed into the accumulator; between
 * bytes the accumulator is shifted right by 8 bits and the byte that fell
 * off is folded back in through rem_8bit.  The result is written back to
 * Xi in big-endian byte order.
 */
static void gcm_gmult_8bit(u64 Xi[2], const u128 Htable[256])
{
	static const size_t rem_8bit[256] = {
		PACK(0x0000), PACK(0x01C2), PACK(0x0384), PACK(0x0246),
		PACK(0x0708), PACK(0x06CA), PACK(0x048C), PACK(0x054E),
		PACK(0x0E10), PACK(0x0FD2), PACK(0x0D94), PACK(0x0C56),
		PACK(0x0918), PACK(0x08DA), PACK(0x0A9C), PACK(0x0B5E),
		PACK(0x1C20), PACK(0x1DE2), PACK(0x1FA4), PACK(0x1E66),
		PACK(0x1B28), PACK(0x1AEA), PACK(0x18AC), PACK(0x196E),
		PACK(0x1230), PACK(0x13F2), PACK(0x11B4), PACK(0x1076),
		PACK(0x1538), PACK(0x14FA), PACK(0x16BC), PACK(0x177E),
		PACK(0x3840), PACK(0x3982), PACK(0x3BC4), PACK(0x3A06),
		PACK(0x3F48), PACK(0x3E8A), PACK(0x3CCC), PACK(0x3D0E),
		PACK(0x3650), PACK(0x3792), PACK(0x35D4), PACK(0x3416),
		PACK(0x3158), PACK(0x309A), PACK(0x32DC), PACK(0x331E),
		PACK(0x2460), PACK(0x25A2), PACK(0x27E4), PACK(0x2626),
		PACK(0x2368), PACK(0x22AA), PACK(0x20EC), PACK(0x212E),
		PACK(0x2A70), PACK(0x2BB2), PACK(0x29F4), PACK(0x2836),
		PACK(0x2D78), PACK(0x2CBA), PACK(0x2EFC), PACK(0x2F3E),
		PACK(0x7080), PACK(0x7142), PACK(0x7304), PACK(0x72C6),
		PACK(0x7788), PACK(0x764A), PACK(0x740C), PACK(0x75CE),
		PACK(0x7E90), PACK(0x7F52), PACK(0x7D14), PACK(0x7CD6),
		PACK(0x7998), PACK(0x785A), PACK(0x7A1C), PACK(0x7BDE),
		PACK(0x6CA0), PACK(0x6D62), PACK(0x6F24), PACK(0x6EE6),
		PACK(0x6BA8), PACK(0x6A6A), PACK(0x682C), PACK(0x69EE),
		PACK(0x62B0), PACK(0x6372), PACK(0x6134), PACK(0x60F6),
		PACK(0x65B8), PACK(0x647A), PACK(0x663C), PACK(0x67FE),
		PACK(0x48C0), PACK(0x4902), PACK(0x4B44), PACK(0x4A86),
		PACK(0x4FC8), PACK(0x4E0A), PACK(0x4C4C), PACK(0x4D8E),
		PACK(0x46D0), PACK(0x4712), PACK(0x4554), PACK(0x4496),
		PACK(0x41D8), PACK(0x401A), PACK(0x425C), PACK(0x439E),
		PACK(0x54E0), PACK(0x5522), PACK(0x5764), PACK(0x56A6),
		PACK(0x53E8), PACK(0x522A), PACK(0x506C), PACK(0x51AE),
		PACK(0x5AF0), PACK(0x5B32), PACK(0x5974), PACK(0x58B6),
		PACK(0x5DF8), PACK(0x5C3A), PACK(0x5E7C), PACK(0x5FBE),
		PACK(0xE100), PACK(0xE0C2), PACK(0xE284), PACK(0xE346),
		PACK(0xE608), PACK(0xE7CA), PACK(0xE58C), PACK(0xE44E),
		PACK(0xEF10), PACK(0xEED2), PACK(0xEC94), PACK(0xED56),
		PACK(0xE818), PACK(0xE9DA), PACK(0xEB9C), PACK(0xEA5E),
		PACK(0xFD20), PACK(0xFCE2), PACK(0xFEA4), PACK(0xFF66),
		PACK(0xFA28), PACK(0xFBEA), PACK(0xF9AC), PACK(0xF86E),
		PACK(0xF330), PACK(0xF2F2), PACK(0xF0B4), PACK(0xF176),
		PACK(0xF438), PACK(0xF5FA), PACK(0xF7BC), PACK(0xF67E),
		PACK(0xD940), PACK(0xD882), PACK(0xDAC4), PACK(0xDB06),
		PACK(0xDE48), PACK(0xDF8A), PACK(0xDDCC), PACK(0xDC0E),
		PACK(0xD750), PACK(0xD692), PACK(0xD4D4), PACK(0xD516),
		PACK(0xD058), PACK(0xD19A), PACK(0xD3DC), PACK(0xD21E),
		PACK(0xC560), PACK(0xC4A2), PACK(0xC6E4), PACK(0xC726),
		PACK(0xC268), PACK(0xC3AA), PACK(0xC1EC), PACK(0xC02E),
		PACK(0xCB70), PACK(0xCAB2), PACK(0xC8F4), PACK(0xC936),
		PACK(0xCC78), PACK(0xCDBA), PACK(0xCFFC), PACK(0xCE3E),
		PACK(0x9180), PACK(0x9042), PACK(0x9204), PACK(0x93C6),
		PACK(0x9688), PACK(0x974A), PACK(0x950C), PACK(0x94CE),
		PACK(0x9F90), PACK(0x9E52), PACK(0x9C14), PACK(0x9DD6),
		PACK(0x9898), PACK(0x995A), PACK(0x9B1C), PACK(0x9ADE),
		PACK(0x8DA0), PACK(0x8C62), PACK(0x8E24), PACK(0x8FE6),
		PACK(0x8AA8), PACK(0x8B6A), PACK(0x892C), PACK(0x88EE),
		PACK(0x83B0), PACK(0x8272), PACK(0x8034), PACK(0x81F6),
		PACK(0x84B8), PACK(0x857A), PACK(0x873C), PACK(0x86FE),
		PACK(0xA9C0), PACK(0xA802), PACK(0xAA44), PACK(0xAB86),
		PACK(0xAEC8), PACK(0xAF0A), PACK(0xAD4C), PACK(0xAC8E),
		PACK(0xA7D0), PACK(0xA612), PACK(0xA454), PACK(0xA596),
		PACK(0xA0D8), PACK(0xA11A), PACK(0xA35C), PACK(0xA29E),
		PACK(0xB5E0), PACK(0xB422), PACK(0xB664), PACK(0xB7A6),
		PACK(0xB2E8), PACK(0xB32A), PACK(0xB16C), PACK(0xB0AE),
		PACK(0xBBF0), PACK(0xBA32), PACK(0xB874), PACK(0xB9B6),
		PACK(0xBCF8), PACK(0xBD3A), PACK(0xBF7C), PACK(0xBEBE) };
	const u8 *bytes = (const u8 *)Xi;
	u128 acc = { 0, 0 };
	size_t carry;
	int pos = 15;

	for (;;) {
		const u128 *entry = &Htable[bytes[pos]];

		acc.hi ^= entry->hi;
		acc.lo ^= entry->lo;

		/* the first byte of Xi is the last one to be folded in */
		if (pos == 0)
			break;
		--pos;

		/* shift right by one byte and reduce what dropped out */
		carry  = (size_t)acc.lo & 0xff;
		acc.lo = (acc.hi << 56) | (acc.lo >> 8);
		acc.hi >>= 8;
		if (sizeof(size_t) == 8)
			acc.hi ^= rem_8bit[carry];
		else
			acc.hi ^= (u64)rem_8bit[carry] << 32;
	}

	/* store the accumulator into Xi, most significant byte first */
	if (BYTE_ORDER != LITTLE_ENDIAN) {
		Xi[0] = acc.hi;
		Xi[1] = acc.lo;
	} else {
#ifdef BSWAP8
		Xi[0] = BSWAP8(acc.hi);
		Xi[1] = BSWAP8(acc.lo);
#else
		u8 *out = (u8 *)Xi;
		u32 word;

		word = (u32)(acc.hi >> 32);
		PUTU32(out, word);
		word = (u32)(acc.hi);
		PUTU32(out + 4, word);
		word = (u32)(acc.lo >> 32);
		PUTU32(out + 8, word);
		word = (u32)(acc.lo);
		PUTU32(out + 12, word);
#endif
	}
}
251 |
|
|
#define GCM_MUL(ctx,Xi) gcm_gmult_8bit(ctx->Xi.u,ctx->Htable) |
252 |
|
|
|
253 |
|
|
#elif TABLE_BITS==4 |
254 |
|
|
|
255 |
|
|
/*
 * Fill the 16-entry table used by the 4-bit routines from the hash key H
 * (H[0] is the high half, H[1] the low half).
 *
 * Entry 0 is zero and entry 8 is H.  Entries 4, 2 and 1 come from applying
 * REDUCE1BIT repeatedly, and every other entry is the XOR of two entries
 * already present.  With OPENSSL_SMALL_FOOTPRINT this is done with loops,
 * otherwise it is written out entry by entry.
 *
 * When built with GHASH_ASM for ARM the two 64-bit halves of every entry
 * are additionally rearranged into the dword order the assembler expects.
 */
static void gcm_init_4bit(u128 Htable[16], u64 H[2])
{
	u128 V;
#if defined(OPENSSL_SMALL_FOOTPRINT)
	int  pow2;
#endif

	V.hi = H[0];
	V.lo = H[1];

	Htable[0].hi = 0;
	Htable[0].lo = 0;

#if defined(OPENSSL_SMALL_FOOTPRINT)
	/* power-of-two slots: 8, 4, 2, 1 */
	Htable[8] = V;
	pow2 = 4;
	while (pow2 > 0) {
		REDUCE1BIT(V);
		Htable[pow2] = V;
		pow2 >>= 1;
	}

	/* remaining slots are XORs of slot pow2 with the slots below it */
	for (pow2 = 2; pow2 < 16; pow2 <<= 1) {
		int k;

		V = Htable[pow2];
		for (k = 1; k < pow2; ++k) {
			Htable[pow2 + k].hi = V.hi ^ Htable[k].hi;
			Htable[pow2 + k].lo = V.lo ^ Htable[k].lo;
		}
	}
#else
	Htable[8] = V;
	REDUCE1BIT(V);
	Htable[4] = V;
	REDUCE1BIT(V);
	Htable[2] = V;
	REDUCE1BIT(V);
	Htable[1] = V;

	/* 3 = 1 ^ 2 */
	Htable[3].hi = Htable[1].hi ^ Htable[2].hi;
	Htable[3].lo = Htable[1].lo ^ Htable[2].lo;

	/* 5..7 = 4 ^ (1..3) */
	Htable[5].hi = Htable[4].hi ^ Htable[1].hi;
	Htable[5].lo = Htable[4].lo ^ Htable[1].lo;
	Htable[6].hi = Htable[4].hi ^ Htable[2].hi;
	Htable[6].lo = Htable[4].lo ^ Htable[2].lo;
	Htable[7].hi = Htable[4].hi ^ Htable[3].hi;
	Htable[7].lo = Htable[4].lo ^ Htable[3].lo;

	/* 9..15 = 8 ^ (1..7) */
	Htable[9].hi  = Htable[8].hi ^ Htable[1].hi;
	Htable[9].lo  = Htable[8].lo ^ Htable[1].lo;
	Htable[10].hi = Htable[8].hi ^ Htable[2].hi;
	Htable[10].lo = Htable[8].lo ^ Htable[2].lo;
	Htable[11].hi = Htable[8].hi ^ Htable[3].hi;
	Htable[11].lo = Htable[8].lo ^ Htable[3].lo;
	Htable[12].hi = Htable[8].hi ^ Htable[4].hi;
	Htable[12].lo = Htable[8].lo ^ Htable[4].lo;
	Htable[13].hi = Htable[8].hi ^ Htable[5].hi;
	Htable[13].lo = Htable[8].lo ^ Htable[5].lo;
	Htable[14].hi = Htable[8].hi ^ Htable[6].hi;
	Htable[14].lo = Htable[8].lo ^ Htable[6].lo;
	Htable[15].hi = Htable[8].hi ^ Htable[7].hi;
	Htable[15].lo = Htable[8].lo ^ Htable[7].lo;
#endif
#if defined(GHASH_ASM) && (defined(__arm__) || defined(__arm))
	/*
	 * ARM assembler expects specific dword order in Htable.
	 */
	{
		int k;

		for (k = 0; k < 16; ++k) {
			V = Htable[k];
			if (BYTE_ORDER == LITTLE_ENDIAN) {
				/* swap the two 64-bit halves */
				Htable[k].hi = V.lo;
				Htable[k].lo = V.hi;
			} else {
				/* swap the halves and the 32-bit words in each */
				Htable[k].hi = V.lo<<32|V.lo>>32;
				Htable[k].lo = V.hi<<32|V.hi>>32;
			}
		}
	}
#endif
}
325 |
|
|
|
326 |
|
|
#ifndef GHASH_ASM |
327 |
|
|
static const size_t rem_4bit[16] = { |
328 |
|
|
PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460), |
329 |
|
|
PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0), |
330 |
|
|
PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560), |
331 |
|
|
PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0) }; |
332 |
|
|
|
333 |
|
|
static void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16]) |
334 |
|
|
{ |
335 |
|
|
u128 Z; |
336 |
|
|
int cnt = 15; |
337 |
|
|
size_t rem, nlo, nhi; |
338 |
|
|
|
339 |
|
|
nlo = ((const u8 *)Xi)[15]; |
340 |
|
|
nhi = nlo>>4; |
341 |
|
|
nlo &= 0xf; |
342 |
|
|
|
343 |
|
|
Z.hi = Htable[nlo].hi; |
344 |
|
|
Z.lo = Htable[nlo].lo; |
345 |
|
|
|
346 |
|
|
while (1) { |
347 |
|
|
rem = (size_t)Z.lo&0xf; |
348 |
|
|
Z.lo = (Z.hi<<60)|(Z.lo>>4); |
349 |
|
|
Z.hi = (Z.hi>>4); |
350 |
|
|
if (sizeof(size_t)==8) |
351 |
|
|
Z.hi ^= rem_4bit[rem]; |
352 |
|
|
else |
353 |
|
|
Z.hi ^= (u64)rem_4bit[rem]<<32; |
354 |
|
|
|
355 |
|
|
Z.hi ^= Htable[nhi].hi; |
356 |
|
|
Z.lo ^= Htable[nhi].lo; |
357 |
|
|
|
358 |
|
|
if (--cnt<0) break; |
359 |
|
|
|
360 |
|
|
nlo = ((const u8 *)Xi)[cnt]; |
361 |
|
|
nhi = nlo>>4; |
362 |
|
|
nlo &= 0xf; |
363 |
|
|
|
364 |
|
|
rem = (size_t)Z.lo&0xf; |
365 |
|
|
Z.lo = (Z.hi<<60)|(Z.lo>>4); |
366 |
|
|
Z.hi = (Z.hi>>4); |
367 |
|
|
if (sizeof(size_t)==8) |
368 |
|
|
Z.hi ^= rem_4bit[rem]; |
369 |
|
|
else |
370 |
|
|
Z.hi ^= (u64)rem_4bit[rem]<<32; |
371 |
|
|
|
372 |
|
|
Z.hi ^= Htable[nlo].hi; |
373 |
|
|
Z.lo ^= Htable[nlo].lo; |
374 |
|
|
} |
375 |
|
|
|
376 |
|
|
if (BYTE_ORDER == LITTLE_ENDIAN) { |
377 |
|
|
#ifdef BSWAP8 |
378 |
|
|
Xi[0] = BSWAP8(Z.hi); |
379 |
|
|
Xi[1] = BSWAP8(Z.lo); |
380 |
|
|
#else |
381 |
|
|
u8 *p = (u8 *)Xi; |
382 |
|
|
u32 v; |
383 |
|
|
v = (u32)(Z.hi>>32); PUTU32(p,v); |
384 |
|
|
v = (u32)(Z.hi); PUTU32(p+4,v); |
385 |
|
|
v = (u32)(Z.lo>>32); PUTU32(p+8,v); |
386 |
|
|
v = (u32)(Z.lo); PUTU32(p+12,v); |
387 |
|
|
#endif |
388 |
|
|
} |
389 |
|
|
else { |
390 |
|
|
Xi[0] = Z.hi; |
391 |
|
|
Xi[1] = Z.lo; |
392 |
|
|
} |
393 |
|
|
} |
394 |
|
|
|
395 |
|
|
#if !defined(OPENSSL_SMALL_FOOTPRINT) |
396 |
|
|
/*
 * Streamed gcm_gmult_4bit, see CRYPTO_gcm128_[en|de]crypt for
 * details... Compiler-generated code doesn't seem to give any
 * performance improvement, at least not on x86[_64]. It's here
 * mostly as a reference and a placeholder for possible future
 * non-trivial optimization[s]...
 */
403 |
|
|
static void gcm_ghash_4bit(u64 Xi[2],const u128 Htable[16], |
404 |
|
|
const u8 *inp,size_t len) |
405 |
|
|
{ |
406 |
|
|
u128 Z; |
407 |
|
|
int cnt; |
408 |
|
|
size_t rem, nlo, nhi; |
409 |
|
|
|
410 |
|
|
#if 1 |
411 |
|
|
do { |
412 |
|
|
cnt = 15; |
413 |
|
|
nlo = ((const u8 *)Xi)[15]; |
414 |
|
|
nlo ^= inp[15]; |
415 |
|
|
nhi = nlo>>4; |
416 |
|
|
nlo &= 0xf; |
417 |
|
|
|
418 |
|
|
Z.hi = Htable[nlo].hi; |
419 |
|
|
Z.lo = Htable[nlo].lo; |
420 |
|
|
|
421 |
|
|
while (1) { |
422 |
|
|
rem = (size_t)Z.lo&0xf; |
423 |
|
|
Z.lo = (Z.hi<<60)|(Z.lo>>4); |
424 |
|
|
Z.hi = (Z.hi>>4); |
425 |
|
|
if (sizeof(size_t)==8) |
426 |
|
|
Z.hi ^= rem_4bit[rem]; |
427 |
|
|
else |
428 |
|
|
Z.hi ^= (u64)rem_4bit[rem]<<32; |
429 |
|
|
|
430 |
|
|
Z.hi ^= Htable[nhi].hi; |
431 |
|
|
Z.lo ^= Htable[nhi].lo; |
432 |
|
|
|
433 |
|
|
if (--cnt<0) break; |
434 |
|
|
|
435 |
|
|
nlo = ((const u8 *)Xi)[cnt]; |
436 |
|
|
nlo ^= inp[cnt]; |
437 |
|
|
nhi = nlo>>4; |
438 |
|
|
nlo &= 0xf; |
439 |
|
|
|
440 |
|
|
rem = (size_t)Z.lo&0xf; |
441 |
|
|
Z.lo = (Z.hi<<60)|(Z.lo>>4); |
442 |
|
|
Z.hi = (Z.hi>>4); |
443 |
|
|
if (sizeof(size_t)==8) |
444 |
|
|
Z.hi ^= rem_4bit[rem]; |
445 |
|
|
else |
446 |
|
|
Z.hi ^= (u64)rem_4bit[rem]<<32; |
447 |
|
|
|
448 |
|
|
Z.hi ^= Htable[nlo].hi; |
449 |
|
|
Z.lo ^= Htable[nlo].lo; |
450 |
|
|
} |
451 |
|
|
#else |
452 |
|
|
/* |
453 |
|
|
* Extra 256+16 bytes per-key plus 512 bytes shared tables |
454 |
|
|
* [should] give ~50% improvement... One could have PACK()-ed |
455 |
|
|
* the rem_8bit even here, but the priority is to minimize |
456 |
|
|
* cache footprint... |
457 |
|
|
*/ |
458 |
|
|
u128 Hshr4[16]; /* Htable shifted right by 4 bits */ |
459 |
|
|
u8 Hshl4[16]; /* Htable shifted left by 4 bits */ |
460 |
|
|
static const unsigned short rem_8bit[256] = { |
461 |
|
|
0x0000, 0x01C2, 0x0384, 0x0246, 0x0708, 0x06CA, 0x048C, 0x054E, |
462 |
|
|
0x0E10, 0x0FD2, 0x0D94, 0x0C56, 0x0918, 0x08DA, 0x0A9C, 0x0B5E, |
463 |
|
|
0x1C20, 0x1DE2, 0x1FA4, 0x1E66, 0x1B28, 0x1AEA, 0x18AC, 0x196E, |
464 |
|
|
0x1230, 0x13F2, 0x11B4, 0x1076, 0x1538, 0x14FA, 0x16BC, 0x177E, |
465 |
|
|
0x3840, 0x3982, 0x3BC4, 0x3A06, 0x3F48, 0x3E8A, 0x3CCC, 0x3D0E, |
466 |
|
|
0x3650, 0x3792, 0x35D4, 0x3416, 0x3158, 0x309A, 0x32DC, 0x331E, |
467 |
|
|
0x2460, 0x25A2, 0x27E4, 0x2626, 0x2368, 0x22AA, 0x20EC, 0x212E, |
468 |
|
|
0x2A70, 0x2BB2, 0x29F4, 0x2836, 0x2D78, 0x2CBA, 0x2EFC, 0x2F3E, |
469 |
|
|
0x7080, 0x7142, 0x7304, 0x72C6, 0x7788, 0x764A, 0x740C, 0x75CE, |
470 |
|
|
0x7E90, 0x7F52, 0x7D14, 0x7CD6, 0x7998, 0x785A, 0x7A1C, 0x7BDE, |
471 |
|
|
0x6CA0, 0x6D62, 0x6F24, 0x6EE6, 0x6BA8, 0x6A6A, 0x682C, 0x69EE, |
472 |
|
|
0x62B0, 0x6372, 0x6134, 0x60F6, 0x65B8, 0x647A, 0x663C, 0x67FE, |
473 |
|
|
0x48C0, 0x4902, 0x4B44, 0x4A86, 0x4FC8, 0x4E0A, 0x4C4C, 0x4D8E, |
474 |
|
|
0x46D0, 0x4712, 0x4554, 0x4496, 0x41D8, 0x401A, 0x425C, 0x439E, |
475 |
|
|
0x54E0, 0x5522, 0x5764, 0x56A6, 0x53E8, 0x522A, 0x506C, 0x51AE, |
476 |
|
|
0x5AF0, 0x5B32, 0x5974, 0x58B6, 0x5DF8, 0x5C3A, 0x5E7C, 0x5FBE, |
477 |
|
|
0xE100, 0xE0C2, 0xE284, 0xE346, 0xE608, 0xE7CA, 0xE58C, 0xE44E, |
478 |
|
|
0xEF10, 0xEED2, 0xEC94, 0xED56, 0xE818, 0xE9DA, 0xEB9C, 0xEA5E, |
479 |
|
|
0xFD20, 0xFCE2, 0xFEA4, 0xFF66, 0xFA28, 0xFBEA, 0xF9AC, 0xF86E, |
480 |
|
|
0xF330, 0xF2F2, 0xF0B4, 0xF176, 0xF438, 0xF5FA, 0xF7BC, 0xF67E, |
481 |
|
|
0xD940, 0xD882, 0xDAC4, 0xDB06, 0xDE48, 0xDF8A, 0xDDCC, 0xDC0E, |
482 |
|
|
0xD750, 0xD692, 0xD4D4, 0xD516, 0xD058, 0xD19A, 0xD3DC, 0xD21E, |
483 |
|
|
0xC560, 0xC4A2, 0xC6E4, 0xC726, 0xC268, 0xC3AA, 0xC1EC, 0xC02E, |
484 |
|
|
0xCB70, 0xCAB2, 0xC8F4, 0xC936, 0xCC78, 0xCDBA, 0xCFFC, 0xCE3E, |
485 |
|
|
0x9180, 0x9042, 0x9204, 0x93C6, 0x9688, 0x974A, 0x950C, 0x94CE, |
486 |
|
|
0x9F90, 0x9E52, 0x9C14, 0x9DD6, 0x9898, 0x995A, 0x9B1C, 0x9ADE, |
487 |
|
|
0x8DA0, 0x8C62, 0x8E24, 0x8FE6, 0x8AA8, 0x8B6A, 0x892C, 0x88EE, |
488 |
|
|
0x83B0, 0x8272, 0x8034, 0x81F6, 0x84B8, 0x857A, 0x873C, 0x86FE, |
489 |
|
|
0xA9C0, 0xA802, 0xAA44, 0xAB86, 0xAEC8, 0xAF0A, 0xAD4C, 0xAC8E, |
490 |
|
|
0xA7D0, 0xA612, 0xA454, 0xA596, 0xA0D8, 0xA11A, 0xA35C, 0xA29E, |
491 |
|
|
0xB5E0, 0xB422, 0xB664, 0xB7A6, 0xB2E8, 0xB32A, 0xB16C, 0xB0AE, |
492 |
|
|
0xBBF0, 0xBA32, 0xB874, 0xB9B6, 0xBCF8, 0xBD3A, 0xBF7C, 0xBEBE }; |
493 |
|
|
/* |
494 |
|
|
* This pre-processing phase slows down procedure by approximately |
495 |
|
|
* same time as it makes each loop spin faster. In other words |
496 |
|
|
* single block performance is approximately same as straightforward |
497 |
|
|
* "4-bit" implementation, and then it goes only faster... |
498 |
|
|
*/ |
499 |
|
|
for (cnt=0; cnt<16; ++cnt) { |
500 |
|
|
Z.hi = Htable[cnt].hi; |
501 |
|
|
Z.lo = Htable[cnt].lo; |
502 |
|
|
Hshr4[cnt].lo = (Z.hi<<60)|(Z.lo>>4); |
503 |
|
|
Hshr4[cnt].hi = (Z.hi>>4); |
504 |
|
|
Hshl4[cnt] = (u8)(Z.lo<<4); |
505 |
|
|
} |
506 |
|
|
|
507 |
|
|
do { |
508 |
|
|
for (Z.lo=0, Z.hi=0, cnt=15; cnt; --cnt) { |
509 |
|
|
nlo = ((const u8 *)Xi)[cnt]; |
510 |
|
|
nlo ^= inp[cnt]; |
511 |
|
|
nhi = nlo>>4; |
512 |
|
|
nlo &= 0xf; |
513 |
|
|
|
514 |
|
|
Z.hi ^= Htable[nlo].hi; |
515 |
|
|
Z.lo ^= Htable[nlo].lo; |
516 |
|
|
|
517 |
|
|
rem = (size_t)Z.lo&0xff; |
518 |
|
|
|
519 |
|
|
Z.lo = (Z.hi<<56)|(Z.lo>>8); |
520 |
|
|
Z.hi = (Z.hi>>8); |
521 |
|
|
|
522 |
|
|
Z.hi ^= Hshr4[nhi].hi; |
523 |
|
|
Z.lo ^= Hshr4[nhi].lo; |
524 |
|
|
Z.hi ^= (u64)rem_8bit[rem^Hshl4[nhi]]<<48; |
525 |
|
|
} |
526 |
|
|
|
527 |
|
|
nlo = ((const u8 *)Xi)[0]; |
528 |
|
|
nlo ^= inp[0]; |
529 |
|
|
nhi = nlo>>4; |
530 |
|
|
nlo &= 0xf; |
531 |
|
|
|
532 |
|
|
Z.hi ^= Htable[nlo].hi; |
533 |
|
|
Z.lo ^= Htable[nlo].lo; |
534 |
|
|
|
535 |
|
|
rem = (size_t)Z.lo&0xf; |
536 |
|
|
|
537 |
|
|
Z.lo = (Z.hi<<60)|(Z.lo>>4); |
538 |
|
|
Z.hi = (Z.hi>>4); |
539 |
|
|
|
540 |
|
|
Z.hi ^= Htable[nhi].hi; |
541 |
|
|
Z.lo ^= Htable[nhi].lo; |
542 |
|
|
Z.hi ^= ((u64)rem_8bit[rem<<4])<<48; |
543 |
|
|
#endif |
544 |
|
|
|
545 |
|
|
if (BYTE_ORDER == LITTLE_ENDIAN) { |
546 |
|
|
#ifdef BSWAP8 |
547 |
|
|
Xi[0] = BSWAP8(Z.hi); |
548 |
|
|
Xi[1] = BSWAP8(Z.lo); |
549 |
|
|
#else |
550 |
|
|
u8 *p = (u8 *)Xi; |
551 |
|
|
u32 v; |
552 |
|
|
v = (u32)(Z.hi>>32); PUTU32(p,v); |
553 |
|
|
v = (u32)(Z.hi); PUTU32(p+4,v); |
554 |
|
|
v = (u32)(Z.lo>>32); PUTU32(p+8,v); |
555 |
|
|
v = (u32)(Z.lo); PUTU32(p+12,v); |
556 |
|
|
#endif |
557 |
|
|
} |
558 |
|
|
else { |
559 |
|
|
Xi[0] = Z.hi; |
560 |
|
|
Xi[1] = Z.lo; |
561 |
|
|
} |
562 |
|
|
} while (inp+=16, len-=16); |
563 |
|
|
} |
564 |
|
|
#endif |
565 |
|
|
#else |
566 |
|
|
void gcm_gmult_4bit(u64 Xi[2],const u128 Htable[16]); |
567 |
|
|
void gcm_ghash_4bit(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len); |
568 |
|
|
#endif |
569 |
|
|
|
570 |
|
|
#define GCM_MUL(ctx,Xi) gcm_gmult_4bit(ctx->Xi.u,ctx->Htable) |
571 |
|
|
#if defined(GHASH_ASM) || !defined(OPENSSL_SMALL_FOOTPRINT) |
572 |
|
|
#define GHASH(ctx,in,len) gcm_ghash_4bit((ctx)->Xi.u,(ctx)->Htable,in,len) |
573 |
|
|
/* GHASH_CHUNK is a "stride parameter" meant to mitigate the cache
 * thrashing effect. In other words, the idea is to hash data while it's
 * still in L1 cache after the encryption pass... */
576 |
|
|
#define GHASH_CHUNK (3*1024) |
577 |
|
|
#endif |
578 |
|
|
|
579 |
|
|
#else /* TABLE_BITS */ |
580 |
|
|
|
581 |
|
|
/*
 * Bit-at-a-time multiplication step (no lookup table).
 *
 * Xi is read one machine word at a time in big-endian order.  For each bit,
 * most significant first, V is XORed into the accumulator when the bit is
 * set, and V is then advanced with REDUCE1BIT.  The selection uses an
 * all-zeros/all-ones mask instead of a branch on the data bit.  The result
 * is written back to Xi in big-endian byte order.
 *
 * Xi - 128-bit value, updated in place
 * H  - hash key in host byte order (H[0] high half, H[1] low half)
 *
 * The working word is unsigned: shifting it left never overflows, and the
 * top bit is extracted with a logical shift, so the mask does not depend on
 * how the compiler shifts negative values.
 */
static void gcm_gmult_1bit(u64 Xi[2],const u64 H[2])
{
	u128 V,Z = { 0,0 };
	unsigned long X;
	size_t i,j;
	const unsigned long *xi = (const unsigned long *)Xi;

	V.hi = H[0];	/* H is in host byte order, no byte swapping */
	V.lo = H[1];

	for (j=0; j<16/sizeof(long); ++j) {
		if (BYTE_ORDER == LITTLE_ENDIAN) {
			if (sizeof(long)==8) {
#ifdef BSWAP8
				X = (unsigned long)(BSWAP8(xi[j]));
#else
				const u8 *p = (const u8 *)(xi+j);
				X = (unsigned long)((u64)GETU32(p)<<32|GETU32(p+4));
#endif
			}
			else {
				const u8 *p = (const u8 *)(xi+j);
				X = (unsigned long)GETU32(p);
			}
		}
		else
			X = xi[j];

		for (i=0; i<8*sizeof(long); ++i, X<<=1) {
			/* 0 when the top bit of X is clear, all-ones when set */
			u64 M = 0 - (u64)(X>>(8*sizeof(long)-1));
			Z.hi ^= V.hi&M;
			Z.lo ^= V.lo&M;

			REDUCE1BIT(V);
		}
	}

	/* store the accumulator into Xi, most significant byte first */
	if (BYTE_ORDER == LITTLE_ENDIAN) {
#ifdef BSWAP8
		Xi[0] = BSWAP8(Z.hi);
		Xi[1] = BSWAP8(Z.lo);
#else
		u8 *p = (u8 *)Xi;
		u32 v;
		v = (u32)(Z.hi>>32);	PUTU32(p,v);
		v = (u32)(Z.hi);	PUTU32(p+4,v);
		v = (u32)(Z.lo>>32);	PUTU32(p+8,v);
		v = (u32)(Z.lo);	PUTU32(p+12,v);
#endif
	}
	else {
		Xi[0] = Z.hi;
		Xi[1] = Z.lo;
	}
}
636 |
|
|
#define GCM_MUL(ctx,Xi) gcm_gmult_1bit(ctx->Xi.u,ctx->H.u) |
637 |
|
|
|
638 |
|
|
#endif |
639 |
|
|
|
640 |
|
|
#if TABLE_BITS==4 && defined(GHASH_ASM) |
641 |
|
|
# if !defined(I386_ONLY) && \ |
642 |
|
|
(defined(__i386) || defined(__i386__) || \ |
643 |
|
|
defined(__x86_64) || defined(__x86_64__) || \ |
644 |
|
|
defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64)) |
645 |
|
|
# define GHASH_ASM_X86_OR_64 |
646 |
|
|
# define GCM_FUNCREF_4BIT |
647 |
|
|
extern unsigned int OPENSSL_ia32cap_P[2]; |
648 |
|
|
|
649 |
|
|
void gcm_init_clmul(u128 Htable[16],const u64 Xi[2]); |
650 |
|
|
void gcm_gmult_clmul(u64 Xi[2],const u128 Htable[16]); |
651 |
|
|
void gcm_ghash_clmul(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len); |
652 |
|
|
|
653 |
|
|
# if defined(__i386) || defined(__i386__) || defined(_M_IX86) |
654 |
|
|
# define GHASH_ASM_X86 |
655 |
|
|
void gcm_gmult_4bit_mmx(u64 Xi[2],const u128 Htable[16]); |
656 |
|
|
void gcm_ghash_4bit_mmx(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len); |
657 |
|
|
|
658 |
|
|
void gcm_gmult_4bit_x86(u64 Xi[2],const u128 Htable[16]); |
659 |
|
|
void gcm_ghash_4bit_x86(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len); |
660 |
|
|
# endif |
661 |
|
|
# elif defined(__arm__) || defined(__arm) |
662 |
|
|
# include "arm_arch.h" |
663 |
|
|
# if __ARM_ARCH__>=7 |
664 |
|
|
# define GHASH_ASM_ARM |
665 |
|
|
# define GCM_FUNCREF_4BIT |
666 |
|
|
void gcm_gmult_neon(u64 Xi[2],const u128 Htable[16]); |
667 |
|
|
void gcm_ghash_neon(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len); |
668 |
|
|
# endif |
669 |
|
|
# endif |
670 |
|
|
#endif |
671 |
|
|
|
672 |
|
|
#ifdef GCM_FUNCREF_4BIT |
673 |
|
|
# undef GCM_MUL |
674 |
|
|
# define GCM_MUL(ctx,Xi) (*gcm_gmult_p)(ctx->Xi.u,ctx->Htable) |
675 |
|
|
# ifdef GHASH |
676 |
|
|
# undef GHASH |
677 |
|
|
# define GHASH(ctx,in,len) (*gcm_ghash_p)(ctx->Xi.u,ctx->Htable,in,len) |
678 |
|
|
# endif |
679 |
|
|
#endif |
680 |
|
|
|
681 |
|
|
/*
 * Prepare a GCM context for use with the given block cipher.
 *
 * ctx   - context to initialise; it is cleared first
 * key   - opaque key handed unchanged to the block function
 * block - single-block encryption function
 *
 * The hash key H is obtained by running the block function over the
 * (all-zero) H field of the freshly cleared context, and is then converted
 * to host byte order.  Depending on TABLE_BITS and the available assembler
 * support, the matching table initialiser is called and, where function
 * pointers are used, ctx->gmult / ctx->ghash are set to the selected
 * implementation.
 */
void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx,void *key,block128_f block)
{
	memset(ctx, 0, sizeof(*ctx));
	ctx->key   = key;
	ctx->block = block;

	/* H = block(0^128); ctx->H is still all zero at this point */
	(*block)(ctx->H.c, ctx->H.c, key);

	if (BYTE_ORDER == LITTLE_ENDIAN) {
		/* H is stored in host byte order */
#ifdef BSWAP8
		ctx->H.u[0] = BSWAP8(ctx->H.u[0]);
		ctx->H.u[1] = BSWAP8(ctx->H.u[1]);
#else
		u8 *raw = ctx->H.c;
		u64 upper, lower;

		/* read both halves before overwriting the same storage */
		upper = (u64)GETU32(raw)  <<32|GETU32(raw+4);
		lower = (u64)GETU32(raw+8)<<32|GETU32(raw+12);
		ctx->H.u[0] = upper;
		ctx->H.u[1] = lower;
#endif
	}

#if	TABLE_BITS==8
	gcm_init_8bit(ctx->Htable, ctx->H.u);
#elif	TABLE_BITS==4
# if	defined(GHASH_ASM_X86_OR_64)
#  if	!defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2)
	/* carry-less multiply path needs both the FXSR and PCLMULQDQ bits */
	if ((OPENSSL_ia32cap_P[0] & (1<<24)) &&
	    (OPENSSL_ia32cap_P[1] & (1<<1))) {
		gcm_init_clmul(ctx->Htable, ctx->H.u);
		ctx->gmult = gcm_gmult_clmul;
		ctx->ghash = gcm_ghash_clmul;
		return;
	}
#  endif
	gcm_init_4bit(ctx->Htable, ctx->H.u);
#  if	defined(GHASH_ASM_X86)			/* x86 only */
	{
#   if	defined(OPENSSL_IA32_SSE2)
		const unsigned int simd_mask = 1<<25;	/* check SSE bit */
#   else
		const unsigned int simd_mask = 1<<23;	/* check MMX bit */
#   endif

		if (OPENSSL_ia32cap_P[0] & simd_mask) {
			ctx->gmult = gcm_gmult_4bit_mmx;
			ctx->ghash = gcm_ghash_4bit_mmx;
		} else {
			ctx->gmult = gcm_gmult_4bit_x86;
			ctx->ghash = gcm_ghash_4bit_x86;
		}
	}
#  else
	ctx->gmult = gcm_gmult_4bit;
	ctx->ghash = gcm_ghash_4bit;
#  endif
# elif	defined(GHASH_ASM_ARM)
	if (OPENSSL_armcap_P & ARMV7_NEON) {
		/* the NEON routines are selected without building Htable */
		ctx->gmult = gcm_gmult_neon;
		ctx->ghash = gcm_ghash_neon;
	} else {
		gcm_init_4bit(ctx->Htable, ctx->H.u);
		ctx->gmult = gcm_gmult_4bit;
		ctx->ghash = gcm_ghash_4bit;
	}
# else
	gcm_init_4bit(ctx->Htable, ctx->H.u);
# endif
#endif
}
748 |
|
|
|
749 |
|
|
/*
 * Start a new message on an initialised GCM context.
 *
 * Clears the running hash (Xi), both length counters and the partial-block
 * markers, then derives the initial counter block Yi from the IV:
 *   - a 12-byte IV is copied verbatim and the last byte is set to 1;
 *   - any other length is hashed with GCM_MUL block by block, followed by
 *     a block carrying the IV length in bits.
 * The block function is then applied to Yi to produce EK0, and the 32-bit
 * counter held in the last word of Yi is advanced by one.
 */
void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx,const unsigned char *iv,size_t len)
{
	unsigned int counter;
#ifdef GCM_FUNCREF_4BIT
	void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
#endif

	/* Drop everything a previous message may have left behind. */
	ctx->ares = 0;
	ctx->mres = 0;
	ctx->len.u[0] = 0;	/* AAD length */
	ctx->len.u[1] = 0;	/* message length */
	ctx->Xi.u[0] = 0;
	ctx->Xi.u[1] = 0;
	ctx->Yi.u[0] = 0;
	ctx->Yi.u[1] = 0;

	if (len != 12) {
		size_t k;
		u64 iv_bits = len;

		iv_bits <<= 3;

		/* Absorb every complete 16-byte block of the IV... */
		for (; len >= 16; iv += 16, len -= 16) {
			for (k = 0; k < 16; ++k)
				ctx->Yi.c[k] ^= iv[k];
			GCM_MUL(ctx,Yi);
		}
		/* ...then the trailing partial block, if there is one. */
		if (len != 0) {
			for (k = 0; k < len; ++k)
				ctx->Yi.c[k] ^= iv[k];
			GCM_MUL(ctx,Yi);
		}

		/* Mix the IV bit length, big-endian, into the low half of Yi. */
		if (BYTE_ORDER == LITTLE_ENDIAN) {
#ifdef BSWAP8
			ctx->Yi.u[1] ^= BSWAP8(iv_bits);
#else
			for (k = 0; k < 8; ++k)
				ctx->Yi.c[8 + k] ^= (u8)(iv_bits >> (56 - 8 * k));
#endif
		} else {
			ctx->Yi.u[1] ^= iv_bits;
		}

		GCM_MUL(ctx,Yi);

		/* Fetch the counter word of the derived block in host order. */
		if (BYTE_ORDER == LITTLE_ENDIAN) {
#ifdef BSWAP4
			counter = BSWAP4(ctx->Yi.d[3]);
#else
			counter = GETU32(ctx->Yi.c+12);
#endif
		} else {
			counter = ctx->Yi.d[3];
		}
	} else {
		/* 96-bit IV: Yi = IV || 0x00000001. */
		memcpy(ctx->Yi.c,iv,12);
		ctx->Yi.c[15] = 1;
		counter = 1;
	}

	/* EK0 = E(K, Y0); it is consumed when the tag is finalised. */
	(*ctx->block)(ctx->Yi.c,ctx->EK0.c,ctx->key);

	/* Step the counter so the first data block uses Y1. */
	++counter;
	if (BYTE_ORDER == LITTLE_ENDIAN) {
#ifdef BSWAP4
		ctx->Yi.d[3] = BSWAP4(counter);
#else
		PUTU32(ctx->Yi.c+12,counter);
#endif
	} else {
		ctx->Yi.d[3] = counter;
	}
}
825 |
|
|
|
826 |
|
|
/*
 * Feed additional authenticated data into the running hash Xi.
 *
 * May be called repeatedly; bytes that do not fill a 16-byte block are
 * XORed into Xi and their count is remembered in ctx->ares so the next
 * call (or the first encrypt/decrypt call) can complete the block.
 *
 * Returns 0 on success, -2 if message data has already been processed
 * (ctx->len.u[1] is non-zero), and -1 if the accumulated AAD length would
 * exceed 2^61 bytes or overflow.
 */
int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx,const unsigned char *aad,size_t len)
{
	size_t k;
	unsigned int pending;
	u64 total = ctx->len.u[0];
#ifdef GCM_FUNCREF_4BIT
	void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
# ifdef GHASH
	void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
				const u8 *inp,size_t len) = ctx->ghash;
# endif
#endif

	/* AAD is only accepted before any message bytes. */
	if (ctx->len.u[1] != 0)
		return -2;

	total += len;
	if (total > (U64(1)<<61) || (sizeof(len) == 8 && total < len))
		return -1;
	ctx->len.u[0] = total;

	/* Top up a block left incomplete by an earlier call. */
	pending = ctx->ares;
	if (pending != 0) {
		for (; pending != 0 && len != 0; --len) {
			ctx->Xi.c[pending] ^= *aad++;
			pending = (pending + 1) % 16;
		}
		if (pending != 0) {
			/* Still not a whole block; wait for more input. */
			ctx->ares = pending;
			return 0;
		}
		GCM_MUL(ctx,Xi);
	}

	/* Hash all complete 16-byte blocks. */
#ifdef GHASH
	k = len & (size_t)-16;
	if (k != 0) {
		GHASH(ctx,aad,k);
		aad += k;
		len -= k;
	}
#else
	for (; len >= 16; aad += 16, len -= 16) {
		for (k = 0; k < 16; ++k)
			ctx->Xi.c[k] ^= aad[k];
		GCM_MUL(ctx,Xi);
	}
#endif

	/* Stash the tail; the multiplication is deferred. */
	if (len != 0) {
		pending = (unsigned int)len;
		for (k = 0; k < len; ++k)
			ctx->Xi.c[k] ^= aad[k];
	}

	ctx->ares = pending;
	return 0;
}
882 |
|
|
|
883 |
|
|
/*
 * Encrypt len bytes from in to out in counter mode, using the context's
 * block function, and fold the resulting ciphertext into the hash Xi.
 *
 * The call may be repeated; the position inside the current keystream
 * block (ctx->EKi) is carried between calls in ctx->mres.  The first call
 * after CRYPTO_gcm128_aad() completes any pending partial AAD block.
 *
 * Returns 0 on success, or -1 if the accumulated message length would
 * exceed 2^36 - 32 bytes or overflow.
 */
int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
		const unsigned char *in, unsigned char *out,
		size_t len)
{
	unsigned int used, ctr;
	size_t k;
	u64 total = ctx->len.u[1];
	block128_f block = ctx->block;
	void *key = ctx->key;
#ifdef GCM_FUNCREF_4BIT
	void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
# ifdef GHASH
	void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
				const u8 *inp,size_t len) = ctx->ghash;
# endif
#endif

	total += len;
	if (total > ((U64(1)<<36)-32) || (sizeof(len) == 8 && total < len))
		return -1;
	ctx->len.u[1] = total;

	if (ctx->ares) {
		/* First call to encrypt finalizes GHASH(AAD) */
		GCM_MUL(ctx,Xi);
		ctx->ares = 0;
	}

	/* Load the 32-bit block counter from the tail of Yi in host order. */
	if (BYTE_ORDER == LITTLE_ENDIAN) {
#ifdef BSWAP4
		ctr = BSWAP4(ctx->Yi.d[3]);
#else
		ctr = GETU32(ctx->Yi.c+12);
#endif
	} else {
		ctr = ctx->Yi.d[3];
	}

	used = ctx->mres;
#if !defined(OPENSSL_SMALL_FOOTPRINT)
	if (16%sizeof(size_t) == 0) do {	/* always true actually */
		/* Use up keystream left over from the previous call. */
		if (used != 0) {
			for (; used != 0 && len != 0; --len) {
				u8 c = *in++ ^ ctx->EKi.c[used];

				*out++ = c;
				ctx->Xi.c[used] ^= c;
				used = (used + 1) % 16;
			}
			if (used != 0) {
				ctx->mres = used;
				return 0;
			}
			GCM_MUL(ctx,Xi);
		}
#ifdef __STRICT_ALIGNMENT
		/* Word-wise access needs aligned buffers; else go byte-wise. */
		if (((size_t)in|(size_t)out)%sizeof(size_t) != 0)
			break;
#endif
#if defined(GHASH) && defined(GHASH_CHUNK)
		/* Encrypt a whole chunk first, then hash it in one call. */
		while (len >= GHASH_CHUNK) {
			size_t left;

			for (left = GHASH_CHUNK; left != 0; left -= 16) {
				size_t *out_t = (size_t *)out;
				const size_t *in_t = (const size_t *)in;

				(*block)(ctx->Yi.c,ctx->EKi.c,key);
				++ctr;
				if (BYTE_ORDER == LITTLE_ENDIAN) {
#ifdef BSWAP4
					ctx->Yi.d[3] = BSWAP4(ctr);
#else
					PUTU32(ctx->Yi.c+12,ctr);
#endif
				} else {
					ctx->Yi.d[3] = ctr;
				}
				for (k = 0; k < 16/sizeof(size_t); ++k)
					out_t[k] = in_t[k] ^ ctx->EKi.t[k];
				out += 16;
				in += 16;
			}
			GHASH(ctx,out-GHASH_CHUNK,GHASH_CHUNK);
			len -= GHASH_CHUNK;
		}
		/* Remaining complete blocks, hashed together afterwards. */
		k = len & (size_t)-16;
		if (k != 0) {
			size_t bulk = k;

			while (len >= 16) {
				size_t *out_t = (size_t *)out;
				const size_t *in_t = (const size_t *)in;

				(*block)(ctx->Yi.c,ctx->EKi.c,key);
				++ctr;
				if (BYTE_ORDER == LITTLE_ENDIAN) {
#ifdef BSWAP4
					ctx->Yi.d[3] = BSWAP4(ctr);
#else
					PUTU32(ctx->Yi.c+12,ctr);
#endif
				} else {
					ctx->Yi.d[3] = ctr;
				}
				for (k = 0; k < 16/sizeof(size_t); ++k)
					out_t[k] = in_t[k] ^ ctx->EKi.t[k];
				out += 16;
				in += 16;
				len -= 16;
			}
			GHASH(ctx,out-bulk,bulk);
		}
#else
		/* No bulk hash routine: encrypt and hash block by block. */
		while (len >= 16) {
			size_t *out_t = (size_t *)out;
			const size_t *in_t = (const size_t *)in;

			(*block)(ctx->Yi.c,ctx->EKi.c,key);
			++ctr;
			if (BYTE_ORDER == LITTLE_ENDIAN) {
#ifdef BSWAP4
				ctx->Yi.d[3] = BSWAP4(ctr);
#else
				PUTU32(ctx->Yi.c+12,ctr);
#endif
			} else {
				ctx->Yi.d[3] = ctr;
			}
			for (k = 0; k < 16/sizeof(size_t); ++k) {
				size_t w = in_t[k] ^ ctx->EKi.t[k];

				out_t[k] = w;
				ctx->Xi.t[k] ^= w;
			}
			GCM_MUL(ctx,Xi);
			out += 16;
			in += 16;
			len -= 16;
		}
#endif
		/* Trailing partial block; its multiplication is deferred. */
		if (len != 0) {
			(*block)(ctx->Yi.c,ctx->EKi.c,key);
			++ctr;
			if (BYTE_ORDER == LITTLE_ENDIAN) {
#ifdef BSWAP4
				ctx->Yi.d[3] = BSWAP4(ctr);
#else
				PUTU32(ctx->Yi.c+12,ctr);
#endif
			} else {
				ctx->Yi.d[3] = ctr;
			}
			while (len--) {
				u8 c = in[used] ^ ctx->EKi.c[used];

				out[used] = c;
				ctx->Xi.c[used] ^= c;
				++used;
			}
		}

		ctx->mres = used;
		return 0;
	} while (0);
#endif
	/* Byte-at-a-time path (small footprint or unaligned buffers). */
	for (k = 0; k < len; ++k) {
		u8 c;

		if (used == 0) {
			/* Keystream exhausted: generate the next block. */
			(*block)(ctx->Yi.c,ctx->EKi.c,key);
			++ctr;
			if (BYTE_ORDER == LITTLE_ENDIAN) {
#ifdef BSWAP4
				ctx->Yi.d[3] = BSWAP4(ctr);
#else
				PUTU32(ctx->Yi.c+12,ctr);
#endif
			} else {
				ctx->Yi.d[3] = ctr;
			}
		}
		c = in[k] ^ ctx->EKi.c[used];
		out[k] = c;
		ctx->Xi.c[used] ^= c;
		used = (used + 1) % 16;
		if (used == 0)
			GCM_MUL(ctx,Xi);
	}

	ctx->mres = used;
	return 0;
}
1058 |
|
|
|
1059 |
|
|
/*
 * Decrypt len bytes from in to out in counter mode, using the context's
 * block function, and fold the incoming ciphertext into the hash Xi.
 *
 * The call may be repeated; the position inside the current keystream
 * block (ctx->EKi) is carried between calls in ctx->mres.  The first call
 * after CRYPTO_gcm128_aad() completes any pending partial AAD block.
 *
 * Returns 0 on success, or -1 if the accumulated message length would
 * exceed 2^36 - 32 bytes or overflow.  The tag is not checked here; that
 * is done by CRYPTO_gcm128_finish().
 */
int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
		const unsigned char *in, unsigned char *out,
		size_t len)
{
	unsigned int n, ctr;
	size_t i;
	u64 mlen = ctx->len.u[1];
	block128_f block = ctx->block;
	void *key = ctx->key;
#ifdef GCM_FUNCREF_4BIT
	void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
# ifdef GHASH
	void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
				const u8 *inp,size_t len) = ctx->ghash;
# endif
#endif

	mlen += len;
	if (mlen > ((U64(1)<<36)-32) || (sizeof(len) == 8 && mlen < len))
		return -1;
	ctx->len.u[1] = mlen;

	if (ctx->ares) {
		/* First call to decrypt finalizes GHASH(AAD) */
		GCM_MUL(ctx,Xi);
		ctx->ares = 0;
	}

	/* Load the 32-bit block counter from the tail of Yi in host order. */
	if (BYTE_ORDER == LITTLE_ENDIAN) {
#ifdef BSWAP4
		ctr = BSWAP4(ctx->Yi.d[3]);
#else
		ctr = GETU32(ctx->Yi.c+12);
#endif
	} else {
		ctr = ctx->Yi.d[3];
	}

	n = ctx->mres;
#if !defined(OPENSSL_SMALL_FOOTPRINT)
	if (16%sizeof(size_t) == 0) do {	/* always true actually */
		/* Use up keystream left over from the previous call. */
		if (n != 0) {
			while (n != 0 && len != 0) {
				u8 c = *(in++);

				*(out++) = c ^ ctx->EKi.c[n];
				ctx->Xi.c[n] ^= c;
				--len;
				n = (n + 1) % 16;
			}
			if (n != 0) {
				ctx->mres = n;
				return 0;
			}
			GCM_MUL(ctx,Xi);
		}
#ifdef __STRICT_ALIGNMENT
		/* Word-wise access needs aligned buffers; else go byte-wise. */
		if (((size_t)in|(size_t)out)%sizeof(size_t) != 0)
			break;
#endif
#if defined(GHASH) && defined(GHASH_CHUNK)
		/*
		 * Hash the ciphertext before decrypting it: in and out may
		 * be the same buffer.
		 */
		while (len >= GHASH_CHUNK) {
			size_t j = GHASH_CHUNK;

			GHASH(ctx,in,GHASH_CHUNK);
			while (j != 0) {
				size_t *out_t = (size_t *)out;
				const size_t *in_t = (const size_t *)in;

				(*block)(ctx->Yi.c,ctx->EKi.c,key);
				++ctr;
				if (BYTE_ORDER == LITTLE_ENDIAN) {
#ifdef BSWAP4
					ctx->Yi.d[3] = BSWAP4(ctr);
#else
					PUTU32(ctx->Yi.c+12,ctr);
#endif
				} else {
					ctx->Yi.d[3] = ctr;
				}
				for (i = 0; i < 16/sizeof(size_t); ++i)
					out_t[i] = in_t[i] ^ ctx->EKi.t[i];
				out += 16;
				in += 16;
				j -= 16;
			}
			len -= GHASH_CHUNK;
		}
		/* Remaining complete blocks, hashed up front in one call. */
		if ((i = (len & (size_t)-16)) != 0) {
			GHASH(ctx,in,i);
			while (len >= 16) {
				size_t *out_t = (size_t *)out;
				const size_t *in_t = (const size_t *)in;

				(*block)(ctx->Yi.c,ctx->EKi.c,key);
				++ctr;
				if (BYTE_ORDER == LITTLE_ENDIAN) {
#ifdef BSWAP4
					ctx->Yi.d[3] = BSWAP4(ctr);
#else
					PUTU32(ctx->Yi.c+12,ctr);
#endif
				} else {
					ctx->Yi.d[3] = ctr;
				}
				for (i = 0; i < 16/sizeof(size_t); ++i)
					out_t[i] = in_t[i] ^ ctx->EKi.t[i];
				out += 16;
				in += 16;
				len -= 16;
			}
		}
#else
		/* No bulk hash routine: decrypt and hash block by block. */
		while (len >= 16) {
			size_t *out_t = (size_t *)out;
			const size_t *in_t = (const size_t *)in;

			(*block)(ctx->Yi.c,ctx->EKi.c,key);
			++ctr;
			if (BYTE_ORDER == LITTLE_ENDIAN) {
#ifdef BSWAP4
				ctx->Yi.d[3] = BSWAP4(ctr);
#else
				PUTU32(ctx->Yi.c+12,ctr);
#endif
			} else {
				ctx->Yi.d[3] = ctr;
			}
			for (i = 0; i < 16/sizeof(size_t); ++i) {
				/*
				 * Work on whole words through in_t/out_t.
				 * Indexing the byte pointers here would
				 * handle only 16/sizeof(size_t) bytes of
				 * the block and corrupt output and hash.
				 */
				size_t c = in_t[i];

				out_t[i] = c ^ ctx->EKi.t[i];
				ctx->Xi.t[i] ^= c;
			}
			GCM_MUL(ctx,Xi);
			out += 16;
			in += 16;
			len -= 16;
		}
#endif
		/* Trailing partial block; its multiplication is deferred. */
		if (len != 0) {
			(*block)(ctx->Yi.c,ctx->EKi.c,key);
			++ctr;
			if (BYTE_ORDER == LITTLE_ENDIAN) {
#ifdef BSWAP4
				ctx->Yi.d[3] = BSWAP4(ctr);
#else
				PUTU32(ctx->Yi.c+12,ctr);
#endif
			} else {
				ctx->Yi.d[3] = ctr;
			}
			while (len--) {
				u8 c = in[n];

				ctx->Xi.c[n] ^= c;
				out[n] = c ^ ctx->EKi.c[n];
				++n;
			}
		}

		ctx->mres = n;
		return 0;
	} while (0);
#endif
	/* Byte-at-a-time path (small footprint or unaligned buffers). */
	for (i = 0; i < len; ++i) {
		u8 c;

		if (n == 0) {
			/* Keystream exhausted: generate the next block. */
			(*block)(ctx->Yi.c,ctx->EKi.c,key);
			++ctr;
			if (BYTE_ORDER == LITTLE_ENDIAN) {
#ifdef BSWAP4
				ctx->Yi.d[3] = BSWAP4(ctr);
#else
				PUTU32(ctx->Yi.c+12,ctr);
#endif
			} else {
				ctx->Yi.d[3] = ctr;
			}
		}
		c = in[i];
		out[i] = c ^ ctx->EKi.c[n];
		ctx->Xi.c[n] ^= c;
		n = (n + 1) % 16;
		if (n == 0)
			GCM_MUL(ctx,Xi);
	}

	ctx->mres = n;
	return 0;
}
1241 |
|
|
|
1242 |
|
|
/*
 * Variant of CRYPTO_gcm128_encrypt() that hands runs of complete 16-byte
 * blocks to the caller-supplied stream routine instead of calling the
 * block function once per block.
 *
 * stream is invoked as (*stream)(in, out, nblocks, key, ctx->Yi.c); this
 * function then advances its own copy of the counter by nblocks and
 * writes it back into Yi.  Leading and trailing partial blocks still go
 * through ctx->block.
 * NOTE(review): stream is presumably a CTR-mode routine with a 32-bit
 * big-endian counter, as the function name suggests - confirm with the
 * ctr128_f contract.
 *
 * Returns 0 on success, or -1 if the accumulated message length would
 * exceed 2^36 - 32 bytes or overflow.
 */
int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
		const unsigned char *in, unsigned char *out,
		size_t len, ctr128_f stream)
{
	unsigned int used, ctr;
	size_t k;
	u64 total = ctx->len.u[1];
	void *key = ctx->key;
#ifdef GCM_FUNCREF_4BIT
	void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
# ifdef GHASH
	void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
				const u8 *inp,size_t len) = ctx->ghash;
# endif
#endif

	total += len;
	if (total > ((U64(1)<<36)-32) || (sizeof(len) == 8 && total < len))
		return -1;
	ctx->len.u[1] = total;

	if (ctx->ares) {
		/* First call to encrypt finalizes GHASH(AAD) */
		GCM_MUL(ctx,Xi);
		ctx->ares = 0;
	}

	/* Load the 32-bit block counter from the tail of Yi in host order. */
	if (BYTE_ORDER == LITTLE_ENDIAN) {
#ifdef BSWAP4
		ctr = BSWAP4(ctx->Yi.d[3]);
#else
		ctr = GETU32(ctx->Yi.c+12);
#endif
	} else {
		ctr = ctx->Yi.d[3];
	}

	/* Use up keystream left over from the previous call. */
	used = ctx->mres;
	if (used != 0) {
		for (; used != 0 && len != 0; --len) {
			u8 c = *in++ ^ ctx->EKi.c[used];

			*out++ = c;
			ctx->Xi.c[used] ^= c;
			used = (used + 1) % 16;
		}
		if (used != 0) {
			ctx->mres = used;
			return 0;
		}
		GCM_MUL(ctx,Xi);
	}
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
	/* Large inputs: encrypt one chunk, then hash that chunk. */
	for (; len >= GHASH_CHUNK; len -= GHASH_CHUNK) {
		(*stream)(in,out,GHASH_CHUNK/16,key,ctx->Yi.c);
		ctr += GHASH_CHUNK/16;
		if (BYTE_ORDER == LITTLE_ENDIAN) {
#ifdef BSWAP4
			ctx->Yi.d[3] = BSWAP4(ctr);
#else
			PUTU32(ctx->Yi.c+12,ctr);
#endif
		} else {
			ctx->Yi.d[3] = ctr;
		}
		GHASH(ctx,out,GHASH_CHUNK);
		out += GHASH_CHUNK;
		in += GHASH_CHUNK;
	}
#endif
	/* Whatever complete blocks are left. */
	k = len & (size_t)-16;
	if (k != 0) {
		size_t blocks = k/16;

		(*stream)(in,out,blocks,key,ctx->Yi.c);
		ctr += (unsigned int)blocks;
		if (BYTE_ORDER == LITTLE_ENDIAN) {
#ifdef BSWAP4
			ctx->Yi.d[3] = BSWAP4(ctr);
#else
			PUTU32(ctx->Yi.c+12,ctr);
#endif
		} else {
			ctx->Yi.d[3] = ctr;
		}
		in += k;
		len -= k;
#if defined(GHASH)
		GHASH(ctx,out,k);
		out += k;
#else
		while (blocks--) {
			for (k = 0; k < 16; ++k)
				ctx->Xi.c[k] ^= out[k];
			GCM_MUL(ctx,Xi);
			out += 16;
		}
#endif
	}
	/* Trailing partial block; its multiplication is deferred. */
	if (len != 0) {
		(*ctx->block)(ctx->Yi.c,ctx->EKi.c,key);
		++ctr;
		if (BYTE_ORDER == LITTLE_ENDIAN) {
#ifdef BSWAP4
			ctx->Yi.d[3] = BSWAP4(ctr);
#else
			PUTU32(ctx->Yi.c+12,ctr);
#endif
		} else {
			ctx->Yi.d[3] = ctr;
		}
		while (len--) {
			u8 c = in[used] ^ ctx->EKi.c[used];

			out[used] = c;
			ctx->Xi.c[used] ^= c;
			++used;
		}
	}

	ctx->mres = used;
	return 0;
}
1355 |
|
|
|
1356 |
|
|
/*
 * Variant of CRYPTO_gcm128_decrypt() that hands runs of complete 16-byte
 * blocks to the caller-supplied stream routine instead of calling the
 * block function once per block.
 *
 * Ciphertext is always hashed before stream is called on it, since in and
 * out may be the same buffer.  stream is invoked as
 * (*stream)(in, out, nblocks, key, ctx->Yi.c); this function then advances
 * its own copy of the counter by nblocks and writes it back into Yi.
 * NOTE(review): stream is presumably a CTR-mode routine with a 32-bit
 * big-endian counter, as the function name suggests - confirm with the
 * ctr128_f contract.
 *
 * Returns 0 on success, or -1 if the accumulated message length would
 * exceed 2^36 - 32 bytes or overflow.
 */
int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
		const unsigned char *in, unsigned char *out,
		size_t len,ctr128_f stream)
{
	unsigned int used, ctr;
	size_t k;
	u64 total = ctx->len.u[1];
	void *key = ctx->key;
#ifdef GCM_FUNCREF_4BIT
	void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
# ifdef GHASH
	void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
				const u8 *inp,size_t len) = ctx->ghash;
# endif
#endif

	total += len;
	if (total > ((U64(1)<<36)-32) || (sizeof(len) == 8 && total < len))
		return -1;
	ctx->len.u[1] = total;

	if (ctx->ares) {
		/* First call to decrypt finalizes GHASH(AAD) */
		GCM_MUL(ctx,Xi);
		ctx->ares = 0;
	}

	/* Load the 32-bit block counter from the tail of Yi in host order. */
	if (BYTE_ORDER == LITTLE_ENDIAN) {
#ifdef BSWAP4
		ctr = BSWAP4(ctx->Yi.d[3]);
#else
		ctr = GETU32(ctx->Yi.c+12);
#endif
	} else {
		ctr = ctx->Yi.d[3];
	}

	/* Use up keystream left over from the previous call. */
	used = ctx->mres;
	if (used != 0) {
		for (; used != 0 && len != 0; --len) {
			u8 c = *in++;

			*out++ = c ^ ctx->EKi.c[used];
			ctx->Xi.c[used] ^= c;
			used = (used + 1) % 16;
		}
		if (used != 0) {
			ctx->mres = used;
			return 0;
		}
		GCM_MUL(ctx,Xi);
	}
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
	/* Large inputs: hash one chunk, then decrypt that chunk. */
	for (; len >= GHASH_CHUNK; len -= GHASH_CHUNK) {
		GHASH(ctx,in,GHASH_CHUNK);
		(*stream)(in,out,GHASH_CHUNK/16,key,ctx->Yi.c);
		ctr += GHASH_CHUNK/16;
		if (BYTE_ORDER == LITTLE_ENDIAN) {
#ifdef BSWAP4
			ctx->Yi.d[3] = BSWAP4(ctr);
#else
			PUTU32(ctx->Yi.c+12,ctr);
#endif
		} else {
			ctx->Yi.d[3] = ctr;
		}
		out += GHASH_CHUNK;
		in += GHASH_CHUNK;
	}
#endif
	/* Whatever complete blocks are left. */
	k = len & (size_t)-16;
	if (k != 0) {
		size_t blocks = k/16;

#if defined(GHASH)
		GHASH(ctx,in,k);
#else
		while (blocks--) {
			size_t b;

			for (b = 0; b < 16; ++b)
				ctx->Xi.c[b] ^= in[b];
			GCM_MUL(ctx,Xi);
			in += 16;
		}
		/* Rewind: the same bytes still have to be decrypted. */
		blocks = k/16;
		in -= k;
#endif
		(*stream)(in,out,blocks,key,ctx->Yi.c);
		ctr += (unsigned int)blocks;
		if (BYTE_ORDER == LITTLE_ENDIAN) {
#ifdef BSWAP4
			ctx->Yi.d[3] = BSWAP4(ctr);
#else
			PUTU32(ctx->Yi.c+12,ctr);
#endif
		} else {
			ctx->Yi.d[3] = ctr;
		}
		out += k;
		in += k;
		len -= k;
	}
	/* Trailing partial block; its multiplication is deferred. */
	if (len != 0) {
		(*ctx->block)(ctx->Yi.c,ctx->EKi.c,key);
		++ctr;
		if (BYTE_ORDER == LITTLE_ENDIAN) {
#ifdef BSWAP4
			ctx->Yi.d[3] = BSWAP4(ctr);
#else
			PUTU32(ctx->Yi.c+12,ctr);
#endif
		} else {
			ctx->Yi.d[3] = ctr;
		}
		while (len--) {
			u8 c = in[used];

			ctx->Xi.c[used] ^= c;
			out[used] = c ^ ctx->EKi.c[used];
			++used;
		}
	}

	ctx->mres = used;
	return 0;
}
1476 |
|
|
|
1477 |
|
|
/*
 * Finalise the authentication tag and optionally verify it.
 *
 * Completes any pending partial block, hashes the AAD and message lengths
 * (in bits, big-endian) into Xi, and XORs the result with EK0.  The
 * computed tag is left in ctx->Xi.
 *
 * If tag is non-NULL and len does not exceed the size of Xi, the first
 * len bytes of the computed tag are compared against tag.
 *
 * Returns 0 if the tags match, a non-zero value if they differ, and -1 if
 * tag is NULL or len is too large.  The comparison always inspects all
 * len bytes, so its duration does not reveal where the first mismatch
 * lies; only zero versus non-zero is meaningful to callers.
 */
int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx,const unsigned char *tag,
			size_t len)
{
	u64 alen = ctx->len.u[0]<<3;
	u64 clen = ctx->len.u[1]<<3;
#ifdef GCM_FUNCREF_4BIT
	void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
#endif

	/* A partial AAD or message block still awaits its multiplication. */
	if (ctx->mres || ctx->ares)
		GCM_MUL(ctx,Xi);

	/* Put both bit lengths into big-endian byte order. */
	if (BYTE_ORDER == LITTLE_ENDIAN) {
#ifdef BSWAP8
		alen = BSWAP8(alen);
		clen = BSWAP8(clen);
#else
		u8 *p = ctx->len.c;

		/* Reuse ctx->len as scratch space for the byte swap. */
		ctx->len.u[0] = alen;
		ctx->len.u[1] = clen;

		alen = (u64)GETU32(p)  <<32|GETU32(p+4);
		clen = (u64)GETU32(p+8)<<32|GETU32(p+12);
#endif
	}

	ctx->Xi.u[0] ^= alen;
	ctx->Xi.u[1] ^= clen;
	GCM_MUL(ctx,Xi);

	/* Tag = GHASH result XOR E(K, Y0). */
	ctx->Xi.u[0] ^= ctx->EK0.u[0];
	ctx->Xi.u[1] ^= ctx->EK0.u[1];

	if (tag && len <= sizeof(ctx->Xi)) {
		unsigned char diff = 0;
		size_t k;

		/* Accumulate differences; never exit early on a mismatch. */
		for (k = 0; k < len; ++k)
			diff |= ctx->Xi.c[k] ^ tag[k];
		return diff;
	}

	return -1;
}
1516 |
|
|
|
1517 |
|
|
/*
 * Finalise the computation and copy the resulting tag to the caller.
 *
 * At most sizeof(ctx->Xi.c) bytes are written to tag; a larger len is
 * silently clamped to that size.
 */
void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len)
{
	size_t copy_len = sizeof(ctx->Xi.c);

	if (len <= sizeof(ctx->Xi.c))
		copy_len = len;

	/* With a NULL tag, finish only computes the tag into ctx->Xi. */
	CRYPTO_gcm128_finish(ctx, NULL, 0);
	memcpy(tag, ctx->Xi.c, copy_len);
}
1522 |
|
|
|
1523 |
|
|
/*
 * Allocate a GCM context on the heap and initialise it with the given
 * key and block function.
 *
 * Returns the new context, or NULL if the allocation fails.  The context
 * should be disposed of with CRYPTO_gcm128_release().
 */
GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block)
{
	GCM128_CONTEXT *ctx = malloc(sizeof(*ctx));

	if (ctx != NULL)
		CRYPTO_gcm128_init(ctx, key, block);

	return ctx;
}
1532 |
|
|
|
1533 |
|
|
/*
 * Wipe and free a GCM context.  A NULL ctx is accepted and ignored.
 */
void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx)
{
	if (ctx == NULL)
		return;

	/* Scrub the contents before the memory goes back to the allocator. */
	explicit_bzero(ctx, sizeof(*ctx));
	free(ctx);
}