Line data Source code
1 : /*
2 : * Public Domain poly1305 from Andrew Moon
3 : * Based on poly1305-donna.c, poly1305-donna-32.h and poly1305-donna.h from:
4 : * https://github.com/floodyberry/poly1305-donna
5 : */
6 :
7 : #include <sys/types.h>
8 : #include <sys/systm.h>
9 :
10 : #include "poly1305.h"
11 :
12 : /*
13 : * poly1305 implementation using 32 bit * 32 bit = 64 bit multiplication
14 : * and 64 bit addition.
15 : */
16 :
/*
 * Decode the four bytes at p as a little-endian 32 bit unsigned value:
 * p[0] supplies the least significant byte, p[3] the most significant.
 */
static unsigned long
U8TO32(const unsigned char *p)
{
	unsigned long word = 0;
	int i;

	/* Start at the most significant byte and shift the rest in below it. */
	for (i = 3; i >= 0; i--)
		word = (word << 8) | (unsigned long)(p[i] & 0xff);

	return (word);
}
26 :
/*
 * Encode the low 32 bits of v into p[0..3] in little-endian order:
 * p[0] receives the least significant byte, p[3] the most significant.
 * Any bits of v above bit 31 are ignored.
 */
static void
U32TO8(unsigned char *p, unsigned long v)
{
	int i;

	/* Emit the lowest byte, then shift the next one into place. */
	for (i = 0; i < 4; i++) {
		p[i] = (unsigned char)(v & 0xff);
		v >>= 8;
	}
}
36 :
37 : void
38 0 : poly1305_init(poly1305_state *st, const unsigned char key[32])
39 : {
40 : /* r &= 0xffffffc0ffffffc0ffffffc0fffffff */
41 0 : st->r[0] = (U8TO32(&key[0])) & 0x3ffffff;
42 0 : st->r[1] = (U8TO32(&key[3]) >> 2) & 0x3ffff03;
43 0 : st->r[2] = (U8TO32(&key[6]) >> 4) & 0x3ffc0ff;
44 0 : st->r[3] = (U8TO32(&key[9]) >> 6) & 0x3f03fff;
45 0 : st->r[4] = (U8TO32(&key[12]) >> 8) & 0x00fffff;
46 :
47 : /* h = 0 */
48 0 : st->h[0] = 0;
49 0 : st->h[1] = 0;
50 0 : st->h[2] = 0;
51 0 : st->h[3] = 0;
52 0 : st->h[4] = 0;
53 :
54 : /* save pad for later */
55 0 : st->pad[0] = U8TO32(&key[16]);
56 0 : st->pad[1] = U8TO32(&key[20]);
57 0 : st->pad[2] = U8TO32(&key[24]);
58 0 : st->pad[3] = U8TO32(&key[28]);
59 :
60 0 : st->leftover = 0;
61 0 : st->final = 0;
62 0 : }
63 :
64 : static void
65 0 : poly1305_blocks(poly1305_state *st, const unsigned char *m, size_t bytes)
66 : {
67 0 : const unsigned long hibit = (st->final) ? 0 : (1 << 24); /* 1 << 128 */
68 : unsigned long r0, r1, r2, r3, r4;
69 : unsigned long s1, s2, s3, s4;
70 : unsigned long h0, h1, h2, h3, h4;
71 : unsigned long long d0, d1, d2, d3, d4;
72 : unsigned long c;
73 :
74 0 : r0 = st->r[0];
75 0 : r1 = st->r[1];
76 0 : r2 = st->r[2];
77 0 : r3 = st->r[3];
78 0 : r4 = st->r[4];
79 :
80 0 : s1 = r1 * 5;
81 0 : s2 = r2 * 5;
82 0 : s3 = r3 * 5;
83 0 : s4 = r4 * 5;
84 :
85 0 : h0 = st->h[0];
86 0 : h1 = st->h[1];
87 0 : h2 = st->h[2];
88 0 : h3 = st->h[3];
89 0 : h4 = st->h[4];
90 :
91 0 : while (bytes >= poly1305_block_size) {
92 : /* h += m[i] */
93 0 : h0 += (U8TO32(m + 0)) & 0x3ffffff;
94 0 : h1 += (U8TO32(m + 3) >> 2) & 0x3ffffff;
95 0 : h2 += (U8TO32(m + 6) >> 4) & 0x3ffffff;
96 0 : h3 += (U8TO32(m + 9) >> 6) & 0x3ffffff;
97 0 : h4 += (U8TO32(m + 12) >> 8) | hibit;
98 :
99 : /* h *= r */
100 0 : d0 = ((unsigned long long)h0 * r0) +
101 0 : ((unsigned long long)h1 * s4) +
102 0 : ((unsigned long long)h2 * s3) +
103 0 : ((unsigned long long)h3 * s2) +
104 0 : ((unsigned long long)h4 * s1);
105 0 : d1 = ((unsigned long long)h0 * r1) +
106 0 : ((unsigned long long)h1 * r0) +
107 0 : ((unsigned long long)h2 * s4) +
108 0 : ((unsigned long long)h3 * s3) +
109 0 : ((unsigned long long)h4 * s2);
110 0 : d2 = ((unsigned long long)h0 * r2) +
111 0 : ((unsigned long long)h1 * r1) +
112 0 : ((unsigned long long)h2 * r0) +
113 0 : ((unsigned long long)h3 * s4) +
114 0 : ((unsigned long long)h4 * s3);
115 0 : d3 = ((unsigned long long)h0 * r3) +
116 0 : ((unsigned long long)h1 * r2) +
117 0 : ((unsigned long long)h2 * r1) +
118 0 : ((unsigned long long)h3 * r0) +
119 0 : ((unsigned long long)h4 * s4);
120 0 : d4 = ((unsigned long long)h0 * r4) +
121 0 : ((unsigned long long)h1 * r3) +
122 0 : ((unsigned long long)h2 * r2) +
123 0 : ((unsigned long long)h3 * r1) +
124 0 : ((unsigned long long)h4 * r0);
125 :
126 : /* (partial) h %= p */
127 0 : c = (unsigned long)(d0 >> 26);
128 0 : h0 = (unsigned long)d0 & 0x3ffffff;
129 0 : d1 += c;
130 0 : c = (unsigned long)(d1 >> 26);
131 0 : h1 = (unsigned long)d1 & 0x3ffffff;
132 0 : d2 += c;
133 0 : c = (unsigned long)(d2 >> 26);
134 0 : h2 = (unsigned long)d2 & 0x3ffffff;
135 0 : d3 += c;
136 0 : c = (unsigned long)(d3 >> 26);
137 0 : h3 = (unsigned long)d3 & 0x3ffffff;
138 0 : d4 += c;
139 0 : c = (unsigned long)(d4 >> 26);
140 0 : h4 = (unsigned long)d4 & 0x3ffffff;
141 0 : h0 += c * 5;
142 0 : c = (h0 >> 26);
143 0 : h0 = h0 & 0x3ffffff;
144 0 : h1 += c;
145 :
146 0 : m += poly1305_block_size;
147 0 : bytes -= poly1305_block_size;
148 : }
149 :
150 0 : st->h[0] = h0;
151 0 : st->h[1] = h1;
152 0 : st->h[2] = h2;
153 0 : st->h[3] = h3;
154 0 : st->h[4] = h4;
155 0 : }
156 :
157 : void
158 0 : poly1305_update(poly1305_state *st, const unsigned char *m, size_t bytes)
159 : {
160 : size_t i;
161 :
162 : /* handle leftover */
163 0 : if (st->leftover) {
164 0 : size_t want = (poly1305_block_size - st->leftover);
165 0 : if (want > bytes)
166 0 : want = bytes;
167 0 : for (i = 0; i < want; i++)
168 0 : st->buffer[st->leftover + i] = m[i];
169 0 : bytes -= want;
170 0 : m += want;
171 0 : st->leftover += want;
172 0 : if (st->leftover < poly1305_block_size)
173 0 : return;
174 0 : poly1305_blocks(st, st->buffer, poly1305_block_size);
175 0 : st->leftover = 0;
176 0 : }
177 :
178 : /* process full blocks */
179 0 : if (bytes >= poly1305_block_size) {
180 0 : size_t want = (bytes & ~(poly1305_block_size - 1));
181 0 : poly1305_blocks(st, m, want);
182 0 : m += want;
183 0 : bytes -= want;
184 0 : }
185 :
186 : /* store leftover */
187 0 : if (bytes) {
188 0 : for (i = 0; i < bytes; i++)
189 0 : st->buffer[st->leftover + i] = m[i];
190 0 : st->leftover += bytes;
191 0 : }
192 0 : }
193 :
194 : void
195 0 : poly1305_finish(poly1305_state *st, unsigned char mac[16])
196 : {
197 : unsigned long h0, h1, h2, h3, h4, c;
198 : unsigned long g0, g1, g2, g3, g4;
199 : unsigned long long f;
200 : unsigned long mask;
201 :
202 : /* process the remaining block */
203 0 : if (st->leftover) {
204 : size_t i = st->leftover;
205 0 : st->buffer[i++] = 1;
206 0 : for (; i < poly1305_block_size; i++)
207 0 : st->buffer[i] = 0;
208 0 : st->final = 1;
209 0 : poly1305_blocks(st, st->buffer, poly1305_block_size);
210 0 : }
211 :
212 : /* fully carry h */
213 0 : h0 = st->h[0];
214 0 : h1 = st->h[1];
215 0 : h2 = st->h[2];
216 0 : h3 = st->h[3];
217 0 : h4 = st->h[4];
218 :
219 0 : c = h1 >> 26;
220 0 : h1 = h1 & 0x3ffffff;
221 0 : h2 += c;
222 0 : c = h2 >> 26;
223 0 : h2 = h2 & 0x3ffffff;
224 0 : h3 += c;
225 0 : c = h3 >> 26;
226 0 : h3 = h3 & 0x3ffffff;
227 0 : h4 += c;
228 0 : c = h4 >> 26;
229 0 : h4 = h4 & 0x3ffffff;
230 0 : h0 += c * 5;
231 0 : c = h0 >> 26;
232 0 : h0 = h0 & 0x3ffffff;
233 0 : h1 += c;
234 :
235 : /* compute h + -p */
236 0 : g0 = h0 + 5;
237 0 : c = g0 >> 26;
238 0 : g0 &= 0x3ffffff;
239 0 : g1 = h1 + c;
240 0 : c = g1 >> 26;
241 0 : g1 &= 0x3ffffff;
242 0 : g2 = h2 + c;
243 0 : c = g2 >> 26;
244 0 : g2 &= 0x3ffffff;
245 0 : g3 = h3 + c;
246 0 : c = g3 >> 26;
247 0 : g3 &= 0x3ffffff;
248 0 : g4 = h4 + c - (1 << 26);
249 :
250 : /* select h if h < p, or h + -p if h >= p */
251 0 : mask = (g4 >> ((sizeof(unsigned long) * 8) - 1)) - 1;
252 0 : g0 &= mask;
253 0 : g1 &= mask;
254 0 : g2 &= mask;
255 0 : g3 &= mask;
256 0 : g4 &= mask;
257 0 : mask = ~mask;
258 0 : h0 = (h0 & mask) | g0;
259 0 : h1 = (h1 & mask) | g1;
260 0 : h2 = (h2 & mask) | g2;
261 0 : h3 = (h3 & mask) | g3;
262 0 : h4 = (h4 & mask) | g4;
263 :
264 : /* h = h % (2^128) */
265 0 : h0 = ((h0) | (h1 << 26)) & 0xffffffff;
266 0 : h1 = ((h1 >> 6) | (h2 << 20)) & 0xffffffff;
267 0 : h2 = ((h2 >> 12) | (h3 << 14)) & 0xffffffff;
268 0 : h3 = ((h3 >> 18) | (h4 << 8)) & 0xffffffff;
269 :
270 : /* mac = (h + pad) % (2^128) */
271 0 : f = (unsigned long long)h0 + st->pad[0];
272 : h0 = (unsigned long)f;
273 0 : f = (unsigned long long)h1 + st->pad[1] + (f >> 32);
274 : h1 = (unsigned long)f;
275 0 : f = (unsigned long long)h2 + st->pad[2] + (f >> 32);
276 : h2 = (unsigned long)f;
277 0 : f = (unsigned long long)h3 + st->pad[3] + (f >> 32);
278 : h3 = (unsigned long)f;
279 :
280 0 : U32TO8(mac + 0, h0);
281 0 : U32TO8(mac + 4, h1);
282 0 : U32TO8(mac + 8, h2);
283 0 : U32TO8(mac + 12, h3);
284 :
285 : /* zero out the state */
286 0 : st->h[0] = 0;
287 0 : st->h[1] = 0;
288 0 : st->h[2] = 0;
289 0 : st->h[3] = 0;
290 0 : st->h[4] = 0;
291 0 : st->r[0] = 0;
292 0 : st->r[1] = 0;
293 0 : st->r[2] = 0;
294 0 : st->r[3] = 0;
295 0 : st->r[4] = 0;
296 0 : st->pad[0] = 0;
297 0 : st->pad[1] = 0;
298 0 : st->pad[2] = 0;
299 0 : st->pad[3] = 0;
300 0 : }
|