1 /* LibTomCrypt, modular cryptographic library -- Tom St Denis */
2 /* SPDX-License-Identifier: Unlicense */
3
4 /*
5 * This LTC implementation was adapted from:
6 * http://www.ecrypt.eu.org/stream/e2-sosemanuk.html
7 */
8
9 /*
10 * SOSEMANUK reference implementation.
11 *
12 * This code is supposed to run on any conforming C implementation (C90
13 * or later).
14 *
15 * (c) 2005 X-CRYPT project. This software is provided 'as-is', without
16 * any express or implied warranty. In no event will the authors be held
17 * liable for any damages arising from the use of this software.
18 *
19 * Permission is granted to anyone to use this software for any purpose,
20 * including commercial applications, and to alter it and redistribute it
21 * freely, subject to no restriction.
22 *
23 * Technical remarks and questions can be addressed to
24 * <thomas.pornin@cryptolog.com>
25 */
26
27 #include "tomcrypt_private.h"
28
29 #ifdef LTC_SOSEMANUK
30
31 /* ======================================================================== */
32
/*
 * Explicit truncation to the low 32 bits. It is applied wherever an
 * intermediate result (left shift, addition, multiplication) may carry
 * bits above bit 31.
 */
#define T32(x)   ((ulong32)0xFFFFFFFF & (x))
37
38 /*
39 * Some of our functions will be tagged as "inline" to help the compiler
40 * optimize things. We use "inline" only if the compiler is advanced
41 * enough to understand it; C99 compilers, and pre-C99 versions of gcc,
42 * understand enough "inline" for our purposes.
43 */
44
45 /* ======================================================================== */
46
/*
 * Serpent S-boxes, implemented in bitslice mode. These circuits have
 * been published by Dag Arne Osvik ("Speeding up Serpent", published in
 * the 3rd AES Candidate Conference) and work on five 32-bit registers:
 * the four inputs, and a fifth scratch register. They are meant to be
 * quite fast on Pentium-class processors. These are not the fastest
 * published, but they are "fast enough" and they are unencumbered as
 * far as intellectual property is concerned (note: these are rewritten
 * from the article itself, and hence are not covered by the GPL on
 * Dag's code, which was not used here).
 *
 * The output bits are permuted. Here is the correspondence:
 *   S0:  1420
 *   S1:  2031
 *   S2:  2314
 *   S3:  1234
 *   S4:  1403
 *   S5:  1302
 *   S6:  0142
 *   S7:  4310
 * (for instance, the output of S0 is in "r1, r4, r2, r0").
 *
 * Every macro argument must be a plain lvalue: each one is both read
 * and assigned, several times, inside the expansion.
 */

/* S-box 0: inputs r0..r3, scratch r4; output order r1, r4, r2, r0. */
#define S0(r0, r1, r2, r3, r4)   do { \
        r3 ^= r0; \
        r4  = r1; \
        r1 &= r3; \
        r4 ^= r2; \
        r1 ^= r0; \
        r0 |= r3; \
        r0 ^= r4; \
        r4 ^= r3; \
        r3 ^= r2; \
        r2 |= r1; \
        r2 ^= r4; \
        r4  = ~r4; \
        r4 |= r1; \
        r1 ^= r3; \
        r1 ^= r4; \
        r3 |= r0; \
        r1 ^= r3; \
        r4 ^= r3; \
    } while (0)
81
/* S-box 1: inputs r0..r3, scratch r4; output order r2, r0, r3, r1. */
#define S1(r0, r1, r2, r3, r4)   do { \
        r0  = ~r0; \
        r2  = ~r2; \
        r4  = r0; \
        r0 &= r1; \
        r2 ^= r0; \
        r0 |= r3; \
        r3 ^= r2; \
        r1 ^= r0; \
        r0 ^= r4; \
        r4 |= r1; \
        r1 ^= r3; \
        r2 |= r0; \
        r2 &= r4; \
        r0 ^= r1; \
        r1 &= r2; \
        r1 ^= r0; \
        r0 &= r2; \
        r0 ^= r4; \
    } while (0)
94
/* S-box 2: inputs r0..r3, scratch r4; output order r2, r3, r1, r4. */
#define S2(r0, r1, r2, r3, r4)   do { \
        r4  = r0; \
        r0 &= r2; \
        r0 ^= r3; \
        r2 ^= r1; \
        r2 ^= r0; \
        r3 |= r4; \
        r3 ^= r1; \
        r4 ^= r2; \
        r1  = r3; \
        r3 |= r4; \
        r3 ^= r0; \
        r0 &= r1; \
        r4 ^= r0; \
        r1 ^= r3; \
        r1 ^= r4; \
        r4  = ~r4; \
    } while (0)
105
/* S-box 3: inputs r0..r3, scratch r4; output order r1, r2, r3, r4. */
#define S3(r0, r1, r2, r3, r4)   do { \
        r4  = r0; \
        r0 |= r3; \
        r3 ^= r1; \
        r1 &= r4; \
        r4 ^= r2; \
        r2 ^= r3; \
        r3 &= r0; \
        r4 |= r1; \
        r3 ^= r4; \
        r0 ^= r1; \
        r4 &= r0; \
        r1 ^= r3; \
        r4 ^= r2; \
        r1 |= r0; \
        r1 ^= r2; \
        r0 ^= r3; \
        r2  = r1; \
        r1 |= r3; \
        r1 ^= r0; \
    } while (0)
118
/* S-box 4: inputs r0..r3, scratch r4; output order r1, r4, r0, r3. */
#define S4(r0, r1, r2, r3, r4)   do { \
        r1 ^= r3; \
        r3  = ~r3; \
        r2 ^= r3; \
        r3 ^= r0; \
        r4  = r1; \
        r1 &= r3; \
        r1 ^= r2; \
        r4 ^= r3; \
        r0 ^= r4; \
        r2 &= r4; \
        r2 ^= r0; \
        r0 &= r1; \
        r3 ^= r0; \
        r4 |= r1; \
        r4 ^= r0; \
        r0 |= r3; \
        r0 ^= r2; \
        r2 &= r3; \
        r0  = ~r0; \
        r4 ^= r2; \
    } while (0)
131
/* S-box 5: inputs r0..r3, scratch r4; output order r1, r3, r0, r2. */
#define S5(r0, r1, r2, r3, r4)   do { \
        r0 ^= r1; \
        r1 ^= r3; \
        r3  = ~r3; \
        r4  = r1; \
        r1 &= r0; \
        r2 ^= r3; \
        r1 ^= r2; \
        r2 |= r4; \
        r4 ^= r3; \
        r3 &= r1; \
        r3 ^= r0; \
        r4 ^= r1; \
        r4 ^= r2; \
        r2 ^= r0; \
        r0 &= r3; \
        r2  = ~r2; \
        r0 ^= r4; \
        r4 |= r3; \
        r2 ^= r4; \
    } while (0)
144
/* S-box 6: inputs r0..r3, scratch r4; output order r0, r1, r4, r2. */
#define S6(r0, r1, r2, r3, r4)   do { \
        r2  = ~r2; \
        r4  = r3; \
        r3 &= r0; \
        r0 ^= r4; \
        r3 ^= r2; \
        r2 |= r4; \
        r1 ^= r3; \
        r2 ^= r0; \
        r0 |= r1; \
        r2 ^= r1; \
        r4 ^= r0; \
        r0 |= r3; \
        r0 ^= r2; \
        r4 ^= r3; \
        r4 ^= r0; \
        r3  = ~r3; \
        r2 &= r4; \
        r2 ^= r3; \
    } while (0)
157
/* S-box 7: inputs r0..r3, scratch r4; output order r4, r3, r1, r0. */
#define S7(r0, r1, r2, r3, r4)   do { \
        r4  = r1; \
        r1 |= r2; \
        r1 ^= r3; \
        r4 ^= r2; \
        r2 ^= r1; \
        r3 |= r4; \
        r3 &= r0; \
        r4 ^= r2; \
        r3 ^= r1; \
        r1 |= r4; \
        r1 ^= r0; \
        r0 |= r4; \
        r0 ^= r2; \
        r1 ^= r4; \
        r2 ^= r1; \
        r1 &= r0; \
        r1 ^= r4; \
        r2  = ~r2; \
        r2 |= r0; \
        r4 ^= r2; \
    } while (0)
171
/*
 * The Serpent linear transform, applied in place to four 32-bit words.
 * The statements depend on each other and must stay in this order.
 * Arguments must be plain lvalues (each is read and written repeatedly).
 */
#define SERPENT_LT(x0, x1, x2, x3)   do { \
        x0  = ROLc(x0, 13); \
        x2  = ROLc(x2, 3); \
        x1 ^= x0 ^ x2; \
        x3 ^= x2 ^ T32(x0 << 3); \
        x1  = ROLc(x1, 1); \
        x3  = ROLc(x3, 7); \
        x0 ^= x1 ^ x3; \
        x2 ^= x3 ^ T32(x1 << 7); \
        x0  = ROLc(x0, 5); \
        x2  = ROLc(x2, 22); \
    } while (0)
187
188 /* ======================================================================== */
189
190 /*
191 * Initialize Sosemanuk's state by providing a key. The key is an array of
192 * 1 to 32 bytes.
193 * @param st The Sosemanuk state
194 * @param key Key
195 * @param keylen Length of key in bytes
196 * @return CRYPT_OK on success
197 */
sosemanuk_setup(sosemanuk_state * st,const unsigned char * key,unsigned long keylen)198 int sosemanuk_setup(sosemanuk_state *st, const unsigned char *key, unsigned long keylen)
199 {
200 /*
201 * This key schedule is actually a truncated Serpent key schedule.
202 * The key-derived words (w_i) are computed within the eight
203 * local variables w0 to w7, which are reused again and again.
204 */
205
206 #define SKS(S, o0, o1, o2, o3, d0, d1, d2, d3) do { \
207 ulong32 r0, r1, r2, r3, r4; \
208 r0 = w ## o0; \
209 r1 = w ## o1; \
210 r2 = w ## o2; \
211 r3 = w ## o3; \
212 S(r0, r1, r2, r3, r4); \
213 st->kc[i ++] = r ## d0; \
214 st->kc[i ++] = r ## d1; \
215 st->kc[i ++] = r ## d2; \
216 st->kc[i ++] = r ## d3; \
217 } while (0)
218
219 #define SKS0 SKS(S0, 4, 5, 6, 7, 1, 4, 2, 0)
220 #define SKS1 SKS(S1, 0, 1, 2, 3, 2, 0, 3, 1)
221 #define SKS2 SKS(S2, 4, 5, 6, 7, 2, 3, 1, 4)
222 #define SKS3 SKS(S3, 0, 1, 2, 3, 1, 2, 3, 4)
223 #define SKS4 SKS(S4, 4, 5, 6, 7, 1, 4, 0, 3)
224 #define SKS5 SKS(S5, 0, 1, 2, 3, 1, 3, 0, 2)
225 #define SKS6 SKS(S6, 4, 5, 6, 7, 0, 1, 4, 2)
226 #define SKS7 SKS(S7, 0, 1, 2, 3, 4, 3, 1, 0)
227
228 #define WUP(wi, wi5, wi3, wi1, cc) do { \
229 ulong32 tt = (wi) ^ (wi5) ^ (wi3) \
230 ^ (wi1) ^ (0x9E3779B9 ^ (ulong32)(cc)); \
231 (wi) = ROLc(tt, 11); \
232 } while (0)
233
234 #define WUP0(cc) do { \
235 WUP(w0, w3, w5, w7, cc); \
236 WUP(w1, w4, w6, w0, cc + 1); \
237 WUP(w2, w5, w7, w1, cc + 2); \
238 WUP(w3, w6, w0, w2, cc + 3); \
239 } while (0)
240
241 #define WUP1(cc) do { \
242 WUP(w4, w7, w1, w3, cc); \
243 WUP(w5, w0, w2, w4, cc + 1); \
244 WUP(w6, w1, w3, w5, cc + 2); \
245 WUP(w7, w2, w4, w6, cc + 3); \
246 } while (0)
247
248 unsigned char wbuf[32];
249 ulong32 w0, w1, w2, w3, w4, w5, w6, w7;
250 int i = 0;
251
252 LTC_ARGCHK(st != NULL);
253 LTC_ARGCHK(key != NULL);
254 LTC_ARGCHK(keylen > 0 && keylen <= 32);
255
256 /*
257 * The key is copied into the wbuf[] buffer and padded to 256 bits
258 * as described in the Serpent specification.
259 */
260 XMEMCPY(wbuf, key, keylen);
261 if (keylen < 32) {
262 wbuf[keylen] = 0x01;
263 if (keylen < 31) {
264 XMEMSET(wbuf + keylen + 1, 0, 31 - keylen);
265 }
266 }
267
268 LOAD32L(w0, wbuf);
269 LOAD32L(w1, wbuf + 4);
270 LOAD32L(w2, wbuf + 8);
271 LOAD32L(w3, wbuf + 12);
272 LOAD32L(w4, wbuf + 16);
273 LOAD32L(w5, wbuf + 20);
274 LOAD32L(w6, wbuf + 24);
275 LOAD32L(w7, wbuf + 28);
276
277 WUP0(0); SKS3;
278 WUP1(4); SKS2;
279 WUP0(8); SKS1;
280 WUP1(12); SKS0;
281 WUP0(16); SKS7;
282 WUP1(20); SKS6;
283 WUP0(24); SKS5;
284 WUP1(28); SKS4;
285 WUP0(32); SKS3;
286 WUP1(36); SKS2;
287 WUP0(40); SKS1;
288 WUP1(44); SKS0;
289 WUP0(48); SKS7;
290 WUP1(52); SKS6;
291 WUP0(56); SKS5;
292 WUP1(60); SKS4;
293 WUP0(64); SKS3;
294 WUP1(68); SKS2;
295 WUP0(72); SKS1;
296 WUP1(76); SKS0;
297 WUP0(80); SKS7;
298 WUP1(84); SKS6;
299 WUP0(88); SKS5;
300 WUP1(92); SKS4;
301 WUP0(96); SKS3;
302
303 #undef SKS
304 #undef SKS0
305 #undef SKS1
306 #undef SKS2
307 #undef SKS3
308 #undef SKS4
309 #undef SKS5
310 #undef SKS6
311 #undef SKS7
312 #undef WUP
313 #undef WUP0
314 #undef WUP1
315
316 return CRYPT_OK;
317 }
318
319
320 /*
321 * Initialization continues by setting the IV. The IV length is up to 16 bytes.
322 * If "ivlen" is 0 (no IV), then the "iv" parameter can be NULL. If multiple
323 * encryptions/decryptions are to be performed with the same key and
324 * sosemanuk_done() has not been called, only sosemanuk_setiv() need be called
325 * to set the state.
326 * @param st The Sosemanuk state
327 * @param iv Initialization vector
328 * @param ivlen Length of iv in bytes
329 * @return CRYPT_OK on success
330 */
sosemanuk_setiv(sosemanuk_state * st,const unsigned char * iv,unsigned long ivlen)331 int sosemanuk_setiv(sosemanuk_state *st, const unsigned char *iv, unsigned long ivlen)
332 {
333
334 /*
335 * The Serpent key addition step.
336 */
337 #define KA(zc, x0, x1, x2, x3) do { \
338 x0 ^= st->kc[(zc)]; \
339 x1 ^= st->kc[(zc) + 1]; \
340 x2 ^= st->kc[(zc) + 2]; \
341 x3 ^= st->kc[(zc) + 3]; \
342 } while (0)
343
344 /*
345 * One Serpent round.
346 * zc = current subkey counter
347 * S = S-box macro for this round
348 * i0 to i4 = input register numbers (the fifth is a scratch register)
349 * o0 to o3 = output register numbers
350 */
351 #define FSS(zc, S, i0, i1, i2, i3, i4, o0, o1, o2, o3) do { \
352 KA(zc, r ## i0, r ## i1, r ## i2, r ## i3); \
353 S(r ## i0, r ## i1, r ## i2, r ## i3, r ## i4); \
354 SERPENT_LT(r ## o0, r ## o1, r ## o2, r ## o3); \
355 } while (0)
356
357 /*
358 * Last Serpent round. Contrary to the "true" Serpent, we keep
359 * the linear transformation for that last round.
360 */
361 #define FSF(zc, S, i0, i1, i2, i3, i4, o0, o1, o2, o3) do { \
362 KA(zc, r ## i0, r ## i1, r ## i2, r ## i3); \
363 S(r ## i0, r ## i1, r ## i2, r ## i3, r ## i4); \
364 SERPENT_LT(r ## o0, r ## o1, r ## o2, r ## o3); \
365 KA(zc + 4, r ## o0, r ## o1, r ## o2, r ## o3); \
366 } while (0)
367
368 ulong32 r0, r1, r2, r3, r4;
369 unsigned char ivtmp[16] = {0};
370
371 LTC_ARGCHK(st != NULL);
372 LTC_ARGCHK(ivlen <= 16);
373 LTC_ARGCHK(iv != NULL || ivlen == 0);
374
375 if (ivlen > 0) XMEMCPY(ivtmp, iv, ivlen);
376
377 /*
378 * Decode IV into four 32-bit words (little-endian).
379 */
380 LOAD32L(r0, ivtmp);
381 LOAD32L(r1, ivtmp + 4);
382 LOAD32L(r2, ivtmp + 8);
383 LOAD32L(r3, ivtmp + 12);
384
385 /*
386 * Encrypt IV with Serpent24. Some values are extracted from the
387 * output of the twelfth, eighteenth and twenty-fourth rounds.
388 */
389 FSS(0, S0, 0, 1, 2, 3, 4, 1, 4, 2, 0);
390 FSS(4, S1, 1, 4, 2, 0, 3, 2, 1, 0, 4);
391 FSS(8, S2, 2, 1, 0, 4, 3, 0, 4, 1, 3);
392 FSS(12, S3, 0, 4, 1, 3, 2, 4, 1, 3, 2);
393 FSS(16, S4, 4, 1, 3, 2, 0, 1, 0, 4, 2);
394 FSS(20, S5, 1, 0, 4, 2, 3, 0, 2, 1, 4);
395 FSS(24, S6, 0, 2, 1, 4, 3, 0, 2, 3, 1);
396 FSS(28, S7, 0, 2, 3, 1, 4, 4, 1, 2, 0);
397 FSS(32, S0, 4, 1, 2, 0, 3, 1, 3, 2, 4);
398 FSS(36, S1, 1, 3, 2, 4, 0, 2, 1, 4, 3);
399 FSS(40, S2, 2, 1, 4, 3, 0, 4, 3, 1, 0);
400 FSS(44, S3, 4, 3, 1, 0, 2, 3, 1, 0, 2);
401 st->s09 = r3;
402 st->s08 = r1;
403 st->s07 = r0;
404 st->s06 = r2;
405
406 FSS(48, S4, 3, 1, 0, 2, 4, 1, 4, 3, 2);
407 FSS(52, S5, 1, 4, 3, 2, 0, 4, 2, 1, 3);
408 FSS(56, S6, 4, 2, 1, 3, 0, 4, 2, 0, 1);
409 FSS(60, S7, 4, 2, 0, 1, 3, 3, 1, 2, 4);
410 FSS(64, S0, 3, 1, 2, 4, 0, 1, 0, 2, 3);
411 FSS(68, S1, 1, 0, 2, 3, 4, 2, 1, 3, 0);
412 st->r1 = r2;
413 st->s04 = r1;
414 st->r2 = r3;
415 st->s05 = r0;
416
417 FSS(72, S2, 2, 1, 3, 0, 4, 3, 0, 1, 4);
418 FSS(76, S3, 3, 0, 1, 4, 2, 0, 1, 4, 2);
419 FSS(80, S4, 0, 1, 4, 2, 3, 1, 3, 0, 2);
420 FSS(84, S5, 1, 3, 0, 2, 4, 3, 2, 1, 0);
421 FSS(88, S6, 3, 2, 1, 0, 4, 3, 2, 4, 1);
422 FSF(92, S7, 3, 2, 4, 1, 0, 0, 1, 2, 3);
423 st->s03 = r0;
424 st->s02 = r1;
425 st->s01 = r2;
426 st->s00 = r3;
427
428 st->ptr = sizeof(st->buf);
429
430 #undef KA
431 #undef FSS
432 #undef FSF
433
434 return CRYPT_OK;
435 }
436
437 /*
438 * Multiplication by alpha: alpha * x = T32(x << 8) ^ mul_a[x >> 24]
439 */
440 static const ulong32 mul_a[] = {
441 0x00000000, 0xE19FCF13, 0x6B973726, 0x8A08F835,
442 0xD6876E4C, 0x3718A15F, 0xBD10596A, 0x5C8F9679,
443 0x05A7DC98, 0xE438138B, 0x6E30EBBE, 0x8FAF24AD,
444 0xD320B2D4, 0x32BF7DC7, 0xB8B785F2, 0x59284AE1,
445 0x0AE71199, 0xEB78DE8A, 0x617026BF, 0x80EFE9AC,
446 0xDC607FD5, 0x3DFFB0C6, 0xB7F748F3, 0x566887E0,
447 0x0F40CD01, 0xEEDF0212, 0x64D7FA27, 0x85483534,
448 0xD9C7A34D, 0x38586C5E, 0xB250946B, 0x53CF5B78,
449 0x1467229B, 0xF5F8ED88, 0x7FF015BD, 0x9E6FDAAE,
450 0xC2E04CD7, 0x237F83C4, 0xA9777BF1, 0x48E8B4E2,
451 0x11C0FE03, 0xF05F3110, 0x7A57C925, 0x9BC80636,
452 0xC747904F, 0x26D85F5C, 0xACD0A769, 0x4D4F687A,
453 0x1E803302, 0xFF1FFC11, 0x75170424, 0x9488CB37,
454 0xC8075D4E, 0x2998925D, 0xA3906A68, 0x420FA57B,
455 0x1B27EF9A, 0xFAB82089, 0x70B0D8BC, 0x912F17AF,
456 0xCDA081D6, 0x2C3F4EC5, 0xA637B6F0, 0x47A879E3,
457 0x28CE449F, 0xC9518B8C, 0x435973B9, 0xA2C6BCAA,
458 0xFE492AD3, 0x1FD6E5C0, 0x95DE1DF5, 0x7441D2E6,
459 0x2D699807, 0xCCF65714, 0x46FEAF21, 0xA7616032,
460 0xFBEEF64B, 0x1A713958, 0x9079C16D, 0x71E60E7E,
461 0x22295506, 0xC3B69A15, 0x49BE6220, 0xA821AD33,
462 0xF4AE3B4A, 0x1531F459, 0x9F390C6C, 0x7EA6C37F,
463 0x278E899E, 0xC611468D, 0x4C19BEB8, 0xAD8671AB,
464 0xF109E7D2, 0x109628C1, 0x9A9ED0F4, 0x7B011FE7,
465 0x3CA96604, 0xDD36A917, 0x573E5122, 0xB6A19E31,
466 0xEA2E0848, 0x0BB1C75B, 0x81B93F6E, 0x6026F07D,
467 0x390EBA9C, 0xD891758F, 0x52998DBA, 0xB30642A9,
468 0xEF89D4D0, 0x0E161BC3, 0x841EE3F6, 0x65812CE5,
469 0x364E779D, 0xD7D1B88E, 0x5DD940BB, 0xBC468FA8,
470 0xE0C919D1, 0x0156D6C2, 0x8B5E2EF7, 0x6AC1E1E4,
471 0x33E9AB05, 0xD2766416, 0x587E9C23, 0xB9E15330,
472 0xE56EC549, 0x04F10A5A, 0x8EF9F26F, 0x6F663D7C,
473 0x50358897, 0xB1AA4784, 0x3BA2BFB1, 0xDA3D70A2,
474 0x86B2E6DB, 0x672D29C8, 0xED25D1FD, 0x0CBA1EEE,
475 0x5592540F, 0xB40D9B1C, 0x3E056329, 0xDF9AAC3A,
476 0x83153A43, 0x628AF550, 0xE8820D65, 0x091DC276,
477 0x5AD2990E, 0xBB4D561D, 0x3145AE28, 0xD0DA613B,
478 0x8C55F742, 0x6DCA3851, 0xE7C2C064, 0x065D0F77,
479 0x5F754596, 0xBEEA8A85, 0x34E272B0, 0xD57DBDA3,
480 0x89F22BDA, 0x686DE4C9, 0xE2651CFC, 0x03FAD3EF,
481 0x4452AA0C, 0xA5CD651F, 0x2FC59D2A, 0xCE5A5239,
482 0x92D5C440, 0x734A0B53, 0xF942F366, 0x18DD3C75,
483 0x41F57694, 0xA06AB987, 0x2A6241B2, 0xCBFD8EA1,
484 0x977218D8, 0x76EDD7CB, 0xFCE52FFE, 0x1D7AE0ED,
485 0x4EB5BB95, 0xAF2A7486, 0x25228CB3, 0xC4BD43A0,
486 0x9832D5D9, 0x79AD1ACA, 0xF3A5E2FF, 0x123A2DEC,
487 0x4B12670D, 0xAA8DA81E, 0x2085502B, 0xC11A9F38,
488 0x9D950941, 0x7C0AC652, 0xF6023E67, 0x179DF174,
489 0x78FBCC08, 0x9964031B, 0x136CFB2E, 0xF2F3343D,
490 0xAE7CA244, 0x4FE36D57, 0xC5EB9562, 0x24745A71,
491 0x7D5C1090, 0x9CC3DF83, 0x16CB27B6, 0xF754E8A5,
492 0xABDB7EDC, 0x4A44B1CF, 0xC04C49FA, 0x21D386E9,
493 0x721CDD91, 0x93831282, 0x198BEAB7, 0xF81425A4,
494 0xA49BB3DD, 0x45047CCE, 0xCF0C84FB, 0x2E934BE8,
495 0x77BB0109, 0x9624CE1A, 0x1C2C362F, 0xFDB3F93C,
496 0xA13C6F45, 0x40A3A056, 0xCAAB5863, 0x2B349770,
497 0x6C9CEE93, 0x8D032180, 0x070BD9B5, 0xE69416A6,
498 0xBA1B80DF, 0x5B844FCC, 0xD18CB7F9, 0x301378EA,
499 0x693B320B, 0x88A4FD18, 0x02AC052D, 0xE333CA3E,
500 0xBFBC5C47, 0x5E239354, 0xD42B6B61, 0x35B4A472,
501 0x667BFF0A, 0x87E43019, 0x0DECC82C, 0xEC73073F,
502 0xB0FC9146, 0x51635E55, 0xDB6BA660, 0x3AF46973,
503 0x63DC2392, 0x8243EC81, 0x084B14B4, 0xE9D4DBA7,
504 0xB55B4DDE, 0x54C482CD, 0xDECC7AF8, 0x3F53B5EB
505 };
506
507 /*
508 * Multiplication by 1/alpha: 1/alpha * x = (x >> 8) ^ mul_ia[x & 0xFF]
509 */
510 static const ulong32 mul_ia[] = {
511 0x00000000, 0x180F40CD, 0x301E8033, 0x2811C0FE,
512 0x603CA966, 0x7833E9AB, 0x50222955, 0x482D6998,
513 0xC078FBCC, 0xD877BB01, 0xF0667BFF, 0xE8693B32,
514 0xA04452AA, 0xB84B1267, 0x905AD299, 0x88559254,
515 0x29F05F31, 0x31FF1FFC, 0x19EEDF02, 0x01E19FCF,
516 0x49CCF657, 0x51C3B69A, 0x79D27664, 0x61DD36A9,
517 0xE988A4FD, 0xF187E430, 0xD99624CE, 0xC1996403,
518 0x89B40D9B, 0x91BB4D56, 0xB9AA8DA8, 0xA1A5CD65,
519 0x5249BE62, 0x4A46FEAF, 0x62573E51, 0x7A587E9C,
520 0x32751704, 0x2A7A57C9, 0x026B9737, 0x1A64D7FA,
521 0x923145AE, 0x8A3E0563, 0xA22FC59D, 0xBA208550,
522 0xF20DECC8, 0xEA02AC05, 0xC2136CFB, 0xDA1C2C36,
523 0x7BB9E153, 0x63B6A19E, 0x4BA76160, 0x53A821AD,
524 0x1B854835, 0x038A08F8, 0x2B9BC806, 0x339488CB,
525 0xBBC11A9F, 0xA3CE5A52, 0x8BDF9AAC, 0x93D0DA61,
526 0xDBFDB3F9, 0xC3F2F334, 0xEBE333CA, 0xF3EC7307,
527 0xA492D5C4, 0xBC9D9509, 0x948C55F7, 0x8C83153A,
528 0xC4AE7CA2, 0xDCA13C6F, 0xF4B0FC91, 0xECBFBC5C,
529 0x64EA2E08, 0x7CE56EC5, 0x54F4AE3B, 0x4CFBEEF6,
530 0x04D6876E, 0x1CD9C7A3, 0x34C8075D, 0x2CC74790,
531 0x8D628AF5, 0x956DCA38, 0xBD7C0AC6, 0xA5734A0B,
532 0xED5E2393, 0xF551635E, 0xDD40A3A0, 0xC54FE36D,
533 0x4D1A7139, 0x551531F4, 0x7D04F10A, 0x650BB1C7,
534 0x2D26D85F, 0x35299892, 0x1D38586C, 0x053718A1,
535 0xF6DB6BA6, 0xEED42B6B, 0xC6C5EB95, 0xDECAAB58,
536 0x96E7C2C0, 0x8EE8820D, 0xA6F942F3, 0xBEF6023E,
537 0x36A3906A, 0x2EACD0A7, 0x06BD1059, 0x1EB25094,
538 0x569F390C, 0x4E9079C1, 0x6681B93F, 0x7E8EF9F2,
539 0xDF2B3497, 0xC724745A, 0xEF35B4A4, 0xF73AF469,
540 0xBF179DF1, 0xA718DD3C, 0x8F091DC2, 0x97065D0F,
541 0x1F53CF5B, 0x075C8F96, 0x2F4D4F68, 0x37420FA5,
542 0x7F6F663D, 0x676026F0, 0x4F71E60E, 0x577EA6C3,
543 0xE18D0321, 0xF98243EC, 0xD1938312, 0xC99CC3DF,
544 0x81B1AA47, 0x99BEEA8A, 0xB1AF2A74, 0xA9A06AB9,
545 0x21F5F8ED, 0x39FAB820, 0x11EB78DE, 0x09E43813,
546 0x41C9518B, 0x59C61146, 0x71D7D1B8, 0x69D89175,
547 0xC87D5C10, 0xD0721CDD, 0xF863DC23, 0xE06C9CEE,
548 0xA841F576, 0xB04EB5BB, 0x985F7545, 0x80503588,
549 0x0805A7DC, 0x100AE711, 0x381B27EF, 0x20146722,
550 0x68390EBA, 0x70364E77, 0x58278E89, 0x4028CE44,
551 0xB3C4BD43, 0xABCBFD8E, 0x83DA3D70, 0x9BD57DBD,
552 0xD3F81425, 0xCBF754E8, 0xE3E69416, 0xFBE9D4DB,
553 0x73BC468F, 0x6BB30642, 0x43A2C6BC, 0x5BAD8671,
554 0x1380EFE9, 0x0B8FAF24, 0x239E6FDA, 0x3B912F17,
555 0x9A34E272, 0x823BA2BF, 0xAA2A6241, 0xB225228C,
556 0xFA084B14, 0xE2070BD9, 0xCA16CB27, 0xD2198BEA,
557 0x5A4C19BE, 0x42435973, 0x6A52998D, 0x725DD940,
558 0x3A70B0D8, 0x227FF015, 0x0A6E30EB, 0x12617026,
559 0x451FD6E5, 0x5D109628, 0x750156D6, 0x6D0E161B,
560 0x25237F83, 0x3D2C3F4E, 0x153DFFB0, 0x0D32BF7D,
561 0x85672D29, 0x9D686DE4, 0xB579AD1A, 0xAD76EDD7,
562 0xE55B844F, 0xFD54C482, 0xD545047C, 0xCD4A44B1,
563 0x6CEF89D4, 0x74E0C919, 0x5CF109E7, 0x44FE492A,
564 0x0CD320B2, 0x14DC607F, 0x3CCDA081, 0x24C2E04C,
565 0xAC977218, 0xB49832D5, 0x9C89F22B, 0x8486B2E6,
566 0xCCABDB7E, 0xD4A49BB3, 0xFCB55B4D, 0xE4BA1B80,
567 0x17566887, 0x0F59284A, 0x2748E8B4, 0x3F47A879,
568 0x776AC1E1, 0x6F65812C, 0x477441D2, 0x5F7B011F,
569 0xD72E934B, 0xCF21D386, 0xE7301378, 0xFF3F53B5,
570 0xB7123A2D, 0xAF1D7AE0, 0x870CBA1E, 0x9F03FAD3,
571 0x3EA637B6, 0x26A9777B, 0x0EB8B785, 0x16B7F748,
572 0x5E9A9ED0, 0x4695DE1D, 0x6E841EE3, 0x768B5E2E,
573 0xFEDECC7A, 0xE6D18CB7, 0xCEC04C49, 0xD6CF0C84,
574 0x9EE2651C, 0x86ED25D1, 0xAEFCE52F, 0xB6F3A5E2
575 };
576
577
578 /*
579 * Compute the next block of bits of output stream. This is equivalent
580 * to one full rotation of the shift register.
581 */
s_sosemanuk_internal(sosemanuk_state * st)582 static LTC_INLINE void s_sosemanuk_internal(sosemanuk_state *st)
583 {
584 /*
585 * MUL_A(x) computes alpha * x (in F_{2^32}).
586 * MUL_G(x) computes 1/alpha * x (in F_{2^32}).
587 */
588 #define MUL_A(x) (T32((x) << 8) ^ mul_a[(x) >> 24])
589 #define MUL_G(x) (((x) >> 8) ^ mul_ia[(x) & 0xFF])
590
591 /*
592 * This macro computes the special multiplexer, which chooses
593 * between "x" and "x xor y", depending on the least significant
594 * bit of the control word. We use the C "?:" selection operator
595 * (which most compilers know how to optimise) except for Alpha,
596 * where the manual sign extension seems to perform equally well
597 * with DEC/Compaq/HP compiler, and much better with gcc.
598 */
599 #ifdef __alpha
600 #define XMUX(c, x, y) ((((signed int)((c) << 31) >> 31) & (y)) ^ (x))
601 #else
602 #define XMUX(c, x, y) (((c) & 0x1) ? ((x) ^ (y)) : (x))
603 #endif
604
605 /*
606 * FSM() updates the finite state machine.
607 */
608 #define FSM(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9) do { \
609 ulong32 tt, or1; \
610 tt = XMUX(r1, s ## x1, s ## x8); \
611 or1 = r1; \
612 r1 = T32(r2 + tt); \
613 tt = T32(or1 * 0x54655307); \
614 r2 = ROLc(tt, 7); \
615 } while (0)
616
617 /*
618 * LRU updates the shift register; the dropped value is stored
619 * in variable "dd".
620 */
621 #define LRU(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, dd) do { \
622 dd = s ## x0; \
623 s ## x0 = MUL_A(s ## x0) ^ MUL_G(s ## x3) ^ s ## x9; \
624 } while (0)
625
626 /*
627 * CC1 stores into variable "ee" the next intermediate word
628 * (combination of the new states of the LFSR and the FSM).
629 */
630 #define CC1(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, ee) do { \
631 ee = T32(s ## x9 + r1) ^ r2; \
632 } while (0)
633
634 /*
635 * STEP computes one internal round. "dd" receives the "s_t"
636 * value (dropped from the LFSR) and "ee" gets the value computed
637 * from the LFSR and FSM.
638 */
639 #define STEP(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, dd, ee) do { \
640 FSM(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9); \
641 LRU(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, dd); \
642 CC1(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, ee); \
643 } while (0)
644
645 /*
646 * Apply one Serpent round (with the provided S-box macro), XOR
647 * the result with the "v" values, and encode the result into
648 * the destination buffer, at the provided offset. The "x*"
649 * arguments encode the output permutation of the "S" macro.
650 */
651 #define SRD(S, x0, x1, x2, x3, ooff) do { \
652 S(u0, u1, u2, u3, u4); \
653 STORE32L(u ## x0 ^ v0, st->buf + ooff); \
654 STORE32L(u ## x1 ^ v1, st->buf + ooff + 4); \
655 STORE32L(u ## x2 ^ v2, st->buf + ooff + 8); \
656 STORE32L(u ## x3 ^ v3, st->buf + ooff + 12); \
657 } while (0)
658
659 ulong32 s00 = st->s00;
660 ulong32 s01 = st->s01;
661 ulong32 s02 = st->s02;
662 ulong32 s03 = st->s03;
663 ulong32 s04 = st->s04;
664 ulong32 s05 = st->s05;
665 ulong32 s06 = st->s06;
666 ulong32 s07 = st->s07;
667 ulong32 s08 = st->s08;
668 ulong32 s09 = st->s09;
669 ulong32 r1 = st->r1;
670 ulong32 r2 = st->r2;
671 ulong32 u0, u1, u2, u3, u4;
672 ulong32 v0, v1, v2, v3;
673
674 STEP(00, 01, 02, 03, 04, 05, 06, 07, 08, 09, v0, u0);
675 STEP(01, 02, 03, 04, 05, 06, 07, 08, 09, 00, v1, u1);
676 STEP(02, 03, 04, 05, 06, 07, 08, 09, 00, 01, v2, u2);
677 STEP(03, 04, 05, 06, 07, 08, 09, 00, 01, 02, v3, u3);
678 SRD(S2, 2, 3, 1, 4, 0);
679 STEP(04, 05, 06, 07, 08, 09, 00, 01, 02, 03, v0, u0);
680 STEP(05, 06, 07, 08, 09, 00, 01, 02, 03, 04, v1, u1);
681 STEP(06, 07, 08, 09, 00, 01, 02, 03, 04, 05, v2, u2);
682 STEP(07, 08, 09, 00, 01, 02, 03, 04, 05, 06, v3, u3);
683 SRD(S2, 2, 3, 1, 4, 16);
684 STEP(08, 09, 00, 01, 02, 03, 04, 05, 06, 07, v0, u0);
685 STEP(09, 00, 01, 02, 03, 04, 05, 06, 07, 08, v1, u1);
686 STEP(00, 01, 02, 03, 04, 05, 06, 07, 08, 09, v2, u2);
687 STEP(01, 02, 03, 04, 05, 06, 07, 08, 09, 00, v3, u3);
688 SRD(S2, 2, 3, 1, 4, 32);
689 STEP(02, 03, 04, 05, 06, 07, 08, 09, 00, 01, v0, u0);
690 STEP(03, 04, 05, 06, 07, 08, 09, 00, 01, 02, v1, u1);
691 STEP(04, 05, 06, 07, 08, 09, 00, 01, 02, 03, v2, u2);
692 STEP(05, 06, 07, 08, 09, 00, 01, 02, 03, 04, v3, u3);
693 SRD(S2, 2, 3, 1, 4, 48);
694 STEP(06, 07, 08, 09, 00, 01, 02, 03, 04, 05, v0, u0);
695 STEP(07, 08, 09, 00, 01, 02, 03, 04, 05, 06, v1, u1);
696 STEP(08, 09, 00, 01, 02, 03, 04, 05, 06, 07, v2, u2);
697 STEP(09, 00, 01, 02, 03, 04, 05, 06, 07, 08, v3, u3);
698 SRD(S2, 2, 3, 1, 4, 64);
699
700 st->s00 = s00;
701 st->s01 = s01;
702 st->s02 = s02;
703 st->s03 = s03;
704 st->s04 = s04;
705 st->s05 = s05;
706 st->s06 = s06;
707 st->s07 = s07;
708 st->s08 = s08;
709 st->s09 = s09;
710 st->r1 = r1;
711 st->r2 = r2;
712 }
713
714 /*
715 * Combine buffers in1[] and in2[] by XOR, result in out[]. The length
716 * is "datalen" (in bytes). Partial overlap of out[] with either in1[]
717 * or in2[] is not allowed. Total overlap (out == in1 and/or out == in2)
718 * is allowed.
719 */
s_xorbuf(const unsigned char * in1,const unsigned char * in2,unsigned char * out,unsigned long datalen)720 static LTC_INLINE void s_xorbuf(const unsigned char *in1, const unsigned char *in2,
721 unsigned char *out, unsigned long datalen)
722 {
723 while (datalen -- > 0) {
724 *out ++ = *in1 ++ ^ *in2 ++;
725 }
726 }
727
728
729 /*
730 * Cipher operation, as a stream cipher: data is read from the "in"
731 * buffer, combined by XOR with the stream, and the result is written
732 * in the "out" buffer. "in" and "out" must be either equal, or
733 * reference distinct buffers (no partial overlap is allowed).
734 * @param st The Sosemanuk state
735 * @param in Data in
736 * @param inlen Length of data in bytes
737 * @param out Data out
738 * @return CRYPT_OK on success
739 */
sosemanuk_crypt(sosemanuk_state * st,const unsigned char * in,unsigned long inlen,unsigned char * out)740 int sosemanuk_crypt(sosemanuk_state *st,
741 const unsigned char *in, unsigned long inlen, unsigned char *out)
742 {
743 LTC_ARGCHK(st != NULL);
744 LTC_ARGCHK(in != NULL);
745 LTC_ARGCHK(out != NULL);
746
747 if (st->ptr < (sizeof(st->buf))) {
748 unsigned long rlen = (sizeof(st->buf)) - st->ptr;
749
750 if (rlen > inlen) {
751 rlen = inlen;
752 }
753 s_xorbuf(st->buf + st->ptr, in, out, rlen);
754 in += rlen;
755 out += rlen;
756 inlen -= rlen;
757 st->ptr += rlen;
758 }
759 while (inlen > 0) {
760 s_sosemanuk_internal(st);
761 if (inlen >= sizeof(st->buf)) {
762 s_xorbuf(st->buf, in, out, sizeof(st->buf));
763 in += sizeof(st->buf);
764 out += sizeof(st->buf);
765 inlen -= sizeof(st->buf);
766 } else {
767 s_xorbuf(st->buf, in, out, inlen);
768 st->ptr = inlen;
769 inlen = 0;
770 }
771 }
772 return CRYPT_OK;
773 }
774
775
776
777 /*
778 * Cipher operation, as a PRNG: the provided output buffer is filled with
779 * pseudo-random bytes as output from the stream cipher.
780 * @param st The Sosemanuk state
781 * @param out Data out
782 * @param outlen Length of output in bytes
783 * @return CRYPT_OK on success
784 */
sosemanuk_keystream(sosemanuk_state * st,unsigned char * out,unsigned long outlen)785 int sosemanuk_keystream(sosemanuk_state *st, unsigned char *out, unsigned long outlen)
786 {
787 if (outlen == 0) return CRYPT_OK; /* nothing to do */
788 LTC_ARGCHK(out != NULL);
789 XMEMSET(out, 0, outlen);
790 return sosemanuk_crypt(st, out, outlen, out);
791 }
792
793
794 /*
795 * Terminate and clear Sosemanuk key context
796 * @param st The Sosemanuk state
797 * @return CRYPT_OK on success
798 */
sosemanuk_done(sosemanuk_state * st)799 int sosemanuk_done(sosemanuk_state *st)
800 {
801 LTC_ARGCHK(st != NULL);
802 zeromem(st, sizeof(sosemanuk_state));
803 return CRYPT_OK;
804 }
805
806
807 #endif
808