1 /* LibTomCrypt, modular cryptographic library -- Tom St Denis */
2 /* SPDX-License-Identifier: Unlicense */
3 
4 /*
5  * This LTC implementation was adapted from:
6  *    http://www.ecrypt.eu.org/stream/e2-sosemanuk.html
7  */
8 
9 /*
10  * SOSEMANUK reference implementation.
11  *
12  * This code is supposed to run on any conforming C implementation (C90
13  * or later).
14  *
15  * (c) 2005 X-CRYPT project. This software is provided 'as-is', without
16  * any express or implied warranty. In no event will the authors be held
17  * liable for any damages arising from the use of this software.
18  *
19  * Permission is granted to anyone to use this software for any purpose,
20  * including commercial applications, and to alter it and redistribute it
21  * freely, subject to no restriction.
22  *
23  * Technical remarks and questions can be addressed to
24  * <thomas.pornin@cryptolog.com>
25  */
26 
27 #include "tomcrypt_private.h"
28 
29 #ifdef LTC_SOSEMANUK
30 
31 /* ======================================================================== */
32 
/*
 * T32(x) keeps only the low 32 bits of x by masking with 0xFFFFFFFF.
 * We want (and sometimes need) to perform explicit truncations to 32 bits;
 * in this file it is applied after left shifts, additions and
 * multiplications on ulong32 values.
 * NOTE(review): presumably needed because ulong32 may be wider than
 * 32 bits on some targets -- confirm against the ulong32 definition.
 */
#define T32(x)   ((x) & (ulong32)0xFFFFFFFF)
37 
38 /*
39  * Some of our functions will be tagged as "inline" to help the compiler
40  * optimize things. We use "inline" only if the compiler is advanced
41  * enough to understand it; C99 compilers, and pre-C99 versions of gcc,
42  * understand enough "inline" for our purposes.
43  */
44 
45 /* ======================================================================== */
46 
47 /*
48  * Serpent S-boxes, implemented in bitslice mode. These circuits have
49  * been published by Dag Arne Osvik ("Speeding up Serpent", published in
50  * the 3rd AES Candidate Conference) and work on five 32-bit registers:
 * the four inputs, and a fifth scratch register. They are meant to be
52  * quite fast on Pentium-class processors. These are not the fastest
53  * published, but they are "fast enough" and they are unencumbered as
54  * far as intellectual property is concerned (note: these are rewritten
55  * from the article itself, and hence are not covered by the GPL on
56  * Dag's code, which was not used here).
57  *
 * The output bits are permuted. Here is the correspondence:
59  *   S0:  1420
60  *   S1:  2031
61  *   S2:  2314
62  *   S3:  1234
63  *   S4:  1403
64  *   S5:  1302
65  *   S6:  0142
66  *   S7:  4310
67  * (for instance, the output of S0 is in "r1, r4, r2, r0").
68  */
69 
/*
 * S0: Serpent S-box 0 as a bitsliced circuit. r0..r3 carry the inputs;
 * r4 is scratch (it is assigned before it is ever read). All five
 * arguments are modified in place, so they must be plain lvalues.
 * The outputs are left, in order, in r1, r4, r2, r0.
 */
#define S0(r0, r1, r2, r3, r4)   do { \
        r3 ^= r0;  r4  = r1; \
        r1 &= r3;  r4 ^= r2; \
        r1 ^= r0;  r0 |= r3; \
        r0 ^= r4;  r4 ^= r3; \
        r3 ^= r2;  r2 |= r1; \
        r2 ^= r4;  r4 = ~r4; \
        r4 |= r1;  r1 ^= r3; \
        r1 ^= r4;  r3 |= r0; \
        r1 ^= r3;  r4 ^= r3; \
    } while (0)
81 
/*
 * S1: Serpent S-box 1 as a bitsliced circuit. r0..r3 carry the inputs;
 * r4 is scratch (it is assigned before it is ever read). All five
 * arguments are modified in place, so they must be plain lvalues.
 * The outputs are left, in order, in r2, r0, r3, r1.
 */
#define S1(r0, r1, r2, r3, r4)   do { \
        r0 = ~r0;  r2 = ~r2; \
        r4  = r0;  r0 &= r1; \
        r2 ^= r0;  r0 |= r3; \
        r3 ^= r2;  r1 ^= r0; \
        r0 ^= r4;  r4 |= r1; \
        r1 ^= r3;  r2 |= r0; \
        r2 &= r4;  r0 ^= r1; \
        r1 &= r2; \
        r1 ^= r0;  r0 &= r2; \
        r0 ^= r4; \
    } while (0)
94 
/*
 * S2: Serpent S-box 2 as a bitsliced circuit. r0..r3 carry the inputs;
 * r4 is scratch (it is assigned before it is ever read). All five
 * arguments are modified in place, so they must be plain lvalues.
 * The outputs are left, in order, in r2, r3, r1, r4.
 */
#define S2(r0, r1, r2, r3, r4)   do { \
        r4  = r0;  r0 &= r2; \
        r0 ^= r3;  r2 ^= r1; \
        r2 ^= r0;  r3 |= r4; \
        r3 ^= r1;  r4 ^= r2; \
        r1  = r3;  r3 |= r4; \
        r3 ^= r0;  r0 &= r1; \
        r4 ^= r0;  r1 ^= r3; \
        r1 ^= r4;  r4 = ~r4; \
    } while (0)
105 
/*
 * S3: Serpent S-box 3 as a bitsliced circuit. r0..r3 carry the inputs;
 * r4 is scratch (it is assigned before it is ever read). All five
 * arguments are modified in place, so they must be plain lvalues.
 * The outputs are left, in order, in r1, r2, r3, r4.
 */
#define S3(r0, r1, r2, r3, r4)   do { \
        r4  = r0;  r0 |= r3; \
        r3 ^= r1;  r1 &= r4; \
        r4 ^= r2;  r2 ^= r3; \
        r3 &= r0;  r4 |= r1; \
        r3 ^= r4;  r0 ^= r1; \
        r4 &= r0;  r1 ^= r3; \
        r4 ^= r2;  r1 |= r0; \
        r1 ^= r2;  r0 ^= r3; \
        r2  = r1;  r1 |= r3; \
        r1 ^= r0; \
    } while (0)
118 
/*
 * S4: Serpent S-box 4 as a bitsliced circuit. r0..r3 carry the inputs;
 * r4 is scratch (it is assigned before it is ever read). All five
 * arguments are modified in place, so they must be plain lvalues.
 * The outputs are left, in order, in r1, r4, r0, r3.
 */
#define S4(r0, r1, r2, r3, r4)   do { \
        r1 ^= r3;  r3 = ~r3; \
        r2 ^= r3;  r3 ^= r0; \
        r4  = r1;  r1 &= r3; \
        r1 ^= r2;  r4 ^= r3; \
        r0 ^= r4;  r2 &= r4; \
        r2 ^= r0;  r0 &= r1; \
        r3 ^= r0;  r4 |= r1; \
        r4 ^= r0;  r0 |= r3; \
        r0 ^= r2;  r2 &= r3; \
        r0 = ~r0;  r4 ^= r2; \
    } while (0)
131 
/*
 * S5: Serpent S-box 5 as a bitsliced circuit. r0..r3 carry the inputs;
 * r4 is scratch (it is assigned before it is ever read). All five
 * arguments are modified in place, so they must be plain lvalues.
 * The outputs are left, in order, in r1, r3, r0, r2.
 */
#define S5(r0, r1, r2, r3, r4)   do { \
        r0 ^= r1;  r1 ^= r3; \
        r3 = ~r3;  r4  = r1; \
        r1 &= r0;  r2 ^= r3; \
        r1 ^= r2;  r2 |= r4; \
        r4 ^= r3;  r3 &= r1; \
        r3 ^= r0;  r4 ^= r1; \
        r4 ^= r2;  r2 ^= r0; \
        r0 &= r3;  r2 = ~r2; \
        r0 ^= r4;  r4 |= r3; \
        r2 ^= r4; \
    } while (0)
144 
/*
 * S6: Serpent S-box 6 as a bitsliced circuit. r0..r3 carry the inputs;
 * r4 is scratch (it is assigned before it is ever read). All five
 * arguments are modified in place, so they must be plain lvalues.
 * The outputs are left, in order, in r0, r1, r4, r2.
 */
#define S6(r0, r1, r2, r3, r4)   do { \
        r2 = ~r2;  r4  = r3; \
        r3 &= r0;  r0 ^= r4; \
        r3 ^= r2;  r2 |= r4; \
        r1 ^= r3;  r2 ^= r0; \
        r0 |= r1;  r2 ^= r1; \
        r4 ^= r0;  r0 |= r3; \
        r0 ^= r2;  r4 ^= r3; \
        r4 ^= r0;  r3 = ~r3; \
        r2 &= r4; \
        r2 ^= r3; \
    } while (0)
157 
/*
 * S7: Serpent S-box 7 as a bitsliced circuit. r0..r3 carry the inputs;
 * r4 is scratch (it is assigned before it is ever read). All five
 * arguments are modified in place, so they must be plain lvalues.
 * The outputs are left, in order, in r4, r3, r1, r0.
 */
#define S7(r0, r1, r2, r3, r4)   do { \
        r4  = r1;  r1 |= r2; \
        r1 ^= r3;  r4 ^= r2; \
        r2 ^= r1;  r3 |= r4; \
        r3 &= r0;  r4 ^= r2; \
        r3 ^= r1;  r1 |= r4; \
        r1 ^= r0;  r0 |= r4; \
        r0 ^= r2;  r1 ^= r4; \
        r2 ^= r1;  r1 &= r0; \
        r1 ^= r4;  r2 = ~r2; \
        r2 |= r0; \
        r4 ^= r2; \
    } while (0)
171 
/*
 * The Serpent linear transform, applied in place to four words.
 * Every argument is read and written several times, so each one must
 * be a plain lvalue. The two left shifts are truncated with T32().
 */
#define SERPENT_LT(x0, x1, x2, x3)  do { \
        x0  = ROLc(x0, 13); \
        x2  = ROLc(x2, 3); \
        x1 ^= x0 ^ x2; \
        x3 ^= x2 ^ T32(x0 << 3); \
        x1  = ROLc(x1, 1); \
        x3  = ROLc(x3, 7); \
        x0 ^= x1 ^ x3; \
        x2 ^= x3 ^ T32(x1 << 7); \
        x0  = ROLc(x0, 5); \
        x2  = ROLc(x2, 22); \
    } while (0)
187 
188 /* ======================================================================== */
189 
190 /*
191  * Initialize Sosemanuk's state by providing a key. The key is an array of
192  * 1 to 32 bytes.
193  * @param st       The Sosemanuk state
194  * @param key      Key
195  * @param keylen   Length of key in bytes
196  * @return CRYPT_OK on success
197  */
sosemanuk_setup(sosemanuk_state * st,const unsigned char * key,unsigned long keylen)198 int sosemanuk_setup(sosemanuk_state *st, const unsigned char *key, unsigned long keylen)
199 {
200     /*
201      * This key schedule is actually a truncated Serpent key schedule.
202      * The key-derived words (w_i) are computed within the eight
203      * local variables w0 to w7, which are reused again and again.
204      */
205 
206 #define SKS(S, o0, o1, o2, o3, d0, d1, d2, d3)   do { \
207         ulong32 r0, r1, r2, r3, r4; \
208         r0 = w ## o0; \
209         r1 = w ## o1; \
210         r2 = w ## o2; \
211         r3 = w ## o3; \
212         S(r0, r1, r2, r3, r4); \
213         st->kc[i ++] = r ## d0; \
214         st->kc[i ++] = r ## d1; \
215         st->kc[i ++] = r ## d2; \
216         st->kc[i ++] = r ## d3; \
217     } while (0)
218 
219 #define SKS0    SKS(S0, 4, 5, 6, 7, 1, 4, 2, 0)
220 #define SKS1    SKS(S1, 0, 1, 2, 3, 2, 0, 3, 1)
221 #define SKS2    SKS(S2, 4, 5, 6, 7, 2, 3, 1, 4)
222 #define SKS3    SKS(S3, 0, 1, 2, 3, 1, 2, 3, 4)
223 #define SKS4    SKS(S4, 4, 5, 6, 7, 1, 4, 0, 3)
224 #define SKS5    SKS(S5, 0, 1, 2, 3, 1, 3, 0, 2)
225 #define SKS6    SKS(S6, 4, 5, 6, 7, 0, 1, 4, 2)
226 #define SKS7    SKS(S7, 0, 1, 2, 3, 4, 3, 1, 0)
227 
228 #define WUP(wi, wi5, wi3, wi1, cc)   do { \
229         ulong32 tt = (wi) ^ (wi5) ^ (wi3) \
230             ^ (wi1) ^ (0x9E3779B9 ^ (ulong32)(cc)); \
231         (wi) = ROLc(tt, 11); \
232     } while (0)
233 
234 #define WUP0(cc)   do { \
235         WUP(w0, w3, w5, w7, cc); \
236         WUP(w1, w4, w6, w0, cc + 1); \
237         WUP(w2, w5, w7, w1, cc + 2); \
238         WUP(w3, w6, w0, w2, cc + 3); \
239     } while (0)
240 
241 #define WUP1(cc)   do { \
242         WUP(w4, w7, w1, w3, cc); \
243         WUP(w5, w0, w2, w4, cc + 1); \
244         WUP(w6, w1, w3, w5, cc + 2); \
245         WUP(w7, w2, w4, w6, cc + 3); \
246     } while (0)
247 
248     unsigned char wbuf[32];
249     ulong32 w0, w1, w2, w3, w4, w5, w6, w7;
250     int i = 0;
251 
252    LTC_ARGCHK(st  != NULL);
253    LTC_ARGCHK(key != NULL);
254    LTC_ARGCHK(keylen > 0 && keylen <= 32);
255 
256     /*
257      * The key is copied into the wbuf[] buffer and padded to 256 bits
258      * as described in the Serpent specification.
259      */
260     XMEMCPY(wbuf, key, keylen);
261     if (keylen < 32) {
262         wbuf[keylen] = 0x01;
263         if (keylen < 31) {
264             XMEMSET(wbuf + keylen + 1, 0, 31 - keylen);
265         }
266     }
267 
268     LOAD32L(w0, wbuf);
269     LOAD32L(w1, wbuf + 4);
270     LOAD32L(w2, wbuf + 8);
271     LOAD32L(w3, wbuf + 12);
272     LOAD32L(w4, wbuf + 16);
273     LOAD32L(w5, wbuf + 20);
274     LOAD32L(w6, wbuf + 24);
275     LOAD32L(w7, wbuf + 28);
276 
277     WUP0(0);   SKS3;
278     WUP1(4);   SKS2;
279     WUP0(8);   SKS1;
280     WUP1(12);  SKS0;
281     WUP0(16);  SKS7;
282     WUP1(20);  SKS6;
283     WUP0(24);  SKS5;
284     WUP1(28);  SKS4;
285     WUP0(32);  SKS3;
286     WUP1(36);  SKS2;
287     WUP0(40);  SKS1;
288     WUP1(44);  SKS0;
289     WUP0(48);  SKS7;
290     WUP1(52);  SKS6;
291     WUP0(56);  SKS5;
292     WUP1(60);  SKS4;
293     WUP0(64);  SKS3;
294     WUP1(68);  SKS2;
295     WUP0(72);  SKS1;
296     WUP1(76);  SKS0;
297     WUP0(80);  SKS7;
298     WUP1(84);  SKS6;
299     WUP0(88);  SKS5;
300     WUP1(92);  SKS4;
301     WUP0(96);  SKS3;
302 
303 #undef SKS
304 #undef SKS0
305 #undef SKS1
306 #undef SKS2
307 #undef SKS3
308 #undef SKS4
309 #undef SKS5
310 #undef SKS6
311 #undef SKS7
312 #undef WUP
313 #undef WUP0
314 #undef WUP1
315 
316     return CRYPT_OK;
317 }
318 
319 
320 /*
321  * Initialization continues by setting the IV. The IV length is up to 16 bytes.
322  * If "ivlen" is 0 (no IV), then the "iv" parameter can be NULL.  If multiple
323  * encryptions/decryptions are to be performed with the same key and
324  * sosemanuk_done() has not been called, only sosemanuk_setiv() need be called
325  * to set the state.
326  * @param st       The Sosemanuk state
327  * @param iv       Initialization vector
328  * @param ivlen    Length of iv in bytes
329  * @return CRYPT_OK on success
330  */
sosemanuk_setiv(sosemanuk_state * st,const unsigned char * iv,unsigned long ivlen)331 int sosemanuk_setiv(sosemanuk_state *st, const unsigned char *iv, unsigned long ivlen)
332 {
333 
334     /*
335      * The Serpent key addition step.
336      */
337 #define KA(zc, x0, x1, x2, x3)  do { \
338         x0 ^= st->kc[(zc)]; \
339         x1 ^= st->kc[(zc) + 1]; \
340         x2 ^= st->kc[(zc) + 2]; \
341         x3 ^= st->kc[(zc) + 3]; \
342     } while (0)
343 
344     /*
345      * One Serpent round.
346      *   zc = current subkey counter
347      *   S = S-box macro for this round
348      *   i0 to i4 = input register numbers (the fifth is a scratch register)
349      *   o0 to o3 = output register numbers
350      */
351 #define FSS(zc, S, i0, i1, i2, i3, i4, o0, o1, o2, o3)  do { \
352         KA(zc, r ## i0, r ## i1, r ## i2, r ## i3); \
353         S(r ## i0, r ## i1, r ## i2, r ## i3, r ## i4); \
354         SERPENT_LT(r ## o0, r ## o1, r ## o2, r ## o3); \
355     } while (0)
356 
357     /*
358      * Last Serpent round. Contrary to the "true" Serpent, we keep
359      * the linear transformation for that last round.
360      */
361 #define FSF(zc, S, i0, i1, i2, i3, i4, o0, o1, o2, o3)  do { \
362         KA(zc, r ## i0, r ## i1, r ## i2, r ## i3); \
363         S(r ## i0, r ## i1, r ## i2, r ## i3, r ## i4); \
364         SERPENT_LT(r ## o0, r ## o1, r ## o2, r ## o3); \
365         KA(zc + 4, r ## o0, r ## o1, r ## o2, r ## o3); \
366     } while (0)
367 
368     ulong32 r0, r1, r2, r3, r4;
369     unsigned char ivtmp[16] = {0};
370 
371     LTC_ARGCHK(st != NULL);
372     LTC_ARGCHK(ivlen <= 16);
373     LTC_ARGCHK(iv != NULL || ivlen == 0);
374 
375     if (ivlen > 0) XMEMCPY(ivtmp, iv, ivlen);
376 
377     /*
378      * Decode IV into four 32-bit words (little-endian).
379      */
380     LOAD32L(r0, ivtmp);
381     LOAD32L(r1, ivtmp + 4);
382     LOAD32L(r2, ivtmp + 8);
383     LOAD32L(r3, ivtmp + 12);
384 
385     /*
386      * Encrypt IV with Serpent24. Some values are extracted from the
387      * output of the twelfth, eighteenth and twenty-fourth rounds.
388      */
389     FSS(0, S0, 0, 1, 2, 3, 4, 1, 4, 2, 0);
390     FSS(4, S1, 1, 4, 2, 0, 3, 2, 1, 0, 4);
391     FSS(8, S2, 2, 1, 0, 4, 3, 0, 4, 1, 3);
392     FSS(12, S3, 0, 4, 1, 3, 2, 4, 1, 3, 2);
393     FSS(16, S4, 4, 1, 3, 2, 0, 1, 0, 4, 2);
394     FSS(20, S5, 1, 0, 4, 2, 3, 0, 2, 1, 4);
395     FSS(24, S6, 0, 2, 1, 4, 3, 0, 2, 3, 1);
396     FSS(28, S7, 0, 2, 3, 1, 4, 4, 1, 2, 0);
397     FSS(32, S0, 4, 1, 2, 0, 3, 1, 3, 2, 4);
398     FSS(36, S1, 1, 3, 2, 4, 0, 2, 1, 4, 3);
399     FSS(40, S2, 2, 1, 4, 3, 0, 4, 3, 1, 0);
400     FSS(44, S3, 4, 3, 1, 0, 2, 3, 1, 0, 2);
401     st->s09 = r3;
402     st->s08 = r1;
403     st->s07 = r0;
404     st->s06 = r2;
405 
406     FSS(48, S4, 3, 1, 0, 2, 4, 1, 4, 3, 2);
407     FSS(52, S5, 1, 4, 3, 2, 0, 4, 2, 1, 3);
408     FSS(56, S6, 4, 2, 1, 3, 0, 4, 2, 0, 1);
409     FSS(60, S7, 4, 2, 0, 1, 3, 3, 1, 2, 4);
410     FSS(64, S0, 3, 1, 2, 4, 0, 1, 0, 2, 3);
411     FSS(68, S1, 1, 0, 2, 3, 4, 2, 1, 3, 0);
412     st->r1  = r2;
413     st->s04 = r1;
414     st->r2  = r3;
415     st->s05 = r0;
416 
417     FSS(72, S2, 2, 1, 3, 0, 4, 3, 0, 1, 4);
418     FSS(76, S3, 3, 0, 1, 4, 2, 0, 1, 4, 2);
419     FSS(80, S4, 0, 1, 4, 2, 3, 1, 3, 0, 2);
420     FSS(84, S5, 1, 3, 0, 2, 4, 3, 2, 1, 0);
421     FSS(88, S6, 3, 2, 1, 0, 4, 3, 2, 4, 1);
422     FSF(92, S7, 3, 2, 4, 1, 0, 0, 1, 2, 3);
423     st->s03 = r0;
424     st->s02 = r1;
425     st->s01 = r2;
426     st->s00 = r3;
427 
428     st->ptr = sizeof(st->buf);
429 
430 #undef KA
431 #undef FSS
432 #undef FSF
433 
434     return CRYPT_OK;
435 }
436 
/*
 * Multiplication by alpha: alpha * x = T32(x << 8) ^ mul_a[x >> 24]
 *
 * Lookup table with 256 entries (64 rows of 4), indexed by the value
 * x >> 24 as used by the MUL_A() macro in s_sosemanuk_internal().
 */
static const ulong32 mul_a[] = {
    0x00000000, 0xE19FCF13, 0x6B973726, 0x8A08F835,
    0xD6876E4C, 0x3718A15F, 0xBD10596A, 0x5C8F9679,
    0x05A7DC98, 0xE438138B, 0x6E30EBBE, 0x8FAF24AD,
    0xD320B2D4, 0x32BF7DC7, 0xB8B785F2, 0x59284AE1,
    0x0AE71199, 0xEB78DE8A, 0x617026BF, 0x80EFE9AC,
    0xDC607FD5, 0x3DFFB0C6, 0xB7F748F3, 0x566887E0,
    0x0F40CD01, 0xEEDF0212, 0x64D7FA27, 0x85483534,
    0xD9C7A34D, 0x38586C5E, 0xB250946B, 0x53CF5B78,
    0x1467229B, 0xF5F8ED88, 0x7FF015BD, 0x9E6FDAAE,
    0xC2E04CD7, 0x237F83C4, 0xA9777BF1, 0x48E8B4E2,
    0x11C0FE03, 0xF05F3110, 0x7A57C925, 0x9BC80636,
    0xC747904F, 0x26D85F5C, 0xACD0A769, 0x4D4F687A,
    0x1E803302, 0xFF1FFC11, 0x75170424, 0x9488CB37,
    0xC8075D4E, 0x2998925D, 0xA3906A68, 0x420FA57B,
    0x1B27EF9A, 0xFAB82089, 0x70B0D8BC, 0x912F17AF,
    0xCDA081D6, 0x2C3F4EC5, 0xA637B6F0, 0x47A879E3,
    0x28CE449F, 0xC9518B8C, 0x435973B9, 0xA2C6BCAA,
    0xFE492AD3, 0x1FD6E5C0, 0x95DE1DF5, 0x7441D2E6,
    0x2D699807, 0xCCF65714, 0x46FEAF21, 0xA7616032,
    0xFBEEF64B, 0x1A713958, 0x9079C16D, 0x71E60E7E,
    0x22295506, 0xC3B69A15, 0x49BE6220, 0xA821AD33,
    0xF4AE3B4A, 0x1531F459, 0x9F390C6C, 0x7EA6C37F,
    0x278E899E, 0xC611468D, 0x4C19BEB8, 0xAD8671AB,
    0xF109E7D2, 0x109628C1, 0x9A9ED0F4, 0x7B011FE7,
    0x3CA96604, 0xDD36A917, 0x573E5122, 0xB6A19E31,
    0xEA2E0848, 0x0BB1C75B, 0x81B93F6E, 0x6026F07D,
    0x390EBA9C, 0xD891758F, 0x52998DBA, 0xB30642A9,
    0xEF89D4D0, 0x0E161BC3, 0x841EE3F6, 0x65812CE5,
    0x364E779D, 0xD7D1B88E, 0x5DD940BB, 0xBC468FA8,
    0xE0C919D1, 0x0156D6C2, 0x8B5E2EF7, 0x6AC1E1E4,
    0x33E9AB05, 0xD2766416, 0x587E9C23, 0xB9E15330,
    0xE56EC549, 0x04F10A5A, 0x8EF9F26F, 0x6F663D7C,
    0x50358897, 0xB1AA4784, 0x3BA2BFB1, 0xDA3D70A2,
    0x86B2E6DB, 0x672D29C8, 0xED25D1FD, 0x0CBA1EEE,
    0x5592540F, 0xB40D9B1C, 0x3E056329, 0xDF9AAC3A,
    0x83153A43, 0x628AF550, 0xE8820D65, 0x091DC276,
    0x5AD2990E, 0xBB4D561D, 0x3145AE28, 0xD0DA613B,
    0x8C55F742, 0x6DCA3851, 0xE7C2C064, 0x065D0F77,
    0x5F754596, 0xBEEA8A85, 0x34E272B0, 0xD57DBDA3,
    0x89F22BDA, 0x686DE4C9, 0xE2651CFC, 0x03FAD3EF,
    0x4452AA0C, 0xA5CD651F, 0x2FC59D2A, 0xCE5A5239,
    0x92D5C440, 0x734A0B53, 0xF942F366, 0x18DD3C75,
    0x41F57694, 0xA06AB987, 0x2A6241B2, 0xCBFD8EA1,
    0x977218D8, 0x76EDD7CB, 0xFCE52FFE, 0x1D7AE0ED,
    0x4EB5BB95, 0xAF2A7486, 0x25228CB3, 0xC4BD43A0,
    0x9832D5D9, 0x79AD1ACA, 0xF3A5E2FF, 0x123A2DEC,
    0x4B12670D, 0xAA8DA81E, 0x2085502B, 0xC11A9F38,
    0x9D950941, 0x7C0AC652, 0xF6023E67, 0x179DF174,
    0x78FBCC08, 0x9964031B, 0x136CFB2E, 0xF2F3343D,
    0xAE7CA244, 0x4FE36D57, 0xC5EB9562, 0x24745A71,
    0x7D5C1090, 0x9CC3DF83, 0x16CB27B6, 0xF754E8A5,
    0xABDB7EDC, 0x4A44B1CF, 0xC04C49FA, 0x21D386E9,
    0x721CDD91, 0x93831282, 0x198BEAB7, 0xF81425A4,
    0xA49BB3DD, 0x45047CCE, 0xCF0C84FB, 0x2E934BE8,
    0x77BB0109, 0x9624CE1A, 0x1C2C362F, 0xFDB3F93C,
    0xA13C6F45, 0x40A3A056, 0xCAAB5863, 0x2B349770,
    0x6C9CEE93, 0x8D032180, 0x070BD9B5, 0xE69416A6,
    0xBA1B80DF, 0x5B844FCC, 0xD18CB7F9, 0x301378EA,
    0x693B320B, 0x88A4FD18, 0x02AC052D, 0xE333CA3E,
    0xBFBC5C47, 0x5E239354, 0xD42B6B61, 0x35B4A472,
    0x667BFF0A, 0x87E43019, 0x0DECC82C, 0xEC73073F,
    0xB0FC9146, 0x51635E55, 0xDB6BA660, 0x3AF46973,
    0x63DC2392, 0x8243EC81, 0x084B14B4, 0xE9D4DBA7,
    0xB55B4DDE, 0x54C482CD, 0xDECC7AF8, 0x3F53B5EB
};
506 
/*
 * Multiplication by 1/alpha: 1/alpha * x = (x >> 8) ^ mul_ia[x & 0xFF]
 *
 * Lookup table with 256 entries (64 rows of 4), indexed by the low byte
 * x & 0xFF as used by the MUL_G() macro in s_sosemanuk_internal().
 */
static const ulong32 mul_ia[] = {
    0x00000000, 0x180F40CD, 0x301E8033, 0x2811C0FE,
    0x603CA966, 0x7833E9AB, 0x50222955, 0x482D6998,
    0xC078FBCC, 0xD877BB01, 0xF0667BFF, 0xE8693B32,
    0xA04452AA, 0xB84B1267, 0x905AD299, 0x88559254,
    0x29F05F31, 0x31FF1FFC, 0x19EEDF02, 0x01E19FCF,
    0x49CCF657, 0x51C3B69A, 0x79D27664, 0x61DD36A9,
    0xE988A4FD, 0xF187E430, 0xD99624CE, 0xC1996403,
    0x89B40D9B, 0x91BB4D56, 0xB9AA8DA8, 0xA1A5CD65,
    0x5249BE62, 0x4A46FEAF, 0x62573E51, 0x7A587E9C,
    0x32751704, 0x2A7A57C9, 0x026B9737, 0x1A64D7FA,
    0x923145AE, 0x8A3E0563, 0xA22FC59D, 0xBA208550,
    0xF20DECC8, 0xEA02AC05, 0xC2136CFB, 0xDA1C2C36,
    0x7BB9E153, 0x63B6A19E, 0x4BA76160, 0x53A821AD,
    0x1B854835, 0x038A08F8, 0x2B9BC806, 0x339488CB,
    0xBBC11A9F, 0xA3CE5A52, 0x8BDF9AAC, 0x93D0DA61,
    0xDBFDB3F9, 0xC3F2F334, 0xEBE333CA, 0xF3EC7307,
    0xA492D5C4, 0xBC9D9509, 0x948C55F7, 0x8C83153A,
    0xC4AE7CA2, 0xDCA13C6F, 0xF4B0FC91, 0xECBFBC5C,
    0x64EA2E08, 0x7CE56EC5, 0x54F4AE3B, 0x4CFBEEF6,
    0x04D6876E, 0x1CD9C7A3, 0x34C8075D, 0x2CC74790,
    0x8D628AF5, 0x956DCA38, 0xBD7C0AC6, 0xA5734A0B,
    0xED5E2393, 0xF551635E, 0xDD40A3A0, 0xC54FE36D,
    0x4D1A7139, 0x551531F4, 0x7D04F10A, 0x650BB1C7,
    0x2D26D85F, 0x35299892, 0x1D38586C, 0x053718A1,
    0xF6DB6BA6, 0xEED42B6B, 0xC6C5EB95, 0xDECAAB58,
    0x96E7C2C0, 0x8EE8820D, 0xA6F942F3, 0xBEF6023E,
    0x36A3906A, 0x2EACD0A7, 0x06BD1059, 0x1EB25094,
    0x569F390C, 0x4E9079C1, 0x6681B93F, 0x7E8EF9F2,
    0xDF2B3497, 0xC724745A, 0xEF35B4A4, 0xF73AF469,
    0xBF179DF1, 0xA718DD3C, 0x8F091DC2, 0x97065D0F,
    0x1F53CF5B, 0x075C8F96, 0x2F4D4F68, 0x37420FA5,
    0x7F6F663D, 0x676026F0, 0x4F71E60E, 0x577EA6C3,
    0xE18D0321, 0xF98243EC, 0xD1938312, 0xC99CC3DF,
    0x81B1AA47, 0x99BEEA8A, 0xB1AF2A74, 0xA9A06AB9,
    0x21F5F8ED, 0x39FAB820, 0x11EB78DE, 0x09E43813,
    0x41C9518B, 0x59C61146, 0x71D7D1B8, 0x69D89175,
    0xC87D5C10, 0xD0721CDD, 0xF863DC23, 0xE06C9CEE,
    0xA841F576, 0xB04EB5BB, 0x985F7545, 0x80503588,
    0x0805A7DC, 0x100AE711, 0x381B27EF, 0x20146722,
    0x68390EBA, 0x70364E77, 0x58278E89, 0x4028CE44,
    0xB3C4BD43, 0xABCBFD8E, 0x83DA3D70, 0x9BD57DBD,
    0xD3F81425, 0xCBF754E8, 0xE3E69416, 0xFBE9D4DB,
    0x73BC468F, 0x6BB30642, 0x43A2C6BC, 0x5BAD8671,
    0x1380EFE9, 0x0B8FAF24, 0x239E6FDA, 0x3B912F17,
    0x9A34E272, 0x823BA2BF, 0xAA2A6241, 0xB225228C,
    0xFA084B14, 0xE2070BD9, 0xCA16CB27, 0xD2198BEA,
    0x5A4C19BE, 0x42435973, 0x6A52998D, 0x725DD940,
    0x3A70B0D8, 0x227FF015, 0x0A6E30EB, 0x12617026,
    0x451FD6E5, 0x5D109628, 0x750156D6, 0x6D0E161B,
    0x25237F83, 0x3D2C3F4E, 0x153DFFB0, 0x0D32BF7D,
    0x85672D29, 0x9D686DE4, 0xB579AD1A, 0xAD76EDD7,
    0xE55B844F, 0xFD54C482, 0xD545047C, 0xCD4A44B1,
    0x6CEF89D4, 0x74E0C919, 0x5CF109E7, 0x44FE492A,
    0x0CD320B2, 0x14DC607F, 0x3CCDA081, 0x24C2E04C,
    0xAC977218, 0xB49832D5, 0x9C89F22B, 0x8486B2E6,
    0xCCABDB7E, 0xD4A49BB3, 0xFCB55B4D, 0xE4BA1B80,
    0x17566887, 0x0F59284A, 0x2748E8B4, 0x3F47A879,
    0x776AC1E1, 0x6F65812C, 0x477441D2, 0x5F7B011F,
    0xD72E934B, 0xCF21D386, 0xE7301378, 0xFF3F53B5,
    0xB7123A2D, 0xAF1D7AE0, 0x870CBA1E, 0x9F03FAD3,
    0x3EA637B6, 0x26A9777B, 0x0EB8B785, 0x16B7F748,
    0x5E9A9ED0, 0x4695DE1D, 0x6E841EE3, 0x768B5E2E,
    0xFEDECC7A, 0xE6D18CB7, 0xCEC04C49, 0xD6CF0C84,
    0x9EE2651C, 0x86ED25D1, 0xAEFCE52F, 0xB6F3A5E2
};
576 
577 
578 /*
579  * Compute the next block of bits of output stream. This is equivalent
580  * to one full rotation of the shift register.
581  */
s_sosemanuk_internal(sosemanuk_state * st)582 static LTC_INLINE void s_sosemanuk_internal(sosemanuk_state *st)
583 {
584     /*
585      * MUL_A(x) computes alpha * x (in F_{2^32}).
586      * MUL_G(x) computes 1/alpha * x (in F_{2^32}).
587      */
588 #define MUL_A(x)    (T32((x) << 8) ^ mul_a[(x) >> 24])
589 #define MUL_G(x)    (((x) >> 8) ^ mul_ia[(x) & 0xFF])
590 
591     /*
592      * This macro computes the special multiplexer, which chooses
593      * between "x" and "x xor y", depending on the least significant
594      * bit of the control word. We use the C "?:" selection operator
595      * (which most compilers know how to optimise) except for Alpha,
596      * where the manual sign extension seems to perform equally well
597      * with DEC/Compaq/HP compiler, and much better with gcc.
598      */
599 #ifdef __alpha
600 #define XMUX(c, x, y)   ((((signed int)((c) << 31) >> 31) & (y)) ^ (x))
601 #else
602 #define XMUX(c, x, y)   (((c) & 0x1) ? ((x) ^ (y)) : (x))
603 #endif
604 
605     /*
606      * FSM() updates the finite state machine.
607      */
608 #define FSM(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9)   do { \
609         ulong32 tt, or1; \
610         tt = XMUX(r1, s ## x1, s ## x8); \
611         or1 = r1; \
612         r1 = T32(r2 + tt); \
613         tt = T32(or1 * 0x54655307); \
614         r2 = ROLc(tt, 7); \
615     } while (0)
616 
617     /*
618      * LRU updates the shift register; the dropped value is stored
619      * in variable "dd".
620      */
621 #define LRU(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, dd)   do { \
622         dd = s ## x0; \
623         s ## x0 = MUL_A(s ## x0) ^ MUL_G(s ## x3) ^ s ## x9; \
624     } while (0)
625 
626     /*
627      * CC1 stores into variable "ee" the next intermediate word
628      * (combination of the new states of the LFSR and the FSM).
629      */
630 #define CC1(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, ee)   do { \
631         ee = T32(s ## x9 + r1) ^ r2; \
632     } while (0)
633 
634     /*
635      * STEP computes one internal round. "dd" receives the "s_t"
636      * value (dropped from the LFSR) and "ee" gets the value computed
637      * from the LFSR and FSM.
638      */
639 #define STEP(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, dd, ee)   do { \
640         FSM(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9); \
641         LRU(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, dd); \
642         CC1(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, ee); \
643     } while (0)
644 
645     /*
646      * Apply one Serpent round (with the provided S-box macro), XOR
647      * the result with the "v" values, and encode the result into
648      * the destination buffer, at the provided offset. The "x*"
649      * arguments encode the output permutation of the "S" macro.
650      */
651 #define SRD(S, x0, x1, x2, x3, ooff)   do { \
652         S(u0, u1, u2, u3, u4); \
653         STORE32L(u ## x0 ^ v0, st->buf + ooff); \
654         STORE32L(u ## x1 ^ v1, st->buf + ooff +  4); \
655         STORE32L(u ## x2 ^ v2, st->buf + ooff +  8); \
656         STORE32L(u ## x3 ^ v3, st->buf + ooff + 12); \
657     } while (0)
658 
659     ulong32 s00 = st->s00;
660     ulong32 s01 = st->s01;
661     ulong32 s02 = st->s02;
662     ulong32 s03 = st->s03;
663     ulong32 s04 = st->s04;
664     ulong32 s05 = st->s05;
665     ulong32 s06 = st->s06;
666     ulong32 s07 = st->s07;
667     ulong32 s08 = st->s08;
668     ulong32 s09 = st->s09;
669     ulong32 r1 = st->r1;
670     ulong32 r2 = st->r2;
671     ulong32 u0, u1, u2, u3, u4;
672     ulong32 v0, v1, v2, v3;
673 
674     STEP(00, 01, 02, 03, 04, 05, 06, 07, 08, 09, v0, u0);
675     STEP(01, 02, 03, 04, 05, 06, 07, 08, 09, 00, v1, u1);
676     STEP(02, 03, 04, 05, 06, 07, 08, 09, 00, 01, v2, u2);
677     STEP(03, 04, 05, 06, 07, 08, 09, 00, 01, 02, v3, u3);
678     SRD(S2, 2, 3, 1, 4, 0);
679     STEP(04, 05, 06, 07, 08, 09, 00, 01, 02, 03, v0, u0);
680     STEP(05, 06, 07, 08, 09, 00, 01, 02, 03, 04, v1, u1);
681     STEP(06, 07, 08, 09, 00, 01, 02, 03, 04, 05, v2, u2);
682     STEP(07, 08, 09, 00, 01, 02, 03, 04, 05, 06, v3, u3);
683     SRD(S2, 2, 3, 1, 4, 16);
684     STEP(08, 09, 00, 01, 02, 03, 04, 05, 06, 07, v0, u0);
685     STEP(09, 00, 01, 02, 03, 04, 05, 06, 07, 08, v1, u1);
686     STEP(00, 01, 02, 03, 04, 05, 06, 07, 08, 09, v2, u2);
687     STEP(01, 02, 03, 04, 05, 06, 07, 08, 09, 00, v3, u3);
688     SRD(S2, 2, 3, 1, 4, 32);
689     STEP(02, 03, 04, 05, 06, 07, 08, 09, 00, 01, v0, u0);
690     STEP(03, 04, 05, 06, 07, 08, 09, 00, 01, 02, v1, u1);
691     STEP(04, 05, 06, 07, 08, 09, 00, 01, 02, 03, v2, u2);
692     STEP(05, 06, 07, 08, 09, 00, 01, 02, 03, 04, v3, u3);
693     SRD(S2, 2, 3, 1, 4, 48);
694     STEP(06, 07, 08, 09, 00, 01, 02, 03, 04, 05, v0, u0);
695     STEP(07, 08, 09, 00, 01, 02, 03, 04, 05, 06, v1, u1);
696     STEP(08, 09, 00, 01, 02, 03, 04, 05, 06, 07, v2, u2);
697     STEP(09, 00, 01, 02, 03, 04, 05, 06, 07, 08, v3, u3);
698     SRD(S2, 2, 3, 1, 4, 64);
699 
700     st->s00 = s00;
701     st->s01 = s01;
702     st->s02 = s02;
703     st->s03 = s03;
704     st->s04 = s04;
705     st->s05 = s05;
706     st->s06 = s06;
707     st->s07 = s07;
708     st->s08 = s08;
709     st->s09 = s09;
710     st->r1 = r1;
711     st->r2 = r2;
712 }
713 
/*
 * XOR two byte buffers: out[k] = in1[k] ^ in2[k] for k in [0, datalen).
 * Bytes are processed in increasing index order. out[] may be exactly
 * in1[] and/or in2[] (total overlap); partial overlap of out[] with
 * either input is not allowed.
 */
static LTC_INLINE void s_xorbuf(const unsigned char *in1, const unsigned char *in2,
    unsigned char *out, unsigned long datalen)
{
    unsigned long k;

    for (k = 0; k < datalen; k++) {
        out[k] = in1[k] ^ in2[k];
    }
}
727 
728 
729 /*
730  * Cipher operation, as a stream cipher: data is read from the "in"
731  * buffer, combined by XOR with the stream, and the result is written
732  * in the "out" buffer. "in" and "out" must be either equal, or
733  * reference distinct buffers (no partial overlap is allowed).
734  * @param st       The Sosemanuk state
735  * @param in       Data in
736  * @param inlen    Length of data in bytes
737  * @param out      Data out
738  * @return CRYPT_OK on success
739  */
sosemanuk_crypt(sosemanuk_state * st,const unsigned char * in,unsigned long inlen,unsigned char * out)740 int sosemanuk_crypt(sosemanuk_state *st,
741                         const unsigned char *in, unsigned long inlen, unsigned char *out)
742 {
743     LTC_ARGCHK(st  != NULL);
744     LTC_ARGCHK(in  != NULL);
745     LTC_ARGCHK(out != NULL);
746 
747     if (st->ptr < (sizeof(st->buf))) {
748         unsigned long rlen = (sizeof(st->buf)) - st->ptr;
749 
750         if (rlen > inlen) {
751             rlen = inlen;
752         }
753         s_xorbuf(st->buf + st->ptr, in, out, rlen);
754         in += rlen;
755         out += rlen;
756         inlen -= rlen;
757         st->ptr += rlen;
758     }
759     while (inlen > 0) {
760         s_sosemanuk_internal(st);
761         if (inlen >= sizeof(st->buf)) {
762             s_xorbuf(st->buf, in, out, sizeof(st->buf));
763             in += sizeof(st->buf);
764             out += sizeof(st->buf);
765             inlen -= sizeof(st->buf);
766         } else {
767             s_xorbuf(st->buf, in, out, inlen);
768             st->ptr = inlen;
769             inlen = 0;
770         }
771     }
772     return CRYPT_OK;
773 }
774 
775 
776 
777 /*
778  * Cipher operation, as a PRNG: the provided output buffer is filled with
779  * pseudo-random bytes as output from the stream cipher.
780  * @param st       The Sosemanuk state
781  * @param out      Data out
782  * @param outlen   Length of output in bytes
783  * @return CRYPT_OK on success
784  */
sosemanuk_keystream(sosemanuk_state * st,unsigned char * out,unsigned long outlen)785 int sosemanuk_keystream(sosemanuk_state *st, unsigned char *out, unsigned long outlen)
786 {
787    if (outlen == 0) return CRYPT_OK; /* nothing to do */
788    LTC_ARGCHK(out != NULL);
789    XMEMSET(out, 0, outlen);
790    return sosemanuk_crypt(st, out, outlen, out);
791 }
792 
793 
794 /*
795  * Terminate and clear Sosemanuk key context
796  * @param st      The Sosemanuk state
797  * @return CRYPT_OK on success
798  */
sosemanuk_done(sosemanuk_state * st)799 int sosemanuk_done(sosemanuk_state *st)
800 {
801    LTC_ARGCHK(st != NULL);
802    zeromem(st, sizeof(sosemanuk_state));
803    return CRYPT_OK;
804 }
805 
806 
807 #endif
808