1// Copyright 2023 The BoringSSL Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#include <openssl/base.h>
16
17#include <assert.h>
18#include <stdlib.h>
19
20#include "../../internal.h"
21#include "./internal.h"
22
23
24// keccak_f implements the Keccak-1600 permutation as described at
25// https://keccak.team/keccak_specs_summary.html. Each lane is represented as a
26// 64-bit value and the 5×5 lanes are stored as an array in row-major order.
27static void keccak_f(uint64_t state[25]) {
28  static const int kNumRounds = 24;
29  for (int round = 0; round < kNumRounds; round++) {
30    // θ step
31    uint64_t c[5];
32    for (int x = 0; x < 5; x++) {
33      c[x] = state[x] ^ state[x + 5] ^ state[x + 10] ^ state[x + 15] ^
34             state[x + 20];
35    }
36
37    for (int x = 0; x < 5; x++) {
38      const uint64_t d = c[(x + 4) % 5] ^ CRYPTO_rotl_u64(c[(x + 1) % 5], 1);
39      for (int y = 0; y < 5; y++) {
40        state[y * 5 + x] ^= d;
41      }
42    }
43
44    // ρ and π steps.
45    //
46    // These steps involve a mapping of the state matrix. Each input point,
47    // (x,y), is rotated and written to the point (y, 2x + 3y). In the Keccak
48    // pseudo-code a separate array is used because an in-place operation would
49    // overwrite some values that are subsequently needed. However, the mapping
50    // forms a trail through 24 of the 25 values so we can do it in place with
51    // only a single temporary variable.
52    //
53    // Start with (1, 0). The value here will be mapped and end up at (0, 2).
54    // That value will end up at (2, 1), then (1, 2), and so on. After 24
55    // steps, 24 of the 25 values have been hit (as this mapping is injective)
56    // and the sequence will repeat. All that remains is to handle the element
57    // at (0, 0), but the rotation for that element is zero, and it goes to (0,
58    // 0), so we can ignore it.
59    uint64_t prev_value = state[1];
60#define PI_RHO_STEP(index, rotation)                              \
61  do {                                                            \
62    const uint64_t value = CRYPTO_rotl_u64(prev_value, rotation); \
63    prev_value = state[index];                                    \
64    state[index] = value;                                         \
65  } while (0)
66
67    PI_RHO_STEP(10, 1);
68    PI_RHO_STEP(7, 3);
69    PI_RHO_STEP(11, 6);
70    PI_RHO_STEP(17, 10);
71    PI_RHO_STEP(18, 15);
72    PI_RHO_STEP(3, 21);
73    PI_RHO_STEP(5, 28);
74    PI_RHO_STEP(16, 36);
75    PI_RHO_STEP(8, 45);
76    PI_RHO_STEP(21, 55);
77    PI_RHO_STEP(24, 2);
78    PI_RHO_STEP(4, 14);
79    PI_RHO_STEP(15, 27);
80    PI_RHO_STEP(23, 41);
81    PI_RHO_STEP(19, 56);
82    PI_RHO_STEP(13, 8);
83    PI_RHO_STEP(12, 25);
84    PI_RHO_STEP(2, 43);
85    PI_RHO_STEP(20, 62);
86    PI_RHO_STEP(14, 18);
87    PI_RHO_STEP(22, 39);
88    PI_RHO_STEP(9, 61);
89    PI_RHO_STEP(6, 20);
90    PI_RHO_STEP(1, 44);
91
92#undef PI_RHO_STEP
93
94    // χ step
95    for (int y = 0; y < 5; y++) {
96      const int row_index = 5 * y;
97      const uint64_t orig_x0 = state[row_index];
98      const uint64_t orig_x1 = state[row_index + 1];
99      state[row_index] ^= ~orig_x1 & state[row_index + 2];
100      state[row_index + 1] ^= ~state[row_index + 2] & state[row_index + 3];
101      state[row_index + 2] ^= ~state[row_index + 3] & state[row_index + 4];
102      state[row_index + 3] ^= ~state[row_index + 4] & orig_x0;
103      state[row_index + 4] ^= ~orig_x0 & orig_x1;
104    }
105
106    // ι step
107    //
108    // From https://keccak.team/files/Keccak-reference-3.0.pdf, section
109    // 1.2, the round constants are based on the output of a LFSR. Thus, as
110    // suggested in the appendix of of
111    // https://keccak.team/keccak_specs_summary.html, the values are
112    // simply encoded here.
113    static const uint64_t kRoundConstants[24] = {
114        0x0000000000000001, 0x0000000000008082, 0x800000000000808a,
115        0x8000000080008000, 0x000000000000808b, 0x0000000080000001,
116        0x8000000080008081, 0x8000000000008009, 0x000000000000008a,
117        0x0000000000000088, 0x0000000080008009, 0x000000008000000a,
118        0x000000008000808b, 0x800000000000008b, 0x8000000000008089,
119        0x8000000000008003, 0x8000000000008002, 0x8000000000000080,
120        0x000000000000800a, 0x800000008000000a, 0x8000000080008081,
121        0x8000000000008080, 0x0000000080000001, 0x8000000080008008,
122    };
123
124    state[0] ^= kRoundConstants[round];
125  }
126}
127
128static void keccak_init(struct BORINGSSL_keccak_st *ctx,
129                        enum boringssl_keccak_config_t config) {
130  size_t required_out_len;
131  size_t capacity_bytes;
132  switch (config) {
133    case boringssl_sha3_256:
134      capacity_bytes = 512 / 8;
135      required_out_len = 32;
136      break;
137    case boringssl_sha3_512:
138      capacity_bytes = 1024 / 8;
139      required_out_len = 64;
140      break;
141    case boringssl_shake128:
142      capacity_bytes = 256 / 8;
143      required_out_len = 0;
144      break;
145    case boringssl_shake256:
146      capacity_bytes = 512 / 8;
147      required_out_len = 0;
148      break;
149    default:
150      abort();
151  }
152
153  OPENSSL_memset(ctx, 0, sizeof(*ctx));
154  ctx->config = config;
155  ctx->phase = boringssl_keccak_phase_absorb;
156  ctx->required_out_len = required_out_len;
157  ctx->rate_bytes = 200 - capacity_bytes;
158  assert(ctx->rate_bytes % 8 == 0);
159}
160
161void BORINGSSL_keccak(uint8_t *out, size_t out_len, const uint8_t *in,
162                      size_t in_len, enum boringssl_keccak_config_t config) {
163  struct BORINGSSL_keccak_st ctx;
164  keccak_init(&ctx, config);
165  if (ctx.required_out_len != 0 && out_len != ctx.required_out_len) {
166    abort();
167  }
168  BORINGSSL_keccak_absorb(&ctx, in, in_len);
169  BORINGSSL_keccak_squeeze(&ctx, out, out_len);
170}
171
172void BORINGSSL_keccak_init(struct BORINGSSL_keccak_st *ctx,
173                           enum boringssl_keccak_config_t config) {
174  keccak_init(ctx, config);
175}
176
177void BORINGSSL_keccak_absorb(struct BORINGSSL_keccak_st *ctx, const uint8_t *in,
178                             size_t in_len) {
179  if (ctx->phase == boringssl_keccak_phase_squeeze) {
180    // It's illegal to call absorb() again after calling squeeze().
181    abort();
182  }
183
184  const size_t rate_words = ctx->rate_bytes / 8;
185  // XOR the input. Accessing |ctx->state| as a |uint8_t*| is allowed by strict
186  // aliasing because we require |uint8_t| to be a character type.
187  uint8_t *state_bytes = (uint8_t *)ctx->state;
188
189  // Absorb partial block.
190  if (ctx->absorb_offset != 0) {
191    assert(ctx->absorb_offset < ctx->rate_bytes);
192    size_t first_block_len = ctx->rate_bytes - ctx->absorb_offset;
193    for (size_t i = 0; i < first_block_len && i < in_len; i++) {
194      state_bytes[ctx->absorb_offset + i] ^= in[i];
195    }
196
197    // This input didn't fill the block.
198    if (first_block_len > in_len) {
199      ctx->absorb_offset += in_len;
200      return;
201    }
202
203    keccak_f(ctx->state);
204    in += first_block_len;
205    in_len -= first_block_len;
206  }
207
208  // Absorb full blocks.
209  while (in_len >= ctx->rate_bytes) {
210    for (size_t i = 0; i < rate_words; i++) {
211      ctx->state[i] ^= CRYPTO_load_u64_le(in + 8 * i);
212    }
213    keccak_f(ctx->state);
214    in += ctx->rate_bytes;
215    in_len -= ctx->rate_bytes;
216  }
217
218  // Absorb partial block.
219  assert(in_len < ctx->rate_bytes);
220  for (size_t i = 0; i < in_len; i++) {
221    state_bytes[i] ^= in[i];
222  }
223  ctx->absorb_offset = in_len;
224}
225
226static void keccak_finalize(struct BORINGSSL_keccak_st *ctx) {
227  uint8_t terminator;
228  switch (ctx->config) {
229    case boringssl_sha3_256:
230    case boringssl_sha3_512:
231      terminator = 0x06;
232      break;
233    case boringssl_shake128:
234    case boringssl_shake256:
235      terminator = 0x1f;
236      break;
237    default:
238      abort();
239  }
240
241  // XOR the terminator. Accessing |ctx->state| as a |uint8_t*| is allowed by
242  // strict aliasing because we require |uint8_t| to be a character type.
243  uint8_t *state_bytes = (uint8_t *)ctx->state;
244  state_bytes[ctx->absorb_offset] ^= terminator;
245  state_bytes[ctx->rate_bytes - 1] ^= 0x80;
246  keccak_f(ctx->state);
247}
248
249void BORINGSSL_keccak_squeeze(struct BORINGSSL_keccak_st *ctx, uint8_t *out,
250                              size_t out_len) {
251  if (ctx->required_out_len != 0 &&
252      (ctx->phase == boringssl_keccak_phase_squeeze ||
253       out_len != ctx->required_out_len)) {
254    // The SHA-3 variants must be squeezed in a single call, to confirm that the
255    // output length is correct.
256    abort();
257  }
258
259  if (ctx->phase == boringssl_keccak_phase_absorb) {
260    keccak_finalize(ctx);
261    ctx->phase = boringssl_keccak_phase_squeeze;
262  }
263
264  // Accessing |ctx->state| as a |uint8_t*| is allowed by strict aliasing
265  // because we require |uint8_t| to be a character type.
266  const uint8_t *state_bytes = (const uint8_t *)ctx->state;
267  while (out_len) {
268    if (ctx->squeeze_offset == ctx->rate_bytes) {
269      keccak_f(ctx->state);
270      ctx->squeeze_offset = 0;
271    }
272
273    size_t remaining = ctx->rate_bytes - ctx->squeeze_offset;
274    size_t todo = out_len;
275    if (todo > remaining) {
276      todo = remaining;
277    }
278    OPENSSL_memcpy(out, &state_bytes[ctx->squeeze_offset], todo);
279    out += todo;
280    out_len -= todo;
281    ctx->squeeze_offset += todo;
282  }
283}
284