// Copyright 2004-2016 The OpenSSL Project Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <string.h>

#include <openssl/mem.h>

#include "../../internal.h"
#include "../bcm_interface.h"
#include "../service_indicator/internal.h"
#include "internal.h"


// The 32-bit hash algorithms share a common byte-order neutral collector and
// padding function implementations that operate on unaligned data,
// ../digest/md32_common.h. SHA-512 is the only 64-bit hash algorithm, as of
// this writing, so there is no need for a common collector/padding
// implementation yet.
30 31static void sha512_final_impl(uint8_t *out, size_t md_len, SHA512_CTX *sha); 32 33bcm_infallible BCM_sha384_init(SHA512_CTX *sha) { 34 sha->h[0] = UINT64_C(0xcbbb9d5dc1059ed8); 35 sha->h[1] = UINT64_C(0x629a292a367cd507); 36 sha->h[2] = UINT64_C(0x9159015a3070dd17); 37 sha->h[3] = UINT64_C(0x152fecd8f70e5939); 38 sha->h[4] = UINT64_C(0x67332667ffc00b31); 39 sha->h[5] = UINT64_C(0x8eb44a8768581511); 40 sha->h[6] = UINT64_C(0xdb0c2e0d64f98fa7); 41 sha->h[7] = UINT64_C(0x47b5481dbefa4fa4); 42 43 sha->bytes_so_far_low = 0; 44 sha->bytes_so_far_high = 0; 45 sha->num = 0; 46 sha->md_len = BCM_SHA384_DIGEST_LENGTH; 47 return bcm_infallible::approved; 48} 49 50 51bcm_infallible BCM_sha512_init(SHA512_CTX *sha) { 52 sha->h[0] = UINT64_C(0x6a09e667f3bcc908); 53 sha->h[1] = UINT64_C(0xbb67ae8584caa73b); 54 sha->h[2] = UINT64_C(0x3c6ef372fe94f82b); 55 sha->h[3] = UINT64_C(0xa54ff53a5f1d36f1); 56 sha->h[4] = UINT64_C(0x510e527fade682d1); 57 sha->h[5] = UINT64_C(0x9b05688c2b3e6c1f); 58 sha->h[6] = UINT64_C(0x1f83d9abfb41bd6b); 59 sha->h[7] = UINT64_C(0x5be0cd19137e2179); 60 61 sha->bytes_so_far_low = 0; 62 sha->bytes_so_far_high = 0; 63 sha->num = 0; 64 sha->md_len = BCM_SHA512_DIGEST_LENGTH; 65 return bcm_infallible::approved; 66} 67 68bcm_infallible BCM_sha512_256_init(SHA512_CTX *sha) { 69 sha->h[0] = UINT64_C(0x22312194fc2bf72c); 70 sha->h[1] = UINT64_C(0x9f555fa3c84c64c2); 71 sha->h[2] = UINT64_C(0x2393b86b6f53b151); 72 sha->h[3] = UINT64_C(0x963877195940eabd); 73 sha->h[4] = UINT64_C(0x96283ee2a88effe3); 74 sha->h[5] = UINT64_C(0xbe5e1e2553863992); 75 sha->h[6] = UINT64_C(0x2b0199fc2c85b8aa); 76 sha->h[7] = UINT64_C(0x0eb72ddc81c52ca2); 77 78 sha->bytes_so_far_low = 0; 79 sha->bytes_so_far_high = 0; 80 sha->num = 0; 81 sha->md_len = BCM_SHA512_256_DIGEST_LENGTH; 82 return bcm_infallible::approved; 83} 84 85#if !defined(SHA512_ASM) 86static void sha512_block_data_order(uint64_t state[8], const uint8_t *in, 87 size_t num_blocks); 88#endif 89 90 91bcm_infallible 
BCM_sha384_final(uint8_t out[BCM_SHA384_DIGEST_LENGTH], 92 SHA512_CTX *sha) { 93 // This function must be paired with |BCM_sha384_init|, which sets 94 // |sha->md_len| to |BCM_SHA384_DIGEST_LENGTH|. 95 assert(sha->md_len == BCM_SHA384_DIGEST_LENGTH); 96 sha512_final_impl(out, BCM_SHA384_DIGEST_LENGTH, sha); 97 return bcm_infallible::approved; 98} 99 100bcm_infallible BCM_sha384_update(SHA512_CTX *sha, const void *data, 101 size_t len) { 102 return BCM_sha512_update(sha, data, len); 103} 104 105bcm_infallible BCM_sha512_256_update(SHA512_CTX *sha, const void *data, 106 size_t len) { 107 return BCM_sha512_update(sha, data, len); 108} 109 110bcm_infallible BCM_sha512_256_final(uint8_t out[BCM_SHA512_256_DIGEST_LENGTH], 111 SHA512_CTX *sha) { 112 // This function must be paired with |BCM_sha512_256_init|, which sets 113 // |sha->md_len| to |BCM_SHA512_256_DIGEST_LENGTH|. 114 assert(sha->md_len == BCM_SHA512_256_DIGEST_LENGTH); 115 sha512_final_impl(out, BCM_SHA512_256_DIGEST_LENGTH, sha); 116 return bcm_infallible::approved; 117} 118 119bcm_infallible BCM_sha512_transform(SHA512_CTX *c, 120 const uint8_t block[SHA512_CBLOCK]) { 121 sha512_block_data_order(c->h, block, 1); 122 return bcm_infallible::approved; 123} 124 125bcm_infallible BCM_sha512_update(SHA512_CTX *c, const void *in_data, 126 size_t len) { 127 uint8_t *p = c->p; 128 const uint8_t *data = reinterpret_cast<const uint8_t *>(in_data); 129 130 if (len == 0) { 131 return bcm_infallible::approved; 132 } 133 134 c->bytes_so_far_low += len; 135 if (c->bytes_so_far_low < len) { 136 c->bytes_so_far_high++; 137 } 138 139 if (c->num != 0) { 140 size_t n = sizeof(c->p) - c->num; 141 142 if (len < n) { 143 OPENSSL_memcpy(p + c->num, data, len); 144 c->num += (unsigned int)len; 145 return bcm_infallible::approved; 146 } else { 147 OPENSSL_memcpy(p + c->num, data, n), c->num = 0; 148 len -= n; 149 data += n; 150 sha512_block_data_order(c->h, p, 1); 151 } 152 } 153 154 if (len >= sizeof(c->p)) { 155 
sha512_block_data_order(c->h, data, len / sizeof(c->p)); 156 data += len; 157 len %= sizeof(c->p); 158 data -= len; 159 } 160 161 if (len != 0) { 162 OPENSSL_memcpy(p, data, len); 163 c->num = (int)len; 164 } 165 166 return bcm_infallible::approved; 167} 168 169bcm_infallible BCM_sha512_final(uint8_t out[BCM_SHA512_DIGEST_LENGTH], 170 SHA512_CTX *sha) { 171 // Ideally we would assert |sha->md_len| is |BCM_SHA512_DIGEST_LENGTH| to 172 // match the size hint, but calling code often pairs |BCM_sha384_init| with 173 // |BCM_sha512_final| and expects |sha->md_len| to carry the size over. 174 // 175 // TODO(davidben): Add an assert and fix code to match them up. 176 sha512_final_impl(out, sha->md_len, sha); 177 return bcm_infallible::approved; 178} 179 180static void sha512_final_impl(uint8_t *out, size_t md_len, SHA512_CTX *sha) { 181 uint8_t *p = sha->p; 182 size_t n = sha->num; 183 184 p[n] = 0x80; // There always is a room for one 185 n++; 186 if (n > (sizeof(sha->p) - 16)) { 187 OPENSSL_memset(p + n, 0, sizeof(sha->p) - n); 188 n = 0; 189 sha512_block_data_order(sha->h, p, 1); 190 } 191 192 OPENSSL_memset(p + n, 0, sizeof(sha->p) - 16 - n); 193 const uint64_t Nh = (uint64_t{sha->bytes_so_far_high} << 3) | 194 (sha->bytes_so_far_low >> (64 - 3)); 195 const uint64_t Nl = sha->bytes_so_far_low << 3; 196 CRYPTO_store_u64_be(p + sizeof(sha->p) - 16, Nh); 197 CRYPTO_store_u64_be(p + sizeof(sha->p) - 8, Nl); 198 199 sha512_block_data_order(sha->h, p, 1); 200 201 assert(md_len % 8 == 0); 202 const size_t out_words = md_len / 8; 203 for (size_t i = 0; i < out_words; i++) { 204 CRYPTO_store_u64_be(out, sha->h[i]); 205 out += 8; 206 } 207 208 FIPS_service_indicator_update_state(); 209} 210 211#if !defined(SHA512_ASM) 212 213#if !defined(SHA512_ASM_NOHW) 214static const uint64_t K512[80] = { 215 UINT64_C(0x428a2f98d728ae22), UINT64_C(0x7137449123ef65cd), 216 UINT64_C(0xb5c0fbcfec4d3b2f), UINT64_C(0xe9b5dba58189dbbc), 217 UINT64_C(0x3956c25bf348b538), 
UINT64_C(0x59f111f1b605d019), 218 UINT64_C(0x923f82a4af194f9b), UINT64_C(0xab1c5ed5da6d8118), 219 UINT64_C(0xd807aa98a3030242), UINT64_C(0x12835b0145706fbe), 220 UINT64_C(0x243185be4ee4b28c), UINT64_C(0x550c7dc3d5ffb4e2), 221 UINT64_C(0x72be5d74f27b896f), UINT64_C(0x80deb1fe3b1696b1), 222 UINT64_C(0x9bdc06a725c71235), UINT64_C(0xc19bf174cf692694), 223 UINT64_C(0xe49b69c19ef14ad2), UINT64_C(0xefbe4786384f25e3), 224 UINT64_C(0x0fc19dc68b8cd5b5), UINT64_C(0x240ca1cc77ac9c65), 225 UINT64_C(0x2de92c6f592b0275), UINT64_C(0x4a7484aa6ea6e483), 226 UINT64_C(0x5cb0a9dcbd41fbd4), UINT64_C(0x76f988da831153b5), 227 UINT64_C(0x983e5152ee66dfab), UINT64_C(0xa831c66d2db43210), 228 UINT64_C(0xb00327c898fb213f), UINT64_C(0xbf597fc7beef0ee4), 229 UINT64_C(0xc6e00bf33da88fc2), UINT64_C(0xd5a79147930aa725), 230 UINT64_C(0x06ca6351e003826f), UINT64_C(0x142929670a0e6e70), 231 UINT64_C(0x27b70a8546d22ffc), UINT64_C(0x2e1b21385c26c926), 232 UINT64_C(0x4d2c6dfc5ac42aed), UINT64_C(0x53380d139d95b3df), 233 UINT64_C(0x650a73548baf63de), UINT64_C(0x766a0abb3c77b2a8), 234 UINT64_C(0x81c2c92e47edaee6), UINT64_C(0x92722c851482353b), 235 UINT64_C(0xa2bfe8a14cf10364), UINT64_C(0xa81a664bbc423001), 236 UINT64_C(0xc24b8b70d0f89791), UINT64_C(0xc76c51a30654be30), 237 UINT64_C(0xd192e819d6ef5218), UINT64_C(0xd69906245565a910), 238 UINT64_C(0xf40e35855771202a), UINT64_C(0x106aa07032bbd1b8), 239 UINT64_C(0x19a4c116b8d2d0c8), UINT64_C(0x1e376c085141ab53), 240 UINT64_C(0x2748774cdf8eeb99), UINT64_C(0x34b0bcb5e19b48a8), 241 UINT64_C(0x391c0cb3c5c95a63), UINT64_C(0x4ed8aa4ae3418acb), 242 UINT64_C(0x5b9cca4f7763e373), UINT64_C(0x682e6ff3d6b2b8a3), 243 UINT64_C(0x748f82ee5defb2fc), UINT64_C(0x78a5636f43172f60), 244 UINT64_C(0x84c87814a1f0ab72), UINT64_C(0x8cc702081a6439ec), 245 UINT64_C(0x90befffa23631e28), UINT64_C(0xa4506cebde82bde9), 246 UINT64_C(0xbef9a3f7b2c67915), UINT64_C(0xc67178f2e372532b), 247 UINT64_C(0xca273eceea26619c), UINT64_C(0xd186b8c721c0c207), 248 UINT64_C(0xeada7dd6cde0eb1e), 
UINT64_C(0xf57d4f7fee6ed178), 249 UINT64_C(0x06f067aa72176fba), UINT64_C(0x0a637dc5a2c898a6), 250 UINT64_C(0x113f9804bef90dae), UINT64_C(0x1b710b35131c471b), 251 UINT64_C(0x28db77f523047d84), UINT64_C(0x32caab7b40c72493), 252 UINT64_C(0x3c9ebe0a15c9bebc), UINT64_C(0x431d67c49c100d4c), 253 UINT64_C(0x4cc5d4becb3e42b6), UINT64_C(0x597f299cfc657e2a), 254 UINT64_C(0x5fcb6fab3ad6faec), UINT64_C(0x6c44198c4a475817), 255}; 256 257#define Sigma0(x) \ 258 (CRYPTO_rotr_u64((x), 28) ^ CRYPTO_rotr_u64((x), 34) ^ \ 259 CRYPTO_rotr_u64((x), 39)) 260#define Sigma1(x) \ 261 (CRYPTO_rotr_u64((x), 14) ^ CRYPTO_rotr_u64((x), 18) ^ \ 262 CRYPTO_rotr_u64((x), 41)) 263#define sigma0(x) \ 264 (CRYPTO_rotr_u64((x), 1) ^ CRYPTO_rotr_u64((x), 8) ^ ((x) >> 7)) 265#define sigma1(x) \ 266 (CRYPTO_rotr_u64((x), 19) ^ CRYPTO_rotr_u64((x), 61) ^ ((x) >> 6)) 267 268#define Ch(x, y, z) (((x) & (y)) ^ ((~(x)) & (z))) 269#define Maj(x, y, z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z))) 270 271 272#if defined(__i386) || defined(__i386__) || defined(_M_IX86) 273// This code should give better results on 32-bit CPU with less than 274// ~24 registers, both size and performance wise... 
275static void sha512_block_data_order_nohw(uint64_t state[8], const uint8_t *in, 276 size_t num) { 277 uint64_t A, E, T; 278 uint64_t X[9 + 80], *F; 279 int i; 280 281 while (num--) { 282 F = X + 80; 283 A = state[0]; 284 F[1] = state[1]; 285 F[2] = state[2]; 286 F[3] = state[3]; 287 E = state[4]; 288 F[5] = state[5]; 289 F[6] = state[6]; 290 F[7] = state[7]; 291 292 for (i = 0; i < 16; i++, F--) { 293 T = CRYPTO_load_u64_be(in + i * 8); 294 F[0] = A; 295 F[4] = E; 296 F[8] = T; 297 T += F[7] + Sigma1(E) + Ch(E, F[5], F[6]) + K512[i]; 298 E = F[3] + T; 299 A = T + Sigma0(A) + Maj(A, F[1], F[2]); 300 } 301 302 for (; i < 80; i++, F--) { 303 T = sigma0(F[8 + 16 - 1]); 304 T += sigma1(F[8 + 16 - 14]); 305 T += F[8 + 16] + F[8 + 16 - 9]; 306 307 F[0] = A; 308 F[4] = E; 309 F[8] = T; 310 T += F[7] + Sigma1(E) + Ch(E, F[5], F[6]) + K512[i]; 311 E = F[3] + T; 312 A = T + Sigma0(A) + Maj(A, F[1], F[2]); 313 } 314 315 state[0] += A; 316 state[1] += F[1]; 317 state[2] += F[2]; 318 state[3] += F[3]; 319 state[4] += E; 320 state[5] += F[5]; 321 state[6] += F[6]; 322 state[7] += F[7]; 323 324 in += 16 * 8; 325 } 326} 327 328#else 329 330#define ROUND_00_15(i, a, b, c, d, e, f, g, h) \ 331 do { \ 332 T1 += h + Sigma1(e) + Ch(e, f, g) + K512[i]; \ 333 h = Sigma0(a) + Maj(a, b, c); \ 334 d += T1; \ 335 h += T1; \ 336 } while (0) 337 338#define ROUND_16_80(i, j, a, b, c, d, e, f, g, h, X) \ 339 do { \ 340 s0 = X[(j + 1) & 0x0f]; \ 341 s0 = sigma0(s0); \ 342 s1 = X[(j + 14) & 0x0f]; \ 343 s1 = sigma1(s1); \ 344 T1 = X[(j) & 0x0f] += s0 + s1 + X[(j + 9) & 0x0f]; \ 345 ROUND_00_15(i + j, a, b, c, d, e, f, g, h); \ 346 } while (0) 347 348static void sha512_block_data_order_nohw(uint64_t state[8], const uint8_t *in, 349 size_t num) { 350 uint64_t a, b, c, d, e, f, g, h, s0, s1, T1; 351 uint64_t X[16]; 352 int i; 353 354 while (num--) { 355 a = state[0]; 356 b = state[1]; 357 c = state[2]; 358 d = state[3]; 359 e = state[4]; 360 f = state[5]; 361 g = state[6]; 362 h = state[7]; 363 364 
T1 = X[0] = CRYPTO_load_u64_be(in); 365 ROUND_00_15(0, a, b, c, d, e, f, g, h); 366 T1 = X[1] = CRYPTO_load_u64_be(in + 8); 367 ROUND_00_15(1, h, a, b, c, d, e, f, g); 368 T1 = X[2] = CRYPTO_load_u64_be(in + 2 * 8); 369 ROUND_00_15(2, g, h, a, b, c, d, e, f); 370 T1 = X[3] = CRYPTO_load_u64_be(in + 3 * 8); 371 ROUND_00_15(3, f, g, h, a, b, c, d, e); 372 T1 = X[4] = CRYPTO_load_u64_be(in + 4 * 8); 373 ROUND_00_15(4, e, f, g, h, a, b, c, d); 374 T1 = X[5] = CRYPTO_load_u64_be(in + 5 * 8); 375 ROUND_00_15(5, d, e, f, g, h, a, b, c); 376 T1 = X[6] = CRYPTO_load_u64_be(in + 6 * 8); 377 ROUND_00_15(6, c, d, e, f, g, h, a, b); 378 T1 = X[7] = CRYPTO_load_u64_be(in + 7 * 8); 379 ROUND_00_15(7, b, c, d, e, f, g, h, a); 380 T1 = X[8] = CRYPTO_load_u64_be(in + 8 * 8); 381 ROUND_00_15(8, a, b, c, d, e, f, g, h); 382 T1 = X[9] = CRYPTO_load_u64_be(in + 9 * 8); 383 ROUND_00_15(9, h, a, b, c, d, e, f, g); 384 T1 = X[10] = CRYPTO_load_u64_be(in + 10 * 8); 385 ROUND_00_15(10, g, h, a, b, c, d, e, f); 386 T1 = X[11] = CRYPTO_load_u64_be(in + 11 * 8); 387 ROUND_00_15(11, f, g, h, a, b, c, d, e); 388 T1 = X[12] = CRYPTO_load_u64_be(in + 12 * 8); 389 ROUND_00_15(12, e, f, g, h, a, b, c, d); 390 T1 = X[13] = CRYPTO_load_u64_be(in + 13 * 8); 391 ROUND_00_15(13, d, e, f, g, h, a, b, c); 392 T1 = X[14] = CRYPTO_load_u64_be(in + 14 * 8); 393 ROUND_00_15(14, c, d, e, f, g, h, a, b); 394 T1 = X[15] = CRYPTO_load_u64_be(in + 15 * 8); 395 ROUND_00_15(15, b, c, d, e, f, g, h, a); 396 397 for (i = 16; i < 80; i += 16) { 398 ROUND_16_80(i, 0, a, b, c, d, e, f, g, h, X); 399 ROUND_16_80(i, 1, h, a, b, c, d, e, f, g, X); 400 ROUND_16_80(i, 2, g, h, a, b, c, d, e, f, X); 401 ROUND_16_80(i, 3, f, g, h, a, b, c, d, e, X); 402 ROUND_16_80(i, 4, e, f, g, h, a, b, c, d, X); 403 ROUND_16_80(i, 5, d, e, f, g, h, a, b, c, X); 404 ROUND_16_80(i, 6, c, d, e, f, g, h, a, b, X); 405 ROUND_16_80(i, 7, b, c, d, e, f, g, h, a, X); 406 ROUND_16_80(i, 8, a, b, c, d, e, f, g, h, X); 407 ROUND_16_80(i, 9, h, a, b, c, 
d, e, f, g, X); 408 ROUND_16_80(i, 10, g, h, a, b, c, d, e, f, X); 409 ROUND_16_80(i, 11, f, g, h, a, b, c, d, e, X); 410 ROUND_16_80(i, 12, e, f, g, h, a, b, c, d, X); 411 ROUND_16_80(i, 13, d, e, f, g, h, a, b, c, X); 412 ROUND_16_80(i, 14, c, d, e, f, g, h, a, b, X); 413 ROUND_16_80(i, 15, b, c, d, e, f, g, h, a, X); 414 } 415 416 state[0] += a; 417 state[1] += b; 418 state[2] += c; 419 state[3] += d; 420 state[4] += e; 421 state[5] += f; 422 state[6] += g; 423 state[7] += h; 424 425 in += 16 * 8; 426 } 427} 428 429#endif 430 431#endif // !SHA512_ASM_NOHW 432 433static void sha512_block_data_order(uint64_t state[8], const uint8_t *data, 434 size_t num) { 435#if defined(SHA512_ASM_HW) 436 if (sha512_hw_capable()) { 437 sha512_block_data_order_hw(state, data, num); 438 return; 439 } 440#endif 441#if defined(SHA512_ASM_AVX) 442 if (sha512_avx_capable()) { 443 sha512_block_data_order_avx(state, data, num); 444 return; 445 } 446#endif 447#if defined(SHA512_ASM_SSSE3) 448 if (sha512_ssse3_capable()) { 449 sha512_block_data_order_ssse3(state, data, num); 450 return; 451 } 452#endif 453#if defined(SHA512_ASM_NEON) 454 if (CRYPTO_is_NEON_capable()) { 455 sha512_block_data_order_neon(state, data, num); 456 return; 457 } 458#endif 459 sha512_block_data_order_nohw(state, data, num); 460} 461 462#endif // !SHA512_ASM 463 464#undef Sigma0 465#undef Sigma1 466#undef sigma0 467#undef sigma1 468#undef Ch 469#undef Maj 470#undef ROUND_00_15 471#undef ROUND_16_80 472