1 // Copyright 2015-2016 The OpenSSL Project Authors. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include <openssl/evp.h>
16 
17 #include <assert.h>
18 
19 #include <openssl/err.h>
20 #include <openssl/mem.h>
21 
22 #include "../internal.h"
23 
24 
25 // This file implements scrypt, described in RFC 7914.
26 //
27 // Note scrypt refers to both "blocks" and a "block size" parameter, r. These
28 // are two different notions of blocks. A Salsa20 block is 64 bytes long,
29 // represented in this implementation by 16 |uint32_t|s. |r| determines the
30 // number of 64-byte Salsa20 blocks in a scryptBlockMix block, which is 2 * |r|
31 // Salsa20 blocks. This implementation refers to them as Salsa20 blocks and
32 // scrypt blocks, respectively.
33 
// block_t holds a single Salsa20 block: 64 bytes, viewed as sixteen 32-bit
// words.
typedef struct {
  uint32_t words[16];
} block_t;

// Buffers of |block_t| are sized and copied in multiples of |sizeof(block_t)|,
// so the struct must be exactly as large as its sixteen words.
static_assert(sizeof(block_t) == 16 * sizeof(uint32_t), "block_t has padding");
40 
41 // salsa208_word_specification implements the Salsa20/8 core function, also
42 // described in RFC 7914, section 3. It modifies the block at |inout|
43 // in-place.
salsa208_word_specification(block_t * inout)44 static void salsa208_word_specification(block_t *inout) {
45   block_t x;
46   OPENSSL_memcpy(&x, inout, sizeof(x));
47 
48   for (int i = 8; i > 0; i -= 2) {
49     x.words[4] ^= CRYPTO_rotl_u32(x.words[0] + x.words[12], 7);
50     x.words[8] ^= CRYPTO_rotl_u32(x.words[4] + x.words[0], 9);
51     x.words[12] ^= CRYPTO_rotl_u32(x.words[8] + x.words[4], 13);
52     x.words[0] ^= CRYPTO_rotl_u32(x.words[12] + x.words[8], 18);
53     x.words[9] ^= CRYPTO_rotl_u32(x.words[5] + x.words[1], 7);
54     x.words[13] ^= CRYPTO_rotl_u32(x.words[9] + x.words[5], 9);
55     x.words[1] ^= CRYPTO_rotl_u32(x.words[13] + x.words[9], 13);
56     x.words[5] ^= CRYPTO_rotl_u32(x.words[1] + x.words[13], 18);
57     x.words[14] ^= CRYPTO_rotl_u32(x.words[10] + x.words[6], 7);
58     x.words[2] ^= CRYPTO_rotl_u32(x.words[14] + x.words[10], 9);
59     x.words[6] ^= CRYPTO_rotl_u32(x.words[2] + x.words[14], 13);
60     x.words[10] ^= CRYPTO_rotl_u32(x.words[6] + x.words[2], 18);
61     x.words[3] ^= CRYPTO_rotl_u32(x.words[15] + x.words[11], 7);
62     x.words[7] ^= CRYPTO_rotl_u32(x.words[3] + x.words[15], 9);
63     x.words[11] ^= CRYPTO_rotl_u32(x.words[7] + x.words[3], 13);
64     x.words[15] ^= CRYPTO_rotl_u32(x.words[11] + x.words[7], 18);
65     x.words[1] ^= CRYPTO_rotl_u32(x.words[0] + x.words[3], 7);
66     x.words[2] ^= CRYPTO_rotl_u32(x.words[1] + x.words[0], 9);
67     x.words[3] ^= CRYPTO_rotl_u32(x.words[2] + x.words[1], 13);
68     x.words[0] ^= CRYPTO_rotl_u32(x.words[3] + x.words[2], 18);
69     x.words[6] ^= CRYPTO_rotl_u32(x.words[5] + x.words[4], 7);
70     x.words[7] ^= CRYPTO_rotl_u32(x.words[6] + x.words[5], 9);
71     x.words[4] ^= CRYPTO_rotl_u32(x.words[7] + x.words[6], 13);
72     x.words[5] ^= CRYPTO_rotl_u32(x.words[4] + x.words[7], 18);
73     x.words[11] ^= CRYPTO_rotl_u32(x.words[10] + x.words[9], 7);
74     x.words[8] ^= CRYPTO_rotl_u32(x.words[11] + x.words[10], 9);
75     x.words[9] ^= CRYPTO_rotl_u32(x.words[8] + x.words[11], 13);
76     x.words[10] ^= CRYPTO_rotl_u32(x.words[9] + x.words[8], 18);
77     x.words[12] ^= CRYPTO_rotl_u32(x.words[15] + x.words[14], 7);
78     x.words[13] ^= CRYPTO_rotl_u32(x.words[12] + x.words[15], 9);
79     x.words[14] ^= CRYPTO_rotl_u32(x.words[13] + x.words[12], 13);
80     x.words[15] ^= CRYPTO_rotl_u32(x.words[14] + x.words[13], 18);
81   }
82 
83   for (int i = 0; i < 16; ++i) {
84     inout->words[i] += x.words[i];
85   }
86 }
87 
88 // xor_block sets |*out| to be |*a| XOR |*b|.
xor_block(block_t * out,const block_t * a,const block_t * b)89 static void xor_block(block_t *out, const block_t *a, const block_t *b) {
90   for (size_t i = 0; i < 16; i++) {
91     out->words[i] = a->words[i] ^ b->words[i];
92   }
93 }
94 
95 // scryptBlockMix implements the function described in RFC 7914, section 4. B'
96 // is written to |out|. |out| and |B| may not alias and must be each one scrypt
97 // block (2 * |r| Salsa20 blocks) long.
scryptBlockMix(block_t * out,const block_t * B,uint64_t r)98 static void scryptBlockMix(block_t *out, const block_t *B, uint64_t r) {
99   assert(out != B);
100 
101   block_t X;
102   OPENSSL_memcpy(&X, &B[r * 2 - 1], sizeof(X));
103   for (uint64_t i = 0; i < r * 2; i++) {
104     xor_block(&X, &X, &B[i]);
105     salsa208_word_specification(&X);
106 
107     // This implements the permutation in step 3.
108     OPENSSL_memcpy(&out[i / 2 + (i & 1) * r], &X, sizeof(X));
109   }
110 }
111 
112 // scryptROMix implements the function described in RFC 7914, section 5.  |B| is
113 // an scrypt block (2 * |r| Salsa20 blocks) and is modified in-place. |T| and
114 // |V| are scratch space allocated by the caller. |T| must have space for one
115 // scrypt block (2 * |r| Salsa20 blocks). |V| must have space for |N| scrypt
116 // blocks (2 * |r| * |N| Salsa20 blocks).
scryptROMix(block_t * B,uint64_t r,uint64_t N,block_t * T,block_t * V)117 static void scryptROMix(block_t *B, uint64_t r, uint64_t N, block_t *T,
118                         block_t *V) {
119   // Steps 1 and 2.
120   OPENSSL_memcpy(V, B, 2 * r * sizeof(block_t));
121   for (uint64_t i = 1; i < N; i++) {
122     scryptBlockMix(&V[2 * r * i /* scrypt block i */],
123                    &V[2 * r * (i - 1) /* scrypt block i-1 */], r);
124   }
125   scryptBlockMix(B, &V[2 * r * (N - 1) /* scrypt block N-1 */], r);
126 
127   // Step 3.
128   for (uint64_t i = 0; i < N; i++) {
129     // Note this assumes |N| <= 2^32 and is a power of 2.
130     uint32_t j = B[2 * r - 1].words[0] & (N - 1);
131     for (size_t k = 0; k < 2 * r; k++) {
132       xor_block(&T[k], &B[k], &V[2 * r * j + k]);
133     }
134     scryptBlockMix(B, T, r);
135   }
136 }
137 
// SCRYPT_PR_MAX is the largest permitted value of p * r, i.e. 2^30 - 1. It
// restates the bound on p from section 6:
//
//   p <= ((2^32-1) * hLen) / MFLen iff
//   p <= ((2^32-1) * 32) / (128 * r) iff
//   p * r <= (2^30-1)
#define SCRYPT_PR_MAX 0x3fffffff
145 
// SCRYPT_MAX_MEM is the memory limit, in bytes, that |EVP_PBE_scrypt| applies
// when the caller passes a |max_mem| of zero: 65 MiB.
#define SCRYPT_MAX_MEM (65 * 1024 * 1024)
149 
EVP_PBE_scrypt(const char * password,size_t password_len,const uint8_t * salt,size_t salt_len,uint64_t N,uint64_t r,uint64_t p,size_t max_mem,uint8_t * out_key,size_t key_len)150 int EVP_PBE_scrypt(const char *password, size_t password_len,
151                    const uint8_t *salt, size_t salt_len, uint64_t N, uint64_t r,
152                    uint64_t p, size_t max_mem, uint8_t *out_key,
153                    size_t key_len) {
154   if (r == 0 || p == 0 || p > SCRYPT_PR_MAX / r ||
155       // |N| must be a power of two.
156       N < 2 || (N & (N - 1)) ||
157       // We only support |N| <= 2^32 in |scryptROMix|.
158       N > UINT64_C(1) << 32 ||
159       // Check that |N| < 2^(128×r / 8).
160       (16 * r <= 63 && N >= UINT64_C(1) << (16 * r))) {
161     OPENSSL_PUT_ERROR(EVP, EVP_R_INVALID_PARAMETERS);
162     return 0;
163   }
164 
165   // Determine the amount of memory needed. B, T, and V are |p|, 1, and |N|
166   // scrypt blocks, respectively. Each scrypt block is 2*|r| |block_t|s.
167   if (max_mem == 0) {
168     max_mem = SCRYPT_MAX_MEM;
169   }
170 
171   size_t max_scrypt_blocks = max_mem / (2 * r * sizeof(block_t));
172   if (max_scrypt_blocks < p + 1 || max_scrypt_blocks - p - 1 < N) {
173     OPENSSL_PUT_ERROR(EVP, EVP_R_MEMORY_LIMIT_EXCEEDED);
174     return 0;
175   }
176 
177   // Allocate and divide up the scratch space. |max_mem| fits in a size_t, which
178   // is no bigger than uint64_t, so none of these operations may overflow.
179   static_assert(UINT64_MAX >= SIZE_MAX, "size_t exceeds uint64_t");
180   size_t B_blocks = p * 2 * r;
181   size_t B_bytes = B_blocks * sizeof(block_t);
182   size_t T_blocks = 2 * r;
183   size_t V_blocks = N * 2 * r;
184   block_t *B = reinterpret_cast<block_t *>(
185       OPENSSL_calloc(B_blocks + T_blocks + V_blocks, sizeof(block_t)));
186   if (B == NULL) {
187     return 0;
188   }
189 
190   int ret = 0;
191   block_t *T = B + B_blocks;
192   block_t *V = T + T_blocks;
193 
194   // NOTE: PKCS5_PBKDF2_HMAC can only fail due to allocation failure
195   // or |iterations| of 0 (we pass 1 here). This is consistent with
196   // the documented failure conditions of EVP_PBE_scrypt.
197   if (!PKCS5_PBKDF2_HMAC(password, password_len, salt, salt_len, 1,
198                          EVP_sha256(), B_bytes, (uint8_t *)B)) {
199     goto err;
200   }
201 
202   for (uint64_t i = 0; i < p; i++) {
203     scryptROMix(B + 2 * r * i, r, N, T, V);
204   }
205 
206   if (!PKCS5_PBKDF2_HMAC(password, password_len, (const uint8_t *)B, B_bytes, 1,
207                          EVP_sha256(), key_len, out_key)) {
208     goto err;
209   }
210 
211   ret = 1;
212 
213 err:
214   OPENSSL_free(B);
215   return ret;
216 }
217