1 // © 2021 Qualcomm Innovation Center, Inc. All rights reserved.
2 //
3 // SPDX-License-Identifier: BSD-3-Clause
4 
// This PRNG implements a "fast-key-erasure RNG" as described by D.J.Bernstein
// https://blog.cr.yp.to/20170723-random.html
//
// The algorithm ensures that the RNG won't contribute to any failure of
// forward security of its clients. Random data is generated into a buffer
// using a key, then the key used is immediately destroyed, and a new key from
// the first output block is created.
//
// Requests for randomness return data from the buffer. When the buffer is
// exhausted, new randomness is generated, with another new key being generated
// as described above. Additionally, the random bytes returned are cleared from
// the buffer for similar forward security reasons.
//
// This implementation uses the block function from the ChaCha20 stream cipher
// which is used to generate a pseudo-random bitstream in counter mode, and is
// much faster than alternative approaches, such as hash/HMAC based DRBGs, and
// counter-cipher schemes such as AES-CTR-DRBG (which don't immediately destroy
// the key).
//
// Finally, randomness from a HW RNG is added to the key periodically. An
// update timestamp is maintained, and when requesting randomness, if the last
// update was more than 5 minutes ago, new randomness is added.
27 
28 #include <assert.h>
29 #include <hyptypes.h>
30 #include <string.h>
31 
32 #include <hypregisters.h>
33 
34 #include <bootmem.h>
35 #include <compiler.h>
36 #include <log.h>
37 #include <panic.h>
38 #include <platform_prng.h>
39 #include <platform_timer.h>
40 #include <prng.h>
41 #include <spinlock.h>
42 #include <trace.h>
43 #include <util.h>
44 
45 #include <asm/cache.h>
46 #include <asm/cpu.h>
47 
48 #include "chacha20.h"
49 #include "event_handlers.h"
50 
// Buffer geometry. All sizes and offsets below are in units of 32-bit words.
#define WORD_BITS	   32U
// Words in one generated output block (512 bits)
#define BLOCK_WORDS	   (512U / WORD_BITS)
// Words in the generator key (256 bits)
#define KEY_WORDS	   (256U / WORD_BITS)
#define BUFFER_KEY_OFFSET  0U
// The first KEY_WORDS words of the buffer are reserved for the next key, so
// data handed out to callers starts after them.
#define BUFFER_DATA_OFFSET KEY_WORDS
// Number of blocks generated per buffer refill
#define BUFFER_BLOCKS	   4U
#define BUFFER_WORDS	   (BUFFER_BLOCKS * BLOCK_WORDS)

// Minimum interval between additions of platform entropy to the key
#define REKEY_TIMEOUT_NS ((uint64_t)300U * 1000000000U) // 300 seconds
60 
61 extern uint32_t hypervisor_prng_seed[KEY_WORDS];
62 extern uint64_t hypervisor_prng_nonce;
63 
64 #define CACHE_LINE_SIZE (1 << CPU_L1D_LINE_BITS)
65 
66 typedef struct {
67 	uint32_t alignas(CACHE_LINE_SIZE) key[KEY_WORDS];
68 
69 	ticks_t key_timestamp;
70 	ticks_t key_timeout;
71 	count_t pool_index; // index in units of words
72 
73 	uint32_t nonce[3];
74 
75 	uint32_t alignas(CACHE_LINE_SIZE)
76 		entropy_pool[BUFFER_BLOCKS][BLOCK_WORDS];
77 } prng_data_t;
78 
79 static bool prng_initialized = false;
80 
81 static spinlock_t   prng_lock;
82 static prng_data_t *prng_data PTR_PROTECTED_BY(prng_lock);
83 
84 void
prng_simple_handle_boot_runtime_first_init(void)85 prng_simple_handle_boot_runtime_first_init(void)
86 {
87 	spinlock_init(&prng_lock);
88 	spinlock_acquire_nopreempt(&prng_lock);
89 
90 	void_ptr_result_t ret;
91 
92 	// Allocate boot entropy pool
93 	ret = bootmem_allocate(sizeof(prng_data_t), alignof(prng_data_t));
94 	if (ret.e != OK) {
95 		panic("unable to allocate boot entropy pool");
96 	}
97 
98 	prng_data = (prng_data_t *)ret.r;
99 	assert(prng_data != NULL);
100 
101 	(void)memset_s(prng_data, sizeof(*prng_data), 0, sizeof(*prng_data));
102 
103 	prng_data->pool_index = BUFFER_WORDS; // Buffer is Empty
104 	(void)memscpy(&prng_data->key, sizeof(prng_data->key),
105 		      hypervisor_prng_seed, sizeof(prng_data->key));
106 
107 	// Ensure no stale copies remain in ram
108 	assert(hypervisor_prng_seed != NULL);
109 	(void)memset_s(hypervisor_prng_seed, sizeof(hypervisor_prng_seed), 0,
110 		       sizeof(hypervisor_prng_seed));
111 	CACHE_CLEAN_INVALIDATE_OBJECT(hypervisor_prng_seed);
112 
113 	prng_data->key_timestamp = platform_timer_get_current_ticks();
114 	prng_data->key_timeout =
115 		platform_timer_convert_ns_to_ticks(REKEY_TIMEOUT_NS);
116 
117 	uint32_t serial[4];
118 
119 	error_t err = platform_get_serial(serial);
120 	if (err != OK) {
121 		panic("unable to get serial number");
122 	}
123 
124 	prng_data->nonce[0] = serial[0];
125 	prng_data->nonce[1] = serial[1];
126 	prng_data->nonce[2] = serial[2];
127 
128 	// Add in some chip specific noise
129 	prng_data->nonce[1] ^= (uint32_t)(hypervisor_prng_nonce & 0xffffffffU);
130 	prng_data->nonce[2] ^= (uint32_t)(hypervisor_prng_nonce >> 32);
131 
132 	// Ensure no stale copies remain in ram
133 	(void)memset_s(&hypervisor_prng_nonce, sizeof(hypervisor_prng_nonce), 0,
134 		       sizeof(hypervisor_prng_nonce));
135 	CACHE_CLEAN_INVALIDATE_OBJECT(hypervisor_prng_nonce);
136 
137 	prng_initialized = true;
138 	spinlock_release_nopreempt(&prng_lock);
139 }
140 
// Hypervisor-start boot handler. Currently a placeholder that does nothing;
// the planned post-boot protection of the PRNG state is listed below.
void
prng_simple_handle_boot_hypervisor_start(void)
{
	// FIXME:
	// Post boot prng_data protection
	//  * allocate an unmapped 4K page for the prng_data
	//  * Aarch64 PAN implementation:
	//    - map the page with EL2&0 user-rw permissions
	//    - enable PAN to access the prng_data
	//  * copy the boot prng_data to the new page and zero it afterwards
	//  * update prng_data pointer to new location
}
153 
154 static bool
add_platform_entropy(void)155 add_platform_entropy(void) REQUIRE_SPINLOCK(prng_lock)
156 {
157 	error_t ret;
158 	bool	success;
159 	platform_prng_data256_t new;
160 
161 	ret = platform_get_entropy(&new);
162 	if (ret == OK) {
163 		// mix in new key entropy
164 		prng_data->key[0] ^= new.word[0];
165 		prng_data->key[1] ^= new.word[1];
166 		prng_data->key[2] ^= new.word[2];
167 		prng_data->key[3] ^= new.word[3];
168 		prng_data->key[4] ^= new.word[4];
169 		prng_data->key[5] ^= new.word[5];
170 		prng_data->key[6] ^= new.word[6];
171 		prng_data->key[7] ^= new.word[7];
172 
173 		// Ensure no stale copy remains on the stack
174 		(void)memset_s(&new, sizeof(new), 0, sizeof(new));
175 		CACHE_CLEAN_INVALIDATE_OBJECT(new);
176 
177 		success = true;
178 	} else if (ret == ERROR_BUSY) {
179 		LOG(DEBUG, INFO, "platform_get_entropy busy");
180 		success = false;
181 	} else {
182 		LOG(ERROR, WARN, "platform_get_entropy err: {:d}",
183 		    (register_t)ret);
184 		panic("Failed to get platform_get_entropy");
185 	}
186 
187 	return success;
188 }
189 
190 static void
prng_update(void)191 prng_update(void) REQUIRE_SPINLOCK(prng_lock)
192 {
193 	uint32_t counter = 1U;
194 	count_t	 i;
195 
196 	ticks_t now = platform_timer_get_current_ticks();
197 
198 	// Add new key entropy periodically, this is not critical if platform
199 	// is busy, we'll try again next time.
200 	if ((now - prng_data->key_timestamp) > prng_data->key_timeout) {
201 		if (add_platform_entropy()) {
202 			prng_data->key_timestamp = now;
203 		}
204 	}
205 
206 	// Generate a new set of blocks
207 	for (i = 0U; i < BUFFER_BLOCKS; i++) {
208 		chacha20_block(&prng_data->key, counter, &prng_data->nonce,
209 			       &prng_data->entropy_pool[i]);
210 		counter++;
211 	}
212 	// Nonce must not be repeated for the same key! Even though we re-key
213 	// below, we increment the nonce anyway!
214 	prng_data->nonce[0] += 1U;
215 	if (prng_data->nonce[0] == 0U) {
216 		// Addition overflow of nonce[0]
217 		prng_data->nonce[1] += 1U;
218 		if (prng_data->nonce[1] == 0U) {
219 			// Addition overflow of nonce[1]
220 			prng_data->nonce[2] += 1U;
221 		}
222 	}
223 
224 	// Fast key update from block 0
225 	(void)memscpy(prng_data->key, sizeof(prng_data->key),
226 		      &prng_data->entropy_pool[0],
227 		      sizeof(prng_data->entropy_pool[0]));
228 	// Ensure no stale copies remain in ram
229 	CACHE_CLEAN_FIXED_RANGE(prng_data->key, 32U);
230 	// Clear the used bytes just in case
231 	(void)memset_s(&prng_data->entropy_pool[0],
232 		       sizeof(prng_data->entropy_pool[0]), 0,
233 		       BUFFER_DATA_OFFSET * sizeof(uint32_t));
234 	// Ensure no stale copies remain in ram
235 	CACHE_CLEAN_FIXED_RANGE(&prng_data->entropy_pool[0],
236 				BUFFER_DATA_OFFSET * sizeof(uint32_t));
237 
238 	prng_data->pool_index = BUFFER_DATA_OFFSET;
239 }
240 
241 uint64_result_t
prng_get64(void)242 prng_get64(void)
243 {
244 	uint64_result_t ret;
245 
246 	assert(prng_initialized);
247 
248 	spinlock_acquire(&prng_lock);
249 
250 	count_t index = prng_data->pool_index;
251 
252 	if (index > (BUFFER_WORDS - (64U / WORD_BITS))) {
253 		// Not enough buffered randomness, get more
254 		prng_update();
255 		index = prng_data->pool_index;
256 	}
257 	prng_data->pool_index += (64U / WORD_BITS);
258 
259 	index_t	  block = index / BLOCK_WORDS;
260 	index_t	  word	= index % BLOCK_WORDS;
261 	uint32_t *data	= &prng_data->entropy_pool[block][word];
262 
263 	ret.r = data[0];
264 	ret.r |= (uint64_t)data[1] << 32;
265 
266 	ret.e = OK;
267 	// Pointer difference in bytes
268 	ptrdiff_t len = (char *)data - (char *)prng_data->entropy_pool[0];
269 
270 	assert(len >= 0);
271 
272 	// Clear used bits
273 	(void)memset_s(data, sizeof(prng_data->entropy_pool) - (size_t)len, 0,
274 		       sizeof(ret.r));
275 	// Ensure used bits are cleared from caches
276 	CACHE_CLEAN_FIXED_RANGE(data, sizeof(ret.r));
277 
278 	spinlock_release(&prng_lock);
279 
280 	return ret;
281 }
282