1 /*
2 * Copyright 2014, General Dynamics C4 Systems
3 *
4 * SPDX-License-Identifier: GPL-2.0-only
5 */
6
7 #include <assert.h>
8 #include <stdint.h>
9 #include <util.h>
10
11 /*
12 * memzero needs a custom type that allows us to use a word
13 * that has the aliasing properties of a char.
14 */
15 typedef unsigned long __attribute__((__may_alias__)) ulong_alias;
16
17 /*
18 * Zero 'n' bytes of memory starting from 's'.
19 *
20 * 'n' and 's' must be word aligned.
21 */
void memzero(void *s, unsigned long n)
{
    uint8_t *p = s;

    /* Ensure alignment constraints are met. */
    assert((unsigned long)s % sizeof(unsigned long) == 0);
    assert(n % sizeof(unsigned long) == 0);

    /* We will never memzero an area larger than the largest current
       live object */
    /** GHOSTUPD: "(gs_get_assn cap_get_capSizeBits_'proc \<acute>ghost'state = 0
        \<or> \<acute>n <= gs_get_assn cap_get_capSizeBits_'proc \<acute>ghost'state, id)" */

    /* Write out words. The store through ulong_alias is legal despite the
       uint8_t cursor because ulong_alias carries the aliasing properties
       of char (see the __may_alias__ typedef above). */
    while (n != 0) {
        *(ulong_alias *)p = 0;
        p += sizeof(ulong_alias);
        n -= sizeof(ulong_alias);
    }
}
42
memset(void * s,unsigned long c,unsigned long n)43 void *VISIBLE memset(void *s, unsigned long c, unsigned long n)
44 {
45 uint8_t *p;
46
47 /*
48 * If we are only writing zeros and we are word aligned, we can
49 * use the optimized 'memzero' function.
50 */
51 if (likely(c == 0 && ((unsigned long)s % sizeof(unsigned long)) == 0 && (n % sizeof(unsigned long)) == 0)) {
52 memzero(s, n);
53 } else {
54 /* Otherwise, we use a slower, simple memset. */
55 for (p = (uint8_t *)s; n > 0; n--, p++) {
56 *p = (uint8_t)c;
57 }
58 }
59
60 return s;
61 }
62
memcpy(void * ptr_dst,const void * ptr_src,unsigned long n)63 void *VISIBLE memcpy(void *ptr_dst, const void *ptr_src, unsigned long n)
64 {
65 uint8_t *p;
66 const uint8_t *q;
67
68 for (p = (uint8_t *)ptr_dst, q = (const uint8_t *)ptr_src; n; n--, p++, q++) {
69 *p = *q;
70 }
71
72 return ptr_dst;
73 }
74
strncmp(const char * s1,const char * s2,int n)75 int PURE strncmp(const char *s1, const char *s2, int n)
76 {
77 word_t i;
78 int diff;
79
80 for (i = 0; i < n; i++) {
81 diff = ((unsigned char *)s1)[i] - ((unsigned char *)s2)[i];
82 if (diff != 0 || s1[i] == '\0') {
83 return diff;
84 }
85 }
86
87 return 0;
88 }
89
char_to_long(char c)90 long CONST char_to_long(char c)
91 {
92 if (c >= '0' && c <= '9') {
93 return c - '0';
94 } else if (c >= 'A' && c <= 'F') {
95 return c - 'A' + 10;
96 } else if (c >= 'a' && c <= 'f') {
97 return c - 'a' + 10;
98 }
99 return -1;
100 }
101
str_to_long(const char * str)102 long PURE str_to_long(const char *str)
103 {
104 unsigned int base;
105 long res;
106 long val = 0;
107 char c;
108
109 /*check for "0x" */
110 if (*str == '0' && (*(str + 1) == 'x' || *(str + 1) == 'X')) {
111 base = 16;
112 str += 2;
113 } else {
114 base = 10;
115 }
116
117 if (!*str) {
118 return -1;
119 }
120
121 c = *str;
122 while (c != '\0') {
123 res = char_to_long(c);
124 if (res == -1 || res >= base) {
125 return -1;
126 }
127 val = val * base + res;
128 str++;
129 c = *str;
130 }
131
132 return val;
133 }
134
135 // The following implementations of CLZ (count leading zeros) and CTZ (count
136 // trailing zeros) perform a binary search for the first 1 bit from the
137 // beginning (resp. end) of the input. Initially, the focus is the whole input.
138 // Then, each iteration determines whether there are any 1 bits set in the
139 // upper (resp. lower) half of the current focus. If there are (resp. are not),
140 // then the upper half is shifted into the lower half. Either way, the lower
141 // half of the current focus becomes the new focus for the next iteration.
142 // After enough iterations (6 for 64-bit inputs, 5 for 32-bit inputs), the
143 // focus is reduced to a single bit, and the total number of bits shifted can
144 // be used to determine the number of zeros before (resp. after) the first 1
145 // bit.
146 //
147 // Although the details vary, the general approach is used in several library
148 // implementations, including in LLVM and GCC. Wikipedia has some references:
149 // https://en.wikipedia.org/wiki/Find_first_set
150 //
151 // The current implementation avoids branching. The test that determines
152 // whether the upper (resp. lower) half contains any ones directly produces a
153 // number which can be used for an unconditional shift. If the upper (resp.
154 // lower) half is all zeros, the test produces a zero, and the shift is a
155 // no-op. A branchless implementation has the disadvantage that it requires
156 // more instructions to execute than one which branches, but the advantage is
157 // that none will be mispredicted branches. Whether this is a good tradeoff
158 // depends on the branch predictability and the architecture's pipeline depth.
159 // The most critical use of clzl in the kernel is in the scheduler priority
160 // queue. In the absence of a concrete application and hardware implementation
161 // to evaluate the tradeoff, we somewhat arbitrarily choose a branchless
162 // implementation. In any case, the compiler might convert this to a branching
163 // binary.
164
165 // Check some assumptions made by the clzl, clzll, ctzl functions:
166 compile_assert(clz_ulong_32_or_64, sizeof(unsigned long) == 4 || sizeof(unsigned long) == 8);
167 compile_assert(clz_ullong_64, sizeof(unsigned long long) == 8);
168 compile_assert(clz_word_size, sizeof(unsigned long) * 8 == CONFIG_WORD_SIZE);
169
170 // Count leading zeros.
171 // This implementation contains no branches. If the architecture provides an
172 // instruction to set a register to a boolean value on a comparison, then the
173 // binary might also avoid branching. A branchless implementation might be
174 // preferable on architectures with deep pipelines, or when the maximum
175 // priority of runnable threads frequently varies. However, note that the
176 // compiler may choose to convert this to a branching implementation.
177 //
178 // These functions are potentially `UNUSED` because we want to always expose
179 // them to verification without necessarily linking them into the kernel
180 // binary.
static UNUSED CONST inline unsigned clz32(uint32_t x)
{
    // Compiler builtins typically return int, but we use unsigned internally
    // to reduce the number of guards we see in the proofs.
    unsigned count = 32;
    uint32_t mask = UINT32_MAX;

    // Each iteration i (counting backwards) considers the least significant
    // 2^(i+1) bits of x as the current focus. At the first iteration, the
    // focus is the whole input. Each iteration assumes x contains no 1 bits
    // outside its focus. The iteration contains a test which determines
    // whether there are any 1 bits in the upper half (2^i bits) of the focus,
    // setting `bits` to 2^i if there are, or zero if not. Shifting by `bits`
    // then narrows the focus to the lower 2^i bits and satisfies the
    // assumption for the next iteration. After the final iteration, the focus
    // is just the least significant bit, and the most significant 1 bit of the
    // original input (if any) has been shifted into this position. The leading
    // zero count can be determined from the total shift.
    //
    // The iterations are given a very regular structure to facilitate proofs,
    // while also generating reasonably efficient binary code.

    // The `if (1)` blocks make it easier to reason by chunks in the proofs.
    if (1) {
        // iteration 4
        mask >>= (1 << 4); // 0x0000ffff
        unsigned bits = ((unsigned)(mask < x)) << 4; // [0, 16]
        x >>= bits; // <= 0x0000ffff
        count -= bits; // [16, 32]
    }
    if (1) {
        // iteration 3
        mask >>= (1 << 3); // 0x000000ff
        unsigned bits = ((unsigned)(mask < x)) << 3; // [0, 8]
        x >>= bits; // <= 0x000000ff
        count -= bits; // [8, 16, 24, 32]
    }
    if (1) {
        // iteration 2
        mask >>= (1 << 2); // 0x0000000f
        unsigned bits = ((unsigned)(mask < x)) << 2; // [0, 4]
        x >>= bits; // <= 0x0000000f
        count -= bits; // [4, 8, 12, ..., 32]
    }
    if (1) {
        // iteration 1
        mask >>= (1 << 1); // 0x00000003
        unsigned bits = ((unsigned)(mask < x)) << 1; // [0, 2]
        x >>= bits; // <= 0x00000003
        count -= bits; // [2, 4, 6, ..., 32]
    }
    if (1) {
        // iteration 0
        mask >>= (1 << 0); // 0x00000001
        unsigned bits = ((unsigned)(mask < x)) << 0; // [0, 1]
        x >>= bits; // <= 0x00000001
        count -= bits; // [1, 2, 3, ..., 32]
    }

    // If the original input was zero, there will have been no shifts, so this
    // gives a result of 32. Otherwise, x is now exactly 1, so subtracting from
    // count gives a result from [0, 1, 2, ..., 31].
    return count - x;
}
245
static UNUSED CONST inline unsigned clz64(uint64_t x)
{
    unsigned count = 64;
    uint64_t mask = UINT64_MAX;

    // Although we could implement this using clz32, we spell out the
    // iterations in full for slightly better code generation at low
    // optimisation levels, and to allow us to reuse the proof machinery we
    // developed for clz32. See clz32 for the per-iteration reasoning; the
    // only difference here is one extra iteration for the wider input.
    if (1) {
        // iteration 5
        mask >>= (1 << 5); // 0x00000000ffffffff
        unsigned bits = ((unsigned)(mask < x)) << 5; // [0, 32]
        x >>= bits; // <= 0x00000000ffffffff
        count -= bits; // [32, 64]
    }
    if (1) {
        // iteration 4
        mask >>= (1 << 4); // 0x000000000000ffff
        unsigned bits = ((unsigned)(mask < x)) << 4; // [0, 16]
        x >>= bits; // <= 0x000000000000ffff
        count -= bits; // [16, 32, 48, 64]
    }
    if (1) {
        // iteration 3
        mask >>= (1 << 3); // 0x00000000000000ff
        unsigned bits = ((unsigned)(mask < x)) << 3; // [0, 8]
        x >>= bits; // <= 0x00000000000000ff
        count -= bits; // [8, 16, 24, ..., 64]
    }
    if (1) {
        // iteration 2
        mask >>= (1 << 2); // 0x000000000000000f
        unsigned bits = ((unsigned)(mask < x)) << 2; // [0, 4]
        x >>= bits; // <= 0x000000000000000f
        count -= bits; // [4, 8, 12, ..., 64]
    }
    if (1) {
        // iteration 1
        mask >>= (1 << 1); // 0x0000000000000003
        unsigned bits = ((unsigned)(mask < x)) << 1; // [0, 2]
        x >>= bits; // <= 0x0000000000000003
        count -= bits; // [2, 4, 6, ..., 64]
    }
    if (1) {
        // iteration 0
        mask >>= (1 << 0); // 0x0000000000000001
        unsigned bits = ((unsigned)(mask < x)) << 0; // [0, 1]
        x >>= bits; // <= 0x0000000000000001
        count -= bits; // [1, 2, 3, ..., 64]
    }

    // As in clz32: a zero input leaves count == 64 and x == 0; otherwise
    // x is exactly 1 here, giving a result in [0, 63].
    return count - x;
}
300
301 // Count trailing zeros.
302 // See comments on clz32.
static UNUSED CONST inline unsigned ctz32(uint32_t x)
{
    // Seed the count with 1 when x == 0 so that the final result for a
    // zero input comes out as 32 (see the comment below).
    unsigned count = (x == 0);
    uint32_t mask = UINT32_MAX;

    // Each iteration i (counting backwards) considers the least significant
    // 2^(i+1) bits of x as the current focus. At the first iteration, the
    // focus is the whole input. The iteration contains a test which determines
    // whether there are any 1 bits in the lower half (2^i bits) of the focus,
    // setting `bits` to zero if there are, or 2^i if not. Shifting by `bits`
    // then narrows the focus to the lower 2^i bits for the next iteration.
    // After the final iteration, the focus is just the least significant bit,
    // and the least significant 1 bit of the original input (if any) has been
    // shifted into this position. The trailing zero count can be determined
    // from the total shift.
    //
    // If the initial input is zero, every iteration causes a shift, for a
    // total shift count of 31, so in that case, we add one for a total count
    // of 32. In the comments, xi is the initial value of x.
    //
    // The iterations are given a very regular structure to facilitate proofs,
    // while also generating reasonably efficient binary code.

    if (1) {
        // iteration 4
        mask >>= (1 << 4); // 0x0000ffff
        unsigned bits = ((unsigned)((x & mask) == 0)) << 4; // [0, 16]
        x >>= bits; // xi != 0 --> x & 0x0000ffff != 0
        count += bits; // if xi != 0 then [0, 16] else 17
    }
    if (1) {
        // iteration 3
        mask >>= (1 << 3); // 0x000000ff
        unsigned bits = ((unsigned)((x & mask) == 0)) << 3; // [0, 8]
        x >>= bits; // xi != 0 --> x & 0x000000ff != 0
        count += bits; // if xi != 0 then [0, 8, 16, 24] else 25
    }
    if (1) {
        // iteration 2
        mask >>= (1 << 2); // 0x0000000f
        unsigned bits = ((unsigned)((x & mask) == 0)) << 2; // [0, 4]
        x >>= bits; // xi != 0 --> x & 0x0000000f != 0
        count += bits; // if xi != 0 then [0, 4, 8, ..., 28] else 29
    }
    if (1) {
        // iteration 1
        mask >>= (1 << 1); // 0x00000003
        unsigned bits = ((unsigned)((x & mask) == 0)) << 1; // [0, 2]
        x >>= bits; // xi != 0 --> x & 0x00000003 != 0
        count += bits; // if xi != 0 then [0, 2, 4, ..., 30] else 31
    }
    if (1) {
        // iteration 0
        mask >>= (1 << 0); // 0x00000001
        unsigned bits = ((unsigned)((x & mask) == 0)) << 0; // [0, 1]
        x >>= bits; // xi != 0 --> x & 0x00000001 != 0
        count += bits; // if xi != 0 then [0, 1, 2, ..., 31] else 32
    }

    return count;
}
364
static UNUSED CONST inline unsigned ctz64(uint64_t x)
{
    // See ctz32 for the detailed per-iteration reasoning; this is the
    // 64-bit analogue with one extra iteration. As there, the seed of 1
    // for a zero input yields a final result of 64.
    unsigned count = (x == 0);
    uint64_t mask = UINT64_MAX;

    if (1) {
        // iteration 5
        mask >>= (1 << 5); // 0x00000000ffffffff
        unsigned bits = ((unsigned)((x & mask) == 0)) << 5; // [0, 32]
        x >>= bits; // xi != 0 --> x & 0x00000000ffffffff != 0
        count += bits; // if xi != 0 then [0, 32] else 33
    }
    if (1) {
        // iteration 4
        mask >>= (1 << 4); // 0x000000000000ffff
        unsigned bits = ((unsigned)((x & mask) == 0)) << 4; // [0, 16]
        x >>= bits; // xi != 0 --> x & 0x000000000000ffff != 0
        count += bits; // if xi != 0 then [0, 16, 32, 48] else 49
    }
    if (1) {
        // iteration 3
        mask >>= (1 << 3); // 0x00000000000000ff
        unsigned bits = ((unsigned)((x & mask) == 0)) << 3; // [0, 8]
        x >>= bits; // xi != 0 --> x & 0x00000000000000ff != 0
        count += bits; // if xi != 0 then [0, 8, 16, ..., 56] else 57
    }
    if (1) {
        // iteration 2
        mask >>= (1 << 2); // 0x000000000000000f
        unsigned bits = ((unsigned)((x & mask) == 0)) << 2; // [0, 4]
        x >>= bits; // xi != 0 --> x & 0x000000000000000f != 0
        count += bits; // if xi != 0 then [0, 4, 8, ..., 60] else 61
    }
    if (1) {
        // iteration 1
        mask >>= (1 << 1); // 0x0000000000000003
        unsigned bits = ((unsigned)((x & mask) == 0)) << 1; // [0, 2]
        x >>= bits; // xi != 0 --> x & 0x0000000000000003 != 0
        count += bits; // if xi != 0 then [0, 2, 4, ..., 62] else 63
    }
    if (1) {
        // iteration 0
        mask >>= (1 << 0); // 0x0000000000000001
        unsigned bits = ((unsigned)((x & mask) == 0)) << 0; // [0, 1]
        x >>= bits; // xi != 0 --> x & 0x0000000000000001 != 0
        count += bits; // if xi != 0 then [0, 1, 2, ..., 63] else 64
    }

    return count;
}
415
416 // GCC's builtins will emit calls to these functions when the platform does
417 // not provide suitable inline assembly.
418 // These are only provided when the relevant config items are set.
419 // We define these separately from `ctz32` etc. so that we can verify all of
420 // `ctz32` etc. without necessarily linking them into the kernel binary.
421 #ifdef CONFIG_CLZ_32
__clzsi2(uint32_t x)422 CONST int __clzsi2(uint32_t x)
423 {
424 return clz32(x);
425 }
426 #endif
427
428 #ifdef CONFIG_CLZ_64
__clzdi2(uint64_t x)429 CONST int __clzdi2(uint64_t x)
430 {
431 return clz64(x);
432 }
433 #endif
434
435 #ifdef CONFIG_CTZ_32
__ctzsi2(uint32_t x)436 CONST int __ctzsi2(uint32_t x)
437 {
438 return ctz32(x);
439 }
440 #endif
441
442 #ifdef CONFIG_CTZ_64
__ctzdi2(uint64_t x)443 CONST int __ctzdi2(uint64_t x)
444 {
445 return ctz64(x);
446 }
447 #endif
448