1 /*
2  * Copyright 2014, General Dynamics C4 Systems
3  *
4  * SPDX-License-Identifier: GPL-2.0-only
5  */
6 
7 #include <assert.h>
8 #include <stdint.h>
9 #include <util.h>
10 
11 /*
12  * memzero needs a custom type that allows us to use a word
13  * that has the aliasing properties of a char.
14  */
15 typedef unsigned long __attribute__((__may_alias__)) ulong_alias;
16 
17 /*
18  * Zero 'n' bytes of memory starting from 's'.
19  *
20  * 'n' and 's' must be word aligned.
21  */
memzero(void * s,unsigned long n)22 void memzero(void *s, unsigned long n)
23 {
24     uint8_t *p = s;
25 
26     /* Ensure alignment constraints are met. */
27     assert((unsigned long)s % sizeof(unsigned long) == 0);
28     assert(n % sizeof(unsigned long) == 0);
29 
30     /* We will never memzero an area larger than the largest current
31        live object */
32     /** GHOSTUPD: "(gs_get_assn cap_get_capSizeBits_'proc \<acute>ghost'state = 0
33         \<or> \<acute>n <= gs_get_assn cap_get_capSizeBits_'proc \<acute>ghost'state, id)" */
34 
35     /* Write out words. */
36     while (n != 0) {
37         *(ulong_alias *)p = 0;
38         p += sizeof(ulong_alias);
39         n -= sizeof(ulong_alias);
40     }
41 }
42 
memset(void * s,unsigned long c,unsigned long n)43 void *VISIBLE memset(void *s, unsigned long c, unsigned long n)
44 {
45     uint8_t *p;
46 
47     /*
48      * If we are only writing zeros and we are word aligned, we can
49      * use the optimized 'memzero' function.
50      */
51     if (likely(c == 0 && ((unsigned long)s % sizeof(unsigned long)) == 0 && (n % sizeof(unsigned long)) == 0)) {
52         memzero(s, n);
53     } else {
54         /* Otherwise, we use a slower, simple memset. */
55         for (p = (uint8_t *)s; n > 0; n--, p++) {
56             *p = (uint8_t)c;
57         }
58     }
59 
60     return s;
61 }
62 
memcpy(void * ptr_dst,const void * ptr_src,unsigned long n)63 void *VISIBLE memcpy(void *ptr_dst, const void *ptr_src, unsigned long n)
64 {
65     uint8_t *p;
66     const uint8_t *q;
67 
68     for (p = (uint8_t *)ptr_dst, q = (const uint8_t *)ptr_src; n; n--, p++, q++) {
69         *p = *q;
70     }
71 
72     return ptr_dst;
73 }
74 
strncmp(const char * s1,const char * s2,int n)75 int PURE strncmp(const char *s1, const char *s2, int n)
76 {
77     word_t i;
78     int diff;
79 
80     for (i = 0; i < n; i++) {
81         diff = ((unsigned char *)s1)[i] - ((unsigned char *)s2)[i];
82         if (diff != 0 || s1[i] == '\0') {
83             return diff;
84         }
85     }
86 
87     return 0;
88 }
89 
char_to_long(char c)90 long CONST char_to_long(char c)
91 {
92     if (c >= '0' && c <= '9') {
93         return c - '0';
94     } else if (c >= 'A' && c <= 'F') {
95         return c - 'A' + 10;
96     } else if (c >= 'a' && c <= 'f') {
97         return c - 'a' + 10;
98     }
99     return -1;
100 }
101 
str_to_long(const char * str)102 long PURE str_to_long(const char *str)
103 {
104     unsigned int base;
105     long res;
106     long val = 0;
107     char c;
108 
109     /*check for "0x" */
110     if (*str == '0' && (*(str + 1) == 'x' || *(str + 1) == 'X')) {
111         base = 16;
112         str += 2;
113     } else {
114         base = 10;
115     }
116 
117     if (!*str) {
118         return -1;
119     }
120 
121     c = *str;
122     while (c != '\0') {
123         res = char_to_long(c);
124         if (res == -1 || res >= base) {
125             return -1;
126         }
127         val = val * base + res;
128         str++;
129         c = *str;
130     }
131 
132     return val;
133 }
134 
135 // The following implementations of CLZ (count leading zeros) and CTZ (count
136 // trailing zeros) perform a binary search for the first 1 bit from the
137 // beginning (resp. end) of the input. Initially, the focus is the whole input.
138 // Then, each iteration determines whether there are any 1 bits set in the
139 // upper (resp. lower) half of the current focus. If there are (resp. are not),
140 // then the upper half is shifted into the lower half. Either way, the lower
141 // half of the current focus becomes the new focus for the next iteration.
142 // After enough iterations (6 for 64-bit inputs, 5 for 32-bit inputs), the
143 // focus is reduced to a single bit, and the total number of bits shifted can
144 // be used to determine the number of zeros before (resp. after) the first 1
145 // bit.
146 //
147 // Although the details vary, the general approach is used in several library
148 // implementations, including in LLVM and GCC. Wikipedia has some references:
149 // https://en.wikipedia.org/wiki/Find_first_set
150 //
151 // The current implementation avoids branching. The test that determines
152 // whether the upper (resp. lower) half contains any ones directly produces a
153 // number which can be used for an unconditional shift. If the upper (resp.
154 // lower) half is all zeros, the test produces a zero, and the shift is a
155 // no-op. A branchless implementation has the disadvantage that it requires
156 // more instructions to execute than one which branches, but the advantage is
157 // that none will be mispredicted branches. Whether this is a good tradeoff
158 // depends on the branch predictability and the architecture's pipeline depth.
159 // The most critical use of clzl in the kernel is in the scheduler priority
160 // queue. In the absence of a concrete application and hardware implementation
161 // to evaluate the tradeoff, we somewhat arbitrarily choose a branchless
162 // implementation. In any case, the compiler might convert this to a branching
163 // binary.
164 
165 // Check some assumptions made by the clzl, clzll, ctzl functions:
166 compile_assert(clz_ulong_32_or_64, sizeof(unsigned long) == 4 || sizeof(unsigned long) == 8);
167 compile_assert(clz_ullong_64, sizeof(unsigned long long) == 8);
168 compile_assert(clz_word_size, sizeof(unsigned long) * 8 == CONFIG_WORD_SIZE);
169 
170 // Count leading zeros.
171 // This implementation contains no branches. If the architecture provides an
172 // instruction to set a register to a boolean value on a comparison, then the
173 // binary might also avoid branching. A branchless implementation might be
174 // preferable on architectures with deep pipelines, or when the maximum
175 // priority of runnable threads frequently varies. However, note that the
176 // compiler may choose to convert this to a branching implementation.
177 //
178 // These functions are potentially `UNUSED` because we want to always expose
179 // them to verification without necessarily linking them into the kernel
180 // binary.
clz32(uint32_t x)181 static UNUSED CONST inline unsigned clz32(uint32_t x)
182 {
183     // Compiler builtins typically return int, but we use unsigned internally
184     // to reduce the number of guards we see in the proofs.
185     unsigned count = 32;
186     uint32_t mask = UINT32_MAX;
187 
188     // Each iteration i (counting backwards) considers the least significant
189     // 2^(i+1) bits of x as the current focus. At the first iteration, the
190     // focus is the whole input. Each iteration assumes x contains no 1 bits
191     // outside its focus. The iteration contains a test which determines
192     // whether there are any 1 bits in the upper half (2^i bits) of the focus,
193     // setting `bits` to 2^i if there are, or zero if not. Shifting by `bits`
194     // then narrows the focus to the lower 2^i bits and satisfies the
195     // assumption for the next iteration. After the final iteration, the focus
196     // is just the least significant bit, and the most significsnt 1 bit of the
197     // original input (if any) has been shifted into this position. The leading
198     // zero count can be determined from the total shift.
199     //
200     // The iterations are given a very regular structure to facilitate proofs,
201     // while also generating reasonably efficient binary code.
202 
203     // The `if (1)` blocks make it easier to reason by chunks in the proofs.
204     if (1) {
205         // iteration 4
206         mask >>= (1 << 4); // 0x0000ffff
207         unsigned bits = ((unsigned)(mask < x)) << 4; // [0, 16]
208         x >>= bits; // <= 0x0000ffff
209         count -= bits; // [16, 32]
210     }
211     if (1) {
212         // iteration 3
213         mask >>= (1 << 3); // 0x000000ff
214         unsigned bits = ((unsigned)(mask < x)) << 3; // [0, 8]
215         x >>= bits; // <= 0x000000ff
216         count -= bits; // [8, 16, 24, 32]
217     }
218     if (1) {
219         // iteration 2
220         mask >>= (1 << 2); // 0x0000000f
221         unsigned bits = ((unsigned)(mask < x)) << 2; // [0, 4]
222         x >>= bits; // <= 0x0000000f
223         count -= bits; // [4, 8, 12, ..., 32]
224     }
225     if (1) {
226         // iteration 1
227         mask >>= (1 << 1); // 0x00000003
228         unsigned bits = ((unsigned)(mask < x)) << 1; // [0, 2]
229         x >>= bits; // <= 0x00000003
230         count -= bits; // [2, 4, 6, ..., 32]
231     }
232     if (1) {
233         // iteration 0
234         mask >>= (1 << 0); // 0x00000001
235         unsigned bits = ((unsigned)(mask < x)) << 0; // [0, 1]
236         x >>= bits; // <= 0x00000001
237         count -= bits; // [1, 2, 3, ..., 32]
238     }
239 
240     // If the original input was zero, there will have been no shifts, so this
241     // gives a result of 32. Otherwise, x is now exactly 1, so subtracting from
242     // count gives a result from [0, 1, 2, ..., 31].
243     return count - x;
244 }
245 
clz64(uint64_t x)246 static UNUSED CONST inline unsigned clz64(uint64_t x)
247 {
248     unsigned count = 64;
249     uint64_t mask = UINT64_MAX;
250 
251     // Although we could implement this using clz32, we spell out the
252     // iterations in full for slightly better code generation at low
253     // optimisation levels, and to allow us to reuse the proof machinery we
254     // developed for clz32.
255     if (1) {
256         // iteration 5
257         mask >>= (1 << 5); // 0x00000000ffffffff
258         unsigned bits = ((unsigned)(mask < x)) << 5; // [0, 32]
259         x >>= bits; // <= 0x00000000ffffffff
260         count -= bits; // [32, 64]
261     }
262     if (1) {
263         // iteration 4
264         mask >>= (1 << 4); // 0x000000000000ffff
265         unsigned bits = ((unsigned)(mask < x)) << 4; // [0, 16]
266         x >>= bits; // <= 0x000000000000ffff
267         count -= bits; // [16, 32, 48, 64]
268     }
269     if (1) {
270         // iteration 3
271         mask >>= (1 << 3); // 0x00000000000000ff
272         unsigned bits = ((unsigned)(mask < x)) << 3; // [0, 8]
273         x >>= bits; // <= 0x00000000000000ff
274         count -= bits; // [8, 16, 24, ..., 64]
275     }
276     if (1) {
277         // iteration 2
278         mask >>= (1 << 2); // 0x000000000000000f
279         unsigned bits = ((unsigned)(mask < x)) << 2; // [0, 4]
280         x >>= bits; // <= 0x000000000000000f
281         count -= bits; // [4, 8, 12, ..., 64]
282     }
283     if (1) {
284         // iteration 1
285         mask >>= (1 << 1); // 0x0000000000000003
286         unsigned bits = ((unsigned)(mask < x)) << 1; // [0, 2]
287         x >>= bits; // <= 0x0000000000000003
288         count -= bits; // [2, 4, 6, ..., 64]
289     }
290     if (1) {
291         // iteration 0
292         mask >>= (1 << 0); // 0x0000000000000001
293         unsigned bits = ((unsigned)(mask < x)) << 0; // [0, 1]
294         x >>= bits; // <= 0x0000000000000001
295         count -= bits; // [1, 2, 3, ..., 64]
296     }
297 
298     return count - x;
299 }
300 
301 // Count trailing zeros.
302 // See comments on clz32.
ctz32(uint32_t x)303 static UNUSED CONST inline unsigned ctz32(uint32_t x)
304 {
305     unsigned count = (x == 0);
306     uint32_t mask = UINT32_MAX;
307 
308     // Each iteration i (counting backwards) considers the least significant
309     // 2^(i+1) bits of x as the current focus. At the first iteration, the
310     // focus is the whole input. The iteration contains a test which determines
311     // whether there are any 1 bits in the lower half (2^i bits) of the focus,
312     // setting `bits` to zero if there are, or 2^i if not. Shifting by `bits`
313     // then narrows the focus to the lower 2^i bits for the next iteration.
314     // After the final iteration, the focus is just the least significant bit,
315     // and the least significsnt 1 bit of the original input (if any) has been
316     // shifted into this position. The trailing zero count can be determined
317     // from the total shift.
318     //
319     // If the initial input is zero, every iteration causes a shift, for a
320     // total shift count of 31, so in that case, we add one for a total count
321     // of 32. In the comments, xi is the initial value of x.
322     //
323     // The iterations are given a very regular structure to facilitate proofs,
324     // while also generating reasonably efficient binary code.
325 
326     if (1) {
327         // iteration 4
328         mask >>= (1 << 4); // 0x0000ffff
329         unsigned bits = ((unsigned)((x & mask) == 0)) << 4; // [0, 16]
330         x >>= bits; // xi != 0 --> x & 0x0000ffff != 0
331         count += bits; // if xi != 0 then [0, 16] else 17
332     }
333     if (1) {
334         // iteration 3
335         mask >>= (1 << 3); // 0x000000ff
336         unsigned bits = ((unsigned)((x & mask) == 0)) << 3; // [0, 8]
337         x >>= bits; // xi != 0 --> x & 0x000000ff != 0
338         count += bits; // if xi != 0 then [0, 8, 16, 24] else 25
339     }
340     if (1) {
341         // iteration 2
342         mask >>= (1 << 2); // 0x0000000f
343         unsigned bits = ((unsigned)((x & mask) == 0)) << 2; // [0, 4]
344         x >>= bits; // xi != 0 --> x & 0x0000000f != 0
345         count += bits; // if xi != 0 then [0, 4, 8, ..., 28] else 29
346     }
347     if (1) {
348         // iteration 1
349         mask >>= (1 << 1); // 0x00000003
350         unsigned bits = ((unsigned)((x & mask) == 0)) << 1; // [0, 2]
351         x >>= bits; // xi != 0 --> x & 0x00000003 != 0
352         count += bits; // if xi != 0 then [0, 2, 4, ..., 30] else 31
353     }
354     if (1) {
355         // iteration 0
356         mask >>= (1 << 0); // 0x00000001
357         unsigned bits = ((unsigned)((x & mask) == 0)) << 0; // [0, 1]
358         x >>= bits; // xi != 0 --> x & 0x00000001 != 0
359         count += bits; // if xi != 0 then [0, 1, 2, ..., 31] else 32
360     }
361 
362     return count;
363 }
364 
ctz64(uint64_t x)365 static UNUSED CONST inline unsigned ctz64(uint64_t x)
366 {
367     unsigned count = (x == 0);
368     uint64_t mask = UINT64_MAX;
369 
370     if (1) {
371         // iteration 5
372         mask >>= (1 << 5); // 0x00000000ffffffff
373         unsigned bits = ((unsigned)((x & mask) == 0)) << 5; // [0, 32]
374         x >>= bits; // xi != 0 --> x & 0x00000000ffffffff != 0
375         count += bits; // if xi != 0 then [0, 32] else 33
376     }
377     if (1) {
378         // iteration 4
379         mask >>= (1 << 4); // 0x000000000000ffff
380         unsigned bits = ((unsigned)((x & mask) == 0)) << 4; // [0, 16]
381         x >>= bits; // xi != 0 --> x & 0x000000000000ffff != 0
382         count += bits; // if xi != 0 then [0, 16, 32, 48] else 49
383     }
384     if (1) {
385         // iteration 3
386         mask >>= (1 << 3); // 0x00000000000000ff
387         unsigned bits = ((unsigned)((x & mask) == 0)) << 3; // [0, 8]
388         x >>= bits; // xi != 0 --> x & 0x00000000000000ff != 0
389         count += bits; // if xi != 0 then [0, 8, 16, ..., 56] else 57
390     }
391     if (1) {
392         // iteration 2
393         mask >>= (1 << 2); // 0x000000000000000f
394         unsigned bits = ((unsigned)((x & mask) == 0)) << 2; // [0, 4]
395         x >>= bits; // xi != 0 --> x & 0x000000000000000f != 0
396         count += bits; // if xi != 0 then [0, 4, 8, ..., 60] else 61
397     }
398     if (1) {
399         // iteration 1
400         mask >>= (1 << 1); // 0x0000000000000003
401         unsigned bits = ((unsigned)((x & mask) == 0)) << 1; // [0, 2]
402         x >>= bits; // xi != 0 --> x & 0x0000000000000003 != 0
403         count += bits; // if xi != 0 then [0, 2, 4, ..., 62] else 63
404     }
405     if (1) {
406         // iteration 0
407         mask >>= (1 << 0); // 0x0000000000000001
408         unsigned bits = ((unsigned)((x & mask) == 0)) << 0; // [0, 1]
409         x >>= bits; // xi != 0 --> x & 0x0000000000000001 != 0
410         count += bits; // if xi != 0 then [0, 1, 2, ..., 63] else 64
411     }
412 
413     return count;
414 }
415 
416 // GCC's builtins will emit calls to these functions when the platform does
417 // not provide suitable inline assembly.
418 // These are only provided when the relevant config items are set.
419 // We define these separately from `ctz32` etc. so that we can verify all of
420 // `ctz32` etc. without necessarily linking them into the kernel binary.
421 #ifdef CONFIG_CLZ_32
__clzsi2(uint32_t x)422 CONST int __clzsi2(uint32_t x)
423 {
424     return clz32(x);
425 }
426 #endif
427 
428 #ifdef CONFIG_CLZ_64
__clzdi2(uint64_t x)429 CONST int __clzdi2(uint64_t x)
430 {
431     return clz64(x);
432 }
433 #endif
434 
435 #ifdef CONFIG_CTZ_32
__ctzsi2(uint32_t x)436 CONST int __ctzsi2(uint32_t x)
437 {
438     return ctz32(x);
439 }
440 #endif
441 
442 #ifdef CONFIG_CTZ_64
__ctzdi2(uint64_t x)443 CONST int __ctzdi2(uint64_t x)
444 {
445     return ctz64(x);
446 }
447 #endif
448