1 // © 2021 Qualcomm Innovation Center, Inc. All rights reserved.
2 //
3 // SPDX-License-Identifier: BSD-3-Clause
4 
5 #include <assert.h>
6 #include <stdint.h>
7 #include <stdnoreturn.h>
8 #include <string.h>
9 
10 #include <compiler.h>
11 #include <panic.h>
12 #include <util.h>
13 
14 #include <asm/cpu.h>
15 #include <asm/prefetch.h>
16 
17 // Assembly functions. All of these come in at least three variants:
18 //
19 // - _align16 for at least 16 bytes with target known to be 16-aligned;
20 // - _alignable for at least 31 bytes with unknown target alignment;
21 // - _below32 for less than 32 bytes (i.e. one access of each size).
22 //
23 // Note the overlap between _alignable and _below32 at n==31; either variant
24 // may be used at that size. We use _below32 because the logic to trigger its
25 // first 16-byte copy is simpler.
26 //
27 // For memset to zero there is additionally a _dczva variant, where the target
28 // is aligned to a DC ZVA block (typically a 64-byte cache line) and is at
29 // least that size.
30 //
31 // The variants other than _below32 fall through to the more-aligned versions
32 // once the necessary alignment has been established.
33 
34 // TODO: Clang does only simple constant propagation when LTO is enabled,
35 // preferring to leave it until LTO. However, the LLVM IR has no way to
36 // represent __builtin_constant_p(). So it is not really possible to use
37 // __builtin_constant_p() here to avoid runtime checks, since it will nearly
38 // always evaluate to 0.
39 //
40 // To make any static assertions or build-time variant selection actually
41 // effective, we need to move all of the definitions below into inlines in a
42 // header. We would then need to either include it here to generate extern
43 // definitions for the backend to use, or else define them in the assembly.
44 
// Copy routines. The destination's size/alignment class picks the variant
// (see the description of the _below32, _align16 and _alignable suffixes
// above).
void
memcpy_below32(void *restrict s1, const void *restrict s2, size_t n);

void
memcpy_align16(void *restrict s1, const void *restrict s2, size_t n);

void
memcpy_alignable(void *restrict s1, const void *restrict s2, size_t n);

// NOTE(review): no C code in this file calls memcpy_bytes(); its exact
// contract has to be taken from its definition.
void
memcpy_bytes(void *restrict s1, const void *restrict s2, size_t n);

// Fill routines for a zero fill value. The _dczva variant is for a target
// that is aligned to, and at least as large as, one DC ZVA block.
void
memset_zeros_below32(void *s, size_t n);

void
memset_zeros_align16(void *s, size_t n);

void
memset_zeros_alignable(void *s, size_t n);

void
memset_zeros_dczva(void *s, size_t n);

// Fill routines for a non-zero fill value. The _below32 and _align16
// variants are passed the fill byte already replicated across a 64-bit word
// (cs); the _alignable variant is passed the plain byte (c).
void
memset_below32(void *s, uint64_t cs, size_t n);

void
memset_align16(void *s, uint64_t cs, size_t n);

void
memset_alignable(void *s, uint8_t c, size_t n);
77 
// Copy n bytes from s2 to s1 and return s1.
//
// Both pointers are restrict-qualified, so the two ranges must not overlap.
// The work is delegated to an assembly routine chosen by size and by the
// destination's alignment:
//
// - n == 0: nothing is touched, and no prefetch is issued;
// - n < 32: memcpy_below32();
// - n >= 32, s1 16-byte aligned: memcpy_align16();
// - n >= 32, any other s1: memcpy_alignable().
void *
memcpy(void *restrict s1, const void *restrict s2, size_t n)
{
	// Both objects must be at least n bytes, as far as
	// compiler_sizeof_object() can tell.
	assert(compiler_sizeof_object(s1) >= n);
	assert(compiler_sizeof_object(s2) >= n);

	if (n != 0U) {
		// Every non-empty copy issues the same two prefetch hints
		// before dispatching.
		prefetch_store_keep(s1);
		prefetch_load_stream(s2);

		if (n < 32U) {
			memcpy_below32(s1, s2, n);
		} else if (((uintptr_t)s1 & (uintptr_t)15) == 0U) {
			memcpy_align16(s1, s2, n);
		} else {
			memcpy_alignable(s1, s2, n);
		}
	}

	return s1;
}
102 
// Copy n bytes from src to dst one byte at a time, starting with the
// highest-addressed byte and working downwards.
//
// This is the safe order when dst lies above src and the ranges overlap:
// each source byte is read before the copy can overwrite it. The caller must
// guarantee src < dst. n may be zero, in which case no memory is accessed.
static void
memmove_bytes_reverse(uint8_t *dst, const uint8_t *src, size_t n)
{
	assert((uintptr_t)src < (uintptr_t)dst);

	// Index down from the top instead of walking a pair of pointers.
	// Walking pointers would finish one element before the start of each
	// buffer, and would begin far out of range when n == 0; forming such
	// pointers is undefined behaviour even if they are never dereferenced.
	for (size_t i = n; i != 0U; i--) {
		dst[i - 1U] = src[i - 1U];
	}
}
120 
// Copy n bytes from src to dst one byte at a time in ascending address order.
//
// This is the safe order when dst lies below src and the ranges overlap:
// each source byte is read before the copy can overwrite it. The caller must
// guarantee dst < src.
//
// NOTE(review): this file also declares memcpy_bytes(), which may be a
// suitable assembly replacement for this loop; confirm that it copies
// strictly forwards, byte by byte, before substituting it.
static void
memmove_bytes_forward(uint8_t *dst, const uint8_t *src, size_t n)
{
	assert((uintptr_t)dst < (uintptr_t)src);

	for (size_t i = 0U; i < n; i++) {
		dst[i] = src[i];
	}
}

// Copy n bytes from src to dst, where the two ranges may overlap, and return
// dst.
//
// Panics if either range would wrap around the end of the address space.
// Disjoint ranges are handed to memcpy(). Overlapping ranges are copied
// bytewise, in whichever direction reads each source byte before it is
// overwritten.
void *
memmove(void *dst, const void *src, size_t n)
{
	if (n == 0U) {
		goto out;
	}

	// From here on n >= 1, so (n - 1U) cannot underflow and
	// base + (n - 1U) is the address of the last byte of a range.
	if (util_add_overflows((uintptr_t)dst, n - 1U) ||
	    util_add_overflows((uintptr_t)src, n - 1U)) {
		panic("memmove_bytes addr overflow");
	}

	if ((uintptr_t)dst == (uintptr_t)src) {
		// Nothing to do.
	} else if ((((uintptr_t)dst + (n - 1U)) < (uintptr_t)src) ||
		   (((uintptr_t)src + (n - 1U)) < (uintptr_t)dst)) {
		// The ranges are disjoint, so memcpy()'s restrict contract
		// holds and it is safe to use.
		(void)memcpy(dst, src, n);
	} else if ((uintptr_t)dst < (uintptr_t)src) {
		// Overlapping move to a lower address: copy forwards.
		// memcpy() must not be used here because its arguments are
		// restrict-qualified and the ranges overlap.
		memmove_bytes_forward((uint8_t *)dst, (const uint8_t *)src, n);
	} else {
		// Overlapping move to a higher address: copy backwards.
		memmove_bytes_reverse((uint8_t *)dst, (const uint8_t *)src, n);
	}

out:
	return dst;
}
146 
147 errno_t
memset_s(void * s,rsize_t smax,int c,rsize_t n)148 memset_s(void *s, rsize_t smax, int c, rsize_t n)
149 {
150 	assert(compiler_sizeof_object(s) >= smax);
151 	uintptr_t a16 = (uintptr_t)s & (uintptr_t)15;
152 
153 	errno_t err = 0;
154 
155 	if (s == NULL) {
156 		err = 1;
157 		goto out_null;
158 	}
159 	if (n > smax) {
160 		err = 1;
161 		n   = smax;
162 	}
163 
164 	if (n == 0U) {
165 		// Nothing to do.
166 	} else if (c == 0) {
167 		uintptr_t a_zva = (uintptr_t)s &
168 				  (uintptr_t)util_mask(CPU_DCZVA_BITS);
169 		if (n < 32U) {
170 			prefetch_store_keep(s);
171 			memset_zeros_below32(s, n);
172 		} else if ((a_zva == 0U) && ((n >> CPU_DCZVA_BITS) > 0U)) {
173 			memset_zeros_dczva(s, n);
174 		} else if (a16 == 0U) {
175 			prefetch_store_keep(s);
176 			memset_zeros_align16(s, n);
177 		} else {
178 			prefetch_store_keep(s);
179 			memset_zeros_alignable(s, n);
180 		}
181 	} else {
182 		uint64_t cs = (uint64_t)(uint8_t)c;
183 		cs |= cs << 8;
184 		cs |= cs << 16;
185 		cs |= cs << 32;
186 		if (n < 32U) {
187 			prefetch_store_keep(s);
188 			memset_below32(s, cs, n);
189 		} else if (a16 == 0U) {
190 			prefetch_store_keep(s);
191 			memset_align16(s, cs, n);
192 		} else {
193 			prefetch_store_keep(s);
194 			memset_alignable(s, (uint8_t)c, n);
195 		}
196 	}
197 
198 out_null:
199 	return err;
200 }
201 
202 void *
memset(void * s,int c,size_t n)203 memset(void *s, int c, size_t n)
204 {
205 	(void)memset_s(s, n, c, n);
206 	return s;
207 }
208 
// Return the number of characters in str that precede its terminating NUL.
//
// str must not be NULL (checked by assertion).
size_t
strlen(const char *str)
{
	size_t len = 0U;

	assert(str != NULL);

	// Count characters until the terminator is reached.
	while (str[len] != '\0') {
		len++;
	}

	return len;
}
221 
// Return a pointer to the first occurrence of (char)c in str, or NULL if the
// character does not occur.
//
// The terminating NUL counts as part of the string, so searching for '\0'
// returns a pointer to the terminator rather than NULL. str must not be NULL
// (checked by assertion, as in strlen()).
char *
strchr(const char *str, int c)
{
	uintptr_t   ret	   = (uintptr_t)NULL;
	const char  target = (char)c;
	const char *cur	   = str;

	assert(str != NULL);

	// Stop at the first match or at the terminator, whichever comes
	// first.
	while ((*cur != target) && (*cur != '\0')) {
		cur++;
	}

	// Comparing after the scan also reports a match on the terminator
	// itself, which is the case when target is '\0'.
	if (*cur == target) {
		ret = (uintptr_t)cur;
	}

	return (char *)ret;
}
237