// © 2021 Qualcomm Innovation Center, Inc. All rights reserved.
//
// SPDX-License-Identifier: BSD-3-Clause

#include <assert.h>
#include <stdint.h>
#include <stdnoreturn.h>
#include <string.h>

#include <compiler.h>
#include <panic.h>
#include <util.h>

#include <asm/cpu.h>
#include <asm/prefetch.h>

// Assembly functions. All of these come in at least three variants:
//
// - _align16 for at least 16 bytes with target known to be 16-aligned;
// - _alignable for at least 31 bytes with unknown target alignment;
// - _below32 for less than 32 bytes (i.e. one access of each size).
//
// Note the overlap between _alignable and _below32 at n==31; either variant
// may be used at that size. We use _below32 because the logic to trigger its
// first 16-byte copy is simpler.
//
// For memset to zero there is additionally a _dczva variant, where the target
// is aligned to a DC ZVA block (typically a 64-byte cache line) and is at
// least that size.
//
// The variants other than _below32 fall through to the more-aligned versions
// once the necessary alignment has been established.
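
// As a non-normative sketch of how the memcpy variants are chosen (this
// mirrors the dispatch implemented by memcpy() below; it is not a separate
// interface):
//
//	if (n < 32U) {
//		memcpy_below32(s1, s2, n);   // one access of each size
//	} else if (((uintptr_t)s1 & 15U) == 0U) {
//		memcpy_align16(s1, s2, n);   // target already 16-aligned
//	} else {
//		memcpy_alignable(s1, s2, n); // aligns, then falls through
//	}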

// TODO: Clang does only simple constant propagation before link time when
// LTO is enabled, preferring to defer the rest to LTO. However, the LLVM IR
// has no way to represent __builtin_constant_p(), so it is not really
// possible to use __builtin_constant_p() here to avoid runtime checks: it
// will nearly always evaluate to 0.
//
// To make any static assertions or build-time variant selection actually
// effective, we would need to move all of the definitions below into inlines
// in a header. We would then need to either include it here to generate
// extern definitions for the backend to use, or else define them in the
// assembly.
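
// As a hedged sketch of that header-inline approach (not built here), where
// memcpy_inline() is a hypothetical name:
//
//	static inline void *
//	memcpy_inline(void *restrict s1, const void *restrict s2, size_t n)
//	{
//		if (__builtin_constant_p(n) && (n == 0U)) {
//			// Nothing to do; folded away at compile time.
//		} else if (__builtin_constant_p(n) && (n < 32U)) {
//			memcpy_below32(s1, s2, n);
//		} else {
//			(void)memcpy(s1, s2, n);
//		}
//		return s1;
//	}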

void
memcpy_below32(void *restrict s1, const void *restrict s2, size_t n);

void
memcpy_alignable(void *restrict s1, const void *restrict s2, size_t n);

void
memcpy_align16(void *restrict s1, const void *restrict s2, size_t n);

void
memcpy_bytes(void *restrict s1, const void *restrict s2, size_t n);

void
memset_zeros_alignable(void *s, size_t n);

void
memset_zeros_below32(void *s, size_t n);

void
memset_zeros_align16(void *s, size_t n);

void
memset_zeros_dczva(void *s, size_t n);

void
memset_alignable(void *s, uint8_t c, size_t n);

void
memset_below32(void *s, uint64_t cs, size_t n);

void
memset_align16(void *s, uint64_t cs, size_t n);

void *
memcpy(void *restrict s1, const void *restrict s2, size_t n)
{
	assert(compiler_sizeof_object(s1) >= n);
	assert(compiler_sizeof_object(s2) >= n);
	if (n == 0U) {
		// Nothing to do.
	} else if (n < 32U) {
		prefetch_store_keep(s1);
		prefetch_load_stream(s2);
		memcpy_below32(s1, s2, n);
	} else {
		prefetch_store_keep(s1);
		prefetch_load_stream(s2);
		uintptr_t a16 = (uintptr_t)s1 & (uintptr_t)15;
		if (a16 == 0U) {
			memcpy_align16(s1, s2, n);
		} else {
			memcpy_alignable(s1, s2, n);
		}
	}

	return s1;
}

static void
memmove_bytes_reverse(uint8_t *dst, const uint8_t *src, size_t n)
{
	assert((uintptr_t)src < (uintptr_t)dst);

	// The destination is at a higher address, so copy backwards.
	const uint8_t *srcr;
	uint8_t       *dstr;
	srcr = src + (n - 1U);
	dstr = dst + (n - 1U);

	for (; n != 0; n--) {
		*dstr = *srcr;
		dstr--;
		srcr--;
	}
}

void *
memmove(void *dst, const void *src, size_t n)
{
	if (n == 0) {
		goto out;
	}

	if (util_add_overflows((uintptr_t)dst, n - 1U) ||
	    util_add_overflows((uintptr_t)src, n - 1U)) {
		panic("memmove_bytes addr overflow");
	}

	if ((uintptr_t)dst == (uintptr_t)src) {
		// Nothing to do.
	} else if ((uintptr_t)dst < (uintptr_t)src) {
		(void)memcpy(dst, src, n);
	} else if ((uintptr_t)src + (n - 1U) < (uintptr_t)dst) {
		// No overlap: the source ends before the destination starts.
		(void)memcpy(dst, src, n);
	} else {
		memmove_bytes_reverse(dst, src, n);
	}

out:
	return dst;
}

errno_t
memset_s(void *s, rsize_t smax, int c, rsize_t n)
{
	assert(compiler_sizeof_object(s) >= smax);
	uintptr_t a16 = (uintptr_t)s & (uintptr_t)15;

	errno_t err = 0;

	if (s == NULL) {
		err = 1;
		goto out_null;
	}
	if (n > smax) {
		err = 1;
		n = smax;
	}

	if (n == 0U) {
		// Nothing to do.
	} else if (c == 0) {
		uintptr_t a_zva = (uintptr_t)s &
				  (uintptr_t)util_mask(CPU_DCZVA_BITS);
		if (n < 32U) {
			prefetch_store_keep(s);
			memset_zeros_below32(s, n);
		} else if ((a_zva == 0U) && ((n >> CPU_DCZVA_BITS) > 0U)) {
			memset_zeros_dczva(s, n);
		} else if (a16 == 0U) {
			prefetch_store_keep(s);
			memset_zeros_align16(s, n);
		} else {
			prefetch_store_keep(s);
			memset_zeros_alignable(s, n);
		}
	} else {
		// Replicate the fill byte into all eight bytes of a 64-bit
		// pattern for the wider stores.
		uint64_t cs = (uint64_t)(uint8_t)c;
		cs |= cs << 8;
		cs |= cs << 16;
		cs |= cs << 32;
		if (n < 32U) {
			prefetch_store_keep(s);
			memset_below32(s, cs, n);
		} else if (a16 == 0U) {
			prefetch_store_keep(s);
			memset_align16(s, cs, n);
		} else {
			prefetch_store_keep(s);
			memset_alignable(s, (uint8_t)c, n);
		}
	}

out_null:
	return err;
}

void *
memset(void *s, int c, size_t n)
{
	(void)memset_s(s, n, c, n);
	return s;
}

size_t
strlen(const char *str)
{
	const char *end = str;

	assert(str != NULL);

	for (; *end != '\0'; end++) {
	}

	return (size_t)((uintptr_t)end - (uintptr_t)str);
}

char *
strchr(const char *str, int c)
{
	uintptr_t ret = (uintptr_t)NULL;
	const char *end = str;

	for (; *end != '\0'; end++) {
		if (*end == (char)c) {
			ret = (uintptr_t)end;
			break;
		}
	}

	// The terminating null character is considered part of the string,
	// so a search for '\0' returns a pointer to the terminator.
	if ((char)c == '\0') {
		ret = (uintptr_t)end;
	}

	return (char *)ret;
}