1 /*
2 * Copyright (c) 2008-2014 Travis Geiselbrecht
3 *
4 * Use of this source code is governed by a MIT-style
5 * license that can be found in the LICENSE file or at
6 * https://opensource.org/licenses/MIT
7 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <malloc.h>
#include <app.h>
#include <platform.h>
#include <kernel/thread.h>
#include <lk/console_cmd.h>
15
/*
 * Scratch buffers shared by every test in this file. string_tests() allocates
 * all four with memalign() and frees them again before it returns.
 *
 * src/dst   - used by the benchmarks and by the C reference routines during validation
 * src2/dst2 - used by the libc memcpy/memset calls during validation
 */
static uint8_t *src, *dst;
static uint8_t *src2, *dst2;

/* number of bytes handed to the routine under test on each benchmark call */
#define BUFFER_SIZE (2 * 1024 * 1024)
/* calls per measurement: enough iterations to have to copy/set 256MB of memory */
#define ITERATIONS ((256 * 1024 * 1024) / BUFFER_SIZE)
24
#if 1
/*
 * Default configuration: the "my" routines simply forward to libc, so the
 * "my memcpy"/"my memset" benchmark columns measure the libc versions.
 */
static inline void *mymemcpy(void *dest, const void *source, size_t len) {
    void *result = memcpy(dest, source, len);
    return result;
}

static inline void *mymemset(void *dest, int c, size_t len) {
    void *result = memset(dest, c, len);
    return result;
}
#else
// if we're testing our own memcpy, use this
extern void *mymemcpy(void *dst, const void *src, size_t len);
extern void *mymemset(void *dst, int c, size_t len);
#endif
33
/* reference implementations of memmove/memcpy */
typedef long word;

#define lsize sizeof(word)
#define lmask (lsize - 1)

/*
 * Portable C memmove used as the reference/baseline implementation.
 *
 * Copies count bytes from source to dest and returns dest. The regions may
 * overlap: when dest is below source the copy runs forward, otherwise it runs
 * backward from the end of both buffers.
 *
 * When source and dest share the same offset within a word, bytes are copied
 * until the pointers are word aligned, then whole words, then the tail bytes.
 * When the offsets differ (or the buffer is shorter than a word) everything
 * is copied byte by byte.
 *
 * All byte counts are kept in size_t and pointer values in uintptr_t so that
 * large counts are not truncated and address comparisons are unsigned.
 */
static void *c_memmove(void *dest, void const *source, size_t count) {
    char *d = (char *)dest;
    const char *s = (const char *)source;
    size_t len;

    if (count == 0 || dest == source)
        return dest;

    if ((uintptr_t)d < (uintptr_t)s) {
        // dest is below source: copy forward
        if (((uintptr_t)d | (uintptr_t)s) & lmask) {
            // source and/or dest do not align on word boundary
            if ((((uintptr_t)d ^ (uintptr_t)s) & lmask) || (count < lsize))
                len = count; // copy the rest of the buffer with the byte mover
            else
                len = lsize - ((uintptr_t)d & lmask); // move the ptrs up to a word boundary

            count -= len;
            for (; len > 0; len--)
                *d++ = *s++;
        }
        for (len = count / lsize; len > 0; len--) {
            *(word *)d = *(const word *)s;
            d += lsize;
            s += lsize;
        }
        for (len = count & lmask; len > 0; len--)
            *d++ = *s++;
    } else {
        // dest is above source: copy backward, starting one past the end
        d += count;
        s += count;
        if (((uintptr_t)d | (uintptr_t)s) & lmask) {
            // src and/or dest do not align on word boundary
            if ((((uintptr_t)d ^ (uintptr_t)s) & lmask) || (count <= lsize))
                len = count; // copy everything with the byte mover
            else
                len = (uintptr_t)d & lmask; // move the ptrs down to a word boundary

            count -= len;
            for (; len > 0; len--)
                *--d = *--s;
        }
        for (len = count / lsize; len > 0; len--) {
            d -= lsize;
            s -= lsize;
            *(word *)d = *(const word *)s;
        }
        for (len = count & lmask; len > 0; len--)
            *--d = *--s;
    }

    return dest;
}
92
c_memset(void * s,int c,size_t count)93 static void *c_memset(void *s, int c, size_t count) {
94 char *xs = (char *) s;
95 size_t len = (-(size_t)s) & lmask;
96 word cc = c & 0xff;
97
98 if ( count > len ) {
99 count -= len;
100 cc |= cc << 8;
101 cc |= cc << 16;
102 if (sizeof(word) == 8)
103 cc |= (uint64_t)cc << 32; // should be optimized out on 32 bit machines
104
105 // write to non-aligned memory byte-wise
106 for ( ; len > 0; len-- )
107 *xs++ = c;
108
109 // write to aligned memory dword-wise
110 for ( len = count / lsize; len > 0; len-- ) {
111 *((word *)xs) = (word)cc;
112 xs += lsize;
113 }
114
115 count &= lmask;
116 }
117
118 // write remaining bytes
119 for ( ; count > 0; count-- )
120 *xs++ = c;
121
122 return s;
123 }
124
/*
 * Do-nothing stand-in with the memcpy signature. Timing it gives the cost of
 * the benchmark loop and call itself, with no bytes copied.
 */
static void *null_memcpy(void *dest, const void *source, size_t len) {
    (void)source;
    (void)len;

    return dest;
}
128
bench_memcpy_routine(void * memcpy_routine (void *,const void *,size_t),size_t srcalign,size_t dstalign)129 static lk_time_t bench_memcpy_routine(void *memcpy_routine(void *, const void *, size_t), size_t srcalign, size_t dstalign) {
130 int i;
131 lk_time_t t0;
132
133 t0 = current_time();
134 for (i=0; i < ITERATIONS; i++) {
135 memcpy_routine(dst + dstalign, src + srcalign, BUFFER_SIZE);
136 }
137 return current_time() - t0;
138 }
139
bench_memcpy(void)140 static void bench_memcpy(void) {
141 lk_time_t null, c, libc, mine;
142 size_t srcalign, dstalign;
143
144 printf("memcpy speed test\n");
145 thread_sleep(200); // let the debug string clear the serial port
146
147 for (srcalign = 0; srcalign < 64; ) {
148 for (dstalign = 0; dstalign < 64; ) {
149
150 null = bench_memcpy_routine(&null_memcpy, srcalign, dstalign);
151 c = bench_memcpy_routine(&c_memmove, srcalign, dstalign);
152 libc = bench_memcpy_routine(&memcpy, srcalign, dstalign);
153 mine = bench_memcpy_routine(&mymemcpy, srcalign, dstalign);
154
155 printf("srcalign %zu, dstalign %zu: ", srcalign, dstalign);
156 printf(" null memcpy %u msecs\n", null);
157 printf("c memcpy %u msecs, %llu bytes/sec; ", c, (uint64_t)BUFFER_SIZE * ITERATIONS * 1000ULL / c);
158 printf("libc memcpy %u msecs, %llu bytes/sec; ", libc, (uint64_t)BUFFER_SIZE * ITERATIONS * 1000ULL / libc);
159 printf("my memcpy %u msecs, %llu bytes/sec; ", mine, (uint64_t)BUFFER_SIZE * ITERATIONS * 1000ULL / mine);
160 printf("\n");
161
162 if (dstalign < 8)
163 dstalign++;
164 else
165 dstalign <<= 1;
166 }
167 if (srcalign < 8)
168 srcalign++;
169 else
170 srcalign <<= 1;
171 }
172 }
173
/*
 * Fill len bytes at ptr with a reproducible pseudo-random pattern.
 *
 * Each byte receives the low bits of the running seed, and the seed is then
 * multiplied by 0x1234567 (wrapping at 32 bits) for the next byte, so the same
 * seed always produces the same contents.
 */
static void fillbuf(void *ptr, size_t len, uint32_t seed) {
    char *out = (char *)ptr;
    char *const end = out + len;

    while (out != end) {
        *out++ = seed;
        seed *= 0x1234567;
    }
}
182
validate_memcpy(void)183 static void validate_memcpy(void) {
184 size_t srcalign, dstalign, size;
185 const size_t maxsize = 256;
186
187 printf("testing memcpy for correctness\n");
188
189 /*
190 * do the simple tests to make sure that memcpy doesn't color outside
191 * the lines for all alignment cases
192 */
193 for (srcalign = 0; srcalign < 64; srcalign++) {
194 printf("srcalign %zu\n", srcalign);
195 for (dstalign = 0; dstalign < 64; dstalign++) {
196 //printf("\tdstalign %zu\n", dstalign);
197 for (size = 0; size < maxsize; size++) {
198
199 //printf("srcalign %zu, dstalign %zu, size %zu\n", srcalign, dstalign, size);
200
201 fillbuf(src, maxsize * 2, 567);
202 fillbuf(src2, maxsize * 2, 567);
203 fillbuf(dst, maxsize * 2, 123514);
204 fillbuf(dst2, maxsize * 2, 123514);
205
206 c_memmove(dst + dstalign, src + srcalign, size);
207 memcpy(dst2 + dstalign, src2 + srcalign, size);
208
209 int comp = memcmp(dst, dst2, maxsize * 2);
210 if (comp != 0) {
211 printf("error! srcalign %zu, dstalign %zu, size %zu\n", srcalign, dstalign, size);
212 }
213 }
214 }
215 }
216 }
217
bench_memset_routine(void * memset_routine (void *,int,size_t),size_t dstalign,size_t len)218 static lk_time_t bench_memset_routine(void *memset_routine(void *, int, size_t), size_t dstalign, size_t len) {
219 int i;
220 lk_time_t t0;
221
222 t0 = current_time();
223 for (i=0; i < ITERATIONS; i++) {
224 memset_routine(dst + dstalign, 0, len);
225 }
226 return current_time() - t0;
227 }
228
bench_memset(void)229 static void bench_memset(void) {
230 lk_time_t c, libc, mine;
231 size_t dstalign;
232
233 printf("memset speed test\n");
234 thread_sleep(200); // let the debug string clear the serial port
235
236 for (dstalign = 0; dstalign < 64; dstalign++) {
237
238 c = bench_memset_routine(&c_memset, dstalign, BUFFER_SIZE);
239 libc = bench_memset_routine(&memset, dstalign, BUFFER_SIZE);
240 mine = bench_memset_routine(&mymemset, dstalign, BUFFER_SIZE);
241
242 printf("dstalign %zu: ", dstalign);
243 printf("c memset %u msecs, %llu bytes/sec; ", c, (uint64_t)BUFFER_SIZE * ITERATIONS * 1000ULL / c);
244 printf("libc memset %u msecs, %llu bytes/sec; ", libc, (uint64_t)BUFFER_SIZE * ITERATIONS * 1000ULL / libc);
245 printf("my memset %u msecs, %llu bytes/sec; ", mine, (uint64_t)BUFFER_SIZE * ITERATIONS * 1000ULL / mine);
246 printf("\n");
247 }
248 }
249
validate_memset(void)250 static void validate_memset(void) {
251 size_t dstalign, size;
252 int c;
253 const size_t maxsize = 256;
254
255 printf("testing memset for correctness\n");
256
257 for (dstalign = 0; dstalign < 64; dstalign++) {
258 printf("align %zd\n", dstalign);
259 for (size = 0; size < maxsize; size++) {
260 for (c = -1; c < 257; c++) {
261
262 fillbuf(dst, maxsize * 2, 123514);
263 fillbuf(dst2, maxsize * 2, 123514);
264
265 c_memset(dst + dstalign, c, size);
266 memset(dst2 + dstalign, c, size);
267
268 int comp = memcmp(dst, dst2, maxsize * 2);
269 if (comp != 0) {
270 printf("error! align %zu, c 0x%hhx, size %zu\n", dstalign, c, size);
271 }
272 }
273 }
274 }
275 }
276
string_tests(int argc,const console_cmd_args * argv)277 static int string_tests(int argc, const console_cmd_args *argv) {
278 src = memalign(64, BUFFER_SIZE + 256);
279 dst = memalign(64, BUFFER_SIZE + 256);
280 src2 = memalign(64, BUFFER_SIZE + 256);
281 dst2 = memalign(64, BUFFER_SIZE + 256);
282
283 printf("src %p, dst %p\n", src, dst);
284 printf("src2 %p, dst2 %p\n", src2, dst2);
285
286 if (!src || !dst || !src2 || !dst2) {
287 printf("failed to allocate all the buffers\n");
288 goto out;
289 }
290
291 if (argc < 3) {
292 printf("not enough arguments:\n");
293 usage:
294 printf("%s validate <routine>\n", argv[0].str);
295 printf("%s bench <routine>\n", argv[0].str);
296 goto out;
297 }
298
299 if (!strcmp(argv[1].str, "validate")) {
300 if (!strcmp(argv[2].str, "memcpy")) {
301 validate_memcpy();
302 } else if (!strcmp(argv[2].str, "memset")) {
303 validate_memset();
304 }
305 } else if (!strcmp(argv[1].str, "bench")) {
306 if (!strcmp(argv[2].str, "memcpy")) {
307 bench_memcpy();
308 } else if (!strcmp(argv[2].str, "memset")) {
309 bench_memset();
310 }
311 } else {
312 goto usage;
313 }
314
315 out:
316 free(src);
317 free(dst);
318 free(src2);
319 free(dst2);
320
321 return 0;
322 }
323
/*
 * Console command table for this file: a single entry named "string" with the
 * help text "memcpy tests", handled by string_tests().
 * NOTE(review): registration details live in the STATIC_COMMAND macros from
 * lk/console_cmd.h - confirm there.
 */
STATIC_COMMAND_START
STATIC_COMMAND("string", "memcpy tests", &string_tests)
STATIC_COMMAND_END(stringtests);

/*
 * App descriptor named stringtests; nothing is specified between APP_START
 * and APP_END here.
 * NOTE(review): presumably this means the app has no init/entry hooks and
 * exists only so the command above gets linked in - confirm against app.h.
 */
APP_START(stringtests)
APP_END
330
331