1 /*
2  * Copyright (c) 2008-2014 Travis Geiselbrecht
3  *
4  * Use of this source code is governed by a MIT-style
5  * license that can be found in the LICENSE file or at
6  * https://opensource.org/licenses/MIT
7  */
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <malloc.h>
#include <app.h>
#include <platform.h>
#include <kernel/thread.h>
#include <lk/console_cmd.h>
15 
/*
 * Primary source/destination buffers. Allocated in string_tests(); the
 * benchmarks copy/set through these, and the validators run the reference
 * C routines (c_memmove/c_memset) on them.
 */
static uint8_t *src;
static uint8_t *dst;

/*
 * Second buffer pair. The validators run memcpy/memset from <string.h> on
 * these and then memcmp() dst against dst2.
 */
static uint8_t *src2;
static uint8_t *dst2;

/* number of bytes handled by one benchmark call */
#define BUFFER_SIZE (2*1024*1024)
#define ITERATIONS (256*1024*1024 / BUFFER_SIZE) // enough iterations to have to copy/set 256MB of memory
24 
/*
 * The "my" routines measured by the benchmarks. With the condition below set
 * to 1 they simply forward to memcpy/memset from <string.h>; change it to 0
 * to link against externally provided mymemcpy/mymemset instead.
 */
#if 1
static inline void *mymemcpy(void *dest, const void *source, size_t len) { return memcpy(dest, source, len); }
static inline void *mymemset(void *dest, int c, size_t len) { return memset(dest, c, len); }
#else
// if we're testing our own memcpy, use this
extern void *mymemcpy(void *dst, const void *src, size_t len);
extern void *mymemset(void *dst, int c, size_t len);
#endif
33 
/* reference implementations of memmove (also used as the memcpy reference) and memset */
typedef long word;

#define lsize sizeof(word)
#define lmask (lsize - 1)

/*
 * Reference memmove written in plain C.
 *
 * Copies count bytes from source to dest and returns dest. Overlapping
 * regions are handled by copying front-to-back when dest lies below source
 * and back-to-front otherwise.
 *
 * When both pointers share the same offset within a word, the bulk of the
 * copy is done one word at a time, with byte copies used only to reach a
 * word boundary and to finish the tail. If the offsets differ (or the copy
 * is too short to be worth aligning) everything is copied bytewise.
 *
 * Addresses are compared and masked as uintptr_t so that the direction
 * decision is correct for any address, including ones with the top bit set,
 * and the byte counter is a size_t so large counts are not truncated.
 */
static void *c_memmove(void *dest, void const *source, size_t count) {
    char *d = (char *)dest;
    const char *s = (const char *)source;
    size_t len;

    if (count == 0 || dest == source)
        return dest;

    if ((uintptr_t)d < (uintptr_t)s) {
        // dest is below source: copy forwards
        if (((uintptr_t)d | (uintptr_t)s) & lmask) {
            // source and/or dest do not align on word boundary
            if ((((uintptr_t)d ^ (uintptr_t)s) & lmask) || (count < lsize))
                len = count; // copy the rest of the buffer with the byte mover
            else
                len = lsize - ((uintptr_t)d & lmask); // move the ptrs up to a word boundary

            count -= len;
            for (; len > 0; len--)
                *d++ = *s++;
        }
        // word loop only runs with both pointers aligned, so the two
        // pointers are at least one word apart and a word copy cannot
        // clobber source bytes that are still to be read
        for (len = count / lsize; len > 0; len--) {
            *(word *)d = *(const word *)s;
            d += lsize;
            s += lsize;
        }
        for (len = count & lmask; len > 0; len--)
            *d++ = *s++;
    } else {
        // dest is above source: start one past the end and copy backwards
        d += count;
        s += count;
        if (((uintptr_t)d | (uintptr_t)s) & lmask) {
            // src and/or dest do not align on word boundary
            if ((((uintptr_t)d ^ (uintptr_t)s) & lmask) || (count <= lsize))
                len = count;
            else
                len = ((uintptr_t)d & lmask); // move the ptrs down to a word boundary

            count -= len;
            for (; len > 0; len--)
                *--d = *--s;
        }
        for (len = count / lsize; len > 0; len--) {
            d -= lsize;
            s -= lsize;
            *(word *)d = *(const word *)s;
        }
        for (len = count & lmask; len > 0; len--)
            *--d = *--s;
    }

    return dest;
}
92 
c_memset(void * s,int c,size_t count)93 static void *c_memset(void *s, int c, size_t count) {
94     char *xs = (char *) s;
95     size_t len = (-(size_t)s) & lmask;
96     word cc = c & 0xff;
97 
98     if ( count > len ) {
99         count -= len;
100         cc |= cc << 8;
101         cc |= cc << 16;
102         if (sizeof(word) == 8)
103             cc |= (uint64_t)cc << 32; // should be optimized out on 32 bit machines
104 
105         // write to non-aligned memory byte-wise
106         for ( ; len > 0; len-- )
107             *xs++ = c;
108 
109         // write to aligned memory dword-wise
110         for ( len = count / lsize; len > 0; len-- ) {
111             *((word *)xs) = (word)cc;
112             xs += lsize;
113         }
114 
115         count &= lmask;
116     }
117 
118     // write remaining bytes
119     for ( ; count > 0; count-- )
120         *xs++ = c;
121 
122     return s;
123 }
124 
/*
 * Do-nothing stand-in with the memcpy signature: touches no memory and just
 * hands dest back. bench_memcpy() times it alongside the real routines.
 */
static void *null_memcpy(void *dest, const void *source, size_t len) {
    (void)source;
    (void)len;

    return dest;
}
128 
bench_memcpy_routine(void * memcpy_routine (void *,const void *,size_t),size_t srcalign,size_t dstalign)129 static lk_time_t bench_memcpy_routine(void *memcpy_routine(void *, const void *, size_t), size_t srcalign, size_t dstalign) {
130     int i;
131     lk_time_t t0;
132 
133     t0 = current_time();
134     for (i=0; i < ITERATIONS; i++) {
135         memcpy_routine(dst + dstalign, src + srcalign, BUFFER_SIZE);
136     }
137     return current_time() - t0;
138 }
139 
bench_memcpy(void)140 static void bench_memcpy(void) {
141     lk_time_t null, c, libc, mine;
142     size_t srcalign, dstalign;
143 
144     printf("memcpy speed test\n");
145     thread_sleep(200); // let the debug string clear the serial port
146 
147     for (srcalign = 0; srcalign < 64; ) {
148         for (dstalign = 0; dstalign < 64; ) {
149 
150             null = bench_memcpy_routine(&null_memcpy, srcalign, dstalign);
151             c = bench_memcpy_routine(&c_memmove, srcalign, dstalign);
152             libc = bench_memcpy_routine(&memcpy, srcalign, dstalign);
153             mine = bench_memcpy_routine(&mymemcpy, srcalign, dstalign);
154 
155             printf("srcalign %zu, dstalign %zu: ", srcalign, dstalign);
156             printf("   null memcpy %u msecs\n", null);
157             printf("c memcpy %u msecs, %llu bytes/sec; ", c, (uint64_t)BUFFER_SIZE * ITERATIONS * 1000ULL / c);
158             printf("libc memcpy %u msecs, %llu bytes/sec; ", libc, (uint64_t)BUFFER_SIZE * ITERATIONS * 1000ULL / libc);
159             printf("my memcpy %u msecs, %llu bytes/sec; ", mine, (uint64_t)BUFFER_SIZE * ITERATIONS * 1000ULL / mine);
160             printf("\n");
161 
162             if (dstalign < 8)
163                 dstalign++;
164             else
165                 dstalign <<= 1;
166         }
167         if (srcalign < 8)
168             srcalign++;
169         else
170             srcalign <<= 1;
171     }
172 }
173 
/*
 * Fill len bytes at ptr with a repeatable pseudo-random pattern.
 *
 * Each byte receives the current seed (narrowed to a char) and the seed is
 * then multiplied by 0x1234567, so the same starting seed always produces
 * the same buffer contents.
 */
static void fillbuf(void *ptr, size_t len, uint32_t seed) {
    char *out = (char *)ptr;

    for (size_t remaining = len; remaining > 0; remaining--) {
        *out = seed;
        out++;
        seed *= 0x1234567;
    }
}
182 
validate_memcpy(void)183 static void validate_memcpy(void) {
184     size_t srcalign, dstalign, size;
185     const size_t maxsize = 256;
186 
187     printf("testing memcpy for correctness\n");
188 
189     /*
190      * do the simple tests to make sure that memcpy doesn't color outside
191      * the lines for all alignment cases
192      */
193     for (srcalign = 0; srcalign < 64; srcalign++) {
194         printf("srcalign %zu\n", srcalign);
195         for (dstalign = 0; dstalign < 64; dstalign++) {
196             //printf("\tdstalign %zu\n", dstalign);
197             for (size = 0; size < maxsize; size++) {
198 
199                 //printf("srcalign %zu, dstalign %zu, size %zu\n", srcalign, dstalign, size);
200 
201                 fillbuf(src, maxsize * 2, 567);
202                 fillbuf(src2, maxsize * 2, 567);
203                 fillbuf(dst, maxsize * 2, 123514);
204                 fillbuf(dst2, maxsize * 2, 123514);
205 
206                 c_memmove(dst + dstalign, src + srcalign, size);
207                 memcpy(dst2 + dstalign, src2 + srcalign, size);
208 
209                 int comp = memcmp(dst, dst2, maxsize * 2);
210                 if (comp != 0) {
211                     printf("error! srcalign %zu, dstalign %zu, size %zu\n", srcalign, dstalign, size);
212                 }
213             }
214         }
215     }
216 }
217 
bench_memset_routine(void * memset_routine (void *,int,size_t),size_t dstalign,size_t len)218 static lk_time_t bench_memset_routine(void *memset_routine(void *, int, size_t), size_t dstalign, size_t len) {
219     int i;
220     lk_time_t t0;
221 
222     t0 = current_time();
223     for (i=0; i < ITERATIONS; i++) {
224         memset_routine(dst + dstalign, 0, len);
225     }
226     return current_time() - t0;
227 }
228 
bench_memset(void)229 static void bench_memset(void) {
230     lk_time_t c, libc, mine;
231     size_t dstalign;
232 
233     printf("memset speed test\n");
234     thread_sleep(200); // let the debug string clear the serial port
235 
236     for (dstalign = 0; dstalign < 64; dstalign++) {
237 
238         c = bench_memset_routine(&c_memset, dstalign, BUFFER_SIZE);
239         libc = bench_memset_routine(&memset, dstalign, BUFFER_SIZE);
240         mine = bench_memset_routine(&mymemset, dstalign, BUFFER_SIZE);
241 
242         printf("dstalign %zu: ", dstalign);
243         printf("c memset %u msecs, %llu bytes/sec; ", c, (uint64_t)BUFFER_SIZE * ITERATIONS * 1000ULL / c);
244         printf("libc memset %u msecs, %llu bytes/sec; ", libc, (uint64_t)BUFFER_SIZE * ITERATIONS * 1000ULL / libc);
245         printf("my memset %u msecs, %llu bytes/sec; ", mine, (uint64_t)BUFFER_SIZE * ITERATIONS * 1000ULL / mine);
246         printf("\n");
247     }
248 }
249 
validate_memset(void)250 static void validate_memset(void) {
251     size_t dstalign, size;
252     int c;
253     const size_t maxsize = 256;
254 
255     printf("testing memset for correctness\n");
256 
257     for (dstalign = 0; dstalign < 64; dstalign++) {
258         printf("align %zd\n", dstalign);
259         for (size = 0; size < maxsize; size++) {
260             for (c = -1; c < 257; c++) {
261 
262                 fillbuf(dst, maxsize * 2, 123514);
263                 fillbuf(dst2, maxsize * 2, 123514);
264 
265                 c_memset(dst + dstalign, c, size);
266                 memset(dst2 + dstalign, c, size);
267 
268                 int comp = memcmp(dst, dst2, maxsize * 2);
269                 if (comp != 0) {
270                     printf("error! align %zu, c 0x%hhx, size %zu\n", dstalign, c, size);
271                 }
272             }
273         }
274     }
275 }
276 
string_tests(int argc,const console_cmd_args * argv)277 static int string_tests(int argc, const console_cmd_args *argv) {
278     src = memalign(64, BUFFER_SIZE + 256);
279     dst = memalign(64, BUFFER_SIZE + 256);
280     src2 = memalign(64, BUFFER_SIZE + 256);
281     dst2 = memalign(64, BUFFER_SIZE + 256);
282 
283     printf("src %p, dst %p\n", src, dst);
284     printf("src2 %p, dst2 %p\n", src2, dst2);
285 
286     if (!src || !dst || !src2 || !dst2) {
287         printf("failed to allocate all the buffers\n");
288         goto out;
289     }
290 
291     if (argc < 3) {
292         printf("not enough arguments:\n");
293 usage:
294         printf("%s validate <routine>\n", argv[0].str);
295         printf("%s bench <routine>\n", argv[0].str);
296         goto out;
297     }
298 
299     if (!strcmp(argv[1].str, "validate")) {
300         if (!strcmp(argv[2].str, "memcpy")) {
301             validate_memcpy();
302         } else if (!strcmp(argv[2].str, "memset")) {
303             validate_memset();
304         }
305     } else if (!strcmp(argv[1].str, "bench")) {
306         if (!strcmp(argv[2].str, "memcpy")) {
307             bench_memcpy();
308         } else if (!strcmp(argv[2].str, "memset")) {
309             bench_memset();
310         }
311     } else {
312         goto usage;
313     }
314 
315 out:
316     free(src);
317     free(dst);
318     free(src2);
319     free(dst2);
320 
321     return 0;
322 }
323 
/* register the "string" console command (help text "memcpy tests"), handled by string_tests() */
STATIC_COMMAND_START
STATIC_COMMAND("string", "memcpy tests", &string_tests)
STATIC_COMMAND_END(stringtests);

/*
 * Declare the stringtests app. Nothing is set between APP_START and APP_END
 * here, so the app presumably has no init/entry hooks and exists only to
 * carry the console command above -- TODO confirm against <app.h>.
 */
APP_START(stringtests)
APP_END
330 
331