1 /*
2   Simple DirectMedia Layer
3   Copyright (C) 1997-2020 Sam Lantinga <slouken@libsdl.org>
4 
5   This software is provided 'as-is', without any express or implied
6   warranty.  In no event will the authors be held liable for any damages
7   arising from the use of this software.
8 
9   Permission is granted to anyone to use this software for any purpose,
10   including commercial applications, and to alter it and redistribute it
11   freely, subject to the following restrictions:
12 
13   1. The origin of this software must not be misrepresented; you must not
14      claim that you wrote the original software. If you use this software
15      in a product, an acknowledgment in the product documentation would be
16      appreciated but is not required.
17   2. Altered source versions must be plainly marked as such, and must not be
18      misrepresented as being the original software.
19   3. This notice may not be removed or altered from any source distribution.
20 */
21 #ifdef TEST_MAIN
22 #include "SDL_config.h"
23 #else
24 #include "../SDL_internal.h"
25 #endif
26 
27 #if defined(__WIN32__) || defined(__WINRT__)
28 #include "../core/windows/SDL_windows.h"
29 #endif
30 #if defined(__OS2__)
31 #define INCL_DOS
32 #include <os2.h>
33 #ifndef QSV_NUMPROCESSORS
34 #define QSV_NUMPROCESSORS 26
35 #endif
36 #endif
37 
38 /* CPU feature detection for SDL */
39 
40 #include "SDL_cpuinfo.h"
41 #include "SDL_assert.h"
42 
43 #ifdef HAVE_SYSCONF
44 #include <unistd.h>
45 #endif
46 #ifdef HAVE_SYSCTLBYNAME
47 #include <sys/types.h>
48 #include <sys/sysctl.h>
49 #endif
50 #if defined(__MACOSX__) && (defined(__ppc__) || defined(__ppc64__))
51 #include <sys/sysctl.h>         /* For AltiVec check */
52 #elif defined(__OpenBSD__) && defined(__powerpc__)
53 #include <sys/param.h>
54 #include <sys/sysctl.h> /* For AltiVec check */
55 #include <machine/cpu.h>
56 #elif SDL_ALTIVEC_BLITTERS && HAVE_SETJMP
57 #include <signal.h>
58 #include <setjmp.h>
59 #endif
60 
61 #if defined(__QNXNTO__)
62 #include <sys/syspage.h>
63 #endif
64 
65 #if (defined(__LINUX__) || defined(__ANDROID__)) && defined(__ARM_ARCH)
66 /*#include <asm/hwcap.h>*/
67 #ifndef AT_HWCAP
68 #define AT_HWCAP 16
69 #endif
70 #ifndef AT_PLATFORM
71 #define AT_PLATFORM 15
72 #endif
73 /* Prevent compilation error when including elf.h would also try to define AT_* as an enum */
74 #ifndef AT_NULL
75 #define AT_NULL 0
76 #endif
77 #ifndef HWCAP_NEON
78 #define HWCAP_NEON (1 << 12)
79 #endif
80 #if defined HAVE_GETAUXVAL
81 #include <sys/auxv.h>
82 #else
83 #include <fcntl.h>
84 #endif
85 #endif
86 
87 #if defined(__ANDROID__) && defined(__ARM_ARCH) && !defined(HAVE_GETAUXVAL)
88 #if __ARM_ARCH < 8
89 #include <cpu-features.h>
90 #endif
91 #endif
92 
93 #ifdef __RISCOS__
94 #include <kernel.h>
95 #include <swis.h>
96 #endif
97 
/* Bit flags OR-ed together into the cached SDL_CPUFeatures word by
   SDL_GetCPUFeatures(); each public SDL_Has*() query tests one of them. */
#define CPU_HAS_RDTSC   (1 << 0)
#define CPU_HAS_ALTIVEC (1 << 1)
#define CPU_HAS_MMX     (1 << 2)
#define CPU_HAS_3DNOW   (1 << 3)
#define CPU_HAS_SSE     (1 << 4)
#define CPU_HAS_SSE2    (1 << 5)
#define CPU_HAS_SSE3    (1 << 6)
#define CPU_HAS_SSE41   (1 << 7)
#define CPU_HAS_SSE42   (1 << 8)
#define CPU_HAS_AVX     (1 << 9)
#define CPU_HAS_AVX2    (1 << 10)
#define CPU_HAS_NEON    (1 << 11)
#define CPU_HAS_AVX512F (1 << 12)
#define CPU_HAS_ARM_SIMD (1 << 13)
112 
113 #if SDL_ALTIVEC_BLITTERS && HAVE_SETJMP && !__MACOSX__ && !__OpenBSD__
114 /* This is the brute force way of detecting instruction sets...
115    the idea is borrowed from the libmpeg2 library - thanks!
116  */
/* Jump target shared between the AltiVec probe (which calls setjmp on it)
   and the SIGILL handler below (which longjmps back to it). */
static jmp_buf jmpbuf;
/* SIGILL handler installed while probing: `sig` is ignored and control is
   returned to the setjmp() site with the value 1, signalling that the probe
   instruction faulted. */
static void
illegal_instruction(int sig)
{
    longjmp(jmpbuf, 1);
}
123 #endif /* HAVE_SETJMP */
124 
/* Report whether the CPUID instruction can be used.
 *
 * Most branches try to flip bit 0x200000 of EFLAGS (the "ID bit", per the
 * inline assembly comments) and check whether the change sticks; if it does,
 * has_CPUID is set to 1.  The MSVC x64 branch simply assumes CPUID exists.
 *
 * Returns 1 when CPUID is available, 0 otherwise.  The result is also 0 when
 * SDL_CPUINFO_DISABLED is defined or when no compiler/architecture branch
 * below matches, because has_CPUID is never written in those cases.
 */
static int
CPU_haveCPUID(void)
{
    int has_CPUID = 0;

/* *INDENT-OFF* */
#ifndef SDL_CPUINFO_DISABLED
#if defined(__GNUC__) && defined(i386)
    /* GCC-style assembler, 32-bit x86: result is stored through the "=m"
       operand; EAX and ECX are declared as clobbered. */
    __asm__ (
"        pushfl                      # Get original EFLAGS             \n"
"        popl    %%eax                                                 \n"
"        movl    %%eax,%%ecx                                           \n"
"        xorl    $0x200000,%%eax     # Flip ID bit in EFLAGS           \n"
"        pushl   %%eax               # Save new EFLAGS value on stack  \n"
"        popfl                       # Replace current EFLAGS value    \n"
"        pushfl                      # Get new EFLAGS                  \n"
"        popl    %%eax               # Store new EFLAGS in EAX         \n"
"        xorl    %%ecx,%%eax         # Can not toggle ID bit,          \n"
"        jz      1f                  # Processor=80486                 \n"
"        movl    $1,%0               # We have CPUID support           \n"
"1:                                                                    \n"
    : "=m" (has_CPUID)
    :
    : "%eax", "%ecx"
    );
#elif defined(__GNUC__) && defined(__x86_64__)
/* Technically, if this is being compiled under __x86_64__ then it has
   CPUid by definition.  But it's nice to be able to prove it.  :)      */
    /* Same probe as above using the 64-bit push/pop forms; RAX and RCX are
       declared as clobbered. */
    __asm__ (
"        pushfq                      # Get original EFLAGS             \n"
"        popq    %%rax                                                 \n"
"        movq    %%rax,%%rcx                                           \n"
"        xorl    $0x200000,%%eax     # Flip ID bit in EFLAGS           \n"
"        pushq   %%rax               # Save new EFLAGS value on stack  \n"
"        popfq                       # Replace current EFLAGS value    \n"
"        pushfq                      # Get new EFLAGS                  \n"
"        popq    %%rax               # Store new EFLAGS in EAX         \n"
"        xorl    %%ecx,%%eax         # Can not toggle ID bit,          \n"
"        jz      1f                  # Processor=80486                 \n"
"        movl    $1,%0               # We have CPUID support           \n"
"1:                                                                    \n"
    : "=m" (has_CPUID)
    :
    : "%rax", "%rcx"
    );
#elif (defined(_MSC_VER) && defined(_M_IX86)) || defined(__WATCOMC__)
    /* MSVC (32-bit) / Watcom inline assembler: the local variable is
       referenced by name inside the __asm block. */
    __asm {
        pushfd                      ; Get original EFLAGS
        pop     eax
        mov     ecx, eax
        xor     eax, 200000h        ; Flip ID bit in EFLAGS
        push    eax                 ; Save new EFLAGS value on stack
        popfd                       ; Replace current EFLAGS value
        pushfd                      ; Get new EFLAGS
        pop     eax                 ; Store new EFLAGS in EAX
        xor     eax, ecx            ; Can not toggle ID bit,
        jz      done                ; Processor=80486
        mov     has_CPUID,1         ; We have CPUID support
done:
    }
#elif defined(_MSC_VER) && defined(_M_X64)
    /* No probe on MSVC x64: CPUID is taken for granted. */
    has_CPUID = 1;
#elif defined(__sun) && defined(__i386)
    /* Sun toolchain, 32-bit.  This form has no operand list, so the result
       is written to a fixed frame offset (-8 from EBP).
       NOTE(review): presumably that offset is the stack slot of has_CPUID
       under this compiler -- confirm, since nothing here enforces it. */
    __asm (
"       pushfl                 \n"
"       popl    %eax           \n"
"       movl    %eax,%ecx      \n"
"       xorl    $0x200000,%eax \n"
"       pushl   %eax           \n"
"       popfl                  \n"
"       pushfl                 \n"
"       popl    %eax           \n"
"       xorl    %ecx,%eax      \n"
"       jz      1f             \n"
"       movl    $1,-8(%ebp)    \n"
"1:                            \n"
    );
#elif defined(__sun) && defined(__amd64)
    /* Sun toolchain, 64-bit.  As in the 32-bit Sun branch, the result goes
       to a fixed frame offset (-8 from RBP).
       NOTE(review): presumably the slot of has_CPUID -- confirm. */
    __asm (
"       pushfq                 \n"
"       popq    %rax           \n"
"       movq    %rax,%rcx      \n"
"       xorl    $0x200000,%eax \n"
"       pushq   %rax           \n"
"       popfq                  \n"
"       pushfq                 \n"
"       popq    %rax           \n"
"       xorl    %ecx,%eax      \n"
"       jz      1f             \n"
"       movl    $1,-8(%rbp)    \n"
"1:                            \n"
    );
#endif
#endif
/* *INDENT-ON* */
    return has_CPUID;
}
222 
/* cpuid(func, a, b, c, d)
 *
 * Execute the CPUID instruction for function `func` and store the resulting
 * EAX, EBX, ECX and EDX values into the lvalues a, b, c and d.
 *
 *  - GCC-style variants clear ECX/RCX before the instruction, and save and
 *    restore EBX/RBX around it, handing the EBX result back through ESI/RSI.
 *  - The MSVC (32-bit) / Watcom variant also clears ECX and moves each
 *    register into the named variable inside the __asm block, so `func` and
 *    the outputs must be things that inline assembler can reference.
 *  - The MSVC x64 variant goes through the __cpuid() intrinsic and a local
 *    array named CPUInfo (so callers must not pass a variable of that name).
 *  - On every other toolchain the macro just zeroes all four outputs.
 */
#if defined(__GNUC__) && defined(i386)
#define cpuid(func, a, b, c, d) \
    __asm__ __volatile__ ( \
"        pushl %%ebx        \n" \
"        xorl %%ecx,%%ecx   \n" \
"        cpuid              \n" \
"        movl %%ebx, %%esi  \n" \
"        popl %%ebx         \n" : \
            "=a" (a), "=S" (b), "=c" (c), "=d" (d) : "a" (func))
#elif defined(__GNUC__) && defined(__x86_64__)
#define cpuid(func, a, b, c, d) \
    __asm__ __volatile__ ( \
"        pushq %%rbx        \n" \
"        xorq %%rcx,%%rcx   \n" \
"        cpuid              \n" \
"        movq %%rbx, %%rsi  \n" \
"        popq %%rbx         \n" : \
            "=a" (a), "=S" (b), "=c" (c), "=d" (d) : "a" (func))
#elif (defined(_MSC_VER) && defined(_M_IX86)) || defined(__WATCOMC__)
#define cpuid(func, a, b, c, d) \
    __asm { \
        __asm mov eax, func \
        __asm xor ecx, ecx \
        __asm cpuid \
        __asm mov a, eax \
        __asm mov b, ebx \
        __asm mov c, ecx \
        __asm mov d, edx \
}
#elif defined(_MSC_VER) && defined(_M_X64)
#define cpuid(func, a, b, c, d) \
{ \
    int CPUInfo[4]; \
    __cpuid(CPUInfo, func); \
    a = CPUInfo[0]; \
    b = CPUInfo[1]; \
    c = CPUInfo[2]; \
    d = CPUInfo[3]; \
}
#else
/* No CPUID support known for this compiler/architecture: report zeros. */
#define cpuid(func, a, b, c, d) \
    do { a = b = c = d = 0; (void) a; (void) b; (void) c; (void) d; } while (0)
#endif
266 
267 static int CPU_CPUIDFeatures[4];
268 static int CPU_CPUIDMaxFunction = 0;
269 static SDL_bool CPU_OSSavesYMM = SDL_FALSE;
270 static SDL_bool CPU_OSSavesZMM = SDL_FALSE;
271 
272 static void
CPU_calcCPUIDFeatures(void)273 CPU_calcCPUIDFeatures(void)
274 {
275     static SDL_bool checked = SDL_FALSE;
276     if (!checked) {
277         checked = SDL_TRUE;
278         if (CPU_haveCPUID()) {
279             int a, b, c, d;
280             cpuid(0, a, b, c, d);
281             CPU_CPUIDMaxFunction = a;
282             if (CPU_CPUIDMaxFunction >= 1) {
283                 cpuid(1, a, b, c, d);
284                 CPU_CPUIDFeatures[0] = a;
285                 CPU_CPUIDFeatures[1] = b;
286                 CPU_CPUIDFeatures[2] = c;
287                 CPU_CPUIDFeatures[3] = d;
288 
289                 /* Check to make sure we can call xgetbv */
290                 if (c & 0x08000000) {
291                     /* Call xgetbv to see if YMM (etc) register state is saved */
292 #if defined(__GNUC__) && (defined(i386) || defined(__x86_64__))
293                     __asm__(".byte 0x0f, 0x01, 0xd0" : "=a" (a) : "c" (0) : "%edx");
294 #elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)) && (_MSC_FULL_VER >= 160040219) /* VS2010 SP1 */
295                     a = (int)_xgetbv(0);
296 #elif (defined(_MSC_VER) && defined(_M_IX86)) || defined(__WATCOMC__)
297                     __asm
298                     {
299                         xor ecx, ecx
300                         _asm _emit 0x0f _asm _emit 0x01 _asm _emit 0xd0
301                         mov a, eax
302                     }
303 #endif
304                     CPU_OSSavesYMM = ((a & 6) == 6) ? SDL_TRUE : SDL_FALSE;
305                     CPU_OSSavesZMM = (CPU_OSSavesYMM && ((a & 0xe0) == 0xe0)) ? SDL_TRUE : SDL_FALSE;
306                 }
307             }
308         }
309     }
310 }
311 
/* Detect AltiVec support.
 *
 * Returns non-zero when AltiVec was detected, 0 otherwise (always 0 when
 * SDL_CPUINFO_DISABLED is defined or no branch below applies).
 *
 *  - Mac OS X on PowerPC and OpenBSD on PowerPC: ask the kernel via sysctl().
 *  - Otherwise, when SDL_ALTIVEC_BLITTERS and HAVE_SETJMP are set: execute
 *    an AltiVec instruction with a temporary SIGILL handler installed and
 *    see whether it faults.
 */
static int
CPU_haveAltiVec(void)
{
    /* NOTE(review): presumably volatile so the value is well defined after
       the longjmp() back into this function -- confirm. */
    volatile int altivec = 0;
#ifndef SDL_CPUINFO_DISABLED
#if (defined(__MACOSX__) && (defined(__ppc__) || defined(__ppc64__))) || (defined(__OpenBSD__) && defined(__powerpc__))
#ifdef __OpenBSD__
    int selectors[2] = { CTL_MACHDEP, CPU_ALTIVEC };
#else
    int selectors[2] = { CTL_HW, HW_VECTORUNIT };
#endif
    int hasVectorUnit = 0;
    size_t length = sizeof(hasVectorUnit);
    int error = sysctl(selectors, 2, &hasVectorUnit, &length, NULL, 0);
    /* Only trust the value when the sysctl() call itself succeeded. */
    if (0 == error)
        altivec = (hasVectorUnit != 0);
#elif SDL_ALTIVEC_BLITTERS && HAVE_SETJMP
    void (*handler) (int sig);
    /* Temporarily route SIGILL to illegal_instruction(), remembering the
       previous handler so it can be restored below. */
    handler = signal(SIGILL, illegal_instruction);
    if (setjmp(jmpbuf) == 0) {
        /* If this faults, the handler longjmps back and setjmp() returns
           non-zero, so the assignment below is skipped. */
        asm volatile ("mtspr 256, %0\n\t" "vand %%v0, %%v0, %%v0"::"r" (-1));
        altivec = 1;
    }
    signal(SIGILL, handler);
#endif
#endif
    return altivec;
}
340 
/* CPU_haveARMSIMD(): returns non-zero when the ARM SIMD feature reported by
   SDL_HasARMSIMD() is available.  Exactly one of the definitions below is
   compiled, chosen by target architecture and platform. */
#if defined(__ARM_ARCH) && (__ARM_ARCH >= 6)
/* Compile-time answer: the target architecture level is 6 or higher. */
static int
CPU_haveARMSIMD(void)
{
	return 1;
}

#elif !defined(__arm__)
/* Not an ARM build at all. */
static int
CPU_haveARMSIMD(void)
{
	return 0;
}

#elif defined(__LINUX__)
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <elf.h>

/* Linux on older ARM targets: scan the process's auxiliary vector for
   AT_PLATFORM entries and check whether the platform string starts with
   "v6l" or "v7l".  Returns 0 if /proc/self/auxv cannot be opened. */
static int
CPU_haveARMSIMD(void)
{
    int arm_simd = 0;
    int fd;

    fd = open("/proc/self/auxv", O_RDONLY);
    if (fd >= 0)
    {
        Elf32_auxv_t aux;
        /* Read one fixed-size auxv record at a time; stop on a short read. */
        while (read(fd, &aux, sizeof aux) == sizeof aux)
        {
            if (aux.a_type == AT_PLATFORM)
            {
                /* The record's value is used as a pointer to the platform
                   name string. */
                const char *plat = (const char *) aux.a_un.a_val;
                if (plat) {
                    arm_simd = strncmp(plat, "v6l", 3) == 0 ||
                               strncmp(plat, "v7l", 3) == 0;
                }
            }
        }
        close(fd);
    }
    return arm_simd;
}

#elif defined(__RISCOS__)

/* RISC OS: query the OS_PlatformFeatures SWI.  Any SWI error, or bit 31
   being clear in the first result, is treated as "not available".
   NOTE(review): the second call (r0 = 34, r1 = 29) presumably asks about one
   specific CPU feature -- confirm the numbers against the RISC OS
   documentation. */
static int
CPU_haveARMSIMD(void)
{
	_kernel_swi_regs regs;
	regs.r[0] = 0;
	if (_kernel_swi(OS_PlatformFeatures, &regs, &regs) != NULL)
		return 0;

	if (!(regs.r[0] & (1<<31)))
		return 0;

	regs.r[0] = 34;
	regs.r[1] = 29;
	if (_kernel_swi(OS_PlatformFeatures, &regs, &regs) != NULL)
		return 0;

	return regs.r[0];
}

#else
/* Unknown ARM platform: emit a build-time warning and report "no". */
static int
CPU_haveARMSIMD(void)
{
#warning SDL_HasARMSIMD is not implemented for this ARM platform. Write me.
    return 0;
}
#endif
417 
418 #if defined(__LINUX__) && defined(__ARM_ARCH) && !defined(HAVE_GETAUXVAL)
419 static int
readProcAuxvForNeon(void)420 readProcAuxvForNeon(void)
421 {
422     int neon = 0;
423     int kv[2];
424     const int fd = open("/proc/self/auxv", O_RDONLY);
425     if (fd != -1) {
426         while (read(fd, kv, sizeof (kv)) == sizeof (kv)) {
427             if (kv[0] == AT_HWCAP) {
428                 neon = ((kv[1] & HWCAP_NEON) == HWCAP_NEON);
429                 break;
430             }
431         }
432         close(fd);
433     }
434     return neon;
435 }
436 #endif
437 
/* Detect ARM NEON support.
 *
 * Returns non-zero when NEON is available and 0 otherwise.  The answer is
 * either fixed at compile time (feature detection disabled, ARMv8+, Apple,
 * OpenBSD, non-ARM targets) or obtained from the operating system: a Windows
 * processor-feature query, the QNX system page, the Linux/Android auxiliary
 * vector, the Android NDK cpufeatures helpers, or a RISC OS SWI.  Unknown
 * ARM platforms get a build-time warning and report 0.
 */
static int
CPU_haveNEON(void)
{
/* The way you detect NEON is a privileged instruction on ARM, so you have
   to query the OS kernel in a platform-specific way. :/ */
#if defined(SDL_CPUINFO_DISABLED)
   return 0; /* disabled */
#elif (defined(__WINDOWS__) || defined(__WINRT__)) && (defined(_M_ARM) || defined(_M_ARM64))
/* Visual Studio, for ARM, doesn't define __ARM_ARCH. Handle this first. */
/* Seems to have been removed */
#  if !defined(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE)
#    define PF_ARM_NEON_INSTRUCTIONS_AVAILABLE 19
#  endif
/* All WinRT ARM devices are required to support NEON, but just in case. */
    return IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE) != 0;
#elif defined(__ARM_ARCH) && (__ARM_ARCH >= 8)
    return 1;  /* ARMv8 always has non-optional NEON support. */
#elif defined(__APPLE__) && defined(__ARM_ARCH) && (__ARM_ARCH >= 7)
    /* (note that sysctlbyname("hw.optional.neon") doesn't work!) */
    return 1;  /* all Apple ARMv7 chips and later have NEON. */
#elif defined(__APPLE__)
    return 0;  /* assume anything else from Apple doesn't have NEON. */
#elif defined(__OpenBSD__)
    return 1;  /* OpenBSD only supports ARMv7 CPUs that have NEON. */
#elif !defined(__arm__)
    return 0;  /* not an ARM CPU at all. */
#elif defined(__QNXNTO__)
    /* QNX: test the NEON flag in the system page's cpuinfo entry. */
    return SYSPAGE_ENTRY(cpuinfo)->flags & ARM_CPU_FLAG_NEON;
#elif (defined(__LINUX__) || defined(__ANDROID__)) && defined(HAVE_GETAUXVAL)
    /* getauxval() is available: test the NEON bit of AT_HWCAP directly. */
    return ((getauxval(AT_HWCAP) & HWCAP_NEON) == HWCAP_NEON);
#elif defined(__LINUX__)
    /* No getauxval(): parse /proc/self/auxv by hand. */
    return readProcAuxvForNeon();
#elif defined(__ANDROID__)
    /* Use NDK cpufeatures to read either /proc/self/auxv or /proc/cpuinfo */
    {
        AndroidCpuFamily cpu_family = android_getCpuFamily();
        if (cpu_family == ANDROID_CPU_FAMILY_ARM) {
            uint64_t cpu_features = android_getCpuFeatures();
            if ((cpu_features & ANDROID_CPU_ARM_FEATURE_NEON) != 0) {
                return 1;
            }
        }
        return 0;
    }
#elif defined(__RISCOS__)
    /* Use the VFPSupport_Features SWI to access the MVFR registers */
    {
        _kernel_swi_regs regs;
	regs.r[0] = 0;
        /* A NULL return means the SWI succeeded; NEON is reported only when
           the masked field of r2 equals 0x111000. */
        if (_kernel_swi(VFPSupport_Features, &regs, &regs) == NULL) {
            if ((regs.r[2] & 0xFFF000) == 0x111000) {
                return 1;
            }
        }
        return 0;
    }
#else
#warning SDL_HasNEON is not implemented for this ARM platform. Write me.
    return 0;
#endif
}
499 
500 static int
CPU_have3DNow(void)501 CPU_have3DNow(void)
502 {
503     if (CPU_CPUIDMaxFunction > 0) {  /* that is, do we have CPUID at all? */
504         int a, b, c, d;
505         cpuid(0x80000000, a, b, c, d);
506         if (a >= 0x80000001) {
507             cpuid(0x80000001, a, b, c, d);
508             return (d & 0x80000000);
509         }
510     }
511     return 0;
512 }
513 
/* Feature tests against the cached CPUID function 1 registers.  As filled in
   by CPU_calcCPUIDFeatures(), CPU_CPUIDFeatures[3] is EDX and
   CPU_CPUIDFeatures[2] is ECX, so these only give meaningful answers after
   that function has run.  Each expands to a non-zero value when the feature
   bit is set. */
#define CPU_haveRDTSC() (CPU_CPUIDFeatures[3] & 0x00000010)
#define CPU_haveMMX() (CPU_CPUIDFeatures[3] & 0x00800000)
#define CPU_haveSSE() (CPU_CPUIDFeatures[3] & 0x02000000)
#define CPU_haveSSE2() (CPU_CPUIDFeatures[3] & 0x04000000)
#define CPU_haveSSE3() (CPU_CPUIDFeatures[2] & 0x00000001)
#define CPU_haveSSE41() (CPU_CPUIDFeatures[2] & 0x00080000)
#define CPU_haveSSE42() (CPU_CPUIDFeatures[2] & 0x00100000)
/* AVX additionally requires that the OS saves YMM register state. */
#define CPU_haveAVX() (CPU_OSSavesYMM && (CPU_CPUIDFeatures[2] & 0x10000000))
522 
523 static int
CPU_haveAVX2(void)524 CPU_haveAVX2(void)
525 {
526     if (CPU_OSSavesYMM && (CPU_CPUIDMaxFunction >= 7)) {
527         int a, b, c, d;
528         (void) a; (void) b; (void) c; (void) d;  /* compiler warnings... */
529         cpuid(7, a, b, c, d);
530         return (b & 0x00000020);
531     }
532     return 0;
533 }
534 
535 static int
CPU_haveAVX512F(void)536 CPU_haveAVX512F(void)
537 {
538     if (CPU_OSSavesZMM && (CPU_CPUIDMaxFunction >= 7)) {
539         int a, b, c, d;
540         (void) a; (void) b; (void) c; (void) d;  /* compiler warnings... */
541         cpuid(7, a, b, c, d);
542         return (b & 0x00010000);
543     }
544     return 0;
545 }
546 
/* Cached result of SDL_GetCPUCount(); 0 means "not determined yet". */
static int SDL_CPUCount = 0;

/* Return the number of CPUs, computed on the first call and cached.
 *
 * Each platform query that is compiled in is tried in turn until one yields
 * a positive value.  If none does (or detection is disabled), the count
 * falls back to 1, so the result is always at least 1.
 */
int
SDL_GetCPUCount(void)
{
    /* Fast path: a previous call already settled the answer. */
    if (SDL_CPUCount) {
        return SDL_CPUCount;
    }

#ifndef SDL_CPUINFO_DISABLED
#if defined(HAVE_SYSCONF) && defined(_SC_NPROCESSORS_ONLN)
    if (SDL_CPUCount < 1) {
        SDL_CPUCount = (int)sysconf(_SC_NPROCESSORS_ONLN);
    }
#endif
#ifdef HAVE_SYSCTLBYNAME
    if (SDL_CPUCount < 1) {
        size_t size = sizeof(SDL_CPUCount);
        sysctlbyname("hw.ncpu", &SDL_CPUCount, &size, NULL, 0);
    }
#endif
#ifdef __WIN32__
    if (SDL_CPUCount < 1) {
        SYSTEM_INFO info;
        GetSystemInfo(&info);
        SDL_CPUCount = info.dwNumberOfProcessors;
    }
#endif
#ifdef __OS2__
    if (SDL_CPUCount < 1) {
        DosQuerySysInfo(QSV_NUMPROCESSORS, QSV_NUMPROCESSORS,
                        &SDL_CPUCount, sizeof(SDL_CPUCount) );
    }
#endif
#endif

    /* Whatever happened above, never report fewer than one CPU. */
    if (SDL_CPUCount < 1) {
        SDL_CPUCount = 1;
    }
    return SDL_CPUCount;
}
586 
587 /* Oh, such a sweet sweet trick, just not very useful. :) */
588 static const char *
SDL_GetCPUType(void)589 SDL_GetCPUType(void)
590 {
591     static char SDL_CPUType[13];
592 
593     if (!SDL_CPUType[0]) {
594         int i = 0;
595 
596         CPU_calcCPUIDFeatures();
597         if (CPU_CPUIDMaxFunction > 0) {  /* do we have CPUID at all? */
598             int a, b, c, d;
599             cpuid(0x00000000, a, b, c, d);
600             (void) a;
601             SDL_CPUType[i++] = (char)(b & 0xff); b >>= 8;
602             SDL_CPUType[i++] = (char)(b & 0xff); b >>= 8;
603             SDL_CPUType[i++] = (char)(b & 0xff); b >>= 8;
604             SDL_CPUType[i++] = (char)(b & 0xff);
605 
606             SDL_CPUType[i++] = (char)(d & 0xff); d >>= 8;
607             SDL_CPUType[i++] = (char)(d & 0xff); d >>= 8;
608             SDL_CPUType[i++] = (char)(d & 0xff); d >>= 8;
609             SDL_CPUType[i++] = (char)(d & 0xff);
610 
611             SDL_CPUType[i++] = (char)(c & 0xff); c >>= 8;
612             SDL_CPUType[i++] = (char)(c & 0xff); c >>= 8;
613             SDL_CPUType[i++] = (char)(c & 0xff); c >>= 8;
614             SDL_CPUType[i++] = (char)(c & 0xff);
615         }
616         if (!SDL_CPUType[0]) {
617             SDL_strlcpy(SDL_CPUType, "Unknown", sizeof(SDL_CPUType));
618         }
619     }
620     return SDL_CPUType;
621 }
622 
623 
624 #ifdef TEST_MAIN  /* !!! FIXME: only used for test at the moment. */
625 static const char *
SDL_GetCPUName(void)626 SDL_GetCPUName(void)
627 {
628     static char SDL_CPUName[48];
629 
630     if (!SDL_CPUName[0]) {
631         int i = 0;
632         int a, b, c, d;
633 
634         CPU_calcCPUIDFeatures();
635         if (CPU_CPUIDMaxFunction > 0) {  /* do we have CPUID at all? */
636             cpuid(0x80000000, a, b, c, d);
637             if (a >= 0x80000004) {
638                 cpuid(0x80000002, a, b, c, d);
639                 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
640                 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
641                 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
642                 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
643                 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
644                 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
645                 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
646                 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
647                 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
648                 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
649                 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
650                 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
651                 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
652                 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
653                 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
654                 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
655                 cpuid(0x80000003, a, b, c, d);
656                 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
657                 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
658                 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
659                 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
660                 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
661                 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
662                 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
663                 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
664                 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
665                 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
666                 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
667                 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
668                 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
669                 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
670                 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
671                 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
672                 cpuid(0x80000004, a, b, c, d);
673                 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
674                 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
675                 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
676                 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
677                 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
678                 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
679                 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
680                 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
681                 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
682                 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
683                 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
684                 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
685                 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
686                 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
687                 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
688                 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
689             }
690         }
691         if (!SDL_CPUName[0]) {
692             SDL_strlcpy(SDL_CPUName, "Unknown", sizeof(SDL_CPUName));
693         }
694     }
695     return SDL_CPUName;
696 }
697 #endif
698 
699 int
SDL_GetCPUCacheLineSize(void)700 SDL_GetCPUCacheLineSize(void)
701 {
702     const char *cpuType = SDL_GetCPUType();
703     int a, b, c, d;
704     (void) a; (void) b; (void) c; (void) d;
705     if (SDL_strcmp(cpuType, "GenuineIntel") == 0) {
706         cpuid(0x00000001, a, b, c, d);
707         return (((b >> 8) & 0xff) * 8);
708     } else if (SDL_strcmp(cpuType, "AuthenticAMD") == 0 || SDL_strcmp(cpuType, "HygonGenuine") == 0) {
709         cpuid(0x80000005, a, b, c, d);
710         return (c & 0xff);
711     } else {
712         /* Just make a guess here... */
713         return SDL_CACHELINE_SIZE;
714     }
715 }
716 
717 static Uint32 SDL_CPUFeatures = 0xFFFFFFFF;
718 static Uint32 SDL_SIMDAlignment = 0xFFFFFFFF;
719 
720 static Uint32
SDL_GetCPUFeatures(void)721 SDL_GetCPUFeatures(void)
722 {
723     if (SDL_CPUFeatures == 0xFFFFFFFF) {
724         CPU_calcCPUIDFeatures();
725         SDL_CPUFeatures = 0;
726         SDL_SIMDAlignment = sizeof(void *);  /* a good safe base value */
727         if (CPU_haveRDTSC()) {
728             SDL_CPUFeatures |= CPU_HAS_RDTSC;
729         }
730         if (CPU_haveAltiVec()) {
731             SDL_CPUFeatures |= CPU_HAS_ALTIVEC;
732             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
733         }
734         if (CPU_haveMMX()) {
735             SDL_CPUFeatures |= CPU_HAS_MMX;
736             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 8);
737         }
738         if (CPU_have3DNow()) {
739             SDL_CPUFeatures |= CPU_HAS_3DNOW;
740             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 8);
741         }
742         if (CPU_haveSSE()) {
743             SDL_CPUFeatures |= CPU_HAS_SSE;
744             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
745         }
746         if (CPU_haveSSE2()) {
747             SDL_CPUFeatures |= CPU_HAS_SSE2;
748             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
749         }
750         if (CPU_haveSSE3()) {
751             SDL_CPUFeatures |= CPU_HAS_SSE3;
752             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
753         }
754         if (CPU_haveSSE41()) {
755             SDL_CPUFeatures |= CPU_HAS_SSE41;
756             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
757         }
758         if (CPU_haveSSE42()) {
759             SDL_CPUFeatures |= CPU_HAS_SSE42;
760             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
761         }
762         if (CPU_haveAVX()) {
763             SDL_CPUFeatures |= CPU_HAS_AVX;
764             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 32);
765         }
766         if (CPU_haveAVX2()) {
767             SDL_CPUFeatures |= CPU_HAS_AVX2;
768             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 32);
769         }
770         if (CPU_haveAVX512F()) {
771             SDL_CPUFeatures |= CPU_HAS_AVX512F;
772             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 64);
773         }
774         if (CPU_haveARMSIMD()) {
775             SDL_CPUFeatures |= CPU_HAS_ARM_SIMD;
776             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
777         }
778         if (CPU_haveNEON()) {
779             SDL_CPUFeatures |= CPU_HAS_NEON;
780             SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
781         }
782     }
783     return SDL_CPUFeatures;
784 }
785 
786 #define CPU_FEATURE_AVAILABLE(f) ((SDL_GetCPUFeatures() & f) ? SDL_TRUE : SDL_FALSE)
787 
SDL_HasRDTSC(void)788 SDL_bool SDL_HasRDTSC(void)
789 {
790     return CPU_FEATURE_AVAILABLE(CPU_HAS_RDTSC);
791 }
792 
793 SDL_bool
SDL_HasAltiVec(void)794 SDL_HasAltiVec(void)
795 {
796     return CPU_FEATURE_AVAILABLE(CPU_HAS_ALTIVEC);
797 }
798 
799 SDL_bool
SDL_HasMMX(void)800 SDL_HasMMX(void)
801 {
802     return CPU_FEATURE_AVAILABLE(CPU_HAS_MMX);
803 }
804 
805 SDL_bool
SDL_Has3DNow(void)806 SDL_Has3DNow(void)
807 {
808     return CPU_FEATURE_AVAILABLE(CPU_HAS_3DNOW);
809 }
810 
811 SDL_bool
SDL_HasSSE(void)812 SDL_HasSSE(void)
813 {
814     return CPU_FEATURE_AVAILABLE(CPU_HAS_SSE);
815 }
816 
817 SDL_bool
SDL_HasSSE2(void)818 SDL_HasSSE2(void)
819 {
820     return CPU_FEATURE_AVAILABLE(CPU_HAS_SSE2);
821 }
822 
823 SDL_bool
SDL_HasSSE3(void)824 SDL_HasSSE3(void)
825 {
826     return CPU_FEATURE_AVAILABLE(CPU_HAS_SSE3);
827 }
828 
829 SDL_bool
SDL_HasSSE41(void)830 SDL_HasSSE41(void)
831 {
832     return CPU_FEATURE_AVAILABLE(CPU_HAS_SSE41);
833 }
834 
835 SDL_bool
SDL_HasSSE42(void)836 SDL_HasSSE42(void)
837 {
838     return CPU_FEATURE_AVAILABLE(CPU_HAS_SSE42);
839 }
840 
841 SDL_bool
SDL_HasAVX(void)842 SDL_HasAVX(void)
843 {
844     return CPU_FEATURE_AVAILABLE(CPU_HAS_AVX);
845 }
846 
847 SDL_bool
SDL_HasAVX2(void)848 SDL_HasAVX2(void)
849 {
850     return CPU_FEATURE_AVAILABLE(CPU_HAS_AVX2);
851 }
852 
853 SDL_bool
SDL_HasAVX512F(void)854 SDL_HasAVX512F(void)
855 {
856     return CPU_FEATURE_AVAILABLE(CPU_HAS_AVX512F);
857 }
858 
859 SDL_bool
SDL_HasARMSIMD(void)860 SDL_HasARMSIMD(void)
861 {
862     return CPU_FEATURE_AVAILABLE(CPU_HAS_ARM_SIMD);
863 }
864 
865 SDL_bool
SDL_HasNEON(void)866 SDL_HasNEON(void)
867 {
868     return CPU_FEATURE_AVAILABLE(CPU_HAS_NEON);
869 }
870 
/* Cached result of SDL_GetSystemRAM(); 0 means "not determined yet". */
static int SDL_SystemRAM = 0;

/* Return the amount of system RAM in MiB.
 *
 * Each platform query that is compiled in is tried in turn until one yields
 * a positive value, which is then cached.  Returns 0 when nothing could be
 * determined (or detection is disabled); in that case the next call tries
 * again.
 */
int
SDL_GetSystemRAM(void)
{
    if (!SDL_SystemRAM) {
#ifndef SDL_CPUINFO_DISABLED
#if defined(HAVE_SYSCONF) && defined(_SC_PHYS_PAGES) && defined(_SC_PAGESIZE)
        if (SDL_SystemRAM <= 0) {
            /* Page count times page size, widened to 64 bits first. */
            SDL_SystemRAM = (int)((Sint64)sysconf(_SC_PHYS_PAGES) * sysconf(_SC_PAGESIZE) / (1024*1024));
        }
#endif
#ifdef HAVE_SYSCTLBYNAME
        if (SDL_SystemRAM <= 0) {
#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__NetBSD__)
#ifdef HW_REALMEM
            int mib[2] = {CTL_HW, HW_REALMEM};
#else
            /* might only report up to 2 GiB */
            int mib[2] = {CTL_HW, HW_PHYSMEM};
#endif /* HW_REALMEM */
#else
            int mib[2] = {CTL_HW, HW_MEMSIZE};
#endif /* __FreeBSD__ || __FreeBSD_kernel__ */
            Uint64 memsize = 0;
            size_t len = sizeof(memsize);

            if (sysctl(mib, 2, &memsize, &len, NULL, 0) == 0) {
                SDL_SystemRAM = (int)(memsize / (1024*1024));
            }
        }
#endif
#ifdef __WIN32__
        if (SDL_SystemRAM <= 0) {
            MEMORYSTATUSEX stat;
            stat.dwLength = sizeof(stat);
            if (GlobalMemoryStatusEx(&stat)) {
                SDL_SystemRAM = (int)(stat.ullTotalPhys / (1024 * 1024));
            }
        }
#endif
#ifdef __OS2__
        if (SDL_SystemRAM <= 0) {
            /* sysram stays 0 if the query fails, so the result is 0 too. */
            Uint32 sysram = 0;
            DosQuerySysInfo(QSV_TOTPHYSMEM, QSV_TOTPHYSMEM, &sysram, 4);
            SDL_SystemRAM = (int) (sysram / 0x100000U);
        }
#endif
#ifdef __RISCOS__
        if (SDL_SystemRAM <= 0) {
            _kernel_swi_regs regs;
            regs.r[0] = 0x108;
            if (_kernel_swi(OS_Memory, &regs, &regs) == NULL) {
                /* Widen before multiplying: the product of the two values
                   returned in r1 and r2 does not fit in a 32-bit int once
                   it reaches 2 GiB, and signed overflow is undefined. */
                SDL_SystemRAM = (int)(((Sint64)regs.r[1] * regs.r[2]) / (1024 * 1024));
            }
        }
#endif
#endif
    }
    return SDL_SystemRAM;
}
932 
933 
934 size_t
SDL_SIMDGetAlignment(void)935 SDL_SIMDGetAlignment(void)
936 {
937     if (SDL_SIMDAlignment == 0xFFFFFFFF) {
938         SDL_GetCPUFeatures();  /* make sure this has been calculated */
939     }
940     SDL_assert(SDL_SIMDAlignment != 0);
941     return SDL_SIMDAlignment;
942 }
943 
944 void *
SDL_SIMDAlloc(const size_t len)945 SDL_SIMDAlloc(const size_t len)
946 {
947     const size_t alignment = SDL_SIMDGetAlignment();
948     const size_t padding = alignment - (len % alignment);
949     const size_t padded = (padding != alignment) ? (len + padding) : len;
950     Uint8 *retval = NULL;
951     Uint8 *ptr = (Uint8 *) SDL_malloc(padded + alignment + sizeof (void *));
952     if (ptr) {
953         /* store the actual malloc pointer right before our aligned pointer. */
954         retval = ptr + sizeof (void *);
955         retval += alignment - (((size_t) retval) % alignment);
956         *(((void **) retval) - 1) = ptr;
957     }
958     return retval;
959 }
960 
961 void *
SDL_SIMDRealloc(void * mem,const size_t len)962 SDL_SIMDRealloc(void *mem, const size_t len)
963 {
964     const size_t alignment = SDL_SIMDGetAlignment();
965     const size_t padding = alignment - (len % alignment);
966     const size_t padded = (padding != alignment) ? (len + padding) : len;
967     Uint8 *retval = (Uint8*) mem;
968     void *oldmem = mem;
969     size_t memdiff, ptrdiff;
970     Uint8 *ptr;
971 
972     if (mem) {
973         void **realptr = (void **) mem;
974         realptr--;
975         mem = *(((void **) mem) - 1);
976 
977         /* Check the delta between the real pointer and user pointer */
978         memdiff = ((size_t) oldmem) - ((size_t) mem);
979     }
980 
981     ptr = (Uint8 *) SDL_realloc(mem, padded + alignment + sizeof (void *));
982 
983     if (ptr == mem) {
984         return retval; /* Pointer didn't change, nothing to do */
985     }
986     if (ptr == NULL) {
987         return NULL; /* Out of memory, bail! */
988     }
989 
990     /* Store the actual malloc pointer right before our aligned pointer. */
991     retval = ptr + sizeof (void *);
992     retval += alignment - (((size_t) retval) % alignment);
993 
994     /* Make sure the delta is the same! */
995     if (mem) {
996         ptrdiff = ((size_t) retval) - ((size_t) ptr);
997         if (memdiff != ptrdiff) { /* Delta has changed, copy to new offset! */
998             oldmem = (void*) (((size_t) ptr) + memdiff);
999 
1000             /* Even though the data past the old `len` is undefined, this is the
1001              * only length value we have, and it guarantees that we copy all the
1002              * previous memory anyhow.
1003              */
1004             SDL_memmove(retval, oldmem, len);
1005         }
1006     }
1007 
1008     /* Actually store the malloc pointer, finally. */
1009     *(((void **) retval) - 1) = ptr;
1010     return retval;
1011 }
1012 
/*
 * Release a block returned by SDL_SIMDAlloc() or SDL_SIMDRealloc().
 *
 * The pointer handed out by the underlying allocator is read from the void*
 * slot immediately before `ptr` and passed to SDL_free(). NULL is ignored.
 */
void
SDL_SIMDFree(void *ptr)
{
    if (ptr) {
        void **realptr = (void **) ptr;
        realptr--;  /* step back to the slot holding the real allocation pointer */
        SDL_free(*realptr);
    }
}
1022 
1023 
#ifdef TEST_MAIN

#include <stdio.h>

/*
 * Diagnostic entry point, compiled only when TEST_MAIN is defined.
 * Prints one line per CPU query exposed by this file and returns 0.
 */
int
main(void)
{
    printf("CPU count: %d\n", SDL_GetCPUCount());
    printf("CPU type: %s\n", SDL_GetCPUType());
    printf("CPU name: %s\n", SDL_GetCPUName());
    printf("CacheLine size: %d\n", SDL_GetCPUCacheLineSize());
    printf("RDTSC: %d\n", SDL_HasRDTSC());
    printf("Altivec: %d\n", SDL_HasAltiVec());
    printf("MMX: %d\n", SDL_HasMMX());
    printf("3DNow: %d\n", SDL_Has3DNow());
    printf("SSE: %d\n", SDL_HasSSE());
    printf("SSE2: %d\n", SDL_HasSSE2());
    printf("SSE3: %d\n", SDL_HasSSE3());
    printf("SSE4.1: %d\n", SDL_HasSSE41());
    printf("SSE4.2: %d\n", SDL_HasSSE42());
    printf("AVX: %d\n", SDL_HasAVX());
    printf("AVX2: %d\n", SDL_HasAVX2());
    printf("AVX-512F: %d\n", SDL_HasAVX512F());
    printf("ARM SIMD: %d\n", SDL_HasARMSIMD());
    printf("NEON: %d\n", SDL_HasNEON());
    printf("RAM: %d MB\n", SDL_GetSystemRAM());
    return 0;
}

#endif /* TEST_MAIN */
1054 
1055 /* vi: set ts=4 sw=4 expandtab: */
1056