1 /*
2 Simple DirectMedia Layer
3 Copyright (C) 1997-2020 Sam Lantinga <slouken@libsdl.org>
4
5 This software is provided 'as-is', without any express or implied
6 warranty. In no event will the authors be held liable for any damages
7 arising from the use of this software.
8
9 Permission is granted to anyone to use this software for any purpose,
10 including commercial applications, and to alter it and redistribute it
11 freely, subject to the following restrictions:
12
13 1. The origin of this software must not be misrepresented; you must not
14 claim that you wrote the original software. If you use this software
15 in a product, an acknowledgment in the product documentation would be
16 appreciated but is not required.
17 2. Altered source versions must be plainly marked as such, and must not be
18 misrepresented as being the original software.
19 3. This notice may not be removed or altered from any source distribution.
20 */
21 #ifdef TEST_MAIN
22 #include "SDL_config.h"
23 #else
24 #include "../SDL_internal.h"
25 #endif
26
27 #if defined(__WIN32__) || defined(__WINRT__)
28 #include "../core/windows/SDL_windows.h"
29 #endif
30 #if defined(__OS2__)
31 #define INCL_DOS
32 #include <os2.h>
33 #ifndef QSV_NUMPROCESSORS
34 #define QSV_NUMPROCESSORS 26
35 #endif
36 #endif
37
38 /* CPU feature detection for SDL */
39
40 #include "SDL_cpuinfo.h"
41 #include "SDL_assert.h"
42
43 #ifdef HAVE_SYSCONF
44 #include <unistd.h>
45 #endif
46 #ifdef HAVE_SYSCTLBYNAME
47 #include <sys/types.h>
48 #include <sys/sysctl.h>
49 #endif
50 #if defined(__MACOSX__) && (defined(__ppc__) || defined(__ppc64__))
51 #include <sys/sysctl.h> /* For AltiVec check */
52 #elif defined(__OpenBSD__) && defined(__powerpc__)
53 #include <sys/param.h>
54 #include <sys/sysctl.h> /* For AltiVec check */
55 #include <machine/cpu.h>
56 #elif SDL_ALTIVEC_BLITTERS && HAVE_SETJMP
57 #include <signal.h>
58 #include <setjmp.h>
59 #endif
60
61 #if defined(__QNXNTO__)
62 #include <sys/syspage.h>
63 #endif
64
65 #if (defined(__LINUX__) || defined(__ANDROID__)) && defined(__ARM_ARCH)
66 /*#include <asm/hwcap.h>*/
67 #ifndef AT_HWCAP
68 #define AT_HWCAP 16
69 #endif
70 #ifndef AT_PLATFORM
71 #define AT_PLATFORM 15
72 #endif
73 /* Prevent compilation error when including elf.h would also try to define AT_* as an enum */
74 #ifndef AT_NULL
75 #define AT_NULL 0
76 #endif
77 #ifndef HWCAP_NEON
78 #define HWCAP_NEON (1 << 12)
79 #endif
80 #if defined HAVE_GETAUXVAL
81 #include <sys/auxv.h>
82 #else
83 #include <fcntl.h>
84 #endif
85 #endif
86
87 #if defined(__ANDROID__) && defined(__ARM_ARCH) && !defined(HAVE_GETAUXVAL)
88 #if __ARM_ARCH < 8
89 #include <cpu-features.h>
90 #endif
91 #endif
92
93 #ifdef __RISCOS__
94 #include <kernel.h>
95 #include <swis.h>
96 #endif
97
/* One bit per detectable CPU feature; OR-ed together into SDL_CPUFeatures. */
#define CPU_HAS_RDTSC    0x00000001
#define CPU_HAS_ALTIVEC  0x00000002
#define CPU_HAS_MMX      0x00000004
#define CPU_HAS_3DNOW    0x00000008
#define CPU_HAS_SSE      0x00000010
#define CPU_HAS_SSE2     0x00000020
#define CPU_HAS_SSE3     0x00000040
#define CPU_HAS_SSE41    0x00000080
#define CPU_HAS_SSE42    0x00000100
#define CPU_HAS_AVX      0x00000200
#define CPU_HAS_AVX2     0x00000400
#define CPU_HAS_NEON     0x00000800
#define CPU_HAS_AVX512F  0x00001000
#define CPU_HAS_ARM_SIMD 0x00002000
112
113 #if SDL_ALTIVEC_BLITTERS && HAVE_SETJMP && !__MACOSX__ && !__OpenBSD__
114 /* This is the brute force way of detecting instruction sets...
115 the idea is borrowed from the libmpeg2 library - thanks!
116 */
/* Recovery point that illegal_instruction() jumps back to. */
static jmp_buf jmpbuf;

/* Signal handler: abandon the faulting code by jumping back to the
   setjmp() saved in jmpbuf, making that setjmp() return 1. */
static void
illegal_instruction(int sig)
{
    (void) sig;  /* the signal number itself is not needed */
    longjmp(jmpbuf, 1);
}
123 #endif /* HAVE_SETJMP */
124
/*
 * Report whether the CPUID instruction can be used.
 *
 * The x86 assembly branches toggle the ID bit (0x200000) of EFLAGS and test
 * whether the toggled value reads back; if it does, has_CPUID is set to 1.
 * 64-bit MSVC builds simply report 1.
 *
 * Returns 1 when CPUID is available, 0 otherwise (also 0 when
 * SDL_CPUINFO_DISABLED is defined or no branch matches the toolchain).
 */
static int
CPU_haveCPUID(void)
{
    int has_CPUID = 0;

/* *INDENT-OFF* */
#ifndef SDL_CPUINFO_DISABLED
#if defined(__GNUC__) && defined(i386)
    /* "+m": the asm only stores to has_CPUID on the success path, so the
       operand must be read-write or the compiler may discard the "= 0". */
    __asm__ (
"        pushfl                      # Get original EFLAGS             \n"
"        popl    %%eax                                                 \n"
"        movl    %%eax,%%ecx                                           \n"
"        xorl    $0x200000,%%eax     # Flip ID bit in EFLAGS           \n"
"        pushl   %%eax               # Save new EFLAGS value on stack  \n"
"        popfl                       # Replace current EFLAGS value    \n"
"        pushfl                      # Get new EFLAGS                  \n"
"        popl    %%eax               # Store new EFLAGS in EAX         \n"
"        xorl    %%ecx,%%eax         # Can not toggle ID bit,          \n"
"        jz      1f                  # Processor=80486                 \n"
"        movl    $1,%0               # We have CPUID support           \n"
"1:                                                                    \n"
    : "+m" (has_CPUID)
    :
    : "%eax", "%ecx", "cc"
    );
#elif defined(__GNUC__) && defined(__x86_64__)
/* Technically, if this is being compiled under __x86_64__ then it has
   CPUid by definition.  But it's nice to be able to prove it.  :)      */
    /* "+m" for the same reason as the 32-bit branch above. */
    __asm__ (
"        pushfq                      # Get original EFLAGS             \n"
"        popq    %%rax                                                 \n"
"        movq    %%rax,%%rcx                                           \n"
"        xorl    $0x200000,%%eax     # Flip ID bit in EFLAGS           \n"
"        pushq   %%rax               # Save new EFLAGS value on stack  \n"
"        popfq                       # Replace current EFLAGS value    \n"
"        pushfq                      # Get new EFLAGS                  \n"
"        popq    %%rax               # Store new EFLAGS in EAX         \n"
"        xorl    %%ecx,%%eax         # Can not toggle ID bit,          \n"
"        jz      1f                  # Processor=80486                 \n"
"        movl    $1,%0               # We have CPUID support           \n"
"1:                                                                    \n"
    : "+m" (has_CPUID)
    :
    : "%rax", "%rcx", "cc"
    );
#elif (defined(_MSC_VER) && defined(_M_IX86)) || defined(__WATCOMC__)
    __asm {
        pushfd                      ; Get original EFLAGS
        pop     eax
        mov     ecx, eax
        xor     eax, 200000h        ; Flip ID bit in EFLAGS
        push    eax                 ; Save new EFLAGS value on stack
        popfd                       ; Replace current EFLAGS value
        pushfd                      ; Get new EFLAGS
        pop     eax                 ; Store new EFLAGS in EAX
        xor     eax, ecx            ; Can not toggle ID bit,
        jz      done                ; Processor=80486
        mov     has_CPUID,1         ; We have CPUID support
done:
    }
#elif defined(_MSC_VER) && defined(_M_X64)
    has_CPUID = 1;
#elif defined(__sun) && defined(__i386)
    /* NOTE(review): this stores through a hard-coded frame offset,
       presumably where this compiler places has_CPUID - verify before
       adding or reordering locals in this function. */
    __asm (
"       pushfl                 \n"
"       popl    %eax           \n"
"       movl    %eax,%ecx      \n"
"       xorl    $0x200000,%eax \n"
"       pushl   %eax           \n"
"       popfl                  \n"
"       pushfl                 \n"
"       popl    %eax           \n"
"       xorl    %ecx,%eax      \n"
"       jz      1f             \n"
"       movl    $1,-8(%ebp)    \n"
"1:                            \n"
    );
#elif defined(__sun) && defined(__amd64)
    /* NOTE(review): same hard-coded frame offset caveat as the 32-bit
       Sun branch. */
    __asm (
"       pushfq                 \n"
"       popq    %rax           \n"
"       movq    %rax,%rcx      \n"
"       xorl    $0x200000,%eax \n"
"       pushq   %rax           \n"
"       popfq                  \n"
"       pushfq                 \n"
"       popq    %rax           \n"
"       xorl    %ecx,%eax      \n"
"       jz      1f             \n"
"       movl    $1,-8(%rbp)    \n"
"1:                            \n"
    );
#endif
#endif
/* *INDENT-ON* */
    return has_CPUID;
}
222
/*
 * cpuid(func, a, b, c, d): execute CPUID for leaf `func` and store
 * EAX/EBX/ECX/EDX into the lvalues a/b/c/d.  The assembly variants zero ECX
 * before issuing the instruction.  On toolchains/CPUs without a matching
 * variant, the fallback just zeroes all four outputs.
 */
#if defined(__GNUC__) && defined(i386)
/* EBX is saved/restored by hand and its value is handed back through ESI. */
#define cpuid(func, a, b, c, d) \
    __asm__ __volatile__ ( \
"        pushl %%ebx        \n" \
"        xorl %%ecx,%%ecx   \n" \
"        cpuid              \n" \
"        movl %%ebx, %%esi  \n" \
"        popl %%ebx         \n" : \
            "=a" (a), "=S" (b), "=c" (c), "=d" (d) : "a" (func))
#elif defined(__GNUC__) && defined(__x86_64__)
/* 64-bit flavour of the sequence above. */
#define cpuid(func, a, b, c, d) \
    __asm__ __volatile__ ( \
"        pushq %%rbx        \n" \
"        xorq %%rcx,%%rcx   \n" \
"        cpuid              \n" \
"        movq %%rbx, %%rsi  \n" \
"        popq %%rbx         \n" : \
            "=a" (a), "=S" (b), "=c" (c), "=d" (d) : "a" (func))
#elif (defined(_MSC_VER) && defined(_M_IX86)) || defined(__WATCOMC__)
#define cpuid(func, a, b, c, d) \
    __asm { \
        __asm mov eax, func \
        __asm xor ecx, ecx \
        __asm cpuid \
        __asm mov a, eax \
        __asm mov b, ebx \
        __asm mov c, ecx \
        __asm mov d, edx \
}
#elif defined(_MSC_VER) && defined(_M_X64)
/* No inline assembly here: go through the __cpuid() intrinsic instead. */
#define cpuid(func, a, b, c, d) \
{ \
    int CPUInfo[4]; \
    __cpuid(CPUInfo, func); \
    a = CPUInfo[0]; \
    b = CPUInfo[1]; \
    c = CPUInfo[2]; \
    d = CPUInfo[3]; \
}
#else
/* No CPUID available: report all-zero registers. */
#define cpuid(func, a, b, c, d) \
    do { a = b = c = d = 0; (void) a; (void) b; (void) c; (void) d; } while (0)
#endif
266
267 static int CPU_CPUIDFeatures[4];
268 static int CPU_CPUIDMaxFunction = 0;
269 static SDL_bool CPU_OSSavesYMM = SDL_FALSE;
270 static SDL_bool CPU_OSSavesZMM = SDL_FALSE;
271
272 static void
CPU_calcCPUIDFeatures(void)273 CPU_calcCPUIDFeatures(void)
274 {
275 static SDL_bool checked = SDL_FALSE;
276 if (!checked) {
277 checked = SDL_TRUE;
278 if (CPU_haveCPUID()) {
279 int a, b, c, d;
280 cpuid(0, a, b, c, d);
281 CPU_CPUIDMaxFunction = a;
282 if (CPU_CPUIDMaxFunction >= 1) {
283 cpuid(1, a, b, c, d);
284 CPU_CPUIDFeatures[0] = a;
285 CPU_CPUIDFeatures[1] = b;
286 CPU_CPUIDFeatures[2] = c;
287 CPU_CPUIDFeatures[3] = d;
288
289 /* Check to make sure we can call xgetbv */
290 if (c & 0x08000000) {
291 /* Call xgetbv to see if YMM (etc) register state is saved */
292 #if defined(__GNUC__) && (defined(i386) || defined(__x86_64__))
293 __asm__(".byte 0x0f, 0x01, 0xd0" : "=a" (a) : "c" (0) : "%edx");
294 #elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)) && (_MSC_FULL_VER >= 160040219) /* VS2010 SP1 */
295 a = (int)_xgetbv(0);
296 #elif (defined(_MSC_VER) && defined(_M_IX86)) || defined(__WATCOMC__)
297 __asm
298 {
299 xor ecx, ecx
300 _asm _emit 0x0f _asm _emit 0x01 _asm _emit 0xd0
301 mov a, eax
302 }
303 #endif
304 CPU_OSSavesYMM = ((a & 6) == 6) ? SDL_TRUE : SDL_FALSE;
305 CPU_OSSavesZMM = (CPU_OSSavesYMM && ((a & 0xe0) == 0xe0)) ? SDL_TRUE : SDL_FALSE;
306 }
307 }
308 }
309 }
310 }
311
/*
 * Detect AltiVec support.
 *
 * Mac OS X (PowerPC) and OpenBSD (PowerPC) ask the kernel via sysctl().
 * Otherwise, when SDL_ALTIVEC_BLITTERS and HAVE_SETJMP are set, an AltiVec
 * instruction is executed with a temporary SIGILL handler installed.
 *
 * Returns non-zero when AltiVec is available, 0 otherwise.
 */
static int
CPU_haveAltiVec(void)
{
    /* presumably volatile so the value survives the longjmp() out of the
       SIGILL handler */
    volatile int altivec = 0;
#ifndef SDL_CPUINFO_DISABLED
#if (defined(__MACOSX__) && (defined(__ppc__) || defined(__ppc64__))) || (defined(__OpenBSD__) && defined(__powerpc__))
#ifdef __OpenBSD__
    int selectors[2] = { CTL_MACHDEP, CPU_ALTIVEC };
#else
    int selectors[2] = { CTL_HW, HW_VECTORUNIT };
#endif
    int hasVectorUnit = 0;
    size_t length = sizeof(hasVectorUnit);
    if (sysctl(selectors, 2, &hasVectorUnit, &length, NULL, 0) == 0) {
        altivec = (hasVectorUnit != 0);
    }
#elif SDL_ALTIVEC_BLITTERS && HAVE_SETJMP
    /* Remember the previous SIGILL disposition so it can be put back. */
    void (*previous) (int sig) = signal(SIGILL, illegal_instruction);
    if (setjmp(jmpbuf) == 0) {
        asm volatile ("mtspr 256, %0\n\t" "vand %%v0, %%v0, %%v0"::"r" (-1));
        altivec = 1;  /* only reached when the instruction did not trap */
    }
    signal(SIGILL, previous);
#endif
#endif
    return altivec;
}
340
/*
 * CPU_haveARMSIMD(): non-zero when the ARM SIMD feature is reported.
 * Exactly one of the definitions below is compiled, chosen by target.
 */
#if defined(__ARM_ARCH) && (__ARM_ARCH >= 6)
/* Built for ARMv6 or later: always reported as present. */
static int
CPU_haveARMSIMD(void)
{
    return 1;
}

#elif !defined(__arm__)
/* Not an ARM build: never present. */
static int
CPU_haveARMSIMD(void)
{
    return 0;
}

#elif defined(__LINUX__)
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <elf.h>

/* Scan /proc/self/auxv for AT_PLATFORM and accept platform strings that
   start with "v6l" or "v7l".  Returns 0 if the file cannot be opened. */
static int
CPU_haveARMSIMD(void)
{
    int arm_simd = 0;
    int fd;

    fd = open("/proc/self/auxv", O_RDONLY);
    if (fd >= 0)
    {
        Elf32_auxv_t aux;
        while (read(fd, &aux, sizeof aux) == sizeof aux)
        {
            if (aux.a_type == AT_PLATFORM)
            {
                /* the entry's value is used as a pointer to the string */
                const char *plat = (const char *) aux.a_un.a_val;
                if (plat) {
                    arm_simd = strncmp(plat, "v6l", 3) == 0 ||
                               strncmp(plat, "v7l", 3) == 0;
                }
            }
        }
        close(fd);
    }
    return arm_simd;
}

#elif defined(__RISCOS__)

/* Query the OS_PlatformFeatures SWI.  Any SWI error, or bit 31 of the
   first reply being clear, yields 0; otherwise the r0 result of the second
   call (r0 = 34, r1 = 29) is returned as-is.
   NOTE(review): the meaning of codes 34/29 is not visible here - confirm
   against the RISC OS documentation. */
static int
CPU_haveARMSIMD(void)
{
    _kernel_swi_regs regs;
    regs.r[0] = 0;
    if (_kernel_swi(OS_PlatformFeatures, &regs, &regs) != NULL)
        return 0;

    if (!(regs.r[0] & (1<<31)))
        return 0;

    regs.r[0] = 34;
    regs.r[1] = 29;
    if (_kernel_swi(OS_PlatformFeatures, &regs, &regs) != NULL)
        return 0;

    return regs.r[0];
}

#else
/* Unknown ARM platform: no detection available, report absent. */
static int
CPU_haveARMSIMD(void)
{
#warning SDL_HasARMSIMD is not implemented for this ARM platform. Write me.
    return 0;
}
#endif
417
418 #if defined(__LINUX__) && defined(__ARM_ARCH) && !defined(HAVE_GETAUXVAL)
419 static int
readProcAuxvForNeon(void)420 readProcAuxvForNeon(void)
421 {
422 int neon = 0;
423 int kv[2];
424 const int fd = open("/proc/self/auxv", O_RDONLY);
425 if (fd != -1) {
426 while (read(fd, kv, sizeof (kv)) == sizeof (kv)) {
427 if (kv[0] == AT_HWCAP) {
428 neon = ((kv[1] & HWCAP_NEON) == HWCAP_NEON);
429 break;
430 }
431 }
432 close(fd);
433 }
434 return neon;
435 }
436 #endif
437
/*
 * Detect ARM NEON support.  Exactly one preprocessor branch is compiled;
 * each either returns a fixed answer for the target or asks the OS.
 *
 * Returns non-zero when NEON is available, 0 otherwise.
 */
static int
CPU_haveNEON(void)
{
/* The way you detect NEON is a privileged instruction on ARM, so you have
   to query the OS kernel in a platform-specific way. :/ */
#if defined(SDL_CPUINFO_DISABLED)
    return 0; /* disabled */
#elif (defined(__WINDOWS__) || defined(__WINRT__)) && (defined(_M_ARM) || defined(_M_ARM64))
/* Visual Studio, for ARM, doesn't define __ARM_ARCH. Handle this first. */
/* Seems to have been removed */
#if !defined(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE)
#define PF_ARM_NEON_INSTRUCTIONS_AVAILABLE 19
#endif
/* All WinRT ARM devices are required to support NEON, but just in case. */
    return IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE) != 0;
#elif defined(__ARM_ARCH) && (__ARM_ARCH >= 8)
    return 1;  /* ARMv8 always has non-optional NEON support. */
#elif defined(__APPLE__) && defined(__ARM_ARCH) && (__ARM_ARCH >= 7)
    /* (note that sysctlbyname("hw.optional.neon") doesn't work!) */
    return 1;  /* all Apple ARMv7 chips and later have NEON. */
#elif defined(__APPLE__)
    return 0;  /* assume anything else from Apple doesn't have NEON. */
#elif !defined(__arm__)
    /* Checked before the OpenBSD shortcut so that non-ARM OpenBSD builds
       do not claim NEON. */
    return 0;  /* not an ARM CPU at all. */
#elif defined(__OpenBSD__)
    return 1;  /* OpenBSD only supports ARMv7 CPUs that have NEON. */
#elif defined(__QNXNTO__)
    return SYSPAGE_ENTRY(cpuinfo)->flags & ARM_CPU_FLAG_NEON;
#elif (defined(__LINUX__) || defined(__ANDROID__)) && defined(HAVE_GETAUXVAL)
    return ((getauxval(AT_HWCAP) & HWCAP_NEON) == HWCAP_NEON);
#elif defined(__LINUX__)
    return readProcAuxvForNeon();
#elif defined(__ANDROID__)
    /* Use NDK cpufeatures to read either /proc/self/auxv or /proc/cpuinfo */
    {
        AndroidCpuFamily cpu_family = android_getCpuFamily();
        if (cpu_family == ANDROID_CPU_FAMILY_ARM) {
            uint64_t cpu_features = android_getCpuFeatures();
            if ((cpu_features & ANDROID_CPU_ARM_FEATURE_NEON) != 0) {
                return 1;
            }
        }
        return 0;
    }
#elif defined(__RISCOS__)
    /* Use the VFPSupport_Features SWI to access the MVFR registers */
    {
        _kernel_swi_regs regs;
        regs.r[0] = 0;
        if (_kernel_swi(VFPSupport_Features, &regs, &regs) == NULL) {
            if ((regs.r[2] & 0xFFF000) == 0x111000) {
                return 1;
            }
        }
        return 0;
    }
#else
#warning SDL_HasNEON is not implemented for this ARM platform. Write me.
    return 0;
#endif
}
499
500 static int
CPU_have3DNow(void)501 CPU_have3DNow(void)
502 {
503 if (CPU_CPUIDMaxFunction > 0) { /* that is, do we have CPUID at all? */
504 int a, b, c, d;
505 cpuid(0x80000000, a, b, c, d);
506 if (a >= 0x80000001) {
507 cpuid(0x80000001, a, b, c, d);
508 return (d & 0x80000000);
509 }
510 }
511 return 0;
512 }
513
/* Feature tests against the cached CPUID leaf-1 registers
   (index 2 = ECX, index 3 = EDX).  AVX additionally needs CPU_OSSavesYMM. */
#define CPU_haveRDTSC() (CPU_CPUIDFeatures[3] & (1 << 4))
#define CPU_haveMMX()   (CPU_CPUIDFeatures[3] & (1 << 23))
#define CPU_haveSSE()   (CPU_CPUIDFeatures[3] & (1 << 25))
#define CPU_haveSSE2()  (CPU_CPUIDFeatures[3] & (1 << 26))
#define CPU_haveSSE3()  (CPU_CPUIDFeatures[2] & (1 << 0))
#define CPU_haveSSE41() (CPU_CPUIDFeatures[2] & (1 << 19))
#define CPU_haveSSE42() (CPU_CPUIDFeatures[2] & (1 << 20))
#define CPU_haveAVX()   (CPU_OSSavesYMM && (CPU_CPUIDFeatures[2] & (1 << 28)))
522
523 static int
CPU_haveAVX2(void)524 CPU_haveAVX2(void)
525 {
526 if (CPU_OSSavesYMM && (CPU_CPUIDMaxFunction >= 7)) {
527 int a, b, c, d;
528 (void) a; (void) b; (void) c; (void) d; /* compiler warnings... */
529 cpuid(7, a, b, c, d);
530 return (b & 0x00000020);
531 }
532 return 0;
533 }
534
535 static int
CPU_haveAVX512F(void)536 CPU_haveAVX512F(void)
537 {
538 if (CPU_OSSavesZMM && (CPU_CPUIDMaxFunction >= 7)) {
539 int a, b, c, d;
540 (void) a; (void) b; (void) c; (void) d; /* compiler warnings... */
541 cpuid(7, a, b, c, d);
542 return (b & 0x00010000);
543 }
544 return 0;
545 }
546
/* Cached processor count; 0 means "not determined yet". */
static int SDL_CPUCount = 0;

/*
 * Return the number of processors, always at least 1.
 *
 * The first call tries each configured platform query in turn until one
 * produces a positive value; the answer is cached for later calls.
 */
int
SDL_GetCPUCount(void)
{
    if (SDL_CPUCount) {
        return SDL_CPUCount;  /* already known */
    }

#ifndef SDL_CPUINFO_DISABLED
#if defined(HAVE_SYSCONF) && defined(_SC_NPROCESSORS_ONLN)
    if (SDL_CPUCount <= 0) {
        SDL_CPUCount = (int)sysconf(_SC_NPROCESSORS_ONLN);
    }
#endif
#ifdef HAVE_SYSCTLBYNAME
    if (SDL_CPUCount <= 0) {
        size_t size = sizeof(SDL_CPUCount);
        sysctlbyname("hw.ncpu", &SDL_CPUCount, &size, NULL, 0);
    }
#endif
#ifdef __WIN32__
    if (SDL_CPUCount <= 0) {
        SYSTEM_INFO info;
        GetSystemInfo(&info);
        SDL_CPUCount = info.dwNumberOfProcessors;
    }
#endif
#ifdef __OS2__
    if (SDL_CPUCount <= 0) {
        DosQuerySysInfo(QSV_NUMPROCESSORS, QSV_NUMPROCESSORS,
                        &SDL_CPUCount, sizeof(SDL_CPUCount) );
    }
#endif
#endif

    /* There has to be at least 1, right? :) */
    if (SDL_CPUCount <= 0) {
        SDL_CPUCount = 1;
    }
    return SDL_CPUCount;
}
586
587 /* Oh, such a sweet sweet trick, just not very useful. :) */
588 static const char *
SDL_GetCPUType(void)589 SDL_GetCPUType(void)
590 {
591 static char SDL_CPUType[13];
592
593 if (!SDL_CPUType[0]) {
594 int i = 0;
595
596 CPU_calcCPUIDFeatures();
597 if (CPU_CPUIDMaxFunction > 0) { /* do we have CPUID at all? */
598 int a, b, c, d;
599 cpuid(0x00000000, a, b, c, d);
600 (void) a;
601 SDL_CPUType[i++] = (char)(b & 0xff); b >>= 8;
602 SDL_CPUType[i++] = (char)(b & 0xff); b >>= 8;
603 SDL_CPUType[i++] = (char)(b & 0xff); b >>= 8;
604 SDL_CPUType[i++] = (char)(b & 0xff);
605
606 SDL_CPUType[i++] = (char)(d & 0xff); d >>= 8;
607 SDL_CPUType[i++] = (char)(d & 0xff); d >>= 8;
608 SDL_CPUType[i++] = (char)(d & 0xff); d >>= 8;
609 SDL_CPUType[i++] = (char)(d & 0xff);
610
611 SDL_CPUType[i++] = (char)(c & 0xff); c >>= 8;
612 SDL_CPUType[i++] = (char)(c & 0xff); c >>= 8;
613 SDL_CPUType[i++] = (char)(c & 0xff); c >>= 8;
614 SDL_CPUType[i++] = (char)(c & 0xff);
615 }
616 if (!SDL_CPUType[0]) {
617 SDL_strlcpy(SDL_CPUType, "Unknown", sizeof(SDL_CPUType));
618 }
619 }
620 return SDL_CPUType;
621 }
622
623
624 #ifdef TEST_MAIN /* !!! FIXME: only used for test at the moment. */
625 static const char *
SDL_GetCPUName(void)626 SDL_GetCPUName(void)
627 {
628 static char SDL_CPUName[48];
629
630 if (!SDL_CPUName[0]) {
631 int i = 0;
632 int a, b, c, d;
633
634 CPU_calcCPUIDFeatures();
635 if (CPU_CPUIDMaxFunction > 0) { /* do we have CPUID at all? */
636 cpuid(0x80000000, a, b, c, d);
637 if (a >= 0x80000004) {
638 cpuid(0x80000002, a, b, c, d);
639 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
640 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
641 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
642 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
643 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
644 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
645 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
646 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
647 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
648 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
649 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
650 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
651 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
652 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
653 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
654 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
655 cpuid(0x80000003, a, b, c, d);
656 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
657 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
658 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
659 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
660 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
661 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
662 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
663 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
664 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
665 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
666 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
667 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
668 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
669 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
670 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
671 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
672 cpuid(0x80000004, a, b, c, d);
673 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
674 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
675 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
676 SDL_CPUName[i++] = (char)(a & 0xff); a >>= 8;
677 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
678 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
679 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
680 SDL_CPUName[i++] = (char)(b & 0xff); b >>= 8;
681 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
682 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
683 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
684 SDL_CPUName[i++] = (char)(c & 0xff); c >>= 8;
685 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
686 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
687 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
688 SDL_CPUName[i++] = (char)(d & 0xff); d >>= 8;
689 }
690 }
691 if (!SDL_CPUName[0]) {
692 SDL_strlcpy(SDL_CPUName, "Unknown", sizeof(SDL_CPUName));
693 }
694 }
695 return SDL_CPUName;
696 }
697 #endif
698
699 int
SDL_GetCPUCacheLineSize(void)700 SDL_GetCPUCacheLineSize(void)
701 {
702 const char *cpuType = SDL_GetCPUType();
703 int a, b, c, d;
704 (void) a; (void) b; (void) c; (void) d;
705 if (SDL_strcmp(cpuType, "GenuineIntel") == 0) {
706 cpuid(0x00000001, a, b, c, d);
707 return (((b >> 8) & 0xff) * 8);
708 } else if (SDL_strcmp(cpuType, "AuthenticAMD") == 0 || SDL_strcmp(cpuType, "HygonGenuine") == 0) {
709 cpuid(0x80000005, a, b, c, d);
710 return (c & 0xff);
711 } else {
712 /* Just make a guess here... */
713 return SDL_CACHELINE_SIZE;
714 }
715 }
716
717 static Uint32 SDL_CPUFeatures = 0xFFFFFFFF;
718 static Uint32 SDL_SIMDAlignment = 0xFFFFFFFF;
719
720 static Uint32
SDL_GetCPUFeatures(void)721 SDL_GetCPUFeatures(void)
722 {
723 if (SDL_CPUFeatures == 0xFFFFFFFF) {
724 CPU_calcCPUIDFeatures();
725 SDL_CPUFeatures = 0;
726 SDL_SIMDAlignment = sizeof(void *); /* a good safe base value */
727 if (CPU_haveRDTSC()) {
728 SDL_CPUFeatures |= CPU_HAS_RDTSC;
729 }
730 if (CPU_haveAltiVec()) {
731 SDL_CPUFeatures |= CPU_HAS_ALTIVEC;
732 SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
733 }
734 if (CPU_haveMMX()) {
735 SDL_CPUFeatures |= CPU_HAS_MMX;
736 SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 8);
737 }
738 if (CPU_have3DNow()) {
739 SDL_CPUFeatures |= CPU_HAS_3DNOW;
740 SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 8);
741 }
742 if (CPU_haveSSE()) {
743 SDL_CPUFeatures |= CPU_HAS_SSE;
744 SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
745 }
746 if (CPU_haveSSE2()) {
747 SDL_CPUFeatures |= CPU_HAS_SSE2;
748 SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
749 }
750 if (CPU_haveSSE3()) {
751 SDL_CPUFeatures |= CPU_HAS_SSE3;
752 SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
753 }
754 if (CPU_haveSSE41()) {
755 SDL_CPUFeatures |= CPU_HAS_SSE41;
756 SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
757 }
758 if (CPU_haveSSE42()) {
759 SDL_CPUFeatures |= CPU_HAS_SSE42;
760 SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
761 }
762 if (CPU_haveAVX()) {
763 SDL_CPUFeatures |= CPU_HAS_AVX;
764 SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 32);
765 }
766 if (CPU_haveAVX2()) {
767 SDL_CPUFeatures |= CPU_HAS_AVX2;
768 SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 32);
769 }
770 if (CPU_haveAVX512F()) {
771 SDL_CPUFeatures |= CPU_HAS_AVX512F;
772 SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 64);
773 }
774 if (CPU_haveARMSIMD()) {
775 SDL_CPUFeatures |= CPU_HAS_ARM_SIMD;
776 SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
777 }
778 if (CPU_haveNEON()) {
779 SDL_CPUFeatures |= CPU_HAS_NEON;
780 SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
781 }
782 }
783 return SDL_CPUFeatures;
784 }
785
786 #define CPU_FEATURE_AVAILABLE(f) ((SDL_GetCPUFeatures() & f) ? SDL_TRUE : SDL_FALSE)
787
SDL_HasRDTSC(void)788 SDL_bool SDL_HasRDTSC(void)
789 {
790 return CPU_FEATURE_AVAILABLE(CPU_HAS_RDTSC);
791 }
792
793 SDL_bool
SDL_HasAltiVec(void)794 SDL_HasAltiVec(void)
795 {
796 return CPU_FEATURE_AVAILABLE(CPU_HAS_ALTIVEC);
797 }
798
799 SDL_bool
SDL_HasMMX(void)800 SDL_HasMMX(void)
801 {
802 return CPU_FEATURE_AVAILABLE(CPU_HAS_MMX);
803 }
804
805 SDL_bool
SDL_Has3DNow(void)806 SDL_Has3DNow(void)
807 {
808 return CPU_FEATURE_AVAILABLE(CPU_HAS_3DNOW);
809 }
810
811 SDL_bool
SDL_HasSSE(void)812 SDL_HasSSE(void)
813 {
814 return CPU_FEATURE_AVAILABLE(CPU_HAS_SSE);
815 }
816
817 SDL_bool
SDL_HasSSE2(void)818 SDL_HasSSE2(void)
819 {
820 return CPU_FEATURE_AVAILABLE(CPU_HAS_SSE2);
821 }
822
823 SDL_bool
SDL_HasSSE3(void)824 SDL_HasSSE3(void)
825 {
826 return CPU_FEATURE_AVAILABLE(CPU_HAS_SSE3);
827 }
828
829 SDL_bool
SDL_HasSSE41(void)830 SDL_HasSSE41(void)
831 {
832 return CPU_FEATURE_AVAILABLE(CPU_HAS_SSE41);
833 }
834
835 SDL_bool
SDL_HasSSE42(void)836 SDL_HasSSE42(void)
837 {
838 return CPU_FEATURE_AVAILABLE(CPU_HAS_SSE42);
839 }
840
841 SDL_bool
SDL_HasAVX(void)842 SDL_HasAVX(void)
843 {
844 return CPU_FEATURE_AVAILABLE(CPU_HAS_AVX);
845 }
846
847 SDL_bool
SDL_HasAVX2(void)848 SDL_HasAVX2(void)
849 {
850 return CPU_FEATURE_AVAILABLE(CPU_HAS_AVX2);
851 }
852
853 SDL_bool
SDL_HasAVX512F(void)854 SDL_HasAVX512F(void)
855 {
856 return CPU_FEATURE_AVAILABLE(CPU_HAS_AVX512F);
857 }
858
859 SDL_bool
SDL_HasARMSIMD(void)860 SDL_HasARMSIMD(void)
861 {
862 return CPU_FEATURE_AVAILABLE(CPU_HAS_ARM_SIMD);
863 }
864
865 SDL_bool
SDL_HasNEON(void)866 SDL_HasNEON(void)
867 {
868 return CPU_FEATURE_AVAILABLE(CPU_HAS_NEON);
869 }
870
/* Cached amount of system RAM in MiB; 0 means "not determined". */
static int SDL_SystemRAM = 0;

/*
 * Return the amount of system RAM in MiB (bytes / (1024*1024)).
 *
 * Each configured platform query is tried in turn until one yields a
 * positive value.  Returns 0 when nothing could be determined; in that case
 * the queries are attempted again on the next call.
 */
int
SDL_GetSystemRAM(void)
{
    if (!SDL_SystemRAM) {
#ifndef SDL_CPUINFO_DISABLED
#if defined(HAVE_SYSCONF) && defined(_SC_PHYS_PAGES) && defined(_SC_PAGESIZE)
        if (SDL_SystemRAM <= 0) {
            SDL_SystemRAM = (int)((Sint64)sysconf(_SC_PHYS_PAGES) * sysconf(_SC_PAGESIZE) / (1024*1024));
        }
#endif
#ifdef HAVE_SYSCTLBYNAME
        if (SDL_SystemRAM <= 0) {
#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__NetBSD__)
#ifdef HW_REALMEM
            int mib[2] = {CTL_HW, HW_REALMEM};
#else
            /* might only report up to 2 GiB */
            int mib[2] = {CTL_HW, HW_PHYSMEM};
#endif /* HW_REALMEM */
#else
            int mib[2] = {CTL_HW, HW_MEMSIZE};
#endif /* __FreeBSD__ || __FreeBSD_kernel__ */
            Uint64 memsize = 0;
            size_t len = sizeof(memsize);

            if (sysctl(mib, 2, &memsize, &len, NULL, 0) == 0) {
                SDL_SystemRAM = (int)(memsize / (1024*1024));
            }
        }
#endif
#ifdef __WIN32__
        if (SDL_SystemRAM <= 0) {
            MEMORYSTATUSEX stat;
            stat.dwLength = sizeof(stat);
            if (GlobalMemoryStatusEx(&stat)) {
                SDL_SystemRAM = (int)(stat.ullTotalPhys / (1024 * 1024));
            }
        }
#endif
#ifdef __OS2__
        if (SDL_SystemRAM <= 0) {
            Uint32 sysram = 0;
            DosQuerySysInfo(QSV_TOTPHYSMEM, QSV_TOTPHYSMEM, &sysram, 4);
            SDL_SystemRAM = (int) (sysram / 0x100000U);
        }
#endif
#ifdef __RISCOS__
        if (SDL_SystemRAM <= 0) {
            _kernel_swi_regs regs;
            regs.r[0] = 0x108;
            if (_kernel_swi(OS_Memory, &regs, &regs) == NULL) {
                /* Multiply in 64 bits: the product of the two returned
                   registers can exceed the range of int. */
                SDL_SystemRAM = (int)((Sint64)regs.r[1] * regs.r[2] / (1024 * 1024));
            }
        }
#endif
#endif
    }
    return SDL_SystemRAM;
}
932
933
934 size_t
SDL_SIMDGetAlignment(void)935 SDL_SIMDGetAlignment(void)
936 {
937 if (SDL_SIMDAlignment == 0xFFFFFFFF) {
938 SDL_GetCPUFeatures(); /* make sure this has been calculated */
939 }
940 SDL_assert(SDL_SIMDAlignment != 0);
941 return SDL_SIMDAlignment;
942 }
943
944 void *
SDL_SIMDAlloc(const size_t len)945 SDL_SIMDAlloc(const size_t len)
946 {
947 const size_t alignment = SDL_SIMDGetAlignment();
948 const size_t padding = alignment - (len % alignment);
949 const size_t padded = (padding != alignment) ? (len + padding) : len;
950 Uint8 *retval = NULL;
951 Uint8 *ptr = (Uint8 *) SDL_malloc(padded + alignment + sizeof (void *));
952 if (ptr) {
953 /* store the actual malloc pointer right before our aligned pointer. */
954 retval = ptr + sizeof (void *);
955 retval += alignment - (((size_t) retval) % alignment);
956 *(((void **) retval) - 1) = ptr;
957 }
958 return retval;
959 }
960
961 void *
SDL_SIMDRealloc(void * mem,const size_t len)962 SDL_SIMDRealloc(void *mem, const size_t len)
963 {
964 const size_t alignment = SDL_SIMDGetAlignment();
965 const size_t padding = alignment - (len % alignment);
966 const size_t padded = (padding != alignment) ? (len + padding) : len;
967 Uint8 *retval = (Uint8*) mem;
968 void *oldmem = mem;
969 size_t memdiff, ptrdiff;
970 Uint8 *ptr;
971
972 if (mem) {
973 void **realptr = (void **) mem;
974 realptr--;
975 mem = *(((void **) mem) - 1);
976
977 /* Check the delta between the real pointer and user pointer */
978 memdiff = ((size_t) oldmem) - ((size_t) mem);
979 }
980
981 ptr = (Uint8 *) SDL_realloc(mem, padded + alignment + sizeof (void *));
982
983 if (ptr == mem) {
984 return retval; /* Pointer didn't change, nothing to do */
985 }
986 if (ptr == NULL) {
987 return NULL; /* Out of memory, bail! */
988 }
989
990 /* Store the actual malloc pointer right before our aligned pointer. */
991 retval = ptr + sizeof (void *);
992 retval += alignment - (((size_t) retval) % alignment);
993
994 /* Make sure the delta is the same! */
995 if (mem) {
996 ptrdiff = ((size_t) retval) - ((size_t) ptr);
997 if (memdiff != ptrdiff) { /* Delta has changed, copy to new offset! */
998 oldmem = (void*) (((size_t) ptr) + memdiff);
999
1000 /* Even though the data past the old `len` is undefined, this is the
1001 * only length value we have, and it guarantees that we copy all the
1002 * previous memory anyhow.
1003 */
1004 SDL_memmove(retval, oldmem, len);
1005 }
1006 }
1007
1008 /* Actually store the malloc pointer, finally. */
1009 *(((void **) retval) - 1) = ptr;
1010 return retval;
1011 }
1012
/*
 * Release a block obtained from SDL_SIMDAlloc()/SDL_SIMDRealloc().
 * Passing NULL is a no-op.
 */
void
SDL_SIMDFree(void *ptr)
{
    if (ptr) {
        /* The pointer to hand to SDL_free() is stored in the word just
           before the user pointer. */
        SDL_free(((void **) ptr)[-1]);
    }
}
1022
1023
1024 #ifdef TEST_MAIN
1025
1026 #include <stdio.h>
1027
1028 int
main()1029 main()
1030 {
1031 printf("CPU count: %d\n", SDL_GetCPUCount());
1032 printf("CPU type: %s\n", SDL_GetCPUType());
1033 printf("CPU name: %s\n", SDL_GetCPUName());
1034 printf("CacheLine size: %d\n", SDL_GetCPUCacheLineSize());
1035 printf("RDTSC: %d\n", SDL_HasRDTSC());
1036 printf("Altivec: %d\n", SDL_HasAltiVec());
1037 printf("MMX: %d\n", SDL_HasMMX());
1038 printf("3DNow: %d\n", SDL_Has3DNow());
1039 printf("SSE: %d\n", SDL_HasSSE());
1040 printf("SSE2: %d\n", SDL_HasSSE2());
1041 printf("SSE3: %d\n", SDL_HasSSE3());
1042 printf("SSE4.1: %d\n", SDL_HasSSE41());
1043 printf("SSE4.2: %d\n", SDL_HasSSE42());
1044 printf("AVX: %d\n", SDL_HasAVX());
1045 printf("AVX2: %d\n", SDL_HasAVX2());
1046 printf("AVX-512F: %d\n", SDL_HasAVX512F());
1047 printf("ARM SIMD: %d\n", SDL_HasARMSIMD());
1048 printf("NEON: %d\n", SDL_HasNEON());
1049 printf("RAM: %d MB\n", SDL_GetSystemRAM());
1050 return 0;
1051 }
1052
1053 #endif /* TEST_MAIN */
1054
1055 /* vi: set ts=4 sw=4 expandtab: */
1056