1 /*
2  * Copyright 2011-2024 The OpenSSL Project Authors. All Rights Reserved.
3  *
4  * Licensed under the Apache License 2.0 (the "License").  You may not use
5  * this file except in compliance with the License.  You can obtain a copy
6  * in the file LICENSE in the source distribution or at
7  * https://www.openssl.org/source/license.html
8  */
9 
10 #include <stdio.h>
11 #include <stdlib.h>
12 #include <string.h>
13 #include <openssl/crypto.h>
14 #ifdef __APPLE__
15 #include <sys/sysctl.h>
16 #else
17 #include <setjmp.h>
18 #include <signal.h>
19 #endif
20 #include "internal/cryptlib.h"
21 #ifdef _WIN32
22 #include <windows.h>
23 #else
24 #include <unistd.h>
25 #endif
26 #include "arm_arch.h"
27 
/*
 * Capability bitmask: ARMV7_* / ARMV8_* flags are ORed in by
 * OPENSSL_cpuid_setup(), or the whole value is taken from the
 * OPENSSL_armcap environment variable when that is set.
 */
unsigned int OPENSSL_armcap_P = 0;
/*
 * Result of _armv8_cpuid_probe(); only assigned when the ARMV8_CPUID
 * capability bit is set, otherwise left at 0.
 */
unsigned int OPENSSL_arm_midr = 0;
/*
 * Set to 1 by OPENSSL_cpuid_setup() on Windows builds and for selected
 * AArch64 CPU models that have NEON. It is not read in this file.
 */
unsigned int OPENSSL_armv8_rsa_neonized = 0;
31 
32 #ifdef _WIN32
OPENSSL_cpuid_setup(void)33 void OPENSSL_cpuid_setup(void)
34 {
35     OPENSSL_armcap_P |= ARMV7_NEON;
36     OPENSSL_armv8_rsa_neonized = 1;
37     if (IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE)) {
38         /* These are all covered by one call in Windows */
39         OPENSSL_armcap_P |= ARMV8_AES;
40         OPENSSL_armcap_P |= ARMV8_PMULL;
41         OPENSSL_armcap_P |= ARMV8_SHA1;
42         OPENSSL_armcap_P |= ARMV8_SHA256;
43     }
44 }
45 
/* No tick counter is read in this configuration: the result is always 0. */
uint32_t OPENSSL_rdtsc(void)
{
    const uint32_t no_counter = 0;

    return no_counter;
}
50 #elif __ARM_MAX_ARCH__ < 7
/* Builds with __ARM_MAX_ARCH__ below 7 perform no capability detection. */
void OPENSSL_cpuid_setup(void)
{
    return;
}
54 
/* Stub for builds with __ARM_MAX_ARCH__ below 7: always yields 0. */
uint32_t OPENSSL_rdtsc(void)
{
    const uint32_t ticks = 0;

    return ticks;
}
59 #else /* !_WIN32 && __ARM_MAX_ARCH__ >= 7 */
60 
 /* Three detection strategies follow: __APPLE__-specific, getauxval(), or SIGILL-based probing */
62 
63  /* First determine if getauxval() is available (OSSL_IMPLEMENT_GETAUXVAL) */
64 
65 # if defined(__GLIBC__) && defined(__GLIBC_PREREQ)
66 #  if __GLIBC_PREREQ(2, 16)
67 #   include <sys/auxv.h>
68 #   define OSSL_IMPLEMENT_GETAUXVAL
69 #  endif
70 # elif defined(__ANDROID_API__)
71 /* see https://developer.android.google.cn/ndk/guides/cpu-features */
72 #  if __ANDROID_API__ >= 18
73 #   include <sys/auxv.h>
74 #   define OSSL_IMPLEMENT_GETAUXVAL
75 #  endif
76 # endif
77 # if defined(__FreeBSD__) || defined(__OpenBSD__)
78 #  include <sys/param.h>
79 #  if (defined(__FreeBSD__) && __FreeBSD_version >= 1200000) || \
80     (defined(__OpenBSD__) && OpenBSD >= 202409)
81 #   include <sys/auxv.h>
82 #   define OSSL_IMPLEMENT_GETAUXVAL
83 
/*
 * Local getauxval() built on elf_aux_info(): yields the value stored for
 * |key|, or 0 when elf_aux_info() reports a failure.
 */
static unsigned long getauxval(unsigned long key)
{
    unsigned long entry = 0ul;
    int rc = elf_aux_info((int)key, &entry, sizeof(entry));

    return rc == 0 ? entry : 0ul;
}
93 #  endif
94 # endif
95 
96 /*
97  * Android: according to https://developer.android.com/ndk/guides/cpu-features,
98  * getauxval is supported starting with API level 18
99  */
100 # if defined(__ANDROID__) && defined(__ANDROID_API__) && __ANDROID_API__ >= 18
101 #  include <sys/auxv.h>
102 #  define OSSL_IMPLEMENT_GETAUXVAL
103 # endif
104 
/*
 * 32-bit ARM puts the feature bits for the Crypto Extensions in AT_HWCAP2,
 * whereas AArch64 uses AT_HWCAP.  The AT_* keys get fallback values here in
 * case no header included so far has defined them.
 */
# ifndef AT_HWCAP
#  define AT_HWCAP               16
# endif
# ifndef AT_HWCAP2
#  define AT_HWCAP2              26
# endif
# if defined(__arm__) || defined (__arm)
#  define OSSL_HWCAP                  AT_HWCAP
#  define OSSL_HWCAP_NEON             (1 << 12)

#  define OSSL_HWCAP_CE               AT_HWCAP2
#  define OSSL_HWCAP_CE_AES           (1 << 0)
#  define OSSL_HWCAP_CE_PMULL         (1 << 1)
#  define OSSL_HWCAP_CE_SHA1          (1 << 2)
#  define OSSL_HWCAP_CE_SHA256        (1 << 3)
# elif defined(__aarch64__)
#  define OSSL_HWCAP                  AT_HWCAP
#  define OSSL_HWCAP_NEON             (1 << 1)

#  define OSSL_HWCAP_CE               AT_HWCAP
#  define OSSL_HWCAP_CE_AES           (1 << 3)
#  define OSSL_HWCAP_CE_PMULL         (1 << 4)
#  define OSSL_HWCAP_CE_SHA1          (1 << 5)
#  define OSSL_HWCAP_CE_SHA256        (1 << 6)
#  define OSSL_HWCAP_CPUID            (1 << 11)
#  define OSSL_HWCAP_SHA3             (1 << 17)
#  define OSSL_HWCAP_CE_SM3           (1 << 18)
#  define OSSL_HWCAP_CE_SM4           (1 << 19)
#  define OSSL_HWCAP_CE_SHA512        (1 << 21)
#  define OSSL_HWCAP_SVE              (1 << 22)
/*
 * Bits below live in the second capability word.  The key is spelled
 * AT_HWCAP2 (26 via the fallback above) instead of repeating the literal,
 * matching how the 32-bit branch names the same key.
 */
#  define OSSL_HWCAP2                 AT_HWCAP2
#  define OSSL_HWCAP2_SVE2            (1 << 1)
#  define OSSL_HWCAP2_RNG             (1 << 16)
# endif
144 
145 uint32_t _armv7_tick(void);
146 
OPENSSL_rdtsc(void)147 uint32_t OPENSSL_rdtsc(void)
148 {
149     if (OPENSSL_armcap_P & ARMV7_TICK)
150         return _armv7_tick();
151     else
152         return 0;
153 }
154 
155 # ifdef __aarch64__
156 size_t OPENSSL_rndr_asm(unsigned char *buf, size_t len);
157 size_t OPENSSL_rndrrs_asm(unsigned char *buf, size_t len);
158 
159 size_t OPENSSL_rndr_bytes(unsigned char *buf, size_t len);
160 size_t OPENSSL_rndrrs_bytes(unsigned char *buf, size_t len);
161 
/*
 * Calls |func| to fill |buf| with |len| bytes, retrying while it reports a
 * byte count different from |len|.  At most RNDR_MAX_ATTEMPTS calls are made,
 * with a pause of RNDR_RETRY_DELAY_US microseconds between consecutive
 * attempts.
 *
 * Returns the count reported by the last call to |func|; a result different
 * from |len| therefore means that every attempt fell short.
 */
static size_t OPENSSL_rndr_wrapper(size_t (*func)(unsigned char *, size_t), unsigned char *buf, size_t len)
{
    enum { RNDR_MAX_ATTEMPTS = 8, RNDR_RETRY_DELAY_US = 5000 };
    size_t buffer_size = 0;
    int attempt;

    for (attempt = 0; attempt < RNDR_MAX_ATTEMPTS; attempt++) {
        buffer_size = func(buf, len);
        if (buffer_size == len)
            break;
        /* Only pause when another attempt follows; never delay the return */
        if (attempt + 1 < RNDR_MAX_ATTEMPTS)
            usleep(RNDR_RETRY_DELAY_US);
    }
    return buffer_size;
}
175 
OPENSSL_rndr_bytes(unsigned char * buf,size_t len)176 size_t OPENSSL_rndr_bytes(unsigned char *buf, size_t len)
177 {
178     return OPENSSL_rndr_wrapper(OPENSSL_rndr_asm, buf, len);
179 }
180 
OPENSSL_rndrrs_bytes(unsigned char * buf,size_t len)181 size_t OPENSSL_rndrrs_bytes(unsigned char *buf, size_t len)
182 {
183     return OPENSSL_rndr_wrapper(OPENSSL_rndrrs_asm, buf, len);
184 }
185 # endif
186 
187 # if !defined(__APPLE__) && !defined(OSSL_IMPLEMENT_GETAUXVAL)
188 static sigset_t all_masked;
189 
190 static sigjmp_buf ill_jmp;
/*
 * Signal handler installed for SIGILL during probing: jumps back to the
 * sigsetjmp() context saved in ill_jmp, handing over |sig| as the value
 * that sigsetjmp() then returns.
 */
static void ill_handler(int sig)
{
    siglongjmp(ill_jmp, sig);
}
195 
196 /*
197  * Following subroutines could have been inlined, but not all
198  * ARM compilers support inline assembler, and we'd then have to
199  * worry about the compiler optimising out the detection code...
200  */
201 void _armv7_neon_probe(void);
202 void _armv8_aes_probe(void);
203 void _armv8_sha1_probe(void);
204 void _armv8_sha256_probe(void);
205 void _armv8_pmull_probe(void);
206 #  ifdef __aarch64__
207 void _armv8_sm3_probe(void);
208 void _armv8_sm4_probe(void);
209 void _armv8_sha512_probe(void);
210 void _armv8_eor3_probe(void);
211 void _armv8_sve_probe(void);
212 void _armv8_sve2_probe(void);
213 void _armv8_rng_probe(void);
214 #  endif
215 # endif /* !__APPLE__ && !OSSL_IMPLEMENT_GETAUXVAL */
216 
217 /* We only call _armv8_cpuid_probe() if (OPENSSL_armcap_P & ARMV8_CPUID) != 0 */
218 unsigned int _armv8_cpuid_probe(void);
219 
220 # if defined(__APPLE__)
/*
 * Reads the integer sysctl called |name|.  Yields |value| only when the query
 * succeeds and the sysctl reads exactly 1; yields 0 in every other case.
 */
static unsigned int sysctl_query(const char *name, unsigned int value)
{
    unsigned int flag = 0;
    size_t flag_size = sizeof(flag);

    if (sysctlbyname(name, &flag, &flag_size, NULL, 0) != 0)
        return 0;
    if (flag != 1)
        return 0;
    return value;
}
231 # elif !defined(OSSL_IMPLEMENT_GETAUXVAL)
232 /*
233  * Calls a provided probe function, which may SIGILL. If it doesn't, return `value`, otherwise return 0.
234  */
arm_probe_for(void (* probe)(void),volatile unsigned int value)235 static unsigned int arm_probe_for(void (*probe)(void), volatile unsigned int value)
236 {
237     if (sigsetjmp(ill_jmp, 1) == 0) {
238         probe();
239         return value;
240     } else {
241         /* The probe function gave us SIGILL */
242         return 0;
243     }
244 }
245 # endif
246 
OPENSSL_cpuid_setup(void)247 void OPENSSL_cpuid_setup(void)
248 {
249     const char *e;
250 # if !defined(__APPLE__) && !defined(OSSL_IMPLEMENT_GETAUXVAL)
251     struct sigaction ill_oact, ill_act;
252     sigset_t oset;
253 # endif
254     static int trigger = 0;
255 
256     if (trigger)
257         return;
258     trigger = 1;
259 
260     OPENSSL_armcap_P = 0;
261 
262     if ((e = getenv("OPENSSL_armcap"))) {
263         OPENSSL_armcap_P = (unsigned int)strtoul(e, NULL, 0);
264         return;
265     }
266 
267 # if defined(__APPLE__)
268 #  if !defined(__aarch64__)
269     /*
270      * Capability probing by catching SIGILL appears to be problematic
271      * on iOS. But since Apple universe is "monocultural", it's actually
272      * possible to simply set pre-defined processor capability mask.
273      */
274     if (1) {
275         OPENSSL_armcap_P = ARMV7_NEON;
276         return;
277     }
278 #  else
279     {
280         /*
281          * From
282          * https://github.com/llvm/llvm-project/blob/412237dcd07e5a2afbb1767858262a5f037149a3/llvm/lib/Target/AArch64/AArch64.td#L719
283          * all of these have been available on 64-bit Apple Silicon from the
284          * beginning (the A7).
285          */
286         OPENSSL_armcap_P |= ARMV7_NEON | ARMV8_PMULL | ARMV8_AES | ARMV8_SHA1 | ARMV8_SHA256;
287 
288         /* More recent extensions are indicated by sysctls */
289         OPENSSL_armcap_P |= sysctl_query("hw.optional.armv8_2_sha512", ARMV8_SHA512);
290         OPENSSL_armcap_P |= sysctl_query("hw.optional.armv8_2_sha3", ARMV8_SHA3);
291 
292         if (OPENSSL_armcap_P & ARMV8_SHA3) {
293             char uarch[64];
294 
295             size_t len = sizeof(uarch);
296             if ((sysctlbyname("machdep.cpu.brand_string", uarch, &len, NULL, 0) == 0) &&
297                ((strncmp(uarch, "Apple M1", 8) == 0) ||
298                 (strncmp(uarch, "Apple M2", 8) == 0) ||
299                 (strncmp(uarch, "Apple M3", 8) == 0) ||
300                 (strncmp(uarch, "Apple M4", 8) == 0))) {
301                 OPENSSL_armcap_P |= ARMV8_UNROLL8_EOR3;
302                 OPENSSL_armcap_P |= ARMV8_HAVE_SHA3_AND_WORTH_USING;
303             }
304         }
305     }
306 #  endif       /* __aarch64__ */
307 
308 # elif defined(OSSL_IMPLEMENT_GETAUXVAL)
309 
310     if (getauxval(OSSL_HWCAP) & OSSL_HWCAP_NEON) {
311         unsigned long hwcap = getauxval(OSSL_HWCAP_CE);
312 
313         OPENSSL_armcap_P |= ARMV7_NEON;
314 
315         if (hwcap & OSSL_HWCAP_CE_AES)
316             OPENSSL_armcap_P |= ARMV8_AES;
317 
318         if (hwcap & OSSL_HWCAP_CE_PMULL)
319             OPENSSL_armcap_P |= ARMV8_PMULL;
320 
321         if (hwcap & OSSL_HWCAP_CE_SHA1)
322             OPENSSL_armcap_P |= ARMV8_SHA1;
323 
324         if (hwcap & OSSL_HWCAP_CE_SHA256)
325             OPENSSL_armcap_P |= ARMV8_SHA256;
326 
327 #  ifdef __aarch64__
328         if (hwcap & OSSL_HWCAP_CE_SM4)
329             OPENSSL_armcap_P |= ARMV8_SM4;
330 
331         if (hwcap & OSSL_HWCAP_CE_SHA512)
332             OPENSSL_armcap_P |= ARMV8_SHA512;
333 
334         if (hwcap & OSSL_HWCAP_CPUID)
335             OPENSSL_armcap_P |= ARMV8_CPUID;
336 
337         if (hwcap & OSSL_HWCAP_CE_SM3)
338             OPENSSL_armcap_P |= ARMV8_SM3;
339         if (hwcap & OSSL_HWCAP_SHA3)
340             OPENSSL_armcap_P |= ARMV8_SHA3;
341 #  endif
342     }
343 #  ifdef __aarch64__
344         if (getauxval(OSSL_HWCAP) & OSSL_HWCAP_SVE)
345             OPENSSL_armcap_P |= ARMV8_SVE;
346 
347         if (getauxval(OSSL_HWCAP2) & OSSL_HWCAP2_SVE2)
348             OPENSSL_armcap_P |= ARMV8_SVE2;
349 
350         if (getauxval(OSSL_HWCAP2) & OSSL_HWCAP2_RNG)
351             OPENSSL_armcap_P |= ARMV8_RNG;
352 #  endif
353 
354 # else /* !__APPLE__ && !OSSL_IMPLEMENT_GETAUXVAL */
355 
356     /* If all else fails, do brute force SIGILL-based feature detection */
357 
358     sigfillset(&all_masked);
359     sigdelset(&all_masked, SIGILL);
360     sigdelset(&all_masked, SIGTRAP);
361     sigdelset(&all_masked, SIGFPE);
362     sigdelset(&all_masked, SIGBUS);
363     sigdelset(&all_masked, SIGSEGV);
364 
365     memset(&ill_act, 0, sizeof(ill_act));
366     ill_act.sa_handler = ill_handler;
367     ill_act.sa_mask = all_masked;
368 
369     sigprocmask(SIG_SETMASK, &ill_act.sa_mask, &oset);
370     sigaction(SIGILL, &ill_act, &ill_oact);
371 
372     OPENSSL_armcap_P |= arm_probe_for(_armv7_neon_probe, ARMV7_NEON);
373 
374     if (OPENSSL_armcap_P & ARMV7_NEON) {
375 
376         OPENSSL_armcap_P |= arm_probe_for(_armv8_pmull_probe, ARMV8_PMULL | ARMV8_AES);
377         if (!(OPENSSL_armcap_P & ARMV8_AES)) {
378             OPENSSL_armcap_P |= arm_probe_for(_armv8_aes_probe, ARMV8_AES);
379         }
380 
381         OPENSSL_armcap_P |= arm_probe_for(_armv8_sha1_probe, ARMV8_SHA1);
382         OPENSSL_armcap_P |= arm_probe_for(_armv8_sha256_probe, ARMV8_SHA256);
383 
384 #  if defined(__aarch64__)
385         OPENSSL_armcap_P |= arm_probe_for(_armv8_sm3_probe, ARMV8_SM3);
386         OPENSSL_armcap_P |= arm_probe_for(_armv8_sm4_probe, ARMV8_SM4);
387         OPENSSL_armcap_P |= arm_probe_for(_armv8_sha512_probe, ARMV8_SHA512);
388         OPENSSL_armcap_P |= arm_probe_for(_armv8_eor3_probe, ARMV8_SHA3);
389 #  endif
390     }
391 #  ifdef __aarch64__
392     OPENSSL_armcap_P |= arm_probe_for(_armv8_sve_probe, ARMV8_SVE);
393     OPENSSL_armcap_P |= arm_probe_for(_armv8_sve2_probe, ARMV8_SVE2);
394     OPENSSL_armcap_P |= arm_probe_for(_armv8_rng_probe, ARMV8_RNG);
395 #  endif
396 
397     /*
398      * Probing for ARMV7_TICK is known to produce unreliable results,
399      * so we only use the feature when the user explicitly enables it
400      * with OPENSSL_armcap.
401      */
402 
403     sigaction(SIGILL, &ill_oact, NULL);
404     sigprocmask(SIG_SETMASK, &oset, NULL);
405 
406 # endif /* __APPLE__, OSSL_IMPLEMENT_GETAUXVAL */
407 
408 # ifdef __aarch64__
409     if (OPENSSL_armcap_P & ARMV8_CPUID)
410         OPENSSL_arm_midr = _armv8_cpuid_probe();
411 
412     if ((MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A72) ||
413          MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_ARM, ARM_CPU_PART_N1)) &&
414         (OPENSSL_armcap_P & ARMV7_NEON)) {
415             OPENSSL_armv8_rsa_neonized = 1;
416     }
417     if ((MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_ARM, ARM_CPU_PART_V1) ||
418          MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_ARM, ARM_CPU_PART_N2) ||
419          MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_QCOMM, QCOM_CPU_PART_ORYON_X1) ||
420          MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_MICROSOFT, MICROSOFT_CPU_PART_COBALT_100) ||
421          MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_ARM, ARM_CPU_PART_V2) ||
422          MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_ARM, ARM_CPU_PART_N3) ||
423          MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_ARM, ARM_CPU_PART_V3) ||
424          MIDR_IMPLEMENTER(OPENSSL_arm_midr) == ARM_CPU_IMP_AMPERE) &&
425         (OPENSSL_armcap_P & ARMV8_SHA3))
426         OPENSSL_armcap_P |= ARMV8_UNROLL8_EOR3;
427     if ((MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_ARM, ARM_CPU_PART_V1) ||
428          MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_ARM, ARM_CPU_PART_V2) ||
429          MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_ARM, ARM_CPU_PART_V3) ||
430          MIDR_IMPLEMENTER(OPENSSL_arm_midr) == ARM_CPU_IMP_AMPERE) &&
431         (OPENSSL_armcap_P & ARMV8_SHA3))
432         OPENSSL_armcap_P |= ARMV8_UNROLL12_EOR3;
433     if ((MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM)     ||
434          MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM)      ||
435          MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM_PRO) ||
436          MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM_PRO)  ||
437          MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM_MAX) ||
438          MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM_MAX)  ||
439          MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_AVALANCHE)     ||
440          MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_BLIZZARD)      ||
441          MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_AVALANCHE_PRO) ||
442          MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_BLIZZARD_PRO)  ||
443          MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_AVALANCHE_MAX) ||
444          MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_BLIZZARD_MAX)  ||
445          MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_QCOMM, QCOM_CPU_PART_ORYON_X1)) &&
446         (OPENSSL_armcap_P & ARMV8_SHA3))
447         OPENSSL_armcap_P |= ARMV8_HAVE_SHA3_AND_WORTH_USING;
448 # endif
449 }
450 #endif /* _WIN32, __ARM_MAX_ARCH__ >= 7 */
451