1 #include "ruy/cpuinfo.h"
2 
3 #include <algorithm>
4 #include <cstdint>
5 #include <limits>
6 
7 #include "ruy/check_macros.h"
8 #include "ruy/cpu_cache_params.h"
9 #include "ruy/platform.h"
10 
11 #ifdef RUY_HAVE_CPUINFO
12 #include <cpuinfo.h>
13 #endif
14 
15 namespace ruy {
16 
17 namespace {
MakeDummyCacheParams(CpuCacheParams * result)18 void MakeDummyCacheParams(CpuCacheParams* result) {
19   // Reasonable dummy values
20   result->local_cache_size = 32 * 1024;
21   result->last_level_cache_size = 512 * 1024;
22 }
23 }  // end namespace
24 
25 #ifdef RUY_HAVE_CPUINFO
26 
~CpuInfo()27 CpuInfo::~CpuInfo() {
28   if (init_status_ == InitStatus::kInitialized) {
29     cpuinfo_deinitialize();
30   }
31 }
32 
EnsureInitialized()33 bool CpuInfo::EnsureInitialized() {
34   if (init_status_ == InitStatus::kNotYetAttempted) {
35     init_status_ = Initialize();
36     RUY_DCHECK_NE(init_status_, InitStatus::kNotYetAttempted);
37   }
38   return init_status_ == InitStatus::kInitialized;
39 }
40 
41 namespace {
QueryCacheParams(CpuCacheParams * cache_params)42 void QueryCacheParams(CpuCacheParams* cache_params) {
43   const int processors_count = cpuinfo_get_processors_count();
44   RUY_DCHECK_GT(processors_count, 0);
45   int overall_local_cache_size = std::numeric_limits<int>::max();
46   int overall_last_level_cache_size = std::numeric_limits<int>::max();
47   for (int i = 0; i < processors_count; i++) {
48     int local_cache_size = 0;
49     int last_level_cache_size = 0;
50     const cpuinfo_processor* processor = cpuinfo_get_processor(i);
51     // Loop over cache levels. Ignoring L4 for now: it seems that in CPUs that
52     // have L4, we would still prefer to stay in lower-latency L3.
53     for (const cpuinfo_cache* cache :
54          {processor->cache.l1d, processor->cache.l2, processor->cache.l3}) {
55       if (!cache) {
56         continue;  // continue, not break, it is possible to have L1+L3 but no
57                    // L2.
58       }
59       const bool is_local =
60           cpuinfo_get_processor(cache->processor_start)->core ==
61           cpuinfo_get_processor(cache->processor_start +
62                                 cache->processor_count - 1)
63               ->core;
64       if (is_local) {
65         local_cache_size = cache->size;
66       }
67       last_level_cache_size = cache->size;
68     }
69     // If no local cache was found, use the last-level cache.
70     if (!local_cache_size) {
71       local_cache_size = last_level_cache_size;
72     }
73     RUY_DCHECK_GT(local_cache_size, 0);
74     RUY_DCHECK_GT(last_level_cache_size, 0);
75     RUY_DCHECK_GE(last_level_cache_size, local_cache_size);
76     overall_local_cache_size =
77         std::min(overall_local_cache_size, local_cache_size);
78     overall_last_level_cache_size =
79         std::min(overall_last_level_cache_size, last_level_cache_size);
80   }
81   cache_params->local_cache_size = overall_local_cache_size;
82   cache_params->last_level_cache_size = overall_last_level_cache_size;
83 }
84 }  // end namespace
85 
Initialize()86 CpuInfo::InitStatus CpuInfo::Initialize() {
87   RUY_DCHECK_EQ(init_status_, InitStatus::kNotYetAttempted);
88   if (!cpuinfo_initialize()) {
89     MakeDummyCacheParams(&cache_params_);
90     return InitStatus::kFailed;
91   }
92   QueryCacheParams(&cache_params_);
93   return InitStatus::kInitialized;
94 }
95 
NeonDotprod()96 bool CpuInfo::NeonDotprod() {
97   return EnsureInitialized() && cpuinfo_has_arm_neon_dot();
98 }
99 
Sse42()100 bool CpuInfo::Sse42() {
101   return EnsureInitialized() && cpuinfo_has_x86_sse4_2();
102 }
103 
Avx2Fma()104 bool CpuInfo::Avx2Fma() {
105   return EnsureInitialized() && cpuinfo_has_x86_avx2() &&
106          cpuinfo_has_x86_fma3();
107 }
108 
Avx()109 bool CpuInfo::Avx() { return EnsureInitialized() && cpuinfo_has_x86_avx(); }
110 
Avx512()111 bool CpuInfo::Avx512() {
112   return EnsureInitialized() && cpuinfo_has_x86_avx512f() &&
113          cpuinfo_has_x86_avx512dq() && cpuinfo_has_x86_avx512cd() &&
114          cpuinfo_has_x86_avx512bw() && cpuinfo_has_x86_avx512vl();
115 }
116 
AvxVnni()117 bool CpuInfo::AvxVnni() {
118   return EnsureInitialized() && cpuinfo_has_x86_avx512vnni();
119 }
120 
CurrentCpuIsA55ish()121 bool CpuInfo::CurrentCpuIsA55ish() {
122   if (!EnsureInitialized()) {
123     return false;
124   }
125 
126   switch (cpuinfo_get_uarch(cpuinfo_get_current_uarch_index())->uarch) {
127     case cpuinfo_uarch_cortex_a53:
128     case cpuinfo_uarch_cortex_a55r0:
129     case cpuinfo_uarch_cortex_a55:
130       return true;
131     default:
132       return false;
133   }
134 }
135 
CurrentCpuIsX1()136 bool CpuInfo::CurrentCpuIsX1() {
137   if (!EnsureInitialized()) {
138     return false;
139   }
140   if (cpuinfo_get_uarch(cpuinfo_get_current_uarch_index())->uarch ==
141       cpuinfo_uarch_cortex_x1) {
142     return true;
143   }
144   return false;
145 }
146 
147 #else  // not defined RUY_HAVE_CPUINFO
148 
~CpuInfo()149 CpuInfo::~CpuInfo() {}
EnsureInitialized()150 bool CpuInfo::EnsureInitialized() {
151   if (init_status_ == InitStatus::kNotYetAttempted) {
152     MakeDummyCacheParams(&cache_params_);
153     init_status_ = InitStatus::kInitialized;
154   }
155   RUY_DCHECK_EQ(init_status_, InitStatus::kInitialized);
156   return true;
157 }
// Fallback stubs for builds without the cpuinfo library: no CPU feature
// detection is possible, so conservatively report every optional feature as
// unavailable.
bool CpuInfo::NeonDotprod() { return false; }
bool CpuInfo::Sse42() { return false; }
bool CpuInfo::Avx() { return false; }
bool CpuInfo::Avx2Fma() { return false; }
bool CpuInfo::Avx512() { return false; }
bool CpuInfo::AvxVnni() { return false; }
bool CpuInfo::CurrentCpuIsA55ish() { return false; }
bool CpuInfo::CurrentCpuIsX1() { return false; }
166 
167 #endif
168 
// Returns the CPU cache parameters, triggering lazy initialization if needed.
// The returned reference is always valid: on initialization failure,
// EnsureInitialized leaves dummy values in cache_params_.
const CpuCacheParams& CpuInfo::CacheParams() {
  EnsureInitialized();
  // On failure, EnsureInitialized leaves dummy values in cache_params_.
  return cache_params_;
}
174 
175 }  // namespace ruy
176