1 #include <xen/init.h>
2 #include <xen/param.h>
3
4 #include <asm/microcode.h>
5 #include <asm/msr.h>
6 #include <asm/processor.h>
7
/*
 * Valid values:
 *   1 => Explicit tsx=1
 *   0 => Explicit tsx=0
 *  -1 => Default, altered to 0/1 (if unspecified) by:
 *                 - TAA heuristics/settings for speculative safety
 *                 - "TSX vs PCR3" select for TSX memory ordering safety
 *  -2 => Implicit tsx=0 (from RTM_ALWAYS_ABORT vs RTM mismatch)
 *  -3 => Implicit tsx=1 (feed-through from spec-ctrl=0)
 *
 * This is arranged such that the bottom bit encodes whether TSX is actually
 * disabled, while identifying various explicit (>=0) and implicit (<0)
 * conditions.  Concretely, consumers test (opt_tsx & 1): a clear bit (0, -2)
 * means TSX is disabled, and a set bit (1, -1, -3) means TSX is enabled.
 *
 * This option only has any effect on systems presenting a mechanism of
 * controlling TSX behaviour, and where TSX isn't force-disabled by firmware.
 */
int8_t __read_mostly opt_tsx = -1;

/*
 * Written by tsx_init() as !(opt_tsx & 1) whenever it programs MSR_TSX_CTRL
 * or MSR_TSX_FORCE_ABORT.  Not written by tsx_init() when neither control is
 * enumerated.
 */
bool __read_mostly rtm_disabled;
27
parse_tsx(const char * s)28 static int __init cf_check parse_tsx(const char *s)
29 {
30 int rc = 0, val = parse_bool(s, NULL);
31
32 if ( val >= 0 )
33 opt_tsx = val;
34 else
35 rc = -EINVAL;
36
37 return rc;
38 }
39 custom_param("tsx", parse_tsx);
40
/*
 * Apply the TSX policy held in opt_tsx to the TSX control MSRs.
 *
 * The work is split into two phases:
 *
 *  - A one-time probe, guarded by the static 'once' flag, which runs on the
 *    first call only.  It inspects MSR_MCU_OPT_CTRL and MSR_TSX_FORCE_ABORT
 *    (where enumerated), may adjust opt_tsx when no explicit choice was
 *    given (or when firmware has locked TSX off), and forces or clears
 *    feature bits via setup_force_cpu_cap() / setup_clear_cpu_cap().
 *
 *  - MSR programming, which runs on every call.  Depending on which control
 *    is enumerated, MSR_TSX_CTRL or MSR_TSX_FORCE_ABORT is rewritten to match
 *    the bottom bit of opt_tsx, and rtm_disabled is updated to reflect it.
 *    If neither control is enumerated but an explicit tsx= value was given,
 *    a warning is printed once.
 */
void tsx_init(void)
{
    static bool __read_mostly once;

    /*
     * This function is first called between microcode being loaded, and
     * CPUID being scanned generally. early_cpu_init() has already prepared
     * the feature bits needed here. And early_microcode_init() has ensured
     * they are not stale after the microcode update.
     */
    if ( unlikely(!once) )
    {
        bool has_rtm_always_abort;

        once = true;

        /*
         * Start from the enumerated value. The probes below may upgrade
         * this to true if a previous owner of the CPU has hidden the bit.
         */
        has_rtm_always_abort = cpu_has_rtm_always_abort;

        if ( cpu_has_tsx_ctrl && cpu_has_srbds_ctrl )
        {
            /*
             * On a TAA-vulnerable or later part with at least the May 2020
             * microcode mitigating SRBDS.
             */
            uint64_t val;

            rdmsrl(MSR_MCU_OPT_CTRL, val);

            /*
             * Probe for the February 2022 microcode which de-features TSX on
             * TAA-vulnerable client parts - WHL-R/CFL-R.
             *
             * RTM_ALWAYS_ABORT (read above) enumerates the new functionality,
             * but is read as zero if MCU_OPT_CTRL.RTM_ALLOW has been set
             * before we run. Undo this.
             */
            if ( val & MCU_OPT_CTRL_RTM_ALLOW )
                has_rtm_always_abort = true;

            if ( has_rtm_always_abort )
            {
                if ( val & MCU_OPT_CTRL_RTM_LOCKED )
                {
                    /*
                     * If RTM_LOCKED is set, TSX is disabled because SGX is
                     * enabled, and there is nothing we can do. Override with
                     * tsx=0 so all other logic takes sensible actions.
                     */
                    printk(XENLOG_WARNING "TSX locked by firmware - disabling\n");
                    opt_tsx = 0;
                }
                else
                {
                    /*
                     * Otherwise, set RTM_ALLOW. Not because we necessarily
                     * intend to enable RTM, but it prevents
                     * MSR_TSX_CTRL.RTM_DISABLE from being ignored, thus
                     * allowing the rest of the TSX selection logic to work as
                     * before.
                     */
                    val |= MCU_OPT_CTRL_RTM_ALLOW;
                }

                set_in_mcu_opt_ctrl(
                    MCU_OPT_CTRL_RTM_LOCKED | MCU_OPT_CTRL_RTM_ALLOW, val);

                /*
                 * If no explicit tsx= option is provided, pick a default.
                 *
                 * With RTM_ALWAYS_ABORT, the default ucode behaviour is to
                 * disable, so match that. This does not override explicit user
                 * choices, or implicit choices as a side effect of spec-ctrl=0.
                 */
                if ( opt_tsx == -1 )
                    opt_tsx = 0;
            }
        }

        if ( cpu_has_tsx_force_abort )
        {
            uint64_t val;

            /*
             * On an early TSX-enabled Skylake part subject to the memory
             * ordering erratum, with at least the March 2019 microcode.
             */

            rdmsrl(MSR_TSX_FORCE_ABORT, val);

            /*
             * At the time of writing (April 2024), it was discovered that
             * some parts (e.g. CoffeeLake 8th Gen, 06-9e-0a, ucode 0xf6)
             * advertise RTM_ALWAYS_ABORT, but XBEGIN instructions #UD. Other
             * similar parts (e.g. KabyLake Xeon-E3, 06-9e-09, ucode 0xf8)
             * operate as expected.
             *
             * In this case:
             *  - RTM_ALWAYS_ABORT and MSR_TSX_FORCE_ABORT are enumerated.
             *  - XBEGIN instructions genuinely #UD.
             *  - MSR_TSX_FORCE_ABORT appears to be write-discard and fails to
             *    hold its value.
             *  - HLE and RTM are not enumerated, despite
             *    MSR_TSX_FORCE_ABORT.TSX_CPUID_CLEAR being clear.
             *
             * Spot RTM being unavailable without TSX_CPUID_CLEAR being set,
             * and treat it as if no TSX is available at all. This will
             * prevent Xen from thinking it's safe to offer HLE/RTM to VMs.
             */
            if ( val == 0 && cpu_has_rtm_always_abort && !cpu_has_rtm )
            {
                printk(XENLOG_ERR
                       "FIRMWARE BUG: CPU %02x-%02x-%02x, ucode 0x%08x: RTM_ALWAYS_ABORT vs RTM mismatch\n",
                       boot_cpu_data.x86, boot_cpu_data.x86_model,
                       boot_cpu_data.x86_mask, this_cpu(cpu_sig).rev);

                setup_clear_cpu_cap(X86_FEATURE_RTM_ALWAYS_ABORT);
                setup_clear_cpu_cap(X86_FEATURE_TSX_FORCE_ABORT);

                /* Record the implicit tsx=0, but keep any explicit choice. */
                if ( opt_tsx < 0 )
                    opt_tsx = -2;

                /*
                 * Skip the remainder of the probe, so that none of
                 * RTM_ALWAYS_ABORT, HLE or RTM get forced below.
                 */
                goto done_probe;
            }

            /*
             * Probe for the June 2021 microcode which de-features TSX on
             * client parts. (Note - this is a subset of parts impacted by
             * the memory ordering errata.)
             *
             * RTM_ALWAYS_ABORT enumerates the new functionality, but is also
             * read as zero if TSX_FORCE_ABORT.ENABLE_RTM has been set before
             * we run.
             */
            if ( val & TSX_ENABLE_RTM )
                has_rtm_always_abort = true;

            /*
             * If no explicit tsx= option is provided, pick a default.
             *
             * This deliberately overrides the implicit opt_tsx=-3 from
             * `spec-ctrl=0` because:
             * - parse_spec_ctrl() ran before any CPU details were known.
             * - We now know we're running on a CPU not affected by TAA (as
             *   TSX_FORCE_ABORT is enumerated).
             * - When RTM_ALWAYS_ABORT is enumerated, TSX malfunctions, so we
             *   only ever want it enabled by explicit user choice.
             *
             * Without RTM_ALWAYS_ABORT, leave TSX active. In particular,
             * this includes SKX where TSX is still supported.
             *
             * With RTM_ALWAYS_ABORT, disable TSX.
             */
            if ( opt_tsx < 0 )
                opt_tsx = !has_rtm_always_abort;
        }

        /*
         * Always force RTM_ALWAYS_ABORT, even if it is currently visible. If
         * the user explicitly opts to enable TSX, we'll set the appropriate
         * RTM_ENABLE bit and cause RTM_ALWAYS_ABORT to be hidden from the
         * general CPUID scan later.
         */
        if ( has_rtm_always_abort )
            setup_force_cpu_cap(X86_FEATURE_RTM_ALWAYS_ABORT);

        /*
         * The TSX features (HLE/RTM) are handled specially. They both
         * enumerate features but, on certain parts, have mechanisms to be
         * hidden without disrupting running software.
         *
         * At the moment, we're running in an unknown context (WRT hiding -
         * particularly if another fully fledged kernel ran before us) and
         * depending on user settings, may elect to continue hiding them from
         * native CPUID instructions.
         *
         * Xen doesn't use TSX itself, but uses cpu_has_{hle,rtm} for various
         * system reasons, mostly errata detection, so the meaning is more
         * useful as "TSX infrastructure available", as opposed to "features
         * advertised and working".
         *
         * Force the features to be visible in Xen's view if we see any of the
         * infrastructure capable of hiding them.
         */
        if ( cpu_has_tsx_ctrl || cpu_has_tsx_force_abort )
        {
            setup_force_cpu_cap(X86_FEATURE_HLE);
            setup_force_cpu_cap(X86_FEATURE_RTM);
        }
    }
    /*
     * Reached on every call: by falling out of (or skipping) the one-time
     * probe above, or via the FIRMWARE BUG path which jumps here directly.
     */
 done_probe:

    /*
     * Note: MSR_TSX_CTRL is enumerated on TSX-enabled MDS_NO and later parts.
     * MSR_TSX_FORCE_ABORT is enumerated on TSX-enabled pre-MDS_NO Skylake
     * parts only. The two features are on a disjoint set of CPUs, and not
     * offered to guests by hypervisors.
     */
    if ( cpu_has_tsx_ctrl )
    {
        /*
         * On a TAA-vulnerable part with at least the November 2019 microcode,
         * or newer part with TAA fixed.
         *
         * Notes:
         *  - With the February 2022 microcode, if SGX has caused TSX to be
         *    locked off, opt_tsx is overridden to 0. TSX_CTRL.RTM_DISABLE is
         *    an ignored bit, but we write it such that it matches the
         *    behaviour enforced by microcode.
         *  - Otherwise, if SGX isn't enabled and TSX is available to be
         *    controlled, we have or will set MSR_MCU_OPT_CTRL.RTM_ALLOW to
         *    let TSX_CTRL.RTM_DISABLE be usable.
         */
        uint32_t hi, lo;

        rdmsr(MSR_TSX_CTRL, lo, hi);

        /* Check bottom bit only. Higher bits are various sentinels. */
        rtm_disabled = !(opt_tsx & 1);

        /* Clear both control bits first, then set them together if disabling. */
        lo &= ~(TSX_CTRL_RTM_DISABLE | TSX_CTRL_CPUID_CLEAR);
        if ( rtm_disabled )
            lo |= TSX_CTRL_RTM_DISABLE | TSX_CTRL_CPUID_CLEAR;

        wrmsr(MSR_TSX_CTRL, lo, hi);
    }
    else if ( cpu_has_tsx_force_abort )
    {
        /*
         * On an early TSX-enabled Skylake part subject to the memory ordering
         * erratum, with at least the March 2019 microcode.
         */
        uint32_t hi, lo;

        rdmsr(MSR_TSX_FORCE_ABORT, lo, hi);

        /* Check bottom bit only. Higher bits are various sentinels. */
        rtm_disabled = !(opt_tsx & 1);

        /* Start from all three control bits clear, then set what is needed. */
        lo &= ~(TSX_FORCE_ABORT_RTM | TSX_CPUID_CLEAR | TSX_ENABLE_RTM);

        if ( cpu_has_rtm_always_abort )
        {
            /*
             * June 2021 microcode, on a client part with TSX de-featured:
             *  - There are no mitigations for the TSX memory ordering errata.
             *  - Performance counter 3 works. (I.e. it isn't being used by
             *    microcode to work around the memory ordering errata.)
             *  - TSX_FORCE_ABORT.FORCE_ABORT_RTM is fixed read1/write-discard.
             *  - TSX_FORCE_ABORT.TSX_CPUID_CLEAR can be used to hide the
             *    HLE/RTM CPUID bits.
             *  - TSX_FORCE_ABORT.ENABLE_RTM may be used to opt in to
             *    re-enabling RTM, at the user's own risk.
             */
            lo |= rtm_disabled ? TSX_CPUID_CLEAR : TSX_ENABLE_RTM;
        }
        else
        {
            /*
             * Either a server part where TSX isn't de-featured, or pre-June
             * 2021 microcode:
             *  - By default, the TSX memory ordering errata is worked around
             *    in microcode at the cost of Performance Counter 3.
             *  - "Working TSX" vs "Working PCR3" can be selected by way of
             *    setting TSX_FORCE_ABORT.FORCE_ABORT_RTM.
             */
            if ( rtm_disabled )
                lo |= TSX_FORCE_ABORT_RTM;
        }

        wrmsr(MSR_TSX_FORCE_ABORT, lo, hi);
    }
    else if ( opt_tsx >= 0 )
        /* An explicit tsx= was given, but there is nothing to program. */
        printk_once(XENLOG_WARNING
                    "TSX controls not available - Ignoring tsx= setting\n");
}
316
317 /*
318 * Local variables:
319 * mode: C
320 * c-file-style: "BSD"
321 * c-basic-offset: 4
322 * tab-width: 4
323 * indent-tabs-mode: nil
324 * End:
325 */
326