1 #include <xen/init.h>
2 #include <xen/param.h>
3 #include <asm/microcode.h>
4 #include <asm/msr.h>
5
/*
 * State of the "tsx=" command line option, plus internal sentinel values.
 *
 * Valid values:
 *   1 => Explicit tsx=1
 *   0 => Explicit tsx=0
 *  -1 => Default, altered to 0/1 (if unspecified) by:
 *          - TAA heuristics/settings for speculative safety
 *          - "TSX vs PCR3" select for TSX memory ordering safety
 *  -2 => Implicit tsx=0 (from RTM_ALWAYS_ABORT vs RTM mismatch)
 *  -3 => Implicit tsx=1 (feed-through from spec-ctrl=0)
 *
 * This is arranged such that the bottom bit encodes whether TSX is actually
 * enabled (bit set) or disabled (bit clear), while the sign identifies the
 * various explicit (>=0) and implicit (<0) conditions.
 *
 * This option only has any effect on systems presenting a mechanism of
 * controlling TSX behaviour, and where TSX isn't force-disabled by firmware.
 */
int8_t __read_mostly opt_tsx = -1;

/*
 * Written by tsx_init() as !(opt_tsx & 1) whenever it programs MSR_TSX_CTRL
 * or MSR_TSX_FORCE_ABORT.
 */
bool __read_mostly rtm_disabled;
25
parse_tsx(const char * s)26 static int __init cf_check parse_tsx(const char *s)
27 {
28 int rc = 0, val = parse_bool(s, NULL);
29
30 if ( val >= 0 )
31 opt_tsx = val;
32 else
33 rc = -EINVAL;
34
35 return rc;
36 }
37 custom_param("tsx", parse_tsx);
38
/*
 * Probe the TSX control mechanisms and program them according to opt_tsx.
 *
 * The probing half (everything inside the "once" guard) runs on the first
 * call only.  It reads MSR_MCU_OPT_CTRL / MSR_TSX_FORCE_ABORT where
 * enumerated, resolves a still-defaulted opt_tsx to a concrete value, and
 * forces or clears the relevant synthetic CPU feature bits.
 *
 * The programming half (after the done_probe label) runs on every call.  It
 * derives rtm_disabled from the bottom bit of opt_tsx and writes
 * MSR_TSX_CTRL or MSR_TSX_FORCE_ABORT accordingly.  If neither MSR is
 * enumerated and an explicit tsx= value was given, a one-time warning is
 * printed instead.
 */
void tsx_init(void)
{
    static bool __read_mostly once;

    /*
     * This function is first called between microcode being loaded, and
     * CPUID being scanned generally. early_cpu_init() has already prepared
     * the feature bits needed here. And early_microcode_init() has ensured
     * they are not stale after the microcode update.
     */
    if ( unlikely(!once) )
    {
        bool has_rtm_always_abort;

        once = true;

        /*
         * Start from the enumerated state.  The probes below may upgrade
         * this to true if an enable bit is found to be hiding the feature.
         */
        has_rtm_always_abort = cpu_has_rtm_always_abort;

        if ( cpu_has_tsx_ctrl && cpu_has_srbds_ctrl )
        {
            /*
             * On a TAA-vulnerable or later part with at least the May 2020
             * microcode mitigating SRBDS.
             */
            uint64_t val;

            rdmsrl(MSR_MCU_OPT_CTRL, val);

            /*
             * Probe for the February 2022 microcode which de-features TSX on
             * TAA-vulnerable client parts - WHL-R/CFL-R.
             *
             * RTM_ALWAYS_ABORT (read above) enumerates the new functionality,
             * but is read as zero if MCU_OPT_CTRL.RTM_ALLOW has been set
             * before we run. Undo this.
             */
            if ( val & MCU_OPT_CTRL_RTM_ALLOW )
                has_rtm_always_abort = true;

            if ( has_rtm_always_abort )
            {
                if ( val & MCU_OPT_CTRL_RTM_LOCKED )
                {
                    /*
                     * If RTM_LOCKED is set, TSX is disabled because SGX is
                     * enabled, and there is nothing we can do. Override with
                     * tsx=0 so all other logic takes sensible actions.
                     */
                    printk(XENLOG_WARNING "TSX locked by firmware - disabling\n");
                    opt_tsx = 0;
                }
                else
                {
                    /*
                     * Otherwise, set RTM_ALLOW. Not because we necessarily
                     * intend to enable RTM, but it prevents
                     * MSR_TSX_CTRL.RTM_DISABLE from being ignored, thus
                     * allowing the rest of the TSX selection logic to work as
                     * before.
                     */
                    val |= MCU_OPT_CTRL_RTM_ALLOW;
                }

                set_in_mcu_opt_ctrl(
                    MCU_OPT_CTRL_RTM_LOCKED | MCU_OPT_CTRL_RTM_ALLOW, val);

                /*
                 * If no explicit tsx= option is provided, pick a default.
                 *
                 * With RTM_ALWAYS_ABORT, the default ucode behaviour is to
                 * disable, so match that. This does not override explicit user
                 * choices, or implicit choices as a side effect of spec-ctrl=0.
                 */
                if ( opt_tsx == -1 )
                    opt_tsx = 0;
            }
        }

        if ( cpu_has_tsx_force_abort )
        {
            uint64_t val;

            /*
             * On an early TSX-enabled Skylake part subject to the memory
             * ordering erratum, with at least the March 2019 microcode.
             */

            rdmsrl(MSR_TSX_FORCE_ABORT, val);

            /*
             * At the time of writing (April 2024), it was discovered that
             * some parts (e.g. CoffeeLake 8th Gen, 06-9e-0a, ucode 0xf6)
             * advertise RTM_ALWAYS_ABORT, but XBEGIN instructions #UD. Other
             * similar parts (e.g. KabyLake Xeon-E3, 06-9e-09, ucode 0xf8)
             * operate as expected.
             *
             * In this case:
             * - RTM_ALWAYS_ABORT and MSR_TSX_FORCE_ABORT are enumerated.
             * - XBEGIN instructions genuinely #UD.
             * - MSR_TSX_FORCE_ABORT appears to be write-discard and fails to
             *   hold its value.
             * - HLE and RTM are not enumerated, despite
             *   MSR_TSX_FORCE_ABORT.TSX_CPUID_CLEAR being clear.
             *
             * Spot RTM being unavailable without TSX_CPUID_CLEAR being set,
             * and treat it as if no TSX is available at all. This will
             * prevent Xen from thinking it's safe to offer HLE/RTM to VMs.
             */
            if ( val == 0 && cpu_has_rtm_always_abort && !cpu_has_rtm )
            {
                printk(XENLOG_ERR
                       "FIRMWARE BUG: CPU %02x-%02x-%02x, ucode 0x%08x: RTM_ALWAYS_ABORT vs RTM mismatch\n",
                       boot_cpu_data.x86, boot_cpu_data.x86_model,
                       boot_cpu_data.x86_mask, this_cpu(cpu_sig).rev);

                setup_clear_cpu_cap(X86_FEATURE_RTM_ALWAYS_ABORT);
                setup_clear_cpu_cap(X86_FEATURE_TSX_FORCE_ABORT);

                /* Record the implicit tsx=0, unless the user was explicit. */
                if ( opt_tsx < 0 )
                    opt_tsx = -2;

                /*
                 * Skip the remaining probing, including the forcing of the
                 * RTM_ALWAYS_ABORT/HLE/RTM feature bits below.
                 */
                goto done_probe;
            }

            /*
             * Probe for the June 2021 microcode which de-features TSX on
             * client parts. (Note - this is a subset of parts impacted by
             * the memory ordering errata.)
             *
             * RTM_ALWAYS_ABORT enumerates the new functionality, but is also
             * read as zero if TSX_FORCE_ABORT.ENABLE_RTM has been set before
             * we run.
             */
            if ( val & TSX_ENABLE_RTM )
                has_rtm_always_abort = true;

            /*
             * If no explicit tsx= option is provided, pick a default.
             *
             * This deliberately overrides the implicit opt_tsx=-3 from
             * `spec-ctrl=0` because:
             * - parse_spec_ctrl() ran before any CPU details were known.
             * - We now know we're running on a CPU not affected by TAA (as
             *   TSX_FORCE_ABORT is enumerated).
             * - When RTM_ALWAYS_ABORT is enumerated, TSX malfunctions, so we
             *   only ever want it enabled by explicit user choice.
             *
             * Without RTM_ALWAYS_ABORT, leave TSX active. In particular,
             * this includes SKX where TSX is still supported.
             *
             * With RTM_ALWAYS_ABORT, disable TSX.
             */
            if ( opt_tsx < 0 )
                opt_tsx = !has_rtm_always_abort;
        }

        /*
         * Always force RTM_ALWAYS_ABORT, even if it is currently visible. If
         * the user explicitly opts to enable TSX, we'll set the appropriate
         * RTM_ENABLE bit and cause RTM_ALWAYS_ABORT to be hidden from the
         * general CPUID scan later.
         */
        if ( has_rtm_always_abort )
            setup_force_cpu_cap(X86_FEATURE_RTM_ALWAYS_ABORT);

        /*
         * The TSX features (HLE/RTM) are handled specially. They both
         * enumerate features but, on certain parts, have mechanisms to be
         * hidden without disrupting running software.
         *
         * At the moment, we're running in an unknown context (WRT hiding -
         * particularly if another fully fledged kernel ran before us) and
         * depending on user settings, may elect to continue hiding them from
         * native CPUID instructions.
         *
         * Xen doesn't use TSX itself, but uses cpu_has_{hle,rtm} for various
         * system reasons, mostly errata detection, so the meaning is more
         * useful as "TSX infrastructure available", as opposed to "features
         * advertised and working".
         *
         * Force the features to be visible in Xen's view if we see any of the
         * infrastructure capable of hiding them.
         */
        if ( cpu_has_tsx_ctrl || cpu_has_tsx_force_abort )
        {
            setup_force_cpu_cap(X86_FEATURE_HLE);
            setup_force_cpu_cap(X86_FEATURE_RTM);
        }
    }
 done_probe:

    /*
     * Note: MSR_TSX_CTRL is enumerated on TSX-enabled MDS_NO and later parts.
     * MSR_TSX_FORCE_ABORT is enumerated on TSX-enabled pre-MDS_NO Skylake
     * parts only. The two features are on a disjoint set of CPUs, and not
     * offered to guests by hypervisors.
     */
    if ( cpu_has_tsx_ctrl )
    {
        /*
         * On a TAA-vulnerable part with at least the November 2019 microcode,
         * or newer part with TAA fixed.
         *
         * Notes:
         * - With the February 2022 microcode, if SGX has caused TSX to be
         *   locked off, opt_tsx is overridden to 0. TSX_CTRL.RTM_DISABLE is
         *   an ignored bit, but we write it such that it matches the
         *   behaviour enforced by microcode.
         * - Otherwise, if SGX isn't enabled and TSX is available to be
         *   controlled, we have or will set MSR_MCU_OPT_CTRL.RTM_ALLOW to
         *   let TSX_CTRL.RTM_DISABLE be usable.
         */
        uint32_t hi, lo;

        rdmsr(MSR_TSX_CTRL, lo, hi);

        /* Check bottom bit only. Higher bits are various sentinels. */
        rtm_disabled = !(opt_tsx & 1);

        /* Clear both control bits, then set them together if disabling. */
        lo &= ~(TSX_CTRL_RTM_DISABLE | TSX_CTRL_CPUID_CLEAR);
        if ( rtm_disabled )
            lo |= TSX_CTRL_RTM_DISABLE | TSX_CTRL_CPUID_CLEAR;

        wrmsr(MSR_TSX_CTRL, lo, hi);
    }
    else if ( cpu_has_tsx_force_abort )
    {
        /*
         * On an early TSX-enabled Skylake part subject to the memory ordering
         * erratum, with at least the March 2019 microcode.
         */
        uint32_t hi, lo;

        rdmsr(MSR_TSX_FORCE_ABORT, lo, hi);

        /* Check bottom bit only. Higher bits are various sentinels. */
        rtm_disabled = !(opt_tsx & 1);

        /* Start from all three control bits clear; set the one needed below. */
        lo &= ~(TSX_FORCE_ABORT_RTM | TSX_CPUID_CLEAR | TSX_ENABLE_RTM);

        if ( cpu_has_rtm_always_abort )
        {
            /*
             * June 2021 microcode, on a client part with TSX de-featured:
             * - There are no mitigations for the TSX memory ordering errata.
             * - Performance counter 3 works. (I.e. it isn't being used by
             *   microcode to work around the memory ordering errata.)
             * - TSX_FORCE_ABORT.FORCE_ABORT_RTM is fixed read1/write-discard.
             * - TSX_FORCE_ABORT.TSX_CPUID_CLEAR can be used to hide the
             *   HLE/RTM CPUID bits.
             * - TSX_FORCE_ABORT.ENABLE_RTM may be used to opt in to
             *   re-enabling RTM, at the user's own risk.
             */
            lo |= rtm_disabled ? TSX_CPUID_CLEAR : TSX_ENABLE_RTM;
        }
        else
        {
            /*
             * Either a server part where TSX isn't de-featured, or pre-June
             * 2021 microcode:
             * - By default, the TSX memory ordering errata is worked around
             *   in microcode at the cost of Performance Counter 3.
             * - "Working TSX" vs "Working PCR3" can be selected by way of
             *   setting TSX_FORCE_ABORT.FORCE_ABORT_RTM.
             */
            if ( rtm_disabled )
                lo |= TSX_FORCE_ABORT_RTM;
        }

        wrmsr(MSR_TSX_FORCE_ABORT, lo, hi);
    }
    else if ( opt_tsx >= 0 )
        /* Explicit tsx= given, but there is no MSR to apply it with. */
        printk_once(XENLOG_WARNING
                    "TSX controls not available - Ignoring tsx= setting\n");
}
314
315 /*
316 * Local variables:
317 * mode: C
318 * c-file-style: "BSD"
319 * c-basic-offset: 4
320 * tab-width: 4
321 * indent-tabs-mode: nil
322 * End:
323 */
324