1 #include <xen/init.h>
2 #include <xen/param.h>
3 
4 #include <asm/microcode.h>
5 #include <asm/msr.h>
6 #include <asm/processor.h>
7 
/*
 * Valid values:
 *   1 => Explicit tsx=1
 *   0 => Explicit tsx=0
 *  -1 => Default, altered to 0/1 (if unspecified) by:
 *                 - TAA heuristics/settings for speculative safety
 *                 - "TSX vs PCR3" select for TSX memory ordering safety
 *  -2 => Implicit tsx=0 (from RTM_ALWAYS_ABORT vs RTM mismatch)
 *  -3 => Implicit tsx=1 (feed-through from spec-ctrl=0)
 *
 * This is arranged such that the bottom bit encodes whether TSX is actually
 * disabled, while identifying various explicit (>=0) and implicit (<0)
 * conditions.
 *
 * This option only has any effect on systems presenting a mechanism of
 * controlling TSX behaviour, and where TSX isn't force-disabled by firmware.
 */
25 int8_t __read_mostly opt_tsx = -1;
26 bool __read_mostly rtm_disabled;
27 
parse_tsx(const char * s)28 static int __init cf_check parse_tsx(const char *s)
29 {
30     int rc = 0, val = parse_bool(s, NULL);
31 
32     if ( val >= 0 )
33         opt_tsx = val;
34     else
35         rc = -EINVAL;
36 
37     return rc;
38 }
39 custom_param("tsx", parse_tsx);
40 
/*
 * Probe the available TSX control facilities, resolve opt_tsx to a final
 * 0/1 policy, and program the relevant MSR on the current CPU.
 *
 * NOTE(review): the 'once' latch plus the unconditional MSR writes at the
 * bottom suggest this runs once on the boot CPU for probing/policy
 * selection, and again on other CPUs (and across resume) purely to
 * re-apply MSR state — confirm against callers.
 */
void tsx_init(void)
{
    /* Latch so the probing/default-selection logic runs exactly once. */
    static bool __read_mostly once;

    /*
     * This function is first called between microcode being loaded, and
     * CPUID being scanned generally.  early_cpu_init() has already prepared
     * the feature bits needed here.  And early_microcode_init() has ensured
     * they are not stale after the microcode update.
     */
    if ( unlikely(!once) )
    {
        bool has_rtm_always_abort;

        once = true;

        /*
         * Start from the CPUID-enumerated value.  The probes below may
         * discover that the functionality exists but is currently hidden.
         */
        has_rtm_always_abort = cpu_has_rtm_always_abort;

        if ( cpu_has_tsx_ctrl && cpu_has_srbds_ctrl )
        {
            /*
             * On a TAA-vulnerable or later part with at least the May 2020
             * microcode mitigating SRBDS.
             */
            uint64_t val;

            rdmsrl(MSR_MCU_OPT_CTRL, val);

            /*
             * Probe for the February 2022 microcode which de-features TSX on
             * TAA-vulnerable client parts - WHL-R/CFL-R.
             *
             * RTM_ALWAYS_ABORT (read above) enumerates the new functionality,
             * but is read as zero if MCU_OPT_CTRL.RTM_ALLOW has been set
             * before we run.  Undo this.
             */
            if ( val & MCU_OPT_CTRL_RTM_ALLOW )
                has_rtm_always_abort = true;

            if ( has_rtm_always_abort )
            {
                if ( val & MCU_OPT_CTRL_RTM_LOCKED )
                {
                    /*
                     * If RTM_LOCKED is set, TSX is disabled because SGX is
                     * enabled, and there is nothing we can do.  Override with
                     * tsx=0 so all other logic takes sensible actions.
                     */
                    printk(XENLOG_WARNING "TSX locked by firmware - disabling\n");
                    opt_tsx = 0;
                }
                else
                {
                    /*
                     * Otherwise, set RTM_ALLOW.  Not because we necessarily
                     * intend to enable RTM, but it prevents
                     * MSR_TSX_CTRL.RTM_DISABLE from being ignored, thus
                     * allowing the rest of the TSX selection logic to work as
                     * before.
                     */
                    val |= MCU_OPT_CTRL_RTM_ALLOW;
                }

                set_in_mcu_opt_ctrl(
                    MCU_OPT_CTRL_RTM_LOCKED | MCU_OPT_CTRL_RTM_ALLOW, val);

                /*
                 * If no explicit tsx= option is provided, pick a default.
                 *
                 * With RTM_ALWAYS_ABORT, the default ucode behaviour is to
                 * disable, so match that.  This does not override explicit user
                 * choices, or implicit choices as a side effect of spec-ctrl=0.
                 */
                if ( opt_tsx == -1 )
                    opt_tsx = 0;
            }
        }

        if ( cpu_has_tsx_force_abort )
        {
            uint64_t val;

            /*
             * On an early TSX-enabled Skylake part subject to the memory
             * ordering erratum, with at least the March 2019 microcode.
             */

            rdmsrl(MSR_TSX_FORCE_ABORT, val);

            /*
             * At the time of writing (April 2024), it was discovered that
             * some parts (e.g. CoffeeLake 8th Gen, 06-9e-0a, ucode 0xf6)
             * advertise RTM_ALWAYS_ABORT, but XBEGIN instructions #UD.  Other
             * similar parts (e.g. KabyLake Xeon-E3, 06-9e-09, ucode 0xf8)
             * operate as expected.
             *
             * In this case:
             *  - RTM_ALWAYS_ABORT and MSR_TSX_FORCE_ABORT are enumerated.
             *  - XBEGIN instructions genuinely #UD.
             *  - MSR_TSX_FORCE_ABORT appears to be write-discard and fails to
             *    hold its value.
             *  - HLE and RTM are not enumerated, despite
             *    MSR_TSX_FORCE_ABORT.TSX_CPUID_CLEAR being clear.
             *
             * Spot RTM being unavailable without CLEAR_CPUID being set, and
             * treat it as if no TSX is available at all.  This will prevent
             * Xen from thinking it's safe to offer HLE/RTM to VMs.
             */
            if ( val == 0 && cpu_has_rtm_always_abort && !cpu_has_rtm )
            {
                printk(XENLOG_ERR
                       "FIRMWARE BUG: CPU %02x-%02x-%02x, ucode 0x%08x: RTM_ALWAYS_ABORT vs RTM mismatch\n",
                       boot_cpu_data.x86, boot_cpu_data.x86_model,
                       boot_cpu_data.x86_mask, this_cpu(cpu_sig).rev);

                setup_clear_cpu_cap(X86_FEATURE_RTM_ALWAYS_ABORT);
                setup_clear_cpu_cap(X86_FEATURE_TSX_FORCE_ABORT);

                /* Record "implicit tsx=0", preserving any explicit choice. */
                if ( opt_tsx < 0 )
                    opt_tsx = -2;

                /* Skip the MSR programming below; TSX is unusable here. */
                goto done_probe;
            }

            /*
             * Probe for the June 2021 microcode which de-features TSX on
             * client parts.  (Note - this is a subset of parts impacted by
             * the memory ordering errata.)
             *
             * RTM_ALWAYS_ABORT enumerates the new functionality, but is also
             * read as zero if TSX_FORCE_ABORT.ENABLE_RTM has been set before
             * we run.
             */
            if ( val & TSX_ENABLE_RTM )
                has_rtm_always_abort = true;

            /*
             * If no explicit tsx= option is provided, pick a default.
             *
             * This deliberately overrides the implicit opt_tsx=-3 from
             * `spec-ctrl=0` because:
             * - parse_spec_ctrl() ran before any CPU details were known.
             * - We now know we're running on a CPU not affected by TAA (as
             *   TSX_FORCE_ABORT is enumerated).
             * - When RTM_ALWAYS_ABORT is enumerated, TSX malfunctions, so we
             *   only ever want it enabled by explicit user choice.
             *
             * Without RTM_ALWAYS_ABORT, leave TSX active.  In particular,
             * this includes SKX where TSX is still supported.
             *
             * With RTM_ALWAYS_ABORT, disable TSX.
             */
            if ( opt_tsx < 0 )
                opt_tsx = !has_rtm_always_abort;
        }

        /*
         * Always force RTM_ALWAYS_ABORT, even when it is not currently
         * visible (the CPUID bit reads as zero once RTM_ALLOW/ENABLE_RTM has
         * been set - see the probes above).  If the user explicitly opts to
         * enable TSX, we'll set the appropriate RTM_ENABLE bit and cause
         * RTM_ALWAYS_ABORT to be hidden from the general CPUID scan later.
         */
        if ( has_rtm_always_abort )
            setup_force_cpu_cap(X86_FEATURE_RTM_ALWAYS_ABORT);

        /*
         * The TSX features (HLE/RTM) are handled specially.  They both
         * enumerate features but, on certain parts, have mechanisms to be
         * hidden without disrupting running software.
         *
         * At the moment, we're running in an unknown context (WRT hiding -
         * particularly if another fully fledged kernel ran before us) and
         * depending on user settings, may elect to continue hiding them from
         * native CPUID instructions.
         *
         * Xen doesn't use TSX itself, but use cpu_has_{hle,rtm} for various
         * system reasons, mostly errata detection, so the meaning is more
         * useful as "TSX infrastructure available", as opposed to "features
         * advertised and working".
         *
         * Force the features to be visible in Xen's view if we see any of the
         * infrastructure capable of hiding them.
         */
        if ( cpu_has_tsx_ctrl || cpu_has_tsx_force_abort )
        {
            setup_force_cpu_cap(X86_FEATURE_HLE);
            setup_force_cpu_cap(X86_FEATURE_RTM);
        }
    }
 done_probe:

    /*
     * Note: MSR_TSX_CTRL is enumerated on TSX-enabled MDS_NO and later parts.
     * MSR_TSX_FORCE_ABORT is enumerated on TSX-enabled pre-MDS_NO Skylake
     * parts only.  The two features are on a disjoint set of CPUs, and not
     * offered to guests by hypervisors.
     */
    if ( cpu_has_tsx_ctrl )
    {
        /*
         * On a TAA-vulnerable part with at least the November 2019 microcode,
         * or newer part with TAA fixed.
         *
         * Notes:
         *  - With the February 2022 microcode, if SGX has caused TSX to be
         *    locked off, opt_tsx is overridden to 0.  TSX_CTRL.RTM_DISABLE is
         *    an ignored bit, but we write it such that it matches the
         *    behaviour enforced by microcode.
         *  - Otherwise, if SGX isn't enabled and TSX is available to be
         *    controlled, we have or will set MSR_MCU_OPT_CTRL.RTM_ALLOW to
         *    let TSX_CTRL.RTM_DISABLE be usable.
         */
        uint32_t hi, lo;

        rdmsr(MSR_TSX_CTRL, lo, hi);

        /* Check bottom bit only.  Higher bits are various sentinels. */
        rtm_disabled = !(opt_tsx & 1);

        lo &= ~(TSX_CTRL_RTM_DISABLE | TSX_CTRL_CPUID_CLEAR);
        if ( rtm_disabled )
            lo |= TSX_CTRL_RTM_DISABLE | TSX_CTRL_CPUID_CLEAR;

        wrmsr(MSR_TSX_CTRL, lo, hi);
    }
    else if ( cpu_has_tsx_force_abort )
    {
        /*
         * On an early TSX-enabled Skylake part subject to the memory ordering
         * erratum, with at least the March 2019 microcode.
         */
        uint32_t hi, lo;

        rdmsr(MSR_TSX_FORCE_ABORT, lo, hi);

        /* Check bottom bit only.  Higher bits are various sentinels. */
        rtm_disabled = !(opt_tsx & 1);

        lo &= ~(TSX_FORCE_ABORT_RTM | TSX_CPUID_CLEAR | TSX_ENABLE_RTM);

        if ( cpu_has_rtm_always_abort )
        {
            /*
             * June 2021 microcode, on a client part with TSX de-featured:
             *  - There are no mitigations for the TSX memory ordering errata.
             *  - Performance counter 3 works.  (I.e. it isn't being used by
             *    microcode to work around the memory ordering errata.)
             *  - TSX_FORCE_ABORT.FORCE_ABORT_RTM is fixed read1/write-discard.
             *  - TSX_FORCE_ABORT.TSX_CPUID_CLEAR can be used to hide the
             *    HLE/RTM CPUID bits.
             *  - TSX_FORCE_ABORT.ENABLE_RTM may be used to opt in to
             *    re-enabling RTM, at the user's own risk.
             */
            lo |= rtm_disabled ? TSX_CPUID_CLEAR : TSX_ENABLE_RTM;
        }
        else
        {
            /*
             * Either a server part where TSX isn't de-featured, or pre-June
             * 2021 microcode:
             *  - By default, the TSX memory ordering errata is worked around
             *    in microcode at the cost of Performance Counter 3.
             *  - "Working TSX" vs "Working PCR3" can be selected by way of
             *    setting TSX_FORCE_ABORT.FORCE_ABORT_RTM.
             */
            if ( rtm_disabled )
                lo |= TSX_FORCE_ABORT_RTM;
        }

        wrmsr(MSR_TSX_FORCE_ABORT, lo, hi);
    }
    else if ( opt_tsx >= 0 )
        printk_once(XENLOG_WARNING
                    "TSX controls not available - Ignoring tsx= setting\n");
}
316 
317 /*
318  * Local variables:
319  * mode: C
320  * c-file-style: "BSD"
321  * c-basic-offset: 4
322  * tab-width: 4
323  * indent-tabs-mode: nil
324  * End:
325  */
326