#include <xen/init.h>
#include <xen/param.h>
#include <asm/microcode.h>
#include <asm/msr.h>

/*
 * Valid values:
 *   1 => Explicit tsx=1
 *   0 => Explicit tsx=0
 *  -1 => Default, altered to 0/1 (if unspecified) by:
 *                 - TAA heuristics/settings for speculative safety
 *                 - "TSX vs PCR3" select for TSX memory ordering safety
 *  -2 => Implicit tsx=0 (from RTM_ALWAYS_ABORT vs RTM mismatch)
 *  -3 => Implicit tsx=1 (feed-through from spec-ctrl=0)
 *
 * This is arranged such that the bottom bit encodes whether TSX is actually
 * disabled, while identifying various explicit (>=0) and implicit (<0)
 * conditions.
 *
 * This option only has any effect on systems presenting a mechanism of
 * controlling TSX behaviour, and where TSX isn't force-disabled by firmware.
 */
int8_t __read_mostly opt_tsx = -1;
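/*
 * Whether RTM ends up disabled on this system.  Derived in tsx_init() from
 * the bottom bit of opt_tsx, and mirrored into the relevant control MSR.
 */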
bool __read_mostly rtm_disabled;

static int __init cf_check parse_tsx(const char *s)
{
    int rc = 0, val = parse_bool(s, NULL);

    if ( val >= 0 )
        opt_tsx = val;
    else
        rc = -EINVAL;

    return rc;
}
custom_param("tsx", parse_tsx);
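/*
 * Accepts the usual boolean spellings on the Xen command line, e.g. "tsx=0"
 * or "tsx=no" to request TSX be disabled, and "tsx=1" to keep it enabled
 * where a control exists.
 */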

void tsx_init(void)
{
    static bool __read_mostly once;

    /*
     * This function is first called between microcode being loaded, and
     * CPUID being scanned generally.  early_cpu_init() has already prepared
     * the feature bits needed here.  And early_microcode_init() has ensured
     * they are not stale after the microcode update.
     */
    if ( unlikely(!once) )
    {
        bool has_rtm_always_abort;

        once = true;

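        /*
         * Snapshot the CPUID-visible value.  The probes below may refine it,
         * as the bit reads as zero if a control MSR has already been used to
         * re-enable RTM before we run.
         */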
        has_rtm_always_abort = cpu_has_rtm_always_abort;

        if ( cpu_has_tsx_ctrl && cpu_has_srbds_ctrl )
        {
            /*
             * On a TAA-vulnerable or later part with at least the May 2020
             * microcode mitigating SRBDS.
             */
            uint64_t val;

            rdmsrl(MSR_MCU_OPT_CTRL, val);

            /*
             * Probe for the February 2022 microcode which de-features TSX on
             * TAA-vulnerable client parts - WHL-R/CFL-R.
             *
             * RTM_ALWAYS_ABORT (read above) enumerates the new functionality,
             * but is read as zero if MCU_OPT_CTRL.RTM_ALLOW has been set
             * before we run.  Undo this.
             */
            if ( val & MCU_OPT_CTRL_RTM_ALLOW )
                has_rtm_always_abort = true;

            if ( has_rtm_always_abort )
            {
                if ( val & MCU_OPT_CTRL_RTM_LOCKED )
                {
                    /*
                     * If RTM_LOCKED is set, TSX is disabled because SGX is
                     * enabled, and there is nothing we can do.  Override with
                     * tsx=0 so all other logic takes sensible actions.
                     */
                    printk(XENLOG_WARNING "TSX locked by firmware - disabling\n");
                    opt_tsx = 0;
                }
                else
                {
                    /*
                     * Otherwise, set RTM_ALLOW.  Not because we necessarily
                     * intend to enable RTM, but it prevents
                     * MSR_TSX_CTRL.RTM_DISABLE from being ignored, thus
                     * allowing the rest of the TSX selection logic to work as
                     * before.
                     */
                    val |= MCU_OPT_CTRL_RTM_ALLOW;
                }

                set_in_mcu_opt_ctrl(
                    MCU_OPT_CTRL_RTM_LOCKED | MCU_OPT_CTRL_RTM_ALLOW, val);

                /*
                 * If no explicit tsx= option is provided, pick a default.
                 *
                 * With RTM_ALWAYS_ABORT, the default ucode behaviour is to
                 * disable, so match that.  This does not override explicit user
                 * choices, or implicit choices as a side effect of spec-ctrl=0.
                 */
                if ( opt_tsx == -1 )
                    opt_tsx = 0;
            }
        }

        if ( cpu_has_tsx_force_abort )
        {
            uint64_t val;

            /*
             * On an early TSX-enabled Skylake part subject to the memory
             * ordering erratum, with at least the March 2019 microcode.
             */

            rdmsrl(MSR_TSX_FORCE_ABORT, val);

            /*
             * At the time of writing (April 2024), it was discovered that
             * some parts (e.g. CoffeeLake 8th Gen, 06-9e-0a, ucode 0xf6)
             * advertise RTM_ALWAYS_ABORT, but XBEGIN instructions #UD.  Other
             * similar parts (e.g. KabyLake Xeon-E3, 06-9e-09, ucode 0xf8)
             * operate as expected.
             *
             * In this case:
             *  - RTM_ALWAYS_ABORT and MSR_TSX_FORCE_ABORT are enumerated.
             *  - XBEGIN instructions genuinely #UD.
             *  - MSR_TSX_FORCE_ABORT appears to be write-discard and fails to
             *    hold its value.
             *  - HLE and RTM are not enumerated, despite
             *    MSR_TSX_FORCE_ABORT.TSX_CPUID_CLEAR being clear.
             *
             * Spot RTM being unavailable without TSX_CPUID_CLEAR being set, and
             * treat it as if no TSX is available at all.  This will prevent
             * Xen from thinking it's safe to offer HLE/RTM to VMs.
             */
            if ( val == 0 && cpu_has_rtm_always_abort && !cpu_has_rtm )
            {
                printk(XENLOG_ERR
                       "FIRMWARE BUG: CPU %02x-%02x-%02x, ucode 0x%08x: RTM_ALWAYS_ABORT vs RTM mismatch\n",
                       boot_cpu_data.x86, boot_cpu_data.x86_model,
                       boot_cpu_data.x86_mask, this_cpu(cpu_sig).rev);

                setup_clear_cpu_cap(X86_FEATURE_RTM_ALWAYS_ABORT);
                setup_clear_cpu_cap(X86_FEATURE_TSX_FORCE_ABORT);

                if ( opt_tsx < 0 )
                    opt_tsx = -2;

                goto done_probe;
            }

            /*
             * Probe for the June 2021 microcode which de-features TSX on
             * client parts.  (Note - this is a subset of parts impacted by
             * the memory ordering errata.)
             *
             * RTM_ALWAYS_ABORT enumerates the new functionality, but is also
             * read as zero if TSX_FORCE_ABORT.ENABLE_RTM has been set before
             * we run.
             */
            if ( val & TSX_ENABLE_RTM )
                has_rtm_always_abort = true;

            /*
             * If no explicit tsx= option is provided, pick a default.
             *
             * This deliberately overrides the implicit opt_tsx=-3 from
             * `spec-ctrl=0` because:
             * - parse_spec_ctrl() ran before any CPU details were known.
             * - We now know we're running on a CPU not affected by TAA (as
             *   TSX_FORCE_ABORT is enumerated).
             * - When RTM_ALWAYS_ABORT is enumerated, TSX malfunctions, so we
             *   only ever want it enabled by explicit user choice.
             *
             * Without RTM_ALWAYS_ABORT, leave TSX active.  In particular,
             * this includes SKX where TSX is still supported.
             *
             * With RTM_ALWAYS_ABORT, disable TSX.
             */
            if ( opt_tsx < 0 )
                opt_tsx = !has_rtm_always_abort;
        }

        /*
         * Always force RTM_ALWAYS_ABORT, even if it is currently visible.  If
         * the user explicitly opts to enable TSX, we'll set the appropriate
         * RTM_ENABLE bit and cause RTM_ALWAYS_ABORT to be hidden from the
         * general CPUID scan later.
         */
        if ( has_rtm_always_abort )
            setup_force_cpu_cap(X86_FEATURE_RTM_ALWAYS_ABORT);

        /*
         * The TSX features (HLE/RTM) are handled specially.  They both
         * enumerate features but, on certain parts, have mechanisms to be
         * hidden without disrupting running software.
         *
         * At the moment, we're running in an unknown context (WRT hiding -
         * particularly if another fully fledged kernel ran before us) and
         * depending on user settings, may elect to continue hiding them from
         * native CPUID instructions.
         *
         * Xen doesn't use TSX itself, but uses cpu_has_{hle,rtm} for various
         * system reasons, mostly errata detection, so the meaning is more
         * useful as "TSX infrastructure available", as opposed to "features
         * advertised and working".
         *
         * Force the features to be visible in Xen's view if we see any of the
         * infrastructure capable of hiding them.
         */
        if ( cpu_has_tsx_ctrl || cpu_has_tsx_force_abort )
        {
            setup_force_cpu_cap(X86_FEATURE_HLE);
            setup_force_cpu_cap(X86_FEATURE_RTM);
        }
    }
 done_probe:
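    /*
     * Everything below runs on every invocation of tsx_init(), not just the
     * first; only the probing above is guarded by 'once'.
     */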

    /*
     * Note: MSR_TSX_CTRL is enumerated on TSX-enabled MDS_NO and later parts.
     * MSR_TSX_FORCE_ABORT is enumerated on TSX-enabled pre-MDS_NO Skylake
     * parts only.  The two features are on a disjoint set of CPUs, and not
     * offered to guests by hypervisors.
     */
    if ( cpu_has_tsx_ctrl )
    {
        /*
         * On a TAA-vulnerable part with at least the November 2019 microcode,
         * or newer part with TAA fixed.
         *
         * Notes:
         *  - With the February 2022 microcode, if SGX has caused TSX to be
         *    locked off, opt_tsx is overridden to 0.  TSX_CTRL.RTM_DISABLE is
         *    an ignored bit, but we write it such that it matches the
         *    behaviour enforced by microcode.
         *  - Otherwise, if SGX isn't enabled and TSX is available to be
         *    controlled, we have or will set MSR_MCU_OPT_CTRL.RTM_ALLOW to
         *    let TSX_CTRL.RTM_DISABLE be usable.
         */
        uint32_t hi, lo;

        rdmsr(MSR_TSX_CTRL, lo, hi);

        /* Check bottom bit only.  Higher bits are various sentinels. */
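        /*
         * e.g. the -2 sentinel (implicit tsx=0) has the bottom bit clear and
         * disables RTM, while -1/-3 have it set and leave RTM enabled.
         */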
        rtm_disabled = !(opt_tsx & 1);

        lo &= ~(TSX_CTRL_RTM_DISABLE | TSX_CTRL_CPUID_CLEAR);
        if ( rtm_disabled )
            lo |= TSX_CTRL_RTM_DISABLE | TSX_CTRL_CPUID_CLEAR;

        wrmsr(MSR_TSX_CTRL, lo, hi);
    }
    else if ( cpu_has_tsx_force_abort )
    {
        /*
         * On an early TSX-enabled Skylake part subject to the memory ordering
         * erratum, with at least the March 2019 microcode.
         */
        uint32_t hi, lo;

        rdmsr(MSR_TSX_FORCE_ABORT, lo, hi);

        /* Check bottom bit only.  Higher bits are various sentinels. */
        rtm_disabled = !(opt_tsx & 1);

        lo &= ~(TSX_FORCE_ABORT_RTM | TSX_CPUID_CLEAR | TSX_ENABLE_RTM);

        if ( cpu_has_rtm_always_abort )
        {
            /*
             * June 2021 microcode, on a client part with TSX de-featured:
             *  - There are no mitigations for the TSX memory ordering errata.
             *  - Performance counter 3 works.  (I.e. it isn't being used by
             *    microcode to work around the memory ordering errata.)
             *  - TSX_FORCE_ABORT.FORCE_ABORT_RTM is fixed read1/write-discard.
             *  - TSX_FORCE_ABORT.TSX_CPUID_CLEAR can be used to hide the
             *    HLE/RTM CPUID bits.
             *  - TSX_FORCE_ABORT.ENABLE_RTM may be used to opt in to
             *    re-enabling RTM, at the user's own risk.
             */
            lo |= rtm_disabled ? TSX_CPUID_CLEAR : TSX_ENABLE_RTM;
        }
        else
        {
            /*
             * Either a server part where TSX isn't de-featured, or pre-June
             * 2021 microcode:
             *  - By default, the TSX memory ordering errata is worked around
             *    in microcode at the cost of Performance Counter 3.
             *  - "Working TSX" vs "Working PCR3" can be selected by way of
             *    setting TSX_FORCE_ABORT.FORCE_ABORT_RTM.
             */
            if ( rtm_disabled )
                lo |= TSX_FORCE_ABORT_RTM;
        }

        wrmsr(MSR_TSX_FORCE_ABORT, lo, hi);
    }
    else if ( opt_tsx >= 0 )
        printk_once(XENLOG_WARNING
                    "TSX controls not available - Ignoring tsx= setting\n");
}

/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */