/* SPDX-License-Identifier: GPL-2.0-or-later */
/******************************************************************************
 * include/asm-x86/spec_ctrl.h
 *
 * Copyright (c) 2017-2018 Citrix Systems Ltd.
 */

#ifndef __X86_SPEC_CTRL_ASM_H__
#define __X86_SPEC_CTRL_ASM_H__

#ifdef __ASSEMBLY__
#include <asm/msr-index.h>
#include <asm/spec_ctrl.h>

/*
 * Saving and restoring MSR_SPEC_CTRL state is a little tricky.
 *
 * We want the guest's choice of SPEC_CTRL while in guest context, and Xen's
 * choice (set or clear, depending on the hardware) while running in Xen
 * context.  Therefore, a simplistic algorithm is:
 *
 *  - Set/clear IBRS on entry to Xen
 *  - Set the guest's choice on exit to guest
 *  - Leave SPEC_CTRL unchanged on exit to Xen
 *
 * There are two complicating factors:
 *  1) HVM guests can have direct access to the MSR, so it can change
 *     behind Xen's back.
 *  2) An NMI or MCE can interrupt at any point, including early in the entry
 *     path, or late in the exit path after restoring the guest value.  This
 *     will corrupt the guest value.
 *
 * Factor 1 is dealt with:
 *   - On VMX by using MSR load/save lists to have vmentry/exit atomically
 *     load/save the guest value.  Xen's value is loaded in regular code, and
 *     there is no need to use the shadow logic (below).
 *   - On SVM by altering MSR_SPEC_CTRL inside the CLGI/STGI region.  This
 *     makes the changes atomic with respect to NMIs/etc, so no need for
 *     shadowing logic.
 *
 * Factor 2 is harder.  We maintain a shadow_spec_ctrl value, and a use_shadow
 * boolean in the per-cpu scf.  The synchronous use is:
 *
 *  1) Store guest value in shadow_spec_ctrl
 *  2) Set the use_shadow boolean
 *  3) Load guest value into MSR_SPEC_CTRL
 *  4) Exit to guest
 *  5) Entry from guest
 *  6) Clear the use_shadow boolean
 *  7) Load Xen's value into MSR_SPEC_CTRL
 *
 * The asynchronous use for interrupts/exceptions is:
 *  -  Set/clear IBRS on entry to Xen
 *  -  On exit to Xen, check use_shadow
 *  -  If set, load shadow_spec_ctrl
 *
 * Therefore, an interrupt/exception which hits the synchronous path between
 * steps 2 and 6 will restore the shadow value rather than leaving Xen's value
 * loaded and corrupting the value used in guest context.
 *
 * Additionally, in some cases it is safe to skip writes to MSR_SPEC_CTRL when
 * we don't require any of the side effects of an identical write.  Maintain a
 * per-cpu last_spec_ctrl value for this purpose.
 *
 * The following ASM fragments implement this algorithm.  See their local
 * comments for further details.
 *  - SPEC_CTRL_ENTRY_FROM_PV
 *  - SPEC_CTRL_ENTRY_FROM_INTR
 *  - SPEC_CTRL_ENTRY_FROM_INTR_IST
 *  - SPEC_CTRL_EXIT_TO_XEN
 *  - SPEC_CTRL_EXIT_TO_PV
 *
 * Additionally, the following grep-fodder exists to find the HVM logic.
 *  - SPEC_CTRL_ENTRY_FROM_{SVM,VMX}
 *  - SPEC_CTRL_EXIT_TO_{SVM,VMX}
 */
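
/*
 * A C-like sketch of the shadowing algorithm above (illustrative only;
 * guest_val and wrmsr() are pseudo-names, and the ASM fragments below are
 * the real implementation):
 *
 *     // Synchronous exit-to-guest / re-entry
 *     shadow_spec_ctrl = guest_val;              // 1)
 *     scf |= SCF_use_shadow;                     // 2)
 *     wrmsr(MSR_SPEC_CTRL, guest_val);           // 3)
 *     ...                                        // 4) exit, 5) re-entry
 *     scf &= ~SCF_use_shadow;                    // 6)
 *     wrmsr(MSR_SPEC_CTRL, xen_spec_ctrl);       // 7)
 *
 *     // Asynchronous NMI/MCE/interrupt, on the way back out to Xen
 *     if ( scf & SCF_use_shadow )
 *         wrmsr(MSR_SPEC_CTRL, shadow_spec_ctrl);
 */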

.macro DO_COND_IBPB
/*
 * Requires %rbx=SCF, %rdx=0
 * Clobbers %rax, %rcx
 *
 * Conditionally issue IBPB if SCF_entry_ibpb is active.
 */
    testb  $SCF_entry_ibpb, %bl
    jz     .L\@_skip

    mov     $MSR_PRED_CMD, %ecx
    mov     $PRED_CMD_IBPB, %eax
    wrmsr

.L\@_skip:
.endm

.macro DO_OVERWRITE_RSB tmp=rax, xu
/*
 * Requires nothing
 * Clobbers \tmp (%rax by default), %rcx
 *
 * xu is an optional parameter to add eXtra Uniqueness.  It is intended for
 * passing %= in from an asm() block, in order to work around
 * https://github.com/llvm/llvm-project/issues/60792 where Clang-IAS doesn't
 * expand \@ uniquely.
 *
 * Requires 256 bytes of {,shadow}stack space, but %rsp/SSP has no net
 * change. Based on Google's performance numbers, the loop is unrolled to 16
 * iterations and two calls per iteration.
 *
 * The call filling the RSB needs a nonzero displacement, and int3 halts
 * speculation.
 *
 * %rsp is preserved by using an extra GPR because a) we've got plenty of
 * spare registers, b) the two movs are shorter to encode than
 * `add $32*8, %rsp`, and c) they can be optimised with mov-elimination on
 * modern cores.
 */
    mov $16, %ecx                   /* 16 iterations, two calls per loop */
    mov %rsp, %\tmp                 /* Store the current %rsp */

.L\@_fill_rsb_loop\xu:

    .irp n, 1, 2                    /* Unrolled twice. */
    call .L\@_insert_rsb_entry\xu\n /* Create an RSB entry. */
    int3                            /* Halt rogue speculation. */

.L\@_insert_rsb_entry\xu\n:
    .endr

    sub $1, %ecx
    jnz .L\@_fill_rsb_loop\xu
    mov %\tmp, %rsp                 /* Restore old %rsp */

#ifdef CONFIG_XEN_SHSTK
    mov $1, %ecx
    rdsspd %ecx
    cmp $1, %ecx
    je .L\@_shstk_done\xu
    mov $64, %ecx                   /* 64 * 4 bytes, given incsspd */
    incsspd %ecx                    /* Restore old SSP */
.L\@_shstk_done\xu:
#endif
.endm
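
/*
 * Illustrative (hypothetical) invocation of DO_OVERWRITE_RSB from a C asm()
 * block, using the xu parameter described above so that each expansion gets
 * unique local labels:
 *
 *     asm volatile ( "DO_OVERWRITE_RSB xu=%=" ::: "rax", "rcx", "memory" );
 */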

/*
 * Helper to improve the readability of stack displacements with %rsp in
 * unusual positions.  Both @field and @top_of_stk should be constants from
 * the same object.  @top_of_stk should be where %rsp is currently pointing.
 */
#define STK_REL(field, top_of_stk) ((field) - (top_of_stk))
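
/*
 * For example, with %rsp pointing at the error_code field of the cpuinfo
 * block, STK_REL(CPUINFO_scf, CPUINFO_error_code)(%rsp) addresses the scf
 * field, independently of where the cpuinfo block sits on the stack.
 */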

.macro SPEC_CTRL_COND_VERW \
    scf=STK_REL(CPUINFO_scf,      CPUINFO_error_code), \
    sel=STK_REL(CPUINFO_verw_sel, CPUINFO_error_code)
/*
 * Requires \scf and \sel as %rsp-relative expressions
 * Clobbers eflags
 *
 * VERW needs to run after guest GPRs have been restored, where only %rsp is
 * good to use.  Default to expecting %rsp pointing at CPUINFO_error_code.
 * Contexts where this is not true must provide an alternative \scf and \sel.
 *
 * Issue a VERW for its flushing side effect, if indicated.  This is a Spectre
 * v1 gadget, but the IRET/VMEntry is serialising.
 */
    testb $SCF_verw, \scf(%rsp)
    jz .L\@_verw_skip
    verw \sel(%rsp)
.L\@_verw_skip:
.endm

.macro DO_SPEC_CTRL_DIV
/*
 * Requires nothing
 * Clobbers %rax
 *
 * Issue a DIV for its flushing side effect (Zen1 uarch specific).  Any
 * non-faulting DIV will do; a byte DIV has least latency, and doesn't clobber
 * %rdx.
 */
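    /*
     * NB: `div %al` divides %ax by %al, leaving the quotient in %al and the
     * remainder in %ah, so no GPR other than %rax is written; the wider DIV
     * forms would clobber %rdx as well.
     */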
    mov $1, %eax
    div %al
.endm

.macro DO_SPEC_CTRL_ENTRY maybexen:req
/*
 * Requires %rsp=regs (also cpuinfo if !maybexen)
 * Requires %r14=stack_end (if maybexen)
 * Clobbers %rax, %rcx, %rdx
 *
 * PV guests can't update MSR_SPEC_CTRL behind Xen's back, so no need to read
 * it back.  Entries from guest context need to clear SPEC_CTRL shadowing,
 * while entries from Xen must leave shadowing in its current state.
 */
    mov $MSR_SPEC_CTRL, %ecx
    xor %edx, %edx

    /*
     * Clear SPEC_CTRL shadowing *before* loading Xen's value.  If entering
     * from a possibly-Xen context, %rsp doesn't necessarily alias the cpuinfo
     * block, so calculate the position directly.
     */
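    /*
     * A C-like sketch of the branchless form below, assuming (as in current
     * Xen) that SCF_use_shadow is bit 0 of scf:
     *
     *     mask = (cs & 3) ? 0xfe : 0xff;   (guest: clear bit 0; Xen: no-op)
     *     scf &= mask;
     */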
    .if \maybexen
        xor %eax, %eax
        /* Branchless `if ( !xen ) clear_shadowing` */
        testb $3, UREGS_cs(%rsp)
        setnz %al
        not %eax
        and %al, STACK_CPUINFO_FIELD(scf)(%r14)
        mov STACK_CPUINFO_FIELD(xen_spec_ctrl)(%r14), %eax
    .else
        andb $~SCF_use_shadow, CPUINFO_scf(%rsp)
        mov  CPUINFO_xen_spec_ctrl(%rsp), %eax
    .endif

    wrmsr
.endm

.macro DO_SPEC_CTRL_EXIT_TO_GUEST
/*
 * Requires %eax=spec_ctrl, %rsp=regs/cpuinfo
 * Clobbers %rcx, %rdx
 *
 * When returning to guest context, set up SPEC_CTRL shadowing and load the
 * guest value.
 */
    /* Set up shadow value *before* enabling shadowing. */
    mov %eax, CPUINFO_shadow_spec_ctrl(%rsp)

    /* Set SPEC_CTRL shadowing *before* loading the guest value. */
    orb $SCF_use_shadow, CPUINFO_scf(%rsp)

    mov $MSR_SPEC_CTRL, %ecx
    xor %edx, %edx
    wrmsr
.endm

/*
 * Used after an entry from PV context: SYSCALL, SYSENTER, INT,
 * etc.  There is always a guest speculation state in context.
 */
.macro SPEC_CTRL_ENTRY_FROM_PV
/*
 * Requires %rsp=regs/cpuinfo, %r14=stack_end, %rdx=0
 * Clobbers %rax, %rbx, %rcx, %rdx
 */
    movzbl STACK_CPUINFO_FIELD(scf)(%r14), %ebx

    /*
     * For all safety notes, 32bit PV guest kernels run in Ring 1 and are
     * therefore supervisor (== Xen) in the architecture.  As a result, most
     * hardware isolation techniques do not work.
     */

    /*
     * IBPB is to mitigate BTC/SRSO on AMD/Hygon parts, in particular making
     * type-confused RETs safe to use.  This is not needed on Zen5 and later
     * parts when SRSO_U/S_NO is enumerated.
     */
    ALTERNATIVE "", DO_COND_IBPB, X86_FEATURE_IBPB_ENTRY_PV

    /*
     * RSB stuffing is to prevent RET predictions following guest entries.
     * This is not needed if SMEP is active and the RSB is full-width.
     */
    ALTERNATIVE "", DO_OVERWRITE_RSB, X86_FEATURE_SC_RSB_PV

    /*
     * Only used on Intel parts.  Restore Xen's MSR_SPEC_CTRL setting.  The
     * guest can't change its value behind Xen's back.  For Legacy IBRS, this
     * flushes/inhibits indirect predictions and does not flush the RSB.  For
     * eIBRS, this prevents CALLs/JMPs using predictions learnt at a lower
     * predictor mode, and it flushes the RSB.
     */
    ALTERNATIVE "", __stringify(DO_SPEC_CTRL_ENTRY maybexen=0),         \
        X86_FEATURE_SC_MSR_PV

    /*
     * Clear the BHB to mitigate BHI.  Used on eIBRS parts, and uses RETs
     * itself, so must be after we've performed all the RET-safety we can.
     */
    testb $SCF_entry_bhb, %bl
    jz .L\@_skip_bhb
    ALTERNATIVE_2 "",                                    \
        "call clear_bhb_loops", X86_SPEC_BHB_LOOPS,      \
        "call clear_bhb_tsx", X86_SPEC_BHB_TSX
.L\@_skip_bhb:

    ALTERNATIVE "lfence", "", X86_SPEC_NO_LFENCE_ENTRY_PV
.endm

/*
 * Used after an exception or maskable interrupt, hitting Xen or PV context.
 * There will either be a guest speculation context, or a well-formed Xen
 * speculation context, with one exception: IRET #GP handling may still be
 * running with the guest's choice of MSR_SPEC_CTRL.
 *
 * Therefore, we can skip the flush/barrier-like protections when hitting Xen,
 * but we must still run the mode-based protections.
 */
.macro SPEC_CTRL_ENTRY_FROM_INTR
/*
 * Requires %rsp=regs, %r14=stack_end, %rdx=0
 * Clobbers %rax, %rbx, %rcx, %rdx
 */
    movzbl STACK_CPUINFO_FIELD(scf)(%r14), %ebx

    /*
     * All safety notes the same as SPEC_CTRL_ENTRY_FROM_PV, although there is
     * a conditional jump skipping some actions when interrupting Xen.
     *
     * On Intel parts, the IRET #GP path ends up here with the guest's choice
     * of MSR_SPEC_CTRL.
     */

    testb $3, UREGS_cs(%rsp)
    jz .L\@_skip

    ALTERNATIVE "", DO_COND_IBPB, X86_FEATURE_IBPB_ENTRY_PV

    ALTERNATIVE "", DO_OVERWRITE_RSB, X86_FEATURE_SC_RSB_PV

.L\@_skip:
    ALTERNATIVE "", __stringify(DO_SPEC_CTRL_ENTRY maybexen=1),         \
        X86_FEATURE_SC_MSR_PV

    testb $SCF_entry_bhb, %bl
    jz .L\@_skip_bhb
    ALTERNATIVE_2 "",                                    \
        "call clear_bhb_loops", X86_SPEC_BHB_LOOPS,      \
        "call clear_bhb_tsx", X86_SPEC_BHB_TSX
.L\@_skip_bhb:

    ALTERNATIVE "lfence", "", X86_SPEC_NO_LFENCE_ENTRY_INTR
.endm

/*
 * Used when exiting from any entry context, back to PV context.  This
 * includes exits from an IST entry which moved onto the primary stack.
 */
.macro SPEC_CTRL_EXIT_TO_PV
/*
 * Requires %rax=spec_ctrl, %rsp=regs/info
 * Clobbers %rcx, %rdx
 */
    ALTERNATIVE "", DO_SPEC_CTRL_EXIT_TO_GUEST, X86_FEATURE_SC_MSR_PV

    ALTERNATIVE "", DO_SPEC_CTRL_DIV, X86_FEATURE_SC_DIV
.endm

/*
 * Used after an IST entry hitting Xen or PV context.  Special care is needed,
 * because when hitting Xen context, there may not be a well-formed
 * speculation context.  (i.e. it can hit in the middle of
 * SPEC_CTRL_{ENTRY,EXIT}_* regions.)
 *
 * An IST entry which hits PV context moves onto the primary stack and leaves
 * via SPEC_CTRL_EXIT_TO_PV, *not* SPEC_CTRL_EXIT_TO_XEN.
 */
.macro SPEC_CTRL_ENTRY_FROM_INTR_IST
/*
 * Requires %rsp=regs, %r14=stack_end, %rdx=0
 * Clobbers %rax, %rbx, %rcx, %rdx
 *
 * This is a logical merge of:
 *    DO_COND_IBPB
 *    DO_OVERWRITE_RSB
 *    DO_SPEC_CTRL_ENTRY maybexen=1
 * but with conditionals rather than alternatives.
 */
    movzbl STACK_CPUINFO_FIELD(scf)(%r14), %ebx

    /*
     * For all safety notes, 32bit PV guest kernels run in Ring 1 and are
     * therefore supervisor (== Xen) in the architecture.  As a result, most
     * hardware isolation techniques do not work.
     */

    /*
     * IBPB is to mitigate BTC/SRSO on AMD/Hygon parts, in particular making
     * type-confused RETs safe to use.  This is not needed on Zen5 and later
     * parts when SRSO_U/S_NO is enumerated.  The SVM path takes care of
     * Host/Guest interactions prior to clearing GIF, and it's not used on the
     * VMX path.
     */
    test    $SCF_ist_ibpb, %bl
    jz      .L\@_skip_ibpb

    mov     $MSR_PRED_CMD, %ecx
    mov     $PRED_CMD_IBPB, %eax
    wrmsr

.L\@_skip_ibpb:

    /*
     * RSB stuffing is to prevent RET predictions following guest entries.
     * SCF_ist_rsb is active if either PV or HVM protections are needed.  The
     * VMX path cannot guarantee to make the RSB safe ahead of taking an IST
     * vector.
     */
    test $SCF_ist_rsb, %bl
    jz .L\@_skip_rsb

    DO_OVERWRITE_RSB         /* Clobbers %rax/%rcx */

.L\@_skip_rsb:

    /*
     * Only used on Intel parts.  Restore Xen's MSR_SPEC_CTRL setting.  PV
     * guests can't change their value behind Xen's back.  HVM guests have
     * their value stored in the MSR load/save list.  For Legacy IBRS, this
     * flushes/inhibits indirect predictions and does not flush the RSB.  For
     * eIBRS, this prevents CALLs/JMPs using predictions learnt at a lower
     * predictor mode, and it flushes the RSB.  On eIBRS parts that also
     * suffer from PBRSB, the prior RSB stuffing suffices to make the RSB
     * safe.
     */
    test $SCF_ist_sc_msr, %bl
    jz .L\@_skip_msr_spec_ctrl

    xor %eax, %eax
    testb $3, UREGS_cs(%rsp)
    setnz %al
    not %eax
    and %al, STACK_CPUINFO_FIELD(scf)(%r14)

    /* Load Xen's intended value. */
    mov $MSR_SPEC_CTRL, %ecx
    mov STACK_CPUINFO_FIELD(xen_spec_ctrl)(%r14), %eax
    wrmsr

.L\@_skip_msr_spec_ctrl:

    /*
     * Clear the BHB to mitigate BHI.  Used on eIBRS parts, and uses RETs
     * itself, so must be after we've performed all the RET-safety we can.
     */
    testb $SCF_entry_bhb, %bl
    jz .L\@_skip_bhb

    ALTERNATIVE_2 "",                                    \
        "call clear_bhb_loops", X86_SPEC_BHB_LOOPS,      \
        "call clear_bhb_tsx", X86_SPEC_BHB_TSX
.L\@_skip_bhb:

    lfence
.endm

/*
 * Used when exiting from any entry context, back to Xen context.  This
 * includes returning to other SPEC_CTRL_{ENTRY,EXIT}_* regions with an
 * incomplete speculation context.
 *
 * Because we might have interrupted Xen beyond SPEC_CTRL_EXIT_TO_$GUEST, we
 * need to treat this as if it were an EXIT_TO_$GUEST case too.
 */
.macro SPEC_CTRL_EXIT_TO_XEN
/*
 * Requires %r12=ist_exit, %r14=stack_end, %rsp=regs
 * Clobbers %rax, %rbx, %rcx, %rdx
 */
    movzbl STACK_CPUINFO_FIELD(scf)(%r14), %ebx

    testb $SCF_ist_sc_msr, %bl
    jz .L\@_skip_sc_msr

    /*
     * When returning to Xen context, look to see whether SPEC_CTRL shadowing
     * is in effect, and reload the shadow value.  This covers race conditions
     * which exist with an NMI/MCE/etc hitting late in the return-to-guest
     * path.
     */
    xor %edx, %edx

    testb $SCF_use_shadow, %bl
    jz .L\@_skip_sc_msr

    mov STACK_CPUINFO_FIELD(shadow_spec_ctrl)(%r14), %eax
    mov $MSR_SPEC_CTRL, %ecx
    wrmsr

.L\@_skip_sc_msr:

    test %r12, %r12
    jz .L\@_skip_ist_exit

    /*
     * Stash SCF and verw_sel above eflags in the case of an IST exit.  The
     * VERW logic needs to run after guest GPRs have been restored, i.e. where
     * we cannot use %r12 or %r14 for the purposes they have here.
     *
     * When the CPU pushed this exception frame, it zero-extended eflags.
     * Therefore it is safe for the VERW logic to look at the stashed SCF
     * outside of the ist_exit condition.  Also, this stashing won't influence
     * any other restore_all_guest() paths.
     */
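    /*
     * Layout of the stashed dword (as constructed below): bits 0-7 carry SCF
     * (still in %bl from above), bits 16-31 carry __HYPERVISOR_DS32 as a
     * VERW-able selector, and bits 8-15 are zero.
     */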
    or $(__HYPERVISOR_DS32 << 16), %ebx
    mov %ebx, UREGS_eflags + 4(%rsp) /* EFRAME_shadow_scf/sel */

    ALTERNATIVE "", DO_SPEC_CTRL_DIV, X86_FEATURE_SC_DIV

.L\@_skip_ist_exit:
.endm

#endif /* __ASSEMBLY__ */
#endif /* !__X86_SPEC_CTRL_ASM_H__ */

/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */