/* SPDX-License-Identifier: GPL-2.0-or-later */
/******************************************************************************
 * include/asm-x86/spec_ctrl.h
 *
 * Copyright (c) 2017-2018 Citrix Systems Ltd.
 */

#ifndef __X86_SPEC_CTRL_ASM_H__
#define __X86_SPEC_CTRL_ASM_H__

#ifdef __ASSEMBLY__
#include <asm/msr-index.h>
#include <asm/spec_ctrl.h>

/*
 * Saving and restoring MSR_SPEC_CTRL state is a little tricky.
 *
 * We want the guest's choice of SPEC_CTRL while in guest context, and Xen's
 * choice (set or clear, depending on the hardware) while running in Xen
 * context.  Therefore, a simplistic algorithm is:
 *
 *  - Set/clear IBRS on entry to Xen
 *  - Set the guest's choice on exit to guest
 *  - Leave SPEC_CTRL unchanged on exit to Xen
 *
 * There are two complicating factors:
 *  1) HVM guests can have direct access to the MSR, so it can change
 *     behind Xen's back.
 *  2) An NMI or MCE can interrupt at any point, including early in the entry
 *     path, or late in the exit path after restoring the guest value.  This
 *     will corrupt the guest value.
 *
 * Factor 1 is dealt with:
 *  - On VMX by using MSR load/save lists to have vmentry/exit atomically
 *    load/save the guest value.  Xen's value is loaded in regular code, and
 *    there is no need to use the shadow logic (below).
 *  - On SVM by altering MSR_SPEC_CTRL inside the CLGI/STGI region.  This
 *    makes the changes atomic with respect to NMIs/etc, so no need for
 *    shadowing logic.
 *
 * Factor 2 is harder.  We maintain a shadow_spec_ctrl value, and a use_shadow
 * boolean in the per-cpu scf.  The synchronous use is:
 *
 *  1) Store guest value in shadow_spec_ctrl
 *  2) Set the use_shadow boolean
 *  3) Load guest value into MSR_SPEC_CTRL
 *  4) Exit to guest
 *  5) Entry from guest
 *  6) Clear the use_shadow boolean
 *  7) Load Xen's value into MSR_SPEC_CTRL
 *
 * The asynchronous use for interrupts/exceptions is:
 *  - Set/clear IBRS on entry to Xen
 *  - On exit to Xen, check use_shadow
 *  - If set, load shadow_spec_ctrl
 *
 * Therefore, an interrupt/exception which hits the synchronous path between
 * steps 2 and 6 will restore the shadow value rather than leaving Xen's value
 * loaded and corrupting the value used in guest context.
 *
 * Additionally, in some cases it is safe to skip writes to MSR_SPEC_CTRL when
 * we don't require any of the side effects of an identical write.  Maintain a
 * per-cpu last_spec_ctrl value for this purpose.
 *
 * The following ASM fragments implement this algorithm.  See their local
 * comments for further details.
 *  - SPEC_CTRL_ENTRY_FROM_PV
 *  - SPEC_CTRL_ENTRY_FROM_INTR
 *  - SPEC_CTRL_ENTRY_FROM_INTR_IST
 *  - SPEC_CTRL_EXIT_TO_XEN
 *  - SPEC_CTRL_EXIT_TO_PV
 *
 * Additionally, the following grep-fodder exists to find the HVM logic.
 *  - SPEC_CTRL_ENTRY_FROM_{SVM,VMX}
 *  - SPEC_CTRL_EXIT_TO_{SVM,VMX}
 */
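/*
 * Illustrative C-level sketch of the scheme above (informational only; field
 * names follow the CPUINFO_xxx offsets and STACK_CPUINFO_FIELD() uses below
 * rather than any particular structure definition):
 *
 *     // Synchronous use, around guest context:
 *     info->shadow_spec_ctrl = guest_val;              // 1)
 *     info->scf |= SCF_use_shadow;                     // 2)
 *     wrmsr(MSR_SPEC_CTRL, guest_val);                 // 3)
 *     ...                                              // 4) exit, 5) re-entry
 *     info->scf &= ~SCF_use_shadow;                    // 6)
 *     wrmsr(MSR_SPEC_CTRL, info->xen_spec_ctrl);       // 7)
 *
 *     // Asynchronous use, when an interrupt/exception returns to Xen:
 *     if ( info->scf & SCF_use_shadow )
 *         wrmsr(MSR_SPEC_CTRL, info->shadow_spec_ctrl);
 */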
.macro DO_COND_IBPB
/*
 * Requires %rbx=SCF, %rdx=0
 * Clobbers %rax, %rcx
 *
 * Conditionally issue IBPB if SCF_entry_ibpb is active.
 */
    testb   $SCF_entry_ibpb, %bl
    jz      .L\@_skip

    mov     $MSR_PRED_CMD, %ecx
    mov     $PRED_CMD_IBPB, %eax
    wrmsr

.L\@_skip:
.endm

.macro DO_OVERWRITE_RSB tmp=rax, xu
/*
 * Requires nothing
 * Clobbers \tmp (%rax by default), %rcx
 *
 * xu is an optional parameter to add eXtra Uniqueness.  It is intended for
 * passing %= in from an asm() block, in order to work around
 * https://github.com/llvm/llvm-project/issues/60792 where Clang-IAS doesn't
 * expand \@ uniquely.
 *
 * Requires 256 bytes of {,shadow}stack space, but %rsp/SSP has no net
 * change.  Based on Google's performance numbers, the loop is unrolled to 16
 * iterations and two calls per iteration.
 *
 * The call filling the RSB needs a nonzero displacement, and int3 halts
 * speculation.
 *
 * %rsp is preserved by using an extra GPR because a) we've got plenty spare,
 * b) the two movs are shorter to encode than `add $32*8, %rsp`, and c) can be
 * optimised with mov-elimination in modern cores.
 */
    mov     $16, %ecx                   /* 16 iterations, two calls per loop */
    mov     %rsp, %\tmp                 /* Store the current %rsp */

.L\@_fill_rsb_loop\xu:

    .irp n, 1, 2                        /* Unrolled twice. */
    call    .L\@_insert_rsb_entry\xu\n  /* Create an RSB entry. */
    int3                                /* Halt rogue speculation. */

.L\@_insert_rsb_entry\xu\n:
    .endr

    sub     $1, %ecx
    jnz     .L\@_fill_rsb_loop\xu
    mov     %\tmp, %rsp                 /* Restore old %rsp */

#ifdef CONFIG_XEN_SHSTK
    mov     $1, %ecx                    /* Sentinel value. */
    rdsspd  %ecx                        /* No-op if shstk is inactive, leaving the sentinel. */
    cmp     $1, %ecx
    je      .L\@_shstk_done\xu          /* Shadow stacks inactive; nothing to undo. */
    mov     $64, %ecx                   /* 64 * 4 bytes, given incsspd */
    incsspd %ecx                        /* Restore old SSP */
.L\@_shstk_done\xu:
#endif
.endm

/*
 * Helper to improve the readability of stack displacements with %rsp in
 * unusual positions.  Both @field and @top_of_stk should be constants from
 * the same object.  @top_of_stk should be where %rsp is currently pointing.
 */
#define STK_REL(field, top_of_stk)     ((field) - (top_of_stk))

.macro SPEC_CTRL_COND_VERW \
    scf=STK_REL(CPUINFO_scf,      CPUINFO_error_code), \
    sel=STK_REL(CPUINFO_verw_sel, CPUINFO_error_code)
/*
 * Requires \scf and \sel as %rsp-relative expressions
 * Clobbers eflags
 *
 * VERW needs to run after guest GPRs have been restored, where only %rsp is
 * good to use.  Default to expecting %rsp pointing at CPUINFO_error_code.
 * Contexts where this is not true must provide an alternative \scf and \sel.
 *
 * Issue a VERW for its flushing side effect, if indicated.  This is a Spectre
 * v1 gadget, but the IRET/VMEntry is serialising.
 */
    testb   $SCF_verw, \scf(%rsp)
    jz      .L\@_verw_skip
    verw    \sel(%rsp)
.L\@_verw_skip:
.endm

.macro DO_SPEC_CTRL_DIV
/*
 * Requires nothing
 * Clobbers %rax
 *
 * Issue a DIV for its flushing side effect (Zen1 uarch specific).  Any
 * non-faulting DIV will do; a byte DIV has least latency, and doesn't clobber
 * %rdx.
 */
    mov     $1, %eax
    div     %al
.endm

.macro DO_SPEC_CTRL_ENTRY maybexen:req
/*
 * Requires %rsp=regs (also cpuinfo if !maybexen)
 * Requires %r14=stack_end (if maybexen)
 * Clobbers %rax, %rcx, %rdx
 *
 * PV guests can't update MSR_SPEC_CTRL behind Xen's back, so no need to read
 * it back.  Entries from guest context need to clear SPEC_CTRL shadowing,
 * while entries from Xen must leave shadowing in its current state.
 */
    mov     $MSR_SPEC_CTRL, %ecx
    xor     %edx, %edx

    /*
     * Clear SPEC_CTRL shadowing *before* loading Xen's value.  If entering
     * from a possibly-Xen context, %rsp doesn't necessarily alias the cpuinfo
     * block, so calculate the position directly.
     */
    .if \maybexen
        xor     %eax, %eax
        /* Branchless `if ( !xen ) clear_shadowing` */
        testb   $3, UREGS_cs(%rsp)
        setnz   %al
        not     %eax
        and     %al, STACK_CPUINFO_FIELD(scf)(%r14)
        mov     STACK_CPUINFO_FIELD(xen_spec_ctrl)(%r14), %eax
    .else
        andb    $~SCF_use_shadow, CPUINFO_scf(%rsp)
        mov     CPUINFO_xen_spec_ctrl(%rsp), %eax
    .endif

    wrmsr
.endm
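/*
 * Rough C equivalent of the \maybexen path above (informational only).  The
 * SETNZ/NOT/AND sequence builds a byte mask of ~1 when interrupting guest
 * context and ~0 when interrupting Xen, so the AND either clears
 * SCF_use_shadow (bit 0) or leaves SCF untouched:
 *
 *     if ( (regs->cs & 3) != 0 )          // i.e. !xen
 *         scf &= ~SCF_use_shadow;
 *     wrmsr(MSR_SPEC_CTRL, xen_spec_ctrl);
 */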
.macro DO_SPEC_CTRL_EXIT_TO_GUEST
/*
 * Requires %eax=spec_ctrl, %rsp=regs/cpuinfo
 * Clobbers %rcx, %rdx
 *
 * When returning to guest context, set up SPEC_CTRL shadowing and load the
 * guest value.
 */
    /* Set up shadow value *before* enabling shadowing. */
    mov     %eax, CPUINFO_shadow_spec_ctrl(%rsp)

    /* Set SPEC_CTRL shadowing *before* loading the guest value. */
    orb     $SCF_use_shadow, CPUINFO_scf(%rsp)

    mov     $MSR_SPEC_CTRL, %ecx
    xor     %edx, %edx
    wrmsr
.endm

/*
 * Used after an entry from PV context: SYSCALL, SYSENTER, INT, etc.  There is
 * always a guest speculation state in context.
 */
.macro SPEC_CTRL_ENTRY_FROM_PV
/*
 * Requires %rsp=regs/cpuinfo, %r14=stack_end, %rdx=0
 * Clobbers %rax, %rbx, %rcx, %rdx
 */
    movzbl  STACK_CPUINFO_FIELD(scf)(%r14), %ebx

    /*
     * For all safety notes, 32bit PV guest kernels run in Ring 1 and are
     * therefore supervisor (== Xen) in the architecture.  As a result, most
     * hardware isolation techniques do not work.
     */

    /*
     * IBPB is to mitigate BTC/SRSO on AMD/Hygon parts, in particular making
     * type-confused RETs safe to use.  This is not needed on Zen5 and later
     * parts when SRSO_U/S_NO is enumerated.
     */
    ALTERNATIVE "", DO_COND_IBPB, X86_FEATURE_IBPB_ENTRY_PV

    /*
     * RSB stuffing is to prevent RET predictions following guest entries.
     * This is not needed if SMEP is active and the RSB is full-width.
     */
    ALTERNATIVE "", DO_OVERWRITE_RSB, X86_FEATURE_SC_RSB_PV

    /*
     * Only used on Intel parts.  Restore Xen's MSR_SPEC_CTRL setting.  The
     * guest can't change its value behind Xen's back.  For Legacy IBRS, this
     * flushes/inhibits indirect predictions and does not flush the RSB.  For
     * eIBRS, this prevents CALLs/JMPs using predictions learnt at a lower
     * predictor mode, and it flushes the RSB.
     */
    ALTERNATIVE "", __stringify(DO_SPEC_CTRL_ENTRY maybexen=0), \
        X86_FEATURE_SC_MSR_PV

    /*
     * Clear the BHB to mitigate BHI.  Used on eIBRS parts, and uses RETs
     * itself so must be after we've performed all the RET-safety we can.
     */
    testb   $SCF_entry_bhb, %bl
    jz      .L\@_skip_bhb
    ALTERNATIVE_2 "", \
        "call clear_bhb_loops", X86_SPEC_BHB_LOOPS, \
        "call clear_bhb_tsx", X86_SPEC_BHB_TSX
.L\@_skip_bhb:

    ALTERNATIVE "lfence", "", X86_SPEC_NO_LFENCE_ENTRY_PV
.endm
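/*
 * Ordering note (summarising the local comments above): RET-safety comes
 * first (IBPB, then RSB stuffing), then Xen's MSR_SPEC_CTRL value, then BHB
 * clearing (which itself uses RETs, so must follow all the RET-safety work),
 * and finally an LFENCE to bound speculation unless it is known to be safe
 * to skip.  The same ordering applies to the other entry macros below.
 */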
/*
 * Used after an exception or maskable interrupt, hitting Xen or PV context.
 * There will either be a guest speculation context, or a well-formed Xen
 * speculation context, with the exception of one case: IRET #GP handling may
 * still have the guest's choice of MSR_SPEC_CTRL loaded.
 *
 * Therefore, we can skip the flush/barrier-like protections when hitting Xen,
 * but we must still run the mode-based protections.
 */
.macro SPEC_CTRL_ENTRY_FROM_INTR
/*
 * Requires %rsp=regs, %r14=stack_end, %rdx=0
 * Clobbers %rax, %rbx, %rcx, %rdx
 */
    movzbl  STACK_CPUINFO_FIELD(scf)(%r14), %ebx

    /*
     * All safety notes the same as SPEC_CTRL_ENTRY_FROM_PV, although there is
     * a conditional jump skipping some actions when interrupting Xen.
     *
     * On Intel parts, the IRET #GP path ends up here with the guest's choice
     * of MSR_SPEC_CTRL.
     */

    testb   $3, UREGS_cs(%rsp)
    jz      .L\@_skip

    ALTERNATIVE "", DO_COND_IBPB, X86_FEATURE_IBPB_ENTRY_PV

    ALTERNATIVE "", DO_OVERWRITE_RSB, X86_FEATURE_SC_RSB_PV

.L\@_skip:
    ALTERNATIVE "", __stringify(DO_SPEC_CTRL_ENTRY maybexen=1), \
        X86_FEATURE_SC_MSR_PV

    testb   $SCF_entry_bhb, %bl
    jz      .L\@_skip_bhb
    ALTERNATIVE_2 "", \
        "call clear_bhb_loops", X86_SPEC_BHB_LOOPS, \
        "call clear_bhb_tsx", X86_SPEC_BHB_TSX
.L\@_skip_bhb:

    ALTERNATIVE "lfence", "", X86_SPEC_NO_LFENCE_ENTRY_INTR
.endm

/*
 * Used when exiting from any entry context, back to PV context.  This
 * includes exits from an IST entry which moved onto the primary stack.
 */
.macro SPEC_CTRL_EXIT_TO_PV
/*
 * Requires %rax=spec_ctrl, %rsp=regs/info
 * Clobbers %rcx, %rdx
 */
    ALTERNATIVE "", DO_SPEC_CTRL_EXIT_TO_GUEST, X86_FEATURE_SC_MSR_PV

    ALTERNATIVE "", DO_SPEC_CTRL_DIV, X86_FEATURE_SC_DIV
.endm
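/*
 * Rough pairing summary (informational): entries from PV guest context leave
 * via SPEC_CTRL_EXIT_TO_PV; SPEC_CTRL_ENTRY_FROM_INTR leaves via
 * SPEC_CTRL_EXIT_TO_PV or SPEC_CTRL_EXIT_TO_XEN depending on whether guest
 * or Xen context was interrupted; IST entries use
 * SPEC_CTRL_ENTRY_FROM_INTR_IST below, and leave via SPEC_CTRL_EXIT_TO_XEN,
 * or via SPEC_CTRL_EXIT_TO_PV once moved onto the primary stack.
 */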
/*
 * Used after an IST entry hitting Xen or PV context.  Special care is needed,
 * because when hitting Xen context, there may not be a well-formed
 * speculation context.  (i.e. it can hit in the middle of
 * SPEC_CTRL_{ENTRY,EXIT}_* regions.)
 *
 * An IST entry which hits PV context moves onto the primary stack and leaves
 * via SPEC_CTRL_EXIT_TO_PV, *not* SPEC_CTRL_EXIT_TO_XEN.
 */
.macro SPEC_CTRL_ENTRY_FROM_INTR_IST
/*
 * Requires %rsp=regs, %r14=stack_end, %rdx=0
 * Clobbers %rax, %rbx, %rcx, %rdx
 *
 * This is a logical merge of:
 *     DO_COND_IBPB
 *     DO_OVERWRITE_RSB
 *     DO_SPEC_CTRL_ENTRY maybexen=1
 * but with conditionals rather than alternatives.
 */
    movzbl  STACK_CPUINFO_FIELD(scf)(%r14), %ebx

    /*
     * For all safety notes, 32bit PV guest kernels run in Ring 1 and are
     * therefore supervisor (== Xen) in the architecture.  As a result, most
     * hardware isolation techniques do not work.
     */

    /*
     * IBPB is to mitigate BTC/SRSO on AMD/Hygon parts, in particular making
     * type-confused RETs safe to use.  This is not needed on Zen5 and later
     * parts when SRSO_U/S_NO is enumerated.  The SVM path takes care of
     * Host/Guest interactions prior to clearing GIF, and it's not used on the
     * VMX path.
     */
    test    $SCF_ist_ibpb, %bl
    jz      .L\@_skip_ibpb

    mov     $MSR_PRED_CMD, %ecx
    mov     $PRED_CMD_IBPB, %eax
    wrmsr

.L\@_skip_ibpb:

    /*
     * RSB stuffing is to prevent RET predictions following guest entries.
     * SCF_ist_rsb is active if either PV or HVM protections are needed.  The
     * VMX path cannot guarantee to make the RSB safe ahead of taking an IST
     * vector.
     */
    test    $SCF_ist_rsb, %bl
    jz      .L\@_skip_rsb

    DO_OVERWRITE_RSB                    /* Clobbers %rax/%rcx */

.L\@_skip_rsb:

    /*
     * Only used on Intel parts.  Restore Xen's MSR_SPEC_CTRL setting.  PV
     * guests can't change their value behind Xen's back.  HVM guests have
     * their value stored in the MSR load/save list.  For Legacy IBRS, this
     * flushes/inhibits indirect predictions and does not flush the RSB.  For
     * eIBRS, this prevents CALLs/JMPs using predictions learnt at a lower
     * predictor mode, and it flushes the RSB.  On eIBRS parts that also
     * suffer from PBRSB, the prior RSB stuffing suffices to make the RSB
     * safe.
     */
    test    $SCF_ist_sc_msr, %bl
    jz      .L\@_skip_msr_spec_ctrl

    xor     %eax, %eax
    testb   $3, UREGS_cs(%rsp)
    setnz   %al
    not     %eax
    and     %al, STACK_CPUINFO_FIELD(scf)(%r14)

    /* Load Xen's intended value. */
    mov     $MSR_SPEC_CTRL, %ecx
    mov     STACK_CPUINFO_FIELD(xen_spec_ctrl)(%r14), %eax
    wrmsr

.L\@_skip_msr_spec_ctrl:

    /*
     * Clear the BHB to mitigate BHI.  Used on eIBRS parts, and uses RETs
     * itself so must be after we've performed all the RET-safety we can.
     */
    testb   $SCF_entry_bhb, %bl
    jz      .L\@_skip_bhb

    ALTERNATIVE_2 "", \
        "call clear_bhb_loops", X86_SPEC_BHB_LOOPS, \
        "call clear_bhb_tsx", X86_SPEC_BHB_TSX
.L\@_skip_bhb:

    lfence
.endm

/*
 * Used when exiting from any entry context, back to Xen context.  This
 * includes returning to other SPEC_CTRL_{ENTRY,EXIT}_* regions with an
 * incomplete speculation context.
 *
 * Because we might have interrupted Xen beyond SPEC_CTRL_EXIT_TO_$GUEST, we
 * need to treat this as if it were an EXIT_TO_$GUEST case too.
 */
.macro SPEC_CTRL_EXIT_TO_XEN
/*
 * Requires %r12=ist_exit, %r14=stack_end, %rsp=regs
 * Clobbers %rax, %rbx, %rcx, %rdx
 */
    movzbl  STACK_CPUINFO_FIELD(scf)(%r14), %ebx

    testb   $SCF_ist_sc_msr, %bl
    jz      .L\@_skip_sc_msr

    /*
     * When returning to Xen context, look to see whether SPEC_CTRL shadowing
     * is in effect, and reload the shadow value.  This covers race conditions
     * which exist with an NMI/MCE/etc hitting late in the return-to-guest
     * path.
     */
    xor     %edx, %edx

    testb   $SCF_use_shadow, %bl
    jz      .L\@_skip_sc_msr

    mov     STACK_CPUINFO_FIELD(shadow_spec_ctrl)(%r14), %eax
    mov     $MSR_SPEC_CTRL, %ecx
    wrmsr

.L\@_skip_sc_msr:

    test    %r12, %r12
    jz      .L\@_skip_ist_exit

    /*
     * Stash SCF and verw_sel above eflags in the case of an IST_exit.  The
     * VERW logic needs to run after guest GPRs have been restored; i.e. where
     * we cannot use %r12 or %r14 for the purposes they have here.
     *
     * When the CPU pushed this exception frame, it zero-extended eflags.
     * Therefore it is safe for the VERW logic to look at the stashed SCF
     * outside of the ist_exit condition.  Also, this stashing won't influence
     * any other restore_all_guest() paths.
     */
    or      $(__HYPERVISOR_DS32 << 16), %ebx
    mov     %ebx, UREGS_eflags + 4(%rsp) /* EFRAME_shadow_scf/sel */

    ALTERNATIVE "", DO_SPEC_CTRL_DIV, X86_FEATURE_SC_DIV

.L\@_skip_ist_exit:
.endm

#endif /* __ASSEMBLY__ */
#endif /* !__X86_SPEC_CTRL_ASM_H__ */

/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */