/******************************************************************************
 * current.h
 *
 * Information structure that lives at the bottom of the per-cpu Xen stack.
 */

#ifndef __X86_CURRENT_H__
#define __X86_CURRENT_H__

#include <xen/percpu.h>
#include <xen/page-size.h>
#include <public/xen.h>

/*
 * Xen's cpu stacks are 8 pages (8-page aligned), arranged as:
 *
 * 7 - Primary stack (with a struct cpu_info at the top)
 * 6 - Primary stack
 * 5 - Primary Shadow Stack (read-only)
 * 4 - #DF IST stack
 * 3 - #DB IST stack
 * 2 - NMI IST stack
 * 1 - #MC IST stack
 * 0 - IST Shadow Stacks (4x 1k, read-only)
 */

/*
 * Identify which stack page the stack pointer is on. Returns an index
 * as per the comment above.
 */
static inline unsigned int get_stack_page(unsigned long sp)
{
    return (sp & (STACK_SIZE-1)) >> PAGE_SHIFT;
}
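
/*
 * Illustration only (hypothetical numbers, assuming 4 KiB pages, so
 * STACK_SIZE == 8 * PAGE_SIZE == 32 KiB): a stack pointer sitting at offset
 * 0x7f80 within its 8-page-aligned stack gives get_stack_page() == 7, the
 * primary stack page holding struct cpu_info; an offset of 0x4010 gives 4,
 * the #DF IST stack.
 */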

struct vcpu;

struct cpu_info {
    struct cpu_user_regs guest_cpu_user_regs;
    unsigned int processor_id;
    unsigned int verw_sel;
    struct vcpu *current_vcpu;
    unsigned long per_cpu_offset;
    unsigned long cr4;
    /*
     * pv_cr3 is the CR3 value to be loaded on this pCPU whenever 64-bit PV
     * guest context is entered; a value of zero means no CR3 load is to be
     * performed.
     * xen_cr3 is the value to restore when re-entering Xen, if any; a value
     * of zero means there is nothing to restore.
     */
    unsigned long xen_cr3;
    unsigned long pv_cr3;

    /* See asm/spec_ctrl_asm.h for usage. */
    unsigned int shadow_spec_ctrl;
    unsigned int xen_spec_ctrl;
    unsigned int last_spec_ctrl;
    uint8_t scf; /* SCF_* */

    /*
     * The following field controls copying of the L4 page table of 64-bit
     * PV guests to the per-cpu root page table on entering the guest context.
     * If set, the L4 page table is copied to the root page table and the
     * field is then cleared.
     */
    bool root_pgt_changed;

    /*
     * use_pv_cr3 is set in case the value of pv_cr3 is to be written into
     * CR3 when returning from an interrupt. The main use is when returning
     * from an NMI or MCE to hypervisor code where pv_cr3 was active.
     */
    bool use_pv_cr3;

    /* get_stack_bottom() must be 16-byte aligned */
};

static inline struct cpu_info *get_cpu_info_from_stack(unsigned long sp)
{
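    /*
     * (sp | (STACK_SIZE - 1)) + 1 is the address just past the top of the
     * 8-page stack containing sp; stepping back by one struct cpu_info from
     * there locates the info block at the top of the primary stack page.
     */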
    return (struct cpu_info *)((sp | (STACK_SIZE - 1)) + 1) - 1;
}

static inline struct cpu_info *get_cpu_info(void)
{
#ifdef __clang__
    /* Clang complains that sp in the else case is not initialised. */
    unsigned long sp;
    asm ( "mov %%rsp, %0" : "=r" (sp) );
#else
    register unsigned long sp asm("rsp");
#endif

    return get_cpu_info_from_stack(sp);
}

#define get_current() (get_cpu_info()->current_vcpu)
#define set_current(vcpu) (get_cpu_info()->current_vcpu = (vcpu))
#define current (get_current())

#define smp_processor_id() (get_cpu_info()->processor_id)
#define guest_cpu_user_regs() (&get_cpu_info()->guest_cpu_user_regs)

#define get_per_cpu_offset() (get_cpu_info()->per_cpu_offset)

/*
 * Get the bottom-of-stack, as stored in the per-CPU TSS. This actually points
 * into the middle of cpu_info.guest_cpu_user_regs, at the section that
 * precisely corresponds to a CPU trap frame.
 */
#define get_stack_bottom() \
    ((unsigned long)&get_cpu_info()->guest_cpu_user_regs.es)

/*
 * Get the reasonable stack bounds for stack traces and stack dumps. Stack
 * dumps have a slightly larger range to include exception frames in the
 * printed information. The returned word is inside the interesting range.
 */
unsigned long get_stack_trace_bottom(unsigned long sp);
unsigned long get_stack_dump_bottom (unsigned long sp);

#ifdef CONFIG_LIVEPATCH
# define CHECK_FOR_LIVEPATCH_WORK "call check_for_livepatch_work;"
#elif defined(CONFIG_DEBUG)
/* Mimic the clobbering effect a call has on registers. */
# define CHECK_FOR_LIVEPATCH_WORK \
    "mov $0x1234567890abcdef, %%rax\n\t" \
    "mov %%rax, %%rcx; mov %%rax, %%rdx\n\t" \
    "mov %%rax, %%rsi; mov %%rax, %%rdi\n\t" \
    "mov %%rax, %%r8; mov %%rax, %%r9\n\t" \
    "mov %%rax, %%r10; mov %%rax, %%r11\n\t"
#else
# define CHECK_FOR_LIVEPATCH_WORK ""
#endif

#ifdef CONFIG_XEN_SHSTK
/*
 * We need to unwind the primary shadow stack to its supervisor token, located
 * in the last word of the primary shadow stack.
 *
 * Read the shadow stack pointer, subtract it from supervisor token position,
 * and divide by 8 to get the number of slots needing popping.
 *
 * INCSSPQ can't pop more than 255 entries. We shouldn't ever need to pop
 * that many entries, and getting this wrong will cause us to #DF later. Turn
 * it into a BUG() now for fractionally easier debugging.
 */
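/*
 * Worked example (illustrative numbers only, assuming 4 KiB pages and
 * PRIMARY_SHSTK_SLOT == 5, matching the layout comment at the top of this
 * file): the supervisor token then lives at stack offset
 * (5 + 1) * 4096 - 8 = 0x5ff8. If RDSSP reports an SSP at offset 0x5fd8,
 * (0x5ff8 - 0x5fd8) >> 3 == 4 slots need popping with INCSSPQ.
 */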
# define SHADOW_STACK_WORK \
    "mov $1, %[ssp];" \
    "rdsspd %[ssp];" \
    "cmp $1, %[ssp];" \
    "je .L_shstk_done.%=;" /* CET not active? Skip. */ \
    "mov $%c[skstk_base], %[val];" \
    "and $%c[stack_mask], %[ssp];" \
    "sub %[ssp], %[val];" \
    "shr $3, %[val];" \
    "cmp $255, %[val];" /* More than 255 entries? Crash. */ \
    UNLIKELY_START(a, shstk_adjust) \
    _ASM_BUGFRAME_TEXT(0) \
    UNLIKELY_END_SECTION ";" \
    "incsspq %q[val];" \
    ".L_shstk_done.%=:"
#else
# define SHADOW_STACK_WORK ""
#endif

#if __GNUC__ >= 9
# define ssaj_has_attr_noreturn(fn) __builtin_has_attribute(fn, __noreturn__)
#else
/* Simply can't check the property with older gcc. */
# define ssaj_has_attr_noreturn(fn) true
#endif
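
/*
 * Discard the current call stack and continue in fn: unwind the shadow
 * stack if CET-SS is active (SHADOW_STACK_WORK), point %rsp back at
 * cpu_info.guest_cpu_user_regs, optionally check for livepatch work, and
 * transfer control using the supplied instruction/constraint pair. fn must
 * not return; the BUILD_BUG_ON() enforces this where the compiler can tell.
 */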

#define switch_stack_and_jump(fn, instr, constr) \
    ({ \
        unsigned int tmp; \
        BUILD_BUG_ON(!ssaj_has_attr_noreturn(fn)); \
        __asm__ __volatile__ ( \
            SHADOW_STACK_WORK \
            "mov %[stk], %%rsp;" \
            CHECK_FOR_LIVEPATCH_WORK \
            instr "[fun]" \
            : [val] "=&r" (tmp), \
              [ssp] "=&r" (tmp) \
            : [stk] "r" (guest_cpu_user_regs()), \
              [fun] constr (fn), \
              [skstk_base] "i" \
              ((PRIMARY_SHSTK_SLOT + 1) * PAGE_SIZE - 8), \
              [stack_mask] "i" (STACK_SIZE - 1), \
              _ASM_BUGFRAME_INFO(BUGFRAME_bug, __LINE__, \
                                 __FILE__, NULL) \
            : "memory" ); \
        unreachable(); \
    })

#define reset_stack_and_jump(fn) \
    switch_stack_and_jump(fn, "jmp %c", "i")

/* The constraint may only specify non-call-clobbered registers. */
#define reset_stack_and_call_ind(fn) \
    ({ \
        (void)((fn) == (void (*)(void))NULL); \
        switch_stack_and_jump(fn, "INDIRECT_CALL %", "b"); \
    })
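
/*
 * Typical use (illustrative only; the function names here are hypothetical):
 * a noreturn path which is finished with the current stack contents and
 * wants to continue on a fresh stack, e.g. reset_stack_and_jump(idle_loop)
 * when a CPU has nothing to run, or reset_stack_and_call_ind(tail_fn) when
 * the continuation is only available through a function pointer.
 */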

/*
 * Which VCPU's state is currently running on each CPU?
 * This is not necessarily the same as 'current' as a CPU may be
 * executing a lazy state switch.
 */
DECLARE_PER_CPU(struct vcpu *, curr_vcpu);

#endif /* __X86_CURRENT_H__ */