/******************************************************************************
 * current.h
 *
 * Information structure that lives at the bottom of the per-cpu Xen stack.
 */

#ifndef __X86_CURRENT_H__
#define __X86_CURRENT_H__

#include <xen/percpu.h>
#include <xen/page-size.h>
#include <public/xen.h>

/*
 * Xen's cpu stacks are 8 pages (8-page aligned), arranged as:
 *
 * 7 - Primary stack (with a struct cpu_info at the top)
 * 6 - Primary stack
 * 5 - Primary Shadow Stack (read-only)
 * 4 - #DF IST stack
 * 3 - #DB IST stack
 * 2 - NMI IST stack
 * 1 - #MC IST stack
 * 0 - IST Shadow Stacks (4x 1k, read-only)
 */

/*
 * Identify which stack page the stack pointer is on.  Returns an index
 * as per the comment above.
 */
static inline unsigned int get_stack_page(unsigned long sp)
{
    return (sp & (STACK_SIZE-1)) >> PAGE_SHIFT;
}
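
/*
 * Illustrative example, assuming 4k pages and a hypothetical stack_base
 * pointing at the bottom of the 8-page-aligned stack:
 * get_stack_page(stack_base + 0x7010) is 7 (top page of the primary stack),
 * while get_stack_page(stack_base + 0x2008) is 2 (the NMI IST stack),
 * matching the layout above.
 */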

struct vcpu;

struct cpu_info {
    struct cpu_user_regs guest_cpu_user_regs;
    unsigned int processor_id;
    unsigned int verw_sel;
    struct vcpu *current_vcpu;
    unsigned long per_cpu_offset;
    unsigned long cr4;
    /*
     * Of the two following fields, pv_cr3 holds the CR3 value to be loaded
     * on this pCPU whenever 64-bit PV guest context is entered; a value of
     * zero means no CR3 load is to be performed.  xen_cr3 holds the value
     * to restore when re-entering Xen, if any; zero means there is nothing
     * to restore.
     */
    unsigned long xen_cr3;
    unsigned long pv_cr3;

    /* See asm/spec_ctrl_asm.h for usage. */
    unsigned int shadow_spec_ctrl;
    unsigned int xen_spec_ctrl;
    unsigned int last_spec_ctrl;
    uint8_t      scf; /* SCF_* */

    /*
     * The following field controls copying of the L4 page table of 64-bit
     * PV guests to the per-cpu root page table on entering the guest
     * context.  If set, the L4 page table is copied to the root page table
     * and the field is then cleared.
     */
    bool         root_pgt_changed;

    /*
     * use_pv_cr3 is set in case the value of pv_cr3 is to be written into
     * CR3 when returning from an interrupt. The main use is when returning
     * from an NMI or MCE to hypervisor code where pv_cr3 was active.
     */
    bool         use_pv_cr3;

    /* get_stack_bottom() must be 16-byte aligned */
};

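/*
 * ORing sp with (STACK_SIZE - 1) and adding 1 rounds it up to the byte just
 * past the 8-page stack; stepping back one struct cpu_info then lands on the
 * cpu_info block sitting at the top of the primary stack.
 */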
static inline struct cpu_info *get_cpu_info_from_stack(unsigned long sp)
{
    return (struct cpu_info *)((sp | (STACK_SIZE - 1)) + 1) - 1;
}

static inline struct cpu_info *get_cpu_info(void)
{
#ifdef __clang__
    /* Clang complains that sp in the else case is not initialised. */
    unsigned long sp;
    asm ( "mov %%rsp, %0" : "=r" (sp) );
#else
    register unsigned long sp asm("rsp");
#endif

    return get_cpu_info_from_stack(sp);
}

#define get_current()         (get_cpu_info()->current_vcpu)
#define set_current(vcpu)     (get_cpu_info()->current_vcpu = (vcpu))
#define current               (get_current())

#define smp_processor_id()    (get_cpu_info()->processor_id)
#define guest_cpu_user_regs() (&get_cpu_info()->guest_cpu_user_regs)

#define get_per_cpu_offset()  (get_cpu_info()->per_cpu_offset)

/*
 * Get the bottom-of-stack, as stored in the per-CPU TSS. This actually points
 * into the middle of cpu_info.guest_cpu_user_regs, at the section that
 * precisely corresponds to a CPU trap frame.
 */
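/*
 * Note (explanatory): in the public cpu_user_regs layout, es is the first
 * field after the hardware-pushed frame (ss), so its address marks where
 * hardware begins pushing an exception frame, i.e. the value stored as
 * TSS.rsp0.
 */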
#define get_stack_bottom()                      \
    ((unsigned long)&get_cpu_info()->guest_cpu_user_regs.es)

/*
 * Get the reasonable stack bounds for stack traces and stack dumps.  Stack
 * dumps have a slightly larger range to include exception frames in the
 * printed information.  The returned word is inside the interesting range.
 */
unsigned long get_stack_trace_bottom(unsigned long sp);
unsigned long get_stack_dump_bottom (unsigned long sp);

#ifdef CONFIG_LIVEPATCH
# define CHECK_FOR_LIVEPATCH_WORK "call check_for_livepatch_work;"
#elif defined(CONFIG_DEBUG)
/* Mimic the clobbering effect a call has on registers. */
# define CHECK_FOR_LIVEPATCH_WORK \
    "mov $0x1234567890abcdef, %%rax\n\t" \
    "mov %%rax, %%rcx; mov %%rax, %%rdx\n\t" \
    "mov %%rax, %%rsi; mov %%rax, %%rdi\n\t" \
    "mov %%rax, %%r8; mov %%rax, %%r9\n\t" \
    "mov %%rax, %%r10; mov %%rax, %%r11\n\t"
#else
# define CHECK_FOR_LIVEPATCH_WORK ""
#endif

#ifdef CONFIG_XEN_SHSTK
/*
 * We need to unwind the primary shadow stack to its supervisor token,
 * located in the last word of the primary shadow stack.
 *
 * Read the shadow stack pointer, subtract it from the supervisor token
 * position, and divide by 8 to get the number of slots needing popping.
 *
 * INCSSPQ can't pop more than 255 entries.  We shouldn't ever need to pop
 * that many entries, and getting this wrong will cause us to #DF later.
 * Turn it into a BUG() now for fractionally easier debugging.
 */
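/*
 * Worked example (illustrative; assumes 4k pages and, per the layout above,
 * PRIMARY_SHSTK_SLOT being 5): the supervisor token lives at stack offset
 * (5 + 1) * 4096 - 8 = 0x5ff8.  If RDSSP reports an SSP at stack offset
 * 0x5fd8, then (0x5ff8 - 0x5fd8) / 8 = 4, so INCSSPQ pops 4 entries and
 * leaves SSP pointing at the token.
 */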
# define SHADOW_STACK_WORK                                      \
    "mov $1, %[ssp];"                                           \
    "rdsspd %[ssp];"                                            \
    "cmp $1, %[ssp];"                                           \
    "je .L_shstk_done.%=;" /* CET not active?  Skip. */         \
    "mov $%c[skstk_base], %[val];"                              \
    "and $%c[stack_mask], %[ssp];"                              \
    "sub %[ssp], %[val];"                                       \
    "shr $3, %[val];"                                           \
    "cmp $255, %[val];" /* More than 255 entries?  Crash. */    \
    UNLIKELY_START(a, shstk_adjust)                             \
    _ASM_BUGFRAME_TEXT(0)                                       \
    UNLIKELY_END_SECTION ";"                                    \
    "incsspq %q[val];"                                          \
    ".L_shstk_done.%=:"
#else
# define SHADOW_STACK_WORK ""
#endif

#if __GNUC__ >= 9
# define ssaj_has_attr_noreturn(fn) __builtin_has_attribute(fn, __noreturn__)
#else
/* Simply can't check the property with older gcc. */
# define ssaj_has_attr_noreturn(fn) true
#endif

#define switch_stack_and_jump(fn, instr, constr)                        \
    ({                                                                  \
        unsigned int tmp;                                               \
        BUILD_BUG_ON(!ssaj_has_attr_noreturn(fn));                      \
        __asm__ __volatile__ (                                          \
            SHADOW_STACK_WORK                                           \
            "mov %[stk], %%rsp;"                                        \
            CHECK_FOR_LIVEPATCH_WORK                                    \
            instr "[fun]"                                               \
            : [val] "=&r" (tmp),                                        \
              [ssp] "=&r" (tmp)                                         \
            : [stk] "r" (guest_cpu_user_regs()),                        \
              [fun] constr (fn),                                        \
              [skstk_base] "i"                                          \
              ((PRIMARY_SHSTK_SLOT + 1) * PAGE_SIZE - 8),               \
              [stack_mask] "i" (STACK_SIZE - 1),                        \
              _ASM_BUGFRAME_INFO(BUGFRAME_bug, __LINE__,                \
                                 __FILE__, NULL)                        \
            : "memory" );                                               \
        unreachable();                                                  \
    })

#define reset_stack_and_jump(fn)                                        \
    switch_stack_and_jump(fn, "jmp %c", "i")

/* The constraint may only specify non-call-clobbered registers. */
#define reset_stack_and_call_ind(fn)                                    \
    ({                                                                  \
        (void)((fn) == (void (*)(void))NULL);                           \
        switch_stack_and_jump(fn, "INDIRECT_CALL %", "b");              \
    })
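
/*
 * Illustrative usage (hypothetical continuation function; the target must
 * carry the noreturn attribute, as checked by the BUILD_BUG_ON() above on
 * gcc 9 and newer):
 *
 *     void noreturn vcpu_restart(void);
 *     ...
 *     reset_stack_and_jump(vcpu_restart);
 *
 * Execution continues in vcpu_restart() on the reset primary stack, and
 * control never returns to the caller.
 */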

/*
 * Which VCPU's state is currently running on each CPU?
 * This is not necessarily the same as 'current' as a CPU may be
 * executing a lazy state switch.
 */
DECLARE_PER_CPU(struct vcpu *, curr_vcpu);

#endif /* __X86_CURRENT_H__ */