1
2 #include <xen/version.h>
3 #include <xen/init.h>
4 #include <xen/sched.h>
5 #include <xen/lib.h>
6 #include <xen/errno.h>
7 #include <xen/mm.h>
8 #include <xen/irq.h>
9 #include <xen/symbols.h>
10 #include <xen/console.h>
11 #include <xen/sched.h>
12 #include <xen/shutdown.h>
13 #include <xen/guest_access.h>
14 #include <xen/watchdog.h>
15 #include <xen/hypercall.h>
16 #include <asm/current.h>
17 #include <asm/flushtlb.h>
18 #include <asm/traps.h>
19 #include <asm/event.h>
20 #include <asm/nmi.h>
21 #include <asm/msr.h>
22 #include <asm/page.h>
23 #include <asm/shared.h>
24 #include <asm/hvm/hvm.h>
25 #include <asm/hvm/support.h>
26
27
print_xen_info(void)28 static void print_xen_info(void)
29 {
30 char taint_str[TAINT_STRING_MAX_LEN];
31
32 printk("----[ Xen-%d.%d%s x86_64 debug=%c " gcov_string " %s ]----\n",
33 xen_major_version(), xen_minor_version(), xen_extra_version(),
34 debug_build() ? 'y' : 'n', print_tainted(taint_str));
35 }
36
37 enum context { CTXT_hypervisor, CTXT_pv_guest, CTXT_hvm_guest };
38
39 /* (ab)use crs[5..7] for fs/gs bases. */
/*
 * Snapshot the current CPU's control registers, segment selectors and
 * fs/gs bases into @regs / @crs.  Per the comment above, crs[5..7] hold
 * the fs base, gs base and MSR_SHADOW_GS_BASE contents respectively.
 */
static void read_registers(struct cpu_user_regs *regs, unsigned long crs[8])
{
    crs[0] = read_cr0();
    crs[2] = read_cr2();
    crs[3] = read_cr3();
    crs[4] = read_cr4();
    regs->ds = read_sreg(ds);
    regs->es = read_sreg(es);
    regs->fs = read_sreg(fs);
    regs->gs = read_sreg(gs);
    crs[5] = rdfsbase();
    crs[6] = rdgsbase();
    /* The gs base not currently active lives in MSR_SHADOW_GS_BASE. */
    rdmsrl(MSR_SHADOW_GS_BASE, crs[7]);
}
54
/*
 * Print the register state in @regs / @crs to the console.
 *
 * @regs:    GPRs, selectors, rip/rflags.
 * @crs:     cr0/2/3/4 in their natural slots; crs[5..7] carry the fs base,
 *           gs base and shadow gs base (see read_registers()).
 * @context: owner of the state; selects the "CONTEXT:" tag and extras.
 * @v:       associated vcpu, or NULL if none applies.
 */
static void _show_registers(
    const struct cpu_user_regs *regs, unsigned long crs[8],
    enum context context, const struct vcpu *v)
{
    static const char *const context_names[] = {
        [CTXT_hypervisor] = "hypervisor",
        [CTXT_pv_guest] = "pv guest",
        [CTXT_hvm_guest] = "hvm guest"
    };

    printk("RIP: %04x:[<%016lx>]", regs->cs, regs->rip);
    /* Only Xen's own addresses can be resolved to symbols (%pS). */
    if ( context == CTXT_hypervisor )
        printk(" %pS", _p(regs->rip));
    printk("\nRFLAGS: %016lx ", regs->rflags);
    /* "EM": PV event-channel upcall mask state. */
    if ( (context == CTXT_pv_guest) && v && v->vcpu_info )
        printk("EM: %d ", !!vcpu_info(v, evtchn_upcall_mask));
    printk("CONTEXT: %s", context_names[context]);
    if ( v && !is_idle_vcpu(v) )
        printk(" (%pv)", v);

    printk("\nrax: %016lx   rbx: %016lx   rcx: %016lx\n",
           regs->rax, regs->rbx, regs->rcx);
    printk("rdx: %016lx   rsi: %016lx   rdi: %016lx\n",
           regs->rdx, regs->rsi, regs->rdi);
    printk("rbp: %016lx   rsp: %016lx   r8:  %016lx\n",
           regs->rbp, regs->rsp, regs->r8);
    printk("r9:  %016lx   r10: %016lx   r11: %016lx\n",
           regs->r9,  regs->r10, regs->r11);
    /* Only print r12-r15 when the frame actually contains them. */
    if ( !(regs->entry_vector & TRAP_regs_partial) )
    {
        printk("r12: %016lx   r13: %016lx   r14: %016lx\n",
               regs->r12, regs->r13, regs->r14);
        printk("r15: %016lx   cr0: %016lx   cr4: %016lx\n",
               regs->r15, crs[0], crs[4]);
    }
    else
        printk("cr0: %016lx   cr4: %016lx\n", crs[0], crs[4]);
    printk("cr3: %016lx   cr2: %016lx\n", crs[3], crs[2]);
    printk("fsb: %016lx   gsb: %016lx   gss: %016lx\n",
           crs[5], crs[6], crs[7]);
    printk("ds: %04x   es: %04x   fs: %04x   gs: %04x   "
           "ss: %04x   cs: %04x\n",
           regs->ds, regs->es, regs->fs,
           regs->gs, regs->ss, regs->cs);
}
100
/*
 * Dump Xen info, the CPU number and the register state described by
 * @regs, gathering control register / selector information appropriate
 * for the interrupted context (Xen, PV guest, or HVM guest).
 */
void show_registers(const struct cpu_user_regs *regs)
{
    struct cpu_user_regs fault_regs = *regs;
    unsigned long fault_crs[8];
    enum context context;
    /* 'current' is not meaningful before secondary CPU bringup starts. */
    struct vcpu *v = system_state >= SYS_STATE_smp_boot ? current : NULL;

    if ( guest_mode(regs) && is_hvm_vcpu(v) )
    {
        struct segment_register sreg;
        context = CTXT_hvm_guest;
        /*
         * HVM guest state is not live in the hardware registers: pull
         * control registers, selectors and fs/gs bases from the vcpu.
         */
        fault_crs[0] = v->arch.hvm_vcpu.guest_cr[0];
        fault_crs[2] = v->arch.hvm_vcpu.guest_cr[2];
        fault_crs[3] = v->arch.hvm_vcpu.guest_cr[3];
        fault_crs[4] = v->arch.hvm_vcpu.guest_cr[4];
        hvm_get_segment_register(v, x86_seg_cs, &sreg);
        fault_regs.cs = sreg.sel;
        hvm_get_segment_register(v, x86_seg_ds, &sreg);
        fault_regs.ds = sreg.sel;
        hvm_get_segment_register(v, x86_seg_es, &sreg);
        fault_regs.es = sreg.sel;
        hvm_get_segment_register(v, x86_seg_fs, &sreg);
        fault_regs.fs = sreg.sel;
        fault_crs[5] = sreg.base;
        hvm_get_segment_register(v, x86_seg_gs, &sreg);
        fault_regs.gs = sreg.sel;
        fault_crs[6] = sreg.base;
        hvm_get_segment_register(v, x86_seg_ss, &sreg);
        fault_regs.ss = sreg.sel;
        fault_crs[7] = hvm_get_shadow_gs_base(v);
    }
    else
    {
        read_registers(&fault_regs, fault_crs);

        if ( guest_mode(regs) )
        {
            context = CTXT_pv_guest;
            /* Report the guest's (virtualised) %cr2, not the real one. */
            fault_crs[2] = arch_get_cr2(v);
        }
        else
        {
            context = CTXT_hypervisor;
            fault_crs[2] = read_cr2();
        }
    }

    print_xen_info();
    printk("CPU:    %d\n", smp_processor_id());
    _show_registers(&fault_regs, fault_crs, context, v);

    /* Last Exception Record MSR pair: 'from' at ler_msr, 'to' at +1. */
    if ( this_cpu(ler_msr) && !guest_mode(regs) )
    {
        u64 from, to;
        rdmsrl(this_cpu(ler_msr), from);
        rdmsrl(this_cpu(ler_msr) + 1, to);
        printk("ler: %016lx -> %016lx\n", from, to);
    }
}
160
/* Print the saved register state of PV vcpu @v (silently skips non-PV). */
void vcpu_show_registers(const struct vcpu *v)
{
    const struct cpu_user_regs *regs = &v->arch.user_regs;
    bool kernel = guest_kernel_mode(v, regs);
    unsigned long crs[8];

    /* Only handle PV guests for now */
    if ( !is_pv_vcpu(v) )
        return;

    crs[0] = v->arch.pv_vcpu.ctrlreg[0];
    crs[2] = arch_get_cr2(v);
    /* PV kernel and user mode run on separate top-level pagetables. */
    crs[3] = pagetable_get_paddr(kernel ?
                                 v->arch.guest_table :
                                 v->arch.guest_table_user);
    crs[4] = v->arch.pv_vcpu.ctrlreg[4];
    crs[5] = v->arch.pv_vcpu.fs_base;
    /*
     * In kernel mode crs[6]/crs[7] get the kernel/user gs bases; in user
     * mode the assignment is swapped, so crs[6] is always the base
     * belonging to the mode the vcpu was in.
     */
    crs[6 + !kernel] = v->arch.pv_vcpu.gs_base_kernel;
    crs[7 - !kernel] = v->arch.pv_vcpu.gs_base_user;

    _show_registers(regs, crs, CTXT_pv_guest, v);
}
183
/*
 * Dump the 4-level pagetable walk of @addr using the pagetables currently
 * loaded in %cr3.  For each level, print the entry and (when the M2P is
 * usable) the gpfn of the frame it references; the walk stops at the
 * first non-present entry, superpage (PSE) mapping, or invalid mfn.
 */
void show_page_walk(unsigned long addr)
{
    unsigned long pfn, mfn = read_cr3() >> PAGE_SHIFT;
    l4_pgentry_t l4e, *l4t;
    l3_pgentry_t l3e, *l3t;
    l2_pgentry_t l2e, *l2t;
    l1_pgentry_t l1e, *l1t;

    printk("Pagetable walk from %016lx:\n", addr);
    /* Non-canonical addresses have no valid walk at all. */
    if ( !is_canonical_address(addr) )
        return;

    l4t = map_domain_page(_mfn(mfn));
    l4e = l4t[l4_table_offset(addr)];
    unmap_domain_page(l4t);
    mfn = l4e_get_pfn(l4e);
    /* gpfn via M2P, or INVALID_M2P_ENTRY when no translation exists. */
    pfn = mfn_valid(_mfn(mfn)) && machine_to_phys_mapping_valid ?
          get_gpfn_from_mfn(mfn) : INVALID_M2P_ENTRY;
    printk(" L4[0x%03lx] = %"PRIpte" %016lx\n",
           l4_table_offset(addr), l4e_get_intpte(l4e), pfn);
    if ( !(l4e_get_flags(l4e) & _PAGE_PRESENT) ||
         !mfn_valid(_mfn(mfn)) )
        return;

    l3t = map_domain_page(_mfn(mfn));
    l3e = l3t[l3_table_offset(addr)];
    unmap_domain_page(l3t);
    mfn = l3e_get_pfn(l3e);
    pfn = mfn_valid(_mfn(mfn)) && machine_to_phys_mapping_valid ?
          get_gpfn_from_mfn(mfn) : INVALID_M2P_ENTRY;
    printk(" L3[0x%03lx] = %"PRIpte" %016lx%s\n",
           l3_table_offset(addr), l3e_get_intpte(l3e), pfn,
           (l3e_get_flags(l3e) & _PAGE_PSE) ? " (PSE)" : "");
    if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) ||
         (l3e_get_flags(l3e) & _PAGE_PSE) ||
         !mfn_valid(_mfn(mfn)) )
        return;

    l2t = map_domain_page(_mfn(mfn));
    l2e = l2t[l2_table_offset(addr)];
    unmap_domain_page(l2t);
    mfn = l2e_get_pfn(l2e);
    pfn = mfn_valid(_mfn(mfn)) && machine_to_phys_mapping_valid ?
          get_gpfn_from_mfn(mfn) : INVALID_M2P_ENTRY;
    printk(" L2[0x%03lx] = %"PRIpte" %016lx%s\n",
           l2_table_offset(addr), l2e_get_intpte(l2e), pfn,
           (l2e_get_flags(l2e) & _PAGE_PSE) ? " (PSE)" : "");
    if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) ||
         (l2e_get_flags(l2e) & _PAGE_PSE) ||
         !mfn_valid(_mfn(mfn)) )
        return;

    l1t = map_domain_page(_mfn(mfn));
    l1e = l1t[l1_table_offset(addr)];
    unmap_domain_page(l1t);
    mfn = l1e_get_pfn(l1e);
    pfn = mfn_valid(_mfn(mfn)) && machine_to_phys_mapping_valid ?
          get_gpfn_from_mfn(mfn) : INVALID_M2P_ENTRY;
    printk(" L1[0x%03lx] = %"PRIpte" %016lx\n",
           l1_table_offset(addr), l1e_get_intpte(l1e), pfn);
}
245
/*
 * Double fault handler: dump as much state as we can to the console and
 * panic.  Does not return.
 */
void do_double_fault(struct cpu_user_regs *regs)
{
    unsigned int cpu;
    unsigned long crs[8];

    /* The interrupted context may hold the console lock - break it. */
    console_force_unlock();

    /*
     * NOTE(review): the CPU id appears to be recovered from the segment
     * limit of the per-CPU GDT entry via 'lsll' - confirm against the
     * GDT setup code.
     */
    asm ( "lsll %1, %0" : "=r" (cpu) : "rm" (PER_CPU_GDT_ENTRY << 3) );

    /* Find information saved during fault and dump it to the console. */
    printk("*** DOUBLE FAULT ***\n");
    print_xen_info();

    read_registers(regs, crs);

    printk("CPU:    %d\n", cpu);
    _show_registers(regs, crs, CTXT_hypervisor, NULL);
    show_stack_overflow(cpu, regs);

    panic("DOUBLE FAULT -- system shutdown");
}
267
write_stub_trampoline(unsigned char * stub,unsigned long stub_va,unsigned long stack_bottom,unsigned long target_va)268 static unsigned int write_stub_trampoline(
269 unsigned char *stub, unsigned long stub_va,
270 unsigned long stack_bottom, unsigned long target_va)
271 {
272 /* movabsq %rax, stack_bottom - 8 */
273 stub[0] = 0x48;
274 stub[1] = 0xa3;
275 *(uint64_t *)&stub[2] = stack_bottom - 8;
276
277 /* movq %rsp, %rax */
278 stub[10] = 0x48;
279 stub[11] = 0x89;
280 stub[12] = 0xe0;
281
282 /* movabsq $stack_bottom - 8, %rsp */
283 stub[13] = 0x48;
284 stub[14] = 0xbc;
285 *(uint64_t *)&stub[15] = stack_bottom - 8;
286
287 /* pushq %rax */
288 stub[23] = 0x50;
289
290 /* jmp target_va */
291 stub[24] = 0xe9;
292 *(int32_t *)&stub[25] = target_va - (stub_va + 29);
293
294 /* Round up to a multiple of 16 bytes. */
295 return 32;
296 }
297
/* Per-CPU SYSCALL trampoline stub area (virtual address + backing mfn). */
DEFINE_PER_CPU(struct stubs, stubs);
/* asm entry points reached via the LSTAR / CSTAR trampolines. */
void lstar_enter(void);
void cstar_enter(void);
301
/*
 * Program this CPU's fast system call entry points: write the LSTAR and
 * CSTAR trampolines into the per-CPU stub page, set the SYSENTER MSRs
 * where applicable, and configure the common SYSCALL MSRs.
 */
void subarch_percpu_traps_init(void)
{
    unsigned long stack_bottom = get_stack_bottom();
    unsigned long stub_va = this_cpu(stubs.addr);
    unsigned char *stub_page;
    unsigned int offset;

    /* IST_MAX IST pages + 1 syscall page + 1 guard page + primary stack. */
    BUILD_BUG_ON((IST_MAX + 2) * PAGE_SIZE + PRIMARY_STACK_SIZE > STACK_SIZE);

    stub_page = map_domain_page(_mfn(this_cpu(stubs.mfn)));

    /*
     * Trampoline for SYSCALL entry from 64-bit mode. The VT-x HVM vcpu
     * context switch logic relies on the SYSCALL trampoline being at the
     * start of the stubs.
     */
    wrmsrl(MSR_LSTAR, stub_va);
    offset = write_stub_trampoline(stub_page + (stub_va & ~PAGE_MASK),
                                   stub_va, stack_bottom,
                                   (unsigned long)lstar_enter);
    stub_va += offset;

    /* SYSENTER is only configured for Intel and Centaur CPUs. */
    if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL ||
         boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR )
    {
        /* SYSENTER entry. */
        wrmsrl(MSR_IA32_SYSENTER_ESP, stack_bottom);
        wrmsrl(MSR_IA32_SYSENTER_EIP, (unsigned long)sysenter_entry);
        wrmsr(MSR_IA32_SYSENTER_CS, __HYPERVISOR_CS, 0);
    }

    /* Trampoline for SYSCALL entry from compatibility mode. */
    wrmsrl(MSR_CSTAR, stub_va);
    offset += write_stub_trampoline(stub_page + (stub_va & ~PAGE_MASK),
                                    stub_va, stack_bottom,
                                    (unsigned long)cstar_enter);

    /* Don't consume more than half of the stub space here. */
    ASSERT(offset <= STUB_BUF_SIZE / 2);

    unmap_domain_page(stub_page);

    /* Common SYSCALL parameters. */
    wrmsrl(MSR_STAR, XEN_MSR_STAR);
    wrmsrl(MSR_SYSCALL_MASK, XEN_SYSCALL_MASK);
}
349
hypercall_page_initialise(struct domain * d,void * hypercall_page)350 void hypercall_page_initialise(struct domain *d, void *hypercall_page)
351 {
352 memset(hypercall_page, 0xCC, PAGE_SIZE);
353 if ( is_hvm_domain(d) )
354 hvm_hypercall_page_initialise(d, hypercall_page);
355 else if ( !is_pv_32bit_domain(d) )
356 hypercall_page_initialise_ring3_kernel(hypercall_page);
357 else
358 hypercall_page_initialise_ring1_kernel(hypercall_page);
359 }
360
361 /*
362 * Local variables:
363 * mode: C
364 * c-file-style: "BSD"
365 * c-basic-offset: 4
366 * tab-width: 4
367 * indent-tabs-mode: nil
368 * End:
369 */
370