
#include <xen/version.h>
#include <xen/init.h>
#include <xen/sched.h>
#include <xen/lib.h>
#include <xen/errno.h>
#include <xen/mm.h>
#include <xen/irq.h>
#include <xen/symbols.h>
#include <xen/console.h>
#include <xen/shutdown.h>
#include <xen/guest_access.h>
#include <xen/watchdog.h>
#include <xen/hypercall.h>
#include <asm/current.h>
#include <asm/flushtlb.h>
#include <asm/traps.h>
#include <asm/event.h>
#include <asm/nmi.h>
#include <asm/msr.h>
#include <asm/page.h>
#include <asm/shared.h>
#include <asm/hvm/hvm.h>
#include <asm/hvm/support.h>


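/* Print the Xen version, build type, and taint banner used in crash dumps. */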
static void print_xen_info(void)
{
    char taint_str[TAINT_STRING_MAX_LEN];

    printk("----[ Xen-%d.%d%s  x86_64  debug=%c " gcov_string "  %s ]----\n",
           xen_major_version(), xen_minor_version(), xen_extra_version(),
           debug_build() ? 'y' : 'n', print_tainted(taint_str));
}

enum context { CTXT_hypervisor, CTXT_pv_guest, CTXT_hvm_guest };

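/* Snapshot the control registers and segment state of the current CPU. */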
/* (ab)use crs[5..7] for fs/gs bases. */
static void read_registers(struct cpu_user_regs *regs, unsigned long crs[8])
{
    crs[0] = read_cr0();
    crs[2] = read_cr2();
    crs[3] = read_cr3();
    crs[4] = read_cr4();
    regs->ds = read_sreg(ds);
    regs->es = read_sreg(es);
    regs->fs = read_sreg(fs);
    regs->gs = read_sreg(gs);
    crs[5] = rdfsbase();
    crs[6] = rdgsbase();
    rdmsrl(MSR_SHADOW_GS_BASE, crs[7]);
}
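/*
 * Dump a register snapshot to the console.  The crs[] layout matches
 * read_registers(): cr0/cr2/cr3/cr4 in slots 0 and 2-4, fs base, gs base
 * and shadow gs base in slots 5-7.
 */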
static void _show_registers(
    const struct cpu_user_regs *regs, unsigned long crs[8],
    enum context context, const struct vcpu *v)
{
    static const char *const context_names[] = {
        [CTXT_hypervisor] = "hypervisor",
        [CTXT_pv_guest]   = "pv guest",
        [CTXT_hvm_guest]  = "hvm guest"
    };

    printk("RIP:    %04x:[<%016lx>]", regs->cs, regs->rip);
    if ( context == CTXT_hypervisor )
        printk(" %pS", _p(regs->rip));
    printk("\nRFLAGS: %016lx   ", regs->rflags);
    if ( (context == CTXT_pv_guest) && v && v->vcpu_info )
        printk("EM: %d   ", !!vcpu_info(v, evtchn_upcall_mask));
    printk("CONTEXT: %s", context_names[context]);
    if ( v && !is_idle_vcpu(v) )
        printk(" (%pv)", v);

    printk("\nrax: %016lx   rbx: %016lx   rcx: %016lx\n",
           regs->rax, regs->rbx, regs->rcx);
    printk("rdx: %016lx   rsi: %016lx   rdi: %016lx\n",
           regs->rdx, regs->rsi, regs->rdi);
    printk("rbp: %016lx   rsp: %016lx   r8:  %016lx\n",
           regs->rbp, regs->rsp, regs->r8);
    printk("r9:  %016lx   r10: %016lx   r11: %016lx\n",
           regs->r9,  regs->r10, regs->r11);
    if ( !(regs->entry_vector & TRAP_regs_partial) )
    {
        printk("r12: %016lx   r13: %016lx   r14: %016lx\n",
               regs->r12, regs->r13, regs->r14);
        printk("r15: %016lx   cr0: %016lx   cr4: %016lx\n",
               regs->r15, crs[0], crs[4]);
    }
    else
        printk("cr0: %016lx   cr4: %016lx\n", crs[0], crs[4]);
    printk("cr3: %016lx   cr2: %016lx\n", crs[3], crs[2]);
    printk("fsb: %016lx   gsb: %016lx   gss: %016lx\n",
           crs[5], crs[6], crs[7]);
    printk("ds: %04x   es: %04x   fs: %04x   gs: %04x   "
           "ss: %04x   cs: %04x\n",
           regs->ds, regs->es, regs->fs,
           regs->gs, regs->ss, regs->cs);
}
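/*
 * Show the register state at the point of a fault.  For HVM guests the
 * control registers and selectors are fetched from the vcpu's virtual
 * state; otherwise they are read directly from the hardware.
 */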
void show_registers(const struct cpu_user_regs *regs)
{
    struct cpu_user_regs fault_regs = *regs;
    unsigned long fault_crs[8];
    enum context context;
    struct vcpu *v = system_state >= SYS_STATE_smp_boot ? current : NULL;

    if ( guest_mode(regs) && is_hvm_vcpu(v) )
    {
        struct segment_register sreg;

        context = CTXT_hvm_guest;
        fault_crs[0] = v->arch.hvm_vcpu.guest_cr[0];
        fault_crs[2] = v->arch.hvm_vcpu.guest_cr[2];
        fault_crs[3] = v->arch.hvm_vcpu.guest_cr[3];
        fault_crs[4] = v->arch.hvm_vcpu.guest_cr[4];
        hvm_get_segment_register(v, x86_seg_cs, &sreg);
        fault_regs.cs = sreg.sel;
        hvm_get_segment_register(v, x86_seg_ds, &sreg);
        fault_regs.ds = sreg.sel;
        hvm_get_segment_register(v, x86_seg_es, &sreg);
        fault_regs.es = sreg.sel;
        hvm_get_segment_register(v, x86_seg_fs, &sreg);
        fault_regs.fs = sreg.sel;
        fault_crs[5] = sreg.base;
        hvm_get_segment_register(v, x86_seg_gs, &sreg);
        fault_regs.gs = sreg.sel;
        fault_crs[6] = sreg.base;
        hvm_get_segment_register(v, x86_seg_ss, &sreg);
        fault_regs.ss = sreg.sel;
        fault_crs[7] = hvm_get_shadow_gs_base(v);
    }
    else
    {
        read_registers(&fault_regs, fault_crs);

        if ( guest_mode(regs) )
        {
            context = CTXT_pv_guest;
            fault_crs[2] = arch_get_cr2(v);
        }
        else
        {
            context = CTXT_hypervisor;
            fault_crs[2] = read_cr2();
        }
    }

    print_xen_info();
    printk("CPU:    %d\n", smp_processor_id());
    _show_registers(&fault_regs, fault_crs, context, v);

    if ( this_cpu(ler_msr) && !guest_mode(regs) )
    {
        u64 from, to;
        rdmsrl(this_cpu(ler_msr), from);
        rdmsrl(this_cpu(ler_msr) + 1, to);
        printk("ler: %016lx -> %016lx\n", from, to);
    }
}
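/* Dump the saved register state of a (non-running) PV vcpu. */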
void vcpu_show_registers(const struct vcpu *v)
{
    const struct cpu_user_regs *regs = &v->arch.user_regs;
    bool kernel = guest_kernel_mode(v, regs);
    unsigned long crs[8];

    /* Only handle PV guests for now */
    if ( !is_pv_vcpu(v) )
        return;

    crs[0] = v->arch.pv_vcpu.ctrlreg[0];
    crs[2] = arch_get_cr2(v);
    crs[3] = pagetable_get_paddr(kernel ?
                                 v->arch.guest_table :
                                 v->arch.guest_table_user);
    crs[4] = v->arch.pv_vcpu.ctrlreg[4];
    crs[5] = v->arch.pv_vcpu.fs_base;
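    /*
     * crs[6] is the active gs base and crs[7] the shadowed one: in kernel
     * mode gs_base_kernel is active and gs_base_user shadowed; in user
     * mode the two swap places.
     */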
    crs[6 + !kernel] = v->arch.pv_vcpu.gs_base_kernel;
    crs[7 - !kernel] = v->arch.pv_vcpu.gs_base_user;

    _show_registers(regs, crs, CTXT_pv_guest, v);
}
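/*
 * Walk the current page tables for a linear address, printing one line
 * per level.  The walk stops early at a non-present entry, a superpage
 * (PSE) mapping, or an MFN without a valid frame.
 */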
void show_page_walk(unsigned long addr)
{
    unsigned long pfn, mfn = read_cr3() >> PAGE_SHIFT;
    l4_pgentry_t l4e, *l4t;
    l3_pgentry_t l3e, *l3t;
    l2_pgentry_t l2e, *l2t;
    l1_pgentry_t l1e, *l1t;

    printk("Pagetable walk from %016lx:\n", addr);
    if ( !is_canonical_address(addr) )
        return;

    l4t = map_domain_page(_mfn(mfn));
    l4e = l4t[l4_table_offset(addr)];
    unmap_domain_page(l4t);
    mfn = l4e_get_pfn(l4e);
    pfn = mfn_valid(_mfn(mfn)) && machine_to_phys_mapping_valid ?
          get_gpfn_from_mfn(mfn) : INVALID_M2P_ENTRY;
    printk(" L4[0x%03lx] = %"PRIpte" %016lx\n",
           l4_table_offset(addr), l4e_get_intpte(l4e), pfn);
    if ( !(l4e_get_flags(l4e) & _PAGE_PRESENT) ||
         !mfn_valid(_mfn(mfn)) )
        return;

    l3t = map_domain_page(_mfn(mfn));
    l3e = l3t[l3_table_offset(addr)];
    unmap_domain_page(l3t);
    mfn = l3e_get_pfn(l3e);
    pfn = mfn_valid(_mfn(mfn)) && machine_to_phys_mapping_valid ?
          get_gpfn_from_mfn(mfn) : INVALID_M2P_ENTRY;
    printk(" L3[0x%03lx] = %"PRIpte" %016lx%s\n",
           l3_table_offset(addr), l3e_get_intpte(l3e), pfn,
           (l3e_get_flags(l3e) & _PAGE_PSE) ? " (PSE)" : "");
    if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) ||
         (l3e_get_flags(l3e) & _PAGE_PSE) ||
         !mfn_valid(_mfn(mfn)) )
        return;

    l2t = map_domain_page(_mfn(mfn));
    l2e = l2t[l2_table_offset(addr)];
    unmap_domain_page(l2t);
    mfn = l2e_get_pfn(l2e);
    pfn = mfn_valid(_mfn(mfn)) && machine_to_phys_mapping_valid ?
          get_gpfn_from_mfn(mfn) : INVALID_M2P_ENTRY;
    printk(" L2[0x%03lx] = %"PRIpte" %016lx%s\n",
           l2_table_offset(addr), l2e_get_intpte(l2e), pfn,
           (l2e_get_flags(l2e) & _PAGE_PSE) ? " (PSE)" : "");
    if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) ||
         (l2e_get_flags(l2e) & _PAGE_PSE) ||
         !mfn_valid(_mfn(mfn)) )
        return;

    l1t = map_domain_page(_mfn(mfn));
    l1e = l1t[l1_table_offset(addr)];
    unmap_domain_page(l1t);
    mfn = l1e_get_pfn(l1e);
    pfn = mfn_valid(_mfn(mfn)) && machine_to_phys_mapping_valid ?
          get_gpfn_from_mfn(mfn) : INVALID_M2P_ENTRY;
    printk(" L1[0x%03lx] = %"PRIpte" %016lx\n",
           l1_table_offset(addr), l1e_get_intpte(l1e), pfn);
}
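/* Handle a double fault: dump whatever state can be recovered, then panic. */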
void do_double_fault(struct cpu_user_regs *regs)
{
    unsigned int cpu;
    unsigned long crs[8];

    console_force_unlock();

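    /* The limit field of the per-CPU GDT entry encodes this CPU's id. */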
    asm ( "lsll %1, %0" : "=r" (cpu) : "rm" (PER_CPU_GDT_ENTRY << 3) );

    /* Find information saved during fault and dump it to the console. */
    printk("*** DOUBLE FAULT ***\n");
    print_xen_info();

    read_registers(regs, crs);

    printk("CPU:    %d\n", cpu);
    _show_registers(regs, crs, CTXT_hypervisor, NULL);
    show_stack_overflow(cpu, regs);

    panic("DOUBLE FAULT -- system shutdown");
}
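/*
 * Write a syscall entry trampoline into the per-CPU stub page.  The
 * emitted code saves the guest's %rax and %rsp and switches %rsp onto
 * the hypervisor's primary stack before jumping to the C-level entry
 * point: on entry to target_va, [stack_bottom - 8] holds the guest %rax
 * and [stack_bottom - 16] the guest %rsp.
 */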
static unsigned int write_stub_trampoline(
    unsigned char *stub, unsigned long stub_va,
    unsigned long stack_bottom, unsigned long target_va)
{
    /* movabsq %rax, stack_bottom - 8 */
    stub[0] = 0x48;
    stub[1] = 0xa3;
    *(uint64_t *)&stub[2] = stack_bottom - 8;

    /* movq %rsp, %rax */
    stub[10] = 0x48;
    stub[11] = 0x89;
    stub[12] = 0xe0;

    /* movabsq $stack_bottom - 8, %rsp */
    stub[13] = 0x48;
    stub[14] = 0xbc;
    *(uint64_t *)&stub[15] = stack_bottom - 8;

    /* pushq %rax */
    stub[23] = 0x50;

    /* jmp target_va */
    stub[24] = 0xe9;
    *(int32_t *)&stub[25] = target_va - (stub_va + 29);

    /* Round up to a multiple of 16 bytes. */
    return 32;
}

DEFINE_PER_CPU(struct stubs, stubs);
void lstar_enter(void);
void cstar_enter(void);

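/*
 * Per-CPU MSR setup for the system call entry points: LSTAR and CSTAR
 * point at trampolines in the per-CPU stub page, and SYSENTER is wired
 * up on Intel and Centaur CPUs.
 */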
void subarch_percpu_traps_init(void)
{
    unsigned long stack_bottom = get_stack_bottom();
    unsigned long stub_va = this_cpu(stubs.addr);
    unsigned char *stub_page;
    unsigned int offset;

    /* IST_MAX IST pages + 1 syscall page + 1 guard page + primary stack. */
    BUILD_BUG_ON((IST_MAX + 2) * PAGE_SIZE + PRIMARY_STACK_SIZE > STACK_SIZE);

    stub_page = map_domain_page(_mfn(this_cpu(stubs.mfn)));

    /*
     * Trampoline for SYSCALL entry from 64-bit mode.  The VT-x HVM vcpu
     * context switch logic relies on the SYSCALL trampoline being at the
     * start of the stubs.
     */
    wrmsrl(MSR_LSTAR, stub_va);
    offset = write_stub_trampoline(stub_page + (stub_va & ~PAGE_MASK),
                                   stub_va, stack_bottom,
                                   (unsigned long)lstar_enter);
    stub_va += offset;

    if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL ||
         boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR )
    {
        /* SYSENTER entry. */
        wrmsrl(MSR_IA32_SYSENTER_ESP, stack_bottom);
        wrmsrl(MSR_IA32_SYSENTER_EIP, (unsigned long)sysenter_entry);
        wrmsr(MSR_IA32_SYSENTER_CS, __HYPERVISOR_CS, 0);
    }

    /* Trampoline for SYSCALL entry from compatibility mode. */
    wrmsrl(MSR_CSTAR, stub_va);
    offset += write_stub_trampoline(stub_page + (stub_va & ~PAGE_MASK),
                                    stub_va, stack_bottom,
                                    (unsigned long)cstar_enter);

    /* Don't consume more than half of the stub space here. */
    ASSERT(offset <= STUB_BUF_SIZE / 2);

    unmap_domain_page(stub_page);

    /* Common SYSCALL parameters. */
    wrmsrl(MSR_STAR, XEN_MSR_STAR);
    wrmsrl(MSR_SYSCALL_MASK, XEN_SYSCALL_MASK);
}
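/*
 * Populate a domain's hypercall page.  Unused slots are filled with 0xCC
 * (int3) so that a stray jump traps instead of executing arbitrary bytes.
 */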
void hypercall_page_initialise(struct domain *d, void *hypercall_page)
{
    memset(hypercall_page, 0xCC, PAGE_SIZE);
    if ( is_hvm_domain(d) )
        hvm_hypercall_page_initialise(d, hypercall_page);
    else if ( !is_pv_32bit_domain(d) )
        hypercall_page_initialise_ring3_kernel(hypercall_page);
    else
        hypercall_page_initialise_ring1_kernel(hypercall_page);
}

/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */