1 /******************************************************************************
2 * crash.c
3 *
4 * Based heavily on arch/i386/kernel/crash.c from Linux 2.6.16
5 *
6 * Xen port written by:
7 * - Simon 'Horms' Horman <horms@verge.net.au>
8 * - Magnus Damm <magnus@valinux.co.jp>
9 */
10
11 #include <asm/atomic.h>
12 #include <asm/elf.h>
13 #include <asm/percpu.h>
14 #include <xen/types.h>
15 #include <xen/irq.h>
16 #include <asm/nmi.h>
17 #include <xen/string.h>
18 #include <xen/elf.h>
19 #include <xen/elfcore.h>
20 #include <xen/smp.h>
21 #include <xen/delay.h>
22 #include <xen/perfc.h>
23 #include <xen/kexec.h>
24 #include <xen/sched.h>
25 #include <xen/keyhandler.h>
26 #include <public/xen.h>
27 #include <asm/shared.h>
28 #include <asm/hvm/support.h>
29 #include <asm/apic.h>
30 #include <asm/io_apic.h>
31 #include <xen/iommu.h>
32 #include <asm/hpet.h>
33
34 static cpumask_t waiting_to_crash;
35 static unsigned int crashing_cpu;
36 static DEFINE_PER_CPU_READ_MOSTLY(bool, crash_save_done);
37
38 /* This becomes the NMI handler for non-crashing CPUs, when Xen is crashing. */
do_nmi_crash(const struct cpu_user_regs * regs)39 static void noreturn do_nmi_crash(const struct cpu_user_regs *regs)
40 {
41 unsigned int cpu = smp_processor_id();
42
43 stac();
44
45 /* nmi_shootdown_cpus() should ensure that this assertion is correct. */
46 ASSERT(cpu != crashing_cpu);
47
48 /* Save crash information and shut down CPU. Attempt only once. */
49 if ( !this_cpu(crash_save_done) )
50 {
51 /* Disable the interrupt stack table for the MCE handler. This
52 * prevents race conditions between clearing MCIP and receving a
53 * new MCE, during which the exception frame would be clobbered
54 * and the MCE handler fall into an infinite loop. We are soon
55 * going to disable the NMI watchdog, so the loop would not be
56 * caught.
57 *
58 * We do not need to change the NMI IST, as the nmi_crash
59 * handler is immue to corrupt exception frames, by virtue of
60 * being designed never to return.
61 *
62 * This update is safe from a security point of view, as this
63 * pcpu is never going to try to sysret back to a PV vcpu.
64 */
65 set_ist(&idt_tables[cpu][TRAP_machine_check], IST_NONE);
66
67 kexec_crash_save_cpu();
68 __stop_this_cpu();
69
70 this_cpu(crash_save_done) = true;
71 cpumask_clear_cpu(cpu, &waiting_to_crash);
72 }
73
74 /* Poor mans self_nmi(). __stop_this_cpu() has reverted the LAPIC
75 * back to its boot state, so we are unable to rely on the regular
76 * apic_* functions, due to 'x2apic_enabled' being possibly wrong.
77 * (The likely scenario is that we have reverted from x2apic mode to
78 * xapic, at which point #GPFs will occur if we use the apic_*
79 * functions)
80 *
81 * The ICR and APIC ID of the LAPIC are still valid even during
82 * software disable (Intel SDM Vol 3, 10.4.7.2). As a result, we
83 * can deliberately queue up another NMI at the LAPIC which will not
84 * be delivered as the hardware NMI latch is currently in effect.
85 * This means that if NMIs become unlatched (e.g. following a
86 * non-fatal MCE), the LAPIC will force us back here rather than
87 * wandering back into regular Xen code.
88 */
89 switch ( current_local_apic_mode() )
90 {
91 u32 apic_id;
92
93 case APIC_MODE_X2APIC:
94 apic_id = apic_rdmsr(APIC_ID);
95
96 apic_wrmsr(APIC_ICR, APIC_DM_NMI | APIC_DEST_PHYSICAL
97 | ((u64)apic_id << 32));
98 break;
99
100 case APIC_MODE_XAPIC:
101 apic_id = GET_xAPIC_ID(apic_mem_read(APIC_ID));
102
103 while ( apic_mem_read(APIC_ICR) & APIC_ICR_BUSY )
104 cpu_relax();
105
106 apic_mem_write(APIC_ICR2, apic_id << 24);
107 apic_mem_write(APIC_ICR, APIC_DM_NMI | APIC_DEST_PHYSICAL);
108 break;
109
110 default:
111 break;
112 }
113
114 for ( ; ; )
115 halt();
116 }
117
nmi_shootdown_cpus(void)118 static void nmi_shootdown_cpus(void)
119 {
120 unsigned long msecs;
121 unsigned int cpu = smp_processor_id();
122
123 disable_lapic_nmi_watchdog();
124 local_irq_disable();
125
126 crashing_cpu = cpu;
127 local_irq_count(crashing_cpu) = 0;
128
129 cpumask_andnot(&waiting_to_crash, &cpu_online_map, cpumask_of(cpu));
130
131 /*
132 * Disable IST for MCEs to avoid stack corruption race conditions, and
133 * change the NMI handler to a nop to avoid deviation from this codepath.
134 */
135 _set_gate_lower(&idt_tables[cpu][TRAP_nmi],
136 SYS_DESC_irq_gate, 0, &trap_nop);
137 set_ist(&idt_tables[cpu][TRAP_machine_check], IST_NONE);
138
139 /*
140 * Ideally would be:
141 * exception_table[TRAP_nmi] = &do_nmi_crash;
142 *
143 * but the exception_table is read only. Access it via its directmap
144 * mappings.
145 */
146 write_atomic((unsigned long *)__va(__pa(&exception_table[TRAP_nmi])),
147 (unsigned long)&do_nmi_crash);
148
149 /* Ensure the new callback function is set before sending out the NMI. */
150 wmb();
151
152 smp_send_nmi_allbutself();
153
154 msecs = 1000; /* Wait at most a second for the other cpus to stop */
155 while ( !cpumask_empty(&waiting_to_crash) && msecs )
156 {
157 mdelay(1);
158 msecs--;
159 }
160
161 /* Leave a hint of how well we did trying to shoot down the other cpus */
162 if ( cpumask_empty(&waiting_to_crash) )
163 printk("Shot down all CPUs\n");
164 else
165 {
166 cpulist_scnprintf(keyhandler_scratch, sizeof keyhandler_scratch,
167 &waiting_to_crash);
168 printk("Failed to shoot down CPUs {%s}\n", keyhandler_scratch);
169 }
170
171 /* Crash shutdown any IOMMU functionality as the crashdump kernel is not
172 * happy when booting if interrupt/dma remapping is still enabled */
173 iommu_crash_shutdown();
174
175 __stop_this_cpu();
176
177 /* This is a bit of a hack due to the problems with the x2apic_enabled
178 * variable, but we can't do any better without a significant refactoring
179 * of the APIC code */
180 x2apic_enabled = (current_local_apic_mode() == APIC_MODE_X2APIC);
181
182 disable_IO_APIC();
183 hpet_disable();
184 }
185
machine_crash_shutdown(void)186 void machine_crash_shutdown(void)
187 {
188 crash_xen_info_t *info;
189
190 nmi_shootdown_cpus();
191
192 /* Reset CPUID masking and faulting to the host's default. */
193 ctxt_switch_levelling(NULL);
194
195 info = kexec_crash_save_info();
196 info->xen_phys_start = xen_phys_start;
197 info->dom0_pfn_to_mfn_frame_list_list =
198 arch_get_pfn_to_mfn_frame_list_list(hardware_domain);
199 }
200
201 /*
202 * Local variables:
203 * mode: C
204 * c-file-style: "BSD"
205 * c-basic-offset: 4
206 * tab-width: 4
207 * indent-tabs-mode: nil
208 * End:
209 */
210