1 #include <xen/types.h>
2 #include <xen/sched.h>
3 #include "mcaction.h"
4 #include "vmce.h"
5 #include "mce.h"
6
7 static struct mcinfo_recovery *
mci_action_add_pageoffline(int bank,struct mc_info * mi,uint64_t mfn,uint32_t status)8 mci_action_add_pageoffline(int bank, struct mc_info *mi,
9 uint64_t mfn, uint32_t status)
10 {
11 struct mcinfo_recovery *rec;
12
13 if ( !mi )
14 return NULL;
15
16 rec = x86_mcinfo_reserve(mi, sizeof(*rec), MC_TYPE_RECOVERY);
17 if ( !rec )
18 {
19 mi->flags |= MCINFO_FLAGS_UNCOMPLETE;
20 return NULL;
21 }
22
23 rec->mc_bank = bank;
24 rec->action_types = MC_ACTION_PAGE_OFFLINE;
25 rec->action_info.page_retire.mfn = mfn;
26 rec->action_info.page_retire.status = status;
27 return rec;
28 }
29
30 mce_check_addr_t mc_check_addr = NULL;
31
mce_register_addrcheck(mce_check_addr_t cbfunc)32 void mce_register_addrcheck(mce_check_addr_t cbfunc)
33 {
34 mc_check_addr = cbfunc;
35 }
36
37 void
mc_memerr_dhandler(struct mca_binfo * binfo,enum mce_result * result,const struct cpu_user_regs * regs)38 mc_memerr_dhandler(struct mca_binfo *binfo,
39 enum mce_result *result,
40 const struct cpu_user_regs *regs)
41 {
42 struct mcinfo_bank *bank = binfo->mib;
43 struct mcinfo_global *global = binfo->mig;
44 struct domain *d;
45 unsigned long mfn, gfn;
46 uint32_t status;
47 int vmce_vcpuid;
48 unsigned int mc_vcpuid;
49
50 if ( !mc_check_addr(bank->mc_status, bank->mc_misc, MC_ADDR_PHYSICAL) )
51 {
52 dprintk(XENLOG_WARNING,
53 "No physical address provided for memory error\n");
54 return;
55 }
56
57 mfn = bank->mc_addr >> PAGE_SHIFT;
58 if ( offline_page(mfn, 1, &status) )
59 {
60 dprintk(XENLOG_WARNING,
61 "Failed to offline page %lx for MCE error\n", mfn);
62 return;
63 }
64
65 mci_action_add_pageoffline(binfo->bank, binfo->mi, mfn, status);
66
67 /* This is free page */
68 if ( status & PG_OFFLINE_OFFLINED )
69 *result = MCER_RECOVERED;
70 else if ( status & PG_OFFLINE_AGAIN )
71 *result = MCER_CONTINUE;
72 else if ( status & PG_OFFLINE_PENDING )
73 {
74 /* This page has owner */
75 if ( status & PG_OFFLINE_OWNED )
76 {
77 bank->mc_domid = status >> PG_OFFLINE_OWNER_SHIFT;
78 mce_printk(MCE_QUIET, "MCE: This error page is ownded"
79 " by DOM %d\n", bank->mc_domid);
80 /*
81 * XXX: Cannot handle shared pages yet
82 * (this should identify all domains and gfn mapping to
83 * the mfn in question)
84 */
85 BUG_ON( bank->mc_domid == DOMID_COW );
86 if ( bank->mc_domid != DOMID_XEN )
87 {
88 d = get_domain_by_id(bank->mc_domid);
89 ASSERT(d);
90 gfn = get_gpfn_from_mfn((bank->mc_addr) >> PAGE_SHIFT);
91
92 if ( unmmap_broken_page(d, _mfn(mfn), gfn) )
93 {
94 printk("Unmap broken memory %lx for DOM%d failed\n",
95 mfn, d->domain_id);
96 goto vmce_failed;
97 }
98
99 mc_vcpuid = global->mc_vcpuid;
100 if ( mc_vcpuid == XEN_MC_VCPUID_INVALID ||
101 /*
102 * Because MC# may happen asynchronously with the actual
103 * operation that triggers the error, the domain ID as
104 * well as the vCPU ID collected in 'global' at MC# are
105 * not always precise. In that case, fallback to broadcast.
106 */
107 global->mc_domid != bank->mc_domid ||
108 (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
109 (!(global->mc_gstatus & MCG_STATUS_LMCE) ||
110 !(d->vcpu[mc_vcpuid]->arch.vmce.mcg_ext_ctl &
111 MCG_EXT_CTL_LMCE_EN))) )
112 vmce_vcpuid = VMCE_INJECT_BROADCAST;
113 else
114 vmce_vcpuid = mc_vcpuid;
115
116 bank->mc_addr = gfn << PAGE_SHIFT |
117 (bank->mc_addr & (PAGE_SIZE - 1));
118 if ( fill_vmsr_data(bank, d, global->mc_gstatus, vmce_vcpuid) )
119 {
120 mce_printk(MCE_QUIET, "Fill vMCE# data for DOM%d "
121 "failed\n", bank->mc_domid);
122 goto vmce_failed;
123 }
124
125 /* We will inject vMCE to DOMU */
126 if ( inject_vmce(d, vmce_vcpuid) < 0 )
127 {
128 mce_printk(MCE_QUIET, "inject vMCE to DOM%d"
129 " failed\n", d->domain_id);
130 goto vmce_failed;
131 }
132
133 /*
134 * Impacted domain go on with domain's recovery job
135 * if the domain has its own MCA handler.
136 * For xen, it has contained the error and finished
137 * its own recovery job.
138 */
139 *result = MCER_RECOVERED;
140 put_domain(d);
141
142 return;
143 vmce_failed:
144 put_domain(d);
145 domain_crash(d);
146 }
147 }
148 }
149 }
150