/******************************************************************************
 * arch/x86/pv/domain.c
 *
 * PV domain handling
 */

#include <xen/domain_page.h>
#include <xen/errno.h>
#include <xen/lib.h>
#include <xen/sched.h>

#include <asm/pv/domain.h>

/* Override macros from asm/page.h to make them work with mfn_t */
#undef mfn_to_page
#define mfn_to_page(mfn) __mfn_to_page(mfn_x(mfn))
#undef page_to_mfn
#define page_to_mfn(pg) _mfn(__page_to_mfn(pg))

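/*
 * Tail of the context switch into a non-idle PV vcpu: handle any pending
 * wakeup, mark the saved guest registers as dirty, and return to guest
 * context via the interrupt exit path.
 */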
static void noreturn continue_nonidle_domain(struct vcpu *v)
{
    check_wakeup_from_wait();
    mark_regs_dirty(guest_cpu_user_regs());
    reset_stack_and_jump(ret_from_intr);
}

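/*
 * Allocate and initialise the L4 pagetable Xen provides for a 32-bit PV
 * (compat) vcpu: the guest itself only manages 3-level pagetables, so the
 * top level is owned by Xen and has its Xen slots pre-populated here.
 */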
static int setup_compat_l4(struct vcpu *v)
{
    struct page_info *pg;
    l4_pgentry_t *l4tab;
    mfn_t mfn;

    pg = alloc_domheap_page(v->domain, MEMF_no_owner);
    if ( pg == NULL )
        return -ENOMEM;

    mfn = page_to_mfn(pg);
    l4tab = map_domain_page(mfn);
    clear_page(l4tab);
    init_xen_l4_slots(l4tab, mfn, v->domain, INVALID_MFN, false);
    unmap_domain_page(l4tab);

    /* This page needs to look like a pagetable so that it can be shadowed */
    pg->u.inuse.type_info = PGT_l4_page_table | PGT_validated | 1;

    v->arch.guest_table = pagetable_from_page(pg);
    v->arch.guest_table_user = v->arch.guest_table;

    return 0;
}

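/* Free the Xen-provided compat L4 and clear both pagetable references. */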
static void release_compat_l4(struct vcpu *v)
{
    if ( !pagetable_is_null(v->arch.guest_table) )
        free_domheap_page(pagetable_get_page(v->arch.guest_table));
    v->arch.guest_table = pagetable_null();
    v->arch.guest_table_user = pagetable_null();
}

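/*
 * Switch a 64-bit PV domain into 32-bit (compat) mode.  Only permitted
 * while the domain owns no memory; on failure all per-vcpu compat state
 * set up so far is torn down again.
 */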
int switch_compat(struct domain *d)
{
    struct vcpu *v;
    int rc;

    if ( is_hvm_domain(d) || d->tot_pages != 0 )
        return -EACCES;
    if ( is_pv_32bit_domain(d) )
        return 0;

    d->arch.has_32bit_shinfo = 1;
    d->arch.is_32bit_pv = 1;

    for_each_vcpu( d, v )
    {
        if ( (rc = setup_compat_arg_xlat(v)) ||
             (rc = setup_compat_l4(v)) )
            goto undo_and_fail;
    }

    domain_set_alloc_bitsize(d);
    recalculate_cpuid_policy(d);

    d->arch.x87_fip_width = 4;

    return 0;

 undo_and_fail:
    d->arch.is_32bit_pv = d->arch.has_32bit_shinfo = 0;
    for_each_vcpu( d, v )
    {
        free_compat_arg_xlat(v);
        release_compat_l4(v);
    }

    return rc;
}

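/*
 * Create/destroy this vcpu's slice of the GDT/LDT range in the per-domain
 * mapping area, backed by the domain's preallocated gdt_ldt_l1tab.
 */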
static int pv_create_gdt_ldt_l1tab(struct vcpu *v)
{
    return create_perdomain_mapping(v->domain, GDT_VIRT_START(v),
                                    1U << GDT_LDT_VCPU_SHIFT,
                                    v->domain->arch.pv_domain.gdt_ldt_l1tab,
                                    NULL);
}

static void pv_destroy_gdt_ldt_l1tab(struct vcpu *v)
{
    destroy_perdomain_mapping(v->domain, GDT_VIRT_START(v),
                              1U << GDT_LDT_VCPU_SHIFT);
}

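/*
 * Release per-vcpu PV state: compat arg-translation area and L4 for 32-bit
 * vcpus, the GDT/LDT mapping, and the trap context table.
 */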
void pv_vcpu_destroy(struct vcpu *v)
{
    if ( is_pv_32bit_vcpu(v) )
    {
        free_compat_arg_xlat(v);
        release_compat_l4(v);
    }

    pv_destroy_gdt_ldt_l1tab(v);
    xfree(v->arch.pv_vcpu.trap_ctxt);
    v->arch.pv_vcpu.trap_ctxt = NULL;
}

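/*
 * Set up per-vcpu PV state: the GDT/LDT mapping, the trap context table,
 * default periodic timer and CR4 values, plus the compat structures for
 * 32-bit domains.  Undoes its own work via pv_vcpu_destroy() on failure.
 */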
int pv_vcpu_initialise(struct vcpu *v)
{
    struct domain *d = v->domain;
    int rc;

    ASSERT(!is_idle_domain(d));

    spin_lock_init(&v->arch.pv_vcpu.shadow_ldt_lock);

    rc = pv_create_gdt_ldt_l1tab(v);
    if ( rc )
        return rc;

    BUILD_BUG_ON(NR_VECTORS * sizeof(*v->arch.pv_vcpu.trap_ctxt) >
                 PAGE_SIZE);
    v->arch.pv_vcpu.trap_ctxt = xzalloc_array(struct trap_info,
                                              NR_VECTORS);
    if ( !v->arch.pv_vcpu.trap_ctxt )
    {
        rc = -ENOMEM;
        goto done;
    }

    /* PV guests by default have a 100Hz ticker. */
    v->periodic_period = MILLISECS(10);

    v->arch.pv_vcpu.ctrlreg[4] = real_cr4_to_pv_guest_cr4(mmu_cr4_features);

    if ( is_pv_32bit_domain(d) )
    {
        if ( (rc = setup_compat_arg_xlat(v)) )
            goto done;

        if ( (rc = setup_compat_l4(v)) )
            goto done;
    }

 done:
    if ( rc )
        pv_vcpu_destroy(v);
    return rc;
}

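/*
 * Tear down domain-wide PV state; also used as the error path of
 * pv_domain_initialise(), so every step must cope with a partially
 * constructed domain.
 */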
void pv_domain_destroy(struct domain *d)
{
    destroy_perdomain_mapping(d, GDT_LDT_VIRT_START,
                              GDT_LDT_MBYTES << (20 - PAGE_SHIFT));

    xfree(d->arch.pv_domain.cpuidmasks);
    d->arch.pv_domain.cpuidmasks = NULL;

    free_xenheap_page(d->arch.pv_domain.gdt_ldt_l1tab);
    d->arch.pv_domain.gdt_ldt_l1tab = NULL;
}

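/*
 * Set up domain-wide PV state: the GDT/LDT L1 table, CPUID masking defaults
 * (where the hardware levels CPUID by masking rather than faulting), the
 * per-domain GDT/LDT mapping, and the PV context switch hooks.  New domains
 * start out as 64-bit PV.
 */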
int pv_domain_initialise(struct domain *d, unsigned int domcr_flags,
                         struct xen_arch_domainconfig *config)
{
    static const struct arch_csw pv_csw = {
        .from = paravirt_ctxt_switch_from,
        .to   = paravirt_ctxt_switch_to,
        .tail = continue_nonidle_domain,
    };
    int rc = -ENOMEM;

    d->arch.pv_domain.gdt_ldt_l1tab =
        alloc_xenheap_pages(0, MEMF_node(domain_to_node(d)));
    if ( !d->arch.pv_domain.gdt_ldt_l1tab )
        goto fail;
    clear_page(d->arch.pv_domain.gdt_ldt_l1tab);

    if ( levelling_caps & ~LCAP_faulting )
    {
        d->arch.pv_domain.cpuidmasks = xmalloc(struct cpuidmasks);
        if ( !d->arch.pv_domain.cpuidmasks )
            goto fail;
        *d->arch.pv_domain.cpuidmasks = cpuidmask_defaults;
    }

    rc = create_perdomain_mapping(d, GDT_LDT_VIRT_START,
                                  GDT_LDT_MBYTES << (20 - PAGE_SHIFT),
                                  NULL, NULL);
    if ( rc )
        goto fail;

    d->arch.ctxt_switch = &pv_csw;

    /* 64-bit PV guest by default. */
    d->arch.is_32bit_pv = d->arch.has_32bit_shinfo = 0;

    return 0;

  fail:
    pv_domain_destroy(d);

    return rc;
}

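/*
 * Switch a 64-bit PV vcpu between guest kernel and guest user mode: stash
 * the outgoing GS base in the matching slot, swap GS, and switch to the
 * other set of pagetables.
 */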
void toggle_guest_mode(struct vcpu *v)
{
    if ( is_pv_32bit_vcpu(v) )
        return;

    if ( cpu_has_fsgsbase )
    {
        if ( v->arch.flags & TF_kernel_mode )
            v->arch.pv_vcpu.gs_base_kernel = __rdgsbase();
        else
            v->arch.pv_vcpu.gs_base_user = __rdgsbase();
    }
    asm volatile ( "swapgs" );

    toggle_guest_pt(v);
}

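/*
 * Switch between the guest kernel and guest user pagetables without
 * flushing user global mappings, and flush any deferred runstate /
 * system-time updates once the vcpu is back in kernel mode.
 */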
void toggle_guest_pt(struct vcpu *v)
{
    if ( is_pv_32bit_vcpu(v) )
        return;

    v->arch.flags ^= TF_kernel_mode;
    update_cr3(v);
    /* Don't flush user global mappings from the TLB. Don't tick TLB clock. */
    asm volatile ( "mov %0, %%cr3" : : "r" (v->arch.cr3) : "memory" );

    if ( !(v->arch.flags & TF_kernel_mode) )
        return;

    if ( v->arch.pv_vcpu.need_update_runstate_area &&
         update_runstate_area(v) )
        v->arch.pv_vcpu.need_update_runstate_area = 0;

    if ( v->arch.pv_vcpu.pending_system_time.version &&
         update_secondary_system_time(v,
                                      &v->arch.pv_vcpu.pending_system_time) )
        v->arch.pv_vcpu.pending_system_time.version = 0;
}

/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */