/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * arch/x86/pv/descriptor-tables.c
 *
 * Descriptor table manipulation code for PV guests
 *
 * Copyright (c) 2002-2005 K A Fraser
 * Copyright (c) 2004 Christian Limpach
 */

#include <xen/guest_access.h>
#include <xen/hypercall.h>

#include <asm/p2m.h>
#include <asm/pv/mm.h>

/*
 * Flush the LDT, dropping any typerefs. Returns a boolean indicating whether
 * mappings have been removed (i.e. a TLB flush is needed).
 */
bool pv_destroy_ldt(struct vcpu *v)
{
    l1_pgentry_t *pl1e;
    unsigned int i, mappings_dropped = 0;
    struct page_info *page;

    ASSERT(!in_irq());

    ASSERT(v == current || !vcpu_cpu_dirty(v));

    pl1e = pv_ldt_ptes(v);

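    /* The LDT spans at most 8192 entries * 8 bytes = 64k, i.e. 16 L1 slots. */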
    for ( i = 0; i < 16; i++ )
    {
        if ( !(l1e_get_flags(pl1e[i]) & _PAGE_PRESENT) )
            continue;

        page = l1e_get_page(pl1e[i]);
        l1e_write(&pl1e[i], l1e_empty());
        mappings_dropped++;

        ASSERT_PAGE_IS_TYPE(page, PGT_seg_desc_page);
        ASSERT_PAGE_IS_DOMAIN(page, v->domain);
        put_page_and_type(page);
    }

    return mappings_dropped;
}

void pv_destroy_gdt(struct vcpu *v)
{
    l1_pgentry_t *pl1e = pv_gdt_ptes(v);
    mfn_t zero_mfn = _mfn(virt_to_mfn(zero_page));
    l1_pgentry_t zero_l1e = l1e_from_mfn(zero_mfn, __PAGE_HYPERVISOR_RO);
    unsigned int i;

    ASSERT(v == current || !vcpu_cpu_dirty(v));

    v->arch.pv.gdt_ents = 0;
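    /*
     * Each slot is pointed back at the shared read-only zero page rather
     * than being left unmapped, so a stale selector load reads an all-zero
     * (not-present) descriptor and faults in a controlled fashion rather
     * than taking a page fault.
     */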
    for ( i = 0; i < FIRST_RESERVED_GDT_PAGE; i++ )
    {
        mfn_t mfn = l1e_get_mfn(pl1e[i]);

        if ( (l1e_get_flags(pl1e[i]) & _PAGE_PRESENT) &&
             !mfn_eq(mfn, zero_mfn) )
            put_page_and_type(mfn_to_page(mfn));

        l1e_write(&pl1e[i], zero_l1e);
        v->arch.pv.gdt_frames[i] = 0;
    }
}

int pv_set_gdt(struct vcpu *v, const unsigned long frames[],
               unsigned int entries)
{
    struct domain *d = v->domain;
    l1_pgentry_t *pl1e;
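    /* Each 4k GDT frame holds 512 eight-byte descriptors. */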
    unsigned int i, nr_frames = DIV_ROUND_UP(entries, 512);

    ASSERT(v == current || !vcpu_cpu_dirty(v));

    if ( entries > FIRST_RESERVED_GDT_ENTRY )
        return -EINVAL;

    /* Check the pages in the new GDT. */
    for ( i = 0; i < nr_frames; i++ )
    {
        mfn_t mfn = _mfn(frames[i]);

        if ( !mfn_valid(mfn) ||
             !get_page_and_type(mfn_to_page(mfn), d, PGT_seg_desc_page) )
            goto fail;
    }

    /* Tear down the old GDT. */
    pv_destroy_gdt(v);

    /* Install the new GDT. */
    v->arch.pv.gdt_ents = entries;
    pl1e = pv_gdt_ptes(v);
    for ( i = 0; i < nr_frames; i++ )
    {
        v->arch.pv.gdt_frames[i] = frames[i];
        l1e_write(&pl1e[i], l1e_from_pfn(frames[i], __PAGE_HYPERVISOR_RW));
    }

    return 0;

 fail:
    while ( i-- > 0 )
        put_page_and_type(mfn_to_page(_mfn(frames[i])));

    return -EINVAL;
}

long do_set_gdt(
    XEN_GUEST_HANDLE_PARAM(xen_ulong_t) frame_list, unsigned int entries)
{
    unsigned int nr_frames = DIV_ROUND_UP(entries, 512);
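    /* Large enough for the maximum guest GDT permitted by the check below. */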
    unsigned long frames[16];
    struct vcpu *curr = current;
    long ret;

    /* Rechecked in pv_set_gdt(), but bounds the guest copy below. */
    if ( entries > FIRST_RESERVED_GDT_ENTRY )
        return -EINVAL;

    if ( copy_from_guest(frames, frame_list, nr_frames) )
        return -EFAULT;

    domain_lock(curr->domain);

    if ( (ret = pv_set_gdt(curr, frames, entries)) == 0 )
        flush_tlb_local();

    domain_unlock(curr->domain);

    return ret;
}

#ifdef CONFIG_PV32

int compat_set_gdt(
    XEN_GUEST_HANDLE_PARAM(uint) frame_list, unsigned int entries)
{
    struct vcpu *curr = current;
    unsigned int i, nr_frames = DIV_ROUND_UP(entries, 512);
    unsigned long frames[16];
    int ret;

    /* Rechecked in pv_set_gdt(), but bounds the guest copy below. */
    if ( entries > FIRST_RESERVED_GDT_ENTRY )
        return -EINVAL;

    if ( !guest_handle_okay(frame_list, nr_frames) )
        return -EFAULT;

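    /* Widen each 32-bit frame number to unsigned long for pv_set_gdt(). */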
    for ( i = 0; i < nr_frames; ++i )
    {
        unsigned int frame;

        if ( __copy_from_guest(&frame, frame_list, 1) )
            return -EFAULT;

        frames[i] = frame;
        guest_handle_add_offset(frame_list, 1);
    }

    domain_lock(curr->domain);

    if ( (ret = pv_set_gdt(curr, frames, entries)) == 0 )
        flush_tlb_local();

    domain_unlock(curr->domain);

    return ret;
}

int compat_update_descriptor(
    uint32_t pa_lo, uint32_t pa_hi, uint32_t desc_lo, uint32_t desc_hi)
{
    seg_desc_t d;

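    /* Reassemble the descriptor and address from their 32-bit halves. */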
    d.raw = ((uint64_t)desc_hi << 32) | desc_lo;

    return do_update_descriptor(pa_lo | ((uint64_t)pa_hi << 32), d);
}

#endif /* CONFIG_PV32 */

static bool check_descriptor(const struct domain *dom, seg_desc_t *d)
{
    unsigned int a = d->a, b = d->b, cs, dpl;

    /* A not-present descriptor will always fault, so is safe. */
    if ( !(b & _SEGMENT_P) )
        return true;

    /* Check and fix up the DPL. */
    dpl = (b >> 13) & 3;
    __fixup_guest_selector(dom, dpl);
    b = (b & ~_SEGMENT_DPL) | (dpl << 13);

    /* All code and data segments are okay. No base/limit checking. */
    if ( b & _SEGMENT_S )
    {
        if ( is_pv_32bit_domain(dom) )
        {
            unsigned long base, limit;

            if ( b & _SEGMENT_L )
                goto bad;

            /*
             * Older PAE Linux guests use segments which are limited to
             * 0xf6800000. Extend these to allow access to the larger read-only
             * M2P table available in 32on64 mode.
             */
            base = (b & 0xff000000) | ((b & 0xff) << 16) | (a >> 16);

            limit = (b & 0xf0000) | (a & 0xffff);
            limit++; /* We add one because limit is inclusive. */

            if ( b & _SEGMENT_G )
                limit <<= 12;

            if ( (base == 0) && (limit > HYPERVISOR_COMPAT_VIRT_START(dom)) )
            {
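                /* Raise the limit field to its maximum (4GiB with G set). */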
                a |= 0x0000ffff;
                b |= 0x000f0000;
            }
        }

        goto good;
    }

    /* Invalid type 0 is harmless. It is used for 2nd half of a call gate. */
    if ( (b & _SEGMENT_TYPE) == 0x000 )
        return true;

    /* Everything but a call gate is discarded here. */
    if ( (b & _SEGMENT_TYPE) != 0xc00 )
        goto bad;

    /* Validate the target code selector. */
    cs = a >> 16;
    if ( !guest_gate_selector_okay(dom, cs) )
        goto bad;
    /*
     * Force DPL to zero, causing a GP fault with its error code indicating
     * the gate in use, allowing emulation. This is necessary because with
     * native guests (kernel in ring 3) call gates cannot be used directly
     * to transition from user to kernel mode (and whether a gate is used
     * to enter the kernel can only be determined when the gate is being
     * used), and with compat guests call gates cannot be used at all as
     * there are only 64-bit ones.
     * Store the original DPL in the selector's RPL field.
     */
    b &= ~_SEGMENT_DPL;
    cs = (cs & ~3) | dpl;
    a = (a & 0xffffU) | (cs << 16);

    /* Reserved bits must be zero. */
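    /* 32-bit gates allow a parameter count in bits 0-4; 64-bit ones do not. */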
    if ( b & (is_pv_32bit_domain(dom) ? 0xe0 : 0xff) )
        goto bad;

 good:
    d->a = a;
    d->b = b;
    return true;

 bad:
    return false;
}

int validate_segdesc_page(struct page_info *page)
{
    const struct domain *owner = page_get_owner(page);
    seg_desc_t *descs = __map_domain_page(page);
    unsigned int i;

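    /* A descriptor page holds 512 entries; fail on the first bad one. */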
    for ( i = 0; i < 512; i++ )
        if ( unlikely(!check_descriptor(owner, &descs[i])) )
            break;

    unmap_domain_page(descs);

    return i == 512 ? 0 : -EINVAL;
}

long do_update_descriptor(uint64_t gaddr, seg_desc_t d)
{
    struct domain *currd = current->domain;
    gfn_t gfn = gaddr_to_gfn(gaddr);
    mfn_t mfn;
    seg_desc_t *entry;
    struct page_info *page;
    long ret = -EINVAL;

    /* gaddr must be aligned, or it will corrupt adjacent descriptors. */
    if ( !IS_ALIGNED(gaddr, sizeof(d)) || !check_descriptor(currd, &d) )
        return -EINVAL;

    page = get_page_from_gfn(currd, gfn_x(gfn), NULL, P2M_ALLOC);
    if ( !page )
        return -EINVAL;

    mfn = page_to_mfn(page);

    /* Check if the given frame is in use in an unsafe context. */
    switch ( page->u.inuse.type_info & PGT_type_mask )
    {
    case PGT_seg_desc_page:
        if ( unlikely(!get_page_type(page, PGT_seg_desc_page)) )
            goto out;
        break;
    default:
        if ( unlikely(!get_page_type(page, PGT_writable_page)) )
            goto out;
        break;
    }

    paging_mark_dirty(currd, mfn);

    /* All is good so make the update. */
    entry = map_domain_page(mfn) + (gaddr & ~PAGE_MASK);
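    /* One aligned 64-bit store, so no vCPU can observe a torn descriptor. */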
    ACCESS_ONCE(entry->raw) = d.raw;
    unmap_domain_page(entry);

    put_page_type(page);

    ret = 0; /* success */

 out:
    put_page(page);

    return ret;
}

/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */