/******************************************************************************
 * arch/x86/pv/emul-gate-op.c
 *
 * Emulate gate op for PV guests
 *
 * Modifications to Linux original are copyright (c) 2002-2004, K A Fraser
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; If not, see <http://www.gnu.org/licenses/>.
 */

#include <xen/errno.h>
#include <xen/event.h>
#include <xen/guest_access.h>
#include <xen/iocap.h>
#include <xen/spinlock.h>
#include <xen/trace.h>

#include <asm/apic.h>
#include <asm/debugreg.h>
#include <asm/hpet.h>
#include <asm/hypercall.h>
#include <asm/mc146818rtc.h>
#include <asm/p2m.h>
#include <asm/pv/traps.h>
#include <asm/shared.h>
#include <asm/traps.h>
#include <asm/x86_emulate.h>

#include <xsm/xsm.h>

#include "emulate.h"

/* Override macros from asm/page.h to make them work with mfn_t */
#undef mfn_to_page
#define mfn_to_page(mfn) __mfn_to_page(mfn_x(mfn))
#undef page_to_mfn
#define page_to_mfn(pg) _mfn(__page_to_mfn(pg))

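/*
 * Read a gate descriptor from the guest's GDT or LDT and decompose it into
 * target selector, target offset, and access rights.  Returns 1 on success,
 * 0 on any validation failure (out-of-range selector, unreadable descriptor,
 * or malformed gate contents).
 */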
static int read_gate_descriptor(unsigned int gate_sel,
                                const struct vcpu *v,
                                unsigned int *sel,
                                unsigned long *off,
                                unsigned int *ar)
{
    struct desc_struct desc;
    const struct desc_struct *pdesc;

    pdesc = (const struct desc_struct *)
        (!(gate_sel & 4) ? GDT_VIRT_START(v) : LDT_VIRT_START(v))
        + (gate_sel >> 3);
    if ( (gate_sel < 4) ||
         ((gate_sel >= FIRST_RESERVED_GDT_BYTE) && !(gate_sel & 4)) ||
         __get_user(desc, pdesc) )
        return 0;

    *sel = (desc.a >> 16) & 0x0000fffc;
    *off = (desc.a & 0x0000ffff) | (desc.b & 0xffff0000);
    *ar = desc.b & 0x0000ffff;

    /*
     * check_descriptor() clears the DPL field and stores the
     * guest requested DPL in the selector's RPL field.
     */
    if ( *ar & _SEGMENT_DPL )
        return 0;
    *ar |= (desc.a >> (16 - 13)) & _SEGMENT_DPL;

    if ( !is_pv_32bit_vcpu(v) )
    {
        if ( (*ar & 0x1f00) != 0x0c00 ||
             (gate_sel >= FIRST_RESERVED_GDT_BYTE - 8 && !(gate_sel & 4)) ||
             __get_user(desc, pdesc + 1) ||
             (desc.b & 0x1f00) )
            return 0;

        *off |= (unsigned long)desc.a << 32;
        return 1;
    }

    switch ( *ar & 0x1f00 )
    {
    case 0x0400:
        *off &= 0xffff;
        break;
    case 0x0c00:
        break;
    default:
        return 0;
    }

    return 1;
}

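/*
 * Return true if moving the 32-bit stack pointer down by @decr bytes from
 * @esp neither wraps nor leaves the stack segment: expand-up segments
 * require esp - 1 <= limit, expand-down (_SEGMENT_EC) ones esp - decr > limit.
 */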
static inline bool check_stack_limit(unsigned int ar, unsigned int limit,
                                     unsigned int esp, unsigned int decr)
{
    return (((esp - decr) < (esp - 1)) &&
            (!(ar & _SEGMENT_EC) ? (esp - 1) <= limit : (esp - decr) > limit));
}

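/*
 * Context handed to the instruction decoder: the faulting CS's base/limit
 * are resolved once up front, and insn_fetch tells read_mem() whether it is
 * fetching the instruction itself or reading an operand.
 */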
struct gate_op_ctxt {
    struct x86_emulate_ctxt ctxt;
    struct {
        unsigned long base, limit;
    } cs;
    bool insn_fetch;
};

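/*
 * Memory read callback for x86_decode_insn(): translate seg:offset through
 * the guest's descriptors, enforce segment limits and access rights, and
 * report a page fault to the emulator if the actual copy fails.
 */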
static int read_mem(enum x86_segment seg, unsigned long offset, void *p_data,
                    unsigned int bytes, struct x86_emulate_ctxt *ctxt)
{
    const struct gate_op_ctxt *goc =
        container_of(ctxt, struct gate_op_ctxt, ctxt);
    unsigned int rc = bytes, sel = 0;
    unsigned long addr = offset, limit = 0;

    switch ( seg )
    {
    case x86_seg_cs:
        addr += goc->cs.base;
        limit = goc->cs.limit;
        break;
    case x86_seg_ds:
        sel = read_sreg(ds);
        break;
    case x86_seg_es:
        sel = read_sreg(es);
        break;
    case x86_seg_fs:
        sel = read_sreg(fs);
        break;
    case x86_seg_gs:
        sel = read_sreg(gs);
        break;
    case x86_seg_ss:
        sel = ctxt->regs->ss;
        break;
    default:
        return X86EMUL_UNHANDLEABLE;
    }
    if ( sel )
    {
        unsigned int ar;

        ASSERT(!goc->insn_fetch);
        if ( !pv_emul_read_descriptor(sel, current, &addr, &limit, &ar, 0) ||
             !(ar & _SEGMENT_S) ||
             !(ar & _SEGMENT_P) ||
             ((ar & _SEGMENT_CODE) && !(ar & _SEGMENT_WR)) )
            return X86EMUL_UNHANDLEABLE;
        addr += offset;
    }
    else if ( seg != x86_seg_cs )
        return X86EMUL_UNHANDLEABLE;

    /* We don't mean to emulate any branches. */
    if ( limit < bytes - 1 || offset > limit - bytes + 1 )
        return X86EMUL_UNHANDLEABLE;

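    /*
     * Truncate to 32 bits: segmented address calculations of the 32-bit
     * PV guests this emulation serves wrap at 4GiB.
     */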
    addr = (uint32_t)addr;

    if ( (rc = __copy_from_user(p_data, (void *)addr, bytes)) )
    {
        /*
         * TODO: This should report PFEC_insn_fetch when goc->insn_fetch &&
         * cpu_has_nx, but we'd then need a "fetch" variant of
         * __copy_from_user() respecting NX, SMEP, and protection keys.
         */
        x86_emul_pagefault(0, addr + bytes - rc, ctxt);
        return X86EMUL_EXCEPTION;
    }

    return X86EMUL_OKAY;
}

void pv_emulate_gate_op(struct cpu_user_regs *regs)
{
    struct vcpu *v = current;
    unsigned int sel, ar, dpl, nparm, insn_len;
    struct gate_op_ctxt ctxt = { .ctxt.regs = regs, .insn_fetch = true };
    struct x86_emulate_state *state;
    unsigned long off, base, limit;
    uint16_t opnd_sel = 0;
    int jump = -1, rc = X86EMUL_OKAY;

    /* Check whether this fault is due to the use of a call gate. */
    if ( !read_gate_descriptor(regs->error_code, v, &sel, &off, &ar) ||
         (((ar >> 13) & 3) < (regs->cs & 3)) ||
         ((ar & _SEGMENT_TYPE) != 0xc00) )
    {
        pv_inject_hw_exception(TRAP_gp_fault, regs->error_code);
        return;
    }
    if ( !(ar & _SEGMENT_P) )
    {
        pv_inject_hw_exception(TRAP_no_segment, regs->error_code);
        return;
    }
    dpl = (ar >> 13) & 3;
    nparm = ar & 0x1f;

    /*
     * Decode instruction (and perhaps operand) to determine RPL,
     * whether this is a jump or a call, and the call return offset.
     */
    if ( !pv_emul_read_descriptor(regs->cs, v, &ctxt.cs.base, &ctxt.cs.limit,
                                  &ar, 0) ||
         !(ar & _SEGMENT_S) ||
         !(ar & _SEGMENT_P) ||
         !(ar & _SEGMENT_CODE) )
    {
        pv_inject_hw_exception(TRAP_gp_fault, regs->error_code);
        return;
    }

    ctxt.ctxt.addr_size = ar & _SEGMENT_DB ? 32 : 16;
    /* Leave zero in ctxt.ctxt.sp_size, as it's not needed for decoding. */
    state = x86_decode_insn(&ctxt.ctxt, read_mem);
    ctxt.insn_fetch = false;
    if ( IS_ERR_OR_NULL(state) )
    {
        if ( PTR_ERR(state) == -X86EMUL_EXCEPTION )
            pv_inject_event(&ctxt.ctxt.event);
        else
            pv_inject_hw_exception(TRAP_gp_fault, regs->error_code);
        return;
    }

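    /*
     * Only far transfers can go through a call gate: 0xea/0x9a are direct
     * far JMP/CALL, 0xff /5 and /3 their indirect forms.  "jump" ends up
     * at 1 for jumps, 0 for calls, and stays -1 for any other opcode.
     */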
    switch ( ctxt.ctxt.opcode )
    {
        unsigned int modrm_345;

    case 0xea:
        ++jump;
        /* fall through */
    case 0x9a:
        ++jump;
        opnd_sel = x86_insn_immediate(state, 1);
        break;
    case 0xff:
        if ( x86_insn_modrm(state, NULL, &modrm_345) >= 3 )
            break;
        switch ( modrm_345 & 7 )
        {
            enum x86_segment seg;

        case 5:
            ++jump;
            /* fall through */
        case 3:
            ++jump;
            base = x86_insn_operand_ea(state, &seg);
            rc = read_mem(seg, base + (x86_insn_opsize(state) >> 3),
                          &opnd_sel, sizeof(opnd_sel), &ctxt.ctxt);
            break;
        }
        break;
    }

    insn_len = x86_insn_length(state, &ctxt.ctxt);
    x86_emulate_free_state(state);

    if ( rc == X86EMUL_EXCEPTION )
    {
        pv_inject_event(&ctxt.ctxt.event);
        return;
    }

    if ( rc != X86EMUL_OKAY ||
         jump < 0 ||
         (opnd_sel & ~3) != regs->error_code ||
         dpl < (opnd_sel & 3) )
    {
        pv_inject_hw_exception(TRAP_gp_fault, regs->error_code);
        return;
    }

    if ( !pv_emul_read_descriptor(sel, v, &base, &limit, &ar, 0) ||
         !(ar & _SEGMENT_S) ||
         !(ar & _SEGMENT_CODE) ||
         (!jump || (ar & _SEGMENT_EC) ?
          ((ar >> 13) & 3) > (regs->cs & 3) :
          ((ar >> 13) & 3) != (regs->cs & 3)) )
    {
        pv_inject_hw_exception(TRAP_gp_fault, sel);
        return;
    }
    if ( !(ar & _SEGMENT_P) )
    {
        pv_inject_hw_exception(TRAP_no_segment, sel);
        return;
    }
    if ( off > limit )
    {
        pv_inject_hw_exception(TRAP_gp_fault, 0);
        return;
    }

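    /*
     * Calls (unlike jumps) build a new stack frame: the outer SS:ESP on a
     * privilege change, then any copied parameters, then the return CS:EIP.
     */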
    if ( !jump )
    {
        unsigned int ss, esp, *stkp;
        int rc;
#define push(item) do \
        { \
            --stkp; \
            esp -= 4; \
            rc = __put_user(item, stkp); \
            if ( rc ) \
            { \
                pv_inject_page_fault(PFEC_write_access, \
                                     (unsigned long)(stkp + 1) - rc); \
                return; \
            } \
        } while ( 0 )

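        /*
         * Transfer to a more privileged ring: switch to the inner stack
         * previously registered by the guest (kernel_ss/kernel_sp, cf.
         * HYPERVISOR_stack_switch) and push the outer SS:ESP there.
         */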
        if ( ((ar >> 13) & 3) < (regs->cs & 3) )
        {
            sel |= (ar >> 13) & 3;
            /* Inner stack known only for kernel ring. */
            if ( (sel & 3) != GUEST_KERNEL_RPL(v->domain) )
            {
                pv_inject_hw_exception(TRAP_gp_fault, regs->error_code);
                return;
            }
            esp = v->arch.pv_vcpu.kernel_sp;
            ss = v->arch.pv_vcpu.kernel_ss;
            if ( (ss & 3) != (sel & 3) ||
                 !pv_emul_read_descriptor(ss, v, &base, &limit, &ar, 0) ||
                 ((ar >> 13) & 3) != (sel & 3) ||
                 !(ar & _SEGMENT_S) ||
                 (ar & _SEGMENT_CODE) ||
                 !(ar & _SEGMENT_WR) )
            {
                pv_inject_hw_exception(TRAP_invalid_tss, ss & ~3);
                return;
            }
            if ( !(ar & _SEGMENT_P) ||
                 !check_stack_limit(ar, limit, esp, (4 + nparm) * 4) )
            {
                pv_inject_hw_exception(TRAP_stack_error, ss & ~3);
                return;
            }
            stkp = (unsigned int *)(unsigned long)((unsigned int)base + esp);
            if ( !compat_access_ok(stkp - 4 - nparm, 16 + nparm * 4) )
            {
                pv_inject_hw_exception(TRAP_gp_fault, regs->error_code);
                return;
            }
            push(regs->ss);
            push(regs->rsp);
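            /* Copy the gate's nparm 32-bit parameters from the outer stack. */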
            if ( nparm )
            {
                const unsigned int *ustkp;

                if ( !pv_emul_read_descriptor(regs->ss, v, &base,
                                              &limit, &ar, 0) ||
                     ((ar >> 13) & 3) != (regs->cs & 3) ||
                     !(ar & _SEGMENT_S) ||
                     (ar & _SEGMENT_CODE) ||
                     !(ar & _SEGMENT_WR) ||
                     !check_stack_limit(ar, limit, esp + nparm * 4, nparm * 4) )
                    return pv_inject_hw_exception(TRAP_gp_fault,
                                                  regs->error_code);
                ustkp = (unsigned int *)(unsigned long)
                        ((unsigned int)base + regs->esp + nparm * 4);
                if ( !compat_access_ok(ustkp - nparm, 0 + nparm * 4) )
                {
                    pv_inject_hw_exception(TRAP_gp_fault, regs->error_code);
                    return;
                }
                do
                {
                    unsigned int parm;

                    --ustkp;
                    rc = __get_user(parm, ustkp);
                    if ( rc )
                    {
                        pv_inject_page_fault(0,
                                             (unsigned long)(ustkp + 1) - rc);
                        return;
                    }
                    push(parm);
                } while ( --nparm );
            }
        }
        else
        {
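            /*
             * Same-privilege transfer: the return frame is pushed onto the
             * current stack, so only validate SS and the available room.
             */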
            sel |= (regs->cs & 3);
            esp = regs->rsp;
            ss = regs->ss;
            if ( !pv_emul_read_descriptor(ss, v, &base, &limit, &ar, 0) ||
                 ((ar >> 13) & 3) != (sel & 3) )
            {
                pv_inject_hw_exception(TRAP_gp_fault, regs->error_code);
                return;
            }
            if ( !check_stack_limit(ar, limit, esp, 2 * 4) )
            {
                pv_inject_hw_exception(TRAP_stack_error, 0);
                return;
            }
            stkp = (unsigned int *)(unsigned long)((unsigned int)base + esp);
            if ( !compat_access_ok(stkp - 2, 2 * 4) )
            {
                pv_inject_hw_exception(TRAP_gp_fault, regs->error_code);
                return;
            }
        }
        push(regs->cs);
        push(regs->rip + insn_len);
#undef push
        regs->rsp = esp;
        regs->ss = ss;
    }
    else
        sel |= (regs->cs & 3);

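    /* Commit the new CS and resume the guest at the gate's target offset. */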
    regs->cs = sel;
    pv_emul_instruction_done(regs, off);
}

/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */