1 /******************************************************************************
2  * arch/x86/pv/emul-gate-op.c
3  *
4  * Emulate gate op for PV guests
5  *
6  * Modifications to Linux original are copyright (c) 2002-2004, K A Fraser
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License as published by
10  * the Free Software Foundation; either version 2 of the License, or
11  * (at your option) any later version.
12  *
13  * This program is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16  * GNU General Public License for more details.
17  *
18  * You should have received a copy of the GNU General Public License
19  * along with this program; If not, see <http://www.gnu.org/licenses/>.
20  */
21 
22 #include <xen/errno.h>
23 #include <xen/event.h>
24 #include <xen/guest_access.h>
25 #include <xen/iocap.h>
26 #include <xen/spinlock.h>
27 #include <xen/trace.h>
28 
29 #include <asm/apic.h>
30 #include <asm/debugreg.h>
31 #include <asm/hpet.h>
32 #include <asm/hypercall.h>
33 #include <asm/mc146818rtc.h>
34 #include <asm/p2m.h>
35 #include <asm/pv/traps.h>
36 #include <asm/shared.h>
37 #include <asm/traps.h>
38 #include <asm/x86_emulate.h>
39 
40 #include <xsm/xsm.h>
41 
42 #include "emulate.h"
43 
/*
 * Override macros from asm/page.h to make them work with mfn_t.
 * NOTE(review): no use of either macro is visible in this file — these
 * look copied from a sibling emul-*.c; confirm before removing.
 */
#undef mfn_to_page
#define mfn_to_page(mfn) __mfn_to_page(mfn_x(mfn))
#undef page_to_mfn
#define page_to_mfn(pg) _mfn(__page_to_mfn(pg))
49 
read_gate_descriptor(unsigned int gate_sel,const struct vcpu * v,unsigned int * sel,unsigned long * off,unsigned int * ar)50 static int read_gate_descriptor(unsigned int gate_sel,
51                                 const struct vcpu *v,
52                                 unsigned int *sel,
53                                 unsigned long *off,
54                                 unsigned int *ar)
55 {
56     struct desc_struct desc;
57     const struct desc_struct *pdesc;
58 
59     pdesc = (const struct desc_struct *)
60         (!(gate_sel & 4) ? GDT_VIRT_START(v) : LDT_VIRT_START(v))
61         + (gate_sel >> 3);
62     if ( (gate_sel < 4) ||
63          ((gate_sel >= FIRST_RESERVED_GDT_BYTE) && !(gate_sel & 4)) ||
64          __get_user(desc, pdesc) )
65         return 0;
66 
67     *sel = (desc.a >> 16) & 0x0000fffc;
68     *off = (desc.a & 0x0000ffff) | (desc.b & 0xffff0000);
69     *ar = desc.b & 0x0000ffff;
70 
71     /*
72      * check_descriptor() clears the DPL field and stores the
73      * guest requested DPL in the selector's RPL field.
74      */
75     if ( *ar & _SEGMENT_DPL )
76         return 0;
77     *ar |= (desc.a >> (16 - 13)) & _SEGMENT_DPL;
78 
79     if ( !is_pv_32bit_vcpu(v) )
80     {
81         if ( (*ar & 0x1f00) != 0x0c00 ||
82              (gate_sel >= FIRST_RESERVED_GDT_BYTE - 8 && !(gate_sel & 4)) ||
83              __get_user(desc, pdesc + 1) ||
84              (desc.b & 0x1f00) )
85             return 0;
86 
87         *off |= (unsigned long)desc.a << 32;
88         return 1;
89     }
90 
91     switch ( *ar & 0x1f00 )
92     {
93     case 0x0400:
94         *off &= 0xffff;
95         break;
96     case 0x0c00:
97         break;
98     default:
99         return 0;
100     }
101 
102     return 1;
103 }
104 
check_stack_limit(unsigned int ar,unsigned int limit,unsigned int esp,unsigned int decr)105 static inline bool check_stack_limit(unsigned int ar, unsigned int limit,
106                                      unsigned int esp, unsigned int decr)
107 {
108     return (((esp - decr) < (esp - 1)) &&
109             (!(ar & _SEGMENT_EC) ? (esp - 1) <= limit : (esp - decr) > limit));
110 }
111 
/*
 * Emulation context for gate-op decoding: wraps the generic emulator
 * context and caches the faulting code segment's base/limit so that
 * read_mem() can resolve CS-relative fetches without re-reading the
 * descriptor.
 */
struct gate_op_ctxt {
    struct x86_emulate_ctxt ctxt;
    struct {
        unsigned long base, limit;  /* cached CS base and limit */
    } cs;
    bool insn_fetch;  /* true only while fetching the instruction itself */
};
119 
read_mem(enum x86_segment seg,unsigned long offset,void * p_data,unsigned int bytes,struct x86_emulate_ctxt * ctxt)120 static int read_mem(enum x86_segment seg, unsigned long offset, void *p_data,
121                     unsigned int bytes, struct x86_emulate_ctxt *ctxt)
122 {
123     const struct gate_op_ctxt *goc =
124         container_of(ctxt, struct gate_op_ctxt, ctxt);
125     unsigned int rc = bytes, sel = 0;
126     unsigned long addr = offset, limit = 0;
127 
128     switch ( seg )
129     {
130     case x86_seg_cs:
131         addr += goc->cs.base;
132         limit = goc->cs.limit;
133         break;
134     case x86_seg_ds:
135         sel = read_sreg(ds);
136         break;
137     case x86_seg_es:
138         sel = read_sreg(es);
139         break;
140     case x86_seg_fs:
141         sel = read_sreg(fs);
142         break;
143     case x86_seg_gs:
144         sel = read_sreg(gs);
145         break;
146     case x86_seg_ss:
147         sel = ctxt->regs->ss;
148         break;
149     default:
150         return X86EMUL_UNHANDLEABLE;
151     }
152     if ( sel )
153     {
154         unsigned int ar;
155 
156         ASSERT(!goc->insn_fetch);
157         if ( !pv_emul_read_descriptor(sel, current, &addr, &limit, &ar, 0) ||
158              !(ar & _SEGMENT_S) ||
159              !(ar & _SEGMENT_P) ||
160              ((ar & _SEGMENT_CODE) && !(ar & _SEGMENT_WR)) )
161             return X86EMUL_UNHANDLEABLE;
162         addr += offset;
163     }
164     else if ( seg != x86_seg_cs )
165         return X86EMUL_UNHANDLEABLE;
166 
167     /* We don't mean to emulate any branches. */
168     if ( limit < bytes - 1 || offset > limit - bytes + 1 )
169         return X86EMUL_UNHANDLEABLE;
170 
171     addr = (uint32_t)addr;
172 
173     if ( (rc = __copy_from_user(p_data, (void *)addr, bytes)) )
174     {
175         /*
176          * TODO: This should report PFEC_insn_fetch when goc->insn_fetch &&
177          * cpu_has_nx, but we'd then need a "fetch" variant of
178          * __copy_from_user() respecting NX, SMEP, and protection keys.
179          */
180         x86_emul_pagefault(0, addr + bytes - rc, ctxt);
181         return X86EMUL_EXCEPTION;
182     }
183 
184     return X86EMUL_OKAY;
185 }
186 
pv_emulate_gate_op(struct cpu_user_regs * regs)187 void pv_emulate_gate_op(struct cpu_user_regs *regs)
188 {
189     struct vcpu *v = current;
190     unsigned int sel, ar, dpl, nparm, insn_len;
191     struct gate_op_ctxt ctxt = { .ctxt.regs = regs, .insn_fetch = true };
192     struct x86_emulate_state *state;
193     unsigned long off, base, limit;
194     uint16_t opnd_sel = 0;
195     int jump = -1, rc = X86EMUL_OKAY;
196 
197     /* Check whether this fault is due to the use of a call gate. */
198     if ( !read_gate_descriptor(regs->error_code, v, &sel, &off, &ar) ||
199          (((ar >> 13) & 3) < (regs->cs & 3)) ||
200          ((ar & _SEGMENT_TYPE) != 0xc00) )
201     {
202         pv_inject_hw_exception(TRAP_gp_fault, regs->error_code);
203         return;
204     }
205     if ( !(ar & _SEGMENT_P) )
206     {
207         pv_inject_hw_exception(TRAP_no_segment, regs->error_code);
208         return;
209     }
210     dpl = (ar >> 13) & 3;
211     nparm = ar & 0x1f;
212 
213     /*
214      * Decode instruction (and perhaps operand) to determine RPL,
215      * whether this is a jump or a call, and the call return offset.
216      */
217     if ( !pv_emul_read_descriptor(regs->cs, v, &ctxt.cs.base, &ctxt.cs.limit,
218                                   &ar, 0) ||
219          !(ar & _SEGMENT_S) ||
220          !(ar & _SEGMENT_P) ||
221          !(ar & _SEGMENT_CODE) )
222     {
223         pv_inject_hw_exception(TRAP_gp_fault, regs->error_code);
224         return;
225     }
226 
227     ctxt.ctxt.addr_size = ar & _SEGMENT_DB ? 32 : 16;
228     /* Leave zero in ctxt.ctxt.sp_size, as it's not needed for decoding. */
229     state = x86_decode_insn(&ctxt.ctxt, read_mem);
230     ctxt.insn_fetch = false;
231     if ( IS_ERR_OR_NULL(state) )
232     {
233         if ( PTR_ERR(state) == -X86EMUL_EXCEPTION )
234             pv_inject_event(&ctxt.ctxt.event);
235         else
236             pv_inject_hw_exception(TRAP_gp_fault, regs->error_code);
237         return;
238     }
239 
240     switch ( ctxt.ctxt.opcode )
241     {
242         unsigned int modrm_345;
243 
244     case 0xea:
245         ++jump;
246         /* fall through */
247     case 0x9a:
248         ++jump;
249         opnd_sel = x86_insn_immediate(state, 1);
250         break;
251     case 0xff:
252         if ( x86_insn_modrm(state, NULL, &modrm_345) >= 3 )
253             break;
254         switch ( modrm_345 & 7 )
255         {
256             enum x86_segment seg;
257 
258         case 5:
259             ++jump;
260             /* fall through */
261         case 3:
262             ++jump;
263             base = x86_insn_operand_ea(state, &seg);
264             rc = read_mem(seg, base + (x86_insn_opsize(state) >> 3),
265                           &opnd_sel, sizeof(opnd_sel), &ctxt.ctxt);
266             break;
267         }
268         break;
269     }
270 
271     insn_len = x86_insn_length(state, &ctxt.ctxt);
272     x86_emulate_free_state(state);
273 
274     if ( rc == X86EMUL_EXCEPTION )
275     {
276         pv_inject_event(&ctxt.ctxt.event);
277         return;
278     }
279 
280     if ( rc != X86EMUL_OKAY ||
281          jump < 0 ||
282          (opnd_sel & ~3) != regs->error_code ||
283          dpl < (opnd_sel & 3) )
284     {
285         pv_inject_hw_exception(TRAP_gp_fault, regs->error_code);
286         return;
287     }
288 
289     if ( !pv_emul_read_descriptor(sel, v, &base, &limit, &ar, 0) ||
290          !(ar & _SEGMENT_S) ||
291          !(ar & _SEGMENT_CODE) ||
292          (!jump || (ar & _SEGMENT_EC) ?
293           ((ar >> 13) & 3) > (regs->cs & 3) :
294           ((ar >> 13) & 3) != (regs->cs & 3)) )
295     {
296         pv_inject_hw_exception(TRAP_gp_fault, sel);
297         return;
298     }
299     if ( !(ar & _SEGMENT_P) )
300     {
301         pv_inject_hw_exception(TRAP_no_segment, sel);
302         return;
303     }
304     if ( off > limit )
305     {
306         pv_inject_hw_exception(TRAP_gp_fault, 0);
307         return;
308     }
309 
310     if ( !jump )
311     {
312         unsigned int ss, esp, *stkp;
313         int rc;
314 #define push(item) do \
315         { \
316             --stkp; \
317             esp -= 4; \
318             rc = __put_user(item, stkp); \
319             if ( rc ) \
320             { \
321                 pv_inject_page_fault(PFEC_write_access, \
322                                      (unsigned long)(stkp + 1) - rc); \
323                 return; \
324             } \
325         } while ( 0 )
326 
327         if ( ((ar >> 13) & 3) < (regs->cs & 3) )
328         {
329             sel |= (ar >> 13) & 3;
330             /* Inner stack known only for kernel ring. */
331             if ( (sel & 3) != GUEST_KERNEL_RPL(v->domain) )
332             {
333                 pv_inject_hw_exception(TRAP_gp_fault, regs->error_code);
334                 return;
335             }
336             esp = v->arch.pv_vcpu.kernel_sp;
337             ss = v->arch.pv_vcpu.kernel_ss;
338             if ( (ss & 3) != (sel & 3) ||
339                  !pv_emul_read_descriptor(ss, v, &base, &limit, &ar, 0) ||
340                  ((ar >> 13) & 3) != (sel & 3) ||
341                  !(ar & _SEGMENT_S) ||
342                  (ar & _SEGMENT_CODE) ||
343                  !(ar & _SEGMENT_WR) )
344             {
345                 pv_inject_hw_exception(TRAP_invalid_tss, ss & ~3);
346                 return;
347             }
348             if ( !(ar & _SEGMENT_P) ||
349                  !check_stack_limit(ar, limit, esp, (4 + nparm) * 4) )
350             {
351                 pv_inject_hw_exception(TRAP_stack_error, ss & ~3);
352                 return;
353             }
354             stkp = (unsigned int *)(unsigned long)((unsigned int)base + esp);
355             if ( !compat_access_ok(stkp - 4 - nparm, 16 + nparm * 4) )
356             {
357                 pv_inject_hw_exception(TRAP_gp_fault, regs->error_code);
358                 return;
359             }
360             push(regs->ss);
361             push(regs->rsp);
362             if ( nparm )
363             {
364                 const unsigned int *ustkp;
365 
366                 if ( !pv_emul_read_descriptor(regs->ss, v, &base,
367                                               &limit, &ar, 0) ||
368                      ((ar >> 13) & 3) != (regs->cs & 3) ||
369                      !(ar & _SEGMENT_S) ||
370                      (ar & _SEGMENT_CODE) ||
371                      !(ar & _SEGMENT_WR) ||
372                      !check_stack_limit(ar, limit, esp + nparm * 4, nparm * 4) )
373                     return pv_inject_hw_exception(TRAP_gp_fault, regs->error_code);
374                 ustkp = (unsigned int *)(unsigned long)
375                         ((unsigned int)base + regs->esp + nparm * 4);
376                 if ( !compat_access_ok(ustkp - nparm, 0 + nparm * 4) )
377                 {
378                     pv_inject_hw_exception(TRAP_gp_fault, regs->error_code);
379                     return;
380                 }
381                 do
382                 {
383                     unsigned int parm;
384 
385                     --ustkp;
386                     rc = __get_user(parm, ustkp);
387                     if ( rc )
388                     {
389                         pv_inject_page_fault(0, (unsigned long)(ustkp + 1) - rc);
390                         return;
391                     }
392                     push(parm);
393                 } while ( --nparm );
394             }
395         }
396         else
397         {
398             sel |= (regs->cs & 3);
399             esp = regs->rsp;
400             ss = regs->ss;
401             if ( !pv_emul_read_descriptor(ss, v, &base, &limit, &ar, 0) ||
402                  ((ar >> 13) & 3) != (sel & 3) )
403             {
404                 pv_inject_hw_exception(TRAP_gp_fault, regs->error_code);
405                 return;
406             }
407             if ( !check_stack_limit(ar, limit, esp, 2 * 4) )
408             {
409                 pv_inject_hw_exception(TRAP_stack_error, 0);
410                 return;
411             }
412             stkp = (unsigned int *)(unsigned long)((unsigned int)base + esp);
413             if ( !compat_access_ok(stkp - 2, 2 * 4) )
414             {
415                 pv_inject_hw_exception(TRAP_gp_fault, regs->error_code);
416                 return;
417             }
418         }
419         push(regs->cs);
420         push(regs->rip + insn_len);
421 #undef push
422         regs->rsp = esp;
423         regs->ss = ss;
424     }
425     else
426         sel |= (regs->cs & 3);
427 
428     regs->cs = sel;
429     pv_emul_instruction_done(regs, off);
430 }
431 
432 /*
433  * Local variables:
434  * mode: C
435  * c-file-style: "BSD"
436  * c-basic-offset: 4
437  * tab-width: 4
438  * indent-tabs-mode: nil
439  * End:
440  */
441