1 /******************************************************************************
2  * arch/x86/pv/hypercall.c
3  *
4  * PV hypercall dispatching routines
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; If not, see <http://www.gnu.org/licenses/>.
18  *
19  * Copyright (c) 2017 Citrix Systems Ltd.
20  */
21 
22 #include <xen/compiler.h>
23 #include <xen/hypercall.h>
24 #include <xen/trace.h>
25 
26 #define HYPERCALL(x)                                                \
27     [ __HYPERVISOR_ ## x ] = { (hypercall_fn_t *) do_ ## x,         \
28                                (hypercall_fn_t *) do_ ## x }
29 #define COMPAT_CALL(x)                                              \
30     [ __HYPERVISOR_ ## x ] = { (hypercall_fn_t *) do_ ## x,         \
31                                (hypercall_fn_t *) compat_ ## x }
32 
33 #define do_arch_1             paging_domctl_continuation
34 
35 static const hypercall_table_t pv_hypercall_table[] = {
36     COMPAT_CALL(set_trap_table),
37     HYPERCALL(mmu_update),
38     COMPAT_CALL(set_gdt),
39     HYPERCALL(stack_switch),
40     COMPAT_CALL(set_callbacks),
41     HYPERCALL(fpu_taskswitch),
42     HYPERCALL(sched_op_compat),
43     COMPAT_CALL(platform_op),
44     HYPERCALL(set_debugreg),
45     HYPERCALL(get_debugreg),
46     COMPAT_CALL(update_descriptor),
47     COMPAT_CALL(memory_op),
48     COMPAT_CALL(multicall),
49     COMPAT_CALL(update_va_mapping),
50     COMPAT_CALL(set_timer_op),
51     HYPERCALL(event_channel_op_compat),
52     COMPAT_CALL(xen_version),
53     HYPERCALL(console_io),
54     COMPAT_CALL(physdev_op_compat),
55     COMPAT_CALL(grant_table_op),
56     COMPAT_CALL(vm_assist),
57     COMPAT_CALL(update_va_mapping_otherdomain),
58     COMPAT_CALL(iret),
59     COMPAT_CALL(vcpu_op),
60     HYPERCALL(set_segment_base),
61     COMPAT_CALL(mmuext_op),
62     COMPAT_CALL(xsm_op),
63     COMPAT_CALL(nmi_op),
64     COMPAT_CALL(sched_op),
65     COMPAT_CALL(callback_op),
66 #ifdef CONFIG_XENOPROF
67     COMPAT_CALL(xenoprof_op),
68 #endif
69     HYPERCALL(event_channel_op),
70     COMPAT_CALL(physdev_op),
71     HYPERCALL(hvm_op),
72     HYPERCALL(sysctl),
73     HYPERCALL(domctl),
74 #ifdef CONFIG_KEXEC
75     COMPAT_CALL(kexec_op),
76 #endif
77 #ifdef CONFIG_TMEM
78     HYPERCALL(tmem_op),
79 #endif
80     HYPERCALL(xenpmu_op),
81     COMPAT_CALL(dm_op),
82     HYPERCALL(mca),
83     HYPERCALL(arch_1),
84 };
85 
86 #undef do_arch_1
87 #undef COMPAT_CALL
88 #undef HYPERCALL
89 
pv_hypercall(struct cpu_user_regs * regs)90 void pv_hypercall(struct cpu_user_regs *regs)
91 {
92     struct vcpu *curr = current;
93     unsigned long eax;
94 
95     ASSERT(guest_kernel_mode(curr, regs));
96 
97     eax = is_pv_32bit_vcpu(curr) ? regs->eax : regs->rax;
98 
99     BUILD_BUG_ON(ARRAY_SIZE(pv_hypercall_table) >
100                  ARRAY_SIZE(hypercall_args_table));
101 
102     if ( (eax >= ARRAY_SIZE(pv_hypercall_table)) ||
103          !pv_hypercall_table[eax].native )
104     {
105         regs->rax = -ENOSYS;
106         return;
107     }
108 
109     curr->hcall_preempted = false;
110 
111     if ( !is_pv_32bit_vcpu(curr) )
112     {
113         unsigned long rdi = regs->rdi;
114         unsigned long rsi = regs->rsi;
115         unsigned long rdx = regs->rdx;
116         unsigned long r10 = regs->r10;
117         unsigned long r8 = regs->r8;
118         unsigned long r9 = regs->r9;
119 
120 #ifndef NDEBUG
121         /* Deliberately corrupt parameter regs not used by this hypercall. */
122         switch ( hypercall_args_table[eax].native )
123         {
124         case 0: rdi = 0xdeadbeefdeadf00dUL;
125         case 1: rsi = 0xdeadbeefdeadf00dUL;
126         case 2: rdx = 0xdeadbeefdeadf00dUL;
127         case 3: r10 = 0xdeadbeefdeadf00dUL;
128         case 4: r8 = 0xdeadbeefdeadf00dUL;
129         case 5: r9 = 0xdeadbeefdeadf00dUL;
130         }
131 #endif
132         if ( unlikely(tb_init_done) )
133         {
134             unsigned long args[6] = { rdi, rsi, rdx, r10, r8, r9 };
135 
136             __trace_hypercall(TRC_PV_HYPERCALL_V2, eax, args);
137         }
138 
139         regs->rax = pv_hypercall_table[eax].native(rdi, rsi, rdx, r10, r8, r9);
140 
141 #ifndef NDEBUG
142         if ( !curr->hcall_preempted )
143         {
144             /* Deliberately corrupt parameter regs used by this hypercall. */
145             switch ( hypercall_args_table[eax].native )
146             {
147             case 6: regs->r9  = 0xdeadbeefdeadf00dUL;
148             case 5: regs->r8  = 0xdeadbeefdeadf00dUL;
149             case 4: regs->r10 = 0xdeadbeefdeadf00dUL;
150             case 3: regs->rdx = 0xdeadbeefdeadf00dUL;
151             case 2: regs->rsi = 0xdeadbeefdeadf00dUL;
152             case 1: regs->rdi = 0xdeadbeefdeadf00dUL;
153             }
154         }
155 #endif
156     }
157     else
158     {
159         unsigned int ebx = regs->ebx;
160         unsigned int ecx = regs->ecx;
161         unsigned int edx = regs->edx;
162         unsigned int esi = regs->esi;
163         unsigned int edi = regs->edi;
164         unsigned int ebp = regs->ebp;
165 
166 #ifndef NDEBUG
167         /* Deliberately corrupt parameter regs not used by this hypercall. */
168         switch ( hypercall_args_table[eax].compat )
169         {
170         case 0: ebx = 0xdeadf00d;
171         case 1: ecx = 0xdeadf00d;
172         case 2: edx = 0xdeadf00d;
173         case 3: esi = 0xdeadf00d;
174         case 4: edi = 0xdeadf00d;
175         case 5: ebp = 0xdeadf00d;
176         }
177 #endif
178 
179         if ( unlikely(tb_init_done) )
180         {
181             unsigned long args[6] = { ebx, ecx, edx, esi, edi, ebp };
182 
183             __trace_hypercall(TRC_PV_HYPERCALL_V2, eax, args);
184         }
185 
186         curr->hcall_compat = true;
187         regs->eax = pv_hypercall_table[eax].compat(ebx, ecx, edx, esi, edi, ebp);
188         curr->hcall_compat = false;
189 
190 #ifndef NDEBUG
191         if ( !curr->hcall_preempted )
192         {
193             /* Deliberately corrupt parameter regs used by this hypercall. */
194             switch ( hypercall_args_table[eax].compat )
195             {
196             case 6: regs->ebp = 0xdeadf00d;
197             case 5: regs->edi = 0xdeadf00d;
198             case 4: regs->esi = 0xdeadf00d;
199             case 3: regs->edx = 0xdeadf00d;
200             case 2: regs->ecx = 0xdeadf00d;
201             case 1: regs->ebx = 0xdeadf00d;
202             }
203         }
204 #endif
205     }
206 
207     /*
208      * PV guests use SYSCALL or INT $0x82 to make a hypercall, both of which
209      * have trap semantics.  If the hypercall has been preempted, rewind the
210      * instruction pointer to reexecute the instruction.
211      */
212     if ( curr->hcall_preempted )
213         regs->rip -= 2;
214 
215     perfc_incr(hypercalls);
216 }
217 
arch_do_multicall_call(struct mc_state * state)218 enum mc_disposition arch_do_multicall_call(struct mc_state *state)
219 {
220     struct vcpu *curr = current;
221     unsigned long op;
222 
223     if ( !is_pv_32bit_vcpu(curr) )
224     {
225         struct multicall_entry *call = &state->call;
226 
227         op = call->op;
228         if ( (op < ARRAY_SIZE(pv_hypercall_table)) &&
229              pv_hypercall_table[op].native )
230             call->result = pv_hypercall_table[op].native(
231                 call->args[0], call->args[1], call->args[2],
232                 call->args[3], call->args[4], call->args[5]);
233         else
234             call->result = -ENOSYS;
235     }
236 #ifdef CONFIG_COMPAT
237     else
238     {
239         struct compat_multicall_entry *call = &state->compat_call;
240 
241         op = call->op;
242         if ( (op < ARRAY_SIZE(pv_hypercall_table)) &&
243              pv_hypercall_table[op].compat )
244             call->result = pv_hypercall_table[op].compat(
245                 call->args[0], call->args[1], call->args[2],
246                 call->args[3], call->args[4], call->args[5]);
247         else
248             call->result = -ENOSYS;
249     }
250 #endif
251 
252     return unlikely(op == __HYPERVISOR_iret)
253            ? mc_exit
254            : likely(guest_kernel_mode(curr, guest_cpu_user_regs()))
255              ? mc_continue : mc_preempt;
256 }
257 
hypercall_page_initialise_ring3_kernel(void * hypercall_page)258 void hypercall_page_initialise_ring3_kernel(void *hypercall_page)
259 {
260     void *p = hypercall_page;
261     unsigned int i;
262 
263     /* Fill in all the transfer points with template machine code. */
264     for ( i = 0; i < (PAGE_SIZE / 32); i++, p += 32 )
265     {
266         if ( i == __HYPERVISOR_iret )
267             continue;
268 
269         *(u8  *)(p+ 0) = 0x51;    /* push %rcx */
270         *(u16 *)(p+ 1) = 0x5341;  /* push %r11 */
271         *(u8  *)(p+ 3) = 0xb8;    /* mov  $<i>,%eax */
272         *(u32 *)(p+ 4) = i;
273         *(u16 *)(p+ 8) = 0x050f;  /* syscall */
274         *(u16 *)(p+10) = 0x5b41;  /* pop  %r11 */
275         *(u8  *)(p+12) = 0x59;    /* pop  %rcx */
276         *(u8  *)(p+13) = 0xc3;    /* ret */
277     }
278 
279     /*
280      * HYPERVISOR_iret is special because it doesn't return and expects a
281      * special stack frame. Guests jump at this transfer point instead of
282      * calling it.
283      */
284     p = hypercall_page + (__HYPERVISOR_iret * 32);
285     *(u8  *)(p+ 0) = 0x51;    /* push %rcx */
286     *(u16 *)(p+ 1) = 0x5341;  /* push %r11 */
287     *(u8  *)(p+ 3) = 0x50;    /* push %rax */
288     *(u8  *)(p+ 4) = 0xb8;    /* mov  $__HYPERVISOR_iret,%eax */
289     *(u32 *)(p+ 5) = __HYPERVISOR_iret;
290     *(u16 *)(p+ 9) = 0x050f;  /* syscall */
291 }
292 
hypercall_page_initialise_ring1_kernel(void * hypercall_page)293 void hypercall_page_initialise_ring1_kernel(void *hypercall_page)
294 {
295     void *p = hypercall_page;
296     unsigned int i;
297 
298     /* Fill in all the transfer points with template machine code. */
299 
300     for ( i = 0; i < (PAGE_SIZE / 32); i++, p += 32 )
301     {
302         if ( i == __HYPERVISOR_iret )
303             continue;
304 
305         *(u8  *)(p+ 0) = 0xb8;    /* mov  $<i>,%eax */
306         *(u32 *)(p+ 1) = i;
307         *(u16 *)(p+ 5) = (HYPERCALL_VECTOR << 8) | 0xcd; /* int  $xx */
308         *(u8  *)(p+ 7) = 0xc3;    /* ret */
309     }
310 
311     /*
312      * HYPERVISOR_iret is special because it doesn't return and expects a
313      * special stack frame. Guests jump at this transfer point instead of
314      * calling it.
315      */
316     p = hypercall_page + (__HYPERVISOR_iret * 32);
317     *(u8  *)(p+ 0) = 0x50;    /* push %eax */
318     *(u8  *)(p+ 1) = 0xb8;    /* mov  $__HYPERVISOR_iret,%eax */
319     *(u32 *)(p+ 2) = __HYPERVISOR_iret;
320     *(u16 *)(p+ 6) = (HYPERCALL_VECTOR << 8) | 0xcd; /* int  $xx */
321 }
322 
pv_hypercall_table_replace(unsigned int hypercall,hypercall_fn_t * native,hypercall_fn_t * compat)323 void __init pv_hypercall_table_replace(unsigned int hypercall,
324                                        hypercall_fn_t * native,
325                                        hypercall_fn_t *compat)
326 {
327 #define HANDLER_POINTER(f) \
328     ((unsigned long *)__va(__pa(&pv_hypercall_table[hypercall].f)))
329     write_atomic(HANDLER_POINTER(native), (unsigned long)native);
330     write_atomic(HANDLER_POINTER(compat), (unsigned long)compat);
331 #undef HANDLER_POINTER
332 }
333 
pv_get_hypercall_handler(unsigned int hypercall,bool compat)334 hypercall_fn_t *pv_get_hypercall_handler(unsigned int hypercall, bool compat)
335 {
336     return compat ? pv_hypercall_table[hypercall].compat
337                   : pv_hypercall_table[hypercall].native;
338 }
339 
340 /*
341  * Local variables:
342  * mode: C
343  * c-file-style: "BSD"
344  * c-basic-offset: 4
345  * tab-width: 4
346  * indent-tabs-mode: nil
347  * End:
348  */
349 
350