/******************************************************************************
* arch/x86/pv/hypercall.c
*
* PV hypercall dispatching routines
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
 * along with this program; If not, see <http://www.gnu.org/licenses/>.
*
* Copyright (c) 2017 Citrix Systems Ltd.
*/
#include <xen/compiler.h>
#include <xen/hypercall.h>
#include <xen/trace.h>
/*
 * Table construction helpers:
 *  - HYPERCALL(x)   installs do_##x as both the native and the compat
 *    handler (same implementation serves 64-bit and 32-bit guests).
 *  - COMPAT_CALL(x) installs do_##x as the native handler and the
 *    compat_##x wrapper as the compat handler.
 */
#define HYPERCALL(x)                                                \
    [ __HYPERVISOR_ ## x ] = { (hypercall_fn_t *) do_ ## x,         \
                               (hypercall_fn_t *) do_ ## x }
#define COMPAT_CALL(x)                                              \
    [ __HYPERVISOR_ ## x ] = { (hypercall_fn_t *) do_ ## x,         \
                               (hypercall_fn_t *) compat_ ## x }

/* The arch_1 slot is used to continue preempted paging domctl operations. */
#define do_arch_1 paging_domctl_continuation

/*
 * PV hypercall dispatch table, indexed by hypercall number.  Slots left
 * NULL (unimplemented or compiled-out hypercalls) cause pv_hypercall()
 * to return -ENOSYS.
 */
static const hypercall_table_t pv_hypercall_table[] = {
    COMPAT_CALL(set_trap_table),
    HYPERCALL(mmu_update),
    COMPAT_CALL(set_gdt),
    HYPERCALL(stack_switch),
    COMPAT_CALL(set_callbacks),
    HYPERCALL(fpu_taskswitch),
    HYPERCALL(sched_op_compat),
    COMPAT_CALL(platform_op),
    HYPERCALL(set_debugreg),
    HYPERCALL(get_debugreg),
    COMPAT_CALL(update_descriptor),
    COMPAT_CALL(memory_op),
    COMPAT_CALL(multicall),
    COMPAT_CALL(update_va_mapping),
    COMPAT_CALL(set_timer_op),
    HYPERCALL(event_channel_op_compat),
    COMPAT_CALL(xen_version),
    HYPERCALL(console_io),
    COMPAT_CALL(physdev_op_compat),
    COMPAT_CALL(grant_table_op),
    COMPAT_CALL(vm_assist),
    COMPAT_CALL(update_va_mapping_otherdomain),
    COMPAT_CALL(iret),
    COMPAT_CALL(vcpu_op),
    HYPERCALL(set_segment_base),
    COMPAT_CALL(mmuext_op),
    COMPAT_CALL(xsm_op),
    COMPAT_CALL(nmi_op),
    COMPAT_CALL(sched_op),
    COMPAT_CALL(callback_op),
#ifdef CONFIG_XENOPROF
    COMPAT_CALL(xenoprof_op),
#endif
    HYPERCALL(event_channel_op),
    COMPAT_CALL(physdev_op),
    HYPERCALL(hvm_op),
    HYPERCALL(sysctl),
    HYPERCALL(domctl),
#ifdef CONFIG_KEXEC
    COMPAT_CALL(kexec_op),
#endif
#ifdef CONFIG_TMEM
    HYPERCALL(tmem_op),
#endif
    HYPERCALL(xenpmu_op),
    COMPAT_CALL(dm_op),
    HYPERCALL(mca),
    HYPERCALL(arch_1),
};

#undef do_arch_1
#undef COMPAT_CALL
#undef HYPERCALL
/*
 * Top-level PV hypercall dispatcher, entered from the SYSCALL / INT $0x82
 * paths.  Reads the hypercall number and arguments out of the guest
 * register frame, invokes the handler from pv_hypercall_table[], and
 * writes the result back into regs->rax (or regs->eax for compat guests).
 */
void pv_hypercall(struct cpu_user_regs *regs)
{
    struct vcpu *curr = current;
    unsigned long eax;

    ASSERT(guest_kernel_mode(curr, regs));

    /* 32-bit guests only supply 32 bits of hypercall number. */
    eax = is_pv_32bit_vcpu(curr) ? regs->eax : regs->rax;

    /* The argument-count table must cover every dispatchable entry. */
    BUILD_BUG_ON(ARRAY_SIZE(pv_hypercall_table) >
                 ARRAY_SIZE(hypercall_args_table));

    /* Out-of-range number, or no handler registered => -ENOSYS. */
    if ( (eax >= ARRAY_SIZE(pv_hypercall_table)) ||
         !pv_hypercall_table[eax].native )
    {
        regs->rax = -ENOSYS;
        return;
    }

    curr->hcall_preempted = false;

    if ( !is_pv_32bit_vcpu(curr) )
    {
        /* 64-bit PV ABI: arguments in rdi, rsi, rdx, r10, r8, r9. */
        unsigned long rdi = regs->rdi;
        unsigned long rsi = regs->rsi;
        unsigned long rdx = regs->rdx;
        unsigned long r10 = regs->r10;
        unsigned long r8 = regs->r8;
        unsigned long r9 = regs->r9;

#ifndef NDEBUG
        /*
         * Deliberately corrupt parameter regs not used by this hypercall.
         * Entering the switch at case N poisons arguments N+1..6: each
         * case intentionally falls through to the next.
         */
        switch ( hypercall_args_table[eax].native )
        {
        case 0: rdi = 0xdeadbeefdeadf00dUL; /* fall through */
        case 1: rsi = 0xdeadbeefdeadf00dUL; /* fall through */
        case 2: rdx = 0xdeadbeefdeadf00dUL; /* fall through */
        case 3: r10 = 0xdeadbeefdeadf00dUL; /* fall through */
        case 4: r8 = 0xdeadbeefdeadf00dUL; /* fall through */
        case 5: r9 = 0xdeadbeefdeadf00dUL;
        }
#endif
        if ( unlikely(tb_init_done) )
        {
            /* Trace the call and its (possibly poisoned) arguments. */
            unsigned long args[6] = { rdi, rsi, rdx, r10, r8, r9 };
            __trace_hypercall(TRC_PV_HYPERCALL_V2, eax, args);
        }

        regs->rax = pv_hypercall_table[eax].native(rdi, rsi, rdx, r10, r8, r9);

#ifndef NDEBUG
        /*
         * Skipped on preemption: the original argument registers must
         * survive so the rewound hypercall can re-execute.
         */
        if ( !curr->hcall_preempted )
        {
            /*
             * Deliberately corrupt parameter regs used by this hypercall.
             * Entering at case N (the argument count) poisons arguments
             * N..1; each case intentionally falls through.
             */
            switch ( hypercall_args_table[eax].native )
            {
            case 6: regs->r9 = 0xdeadbeefdeadf00dUL; /* fall through */
            case 5: regs->r8 = 0xdeadbeefdeadf00dUL; /* fall through */
            case 4: regs->r10 = 0xdeadbeefdeadf00dUL; /* fall through */
            case 3: regs->rdx = 0xdeadbeefdeadf00dUL; /* fall through */
            case 2: regs->rsi = 0xdeadbeefdeadf00dUL; /* fall through */
            case 1: regs->rdi = 0xdeadbeefdeadf00dUL;
            }
        }
#endif
    }
    else
    {
        /* 32-bit compat ABI: arguments in ebx, ecx, edx, esi, edi, ebp. */
        unsigned int ebx = regs->ebx;
        unsigned int ecx = regs->ecx;
        unsigned int edx = regs->edx;
        unsigned int esi = regs->esi;
        unsigned int edi = regs->edi;
        unsigned int ebp = regs->ebp;

#ifndef NDEBUG
        /*
         * Deliberately corrupt parameter regs not used by this hypercall.
         * Same fallthrough scheme as the native path above.
         */
        switch ( hypercall_args_table[eax].compat )
        {
        case 0: ebx = 0xdeadf00d; /* fall through */
        case 1: ecx = 0xdeadf00d; /* fall through */
        case 2: edx = 0xdeadf00d; /* fall through */
        case 3: esi = 0xdeadf00d; /* fall through */
        case 4: edi = 0xdeadf00d; /* fall through */
        case 5: ebp = 0xdeadf00d;
        }
#endif
        if ( unlikely(tb_init_done) )
        {
            /* Trace the call and its (possibly poisoned) arguments. */
            unsigned long args[6] = { ebx, ecx, edx, esi, edi, ebp };
            __trace_hypercall(TRC_PV_HYPERCALL_V2, eax, args);
        }

        /* Mark the vcpu as executing a compat hypercall for the call's duration. */
        curr->hcall_compat = true;
        regs->eax = pv_hypercall_table[eax].compat(ebx, ecx, edx, esi, edi, ebp);
        curr->hcall_compat = false;

#ifndef NDEBUG
        /* As above: preserve argument regs if the hypercall was preempted. */
        if ( !curr->hcall_preempted )
        {
            /* Deliberately corrupt parameter regs used by this hypercall. */
            switch ( hypercall_args_table[eax].compat )
            {
            case 6: regs->ebp = 0xdeadf00d; /* fall through */
            case 5: regs->edi = 0xdeadf00d; /* fall through */
            case 4: regs->esi = 0xdeadf00d; /* fall through */
            case 3: regs->edx = 0xdeadf00d; /* fall through */
            case 2: regs->ecx = 0xdeadf00d; /* fall through */
            case 1: regs->ebx = 0xdeadf00d;
            }
        }
#endif
    }

    /*
     * PV guests use SYSCALL or INT $0x82 to make a hypercall, both of which
     * have trap semantics.  If the hypercall has been preempted, rewind the
     * instruction pointer to reexecute the instruction.  Both SYSCALL
     * (0f 05) and INT imm8 (cd 82) are two-byte instructions, hence -2.
     */
    if ( curr->hcall_preempted )
        regs->rip -= 2;

    perfc_incr(hypercalls);
}
/*
 * Execute a single entry of a multicall batch on behalf of the current
 * PV vcpu.
 *
 * Dispatches state->call (native) or state->compat_call (32-bit guests)
 * through pv_hypercall_table[], storing the handler's return value (or
 * -ENOSYS for an unknown/unimplemented op) in the entry's result field.
 *
 * Returns mc_exit for HYPERVISOR_iret (the batch must stop), mc_preempt
 * if the vcpu is no longer in guest kernel mode, mc_continue otherwise.
 */
enum mc_disposition arch_do_multicall_call(struct mc_state *state)
{
    struct vcpu *curr = current;
    unsigned long op;

    /*
     * NOTE(review): with CONFIG_COMPAT disabled, this relies on
     * is_pv_32bit_vcpu() being compile-time false; otherwise 'op' would
     * be read uninitialised at the return statement -- confirm.
     */
    if ( !is_pv_32bit_vcpu(curr) )
    {
        struct multicall_entry *call = &state->call;

        op = call->op;
        if ( (op < ARRAY_SIZE(pv_hypercall_table)) &&
             pv_hypercall_table[op].native )
            call->result = pv_hypercall_table[op].native(
                call->args[0], call->args[1], call->args[2],
                call->args[3], call->args[4], call->args[5]);
        else
            call->result = -ENOSYS;
    }
#ifdef CONFIG_COMPAT
    else
    {
        struct compat_multicall_entry *call = &state->compat_call;

        op = call->op;
        if ( (op < ARRAY_SIZE(pv_hypercall_table)) &&
             pv_hypercall_table[op].compat )
            call->result = pv_hypercall_table[op].compat(
                call->args[0], call->args[1], call->args[2],
                call->args[3], call->args[4], call->args[5]);
        else
            call->result = -ENOSYS;
    }
#endif

    return unlikely(op == __HYPERVISOR_iret)
           ? mc_exit
           : likely(guest_kernel_mode(curr, guest_cpu_user_regs()))
             ? mc_continue : mc_preempt;
}
void hypercall_page_initialise_ring3_kernel(void *hypercall_page)
{
void *p = hypercall_page;
unsigned int i;
/* Fill in all the transfer points with template machine code. */
for ( i = 0; i < (PAGE_SIZE / 32); i++, p += 32 )
{
if ( i == __HYPERVISOR_iret )
continue;
*(u8 *)(p+ 0) = 0x51; /* push %rcx */
*(u16 *)(p+ 1) = 0x5341; /* push %r11 */
*(u8 *)(p+ 3) = 0xb8; /* mov $,%eax */
*(u32 *)(p+ 4) = i;
*(u16 *)(p+ 8) = 0x050f; /* syscall */
*(u16 *)(p+10) = 0x5b41; /* pop %r11 */
*(u8 *)(p+12) = 0x59; /* pop %rcx */
*(u8 *)(p+13) = 0xc3; /* ret */
}
/*
* HYPERVISOR_iret is special because it doesn't return and expects a
* special stack frame. Guests jump at this transfer point instead of
* calling it.
*/
p = hypercall_page + (__HYPERVISOR_iret * 32);
*(u8 *)(p+ 0) = 0x51; /* push %rcx */
*(u16 *)(p+ 1) = 0x5341; /* push %r11 */
*(u8 *)(p+ 3) = 0x50; /* push %rax */
*(u8 *)(p+ 4) = 0xb8; /* mov $__HYPERVISOR_iret,%eax */
*(u32 *)(p+ 5) = __HYPERVISOR_iret;
*(u16 *)(p+ 9) = 0x050f; /* syscall */
}
void hypercall_page_initialise_ring1_kernel(void *hypercall_page)
{
void *p = hypercall_page;
unsigned int i;
/* Fill in all the transfer points with template machine code. */
for ( i = 0; i < (PAGE_SIZE / 32); i++, p += 32 )
{
if ( i == __HYPERVISOR_iret )
continue;
*(u8 *)(p+ 0) = 0xb8; /* mov $,%eax */
*(u32 *)(p+ 1) = i;
*(u16 *)(p+ 5) = (HYPERCALL_VECTOR << 8) | 0xcd; /* int $xx */
*(u8 *)(p+ 7) = 0xc3; /* ret */
}
/*
* HYPERVISOR_iret is special because it doesn't return and expects a
* special stack frame. Guests jump at this transfer point instead of
* calling it.
*/
p = hypercall_page + (__HYPERVISOR_iret * 32);
*(u8 *)(p+ 0) = 0x50; /* push %eax */
*(u8 *)(p+ 1) = 0xb8; /* mov $__HYPERVISOR_iret,%eax */
*(u32 *)(p+ 2) = __HYPERVISOR_iret;
*(u16 *)(p+ 6) = (HYPERCALL_VECTOR << 8) | 0xcd; /* int $xx */
}
/*
 * Replace both handler pointers of a single pv_hypercall_table[] entry.
 *
 * The table itself is declared const, so the writes go through the
 * __va(__pa(...)) alias of each slot rather than the table's own
 * (read-only) mapping, using write_atomic() so a concurrent reader never
 * observes a torn pointer.  Boot-time only (__init).
 */
void __init pv_hypercall_table_replace(unsigned int hypercall,
                                       hypercall_fn_t *native,
                                       hypercall_fn_t *compat)
{
    unsigned long *nat_slot =
        (unsigned long *)__va(__pa(&pv_hypercall_table[hypercall].native));
    unsigned long *cmp_slot =
        (unsigned long *)__va(__pa(&pv_hypercall_table[hypercall].compat));

    write_atomic(nat_slot, (unsigned long)native);
    write_atomic(cmp_slot, (unsigned long)compat);
}
/*
 * Return the registered handler for @hypercall: the compat entry point
 * when @compat is set, the native one otherwise.  The index is not range
 * checked; callers must pass a valid hypercall number.
 */
hypercall_fn_t *pv_get_hypercall_handler(unsigned int hypercall, bool compat)
{
    const hypercall_table_t *ent = &pv_hypercall_table[hypercall];

    if ( compat )
        return ent->compat;

    return ent->native;
}
/*
* Local variables:
* mode: C
* c-file-style: "BSD"
* c-basic-offset: 4
* tab-width: 4
* indent-tabs-mode: nil
* End:
*/