/******************************************************************************
 * domctl.c
 *
 * Domain management operations. For use by node control stack.
 *
 * Copyright (c) 2002-2006, K A Fraser
 */

/*
 * Note: the original include targets were lost in extraction; the list below
 * is reconstructed from what this file actually uses.
 */
#include <xen/types.h>
#include <xen/lib.h>
#include <xen/err.h>
#include <xen/mm.h>
#include <xen/sched.h>
#include <xen/sched-if.h>
#include <xen/domain.h>
#include <xen/event.h>
#include <xen/grant_table.h>
#include <xen/domain_page.h>
#include <xen/trace.h>
#include <xen/console.h>
#include <xen/iocap.h>
#include <xen/rcupdate.h>
#include <xen/guest_access.h>
#include <xen/bitmap.h>
#include <xen/paging.h>
#include <xen/hypercall.h>
#include <xen/vm_event.h>
#include <xen/monitor.h>
#include <asm/current.h>
#include <asm/irq.h>
#include <asm/page.h>
#include <asm/p2m.h>
#include <public/domctl.h>
#include <xsm/xsm.h>

static DEFINE_SPINLOCK(domctl_lock);
DEFINE_SPINLOCK(vcpu_alloc_lock);

static int bitmap_to_xenctl_bitmap(struct xenctl_bitmap *xenctl_bitmap,
                                   const unsigned long *bitmap,
                                   unsigned int nbits)
{
    unsigned int guest_bytes, copy_bytes, i;
    uint8_t zero = 0;
    int err = 0;
    uint8_t *bytemap = xmalloc_array(uint8_t, (nbits + 7) / 8);

    if ( !bytemap )
        return -ENOMEM;

    guest_bytes = (xenctl_bitmap->nr_bits + 7) / 8;
    copy_bytes  = min_t(unsigned int, guest_bytes, (nbits + 7) / 8);

    bitmap_long_to_byte(bytemap, bitmap, nbits);

    if ( copy_bytes != 0 )
        if ( copy_to_guest(xenctl_bitmap->bitmap, bytemap, copy_bytes) )
            err = -EFAULT;

    for ( i = copy_bytes; !err && i < guest_bytes; i++ )
        if ( copy_to_guest_offset(xenctl_bitmap->bitmap, i, &zero, 1) )
            err = -EFAULT;

    xfree(bytemap);

    return err;
}

static int xenctl_bitmap_to_bitmap(unsigned long *bitmap,
                                   const struct xenctl_bitmap *xenctl_bitmap,
                                   unsigned int nbits)
{
    unsigned int guest_bytes, copy_bytes;
    int err = 0;
    uint8_t *bytemap = xzalloc_array(uint8_t, (nbits + 7) / 8);

    if ( !bytemap )
        return -ENOMEM;

    guest_bytes = (xenctl_bitmap->nr_bits + 7) / 8;
    copy_bytes  = min_t(unsigned int, guest_bytes, (nbits + 7) / 8);

    if ( copy_bytes != 0 )
    {
        if ( copy_from_guest(bytemap, xenctl_bitmap->bitmap, copy_bytes) )
            err = -EFAULT;
        if ( (xenctl_bitmap->nr_bits & 7) && (guest_bytes == copy_bytes) )
            bytemap[guest_bytes-1] &= ~(0xff << (xenctl_bitmap->nr_bits & 7));
    }

    if ( !err )
        bitmap_byte_to_long(bitmap, bytemap, nbits);

    xfree(bytemap);

    return err;
}

int cpumask_to_xenctl_bitmap(struct xenctl_bitmap *xenctl_cpumap,
                             const cpumask_t *cpumask)
{
    return bitmap_to_xenctl_bitmap(xenctl_cpumap, cpumask_bits(cpumask),
                                   nr_cpu_ids);
}

int xenctl_bitmap_to_cpumask(cpumask_var_t *cpumask,
                             const struct xenctl_bitmap *xenctl_cpumap)
{
    int err = 0;

    if ( alloc_cpumask_var(cpumask) )
    {
        err = xenctl_bitmap_to_bitmap(cpumask_bits(*cpumask), xenctl_cpumap,
                                      nr_cpu_ids);
        /* In case of error, cleanup is up to us, as the caller won't care! */
        if ( err )
            free_cpumask_var(*cpumask);
    }
    else
        err = -ENOMEM;

    return err;
}
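/*
 * Worked example for the helpers above (illustrative only, not an ABI
 * statement): to describe CPUs 0-2 a caller would set
 * xenctl_bitmap.nr_bits = 3 and point .bitmap at a buffer whose first byte
 * is 0x07.  xenctl_bitmap_to_bitmap() then clears bits 3-7 of that trailing
 * byte ("&= ~(0xff << 3)") before widening the byte map into unsigned longs,
 * while bitmap_to_xenctl_bitmap() zero-fills any guest bytes beyond the
 * local bitmap width.
 */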
static int nodemask_to_xenctl_bitmap(struct xenctl_bitmap *xenctl_nodemap,
                                     const nodemask_t *nodemask)
{
    return bitmap_to_xenctl_bitmap(xenctl_nodemap, nodes_addr(*nodemask),
                                   MAX_NUMNODES);
}

static int xenctl_bitmap_to_nodemask(nodemask_t *nodemask,
                                     const struct xenctl_bitmap *xenctl_nodemap)
{
    return xenctl_bitmap_to_bitmap(nodes_addr(*nodemask), xenctl_nodemap,
                                   MAX_NUMNODES);
}

static inline int is_free_domid(domid_t dom)
{
    struct domain *d;

    if ( dom >= DOMID_FIRST_RESERVED )
        return 0;

    if ( (d = rcu_lock_domain_by_id(dom)) == NULL )
        return 1;

    rcu_unlock_domain(d);
    return 0;
}

void getdomaininfo(struct domain *d, struct xen_domctl_getdomaininfo *info)
{
    struct vcpu *v;
    u64 cpu_time = 0;
    int flags = XEN_DOMINF_blocked;
    struct vcpu_runstate_info runstate;

    info->domain = d->domain_id;
    info->max_vcpu_id = XEN_INVALID_MAX_VCPU_ID;
    info->nr_online_vcpus = 0;
    info->ssidref = 0;

    /*
     * - domain is marked as blocked only if all its vcpus are blocked
     * - domain is marked as running if any of its vcpus is running
     */
    for_each_vcpu ( d, v )
    {
        vcpu_runstate_get(v, &runstate);
        cpu_time += runstate.time[RUNSTATE_running];
        info->max_vcpu_id = v->vcpu_id;
        if ( !(v->pause_flags & VPF_down) )
        {
            if ( !(v->pause_flags & VPF_blocked) )
                flags &= ~XEN_DOMINF_blocked;
            if ( v->is_running )
                flags |= XEN_DOMINF_running;
            info->nr_online_vcpus++;
        }
    }

    info->cpu_time = cpu_time;

    info->flags = (info->nr_online_vcpus ? flags : 0) |
        ((d->is_dying == DOMDYING_dead) ? XEN_DOMINF_dying     : 0) |
        (d->is_shut_down                ? XEN_DOMINF_shutdown  : 0) |
        (d->controller_pause_count > 0  ? XEN_DOMINF_paused    : 0) |
        (d->debugger_attached           ? XEN_DOMINF_debugged  : 0) |
        (d->is_xenstore                 ? XEN_DOMINF_xs_domain : 0) |
        d->shutdown_code << XEN_DOMINF_shutdownshift;

    switch ( d->guest_type )
    {
    case guest_type_hvm:
        info->flags |= XEN_DOMINF_hvm_guest;
        break;
    default:
        break;
    }

    xsm_security_domaininfo(d, info);

    info->tot_pages         = d->tot_pages;
    info->max_pages         = d->max_pages;
    info->outstanding_pages = d->outstanding_pages;
    info->shr_pages         = atomic_read(&d->shr_pages);
    info->paged_pages       = atomic_read(&d->paged_pages);
    info->shared_info_frame = mfn_to_gmfn(d, virt_to_mfn(d->shared_info));
    BUG_ON(SHARED_M2P(info->shared_info_frame));

    info->cpupool = d->cpupool ? d->cpupool->cpupool_id : CPUPOOLID_NONE;

    memcpy(info->handle, d->handle, sizeof(xen_domain_handle_t));

    arch_get_domain_info(d, info);
}

bool_t domctl_lock_acquire(void)
{
    /*
     * Caller may try to pause its own VCPUs. We must prevent deadlock
     * against other non-domctl routines which try to do the same.
     */
    if ( !spin_trylock(&current->domain->hypercall_deadlock_mutex) )
        return 0;

    /*
     * Trylock here is paranoia if we have multiple privileged domains. Then
     * we could have one domain trying to pause another which is spinning
     * on domctl_lock -- results in deadlock.
     */
    if ( spin_trylock(&domctl_lock) )
        return 1;

    spin_unlock(&current->domain->hypercall_deadlock_mutex);
    return 0;
}

void domctl_lock_release(void)
{
    spin_unlock(&domctl_lock);
    spin_unlock(&current->domain->hypercall_deadlock_mutex);
}
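/*
 * Usage sketch (an observation drawn from do_domctl() below, not new
 * behaviour): a caller that fails domctl_lock_acquire() must not spin on it,
 * since the current holder may itself be waiting for this vCPU to pause;
 * instead it backs off, typically by returning a hypercall continuation and
 * re-issuing the whole domctl later.
 */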
static inline
int vcpuaffinity_params_invalid(const struct xen_domctl_vcpuaffinity *vcpuaff)
{
    return vcpuaff->flags == 0 ||
           ((vcpuaff->flags & XEN_VCPUAFFINITY_HARD) &&
            guest_handle_is_null(vcpuaff->cpumap_hard.bitmap)) ||
           ((vcpuaff->flags & XEN_VCPUAFFINITY_SOFT) &&
            guest_handle_is_null(vcpuaff->cpumap_soft.bitmap));
}

void vnuma_destroy(struct vnuma_info *vnuma)
{
    if ( vnuma )
    {
        xfree(vnuma->vmemrange);
        xfree(vnuma->vcpu_to_vnode);
        xfree(vnuma->vdistance);
        xfree(vnuma->vnode_to_pnode);
        xfree(vnuma);
    }
}

/*
 * Allocates memory for vNUMA, **vnuma should be NULL.
 * Caller has to make sure that domain has max_pages
 * and number of vcpus set for domain.
 * Verifies that single allocation does not exceed
 * PAGE_SIZE.
 */
static struct vnuma_info *vnuma_alloc(unsigned int nr_vnodes,
                                      unsigned int nr_ranges,
                                      unsigned int nr_vcpus)
{
    struct vnuma_info *vnuma;

    /*
     * Check if any of the allocations are bigger than PAGE_SIZE.
     * See XSA-77.
     */
    if ( nr_vnodes * nr_vnodes > (PAGE_SIZE / sizeof(*vnuma->vdistance)) ||
         nr_ranges > (PAGE_SIZE / sizeof(*vnuma->vmemrange)) )
        return ERR_PTR(-EINVAL);

    /*
     * If allocations become larger than PAGE_SIZE, these allocations
     * should be split into PAGE_SIZE allocations due to XSA-77.
     */
    vnuma = xmalloc(struct vnuma_info);
    if ( !vnuma )
        return ERR_PTR(-ENOMEM);

    vnuma->vdistance = xmalloc_array(unsigned int, nr_vnodes * nr_vnodes);
    vnuma->vcpu_to_vnode = xmalloc_array(unsigned int, nr_vcpus);
    vnuma->vnode_to_pnode = xmalloc_array(nodeid_t, nr_vnodes);
    vnuma->vmemrange = xmalloc_array(xen_vmemrange_t, nr_ranges);

    if ( vnuma->vdistance == NULL || vnuma->vmemrange == NULL ||
         vnuma->vcpu_to_vnode == NULL || vnuma->vnode_to_pnode == NULL )
    {
        vnuma_destroy(vnuma);
        return ERR_PTR(-ENOMEM);
    }

    return vnuma;
}

/*
 * Construct vNUMA topology from uinfo.
 */
static struct vnuma_info *vnuma_init(const struct xen_domctl_vnuma *uinfo,
                                     const struct domain *d)
{
    unsigned int i, nr_vnodes;
    int ret = -EINVAL;
    struct vnuma_info *info;

    nr_vnodes = uinfo->nr_vnodes;

    if ( nr_vnodes == 0 ||
         uinfo->nr_vcpus != d->max_vcpus ||
         uinfo->pad != 0 )
        return ERR_PTR(ret);

    info = vnuma_alloc(nr_vnodes, uinfo->nr_vmemranges, d->max_vcpus);
    if ( IS_ERR(info) )
        return info;

    ret = -EFAULT;

    if ( copy_from_guest(info->vdistance, uinfo->vdistance,
                         nr_vnodes * nr_vnodes) )
        goto vnuma_fail;

    if ( copy_from_guest(info->vmemrange, uinfo->vmemrange,
                         uinfo->nr_vmemranges) )
        goto vnuma_fail;

    if ( copy_from_guest(info->vcpu_to_vnode, uinfo->vcpu_to_vnode,
                         d->max_vcpus) )
        goto vnuma_fail;

    ret = -E2BIG;
    for ( i = 0; i < d->max_vcpus; ++i )
        if ( info->vcpu_to_vnode[i] >= nr_vnodes )
            goto vnuma_fail;

    for ( i = 0; i < nr_vnodes; ++i )
    {
        unsigned int pnode;

        ret = -EFAULT;
        if ( copy_from_guest_offset(&pnode, uinfo->vnode_to_pnode, i, 1) )
            goto vnuma_fail;

        ret = -E2BIG;
        if ( pnode >= MAX_NUMNODES )
            goto vnuma_fail;

        info->vnode_to_pnode[i] = pnode;
    }

    info->nr_vnodes = nr_vnodes;
    info->nr_vmemranges = uinfo->nr_vmemranges;

    /* Check that vmemranges flags are zero. */
    ret = -EINVAL;
    for ( i = 0; i < info->nr_vmemranges; i++ )
        if ( info->vmemrange[i].flags != 0 )
            goto vnuma_fail;

    return info;

 vnuma_fail:
    vnuma_destroy(info);
    return ERR_PTR(ret);
}
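/*
 * Illustrative input (an assumption about typical toolstack usage, beyond
 * the checks actually enforced above): for a guest with two vNUMA nodes and
 * SLIT-style distances, the toolstack would pass nr_vnodes = 2 and a
 * vdistance matrix of { 10, 20, 20, 10 }, with every vcpu_to_vnode entry
 * below nr_vnodes and every vnode_to_pnode entry a node id below
 * MAX_NUMNODES.
 */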
long do_domctl(XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
{
    long ret = 0;
    bool_t copyback = 0;
    struct xen_domctl curop, *op = &curop;
    struct domain *d;

    if ( copy_from_guest(op, u_domctl, 1) )
        return -EFAULT;

    if ( op->interface_version != XEN_DOMCTL_INTERFACE_VERSION )
        return -EACCES;

    switch ( op->cmd )
    {
    case XEN_DOMCTL_test_assign_device:
        if ( op->domain == DOMID_INVALID )
        {
    case XEN_DOMCTL_createdomain:
    case XEN_DOMCTL_gdbsx_guestmemio:
            d = NULL;
            break;
        }
        /* fall through */
    default:
        d = rcu_lock_domain_by_id(op->domain);
        if ( !d && op->cmd != XEN_DOMCTL_getdomaininfo )
            return -ESRCH;
    }

    ret = xsm_domctl(XSM_OTHER, d, op->cmd);
    if ( ret )
        goto domctl_out_unlock_domonly;

    if ( !domctl_lock_acquire() )
    {
        if ( d )
            rcu_unlock_domain(d);
        return hypercall_create_continuation(
            __HYPERVISOR_domctl, "h", u_domctl);
    }

    switch ( op->cmd )
    {

    case XEN_DOMCTL_setvcpucontext:
    {
        vcpu_guest_context_u c = { .nat = NULL };
        unsigned int vcpu = op->u.vcpucontext.vcpu;
        struct vcpu *v;

        ret = -EINVAL;
        if ( (d == current->domain) || /* no domain_pause() */
             (vcpu >= d->max_vcpus) || ((v = d->vcpu[vcpu]) == NULL) )
            break;

        if ( guest_handle_is_null(op->u.vcpucontext.ctxt) )
        {
            ret = vcpu_reset(v);
            if ( ret == -ERESTART )
                ret = hypercall_create_continuation(
                          __HYPERVISOR_domctl, "h", u_domctl);
            break;
        }

#ifdef CONFIG_COMPAT
        BUILD_BUG_ON(sizeof(struct vcpu_guest_context)
                     < sizeof(struct compat_vcpu_guest_context));
#endif
        ret = -ENOMEM;
        if ( (c.nat = alloc_vcpu_guest_context()) == NULL )
            break;

#ifdef CONFIG_COMPAT
        if ( !is_pv_32bit_domain(d) )
            ret = copy_from_guest(c.nat, op->u.vcpucontext.ctxt, 1);
        else
            ret = copy_from_guest(c.cmp,
                                  guest_handle_cast(op->u.vcpucontext.ctxt,
                                                    void),
                                  1);
#else
        ret = copy_from_guest(c.nat, op->u.vcpucontext.ctxt, 1);
#endif
        ret = ret ? -EFAULT : 0;

        if ( ret == 0 )
        {
            domain_pause(d);
            ret = arch_set_info_guest(v, c);
            domain_unpause(d);

            if ( ret == -ERESTART )
                ret = hypercall_create_continuation(
                          __HYPERVISOR_domctl, "h", u_domctl);
        }

        free_vcpu_guest_context(c.nat);
        break;
    }
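    /*
     * Note on the -ERESTART pattern used above and in several cases below
     * (a reading of the existing code, not new behaviour): operations that
     * cannot complete in one go return -ERESTART and are converted into a
     * hypercall continuation, so the caller transparently re-issues the
     * same domctl with unchanged arguments until it completes.
     */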
    case XEN_DOMCTL_pausedomain:
        ret = -EINVAL;
        if ( d != current->domain )
            ret = domain_pause_by_systemcontroller(d);
        break;

    case XEN_DOMCTL_unpausedomain:
        ret = domain_unpause_by_systemcontroller(d);
        break;

    case XEN_DOMCTL_resumedomain:
        if ( d == current->domain ) /* no domain_pause() */
            ret = -EINVAL;
        else
            domain_resume(d);
        break;

    case XEN_DOMCTL_createdomain:
    {
        domid_t        dom;
        static domid_t rover = 0;
        unsigned int domcr_flags;

        ret = -EINVAL;
        if ( (op->u.createdomain.flags &
             ~(XEN_DOMCTL_CDF_hvm_guest
               | XEN_DOMCTL_CDF_hap
               | XEN_DOMCTL_CDF_s3_integrity
               | XEN_DOMCTL_CDF_oos_off
               | XEN_DOMCTL_CDF_xs_domain)) )
            break;

        dom = op->domain;
        if ( (dom > 0) && (dom < DOMID_FIRST_RESERVED) )
        {
            ret = -EINVAL;
            if ( !is_free_domid(dom) )
                break;
        }
        else
        {
            for ( dom = rover + 1; dom != rover; dom++ )
            {
                if ( dom == DOMID_FIRST_RESERVED )
                    dom = 1;
                if ( is_free_domid(dom) )
                    break;
            }

            ret = -ENOMEM;
            if ( dom == rover )
                break;

            rover = dom;
        }

        domcr_flags = 0;
        if ( op->u.createdomain.flags & XEN_DOMCTL_CDF_hvm_guest )
            domcr_flags |= DOMCRF_hvm;
        if ( op->u.createdomain.flags & XEN_DOMCTL_CDF_hap )
            domcr_flags |= DOMCRF_hap;
        if ( op->u.createdomain.flags & XEN_DOMCTL_CDF_s3_integrity )
            domcr_flags |= DOMCRF_s3_integrity;
        if ( op->u.createdomain.flags & XEN_DOMCTL_CDF_oos_off )
            domcr_flags |= DOMCRF_oos_off;
        if ( op->u.createdomain.flags & XEN_DOMCTL_CDF_xs_domain )
            domcr_flags |= DOMCRF_xs_domain;

        d = domain_create(dom, domcr_flags, op->u.createdomain.ssidref,
                          &op->u.createdomain.config);
        if ( IS_ERR(d) )
        {
            ret = PTR_ERR(d);
            d = NULL;
            break;
        }

        ret = 0;

        memcpy(d->handle, op->u.createdomain.handle,
               sizeof(xen_domain_handle_t));

        op->domain = d->domain_id;
        copyback = 1;
        d = NULL;
        break;
    }

    case XEN_DOMCTL_max_vcpus:
    {
        unsigned int i, max = op->u.max_vcpus.max, cpu;
        cpumask_t *online;

        ret = -EINVAL;
        if ( (d == current->domain) || /* no domain_pause() */
             (max > domain_max_vcpus(d)) )
            break;

        /* Until Xenoprof can dynamically grow its vcpu-s array... */
        if ( d->xenoprof )
        {
            ret = -EAGAIN;
            break;
        }

        /* Needed, for example, to ensure writable p.t. state is synced. */
        domain_pause(d);

        /*
         * Certain operations (e.g. CPU microcode updates) modify data which is
         * used during VCPU allocation/initialization
         */
        while ( !spin_trylock(&vcpu_alloc_lock) )
        {
            if ( hypercall_preempt_check() )
            {
                ret = hypercall_create_continuation(
                          __HYPERVISOR_domctl, "h", u_domctl);
                goto maxvcpu_out_novcpulock;
            }
        }

        /* We cannot reduce maximum VCPUs. */
        ret = -EINVAL;
        if ( (max < d->max_vcpus) && (d->vcpu[max] != NULL) )
            goto maxvcpu_out;

        /*
         * For now don't allow increasing the vcpu count from a non-zero
         * value: This code and all readers of d->vcpu would otherwise need
         * to be converted to use RCU, but at present there's no tools side
         * code path that would issue such a request.
         */
        ret = -EBUSY;
        if ( (d->max_vcpus > 0) && (max > d->max_vcpus) )
            goto maxvcpu_out;

        ret = -ENOMEM;
        online = cpupool_domain_cpumask(d);

        if ( max > d->max_vcpus )
        {
            struct vcpu **vcpus;

            BUG_ON(d->vcpu != NULL);
            BUG_ON(d->max_vcpus != 0);

            if ( (vcpus = xzalloc_array(struct vcpu *, max)) == NULL )
                goto maxvcpu_out;

            /* Install vcpu array /then/ update max_vcpus. */
            d->vcpu = vcpus;
            smp_wmb();
            d->max_vcpus = max;
        }

        for ( i = 0; i < max; i++ )
        {
            if ( d->vcpu[i] != NULL )
                continue;

            cpu = (i == 0) ?
                cpumask_any(online) :
                cpumask_cycle(d->vcpu[i-1]->processor, online);

            if ( alloc_vcpu(d, i, cpu) == NULL )
                goto maxvcpu_out;
        }

        ret = 0;

    maxvcpu_out:
        spin_unlock(&vcpu_alloc_lock);

    maxvcpu_out_novcpulock:
        domain_unpause(d);
        break;
    }
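    /*
     * Placement note for XEN_DOMCTL_max_vcpus above (descriptive comment
     * only): the allocation loop seeds initial vCPU placement round-robin
     * over the cpupool's online CPUs -- vCPU 0 on an arbitrary online CPU,
     * each subsequent vCPU on the next online CPU after its predecessor's.
     * The scheduler remains free to migrate them later.
     */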
    case XEN_DOMCTL_soft_reset:
        if ( d == current->domain ) /* no domain_pause() */
        {
            ret = -EINVAL;
            break;
        }
        ret = domain_soft_reset(d);
        break;

    case XEN_DOMCTL_destroydomain:
        ret = domain_kill(d);
        if ( ret == -ERESTART )
            ret = hypercall_create_continuation(
                      __HYPERVISOR_domctl, "h", u_domctl);
        break;

    case XEN_DOMCTL_setnodeaffinity:
    {
        nodemask_t new_affinity;

        ret = xenctl_bitmap_to_nodemask(&new_affinity,
                                        &op->u.nodeaffinity.nodemap);
        if ( !ret )
            ret = domain_set_node_affinity(d, &new_affinity);
        break;
    }

    case XEN_DOMCTL_getnodeaffinity:
        ret = nodemask_to_xenctl_bitmap(&op->u.nodeaffinity.nodemap,
                                        &d->node_affinity);
        break;

    case XEN_DOMCTL_setvcpuaffinity:
    case XEN_DOMCTL_getvcpuaffinity:
    {
        struct vcpu *v;
        struct xen_domctl_vcpuaffinity *vcpuaff = &op->u.vcpuaffinity;

        ret = -EINVAL;
        if ( vcpuaff->vcpu >= d->max_vcpus )
            break;

        ret = -ESRCH;
        if ( (v = d->vcpu[vcpuaff->vcpu]) == NULL )
            break;

        ret = -EINVAL;
        if ( vcpuaffinity_params_invalid(vcpuaff) )
            break;

        if ( op->cmd == XEN_DOMCTL_setvcpuaffinity )
        {
            cpumask_var_t new_affinity, old_affinity;
            cpumask_t *online = cpupool_domain_cpumask(v->domain);

            /*
             * We want to be able to restore hard affinity if we are trying
             * to set both, and changing soft affinity (which happens later,
             * when hard affinity has already been changed successfully)
             * fails.
             */
            if ( !alloc_cpumask_var(&old_affinity) )
            {
                ret = -ENOMEM;
                break;
            }
            cpumask_copy(old_affinity, v->cpu_hard_affinity);

            if ( !alloc_cpumask_var(&new_affinity) )
            {
                free_cpumask_var(old_affinity);
                ret = -ENOMEM;
                break;
            }

            /* Undo a stuck SCHED_pin_override? */
            if ( vcpuaff->flags & XEN_VCPUAFFINITY_FORCE )
                vcpu_pin_override(v, -1);

            ret = 0;

            /*
             * We both set a new affinity and report back to the caller what
             * the scheduler will be effectively using.
             */
            if ( vcpuaff->flags & XEN_VCPUAFFINITY_HARD )
            {
                ret = xenctl_bitmap_to_bitmap(cpumask_bits(new_affinity),
                                              &vcpuaff->cpumap_hard,
                                              nr_cpu_ids);
                if ( !ret )
                    ret = vcpu_set_hard_affinity(v, new_affinity);
                if ( ret )
                    goto setvcpuaffinity_out;

                /*
                 * For hard affinity, what we return is the intersection of
                 * cpupool's online mask and the new hard affinity.
                 */
                cpumask_and(new_affinity, online, v->cpu_hard_affinity);
                ret = cpumask_to_xenctl_bitmap(&vcpuaff->cpumap_hard,
                                               new_affinity);
            }
            if ( vcpuaff->flags & XEN_VCPUAFFINITY_SOFT )
            {
                ret = xenctl_bitmap_to_bitmap(cpumask_bits(new_affinity),
                                              &vcpuaff->cpumap_soft,
                                              nr_cpu_ids);
                if ( !ret )
                    ret = vcpu_set_soft_affinity(v, new_affinity);
                if ( ret )
                {
                    /*
                     * Since we're returning error, the caller expects nothing
                     * happened, so we rollback the changes to hard affinity
                     * (if any).
                     */
                    if ( vcpuaff->flags & XEN_VCPUAFFINITY_HARD )
                        vcpu_set_hard_affinity(v, old_affinity);
                    goto setvcpuaffinity_out;
                }

                /*
                 * For soft affinity, we return the intersection between the
                 * new soft affinity, the cpupool's online map and the (new)
                 * hard affinity.
                 */
                cpumask_and(new_affinity, new_affinity, online);
                cpumask_and(new_affinity, new_affinity, v->cpu_hard_affinity);
                ret = cpumask_to_xenctl_bitmap(&vcpuaff->cpumap_soft,
                                               new_affinity);
            }

    setvcpuaffinity_out:
            free_cpumask_var(new_affinity);
            free_cpumask_var(old_affinity);
        }
        else
        {
            if ( vcpuaff->flags & XEN_VCPUAFFINITY_HARD )
                ret = cpumask_to_xenctl_bitmap(&vcpuaff->cpumap_hard,
                                               v->cpu_hard_affinity);
            if ( vcpuaff->flags & XEN_VCPUAFFINITY_SOFT )
                ret = cpumask_to_xenctl_bitmap(&vcpuaff->cpumap_soft,
                                               v->cpu_soft_affinity);
        }
        break;
    }
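    /*
     * Usage sketch for XEN_DOMCTL_{set,get}vcpuaffinity above (an assumption
     * about toolstack behaviour, not enforced here): a caller pinning and
     * preferring CPUs in one call sets
     * flags = XEN_VCPUAFFINITY_HARD | XEN_VCPUAFFINITY_SOFT, supplies both
     * bitmaps, and on success gets each bitmap rewritten with the affinity
     * the scheduler will effectively use, i.e. intersected with the
     * cpupool's online CPUs (and, for soft, with the new hard affinity).
     */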
    case XEN_DOMCTL_scheduler_op:
        ret = sched_adjust(d, &op->u.scheduler_op);
        copyback = 1;
        break;

    case XEN_DOMCTL_getdomaininfo:
    {
        domid_t dom = DOMID_INVALID;

        if ( !d )
        {
            ret = -EINVAL;
            if ( op->domain >= DOMID_FIRST_RESERVED )
                break;

            rcu_read_lock(&domlist_read_lock);

            dom = op->domain;
            for_each_domain ( d )
                if ( d->domain_id >= dom )
                    break;
        }

        ret = -ESRCH;
        if ( d == NULL )
            goto getdomaininfo_out;

        ret = xsm_getdomaininfo(XSM_HOOK, d);
        if ( ret )
            goto getdomaininfo_out;

        getdomaininfo(d, &op->u.getdomaininfo);

        op->domain = op->u.getdomaininfo.domain;
        copyback = 1;

    getdomaininfo_out:
        /* When d was non-NULL upon entry, no cleanup is needed. */
        if ( dom == DOMID_INVALID )
            break;

        rcu_read_unlock(&domlist_read_lock);
        d = NULL;
        break;
    }

    case XEN_DOMCTL_getvcpucontext:
    {
        vcpu_guest_context_u c = { .nat = NULL };
        struct vcpu         *v;

        ret = -EINVAL;
        if ( op->u.vcpucontext.vcpu >= d->max_vcpus ||
             (v = d->vcpu[op->u.vcpucontext.vcpu]) == NULL ||
             v == current ) /* no vcpu_pause() */
            goto getvcpucontext_out;

        ret = -ENODATA;
        if ( !v->is_initialised )
            goto getvcpucontext_out;

#ifdef CONFIG_COMPAT
        BUILD_BUG_ON(sizeof(struct vcpu_guest_context)
                     < sizeof(struct compat_vcpu_guest_context));
#endif
        ret = -ENOMEM;
        if ( (c.nat = xzalloc(struct vcpu_guest_context)) == NULL )
            goto getvcpucontext_out;

        vcpu_pause(v);

        arch_get_info_guest(v, c);
        ret = 0;

        vcpu_unpause(v);

#ifdef CONFIG_COMPAT
        if ( !is_pv_32bit_domain(d) )
            ret = copy_to_guest(op->u.vcpucontext.ctxt, c.nat, 1);
        else
            ret = copy_to_guest(guest_handle_cast(op->u.vcpucontext.ctxt,
                                                  void),
                                c.cmp, 1);
#else
        ret = copy_to_guest(op->u.vcpucontext.ctxt, c.nat, 1);
#endif

        if ( ret )
            ret = -EFAULT;
        copyback = 1;

    getvcpucontext_out:
        xfree(c.nat);
        break;
    }

    case XEN_DOMCTL_getvcpuinfo:
    {
        struct vcpu *v;
        struct vcpu_runstate_info runstate;

        ret = -EINVAL;
        if ( op->u.getvcpuinfo.vcpu >= d->max_vcpus )
            break;

        ret = -ESRCH;
        if ( (v = d->vcpu[op->u.getvcpuinfo.vcpu]) == NULL )
            break;

        vcpu_runstate_get(v, &runstate);

        op->u.getvcpuinfo.online   = !(v->pause_flags & VPF_down);
        op->u.getvcpuinfo.blocked  = !!(v->pause_flags & VPF_blocked);
        op->u.getvcpuinfo.running  = v->is_running;
        op->u.getvcpuinfo.cpu_time = runstate.time[RUNSTATE_running];
        op->u.getvcpuinfo.cpu      = v->processor;
        ret = 0;
        copyback = 1;
        break;
    }

    case XEN_DOMCTL_max_mem:
    {
        uint64_t new_max = op->u.max_mem.max_memkb >> (PAGE_SHIFT - 10);

        spin_lock(&d->page_alloc_lock);
        /*
         * NB. We removed a check that new_max >= current tot_pages; this means
         * that the domain will now be allowed to "ratchet" down to new_max. In
         * the meantime, while tot > max, all new allocations are disallowed.
         */
        d->max_pages = min(new_max, (uint64_t)(typeof(d->max_pages))-1);
        spin_unlock(&d->page_alloc_lock);
        break;
    }
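    /*
     * Worked example for XEN_DOMCTL_max_mem above (illustrative numbers,
     * assuming 4KiB pages): max_memkb = 2097152 (2GiB) becomes
     * new_max = 524288 pages.  A domain currently holding more than that is
     * not shrunk here; it simply cannot allocate further pages until
     * ballooning or other frees bring tot_pages back under the new
     * max_pages ceiling.
     */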
    case XEN_DOMCTL_setdomainhandle:
        memcpy(d->handle, op->u.setdomainhandle.handle,
               sizeof(xen_domain_handle_t));
        break;

    case XEN_DOMCTL_setdebugging:
        if ( unlikely(d == current->domain) ) /* no domain_pause() */
            ret = -EINVAL;
        else
        {
            domain_pause(d);
            d->debugger_attached = !!op->u.setdebugging.enable;
            domain_unpause(d); /* causes guest to latch new status */
        }
        break;

    case XEN_DOMCTL_irq_permission:
    {
        unsigned int pirq = op->u.irq_permission.pirq, irq;
        int allow = op->u.irq_permission.allow_access;

        if ( pirq >= current->domain->nr_pirqs )
        {
            ret = -EINVAL;
            break;
        }
        irq = pirq_access_permitted(current->domain, pirq);
        if ( !irq || xsm_irq_permission(XSM_HOOK, d, irq, allow) )
            ret = -EPERM;
        else if ( allow )
            ret = irq_permit_access(d, irq);
        else
            ret = irq_deny_access(d, irq);
        break;
    }

    case XEN_DOMCTL_iomem_permission:
    {
        unsigned long mfn = op->u.iomem_permission.first_mfn;
        unsigned long nr_mfns = op->u.iomem_permission.nr_mfns;
        int allow = op->u.iomem_permission.allow_access;

        ret = -EINVAL;
        if ( (mfn + nr_mfns - 1) < mfn ) /* wrap? */
            break;

        if ( !iomem_access_permitted(current->domain,
                                     mfn, mfn + nr_mfns - 1) ||
             xsm_iomem_permission(XSM_HOOK, d, mfn, mfn + nr_mfns - 1, allow) )
            ret = -EPERM;
        else if ( allow )
            ret = iomem_permit_access(d, mfn, mfn + nr_mfns - 1);
        else
            ret = iomem_deny_access(d, mfn, mfn + nr_mfns - 1);
        if ( !ret )
            memory_type_changed(d);
        break;
    }

    case XEN_DOMCTL_memory_mapping:
    {
        unsigned long gfn = op->u.memory_mapping.first_gfn;
        unsigned long mfn = op->u.memory_mapping.first_mfn;
        unsigned long nr_mfns = op->u.memory_mapping.nr_mfns;
        unsigned long mfn_end = mfn + nr_mfns - 1;
        int add = op->u.memory_mapping.add_mapping;

        ret = -EINVAL;
        if ( mfn_end < mfn || /* wrap? */
             ((mfn | mfn_end) >> (paddr_bits - PAGE_SHIFT)) ||
             (gfn + nr_mfns - 1) < gfn ) /* wrap? */
            break;

#ifndef CONFIG_X86 /* XXX ARM!? */
        ret = -E2BIG;
        /* Must break hypercall up as this could take a while. */
        if ( nr_mfns > 64 )
            break;
#endif

        ret = -EPERM;
        if ( !iomem_access_permitted(current->domain, mfn, mfn_end) ||
             !iomem_access_permitted(d, mfn, mfn_end) )
            break;

        ret = xsm_iomem_mapping(XSM_HOOK, d, mfn, mfn_end, add);
        if ( ret )
            break;

        if ( add )
        {
            printk(XENLOG_G_DEBUG
                   "memory_map:add: dom%d gfn=%lx mfn=%lx nr=%lx\n",
                   d->domain_id, gfn, mfn, nr_mfns);

            ret = map_mmio_regions(d, _gfn(gfn), nr_mfns, _mfn(mfn));
            if ( ret < 0 )
                printk(XENLOG_G_WARNING
                       "memory_map:fail: dom%d gfn=%lx mfn=%lx nr=%lx ret:%ld\n",
                       d->domain_id, gfn, mfn, nr_mfns, ret);
        }
        else
        {
            printk(XENLOG_G_DEBUG
                   "memory_map:remove: dom%d gfn=%lx mfn=%lx nr=%lx\n",
                   d->domain_id, gfn, mfn, nr_mfns);

            ret = unmap_mmio_regions(d, _gfn(gfn), nr_mfns, _mfn(mfn));
            if ( ret < 0 && is_hardware_domain(current->domain) )
                printk(XENLOG_ERR
                       "memory_map: error %ld removing dom%d access to [%lx,%lx]\n",
                       ret, d->domain_id, mfn, mfn_end);
        }
        /* Do this unconditionally to cover errors on above failure paths. */
        memory_type_changed(d);
        break;
    }

    case XEN_DOMCTL_settimeoffset:
        domain_set_time_offset(d, op->u.settimeoffset.time_offset_seconds);
        break;

    case XEN_DOMCTL_set_target:
    {
        struct domain *e;

        ret = -ESRCH;
        e = get_domain_by_id(op->u.set_target.target);
        if ( e == NULL )
            break;

        ret = -EINVAL;
        if ( (d == e) || (d->target != NULL) )
        {
            put_domain(e);
            break;
        }

        ret = -EOPNOTSUPP;
        if ( is_hvm_domain(e) )
            ret = xsm_set_target(XSM_HOOK, d, e);
        if ( ret )
        {
            put_domain(e);
            break;
        }

        /* Hold reference on @e until we destroy @d. */
        d->target = e;
        break;
    }
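    /*
     * Background note on XEN_DOMCTL_set_target above (context, not derived
     * from this file alone): it is what lets a service domain, typically a
     * device-model stub domain, exercise device-model privileges over the
     * HVM guest named by @e; hence the is_hvm_domain() restriction.
     */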
    case XEN_DOMCTL_subscribe:
        d->suspend_evtchn = op->u.subscribe.port;
        break;

    case XEN_DOMCTL_vm_event_op:
        ret = vm_event_domctl(d, &op->u.vm_event_op,
                              guest_handle_cast(u_domctl, void));
        copyback = 1;
        break;

#ifdef CONFIG_HAS_MEM_ACCESS
    case XEN_DOMCTL_set_access_required:
        if ( unlikely(current->domain == d) ) /* no domain_pause() */
            ret = -EPERM;
        else
        {
            domain_pause(d);
            p2m_get_hostp2m(d)->access_required =
                op->u.access_required.access_required;
            domain_unpause(d);
        }
        break;
#endif

    case XEN_DOMCTL_set_virq_handler:
        ret = set_global_virq_handler(d, op->u.set_virq_handler.virq);
        break;

    case XEN_DOMCTL_set_max_evtchn:
        d->max_evtchn_port = min_t(unsigned int,
                                   op->u.set_max_evtchn.max_port,
                                   INT_MAX);
        break;

    case XEN_DOMCTL_setvnumainfo:
    {
        struct vnuma_info *vnuma;

        vnuma = vnuma_init(&op->u.vnuma, d);
        if ( IS_ERR(vnuma) )
        {
            ret = PTR_ERR(vnuma);
            break;
        }

        /* overwrite vnuma topology for domain. */
        write_lock(&d->vnuma_rwlock);
        vnuma_destroy(d->vnuma);
        d->vnuma = vnuma;
        write_unlock(&d->vnuma_rwlock);

        break;
    }

    case XEN_DOMCTL_monitor_op:
        ret = monitor_domctl(d, &op->u.monitor_op);
        if ( !ret )
            copyback = 1;
        break;

    case XEN_DOMCTL_set_gnttab_limits:
        ret = grant_table_set_limits(d, op->u.set_gnttab_limits.grant_frames,
                                     op->u.set_gnttab_limits.maptrack_frames);
        break;

    default:
        ret = arch_do_domctl(op, d, u_domctl);
        break;
    }

    domctl_lock_release();

 domctl_out_unlock_domonly:
    if ( d )
        rcu_unlock_domain(d);

    if ( copyback && __copy_to_guest(u_domctl, op, 1) )
        ret = -EFAULT;

    return ret;
}

/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */