1 /******************************************************************************
2 * domctl.c
3 *
4 * Domain management operations. For use by node control stack.
5 *
6 * Copyright (c) 2002-2006, K A Fraser
7 */
8
9 #include <xen/types.h>
10 #include <xen/lib.h>
11 #include <xen/err.h>
12 #include <xen/mm.h>
13 #include <xen/sched.h>
14 #include <xen/sched-if.h>
15 #include <xen/domain.h>
16 #include <xen/event.h>
17 #include <xen/grant_table.h>
18 #include <xen/domain_page.h>
19 #include <xen/trace.h>
20 #include <xen/console.h>
21 #include <xen/iocap.h>
22 #include <xen/rcupdate.h>
23 #include <xen/guest_access.h>
24 #include <xen/bitmap.h>
25 #include <xen/paging.h>
26 #include <xen/hypercall.h>
27 #include <xen/vm_event.h>
28 #include <xen/monitor.h>
29 #include <asm/current.h>
30 #include <asm/irq.h>
31 #include <asm/page.h>
32 #include <asm/p2m.h>
33 #include <public/domctl.h>
34 #include <xsm/xsm.h>
35
36 static DEFINE_SPINLOCK(domctl_lock);
37 DEFINE_SPINLOCK(vcpu_alloc_lock);
38
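/*
 * Copy a hypervisor bitmap of nbits bits into a guest-supplied
 * xenctl_bitmap. If the guest's bitmap is larger than ours, the
 * remaining guest bytes are zero-filled.
 */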
39 static int bitmap_to_xenctl_bitmap(struct xenctl_bitmap *xenctl_bitmap,
40 const unsigned long *bitmap,
41 unsigned int nbits)
42 {
43 unsigned int guest_bytes, copy_bytes, i;
44 uint8_t zero = 0;
45 int err = 0;
46 uint8_t *bytemap = xmalloc_array(uint8_t, (nbits + 7) / 8);
47
48 if ( !bytemap )
49 return -ENOMEM;
50
51 guest_bytes = (xenctl_bitmap->nr_bits + 7) / 8;
52 copy_bytes = min_t(unsigned int, guest_bytes, (nbits + 7) / 8);
53
54 bitmap_long_to_byte(bytemap, bitmap, nbits);
55
56 if ( copy_bytes != 0 )
57 if ( copy_to_guest(xenctl_bitmap->bitmap, bytemap, copy_bytes) )
58 err = -EFAULT;
59
60 for ( i = copy_bytes; !err && i < guest_bytes; i++ )
61 if ( copy_to_guest_offset(xenctl_bitmap->bitmap, i, &zero, 1) )
62 err = -EFAULT;
63
64 xfree(bytemap);
65
66 return err;
67 }
68
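/*
 * Copy a guest-supplied xenctl_bitmap into a hypervisor bitmap of
 * nbits bits. Stray bits beyond the guest's nr_bits in the last
 * copied byte are cleared.
 */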
69 static int xenctl_bitmap_to_bitmap(unsigned long *bitmap,
70 const struct xenctl_bitmap *xenctl_bitmap,
71 unsigned int nbits)
72 {
73 unsigned int guest_bytes, copy_bytes;
74 int err = 0;
75 uint8_t *bytemap = xzalloc_array(uint8_t, (nbits + 7) / 8);
76
77 if ( !bytemap )
78 return -ENOMEM;
79
80 guest_bytes = (xenctl_bitmap->nr_bits + 7) / 8;
81 copy_bytes = min_t(unsigned int, guest_bytes, (nbits + 7) / 8);
82
83 if ( copy_bytes != 0 )
84 {
85 if ( copy_from_guest(bytemap, xenctl_bitmap->bitmap, copy_bytes) )
86 err = -EFAULT;
87 if ( (xenctl_bitmap->nr_bits & 7) && (guest_bytes == copy_bytes) )
88 bytemap[guest_bytes-1] &= ~(0xff << (xenctl_bitmap->nr_bits & 7));
89 }
90
91 if ( !err )
92 bitmap_byte_to_long(bitmap, bytemap, nbits);
93
94 xfree(bytemap);
95
96 return err;
97 }
98
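/* Thin wrappers converting between cpumask_t/nodemask_t and xenctl_bitmap. */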
99 int cpumask_to_xenctl_bitmap(struct xenctl_bitmap *xenctl_cpumap,
100 const cpumask_t *cpumask)
101 {
102 return bitmap_to_xenctl_bitmap(xenctl_cpumap, cpumask_bits(cpumask),
103 nr_cpu_ids);
104 }
105
106 int xenctl_bitmap_to_cpumask(cpumask_var_t *cpumask,
107 const struct xenctl_bitmap *xenctl_cpumap)
108 {
109 int err = 0;
110
111 if ( alloc_cpumask_var(cpumask) ) {
112 err = xenctl_bitmap_to_bitmap(cpumask_bits(*cpumask), xenctl_cpumap,
113 nr_cpu_ids);
114 /* In case of error, cleanup is up to us, as the caller won't care! */
115 if ( err )
116 free_cpumask_var(*cpumask);
117 }
118 else
119 err = -ENOMEM;
120
121 return err;
122 }
123
124 static int nodemask_to_xenctl_bitmap(struct xenctl_bitmap *xenctl_nodemap,
125 const nodemask_t *nodemask)
126 {
127 return bitmap_to_xenctl_bitmap(xenctl_nodemap, nodes_addr(*nodemask),
128 MAX_NUMNODES);
129 }
130
131 static int xenctl_bitmap_to_nodemask(nodemask_t *nodemask,
132 const struct xenctl_bitmap *xenctl_nodemap)
133 {
134 return xenctl_bitmap_to_bitmap(nodes_addr(*nodemask), xenctl_nodemap,
135 MAX_NUMNODES);
136 }
137
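/* Return 1 if 'dom' is an ordinary domid that is not currently in use. */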
138 static inline int is_free_domid(domid_t dom)
139 {
140 struct domain *d;
141
142 if ( dom >= DOMID_FIRST_RESERVED )
143 return 0;
144
145 if ( (d = rcu_lock_domain_by_id(dom)) == NULL )
146 return 1;
147
148 rcu_unlock_domain(d);
149 return 0;
150 }
151
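/* Fill a XEN_DOMCTL_getdomaininfo structure from the domain's current state. */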
152 void getdomaininfo(struct domain *d, struct xen_domctl_getdomaininfo *info)
153 {
154 struct vcpu *v;
155 u64 cpu_time = 0;
156 int flags = XEN_DOMINF_blocked;
157 struct vcpu_runstate_info runstate;
158
159 info->domain = d->domain_id;
160 info->max_vcpu_id = XEN_INVALID_MAX_VCPU_ID;
161 info->nr_online_vcpus = 0;
162 info->ssidref = 0;
163
164 /*
165 * - domain is marked as blocked only if all its vcpus are blocked
166 * - domain is marked as running if any of its vcpus is running
167 */
168 for_each_vcpu ( d, v )
169 {
170 vcpu_runstate_get(v, &runstate);
171 cpu_time += runstate.time[RUNSTATE_running];
172 info->max_vcpu_id = v->vcpu_id;
173 if ( !(v->pause_flags & VPF_down) )
174 {
175 if ( !(v->pause_flags & VPF_blocked) )
176 flags &= ~XEN_DOMINF_blocked;
177 if ( v->is_running )
178 flags |= XEN_DOMINF_running;
179 info->nr_online_vcpus++;
180 }
181 }
182
183 info->cpu_time = cpu_time;
184
185 info->flags = (info->nr_online_vcpus ? flags : 0) |
186 ((d->is_dying == DOMDYING_dead) ? XEN_DOMINF_dying : 0) |
187 (d->is_shut_down ? XEN_DOMINF_shutdown : 0) |
188 (d->controller_pause_count > 0 ? XEN_DOMINF_paused : 0) |
189 (d->debugger_attached ? XEN_DOMINF_debugged : 0) |
190 (d->is_xenstore ? XEN_DOMINF_xs_domain : 0) |
191 d->shutdown_code << XEN_DOMINF_shutdownshift;
192
193 switch ( d->guest_type )
194 {
195 case guest_type_hvm:
196 info->flags |= XEN_DOMINF_hvm_guest;
197 break;
198 default:
199 break;
200 }
201
202 xsm_security_domaininfo(d, info);
203
204 info->tot_pages = d->tot_pages;
205 info->max_pages = d->max_pages;
206 info->outstanding_pages = d->outstanding_pages;
207 info->shr_pages = atomic_read(&d->shr_pages);
208 info->paged_pages = atomic_read(&d->paged_pages);
209 info->shared_info_frame = mfn_to_gmfn(d, virt_to_mfn(d->shared_info));
210 BUG_ON(SHARED_M2P(info->shared_info_frame));
211
212 info->cpupool = d->cpupool ? d->cpupool->cpupool_id : CPUPOOLID_NONE;
213
214 memcpy(info->handle, d->handle, sizeof(xen_domain_handle_t));
215
216 arch_get_domain_info(d, info);
217 }
218
219 bool_t domctl_lock_acquire(void)
220 {
221 /*
222 * Caller may try to pause its own VCPUs. We must prevent deadlock
223 * against other non-domctl routines which try to do the same.
224 */
225 if ( !spin_trylock(&current->domain->hypercall_deadlock_mutex) )
226 return 0;
227
228 /*
229 * Trylock here is paranoia in case we have multiple privileged domains:
230 * one domain could be trying to pause another which is itself spinning
231 * on domctl_lock, which would result in deadlock.
232 */
233 if ( spin_trylock(&domctl_lock) )
234 return 1;
235
236 spin_unlock(&current->domain->hypercall_deadlock_mutex);
237 return 0;
238 }
239
240 void domctl_lock_release(void)
241 {
242 spin_unlock(&domctl_lock);
243 spin_unlock(&current->domain->hypercall_deadlock_mutex);
244 }
245
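/*
 * Sanity-check a vcpuaffinity request: at least one of HARD/SOFT must be
 * requested, and each requested bitmap must come with a non-null handle.
 */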
246 static inline
247 int vcpuaffinity_params_invalid(const struct xen_domctl_vcpuaffinity *vcpuaff)
248 {
249 return vcpuaff->flags == 0 ||
250 ((vcpuaff->flags & XEN_VCPUAFFINITY_HARD) &&
251 guest_handle_is_null(vcpuaff->cpumap_hard.bitmap)) ||
252 ((vcpuaff->flags & XEN_VCPUAFFINITY_SOFT) &&
253 guest_handle_is_null(vcpuaff->cpumap_soft.bitmap));
254 }
255
256 void vnuma_destroy(struct vnuma_info *vnuma)
257 {
258 if ( vnuma )
259 {
260 xfree(vnuma->vmemrange);
261 xfree(vnuma->vcpu_to_vnode);
262 xfree(vnuma->vdistance);
263 xfree(vnuma->vnode_to_pnode);
264 xfree(vnuma);
265 }
266 }
267
268 /*
269 * Allocates memory for vNUMA; **vnuma should be NULL.
270 * The caller has to make sure that the domain's max_pages
271 * and number of vcpus have already been set.
272 * Verifies that no single allocation exceeds
273 * PAGE_SIZE.
274 */
275 static struct vnuma_info *vnuma_alloc(unsigned int nr_vnodes,
276 unsigned int nr_ranges,
277 unsigned int nr_vcpus)
278 {
279
280 struct vnuma_info *vnuma;
281
282 /*
283 * Check if any of the allocations are bigger than PAGE_SIZE.
284 * See XSA-77.
285 */
286 if ( nr_vnodes * nr_vnodes > (PAGE_SIZE / sizeof(*vnuma->vdistance)) ||
287 nr_ranges > (PAGE_SIZE / sizeof(*vnuma->vmemrange)) )
288 return ERR_PTR(-EINVAL);
289
290 /*
291 * If allocations become larger than PAGE_SIZE, these allocations
292 * should be split into PAGE_SIZE allocations due to XSA-77.
293 */
294 vnuma = xmalloc(struct vnuma_info);
295 if ( !vnuma )
296 return ERR_PTR(-ENOMEM);
297
298 vnuma->vdistance = xmalloc_array(unsigned int, nr_vnodes * nr_vnodes);
299 vnuma->vcpu_to_vnode = xmalloc_array(unsigned int, nr_vcpus);
300 vnuma->vnode_to_pnode = xmalloc_array(nodeid_t, nr_vnodes);
301 vnuma->vmemrange = xmalloc_array(xen_vmemrange_t, nr_ranges);
302
303 if ( vnuma->vdistance == NULL || vnuma->vmemrange == NULL ||
304 vnuma->vcpu_to_vnode == NULL || vnuma->vnode_to_pnode == NULL )
305 {
306 vnuma_destroy(vnuma);
307 return ERR_PTR(-ENOMEM);
308 }
309
310 return vnuma;
311 }
312
313 /*
314 * Construct vNUMA topology from uinfo.
315 */
316 static struct vnuma_info *vnuma_init(const struct xen_domctl_vnuma *uinfo,
317 const struct domain *d)
318 {
319 unsigned int i, nr_vnodes;
320 int ret = -EINVAL;
321 struct vnuma_info *info;
322
323 nr_vnodes = uinfo->nr_vnodes;
324
325 if ( nr_vnodes == 0 || uinfo->nr_vcpus != d->max_vcpus || uinfo->pad != 0 )
326 return ERR_PTR(ret);
327
328 info = vnuma_alloc(nr_vnodes, uinfo->nr_vmemranges, d->max_vcpus);
329 if ( IS_ERR(info) )
330 return info;
331
332 ret = -EFAULT;
333
334 if ( copy_from_guest(info->vdistance, uinfo->vdistance,
335 nr_vnodes * nr_vnodes) )
336 goto vnuma_fail;
337
338 if ( copy_from_guest(info->vmemrange, uinfo->vmemrange,
339 uinfo->nr_vmemranges) )
340 goto vnuma_fail;
341
342 if ( copy_from_guest(info->vcpu_to_vnode, uinfo->vcpu_to_vnode,
343 d->max_vcpus) )
344 goto vnuma_fail;
345
346 ret = -E2BIG;
347 for ( i = 0; i < d->max_vcpus; ++i )
348 if ( info->vcpu_to_vnode[i] >= nr_vnodes )
349 goto vnuma_fail;
350
351 for ( i = 0; i < nr_vnodes; ++i )
352 {
353 unsigned int pnode;
354
355 ret = -EFAULT;
356 if ( copy_from_guest_offset(&pnode, uinfo->vnode_to_pnode, i, 1) )
357 goto vnuma_fail;
358 ret = -E2BIG;
359 if ( pnode >= MAX_NUMNODES )
360 goto vnuma_fail;
361 info->vnode_to_pnode[i] = pnode;
362 }
363
364 info->nr_vnodes = nr_vnodes;
365 info->nr_vmemranges = uinfo->nr_vmemranges;
366
367 /* Check that vmemranges flags are zero. */
368 ret = -EINVAL;
369 for ( i = 0; i < info->nr_vmemranges; i++ )
370 if ( info->vmemrange[i].flags != 0 )
371 goto vnuma_fail;
372
373 return info;
374
375 vnuma_fail:
376 vnuma_destroy(info);
377 return ERR_PTR(ret);
378 }
379
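/*
 * Top-level handler for the domctl hypercall: copy the request from the
 * guest, perform XSM and locking checks, then dispatch on op->cmd.
 */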
380 long do_domctl(XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
381 {
382 long ret = 0;
383 bool_t copyback = 0;
384 struct xen_domctl curop, *op = &curop;
385 struct domain *d;
386
387 if ( copy_from_guest(op, u_domctl, 1) )
388 return -EFAULT;
389
390 if ( op->interface_version != XEN_DOMCTL_INTERFACE_VERSION )
391 return -EACCES;
392
393 switch ( op->cmd )
394 {
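/*
 * Most sub-ops act on an existing domain and need op->domain looked up
 * and locked. The cases nested below (createdomain, gdbsx_guestmemio,
 * and test_assign_device with DOMID_INVALID) are the exceptions and
 * proceed with d == NULL.
 */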
395 case XEN_DOMCTL_test_assign_device:
396 if ( op->domain == DOMID_INVALID )
397 {
398 case XEN_DOMCTL_createdomain:
399 case XEN_DOMCTL_gdbsx_guestmemio:
400 d = NULL;
401 break;
402 }
403 /* fall through */
404 default:
405 d = rcu_lock_domain_by_id(op->domain);
406 if ( !d && op->cmd != XEN_DOMCTL_getdomaininfo )
407 return -ESRCH;
408 }
409
410 ret = xsm_domctl(XSM_OTHER, d, op->cmd);
411 if ( ret )
412 goto domctl_out_unlock_domonly;
413
414 if ( !domctl_lock_acquire() )
415 {
416 if ( d )
417 rcu_unlock_domain(d);
418 return hypercall_create_continuation(
419 __HYPERVISOR_domctl, "h", u_domctl);
420 }
421
422 switch ( op->cmd )
423 {
424
425 case XEN_DOMCTL_setvcpucontext:
426 {
427 vcpu_guest_context_u c = { .nat = NULL };
428 unsigned int vcpu = op->u.vcpucontext.vcpu;
429 struct vcpu *v;
430
431 ret = -EINVAL;
432 if ( (d == current->domain) || /* no domain_pause() */
433 (vcpu >= d->max_vcpus) || ((v = d->vcpu[vcpu]) == NULL) )
434 break;
435
436 if ( guest_handle_is_null(op->u.vcpucontext.ctxt) )
437 {
438 ret = vcpu_reset(v);
439 if ( ret == -ERESTART )
440 ret = hypercall_create_continuation(
441 __HYPERVISOR_domctl, "h", u_domctl);
442 break;
443 }
444
445 #ifdef CONFIG_COMPAT
446 BUILD_BUG_ON(sizeof(struct vcpu_guest_context)
447 < sizeof(struct compat_vcpu_guest_context));
448 #endif
449 ret = -ENOMEM;
450 if ( (c.nat = alloc_vcpu_guest_context()) == NULL )
451 break;
452
453 #ifdef CONFIG_COMPAT
454 if ( !is_pv_32bit_domain(d) )
455 ret = copy_from_guest(c.nat, op->u.vcpucontext.ctxt, 1);
456 else
457 ret = copy_from_guest(c.cmp,
458 guest_handle_cast(op->u.vcpucontext.ctxt,
459 void), 1);
460 #else
461 ret = copy_from_guest(c.nat, op->u.vcpucontext.ctxt, 1);
462 #endif
463 ret = ret ? -EFAULT : 0;
464
465 if ( ret == 0 )
466 {
467 domain_pause(d);
468 ret = arch_set_info_guest(v, c);
469 domain_unpause(d);
470
471 if ( ret == -ERESTART )
472 ret = hypercall_create_continuation(
473 __HYPERVISOR_domctl, "h", u_domctl);
474 }
475
476 free_vcpu_guest_context(c.nat);
477 break;
478 }
479
480 case XEN_DOMCTL_pausedomain:
481 ret = -EINVAL;
482 if ( d != current->domain )
483 ret = domain_pause_by_systemcontroller(d);
484 break;
485
486 case XEN_DOMCTL_unpausedomain:
487 ret = domain_unpause_by_systemcontroller(d);
488 break;
489
490 case XEN_DOMCTL_resumedomain:
491 if ( d == current->domain ) /* no domain_pause() */
492 ret = -EINVAL;
493 else
494 domain_resume(d);
495 break;
496
497 case XEN_DOMCTL_createdomain:
498 {
499 domid_t dom;
500 static domid_t rover = 0;
501 unsigned int domcr_flags;
502
503 ret = -EINVAL;
504 if ( (op->u.createdomain.flags &
505 ~(XEN_DOMCTL_CDF_hvm_guest
506 | XEN_DOMCTL_CDF_hap
507 | XEN_DOMCTL_CDF_s3_integrity
508 | XEN_DOMCTL_CDF_oos_off
509 | XEN_DOMCTL_CDF_xs_domain)) )
510 break;
511
512 dom = op->domain;
513 if ( (dom > 0) && (dom < DOMID_FIRST_RESERVED) )
514 {
515 ret = -EINVAL;
516 if ( !is_free_domid(dom) )
517 break;
518 }
519 else
520 {
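/*
 * No specific domid was requested: search for a free one, starting
 * just after the last domid handed out ('rover') and wrapping back
 * to 1 at DOMID_FIRST_RESERVED (domid 0 is never auto-allocated).
 */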
521 for ( dom = rover + 1; dom != rover; dom++ )
522 {
523 if ( dom == DOMID_FIRST_RESERVED )
524 dom = 1;
525 if ( is_free_domid(dom) )
526 break;
527 }
528
529 ret = -ENOMEM;
530 if ( dom == rover )
531 break;
532
533 rover = dom;
534 }
535
536 domcr_flags = 0;
537 if ( op->u.createdomain.flags & XEN_DOMCTL_CDF_hvm_guest )
538 domcr_flags |= DOMCRF_hvm;
539 if ( op->u.createdomain.flags & XEN_DOMCTL_CDF_hap )
540 domcr_flags |= DOMCRF_hap;
541 if ( op->u.createdomain.flags & XEN_DOMCTL_CDF_s3_integrity )
542 domcr_flags |= DOMCRF_s3_integrity;
543 if ( op->u.createdomain.flags & XEN_DOMCTL_CDF_oos_off )
544 domcr_flags |= DOMCRF_oos_off;
545 if ( op->u.createdomain.flags & XEN_DOMCTL_CDF_xs_domain )
546 domcr_flags |= DOMCRF_xs_domain;
547
548 d = domain_create(dom, domcr_flags, op->u.createdomain.ssidref,
549 &op->u.createdomain.config);
550 if ( IS_ERR(d) )
551 {
552 ret = PTR_ERR(d);
553 d = NULL;
554 break;
555 }
556
557 ret = 0;
558
559 memcpy(d->handle, op->u.createdomain.handle,
560 sizeof(xen_domain_handle_t));
561
562 op->domain = d->domain_id;
563 copyback = 1;
564 d = NULL;
565 break;
566 }
567
568 case XEN_DOMCTL_max_vcpus:
569 {
570 unsigned int i, max = op->u.max_vcpus.max, cpu;
571 cpumask_t *online;
572
573 ret = -EINVAL;
574 if ( (d == current->domain) || /* no domain_pause() */
575 (max > domain_max_vcpus(d)) )
576 break;
577
578 /* Until Xenoprof can dynamically grow its vcpu array... */
579 if ( d->xenoprof )
580 {
581 ret = -EAGAIN;
582 break;
583 }
584
585 /* Needed, for example, to ensure writable p.t. state is synced. */
586 domain_pause(d);
587
588 /*
589 * Certain operations (e.g. CPU microcode updates) modify data which is
590 * used during VCPU allocation/initialization
591 */
592 while ( !spin_trylock(&vcpu_alloc_lock) )
593 {
594 if ( hypercall_preempt_check() )
595 {
596 ret = hypercall_create_continuation(
597 __HYPERVISOR_domctl, "h", u_domctl);
598 goto maxvcpu_out_novcpulock;
599 }
600 }
601
602 /* We cannot reduce maximum VCPUs. */
603 ret = -EINVAL;
604 if ( (max < d->max_vcpus) && (d->vcpu[max] != NULL) )
605 goto maxvcpu_out;
606
607 /*
608 * For now don't allow increasing the vcpu count from a non-zero
609 * value: This code and all readers of d->vcpu would otherwise need
610 * to be converted to use RCU, but at present there's no tools side
611 * code path that would issue such a request.
612 */
613 ret = -EBUSY;
614 if ( (d->max_vcpus > 0) && (max > d->max_vcpus) )
615 goto maxvcpu_out;
616
617 ret = -ENOMEM;
618 online = cpupool_domain_cpumask(d);
619 if ( max > d->max_vcpus )
620 {
621 struct vcpu **vcpus;
622
623 BUG_ON(d->vcpu != NULL);
624 BUG_ON(d->max_vcpus != 0);
625
626 if ( (vcpus = xzalloc_array(struct vcpu *, max)) == NULL )
627 goto maxvcpu_out;
628
629 /* Install vcpu array /then/ update max_vcpus. */
630 d->vcpu = vcpus;
631 smp_wmb();
632 d->max_vcpus = max;
633 }
634
635 for ( i = 0; i < max; i++ )
636 {
637 if ( d->vcpu[i] != NULL )
638 continue;
639
640 cpu = (i == 0) ?
641 cpumask_any(online) :
642 cpumask_cycle(d->vcpu[i-1]->processor, online);
643
644 if ( alloc_vcpu(d, i, cpu) == NULL )
645 goto maxvcpu_out;
646 }
647
648 ret = 0;
649
650 maxvcpu_out:
651 spin_unlock(&vcpu_alloc_lock);
652
653 maxvcpu_out_novcpulock:
654 domain_unpause(d);
655 break;
656 }
657
658 case XEN_DOMCTL_soft_reset:
659 if ( d == current->domain ) /* no domain_pause() */
660 {
661 ret = -EINVAL;
662 break;
663 }
664 ret = domain_soft_reset(d);
665 break;
666
667 case XEN_DOMCTL_destroydomain:
668 ret = domain_kill(d);
669 if ( ret == -ERESTART )
670 ret = hypercall_create_continuation(
671 __HYPERVISOR_domctl, "h", u_domctl);
672 break;
673
674 case XEN_DOMCTL_setnodeaffinity:
675 {
676 nodemask_t new_affinity;
677
678 ret = xenctl_bitmap_to_nodemask(&new_affinity,
679 &op->u.nodeaffinity.nodemap);
680 if ( !ret )
681 ret = domain_set_node_affinity(d, &new_affinity);
682 break;
683 }
684
685 case XEN_DOMCTL_getnodeaffinity:
686 ret = nodemask_to_xenctl_bitmap(&op->u.nodeaffinity.nodemap,
687 &d->node_affinity);
688 break;
689
690 case XEN_DOMCTL_setvcpuaffinity:
691 case XEN_DOMCTL_getvcpuaffinity:
692 {
693 struct vcpu *v;
694 struct xen_domctl_vcpuaffinity *vcpuaff = &op->u.vcpuaffinity;
695
696 ret = -EINVAL;
697 if ( vcpuaff->vcpu >= d->max_vcpus )
698 break;
699
700 ret = -ESRCH;
701 if ( (v = d->vcpu[vcpuaff->vcpu]) == NULL )
702 break;
703
704 ret = -EINVAL;
705 if ( vcpuaffinity_params_invalid(vcpuaff) )
706 break;
707
708 if ( op->cmd == XEN_DOMCTL_setvcpuaffinity )
709 {
710 cpumask_var_t new_affinity, old_affinity;
711 cpumask_t *online = cpupool_domain_cpumask(v->domain);
712
713 /*
714 * We want to be able to restore hard affinity if we are setting
715 * both and changing soft affinity (which happens later, once hard
716 * affinity has already been changed successfully) fails.
717 */
718 if ( !alloc_cpumask_var(&old_affinity) )
719 {
720 ret = -ENOMEM;
721 break;
722 }
723 cpumask_copy(old_affinity, v->cpu_hard_affinity);
724
725 if ( !alloc_cpumask_var(&new_affinity) )
726 {
727 free_cpumask_var(old_affinity);
728 ret = -ENOMEM;
729 break;
730 }
731
732 /* Undo a stuck SCHED_pin_override? */
733 if ( vcpuaff->flags & XEN_VCPUAFFINITY_FORCE )
734 vcpu_pin_override(v, -1);
735
736 ret = 0;
737
738 /*
739 * We both set a new affinity and report back to the caller what
740 * the scheduler will be effectively using.
741 */
742 if ( vcpuaff->flags & XEN_VCPUAFFINITY_HARD )
743 {
744 ret = xenctl_bitmap_to_bitmap(cpumask_bits(new_affinity),
745 &vcpuaff->cpumap_hard,
746 nr_cpu_ids);
747 if ( !ret )
748 ret = vcpu_set_hard_affinity(v, new_affinity);
749 if ( ret )
750 goto setvcpuaffinity_out;
751
752 /*
753 * For hard affinity, what we return is the intersection of
754 * cpupool's online mask and the new hard affinity.
755 */
756 cpumask_and(new_affinity, online, v->cpu_hard_affinity);
757 ret = cpumask_to_xenctl_bitmap(&vcpuaff->cpumap_hard,
758 new_affinity);
759 }
760 if ( vcpuaff->flags & XEN_VCPUAFFINITY_SOFT )
761 {
762 ret = xenctl_bitmap_to_bitmap(cpumask_bits(new_affinity),
763 &vcpuaff->cpumap_soft,
764 nr_cpu_ids);
765 if ( !ret )
766 ret = vcpu_set_soft_affinity(v, new_affinity);
767 if ( ret )
768 {
769 /*
770 * Since we're returning error, the caller expects nothing
771 * happened, so we rollback the changes to hard affinity
772 * (if any).
773 */
774 if ( vcpuaff->flags & XEN_VCPUAFFINITY_HARD )
775 vcpu_set_hard_affinity(v, old_affinity);
776 goto setvcpuaffinity_out;
777 }
778
779 /*
780 * For soft affinity, we return the intersection between the
781 * new soft affinity, the cpupool's online map and the (new)
782 * hard affinity.
783 */
784 cpumask_and(new_affinity, new_affinity, online);
785 cpumask_and(new_affinity, new_affinity, v->cpu_hard_affinity);
786 ret = cpumask_to_xenctl_bitmap(&vcpuaff->cpumap_soft,
787 new_affinity);
788 }
789
790 setvcpuaffinity_out:
791 free_cpumask_var(new_affinity);
792 free_cpumask_var(old_affinity);
793 }
794 else
795 {
796 if ( vcpuaff->flags & XEN_VCPUAFFINITY_HARD )
797 ret = cpumask_to_xenctl_bitmap(&vcpuaff->cpumap_hard,
798 v->cpu_hard_affinity);
799 if ( vcpuaff->flags & XEN_VCPUAFFINITY_SOFT )
800 ret = cpumask_to_xenctl_bitmap(&vcpuaff->cpumap_soft,
801 v->cpu_soft_affinity);
802 }
803 break;
804 }
805
806 case XEN_DOMCTL_scheduler_op:
807 ret = sched_adjust(d, &op->u.scheduler_op);
808 copyback = 1;
809 break;
810
811 case XEN_DOMCTL_getdomaininfo:
812 {
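/*
 * If no domain was locked above (op->domain does not name an existing
 * domain), report on the first domain with domain_id >= op->domain
 * instead, which allows the toolstack to enumerate domains.
 */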
813 domid_t dom = DOMID_INVALID;
814
815 if ( !d )
816 {
817 ret = -EINVAL;
818 if ( op->domain >= DOMID_FIRST_RESERVED )
819 break;
820
821 rcu_read_lock(&domlist_read_lock);
822
823 dom = op->domain;
824 for_each_domain ( d )
825 if ( d->domain_id >= dom )
826 break;
827 }
828
829 ret = -ESRCH;
830 if ( d == NULL )
831 goto getdomaininfo_out;
832
833 ret = xsm_getdomaininfo(XSM_HOOK, d);
834 if ( ret )
835 goto getdomaininfo_out;
836
837 getdomaininfo(d, &op->u.getdomaininfo);
838
839 op->domain = op->u.getdomaininfo.domain;
840 copyback = 1;
841
842 getdomaininfo_out:
843 /* When d was non-NULL upon entry, no cleanup is needed. */
844 if ( dom == DOMID_INVALID )
845 break;
846
847 rcu_read_unlock(&domlist_read_lock);
848 d = NULL;
849 break;
850 }
851
852 case XEN_DOMCTL_getvcpucontext:
853 {
854 vcpu_guest_context_u c = { .nat = NULL };
855 struct vcpu *v;
856
857 ret = -EINVAL;
858 if ( op->u.vcpucontext.vcpu >= d->max_vcpus ||
859 (v = d->vcpu[op->u.vcpucontext.vcpu]) == NULL ||
860 v == current ) /* no vcpu_pause() */
861 goto getvcpucontext_out;
862
863 ret = -ENODATA;
864 if ( !v->is_initialised )
865 goto getvcpucontext_out;
866
867 #ifdef CONFIG_COMPAT
868 BUILD_BUG_ON(sizeof(struct vcpu_guest_context)
869 < sizeof(struct compat_vcpu_guest_context));
870 #endif
871 ret = -ENOMEM;
872 if ( (c.nat = xzalloc(struct vcpu_guest_context)) == NULL )
873 goto getvcpucontext_out;
874
875 vcpu_pause(v);
876
877 arch_get_info_guest(v, c);
878 ret = 0;
879
880 vcpu_unpause(v);
881
882 #ifdef CONFIG_COMPAT
883 if ( !is_pv_32bit_domain(d) )
884 ret = copy_to_guest(op->u.vcpucontext.ctxt, c.nat, 1);
885 else
886 ret = copy_to_guest(guest_handle_cast(op->u.vcpucontext.ctxt,
887 void), c.cmp, 1);
888 #else
889 ret = copy_to_guest(op->u.vcpucontext.ctxt, c.nat, 1);
890 #endif
891
892 if ( ret )
893 ret = -EFAULT;
894 copyback = 1;
895
896 getvcpucontext_out:
897 xfree(c.nat);
898 break;
899 }
900
901 case XEN_DOMCTL_getvcpuinfo:
902 {
903 struct vcpu *v;
904 struct vcpu_runstate_info runstate;
905
906 ret = -EINVAL;
907 if ( op->u.getvcpuinfo.vcpu >= d->max_vcpus )
908 break;
909
910 ret = -ESRCH;
911 if ( (v = d->vcpu[op->u.getvcpuinfo.vcpu]) == NULL )
912 break;
913
914 vcpu_runstate_get(v, &runstate);
915
916 op->u.getvcpuinfo.online = !(v->pause_flags & VPF_down);
917 op->u.getvcpuinfo.blocked = !!(v->pause_flags & VPF_blocked);
918 op->u.getvcpuinfo.running = v->is_running;
919 op->u.getvcpuinfo.cpu_time = runstate.time[RUNSTATE_running];
920 op->u.getvcpuinfo.cpu = v->processor;
921 ret = 0;
922 copyback = 1;
923 break;
924 }
925
926 case XEN_DOMCTL_max_mem:
927 {
928 uint64_t new_max = op->u.max_mem.max_memkb >> (PAGE_SHIFT - 10);
929
930 spin_lock(&d->page_alloc_lock);
931 /*
932 * NB. We removed a check that new_max >= current tot_pages; this means
933 * that the domain will now be allowed to "ratchet" down to new_max. In
934 * the meantime, while tot > max, all new allocations are disallowed.
935 */
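/* Clamp to the largest value representable in d->max_pages. */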
936 d->max_pages = min(new_max, (uint64_t)(typeof(d->max_pages))-1);
937 spin_unlock(&d->page_alloc_lock);
938 break;
939 }
940
941 case XEN_DOMCTL_setdomainhandle:
942 memcpy(d->handle, op->u.setdomainhandle.handle,
943 sizeof(xen_domain_handle_t));
944 break;
945
946 case XEN_DOMCTL_setdebugging:
947 if ( unlikely(d == current->domain) ) /* no domain_pause() */
948 ret = -EINVAL;
949 else
950 {
951 domain_pause(d);
952 d->debugger_attached = !!op->u.setdebugging.enable;
953 domain_unpause(d); /* causes guest to latch new status */
954 }
955 break;
956
957 case XEN_DOMCTL_irq_permission:
958 {
959 unsigned int pirq = op->u.irq_permission.pirq, irq;
960 int allow = op->u.irq_permission.allow_access;
961
962 if ( pirq >= current->domain->nr_pirqs )
963 {
964 ret = -EINVAL;
965 break;
966 }
967 irq = pirq_access_permitted(current->domain, pirq);
968 if ( !irq || xsm_irq_permission(XSM_HOOK, d, irq, allow) )
969 ret = -EPERM;
970 else if ( allow )
971 ret = irq_permit_access(d, irq);
972 else
973 ret = irq_deny_access(d, irq);
974 break;
975 }
976
977 case XEN_DOMCTL_iomem_permission:
978 {
979 unsigned long mfn = op->u.iomem_permission.first_mfn;
980 unsigned long nr_mfns = op->u.iomem_permission.nr_mfns;
981 int allow = op->u.iomem_permission.allow_access;
982
983 ret = -EINVAL;
984 if ( (mfn + nr_mfns - 1) < mfn ) /* wrap? */
985 break;
986
987 if ( !iomem_access_permitted(current->domain,
988 mfn, mfn + nr_mfns - 1) ||
989 xsm_iomem_permission(XSM_HOOK, d, mfn, mfn + nr_mfns - 1, allow) )
990 ret = -EPERM;
991 else if ( allow )
992 ret = iomem_permit_access(d, mfn, mfn + nr_mfns - 1);
993 else
994 ret = iomem_deny_access(d, mfn, mfn + nr_mfns - 1);
995 if ( !ret )
996 memory_type_changed(d);
997 break;
998 }
999
1000 case XEN_DOMCTL_memory_mapping:
1001 {
1002 unsigned long gfn = op->u.memory_mapping.first_gfn;
1003 unsigned long mfn = op->u.memory_mapping.first_mfn;
1004 unsigned long nr_mfns = op->u.memory_mapping.nr_mfns;
1005 unsigned long mfn_end = mfn + nr_mfns - 1;
1006 int add = op->u.memory_mapping.add_mapping;
1007
1008 ret = -EINVAL;
1009 if ( mfn_end < mfn || /* wrap? */
1010 ((mfn | mfn_end) >> (paddr_bits - PAGE_SHIFT)) ||
1011 (gfn + nr_mfns - 1) < gfn ) /* wrap? */
1012 break;
1013
1014 #ifndef CONFIG_X86 /* XXX ARM!? */
1015 ret = -E2BIG;
1016 /* Must break hypercall up as this could take a while. */
1017 if ( nr_mfns > 64 )
1018 break;
1019 #endif
1020
1021 ret = -EPERM;
1022 if ( !iomem_access_permitted(current->domain, mfn, mfn_end) ||
1023 !iomem_access_permitted(d, mfn, mfn_end) )
1024 break;
1025
1026 ret = xsm_iomem_mapping(XSM_HOOK, d, mfn, mfn_end, add);
1027 if ( ret )
1028 break;
1029
1030 if ( add )
1031 {
1032 printk(XENLOG_G_DEBUG
1033 "memory_map:add: dom%d gfn=%lx mfn=%lx nr=%lx\n",
1034 d->domain_id, gfn, mfn, nr_mfns);
1035
1036 ret = map_mmio_regions(d, _gfn(gfn), nr_mfns, _mfn(mfn));
1037 if ( ret < 0 )
1038 printk(XENLOG_G_WARNING
1039 "memory_map:fail: dom%d gfn=%lx mfn=%lx nr=%lx ret:%ld\n",
1040 d->domain_id, gfn, mfn, nr_mfns, ret);
1041 }
1042 else
1043 {
1044 printk(XENLOG_G_DEBUG
1045 "memory_map:remove: dom%d gfn=%lx mfn=%lx nr=%lx\n",
1046 d->domain_id, gfn, mfn, nr_mfns);
1047
1048 ret = unmap_mmio_regions(d, _gfn(gfn), nr_mfns, _mfn(mfn));
1049 if ( ret < 0 && is_hardware_domain(current->domain) )
1050 printk(XENLOG_ERR
1051 "memory_map: error %ld removing dom%d access to [%lx,%lx]\n",
1052 ret, d->domain_id, mfn, mfn_end);
1053 }
1054 /* Do this unconditionally to cover errors on above failure paths. */
1055 memory_type_changed(d);
1056 break;
1057 }
1058
1059 case XEN_DOMCTL_settimeoffset:
1060 domain_set_time_offset(d, op->u.settimeoffset.time_offset_seconds);
1061 break;
1062
1063 case XEN_DOMCTL_set_target:
1064 {
1065 struct domain *e;
1066
1067 ret = -ESRCH;
1068 e = get_domain_by_id(op->u.set_target.target);
1069 if ( e == NULL )
1070 break;
1071
1072 ret = -EINVAL;
1073 if ( (d == e) || (d->target != NULL) )
1074 {
1075 put_domain(e);
1076 break;
1077 }
1078
1079 ret = -EOPNOTSUPP;
1080 if ( is_hvm_domain(e) )
1081 ret = xsm_set_target(XSM_HOOK, d, e);
1082 if ( ret )
1083 {
1084 put_domain(e);
1085 break;
1086 }
1087
1088 /* Hold reference on @e until we destroy @d. */
1089 d->target = e;
1090 break;
1091 }
1092
1093 case XEN_DOMCTL_subscribe:
1094 d->suspend_evtchn = op->u.subscribe.port;
1095 break;
1096
1097 case XEN_DOMCTL_vm_event_op:
1098 ret = vm_event_domctl(d, &op->u.vm_event_op,
1099 guest_handle_cast(u_domctl, void));
1100 copyback = 1;
1101 break;
1102
1103 #ifdef CONFIG_HAS_MEM_ACCESS
1104 case XEN_DOMCTL_set_access_required:
1105 if ( unlikely(current->domain == d) ) /* no domain_pause() */
1106 ret = -EPERM;
1107 else
1108 {
1109 domain_pause(d);
1110 p2m_get_hostp2m(d)->access_required =
1111 op->u.access_required.access_required;
1112 domain_unpause(d);
1113 }
1114 break;
1115 #endif
1116
1117 case XEN_DOMCTL_set_virq_handler:
1118 ret = set_global_virq_handler(d, op->u.set_virq_handler.virq);
1119 break;
1120
1121 case XEN_DOMCTL_set_max_evtchn:
1122 d->max_evtchn_port = min_t(unsigned int,
1123 op->u.set_max_evtchn.max_port,
1124 INT_MAX);
1125 break;
1126
1127 case XEN_DOMCTL_setvnumainfo:
1128 {
1129 struct vnuma_info *vnuma;
1130
1131 vnuma = vnuma_init(&op->u.vnuma, d);
1132 if ( IS_ERR(vnuma) )
1133 {
1134 ret = PTR_ERR(vnuma);
1135 break;
1136 }
1137
1138 /* Overwrite the vNUMA topology for the domain. */
1139 write_lock(&d->vnuma_rwlock);
1140 vnuma_destroy(d->vnuma);
1141 d->vnuma = vnuma;
1142 write_unlock(&d->vnuma_rwlock);
1143
1144 break;
1145 }
1146
1147 case XEN_DOMCTL_monitor_op:
1148 ret = monitor_domctl(d, &op->u.monitor_op);
1149 if ( !ret )
1150 copyback = 1;
1151 break;
1152
1153 case XEN_DOMCTL_set_gnttab_limits:
1154 ret = grant_table_set_limits(d, op->u.set_gnttab_limits.grant_frames,
1155 op->u.set_gnttab_limits.maptrack_frames);
1156 break;
1157
1158 default:
1159 ret = arch_do_domctl(op, d, u_domctl);
1160 break;
1161 }
1162
1163 domctl_lock_release();
1164
1165 domctl_out_unlock_domonly:
1166 if ( d )
1167 rcu_unlock_domain(d);
1168
1169 if ( copyback && __copy_to_guest(u_domctl, op, 1) )
1170 ret = -EFAULT;
1171
1172 return ret;
1173 }
1174
1175 /*
1176 * Local variables:
1177 * mode: C
1178 * c-file-style: "BSD"
1179 * c-basic-offset: 4
1180 * tab-width: 4
1181 * indent-tabs-mode: nil
1182 * End:
1183 */
1184