1 /******************************************************************************
2  * domain.c
3  *
4  * Generic domain-handling functions.
5  */
6 
7 #include <xen/compat.h>
8 #include <xen/init.h>
9 #include <xen/lib.h>
10 #include <xen/ctype.h>
11 #include <xen/err.h>
12 #include <xen/param.h>
13 #include <xen/sched.h>
14 #include <xen/domain.h>
15 #include <xen/mm.h>
16 #include <xen/event.h>
17 #include <xen/vm_event.h>
18 #include <xen/time.h>
19 #include <xen/console.h>
20 #include <xen/softirq.h>
21 #include <xen/tasklet.h>
22 #include <xen/domain_page.h>
23 #include <xen/rangeset.h>
24 #include <xen/guest_access.h>
25 #include <xen/hypercall.h>
26 #include <xen/delay.h>
27 #include <xen/shutdown.h>
28 #include <xen/percpu.h>
29 #include <xen/multicall.h>
30 #include <xen/rcupdate.h>
31 #include <xen/wait.h>
32 #include <xen/grant_table.h>
33 #include <xen/xenoprof.h>
34 #include <xen/irq.h>
35 #include <xen/argo.h>
36 #include <asm/p2m.h>
37 #include <asm/processor.h>
38 #include <public/sched.h>
39 #include <public/sysctl.h>
40 #include <public/vcpu.h>
41 #include <xsm/xsm.h>
42 #include <xen/trace.h>
43 #include <asm/setup.h>
44 
45 #ifdef CONFIG_X86
46 #include <asm/guest.h>
47 #endif
48 
49 /* Linux config option: propagated to domain0 */
50 /* xen_processor_pmbits: xen control Cx, Px, ... */
51 unsigned int xen_processor_pmbits = XEN_PROCESSOR_PM_PX;
52 
53 /* opt_dom0_vcpus_pin: If true, dom0 VCPUs are pinned. */
54 bool opt_dom0_vcpus_pin;
55 boolean_param("dom0_vcpus_pin", opt_dom0_vcpus_pin);
56 
57 /* Protect updates/reads (resp.) of domain_list and domain_hash. */
58 DEFINE_SPINLOCK(domlist_update_lock);
59 DEFINE_RCU_READ_LOCK(domlist_read_lock);
60 
61 #define DOMAIN_HASH_SIZE 256
62 #define DOMAIN_HASH(_id) ((int)(_id)&(DOMAIN_HASH_SIZE-1))
63 static struct domain *domain_hash[DOMAIN_HASH_SIZE];
64 struct domain *domain_list;
65 
66 struct domain *hardware_domain __read_mostly;
67 
68 #ifdef CONFIG_LATE_HWDOM
69 domid_t hardware_domid __read_mostly;
70 integer_param("hardware_dom", hardware_domid);
71 #endif
72 
73 /* Private domain structs for DOMID_XEN, DOMID_IO, etc. */
74 struct domain *__read_mostly dom_xen;
75 struct domain *__read_mostly dom_io;
76 #ifdef CONFIG_MEM_SHARING
77 struct domain *__read_mostly dom_cow;
78 #endif
79 
80 struct vcpu *idle_vcpu[NR_CPUS] __read_mostly;
81 
82 vcpu_info_t dummy_vcpu_info;
83 
84 bool __read_mostly vmtrace_available;
85 
86 bool __read_mostly vpmu_is_available;
87 
88 static void __domain_finalise_shutdown(struct domain *d)
89 {
90     struct vcpu *v;
91 
92     BUG_ON(!spin_is_locked(&d->shutdown_lock));
93 
94     if ( d->is_shut_down )
95         return;
96 
97     for_each_vcpu ( d, v )
98         if ( !v->paused_for_shutdown )
99             return;
100 
101     d->is_shut_down = 1;
102     if ( (d->shutdown_code == SHUTDOWN_suspend) && d->suspend_evtchn )
103         evtchn_send(d, d->suspend_evtchn);
104     else
105         send_global_virq(VIRQ_DOM_EXC);
106 }
107 
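/*
 * If @v's domain is shutting down, pause @v (unless already paused for
 * shutdown), clear any shutdown deferral, and complete the domain's
 * shutdown once every vCPU has been paused.
 */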
108 static void vcpu_check_shutdown(struct vcpu *v)
109 {
110     struct domain *d = v->domain;
111 
112     spin_lock(&d->shutdown_lock);
113 
114     if ( d->is_shutting_down )
115     {
116         if ( !v->paused_for_shutdown )
117             vcpu_pause_nosync(v);
118         v->paused_for_shutdown = 1;
119         v->defer_shutdown = 0;
120         __domain_finalise_shutdown(d);
121     }
122 
123     spin_unlock(&d->shutdown_lock);
124 }
125 
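/*
 * Point the vCPU's info area back at its default location: the matching
 * shared_info slot for vCPU IDs below XEN_LEGACY_MAX_VCPUS, or the dummy
 * area otherwise.
 */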
126 static void vcpu_info_reset(struct vcpu *v)
127 {
128     struct domain *d = v->domain;
129 
130     v->vcpu_info_area.map =
131         ((v->vcpu_id < XEN_LEGACY_MAX_VCPUS)
132          ? (vcpu_info_t *)&shared_info(d, vcpu_info[v->vcpu_id])
133          : &dummy_vcpu_info);
134 }
135 
136 static void vmtrace_free_buffer(struct vcpu *v)
137 {
138     const struct domain *d = v->domain;
139     struct page_info *pg = v->vmtrace.pg;
140     unsigned int i;
141 
142     if ( !pg )
143         return;
144 
145     v->vmtrace.pg = NULL;
146 
147     for ( i = 0; i < (d->vmtrace_size >> PAGE_SHIFT); i++ )
148     {
149         put_page_alloc_ref(&pg[i]);
150         put_page_and_type(&pg[i]);
151     }
152 }
153 
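/*
 * Allocate the per-vCPU vmtrace buffer of d->vmtrace_size bytes, taking a
 * writable type reference on each constituent page before publishing the
 * buffer via v->vmtrace.pg.
 */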
154 static int vmtrace_alloc_buffer(struct vcpu *v)
155 {
156     struct domain *d = v->domain;
157     struct page_info *pg;
158     unsigned int i;
159 
160     if ( !d->vmtrace_size )
161         return 0;
162 
163     pg = alloc_domheap_pages(d, get_order_from_bytes(d->vmtrace_size),
164                              MEMF_no_refcount);
165     if ( !pg )
166         return -ENOMEM;
167 
168     for ( i = 0; i < (d->vmtrace_size >> PAGE_SHIFT); i++ )
169         if ( unlikely(!get_page_and_type(&pg[i], d, PGT_writable_page)) )
170             /*
171              * The domain can't possibly know about this page yet, so failure
172              * here is a clear indication of something fishy going on.
173              */
174             goto refcnt_err;
175 
176     /*
177      * We must only let vmtrace_free_buffer() take any action in the success
178      * case when we've taken all the refs it intends to drop.
179      */
180     v->vmtrace.pg = pg;
181     return 0;
182 
183  refcnt_err:
184     /*
185      * We can theoretically reach this point if someone has taken 2^43 refs on
186      * the frames in the time the above loop takes to execute, or someone has
187      * made a blind decrease reservation hypercall and managed to pick the
188      * right mfn.  Free the memory we safely can, and leak the rest.
189      */
190     while ( i-- )
191     {
192         put_page_alloc_ref(&pg[i]);
193         put_page_and_type(&pg[i]);
194     }
195 
196     return -ENODATA;
197 }
198 
199 /*
200  * Release resources held by a vcpu.  There may or may not be live references
201  * to the vcpu, and it may or may not be fully constructed.
202  *
203  * If d->is_dying is DOMDYING_dead, this must not return non-zero.
204  */
205 static int vcpu_teardown(struct vcpu *v)
206 {
207     vmtrace_free_buffer(v);
208 
209     return 0;
210 }
211 
212 /*
213  * Destroy a vcpu once all references to it have been dropped.  Used either
214  * from domain_destroy()'s RCU path, or from the vcpu_create() error path
215  * before the vcpu is placed on the domain's vcpu list.
216  */
217 static void vcpu_destroy(struct vcpu *v)
218 {
219     free_vcpu_struct(v);
220 }
221 
222 struct vcpu *vcpu_create(struct domain *d, unsigned int vcpu_id)
223 {
224     struct vcpu *v;
225 
226     /*
227      * Sanity check some input expectations:
228      * - vcpu_id should be bounded by d->max_vcpus, and not previously
229      *   allocated.
230      * - VCPUs should be tightly packed and allocated in ascending order,
231      *   except for the idle domain which may vary based on PCPU numbering.
232      */
233     if ( vcpu_id >= d->max_vcpus || d->vcpu[vcpu_id] ||
234          (!is_idle_domain(d) && vcpu_id && !d->vcpu[vcpu_id - 1]) )
235     {
236         ASSERT_UNREACHABLE();
237         return NULL;
238     }
239 
240     if ( (v = alloc_vcpu_struct(d)) == NULL )
241         return NULL;
242 
243     v->domain = d;
244     v->vcpu_id = vcpu_id;
245     v->dirty_cpu = VCPU_CPU_CLEAN;
246 
247     rwlock_init(&v->virq_lock);
248 
249     tasklet_init(&v->continue_hypercall_tasklet, NULL, NULL);
250 
251     grant_table_init_vcpu(v);
252 
253     if ( is_idle_domain(d) )
254     {
255         v->runstate.state = RUNSTATE_running;
256         v->new_state = RUNSTATE_running;
257     }
258     else
259     {
260         v->runstate.state = RUNSTATE_offline;
261         v->runstate.state_entry_time = NOW();
262         set_bit(_VPF_down, &v->pause_flags);
263         vcpu_info_reset(v);
264         init_waitqueue_vcpu(v);
265     }
266 
267     if ( sched_init_vcpu(v) != 0 )
268         goto fail_wq;
269 
270     if ( vmtrace_alloc_buffer(v) != 0 )
271         goto fail_wq;
272 
273     if ( arch_vcpu_create(v) != 0 )
274         goto fail_sched;
275 
276     d->vcpu[vcpu_id] = v;
277     if ( vcpu_id != 0 )
278     {
279         int prev_id = v->vcpu_id - 1;
280         while ( (prev_id >= 0) && (d->vcpu[prev_id] == NULL) )
281             prev_id--;
282         BUG_ON(prev_id < 0);
283         v->next_in_list = d->vcpu[prev_id]->next_in_list;
284         d->vcpu[prev_id]->next_in_list = v;
285     }
286 
287     /* Must be called after making new vcpu visible to for_each_vcpu(). */
288     vcpu_check_shutdown(v);
289 
290     return v;
291 
292  fail_sched:
293     sched_destroy_vcpu(v);
294  fail_wq:
295     destroy_waitqueue_vcpu(v);
296 
297     /* Must not hit a continuation in this context. */
298     if ( vcpu_teardown(v) )
299         ASSERT_UNREACHABLE();
300 
301     vcpu_destroy(v);
302 
303     return NULL;
304 }
305 
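/*
 * For a late hardware domain, transfer the hardware access ranges that were
 * set up for domain 0 (IRQs, I/O memory, and on x86 I/O ports) to the newly
 * created hardware domain.
 */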
306 static int late_hwdom_init(struct domain *d)
307 {
308 #ifdef CONFIG_LATE_HWDOM
309     struct domain *dom0;
310     int rv;
311 
312     if ( d != hardware_domain || d->domain_id == 0 )
313         return 0;
314 
315     rv = xsm_init_hardware_domain(XSM_HOOK, d);
316     if ( rv )
317         return rv;
318 
319     printk("Initialising hardware domain %d\n", hardware_domid);
320 
321     dom0 = rcu_lock_domain_by_id(0);
322     ASSERT(dom0 != NULL);
323     /*
324      * Hardware resource ranges for domain 0 have been set up from
325      * various sources intended to restrict the hardware domain's
326      * access.  Apply these ranges to the actual hardware domain.
327      *
328      * Because the lists are being swapped, a side effect of this
329      * operation is that Domain 0's rangesets are cleared.  Since
330      * domain 0 should not be accessing the hardware when it constructs
331      * a hardware domain, this should not be a problem.  Both lists
332      * may be modified after this hypercall returns if a more complex
333      * device model is desired.
334      */
335     rangeset_swap(d->irq_caps, dom0->irq_caps);
336     rangeset_swap(d->iomem_caps, dom0->iomem_caps);
337 #ifdef CONFIG_X86
338     rangeset_swap(d->arch.ioport_caps, dom0->arch.ioport_caps);
339     setup_io_bitmap(d);
340     setup_io_bitmap(dom0);
341 #endif
342 
343     rcu_unlock_domain(dom0);
344 
345     iommu_hwdom_init(d);
346 
347     return rv;
348 #else
349     return 0;
350 #endif
351 }
352 
353 #ifdef CONFIG_HAS_PIRQ
354 
355 static unsigned int __read_mostly extra_hwdom_irqs;
356 #define DEFAULT_EXTRA_DOMU_IRQS 32U
357 static unsigned int __read_mostly extra_domU_irqs = DEFAULT_EXTRA_DOMU_IRQS;
358 
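/*
 * Parse "extra_guest_irqs=[<domU>][,<hwdom>]": the first value adjusts the
 * number of PIRQs granted to ordinary domains on top of nr_static_irqs, the
 * second the hardware domain's allowance.
 */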
359 static int __init cf_check parse_extra_guest_irqs(const char *s)
360 {
361     if ( isdigit(*s) )
362         extra_domU_irqs = simple_strtoul(s, &s, 0);
363     if ( *s == ',' && isdigit(*++s) )
364         extra_hwdom_irqs = simple_strtoul(s, &s, 0);
365 
366     return *s ? -EINVAL : 0;
367 }
368 custom_param("extra_guest_irqs", parse_extra_guest_irqs);
369 
370 #endif /* CONFIG_HAS_PIRQ */
371 
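/*
 * Parse the comma-separated "dom0=" option list, handing each element to
 * parse_arch_dom0_param() and reporting the first error encountered.
 */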
372 static int __init cf_check parse_dom0_param(const char *s)
373 {
374     const char *ss;
375     int rc = 0;
376 
377     do {
378         int ret;
379 
380         ss = strchr(s, ',');
381         if ( !ss )
382             ss = strchr(s, '\0');
383 
384         ret = parse_arch_dom0_param(s, ss);
385         if ( ret && !rc )
386             rc = ret;
387 
388         s = ss + 1;
389     } while ( *ss );
390 
391     return rc;
392 }
393 custom_param("dom0", parse_dom0_param);
394 
395 /*
396  * Release resources held by a domain.  There may or may not be live
397  * references to the domain, and it may or may not be fully constructed.
398  *
399  * d->is_dying differing between DOMDYING_dying and DOMDYING_dead can be used
400  * to determine if live references to the domain exist, and also whether
401  * continuations are permitted.
402  *
403  * If d->is_dying is DOMDYING_dead, this must not return non-zero.
404  */
405 static int domain_teardown(struct domain *d)
406 {
407     struct vcpu *v;
408     int rc;
409 
410     BUG_ON(!d->is_dying);
411 
412     /*
413      * This hypercall can take minutes of wallclock time to complete.  This
414      * logic implements a co-routine, stashing state in struct domain across
415      * hypercall continuation boundaries.
416      */
417     switch ( d->teardown.val )
418     {
419         /*
420          * Record the current progress.  Subsequent hypercall continuations
421          * will logically restart work from this point.
422          *
423          * PROGRESS() markers must not be in the middle of loops.  The loop
424          * variable isn't preserved across a continuation.  PROGRESS_VCPU()
425          * markers may be used in the middle of for_each_vcpu() loops, which
426          * preserve v but no other loop variables.
427          *
428          * To avoid redundant work, there should be a marker before each
429          * function which may return -ERESTART.
430          */
431 #define PROGRESS(x)                             \
432         d->teardown.val = PROG_ ## x;           \
433         fallthrough;                            \
434     case PROG_ ## x
435 
436 #define PROGRESS_VCPU(x)                        \
437         d->teardown.val = PROG_vcpu_ ## x;      \
438         d->teardown.vcpu = v;                   \
439         fallthrough;                            \
440     case PROG_vcpu_ ## x:                       \
441         v = d->teardown.vcpu
442 
443         enum {
444             PROG_none,
445             PROG_gnttab_mappings,
446             PROG_vcpu_teardown,
447             PROG_arch_teardown,
448             PROG_done,
449         };
450 
451     case PROG_none:
452         BUILD_BUG_ON(PROG_none != 0);
453 
454     PROGRESS(gnttab_mappings):
455         rc = gnttab_release_mappings(d);
456         if ( rc )
457             return rc;
458 
459         for_each_vcpu ( d, v )
460         {
461             /* SAF-5-safe MISRA C Rule 16.2: switch label enclosed by for loop */
462             PROGRESS_VCPU(teardown);
463 
464             rc = vcpu_teardown(v);
465             if ( rc )
466                 return rc;
467         }
468 
469     PROGRESS(arch_teardown):
470         rc = arch_domain_teardown(d);
471         if ( rc )
472             return rc;
473 
474     PROGRESS(done):
475         break;
476 
477 #undef PROGRESS_VCPU
478 #undef PROGRESS
479 
480     default:
481         BUG();
482     }
483 
484     return 0;
485 }
486 
487 /*
488  * Destroy a domain once all references to it have been dropped.  Used either
489  * from the RCU path, or from the domain_create() error path before the domain
490  * is inserted into the domlist.
491  */
492 static void _domain_destroy(struct domain *d)
493 {
494     BUG_ON(!d->is_dying);
495     BUG_ON(atomic_read(&d->refcnt) != DOMAIN_DESTROYED);
496 
497     xfree(d->pbuf);
498 
499     argo_destroy(d);
500 
501     rangeset_domain_destroy(d);
502 
503     free_cpumask_var(d->dirty_cpumask);
504 
505     xsm_free_security_domain(d);
506 
507     lock_profile_deregister_struct(LOCKPROF_TYPE_PERDOM, d);
508 
509     free_domain_struct(d);
510 }
511 
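/*
 * Audit a toolstack-supplied createdomain configuration: reject unknown
 * flags and inconsistent combinations before deferring to
 * arch_sanitise_domain_config() for the architecture-specific checks.
 */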
512 static int sanitise_domain_config(struct xen_domctl_createdomain *config)
513 {
514     bool hvm = config->flags & XEN_DOMCTL_CDF_hvm;
515     bool hap = config->flags & XEN_DOMCTL_CDF_hap;
516     bool iommu = config->flags & XEN_DOMCTL_CDF_iommu;
517     bool vpmu = config->flags & XEN_DOMCTL_CDF_vpmu;
518 
519     if ( config->flags &
520          ~(XEN_DOMCTL_CDF_hvm | XEN_DOMCTL_CDF_hap |
521            XEN_DOMCTL_CDF_s3_integrity | XEN_DOMCTL_CDF_oos_off |
522            XEN_DOMCTL_CDF_xs_domain | XEN_DOMCTL_CDF_iommu |
523            XEN_DOMCTL_CDF_nested_virt | XEN_DOMCTL_CDF_vpmu) )
524     {
525         dprintk(XENLOG_INFO, "Unknown CDF flags %#x\n", config->flags);
526         return -EINVAL;
527     }
528 
529     if ( config->grant_opts & ~XEN_DOMCTL_GRANT_version_mask )
530     {
531         dprintk(XENLOG_INFO, "Unknown grant options %#x\n", config->grant_opts);
532         return -EINVAL;
533     }
534 
535     if ( config->max_vcpus < 1 )
536     {
537         dprintk(XENLOG_INFO, "No vCPUS\n");
538         return -EINVAL;
539     }
540 
541     if ( hap && !hvm )
542     {
543         dprintk(XENLOG_INFO, "HAP requested for non-HVM guest\n");
544         return -EINVAL;
545     }
546 
547     if ( iommu )
548     {
549         if ( config->iommu_opts & ~XEN_DOMCTL_IOMMU_no_sharept )
550         {
551             dprintk(XENLOG_INFO, "Unknown IOMMU options %#x\n",
552                     config->iommu_opts);
553             return -EINVAL;
554         }
555 
556         if ( !iommu_enabled )
557         {
558             dprintk(XENLOG_INFO, "IOMMU requested but not available\n");
559             return -EINVAL;
560         }
561     }
562     else
563     {
564         if ( config->iommu_opts )
565         {
566             dprintk(XENLOG_INFO,
567                     "IOMMU options specified but IOMMU not requested\n");
568             return -EINVAL;
569         }
570     }
571 
572     if ( config->vmtrace_size && !vmtrace_available )
573     {
574         dprintk(XENLOG_INFO, "vmtrace requested but not available\n");
575         return -EINVAL;
576     }
577 
578     if ( vpmu && !vpmu_is_available )
579     {
580         dprintk(XENLOG_INFO, "vpmu requested but cannot be enabled this way\n");
581         return -EINVAL;
582     }
583 
584     return arch_sanitise_domain_config(config);
585 }
586 
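/*
 * Create and initialise a domain.  @config must be NULL for system domains
 * (DOMID_XEN, DOMID_IO, the idle domain, ...) and non-NULL for ordinary
 * domains.  Returns the new domain, or an ERR_PTR() value on failure.
 */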
587 struct domain *domain_create(domid_t domid,
588                              struct xen_domctl_createdomain *config,
589                              unsigned int flags)
590 {
591     struct domain *d, **pd, *old_hwdom = NULL;
592     enum { INIT_watchdog = 1u<<1,
593            INIT_evtchn = 1u<<3, INIT_gnttab = 1u<<4, INIT_arch = 1u<<5 };
594     int err, init_status = 0;
595 
596     if ( config && (err = sanitise_domain_config(config)) )
597         return ERR_PTR(err);
598 
599     if ( (d = alloc_domain_struct()) == NULL )
600         return ERR_PTR(-ENOMEM);
601 
602     /* Sort out our idea of is_system_domain(). */
603     d->domain_id = domid;
604 
605     /* Holding CDF_* internal flags. */
606     d->cdf = flags;
607 
608     /* Debug sanity. */
609     ASSERT(is_system_domain(d) ? config == NULL : config != NULL);
610 
611     if ( config )
612     {
613         d->options = config->flags;
614         d->vmtrace_size = config->vmtrace_size;
615     }
616 
617     /* Sort out our idea of is_control_domain(). */
618     d->is_privileged = flags & CDF_privileged;
619 
620     /* Sort out our idea of is_hardware_domain(). */
621     if ( domid == 0 || domid == hardware_domid )
622     {
623         if ( hardware_domid < 0 || hardware_domid >= DOMID_FIRST_RESERVED )
624             panic("The value of hardware_dom must be a valid domain ID\n");
625 
626         old_hwdom = hardware_domain;
627         hardware_domain = d;
628     }
629 
630     TRACE_TIME(TRC_DOM0_DOM_ADD, d->domain_id);
631 
632     lock_profile_register_struct(LOCKPROF_TYPE_PERDOM, d, domid);
633 
634     atomic_set(&d->refcnt, 1);
635     RCU_READ_LOCK_INIT(&d->rcu_lock);
636     rspin_lock_init_prof(d, domain_lock);
637     rspin_lock_init_prof(d, page_alloc_lock);
638     spin_lock_init(&d->hypercall_deadlock_mutex);
639     INIT_PAGE_LIST_HEAD(&d->page_list);
640     INIT_PAGE_LIST_HEAD(&d->extra_page_list);
641     INIT_PAGE_LIST_HEAD(&d->xenpage_list);
642 #ifdef CONFIG_STATIC_MEMORY
643     INIT_PAGE_LIST_HEAD(&d->resv_page_list);
644 #endif
645 
646 
647     spin_lock_init(&d->node_affinity_lock);
648     d->node_affinity = NODE_MASK_ALL;
649     d->auto_node_affinity = 1;
650 
651     spin_lock_init(&d->shutdown_lock);
652     d->shutdown_code = SHUTDOWN_CODE_INVALID;
653 
654     spin_lock_init(&d->pbuf_lock);
655 
656     rwlock_init(&d->vnuma_rwlock);
657 
658 #ifdef CONFIG_HAS_PCI
659     INIT_LIST_HEAD(&d->pdev_list);
660     rwlock_init(&d->pci_lock);
661 #endif
662 
663     /* All error paths can depend on the above setup. */
664 
665     /*
666      * Allocate d->vcpu[] and set ->max_vcpus up early.  Various per-domain
667      * resources want to be sized based on max_vcpus.
668      */
669     if ( !is_system_domain(d) )
670     {
671         err = -ENOMEM;
672         d->vcpu = xzalloc_array(struct vcpu *, config->max_vcpus);
673         if ( !d->vcpu )
674             goto fail;
675 
676         d->max_vcpus = config->max_vcpus;
677     }
678 
679     if ( (err = xsm_alloc_security_domain(d)) != 0 )
680         goto fail;
681 
682     err = -ENOMEM;
683     if ( !zalloc_cpumask_var(&d->dirty_cpumask) )
684         goto fail;
685 
686     rangeset_domain_initialise(d);
687 
688     /* DOMID_{XEN,IO,etc} (other than IDLE) are sufficiently constructed. */
689     if ( is_system_domain(d) && !is_idle_domain(d) )
690         return d;
691 
692 #ifdef CONFIG_HAS_PIRQ
693     if ( !is_idle_domain(d) )
694     {
695         if ( !is_hardware_domain(d) )
696             d->nr_pirqs = nr_static_irqs + extra_domU_irqs;
697         else
698             d->nr_pirqs = extra_hwdom_irqs ? nr_static_irqs + extra_hwdom_irqs
699                                            : arch_hwdom_irqs(d);
700         d->nr_pirqs = min(d->nr_pirqs, nr_irqs);
701 
702         radix_tree_init(&d->pirq_tree);
703     }
704 #endif
705 
706     if ( (err = arch_domain_create(d, config, flags)) != 0 )
707         goto fail;
708     init_status |= INIT_arch;
709 
710     if ( !is_idle_domain(d) )
711     {
712         /*
713          * The assertion helps static analysis tools infer that config cannot
714          * be NULL in this branch, which in turn means that it can be safely
715          * dereferenced. Therefore, this assertion is not redundant.
716          */
717         ASSERT(config);
718 
719         watchdog_domain_init(d);
720         init_status |= INIT_watchdog;
721 
722         err = -ENOMEM;
723         d->iomem_caps = rangeset_new(d, "I/O Memory", RANGESETF_prettyprint_hex);
724         d->irq_caps   = rangeset_new(d, "Interrupts", 0);
725         if ( !d->iomem_caps || !d->irq_caps )
726             goto fail;
727 
728         if ( (err = xsm_domain_create(XSM_HOOK, d, config->ssidref)) != 0 )
729             goto fail;
730 
731         d->controller_pause_count = 1;
732         atomic_inc(&d->pause_count);
733 
734         if ( (err = evtchn_init(d, config->max_evtchn_port)) != 0 )
735             goto fail;
736         init_status |= INIT_evtchn;
737 
738         if ( (err = grant_table_init(d, config->max_grant_frames,
739                                      config->max_maptrack_frames,
740                                      config->grant_opts)) != 0 )
741             goto fail;
742         init_status |= INIT_gnttab;
743 
744         if ( (err = argo_init(d)) != 0 )
745             goto fail;
746 
747         err = -ENOMEM;
748 
749         d->pbuf = xzalloc_array(char, DOMAIN_PBUF_SIZE);
750         if ( !d->pbuf )
751             goto fail;
752 
753         if ( (err = sched_init_domain(d, config->cpupool_id)) != 0 )
754             goto fail;
755 
756         if ( (err = late_hwdom_init(d)) != 0 )
757             goto fail;
758 
759         /*
760          * Must not fail beyond this point, as our caller doesn't know whether
761          * the domain has been entered into domain_list or not.
762          */
763 
764         spin_lock(&domlist_update_lock);
765         pd = &domain_list; /* NB. domain_list maintained in order of domid. */
766         for ( pd = &domain_list; *pd != NULL; pd = &(*pd)->next_in_list )
767             if ( (*pd)->domain_id > d->domain_id )
768                 break;
769         d->next_in_list = *pd;
770         d->next_in_hashbucket = domain_hash[DOMAIN_HASH(domid)];
771         rcu_assign_pointer(*pd, d);
772         rcu_assign_pointer(domain_hash[DOMAIN_HASH(domid)], d);
773         spin_unlock(&domlist_update_lock);
774 
775         memcpy(d->handle, config->handle, sizeof(d->handle));
776     }
777 
778     return d;
779 
780  fail:
781     ASSERT(err < 0);      /* Sanity check paths leading here. */
782     err = err ?: -EILSEQ; /* Release build safety. */
783 
784     d->is_dying = DOMDYING_dead;
785     if ( hardware_domain == d )
786         hardware_domain = old_hwdom;
787     atomic_set(&d->refcnt, DOMAIN_DESTROYED);
788 
789     sched_destroy_domain(d);
790 
791     if ( d->max_vcpus )
792     {
793         d->max_vcpus = 0;
794         XFREE(d->vcpu);
795     }
796     if ( init_status & INIT_arch )
797         arch_domain_destroy(d);
798     if ( init_status & INIT_gnttab )
799         grant_table_destroy(d);
800     if ( init_status & INIT_evtchn )
801     {
802         evtchn_destroy(d);
803         evtchn_destroy_final(d);
804 #ifdef CONFIG_HAS_PIRQ
805         radix_tree_destroy(&d->pirq_tree, free_pirq_struct);
806 #endif
807     }
808     if ( init_status & INIT_watchdog )
809         watchdog_domain_destroy(d);
810 
811     /* Must not hit a continuation in this context. */
812     if ( domain_teardown(d) )
813         ASSERT_UNREACHABLE();
814 
815     _domain_destroy(d);
816 
817     return ERR_PTR(err);
818 }
819 
820 void __init setup_system_domains(void)
821 {
822     /*
823      * Initialise our DOMID_XEN domain.
824      * Any Xen-heap pages that we will allow to be mapped will have
825      * their domain field set to dom_xen.
826      * Hidden PCI devices will also be associated with this domain
827      * (but be [partly] controlled by Dom0 nevertheless).
828      */
829     dom_xen = domain_create(DOMID_XEN, NULL, 0);
830     if ( IS_ERR(dom_xen) )
831         panic("Failed to create d[XEN]: %ld\n", PTR_ERR(dom_xen));
832 
833 #ifdef CONFIG_HAS_PIRQ
834     /* Bound-check values passed via "extra_guest_irqs=". */
835     {
836         unsigned int n = max(arch_hwdom_irqs(dom_xen), nr_static_irqs);
837 
838         if ( extra_hwdom_irqs > n - nr_static_irqs )
839         {
840             extra_hwdom_irqs = n - nr_static_irqs;
841             printk(XENLOG_WARNING "hwdom IRQs bounded to %u\n", n);
842         }
843         if ( extra_domU_irqs >
844              max(DEFAULT_EXTRA_DOMU_IRQS, n - nr_static_irqs) )
845         {
846             extra_domU_irqs = n - nr_static_irqs;
847             printk(XENLOG_WARNING "domU IRQs bounded to %u\n", n);
848         }
849     }
850 #endif
851 
852     /*
853      * Initialise our DOMID_IO domain.
854      * This domain owns I/O pages that are within the range of the page_info
855  * array. Mappings occur at the privilege level of the caller.
856      * Quarantined PCI devices will be associated with this domain.
857      *
858      * DOMID_IO is also the default owner of memory pre-shared among multiple
859      * domains at boot time.
860      */
861     dom_io = domain_create(DOMID_IO, NULL, 0);
862     if ( IS_ERR(dom_io) )
863         panic("Failed to create d[IO]: %ld\n", PTR_ERR(dom_io));
864 
865 #ifdef CONFIG_MEM_SHARING
866     /*
867      * Initialise our COW domain.
868      * This domain owns sharable pages.
869      */
870     dom_cow = domain_create(DOMID_COW, NULL, 0);
871     if ( IS_ERR(dom_cow) )
872         panic("Failed to create d[COW]: %ld\n", PTR_ERR(dom_cow));
873 #endif
874 }
875 
876 int domain_set_node_affinity(struct domain *d, const nodemask_t *affinity)
877 {
878     /* Being disjoint with the system is just wrong. */
879     if ( !nodes_intersects(*affinity, node_online_map) )
880         return -EINVAL;
881 
882     spin_lock(&d->node_affinity_lock);
883 
884     /*
885      * Being/becoming explicitly affine to all nodes is not particularly
886      * useful. Let's take it as the `reset node affinity` command.
887      */
888     if ( nodes_full(*affinity) )
889     {
890         d->auto_node_affinity = 1;
891         goto out;
892     }
893 
894     d->auto_node_affinity = 0;
895     d->node_affinity = *affinity;
896 
897 out:
898     spin_unlock(&d->node_affinity_lock);
899 
900     domain_update_node_affinity(d);
901 
902     return 0;
903 }
904 
905 /* rcu_read_lock(&domlist_read_lock) must be held. */
906 static struct domain *domid_to_domain(domid_t dom)
907 {
908     struct domain *d;
909 
910     for ( d = rcu_dereference(domain_hash[DOMAIN_HASH(dom)]);
911           d != NULL;
912           d = rcu_dereference(d->next_in_hashbucket) )
913     {
914         if ( d->domain_id == dom )
915             return d;
916     }
917 
918     return NULL;
919 }
920 
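/* Look up a domain by ID and take a general reference; NULL on failure. */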
921 struct domain *get_domain_by_id(domid_t dom)
922 {
923     struct domain *d;
924 
925     rcu_read_lock(&domlist_read_lock);
926 
927     d = domid_to_domain(dom);
928     if ( d && unlikely(!get_domain(d)) )
929         d = NULL;
930 
931     rcu_read_unlock(&domlist_read_lock);
932 
933     return d;
934 }
935 
936 
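/*
 * Look up a domain by ID, returning it with an RCU lock held (to be
 * released with rcu_unlock_domain()), or NULL if no such domain exists.
 */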
937 struct domain *rcu_lock_domain_by_id(domid_t dom)
938 {
939     struct domain *d;
940 
941     rcu_read_lock(&domlist_read_lock);
942 
943     d = domid_to_domain(dom);
944     if ( d )
945         rcu_lock_domain(d);
946 
947     rcu_read_unlock(&domlist_read_lock);
948 
949     return d;
950 }
951 
952 struct domain *knownalive_domain_from_domid(domid_t dom)
953 {
954     struct domain *d;
955 
956     rcu_read_lock(&domlist_read_lock);
957 
958     d = domid_to_domain(dom);
959 
960     rcu_read_unlock(&domlist_read_lock);
961 
962     return d;
963 }
964 
965 struct domain *rcu_lock_domain_by_any_id(domid_t dom)
966 {
967     if ( dom == DOMID_SELF )
968         return rcu_lock_current_domain();
969     return rcu_lock_domain_by_id(dom);
970 }
971 
972 int rcu_lock_remote_domain_by_id(domid_t dom, struct domain **d)
973 {
974     if ( (*d = rcu_lock_domain_by_id(dom)) == NULL )
975         return -ESRCH;
976 
977     if ( *d == current->domain )
978     {
979         rcu_unlock_domain(*d);
980         return -EPERM;
981     }
982 
983     return 0;
984 }
985 
986 int rcu_lock_live_remote_domain_by_id(domid_t dom, struct domain **d)
987 {
988     int rv;
989     rv = rcu_lock_remote_domain_by_id(dom, d);
990     if ( rv )
991         return rv;
992     if ( (*d)->is_dying )
993     {
994         rcu_unlock_domain(*d);
995         return -EINVAL;
996     }
997 
998     return 0;
999 }
1000 
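/*
 * Begin, or continue, killing a domain.  Progress is recorded in the domain
 * itself, and -ERESTART is returned whenever the operation needs to be
 * continued.
 */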
1001 int domain_kill(struct domain *d)
1002 {
1003     int rc = 0;
1004     struct vcpu *v;
1005 
1006     if ( d == current->domain )
1007         return -EINVAL;
1008 
1009     /* Protected by domctl_lock. */
1010     switch ( d->is_dying )
1011     {
1012     case DOMDYING_alive:
1013         domain_pause(d);
1014         d->is_dying = DOMDYING_dying;
1015         rspin_barrier(&d->domain_lock);
1016         argo_destroy(d);
1017         vnuma_destroy(d->vnuma);
1018         domain_set_outstanding_pages(d, 0);
1019         /* fallthrough */
1020     case DOMDYING_dying:
1021         rc = domain_teardown(d);
1022         if ( rc )
1023             break;
1024         rc = evtchn_destroy(d);
1025         if ( rc )
1026             break;
1027         rc = domain_relinquish_resources(d);
1028         if ( rc != 0 )
1029             break;
1030         if ( cpupool_move_domain(d, cpupool0) )
1031             return -ERESTART;
1032         for_each_vcpu ( d, v )
1033         {
1034             unmap_guest_area(v, &v->vcpu_info_area);
1035             unmap_guest_area(v, &v->runstate_guest_area);
1036         }
1037         d->is_dying = DOMDYING_dead;
1038         /* Mem event cleanup has to go here because the rings
1039          * have to be put before we call put_domain. */
1040         vm_event_cleanup(d);
1041         put_domain(d);
1042         send_global_virq(VIRQ_DOM_EXC);
1043         /* fallthrough */
1044     case DOMDYING_dead:
1045         break;
1046     }
1047 
1048     return rc;
1049 }
1050 
1051 
1052 void __domain_crash(struct domain *d)
1053 {
1054     if ( d->is_shutting_down )
1055     {
1056         /* Print nothing: the domain is already shutting down. */
1057     }
1058     else if ( d == current->domain )
1059     {
1060         printk("Domain %d (vcpu#%d) crashed on cpu#%d:\n",
1061                d->domain_id, current->vcpu_id, smp_processor_id());
1062         show_execution_state(guest_cpu_user_regs());
1063     }
1064     else
1065     {
1066         printk("Domain %d reported crashed by domain %d on cpu#%d:\n",
1067                d->domain_id, current->domain->domain_id, smp_processor_id());
1068     }
1069 
1070     domain_shutdown(d, SHUTDOWN_crash);
1071 }
1072 
1073 
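/*
 * Mark @d as shutting down for the given reason, pausing all vCPUs which
 * haven't deferred shutdown (a crash overrides any deferral).  Only the
 * first reason recorded is kept, until domain_resume() clears it.
 */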
1074 int domain_shutdown(struct domain *d, u8 reason)
1075 {
1076     struct vcpu *v;
1077 
1078 #ifdef CONFIG_X86
1079     if ( pv_shim )
1080         return pv_shim_shutdown(reason);
1081 #endif
1082 
1083     spin_lock(&d->shutdown_lock);
1084 
1085     if ( d->shutdown_code == SHUTDOWN_CODE_INVALID )
1086         d->shutdown_code = reason;
1087     reason = d->shutdown_code;
1088 
1089     if ( is_hardware_domain(d) )
1090         hwdom_shutdown(reason);
1091 
1092     if ( d->is_shutting_down )
1093     {
1094         spin_unlock(&d->shutdown_lock);
1095         return 0;
1096     }
1097 
1098     d->is_shutting_down = 1;
1099 
1100     smp_mb(); /* set shutdown status /then/ check for per-cpu deferrals */
1101 
1102     for_each_vcpu ( d, v )
1103     {
1104         if ( reason == SHUTDOWN_crash )
1105             v->defer_shutdown = 0;
1106         else if ( v->defer_shutdown )
1107             continue;
1108         vcpu_pause_nosync(v);
1109         v->paused_for_shutdown = 1;
1110     }
1111 
1112     arch_domain_shutdown(d);
1113 
1114     __domain_finalise_shutdown(d);
1115 
1116     spin_unlock(&d->shutdown_lock);
1117 
1118     return 0;
1119 }
1120 
1121 void domain_resume(struct domain *d)
1122 {
1123     struct vcpu *v;
1124 
1125     /*
1126      * Some code paths assume that shutdown status does not get reset under
1127      * their feet (e.g., some assertions make this assumption).
1128      */
1129     domain_pause(d);
1130 
1131     spin_lock(&d->shutdown_lock);
1132 
1133     d->is_shutting_down = d->is_shut_down = 0;
1134     d->shutdown_code = SHUTDOWN_CODE_INVALID;
1135 
1136     for_each_vcpu ( d, v )
1137     {
1138         if ( v->paused_for_shutdown )
1139             vcpu_unpause(v);
1140         v->paused_for_shutdown = 0;
1141     }
1142 
1143     spin_unlock(&d->shutdown_lock);
1144 
1145     domain_unpause(d);
1146 }
1147 
1148 int vcpu_start_shutdown_deferral(struct vcpu *v)
1149 {
1150     if ( v->defer_shutdown )
1151         return 1;
1152 
1153     v->defer_shutdown = 1;
1154     smp_mb(); /* set deferral status /then/ check for shutdown */
1155     if ( unlikely(v->domain->is_shutting_down) )
1156         vcpu_check_shutdown(v);
1157 
1158     return v->defer_shutdown;
1159 }
1160 
1161 void vcpu_end_shutdown_deferral(struct vcpu *v)
1162 {
1163     v->defer_shutdown = 0;
1164     smp_mb(); /* clear deferral status /then/ check for shutdown */
1165     if ( unlikely(v->domain->is_shutting_down) )
1166         vcpu_check_shutdown(v);
1167 }
1168 
1169 /* Complete domain destroy after RCU readers are not holding old references. */
1170 static void cf_check complete_domain_destroy(struct rcu_head *head)
1171 {
1172     struct domain *d = container_of(head, struct domain, rcu);
1173     struct vcpu *v;
1174     int i;
1175 
1176     /*
1177      * Flush all state for the vCPU previously having run on the current CPU.
1178      * This is in particular relevant for x86 HVM ones on VMX, so that this
1179      * flushing of state won't happen from the TLB flush IPI handler behind
1180      * the back of a vmx_vmcs_enter() / vmx_vmcs_exit() section.
1181      */
1182     sync_local_execstate();
1183 
1184     for ( i = d->max_vcpus - 1; i >= 0; i-- )
1185     {
1186         if ( (v = d->vcpu[i]) == NULL )
1187             continue;
1188         tasklet_kill(&v->continue_hypercall_tasklet);
1189         arch_vcpu_destroy(v);
1190         sched_destroy_vcpu(v);
1191         destroy_waitqueue_vcpu(v);
1192     }
1193 
1194     grant_table_destroy(d);
1195 
1196     arch_domain_destroy(d);
1197 
1198     watchdog_domain_destroy(d);
1199 
1200     sched_destroy_domain(d);
1201 
1202     /* Free page used by xen oprofile buffer. */
1203 #ifdef CONFIG_XENOPROF
1204     free_xenoprof_pages(d);
1205 #endif
1206 
1207 #ifdef CONFIG_MEM_PAGING
1208     xfree(d->vm_event_paging);
1209 #endif
1210     xfree(d->vm_event_monitor);
1211 #ifdef CONFIG_MEM_SHARING
1212     xfree(d->vm_event_share);
1213 #endif
1214 
1215     for ( i = d->max_vcpus - 1; i >= 0; i-- )
1216         if ( (v = d->vcpu[i]) != NULL )
1217             vcpu_destroy(v);
1218 
1219     if ( d->target != NULL )
1220         put_domain(d->target);
1221 
1222     evtchn_destroy_final(d);
1223 
1224 #ifdef CONFIG_HAS_PIRQ
1225     radix_tree_destroy(&d->pirq_tree, free_pirq_struct);
1226 #endif
1227 
1228     xfree(d->vcpu);
1229 
1230     _domain_destroy(d);
1231 
1232     send_global_virq(VIRQ_DOM_EXC);
1233 }
1234 
1235 /* Release resources belonging to task @p. */
1236 void domain_destroy(struct domain *d)
1237 {
1238     struct domain **pd;
1239 
1240     BUG_ON(!d->is_dying);
1241 
1242     /* May be already destroyed, or get_domain() can race us. */
1243     if ( atomic_cmpxchg(&d->refcnt, 0, DOMAIN_DESTROYED) != 0 )
1244         return;
1245 
1246     TRACE_TIME(TRC_DOM0_DOM_REM, d->domain_id);
1247 
1248     /* Delete from task list and task hashtable. */
1249     spin_lock(&domlist_update_lock);
1250     pd = &domain_list;
1251     while ( *pd != d )
1252         pd = &(*pd)->next_in_list;
1253     rcu_assign_pointer(*pd, d->next_in_list);
1254     pd = &domain_hash[DOMAIN_HASH(d->domain_id)];
1255     while ( *pd != d )
1256         pd = &(*pd)->next_in_hashbucket;
1257     rcu_assign_pointer(*pd, d->next_in_hashbucket);
1258     spin_unlock(&domlist_update_lock);
1259 
1260     /* Schedule RCU asynchronous completion of domain destroy. */
1261     call_rcu(&d->rcu, complete_domain_destroy);
1262 }
1263 
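/*
 * Pause @v and synchronously wait for it to be descheduled; must not be
 * used on the current vCPU.  Pauses nest, and each call must be matched by
 * a vcpu_unpause(), e.g.:
 *
 *     vcpu_pause(v);
 *     ... act on v's now-quiescent state ...
 *     vcpu_unpause(v);
 */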
1264 void vcpu_pause(struct vcpu *v)
1265 {
1266     ASSERT(v != current);
1267     atomic_inc(&v->pause_count);
1268     vcpu_sleep_sync(v);
1269 }
1270 
1271 void vcpu_pause_nosync(struct vcpu *v)
1272 {
1273     atomic_inc(&v->pause_count);
1274     vcpu_sleep_nosync(v);
1275 }
1276 
1277 void vcpu_unpause(struct vcpu *v)
1278 {
1279     if ( atomic_dec_and_test(&v->pause_count) )
1280         vcpu_wake(v);
1281 }
1282 
1283 int vcpu_pause_by_systemcontroller(struct vcpu *v)
1284 {
1285     int old, new, prev = v->controller_pause_count;
1286 
1287     do
1288     {
1289         old = prev;
1290         new = old + 1;
1291 
1292         if ( new > 255 )
1293             return -EOVERFLOW;
1294 
1295         prev = cmpxchg(&v->controller_pause_count, old, new);
1296     } while ( prev != old );
1297 
1298     vcpu_pause(v);
1299 
1300     return 0;
1301 }
1302 
1303 int vcpu_unpause_by_systemcontroller(struct vcpu *v)
1304 {
1305     int old, new, prev = v->controller_pause_count;
1306 
1307     do
1308     {
1309         old = prev;
1310         new = old - 1;
1311 
1312         if ( new < 0 )
1313             return -EINVAL;
1314 
1315         prev = cmpxchg(&v->controller_pause_count, old, new);
1316     } while ( prev != old );
1317 
1318     vcpu_unpause(v);
1319 
1320     return 0;
1321 }
1322 
1323 static void _domain_pause(struct domain *d, bool sync)
1324 {
1325     struct vcpu *v;
1326 
1327     atomic_inc(&d->pause_count);
1328 
1329     if ( sync )
1330         for_each_vcpu ( d, v )
1331             vcpu_sleep_sync(v);
1332     else
1333         for_each_vcpu ( d, v )
1334             vcpu_sleep_nosync(v);
1335 
1336     arch_domain_pause(d);
1337 }
1338 
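/*
 * Pause all of @d's vCPUs, waiting for each to be descheduled.  Pauses
 * nest; every domain_pause() must be matched by a domain_unpause().
 */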
1339 void domain_pause(struct domain *d)
1340 {
1341     ASSERT(d != current->domain);
1342     _domain_pause(d, true /* sync */);
1343 }
1344 
1345 void domain_pause_nosync(struct domain *d)
1346 {
1347     _domain_pause(d, false /* nosync */);
1348 }
1349 
1350 void domain_unpause(struct domain *d)
1351 {
1352     struct vcpu *v;
1353 
1354     arch_domain_unpause(d);
1355 
1356     if ( atomic_dec_and_test(&d->pause_count) )
1357         for_each_vcpu( d, v )
1358             vcpu_wake(v);
1359 }
1360 
1361 static int _domain_pause_by_systemcontroller(struct domain *d, bool sync)
1362 {
1363     int old, new, prev = d->controller_pause_count;
1364 
1365     do
1366     {
1367         old = prev;
1368         new = old + 1;
1369 
1370         /*
1371          * Limit the toolstack pause count to an arbitrary 255 to prevent the
1372          * toolstack overflowing d->pause_count with many repeated hypercalls.
1373          */
1374         if ( new > 255 )
1375             return -EOVERFLOW;
1376 
1377         prev = cmpxchg(&d->controller_pause_count, old, new);
1378     } while ( prev != old );
1379 
1380     _domain_pause(d, sync);
1381 
1382     return 0;
1383 }
1384 
1385 int domain_pause_by_systemcontroller(struct domain *d)
1386 {
1387     return _domain_pause_by_systemcontroller(d, true /* sync */);
1388 }
1389 
1390 int domain_pause_by_systemcontroller_nosync(struct domain *d)
1391 {
1392     return _domain_pause_by_systemcontroller(d, false /* nosync */);
1393 }
1394 
1395 int domain_unpause_by_systemcontroller(struct domain *d)
1396 {
1397     int old, new, prev = d->controller_pause_count;
1398 
1399     do
1400     {
1401         old = prev;
1402         new = old - 1;
1403 
1404         if ( new < 0 )
1405             return -EINVAL;
1406 
1407         prev = cmpxchg(&d->controller_pause_count, old, new);
1408     } while ( prev != old );
1409 
1410     /*
1411      * d->controller_pause_count is initialised to 1, and the toolstack is
1412      * responsible for making one unpause hypercall when it wishes the guest
1413      * to start running.
1414      *
1415      * All other toolstack operations should make a pair of pause/unpause
1416      * calls and rely on the reference counting here.
1417      *
1418      * Creation is considered finished when the controller reference count
1419      * first drops to 0.
1420      */
1421     if ( new == 0 && !d->creation_finished )
1422     {
1423         d->creation_finished = true;
1424         arch_domain_creation_finished(d);
1425     }
1426 
1427     domain_unpause(d);
1428 
1429     return 0;
1430 }
1431 
1432 int domain_pause_except_self(struct domain *d)
1433 {
1434     struct vcpu *v, *curr = current;
1435 
1436     if ( curr->domain == d )
1437     {
1438         /* Avoid racing with other vcpus which may want to be pausing us */
1439         if ( !spin_trylock(&d->hypercall_deadlock_mutex) )
1440             return -ERESTART;
1441         for_each_vcpu( d, v )
1442             if ( likely(v != curr) )
1443                 vcpu_pause(v);
1444         spin_unlock(&d->hypercall_deadlock_mutex);
1445     }
1446     else
1447         domain_pause(d);
1448 
1449     return 0;
1450 }
1451 
1452 void domain_unpause_except_self(struct domain *d)
1453 {
1454     struct vcpu *v, *curr = current;
1455 
1456     if ( curr->domain == d )
1457     {
1458         for_each_vcpu( d, v )
1459             if ( likely(v != curr) )
1460                 vcpu_unpause(v);
1461     }
1462     else
1463         domain_unpause(d);
1464 }
1465 
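/*
 * Perform a soft reset of @d: all vCPUs must already have been paused for
 * shutdown.  Event channels are reset and guest-registered areas unmapped
 * before the domain is resumed (or crashed, if the arch-specific step
 * fails).
 */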
1466 int domain_soft_reset(struct domain *d, bool resuming)
1467 {
1468     struct vcpu *v;
1469     int rc;
1470 
1471     spin_lock(&d->shutdown_lock);
1472     for_each_vcpu ( d, v )
1473         if ( !v->paused_for_shutdown )
1474         {
1475             spin_unlock(&d->shutdown_lock);
1476             return -EINVAL;
1477         }
1478     spin_unlock(&d->shutdown_lock);
1479 
1480     rc = evtchn_reset(d, resuming);
1481     if ( rc )
1482         return rc;
1483 
1484     grant_table_warn_active_grants(d);
1485 
1486     argo_soft_reset(d);
1487 
1488     for_each_vcpu ( d, v )
1489     {
1490         set_xen_guest_handle(runstate_guest(v), NULL);
1491         unmap_guest_area(v, &v->vcpu_info_area);
1492         unmap_guest_area(v, &v->runstate_guest_area);
1493     }
1494 
1495     rc = arch_domain_soft_reset(d);
1496     if ( !rc )
1497         domain_resume(d);
1498     else
1499         domain_crash(d);
1500 
1501     return rc;
1502 }
1503 
1504 int vcpu_reset(struct vcpu *v)
1505 {
1506     struct domain *d = v->domain;
1507     int rc;
1508 
1509     vcpu_pause(v);
1510     domain_lock(d);
1511 
1512     set_bit(_VPF_in_reset, &v->pause_flags);
1513     rc = arch_vcpu_reset(v);
1514     if ( rc )
1515         goto out_unlock;
1516 
1517     set_bit(_VPF_down, &v->pause_flags);
1518 
1519     clear_bit(v->vcpu_id, d->poll_mask);
1520     v->poll_evtchn = 0;
1521 
1522     v->fpu_initialised = 0;
1523     v->fpu_dirtied     = 0;
1524     v->is_initialised  = 0;
1525     if ( v->affinity_broken & VCPU_AFFINITY_OVERRIDE )
1526         vcpu_temporary_affinity(v, NR_CPUS, VCPU_AFFINITY_OVERRIDE);
1527     if ( v->affinity_broken & VCPU_AFFINITY_WAIT )
1528         vcpu_temporary_affinity(v, NR_CPUS, VCPU_AFFINITY_WAIT);
1529     clear_bit(_VPF_blocked, &v->pause_flags);
1530     clear_bit(_VPF_in_reset, &v->pause_flags);
1531 
1532  out_unlock:
1533     domain_unlock(v->domain);
1534     vcpu_unpause(v);
1535 
1536     return rc;
1537 }
1538 
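/*
 * Map a guest-specified area (which must not cross a page boundary) for
 * writing by Xen, optionally seeding its contents via @populate.  Passing
 * an all-ones @gaddr requests unmapping only.
 */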
1539 int map_guest_area(struct vcpu *v, paddr_t gaddr, unsigned int size,
1540                    struct guest_area *area,
1541                    void (*populate)(void *dst, struct vcpu *v))
1542 {
1543     struct domain *d = v->domain;
1544     void *map = NULL;
1545     struct page_info *pg = NULL;
1546     int rc = 0;
1547 
1548     if ( ~gaddr ) /* Map (i.e. not just unmap)? */
1549     {
1550         unsigned long gfn = PFN_DOWN(gaddr);
1551         unsigned int align;
1552         p2m_type_t p2mt;
1553 
1554         if ( gfn != PFN_DOWN(gaddr + size - 1) )
1555             return -ENXIO;
1556 
1557 #ifdef CONFIG_COMPAT
1558         if ( has_32bit_shinfo(d) )
1559             align = alignof(compat_ulong_t);
1560         else
1561 #endif
1562             align = alignof(xen_ulong_t);
1563         if ( !IS_ALIGNED(gaddr, align) )
1564             return -ENXIO;
1565 
1566         rc = check_get_page_from_gfn(d, _gfn(gfn), false, &p2mt, &pg);
1567         if ( rc )
1568             return rc;
1569 
1570         if ( !get_page_type(pg, PGT_writable_page) )
1571         {
1572             put_page(pg);
1573             return -EACCES;
1574         }
1575 
1576         map = __map_domain_page_global(pg);
1577         if ( !map )
1578         {
1579             put_page_and_type(pg);
1580             return -ENOMEM;
1581         }
1582         map += PAGE_OFFSET(gaddr);
1583     }
1584 
1585     if ( v != current )
1586     {
1587         if ( !spin_trylock(&d->hypercall_deadlock_mutex) )
1588         {
1589             rc = -ERESTART;
1590             goto unmap;
1591         }
1592 
1593         vcpu_pause(v);
1594 
1595         spin_unlock(&d->hypercall_deadlock_mutex);
1596     }
1597 
1598     domain_lock(d);
1599 
1600     /* No re-registration of the vCPU info area. */
1601     if ( area != &v->vcpu_info_area || !area->pg )
1602     {
1603         if ( map && populate )
1604             populate(map, v);
1605 
1606         SWAP(area->pg, pg);
1607         SWAP(area->map, map);
1608     }
1609     else
1610         rc = -EBUSY;
1611 
1612     domain_unlock(d);
1613 
1614     /* Set pending flags /after/ new vcpu_info pointer was set. */
1615     if ( area == &v->vcpu_info_area && !rc )
1616     {
1617         /*
1618          * Mark everything as being pending just to make sure nothing gets
1619          * lost.  The domain will get a spurious event, but it can cope.
1620          */
1621 #ifdef CONFIG_COMPAT
1622         if ( !has_32bit_shinfo(d) )
1623         {
1624             vcpu_info_t *info = area->map;
1625 
1626             /* For VCPUOP_register_vcpu_info handling in common_vcpu_op(). */
1627             BUILD_BUG_ON(sizeof(*info) != sizeof(info->compat));
1628             write_atomic(&info->native.evtchn_pending_sel, ~0);
1629         }
1630         else
1631 #endif
1632             write_atomic(&vcpu_info(v, evtchn_pending_sel), ~0);
1633         vcpu_mark_events_pending(v);
1634 
1635         force_update_vcpu_system_time(v);
1636     }
1637 
1638     if ( v != current )
1639         vcpu_unpause(v);
1640 
1641  unmap:
1642     if ( pg )
1643     {
1644         unmap_domain_page_global((void *)((unsigned long)map & PAGE_MASK));
1645         put_page_and_type(pg);
1646     }
1647 
1648     return rc;
1649 }
1650 
1651 /*
1652  * This is only intended to be used for domain cleanup (or more generally only
1653  * with at least the respective vCPU, if it's not the current one, reliably
1654  * paused).
1655  */
1656 void unmap_guest_area(struct vcpu *v, struct guest_area *area)
1657 {
1658     struct domain *d = v->domain;
1659     void *map;
1660     struct page_info *pg;
1661 
1662     if ( v != current )
1663         ASSERT(atomic_read(&v->pause_count) | atomic_read(&d->pause_count));
1664 
1665     domain_lock(d);
1666     map = area->map;
1667     if ( area == &v->vcpu_info_area )
1668         vcpu_info_reset(v);
1669     else
1670         area->map = NULL;
1671     pg = area->pg;
1672     area->pg = NULL;
1673     domain_unlock(d);
1674 
1675     if ( pg )
1676     {
1677         unmap_domain_page_global((void *)((unsigned long)map & PAGE_MASK));
1678         put_page_and_type(pg);
1679     }
1680 }
1681 
1682 int default_initialise_vcpu(struct vcpu *v, XEN_GUEST_HANDLE_PARAM(void) arg)
1683 {
1684     struct vcpu_guest_context *ctxt;
1685     struct domain *d = v->domain;
1686     int rc;
1687 
1688     if ( (ctxt = alloc_vcpu_guest_context()) == NULL )
1689         return -ENOMEM;
1690 
1691     if ( copy_from_guest(ctxt, arg, 1) )
1692     {
1693         free_vcpu_guest_context(ctxt);
1694         return -EFAULT;
1695     }
1696 
1697     domain_lock(d);
1698     rc = v->is_initialised ? -EEXIST : arch_set_info_guest(v, ctxt);
1699     domain_unlock(d);
1700 
1701     free_vcpu_guest_context(ctxt);
1702 
1703     return rc;
1704 }
1705 
1706 /* Update per-VCPU guest runstate shared memory area (if registered). */
1707 bool update_runstate_area(struct vcpu *v)
1708 {
1709     bool rc;
1710     struct guest_memory_policy policy = { };
1711     void __user *guest_handle = NULL;
1712     struct vcpu_runstate_info runstate = v->runstate;
1713     struct vcpu_runstate_info *map = v->runstate_guest_area.map;
1714 
1715     if ( map )
1716     {
1717         uint64_t *pset;
1718 #ifdef CONFIG_COMPAT
1719         struct compat_vcpu_runstate_info *cmap = NULL;
1720 
1721         if ( v->runstate_guest_area_compat )
1722             cmap = (void *)map;
1723 #endif
1724 
1725         /*
1726          * NB: No VM_ASSIST(v->domain, runstate_update_flag) check here.
1727          *     Always using that updating model.
1728          */
1729 #ifdef CONFIG_COMPAT
1730         if ( cmap )
1731             pset = &cmap->state_entry_time;
1732         else
1733 #endif
1734             pset = &map->state_entry_time;
1735         runstate.state_entry_time |= XEN_RUNSTATE_UPDATE;
1736         write_atomic(pset, runstate.state_entry_time);
1737         smp_wmb();
1738 
1739 #ifdef CONFIG_COMPAT
1740         if ( cmap )
1741             XLAT_vcpu_runstate_info(cmap, &runstate);
1742         else
1743 #endif
1744             *map = runstate;
1745 
1746         smp_wmb();
1747         runstate.state_entry_time &= ~XEN_RUNSTATE_UPDATE;
1748         write_atomic(pset, runstate.state_entry_time);
1749 
1750         return true;
1751     }
1752 
1753     if ( guest_handle_is_null(runstate_guest(v)) )
1754         return true;
1755 
1756     update_guest_memory_policy(v, &policy);
1757 
1758     if ( VM_ASSIST(v->domain, runstate_update_flag) )
1759     {
1760 #ifdef CONFIG_COMPAT
1761         guest_handle = has_32bit_shinfo(v->domain)
1762             ? &v->runstate_guest.compat.p->state_entry_time + 1
1763             : &v->runstate_guest.native.p->state_entry_time + 1;
1764 #else
1765         guest_handle = &v->runstate_guest.p->state_entry_time + 1;
1766 #endif
1767         guest_handle--;
1768         runstate.state_entry_time |= XEN_RUNSTATE_UPDATE;
1769         __raw_copy_to_guest(guest_handle,
1770                             (void *)(&runstate.state_entry_time + 1) - 1, 1);
1771         smp_wmb();
1772     }
1773 
1774 #ifdef CONFIG_COMPAT
1775     if ( has_32bit_shinfo(v->domain) )
1776     {
1777         struct compat_vcpu_runstate_info info;
1778 
1779         XLAT_vcpu_runstate_info(&info, &runstate);
1780         __copy_to_guest(v->runstate_guest.compat, &info, 1);
1781         rc = true;
1782     }
1783     else
1784 #endif
1785         rc = __copy_to_guest(runstate_guest(v), &runstate, 1) !=
1786              sizeof(runstate);
1787 
1788     if ( guest_handle )
1789     {
1790         runstate.state_entry_time &= ~XEN_RUNSTATE_UPDATE;
1791         smp_wmb();
1792         __raw_copy_to_guest(guest_handle,
1793                             (void *)(&runstate.state_entry_time + 1) - 1, 1);
1794     }
1795 
1796     update_guest_memory_policy(v, &policy);
1797 
1798     return rc;
1799 }
1800 
1801 /*
1802  * This makes sure that the vcpu_info is always pointing at a valid piece of
1803  * memory, and it sets a pending event to make sure that a pending event
1804  * doesn't get missed.
1805  */
1806 static void cf_check
1807 vcpu_info_populate(void *map, struct vcpu *v)
1808 {
1809     vcpu_info_t *info = map;
1810 
1811     if ( v->vcpu_info_area.map == &dummy_vcpu_info )
1812     {
1813         memset(info, 0, sizeof(*info));
1814 #ifdef XEN_HAVE_PV_UPCALL_MASK
1815         __vcpu_info(v, info, evtchn_upcall_mask) = 1;
1816 #endif
1817     }
1818     else
1819         memcpy(info, v->vcpu_info_area.map, sizeof(*info));
1820 }
1821 
1822 static void cf_check
1823 runstate_area_populate(void *map, struct vcpu *v)
1824 {
1825 #ifdef CONFIG_PV
1826     if ( is_pv_vcpu(v) )
1827         v->arch.pv.need_update_runstate_area = false;
1828 #endif
1829 
1830 #ifdef CONFIG_COMPAT
1831     v->runstate_guest_area_compat = false;
1832 #endif
1833 
1834     if ( v == current )
1835     {
1836         struct vcpu_runstate_info *info = map;
1837 
1838         *info = v->runstate;
1839     }
1840 }
1841 
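/* Handle the architecture-independent subset of the VCPUOP_* sub-ops. */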
1842 long common_vcpu_op(int cmd, struct vcpu *v, XEN_GUEST_HANDLE_PARAM(void) arg)
1843 {
1844     long rc = 0;
1845     struct domain *d = v->domain;
1846     unsigned int vcpuid = v->vcpu_id;
1847 
1848     switch ( cmd )
1849     {
1850     case VCPUOP_initialise:
1851         if ( is_pv_domain(d) && v->vcpu_info_area.map == &dummy_vcpu_info )
1852             return -EINVAL;
1853 
1854         rc = arch_initialise_vcpu(v, arg);
1855         if ( rc == -ERESTART )
1856             rc = hypercall_create_continuation(__HYPERVISOR_vcpu_op, "iih",
1857                                                cmd, vcpuid, arg);
1858 
1859         break;
1860 
1861     case VCPUOP_up:
1862 #ifdef CONFIG_X86
1863         if ( pv_shim )
1864             rc = continue_hypercall_on_cpu(0, pv_shim_cpu_up, v);
1865         else
1866 #endif
1867         {
1868             bool wake = false;
1869 
1870             domain_lock(d);
1871             if ( !v->is_initialised )
1872                 rc = -EINVAL;
1873             else
1874                 wake = test_and_clear_bit(_VPF_down, &v->pause_flags);
1875             domain_unlock(d);
1876             if ( wake )
1877                 vcpu_wake(v);
1878         }
1879 
1880         break;
1881 
1882     case VCPUOP_down:
1883         for_each_vcpu ( d, v )
1884             if ( v->vcpu_id != vcpuid && !test_bit(_VPF_down, &v->pause_flags) )
1885             {
1886                rc = 1;
1887                break;
1888             }
1889 
1890         if ( !rc ) /* Last vcpu going down? */
1891         {
1892             domain_shutdown(d, SHUTDOWN_poweroff);
1893             break;
1894         }
1895 
1896         rc = 0;
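        /* for_each_vcpu() above clobbered v; point it back at the target. */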
        v = d->vcpu[vcpuid];

#ifdef CONFIG_X86
        if ( pv_shim )
            rc = continue_hypercall_on_cpu(0, pv_shim_cpu_down, v);
        else
#endif
            if ( !test_and_set_bit(_VPF_down, &v->pause_flags) )
                vcpu_sleep_nosync(v);

        break;

    case VCPUOP_is_up:
        rc = !(v->pause_flags & VPF_down);
        break;

    case VCPUOP_get_runstate_info:
    {
        struct vcpu_runstate_info runstate;
        vcpu_runstate_get(v, &runstate);
        if ( copy_to_guest(arg, &runstate, 1) )
            rc = -EFAULT;
        break;
    }

    case VCPUOP_set_periodic_timer:
    {
        struct vcpu_set_periodic_timer set;

        if ( copy_from_guest(&set, arg, 1) )
            return -EFAULT;

        if ( set.period_ns < MILLISECS(1) )
            return -EINVAL;

        if ( set.period_ns > STIME_DELTA_MAX )
            return -EINVAL;

        vcpu_set_periodic_timer(v, set.period_ns);

        break;
    }

    case VCPUOP_stop_periodic_timer:
        vcpu_set_periodic_timer(v, 0);
        break;

    case VCPUOP_set_singleshot_timer:
    {
        struct vcpu_set_singleshot_timer set;

        if ( v != current )
            return -EINVAL;

        if ( copy_from_guest(&set, arg, 1) )
            return -EFAULT;

        if ( set.timeout_abs_ns < NOW() )
        {
            /*
             * Simplify the logic if the timeout has already expired and just
             * inject the event.
             */
            stop_timer(&v->singleshot_timer);
            send_timer_event(v);
            break;
        }

        migrate_timer(&v->singleshot_timer, smp_processor_id());
        set_timer(&v->singleshot_timer, set.timeout_abs_ns);

        break;
    }

    case VCPUOP_stop_singleshot_timer:
        if ( v != current )
            return -EINVAL;

        stop_timer(&v->singleshot_timer);

        break;

    case VCPUOP_register_vcpu_info:
    {
        struct vcpu_register_vcpu_info info;
        paddr_t gaddr;

        rc = -EFAULT;
        if ( copy_from_guest(&info, arg, 1) )
            break;

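        /*
         * Reject an all-ones address as well as any offset that would place
         * the start of the area outside the nominated frame.
         */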
        rc = -EINVAL;
        gaddr = gfn_to_gaddr(_gfn(info.mfn)) + info.offset;
        if ( !~gaddr ||
             gfn_x(gaddr_to_gfn(gaddr)) != info.mfn )
            break;

        /* Preliminary check only; see map_guest_area(). */
        rc = -EBUSY;
        if ( v->vcpu_info_area.pg )
            break;

        /* See the BUILD_BUG_ON() in vcpu_info_populate(). */
        rc = map_guest_area(v, gaddr, sizeof(vcpu_info_t),
                            &v->vcpu_info_area, vcpu_info_populate);
        if ( rc == -ERESTART )
            rc = hypercall_create_continuation(__HYPERVISOR_vcpu_op, "iih",
                                               cmd, vcpuid, arg);

        break;
    }

    case VCPUOP_register_runstate_memory_area:
    {
        struct vcpu_register_runstate_memory_area area;
        struct vcpu_runstate_info runstate;

        rc = -EFAULT;
        if ( copy_from_guest(&area, arg, 1) )
            break;

        if ( !guest_handle_okay(area.addr.h, 1) )
            break;

        rc = 0;
        runstate_guest(v) = area.addr.h;

        if ( v == current )
        {
            __copy_to_guest(runstate_guest(v), &v->runstate, 1);
        }
        else
        {
            vcpu_runstate_get(v, &runstate);
            __copy_to_guest(runstate_guest(v), &runstate, 1);
        }

        break;
    }

    case VCPUOP_register_runstate_phys_area:
    {
        struct vcpu_register_runstate_memory_area area;

        rc = -ENOSYS;
        if ( 0 /* TODO: Dom's XENFEAT_runstate_phys_area setting */ )
            break;

        rc = -EFAULT;
        if ( copy_from_guest(&area.addr.p, arg, 1) )
            break;

        rc = map_guest_area(v, area.addr.p,
                            sizeof(struct vcpu_runstate_info),
                            &v->runstate_guest_area,
                            runstate_area_populate);
        if ( rc == -ERESTART )
            rc = hypercall_create_continuation(__HYPERVISOR_vcpu_op, "iih",
                                               cmd, vcpuid, arg);

        break;
    }

    default:
        rc = -ENOSYS;
        break;
    }

    return rc;
}

#ifdef arch_vm_assist_valid_mask
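/*
 * Enable or disable a VM assist for the current domain.  Only assists
 * permitted by arch_vm_assist_valid_mask() are accepted.
 */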
long do_vm_assist(unsigned int cmd, unsigned int type)
{
    struct domain *currd = current->domain;
    const unsigned long valid = arch_vm_assist_valid_mask(currd);

    if ( type >= BITS_PER_LONG || !test_bit(type, &valid) )
        return -EINVAL;

    switch ( cmd )
    {
    case VMASST_CMD_enable:
        set_bit(type, &currd->vm_assist);
        return 0;

    case VMASST_CMD_disable:
        clear_bit(type, &currd->vm_assist);
        return 0;
    }

    return -ENOSYS;
}
#endif

#ifdef CONFIG_HAS_PIRQ

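/*
 * Look up the struct pirq for @pirq, allocating it and inserting it into the
 * domain's pirq_tree on first use.  Returns NULL if allocation or insertion
 * fails.
 */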
struct pirq *pirq_get_info(struct domain *d, int pirq)
{
    struct pirq *info = pirq_info(d, pirq);

    if ( !info && (info = alloc_pirq_struct(d)) != NULL )
    {
        info->pirq = pirq;
        if ( radix_tree_insert(&d->pirq_tree, pirq, info) )
        {
            free_pirq_struct(info);
            info = NULL;
        }
    }

    return info;
}

static void cf_check _free_pirq_struct(struct rcu_head *head)
{
    xfree(container_of(head, struct pirq, rcu_head));
}

void cf_check free_pirq_struct(void *ptr)
{
    struct pirq *pirq = ptr;

    call_rcu(&pirq->rcu_head, _free_pirq_struct);
}

#endif /* CONFIG_HAS_PIRQ */

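/*
 * Book-keeping for continue_hypercall_on_cpu(): which function to run with
 * which argument, on which CPU, and on behalf of which (paused) vCPU.
 */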
struct migrate_info {
    long (*func)(void *data);
    void *data;
    struct vcpu *vcpu;
    unsigned int cpu;
    unsigned int nest;
};

static DEFINE_PER_CPU(struct migrate_info *, continue_info);

static void cf_check continue_hypercall_tasklet_handler(void *data)
{
    struct migrate_info *info = data;
    struct vcpu *v = info->vcpu;
    long res = -EINVAL;

    /* Wait for vcpu to sleep so that we can access its register state. */
    vcpu_sleep_sync(v);

    this_cpu(continue_info) = info;

    if ( likely(info->cpu == smp_processor_id()) )
        res = info->func(info->data);

    arch_hypercall_tasklet_result(v, res);

    this_cpu(continue_info) = NULL;

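    /*
     * Tear down only if no further continuation was queued from within
     * func(): a nested continue_hypercall_on_cpu() call bumps ->nest and
     * defers the cleanup to the next run of this handler.
     */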
    if ( info->nest-- == 0 )
    {
        xfree(info);
        vcpu_unpause(v);
        put_domain(v->domain);
    }
}

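/*
 * Run @func(@data) in tasklet context on @cpu, on behalf of the current
 * vCPU, which is paused until the result has been delivered via
 * arch_hypercall_tasklet_result().  Used e.g. by the pv_shim VCPUOP_up/down
 * paths above.
 */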
int continue_hypercall_on_cpu(
    unsigned int cpu, long (*func)(void *data), void *data)
{
    struct migrate_info *info;

    if ( (cpu >= nr_cpu_ids) || !cpu_online(cpu) )
        return -EINVAL;

    info = this_cpu(continue_info);
    if ( info == NULL )
    {
        struct vcpu *curr = current;

        info = xmalloc(struct migrate_info);
        if ( info == NULL )
            return -ENOMEM;

        info->vcpu = curr;
        info->nest = 0;

        tasklet_kill(&curr->continue_hypercall_tasklet);
        tasklet_init(&curr->continue_hypercall_tasklet,
                     continue_hypercall_tasklet_handler, info);

        get_knownalive_domain(curr->domain);
        vcpu_pause_nosync(curr);
    }
    else
    {
        BUG_ON(info->nest != 0);
        info->nest++;
    }

    info->func = func;
    info->data = data;
    info->cpu  = cpu;

    tasklet_schedule_on_cpu(&info->vcpu->continue_hypercall_tasklet, cpu);

    /* Dummy return value will be overwritten by tasklet. */
    return 0;
}

/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */