/******************************************************************************
 * domain.c
 *
 * Generic domain-handling functions.
 */

#include <xen/compat.h>
#include <xen/init.h>
#include <xen/lib.h>
#include <xen/ctype.h>
#include <xen/err.h>
#include <xen/sched.h>
#include <xen/sched-if.h>
#include <xen/domain.h>
#include <xen/mm.h>
#include <xen/event.h>
#include <xen/vm_event.h>
#include <xen/time.h>
#include <xen/console.h>
#include <xen/softirq.h>
#include <xen/tasklet.h>
#include <xen/domain_page.h>
#include <xen/rangeset.h>
#include <xen/guest_access.h>
#include <xen/hypercall.h>
#include <xen/delay.h>
#include <xen/shutdown.h>
#include <xen/percpu.h>
#include <xen/multicall.h>
#include <xen/rcupdate.h>
#include <xen/wait.h>
#include <xen/grant_table.h>
#include <xen/xenoprof.h>
#include <xen/irq.h>
#include <asm/debugger.h>
#include <asm/p2m.h>
#include <asm/processor.h>
#include <public/sched.h>
#include <public/sysctl.h>
#include <public/vcpu.h>
#include <xsm/xsm.h>
#include <xen/trace.h>
#include <xen/tmem.h>
#include <asm/setup.h>

#ifdef CONFIG_X86
#include <asm/guest.h>
#endif

/* Linux config option: propagated to domain0 */
/* xen_processor_pmbits: Xen controls Cx, Px, ... */
unsigned int xen_processor_pmbits = XEN_PROCESSOR_PM_PX;

/* opt_dom0_vcpus_pin: If true, dom0 VCPUs are pinned. */
bool_t opt_dom0_vcpus_pin;
boolean_param("dom0_vcpus_pin", opt_dom0_vcpus_pin);

/* Protect updates/reads (resp.) of domain_list and domain_hash. */
DEFINE_SPINLOCK(domlist_update_lock);
DEFINE_RCU_READ_LOCK(domlist_read_lock);

#define DOMAIN_HASH_SIZE 256
#define DOMAIN_HASH(_id) ((int)(_id)&(DOMAIN_HASH_SIZE-1))
static struct domain *domain_hash[DOMAIN_HASH_SIZE];
struct domain *domain_list;

struct domain *hardware_domain __read_mostly;

#ifdef CONFIG_LATE_HWDOM
domid_t hardware_domid __read_mostly;
integer_param("hardware_dom", hardware_domid);
#endif

struct vcpu *idle_vcpu[NR_CPUS] __read_mostly;

vcpu_info_t dummy_vcpu_info;

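/*
 * Caller must hold d->shutdown_lock.  Once every vCPU of the domain has
 * paused itself for shutdown, mark the shutdown complete and notify the
 * registered suspend event channel (for SHUTDOWN_suspend) or raise
 * VIRQ_DOM_EXC for the toolstack.
 */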
static void __domain_finalise_shutdown(struct domain *d)
{
    struct vcpu *v;

    BUG_ON(!spin_is_locked(&d->shutdown_lock));

    if ( d->is_shut_down )
        return;

    for_each_vcpu ( d, v )
        if ( !v->paused_for_shutdown )
            return;

    d->is_shut_down = 1;
    if ( (d->shutdown_code == SHUTDOWN_suspend) && d->suspend_evtchn )
        evtchn_send(d, d->suspend_evtchn);
    else
        send_global_virq(VIRQ_DOM_EXC);
}

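/*
 * If the domain has started shutting down, pause this vCPU for shutdown
 * (cancelling any deferral it may have registered) and re-check whether
 * the whole-domain shutdown can now be finalised.
 */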
static void vcpu_check_shutdown(struct vcpu *v)
{
    struct domain *d = v->domain;

    spin_lock(&d->shutdown_lock);

    if ( d->is_shutting_down )
    {
        if ( !v->paused_for_shutdown )
            vcpu_pause_nosync(v);
        v->paused_for_shutdown = 1;
        v->defer_shutdown = 0;
        __domain_finalise_shutdown(d);
    }

    spin_unlock(&d->shutdown_lock);
}

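/*
 * Point v->vcpu_info back at its default location: the per-vCPU slot in
 * the shared info page for vCPU IDs below XEN_LEGACY_MAX_VCPUS, or the
 * global dummy_vcpu_info otherwise.
 */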
static void vcpu_info_reset(struct vcpu *v)
{
    struct domain *d = v->domain;

    v->vcpu_info = ((v->vcpu_id < XEN_LEGACY_MAX_VCPUS)
                    ? (vcpu_info_t *)&shared_info(d, vcpu_info[v->vcpu_id])
                    : &dummy_vcpu_info);
    v->vcpu_info_mfn = INVALID_MFN;
}

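/*
 * Allocate and initialise vCPU @vcpu_id of domain @d, placing it on
 * processor @cpu_id for scheduling purposes, and link it into the domain's
 * vCPU list.  Returns the new vCPU, or NULL on failure.
 */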
struct vcpu *alloc_vcpu(
    struct domain *d, unsigned int vcpu_id, unsigned int cpu_id)
{
    struct vcpu *v;

    BUG_ON((!is_idle_domain(d) || vcpu_id) && d->vcpu[vcpu_id]);

    if ( (v = alloc_vcpu_struct()) == NULL )
        return NULL;

    v->domain = d;
    v->vcpu_id = vcpu_id;

    spin_lock_init(&v->virq_lock);

    tasklet_init(&v->continue_hypercall_tasklet, NULL, 0);

    grant_table_init_vcpu(v);

    if ( !zalloc_cpumask_var(&v->cpu_hard_affinity) ||
         !zalloc_cpumask_var(&v->cpu_hard_affinity_tmp) ||
         !zalloc_cpumask_var(&v->cpu_hard_affinity_saved) ||
         !zalloc_cpumask_var(&v->cpu_soft_affinity) ||
         !zalloc_cpumask_var(&v->vcpu_dirty_cpumask) )
        goto fail_free;

    if ( is_idle_domain(d) )
    {
        v->runstate.state = RUNSTATE_running;
    }
    else
    {
        v->runstate.state = RUNSTATE_offline;
        v->runstate.state_entry_time = NOW();
        set_bit(_VPF_down, &v->pause_flags);
        vcpu_info_reset(v);
        init_waitqueue_vcpu(v);
    }

    if ( sched_init_vcpu(v, cpu_id) != 0 )
        goto fail_wq;

    if ( vcpu_initialise(v) != 0 )
    {
        sched_destroy_vcpu(v);
 fail_wq:
        destroy_waitqueue_vcpu(v);
 fail_free:
        free_cpumask_var(v->cpu_hard_affinity);
        free_cpumask_var(v->cpu_hard_affinity_tmp);
        free_cpumask_var(v->cpu_hard_affinity_saved);
        free_cpumask_var(v->cpu_soft_affinity);
        free_cpumask_var(v->vcpu_dirty_cpumask);
        free_vcpu_struct(v);
        return NULL;
    }

    d->vcpu[vcpu_id] = v;
    if ( vcpu_id != 0 )
    {
        int prev_id = v->vcpu_id - 1;
        while ( (prev_id >= 0) && (d->vcpu[prev_id] == NULL) )
            prev_id--;
        BUG_ON(prev_id < 0);
        v->next_in_list = d->vcpu[prev_id]->next_in_list;
        d->vcpu[prev_id]->next_in_list = v;
    }

    /* Must be called after making new vcpu visible to for_each_vcpu(). */
    vcpu_check_shutdown(v);

    if ( !is_idle_domain(d) )
        domain_update_node_affinity(d);

    return v;
}

static int late_hwdom_init(struct domain *d)
{
#ifdef CONFIG_LATE_HWDOM
    struct domain *dom0;
    int rv;

    if ( d != hardware_domain || d->domain_id == 0 )
        return 0;

    rv = xsm_init_hardware_domain(XSM_HOOK, d);
    if ( rv )
        return rv;

    printk("Initialising hardware domain %d\n", hardware_domid);

    dom0 = rcu_lock_domain_by_id(0);
    ASSERT(dom0 != NULL);
    /*
     * Hardware resource ranges for domain 0 have been set up from
     * various sources intended to restrict the hardware domain's
     * access.  Apply these ranges to the actual hardware domain.
     *
     * Because the lists are being swapped, a side effect of this
     * operation is that Domain 0's rangesets are cleared.  Since
     * domain 0 should not be accessing the hardware when it constructs
     * a hardware domain, this should not be a problem.  Both lists
     * may be modified after this hypercall returns if a more complex
     * device model is desired.
     */
    rangeset_swap(d->irq_caps, dom0->irq_caps);
    rangeset_swap(d->iomem_caps, dom0->iomem_caps);
#ifdef CONFIG_X86
    rangeset_swap(d->arch.ioport_caps, dom0->arch.ioport_caps);
    setup_io_bitmap(d);
    setup_io_bitmap(dom0);
#endif

    rcu_unlock_domain(dom0);

    iommu_hwdom_init(d);

    return rv;
#else
    return 0;
#endif
}

static unsigned int __read_mostly extra_hwdom_irqs;
static unsigned int __read_mostly extra_domU_irqs = 32;

static int __init parse_extra_guest_irqs(const char *s)
{
    if ( isdigit(*s) )
        extra_domU_irqs = simple_strtoul(s, &s, 0);
    if ( *s == ',' && isdigit(*++s) )
        extra_hwdom_irqs = simple_strtoul(s, &s, 0);

    return *s ? -EINVAL : 0;
}
custom_param("extra_guest_irqs", parse_extra_guest_irqs);

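/*
 * Create and partially initialise a new domain with the given domain ID,
 * creation flags and XSM ssidref.  On any failure the components already
 * set up (tracked in init_status) are torn down again and an ERR_PTR()
 * encoded error is returned.
 */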
struct domain *domain_create(domid_t domid, unsigned int domcr_flags,
                             uint32_t ssidref,
                             struct xen_arch_domainconfig *config)
{
    struct domain *d, **pd, *old_hwdom = NULL;
    enum { INIT_xsm = 1u<<0, INIT_watchdog = 1u<<1, INIT_rangeset = 1u<<2,
           INIT_evtchn = 1u<<3, INIT_gnttab = 1u<<4, INIT_arch = 1u<<5 };
    int err, init_status = 0;
    int poolid = CPUPOOLID_NONE;

    if ( (d = alloc_domain_struct()) == NULL )
        return ERR_PTR(-ENOMEM);

    d->domain_id = domid;

    TRACE_1D(TRC_DOM0_DOM_ADD, d->domain_id);

    lock_profile_register_struct(LOCKPROF_TYPE_PERDOM, d, domid, "Domain");

    if ( (err = xsm_alloc_security_domain(d)) != 0 )
        goto fail;
    init_status |= INIT_xsm;

    watchdog_domain_init(d);
    init_status |= INIT_watchdog;

    atomic_set(&d->refcnt, 1);
    spin_lock_init_prof(d, domain_lock);
    spin_lock_init_prof(d, page_alloc_lock);
    spin_lock_init(&d->hypercall_deadlock_mutex);
    INIT_PAGE_LIST_HEAD(&d->page_list);
    INIT_PAGE_LIST_HEAD(&d->xenpage_list);

    spin_lock_init(&d->node_affinity_lock);
    d->node_affinity = NODE_MASK_ALL;
    d->auto_node_affinity = 1;

    spin_lock_init(&d->shutdown_lock);
    d->shutdown_code = SHUTDOWN_CODE_INVALID;

    spin_lock_init(&d->pbuf_lock);

    rwlock_init(&d->vnuma_rwlock);

    err = -ENOMEM;
    if ( !zalloc_cpumask_var(&d->domain_dirty_cpumask) )
        goto fail;

    if ( domcr_flags & DOMCRF_hvm )
        d->guest_type = guest_type_hvm;
    else
        d->guest_type = guest_type_pv;

    if ( domid == 0 || domid == hardware_domid )
    {
        if ( hardware_domid < 0 || hardware_domid >= DOMID_FIRST_RESERVED )
            panic("The value of hardware_dom must be a valid domain ID");
        d->is_pinned = opt_dom0_vcpus_pin;
        d->disable_migrate = 1;
        old_hwdom = hardware_domain;
        hardware_domain = d;
    }

    if ( domcr_flags & DOMCRF_xs_domain )
    {
        d->is_xenstore = 1;
        d->disable_migrate = 1;
    }

    rangeset_domain_initialise(d);
    init_status |= INIT_rangeset;

    d->iomem_caps = rangeset_new(d, "I/O Memory", RANGESETF_prettyprint_hex);
    d->irq_caps   = rangeset_new(d, "Interrupts", 0);
    if ( (d->iomem_caps == NULL) || (d->irq_caps == NULL) )
        goto fail;

    if ( domcr_flags & DOMCRF_dummy )
        return d;

    if ( !is_idle_domain(d) )
    {
        if ( (err = xsm_domain_create(XSM_HOOK, d, ssidref)) != 0 )
            goto fail;

        d->controller_pause_count = 1;
        atomic_inc(&d->pause_count);

        if ( !is_hardware_domain(d) )
            d->nr_pirqs = nr_static_irqs + extra_domU_irqs;
        else
            d->nr_pirqs = extra_hwdom_irqs ? nr_static_irqs + extra_hwdom_irqs
                                           : arch_hwdom_irqs(domid);
        if ( d->nr_pirqs > nr_irqs )
            d->nr_pirqs = nr_irqs;

        radix_tree_init(&d->pirq_tree);

        if ( (err = evtchn_init(d)) != 0 )
            goto fail;
        init_status |= INIT_evtchn;

        if ( (err = grant_table_create(d)) != 0 )
            goto fail;
        init_status |= INIT_gnttab;

        poolid = 0;

        err = -ENOMEM;

        d->pbuf = xzalloc_array(char, DOMAIN_PBUF_SIZE);
        if ( !d->pbuf )
            goto fail;
    }

    if ( (err = arch_domain_create(d, domcr_flags, config)) != 0 )
        goto fail;
    init_status |= INIT_arch;

    if ( (err = sched_init_domain(d, poolid)) != 0 )
        goto fail;

    if ( (err = late_hwdom_init(d)) != 0 )
        goto fail;

    if ( !is_idle_domain(d) )
    {
        spin_lock(&domlist_update_lock);
        pd = &domain_list; /* NB. domain_list maintained in order of domid. */
        for ( pd = &domain_list; *pd != NULL; pd = &(*pd)->next_in_list )
            if ( (*pd)->domain_id > d->domain_id )
                break;
        d->next_in_list = *pd;
        d->next_in_hashbucket = domain_hash[DOMAIN_HASH(domid)];
        rcu_assign_pointer(*pd, d);
        rcu_assign_pointer(domain_hash[DOMAIN_HASH(domid)], d);
        spin_unlock(&domlist_update_lock);
    }

    return d;

 fail:
    d->is_dying = DOMDYING_dead;
    if ( hardware_domain == d )
        hardware_domain = old_hwdom;
    atomic_set(&d->refcnt, DOMAIN_DESTROYED);
    xfree(d->pbuf);
    if ( init_status & INIT_arch )
        arch_domain_destroy(d);
    if ( init_status & INIT_gnttab )
        grant_table_destroy(d);
    if ( init_status & INIT_evtchn )
    {
        evtchn_destroy(d);
        evtchn_destroy_final(d);
        radix_tree_destroy(&d->pirq_tree, free_pirq_struct);
    }
    if ( init_status & INIT_rangeset )
        rangeset_domain_destroy(d);
    if ( init_status & INIT_watchdog )
        watchdog_domain_destroy(d);
    if ( init_status & INIT_xsm )
        xsm_free_security_domain(d);
    free_cpumask_var(d->domain_dirty_cpumask);
    free_domain_struct(d);
    return ERR_PTR(err);
}


void domain_update_node_affinity(struct domain *d)
{
    cpumask_var_t dom_cpumask, dom_cpumask_soft;
    cpumask_t *dom_affinity;
    const cpumask_t *online;
    struct vcpu *v;
    unsigned int cpu;

    /* Do we have vcpus already? If not, no need to update node-affinity. */
    if ( !d->vcpu || !d->vcpu[0] )
        return;

    if ( !zalloc_cpumask_var(&dom_cpumask) )
        return;
    if ( !zalloc_cpumask_var(&dom_cpumask_soft) )
    {
        free_cpumask_var(dom_cpumask);
        return;
    }

    online = cpupool_domain_cpumask(d);

    spin_lock(&d->node_affinity_lock);

    /*
     * If d->auto_node_affinity is true, let's compute the domain's
     * node-affinity and update d->node_affinity accordingly. If false,
     * just leave d->node_affinity alone.
     */
    if ( d->auto_node_affinity )
    {
        /*
         * We want the narrowest possible set of pcpus (to get the narrowest
         * possible set of nodes). What we need is the cpumask of where the
         * domain can run (the union of the hard affinity of all its vcpus),
         * and the full mask of where it would prefer to run (the union of
         * the soft affinity of all its various vcpus). Let's build them.
         */
        for_each_vcpu ( d, v )
        {
            cpumask_or(dom_cpumask, dom_cpumask, v->cpu_hard_affinity);
            cpumask_or(dom_cpumask_soft, dom_cpumask_soft,
                       v->cpu_soft_affinity);
        }
        /* Filter out non-online cpus */
        cpumask_and(dom_cpumask, dom_cpumask, online);
        ASSERT(!cpumask_empty(dom_cpumask));
        /* And compute the intersection between hard, online and soft */
        cpumask_and(dom_cpumask_soft, dom_cpumask_soft, dom_cpumask);

        /*
         * If not empty, the intersection of hard, soft and online is the
         * narrowest set we want. If empty, we fall back to hard&online.
         */
        dom_affinity = cpumask_empty(dom_cpumask_soft) ?
                           dom_cpumask : dom_cpumask_soft;

        nodes_clear(d->node_affinity);
        for_each_cpu ( cpu, dom_affinity )
            node_set(cpu_to_node(cpu), d->node_affinity);
    }

    spin_unlock(&d->node_affinity_lock);

    free_cpumask_var(dom_cpumask_soft);
    free_cpumask_var(dom_cpumask);
}


int domain_set_node_affinity(struct domain *d, const nodemask_t *affinity)
{
    /* Being affine with no nodes is just wrong */
    if ( nodes_empty(*affinity) )
        return -EINVAL;

    spin_lock(&d->node_affinity_lock);

    /*
     * Being/becoming explicitly affine to all nodes is not particularly
     * useful. Let's take it as the `reset node affinity` command.
     */
    if ( nodes_full(*affinity) )
    {
        d->auto_node_affinity = 1;
        goto out;
    }

    d->auto_node_affinity = 0;
    d->node_affinity = *affinity;

out:
    spin_unlock(&d->node_affinity_lock);

    domain_update_node_affinity(d);

    return 0;
}


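/*
 * Look up a domain by ID and take a general reference to it (get_domain()),
 * which the caller must drop with put_domain().  Contrast with
 * rcu_lock_domain_by_id() below, which only holds the domain under an RCU
 * read-side lock.
 */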
struct domain *get_domain_by_id(domid_t dom)
{
    struct domain *d;

    rcu_read_lock(&domlist_read_lock);

    for ( d = rcu_dereference(domain_hash[DOMAIN_HASH(dom)]);
          d != NULL;
          d = rcu_dereference(d->next_in_hashbucket) )
    {
        if ( d->domain_id == dom )
        {
            if ( unlikely(!get_domain(d)) )
                d = NULL;
            break;
        }
    }

    rcu_read_unlock(&domlist_read_lock);

    return d;
}


struct domain *rcu_lock_domain_by_id(domid_t dom)
{
    struct domain *d = NULL;

    rcu_read_lock(&domlist_read_lock);

    for ( d = rcu_dereference(domain_hash[DOMAIN_HASH(dom)]);
          d != NULL;
          d = rcu_dereference(d->next_in_hashbucket) )
    {
        if ( d->domain_id == dom )
        {
            rcu_lock_domain(d);
            break;
        }
    }

    rcu_read_unlock(&domlist_read_lock);

    return d;
}

struct domain *rcu_lock_domain_by_any_id(domid_t dom)
{
    if ( dom == DOMID_SELF )
        return rcu_lock_current_domain();
    return rcu_lock_domain_by_id(dom);
}

int rcu_lock_remote_domain_by_id(domid_t dom, struct domain **d)
{
    if ( (*d = rcu_lock_domain_by_id(dom)) == NULL )
        return -ESRCH;

    if ( *d == current->domain )
    {
        rcu_unlock_domain(*d);
        return -EPERM;
    }

    return 0;
}

int rcu_lock_live_remote_domain_by_id(domid_t dom, struct domain **d)
{
    int rv;
    rv = rcu_lock_remote_domain_by_id(dom, d);
    if ( rv )
        return rv;
    if ( (*d)->is_dying )
    {
        rcu_unlock_domain(*d);
        return -EINVAL;
    }

    return 0;
}

int domain_kill(struct domain *d)
{
    int rc = 0;
    struct vcpu *v;

    if ( d == current->domain )
        return -EINVAL;

    /* Protected by domctl_lock. */
    switch ( d->is_dying )
    {
    case DOMDYING_alive:
        domain_pause(d);
        d->is_dying = DOMDYING_dying;
        spin_barrier(&d->domain_lock);
        evtchn_destroy(d);
        gnttab_release_mappings(d);
        tmem_destroy(d->tmem_client);
        vnuma_destroy(d->vnuma);
        domain_set_outstanding_pages(d, 0);
        d->tmem_client = NULL;
        /* fallthrough */
    case DOMDYING_dying:
        rc = domain_relinquish_resources(d);
        if ( rc != 0 )
            break;
        if ( cpupool_move_domain(d, cpupool0) )
            return -ERESTART;
        for_each_vcpu ( d, v )
            unmap_vcpu_info(v);
        d->is_dying = DOMDYING_dead;
        /* Mem event cleanup has to go here because the rings
         * have to be put before we call put_domain. */
        vm_event_cleanup(d);
        put_domain(d);
        send_global_virq(VIRQ_DOM_EXC);
        /* fallthrough */
    case DOMDYING_dead:
        break;
    }

    return rc;
}


void __domain_crash(struct domain *d)
{
    if ( d->is_shutting_down )
    {
        /* Print nothing: the domain is already shutting down. */
    }
    else if ( d == current->domain )
    {
        printk("Domain %d (vcpu#%d) crashed on cpu#%d:\n",
               d->domain_id, current->vcpu_id, smp_processor_id());
        show_execution_state(guest_cpu_user_regs());
    }
    else
    {
        printk("Domain %d reported crashed by domain %d on cpu#%d:\n",
               d->domain_id, current->domain->domain_id, smp_processor_id());
    }

    domain_shutdown(d, SHUTDOWN_crash);
}


void __domain_crash_synchronous(void)
{
    __domain_crash(current->domain);

    vcpu_end_shutdown_deferral(current);

    for ( ; ; )
        do_softirq();
}


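/*
 * Initiate an orderly shutdown of domain @d with the given reason code.
 * vCPUs that have registered a shutdown deferral are left running unless
 * the reason is SHUTDOWN_crash; all others are paused, and the shutdown is
 * finalised once every vCPU has paused itself.
 */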
int domain_shutdown(struct domain *d, u8 reason)
{
    struct vcpu *v;

#ifdef CONFIG_X86
    if ( pv_shim )
        return pv_shim_shutdown(reason);
#endif

    spin_lock(&d->shutdown_lock);

    if ( d->shutdown_code == SHUTDOWN_CODE_INVALID )
        d->shutdown_code = reason;
    reason = d->shutdown_code;

    if ( is_hardware_domain(d) )
        hwdom_shutdown(reason);

    if ( d->is_shutting_down )
    {
        spin_unlock(&d->shutdown_lock);
        return 0;
    }

    d->is_shutting_down = 1;

    smp_mb(); /* set shutdown status /then/ check for per-cpu deferrals */

    for_each_vcpu ( d, v )
    {
        if ( reason == SHUTDOWN_crash )
            v->defer_shutdown = 0;
        else if ( v->defer_shutdown )
            continue;
        vcpu_pause_nosync(v);
        v->paused_for_shutdown = 1;
    }

    arch_domain_shutdown(d);

    __domain_finalise_shutdown(d);

    spin_unlock(&d->shutdown_lock);

    return 0;
}

void domain_resume(struct domain *d)
{
    struct vcpu *v;

    /*
     * Some code paths assume that shutdown status does not get reset under
     * their feet (e.g., some assertions make this assumption).
     */
    domain_pause(d);

    spin_lock(&d->shutdown_lock);

    d->is_shutting_down = d->is_shut_down = 0;
    d->shutdown_code = SHUTDOWN_CODE_INVALID;

    for_each_vcpu ( d, v )
    {
        if ( v->paused_for_shutdown )
            vcpu_unpause(v);
        v->paused_for_shutdown = 0;
    }

    spin_unlock(&d->shutdown_lock);

    domain_unpause(d);
}

int vcpu_start_shutdown_deferral(struct vcpu *v)
{
    if ( v->defer_shutdown )
        return 1;

    v->defer_shutdown = 1;
    smp_mb(); /* set deferral status /then/ check for shutdown */
    if ( unlikely(v->domain->is_shutting_down) )
        vcpu_check_shutdown(v);

    return v->defer_shutdown;
}

void vcpu_end_shutdown_deferral(struct vcpu *v)
{
    v->defer_shutdown = 0;
    smp_mb(); /* clear deferral status /then/ check for shutdown */
    if ( unlikely(v->domain->is_shutting_down) )
        vcpu_check_shutdown(v);
}

#ifdef CONFIG_HAS_GDBSX
void domain_pause_for_debugger(void)
{
    struct vcpu *curr = current;
    struct domain *d = curr->domain;

    domain_pause_by_systemcontroller_nosync(d);

    /* if gdbsx active, we just need to pause the domain */
    if ( curr->arch.gdbsx_vcpu_event == 0 )
        send_global_virq(VIRQ_DEBUGGER);
}
#endif

/* Complete domain destroy after RCU readers are not holding old references. */
static void complete_domain_destroy(struct rcu_head *head)
{
    struct domain *d = container_of(head, struct domain, rcu);
    struct vcpu *v;
    int i;

    /*
     * Flush all state for the vCPU previously having run on the current CPU.
     * This is in particular relevant for x86 HVM ones on VMX, so that this
     * flushing of state won't happen from the TLB flush IPI handler behind
     * the back of a vmx_vmcs_enter() / vmx_vmcs_exit() section.
     */
    sync_local_execstate();

    for ( i = d->max_vcpus - 1; i >= 0; i-- )
    {
        if ( (v = d->vcpu[i]) == NULL )
            continue;
        tasklet_kill(&v->continue_hypercall_tasklet);
        vcpu_destroy(v);
        sched_destroy_vcpu(v);
        destroy_waitqueue_vcpu(v);
    }

    grant_table_destroy(d);

    arch_domain_destroy(d);

    watchdog_domain_destroy(d);

    rangeset_domain_destroy(d);

    sched_destroy_domain(d);

    /* Free page used by xen oprofile buffer. */
#ifdef CONFIG_XENOPROF
    free_xenoprof_pages(d);
#endif

#ifdef CONFIG_HAS_MEM_PAGING
    xfree(d->vm_event_paging);
#endif
    xfree(d->vm_event_monitor);
#ifdef CONFIG_HAS_MEM_SHARING
    xfree(d->vm_event_share);
#endif

    xfree(d->pbuf);

    for ( i = d->max_vcpus - 1; i >= 0; i-- )
        if ( (v = d->vcpu[i]) != NULL )
        {
            free_cpumask_var(v->cpu_hard_affinity);
            free_cpumask_var(v->cpu_hard_affinity_tmp);
            free_cpumask_var(v->cpu_hard_affinity_saved);
            free_cpumask_var(v->cpu_soft_affinity);
            free_cpumask_var(v->vcpu_dirty_cpumask);
            free_vcpu_struct(v);
        }

    if ( d->target != NULL )
        put_domain(d->target);

    evtchn_destroy_final(d);

    radix_tree_destroy(&d->pirq_tree, free_pirq_struct);

    xsm_free_security_domain(d);
    free_cpumask_var(d->domain_dirty_cpumask);
    xfree(d->vcpu);
    free_domain_struct(d);

    send_global_virq(VIRQ_DOM_EXC);
}

/* Release resources belonging to domain @d. */
void domain_destroy(struct domain *d)
{
    struct domain **pd;

    BUG_ON(!d->is_dying);

    /* May be already destroyed, or get_domain() can race us. */
    if ( atomic_cmpxchg(&d->refcnt, 0, DOMAIN_DESTROYED) != 0 )
        return;

    TRACE_1D(TRC_DOM0_DOM_REM, d->domain_id);

    /* Delete from task list and task hashtable. */
    spin_lock(&domlist_update_lock);
    pd = &domain_list;
    while ( *pd != d )
        pd = &(*pd)->next_in_list;
    rcu_assign_pointer(*pd, d->next_in_list);
    pd = &domain_hash[DOMAIN_HASH(d->domain_id)];
    while ( *pd != d )
        pd = &(*pd)->next_in_hashbucket;
    rcu_assign_pointer(*pd, d->next_in_hashbucket);
    spin_unlock(&domlist_update_lock);

    /* Schedule RCU asynchronous completion of domain destroy. */
    call_rcu(&d->rcu, complete_domain_destroy);
}

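/*
 * vcpu_pause() raises the pause count and synchronously waits for the vCPU
 * to be descheduled; vcpu_pause_nosync() only raises the count and requests
 * descheduling, without waiting for it to take effect.
 */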
void vcpu_pause(struct vcpu *v)
{
    ASSERT(v != current);
    atomic_inc(&v->pause_count);
    vcpu_sleep_sync(v);
}

void vcpu_pause_nosync(struct vcpu *v)
{
    atomic_inc(&v->pause_count);
    vcpu_sleep_nosync(v);
}

void vcpu_unpause(struct vcpu *v)
{
    if ( atomic_dec_and_test(&v->pause_count) )
        vcpu_wake(v);
}

int vcpu_pause_by_systemcontroller(struct vcpu *v)
{
    int old, new, prev = v->controller_pause_count;

    do
    {
        old = prev;
        new = old + 1;

        if ( new > 255 )
            return -EOVERFLOW;

        prev = cmpxchg(&v->controller_pause_count, old, new);
    } while ( prev != old );

    vcpu_pause(v);

    return 0;
}

int vcpu_unpause_by_systemcontroller(struct vcpu *v)
{
    int old, new, prev = v->controller_pause_count;

    do
    {
        old = prev;
        new = old - 1;

        if ( new < 0 )
            return -EINVAL;

        prev = cmpxchg(&v->controller_pause_count, old, new);
    } while ( prev != old );

    vcpu_unpause(v);

    return 0;
}

static void do_domain_pause(struct domain *d,
                            void (*sleep_fn)(struct vcpu *v))
{
    struct vcpu *v;

    atomic_inc(&d->pause_count);

    for_each_vcpu( d, v )
        sleep_fn(v);

    arch_domain_pause(d);
}

void domain_pause(struct domain *d)
{
    ASSERT(d != current->domain);
    do_domain_pause(d, vcpu_sleep_sync);
}

void domain_pause_nosync(struct domain *d)
{
    do_domain_pause(d, vcpu_sleep_nosync);
}

void domain_unpause(struct domain *d)
{
    struct vcpu *v;

    arch_domain_unpause(d);

    if ( atomic_dec_and_test(&d->pause_count) )
        for_each_vcpu( d, v )
            vcpu_wake(v);
}

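/*
 * Toolstack-requested pause.  The controller pause count is tracked
 * separately from d->pause_count (and capped at 255) so that repeated or
 * unbalanced hypercalls cannot overflow the generic pause count; see
 * domain_unpause_by_systemcontroller() below for the pairing rules.
 *
 * Illustrative usage only (assuming the usual pause/unpause wrappers
 * declared alongside this function's prototype):
 *
 *     if ( !domain_pause_by_systemcontroller(d) )
 *     {
 *         ... operate on d while all of its vCPUs are descheduled ...
 *         domain_unpause_by_systemcontroller(d);
 *     }
 */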
int __domain_pause_by_systemcontroller(struct domain *d,
                                       void (*pause_fn)(struct domain *d))
{
    int old, new, prev = d->controller_pause_count;

    do
    {
        old = prev;
        new = old + 1;

        /*
         * Limit the toolstack pause count to an arbitrary 255 to prevent the
         * toolstack overflowing d->pause_count with many repeated hypercalls.
         */
        if ( new > 255 )
            return -EOVERFLOW;

        prev = cmpxchg(&d->controller_pause_count, old, new);
    } while ( prev != old );

    pause_fn(d);

    return 0;
}

int domain_unpause_by_systemcontroller(struct domain *d)
{
    int old, new, prev = d->controller_pause_count;

    do
    {
        old = prev;
        new = old - 1;

        if ( new < 0 )
            return -EINVAL;

        prev = cmpxchg(&d->controller_pause_count, old, new);
    } while ( prev != old );

    /*
     * d->controller_pause_count is initialised to 1, and the toolstack is
     * responsible for making one unpause hypercall when it wishes the guest
     * to start running.
     *
     * All other toolstack operations should make a pair of pause/unpause
     * calls and rely on the reference counting here.
     *
     * Creation is considered finished when the controller reference count
     * first drops to 0.
     */
    if ( new == 0 )
        d->creation_finished = true;

    domain_unpause(d);

    return 0;
}

void domain_pause_except_self(struct domain *d)
{
    struct vcpu *v, *curr = current;

    if ( curr->domain == d )
    {
        for_each_vcpu( d, v )
            if ( likely(v != curr) )
                vcpu_pause(v);
    }
    else
        domain_pause(d);
}

void domain_unpause_except_self(struct domain *d)
{
    struct vcpu *v, *curr = current;

    if ( curr->domain == d )
    {
        for_each_vcpu( d, v )
            if ( likely(v != curr) )
                vcpu_unpause(v);
    }
    else
        domain_unpause(d);
}

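/*
 * Soft reset: every vCPU must already have paused itself for shutdown.
 * Event channels are reset, active grants are reported, runstate areas and
 * mapped vcpu_info pages are discarded, and the arch code performs its own
 * reset.  The domain is resumed on success and crashed on failure.
 */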
int domain_soft_reset(struct domain *d)
{
    struct vcpu *v;
    int rc;

    spin_lock(&d->shutdown_lock);
    for_each_vcpu ( d, v )
        if ( !v->paused_for_shutdown )
        {
            spin_unlock(&d->shutdown_lock);
            return -EINVAL;
        }
    spin_unlock(&d->shutdown_lock);

    rc = evtchn_reset(d);
    if ( rc )
        return rc;

    grant_table_warn_active_grants(d);

    for_each_vcpu ( d, v )
    {
        set_xen_guest_handle(runstate_guest(v), NULL);
        unmap_vcpu_info(v);
    }

    rc = arch_domain_soft_reset(d);
    if ( !rc )
        domain_resume(d);
    else
        domain_crash(d);

    return rc;
}

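/*
 * Reset a vCPU to its post-creation state: arch state is reset, the vCPU is
 * marked offline (_VPF_down), and its polling, FPU and initialisation flags
 * are cleared.  Runs with the vCPU paused and the domain lock held.
 */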
int vcpu_reset(struct vcpu *v)
{
    struct domain *d = v->domain;
    int rc;

    vcpu_pause(v);
    domain_lock(d);

    set_bit(_VPF_in_reset, &v->pause_flags);
    rc = arch_vcpu_reset(v);
    if ( rc )
        goto out_unlock;

    set_bit(_VPF_down, &v->pause_flags);

    clear_bit(v->vcpu_id, d->poll_mask);
    v->poll_evtchn = 0;

    v->fpu_initialised = 0;
    v->fpu_dirtied     = 0;
    v->is_initialised  = 0;
#ifdef VCPU_TRAP_LAST
    v->async_exception_mask = 0;
    memset(v->async_exception_state, 0, sizeof(v->async_exception_state));
#endif
    cpumask_clear(v->cpu_hard_affinity_tmp);
    clear_bit(_VPF_blocked, &v->pause_flags);
    clear_bit(_VPF_in_reset, &v->pause_flags);

 out_unlock:
    domain_unlock(v->domain);
    vcpu_unpause(v);

    return rc;
}

/*
 * Map a guest page in and point the vcpu_info pointer at it.  This
 * makes sure that the vcpu_info is always pointing at a valid piece
 * of memory, and it sets a pending event to make sure that a pending
 * event doesn't get missed.
 */
int map_vcpu_info(struct vcpu *v, unsigned long gfn, unsigned offset)
{
    struct domain *d = v->domain;
    void *mapping;
    vcpu_info_t *new_info;
    struct page_info *page;
    int i;

    if ( offset > (PAGE_SIZE - sizeof(vcpu_info_t)) )
        return -EINVAL;

    if ( !mfn_eq(v->vcpu_info_mfn, INVALID_MFN) )
        return -EINVAL;

    /* Run this command on yourself or on other offline VCPUS. */
    if ( (v != current) && !(v->pause_flags & VPF_down) )
        return -EINVAL;

    page = get_page_from_gfn(d, gfn, NULL, P2M_ALLOC);
    if ( !page )
        return -EINVAL;

    if ( !get_page_type(page, PGT_writable_page) )
    {
        put_page(page);
        return -EINVAL;
    }

    mapping = __map_domain_page_global(page);
    if ( mapping == NULL )
    {
        put_page_and_type(page);
        return -ENOMEM;
    }

    new_info = (vcpu_info_t *)(mapping + offset);

    if ( v->vcpu_info == &dummy_vcpu_info )
    {
        memset(new_info, 0, sizeof(*new_info));
#ifdef XEN_HAVE_PV_UPCALL_MASK
        __vcpu_info(v, new_info, evtchn_upcall_mask) = 1;
#endif
    }
    else
    {
        memcpy(new_info, v->vcpu_info, sizeof(*new_info));
    }

    v->vcpu_info = new_info;
    v->vcpu_info_mfn = _mfn(page_to_mfn(page));

    /* Set new vcpu_info pointer /before/ setting pending flags. */
    smp_wmb();

    /*
     * Mark everything as being pending just to make sure nothing gets
     * lost.  The domain will get a spurious event, but it can cope.
     */
    vcpu_info(v, evtchn_upcall_pending) = 1;
    for ( i = 0; i < BITS_PER_EVTCHN_WORD(d); i++ )
        set_bit(i, &vcpu_info(v, evtchn_pending_sel));
    arch_evtchn_inject(v);

    return 0;
}

/*
 * Unmap the vcpu info page if the guest decided to place it somewhere
 * else. This is used from domain_kill() and domain_soft_reset().
 */
void unmap_vcpu_info(struct vcpu *v)
{
    mfn_t mfn = v->vcpu_info_mfn;

    if ( mfn_eq(mfn, INVALID_MFN) )
        return;

    unmap_domain_page_global((void *)
                             ((unsigned long)v->vcpu_info & PAGE_MASK));

    vcpu_info_reset(v); /* NB: Clobbers v->vcpu_info_mfn */

    put_page_and_type(mfn_to_page(mfn_x(mfn)));
}

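/*
 * Default VCPUOP_initialise handler: copy the guest context from the caller
 * and apply it with arch_set_info_guest(), failing with -EEXIST if the vCPU
 * has already been initialised.
 */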
int default_initialise_vcpu(struct vcpu *v, XEN_GUEST_HANDLE_PARAM(void) arg)
{
    struct vcpu_guest_context *ctxt;
    struct domain *d = v->domain;
    int rc;

    if ( (ctxt = alloc_vcpu_guest_context()) == NULL )
        return -ENOMEM;

    if ( copy_from_guest(ctxt, arg, 1) )
    {
        free_vcpu_guest_context(ctxt);
        return -EFAULT;
    }

    domain_lock(d);
    rc = v->is_initialised ? -EEXIST : arch_set_info_guest(v, ctxt);
    domain_unlock(d);

    free_vcpu_guest_context(ctxt);

    return rc;
}

long do_vcpu_op(int cmd, unsigned int vcpuid, XEN_GUEST_HANDLE_PARAM(void) arg)
{
    struct domain *d = current->domain;
    struct vcpu *v;
    long rc = 0;

    if ( vcpuid >= d->max_vcpus || (v = d->vcpu[vcpuid]) == NULL )
        return -ENOENT;

    switch ( cmd )
    {
    case VCPUOP_initialise:
        if ( v->vcpu_info == &dummy_vcpu_info )
            return -EINVAL;

        rc = arch_initialise_vcpu(v, arg);
        if ( rc == -ERESTART )
            rc = hypercall_create_continuation(__HYPERVISOR_vcpu_op, "iuh",
                                               cmd, vcpuid, arg);

        break;

    case VCPUOP_up:
#ifdef CONFIG_X86
        if ( pv_shim )
            rc = continue_hypercall_on_cpu(0, pv_shim_cpu_up, v);
        else
#endif
        {
            bool wake = false;

            domain_lock(d);
            if ( !v->is_initialised )
                rc = -EINVAL;
            else
                wake = test_and_clear_bit(_VPF_down, &v->pause_flags);
            domain_unlock(d);
            if ( wake )
                vcpu_wake(v);
        }

        break;

    case VCPUOP_down:
#ifdef CONFIG_X86
        if ( pv_shim )
            rc = continue_hypercall_on_cpu(0, pv_shim_cpu_down, v);
        else
#endif
            if ( !test_and_set_bit(_VPF_down, &v->pause_flags) )
                vcpu_sleep_nosync(v);

        break;

    case VCPUOP_is_up:
        rc = !(v->pause_flags & VPF_down);
        break;

    case VCPUOP_get_runstate_info:
    {
        struct vcpu_runstate_info runstate;
        vcpu_runstate_get(v, &runstate);
        if ( copy_to_guest(arg, &runstate, 1) )
            rc = -EFAULT;
        break;
    }

    case VCPUOP_set_periodic_timer:
    {
        struct vcpu_set_periodic_timer set;

        if ( copy_from_guest(&set, arg, 1) )
            return -EFAULT;

        if ( set.period_ns < MILLISECS(1) )
            return -EINVAL;

        if ( set.period_ns > STIME_DELTA_MAX )
            return -EINVAL;

        v->periodic_period = set.period_ns;
        vcpu_force_reschedule(v);

        break;
    }

    case VCPUOP_stop_periodic_timer:
        v->periodic_period = 0;
        vcpu_force_reschedule(v);
        break;

    case VCPUOP_set_singleshot_timer:
    {
        struct vcpu_set_singleshot_timer set;

        if ( v != current )
            return -EINVAL;

        if ( copy_from_guest(&set, arg, 1) )
            return -EFAULT;

        if ( (set.flags & VCPU_SSHOTTMR_future) &&
             (set.timeout_abs_ns < NOW()) )
            return -ETIME;

        migrate_timer(&v->singleshot_timer, smp_processor_id());
        set_timer(&v->singleshot_timer, set.timeout_abs_ns);

        break;
    }

    case VCPUOP_stop_singleshot_timer:
        if ( v != current )
            return -EINVAL;

        stop_timer(&v->singleshot_timer);

        break;

    case VCPUOP_register_vcpu_info:
    {
        struct vcpu_register_vcpu_info info;

        rc = -EFAULT;
        if ( copy_from_guest(&info, arg, 1) )
            break;

        domain_lock(d);
        rc = map_vcpu_info(v, info.mfn, info.offset);
        domain_unlock(d);

        break;
    }

    case VCPUOP_register_runstate_memory_area:
    {
        struct vcpu_register_runstate_memory_area area;
        struct vcpu_runstate_info runstate;

        rc = -EFAULT;
        if ( copy_from_guest(&area, arg, 1) )
            break;

        if ( !guest_handle_okay(area.addr.h, 1) )
            break;

        rc = 0;
        runstate_guest(v) = area.addr.h;

        if ( v == current )
        {
            __copy_to_guest(runstate_guest(v), &v->runstate, 1);
        }
        else
        {
            vcpu_runstate_get(v, &runstate);
            __copy_to_guest(runstate_guest(v), &runstate, 1);
        }

        break;
    }

#ifdef VCPU_TRAP_NMI
    case VCPUOP_send_nmi:
        if ( !guest_handle_is_null(arg) )
            return -EINVAL;

        if ( !test_and_set_bool(v->nmi_pending) )
            vcpu_kick(v);

        break;
#endif

    default:
        rc = arch_do_vcpu_op(cmd, v, arg);
        break;
    }

    return rc;
}

#ifdef VM_ASSIST_VALID
long vm_assist(struct domain *p, unsigned int cmd, unsigned int type,
               unsigned long valid)
{
    if ( type >= BITS_PER_LONG || !test_bit(type, &valid) )
        return -EINVAL;

    switch ( cmd )
    {
    case VMASST_CMD_enable:
        set_bit(type, &p->vm_assist);
        return 0;
    case VMASST_CMD_disable:
        clear_bit(type, &p->vm_assist);
        return 0;
    }

    return -ENOSYS;
}
#endif

struct pirq *pirq_get_info(struct domain *d, int pirq)
{
    struct pirq *info = pirq_info(d, pirq);

    if ( !info && (info = alloc_pirq_struct(d)) != NULL )
    {
        info->pirq = pirq;
        if ( radix_tree_insert(&d->pirq_tree, pirq, info) )
        {
            free_pirq_struct(info);
            info = NULL;
        }
    }

    return info;
}

static void _free_pirq_struct(struct rcu_head *head)
{
    xfree(container_of(head, struct pirq, rcu_head));
}

void free_pirq_struct(void *ptr)
{
    struct pirq *pirq = ptr;

    call_rcu(&pirq->rcu_head, _free_pirq_struct);
}

struct migrate_info {
    long (*func)(void *data);
    void *data;
    struct vcpu *vcpu;
    unsigned int cpu;
    unsigned int nest;
};

static DEFINE_PER_CPU(struct migrate_info *, continue_info);

static void continue_hypercall_tasklet_handler(unsigned long _info)
{
    struct migrate_info *info = (struct migrate_info *)_info;
    struct vcpu *v = info->vcpu;

    /* Wait for vcpu to sleep so that we can access its register state. */
    vcpu_sleep_sync(v);

    this_cpu(continue_info) = info;
    return_reg(v) = (info->cpu == smp_processor_id())
        ? info->func(info->data) : -EINVAL;
    this_cpu(continue_info) = NULL;

    if ( info->nest-- == 0 )
    {
        xfree(info);
        vcpu_unpause(v);
        put_domain(v->domain);
    }
}

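/*
 * Arrange for the remainder of the current hypercall to be completed by
 * func(data) on the given physical CPU.  The calling vCPU is paused and a
 * tasklet is scheduled on the target CPU; the tasklet handler above writes
 * func()'s return value into the vCPU's guest return register before
 * unpausing it, so the value returned here is only a placeholder.
 *
 * See the VCPUOP_up handling above for an in-tree example:
 *
 *     rc = continue_hypercall_on_cpu(0, pv_shim_cpu_up, v);
 */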
int continue_hypercall_on_cpu(
    unsigned int cpu, long (*func)(void *data), void *data)
{
    struct migrate_info *info;

    if ( (cpu >= nr_cpu_ids) || !cpu_online(cpu) )
        return -EINVAL;

    info = this_cpu(continue_info);
    if ( info == NULL )
    {
        struct vcpu *curr = current;

        info = xmalloc(struct migrate_info);
        if ( info == NULL )
            return -ENOMEM;

        info->vcpu = curr;
        info->nest = 0;

        tasklet_kill(
            &curr->continue_hypercall_tasklet);
        tasklet_init(
            &curr->continue_hypercall_tasklet,
            continue_hypercall_tasklet_handler,
            (unsigned long)info);

        get_knownalive_domain(curr->domain);
        vcpu_pause_nosync(curr);
    }
    else
    {
        BUG_ON(info->nest != 0);
        info->nest++;
    }

    info->func = func;
    info->data = data;
    info->cpu  = cpu;

    tasklet_schedule_on_cpu(&info->vcpu->continue_hypercall_tasklet, cpu);

    /* Dummy return value will be overwritten by tasklet. */
    return 0;
}

/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */