1 /******************************************************************************
2 * domain.c
3 *
4 * Generic domain-handling functions.
5 */
6
7 #include <xen/compat.h>
8 #include <xen/init.h>
9 #include <xen/lib.h>
10 #include <xen/ctype.h>
11 #include <xen/err.h>
12 #include <xen/sched.h>
13 #include <xen/sched-if.h>
14 #include <xen/domain.h>
15 #include <xen/mm.h>
16 #include <xen/event.h>
17 #include <xen/vm_event.h>
18 #include <xen/time.h>
19 #include <xen/console.h>
20 #include <xen/softirq.h>
21 #include <xen/tasklet.h>
22 #include <xen/domain_page.h>
23 #include <xen/rangeset.h>
24 #include <xen/guest_access.h>
25 #include <xen/hypercall.h>
26 #include <xen/delay.h>
27 #include <xen/shutdown.h>
28 #include <xen/percpu.h>
29 #include <xen/multicall.h>
30 #include <xen/rcupdate.h>
31 #include <xen/wait.h>
32 #include <xen/grant_table.h>
33 #include <xen/xenoprof.h>
34 #include <xen/irq.h>
35 #include <asm/debugger.h>
36 #include <asm/p2m.h>
37 #include <asm/processor.h>
38 #include <public/sched.h>
39 #include <public/sysctl.h>
40 #include <public/vcpu.h>
41 #include <xsm/xsm.h>
42 #include <xen/trace.h>
43 #include <xen/tmem.h>
44 #include <asm/setup.h>
45
46 #ifdef CONFIG_X86
47 #include <asm/guest.h>
48 #endif
49
50 /* Linux config option: propagated to domain0 */
51 /* xen_processor_pmbits: xen control Cx, Px, ... */
52 unsigned int xen_processor_pmbits = XEN_PROCESSOR_PM_PX;
53
54 /* opt_dom0_vcpus_pin: If true, dom0 VCPUs are pinned. */
55 bool_t opt_dom0_vcpus_pin;
56 boolean_param("dom0_vcpus_pin", opt_dom0_vcpus_pin);
57
58 /* Protect updates/reads (resp.) of domain_list and domain_hash. */
59 DEFINE_SPINLOCK(domlist_update_lock);
60 DEFINE_RCU_READ_LOCK(domlist_read_lock);
61
62 #define DOMAIN_HASH_SIZE 256
63 #define DOMAIN_HASH(_id) ((int)(_id)&(DOMAIN_HASH_SIZE-1))
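/*
 * Domains are looked up by masking the domid into one of DOMAIN_HASH_SIZE
 * buckets (a power of two, so the AND above is a cheap modulo). Buckets are
 * singly linked via d->next_in_hashbucket and traversed under RCU.
 */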
64 static struct domain *domain_hash[DOMAIN_HASH_SIZE];
65 struct domain *domain_list;
66
67 struct domain *hardware_domain __read_mostly;
68
69 #ifdef CONFIG_LATE_HWDOM
70 domid_t hardware_domid __read_mostly;
71 integer_param("hardware_dom", hardware_domid);
72 #endif
73
74 struct vcpu *idle_vcpu[NR_CPUS] __read_mostly;
75
76 vcpu_info_t dummy_vcpu_info;
77
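/*
 * Called with d->shutdown_lock held. The domain is considered fully shut
 * down only once every vCPU has been paused for shutdown; at that point
 * either the suspend event channel (for SHUTDOWN_suspend) or the toolstack
 * (via VIRQ_DOM_EXC) is notified, exactly once.
 */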
78 static void __domain_finalise_shutdown(struct domain *d)
79 {
80 struct vcpu *v;
81
82 BUG_ON(!spin_is_locked(&d->shutdown_lock));
83
84 if ( d->is_shut_down )
85 return;
86
87 for_each_vcpu ( d, v )
88 if ( !v->paused_for_shutdown )
89 return;
90
91 d->is_shut_down = 1;
92 if ( (d->shutdown_code == SHUTDOWN_suspend) && d->suspend_evtchn )
93 evtchn_send(d, d->suspend_evtchn);
94 else
95 send_global_virq(VIRQ_DOM_EXC);
96 }
97
98 static void vcpu_check_shutdown(struct vcpu *v)
99 {
100 struct domain *d = v->domain;
101
102 spin_lock(&d->shutdown_lock);
103
104 if ( d->is_shutting_down )
105 {
106 if ( !v->paused_for_shutdown )
107 vcpu_pause_nosync(v);
108 v->paused_for_shutdown = 1;
109 v->defer_shutdown = 0;
110 __domain_finalise_shutdown(d);
111 }
112
113 spin_unlock(&d->shutdown_lock);
114 }
115
116 static void vcpu_info_reset(struct vcpu *v)
117 {
118 struct domain *d = v->domain;
119
120 v->vcpu_info = ((v->vcpu_id < XEN_LEGACY_MAX_VCPUS)
121 ? (vcpu_info_t *)&shared_info(d, vcpu_info[v->vcpu_id])
122 : &dummy_vcpu_info);
123 v->vcpu_info_mfn = INVALID_MFN;
124 }
125
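/*
 * Allocate and initialise vCPU @vcpu_id of domain @d, placing it on pCPU
 * @cpu_id for scheduling purposes. On failure the partially constructed
 * vCPU is torn down via the fail_wq/fail_free labels below and NULL is
 * returned. On success the new vCPU is linked into d->vcpu[] and into the
 * next_in_list chain (ordered by vcpu_id) before shutdown state is
 * re-checked.
 */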
126 struct vcpu *alloc_vcpu(
127 struct domain *d, unsigned int vcpu_id, unsigned int cpu_id)
128 {
129 struct vcpu *v;
130
131 BUG_ON((!is_idle_domain(d) || vcpu_id) && d->vcpu[vcpu_id]);
132
133 if ( (v = alloc_vcpu_struct()) == NULL )
134 return NULL;
135
136 v->domain = d;
137 v->vcpu_id = vcpu_id;
138
139 spin_lock_init(&v->virq_lock);
140
141 tasklet_init(&v->continue_hypercall_tasklet, NULL, 0);
142
143 grant_table_init_vcpu(v);
144
145 if ( !zalloc_cpumask_var(&v->cpu_hard_affinity) ||
146 !zalloc_cpumask_var(&v->cpu_hard_affinity_tmp) ||
147 !zalloc_cpumask_var(&v->cpu_hard_affinity_saved) ||
148 !zalloc_cpumask_var(&v->cpu_soft_affinity) ||
149 !zalloc_cpumask_var(&v->vcpu_dirty_cpumask) )
150 goto fail_free;
151
152 if ( is_idle_domain(d) )
153 {
154 v->runstate.state = RUNSTATE_running;
155 }
156 else
157 {
158 v->runstate.state = RUNSTATE_offline;
159 v->runstate.state_entry_time = NOW();
160 set_bit(_VPF_down, &v->pause_flags);
161 vcpu_info_reset(v);
162 init_waitqueue_vcpu(v);
163 }
164
165 if ( sched_init_vcpu(v, cpu_id) != 0 )
166 goto fail_wq;
167
168 if ( vcpu_initialise(v) != 0 )
169 {
170 sched_destroy_vcpu(v);
171 fail_wq:
172 destroy_waitqueue_vcpu(v);
173 fail_free:
174 free_cpumask_var(v->cpu_hard_affinity);
175 free_cpumask_var(v->cpu_hard_affinity_tmp);
176 free_cpumask_var(v->cpu_hard_affinity_saved);
177 free_cpumask_var(v->cpu_soft_affinity);
178 free_cpumask_var(v->vcpu_dirty_cpumask);
179 free_vcpu_struct(v);
180 return NULL;
181 }
182
183 d->vcpu[vcpu_id] = v;
184 if ( vcpu_id != 0 )
185 {
186 int prev_id = v->vcpu_id - 1;
187 while ( (prev_id >= 0) && (d->vcpu[prev_id] == NULL) )
188 prev_id--;
189 BUG_ON(prev_id < 0);
190 v->next_in_list = d->vcpu[prev_id]->next_in_list;
191 d->vcpu[prev_id]->next_in_list = v;
192 }
193
194 /* Must be called after making new vcpu visible to for_each_vcpu(). */
195 vcpu_check_shutdown(v);
196
197 if ( !is_idle_domain(d) )
198 domain_update_node_affinity(d);
199
200 return v;
201 }
202
203 static int late_hwdom_init(struct domain *d)
204 {
205 #ifdef CONFIG_LATE_HWDOM
206 struct domain *dom0;
207 int rv;
208
209 if ( d != hardware_domain || d->domain_id == 0 )
210 return 0;
211
212 rv = xsm_init_hardware_domain(XSM_HOOK, d);
213 if ( rv )
214 return rv;
215
216 printk("Initialising hardware domain %d\n", hardware_domid);
217
218 dom0 = rcu_lock_domain_by_id(0);
219 ASSERT(dom0 != NULL);
220 /*
221 * Hardware resource ranges for domain 0 have been set up from
222 * various sources intended to restrict the hardware domain's
223 * access. Apply these ranges to the actual hardware domain.
224 *
225 * Because the lists are being swapped, a side effect of this
226 * operation is that Domain 0's rangesets are cleared. Since
227 * domain 0 should not be accessing the hardware when it constructs
228 * a hardware domain, this should not be a problem. Both lists
229 * may be modified after this hypercall returns if a more complex
230 * device model is desired.
231 */
232 rangeset_swap(d->irq_caps, dom0->irq_caps);
233 rangeset_swap(d->iomem_caps, dom0->iomem_caps);
234 #ifdef CONFIG_X86
235 rangeset_swap(d->arch.ioport_caps, dom0->arch.ioport_caps);
236 setup_io_bitmap(d);
237 setup_io_bitmap(dom0);
238 #endif
239
240 rcu_unlock_domain(dom0);
241
242 iommu_hwdom_init(d);
243
244 return rv;
245 #else
246 return 0;
247 #endif
248 }
249
250 static unsigned int __read_mostly extra_hwdom_irqs;
251 static unsigned int __read_mostly extra_domU_irqs = 32;
252
253 static int __init parse_extra_guest_irqs(const char *s)
254 {
255 if ( isdigit(*s) )
256 extra_domU_irqs = simple_strtoul(s, &s, 0);
257 if ( *s == ',' && isdigit(*++s) )
258 extra_hwdom_irqs = simple_strtoul(s, &s, 0);
259
260 return *s ? -EINVAL : 0;
261 }
262 custom_param("extra_guest_irqs", parse_extra_guest_irqs);
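/*
 * Illustrative example (not from this file): the option takes the domU
 * value first and the hardware-domain value second, e.g. booting Xen with
 *
 *     extra_guest_irqs=64,1024
 *
 * gives ordinary guests nr_static_irqs + 64 pIRQs and the hardware domain
 * nr_static_irqs + 1024, subject to the nr_irqs clamp applied in
 * domain_create() below.
 */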
263
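/*
 * Create and partially initialise a new domain. Each completed
 * initialisation phase is recorded in init_status so that the fail: path
 * can unwind exactly what was set up; the function returns either a valid
 * struct domain or an ERR_PTR()-encoded errno.
 */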
264 struct domain *domain_create(domid_t domid, unsigned int domcr_flags,
265 uint32_t ssidref,
266 struct xen_arch_domainconfig *config)
267 {
268 struct domain *d, **pd, *old_hwdom = NULL;
269 enum { INIT_xsm = 1u<<0, INIT_watchdog = 1u<<1, INIT_rangeset = 1u<<2,
270 INIT_evtchn = 1u<<3, INIT_gnttab = 1u<<4, INIT_arch = 1u<<5 };
271 int err, init_status = 0;
272 int poolid = CPUPOOLID_NONE;
273
274 if ( (d = alloc_domain_struct()) == NULL )
275 return ERR_PTR(-ENOMEM);
276
277 d->domain_id = domid;
278
279 TRACE_1D(TRC_DOM0_DOM_ADD, d->domain_id);
280
281 lock_profile_register_struct(LOCKPROF_TYPE_PERDOM, d, domid, "Domain");
282
283 if ( (err = xsm_alloc_security_domain(d)) != 0 )
284 goto fail;
285 init_status |= INIT_xsm;
286
287 watchdog_domain_init(d);
288 init_status |= INIT_watchdog;
289
290 atomic_set(&d->refcnt, 1);
291 spin_lock_init_prof(d, domain_lock);
292 spin_lock_init_prof(d, page_alloc_lock);
293 spin_lock_init(&d->hypercall_deadlock_mutex);
294 INIT_PAGE_LIST_HEAD(&d->page_list);
295 INIT_PAGE_LIST_HEAD(&d->xenpage_list);
296
297 spin_lock_init(&d->node_affinity_lock);
298 d->node_affinity = NODE_MASK_ALL;
299 d->auto_node_affinity = 1;
300
301 spin_lock_init(&d->shutdown_lock);
302 d->shutdown_code = SHUTDOWN_CODE_INVALID;
303
304 spin_lock_init(&d->pbuf_lock);
305
306 rwlock_init(&d->vnuma_rwlock);
307
308 err = -ENOMEM;
309 if ( !zalloc_cpumask_var(&d->domain_dirty_cpumask) )
310 goto fail;
311
312 if ( domcr_flags & DOMCRF_hvm )
313 d->guest_type = guest_type_hvm;
314 else
315 d->guest_type = guest_type_pv;
316
317 if ( domid == 0 || domid == hardware_domid )
318 {
319 if ( hardware_domid < 0 || hardware_domid >= DOMID_FIRST_RESERVED )
320 panic("The value of hardware_dom must be a valid domain ID");
321 d->is_pinned = opt_dom0_vcpus_pin;
322 d->disable_migrate = 1;
323 old_hwdom = hardware_domain;
324 hardware_domain = d;
325 }
326
327 if ( domcr_flags & DOMCRF_xs_domain )
328 {
329 d->is_xenstore = 1;
330 d->disable_migrate = 1;
331 }
332
333 rangeset_domain_initialise(d);
334 init_status |= INIT_rangeset;
335
336 d->iomem_caps = rangeset_new(d, "I/O Memory", RANGESETF_prettyprint_hex);
337 d->irq_caps = rangeset_new(d, "Interrupts", 0);
338 if ( (d->iomem_caps == NULL) || (d->irq_caps == NULL) )
339 goto fail;
340
341 if ( domcr_flags & DOMCRF_dummy )
342 return d;
343
344 if ( !is_idle_domain(d) )
345 {
346 if ( (err = xsm_domain_create(XSM_HOOK, d, ssidref)) != 0 )
347 goto fail;
348
349 d->controller_pause_count = 1;
350 atomic_inc(&d->pause_count);
351
352 if ( !is_hardware_domain(d) )
353 d->nr_pirqs = nr_static_irqs + extra_domU_irqs;
354 else
355 d->nr_pirqs = extra_hwdom_irqs ? nr_static_irqs + extra_hwdom_irqs
356 : arch_hwdom_irqs(domid);
357 if ( d->nr_pirqs > nr_irqs )
358 d->nr_pirqs = nr_irqs;
359
360 radix_tree_init(&d->pirq_tree);
361
362 if ( (err = evtchn_init(d)) != 0 )
363 goto fail;
364 init_status |= INIT_evtchn;
365
366 if ( (err = grant_table_create(d)) != 0 )
367 goto fail;
368 init_status |= INIT_gnttab;
369
370 poolid = 0;
371
372 err = -ENOMEM;
373
374 d->pbuf = xzalloc_array(char, DOMAIN_PBUF_SIZE);
375 if ( !d->pbuf )
376 goto fail;
377 }
378
379 if ( (err = arch_domain_create(d, domcr_flags, config)) != 0 )
380 goto fail;
381 init_status |= INIT_arch;
382
383 if ( (err = sched_init_domain(d, poolid)) != 0 )
384 goto fail;
385
386 if ( (err = late_hwdom_init(d)) != 0 )
387 goto fail;
388
389 if ( !is_idle_domain(d) )
390 {
391 spin_lock(&domlist_update_lock);
392 pd = &domain_list; /* NB. domain_list maintained in order of domid. */
393 for ( pd = &domain_list; *pd != NULL; pd = &(*pd)->next_in_list )
394 if ( (*pd)->domain_id > d->domain_id )
395 break;
396 d->next_in_list = *pd;
397 d->next_in_hashbucket = domain_hash[DOMAIN_HASH(domid)];
398 rcu_assign_pointer(*pd, d);
399 rcu_assign_pointer(domain_hash[DOMAIN_HASH(domid)], d);
400 spin_unlock(&domlist_update_lock);
401 }
402
403 return d;
404
405 fail:
406 d->is_dying = DOMDYING_dead;
407 if ( hardware_domain == d )
408 hardware_domain = old_hwdom;
409 atomic_set(&d->refcnt, DOMAIN_DESTROYED);
410 xfree(d->pbuf);
411 if ( init_status & INIT_arch )
412 arch_domain_destroy(d);
413 if ( init_status & INIT_gnttab )
414 grant_table_destroy(d);
415 if ( init_status & INIT_evtchn )
416 {
417 evtchn_destroy(d);
418 evtchn_destroy_final(d);
419 radix_tree_destroy(&d->pirq_tree, free_pirq_struct);
420 }
421 if ( init_status & INIT_rangeset )
422 rangeset_domain_destroy(d);
423 if ( init_status & INIT_watchdog )
424 watchdog_domain_destroy(d);
425 if ( init_status & INIT_xsm )
426 xsm_free_security_domain(d);
427 free_cpumask_var(d->domain_dirty_cpumask);
428 free_domain_struct(d);
429 return ERR_PTR(err);
430 }
431
432
433 void domain_update_node_affinity(struct domain *d)
434 {
435 cpumask_var_t dom_cpumask, dom_cpumask_soft;
436 cpumask_t *dom_affinity;
437 const cpumask_t *online;
438 struct vcpu *v;
439 unsigned int cpu;
440
441 /* Do we have vcpus already? If not, no need to update node-affinity. */
442 if ( !d->vcpu || !d->vcpu[0] )
443 return;
444
445 if ( !zalloc_cpumask_var(&dom_cpumask) )
446 return;
447 if ( !zalloc_cpumask_var(&dom_cpumask_soft) )
448 {
449 free_cpumask_var(dom_cpumask);
450 return;
451 }
452
453 online = cpupool_domain_cpumask(d);
454
455 spin_lock(&d->node_affinity_lock);
456
457 /*
458 * If d->auto_node_affinity is true, let's compute the domain's
459 * node-affinity and update d->node_affinity accordingly. If false,
460 * just leave d->auto_node_affinity alone.
461 */
462 if ( d->auto_node_affinity )
463 {
464 /*
465 * We want the narrowest possible set of pcpus (to get the narrowest
466 * possible set of nodes). What we need is the cpumask of where the
467 * domain can run (the union of the hard affinity of all its vcpus),
468 * and the full mask of where it would prefer to run (the union of
469 * the soft affinity of all its various vcpus). Let's build them.
470 */
471 for_each_vcpu ( d, v )
472 {
473 cpumask_or(dom_cpumask, dom_cpumask, v->cpu_hard_affinity);
474 cpumask_or(dom_cpumask_soft, dom_cpumask_soft,
475 v->cpu_soft_affinity);
476 }
477 /* Filter out non-online cpus */
478 cpumask_and(dom_cpumask, dom_cpumask, online);
479 ASSERT(!cpumask_empty(dom_cpumask));
480 /* And compute the intersection between hard, online and soft */
481 cpumask_and(dom_cpumask_soft, dom_cpumask_soft, dom_cpumask);
482
483 /*
484 * If not empty, the intersection of hard, soft and online is the
485 * narrowest set we want. If empty, we fall back to hard&online.
486 */
487 dom_affinity = cpumask_empty(dom_cpumask_soft) ?
488 dom_cpumask : dom_cpumask_soft;
489
490 nodes_clear(d->node_affinity);
491 for_each_cpu ( cpu, dom_affinity )
492 node_set(cpu_to_node(cpu), d->node_affinity);
493 }
494
495 spin_unlock(&d->node_affinity_lock);
496
497 free_cpumask_var(dom_cpumask_soft);
498 free_cpumask_var(dom_cpumask);
499 }
500
501
502 int domain_set_node_affinity(struct domain *d, const nodemask_t *affinity)
503 {
504 /* Being affine with no nodes is just wrong */
505 if ( nodes_empty(*affinity) )
506 return -EINVAL;
507
508 spin_lock(&d->node_affinity_lock);
509
510 /*
511 * Being/becoming explicitly affine to all nodes is not particularly
512 * useful. Let's take it as the `reset node affinity` command.
513 */
514 if ( nodes_full(*affinity) )
515 {
516 d->auto_node_affinity = 1;
517 goto out;
518 }
519
520 d->auto_node_affinity = 0;
521 d->node_affinity = *affinity;
522
523 out:
524 spin_unlock(&d->node_affinity_lock);
525
526 domain_update_node_affinity(d);
527
528 return 0;
529 }
530
531
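/*
 * Look up a domain by ID and take a general reference on it. A minimal
 * usage sketch (illustrative, not from this file):
 *
 *     struct domain *d = get_domain_by_id(domid);
 *     if ( d )
 *     {
 *         ... use d ...
 *         put_domain(d);
 *     }
 *
 * Callers that only need the domain to stay around for a short, bounded
 * section should prefer rcu_lock_domain_by_id() below, which pins the
 * structure via RCU instead of the refcount.
 */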
532 struct domain *get_domain_by_id(domid_t dom)
533 {
534 struct domain *d;
535
536 rcu_read_lock(&domlist_read_lock);
537
538 for ( d = rcu_dereference(domain_hash[DOMAIN_HASH(dom)]);
539 d != NULL;
540 d = rcu_dereference(d->next_in_hashbucket) )
541 {
542 if ( d->domain_id == dom )
543 {
544 if ( unlikely(!get_domain(d)) )
545 d = NULL;
546 break;
547 }
548 }
549
550 rcu_read_unlock(&domlist_read_lock);
551
552 return d;
553 }
554
555
556 struct domain *rcu_lock_domain_by_id(domid_t dom)
557 {
558 struct domain *d = NULL;
559
560 rcu_read_lock(&domlist_read_lock);
561
562 for ( d = rcu_dereference(domain_hash[DOMAIN_HASH(dom)]);
563 d != NULL;
564 d = rcu_dereference(d->next_in_hashbucket) )
565 {
566 if ( d->domain_id == dom )
567 {
568 rcu_lock_domain(d);
569 break;
570 }
571 }
572
573 rcu_read_unlock(&domlist_read_lock);
574
575 return d;
576 }
577
578 struct domain *rcu_lock_domain_by_any_id(domid_t dom)
579 {
580 if ( dom == DOMID_SELF )
581 return rcu_lock_current_domain();
582 return rcu_lock_domain_by_id(dom);
583 }
584
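/*
 * Variants that refuse to return the caller's own domain (-EPERM) and, for
 * the "live" flavour further below, a dying domain (-EINVAL). On success
 * the caller holds an RCU lock on *d and must drop it with
 * rcu_unlock_domain().
 */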
585 int rcu_lock_remote_domain_by_id(domid_t dom, struct domain **d)
586 {
587 if ( (*d = rcu_lock_domain_by_id(dom)) == NULL )
588 return -ESRCH;
589
590 if ( *d == current->domain )
591 {
592 rcu_unlock_domain(*d);
593 return -EPERM;
594 }
595
596 return 0;
597 }
598
599 int rcu_lock_live_remote_domain_by_id(domid_t dom, struct domain **d)
600 {
601 int rv;
602 rv = rcu_lock_remote_domain_by_id(dom, d);
603 if ( rv )
604 return rv;
605 if ( (*d)->is_dying )
606 {
607 rcu_unlock_domain(*d);
608 return -EINVAL;
609 }
610
611 return 0;
612 }
613
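/*
 * Tear down a domain in stages. The state machine below is protected by the
 * domctl lock: DOMDYING_alive pauses the domain and releases event channels,
 * grant mappings, tmem and vNUMA state; DOMDYING_dying repeatedly calls
 * domain_relinquish_resources() (propagating -ERESTART to the caller for
 * hypercall continuation) until everything is gone; DOMDYING_dead is
 * terminal.
 */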
614 int domain_kill(struct domain *d)
615 {
616 int rc = 0;
617 struct vcpu *v;
618
619 if ( d == current->domain )
620 return -EINVAL;
621
622 /* Protected by domctl_lock. */
623 switch ( d->is_dying )
624 {
625 case DOMDYING_alive:
626 domain_pause(d);
627 d->is_dying = DOMDYING_dying;
628 spin_barrier(&d->domain_lock);
629 evtchn_destroy(d);
630 gnttab_release_mappings(d);
631 tmem_destroy(d->tmem_client);
632 vnuma_destroy(d->vnuma);
633 domain_set_outstanding_pages(d, 0);
634 d->tmem_client = NULL;
635 /* fallthrough */
636 case DOMDYING_dying:
637 rc = domain_relinquish_resources(d);
638 if ( rc != 0 )
639 break;
640 if ( cpupool_move_domain(d, cpupool0) )
641 return -ERESTART;
642 for_each_vcpu ( d, v )
643 unmap_vcpu_info(v);
644 d->is_dying = DOMDYING_dead;
645 /* Mem event cleanup has to go here because the rings
646 * have to be put before we call put_domain. */
647 vm_event_cleanup(d);
648 put_domain(d);
649 send_global_virq(VIRQ_DOM_EXC);
650 /* fallthrough */
651 case DOMDYING_dead:
652 break;
653 }
654
655 return rc;
656 }
657
658
659 void __domain_crash(struct domain *d)
660 {
661 if ( d->is_shutting_down )
662 {
663 /* Print nothing: the domain is already shutting down. */
664 }
665 else if ( d == current->domain )
666 {
667 printk("Domain %d (vcpu#%d) crashed on cpu#%d:\n",
668 d->domain_id, current->vcpu_id, smp_processor_id());
669 show_execution_state(guest_cpu_user_regs());
670 }
671 else
672 {
673 printk("Domain %d reported crashed by domain %d on cpu#%d:\n",
674 d->domain_id, current->domain->domain_id, smp_processor_id());
675 }
676
677 domain_shutdown(d, SHUTDOWN_crash);
678 }
679
680
681 void __domain_crash_synchronous(void)
682 {
683 __domain_crash(current->domain);
684
685 vcpu_end_shutdown_deferral(current);
686
687 for ( ; ; )
688 do_softirq();
689 }
690
691
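/*
 * Request an orderly shutdown of @d with the given SHUTDOWN_* reason. The
 * first reason to arrive sticks; vCPUs that have registered a shutdown
 * deferral (see vcpu_start_shutdown_deferral()) are skipped unless the
 * reason is SHUTDOWN_crash, and the shutdown is finalised once every vCPU
 * has been paused.
 */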
692 int domain_shutdown(struct domain *d, u8 reason)
693 {
694 struct vcpu *v;
695
696 #ifdef CONFIG_X86
697 if ( pv_shim )
698 return pv_shim_shutdown(reason);
699 #endif
700
701 spin_lock(&d->shutdown_lock);
702
703 if ( d->shutdown_code == SHUTDOWN_CODE_INVALID )
704 d->shutdown_code = reason;
705 reason = d->shutdown_code;
706
707 if ( is_hardware_domain(d) )
708 hwdom_shutdown(reason);
709
710 if ( d->is_shutting_down )
711 {
712 spin_unlock(&d->shutdown_lock);
713 return 0;
714 }
715
716 d->is_shutting_down = 1;
717
718 smp_mb(); /* set shutdown status /then/ check for per-cpu deferrals */
719
720 for_each_vcpu ( d, v )
721 {
722 if ( reason == SHUTDOWN_crash )
723 v->defer_shutdown = 0;
724 else if ( v->defer_shutdown )
725 continue;
726 vcpu_pause_nosync(v);
727 v->paused_for_shutdown = 1;
728 }
729
730 arch_domain_shutdown(d);
731
732 __domain_finalise_shutdown(d);
733
734 spin_unlock(&d->shutdown_lock);
735
736 return 0;
737 }
738
739 void domain_resume(struct domain *d)
740 {
741 struct vcpu *v;
742
743 /*
744 * Some code paths assume that shutdown status does not get reset under
745 * their feet (e.g., some assertions make this assumption).
746 */
747 domain_pause(d);
748
749 spin_lock(&d->shutdown_lock);
750
751 d->is_shutting_down = d->is_shut_down = 0;
752 d->shutdown_code = SHUTDOWN_CODE_INVALID;
753
754 for_each_vcpu ( d, v )
755 {
756 if ( v->paused_for_shutdown )
757 vcpu_unpause(v);
758 v->paused_for_shutdown = 0;
759 }
760
761 spin_unlock(&d->shutdown_lock);
762
763 domain_unpause(d);
764 }
765
766 int vcpu_start_shutdown_deferral(struct vcpu *v)
767 {
768 if ( v->defer_shutdown )
769 return 1;
770
771 v->defer_shutdown = 1;
772 smp_mb(); /* set deferral status /then/ check for shutdown */
773 if ( unlikely(v->domain->is_shutting_down) )
774 vcpu_check_shutdown(v);
775
776 return v->defer_shutdown;
777 }
778
779 void vcpu_end_shutdown_deferral(struct vcpu *v)
780 {
781 v->defer_shutdown = 0;
782 smp_mb(); /* clear deferral status /then/ check for shutdown */
783 if ( unlikely(v->domain->is_shutting_down) )
784 vcpu_check_shutdown(v);
785 }
786
787 #ifdef CONFIG_HAS_GDBSX
788 void domain_pause_for_debugger(void)
789 {
790 struct vcpu *curr = current;
791 struct domain *d = curr->domain;
792
793 domain_pause_by_systemcontroller_nosync(d);
794
795 /* If gdbsx is active, we just need to pause the domain. */
796 if ( curr->arch.gdbsx_vcpu_event == 0 )
797 send_global_virq(VIRQ_DEBUGGER);
798 }
799 #endif
800
801 /* Complete domain destroy after RCU readers are not holding old references. */
802 static void complete_domain_destroy(struct rcu_head *head)
803 {
804 struct domain *d = container_of(head, struct domain, rcu);
805 struct vcpu *v;
806 int i;
807
808 /*
809 * Flush all state for the vCPU previously having run on the current CPU.
810 * This is in particular relevant for x86 HVM ones on VMX, so that this
811 * flushing of state won't happen from the TLB flush IPI handler behind
812 * the back of a vmx_vmcs_enter() / vmx_vmcs_exit() section.
813 */
814 sync_local_execstate();
815
816 for ( i = d->max_vcpus - 1; i >= 0; i-- )
817 {
818 if ( (v = d->vcpu[i]) == NULL )
819 continue;
820 tasklet_kill(&v->continue_hypercall_tasklet);
821 vcpu_destroy(v);
822 sched_destroy_vcpu(v);
823 destroy_waitqueue_vcpu(v);
824 }
825
826 grant_table_destroy(d);
827
828 arch_domain_destroy(d);
829
830 watchdog_domain_destroy(d);
831
832 rangeset_domain_destroy(d);
833
834 sched_destroy_domain(d);
835
836 /* Free page used by xen oprofile buffer. */
837 #ifdef CONFIG_XENOPROF
838 free_xenoprof_pages(d);
839 #endif
840
841 #ifdef CONFIG_HAS_MEM_PAGING
842 xfree(d->vm_event_paging);
843 #endif
844 xfree(d->vm_event_monitor);
845 #ifdef CONFIG_HAS_MEM_SHARING
846 xfree(d->vm_event_share);
847 #endif
848
849 xfree(d->pbuf);
850
851 for ( i = d->max_vcpus - 1; i >= 0; i-- )
852 if ( (v = d->vcpu[i]) != NULL )
853 {
854 free_cpumask_var(v->cpu_hard_affinity);
855 free_cpumask_var(v->cpu_hard_affinity_tmp);
856 free_cpumask_var(v->cpu_hard_affinity_saved);
857 free_cpumask_var(v->cpu_soft_affinity);
858 free_cpumask_var(v->vcpu_dirty_cpumask);
859 free_vcpu_struct(v);
860 }
861
862 if ( d->target != NULL )
863 put_domain(d->target);
864
865 evtchn_destroy_final(d);
866
867 radix_tree_destroy(&d->pirq_tree, free_pirq_struct);
868
869 xsm_free_security_domain(d);
870 free_cpumask_var(d->domain_dirty_cpumask);
871 xfree(d->vcpu);
872 free_domain_struct(d);
873
874 send_global_virq(VIRQ_DOM_EXC);
875 }
876
877 /* Release resources belonging to task @p. */
878 void domain_destroy(struct domain *d)
879 {
880 struct domain **pd;
881
882 BUG_ON(!d->is_dying);
883
884 /* May be already destroyed, or get_domain() can race us. */
885 if ( atomic_cmpxchg(&d->refcnt, 0, DOMAIN_DESTROYED) != 0 )
886 return;
887
888 TRACE_1D(TRC_DOM0_DOM_REM, d->domain_id);
889
890 /* Delete from task list and task hashtable. */
891 spin_lock(&domlist_update_lock);
892 pd = &domain_list;
893 while ( *pd != d )
894 pd = &(*pd)->next_in_list;
895 rcu_assign_pointer(*pd, d->next_in_list);
896 pd = &domain_hash[DOMAIN_HASH(d->domain_id)];
897 while ( *pd != d )
898 pd = &(*pd)->next_in_hashbucket;
899 rcu_assign_pointer(*pd, d->next_in_hashbucket);
900 spin_unlock(&domlist_update_lock);
901
902 /* Schedule RCU asynchronous completion of domain destroy. */
903 call_rcu(&d->rcu, complete_domain_destroy);
904 }
905
906 void vcpu_pause(struct vcpu *v)
907 {
908 ASSERT(v != current);
909 atomic_inc(&v->pause_count);
910 vcpu_sleep_sync(v);
911 }
912
913 void vcpu_pause_nosync(struct vcpu *v)
914 {
915 atomic_inc(&v->pause_count);
916 vcpu_sleep_nosync(v);
917 }
918
919 void vcpu_unpause(struct vcpu *v)
920 {
921 if ( atomic_dec_and_test(&v->pause_count) )
922 vcpu_wake(v);
923 }
924
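/*
 * Toolstack-initiated pauses are reference counted separately in
 * controller_pause_count so that repeated hypercalls cannot overflow
 * pause_count: the lock-free cmpxchg loop below caps the count at 255 and
 * only then takes the real vCPU pause reference.
 */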
925 int vcpu_pause_by_systemcontroller(struct vcpu *v)
926 {
927 int old, new, prev = v->controller_pause_count;
928
929 do
930 {
931 old = prev;
932 new = old + 1;
933
934 if ( new > 255 )
935 return -EOVERFLOW;
936
937 prev = cmpxchg(&v->controller_pause_count, old, new);
938 } while ( prev != old );
939
940 vcpu_pause(v);
941
942 return 0;
943 }
944
945 int vcpu_unpause_by_systemcontroller(struct vcpu *v)
946 {
947 int old, new, prev = v->controller_pause_count;
948
949 do
950 {
951 old = prev;
952 new = old - 1;
953
954 if ( new < 0 )
955 return -EINVAL;
956
957 prev = cmpxchg(&v->controller_pause_count, old, new);
958 } while ( prev != old );
959
960 vcpu_unpause(v);
961
962 return 0;
963 }
964
965 static void do_domain_pause(struct domain *d,
966 void (*sleep_fn)(struct vcpu *v))
967 {
968 struct vcpu *v;
969
970 atomic_inc(&d->pause_count);
971
972 for_each_vcpu( d, v )
973 sleep_fn(v);
974
975 arch_domain_pause(d);
976 }
977
978 void domain_pause(struct domain *d)
979 {
980 ASSERT(d != current->domain);
981 do_domain_pause(d, vcpu_sleep_sync);
982 }
983
984 void domain_pause_nosync(struct domain *d)
985 {
986 do_domain_pause(d, vcpu_sleep_nosync);
987 }
988
989 void domain_unpause(struct domain *d)
990 {
991 struct vcpu *v;
992
993 arch_domain_unpause(d);
994
995 if ( atomic_dec_and_test(&d->pause_count) )
996 for_each_vcpu( d, v )
997 vcpu_wake(v);
998 }
999
1000 int __domain_pause_by_systemcontroller(struct domain *d,
1001 void (*pause_fn)(struct domain *d))
1002 {
1003 int old, new, prev = d->controller_pause_count;
1004
1005 do
1006 {
1007 old = prev;
1008 new = old + 1;
1009
1010 /*
1011 * Limit the toolstack pause count to an arbitrary 255 to prevent the
1012 * toolstack overflowing d->pause_count with many repeated hypercalls.
1013 */
1014 if ( new > 255 )
1015 return -EOVERFLOW;
1016
1017 prev = cmpxchg(&d->controller_pause_count, old, new);
1018 } while ( prev != old );
1019
1020 pause_fn(d);
1021
1022 return 0;
1023 }
1024
1025 int domain_unpause_by_systemcontroller(struct domain *d)
1026 {
1027 int old, new, prev = d->controller_pause_count;
1028
1029 do
1030 {
1031 old = prev;
1032 new = old - 1;
1033
1034 if ( new < 0 )
1035 return -EINVAL;
1036
1037 prev = cmpxchg(&d->controller_pause_count, old, new);
1038 } while ( prev != old );
1039
1040 /*
1041 * d->controller_pause_count is initialised to 1, and the toolstack is
1042 * responsible for making one unpause hypercall when it wishes the guest
1043 * to start running.
1044 *
1045 * All other toolstack operations should make a pair of pause/unpause
1046 * calls and rely on the reference counting here.
1047 *
1048 * Creation is considered finished when the controller reference count
1049 * first drops to 0.
1050 */
1051 if ( new == 0 )
1052 d->creation_finished = true;
1053
1054 domain_unpause(d);
1055
1056 return 0;
1057 }
1058
1059 void domain_pause_except_self(struct domain *d)
1060 {
1061 struct vcpu *v, *curr = current;
1062
1063 if ( curr->domain == d )
1064 {
1065 for_each_vcpu( d, v )
1066 if ( likely(v != curr) )
1067 vcpu_pause(v);
1068 }
1069 else
1070 domain_pause(d);
1071 }
1072
1073 void domain_unpause_except_self(struct domain *d)
1074 {
1075 struct vcpu *v, *curr = current;
1076
1077 if ( curr->domain == d )
1078 {
1079 for_each_vcpu( d, v )
1080 if ( likely(v != curr) )
1081 vcpu_unpause(v);
1082 }
1083 else
1084 domain_unpause(d);
1085 }
1086
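/*
 * Soft reset: only valid once every vCPU has paused itself for shutdown
 * (typically after the guest requested SHUTDOWN_soft_reset). Event channels
 * are reset, runstate areas and mapped vcpu_info pages are dropped, and the
 * domain is resumed on success or crashed if the architecture-specific step
 * fails.
 */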
1087 int domain_soft_reset(struct domain *d)
1088 {
1089 struct vcpu *v;
1090 int rc;
1091
1092 spin_lock(&d->shutdown_lock);
1093 for_each_vcpu ( d, v )
1094 if ( !v->paused_for_shutdown )
1095 {
1096 spin_unlock(&d->shutdown_lock);
1097 return -EINVAL;
1098 }
1099 spin_unlock(&d->shutdown_lock);
1100
1101 rc = evtchn_reset(d);
1102 if ( rc )
1103 return rc;
1104
1105 grant_table_warn_active_grants(d);
1106
1107 for_each_vcpu ( d, v )
1108 {
1109 set_xen_guest_handle(runstate_guest(v), NULL);
1110 unmap_vcpu_info(v);
1111 }
1112
1113 rc = arch_domain_soft_reset(d);
1114 if ( !rc )
1115 domain_resume(d);
1116 else
1117 domain_crash(d);
1118
1119 return rc;
1120 }
1121
1122 int vcpu_reset(struct vcpu *v)
1123 {
1124 struct domain *d = v->domain;
1125 int rc;
1126
1127 vcpu_pause(v);
1128 domain_lock(d);
1129
1130 set_bit(_VPF_in_reset, &v->pause_flags);
1131 rc = arch_vcpu_reset(v);
1132 if ( rc )
1133 goto out_unlock;
1134
1135 set_bit(_VPF_down, &v->pause_flags);
1136
1137 clear_bit(v->vcpu_id, d->poll_mask);
1138 v->poll_evtchn = 0;
1139
1140 v->fpu_initialised = 0;
1141 v->fpu_dirtied = 0;
1142 v->is_initialised = 0;
1143 #ifdef VCPU_TRAP_LAST
1144 v->async_exception_mask = 0;
1145 memset(v->async_exception_state, 0, sizeof(v->async_exception_state));
1146 #endif
1147 cpumask_clear(v->cpu_hard_affinity_tmp);
1148 clear_bit(_VPF_blocked, &v->pause_flags);
1149 clear_bit(_VPF_in_reset, &v->pause_flags);
1150
1151 out_unlock:
1152 domain_unlock(v->domain);
1153 vcpu_unpause(v);
1154
1155 return rc;
1156 }
1157
1158 /*
1159 * Map a guest page in and point the vcpu_info pointer at it. This
1160 * makes sure that the vcpu_info is always pointing at a valid piece
1161 * of memory, and it sets a pending event to make sure that a pending
1162 * event doesn't get missed.
1163 */
1164 int map_vcpu_info(struct vcpu *v, unsigned long gfn, unsigned offset)
1165 {
1166 struct domain *d = v->domain;
1167 void *mapping;
1168 vcpu_info_t *new_info;
1169 struct page_info *page;
1170 int i;
1171
1172 if ( offset > (PAGE_SIZE - sizeof(vcpu_info_t)) )
1173 return -EINVAL;
1174
1175 if ( !mfn_eq(v->vcpu_info_mfn, INVALID_MFN) )
1176 return -EINVAL;
1177
1178 /* Run this command on yourself or on other offline VCPUS. */
1179 if ( (v != current) && !(v->pause_flags & VPF_down) )
1180 return -EINVAL;
1181
1182 page = get_page_from_gfn(d, gfn, NULL, P2M_ALLOC);
1183 if ( !page )
1184 return -EINVAL;
1185
1186 if ( !get_page_type(page, PGT_writable_page) )
1187 {
1188 put_page(page);
1189 return -EINVAL;
1190 }
1191
1192 mapping = __map_domain_page_global(page);
1193 if ( mapping == NULL )
1194 {
1195 put_page_and_type(page);
1196 return -ENOMEM;
1197 }
1198
1199 new_info = (vcpu_info_t *)(mapping + offset);
1200
1201 if ( v->vcpu_info == &dummy_vcpu_info )
1202 {
1203 memset(new_info, 0, sizeof(*new_info));
1204 #ifdef XEN_HAVE_PV_UPCALL_MASK
1205 __vcpu_info(v, new_info, evtchn_upcall_mask) = 1;
1206 #endif
1207 }
1208 else
1209 {
1210 memcpy(new_info, v->vcpu_info, sizeof(*new_info));
1211 }
1212
1213 v->vcpu_info = new_info;
1214 v->vcpu_info_mfn = _mfn(page_to_mfn(page));
1215
1216 /* Set new vcpu_info pointer /before/ setting pending flags. */
1217 smp_wmb();
1218
1219 /*
1220 * Mark everything as being pending just to make sure nothing gets
1221 * lost. The domain will get a spurious event, but it can cope.
1222 */
1223 vcpu_info(v, evtchn_upcall_pending) = 1;
1224 for ( i = 0; i < BITS_PER_EVTCHN_WORD(d); i++ )
1225 set_bit(i, &vcpu_info(v, evtchn_pending_sel));
1226 arch_evtchn_inject(v);
1227
1228 return 0;
1229 }
1230
1231 /*
1232 * Unmap the vcpu info page if the guest decided to place it somewhere
1233 * else. This is used from domain_kill() and domain_soft_reset().
1234 */
1235 void unmap_vcpu_info(struct vcpu *v)
1236 {
1237 mfn_t mfn = v->vcpu_info_mfn;
1238
1239 if ( mfn_eq(mfn, INVALID_MFN) )
1240 return;
1241
1242 unmap_domain_page_global((void *)
1243 ((unsigned long)v->vcpu_info & PAGE_MASK));
1244
1245 vcpu_info_reset(v); /* NB: Clobbers v->vcpu_info_mfn */
1246
1247 put_page_and_type(mfn_to_page(mfn_x(mfn)));
1248 }
1249
1250 int default_initialise_vcpu(struct vcpu *v, XEN_GUEST_HANDLE_PARAM(void) arg)
1251 {
1252 struct vcpu_guest_context *ctxt;
1253 struct domain *d = v->domain;
1254 int rc;
1255
1256 if ( (ctxt = alloc_vcpu_guest_context()) == NULL )
1257 return -ENOMEM;
1258
1259 if ( copy_from_guest(ctxt, arg, 1) )
1260 {
1261 free_vcpu_guest_context(ctxt);
1262 return -EFAULT;
1263 }
1264
1265 domain_lock(d);
1266 rc = v->is_initialised ? -EEXIST : arch_set_info_guest(v, ctxt);
1267 domain_unlock(d);
1268
1269 free_vcpu_guest_context(ctxt);
1270
1271 return rc;
1272 }
1273
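/*
 * Common VCPUOP_* hypercall dispatcher. Commands operate on vCPU @vcpuid of
 * the calling domain only; per-architecture commands fall through to
 * arch_do_vcpu_op(). Note that VCPUOP_initialise may return a hypercall
 * continuation if arch_initialise_vcpu() reports -ERESTART.
 */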
1274 long do_vcpu_op(int cmd, unsigned int vcpuid, XEN_GUEST_HANDLE_PARAM(void) arg)
1275 {
1276 struct domain *d = current->domain;
1277 struct vcpu *v;
1278 long rc = 0;
1279
1280 if ( vcpuid >= d->max_vcpus || (v = d->vcpu[vcpuid]) == NULL )
1281 return -ENOENT;
1282
1283 switch ( cmd )
1284 {
1285 case VCPUOP_initialise:
1286 if ( v->vcpu_info == &dummy_vcpu_info )
1287 return -EINVAL;
1288
1289 rc = arch_initialise_vcpu(v, arg);
1290 if ( rc == -ERESTART )
1291 rc = hypercall_create_continuation(__HYPERVISOR_vcpu_op, "iuh",
1292 cmd, vcpuid, arg);
1293
1294 break;
1295
1296 case VCPUOP_up:
1297 #ifdef CONFIG_X86
1298 if ( pv_shim )
1299 rc = continue_hypercall_on_cpu(0, pv_shim_cpu_up, v);
1300 else
1301 #endif
1302 {
1303 bool wake = false;
1304
1305 domain_lock(d);
1306 if ( !v->is_initialised )
1307 rc = -EINVAL;
1308 else
1309 wake = test_and_clear_bit(_VPF_down, &v->pause_flags);
1310 domain_unlock(d);
1311 if ( wake )
1312 vcpu_wake(v);
1313 }
1314
1315 break;
1316
1317 case VCPUOP_down:
1318 #ifdef CONFIG_X86
1319 if ( pv_shim )
1320 rc = continue_hypercall_on_cpu(0, pv_shim_cpu_down, v);
1321 else
1322 #endif
1323 if ( !test_and_set_bit(_VPF_down, &v->pause_flags) )
1324 vcpu_sleep_nosync(v);
1325
1326 break;
1327
1328 case VCPUOP_is_up:
1329 rc = !(v->pause_flags & VPF_down);
1330 break;
1331
1332 case VCPUOP_get_runstate_info:
1333 {
1334 struct vcpu_runstate_info runstate;
1335 vcpu_runstate_get(v, &runstate);
1336 if ( copy_to_guest(arg, &runstate, 1) )
1337 rc = -EFAULT;
1338 break;
1339 }
1340
1341 case VCPUOP_set_periodic_timer:
1342 {
1343 struct vcpu_set_periodic_timer set;
1344
1345 if ( copy_from_guest(&set, arg, 1) )
1346 return -EFAULT;
1347
1348 if ( set.period_ns < MILLISECS(1) )
1349 return -EINVAL;
1350
1351 if ( set.period_ns > STIME_DELTA_MAX )
1352 return -EINVAL;
1353
1354 v->periodic_period = set.period_ns;
1355 vcpu_force_reschedule(v);
1356
1357 break;
1358 }
1359
1360 case VCPUOP_stop_periodic_timer:
1361 v->periodic_period = 0;
1362 vcpu_force_reschedule(v);
1363 break;
1364
1365 case VCPUOP_set_singleshot_timer:
1366 {
1367 struct vcpu_set_singleshot_timer set;
1368
1369 if ( v != current )
1370 return -EINVAL;
1371
1372 if ( copy_from_guest(&set, arg, 1) )
1373 return -EFAULT;
1374
1375 if ( (set.flags & VCPU_SSHOTTMR_future) &&
1376 (set.timeout_abs_ns < NOW()) )
1377 return -ETIME;
1378
1379 migrate_timer(&v->singleshot_timer, smp_processor_id());
1380 set_timer(&v->singleshot_timer, set.timeout_abs_ns);
1381
1382 break;
1383 }
1384
1385 case VCPUOP_stop_singleshot_timer:
1386 if ( v != current )
1387 return -EINVAL;
1388
1389 stop_timer(&v->singleshot_timer);
1390
1391 break;
1392
1393 case VCPUOP_register_vcpu_info:
1394 {
1395 struct vcpu_register_vcpu_info info;
1396
1397 rc = -EFAULT;
1398 if ( copy_from_guest(&info, arg, 1) )
1399 break;
1400
1401 domain_lock(d);
1402 rc = map_vcpu_info(v, info.mfn, info.offset);
1403 domain_unlock(d);
1404
1405 break;
1406 }
1407
1408 case VCPUOP_register_runstate_memory_area:
1409 {
1410 struct vcpu_register_runstate_memory_area area;
1411 struct vcpu_runstate_info runstate;
1412
1413 rc = -EFAULT;
1414 if ( copy_from_guest(&area, arg, 1) )
1415 break;
1416
1417 if ( !guest_handle_okay(area.addr.h, 1) )
1418 break;
1419
1420 rc = 0;
1421 runstate_guest(v) = area.addr.h;
1422
1423 if ( v == current )
1424 {
1425 __copy_to_guest(runstate_guest(v), &v->runstate, 1);
1426 }
1427 else
1428 {
1429 vcpu_runstate_get(v, &runstate);
1430 __copy_to_guest(runstate_guest(v), &runstate, 1);
1431 }
1432
1433 break;
1434 }
1435
1436 #ifdef VCPU_TRAP_NMI
1437 case VCPUOP_send_nmi:
1438 if ( !guest_handle_is_null(arg) )
1439 return -EINVAL;
1440
1441 if ( !test_and_set_bool(v->nmi_pending) )
1442 vcpu_kick(v);
1443
1444 break;
1445 #endif
1446
1447 default:
1448 rc = arch_do_vcpu_op(cmd, v, arg);
1449 break;
1450 }
1451
1452 return rc;
1453 }
1454
1455 #ifdef VM_ASSIST_VALID
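/*
 * Enable or disable one of the vm_assist features advertised in @valid
 * (a per-architecture bitmask, e.g. VM_ASSIST_VALID). Anything outside that
 * mask, or a type beyond BITS_PER_LONG, is rejected with -EINVAL.
 */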
1456 long vm_assist(struct domain *p, unsigned int cmd, unsigned int type,
1457 unsigned long valid)
1458 {
1459 if ( type >= BITS_PER_LONG || !test_bit(type, &valid) )
1460 return -EINVAL;
1461
1462 switch ( cmd )
1463 {
1464 case VMASST_CMD_enable:
1465 set_bit(type, &p->vm_assist);
1466 return 0;
1467 case VMASST_CMD_disable:
1468 clear_bit(type, &p->vm_assist);
1469 return 0;
1470 }
1471
1472 return -ENOSYS;
1473 }
1474 #endif
1475
1476 struct pirq *pirq_get_info(struct domain *d, int pirq)
1477 {
1478 struct pirq *info = pirq_info(d, pirq);
1479
1480 if ( !info && (info = alloc_pirq_struct(d)) != NULL )
1481 {
1482 info->pirq = pirq;
1483 if ( radix_tree_insert(&d->pirq_tree, pirq, info) )
1484 {
1485 free_pirq_struct(info);
1486 info = NULL;
1487 }
1488 }
1489
1490 return info;
1491 }
1492
1493 static void _free_pirq_struct(struct rcu_head *head)
1494 {
1495 xfree(container_of(head, struct pirq, rcu_head));
1496 }
1497
1498 void free_pirq_struct(void *ptr)
1499 {
1500 struct pirq *pirq = ptr;
1501
1502 call_rcu(&pirq->rcu_head, _free_pirq_struct);
1503 }
1504
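/*
 * continue_hypercall_on_cpu() support. The current vCPU is paused and a
 * per-vCPU tasklet is scheduled on the target pCPU; the tasklet runs @func
 * and writes its result into the vCPU's return register before unpausing
 * the vCPU. The nest counter lets @func itself re-invoke
 * continue_hypercall_on_cpu() to move the work to yet another pCPU.
 */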
1505 struct migrate_info {
1506 long (*func)(void *data);
1507 void *data;
1508 struct vcpu *vcpu;
1509 unsigned int cpu;
1510 unsigned int nest;
1511 };
1512
1513 static DEFINE_PER_CPU(struct migrate_info *, continue_info);
1514
1515 static void continue_hypercall_tasklet_handler(unsigned long _info)
1516 {
1517 struct migrate_info *info = (struct migrate_info *)_info;
1518 struct vcpu *v = info->vcpu;
1519
1520 /* Wait for vcpu to sleep so that we can access its register state. */
1521 vcpu_sleep_sync(v);
1522
1523 this_cpu(continue_info) = info;
1524 return_reg(v) = (info->cpu == smp_processor_id())
1525 ? info->func(info->data) : -EINVAL;
1526 this_cpu(continue_info) = NULL;
1527
1528 if ( info->nest-- == 0 )
1529 {
1530 xfree(info);
1531 vcpu_unpause(v);
1532 put_domain(v->domain);
1533 }
1534 }
1535
1536 int continue_hypercall_on_cpu(
1537 unsigned int cpu, long (*func)(void *data), void *data)
1538 {
1539 struct migrate_info *info;
1540
1541 if ( (cpu >= nr_cpu_ids) || !cpu_online(cpu) )
1542 return -EINVAL;
1543
1544 info = this_cpu(continue_info);
1545 if ( info == NULL )
1546 {
1547 struct vcpu *curr = current;
1548
1549 info = xmalloc(struct migrate_info);
1550 if ( info == NULL )
1551 return -ENOMEM;
1552
1553 info->vcpu = curr;
1554 info->nest = 0;
1555
1556 tasklet_kill(
1557 &curr->continue_hypercall_tasklet);
1558 tasklet_init(
1559 &curr->continue_hypercall_tasklet,
1560 continue_hypercall_tasklet_handler,
1561 (unsigned long)info);
1562
1563 get_knownalive_domain(curr->domain);
1564 vcpu_pause_nosync(curr);
1565 }
1566 else
1567 {
1568 BUG_ON(info->nest != 0);
1569 info->nest++;
1570 }
1571
1572 info->func = func;
1573 info->data = data;
1574 info->cpu = cpu;
1575
1576 tasklet_schedule_on_cpu(&info->vcpu->continue_hypercall_tasklet, cpu);
1577
1578 /* Dummy return value will be overwritten by tasklet. */
1579 return 0;
1580 }
1581
1582 /*
1583 * Local variables:
1584 * mode: C
1585 * c-file-style: "BSD"
1586 * c-basic-offset: 4
1587 * tab-width: 4
1588 * indent-tabs-mode: nil
1589 * End:
1590 */
1591