#ifndef __SCHED_H__
#define __SCHED_H__

#include <xen/types.h>
#include <xen/spinlock.h>
#include <xen/rwlock.h>
#include <xen/shared.h>
#include <xen/timer.h>
#include <xen/rangeset.h>
#include <xen/domain.h>
#include <xen/iommu.h>
#include <xen/rcupdate.h>
#include <xen/cpumask.h>
#include <xen/nodemask.h>
#include <xen/radix-tree.h>
#include <xen/multicall.h>
#include <xen/tasklet.h>
#include <xen/mm.h>
#include <xen/smp.h>
#include <xen/perfc.h>
#include <asm/atomic.h>
#include <xen/wait.h>
#include <public/xen.h>
#include <public/domctl.h>
#include <public/sysctl.h>
#include <public/vcpu.h>
#include <public/vm_event.h>
#include <public/event_channel.h>

#ifdef CONFIG_COMPAT
#include <compat/vcpu.h>
DEFINE_XEN_GUEST_HANDLE(vcpu_runstate_info_compat_t);
#endif

/*
 * Stats
 *
 * Enable and ease the use of scheduling-related performance counters.
 */
#ifdef CONFIG_PERF_COUNTERS
#define SCHED_STATS
#endif

#define SCHED_STAT_CRANK(_X) (perfc_incr(_X))

/* A global pointer to the hardware domain (usually DOM0). */
extern struct domain *hardware_domain;

#ifdef CONFIG_LATE_HWDOM
extern domid_t hardware_domid;
#else
#define hardware_domid 0
#endif

#ifndef CONFIG_COMPAT
#define BITS_PER_EVTCHN_WORD(d) BITS_PER_XEN_ULONG
#else
#define BITS_PER_EVTCHN_WORD(d) (has_32bit_shinfo(d) ? 32 : BITS_PER_XEN_ULONG)
#endif

#define BUCKETS_PER_GROUP (PAGE_SIZE/sizeof(struct evtchn *))
/* Round the size of struct evtchn up to a power of two. */
#define __RDU2(x)   ( (x) | ( (x) >> 1))
#define __RDU4(x)   ( __RDU2(x) | ( __RDU2(x) >> 2))
#define __RDU8(x)   ( __RDU4(x) | ( __RDU4(x) >> 4))
#define __RDU16(x)  ( __RDU8(x) | ( __RDU8(x) >> 8))
#define __RDU32(x)  (__RDU16(x) | (__RDU16(x) >>16))
#define next_power_of_2(x)  (__RDU32((x)-1) + 1)
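
/*
 * Worked example (illustrative only): the __RDU cascade smears the highest
 * set bit of (x - 1) into every lower bit, so adding one rounds up to the
 * next power of two:
 *   next_power_of_2(48): __RDU32(47) == 0x3f, +1 == 64
 *   next_power_of_2(64): __RDU32(63) == 0x3f, +1 == 64 (an exact power of
 *   two maps to itself)
 */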

/* Maximum number of event channels for any ABI. */
#define MAX_NR_EVTCHNS MAX(EVTCHN_2L_NR_CHANNELS, EVTCHN_FIFO_NR_CHANNELS)

#define EVTCHNS_PER_BUCKET (PAGE_SIZE / next_power_of_2(sizeof(struct evtchn)))
#define EVTCHNS_PER_GROUP  (BUCKETS_PER_GROUP * EVTCHNS_PER_BUCKET)
#define NR_EVTCHN_GROUPS   DIV_ROUND_UP(MAX_NR_EVTCHNS, EVTCHNS_PER_GROUP)
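
/*
 * Sketch of how a port number maps onto this two-level layout (the real
 * lookup lives in common/event_channel.c; the names below are illustrative).
 * Ports below EVTCHNS_PER_BUCKET are served directly from d->evtchn, while
 * higher ports index a group, then a bucket, then a slot:
 *
 *     group  = port / EVTCHNS_PER_GROUP;
 *     bucket = (port % EVTCHNS_PER_GROUP) / EVTCHNS_PER_BUCKET;
 *     slot   = port % EVTCHNS_PER_BUCKET;
 *     chn    = &d->evtchn_group[group][bucket][slot];
 *
 * Buckets beyond the first are allocated on demand as ports are bound.
 */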

#define XEN_CONSUMER_BITS 3
#define NR_XEN_CONSUMERS ((1 << XEN_CONSUMER_BITS) - 1)

struct evtchn
{
    spinlock_t lock;
#define ECS_FREE         0 /* Channel is available for use.                  */
#define ECS_RESERVED     1 /* Channel is reserved.                           */
#define ECS_UNBOUND      2 /* Channel is waiting to bind to a remote domain. */
#define ECS_INTERDOMAIN  3 /* Channel is bound to another domain.            */
#define ECS_PIRQ         4 /* Channel is bound to a physical IRQ line.       */
#define ECS_VIRQ         5 /* Channel is bound to a virtual IRQ line.        */
#define ECS_IPI          6 /* Channel is bound to a virtual IPI line.        */
    u8 state;             /* ECS_* */
    u8 xen_consumer:XEN_CONSUMER_BITS; /* Consumer in Xen if nonzero */
    u8 pending:1;
    u16 notify_vcpu_id;   /* VCPU for local delivery notification */
    u32 port;
    union {
        struct {
            domid_t remote_domid;
        } unbound;     /* state == ECS_UNBOUND */
        struct {
            evtchn_port_t remote_port;
            struct domain *remote_dom;
        } interdomain; /* state == ECS_INTERDOMAIN */
        struct {
            u32 irq;
            evtchn_port_t next_port;
            evtchn_port_t prev_port;
        } pirq;        /* state == ECS_PIRQ */
        u16 virq;      /* state == ECS_VIRQ */
    } u;
    u8 priority;
    u8 last_priority;
    u16 last_vcpu_id;
#ifdef CONFIG_XSM
    union {
#ifdef XSM_NEED_GENERIC_EVTCHN_SSID
        /*
         * If an XSM module needs more space for its event channel context,
         * this pointer stores the necessary data for the security server.
         */
        void *generic;
#endif
#ifdef CONFIG_FLASK
        /*
         * Inlining the contents of the structure for FLASK avoids unneeded
         * allocations, and on 64-bit platforms with only FLASK enabled,
         * reduces the size of struct evtchn.
         */
        u32 flask_sid;
#endif
    } ssid;
#endif
} __attribute__((aligned(64)));

int evtchn_init(struct domain *d); /* from domain_create */
void evtchn_destroy(struct domain *d); /* from domain_kill */
void evtchn_destroy_final(struct domain *d); /* from complete_domain_destroy */

struct waitqueue_vcpu;

struct vcpu
{
    int vcpu_id;

    int processor;

    vcpu_info_t *vcpu_info;

    struct domain *domain;

    struct vcpu *next_in_list;

    s_time_t periodic_period;
    s_time_t periodic_last_event;
    struct timer periodic_timer;
    struct timer singleshot_timer;

    struct timer poll_timer; /* timeout for SCHEDOP_poll */

    void *sched_priv;        /* scheduler-specific data */

    struct vcpu_runstate_info runstate;
#ifndef CONFIG_COMPAT
# define runstate_guest(v) ((v)->runstate_guest)
    XEN_GUEST_HANDLE(vcpu_runstate_info_t) runstate_guest; /* guest address */
#else
# define runstate_guest(v) ((v)->runstate_guest.native)
    union {
        XEN_GUEST_HANDLE(vcpu_runstate_info_t) native;
        XEN_GUEST_HANDLE(vcpu_runstate_info_compat_t) compat;
    } runstate_guest; /* guest address */
#endif

    /* Last time the vCPU was scheduled out. */
    uint64_t last_run_time;

    /* Has the FPU been initialised? */
    bool fpu_initialised;
    /* Has the FPU been used since it was last saved? */
    bool fpu_dirtied;
    /* Initialization completed for this VCPU? */
    bool is_initialised;
    /* Currently running on a CPU? */
    bool is_running;
    /* VCPU should wake fast (do not deep sleep the CPU). */
    bool is_urgent;

#ifdef VCPU_TRAP_LAST
#define VCPU_TRAP_NONE 0
    struct {
        bool pending;
        uint8_t old_mask;
    } async_exception_state[VCPU_TRAP_LAST];
#define async_exception_state(t) async_exception_state[(t)-1]
    uint8_t async_exception_mask;
#endif

    /* Require shutdown to be deferred for some asynchronous operation? */
    bool defer_shutdown;
    /* VCPU is paused following shutdown request (d->is_shutting_down)? */
    bool paused_for_shutdown;
    /* VCPU needs to have its affinity restored. */
    bool affinity_broken;

    /* A hypercall has been preempted. */
    bool hcall_preempted;
#ifdef CONFIG_COMPAT
    /* A hypercall is using the compat ABI? */
    bool hcall_compat;
#endif

    /*
     * > 0: a single port is being polled;
     * = 0: nothing is being polled (vcpu should be clear in d->poll_mask);
     * < 0: multiple ports may be being polled.
     */
    int poll_evtchn;

    /* (over-)protected by ->domain->event_lock */
    int pirq_evtchn_head;

    unsigned long pause_flags;
    atomic_t pause_count;

    /* VCPU paused for vm_event replies. */
    atomic_t vm_event_pause_count;
    /* VCPU paused by system controller. */
    int controller_pause_count;

    /* Grant table map tracking. */
    spinlock_t maptrack_freelist_lock;
    unsigned int maptrack_head;
    unsigned int maptrack_tail;

    /* IRQ-safe virq_lock protects against delivering VIRQ to stale evtchn. */
    evtchn_port_t virq_to_evtchn[NR_VIRQS];
    spinlock_t virq_lock;

    /* Bitmask of CPUs on which this VCPU may run. */
    cpumask_var_t cpu_hard_affinity;
    /* Used to change affinity temporarily. */
    cpumask_var_t cpu_hard_affinity_tmp;
    /* Used to restore affinity across S3. */
    cpumask_var_t cpu_hard_affinity_saved;

    /* Bitmask of CPUs on which this VCPU prefers to run. */
    cpumask_var_t cpu_soft_affinity;

    /* Bitmask of CPUs which are holding onto this VCPU's state. */
    cpumask_var_t vcpu_dirty_cpumask;

    /* Tasklet for continue_hypercall_on_cpu(). */
    struct tasklet continue_hypercall_tasklet;

    /* Multicall information. */
    struct mc_state mc_state;

    struct waitqueue_vcpu *waitqueue_vcpu;

    /* Guest-specified relocation of vcpu_info. */
    mfn_t vcpu_info_mfn;

    struct evtchn_fifo_vcpu *evtchn_fifo;

    struct arch_vcpu arch;
};

/* Per-domain lock can be recursively acquired in fault handlers. */
#define domain_lock(d) spin_lock_recursive(&(d)->domain_lock)
#define domain_unlock(d) spin_unlock_recursive(&(d)->domain_lock)

/* VM event */
struct vm_event_domain
{
    /* ring lock */
    spinlock_t ring_lock;
    /* The ring has 64 entries */
    unsigned char foreign_producers;
    unsigned char target_producers;
    /* shared ring page */
    void *ring_page;
    struct page_info *ring_pg_struct;
    /* front-end ring */
    vm_event_front_ring_t front_ring;
    /* event channel port (vcpu0 only) */
    int xen_port;
    /* vm_event bit for vcpu->pause_flags */
    int pause_flag;
    /* list of vcpus waiting for room in the ring */
    struct waitqueue_head wq;
    /* the number of vCPUs blocked */
    unsigned int blocked;
    /* The last vcpu woken up */
    unsigned int last_vcpu_wake_up;
};

struct evtchn_port_ops;

enum guest_type {
    guest_type_pv, guest_type_hvm
};

struct domain
{
    domid_t domain_id;

    unsigned int max_vcpus;
    struct vcpu **vcpu;

    shared_info_t *shared_info;     /* shared data area */

    spinlock_t domain_lock;

    spinlock_t page_alloc_lock;     /* protects all the following fields */
    struct page_list_head page_list;    /* linked list */
    struct page_list_head xenpage_list; /* linked list (size xenheap_pages) */
    unsigned int tot_pages;         /* number of pages currently possessed */
    unsigned int xenheap_pages;     /* # pages allocated from Xen heap */
    unsigned int outstanding_pages; /* pages claimed but not possessed */
    unsigned int max_pages;         /* maximum value for tot_pages */
    atomic_t shr_pages;             /* number of shared pages */
    atomic_t paged_pages;           /* number of paged-out pages */

    /* Scheduling. */
    void *sched_priv;               /* scheduler-specific data */
    struct cpupool *cpupool;

    struct domain *next_in_list;
    struct domain *next_in_hashbucket;

    struct list_head rangesets;
    spinlock_t rangesets_lock;

    /* Event channel information. */
    struct evtchn *evtchn;                          /* first bucket only */
    struct evtchn **evtchn_group[NR_EVTCHN_GROUPS]; /* all other buckets */
    unsigned int max_evtchns;       /* number supported by ABI */
    unsigned int max_evtchn_port;   /* max permitted port number */
    unsigned int valid_evtchns;     /* number of allocated event channels */
    spinlock_t event_lock;
    const struct evtchn_port_ops *evtchn_port_ops;
    struct evtchn_fifo_domain *evtchn_fifo;

    struct grant_table *grant_table;

    /*
     * Interrupt to event-channel mappings and other per-guest-pirq data.
     * Protected by the domain's event-channel spinlock.
     */
    struct radix_tree_root pirq_tree;
    unsigned int nr_pirqs;

    enum guest_type guest_type;

    /* Is this guest dying (i.e., a zombie)? */
    enum { DOMDYING_alive, DOMDYING_dying, DOMDYING_dead } is_dying;

    /* Domain is paused by controller software? */
    int controller_pause_count;

    int64_t time_offset_seconds;

#ifdef CONFIG_HAS_PASSTHROUGH
    struct domain_iommu iommu;

    /* Does this guest need iommu mappings (-1 meaning "being set up")? */
    s8 need_iommu;
#endif
    /* is node-affinity automatically computed? */
    bool auto_node_affinity;
    /* Is this guest fully privileged (aka dom0)? */
    bool is_privileged;
    /* Is this a xenstore domain (not dom0)? */
    bool is_xenstore;
    /* Domain's VCPUs are pinned 1:1 to physical CPUs? */
    bool is_pinned;
    /* Non-migratable and non-restorable? */
    bool disable_migrate;
    /* Is this guest being debugged by dom0? */
    bool debugger_attached;
    /*
     * Set to true at the very end of domain creation, when the domain is
     * unpaused for the first time by the system controller.
     */
    bool creation_finished;

    /* Which guest this guest has privileges on */
    struct domain *target;

    /* Are any VCPUs polling event channels (SCHEDOP_poll)? */
#if MAX_VIRT_CPUS <= BITS_PER_LONG
    DECLARE_BITMAP(poll_mask, MAX_VIRT_CPUS);
#else
    unsigned long *poll_mask;
#endif

    /* I/O capabilities (access to IRQs and memory-mapped I/O). */
    struct rangeset *iomem_caps;
    struct rangeset *irq_caps;

    /* Guest has shut down (inc. reason code)? */
    spinlock_t shutdown_lock;
    bool is_shutting_down; /* in process of shutting down? */
    bool is_shut_down;     /* fully shut down? */
#define SHUTDOWN_CODE_INVALID ~0u
    unsigned int shutdown_code;

    /* If this is not 0, send suspend notification here instead of
     * raising DOM_EXC */
    evtchn_port_t suspend_evtchn;

    atomic_t pause_count;
    atomic_t refcnt;

    unsigned long vm_assist;

    /* Bitmask of CPUs which are holding onto this domain's state. */
    cpumask_var_t domain_dirty_cpumask;

    struct arch_domain arch;

    void *ssid; /* sHype security subject identifier */

    /* Control-plane tools handle for this domain. */
    xen_domain_handle_t handle;

    /* hvm_print_line() and guest_console_write() logging. */
#define DOMAIN_PBUF_SIZE 200
    char *pbuf;
    unsigned pbuf_idx;
    spinlock_t pbuf_lock;

    /* OProfile support. */
    struct xenoprof *xenoprof;

    /* Domain watchdog. */
#define NR_DOMAIN_WATCHDOG_TIMERS 2
    spinlock_t watchdog_lock;
    uint32_t watchdog_inuse_map;
    struct timer watchdog_timer[NR_DOMAIN_WATCHDOG_TIMERS];

    struct rcu_head rcu;

    /*
     * Hypercall deadlock avoidance lock. Used if a hypercall might
     * cause a deadlock. Acquirers don't spin waiting; they preempt.
     */
    spinlock_t hypercall_deadlock_mutex;

    /* transcendent memory, auto-allocated on first tmem op by each domain */
    struct client *tmem_client;

    struct lock_profile_qhead profile_head;

    /* Various vm_events */

    /* Memory sharing support */
#ifdef CONFIG_HAS_MEM_SHARING
    struct vm_event_domain *vm_event_share;
#endif
    /* Memory paging support */
#ifdef CONFIG_HAS_MEM_PAGING
    struct vm_event_domain *vm_event_paging;
#endif
    /* VM event monitor support */
    struct vm_event_domain *vm_event_monitor;

    /*
     * Can be specified by the user. If that is not the case, it is
     * computed from the union of all the vcpu cpu-affinity masks.
     */
    nodemask_t node_affinity;
    unsigned int last_alloc_node;
    spinlock_t node_affinity_lock;

    /* vNUMA topology accesses are protected by rwlock. */
    rwlock_t vnuma_rwlock;
    struct vnuma_info *vnuma;

    /* Common monitor options */
    struct {
        unsigned int guest_request_enabled : 1;
        unsigned int guest_request_sync    : 1;
    } monitor;
};

/* Protect updates/reads (resp.) of domain_list and domain_hash. */
extern spinlock_t domlist_update_lock;
extern rcu_read_lock_t domlist_read_lock;

extern struct vcpu *idle_vcpu[NR_CPUS];
#define is_idle_domain(d) ((d)->domain_id == DOMID_IDLE)
#define is_idle_vcpu(v)   (is_idle_domain((v)->domain))

#define DOMAIN_DESTROYED (1u << 31) /* assumes atomic_t is >= 32 bits */
#define put_domain(_d) \
  if ( atomic_dec_and_test(&(_d)->refcnt) ) domain_destroy(_d)

/*
 * Use this when you don't have an existing reference to @d. It returns
 * FALSE if @d is being destroyed.
 */
static always_inline int get_domain(struct domain *d)
{
    int old, seen = atomic_read(&d->refcnt);
    do
    {
        old = seen;
        if ( unlikely(old & DOMAIN_DESTROYED) )
            return 0;
        seen = atomic_cmpxchg(&d->refcnt, old, old + 1);
    }
    while ( unlikely(seen != old) );
    return 1;
}

/*
 * Use this when you already have, or are borrowing, a reference to @d.
 * In this case we know that @d cannot be destroyed under our feet.
 */
static inline void get_knownalive_domain(struct domain *d)
{
    atomic_inc(&d->refcnt);
    ASSERT(!(atomic_read(&d->refcnt) & DOMAIN_DESTROYED));
}
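
/*
 * Typical reference-counting pattern (an illustrative sketch).  A failing
 * get_domain() means @d is already being destroyed, and the final
 * put_domain() may itself end up calling domain_destroy():
 *
 *     if ( !get_domain(d) )
 *         return -EINVAL;
 *     ... operate on d; the extra reference keeps it alive ...
 *     put_domain(d);
 */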

int domain_set_node_affinity(struct domain *d, const nodemask_t *affinity);
void domain_update_node_affinity(struct domain *d);

/*
 * Create a domain: the configuration is only necessary for real domains
 * (i.e. !DOMCRF_dummy; the idle domain is also excluded).
 */
struct domain *domain_create(domid_t domid, unsigned int domcr_flags,
                             uint32_t ssidref,
                             struct xen_arch_domainconfig *config);
/* DOMCRF_hvm: Create an HVM domain, as opposed to a PV domain. */
#define _DOMCRF_hvm           0
#define DOMCRF_hvm            (1U<<_DOMCRF_hvm)
/* DOMCRF_hap: Create a domain with hardware-assisted paging. */
#define _DOMCRF_hap           1
#define DOMCRF_hap            (1U<<_DOMCRF_hap)
/* DOMCRF_s3_integrity: Create a domain with tboot S3 memory integrity
   protection. */
#define _DOMCRF_s3_integrity  2
#define DOMCRF_s3_integrity   (1U<<_DOMCRF_s3_integrity)
/* DOMCRF_dummy: Create a dummy domain (not scheduled; not on domain list) */
#define _DOMCRF_dummy         3
#define DOMCRF_dummy          (1U<<_DOMCRF_dummy)
/* DOMCRF_oos_off: don't use out-of-sync optimization for shadow page tables */
#define _DOMCRF_oos_off       4
#define DOMCRF_oos_off        (1U<<_DOMCRF_oos_off)
/* DOMCRF_xs_domain: xenstore domain */
#define _DOMCRF_xs_domain     5
#define DOMCRF_xs_domain      (1U<<_DOMCRF_xs_domain)

/*
 * rcu_lock_domain_by_id() is more efficient than get_domain_by_id().
 * This is the preferred function if the returned domain reference
 * is short lived, but it cannot be used if the domain reference needs
 * to be kept beyond the current scope (e.g., across a softirq).
 * The returned domain reference must be discarded using rcu_unlock_domain().
 */
struct domain *rcu_lock_domain_by_id(domid_t dom);

/*
 * As the above function, but resolves DOMID_SELF to the current domain.
 */
struct domain *rcu_lock_domain_by_any_id(domid_t dom);

/*
 * As rcu_lock_domain_by_id(), but will fail EPERM or ESRCH rather than resolve
 * to the local domain.
 */
int rcu_lock_remote_domain_by_id(domid_t dom, struct domain **d);

/*
 * As rcu_lock_remote_domain_by_id(), but will fail EINVAL if the domain is
 * dying.
 */
int rcu_lock_live_remote_domain_by_id(domid_t dom, struct domain **d);

static inline void rcu_unlock_domain(struct domain *d)
{
    if ( d != current->domain )
        rcu_read_unlock(d);
}

static inline struct domain *rcu_lock_domain(struct domain *d)
{
    if ( d != current->domain )
        rcu_read_lock(d);
    return d;
}

static inline struct domain *rcu_lock_current_domain(void)
{
    return /*rcu_lock_domain*/(current->domain);
}
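
/*
 * Typical usage of the RCU-based lookups above (an illustrative sketch; the
 * surrounding handler and error value are hypothetical):
 *
 *     struct domain *d = rcu_lock_domain_by_any_id(domid);
 *
 *     if ( d == NULL )
 *         return -ESRCH;
 *     ... use d; it remains valid until the unlock below ...
 *     rcu_unlock_domain(d);
 *
 * Unlike get_domain(), this takes no reference, so @d must not be stashed
 * and used after rcu_unlock_domain().
 */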

struct domain *get_domain_by_id(domid_t dom);
void domain_destroy(struct domain *d);
int domain_kill(struct domain *d);
int domain_shutdown(struct domain *d, u8 reason);
void domain_resume(struct domain *d);
void domain_pause_for_debugger(void);

int domain_soft_reset(struct domain *d);

int vcpu_start_shutdown_deferral(struct vcpu *v);
void vcpu_end_shutdown_deferral(struct vcpu *v);

/*
 * Mark specified domain as crashed. This function always returns, even if the
 * caller is the specified domain. The domain is not synchronously descheduled
 * from any processor.
 */
void __domain_crash(struct domain *d);
#define domain_crash(d) do {                                              \
    printk("domain_crash called from %s:%d\n", __FILE__, __LINE__);      \
    __domain_crash(d);                                                    \
} while (0)

/*
 * Mark current domain as crashed and synchronously deschedule from the local
 * processor. This function never returns.
 */
void noreturn __domain_crash_synchronous(void);
#define domain_crash_synchronous() do {                                   \
    printk("domain_crash_sync called from %s:%d\n", __FILE__, __LINE__);  \
    __domain_crash_synchronous();                                         \
} while (0)

/*
 * Called from assembly code, with an optional address to help indicate why
 * the crash occurred. If addr is 0, look up address from last extable
 * redirection.
 */
void noreturn asm_domain_crash_synchronous(unsigned long addr);

#define set_current_state(_s) do { current->state = (_s); } while (0)
void scheduler_init(void);
int sched_init_vcpu(struct vcpu *v, unsigned int processor);
void sched_destroy_vcpu(struct vcpu *v);
int sched_init_domain(struct domain *d, int poolid);
void sched_destroy_domain(struct domain *d);
int sched_move_domain(struct domain *d, struct cpupool *c);
long sched_adjust(struct domain *, struct xen_domctl_scheduler_op *);
long sched_adjust_global(struct xen_sysctl_scheduler_op *);
int sched_id(void);
void sched_tick_suspend(void);
void sched_tick_resume(void);
void vcpu_wake(struct vcpu *v);
long vcpu_yield(void);
void vcpu_sleep_nosync(struct vcpu *v);
void vcpu_sleep_sync(struct vcpu *v);

/*
 * Force synchronisation of given VCPU's state. If it is currently descheduled,
 * this call will ensure that all its state is committed to memory and that
 * no CPU is using critical state (e.g., page tables) belonging to the VCPU.
 */
void sync_vcpu_execstate(struct vcpu *v);

/* As above, for any lazy state being held on the local CPU. */
void sync_local_execstate(void);

/*
 * Called by the scheduler to switch to another VCPU. This function must
 * call context_saved(@prev) when the local CPU is no longer running in
 * @prev's context, and that context is saved to memory. Alternatively, if
 * implementing lazy context switching, it suffices to ensure that invoking
 * sync_vcpu_execstate() will switch and commit @prev's state.
 */
void context_switch(
    struct vcpu *prev,
    struct vcpu *next);

/*
 * As described above, context_switch() must call this function when the
 * local CPU is no longer running in @prev's context, and @prev's context is
 * saved to memory. Alternatively, if implementing lazy context switching,
 * ensure that invoking sync_vcpu_execstate() will switch and commit @prev.
 */
void context_saved(struct vcpu *prev);

/* Called by the scheduler to continue running the current VCPU. */
void continue_running(
    struct vcpu *same);

void startup_cpu_idle_loop(void);
extern void (*pm_idle) (void);
extern void (*dead_idle) (void);

/*
 * Creates a continuation to resume the current hypercall. The caller should
 * return immediately, propagating the value returned from this invocation.
 * The format string specifies the types and number of hypercall arguments.
 * It contains one character per argument as follows:
 *  'i' [unsigned] {char, int}
 *  'l' [unsigned] long
 *  'h' guest handle (XEN_GUEST_HANDLE(foo))
 */
unsigned long hypercall_create_continuation(
    unsigned int op, const char *format, ...);

static inline void hypercall_cancel_continuation(struct vcpu *v)
{
    v->hcall_preempted = false;
}

/*
 * For long-running operations that must be in hypercall context, check
 * if there is background work to be done that should interrupt this
 * operation.
 */
#define hypercall_preempt_check() (unlikely(    \
        softirq_pending(smp_processor_id()) |   \
        local_events_need_delivery()            \
    ))
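
/*
 * Sketch of how a long-running hypercall handler typically uses the above
 * (illustrative only; the op name and argument layout are hypothetical):
 *
 *     for ( ; i < count; i++ )
 *     {
 *         ... process element i ...
 *         if ( hypercall_preempt_check() )
 *             return hypercall_create_continuation(
 *                 __HYPERVISOR_example_op, "ih", i, arg_handle);
 *     }
 *
 * The "ih" format string matches the two remaining arguments: an int-sized
 * resume index and a guest handle.  The continuation re-enters the same
 * hypercall with those arguments, so the handler resumes where it left off.
 */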

/*
 * For long-running operations that may be in hypercall context or on
 * the idle vcpu (e.g. during dom0 construction), check if there is
 * background work to be done that should interrupt this operation.
 */
#define general_preempt_check() (unlikely(                          \
        softirq_pending(smp_processor_id()) ||                      \
        (!is_idle_vcpu(current) && local_events_need_delivery())    \
    ))

extern struct domain *domain_list;

/* Caller must hold the domlist_read_lock or domlist_update_lock. */
static inline struct domain *first_domain_in_cpupool(struct cpupool *c)
{
    struct domain *d;
    for (d = rcu_dereference(domain_list); d && d->cpupool != c;
         d = rcu_dereference(d->next_in_list));
    return d;
}
static inline struct domain *next_domain_in_cpupool(
    struct domain *d, struct cpupool *c)
{
    for (d = rcu_dereference(d->next_in_list); d && d->cpupool != c;
         d = rcu_dereference(d->next_in_list));
    return d;
}

#define for_each_domain(_d)                     \
 for ( (_d) = rcu_dereference(domain_list);     \
       (_d) != NULL;                            \
       (_d) = rcu_dereference((_d)->next_in_list ))

#define for_each_domain_in_cpupool(_d,_c)       \
 for ( (_d) = first_domain_in_cpupool(_c);      \
       (_d) != NULL;                            \
       (_d) = next_domain_in_cpupool((_d), (_c)))

#define for_each_vcpu(_d,_v)                    \
 for ( (_v) = (_d)->vcpu ? (_d)->vcpu[0] : NULL; \
       (_v) != NULL;                            \
       (_v) = (_v)->next_in_list )
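
/*
 * Sketch of iterating over all domains and their VCPUs (illustrative only).
 * The domain list is RCU-protected, so a read-side critical section is
 * needed around the walk:
 *
 *     struct domain *d;
 *     struct vcpu *v;
 *
 *     rcu_read_lock(&domlist_read_lock);
 *     for_each_domain ( d )
 *         for_each_vcpu ( d, v )
 *             ... inspect v without blocking ...
 *     rcu_read_unlock(&domlist_read_lock);
 */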

/*
 * Per-VCPU pause flags.
 */
/* Domain is blocked waiting for an event. */
#define _VPF_blocked         0
#define VPF_blocked          (1UL<<_VPF_blocked)
/* VCPU is offline. */
#define _VPF_down            1
#define VPF_down             (1UL<<_VPF_down)
/* VCPU is blocked awaiting an event to be consumed by Xen. */
#define _VPF_blocked_in_xen  2
#define VPF_blocked_in_xen   (1UL<<_VPF_blocked_in_xen)
/* VCPU affinity has changed: migrating to a new CPU. */
#define _VPF_migrating       3
#define VPF_migrating        (1UL<<_VPF_migrating)
/* VCPU is blocked due to missing mem_paging ring. */
#define _VPF_mem_paging      4
#define VPF_mem_paging       (1UL<<_VPF_mem_paging)
/* VCPU is blocked due to missing mem_access ring. */
#define _VPF_mem_access      5
#define VPF_mem_access       (1UL<<_VPF_mem_access)
/* VCPU is blocked due to missing mem_sharing ring. */
#define _VPF_mem_sharing     6
#define VPF_mem_sharing      (1UL<<_VPF_mem_sharing)
/* VCPU is being reset. */
#define _VPF_in_reset        7
#define VPF_in_reset         (1UL<<_VPF_in_reset)
/* VCPU is parked. */
#define _VPF_parked          8
#define VPF_parked           (1UL<<_VPF_parked)

static inline int vcpu_runnable(struct vcpu *v)
{
    return !(v->pause_flags |
             atomic_read(&v->pause_count) |
             atomic_read(&v->domain->pause_count));
}

void vcpu_block(void);
void vcpu_unblock(struct vcpu *v);
void vcpu_pause(struct vcpu *v);
void vcpu_pause_nosync(struct vcpu *v);
void vcpu_unpause(struct vcpu *v);
int vcpu_pause_by_systemcontroller(struct vcpu *v);
int vcpu_unpause_by_systemcontroller(struct vcpu *v);

void domain_pause(struct domain *d);
void domain_pause_nosync(struct domain *d);
void domain_unpause(struct domain *d);
int domain_unpause_by_systemcontroller(struct domain *d);
int __domain_pause_by_systemcontroller(struct domain *d,
                                       void (*pause_fn)(struct domain *d));
static inline int domain_pause_by_systemcontroller(struct domain *d)
{
    return __domain_pause_by_systemcontroller(d, domain_pause);
}
static inline int domain_pause_by_systemcontroller_nosync(struct domain *d)
{
    return __domain_pause_by_systemcontroller(d, domain_pause_nosync);
}
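
/*
 * Sketch of the pause/unpause pairing (illustrative only): pause counts
 * nest, so every successful pause must be matched by exactly one unpause.
 *
 *     domain_pause(d);
 *     ... domain's VCPUs are guaranteed not to be running here ...
 *     domain_unpause(d);
 *
 * The *_by_systemcontroller variants additionally maintain the separate
 * controller_pause_count on behalf of the toolstack and can fail, so their
 * return value needs checking.
 */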

/* domain_pause() but safe against trying to pause current. */
void domain_pause_except_self(struct domain *d);
void domain_unpause_except_self(struct domain *d);

void cpu_init(void);

struct scheduler;

struct scheduler *scheduler_get_default(void);
struct scheduler *scheduler_alloc(unsigned int sched_id, int *perr);
void scheduler_free(struct scheduler *sched);
int schedule_cpu_switch(unsigned int cpu, struct cpupool *c);
void vcpu_force_reschedule(struct vcpu *v);
int cpu_disable_scheduler(unsigned int cpu);
int vcpu_set_hard_affinity(struct vcpu *v, const cpumask_t *affinity);
int vcpu_set_soft_affinity(struct vcpu *v, const cpumask_t *affinity);
void restore_vcpu_affinity(struct domain *d);
int vcpu_pin_override(struct vcpu *v, int cpu);

void vcpu_runstate_get(struct vcpu *v, struct vcpu_runstate_info *runstate);
uint64_t get_cpu_idle_time(unsigned int cpu);

/*
 * Used by the idle loop to decide whether there is work to do:
 * (1) deal with RCU; (2) run softirqs; (3) play dead; or (4) run tasklets.
 *
 * About (4): if a tasklet is enqueued, it will be scheduled
 * really really soon, and hence it's pointless to try to
 * sleep between these two events (that's why we don't call
 * the tasklet_work_to_do() helper).
 */
#define cpu_is_haltable(cpu)                    \
    (!rcu_needs_cpu(cpu) &&                     \
     !softirq_pending(cpu) &&                   \
     cpu_online(cpu) &&                         \
     !per_cpu(tasklet_work_to_do, cpu))

void watchdog_domain_init(struct domain *d);
void watchdog_domain_destroy(struct domain *d);

/*
 * Use this check when the following are both true:
 *  - Using this feature or interface requires full access to the hardware
 *    (that is, this would not be suitable for a driver domain)
 *  - There is never a reason to deny the hardware domain access to this
 */
#define is_hardware_domain(_d) ((_d) == hardware_domain)

/* This check is for functionality specific to a control domain */
#define is_control_domain(_d) ((_d)->is_privileged)

#define VM_ASSIST(d, t) (test_bit(VMASST_TYPE_ ## t, &(d)->vm_assist))

#define is_pv_domain(d) ((d)->guest_type == guest_type_pv)
#define is_pv_vcpu(v)   (is_pv_domain((v)->domain))
#define is_hvm_domain(d) ((d)->guest_type == guest_type_hvm)
#define is_hvm_vcpu(v)   (is_hvm_domain((v)->domain))
#define is_pinned_vcpu(v) ((v)->domain->is_pinned || \
                           cpumask_weight((v)->cpu_hard_affinity) == 1)
#ifdef CONFIG_HAS_PASSTHROUGH
#define need_iommu(d)    ((d)->need_iommu)
#else
#define need_iommu(d)    (0)
#endif
static inline bool is_vcpu_online(const struct vcpu *v)
{
    return !test_bit(_VPF_down, &v->pause_flags);
}

void set_vcpu_migration_delay(unsigned int delay);
unsigned int get_vcpu_migration_delay(void);

extern bool sched_smt_power_savings;

extern enum cpufreq_controller {
    FREQCTL_none, FREQCTL_dom0_kernel, FREQCTL_xen
} cpufreq_controller;

#define CPUPOOLID_NONE -1

struct cpupool *cpupool_get_by_id(int poolid);
void cpupool_put(struct cpupool *pool);
int cpupool_add_domain(struct domain *d, int poolid);
void cpupool_rm_domain(struct domain *d);
int cpupool_move_domain(struct domain *d, struct cpupool *c);
int cpupool_do_sysctl(struct xen_sysctl_cpupool_op *op);
void schedule_dump(struct cpupool *c);
extern void dump_runq(unsigned char key);

void arch_do_physinfo(struct xen_sysctl_physinfo *pi);

#endif /* __SCHED_H__ */

/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */