
#ifndef __SCHED_H__
#define __SCHED_H__

#include <xen/types.h>
#include <xen/spinlock.h>
#include <xen/rwlock.h>
#include <xen/shared.h>
#include <xen/timer.h>
#include <xen/rangeset.h>
#include <xen/domain.h>
#include <xen/iommu.h>
#include <xen/rcupdate.h>
#include <xen/cpumask.h>
#include <xen/nodemask.h>
#include <xen/radix-tree.h>
#include <xen/multicall.h>
#include <xen/tasklet.h>
#include <xen/mm.h>
#include <xen/smp.h>
#include <xen/perfc.h>
#include <asm/atomic.h>
#include <xen/wait.h>
#include <public/xen.h>
#include <public/domctl.h>
#include <public/sysctl.h>
#include <public/vcpu.h>
#include <public/vm_event.h>
#include <public/event_channel.h>

#ifdef CONFIG_COMPAT
#include <compat/vcpu.h>
DEFINE_XEN_GUEST_HANDLE(vcpu_runstate_info_compat_t);
#endif

/*
 * Stats
 *
 * Enable and ease the use of scheduling-related performance counters.
 *
 */
#ifdef CONFIG_PERF_COUNTERS
#define SCHED_STATS
#endif

#define SCHED_STAT_CRANK(_X)                (perfc_incr(_X))
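
/*
 * Illustrative usage sketch (assumes a matching counter has been declared
 * via PERFCOUNTER() in perfc_defn.h; the counter name below is only an
 * example):
 *
 *     SCHED_STAT_CRANK(vcpu_wake_running);
 *
 * When CONFIG_PERF_COUNTERS is disabled, perfc_incr() expands to nothing,
 * so such call sites carry no runtime cost.
 */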

/* A global pointer to the hardware domain (usually DOM0). */
extern struct domain *hardware_domain;

#ifdef CONFIG_LATE_HWDOM
extern domid_t hardware_domid;
#else
#define hardware_domid 0
#endif

#ifndef CONFIG_COMPAT
#define BITS_PER_EVTCHN_WORD(d) BITS_PER_XEN_ULONG
#else
#define BITS_PER_EVTCHN_WORD(d) (has_32bit_shinfo(d) ? 32 : BITS_PER_XEN_ULONG)
#endif

#define BUCKETS_PER_GROUP  (PAGE_SIZE/sizeof(struct evtchn *))
/* Round the size of struct evtchn up to a power of 2. */
#define __RDU2(x)   (       (x) | (   (x) >> 1))
#define __RDU4(x)   ( __RDU2(x) | ( __RDU2(x) >> 2))
#define __RDU8(x)   ( __RDU4(x) | ( __RDU4(x) >> 4))
#define __RDU16(x)  ( __RDU8(x) | ( __RDU8(x) >> 8))
#define __RDU32(x)  (__RDU16(x) | (__RDU16(x) >>16))
#define next_power_of_2(x)      (__RDU32((x)-1) + 1)

/* Maximum number of event channels for any ABI. */
#define MAX_NR_EVTCHNS MAX(EVTCHN_2L_NR_CHANNELS, EVTCHN_FIFO_NR_CHANNELS)

#define EVTCHNS_PER_BUCKET (PAGE_SIZE / next_power_of_2(sizeof(struct evtchn)))
#define EVTCHNS_PER_GROUP  (BUCKETS_PER_GROUP * EVTCHNS_PER_BUCKET)
#define NR_EVTCHN_GROUPS   DIV_ROUND_UP(MAX_NR_EVTCHNS, EVTCHNS_PER_GROUP)
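
/*
 * Illustrative sizing sketch (the concrete numbers depend on the build
 * configuration and on sizeof(struct evtchn)):
 *
 *     next_power_of_2(48) == 64   (bit-smear 47 into 63, then add 1)
 *
 * With 4K pages and 64-bit pointers, for example:
 *     EVTCHNS_PER_BUCKET == 4096 / 64 == 64
 *     BUCKETS_PER_GROUP  == 4096 / 8  == 512
 *     EVTCHNS_PER_GROUP  == 512 * 64  == 32768
 */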

#define XEN_CONSUMER_BITS 3
#define NR_XEN_CONSUMERS ((1 << XEN_CONSUMER_BITS) - 1)

struct evtchn
{
    spinlock_t lock;
#define ECS_FREE         0 /* Channel is available for use.                  */
#define ECS_RESERVED     1 /* Channel is reserved.                           */
#define ECS_UNBOUND      2 /* Channel is waiting to bind to a remote domain. */
#define ECS_INTERDOMAIN  3 /* Channel is bound to another domain.            */
#define ECS_PIRQ         4 /* Channel is bound to a physical IRQ line.       */
#define ECS_VIRQ         5 /* Channel is bound to a virtual IRQ line.        */
#define ECS_IPI          6 /* Channel is bound to a virtual IPI line.        */
    u8  state;             /* ECS_* */
    u8  xen_consumer:XEN_CONSUMER_BITS; /* Consumer in Xen if nonzero */
    u8  pending:1;
    u16 notify_vcpu_id;    /* VCPU for local delivery notification */
    u32 port;
    union {
        struct {
            domid_t remote_domid;
        } unbound;     /* state == ECS_UNBOUND */
        struct {
            evtchn_port_t  remote_port;
            struct domain *remote_dom;
        } interdomain; /* state == ECS_INTERDOMAIN */
        struct {
            u32            irq;
            evtchn_port_t  next_port;
            evtchn_port_t  prev_port;
        } pirq;        /* state == ECS_PIRQ */
        u16 virq;      /* state == ECS_VIRQ */
    } u;
    u8 priority;
    u8 last_priority;
    u16 last_vcpu_id;
#ifdef CONFIG_XSM
    union {
#ifdef XSM_NEED_GENERIC_EVTCHN_SSID
        /*
         * If an XSM module needs more space for its event channel context,
         * this pointer stores the necessary data for the security server.
         */
        void *generic;
#endif
#ifdef CONFIG_FLASK
        /*
         * Inlining the contents of the structure for FLASK avoids unneeded
         * allocations, and on 64-bit platforms with only FLASK enabled,
         * reduces the size of struct evtchn.
         */
        u32 flask_sid;
#endif
    } ssid;
#endif
} __attribute__((aligned(64)));

int  evtchn_init(struct domain *d); /* from domain_create */
void evtchn_destroy(struct domain *d); /* from domain_kill */
void evtchn_destroy_final(struct domain *d); /* from complete_domain_destroy */

struct waitqueue_vcpu;

struct vcpu
{
    int              vcpu_id;

    int              processor;

    vcpu_info_t     *vcpu_info;

    struct domain   *domain;

    struct vcpu     *next_in_list;

    s_time_t         periodic_period;
    s_time_t         periodic_last_event;
    struct timer     periodic_timer;
    struct timer     singleshot_timer;

    struct timer     poll_timer;    /* timeout for SCHEDOP_poll */

    void            *sched_priv;    /* scheduler-specific data */

    struct vcpu_runstate_info runstate;
#ifndef CONFIG_COMPAT
# define runstate_guest(v) ((v)->runstate_guest)
    XEN_GUEST_HANDLE(vcpu_runstate_info_t) runstate_guest; /* guest address */
#else
# define runstate_guest(v) ((v)->runstate_guest.native)
    union {
        XEN_GUEST_HANDLE(vcpu_runstate_info_t) native;
        XEN_GUEST_HANDLE(vcpu_runstate_info_compat_t) compat;
    } runstate_guest; /* guest address */
#endif

    /* Last time the vCPU was scheduled out. */
    uint64_t last_run_time;

    /* Has the FPU been initialised? */
    bool             fpu_initialised;
    /* Has the FPU been used since it was last saved? */
    bool             fpu_dirtied;
    /* Initialization completed for this VCPU? */
    bool             is_initialised;
    /* Currently running on a CPU? */
    bool             is_running;
    /* VCPU should wake fast (do not deep sleep the CPU). */
    bool             is_urgent;

#ifdef VCPU_TRAP_LAST
#define VCPU_TRAP_NONE    0
    struct {
        bool             pending;
        uint8_t          old_mask;
    }                async_exception_state[VCPU_TRAP_LAST];
#define async_exception_state(t) async_exception_state[(t)-1]
    uint8_t          async_exception_mask;
#endif

    /* Require shutdown to be deferred for some asynchronous operation? */
    bool             defer_shutdown;
    /* VCPU is paused following shutdown request (d->is_shutting_down)? */
    bool             paused_for_shutdown;
    /* VCPU needs its affinity restored. */
    bool             affinity_broken;

    /* A hypercall has been preempted. */
    bool             hcall_preempted;
#ifdef CONFIG_COMPAT
    /* A hypercall is using the compat ABI? */
    bool             hcall_compat;
#endif

    /*
     * > 0: a single port is being polled;
     * = 0: nothing is being polled (vcpu should be clear in d->poll_mask);
     * < 0: multiple ports may be being polled.
     */
    int              poll_evtchn;

    /* (over-)protected by ->domain->event_lock */
    int              pirq_evtchn_head;

    unsigned long    pause_flags;
    atomic_t         pause_count;

    /* VCPU paused for vm_event replies. */
    atomic_t         vm_event_pause_count;
    /* VCPU paused by system controller. */
    int              controller_pause_count;

    /* Grant table map tracking. */
    spinlock_t       maptrack_freelist_lock;
    unsigned int     maptrack_head;
    unsigned int     maptrack_tail;

    /* IRQ-safe virq_lock protects against delivering VIRQ to stale evtchn. */
    evtchn_port_t    virq_to_evtchn[NR_VIRQS];
    spinlock_t       virq_lock;

    /* Bitmask of CPUs on which this VCPU may run. */
    cpumask_var_t    cpu_hard_affinity;
    /* Used to change affinity temporarily. */
    cpumask_var_t    cpu_hard_affinity_tmp;
    /* Used to restore affinity across S3. */
    cpumask_var_t    cpu_hard_affinity_saved;

    /* Bitmask of CPUs on which this VCPU prefers to run. */
    cpumask_var_t    cpu_soft_affinity;

    /* Bitmask of CPUs which are holding onto this VCPU's state. */
    cpumask_var_t    vcpu_dirty_cpumask;

    /* Tasklet for continue_hypercall_on_cpu(). */
    struct tasklet   continue_hypercall_tasklet;

    /* Multicall information. */
    struct mc_state  mc_state;

    struct waitqueue_vcpu *waitqueue_vcpu;

    /* Guest-specified relocation of vcpu_info. */
    mfn_t            vcpu_info_mfn;

    struct evtchn_fifo_vcpu *evtchn_fifo;

    struct arch_vcpu arch;
};

/* Per-domain lock can be recursively acquired in fault handlers. */
#define domain_lock(d) spin_lock_recursive(&(d)->domain_lock)
#define domain_unlock(d) spin_unlock_recursive(&(d)->domain_lock)

/* VM event */
struct vm_event_domain
{
    /* ring lock */
    spinlock_t ring_lock;
    /* The ring has 64 entries */
    unsigned char foreign_producers;
    unsigned char target_producers;
    /* shared ring page */
    void *ring_page;
    struct page_info *ring_pg_struct;
    /* front-end ring */
    vm_event_front_ring_t front_ring;
    /* event channel port (vcpu0 only) */
    int xen_port;
    /* vm_event bit for vcpu->pause_flags */
    int pause_flag;
    /* list of vcpus waiting for room in the ring */
    struct waitqueue_head wq;
    /* the number of vCPUs blocked */
    unsigned int blocked;
    /* The last vcpu woken up */
    unsigned int last_vcpu_wake_up;
};

struct evtchn_port_ops;

enum guest_type {
    guest_type_pv, guest_type_hvm
};

struct domain
{
    domid_t          domain_id;

    unsigned int     max_vcpus;
    struct vcpu    **vcpu;

    shared_info_t   *shared_info;     /* shared data area */

    spinlock_t       domain_lock;

    spinlock_t       page_alloc_lock; /* protects all the following fields  */
    struct page_list_head page_list;  /* linked list */
    struct page_list_head xenpage_list; /* linked list (size xenheap_pages) */
    unsigned int     tot_pages;       /* number of pages currently possessed */
    unsigned int     xenheap_pages;   /* # pages allocated from Xen heap    */
    unsigned int     outstanding_pages; /* pages claimed but not possessed  */
    unsigned int     max_pages;       /* maximum value for tot_pages        */
    atomic_t         shr_pages;       /* number of shared pages             */
    atomic_t         paged_pages;     /* number of paged-out pages          */

    /* Scheduling. */
    void            *sched_priv;    /* scheduler-specific data */
    struct cpupool  *cpupool;

    struct domain   *next_in_list;
    struct domain   *next_in_hashbucket;

    struct list_head rangesets;
    spinlock_t       rangesets_lock;

    /* Event channel information. */
    struct evtchn   *evtchn;                         /* first bucket only */
    struct evtchn  **evtchn_group[NR_EVTCHN_GROUPS]; /* all other buckets */
    unsigned int     max_evtchns;     /* number supported by ABI */
    unsigned int     max_evtchn_port; /* max permitted port number */
    unsigned int     valid_evtchns;   /* number of allocated event channels */
    spinlock_t       event_lock;
    const struct evtchn_port_ops *evtchn_port_ops;
    struct evtchn_fifo_domain *evtchn_fifo;

    struct grant_table *grant_table;

    /*
     * Interrupt to event-channel mappings and other per-guest-pirq data.
     * Protected by the domain's event-channel spinlock.
     */
    struct radix_tree_root pirq_tree;
    unsigned int     nr_pirqs;

    enum guest_type guest_type;

    /* Is this guest dying (i.e., a zombie)? */
    enum { DOMDYING_alive, DOMDYING_dying, DOMDYING_dead } is_dying;

    /* Domain is paused by controller software? */
    int              controller_pause_count;

    int64_t          time_offset_seconds;

#ifdef CONFIG_HAS_PASSTHROUGH
    struct domain_iommu iommu;

    /* Does this guest need iommu mappings (-1 meaning "being set up")? */
    s8               need_iommu;
#endif
    /* Is node-affinity automatically computed? */
    bool             auto_node_affinity;
    /* Is this guest fully privileged (aka dom0)? */
    bool             is_privileged;
    /* Is this a xenstore domain (not dom0)? */
    bool             is_xenstore;
    /* Domain's VCPUs are pinned 1:1 to physical CPUs? */
    bool             is_pinned;
    /* Non-migratable and non-restorable? */
    bool             disable_migrate;
    /* Is this guest being debugged by dom0? */
    bool             debugger_attached;
    /*
     * Set to true at the very end of domain creation, when the domain is
     * unpaused for the first time by the system controller.
     */
    bool             creation_finished;

    /* Which guest this guest has privileges on */
    struct domain   *target;

    /* Are any VCPUs polling event channels (SCHEDOP_poll)? */
#if MAX_VIRT_CPUS <= BITS_PER_LONG
    DECLARE_BITMAP(poll_mask, MAX_VIRT_CPUS);
#else
    unsigned long   *poll_mask;
#endif

    /* I/O capabilities (access to IRQs and memory-mapped I/O). */
    struct rangeset *iomem_caps;
    struct rangeset *irq_caps;

    /* Guest has shut down (inc. reason code)? */
    spinlock_t       shutdown_lock;
    bool             is_shutting_down; /* in process of shutting down? */
    bool             is_shut_down;     /* fully shut down? */
#define SHUTDOWN_CODE_INVALID ~0u
    unsigned int     shutdown_code;

    /* If this is not 0, send suspend notification here instead of
     * raising DOM_EXC */
    evtchn_port_t    suspend_evtchn;

    atomic_t         pause_count;
    atomic_t         refcnt;

    unsigned long    vm_assist;

    /* Bitmask of CPUs which are holding onto this domain's state. */
    cpumask_var_t    domain_dirty_cpumask;

    struct arch_domain arch;

    void *ssid; /* sHype security subject identifier */

    /* Control-plane tools handle for this domain. */
    xen_domain_handle_t handle;

    /* hvm_print_line() and guest_console_write() logging. */
#define DOMAIN_PBUF_SIZE 200
    char       *pbuf;
    unsigned    pbuf_idx;
    spinlock_t  pbuf_lock;

    /* OProfile support. */
    struct xenoprof *xenoprof;

    /* Domain watchdog. */
#define NR_DOMAIN_WATCHDOG_TIMERS 2
    spinlock_t watchdog_lock;
    uint32_t watchdog_inuse_map;
    struct timer watchdog_timer[NR_DOMAIN_WATCHDOG_TIMERS];

    struct rcu_head rcu;

    /*
     * Hypercall deadlock avoidance lock. Used if a hypercall might
     * cause a deadlock. Acquirers don't spin waiting; they preempt.
     */
    spinlock_t hypercall_deadlock_mutex;

    /* transcendent memory, auto-allocated on first tmem op by each domain */
    struct client *tmem_client;

    struct lock_profile_qhead profile_head;

    /* Various vm_events */

    /* Memory sharing support */
#ifdef CONFIG_HAS_MEM_SHARING
    struct vm_event_domain *vm_event_share;
#endif
    /* Memory paging support */
#ifdef CONFIG_HAS_MEM_PAGING
    struct vm_event_domain *vm_event_paging;
#endif
    /* VM event monitor support */
    struct vm_event_domain *vm_event_monitor;

    /*
     * Can be specified by the user. If that is not the case, it is
     * computed from the union of all the vcpu cpu-affinity masks.
     */
    nodemask_t node_affinity;
    unsigned int last_alloc_node;
    spinlock_t node_affinity_lock;

    /* vNUMA topology accesses are protected by rwlock. */
    rwlock_t vnuma_rwlock;
    struct vnuma_info *vnuma;

    /* Common monitor options */
    struct {
        unsigned int guest_request_enabled       : 1;
        unsigned int guest_request_sync          : 1;
    } monitor;
};

/* Protect updates/reads (resp.) of domain_list and domain_hash. */
extern spinlock_t domlist_update_lock;
extern rcu_read_lock_t domlist_read_lock;

extern struct vcpu *idle_vcpu[NR_CPUS];
#define is_idle_domain(d) ((d)->domain_id == DOMID_IDLE)
#define is_idle_vcpu(v)   (is_idle_domain((v)->domain))

#define DOMAIN_DESTROYED (1u << 31) /* assumes atomic_t is >= 32 bits */
#define put_domain(_d) \
  if ( atomic_dec_and_test(&(_d)->refcnt) ) domain_destroy(_d)

/*
 * Use this when you don't have an existing reference to @d. It returns
 * FALSE if @d is being destroyed.
 */
static always_inline int get_domain(struct domain *d)
{
    int old, seen = atomic_read(&d->refcnt);
    do
    {
        old = seen;
        if ( unlikely(old & DOMAIN_DESTROYED) )
            return 0;
        seen = atomic_cmpxchg(&d->refcnt, old, old + 1);
    }
    while ( unlikely(seen != old) );
    return 1;
}
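
/*
 * Illustrative usage sketch: a successful get_domain() must be balanced by
 * a put_domain() once the reference is no longer needed.
 *
 *     if ( get_domain(d) )
 *     {
 *         ... use d without it being destroyed underneath us ...
 *         put_domain(d);
 *     }
 */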

/*
 * Use this when you already have, or are borrowing, a reference to @d.
 * In this case we know that @d cannot be destroyed under our feet.
 */
static inline void get_knownalive_domain(struct domain *d)
{
    atomic_inc(&d->refcnt);
    ASSERT(!(atomic_read(&d->refcnt) & DOMAIN_DESTROYED));
}

int domain_set_node_affinity(struct domain *d, const nodemask_t *affinity);
void domain_update_node_affinity(struct domain *d);

/*
 * Create a domain: the configuration is only necessary for real domains
 * (i.e. !DOMCRF_dummy, which excludes the idle domain).
 */
struct domain *domain_create(domid_t domid, unsigned int domcr_flags,
                             uint32_t ssidref,
                             struct xen_arch_domainconfig *config);
 /* DOMCRF_hvm: Create an HVM domain, as opposed to a PV domain. */
#define _DOMCRF_hvm           0
#define DOMCRF_hvm            (1U<<_DOMCRF_hvm)
 /* DOMCRF_hap: Create a domain with hardware-assisted paging. */
#define _DOMCRF_hap           1
#define DOMCRF_hap            (1U<<_DOMCRF_hap)
 /* DOMCRF_s3_integrity: Create a domain with tboot memory integrity
                         protection. */
#define _DOMCRF_s3_integrity  2
#define DOMCRF_s3_integrity   (1U<<_DOMCRF_s3_integrity)
 /* DOMCRF_dummy: Create a dummy domain (not scheduled; not on domain list) */
#define _DOMCRF_dummy         3
#define DOMCRF_dummy          (1U<<_DOMCRF_dummy)
 /* DOMCRF_oos_off: don't use out-of-sync optimization for shadow page tables */
#define _DOMCRF_oos_off         4
#define DOMCRF_oos_off          (1U<<_DOMCRF_oos_off)
 /* DOMCRF_xs_domain: xenstore domain */
#define _DOMCRF_xs_domain       5
#define DOMCRF_xs_domain        (1U<<_DOMCRF_xs_domain)
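
/*
 * Illustrative sketch (the flag combination, ssidref and config are only
 * examples, and error handling is elided): creating an HVM guest with
 * hardware-assisted paging.
 *
 *     d = domain_create(domid, DOMCRF_hvm | DOMCRF_hap, ssidref, &config);
 */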

/*
 * rcu_lock_domain_by_id() is more efficient than get_domain_by_id().
 * This is the preferred function if the returned domain reference
 * is short lived, but it cannot be used if the domain reference needs
 * to be kept beyond the current scope (e.g., across a softirq).
 * The returned domain reference must be discarded using rcu_unlock_domain().
 */
struct domain *rcu_lock_domain_by_id(domid_t dom);

/*
 * As the above function, but resolves DOMID_SELF to the current domain.
 */
struct domain *rcu_lock_domain_by_any_id(domid_t dom);

/*
 * As rcu_lock_domain_by_id(), but will fail EPERM or ESRCH rather than resolve
 * to local domain.
 */
int rcu_lock_remote_domain_by_id(domid_t dom, struct domain **d);

/*
 * As rcu_lock_remote_domain_by_id() but will fail EINVAL if the domain is
 * dying.
 */
int rcu_lock_live_remote_domain_by_id(domid_t dom, struct domain **d);

static inline void rcu_unlock_domain(struct domain *d)
{
    if ( d != current->domain )
        rcu_read_unlock(d);
}

static inline struct domain *rcu_lock_domain(struct domain *d)
{
    if ( d != current->domain )
        rcu_read_lock(d);
    return d;
}

static inline struct domain *rcu_lock_current_domain(void)
{
    return /*rcu_lock_domain*/(current->domain);
}
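
/*
 * Illustrative usage sketch: a short-lived lookup of another domain (NULL
 * here denotes lookup failure).
 *
 *     struct domain *d = rcu_lock_domain_by_id(domid);
 *
 *     if ( d == NULL )
 *         return -ESRCH;
 *     ... inspect d; do not keep the reference beyond this scope ...
 *     rcu_unlock_domain(d);
 */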

struct domain *get_domain_by_id(domid_t dom);
void domain_destroy(struct domain *d);
int domain_kill(struct domain *d);
int domain_shutdown(struct domain *d, u8 reason);
void domain_resume(struct domain *d);
void domain_pause_for_debugger(void);

int domain_soft_reset(struct domain *d);

int vcpu_start_shutdown_deferral(struct vcpu *v);
void vcpu_end_shutdown_deferral(struct vcpu *v);

/*
 * Mark specified domain as crashed. This function always returns, even if the
 * caller is the specified domain. The domain is not synchronously descheduled
 * from any processor.
 */
void __domain_crash(struct domain *d);
#define domain_crash(d) do {                                              \
    printk("domain_crash called from %s:%d\n", __FILE__, __LINE__);       \
    __domain_crash(d);                                                    \
} while (0)

/*
 * Mark current domain as crashed and synchronously deschedule from the local
 * processor. This function never returns.
 */
void noreturn __domain_crash_synchronous(void);
#define domain_crash_synchronous() do {                                   \
    printk("domain_crash_sync called from %s:%d\n", __FILE__, __LINE__);  \
    __domain_crash_synchronous();                                         \
} while (0)

/*
 * Called from assembly code, with an optional address to help indicate why
 * the crash occurred.  If addr is 0, look up the address from the last
 * extable redirection.
 */
void noreturn asm_domain_crash_synchronous(unsigned long addr);

#define set_current_state(_s) do { current->state = (_s); } while (0)
void scheduler_init(void);
int  sched_init_vcpu(struct vcpu *v, unsigned int processor);
void sched_destroy_vcpu(struct vcpu *v);
int  sched_init_domain(struct domain *d, int poolid);
void sched_destroy_domain(struct domain *d);
int sched_move_domain(struct domain *d, struct cpupool *c);
long sched_adjust(struct domain *, struct xen_domctl_scheduler_op *);
long sched_adjust_global(struct xen_sysctl_scheduler_op *);
int  sched_id(void);
void sched_tick_suspend(void);
void sched_tick_resume(void);
void vcpu_wake(struct vcpu *v);
long vcpu_yield(void);
void vcpu_sleep_nosync(struct vcpu *v);
void vcpu_sleep_sync(struct vcpu *v);

/*
 * Force synchronisation of given VCPU's state. If it is currently descheduled,
 * this call will ensure that all its state is committed to memory and that
 * no CPU is using critical state (e.g., page tables) belonging to the VCPU.
 */
void sync_vcpu_execstate(struct vcpu *v);

/* As above, for any lazy state being held on the local CPU. */
void sync_local_execstate(void);

/*
 * Called by the scheduler to switch to another VCPU. This function must
 * call context_saved(@prev) when the local CPU is no longer running in
 * @prev's context, and that context is saved to memory. Alternatively, if
 * implementing lazy context switching, it suffices to ensure that invoking
 * sync_vcpu_execstate() will switch and commit @prev's state.
 */
void context_switch(
    struct vcpu *prev,
    struct vcpu *next);

/*
 * As described above, context_switch() must call this function when the
 * local CPU is no longer running in @prev's context, and @prev's context is
 * saved to memory. Alternatively, if implementing lazy context switching,
 * ensure that invoking sync_vcpu_execstate() will switch and commit @prev.
 */
void context_saved(struct vcpu *prev);

/* Called by the scheduler to continue running the current VCPU. */
void continue_running(
    struct vcpu *same);

void startup_cpu_idle_loop(void);
extern void (*pm_idle) (void);
extern void (*dead_idle) (void);

/*
 * Creates a continuation to resume the current hypercall. The caller should
 * return immediately, propagating the value returned from this invocation.
 * The format string specifies the types and number of hypercall arguments.
 * It contains one character per argument as follows:
 *  'i' [unsigned] {char, int}
 *  'l' [unsigned] long
 *  'h' guest handle (XEN_GUEST_HANDLE(foo))
 */
unsigned long hypercall_create_continuation(
    unsigned int op, const char *format, ...);
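
/*
 * Illustrative sketch (argument names are placeholders): a preempted
 * hypercall taking a long command and a guest handle could be restarted
 * with
 *
 *     return hypercall_create_continuation(__HYPERVISOR_memory_op, "lh",
 *                                          cmd, arg);
 *
 * where "lh" declares one [unsigned] long followed by one guest handle.
 */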

static inline void hypercall_cancel_continuation(struct vcpu *v)
{
    v->hcall_preempted = false;
}

/*
 * For long-running operations that must be in hypercall context, check
 * if there is background work to be done that should interrupt this
 * operation.
 */
#define hypercall_preempt_check() (unlikely(    \
        softirq_pending(smp_processor_id()) |   \
        local_events_need_delivery()            \
    ))

/*
 * For long-running operations that may be in hypercall context or on
 * the idle vcpu (e.g. during dom0 construction), check if there is
 * background work to be done that should interrupt this operation.
 */
#define general_preempt_check() (unlikely(                          \
        softirq_pending(smp_processor_id()) ||                      \
        (!is_idle_vcpu(current) && local_events_need_delivery())    \
    ))
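
/*
 * Illustrative sketch of the canonical preemption pattern for a long loop
 * inside a hypercall (names are placeholders):
 *
 *     for ( i = start; i < count; i++ )
 *     {
 *         if ( hypercall_preempt_check() )
 *             return hypercall_create_continuation(op, "lh", i, arg);
 *         ... process element i ...
 *     }
 */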

extern struct domain *domain_list;

/* Caller must hold the domlist_read_lock or domlist_update_lock. */
static inline struct domain *first_domain_in_cpupool(struct cpupool *c)
{
    struct domain *d;
    for (d = rcu_dereference(domain_list); d && d->cpupool != c;
         d = rcu_dereference(d->next_in_list));
    return d;
}
static inline struct domain *next_domain_in_cpupool(
    struct domain *d, struct cpupool *c)
{
    for (d = rcu_dereference(d->next_in_list); d && d->cpupool != c;
         d = rcu_dereference(d->next_in_list));
    return d;
}

#define for_each_domain(_d)                     \
 for ( (_d) = rcu_dereference(domain_list);     \
       (_d) != NULL;                            \
       (_d) = rcu_dereference((_d)->next_in_list) )

#define for_each_domain_in_cpupool(_d,_c)       \
 for ( (_d) = first_domain_in_cpupool(_c);      \
       (_d) != NULL;                            \
       (_d) = next_domain_in_cpupool((_d), (_c)))

#define for_each_vcpu(_d,_v)                    \
 for ( (_v) = (_d)->vcpu ? (_d)->vcpu[0] : NULL; \
       (_v) != NULL;                            \
       (_v) = (_v)->next_in_list )
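
/*
 * Illustrative usage sketch: walking every vCPU of every domain while
 * holding the RCU read-side lock on the domain list.
 *
 *     struct domain *d;
 *     struct vcpu *v;
 *
 *     rcu_read_lock(&domlist_read_lock);
 *     for_each_domain ( d )
 *         for_each_vcpu ( d, v )
 *             ... inspect v ...
 *     rcu_read_unlock(&domlist_read_lock);
 */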

/*
 * Per-VCPU pause flags.
 */
 /* VCPU is blocked waiting for an event. */
#define _VPF_blocked         0
#define VPF_blocked          (1UL<<_VPF_blocked)
 /* VCPU is offline. */
#define _VPF_down            1
#define VPF_down             (1UL<<_VPF_down)
 /* VCPU is blocked awaiting an event to be consumed by Xen. */
#define _VPF_blocked_in_xen  2
#define VPF_blocked_in_xen   (1UL<<_VPF_blocked_in_xen)
 /* VCPU affinity has changed: migrating to a new CPU. */
#define _VPF_migrating       3
#define VPF_migrating        (1UL<<_VPF_migrating)
 /* VCPU is blocked due to missing mem_paging ring. */
#define _VPF_mem_paging      4
#define VPF_mem_paging       (1UL<<_VPF_mem_paging)
 /* VCPU is blocked due to missing mem_access ring. */
#define _VPF_mem_access      5
#define VPF_mem_access       (1UL<<_VPF_mem_access)
 /* VCPU is blocked due to missing mem_sharing ring. */
#define _VPF_mem_sharing     6
#define VPF_mem_sharing      (1UL<<_VPF_mem_sharing)
 /* VCPU is being reset. */
#define _VPF_in_reset        7
#define VPF_in_reset         (1UL<<_VPF_in_reset)
 /* VCPU is parked. */
#define _VPF_parked          8
#define VPF_parked           (1UL<<_VPF_parked)

static inline int vcpu_runnable(struct vcpu *v)
{
    return !(v->pause_flags |
             atomic_read(&v->pause_count) |
             atomic_read(&v->domain->pause_count));
}

void vcpu_block(void);
void vcpu_unblock(struct vcpu *v);
void vcpu_pause(struct vcpu *v);
void vcpu_pause_nosync(struct vcpu *v);
void vcpu_unpause(struct vcpu *v);
int vcpu_pause_by_systemcontroller(struct vcpu *v);
int vcpu_unpause_by_systemcontroller(struct vcpu *v);

void domain_pause(struct domain *d);
void domain_pause_nosync(struct domain *d);
void domain_unpause(struct domain *d);
int domain_unpause_by_systemcontroller(struct domain *d);
int __domain_pause_by_systemcontroller(struct domain *d,
                                       void (*pause_fn)(struct domain *d));
static inline int domain_pause_by_systemcontroller(struct domain *d)
{
    return __domain_pause_by_systemcontroller(d, domain_pause);
}
static inline int domain_pause_by_systemcontroller_nosync(struct domain *d)
{
    return __domain_pause_by_systemcontroller(d, domain_pause_nosync);
}
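
/*
 * Illustrative usage sketch: controller-initiated pauses are counted (see
 * d->controller_pause_count), so every successful pause must eventually be
 * matched by an unpause.
 *
 *     if ( (rc = domain_pause_by_systemcontroller(d)) == 0 )
 *     {
 *         ... act on the quiesced domain ...
 *         domain_unpause_by_systemcontroller(d);
 *     }
 */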

/* domain_pause() but safe against trying to pause current. */
void domain_pause_except_self(struct domain *d);
void domain_unpause_except_self(struct domain *d);

void cpu_init(void);

struct scheduler;

struct scheduler *scheduler_get_default(void);
struct scheduler *scheduler_alloc(unsigned int sched_id, int *perr);
void scheduler_free(struct scheduler *sched);
int schedule_cpu_switch(unsigned int cpu, struct cpupool *c);
void vcpu_force_reschedule(struct vcpu *v);
int cpu_disable_scheduler(unsigned int cpu);
int vcpu_set_hard_affinity(struct vcpu *v, const cpumask_t *affinity);
int vcpu_set_soft_affinity(struct vcpu *v, const cpumask_t *affinity);
void restore_vcpu_affinity(struct domain *d);
int vcpu_pin_override(struct vcpu *v, int cpu);

void vcpu_runstate_get(struct vcpu *v, struct vcpu_runstate_info *runstate);
uint64_t get_cpu_idle_time(unsigned int cpu);

/*
 * Used by the idle loop to decide whether there is work to do:
 *  (1) deal with RCU; or (2) run softirqs; or (3) play dead;
 *  or (4) run tasklets.
 *
 * About (4): if a tasklet is enqueued, it will be scheduled
 * really soon, and hence it is pointless to try to
 * sleep between these two events (that is why we don't call
 * the tasklet_work_to_do() helper).
 */
#define cpu_is_haltable(cpu)                    \
    (!rcu_needs_cpu(cpu) &&                     \
     !softirq_pending(cpu) &&                   \
     cpu_online(cpu) &&                         \
     !per_cpu(tasklet_work_to_do, cpu))
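
/*
 * Illustrative sketch (not the actual idle loop, which lives in arch code)
 * of how this check is typically consumed:
 *
 *     for ( ; ; )
 *     {
 *         if ( cpu_is_haltable(smp_processor_id()) )
 *             (*pm_idle)();            // may halt or deep-sleep the CPU
 *         do_softirq();
 *     }
 */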

void watchdog_domain_init(struct domain *d);
void watchdog_domain_destroy(struct domain *d);

/*
 * Use this check when the following are both true:
 *  - Using this feature or interface requires full access to the hardware
 *    (that is, this would not be suitable for a driver domain)
 *  - There is never a reason to deny the hardware domain access to this
 */
#define is_hardware_domain(_d) ((_d) == hardware_domain)

/* This check is for functionality specific to a control domain */
#define is_control_domain(_d) ((_d)->is_privileged)

#define VM_ASSIST(d, t) (test_bit(VMASST_TYPE_ ## t, &(d)->vm_assist))

#define is_pv_domain(d) ((d)->guest_type == guest_type_pv)
#define is_pv_vcpu(v)   (is_pv_domain((v)->domain))
#define is_hvm_domain(d) ((d)->guest_type == guest_type_hvm)
#define is_hvm_vcpu(v)   (is_hvm_domain(v->domain))
#define is_pinned_vcpu(v) ((v)->domain->is_pinned || \
                           cpumask_weight((v)->cpu_hard_affinity) == 1)
#ifdef CONFIG_HAS_PASSTHROUGH
#define need_iommu(d)    ((d)->need_iommu)
#else
#define need_iommu(d)    (0)
#endif

static inline bool is_vcpu_online(const struct vcpu *v)
{
    return !test_bit(_VPF_down, &v->pause_flags);
}

void set_vcpu_migration_delay(unsigned int delay);
unsigned int get_vcpu_migration_delay(void);

extern bool sched_smt_power_savings;

extern enum cpufreq_controller {
    FREQCTL_none, FREQCTL_dom0_kernel, FREQCTL_xen
} cpufreq_controller;

#define CPUPOOLID_NONE    -1

struct cpupool *cpupool_get_by_id(int poolid);
void cpupool_put(struct cpupool *pool);
int cpupool_add_domain(struct domain *d, int poolid);
void cpupool_rm_domain(struct domain *d);
int cpupool_move_domain(struct domain *d, struct cpupool *c);
int cpupool_do_sysctl(struct xen_sysctl_cpupool_op *op);
void schedule_dump(struct cpupool *c);
extern void dump_runq(unsigned char key);

void arch_do_physinfo(struct xen_sysctl_physinfo *pi);

#endif /* __SCHED_H__ */

/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */