#ifndef __SCHED_H__
#define __SCHED_H__

#include <xen/types.h>
#include <xen/spinlock.h>
#include <xen/rwlock.h>
#include <xen/shared.h>
#include <xen/timer.h>
#include <xen/rangeset.h>
#include <xen/domain.h>
#include <xen/iommu.h>
#include <xen/rcupdate.h>
#include <xen/cpumask.h>
#include <xen/nodemask.h>
#include <xen/radix-tree.h>
#include <xen/multicall.h>
#include <xen/tasklet.h>
#include <xen/mm.h>
#include <xen/smp.h>
#include <xen/perfc.h>
#include <asm/atomic.h>
#include <xen/wait.h>
#include <public/xen.h>
#include <public/domctl.h>
#include <public/sysctl.h>
#include <public/vcpu.h>
#include <public/vm_event.h>
#include <public/event_channel.h>

#ifdef CONFIG_COMPAT
#include <compat/vcpu.h>
DEFINE_XEN_GUEST_HANDLE(vcpu_runstate_info_compat_t);
#endif

/*
 * Stats
 *
 * Enable and ease the use of scheduling related performance counters.
 */
#ifdef CONFIG_PERF_COUNTERS
#define SCHED_STATS
#endif

#define SCHED_STAT_CRANK(_X)                (perfc_incr(_X))
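
/*
 * Illustrative use from a scheduler (a sketch; the counter name is only an
 * example and must correspond to one declared in xen/perfc_defn.h):
 *
 *     SCHED_STAT_CRANK(vcpu_wake_running);
 */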

/* A global pointer to the hardware domain (usually DOM0). */
extern struct domain *hardware_domain;

#ifdef CONFIG_LATE_HWDOM
extern domid_t hardware_domid;
#else
#define hardware_domid 0
#endif

#ifndef CONFIG_COMPAT
#define BITS_PER_EVTCHN_WORD(d) BITS_PER_XEN_ULONG
#else
#define BITS_PER_EVTCHN_WORD(d) (has_32bit_shinfo(d) ? 32 : BITS_PER_XEN_ULONG)
#endif

#define BUCKETS_PER_GROUP  (PAGE_SIZE/sizeof(struct evtchn *))
/* Round the size of struct evtchn up to the next power of 2. */
#define __RDU2(x)   (       (x) | (   (x) >> 1))
#define __RDU4(x)   ( __RDU2(x) | ( __RDU2(x) >> 2))
#define __RDU8(x)   ( __RDU4(x) | ( __RDU4(x) >> 4))
#define __RDU16(x)  ( __RDU8(x) | ( __RDU8(x) >> 8))
#define __RDU32(x)  (__RDU16(x) | (__RDU16(x) >>16))
#define next_power_of_2(x)      (__RDU32((x)-1) + 1)

/* Maximum number of event channels for any ABI. */
#define MAX_NR_EVTCHNS MAX(EVTCHN_2L_NR_CHANNELS, EVTCHN_FIFO_NR_CHANNELS)

#define EVTCHNS_PER_BUCKET (PAGE_SIZE / next_power_of_2(sizeof(struct evtchn)))
#define EVTCHNS_PER_GROUP  (BUCKETS_PER_GROUP * EVTCHNS_PER_BUCKET)
#define NR_EVTCHN_GROUPS   DIV_ROUND_UP(MAX_NR_EVTCHNS, EVTCHNS_PER_GROUP)
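
/*
 * Sketch of how a port number maps onto this layout (the real lookup
 * helpers, e.g. evtchn_from_port() in xen/event.h, additionally special-case
 * the statically allocated first bucket, d->evtchn):
 *
 *     group  = port / EVTCHNS_PER_GROUP;
 *     bucket = (port % EVTCHNS_PER_GROUP) / EVTCHNS_PER_BUCKET;
 *     chn    = &d->evtchn_group[group][bucket][port % EVTCHNS_PER_BUCKET];
 */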

#define XEN_CONSUMER_BITS 3
#define NR_XEN_CONSUMERS ((1 << XEN_CONSUMER_BITS) - 1)

struct evtchn
{
    spinlock_t lock;
#define ECS_FREE         0 /* Channel is available for use.                  */
#define ECS_RESERVED     1 /* Channel is reserved.                           */
#define ECS_UNBOUND      2 /* Channel is waiting to bind to a remote domain. */
#define ECS_INTERDOMAIN  3 /* Channel is bound to another domain.            */
#define ECS_PIRQ         4 /* Channel is bound to a physical IRQ line.       */
#define ECS_VIRQ         5 /* Channel is bound to a virtual IRQ line.        */
#define ECS_IPI          6 /* Channel is bound to a virtual IPI line.        */
    u8  state;             /* ECS_* */
    u8  xen_consumer:XEN_CONSUMER_BITS; /* Consumer in Xen if nonzero */
    u8  pending:1;
    u16 notify_vcpu_id;    /* VCPU for local delivery notification */
    u32 port;
    union {
        struct {
            domid_t remote_domid;
        } unbound;     /* state == ECS_UNBOUND */
        struct {
            evtchn_port_t  remote_port;
            struct domain *remote_dom;
        } interdomain; /* state == ECS_INTERDOMAIN */
        struct {
            u32            irq;
            evtchn_port_t  next_port;
            evtchn_port_t  prev_port;
        } pirq;        /* state == ECS_PIRQ */
        u16 virq;      /* state == ECS_VIRQ */
    } u;
    u8 priority;
    u8 last_priority;
    u16 last_vcpu_id;
#ifdef CONFIG_XSM
    union {
#ifdef XSM_NEED_GENERIC_EVTCHN_SSID
        /*
         * If an XSM module needs more space for its event channel context,
         * this pointer stores the necessary data for the security server.
         */
        void *generic;
#endif
#ifdef CONFIG_FLASK
        /*
         * Inlining the contents of the structure for FLASK avoids unneeded
         * allocations, and on 64-bit platforms with only FLASK enabled,
         * reduces the size of struct evtchn.
         */
        u32 flask_sid;
#endif
    } ssid;
#endif
} __attribute__((aligned(64)));

int  evtchn_init(struct domain *d); /* from domain_create */
void evtchn_destroy(struct domain *d); /* from domain_kill */
void evtchn_destroy_final(struct domain *d); /* from complete_domain_destroy */

struct waitqueue_vcpu;

struct vcpu
{
    int              vcpu_id;

    int              processor;

    vcpu_info_t     *vcpu_info;

    struct domain   *domain;

    struct vcpu     *next_in_list;

    s_time_t         periodic_period;
    s_time_t         periodic_last_event;
    struct timer     periodic_timer;
    struct timer     singleshot_timer;

    struct timer     poll_timer;    /* timeout for SCHEDOP_poll */

    void            *sched_priv;    /* scheduler-specific data */

    struct vcpu_runstate_info runstate;
#ifndef CONFIG_COMPAT
# define runstate_guest(v) ((v)->runstate_guest)
    XEN_GUEST_HANDLE(vcpu_runstate_info_t) runstate_guest; /* guest address */
#else
# define runstate_guest(v) ((v)->runstate_guest.native)
    union {
        XEN_GUEST_HANDLE(vcpu_runstate_info_t) native;
        XEN_GUEST_HANDLE(vcpu_runstate_info_compat_t) compat;
    } runstate_guest; /* guest address */
#endif

    /* The last time this vCPU was scheduled out. */
    uint64_t last_run_time;

    /* Has the FPU been initialised? */
    bool             fpu_initialised;
    /* Has the FPU been used since it was last saved? */
    bool             fpu_dirtied;
    /* Initialization completed for this VCPU? */
    bool             is_initialised;
    /* Currently running on a CPU? */
    bool             is_running;
    /* VCPU should wake fast (do not deep sleep the CPU). */
    bool             is_urgent;

#ifdef VCPU_TRAP_LAST
#define VCPU_TRAP_NONE    0
    struct {
        bool             pending;
        uint8_t          old_mask;
    }                async_exception_state[VCPU_TRAP_LAST];
#define async_exception_state(t) async_exception_state[(t)-1]
    uint8_t          async_exception_mask;
#endif

    /* Require shutdown to be deferred for some asynchronous operation? */
    bool             defer_shutdown;
    /* VCPU is paused following shutdown request (d->is_shutting_down)? */
    bool             paused_for_shutdown;
    /* VCPU needs its affinity restored. */
    bool             affinity_broken;

    /* A hypercall has been preempted. */
    bool             hcall_preempted;
#ifdef CONFIG_COMPAT
    /* A hypercall is using the compat ABI? */
    bool             hcall_compat;
#endif

    /*
     * > 0: a single port is being polled;
     * = 0: nothing is being polled (vcpu should be clear in d->poll_mask);
     * < 0: multiple ports may be being polled.
     */
    int              poll_evtchn;

    /* (over-)protected by ->domain->event_lock */
    int              pirq_evtchn_head;

    unsigned long    pause_flags;
    atomic_t         pause_count;

    /* VCPU paused for vm_event replies. */
    atomic_t         vm_event_pause_count;
    /* VCPU paused by system controller. */
    int              controller_pause_count;

    /* Grant table map tracking. */
    spinlock_t       maptrack_freelist_lock;
    unsigned int     maptrack_head;
    unsigned int     maptrack_tail;

    /* IRQ-safe virq_lock protects against delivering VIRQ to stale evtchn. */
    evtchn_port_t    virq_to_evtchn[NR_VIRQS];
    spinlock_t       virq_lock;

    /* Bitmask of CPUs on which this VCPU may run. */
    cpumask_var_t    cpu_hard_affinity;
    /* Used to change affinity temporarily. */
    cpumask_var_t    cpu_hard_affinity_tmp;
    /* Used to restore affinity across S3. */
    cpumask_var_t    cpu_hard_affinity_saved;

    /* Bitmask of CPUs on which this VCPU prefers to run. */
    cpumask_var_t    cpu_soft_affinity;

    /* Bitmask of CPUs which are holding onto this VCPU's state. */
    cpumask_var_t    vcpu_dirty_cpumask;

    /* Tasklet for continue_hypercall_on_cpu(). */
    struct tasklet   continue_hypercall_tasklet;

    /* Multicall information. */
    struct mc_state  mc_state;

    struct waitqueue_vcpu *waitqueue_vcpu;

    /* Guest-specified relocation of vcpu_info. */
    mfn_t            vcpu_info_mfn;

    struct evtchn_fifo_vcpu *evtchn_fifo;

    struct arch_vcpu arch;
};

/* Per-domain lock can be recursively acquired in fault handlers. */
#define domain_lock(d) spin_lock_recursive(&(d)->domain_lock)
#define domain_unlock(d) spin_unlock_recursive(&(d)->domain_lock)

/* VM event */
struct vm_event_domain
{
    /* ring lock */
    spinlock_t ring_lock;
    /* The ring has 64 entries */
    unsigned char foreign_producers;
    unsigned char target_producers;
    /* shared ring page */
    void *ring_page;
    struct page_info *ring_pg_struct;
    /* front-end ring */
    vm_event_front_ring_t front_ring;
    /* event channel port (vcpu0 only) */
    int xen_port;
    /* vm_event bit for vcpu->pause_flags */
    int pause_flag;
    /* list of vcpus waiting for room in the ring */
    struct waitqueue_head wq;
    /* the number of vCPUs blocked */
    unsigned int blocked;
    /* The last vcpu woken up */
    unsigned int last_vcpu_wake_up;
};

struct evtchn_port_ops;

enum guest_type {
    guest_type_pv, guest_type_hvm
};

struct domain
{
    domid_t          domain_id;

    unsigned int     max_vcpus;
    struct vcpu    **vcpu;

    shared_info_t   *shared_info;     /* shared data area */

    spinlock_t       domain_lock;

    spinlock_t       page_alloc_lock; /* protects all the following fields  */
    struct page_list_head page_list;  /* linked list */
    struct page_list_head xenpage_list; /* linked list (size xenheap_pages) */
    unsigned int     tot_pages;       /* number of pages currently possessed */
    unsigned int     xenheap_pages;   /* # pages allocated from Xen heap    */
    unsigned int     outstanding_pages; /* pages claimed but not possessed  */
    unsigned int     max_pages;       /* maximum value for tot_pages        */
    atomic_t         shr_pages;       /* number of shared pages             */
    atomic_t         paged_pages;     /* number of paged-out pages          */

    /* Scheduling. */
    void            *sched_priv;    /* scheduler-specific data */
    struct cpupool  *cpupool;

    struct domain   *next_in_list;
    struct domain   *next_in_hashbucket;

    struct list_head rangesets;
    spinlock_t       rangesets_lock;

    /* Event channel information. */
    struct evtchn   *evtchn;                         /* first bucket only */
    struct evtchn  **evtchn_group[NR_EVTCHN_GROUPS]; /* all other buckets */
    unsigned int     max_evtchns;     /* number supported by ABI */
    unsigned int     max_evtchn_port; /* max permitted port number */
    unsigned int     valid_evtchns;   /* number of allocated event channels */
    spinlock_t       event_lock;
    const struct evtchn_port_ops *evtchn_port_ops;
    struct evtchn_fifo_domain *evtchn_fifo;

    struct grant_table *grant_table;

    /*
     * Interrupt to event-channel mappings and other per-guest-pirq data.
     * Protected by the domain's event-channel spinlock.
     */
    struct radix_tree_root pirq_tree;
    unsigned int     nr_pirqs;

    enum guest_type guest_type;

    /* Is this guest dying (i.e., a zombie)? */
    enum { DOMDYING_alive, DOMDYING_dying, DOMDYING_dead } is_dying;

    /* Domain is paused by controller software? */
    int              controller_pause_count;

    int64_t          time_offset_seconds;

#ifdef CONFIG_HAS_PASSTHROUGH
    struct domain_iommu iommu;

    /* Does this guest need iommu mappings (-1 meaning "being set up")? */
    s8               need_iommu;
#endif
    /* is node-affinity automatically computed? */
    bool             auto_node_affinity;
    /* Is this guest fully privileged (aka dom0)? */
    bool             is_privileged;
    /* Is this a xenstore domain (not dom0)? */
    bool             is_xenstore;
    /* Domain's VCPUs are pinned 1:1 to physical CPUs? */
    bool             is_pinned;
    /* Non-migratable and non-restorable? */
    bool             disable_migrate;
    /* Is this guest being debugged by dom0? */
    bool             debugger_attached;
    /*
     * Set to true at the very end of domain creation, when the domain is
     * unpaused for the first time by the system controller.
     */
    bool             creation_finished;

    /* Which guest this guest has privileges on */
    struct domain   *target;

    /* Are any VCPUs polling event channels (SCHEDOP_poll)? */
#if MAX_VIRT_CPUS <= BITS_PER_LONG
    DECLARE_BITMAP(poll_mask, MAX_VIRT_CPUS);
#else
    unsigned long   *poll_mask;
#endif

    /* I/O capabilities (access to IRQs and memory-mapped I/O). */
    struct rangeset *iomem_caps;
    struct rangeset *irq_caps;

    /* Guest has shut down (inc. reason code)? */
    spinlock_t       shutdown_lock;
    bool             is_shutting_down; /* in process of shutting down? */
    bool             is_shut_down;     /* fully shut down? */
#define SHUTDOWN_CODE_INVALID ~0u
    unsigned int     shutdown_code;

    /* If this is not 0, send suspend notification here instead of
     * raising DOM_EXC */
    evtchn_port_t    suspend_evtchn;

    atomic_t         pause_count;
    atomic_t         refcnt;

    unsigned long    vm_assist;

    /* Bitmask of CPUs which are holding onto this domain's state. */
    cpumask_var_t    domain_dirty_cpumask;

    struct arch_domain arch;

    void *ssid; /* sHype security subject identifier */

    /* Control-plane tools handle for this domain. */
    xen_domain_handle_t handle;

    /* hvm_print_line() and guest_console_write() logging. */
#define DOMAIN_PBUF_SIZE 200
    char       *pbuf;
    unsigned    pbuf_idx;
    spinlock_t  pbuf_lock;

    /* OProfile support. */
    struct xenoprof *xenoprof;

    /* Domain watchdog. */
#define NR_DOMAIN_WATCHDOG_TIMERS 2
    spinlock_t watchdog_lock;
    uint32_t watchdog_inuse_map;
    struct timer watchdog_timer[NR_DOMAIN_WATCHDOG_TIMERS];

    struct rcu_head rcu;

    /*
     * Hypercall deadlock avoidance lock. Used if a hypercall might
     * cause a deadlock. Acquirers don't spin waiting; they preempt.
     */
    spinlock_t hypercall_deadlock_mutex;

    /* transcendent memory, auto-allocated on first tmem op by each domain */
    struct client *tmem_client;

    struct lock_profile_qhead profile_head;

    /* Various vm_events */

    /* Memory sharing support */
#ifdef CONFIG_HAS_MEM_SHARING
    struct vm_event_domain *vm_event_share;
#endif
    /* Memory paging support */
#ifdef CONFIG_HAS_MEM_PAGING
    struct vm_event_domain *vm_event_paging;
#endif
    /* VM event monitor support */
    struct vm_event_domain *vm_event_monitor;

    /*
     * Can be specified by the user. If that is not the case, it is
     * computed from the union of all the vcpu cpu-affinity masks.
     */
    nodemask_t node_affinity;
    unsigned int last_alloc_node;
    spinlock_t node_affinity_lock;

    /* vNUMA topology accesses are protected by rwlock. */
    rwlock_t vnuma_rwlock;
    struct vnuma_info *vnuma;

    /* Common monitor options */
    struct {
        unsigned int guest_request_enabled       : 1;
        unsigned int guest_request_sync          : 1;
    } monitor;
};

/* Protect updates/reads (resp.) of domain_list and domain_hash. */
extern spinlock_t domlist_update_lock;
extern rcu_read_lock_t domlist_read_lock;

extern struct vcpu *idle_vcpu[NR_CPUS];
#define is_idle_domain(d) ((d)->domain_id == DOMID_IDLE)
#define is_idle_vcpu(v)   (is_idle_domain((v)->domain))

#define DOMAIN_DESTROYED (1u << 31) /* assumes atomic_t is >= 32 bits */
#define put_domain(_d) \
  if ( atomic_dec_and_test(&(_d)->refcnt) ) domain_destroy(_d)

/*
 * Use this when you don't have an existing reference to @d. It returns
 * FALSE if @d is being destroyed.
 */
static always_inline int get_domain(struct domain *d)
{
    int old, seen = atomic_read(&d->refcnt);
    do
    {
        old = seen;
        if ( unlikely(old & DOMAIN_DESTROYED) )
            return 0;
        seen = atomic_cmpxchg(&d->refcnt, old, old + 1);
    }
    while ( unlikely(seen != old) );
    return 1;
}

/*
 * Use this when you already have, or are borrowing, a reference to @d.
 * In this case we know that @d cannot be destroyed under our feet.
 */
static inline void get_knownalive_domain(struct domain *d)
{
    atomic_inc(&d->refcnt);
    ASSERT(!(atomic_read(&d->refcnt) & DOMAIN_DESTROYED));
}
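
/*
 * Illustrative reference-counting pattern (a sketch, not from any particular
 * caller):
 *
 *     if ( !get_domain(d) )
 *         return;                   (d is on its way to destruction)
 *     ... use d ...
 *     put_domain(d);
 *
 * get_knownalive_domain() skips the DOMAIN_DESTROYED check and is safe only
 * when an existing reference already keeps @d alive.
 */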

int domain_set_node_affinity(struct domain *d, const nodemask_t *affinity);
void domain_update_node_affinity(struct domain *d);

/*
 * Create a domain: the configuration is only necessary for a real domain
 * (i.e. !DOMCRF_dummy; the idle domain is excluded).
 */
struct domain *domain_create(domid_t domid, unsigned int domcr_flags,
                             uint32_t ssidref,
                             struct xen_arch_domainconfig *config);
 /* DOMCRF_hvm: Create an HVM domain, as opposed to a PV domain. */
#define _DOMCRF_hvm           0
#define DOMCRF_hvm            (1U<<_DOMCRF_hvm)
 /* DOMCRF_hap: Create a domain with hardware-assisted paging. */
#define _DOMCRF_hap           1
#define DOMCRF_hap            (1U<<_DOMCRF_hap)
 /* DOMCRF_s3_integrity: Create a domain with memory integrity protection
                         provided by tboot. */
#define _DOMCRF_s3_integrity  2
#define DOMCRF_s3_integrity   (1U<<_DOMCRF_s3_integrity)
 /* DOMCRF_dummy: Create a dummy domain (not scheduled; not on domain list) */
#define _DOMCRF_dummy         3
#define DOMCRF_dummy          (1U<<_DOMCRF_dummy)
 /* DOMCRF_oos_off: don't use out-of-sync optimization for shadow page tables */
#define _DOMCRF_oos_off         4
#define DOMCRF_oos_off          (1U<<_DOMCRF_oos_off)
 /* DOMCRF_xs_domain: xenstore domain */
#define _DOMCRF_xs_domain       5
#define DOMCRF_xs_domain        (1U<<_DOMCRF_xs_domain)

/*
 * rcu_lock_domain_by_id() is more efficient than get_domain_by_id().
 * This is the preferred function if the returned domain reference
 * is short-lived, but it cannot be used if the domain reference needs
 * to be kept beyond the current scope (e.g., across a softirq).
 * The returned domain reference must be discarded using rcu_unlock_domain().
 */
struct domain *rcu_lock_domain_by_id(domid_t dom);

/*
 * As the above function, but resolves DOMID_SELF to the current domain.
 */
struct domain *rcu_lock_domain_by_any_id(domid_t dom);

/*
 * As rcu_lock_domain_by_id(), but will fail EPERM or ESRCH rather than
 * resolving to the local domain.
 */
int rcu_lock_remote_domain_by_id(domid_t dom, struct domain **d);

/*
 * As rcu_lock_remote_domain_by_id(), but will fail EINVAL if the domain is
 * dying.
 */
int rcu_lock_live_remote_domain_by_id(domid_t dom, struct domain **d);

static inline void rcu_unlock_domain(struct domain *d)
{
    if ( d != current->domain )
        rcu_read_unlock(d);
}

static inline struct domain *rcu_lock_domain(struct domain *d)
{
    if ( d != current->domain )
        rcu_read_lock(d);
    return d;
}

static inline struct domain *rcu_lock_current_domain(void)
{
    return /*rcu_lock_domain*/(current->domain);
}
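
/*
 * Illustrative use of the RCU lookup helpers (a sketch, not from any
 * particular caller):
 *
 *     struct domain *d = rcu_lock_domain_by_id(domid);
 *
 *     if ( d == NULL )
 *         return -ESRCH;
 *     ... short-lived access to d ...
 *     rcu_unlock_domain(d);
 */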

struct domain *get_domain_by_id(domid_t dom);
void domain_destroy(struct domain *d);
int domain_kill(struct domain *d);
int domain_shutdown(struct domain *d, u8 reason);
void domain_resume(struct domain *d);
void domain_pause_for_debugger(void);

int domain_soft_reset(struct domain *d);

int vcpu_start_shutdown_deferral(struct vcpu *v);
void vcpu_end_shutdown_deferral(struct vcpu *v);

/*
 * Mark specified domain as crashed. This function always returns, even if the
 * caller is the specified domain. The domain is not synchronously descheduled
 * from any processor.
 */
void __domain_crash(struct domain *d);
#define domain_crash(d) do {                                              \
    printk("domain_crash called from %s:%d\n", __FILE__, __LINE__);       \
    __domain_crash(d);                                                    \
} while (0)

/*
 * Mark current domain as crashed and synchronously deschedule from the local
 * processor. This function never returns.
 */
void noreturn __domain_crash_synchronous(void);
#define domain_crash_synchronous() do {                                   \
    printk("domain_crash_sync called from %s:%d\n", __FILE__, __LINE__);  \
    __domain_crash_synchronous();                                         \
} while (0)

/*
 * Called from assembly code, with an optional address to help indicate why
 * the crash occurred. If addr is 0, look up the address from the last
 * extable redirection.
 */
void noreturn asm_domain_crash_synchronous(unsigned long addr);

#define set_current_state(_s) do { current->state = (_s); } while (0)
void scheduler_init(void);
int  sched_init_vcpu(struct vcpu *v, unsigned int processor);
void sched_destroy_vcpu(struct vcpu *v);
int  sched_init_domain(struct domain *d, int poolid);
void sched_destroy_domain(struct domain *d);
int sched_move_domain(struct domain *d, struct cpupool *c);
long sched_adjust(struct domain *, struct xen_domctl_scheduler_op *);
long sched_adjust_global(struct xen_sysctl_scheduler_op *);
int  sched_id(void);
void sched_tick_suspend(void);
void sched_tick_resume(void);
void vcpu_wake(struct vcpu *v);
long vcpu_yield(void);
void vcpu_sleep_nosync(struct vcpu *v);
void vcpu_sleep_sync(struct vcpu *v);

/*
 * Force synchronisation of given VCPU's state. If it is currently descheduled,
 * this call will ensure that all its state is committed to memory and that
 * no CPU is using critical state (e.g., page tables) belonging to the VCPU.
 */
void sync_vcpu_execstate(struct vcpu *v);

/* As above, for any lazy state being held on the local CPU. */
void sync_local_execstate(void);

/*
 * Called by the scheduler to switch to another VCPU. This function must
 * call context_saved(@prev) when the local CPU is no longer running in
 * @prev's context, and that context is saved to memory. Alternatively, if
 * implementing lazy context switching, it suffices to ensure that invoking
 * sync_vcpu_execstate() will switch and commit @prev's state.
 */
void context_switch(
    struct vcpu *prev,
    struct vcpu *next);

/*
 * As described above, context_switch() must call this function when the
 * local CPU is no longer running in @prev's context, and @prev's context is
 * saved to memory. Alternatively, if implementing lazy context switching,
 * ensure that invoking sync_vcpu_execstate() will switch and commit @prev.
 */
void context_saved(struct vcpu *prev);

/* Called by the scheduler to continue running the current VCPU. */
void continue_running(
    struct vcpu *same);

void startup_cpu_idle_loop(void);
extern void (*pm_idle) (void);
extern void (*dead_idle) (void);


/*
 * Creates a continuation to resume the current hypercall. The caller should
 * return immediately, propagating the value returned from this invocation.
 * The format string specifies the types and number of hypercall arguments.
 * It contains one character per argument as follows:
 *  'i' [unsigned] {char, int}
 *  'l' [unsigned] long
 *  'h' guest handle (XEN_GUEST_HANDLE(foo))
 */
unsigned long hypercall_create_continuation(
    unsigned int op, const char *format, ...);
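
/*
 * Illustrative continuation of a preempted hypercall (the argument values
 * are examples only): "lh" re-packs a long and a guest handle so the guest
 * re-enters the same hypercall where it left off:
 *
 *     return hypercall_create_continuation(__HYPERVISOR_memory_op, "lh",
 *                                          cmd, arg);
 */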

static inline void hypercall_cancel_continuation(struct vcpu *v)
{
    v->hcall_preempted = false;
}

/*
 * For long-running operations that must be in hypercall context, check
 * if there is background work to be done that should interrupt this
 * operation.
 */
#define hypercall_preempt_check() (unlikely(    \
        softirq_pending(smp_processor_id()) |   \
        local_events_need_delivery()            \
    ))

/*
 * For long-running operations that may be in hypercall context or on
 * the idle vcpu (e.g. during dom0 construction), check if there is
 * background work to be done that should interrupt this operation.
 */
#define general_preempt_check() (unlikely(                          \
        softirq_pending(smp_processor_id()) ||                      \
        (!is_idle_vcpu(current) && local_events_need_delivery())    \
    ))
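
/*
 * Typical shape of a preemptible long-running loop (a sketch; names are
 * placeholders). Code that may also run on the idle vcpu would use
 * general_preempt_check() instead:
 *
 *     for ( i = start; i < end; i++ )
 *     {
 *         process(i);
 *         if ( hypercall_preempt_check() )
 *             return hypercall_create_continuation(op, "li", i + 1, arg);
 *     }
 */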

extern struct domain *domain_list;

/* Caller must hold the domlist_read_lock or domlist_update_lock. */
static inline struct domain *first_domain_in_cpupool(struct cpupool *c)
{
    struct domain *d;
    for (d = rcu_dereference(domain_list); d && d->cpupool != c;
         d = rcu_dereference(d->next_in_list));
    return d;
}
static inline struct domain *next_domain_in_cpupool(
    struct domain *d, struct cpupool *c)
{
    for (d = rcu_dereference(d->next_in_list); d && d->cpupool != c;
         d = rcu_dereference(d->next_in_list));
    return d;
}

#define for_each_domain(_d)                     \
 for ( (_d) = rcu_dereference(domain_list);     \
       (_d) != NULL;                            \
       (_d) = rcu_dereference((_d)->next_in_list) )

#define for_each_domain_in_cpupool(_d,_c)       \
 for ( (_d) = first_domain_in_cpupool(_c);      \
       (_d) != NULL;                            \
       (_d) = next_domain_in_cpupool((_d), (_c)))

#define for_each_vcpu(_d,_v)                    \
 for ( (_v) = (_d)->vcpu ? (_d)->vcpu[0] : NULL; \
       (_v) != NULL;                            \
       (_v) = (_v)->next_in_list )
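
/*
 * Illustrative iteration over all domains and their vCPUs; the domain list
 * must be protected, e.g. by holding the RCU read lock:
 *
 *     rcu_read_lock(&domlist_read_lock);
 *     for_each_domain ( d )
 *         for_each_vcpu ( d, v )
 *             ... inspect v ...
 *     rcu_read_unlock(&domlist_read_lock);
 */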

/*
 * Per-VCPU pause flags.
 */
 /* Domain is blocked waiting for an event. */
#define _VPF_blocked         0
#define VPF_blocked          (1UL<<_VPF_blocked)
 /* VCPU is offline. */
#define _VPF_down            1
#define VPF_down             (1UL<<_VPF_down)
 /* VCPU is blocked awaiting an event to be consumed by Xen. */
#define _VPF_blocked_in_xen  2
#define VPF_blocked_in_xen   (1UL<<_VPF_blocked_in_xen)
 /* VCPU affinity has changed: migrating to a new CPU. */
#define _VPF_migrating       3
#define VPF_migrating        (1UL<<_VPF_migrating)
 /* VCPU is blocked due to missing mem_paging ring. */
#define _VPF_mem_paging      4
#define VPF_mem_paging       (1UL<<_VPF_mem_paging)
 /* VCPU is blocked due to missing mem_access ring. */
#define _VPF_mem_access      5
#define VPF_mem_access       (1UL<<_VPF_mem_access)
 /* VCPU is blocked due to missing mem_sharing ring. */
#define _VPF_mem_sharing     6
#define VPF_mem_sharing      (1UL<<_VPF_mem_sharing)
 /* VCPU is being reset. */
#define _VPF_in_reset        7
#define VPF_in_reset         (1UL<<_VPF_in_reset)
 /* VCPU is parked. */
#define _VPF_parked          8
#define VPF_parked           (1UL<<_VPF_parked)

static inline int vcpu_runnable(struct vcpu *v)
{
    return !(v->pause_flags |
             atomic_read(&v->pause_count) |
             atomic_read(&v->domain->pause_count));
}

void vcpu_block(void);
void vcpu_unblock(struct vcpu *v);
void vcpu_pause(struct vcpu *v);
void vcpu_pause_nosync(struct vcpu *v);
void vcpu_unpause(struct vcpu *v);
int vcpu_pause_by_systemcontroller(struct vcpu *v);
int vcpu_unpause_by_systemcontroller(struct vcpu *v);

void domain_pause(struct domain *d);
void domain_pause_nosync(struct domain *d);
void domain_unpause(struct domain *d);
int domain_unpause_by_systemcontroller(struct domain *d);
int __domain_pause_by_systemcontroller(struct domain *d,
                                       void (*pause_fn)(struct domain *d));
static inline int domain_pause_by_systemcontroller(struct domain *d)
{
    return __domain_pause_by_systemcontroller(d, domain_pause);
}
static inline int domain_pause_by_systemcontroller_nosync(struct domain *d)
{
    return __domain_pause_by_systemcontroller(d, domain_pause_nosync);
}

/* domain_pause() but safe against trying to pause current. */
void domain_pause_except_self(struct domain *d);
void domain_unpause_except_self(struct domain *d);

void cpu_init(void);

struct scheduler;

struct scheduler *scheduler_get_default(void);
struct scheduler *scheduler_alloc(unsigned int sched_id, int *perr);
void scheduler_free(struct scheduler *sched);
int schedule_cpu_switch(unsigned int cpu, struct cpupool *c);
void vcpu_force_reschedule(struct vcpu *v);
int cpu_disable_scheduler(unsigned int cpu);
int vcpu_set_hard_affinity(struct vcpu *v, const cpumask_t *affinity);
int vcpu_set_soft_affinity(struct vcpu *v, const cpumask_t *affinity);
void restore_vcpu_affinity(struct domain *d);
int vcpu_pin_override(struct vcpu *v, int cpu);

void vcpu_runstate_get(struct vcpu *v, struct vcpu_runstate_info *runstate);
uint64_t get_cpu_idle_time(unsigned int cpu);

/*
 * Used by the idle loop to decide whether there is work to do:
 *  (1) deal with RCU; or (2) run softirqs; or (3) play dead;
 *  or (4) run tasklets.
 *
 * About (4): if a tasklet is enqueued, it will be scheduled really soon,
 * and hence it's pointless to try to sleep between these two events
 * (that's why we don't call the tasklet_work_to_do() helper).
 */
#define cpu_is_haltable(cpu)                    \
    (!rcu_needs_cpu(cpu) &&                     \
     !softirq_pending(cpu) &&                   \
     cpu_online(cpu) &&                         \
     !per_cpu(tasklet_work_to_do, cpu))

void watchdog_domain_init(struct domain *d);
void watchdog_domain_destroy(struct domain *d);

/*
 * Use this check when the following are both true:
 *  - Using this feature or interface requires full access to the hardware
 *    (that is, this would not be suitable for a driver domain)
 *  - There is never a reason to deny the hardware domain access to this
 */
#define is_hardware_domain(_d) ((_d) == hardware_domain)

/* This check is for functionality specific to a control domain */
#define is_control_domain(_d) ((_d)->is_privileged)

#define VM_ASSIST(d, t) (test_bit(VMASST_TYPE_ ## t, &(d)->vm_assist))

#define is_pv_domain(d) ((d)->guest_type == guest_type_pv)
#define is_pv_vcpu(v)   (is_pv_domain((v)->domain))
#define is_hvm_domain(d) ((d)->guest_type == guest_type_hvm)
#define is_hvm_vcpu(v)   (is_hvm_domain(v->domain))
#define is_pinned_vcpu(v) ((v)->domain->is_pinned || \
                           cpumask_weight((v)->cpu_hard_affinity) == 1)
#ifdef CONFIG_HAS_PASSTHROUGH
#define need_iommu(d)    ((d)->need_iommu)
#else
#define need_iommu(d)    (0)
#endif

static inline bool is_vcpu_online(const struct vcpu *v)
{
    return !test_bit(_VPF_down, &v->pause_flags);
}

void set_vcpu_migration_delay(unsigned int delay);
unsigned int get_vcpu_migration_delay(void);

extern bool sched_smt_power_savings;

extern enum cpufreq_controller {
    FREQCTL_none, FREQCTL_dom0_kernel, FREQCTL_xen
} cpufreq_controller;

#define CPUPOOLID_NONE    -1

struct cpupool *cpupool_get_by_id(int poolid);
void cpupool_put(struct cpupool *pool);
int cpupool_add_domain(struct domain *d, int poolid);
void cpupool_rm_domain(struct domain *d);
int cpupool_move_domain(struct domain *d, struct cpupool *c);
int cpupool_do_sysctl(struct xen_sysctl_cpupool_op *op);
void schedule_dump(struct cpupool *c);
extern void dump_runq(unsigned char key);

void arch_do_physinfo(struct xen_sysctl_physinfo *pi);

#endif /* __SCHED_H__ */

/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */