#ifndef __SCHED_H__
#define __SCHED_H__

#include <xen/types.h>
#include <xen/spinlock.h>
#include <xen/rwlock.h>
#include <xen/shared.h>
#include <xen/timer.h>
#include <xen/rangeset.h>
#include <xen/domain.h>
#include <xen/iommu.h>
#include <xen/rcupdate.h>
#include <xen/cpumask.h>
#include <xen/nodemask.h>
#include <xen/radix-tree.h>
#include <xen/multicall.h>
#include <xen/tasklet.h>
#include <xen/mm.h>
#include <xen/smp.h>
#include <xen/perfc.h>
#include <asm/atomic.h>
#include <xen/wait.h>
#include <public/xen.h>
#include <public/domctl.h>
#include <public/sysctl.h>
#include <public/vcpu.h>
#include <public/vm_event.h>
#include <public/event_channel.h>

#ifdef CONFIG_COMPAT
#include <compat/vcpu.h>
DEFINE_XEN_GUEST_HANDLE(vcpu_runstate_info_compat_t);
#endif

/*
 * Stats
 *
 * Enable and ease the use of scheduling-related performance counters.
 */
#ifdef CONFIG_PERF_COUNTERS
#define SCHED_STATS
#endif

#define SCHED_STAT_CRANK(_X)    (perfc_incr(_X))

/* A global pointer to the hardware domain (usually DOM0). */
extern struct domain *hardware_domain;

#ifdef CONFIG_LATE_HWDOM
extern domid_t hardware_domid;
#else
#define hardware_domid 0
#endif

#ifndef CONFIG_COMPAT
#define BITS_PER_EVTCHN_WORD(d) BITS_PER_XEN_ULONG
#else
#define BITS_PER_EVTCHN_WORD(d) (has_32bit_shinfo(d) ? 32 : BITS_PER_XEN_ULONG)
#endif

#define BUCKETS_PER_GROUP  (PAGE_SIZE/sizeof(struct evtchn *))
/* Round size of struct evtchn up to a power of 2. */
#define __RDU2(x)   (       (x) | (       (x) >> 1))
#define __RDU4(x)   ( __RDU2(x) | ( __RDU2(x) >> 2))
#define __RDU8(x)   ( __RDU4(x) | ( __RDU4(x) >> 4))
#define __RDU16(x)  ( __RDU8(x) | ( __RDU8(x) >> 8))
#define __RDU32(x)  (__RDU16(x) | (__RDU16(x) >>16))
#define next_power_of_2(x)  (__RDU32((x)-1) + 1)

/* Maximum number of event channels for any ABI. */
#define MAX_NR_EVTCHNS MAX(EVTCHN_2L_NR_CHANNELS, EVTCHN_FIFO_NR_CHANNELS)

#define EVTCHNS_PER_BUCKET (PAGE_SIZE / next_power_of_2(sizeof(struct evtchn)))
#define EVTCHNS_PER_GROUP  (BUCKETS_PER_GROUP * EVTCHNS_PER_BUCKET)
#define NR_EVTCHN_GROUPS   DIV_ROUND_UP(MAX_NR_EVTCHNS, EVTCHNS_PER_GROUP)

#define XEN_CONSUMER_BITS 3
#define NR_XEN_CONSUMERS ((1 << XEN_CONSUMER_BITS) - 1)
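/*
 * Worked example of the sizing macros above (illustrative only; it assumes
 * 4 KiB pages, 8-byte pointers and sizeof(struct evtchn) == 64, none of
 * which is guaranteed by this header):
 *
 *   next_power_of_2(64) = 64
 *   EVTCHNS_PER_BUCKET  = 4096 / 64 = 64
 *   BUCKETS_PER_GROUP   = 4096 / 8  = 512
 *   EVTCHNS_PER_GROUP   = 512 * 64  = 32768
 *   NR_EVTCHN_GROUPS    = DIV_ROUND_UP(MAX_NR_EVTCHNS, 32768)
 *
 * i.e. a bucket is one page's worth of struct evtchn, a group is one page's
 * worth of bucket pointers, and NR_EVTCHN_GROUPS group pointers in struct
 * domain are enough to cover every port number the ABI can name.
 */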
struct evtchn
{
    spinlock_t lock;
#define ECS_FREE         0 /* Channel is available for use.                  */
#define ECS_RESERVED     1 /* Channel is reserved.                           */
#define ECS_UNBOUND      2 /* Channel is waiting to bind to a remote domain. */
#define ECS_INTERDOMAIN  3 /* Channel is bound to another domain.            */
#define ECS_PIRQ         4 /* Channel is bound to a physical IRQ line.       */
#define ECS_VIRQ         5 /* Channel is bound to a virtual IRQ line.        */
#define ECS_IPI          6 /* Channel is bound to a virtual IPI line.        */
    u8  state;             /* ECS_* */
    u8  xen_consumer:XEN_CONSUMER_BITS; /* Consumer in Xen if nonzero */
    u8  pending:1;
    u16 notify_vcpu_id;    /* VCPU for local delivery notification */
    u32 port;
    union {
        struct {
            domid_t remote_domid;
        } unbound;     /* state == ECS_UNBOUND */
        struct {
            evtchn_port_t  remote_port;
            struct domain *remote_dom;
        } interdomain; /* state == ECS_INTERDOMAIN */
        struct {
            u32            irq;
            evtchn_port_t  next_port;
            evtchn_port_t  prev_port;
        } pirq;        /* state == ECS_PIRQ */
        u16 virq;      /* state == ECS_VIRQ */
    } u;
    u8 priority;
    u8 last_priority;
    u16 last_vcpu_id;
#ifdef CONFIG_XSM
    union {
#ifdef XSM_NEED_GENERIC_EVTCHN_SSID
        /*
         * If an XSM module needs more space for its event channel context,
         * this pointer stores the necessary data for the security server.
         */
        void *generic;
#endif
#ifdef CONFIG_FLASK
        /*
         * Inlining the contents of the structure for FLASK avoids unneeded
         * allocations, and on 64-bit platforms with only FLASK enabled,
         * reduces the size of struct evtchn.
         */
        u32 flask_sid;
#endif
    } ssid;
#endif
} __attribute__((aligned(64)));
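/*
 * Illustrative only: the union in struct evtchn above is discriminated by
 * ->state, so consumers must check the state before touching union members.
 * A minimal sketch (the helper name and return convention are hypothetical):
 *
 *     static domid_t evtchn_peer_domid(const struct evtchn *chn)
 *     {
 *         switch ( chn->state )
 *         {
 *         case ECS_UNBOUND:
 *             return chn->u.unbound.remote_domid;
 *         case ECS_INTERDOMAIN:
 *             return chn->u.interdomain.remote_dom->domain_id;
 *         default:
 *             return DOMID_INVALID;   // no remote domain for this state
 *         }
 *     }
 */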
int  evtchn_init(struct domain *d); /* from domain_create */
void evtchn_destroy(struct domain *d); /* from domain_kill */
void evtchn_destroy_final(struct domain *d); /* from complete_domain_destroy */

struct waitqueue_vcpu;

struct vcpu
{
    int              vcpu_id;

    int              processor;

    vcpu_info_t     *vcpu_info;

    struct domain   *domain;

    struct vcpu     *next_in_list;

    s_time_t         periodic_period;
    s_time_t         periodic_last_event;
    struct timer     periodic_timer;
    struct timer     singleshot_timer;

    struct timer     poll_timer;    /* timeout for SCHEDOP_poll */

    void            *sched_priv;    /* scheduler-specific data */

    struct vcpu_runstate_info runstate;
#ifndef CONFIG_COMPAT
# define runstate_guest(v) ((v)->runstate_guest)
    XEN_GUEST_HANDLE(vcpu_runstate_info_t) runstate_guest; /* guest address */
#else
# define runstate_guest(v) ((v)->runstate_guest.native)
    union {
        XEN_GUEST_HANDLE(vcpu_runstate_info_t) native;
        XEN_GUEST_HANDLE(vcpu_runstate_info_compat_t) compat;
    } runstate_guest; /* guest address */
#endif

    /* Last time the vCPU was scheduled out. */
    uint64_t         last_run_time;

    /* Has the FPU been initialised? */
    bool             fpu_initialised;
    /* Has the FPU been used since it was last saved? */
    bool             fpu_dirtied;
    /* Initialization completed for this VCPU? */
    bool             is_initialised;
    /* Currently running on a CPU? */
    bool             is_running;
    /* VCPU should wake fast (do not deep sleep the CPU). */
    bool             is_urgent;

#ifdef VCPU_TRAP_LAST
#define VCPU_TRAP_NONE    0
    struct {
        bool             pending;
        uint8_t          old_mask;
    }                async_exception_state[VCPU_TRAP_LAST];
#define async_exception_state(t) async_exception_state[(t)-1]
    uint8_t          async_exception_mask;
#endif

    /* Require shutdown to be deferred for some asynchronous operation? */
    bool             defer_shutdown;
    /* VCPU is paused following shutdown request (d->is_shutting_down)? */
    bool             paused_for_shutdown;
    /* VCPU needs its affinity restored. */
    bool             affinity_broken;

    /* A hypercall has been preempted. */
    bool             hcall_preempted;
#ifdef CONFIG_COMPAT
    /* A hypercall is using the compat ABI? */
    bool             hcall_compat;
#endif

    /*
     * > 0: a single port is being polled;
     * = 0: nothing is being polled (vcpu should be clear in d->poll_mask);
     * < 0: multiple ports may be being polled.
     */
    int              poll_evtchn;

    /* (over-)protected by ->domain->event_lock */
    int              pirq_evtchn_head;

    unsigned long    pause_flags;
    atomic_t         pause_count;

    /* VCPU paused for vm_event replies. */
    atomic_t         vm_event_pause_count;
    /* VCPU paused by system controller. */
    int              controller_pause_count;

    /* Grant table map tracking. */
    spinlock_t       maptrack_freelist_lock;
    unsigned int     maptrack_head;
    unsigned int     maptrack_tail;

    /* IRQ-safe virq_lock protects against delivering VIRQ to stale evtchn. */
    evtchn_port_t    virq_to_evtchn[NR_VIRQS];
    spinlock_t       virq_lock;

    /* Bitmask of CPUs on which this VCPU may run. */
    cpumask_var_t    cpu_hard_affinity;
    /* Used to change affinity temporarily. */
    cpumask_var_t    cpu_hard_affinity_tmp;
    /* Used to restore affinity across S3. */
    cpumask_var_t    cpu_hard_affinity_saved;

    /* Bitmask of CPUs on which this VCPU prefers to run. */
    cpumask_var_t    cpu_soft_affinity;

    /* Bitmask of CPUs which are holding onto this VCPU's state. */
    cpumask_var_t    vcpu_dirty_cpumask;

    /* Tasklet for continue_hypercall_on_cpu(). */
    struct tasklet   continue_hypercall_tasklet;

    /* Multicall information. */
    struct mc_state  mc_state;

    struct waitqueue_vcpu *waitqueue_vcpu;

    /* Guest-specified relocation of vcpu_info. */
    mfn_t            vcpu_info_mfn;

    struct evtchn_fifo_vcpu *evtchn_fifo;

    struct arch_vcpu arch;
};

/* Per-domain lock can be recursively acquired in fault handlers. */
#define domain_lock(d) spin_lock_recursive(&(d)->domain_lock)
#define domain_unlock(d) spin_unlock_recursive(&(d)->domain_lock)

/* VM event */
struct vm_event_domain
{
    /* ring lock */
    spinlock_t ring_lock;
    /* The ring has 64 entries */
    unsigned char foreign_producers;
    unsigned char target_producers;
    /* shared ring page */
    void *ring_page;
    struct page_info *ring_pg_struct;
    /* front-end ring */
    vm_event_front_ring_t front_ring;
    /* event channel port (vcpu0 only) */
    int xen_port;
    /* vm_event bit for vcpu->pause_flags */
    int pause_flag;
    /* list of vcpus waiting for room in the ring */
    struct waitqueue_head wq;
    /* the number of vCPUs blocked */
    unsigned int blocked;
    /* The last vcpu woken up */
    unsigned int last_vcpu_wake_up;
};

struct evtchn_port_ops;

enum guest_type {
    guest_type_pv, guest_type_hvm
};
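/*
 * Illustrative only: because domain_lock() (defined above) uses a recursive
 * spinlock, a path that already holds it may safely reach code that takes it
 * again, e.g. from a fault-handling path.  Both function names below are
 * hypothetical.
 *
 *     void outer_op(struct domain *d)
 *     {
 *         domain_lock(d);
 *         touch_guest_state(d);     // may fault and end up in fixup_fault()
 *         domain_unlock(d);
 *     }
 *
 *     void fixup_fault(struct domain *d)
 *     {
 *         domain_lock(d);           // nested acquisition is fine
 *         ...
 *         domain_unlock(d);
 *     }
 */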
struct domain
{
    domid_t          domain_id;

    unsigned int     max_vcpus;
    struct vcpu    **vcpu;

    shared_info_t   *shared_info;     /* shared data area */

    spinlock_t       domain_lock;

    spinlock_t       page_alloc_lock;   /* protects all the following fields */
    struct page_list_head page_list;    /* linked list */
    struct page_list_head xenpage_list; /* linked list (size xenheap_pages) */
    unsigned int     tot_pages;         /* number of pages currently possessed */
    unsigned int     xenheap_pages;     /* # pages allocated from Xen heap */
    unsigned int     outstanding_pages; /* pages claimed but not possessed */
    unsigned int     max_pages;         /* maximum value for tot_pages */
    atomic_t         shr_pages;         /* number of shared pages */
    atomic_t         paged_pages;       /* number of paged-out pages */

    /* Scheduling. */
    void            *sched_priv;    /* scheduler-specific data */
    struct cpupool  *cpupool;

    struct domain   *next_in_list;
    struct domain   *next_in_hashbucket;

    struct list_head rangesets;
    spinlock_t       rangesets_lock;

    /* Event channel information. */
    struct evtchn   *evtchn;                         /* first bucket only */
    struct evtchn  **evtchn_group[NR_EVTCHN_GROUPS]; /* all other buckets */
    unsigned int     max_evtchns;     /* number supported by ABI */
    unsigned int     max_evtchn_port; /* max permitted port number */
    unsigned int     valid_evtchns;   /* number of allocated event channels */
    spinlock_t       event_lock;
    const struct evtchn_port_ops *evtchn_port_ops;
    struct evtchn_fifo_domain *evtchn_fifo;

    struct grant_table *grant_table;

    /*
     * Interrupt to event-channel mappings and other per-guest-pirq data.
     * Protected by the domain's event-channel spinlock.
     */
    struct radix_tree_root pirq_tree;
    unsigned int     nr_pirqs;

    enum guest_type guest_type;

    /* Is this guest dying (i.e., a zombie)? */
    enum { DOMDYING_alive, DOMDYING_dying, DOMDYING_dead } is_dying;

    /* Domain is paused by controller software? */
    int              controller_pause_count;

    int64_t          time_offset_seconds;

#ifdef CONFIG_HAS_PASSTHROUGH
    struct domain_iommu iommu;

    /* Does this guest need iommu mappings (-1 meaning "being set up")? */
    s8               need_iommu;
#endif
    /* is node-affinity automatically computed? */
    bool             auto_node_affinity;
    /* Is this guest fully privileged (aka dom0)? */
    bool             is_privileged;
    /* Is this a xenstore domain (not dom0)? */
    bool             is_xenstore;
    /* Domain's VCPUs are pinned 1:1 to physical CPUs? */
    bool             is_pinned;
    /* Non-migratable and non-restorable? */
    bool             disable_migrate;
    /* Is this guest being debugged by dom0? */
    bool             debugger_attached;
    /*
     * Set to true at the very end of domain creation, when the domain is
     * unpaused for the first time by the system controller.
     */
    bool             creation_finished;

    /* Which guest this guest has privileges on */
    struct domain   *target;

    /* Are any VCPUs polling event channels (SCHEDOP_poll)? */
#if MAX_VIRT_CPUS <= BITS_PER_LONG
    DECLARE_BITMAP(poll_mask, MAX_VIRT_CPUS);
#else
    unsigned long   *poll_mask;
#endif

    /* I/O capabilities (access to IRQs and memory-mapped I/O). */
    struct rangeset *iomem_caps;
    struct rangeset *irq_caps;

    /* Guest has shut down (inc. reason code)? */
    spinlock_t       shutdown_lock;
    bool             is_shutting_down; /* in process of shutting down? */
    bool             is_shut_down;     /* fully shut down? */
#define SHUTDOWN_CODE_INVALID ~0u
    unsigned int     shutdown_code;

    /*
     * If this is not 0, send suspend notification here instead of
     * raising DOM_EXC.
     */
    evtchn_port_t    suspend_evtchn;

    atomic_t         pause_count;
    atomic_t         refcnt;

    unsigned long    vm_assist;

    /* Bitmask of CPUs which are holding onto this domain's state. */
    cpumask_var_t    domain_dirty_cpumask;

    struct arch_domain arch;

    void *ssid; /* sHype security subject identifier */

    /* Control-plane tools handle for this domain. */
    xen_domain_handle_t handle;

    /* hvm_print_line() and guest_console_write() logging. */
#define DOMAIN_PBUF_SIZE 200
    char       *pbuf;
    unsigned    pbuf_idx;
    spinlock_t  pbuf_lock;

    /* OProfile support. */
    struct xenoprof *xenoprof;

    /* Domain watchdog. */
#define NR_DOMAIN_WATCHDOG_TIMERS 2
    spinlock_t   watchdog_lock;
    uint32_t     watchdog_inuse_map;
    struct timer watchdog_timer[NR_DOMAIN_WATCHDOG_TIMERS];

    struct rcu_head rcu;

    /*
     * Hypercall deadlock avoidance lock. Used if a hypercall might
     * cause a deadlock. Acquirers don't spin waiting; they preempt.
     */
    spinlock_t hypercall_deadlock_mutex;

    /* transcendent memory, auto-allocated on first tmem op by each domain */
    struct client *tmem_client;

    struct lock_profile_qhead profile_head;

    /* Various vm_events */

    /* Memory sharing support */
#ifdef CONFIG_HAS_MEM_SHARING
    struct vm_event_domain *vm_event_share;
#endif
    /* Memory paging support */
#ifdef CONFIG_HAS_MEM_PAGING
    struct vm_event_domain *vm_event_paging;
#endif
    /* VM event monitor support */
    struct vm_event_domain *vm_event_monitor;

    /*
     * Can be specified by the user. If that is not the case, it is
     * computed from the union of all the vcpu cpu-affinity masks.
     */
    nodemask_t node_affinity;
    unsigned int last_alloc_node;
    spinlock_t node_affinity_lock;

    /* vNUMA topology accesses are protected by rwlock. */
    rwlock_t vnuma_rwlock;
    struct vnuma_info *vnuma;

    /* Common monitor options */
    struct {
        unsigned int guest_request_enabled : 1;
        unsigned int guest_request_sync    : 1;
    } monitor;
};
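/*
 * Illustrative only: per the comment on pirq_tree above, lookups in the
 * per-guest-pirq radix tree are done under d->event_lock.  A minimal sketch
 * (what is done with the entry is hypothetical):
 *
 *     spin_lock(&d->event_lock);
 *     {
 *         void *info = radix_tree_lookup(&d->pirq_tree, pirq);
 *
 *         if ( info )
 *             ... use the per-pirq data ...
 *     }
 *     spin_unlock(&d->event_lock);
 */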
/* Protect updates/reads (resp.) of domain_list and domain_hash. */
extern spinlock_t domlist_update_lock;
extern rcu_read_lock_t domlist_read_lock;

extern struct vcpu *idle_vcpu[NR_CPUS];
#define is_idle_domain(d) ((d)->domain_id == DOMID_IDLE)
#define is_idle_vcpu(v)   (is_idle_domain((v)->domain))

#define DOMAIN_DESTROYED (1u << 31) /* assumes atomic_t is >= 32 bits */
#define put_domain(_d) \
  if ( atomic_dec_and_test(&(_d)->refcnt) ) domain_destroy(_d)

/*
 * Use this when you don't have an existing reference to @d. It returns
 * FALSE if @d is being destroyed.
 */
static always_inline int get_domain(struct domain *d)
{
    int old, seen = atomic_read(&d->refcnt);
    do
    {
        old = seen;
        if ( unlikely(old & DOMAIN_DESTROYED) )
            return 0;
        seen = atomic_cmpxchg(&d->refcnt, old, old + 1);
    }
    while ( unlikely(seen != old) );
    return 1;
}

/*
 * Use this when you already have, or are borrowing, a reference to @d.
 * In this case we know that @d cannot be destroyed under our feet.
 */
static inline void get_knownalive_domain(struct domain *d)
{
    atomic_inc(&d->refcnt);
    ASSERT(!(atomic_read(&d->refcnt) & DOMAIN_DESTROYED));
}

int domain_set_node_affinity(struct domain *d, const nodemask_t *affinity);
void domain_update_node_affinity(struct domain *d);
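/*
 * Illustrative refcounting pattern (a sketch, not a definitive recipe):
 * take a reference with get_domain() before using @d outside of any RCU
 * read section, and drop it with put_domain() when done.  The work done in
 * between is hypothetical.
 *
 *     if ( !get_domain(d) )
 *         return -ESRCH;          // domain is already being destroyed
 *     ... operate on d; it cannot be freed while the reference is held ...
 *     put_domain(d);
 */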
/*
 * Create a domain: the configuration is only necessary for a real domain
 * (i.e. !DOMCRF_dummy, which excludes the idle domain).
 */
struct domain *domain_create(domid_t domid, unsigned int domcr_flags,
                             uint32_t ssidref,
                             struct xen_arch_domainconfig *config);
/* DOMCRF_hvm: Create an HVM domain, as opposed to a PV domain. */
#define _DOMCRF_hvm           0
#define DOMCRF_hvm            (1U<<_DOMCRF_hvm)
/* DOMCRF_hap: Create a domain with hardware-assisted paging. */
#define _DOMCRF_hap           1
#define DOMCRF_hap            (1U<<_DOMCRF_hap)
/* DOMCRF_s3_integrity: Create a domain with tboot memory integrity protection. */
#define _DOMCRF_s3_integrity  2
#define DOMCRF_s3_integrity   (1U<<_DOMCRF_s3_integrity)
/* DOMCRF_dummy: Create a dummy domain (not scheduled; not on domain list) */
#define _DOMCRF_dummy         3
#define DOMCRF_dummy          (1U<<_DOMCRF_dummy)
/* DOMCRF_oos_off: don't use the out-of-sync optimization for shadow page tables */
#define _DOMCRF_oos_off       4
#define DOMCRF_oos_off        (1U<<_DOMCRF_oos_off)
/* DOMCRF_xs_domain: xenstore domain */
#define _DOMCRF_xs_domain     5
#define DOMCRF_xs_domain      (1U<<_DOMCRF_xs_domain)

/*
 * rcu_lock_domain_by_id() is more efficient than get_domain_by_id().
 * This is the preferred function if the returned domain reference
 * is short lived, but it cannot be used if the domain reference needs
 * to be kept beyond the current scope (e.g., across a softirq).
 * The returned domain reference must be discarded using rcu_unlock_domain().
 */
struct domain *rcu_lock_domain_by_id(domid_t dom);

/*
 * As the above function, but resolves DOMID_SELF to the current domain.
 */
struct domain *rcu_lock_domain_by_any_id(domid_t dom);

/*
 * As rcu_lock_domain_by_id(), but will fail EPERM or ESRCH rather than resolve
 * to the local domain.
 */
int rcu_lock_remote_domain_by_id(domid_t dom, struct domain **d);

/*
 * As rcu_lock_remote_domain_by_id(), but will fail EINVAL if the domain is
 * dying.
 */
int rcu_lock_live_remote_domain_by_id(domid_t dom, struct domain **d);

static inline void rcu_unlock_domain(struct domain *d)
{
    if ( d != current->domain )
        rcu_read_unlock(d);
}

static inline struct domain *rcu_lock_domain(struct domain *d)
{
    if ( d != current->domain )
        rcu_read_lock(d);
    return d;
}

static inline struct domain *rcu_lock_current_domain(void)
{
    return /*rcu_lock_domain*/(current->domain);
}
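/*
 * Typical lookup pattern (a sketch; the body of the critical section is
 * hypothetical):
 *
 *     struct domain *d = rcu_lock_domain_by_id(domid);
 *
 *     if ( d == NULL )
 *         return -ESRCH;
 *     ... short-lived use of d; don't sleep or stash the pointer away ...
 *     rcu_unlock_domain(d);
 */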
struct domain *get_domain_by_id(domid_t dom);
void domain_destroy(struct domain *d);
int domain_kill(struct domain *d);
int domain_shutdown(struct domain *d, u8 reason);
void domain_resume(struct domain *d);
void domain_pause_for_debugger(void);

int domain_soft_reset(struct domain *d);

int vcpu_start_shutdown_deferral(struct vcpu *v);
void vcpu_end_shutdown_deferral(struct vcpu *v);

/*
 * Mark specified domain as crashed. This function always returns, even if the
 * caller is the specified domain. The domain is not synchronously descheduled
 * from any processor.
 */
void __domain_crash(struct domain *d);
#define domain_crash(d) do {                                              \
    printk("domain_crash called from %s:%d\n", __FILE__, __LINE__);      \
    __domain_crash(d);                                                    \
} while (0)

/*
 * Mark current domain as crashed and synchronously deschedule from the local
 * processor. This function never returns.
 */
void noreturn __domain_crash_synchronous(void);
#define domain_crash_synchronous() do {                                   \
    printk("domain_crash_sync called from %s:%d\n", __FILE__, __LINE__);  \
    __domain_crash_synchronous();                                         \
} while (0)

/*
 * Called from assembly code, with an optional address to help indicate why
 * the crash occurred. If addr is 0, look up the address from the last extable
 * redirection.
 */
void noreturn asm_domain_crash_synchronous(unsigned long addr);

#define set_current_state(_s) do { current->state = (_s); } while (0)
void scheduler_init(void);
int  sched_init_vcpu(struct vcpu *v, unsigned int processor);
void sched_destroy_vcpu(struct vcpu *v);
int  sched_init_domain(struct domain *d, int poolid);
void sched_destroy_domain(struct domain *d);
int sched_move_domain(struct domain *d, struct cpupool *c);
long sched_adjust(struct domain *, struct xen_domctl_scheduler_op *);
long sched_adjust_global(struct xen_sysctl_scheduler_op *);
int  sched_id(void);
void sched_tick_suspend(void);
void sched_tick_resume(void);
void vcpu_wake(struct vcpu *v);
long vcpu_yield(void);
void vcpu_sleep_nosync(struct vcpu *v);
void vcpu_sleep_sync(struct vcpu *v);

/*
 * Force synchronisation of given VCPU's state. If it is currently descheduled,
 * this call will ensure that all its state is committed to memory and that
 * no CPU is using critical state (e.g., page tables) belonging to the VCPU.
 */
void sync_vcpu_execstate(struct vcpu *v);

/* As above, for any lazy state being held on the local CPU. */
void sync_local_execstate(void);

/*
 * Called by the scheduler to switch to another VCPU. This function must
 * call context_saved(@prev) when the local CPU is no longer running in
 * @prev's context, and that context is saved to memory. Alternatively, if
 * implementing lazy context switching, it suffices to ensure that invoking
 * sync_vcpu_execstate() will switch and commit @prev's state.
 */
void context_switch(
    struct vcpu *prev,
    struct vcpu *next);

/*
 * As described above, context_switch() must call this function when the
 * local CPU is no longer running in @prev's context, and @prev's context is
 * saved to memory. Alternatively, if implementing lazy context switching,
 * ensure that invoking sync_vcpu_execstate() will switch and commit @prev.
 */
void context_saved(struct vcpu *prev);

/* Called by the scheduler to continue running the current VCPU. */
void continue_running(
    struct vcpu *same);

void startup_cpu_idle_loop(void);
extern void (*pm_idle) (void);
extern void (*dead_idle) (void);


/*
 * Creates a continuation to resume the current hypercall. The caller should
 * return immediately, propagating the value returned from this invocation.
 * The format string specifies the types and number of hypercall arguments.
 * It contains one character per argument as follows:
 *  'i' [unsigned] {char, int}
 *  'l' [unsigned] long
 *  'h' guest handle (XEN_GUEST_HANDLE(foo))
 */
unsigned long hypercall_create_continuation(
    unsigned int op, const char *format, ...);

static inline void hypercall_cancel_continuation(struct vcpu *v)
{
    v->hcall_preempted = false;
}

/*
 * For long-running operations that must be in hypercall context, check
 * if there is background work to be done that should interrupt this
 * operation.
 */
#define hypercall_preempt_check() (unlikely(    \
        softirq_pending(smp_processor_id()) |   \
        local_events_need_delivery()            \
    ))
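/*
 * Illustrative pattern for a preemptible, long-running hypercall (a sketch:
 * the op number, argument layout and the per-iteration work are all
 * hypothetical):
 *
 *     long do_frob(XEN_GUEST_HANDLE_PARAM(void) arg, unsigned long start)
 *     {
 *         unsigned long i;
 *
 *         for ( i = start; i < NR_ITEMS; i++ )            // NR_ITEMS: made up
 *         {
 *             if ( hypercall_preempt_check() )
 *                 return hypercall_create_continuation(
 *                     __HYPERVISOR_frob_op, "hl", arg, i); // made-up op
 *             process_item(i);                             // hypothetical work
 *         }
 *         return 0;
 *     }
 */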
/*
 * For long-running operations that may be in hypercall context or on
 * the idle vcpu (e.g. during dom0 construction), check if there is
 * background work to be done that should interrupt this operation.
 */
#define general_preempt_check() (unlikely(                          \
        softirq_pending(smp_processor_id()) ||                      \
        (!is_idle_vcpu(current) && local_events_need_delivery())    \
    ))

extern struct domain *domain_list;

/* Caller must hold the domlist_read_lock or domlist_update_lock. */
static inline struct domain *first_domain_in_cpupool(struct cpupool *c)
{
    struct domain *d;
    for (d = rcu_dereference(domain_list); d && d->cpupool != c;
         d = rcu_dereference(d->next_in_list));
    return d;
}
static inline struct domain *next_domain_in_cpupool(
    struct domain *d, struct cpupool *c)
{
    for (d = rcu_dereference(d->next_in_list); d && d->cpupool != c;
         d = rcu_dereference(d->next_in_list));
    return d;
}

#define for_each_domain(_d)                     \
 for ( (_d) = rcu_dereference(domain_list);     \
       (_d) != NULL;                            \
       (_d) = rcu_dereference((_d)->next_in_list )) \

#define for_each_domain_in_cpupool(_d,_c)       \
 for ( (_d) = first_domain_in_cpupool(_c);      \
       (_d) != NULL;                            \
       (_d) = next_domain_in_cpupool((_d), (_c)))

#define for_each_vcpu(_d,_v)                    \
 for ( (_v) = (_d)->vcpu ? (_d)->vcpu[0] : NULL; \
       (_v) != NULL;                            \
       (_v) = (_v)->next_in_list )
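/*
 * Illustrative iteration pattern (a sketch): walk the domain list under the
 * RCU read lock on domlist_read_lock, per the "Caller must hold..." comment
 * above.  The per-domain work shown is hypothetical and must not sleep.
 *
 *     struct domain *d;
 *
 *     rcu_read_lock(&domlist_read_lock);
 *     for_each_domain ( d )
 *         inspect_domain(d);          // hypothetical helper
 *     rcu_read_unlock(&domlist_read_lock);
 */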
/*
 * Per-VCPU pause flags.
 */
 /* VCPU is blocked waiting for an event. */
#define _VPF_blocked         0
#define VPF_blocked          (1UL<<_VPF_blocked)
 /* VCPU is offline. */
#define _VPF_down            1
#define VPF_down             (1UL<<_VPF_down)
 /* VCPU is blocked awaiting an event to be consumed by Xen. */
#define _VPF_blocked_in_xen  2
#define VPF_blocked_in_xen   (1UL<<_VPF_blocked_in_xen)
 /* VCPU affinity has changed: migrating to a new CPU. */
#define _VPF_migrating       3
#define VPF_migrating        (1UL<<_VPF_migrating)
 /* VCPU is blocked due to missing mem_paging ring. */
#define _VPF_mem_paging      4
#define VPF_mem_paging       (1UL<<_VPF_mem_paging)
 /* VCPU is blocked due to missing mem_access ring. */
#define _VPF_mem_access      5
#define VPF_mem_access       (1UL<<_VPF_mem_access)
 /* VCPU is blocked due to missing mem_sharing ring. */
#define _VPF_mem_sharing     6
#define VPF_mem_sharing      (1UL<<_VPF_mem_sharing)
 /* VCPU is being reset. */
#define _VPF_in_reset        7
#define VPF_in_reset         (1UL<<_VPF_in_reset)
 /* VCPU is parked. */
#define _VPF_parked          8
#define VPF_parked           (1UL<<_VPF_parked)

static inline int vcpu_runnable(struct vcpu *v)
{
    return !(v->pause_flags |
             atomic_read(&v->pause_count) |
             atomic_read(&v->domain->pause_count));
}

void vcpu_block(void);
void vcpu_unblock(struct vcpu *v);
void vcpu_pause(struct vcpu *v);
void vcpu_pause_nosync(struct vcpu *v);
void vcpu_unpause(struct vcpu *v);
int vcpu_pause_by_systemcontroller(struct vcpu *v);
int vcpu_unpause_by_systemcontroller(struct vcpu *v);

void domain_pause(struct domain *d);
void domain_pause_nosync(struct domain *d);
void domain_unpause(struct domain *d);
int domain_unpause_by_systemcontroller(struct domain *d);
int __domain_pause_by_systemcontroller(struct domain *d,
                                       void (*pause_fn)(struct domain *d));
static inline int domain_pause_by_systemcontroller(struct domain *d)
{
    return __domain_pause_by_systemcontroller(d, domain_pause);
}
static inline int domain_pause_by_systemcontroller_nosync(struct domain *d)
{
    return __domain_pause_by_systemcontroller(d, domain_pause_nosync);
}

/* domain_pause() but safe against trying to pause current. */
void domain_pause_except_self(struct domain *d);
void domain_unpause_except_self(struct domain *d);

void cpu_init(void);

struct scheduler;

struct scheduler *scheduler_get_default(void);
struct scheduler *scheduler_alloc(unsigned int sched_id, int *perr);
void scheduler_free(struct scheduler *sched);
int schedule_cpu_switch(unsigned int cpu, struct cpupool *c);
void vcpu_force_reschedule(struct vcpu *v);
int cpu_disable_scheduler(unsigned int cpu);
int vcpu_set_hard_affinity(struct vcpu *v, const cpumask_t *affinity);
int vcpu_set_soft_affinity(struct vcpu *v, const cpumask_t *affinity);
void restore_vcpu_affinity(struct domain *d);
int vcpu_pin_override(struct vcpu *v, int cpu);

void vcpu_runstate_get(struct vcpu *v, struct vcpu_runstate_info *runstate);
uint64_t get_cpu_idle_time(unsigned int cpu);
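/*
 * Illustrative pause pattern (a sketch; the work in the middle is
 * hypothetical): pause every vcpu of @d so its state can be inspected or
 * modified without the guest running underneath, then unpause.
 *
 *     domain_pause(d);
 *     ... examine or update d's state with all of its vcpus descheduled ...
 *     domain_unpause(d);
 */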
/*
 * Used by idle loop to decide whether there is work to do:
 *  (1) Deal with RCU; (2) or run softirqs; or (3) Play dead;
 *  or (4) Run tasklets.
 *
 * About (4), if a tasklet is enqueued, it will be scheduled
 * really really soon, and hence it's pointless to try to
 * sleep between these two events (that's why we don't call
 * the tasklet_work_to_do() helper).
 */
#define cpu_is_haltable(cpu)                    \
    (!rcu_needs_cpu(cpu) &&                     \
     !softirq_pending(cpu) &&                   \
     cpu_online(cpu) &&                         \
     !per_cpu(tasklet_work_to_do, cpu))

void watchdog_domain_init(struct domain *d);
void watchdog_domain_destroy(struct domain *d);

/*
 * Use this check when the following are both true:
 *  - Using this feature or interface requires full access to the hardware
 *    (that is, this would not be suitable for a driver domain)
 *  - There is never a reason to deny the hardware domain access to this
 */
#define is_hardware_domain(_d) ((_d) == hardware_domain)

/* This check is for functionality specific to a control domain */
#define is_control_domain(_d) ((_d)->is_privileged)

#define VM_ASSIST(d, t) (test_bit(VMASST_TYPE_ ## t, &(d)->vm_assist))

#define is_pv_domain(d) ((d)->guest_type == guest_type_pv)
#define is_pv_vcpu(v)   (is_pv_domain((v)->domain))
#define is_hvm_domain(d) ((d)->guest_type == guest_type_hvm)
#define is_hvm_vcpu(v)   (is_hvm_domain((v)->domain))
#define is_pinned_vcpu(v) ((v)->domain->is_pinned || \
                           cpumask_weight((v)->cpu_hard_affinity) == 1)
#ifdef CONFIG_HAS_PASSTHROUGH
#define need_iommu(d)    ((d)->need_iommu)
#else
#define need_iommu(d)    (0)
#endif

static inline bool is_vcpu_online(const struct vcpu *v)
{
    return !test_bit(_VPF_down, &v->pause_flags);
}

void set_vcpu_migration_delay(unsigned int delay);
unsigned int get_vcpu_migration_delay(void);

extern bool sched_smt_power_savings;

extern enum cpufreq_controller {
    FREQCTL_none, FREQCTL_dom0_kernel, FREQCTL_xen
} cpufreq_controller;

#define CPUPOOLID_NONE    -1

struct cpupool *cpupool_get_by_id(int poolid);
void cpupool_put(struct cpupool *pool);
int cpupool_add_domain(struct domain *d, int poolid);
void cpupool_rm_domain(struct domain *d);
int cpupool_move_domain(struct domain *d, struct cpupool *c);
int cpupool_do_sysctl(struct xen_sysctl_cpupool_op *op);
void schedule_dump(struct cpupool *c);
extern void dump_runq(unsigned char key);

void arch_do_physinfo(struct xen_sysctl_physinfo *pi);

#endif /* __SCHED_H__ */

/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */