/******************************************************************************
 * Additional declarations for the generic scheduler interface.  This should
 * only be included by files that implement conforming schedulers.
 *
 * Portions by Mark Williamson are (C) 2004 Intel Research Cambridge
 */

#ifndef __XEN_SCHED_IF_H__
#define __XEN_SCHED_IF_H__

#include <xen/err.h>
#include <xen/list.h>
#include <xen/percpu.h>
#include <xen/rcupdate.h>

/* cpus currently in no cpupool */
extern cpumask_t cpupool_free_cpus;

/* Scheduler generic parameters. */
#define SCHED_DEFAULT_RATELIMIT_US 1000
extern int sched_ratelimit_us;

/* Scheduling resource mask. */
extern cpumask_t sched_res_mask;

/* Number of vcpus per struct sched_unit. */
enum sched_gran {
    SCHED_GRAN_cpu,
    SCHED_GRAN_core,
    SCHED_GRAN_socket
};

/*
 * In order to allow a scheduler to remap the lock->cpu mapping, we have a
 * per-cpu pointer, along with a pre-allocated set of locks.  The generic
 * schedule init code points each schedule lock pointer at its own
 * pre-allocated lock; if the scheduler wants to remap them, it can simply
 * modify the schedule lock pointers (see the illustrative sketch after the
 * struct below).
 *
 * For better cache locality, keep the actual lock in the same cache area
 * as the rest of the struct.  Just have the scheduler point to the one it
 * wants (which may be the one right in front of it).
 */
struct sched_resource {
    struct scheduler   *scheduler;
    struct cpupool     *cpupool;
    spinlock_t         *schedule_lock,
                       _lock;
    struct sched_unit  *curr;
    struct sched_unit  *sched_unit_idle;
    struct sched_unit  *prev;
    void               *sched_priv;
    struct timer        s_timer;        /* scheduling timer                */

    /* Cpu with lowest id in scheduling resource. */
    unsigned int        master_cpu;
    unsigned int        granularity;
    cpumask_var_t       cpus;           /* cpus covered by this struct     */
    struct rcu_head     rcu;
};
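
/*
 * Illustrative sketch of the lock remapping mentioned above (the names are
 * hypothetical, not part of the interface): a scheduler that wants several
 * cpus to share one runqueue lock can return its own lock from the
 * switch_sched hook declared below; the pointer it returns is what ends up
 * in get_sched_res(cpu)->schedule_lock, while cpus with no such need keep
 * using the pre-allocated _lock right next to the pointer.
 *
 *   static spinlock_t *example_switch_sched(struct scheduler *new_ops,
 *                                           unsigned int cpu,
 *                                           void *pdata, void *vdata)
 *   {
 *       struct example_runqueue *rq = pdata;   // hypothetical per-cpu data
 *
 *       // ... hand vdata over to the idle unit of this cpu, etc. ...
 *
 *       // The caller installs the returned pointer as
 *       // get_sched_res(cpu)->schedule_lock.
 *       return &rq->lock;
 *   }
 */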

DECLARE_PER_CPU(struct sched_resource *, sched_res);
extern rcu_read_lock_t sched_res_rculock;

static inline struct sched_resource *get_sched_res(unsigned int cpu)
{
    return rcu_dereference(per_cpu(sched_res, cpu));
}

static inline void set_sched_res(unsigned int cpu, struct sched_resource *res)
{
    rcu_assign_pointer(per_cpu(sched_res, cpu), res);
}

static inline struct sched_unit *curr_on_cpu(unsigned int cpu)
{
    return get_sched_res(cpu)->curr;
}

static inline bool is_idle_unit(const struct sched_unit *unit)
{
    return is_idle_vcpu(unit->vcpu_list);
}

/* Returns true if at least one vcpu of the unit is online. */
static inline bool is_unit_online(const struct sched_unit *unit)
{
    const struct vcpu *v;

    for_each_sched_unit_vcpu ( unit, v )
        if ( is_vcpu_online(v) )
            return true;

    return false;
}

static inline unsigned int unit_running(const struct sched_unit *unit)
{
    return unit->runstate_cnt[RUNSTATE_running];
}

/* Returns true if at least one vcpu of the unit is runnable. */
static inline bool unit_runnable(const struct sched_unit *unit)
{
    const struct vcpu *v;

    for_each_sched_unit_vcpu ( unit, v )
        if ( vcpu_runnable(v) )
            return true;

    return false;
}

static inline int vcpu_runstate_blocked(const struct vcpu *v)
{
    return (v->pause_flags & VPF_blocked) ? RUNSTATE_blocked : RUNSTATE_offline;
}

/*
 * Returns whether a sched_unit is runnable and sets new_state for each of its
 * vcpus. It is mandatory to determine the new runstate for all vcpus of a unit
 * without dropping the schedule lock (which happens when synchronizing the
 * context switch of the vcpus of a unit) in order to avoid races with e.g.
 * vcpu_sleep().
 */
static inline bool unit_runnable_state(const struct sched_unit *unit)
{
    struct vcpu *v;
    bool runnable, ret = false;

    if ( is_idle_unit(unit) )
        return true;

    for_each_sched_unit_vcpu ( unit, v )
    {
        runnable = vcpu_runnable(v);

        v->new_state = runnable ? RUNSTATE_running : vcpu_runstate_blocked(v);

        if ( runnable )
            ret = true;
    }

    return ret;
}

static inline void sched_set_res(struct sched_unit *unit,
                                 struct sched_resource *res)
{
    unsigned int cpu = cpumask_first(res->cpus);
    struct vcpu *v;

    for_each_sched_unit_vcpu ( unit, v )
    {
        ASSERT(cpu < nr_cpu_ids);
        v->processor = cpu;
        cpu = cpumask_next(cpu, res->cpus);
    }

    unit->res = res;
}

/* Return master cpu of the scheduling resource the unit is assigned to. */
static inline unsigned int sched_unit_master(const struct sched_unit *unit)
{
    return unit->res->master_cpu;
}

/* Set a bit in pause_flags of all vcpus of a unit. */
static inline void sched_set_pause_flags(struct sched_unit *unit,
                                         unsigned int bit)
{
    struct vcpu *v;

    for_each_sched_unit_vcpu ( unit, v )
        set_bit(bit, &v->pause_flags);
}

/* Clear a bit in pause_flags of all vcpus of a unit. */
static inline void sched_clear_pause_flags(struct sched_unit *unit,
                                           unsigned int bit)
{
    struct vcpu *v;

    for_each_sched_unit_vcpu ( unit, v )
        clear_bit(bit, &v->pause_flags);
}

static inline struct sched_unit *sched_idle_unit(unsigned int cpu)
{
    return get_sched_res(cpu)->sched_unit_idle;
}

static inline unsigned int sched_get_resource_cpu(unsigned int cpu)
{
    return get_sched_res(cpu)->master_cpu;
}

/*
 * Scratch space, for avoiding having too many cpumask_t on the stack.
 * Within each scheduler, when using the scratch mask of one pCPU:
 * - the pCPU must belong to the scheduler,
 * - the caller must own the per-pCPU scheduler lock (a.k.a. runqueue
 *   lock).
 */
DECLARE_PER_CPU(cpumask_t, cpumask_scratch);
#define cpumask_scratch        (&this_cpu(cpumask_scratch))
#define cpumask_scratch_cpu(c) (&per_cpu(cpumask_scratch, c))
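
/*
 * Illustrative sketch of using the scratch mask (the function is
 * hypothetical): a scheduler can build intermediate masks in the per-pCPU
 * scratch space instead of on the stack, as long as the pCPU belongs to it
 * and its scheduler lock is held.
 *
 *   static unsigned int example_pick_cpu(const struct sched_unit *unit,
 *                                        unsigned int cpu)
 *   {
 *       cpumask_t *mask = cpumask_scratch_cpu(cpu);
 *
 *       // Hard affinity restricted to the cpus of the unit's pool.
 *       cpumask_and(mask, unit->cpu_hard_affinity,
 *                   cpupool_domain_master_cpumask(unit->domain));
 *
 *       return cpumask_any(mask);
 *   }
 */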

/*
 * Deal with _spin_lock_irqsave() returning the flags value instead of storing
 * it in a passed parameter.
 */
#define _sched_spinlock0(lock, irq) _spin_lock##irq(lock)
#define _sched_spinlock1(lock, irq, arg) ({ \
    BUILD_BUG_ON(sizeof(arg) != sizeof(unsigned long)); \
    (arg) = _spin_lock##irq(lock); \
})

#define _sched_spinlock__(nr) _sched_spinlock ## nr
#define _sched_spinlock_(nr)  _sched_spinlock__(nr)
#define _sched_spinlock(lock, irq, args...) \
    _sched_spinlock_(count_args(args))(lock, irq, ## args)

#define sched_lock(kind, param, cpu, irq, arg...) \
static always_inline spinlock_t \
*kind##_schedule_lock##irq(param EXTRA_TYPE(arg)) \
{ \
    for ( ; ; ) \
    { \
        spinlock_t *lock = get_sched_res(cpu)->schedule_lock; \
        /* \
         * v->processor may change when grabbing the lock; but \
         * per_cpu(v->processor) may also change, if changing cpu pool \
         * also changes the scheduler lock.  Retry until they match. \
         * \
         * It may also be the case that v->processor may change but the \
         * lock may be the same; this will succeed in that case. \
         * \
         * Use the speculation unsafe locking helper, there's a speculation \
         * barrier before returning to the caller. \
         */ \
        _sched_spinlock(lock, irq, ## arg); \
        if ( likely(lock == get_sched_res(cpu)->schedule_lock) ) \
        { \
            block_lock_speculation(); \
            return lock; \
        } \
        spin_unlock##irq(lock, ## arg); \
    } \
}

#define sched_unlock(kind, param, cpu, irq, arg...) \
static inline void kind##_schedule_unlock##irq(spinlock_t *lock \
                                               EXTRA_TYPE(arg), param) \
{ \
    ASSERT(lock == get_sched_res(cpu)->schedule_lock); \
    spin_unlock##irq(lock, ## arg); \
}

#define EXTRA_TYPE(arg)
sched_lock(pcpu, unsigned int cpu,     cpu, )
sched_lock(unit, const struct sched_unit *i, i->res->master_cpu, )
sched_lock(pcpu, unsigned int cpu,     cpu,          _irq)
sched_lock(unit, const struct sched_unit *i, i->res->master_cpu, _irq)
sched_unlock(pcpu, unsigned int cpu,     cpu, )
sched_unlock(unit, const struct sched_unit *i, i->res->master_cpu, )
sched_unlock(pcpu, unsigned int cpu,     cpu,          _irq)
sched_unlock(unit, const struct sched_unit *i, i->res->master_cpu, _irq)
#undef EXTRA_TYPE

#define EXTRA_TYPE(arg) , unsigned long arg
#define spin_unlock_irqsave spin_unlock_irqrestore
sched_lock(pcpu, unsigned int cpu,     cpu,          _irqsave, *flags)
sched_lock(unit, const struct sched_unit *i, i->res->master_cpu, _irqsave, *flags)
#undef spin_unlock_irqsave
sched_unlock(pcpu, unsigned int cpu,     cpu,          _irqrestore, flags)
sched_unlock(unit, const struct sched_unit *i, i->res->master_cpu, _irqrestore, flags)
#undef EXTRA_TYPE

#undef sched_unlock
#undef sched_lock

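/*
 * Illustrative usage of the lock helpers generated above: the returned lock
 * is the one that was actually taken (it may have been remapped while we
 * were waiting for it), so it must be passed back to the matching unlock
 * helper.
 *
 *   unsigned long flags;
 *   spinlock_t *lock = unit_schedule_lock_irqsave(unit, &flags);
 *
 *   // ... act on the unit while its scheduling resource is locked ...
 *
 *   unit_schedule_unlock_irqrestore(lock, flags, unit);
 */
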
static inline spinlock_t *pcpu_schedule_trylock(unsigned int cpu)
{
    spinlock_t *lock = get_sched_res(cpu)->schedule_lock;

    if ( !spin_trylock(lock) )
        return NULL;
    if ( lock == get_sched_res(cpu)->schedule_lock )
        return lock;
    spin_unlock(lock);
    return NULL;
}

struct scheduler {
    const char *name;       /* full name for this scheduler      */
    const char *opt_name;   /* option name for this scheduler    */
    unsigned int sched_id;  /* ID for this scheduler             */
    void *sched_data;       /* global data pointer               */
    struct cpupool *cpupool;/* points to this scheduler's pool   */

    int          (*global_init)    (void);

    int          (*init)           (struct scheduler *ops);
    void         (*deinit)         (struct scheduler *ops);

    void         (*free_udata)     (const struct scheduler *ops, void *priv);
    void *       (*alloc_udata)    (const struct scheduler *ops,
                                    struct sched_unit *unit, void *dd);

    void         (*free_pdata)     (const struct scheduler *ops,
                                    void *pcpu, int cpu);
    void *       (*alloc_pdata)    (const struct scheduler *ops, int cpu);
    void         (*deinit_pdata)   (const struct scheduler *ops,
                                    void *pcpu, int cpu);

    /* Returns ERR_PTR(-err) for error, NULL for 'nothing needed'. */
    void *       (*alloc_domdata)  (const struct scheduler *ops,
                                    struct domain *dom);
    /* Idempotent. */
    void         (*free_domdata)   (const struct scheduler *ops, void *data);

    spinlock_t * (*switch_sched)   (struct scheduler *new_ops, unsigned int cpu,
                                    void *pdata, void *vdata);

    /* Activate / deactivate units in a cpu pool */
    void         (*insert_unit)    (const struct scheduler *ops,
                                    struct sched_unit *unit);
    void         (*remove_unit)    (const struct scheduler *ops,
                                    struct sched_unit *unit);

    void         (*sleep)          (const struct scheduler *ops,
                                    struct sched_unit *unit);
    void         (*wake)           (const struct scheduler *ops,
                                    struct sched_unit *unit);
    void         (*yield)          (const struct scheduler *ops,
                                    struct sched_unit *unit);
    void         (*context_saved)  (const struct scheduler *ops,
                                    struct sched_unit *unit);

    void         (*do_schedule)    (const struct scheduler *ops,
                                    struct sched_unit *currunit, s_time_t now,
                                    bool tasklet_work_scheduled);

    struct sched_resource *(*pick_resource)(const struct scheduler *ops,
                                            const struct sched_unit *unit);
    void         (*migrate)        (const struct scheduler *ops,
                                    struct sched_unit *unit,
                                    unsigned int new_cpu);
    int          (*adjust)         (const struct scheduler *ops,
                                    struct domain *d,
                                    struct xen_domctl_scheduler_op *op);
    void         (*adjust_affinity)(const struct scheduler *ops,
                                    struct sched_unit *unit,
                                    const struct cpumask *hard,
                                    const struct cpumask *soft);
#ifdef CONFIG_SYSCTL
    int          (*adjust_global)  (const struct scheduler *ops,
                                    struct xen_sysctl_scheduler_op *sc);
#endif
    void         (*dump_settings)  (const struct scheduler *ops);
    void         (*dump_cpu_state) (const struct scheduler *ops, int cpu);
    void         (*move_timers)    (const struct scheduler *ops,
                                    struct sched_resource *sr);
};

static inline int sched_init(struct scheduler *s)
{
    return s->init(s);
}

static inline void sched_deinit(struct scheduler *s)
{
    s->deinit(s);
}

static inline spinlock_t *sched_switch_sched(struct scheduler *s,
                                             unsigned int cpu,
                                             void *pdata, void *vdata)
{
    return s->switch_sched(s, cpu, pdata, vdata);
}

static inline void sched_dump_settings(const struct scheduler *s)
{
    if ( s->dump_settings )
        s->dump_settings(s);
}

static inline void sched_dump_cpu_state(const struct scheduler *s, int cpu)
{
    if ( s->dump_cpu_state )
        s->dump_cpu_state(s, cpu);
}

static inline void *sched_alloc_domdata(const struct scheduler *s,
                                        struct domain *d)
{
    return s->alloc_domdata ? s->alloc_domdata(s, d) : NULL;
}

static inline void sched_free_domdata(const struct scheduler *s,
                                      void *data)
{
    ASSERT(s->free_domdata || !data);
    if ( s->free_domdata )
        s->free_domdata(s, data);
}

static inline void *sched_alloc_pdata(const struct scheduler *s, int cpu)
{
    return s->alloc_pdata ? s->alloc_pdata(s, cpu) : NULL;
}

static inline void sched_free_pdata(const struct scheduler *s, void *data,
                                    int cpu)
{
    ASSERT(s->free_pdata || !data);
    if ( s->free_pdata )
        s->free_pdata(s, data, cpu);
}

static inline void sched_deinit_pdata(const struct scheduler *s, void *data,
                                      int cpu)
{
    if ( s->deinit_pdata )
        s->deinit_pdata(s, data, cpu);
}

static inline void *sched_alloc_udata(const struct scheduler *s,
                                      struct sched_unit *unit, void *dom_data)
{
    return s->alloc_udata(s, unit, dom_data);
}

static inline void sched_free_udata(const struct scheduler *s, void *data)
{
    s->free_udata(s, data);
}

static inline void sched_insert_unit(const struct scheduler *s,
                                     struct sched_unit *unit)
{
    if ( s->insert_unit )
        s->insert_unit(s, unit);
}

static inline void sched_remove_unit(const struct scheduler *s,
                                     struct sched_unit *unit)
{
    if ( s->remove_unit )
        s->remove_unit(s, unit);
}

static inline void sched_sleep(const struct scheduler *s,
                               struct sched_unit *unit)
{
    if ( s->sleep )
        s->sleep(s, unit);
}

static inline void sched_wake(const struct scheduler *s,
                              struct sched_unit *unit)
{
    if ( s->wake )
        s->wake(s, unit);
}

static inline void sched_yield(const struct scheduler *s,
                               struct sched_unit *unit)
{
    if ( s->yield )
        s->yield(s, unit);
}

static inline void sched_context_saved(const struct scheduler *s,
                                       struct sched_unit *unit)
{
    if ( s->context_saved )
        s->context_saved(s, unit);
}

static inline void sched_migrate(const struct scheduler *s,
                                 struct sched_unit *unit, unsigned int cpu)
{
    if ( s->migrate )
        s->migrate(s, unit, cpu);
    else
        sched_set_res(unit, get_sched_res(cpu));
}

static inline struct sched_resource *sched_pick_resource(
    const struct scheduler *s, const struct sched_unit *unit)
{
    return s->pick_resource(s, unit);
}

static inline void sched_adjust_affinity(const struct scheduler *s,
                                         struct sched_unit *unit,
                                         const cpumask_t *hard,
                                         const cpumask_t *soft)
{
    if ( s->adjust_affinity )
        s->adjust_affinity(s, unit, hard, soft);
}

static inline int sched_adjust_dom(const struct scheduler *s, struct domain *d,
                                   struct xen_domctl_scheduler_op *op)
{
    return s->adjust ? s->adjust(s, d, op) : 0;
}

#ifdef CONFIG_SYSCTL
static inline int sched_adjust_cpupool(const struct scheduler *s,
                                       struct xen_sysctl_scheduler_op *op)
{
    return s->adjust_global ? s->adjust_global(s, op) : 0;
}
#endif

static inline void sched_move_timers(const struct scheduler *s,
                                     struct sched_resource *sr)
{
    if ( s->move_timers )
        s->move_timers(s, sr);
}

static inline void sched_unit_pause_nosync(const struct sched_unit *unit)
{
    struct vcpu *v;

    for_each_sched_unit_vcpu ( unit, v )
        vcpu_pause_nosync(v);
}

static inline void sched_unit_unpause(const struct sched_unit *unit)
{
    struct vcpu *v;

    for_each_sched_unit_vcpu ( unit, v )
        vcpu_unpause(v);
}

#define REGISTER_SCHEDULER(x) static const struct scheduler *x##_entry \
  __used_section(".data.schedulers") = &(x)
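
/*
 * Illustrative sketch of how a conforming scheduler registers itself (the
 * names are hypothetical and only a subset of the hooks is shown; a real
 * scheduler fills in considerably more of struct scheduler):
 *
 *   static const struct scheduler sched_example_def = {
 *       .name          = "Example Scheduler",
 *       .opt_name      = "example",
 *       .sched_id      = 99,   // hypothetical; real IDs come from public/domctl.h
 *       .init          = example_init,
 *       .deinit        = example_deinit,
 *       .alloc_udata   = example_alloc_udata,
 *       .free_udata    = example_free_udata,
 *       .pick_resource = example_pick_resource,
 *       .do_schedule   = example_schedule,
 *       .switch_sched  = example_switch_sched,
 *   };
 *   REGISTER_SCHEDULER(sched_example_def);
 */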

struct cpupool
{
    unsigned int     cpupool_id;
#define CPUPOOLID_NONE    (~0U)
    unsigned int     n_dom;
    cpumask_var_t    cpu_valid;      /* all cpus assigned to pool */
    cpumask_var_t    res_valid;      /* all scheduling resources of pool */
    struct list_head list;
    struct cpupool   *next;
    struct scheduler *sched;
    atomic_t         refcnt;
    enum sched_gran  gran;
    unsigned int     sched_gran;     /* Number of cpus per sched-item. */
};

static inline cpumask_t *cpupool_domain_master_cpumask(const struct domain *d)
{
    /*
     * d->cpupool is NULL only for the idle domain, and no one should
     * be interested in calling this for the idle domain.
     */
    ASSERT(d->cpupool != NULL);
    return d->cpupool->res_valid;
}

unsigned int cpupool_get_granularity(const struct cpupool *c);

/*
 * Hard and soft affinity load balancing.
 *
 * The idea is that each vcpu has some pcpus it prefers, some it does not
 * prefer but is OK with, and some it cannot run on at all. The first set of
 * pcpus are the ones that are in both the soft affinity *and* the hard
 * affinity; the second set are the ones that are in the hard affinity but
 * *not* in the soft affinity; the third set are the ones that are not in
 * the hard affinity.
 *
 * We implement a two step balancing logic. Every time we need to decide
 * where to run a vcpu, we first check the soft affinity (actually, the &&
 * of the soft and hard affinities), to see if we can send it where it
 * prefers to (and can) run. If this first step does not find any suitable
 * and free pcpu, we fall back to checking the hard affinity.
 */
#define BALANCE_SOFT_AFFINITY    0
#define BALANCE_HARD_AFFINITY    1

#define for_each_affinity_balance_step(step) \
    for ( (step) = 0; (step) <= BALANCE_HARD_AFFINITY; (step)++ )

/*
 * Hard affinity balancing is always necessary and must never be skipped.
 * But soft affinity need only be considered when it has a functionally
 * different effect than other constraints (such as hard affinity, cpus
 * online, or cpupools).
 *
 * Soft affinity only needs to be considered if:
 * * The cpus in the cpupool are not a subset of soft affinity
 * * The hard affinity is not a subset of soft affinity
 * * There is an overlap between the soft and hard affinity masks
 */
static inline bool has_soft_affinity(const struct sched_unit *unit)
{
    return unit->soft_aff_effective &&
           !cpumask_subset(cpupool_domain_master_cpumask(unit->domain),
                           unit->cpu_soft_affinity);
}

/*
 * This function copies into mask the cpumask that should be used for a
 * particular affinity balancing step. For the soft affinity step, the pcpus
 * that are not part of the unit's hard affinity are filtered out of the
 * result, to avoid running a vcpu where it would like to run but is not
 * allowed to.
 */
static inline void
affinity_balance_cpumask(const struct sched_unit *unit, int step,
                         cpumask_t *mask)
{
    if ( step == BALANCE_SOFT_AFFINITY )
    {
        cpumask_and(mask, unit->cpu_soft_affinity, unit->cpu_hard_affinity);

        if ( unlikely(cpumask_empty(mask)) )
            cpumask_copy(mask, unit->cpu_hard_affinity);
    }
    else /* step == BALANCE_HARD_AFFINITY */
        cpumask_copy(mask, unit->cpu_hard_affinity);
}
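
/*
 * Illustrative sketch of the two step balancing described above (the
 * surrounding code is hypothetical): try the soft affinity first, skipping
 * it when it cannot make a difference, and fall back to the hard affinity.
 *
 *   unsigned int step;
 *
 *   for_each_affinity_balance_step ( step )
 *   {
 *       if ( step == BALANCE_SOFT_AFFINITY && !has_soft_affinity(unit) )
 *           continue;
 *
 *       affinity_balance_cpumask(unit, step, cpumask_scratch_cpu(cpu));
 *
 *       // ... look for a suitable, free pcpu in cpumask_scratch_cpu(cpu);
 *       // stop if one is found, otherwise retry with the hard affinity.
 *   }
 */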

struct affinity_masks {
    cpumask_var_t hard;
    cpumask_var_t soft;
};

bool alloc_affinity_masks(struct affinity_masks *affinity);
void free_affinity_masks(struct affinity_masks *affinity);

/* Memory allocation related data for schedule_cpu_rm(). */
struct cpu_rm_data {
    struct affinity_masks affinity;
    const struct scheduler *old_ops;
    void *ppriv_old;
    void *vpriv_old;
    struct sched_resource *sr[];
};

void sched_rm_cpu(unsigned int cpu);
const cpumask_t *sched_get_opt_cpumask(enum sched_gran opt, unsigned int cpu);
void schedule_dump(struct cpupool *c);
struct scheduler *scheduler_get_default(void);
struct scheduler *scheduler_alloc(unsigned int sched_id);
void scheduler_free(struct scheduler *sched);
int cpu_disable_scheduler(unsigned int cpu);
int schedule_cpu_add(unsigned int cpu, struct cpupool *c);
struct cpu_rm_data *alloc_cpu_rm_data(unsigned int cpu, bool aff_alloc);
void free_cpu_rm_data(struct cpu_rm_data *mem, unsigned int cpu);
int schedule_cpu_rm(unsigned int cpu, struct cpu_rm_data *data);
int sched_move_domain(struct domain *d, struct cpupool *c);
void sched_migrate_timers(unsigned int cpu);
struct cpupool *cpupool_get_by_id(unsigned int poolid);
void cpupool_put(struct cpupool *pool);
int cpupool_add_domain(struct domain *d, unsigned int poolid);
void cpupool_rm_domain(struct domain *d);

#endif /* __XEN_SCHED_IF_H__ */