/******************************************************************************
 * Additional declarations for the generic scheduler interface. This should
 * only be included by files that implement conforming schedulers.
 *
 * Portions by Mark Williamson are (C) 2004 Intel Research Cambridge
 */

#ifndef __XEN_SCHED_IF_H__
#define __XEN_SCHED_IF_H__

#include <xen/percpu.h>

/* A global pointer to the initial cpupool (POOL0). */
extern struct cpupool *cpupool0;

/* cpus currently in no cpupool */
extern cpumask_t cpupool_free_cpus;

/* Scheduler generic parameters */
#define SCHED_DEFAULT_RATELIMIT_US 1000
extern int sched_ratelimit_us;

/*
 * In order to allow a scheduler to remap the lock->cpu mapping,
 * we have a per-cpu pointer, along with a pre-allocated set of
 * locks.  The generic scheduler init code will point each schedule_lock
 * pointer at the pre-allocated lock in the same structure; if a scheduler
 * wants to remap them, it can simply modify the schedule_lock pointers.
 *
 * For better cache behaviour, keep the actual lock in the same cache
 * line as the rest of the struct.  Just have the scheduler point to the
 * one it wants (this may be the one right in front of it).
 */
struct schedule_data {
    spinlock_t         *schedule_lock,
                       _lock;
    struct vcpu        *curr;           /* current task            */
    void               *sched_priv;
    struct timer        s_timer;        /* scheduling timer        */
    atomic_t            urgent_count;   /* how many urgent vcpus   */
};
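
/*
 * Illustrative sketch (an assumption about typical use, not extra API): a
 * scheduler that keeps its own per-runqueue lock can repoint schedule_lock
 * at it, e.g. from its switch_sched hook:
 *
 *     spinlock_t *old_lock = pcpu_schedule_lock_irq(cpu);
 *
 *     per_cpu(schedule_data, cpu).schedule_lock = &my_runqueue_lock;
 *
 *     spin_unlock_irq(old_lock);
 *
 * where my_runqueue_lock is a hypothetical lock owned by the scheduler.
 * The old lock must be held while the pointer is switched, so that anybody
 * spinning on it re-reads the pointer (see sched_lock() below) and moves
 * over to the new lock.
 */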

#define curr_on_cpu(c)    (per_cpu(schedule_data, c).curr)

DECLARE_PER_CPU(struct schedule_data, schedule_data);
DECLARE_PER_CPU(struct scheduler *, scheduler);
DECLARE_PER_CPU(struct cpupool *, cpupool);

/*
 * Scratch space, for avoiding having too many cpumask_t on the stack.
 * Within each scheduler, when using the scratch mask of one pCPU:
 * - the pCPU must belong to the scheduler,
 * - the caller must own the per-pCPU scheduler lock (a.k.a. runqueue
 *   lock).
 */
DECLARE_PER_CPU(cpumask_t, cpumask_scratch);
#define cpumask_scratch        (&this_cpu(cpumask_scratch))
#define cpumask_scratch_cpu(c) (&per_cpu(cpumask_scratch, c))
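
/*
 * Illustrative sketch of the intended usage pattern (v is a placeholder
 * vcpu): the scratch mask of a pCPU may only be touched while that pCPU's
 * scheduler lock is held, e.g. using the helpers defined below:
 *
 *     spinlock_t *lock = vcpu_schedule_lock_irq(v);
 *
 *     cpumask_and(cpumask_scratch_cpu(v->processor),
 *                 v->cpu_hard_affinity, v->cpu_soft_affinity);
 *     ... use cpumask_scratch_cpu(v->processor) ...
 *
 *     vcpu_schedule_unlock_irq(lock, v);
 */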

#define sched_lock(kind, param, cpu, irq, arg...) \
static inline spinlock_t *kind##_schedule_lock##irq(param EXTRA_TYPE(arg)) \
{ \
    for ( ; ; ) \
    { \
        spinlock_t *lock = per_cpu(schedule_data, cpu).schedule_lock; \
        /* \
         * v->processor may change when grabbing the lock; but \
         * per_cpu(v->processor) may also change, if changing cpu pool \
         * also changes the scheduler lock.  Retry until they match. \
         * \
         * It may also be the case that v->processor may change but the \
         * lock may be the same; this will succeed in that case. \
         */ \
        spin_lock##irq(lock, ## arg); \
        if ( likely(lock == per_cpu(schedule_data, cpu).schedule_lock) ) \
            return lock; \
        spin_unlock##irq(lock, ## arg); \
    } \
}

#define sched_unlock(kind, param, cpu, irq, arg...) \
static inline void kind##_schedule_unlock##irq(spinlock_t *lock \
                                               EXTRA_TYPE(arg), param) \
{ \
    ASSERT(lock == per_cpu(schedule_data, cpu).schedule_lock); \
    spin_unlock##irq(lock, ## arg); \
}

#define EXTRA_TYPE(arg)
sched_lock(pcpu, unsigned int cpu, cpu, )
sched_lock(vcpu, const struct vcpu *v, v->processor, )
sched_lock(pcpu, unsigned int cpu, cpu, _irq)
sched_lock(vcpu, const struct vcpu *v, v->processor, _irq)
sched_unlock(pcpu, unsigned int cpu, cpu, )
sched_unlock(vcpu, const struct vcpu *v, v->processor, )
sched_unlock(pcpu, unsigned int cpu, cpu, _irq)
sched_unlock(vcpu, const struct vcpu *v, v->processor, _irq)
#undef EXTRA_TYPE

#define EXTRA_TYPE(arg) , unsigned long arg
#define spin_unlock_irqsave spin_unlock_irqrestore
sched_lock(pcpu, unsigned int cpu, cpu, _irqsave, *flags)
sched_lock(vcpu, const struct vcpu *v, v->processor, _irqsave, *flags)
#undef spin_unlock_irqsave
sched_unlock(pcpu, unsigned int cpu, cpu, _irqrestore, flags)
sched_unlock(vcpu, const struct vcpu *v, v->processor, _irqrestore, flags)
#undef EXTRA_TYPE

#undef sched_unlock
#undef sched_lock
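
/*
 * For reference only (a sketch of what the instantiations above expand to,
 * not additional API), the generated helpers include:
 *
 *     spinlock_t *pcpu_schedule_lock(unsigned int cpu);
 *     spinlock_t *vcpu_schedule_lock_irq(const struct vcpu *v);
 *     spinlock_t *vcpu_schedule_lock_irqsave(const struct vcpu *v,
 *                                            unsigned long *flags);
 *     void vcpu_schedule_unlock_irqrestore(spinlock_t *lock,
 *                                          unsigned long flags,
 *                                          const struct vcpu *v);
 *
 * A typical caller saves the returned lock and passes it back on unlock:
 *
 *     unsigned long flags;
 *     spinlock_t *lock = vcpu_schedule_lock_irqsave(v, &flags);
 *     ... act on v while holding its runqueue lock ...
 *     vcpu_schedule_unlock_irqrestore(lock, flags, v);
 */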

static inline spinlock_t *pcpu_schedule_trylock(unsigned int cpu)
{
    spinlock_t *lock = per_cpu(schedule_data, cpu).schedule_lock;

    if ( !spin_trylock(lock) )
        return NULL;
    if ( lock == per_cpu(schedule_data, cpu).schedule_lock )
        return lock;
    spin_unlock(lock);
    return NULL;
}
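
/*
 * Illustrative usage sketch: the trylock variant follows the same
 * "re-check the pointer after acquiring" pattern as the helpers above, so
 * a NULL return means either the lock is busy or it was remapped while we
 * were taking it; callers must simply back off:
 *
 *     spinlock_t *lock = pcpu_schedule_trylock(cpu);
 *
 *     if ( lock == NULL )
 *         return;
 *     ... work on this pcpu's runqueue ...
 *     spin_unlock(lock);
 */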

struct task_slice {
    struct vcpu *task;
    s_time_t     time;
    bool_t       migrated;
};

struct scheduler {
    char *name;             /* full name for this scheduler   */
    char *opt_name;         /* option name for this scheduler */
    unsigned int sched_id;  /* ID for this scheduler          */
    void *sched_data;       /* global data pointer            */

    int          (*global_init)    (void);

    int          (*init)           (struct scheduler *);
    void         (*deinit)         (struct scheduler *);

    void         (*free_vdata)     (const struct scheduler *, void *);
    void *       (*alloc_vdata)    (const struct scheduler *, struct vcpu *,
                                    void *);
    void         (*free_pdata)     (const struct scheduler *, void *, int);
    void *       (*alloc_pdata)    (const struct scheduler *, int);
    void         (*init_pdata)     (const struct scheduler *, void *, int);
    void         (*deinit_pdata)   (const struct scheduler *, void *, int);
    void         (*free_domdata)   (const struct scheduler *, void *);
    void *       (*alloc_domdata)  (const struct scheduler *, struct domain *);

    void         (*switch_sched)   (struct scheduler *, unsigned int,
                                    void *, void *);

    int          (*init_domain)    (const struct scheduler *, struct domain *);
    void         (*destroy_domain) (const struct scheduler *, struct domain *);

    /* Activate / deactivate vcpus in a cpu pool */
    void         (*insert_vcpu)    (const struct scheduler *, struct vcpu *);
    void         (*remove_vcpu)    (const struct scheduler *, struct vcpu *);

    void         (*sleep)          (const struct scheduler *, struct vcpu *);
    void         (*wake)           (const struct scheduler *, struct vcpu *);
    void         (*yield)          (const struct scheduler *, struct vcpu *);
    void         (*context_saved)  (const struct scheduler *, struct vcpu *);

    struct task_slice (*do_schedule) (const struct scheduler *, s_time_t,
                                      bool_t tasklet_work_scheduled);

    int          (*pick_cpu)       (const struct scheduler *, struct vcpu *);
    void         (*migrate)        (const struct scheduler *, struct vcpu *,
                                    unsigned int);
    int          (*adjust)         (const struct scheduler *, struct domain *,
                                    struct xen_domctl_scheduler_op *);
    int          (*adjust_global)  (const struct scheduler *,
                                    struct xen_sysctl_scheduler_op *);
    void         (*dump_settings)  (const struct scheduler *);
    void         (*dump_cpu_state) (const struct scheduler *, int);

    void         (*tick_suspend)   (const struct scheduler *, unsigned int);
    void         (*tick_resume)    (const struct scheduler *, unsigned int);
};

#define REGISTER_SCHEDULER(x) static const struct scheduler *x##_entry \
  __used_section(".data.schedulers") = &x;
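
/*
 * Illustrative sketch of how a conforming scheduler hooks itself up (the
 * names sched_example_def, "example" and the example_* ops are made up
 * for this sketch):
 *
 *     static const struct scheduler sched_example_def = {
 *         .name        = "Example Scheduler",
 *         .opt_name    = "example",
 *         .init        = example_init,
 *         .do_schedule = example_schedule,
 *         ...
 *     };
 *     REGISTER_SCHEDULER(sched_example_def);
 *
 * REGISTER_SCHEDULER() places a pointer to the structure in the
 * .data.schedulers section, which the generic scheduler code walks at boot
 * to find the scheduler selected by the "sched=" command line option.
 */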

struct cpupool
{
    int               cpupool_id;
    cpumask_var_t     cpu_valid;      /* all cpus assigned to pool */
    cpumask_var_t     cpu_suspended;  /* cpus in S3 that should be in this pool */
    struct cpupool   *next;
    unsigned int      n_dom;
    struct scheduler *sched;
    atomic_t          refcnt;
};

#define cpupool_online_cpumask(_pool) \
    (((_pool) == NULL) ? &cpu_online_map : (_pool)->cpu_valid)

static inline cpumask_t* cpupool_domain_cpumask(struct domain *d)
{
    /*
     * d->cpupool is NULL only for the idle domain, and no one should
     * be interested in calling this for the idle domain.
     */
    ASSERT(d->cpupool != NULL);
    return d->cpupool->cpu_valid;
}

/*
 * Hard and soft affinity load balancing.
 *
 * The idea is that each vcpu has some pcpus that it prefers, some that it
 * does not prefer but is OK with, and some that it cannot run on at all.
 * The first set of pcpus are the ones that are both in the soft affinity
 * *and* in the hard affinity; the second set of pcpus are the ones that
 * are in the hard affinity but *not* in the soft affinity; the third set
 * of pcpus are the ones that are not in the hard affinity.
 *
 * We implement a two-step balancing logic.  Basically, every time we need
 * to decide where to run a vcpu, we first check the soft affinity (well,
 * actually, the && between soft and hard affinity), to see if we can send
 * it where it prefers (and is allowed) to run.  If the first step does not
 * find any suitable and free pcpu, we fall back to checking the hard
 * affinity.
 */
#define BALANCE_SOFT_AFFINITY    0
#define BALANCE_HARD_AFFINITY    1

#define for_each_affinity_balance_step(step) \
    for ( (step) = 0; (step) <= BALANCE_HARD_AFFINITY; (step)++ )

/*
 * Hard affinity balancing is always necessary and must never be skipped.
 * But soft affinity need only be considered when it has a functionally
 * different effect from the other constraints (such as hard affinity,
 * the cpus online, or the cpupool).
 *
 * Soft affinity only needs to be considered if:
 * * The cpus in the cpupool are not a subset of soft affinity
 * * The hard affinity is not a subset of soft affinity
 * * There is an overlap between the soft affinity and the mask which is
 *   currently being considered.
 */
static inline int has_soft_affinity(const struct vcpu *v,
                                    const cpumask_t *mask)
{
    return !cpumask_subset(cpupool_domain_cpumask(v->domain),
                           v->cpu_soft_affinity) &&
           !cpumask_subset(v->cpu_hard_affinity, v->cpu_soft_affinity) &&
           cpumask_intersects(v->cpu_soft_affinity, mask);
}

/*
 * This function copies in mask the cpumask that should be used for a
 * particular affinity balancing step.  For the soft affinity step, the
 * pcpus that are not part of v's hard affinity are filtered out of the
 * result, so we never run a vcpu where it would like to run but is not
 * allowed to.
 */
static inline void
affinity_balance_cpumask(const struct vcpu *v, int step, cpumask_t *mask)
{
    if ( step == BALANCE_SOFT_AFFINITY )
    {
        cpumask_and(mask, v->cpu_soft_affinity, v->cpu_hard_affinity);

        if ( unlikely(cpumask_empty(mask)) )
            cpumask_copy(mask, v->cpu_hard_affinity);
    }
    else /* step == BALANCE_HARD_AFFINITY */
        cpumask_copy(mask, v->cpu_hard_affinity);
}
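
/*
 * Illustrative sketch of the intended two-step usage (the surrounding
 * scheduler code and the way a cpu is picked from the resulting mask are
 * placeholders, not part of this interface); v's runqueue lock is assumed
 * to be held, as required for using the scratch mask:
 *
 *     cpumask_t *cpus = cpumask_scratch_cpu(v->processor);
 *     int step;
 *
 *     for_each_affinity_balance_step( step )
 *     {
 *         if ( step == BALANCE_SOFT_AFFINITY &&
 *              !has_soft_affinity(v, v->cpu_hard_affinity) )
 *             continue;
 *         affinity_balance_cpumask(v, step, cpus);
 *         cpumask_and(cpus, cpus, cpupool_domain_cpumask(v->domain));
 *         if ( !cpumask_empty(cpus) )
 *             break;
 *     }
 *
 * i.e. the soft affinity step is skipped when it cannot make a difference,
 * and the hard affinity step, in a consistent configuration, always yields
 * a non-empty mask of candidate pcpus.
 */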

#endif /* __XEN_SCHED_IF_H__ */