1 /******************************************************************************
2 * cpupool.c
3 *
4 * Generic cpupool-handling functions.
5 *
6  * Cpupools are a feature providing configurable scheduling domains. Each
7  * cpupool runs its own scheduler on a dedicated set of physical cpus.
8  * A domain is bound to exactly one cpupool at any time, but it can be
9  * moved to another cpupool.
10 *
11 * (C) 2009, Juergen Gross, Fujitsu Technology Solutions
12 */
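/*
 * Illustrative note (a sketch, not a normative interface description): at
 * runtime pools are manipulated via the XEN_SYSCTL_CPUPOOL_OP_* operations
 * handled by cpupool_do_sysctl() below, typically driven by the toolstack
 * (e.g. the "xl cpupool-*" commands), while boot time pools are set up via
 * the btcpupools_*() helpers called from cpupool_init().
 */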
13
14 #include <xen/cpu.h>
15 #include <xen/cpumask.h>
16 #include <xen/guest_access.h>
17 #include <xen/hypfs.h>
18 #include <xen/init.h>
19 #include <xen/keyhandler.h>
20 #include <xen/lib.h>
21 #include <xen/list.h>
22 #include <xen/param.h>
23 #include <xen/percpu.h>
24 #include <xen/sched.h>
25 #include <xen/warning.h>
26
27 #include "private.h"
28
29 struct cpupool *cpupool0; /* Initial cpupool with Dom0 */
30 cpumask_t cpupool_free_cpus; /* cpus not in any cpupool */
31
32 static LIST_HEAD(cpupool_list); /* linked list, sorted by poolid */
33 static unsigned int n_cpupools;
34
35 static int cpupool_moving_cpu = -1;
36 static struct cpupool *cpupool_cpu_moving = NULL;
37 static cpumask_t cpupool_locked_cpus;
38
39 /* This lock nests inside the sysctl or hypfs lock. */
40 static DEFINE_SPINLOCK(cpupool_lock);
41
42 static enum sched_gran __read_mostly opt_sched_granularity = SCHED_GRAN_cpu;
43 static unsigned int __read_mostly sched_granularity = 1;
44
45 #define SCHED_GRAN_NAME_LEN 8
46 struct sched_gran_name {
47 enum sched_gran mode;
48 char name[SCHED_GRAN_NAME_LEN];
49 };
50
51 static const struct sched_gran_name sg_name[] = {
52 {SCHED_GRAN_cpu, "cpu"},
53 {SCHED_GRAN_core, "core"},
54 {SCHED_GRAN_socket, "socket"},
55 };
56
57 static const char *sched_gran_get_name(enum sched_gran mode)
58 {
59 const char *name = "";
60 unsigned int i;
61
62 for ( i = 0; i < ARRAY_SIZE(sg_name); i++ )
63 {
64 if ( mode == sg_name[i].mode )
65 {
66 name = sg_name[i].name;
67 break;
68 }
69 }
70
71 return name;
72 }
73
74 static void sched_gran_print(enum sched_gran mode, unsigned int gran)
75 {
76 printk("Scheduling granularity: %s, %u CPU%s per sched-resource\n",
77 sched_gran_get_name(mode), gran, gran == 1 ? "" : "s");
78 }
79
80 #ifdef CONFIG_HAS_SCHED_GRANULARITY
81 static int sched_gran_get(const char *str, enum sched_gran *mode)
82 {
83 unsigned int i;
84
85 for ( i = 0; i < ARRAY_SIZE(sg_name); i++ )
86 {
87 if ( strcmp(sg_name[i].name, str) == 0 )
88 {
89 *mode = sg_name[i].mode;
90 return 0;
91 }
92 }
93
94 return -EINVAL;
95 }
96
97 static int __init cf_check sched_select_granularity(const char *str)
98 {
99 return sched_gran_get(str, &opt_sched_granularity);
100 }
101 custom_param("sched-gran", sched_select_granularity);
102 #elif defined(CONFIG_HYPFS)
103 static int sched_gran_get(const char *str, enum sched_gran *mode)
104 {
105 return -EINVAL;
106 }
107 #endif
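/*
 * Boot parameter example (documentation only; the accepted names come from
 * sg_name[] above and the parameter exists only with
 * CONFIG_HAS_SCHED_GRANULARITY):
 *     sched-gran=cpu      one cpu per scheduling resource (the default)
 *     sched-gran=core     all SMT siblings of a core share one resource
 *     sched-gran=socket   all cpus of a socket share one resource
 */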
108
109 static unsigned int cpupool_check_granularity(enum sched_gran mode)
110 {
111 unsigned int cpu;
112 unsigned int siblings, gran = 0;
113
114 if ( mode == SCHED_GRAN_cpu )
115 return 1;
116
117 for_each_online_cpu ( cpu )
118 {
119 siblings = cpumask_weight(sched_get_opt_cpumask(mode, cpu));
120 if ( gran == 0 )
121 gran = siblings;
122 else if ( gran != siblings )
123 return 0;
124 }
125
126 return gran;
127 }
128
129 /* Set up data for the selected scheduler granularity. */
130 static void __init cpupool_gran_init(void)
131 {
132 unsigned int gran = 0;
133 const char *fallback = NULL;
134
135 while ( gran == 0 )
136 {
137 gran = cpupool_check_granularity(opt_sched_granularity);
138
139 if ( gran == 0 )
140 {
141 switch ( opt_sched_granularity )
142 {
143 case SCHED_GRAN_core:
144 opt_sched_granularity = SCHED_GRAN_cpu;
145 fallback = "Asymmetric cpu configuration.\n"
146 "Falling back to sched-gran=cpu.\n";
147 break;
148 case SCHED_GRAN_socket:
149 opt_sched_granularity = SCHED_GRAN_core;
150 fallback = "Asymmetric cpu configuration.\n"
151 "Falling back to sched-gran=core.\n";
152 break;
153 default:
154 ASSERT_UNREACHABLE();
155 break;
156 }
157 }
158 }
159
160 if ( fallback )
161 warning_add(fallback);
162
163 if ( opt_sched_granularity != SCHED_GRAN_cpu )
164 sched_disable_smt_switching = true;
165
166 sched_granularity = gran;
167 sched_gran_print(opt_sched_granularity, sched_granularity);
168 }
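/*
 * Worked example (an illustration of the fallback loop above, not additional
 * behaviour): booting with "sched-gran=socket" on a host whose sockets hold
 * differing numbers of cpus falls back to core granularity; if the cores are
 * asymmetric as well (e.g. SMT enabled on only some of them), a second
 * iteration falls back to cpu granularity, which always succeeds.
 */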
169
170 unsigned int cpupool_get_granularity(const struct cpupool *c)
171 {
172 return c ? c->sched_gran : 1;
173 }
174
175 static void free_cpupool_struct(struct cpupool *c)
176 {
177 if ( c )
178 {
179 free_cpumask_var(c->res_valid);
180 free_cpumask_var(c->cpu_valid);
181 }
182 xfree(c);
183 }
184
185 static struct cpupool *alloc_cpupool_struct(void)
186 {
187 struct cpupool *c = xzalloc(struct cpupool);
188
189 if ( !c )
190 return NULL;
191
192 if ( !zalloc_cpumask_var(&c->cpu_valid) ||
193 !zalloc_cpumask_var(&c->res_valid) )
194 {
195 free_cpupool_struct(c);
196 c = NULL;
197 }
198
199 return c;
200 }
201
202 /*
203  * Find a cpupool by its id. To be called with the cpupool lock held.
204  * If exact is not specified, the first cpupool with an id larger than or
205  * equal to the searched id is returned.
206  * Returns NULL if not found.
207 */
208 static struct cpupool *__cpupool_find_by_id(unsigned int id, bool exact)
209 {
210 struct cpupool *q;
211
212 ASSERT(spin_is_locked(&cpupool_lock));
213
214 list_for_each_entry(q, &cpupool_list, list)
215 if ( q->cpupool_id == id || (!exact && q->cpupool_id > id) )
216 return q;
217
218 return NULL;
219 }
220
221 static struct cpupool *cpupool_find_by_id(unsigned int poolid)
222 {
223 return __cpupool_find_by_id(poolid, true);
224 }
225
226 static struct cpupool *__cpupool_get_by_id(unsigned int poolid, bool exact)
227 {
228 struct cpupool *c;
229 spin_lock(&cpupool_lock);
230 c = __cpupool_find_by_id(poolid, exact);
231 if ( c != NULL )
232 atomic_inc(&c->refcnt);
233 spin_unlock(&cpupool_lock);
234 return c;
235 }
236
237 struct cpupool *cpupool_get_by_id(unsigned int poolid)
238 {
239 return __cpupool_get_by_id(poolid, true);
240 }
241
242 static struct cpupool *cpupool_get_next_by_id(unsigned int poolid)
243 {
244 return __cpupool_get_by_id(poolid, false);
245 }
246
247 void cpupool_put(struct cpupool *pool)
248 {
249 if ( !atomic_dec_and_test(&pool->refcnt) )
250 return;
251 scheduler_free(pool->sched);
252 free_cpupool_struct(pool);
253 }
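/*
 * Minimal usage sketch of the reference counting contract above (kept out of
 * the build; the function name is hypothetical, only cpupool_get_by_id() and
 * cpupool_put() are real):
 */
#if 0
static int example_use_pool(unsigned int poolid)
{
    struct cpupool *c = cpupool_get_by_id(poolid);  /* takes a reference */

    if ( c == NULL )
        return -ENOENT;

    /* ... use c->sched, c->n_dom, c->cpu_valid ... */

    cpupool_put(c);                                 /* drops the reference */

    return 0;
}
#endif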
254
255 /*
256  * Create a new cpupool with the specified poolid and scheduler.
257  * Returns a pointer to the new cpupool structure if okay, ERR_PTR(-errno) else.
258 * possible failures:
259 * - no memory
260 * - poolid already used
261 * - unknown scheduler
262 */
263 static struct cpupool *cpupool_create(unsigned int poolid,
264 unsigned int sched_id)
265 {
266 struct cpupool *c;
267 struct cpupool *q;
268 int ret;
269
270 if ( (c = alloc_cpupool_struct()) == NULL )
271 return ERR_PTR(-ENOMEM);
272
273 /* One reference for caller, one reference for cpupool_destroy(). */
274 atomic_set(&c->refcnt, 2);
275
276 debugtrace_printk("cpupool_create(pool=%u,sched=%u)\n", poolid, sched_id);
277
278 spin_lock(&cpupool_lock);
279
280 /* Don't allow too many cpupools. */
281 if ( n_cpupools >= 2 * nr_cpu_ids )
282 {
283 ret = -ENOSPC;
284 goto unlock;
285 }
286 n_cpupools++;
287
288 if ( poolid != CPUPOOLID_NONE )
289 {
290 q = __cpupool_find_by_id(poolid, false);
291 if ( !q )
292 list_add_tail(&c->list, &cpupool_list);
293 else
294 {
295 list_add_tail(&c->list, &q->list);
296 if ( q->cpupool_id == poolid )
297 {
298 ret = -EEXIST;
299 goto err;
300 }
301 }
302
303 c->cpupool_id = poolid;
304 }
305 else
306 {
307 /* Cpupool 0 is created with specified id at boot and never removed. */
308 ASSERT(!list_empty(&cpupool_list));
309
310 q = list_last_entry(&cpupool_list, struct cpupool, list);
311 /* In case of a wrap, search for the first free id. */
312 if ( q->cpupool_id == CPUPOOLID_NONE - 1 )
313 {
314 list_for_each_entry(q, &cpupool_list, list)
315 if ( q->cpupool_id + 1 != list_next_entry(q, list)->cpupool_id )
316 break;
317 }
318
319 list_add(&c->list, &q->list);
320
321 c->cpupool_id = q->cpupool_id + 1;
322 }
323
324 c->sched = scheduler_alloc(sched_id);
325 if ( IS_ERR(c->sched) )
326 {
327 ret = PTR_ERR(c->sched);
328 goto err;
329 }
330
331 c->sched->cpupool = c;
332 c->gran = opt_sched_granularity;
333 c->sched_gran = sched_granularity;
334
335 spin_unlock(&cpupool_lock);
336
337 debugtrace_printk("Created cpupool %u with scheduler %s (%s)\n",
338 c->cpupool_id, c->sched->name, c->sched->opt_name);
339
340 return c;
341
342 err:
343 list_del(&c->list);
344 n_cpupools--;
345
346 unlock:
347 spin_unlock(&cpupool_lock);
348
349 free_cpupool_struct(c);
350
351 return ERR_PTR(ret);
352 }
353 /*
354  * Destroy the given cpupool.
355  * Returns 0 on success, a negative errno value (-EBUSY) else.
356 * possible failures:
357 * - pool still in use
358 * - cpus still assigned to pool
359 */
360 static int cpupool_destroy(struct cpupool *c)
361 {
362 spin_lock(&cpupool_lock);
363
364 if ( (c->n_dom != 0) || cpumask_weight(c->cpu_valid) )
365 {
366 spin_unlock(&cpupool_lock);
367 return -EBUSY;
368 }
369
370 n_cpupools--;
371 list_del(&c->list);
372
373 spin_unlock(&cpupool_lock);
374
375 cpupool_put(c);
376
377 debugtrace_printk("cpupool_destroy(pool=%u)\n", c->cpupool_id);
378 return 0;
379 }
380
381 /*
382 * Move domain to another cpupool
383 */
384 static int cpupool_move_domain_locked(struct domain *d, struct cpupool *c)
385 {
386 int ret;
387
388 if ( unlikely(d->cpupool == c) )
389 return 0;
390
391 d->cpupool->n_dom--;
392 ret = sched_move_domain(d, c);
393 if ( ret )
394 d->cpupool->n_dom++;
395 else
396 c->n_dom++;
397
398 return ret;
399 }
400 int cpupool_move_domain(struct domain *d, struct cpupool *c)
401 {
402 int ret;
403
404 spin_lock(&cpupool_lock);
405
406 ret = cpupool_move_domain_locked(d, c);
407
408 spin_unlock(&cpupool_lock);
409
410 return ret;
411 }
412
413 /* Update affinities of all domains in a cpupool. */
414 static void cpupool_update_node_affinity(const struct cpupool *c,
415 struct affinity_masks *masks)
416 {
417 struct affinity_masks local_masks;
418 struct domain *d;
419
420 if ( !masks )
421 {
422 if ( !alloc_affinity_masks(&local_masks) )
423 return;
424 masks = &local_masks;
425 }
426
427 rcu_read_lock(&domlist_read_lock);
428
429 for_each_domain_in_cpupool(d, c)
430 domain_update_node_aff(d, masks);
431
432 rcu_read_unlock(&domlist_read_lock);
433
434 if ( masks == &local_masks )
435 free_affinity_masks(masks);
436 }
437
438 /*
439 * assign a specific cpu to a cpupool
440 * cpupool_lock must be held
441 */
442 static int cpupool_assign_cpu_locked(struct cpupool *c, unsigned int cpu)
443 {
444 int ret;
445 const cpumask_t *cpus;
446
447 cpus = sched_get_opt_cpumask(c->gran, cpu);
448
449 if ( (cpupool_moving_cpu == cpu) && (c != cpupool_cpu_moving) )
450 return -EADDRNOTAVAIL;
451 ret = schedule_cpu_add(cpumask_first(cpus), c);
452 if ( ret )
453 return ret;
454
455 rcu_read_lock(&sched_res_rculock);
456
457 cpumask_andnot(&cpupool_free_cpus, &cpupool_free_cpus, cpus);
458 if ( cpupool_moving_cpu == cpu )
459 {
460 cpupool_moving_cpu = -1;
461 cpupool_put(cpupool_cpu_moving);
462 cpupool_cpu_moving = NULL;
463 }
464 cpumask_or(c->cpu_valid, c->cpu_valid, cpus);
465 cpumask_and(c->res_valid, c->cpu_valid, &sched_res_mask);
466
467 rcu_read_unlock(&sched_res_rculock);
468
469 cpupool_update_node_affinity(c, NULL);
470
471 return 0;
472 }
473
474 static int cpupool_unassign_cpu_finish(struct cpupool *c,
475 struct cpu_rm_data *mem)
476 {
477 int cpu = cpupool_moving_cpu;
478 const cpumask_t *cpus;
479 struct affinity_masks *masks = mem ? &mem->affinity : NULL;
480 int ret;
481
482 if ( c != cpupool_cpu_moving )
483 return -EADDRNOTAVAIL;
484
485 rcu_read_lock(&domlist_read_lock);
486 ret = cpu_disable_scheduler(cpu);
487 rcu_read_unlock(&domlist_read_lock);
488
489 rcu_read_lock(&sched_res_rculock);
490 cpus = get_sched_res(cpu)->cpus;
491 cpumask_or(&cpupool_free_cpus, &cpupool_free_cpus, cpus);
492
493 /*
494 * cpu_disable_scheduler() returning an error doesn't require resetting
495  * cpupool_free_cpus' cpu bit. All error cases should be of a temporary
496  * nature, and tools will retry the operation. Even if the number of
497 * retries may be limited, the in-between state can easily be repaired
498 * by adding the cpu to the cpupool again.
499 */
500 if ( !ret )
501 {
502 ret = schedule_cpu_rm(cpu, mem);
503 if ( ret )
504 cpumask_andnot(&cpupool_free_cpus, &cpupool_free_cpus, cpus);
505 else
506 {
507 cpupool_moving_cpu = -1;
508 cpupool_put(cpupool_cpu_moving);
509 cpupool_cpu_moving = NULL;
510 }
511 }
512 rcu_read_unlock(&sched_res_rculock);
513
514 cpupool_update_node_affinity(c, masks);
515
516 return ret;
517 }
518
519 static int cpupool_unassign_cpu_start(struct cpupool *c, unsigned int cpu)
520 {
521 int ret;
522 struct domain *d;
523 const cpumask_t *cpus;
524
525 spin_lock(&cpupool_lock);
526 ret = -EADDRNOTAVAIL;
527 if ( ((cpupool_moving_cpu != -1) || !cpumask_test_cpu(cpu, c->cpu_valid))
528 && (cpu != cpupool_moving_cpu) )
529 goto out;
530
531 ret = 0;
532 rcu_read_lock(&sched_res_rculock);
533 cpus = get_sched_res(cpu)->cpus;
534
535 if ( (c->n_dom > 0) &&
536 (cpumask_weight(c->cpu_valid) == cpumask_weight(cpus)) &&
537 (cpu != cpupool_moving_cpu) )
538 {
539 rcu_read_lock(&domlist_read_lock);
540 for_each_domain_in_cpupool(d, c)
541 {
542 if ( !d->is_dying && system_state == SYS_STATE_active )
543 {
544 ret = -EBUSY;
545 break;
546 }
547 ret = cpupool_move_domain_locked(d, cpupool0);
548 if ( ret )
549 break;
550 }
551 rcu_read_unlock(&domlist_read_lock);
552 if ( ret )
553 goto out_rcu;
554 }
555 cpupool_moving_cpu = cpu;
556 atomic_inc(&c->refcnt);
557 cpupool_cpu_moving = c;
558 cpumask_andnot(c->cpu_valid, c->cpu_valid, cpus);
559 cpumask_and(c->res_valid, c->cpu_valid, &sched_res_mask);
560
561 out_rcu:
562 rcu_read_unlock(&sched_res_rculock);
563 out:
564 spin_unlock(&cpupool_lock);
565
566 return ret;
567 }
568
569 static long cf_check cpupool_unassign_cpu_helper(void *info)
570 {
571 struct cpupool *c = info;
572 long ret;
573
574 debugtrace_printk("cpupool_unassign_cpu(pool=%u,cpu=%d)\n",
575 cpupool_cpu_moving->cpupool_id, cpupool_moving_cpu);
576 spin_lock(&cpupool_lock);
577
578 ret = cpupool_unassign_cpu_finish(c, NULL);
579
580 spin_unlock(&cpupool_lock);
581 debugtrace_printk("cpupool_unassign_cpu ret=%ld\n", ret);
582
583 return ret;
584 }
585
586 /*
587  * Unassign a specific cpu from a cpupool.
588  * We must be sure not to run on the cpu to be unassigned! To achieve this,
589  * the main functionality is performed via continue_hypercall_on_cpu() on a
590  * specific cpu.
591  * If the cpu to be removed is the last one of the cpupool, no active domain
592  * may be bound to the cpupool. Dying domains are moved to cpupool0 as they
593  * might be zombies.
594  * possible failures:
595  * - last cpu and still active domains in cpupool
596  * - cpu just being unplugged
597  * - attempt to remove the boot cpu from cpupool0
598 */
599 static int cpupool_unassign_cpu(struct cpupool *c, unsigned int cpu)
600 {
601 int work_cpu;
602 int ret;
603 unsigned int master_cpu;
604
605 debugtrace_printk("cpupool_unassign_cpu(pool=%u,cpu=%d)\n",
606 c->cpupool_id, cpu);
607
608 /*
609 * Cpu0 must remain in cpupool0, otherwise some operations like moving cpus
610  * between cpupools, cpu hotplug, destroying cpupools, or shutdown of the
611  * host might not work in a sane way.
612 */
613 if ( (!c->cpupool_id && !cpu) || !cpu_online(cpu) )
614 return -EINVAL;
615
616 master_cpu = sched_get_resource_cpu(cpu);
617 ret = cpupool_unassign_cpu_start(c, master_cpu);
618 if ( ret )
619 {
620 debugtrace_printk("cpupool_unassign_cpu(pool=%u,cpu=%d) ret %d\n",
621 c->cpupool_id, cpu, ret);
622 return ret;
623 }
624
625 work_cpu = sched_get_resource_cpu(smp_processor_id());
626 if ( work_cpu == master_cpu )
627 {
628 work_cpu = cpumask_first(cpupool0->cpu_valid);
629 if ( work_cpu == master_cpu )
630 work_cpu = cpumask_last(cpupool0->cpu_valid);
631 }
632 return continue_hypercall_on_cpu(work_cpu, cpupool_unassign_cpu_helper, c);
633 }
634
635 /*
636 * add a new domain to a cpupool
637 * possible failures:
638 * - pool does not exist
639 * - no cpu assigned to pool
640 */
641 int cpupool_add_domain(struct domain *d, unsigned int poolid)
642 {
643 struct cpupool *c;
644 int rc;
645 int n_dom = 0;
646
647 spin_lock(&cpupool_lock);
648 c = cpupool_find_by_id(poolid);
649 if ( c == NULL )
650 rc = -ESRCH;
651 else if ( !cpumask_weight(c->cpu_valid) )
652 rc = -ENODEV;
653 else
654 {
655 c->n_dom++;
656 n_dom = c->n_dom;
657 d->cpupool = c;
658 rc = 0;
659 }
660 spin_unlock(&cpupool_lock);
661 debugtrace_printk("cpupool_add_domain(dom=%d,pool=%u) n_dom %d rc %d\n",
662 d->domain_id, poolid, n_dom, rc);
663 return rc;
664 }
665
666 /*
667 * remove a domain from a cpupool
668 */
669 void cpupool_rm_domain(struct domain *d)
670 {
671 unsigned int cpupool_id;
672 int n_dom;
673
674 if ( d->cpupool == NULL )
675 return;
676 spin_lock(&cpupool_lock);
677 cpupool_id = d->cpupool->cpupool_id;
678 d->cpupool->n_dom--;
679 n_dom = d->cpupool->n_dom;
680 d->cpupool = NULL;
681 spin_unlock(&cpupool_lock);
682 debugtrace_printk("cpupool_rm_domain(dom=%d,pool=%u) n_dom %d\n",
683 d->domain_id, cpupool_id, n_dom);
684 return;
685 }
686
687 /*
688 * Called to add a cpu to a pool. CPUs being hot-plugged are added to pool0,
689 * as they must have been in there when unplugged.
690 */
691 static int cpupool_cpu_add(unsigned int cpu)
692 {
693 int ret = 0;
694 const cpumask_t *cpus;
695
696 spin_lock(&cpupool_lock);
697 cpumask_clear_cpu(cpu, &cpupool_locked_cpus);
698 cpumask_set_cpu(cpu, &cpupool_free_cpus);
699
700 /*
701  * If we are not resuming, we are hot-plugging a cpu, in which case
702  * we add it to pool0, as it certainly was there when hot-unplugged
703 * (or unplugging would have failed) and that is the default behavior
704 * anyway.
705 */
706 rcu_read_lock(&sched_res_rculock);
707 get_sched_res(cpu)->cpupool = NULL;
708
709 cpus = sched_get_opt_cpumask(cpupool0->gran, cpu);
710 if ( cpumask_subset(cpus, &cpupool_free_cpus) &&
711 cpumask_weight(cpus) == cpupool_get_granularity(cpupool0) )
712 ret = cpupool_assign_cpu_locked(cpupool0, cpu);
713
714 rcu_read_unlock(&sched_res_rculock);
715
716 spin_unlock(&cpupool_lock);
717
718 return ret;
719 }
720
721 /*
722 * This function is called in stop_machine context, so we can be sure no
723 * non-idle vcpu is active on the system.
724 */
725 static void cpupool_cpu_remove(unsigned int cpu, struct cpu_rm_data *mem)
726 {
727 int ret;
728
729 ASSERT(is_idle_vcpu(current));
730
731 if ( !cpumask_test_cpu(cpu, &cpupool_free_cpus) )
732 {
733 ret = cpupool_unassign_cpu_finish(cpupool0, mem);
734 BUG_ON(ret);
735 }
736 cpumask_clear_cpu(cpu, &cpupool_free_cpus);
737 }
738
739 /*
740 * Called before a CPU is being removed from the system.
741  * Removing a CPU is allowed for free CPUs or CPUs in Pool-0 (the latter are
742  * moved to the free cpus before actually being removed).
743 * The CPU is locked, to forbid adding it again to another cpupool.
744 */
745 static int cpupool_cpu_remove_prologue(unsigned int cpu)
746 {
747 int ret = 0;
748 cpumask_t *cpus;
749 unsigned int master_cpu;
750
751 spin_lock(&cpupool_lock);
752
753 rcu_read_lock(&sched_res_rculock);
754 cpus = get_sched_res(cpu)->cpus;
755 master_cpu = sched_get_resource_cpu(cpu);
756 if ( cpumask_intersects(cpus, &cpupool_locked_cpus) )
757 ret = -EBUSY;
758 else
759 cpumask_set_cpu(cpu, &cpupool_locked_cpus);
760 rcu_read_unlock(&sched_res_rculock);
761
762 spin_unlock(&cpupool_lock);
763
764 if ( ret )
765 return ret;
766
767 if ( cpumask_test_cpu(master_cpu, cpupool0->cpu_valid) )
768 {
769 /* Cpupool0 is populated only after all cpus are up. */
770 ASSERT(system_state == SYS_STATE_active);
771
772 ret = cpupool_unassign_cpu_start(cpupool0, master_cpu);
773 }
774 else if ( !cpumask_test_cpu(master_cpu, &cpupool_free_cpus) )
775 ret = -ENODEV;
776
777 return ret;
778 }
779
780 /*
781 * Called during resume for all cpus which didn't come up again. The cpu must
782  * be removed from the cpupool it is assigned to. In case a cpupool would be
783  * left without any cpu, we move all domains of that cpupool to cpupool0.
784 * As we are called with all domains still frozen there is no need to take the
785 * cpupool lock here.
786 */
787 static void cpupool_cpu_remove_forced(unsigned int cpu)
788 {
789 struct cpupool *c;
790 int ret;
791 unsigned int master_cpu = sched_get_resource_cpu(cpu);
792
793 list_for_each_entry(c, &cpupool_list, list)
794 {
795 if ( cpumask_test_cpu(master_cpu, c->cpu_valid) )
796 {
797 ret = cpupool_unassign_cpu_start(c, master_cpu);
798 BUG_ON(ret);
799 ret = cpupool_unassign_cpu_finish(c, NULL);
800 BUG_ON(ret);
801 }
802 }
803
804 cpumask_clear_cpu(cpu, &cpupool_free_cpus);
805
806 rcu_read_lock(&sched_res_rculock);
807 sched_rm_cpu(cpu);
808 rcu_read_unlock(&sched_res_rculock);
809 }
810
811 /*
812  * Do cpupool-related sysctl operations.
813 */
814 int cpupool_do_sysctl(struct xen_sysctl_cpupool_op *op)
815 {
816 int ret = 0;
817 struct cpupool *c;
818
819 switch ( op->op )
820 {
821
822 case XEN_SYSCTL_CPUPOOL_OP_CREATE:
823 {
824 unsigned int poolid;
825
826 poolid = (op->cpupool_id == XEN_SYSCTL_CPUPOOL_PAR_ANY) ?
827 CPUPOOLID_NONE: op->cpupool_id;
828 c = cpupool_create(poolid, op->sched_id);
829 if ( IS_ERR(c) )
830 ret = PTR_ERR(c);
831 else
832 {
833 op->cpupool_id = c->cpupool_id;
834 cpupool_put(c);
835 }
836 }
837 break;
838
839 case XEN_SYSCTL_CPUPOOL_OP_DESTROY:
840 {
841 c = cpupool_get_by_id(op->cpupool_id);
842 ret = -ENOENT;
843 if ( c == NULL )
844 break;
845 ret = cpupool_destroy(c);
846 cpupool_put(c);
847 }
848 break;
849
850 case XEN_SYSCTL_CPUPOOL_OP_INFO:
851 {
852 c = cpupool_get_next_by_id(op->cpupool_id);
853 ret = -ENOENT;
854 if ( c == NULL )
855 break;
856 op->cpupool_id = c->cpupool_id;
857 op->sched_id = c->sched->sched_id;
858 op->n_dom = c->n_dom;
859 ret = cpumask_to_xenctl_bitmap(&op->cpumap, c->cpu_valid);
860 cpupool_put(c);
861 }
862 break;
863
864 case XEN_SYSCTL_CPUPOOL_OP_ADDCPU:
865 {
866 unsigned int cpu;
867 const cpumask_t *cpus;
868
869 cpu = op->cpu;
870 debugtrace_printk("cpupool_assign_cpu(pool=%u,cpu=%u)\n",
871 op->cpupool_id, cpu);
872
873 spin_lock(&cpupool_lock);
874
875 c = cpupool_find_by_id(op->cpupool_id);
876 ret = -ENOENT;
877 if ( c == NULL )
878 goto addcpu_out;
879 if ( cpu == XEN_SYSCTL_CPUPOOL_PAR_ANY )
880 {
881 for_each_cpu ( cpu, &cpupool_free_cpus )
882 {
883 cpus = sched_get_opt_cpumask(c->gran, cpu);
884 if ( cpumask_subset(cpus, &cpupool_free_cpus) )
885 break;
886 }
887 ret = -ENODEV;
888 if ( cpu >= nr_cpu_ids )
889 goto addcpu_out;
890 }
891 ret = -EINVAL;
892 if ( cpu >= nr_cpu_ids )
893 goto addcpu_out;
894 ret = -ENODEV;
895 if ( !cpu_online(cpu) )
896 goto addcpu_out;
897 cpus = sched_get_opt_cpumask(c->gran, cpu);
898 if ( !cpumask_subset(cpus, &cpupool_free_cpus) ||
899 cpumask_intersects(cpus, &cpupool_locked_cpus) )
900 goto addcpu_out;
901 ret = cpupool_assign_cpu_locked(c, cpu);
902
903 addcpu_out:
904 spin_unlock(&cpupool_lock);
905 debugtrace_printk("cpupool_assign_cpu(pool=%u,cpu=%u) ret %d\n",
906 op->cpupool_id, cpu, ret);
907
908 }
909 break;
910
911 case XEN_SYSCTL_CPUPOOL_OP_RMCPU:
912 {
913 unsigned int cpu;
914
915 c = cpupool_get_by_id(op->cpupool_id);
916 ret = -ENOENT;
917 if ( c == NULL )
918 break;
919 cpu = op->cpu;
920 if ( cpu == XEN_SYSCTL_CPUPOOL_PAR_ANY )
921 cpu = cpumask_last(c->cpu_valid);
922 ret = (cpu < nr_cpu_ids) ? cpupool_unassign_cpu(c, cpu) : -EINVAL;
923 cpupool_put(c);
924 }
925 break;
926
927 case XEN_SYSCTL_CPUPOOL_OP_MOVEDOMAIN:
928 {
929 struct domain *d;
930
931 ret = rcu_lock_remote_domain_by_id(op->domid, &d);
932 if ( ret )
933 break;
934 if ( d->cpupool == NULL )
935 {
936 ret = -EINVAL;
937 rcu_unlock_domain(d);
938 break;
939 }
940 if ( op->cpupool_id == d->cpupool->cpupool_id )
941 {
942 ret = 0;
943 rcu_unlock_domain(d);
944 break;
945 }
946 debugtrace_printk("cpupool move_domain(dom=%d)->pool=%u\n",
947 d->domain_id, op->cpupool_id);
948 ret = -ENOENT;
949 spin_lock(&cpupool_lock);
950
951 c = cpupool_find_by_id(op->cpupool_id);
952 if ( (c != NULL) && cpumask_weight(c->cpu_valid) )
953 ret = cpupool_move_domain_locked(d, c);
954
955 spin_unlock(&cpupool_lock);
956 debugtrace_printk("cpupool move_domain(dom=%d)->pool=%u ret %d\n",
957 d->domain_id, op->cpupool_id, ret);
958 rcu_unlock_domain(d);
959 }
960 break;
961
962 case XEN_SYSCTL_CPUPOOL_OP_FREEINFO:
963 {
964 ret = cpumask_to_xenctl_bitmap(
965 &op->cpumap, &cpupool_free_cpus);
966 }
967 break;
968
969 default:
970 ret = -ENOSYS;
971 break;
972 }
973
974 return ret;
975 }
976
977 unsigned int cpupool_get_id(const struct domain *d)
978 {
979 return d->cpupool ? d->cpupool->cpupool_id : CPUPOOLID_NONE;
980 }
981
982 const cpumask_t *cpupool_valid_cpus(const struct cpupool *pool)
983 {
984 return pool->cpu_valid;
985 }
986
987 void cf_check dump_runq(unsigned char key)
988 {
989 s_time_t now = NOW();
990 struct cpupool *c;
991
992 spin_lock(&cpupool_lock);
993
994 printk("sched_smt_power_savings: %s\n",
995 sched_smt_power_savings? "enabled":"disabled");
996 printk("NOW=%"PRI_stime"\n", now);
997
998 printk("Online Cpus: %*pbl\n", CPUMASK_PR(&cpu_online_map));
999 if ( !cpumask_empty(&cpupool_free_cpus) )
1000 {
1001 printk("Free Cpus: %*pbl\n", CPUMASK_PR(&cpupool_free_cpus));
1002 schedule_dump(NULL);
1003 }
1004
1005 list_for_each_entry(c, &cpupool_list, list)
1006 {
1007 printk("Cpupool %u:\n", c->cpupool_id);
1008 printk("Cpus: %*pbl\n", CPUMASK_PR(c->cpu_valid));
1009 sched_gran_print(c->gran, cpupool_get_granularity(c));
1010 schedule_dump(c);
1011 }
1012
1013 spin_unlock(&cpupool_lock);
1014 }
1015
1016 static int cf_check cpu_callback(
1017 struct notifier_block *nfb, unsigned long action, void *hcpu)
1018 {
1019 static struct cpu_rm_data *mem;
1020
1021 unsigned int cpu = (unsigned long)hcpu;
1022 int rc = 0;
1023
1024 switch ( action )
1025 {
1026 case CPU_DOWN_FAILED:
1027 if ( system_state <= SYS_STATE_active )
1028 {
1029 if ( mem )
1030 {
1031 free_cpu_rm_data(mem, cpu);
1032 mem = NULL;
1033 }
1034 rc = cpupool_cpu_add(cpu);
1035 }
1036 break;
1037 case CPU_ONLINE:
1038 if ( system_state <= SYS_STATE_active )
1039 rc = cpupool_cpu_add(cpu);
1040 else
1041 sched_migrate_timers(cpu);
1042 break;
1043 case CPU_DOWN_PREPARE:
1044 /* Suspend/Resume don't change assignments of cpus to cpupools. */
1045 if ( system_state <= SYS_STATE_active )
1046 {
1047 rc = cpupool_cpu_remove_prologue(cpu);
1048 if ( !rc )
1049 {
1050 ASSERT(!mem);
1051 mem = alloc_cpu_rm_data(cpu, true);
1052 rc = mem ? 0 : -ENOMEM;
1053 }
1054 }
1055 break;
1056 case CPU_DYING:
1057 /* Suspend/Resume don't change assignments of cpus to cpupools. */
1058 if ( system_state <= SYS_STATE_active )
1059 {
1060 ASSERT(mem);
1061 cpupool_cpu_remove(cpu, mem);
1062 }
1063 break;
1064 case CPU_DEAD:
1065 if ( system_state <= SYS_STATE_active )
1066 {
1067 ASSERT(mem);
1068 free_cpu_rm_data(mem, cpu);
1069 mem = NULL;
1070 }
1071 break;
1072 case CPU_RESUME_FAILED:
1073 cpupool_cpu_remove_forced(cpu);
1074 break;
1075 default:
1076 break;
1077 }
1078
1079 return notifier_from_errno(rc);
1080 }
1081
1082 static struct notifier_block cpu_nfb = {
1083 .notifier_call = cpu_callback
1084 };
1085
1086 #ifdef CONFIG_HYPFS
1087
1088 static HYPFS_DIR_INIT(cpupool_pooldir, "%u");
1089
1090 static int cf_check cpupool_dir_read(
1091 const struct hypfs_entry *entry, XEN_GUEST_HANDLE_PARAM(void) uaddr)
1092 {
1093 int ret = 0;
1094 struct cpupool *c;
1095 struct hypfs_dyndir_id *data;
1096
1097 data = hypfs_get_dyndata();
1098
1099 list_for_each_entry(c, &cpupool_list, list)
1100 {
1101 data->id = c->cpupool_id;
1102 data->data = c;
1103
1104 ret = hypfs_read_dyndir_id_entry(&cpupool_pooldir, c->cpupool_id,
1105 list_is_last(&c->list, &cpupool_list),
1106 &uaddr);
1107 if ( ret )
1108 break;
1109 }
1110
1111 return ret;
1112 }
1113
1114 static unsigned int cf_check cpupool_dir_getsize(
1115 const struct hypfs_entry *entry)
1116 {
1117 const struct cpupool *c;
1118 unsigned int size = 0;
1119
1120 list_for_each_entry(c, &cpupool_list, list)
1121 size += hypfs_dynid_entry_size(entry, c->cpupool_id);
1122
1123 return size;
1124 }
1125
1126 static const struct hypfs_entry *cf_check cpupool_dir_enter(
1127 const struct hypfs_entry *entry)
1128 {
1129 struct hypfs_dyndir_id *data;
1130
1131 data = hypfs_alloc_dyndata(struct hypfs_dyndir_id);
1132 if ( !data )
1133 return ERR_PTR(-ENOMEM);
1134 data->id = CPUPOOLID_NONE;
1135
1136 spin_lock(&cpupool_lock);
1137
1138 return entry;
1139 }
1140
1141 static void cf_check cpupool_dir_exit(const struct hypfs_entry *entry)
1142 {
1143 spin_unlock(&cpupool_lock);
1144
1145 hypfs_free_dyndata();
1146 }
1147
1148 static struct hypfs_entry *cf_check cpupool_dir_findentry(
1149 const struct hypfs_entry_dir *dir, const char *name, unsigned int name_len)
1150 {
1151 unsigned long id;
1152 const char *end;
1153 struct cpupool *cpupool;
1154
1155 id = simple_strtoul(name, &end, 10);
1156 if ( end != name + name_len || id > UINT_MAX )
1157 return ERR_PTR(-ENOENT);
1158
1159 cpupool = __cpupool_find_by_id(id, true);
1160
1161 if ( !cpupool )
1162 return ERR_PTR(-ENOENT);
1163
1164 return hypfs_gen_dyndir_id_entry(&cpupool_pooldir, id, cpupool);
1165 }
1166
1167 static int cf_check cpupool_gran_read(
1168 const struct hypfs_entry *entry, XEN_GUEST_HANDLE_PARAM(void) uaddr)
1169 {
1170 const struct hypfs_dyndir_id *data;
1171 const struct cpupool *cpupool;
1172 const char *gran;
1173
1174 data = hypfs_get_dyndata();
1175 cpupool = data->data;
1176 ASSERT(cpupool);
1177
1178 gran = sched_gran_get_name(cpupool->gran);
1179
1180 if ( !*gran )
1181 return -ENOENT;
1182
1183 return copy_to_guest(uaddr, gran, strlen(gran) + 1) ? -EFAULT : 0;
1184 }
1185
1186 static unsigned int cf_check hypfs_gran_getsize(const struct hypfs_entry *entry)
1187 {
1188 const struct hypfs_dyndir_id *data;
1189 const struct cpupool *cpupool;
1190 const char *gran;
1191
1192 data = hypfs_get_dyndata();
1193 cpupool = data->data;
1194 ASSERT(cpupool);
1195
1196 gran = sched_gran_get_name(cpupool->gran);
1197
1198 return strlen(gran) + 1;
1199 }
1200
1201 static int cf_check cpupool_gran_write(
1202 struct hypfs_entry_leaf *leaf, XEN_GUEST_HANDLE_PARAM(const_void) uaddr,
1203 unsigned int ulen)
1204 {
1205 const struct hypfs_dyndir_id *data;
1206 struct cpupool *cpupool;
1207 enum sched_gran gran;
1208 unsigned int sched_gran = 0;
1209 char name[SCHED_GRAN_NAME_LEN];
1210 int ret = 0;
1211
1212 if ( ulen > SCHED_GRAN_NAME_LEN )
1213 return -ENOSPC;
1214
1215 if ( copy_from_guest(name, uaddr, ulen) )
1216 return -EFAULT;
1217
1218 if ( memchr(name, 0, ulen) == (name + ulen - 1) )
1219 sched_gran = sched_gran_get(name, &gran) ?
1220 0 : cpupool_check_granularity(gran);
1221 if ( sched_gran == 0 )
1222 return -EINVAL;
1223
1224 data = hypfs_get_dyndata();
1225 cpupool = data->data;
1226 ASSERT(cpupool);
1227
1228 /* Guarded by the cpupool_lock taken in cpupool_dir_enter(). */
1229 if ( !cpumask_empty(cpupool->cpu_valid) )
1230 ret = -EBUSY;
1231 else
1232 {
1233 cpupool->gran = gran;
1234 cpupool->sched_gran = sched_gran;
1235 }
1236
1237 return ret;
1238 }
1239
1240 static const struct hypfs_funcs cpupool_gran_funcs = {
1241 .enter = hypfs_node_enter,
1242 .exit = hypfs_node_exit,
1243 .read = cpupool_gran_read,
1244 .write = cpupool_gran_write,
1245 .getsize = hypfs_gran_getsize,
1246 .findentry = hypfs_leaf_findentry,
1247 };
1248
1249 static HYPFS_VARSIZE_INIT(cpupool_gran, XEN_HYPFS_TYPE_STRING, "sched-gran",
1250 SCHED_GRAN_NAME_LEN, &cpupool_gran_funcs);
1251 static char granstr[SCHED_GRAN_NAME_LEN] = {
1252 [0 ... SCHED_GRAN_NAME_LEN - 2] = '?',
1253 [SCHED_GRAN_NAME_LEN - 1] = 0
1254 };
1255
1256 static const struct hypfs_funcs cpupool_dir_funcs = {
1257 .enter = cpupool_dir_enter,
1258 .exit = cpupool_dir_exit,
1259 .read = cpupool_dir_read,
1260 .write = hypfs_write_deny,
1261 .getsize = cpupool_dir_getsize,
1262 .findentry = cpupool_dir_findentry,
1263 };
1264
1265 static HYPFS_DIR_INIT_FUNC(cpupool_dir, "cpupool", &cpupool_dir_funcs);
1266
1267 static void cpupool_hypfs_init(void)
1268 {
1269 hypfs_add_dir(&hypfs_root, &cpupool_dir, true);
1270 hypfs_add_dyndir(&cpupool_dir, &cpupool_pooldir);
1271 hypfs_string_set_reference(&cpupool_gran, granstr);
1272 hypfs_add_leaf(&cpupool_pooldir, &cpupool_gran, true);
1273 }
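/*
 * Resulting hypfs layout (a sketch derived from the entries registered
 * above): each pool shows up as /cpupool/<pool-id>/ with a "sched-gran"
 * string leaf, readable always and writable only while the pool has no cpus
 * assigned (see cpupool_gran_write()). Which tool is used to access the
 * nodes (e.g. xenhypfs) is outside the scope of this file.
 */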
1274
1275 #else /* CONFIG_HYPFS */
1276
1277 static void cpupool_hypfs_init(void)
1278 {
1279 }
1280
1281 #endif /* CONFIG_HYPFS */
1282
1283 struct cpupool *__init cpupool_create_pool(unsigned int pool_id, int sched_id)
1284 {
1285 struct cpupool *pool;
1286
1287 if ( sched_id < 0 )
1288 sched_id = scheduler_get_default()->sched_id;
1289
1290 pool = cpupool_create(pool_id, sched_id);
1291
1292 BUG_ON(IS_ERR(pool));
1293 cpupool_put(pool);
1294
1295 return pool;
1296 }
1297
1298 static int __init cf_check cpupool_init(void)
1299 {
1300 unsigned int cpu;
1301
1302 cpupool_gran_init();
1303
1304 cpupool_hypfs_init();
1305
1306 register_cpu_notifier(&cpu_nfb);
1307
1308 btcpupools_dtb_parse();
1309
1310 btcpupools_allocate_pools();
1311
1312 spin_lock(&cpupool_lock);
1313
1314 cpumask_copy(&cpupool_free_cpus, &cpu_online_map);
1315
1316 for_each_cpu ( cpu, &cpupool_free_cpus )
1317 {
1318 unsigned int pool_id = btcpupools_get_cpupool_id(cpu);
1319 struct cpupool *pool = cpupool_find_by_id(pool_id);
1320
1321 ASSERT(pool);
1322 cpupool_assign_cpu_locked(pool, cpu);
1323 }
1324
1325 spin_unlock(&cpupool_lock);
1326
1327 return 0;
1328 }
1329 __initcall(cpupool_init);
1330
1331 /*
1332 * Local variables:
1333 * mode: C
1334 * c-file-style: "BSD"
1335 * c-basic-offset: 4
1336 * tab-width: 4
1337 * indent-tabs-mode: nil
1338 * End:
1339 */
1340