1 /*
2 * Copyright (C) 2001, 2002 Andy Grover <andrew.grover@intel.com>
3 * Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
4 * Copyright (C) 2002 - 2004 Dominik Brodowski <linux@brodo.de>
5 * Copyright (C) 2006 Denis Sadykov <denis.m.sadykov@intel.com>
6 *
7 * Feb 2008 - Liu Jinsong <jinsong.liu@intel.com>
8 * Add cpufreq limit change handle and per-cpu cpufreq add/del
9 * to cope with cpu hotplug
10 *
11 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
12 *
13 * This program is free software; you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License as published by
15 * the Free Software Foundation; either version 2 of the License, or (at
16 * your option) any later version.
17 *
18 * This program is distributed in the hope that it will be useful, but
19 * WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 * General Public License for more details.
22 *
23 * You should have received a copy of the GNU General Public License along
24 * with this program; If not, see <http://www.gnu.org/licenses/>.
25 *
26 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
27 */
28
29 #include <xen/types.h>
30 #include <xen/errno.h>
31 #include <xen/delay.h>
32 #include <xen/cpumask.h>
33 #include <xen/list.h>
34 #include <xen/sched.h>
35 #include <xen/string.h>
36 #include <xen/timer.h>
37 #include <xen/xmalloc.h>
38 #include <xen/guest_access.h>
39 #include <xen/domain.h>
40 #include <xen/cpu.h>
41 #include <asm/bug.h>
42 #include <asm/io.h>
43 #include <asm/processor.h>
44 #include <asm/percpu.h>
45 #include <acpi/acpi.h>
46 #include <acpi/cpufreq/cpufreq.h>
47
/*
 * Frequency bounds taken from the "minfreq" / "maxfreq" sub-options.
 * A value of 0 leaves the corresponding policy bound untouched.
 */
static unsigned int __read_mostly usr_min_freq;
static unsigned int __read_mostly usr_max_freq;
/* Copies the non-zero bounds above into a policy. */
static void cpufreq_cmdline_common_para(struct cpufreq_policy *new_policy);
51
/*
 * Book-keeping for one performance domain: 'dom' is the domain number taken
 * from a CPU's domain_info (printed as _PSD elsewhere in this file), 'map'
 * holds the CPUs currently attached to the domain, and 'node' links the
 * entry onto cpufreq_dom_list_head.
 */
struct cpufreq_dom {
    unsigned int dom;
    cpumask_var_t map;
    struct list_head node;
};
/* All domain entries created by cpufreq_add_cpu(). */
static LIST_HEAD_READ_MOSTLY(cpufreq_dom_list_head);
58
/* Governor chosen while parsing the "cpufreq" option; NULL until one is picked. */
struct cpufreq_governor *__read_mostly cpufreq_opt_governor;
/* Governors added through cpufreq_register_governor(). */
LIST_HEAD_READ_MOSTLY(cpufreq_governor_list);

/* Xen is the default frequency controller; the "cpufreq" option can change it. */
enum cpufreq_controller cpufreq_controller = FREQCTL_xen;

/* Parses the sub-options that follow "cpufreq=xen" (defined further down). */
static int __init cpufreq_cmdline_parse(const char *s);
66
setup_cpufreq_option(const char * str)67 static int __init setup_cpufreq_option(const char *str)
68 {
69 const char *arg = strpbrk(str, ",:");
70 int choice;
71
72 if ( !arg )
73 arg = strchr(str, '\0');
74 choice = parse_bool(str, arg);
75
76 if ( choice < 0 && !strncmp(str, "dom0-kernel", arg - str) )
77 {
78 xen_processor_pmbits &= ~XEN_PROCESSOR_PM_PX;
79 cpufreq_controller = FREQCTL_dom0_kernel;
80 opt_dom0_vcpus_pin = 1;
81 return 0;
82 }
83
84 if ( choice == 0 || !strncmp(str, "none", arg - str) )
85 {
86 xen_processor_pmbits &= ~XEN_PROCESSOR_PM_PX;
87 cpufreq_controller = FREQCTL_none;
88 return 0;
89 }
90
91 if ( choice > 0 || !strncmp(str, "xen", arg - str) )
92 {
93 xen_processor_pmbits |= XEN_PROCESSOR_PM_PX;
94 cpufreq_controller = FREQCTL_xen;
95 if ( *arg && *(arg + 1) )
96 return cpufreq_cmdline_parse(arg + 1);
97 }
98
99 return (choice < 0) ? -EINVAL : 0;
100 }
101 custom_param("cpufreq", setup_cpufreq_option);
102
/* Enables the extra cpufreq printk()s in this file; set by the "verbose" sub-option. */
bool_t __read_mostly cpufreq_verbose;
104
__find_governor(const char * governor)105 struct cpufreq_governor *__find_governor(const char *governor)
106 {
107 struct cpufreq_governor *t;
108
109 if (!governor)
110 return NULL;
111
112 list_for_each_entry(t, &cpufreq_governor_list, governor_list)
113 if (!strnicmp(governor, t->name, CPUFREQ_NAME_LEN))
114 return t;
115
116 return NULL;
117 }
118
cpufreq_register_governor(struct cpufreq_governor * governor)119 int __init cpufreq_register_governor(struct cpufreq_governor *governor)
120 {
121 if (!governor)
122 return -EINVAL;
123
124 if (__find_governor(governor->name) != NULL)
125 return -EEXIST;
126
127 list_add(&governor->governor_list, &cpufreq_governor_list);
128 return 0;
129 }
130
cpufreq_limit_change(unsigned int cpu)131 int cpufreq_limit_change(unsigned int cpu)
132 {
133 struct processor_performance *perf;
134 struct cpufreq_policy *data;
135 struct cpufreq_policy policy;
136
137 if (!cpu_online(cpu) || !(data = per_cpu(cpufreq_cpu_policy, cpu)) ||
138 !processor_pminfo[cpu])
139 return -ENODEV;
140
141 perf = &processor_pminfo[cpu]->perf;
142
143 if (perf->platform_limit >= perf->state_count)
144 return -EINVAL;
145
146 memcpy(&policy, data, sizeof(struct cpufreq_policy));
147
148 policy.max =
149 perf->states[perf->platform_limit].core_frequency * 1000;
150
151 return __cpufreq_set_policy(data, &policy);
152 }
153
/*
 * Bring an online CPU under cpufreq management.
 *
 * Steps, in order:
 *   1. find the cpufreq_dom entry for the CPU's domain, or create one;
 *   2. allocate and driver-initialise a fresh policy (new domain, or
 *      CPUFREQ_SHARED_TYPE_HW), or share the policy of the domain's first CPU;
 *   3. record the CPU in the policy and domain masks and initialise its
 *      statistics;
 *   4. once every CPU of the domain has been added (or always, for
 *      CPUFREQ_SHARED_TYPE_HW), apply the initial policy via
 *      __cpufreq_set_policy(), retrying with CPUFREQ_DEFAULT_GOVERNOR if a
 *      different governor was rejected.
 *
 * Returns 0 on success and also when there is nothing to do (no driver
 * registered, or the CPU already has a policy); -EINVAL when the CPU has no
 * initialised Px data, is offline, or disagrees with its domain's
 * coordination data; -ENOMEM on allocation failure; otherwise the error
 * reported by the driver's init hook, cpufreq_statistic_init() or
 * __cpufreq_set_policy().
 */
int cpufreq_add_cpu(unsigned int cpu)
{
    int ret = 0;
    unsigned int firstcpu;
    unsigned int dom, domexist = 0;
    unsigned int hw_all = 0;
    struct list_head *pos;
    struct cpufreq_dom *cpufreq_dom = NULL;
    struct cpufreq_policy new_policy;
    struct cpufreq_policy *policy;
    struct processor_performance *perf;

    /* to protect the case when Px was not controlled by xen */
    if ( !processor_pminfo[cpu] || !cpu_online(cpu) )
        return -EINVAL;

    perf = &processor_pminfo[cpu]->perf;

    /* Px data must have been fully uploaded (see set_px_pminfo()). */
    if ( !(perf->init & XEN_PX_INIT) )
        return -EINVAL;

    /* Without a registered driver there is nothing to set up. */
    if (!cpufreq_driver)
        return 0;

    /* Already managed: adding twice is a no-op. */
    if (per_cpu(cpufreq_cpu_policy, cpu))
        return 0;

    if (perf->shared_type == CPUFREQ_SHARED_TYPE_HW)
        hw_all = 1;

    dom = perf->domain_info.domain;

    /* Look for an existing entry describing this CPU's domain. */
    list_for_each(pos, &cpufreq_dom_list_head) {
        cpufreq_dom = list_entry(pos, struct cpufreq_dom, node);
        if (dom == cpufreq_dom->dom) {
            domexist = 1;
            break;
        }
    }

    if (!domexist) {
        /* First CPU seen for this domain: create and link a new entry. */
        cpufreq_dom = xzalloc(struct cpufreq_dom);
        if (!cpufreq_dom)
            return -ENOMEM;

        if (!zalloc_cpumask_var(&cpufreq_dom->map)) {
            xfree(cpufreq_dom);
            return -ENOMEM;
        }

        cpufreq_dom->dom = dom;
        list_add(&cpufreq_dom->node, &cpufreq_dom_list_head);
    } else {
        /* domain sanity check under whatever coordination type */
        firstcpu = cpumask_first(cpufreq_dom->map);
        if ((perf->domain_info.coord_type !=
            processor_pminfo[firstcpu]->perf.domain_info.coord_type) ||
            (perf->domain_info.num_processors !=
            processor_pminfo[firstcpu]->perf.domain_info.num_processors)) {

            printk(KERN_WARNING "cpufreq fail to add CPU%d:"
                   "incorrect _PSD(%"PRIu64":%"PRIu64"), "
                   "expect(%"PRIu64"/%"PRIu64")\n",
                   cpu, perf->domain_info.coord_type,
                   perf->domain_info.num_processors,
                   processor_pminfo[firstcpu]->perf.domain_info.coord_type,
                   processor_pminfo[firstcpu]->perf.domain_info.num_processors
                );
            /* The domain entry pre-existed, so nothing needs undoing here. */
            return -EINVAL;
        }
    }

    if (!domexist || hw_all) {
        /* This CPU gets a policy of its own. */
        policy = xzalloc(struct cpufreq_policy);
        if (!policy) {
            ret = -ENOMEM;
            goto err0;
        }

        if (!zalloc_cpumask_var(&policy->cpus)) {
            xfree(policy);
            ret = -ENOMEM;
            goto err0;
        }

        policy->cpu = cpu;
        /* Published before the driver's init hook is invoked. */
        per_cpu(cpufreq_cpu_policy, cpu) = policy;

        ret = cpufreq_driver->init(policy);
        if (ret) {
            /* init failed, so the driver's exit hook is not called. */
            free_cpumask_var(policy->cpus);
            xfree(policy);
            per_cpu(cpufreq_cpu_policy, cpu) = NULL;
            goto err0;
        }
        if (cpufreq_verbose)
            printk("CPU %u initialization completed\n", cpu);
    } else {
        /* Share the policy already owned by the domain's first CPU. */
        firstcpu = cpumask_first(cpufreq_dom->map);
        policy = per_cpu(cpufreq_cpu_policy, firstcpu);

        per_cpu(cpufreq_cpu_policy, cpu) = policy;
        if (cpufreq_verbose)
            printk("adding CPU %u\n", cpu);
    }

    cpumask_set_cpu(cpu, policy->cpus);
    cpumask_set_cpu(cpu, cpufreq_dom->map);

    ret = cpufreq_statistic_init(cpu);
    if (ret)
        goto err1;

    /*
     * Apply the initial policy only once the domain is complete; with
     * hardware coordination every CPU has its own policy, so do it at once.
     */
    if (hw_all || (cpumask_weight(cpufreq_dom->map) ==
                   perf->domain_info.num_processors)) {
        memcpy(&new_policy, policy, sizeof(struct cpufreq_policy));
        /*
         * new_policy keeps the governor value copied above while the live
         * policy's pointer is cleared.
         * NOTE(review): presumably so __cpufreq_set_policy() treats the
         * governor as newly selected - confirm against its implementation.
         */
        policy->governor = NULL;

        /* Overlay the minfreq/maxfreq bounds given on the command line. */
        cpufreq_cmdline_common_para(&new_policy);

        ret = __cpufreq_set_policy(policy, &new_policy);
        if (ret) {
            if (new_policy.governor == CPUFREQ_DEFAULT_GOVERNOR)
                /* if default governor fail, cpufreq really meet troubles */
                goto err2;
            else {
                /* grub option governor fail */
                /* give one more chance to default gov */
                memcpy(&new_policy, policy, sizeof(struct cpufreq_policy));
                new_policy.governor = CPUFREQ_DEFAULT_GOVERNOR;
                ret = __cpufreq_set_policy(policy, &new_policy);
                if (ret)
                    goto err2;
            }
        }
    }

    return 0;

    /* Error unwinding: each label undoes one more of the steps above. */
err2:
    cpufreq_statistic_exit(cpu);
err1:
    /* Detach the CPU; tear the policy down if no CPU uses it any more. */
    per_cpu(cpufreq_cpu_policy, cpu) = NULL;
    cpumask_clear_cpu(cpu, policy->cpus);
    cpumask_clear_cpu(cpu, cpufreq_dom->map);

    if (cpumask_empty(policy->cpus)) {
        cpufreq_driver->exit(policy);
        free_cpumask_var(policy->cpus);
        xfree(policy);
    }
err0:
    /* Drop the domain entry again if it ended up without any CPU. */
    if (cpumask_empty(cpufreq_dom->map)) {
        list_del(&cpufreq_dom->node);
        free_cpumask_var(cpufreq_dom->map);
        xfree(cpufreq_dom);
    }

    return ret;
}
314
cpufreq_del_cpu(unsigned int cpu)315 int cpufreq_del_cpu(unsigned int cpu)
316 {
317 unsigned int dom, domexist = 0;
318 unsigned int hw_all = 0;
319 struct list_head *pos;
320 struct cpufreq_dom *cpufreq_dom = NULL;
321 struct cpufreq_policy *policy;
322 struct processor_performance *perf;
323
324 /* to protect the case when Px was not controlled by xen */
325 if ( !processor_pminfo[cpu] || !cpu_online(cpu) )
326 return -EINVAL;
327
328 perf = &processor_pminfo[cpu]->perf;
329
330 if ( !(perf->init & XEN_PX_INIT) )
331 return -EINVAL;
332
333 if (!per_cpu(cpufreq_cpu_policy, cpu))
334 return 0;
335
336 if (perf->shared_type == CPUFREQ_SHARED_TYPE_HW)
337 hw_all = 1;
338
339 dom = perf->domain_info.domain;
340 policy = per_cpu(cpufreq_cpu_policy, cpu);
341
342 list_for_each(pos, &cpufreq_dom_list_head) {
343 cpufreq_dom = list_entry(pos, struct cpufreq_dom, node);
344 if (dom == cpufreq_dom->dom) {
345 domexist = 1;
346 break;
347 }
348 }
349
350 if (!domexist)
351 return -EINVAL;
352
353 /* for HW_ALL, stop gov for each core of the _PSD domain */
354 /* for SW_ALL & SW_ANY, stop gov for the 1st core of the _PSD domain */
355 if (hw_all || (cpumask_weight(cpufreq_dom->map) ==
356 perf->domain_info.num_processors))
357 __cpufreq_governor(policy, CPUFREQ_GOV_STOP);
358
359 cpufreq_statistic_exit(cpu);
360 per_cpu(cpufreq_cpu_policy, cpu) = NULL;
361 cpumask_clear_cpu(cpu, policy->cpus);
362 cpumask_clear_cpu(cpu, cpufreq_dom->map);
363
364 if (cpumask_empty(policy->cpus)) {
365 cpufreq_driver->exit(policy);
366 free_cpumask_var(policy->cpus);
367 xfree(policy);
368 }
369
370 /* for the last cpu of the domain, clean room */
371 /* It's safe here to free freq_table, drv_data and policy */
372 if (cpumask_empty(cpufreq_dom->map)) {
373 list_del(&cpufreq_dom->node);
374 free_cpumask_var(cpufreq_dom->map);
375 xfree(cpufreq_dom);
376 }
377
378 if (cpufreq_verbose)
379 printk("deleting CPU %u\n", cpu);
380 return 0;
381 }
382
/* Print every field of one _PCT register description on a single line. */
static void print_PCT(struct xen_pct_register *ptr)
{
    printk("\t_PCT: descriptor=%d, length=%d, space_id=%d, "
           "bit_width=%d, bit_offset=%d, reserved=%d, address=%"PRId64"\n",
           ptr->descriptor, ptr->length, ptr->space_id, ptr->bit_width,
           ptr->bit_offset, ptr->reserved, ptr->address);
}
390
print_PSS(struct xen_processor_px * ptr,int count)391 static void print_PSS(struct xen_processor_px *ptr, int count)
392 {
393 int i;
394 printk("\t_PSS: state_count=%d\n", count);
395 for (i=0; i<count; i++){
396 printk("\tState%d: %"PRId64"MHz %"PRId64"mW %"PRId64"us "
397 "%"PRId64"us %#"PRIx64" %#"PRIx64"\n",
398 i,
399 ptr[i].core_frequency,
400 ptr[i].power,
401 ptr[i].transition_latency,
402 ptr[i].bus_master_latency,
403 ptr[i].control,
404 ptr[i].status);
405 }
406 }
407
/* Print every field of a _PSD package (domain coordination data) on one line. */
static void print_PSD( struct xen_psd_package *ptr)
{
    printk("\t_PSD: num_entries=%"PRId64" rev=%"PRId64
           " domain=%"PRId64" coord_type=%"PRId64" num_processors=%"PRId64"\n",
           ptr->num_entries, ptr->revision, ptr->domain, ptr->coord_type,
           ptr->num_processors);
}
415
/* Print the _PPC value, i.e. the platform limit stored for a processor. */
static void print_PPC(unsigned int platform_limit)
{
    printk("\t_PPC: %d\n", platform_limit);
}
420
set_px_pminfo(uint32_t acpi_id,struct xen_processor_performance * dom0_px_info)421 int set_px_pminfo(uint32_t acpi_id, struct xen_processor_performance *dom0_px_info)
422 {
423 int ret=0, cpuid;
424 struct processor_pminfo *pmpt;
425 struct processor_performance *pxpt;
426
427 cpuid = get_cpu_id(acpi_id);
428 if ( cpuid < 0 || !dom0_px_info)
429 {
430 ret = -EINVAL;
431 goto out;
432 }
433 if ( cpufreq_verbose )
434 printk("Set CPU acpi_id(%d) cpuid(%d) Px State info:\n",
435 acpi_id, cpuid);
436
437 pmpt = processor_pminfo[cpuid];
438 if ( !pmpt )
439 {
440 pmpt = xzalloc(struct processor_pminfo);
441 if ( !pmpt )
442 {
443 ret = -ENOMEM;
444 goto out;
445 }
446 processor_pminfo[cpuid] = pmpt;
447 }
448 pxpt = &pmpt->perf;
449 pmpt->acpi_id = acpi_id;
450 pmpt->id = cpuid;
451
452 if ( dom0_px_info->flags & XEN_PX_PCT )
453 {
454 /* space_id check */
455 if (dom0_px_info->control_register.space_id !=
456 dom0_px_info->status_register.space_id)
457 {
458 ret = -EINVAL;
459 goto out;
460 }
461
462 memcpy ((void *)&pxpt->control_register,
463 (void *)&dom0_px_info->control_register,
464 sizeof(struct xen_pct_register));
465 memcpy ((void *)&pxpt->status_register,
466 (void *)&dom0_px_info->status_register,
467 sizeof(struct xen_pct_register));
468
469 if ( cpufreq_verbose )
470 {
471 print_PCT(&pxpt->control_register);
472 print_PCT(&pxpt->status_register);
473 }
474 }
475
476 if ( dom0_px_info->flags & XEN_PX_PSS )
477 {
478 /* capability check */
479 if (dom0_px_info->state_count <= 1)
480 {
481 ret = -EINVAL;
482 goto out;
483 }
484
485 if ( !(pxpt->states = xmalloc_array(struct xen_processor_px,
486 dom0_px_info->state_count)) )
487 {
488 ret = -ENOMEM;
489 goto out;
490 }
491 if ( copy_from_guest(pxpt->states, dom0_px_info->states,
492 dom0_px_info->state_count) )
493 {
494 ret = -EFAULT;
495 goto out;
496 }
497 pxpt->state_count = dom0_px_info->state_count;
498
499 if ( cpufreq_verbose )
500 print_PSS(pxpt->states,pxpt->state_count);
501 }
502
503 if ( dom0_px_info->flags & XEN_PX_PSD )
504 {
505 /* check domain coordination */
506 if (dom0_px_info->shared_type != CPUFREQ_SHARED_TYPE_ALL &&
507 dom0_px_info->shared_type != CPUFREQ_SHARED_TYPE_ANY &&
508 dom0_px_info->shared_type != CPUFREQ_SHARED_TYPE_HW)
509 {
510 ret = -EINVAL;
511 goto out;
512 }
513
514 pxpt->shared_type = dom0_px_info->shared_type;
515 memcpy ((void *)&pxpt->domain_info,
516 (void *)&dom0_px_info->domain_info,
517 sizeof(struct xen_psd_package));
518
519 if ( cpufreq_verbose )
520 print_PSD(&pxpt->domain_info);
521 }
522
523 if ( dom0_px_info->flags & XEN_PX_PPC )
524 {
525 pxpt->platform_limit = dom0_px_info->platform_limit;
526
527 if ( cpufreq_verbose )
528 print_PPC(pxpt->platform_limit);
529
530 if ( pxpt->init == XEN_PX_INIT )
531 {
532 ret = cpufreq_limit_change(cpuid);
533 goto out;
534 }
535 }
536
537 if ( dom0_px_info->flags == ( XEN_PX_PCT | XEN_PX_PSS |
538 XEN_PX_PSD | XEN_PX_PPC ) )
539 {
540 pxpt->init = XEN_PX_INIT;
541
542 ret = cpufreq_cpu_init(cpuid);
543 goto out;
544 }
545
546 out:
547 return ret;
548 }
549
cpufreq_cmdline_common_para(struct cpufreq_policy * new_policy)550 static void cpufreq_cmdline_common_para(struct cpufreq_policy *new_policy)
551 {
552 if (usr_max_freq)
553 new_policy->max = usr_max_freq;
554 if (usr_min_freq)
555 new_policy->min = usr_min_freq;
556 }
557
cpufreq_handle_common_option(const char * name,const char * val)558 static int __init cpufreq_handle_common_option(const char *name, const char *val)
559 {
560 if (!strcmp(name, "maxfreq") && val) {
561 usr_max_freq = simple_strtoul(val, NULL, 0);
562 return 1;
563 }
564
565 if (!strcmp(name, "minfreq") && val) {
566 usr_min_freq = simple_strtoul(val, NULL, 0);
567 return 1;
568 }
569
570 if (!strcmp(name, "verbose")) {
571 cpufreq_verbose = !val || !!simple_strtoul(val, NULL, 0);
572 return 1;
573 }
574
575 return 0;
576 }
577
cpufreq_cmdline_parse(const char * s)578 static int __init cpufreq_cmdline_parse(const char *s)
579 {
580 static struct cpufreq_governor *__initdata cpufreq_governors[] =
581 {
582 CPUFREQ_DEFAULT_GOVERNOR,
583 &cpufreq_gov_userspace,
584 &cpufreq_gov_dbs,
585 &cpufreq_gov_performance,
586 &cpufreq_gov_powersave
587 };
588 static char __initdata buf[128];
589 char *str = buf;
590 unsigned int gov_index = 0;
591 int rc = 0;
592
593 strlcpy(buf, s, sizeof(buf));
594 do {
595 char *val, *end = strchr(str, ',');
596 unsigned int i;
597
598 if (end)
599 *end++ = '\0';
600 val = strchr(str, '=');
601 if (val)
602 *val++ = '\0';
603
604 if (!cpufreq_opt_governor) {
605 if (!val) {
606 for (i = 0; i < ARRAY_SIZE(cpufreq_governors); ++i) {
607 if (!strcmp(str, cpufreq_governors[i]->name)) {
608 cpufreq_opt_governor = cpufreq_governors[i];
609 gov_index = i;
610 str = NULL;
611 break;
612 }
613 }
614 } else {
615 cpufreq_opt_governor = CPUFREQ_DEFAULT_GOVERNOR;
616 }
617 }
618
619 if (str && !cpufreq_handle_common_option(str, val) &&
620 (!cpufreq_governors[gov_index]->handle_option ||
621 !cpufreq_governors[gov_index]->handle_option(str, val)))
622 {
623 printk(XENLOG_WARNING "cpufreq/%s: option '%s' not recognized\n",
624 cpufreq_governors[gov_index]->name, str);
625 rc = -EINVAL;
626 }
627
628 str = end;
629 } while (str);
630
631 return rc;
632 }
633
cpu_callback(struct notifier_block * nfb,unsigned long action,void * hcpu)634 static int cpu_callback(
635 struct notifier_block *nfb, unsigned long action, void *hcpu)
636 {
637 unsigned int cpu = (unsigned long)hcpu;
638
639 switch ( action )
640 {
641 case CPU_DOWN_FAILED:
642 case CPU_ONLINE:
643 (void)cpufreq_add_cpu(cpu);
644 break;
645 case CPU_DOWN_PREPARE:
646 (void)cpufreq_del_cpu(cpu);
647 break;
648 default:
649 break;
650 }
651
652 return NOTIFY_DONE;
653 }
654
/* Notifier block that routes CPU notifications to cpu_callback(). */
static struct notifier_block cpu_nfb = {
    .notifier_call = cpu_callback
};
658
/* Registers cpu_nfb as a CPU notifier; run as a presmp initcall. Always returns 0. */
static int __init cpufreq_presmp_init(void)
{
    register_cpu_notifier(&cpu_nfb);
    return 0;
}
presmp_initcall(cpufreq_presmp_init);
665
cpufreq_register_driver(struct cpufreq_driver * driver_data)666 int __init cpufreq_register_driver(struct cpufreq_driver *driver_data)
667 {
668 if ( !driver_data || !driver_data->init ||
669 !driver_data->verify || !driver_data->exit ||
670 (!driver_data->target == !driver_data->setpolicy) )
671 return -EINVAL;
672
673 if ( cpufreq_driver )
674 return -EBUSY;
675
676 cpufreq_driver = driver_data;
677
678 return 0;
679 }
680