/*
 * cpufreq.c - ACPI Processor P-States Driver ($Revision: 1.4 $)
 *
 * Copyright (C) 2001, 2002 Andy Grover <andrew.grover@intel.com>
 * Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
 * Copyright (C) 2002 - 2004 Dominik Brodowski <linux@brodo.de>
 * Copyright (C) 2006 Denis Sadykov <denis.m.sadykov@intel.com>
 *
 * Feb 2008 - Liu Jinsong <jinsong.liu@intel.com>
 *     porting acpi-cpufreq.c from Linux 2.6.23 to Xen hypervisor
 *
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or (at
 * your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; If not, see <http://www.gnu.org/licenses/>.
 *
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 */

#include <xen/types.h>
#include <xen/errno.h>
#include <xen/delay.h>
#include <xen/cpumask.h>
#include <xen/sched.h>
#include <xen/timer.h>
#include <xen/xmalloc.h>
#include <asm/bug.h>
#include <asm/msr.h>
#include <asm/io.h>
#include <asm/processor.h>
#include <asm/percpu.h>
#include <asm/cpufeature.h>
#include <acpi/acpi.h>
#include <acpi/cpufreq/cpufreq.h>

enum {
    UNDEFINED_CAPABLE = 0,
    SYSTEM_INTEL_MSR_CAPABLE,
    SYSTEM_IO_CAPABLE,
};

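/*
 * Bits 15:0 of IA32_PERF_CTL/IA32_PERF_STATUS carry the P-state value on
 * Intel CPUs; all higher bits must be preserved on writes.
 */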
#define INTEL_MSR_RANGE (0xffffull)

struct acpi_cpufreq_data *cpufreq_drv_data[NR_CPUS];

static struct cpufreq_driver acpi_cpufreq_driver;

static bool __read_mostly acpi_pstate_strict;
boolean_param("acpi_pstate_strict", acpi_pstate_strict);

static int check_est_cpu(unsigned int cpuid)
{
    struct cpuinfo_x86 *cpu = &cpu_data[cpuid];

    if (cpu->x86_vendor != X86_VENDOR_INTEL ||
        !cpu_has(cpu, X86_FEATURE_EIST))
        return 0;

    return 1;
}

static unsigned extract_io(u32 value, struct acpi_cpufreq_data *data)
{
    struct processor_performance *perf;
    int i;

    perf = data->acpi_data;

    for (i = 0; i < perf->state_count; i++) {
        if (value == perf->states[i].status)
            return data->freq_table[i].frequency;
    }
    return 0;
}

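/*
 * IA32_PERF_STATUS reports the current P-state in its low 16 bits; match
 * the masked value against the status field of each ACPI _PSS entry.
 */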
static unsigned extract_msr(u32 msr, struct acpi_cpufreq_data *data)
{
    int i;
    struct processor_performance *perf;

    msr &= INTEL_MSR_RANGE;
    perf = data->acpi_data;

    for (i = 0; data->freq_table[i].frequency != CPUFREQ_TABLE_END; i++) {
        if (msr == perf->states[data->freq_table[i].index].status)
            return data->freq_table[i].frequency;
    }
    return data->freq_table[0].frequency;
}

static unsigned extract_freq(u32 val, struct acpi_cpufreq_data *data)
{
    switch (data->arch_cpu_flags) {
    case SYSTEM_INTEL_MSR_CAPABLE:
        return extract_msr(val, data);
    case SYSTEM_IO_CAPABLE:
        return extract_io(val, data);
    default:
        return 0;
    }
}

struct msr_addr {
    u32 reg;
};

struct io_addr {
    u16 port;
    u8 bit_width;
};

typedef union {
    struct msr_addr msr;
    struct io_addr io;
} drv_addr_union;

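/*
 * A drv_cmd bundles everything needed for one register access, so that it
 * can be executed locally or shipped to the CPUs in @mask via
 * on_selected_cpus().
 */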
struct drv_cmd {
    unsigned int type;
    const cpumask_t *mask;
    drv_addr_union addr;
    u32 val;
};

static void do_drv_read(void *drvcmd)
{
    struct drv_cmd *cmd;

    cmd = (struct drv_cmd *)drvcmd;

    switch (cmd->type) {
    case SYSTEM_INTEL_MSR_CAPABLE:
        rdmsrl(cmd->addr.msr.reg, cmd->val);
        break;
    case SYSTEM_IO_CAPABLE:
        acpi_os_read_port((acpi_io_address)cmd->addr.io.port,
                          &cmd->val, (u32)cmd->addr.io.bit_width);
        break;
    default:
        break;
    }
}

static void do_drv_write(void *drvcmd)
{
    struct drv_cmd *cmd;
    uint64_t msr_content;

    cmd = (struct drv_cmd *)drvcmd;

    switch (cmd->type) {
    case SYSTEM_INTEL_MSR_CAPABLE:
        rdmsrl(cmd->addr.msr.reg, msr_content);
        msr_content = (msr_content & ~INTEL_MSR_RANGE)
                      | (cmd->val & INTEL_MSR_RANGE);
        wrmsrl(cmd->addr.msr.reg, msr_content);
        break;
    case SYSTEM_IO_CAPABLE:
        acpi_os_write_port((acpi_io_address)cmd->addr.io.port,
                           cmd->val, (u32)cmd->addr.io.bit_width);
        break;
    default:
        break;
    }
}

static void drv_read(struct drv_cmd *cmd)
{
    cmd->val = 0;

    ASSERT(cpumask_weight(cmd->mask) == 1);

    /* Read locally when possible, to avoid a needless IPI. */
    if (likely(cpumask_test_cpu(smp_processor_id(), cmd->mask)))
        do_drv_read((void *)cmd);
    else
        on_selected_cpus(cmd->mask, do_drv_read, cmd, 1);
}

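/*
 * Unlike reads, a write may have to reach every CPU in @mask: when P-state
 * coordination is left to software, each core's control register must be
 * updated individually.
 */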
static void drv_write(struct drv_cmd *cmd)
{
    if (cpumask_equal(cmd->mask, cpumask_of(smp_processor_id())))
        do_drv_write((void *)cmd);
    else
        on_selected_cpus(cmd->mask, do_drv_write, cmd, 1);
}

static u32 get_cur_val(const cpumask_t *mask)
{
    struct cpufreq_policy *policy;
    struct processor_performance *perf;
    struct drv_cmd cmd;
    unsigned int cpu = smp_processor_id();

    if (unlikely(cpumask_empty(mask)))
        return 0;

    if (!cpumask_test_cpu(cpu, mask))
        cpu = cpumask_first(mask);
    if (cpu >= nr_cpu_ids || !cpu_online(cpu))
        return 0;

    policy = per_cpu(cpufreq_cpu_policy, cpu);
    if (!policy || !cpufreq_drv_data[policy->cpu])
        return 0;

    switch (cpufreq_drv_data[policy->cpu]->arch_cpu_flags) {
    case SYSTEM_INTEL_MSR_CAPABLE:
        cmd.type = SYSTEM_INTEL_MSR_CAPABLE;
        cmd.addr.msr.reg = MSR_IA32_PERF_STATUS;
        break;
    case SYSTEM_IO_CAPABLE:
        cmd.type = SYSTEM_IO_CAPABLE;
        perf = cpufreq_drv_data[policy->cpu]->acpi_data;
        cmd.addr.io.port = perf->control_register.address;
        cmd.addr.io.bit_width = perf->control_register.bit_width;
        break;
    default:
        return 0;
    }

    cmd.mask = cpumask_of(cpu);

    drv_read(&cmd);
    return cmd.val;
}

struct perf_pair {
    union {
        struct {
            uint32_t lo;
            uint32_t hi;
        } split;
        uint64_t whole;
    } aperf, mperf;
};
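/*
 * Per-CPU APERF/MPERF snapshots, kept separately for governor-driven
 * sampling and user-requested queries so the two callers don't disturb
 * each other's measurement intervals.
 */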
static DEFINE_PER_CPU(struct perf_pair, gov_perf_pair);
static DEFINE_PER_CPU(struct perf_pair, usr_perf_pair);

static void read_measured_perf_ctrs(void *_readin)
{
    struct perf_pair *readin = _readin;

    rdmsrl(MSR_IA32_APERF, readin->aperf.whole);
    rdmsrl(MSR_IA32_MPERF, readin->mperf.whole);
}

/*
 * Return the measured active (C0) frequency on this CPU since the last call
 * to this function.
 * Input: cpu number
 * Return: Average CPU frequency in terms of max frequency (zero on error)
 *
 * We use the IA32_APERF and IA32_MPERF MSRs to get the measured performance
 * over a period of time while the CPU is in the C0 state.
 * IA32_MPERF counts at the rate of the maximum advertised frequency;
 * IA32_APERF counts at the rate of the actual CPU frequency.
 * Only the IA32_APERF/IA32_MPERF ratio is architecturally defined; no
 * meaning should be attached to the absolute values of these MSRs.
 */
unsigned int get_measured_perf(unsigned int cpu, unsigned int flag)
{
    struct cpufreq_policy *policy;
    struct perf_pair readin, cur, *saved;
    unsigned int perf_percent;
    unsigned int retval;

    if (!cpu_online(cpu))
        return 0;

    policy = per_cpu(cpufreq_cpu_policy, cpu);
    if (!policy || !policy->aperf_mperf)
        return 0;

    switch (flag)
    {
    case GOV_GETAVG:
    {
        saved = &per_cpu(gov_perf_pair, cpu);
        break;
    }
    case USR_GETAVG:
    {
        saved = &per_cpu(usr_perf_pair, cpu);
        break;
    }
    default:
        return 0;
    }

    if (cpu == smp_processor_id()) {
        read_measured_perf_ctrs(&readin);
    } else {
        on_selected_cpus(cpumask_of(cpu), read_measured_perf_ctrs,
                         &readin, 1);
    }

    cur.aperf.whole = readin.aperf.whole - saved->aperf.whole;
    cur.mperf.whole = readin.mperf.whole - saved->mperf.whole;
    saved->aperf.whole = readin.aperf.whole;
    saved->mperf.whole = readin.mperf.whole;

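    /*
     * Scale both deltas down by 2^7 if multiplying the APERF delta by 100
     * below could overflow; the APERF/MPERF ratio, and hence the result,
     * is unaffected.  E.g. an APERF delta of 80M against an MPERF delta of
     * 100M yields perf_percent = 80.
     */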
    if (unlikely(((unsigned long)(-1) / 100) < cur.aperf.whole)) {
        int shift_count = 7;
        cur.aperf.whole >>= shift_count;
        cur.mperf.whole >>= shift_count;
    }

    if (cur.aperf.whole && cur.mperf.whole)
        perf_percent = (cur.aperf.whole * 100) / cur.mperf.whole;
    else
        perf_percent = 0;

    retval = policy->cpuinfo.max_freq * perf_percent / 100;

    return retval;
}

static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
{
    struct cpufreq_policy *policy;
    struct acpi_cpufreq_data *data;
    unsigned int freq;

    if (!cpu_online(cpu))
        return 0;

    policy = per_cpu(cpufreq_cpu_policy, cpu);
    if (!policy)
        return 0;

    data = cpufreq_drv_data[policy->cpu];
    if (unlikely(data == NULL ||
                 data->acpi_data == NULL || data->freq_table == NULL))
        return 0;

    freq = extract_freq(get_cur_val(cpumask_of(cpu)), data);
    return freq;
}

static void feature_detect(void *info)
{
    struct cpufreq_policy *policy = info;
    unsigned int eax;

    if ( cpu_has_aperfmperf )
    {
        policy->aperf_mperf = 1;
        acpi_cpufreq_driver.getavg = get_measured_perf;
    }

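    /* CPUID.06H:EAX bit 1 advertises Intel Turbo Boost support. */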
    eax = cpuid_eax(6);
    if (eax & 0x2) {
        policy->turbo = CPUFREQ_TURBO_ENABLED;
        if (cpufreq_verbose)
            printk(XENLOG_INFO "CPU%u: Turbo Mode detected and enabled\n",
                   smp_processor_id());
    }
}

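/*
 * Used only with the "acpi_pstate_strict" option: poll the status register
 * up to 100 times with a 10us delay (about 1ms in total) until the CPUs in
 * @mask report the requested frequency.
 */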
static unsigned int check_freqs(const cpumask_t *mask, unsigned int freq,
                                struct acpi_cpufreq_data *data)
{
    unsigned int cur_freq;
    unsigned int i;

    for (i = 0; i < 100; i++) {
        cur_freq = extract_freq(get_cur_val(mask), data);
        if (cur_freq == freq)
            return 1;
        udelay(10);
    }
    return 0;
}

static int acpi_cpufreq_target(struct cpufreq_policy *policy,
                               unsigned int target_freq, unsigned int relation)
{
    struct acpi_cpufreq_data *data = cpufreq_drv_data[policy->cpu];
    struct processor_performance *perf;
    struct cpufreq_freqs freqs;
    cpumask_t online_policy_cpus;
    struct drv_cmd cmd;
    unsigned int next_state = 0; /* Index into freq_table */
    unsigned int next_perf_state = 0; /* Index into perf table */
    unsigned int j;
    int result = 0;

    if (unlikely(data == NULL ||
                 data->acpi_data == NULL || data->freq_table == NULL)) {
        return -ENODEV;
    }

    if (policy->turbo == CPUFREQ_TURBO_DISABLED)
        if (target_freq > policy->cpuinfo.second_max_freq)
            target_freq = policy->cpuinfo.second_max_freq;

    perf = data->acpi_data;
    result = cpufreq_frequency_table_target(policy,
                                            data->freq_table,
                                            target_freq,
                                            relation, &next_state);
    if (unlikely(result))
        return -ENODEV;

    cpumask_and(&online_policy_cpus, &cpu_online_map, policy->cpus);

    next_perf_state = data->freq_table[next_state].index;
    if (perf->state == next_perf_state) {
        if (unlikely(policy->resume))
            policy->resume = 0;
        else
            return 0;
    }

    switch (data->arch_cpu_flags) {
    case SYSTEM_INTEL_MSR_CAPABLE:
        cmd.type = SYSTEM_INTEL_MSR_CAPABLE;
        cmd.addr.msr.reg = MSR_IA32_PERF_CTL;
        cmd.val = (u32) perf->states[next_perf_state].control;
        break;
    case SYSTEM_IO_CAPABLE:
        cmd.type = SYSTEM_IO_CAPABLE;
        cmd.addr.io.port = perf->control_register.address;
        cmd.addr.io.bit_width = perf->control_register.bit_width;
        cmd.val = (u32) perf->states[next_perf_state].control;
        break;
    default:
        return -ENODEV;
    }

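    /*
     * With CPUFREQ_SHARED_TYPE_ANY the hardware coordinates P-states across
     * the domain, so writing on one CPU suffices; otherwise the new value
     * must be written on every online CPU covered by the policy.
     */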
    if (policy->shared_type != CPUFREQ_SHARED_TYPE_ANY)
        cmd.mask = &online_policy_cpus;
    else
        cmd.mask = cpumask_of(policy->cpu);

    freqs.old = perf->states[perf->state].core_frequency * 1000;
    freqs.new = data->freq_table[next_state].frequency;

    drv_write(&cmd);

    if (acpi_pstate_strict && !check_freqs(cmd.mask, freqs.new, data)) {
        printk(KERN_WARNING "Failed to transfer to new freq %d\n", freqs.new);
        return -EAGAIN;
    }

    for_each_cpu(j, &online_policy_cpus)
        cpufreq_statistic_update(j, perf->state, next_perf_state);

    perf->state = next_perf_state;
    policy->cur = freqs.new;

    return result;
}

static int acpi_cpufreq_verify(struct cpufreq_policy *policy)
{
    struct acpi_cpufreq_data *data;
    struct processor_performance *perf;

    if (!policy || !(data = cpufreq_drv_data[policy->cpu]) ||
        !processor_pminfo[policy->cpu])
        return -EINVAL;

    perf = &processor_pminfo[policy->cpu]->perf;

    cpufreq_verify_within_limits(policy, 0,
        perf->states[perf->platform_limit].core_frequency * 1000);

    return cpufreq_frequency_table_verify(policy, data->freq_table);
}

static unsigned long
acpi_cpufreq_guess_freq(struct acpi_cpufreq_data *data, unsigned int cpu)
{
    struct processor_performance *perf = data->acpi_data;

    if (cpu_khz) {
        /* search the closest match to cpu_khz */
        unsigned int i;
        unsigned long freq;
        unsigned long freqn = perf->states[0].core_frequency * 1000;

        for (i = 0; i < (perf->state_count - 1); i++) {
            freq = freqn;
            freqn = perf->states[i+1].core_frequency * 1000;
            if ((2 * cpu_khz) > (freqn + freq)) {
                perf->state = i;
                return freq;
            }
        }
        perf->state = perf->state_count - 1;
        return freqn;
    } else {
        /* assume CPU is at P0... */
        perf->state = 0;
        return perf->states[0].core_frequency * 1000;
    }
}

static int
acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
{
    unsigned int i;
    unsigned int valid_states = 0;
    unsigned int cpu = policy->cpu;
    struct acpi_cpufreq_data *data;
    int result = 0;
    struct cpuinfo_x86 *c = &cpu_data[policy->cpu];
    struct processor_performance *perf;

    data = xzalloc(struct acpi_cpufreq_data);
    if (!data)
        return -ENOMEM;

    cpufreq_drv_data[cpu] = data;

    data->acpi_data = &processor_pminfo[cpu]->perf;

    perf = data->acpi_data;
    policy->shared_type = perf->shared_type;

    switch (perf->control_register.space_id) {
    case ACPI_ADR_SPACE_SYSTEM_IO:
        if (cpufreq_verbose)
            printk("xen_pminfo: @acpi_cpufreq_cpu_init,"
                   " SYSTEM IO addr space\n");
        data->arch_cpu_flags = SYSTEM_IO_CAPABLE;
        break;
    case ACPI_ADR_SPACE_FIXED_HARDWARE:
        if (cpufreq_verbose)
            printk("xen_pminfo: @acpi_cpufreq_cpu_init,"
                   " HARDWARE addr space\n");
        if (!check_est_cpu(cpu)) {
            result = -ENODEV;
            goto err_unreg;
        }
        data->arch_cpu_flags = SYSTEM_INTEL_MSR_CAPABLE;
        break;
    default:
        result = -ENODEV;
        goto err_unreg;
    }

    data->freq_table = xmalloc_array(struct cpufreq_frequency_table,
                                     (perf->state_count + 1));
    if (!data->freq_table) {
        result = -ENOMEM;
        goto err_unreg;
    }

    /* detect transition latency */
    policy->cpuinfo.transition_latency = 0;
    for (i = 0; i < perf->state_count; i++) {
        if ((perf->states[i].transition_latency * 1000) >
            policy->cpuinfo.transition_latency)
            policy->cpuinfo.transition_latency =
                perf->states[i].transition_latency * 1000;
    }

    policy->governor = cpufreq_opt_governor ? : CPUFREQ_DEFAULT_GOVERNOR;

    /* table init */
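    /*
     * Skip any state whose frequency is not strictly below the previous
     * valid entry: this drops duplicate frequencies, relying on _PSS
     * listing states in descending order.
     */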
    for (i = 0; i < perf->state_count; i++) {
        if (i > 0 && perf->states[i].core_frequency >=
            data->freq_table[valid_states-1].frequency / 1000)
            continue;

        data->freq_table[valid_states].index = i;
        data->freq_table[valid_states].frequency =
            perf->states[i].core_frequency * 1000;
        valid_states++;
    }
    data->freq_table[valid_states].frequency = CPUFREQ_TABLE_END;
    perf->state = 0;

    result = cpufreq_frequency_table_cpuinfo(policy, data->freq_table);
    if (result)
        goto err_freqfree;

    switch (perf->control_register.space_id) {
    case ACPI_ADR_SPACE_SYSTEM_IO:
        /* Current speed is unknown and not detectable by IO port */
        policy->cur = acpi_cpufreq_guess_freq(data, policy->cpu);
        break;
    case ACPI_ADR_SPACE_FIXED_HARDWARE:
        acpi_cpufreq_driver.get = get_cur_freq_on_cpu;
        policy->cur = get_cur_freq_on_cpu(cpu);
        break;
    default:
        break;
    }

    /*
     * Check for APERF/MPERF support in hardware;
     * also check for Turbo Boost support.
     */
    if (c->x86_vendor == X86_VENDOR_INTEL && c->cpuid_level >= 6)
        on_selected_cpus(cpumask_of(cpu), feature_detect, policy, 1);

    /*
     * the first call to ->target() should result in us actually
     * writing something to the appropriate registers.
     */
    policy->resume = 1;

    return result;

err_freqfree:
    xfree(data->freq_table);
err_unreg:
    xfree(data);
    cpufreq_drv_data[cpu] = NULL;

    return result;
}

static int acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy)
{
    struct acpi_cpufreq_data *data = cpufreq_drv_data[policy->cpu];

    if (data) {
        cpufreq_drv_data[policy->cpu] = NULL;
        xfree(data->freq_table);
        xfree(data);
    }

    return 0;
}

static struct cpufreq_driver acpi_cpufreq_driver = {
    .name   = "acpi-cpufreq",
    .verify = acpi_cpufreq_verify,
    .target = acpi_cpufreq_target,
    .init   = acpi_cpufreq_cpu_init,
    .exit   = acpi_cpufreq_cpu_exit,
};

static int __init cpufreq_driver_init(void)
{
    int ret = 0;

    if ((cpufreq_controller == FREQCTL_xen) &&
        (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL))
        ret = cpufreq_register_driver(&acpi_cpufreq_driver);
    else if ((cpufreq_controller == FREQCTL_xen) &&
             (boot_cpu_data.x86_vendor == X86_VENDOR_AMD))
        ret = powernow_register_driver();

    return ret;
}
__initcall(cpufreq_driver_init);

int cpufreq_cpu_init(unsigned int cpuid)
{
    int ret;

    /* Currently we only handle Intel and AMD processors. */
    if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) ||
        (boot_cpu_data.x86_vendor == X86_VENDOR_AMD))
        ret = cpufreq_add_cpu(cpuid);
    else
        ret = -EFAULT;
    return ret;
}