/*
 * xen/arch/x86/acpi/cpufreq/cpufreq_ondemand.c
 *
 * Copyright (C) 2001 Russell King
 *           (C) 2003 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>.
 *                    Jun Nakajima <jun.nakajima@intel.com>
 *      Feb 2008 Liu Jinsong <jinsong.liu@intel.com>
 *           Porting cpufreq_ondemand.c from Linux 2.6.23 to Xen hypervisor
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <xen/types.h>
#include <xen/percpu.h>
#include <xen/cpumask.h>
#include <xen/sched.h>
#include <xen/timer.h>
#include <acpi/cpufreq/cpufreq.h>

#define DEF_FREQUENCY_UP_THRESHOLD              (80)
#define MIN_FREQUENCY_UP_THRESHOLD              (11)
#define MAX_FREQUENCY_UP_THRESHOLD              (100)

#define MIN_DBS_INTERVAL                        (MICROSECS(100))
#define MIN_SAMPLING_RATE_RATIO                 (2)
#define MIN_SAMPLING_MILLISECS                  (MIN_SAMPLING_RATE_RATIO * 10)
#define MIN_STAT_SAMPLING_RATE                  \
    (MIN_SAMPLING_MILLISECS * MILLISECS(1))
#define MIN_SAMPLING_RATE                       \
    (def_sampling_rate / MIN_SAMPLING_RATE_RATIO)
#define MAX_SAMPLING_RATE                       (500 * def_sampling_rate)
#define DEF_SAMPLING_RATE_LATENCY_MULTIPLIER    (1000)
#define TRANSITION_LATENCY_LIMIT                (10 * 1000)

static uint64_t def_sampling_rate;
static uint64_t usr_sampling_rate;

/* Sampling types */
enum {DBS_NORMAL_SAMPLE, DBS_SUB_SAMPLE};

static DEFINE_PER_CPU(struct cpu_dbs_info_s, cpu_dbs_info);

static unsigned int dbs_enable;    /* number of CPUs using this policy */

static struct dbs_tuners {
    uint64_t     sampling_rate;
    unsigned int up_threshold;
    unsigned int powersave_bias;
} dbs_tuners_ins = {
    .sampling_rate = 0,
    .up_threshold = DEF_FREQUENCY_UP_THRESHOLD,
    .powersave_bias = 0,
};

static DEFINE_PER_CPU(struct timer, dbs_timer);

#ifdef CONFIG_PM_OP
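/*
 * Set the sampling interval: the caller passes microseconds, the value is
 * stored as system time and rejected if outside [MIN, MAX]_SAMPLING_RATE.
 */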
int write_ondemand_sampling_rate(unsigned int sampling_rate)
{
    if ( (sampling_rate > MAX_SAMPLING_RATE / MICROSECS(1)) ||
         (sampling_rate < MIN_SAMPLING_RATE / MICROSECS(1)) )
        return -EINVAL;

    dbs_tuners_ins.sampling_rate = sampling_rate * MICROSECS(1);
    return 0;
}

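/*
 * Set the load percentage above which the governor switches to the
 * maximum frequency.
 */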
int write_ondemand_up_threshold(unsigned int up_threshold)
{
    if ( (up_threshold > MAX_FREQUENCY_UP_THRESHOLD) ||
         (up_threshold < MIN_FREQUENCY_UP_THRESHOLD) )
        return -EINVAL;

    dbs_tuners_ins.up_threshold = up_threshold;
    return 0;
}

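/*
 * Report the current ondemand tunables; sampling rates are returned in
 * microseconds.
 */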
int get_cpufreq_ondemand_para(uint32_t *sampling_rate_max,
                              uint32_t *sampling_rate_min,
                              uint32_t *sampling_rate,
                              uint32_t *up_threshold)
{
    if (!sampling_rate_max || !sampling_rate_min ||
        !sampling_rate || !up_threshold)
        return -EINVAL;

    *sampling_rate_max = MAX_SAMPLING_RATE / MICROSECS(1);
    *sampling_rate_min = MIN_SAMPLING_RATE / MICROSECS(1);
    *sampling_rate = dbs_tuners_ins.sampling_rate / MICROSECS(1);
    *up_threshold = dbs_tuners_ins.up_threshold;

    return 0;
}
#endif /* CONFIG_PM_OP */

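/*
 * Core of the governor: compute per-CPU load from the idle time accumulated
 * since the last sample, take the largest load * average-frequency product
 * over the policy's CPUs, and either jump to the maximum frequency or step
 * down to the lowest frequency that keeps the load safely below
 * up_threshold.
 */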
static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
{
    uint64_t cur_ns, total_ns;
    uint64_t max_load_freq = 0;
    struct cpufreq_policy *policy;
    unsigned int max;
    unsigned int j;

    if (!this_dbs_info->enable)
        return;

    policy = this_dbs_info->cur_policy;
    max = policy->max;

    if (unlikely(policy->resume)) {
        __cpufreq_driver_target(policy, max, CPUFREQ_RELATION_H);
        return;
    }

    cur_ns = NOW();
    total_ns = cur_ns - this_dbs_info->prev_cpu_wall;
    this_dbs_info->prev_cpu_wall = NOW();

    if (total_ns < MIN_DBS_INTERVAL)
        return;

    /* Get Idle Time */
    for_each_cpu(j, policy->cpus) {
        uint64_t idle_ns, total_idle_ns;
        uint64_t load, load_freq, freq_avg;
        struct cpu_dbs_info_s *j_dbs_info;

        j_dbs_info = &per_cpu(cpu_dbs_info, j);
        total_idle_ns = get_cpu_idle_time(j);
        idle_ns = total_idle_ns - j_dbs_info->prev_cpu_idle;
        j_dbs_info->prev_cpu_idle = total_idle_ns;

        if (unlikely(total_ns < idle_ns))
            continue;

        load = 100 * (total_ns - idle_ns) / total_ns;

        freq_avg = cpufreq_driver_getavg(j, GOV_GETAVG);

        load_freq = load * freq_avg;
        if (load_freq > max_load_freq)
            max_load_freq = load_freq;
    }

    /* Check for frequency increase */
    if (max_load_freq > (uint64_t) dbs_tuners_ins.up_threshold * policy->cur) {
        /* if we are already at full speed then break out early */
        if (policy->cur == max)
            return;
        __cpufreq_driver_target(policy, max, CPUFREQ_RELATION_H);
        return;
    }

    /* Check for frequency decrease */
    /* if we cannot reduce the frequency anymore, break out early */
    if (policy->cur == policy->min)
        return;

    /*
     * The optimal frequency is the lowest frequency that can support the
     * current CPU usage without triggering the up policy. To be safe, we
     * aim for 10 points under the threshold.
     */
    if (max_load_freq
        < (uint64_t) (dbs_tuners_ins.up_threshold - 10) * policy->cur) {
        uint64_t freq_next;

        freq_next = max_load_freq / (dbs_tuners_ins.up_threshold - 10);

        __cpufreq_driver_target(policy, freq_next, CPUFREQ_RELATION_L);
    }
}

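/*
 * Periodic sampling timer handler: re-evaluate the load and re-arm the
 * timer on the next sampling-rate-aligned boundary.
 */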
static void cf_check do_dbs_timer(void *dbs)
{
    struct cpu_dbs_info_s *dbs_info = (struct cpu_dbs_info_s *)dbs;

    if (!dbs_info->enable)
        return;

    dbs_check_cpu(dbs_info);

    set_timer(&per_cpu(dbs_timer, dbs_info->cpu),
              align_timer(NOW(), dbs_tuners_ins.sampling_rate));
}

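/*
 * Enable sampling on this CPU and arm its timer.  Hardware-coordinated
 * policies (CPUFREQ_SHARED_TYPE_HW) are marked stoppable, so that
 * cpufreq_dbs_timer_suspend()/_resume() may pause the timer.
 */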
static void dbs_timer_init(struct cpu_dbs_info_s *dbs_info)
{
    dbs_info->enable = 1;

    init_timer(&per_cpu(dbs_timer, dbs_info->cpu), do_dbs_timer,
               (void *)dbs_info, dbs_info->cpu);

    set_timer(&per_cpu(dbs_timer, dbs_info->cpu),
              NOW() + dbs_tuners_ins.sampling_rate);

    if ( processor_pminfo[dbs_info->cpu]->perf.shared_type
         == CPUFREQ_SHARED_TYPE_HW )
    {
        dbs_info->stoppable = 1;
    }
}

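/*
 * Disable sampling on this CPU and tear down its timer, synchronising with
 * a possibly in-flight invocation from cpufreq_dbs_timer_resume().
 */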
static void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info)
{
    dbs_info->enable = 0;

    /*
     * The timer function may be running (from cpufreq_dbs_timer_resume) -
     * wait for it to complete.
     */
    while ( cmpxchg(&dbs_info->stoppable, 1, 0) < 0 )
        cpu_relax();

    kill_timer(&per_cpu(dbs_timer, dbs_info->cpu));
}

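/*
 * Governor entry point.  START initialises per-CPU state, derives the
 * default sampling rate from the transition latency and arms the timer;
 * STOP tears the timer down; LIMITS clamps the current frequency to the
 * updated policy bounds.
 */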
static int cf_check cpufreq_governor_dbs(
    struct cpufreq_policy *policy, unsigned int event)
{
    unsigned int cpu = policy->cpu;
    struct cpu_dbs_info_s *this_dbs_info;
    unsigned int j;

    this_dbs_info = &per_cpu(cpu_dbs_info, cpu);

    switch (event) {
    case CPUFREQ_GOV_START:
        if ((!cpu_online(cpu)) || (!policy->cur))
            return -EINVAL;

        if (policy->cpuinfo.transition_latency >
            (TRANSITION_LATENCY_LIMIT * 1000)) {
            printk(KERN_WARNING "ondemand governor failed to load "
                   "due to too long transition latency\n");
            return -EINVAL;
        }
        if (this_dbs_info->enable)
            /* Already enabled */
            break;

        dbs_enable++;

        for_each_cpu(j, policy->cpus) {
            struct cpu_dbs_info_s *j_dbs_info;
            j_dbs_info = &per_cpu(cpu_dbs_info, j);
            j_dbs_info->cur_policy = policy;

            j_dbs_info->prev_cpu_idle = get_cpu_idle_time(j);
            j_dbs_info->prev_cpu_wall = NOW();
        }
        this_dbs_info->cpu = cpu;
        /*
         * Start the timer-based sampling when this governor is used for
         * the first time.
         */
        if ((dbs_enable == 1) && !dbs_tuners_ins.sampling_rate) {
            def_sampling_rate = (uint64_t) policy->cpuinfo.transition_latency *
                DEF_SAMPLING_RATE_LATENCY_MULTIPLIER;

            if (def_sampling_rate < MIN_STAT_SAMPLING_RATE)
                def_sampling_rate = MIN_STAT_SAMPLING_RATE;

            if (!usr_sampling_rate)
                dbs_tuners_ins.sampling_rate = def_sampling_rate;
            else if (usr_sampling_rate < MIN_SAMPLING_RATE) {
                printk(KERN_WARNING "cpufreq/ondemand: "
                       "specified sampling rate too low, using %"PRIu64"\n",
                       MIN_SAMPLING_RATE);
                dbs_tuners_ins.sampling_rate = MIN_SAMPLING_RATE;
            } else if (usr_sampling_rate > MAX_SAMPLING_RATE) {
                printk(KERN_WARNING "cpufreq/ondemand: "
                       "specified sampling rate too high, using %"PRIu64"\n",
                       MAX_SAMPLING_RATE);
                dbs_tuners_ins.sampling_rate = MAX_SAMPLING_RATE;
            } else
                dbs_tuners_ins.sampling_rate = usr_sampling_rate;
        }
        dbs_timer_init(this_dbs_info);

        break;

    case CPUFREQ_GOV_STOP:
        if ( !this_dbs_info->enable )
            /* Already not enabled */
            break;

        dbs_timer_exit(this_dbs_info);
        dbs_enable--;

        break;

    case CPUFREQ_GOV_LIMITS:
        if ( this_dbs_info->cur_policy == NULL )
        {
            printk(KERN_WARNING "CPU%d ondemand governor not started yet, "
                   "unable to GOV_LIMIT\n", cpu);
            return -EINVAL;
        }
        if (policy->max < this_dbs_info->cur_policy->cur)
            __cpufreq_driver_target(this_dbs_info->cur_policy,
                                    policy->max, CPUFREQ_RELATION_H);
        else if (policy->min > this_dbs_info->cur_policy->cur)
            __cpufreq_driver_target(this_dbs_info->cur_policy,
                                    policy->min, CPUFREQ_RELATION_L);
        break;
    }
    return 0;
}

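/*
 * Parse the governor-specific command line sub-options: "rate" (sampling
 * interval in microseconds), "up_threshold" and "bias".
 */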
static bool __init cf_check cpufreq_dbs_handle_option(
    const char *name, const char *val)
{
    if ( !strcmp(name, "rate") && val )
    {
        usr_sampling_rate = simple_strtoull(val, NULL, 0) * MICROSECS(1);
    }
    else if ( !strcmp(name, "up_threshold") && val )
    {
        unsigned long tmp = simple_strtoul(val, NULL, 0);

        if ( tmp < MIN_FREQUENCY_UP_THRESHOLD )
        {
            printk(XENLOG_WARNING "cpufreq/ondemand: "
                   "specified threshold too low, using %d\n",
                   MIN_FREQUENCY_UP_THRESHOLD);
            tmp = MIN_FREQUENCY_UP_THRESHOLD;
        }
        else if ( tmp > MAX_FREQUENCY_UP_THRESHOLD )
        {
            printk(XENLOG_WARNING "cpufreq/ondemand: "
                   "specified threshold too high, using %d\n",
                   MAX_FREQUENCY_UP_THRESHOLD);
            tmp = MAX_FREQUENCY_UP_THRESHOLD;
        }
        dbs_tuners_ins.up_threshold = tmp;
    }
    else if ( !strcmp(name, "bias") && val )
    {
        unsigned long tmp = simple_strtoul(val, NULL, 0);

        if ( tmp > 1000 )
        {
            printk(XENLOG_WARNING "cpufreq/ondemand: "
                   "specified bias too high, using 1000\n");
            tmp = 1000;
        }
        dbs_tuners_ins.powersave_bias = tmp;
    }
    else
        return 0;
    return 1;
}

struct cpufreq_governor cpufreq_gov_dbs = {
    .name = "ondemand",
    .governor = cpufreq_governor_dbs,
    .handle_option = cpufreq_dbs_handle_option
};

static int __init cf_check cpufreq_gov_dbs_init(void)
{
    if ( cpufreq_governor_internal )
        return 0;

    return cpufreq_register_governor(&cpufreq_gov_dbs);
}
__initcall(cpufreq_gov_dbs_init);

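/* Stop the local CPU's sampling timer, if the policy marked it stoppable. */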
void cpufreq_dbs_timer_suspend(void)
{
    int cpu;

    cpu = smp_processor_id();

    if ( per_cpu(cpu_dbs_info, cpu).stoppable )
    {
        stop_timer( &per_cpu(dbs_timer, cpu) );
    }
}

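/*
 * Counterpart to cpufreq_dbs_timer_suspend(): if the stopped timer is
 * already overdue, run the sampling function directly (flagging it as
 * running via the stoppable field so dbs_timer_exit() can wait for it);
 * otherwise re-arm it on the next aligned sampling boundary.
 */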
void cpufreq_dbs_timer_resume(void)
{
    unsigned int cpu = smp_processor_id();
    int8_t *stoppable = &per_cpu(cpu_dbs_info, cpu).stoppable;

    if ( *stoppable )
    {
        s_time_t now = NOW();
        struct timer *t = &per_cpu(dbs_timer, cpu);

        if ( t->expires <= now )
        {
            if ( !cmpxchg(stoppable, 1, -1) )
                return;
            t->function(t->data);
            (void)cmpxchg(stoppable, -1, 1);
        }
        else
            set_timer(t, align_timer(now, dbs_tuners_ins.sampling_rate));
    }
}