/*
 * xen/arch/x86/acpi/cpufreq/cpufreq_ondemand.c
 *
 * Copyright (C) 2001 Russell King
 *           (C) 2003 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>.
 *                    Jun Nakajima <jun.nakajima@intel.com>
 *           Feb 2008 Liu Jinsong <jinsong.liu@intel.com>
 *           Porting cpufreq_ondemand.c from Linux 2.6.23 to Xen hypervisor
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <xen/types.h>
#include <xen/percpu.h>
#include <xen/cpumask.h>
#include <xen/sched.h>
#include <xen/timer.h>
#include <acpi/cpufreq/cpufreq.h>

#define DEF_FREQUENCY_UP_THRESHOLD (80)
#define MIN_FREQUENCY_UP_THRESHOLD (11)
#define MAX_FREQUENCY_UP_THRESHOLD (100)

#define MIN_DBS_INTERVAL (MICROSECS(100))
#define MIN_SAMPLING_RATE_RATIO (2)
#define MIN_SAMPLING_MILLISECS (MIN_SAMPLING_RATE_RATIO * 10)
#define MIN_STAT_SAMPLING_RATE \
    (MIN_SAMPLING_MILLISECS * MILLISECS(1))
#define MIN_SAMPLING_RATE \
    (def_sampling_rate / MIN_SAMPLING_RATE_RATIO)
#define MAX_SAMPLING_RATE (500 * def_sampling_rate)
#define DEF_SAMPLING_RATE_LATENCY_MULTIPLIER (1000)
#define TRANSITION_LATENCY_LIMIT (10 * 1000)

static uint64_t def_sampling_rate;
static uint64_t usr_sampling_rate;

/* Sampling types */
enum {DBS_NORMAL_SAMPLE, DBS_SUB_SAMPLE};

static DEFINE_PER_CPU(struct cpu_dbs_info_s, cpu_dbs_info);

static unsigned int dbs_enable;    /* number of CPUs using this policy */

static struct dbs_tuners {
    uint64_t     sampling_rate;
    unsigned int up_threshold;
    unsigned int powersave_bias;
} dbs_tuners_ins = {
    .sampling_rate = 0,
    .up_threshold = DEF_FREQUENCY_UP_THRESHOLD,
    .powersave_bias = 0,
};

static DEFINE_PER_CPU(struct timer, dbs_timer);

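/*
 * Update the governor's sampling rate.  The caller supplies the rate in
 * microseconds; it is validated against MIN/MAX_SAMPLING_RATE and stored
 * internally in nanoseconds.
 */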
int write_ondemand_sampling_rate(unsigned int sampling_rate)
{
    if ( (sampling_rate > MAX_SAMPLING_RATE / MICROSECS(1)) ||
         (sampling_rate < MIN_SAMPLING_RATE / MICROSECS(1)) )
        return -EINVAL;

    dbs_tuners_ins.sampling_rate = sampling_rate * MICROSECS(1);
    return 0;
}

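/*
 * Update the load threshold (in percent) above which the governor jumps
 * straight to the maximum frequency.
 */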
int write_ondemand_up_threshold(unsigned int up_threshold)
{
    if ( (up_threshold > MAX_FREQUENCY_UP_THRESHOLD) ||
         (up_threshold < MIN_FREQUENCY_UP_THRESHOLD) )
        return -EINVAL;

    dbs_tuners_ins.up_threshold = up_threshold;
    return 0;
}

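/*
 * Report the current ondemand tunables; sampling rates are converted back
 * from nanoseconds to microseconds for the caller.
 */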
int get_cpufreq_ondemand_para(uint32_t *sampling_rate_max,
                              uint32_t *sampling_rate_min,
                              uint32_t *sampling_rate,
                              uint32_t *up_threshold)
{
    if (!sampling_rate_max || !sampling_rate_min ||
        !sampling_rate || !up_threshold)
        return -EINVAL;

    *sampling_rate_max = MAX_SAMPLING_RATE / MICROSECS(1);
    *sampling_rate_min = MIN_SAMPLING_RATE / MICROSECS(1);
    *sampling_rate = dbs_tuners_ins.sampling_rate / MICROSECS(1);
    *up_threshold = dbs_tuners_ins.up_threshold;

    return 0;
}

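/*
 * One sampling pass: for every CPU sharing this policy, compute the busy
 * fraction of the wall time elapsed since the last sample and weight it by
 * the average frequency.  If the busiest CPU exceeds up_threshold percent
 * of the current frequency, jump straight to the maximum frequency;
 * otherwise, if load has dropped well below the threshold, pick the lowest
 * frequency that still keeps it under (up_threshold - 10) percent.
 */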
static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
{
    uint64_t cur_ns, total_ns;
    uint64_t max_load_freq = 0;
    struct cpufreq_policy *policy;
    unsigned int max;
    unsigned int j;

    if (!this_dbs_info->enable)
        return;

    policy = this_dbs_info->cur_policy;
    max = policy->max;

    if (unlikely(policy->resume)) {
        __cpufreq_driver_target(policy, max, CPUFREQ_RELATION_H);
        return;
    }

    cur_ns = NOW();
    total_ns = cur_ns - this_dbs_info->prev_cpu_wall;
    this_dbs_info->prev_cpu_wall = NOW();

    if (total_ns < MIN_DBS_INTERVAL)
        return;

    /* Get Idle Time */
    for_each_cpu(j, policy->cpus) {
        uint64_t idle_ns, total_idle_ns;
        uint64_t load, load_freq, freq_avg;
        struct cpu_dbs_info_s *j_dbs_info;

        j_dbs_info = &per_cpu(cpu_dbs_info, j);
        total_idle_ns = get_cpu_idle_time(j);
        idle_ns = total_idle_ns - j_dbs_info->prev_cpu_idle;
        j_dbs_info->prev_cpu_idle = total_idle_ns;

        if (unlikely(total_ns < idle_ns))
            continue;

        load = 100 * (total_ns - idle_ns) / total_ns;

        freq_avg = cpufreq_driver_getavg(j, GOV_GETAVG);

        load_freq = load * freq_avg;
        if (load_freq > max_load_freq)
            max_load_freq = load_freq;
    }

    /* Check for frequency increase */
    if (max_load_freq > (uint64_t) dbs_tuners_ins.up_threshold * policy->cur) {
        /* if we are already at full speed then break out early */
        if (policy->cur == max)
            return;
        __cpufreq_driver_target(policy, max, CPUFREQ_RELATION_H);
        return;
    }

    /* Check for frequency decrease */
    /* if we cannot reduce the frequency anymore, break out early */
    if (policy->cur == policy->min)
        return;

    /*
     * The optimal frequency is the lowest frequency that can sustain the
     * current CPU usage without triggering the up policy.  To be safe, we
     * keep the load 10 points under the threshold.
     */
    if (max_load_freq
        < (uint64_t) (dbs_tuners_ins.up_threshold - 10) * policy->cur) {
        uint64_t freq_next;

        freq_next = max_load_freq / (dbs_tuners_ins.up_threshold - 10);

        __cpufreq_driver_target(policy, freq_next, CPUFREQ_RELATION_L);
    }
}

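/*
 * Per-CPU timer handler: run one sampling pass, then re-arm the timer
 * aligned to the sampling rate.
 */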
static void do_dbs_timer(void *dbs)
{
    struct cpu_dbs_info_s *dbs_info = (struct cpu_dbs_info_s *)dbs;

    if (!dbs_info->enable)
        return;

    dbs_check_cpu(dbs_info);

    set_timer(&per_cpu(dbs_timer, dbs_info->cpu),
              align_timer(NOW(), dbs_tuners_ins.sampling_rate));
}

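/*
 * Enable sampling and arm the per-CPU timer for this policy's CPU.  CPUs
 * whose P-state coordination is done in hardware (CPUFREQ_SHARED_TYPE_HW)
 * are marked stoppable, so cpufreq_dbs_timer_suspend() may pause their
 * timers.
 */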
static void dbs_timer_init(struct cpu_dbs_info_s *dbs_info)
{
    dbs_info->enable = 1;

    init_timer(&per_cpu(dbs_timer, dbs_info->cpu), do_dbs_timer,
               (void *)dbs_info, dbs_info->cpu);

    set_timer(&per_cpu(dbs_timer, dbs_info->cpu),
              NOW() + dbs_tuners_ins.sampling_rate);

    if ( processor_pminfo[dbs_info->cpu]->perf.shared_type
         == CPUFREQ_SHARED_TYPE_HW )
    {
        dbs_info->stoppable = 1;
    }
}

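/* Disable sampling on this CPU and tear down its timer. */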
static void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info)
{
    dbs_info->enable = 0;
    dbs_info->stoppable = 0;
    kill_timer(&per_cpu(dbs_timer, dbs_info->cpu));
}

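/*
 * Governor entry point, invoked by the cpufreq core on GOV_START,
 * GOV_STOP and GOV_LIMITS events for a policy.
 */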
int cpufreq_governor_dbs(struct cpufreq_policy *policy, unsigned int event)
{
    unsigned int cpu = policy->cpu;
    struct cpu_dbs_info_s *this_dbs_info;
    unsigned int j;

    this_dbs_info = &per_cpu(cpu_dbs_info, cpu);

    switch (event) {
    case CPUFREQ_GOV_START:
        if ((!cpu_online(cpu)) || (!policy->cur))
            return -EINVAL;

        if (policy->cpuinfo.transition_latency >
            (TRANSITION_LATENCY_LIMIT * 1000)) {
            printk(KERN_WARNING "ondemand governor failed to load "
                   "due to too long transition latency\n");
            return -EINVAL;
        }
        if (this_dbs_info->enable)
            /* Already enabled */
            break;

        dbs_enable++;

        for_each_cpu(j, policy->cpus) {
            struct cpu_dbs_info_s *j_dbs_info;
            j_dbs_info = &per_cpu(cpu_dbs_info, j);
            j_dbs_info->cur_policy = policy;

            j_dbs_info->prev_cpu_idle = get_cpu_idle_time(j);
            j_dbs_info->prev_cpu_wall = NOW();
        }
        this_dbs_info->cpu = cpu;
        /*
         * When this governor is started for the first time, derive the
         * default sampling rate from the driver's transition latency.
         */
        if ((dbs_enable == 1) && !dbs_tuners_ins.sampling_rate) {
            def_sampling_rate = (uint64_t) policy->cpuinfo.transition_latency *
                DEF_SAMPLING_RATE_LATENCY_MULTIPLIER;

            if (def_sampling_rate < MIN_STAT_SAMPLING_RATE)
                def_sampling_rate = MIN_STAT_SAMPLING_RATE;

            if (!usr_sampling_rate)
                dbs_tuners_ins.sampling_rate = def_sampling_rate;
            else if (usr_sampling_rate < MIN_SAMPLING_RATE) {
                printk(KERN_WARNING "cpufreq/ondemand: "
                       "specified sampling rate too low, using %"PRIu64"\n",
                       MIN_SAMPLING_RATE);
                dbs_tuners_ins.sampling_rate = MIN_SAMPLING_RATE;
            } else if (usr_sampling_rate > MAX_SAMPLING_RATE) {
                printk(KERN_WARNING "cpufreq/ondemand: "
                       "specified sampling rate too high, using %"PRIu64"\n",
                       MAX_SAMPLING_RATE);
                dbs_tuners_ins.sampling_rate = MAX_SAMPLING_RATE;
            } else
                dbs_tuners_ins.sampling_rate = usr_sampling_rate;
        }
        dbs_timer_init(this_dbs_info);

        break;

    case CPUFREQ_GOV_STOP:
        if ( !this_dbs_info->enable )
            /* Already not enabled */
            break;

        dbs_timer_exit(this_dbs_info);
        dbs_enable--;

        break;

    case CPUFREQ_GOV_LIMITS:
        if ( this_dbs_info->cur_policy == NULL )
        {
            printk(KERN_WARNING "CPU%d ondemand governor not started yet, "
                   "unable to GOV_LIMIT\n", cpu);
            return -EINVAL;
        }
        if (policy->max < this_dbs_info->cur_policy->cur)
            __cpufreq_driver_target(this_dbs_info->cur_policy,
                                    policy->max, CPUFREQ_RELATION_H);
        else if (policy->min > this_dbs_info->cur_policy->cur)
            __cpufreq_driver_target(this_dbs_info->cur_policy,
                                    policy->min, CPUFREQ_RELATION_L);
        break;
    }
    return 0;
}

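/*
 * Parse governor-specific command line sub-options: "rate" (sampling rate
 * in microseconds), "up_threshold" (percent) and "bias" (powersave bias,
 * clamped to 1000).  Returns 1 if the option was recognised.
 */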
static bool_t __init cpufreq_dbs_handle_option(const char *name, const char *val)
{
    if ( !strcmp(name, "rate") && val )
    {
        usr_sampling_rate = simple_strtoull(val, NULL, 0) * MICROSECS(1);
    }
    else if ( !strcmp(name, "up_threshold") && val )
    {
        unsigned long tmp = simple_strtoul(val, NULL, 0);

        if ( tmp < MIN_FREQUENCY_UP_THRESHOLD )
        {
            printk(XENLOG_WARNING "cpufreq/ondemand: "
                   "specified threshold too low, using %d\n",
                   MIN_FREQUENCY_UP_THRESHOLD);
            tmp = MIN_FREQUENCY_UP_THRESHOLD;
        }
        else if ( tmp > MAX_FREQUENCY_UP_THRESHOLD )
        {
            printk(XENLOG_WARNING "cpufreq/ondemand: "
                   "specified threshold too high, using %d\n",
                   MAX_FREQUENCY_UP_THRESHOLD);
            tmp = MAX_FREQUENCY_UP_THRESHOLD;
        }
        dbs_tuners_ins.up_threshold = tmp;
    }
    else if ( !strcmp(name, "bias") && val )
    {
        unsigned long tmp = simple_strtoul(val, NULL, 0);

        if ( tmp > 1000 )
        {
            printk(XENLOG_WARNING "cpufreq/ondemand: "
                   "specified bias too high, using 1000\n");
            tmp = 1000;
        }
        dbs_tuners_ins.powersave_bias = tmp;
    }
    else
        return 0;
    return 1;
}

struct cpufreq_governor cpufreq_gov_dbs = {
    .name = "ondemand",
    .governor = cpufreq_governor_dbs,
    .handle_option = cpufreq_dbs_handle_option,
};

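/* Register the ondemand governor with the cpufreq core at boot. */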
static int __init cpufreq_gov_dbs_init(void)
{
    return cpufreq_register_governor(&cpufreq_gov_dbs);
}
__initcall(cpufreq_gov_dbs_init);

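/*
 * Stop the sampling timer on the current CPU, provided it was marked
 * stoppable at init time.
 */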
void cpufreq_dbs_timer_suspend(void)
{
    int cpu;

    cpu = smp_processor_id();

    if ( per_cpu(cpu_dbs_info, cpu).stoppable )
    {
        stop_timer(&per_cpu(dbs_timer, cpu));
    }
}

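/*
 * Restart sampling on the current CPU: run the handler immediately if the
 * timer expired while stopped, otherwise re-arm it aligned to the sampling
 * rate.
 */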
void cpufreq_dbs_timer_resume(void)
{
    int cpu;
    struct timer *t;
    s_time_t now;

    cpu = smp_processor_id();

    if ( per_cpu(cpu_dbs_info, cpu).stoppable )
    {
        now = NOW();
        t = &per_cpu(dbs_timer, cpu);
        if ( t->expires <= now )
        {
            t->function(t->data);
        }
        else
        {
            set_timer(t, align_timer(now, dbs_tuners_ins.sampling_rate));
        }
    }
}