1 /*
2  *  xen/arch/x86/acpi/cpufreq/cpufreq_ondemand.c
3  *
4  *  Copyright (C)  2001 Russell King
5  *            (C)  2003 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>.
6  *                      Jun Nakajima <jun.nakajima@intel.com>
7  *             Feb 2008 Liu Jinsong <jinsong.liu@intel.com>
 *             Porting cpufreq_ondemand.c from Linux 2.6.23 to Xen hypervisor
9  *
10  * This program is free software; you can redistribute it and/or modify
11  * it under the terms of the GNU General Public License version 2 as
12  * published by the Free Software Foundation.
13  */
14 
15 #include <xen/types.h>
16 #include <xen/percpu.h>
17 #include <xen/cpumask.h>
18 #include <xen/types.h>
19 #include <xen/sched.h>
20 #include <xen/timer.h>
21 #include <acpi/cpufreq/cpufreq.h>
22 
23 #define DEF_FREQUENCY_UP_THRESHOLD              (80)
24 #define MIN_FREQUENCY_UP_THRESHOLD              (11)
25 #define MAX_FREQUENCY_UP_THRESHOLD              (100)
26 
27 #define MIN_DBS_INTERVAL                        (MICROSECS(100))
28 #define MIN_SAMPLING_RATE_RATIO                 (2)
29 #define MIN_SAMPLING_MILLISECS                  (MIN_SAMPLING_RATE_RATIO * 10)
30 #define MIN_STAT_SAMPLING_RATE                  \
31     (MIN_SAMPLING_MILLISECS * MILLISECS(1))
32 #define MIN_SAMPLING_RATE                       \
33     (def_sampling_rate / MIN_SAMPLING_RATE_RATIO)
34 #define MAX_SAMPLING_RATE                       (500 * def_sampling_rate)
35 #define DEF_SAMPLING_RATE_LATENCY_MULTIPLIER    (1000)
36 #define TRANSITION_LATENCY_LIMIT                (10 * 1000 )
37 
38 static uint64_t def_sampling_rate;
39 static uint64_t usr_sampling_rate;
40 
41 /* Sampling types */
42 enum {DBS_NORMAL_SAMPLE, DBS_SUB_SAMPLE};
43 
44 static DEFINE_PER_CPU(struct cpu_dbs_info_s, cpu_dbs_info);
45 
46 static unsigned int dbs_enable;    /* number of CPUs using this policy */
47 
48 static struct dbs_tuners {
49     uint64_t     sampling_rate;
50     unsigned int up_threshold;
51     unsigned int powersave_bias;
52 } dbs_tuners_ins = {
53     .sampling_rate = 0,
54     .up_threshold = DEF_FREQUENCY_UP_THRESHOLD,
55     .powersave_bias = 0,
56 };
57 
58 static DEFINE_PER_CPU(struct timer, dbs_timer);
59 
60 #ifdef CONFIG_PM_OP
write_ondemand_sampling_rate(unsigned int sampling_rate)61 int write_ondemand_sampling_rate(unsigned int sampling_rate)
62 {
63     if ( (sampling_rate > MAX_SAMPLING_RATE / MICROSECS(1)) ||
64          (sampling_rate < MIN_SAMPLING_RATE / MICROSECS(1)) )
65         return -EINVAL;
66 
67     dbs_tuners_ins.sampling_rate = sampling_rate * MICROSECS(1);
68     return 0;
69 }
70 
write_ondemand_up_threshold(unsigned int up_threshold)71 int write_ondemand_up_threshold(unsigned int up_threshold)
72 {
73     if ( (up_threshold > MAX_FREQUENCY_UP_THRESHOLD) ||
74          (up_threshold < MIN_FREQUENCY_UP_THRESHOLD) )
75         return -EINVAL;
76 
77     dbs_tuners_ins.up_threshold = up_threshold;
78     return 0;
79 }
80 
/*
 * Report the current ondemand tunables through the caller's pointers.
 * Rates are converted from internal nanoseconds to microseconds.
 * Returns 0 on success, -EINVAL if any output pointer is NULL.
 */
int get_cpufreq_ondemand_para(uint32_t *sampling_rate_max,
                              uint32_t *sampling_rate_min,
                              uint32_t *sampling_rate,
                              uint32_t *up_threshold)
{
    if ( sampling_rate_max == NULL || sampling_rate_min == NULL ||
         sampling_rate == NULL || up_threshold == NULL )
        return -EINVAL;

    *sampling_rate_max = MAX_SAMPLING_RATE / MICROSECS(1);
    *sampling_rate_min = MIN_SAMPLING_RATE / MICROSECS(1);
    *sampling_rate = dbs_tuners_ins.sampling_rate / MICROSECS(1);
    *up_threshold = dbs_tuners_ins.up_threshold;

    return 0;
}
97 #endif /* CONFIG_PM_OP */
98 
dbs_check_cpu(struct cpu_dbs_info_s * this_dbs_info)99 static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
100 {
101     uint64_t cur_ns, total_ns;
102     uint64_t max_load_freq = 0;
103     struct cpufreq_policy *policy;
104     unsigned int max;
105     unsigned int j;
106 
107     if (!this_dbs_info->enable)
108         return;
109 
110     policy = this_dbs_info->cur_policy;
111     max = policy->max;
112 
113     if (unlikely(policy->resume)) {
114         __cpufreq_driver_target(policy, max,CPUFREQ_RELATION_H);
115         return;
116     }
117 
118     cur_ns = NOW();
119     total_ns = cur_ns - this_dbs_info->prev_cpu_wall;
120     this_dbs_info->prev_cpu_wall = NOW();
121 
122     if (total_ns < MIN_DBS_INTERVAL)
123         return;
124 
125     /* Get Idle Time */
126     for_each_cpu(j, policy->cpus) {
127         uint64_t idle_ns, total_idle_ns;
128         uint64_t load, load_freq, freq_avg;
129         struct cpu_dbs_info_s *j_dbs_info;
130 
131         j_dbs_info = &per_cpu(cpu_dbs_info, j);
132         total_idle_ns = get_cpu_idle_time(j);
133         idle_ns = total_idle_ns - j_dbs_info->prev_cpu_idle;
134         j_dbs_info->prev_cpu_idle = total_idle_ns;
135 
136         if (unlikely(total_ns < idle_ns))
137             continue;
138 
139         load = 100 * (total_ns - idle_ns) / total_ns;
140 
141         freq_avg = cpufreq_driver_getavg(j, GOV_GETAVG);
142 
143         load_freq = load * freq_avg;
144         if (load_freq > max_load_freq)
145             max_load_freq = load_freq;
146     }
147 
148     /* Check for frequency increase */
149     if (max_load_freq > (uint64_t) dbs_tuners_ins.up_threshold * policy->cur) {
150         /* if we are already at full speed then break out early */
151         if (policy->cur == max)
152             return;
153         __cpufreq_driver_target(policy, max, CPUFREQ_RELATION_H);
154         return;
155     }
156 
157     /* Check for frequency decrease */
158     /* if we cannot reduce the frequency anymore, break out early */
159     if (policy->cur == policy->min)
160         return;
161 
162     /*
163      * The optimal frequency is the frequency that is the lowest that
164      * can support the current CPU usage without triggering the up
165      * policy. To be safe, we focus 10 points under the threshold.
166      */
167     if (max_load_freq
168         < (uint64_t) (dbs_tuners_ins.up_threshold - 10) * policy->cur) {
169         uint64_t freq_next;
170 
171         freq_next = max_load_freq / (dbs_tuners_ins.up_threshold - 10);
172 
173         __cpufreq_driver_target(policy, freq_next, CPUFREQ_RELATION_L);
174     }
175 }
176 
do_dbs_timer(void * dbs)177 static void cf_check do_dbs_timer(void *dbs)
178 {
179     struct cpu_dbs_info_s *dbs_info = (struct cpu_dbs_info_s *)dbs;
180 
181     if (!dbs_info->enable)
182         return;
183 
184     dbs_check_cpu(dbs_info);
185 
186     set_timer(&per_cpu(dbs_timer, dbs_info->cpu),
187             align_timer(NOW() , dbs_tuners_ins.sampling_rate));
188 }
189 
dbs_timer_init(struct cpu_dbs_info_s * dbs_info)190 static void dbs_timer_init(struct cpu_dbs_info_s *dbs_info)
191 {
192     dbs_info->enable = 1;
193 
194     init_timer(&per_cpu(dbs_timer, dbs_info->cpu), do_dbs_timer,
195         (void *)dbs_info, dbs_info->cpu);
196 
197     set_timer(&per_cpu(dbs_timer, dbs_info->cpu), NOW()+dbs_tuners_ins.sampling_rate);
198 
199     if ( processor_pminfo[dbs_info->cpu]->perf.shared_type
200             == CPUFREQ_SHARED_TYPE_HW )
201     {
202         dbs_info->stoppable = 1;
203     }
204 }
205 
/*
 * Disable sampling for @dbs_info and destroy its per-CPU timer once no
 * handler invocation is still in flight.
 */
static void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info)
{
    dbs_info->enable = 0;

    /*
     * The timer function may be running (from cpufreq_dbs_timer_resume) -
     * wait for it to complete.  The resume path sets stoppable to -1 for
     * the duration of its direct handler call, so spin until cmpxchg
     * observes a non-negative value (clearing 1 -> 0 on success).
     */
    while ( cmpxchg(&dbs_info->stoppable, 1, 0) < 0 )
        cpu_relax();

    kill_timer(&per_cpu(dbs_timer, dbs_info->cpu));
}
219 
cpufreq_governor_dbs(struct cpufreq_policy * policy,unsigned int event)220 static int cf_check cpufreq_governor_dbs(
221     struct cpufreq_policy *policy, unsigned int event)
222 {
223     unsigned int cpu = policy->cpu;
224     struct cpu_dbs_info_s *this_dbs_info;
225     unsigned int j;
226 
227     this_dbs_info = &per_cpu(cpu_dbs_info, cpu);
228 
229     switch (event) {
230     case CPUFREQ_GOV_START:
231         if ((!cpu_online(cpu)) || (!policy->cur))
232             return -EINVAL;
233 
234         if (policy->cpuinfo.transition_latency >
235             (TRANSITION_LATENCY_LIMIT * 1000)) {
236             printk(KERN_WARNING "ondemand governor failed to load "
237                 "due to too long transition latency\n");
238             return -EINVAL;
239         }
240         if (this_dbs_info->enable)
241             /* Already enabled */
242             break;
243 
244         dbs_enable++;
245 
246         for_each_cpu(j, policy->cpus) {
247             struct cpu_dbs_info_s *j_dbs_info;
248             j_dbs_info = &per_cpu(cpu_dbs_info, j);
249             j_dbs_info->cur_policy = policy;
250 
251             j_dbs_info->prev_cpu_idle = get_cpu_idle_time(j);
252             j_dbs_info->prev_cpu_wall = NOW();
253         }
254         this_dbs_info->cpu = cpu;
255         /*
256          * Start the timerschedule work, when this governor
257          * is used for first time
258          */
259         if ((dbs_enable == 1) && !dbs_tuners_ins.sampling_rate) {
260             def_sampling_rate = (uint64_t) policy->cpuinfo.transition_latency *
261                 DEF_SAMPLING_RATE_LATENCY_MULTIPLIER;
262 
263             if (def_sampling_rate < MIN_STAT_SAMPLING_RATE)
264                 def_sampling_rate = MIN_STAT_SAMPLING_RATE;
265 
266             if (!usr_sampling_rate)
267                 dbs_tuners_ins.sampling_rate = def_sampling_rate;
268             else if (usr_sampling_rate < MIN_SAMPLING_RATE) {
269                 printk(KERN_WARNING "cpufreq/ondemand: "
270                        "specified sampling rate too low, using %"PRIu64"\n",
271                        MIN_SAMPLING_RATE);
272                 dbs_tuners_ins.sampling_rate = MIN_SAMPLING_RATE;
273             } else if (usr_sampling_rate > MAX_SAMPLING_RATE) {
274                 printk(KERN_WARNING "cpufreq/ondemand: "
275                        "specified sampling rate too high, using %"PRIu64"\n",
276                        MAX_SAMPLING_RATE);
277                 dbs_tuners_ins.sampling_rate = MAX_SAMPLING_RATE;
278             } else
279                 dbs_tuners_ins.sampling_rate = usr_sampling_rate;
280         }
281         dbs_timer_init(this_dbs_info);
282 
283         break;
284 
285     case CPUFREQ_GOV_STOP:
286         if ( !this_dbs_info->enable )
287             /* Already not enabled */
288             break;
289 
290         dbs_timer_exit(this_dbs_info);
291         dbs_enable--;
292 
293         break;
294 
295     case CPUFREQ_GOV_LIMITS:
296         if ( this_dbs_info->cur_policy == NULL )
297         {
298             printk(KERN_WARNING "CPU%d ondemand governor not started yet,"
299                     "unable to GOV_LIMIT\n", cpu);
300             return -EINVAL;
301         }
302         if (policy->max < this_dbs_info->cur_policy->cur)
303             __cpufreq_driver_target(this_dbs_info->cur_policy,
304                 policy->max, CPUFREQ_RELATION_H);
305         else if (policy->min > this_dbs_info->cur_policy->cur)
306             __cpufreq_driver_target(this_dbs_info->cur_policy,
307                 policy->min, CPUFREQ_RELATION_L);
308         break;
309     }
310     return 0;
311 }
312 
cpufreq_dbs_handle_option(const char * name,const char * val)313 static bool __init cf_check cpufreq_dbs_handle_option(
314     const char *name, const char *val)
315 {
316     if ( !strcmp(name, "rate") && val )
317     {
318         usr_sampling_rate = simple_strtoull(val, NULL, 0) * MICROSECS(1);
319     }
320     else if ( !strcmp(name, "up_threshold") && val )
321     {
322         unsigned long tmp = simple_strtoul(val, NULL, 0);
323 
324         if ( tmp < MIN_FREQUENCY_UP_THRESHOLD )
325         {
326             printk(XENLOG_WARNING "cpufreq/ondemand: "
327                    "specified threshold too low, using %d\n",
328                    MIN_FREQUENCY_UP_THRESHOLD);
329             tmp = MIN_FREQUENCY_UP_THRESHOLD;
330         }
331         else if ( tmp > MAX_FREQUENCY_UP_THRESHOLD )
332         {
333             printk(XENLOG_WARNING "cpufreq/ondemand: "
334                    "specified threshold too high, using %d\n",
335                    MAX_FREQUENCY_UP_THRESHOLD);
336             tmp = MAX_FREQUENCY_UP_THRESHOLD;
337         }
338         dbs_tuners_ins.up_threshold = tmp;
339     }
340     else if ( !strcmp(name, "bias") && val )
341     {
342         unsigned long tmp = simple_strtoul(val, NULL, 0);
343 
344         if ( tmp > 1000 )
345         {
346             printk(XENLOG_WARNING "cpufreq/ondemand: "
347                    "specified bias too high, using 1000\n");
348             tmp = 1000;
349         }
350         dbs_tuners_ins.powersave_bias = tmp;
351     }
352     else
353         return 0;
354     return 1;
355 }
356 
/* The "ondemand" governor: adjusts frequency based on sampled CPU load. */
struct cpufreq_governor cpufreq_gov_dbs = {
    .name = "ondemand",
    .governor = cpufreq_governor_dbs,
    .handle_option = cpufreq_dbs_handle_option
};
362 
cpufreq_gov_dbs_init(void)363 static int __init cf_check cpufreq_gov_dbs_init(void)
364 {
365     if ( cpufreq_governor_internal )
366         return 0;
367 
368     return cpufreq_register_governor(&cpufreq_gov_dbs);
369 }
370 __initcall(cpufreq_gov_dbs_init);
371 
cpufreq_dbs_timer_suspend(void)372 void cpufreq_dbs_timer_suspend(void)
373 {
374     int cpu;
375 
376     cpu = smp_processor_id();
377 
378     if ( per_cpu(cpu_dbs_info,cpu).stoppable )
379     {
380         stop_timer( &per_cpu(dbs_timer, cpu) );
381     }
382 }
383 
/*
 * Resume-side counterpart of cpufreq_dbs_timer_suspend(): restart the
 * local CPU's sampling timer if it is stoppable.
 *
 * If the timer already expired while stopped, run its handler directly.
 * stoppable is flipped to -1 around the direct call so dbs_timer_exit()
 * can detect an in-flight handler and wait; if cmpxchg finds stoppable
 * already cleared (exit won the race), the call is skipped.
 */
void cpufreq_dbs_timer_resume(void)
{
    unsigned int cpu = smp_processor_id();
    int8_t *stoppable = &per_cpu(cpu_dbs_info, cpu).stoppable;

    if ( *stoppable )
    {
        s_time_t now = NOW();
        struct timer *t = &per_cpu(dbs_timer, cpu);

        if ( t->expires <= now )
        {
            /* Mark the handler as running; bail if exit already cleared it. */
            if ( !cmpxchg(stoppable, 1, -1) )
                return;
            t->function(t->data);
            /* Handler done - restore the "stoppable" state. */
            (void)cmpxchg(stoppable, -1, 1);
        }
        else
            set_timer(t, align_timer(now, dbs_tuners_ins.sampling_rate));
    }
}
405