/*
 *  xen/arch/x86/acpi/cpufreq/cpufreq_ondemand.c
 *
 *  Copyright (C)  2001 Russell King
 *            (C)  2003 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>.
 *                      Jun Nakajima <jun.nakajima@intel.com>
 *             Feb 2008 Liu Jinsong <jinsong.liu@intel.com>
 *             Porting cpufreq_ondemand.c from Linux 2.6.23 to Xen hypervisor
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <xen/types.h>
#include <xen/percpu.h>
#include <xen/cpumask.h>
#include <xen/sched.h>
#include <xen/timer.h>
#include <acpi/cpufreq/cpufreq.h>

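/*
 * Tunable bounds.  Thresholds are percentages of CPU utilization;
 * sampling rates are kept internally in nanoseconds (Xen's native time
 * unit), while the accessors below exchange them in microseconds.
 */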
#define DEF_FREQUENCY_UP_THRESHOLD              (80)
#define MIN_FREQUENCY_UP_THRESHOLD              (11)
#define MAX_FREQUENCY_UP_THRESHOLD              (100)

#define MIN_DBS_INTERVAL                        (MICROSECS(100))
#define MIN_SAMPLING_RATE_RATIO                 (2)
#define MIN_SAMPLING_MILLISECS                  (MIN_SAMPLING_RATE_RATIO * 10)
#define MIN_STAT_SAMPLING_RATE                  \
    (MIN_SAMPLING_MILLISECS * MILLISECS(1))
#define MIN_SAMPLING_RATE                       \
    (def_sampling_rate / MIN_SAMPLING_RATE_RATIO)
#define MAX_SAMPLING_RATE                       (500 * def_sampling_rate)
#define DEF_SAMPLING_RATE_LATENCY_MULTIPLIER    (1000)
#define TRANSITION_LATENCY_LIMIT                (10 * 1000)

static uint64_t def_sampling_rate;
static uint64_t usr_sampling_rate;

/* Sampling types */
enum {DBS_NORMAL_SAMPLE, DBS_SUB_SAMPLE};

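/* Per-CPU governor state. */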
static DEFINE_PER_CPU(struct cpu_dbs_info_s, cpu_dbs_info);

static unsigned int dbs_enable;    /* number of CPUs using this policy */

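/*
 * Governor-wide tunables: sampling_rate is in nanoseconds, up_threshold
 * is a percentage.  powersave_bias (0..1000) is parsed and stored but
 * not consulted by dbs_check_cpu() in this port.
 */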
static struct dbs_tuners {
    uint64_t     sampling_rate;
    unsigned int up_threshold;
    unsigned int powersave_bias;
} dbs_tuners_ins = {
    .sampling_rate = 0,
    .up_threshold = DEF_FREQUENCY_UP_THRESHOLD,
    .powersave_bias = 0,
};

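/* Per-CPU sampling timer; its handler is do_dbs_timer(). */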
static DEFINE_PER_CPU(struct timer, dbs_timer);

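/*
 * Accessors for the external control interface.  Rates cross this
 * boundary in microseconds and are converted to/from the internal
 * nanosecond representation here.
 */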
int write_ondemand_sampling_rate(unsigned int sampling_rate)
{
    if ( (sampling_rate > MAX_SAMPLING_RATE / MICROSECS(1)) ||
         (sampling_rate < MIN_SAMPLING_RATE / MICROSECS(1)) )
        return -EINVAL;

    dbs_tuners_ins.sampling_rate = sampling_rate * MICROSECS(1);
    return 0;
}

int write_ondemand_up_threshold(unsigned int up_threshold)
{
    if ( (up_threshold > MAX_FREQUENCY_UP_THRESHOLD) ||
         (up_threshold < MIN_FREQUENCY_UP_THRESHOLD) )
        return -EINVAL;

    dbs_tuners_ins.up_threshold = up_threshold;
    return 0;
}

int get_cpufreq_ondemand_para(uint32_t *sampling_rate_max,
                              uint32_t *sampling_rate_min,
                              uint32_t *sampling_rate,
                              uint32_t *up_threshold)
{
    if (!sampling_rate_max || !sampling_rate_min ||
        !sampling_rate || !up_threshold)
        return -EINVAL;

    *sampling_rate_max = MAX_SAMPLING_RATE / MICROSECS(1);
    *sampling_rate_min = MIN_SAMPLING_RATE / MICROSECS(1);
    *sampling_rate = dbs_tuners_ins.sampling_rate / MICROSECS(1);
    *up_threshold = dbs_tuners_ins.up_threshold;

    return 0;
}

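/*
 * Core sampling routine: for every CPU covered by the policy, compute
 * the load since the last sample as (wall - idle) / wall, scale it by
 * the average frequency, and keep the maximum.  If that exceeds
 * up_threshold percent of the current frequency, jump straight to the
 * maximum frequency; otherwise, if we can still go lower, pick the
 * smallest frequency that keeps the load about 10 points below the
 * threshold.
 */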
static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
{
    uint64_t cur_ns, total_ns;
    uint64_t max_load_freq = 0;
    struct cpufreq_policy *policy;
    unsigned int max;
    unsigned int j;

    if (!this_dbs_info->enable)
        return;

    policy = this_dbs_info->cur_policy;
    max = policy->max;

    if (unlikely(policy->resume)) {
        __cpufreq_driver_target(policy, max, CPUFREQ_RELATION_H);
        return;
    }

    cur_ns = NOW();
    total_ns = cur_ns - this_dbs_info->prev_cpu_wall;
    this_dbs_info->prev_cpu_wall = cur_ns;

    if (total_ns < MIN_DBS_INTERVAL)
        return;

    /* Get Idle Time */
    for_each_cpu(j, policy->cpus) {
        uint64_t idle_ns, total_idle_ns;
        uint64_t load, load_freq, freq_avg;
        struct cpu_dbs_info_s *j_dbs_info;

        j_dbs_info = &per_cpu(cpu_dbs_info, j);
        total_idle_ns = get_cpu_idle_time(j);
        idle_ns = total_idle_ns - j_dbs_info->prev_cpu_idle;
        j_dbs_info->prev_cpu_idle = total_idle_ns;

        if (unlikely(total_ns < idle_ns))
            continue;

        load = 100 * (total_ns - idle_ns) / total_ns;

        freq_avg = cpufreq_driver_getavg(j, GOV_GETAVG);

        load_freq = load * freq_avg;
        if (load_freq > max_load_freq)
            max_load_freq = load_freq;
    }

    /* Check for frequency increase */
    if (max_load_freq > (uint64_t) dbs_tuners_ins.up_threshold * policy->cur) {
        /* if we are already at full speed then break out early */
        if (policy->cur == max)
            return;
        __cpufreq_driver_target(policy, max, CPUFREQ_RELATION_H);
        return;
    }

    /* Check for frequency decrease */
    /* if we cannot reduce the frequency anymore, break out early */
    if (policy->cur == policy->min)
        return;

    /*
     * The optimal frequency is the lowest frequency that can support
     * the current CPU usage without triggering the up policy.  To be
     * safe, we aim 10 points under the threshold.
     */
    if (max_load_freq
        < (uint64_t) (dbs_tuners_ins.up_threshold - 10) * policy->cur) {
        uint64_t freq_next;

        freq_next = max_load_freq / (dbs_tuners_ins.up_threshold - 10);

        __cpufreq_driver_target(policy, freq_next, CPUFREQ_RELATION_L);
    }
}

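/* Periodic timer handler: sample, then re-arm aligned to the sampling rate. */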
static void do_dbs_timer(void *dbs)
{
    struct cpu_dbs_info_s *dbs_info = (struct cpu_dbs_info_s *)dbs;

    if (!dbs_info->enable)
        return;

    dbs_check_cpu(dbs_info);

    set_timer(&per_cpu(dbs_timer, dbs_info->cpu),
              align_timer(NOW(), dbs_tuners_ins.sampling_rate));
}

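/*
 * Arm this CPU's sampling timer.  Policies with hardware coordination
 * (CPUFREQ_SHARED_TYPE_HW) are marked stoppable, allowing the timer to
 * be paused via cpufreq_dbs_timer_suspend(), e.g. across idle.
 */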
static void dbs_timer_init(struct cpu_dbs_info_s *dbs_info)
{
    dbs_info->enable = 1;

    init_timer(&per_cpu(dbs_timer, dbs_info->cpu), do_dbs_timer,
               (void *)dbs_info, dbs_info->cpu);

    set_timer(&per_cpu(dbs_timer, dbs_info->cpu),
              NOW() + dbs_tuners_ins.sampling_rate);

    if ( processor_pminfo[dbs_info->cpu]->perf.shared_type
         == CPUFREQ_SHARED_TYPE_HW )
    {
        dbs_info->stoppable = 1;
    }
}

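/* Disarm and destroy this CPU's sampling timer. */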
static void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info)
{
    dbs_info->enable = 0;
    dbs_info->stoppable = 0;
    kill_timer(&per_cpu(dbs_timer, dbs_info->cpu));
}

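/*
 * Governor entry point, multiplexed on the event code:
 *   GOV_START  - initialize per-CPU state, pick a sampling rate on first
 *                use, and start the sampling timer;
 *   GOV_STOP   - tear the timer down;
 *   GOV_LIMITS - clamp the current frequency into [policy->min, policy->max].
 */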
int cpufreq_governor_dbs(struct cpufreq_policy *policy, unsigned int event)
{
    unsigned int cpu = policy->cpu;
    struct cpu_dbs_info_s *this_dbs_info;
    unsigned int j;

    this_dbs_info = &per_cpu(cpu_dbs_info, cpu);

    switch (event) {
    case CPUFREQ_GOV_START:
        if ((!cpu_online(cpu)) || (!policy->cur))
            return -EINVAL;

        if (policy->cpuinfo.transition_latency >
            (TRANSITION_LATENCY_LIMIT * 1000)) {
            printk(KERN_WARNING "ondemand governor failed to load: "
                "transition latency too long\n");
            return -EINVAL;
        }
        if (this_dbs_info->enable)
            /* Already enabled */
            break;

        dbs_enable++;

        for_each_cpu(j, policy->cpus) {
            struct cpu_dbs_info_s *j_dbs_info;
            j_dbs_info = &per_cpu(cpu_dbs_info, j);
            j_dbs_info->cur_policy = policy;

            j_dbs_info->prev_cpu_idle = get_cpu_idle_time(j);
            j_dbs_info->prev_cpu_wall = NOW();
        }
        this_dbs_info->cpu = cpu;
        /*
         * Pick the sampling rate when this governor is started for
         * the first time.
         */
        if ((dbs_enable == 1) && !dbs_tuners_ins.sampling_rate) {
            def_sampling_rate = (uint64_t) policy->cpuinfo.transition_latency *
                DEF_SAMPLING_RATE_LATENCY_MULTIPLIER;

            if (def_sampling_rate < MIN_STAT_SAMPLING_RATE)
                def_sampling_rate = MIN_STAT_SAMPLING_RATE;

            if (!usr_sampling_rate)
                dbs_tuners_ins.sampling_rate = def_sampling_rate;
            else if (usr_sampling_rate < MIN_SAMPLING_RATE) {
                printk(KERN_WARNING "cpufreq/ondemand: "
                       "specified sampling rate too low, using %"PRIu64"\n",
                       MIN_SAMPLING_RATE);
                dbs_tuners_ins.sampling_rate = MIN_SAMPLING_RATE;
            } else if (usr_sampling_rate > MAX_SAMPLING_RATE) {
                printk(KERN_WARNING "cpufreq/ondemand: "
                       "specified sampling rate too high, using %"PRIu64"\n",
                       MAX_SAMPLING_RATE);
                dbs_tuners_ins.sampling_rate = MAX_SAMPLING_RATE;
            } else
                dbs_tuners_ins.sampling_rate = usr_sampling_rate;
        }
        dbs_timer_init(this_dbs_info);

        break;

    case CPUFREQ_GOV_STOP:
        if ( !this_dbs_info->enable )
            /* Already not enabled */
            break;

        dbs_timer_exit(this_dbs_info);
        dbs_enable--;

        break;

    case CPUFREQ_GOV_LIMITS:
        if ( this_dbs_info->cur_policy == NULL )
        {
            printk(KERN_WARNING "CPU%d: ondemand governor not started yet, "
                   "unable to GOV_LIMIT\n", cpu);
            return -EINVAL;
        }
        if (policy->max < this_dbs_info->cur_policy->cur)
            __cpufreq_driver_target(this_dbs_info->cur_policy,
                policy->max, CPUFREQ_RELATION_H);
        else if (policy->min > this_dbs_info->cur_policy->cur)
            __cpufreq_driver_target(this_dbs_info->cur_policy,
                policy->min, CPUFREQ_RELATION_L);
        break;
    }
    return 0;
}

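/*
 * Boot-time option parser.  Recognized name/value pairs:
 *   rate=<microseconds>, up_threshold=<percent, 11..100>, bias=<0..1000>.
 * Out-of-range thresholds and biases are clamped with a warning; the
 * rate is validated later, once def_sampling_rate is known.
 */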
static bool_t __init cpufreq_dbs_handle_option(const char *name, const char *val)
{
    if ( !strcmp(name, "rate") && val )
    {
        usr_sampling_rate = simple_strtoull(val, NULL, 0) * MICROSECS(1);
    }
    else if ( !strcmp(name, "up_threshold") && val )
    {
        unsigned long tmp = simple_strtoul(val, NULL, 0);

        if ( tmp < MIN_FREQUENCY_UP_THRESHOLD )
        {
            printk(XENLOG_WARNING "cpufreq/ondemand: "
                   "specified threshold too low, using %d\n",
                   MIN_FREQUENCY_UP_THRESHOLD);
            tmp = MIN_FREQUENCY_UP_THRESHOLD;
        }
        else if ( tmp > MAX_FREQUENCY_UP_THRESHOLD )
        {
            printk(XENLOG_WARNING "cpufreq/ondemand: "
                   "specified threshold too high, using %d\n",
                   MAX_FREQUENCY_UP_THRESHOLD);
            tmp = MAX_FREQUENCY_UP_THRESHOLD;
        }
        dbs_tuners_ins.up_threshold = tmp;
    }
    else if ( !strcmp(name, "bias") && val )
    {
        unsigned long tmp = simple_strtoul(val, NULL, 0);

        if ( tmp > 1000 )
        {
            printk(XENLOG_WARNING "cpufreq/ondemand: "
                   "specified bias too high, using 1000\n");
            tmp = 1000;
        }
        dbs_tuners_ins.powersave_bias = tmp;
    }
    else
        return 0;
    return 1;
}

struct cpufreq_governor cpufreq_gov_dbs = {
    .name = "ondemand",
    .governor = cpufreq_governor_dbs,
    .handle_option = cpufreq_dbs_handle_option
};

static int __init cpufreq_gov_dbs_init(void)
{
    return cpufreq_register_governor(&cpufreq_gov_dbs);
}
__initcall(cpufreq_gov_dbs_init);

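/*
 * Pause/resume hooks for the sampling timer on the local CPU, honored
 * only when the timer was marked stoppable at init time.  On resume, a
 * sample that came due while the timer was stopped runs immediately;
 * otherwise the timer is simply re-aligned to the sampling rate.
 */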
void cpufreq_dbs_timer_suspend(void)
{
    int cpu;

    cpu = smp_processor_id();

    if ( per_cpu(cpu_dbs_info, cpu).stoppable )
    {
        stop_timer(&per_cpu(dbs_timer, cpu));
    }
}

void cpufreq_dbs_timer_resume(void)
{
    int cpu;
    struct timer *t;
    s_time_t now;

    cpu = smp_processor_id();

    if ( per_cpu(cpu_dbs_info, cpu).stoppable )
    {
        now = NOW();
        t = &per_cpu(dbs_timer, cpu);
        if ( t->expires <= now )
        {
            t->function(t->data);
        }
        else
        {
            set_timer(t, align_timer(now, dbs_tuners_ins.sampling_rate));
        }
    }
}