/*
 * Copyright (C) 2020-2022 Intel Corporation.
 *
 * SPDX-License-Identifier: BSD-3-Clause
 */

#include <list.h>
#include <asm/per_cpu.h>
#include <schedule.h>
#include <ticks.h>

#define BVT_MCU_MS 1U
/* context switch allowance */
#define BVT_CSA_MCU 5U

/*
 * Limit the weight range to [1, 128]. This range is sufficient to apportion
 * CPU resources among the different types of vCPUs.
 */
#define BVT_WEIGHT_MIN 1U
#define BVT_WEIGHT_MAX 128U

/*
 * The VT (virtual time) ratio is proportional to 1 / weight. Making the VT
 * ratio an integer eases translation between virtual time and physical
 * time.
 * The max error between the theoretical VT ratio and the actual VT ratio is
 * 1 (strictly less than 1, due to integer round-down).
 * The minimum total VT ratio of the vCPUs (at least two of them) is
 * 2 * 8 (the minimum per-vCPU VT ratio).
 * So the max VT ratio share error is about 1/16.
 * To reduce it, we can enlarge BVT_VT_RATIO_MIN.
 * However, increasing the VT ratio reduces the total time needed to
 * overflow AVT. AVT is of type int64_t, the max VT ratio is 1024 and the
 * MCU is 1 ms, so the time to overflow AVT is about:
 * 2^63 / (1024 * 1000) s ~= 9 * 10^12 s ~= 10^8 days
 * This is so long that the AVT overflow case can safely be ignored.
 */
#define BVT_VT_RATIO_MIN 8U
#define BVT_VT_RATIO_MAX (BVT_WEIGHT_MAX * BVT_VT_RATIO_MIN / BVT_WEIGHT_MIN)
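
/*
 * Illustrative arithmetic (a reading aid, not in the original source):
 * vt_ratio is derived in sched_bvt_init_data() as BVT_VT_RATIO_MAX / weight,
 * and BVT_VT_RATIO_MAX = 128 * 8 / 1 = 1024, so
 *   weight = 1   (BVT_WEIGHT_MIN) -> vt_ratio = 1024 / 1   = 1024
 *   weight = 64                   -> vt_ratio = 1024 / 64  = 16
 *   weight = 128 (BVT_WEIGHT_MAX) -> vt_ratio = 1024 / 128 = 8
 * A larger weight yields a smaller vt_ratio, i.e. the thread's virtual time
 * advances more slowly per physical tick, so it earns a larger CPU share.
 */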

struct sched_bvt_data {
	/* keep list as the first item */
	struct list_head list;
	/* minimum charging unit in cycles */
	uint64_t mcu;
	/* a thread receives a share of CPU in proportion to its weight */
	uint8_t weight;
	/* virtual time advance variable, proportional to 1 / weight */
	uint64_t vt_ratio;
	/* the fields below configure BVT's warp mechanism; warping is not yet
	 * implemented here (see the TODOs in update_vt() and sched_bvt_wake())
	 */
	bool warp_on;
	int32_t warp_value;
	uint32_t warp_limit;
	uint32_t unwarp_period;
	/* actual virtual time in units of mcu */
	int64_t avt;
	/* effective virtual time in units of mcu */
	int64_t evt;
	/* virtual time remainder (less than one mcu) carried between updates */
	uint64_t residual;
	/* physical time (in cycles) when this thread was last scheduled in */
	uint64_t start_tsc;
};
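
/*
 * Summary of the virtual-time bookkeeping used below (a reading aid, not
 * part of the original source): each runnable thread advances its AVT in
 * proportion to vt_ratio; EVT would be AVT minus the warp value once warp
 * is implemented (see the TODOs in update_vt() and sched_bvt_wake()) and
 * currently equals AVT; SVT follows the AVT of the head of the runqueue,
 * i.e. the minimum AVT of any runnable thread, and is used to re-align the
 * AVT of a thread that wakes from a long sleep.
 */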

/*
 * @pre obj != NULL
 * @pre obj->data != NULL
 */
static bool is_inqueue(struct thread_object *obj)
{
	struct sched_bvt_data *data = (struct sched_bvt_data *)obj->data;

	return !list_empty(&data->list);
}

/*
 * @pre bvt_ctl != NULL
 */
static void update_svt(struct sched_bvt_control *bvt_ctl)
{
	struct sched_bvt_data *obj_data;
	struct thread_object *tmp_obj;

	if (!list_empty(&bvt_ctl->runqueue)) {
		tmp_obj = get_first_item(&bvt_ctl->runqueue, struct thread_object, data);
		obj_data = (struct sched_bvt_data *)tmp_obj->data;
		bvt_ctl->svt = obj_data->avt;
	}
	/* if the runqueue is empty, the svt simply keeps its previous value */
}

/*
 * @pre obj != NULL
 * @pre obj->data != NULL
 * @pre obj->sched_ctl != NULL
 * @pre obj->sched_ctl->priv != NULL
 */
static void runqueue_add(struct thread_object *obj)
{
	struct sched_bvt_control *bvt_ctl =
		(struct sched_bvt_control *)obj->sched_ctl->priv;
	struct sched_bvt_data *data = (struct sched_bvt_data *)obj->data;
	struct list_head *pos;
	struct thread_object *iter_obj;
	struct sched_bvt_data *iter_data;

	/*
	 * The earliest evt has the highest priority; the runqueue is kept
	 * ordered by priority, i.e. sorted by evt in ascending order.
	 * See the worked example after this function.
	 */
	if (list_empty(&bvt_ctl->runqueue)) {
		list_add(&data->list, &bvt_ctl->runqueue);
	} else {
		list_for_each(pos, &bvt_ctl->runqueue) {
			iter_obj = container_of(pos, struct thread_object, data);
			iter_data = (struct sched_bvt_data *)iter_obj->data;
			if (iter_data->evt > data->evt) {
				list_add_node(&data->list, pos->prev, pos);
				break;
			}
		}
		if (!is_inqueue(obj)) {
			list_add_tail(&data->list, &bvt_ctl->runqueue);
		}
	}
}
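
/*
 * Illustrative example (hypothetical numbers, not in the original source):
 * with a runqueue whose evt values are [3, 5, 9], adding a thread with
 * evt = 5 inserts it before the node with evt = 9, i.e. after the existing
 * 5, because the comparison above is strictly greater-than. Threads with
 * equal evt therefore keep their FIFO order. If no node has a larger evt,
 * the fall-through list_add_tail() places the thread at the tail.
 */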

/*
 * @pre obj != NULL
 * @pre obj->data != NULL
 */
static void runqueue_remove(struct thread_object *obj)
{
	struct sched_bvt_data *data = (struct sched_bvt_data *)obj->data;

	list_del_init(&data->list);
}

/*
 * @brief Get the SVT (scheduler virtual time), which indicates the
 * minimum AVT of any runnable thread.
 * @pre obj != NULL
 * @pre obj->data != NULL
 * @pre obj->sched_ctl != NULL
 * @pre obj->sched_ctl->priv != NULL
 */
static int64_t get_svt(struct thread_object *obj)
{
	struct sched_bvt_control *bvt_ctl = (struct sched_bvt_control *)obj->sched_ctl->priv;

	return bvt_ctl->svt;
}

static void sched_tick_handler(void *param)
{
	struct sched_control *ctl = (struct sched_control *)param;
	struct sched_bvt_control *bvt_ctl = (struct sched_bvt_control *)ctl->priv;
	struct thread_object *current;
	uint16_t pcpu_id = get_pcpu_id();
	uint64_t rflags;

	obtain_schedule_lock(pcpu_id, &rflags);
	current = ctl->curr_obj;

	if (current != NULL) {
		/* only a non-idle thread needs to consume its run_countdown */
		if (!is_idle_thread(current)) {
			make_reschedule_request(pcpu_id);
		} else {
			/* the idle thread only yields when another thread is runnable */
			if (!list_empty(&bvt_ctl->runqueue)) {
				make_reschedule_request(pcpu_id);
			}
		}
	}
	release_schedule_lock(pcpu_id, rflags);
}

/*
 * @pre ctl->pcpu_id == get_pcpu_id()
 */
static int sched_bvt_init(struct sched_control *ctl)
{
	struct sched_bvt_control *bvt_ctl = &per_cpu(sched_bvt_ctl, ctl->pcpu_id);
	int ret = 0;

	ASSERT(ctl->pcpu_id == get_pcpu_id(), "Init scheduler on wrong CPU!");

	ctl->priv = bvt_ctl;
	INIT_LIST_HEAD(&bvt_ctl->runqueue);

	/* the tick_timer is initialized here; it is armed as a one-shot
	 * timer in sched_bvt_pick_next()
	 */
	initialize_timer(&bvt_ctl->tick_timer, sched_tick_handler, ctl, 0, 0);

	return ret;
}

static void sched_bvt_deinit(struct sched_control *ctl)
{
	struct sched_bvt_control *bvt_ctl = (struct sched_bvt_control *)ctl->priv;

	del_timer(&bvt_ctl->tick_timer);
}

static void sched_bvt_init_data(struct thread_object *obj, struct sched_params *params)
{
	struct sched_bvt_data *data;

	data = (struct sched_bvt_data *)obj->data;
	INIT_LIST_HEAD(&data->list);
	data->mcu = BVT_MCU_MS * TICKS_PER_MS;
	data->weight = clamp(params->bvt_weight, BVT_WEIGHT_MIN, BVT_WEIGHT_MAX);
	data->warp_value = params->bvt_warp_value;
	data->warp_limit = params->bvt_warp_limit;
	data->unwarp_period = params->bvt_unwarp_period;
	data->warp_on = false; /* warp disabled by default */
	data->vt_ratio = BVT_VT_RATIO_MAX / data->weight;
	data->residual = 0U;
}

static void sched_bvt_suspend(struct sched_control *ctl)
{
	/* suspend only needs to tear down the tick_timer; see the note at
	 * the sched_bvt definition below
	 */
	sched_bvt_deinit(ctl);
}

/* convert virtual time to physical time (same units, scaled by 1 / ratio) */
static uint64_t v2p(uint64_t virt_time, uint64_t ratio)
{
	return (uint64_t)(virt_time / ratio);
}

/* convert physical time to virtual time (same units, scaled by ratio) */
static uint64_t p2v(uint64_t phy_time, uint64_t ratio)
{
	return (uint64_t)(phy_time * ratio);
}

static void update_vt(struct thread_object *obj)
{
	struct sched_bvt_data *data;
	uint64_t now_tsc = cpu_ticks();
	uint64_t v_delta, delta_mcu = 0U;

	data = (struct sched_bvt_data *)obj->data;

	/* update the current thread's avt and evt */
	if (now_tsc > data->start_tsc) {
		v_delta = p2v(now_tsc - data->start_tsc, data->vt_ratio) + data->residual;
		delta_mcu = (uint64_t)(v_delta / data->mcu);
		data->residual = v_delta % data->mcu;
	}
	data->avt += delta_mcu;
	/* TODO: evt = avt - (warp ? warpback : 0U) */
	data->evt = data->avt;

	/* re-insert to keep the runqueue sorted by the updated evt */
	if (is_inqueue(obj)) {
		runqueue_remove(obj);
		runqueue_add(obj);
	}
}
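
/*
 * Illustrative arithmetic for update_vt() (hypothetical numbers, assuming a
 * 2.4 GHz TSC so that mcu = 1 ms = 2400000 cycles): with vt_ratio = 16,
 * residual = 0 and a measured delta of 1000000 cycles,
 *   v_delta   = 1000000 * 16 + 0 = 16000000 virtual cycles
 *   delta_mcu = 16000000 / 2400000 = 6 mcu
 *   residual  = 16000000 % 2400000 = 1600000 virtual cycles
 * The residual is carried into the next update so that no virtual time is
 * lost to the integer division.
 */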

static struct thread_object *sched_bvt_pick_next(struct sched_control *ctl)
{
	struct sched_bvt_control *bvt_ctl = (struct sched_bvt_control *)ctl->priv;
	struct thread_object *first_obj = NULL, *second_obj = NULL;
	struct sched_bvt_data *first_data = NULL, *second_data = NULL;
	struct list_head *first, *sec;
	struct thread_object *next = NULL;
	struct thread_object *current = ctl->curr_obj;
	uint64_t now_tsc = cpu_ticks();
	uint64_t delta_mcu = 0U;
	uint64_t tick_period = BVT_MCU_MS * TICKS_PER_MS;
	uint64_t run_countdown;

	if (!is_idle_thread(current)) {
		update_vt(current);
	}
	/* always align the svt with the avt of the first thread object in the runqueue */
	update_svt(bvt_ctl);

	del_timer(&bvt_ctl->tick_timer);

	if (!list_empty(&bvt_ctl->runqueue)) {
		first = bvt_ctl->runqueue.next;
		sec = (first->next == &bvt_ctl->runqueue) ? NULL : first->next;

		first_obj = container_of(first, struct thread_object, data);
		first_data = (struct sched_bvt_data *)first_obj->data;

		/* The run_countdown describes how many mcu the next thread
		 * can run for. A one-shot timer is set to expire at
		 * current time + run_countdown, and the next thread runs until
		 * that timer fires. But when there is only one object in the
		 * runqueue, it can run forever, so no timer is set.
		 */
		if (sec != NULL) {
			second_obj = container_of(sec, struct thread_object, data);
			second_data = (struct sched_bvt_data *)second_obj->data;
			delta_mcu = second_data->evt - first_data->evt;
			run_countdown = v2p(delta_mcu, first_data->vt_ratio) + BVT_CSA_MCU;
		} else {
			run_countdown = UINT64_MAX;
		}
		first_data->start_tsc = now_tsc;
		next = first_obj;
		if (run_countdown != UINT64_MAX) {
			update_timer(&bvt_ctl->tick_timer, cpu_ticks() + run_countdown * tick_period, 0);
			(void)add_timer(&bvt_ctl->tick_timer);
		}
	} else {
		next = &get_cpu_var(idle);
	}

	return next;
}
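
/*
 * Illustrative arithmetic for sched_bvt_pick_next() (hypothetical numbers):
 * if the head of the runqueue has evt = 100 and the second thread has
 * evt = 120, then delta_mcu = 20. With the head's vt_ratio = 8,
 *   run_countdown = v2p(20, 8) + BVT_CSA_MCU = 2 + 5 = 7 mcu,
 * so the one-shot timer fires 7 ms later (BVT_MCU_MS = 1 ms), at which
 * point the tick handler requests a reschedule. The context switch
 * allowance keeps threads with nearly equal evt from switching on every
 * tick.
 */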

static void sched_bvt_sleep(struct thread_object *obj)
{
	runqueue_remove(obj);
}

static void sched_bvt_wake(struct thread_object *obj)
{
	struct sched_bvt_data *data;
	int64_t svt, threshold;

	data = (struct sched_bvt_data *)obj->data;
	svt = get_svt(obj);
	threshold = svt - BVT_CSA_MCU;
	/* adjust the AVT of a thread that wakes up after a long sleep, so it
	 * cannot monopolize the CPU with credit accumulated while sleeping
	 */
	data->avt = (data->avt > threshold) ? data->avt : svt;
	/* TODO: evt = avt - (warp ? warpback : 0U) */
	data->evt = data->avt;
	/* add to the runqueue in evt order */
	runqueue_add(obj);
}
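
/*
 * Illustrative example for sched_bvt_wake() (hypothetical numbers, with
 * BVT_CSA_MCU = 5): if svt = 50, then threshold = 45.
 *   - A thread that slept for a long time and still has avt = 10 is brought
 *     forward to avt = 50 (the svt), so the virtual time "saved up" while
 *     sleeping cannot be spent monopolizing the CPU.
 *   - A thread with avt = 48 (> 45) keeps its own avt, since it is already
 *     close enough to the rest of the runqueue.
 */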

struct acrn_scheduler sched_bvt = {
	.name		= "sched_bvt",
	.init		= sched_bvt_init,
	.init_data	= sched_bvt_init_data,
	.pick_next	= sched_bvt_pick_next,
	.sleep		= sched_bvt_sleep,
	.wake		= sched_bvt_wake,
	.deinit		= sched_bvt_deinit,
	/* For now, suspend only needs to do del_timer; the add_timer is
	 * deferred until the first schedule after resume.
	 * So there is no need to add a .resume hook yet.
	 */
	.suspend	= sched_bvt_suspend,
};