/*
 * Copyright (C) 2020-2022 Intel Corporation.
 *
 * SPDX-License-Identifier: BSD-3-Clause
 */

#include <list.h>
#include <asm/per_cpu.h>
#include <schedule.h>
#include <ticks.h>

#define BVT_MCU_MS		1U
/* context switch allowance, in MCUs */
#define BVT_CSA_MCU		5U

/*
 * Limit the weight range to [1, 128]; this is enough to allocate CPU
 * resources among different types of vCPUs.
 */
#define BVT_WEIGHT_MIN		1U
#define BVT_WEIGHT_MAX		128U

/*
 * The VT (virtual time) ratio is proportional to 1 / weight, and making the
 * VT ratio an integer eases translation between virtual time and physical
 * time.
 * The max difference (theoretical VT ratio - actual VT ratio) is
 *   1 (< 1 because of integer round-down).
 * The minimum total VT ratio of the vCPUs (at least two) is
 *   2 * 8 (the minimum per-vCPU VT ratio).
 * So the max VT ratio share error is about 1/16.
 * To reduce it, we can enlarge BVT_VT_RATIO_MIN.
 * However, increasing the VT ratio reduces the time needed to overflow
 * AVT. AVT is of type int64_t, the max VT ratio is 1024, and the MCU is
 * 1 ms, so the time to overflow AVT is about:
 *   2^63 / (1024 * 1000) s ~= 9 * 10^12 s ~= 10^8 days
 * This is so large that the AVT overflow case can be ignored.
 */
#define BVT_VT_RATIO_MIN	8U
#define BVT_VT_RATIO_MAX	(BVT_WEIGHT_MAX * BVT_VT_RATIO_MIN / BVT_WEIGHT_MIN)
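
/*
 * Worked example (illustrative only, not part of the original code):
 * vt_ratio = BVT_VT_RATIO_MAX / weight = 1024 / weight, so
 *   weight 1   -> vt_ratio 1024 (virtual time advances fastest, smallest CPU share)
 *   weight 3   -> vt_ratio 341  (theoretical 341.33, round-down error < 1)
 *   weight 128 -> vt_ratio 8    (virtual time advances slowest, largest CPU share)
 */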

struct sched_bvt_data {
	/* keep list as the first item */
	struct list_head list;
	/* minimum charging unit in cycles */
	uint64_t mcu;
	/* a thread receives a share of the CPU in proportion to its weight */
	uint8_t weight;
	/* virtual time advance variable, proportional to 1 / weight */
	uint64_t vt_ratio;
	bool warp_on;
	int32_t warp_value;
	uint32_t warp_limit;
	uint32_t unwarp_period;
	/* actual virtual time in units of mcu */
	int64_t avt;
	/* effective virtual time in units of mcu */
	int64_t evt;
	uint64_t residual;

	uint64_t start_tsc;
};

/*
 * @pre obj != NULL
 * @pre obj->data != NULL
 */
static bool is_inqueue(struct thread_object *obj)
{
	struct sched_bvt_data *data = (struct sched_bvt_data *)obj->data;
	return !list_empty(&data->list);
}

/*
 * @pre bvt_ctl != NULL
 */
static void update_svt(struct sched_bvt_control *bvt_ctl)
{
	struct sched_bvt_data *obj_data;
	struct thread_object *tmp_obj;

	if (!list_empty(&bvt_ctl->runqueue)) {
		tmp_obj = get_first_item(&bvt_ctl->runqueue, struct thread_object, data);
		obj_data = (struct sched_bvt_data *)tmp_obj->data;
		bvt_ctl->svt = obj_data->avt;
	}
}

/*
 * @pre obj != NULL
 * @pre obj->data != NULL
 * @pre obj->sched_ctl != NULL
 * @pre obj->sched_ctl->priv != NULL
 */
static void runqueue_add(struct thread_object *obj)
{
	struct sched_bvt_control *bvt_ctl =
		(struct sched_bvt_control *)obj->sched_ctl->priv;
	struct sched_bvt_data *data = (struct sched_bvt_data *)obj->data;
	struct list_head *pos;
	struct thread_object *iter_obj;
	struct sched_bvt_data *iter_data;

	/*
	 * The earliest EVT has the highest priority;
	 * the runqueue is ordered by priority.
	 */
	if (list_empty(&bvt_ctl->runqueue)) {
		list_add(&data->list, &bvt_ctl->runqueue);
	} else {
		list_for_each(pos, &bvt_ctl->runqueue) {
			iter_obj = container_of(pos, struct thread_object, data);
			iter_data = (struct sched_bvt_data *)iter_obj->data;
			if (iter_data->evt > data->evt) {
				list_add_node(&data->list, pos->prev, pos);
				break;
			}
		}
		/* data->evt is the largest in the queue; append at the tail */
		if (!is_inqueue(obj)) {
			list_add_tail(&data->list, &bvt_ctl->runqueue);
		}
	}
}

/*
 * @pre obj != NULL
 * @pre obj->data != NULL
 */
static void runqueue_remove(struct thread_object *obj)
{
	struct sched_bvt_data *data = (struct sched_bvt_data *)obj->data;

	list_del_init(&data->list);
}

/*
 * @brief Get the SVT (scheduler virtual time), which is the minimum AVT
 * of all runnable threads.
 * @pre obj != NULL
 * @pre obj->data != NULL
 * @pre obj->sched_ctl != NULL
 * @pre obj->sched_ctl->priv != NULL
 */
static int64_t get_svt(struct thread_object *obj)
{
	struct sched_bvt_control *bvt_ctl = (struct sched_bvt_control *)obj->sched_ctl->priv;

	return bvt_ctl->svt;
}

static void sched_tick_handler(void *param)
{
	struct sched_control *ctl = (struct sched_control *)param;
	struct sched_bvt_control *bvt_ctl = (struct sched_bvt_control *)ctl->priv;
	struct thread_object *current;
	uint16_t pcpu_id = get_pcpu_id();
	uint64_t rflags;

	obtain_schedule_lock(pcpu_id, &rflags);
	current = ctl->curr_obj;

	if (current != NULL) {
		/* only a non-idle thread needs to consume its run_countdown;
		 * the idle thread is rescheduled only when the runqueue is
		 * no longer empty.
		 */
		if (!is_idle_thread(current)) {
			make_reschedule_request(pcpu_id);
		} else if (!list_empty(&bvt_ctl->runqueue)) {
			make_reschedule_request(pcpu_id);
		}
	}
	release_schedule_lock(pcpu_id, rflags);
}

/*
 * @pre ctl->pcpu_id == get_pcpu_id()
 */
static int sched_bvt_init(struct sched_control *ctl)
{
	struct sched_bvt_control *bvt_ctl = &per_cpu(sched_bvt_ctl, ctl->pcpu_id);
	int ret = 0;

	ASSERT(ctl->pcpu_id == get_pcpu_id(), "Init scheduler on wrong CPU!");

	ctl->priv = bvt_ctl;
	INIT_LIST_HEAD(&bvt_ctl->runqueue);

	/* the tick_timer is a one-shot timer, armed in sched_bvt_pick_next() */
	initialize_timer(&bvt_ctl->tick_timer, sched_tick_handler, ctl, 0, 0);

	return ret;
}

static void sched_bvt_deinit(struct sched_control *ctl)
{
	struct sched_bvt_control *bvt_ctl = (struct sched_bvt_control *)ctl->priv;
	del_timer(&bvt_ctl->tick_timer);
}

static void sched_bvt_init_data(struct thread_object *obj, struct sched_params *params)
{
	struct sched_bvt_data *data;

	data = (struct sched_bvt_data *)obj->data;
	INIT_LIST_HEAD(&data->list);
	data->mcu = BVT_MCU_MS * TICKS_PER_MS;
	data->weight = clamp(params->bvt_weight, BVT_WEIGHT_MIN, BVT_WEIGHT_MAX);
	data->warp_value = params->bvt_warp_value;
	data->warp_limit = params->bvt_warp_limit;
	data->unwarp_period = params->bvt_unwarp_period;
	data->warp_on = false;	/* warp disabled by default */
	data->vt_ratio = BVT_VT_RATIO_MAX / data->weight;
	data->residual = 0U;
}

static void sched_bvt_suspend(struct sched_control *ctl)
{
	sched_bvt_deinit(ctl);
}

/* convert virtual time to physical time */
static uint64_t v2p(uint64_t virt_time, uint64_t ratio)
{
	return (uint64_t)(virt_time / ratio);
}

/* convert physical time to virtual time */
static uint64_t p2v(uint64_t phy_time, uint64_t ratio)
{
	return (uint64_t)(phy_time * ratio);
}
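
/*
 * Conversion sketch (illustrative numbers, not from the original code):
 * with ratio = 8, p2v(1000, 8) = 8000 virtual-time units and
 * v2p(8000, 8) = 1000 back in physical units. A larger weight yields a
 * smaller vt_ratio, so virtual time accumulates more slowly and the
 * thread gets a proportionally larger CPU share.
 */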

static void update_vt(struct thread_object *obj)
{
	struct sched_bvt_data *data;
	uint64_t now_tsc = cpu_ticks();
	uint64_t v_delta, delta_mcu = 0U;

	data = (struct sched_bvt_data *)obj->data;

	/* update the current thread's AVT and EVT */
	if (now_tsc > data->start_tsc) {
		v_delta = p2v(now_tsc - data->start_tsc, data->vt_ratio) + data->residual;
		delta_mcu = (uint64_t)(v_delta / data->mcu);
		data->residual = v_delta % data->mcu;
	}
	data->avt += delta_mcu;
	/* TODO: evt = avt - (warp ? warpback : 0U) */
	data->evt = data->avt;

	/* re-sort the thread in the runqueue by its new EVT */
	if (is_inqueue(obj)) {
		runqueue_remove(obj);
		runqueue_add(obj);
	}
}
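
/*
 * Residual example (assumed numbers, for illustration only): with
 * vt_ratio = 8 and mcu = 2400000 cycles (assuming a 2.4 GHz TSC), a thread
 * that ran 1000000 cycles yields v_delta = 8000000, so delta_mcu = 3 and
 * residual = 800000 carries into the next update_vt() call; this keeps the
 * integer division from discarding fractional MCUs over time.
 */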

static struct thread_object *sched_bvt_pick_next(struct sched_control *ctl)
{
	struct sched_bvt_control *bvt_ctl = (struct sched_bvt_control *)ctl->priv;
	struct thread_object *first_obj = NULL, *second_obj = NULL;
	struct sched_bvt_data *first_data = NULL, *second_data = NULL;
	struct list_head *first, *sec;
	struct thread_object *next = NULL;
	struct thread_object *current = ctl->curr_obj;
	uint64_t now_tsc = cpu_ticks();
	uint64_t delta_mcu = 0U;
	uint64_t tick_period = BVT_MCU_MS * TICKS_PER_MS;
	uint64_t run_countdown;

	if (!is_idle_thread(current)) {
		update_vt(current);
	}
	/* always align the SVT with the AVT of the first thread object in the runqueue */
	update_svt(bvt_ctl);

	del_timer(&bvt_ctl->tick_timer);

	if (!list_empty(&bvt_ctl->runqueue)) {
		first = bvt_ctl->runqueue.next;
		sec = (first->next == &bvt_ctl->runqueue) ? NULL : first->next;

		first_obj = container_of(first, struct thread_object, data);
		first_data = (struct sched_bvt_data *)first_obj->data;

		/* run_countdown describes how many MCUs the next thread can
		 * run for. A one-shot timer is set to expire at
		 * current time + run_countdown, so the next thread can run
		 * until the timer interrupts. But when there is only one
		 * object in the runqueue, it can run forever and no timer
		 * is set.
		 */
		if (sec != NULL) {
			second_obj = container_of(sec, struct thread_object, data);
			second_data = (struct sched_bvt_data *)second_obj->data;
			delta_mcu = second_data->evt - first_data->evt;
			run_countdown = v2p(delta_mcu, first_data->vt_ratio) + BVT_CSA_MCU;
		} else {
			run_countdown = UINT64_MAX;
		}
		first_data->start_tsc = now_tsc;
		next = first_obj;
		if (run_countdown != UINT64_MAX) {
			update_timer(&bvt_ctl->tick_timer, cpu_ticks() + run_countdown * tick_period, 0);
			(void)add_timer(&bvt_ctl->tick_timer);
		}
	} else {
		next = &get_cpu_var(idle);
	}

	return next;
}
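
/*
 * run_countdown example (assumed numbers, for illustration only): if the
 * first two runnable threads' EVTs differ by delta_mcu = 10 and the first
 * thread has vt_ratio = 2, then run_countdown = v2p(10, 2) + BVT_CSA_MCU
 * = 5 + 5 = 10, and the one-shot timer fires 10 * tick_period cycles
 * later, when the first thread's EVT would overtake the second's plus the
 * context switch allowance.
 */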

static void sched_bvt_sleep(struct thread_object *obj)
{
	runqueue_remove(obj);
}

static void sched_bvt_wake(struct thread_object *obj)
{
	struct sched_bvt_data *data;
	int64_t svt, threshold;

	data = (struct sched_bvt_data *)obj->data;
	svt = get_svt(obj);
	threshold = svt - BVT_CSA_MCU;
	/* adjust the AVT of a thread waking from a long sleep: an AVT far
	 * behind the SVT is brought forward so the thread cannot monopolize
	 * the CPU while catching up.
	 */
	data->avt = (data->avt > threshold) ? data->avt : svt;
	/* TODO: evt = avt - (warp ? warpback : 0U) */
	data->evt = data->avt;
	/* add to the runqueue in EVT order */
	runqueue_add(obj);
}
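
/*
 * Wake-up example (assumed numbers, for illustration only): with svt = 100
 * and BVT_CSA_MCU = 5, threshold = 95. A thread waking with avt = 97 keeps
 * its own avt, while a long sleeper with avt = 40 is brought forward to
 * svt = 100, so its stale virtual time cannot starve the other threads.
 */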

struct acrn_scheduler sched_bvt = {
	.name		= "sched_bvt",
	.init		= sched_bvt_init,
	.init_data	= sched_bvt_init_data,
	.pick_next	= sched_bvt_pick_next,
	.sleep		= sched_bvt_sleep,
	.wake		= sched_bvt_wake,
	.deinit		= sched_bvt_deinit,
	/* For now, suspend only deletes the timer; add_timer is deferred to
	 * the next schedule after resume, so no .resume hook is needed yet.
	 */
	.suspend	= sched_bvt_suspend,
};