1 /*
2  * Project Acrn
3  * Acrn-dm-monitor
4  *
5  * Copyright (C) 2018-2022 Intel Corporation.
6  *
7  * SPDX-License-Identifier: BSD-3-Clause
8  *
9  *
10  * Author: TaoYuhong <yuhong.tao@intel.com>
11  */
12 
13 #include <stdio.h>
14 #include <stdlib.h>
15 #include <string.h>
16 #include <sys/stat.h>
17 #include <sys/queue.h>
18 #include <unistd.h>
19 #include <pthread.h>
20 #include "dm.h"
21 #include "dm_string.h"
22 #include "monitor.h"
23 #include "acrn_mngr.h"
24 #include "pm.h"
25 #include "vmmapi.h"
26 #include "log.h"
27 
/* Interrupt storm monitor constants. */
#define INTR_STORM_MONITOR_PERIOD	10	/* probe period: 10 seconds */
#define INTR_STORM_THRESHOLD		100000	/* per probe period, i.e. 10K times per second */

#define DELAY_INTR_TIME			1	/* delay applied to each intr injection: 1ms */
#define DELAY_DURATION			100000	/* total duration of the delay: 100ms, expressed in us */
#define TIME_TO_CHECK_AGAIN		2	/* seconds to wait before sampling again after a storm */
34 
/*
 * Settings of the interrupt storm monitor, filled in by
 * acrn_parse_intr_monitor() and read by the monitor thread.
 */
struct intr_monitor_setting_t {
	/* the monitor thread is only started when this is true */
	bool enable;
	/* intr count within one probe_period above which a storm is assumed */
	uint32_t threshold;
	/* seconds: the period to probe intr data */
	uint32_t probe_period;
	/* ms: the time to delay each intr injection */
	uint32_t delay_time;
	/* us: how long the delay stays active before injection is restored to normal */
	uint32_t delay_duration;
};
42 
43 union intr_monitor_t {
44 	struct acrn_intr_monitor monitor;
45 	char reserved[4096];
46 } __aligned(4096);
47 
48 static union intr_monitor_t intr_data;
49 static uint64_t intr_cnt_buf[MAX_PTDEV_NUM * 2];
50 static pthread_t intr_storm_monitor_pid;
51 
52 static struct intr_monitor_setting_t intr_monitor_setting = {
53 	.enable = false,
54 };
55 
/* switch macro, just open in debug */
/* #define INTR_MONITOR_DBG */

#ifdef INTR_MONITOR_DBG
/* debug log stream; stays NULL until the monitor thread opens it (or if fopen fails) */
static FILE *dbg_file;

/*
 * Debug-only printf into dbg_file. "##args" allows a call with a format
 * string and no further arguments; nothing is written while dbg_file is NULL.
 */
#define DPRINTF(format, args...) \
do { \
	if (dbg_file != NULL) { \
		fprintf(dbg_file, format, ##args); \
		fflush(dbg_file); \
	} \
} while (0)

/*
 * this is a debug function: dump every (irq, count) pair with a non-zero
 * count from hdr->buffer into dbg_file, preceded by a running dump counter.
 */
static void write_intr_data_to_file(const struct acrn_intr_monitor *hdr)
{
	static int wr_cnt;
	int j;

	/* the log file could not be opened, nothing to write to */
	if (dbg_file == NULL)
		return;

	wr_cnt++;
	fprintf(dbg_file, "\n==%d time devs=%d==\n", wr_cnt, (int)(hdr->buf_cnt / 2));
	fprintf(dbg_file, "IRQ\t\tCount\n");

	/* buffer layout: even index = IRQ number, odd index = its count */
	for (j = 0; j < hdr->buf_cnt; j += 2) {
		if (hdr->buffer[j + 1] != 0) {
			fprintf(dbg_file, "%llu\t\t%llu\n",
				(unsigned long long)hdr->buffer[j],
				(unsigned long long)hdr->buffer[j + 1]);
		}
	}

	fflush(dbg_file);
}
#else
/* debug output compiled out */
#define DPRINTF(format, arg...)
#endif
85 
intr_storm_monitor_thread(void * arg)86 static void *intr_storm_monitor_thread(void *arg)
87 {
88 	struct vmctx *ctx = (struct vmctx *)arg;
89 	struct acrn_intr_monitor *hdr = &intr_data.monitor;
90 	uint64_t delta = 0UL;
91 	int ret, i;
92 
93 #ifdef INTR_MONITOR_DBG
94 	dbg_file = fopen("/tmp/intr_log", "w+");
95 #endif
96 	sleep(intr_monitor_setting.probe_period);
97 
98 	/* first to get interrupt data */
99 	hdr->cmd = INTR_CMD_GET_DATA;
100 	hdr->buf_cnt = MAX_PTDEV_NUM * 2;
101 	memset(hdr->buffer, 0, sizeof(uint64_t) * hdr->buf_cnt);
102 
103 	ret = vm_intr_monitor(ctx, hdr);
104 	if (ret) {
105 		DPRINTF("first get intr data failed, ret: %d\n", ret);
106 		intr_storm_monitor_pid = 0;
107 		return NULL;
108 	}
109 
110 	while (1) {
111 #ifdef INTR_MONITOR_DBG
112 		write_intr_data_to_file(hdr);
113 #endif
114 		memcpy(intr_cnt_buf, hdr->buffer, sizeof(uint64_t) * hdr->buf_cnt);
115 		sleep(intr_monitor_setting.probe_period);
116 
117 		/* next time to get interrupt data */
118 		memset(hdr->buffer, 0, sizeof(uint64_t) * hdr->buf_cnt);
119 		ret = vm_intr_monitor(ctx, hdr);
120 		if (ret) {
121 			pr_err("next get intr data failed, ret: %d\n", ret);
122 			intr_storm_monitor_pid = 0;
123 			break;
124 		}
125 
126 		/*
127 		 * calculate the delta of the two times count of interrupt;
128 		 * compare the IRQ number first, if not same just drop it,
129 		 * for it just happens rarely when devices dynamically
130 		 * allocation in Service VM or User VM, it can be calculated next time
131 		 */
132 		for (i = 0; i < hdr->buf_cnt; i += 2) {
133 			if (hdr->buffer[i] != intr_cnt_buf[i])
134 				continue;
135 
136 			/* avoid delta overflow */
137 			if (hdr->buffer[i + 1] < intr_cnt_buf[i + 1])
138 				continue;
139 
140 			delta = hdr->buffer[i + 1] - intr_cnt_buf[i + 1];
141 			if (delta > intr_monitor_setting.threshold) {
142 #ifdef INTR_MONITOR_DBG
143 				write_intr_data_to_file(hdr);
144 #endif
145 				break;
146 			}
147 		}
148 
149 		/* storm detected, handle the intr abnormal status */
150 		if (i < hdr->buf_cnt) {
151 			pr_notice("irq=%ld, delta=%ld\n", intr_cnt_buf[i], delta);
152 
153 			hdr->cmd = INTR_CMD_DELAY_INT;
154 			hdr->buffer[0] = intr_monitor_setting.delay_time;
155 			vm_intr_monitor(ctx, hdr);
156 			usleep(intr_monitor_setting.delay_duration); /* sleep-delay intr */
157 			hdr->buffer[0] = 0; /* cancel to delay intr */
158 			vm_intr_monitor(ctx, hdr);
159 
160 			sleep(TIME_TO_CHECK_AGAIN); /* time to get data again */
161 			hdr->cmd = INTR_CMD_GET_DATA;
162 			hdr->buf_cnt = MAX_PTDEV_NUM * 2;
163 			memset(hdr->buffer, 0, sizeof(uint64_t) * hdr->buf_cnt);
164 			vm_intr_monitor(ctx, hdr);
165 		}
166 	}
167 
168 	return NULL;
169 }
170 
start_intr_storm_monitor(struct vmctx * ctx)171 static void start_intr_storm_monitor(struct vmctx *ctx)
172 {
173 	if (intr_monitor_setting.enable) {
174 		int ret = pthread_create(&intr_storm_monitor_pid, NULL, intr_storm_monitor_thread, ctx);
175 		if (ret) {
176 			pr_err("failed %s %d\n", __func__, __LINE__);
177 			intr_storm_monitor_pid = 0;
178 		}
179 		pthread_setname_np(intr_storm_monitor_pid, "storm_monitor");
180 
181 		pr_info("start monitor interrupt data...\n");
182 	}
183 }
184 
stop_intr_storm_monitor(void)185 static void stop_intr_storm_monitor(void)
186 {
187 	if (intr_storm_monitor_pid) {
188 		void *ret;
189 
190 		pthread_cancel(intr_storm_monitor_pid);
191 		pthread_join(intr_storm_monitor_pid, &ret);
192 		intr_storm_monitor_pid = 0;
193 	}
194 }
195 
196 /*
197 .* interrupt monitor setting params, current interrupt mitigation will delay User VM's
198 .* pass-through devices' interrupt injection, the settings input from acrn-dm:
199 .* params:
200 .* threshold: each intr count/second when intr storm happens;
201 .* probe_period: seconds -- the period to probe intr data;
202 .* delay_time: ms -- the time to delay each intr injection;
203  * delay_duration; us -- the delay duration, after it, intr injection restore to normal
204 .*/
acrn_parse_intr_monitor(const char * opt)205 int acrn_parse_intr_monitor(const char *opt)
206 {
207 	uint32_t threshold, period, delay, duration;
208 	char *cp;
209 
210 	if((!dm_strtoui(opt, &cp, 10, &threshold) && *cp == ',') &&
211 		(!dm_strtoui(cp + 1, &cp, 10, &period) && *cp == ',') &&
212 		(!dm_strtoui(cp + 1, &cp, 10, &delay) && *cp == ',') &&
213 		(!dm_strtoui(cp + 1, &cp, 10, &duration))) {
214 		pr_dbg("interrupt storm monitor params: %d, %d, %d, %d\n", threshold, period, delay, duration);
215 	} else {
216 		pr_err("%s: not correct, it should be like: --intr_monitor 10000,10,1,100, please check!\n", opt);
217 		return -1;
218 	}
219 
220 	intr_monitor_setting.enable = true;
221 	intr_monitor_setting.threshold = threshold * period;
222 	intr_monitor_setting.probe_period = period;
223 	intr_monitor_setting.delay_time = delay;
224 	intr_monitor_setting.delay_duration = duration * 1000;
225 
226 	return 0;
227 }
228 
/* One registered set of VM callbacks; entries are linked into vm_ops_head. */
struct vm_ops {
	/* label given at registration, truncated to fit and NUL-terminated */
	char name[16];
	/* opaque argument handed to every callback */
	void *arg;
	/* callback table; members left NULL are skipped by the handlers */
	struct monitor_vm_ops *ops;
	/* list linkage */
	LIST_ENTRY(vm_ops) list;
};
235 
/* wakeup reason taken from the last message that carried one; 0 initially */
static unsigned wakeup_reason;

/* Return the most recently recorded wakeup reason. */
unsigned get_wakeup_reason(void)
{
	return wakeup_reason;
}
242 
set_wakeup_timer(time_t t)243 int set_wakeup_timer(time_t t)
244 {
245 	int acrnd_fd;
246 	struct mngr_msg req;
247 	struct mngr_msg ack;
248 	int ret;
249 
250 	acrnd_fd = mngr_open_un("acrnd", MNGR_CLIENT);
251 	if (acrnd_fd < 0) {
252 		return -1;
253 	}
254 
255 	req.magic = MNGR_MSG_MAGIC;
256 	req.msgid = ACRND_TIMER;
257 	req.timestamp = time(NULL);
258 
259 	req.data.rtc_timer.t = t;
260 	strncpy(req.data.rtc_timer.vmname, vmname,
261 			sizeof(req.data.rtc_timer.vmname));
262 
263 	memset(&ack, 0, sizeof(struct mngr_msg));
264 	ret = mngr_send_msg(acrnd_fd, &req, &ack, 2);
265 	mngr_close(acrnd_fd);
266 	if (ret != sizeof(ack)) {
267 		pr_err("%s %d\r\n", __func__, __LINE__);
268 		return -1;
269 	}
270 
271 	return ack.data.err;
272 }
273 
274 static LIST_HEAD(vm_ops_list, vm_ops) vm_ops_head;
275 static pthread_mutex_t vm_ops_mtx = PTHREAD_MUTEX_INITIALIZER;
276 
monitor_register_vm_ops(struct monitor_vm_ops * mops,void * arg,const char * name)277 int monitor_register_vm_ops(struct monitor_vm_ops *mops, void *arg,
278 			    const char *name)
279 {
280 	struct vm_ops *ops;
281 
282 	if (!mops) {
283 		pr_err("%s %d\r\n", __func__, __LINE__);
284 		return -1;
285 	}
286 
287 	ops = calloc(1, sizeof(*ops));
288 	if (!ops) {
289 		pr_err("Alloc ops");
290 		return -1;
291 	}
292 
293 	if (name)
294 		strncpy(ops->name, name, sizeof(ops->name) - 1);
295 
296 	ops->ops = mops;
297 	ops->arg = arg;
298 
299 	pthread_mutex_lock(&vm_ops_mtx);
300 	LIST_INSERT_HEAD(&vm_ops_head, ops, list);
301 	pthread_mutex_unlock(&vm_ops_mtx);
302 
303 	return 0;
304 }
305 
/* descriptor of the monitor's server socket; -1 while it is not open */
static int monitor_fd = -1;
307 
308 /* handlers */
309 #define ACK_TIMEOUT	1
310 
311 #define DEFINE_HANDLER(name, func)				\
312 static void name(struct mngr_msg *msg, int client_fd, void *param)	\
313 {									\
314 	struct mngr_msg ack;					\
315 	struct vm_ops *ops;					\
316 								\
317 	int ret = 0;						\
318 	int count = 0;						\
319 								\
320 	ack.magic = MNGR_MSG_MAGIC;				\
321 	ack.msgid = msg->msgid;					\
322 	ack.timestamp = msg->timestamp;				\
323 								\
324 	LIST_FOREACH(ops, &vm_ops_head, list) {			\
325 		if (ops->ops->func) {				\
326 			ret += ops->ops->func(ops->arg);	\
327 			count++;				\
328 		}						\
329 	}							\
330 								\
331 	if (!count) {						\
332 		ack.data.err = -1;					\
333 		pr_err("No handler for id:%u\r\n", msg->msgid);	\
334 	} else									\
335 		ack.data.err = ret;							\
336 										\
337 	mngr_send_msg(client_fd, &ack, NULL, ACK_TIMEOUT);		\
338 }
339 
340 DEFINE_HANDLER(handle_suspend, suspend);
341 
handle_stop(struct mngr_msg * msg,int client_fd,void * param)342 static void handle_stop(struct mngr_msg *msg, int client_fd, void *param)
343 {
344 	struct mngr_msg ack;
345 	struct vm_ops *ops;
346 	int ret = 0;
347 	int count = 0;
348 
349 	ack.magic = MNGR_MSG_MAGIC;
350 	ack.msgid = msg->msgid;
351 	ack.timestamp = msg->timestamp;
352 
353 	if (msg->data.acrnd_stop.force) {
354 		pr_info("%s: setting VM state to %s\n", __func__, vm_state_to_str(VM_SUSPEND_POWEROFF));
355 		vm_set_suspend_mode(VM_SUSPEND_POWEROFF);
356 		ack.data.err = 0;
357 	} else {
358 		LIST_FOREACH(ops, &vm_ops_head, list) {
359 			if (ops->ops->stop) {
360 				ret += ops->ops->stop(ops->arg);
361 				count++;
362 			}
363 		}
364 
365 		if (!count) {
366 			ack.data.err = -1;
367 			pr_err("No handler for id:%u\r\n", msg->msgid);
368 		} else
369 			ack.data.err = ret;
370 	}
371 
372 	mngr_send_msg(client_fd, &ack, NULL, ACK_TIMEOUT);
373 }
374 
handle_resume(struct mngr_msg * msg,int client_fd,void * param)375 static void handle_resume(struct mngr_msg *msg, int client_fd, void *param)
376 {
377 	struct mngr_msg ack;
378 	struct vm_ops *ops;
379 	int ret = 0;
380 	int count = 0;
381 
382 	ack.magic = MNGR_MSG_MAGIC;
383 	ack.msgid = msg->msgid;
384 	ack.timestamp = msg->timestamp;
385 
386 	wakeup_reason = msg->data.reason;
387 
388 	LIST_FOREACH(ops, &vm_ops_head, list) {
389 		if (ops->ops->resume) {
390 			ret += ops->ops->resume(ops->arg);
391 			count++;
392 		}
393 	}
394 
395 	if (!count) {
396 		ack.data.err = -1;
397 		pr_err("No handler for id:%u\r\n", msg->msgid);
398 	} else
399 		ack.data.err = ret;
400 
401 	mngr_send_msg(client_fd, &ack, NULL, ACK_TIMEOUT);
402 }
403 
handle_query(struct mngr_msg * msg,int client_fd,void * param)404 static void handle_query(struct mngr_msg *msg, int client_fd, void *param)
405 {
406 	struct mngr_msg ack;
407 	struct vm_ops *ops;
408 
409 	ack.magic = MNGR_MSG_MAGIC;
410 	ack.msgid = msg->msgid;
411 	ack.timestamp = msg->timestamp;
412 	ack.data.state = -1;
413 
414 	LIST_FOREACH(ops, &vm_ops_head, list) {
415 		if (ops->ops->query) {
416 			ack.data.state = ops->ops->query(ops->arg);
417 			break;
418 		}
419 	}
420 
421 	mngr_send_msg(client_fd, &ack, NULL, ACK_TIMEOUT);
422 }
423 
handle_blkrescan(struct mngr_msg * msg,int client_fd,void * param)424 static void handle_blkrescan(struct mngr_msg *msg, int client_fd, void *param)
425 {
426 	struct mngr_msg ack;
427 	struct vm_ops *ops;
428 	int ret = 0;
429 	int count = 0;
430 
431 	ack.magic = MNGR_MSG_MAGIC;
432 	ack.msgid = msg->msgid;
433 	ack.timestamp = msg->timestamp;
434 
435 	wakeup_reason = msg->data.reason;
436 
437 	LIST_FOREACH(ops, &vm_ops_head, list) {
438 		if (ops->ops->rescan) {
439 			ret += ops->ops->rescan(ops->arg, msg->data.devargs);
440 			count++;
441 		}
442 	}
443 
444 	if (!count) {
445 		ack.data.err = -1;
446 		pr_err("No handler for id:%u\r\n", msg->msgid);
447 	} else
448 		ack.data.err = ret;
449 
450 	mngr_send_msg(client_fd, &ack, NULL, ACK_TIMEOUT);
451 }
452 
453 static struct monitor_vm_ops pmc_ops = {
454 	.stop       = NULL,
455 	.resume     = vm_monitor_resume,
456 	.suspend    = NULL,
457 	.pause      = NULL,
458 	.unpause    = NULL,
459 	.query      = vm_monitor_query,
460 };
461 
monitor_init(struct vmctx * ctx)462 int monitor_init(struct vmctx *ctx)
463 {
464 	int ret;
465 	char path[128] = {};
466 
467 	ret = check_dir(ACRN_DM_BASE_PATH, CHK_CREAT);
468 	if (ret) {
469 		pr_err("%s %d\r\n", __func__, __LINE__);
470 		goto dir_err;
471 	}
472 
473 	ret = check_dir(ACRN_DM_SOCK_PATH, CHK_CREAT);
474 	if (ret) {
475 		pr_err("%s %d\r\n", __func__, __LINE__);
476 		goto dir_err;
477 	}
478 
479 	snprintf(path, sizeof(path) - 1, "%s.monitor", vmname);
480 
481 	monitor_fd = mngr_open_un(path, MNGR_SERVER);
482 	if (monitor_fd < 0) {
483 		pr_err("%s %d\r\n", __func__, __LINE__);
484 		goto server_err;
485 	}
486 
487 	ret = 0;
488 	ret += mngr_add_handler(monitor_fd, DM_STOP, handle_stop, NULL);
489 	ret += mngr_add_handler(monitor_fd, DM_SUSPEND, handle_suspend, NULL);
490 	ret += mngr_add_handler(monitor_fd, DM_RESUME, handle_resume, NULL);
491 	ret += mngr_add_handler(monitor_fd, DM_QUERY, handle_query, NULL);
492 	ret += mngr_add_handler(monitor_fd, DM_BLKRESCAN, handle_blkrescan, NULL);
493 
494 	if (ret) {
495 		pr_err("%s %d\r\n", __func__, __LINE__);
496 		goto handlers_err;
497 	}
498 
499 	monitor_register_vm_ops(&pmc_ops, ctx, "PMC_VM_OPs");
500 
501 	start_intr_storm_monitor(ctx);
502 
503 	return 0;
504 
505  handlers_err:
506 	mngr_close(monitor_fd);
507 	monitor_fd = -1;
508  server_err:
509  dir_err:
510 	return -1;
511 }
512 
monitor_close(void)513 void monitor_close(void)
514 {
515 	if (monitor_fd >= 0)
516 		mngr_close(monitor_fd);
517 
518 	stop_intr_storm_monitor();
519 }
520