1 /*
2 * Project Acrn
3 * Acrn-dm-monitor
4 *
5 * Copyright (C) 2018-2022 Intel Corporation.
6 *
7 * SPDX-License-Identifier: BSD-3-Clause
8 *
9 *
10 * Author: TaoYuhong <yuhong.tao@intel.com>
11 */
12
13 #include <stdio.h>
14 #include <stdlib.h>
15 #include <string.h>
16 #include <sys/stat.h>
17 #include <sys/queue.h>
18 #include <unistd.h>
19 #include <pthread.h>
20 #include "dm.h"
21 #include "dm_string.h"
22 #include "monitor.h"
23 #include "acrn_mngr.h"
24 #include "pm.h"
25 #include "vmmapi.h"
26 #include "log.h"
27
/* length of one interrupt storm monitor period, in seconds */
#define INTR_STORM_MONITOR_PERIOD	10
/* interrupts per monitor period: 10K per second over the 10s period */
#define INTR_STORM_THRESHOLD		100000

/* time to delay each interrupt injection: 1ms */
#define DELAY_INTR_TIME			1
/* total duration of the interrupt delay: 100000us = 100ms */
#define DELAY_DURATION			100000
/* seconds to wait after handling a storm before sampling again */
#define TIME_TO_CHECK_AGAIN		2
34
/* Settings of the interrupt storm monitor. */
struct intr_monitor_setting_t {
	/* the monitor thread is only started when this is true */
	bool enable;
	/* intr count in probe_period when intr storm happens */
	uint32_t threshold;
	/* seconds: the period to probe intr data */
	uint32_t probe_period;
	/* ms: the time to delay each intr injection */
	uint32_t delay_time;
	/* us: the delay duration, after it, intr injection restore to normal */
	uint32_t delay_duration;
};
42
43 union intr_monitor_t {
44 struct acrn_intr_monitor monitor;
45 char reserved[4096];
46 } __aligned(4096);
47
48 static union intr_monitor_t intr_data;
49 static uint64_t intr_cnt_buf[MAX_PTDEV_NUM * 2];
50 static pthread_t intr_storm_monitor_pid;
51
52 static struct intr_monitor_setting_t intr_monitor_setting = {
53 .enable = false,
54 };
55
/* switch macro, just open in debug */
/* #define INTR_MONITOR_DBG */

#ifdef INTR_MONITOR_DBG
/* debug log stream; NULL while the log file is not (or could not be) opened */
static FILE *dbg_file;

/*
 * printf-style output to the debug log. The message is dropped when the log
 * is not open, so a failed fopen() cannot lead to fprintf(NULL, ...).
 */
#define DPRINTF(...) \
	do { \
		if (dbg_file != NULL) { \
			fprintf(dbg_file, __VA_ARGS__); \
			fflush(dbg_file); \
		} \
	} while (0)

/*
 * this is a debug function: dump one sample of the interrupt data to the
 * debug log. The buffer is read as (IRQ, count) pairs and only pairs with a
 * non-zero count are printed.
 */
static void write_intr_data_to_file(const struct acrn_intr_monitor *hdr)
{
	static int wr_cnt;
	uint32_t j;

	if (dbg_file == NULL)
		return;

	wr_cnt++;
	fprintf(dbg_file, "\n==%d time devs=%u==\n", wr_cnt, (unsigned int)(hdr->buf_cnt / 2));
	fprintf(dbg_file, "IRQ\t\tCount\n");

	for (j = 0; j < hdr->buf_cnt; j += 2) {
		if (hdr->buffer[j + 1] != 0) {
			/* cast so the format matches regardless of how uint64_t is defined */
			fprintf(dbg_file, "%llu\t\t%llu\n",
				(unsigned long long)hdr->buffer[j],
				(unsigned long long)hdr->buffer[j + 1]);
		}
	}

	fflush(dbg_file);
}
#else
/* debug output disabled: expands to nothing, arguments are not evaluated */
#define DPRINTF(...)
#endif
85
intr_storm_monitor_thread(void * arg)86 static void *intr_storm_monitor_thread(void *arg)
87 {
88 struct vmctx *ctx = (struct vmctx *)arg;
89 struct acrn_intr_monitor *hdr = &intr_data.monitor;
90 uint64_t delta = 0UL;
91 int ret, i;
92
93 #ifdef INTR_MONITOR_DBG
94 dbg_file = fopen("/tmp/intr_log", "w+");
95 #endif
96 sleep(intr_monitor_setting.probe_period);
97
98 /* first to get interrupt data */
99 hdr->cmd = INTR_CMD_GET_DATA;
100 hdr->buf_cnt = MAX_PTDEV_NUM * 2;
101 memset(hdr->buffer, 0, sizeof(uint64_t) * hdr->buf_cnt);
102
103 ret = vm_intr_monitor(ctx, hdr);
104 if (ret) {
105 DPRINTF("first get intr data failed, ret: %d\n", ret);
106 intr_storm_monitor_pid = 0;
107 return NULL;
108 }
109
110 while (1) {
111 #ifdef INTR_MONITOR_DBG
112 write_intr_data_to_file(hdr);
113 #endif
114 memcpy(intr_cnt_buf, hdr->buffer, sizeof(uint64_t) * hdr->buf_cnt);
115 sleep(intr_monitor_setting.probe_period);
116
117 /* next time to get interrupt data */
118 memset(hdr->buffer, 0, sizeof(uint64_t) * hdr->buf_cnt);
119 ret = vm_intr_monitor(ctx, hdr);
120 if (ret) {
121 pr_err("next get intr data failed, ret: %d\n", ret);
122 intr_storm_monitor_pid = 0;
123 break;
124 }
125
126 /*
127 * calculate the delta of the two times count of interrupt;
128 * compare the IRQ number first, if not same just drop it,
129 * for it just happens rarely when devices dynamically
130 * allocation in Service VM or User VM, it can be calculated next time
131 */
132 for (i = 0; i < hdr->buf_cnt; i += 2) {
133 if (hdr->buffer[i] != intr_cnt_buf[i])
134 continue;
135
136 /* avoid delta overflow */
137 if (hdr->buffer[i + 1] < intr_cnt_buf[i + 1])
138 continue;
139
140 delta = hdr->buffer[i + 1] - intr_cnt_buf[i + 1];
141 if (delta > intr_monitor_setting.threshold) {
142 #ifdef INTR_MONITOR_DBG
143 write_intr_data_to_file(hdr);
144 #endif
145 break;
146 }
147 }
148
149 /* storm detected, handle the intr abnormal status */
150 if (i < hdr->buf_cnt) {
151 pr_notice("irq=%ld, delta=%ld\n", intr_cnt_buf[i], delta);
152
153 hdr->cmd = INTR_CMD_DELAY_INT;
154 hdr->buffer[0] = intr_monitor_setting.delay_time;
155 vm_intr_monitor(ctx, hdr);
156 usleep(intr_monitor_setting.delay_duration); /* sleep-delay intr */
157 hdr->buffer[0] = 0; /* cancel to delay intr */
158 vm_intr_monitor(ctx, hdr);
159
160 sleep(TIME_TO_CHECK_AGAIN); /* time to get data again */
161 hdr->cmd = INTR_CMD_GET_DATA;
162 hdr->buf_cnt = MAX_PTDEV_NUM * 2;
163 memset(hdr->buffer, 0, sizeof(uint64_t) * hdr->buf_cnt);
164 vm_intr_monitor(ctx, hdr);
165 }
166 }
167
168 return NULL;
169 }
170
start_intr_storm_monitor(struct vmctx * ctx)171 static void start_intr_storm_monitor(struct vmctx *ctx)
172 {
173 if (intr_monitor_setting.enable) {
174 int ret = pthread_create(&intr_storm_monitor_pid, NULL, intr_storm_monitor_thread, ctx);
175 if (ret) {
176 pr_err("failed %s %d\n", __func__, __LINE__);
177 intr_storm_monitor_pid = 0;
178 }
179 pthread_setname_np(intr_storm_monitor_pid, "storm_monitor");
180
181 pr_info("start monitor interrupt data...\n");
182 }
183 }
184
stop_intr_storm_monitor(void)185 static void stop_intr_storm_monitor(void)
186 {
187 if (intr_storm_monitor_pid) {
188 void *ret;
189
190 pthread_cancel(intr_storm_monitor_pid);
191 pthread_join(intr_storm_monitor_pid, &ret);
192 intr_storm_monitor_pid = 0;
193 }
194 }
195
196 /*
197 .* interrupt monitor setting params, current interrupt mitigation will delay User VM's
198 .* pass-through devices' interrupt injection, the settings input from acrn-dm:
199 .* params:
200 .* threshold: each intr count/second when intr storm happens;
201 .* probe_period: seconds -- the period to probe intr data;
202 .* delay_time: ms -- the time to delay each intr injection;
203 * delay_duration; us -- the delay duration, after it, intr injection restore to normal
204 .*/
acrn_parse_intr_monitor(const char * opt)205 int acrn_parse_intr_monitor(const char *opt)
206 {
207 uint32_t threshold, period, delay, duration;
208 char *cp;
209
210 if((!dm_strtoui(opt, &cp, 10, &threshold) && *cp == ',') &&
211 (!dm_strtoui(cp + 1, &cp, 10, &period) && *cp == ',') &&
212 (!dm_strtoui(cp + 1, &cp, 10, &delay) && *cp == ',') &&
213 (!dm_strtoui(cp + 1, &cp, 10, &duration))) {
214 pr_dbg("interrupt storm monitor params: %d, %d, %d, %d\n", threshold, period, delay, duration);
215 } else {
216 pr_err("%s: not correct, it should be like: --intr_monitor 10000,10,1,100, please check!\n", opt);
217 return -1;
218 }
219
220 intr_monitor_setting.enable = true;
221 intr_monitor_setting.threshold = threshold * period;
222 intr_monitor_setting.probe_period = period;
223 intr_monitor_setting.delay_time = delay;
224 intr_monitor_setting.delay_duration = duration * 1000;
225
226 return 0;
227 }
228
/* One registered set of VM operations, linked into the vm_ops list. */
struct vm_ops {
	/* name given at registration, at most 15 characters plus the NUL */
	char name[16];
	/* opaque argument passed to every callback in ops */
	void *arg;
	/* the callbacks themselves */
	struct monitor_vm_ops *ops;
	/* list linkage */
	LIST_ENTRY(vm_ops) list;
};
235
/* value of msg->data.reason stored by the message handlers, 0 until then */
static unsigned int wakeup_reason;

/* Return the most recently stored wakeup reason. */
unsigned get_wakeup_reason(void)
{
	return wakeup_reason;
}
242
set_wakeup_timer(time_t t)243 int set_wakeup_timer(time_t t)
244 {
245 int acrnd_fd;
246 struct mngr_msg req;
247 struct mngr_msg ack;
248 int ret;
249
250 acrnd_fd = mngr_open_un("acrnd", MNGR_CLIENT);
251 if (acrnd_fd < 0) {
252 return -1;
253 }
254
255 req.magic = MNGR_MSG_MAGIC;
256 req.msgid = ACRND_TIMER;
257 req.timestamp = time(NULL);
258
259 req.data.rtc_timer.t = t;
260 strncpy(req.data.rtc_timer.vmname, vmname,
261 sizeof(req.data.rtc_timer.vmname));
262
263 memset(&ack, 0, sizeof(struct mngr_msg));
264 ret = mngr_send_msg(acrnd_fd, &req, &ack, 2);
265 mngr_close(acrnd_fd);
266 if (ret != sizeof(ack)) {
267 pr_err("%s %d\r\n", __func__, __LINE__);
268 return -1;
269 }
270
271 return ack.data.err;
272 }
273
/* head of the list of registered vm_ops, newest entry first */
static LIST_HEAD(vm_ops_list, vm_ops) vm_ops_head;
/* taken by monitor_register_vm_ops() while it inserts into the list */
static pthread_mutex_t vm_ops_mtx = PTHREAD_MUTEX_INITIALIZER;
276
monitor_register_vm_ops(struct monitor_vm_ops * mops,void * arg,const char * name)277 int monitor_register_vm_ops(struct monitor_vm_ops *mops, void *arg,
278 const char *name)
279 {
280 struct vm_ops *ops;
281
282 if (!mops) {
283 pr_err("%s %d\r\n", __func__, __LINE__);
284 return -1;
285 }
286
287 ops = calloc(1, sizeof(*ops));
288 if (!ops) {
289 pr_err("Alloc ops");
290 return -1;
291 }
292
293 if (name)
294 strncpy(ops->name, name, sizeof(ops->name) - 1);
295
296 ops->ops = mops;
297 ops->arg = arg;
298
299 pthread_mutex_lock(&vm_ops_mtx);
300 LIST_INSERT_HEAD(&vm_ops_head, ops, list);
301 pthread_mutex_unlock(&vm_ops_mtx);
302
303 return 0;
304 }
305
306 static int monitor_fd = -1;
307
308 /* handlers */
309 #define ACK_TIMEOUT 1
310
311 #define DEFINE_HANDLER(name, func) \
312 static void name(struct mngr_msg *msg, int client_fd, void *param) \
313 { \
314 struct mngr_msg ack; \
315 struct vm_ops *ops; \
316 \
317 int ret = 0; \
318 int count = 0; \
319 \
320 ack.magic = MNGR_MSG_MAGIC; \
321 ack.msgid = msg->msgid; \
322 ack.timestamp = msg->timestamp; \
323 \
324 LIST_FOREACH(ops, &vm_ops_head, list) { \
325 if (ops->ops->func) { \
326 ret += ops->ops->func(ops->arg); \
327 count++; \
328 } \
329 } \
330 \
331 if (!count) { \
332 ack.data.err = -1; \
333 pr_err("No handler for id:%u\r\n", msg->msgid); \
334 } else \
335 ack.data.err = ret; \
336 \
337 mngr_send_msg(client_fd, &ack, NULL, ACK_TIMEOUT); \
338 }
339
340 DEFINE_HANDLER(handle_suspend, suspend);
341
handle_stop(struct mngr_msg * msg,int client_fd,void * param)342 static void handle_stop(struct mngr_msg *msg, int client_fd, void *param)
343 {
344 struct mngr_msg ack;
345 struct vm_ops *ops;
346 int ret = 0;
347 int count = 0;
348
349 ack.magic = MNGR_MSG_MAGIC;
350 ack.msgid = msg->msgid;
351 ack.timestamp = msg->timestamp;
352
353 if (msg->data.acrnd_stop.force) {
354 pr_info("%s: setting VM state to %s\n", __func__, vm_state_to_str(VM_SUSPEND_POWEROFF));
355 vm_set_suspend_mode(VM_SUSPEND_POWEROFF);
356 ack.data.err = 0;
357 } else {
358 LIST_FOREACH(ops, &vm_ops_head, list) {
359 if (ops->ops->stop) {
360 ret += ops->ops->stop(ops->arg);
361 count++;
362 }
363 }
364
365 if (!count) {
366 ack.data.err = -1;
367 pr_err("No handler for id:%u\r\n", msg->msgid);
368 } else
369 ack.data.err = ret;
370 }
371
372 mngr_send_msg(client_fd, &ack, NULL, ACK_TIMEOUT);
373 }
374
handle_resume(struct mngr_msg * msg,int client_fd,void * param)375 static void handle_resume(struct mngr_msg *msg, int client_fd, void *param)
376 {
377 struct mngr_msg ack;
378 struct vm_ops *ops;
379 int ret = 0;
380 int count = 0;
381
382 ack.magic = MNGR_MSG_MAGIC;
383 ack.msgid = msg->msgid;
384 ack.timestamp = msg->timestamp;
385
386 wakeup_reason = msg->data.reason;
387
388 LIST_FOREACH(ops, &vm_ops_head, list) {
389 if (ops->ops->resume) {
390 ret += ops->ops->resume(ops->arg);
391 count++;
392 }
393 }
394
395 if (!count) {
396 ack.data.err = -1;
397 pr_err("No handler for id:%u\r\n", msg->msgid);
398 } else
399 ack.data.err = ret;
400
401 mngr_send_msg(client_fd, &ack, NULL, ACK_TIMEOUT);
402 }
403
handle_query(struct mngr_msg * msg,int client_fd,void * param)404 static void handle_query(struct mngr_msg *msg, int client_fd, void *param)
405 {
406 struct mngr_msg ack;
407 struct vm_ops *ops;
408
409 ack.magic = MNGR_MSG_MAGIC;
410 ack.msgid = msg->msgid;
411 ack.timestamp = msg->timestamp;
412 ack.data.state = -1;
413
414 LIST_FOREACH(ops, &vm_ops_head, list) {
415 if (ops->ops->query) {
416 ack.data.state = ops->ops->query(ops->arg);
417 break;
418 }
419 }
420
421 mngr_send_msg(client_fd, &ack, NULL, ACK_TIMEOUT);
422 }
423
handle_blkrescan(struct mngr_msg * msg,int client_fd,void * param)424 static void handle_blkrescan(struct mngr_msg *msg, int client_fd, void *param)
425 {
426 struct mngr_msg ack;
427 struct vm_ops *ops;
428 int ret = 0;
429 int count = 0;
430
431 ack.magic = MNGR_MSG_MAGIC;
432 ack.msgid = msg->msgid;
433 ack.timestamp = msg->timestamp;
434
435 wakeup_reason = msg->data.reason;
436
437 LIST_FOREACH(ops, &vm_ops_head, list) {
438 if (ops->ops->rescan) {
439 ret += ops->ops->rescan(ops->arg, msg->data.devargs);
440 count++;
441 }
442 }
443
444 if (!count) {
445 ack.data.err = -1;
446 pr_err("No handler for id:%u\r\n", msg->msgid);
447 } else
448 ack.data.err = ret;
449
450 mngr_send_msg(client_fd, &ack, NULL, ACK_TIMEOUT);
451 }
452
453 static struct monitor_vm_ops pmc_ops = {
454 .stop = NULL,
455 .resume = vm_monitor_resume,
456 .suspend = NULL,
457 .pause = NULL,
458 .unpause = NULL,
459 .query = vm_monitor_query,
460 };
461
monitor_init(struct vmctx * ctx)462 int monitor_init(struct vmctx *ctx)
463 {
464 int ret;
465 char path[128] = {};
466
467 ret = check_dir(ACRN_DM_BASE_PATH, CHK_CREAT);
468 if (ret) {
469 pr_err("%s %d\r\n", __func__, __LINE__);
470 goto dir_err;
471 }
472
473 ret = check_dir(ACRN_DM_SOCK_PATH, CHK_CREAT);
474 if (ret) {
475 pr_err("%s %d\r\n", __func__, __LINE__);
476 goto dir_err;
477 }
478
479 snprintf(path, sizeof(path) - 1, "%s.monitor", vmname);
480
481 monitor_fd = mngr_open_un(path, MNGR_SERVER);
482 if (monitor_fd < 0) {
483 pr_err("%s %d\r\n", __func__, __LINE__);
484 goto server_err;
485 }
486
487 ret = 0;
488 ret += mngr_add_handler(monitor_fd, DM_STOP, handle_stop, NULL);
489 ret += mngr_add_handler(monitor_fd, DM_SUSPEND, handle_suspend, NULL);
490 ret += mngr_add_handler(monitor_fd, DM_RESUME, handle_resume, NULL);
491 ret += mngr_add_handler(monitor_fd, DM_QUERY, handle_query, NULL);
492 ret += mngr_add_handler(monitor_fd, DM_BLKRESCAN, handle_blkrescan, NULL);
493
494 if (ret) {
495 pr_err("%s %d\r\n", __func__, __LINE__);
496 goto handlers_err;
497 }
498
499 monitor_register_vm_ops(&pmc_ops, ctx, "PMC_VM_OPs");
500
501 start_intr_storm_monitor(ctx);
502
503 return 0;
504
505 handlers_err:
506 mngr_close(monitor_fd);
507 monitor_fd = -1;
508 server_err:
509 dir_err:
510 return -1;
511 }
512
monitor_close(void)513 void monitor_close(void)
514 {
515 if (monitor_fd >= 0)
516 mngr_close(monitor_fd);
517
518 stop_intr_storm_monitor();
519 }
520