1 /*
2  * Copyright (C) 2022 Intel Corporation.
3  *
4  * SPDX-License-Identifier: BSD-3-Clause
5 */
6 
7 #include <errno.h>
8 #include <stdlib.h>
9 #include <stdio.h>
10 #include <stdbool.h>
11 #include <fcntl.h>
12 #include <unistd.h>
13 #include <sys/epoll.h>
14 #include <sys/queue.h>
15 #include <pthread.h>
16 #include <signal.h>
17 #include <string.h>
18 
19 #include "iothread.h"
20 #include "log.h"
21 #include "mevent.h"
22 #include "dm.h"
23 
24 
/* Max number of epoll events fetched per epoll_wait() call in io_thread(). */
#define MEVENT_MAX 64

/* Static pool of iothread contexts; slots are handed out in order by iothread_create(). */
static struct iothread_ctx ioctxes[IOTHREAD_NUM];
/* Number of pool slots currently in use (next free slot index). */
static int ioctx_active_cnt;
/* mutex to protect the free ioctx slot allocation */
static pthread_mutex_t ioctxes_mutex = PTHREAD_MUTEX_INITIALIZER;
31 
32 static void *
io_thread(void * arg)33 io_thread(void *arg)
34 {
35 	struct epoll_event eventlist[MEVENT_MAX];
36 	struct iothread_mevent *aevp;
37 	int i, n;
38 	struct iothread_ctx *ioctx_x = (struct iothread_ctx *)arg;
39 
40 	set_thread_priority(PRIO_IOTHREAD, true);
41 
42 	while(ioctx_x->started) {
43 		n = epoll_wait(ioctx_x->epfd, eventlist, MEVENT_MAX, -1);
44 		if (n < 0) {
45 			if (errno == EINTR) {
46 				/* EINTR may happen when io_uring fd is monitored, it is harmless. */
47 				continue;
48 			} else {
49 				pr_err("%s: return from epoll wait with errno %d\r\n", __func__, errno);
50 				break;
51 			}
52 		}
53 		for (i = 0; i < n; i++) {
54 			aevp = eventlist[i].data.ptr;
55 			if (aevp && aevp->run) {
56 				(*aevp->run)(aevp->arg);
57 			}
58 		}
59 	}
60 
61 	return NULL;
62 }
63 
64 static int
iothread_start(struct iothread_ctx * ioctx_x)65 iothread_start(struct iothread_ctx *ioctx_x)
66 {
67 	int ret;
68 
69 	pthread_mutex_lock(&ioctx_x->mtx);
70 
71 	if (ioctx_x->started) {
72 		pthread_mutex_unlock(&ioctx_x->mtx);
73 		return 0;
74 	}
75 
76 	if (pthread_create(&ioctx_x->tid, NULL, io_thread, ioctx_x) != 0) {
77 		pthread_mutex_unlock(&ioctx_x->mtx);
78 		pr_err("%s", "iothread create failed\r\n");
79 		return -1;
80 	}
81 
82 	ioctx_x->started = true;
83 	pthread_setname_np(ioctx_x->tid, ioctx_x->name);
84 
85 	if (CPU_COUNT(&(ioctx_x->cpuset)) != 0) {
86 		ret = pthread_setaffinity_np(ioctx_x->tid, sizeof(cpuset_t), &(ioctx_x->cpuset));
87 		if (ret != 0) {
88 			pr_err("pthread_setaffinity_np fails %d \n", ret);
89 		}
90 	}
91 
92 	pthread_mutex_unlock(&ioctx_x->mtx);
93 	pr_info("%s started\n", ioctx_x->name);
94 
95 	return 0;
96 }
97 
98 int
iothread_add(struct iothread_ctx * ioctx_x,int fd,struct iothread_mevent * aevt)99 iothread_add(struct iothread_ctx *ioctx_x, int fd, struct iothread_mevent *aevt)
100 {
101 	struct epoll_event ee;
102 	int ret;
103 
104 	if (ioctx_x == NULL) {
105 		pr_err("%s: ioctx_x is NULL \n", __func__);
106 		return -1;
107 	}
108 
109 	/* Create a epoll instance before the first fd is added.*/
110 	ee.events = EPOLLIN;
111 	ee.data.ptr = aevt;
112 	ret = epoll_ctl(ioctx_x->epfd, EPOLL_CTL_ADD, fd, &ee);
113 	if (ret < 0) {
114 		pr_err("%s: failed to add fd, error is %d\n",
115 			__func__, errno);
116 		return ret;
117 	}
118 
119 	/* Start the iothread after the first fd is added.*/
120 	ret = iothread_start(ioctx_x);
121 	if (ret < 0) {
122 		pr_err("%s: failed to start iothread thread\n",
123 			__func__);
124 	}
125 	return ret;
126 }
127 
128 int
iothread_del(struct iothread_ctx * ioctx_x,int fd)129 iothread_del(struct iothread_ctx *ioctx_x, int fd)
130 {
131 	int ret = 0;
132 
133 	if (ioctx_x == NULL) {
134 		pr_err("%s: ioctx_x is NULL \n", __func__);
135 		return -1;
136 	}
137 
138 	if (ioctx_x->epfd) {
139 		ret = epoll_ctl(ioctx_x->epfd, EPOLL_CTL_DEL, fd, NULL);
140 		if (ret < 0)
141 			pr_err("%s: failed to delete fd from epoll fd, error is %d\n",
142 				__func__, errno);
143 	}
144 	return ret;
145 }
146 
147 void
iothread_deinit(void)148 iothread_deinit(void)
149 {
150 	void *jval;
151 	int i;
152 	struct iothread_ctx *ioctx_x;
153 
154 	pthread_mutex_lock(&ioctxes_mutex);
155 	for (i = 0; i < ioctx_active_cnt; i++) {
156 		ioctx_x = &ioctxes[i];
157 
158 		if (ioctx_x->tid > 0) {
159 			pthread_mutex_lock(&ioctx_x->mtx);
160 			ioctx_x->started = false;
161 			pthread_mutex_unlock(&ioctx_x->mtx);
162 			pthread_kill(ioctx_x->tid, SIGCONT);
163 			pthread_join(ioctx_x->tid, &jval);
164 		}
165 		if (ioctx_x->epfd > 0) {
166 			close(ioctx_x->epfd);
167 			ioctx_x->epfd = -1;
168 		}
169 		pthread_mutex_destroy(&ioctx_x->mtx);
170 		pr_info("%s stop \n", ioctx_x->name);
171 	}
172 	ioctx_active_cnt = 0;
173 	pthread_mutex_unlock(&ioctxes_mutex);
174 }
175 
176 /*
177  * Create @ioctx_num iothread context instances
178  * Return NULL if fails. Otherwise, return the base of those iothread context instances.
179  *
180  * Notes:
181  * The caller of iothread_create() shall call iothread_free_options() afterwards to free the resources that
182  * are dynamically allocated during iothread_parse_options(), such as iothr_opt->cpusets.
183  *
184  * A general calling sequence from the virtual device owner is like:
185  * 1. Call iothread_parse_options() to parse the options from the user.
186  * 2. Call iothread_create() to create the iothread instances.
187  * 3. Call iothread_free_options() to free the dynamic resources.
188  */
189 struct iothread_ctx *
iothread_create(struct iothreads_option * iothr_opt)190 iothread_create(struct iothreads_option *iothr_opt)
191 {
192 	pthread_mutexattr_t attr;
193 	int i, ret, base, end;
194 	struct iothread_ctx *ioctx_x;
195 	struct iothread_ctx *ioctx_base = NULL;
196 	ret = 0;
197 
198 	if (iothr_opt == NULL) {
199 		pr_err("%s: iothr_opt is NULL \n", __func__);
200 		return ioctx_base;
201 	}
202 
203 	pthread_mutex_lock(&ioctxes_mutex);
204 	base = ioctx_active_cnt;
205 	end = base + iothr_opt->num;
206 
207 	if (end > IOTHREAD_NUM) {
208 		ret = -1;
209 		pr_err("%s: fails to create new iothread context, max number of instances is %d \n",
210 			__func__, IOTHREAD_NUM);
211 	} else {
212 		for (i = base; i < end; i++) {
213 			ioctx_x = &ioctxes[i];
214 
215 			pthread_mutexattr_init(&attr);
216 			pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE);
217 			pthread_mutex_init(&(ioctx_x->mtx), &attr);
218 			pthread_mutexattr_destroy(&attr);
219 
220 			ioctx_x->idx = i;
221 			ioctx_x->tid = 0;
222 			ioctx_x->started = false;
223 			ioctx_x->epfd = epoll_create1(0);
224 
225 			CPU_ZERO(&(ioctx_x->cpuset));
226 			if (iothr_opt->cpusets != NULL) {
227 				memcpy(&(ioctx_x->cpuset), iothr_opt->cpusets + (i - base), sizeof(cpu_set_t));
228 			}
229 
230 			if (snprintf(ioctx_x->name, PTHREAD_NAME_MAX_LEN,
231 				"iothr-%d-%s", ioctx_x->idx, iothr_opt->tag) >= PTHREAD_NAME_MAX_LEN) {
232 				pr_err("%s: iothread name too long \n", __func__);
233 			}
234 
235 			if (ioctx_x->epfd < 0) {
236 				ret = -1;
237 				pr_err("%s: failed to create epoll fd, error is %d\r\n",
238 					__func__, errno);
239 				break;
240 			}
241 		}
242 		if (ret == 0) {
243 			ioctx_base = &ioctxes[base];
244 			ioctx_active_cnt = end;
245 		}
246 	}
247 	pthread_mutex_unlock(&ioctxes_mutex);
248 
249 	return ioctx_base;
250 }
251 
252 /*
253  * Parse the iothread options from @str and fill the options in @iothr_opt if successes.
254  * Return -1 if fails to parse. Otherwise, return 0.
255  */
256 int
iothread_parse_options(char * str,struct iothreads_option * iothr_opt)257 iothread_parse_options(char *str, struct iothreads_option *iothr_opt)
258 {
259 	char *tmp_num = NULL;
260 	char *tmp_cpusets = NULL;
261 	char *tmp_cpux = NULL;
262 	int service_vm_cpuid, iothread_sub_idx, num;
263 	cpu_set_t *cpuset_list = NULL;
264 
265 	/*
266 	 * Create one iothread instance if DM parameters contain 'iothread', but the number is not specified.
267 	 */
268 	num = 1;
269 
270 	/*
271 	 * Valid 'iothread' setting examples:
272 	 * - create 1 iothread instance for virtio-blk
273 	 *   ... virtio-blk iothread,...
274 	 *
275 	 * - create 1 iothread instance for virtio-blk
276 	 *   ... virtio-blk iothread=1,...
277 	 *
278 	 * - create 3 iothread instances for virtio-blk
279 	 *   ... virtio-blk iothread=3,...
280 	 *
281 	 * - create 3 iothread instances for virtio-blk with CPU affinity settings
282 	 *   ... virtio-blk iothread=3@0:1:2/0:1,...
283 	 *   CPU affinity of iothread instances for this virtio-blk device:
284 	 *   - 1st iothread instance <-> Service VM CPU 0,1,2
285 	 *   - 2nd iothread instance <-> Service VM CPU 0,1
286 	 *   - 3rd iothread instance <-> No CPU affinity settings
287 	 *
288 	 */
289 	if (str != NULL) {
290 		/*
291 		 * "@" is used to separate the following two settings:
292 		 * - the number of iothread instances
293 		 * - the CPU affinity settings for each iothread instance.
294 		 */
295 		tmp_num = strsep(&str, "@");
296 
297 		if (tmp_num != NULL) {
298 			if (dm_strtoi(tmp_num, &tmp_num, 10, &num) || (num <= 0)) {
299 				pr_err("%s: invalid iothread number %s \n", __func__, tmp_num);
300 				return -1;
301 			}
302 
303 			cpuset_list = calloc(num, sizeof(cpu_set_t));
304 			if (cpuset_list == NULL) {
305 				pr_err("%s: calloc cpuset_list returns NULL \n", __func__);
306 				return -1;
307 			}
308 
309 			iothread_sub_idx = 0;
310 			while ((str != NULL) && (*str !='\0') && (iothread_sub_idx < num)) {
311 				/* "/" is used to separate the CPU affinity setting for each iothread instance. */
312 				tmp_cpusets = strsep(&str, "/");
313 
314 				CPU_ZERO(cpuset_list + iothread_sub_idx);
315 				while ((tmp_cpusets != NULL) && (*tmp_cpusets !='\0')) {
316 					/* ":" is used to separate different CPU cores. */
317 					tmp_cpux = strsep(&tmp_cpusets, ":");
318 
319 					/*
320 					 * char '*' can be used to skip the setting for the
321 					 * specific iothread instance.
322 					 */
323 					if (*tmp_cpux == '*') {
324 						break;
325 					}
326 
327 					if (dm_strtoi(tmp_cpux, &tmp_cpux, 10, &service_vm_cpuid) ||
328 						(service_vm_cpuid < 0)) {
329 						pr_err("%s: invalid CPU affinity setting %s \n",
330 							__func__, tmp_cpux);
331 
332 						free(cpuset_list);
333 						return -1;
334 					}
335 
336 					CPU_SET(service_vm_cpuid, cpuset_list + iothread_sub_idx);
337 					pr_err("%s: iothread[%d]: set service_vm_cpuid %d \n",
338 						__func__, iothread_sub_idx, service_vm_cpuid);
339 				}
340 				iothread_sub_idx++;
341 			}
342 		}
343 	}
344 	iothr_opt->num = num;
345 	iothr_opt->cpusets = cpuset_list;
346 
347 	return 0;
348 }
349 
350 /*
351  * This interface is used to free the elements that are allocated dynamically in iothread_parse_options(),
352  * such as iothr_opt->cpusets.
353  */
iothread_free_options(struct iothreads_option * iothr_opt)354 void iothread_free_options(struct iothreads_option *iothr_opt)
355 {
356 	if ((iothr_opt != NULL) && (iothr_opt->cpusets != NULL)) {
357 		free(iothr_opt->cpusets);
358 		iothr_opt->cpusets = NULL;
359 	}
360 
361 	return;
362 }
363