/*
 * Copyright (C) 2022 Intel Corporation.
 *
 * SPDX-License-Identifier: BSD-3-Clause
 */

#include <errno.h>
#include <stdlib.h>
#include <stdio.h>
#include <stdbool.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/epoll.h>
#include <sys/queue.h>
#include <pthread.h>
#include <signal.h>
#include <string.h>

#include "iothread.h"
#include "log.h"
#include "mevent.h"
#include "dm.h"

24
25 #define MEVENT_MAX 64
26
27 static struct iothread_ctx ioctxes[IOTHREAD_NUM];
28 static int ioctx_active_cnt;
29 /* mutex to protect the free ioctx slot allocation */
30 static pthread_mutex_t ioctxes_mutex = PTHREAD_MUTEX_INITIALIZER;
31
32 static void *
io_thread(void * arg)33 io_thread(void *arg)
34 {
35 struct epoll_event eventlist[MEVENT_MAX];
36 struct iothread_mevent *aevp;
37 int i, n;
38 struct iothread_ctx *ioctx_x = (struct iothread_ctx *)arg;
39
40 set_thread_priority(PRIO_IOTHREAD, true);
41
42 while(ioctx_x->started) {
43 n = epoll_wait(ioctx_x->epfd, eventlist, MEVENT_MAX, -1);
44 if (n < 0) {
45 if (errno == EINTR) {
46 /* EINTR may happen when io_uring fd is monitored, it is harmless. */
47 continue;
48 } else {
49 pr_err("%s: return from epoll wait with errno %d\r\n", __func__, errno);
50 break;
51 }
52 }
53 for (i = 0; i < n; i++) {
54 aevp = eventlist[i].data.ptr;
55 if (aevp && aevp->run) {
56 (*aevp->run)(aevp->arg);
57 }
58 }
59 }
60
61 return NULL;
62 }
63
64 static int
iothread_start(struct iothread_ctx * ioctx_x)65 iothread_start(struct iothread_ctx *ioctx_x)
66 {
67 int ret;
68
69 pthread_mutex_lock(&ioctx_x->mtx);
70
71 if (ioctx_x->started) {
72 pthread_mutex_unlock(&ioctx_x->mtx);
73 return 0;
74 }
75
76 if (pthread_create(&ioctx_x->tid, NULL, io_thread, ioctx_x) != 0) {
77 pthread_mutex_unlock(&ioctx_x->mtx);
78 pr_err("%s", "iothread create failed\r\n");
79 return -1;
80 }
81
82 ioctx_x->started = true;
83 pthread_setname_np(ioctx_x->tid, ioctx_x->name);
84
85 if (CPU_COUNT(&(ioctx_x->cpuset)) != 0) {
86 ret = pthread_setaffinity_np(ioctx_x->tid, sizeof(cpuset_t), &(ioctx_x->cpuset));
87 if (ret != 0) {
88 pr_err("pthread_setaffinity_np fails %d \n", ret);
89 }
90 }
91
92 pthread_mutex_unlock(&ioctx_x->mtx);
93 pr_info("%s started\n", ioctx_x->name);
94
95 return 0;
96 }
97
98 int
iothread_add(struct iothread_ctx * ioctx_x,int fd,struct iothread_mevent * aevt)99 iothread_add(struct iothread_ctx *ioctx_x, int fd, struct iothread_mevent *aevt)
100 {
101 struct epoll_event ee;
102 int ret;
103
104 if (ioctx_x == NULL) {
105 pr_err("%s: ioctx_x is NULL \n", __func__);
106 return -1;
107 }
108
109 /* Create a epoll instance before the first fd is added.*/
110 ee.events = EPOLLIN;
111 ee.data.ptr = aevt;
112 ret = epoll_ctl(ioctx_x->epfd, EPOLL_CTL_ADD, fd, &ee);
113 if (ret < 0) {
114 pr_err("%s: failed to add fd, error is %d\n",
115 __func__, errno);
116 return ret;
117 }
118
119 /* Start the iothread after the first fd is added.*/
120 ret = iothread_start(ioctx_x);
121 if (ret < 0) {
122 pr_err("%s: failed to start iothread thread\n",
123 __func__);
124 }
125 return ret;
126 }
127
128 int
iothread_del(struct iothread_ctx * ioctx_x,int fd)129 iothread_del(struct iothread_ctx *ioctx_x, int fd)
130 {
131 int ret = 0;
132
133 if (ioctx_x == NULL) {
134 pr_err("%s: ioctx_x is NULL \n", __func__);
135 return -1;
136 }
137
138 if (ioctx_x->epfd) {
139 ret = epoll_ctl(ioctx_x->epfd, EPOLL_CTL_DEL, fd, NULL);
140 if (ret < 0)
141 pr_err("%s: failed to delete fd from epoll fd, error is %d\n",
142 __func__, errno);
143 }
144 return ret;
145 }
146
147 void
iothread_deinit(void)148 iothread_deinit(void)
149 {
150 void *jval;
151 int i;
152 struct iothread_ctx *ioctx_x;
153
154 pthread_mutex_lock(&ioctxes_mutex);
155 for (i = 0; i < ioctx_active_cnt; i++) {
156 ioctx_x = &ioctxes[i];
157
158 if (ioctx_x->tid > 0) {
159 pthread_mutex_lock(&ioctx_x->mtx);
160 ioctx_x->started = false;
161 pthread_mutex_unlock(&ioctx_x->mtx);
162 pthread_kill(ioctx_x->tid, SIGCONT);
163 pthread_join(ioctx_x->tid, &jval);
164 }
165 if (ioctx_x->epfd > 0) {
166 close(ioctx_x->epfd);
167 ioctx_x->epfd = -1;
168 }
169 pthread_mutex_destroy(&ioctx_x->mtx);
170 pr_info("%s stop \n", ioctx_x->name);
171 }
172 ioctx_active_cnt = 0;
173 pthread_mutex_unlock(&ioctxes_mutex);
174 }
175
176 /*
177 * Create @ioctx_num iothread context instances
178 * Return NULL if fails. Otherwise, return the base of those iothread context instances.
179 *
180 * Notes:
181 * The caller of iothread_create() shall call iothread_free_options() afterwards to free the resources that
182 * are dynamically allocated during iothread_parse_options(), such as iothr_opt->cpusets.
183 *
184 * A general calling sequence from the virtual device owner is like:
185 * 1. Call iothread_parse_options() to parse the options from the user.
186 * 2. Call iothread_create() to create the iothread instances.
187 * 3. Call iothread_free_options() to free the dynamic resources.
188 */
189 struct iothread_ctx *
iothread_create(struct iothreads_option * iothr_opt)190 iothread_create(struct iothreads_option *iothr_opt)
191 {
192 pthread_mutexattr_t attr;
193 int i, ret, base, end;
194 struct iothread_ctx *ioctx_x;
195 struct iothread_ctx *ioctx_base = NULL;
196 ret = 0;
197
198 if (iothr_opt == NULL) {
199 pr_err("%s: iothr_opt is NULL \n", __func__);
200 return ioctx_base;
201 }
202
203 pthread_mutex_lock(&ioctxes_mutex);
204 base = ioctx_active_cnt;
205 end = base + iothr_opt->num;
206
207 if (end > IOTHREAD_NUM) {
208 ret = -1;
209 pr_err("%s: fails to create new iothread context, max number of instances is %d \n",
210 __func__, IOTHREAD_NUM);
211 } else {
212 for (i = base; i < end; i++) {
213 ioctx_x = &ioctxes[i];
214
215 pthread_mutexattr_init(&attr);
216 pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE);
217 pthread_mutex_init(&(ioctx_x->mtx), &attr);
218 pthread_mutexattr_destroy(&attr);
219
220 ioctx_x->idx = i;
221 ioctx_x->tid = 0;
222 ioctx_x->started = false;
223 ioctx_x->epfd = epoll_create1(0);
224
225 CPU_ZERO(&(ioctx_x->cpuset));
226 if (iothr_opt->cpusets != NULL) {
227 memcpy(&(ioctx_x->cpuset), iothr_opt->cpusets + (i - base), sizeof(cpu_set_t));
228 }
229
230 if (snprintf(ioctx_x->name, PTHREAD_NAME_MAX_LEN,
231 "iothr-%d-%s", ioctx_x->idx, iothr_opt->tag) >= PTHREAD_NAME_MAX_LEN) {
232 pr_err("%s: iothread name too long \n", __func__);
233 }
234
235 if (ioctx_x->epfd < 0) {
236 ret = -1;
237 pr_err("%s: failed to create epoll fd, error is %d\r\n",
238 __func__, errno);
239 break;
240 }
241 }
242 if (ret == 0) {
243 ioctx_base = &ioctxes[base];
244 ioctx_active_cnt = end;
245 }
246 }
247 pthread_mutex_unlock(&ioctxes_mutex);
248
249 return ioctx_base;
250 }
251
252 /*
253 * Parse the iothread options from @str and fill the options in @iothr_opt if successes.
254 * Return -1 if fails to parse. Otherwise, return 0.
255 */
256 int
iothread_parse_options(char * str,struct iothreads_option * iothr_opt)257 iothread_parse_options(char *str, struct iothreads_option *iothr_opt)
258 {
259 char *tmp_num = NULL;
260 char *tmp_cpusets = NULL;
261 char *tmp_cpux = NULL;
262 int service_vm_cpuid, iothread_sub_idx, num;
263 cpu_set_t *cpuset_list = NULL;
264
265 /*
266 * Create one iothread instance if DM parameters contain 'iothread', but the number is not specified.
267 */
268 num = 1;
269
270 /*
271 * Valid 'iothread' setting examples:
272 * - create 1 iothread instance for virtio-blk
273 * ... virtio-blk iothread,...
274 *
275 * - create 1 iothread instance for virtio-blk
276 * ... virtio-blk iothread=1,...
277 *
278 * - create 3 iothread instances for virtio-blk
279 * ... virtio-blk iothread=3,...
280 *
281 * - create 3 iothread instances for virtio-blk with CPU affinity settings
282 * ... virtio-blk iothread=3@0:1:2/0:1,...
283 * CPU affinity of iothread instances for this virtio-blk device:
284 * - 1st iothread instance <-> Service VM CPU 0,1,2
285 * - 2nd iothread instance <-> Service VM CPU 0,1
286 * - 3rd iothread instance <-> No CPU affinity settings
287 *
288 */
289 if (str != NULL) {
290 /*
291 * "@" is used to separate the following two settings:
292 * - the number of iothread instances
293 * - the CPU affinity settings for each iothread instance.
294 */
295 tmp_num = strsep(&str, "@");
296
297 if (tmp_num != NULL) {
298 if (dm_strtoi(tmp_num, &tmp_num, 10, &num) || (num <= 0)) {
299 pr_err("%s: invalid iothread number %s \n", __func__, tmp_num);
300 return -1;
301 }
302
303 cpuset_list = calloc(num, sizeof(cpu_set_t));
304 if (cpuset_list == NULL) {
305 pr_err("%s: calloc cpuset_list returns NULL \n", __func__);
306 return -1;
307 }
308
309 iothread_sub_idx = 0;
310 while ((str != NULL) && (*str !='\0') && (iothread_sub_idx < num)) {
311 /* "/" is used to separate the CPU affinity setting for each iothread instance. */
312 tmp_cpusets = strsep(&str, "/");
313
314 CPU_ZERO(cpuset_list + iothread_sub_idx);
315 while ((tmp_cpusets != NULL) && (*tmp_cpusets !='\0')) {
316 /* ":" is used to separate different CPU cores. */
317 tmp_cpux = strsep(&tmp_cpusets, ":");
318
319 /*
320 * char '*' can be used to skip the setting for the
321 * specific iothread instance.
322 */
323 if (*tmp_cpux == '*') {
324 break;
325 }
326
327 if (dm_strtoi(tmp_cpux, &tmp_cpux, 10, &service_vm_cpuid) ||
328 (service_vm_cpuid < 0)) {
329 pr_err("%s: invalid CPU affinity setting %s \n",
330 __func__, tmp_cpux);
331
332 free(cpuset_list);
333 return -1;
334 }
335
336 CPU_SET(service_vm_cpuid, cpuset_list + iothread_sub_idx);
337 pr_err("%s: iothread[%d]: set service_vm_cpuid %d \n",
338 __func__, iothread_sub_idx, service_vm_cpuid);
339 }
340 iothread_sub_idx++;
341 }
342 }
343 }
344 iothr_opt->num = num;
345 iothr_opt->cpusets = cpuset_list;
346
347 return 0;
348 }
349
350 /*
351 * This interface is used to free the elements that are allocated dynamically in iothread_parse_options(),
352 * such as iothr_opt->cpusets.
353 */
iothread_free_options(struct iothreads_option * iothr_opt)354 void iothread_free_options(struct iothreads_option *iothr_opt)
355 {
356 if ((iothr_opt != NULL) && (iothr_opt->cpusets != NULL)) {
357 free(iothr_opt->cpusets);
358 iothr_opt->cpusets = NULL;
359 }
360
361 return;
362 }
363