/*-
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

/*
 * Micro event library, designed for a single i/o thread using epoll,
 * and having events be persistent by default.
 */
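
/*
 * Typical usage (an illustrative sketch only; "rx_callback", "sock_fd" and
 * "ctx" below are hypothetical names supplied by the caller, not part of
 * this library):
 *
 *	static void
 *	rx_callback(int fd, enum ev_type type, void *param)
 *	{
 *		// read from fd and process the data; the event stays armed
 *	}
 *
 *	mevent_init();
 *	mevent_add(sock_fd, EVF_READ, rx_callback, ctx, NULL, NULL);
 *	mevent_dispatch();	// runs the event loop on the calling thread
 *
 * Because events are persistent, rx_callback keeps firing until the event
 * is removed with mevent_delete() or mevent_delete_close().
 */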
#include <errno.h>
#include <stdlib.h>
#include <stdio.h>
#include <stdbool.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/epoll.h>
#include <sys/queue.h>
#include <pthread.h>

#include "mevent.h"
#include "vmmapi.h"
#include "log.h"

#define MEVENT_MAX	64

#define MEV_ADD		1
#define MEV_ENABLE	2
#define MEV_DISABLE	3
#define MEV_DEL_PENDING	4

static int epoll_fd;
static pthread_t mevent_tid;
static int mevent_pipefd[2];
static pthread_mutex_t mevent_lmutex;

struct mevent {
	void (*run)(int, enum ev_type, void *);
	void *run_param;
	void (*teardown)(void *);
	void *teardown_param;

	int me_fd;
	enum ev_type me_type;
	int me_cq;
	int me_state;

	int closefd;
	LIST_ENTRY(mevent) me_list;
};

static LIST_HEAD(listhead, mevent) global_head;
/* List of mevent nodes that have been requested to be deleted */
static LIST_HEAD(del_listhead, mevent) del_head;

static void
mevent_qlock(void)
{
	pthread_mutex_lock(&mevent_lmutex);
}

static void
mevent_qunlock(void)
{
	pthread_mutex_unlock(&mevent_lmutex);
}

static bool
is_dispatch_thread(void)
{
	return (pthread_self() == mevent_tid);
}

static void
mevent_pipe_read(int fd, enum ev_type type, void *param)
{
	char buf[MEVENT_MAX];
	ssize_t status;

	/*
	 * Drain the pipe read side. The fd is non-blocking so this is
	 * safe to do.
	 */
	do {
		status = read(fd, buf, sizeof(buf));
	} while (status == MEVENT_MAX);
}

/* Returns zero on success, -1 on error */
int
mevent_notify(void)
{
	char c = 0;

	/*
	 * If calling from outside the i/o thread, write a byte on the
	 * pipe to force the i/o thread to exit the blocking epoll call.
	 */
	if (mevent_pipefd[1] != 0 && !is_dispatch_thread())
		if (write(mevent_pipefd[1], &c, 1) <= 0)
			return -1;
	return 0;
}

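/*
 * Map an ev_type to the corresponding epoll event mask.  The "kq" in the
 * name appears to be a leftover from the kqueue-based code this library
 * was derived from.
 */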
static int
mevent_kq_filter(struct mevent *mevp)
{
	int retval;

	retval = 0;

	if (mevp->me_type == EVF_READ)
		retval = EPOLLIN;

	if (mevp->me_type == EVF_READ_ET)
		retval = EPOLLIN | EPOLLET;

	if (mevp->me_type == EVF_WRITE)
		retval = EPOLLOUT;

	if (mevp->me_type == EVF_WRITE_ET)
		retval = EPOLLOUT | EPOLLET;

	return retval;
}

static void
mevent_destroy(void)
{
	struct mevent *mevp, *tmpp;

	mevent_qlock();
	list_foreach_safe(mevp, &global_head, me_list, tmpp) {
		LIST_REMOVE(mevp, me_list);
		epoll_ctl(epoll_fd, EPOLL_CTL_DEL, mevp->me_fd, NULL);

		if ((mevp->me_type == EVF_READ ||
		     mevp->me_type == EVF_READ_ET ||
		     mevp->me_type == EVF_WRITE ||
		     mevp->me_type == EVF_WRITE_ET) &&
		     mevp->me_fd != STDIN_FILENO)
			close(mevp->me_fd);

		if (mevp->teardown)
			mevp->teardown(mevp->teardown_param);

		free(mevp);
	}

	/*
	 * The mevents on del_head were already removed from epoll
	 * when they were added to del_head.
	 */
	list_foreach_safe(mevp, &del_head, me_list, tmpp) {
		LIST_REMOVE(mevp, me_list);

		if ((mevp->me_type == EVF_READ ||
		     mevp->me_type == EVF_READ_ET ||
		     mevp->me_type == EVF_WRITE ||
		     mevp->me_type == EVF_WRITE_ET) &&
		     mevp->me_fd != STDIN_FILENO)
			close(mevp->me_fd);

		if (mevp->teardown)
			mevp->teardown(mevp->teardown_param);

		free(mevp);
	}
	mevent_qunlock();
}

static void
mevent_handle(struct epoll_event *kev, int numev)
{
	int i;
	struct mevent *mevp;

	for (i = 0; i < numev; i++) {
		mevp = kev[i].data.ptr;

		if (mevp->me_state)
			(*mevp->run)(mevp->me_fd, mevp->me_type, mevp->run_param);
	}
}

struct mevent *
mevent_add(int tfd, enum ev_type type,
	   void (*run)(int, enum ev_type, void *), void *run_param,
	   void (*teardown)(void *), void *teardown_param)
{
	int ret;
	struct epoll_event ee;
	struct mevent *lp, *mevp;

	if (tfd < 0 || run == NULL)
		return NULL;

	if (type == EVF_TIMER)
		return NULL;

	mevent_qlock();
	/* Return the existing entry if the fd/type tuple is already registered */
	LIST_FOREACH(lp, &global_head, me_list) {
		if (lp->me_fd == tfd && lp->me_type == type) {
			mevent_qunlock();
			return lp;
		}
	}
	mevent_qunlock();

	/*
	 * Allocate an entry, populate it, and add it to the list.
	 */
	mevp = calloc(1, sizeof(struct mevent));
	if (mevp == NULL)
		return NULL;

	mevp->me_fd = tfd;
	mevp->me_type = type;
	mevp->me_state = 1;

	mevp->run = run;
	mevp->run_param = run_param;
	mevp->teardown = teardown;
	mevp->teardown_param = teardown_param;

	ee.events = mevent_kq_filter(mevp);
	ee.data.ptr = mevp;
	ret = epoll_ctl(epoll_fd, EPOLL_CTL_ADD, mevp->me_fd, &ee);

	if (ret == 0) {
		mevent_qlock();
		LIST_INSERT_HEAD(&global_head, mevp, me_list);
		mevent_qunlock();

		return mevp;
	} else {
		free(mevp);
		return NULL;
	}
}

int
mevent_enable(struct mevent *evp)
{
	int ret;
	struct epoll_event ee;
	struct mevent *lp, *mevp = NULL;

	mevent_qlock();
	/* Verify that the event is still on the global list */
	LIST_FOREACH(lp, &global_head, me_list) {
		if (lp == evp) {
			mevp = lp;
			break;
		}
	}
	mevent_qunlock();

	if (!mevp)
		return -1;

	ee.events = mevent_kq_filter(mevp);
	ee.data.ptr = mevp;
	ret = epoll_ctl(epoll_fd, EPOLL_CTL_ADD, mevp->me_fd, &ee);
	if (ret < 0 && errno == EEXIST)
		ret = 0;

	return ret;
}

int
mevent_disable(struct mevent *evp)
{
	int ret;

	ret = epoll_ctl(epoll_fd, EPOLL_CTL_DEL, evp->me_fd, NULL);
	if (ret < 0 && errno == ENOENT)
		ret = 0;

	return ret;
}

static void
mevent_add_to_del_list(struct mevent *evp, int closefd)
{
	mevent_qlock();
	LIST_INSERT_HEAD(&del_head, evp, me_list);
	mevent_qunlock();

	mevent_notify();
}

static void
mevent_drain_del_list(void)
{
	struct mevent *evp, *tmpp;

	mevent_qlock();
	list_foreach_safe(evp, &del_head, me_list, tmpp) {
		LIST_REMOVE(evp, me_list);
		if (evp->closefd) {
			close(evp->me_fd);
		}

		if (evp->teardown)
			evp->teardown(evp->teardown_param);
		free(evp);
	}
	mevent_qunlock();
}

static int
mevent_delete_event(struct mevent *evp, int closefd)
{
	mevent_qlock();
	LIST_REMOVE(evp, me_list);
	mevent_qunlock();
	evp->me_state = 0;
	evp->closefd = closefd;

	epoll_ctl(epoll_fd, EPOLL_CTL_DEL, evp->me_fd, NULL);
	if (!is_dispatch_thread()) {
		mevent_add_to_del_list(evp, closefd);
	} else {
		if (evp->closefd) {
			close(evp->me_fd);
		}

		if (evp->teardown)
			evp->teardown(evp->teardown_param);
		free(evp);
	}
	return 0;
}

int
mevent_delete(struct mevent *evp)
{
	return mevent_delete_event(evp, 0);
}

int
mevent_delete_close(struct mevent *evp)
{
	return mevent_delete_event(evp, 1);
}
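
/*
 * Teardown sketch (illustrative only; "conn_fd", "rx_callback", "ctx" and
 * "ctx_teardown" are hypothetical names): when an event is deleted from a
 * thread other than the dispatch thread, it is queued on del_head and the
 * dispatch thread later closes the fd (for mevent_delete_close) and runs
 * the teardown callback in mevent_drain_del_list(); when called from the
 * dispatch thread itself, the cleanup happens immediately.
 *
 *	static void ctx_teardown(void *param) { free(param); }
 *
 *	struct mevent *mev = mevent_add(conn_fd, EVF_READ, rx_callback,
 *					ctx, ctx_teardown, ctx);
 *	...
 *	mevent_delete_close(mev);
 */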

static void
mevent_set_name(void)
{
	pthread_setname_np(mevent_tid, "mevent");
}

int
mevent_init(void)
{
	pthread_mutexattr_t attr;

	pthread_mutexattr_init(&attr);
	pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE);
	pthread_mutex_init(&mevent_lmutex, &attr);
	pthread_mutexattr_destroy(&attr);

	epoll_fd = epoll_create1(0);

	if (epoll_fd >= 0)
		return 0;
	else
		return -1;
}

void
mevent_deinit(void)
{
	mevent_destroy();
	close(epoll_fd);
	if (mevent_pipefd[1] != 0)
		close(mevent_pipefd[1]);

	pthread_mutex_destroy(&mevent_lmutex);
}

void
mevent_dispatch(void)
{
	struct epoll_event eventlist[MEVENT_MAX];

	struct mevent *pipev;
	int ret;

	mevent_tid = pthread_self();
	mevent_set_name();

	/*
	 * Open the pipe that will be used for other threads to force
	 * the blocking epoll call to exit by writing to it. Set the
	 * descriptor to non-blocking.
	 */
	ret = pipe2(mevent_pipefd, O_NONBLOCK);
	if (ret < 0) {
		pr_err("pipe");
		exit(0);
	}

	/*
	 * Add an internal event handler for the read end of the pipe
	 */
	pipev = mevent_add(mevent_pipefd[0], EVF_READ, mevent_pipe_read, NULL, NULL, NULL);
	if (!pipev) {
		pr_err("pipefd mevent_add failed\n");
		exit(0);
	}

	for (;;) {
		int suspend_mode;

		/*
		 * Block awaiting events
		 */
		ret = epoll_wait(epoll_fd, eventlist, MEVENT_MAX, -1);

		if (ret == -1 && errno != EINTR)
			pr_err("Error return from epoll_wait");

		/*
		 * Handle reported events
		 */
		mevent_handle(eventlist, ret);
		mevent_drain_del_list();

		suspend_mode = vm_get_suspend_mode();
		if ((suspend_mode != VM_SUSPEND_NONE) &&
		    (suspend_mode != VM_SUSPEND_SYSTEM_RESET) &&
		    (suspend_mode != VM_SUSPEND_SUSPEND))
			break;
	}
}