1 /*
2  * Copyright (C) 2011      Citrix Ltd.
3  *
4  * This program is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU Lesser General Public License as published
6  * by the Free Software Foundation; version 2.1 only. with the special
7  * exception on linking described in file LICENSE.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU Lesser General Public License for more details.
13  */
14 /*
15  * Internal event machinery for use by other parts of libxl
16  */
17 
18 #include <poll.h>
19 
20 #include "libxl_internal.h"
21 
22 
23 //#define DEBUG 1
24 
/*
 * Debug logging helpers.  LIBXL__DBG_LOG forwards to LIBXL__LOG at
 * XTL_DEBUG level when DEBUG is defined and otherwise expands to a
 * no-op expression (so its arguments are not evaluated).  DBG is the
 * shorthand which supplies CTX as the context.
 */
#ifndef DEBUG
# define LIBXL__DBG_LOG(ctx, args, ...) ((void)0)
#else
# define LIBXL__DBG_LOG(ctx, args, ...) \
    LIBXL__LOG((ctx), XTL_DEBUG, args, __VA_ARGS__)
#endif
#define DBG(args, ...) LIBXL__DBG_LOG(CTX, args, __VA_ARGS__)
32 
33 
34 static libxl__ao *ao_nested_root(libxl__ao *ao);
35 
36 static void ao__check_destroy(libxl_ctx *ctx, libxl__ao *ao);
37 
38 
39 /*
40  * The counter osevent_in_hook is used to ensure that the application
41  * honours the reentrancy restriction documented in libxl_event.h.
42  *
43  * The application's registration hooks should be called ONLY via
44  * these macros, with the ctx locked.  Likewise all the "occurred"
45  * entrypoints from the application should assert(!in_hook);
46  *
47  * During the hook call - including while the arguments are being
48  * evaluated - ev->nexus is guaranteed to be valid and refer to the
49  * nexus which is being used for this event registration.  The
50  * arguments should specify ev->nexus for the for_libxl argument and
51  * ev->nexus->for_app_reg (or a pointer to it) for for_app_reg.
52  */
/*
 * Common implementation behind OSEVENT_HOOK and OSEVENT_HOOK_VOID.
 *
 * Expects `gc' and `ev' to be in scope at the point of use.  Does
 * nothing unless the application supplied osevent hooks.  Otherwise:
 * bumps osevent_in_hook, runs the nexusop-specific "pre" function,
 * invokes the evkind##_##hookop hook (prefixed by `retval', which is
 * either empty or an assignment), runs the nexusop-specific "failed"
 * function if `failedp' is true afterwards, and finally drops
 * osevent_in_hook again.
 */
#define OSEVENT_HOOK_INTERN(retval, failedp, evkind, hookop, nexusop, ...) \
do {                                                                     \
    if (CTX->osevent_hooks) {                                            \
        libxl__osevent_hook_nexi *nexi =                                 \
            &CTX->hook_##evkind##_nexi_idle;                             \
        CTX->osevent_in_hook++;                                          \
        osevent_hook_pre_##nexusop(gc, ev, nexi, &ev->nexus);            \
        retval CTX->osevent_hooks->evkind##_##hookop                     \
            (CTX->osevent_user, __VA_ARGS__);                            \
        if ((failedp))                                                   \
            osevent_hook_failed_##nexusop(gc, ev, nexi, &ev->nexus);     \
        CTX->osevent_in_hook--;                                          \
    }                                                                    \
} while (0)
65 
/*
 * Calls an int-returning application hook and yields its return value
 * (0 if no hooks are installed).  A nonzero return counts as failure
 * and triggers the nexusop's "failed" handler.
 */
#define OSEVENT_HOOK(evkind, hookop, nexusop, ...) ({                   \
    int osevent_hook_rc = 0;                                            \
    OSEVENT_HOOK_INTERN(osevent_hook_rc =, osevent_hook_rc != 0,        \
                        evkind, hookop, nexusop, __VA_ARGS__);          \
    osevent_hook_rc;                                                    \
})
72 
/*
 * Calls an application hook whose result is not captured; the
 * nexusop's "failed" handler is never invoked (failedp is constant 0).
 */
#define OSEVENT_HOOK_VOID(evkind, hookop, nexusop, ...)                 \
    OSEVENT_HOOK_INTERN(/* void */, 0,                                  \
                        evkind, hookop, nexusop, __VA_ARGS__)
75 
76 /*
77  * The application's calls to libxl_osevent_occurred_... may be
78  * indefinitely delayed with respect to the rest of the program (since
79  * they are not necessarily called with any lock held).  So the
80  * for_libxl value we receive may be (almost) arbitrarily old.  All we
81  * know is that it came from this ctx.
82  *
83  * Therefore we may not free the object referred to by any for_libxl
84  * value until we free the whole libxl_ctx.  And if we reuse it we
85  * must be able to tell when an old use turns up, and discard the
86  * stale event.
87  *
88  * Thus we cannot use the ev directly as the for_libxl value - we need
89  * a layer of indirection.
90  *
91  * We do this by keeping a pool of libxl__osevent_hook_nexus structs,
92  * and use pointers to them as for_libxl values.  In fact, there are
93  * two pools: one for fds and one for timeouts.  This ensures that we
94  * don't risk a type error when we upcast nexus->ev.  In each nexus
95  * the ev is either null or points to a valid libxl__ev_time or
96  * libxl__ev_fd, as applicable.
97  *
98  * We /do/ allow ourselves to reassociate an old nexus with a new ev
99  * as otherwise we would have to leak nexi.  (This reassociation
100  * might, of course, be an old ev being reused for a new purpose so
101  * simply comparing the ev pointer is not sufficient.)  Thus the
102  * libxl_osevent_occurred functions need to check that the condition
103  * allegedly signalled by this event actually exists.
104  *
105  * The nexi and the lists are all protected by the ctx lock.
106  */
107 
108 struct libxl__osevent_hook_nexus {
109     void *ev;
110     void *for_app_reg;
111     LIBXL_SLIST_ENTRY(libxl__osevent_hook_nexus) next;
112 };
113 
/*
 * Returns the event currently associated with `nexus' (which the
 * caller obtained as a for_libxl value), or null if there is none.
 * `ctx' is not consulted.
 */
static void *osevent_ev_from_hook_nexus(libxl_ctx *ctx,
           libxl__osevent_hook_nexus *nexus /* pass  void *for_libxl */)
{
    void *associated_ev = nexus->ev;

    return associated_ev;
}
119 
/*
 * Detaches `nexus' from its event and puts it at the head of the idle
 * list `nexi_idle' so that it can be reused by a later registration.
 */
static void osevent_release_nexus(libxl__gc *gc,
                                  libxl__osevent_hook_nexi *nexi_idle,
                                  libxl__osevent_hook_nexus *nexus)
{
    LIBXL_SLIST_INSERT_HEAD(nexi_idle, nexus, next);
    nexus->ev = NULL;
}
127 
128 /*----- OSEVENT* hook functions for nexusop "alloc" -----*/
/*
 * "alloc" pre-hook: obtains a nexus for `ev', reusing the first idle
 * one if available and otherwise allocating a zeroed one from NOGC.
 * The nexus is pointed at `ev' and stored through `nexus_r'.
 */
static void osevent_hook_pre_alloc(libxl__gc *gc, void *ev,
                                   libxl__osevent_hook_nexi *nexi_idle,
                                   libxl__osevent_hook_nexus **nexus_r)
{
    libxl__osevent_hook_nexus *picked = LIBXL_SLIST_FIRST(nexi_idle);

    if (!picked) {
        /* nothing idle: make a new one */
        picked = libxl__zalloc(NOGC, sizeof(*picked));
    } else {
        LIBXL_SLIST_REMOVE_HEAD(nexi_idle, next);
    }

    picked->ev = ev;
    *nexus_r = picked;
}
/*
 * "alloc" failure hook: the application's hook reported failure, so
 * hand the nexus obtained by the pre-hook back to the idle list.
 */
static void osevent_hook_failed_alloc(libxl__gc *gc, void *ev,
                                      libxl__osevent_hook_nexi *nexi_idle,
                                      libxl__osevent_hook_nexus **nexus)
{
    libxl__osevent_hook_nexus *unused_nexus = *nexus;

    osevent_release_nexus(gc, nexi_idle, unused_nexus);
}
148 
149 /*----- OSEVENT* hook functions for nexusop "release" -----*/
/*
 * "release" pre-hook: returns the event's nexus to the idle list
 * before the application's hook is invoked.
 */
static void osevent_hook_pre_release(libxl__gc *gc, void *ev,
                                     libxl__osevent_hook_nexi *nexi_idle,
                                     libxl__osevent_hook_nexus **nexus)
{
    libxl__osevent_hook_nexus *finished_nexus = *nexus;

    osevent_release_nexus(gc, nexi_idle, finished_nexus);
}
/*
 * "release" failure hook: there is no recovery from a failed release,
 * so reaching this function aborts the program.
 */
static void osevent_hook_failed_release(libxl__gc *gc, void *ev,
                                        libxl__osevent_hook_nexi *nexi_idle,
                                        libxl__osevent_hook_nexus **nexus)
{
    abort();
}
162 
163 /*----- OSEVENT* hook functions for nexusop "noop" -----*/
/* "noop" pre-hook: the nexus is left untouched. */
static void osevent_hook_pre_noop(libxl__gc *gc, void *ev,
                                  libxl__osevent_hook_nexi *nexi_idle,
                                  libxl__osevent_hook_nexus **nexus)
{
    /* nothing to do */
}
/* "noop" failure hook: the nexus is left untouched. */
static void osevent_hook_failed_noop(libxl__gc *gc, void *ev,
                                     libxl__osevent_hook_nexi *nexi_idle,
                                     libxl__osevent_hook_nexus **nexus)
{
    /* nothing to do */
}
170 
171 
172 /*
173  * fd events
174  */
175 
/*
 * Registers interest in `events' on `fd', with `func' as the callback
 * recorded in `ev'.
 *
 * Takes the ctx lock for the duration.  The application's fd register
 * hook (if any) is called first; if it fails, its return value is
 * passed back and `ev' is left unmodified.  On success `ev' is filled
 * in and added to CTX->efds, and 0 is returned.
 */
int libxl__ev_fd_register(libxl__gc *gc, libxl__ev_fd *ev,
                          libxl__ev_fd_callback *func,
                          int fd, short events)
{
    int rc;

    assert(fd >= 0);

    CTX_LOCK;

    DBG("ev_fd=%p register fd=%d events=%x", ev, fd, events);

    rc = OSEVENT_HOOK(fd,register, alloc, fd, &ev->nexus->for_app_reg,
                      events, ev->nexus);
    if (!rc) {
        /* hook accepted (or there are no hooks): record the registration */
        ev->func = func;
        ev->fd = fd;
        ev->events = events;

        LIBXL_LIST_INSERT_HEAD(&CTX->efds, ev, entry);
    }

    CTX_UNLOCK;
    return rc;
}
204 
/*
 * Changes the set of events an already-registered `ev' is interested
 * in.  Asserts that `ev' is registered.  If the application's modify
 * hook fails its return value is passed back and ev->events keeps its
 * old value; otherwise ev->events is updated and 0 is returned.
 */
int libxl__ev_fd_modify(libxl__gc *gc, libxl__ev_fd *ev, short events)
{
    int rc;

    CTX_LOCK;
    assert(libxl__ev_fd_isregistered(ev));

    DBG("ev_fd=%p modify fd=%d events=%x", ev, ev->fd, events);

    rc = OSEVENT_HOOK(fd,modify, noop, ev->fd, &ev->nexus->for_app_reg, events);
    if (!rc)
        ev->events = events;

    CTX_UNLOCK;
    return rc;
}
224 
/*
 * Deregisters `ev'; a no-op if it is not currently registered.
 *
 * Otherwise the application's deregister hook is called (releasing
 * the nexus), `ev' is removed from the ctx's fd list and its fd is
 * set to -1, and every poller on pollers_fds_changed is flagged.
 */
void libxl__ev_fd_deregister(libxl__gc *gc, libxl__ev_fd *ev)
{
    libxl__poller *poller;

    CTX_LOCK;

    if (libxl__ev_fd_isregistered(ev)) {
        DBG("ev_fd=%p deregister fd=%d", ev, ev->fd);

        OSEVENT_HOOK_VOID(fd,deregister, release, ev->fd, ev->nexus->for_app_reg);
        LIBXL_LIST_REMOVE(ev, entry);
        ev->fd = -1;

        /* tell pollers which are tracking fd changes */
        LIBXL_LIST_FOREACH(poller, &CTX->pollers_fds_changed, fds_changed_entry)
            poller->fds_changed = 1;
    } else {
        DBG("ev_fd=%p deregister unregistered",ev);
    }

    CTX_UNLOCK;
}
247 
/*
 * Polls `fd' once more, without blocking, for `events' and returns
 * the resulting revents (0 if nothing is pending).  EINTR is retried;
 * any other poll failure is reported as an event disaster and 0 is
 * returned.
 */
short libxl__fd_poll_recheck(libxl__egc *egc, int fd, short events) {
    struct pollfd check;
    int r;

    for (;;) {
        check.fd = fd;
        check.events = events;
        r = poll(&check, 1, 0);
        DBG("poll recheck fd=%d r=%d revents=%#x", fd, r, check.revents);
        if (r == 0 || r == 1)
            break; /* definite answer */
        assert(r<0);
        if (errno == EINTR)
            continue;
        LIBXL__EVENT_DISASTER(egc, "failed poll to check for fd", errno, 0);
        return 0;
    }
    assert(!!r == !!check.revents);
    return check.revents;
}
270 
271 /*
272  * timeouts
273  */
274 
275 
/*
 * Wrapper around gettimeofday() which stores the current time in
 * *now_r.  Returns 0 on success; on failure logs the error and
 * returns ERROR_FAIL.
 */
int libxl__gettimeofday(libxl__gc *gc, struct timeval *now_r)
{
    if (gettimeofday(now_r, 0) == 0)
        return 0;

    LOGE(ERROR, "gettimeofday failed");
    return ERROR_FAIL;
}
285 
/*
 * Converts a relative delay of `ms' milliseconds into an absolute
 * time (now + ms), stored in *abs_out.  Returns 0 on success or the
 * error from libxl__gettimeofday, in which case *abs_out is untouched.
 */
static int time_rel_to_abs(libxl__gc *gc, int ms, struct timeval *abs_out)
{
    struct timeval now;
    struct timeval delta;
    int rc;

    delta.tv_sec = ms / 1000;
    delta.tv_usec = (ms % 1000) * 1000;

    rc = libxl__gettimeofday(gc, &now);
    if (rc)
        return rc;

    timeradd(&now, &delta, abs_out);
    return 0;
}
301 
/*
 * Registers `ev' as a finite timeout expiring at `absolute'.
 *
 * Calls the application's timeout register hook (allocating a nexus);
 * if that fails, returns its result without touching `ev'.  Otherwise
 * records the deadline in `ev' and inserts it into CTX->etimes, which
 * is kept sorted by deadline.  Returns 0 on success.
 */
static int time_register_finite(libxl__gc *gc, libxl__ev_time *ev,
                                struct timeval absolute)
{
    libxl__ev_time *cursor;
    int rc;

    rc = OSEVENT_HOOK(timeout,register, alloc, &ev->nexus->for_app_reg,
                      absolute, ev->nexus);
    if (rc != 0)
        return rc;

    ev->abs = absolute;
    ev->infinite = 0;
    LIBXL_TAILQ_INSERT_SORTED(&CTX->etimes, entry, ev, cursor, /*empty*/,
                              timercmp(&ev->abs, &cursor->abs, >));

    return 0;
}
319 
/*
 * Undoes a timeout registration: drops the abortable registration
 * and, for a finite timeout, disassociates the nexus from `ev', asks
 * the application (via the modify hook) to fire the timeout right
 * away, and removes `ev' from CTX->etimes.  Does not clear ev->func.
 */
static void time_deregister(libxl__gc *gc, libxl__ev_time *ev)
{
    libxl__ao_abortable_deregister(&ev->abrt);

    if (ev->infinite)
        return; /* never had a hook registration or list entry */

    struct timeval right_away = { 0, 0 };

    if (ev->nexus) /* only set if app provided hooks */
        ev->nexus->ev = 0;
    OSEVENT_HOOK_VOID(timeout,modify,
                      noop /* release nexus in _occurred_ */,
                      &ev->nexus->for_app_reg, right_away);
    LIBXL_TAILQ_REMOVE(&CTX->etimes, ev, entry);
}
334 
/*
 * Debug aid: when DEBUG is defined, logs the state of `ev' and the
 * result `rc' on behalf of the function named `func'.  Compiles to an
 * empty function otherwise.
 */
static void time_done_debug(libxl__gc *gc, const char *func,
                            libxl__ev_time *ev, int rc)
{
#ifdef DEBUG
    unsigned long abs_sec = (unsigned long)ev->abs.tv_sec;
    unsigned long abs_usec = (unsigned long)ev->abs.tv_usec;

    libxl__log(CTX, XTL_DEBUG, -1,__FILE__,0,func,
               "ev_time=%p done rc=%d .func=%p infinite=%d abs=%lu.%06lu",
               ev, rc, ev->func, ev->infinite, abs_sec, abs_usec);
#endif
}
345 
/*
 * Abort callback installed by time_register_abortable.  Recovers the
 * enclosing libxl__ev_time from `abrt', deregisters it, and then
 * invokes the timeout's own callback with the abort's `rc'.
 */
static void time_aborted(libxl__egc *egc, libxl__ao_abortable *abrt, int rc)
{
    EGC_GC;
    libxl__ev_time *ev = CONTAINER_OF(abrt, *ev, abrt);

    time_deregister(gc, ev);
    DBG("ev_time=%p aborted", ev);
    ev->func(egc, ev, &ev->abs, rc);
}
355 
/*
 * Sets up ev->abrt so that aborting `ao' calls time_aborted, and
 * registers it.  Returns the result of libxl__ao_abortable_register.
 */
static int time_register_abortable(libxl__ao *ao, libxl__ev_time *ev)
{
    int rc;

    ev->abrt.callback = time_aborted;
    ev->abrt.ao = ao;

    rc = libxl__ao_abortable_register(&ev->abrt);
    return rc;
}
362 
/*
 * Registers `ev' as a timeout expiring at the absolute time
 * `absolute'; `func' is recorded as the callback to run when the
 * timeout occurs or is aborted.
 *
 * The timeout is also registered as an abortable of `ao'.  That
 * abortable registration must stay in place for as long as the
 * timeout is registered, so it is only undone on the failure paths
 * (exactly as libxl__ev_time_register_rel does).
 *
 * Returns 0 on success, or the error from registering the abortable
 * or the finite timeout; on error `ev' is left unregistered.
 */
int libxl__ev_time_register_abs(libxl__ao *ao, libxl__ev_time *ev,
                                libxl__ev_time_callback *func,
                                struct timeval absolute)
{
    AO_GC;
    int rc;

    CTX_LOCK;

    DBG("ev_time=%p register abs=%lu.%06lu",
        ev, (unsigned long)absolute.tv_sec, (unsigned long)absolute.tv_usec);

    rc = time_register_abortable(ao, ev);
    if (rc) goto out;

    rc = time_register_finite(gc, ev, absolute);
    if (rc) goto out;

    /* Setting func is what marks ev as registered. */
    ev->func = func;

    rc = 0;
 out:
    /* Only tear down the abortable if registration did not complete;
     * doing so unconditionally would make a successfully registered
     * timeout impossible to abort. */
    if (!libxl__ev_time_isregistered(ev))
        libxl__ao_abortable_deregister(&ev->abrt);
    time_done_debug(gc,__func__,ev,rc);
    CTX_UNLOCK;
    return rc;
}
390 
391 
/*
 * Registers `ev' as a timeout `milliseconds' from now, with `func'
 * recorded as its callback.  A negative `milliseconds' means an
 * infinite timeout: no deadline is computed and nothing is registered
 * with the application's hooks.
 *
 * The timeout is also registered as an abortable of `ao'; that
 * registration is undone again if `ev' did not end up registered.
 * Returns 0 on success or the error from the failing step.
 */
int libxl__ev_time_register_rel(libxl__ao *ao, libxl__ev_time *ev,
                                libxl__ev_time_callback *func,
                                int milliseconds /* as for poll(2) */)
{
    AO_GC;
    struct timeval deadline;
    int rc;

    CTX_LOCK;

    DBG("ev_time=%p register ms=%d", ev, milliseconds);

    rc = time_register_abortable(ao, ev);
    if (rc) goto out;

    if (milliseconds >= 0) {
        rc = time_rel_to_abs(gc, milliseconds, &deadline);
        if (rc) goto out;

        rc = time_register_finite(gc, ev, deadline);
        if (rc) goto out;
    } else {
        ev->infinite = 1;
    }

    ev->func = func;
    rc = 0;

 out:
    if (!libxl__ev_time_isregistered(ev))
        libxl__ao_abortable_deregister(&ev->abrt);
    time_done_debug(gc,__func__,ev,rc);
    CTX_UNLOCK;
    return rc;
}
427 
/*
 * Deregisters the timeout `ev' and clears its callback; a no-op
 * (apart from debug output) if `ev' is not registered.
 */
void libxl__ev_time_deregister(libxl__gc *gc, libxl__ev_time *ev)
{
    CTX_LOCK;

    DBG("ev_time=%p deregister", ev);

    if (libxl__ev_time_isregistered(ev)) {
        time_deregister(gc, ev);
        ev->func = 0;
    }

    time_done_debug(gc,__func__,ev,0);
    CTX_UNLOCK;
}
445 
/*
 * Delivers a timeout: clears etime->func before invoking the saved
 * callback with the timeout's deadline and `rc'.
 */
static void time_occurs(libxl__egc *egc, libxl__ev_time *etime, int rc)
{
    libxl__ev_time_callback *callback;

    DBG("ev_time=%p occurs abs=%lu.%06lu",
        etime, (unsigned long)etime->abs.tv_sec,
        (unsigned long)etime->abs.tv_usec);

    callback = etime->func;
    etime->func = 0;
    callback(egc, etime, &etime->abs, rc);
}
456 
457 
458 /*
459  * xenstore watches
460  */
461 
libxl__watch_slot_contents(libxl__gc * gc,int slotnum)462 libxl__ev_xswatch *libxl__watch_slot_contents(libxl__gc *gc, int slotnum)
463 {
464     libxl__ev_watch_slot *slot = &CTX->watch_slots[slotnum];
465     libxl__ev_watch_slot *slotcontents = LIBXL_SLIST_NEXT(slot, empty);
466 
467     if (slotcontents == NULL ||
468         ((uintptr_t)slotcontents >= (uintptr_t)CTX->watch_slots &&
469          (uintptr_t)slotcontents < (uintptr_t)(CTX->watch_slots +
470                                                CTX->watch_nslots)))
471         /* An empty slot has either a NULL pointer (end of the
472          * free list), or a pointer to another entry in the array.
473          * So we can do a bounds check to distinguish empty from
474          * full slots.
475          */
476         /* We need to do the comparisons as uintptr_t because
477          * comparing pointers which are not in the same object is
478          * undefined behaviour; if the compiler managed to figure
479          * out that watch_slots[0..watch_nslots-1] is all of the
480          * whole array object it could prove that the above bounds
481          * check was always true if it was legal, and remove it!
482          *
483          * uintptr_t because even on a machine with signed
484          * pointers, objects do not cross zero; whereas on
485          * machines with unsigned pointers, they may cross
486          * 0x8bazillion.
487          */
488         return NULL;
489 
490         /* see comment near libxl__ev_watch_slot definition */
491     return (void*)slotcontents;
492 }
493 
/*
 * Marks `slot' as occupied by the watch `w'.
 *
 * This looks a bit behind the curtain of LIBXL_SLIST and assigns
 * directly to the pointer which is the list's next link.  See the
 * comment by the definition of libxl__ev_watch_slot.
 */
static void libxl__set_watch_slot_contents(libxl__ev_watch_slot *slot,
                                           libxl__ev_xswatch *w)
{
    void *occupant = (void*)w;

    slot->empty.sle_next = occupant;
}
502 
/*
 * fd callback for the xenstore watch fd.
 *
 * Drains all pending watch events from the xenstore handle.  Each
 * event's token is parsed as "<slotnum>/<counterval>" and validated
 * against the slot table; events which fail validation are logged and
 * dropped, and the rest are passed to the owning watch's callback.
 * Failure to read watches (other than EAGAIN/EINTR) is reported as an
 * event disaster.
 */
static void watchfd_callback(libxl__egc *egc, libxl__ev_fd *ev,
                             int fd, short events, short revents)
{
    EGC_GC;

    if (revents & (POLLERR|POLLHUP))
        LIBXL__EVENT_DISASTER(egc, "unexpected poll event on watch fd", 0, 0);

    for (;;) {
        char **event = xs_check_watch(CTX->xsh);
        if (!event) {
            if (errno == EAGAIN) break;      /* queue drained */
            if (errno == EINTR) continue;    /* just retry */
            LIBXL__EVENT_DISASTER(egc, "cannot check/read watches", errno, 0);
            return;
        }

        const char *epath = event[0];
        const char *token = event[1];
        int slotnum;
        uint32_t counterval;
        int nparsed = sscanf(token, "%d/%"SCNx32, &slotnum, &counterval);

        if (nparsed != 2) {
            LOG(ERROR, "watch epath=%s token=%s: failed to parse token",
                epath, token);
            /* oh well */
        } else if (slotnum < 0 || slotnum >= CTX->watch_nslots) {
            /* perhaps in the future we will make the watchslots array shrink */
            LIBXL__LOG(CTX, LIBXL__LOG_DEBUG, "watch epath=%s token=%s:"
                       " slotnum %d out of range [0,%d>",
                       epath, token, slotnum, CTX->watch_nslots);
        } else {
            libxl__ev_xswatch *w = libxl__watch_slot_contents(gc, slotnum);

            if (!w) {
                LOG(DEBUG, "watch epath=%s token=%s: empty slot", epath, token);
            } else if (w->counterval != counterval) {
                LOG(DEBUG, "watch w=%p epath=%s token=%s: counter != %"PRIx32,
                    w, epath, token, w->counterval);
            } else if (!xs_path_is_subpath(w->path, epath)) {
                /* It is possible, though unlikely, that this was an
                 * event from a previous use of the same slot with the
                 * same counterval.
                 *
                 * If the event path were a child of the watch path,
                 * this watch would really have generated this event
                 * had it been registered soon enough, and it would be
                 * OK to give the possibly-spurious event to the
                 * caller.  Here it is not, so it must be suppressed:
                 * the caller should not see events for unrelated
                 * paths.
                 *
                 * See also docs/misc/xenstore.txt.
                 */
                LOG(DEBUG, "watch w=%p wpath=%s token=%s: unexpected epath=%s",
                    w, w->path, token, epath);
            } else {
                /* At last, we have checked everything! */
                LOG(DEBUG, "watch w=%p wpath=%s token=%s: event epath=%s",
                    w, w->path, token, epath);
                w->callback(egc, w, w->path, epath);
            }
        }

        free(event);
    }
}
580 
/*
 * Builds the xenstore watch token for a slot: the decimal slot
 * number, a slash, and the counter value in hex.  The string comes
 * from GCSPRINTF.
 */
static char *watch_token(libxl__gc *gc, int slotnum, uint32_t counterval)
{
    char *token = GCSPRINTF("%d/%"PRIx32, slotnum, counterval);

    return token;
}
585 
/*
 * Deregisters the xenstore watch fd if no watches remain.
 */
static void watches_check_fd_deregister(libxl__gc *gc)
{
    assert(CTX->nwatches>=0);

    if (CTX->nwatches == 0)
        libxl__ev_fd_deregister(gc, &CTX->watch_efd);
}
592 
/*
 * Registers a xenstore watch on `path' (which is copied), recording
 * `func' as the callback in `w'.
 *
 * Ensures the watch fd is registered, takes a slot from the free list
 * (growing the slot array first if the free list is empty), and
 * creates the xenstore watch with a token derived from the slot
 * number and a fresh counter value.
 *
 * Returns 0 on success.  On failure returns ERROR_NOMEM, ERROR_FAIL
 * or the error from registering the watch fd; any slot taken is put
 * back on the free list, the path copy is freed, and the watch fd is
 * deregistered again if no watches exist.
 */
int libxl__ev_xswatch_register(libxl__gc *gc, libxl__ev_xswatch *w,
                               libxl__ev_xswatch_callback *func,
                               const char *path /* copied */)
{
    libxl__ev_watch_slot *use = NULL;
    char *path_copy = NULL;
    const char *token;
    int slotnum;
    int rc;

    CTX_LOCK;

    if (!libxl__ev_fd_isregistered(&CTX->watch_efd)) {
        rc = libxl__ev_fd_register(gc, &CTX->watch_efd, watchfd_callback,
                                   xs_fileno(CTX->xsh), POLLIN);
        if (rc) goto out_rc;
    }

    if (LIBXL_SLIST_EMPTY(&CTX->watch_freeslots)) {
        /* Free list is empty so there is not in fact a linked
         * free list in the array and we can safely realloc it */
        int grown_nslots = (CTX->watch_nslots + 1) << 2;
        int idx;
        libxl__ev_watch_slot *grown =
            libxl__realloc(NOGC,
                           CTX->watch_slots, sizeof(*grown) * grown_nslots);
        if (!grown) goto out_nomem;

        /* thread the newly added slots onto the free list */
        for (idx = CTX->watch_nslots; idx < grown_nslots; idx++) {
            LIBXL_SLIST_INSERT_HEAD(&CTX->watch_freeslots,
                                    &grown[idx], empty);
        }
        CTX->watch_slots = grown;
        CTX->watch_nslots = grown_nslots;
    }

    use = LIBXL_SLIST_FIRST(&CTX->watch_freeslots);
    assert(use);
    LIBXL_SLIST_REMOVE_HEAD(&CTX->watch_freeslots, empty);

    path_copy = strdup(path);
    if (!path_copy) goto out_nomem;

    slotnum = use - CTX->watch_slots;
    w->counterval = CTX->watch_counter++;

    token = watch_token(gc, slotnum, w->counterval);
    LOG(DEBUG, "watch w=%p wpath=%s token=%s: register slotnum=%d",
        w, path, token, slotnum);

    if (!xs_watch(CTX->xsh, path, token)) {
        LOGEV(ERROR, errno, "create watch for path %s", path);
        rc = ERROR_FAIL;
        goto out_rc;
    }

    /* committed: fill in the watch and occupy the slot */
    w->slotnum = slotnum;
    w->path = path_copy;
    w->callback = func;
    CTX->nwatches++;
    libxl__set_watch_slot_contents(use, w);

    CTX_UNLOCK;
    return 0;

 out_nomem:
    rc = ERROR_NOMEM;
 out_rc:
    if (use)
        LIBXL_SLIST_INSERT_HEAD(&CTX->watch_freeslots, use, empty);
    free(path_copy);
    watches_check_fd_deregister(gc);
    CTX_UNLOCK;
    return rc;
}
663 
/*
 * Deregisters the xenstore watch `w'.  If it is registered
 * (slotnum >= 0) the xenstore watch is removed (a failure to do so is
 * only logged), the slot goes back on the free list, and the watch fd
 * is deregistered if this was the last watch.  In all cases w->path
 * is freed and set to NULL.
 */
void libxl__ev_xswatch_deregister(libxl__gc *gc, libxl__ev_xswatch *w)
{
    /* it is legal to deregister from within _callback */
    CTX_LOCK;

    if (w->slotnum < 0) {
        LOG(DEBUG, "watch w=%p: deregister unregistered", w);
    } else {
        const char *token = watch_token(gc, w->slotnum, w->counterval);

        LOG(DEBUG, "watch w=%p wpath=%s token=%s: deregister slotnum=%d",
            w, w->path, token, w->slotnum);

        if (!xs_unwatch(CTX->xsh, w->path, token)) {
            /* Oh well, we will just get watch events forever more
             * and ignore them.  But we should complain to the log. */
            LOGEV(ERROR, errno, "remove watch for path %s", w->path);
        }

        libxl__ev_watch_slot *slot = &CTX->watch_slots[w->slotnum];
        LIBXL_SLIST_INSERT_HEAD(&CTX->watch_freeslots, slot, empty);
        w->slotnum = -1;
        CTX->nwatches--;
        watches_check_fd_deregister(gc);
    }

    free(w->path);
    w->path = NULL;

    CTX_UNLOCK;
}
694 
695 /*
696  * evtchn
697  */
698 
/*
 * Validates the poll revents seen on the event channel fd.  Returns 0
 * if only POLLIN is set.  If any other bit is set, logs it, reports
 * an event disaster, deregisters the evtchn fd and returns ERROR_FAIL.
 */
static int evtchn_revents_check(libxl__egc *egc, int revents)
{
    EGC_GC;

    if (!(revents & ~POLLIN)) {
        assert(revents & POLLIN);
        return 0;
    }

    LOG(ERROR, "unexpected poll event on event channel fd: %x", revents);
    LIBXL__EVENT_DISASTER(egc,
               "unexpected poll event on event channel fd", 0, 0);
    libxl__ev_fd_deregister(gc, &CTX->evtchn_efd);
    return ERROR_FAIL;
}
715 
/*
 * fd callback for the event channel fd.
 *
 * Repeatedly rechecks the fd and fetches pending ports until none
 * remain.  For each port, the first waiter in CTX->evtchns_waiting
 * with that port is marked not-waiting, removed from the list, and
 * has its callback invoked; ports nobody waits for are skipped.
 * Unexpected revents or failures fetching a port end processing.
 */
static void evtchn_fd_callback(libxl__egc *egc, libxl__ev_fd *ev,
                               int fd, short events, short revents)
{
    EGC_GC;
    libxl__ev_evtchn *evev;
    libxl__ev_evtchn *signalled;
    xenevtchn_port_or_error_t port;

    if (evtchn_revents_check(egc, revents))
        return;

    for (;;) {
        /* Check the fd again.  The incoming revent may no longer be
         * true, because the libxl ctx lock has not necessarily been
         * held continuously since someone noticed the fd.  Normally
         * this wouldn't be a problem but evtchn devices don't always
         * honour O_NONBLOCK (see xenctrl.h). */
        revents = libxl__fd_poll_recheck(egc,fd,POLLIN);
        if (!revents)
            break;
        if (evtchn_revents_check(egc, revents))
            return;

        /* OK, that's that workaround done.  We can actually check for
         * work for us to do: */

        port = xenevtchn_pending(CTX->xce);
        if (port < 0) {
            if (errno == EAGAIN)
                break;
            LIBXL__EVENT_DISASTER(egc,
     "unexpected failure fetching occurring event port number from evtchn",
                                  errno, 0);
            return;
        }

        /* look for someone waiting on this port */
        signalled = NULL;
        LIBXL_LIST_FOREACH(evev, &CTX->evtchns_waiting, entry) {
            if (port == evev->port) {
                signalled = evev;
                break;
            }
        }

        if (!signalled) {
            DBG("ev_evtchn port=%d no-one cared", port);
            continue;
        }

        DBG("ev_evtchn=%p port=%d signaled", signalled, port);
        signalled->waiting = 0;
        LIBXL_LIST_REMOVE(signalled, entry);
        signalled->callback(egc, signalled);
    }
}
766 
/*
 * Lazily opens the ctx's event channel handle.  Returns 0 straight
 * away if CTX->xce is already set.  Otherwise opens a handle, makes
 * its fd nonblocking and stores it in CTX->xce.  On failure the
 * handle is closed again and an error code is returned.
 */
int libxl__ctx_evtchn_init(libxl__gc *gc) {
    xenevtchn_handle *xce;
    int rc, fd;

    if (CTX->xce)
        return 0; /* already initialised */

    xce = xenevtchn_open(CTX->lg, 0);
    if (!xce) {
        LOGE(ERROR,"cannot open libxc evtchn handle");
        rc = ERROR_FAIL;
        goto out;
    }

    fd = xenevtchn_fd(xce);
    assert(fd >= 0);

    rc = libxl_fd_set_nonblock(CTX, fd, 1);
    if (!rc) {
        CTX->xce = xce;
        return 0;
    }

 out:
    xenevtchn_close(xce);
    return rc;
}
794 
/*
 * Deregisters the event channel fd if the evtchn handle exists and
 * nobody is waiting on any event channel.
 */
static void evtchn_check_fd_deregister(libxl__gc *gc)
{
    if (!CTX->xce)
        return;
    if (!LIBXL_LIST_EMPTY(&CTX->evtchns_waiting))
        return;

    libxl__ev_fd_deregister(gc, &CTX->evtchn_efd);
}
800 
/*
 * Starts waiting for the event channel evev->port.
 *
 * Makes sure the evtchn handle is open and its fd registered.  If
 * `evev' is already waiting, returns 0 without doing anything more.
 * Otherwise unmasks the port, marks `evev' as waiting and adds it to
 * CTX->evtchns_waiting.  On error, the evtchn fd is deregistered
 * again if nothing is waiting, and the error code is returned.
 */
int libxl__ev_evtchn_wait(libxl__gc *gc, libxl__ev_evtchn *evev)
{
    int unmask_r;
    int rc;

    DBG("ev_evtchn=%p port=%d wait (was waiting=%d)",
        evev, evev->port, evev->waiting);

    rc = libxl__ctx_evtchn_init(gc);
    if (rc) goto out;

    if (!libxl__ev_fd_isregistered(&CTX->evtchn_efd)) {
        rc = libxl__ev_fd_register(gc, &CTX->evtchn_efd, evtchn_fd_callback,
                                   xenevtchn_fd(CTX->xce), POLLIN);
        if (rc) goto out;
    }

    if (evev->waiting)
        return 0; /* already on the waiting list */

    unmask_r = xenevtchn_unmask(CTX->xce, evev->port);
    if (unmask_r) {
        LOGE(ERROR,"cannot unmask event channel %d",evev->port);
        rc = ERROR_FAIL;
        goto out;
    }

    LIBXL_LIST_INSERT_HEAD(&CTX->evtchns_waiting, evev, entry);
    evev->waiting = 1;
    return 0;

 out:
    evtchn_check_fd_deregister(gc);
    return rc;
}
835 
/*
 * Stops waiting on `evev'; a no-op if it is not waiting.  Otherwise
 * it is removed from the waiting list and the evtchn fd is
 * deregistered if that list is now empty.
 */
void libxl__ev_evtchn_cancel(libxl__gc *gc, libxl__ev_evtchn *evev)
{
    DBG("ev_evtchn=%p port=%d cancel (was waiting=%d)",
        evev, evev->port, evev->waiting);

    if (evev->waiting) {
        evev->waiting = 0;
        LIBXL_LIST_REMOVE(evev, entry);
        evtchn_check_fd_deregister(gc);
    }
}
848 
849 /*
850  * waiting for device state
851  */
852 
/*
 * xswait callback used by libxl__ev_devstate_wait.
 *
 * If `rc' is nonzero, or the state node has been removed (`sstate' is
 * NULL, reported as ERROR_INVAL), or the state now equals the wanted
 * one (reported as 0), the wait is cancelled and the user's callback
 * invoked with that result.  Any other state value just logs and
 * keeps waiting.
 */
static void devstate_callback(libxl__egc *egc, libxl__xswait_state *xsw,
                              int rc, const char *sstate)
{
    EGC_GC;
    libxl__ev_devstate *ds = CONTAINER_OF(xsw, *ds, w);

    if (rc) {
        if (rc == ERROR_TIMEDOUT)
            LOG(DEBUG, "backend %s wanted state %d "" timed out", ds->w.path,
                ds->wanted);
        goto out;
    }
    if (!sstate) {
        LOG(DEBUG, "backend %s wanted state %d"" but it was removed",
            ds->w.path, ds->wanted);
        rc = ERROR_INVAL;
        goto out;
    }

    int got = atoi(sstate);
    if (got != ds->wanted) {
        /* not there yet: stay registered and wait for the next event */
        LOG(DEBUG, "backend %s wanted state %d"" still waiting state %d",
            ds->w.path, ds->wanted, got);
        return;
    }

    LOG(DEBUG, "backend %s wanted state %d ok", ds->w.path, ds->wanted);
    rc = 0;

 out:
    libxl__ev_devstate_cancel(gc, ds);
    ds->callback(egc, ds, rc);
}
886 
/*
 * Waits for the xenstore node state_path to take the value `state'.
 *
 * Fills in ds (wanted state, callback, and the embedded xswait with
 * path, timeout in milliseconds and a descriptive string) and starts
 * the xswait with devstate_callback as its handler.
 *
 * Returns 0 if the wait was started.  If starting fails, the wait is
 * cancelled again and the error from libxl__xswait_start is returned.
 */
int libxl__ev_devstate_wait(libxl__ao *ao, libxl__ev_devstate *ds,
                            libxl__ev_devstate_callback cb,
                            const char *state_path, int state, int milliseconds)
{
    AO_GC;
    int rc;

    libxl__xswait_init(&ds->w);

    ds->callback = cb;
    ds->wanted = state;

    ds->w.ao = ao;
    ds->w.path = state_path;
    ds->w.timeout_ms = milliseconds;
    ds->w.callback = devstate_callback;
    ds->w.what = GCSPRINTF("backend %s (hoping for state change to %d)",
                           state_path, state);

    rc = libxl__xswait_start(gc, &ds->w);
    if (rc) {
        libxl__ev_devstate_cancel(gc, ds);
        return rc;
    }

    return 0;
}
913 
914 /*
915  * domain death/destruction
916  */
917 
918 /*
919  * We use a xenstore watch on the domain's path, rather than using an
920  * @releaseDomain watch and asking the hypervisor.  This is simpler
921  * because turning @releaseDomain into domain-specific information is
922  * complicated.
923  *
924  * It is also sufficient for our callers, which are generally trying
925  * to do cleanup of their own execution state on domain death, for the
926  * following reason: if the domain is destroyed then either (a) the
927  * entries in xenstore have already been deleted, in which case the
928  * test here works or (b) they have not in which case something has
929  * gone very badly wrong and we are going to leak those xenstore
930  * entries, in which case trying to avoid leaking other stuff is
931  * futile.
932  */
933 
/*
 * Puts dc into its initial state by initialising both of its embedded
 * members: the abortable (dc->abrt) and the xenstore watch (dc->watch).
 */
void libxl__domaindeathcheck_init(libxl__domaindeathcheck *dc)
{
    libxl__ao_abortable_init(&dc->abrt);

    libxl__ev_xswatch_init(&dc->watch);
}
939 
/*
 * Stops a domain death check: deregisters the abortable and then the
 * xenstore watch belonging to dc.
 */
void libxl__domaindeathcheck_stop(libxl__gc *gc, libxl__domaindeathcheck *dc)
{
    libxl__ao_abortable_deregister(&dc->abrt);

    libxl__ev_xswatch_deregister(gc, &dc->watch);
}
945 
/*
 * xenstore watch callback for a domain death check.
 *
 * Re-reads watch_path.  If it can still be read the domain is taken to
 * be alive and nothing happens.  Otherwise the check is stopped and:
 *   - if the read failed with ENOENT, dc->callback is called with
 *     ERROR_DOMAIN_DESTROYED;
 *   - for any other errno an event-loop disaster is reported and
 *     dc->callback is not called.
 */
static void domaindeathcheck_callback(libxl__egc *egc, libxl__ev_xswatch *w,
                            const char *watch_path, const char *event_path)
{
    libxl__domaindeathcheck *dc = CONTAINER_OF(w, *dc, watch);
    EGC_GC;
    const char *p = libxl__xs_read(gc, XBT_NULL, watch_path);
    if (p) return;

    /* Remember why the read failed before calling anything else:
     * stopping the check below may overwrite errno. */
    int read_errno = errno;

    libxl__domaindeathcheck_stop(gc,dc);

    if (read_errno != ENOENT) {
        LIBXL__EVENT_DISASTER(egc,"failed to read xenstore"
                              " for domain detach check", read_errno, 0);
        return;
    }

    LOG(ERROR,"%s: domain %"PRIu32" removed (%s no longer in xenstore)",
        dc->what, dc->domid, watch_path);
    dc->callback(egc, dc, ERROR_DOMAIN_DESTROYED);
}
966 
/*
 * Abort handler for a domain death check: stops the check and passes
 * the abort's rc straight through to dc->callback.
 */
static void domaindeathcheck_abort(libxl__egc *egc,
                                   libxl__ao_abortable *abrt,
                                   int rc)
{
    EGC_GC;
    libxl__domaindeathcheck *dc = CONTAINER_OF(abrt, *dc, abrt);

    libxl__domaindeathcheck_stop(gc, dc);

    dc->callback(egc, dc, rc);
}
977 
/*
 * Starts watching for the disappearance of domain dc->domid.
 *
 * (Re)initialises dc, registers it as an abortable of ao and then
 * registers a xenstore watch on /local/domain/<domid> handled by
 * domaindeathcheck_callback.
 *
 * Returns 0 on success.  If either registration fails the check is
 * stopped again and the failing rc is returned.
 */
int libxl__domaindeathcheck_start(libxl__ao *ao,
                                  libxl__domaindeathcheck *dc)
{
    AO_GC;
    int rc;
    const char *path = GCSPRINTF("/local/domain/%"PRIu32, dc->domid);

    libxl__domaindeathcheck_init(dc);

    dc->abrt.ao = ao;
    dc->abrt.callback = domaindeathcheck_abort;

    rc = libxl__ao_abortable_register(&dc->abrt);
    if (!rc)
        rc = libxl__ev_xswatch_register(gc, &dc->watch,
                                        domaindeathcheck_callback, path);

    if (rc)
        libxl__domaindeathcheck_stop(gc, dc);

    return rc;
}
1002 
1003 /*
1004  * osevent poll
1005  */
1006 
/*
 * Prepares the pollfd array and timeout for a call to poll().
 *
 *   *nfds_io     on entry, the number of slots available in fds;
 *                on return, the number of slots needed.
 *   fds          filled in for as many wanted fds as fit.
 *   *timeout_upd reduced to the time (in ms) until the first entry of
 *                CTX->etimes, if that is sooner than its current
 *                value (a negative value counts as "no timeout yet").
 *
 * Returns 0, or ERROR_BUFFERFULL if fds had too few slots.
 */
static int beforepoll_internal(libxl__gc *gc, libxl__poller *poller,
                               int *nfds_io, struct pollfd *fds,
                               int *timeout_upd, struct timeval now)
{
    libxl__ev_fd *efd;
    int rc;

    /*
     * The set of fds we are interested in is walked twice: once to
     * find out how large the rindex array has to be, and once to
     * fill the arrays in.
     *
     * So that the decision about which fds are relevant is written
     * down only once, we define a macro
     *    REQUIRE_FDS( BODY )
     * which executes
     *    do{
     *        int req_fd;
     *        int req_events;
     *        BODY;
     *    }while(0)
     * for every fd whose events are nonzero.
     *
     * REQUIRE_FDS is itself built from the helper macro
     *    void REQUIRE_FD(int req_fd, int req_events, BODY);
     */

#define REQUIRE_FDS(BODY) do{                                          \
                                                                       \
        LIBXL_LIST_FOREACH(efd, &CTX->efds, entry)                     \
            REQUIRE_FD(efd->fd, efd->events, BODY);                    \
                                                                       \
        REQUIRE_FD(poller->wakeup_pipe[0], POLLIN, BODY);              \
                                                                       \
    }while(0)

#define REQUIRE_FD(req_fd_, req_events_, BODY) do{      \
        int req_events = (req_events_);                 \
        int req_fd = (req_fd_);                         \
        if (req_events) {                               \
            BODY;                                       \
        }                                               \
    }while(0)


    /*
     * So that _afterpoll can get from an fd to its struct pollfd
     * quickly, poller->fd_rindices shadows the fds array: it is
     * indexed by fd, and each element holds three indices into the
     * fds array (for POLLIN, POLLPRI and POLLOUT respectively).
     */

    if (*nfds_io) {
        /*
         * When *nfds_io is zero on entry the caller only wants to
         * learn the required array size, so fd_rindices is left
         * alone entirely (the code after this block does not touch
         * it either in that case).
         */

        int rindices_needed = 0;

        REQUIRE_FDS({
            if (rindices_needed <= req_fd)
                rindices_needed = req_fd + 1;
        });

        /* grow fd_rindices so that every wanted fd is a valid index */
        if (rindices_needed > poller->fd_rindices_allocd) {
            assert(ARRAY_SIZE_OK(poller->fd_rindices, rindices_needed));
            poller->fd_rindices =
                libxl__realloc(NOGC, poller->fd_rindices,
                               rindices_needed * sizeof(*poller->fd_rindices));
            /* zero just the newly added tail */
            memset(poller->fd_rindices + poller->fd_rindices_allocd,
                   0,
                   (rindices_needed - poller->fd_rindices_allocd)
                     * sizeof(*poller->fd_rindices));
            poller->fd_rindices_allocd = rindices_needed;
        }
    }

    int wanted = 0;

    REQUIRE_FDS({
        if (wanted < *nfds_io) {
            fds[wanted].fd = req_fd;
            fds[wanted].events = req_events;
            fds[wanted].revents = 0;
            assert(req_fd < poller->fd_rindices_allocd);
            if (req_events & POLLIN)  poller->fd_rindices[req_fd][0] = wanted;
            if (req_events & POLLPRI) poller->fd_rindices[req_fd][1] = wanted;
            if (req_events & POLLOUT) poller->fd_rindices[req_fd][2] = wanted;
        }
        /* keep counting even when out of room, to report the need */
        wanted++;
    });

    rc = wanted > *nfds_io ? ERROR_BUFFERFULL : 0;

    *nfds_io = wanted;

    poller->fds_changed = 0;

    libxl__ev_time *etime = LIBXL_TAILQ_FIRST(&CTX->etimes);
    if (!etime)
        return rc; /* no timeouts pending; leave *timeout_upd alone */

    static struct timeval zero;
    struct timeval rel;
    int our_timeout;

    timersub(&etime->abs, &now, &rel);

    if (timercmp(&rel, &zero, <))
        our_timeout = 0; /* already due */
    else if (rel.tv_sec >= 2000000)
        our_timeout = 2000000000; /* clamp so the ms value fits an int */
    else
        our_timeout = rel.tv_sec * 1000 + (rel.tv_usec + 999) / 1000;

    if (*timeout_upd < 0 || our_timeout < *timeout_upd)
        *timeout_upd = our_timeout;

    return rc;
}
1134 
/*
 * Public entry point: runs beforepoll_internal on the application's
 * poller (ctx->poller_app) with the ctx locked, and returns its rc.
 */
int libxl_osevent_beforepoll(libxl_ctx *ctx, int *nfds_io,
                             struct pollfd *fds, int *timeout_upd,
                             struct timeval now)
{
    int rc;

    EGC_INIT(ctx);
    CTX_LOCK;

    rc = beforepoll_internal(gc, ctx->poller_app,
                             nfds_io, fds, timeout_upd, now);

    CTX_UNLOCK;
    EGC_FREE;

    return rc;
}
1147 
static int afterpoll_check_fd(libxl__poller *poller,
                              const struct pollfd *fds, int nfds,
                              int fd, int events)
    /* Returns the mask of events which were both requested and
     * reported.  POLLERR and POLLHUP always count as requested.
     * For each (poller,fd,events) combination a nonzero value is
     * returned only once until the next beforepoll.  If the events
     * of different combinations overlap, then somewhere between one
     * and all of the distinct combinations get a nonzero return. */
{
    int which, occurred = 0;

    if (fd >= poller->fd_rindices_allocd)
        /* fd was added after we went into poll; caller must retry */
        return 0;

    const int interesting = events | POLLERR | POLLHUP;

    for (which = 0; which < 3; which++) {
        int *slotp = &poller->fd_rindices[fd][which];
        const int slot = *slotp;

        /*
         * slot >= nfds: stale entry (fd added afterwards), or a slot
         *               for which we have already returned nonzero.
         * fd mismatch:  stale entry as well.
         */
        if (slot >= nfds || fds[slot].fd != fd)
            continue;

        assert(poller->fds_changed || !(fds[slot].revents & POLLNVAL));

        /* mask, because the requested events may have changed */
        int slot_revents = fds[slot].revents & interesting;

        if (slot_revents) {
            occurred |= slot_revents;
            /* consume: next time this entry will fail slot < nfds */
            *slotp = INT_MAX;
        }
        /* else: this slot belongs to a different set of events */
    }

    return occurred;
}
1191 
/*
 * Delivers an fd event to efd's handler.  The revents reported by the
 * caller are ignored (hence revents_ign); instead the fd is rechecked
 * via libxl__fd_poll_recheck and efd->func is called only if that
 * recheck reports something.
 */
static void fd_occurs(libxl__egc *egc, libxl__ev_fd *efd, short revents_ign)
{
    short now_ready = libxl__fd_poll_recheck(egc, efd->fd, efd->events);

    DBG("ev_fd=%p occurs fd=%d events=%x revents_ign=%x revents_current=%x",
        efd, efd->fd, efd->events, revents_ign, now_ready);

    if (!now_ready)
        return;

    efd->func(egc, efd, efd->fd, efd->events, now_ready);
}
1202 
static void afterpoll_internal(libxl__egc *egc, libxl__poller *poller,
                               int nfds, const struct pollfd *fds,
                               struct timeval now)
{
    /* May make callbacks into the application for child processes.
     * ctx must be locked exactly once */
    EGC_GC;
    libxl__ev_fd *efd;
    libxl__ev_time *etime;

    /*
     * Beware of reentrancy!
     *
     * Much of what this function does ends up in arbitrary callback
     * functions, and those are free to modify the event handling
     * data structures.  For each structure used here:
     *
     *   egc, poller, now
     *                belong to our caller, to the current thread and
     *                to its call stack into the event machinery, and
     *                are not freed before we return.  Safe.
     *
     *   fds          is either the array the application passed to
     *                libxl_osevent_afterpoll (which obviously has to
     *                stay valid until that function returns, even
     *                though this is not spelled out anywhere), or
     *                poller->fd_polls, which only our (non-recursive)
     *                caller eventloop_iteration modifies.
     *
     *   CTX          is supplied by our caller and applications may
     *                not destroy it while we run, so the pointer is
     *                fine.  As for what it points to:
     *
     *   CTX->etimes  is accessed in a simple reentrancy-safe way.
     *
     *   CTX->efds    needs more care; see the loop below.
     */

    for (;;) {
        /* Every time a callback has been called we start scanning
         * CTX->efds from the beginning again, because the callback
         * may have changed the list arbitrarily.  The fd_rindices[]
         * entries that were used get invalidated, so the same
         * function is not called a second time. */
        int revents = 0;

        LIBXL_LIST_FOREACH(efd, &CTX->efds, entry) {
            if (efd->events) {
                revents = afterpoll_check_fd(poller,fds,nfds,
                                             efd->fd,efd->events);
                if (revents)
                    break; /* efd is the one to dispatch */
            }
        }

        if (!revents)
            /* no ordinary fd events (left), then */
            break;

        fd_occurs(egc, efd, revents);
    }

    if (afterpoll_check_fd(poller,fds,nfds, poller->wakeup_pipe[0],POLLIN)) {
        int e = libxl__self_pipe_eatall(poller->wakeup_pipe[0]);
        if (e) LIBXL__EVENT_DISASTER(egc, "read wakeup", e, 0);
    }

    /* fire every timeout at the head of the queue which is due */
    while ((etime = LIBXL_TAILQ_FIRST(&CTX->etimes)) != NULL) {
        assert(!etime->infinite);

        if (timercmp(&etime->abs, &now, >))
            break;

        time_deregister(gc, etime);

        time_occurs(egc, etime, ERROR_TIMEDOUT);
    }
}
1288 
/*
 * Public entry point: runs afterpoll_internal on the application's
 * poller (ctx->poller_app) while holding the ctx lock.
 */
void libxl_osevent_afterpoll(libxl_ctx *ctx, int nfds, const struct pollfd *fds,
                             struct timeval now)
{
    EGC_INIT(ctx);

    CTX_LOCK;
    afterpoll_internal(egc, ctx->poller_app, nfds, fds, now);
    CTX_UNLOCK;

    EGC_FREE;
}
1298 
1299 /*
1300  * osevent hook and callback machinery
1301  */
1302 
/*
 * Installs the application's osevent hooks together with the `user'
 * value to go with them.  Asserts that no fd or timeout events are
 * registered with the ctx at the time of the call.
 */
void libxl_osevent_register_hooks(libxl_ctx *ctx,
                                  const libxl_osevent_hooks *hooks,
                                  void *user)
{
    GC_INIT(ctx);
    CTX_LOCK;

    assert(LIBXL_LIST_EMPTY(&ctx->efds));
    assert(LIBXL_TAILQ_EMPTY(&ctx->etimes));

    ctx->osevent_user = user;
    ctx->osevent_hooks = hooks;

    CTX_UNLOCK;
    GC_FREE;
}
1316 
1317 
/*
 * "Occurred" entry point for fd events reported by the application.
 *
 * Must not be called from within a hook (asserted).  Looks up the
 * libxl__ev_fd behind for_libxl; the event is dispatched via fd_occurs
 * only if one is found and its fd matches `fd'.  Otherwise the call is
 * silently ignored.
 */
void libxl_osevent_occurred_fd(libxl_ctx *ctx, void *for_libxl,
                               int fd, short events_ign, short revents_ign)
{
    EGC_INIT(ctx);
    CTX_LOCK;
    assert(!CTX->osevent_in_hook);

    libxl__ev_fd *ev = osevent_ev_from_hook_nexus(ctx, for_libxl);

    if (ev && ev->fd == fd)
        fd_occurs(egc, ev, revents_ign);

    CTX_UNLOCK;
    EGC_FREE;
}
1335 
/*
 * "Occurred" entry point for timeouts reported by the application.
 *
 * Must not be called from within a hook (asserted).  The nexus passed
 * as for_libxl is released to CTX->hook_timeout_nexi_idle in any case.
 * If it still refers to a libxl__ev_time, that event is taken off
 * CTX->etimes and fired with ERROR_TIMEDOUT.
 */
void libxl_osevent_occurred_timeout(libxl_ctx *ctx, void *for_libxl)
{
    EGC_INIT(ctx);
    CTX_LOCK;
    assert(!CTX->osevent_in_hook);

    libxl__osevent_hook_nexus *nexus = for_libxl;
    libxl__ev_time *ev = osevent_ev_from_hook_nexus(ctx, nexus);

    osevent_release_nexus(gc, &CTX->hook_timeout_nexi_idle, nexus);

    if (ev) {
        assert(!ev->infinite);

        LIBXL_TAILQ_REMOVE(&CTX->etimes, ev, entry);

        time_occurs(egc, ev, ERROR_TIMEDOUT);
    }

    CTX_UNLOCK;
    EGC_FREE;
}
1358 
/*
 * Reports an unrecoverable problem in the event loop.
 *
 * The problem is logged at XTL_CRITICAL.  If the application has
 * registered a disaster hook, that hook is called and we return.
 * Without such a hook a final message is logged and printed to
 * stderr, and the program exits with exit(-1).
 */
void libxl__event_disaster(libxl__egc *egc, const char *msg, int errnoval,
                           libxl_event_type type /* may be 0 */,
                           const char *file, int line, const char *func)
{
    EGC_GC;

    /* the three pieces of the optional " (relates to ...)" suffix */
    const char *type_open = "";
    const char *type_name = "";
    const char *type_close = "";

    if (type) {
        type_open = " (relates to event type ";
        type_name = libxl_event_type_to_string(type);
        type_close = ")";
    }

    libxl__log(CTX, XTL_CRITICAL, errnoval, file, line, func, INVALID_DOMID,
               "DISASTER in event loop: %s%s%s%s",
               msg, type_open, type_name, type_close);

    if (CTX->event_hooks && CTX->event_hooks->disaster) {
        CTX->event_hooks->disaster(CTX->event_hooks_user, type, msg, errnoval);
        return;
    }

    /* nobody to tell: give up */
    const char verybad[] =
        "DISASTER in event loop not handled by libxl application";
    LIBXL__LOG(CTX, XTL_CRITICAL, verybad);
    fprintf(stderr, "libxl: fatal error, exiting program: %s\n", verybad);
    exit(-1);
}
1383 
/*
 * Makes all the application callbacks that have been queued on egc,
 * in three passes: occurred events, ao progress reports, and ao
 * completions.  Each item is taken off its queue before its callback
 * is called.
 */
static void egc_run_callbacks(libxl__egc *egc)
{
    /*
     * These callbacks have to be made with the ctx unlocked.  See the
     * comment near #define EGC_GC in libxl_internal.h and those in
     * the definitions of libxl__egc, libxl__ao and libxl__aop.
     */
    EGC_GC;
    libxl_event *ev, *ev_next;
    libxl__aop_occurred *aop, *aop_next;
    libxl__ao *ao, *ao_next;

    /* 1. events to be delivered through the event_occurs hook */
    LIBXL_TAILQ_FOREACH_SAFE(ev, &egc->occurred_for_callback, link, ev_next) {
        LIBXL_TAILQ_REMOVE(&egc->occurred_for_callback, ev, link);
        LOG(DEBUG,"event %p callback type=%s",
            ev, libxl_event_type_to_string(ev->type));
        CTX->event_hooks->event_occurs(CTX->event_hooks_user, ev);
    }

    /* 2. ao progress reports */
    LIBXL_TAILQ_FOREACH_SAFE(aop, &egc->aops_for_callback, entry, aop_next) {
        LIBXL_TAILQ_REMOVE(&egc->aops_for_callback, aop, entry);
        LOG(DEBUG,"ao %p: progress report: callback aop=%p", aop->ao, aop);
        aop->how->callback(CTX, aop->ev, aop->how->for_callback);

        /* the ao's bookkeeping is updated under the lock */
        CTX_LOCK;
        assert(aop->ao->magic == LIBXL__AO_MAGIC);
        aop->ao->progress_reports_outstanding--;
        libxl__ao_complete_check_progress_reports(egc, aop->ao);
        CTX_UNLOCK;
    }

    /* 3. ao completions */
    LIBXL_TAILQ_FOREACH_SAFE(ao, &egc->aos_for_callback,
                             entry_for_callback, ao_next) {
        LIBXL_TAILQ_REMOVE(&egc->aos_for_callback, ao, entry_for_callback);
        LOG(DEBUG,"ao %p: completion callback", ao);
        ao->how.callback(CTX, ao->rc, ao->how.u.for_callback);

        CTX_LOCK;
        ao->notified = 1;
        ao__check_destroy(CTX, ao);
        CTX_UNLOCK;
    }
}
1426 
/*
 * Finishes with an egc: first makes the queued application callbacks,
 * then frees everything allocated from the egc's gc.
 */
void libxl__egc_cleanup(libxl__egc *egc)
{
    EGC_GC;

    egc_run_callbacks(egc);
    libxl__free_all(gc);
}
1434 
1435 /*
1436  * Event retrieval etc.
1437  */
1438 
/*
 * Records the application's event hooks and the `user' value which
 * goes with them in the ctx.
 */
void libxl_event_register_callbacks(libxl_ctx *ctx,
                  const libxl_event_hooks *hooks, void *user)
{
    ctx->event_hooks_user = user;
    ctx->event_hooks = hooks;
}
1445 
/*
 * Hands a newly occurred event over for delivery to the application.
 *
 * If event hooks are registered and the event's type is selected in
 * their event_occurs_mask, the event is queued on the egc for delivery
 * by callback.  Otherwise it is appended to CTX->occurred and every
 * poller on CTX->pollers_event is woken up.
 */
void libxl__event_occurred(libxl__egc *egc, libxl_event *event)
{
    EGC_GC;

    /* Test the type bit with a 64-bit shift, as event_check_internal
     * does: unsigned long may be only 32 bits wide. */
    if (CTX->event_hooks &&
        (CTX->event_hooks->event_occurs_mask
         & ((uint64_t)1 << event->type))) {
        /* libxl__egc_cleanup will call the callback, just before exit
         * from libxl.  This helps avoid reentrancy bugs: parts of
         * libxl that call libxl__event_occurred do not have to worry
         * that libxl might be reentered at that point. */
        LIBXL_TAILQ_INSERT_TAIL(&egc->occurred_for_callback, event, link);
        return;
    }

    libxl__poller *poller;

    LIBXL_TAILQ_INSERT_TAIL(&CTX->occurred, event, link);
    LIBXL_LIST_FOREACH(poller, &CTX->pollers_event, entry)
        libxl__poller_wakeup(egc, poller);
}
1465 
/*
 * Disposes of the contents of event and then frees the event itself.
 * ctx is not used; exceptionally, libxl itself may call this function
 * with ctx==0.
 */
void libxl_event_free(libxl_ctx *ctx, libxl_event *event)
{
    libxl_event_dispose(event);

    free(event);
}
1472 
libxl__event_new(libxl__egc * egc,libxl_event_type type,uint32_t domid,libxl_ev_user for_user)1473 libxl_event *libxl__event_new(libxl__egc *egc,
1474                               libxl_event_type type, uint32_t domid,
1475                               libxl_ev_user for_user)
1476 {
1477     EGC_GC;
1478     libxl_event *ev;
1479 
1480     ev = libxl__zalloc(NOGC,sizeof(*ev));
1481 
1482     libxl_event_init(ev);
1483     libxl_event_init_type(ev, type);
1484 
1485     ev->domid = domid;
1486     ev->for_user = for_user;
1487 
1488     return ev;
1489 }
1490 
/*
 * Looks for an already-occurred event the caller is interested in.
 *
 * Walks CTX->occurred in order and picks the first event whose type
 * bit is set in typemask and which satisfies pred (if pred is
 * non-null).  That event is removed from the queue and stored in
 * *event_r.
 *
 * typemask is a uint64_t, matching the public libxl_event_check and
 * libxl_event_wait which pass their 64-bit mask through unchanged.
 *
 * Returns 0 if an event was found, ERROR_NOT_READY otherwise (in
 * which case *event_r is not touched).
 */
static int event_check_internal(libxl__egc *egc, libxl_event **event_r,
                                uint64_t typemask,
                                libxl_event_predicate *pred, void *pred_user)
{
    EGC_GC;
    libxl_event *ev;

    LIBXL_TAILQ_FOREACH(ev, &CTX->occurred, link) {
        if (!(typemask & ((uint64_t)1 << ev->type)))
            continue;

        if (pred && !pred(ev, pred_user))
            continue;

        /* got one! */
        LIBXL_TAILQ_REMOVE(&CTX->occurred, ev, link);
        *event_r = ev;
        return 0;
    }

    return ERROR_NOT_READY;
}
1517 
/*
 * Public entry point: checks, with the ctx locked and without
 * blocking, for an occurred event matching typemask and pred.
 * Returns whatever event_check_internal returns.
 */
int libxl_event_check(libxl_ctx *ctx, libxl_event **event_r,
                      uint64_t typemask,
                      libxl_event_predicate *pred, void *pred_user)
{
    int rc;

    EGC_INIT(ctx);
    CTX_LOCK;

    rc = event_check_internal(egc, event_r, typemask, pred, pred_user);

    CTX_UNLOCK;
    EGC_FREE;

    return rc;
}
1529 
1530 /*
1531  * Utilities for pipes (specifically, useful for self-pipes)
1532  */
1533 
/*
 * Closes whichever ends of the pipe are open (i.e. have a
 * non-negative fd) and marks both ends as closed by setting them
 * to -1.
 */
void libxl__pipe_close(int fds[2])
{
    int end;

    for (end = 0; end < 2; end++) {
        if (fds[end] >= 0)
            close(fds[end]);
    }

    fds[0] = -1;
    fds[1] = -1;
}
1540 
/*
 * Creates a pipe and switches both of its ends to non-blocking mode.
 *
 * Returns 0 on success.  On failure returns ERROR_FAIL (pipe creation
 * failed) or the rc from libxl_fd_set_nonblock; in either case fds is
 * left as { -1, -1 } with nothing open.
 */
int libxl__pipe_nonblock(libxl_ctx *ctx, int fds[2])
{
    int rc;

    if (libxl_pipe(ctx, fds)) {
        fds[0] = fds[1] = -1;
        libxl__pipe_close(fds);
        return ERROR_FAIL;
    }

    rc = libxl_fd_set_nonblock(ctx, fds[0], 1);
    if (!rc)
        rc = libxl_fd_set_nonblock(ctx, fds[1], 1);

    if (rc) {
        libxl__pipe_close(fds);
        return rc;
    }

    return 0;
}
1564 
/*
 * Writes one (zero) byte to the self-pipe write end fd.
 *
 * Returns 0 if the byte was written, or if the pipe is full (the
 * write would block), in which case a wakeup is already pending.
 * Retries on EINTR.  Any other failure is returned as the errno value.
 */
int libxl__self_pipe_wakeup(int fd)
{
    /* Called from signal handlers, so needs to be async-signal-safe */
    static const char buf[1] = "";

    for (;;) {
        int r = write(fd, buf, 1);
        if (r==1) return 0;
        assert(r==-1);
        if (errno == EINTR) continue;
        /* POSIX specifies EAGAIN for a full non-blocking pipe and
         * allows EWOULDBLOCK to be a different value; accept both. */
        if (errno == EAGAIN || errno == EWOULDBLOCK) return 0;
        if (!errno) abort();
        return errno;
    }
}
1580 
/*
 * Drains the self-pipe read end fd.
 *
 * Reads in chunks until a read returns less than a full buffer
 * (including 0) or reports that it would block; both mean the pipe is
 * empty and yield 0.  Retries on EINTR.  Any other failure is
 * returned as the errno value.
 */
int libxl__self_pipe_eatall(int fd)
{
    char buf[256];
    for (;;) {
        int r = read(fd, buf, sizeof(buf));
        if (r == sizeof(buf)) continue; /* there may be more */
        if (r >= 0) return 0;
        assert(r == -1);
        if (errno == EINTR) continue;
        /* POSIX specifies EAGAIN for an empty non-blocking pipe and
         * allows EWOULDBLOCK to be a different value; accept both. */
        if (errno == EAGAIN || errno == EWOULDBLOCK) return 0;
        assert(errno);
        return errno;
    }
}
1595 
1596 /*
1597  * Manipulation of pollers
1598  */
1599 
/*
 * Initialises poller p: clears its array pointers and its fds_changed
 * flag, and creates its non-blocking wakeup pipe.
 *
 * Returns 0 on success.  If the pipe cannot be set up, p is disposed
 * of again and the rc from libxl__pipe_nonblock is returned.
 */
int libxl__poller_init(libxl__gc *gc, libxl__poller *p)
{
    int rc;

    p->fds_changed = 0;
    p->fd_rindices = 0;
    p->fd_polls = 0;

    rc = libxl__pipe_nonblock(CTX, p->wakeup_pipe);
    if (rc) {
        libxl__poller_dispose(p);
        return rc;
    }

    return 0;
}
1616 
/*
 * Releases what poller p owns: closes its wakeup pipe and frees its
 * fd_polls and fd_rindices arrays.  p itself is not freed.
 */
void libxl__poller_dispose(libxl__poller *p)
{
    libxl__pipe_close(p->wakeup_pipe);

    free(p->fd_rindices);
    free(p->fd_polls);
}
1623 
libxl__poller_get(libxl__gc * gc)1624 libxl__poller *libxl__poller_get(libxl__gc *gc)
1625 {
1626     /* must be called with ctx locked */
1627     int rc;
1628 
1629     libxl__poller *p = LIBXL_LIST_FIRST(&CTX->pollers_idle);
1630     if (p) {
1631         LIBXL_LIST_REMOVE(p, entry);
1632     } else {
1633         p = libxl__zalloc(NOGC, sizeof(*p));
1634 
1635         rc = libxl__poller_init(gc, p);
1636         if (rc) {
1637             free(p);
1638             return NULL;
1639         }
1640     }
1641 
1642     LIBXL_LIST_INSERT_HEAD(&CTX->pollers_fds_changed, p,
1643                            fds_changed_entry);
1644     return p;
1645 }
1646 
/*
 * Gives back a poller obtained from libxl__poller_get: it is taken
 * off the fds_changed list and put on ctx->pollers_idle.  A null p is
 * accepted and ignored.
 */
void libxl__poller_put(libxl_ctx *ctx, libxl__poller *p)
{
    if (p) {
        LIBXL_LIST_REMOVE(p, fds_changed_entry);
        LIBXL_LIST_INSERT_HEAD(&ctx->pollers_idle, p, entry);
    }
}
1653 
/*
 * Wakes up poller p by writing to its wakeup pipe.  If the write
 * fails, an event-loop disaster is reported.
 */
void libxl__poller_wakeup(libxl__egc *egc, libxl__poller *p)
{
    int e;

    e = libxl__self_pipe_wakeup(p->wakeup_pipe[1]);
    if (e)
        LIBXL__EVENT_DISASTER(egc, "cannot poke watch pipe", e, 0);
}
1659 
1660 /*
1661  * Main event loop iteration
1662  */
1663 
/*
 * Runs one iteration of libxl's own event loop on `poller':
 * sets up the pollfd array (growing poller->fd_polls as required),
 * calls poll() with the ctx unlocked, and then processes whatever
 * happened via afterpoll_internal.
 *
 * Returns 0 on success, and also when poll() was interrupted by a
 * signal (EINTR); otherwise a nonzero rc.
 */
static int eventloop_iteration(libxl__egc *egc, libxl__poller *poller) {
    /* The CTX must be locked EXACTLY ONCE so that this function
     * can unlock it when it polls.
     */
    EGC_GC;
    int rc, nfds, poll_errno;
    struct timeval now;

    rc = libxl__gettimeofday(gc, &now);
    if (rc) goto out;

    int timeout;

    for (;;) {
        nfds = poller->fd_polls_allocd;
        timeout = -1;
        rc = beforepoll_internal(gc, poller, &nfds, poller->fd_polls,
                                 &timeout, now);
        if (!rc) break;
        if (rc != ERROR_BUFFERFULL) goto out;

        /* Too few slots: nfds now says how many are needed.  Refuse
         * sizes for which the byte count might overflow. */
        struct pollfd *newarray =
            (nfds > INT_MAX / sizeof(struct pollfd) / 2) ? 0 :
            libxl__realloc(NOGC, poller->fd_polls, sizeof(*newarray) * nfds);

        if (!newarray) { rc = ERROR_NOMEM; goto out; }

        poller->fd_polls = newarray;
        poller->fd_polls_allocd = nfds;
    }

    CTX_UNLOCK;
    rc = poll(poller->fd_polls, nfds, timeout);
    /* Capture errno straight away, before retaking the lock has a
     * chance to overwrite it. */
    poll_errno = errno;
    CTX_LOCK;

    if (rc < 0) {
        if (poll_errno == EINTR)
            return 0; /* will go round again if caller requires */

        LOGEV(ERROR, poll_errno, "poll failed");
        rc = ERROR_FAIL;
        goto out;
    }

    rc = libxl__gettimeofday(gc, &now);
    if (rc) goto out;

    afterpoll_internal(egc, poller, nfds, poller->fd_polls, now);

    rc = 0;
 out:
    return rc;
}
1717 
/*
 * Public entry point: waits until an event matching typemask and pred
 * has occurred, running libxl's own event loop in the meantime.
 *
 * Returns 0 with the event in *event_r, ERROR_FAIL if no poller could
 * be obtained, or the nonzero rc of a failed event check or event
 * loop iteration.
 */
int libxl_event_wait(libxl_ctx *ctx, libxl_event **event_r,
                     uint64_t typemask,
                     libxl_event_predicate *pred, void *pred_user)
{
    int rc;
    libxl__poller *poller = NULL;

    EGC_INIT(ctx);
    CTX_LOCK;

    poller = libxl__poller_get(gc);
    if (!poller) {
        rc = ERROR_FAIL;
    } else {
        for (;;) {
            rc = event_check_internal(egc, event_r, typemask,
                                      pred, pred_user);
            if (rc != ERROR_NOT_READY)
                break; /* found one, or failed */

            rc = eventloop_iteration(egc, poller);
            if (rc)
                break;

            /* Each time round the loop we unlock and clean up the
             * egc.  That way (a) garbage does not pile up and (b)
             * events which are to be dispatched by callback are
             * actually delivered promptly.
             */
            CTX_UNLOCK;
            libxl__egc_cleanup(egc);
            CTX_LOCK;
        }
    }

    /* a null poller is fine here */
    libxl__poller_put(ctx, poller);

    CTX_UNLOCK;
    EGC_FREE;
    return rc;
}
1755 
1756 
1757 
1758 /*
 * The two possible state flows of an ao:
1760  *
1761  * Completion before initiator return:
1762  *
1763  *     Initiator thread                       Possible other threads
1764  *
1765  *   * ao_create allocates memory and
1766  *     initialises the struct
1767  *
1768  *   * the initiator function does its
1769  *     work, setting up various internal
1770  *     asynchronous operations -----------> * asynchronous operations
1771  *                                            start to take place and
1772  *                                            might cause ao completion
1773  *                                                |
1774  *   * initiator calls ao_inprogress              |
1775  *     - if synchronous, run event loop           |
1776  *       until the ao completes                   |
1777  *                              - ao completes on some thread
1778  *                              - completing thread releases the lock
1779  *                     <--------------'
1780  *     - ao_inprogress takes the lock
1781  *     - destroy the ao
1782  *
1783  *
1784  * Completion after initiator return (asynch. only):
1785  *
1786  *
1787  *     Initiator thread                       Possible other threads
1788  *
1789  *   * ao_create allocates memory and
1790  *     initialises the struct
1791  *
1792  *   * the initiator function does its
1793  *     work, setting up various internal
1794  *     asynchronous operations -----------> * asynchronous operations
1795  *                                            start to take place and
1796  *                                            might cause ao completion
1797  *                                                |
1798  *   * initiator calls ao_inprogress              |
1799  *     - observes event not yet done,             |
1800  *     - returns to caller                        |
1801  *                                                |
1802  *                              - ao completes on some thread
1803  *                              - generate the event or call the callback
1804  *                              - destroy the ao
1805  */
1806 
1807 
1808 /*
1809  * A "manip" is a libxl public function manipulating this ao, which
1810  * has a pointer to it.  We have to not destroy it while that's the
1811  * case, obviously.  Callers must have the ctx locked, obviously.
1812  */
/* Take one manip reference on ao; asserts the counter cannot overflow. */
static void ao__manip_enter(libxl__ao *ao)
{
    assert(ao->manip_refcnt < INT_MAX);
    ao->manip_refcnt += 1;
}
1818 
/*
 * Drop one manip reference on ao, then let ao__check_destroy decide
 * whether the ao can now be destroyed.  The ao may therefore no longer
 * exist when this returns.
 */
static void ao__manip_leave(libxl_ctx *ctx, libxl__ao *ao)
{
    assert(ao->manip_refcnt > 0);
    ao->manip_refcnt -= 1;

    ao__check_destroy(ctx, ao);
}
1825 
/*
 * Destroy ao if nobody holds a manip reference and it has been
 * notified; otherwise do nothing.  A notified ao must be complete.
 */
static void ao__check_destroy(libxl_ctx *ctx, libxl__ao *ao)
{
    if (ao->manip_refcnt || !ao->notified)
        return;

    assert(ao->complete);
    libxl__ao__destroy(ctx, ao);
}
1833 
/*
 * Release everything owned by ao and free the ao itself.
 *
 * Gives the ao's poller (possibly NULL) back via libxl__poller_put,
 * overwrites the magic with LIBXL__AO_MAGIC_DESTROYED, frees all
 * allocations hanging off the ao's gc and finally frees the struct.
 * Passing ao == NULL is a no-op.
 */
void libxl__ao__destroy(libxl_ctx *ctx, libxl__ao *ao)
{
    /* Test for NULL before AO_GC: that macro derives gc from ao, so it
     * must not be expanded until we know ao is a valid pointer. */
    if (!ao) return;

    AO_GC;
    LOG(DEBUG,"ao %p: destroy",ao);
    libxl__poller_put(ctx, ao->poller);
    ao->magic = LIBXL__AO_MAGIC_DESTROYED;
    libxl__free_all(&ao->gc);
    free(ao);
}
1844 
/*
 * Abandon an ao whose initiator failed before any work was started.
 *
 * The asserts require that the ao is still in the initiator, not
 * complete, has no outstanding progress reports and is not being
 * aborted.  It is then unlinked from the in-progress list and
 * destroyed.
 */
void libxl__ao_create_fail(libxl__ao *ao)
{
    AO_GC;

    LOG(DEBUG,"ao %p: create fail",ao);

    /* Sanity checks, kept in this order. */
    assert(ao->magic == LIBXL__AO_MAGIC);
    assert(ao->in_initiator);
    assert(!ao->complete);
    assert(!ao->progress_reports_outstanding);
    assert(!ao->aborting);

    LIBXL_LIST_REMOVE(ao, inprogress_entry);

    libxl__ao__destroy(CTX, ao);
}
1857 
libxl__ao_inprogress_gc(libxl__ao * ao)1858 libxl__gc *libxl__ao_inprogress_gc(libxl__ao *ao)
1859 {
1860     assert(ao);
1861     assert(ao->magic == LIBXL__AO_MAGIC);
1862     assert(!ao->complete);
1863     return &ao->gc;
1864 }
1865 
/*
 * Mark ao as complete with result rc.
 *
 * The ao must be live, not already complete, and must be a root ao
 * with no nested progeny left (all asserted).  It is taken off the
 * in-progress list, and libxl__ao_complete_check_progress_reports is
 * then called to deal with notification.
 */
void libxl__ao_complete(libxl__egc *egc, libxl__ao *ao, int rc)
{
    AO_GC;

    LOG(DEBUG,"ao %p: complete, rc=%d",ao,rc);

    assert(ao->magic == LIBXL__AO_MAGIC);
    assert(!ao->complete);
    assert(!ao->nested_root);
    assert(!ao->nested_progeny);

    /* Record the outcome and unlink from ctx's list of running aos. */
    ao->rc = rc;
    ao->complete = 1;
    LIBXL_LIST_REMOVE(ao, inprogress_entry);

    libxl__ao_complete_check_progress_reports(egc, ao);
}
1879 
/*
 * Does ao still have work pending?
 *
 * True if the ao has not completed, or if progress report callbacks
 * are still outstanding.  Such callbacks may be sitting in another
 * thread's egc; that thread will, after making the callback, retake
 * the lock, decrement progress_reports_outstanding and call
 * libxl__ao_complete_check_progress_reports.  Until then the ao is
 * not treated as finished.
 */
static bool ao_work_outstanding(libxl__ao *ao)
{
    if (!ao->complete)
        return true;

    return ao->progress_reports_outstanding != 0;
}
1892 
/*
 * If ao has no work outstanding (see ao_work_outstanding), arrange for
 * its completion to be reported in the way its creator asked for:
 *
 *  - ao has a poller: wake that poller, unless the ao is still being
 *    constructed;
 *  - ao->how.callback is set: queue the ao on egc->aos_for_callback;
 *  - otherwise: generate an OPERATION_COMPLETE event carrying ao->rc
 *    and mark the ao as notified.
 *
 * Finally ao__check_destroy is given a chance to destroy the ao.
 */
void libxl__ao_complete_check_progress_reports(libxl__egc *egc, libxl__ao *ao)
{
    EGC_GC;
    libxl_ctx *owner = libxl__gc_owner(&egc->gc);

    assert(ao->progress_reports_outstanding >= 0);

    if (ao_work_outstanding(ao))
        return;

    if (!ao->poller) {
        if (ao->how.callback) {
            LOG(DEBUG, "ao %p: complete for callback", ao);
            LIBXL_TAILQ_INSERT_TAIL(&egc->aos_for_callback, ao,
                                    entry_for_callback);
        } else {
            libxl_event *ev =
                NEW_EVENT(egc, OPERATION_COMPLETE, ao->domid,
                          ao->how.u.for_event);

            if (ev) {
                ev->u.operation_complete.rc = ao->rc;
                libxl__event_occurred(egc, ev);
            }
            /* Set even when no event could be created. */
            ao->notified = 1;
        }
    } else {
        assert(ao->in_initiator);
        /* While still constructing we are not in the event loop yet,
         * so there is nothing to wake up. */
        if (!ao->constructing)
            libxl__poller_wakeup(egc, ao->poller);
    }

    ao__check_destroy(owner, ao);
}
1922 
libxl__ao_create(libxl_ctx * ctx,uint32_t domid,const libxl_asyncop_how * how,const char * file,int line,const char * func)1923 libxl__ao *libxl__ao_create(libxl_ctx *ctx, uint32_t domid,
1924                             const libxl_asyncop_how *how,
1925                             const char *file, int line, const char *func)
1926 {
1927     libxl__ao *ao;
1928 
1929     ao = calloc(1, sizeof(*ao));
1930     if (!ao) goto out;
1931 
1932     ao->magic = LIBXL__AO_MAGIC;
1933     ao->constructing = 1;
1934     ao->in_initiator = 1;
1935     ao__manip_enter(ao);
1936     ao->poller = 0;
1937     ao->domid = domid;
1938     LIBXL_INIT_GC(ao->gc, ctx);
1939 
1940     if (how) {
1941         ao->how = *how;
1942     } else {
1943         ao->poller = libxl__poller_get(&ao->gc);
1944         if (!ao->poller) goto out;
1945     }
1946     libxl__log(ctx,XTL_DEBUG,-1,file,line,func,domid,
1947                "ao %p: create: how=%p callback=%p poller=%p",
1948                ao, how, ao->how.callback, ao->poller);
1949 
1950     LIBXL_LIST_INSERT_HEAD(&ctx->aos_inprogress, ao, inprogress_entry);
1951 
1952     return ao;
1953 
1954  out:
1955     if (ao) libxl__ao__destroy(ctx, ao);
1956     return NULL;
1957 }
1958 
1959 
/*
 * Called by the initiator when it has finished setting up the ao
 * (asserted: the ao is still constructing and in the initiator).
 *
 * If the ao has a poller, run the event loop here until the ao has no
 * work outstanding, then return ao->rc.  Event loop errors are logged
 * and followed by a one second sleep and a retry.  Without a poller,
 * return 0 immediately.
 *
 * In both cases in_initiator is cleared and the manip reference is
 * dropped, so the ao may have been destroyed by the time this returns.
 * file, line and func are used only for the debug log entry.
 */
int libxl__ao_inprogress(libxl__ao *ao,
                         const char *file, int line, const char *func)
{
    AO_GC;
    int rc = 0;

    assert(ao->magic == LIBXL__AO_MAGIC);
    assert(ao->constructing);
    assert(ao->in_initiator);
    ao->constructing = 0;

    /* A nested ao is logged against its root's domid. */
    const uint32_t domid =
        ao->nested_root ? ao->nested_root->domid : ao->domid;

    /* constructing was cleared just above, so "o" never shows up. */
    libxl__log(CTX,XTL_DEBUG,-1,file,line,func,domid,
               "ao %p: inprogress: poller=%p, flags=%s%s%s%s",
               ao, ao->poller,
               ao->constructing ? "o" : "",
               ao->in_initiator ? "i" : "",
               ao->complete ? "c" : "",
               ao->notified ? "n" : "");

    if (ao->poller) {
        /* Synchronous operation: wait for it right here.  A separate
         * egc lets us free things on every pass round the loop. */
        libxl__egc egc;
        LIBXL_INIT_EGC(egc,CTX);

        for (;;) {
            int loop_rc;

            assert(ao->magic == LIBXL__AO_MAGIC);

            if (!ao_work_outstanding(ao))
                break;

            DBG("ao %p: not ready, waiting",ao);

            loop_rc = eventloop_iteration(&egc,ao->poller);
            if (loop_rc) {
                /* Oh dear, this is quite unfortunate.  We cannot give
                 * up: there is no synchronous cancellation, so
                 * returning now would leave the caller not knowing
                 * whether the operation will still happen later.
                 * Pause and try again instead. */
                LOG(ERROR,
                    "Error waiting for event during long-running operation (rc=%d)",
                    loop_rc);
                sleep(1);
            }

            CTX_UNLOCK;
            libxl__egc_cleanup(&egc);
            CTX_LOCK;
        }

        rc = ao->rc;
        ao->notified = 1;
    }

    ao->in_initiator = 0;
    ao__manip_leave(CTX, ao);

    return rc;
}
2027 
2028 
2029 /* abort requests */
2030 
/*
 * Request abort of the root ao `parent'.
 *
 * Temporarily unlocks ctx, which must be locked exactly once on entry.
 *
 * Returns ERROR_ABORTED if an abort had already been requested for
 * this ao; otherwise sets parent->aborting, calls each registered
 * abortable's callback with ERROR_ABORTED until none remain, and
 * returns 0.  A manip reference is held for the duration.
 */
static int ao__abort(libxl_ctx *ctx, libxl__ao *parent)
{
    int rc = 0;

    ao__manip_enter(parent);

    if (parent->aborting) {
        rc = ERROR_ABORTED;
    } else {
        parent->aborting = 1;

        if (LIBXL_LIST_EMPTY(&parent->abortables)) {
            LIBXL__LOG(ctx, LIBXL__LOG_DEBUG,
                       "ao %p: abort requested and noted, but no-one interested",
                       parent);
        }

        /* Keep invoking abort hooks until the list has drained; the
         * loop re-reads the list head each time round. */
        while (!LIBXL_LIST_EMPTY(&parent->abortables)) {
            libxl__egc egc;
            libxl__ao_abortable *abrt;

            LIBXL_INIT_EGC(egc,ctx);

            assert(!parent->complete);

            abrt = LIBXL_LIST_FIRST(&parent->abortables);
            assert(parent == ao_nested_root(abrt->ao));

            /* Unhook before calling, so the callback sees itself as
             * no longer registered. */
            LIBXL_LIST_REMOVE(abrt, entry);
            abrt->registered = 0;

            /* NOTE(review): the "abrt=%p" slot is passed abrt->ao. */
            LIBXL__LOG(ctx, LIBXL__LOG_DEBUG,
                       "ao %p: abrt=%p: aborting", parent, abrt->ao);
            abrt->callback(&egc, abrt, ERROR_ABORTED);

            libxl__ctx_unlock(ctx);
            libxl__egc_cleanup(&egc);
            libxl__ctx_lock(ctx);
        }
    }

    ao__manip_leave(ctx, parent);
    return rc;
}
2080 
/*
 * Find an in-progress ao matching `how' and request its abort.
 *
 * how != NULL: match an ao without a poller whose how.callback equals
 *   how->callback and whose for_callback (if a callback is set) or
 *   for_event (otherwise) value is also equal.
 * how == NULL: match an ao which has a poller.
 *
 * The first match on ctx->aos_inprogress is passed to ao__abort and
 * its result returned; ERROR_NOTFOUND if nothing matches.
 */
int libxl_ao_abort(libxl_ctx *ctx, const libxl_asyncop_how *how)
{
    libxl__ao *search;
    libxl__ao *target = NULL;
    int rc;

    libxl__ctx_lock(ctx);

    LIBXL_LIST_FOREACH(search, &ctx->aos_inprogress, inprogress_entry) {
        bool matches;

        if (!how) {
            /* looking for a synchronous call */
            matches = search->poller != NULL;
        } else if (search->poller) {
            /* want callback/event reporting, but this one is sync */
            matches = false;
        } else if (how->callback != search->how.callback) {
            matches = false;
        } else if (how->callback) {
            matches = how->u.for_callback == search->how.u.for_callback;
        } else {
            matches = how->u.for_event == search->how.u.for_event;
        }

        if (matches) {
            target = search;
            break;
        }
    }

    rc = target ? ao__abort(ctx, target) : ERROR_NOTFOUND;

    libxl__ctx_unlock(ctx);
    return rc;
}
2116 
/*
 * Explicit abort check.
 *
 * Returns ERROR_ABORTED if an abort has been requested on the root of
 * ao (ao itself if it is not nested), 0 otherwise.
 */
int libxl__ao_aborting(libxl__ao *ao)
{
    libxl__ao *root = ao_nested_root(ao);
    /* DBG logs via CTX and therefore needs a gc in scope whenever
     * DEBUG is defined; declare it as the other abortable functions
     * in this file do. */
    AO_GC;

    if (root->aborting) {
        DBG("ao=%p: aborting at explicit check (root=%p)", ao, root);
        return ERROR_ABORTED;
    }

    return 0;
}
2127 
/*
 * Register abrt on the abortables list of the root of abrt->ao.
 *
 * Returns 0 with abrt->registered set, or ERROR_ABORTED (leaving abrt
 * unregistered) if an abort has already been requested on the root.
 */
int libxl__ao_abortable_register(libxl__ao_abortable *abrt)
{
    libxl__ao *ao = abrt->ao;
    libxl__ao *root = ao_nested_root(ao);
    AO_GC;
    int rc;

    if (!root->aborting) {
        DBG("ao=%p, abrt=%p: registering (root=%p)", ao, abrt, root);
        LIBXL_LIST_INSERT_HEAD(&root->abortables, abrt, entry);
        abrt->registered = 1;
        rc = 0;
    } else {
        DBG("ao=%p: preemptively aborting ao_abortable registration %p (root=%p)",
            ao, abrt, root);
        rc = ERROR_ABORTED;
    }

    return rc;
}
2146 
libxl__ao_abortable_deregister(libxl__ao_abortable * abrt)2147 _hidden void libxl__ao_abortable_deregister(libxl__ao_abortable *abrt)
2148 {
2149     if (!abrt->registered)
2150         return;
2151 
2152     libxl__ao *ao = abrt->ao;
2153     libxl__ao *root __attribute__((unused)) = ao_nested_root(ao);
2154     AO_GC;
2155 
2156     DBG("ao=%p, abrt=%p: deregistering (root=%p)", ao, abrt, root);
2157     LIBXL_LIST_REMOVE(abrt, entry);
2158     abrt->registered = 0;
2159 }
2160 
2161 
2162 /* progress reporting */
2163 
2164 /* The application indicates a desire to ignore events by passing NULL
2165  * for how.  But we want to copy *how.  So we have this dummy function
2166  * whose address is stored in callback if the app passed how==NULL. */
dummy_asyncprogress_callback_ignore(libxl_ctx * ctx,libxl_event * ev,void * for_callback)2167 static void dummy_asyncprogress_callback_ignore
2168   (libxl_ctx *ctx, libxl_event *ev, void *for_callback) { }
2169 
/*
 * Fill in *in_state from the application-supplied from_app.
 *
 * A non-NULL from_app is copied wholesale.  For NULL only the callback
 * field is written: it is pointed at
 * dummy_asyncprogress_callback_ignore, the marker for "ignore progress
 * reports"; the rest of *in_state is left as it was.
 */
void libxl__ao_progress_gethow(libxl_asyncprogress_how *in_state,
                               const libxl_asyncprogress_how *from_app)
{
    if (!from_app) {
        in_state->callback = dummy_asyncprogress_callback_ignore;
        return;
    }

    *in_state = *from_app;
}
2177 
/*
 * Deliver progress event ev for the (root, asserted) ao according to
 * how:
 *
 *  - how->callback == NULL: hand ev to libxl__event_occurred;
 *  - how->callback is the ignore marker: free ev;
 *  - any other callback: queue an aop on egc->aops_for_callback and
 *    count it in ao->progress_reports_outstanding.  The aop keeps the
 *    `how' pointer itself, not a copy.
 */
void libxl__ao_progress_report(libxl__egc *egc, libxl__ao *ao,
        const libxl_asyncprogress_how *how, libxl_event *ev)
{
    AO_GC;
    libxl__aop_occurred *aop;

    assert(!ao->nested_root);

    if (!how->callback) {
        LOG(DEBUG,"ao %p: progress report: event queued ev=%p type=%s",
            ao, ev, libxl_event_type_to_string(ev->type));
        libxl__event_occurred(egc, ev);
        return;
    }

    if (how->callback == dummy_asyncprogress_callback_ignore) {
        LOG(DEBUG,"ao %p: progress report: ignored",ao);
        libxl_event_free(CTX,ev);
        return;
    }

    /* Real callback: allocated from the egc's gc and queued on the egc. */
    aop = libxl__zalloc(&egc->gc, sizeof(*aop));
    ao->progress_reports_outstanding++;
    aop->ao = ao;
    aop->ev = ev;
    aop->how = how;
    LIBXL_TAILQ_INSERT_TAIL(&egc->aops_for_callback, aop, entry);
    LOG(DEBUG,"ao %p: progress report: callback queued aop=%p",ao,aop);
}
2201 
2202 
2203 /* nested ao */
2204 
/*
 * Return the root of ao: ao->nested_root if that is set, else ao
 * itself.  Asserts that the result has no nested_root of its own.
 */
static libxl__ao *ao_nested_root(libxl__ao *ao)
{
    libxl__ao *root = ao;

    if (ao->nested_root)
        root = ao->nested_root;

    assert(!root->nested_root);
    return root;
}
2210 
libxl__nested_ao_create(libxl__ao * parent)2211 _hidden libxl__ao *libxl__nested_ao_create(libxl__ao *parent)
2212 {
2213     libxl__ao *child = NULL, *root;
2214     libxl_ctx *ctx = libxl__gc_owner(&parent->gc);
2215 
2216     assert(parent->magic == LIBXL__AO_MAGIC);
2217     root = ao_nested_root(parent);
2218 
2219     child = libxl__zalloc(&ctx->nogc_gc, sizeof(*child));
2220     child->magic = LIBXL__AO_MAGIC;
2221     child->nested_root = root;
2222     assert(root->nested_progeny < INT_MAX);
2223     root->nested_progeny++;
2224     LIBXL_INIT_GC(child->gc, ctx);
2225     libxl__gc *gc = &child->gc;
2226 
2227     LOG(DEBUG,"ao %p: nested ao, parent %p", child, parent);
2228     return child;
2229 }
2230 
libxl__nested_ao_free(libxl__ao * child)2231 _hidden void libxl__nested_ao_free(libxl__ao *child)
2232 {
2233     assert(child->magic == LIBXL__AO_MAGIC);
2234     libxl__ao *root = child->nested_root;
2235     assert(root);
2236     assert(root->nested_progeny > 0);
2237     root->nested_progeny--;
2238     libxl_ctx *ctx = libxl__gc_owner(&child->gc);
2239     libxl__ao__destroy(ctx, child);
2240 }
2241 
2242 
2243 /*
2244  * Local variables:
2245  * mode: C
2246  * c-basic-offset: 4
2247  * indent-tabs-mode: nil
2248  * End:
2249  */
2250