1 /*
2  * Copyright (C) 2009      Citrix Ltd.
3  * Author Vincent Hanquez <vincent.hanquez@eu.citrix.com>
4  *
5  * This program is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU Lesser General Public License as published
7  * by the Free Software Foundation; version 2.1 only. with the special
8  * exception on linking described in file LICENSE.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU Lesser General Public License for more details.
14  */
15 
16 #include "libxl_osdeps.h" /* must come before any other headers */
17 
18 #include "libxl_internal.h"
19 
20 #include <xen/errno.h>
21 
22 /*========================= Domain save ============================*/
23 
24 static void stream_done(libxl__egc *egc,
25                         libxl__stream_write_state *sws, int rc);
26 static void domain_save_done(libxl__egc *egc,
27                              libxl__domain_save_state *dss, int rc);
28 
29 /*----- complicated callback, called by xc_domain_save -----*/
30 
31 static void domain_suspend_switch_qemu_xen_logdirty
32                                (libxl__egc *egc, int domid, unsigned enable,
33                                 libxl__logdirty_switch *lds);
34 static void switch_qemu_xen_logdirty_done(libxl__egc *egc,
35                                           libxl__ev_qmp *qmp,
36                                           const libxl__json_object *,
37                                           int rc);
38 static void switch_logdirty_timeout(libxl__egc *egc, libxl__ev_time *ev,
39                                     const struct timeval *requested_abs,
40                                     int rc);
41 static void switch_logdirty_done(libxl__egc *egc,
42                                  libxl__logdirty_switch *lds, int rc);
43 
/*
 * Put a logdirty switch context into its initial state: the xenstore
 * watch, the timeout and the QMP event are each run through their init
 * routine, and no command path is recorded.
 */
void libxl__logdirty_init(libxl__logdirty_switch *lds)
{
    libxl__ev_xswatch_init(&lds->watch);
    libxl__ev_time_init(&lds->timeout);
    libxl__ev_qmp_init(&lds->qmp);

    lds->cmd_path = NULL;
}
51 
/*
 * Switch log-dirty tracking on or off ('enable') in the device model of
 * domain 'domid'.
 *
 * Only a running QEMU_XEN device model is handled, by delegating to
 * domain_suspend_switch_qemu_xen_logdirty().  For any other reported
 * device model version an error is logged and lds->callback is invoked
 * directly with ERROR_FAIL.
 */
void libxl__domain_common_switch_qemu_logdirty(libxl__egc *egc,
                                               int domid, unsigned enable,
                                               libxl__logdirty_switch *lds)
{
    STATE_AO_GC(lds->ao);

    if (libxl__device_model_version_running(gc, domid) ==
        LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN) {
        domain_suspend_switch_qemu_xen_logdirty(egc, domid, enable, lds);
        return;
    }

    /* No device model we know how to talk to. */
    LOGD(ERROR, domid, "logdirty switch failed"
         ", no valid device model version found, abandoning suspend");
    lds->callback(egc, lds, ERROR_FAIL);
}
68 
/*
 * Issue the QMP command "xen-set-global-dirty-log" (argument "enable")
 * to the device model of 'domid', guarded by a timeout of 10 * 1000
 * (presumably milliseconds -- confirm against
 * libxl__ev_time_register_rel).
 *
 * On success this returns with both the timeout and the QMP request
 * outstanding; completion arrives via switch_qemu_xen_logdirty_done()
 * or switch_logdirty_timeout().  If either setup step fails,
 * switch_qemu_xen_logdirty_done() is called immediately with the error.
 */
static void domain_suspend_switch_qemu_xen_logdirty
                               (libxl__egc *egc, int domid, unsigned enable,
                                libxl__logdirty_switch *lds)
{
    STATE_AO_GC(lds->ao);
    int rc;
    libxl__json_object *args = NULL;

    /* Convenience aliases. */
    libxl__ev_qmp *const qmp = &lds->qmp;

    /*
     * Fill in the QMP event before the first step that can fail: the
     * error path below hands 'qmp' to switch_qemu_xen_logdirty_done(),
     * which logs qmp->domid.  Doing this after the timer registration
     * would let a registration failure be logged against whatever
     * domid the structure happened to contain.
     */
    qmp->ao = ao;
    qmp->domid = domid;
    qmp->payload_fd = -1;
    qmp->callback = switch_qemu_xen_logdirty_done;

    rc = libxl__ev_time_register_rel(ao, &lds->timeout,
                                     switch_logdirty_timeout, 10 * 1000);
    if (rc) goto out;

    libxl__qmp_param_add_bool(gc, &args, "enable", enable);
    rc = libxl__ev_qmp_send(egc, qmp, "xen-set-global-dirty-log", args);
    if (rc) goto out;

    return;

 out:
    switch_qemu_xen_logdirty_done(egc, qmp, NULL, rc);
}
96 
/*
 * Completion handler for the "xen-set-global-dirty-log" QMP request
 * (also called directly when setting the request up fails).
 *
 * Recovers the enclosing logdirty switch context from 'qmp', logs a
 * non-zero 'rc', and passes 'rc' on to switch_logdirty_done().  The
 * QMP response object 'r' is not examined.
 */
static void switch_qemu_xen_logdirty_done(libxl__egc *egc,
                                          libxl__ev_qmp *qmp,
                                          const libxl__json_object *r,
                                          int rc)
{
    EGC_GC;
    libxl__logdirty_switch *lds = CONTAINER_OF(qmp, *lds, qmp);

    if (rc != 0) {
        LOGD(ERROR, qmp->domid,
             "logdirty switch failed (rc=%d), abandoning suspend", rc);
    }

    switch_logdirty_done(egc, lds, rc);
}
110 
/*
 * Timer callback for the logdirty switch.
 *
 * Logs that the device model did not answer in time and finishes the
 * switch with ERROR_FAIL.  Neither 'requested_abs' nor the incoming
 * 'rc' is consulted: every invocation is reported as a timeout.
 */
static void switch_logdirty_timeout(libxl__egc *egc, libxl__ev_time *ev,
                                    const struct timeval *requested_abs,
                                    int rc)
{
    libxl__logdirty_switch *lds = CONTAINER_OF(ev, *lds, timeout);
    STATE_AO_GC(lds->ao);

    LOG(ERROR, "logdirty switch: wait for device model timed out");

    switch_logdirty_done(egc, lds, ERROR_FAIL);
}
120 
/*
 * Common exit point of a logdirty switch attempt.
 *
 * Releases every event source the switch may have set up (xenstore
 * watch, timeout, QMP event) and then reports 'rc' to the caller through
 * lds->callback.
 */
static void switch_logdirty_done(libxl__egc *egc,
                                 libxl__logdirty_switch *lds,
                                 int rc)
{
    STATE_AO_GC(lds->ao);

    /* Tear down before calling out, so the callback sees an idle lds. */
    libxl__ev_xswatch_deregister(gc, &lds->watch);
    libxl__ev_time_deregister(gc, &lds->timeout);
    libxl__ev_qmp_dispose(gc, &lds->qmp);

    lds->callback(egc, lds, rc);
}
133 
134 static void domain_suspend_switch_qemu_logdirty_done
135                         (libxl__egc *egc, libxl__logdirty_switch *lds, int rc);
136 
libxl__domain_suspend_common_switch_qemu_logdirty(uint32_t domid,unsigned enable,void * user)137 void libxl__domain_suspend_common_switch_qemu_logdirty
138                                (uint32_t domid, unsigned enable, void *user)
139 {
140     libxl__save_helper_state *shs = user;
141     libxl__egc *egc = shs->egc;
142     libxl__domain_save_state *dss = shs->caller_state;
143 
144     /* Convenience aliases. */
145     libxl__logdirty_switch *const lds = &dss->logdirty;
146 
147     if (dss->type == LIBXL_DOMAIN_TYPE_PVH) {
148         domain_suspend_switch_qemu_logdirty_done(egc, lds, 0);
149         return;
150     }
151 
152     lds->callback = domain_suspend_switch_qemu_logdirty_done;
153     libxl__domain_common_switch_qemu_logdirty(egc, domid, enable, lds);
154 }
155 
/*
 * Completion of a logdirty switch requested by the save helper.
 *
 * A non-zero 'rc' is recorded in dss->rc and reported to the helper as
 * -1; success is reported as 0.
 */
static void domain_suspend_switch_qemu_logdirty_done
                        (libxl__egc *egc, libxl__logdirty_switch *lds, int rc)
{
    libxl__domain_save_state *dss = CONTAINER_OF(lds, *dss, logdirty);
    int helper_result = 0;

    if (rc) {
        dss->rc = rc;
        helper_result = -1;
    }

    libxl__xc_domain_saverestore_async_callback_done(egc, &dss->sws.shs,
                                                     helper_result);
}
168 
169 /*----- callbacks, called by xc_domain_save -----*/
170 
171 /*
172  * Expand the buffer 'buf' of length 'len', to append 'str' including its NUL
173  * terminator.
174  */
append_string(libxl__gc * gc,char ** buf,uint32_t * len,const char * str)175 static void append_string(libxl__gc *gc, char **buf, uint32_t *len,
176                           const char *str)
177 {
178     size_t extralen = strlen(str) + 1;
179     char *new = libxl__realloc(gc, *buf, *len + extralen);
180 
181     *buf = new;
182     memcpy(new + *len, str, extralen);
183     *len += extralen;
184 }
185 
/*
 * Collect the device model's "physmap" xenstore entries for the domain
 * being saved into a single buffer.
 *
 * For every entry under <dm xenstore root>/physmap the subkeys
 * "start_addr", "size" and "name" are read, and the buffer receives the
 * relative key ("physmap/<entry>/<subkey>") followed by its value, each
 * as a NUL-terminated string.
 *
 * Returns 0 on success, storing the buffer and its length in
 * '*callee_buf' / '*callee_len' (NULL and 0 when there are no physmap
 * entries).  Returns ERROR_FAIL if any subkey cannot be read, in which
 * case the output parameters are left untouched.
 */
int libxl__save_emulator_xenstore_data(libxl__domain_save_state *dss,
                                       char **callee_buf,
                                       uint32_t *callee_len)
{
    static const char *const physmap_subkeys[] = {
        "start_addr", "size", "name"
    };

    STATE_AO_GC(dss->ao);
    const char *xs_root;
    char **entries, *buf = NULL;
    unsigned int nr_entries, ent, sub, len = 0;
    int rc;

    const uint32_t domid = dss->domid;
    const uint32_t dm_domid = libxl_get_stubdom_id(CTX, domid);

    xs_root = DEVICE_MODEL_XS_PATH(gc, dm_domid, domid, "");

    entries = libxl__xs_directory(gc, 0, GCSPRINTF("%s/physmap", xs_root),
                                  &nr_entries);

    /* A missing or empty directory simply yields an empty buffer. */
    for (ent = 0; entries && ent < nr_entries; ++ent) {
        for (sub = 0; sub < ARRAY_SIZE(physmap_subkeys); ++sub) {
            const char *key = GCSPRINTF("physmap/%s/%s",
                                        entries[ent], physmap_subkeys[sub]);
            const char *val =
                libxl__xs_read(gc, XBT_NULL,
                               GCSPRINTF("%s/%s", xs_root, key));

            if (!val) {
                rc = ERROR_FAIL;
                goto out;
            }

            append_string(gc, &buf, &len, key);
            append_string(gc, &buf, &len, val);
        }
    }

    rc = 0;

 out:
    /* Only publish the result when everything was read successfully. */
    if (rc == 0) {
        *callee_buf = buf;
        *callee_len = len;
    }

    return rc;
}
235 
236 /*----- main code for saving, in order of execution -----*/
237 
/*
 * Entry point for saving a domain.
 *
 * Validates the request, initialises the logdirty and suspend
 * sub-states, refuses guests for which vNUMA information can be
 * queried, installs the save-helper callbacks and finally starts the
 * stream writer.  Completion is reported through domain_save_done(),
 * either from here on an early failure or later via stream_done().
 */
void libxl__domain_save(libxl__egc *egc, libxl__domain_save_state *dss)
{
    STATE_AO_GC(dss->ao);
    int rc, ret;

    /* Convenience aliases */
    const uint32_t domid = dss->domid;
    const int live = dss->live;
    const libxl_domain_remus_info *const r_info = dss->remus;
    libxl__domain_suspend_state *dsps = &dss->dsps;
    libxl__srm_save_autogen_callbacks *const callbacks =
        &dss->sws.shs.callbacks.save.a;
    unsigned int nr_vnodes = 0, nr_vmemranges = 0, nr_vcpus = 0;

    /* A checkpointed stream is meaningless without checkpoint info. */
    if (dss->checkpointed_stream != LIBXL_CHECKPOINTED_STREAM_NONE &&
        r_info == NULL) {
        LOGD(ERROR, domid, "Migration stream is checkpointed, but there's no "
                           "checkpoint info!");
        rc = ERROR_INVAL;
        goto out;
    }

    dss->rc = 0;
    libxl__logdirty_init(&dss->logdirty);
    dss->logdirty.ao = ao;

    dsps->ao = ao;
    dsps->domid = domid;
    dsps->live = (live != 0);
    rc = libxl__domain_suspend_init(egc, dsps, dss->type);
    if (rc) goto out;

    dss->xcflags = 0;
    if (live)
        dss->xcflags |= XCFLAGS_LIVE;
    if (dss->debug)
        dss->xcflags |= XCFLAGS_DEBUG;

    /*
     * The migration stream does not carry vNUMA node information, so a
     * guest with vNUMA enabled cannot be saved -- not even one whose
     * vNUMA configuration is empty.  The only acceptable answer from
     * the query below is "failed with EOPNOTSUPP", i.e. the domain has
     * no vNUMA configuration at all.
     */
    ret = xc_domain_getvnuma(CTX->xch, domid, &nr_vnodes, &nr_vmemranges,
                             &nr_vcpus, NULL, NULL, NULL);
    if (!(ret == -1 && errno == EOPNOTSUPP)) {
        LOGD(ERROR, domid, "Cannot save a guest with vNUMA configured");
        rc = ERROR_FAIL;
        goto out;
    }

    /* The suspend callback is only installed for plain streams here. */
    if (dss->checkpointed_stream == LIBXL_CHECKPOINTED_STREAM_NONE)
        callbacks->suspend = libxl__domain_suspend_callback;

    callbacks->switch_qemu_logdirty =
        libxl__domain_suspend_common_switch_qemu_logdirty;

    dss->sws.ao = dss->ao;
    dss->sws.dss = dss;
    dss->sws.fd = dss->fd;
    dss->sws.back_channel = false;
    dss->sws.completion_callback = stream_done;

    libxl__stream_write_start(egc, &dss->sws);
    return;

 out:
    domain_save_done(egc, dss, rc);
}
306 
/*
 * Stream writer completion callback: forwards the result to
 * domain_save_done() for the save state that owns the stream.
 */
static void stream_done(libxl__egc *egc,
                        libxl__stream_write_state *sws, int rc)
{
    libxl__domain_save_state *const dss = sws->dss;

    domain_save_done(egc, dss, rc);
}
312 
/*
 * Final step of a domain save.
 *
 * Cancels the guest suspend event channel event and, if a port had been
 * bound, releases it.  Without Remus/COLO the caller's callback is then
 * invoked with 'rc'.  With Remus/COLO the matching teardown routine is
 * started instead and receives 'rc'.
 */
static void domain_save_done(libxl__egc *egc,
                             libxl__domain_save_state *dss, int rc)
{
    STATE_AO_GC(dss->ao);
    libxl__domain_suspend_state *dsps = &dss->dsps;

    libxl__ev_evtchn_cancel(gc, &dsps->guest_evtchn);

    if (dsps->guest_evtchn.port > 0) {
        xc_suspend_evtchn_release(CTX->xch, CTX->xce, dss->domid,
                                  dsps->guest_evtchn.port,
                                  &dsps->guest_evtchn_lockfd);
    }

    if (!dss->remus) {
        dss->callback(egc, dss, rc);
        return;
    }

    /*
     * Getting here with Remus/COLO means the backup died or a network
     * error stopped checkpoints from being sent.  The network buffers
     * and netlink resources have to be torn down, which is itself an
     * asynchronous operation.
     */
    if (libxl_defbool_val(dss->remus->colo))
        libxl__colo_save_teardown(egc, &dss->css, rc);
    else
        libxl__remus_teardown(egc, &dss->rs, rc);
}
344 
345 /*========================= Domain restore ============================*/
346 
/*
 * Given a string beginning at 'start' inside the buffer [start, end),
 * return a pointer to the byte just past its NUL terminator.  Returns
 * NULL when the range is empty or holds no NUL, i.e. when the string is
 * not terminated before 'end'.
 */
static const char *next_string(const char *start, const char *end)
{
    const char *nul;

    if (start >= end)
        return NULL;

    /* Look for the terminator without ever reading at or past 'end'. */
    nul = memchr(start, '\0', (size_t)(end - start));

    return nul ? nul + 1 : NULL;
}
364 
/*
 * Write device model xenstore data received in a migration stream back
 * into xenstore for the domain being created.
 *
 * 'ptr' / 'size' describe a buffer holding a sequence of NUL-terminated
 * key and value strings (key, value, key, value, ...).  Each key is
 * written, relative to the device model's xenstore root for the guest,
 * with the corresponding value.
 *
 * Returns 0 on success.  Returns ERROR_FAIL if a key or value is not
 * NUL-terminated within the buffer, if a key is empty or absolute
 * (begins with '/'), or if a xenstore write fails.  Entries preceding
 * the offending one have already been written at that point.
 */
int libxl__restore_emulator_xenstore_data(libxl__domain_create_state *dcs,
                                          const char *ptr, uint32_t size)
{
    STATE_AO_GC(dcs->ao);
    const char *next = ptr, *end = ptr + size, *key, *val;
    int rc;

    const uint32_t domid = dcs->guest_domid;
    const uint32_t dm_domid = libxl_get_stubdom_id(CTX, domid);
    const char *xs_root = DEVICE_MODEL_XS_PATH(gc, dm_domid, domid, "");

    while (next < end) {
        key = next;
        next = next_string(next, end);

        /* Sanitise 'key'. */
        if (!next) {
            rc = ERROR_FAIL;
            LOGD(ERROR, domid, "Key in xenstore data not NUL terminated");
            goto out;
        }
        if (key[0] == '\0') {
            rc = ERROR_FAIL;
            LOGD(ERROR, domid, "empty key found in xenstore data");
            goto out;
        }
        if (key[0] == '/') {
            rc = ERROR_FAIL;
            LOGD(ERROR, domid, "Key in xenstore data not relative");
            goto out;
        }

        val = next;
        next = next_string(next, end);

        /* Sanitise 'val'. */
        if (!next) {
            rc = ERROR_FAIL;
            LOGD(ERROR, domid, "Val in xenstore data not NUL terminated");
            goto out;
        }

        /*
         * Do not report success if the entry could not be written.
         * NOTE(review): this relies on libxl__xs_printf returning
         * non-zero on failure -- confirm against libxl_internal.h.
         */
        if (libxl__xs_printf(gc, XBT_NULL,
                             GCSPRINTF("%s/%s", xs_root, key),
                             "%s", val)) {
            rc = ERROR_FAIL;
            LOGD(ERROR, domid, "Failed to write xenstore key %s", key);
            goto out;
        }
    }

    rc = 0;

 out:
    return rc;
}
417 
418 /*
419  * Local variables:
420  * mode: C
421  * c-basic-offset: 4
422  * indent-tabs-mode: nil
423  * End:
424  */
425