1 /*
2  * Copyright (C) 2009      Citrix Ltd.
3  * Author Vincent Hanquez <vincent.hanquez@eu.citrix.com>
4  *
5  * This program is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU Lesser General Public License as published
7  * by the Free Software Foundation; version 2.1 only. with the special
8  * exception on linking described in file LICENSE.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU Lesser General Public License for more details.
14  */
15 
16 #include "libxl_osdeps.h" /* must come before any other headers */
17 
18 #include "libxl_internal.h"
19 
20 #include <xen/errno.h>
21 
22 /*========================= Domain save ============================*/
23 
24 static void stream_done(libxl__egc *egc,
25                         libxl__stream_write_state *sws, int rc);
26 static void domain_save_done(libxl__egc *egc,
27                              libxl__domain_save_state *dss, int rc);
28 
29 /*----- complicated callback, called by xc_domain_save -----*/
30 
31 /*
32  * We implement the other end of protocol for controlling qemu-dm's
33  * logdirty.  There is no documentation for this protocol, but our
34  * counterparty's implementation is in
35  * qemu-xen-traditional.git:xenstore.c in the function
36  * xenstore_process_logdirty_event
37  */
38 
39 static void switch_logdirty_timeout(libxl__egc *egc, libxl__ev_time *ev,
40                                     const struct timeval *requested_abs,
41                                     int rc);
42 static void switch_logdirty_xswatch(libxl__egc *egc, libxl__ev_xswatch*,
43                             const char *watch_path, const char *event_path);
44 static void switch_logdirty_done(libxl__egc *egc,
45                                  libxl__logdirty_switch *lds, int rc);
46 
/*
 * Put a logdirty switch into its idle state.
 *
 * Initialises the timeout and xenstore-watch event structures and clears
 * cmd_path.  No other field of *lds is written here.
 */
void libxl__logdirty_init(libxl__logdirty_switch *lds)
{
    libxl__ev_time_init(&lds->timeout);
    libxl__ev_xswatch_init(&lds->watch);
    lds->cmd_path = NULL;
}
53 
/*
 * Start a logdirty "enable"/"disable" request towards a
 * qemu-xen-traditional device model, using xenstore.
 *
 * Steps:
 *  - compute (once) the command and reply xenstore paths;
 *  - register a watch on the reply path and a timeout;
 *  - in a transaction: if an earlier command is still present it must have
 *    been answered with the very same string, otherwise give up; then
 *    remove command and reply nodes and write the new command.
 *
 * On success the function simply returns; completion is reported later
 * through switch_logdirty_xswatch() or switch_logdirty_timeout().  On any
 * failure the transaction is aborted and switch_logdirty_done() is called
 * with the error code.
 */
static void domain_suspend_switch_qemu_xen_traditional_logdirty
                               (libxl__egc *egc, int domid, unsigned enable,
                                libxl__logdirty_switch *lds)
{
    STATE_AO_GC(lds->ao);
    xs_transaction_t xst = 0;
    const char *old_cmd;
    int rc;

    /* The paths are looked up only the first time round. */
    if (!lds->cmd_path) {
        uint32_t dm_domid = libxl_get_stubdom_id(CTX, domid);
        lds->cmd_path = DEVICE_MODEL_XS_PATH(gc, dm_domid, domid,
                                             "/logdirty/cmd");
        lds->ret_path = DEVICE_MODEL_XS_PATH(gc, dm_domid, domid,
                                             "/logdirty/ret");
    }

    if (enable)
        lds->cmd = "enable";
    else
        lds->cmd = "disable";

    rc = libxl__ev_xswatch_register(gc, &lds->watch,
                                    switch_logdirty_xswatch, lds->ret_path);
    if (rc) goto out;

    /* Relative timeout of 10*1000 (presumably milliseconds - see
     * libxl__ev_time_register_rel). */
    rc = libxl__ev_time_register_rel(ao, &lds->timeout,
                                     switch_logdirty_timeout, 10*1000);
    if (rc) goto out;

    /* Repeat the whole transaction for as long as commit returns > 0. */
    do {
        rc = libxl__xs_transaction_start(gc, &xst);
        if (rc) goto out;

        rc = libxl__xs_read_checked(gc, xst, lds->cmd_path, &old_cmd);
        if (rc) goto out;

        if (old_cmd) {
            const char *old_ret;

            rc = libxl__xs_read_checked(gc, xst, lds->ret_path, &old_ret);
            if (rc) goto out;

            /* A leftover command is only acceptable if qemu echoed it. */
            if (!(old_ret && strcmp(old_cmd, old_ret) == 0)) {
                LOGD(ERROR, domid, "controlling logdirty: qemu was already sent"
                     " command `%s' (xenstore path `%s') but result is `%s'",
                     old_cmd, lds->cmd_path, old_ret ? old_ret : "<none>");
                rc = ERROR_FAIL;
                goto out;
            }

            rc = libxl__xs_rm_checked(gc, xst, lds->cmd_path);
            if (rc) goto out;
        }

        rc = libxl__xs_rm_checked(gc, xst, lds->ret_path);
        if (rc) goto out;

        rc = libxl__xs_write_checked(gc, xst, lds->cmd_path, lds->cmd);
        if (rc) goto out;

        rc = libxl__xs_transaction_commit(gc, &xst);
        if (rc < 0) goto out;
    } while (rc);

    /* Command written; the watch or the timeout will call us back. */
    return;

 out:
    LOGD(ERROR, domid, "logdirty switch failed (rc=%d), abandoning suspend",rc);
    libxl__xs_transaction_abort(gc, &xst);
    switch_logdirty_done(egc, lds, rc);
}
122 
/*
 * Switch logdirty for a qemu-xen device model by calling
 * libxl__qmp_set_global_dirty_log().
 *
 * The result is handed straight to lds->callback; a non-zero result is
 * additionally logged.
 */
static void domain_suspend_switch_qemu_xen_logdirty
                               (libxl__egc *egc, int domid, unsigned enable,
                                libxl__logdirty_switch *lds)
{
    STATE_AO_GC(lds->ao);
    const int rc = libxl__qmp_set_global_dirty_log(gc, domid, enable);

    if (rc != 0) {
        LOGD(ERROR, domid,
             "logdirty switch failed (rc=%d), abandoning suspend",rc);
    }

    lds->callback(egc, lds, rc);
}
137 
/*
 * Completion callback of the logdirty switch when it was requested by the
 * save helper.
 *
 * lds is the 'logdirty' member of a libxl__domain_save_state.  A non-zero
 * rc is recorded in dss->rc and reported to the helper as -1; success is
 * reported as 0.
 */
static void domain_suspend_switch_qemu_logdirty_done
                        (libxl__egc *egc, libxl__logdirty_switch *lds, int rc)
{
    libxl__domain_save_state *dss = CONTAINER_OF(lds, *dss, logdirty);
    int helper_result = 0;

    if (rc) {
        dss->rc = rc;
        helper_result = -1;
    }

    libxl__xc_domain_saverestore_async_callback_done(egc, &dss->sws.shs,
                                                     helper_result);
}
150 
libxl__domain_suspend_common_switch_qemu_logdirty(uint32_t domid,unsigned enable,void * user)151 void libxl__domain_suspend_common_switch_qemu_logdirty
152                                (uint32_t domid, unsigned enable, void *user)
153 {
154     libxl__save_helper_state *shs = user;
155     libxl__egc *egc = shs->egc;
156     libxl__domain_save_state *dss = shs->caller_state;
157 
158     /* Convenience aliases. */
159     libxl__logdirty_switch *const lds = &dss->logdirty;
160 
161     if (dss->type == LIBXL_DOMAIN_TYPE_PVH) {
162         domain_suspend_switch_qemu_logdirty_done(egc, lds, 0);
163         return;
164     }
165 
166     lds->callback = domain_suspend_switch_qemu_logdirty_done;
167     libxl__domain_common_switch_qemu_logdirty(egc, domid, enable, lds);
168 }
169 
/*
 * Dispatch a logdirty switch to the implementation that matches the
 * device model version currently running for domid.
 *
 * If the version is neither qemu-xen-traditional nor qemu-xen, an error is
 * logged and lds->callback is invoked with ERROR_FAIL.
 */
void libxl__domain_common_switch_qemu_logdirty(libxl__egc *egc,
                                               int domid, unsigned enable,
                                               libxl__logdirty_switch *lds)
{
    STATE_AO_GC(lds->ao);
    const int dm_version = libxl__device_model_version_running(gc, domid);

    if (dm_version == LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN_TRADITIONAL) {
        domain_suspend_switch_qemu_xen_traditional_logdirty(egc, domid, enable,
                                                            lds);
    } else if (dm_version == LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN) {
        domain_suspend_switch_qemu_xen_logdirty(egc, domid, enable, lds);
    } else {
        LOGD(ERROR, domid, "logdirty switch failed"
             ", no valid device model version found, abandoning suspend");
        lds->callback(egc, lds, ERROR_FAIL);
    }
}
/*
 * Timeout handler for the xenstore-based logdirty switch.
 *
 * ev is the 'timeout' member of a libxl__logdirty_switch.  The
 * requested_abs and rc arguments are not consulted: every invocation is
 * logged and finishes the switch with ERROR_FAIL.
 */
static void switch_logdirty_timeout(libxl__egc *egc, libxl__ev_time *ev,
                                    const struct timeval *requested_abs,
                                    int rc)
{
    libxl__logdirty_switch *lds = CONTAINER_OF(ev, *lds, timeout);
    STATE_AO_GC(lds->ao);

    LOG(ERROR,"logdirty switch: wait for device model timed out");

    switch_logdirty_done(egc, lds, ERROR_FAIL);
}
199 
/*
 * Xenstore watch handler for the reply path of the logdirty switch.
 *
 * watch is the 'watch' member of a libxl__logdirty_switch.  In a
 * transaction the reply node is read:
 *  - absent: nothing to do yet, keep waiting (no callback is made);
 *  - different from the command we sent: fail with ERROR_FAIL;
 *  - equal: remove both the command and the reply node and finish
 *    successfully.
 * Finishing (success or error) goes through switch_logdirty_done().
 */
static void switch_logdirty_xswatch(libxl__egc *egc, libxl__ev_xswatch *watch,
                            const char *watch_path, const char *event_path)
{
    libxl__logdirty_switch *lds = CONTAINER_OF(watch, *lds, watch);
    STATE_AO_GC(lds->ao);
    xs_transaction_t xst = 0;
    const char *reply;
    int rc;

    /* Repeat the whole transaction for as long as commit returns > 0. */
    do {
        rc = libxl__xs_transaction_start(gc, &xst);
        if (rc) goto out;

        rc = libxl__xs_read_checked(gc, xst, lds->ret_path, &reply);
        if (rc) goto out;

        if (!reply) {
            /* No answer from the device model so far. */
            rc = +1;
            goto out;
        }

        if (strcmp(reply, lds->cmd) != 0) {
            LOG(ERROR,"logdirty switch: sent command `%s' but got reply `%s'"
                " (xenstore paths `%s' / `%s')", lds->cmd, reply,
                lds->cmd_path, lds->ret_path);
            rc = ERROR_FAIL;
            goto out;
        }

        rc = libxl__xs_rm_checked(gc, xst, lds->cmd_path);
        if (rc) goto out;

        rc = libxl__xs_rm_checked(gc, xst, lds->ret_path);
        if (rc) goto out;

        rc = libxl__xs_transaction_commit(gc, &xst);
        if (rc < 0) goto out;
    } while (rc);

 out:
    /*
     * At this point:
     *   rc  < 0   an error occurred
     *   rc == 0   the switch completed
     *   rc  > 0   still waiting for the device model
     */
    libxl__xs_transaction_abort(gc, &xst);

    if (rc > 0)
        return;

    if (rc < 0)
        LOG(ERROR,"logdirty switch: failed (rc=%d)",rc);

    switch_logdirty_done(egc, lds, rc);
}
253 
/*
 * Common exit of the xenstore-based logdirty switch: deregister the reply
 * watch and the timeout, then pass rc on to lds->callback.
 */
static void switch_logdirty_done(libxl__egc *egc,
                                 libxl__logdirty_switch *lds, int rc)
{
    STATE_AO_GC(lds->ao);

    /* Stop both event sources before handing control back. */
    libxl__ev_xswatch_deregister(gc, &lds->watch);
    libxl__ev_time_deregister(gc, &lds->timeout);

    lds->callback(egc, lds, rc);
}
265 
266 /*----- callbacks, called by xc_domain_save -----*/
267 
268 /*
269  * Expand the buffer 'buf' of length 'len', to append 'str' including its NUL
270  * terminator.
271  */
append_string(libxl__gc * gc,char ** buf,uint32_t * len,const char * str)272 static void append_string(libxl__gc *gc, char **buf, uint32_t *len,
273                           const char *str)
274 {
275     size_t extralen = strlen(str) + 1;
276     char *new = libxl__realloc(gc, *buf, *len + extralen);
277 
278     *buf = new;
279     memcpy(new + *len, str, extralen);
280     *len += extralen;
281 }
282 
/*
 * Collect the device model's "physmap" xenstore entries of the domain
 * being saved into one flat buffer.
 *
 * For every entry below <dm root>/physmap the sub-keys "start_addr",
 * "size" and "name" are read.  Each one is appended to the buffer as two
 * consecutive NUL-terminated strings: the key relative to the device model
 * root ("physmap/<entry>/<subkey>") followed by its value.
 *
 * Returns 0 on success, storing the buffer and its length in *callee_buf
 * and *callee_len (NULL and 0 when there are no physmap entries).
 * Returns ERROR_FAIL if one of the sub-keys cannot be read; the output
 * parameters are then left untouched.
 */
int libxl__save_emulator_xenstore_data(libxl__domain_save_state *dss,
                                       char **callee_buf,
                                       uint32_t *callee_len)
{
    static const char *const physmap_subkeys[] = {
        "start_addr", "size", "name"
    };

    STATE_AO_GC(dss->ao);
    char *buf = NULL;
    unsigned int len = 0;
    unsigned int nr_entries, ent, sub;
    char **entries;
    const char *xs_root;
    int rc;

    const uint32_t domid = dss->domid;
    const uint32_t dm_domid = libxl_get_stubdom_id(CTX, domid);

    xs_root = DEVICE_MODEL_XS_PATH(gc, dm_domid, domid, "");

    entries = libxl__xs_directory(gc, XBT_NULL,
                                  GCSPRINTF("%s/physmap", xs_root),
                                  &nr_entries);

    if (entries && nr_entries != 0) {
        for (ent = 0; ent < nr_entries; ++ent) {
            for (sub = 0; sub < ARRAY_SIZE(physmap_subkeys); ++sub) {
                const char *rel_key = GCSPRINTF("physmap/%s/%s",
                                                entries[ent],
                                                physmap_subkeys[sub]);
                const char *value =
                    libxl__xs_read(gc, XBT_NULL,
                                   GCSPRINTF("%s/%s", xs_root, rel_key));

                if (!value) {
                    rc = ERROR_FAIL;
                    goto out;
                }

                append_string(gc, &buf, &len, rel_key);
                append_string(gc, &buf, &len, value);
            }
        }
    }

    rc = 0;

 out:
    /* Only publish the buffer when everything was read successfully. */
    if (rc == 0) {
        *callee_buf = buf;
        *callee_len = len;
    }

    return rc;
}
332 
333 /*----- main code for saving, in order of execution -----*/
334 
/*
 * Entry point of the asynchronous domain save.
 *
 * Validates the request, initialises the logdirty and suspend sub-states,
 * derives the xc save flags, refuses guests with vNUMA, installs the save
 * helper callbacks and finally starts the stream writer.  Completion is
 * reported through domain_save_done(), either directly on an early error
 * or via stream_done() once the stream writer has finished.
 */
void libxl__domain_save(libxl__egc *egc, libxl__domain_save_state *dss)
{
    STATE_AO_GC(dss->ao);
    unsigned int nr_vnodes = 0, nr_vmemranges = 0, nr_vcpus = 0;
    int rc, ret;

    /* Shorthands for the request parameters and embedded sub-states. */
    const uint32_t domid = dss->domid;
    const libxl_domain_type type = dss->type;
    const int want_live = dss->live;
    const int want_debug = dss->debug;
    const libxl_domain_remus_info *const r_info = dss->remus;
    libxl__domain_suspend_state *const dsps = &dss->dsps;
    libxl__srm_save_autogen_callbacks *const callbacks =
        &dss->sws.shs.callbacks.save.a;

    if (!r_info && dss->checkpointed_stream != LIBXL_CHECKPOINTED_STREAM_NONE) {
        LOGD(ERROR, domid, "Migration stream is checkpointed, but there's no "
                           "checkpoint info!");
        rc = ERROR_INVAL;
        goto out;
    }

    dss->rc = 0;
    libxl__logdirty_init(&dss->logdirty);
    dss->logdirty.ao = ao;

    dsps->ao = ao;
    dsps->domid = domid;
    rc = libxl__domain_suspend_init(egc, dsps, type);
    if (rc) goto out;

    /* PVH guests are flagged as HVM for the save; any other type than
     * PV/HVM/PVH is a programming error. */
    switch (type) {
    case LIBXL_DOMAIN_TYPE_PV:
        dss->hvm = 0;
        break;
    case LIBXL_DOMAIN_TYPE_HVM:
    case LIBXL_DOMAIN_TYPE_PVH:
        dss->hvm = 1;
        break;
    default:
        abort();
    }

    dss->xcflags = 0;
    if (want_live)
        dss->xcflags |= XCFLAGS_LIVE;
    if (want_debug)
        dss->xcflags |= XCFLAGS_DEBUG;
    if (dss->hvm)
        dss->xcflags |= XCFLAGS_HVM;

    /*
     * The migration stream does not carry node information, so a guest
     * with vNUMA cannot be saved.
     *
     * Any domain with vnuma enabled is rejected, even when its
     * configuration is empty; only domains without any vnuma
     * configuration pass.  That is the case exactly when the query fails
     * with EOPNOTSUPP.
     */
    ret = xc_domain_getvnuma(CTX->xch, domid, &nr_vnodes, &nr_vmemranges,
                             &nr_vcpus, NULL, NULL, NULL);
    if (!(ret == -1 && errno == EOPNOTSUPP)) {
        LOGD(ERROR, domid, "Cannot save a guest with vNUMA configured");
        rc = ERROR_FAIL;
        goto out;
    }

    if (dss->checkpointed_stream == LIBXL_CHECKPOINTED_STREAM_REMUS &&
        libxl_defbool_val(r_info->compression))
        dss->xcflags |= XCFLAGS_CHECKPOINT_COMPRESS;

    /* The suspend callback is only installed here for plain
     * (non-checkpointed) streams. */
    if (dss->checkpointed_stream == LIBXL_CHECKPOINTED_STREAM_NONE)
        callbacks->suspend = libxl__domain_suspend_callback;

    callbacks->switch_qemu_logdirty =
        libxl__domain_suspend_common_switch_qemu_logdirty;

    dss->sws.ao = dss->ao;
    dss->sws.dss = dss;
    dss->sws.fd = dss->fd;
    dss->sws.back_channel = false;
    dss->sws.completion_callback = stream_done;

    libxl__stream_write_start(egc, &dss->sws);
    return;

 out:
    domain_save_done(egc, dss, rc);
}
421 
/*
 * Completion callback of the stream writer: forward the result to
 * domain_save_done() for the save state that owns the stream.
 */
static void stream_done(libxl__egc *egc,
                        libxl__stream_write_state *sws, int rc)
{
    libxl__domain_save_state *const dss = sws->dss;

    domain_save_done(egc, dss, rc);
}
427 
/*
 * Final stage of a domain save.
 *
 * Cancels the guest event-channel wait and releases the suspend event
 * channel if one was set up (port > 0).  Without Remus/COLO information
 * the caller's callback is invoked right away; with it, the matching
 * teardown is started instead and this function does not call
 * dss->callback itself.
 */
static void domain_save_done(libxl__egc *egc,
                             libxl__domain_save_state *dss, int rc)
{
    STATE_AO_GC(dss->ao);

    const uint32_t domid = dss->domid;
    libxl__domain_suspend_state *const dsps = &dss->dsps;

    libxl__ev_evtchn_cancel(gc, &dsps->guest_evtchn);

    if (dsps->guest_evtchn.port > 0) {
        xc_suspend_evtchn_release(CTX->xch, CTX->xce, domid,
                                  dsps->guest_evtchn.port,
                                  &dsps->guest_evtchn_lockfd);
    }

    if (!dss->remus) {
        dss->callback(egc, dss, rc);
        return;
    }

    /*
     * With Remus/COLO, getting here means that either the backup died or
     * a network error stopped us from sending checkpoints.  Tear down the
     * network buffers and release netlink resources.  This is an async
     * op.
     */
    if (libxl_defbool_val(dss->remus->colo))
        libxl__colo_save_teardown(egc, &dss->css, rc);
    else
        libxl__remus_teardown(egc, &dss->rs, rc);
}
459 
460 /*========================= Domain restore ============================*/
461 
/*
 * Look at the bytes in [start, end) and find the NUL that terminates the
 * string beginning at 'start'.  Returns a pointer to the byte right after
 * that NUL, or NULL when the range is empty or holds no NUL at all.
 */
static const char *next_string(const char *start, const char *end)
{
    const char *nul;

    if (start >= end)
        return NULL;

    nul = memchr(start, '\0', (size_t)(end - start));

    return nul ? nul + 1 : NULL;
}
479 
/*
 * Write emulator xenstore data received in a migration stream back into
 * xenstore, below the device model root of the domain being created.
 *
 * 'ptr'/'size' describe a buffer holding a sequence of key/value pairs,
 * each key and each value being a NUL-terminated string.  Keys are taken
 * relative to the device model root, so they must be non-empty and must
 * not start with '/'.
 *
 * Returns 0 when every pair has been written.  Returns ERROR_FAIL for
 * malformed input (unterminated key or value, empty or absolute key), or
 * the non-zero result of libxl__xs_printf() if a write fails.  Processing
 * stops at the first problem; pairs written before it are not undone.
 */
int libxl__restore_emulator_xenstore_data(libxl__domain_create_state *dcs,
                                          const char *ptr, uint32_t size)
{
    STATE_AO_GC(dcs->ao);
    const char *next = ptr, *end = ptr + size, *key, *val;
    int rc;

    const uint32_t domid = dcs->guest_domid;
    const uint32_t dm_domid = libxl_get_stubdom_id(CTX, domid);
    const char *xs_root = DEVICE_MODEL_XS_PATH(gc, dm_domid, domid, "");

    while (next < end) {
        key = next;
        next = next_string(next, end);

        /* Sanitise 'key'. */
        if (!next) {
            rc = ERROR_FAIL;
            LOGD(ERROR, domid, "Key in xenstore data not NUL terminated");
            goto out;
        }
        if (key[0] == '\0') {
            rc = ERROR_FAIL;
            LOGD(ERROR, domid, "empty key found in xenstore data");
            goto out;
        }
        if (key[0] == '/') {
            rc = ERROR_FAIL;
            LOGD(ERROR, domid, "Key in xenstore data not relative");
            goto out;
        }

        val = next;
        next = next_string(next, end);

        /* Sanitise 'val'. */
        if (!next) {
            rc = ERROR_FAIL;
            LOGD(ERROR, domid, "Val in xenstore data not NUL terminated");
            goto out;
        }

        /* A failed write must not be reported as a successful restore. */
        rc = libxl__xs_printf(gc, XBT_NULL,
                              GCSPRINTF("%s/%s", xs_root, key),
                              "%s", val);
        if (rc) {
            LOGD(ERROR, domid, "Failed to write xenstore data `%s/%s'",
                 xs_root, key);
            goto out;
        }
    }

    rc = 0;

 out:
    return rc;
}
532 
533 /*
534  * Local variables:
535  * mode: C
536  * c-basic-offset: 4
537  * indent-tabs-mode: nil
538  * End:
539  */
540