1 /*
2 * Copyright (C) 2009 Citrix Ltd.
3 * Author Vincent Hanquez <vincent.hanquez@eu.citrix.com>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU Lesser General Public License as published
7 * by the Free Software Foundation; version 2.1 only. with the special
8 * exception on linking described in file LICENSE.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU Lesser General Public License for more details.
14 */
15
16 #include "libxl_osdeps.h" /* must come before any other headers */
17
18 #include "libxl_internal.h"
19
20 #include <xen/errno.h>
21
22 /*========================= Domain save ============================*/
23
24 static void stream_done(libxl__egc *egc,
25 libxl__stream_write_state *sws, int rc);
26 static void domain_save_done(libxl__egc *egc,
27 libxl__domain_save_state *dss, int rc);
28
29 /*----- complicated callback, called by xc_domain_save -----*/
30
/*
 * We implement the other end of the protocol for controlling qemu-dm's
 * logdirty.  There is no documentation for this protocol, but our
 * counterparty's implementation is in
 * qemu-xen-traditional.git:xenstore.c in the function
 * xenstore_process_logdirty_event.
 */
38
39 static void switch_logdirty_timeout(libxl__egc *egc, libxl__ev_time *ev,
40 const struct timeval *requested_abs,
41 int rc);
42 static void switch_logdirty_xswatch(libxl__egc *egc, libxl__ev_xswatch*,
43 const char *watch_path, const char *event_path);
44 static void switch_logdirty_done(libxl__egc *egc,
45 libxl__logdirty_switch *lds, int rc);
46
/*
 * Prepare a logdirty switch for use.
 *
 * Clears the cached command path (so the xenstore paths get computed on
 * first use) and runs the init helpers for the embedded xenstore watch
 * and timeout.  The caller is still responsible for filling in lds->ao
 * and lds->callback.
 */
void libxl__logdirty_init(libxl__logdirty_switch *lds)
{
    libxl__ev_time_init(&lds->timeout);
    libxl__ev_xswatch_init(&lds->watch);
    lds->cmd_path = NULL;
}
53
/*
 * Send an "enable"/"disable" logdirty command to a qemu-xen-traditional
 * device model through its xenstore logdirty/cmd and logdirty/ret nodes.
 *
 * A watch on the ret node and a timeout are registered first; the command
 * is then written inside a xenstore transaction.  On success the function
 * returns with the watch and timeout armed and completion is reported
 * later from switch_logdirty_xswatch() or switch_logdirty_timeout().  On
 * any failure the transaction is aborted and switch_logdirty_done() is
 * called straight away with the error.
 *
 *   egc     event generation context handed on to the completion path
 *   domid   domain whose device model is being controlled
 *   enable  non-zero to send "enable", zero to send "disable"
 *   lds     switch state; lds->ao and lds->callback must already be set
 */
static void domain_suspend_switch_qemu_xen_traditional_logdirty
                               (libxl__egc *egc, int domid, unsigned enable,
                                libxl__logdirty_switch *lds)
{
    STATE_AO_GC(lds->ao);
    xs_transaction_t t = 0;
    const char *prev_cmd;
    int rc;

    /* The two xenstore paths are worked out once and then reused. */
    if (!lds->cmd_path) {
        uint32_t dm_domid = libxl_get_stubdom_id(CTX, domid);
        lds->cmd_path = DEVICE_MODEL_XS_PATH(gc, dm_domid, domid,
                                             "/logdirty/cmd");
        lds->ret_path = DEVICE_MODEL_XS_PATH(gc, dm_domid, domid,
                                             "/logdirty/ret");
    }

    if (enable)
        lds->cmd = "enable";
    else
        lds->cmd = "disable";

    rc = libxl__ev_xswatch_register(gc, &lds->watch,
                                    switch_logdirty_xswatch, lds->ret_path);
    if (rc) goto out;

    /* 10*1000: presumably milliseconds, i.e. a 10s limit -- TODO confirm */
    rc = libxl__ev_time_register_rel(ao, &lds->timeout,
                                     switch_logdirty_timeout, 10*1000);
    if (rc) goto out;

    /* Repeat the whole transaction while the commit returns > 0. */
    do {
        rc = libxl__xs_transaction_start(gc, &t);
        if (rc) goto out;

        rc = libxl__xs_read_checked(gc, t, lds->cmd_path, &prev_cmd);
        if (rc) goto out;

        if (prev_cmd) {
            /*
             * A command is already present.  It is only acceptable if
             * the ret node holds the very same string; in that case the
             * stale command is removed before ours is written.
             */
            const char *prev_ret;

            rc = libxl__xs_read_checked(gc, t, lds->ret_path, &prev_ret);
            if (rc) goto out;

            if (!prev_ret || strcmp(prev_cmd, prev_ret) != 0) {
                LOGD(ERROR, domid, "controlling logdirty: qemu was already sent"
                     " command `%s' (xenstore path `%s') but result is `%s'",
                     prev_cmd, lds->cmd_path, prev_ret ? prev_ret : "<none>");
                rc = ERROR_FAIL;
                goto out;
            }

            rc = libxl__xs_rm_checked(gc, t, lds->cmd_path);
            if (rc) goto out;
        }

        rc = libxl__xs_rm_checked(gc, t, lds->ret_path);
        if (rc) goto out;

        rc = libxl__xs_write_checked(gc, t, lds->cmd_path, lds->cmd);
        if (rc) goto out;

        rc = libxl__xs_transaction_commit(gc, &t);
        if (rc < 0) goto out;
    } while (rc);

    /* OK, wait for some callback */
    return;

 out:
    LOGD(ERROR, domid, "logdirty switch failed (rc=%d), abandoning suspend",rc);
    libxl__xs_transaction_abort(gc, &t);
    switch_logdirty_done(egc, lds, rc);
}
122
/*
 * Switch logdirty mode on a qemu-xen device model by calling
 * libxl__qmp_set_global_dirty_log().
 *
 * The result (zero or an error code) is logged on failure and passed to
 * lds->callback before this function returns; nothing is left pending.
 */
static void domain_suspend_switch_qemu_xen_logdirty
                               (libxl__egc *egc, int domid, unsigned enable,
                                libxl__logdirty_switch *lds)
{
    STATE_AO_GC(lds->ao);
    const int rc = libxl__qmp_set_global_dirty_log(gc, domid, enable);

    if (rc != 0) {
        LOGD(ERROR, domid,
             "logdirty switch failed (rc=%d), abandoning suspend",rc);
    }

    lds->callback(egc, lds, rc);
}
137
/*
 * Completion callback of the logdirty switch when it was started on
 * behalf of a domain save.
 *
 * 'lds' is the logdirty member embedded in a libxl__domain_save_state.
 * A non-zero rc is recorded in dss->rc and reported to the save helper
 * as -1; rc == 0 is reported as 0.
 */
static void domain_suspend_switch_qemu_logdirty_done
                        (libxl__egc *egc, libxl__logdirty_switch *lds, int rc)
{
    libxl__domain_save_state *dss = CONTAINER_OF(lds, *dss, logdirty);
    int helper_result = 0;

    if (rc) {
        dss->rc = rc;
        helper_result = -1;
    }

    libxl__xc_domain_saverestore_async_callback_done(egc, &dss->sws.shs,
                                                     helper_result);
}
150
libxl__domain_suspend_common_switch_qemu_logdirty(uint32_t domid,unsigned enable,void * user)151 void libxl__domain_suspend_common_switch_qemu_logdirty
152 (uint32_t domid, unsigned enable, void *user)
153 {
154 libxl__save_helper_state *shs = user;
155 libxl__egc *egc = shs->egc;
156 libxl__domain_save_state *dss = shs->caller_state;
157
158 /* Convenience aliases. */
159 libxl__logdirty_switch *const lds = &dss->logdirty;
160
161 if (dss->type == LIBXL_DOMAIN_TYPE_PVH) {
162 domain_suspend_switch_qemu_logdirty_done(egc, lds, 0);
163 return;
164 }
165
166 lds->callback = domain_suspend_switch_qemu_logdirty_done;
167 libxl__domain_common_switch_qemu_logdirty(egc, domid, enable, lds);
168 }
169
/*
 * Switch logdirty mode on the device model serving 'domid'.
 *
 * The running device model version selects the mechanism: the xenstore
 * protocol for qemu-xen-traditional, or the QMP-based helper for
 * qemu-xen.  For any other version an error is logged and lds->callback
 * is invoked with ERROR_FAIL.  lds->ao and lds->callback must be set by
 * the caller.
 */
void libxl__domain_common_switch_qemu_logdirty(libxl__egc *egc,
                                               int domid, unsigned enable,
                                               libxl__logdirty_switch *lds)
{
    STATE_AO_GC(lds->ao);
    const int dm_version = libxl__device_model_version_running(gc, domid);

    if (dm_version == LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN_TRADITIONAL) {
        domain_suspend_switch_qemu_xen_traditional_logdirty(egc, domid, enable,
                                                            lds);
    } else if (dm_version == LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN) {
        domain_suspend_switch_qemu_xen_logdirty(egc, domid, enable, lds);
    } else {
        LOGD(ERROR, domid, "logdirty switch failed"
             ", no valid device model version found, abandoning suspend");
        lds->callback(egc, lds, ERROR_FAIL);
    }
}
/*
 * Timeout callback for the xenstore logdirty protocol: the device model
 * did not answer before lds->timeout fired.
 *
 * Logs the condition and finishes the switch with ERROR_FAIL.
 * 'requested_abs' and the incoming 'rc' are not used.
 * NOTE(review): the incoming rc is discarded, so every invocation is
 * reported as a timeout with ERROR_FAIL -- confirm this is intended.
 */
static void switch_logdirty_timeout(libxl__egc *egc, libxl__ev_time *ev,
                                    const struct timeval *requested_abs,
                                    int rc)
{
    libxl__logdirty_switch *sw = CONTAINER_OF(ev, *sw, timeout);
    STATE_AO_GC(sw->ao);

    LOG(ERROR,"logdirty switch: wait for device model timed out");
    switch_logdirty_done(egc, sw, ERROR_FAIL);
}
199
/*
 * Xenstore watch callback for the logdirty ret node.
 *
 * Reads the ret node inside a transaction:
 *   - node absent: nothing to do yet, keep waiting (no completion);
 *   - node differs from the command we sent: fail with ERROR_FAIL;
 *   - node matches: remove both the cmd and ret nodes and commit.
 * The commit is retried while it returns > 0.  Unless we are still
 * waiting, the outcome is handed to switch_logdirty_done().
 * 'watch_path' and 'event_path' are not used.
 */
static void switch_logdirty_xswatch(libxl__egc *egc, libxl__ev_xswatch *watch,
                            const char *watch_path, const char *event_path)
{
    libxl__logdirty_switch *lds = CONTAINER_OF(watch, *lds, watch);
    STATE_AO_GC(lds->ao);
    xs_transaction_t t = 0;
    const char *reply;
    int rc;

    do {
        rc = libxl__xs_transaction_start(gc, &t);
        if (rc) goto out;

        rc = libxl__xs_read_checked(gc, t, lds->ret_path, &reply);
        if (rc) goto out;

        if (!reply) {
            /* No answer written yet. */
            rc = +1;
            goto out;
        }

        if (strcmp(reply, lds->cmd) != 0) {
            LOG(ERROR,"logdirty switch: sent command `%s' but got reply `%s'"
                " (xenstore paths `%s' / `%s')", lds->cmd, reply,
                lds->cmd_path, lds->ret_path);
            rc = ERROR_FAIL;
            goto out;
        }

        rc = libxl__xs_rm_checked(gc, t, lds->cmd_path);
        if (rc) goto out;

        rc = libxl__xs_rm_checked(gc, t, lds->ret_path);
        if (rc) goto out;

        rc = libxl__xs_transaction_commit(gc, &t);
        if (rc < 0) goto out;
    } while (rc);

 out:
    /*
     * At this point:
     *   rc <  0   an error occurred
     *   rc == 0   the switch completed
     *   rc == +1  no reply yet, stay registered and keep waiting
     */
    libxl__xs_transaction_abort(gc, &t);

    if (rc > 0)
        return;

    if (rc < 0)
        LOG(ERROR,"logdirty switch: failed (rc=%d)",rc);

    switch_logdirty_done(egc, lds, rc);
}
253
/*
 * Common exit of the xenstore logdirty protocol.
 *
 * Deregisters the ret-node watch and the timeout, then reports 'rc' to
 * the owner of the switch through lds->callback.
 */
static void switch_logdirty_done(libxl__egc *egc,
                                 libxl__logdirty_switch *lds,
                                 int rc)
{
    STATE_AO_GC(lds->ao);

    /* Stop both event sources before handing control back. */
    libxl__ev_xswatch_deregister(gc, &lds->watch);
    libxl__ev_time_deregister(gc, &lds->timeout);

    lds->callback(egc, lds, rc);
}
265
266 /*----- callbacks, called by xc_domain_save -----*/
267
268 /*
269 * Expand the buffer 'buf' of length 'len', to append 'str' including its NUL
270 * terminator.
271 */
append_string(libxl__gc * gc,char ** buf,uint32_t * len,const char * str)272 static void append_string(libxl__gc *gc, char **buf, uint32_t *len,
273 const char *str)
274 {
275 size_t extralen = strlen(str) + 1;
276 char *new = libxl__realloc(gc, *buf, *len + extralen);
277
278 *buf = new;
279 memcpy(new + *len, str, extralen);
280 *len += extralen;
281 }
282
/*
 * Collect the device model's physmap entries from xenstore into one
 * flat buffer for the save stream.
 *
 * For every entry below <device-model path>/physmap, the subkeys
 * "start_addr", "size" and "name" are read.  Each is appended to the
 * buffer as two consecutive NUL-terminated strings: the key relative to
 * the device model path ("physmap/<entry>/<subkey>") and its value.
 *
 * Returns 0 on success, with *callee_buf / *callee_len describing the
 * buffer (NULL / 0 when there are no physmap entries).  Returns
 * ERROR_FAIL if any subkey cannot be read; the outputs are then left
 * untouched.
 */
int libxl__save_emulator_xenstore_data(libxl__domain_save_state *dss,
                                       char **callee_buf,
                                       uint32_t *callee_len)
{
    STATE_AO_GC(dss->ao);
    static const char *const physmap_subkeys[] = {
        "start_addr", "size", "name"
    };
    const uint32_t domid = dss->domid;
    const uint32_t dm_domid = libxl_get_stubdom_id(CTX, domid);
    const char *xs_root = DEVICE_MODEL_XS_PATH(gc, dm_domid, domid, "");
    unsigned int nr_entries, e, k;
    uint32_t len = 0;
    char *buf = NULL;
    char **entries;

    entries = libxl__xs_directory(gc, 0, GCSPRINTF("%s/physmap", xs_root),
                                  &nr_entries);

    /* A missing or empty directory simply yields an empty buffer. */
    if (entries) {
        for (e = 0; e < nr_entries; ++e) {
            for (k = 0; k < ARRAY_SIZE(physmap_subkeys); ++k) {
                const char *key = GCSPRINTF("physmap/%s/%s",
                                            entries[e], physmap_subkeys[k]);
                const char *val =
                    libxl__xs_read(gc, XBT_NULL,
                                   GCSPRINTF("%s/%s", xs_root, key));

                if (!val)
                    return ERROR_FAIL;

                append_string(gc, &buf, &len, key);
                append_string(gc, &buf, &len, val);
            }
        }
    }

    *callee_buf = buf;
    *callee_len = len;
    return 0;
}
332
333 /*----- main code for saving, in order of execution -----*/
334
/*
 * Entry point for saving a domain.
 *
 * Validates the request, initialises the logdirty and suspend state
 * embedded in 'dss', derives the XCFLAGS_* for the save, installs the
 * save-helper callbacks and finally starts the stream writer.  The
 * stream writer reports completion through stream_done().  Any failure
 * before the writer is started is reported through domain_save_done().
 *
 * Saving is refused with ERROR_INVAL when the stream is checkpointed
 * but dss->remus is NULL, and with ERROR_FAIL when the vNUMA query does
 * not fail with EOPNOTSUPP.
 */
void libxl__domain_save(libxl__egc *egc, libxl__domain_save_state *dss)
{
    STATE_AO_GC(dss->ao);
    int rc, ret;

    /* Shorthands for values used repeatedly below */
    const uint32_t domid = dss->domid;
    const libxl_domain_type type = dss->type;
    const int live = dss->live;
    const int debug = dss->debug;
    const libxl_domain_remus_info *const r_info = dss->remus;
    libxl__srm_save_autogen_callbacks *const callbacks =
        &dss->sws.shs.callbacks.save.a;
    libxl__domain_suspend_state *dsps = &dss->dsps;
    unsigned int nr_vnodes = 0, nr_vmemranges = 0, nr_vcpus = 0;

    if (!r_info && dss->checkpointed_stream != LIBXL_CHECKPOINTED_STREAM_NONE) {
        LOGD(ERROR, domid, "Migration stream is checkpointed, but there's no "
                   "checkpoint info!");
        rc = ERROR_INVAL;
        goto out;
    }

    dss->rc = 0;
    libxl__logdirty_init(&dss->logdirty);
    dss->logdirty.ao = ao;

    dsps->ao = ao;
    dsps->domid = domid;
    rc = libxl__domain_suspend_init(egc, dsps, type);
    if (rc) goto out;

    /* PVH is handled like HVM here; any other type is a caller bug. */
    switch (type) {
    case LIBXL_DOMAIN_TYPE_PV:
        dss->hvm = 0;
        break;
    case LIBXL_DOMAIN_TYPE_HVM:
    case LIBXL_DOMAIN_TYPE_PVH:
        dss->hvm = 1;
        break;
    default:
        abort();
    }

    dss->xcflags = 0;
    if (live)
        dss->xcflags |= XCFLAGS_LIVE;
    if (debug)
        dss->xcflags |= XCFLAGS_DEBUG;
    if (dss->hvm)
        dss->xcflags |= XCFLAGS_HVM;

    /*
     * The migration stream does not preserve node information, so a
     * guest with vNUMA configured must not be saved.
     *
     * Any domain which has vnuma enabled is rejected, even when its
     * configuration is empty; only a domain with no vnuma configuration
     * at all is accepted.  That case is recognised by the query failing
     * with EOPNOTSUPP.
     *
     * NOTE(review): errno is compared with the host's EOPNOTSUPP while
     * this file includes <xen/errno.h>; confirm whether XEN_EOPNOTSUPP
     * was intended.
     */
    ret = xc_domain_getvnuma(CTX->xch, domid, &nr_vnodes, &nr_vmemranges,
                             &nr_vcpus, NULL, NULL, NULL);
    if (ret != -1 || errno != EOPNOTSUPP) {
        LOGD(ERROR, domid, "Cannot save a guest with vNUMA configured");
        rc = ERROR_FAIL;
        goto out;
    }

    if (dss->checkpointed_stream == LIBXL_CHECKPOINTED_STREAM_REMUS &&
        libxl_defbool_val(r_info->compression)) {
        dss->xcflags |= XCFLAGS_CHECKPOINT_COMPRESS;
    }

    /* The suspend callback is only installed for plain streams. */
    if (dss->checkpointed_stream == LIBXL_CHECKPOINTED_STREAM_NONE)
        callbacks->suspend = libxl__domain_suspend_callback;

    callbacks->switch_qemu_logdirty =
        libxl__domain_suspend_common_switch_qemu_logdirty;

    dss->sws.ao = dss->ao;
    dss->sws.dss = dss;
    dss->sws.fd = dss->fd;
    dss->sws.back_channel = false;
    dss->sws.completion_callback = stream_done;

    libxl__stream_write_start(egc, &dss->sws);
    return;

 out:
    domain_save_done(egc, dss, rc);
}
421
/*
 * Completion callback of the stream writer: forwards the result to
 * domain_save_done() for the save state that owns 'sws'.
 */
static void stream_done(libxl__egc *egc,
                        libxl__stream_write_state *sws, int rc)
{
    libxl__domain_save_state *const dss = sws->dss;

    domain_save_done(egc, dss, rc);
}
427
/*
 * Final stage of a domain save.
 *
 * Cancels the guest suspend event channel event and, if a port had been
 * bound (port > 0), releases it.  Without Remus/COLO the result is then
 * delivered through dss->callback.  With dss->remus set, the matching
 * teardown is started instead and dss->callback is not invoked from
 * this function.
 */
static void domain_save_done(libxl__egc *egc,
                             libxl__domain_save_state *dss, int rc)
{
    STATE_AO_GC(dss->ao);
    libxl__domain_suspend_state *dsps = &dss->dsps;
    const uint32_t domid = dss->domid;

    libxl__ev_evtchn_cancel(gc, &dsps->guest_evtchn);

    if (dsps->guest_evtchn.port > 0) {
        xc_suspend_evtchn_release(CTX->xch, CTX->xce, domid,
                        dsps->guest_evtchn.port, &dsps->guest_evtchn_lockfd);
    }

    if (!dss->remus) {
        dss->callback(egc, dss, rc);
        return;
    }

    /*
     * With Remus/COLO, getting here means that either the backup died
     * or some network error occurred which prevents us from sending
     * checkpoints.  Tear down the network buffers and release netlink
     * resources.  This is an async op.
     */
    if (libxl_defbool_val(dss->remus->colo))
        libxl__colo_save_teardown(egc, &dss->css, rc);
    else
        libxl__remus_teardown(egc, &dss->rs, rc);
}
459
460 /*========================= Domain restore ============================*/
461
/*
 * Look for the NUL terminating the string that begins at 'start',
 * examining only the bytes in [start, end).  Returns a pointer to the
 * byte right after that NUL, or NULL if the range is empty or holds no
 * NUL at all.
 */
static const char *next_string(const char *start, const char *end)
{
    const char *nul;

    if (start >= end)
        return NULL;

    nul = memchr(start, '\0', (size_t)(end - start));

    return nul ? nul + 1 : NULL;
}
479
/*
 * Write emulator xenstore data received in a migration stream back
 * into xenstore, below the device model path of the new guest.
 *
 * 'ptr' / 'size' describe a buffer holding a sequence of NUL-terminated
 * key/value string pairs.  Each key must be non-empty, relative (must
 * not begin with '/') and, like each value, terminated within the
 * buffer.
 *
 * Returns 0 when every pair has been written.  Returns ERROR_FAIL on
 * malformed input or when a xenstore write fails; pairs written before
 * the failure are not rolled back.
 */
int libxl__restore_emulator_xenstore_data(libxl__domain_create_state *dcs,
                                          const char *ptr, uint32_t size)
{
    STATE_AO_GC(dcs->ao);
    const char *next = ptr, *end = ptr + size, *key, *val;
    int rc;

    const uint32_t domid = dcs->guest_domid;
    const uint32_t dm_domid = libxl_get_stubdom_id(CTX, domid);
    const char *xs_root = DEVICE_MODEL_XS_PATH(gc, dm_domid, domid, "");

    while (next < end) {
        key = next;
        next = next_string(next, end);

        /* Sanitise 'key'. */
        if (!next) {
            rc = ERROR_FAIL;
            LOGD(ERROR, domid, "Key in xenstore data not NUL terminated");
            goto out;
        }
        if (key[0] == '\0') {
            rc = ERROR_FAIL;
            LOGD(ERROR, domid, "empty key found in xenstore data");
            goto out;
        }
        if (key[0] == '/') {
            rc = ERROR_FAIL;
            LOGD(ERROR, domid, "Key in xenstore data not relative");
            goto out;
        }

        val = next;
        next = next_string(next, end);

        /* Sanitise 'val'. */
        if (!next) {
            rc = ERROR_FAIL;
            LOGD(ERROR, domid, "Val in xenstore data not NUL terminated");
            goto out;
        }

        /*
         * A write that fails must not be reported as a successful
         * restore: the device model would start without this entry.
         */
        rc = libxl__xs_printf(gc, XBT_NULL,
                              GCSPRINTF("%s/%s", xs_root, key),
                              "%s", val);
        if (rc) {
            LOGD(ERROR, domid, "Failed to write xenstore key `%s/%s'",
                 xs_root, key);
            rc = ERROR_FAIL;
            goto out;
        }
    }

    rc = 0;

 out:
    return rc;
}
532
533 /*
534 * Local variables:
535 * mode: C
536 * c-basic-offset: 4
537 * indent-tabs-mode: nil
538 * End:
539 */
540