1 /*
2 * Copyright (C) 2009 Citrix Ltd.
3 * Author Vincent Hanquez <vincent.hanquez@eu.citrix.com>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU Lesser General Public License as published
7 * by the Free Software Foundation; version 2.1 only. with the special
8 * exception on linking described in file LICENSE.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU Lesser General Public License for more details.
14 */
15
16 #include "libxl_osdeps.h" /* must come before any other headers */
17
18 #include "libxl_internal.h"
19
20 #include <xen/errno.h>
21
22 /*========================= Domain save ============================*/
23
24 static void stream_done(libxl__egc *egc,
25 libxl__stream_write_state *sws, int rc);
26 static void domain_save_done(libxl__egc *egc,
27 libxl__domain_save_state *dss, int rc);
28
29 /*----- complicated callback, called by xc_domain_save -----*/
30
31 static void domain_suspend_switch_qemu_xen_logdirty
32 (libxl__egc *egc, int domid, unsigned enable,
33 libxl__logdirty_switch *lds);
34 static void switch_qemu_xen_logdirty_done(libxl__egc *egc,
35 libxl__ev_qmp *qmp,
36 const libxl__json_object *,
37 int rc);
38 static void switch_logdirty_timeout(libxl__egc *egc, libxl__ev_time *ev,
39 const struct timeval *requested_abs,
40 int rc);
41 static void switch_logdirty_done(libxl__egc *egc,
42 libxl__logdirty_switch *lds, int rc);
43
/*
 * Bring a logdirty switch into its initial state: no command path, and
 * each embedded event source (xenstore watch, timer, QMP) run through
 * its own init routine.
 */
void libxl__logdirty_init(libxl__logdirty_switch *lds)
{
    libxl__ev_xswatch_init(&lds->watch);
    libxl__ev_time_init(&lds->timeout);
    libxl__ev_qmp_init(&lds->qmp);

    lds->cmd_path = NULL;
}
51
/*
 * Ask the device model of 'domid' to turn dirty logging on or off.
 *
 * Only a running qemu-xen device model is handled; for anything else the
 * failure is logged and lds->callback is invoked straight away with
 * ERROR_FAIL.  On the qemu-xen path the result is reported through
 * lds->callback as well.
 */
void libxl__domain_common_switch_qemu_logdirty(libxl__egc *egc,
                                               int domid, unsigned enable,
                                               libxl__logdirty_switch *lds)
{
    STATE_AO_GC(lds->ao);

    if (libxl__device_model_version_running(gc, domid) ==
        LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN) {
        domain_suspend_switch_qemu_xen_logdirty(egc, domid, enable, lds);
        return;
    }

    /* Any other answer (including "unknown") cannot be serviced. */
    LOGD(ERROR, domid, "logdirty switch failed"
         ", no valid device model version found, abandoning suspend");
    lds->callback(egc, lds, ERROR_FAIL);
}
68
/*
 * qemu-xen flavour of the logdirty switch: arm a timeout, then send the
 * "xen-set-global-dirty-log" QMP command with the requested 'enable'
 * value.  If either step cannot be started, the QMP completion handler is
 * called directly with the error so that cleanup and reporting follow the
 * same path as an asynchronous failure.
 */
static void domain_suspend_switch_qemu_xen_logdirty
                               (libxl__egc *egc, int domid, unsigned enable,
                                libxl__logdirty_switch *lds)
{
    STATE_AO_GC(lds->ao);
    libxl__ev_qmp *const qmp = &lds->qmp;
    libxl__json_object *cmd_args = NULL;
    int rc;

    /*
     * NOTE(review): 10 * 1000 is presumably milliseconds, i.e. a 10 second
     * limit - confirm against libxl__ev_time_register_rel.
     */
    rc = libxl__ev_time_register_rel(ao, &lds->timeout,
                                     switch_logdirty_timeout, 10 * 1000);
    if (!rc) {
        qmp->ao = ao;
        qmp->domid = domid;
        qmp->payload_fd = -1;
        qmp->callback = switch_qemu_xen_logdirty_done;

        libxl__qmp_param_add_bool(gc, &cmd_args, "enable", enable);
        rc = libxl__ev_qmp_send(egc, qmp, "xen-set-global-dirty-log",
                                cmd_args);
        if (!rc)
            return; /* completion arrives via qmp->callback or the timer */
    }

    /* Could not even start: finish synchronously with the error. */
    switch_qemu_xen_logdirty_done(egc, qmp, NULL, rc);
}
96
/*
 * Completion of the "xen-set-global-dirty-log" QMP command (also called
 * directly when the command could not be started).  The response object
 * 'r' is not inspected; only 'rc' matters.  A failure is logged, then the
 * common completion path runs with the same rc.
 */
static void switch_qemu_xen_logdirty_done(libxl__egc *egc,
                                          libxl__ev_qmp *qmp,
                                          const libxl__json_object *r,
                                          int rc)
{
    EGC_GC;
    /* The QMP event is embedded in the logdirty switch; recover the owner. */
    libxl__logdirty_switch *lds = CONTAINER_OF(qmp, *lds, qmp);

    if (rc != 0) {
        LOGD(ERROR, qmp->domid,
             "logdirty switch failed (rc=%d), abandoning suspend", rc);
    }

    switch_logdirty_done(egc, lds, rc);
}
110
/*
 * Timer callback for the logdirty switch.  Whatever 'rc' the timer
 * machinery passes in is not consulted: the event is always logged as a
 * device model timeout and the switch is completed with ERROR_FAIL.
 */
static void switch_logdirty_timeout(libxl__egc *egc, libxl__ev_time *ev,
                                    const struct timeval *requested_abs,
                                    int rc)
{
    /* The timer is embedded in the logdirty switch; recover the owner. */
    libxl__logdirty_switch *lds = CONTAINER_OF(ev, *lds, timeout);
    STATE_AO_GC(lds->ao);

    LOG(ERROR,"logdirty switch: wait for device model timed out");

    switch_logdirty_done(egc, lds, ERROR_FAIL);
}
120
/*
 * Common end of a logdirty switch attempt: release the xenstore watch,
 * the timeout and the QMP state held in 'lds', then hand 'rc' to the
 * caller-supplied lds->callback.
 */
static void switch_logdirty_done(libxl__egc *egc,
                                 libxl__logdirty_switch *lds,
                                 int rc)
{
    STATE_AO_GC(lds->ao);

    /* Tear down every event source before the callback runs. */
    libxl__ev_xswatch_deregister(gc, &lds->watch);
    libxl__ev_time_deregister(gc, &lds->timeout);
    libxl__ev_qmp_dispose(gc, &lds->qmp);

    lds->callback(egc, lds, rc);
}
133
134 static void domain_suspend_switch_qemu_logdirty_done
135 (libxl__egc *egc, libxl__logdirty_switch *lds, int rc);
136
libxl__domain_suspend_common_switch_qemu_logdirty(uint32_t domid,unsigned enable,void * user)137 void libxl__domain_suspend_common_switch_qemu_logdirty
138 (uint32_t domid, unsigned enable, void *user)
139 {
140 libxl__save_helper_state *shs = user;
141 libxl__egc *egc = shs->egc;
142 libxl__domain_save_state *dss = shs->caller_state;
143
144 /* Convenience aliases. */
145 libxl__logdirty_switch *const lds = &dss->logdirty;
146
147 if (dss->type == LIBXL_DOMAIN_TYPE_PVH) {
148 domain_suspend_switch_qemu_logdirty_done(egc, lds, 0);
149 return;
150 }
151
152 lds->callback = domain_suspend_switch_qemu_logdirty_done;
153 libxl__domain_common_switch_qemu_logdirty(egc, domid, enable, lds);
154 }
155
/*
 * Completion callback of the logdirty switch during a save.  A non-zero
 * 'rc' is recorded in dss->rc and reported to the save helper as -1;
 * success is reported as 0.
 */
static void domain_suspend_switch_qemu_logdirty_done
                        (libxl__egc *egc, libxl__logdirty_switch *lds, int rc)
{
    /* The logdirty switch is embedded in the save state; recover it. */
    libxl__domain_save_state *dss = CONTAINER_OF(lds, *dss, logdirty);
    int helper_result = 0;

    if (rc) {
        dss->rc = rc;
        helper_result = -1;
    }

    libxl__xc_domain_saverestore_async_callback_done(egc, &dss->sws.shs,
                                                     helper_result);
}
168
169 /*----- callbacks, called by xc_domain_save -----*/
170
171 /*
172 * Expand the buffer 'buf' of length 'len', to append 'str' including its NUL
173 * terminator.
174 */
append_string(libxl__gc * gc,char ** buf,uint32_t * len,const char * str)175 static void append_string(libxl__gc *gc, char **buf, uint32_t *len,
176 const char *str)
177 {
178 size_t extralen = strlen(str) + 1;
179 char *new = libxl__realloc(gc, *buf, *len + extralen);
180
181 *buf = new;
182 memcpy(new + *len, str, extralen);
183 *len += extralen;
184 }
185
/*
 * Collect the device model's "physmap" xenstore entries for the domain
 * being saved into one flat buffer.
 *
 * For every child of <device-model-root>/physmap, the subkeys
 * "start_addr", "size" and "name" are read.  Each is appended to the
 * buffer as two consecutive NUL-terminated strings: the key relative to
 * the device model root ("physmap/<entry>/<subkey>") followed by its
 * value.
 *
 * On success returns 0 and stores the buffer and its length in
 * *callee_buf / *callee_len (NULL and 0 when there are no physmap
 * entries).  If any subkey cannot be read, the failure is logged,
 * ERROR_FAIL is returned and the output parameters are left untouched.
 */
int libxl__save_emulator_xenstore_data(libxl__domain_save_state *dss,
                                       char **callee_buf,
                                       uint32_t *callee_len)
{
    STATE_AO_GC(dss->ao);
    const char *xs_root;
    char **entries, *buf = NULL;
    unsigned int nr_entries, i, j;
    uint32_t len = 0; /* same type as append_string()'s length parameter */
    int rc;

    const uint32_t domid = dss->domid;
    const uint32_t dm_domid = libxl_get_stubdom_id(CTX, domid);

    xs_root = DEVICE_MODEL_XS_PATH(gc, dm_domid, domid, "");

    entries = libxl__xs_directory(gc, 0, GCSPRINTF("%s/physmap", xs_root),
                                  &nr_entries);
    /* No physmap directory, or an empty one: succeed with no data. */
    if (!entries || nr_entries == 0) { rc = 0; goto out; }

    for (i = 0; i < nr_entries; ++i) {
        static const char *const physmap_subkeys[] = {
            "start_addr", "size", "name"
        };

        for (j = 0; j < ARRAY_SIZE(physmap_subkeys); ++j) {
            const char *key = GCSPRINTF("physmap/%s/%s",
                                        entries[i], physmap_subkeys[j]);
            const char *path = GCSPRINTF("%s/%s", xs_root, key);
            const char *val = libxl__xs_read(gc, XBT_NULL, path);

            if (!val) {
                LOGD(ERROR, domid,
                     "Failed to read emulator xenstore key %s", path);
                rc = ERROR_FAIL;
                goto out;
            }

            append_string(gc, &buf, &len, key);
            append_string(gc, &buf, &len, val);
        }
    }

    rc = 0;

 out:
    /* Only publish the buffer when the whole collection succeeded. */
    if (!rc) {
        *callee_buf = buf;
        *callee_len = len;
    }

    return rc;
}
235
236 /*----- main code for saving, in order of execution -----*/
237
/*
 * Entry point of an asynchronous domain save.
 *
 * Validates the request, initialises the logdirty and suspend sub-states,
 * computes the xc flags, rejects vNUMA guests, installs the save-helper
 * callbacks and finally starts the stream writer.  Any failure before the
 * stream starts is reported through domain_save_done(); afterwards
 * completion arrives through stream_done().
 */
void libxl__domain_save(libxl__egc *egc, libxl__domain_save_state *dss)
{
    STATE_AO_GC(dss->ao);
    int rc, ret;

    /* Convenience aliases */
    const uint32_t domid = dss->domid;
    const libxl_domain_type type = dss->type;
    const int live = dss->live;
    const int debug = dss->debug;
    const libxl_domain_remus_info *const r_info = dss->remus;
    libxl__stream_write_state *const sws = &dss->sws;
    libxl__srm_save_autogen_callbacks *const callbacks =
        &dss->sws.shs.callbacks.save.a;
    libxl__domain_suspend_state *dsps = &dss->dsps;
    unsigned int nr_vnodes = 0, nr_vmemranges = 0, nr_vcpus = 0;

    /* A checkpointed stream is only meaningful with checkpoint info. */
    if (!r_info && dss->checkpointed_stream != LIBXL_CHECKPOINTED_STREAM_NONE) {
        LOGD(ERROR, domid, "Migration stream is checkpointed, but there's no "
             "checkpoint info!");
        rc = ERROR_INVAL;
        goto out;
    }

    dss->rc = 0;
    libxl__logdirty_init(&dss->logdirty);
    dss->logdirty.ao = ao;

    dsps->ao = ao;
    dsps->domid = domid;
    dsps->live = !!live;
    rc = libxl__domain_suspend_init(egc, dsps, type);
    if (rc) goto out;

    dss->xcflags = 0;
    if (live)
        dss->xcflags |= XCFLAGS_LIVE;
    if (debug)
        dss->xcflags |= XCFLAGS_DEBUG;

    /* Disallow saving a guest with vNUMA configured because migration
     * stream does not preserve node information.
     *
     * Reject any domain which has vnuma enabled, even if the
     * configuration is empty. Only domains which have no vnuma
     * configuration at all are supported.
     */
    ret = xc_domain_getvnuma(CTX->xch, domid, &nr_vnodes, &nr_vmemranges,
                             &nr_vcpus, NULL, NULL, NULL);
    /* Only "failed with EOPNOTSUPP" is accepted as "no vNUMA at all". */
    if (!(ret == -1 && errno == EOPNOTSUPP)) {
        LOGD(ERROR, domid, "Cannot save a guest with vNUMA configured");
        rc = ERROR_FAIL;
        goto out;
    }

    /* The suspend callback is only installed here for plain streams. */
    if (dss->checkpointed_stream == LIBXL_CHECKPOINTED_STREAM_NONE)
        callbacks->suspend = libxl__domain_suspend_callback;

    callbacks->switch_qemu_logdirty =
        libxl__domain_suspend_common_switch_qemu_logdirty;

    sws->ao = dss->ao;
    sws->dss = dss;
    sws->fd = dss->fd;
    sws->back_channel = false;
    sws->completion_callback = stream_done;

    libxl__stream_write_start(egc, sws);
    return;

 out:
    domain_save_done(egc, dss, rc);
}
306
/*
 * Completion callback of the stream writer: forwards the result to
 * domain_save_done() for the save state that owns the stream.
 */
static void stream_done(libxl__egc *egc,
                        libxl__stream_write_state *sws, int rc)
{
    libxl__domain_save_state *owner = sws->dss;

    domain_save_done(egc, owner, rc);
}
312
/*
 * Final stage of a domain save.  Cancels the guest suspend event channel
 * wait and releases the suspend event channel if one was bound.  For a
 * plain save the user callback is then invoked with 'rc'; for Remus/COLO
 * the matching asynchronous teardown is started instead and is handed
 * 'rc'.
 */
static void domain_save_done(libxl__egc *egc,
                             libxl__domain_save_state *dss, int rc)
{
    STATE_AO_GC(dss->ao);

    /* Convenience aliases */
    const uint32_t domid = dss->domid;
    libxl__domain_suspend_state *dsps = &dss->dsps;

    libxl__ev_evtchn_cancel(gc, &dsps->guest_evtchn);

    if (dsps->guest_evtchn.port > 0) {
        xc_suspend_evtchn_release(CTX->xch, CTX->xce, domid,
                                  dsps->guest_evtchn.port,
                                  &dsps->guest_evtchn_lockfd);
    }

    if (!dss->remus) {
        /* Ordinary save: report straight to the caller. */
        dss->callback(egc, dss, rc);
        return;
    }

    /*
     * With Remus/COLO, if we reach this point, it means either
     * backup died or some network error occurred preventing us
     * from sending checkpoints. Teardown the network buffers and
     * release netlink resources.  This is an async op.
     */
    if (libxl_defbool_val(dss->remus->colo))
        libxl__colo_save_teardown(egc, &dss->css, rc);
    else
        libxl__remus_teardown(egc, &dss->rs, rc);
}
344
345 /*========================= Domain restore ============================*/
346
/*
 * Scan the bytes in [start, end) for the NUL that terminates the string
 * beginning at 'start'.  Returns a pointer to the byte just past that
 * NUL, or NULL when the range is empty or holds no NUL at all.
 */
static const char *next_string(const char *start, const char *end)
{
    const char *nul;

    if (start >= end)
        return NULL;

    nul = memchr(start, '\0', (size_t)(end - start));

    return nul ? nul + 1 : NULL;
}
364
/*
 * Replay emulator xenstore data from a migration stream into the device
 * model's xenstore area of the domain being created.
 *
 * 'ptr' / 'size' describe a buffer of consecutive NUL-terminated strings
 * forming (key, value) pairs.  Every key must be non-empty, relative (no
 * leading '/') and terminated inside the buffer; every value must be
 * terminated inside the buffer.  Each pair is written to
 * <device-model-root>/<key>.
 *
 * Returns 0 when all pairs were written.  Returns ERROR_FAIL, after
 * logging, on malformed input or when a xenstore write fails; pairs
 * written before the failure are not rolled back.
 */
int libxl__restore_emulator_xenstore_data(libxl__domain_create_state *dcs,
                                          const char *ptr, uint32_t size)
{
    STATE_AO_GC(dcs->ao);
    const char *next = ptr, *end = ptr + size, *key, *val;
    int rc;

    const uint32_t domid = dcs->guest_domid;
    const uint32_t dm_domid = libxl_get_stubdom_id(CTX, domid);
    const char *xs_root = DEVICE_MODEL_XS_PATH(gc, dm_domid, domid, "");

    while (next < end) {
        key = next;
        next = next_string(next, end);

        /* Sanitise 'key'. */
        if (!next) {
            rc = ERROR_FAIL;
            LOGD(ERROR, domid, "Key in xenstore data not NUL terminated");
            goto out;
        }
        if (key[0] == '\0') {
            rc = ERROR_FAIL;
            LOGD(ERROR, domid, "empty key found in xenstore data");
            goto out;
        }
        if (key[0] == '/') {
            /* Absolute keys would escape the device model's subtree. */
            rc = ERROR_FAIL;
            LOGD(ERROR, domid, "Key in xenstore data not relative");
            goto out;
        }

        val = next;
        next = next_string(next, end);

        /* Sanitise 'val'. */
        if (!next) {
            rc = ERROR_FAIL;
            LOGD(ERROR, domid, "Val in xenstore data not NUL terminated");
            goto out;
        }

        /* A lost write must not be reported as a successful restore. */
        if (libxl__xs_printf(gc, XBT_NULL,
                             GCSPRINTF("%s/%s", xs_root, key),
                             "%s", val)) {
            rc = ERROR_FAIL;
            LOGD(ERROR, domid,
                 "Failed to write emulator xenstore key %s/%s",
                 xs_root, key);
            goto out;
        }
    }

    rc = 0;

 out:
    return rc;
}
417
418 /*
419 * Local variables:
420 * mode: C
421 * c-basic-offset: 4
422 * indent-tabs-mode: nil
423 * End:
424 */
425