1 /*
2 * Copyright 2009-2017 Citrix Ltd and other contributors
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU Lesser General Public License as published
6 * by the Free Software Foundation; version 2.1 only. with the special
7 * exception on linking described in file LICENSE.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU Lesser General Public License for more details.
13 */
14
15 #include <fcntl.h>
16 #include <inttypes.h>
17 #include <signal.h>
18 #include <stdlib.h>
19 #include <sys/stat.h>
20 #include <sys/types.h>
21 #include <sys/utsname.h>
22 #include <time.h>
23 #include <unistd.h>
24
25 #include <libxl.h>
26 #include <libxl_utils.h>
27 #include <libxlutil.h>
28
29 #include "xl.h"
30 #include "xl_utils.h"
31 #include "xl_parse.h"
32
33 #ifndef LIBXL_HAVE_NO_SUSPEND_RESUME
34
create_migration_child(const char * rune,int * send_fd,int * recv_fd)35 static pid_t create_migration_child(const char *rune, int *send_fd,
36 int *recv_fd)
37 {
38 int sendpipe[2], recvpipe[2];
39 pid_t child;
40
41 if (!rune || !send_fd || !recv_fd)
42 return -1;
43
44 MUST( libxl_pipe(ctx, sendpipe) );
45 MUST( libxl_pipe(ctx, recvpipe) );
46
47 child = xl_fork(child_migration, "migration transport process");
48
49 if (!child) {
50 dup2(sendpipe[0], 0);
51 dup2(recvpipe[1], 1);
52 close(sendpipe[0]); close(sendpipe[1]);
53 close(recvpipe[0]); close(recvpipe[1]);
54 execlp("sh","sh","-c",rune,(char*)0);
55 perror("failed to exec sh");
56 exit(EXIT_FAILURE);
57 }
58
59 close(sendpipe[0]);
60 close(recvpipe[1]);
61 *send_fd = sendpipe[1];
62 *recv_fd = recvpipe[0];
63
64 /* if receiver dies, we get an error and can clean up
65 rather than just dying */
66 signal(SIGPIPE, SIG_IGN);
67
68 return child;
69 }
70
migrate_read_fixedmessage(int fd,const void * msg,int msgsz,const char * what,const char * rune)71 static int migrate_read_fixedmessage(int fd, const void *msg, int msgsz,
72 const char *what, const char *rune) {
73 char buf[msgsz];
74 const char *stream;
75 int rc;
76
77 stream = rune ? "migration receiver stream" : "migration stream";
78 rc = libxl_read_exactly(ctx, fd, buf, msgsz, stream, what);
79 if (rc) return 1;
80
81 if (memcmp(buf, msg, msgsz)) {
82 fprintf(stderr, "%s contained unexpected data instead of %s\n",
83 stream, what);
84 if (rune)
85 fprintf(stderr, "(command run was: %s )\n", rune);
86 return 1;
87 }
88 return 0;
89 }
90
migration_child_report(int recv_fd)91 static void migration_child_report(int recv_fd) {
92 pid_t child;
93 int status, sr;
94 struct timeval now, waituntil, timeout;
95 static const struct timeval pollinterval = { 0, 1000 }; /* 1ms */
96
97 if (!xl_child_pid(child_migration)) return;
98
99 CHK_SYSCALL(gettimeofday(&waituntil, 0));
100 waituntil.tv_sec += 2;
101
102 for (;;) {
103 pid_t migration_child = xl_child_pid(child_migration);
104 child = xl_waitpid(child_migration, &status, WNOHANG);
105
106 if (child == migration_child) {
107 if (status)
108 xl_report_child_exitstatus(XTL_INFO, child_migration,
109 migration_child, status);
110 break;
111 }
112 if (child == -1) {
113 fprintf(stderr, "wait for migration child [%ld] failed: %s\n",
114 (long)migration_child, strerror(errno));
115 break;
116 }
117 assert(child == 0);
118
119 CHK_SYSCALL(gettimeofday(&now, 0));
120 if (timercmp(&now, &waituntil, >)) {
121 fprintf(stderr, "migration child [%ld] not exiting, no longer"
122 " waiting (exit status will be unreported)\n",
123 (long)migration_child);
124 break;
125 }
126 timersub(&waituntil, &now, &timeout);
127
128 if (recv_fd >= 0) {
129 fd_set readfds, exceptfds;
130 FD_ZERO(&readfds);
131 FD_ZERO(&exceptfds);
132 FD_SET(recv_fd, &readfds);
133 FD_SET(recv_fd, &exceptfds);
134 sr = select(recv_fd+1, &readfds,0,&exceptfds, &timeout);
135 } else {
136 if (timercmp(&timeout, &pollinterval, >))
137 timeout = pollinterval;
138 sr = select(0,0,0,0, &timeout);
139 }
140 if (sr > 0) {
141 recv_fd = -1;
142 } else if (sr == 0) {
143 } else if (sr == -1) {
144 if (errno != EINTR) {
145 fprintf(stderr, "migration child [%ld] exit wait select"
146 " failed unexpectedly: %s\n",
147 (long)migration_child, strerror(errno));
148 break;
149 }
150 }
151 }
152 }
153
migrate_do_preamble(int send_fd,int recv_fd,pid_t child,uint8_t * config_data,int config_len,const char * rune)154 static void migrate_do_preamble(int send_fd, int recv_fd, pid_t child,
155 uint8_t *config_data, int config_len,
156 const char *rune)
157 {
158 int rc = 0;
159
160 if (send_fd < 0 || recv_fd < 0) {
161 fprintf(stderr, "migrate_do_preamble: invalid file descriptors\n");
162 exit(EXIT_FAILURE);
163 }
164
165 rc = migrate_read_fixedmessage(recv_fd, migrate_receiver_banner,
166 sizeof(migrate_receiver_banner)-1,
167 "banner", rune);
168 if (rc) {
169 close(send_fd);
170 migration_child_report(recv_fd);
171 exit(EXIT_FAILURE);
172 }
173
174 save_domain_core_writeconfig(send_fd, "migration stream",
175 config_data, config_len);
176
177 }
178
migrate_domain(uint32_t domid,const char * rune,int debug,const char * override_config_file)179 static void migrate_domain(uint32_t domid, const char *rune, int debug,
180 const char *override_config_file)
181 {
182 pid_t child = -1;
183 int rc;
184 int send_fd = -1, recv_fd = -1;
185 char *away_domname;
186 char rc_buf;
187 uint8_t *config_data;
188 int config_len, flags = LIBXL_SUSPEND_LIVE;
189
190 save_domain_core_begin(domid, override_config_file,
191 &config_data, &config_len);
192
193 if (!config_len) {
194 fprintf(stderr, "No config file stored for running domain and "
195 "none supplied - cannot migrate.\n");
196 exit(EXIT_FAILURE);
197 }
198
199 child = create_migration_child(rune, &send_fd, &recv_fd);
200
201 migrate_do_preamble(send_fd, recv_fd, child, config_data, config_len,
202 rune);
203
204 xtl_stdiostream_adjust_flags(logger, XTL_STDIOSTREAM_HIDE_PROGRESS, 0);
205
206 if (debug)
207 flags |= LIBXL_SUSPEND_DEBUG;
208 rc = libxl_domain_suspend(ctx, domid, send_fd, flags, NULL);
209 if (rc) {
210 fprintf(stderr, "migration sender: libxl_domain_suspend failed"
211 " (rc=%d)\n", rc);
212 if (rc == ERROR_GUEST_TIMEDOUT)
213 goto failed_suspend;
214 else
215 goto failed_resume;
216 }
217
218 //fprintf(stderr, "migration sender: Transfer complete.\n");
219 // Should only be printed when debugging as it's a bit messy with
220 // progress indication.
221
222 rc = migrate_read_fixedmessage(recv_fd, migrate_receiver_ready,
223 sizeof(migrate_receiver_ready),
224 "ready message", rune);
225 if (rc) goto failed_resume;
226
227 xtl_stdiostream_adjust_flags(logger, 0, XTL_STDIOSTREAM_HIDE_PROGRESS);
228
229 /* right, at this point we are about give the destination
230 * permission to rename and resume, so we must first rename the
231 * domain away ourselves */
232
233 fprintf(stderr, "migration sender: Target has acknowledged transfer.\n");
234
235 if (common_domname) {
236 xasprintf(&away_domname, "%s--migratedaway", common_domname);
237 rc = libxl_domain_rename(ctx, domid, common_domname, away_domname);
238 if (rc) goto failed_resume;
239 }
240
241 /* point of no return - as soon as we have tried to say
242 * "go" to the receiver, it's not safe to carry on. We leave
243 * the domain renamed to %s--migratedaway in case that's helpful.
244 */
245
246 fprintf(stderr, "migration sender: Giving target permission to start.\n");
247
248 rc = libxl_write_exactly(ctx, send_fd,
249 migrate_permission_to_go,
250 sizeof(migrate_permission_to_go),
251 "migration stream", "GO message");
252 if (rc) goto failed_badly;
253
254 rc = migrate_read_fixedmessage(recv_fd, migrate_report,
255 sizeof(migrate_report),
256 "success/failure report message", rune);
257 if (rc) goto failed_badly;
258
259 rc = libxl_read_exactly(ctx, recv_fd,
260 &rc_buf, 1,
261 "migration ack stream", "success/failure status");
262 if (rc) goto failed_badly;
263
264 if (rc_buf) {
265 fprintf(stderr, "migration sender: Target reports startup failure"
266 " (status code %d).\n", rc_buf);
267
268 rc = migrate_read_fixedmessage(recv_fd, migrate_permission_to_go,
269 sizeof(migrate_permission_to_go),
270 "permission for sender to resume",
271 rune);
272 if (rc) goto failed_badly;
273
274 fprintf(stderr, "migration sender: Trying to resume at our end.\n");
275
276 if (common_domname) {
277 libxl_domain_rename(ctx, domid, away_domname, common_domname);
278 }
279 rc = libxl_domain_resume(ctx, domid, 1, 0);
280 if (!rc) fprintf(stderr, "migration sender: Resumed OK.\n");
281
282 fprintf(stderr, "Migration failed due to problems at target.\n");
283 exit(EXIT_FAILURE);
284 }
285
286 fprintf(stderr, "migration sender: Target reports successful startup.\n");
287 libxl_domain_destroy(ctx, domid, 0); /* bang! */
288 fprintf(stderr, "Migration successful.\n");
289 exit(EXIT_SUCCESS);
290
291 failed_suspend:
292 close(send_fd);
293 migration_child_report(recv_fd);
294 fprintf(stderr, "Migration failed, failed to suspend at sender.\n");
295 exit(EXIT_FAILURE);
296
297 failed_resume:
298 close(send_fd);
299 migration_child_report(recv_fd);
300 fprintf(stderr, "Migration failed, resuming at sender.\n");
301 libxl_domain_resume(ctx, domid, 1, 0);
302 exit(EXIT_FAILURE);
303
304 failed_badly:
305 fprintf(stderr,
306 "** Migration failed during final handshake **\n"
307 "Domain state is now undefined !\n"
308 "Please CHECK AT BOTH ENDS for running instances, before renaming and\n"
309 " resuming at most one instance. Two simultaneous instances of the domain\n"
310 " would probably result in SEVERE DATA LOSS and it is now your\n"
311 " responsibility to avoid that. Sorry.\n");
312
313 close(send_fd);
314 migration_child_report(recv_fd);
315 exit(EXIT_FAILURE);
316 }
317
/*
 * Receiver side of a migration: announce readiness on send_fd, build
 * the incoming domain (paused) from recv_fd via create_domain(), then
 * either perform Remus/COLO failover or run the hand-over handshake
 * with the sender.
 *
 * debug, daemonize, monitor: copied into the domain_create request.
 * pause_after_migration:     non-zero leaves the domain paused after a
 *                            successful plain migration.
 * send_fd / recv_fd:         descriptors for talking to / hearing from
 *                            the sender.
 * checkpointed:              stream type; REMUS and COLO take the
 *                            failover path once create_domain() returns.
 * colo_proxy_script, userspace_colo_proxy: copied into the request.
 *
 * This function does not return: every path ends in exit().
 */
static void migrate_receive(int debug, int daemonize, int monitor,
                            int pause_after_migration,
                            int send_fd, int recv_fd,
                            libxl_checkpointed_stream checkpointed,
                            char *colo_proxy_script,
                            bool userspace_colo_proxy)
{
    uint32_t domid;
    int rc, rc2;
    char rc_buf;
    /* Tested against NULL below, so it must not start out as garbage
     * should create_domain() leave it unset. */
    char *migration_domname = NULL;
    struct domain_create dom_info;

    signal(SIGPIPE, SIG_IGN);
    /* if we get SIGPIPE we'd rather just have it as an error */

    fprintf(stderr, "migration target: Ready to receive domain.\n");

    CHK_ERRNOVAL(libxl_write_exactly(
                     ctx, send_fd, migrate_receiver_banner,
                     sizeof(migrate_receiver_banner)-1,
                     "migration ack stream", "banner") );

    memset(&dom_info, 0, sizeof(dom_info));
    dom_info.debug = debug;
    dom_info.daemonize = daemonize;
    dom_info.monitor = monitor;
    dom_info.paused = 1;
    dom_info.migrate_fd = recv_fd;
    dom_info.send_back_fd = send_fd;
    dom_info.migration_domname_r = &migration_domname;
    dom_info.checkpointed_stream = checkpointed;
    dom_info.colo_proxy_script = colo_proxy_script;
    dom_info.userspace_colo_proxy = userspace_colo_proxy;

    rc = create_domain(&dom_info);
    if (rc < 0) {
        fprintf(stderr, "migration target: Domain creation failed"
                " (code %d).\n", rc);
        exit(EXIT_FAILURE);
    }

    /* A non-negative return from create_domain() is the new domid. */
    domid = rc;

    switch (checkpointed) {
    case LIBXL_CHECKPOINTED_STREAM_REMUS:
    case LIBXL_CHECKPOINTED_STREAM_COLO:
    {
        const char *ha = checkpointed == LIBXL_CHECKPOINTED_STREAM_COLO ?
                         "COLO" : "Remus";
        /* If we are here, it means that the sender (primary) has crashed.
         * TODO: Split-Brain Check.
         */
        fprintf(stderr, "migration target: %s Failover for domain %u\n",
                ha, domid);

        /*
         * If domain renaming fails, lets just continue (as we need the domain
         * to be up & dom names may not matter much, as long as its reachable
         * over network).
         *
         * If domain unpausing fails, destroy domain ? Or is it better to have
         * a consistent copy of the domain (memory, cpu state, disk)
         * on atleast one physical host ? Right now, lets just leave the domain
         * as is and let the Administrator decide (or troubleshoot).
         */

        /* rc still holds the domid at this point; clear it so that the
         * COLO exit status below reflects only the rename result. */
        rc = 0;
        if (migration_domname) {
            rc = libxl_domain_rename(ctx, domid, migration_domname,
                                     common_domname);
            if (rc)
                fprintf(stderr, "migration target (%s): "
                        "Failed to rename domain from %s to %s:%d\n",
                        ha, migration_domname, common_domname, rc);
        }

        if (checkpointed == LIBXL_CHECKPOINTED_STREAM_COLO)
            /* The guest is running after failover in COLO mode */
            exit(rc ? -ERROR_FAIL: 0);

        rc = libxl_domain_unpause(ctx, domid);
        if (rc)
            fprintf(stderr, "migration target (%s): "
                    "Failed to unpause domain %s (id: %u):%d\n",
                    ha, common_domname, domid, rc);

        exit(rc ? EXIT_FAILURE : EXIT_SUCCESS);
    }
    default:
        /* do nothing */
        break;
    }

    fprintf(stderr, "migration target: Transfer complete,"
            " requesting permission to start domain.\n");

    rc = libxl_write_exactly(ctx, send_fd,
                             migrate_receiver_ready,
                             sizeof(migrate_receiver_ready),
                             "migration ack stream", "ready message");
    if (rc) exit(EXIT_FAILURE);

    rc = migrate_read_fixedmessage(recv_fd, migrate_permission_to_go,
                                   sizeof(migrate_permission_to_go),
                                   "GO message", 0);
    if (rc) goto perhaps_destroy_notify_rc;

    fprintf(stderr, "migration target: Got permission, starting domain.\n");

    if (migration_domname) {
        rc = libxl_domain_rename(ctx, domid, migration_domname, common_domname);
        if (rc) goto perhaps_destroy_notify_rc;
    }

    if (!pause_after_migration) {
        rc = libxl_domain_unpause(ctx, domid);
        if (rc) goto perhaps_destroy_notify_rc;
    }

    fprintf(stderr, "migration target: Domain started successsfully.\n");
    rc = 0;

 perhaps_destroy_notify_rc:
    /* Reached on success (rc == 0) and on failure (rc != 0): tell the
     * sender how it went, as a report header plus one status byte. */
    rc2 = libxl_write_exactly(ctx, send_fd,
                              migrate_report, sizeof(migrate_report),
                              "migration ack stream",
                              "success/failure report");
    if (rc2) exit(EXIT_FAILURE);

    rc_buf = -rc;
    /* The single status byte must be zero exactly when rc is zero. */
    assert(!!rc_buf == !!rc);
    rc2 = libxl_write_exactly(ctx, send_fd, &rc_buf, 1,
                              "migration ack stream",
                              "success/failure code");
    if (rc2) exit(EXIT_FAILURE);

    if (rc) {
        fprintf(stderr, "migration target: Failure, destroying our copy.\n");

        rc2 = libxl_domain_destroy(ctx, domid, 0);
        if (rc2) {
            fprintf(stderr, "migration target: Failed to destroy our copy"
                    " (code %d).\n", rc2);
            exit(EXIT_FAILURE);
        }

        fprintf(stderr, "migration target: Cleanup OK, granting sender"
                " permission to resume.\n");

        rc2 = libxl_write_exactly(ctx, send_fd,
                                  migrate_permission_to_go,
                                  sizeof(migrate_permission_to_go),
                                  "migration ack stream",
                                  "permission to sender to have domain back");
        if (rc2) exit(EXIT_FAILURE);
    }

    exit(EXIT_SUCCESS);
}
476
477
main_migrate_receive(int argc,char ** argv)478 int main_migrate_receive(int argc, char **argv)
479 {
480 int debug = 0, daemonize = 1, monitor = 1, pause_after_migration = 0;
481 libxl_checkpointed_stream checkpointed = LIBXL_CHECKPOINTED_STREAM_NONE;
482 int opt;
483 bool userspace_colo_proxy = false;
484 char *script = NULL;
485 static struct option opts[] = {
486 {"colo", 0, 0, 0x100},
487 /* It is a shame that the management code for disk is not here. */
488 {"coloft-script", 1, 0, 0x200},
489 {"userspace-colo-proxy", 0, 0, 0x300},
490 COMMON_LONG_OPTS
491 };
492
493 SWITCH_FOREACH_OPT(opt, "Fedrp", opts, "migrate-receive", 0) {
494 case 'F':
495 daemonize = 0;
496 break;
497 case 'e':
498 daemonize = 0;
499 monitor = 0;
500 break;
501 case 'd':
502 debug = 1;
503 break;
504 case 'r':
505 checkpointed = LIBXL_CHECKPOINTED_STREAM_REMUS;
506 break;
507 case 0x100:
508 checkpointed = LIBXL_CHECKPOINTED_STREAM_COLO;
509 break;
510 case 0x200:
511 script = optarg;
512 break;
513 case 0x300:
514 userspace_colo_proxy = true;
515 break;
516 case 'p':
517 pause_after_migration = 1;
518 break;
519 }
520
521 if (argc-optind != 0) {
522 help("migrate-receive");
523 return EXIT_FAILURE;
524 }
525 migrate_receive(debug, daemonize, monitor, pause_after_migration,
526 STDOUT_FILENO, STDIN_FILENO,
527 checkpointed, script, userspace_colo_proxy);
528
529 return EXIT_SUCCESS;
530 }
531
main_migrate(int argc,char ** argv)532 int main_migrate(int argc, char **argv)
533 {
534 uint32_t domid;
535 const char *config_filename = NULL;
536 const char *ssh_command = "ssh";
537 char *rune = NULL;
538 char *host;
539 int opt, daemonize = 1, monitor = 1, debug = 0, pause_after_migration = 0;
540 static struct option opts[] = {
541 {"debug", 0, 0, 0x100},
542 {"live", 0, 0, 0x200},
543 COMMON_LONG_OPTS
544 };
545
546 SWITCH_FOREACH_OPT(opt, "FC:s:ep", opts, "migrate", 2) {
547 case 'C':
548 config_filename = optarg;
549 break;
550 case 's':
551 ssh_command = optarg;
552 break;
553 case 'F':
554 daemonize = 0;
555 break;
556 case 'e':
557 daemonize = 0;
558 monitor = 0;
559 break;
560 case 'p':
561 pause_after_migration = 1;
562 break;
563 case 0x100: /* --debug */
564 debug = 1;
565 break;
566 case 0x200: /* --live */
567 /* ignored for compatibility with xm */
568 break;
569 }
570
571 domid = find_domain(argv[optind]);
572 host = argv[optind + 1];
573
574 bool pass_tty_arg = progress_use_cr || (isatty(2) > 0);
575
576 if (!ssh_command[0]) {
577 rune= host;
578 } else {
579 char verbose_buf[minmsglevel_default+3];
580 int verbose_len;
581 verbose_buf[0] = ' ';
582 verbose_buf[1] = '-';
583 memset(verbose_buf+2, 'v', minmsglevel_default);
584 verbose_buf[sizeof(verbose_buf)-1] = 0;
585 if (minmsglevel == minmsglevel_default) {
586 verbose_len = 0;
587 } else {
588 verbose_len = (minmsglevel_default - minmsglevel) + 2;
589 }
590 xasprintf(&rune, "exec %s %s xl%s%.*s migrate-receive%s%s%s",
591 ssh_command, host,
592 pass_tty_arg ? " -t" : "",
593 verbose_len, verbose_buf,
594 daemonize ? "" : " -e",
595 debug ? " -d" : "",
596 pause_after_migration ? " -p" : "");
597 }
598
599 migrate_domain(domid, rune, debug, config_filename);
600 return EXIT_SUCCESS;
601 }
602
main_remus(int argc,char ** argv)603 int main_remus(int argc, char **argv)
604 {
605 uint32_t domid;
606 int opt, rc, daemonize = 1;
607 const char *ssh_command = "ssh";
608 char *host = NULL, *rune = NULL;
609 libxl_domain_remus_info r_info;
610 int send_fd = -1, recv_fd = -1;
611 pid_t child = -1;
612 uint8_t *config_data;
613 int config_len;
614
615 memset(&r_info, 0, sizeof(libxl_domain_remus_info));
616
617 SWITCH_FOREACH_OPT(opt, "Fbundi:s:N:ecp", NULL, "remus", 2) {
618 case 'i':
619 r_info.interval = atoi(optarg);
620 break;
621 case 'F':
622 libxl_defbool_set(&r_info.allow_unsafe, true);
623 break;
624 case 'b':
625 libxl_defbool_set(&r_info.blackhole, true);
626 break;
627 case 'u':
628 libxl_defbool_set(&r_info.compression, false);
629 break;
630 case 'n':
631 libxl_defbool_set(&r_info.netbuf, false);
632 break;
633 case 'N':
634 r_info.netbufscript = optarg;
635 break;
636 case 'd':
637 libxl_defbool_set(&r_info.diskbuf, false);
638 break;
639 case 's':
640 ssh_command = optarg;
641 break;
642 case 'e':
643 daemonize = 0;
644 break;
645 case 'c':
646 libxl_defbool_set(&r_info.colo, true);
647 break;
648 case 'p':
649 libxl_defbool_set(&r_info.userspace_colo_proxy, true);
650 }
651
652 domid = find_domain(argv[optind]);
653 host = argv[optind + 1];
654
655 /* Defaults */
656 libxl_defbool_setdefault(&r_info.blackhole, false);
657 libxl_defbool_setdefault(&r_info.colo, false);
658 libxl_defbool_setdefault(&r_info.userspace_colo_proxy, false);
659
660 if (!libxl_defbool_val(r_info.colo) && !r_info.interval)
661 r_info.interval = 200;
662
663 if (libxl_defbool_val(r_info.userspace_colo_proxy) &&
664 !libxl_defbool_val(r_info.colo)) {
665 fprintf(stderr, "Option -p must be used in conjunction with -c");
666 exit(-1);
667 }
668
669 if (libxl_defbool_val(r_info.colo)) {
670 if (r_info.interval || libxl_defbool_val(r_info.blackhole) ||
671 !libxl_defbool_is_default(r_info.netbuf) ||
672 !libxl_defbool_is_default(r_info.diskbuf)) {
673 perror("option -c is conflict with -i, -d, -n or -b");
674 exit(-1);
675 }
676
677 if (libxl_defbool_is_default(r_info.compression)) {
678 perror("COLO can't be used with memory compression. "
679 "Disable memory checkpoint compression now...");
680 libxl_defbool_set(&r_info.compression, false);
681 }
682 }
683
684 if (!r_info.netbufscript) {
685 if (libxl_defbool_val(r_info.colo))
686 r_info.netbufscript = default_colo_proxy_script;
687 else
688 r_info.netbufscript = default_remus_netbufscript;
689 }
690
691 if (libxl_defbool_val(r_info.blackhole)) {
692 send_fd = open("/dev/null", O_RDWR, 0644);
693 if (send_fd < 0) {
694 perror("failed to open /dev/null");
695 exit(EXIT_FAILURE);
696 }
697 } else {
698
699 if (!ssh_command[0]) {
700 rune = host;
701 } else {
702 if (!libxl_defbool_val(r_info.colo)) {
703 xasprintf(&rune, "exec %s %s xl migrate-receive %s %s",
704 ssh_command, host,
705 "-r",
706 daemonize ? "" : " -e");
707 } else {
708 xasprintf(&rune, "exec %s %s xl migrate-receive %s %s %s %s %s",
709 ssh_command, host,
710 "--colo",
711 r_info.netbufscript ? "--coloft-script" : "",
712 r_info.netbufscript ? r_info.netbufscript : "",
713 libxl_defbool_val(r_info.userspace_colo_proxy) ?
714 "--userspace-colo-proxy" : "",
715 daemonize ? "" : " -e");
716 }
717 }
718
719 save_domain_core_begin(domid, NULL, &config_data, &config_len);
720
721 if (!config_len) {
722 fprintf(stderr, "No config file stored for running domain and "
723 "none supplied - cannot start remus.\n");
724 exit(EXIT_FAILURE);
725 }
726
727 child = create_migration_child(rune, &send_fd, &recv_fd);
728
729 migrate_do_preamble(send_fd, recv_fd, child, config_data, config_len,
730 rune);
731
732 if (ssh_command[0])
733 free(rune);
734 }
735
736 /* Point of no return */
737 rc = libxl_domain_remus_start(ctx, &r_info, domid, send_fd, recv_fd, 0);
738
739 /* check if the domain exists. User may have xl destroyed the
740 * domain to force failover
741 */
742 if (libxl_domain_info(ctx, 0, domid)) {
743 fprintf(stderr, "%s: Primary domain has been destroyed.\n",
744 libxl_defbool_val(r_info.colo) ? "COLO" : "Remus");
745 close(send_fd);
746 return EXIT_SUCCESS;
747 }
748
749 /* If we are here, it means remus setup/domain suspend/backup has
750 * failed. Try to resume the domain and exit gracefully.
751 * TODO: Split-Brain check.
752 */
753 if (rc == ERROR_GUEST_TIMEDOUT)
754 fprintf(stderr, "Failed to suspend domain at primary.\n");
755 else {
756 fprintf(stderr, "%s: Backup failed? resuming domain at primary.\n",
757 libxl_defbool_val(r_info.colo) ? "COLO" : "Remus");
758 libxl_domain_resume(ctx, domid, 1, 0);
759 }
760
761 close(send_fd);
762 return EXIT_FAILURE;
763 }
764 #endif
765
766
767 /*
768 * Local variables:
769 * mode: C
770 * c-basic-offset: 4
771 * indent-tabs-mode: nil
772 * End:
773 */
774