1 /*
2  * Copyright 2009-2017 Citrix Ltd and other contributors
3  *
4  * This program is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU Lesser General Public License as published
6  * by the Free Software Foundation; version 2.1 only. with the special
7  * exception on linking described in file LICENSE.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU Lesser General Public License for more details.
13  */
14 
15 #include <fcntl.h>
16 #include <inttypes.h>
17 #include <signal.h>
18 #include <stdlib.h>
19 #include <sys/stat.h>
20 #include <sys/types.h>
21 #include <sys/utsname.h>
22 #include <time.h>
23 #include <unistd.h>
24 
25 #include <libxl.h>
26 #include <libxl_utils.h>
27 #include <libxlutil.h>
28 
29 #include "xl.h"
30 #include "xl_utils.h"
31 #include "xl_parse.h"
32 
33 #ifndef LIBXL_HAVE_NO_SUSPEND_RESUME
34 
create_migration_child(const char * rune,int * send_fd,int * recv_fd)35 static pid_t create_migration_child(const char *rune, int *send_fd,
36                                         int *recv_fd)
37 {
38     int sendpipe[2], recvpipe[2];
39     pid_t child;
40 
41     if (!rune || !send_fd || !recv_fd)
42         return -1;
43 
44     MUST( libxl_pipe(ctx, sendpipe) );
45     MUST( libxl_pipe(ctx, recvpipe) );
46 
47     child = xl_fork(child_migration, "migration transport process");
48 
49     if (!child) {
50         dup2(sendpipe[0], 0);
51         dup2(recvpipe[1], 1);
52         close(sendpipe[0]); close(sendpipe[1]);
53         close(recvpipe[0]); close(recvpipe[1]);
54         execlp("sh","sh","-c",rune,(char*)0);
55         perror("failed to exec sh");
56         exit(EXIT_FAILURE);
57     }
58 
59     close(sendpipe[0]);
60     close(recvpipe[1]);
61     *send_fd = sendpipe[1];
62     *recv_fd = recvpipe[0];
63 
64     /* if receiver dies, we get an error and can clean up
65        rather than just dying */
66     signal(SIGPIPE, SIG_IGN);
67 
68     return child;
69 }
70 
migrate_read_fixedmessage(int fd,const void * msg,int msgsz,const char * what,const char * rune)71 static int migrate_read_fixedmessage(int fd, const void *msg, int msgsz,
72                                      const char *what, const char *rune) {
73     char buf[msgsz];
74     const char *stream;
75     int rc;
76 
77     stream = rune ? "migration receiver stream" : "migration stream";
78     rc = libxl_read_exactly(ctx, fd, buf, msgsz, stream, what);
79     if (rc) return 1;
80 
81     if (memcmp(buf, msg, msgsz)) {
82         fprintf(stderr, "%s contained unexpected data instead of %s\n",
83                 stream, what);
84         if (rune)
85             fprintf(stderr, "(command run was: %s )\n", rune);
86         return 1;
87     }
88     return 0;
89 }
90 
migration_child_report(int recv_fd)91 static void migration_child_report(int recv_fd) {
92     pid_t child;
93     int status, sr;
94     struct timeval now, waituntil, timeout;
95     static const struct timeval pollinterval = { 0, 1000 }; /* 1ms */
96 
97     if (!xl_child_pid(child_migration)) return;
98 
99     CHK_SYSCALL(gettimeofday(&waituntil, 0));
100     waituntil.tv_sec += 2;
101 
102     for (;;) {
103         pid_t migration_child = xl_child_pid(child_migration);
104         child = xl_waitpid(child_migration, &status, WNOHANG);
105 
106         if (child == migration_child) {
107             if (status)
108                 xl_report_child_exitstatus(XTL_INFO, child_migration,
109                                            migration_child, status);
110             break;
111         }
112         if (child == -1) {
113             fprintf(stderr, "wait for migration child [%ld] failed: %s\n",
114                     (long)migration_child, strerror(errno));
115             break;
116         }
117         assert(child == 0);
118 
119         CHK_SYSCALL(gettimeofday(&now, 0));
120         if (timercmp(&now, &waituntil, >)) {
121             fprintf(stderr, "migration child [%ld] not exiting, no longer"
122                     " waiting (exit status will be unreported)\n",
123                     (long)migration_child);
124             break;
125         }
126         timersub(&waituntil, &now, &timeout);
127 
128         if (recv_fd >= 0) {
129             fd_set readfds, exceptfds;
130             FD_ZERO(&readfds);
131             FD_ZERO(&exceptfds);
132             FD_SET(recv_fd, &readfds);
133             FD_SET(recv_fd, &exceptfds);
134             sr = select(recv_fd+1, &readfds,0,&exceptfds, &timeout);
135         } else {
136             if (timercmp(&timeout, &pollinterval, >))
137                 timeout = pollinterval;
138             sr = select(0,0,0,0, &timeout);
139         }
140         if (sr > 0) {
141             recv_fd = -1;
142         } else if (sr == 0) {
143         } else if (sr == -1) {
144             if (errno != EINTR) {
145                 fprintf(stderr, "migration child [%ld] exit wait select"
146                         " failed unexpectedly: %s\n",
147                         (long)migration_child, strerror(errno));
148                 break;
149             }
150         }
151     }
152 }
153 
migrate_do_preamble(int send_fd,int recv_fd,pid_t child,uint8_t * config_data,int config_len,const char * rune)154 static void migrate_do_preamble(int send_fd, int recv_fd, pid_t child,
155                                 uint8_t *config_data, int config_len,
156                                 const char *rune)
157 {
158     int rc = 0;
159 
160     if (send_fd < 0 || recv_fd < 0) {
161         fprintf(stderr, "migrate_do_preamble: invalid file descriptors\n");
162         exit(EXIT_FAILURE);
163     }
164 
165     rc = migrate_read_fixedmessage(recv_fd, migrate_receiver_banner,
166                                    sizeof(migrate_receiver_banner)-1,
167                                    "banner", rune);
168     if (rc) {
169         close(send_fd);
170         migration_child_report(recv_fd);
171         exit(EXIT_FAILURE);
172     }
173 
174     save_domain_core_writeconfig(send_fd, "migration stream",
175                                  config_data, config_len);
176 
177 }
178 
migrate_domain(uint32_t domid,const char * rune,int debug,const char * override_config_file)179 static void migrate_domain(uint32_t domid, const char *rune, int debug,
180                            const char *override_config_file)
181 {
182     pid_t child = -1;
183     int rc;
184     int send_fd = -1, recv_fd = -1;
185     char *away_domname;
186     char rc_buf;
187     uint8_t *config_data;
188     int config_len, flags = LIBXL_SUSPEND_LIVE;
189 
190     save_domain_core_begin(domid, override_config_file,
191                            &config_data, &config_len);
192 
193     if (!config_len) {
194         fprintf(stderr, "No config file stored for running domain and "
195                 "none supplied - cannot migrate.\n");
196         exit(EXIT_FAILURE);
197     }
198 
199     child = create_migration_child(rune, &send_fd, &recv_fd);
200 
201     migrate_do_preamble(send_fd, recv_fd, child, config_data, config_len,
202                         rune);
203 
204     xtl_stdiostream_adjust_flags(logger, XTL_STDIOSTREAM_HIDE_PROGRESS, 0);
205 
206     if (debug)
207         flags |= LIBXL_SUSPEND_DEBUG;
208     rc = libxl_domain_suspend(ctx, domid, send_fd, flags, NULL);
209     if (rc) {
210         fprintf(stderr, "migration sender: libxl_domain_suspend failed"
211                 " (rc=%d)\n", rc);
212         if (rc == ERROR_GUEST_TIMEDOUT)
213             goto failed_suspend;
214         else
215             goto failed_resume;
216     }
217 
218     //fprintf(stderr, "migration sender: Transfer complete.\n");
219     // Should only be printed when debugging as it's a bit messy with
220     // progress indication.
221 
222     rc = migrate_read_fixedmessage(recv_fd, migrate_receiver_ready,
223                                    sizeof(migrate_receiver_ready),
224                                    "ready message", rune);
225     if (rc) goto failed_resume;
226 
227     xtl_stdiostream_adjust_flags(logger, 0, XTL_STDIOSTREAM_HIDE_PROGRESS);
228 
229     /* right, at this point we are about give the destination
230      * permission to rename and resume, so we must first rename the
231      * domain away ourselves */
232 
233     fprintf(stderr, "migration sender: Target has acknowledged transfer.\n");
234 
235     if (common_domname) {
236         xasprintf(&away_domname, "%s--migratedaway", common_domname);
237         rc = libxl_domain_rename(ctx, domid, common_domname, away_domname);
238         if (rc) goto failed_resume;
239     }
240 
241     /* point of no return - as soon as we have tried to say
242      * "go" to the receiver, it's not safe to carry on.  We leave
243      * the domain renamed to %s--migratedaway in case that's helpful.
244      */
245 
246     fprintf(stderr, "migration sender: Giving target permission to start.\n");
247 
248     rc = libxl_write_exactly(ctx, send_fd,
249                              migrate_permission_to_go,
250                              sizeof(migrate_permission_to_go),
251                              "migration stream", "GO message");
252     if (rc) goto failed_badly;
253 
254     rc = migrate_read_fixedmessage(recv_fd, migrate_report,
255                                    sizeof(migrate_report),
256                                    "success/failure report message", rune);
257     if (rc) goto failed_badly;
258 
259     rc = libxl_read_exactly(ctx, recv_fd,
260                             &rc_buf, 1,
261                             "migration ack stream", "success/failure status");
262     if (rc) goto failed_badly;
263 
264     if (rc_buf) {
265         fprintf(stderr, "migration sender: Target reports startup failure"
266                 " (status code %d).\n", rc_buf);
267 
268         rc = migrate_read_fixedmessage(recv_fd, migrate_permission_to_go,
269                                        sizeof(migrate_permission_to_go),
270                                        "permission for sender to resume",
271                                        rune);
272         if (rc) goto failed_badly;
273 
274         fprintf(stderr, "migration sender: Trying to resume at our end.\n");
275 
276         if (common_domname) {
277             libxl_domain_rename(ctx, domid, away_domname, common_domname);
278         }
279         rc = libxl_domain_resume(ctx, domid, 1, 0);
280         if (!rc) fprintf(stderr, "migration sender: Resumed OK.\n");
281 
282         fprintf(stderr, "Migration failed due to problems at target.\n");
283         exit(EXIT_FAILURE);
284     }
285 
286     fprintf(stderr, "migration sender: Target reports successful startup.\n");
287     libxl_domain_destroy(ctx, domid, 0); /* bang! */
288     fprintf(stderr, "Migration successful.\n");
289     exit(EXIT_SUCCESS);
290 
291  failed_suspend:
292     close(send_fd);
293     migration_child_report(recv_fd);
294     fprintf(stderr, "Migration failed, failed to suspend at sender.\n");
295     exit(EXIT_FAILURE);
296 
297  failed_resume:
298     close(send_fd);
299     migration_child_report(recv_fd);
300     fprintf(stderr, "Migration failed, resuming at sender.\n");
301     libxl_domain_resume(ctx, domid, 1, 0);
302     exit(EXIT_FAILURE);
303 
304  failed_badly:
305     fprintf(stderr,
306  "** Migration failed during final handshake **\n"
307  "Domain state is now undefined !\n"
308  "Please CHECK AT BOTH ENDS for running instances, before renaming and\n"
309  " resuming at most one instance.  Two simultaneous instances of the domain\n"
310  " would probably result in SEVERE DATA LOSS and it is now your\n"
311  " responsibility to avoid that.  Sorry.\n");
312 
313     close(send_fd);
314     migration_child_report(recv_fd);
315     exit(EXIT_FAILURE);
316 }
317 
/*
 * Receiver side of a migration / Remus / COLO stream.
 *
 * Writes the banner to `send_fd`, creates the incoming domain (paused) from
 * the stream on `recv_fd`, and then either performs failover (checkpointed
 * streams) or runs the ready/GO/report handshake with the sender.
 *
 * `debug`, `daemonize`, `monitor`, `checkpointed`, `colo_proxy_script` and
 * `userspace_colo_proxy` are copied into the domain_create request.
 * `pause_after_migration` leaves the domain paused after a successful
 * handshake.
 *
 * This function never returns; every path ends in exit().
 */
static void migrate_receive(int debug, int daemonize, int monitor,
                            int pause_after_migration,
                            int send_fd, int recv_fd,
                            libxl_checkpointed_stream checkpointed,
                            char *colo_proxy_script,
                            bool userspace_colo_proxy)
{
    uint32_t domid;
    int rc, rc2;
    char rc_buf;
    char *migration_domname;
    struct domain_create dom_info;

    signal(SIGPIPE, SIG_IGN);
    /* if we get SIGPIPE we'd rather just have it as an error */

    fprintf(stderr, "migration target: Ready to receive domain.\n");

    /* The banner is sent without its terminating NUL. */
    CHK_ERRNOVAL(libxl_write_exactly(
                     ctx, send_fd, migrate_receiver_banner,
                     sizeof(migrate_receiver_banner)-1,
                     "migration ack stream", "banner") );

    memset(&dom_info, 0, sizeof(dom_info));
    dom_info.debug = debug;
    dom_info.daemonize = daemonize;
    dom_info.monitor = monitor;
    dom_info.paused = 1;
    dom_info.migrate_fd = recv_fd;
    dom_info.send_back_fd = send_fd;
    dom_info.migration_domname_r = &migration_domname;
    dom_info.checkpointed_stream = checkpointed;
    dom_info.colo_proxy_script = colo_proxy_script;
    dom_info.userspace_colo_proxy = userspace_colo_proxy;

    rc = create_domain(&dom_info);
    if (rc < 0) {
        fprintf(stderr, "migration target: Domain creation failed"
                " (code %d).\n", rc);
        exit(EXIT_FAILURE);
    }

    /* A non-negative return from create_domain() is the new domid. */
    domid = rc;

    switch (checkpointed) {
    case LIBXL_CHECKPOINTED_STREAM_REMUS:
    case LIBXL_CHECKPOINTED_STREAM_COLO:
    {
        const char *ha = checkpointed == LIBXL_CHECKPOINTED_STREAM_COLO ?
                         "COLO" : "Remus";
        /* If we are here, it means that the sender (primary) has crashed.
         * TODO: Split-Brain Check.
         */
        fprintf(stderr, "migration target: %s Failover for domain %u\n",
                ha, domid);

        /*
         * If domain renaming fails, lets just continue (as we need the domain
         * to be up & dom names may not matter much, as long as its reachable
         * over network).
         *
         * If domain unpausing fails, destroy domain ? Or is it better to have
         * a consistent copy of the domain (memory, cpu state, disk)
         * on atleast one physical host ? Right now, lets just leave the domain
         * as is and let the Administrator decide (or troubleshoot).
         */

        /* rc still holds the domid at this point.  Clear it so that a
         * skipped rename is not mistaken for a failure by the COLO exit
         * status below. */
        rc = 0;
        if (migration_domname) {
            rc = libxl_domain_rename(ctx, domid, migration_domname,
                                     common_domname);
            if (rc)
                fprintf(stderr, "migration target (%s): "
                        "Failed to rename domain from %s to %s:%d\n",
                        ha, migration_domname, common_domname, rc);
        }

        if (checkpointed == LIBXL_CHECKPOINTED_STREAM_COLO)
            /* The guest is running after failover in COLO mode */
            exit(rc ? -ERROR_FAIL: 0);

        rc = libxl_domain_unpause(ctx, domid);
        if (rc)
            fprintf(stderr, "migration target (%s): "
                    "Failed to unpause domain %s (id: %u):%d\n",
                    ha, common_domname, domid, rc);

        exit(rc ? EXIT_FAILURE : EXIT_SUCCESS);
    }
    default:
        /* do nothing */
        break;
    }

    fprintf(stderr, "migration target: Transfer complete,"
            " requesting permission to start domain.\n");

    rc = libxl_write_exactly(ctx, send_fd,
                             migrate_receiver_ready,
                             sizeof(migrate_receiver_ready),
                             "migration ack stream", "ready message");
    if (rc) exit(EXIT_FAILURE);

    rc = migrate_read_fixedmessage(recv_fd, migrate_permission_to_go,
                                   sizeof(migrate_permission_to_go),
                                   "GO message", 0);
    if (rc) goto perhaps_destroy_notify_rc;

    fprintf(stderr, "migration target: Got permission, starting domain.\n");

    if (migration_domname) {
        rc = libxl_domain_rename(ctx, domid, migration_domname, common_domname);
        if (rc) goto perhaps_destroy_notify_rc;
    }

    if (!pause_after_migration) {
        rc = libxl_domain_unpause(ctx, domid);
        if (rc) goto perhaps_destroy_notify_rc;
    }

    fprintf(stderr, "migration target: Domain started successsfully.\n");
    rc = 0;

 perhaps_destroy_notify_rc:
    /* Reached on success (rc == 0) and on failure (rc != 0) alike: tell
     * the sender how it went. */
    rc2 = libxl_write_exactly(ctx, send_fd,
                              migrate_report, sizeof(migrate_report),
                              "migration ack stream",
                              "success/failure report");
    if (rc2) exit(EXIT_FAILURE);

    /* The status travels as a single byte; make sure narrowing did not
     * turn a failure into "0" (success). */
    rc_buf = -rc;
    assert(!!rc_buf == !!rc);
    rc2 = libxl_write_exactly(ctx, send_fd, &rc_buf, 1,
                              "migration ack stream",
                              "success/failure code");
    if (rc2) exit(EXIT_FAILURE);

    if (rc) {
        fprintf(stderr, "migration target: Failure, destroying our copy.\n");

        rc2 = libxl_domain_destroy(ctx, domid, 0);
        if (rc2) {
            fprintf(stderr, "migration target: Failed to destroy our copy"
                    " (code %d).\n", rc2);
            exit(EXIT_FAILURE);
        }

        fprintf(stderr, "migration target: Cleanup OK, granting sender"
                " permission to resume.\n");

        rc2 = libxl_write_exactly(ctx, send_fd,
                                  migrate_permission_to_go,
                                  sizeof(migrate_permission_to_go),
                                  "migration ack stream",
                                  "permission to sender to have domain back");
        if (rc2) exit(EXIT_FAILURE);
    }

    exit(EXIT_SUCCESS);
}
476 
477 
main_migrate_receive(int argc,char ** argv)478 int main_migrate_receive(int argc, char **argv)
479 {
480     int debug = 0, daemonize = 1, monitor = 1, pause_after_migration = 0;
481     libxl_checkpointed_stream checkpointed = LIBXL_CHECKPOINTED_STREAM_NONE;
482     int opt;
483     bool userspace_colo_proxy = false;
484     char *script = NULL;
485     static struct option opts[] = {
486         {"colo", 0, 0, 0x100},
487         /* It is a shame that the management code for disk is not here. */
488         {"coloft-script", 1, 0, 0x200},
489         {"userspace-colo-proxy", 0, 0, 0x300},
490         COMMON_LONG_OPTS
491     };
492 
493     SWITCH_FOREACH_OPT(opt, "Fedrp", opts, "migrate-receive", 0) {
494     case 'F':
495         daemonize = 0;
496         break;
497     case 'e':
498         daemonize = 0;
499         monitor = 0;
500         break;
501     case 'd':
502         debug = 1;
503         break;
504     case 'r':
505         checkpointed = LIBXL_CHECKPOINTED_STREAM_REMUS;
506         break;
507     case 0x100:
508         checkpointed = LIBXL_CHECKPOINTED_STREAM_COLO;
509         break;
510     case 0x200:
511         script = optarg;
512         break;
513     case 0x300:
514         userspace_colo_proxy = true;
515         break;
516     case 'p':
517         pause_after_migration = 1;
518         break;
519     }
520 
521     if (argc-optind != 0) {
522         help("migrate-receive");
523         return EXIT_FAILURE;
524     }
525     migrate_receive(debug, daemonize, monitor, pause_after_migration,
526                     STDOUT_FILENO, STDIN_FILENO,
527                     checkpointed, script, userspace_colo_proxy);
528 
529     return EXIT_SUCCESS;
530 }
531 
main_migrate(int argc,char ** argv)532 int main_migrate(int argc, char **argv)
533 {
534     uint32_t domid;
535     const char *config_filename = NULL;
536     const char *ssh_command = "ssh";
537     char *rune = NULL;
538     char *host;
539     int opt, daemonize = 1, monitor = 1, debug = 0, pause_after_migration = 0;
540     static struct option opts[] = {
541         {"debug", 0, 0, 0x100},
542         {"live", 0, 0, 0x200},
543         COMMON_LONG_OPTS
544     };
545 
546     SWITCH_FOREACH_OPT(opt, "FC:s:ep", opts, "migrate", 2) {
547     case 'C':
548         config_filename = optarg;
549         break;
550     case 's':
551         ssh_command = optarg;
552         break;
553     case 'F':
554         daemonize = 0;
555         break;
556     case 'e':
557         daemonize = 0;
558         monitor = 0;
559         break;
560     case 'p':
561         pause_after_migration = 1;
562         break;
563     case 0x100: /* --debug */
564         debug = 1;
565         break;
566     case 0x200: /* --live */
567         /* ignored for compatibility with xm */
568         break;
569     }
570 
571     domid = find_domain(argv[optind]);
572     host = argv[optind + 1];
573 
574     bool pass_tty_arg = progress_use_cr || (isatty(2) > 0);
575 
576     if (!ssh_command[0]) {
577         rune= host;
578     } else {
579         char verbose_buf[minmsglevel_default+3];
580         int verbose_len;
581         verbose_buf[0] = ' ';
582         verbose_buf[1] = '-';
583         memset(verbose_buf+2, 'v', minmsglevel_default);
584         verbose_buf[sizeof(verbose_buf)-1] = 0;
585         if (minmsglevel == minmsglevel_default) {
586             verbose_len = 0;
587         } else {
588             verbose_len = (minmsglevel_default - minmsglevel) + 2;
589         }
590         xasprintf(&rune, "exec %s %s xl%s%.*s migrate-receive%s%s%s",
591                   ssh_command, host,
592                   pass_tty_arg ? " -t" : "",
593                   verbose_len, verbose_buf,
594                   daemonize ? "" : " -e",
595                   debug ? " -d" : "",
596                   pause_after_migration ? " -p" : "");
597     }
598 
599     migrate_domain(domid, rune, debug, config_filename);
600     return EXIT_SUCCESS;
601 }
602 
main_remus(int argc,char ** argv)603 int main_remus(int argc, char **argv)
604 {
605     uint32_t domid;
606     int opt, rc, daemonize = 1;
607     const char *ssh_command = "ssh";
608     char *host = NULL, *rune = NULL;
609     libxl_domain_remus_info r_info;
610     int send_fd = -1, recv_fd = -1;
611     pid_t child = -1;
612     uint8_t *config_data;
613     int config_len;
614 
615     memset(&r_info, 0, sizeof(libxl_domain_remus_info));
616 
617     SWITCH_FOREACH_OPT(opt, "Fbundi:s:N:ecp", NULL, "remus", 2) {
618     case 'i':
619         r_info.interval = atoi(optarg);
620         break;
621     case 'F':
622         libxl_defbool_set(&r_info.allow_unsafe, true);
623         break;
624     case 'b':
625         libxl_defbool_set(&r_info.blackhole, true);
626         break;
627     case 'u':
628         libxl_defbool_set(&r_info.compression, false);
629         break;
630     case 'n':
631         libxl_defbool_set(&r_info.netbuf, false);
632         break;
633     case 'N':
634         r_info.netbufscript = optarg;
635         break;
636     case 'd':
637         libxl_defbool_set(&r_info.diskbuf, false);
638         break;
639     case 's':
640         ssh_command = optarg;
641         break;
642     case 'e':
643         daemonize = 0;
644         break;
645     case 'c':
646         libxl_defbool_set(&r_info.colo, true);
647         break;
648     case 'p':
649         libxl_defbool_set(&r_info.userspace_colo_proxy, true);
650     }
651 
652     domid = find_domain(argv[optind]);
653     host = argv[optind + 1];
654 
655     /* Defaults */
656     libxl_defbool_setdefault(&r_info.blackhole, false);
657     libxl_defbool_setdefault(&r_info.colo, false);
658     libxl_defbool_setdefault(&r_info.userspace_colo_proxy, false);
659 
660     if (!libxl_defbool_val(r_info.colo) && !r_info.interval)
661         r_info.interval = 200;
662 
663     if (libxl_defbool_val(r_info.userspace_colo_proxy) &&
664         !libxl_defbool_val(r_info.colo)) {
665         fprintf(stderr, "Option -p must be used in conjunction with -c");
666         exit(-1);
667     }
668 
669     if (libxl_defbool_val(r_info.colo)) {
670         if (r_info.interval || libxl_defbool_val(r_info.blackhole) ||
671             !libxl_defbool_is_default(r_info.netbuf) ||
672             !libxl_defbool_is_default(r_info.diskbuf)) {
673             perror("option -c is conflict with -i, -d, -n or -b");
674             exit(-1);
675         }
676 
677         if (libxl_defbool_is_default(r_info.compression)) {
678             perror("COLO can't be used with memory compression. "
679                    "Disable memory checkpoint compression now...");
680             libxl_defbool_set(&r_info.compression, false);
681         }
682     }
683 
684     if (!r_info.netbufscript) {
685         if (libxl_defbool_val(r_info.colo))
686             r_info.netbufscript = default_colo_proxy_script;
687         else
688             r_info.netbufscript = default_remus_netbufscript;
689     }
690 
691     if (libxl_defbool_val(r_info.blackhole)) {
692         send_fd = open("/dev/null", O_RDWR, 0644);
693         if (send_fd < 0) {
694             perror("failed to open /dev/null");
695             exit(EXIT_FAILURE);
696         }
697     } else {
698 
699         if (!ssh_command[0]) {
700             rune = host;
701         } else {
702             if (!libxl_defbool_val(r_info.colo)) {
703                 xasprintf(&rune, "exec %s %s xl migrate-receive %s %s",
704                           ssh_command, host,
705                           "-r",
706                           daemonize ? "" : " -e");
707             } else {
708                 xasprintf(&rune, "exec %s %s xl migrate-receive %s %s %s %s %s",
709                           ssh_command, host,
710                           "--colo",
711                           r_info.netbufscript ? "--coloft-script" : "",
712                           r_info.netbufscript ? r_info.netbufscript : "",
713                           libxl_defbool_val(r_info.userspace_colo_proxy) ?
714                           "--userspace-colo-proxy" : "",
715                           daemonize ? "" : " -e");
716             }
717         }
718 
719         save_domain_core_begin(domid, NULL, &config_data, &config_len);
720 
721         if (!config_len) {
722             fprintf(stderr, "No config file stored for running domain and "
723                     "none supplied - cannot start remus.\n");
724             exit(EXIT_FAILURE);
725         }
726 
727         child = create_migration_child(rune, &send_fd, &recv_fd);
728 
729         migrate_do_preamble(send_fd, recv_fd, child, config_data, config_len,
730                             rune);
731 
732         if (ssh_command[0])
733             free(rune);
734     }
735 
736     /* Point of no return */
737     rc = libxl_domain_remus_start(ctx, &r_info, domid, send_fd, recv_fd, 0);
738 
739     /* check if the domain exists. User may have xl destroyed the
740      * domain to force failover
741      */
742     if (libxl_domain_info(ctx, 0, domid)) {
743         fprintf(stderr, "%s: Primary domain has been destroyed.\n",
744                 libxl_defbool_val(r_info.colo) ? "COLO" : "Remus");
745         close(send_fd);
746         return EXIT_SUCCESS;
747     }
748 
749     /* If we are here, it means remus setup/domain suspend/backup has
750      * failed. Try to resume the domain and exit gracefully.
751      * TODO: Split-Brain check.
752      */
753     if (rc == ERROR_GUEST_TIMEDOUT)
754         fprintf(stderr, "Failed to suspend domain at primary.\n");
755     else {
756         fprintf(stderr, "%s: Backup failed? resuming domain at primary.\n",
757                 libxl_defbool_val(r_info.colo) ? "COLO" : "Remus");
758         libxl_domain_resume(ctx, domid, 1, 0);
759     }
760 
761     close(send_fd);
762     return EXIT_FAILURE;
763 }
764 #endif
765 
766 
767 /*
768  * Local variables:
769  * mode: C
770  * c-basic-offset: 4
771  * indent-tabs-mode: nil
772  * End:
773  */
774