#include <assert.h>
#include <arpa/inet.h>

#include "xg_sr_common.h"

/*
 * Writes an Image header and Domain header into the stream.
 */
static int write_headers(struct xc_sr_context *ctx, uint16_t guest_type)
{
    xc_interface *xch = ctx->xch;
    int32_t xen_version = xc_version(xch, XENVER_version, NULL);
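    /*
     * The stream starts with the fixed image header, whose multi-byte
     * fields are written in network (big endian) byte order - hence the
     * htonl()/htons() below - followed by the domain header.
     */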
    struct xc_sr_ihdr ihdr = {
        .marker = IHDR_MARKER,
        .id = htonl(IHDR_ID),
        .version = htonl(3),
        .options = htons(IHDR_OPT_LITTLE_ENDIAN),
    };
    struct xc_sr_dhdr dhdr = {
        .type = guest_type,
        .page_shift = XC_PAGE_SHIFT,
        .xen_major = (xen_version >> 16) & 0xffff,
        .xen_minor = (xen_version) & 0xffff,
    };

    if ( xen_version < 0 )
    {
        PERROR("Unable to obtain Xen Version");
        return -1;
    }

    if ( write_exact(ctx->fd, &ihdr, sizeof(ihdr)) )
    {
        PERROR("Unable to write Image Header to stream");
        return -1;
    }

    if ( write_exact(ctx->fd, &dhdr, sizeof(dhdr)) )
    {
        PERROR("Unable to write Domain Header to stream");
        return -1;
    }

    return 0;
}

/*
 * Writes an END record into the stream.
 */
static int write_end_record(struct xc_sr_context *ctx)
{
    struct xc_sr_record end = { .type = REC_TYPE_END };

    return write_record(ctx, &end);
}

/*
 * Writes a STATIC_DATA_END record into the stream.
 */
static int write_static_data_end_record(struct xc_sr_context *ctx)
{
    struct xc_sr_record end = { .type = REC_TYPE_STATIC_DATA_END };

    return write_record(ctx, &end);
}

/*
 * Writes a CHECKPOINT record into the stream.
 */
static int write_checkpoint_record(struct xc_sr_context *ctx)
{
    struct xc_sr_record checkpoint = { .type = REC_TYPE_CHECKPOINT };

    return write_record(ctx, &checkpoint);
}

/*
 * Writes a batch of memory as a PAGE_DATA record into the stream. The batch
 * is constructed in ctx->save.batch_pfns.
 *
 * This function:
 * - gets the types for each pfn in the batch.
 * - for each pfn with real data:
 *   - maps and attempts to localise the pages.
 * - constructs and writes a PAGE_DATA record into the stream.
 */
static int write_batch(struct xc_sr_context *ctx)
{
    xc_interface *xch = ctx->xch;
    xen_pfn_t *mfns = NULL, *types = NULL;
    void *guest_mapping = NULL;
    void **guest_data = NULL;
    void **local_pages = NULL;
    int *errors = NULL, rc = -1;
    unsigned int i, p, nr_pages = 0, nr_pages_mapped = 0;
    unsigned int nr_pfns = ctx->save.nr_batch_pfns;
    void *page, *orig_page;
    uint64_t *rec_pfns = NULL;
    struct iovec *iov = NULL;
    int iovcnt = 0;
    struct xc_sr_rec_page_data_header hdr = { 0 };
    struct xc_sr_record rec = {
        .type = REC_TYPE_PAGE_DATA,
    };

    assert(nr_pfns != 0);

    /* Mfns of the batch pfns. */
    mfns = malloc(nr_pfns * sizeof(*mfns));
    /* Types of the batch pfns. */
    types = malloc(nr_pfns * sizeof(*types));
    /* Errors from attempting to map the gfns. */
    errors = malloc(nr_pfns * sizeof(*errors));
    /* Pointers to page data to send. Mapped gfns or local allocations. */
    guest_data = calloc(nr_pfns, sizeof(*guest_data));
    /* Pointers to locally allocated pages. Need freeing. */
    local_pages = calloc(nr_pfns, sizeof(*local_pages));
    /* iovec[] for writev(). */
    iov = malloc((nr_pfns + 4) * sizeof(*iov));

    if ( !mfns || !types || !errors || !guest_data || !local_pages || !iov )
    {
        ERROR("Unable to allocate arrays for a batch of %u pages",
              nr_pfns);
        goto err;
    }

    for ( i = 0; i < nr_pfns; ++i )
    {
        types[i] = mfns[i] = ctx->save.ops.pfn_to_gfn(ctx,
                                                      ctx->save.batch_pfns[i]);

        /* Likely a ballooned page. */
        if ( mfns[i] == INVALID_MFN )
        {
            set_bit(ctx->save.batch_pfns[i], ctx->save.deferred_pages);
            ++ctx->save.nr_deferred_pages;
        }
    }

    rc = xc_get_pfn_type_batch(xch, ctx->domid, nr_pfns, types);
    if ( rc )
    {
        PERROR("Failed to get types for pfn batch");
        goto err;
    }
    rc = -1;
    for ( i = 0; i < nr_pfns; ++i )
    {
        if ( !is_known_page_type(types[i]) )
        {
            ERROR("Unknown type %#"PRIpfn" for pfn %#"PRIpfn,
                  types[i], ctx->save.batch_pfns[i]);
            goto err;
        }

        if ( !page_type_has_stream_data(types[i]) )
            continue;

        mfns[nr_pages++] = mfns[i];
    }

    if ( nr_pages > 0 )
    {
        guest_mapping = xenforeignmemory_map(
            xch->fmem, ctx->domid, PROT_READ, nr_pages, mfns, errors);
        if ( !guest_mapping )
        {
            PERROR("Failed to map guest pages");
            goto err;
        }
        nr_pages_mapped = nr_pages;

        for ( i = 0, p = 0; i < nr_pfns; ++i )
        {
            if ( !page_type_has_stream_data(types[i]) )
                continue;

            if ( errors[p] )
            {
                ERROR("Mapping of pfn %#"PRIpfn" (mfn %#"PRIpfn") failed %d",
                      ctx->save.batch_pfns[i], mfns[p], errors[p]);
                goto err;
            }

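            /*
             * normalise_page() may substitute a locally allocated copy of
             * the page (e.g. PV pagetables with mfns rewritten back to
             * pfns); track such copies so they are freed on exit.
             */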
            orig_page = page = guest_mapping + (p * PAGE_SIZE);
            rc = ctx->save.ops.normalise_page(ctx, types[i], &page);

            if ( orig_page != page )
                local_pages[i] = page;

            if ( rc )
            {
                if ( rc == -1 && errno == EAGAIN )
                {
                    set_bit(ctx->save.batch_pfns[i], ctx->save.deferred_pages);
                    ++ctx->save.nr_deferred_pages;
                    types[i] = XEN_DOMCTL_PFINFO_XTAB;
                    --nr_pages;
                }
                else
                    goto err;
            }
            else
                guest_data[i] = page;

            rc = -1;
            ++p;
        }
    }

    rec_pfns = malloc(nr_pfns * sizeof(*rec_pfns));
    if ( !rec_pfns )
    {
        ERROR("Unable to allocate %zu bytes of memory for page data pfn list",
              nr_pfns * sizeof(*rec_pfns));
        goto err;
    }

    hdr.count = nr_pfns;

    rec.length = sizeof(hdr);
    rec.length += nr_pfns * sizeof(*rec_pfns);
    rec.length += nr_pages * PAGE_SIZE;

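    /*
     * Each entry packs a page's type into the upper 32 bits and its pfn
     * into the lower 32 bits.
     */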
    for ( i = 0; i < nr_pfns; ++i )
        rec_pfns[i] = ((uint64_t)(types[i]) << 32) | ctx->save.batch_pfns[i];

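    /*
     * Assemble the record for a single writev(): record header (type,
     * length), page data header (count), the pfn/type array, then the
     * contents of each page which has data in the stream.
     */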
    iov[0].iov_base = &rec.type;
    iov[0].iov_len = sizeof(rec.type);

    iov[1].iov_base = &rec.length;
    iov[1].iov_len = sizeof(rec.length);

    iov[2].iov_base = &hdr;
    iov[2].iov_len = sizeof(hdr);

    iov[3].iov_base = rec_pfns;
    iov[3].iov_len = nr_pfns * sizeof(*rec_pfns);

    iovcnt = 4;

    if ( nr_pages )
    {
        for ( i = 0; i < nr_pfns; ++i )
        {
            if ( guest_data[i] )
            {
                iov[iovcnt].iov_base = guest_data[i];
                iov[iovcnt].iov_len = PAGE_SIZE;
                iovcnt++;
                --nr_pages;
            }
        }
    }

    if ( writev_exact(ctx->fd, iov, iovcnt) )
    {
        PERROR("Failed to write page data to stream");
        goto err;
    }

    /* Sanity check we have sent all the pages we expected to. */
    assert(nr_pages == 0);
    rc = ctx->save.nr_batch_pfns = 0;

 err:
    free(rec_pfns);
    if ( guest_mapping )
        xenforeignmemory_unmap(xch->fmem, guest_mapping, nr_pages_mapped);
    for ( i = 0; local_pages && i < nr_pfns; ++i )
        free(local_pages[i]);
    free(iov);
    free(local_pages);
    free(guest_data);
    free(errors);
    free(types);
    free(mfns);

    return rc;
}

/*
 * Flush a batch of pfns into the stream.
 */
static int flush_batch(struct xc_sr_context *ctx)
{
    int rc = 0;

    if ( ctx->save.nr_batch_pfns == 0 )
        return rc;

    rc = write_batch(ctx);

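    /*
     * On success the batch is logically empty; mark the array undefined
     * so Valgrind can flag any read of stale pfns.
     */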
    if ( !rc )
    {
        VALGRIND_MAKE_MEM_UNDEFINED(ctx->save.batch_pfns,
                                    MAX_BATCH_SIZE *
                                    sizeof(*ctx->save.batch_pfns));
    }

    return rc;
}

/*
 * Add a single pfn to the batch, flushing the batch if full.
 */
static int add_to_batch(struct xc_sr_context *ctx, xen_pfn_t pfn)
{
    int rc = 0;

    if ( ctx->save.nr_batch_pfns == MAX_BATCH_SIZE )
        rc = flush_batch(ctx);

    if ( rc == 0 )
        ctx->save.batch_pfns[ctx->save.nr_batch_pfns++] = pfn;

    return rc;
}

/*
 * Pause/suspend the domain, and refresh ctx->dominfo if required.
 */
static int suspend_domain(struct xc_sr_context *ctx)
{
    xc_interface *xch = ctx->xch;

    /*
     * TODO: Properly specify the return value from this callback. All
     * implementations currently appear to return 1 for success, whereas
     * the legacy code checks for != 0.
     */
    int cb_rc = ctx->save.callbacks->suspend(ctx->save.callbacks->data);

    if ( cb_rc == 0 )
    {
        ERROR("save callback suspend() failed: %d", cb_rc);
        return -1;
    }

    /* Refresh domain information. */
    if ( xc_domain_getinfo_single(xch, ctx->domid, &ctx->dominfo) < 0 )
    {
        PERROR("Unable to refresh domain information");
        return -1;
    }

    /* Confirm the domain has actually been paused. */
    if ( !dominfo_shutdown_with(&ctx->dominfo, SHUTDOWN_suspend) )
    {
        ERROR("Domain has not been suspended: shutdown %d, reason %d",
              ctx->dominfo.flags & XEN_DOMINF_shutdown,
              dominfo_shutdown_reason(&ctx->dominfo));
        return -1;
    }

    xc_report_progress_single(xch, "Domain now suspended");

    return 0;
}

/*
 * Send a subset of pages in the guest's p2m, according to the dirty bitmap.
 * Used for each subsequent iteration of the live migration loop.
 *
 * Bitmap is bounded by p2m_size.
 */
static int send_dirty_pages(struct xc_sr_context *ctx,
                            unsigned long entries)
{
    xc_interface *xch = ctx->xch;
    xen_pfn_t p;
    unsigned long written;
    int rc;
    DECLARE_HYPERCALL_BUFFER_SHADOW(unsigned long, dirty_bitmap,
                                    &ctx->save.dirty_bitmap_hbuf);

    for ( p = 0, written = 0; p < ctx->save.p2m_size; ++p )
    {
        if ( !test_bit(p, dirty_bitmap) )
            continue;

        rc = add_to_batch(ctx, p);
        if ( rc )
            return rc;

        /*
         * Update progress every 4MB worth of memory sent, i.e. every
         * 2^(22 - 12) = 1024 4kB pages.
         */
        if ( (written & ((1U << (22 - 12)) - 1)) == 0 )
            xc_report_progress_step(xch, written, entries);

        ++written;
    }

    rc = flush_batch(ctx);
    if ( rc )
        return rc;

    if ( written > entries )
        DPRINTF("Bitmap contained more entries than expected...");

    xc_report_progress_step(xch, entries, entries);

    return ctx->save.ops.check_vm_state(ctx);
}

/*
 * Send all pages in the guest's p2m. Used as the first iteration of the live
 * migration loop, and for a non-live save.
 */
static int send_all_pages(struct xc_sr_context *ctx)
{
    DECLARE_HYPERCALL_BUFFER_SHADOW(unsigned long, dirty_bitmap,
                                    &ctx->save.dirty_bitmap_hbuf);

    bitmap_set(dirty_bitmap, ctx->save.p2m_size);

    return send_dirty_pages(ctx, ctx->save.p2m_size);
}

static int enable_logdirty(struct xc_sr_context *ctx)
{
    xc_interface *xch = ctx->xch;
    int on1 = 0, off = 0, on2 = 0;
    int rc;

    /* This juggling is required if logdirty is enabled for VRAM tracking. */
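    /*
     * In that case the first enable attempt fails, so logdirty is turned
     * off and immediately re-enabled.
     */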
    rc = xc_shadow_control(xch, ctx->domid,
                           XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY,
                           NULL, 0);
    if ( rc < 0 )
    {
        on1 = errno;
        rc = xc_shadow_control(xch, ctx->domid, XEN_DOMCTL_SHADOW_OP_OFF,
                               NULL, 0);
        if ( rc < 0 )
            off = errno;
        else
        {
            rc = xc_shadow_control(xch, ctx->domid,
                                   XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY,
                                   NULL, 0);
            if ( rc < 0 )
                on2 = errno;
        }
        if ( rc < 0 )
        {
            PERROR("Failed to enable logdirty: %d,%d,%d", on1, off, on2);
            return rc;
        }
    }

    return 0;
}

static int update_progress_string(struct xc_sr_context *ctx, char **str)
{
    xc_interface *xch = ctx->xch;
    char *new_str = NULL;
    unsigned int iter = ctx->save.stats.iteration;

    if ( asprintf(&new_str, "Frames iteration %u", iter) == -1 )
    {
        PERROR("Unable to allocate new progress string");
        return -1;
    }

    free(*str);
    *str = new_str;

    xc_set_progress_prefix(xch, *str);
    return 0;
}

/*
 * This is the live migration precopy policy - it's called periodically during
 * the precopy phase of live migrations, and is responsible for deciding when
 * the precopy phase should terminate and what should be done next.
 *
 * The policy implemented here behaves identically to the policy previously
 * hard-coded into xc_domain_save() - it proceeds to the stop-and-copy phase of
 * the live migration when there are either fewer than 50 dirty pages, or 5
 * precopy rounds have completed.
 */
#define SPP_MAX_ITERATIONS 5
#define SPP_TARGET_DIRTY_COUNT 50

static int simple_precopy_policy(struct precopy_stats stats, void *user)
{
    return ((stats.dirty_count >= 0 &&
             stats.dirty_count < SPP_TARGET_DIRTY_COUNT) ||
            stats.iteration >= SPP_MAX_ITERATIONS)
        ? XGS_POLICY_STOP_AND_COPY
        : XGS_POLICY_CONTINUE_PRECOPY;
}

/*
 * Send memory while guest is running.
 */
static int send_memory_live(struct xc_sr_context *ctx)
{
    xc_interface *xch = ctx->xch;
    xc_shadow_op_stats_t stats = { 0, ctx->save.p2m_size };
    char *progress_str = NULL;
    unsigned int x = 0;
    int rc;
    int policy_decision;

    DECLARE_HYPERCALL_BUFFER_SHADOW(unsigned long, dirty_bitmap,
                                    &ctx->save.dirty_bitmap_hbuf);

    precopy_policy_t precopy_policy = ctx->save.callbacks->precopy_policy;
    void *data = ctx->save.callbacks->data;

    struct precopy_stats *policy_stats;

    rc = update_progress_string(ctx, &progress_str);
    if ( rc )
        goto out;

    ctx->save.stats = (struct precopy_stats){
        .dirty_count = ctx->save.p2m_size,
    };
    policy_stats = &ctx->save.stats;

    if ( precopy_policy == NULL )
        precopy_policy = simple_precopy_policy;

    bitmap_set(dirty_bitmap, ctx->save.p2m_size);

    for ( ; ; )
    {
        policy_decision = precopy_policy(*policy_stats, data);
        x++;

        if ( stats.dirty_count > 0 && policy_decision != XGS_POLICY_ABORT )
        {
            rc = update_progress_string(ctx, &progress_str);
            if ( rc )
                goto out;

            rc = send_dirty_pages(ctx, stats.dirty_count);
            if ( rc )
                goto out;
        }

        if ( policy_decision != XGS_POLICY_CONTINUE_PRECOPY )
            break;

        policy_stats->iteration = x;
        policy_stats->total_written += policy_stats->dirty_count;
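        /*
         * The dirty count is unknown until the next bitmap retrieval;
         * -1 lets the policy distinguish this from a real count of zero.
         */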
        policy_stats->dirty_count = -1;

        policy_decision = precopy_policy(*policy_stats, data);

        if ( policy_decision != XGS_POLICY_CONTINUE_PRECOPY )
            break;

        if ( xc_logdirty_control(
                 xch, ctx->domid, XEN_DOMCTL_SHADOW_OP_CLEAN,
                 &ctx->save.dirty_bitmap_hbuf, ctx->save.p2m_size,
                 0, &stats) != ctx->save.p2m_size )
        {
            PERROR("Failed to retrieve logdirty bitmap");
            rc = -1;
            goto out;
        }

        policy_stats->dirty_count = stats.dirty_count;
    }

    if ( policy_decision == XGS_POLICY_ABORT )
    {
        PERROR("Abort precopy loop");
        rc = -1;
        goto out;
    }

 out:
    xc_set_progress_prefix(xch, NULL);
    free(progress_str);
    return rc;
}

static int colo_merge_secondary_dirty_bitmap(struct xc_sr_context *ctx)
{
    xc_interface *xch = ctx->xch;
    /*
     * Zero-initialise rec: the err path frees rec.data, which would
     * otherwise be uninitialised if read_record() fails early.
     */
    struct xc_sr_record rec = { 0 };
    uint64_t *pfns = NULL;
    uint64_t pfn;
    unsigned int count, i;
    int rc;
    DECLARE_HYPERCALL_BUFFER_SHADOW(unsigned long, dirty_bitmap,
                                    &ctx->save.dirty_bitmap_hbuf);

    rc = read_record(ctx, ctx->save.recv_fd, &rec);
    if ( rc )
        goto err;

    if ( rec.type != REC_TYPE_CHECKPOINT_DIRTY_PFN_LIST )
    {
        PERROR("Expected dirty bitmap record, but received %u", rec.type);
        rc = -1;
        goto err;
    }

    if ( rec.length % sizeof(*pfns) )
    {
        PERROR("Invalid dirty pfn list record length %u", rec.length);
        rc = -1;
        goto err;
    }

    count = rec.length / sizeof(*pfns);
    pfns = rec.data;

    for ( i = 0; i < count; i++ )
    {
        pfn = pfns[i];
        if ( pfn >= ctx->save.p2m_size )
        {
            PERROR("Invalid pfn 0x%" PRIx64, pfn);
            rc = -1;
            goto err;
        }

        set_bit(pfn, dirty_bitmap);
    }

    rc = 0;

 err:
    free(rec.data);
    return rc;
}

/*
 * Suspend the domain and send dirty memory.
 * This is the last iteration of the live migration and the
 * heart of the checkpointed stream.
 */
static int suspend_and_send_dirty(struct xc_sr_context *ctx)
{
    xc_interface *xch = ctx->xch;
    xc_shadow_op_stats_t stats = { 0, ctx->save.p2m_size };
    char *progress_str = NULL;
    int rc;
    DECLARE_HYPERCALL_BUFFER_SHADOW(unsigned long, dirty_bitmap,
                                    &ctx->save.dirty_bitmap_hbuf);

    rc = suspend_domain(ctx);
    if ( rc )
        goto out;

    if ( xc_logdirty_control(
             xch, ctx->domid, XEN_DOMCTL_SHADOW_OP_CLEAN,
             HYPERCALL_BUFFER(dirty_bitmap), ctx->save.p2m_size,
             XEN_DOMCTL_SHADOW_LOGDIRTY_FINAL, &stats) !=
         ctx->save.p2m_size )
    {
        PERROR("Failed to retrieve logdirty bitmap");
        rc = -1;
        goto out;
    }

    if ( ctx->save.live )
    {
        rc = update_progress_string(ctx, &progress_str);
        if ( rc )
            goto out;
    }
    else
        xc_set_progress_prefix(xch, "Checkpointed save");

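    /*
     * Merge pages deferred during the live phase (e.g. ballooned pfns)
     * back into the dirty set so they are retried now.
     */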
    bitmap_or(dirty_bitmap, ctx->save.deferred_pages, ctx->save.p2m_size);

    if ( !ctx->save.live && ctx->stream_type == XC_STREAM_COLO )
    {
        rc = colo_merge_secondary_dirty_bitmap(ctx);
        if ( rc )
        {
            PERROR("Failed to get secondary vm's dirty pages");
            goto out;
        }
    }

    rc = send_dirty_pages(ctx, stats.dirty_count + ctx->save.nr_deferred_pages);
    if ( rc )
        goto out;

    bitmap_clear(ctx->save.deferred_pages, ctx->save.p2m_size);
    ctx->save.nr_deferred_pages = 0;

 out:
    xc_set_progress_prefix(xch, NULL);
    free(progress_str);
    return rc;
}

static int verify_frames(struct xc_sr_context *ctx)
{
    xc_interface *xch = ctx->xch;
    xc_shadow_op_stats_t stats = { 0, ctx->save.p2m_size };
    int rc;
    struct xc_sr_record rec = { .type = REC_TYPE_VERIFY };

    DPRINTF("Enabling verify mode");

    rc = write_record(ctx, &rec);
    if ( rc )
        goto out;

    xc_set_progress_prefix(xch, "Frames verify");
    rc = send_all_pages(ctx);
    if ( rc )
        goto out;

    if ( xc_logdirty_control(
             xch, ctx->domid, XEN_DOMCTL_SHADOW_OP_PEEK,
             &ctx->save.dirty_bitmap_hbuf, ctx->save.p2m_size,
             0, &stats) != ctx->save.p2m_size )
    {
        PERROR("Failed to retrieve logdirty bitmap");
        rc = -1;
        goto out;
    }

    DPRINTF("  Further stats: faults %u, dirty %u",
            stats.fault_count, stats.dirty_count);

 out:
    return rc;
}

/*
 * Send all domain memory. This is the heart of the live migration loop.
 */
static int send_domain_memory_live(struct xc_sr_context *ctx)
{
    int rc;

    rc = enable_logdirty(ctx);
    if ( rc )
        goto out;

    rc = send_memory_live(ctx);
    if ( rc )
        goto out;

    rc = suspend_and_send_dirty(ctx);
    if ( rc )
        goto out;

    if ( ctx->save.debug && ctx->stream_type == XC_STREAM_PLAIN )
    {
        rc = verify_frames(ctx);
        if ( rc )
            goto out;
    }

 out:
    return rc;
}

/*
 * Checkpointed save.
 */
static int send_domain_memory_checkpointed(struct xc_sr_context *ctx)
{
    return suspend_and_send_dirty(ctx);
}

/*
 * Send all domain memory, pausing the domain first. Generally used for
 * suspend-to-file.
 */
static int send_domain_memory_nonlive(struct xc_sr_context *ctx)
{
    xc_interface *xch = ctx->xch;
    int rc;

    rc = suspend_domain(ctx);
    if ( rc )
        goto err;

    xc_set_progress_prefix(xch, "Frames");

    rc = send_all_pages(ctx);
    if ( rc )
        goto err;

 err:
    return rc;
}

static int setup(struct xc_sr_context *ctx)
{
    xc_interface *xch = ctx->xch;
    int rc;
    DECLARE_HYPERCALL_BUFFER_SHADOW(unsigned long, dirty_bitmap,
                                    &ctx->save.dirty_bitmap_hbuf);

    rc = ctx->save.ops.setup(ctx);
    if ( rc )
        goto err;

    dirty_bitmap = xc_hypercall_buffer_alloc_pages(
        xch, dirty_bitmap, NRPAGES(bitmap_size(ctx->save.p2m_size)));
    ctx->save.batch_pfns = malloc(MAX_BATCH_SIZE *
                                  sizeof(*ctx->save.batch_pfns));
    ctx->save.deferred_pages = bitmap_alloc(ctx->save.p2m_size);

    if ( !ctx->save.batch_pfns || !dirty_bitmap || !ctx->save.deferred_pages )
    {
        ERROR("Unable to allocate memory for dirty bitmaps, batch pfns and"
              " deferred pages");
        rc = -1;
        errno = ENOMEM;
        goto err;
    }

    rc = 0;

 err:
    return rc;
}

static void cleanup(struct xc_sr_context *ctx)
{
    xc_interface *xch = ctx->xch;
    DECLARE_HYPERCALL_BUFFER_SHADOW(unsigned long, dirty_bitmap,
                                    &ctx->save.dirty_bitmap_hbuf);

    xc_shadow_control(xch, ctx->domid, XEN_DOMCTL_SHADOW_OP_OFF,
                      NULL, 0);

    if ( ctx->save.ops.cleanup(ctx) )
        PERROR("Failed to clean up");

    xc_hypercall_buffer_free_pages(xch, dirty_bitmap,
                                   NRPAGES(bitmap_size(ctx->save.p2m_size)));
    free(ctx->save.deferred_pages);
    free(ctx->save.batch_pfns);
}

/*
 * Save a domain.
 */
static int save(struct xc_sr_context *ctx, uint16_t guest_type)
{
    xc_interface *xch = ctx->xch;
    int rc, saved_rc = 0, saved_errno = 0;

    IPRINTF("Saving domain %d, type %s",
            ctx->domid, dhdr_type_to_str(guest_type));

    rc = setup(ctx);
    if ( rc )
        goto err;

    xc_report_progress_single(xch, "Start of stream");

    rc = write_headers(ctx, guest_type);
    if ( rc )
        goto err;

    rc = ctx->save.ops.static_data(ctx);
    if ( rc )
        goto err;

    rc = write_static_data_end_record(ctx);
    if ( rc )
        goto err;

    rc = ctx->save.ops.start_of_stream(ctx);
    if ( rc )
        goto err;

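    /*
     * For a plain stream this loop runs exactly once; for Remus/COLO it
     * repeats, one iteration per checkpoint, until an error or callback
     * failure breaks out via the error path.
     */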
    do {
        rc = ctx->save.ops.start_of_checkpoint(ctx);
        if ( rc )
            goto err;

        rc = ctx->save.ops.check_vm_state(ctx);
        if ( rc )
            goto err;

        if ( ctx->save.live )
            rc = send_domain_memory_live(ctx);
        else if ( ctx->stream_type != XC_STREAM_PLAIN )
            rc = send_domain_memory_checkpointed(ctx);
        else
            rc = send_domain_memory_nonlive(ctx);

        if ( rc )
            goto err;

        if ( !dominfo_shutdown_with(&ctx->dominfo, SHUTDOWN_suspend) )
        {
            ERROR("Domain has not been suspended");
            rc = -1;
            goto err;
        }

        rc = ctx->save.ops.end_of_checkpoint(ctx);
        if ( rc )
            goto err;

        if ( ctx->stream_type != XC_STREAM_PLAIN )
        {
            /*
             * We have now completed the initial live portion of the checkpoint
             * process. Therefore switch into periodically sending synchronous
             * batches of pages.
             */
            ctx->save.live = false;

            rc = write_checkpoint_record(ctx);
            if ( rc )
                goto err;

            if ( ctx->stream_type == XC_STREAM_COLO )
            {
                rc = ctx->save.callbacks->checkpoint(ctx->save.callbacks->data);
                if ( !rc )
                {
                    rc = -1;
                    goto err;
                }
            }

            rc = ctx->save.callbacks->postcopy(ctx->save.callbacks->data);
            if ( rc <= 0 )
                goto err;

            if ( ctx->stream_type == XC_STREAM_COLO )
            {
                rc = ctx->save.callbacks->wait_checkpoint(
                    ctx->save.callbacks->data);
                if ( rc <= 0 )
                    goto err;
            }
            else if ( ctx->stream_type == XC_STREAM_REMUS )
            {
                rc = ctx->save.callbacks->checkpoint(ctx->save.callbacks->data);
                if ( rc <= 0 )
                    goto err;
            }
            else
            {
                ERROR("Unknown checkpointed stream");
                rc = -1;
                goto err;
            }
        }
    } while ( ctx->stream_type != XC_STREAM_PLAIN );

    xc_report_progress_single(xch, "End of stream");

    rc = write_end_record(ctx);
    if ( rc )
        goto err;

    xc_report_progress_single(xch, "Complete");
    goto done;

 err:
    saved_errno = errno;
    saved_rc = rc;
    PERROR("Save failed");

 done:
    cleanup(ctx);

    if ( saved_rc )
    {
        rc = saved_rc;
        errno = saved_errno;
    }

    return rc;
}

int xc_domain_save(xc_interface *xch, int io_fd, uint32_t dom,
                   uint32_t flags, struct save_callbacks *callbacks,
                   xc_stream_type_t stream_type, int recv_fd)
{
    struct xc_sr_context ctx = {
        .xch = xch,
        .fd = io_fd,
        .stream_type = stream_type,
    };
    bool hvm;

    /* GCC 4.4 (of CentOS 6.x vintage) can't initialise anonymous unions. */
    ctx.save.callbacks = callbacks;
    ctx.save.live = !!(flags & XCFLAGS_LIVE);
    ctx.save.debug = !!(flags & XCFLAGS_DEBUG);
    ctx.save.recv_fd = recv_fd;

    if ( xc_domain_getinfo_single(xch, dom, &ctx.dominfo) < 0 )
    {
        PERROR("Failed to get domain info");
        return -1;
    }

    hvm = ctx.dominfo.flags & XEN_DOMINF_hvm_guest;

    /* Sanity check stream_type-related parameters */
    switch ( stream_type )
    {
    case XC_STREAM_COLO:
        assert(callbacks->wait_checkpoint);
        /* Fallthrough */
    case XC_STREAM_REMUS:
        assert(callbacks->checkpoint && callbacks->postcopy);
        /* Fallthrough */
    case XC_STREAM_PLAIN:
        if ( hvm )
            assert(callbacks->switch_qemu_logdirty);
        break;

    default:
        assert(!"Bad stream_type");
        break;
    }

    DPRINTF("fd %d, dom %u, flags %u, hvm %d",
            io_fd, dom, flags, hvm);

    ctx.domid = dom;

    if ( hvm )
    {
        ctx.save.ops = save_ops_x86_hvm;
        return save(&ctx, DHDR_TYPE_X86_HVM);
    }
    else
    {
        ctx.save.ops = save_ops_x86_pv;
        return save(&ctx, DHDR_TYPE_X86_PV);
    }
}

/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */