#include <assert.h>
#include <arpa/inet.h>

#include "xg_sr_common.h"

/*
 * Writes an Image header and Domain header into the stream.
 */
static int write_headers(struct xc_sr_context *ctx, uint16_t guest_type)
{
    xc_interface *xch = ctx->xch;
    int32_t xen_version = xc_version(xch, XENVER_version, NULL);
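    /*
     * Note: image header fields are written in big endian (network) byte
     * order; IHDR_OPT_LITTLE_ENDIAN advertises the endianness used for the
     * remainder of the stream.
     */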
    struct xc_sr_ihdr ihdr = {
        .marker  = IHDR_MARKER,
        .id      = htonl(IHDR_ID),
        .version = htonl(3),
        .options = htons(IHDR_OPT_LITTLE_ENDIAN),
    };
    struct xc_sr_dhdr dhdr = {
        .type       = guest_type,
        .page_shift = XC_PAGE_SHIFT,
        .xen_major  = (xen_version >> 16) & 0xffff,
        .xen_minor  = (xen_version)       & 0xffff,
    };

    if ( xen_version < 0 )
    {
        PERROR("Unable to obtain Xen Version");
        return -1;
    }

    if ( write_exact(ctx->fd, &ihdr, sizeof(ihdr)) )
    {
        PERROR("Unable to write Image Header to stream");
        return -1;
    }

    if ( write_exact(ctx->fd, &dhdr, sizeof(dhdr)) )
    {
        PERROR("Unable to write Domain Header to stream");
        return -1;
    }

    return 0;
}

/*
 * Writes an END record into the stream.
 */
static int write_end_record(struct xc_sr_context *ctx)
{
    struct xc_sr_record end = { .type = REC_TYPE_END };

    return write_record(ctx, &end);
}

/*
 * Writes a STATIC_DATA_END record into the stream.
 */
static int write_static_data_end_record(struct xc_sr_context *ctx)
{
    struct xc_sr_record end = { .type = REC_TYPE_STATIC_DATA_END };

    return write_record(ctx, &end);
}

/*
 * Writes a CHECKPOINT record into the stream.
 */
static int write_checkpoint_record(struct xc_sr_context *ctx)
{
    struct xc_sr_record checkpoint = { .type = REC_TYPE_CHECKPOINT };

    return write_record(ctx, &checkpoint);
}

/*
 * Writes a batch of memory as a PAGE_DATA record into the stream.  The batch
 * is constructed in ctx->save.batch_pfns.
 *
 * This function:
 * - gets the types for each pfn in the batch.
 * - for each pfn with real data:
 *   - maps and attempts to localise the pages.
 * - constructs and writes a PAGE_DATA record into the stream.
 */
static int write_batch(struct xc_sr_context *ctx)
{
    xc_interface *xch = ctx->xch;
    xen_pfn_t *mfns = NULL, *types = NULL;
    void *guest_mapping = NULL;
    void **guest_data = NULL;
    void **local_pages = NULL;
    int *errors = NULL, rc = -1;
    unsigned int i, p, nr_pages = 0, nr_pages_mapped = 0;
    unsigned int nr_pfns = ctx->save.nr_batch_pfns;
    void *page, *orig_page;
    uint64_t *rec_pfns = NULL;
    struct iovec *iov = NULL;
    int iovcnt = 0;
    struct xc_sr_rec_page_data_header hdr = { 0 };
    struct xc_sr_record rec = {
        .type = REC_TYPE_PAGE_DATA,
    };

    assert(nr_pfns != 0);

    /* Mfns of the batch pfns. */
    mfns = malloc(nr_pfns * sizeof(*mfns));
    /* Types of the batch pfns. */
    types = malloc(nr_pfns * sizeof(*types));
    /* Errors from attempting to map the gfns. */
    errors = malloc(nr_pfns * sizeof(*errors));
    /* Pointers to page data to send.  Mapped gfns or local allocations. */
    guest_data = calloc(nr_pfns, sizeof(*guest_data));
    /* Pointers to locally allocated pages.  Need freeing. */
    local_pages = calloc(nr_pfns, sizeof(*local_pages));
    /* iovec[] for writev(). */
    iov = malloc((nr_pfns + 4) * sizeof(*iov));

    if ( !mfns || !types || !errors || !guest_data || !local_pages || !iov )
    {
        ERROR("Unable to allocate arrays for a batch of %u pages",
              nr_pfns);
        goto err;
    }

    for ( i = 0; i < nr_pfns; ++i )
    {
        types[i] = mfns[i] = ctx->save.ops.pfn_to_gfn(ctx,
                                                      ctx->save.batch_pfns[i]);

        /* Likely a ballooned page. */
        if ( mfns[i] == INVALID_MFN )
        {
            set_bit(ctx->save.batch_pfns[i], ctx->save.deferred_pages);
            ++ctx->save.nr_deferred_pages;
        }
    }

    rc = xc_get_pfn_type_batch(xch, ctx->domid, nr_pfns, types);
    if ( rc )
    {
        PERROR("Failed to get types for pfn batch");
        goto err;
    }
    rc = -1;

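    /*
     * Compact mfns[] in place so it contains only the frames which carry
     * data in the stream; nr_pages counts them.  Every pfn still gets a
     * pfn/type entry in the record, but only these frames have page
     * contents following the header.
     */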
    for ( i = 0; i < nr_pfns; ++i )
    {
        if ( !is_known_page_type(types[i]) )
        {
            ERROR("Unknown type %#"PRIpfn" for pfn %#"PRIpfn,
                  types[i], mfns[i]);
            goto err;
        }

        if ( !page_type_has_stream_data(types[i]) )
            continue;

        mfns[nr_pages++] = mfns[i];
    }

    if ( nr_pages > 0 )
    {
        guest_mapping = xenforeignmemory_map(
            xch->fmem, ctx->domid, PROT_READ, nr_pages, mfns, errors);
        if ( !guest_mapping )
        {
            PERROR("Failed to map guest pages");
            goto err;
        }
        nr_pages_mapped = nr_pages;

        for ( i = 0, p = 0; i < nr_pfns; ++i )
        {
            if ( !page_type_has_stream_data(types[i]) )
                continue;

            if ( errors[p] )
            {
                ERROR("Mapping of pfn %#"PRIpfn" (mfn %#"PRIpfn") failed %d",
                      ctx->save.batch_pfns[i], mfns[p], errors[p]);
                goto err;
            }

            orig_page = page = guest_mapping + (p * PAGE_SIZE);
            rc = ctx->save.ops.normalise_page(ctx, types[i], &page);

            if ( orig_page != page )
                local_pages[i] = page;

            if ( rc )
            {
                if ( rc == -1 && errno == EAGAIN )
                {
                    set_bit(ctx->save.batch_pfns[i], ctx->save.deferred_pages);
                    ++ctx->save.nr_deferred_pages;
                    types[i] = XEN_DOMCTL_PFINFO_XTAB;
                    --nr_pages;
                }
                else
                    goto err;
            }
            else
                guest_data[i] = page;

            rc = -1;
            ++p;
        }
    }

    rec_pfns = malloc(nr_pfns * sizeof(*rec_pfns));
    if ( !rec_pfns )
    {
        ERROR("Unable to allocate %zu bytes of memory for page data pfn list",
              nr_pfns * sizeof(*rec_pfns));
        goto err;
    }

    hdr.count = nr_pfns;

    rec.length = sizeof(hdr);
    rec.length += nr_pfns * sizeof(*rec_pfns);
    rec.length += nr_pages * PAGE_SIZE;

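    /*
     * Each entry pairs a pfn with its XEN_DOMCTL_PFINFO_* type.  The type
     * occupies the top bits of its 32 bit word, so shifting it by 32 places
     * it in the upper half of the 64 bit entry, above the pfn.
     */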
    for ( i = 0; i < nr_pfns; ++i )
        rec_pfns[i] = ((uint64_t)(types[i]) << 32) | ctx->save.batch_pfns[i];

    iov[0].iov_base = &rec.type;
    iov[0].iov_len = sizeof(rec.type);

    iov[1].iov_base = &rec.length;
    iov[1].iov_len = sizeof(rec.length);

    iov[2].iov_base = &hdr;
    iov[2].iov_len = sizeof(hdr);

    iov[3].iov_base = rec_pfns;
    iov[3].iov_len = nr_pfns * sizeof(*rec_pfns);

    iovcnt = 4;

    if ( nr_pages )
    {
        for ( i = 0; i < nr_pfns; ++i )
        {
            if ( guest_data[i] )
            {
                iov[iovcnt].iov_base = guest_data[i];
                iov[iovcnt].iov_len = PAGE_SIZE;
                iovcnt++;
                --nr_pages;
            }
        }
    }

    if ( writev_exact(ctx->fd, iov, iovcnt) )
    {
        PERROR("Failed to write page data to stream");
        goto err;
    }

    /* Sanity check we have sent all the pages we expected to. */
    assert(nr_pages == 0);
    rc = ctx->save.nr_batch_pfns = 0;

 err:
    free(rec_pfns);
    if ( guest_mapping )
        xenforeignmemory_unmap(xch->fmem, guest_mapping, nr_pages_mapped);
    for ( i = 0; local_pages && i < nr_pfns; ++i )
        free(local_pages[i]);
    free(iov);
    free(local_pages);
    free(guest_data);
    free(errors);
    free(types);
    free(mfns);

    return rc;
}

/*
 * Flush a batch of pfns into the stream.
 */
static int flush_batch(struct xc_sr_context *ctx)
{
    int rc = 0;

    if ( ctx->save.nr_batch_pfns == 0 )
        return rc;

    rc = write_batch(ctx);

    if ( !rc )
    {
        VALGRIND_MAKE_MEM_UNDEFINED(ctx->save.batch_pfns,
                                    MAX_BATCH_SIZE *
                                    sizeof(*ctx->save.batch_pfns));
    }

    return rc;
}

/*
 * Add a single pfn to the batch, flushing the batch if full.
 */
static int add_to_batch(struct xc_sr_context *ctx, xen_pfn_t pfn)
{
    int rc = 0;

    if ( ctx->save.nr_batch_pfns == MAX_BATCH_SIZE )
        rc = flush_batch(ctx);

    if ( rc == 0 )
        ctx->save.batch_pfns[ctx->save.nr_batch_pfns++] = pfn;

    return rc;
}

/*
 * Pause/suspend the domain, and refresh ctx->dominfo if required.
 */
static int suspend_domain(struct xc_sr_context *ctx)
{
    xc_interface *xch = ctx->xch;

    /* TODO: Properly specify the return value from this callback.  All
     * implementations currently appear to return 1 for success, whereas
     * the legacy code checks for != 0. */
    int cb_rc = ctx->save.callbacks->suspend(ctx->save.callbacks->data);

    if ( cb_rc == 0 )
    {
        ERROR("save callback suspend() failed: %d", cb_rc);
        return -1;
    }

    /* Refresh domain information. */
    if ( xc_domain_getinfo_single(xch, ctx->domid, &ctx->dominfo) < 0 )
    {
        PERROR("Unable to refresh domain information");
        return -1;
    }

    /* Confirm the domain has actually been paused. */
    if ( !dominfo_shutdown_with(&ctx->dominfo, SHUTDOWN_suspend) )
    {
        ERROR("Domain has not been suspended: shutdown %d, reason %d",
              ctx->dominfo.flags & XEN_DOMINF_shutdown,
              dominfo_shutdown_reason(&ctx->dominfo));
        return -1;
    }

    xc_report_progress_single(xch, "Domain now suspended");

    return 0;
}

/*
 * Send a subset of pages from the guest's p2m, according to the dirty
 * bitmap.  Used for each subsequent iteration of the live migration loop.
 *
 * The bitmap is bounded by p2m_size.
 */
static int send_dirty_pages(struct xc_sr_context *ctx,
                            unsigned long entries)
{
    xc_interface *xch = ctx->xch;
    xen_pfn_t p;
    unsigned long written;
    int rc;
    DECLARE_HYPERCALL_BUFFER_SHADOW(unsigned long, dirty_bitmap,
                                    &ctx->save.dirty_bitmap_hbuf);

    for ( p = 0, written = 0; p < ctx->save.p2m_size; ++p )
    {
        if ( !test_bit(p, dirty_bitmap) )
            continue;

        rc = add_to_batch(ctx, p);
        if ( rc )
            return rc;

        /* Update progress every 4MB worth of memory sent, i.e. every
         * 1 << (22 - 12) pages. */
        if ( (written & ((1U << (22 - 12)) - 1)) == 0 )
            xc_report_progress_step(xch, written, entries);

        ++written;
    }

    rc = flush_batch(ctx);
    if ( rc )
        return rc;

    if ( written > entries )
        DPRINTF("Bitmap contained more entries than expected...");

    xc_report_progress_step(xch, entries, entries);

    return ctx->save.ops.check_vm_state(ctx);
}

/*
 * Send all pages in the guest's p2m.  Used as the first iteration of the
 * live migration loop, and for a non-live save.
 */
static int send_all_pages(struct xc_sr_context *ctx)
{
    DECLARE_HYPERCALL_BUFFER_SHADOW(unsigned long, dirty_bitmap,
                                    &ctx->save.dirty_bitmap_hbuf);

    bitmap_set(dirty_bitmap, ctx->save.p2m_size);

    return send_dirty_pages(ctx, ctx->save.p2m_size);
}

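/*
 * Enable log-dirty mode for the domain.  If the first attempt fails, e.g.
 * because logdirty is already active for VRAM tracking, switch logdirty
 * off and try once more.
 */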
static int enable_logdirty(struct xc_sr_context *ctx)
{
    xc_interface *xch = ctx->xch;
    int on1 = 0, off = 0, on2 = 0;
    int rc;

    /* This juggling is required if logdirty is enabled for VRAM tracking. */
    rc = xc_shadow_control(xch, ctx->domid,
                           XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY,
                           NULL, 0);
    if ( rc < 0 )
    {
        on1 = errno;
        rc = xc_shadow_control(xch, ctx->domid, XEN_DOMCTL_SHADOW_OP_OFF,
                               NULL, 0);
        if ( rc < 0 )
            off = errno;
        else
        {
            rc = xc_shadow_control(xch, ctx->domid,
                                   XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY,
                                   NULL, 0);
            if ( rc < 0 )
                on2 = errno;
        }
        if ( rc < 0 )
        {
            PERROR("Failed to enable logdirty: %d,%d,%d", on1, off, on2);
            return rc;
        }
    }

    return 0;
}

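/*
 * Update the progress prefix to name the current iteration, replacing
 * (and freeing) the previous string via *str.
 */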
static int update_progress_string(struct xc_sr_context *ctx, char **str)
{
    xc_interface *xch = ctx->xch;
    char *new_str = NULL;
    unsigned int iter = ctx->save.stats.iteration;

    if ( asprintf(&new_str, "Frames iteration %u", iter) == -1 )
    {
        PERROR("Unable to allocate new progress string");
        return -1;
    }

    free(*str);
    *str = new_str;

    xc_set_progress_prefix(xch, *str);
    return 0;
}

/*
 * This is the live migration precopy policy - it's called periodically during
 * the precopy phase of live migrations, and is responsible for deciding when
 * the precopy phase should terminate and what should be done next.
 *
 * The policy implemented here behaves identically to the policy previously
 * hard-coded into xc_domain_save() - it proceeds to the stop-and-copy phase
 * of the live migration when there are either fewer than 50 dirty pages, or
 * when at least 5 precopy rounds have completed.
 */
#define SPP_MAX_ITERATIONS      5
#define SPP_TARGET_DIRTY_COUNT 50

static int simple_precopy_policy(struct precopy_stats stats, void *user)
{
    return ((stats.dirty_count >= 0 &&
             stats.dirty_count < SPP_TARGET_DIRTY_COUNT) ||
            stats.iteration >= SPP_MAX_ITERATIONS)
        ? XGS_POLICY_STOP_AND_COPY
        : XGS_POLICY_CONTINUE_PRECOPY;
}

/*
 * Send memory while the guest is running.
 */
static int send_memory_live(struct xc_sr_context *ctx)
{
    xc_interface *xch = ctx->xch;
    xc_shadow_op_stats_t stats = { 0, ctx->save.p2m_size };
    char *progress_str = NULL;
    unsigned int x = 0;
    int rc;
    int policy_decision;

    DECLARE_HYPERCALL_BUFFER_SHADOW(unsigned long, dirty_bitmap,
                                    &ctx->save.dirty_bitmap_hbuf);

    precopy_policy_t precopy_policy = ctx->save.callbacks->precopy_policy;
    void *data = ctx->save.callbacks->data;

    struct precopy_stats *policy_stats;

    rc = update_progress_string(ctx, &progress_str);
    if ( rc )
        goto out;

    ctx->save.stats = (struct precopy_stats){
        .dirty_count = ctx->save.p2m_size,
    };
    policy_stats = &ctx->save.stats;

    if ( precopy_policy == NULL )
        precopy_policy = simple_precopy_policy;

    bitmap_set(dirty_bitmap, ctx->save.p2m_size);

    for ( ; ; )
    {
        policy_decision = precopy_policy(*policy_stats, data);
        x++;

        if ( stats.dirty_count > 0 && policy_decision != XGS_POLICY_ABORT )
        {
            rc = update_progress_string(ctx, &progress_str);
            if ( rc )
                goto out;

            rc = send_dirty_pages(ctx, stats.dirty_count);
            if ( rc )
                goto out;
        }

        if ( policy_decision != XGS_POLICY_CONTINUE_PRECOPY )
            break;

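        /*
         * The true dirty count is unknown until the log-dirty bitmap is
         * re-read below; report -1 so a policy can distinguish "unknown"
         * from "no dirty pages".
         */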
        policy_stats->iteration     = x;
        policy_stats->total_written += policy_stats->dirty_count;
        policy_stats->dirty_count   = -1;

        policy_decision = precopy_policy(*policy_stats, data);

        if ( policy_decision != XGS_POLICY_CONTINUE_PRECOPY )
            break;

        if ( xc_logdirty_control(
                 xch, ctx->domid, XEN_DOMCTL_SHADOW_OP_CLEAN,
                 &ctx->save.dirty_bitmap_hbuf, ctx->save.p2m_size,
                 0, &stats) != ctx->save.p2m_size )
        {
            PERROR("Failed to retrieve logdirty bitmap");
            rc = -1;
            goto out;
        }

        policy_stats->dirty_count = stats.dirty_count;
    }

    if ( policy_decision == XGS_POLICY_ABORT )
    {
        PERROR("Abort precopy loop");
        rc = -1;
        goto out;
    }

 out:
    xc_set_progress_prefix(xch, NULL);
    free(progress_str);
    return rc;
}

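/*
 * COLO support: read a CHECKPOINT_DIRTY_PFN_LIST record from the secondary
 * and merge its pfns into our dirty bitmap, so their pages are included in
 * the next checkpoint.
 */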
static int colo_merge_secondary_dirty_bitmap(struct xc_sr_context *ctx)
{
    xc_interface *xch = ctx->xch;
    /* Zero-initialise so rec.data is safe to free() if read_record() fails. */
    struct xc_sr_record rec = { 0 };
    uint64_t *pfns = NULL;
    uint64_t pfn;
    unsigned int count, i;
    int rc;
    DECLARE_HYPERCALL_BUFFER_SHADOW(unsigned long, dirty_bitmap,
                                    &ctx->save.dirty_bitmap_hbuf);

    rc = read_record(ctx, ctx->save.recv_fd, &rec);
    if ( rc )
        goto err;

    if ( rec.type != REC_TYPE_CHECKPOINT_DIRTY_PFN_LIST )
    {
        PERROR("Expected dirty bitmap record, but received %u", rec.type);
        rc = -1;
        goto err;
    }

    if ( rec.length % sizeof(*pfns) )
    {
        PERROR("Invalid dirty pfn list record length %u", rec.length);
        rc = -1;
        goto err;
    }

    count = rec.length / sizeof(*pfns);
    pfns = rec.data;

    for ( i = 0; i < count; i++ )
    {
        pfn = pfns[i];
        if ( pfn >= ctx->save.p2m_size )
        {
            PERROR("Invalid pfn 0x%" PRIx64, pfn);
            rc = -1;
            goto err;
        }

        set_bit(pfn, dirty_bitmap);
    }

    rc = 0;

 err:
    free(rec.data);
    return rc;
}

/*
 * Suspend the domain and send dirty memory.
 * This is the last iteration of the live migration and the
 * heart of the checkpointed stream.
 */
static int suspend_and_send_dirty(struct xc_sr_context *ctx)
{
    xc_interface *xch = ctx->xch;
    xc_shadow_op_stats_t stats = { 0, ctx->save.p2m_size };
    char *progress_str = NULL;
    int rc;
    DECLARE_HYPERCALL_BUFFER_SHADOW(unsigned long, dirty_bitmap,
                                    &ctx->save.dirty_bitmap_hbuf);

    rc = suspend_domain(ctx);
    if ( rc )
        goto out;

    if ( xc_logdirty_control(
             xch, ctx->domid, XEN_DOMCTL_SHADOW_OP_CLEAN,
             HYPERCALL_BUFFER(dirty_bitmap), ctx->save.p2m_size,
             XEN_DOMCTL_SHADOW_LOGDIRTY_FINAL, &stats) !=
         ctx->save.p2m_size )
    {
        PERROR("Failed to retrieve logdirty bitmap");
        rc = -1;
        goto out;
    }

    if ( ctx->save.live )
    {
        rc = update_progress_string(ctx, &progress_str);
        if ( rc )
            goto out;
    }
    else
        xc_set_progress_prefix(xch, "Checkpointed save");

    bitmap_or(dirty_bitmap, ctx->save.deferred_pages, ctx->save.p2m_size);

    if ( !ctx->save.live && ctx->stream_type == XC_STREAM_COLO )
    {
        rc = colo_merge_secondary_dirty_bitmap(ctx);
        if ( rc )
        {
            PERROR("Failed to get secondary vm's dirty pages");
            goto out;
        }
    }

    rc = send_dirty_pages(ctx, stats.dirty_count + ctx->save.nr_deferred_pages);
    if ( rc )
        goto out;

    bitmap_clear(ctx->save.deferred_pages, ctx->save.p2m_size);
    ctx->save.nr_deferred_pages = 0;

 out:
    xc_set_progress_prefix(xch, NULL);
    free(progress_str);
    return rc;
}

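/*
 * Debug support: write a VERIFY record followed by a second copy of all
 * frames, allowing the restoring side to check the memory image it has
 * received.
 */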
static int verify_frames(struct xc_sr_context *ctx)
{
    xc_interface *xch = ctx->xch;
    xc_shadow_op_stats_t stats = { 0, ctx->save.p2m_size };
    int rc;
    struct xc_sr_record rec = { .type = REC_TYPE_VERIFY };

    DPRINTF("Enabling verify mode");

    rc = write_record(ctx, &rec);
    if ( rc )
        goto out;

    xc_set_progress_prefix(xch, "Frames verify");
    rc = send_all_pages(ctx);
    if ( rc )
        goto out;

    if ( xc_logdirty_control(
             xch, ctx->domid, XEN_DOMCTL_SHADOW_OP_PEEK,
             &ctx->save.dirty_bitmap_hbuf, ctx->save.p2m_size,
             0, &stats) != ctx->save.p2m_size )
    {
        PERROR("Failed to retrieve logdirty bitmap");
        rc = -1;
        goto out;
    }

    DPRINTF("  Further stats: faults %u, dirty %u",
            stats.fault_count, stats.dirty_count);

 out:
    return rc;
}

/*
 * Send all domain memory.  This is the heart of the live migration loop.
 */
static int send_domain_memory_live(struct xc_sr_context *ctx)
{
    int rc;

    rc = enable_logdirty(ctx);
    if ( rc )
        goto out;

    rc = send_memory_live(ctx);
    if ( rc )
        goto out;

    rc = suspend_and_send_dirty(ctx);
    if ( rc )
        goto out;

    if ( ctx->save.debug && ctx->stream_type == XC_STREAM_PLAIN )
    {
        rc = verify_frames(ctx);
        if ( rc )
            goto out;
    }

 out:
    return rc;
}

/*
 * Checkpointed save.
 */
static int send_domain_memory_checkpointed(struct xc_sr_context *ctx)
{
    return suspend_and_send_dirty(ctx);
}

/*
 * Send all domain memory, pausing the domain first.  Generally used for
 * suspend-to-file.
 */
static int send_domain_memory_nonlive(struct xc_sr_context *ctx)
{
    xc_interface *xch = ctx->xch;
    int rc;

    rc = suspend_domain(ctx);
    if ( rc )
        goto err;

    xc_set_progress_prefix(xch, "Frames");

    rc = send_all_pages(ctx);
    if ( rc )
        goto err;

 err:
    return rc;
}

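/*
 * Allocate the resources needed for the save operation: arch-specific
 * state via ops.setup(), the log-dirty hypercall buffer, the pfn batch
 * array, and the deferred pages bitmap.
 */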
static int setup(struct xc_sr_context *ctx)
{
    xc_interface *xch = ctx->xch;
    int rc;
    DECLARE_HYPERCALL_BUFFER_SHADOW(unsigned long, dirty_bitmap,
                                    &ctx->save.dirty_bitmap_hbuf);

    rc = ctx->save.ops.setup(ctx);
    if ( rc )
        goto err;

    dirty_bitmap = xc_hypercall_buffer_alloc_pages(
        xch, dirty_bitmap, NRPAGES(bitmap_size(ctx->save.p2m_size)));
    ctx->save.batch_pfns = malloc(MAX_BATCH_SIZE *
                                  sizeof(*ctx->save.batch_pfns));
    ctx->save.deferred_pages = bitmap_alloc(ctx->save.p2m_size);

    if ( !ctx->save.batch_pfns || !dirty_bitmap || !ctx->save.deferred_pages )
    {
        ERROR("Unable to allocate memory for dirty bitmaps, batch pfns and"
              " deferred pages");
        rc = -1;
        errno = ENOMEM;
        goto err;
    }

    rc = 0;

 err:
    return rc;
}

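/*
 * Undo setup(): switch log-dirty mode off, run the arch-specific cleanup,
 * and free the buffers allocated above.
 */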
static void cleanup(struct xc_sr_context *ctx)
{
    xc_interface *xch = ctx->xch;
    DECLARE_HYPERCALL_BUFFER_SHADOW(unsigned long, dirty_bitmap,
                                    &ctx->save.dirty_bitmap_hbuf);

    xc_shadow_control(xch, ctx->domid, XEN_DOMCTL_SHADOW_OP_OFF,
                      NULL, 0);

    if ( ctx->save.ops.cleanup(ctx) )
        PERROR("Failed to clean up");

    xc_hypercall_buffer_free_pages(xch, dirty_bitmap,
                                   NRPAGES(bitmap_size(ctx->save.p2m_size)));
    free(ctx->save.deferred_pages);
    free(ctx->save.batch_pfns);
}

/*
 * Save a domain.
 */
static int save(struct xc_sr_context *ctx, uint16_t guest_type)
{
    xc_interface *xch = ctx->xch;
    int rc, saved_rc = 0, saved_errno = 0;

    IPRINTF("Saving domain %d, type %s",
            ctx->domid, dhdr_type_to_str(guest_type));

    rc = setup(ctx);
    if ( rc )
        goto err;

    xc_report_progress_single(xch, "Start of stream");

    rc = write_headers(ctx, guest_type);
    if ( rc )
        goto err;

    rc = ctx->save.ops.static_data(ctx);
    if ( rc )
        goto err;

    rc = write_static_data_end_record(ctx);
    if ( rc )
        goto err;

    rc = ctx->save.ops.start_of_stream(ctx);
    if ( rc )
        goto err;

    do {
        rc = ctx->save.ops.start_of_checkpoint(ctx);
        if ( rc )
            goto err;

        rc = ctx->save.ops.check_vm_state(ctx);
        if ( rc )
            goto err;

        if ( ctx->save.live )
            rc = send_domain_memory_live(ctx);
        else if ( ctx->stream_type != XC_STREAM_PLAIN )
            rc = send_domain_memory_checkpointed(ctx);
        else
            rc = send_domain_memory_nonlive(ctx);

        if ( rc )
            goto err;

        if ( !dominfo_shutdown_with(&ctx->dominfo, SHUTDOWN_suspend) )
        {
            ERROR("Domain has not been suspended");
            rc = -1;
            goto err;
        }

        rc = ctx->save.ops.end_of_checkpoint(ctx);
        if ( rc )
            goto err;

        if ( ctx->stream_type != XC_STREAM_PLAIN )
        {
            /*
             * We have now completed the initial live portion of the checkpoint
             * process. Therefore switch into periodically sending synchronous
             * batches of pages.
             */
            ctx->save.live = false;

            rc = write_checkpoint_record(ctx);
            if ( rc )
                goto err;

            if ( ctx->stream_type == XC_STREAM_COLO )
            {
                rc = ctx->save.callbacks->checkpoint(ctx->save.callbacks->data);
                if ( !rc )
                {
                    rc = -1;
                    goto err;
                }
            }

            rc = ctx->save.callbacks->postcopy(ctx->save.callbacks->data);
            if ( rc <= 0 )
                goto err;

            if ( ctx->stream_type == XC_STREAM_COLO )
            {
                rc = ctx->save.callbacks->wait_checkpoint(
                    ctx->save.callbacks->data);
                if ( rc <= 0 )
                    goto err;
            }
            else if ( ctx->stream_type == XC_STREAM_REMUS )
            {
                rc = ctx->save.callbacks->checkpoint(ctx->save.callbacks->data);
                if ( rc <= 0 )
                    goto err;
            }
            else
            {
                ERROR("Unknown checkpointed stream");
                rc = -1;
                goto err;
            }
        }
    } while ( ctx->stream_type != XC_STREAM_PLAIN );

    xc_report_progress_single(xch, "End of stream");

    rc = write_end_record(ctx);
    if ( rc )
        goto err;

    xc_report_progress_single(xch, "Complete");
    goto done;

 err:
    saved_errno = errno;
    saved_rc = rc;
    PERROR("Save failed");

 done:
    cleanup(ctx);

    if ( saved_rc )
    {
        rc = saved_rc;
        errno = saved_errno;
    }

    return rc;
}

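/*
 * Entry point.  Sanity checks the stream type against the provided
 * callbacks, selects the x86 PV or HVM save ops for the domain, and runs
 * the save to completion.
 */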
int xc_domain_save(xc_interface *xch, int io_fd, uint32_t dom,
                   uint32_t flags, struct save_callbacks *callbacks,
                   xc_stream_type_t stream_type, int recv_fd)
{
    struct xc_sr_context ctx = {
        .xch = xch,
        .fd = io_fd,
        .stream_type = stream_type,
    };
    bool hvm;

    /* GCC 4.4 (of CentOS 6.x vintage) can't initialise anonymous unions. */
    ctx.save.callbacks = callbacks;
    ctx.save.live  = !!(flags & XCFLAGS_LIVE);
    ctx.save.debug = !!(flags & XCFLAGS_DEBUG);
    ctx.save.recv_fd = recv_fd;

    if ( xc_domain_getinfo_single(xch, dom, &ctx.dominfo) < 0 )
    {
        PERROR("Failed to get domain info");
        return -1;
    }

    hvm = ctx.dominfo.flags & XEN_DOMINF_hvm_guest;

    /* Sanity check stream_type-related parameters. */
    switch ( stream_type )
    {
    case XC_STREAM_COLO:
        assert(callbacks->wait_checkpoint);
        /* Fallthrough */
    case XC_STREAM_REMUS:
        assert(callbacks->checkpoint && callbacks->postcopy);
        /* Fallthrough */
    case XC_STREAM_PLAIN:
        if ( hvm )
            assert(callbacks->switch_qemu_logdirty);
        break;

    default:
        assert(!"Bad stream_type");
        break;
    }

    DPRINTF("fd %d, dom %u, flags %u, hvm %d",
            io_fd, dom, flags, hvm);

    ctx.domid = dom;

    if ( hvm )
    {
        ctx.save.ops = save_ops_x86_hvm;
        return save(&ctx, DHDR_TYPE_X86_HVM);
    }
    else
    {
        ctx.save.ops = save_ops_x86_pv;
        return save(&ctx, DHDR_TYPE_X86_PV);
    }
}

/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */