#include <arpa/inet.h>

#include <assert.h>

#include "xg_sr_common.h"

/*
 * Read and validate the Image and Domain headers.
 */
static int read_headers(struct xc_sr_context *ctx)
{
    xc_interface *xch = ctx->xch;
    struct xc_sr_ihdr ihdr;
    struct xc_sr_dhdr dhdr;

    if ( read_exact(ctx->fd, &ihdr, sizeof(ihdr)) )
    {
        PERROR("Failed to read Image Header from stream");
        return -1;
    }

    ihdr.id      = ntohl(ihdr.id);
    ihdr.version = ntohl(ihdr.version);
    ihdr.options = ntohs(ihdr.options);

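    /*
     * Note: the marker (all ones, per the stream spec) reads the same in
     * either byte order, so it is checked without byte-swapping.
     */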
    if ( ihdr.marker != IHDR_MARKER )
    {
        ERROR("Invalid marker: Got 0x%016"PRIx64, ihdr.marker);
        return -1;
    }

    if ( ihdr.id != IHDR_ID )
    {
        ERROR("Invalid ID: Expected 0x%08x, Got 0x%08x", IHDR_ID, ihdr.id);
        return -1;
    }

    if ( ihdr.version < 2 || ihdr.version > 3 )
    {
        ERROR("Invalid Version: Expected 2 <= ver <= 3, Got %u",
              ihdr.version);
        return -1;
    }

    if ( ihdr.options & IHDR_OPT_BIG_ENDIAN )
    {
        ERROR("Unable to handle big endian streams");
        return -1;
    }

    ctx->restore.format_version = ihdr.version;

    if ( read_exact(ctx->fd, &dhdr, sizeof(dhdr)) )
    {
        PERROR("Failed to read Domain Header from stream");
        return -1;
    }

    ctx->restore.guest_type = dhdr.type;
    ctx->restore.guest_page_size = (1U << dhdr.page_shift);

    if ( dhdr.xen_major == 0 )
    {
        IPRINTF("Found %s domain, converted from legacy stream format",
                dhdr_type_to_str(dhdr.type));
        DPRINTF("  Legacy conversion script version %u", dhdr.xen_minor);
    }
    else
        IPRINTF("Found %s domain from Xen %u.%u",
                dhdr_type_to_str(dhdr.type), dhdr.xen_major, dhdr.xen_minor);

    return 0;
}

/*
 * Is a pfn populated?
 */
static bool pfn_is_populated(const struct xc_sr_context *ctx, xen_pfn_t pfn)
{
    if ( pfn > ctx->restore.max_populated_pfn )
        return false;

    return test_bit(pfn, ctx->restore.populated_pfns);
}

/*
 * Set a pfn as populated, expanding the tracking structures if needed.  To
 * avoid realloc()ing too excessively, the size is increased to the nearest
 * power of two large enough to contain the required pfn.
 */
static int pfn_set_populated(struct xc_sr_context *ctx, xen_pfn_t pfn)
{
    xc_interface *xch = ctx->xch;

    if ( pfn > ctx->restore.max_populated_pfn )
    {
        xen_pfn_t new_max;
        size_t old_sz, new_sz;
        unsigned long *p;

        /* Round up to the nearest power of two larger than pfn, less 1. */
        new_max = pfn;
        new_max |= new_max >> 1;
        new_max |= new_max >> 2;
        new_max |= new_max >> 4;
        new_max |= new_max >> 8;
        new_max |= new_max >> 16;
#ifdef __x86_64__
        new_max |= new_max >> 32;
#endif

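        /*
         * Worked example (hypothetical value): pfn 0x12345 smears to a
         * new_max of 0x1ffff, i.e. the next power of two (0x20000) less 1.
         * As every new_max has this form, each expansion at least doubles
         * the bitmap.
         */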
        old_sz = bitmap_size(ctx->restore.max_populated_pfn + 1);
        new_sz = bitmap_size(new_max + 1);
        p = realloc(ctx->restore.populated_pfns, new_sz);
        if ( !p )
        {
            ERROR("Failed to realloc populated bitmap");
            errno = ENOMEM;
            return -1;
        }

        memset((uint8_t *)p + old_sz, 0x00, new_sz - old_sz);

        ctx->restore.populated_pfns    = p;
        ctx->restore.max_populated_pfn = new_max;
    }

    assert(!test_bit(pfn, ctx->restore.populated_pfns));
    set_bit(pfn, ctx->restore.populated_pfns);

    return 0;
}

/*
 * Given a set of pfns, obtain memory from Xen to fill the physmap for the
 * unpopulated subset.  If types is NULL, no page type checking is performed
 * and all unpopulated pfns are populated.
 */
int populate_pfns(struct xc_sr_context *ctx, unsigned int count,
                  const xen_pfn_t *original_pfns, const uint32_t *types)
{
    xc_interface *xch = ctx->xch;
    xen_pfn_t *mfns = malloc(count * sizeof(*mfns)),
        *pfns = malloc(count * sizeof(*pfns));
    unsigned int i, nr_pfns = 0;
    int rc = -1;

    if ( !mfns || !pfns )
    {
        ERROR("Failed to allocate %zu bytes for populating the physmap",
              2 * count * sizeof(*mfns));
        goto err;
    }

    for ( i = 0; i < count; ++i )
    {
        if ( (!types || page_type_to_populate(types[i])) &&
             !pfn_is_populated(ctx, original_pfns[i]) )
        {
            rc = pfn_set_populated(ctx, original_pfns[i]);
            if ( rc )
                goto err;

            pfns[nr_pfns] = mfns[nr_pfns] = original_pfns[i];
            ++nr_pfns;
        }
    }

    if ( nr_pfns )
    {
        rc = xc_domain_populate_physmap_exact(
            xch, ctx->domid, nr_pfns, 0, 0, mfns);
        if ( rc )
        {
            PERROR("Failed to populate physmap");
            goto err;
        }

        for ( i = 0; i < nr_pfns; ++i )
        {
            if ( mfns[i] == INVALID_MFN )
            {
                ERROR("Populate physmap failed for pfn %#"PRIpfn, pfns[i]);
                rc = -1;
                goto err;
            }

            ctx->restore.ops.set_gfn(ctx, pfns[i], mfns[i]);
        }
    }

    rc = 0;

 err:
    free(pfns);
    free(mfns);

    return rc;
}

/*
 * Given a list of pfns, their types, and a block of page data from the
 * stream, populate and record their types, map the relevant subset and copy
 * the data into the guest.
 */
static int process_page_data(struct xc_sr_context *ctx, unsigned int count,
                             xen_pfn_t *pfns, uint32_t *types, void *page_data)
{
    xc_interface *xch = ctx->xch;
    xen_pfn_t *mfns = malloc(count * sizeof(*mfns));
    int *map_errs = malloc(count * sizeof(*map_errs));
    int rc;
    void *mapping = NULL, *guest_page = NULL;
    unsigned int i, /* i indexes the pfns from the record. */
        j,          /* j indexes the subset of pfns we decide to map. */
        nr_pages = 0;

    if ( !mfns || !map_errs )
    {
        rc = -1;
        ERROR("Failed to allocate %zu bytes to process page data",
              count * (sizeof(*mfns) + sizeof(*map_errs)));
        goto err;
    }

    rc = populate_pfns(ctx, count, pfns, types);
    if ( rc )
    {
        ERROR("Failed to populate pfns for batch of %u pages", count);
        goto err;
    }

    for ( i = 0; i < count; ++i )
    {
        ctx->restore.ops.set_page_type(ctx, pfns[i], types[i]);

        if ( page_type_has_stream_data(types[i]) )
            mfns[nr_pages++] = ctx->restore.ops.pfn_to_gfn(ctx, pfns[i]);
    }
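
    /*
     * At this point mfns[] holds, in record order, the gfns of just those
     * pages which carry data in the stream; j below walks this subset in
     * step with i.
     */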

    /* Nothing to do? */
    if ( nr_pages == 0 )
        goto done;

    mapping = guest_page = xenforeignmemory_map(
        xch->fmem, ctx->domid, PROT_READ | PROT_WRITE,
        nr_pages, mfns, map_errs);
    if ( !mapping )
    {
        rc = -1;
        PERROR("Unable to map %u mfns for %u pages of data",
               nr_pages, count);
        goto err;
    }

    for ( i = 0, j = 0; i < count; ++i )
    {
        if ( !page_type_has_stream_data(types[i]) )
            continue;

        if ( map_errs[j] )
        {
            rc = -1;
            ERROR("Mapping pfn %#"PRIpfn" (mfn %#"PRIpfn", type %#"PRIx32") failed with %d",
                  pfns[i], mfns[j], types[i], map_errs[j]);
            goto err;
        }

        /* Undo page normalisation done by the saver. */
        rc = ctx->restore.ops.localise_page(ctx, types[i], page_data);
        if ( rc )
        {
            ERROR("Failed to localise pfn %#"PRIpfn" (type %#"PRIx32")",
                  pfns[i], types[i] >> XEN_DOMCTL_PFINFO_LTAB_SHIFT);
            goto err;
        }

        if ( ctx->restore.verify )
        {
            /* Verify mode - compare incoming data to what we already have. */
            if ( memcmp(guest_page, page_data, PAGE_SIZE) )
                ERROR("verify pfn %#"PRIpfn" failed (type %#"PRIx32")",
                      pfns[i], types[i] >> XEN_DOMCTL_PFINFO_LTAB_SHIFT);
        }
        else
        {
            /* Regular mode - copy incoming data into place. */
            memcpy(guest_page, page_data, PAGE_SIZE);
        }

        ++j;
        guest_page += PAGE_SIZE;
        page_data += PAGE_SIZE;
    }

 done:
    rc = 0;

 err:
    if ( mapping )
        xenforeignmemory_unmap(xch->fmem, mapping, nr_pages);

    free(map_errs);
    free(mfns);

    return rc;
}

/*
 * Validate a PAGE_DATA record from the stream, and pass the results to
 * process_page_data() to actually perform the legwork.
 */
static int handle_page_data(struct xc_sr_context *ctx, struct xc_sr_record *rec)
{
    xc_interface *xch = ctx->xch;
    struct xc_sr_rec_page_data_header *pages = rec->data;
    unsigned int i, pages_of_data = 0;
    int rc = -1;

    xen_pfn_t *pfns = NULL, pfn;
    uint32_t *types = NULL, type;

    /*
     * v2 compatibility only exists for x86 streams.  This is a bit of a
     * bodge, but it is less bad than duplicating handle_page_data() between
     * different architectures.
     */
#if defined(__i386__) || defined(__x86_64__)
    /* v2 compat.  Infer the position of STATIC_DATA_END. */
    if ( ctx->restore.format_version < 3 && !ctx->restore.seen_static_data_end )
    {
        rc = handle_static_data_end(ctx);
        if ( rc )
        {
            ERROR("Inferred STATIC_DATA_END record failed");
            goto err;
        }
        rc = -1;
    }

    if ( !ctx->restore.seen_static_data_end )
    {
        ERROR("No STATIC_DATA_END seen");
        goto err;
    }
#endif

    if ( rec->length < sizeof(*pages) )
    {
        ERROR("PAGE_DATA record truncated: length %u, min %zu",
              rec->length, sizeof(*pages));
        goto err;
    }

    if ( pages->count < 1 )
    {
        ERROR("Expected at least 1 pfn in PAGE_DATA record");
        goto err;
    }

    if ( rec->length < sizeof(*pages) + (pages->count * sizeof(uint64_t)) )
    {
        ERROR("PAGE_DATA record (length %u) too short to contain %u"
              " pfns worth of information", rec->length, pages->count);
        goto err;
    }

    pfns = malloc(pages->count * sizeof(*pfns));
    types = malloc(pages->count * sizeof(*types));
    if ( !pfns || !types )
    {
        ERROR("Unable to allocate enough memory for %u pfns",
              pages->count);
        goto err;
    }

    for ( i = 0; i < pages->count; ++i )
    {
        pfn = pages->pfn[i] & PAGE_DATA_PFN_MASK;
        if ( !ctx->restore.ops.pfn_is_valid(ctx, pfn) )
        {
            ERROR("pfn %#"PRIpfn" (index %u) outside domain maximum", pfn, i);
            goto err;
        }

        type = (pages->pfn[i] & PAGE_DATA_TYPE_MASK) >> 32;
        if ( !is_known_page_type(type) )
        {
            ERROR("Unknown type %#"PRIx32" for pfn %#"PRIpfn" (index %u)",
                  type, pfn, i);
            goto err;
        }

        if ( page_type_has_stream_data(type) )
            /* NOTAB and all L1 through L4 tables (including pinned) should
             * have a page worth of data in the record. */
            pages_of_data++;

        pfns[i] = pfn;
        types[i] = type;
    }

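    /*
     * A PAGE_DATA record must therefore be exactly: the header, one 64-bit
     * pfn+type word per pfn, then one page of data for each pfn whose type
     * carries data in the stream.
     */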
    if ( rec->length != (sizeof(*pages) +
                         (sizeof(uint64_t) * pages->count) +
                         (PAGE_SIZE * pages_of_data)) )
    {
        ERROR("PAGE_DATA record wrong size: length %u, expected "
              "%zu + %zu + %lu", rec->length, sizeof(*pages),
              (sizeof(uint64_t) * pages->count), (PAGE_SIZE * pages_of_data));
        goto err;
    }

    rc = process_page_data(ctx, pages->count, pfns, types,
                           &pages->pfn[pages->count]);

 err:
    free(types);
    free(pfns);

    return rc;
}

/*
 * Send checkpoint dirty pfn list to primary.
 */
static int send_checkpoint_dirty_pfn_list(struct xc_sr_context *ctx)
{
    xc_interface *xch = ctx->xch;
    int rc = -1;
    unsigned int count, written;
    uint64_t i, *pfns = NULL;
    xc_shadow_op_stats_t stats = { 0, ctx->restore.p2m_size };
    struct xc_sr_record rec = {
        .type = REC_TYPE_CHECKPOINT_DIRTY_PFN_LIST,
    };
    struct iovec iov[2] = {
        { &rec, sizeof(rec) },
    };
    DECLARE_HYPERCALL_BUFFER_SHADOW(unsigned long, dirty_bitmap,
                                    &ctx->restore.dirty_bitmap_hbuf);

    if ( xc_logdirty_control(
             xch, ctx->domid, XEN_DOMCTL_SHADOW_OP_CLEAN,
             HYPERCALL_BUFFER(dirty_bitmap), ctx->restore.p2m_size,
             0, &stats) != ctx->restore.p2m_size )
    {
        PERROR("Failed to retrieve logdirty bitmap");
        goto err;
    }

    for ( i = 0, count = 0; i < ctx->restore.p2m_size; i++ )
    {
        if ( test_bit(i, dirty_bitmap) )
            count++;
    }
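
    /*
     * Two passes over the bitmap: the first (above) counts the dirty pfns
     * so an exactly-sized buffer can be allocated; the second (below)
     * collects them into it.
     */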
    pfns = malloc(count * sizeof(*pfns));
    if ( !pfns )
    {
        ERROR("Unable to allocate %zu bytes of memory for dirty pfn list",
              count * sizeof(*pfns));
        goto err;
    }

    for ( i = 0, written = 0; i < ctx->restore.p2m_size; ++i )
    {
        if ( !test_bit(i, dirty_bitmap) )
            continue;

        if ( written >= count )
        {
            ERROR("Dirty pfn list exceeds allocated space");
            goto err;
        }

        pfns[written++] = i;
    }

    rec.length = count * sizeof(*pfns);

    iov[1].iov_base = pfns;
    iov[1].iov_len = rec.length;

    if ( writev_exact(ctx->restore.send_back_fd, iov, ARRAY_SIZE(iov)) )
    {
        PERROR("Failed to write dirty bitmap to stream");
        goto err;
    }

    rc = 0;

 err:
    free(pfns);
    return rc;
}

static int process_record(struct xc_sr_context *ctx, struct xc_sr_record *rec);

static int handle_checkpoint(struct xc_sr_context *ctx)
{
    xc_interface *xch = ctx->xch;
    int rc = 0, ret;
    unsigned int i;

    if ( ctx->stream_type == XC_STREAM_PLAIN )
    {
        ERROR("Found checkpoint in non-checkpointed stream");
        rc = -1;
        goto err;
    }

    ret = ctx->restore.callbacks->checkpoint(ctx->restore.callbacks->data);
    switch ( ret )
    {
    case XGR_CHECKPOINT_SUCCESS:
        break;

    case XGR_CHECKPOINT_FAILOVER:
        if ( ctx->restore.buffer_all_records )
            rc = BROKEN_CHANNEL;
        else
            /* We don't have a consistent state */
            rc = -1;
        goto err;

    default: /* Other fatal error */
        rc = -1;
        goto err;
    }

    if ( ctx->restore.buffer_all_records )
    {
        IPRINTF("All records buffered");

        for ( i = 0; i < ctx->restore.buffered_rec_num; i++ )
        {
            rc = process_record(ctx, &ctx->restore.buffered_records[i]);
            if ( rc )
                goto err;
        }
        ctx->restore.buffered_rec_num = 0;
        IPRINTF("All records processed");
    }
    else
        ctx->restore.buffer_all_records = true;

    if ( ctx->stream_type == XC_STREAM_COLO )
    {
#define HANDLE_CALLBACK_RETURN_VALUE(ret)                   \
    do {                                                    \
        if ( ret == 1 )                                     \
            rc = 0; /* Success */                           \
        else                                                \
        {                                                   \
            if ( ret == 2 )                                 \
                rc = BROKEN_CHANNEL;                        \
            else                                            \
                rc = -1; /* Some unspecified error */       \
            goto err;                                       \
        }                                                   \
    } while (0)

        /* COLO */

        /* We need to resume guest */
        rc = ctx->restore.ops.stream_complete(ctx);
        if ( rc )
            goto err;

        ctx->restore.callbacks->restore_results(ctx->restore.xenstore_gfn,
                                                ctx->restore.console_gfn,
                                                ctx->restore.callbacks->data);

        /* Resume secondary vm */
        ret = ctx->restore.callbacks->postcopy(ctx->restore.callbacks->data);
        HANDLE_CALLBACK_RETURN_VALUE(ret);

        /* Wait for a new checkpoint */
        ret = ctx->restore.callbacks->wait_checkpoint(
            ctx->restore.callbacks->data);
        HANDLE_CALLBACK_RETURN_VALUE(ret);

        /* suspend secondary vm */
        ret = ctx->restore.callbacks->suspend(ctx->restore.callbacks->data);
        HANDLE_CALLBACK_RETURN_VALUE(ret);

#undef HANDLE_CALLBACK_RETURN_VALUE

        rc = send_checkpoint_dirty_pfn_list(ctx);
        if ( rc )
            goto err;
    }

 err:
    return rc;
}

static int buffer_record(struct xc_sr_context *ctx, struct xc_sr_record *rec)
{
    xc_interface *xch = ctx->xch;
    unsigned int new_alloc_num;
    struct xc_sr_record *p;

    if ( ctx->restore.buffered_rec_num >= ctx->restore.allocated_rec_num )
    {
        new_alloc_num = ctx->restore.allocated_rec_num + DEFAULT_BUF_RECORDS;
        p = realloc(ctx->restore.buffered_records,
                    new_alloc_num * sizeof(struct xc_sr_record));
        if ( !p )
        {
            ERROR("Failed to realloc memory for buffered records");
            return -1;
        }

        ctx->restore.buffered_records = p;
        ctx->restore.allocated_rec_num = new_alloc_num;
    }

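    /*
     * Shallow copy: the buffer takes ownership of rec->data, which is
     * freed later by process_record() or by cleanup().
     */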
    memcpy(&ctx->restore.buffered_records[ctx->restore.buffered_rec_num++],
           rec, sizeof(*rec));

    return 0;
}

int handle_static_data_end(struct xc_sr_context *ctx)
{
    xc_interface *xch = ctx->xch;
    unsigned int missing = 0;
    int rc = 0;

    if ( ctx->restore.seen_static_data_end )
    {
        ERROR("Multiple STATIC_DATA_END records found");
        return -1;
    }

    ctx->restore.seen_static_data_end = true;

    rc = ctx->restore.ops.static_data_complete(ctx, &missing);
    if ( rc )
        return rc;

    if ( ctx->restore.callbacks->static_data_done &&
         (rc = ctx->restore.callbacks->static_data_done(
             missing, ctx->restore.callbacks->data)) != 0 )
        ERROR("static_data_done() callback failed: %d", rc);

    return rc;
}

static int process_record(struct xc_sr_context *ctx, struct xc_sr_record *rec)
{
    xc_interface *xch = ctx->xch;
    int rc = 0;

    switch ( rec->type )
    {
    case REC_TYPE_END:
        break;

    case REC_TYPE_PAGE_DATA:
        rc = handle_page_data(ctx, rec);
        break;

    case REC_TYPE_VERIFY:
        DPRINTF("Verify mode enabled");
        ctx->restore.verify = true;
        break;

    case REC_TYPE_CHECKPOINT:
        rc = handle_checkpoint(ctx);
        break;

    case REC_TYPE_STATIC_DATA_END:
        rc = handle_static_data_end(ctx);
        break;

    default:
        rc = ctx->restore.ops.process_record(ctx, rec);
        break;
    }

    free(rec->data);
    rec->data = NULL;

    return rc;
}

static int setup(struct xc_sr_context *ctx)
{
    xc_interface *xch = ctx->xch;
    int rc;
    DECLARE_HYPERCALL_BUFFER_SHADOW(unsigned long, dirty_bitmap,
                                    &ctx->restore.dirty_bitmap_hbuf);

    if ( ctx->stream_type == XC_STREAM_COLO )
    {
        dirty_bitmap = xc_hypercall_buffer_alloc_pages(
            xch, dirty_bitmap, NRPAGES(bitmap_size(ctx->restore.p2m_size)));

        if ( !dirty_bitmap )
        {
            ERROR("Unable to allocate memory for dirty bitmap");
            rc = -1;
            goto err;
        }
    }

    rc = ctx->restore.ops.setup(ctx);
    if ( rc )
        goto err;

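    /*
     * Seed the populated-pfn tracking with an 8k-bit (1KiB) bitmap,
     * covering pfns 0-8191, i.e. 32MiB assuming 4KiB pages;
     * pfn_set_populated() grows it on demand.
     */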
    ctx->restore.max_populated_pfn = (32 * 1024 / 4) - 1;
    ctx->restore.populated_pfns = bitmap_alloc(
        ctx->restore.max_populated_pfn + 1);
    if ( !ctx->restore.populated_pfns )
    {
        ERROR("Unable to allocate memory for populated_pfns bitmap");
        rc = -1;
        goto err;
    }

    ctx->restore.buffered_records = malloc(
        DEFAULT_BUF_RECORDS * sizeof(struct xc_sr_record));
    if ( !ctx->restore.buffered_records )
    {
        ERROR("Unable to allocate memory for buffered records");
        rc = -1;
        goto err;
    }
    ctx->restore.allocated_rec_num = DEFAULT_BUF_RECORDS;

 err:
    return rc;
}

static void cleanup(struct xc_sr_context *ctx)
{
    xc_interface *xch = ctx->xch;
    unsigned int i;
    DECLARE_HYPERCALL_BUFFER_SHADOW(unsigned long, dirty_bitmap,
                                    &ctx->restore.dirty_bitmap_hbuf);

    for ( i = 0; i < ctx->restore.buffered_rec_num; i++ )
        free(ctx->restore.buffered_records[i].data);

    if ( ctx->stream_type == XC_STREAM_COLO )
        xc_hypercall_buffer_free_pages(
            xch, dirty_bitmap, NRPAGES(bitmap_size(ctx->restore.p2m_size)));

    free(ctx->restore.buffered_records);
    free(ctx->restore.populated_pfns);

    if ( ctx->restore.ops.cleanup(ctx) )
        PERROR("Failed to clean up");
}

/*
 * Restore a domain.
 */
static int restore(struct xc_sr_context *ctx)
{
    xc_interface *xch = ctx->xch;
    struct xc_sr_record rec;
    int rc, saved_rc = 0, saved_errno = 0;

    IPRINTF("Restoring domain");

    rc = setup(ctx);
    if ( rc )
        goto err;

    do
    {
        rc = read_record(ctx, ctx->fd, &rec);
        if ( rc )
        {
            if ( ctx->restore.buffer_all_records )
                goto remus_failover;
            else
                goto err;
        }

        if ( ctx->restore.buffer_all_records &&
             rec.type != REC_TYPE_END &&
             rec.type != REC_TYPE_CHECKPOINT )
        {
            rc = buffer_record(ctx, &rec);
            if ( rc )
                goto err;
        }
        else
        {
            rc = process_record(ctx, &rec);
            if ( rc == RECORD_NOT_PROCESSED )
            {
                if ( rec.type & REC_TYPE_OPTIONAL )
                    DPRINTF("Ignoring optional record %#x (%s)",
                            rec.type, rec_type_to_str(rec.type));
                else
                {
                    ERROR("Mandatory record %#x (%s) not handled",
                          rec.type, rec_type_to_str(rec.type));
                    rc = -1;
                    goto err;
                }
            }
            else if ( rc == BROKEN_CHANNEL )
                goto remus_failover;
            else if ( rc )
                goto err;
        }

    } while ( rec.type != REC_TYPE_END );

 remus_failover:
    if ( ctx->stream_type == XC_STREAM_COLO )
    {
        /* With COLO, we have already called stream_complete */
        rc = 0;
        IPRINTF("COLO Failover");
        goto done;
    }

    /*
     * With Remus, if we reach here, there must have been an error on the
     * primary; fail over from the last checkpoint state.
     */
    rc = ctx->restore.ops.stream_complete(ctx);
    if ( rc )
        goto err;

    IPRINTF("Restore successful");
    goto done;

 err:
    saved_errno = errno;
    saved_rc = rc;
    PERROR("Restore failed");

 done:
    cleanup(ctx);

    if ( saved_rc )
    {
        rc = saved_rc;
        errno = saved_errno;
    }

    return rc;
}

int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom,
                      unsigned int store_evtchn, unsigned long *store_mfn,
                      uint32_t store_domid, unsigned int console_evtchn,
                      unsigned long *console_gfn, uint32_t console_domid,
                      xc_stream_type_t stream_type,
                      struct restore_callbacks *callbacks, int send_back_fd)
{
    bool hvm;
    xen_pfn_t nr_pfns;
    struct xc_sr_context ctx = {
        .xch = xch,
        .fd = io_fd,
        .stream_type = stream_type,
    };

    /* GCC 4.4 (of CentOS 6.x vintage) can't initialise anonymous unions. */
    ctx.restore.console_evtchn = console_evtchn;
    ctx.restore.console_domid = console_domid;
    ctx.restore.xenstore_evtchn = store_evtchn;
    ctx.restore.xenstore_domid = store_domid;
    ctx.restore.callbacks = callbacks;
    ctx.restore.send_back_fd = send_back_fd;

    /* Sanity check stream_type-related parameters */
    switch ( stream_type )
    {
    case XC_STREAM_COLO:
        assert(callbacks->suspend &&
               callbacks->postcopy &&
               callbacks->wait_checkpoint &&
               callbacks->restore_results);
        /* Fallthrough */
    case XC_STREAM_REMUS:
        assert(callbacks->checkpoint);
        /* Fallthrough */
    case XC_STREAM_PLAIN:
        break;

    default:
        assert(!"Bad stream_type");
        break;
    }

    if ( xc_domain_getinfo_single(xch, dom, &ctx.dominfo) < 0 )
    {
        PERROR("Failed to get dominfo for dom%u", dom);
        return -1;
    }

    hvm = ctx.dominfo.flags & XEN_DOMINF_hvm_guest;
    DPRINTF("fd %d, dom %u, hvm %u, stream_type %d",
            io_fd, dom, hvm, stream_type);

    ctx.domid = dom;

    if ( read_headers(&ctx) )
        return -1;

    if ( xc_domain_nr_gpfns(xch, dom, &nr_pfns) < 0 )
    {
        PERROR("Unable to obtain the guest p2m size");
        return -1;
    }

    ctx.restore.p2m_size = nr_pfns;
    ctx.restore.ops = hvm ? restore_ops_x86_hvm : restore_ops_x86_pv;

    if ( restore(&ctx) )
        return -1;

    IPRINTF("XenStore: mfn %#"PRIpfn", dom %d, evt %u",
            ctx.restore.xenstore_gfn,
            ctx.restore.xenstore_domid,
            ctx.restore.xenstore_evtchn);

    IPRINTF("Console: mfn %#"PRIpfn", dom %d, evt %u",
            ctx.restore.console_gfn,
            ctx.restore.console_domid,
            ctx.restore.console_evtchn);

    *console_gfn = ctx.restore.console_gfn;
    *store_mfn = ctx.restore.xenstore_gfn;

    return 0;
}

/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */