#include <arpa/inet.h>

#include <assert.h>

#include "xg_sr_common.h"

/*
 * Read and validate the Image and Domain headers.
 */
static int read_headers(struct xc_sr_context *ctx)
{
    xc_interface *xch = ctx->xch;
    struct xc_sr_ihdr ihdr;
    struct xc_sr_dhdr dhdr;

    if ( read_exact(ctx->fd, &ihdr, sizeof(ihdr)) )
    {
        PERROR("Failed to read Image Header from stream");
        return -1;
    }

    ihdr.id = ntohl(ihdr.id);
    ihdr.version = ntohl(ihdr.version);
    ihdr.options = ntohs(ihdr.options);

    if ( ihdr.marker != IHDR_MARKER )
    {
        ERROR("Invalid marker: Got 0x%016"PRIx64, ihdr.marker);
        return -1;
    }

    if ( ihdr.id != IHDR_ID )
    {
        ERROR("Invalid ID: Expected 0x%08x, Got 0x%08x", IHDR_ID, ihdr.id);
        return -1;
    }

    if ( ihdr.version < 2 || ihdr.version > 3 )
    {
        ERROR("Invalid Version: Expected 2 <= ver <= 3, Got %d",
              ihdr.version);
        return -1;
    }

    if ( ihdr.options & IHDR_OPT_BIG_ENDIAN )
    {
        ERROR("Unable to handle big endian streams");
        return -1;
    }

    ctx->restore.format_version = ihdr.version;

    if ( read_exact(ctx->fd, &dhdr, sizeof(dhdr)) )
    {
        PERROR("Failed to read Domain Header from stream");
        return -1;
    }

    ctx->restore.guest_type = dhdr.type;
    ctx->restore.guest_page_size = (1U << dhdr.page_shift);

    if ( dhdr.xen_major == 0 )
    {
        IPRINTF("Found %s domain, converted from legacy stream format",
                dhdr_type_to_str(dhdr.type));
        DPRINTF(" Legacy conversion script version %u", dhdr.xen_minor);
    }
    else
        IPRINTF("Found %s domain from Xen %u.%u",
                dhdr_type_to_str(dhdr.type), dhdr.xen_major, dhdr.xen_minor);

    return 0;
}

/*
 * Is a pfn populated?
 */
static bool pfn_is_populated(const struct xc_sr_context *ctx, xen_pfn_t pfn)
{
    if ( pfn > ctx->restore.max_populated_pfn )
        return false;

    return test_bit(pfn, ctx->restore.populated_pfns);
}

/*
 * Set a pfn as populated, expanding the tracking structures if needed. To
 * avoid realloc()ing too excessively, the size is increased to the nearest
 * power of two large enough to contain the required pfn.
 */
static int pfn_set_populated(struct xc_sr_context *ctx, xen_pfn_t pfn)
{
    xc_interface *xch = ctx->xch;

    if ( pfn > ctx->restore.max_populated_pfn )
    {
        xen_pfn_t new_max;
        size_t old_sz, new_sz;
        unsigned long *p;

        /* Round up to the nearest power of two larger than pfn, less 1. */
        new_max = pfn;
        new_max |= new_max >> 1;
        new_max |= new_max >> 2;
        new_max |= new_max >> 4;
        new_max |= new_max >> 8;
        new_max |= new_max >> 16;
#ifdef __x86_64__
        new_max |= new_max >> 32;
#endif

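        /*
         * After the shifts above, every bit below the highest set bit of
         * new_max is filled in, i.e. new_max is of the form 2^k - 1 and is
         * >= pfn.  For example, a pfn of 0x1234 yields a new_max of 0x1fff.
         */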
        old_sz = bitmap_size(ctx->restore.max_populated_pfn + 1);
        new_sz = bitmap_size(new_max + 1);
        p = realloc(ctx->restore.populated_pfns, new_sz);
        if ( !p )
        {
            ERROR("Failed to realloc populated bitmap");
            errno = ENOMEM;
            return -1;
        }

        memset((uint8_t *)p + old_sz, 0x00, new_sz - old_sz);

        ctx->restore.populated_pfns = p;
        ctx->restore.max_populated_pfn = new_max;
    }

    assert(!test_bit(pfn, ctx->restore.populated_pfns));
    set_bit(pfn, ctx->restore.populated_pfns);

    return 0;
}

/*
 * Given a set of pfns, obtain memory from Xen to fill the physmap for the
 * unpopulated subset. If types is NULL, no page type checking is performed
 * and all unpopulated pfns are populated.
 */
int populate_pfns(struct xc_sr_context *ctx, unsigned int count,
                  const xen_pfn_t *original_pfns, const uint32_t *types)
{
    xc_interface *xch = ctx->xch;
    xen_pfn_t *mfns = malloc(count * sizeof(*mfns)),
        *pfns = malloc(count * sizeof(*pfns));
    unsigned int i, nr_pfns = 0;
    int rc = -1;

    if ( !mfns || !pfns )
    {
        ERROR("Failed to allocate %zu bytes for populating the physmap",
              2 * count * sizeof(*mfns));
        goto err;
    }

    for ( i = 0; i < count; ++i )
    {
        if ( (!types || page_type_to_populate(types[i])) &&
             !pfn_is_populated(ctx, original_pfns[i]) )
        {
            rc = pfn_set_populated(ctx, original_pfns[i]);
            if ( rc )
                goto err;
            pfns[nr_pfns] = mfns[nr_pfns] = original_pfns[i];
            ++nr_pfns;
        }
    }

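    /*
     * mfns[] serves as the in/out array for the populate hypercall below: it
     * goes in holding the pfns to populate and comes back holding the frames
     * backing them, with INVALID_MFN marking any pfn that could not be
     * populated.  Successful entries are recorded in the p2m via set_gfn().
     */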
    if ( nr_pfns )
    {
        rc = xc_domain_populate_physmap_exact(
            xch, ctx->domid, nr_pfns, 0, 0, mfns);
        if ( rc )
        {
            PERROR("Failed to populate physmap");
            goto err;
        }

        for ( i = 0; i < nr_pfns; ++i )
        {
            if ( mfns[i] == INVALID_MFN )
            {
                ERROR("Populate physmap failed for pfn %u", i);
                rc = -1;
                goto err;
            }

            ctx->restore.ops.set_gfn(ctx, pfns[i], mfns[i]);
        }
    }

    rc = 0;

 err:
    free(pfns);
    free(mfns);

    return rc;
}

/*
 * Given a list of pfns, their types, and a block of page data from the
 * stream, populate and record their types, map the relevant subset and copy
 * the data into the guest.
 */
static int process_page_data(struct xc_sr_context *ctx, unsigned int count,
                             xen_pfn_t *pfns, uint32_t *types, void *page_data)
{
    xc_interface *xch = ctx->xch;
    xen_pfn_t *mfns = malloc(count * sizeof(*mfns));
    int *map_errs = malloc(count * sizeof(*map_errs));
    int rc;
    void *mapping = NULL, *guest_page = NULL;
    unsigned int i, /* i indexes the pfns from the record. */
        j,          /* j indexes the subset of pfns we decide to map. */
        nr_pages = 0;

    if ( !mfns || !map_errs )
    {
        rc = -1;
        ERROR("Failed to allocate %zu bytes to process page data",
              count * (sizeof(*mfns) + sizeof(*map_errs)));
        goto err;
    }

    rc = populate_pfns(ctx, count, pfns, types);
    if ( rc )
    {
        ERROR("Failed to populate pfns for batch of %u pages", count);
        goto err;
    }

    for ( i = 0; i < count; ++i )
    {
        ctx->restore.ops.set_page_type(ctx, pfns[i], types[i]);

        if ( page_type_has_stream_data(types[i]) )
            mfns[nr_pages++] = ctx->restore.ops.pfn_to_gfn(ctx, pfns[i]);
    }

    /* Nothing to do? */
    if ( nr_pages == 0 )
        goto done;

    mapping = guest_page = xenforeignmemory_map(
        xch->fmem, ctx->domid, PROT_READ | PROT_WRITE,
        nr_pages, mfns, map_errs);
    if ( !mapping )
    {
        rc = -1;
        PERROR("Unable to map %u mfns for %u pages of data",
               nr_pages, count);
        goto err;
    }

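    /*
     * Walk every pfn in the batch with i, but advance the mapping index j
     * and the guest_page/page_data cursors only for pfns which actually
     * carried a page of data in the stream.
     */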
    for ( i = 0, j = 0; i < count; ++i )
    {
        if ( !page_type_has_stream_data(types[i]) )
            continue;

        if ( map_errs[j] )
        {
            rc = -1;
            ERROR("Mapping pfn %#"PRIpfn" (mfn %#"PRIpfn", type %#"PRIx32") failed with %d",
                  pfns[i], mfns[j], types[i], map_errs[j]);
            goto err;
        }

        /* Undo page normalisation done by the saver. */
        rc = ctx->restore.ops.localise_page(ctx, types[i], page_data);
        if ( rc )
        {
            ERROR("Failed to localise pfn %#"PRIpfn" (type %#"PRIx32")",
                  pfns[i], types[i] >> XEN_DOMCTL_PFINFO_LTAB_SHIFT);
            goto err;
        }

        if ( ctx->restore.verify )
        {
            /* Verify mode - compare incoming data to what we already have. */
            if ( memcmp(guest_page, page_data, PAGE_SIZE) )
                ERROR("verify pfn %#"PRIpfn" failed (type %#"PRIx32")",
                      pfns[i], types[i] >> XEN_DOMCTL_PFINFO_LTAB_SHIFT);
        }
        else
        {
            /* Regular mode - copy incoming data into place. */
            memcpy(guest_page, page_data, PAGE_SIZE);
        }

        ++j;
        guest_page += PAGE_SIZE;
        page_data += PAGE_SIZE;
    }

 done:
    rc = 0;

 err:
    if ( mapping )
        xenforeignmemory_unmap(xch->fmem, mapping, nr_pages);

    free(map_errs);
    free(mfns);

    return rc;
}

/*
 * Validate a PAGE_DATA record from the stream, and pass the results to
 * process_page_data() to actually perform the legwork.
 */
static int handle_page_data(struct xc_sr_context *ctx, struct xc_sr_record *rec)
{
    xc_interface *xch = ctx->xch;
    struct xc_sr_rec_page_data_header *pages = rec->data;
    unsigned int i, pages_of_data = 0;
    int rc = -1;

    xen_pfn_t *pfns = NULL, pfn;
    uint32_t *types = NULL, type;

    /*
     * v2 compatibility only exists for x86 streams. This is a bit of a
     * bodge, but it is less bad than duplicating handle_page_data() between
     * different architectures.
     */
#if defined(__i386__) || defined(__x86_64__)
    /* v2 compat. Infer the position of STATIC_DATA_END. */
    if ( ctx->restore.format_version < 3 && !ctx->restore.seen_static_data_end )
    {
        rc = handle_static_data_end(ctx);
        if ( rc )
        {
            ERROR("Inferred STATIC_DATA_END record failed");
            goto err;
        }
        rc = -1;
    }

    if ( !ctx->restore.seen_static_data_end )
    {
        ERROR("No STATIC_DATA_END seen");
        goto err;
    }
#endif

    if ( rec->length < sizeof(*pages) )
    {
        ERROR("PAGE_DATA record truncated: length %u, min %zu",
              rec->length, sizeof(*pages));
        goto err;
    }

    if ( pages->count < 1 )
    {
        ERROR("Expected at least 1 pfn in PAGE_DATA record");
        goto err;
    }

    if ( rec->length < sizeof(*pages) + (pages->count * sizeof(uint64_t)) )
    {
        ERROR("PAGE_DATA record (length %u) too short to contain %u"
              " pfns worth of information", rec->length, pages->count);
        goto err;
    }

    pfns = malloc(pages->count * sizeof(*pfns));
    types = malloc(pages->count * sizeof(*types));
    if ( !pfns || !types )
    {
        ERROR("Unable to allocate enough memory for %u pfns",
              pages->count);
        goto err;
    }

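    /*
     * Each 64-bit entry in pages->pfn[] encodes the pfn in its low bits and
     * the page type in its upper bits; shifting the type field down by 32
     * lines it up with the XEN_DOMCTL_PFINFO_* values used elsewhere.
     */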
    for ( i = 0; i < pages->count; ++i )
    {
        pfn = pages->pfn[i] & PAGE_DATA_PFN_MASK;
        if ( !ctx->restore.ops.pfn_is_valid(ctx, pfn) )
        {
            ERROR("pfn %#"PRIpfn" (index %u) outside domain maximum", pfn, i);
            goto err;
        }

        type = (pages->pfn[i] & PAGE_DATA_TYPE_MASK) >> 32;
        if ( !is_known_page_type(type) )
        {
            ERROR("Unknown type %#"PRIx32" for pfn %#"PRIpfn" (index %u)",
                  type, pfn, i);
            goto err;
        }

        if ( page_type_has_stream_data(type) )
            /* NOTAB and all L1 through L4 tables (including pinned) should
             * have a page worth of data in the record. */
            pages_of_data++;

        pfns[i] = pfn;
        types[i] = type;
    }

    if ( rec->length != (sizeof(*pages) +
                         (sizeof(uint64_t) * pages->count) +
                         (PAGE_SIZE * pages_of_data)) )
    {
        ERROR("PAGE_DATA record wrong size: length %u, expected "
              "%zu + %zu + %lu", rec->length, sizeof(*pages),
              (sizeof(uint64_t) * pages->count), (PAGE_SIZE * pages_of_data));
        goto err;
    }

    rc = process_page_data(ctx, pages->count, pfns, types,
                           &pages->pfn[pages->count]);
 err:
    free(types);
    free(pfns);

    return rc;
}

/*
 * Send checkpoint dirty pfn list to primary.
 */
static int send_checkpoint_dirty_pfn_list(struct xc_sr_context *ctx)
{
    xc_interface *xch = ctx->xch;
    int rc = -1;
    unsigned int count, written;
    uint64_t i, *pfns = NULL;
    xc_shadow_op_stats_t stats = { 0, ctx->restore.p2m_size };
    struct xc_sr_record rec = {
        .type = REC_TYPE_CHECKPOINT_DIRTY_PFN_LIST,
    };
    struct iovec iov[2] = {
        { &rec, sizeof(rec) },
    };
    DECLARE_HYPERCALL_BUFFER_SHADOW(unsigned long, dirty_bitmap,
                                    &ctx->restore.dirty_bitmap_hbuf);

    if ( xc_logdirty_control(
             xch, ctx->domid, XEN_DOMCTL_SHADOW_OP_CLEAN,
             HYPERCALL_BUFFER(dirty_bitmap), ctx->restore.p2m_size,
             0, &stats) != ctx->restore.p2m_size )
    {
        PERROR("Failed to retrieve logdirty bitmap");
        goto err;
    }

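    /*
     * Two passes over the dirty bitmap: first count the dirty pfns so the
     * buffer can be sized exactly, then collect them into pfns[].
     */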
    for ( i = 0, count = 0; i < ctx->restore.p2m_size; i++ )
    {
        if ( test_bit(i, dirty_bitmap) )
            count++;
    }

    pfns = malloc(count * sizeof(*pfns));
    if ( !pfns )
    {
        ERROR("Unable to allocate %zu bytes of memory for dirty pfn list",
              count * sizeof(*pfns));
        goto err;
    }

    for ( i = 0, written = 0; i < ctx->restore.p2m_size; ++i )
    {
        if ( !test_bit(i, dirty_bitmap) )
            continue;

        if ( written > count )
        {
            ERROR("Dirty pfn list exceeds expected count");
            goto err;
        }

        pfns[written++] = i;
    }

    rec.length = count * sizeof(*pfns);

    iov[1].iov_base = pfns;
    iov[1].iov_len = rec.length;

    if ( writev_exact(ctx->restore.send_back_fd, iov, ARRAY_SIZE(iov)) )
    {
        PERROR("Failed to write dirty bitmap to stream");
        goto err;
    }

    rc = 0;

 err:
    free(pfns);
    return rc;
}

static int process_record(struct xc_sr_context *ctx, struct xc_sr_record *rec);
static int handle_checkpoint(struct xc_sr_context *ctx)
{
    xc_interface *xch = ctx->xch;
    int rc = 0, ret;
    unsigned int i;

    if ( ctx->stream_type == XC_STREAM_PLAIN )
    {
        ERROR("Found checkpoint in non-checkpointed stream");
        rc = -1;
        goto err;
    }

    ret = ctx->restore.callbacks->checkpoint(ctx->restore.callbacks->data);
    switch ( ret )
    {
    case XGR_CHECKPOINT_SUCCESS:
        break;

    case XGR_CHECKPOINT_FAILOVER:
        if ( ctx->restore.buffer_all_records )
            rc = BROKEN_CHANNEL;
        else
            /* We don't have a consistent state */
            rc = -1;
        goto err;

    default: /* Other fatal error */
        rc = -1;
        goto err;
    }

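    /*
     * The first CHECKPOINT record in the stream switches record buffering
     * on; each subsequent one replays and drains the records buffered since
     * the previous checkpoint.
     */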
    if ( ctx->restore.buffer_all_records )
    {
        IPRINTF("All records buffered");

        for ( i = 0; i < ctx->restore.buffered_rec_num; i++ )
        {
            rc = process_record(ctx, &ctx->restore.buffered_records[i]);
            if ( rc )
                goto err;
        }
        ctx->restore.buffered_rec_num = 0;
        IPRINTF("All records processed");
    }
    else
        ctx->restore.buffer_all_records = true;

    if ( ctx->stream_type == XC_STREAM_COLO )
    {
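        /*
         * Return value convention for the callbacks invoked below:
         * 1 => success, 2 => failover via the broken channel path,
         * anything else => fatal error.
         */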
#define HANDLE_CALLBACK_RETURN_VALUE(ret)                   \
    do {                                                    \
        if ( ret == 1 )                                     \
            rc = 0; /* Success */                           \
        else                                                \
        {                                                   \
            if ( ret == 2 )                                 \
                rc = BROKEN_CHANNEL;                        \
            else                                            \
                rc = -1; /* Some unspecified error */       \
            goto err;                                       \
        }                                                   \
    } while (0)

        /* COLO */

        /* We need to resume guest */
        rc = ctx->restore.ops.stream_complete(ctx);
        if ( rc )
            goto err;

        ctx->restore.callbacks->restore_results(ctx->restore.xenstore_gfn,
                                                ctx->restore.console_gfn,
                                                ctx->restore.callbacks->data);

        /* Resume secondary vm */
        ret = ctx->restore.callbacks->postcopy(ctx->restore.callbacks->data);
        HANDLE_CALLBACK_RETURN_VALUE(ret);

        /* Wait for a new checkpoint */
        ret = ctx->restore.callbacks->wait_checkpoint(
            ctx->restore.callbacks->data);
        HANDLE_CALLBACK_RETURN_VALUE(ret);

        /* suspend secondary vm */
        ret = ctx->restore.callbacks->suspend(ctx->restore.callbacks->data);
        HANDLE_CALLBACK_RETURN_VALUE(ret);

#undef HANDLE_CALLBACK_RETURN_VALUE

        rc = send_checkpoint_dirty_pfn_list(ctx);
        if ( rc )
            goto err;
    }

 err:
    return rc;
}

static int buffer_record(struct xc_sr_context *ctx, struct xc_sr_record *rec)
{
    xc_interface *xch = ctx->xch;
    unsigned int new_alloc_num;
    struct xc_sr_record *p;

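    /* Grow the buffer in DEFAULT_BUF_RECORDS-sized chunks as it fills up. */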
    if ( ctx->restore.buffered_rec_num >= ctx->restore.allocated_rec_num )
    {
        new_alloc_num = ctx->restore.allocated_rec_num + DEFAULT_BUF_RECORDS;
        p = realloc(ctx->restore.buffered_records,
                    new_alloc_num * sizeof(struct xc_sr_record));
        if ( !p )
        {
            ERROR("Failed to realloc memory for buffered records");
            return -1;
        }

        ctx->restore.buffered_records = p;
        ctx->restore.allocated_rec_num = new_alloc_num;
    }

    memcpy(&ctx->restore.buffered_records[ctx->restore.buffered_rec_num++],
           rec, sizeof(*rec));

    return 0;
}

int handle_static_data_end(struct xc_sr_context *ctx)
{
    xc_interface *xch = ctx->xch;
    unsigned int missing = 0;
    int rc = 0;

    if ( ctx->restore.seen_static_data_end )
    {
        ERROR("Multiple STATIC_DATA_END records found");
        return -1;
    }

    ctx->restore.seen_static_data_end = true;

    rc = ctx->restore.ops.static_data_complete(ctx, &missing);
    if ( rc )
        return rc;

    if ( ctx->restore.callbacks->static_data_done &&
         (rc = ctx->restore.callbacks->static_data_done(
             missing, ctx->restore.callbacks->data) != 0) )
        ERROR("static_data_done() callback failed: %d\n", rc);

    return rc;
}

static int process_record(struct xc_sr_context *ctx, struct xc_sr_record *rec)
{
    xc_interface *xch = ctx->xch;
    int rc = 0;

    switch ( rec->type )
    {
    case REC_TYPE_END:
        break;

    case REC_TYPE_PAGE_DATA:
        rc = handle_page_data(ctx, rec);
        break;

    case REC_TYPE_VERIFY:
        DPRINTF("Verify mode enabled");
        ctx->restore.verify = true;
        break;

    case REC_TYPE_CHECKPOINT:
        rc = handle_checkpoint(ctx);
        break;

    case REC_TYPE_STATIC_DATA_END:
        rc = handle_static_data_end(ctx);
        break;

    default:
        rc = ctx->restore.ops.process_record(ctx, rec);
        break;
    }

    free(rec->data);
    rec->data = NULL;

    return rc;
}

static int setup(struct xc_sr_context *ctx)
{
    xc_interface *xch = ctx->xch;
    int rc;
    DECLARE_HYPERCALL_BUFFER_SHADOW(unsigned long, dirty_bitmap,
                                    &ctx->restore.dirty_bitmap_hbuf);

    if ( ctx->stream_type == XC_STREAM_COLO )
    {
        dirty_bitmap = xc_hypercall_buffer_alloc_pages(
            xch, dirty_bitmap, NRPAGES(bitmap_size(ctx->restore.p2m_size)));

        if ( !dirty_bitmap )
        {
            ERROR("Unable to allocate memory for dirty bitmap");
            rc = -1;
            goto err;
        }
    }

    rc = ctx->restore.ops.setup(ctx);
    if ( rc )
        goto err;

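    /*
     * Start with a populated-pfn bitmap covering the first 8192 pfns;
     * pfn_set_populated() grows it on demand as higher pfns appear.
     */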
    ctx->restore.max_populated_pfn = (32 * 1024 / 4) - 1;
    ctx->restore.populated_pfns = bitmap_alloc(
        ctx->restore.max_populated_pfn + 1);
    if ( !ctx->restore.populated_pfns )
    {
        ERROR("Unable to allocate memory for populated_pfns bitmap");
        rc = -1;
        goto err;
    }

    ctx->restore.buffered_records = malloc(
        DEFAULT_BUF_RECORDS * sizeof(struct xc_sr_record));
    if ( !ctx->restore.buffered_records )
    {
        ERROR("Unable to allocate memory for buffered records");
        rc = -1;
        goto err;
    }
    ctx->restore.allocated_rec_num = DEFAULT_BUF_RECORDS;

 err:
    return rc;
}

static void cleanup(struct xc_sr_context *ctx)
{
    xc_interface *xch = ctx->xch;
    unsigned int i;
    DECLARE_HYPERCALL_BUFFER_SHADOW(unsigned long, dirty_bitmap,
                                    &ctx->restore.dirty_bitmap_hbuf);

    for ( i = 0; i < ctx->restore.buffered_rec_num; i++ )
        free(ctx->restore.buffered_records[i].data);

    if ( ctx->stream_type == XC_STREAM_COLO )
        xc_hypercall_buffer_free_pages(
            xch, dirty_bitmap, NRPAGES(bitmap_size(ctx->restore.p2m_size)));

    free(ctx->restore.buffered_records);
    free(ctx->restore.populated_pfns);

    if ( ctx->restore.ops.cleanup(ctx) )
        PERROR("Failed to clean up");
}

/*
 * Restore a domain.
 */
static int restore(struct xc_sr_context *ctx)
{
    xc_interface *xch = ctx->xch;
    struct xc_sr_record rec;
    int rc, saved_rc = 0, saved_errno = 0;

    IPRINTF("Restoring domain");

    rc = setup(ctx);
    if ( rc )
        goto err;

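    /*
     * Main record loop.  Once buffer_all_records has been set by the first
     * CHECKPOINT record, everything except END and CHECKPOINT records is
     * buffered rather than processed immediately.
     */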
    do
    {
        rc = read_record(ctx, ctx->fd, &rec);
        if ( rc )
        {
            if ( ctx->restore.buffer_all_records )
                goto remus_failover;
            else
                goto err;
        }

        if ( ctx->restore.buffer_all_records &&
             rec.type != REC_TYPE_END &&
             rec.type != REC_TYPE_CHECKPOINT )
        {
            rc = buffer_record(ctx, &rec);
            if ( rc )
                goto err;
        }
        else
        {
            rc = process_record(ctx, &rec);
            if ( rc == RECORD_NOT_PROCESSED )
            {
                if ( rec.type & REC_TYPE_OPTIONAL )
                    DPRINTF("Ignoring optional record %#x (%s)",
                            rec.type, rec_type_to_str(rec.type));
                else
                {
                    ERROR("Mandatory record %#x (%s) not handled",
                          rec.type, rec_type_to_str(rec.type));
                    rc = -1;
                    goto err;
                }
            }
            else if ( rc == BROKEN_CHANNEL )
                goto remus_failover;
            else if ( rc )
                goto err;
        }

    } while ( rec.type != REC_TYPE_END );

 remus_failover:
    if ( ctx->stream_type == XC_STREAM_COLO )
    {
        /* With COLO, we have already called stream_complete */
        rc = 0;
        IPRINTF("COLO Failover");
        goto done;
    }

    /*
     * With Remus, if we reach here, there must be some error on the primary,
     * so fail over from the last checkpoint state.
     */
    rc = ctx->restore.ops.stream_complete(ctx);
    if ( rc )
        goto err;

    IPRINTF("Restore successful");
    goto done;

 err:
    saved_errno = errno;
    saved_rc = rc;
    PERROR("Restore failed");

 done:
    cleanup(ctx);

    if ( saved_rc )
    {
        rc = saved_rc;
        errno = saved_errno;
    }

    return rc;
}

int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom,
                      unsigned int store_evtchn, unsigned long *store_mfn,
                      uint32_t store_domid, unsigned int console_evtchn,
                      unsigned long *console_gfn, uint32_t console_domid,
                      xc_stream_type_t stream_type,
                      struct restore_callbacks *callbacks, int send_back_fd)
{
    bool hvm;
    xen_pfn_t nr_pfns;
    struct xc_sr_context ctx = {
        .xch = xch,
        .fd = io_fd,
        .stream_type = stream_type,
    };

    /* GCC 4.4 (of CentOS 6.x vintage) can't initialise anonymous unions. */
    ctx.restore.console_evtchn = console_evtchn;
    ctx.restore.console_domid = console_domid;
    ctx.restore.xenstore_evtchn = store_evtchn;
    ctx.restore.xenstore_domid = store_domid;
    ctx.restore.callbacks = callbacks;
    ctx.restore.send_back_fd = send_back_fd;

    /* Sanity check stream_type-related parameters */
    switch ( stream_type )
    {
    case XC_STREAM_COLO:
        assert(callbacks->suspend &&
               callbacks->postcopy &&
               callbacks->wait_checkpoint &&
               callbacks->restore_results);
        /* Fallthrough */
    case XC_STREAM_REMUS:
        assert(callbacks->checkpoint);
        /* Fallthrough */
    case XC_STREAM_PLAIN:
        break;

    default:
        assert(!"Bad stream_type");
        break;
    }

    if ( xc_domain_getinfo_single(xch, dom, &ctx.dominfo) < 0 )
    {
        PERROR("Failed to get dominfo for dom%u", dom);
        return -1;
    }

    hvm = ctx.dominfo.flags & XEN_DOMINF_hvm_guest;
    DPRINTF("fd %d, dom %u, hvm %u, stream_type %d",
            io_fd, dom, hvm, stream_type);

    ctx.domid = dom;

    if ( read_headers(&ctx) )
        return -1;

    if ( xc_domain_nr_gpfns(xch, dom, &nr_pfns) < 0 )
    {
        PERROR("Unable to obtain the guest p2m size");
        return -1;
    }

    ctx.restore.p2m_size = nr_pfns;
    ctx.restore.ops = hvm ? restore_ops_x86_hvm : restore_ops_x86_pv;

    if ( restore(&ctx) )
        return -1;

    IPRINTF("XenStore: mfn %#"PRIpfn", dom %d, evt %u",
            ctx.restore.xenstore_gfn,
            ctx.restore.xenstore_domid,
            ctx.restore.xenstore_evtchn);

    IPRINTF("Console: mfn %#"PRIpfn", dom %d, evt %u",
            ctx.restore.console_gfn,
            ctx.restore.console_domid,
            ctx.restore.console_evtchn);

    *console_gfn = ctx.restore.console_gfn;
    *store_mfn = ctx.restore.xenstore_gfn;

    return 0;
}

/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */