1 #ifndef __COMMON__H
2 #define __COMMON__H
3 
4 #include <stdbool.h>
5 
6 #include "xg_private.h"
7 #include "xg_save_restore.h"
8 #include "xc_dom.h"
9 #include "xc_bitops.h"
10 
11 #include "xc_sr_stream_format.h"
12 
13 /* String representation of Domain Header types. */
14 const char *dhdr_type_to_str(uint32_t type);
15 
16 /* String representation of Record types. */
17 const char *rec_type_to_str(uint32_t type);
18 
19 struct xc_sr_context;
20 struct xc_sr_record;
21 
22 /**
23  * Save operations.  To be implemented for each type of guest, for use by the
24  * common save algorithm.
25  *
26  * Every function must be implemented, even if only with a no-op stub.
27  */
28 struct xc_sr_save_ops
29 {
30     /* Convert a PFN to GFN.  May return ~0UL for an invalid mapping. */
31     xen_pfn_t (*pfn_to_gfn)(const struct xc_sr_context *ctx, xen_pfn_t pfn);
32 
33     /**
34      * Optionally transform the contents of a page from being specific to the
35      * sending environment, to being generic for the stream.
36      *
37      * The page of data at the end of 'page' may be a read-only mapping of a
38      * running guest; it must not be modified.  If no transformation is
39      * required, the callee should leave '*pages' untouched.
40      *
41      * If a transformation is required, the callee should allocate themselves
42      * a local page using malloc() and return it via '*page'.
43      *
44      * The caller shall free() '*page' in all cases.  In the case that the
45      * callee encounters an error, it should *NOT* free() the memory it
46      * allocated for '*page'.
47      *
48      * It is valid to fail with EAGAIN if the transformation is not able to be
49      * completed at this point.  The page shall be retried later.
50      *
51      * @returns 0 for success, -1 for failure, with errno appropriately set.
52      */
53     int (*normalise_page)(struct xc_sr_context *ctx, xen_pfn_t type,
54                           void **page);
55 
56     /**
57      * Set up local environment to save a domain. (Typically querying
58      * running domain state, setting up mappings etc.)
59      *
60      * This is called once before any common setup has occurred, allowing for
61      * guest-specific adjustments to be made to common state.
62      */
63     int (*setup)(struct xc_sr_context *ctx);
64 
65     /**
66      * Send records which need to be at the start of the stream.  This is
67      * called once, after the Image and Domain headers are written.
68      */
69     int (*start_of_stream)(struct xc_sr_context *ctx);
70 
71     /**
72      * Send records which need to be at the start of a checkpoint.  This is
73      * called once, or once per checkpoint in a checkpointed stream, and is
74      * ahead of memory data.
75      */
76     int (*start_of_checkpoint)(struct xc_sr_context *ctx);
77 
78     /**
79      * Send records which need to be at the end of the checkpoint.  This is
80      * called once, or once per checkpoint in a checkpointed stream, and is
81      * after the memory data.
82      */
83     int (*end_of_checkpoint)(struct xc_sr_context *ctx);
84 
85     /**
86      * Check state of guest to decide whether it makes sense to continue
87      * migration.  This is called in each iteration or checkpoint to check
88      * whether all criteria for the migration are still met.  If that's not
89      * the case either migration is cancelled via a bad rc or the situation
90      * is handled, e.g. by sending appropriate records.
91      */
92     int (*check_vm_state)(struct xc_sr_context *ctx);
93 
94     /**
95      * Clean up the local environment.  Will be called exactly once, either
96      * after a successful save, or upon encountering an error.
97      */
98     int (*cleanup)(struct xc_sr_context *ctx);
99 };
100 
101 
102 /**
103  * Restore operations.  To be implemented for each type of guest, for use by
104  * the common restore algorithm.
105  *
106  * Every function must be implemented, even if only with a no-op stub.
107  */
108 struct xc_sr_restore_ops
109 {
110     /* Convert a PFN to GFN.  May return ~0UL for an invalid mapping. */
111     xen_pfn_t (*pfn_to_gfn)(const struct xc_sr_context *ctx, xen_pfn_t pfn);
112 
113     /* Check to see whether a PFN is valid. */
114     bool (*pfn_is_valid)(const struct xc_sr_context *ctx, xen_pfn_t pfn);
115 
116     /* Set the GFN of a PFN. */
117     void (*set_gfn)(struct xc_sr_context *ctx, xen_pfn_t pfn, xen_pfn_t gfn);
118 
119     /* Set the type of a PFN. */
120     void (*set_page_type)(struct xc_sr_context *ctx, xen_pfn_t pfn,
121                           xen_pfn_t type);
122 
123     /**
124      * Optionally transform the contents of a page from being generic in the
125      * stream, to being specific to the restoring environment.
126      *
127      * 'page' is expected to be modified in-place if a transformation is
128      * required.
129      *
130      * @returns 0 for success, -1 for failure, with errno appropriately set.
131      */
132     int (*localise_page)(struct xc_sr_context *ctx, uint32_t type, void *page);
133 
134     /**
135      * Set up local environment to restore a domain.
136      *
137      * This is called once before any common setup has occurred, allowing for
138      * guest-specific adjustments to be made to common state.
139      */
140     int (*setup)(struct xc_sr_context *ctx);
141 
142     /**
143      * Process an individual record from the stream.  The caller shall take
144      * care of processing common records (e.g. END, PAGE_DATA).
145      *
146      * @return 0 for success, -1 for failure, or the following sentinels:
147      *  - RECORD_NOT_PROCESSED
148      *  - BROKEN_CHANNEL: under Remus/COLO, this means master may be dead, and
149      *    a failover is needed.
150      */
151 #define RECORD_NOT_PROCESSED 1
152 #define BROKEN_CHANNEL 2
153     int (*process_record)(struct xc_sr_context *ctx, struct xc_sr_record *rec);
154 
155     /**
156      * Perform any actions required after the stream has been finished. Called
157      * after the END record has been received.
158      */
159     int (*stream_complete)(struct xc_sr_context *ctx);
160 
161     /**
162      * Clean up the local environment.  Will be called exactly once, either
163      * after a successful restore, or upon encountering an error.
164      */
165     int (*cleanup)(struct xc_sr_context *ctx);
166 };
167 
/* x86 PV per-vcpu storage structure for blobs heading Xen-wards. */
struct xc_sr_x86_pv_restore_vcpu
{
    /* Blob pointers: basic & extended vcpu context, xsave and MSR data. */
    void *basic, *extd, *xsave, *msr;
    /* Size in bytes of each corresponding blob above. */
    size_t basicsz, extdsz, xsavesz, msrsz;
};
174 
175 struct xc_sr_context
176 {
177     xc_interface *xch;
178     uint32_t domid;
179     int fd;
180 
181     xc_dominfo_t dominfo;
182 
183     union /* Common save or restore data. */
184     {
185         struct /* Save data. */
186         {
187             int recv_fd;
188 
189             struct xc_sr_save_ops ops;
190             struct save_callbacks *callbacks;
191 
192             /* Live migrate vs non live suspend. */
193             bool live;
194 
195             /* Plain VM, or checkpoints over time. */
196             int checkpointed;
197 
198             /* Further debugging information in the stream. */
199             bool debug;
200 
201             unsigned long p2m_size;
202 
203             struct precopy_stats stats;
204 
205             xen_pfn_t *batch_pfns;
206             unsigned nr_batch_pfns;
207             unsigned long *deferred_pages;
208             unsigned long nr_deferred_pages;
209             xc_hypercall_buffer_t dirty_bitmap_hbuf;
210         } save;
211 
212         struct /* Restore data. */
213         {
214             struct xc_sr_restore_ops ops;
215             struct restore_callbacks *callbacks;
216 
217             int send_back_fd;
218             unsigned long p2m_size;
219             xc_hypercall_buffer_t dirty_bitmap_hbuf;
220 
221             /* From Image Header. */
222             uint32_t format_version;
223 
224             /* From Domain Header. */
225             uint32_t guest_type;
226             uint32_t guest_page_size;
227 
228             /* Plain VM, or checkpoints over time. */
229             int checkpointed;
230 
231             /* Currently buffering records between a checkpoint */
232             bool buffer_all_records;
233 
234 /*
235  * With Remus/COLO, we buffer the records sent by the primary at checkpoint,
236  * in case the primary will fail, we can recover from the last
237  * checkpoint state.
238  * This should be enough for most of the cases because primary only send
239  * dirty pages at checkpoint.
240  */
241 #define DEFAULT_BUF_RECORDS 1024
242             struct xc_sr_record *buffered_records;
243             unsigned allocated_rec_num;
244             unsigned buffered_rec_num;
245 
246             /*
247              * Xenstore and Console parameters.
248              * INPUT:  evtchn & domid
249              * OUTPUT: gfn
250              */
251             xen_pfn_t    xenstore_gfn,    console_gfn;
252             unsigned int xenstore_evtchn, console_evtchn;
253             uint32_t     xenstore_domid,  console_domid;
254 
255             /* Bitmap of currently populated PFNs during restore. */
256             unsigned long *populated_pfns;
257             xen_pfn_t max_populated_pfn;
258 
259             /* Sender has invoked verify mode on the stream. */
260             bool verify;
261         } restore;
262     };
263 
264     union /* Guest-arch specific data. */
265     {
266         struct /* x86 PV guest. */
267         {
268             /* 4 or 8; 32 or 64 bit domain */
269             unsigned int width;
270             /* 3 or 4 pagetable levels */
271             unsigned int levels;
272 
273             /* Maximum Xen frame */
274             xen_pfn_t max_mfn;
275             /* Read-only machine to phys map */
276             xen_pfn_t *m2p;
277             /* first mfn of the compat m2p (Only needed for 32bit PV guests) */
278             xen_pfn_t compat_m2p_mfn0;
279             /* Number of m2p frames mapped */
280             unsigned long nr_m2p_frames;
281 
282             /* Maximum guest frame */
283             xen_pfn_t max_pfn;
284 
285             /* Number of frames making up the p2m */
286             unsigned int p2m_frames;
287             /* Guest's phys to machine map.  Mapped read-only (save) or
288              * allocated locally (restore).  Uses guest unsigned longs. */
289             void *p2m;
290             /* The guest pfns containing the p2m leaves */
291             xen_pfn_t *p2m_pfns;
292 
293             /* Read-only mapping of guests shared info page */
294             shared_info_any_t *shinfo;
295 
296             /* p2m generation count for verifying validity of local p2m. */
297             uint64_t p2m_generation;
298 
299             union
300             {
301                 struct
302                 {
303                     /* State machine for the order of received records. */
304                     bool seen_pv_info;
305 
306                     /* Types for each page (bounded by max_pfn). */
307                     uint32_t *pfn_types;
308 
309                     /* Vcpu context blobs. */
310                     struct xc_sr_x86_pv_restore_vcpu *vcpus;
311                     unsigned nr_vcpus;
312                 } restore;
313             };
314         } x86_pv;
315 
316         struct /* x86 HVM guest. */
317         {
318             union
319             {
320                 struct
321                 {
322                     /* Whether qemu enabled logdirty mode, and we should
323                      * disable on cleanup. */
324                     bool qemu_enabled_logdirty;
325                 } save;
326 
327                 struct
328                 {
329                     /* HVM context blob. */
330                     void *context;
331                     size_t contextsz;
332                 } restore;
333             };
334         } x86_hvm;
335     };
336 };
337 
338 extern struct xc_sr_save_ops save_ops_x86_pv;
339 extern struct xc_sr_save_ops save_ops_x86_hvm;
340 
341 extern struct xc_sr_restore_ops restore_ops_x86_pv;
342 extern struct xc_sr_restore_ops restore_ops_x86_hvm;
343 
/* A single stream record: type tag, payload length, and payload. */
struct xc_sr_record
{
    uint32_t type;     /* Record type. */
    uint32_t length;   /* Length of 'data' in bytes. */
    void *data;        /* Payload; ignored/NULL when length is 0
                        * (see write_record()/read_record() contracts). */
};
350 
351 /*
352  * Writes a split record to the stream, applying correct padding where
353  * appropriate.  It is common when sending records containing blobs from Xen
354  * that the header and blob data are separate.  This function accepts a second
355  * buffer and length, and will merge it with the main record when sending.
356  *
357  * Records with a non-zero length must provide a valid data field; records
358  * with a 0 length shall have their data field ignored.
359  *
 * Returns 0 on success and non-0 on failure.
361  */
362 int write_split_record(struct xc_sr_context *ctx, struct xc_sr_record *rec,
363                        void *buf, size_t sz);
364 
/*
 * Writes a record to the stream, applying correct padding where appropriate.
 * Records with a non-zero length must provide a valid data field; records
 * with a 0 length shall have their data field ignored.
 *
 * Returns 0 on success and non-0 on failure.
 */
static inline int write_record(struct xc_sr_context *ctx,
                               struct xc_sr_record *rec)
{
    /* A plain record is a split record with an empty secondary buffer. */
    return write_split_record(ctx, rec, NULL, 0);
}
377 
378 /*
379  * Reads a record from the stream, and fills in the record structure.
380  *
381  * Returns 0 on success and non-0 on failure.
382  *
383  * On success, the records type and size shall be valid.
384  * - If size is 0, data shall be NULL.
385  * - If size is non-0, data shall be a buffer allocated by malloc() which must
386  *   be passed to free() by the caller.
387  *
388  * On failure, the contents of the record structure are undefined.
389  */
390 int read_record(struct xc_sr_context *ctx, int fd, struct xc_sr_record *rec);
391 
392 /*
393  * This would ideally be private in restore.c, but is needed by
 * x86_pv_localise_page() if we receive pagetable frames ahead of the
395  * contents of the frames they point at.
396  */
397 int populate_pfns(struct xc_sr_context *ctx, unsigned count,
398                   const xen_pfn_t *original_pfns, const uint32_t *types);
399 
400 #endif
401 /*
402  * Local variables:
403  * mode: C
404  * c-file-style: "BSD"
405  * c-basic-offset: 4
406  * tab-width: 4
407  * indent-tabs-mode: nil
408  * End:
409  */
410