1 #ifndef __COMMON__H
2 #define __COMMON__H
3
4 #include <stdbool.h>
5
6 #include "xg_private.h"
7 #include "xg_save_restore.h"
8 #include "xc_dom.h"
9 #include "xc_bitops.h"
10
11 #include "xc_sr_stream_format.h"
12
/* Returns the string representation of a Domain Header type. */
const char *dhdr_type_to_str(uint32_t type);
15
/* Returns the string representation of a Record type. */
const char *rec_type_to_str(uint32_t type);
18
/*
 * Forward declarations, so the operation tables below can take pointers to
 * these structures before their full definitions appear.
 */
struct xc_sr_context;
struct xc_sr_record;
21
22 /**
23 * Save operations. To be implemented for each type of guest, for use by the
24 * common save algorithm.
25 *
26 * Every function must be implemented, even if only with a no-op stub.
27 */
28 struct xc_sr_save_ops
29 {
30 /* Convert a PFN to GFN. May return ~0UL for an invalid mapping. */
31 xen_pfn_t (*pfn_to_gfn)(const struct xc_sr_context *ctx, xen_pfn_t pfn);
32
33 /**
34 * Optionally transform the contents of a page from being specific to the
35 * sending environment, to being generic for the stream.
36 *
37 * The page of data at the end of 'page' may be a read-only mapping of a
38 * running guest; it must not be modified. If no transformation is
39 * required, the callee should leave '*pages' untouched.
40 *
41 * If a transformation is required, the callee should allocate themselves
42 * a local page using malloc() and return it via '*page'.
43 *
44 * The caller shall free() '*page' in all cases. In the case that the
45 * callee encounters an error, it should *NOT* free() the memory it
46 * allocated for '*page'.
47 *
48 * It is valid to fail with EAGAIN if the transformation is not able to be
49 * completed at this point. The page shall be retried later.
50 *
51 * @returns 0 for success, -1 for failure, with errno appropriately set.
52 */
53 int (*normalise_page)(struct xc_sr_context *ctx, xen_pfn_t type,
54 void **page);
55
56 /**
57 * Set up local environment to save a domain. (Typically querying
58 * running domain state, setting up mappings etc.)
59 *
60 * This is called once before any common setup has occurred, allowing for
61 * guest-specific adjustments to be made to common state.
62 */
63 int (*setup)(struct xc_sr_context *ctx);
64
65 /**
66 * Send records which need to be at the start of the stream. This is
67 * called once, after the Image and Domain headers are written.
68 */
69 int (*start_of_stream)(struct xc_sr_context *ctx);
70
71 /**
72 * Send records which need to be at the start of a checkpoint. This is
73 * called once, or once per checkpoint in a checkpointed stream, and is
74 * ahead of memory data.
75 */
76 int (*start_of_checkpoint)(struct xc_sr_context *ctx);
77
78 /**
79 * Send records which need to be at the end of the checkpoint. This is
80 * called once, or once per checkpoint in a checkpointed stream, and is
81 * after the memory data.
82 */
83 int (*end_of_checkpoint)(struct xc_sr_context *ctx);
84
85 /**
86 * Check state of guest to decide whether it makes sense to continue
87 * migration. This is called in each iteration or checkpoint to check
88 * whether all criteria for the migration are still met. If that's not
89 * the case either migration is cancelled via a bad rc or the situation
90 * is handled, e.g. by sending appropriate records.
91 */
92 int (*check_vm_state)(struct xc_sr_context *ctx);
93
94 /**
95 * Clean up the local environment. Will be called exactly once, either
96 * after a successful save, or upon encountering an error.
97 */
98 int (*cleanup)(struct xc_sr_context *ctx);
99 };
100
101
102 /**
103 * Restore operations. To be implemented for each type of guest, for use by
104 * the common restore algorithm.
105 *
106 * Every function must be implemented, even if only with a no-op stub.
107 */
108 struct xc_sr_restore_ops
109 {
110 /* Convert a PFN to GFN. May return ~0UL for an invalid mapping. */
111 xen_pfn_t (*pfn_to_gfn)(const struct xc_sr_context *ctx, xen_pfn_t pfn);
112
113 /* Check to see whether a PFN is valid. */
114 bool (*pfn_is_valid)(const struct xc_sr_context *ctx, xen_pfn_t pfn);
115
116 /* Set the GFN of a PFN. */
117 void (*set_gfn)(struct xc_sr_context *ctx, xen_pfn_t pfn, xen_pfn_t gfn);
118
119 /* Set the type of a PFN. */
120 void (*set_page_type)(struct xc_sr_context *ctx, xen_pfn_t pfn,
121 xen_pfn_t type);
122
123 /**
124 * Optionally transform the contents of a page from being generic in the
125 * stream, to being specific to the restoring environment.
126 *
127 * 'page' is expected to be modified in-place if a transformation is
128 * required.
129 *
130 * @returns 0 for success, -1 for failure, with errno appropriately set.
131 */
132 int (*localise_page)(struct xc_sr_context *ctx, uint32_t type, void *page);
133
134 /**
135 * Set up local environment to restore a domain.
136 *
137 * This is called once before any common setup has occurred, allowing for
138 * guest-specific adjustments to be made to common state.
139 */
140 int (*setup)(struct xc_sr_context *ctx);
141
142 /**
143 * Process an individual record from the stream. The caller shall take
144 * care of processing common records (e.g. END, PAGE_DATA).
145 *
146 * @return 0 for success, -1 for failure, or the following sentinels:
147 * - RECORD_NOT_PROCESSED
148 * - BROKEN_CHANNEL: under Remus/COLO, this means master may be dead, and
149 * a failover is needed.
150 */
151 #define RECORD_NOT_PROCESSED 1
152 #define BROKEN_CHANNEL 2
153 int (*process_record)(struct xc_sr_context *ctx, struct xc_sr_record *rec);
154
155 /**
156 * Perform any actions required after the stream has been finished. Called
157 * after the END record has been received.
158 */
159 int (*stream_complete)(struct xc_sr_context *ctx);
160
161 /**
162 * Clean up the local environment. Will be called exactly once, either
163 * after a successful restore, or upon encountering an error.
164 */
165 int (*cleanup)(struct xc_sr_context *ctx);
166 };
167
/* x86 PV per-vcpu storage structure for blobs heading Xen-wards. */
struct xc_sr_x86_pv_restore_vcpu
{
    /* One pointer per kind of blob. */
    void *basic, *extd, *xsave, *msr;
    /* Size fields, named to pair with each pointer above. */
    size_t basicsz, extdsz, xsavesz, msrsz;
};
174
175 struct xc_sr_context
176 {
177 xc_interface *xch;
178 uint32_t domid;
179 int fd;
180
181 xc_dominfo_t dominfo;
182
183 union /* Common save or restore data. */
184 {
185 struct /* Save data. */
186 {
187 int recv_fd;
188
189 struct xc_sr_save_ops ops;
190 struct save_callbacks *callbacks;
191
192 /* Live migrate vs non live suspend. */
193 bool live;
194
195 /* Plain VM, or checkpoints over time. */
196 int checkpointed;
197
198 /* Further debugging information in the stream. */
199 bool debug;
200
201 unsigned long p2m_size;
202
203 struct precopy_stats stats;
204
205 xen_pfn_t *batch_pfns;
206 unsigned nr_batch_pfns;
207 unsigned long *deferred_pages;
208 unsigned long nr_deferred_pages;
209 xc_hypercall_buffer_t dirty_bitmap_hbuf;
210 } save;
211
212 struct /* Restore data. */
213 {
214 struct xc_sr_restore_ops ops;
215 struct restore_callbacks *callbacks;
216
217 int send_back_fd;
218 unsigned long p2m_size;
219 xc_hypercall_buffer_t dirty_bitmap_hbuf;
220
221 /* From Image Header. */
222 uint32_t format_version;
223
224 /* From Domain Header. */
225 uint32_t guest_type;
226 uint32_t guest_page_size;
227
228 /* Plain VM, or checkpoints over time. */
229 int checkpointed;
230
231 /* Currently buffering records between a checkpoint */
232 bool buffer_all_records;
233
234 /*
235 * With Remus/COLO, we buffer the records sent by the primary at checkpoint,
236 * in case the primary will fail, we can recover from the last
237 * checkpoint state.
238 * This should be enough for most of the cases because primary only send
239 * dirty pages at checkpoint.
240 */
241 #define DEFAULT_BUF_RECORDS 1024
242 struct xc_sr_record *buffered_records;
243 unsigned allocated_rec_num;
244 unsigned buffered_rec_num;
245
246 /*
247 * Xenstore and Console parameters.
248 * INPUT: evtchn & domid
249 * OUTPUT: gfn
250 */
251 xen_pfn_t xenstore_gfn, console_gfn;
252 unsigned int xenstore_evtchn, console_evtchn;
253 uint32_t xenstore_domid, console_domid;
254
255 /* Bitmap of currently populated PFNs during restore. */
256 unsigned long *populated_pfns;
257 xen_pfn_t max_populated_pfn;
258
259 /* Sender has invoked verify mode on the stream. */
260 bool verify;
261 } restore;
262 };
263
264 union /* Guest-arch specific data. */
265 {
266 struct /* x86 PV guest. */
267 {
268 /* 4 or 8; 32 or 64 bit domain */
269 unsigned int width;
270 /* 3 or 4 pagetable levels */
271 unsigned int levels;
272
273 /* Maximum Xen frame */
274 xen_pfn_t max_mfn;
275 /* Read-only machine to phys map */
276 xen_pfn_t *m2p;
277 /* first mfn of the compat m2p (Only needed for 32bit PV guests) */
278 xen_pfn_t compat_m2p_mfn0;
279 /* Number of m2p frames mapped */
280 unsigned long nr_m2p_frames;
281
282 /* Maximum guest frame */
283 xen_pfn_t max_pfn;
284
285 /* Number of frames making up the p2m */
286 unsigned int p2m_frames;
287 /* Guest's phys to machine map. Mapped read-only (save) or
288 * allocated locally (restore). Uses guest unsigned longs. */
289 void *p2m;
290 /* The guest pfns containing the p2m leaves */
291 xen_pfn_t *p2m_pfns;
292
293 /* Read-only mapping of guests shared info page */
294 shared_info_any_t *shinfo;
295
296 /* p2m generation count for verifying validity of local p2m. */
297 uint64_t p2m_generation;
298
299 union
300 {
301 struct
302 {
303 /* State machine for the order of received records. */
304 bool seen_pv_info;
305
306 /* Types for each page (bounded by max_pfn). */
307 uint32_t *pfn_types;
308
309 /* Vcpu context blobs. */
310 struct xc_sr_x86_pv_restore_vcpu *vcpus;
311 unsigned nr_vcpus;
312 } restore;
313 };
314 } x86_pv;
315
316 struct /* x86 HVM guest. */
317 {
318 union
319 {
320 struct
321 {
322 /* Whether qemu enabled logdirty mode, and we should
323 * disable on cleanup. */
324 bool qemu_enabled_logdirty;
325 } save;
326
327 struct
328 {
329 /* HVM context blob. */
330 void *context;
331 size_t contextsz;
332 } restore;
333 };
334 } x86_hvm;
335 };
336 };
337
/* Save operation tables for x86 PV and x86 HVM guests; defined elsewhere. */
extern struct xc_sr_save_ops save_ops_x86_pv;
extern struct xc_sr_save_ops save_ops_x86_hvm;

/* Restore operation tables for x86 PV and x86 HVM guests; defined elsewhere. */
extern struct xc_sr_restore_ops restore_ops_x86_pv;
extern struct xc_sr_restore_ops restore_ops_x86_hvm;
343
/* A single stream record: a type, a length, and a pointer to its data. */
struct xc_sr_record
{
    uint32_t type;
    uint32_t length;
    void *data;
};
350
/*
 * Writes a split record to the stream, applying correct padding where
 * appropriate.  It is common when sending records containing blobs from Xen
 * that the header and blob data are separate.  This function accepts a second
 * buffer and length, and will merge it with the main record when sending.
 *
 * Records with a non-zero length must provide a valid data field; records
 * with a 0 length shall have their data field ignored.
 *
 * Returns 0 on success and non-0 on failure.
 */
int write_split_record(struct xc_sr_context *ctx, struct xc_sr_record *rec,
                       void *buf, size_t sz);
364
/*
 * Writes a record to the stream, applying correct padding where appropriate.
 * Records with a non-zero length must provide a valid data field; records
 * with a 0 length shall have their data field ignored.
 *
 * Convenience wrapper around write_split_record() with no second buffer.
 *
 * Returns 0 on success and non-0 on failure.
 */
static inline int write_record(struct xc_sr_context *ctx,
                               struct xc_sr_record *rec)
{
    return write_split_record(ctx, rec, NULL, 0);
}
377
/*
 * Reads a record from the stream, and fills in the record structure.
 *
 * Returns 0 on success and non-0 on failure.
 *
 * On success, the record's type and length shall be valid.
 * - If length is 0, data shall be NULL.
 * - If length is non-0, data shall be a buffer allocated by malloc() which
 *   must be passed to free() by the caller.
 *
 * On failure, the contents of the record structure are undefined.
 */
int read_record(struct xc_sr_context *ctx, int fd, struct xc_sr_record *rec);
391
392 /*
393 * This would ideally be private in restore.c, but is needed by
394 * x86_pv_localise_page() if we receive pagetables frames ahead of the
395 * contents of the frames they point at.
396 */
397 int populate_pfns(struct xc_sr_context *ctx, unsigned count,
398 const xen_pfn_t *original_pfns, const uint32_t *types);
399
400 #endif
401 /*
402 * Local variables:
403 * mode: C
404 * c-file-style: "BSD"
405 * c-basic-offset: 4
406 * tab-width: 4
407 * indent-tabs-mode: nil
408 * End:
409 */
410