1 /******************************************************************************
2 * xenguest.h
3 *
4 * A library for guest domain management in Xen.
5 *
6 * Copyright (c) 2003-2004, K A Fraser.
7 *
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation;
11 * version 2.1 of the License.
12 *
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; If not, see <http://www.gnu.org/licenses/>.
20 */
21
22 #ifndef XENGUEST_H
23 #define XENGUEST_H
24
25 #define XC_NUMA_NO_NODE (~0U)
26
27 #define XCFLAGS_LIVE (1 << 0)
28 #define XCFLAGS_DEBUG (1 << 1)
29
30 #define X86_64_B_SIZE 64
31 #define X86_32_B_SIZE 32
32
33 #define X86_HVM_NR_SPECIAL_PAGES 8
34 #define X86_HVM_END_SPECIAL_REGION 0xff000u
35 #define XG_MAX_MODULES 2
36
37 /* --- typedefs and structs ---------------------------------------- */
38
39 typedef uint64_t xen_vaddr_t;
40 typedef uint64_t xen_paddr_t;
41
42 #define PRIpfn PRI_xen_pfn
43
/*
 * One contiguous segment of the guest image under construction: a guest
 * virtual address range plus the guest pfns backing it.
 */
struct xc_dom_seg {
    xen_vaddr_t vstart;     /* guest virtual start address of the segment */
    xen_vaddr_t vend;       /* guest virtual end address of the segment */
    xen_pfn_t pfn;          /* first guest pfn backing the segment */
    xen_pfn_t pages;        /* number of pages in the segment */
};
50
/*
 * A firmware/data blob to be loaded into the guest (used below for the
 * system firmware, ACPI and SMBIOS modules passed to HVMLOADER).
 */
struct xc_hvm_firmware_module {
    uint8_t *data;           /* module contents */
    uint32_t length;         /* length of 'data' in bytes */
    uint64_t guest_addr_out; /* presumably the guest address the module ends
                              * up at, filled in by the loader — confirm */
};
56
/*
 * One block in the domain builder's internal memory pool (singly linked
 * list rooted at xc_dom_image.memblocks; see xc_dom_malloc*() and
 * xc_dom_register_external()).
 */
struct xc_dom_mem {
    struct xc_dom_mem *next;    /* next block in the pool's list */
    void *ptr;                  /* payload pointer for EXTERNAL/MMAP blocks
                                 * — presumably; confirm against the
                                 * xc_dom_malloc*() implementations */
    enum {
        XC_DOM_MEM_TYPE_MALLOC_INTERNAL,    /* payload lives in memory[] */
        XC_DOM_MEM_TYPE_MALLOC_EXTERNAL,    /* payload allocated elsewhere */
        XC_DOM_MEM_TYPE_MMAP,               /* payload is an mmap mapping */
    } type;
    size_t len;                 /* payload length in bytes */
    /*
     * Trailing storage for INTERNAL blocks.  C99 flexible array member;
     * previously declared as "memory[0]", which is a GNU extension with
     * the same layout.
     */
    unsigned char memory[];
};
68
/*
 * One locally-mapped range of guest physical memory; entries are kept in
 * a list (xc_dom_image.phys_pages), presumably tracking mappings created
 * by xc_dom_pfn_to_ptr() — confirm against the implementation.
 */
struct xc_dom_phys {
    struct xc_dom_phys *next;   /* next entry in the list */
    void *ptr;                  /* local mapping of the range */
    xen_pfn_t first;            /* first guest pfn of the range */
    xen_pfn_t count;            /* number of pages in the range */
};
75
/* One boot module (e.g. ramdisk) to be loaded alongside the kernel. */
struct xc_dom_module {
    void *blob;         /* module image in toolstack memory */
    size_t size;        /* size of 'blob' in bytes */
    void *cmdline;      /* module command line, or NULL */
    /* If seg.vstart is non zero then the module will be loaded at that
     * address, otherwise it will automatically be placed.
     *
     * If automatic placement is used and the module is gzip
     * compressed then it will be decompressed as it is loaded. If the
     * module has been explicitly placed then it is loaded as is
     * otherwise decompressing risks undoing the manual placement.
     */
    struct xc_dom_seg seg;
};
90
/*
 * Everything the domain builder knows about one guest image under
 * construction: input blobs, parsed kernel info, the guest memory
 * layout, arch hooks, and assorted per-domain configuration.
 */
struct xc_dom_image {
    /* files */
    void *kernel_blob;          /* kernel image in toolstack memory */
    size_t kernel_size;
    unsigned int num_modules;   /* number of valid entries in modules[] */
    struct xc_dom_module modules[XG_MAX_MODULES];
    void *devicetree_blob;      /* flattened device tree blob, or NULL */
    size_t devicetree_size;

    /* Size limits for the blobs above; see xc_dom_*_max_size(). */
    size_t max_kernel_size;
    size_t max_module_size;
    size_t max_devicetree_size;

    /* arguments and parameters */
    char *cmdline;              /* guest kernel command line */
    size_t cmdline_size;
    uint32_t f_requested[XENFEAT_NR_SUBMAPS];   /* requested feature bitmaps */

    /* info from (elf) kernel image */
    struct elf_dom_parms *parms;
    const char *guest_type;

    /* memory layout */
    struct xc_dom_seg kernel_seg;
    struct xc_dom_seg p2m_seg;
    struct xc_dom_seg pgtables_seg;
    struct xc_dom_seg devicetree_seg;
    struct xc_dom_seg start_info_seg;
    xen_pfn_t start_info_pfn;
    xen_pfn_t console_pfn;
    xen_pfn_t xenstore_pfn;
    xen_pfn_t shared_info_pfn;
    xen_pfn_t bootstack_pfn;
    xen_pfn_t pfn_alloc_end;    /* first pfn above all allocations so far */
    xen_vaddr_t virt_alloc_end;
    xen_vaddr_t bsd_symtab_start;

    /*
     * initrd parameters as specified in start_info page
     * Depending on capabilities of the booted kernel this may be a virtual
     * address or a pfn. Type is neutral and large enough to hold a virtual
     * address of a 64 bit kernel even with 32 bit toolstack.
     */
    uint64_t initrd_start;
    uint64_t initrd_len;

    unsigned int alloc_bootstack;
    xen_vaddr_t virt_pgtab_end;

    /* other state info */
    uint32_t f_active[XENFEAT_NR_SUBMAPS];  /* active feature bitmaps */

    /*
     * pv_p2m is specific to x86 PV guests, and maps GFNs to MFNs. It is
     * eventually copied into guest context.
     */
    xen_pfn_t *pv_p2m;
    xen_pfn_t p2m_size;         /* number of pfns covered by pv_p2m */

    /* physical memory
     *
     * An x86 PV guest has one or more blocks of physical RAM,
     * consisting of total_pages starting at 0. The start address and
     * size of each block is controlled by vNUMA structures.
     *
     * An ARM guest has GUEST_RAM_BANKS regions of RAM, with
     * rambank_size[i] pages in each. The lowest RAM address
     * is stored in rambase_pfn.
     */
    xen_pfn_t rambase_pfn;
    xen_pfn_t total_pages;
    struct xc_dom_phys *phys_pages;     /* list of local mappings */
#if defined (__arm__) || defined(__aarch64__)
    xen_pfn_t rambank_size[GUEST_RAM_BANKS];
#endif

    /* malloc memory pool */
    struct xc_dom_mem *memblocks;

    /* memory footprint stats */
    size_t alloc_malloc;
    size_t alloc_mem_map;
    size_t alloc_file_map;
    size_t alloc_domU_map;

    /* misc xen domain config stuff */
    unsigned long flags;
    unsigned int console_evtchn;
    unsigned int xenstore_evtchn;
    uint32_t console_domid;
    uint32_t xenstore_domid;
    xen_pfn_t shared_info_mfn;

    xc_interface *xch;          /* hypervisor interface handle */
    uint32_t guest_domid;       /* domid being built */
    int claim_enabled; /* 0 by default, 1 enables it */

    int xen_version;
    xen_capabilities_info_t xen_caps;

    /* kernel loader, arch hooks */
    struct xc_dom_loader *kernel_loader;
    void *private_loader;

    /* vNUMA information */
    xen_vmemrange_t *vmemranges;
    unsigned int nr_vmemranges;
    unsigned int *vnode_to_pnode;
    unsigned int nr_vnodes;

    /* domain type/architecture specific data */
    void *arch_private;

    /* kernel loader */
    struct xc_dom_arch *arch_hooks;
    /* allocate up to pfn_alloc_end */
    int (*allocate) (struct xc_dom_image * dom);

    /* Container type (HVM or PV). */
    enum {
        XC_DOM_PV_CONTAINER,
        XC_DOM_HVM_CONTAINER,
    } container_type;

    /* HVM specific fields. */
    xen_pfn_t target_pages;
    xen_paddr_t mmio_start;
    xen_paddr_t mmio_size;
    xen_paddr_t lowmem_end;
    xen_paddr_t highmem_end;
    xen_pfn_t vga_hole_size;

    /* If unset disables the setup of the IOREQ pages. */
    bool device_model;

    /* BIOS/Firmware passed to HVMLOADER */
    struct xc_hvm_firmware_module system_firmware_module;

    /* Extra ACPI tables */
#define MAX_ACPI_MODULES 4
    struct xc_hvm_firmware_module acpi_modules[MAX_ACPI_MODULES];

    /* Extra SMBIOS structures passed to HVMLOADER */
    struct xc_hvm_firmware_module smbios_module;

#if defined(__i386__) || defined(__x86_64__)
    struct e820entry *e820;         /* guest e820 memory map */
    unsigned int e820_entries;
#endif

    xen_pfn_t vuart_gfn;

    /* Number of vCPUs */
    unsigned int max_vcpus;
};
246
247 /* --- arch specific hooks ----------------------------------------- */
248
/*
 * Per-architecture / per-guest-type hooks and parameters used by the
 * generic domain builder.  Instances are registered via
 * xc_dom_register_arch_hooks() and chained through 'next'.
 */
struct xc_dom_arch {
    /* allocate the guest's special ("magic") pages */
    int (*alloc_magic_pages) (struct xc_dom_image * dom);

    /* pagetable setup - x86 PV only */
    int (*alloc_pgtables) (struct xc_dom_image * dom);
    int (*alloc_p2m_list) (struct xc_dom_image * dom);
    int (*setup_pgtables) (struct xc_dom_image * dom);

    /* arch-specific data structs setup */
    /* in Mini-OS environment start_info might be a macro, avoid collision. */
#undef start_info
    int (*start_info) (struct xc_dom_image * dom);
    int (*shared_info) (struct xc_dom_image * dom, void *shared_info);
    int (*vcpu) (struct xc_dom_image * dom);
    int (*bootearly) (struct xc_dom_image * dom);
    int (*bootlate) (struct xc_dom_image * dom);

    /* arch-specific memory initialization. */
    int (*meminit) (struct xc_dom_image * dom);

    const char *guest_type;         /* guest type string this entry handles */
    const char *native_protocol;
    int page_shift;                 /* log2 of the guest page size */
    int sizeof_pfn;                 /* size of a guest pfn in bytes */
    int p2m_base_supported;
    int arch_private_size;          /* bytes of dom->arch_private to allocate */

    struct xc_dom_arch *next;       /* next registered hooks entry */
};
278 void xc_dom_register_arch_hooks(struct xc_dom_arch *hooks);
279
280 #define XC_DOM_PAGE_SHIFT(dom) ((dom)->arch_hooks->page_shift)
281 #define XC_DOM_PAGE_SIZE(dom) (1LL << (dom)->arch_hooks->page_shift)
282
283 /* --- main functions ---------------------------------------------- */
284
285 struct xc_dom_image *xc_dom_allocate(xc_interface *xch,
286 const char *cmdline, const char *features);
287 void xc_dom_release_phys(struct xc_dom_image *dom);
288 void xc_dom_release(struct xc_dom_image *dom);
289 int xc_dom_rambase_init(struct xc_dom_image *dom, uint64_t rambase);
290 int xc_dom_mem_init(struct xc_dom_image *dom, unsigned int mem_mb);
291
292 /* Set this larger if you have enormous modules/kernels. Note that
293 * you should trust all kernels not to be maliciously large (e.g. to
294 * exhaust all dom0 memory) if you do this (see CVE-2012-4544 /
295 * XSA-25). You can also set the default independently for
296 * modules/kernels in xc_dom_allocate() or call
297 * xc_dom_{kernel,module}_max_size.
298 */
299 #ifndef XC_DOM_DECOMPRESS_MAX
300 #define XC_DOM_DECOMPRESS_MAX (1024*1024*1024) /* 1GB */
301 #endif
302
303 int xc_dom_kernel_check_size(struct xc_dom_image *dom, size_t sz);
304 int xc_dom_kernel_max_size(struct xc_dom_image *dom, size_t sz);
305
306 int xc_dom_module_max_size(struct xc_dom_image *dom, size_t sz);
307
308 int xc_dom_devicetree_max_size(struct xc_dom_image *dom, size_t sz);
309
310 size_t xc_dom_check_gzip(xc_interface *xch,
311 void *blob, size_t ziplen);
312 int xc_dom_do_gunzip(xc_interface *xch,
313 void *src, size_t srclen, void *dst, size_t dstlen);
314 int xc_dom_try_gunzip(struct xc_dom_image *dom, void **blob, size_t * size);
315
316 int xc_dom_kernel_file(struct xc_dom_image *dom, const char *filename);
317 int xc_dom_module_file(struct xc_dom_image *dom, const char *filename,
318 const char *cmdline);
319 int xc_dom_kernel_mem(struct xc_dom_image *dom, const void *mem,
320 size_t memsize);
321 int xc_dom_module_mem(struct xc_dom_image *dom, const void *mem,
322 size_t memsize, const char *cmdline);
323 int xc_dom_devicetree_file(struct xc_dom_image *dom, const char *filename);
324 int xc_dom_devicetree_mem(struct xc_dom_image *dom, const void *mem,
325 size_t memsize);
326
327 int xc_dom_parse_image(struct xc_dom_image *dom);
328 int xc_dom_set_arch_hooks(struct xc_dom_image *dom);
329 int xc_dom_build_image(struct xc_dom_image *dom);
330
331 int xc_dom_boot_xen_init(struct xc_dom_image *dom, xc_interface *xch,
332 uint32_t domid);
333 int xc_dom_boot_mem_init(struct xc_dom_image *dom);
334 void *xc_dom_boot_domU_map(struct xc_dom_image *dom, xen_pfn_t pfn,
335 xen_pfn_t count);
336 int xc_dom_boot_image(struct xc_dom_image *dom);
337 int xc_dom_compat_check(struct xc_dom_image *dom);
338 int xc_dom_gnttab_init(struct xc_dom_image *dom);
339 int xc_dom_gnttab_seed(xc_interface *xch, uint32_t guest_domid,
340 bool is_hvm,
341 xen_pfn_t console_gfn,
342 xen_pfn_t xenstore_gfn,
343 uint32_t console_domid,
344 uint32_t xenstore_domid);
345 bool xc_dom_translated(const struct xc_dom_image *dom);
346
347 /* --- debugging bits ---------------------------------------------- */
348
349 int xc_dom_loginit(xc_interface *xch);
350
351 void xc_dom_printf(xc_interface *xch, const char *fmt, ...)
352 __attribute__ ((format(printf, 2, 3)));
353 void xc_dom_panic_func(xc_interface *xch,
354 const char *file, int line, xc_error_code err,
355 const char *fmt, ...)
356 __attribute__ ((format(printf, 5, 6)));
357
358 #define xc_dom_panic(xch, err, fmt, args...) \
359 xc_dom_panic_func(xch, __FILE__, __LINE__, err, fmt, ## args)
360 #define xc_dom_trace(mark) \
361 xc_dom_printf("%s:%d: trace %s\n", __FILE__, __LINE__, mark)
362
363 void xc_dom_log_memory_footprint(struct xc_dom_image *dom);
364
365 /* --- simple memory pool ------------------------------------------ */
366
367 void *xc_dom_malloc(struct xc_dom_image *dom, size_t size);
368 int xc_dom_register_external(struct xc_dom_image *dom, void *ptr, size_t size);
369 void *xc_dom_malloc_page_aligned(struct xc_dom_image *dom, size_t size);
370 void *xc_dom_malloc_filemap(struct xc_dom_image *dom,
371 const char *filename, size_t * size,
372 const size_t max_size);
373 char *xc_dom_strdup(struct xc_dom_image *dom, const char *str);
374
375 /* --- alloc memory pool ------------------------------------------- */
376
377 xen_pfn_t xc_dom_alloc_page(struct xc_dom_image *dom, const char *name);
378 int xc_dom_alloc_segment(struct xc_dom_image *dom,
379 struct xc_dom_seg *seg, const char *name,
380 xen_vaddr_t start, xen_vaddr_t size);
381
382 /* --- misc bits --------------------------------------------------- */
383
384 void *xc_dom_pfn_to_ptr(struct xc_dom_image *dom, xen_pfn_t first,
385 xen_pfn_t count);
386 void *xc_dom_pfn_to_ptr_retcount(struct xc_dom_image *dom, xen_pfn_t first,
387 xen_pfn_t count, xen_pfn_t *count_out);
388 void xc_dom_unmap_one(struct xc_dom_image *dom, xen_pfn_t pfn);
389 void xc_dom_unmap_all(struct xc_dom_image *dom);
390 void *xc_dom_vaddr_to_ptr(struct xc_dom_image *dom,
391 xen_vaddr_t vaddr, size_t *safe_region_out);
392 uint64_t xc_dom_virt_base(struct xc_dom_image *dom);
393 uint64_t xc_dom_virt_entry(struct xc_dom_image *dom);
394 uint64_t xc_dom_virt_hypercall(struct xc_dom_image *dom);
395 char *xc_dom_guest_os(struct xc_dom_image *dom);
396 bool xc_dom_feature_get(struct xc_dom_image *dom, unsigned int nr);
397
/*
 * Map a segment's pages into the local address space.
 *
 * Returns a pointer to the start of the mapping, or NULL on failure.
 * On success *pages_out is set to the number of mapped pages
 * (seg->pages); on failure it is set to 0.
 */
static inline void *xc_dom_seg_to_ptr_pages(struct xc_dom_image *dom,
                                            struct xc_dom_seg *seg,
                                            xen_pfn_t *pages_out)
{
    void *retval;

    retval = xc_dom_pfn_to_ptr(dom, seg->pfn, seg->pages);

    *pages_out = retval ? seg->pages : 0;
    return retval;
}
409
xc_dom_seg_to_ptr(struct xc_dom_image * dom,struct xc_dom_seg * seg)410 static inline void *xc_dom_seg_to_ptr(struct xc_dom_image *dom,
411 struct xc_dom_seg *seg)
412 {
413 xen_pfn_t dummy;
414
415 return xc_dom_seg_to_ptr_pages(dom, seg, &dummy);
416 }
417
/*
 * Translate a guest pfn to an mfn.
 *
 * For translated (autotranslate/HVM) guests the pfn is returned
 * unchanged.  For x86 PV guests the pv_p2m table is consulted;
 * out-of-range pfns yield INVALID_MFN.
 */
static inline xen_pfn_t xc_dom_p2m(struct xc_dom_image *dom, xen_pfn_t pfn)
{
    if ( xc_dom_translated(dom) )
        return pfn;

    /* x86 PV only now. */
    if ( pfn >= dom->total_pages )
        return INVALID_MFN;

    return dom->pv_p2m[pfn];
}
429
/*
 * Users not using xc_suspend_* / xc_await_suspend may not want to
 * include the full libxenevtchn API here.
 */
434 struct xenevtchn_handle;
435
/* For save's precopy_policy(). Progress snapshot passed to the callback. */
struct precopy_stats
{
    unsigned int iteration;      /* precopy iterations completed so far */
    unsigned long total_written; /* total amount written so far */
    long dirty_count; /* -1 if unknown */
};
443
/*
 * A precopy_policy callback may not be running in the same address
 * space as libxc and so precopy_stats is passed by value.
 */
448 typedef int (*precopy_policy_t)(struct precopy_stats, void *);
449
450 /* callbacks provided by xc_domain_save */
/* callbacks provided by xc_domain_save */
struct save_callbacks {
    /*
     * Called after expiration of checkpoint interval,
     * to suspend the guest.
     */
    int (*suspend)(void *data);

    /*
     * Called before and after every batch of page data sent during
     * the precopy phase of a live migration to ask the caller what
     * to do next based on the current state of the precopy migration.
     *
     * Should return one of the values listed below:
     */
#define XGS_POLICY_ABORT          (-1) /* Abandon the migration entirely
                                        * and tidy up. */
#define XGS_POLICY_CONTINUE_PRECOPY 0  /* Remain in the precopy phase. */
#define XGS_POLICY_STOP_AND_COPY    1  /* Immediately suspend and transmit the
                                        * remaining dirty pages. */
    precopy_policy_t precopy_policy;

    /*
     * Called after the guest's dirty pages have been
     * copied into an output buffer.
     * Callback function resumes the guest & the device model,
     * returns to xc_domain_save.
     * xc_domain_save then flushes the output buffer, while the
     * guest continues to run.
     */
    int (*postcopy)(void *data);

    /*
     * Called after the memory checkpoint has been flushed
     * out into the network. Typical actions performed in this
     * callback include:
     * (a) send the saved device model state (for HVM guests),
     * (b) wait for checkpoint ack
     * (c) release the network output buffer pertaining to the acked checkpoint.
     * (c) sleep for the checkpoint interval.
     *
     * returns:
     * 0: terminate checkpointing gracefully
     * 1: take another checkpoint
     */
    int (*checkpoint)(void *data);

    /*
     * Called after the checkpoint callback.
     *
     * returns:
     * 0: terminate checkpointing gracefully
     * 1: take another checkpoint
     */
    int (*wait_checkpoint)(void *data);

    /* Enable qemu-dm logging dirty pages to xen */
    int (*switch_qemu_logdirty)(uint32_t domid, unsigned enable, void *data); /* HVM only */

    /* to be provided as the last argument to each callback function */
    void *data;
};
512
/* Type of stream. Plain, or using a continuous replication protocol? */
typedef enum {
    XC_STREAM_PLAIN,    /* one-shot save/restore, no checkpointing */
    XC_STREAM_REMUS,    /* Remus continuous checkpointing */
    XC_STREAM_COLO,     /* COLO replication (uses a backchannel fd) */
} xc_stream_type_t;
519
520 /**
521 * This function will save a running domain.
522 *
523 * @param xch a handle to an open hypervisor interface
524 * @param io_fd the file descriptor to save a domain to
525 * @param dom the id of the domain
526 * @param flags XCFLAGS_xxx
527 * @param stream_type XC_STREAM_PLAIN if the far end of the stream
528 * doesn't use checkpointing
529 * @param recv_fd Only used for XC_STREAM_COLO. Contains backchannel from
530 * the destination side.
531 * @return 0 on success, -1 on failure
532 */
533 int xc_domain_save(xc_interface *xch, int io_fd, uint32_t dom,
534 uint32_t flags, struct save_callbacks *callbacks,
535 xc_stream_type_t stream_type, int recv_fd);
536
537 /* callbacks provided by xc_domain_restore */
/* callbacks provided by xc_domain_restore */
struct restore_callbacks {
    /*
     * Called once the STATIC_DATA_END record has been received/inferred.
     *
     * For compatibility with older streams, provides a list of static data
     * expected to be found in the stream, which was missing. A higher level
     * toolstack is responsible for providing any necessary compatibility.
     */
#define XGR_SDD_MISSING_CPUID (1 << 0)
#define XGR_SDD_MISSING_MSR   (1 << 1)
    int (*static_data_done)(unsigned int missing, void *data);

    /* Called after a new checkpoint to suspend the guest. */
    int (*suspend)(void *data);

    /*
     * Called after the secondary vm is ready to resume.
     * Callback function resumes the guest & the device model,
     * returns to xc_domain_restore.
     */
    int (*postcopy)(void *data);

    /*
     * A checkpoint record has been found in the stream.
     * returns:
     */
#define XGR_CHECKPOINT_ERROR    0 /* Terminate processing */
#define XGR_CHECKPOINT_SUCCESS  1 /* Continue reading more data from the stream */
#define XGR_CHECKPOINT_FAILOVER 2 /* Failover and resume VM */
    int (*checkpoint)(void *data);

    /*
     * Called after the checkpoint callback.
     *
     * returns:
     * 0: terminate checkpointing gracefully
     * 1: take another checkpoint
     */
    int (*wait_checkpoint)(void *data);

    /*
     * callback to send store gfn and console gfn to xl
     * if we want to resume vm before xc_domain_save()
     * exits.
     * NOTE(review): the reference to xc_domain_save() above looks like it
     * should be xc_domain_restore() — confirm against the callers.
     */
    void (*restore_results)(xen_pfn_t store_gfn, xen_pfn_t console_gfn,
                            void *data);

    /* to be provided as the last argument to each callback function */
    void *data;
};
589
590 /**
591 * This function will restore a saved domain.
592 *
593 * Domain is restored in a suspended state ready to be unpaused.
594 *
595 * @param xch a handle to an open hypervisor interface
596 * @param io_fd the file descriptor to restore a domain from
597 * @param dom the id of the domain
598 * @param store_evtchn the xenstore event channel for this domain to use
599 * @param store_mfn filled with the gfn of the store page
600 * @param store_domid the backend domain for xenstore
601 * @param console_evtchn the console event channel for this domain to use
602 * @param console_mfn filled with the gfn of the console page
603 * @param console_domid the backend domain for xenconsole
604 * @param stream_type XC_STREAM_PLAIN if the far end of the stream is using
605 * checkpointing
606 * @param callbacks non-NULL to receive a callback to restore toolstack
607 * specific data
608 * @param send_back_fd Only used for XC_STREAM_COLO. Contains backchannel to
609 * the source side.
610 * @return 0 on success, -1 on failure
611 */
612 int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom,
613 unsigned int store_evtchn, unsigned long *store_mfn,
614 uint32_t store_domid, unsigned int console_evtchn,
615 unsigned long *console_mfn, uint32_t console_domid,
616 xc_stream_type_t stream_type,
617 struct restore_callbacks *callbacks, int send_back_fd);
618
619 /**
620 * This function will create a domain for a paravirtualized Linux
621 * using file names pointing to kernel and ramdisk
622 *
623 * @parm xch a handle to an open hypervisor interface
624 * @parm domid the id of the domain
625 * @parm mem_mb memory size in megabytes
626 * @parm image_name name of the kernel image file
627 * @parm ramdisk_name name of the ramdisk image file
628 * @parm cmdline command line string
629 * @parm flags domain creation flags
630 * @parm store_evtchn the store event channel for this domain to use
631 * @parm store_mfn returned with the mfn of the store page
632 * @parm console_evtchn the console event channel for this domain to use
 * @parm console_mfn returned with the mfn of the console page
634 * @return 0 on success, -1 on failure
635 */
636 int xc_linux_build(xc_interface *xch,
637 uint32_t domid,
638 unsigned int mem_mb,
639 const char *image_name,
640 const char *ramdisk_name,
641 const char *cmdline,
642 const char *features,
643 unsigned long flags,
644 unsigned int store_evtchn,
645 unsigned long *store_mfn,
646 unsigned int console_evtchn,
647 unsigned long *console_mfn);
648
649 /*
650 * Sets *lockfd to -1.
651 * Has deallocated everything even on error.
652 */
653 int xc_suspend_evtchn_release(xc_interface *xch,
654 struct xenevtchn_handle *xce,
655 uint32_t domid, int suspend_evtchn, int *lockfd);
656
657 /**
658 * This function eats the initial notification.
659 * xce must not be used for anything else
660 * See xc_suspend_evtchn_init_sane re lockfd.
661 */
662 int xc_suspend_evtchn_init_exclusive(xc_interface *xch,
663 struct xenevtchn_handle *xce,
664 uint32_t domid, int port, int *lockfd);
665
666 /* xce must not be used for anything else */
667 int xc_await_suspend(xc_interface *xch, struct xenevtchn_handle *xce,
668 int suspend_evtchn);
669
670 /**
671 * The port will be signaled immediately after this call
672 * The caller should check the domain status and look for the next event
673 * On success, *lockfd will be set to >=0 and *lockfd must be preserved
674 * and fed to xc_suspend_evtchn_release. (On error *lockfd is
675 * undefined and xc_suspend_evtchn_release is not allowed.)
676 */
677 int xc_suspend_evtchn_init_sane(xc_interface *xch,
678 struct xenevtchn_handle *xce,
679 uint32_t domid, int port, int *lockfd);
680
681 int xc_mark_page_online(xc_interface *xch, unsigned long start,
682 unsigned long end, uint32_t *status);
683
684 int xc_mark_page_offline(xc_interface *xch, unsigned long start,
685 unsigned long end, uint32_t *status);
686
687 int xc_query_page_offline_status(xc_interface *xch, unsigned long start,
688 unsigned long end, uint32_t *status);
689
690 int xc_exchange_page(xc_interface *xch, uint32_t domid, xen_pfn_t mfn);
691
692 /**
693 * This function resumes a suspended domain. The domain should have
694 * been previously suspended.
695 *
 * Note that there is no 'xc_domain_suspend' as suspending a domain
 * is quite the endeavour.
698 *
699 * For the purpose of this explanation there are three guests:
 * PV (using hypercalls for privileged operations), HVM
701 * (fully hardware virtualized guests using emulated devices for everything),
702 * and PVHVM (PV aware with hardware virtualisation).
703 *
704 * HVM guest are the simplest - they suspend via S3 / S4 and resume from
705 * S3 / S4. Upon resume they have to re-negotiate with the emulated devices.
706 *
707 * PV and PVHVM communicate via hypercalls for suspend (and resume).
708 * For suspend the toolstack initiates the process by writing an value
709 * in XenBus "control/shutdown" with the string "suspend".
710 *
 * The PV guest stashes anything it deems necessary in 'struct
712 * start_info' in case of failure (PVHVM may ignore this) and calls
713 * the SCHEDOP_shutdown::SHUTDOWN_suspend hypercall (for PV as
714 * argument it passes the MFN to 'struct start_info').
715 *
716 * And then the guest is suspended.
717 *
718 * The checkpointing or notifying a guest that the suspend failed or
719 * cancelled (in case of checkpoint) is by having the
720 * SCHEDOP_shutdown::SHUTDOWN_suspend hypercall return a non-zero
721 * value.
722 *
723 * The PV and PVHVM resume path are similar. For PV it would be
724 * similar to bootup - figure out where the 'struct start_info' is (or
725 * if the suspend was cancelled aka checkpointed - reuse the saved
726 * values).
727 *
728 * From here on they differ depending whether the guest is PV or PVHVM
729 * in specifics but follow overall the same path:
730 * - PV: Bringing up the vCPUS,
731 * - PVHVM: Setup vector callback,
732 * - Bring up vCPU runstates,
733 * - Remap the grant tables if checkpointing or setup from scratch,
734 *
735 *
 * If the resume was not checkpointing (or if suspend was successful) we would
737 * setup the PV timers and the different PV events. Lastly the PV drivers
738 * re-negotiate with the backend.
739 *
740 * This function would return before the guest started resuming. That is
741 * the guest would be in non-running state and its vCPU context would be
 * in the SCHEDOP_shutdown::SHUTDOWN_suspend hypercall return path
 * (for PV and PVHVM). For HVM it would be in QEMU emulated
744 * BIOS handling S3 suspend.
745 *
746 * @parm xch a handle to an open hypervisor interface
747 * @parm domid the domain id to resume
748 * @parm fast use cooperative resume (guest must support this)
749 * return 0 on success, -1 on failure
750 */
751 int xc_domain_resume(xc_interface *xch,
752 uint32_t domid,
753 int fast);
754
/**
 * Memory related information, such as PFN types, the P2M table,
 * the guest word width and the guest page table levels.
 * Filled by xc_map_domain_meminfo() and torn down by
 * xc_unmap_domain_meminfo().
 */
struct xc_domain_meminfo {
    unsigned int pt_levels;     /* number of guest page table levels */
    unsigned int guest_width;   /* guest word width */
    xen_pfn_t *pfn_type;        /* per-pfn type information */
    xen_pfn_t *p2m_table;       /* mapped guest P2M table */
    unsigned long p2m_size;     /* number of entries in the P2M */
    unsigned int p2m_frames;    /* number of frames backing the P2M */
};
767
768 int xc_map_domain_meminfo(xc_interface *xch, uint32_t domid,
769 struct xc_domain_meminfo *minfo);
770
771 int xc_unmap_domain_meminfo(xc_interface *xch, struct xc_domain_meminfo *mem);
772
773 /**
774 * This function map m2p table
775 * @parm xch a handle to an open hypervisor interface
776 * @parm max_mfn the max pfn
777 * @parm prot the flags to map, such as read/write etc
778 * @parm mfn0 return the first mfn, can be NULL
779 * @return mapped m2p table on success, NULL on failure
780 */
781 xen_pfn_t *xc_map_m2p(xc_interface *xch,
782 unsigned long max_mfn,
783 int prot,
784 unsigned long *mfn0);
785
786 #if defined(__i386__) || defined(__x86_64__)
787 typedef struct xc_cpu_policy xc_cpu_policy_t;
788
789 /* Create and free a xc_cpu_policy object. */
790 xc_cpu_policy_t *xc_cpu_policy_init(void);
791 void xc_cpu_policy_destroy(xc_cpu_policy_t *policy);
792
793 /* Retrieve a system policy, or get/set a domains policy. */
794 int xc_cpu_policy_get_system(xc_interface *xch, unsigned int policy_idx,
795 xc_cpu_policy_t *policy);
796 int xc_cpu_policy_get_domain(xc_interface *xch, uint32_t domid,
797 xc_cpu_policy_t *policy);
798 int xc_cpu_policy_set_domain(xc_interface *xch, uint32_t domid,
799 xc_cpu_policy_t *policy);
800
801 /* Manipulate a policy via architectural representations. */
802 int xc_cpu_policy_serialise(xc_interface *xch, const xc_cpu_policy_t *policy,
803 xen_cpuid_leaf_t *leaves, uint32_t *nr_leaves,
804 xen_msr_entry_t *msrs, uint32_t *nr_msrs);
805 int xc_cpu_policy_update_cpuid(xc_interface *xch, xc_cpu_policy_t *policy,
806 const xen_cpuid_leaf_t *leaves,
807 uint32_t nr);
808 int xc_cpu_policy_update_msrs(xc_interface *xch, xc_cpu_policy_t *policy,
809 const xen_msr_entry_t *msrs, uint32_t nr);
810
811 /* Compatibility calculations. */
812 bool xc_cpu_policy_is_compatible(xc_interface *xch, xc_cpu_policy_t *host,
813 xc_cpu_policy_t *guest);
814
815 int xc_get_cpu_levelling_caps(xc_interface *xch, uint32_t *caps);
816 int xc_get_cpu_featureset(xc_interface *xch, uint32_t index,
817 uint32_t *nr_features, uint32_t *featureset);
818
819 int xc_cpu_policy_get_size(xc_interface *xch, uint32_t *nr_leaves,
820 uint32_t *nr_msrs);
821 int xc_set_domain_cpu_policy(xc_interface *xch, uint32_t domid,
822 uint32_t nr_leaves, xen_cpuid_leaf_t *leaves,
823 uint32_t nr_msrs, xen_msr_entry_t *msrs,
824 uint32_t *err_leaf_p, uint32_t *err_subleaf_p,
825 uint32_t *err_msr_p);
826
827 uint32_t xc_get_cpu_featureset_size(void);
828
/*
 * Selectors for the static CPU feature masks returned by
 * xc_get_static_cpu_featuremask().  MAX/DEF pairs presumably select the
 * maximum vs. default policies for each guest type — confirm against the
 * implementation.
 */
enum xc_static_cpu_featuremask {
    XC_FEATUREMASK_KNOWN,
    XC_FEATUREMASK_SPECIAL,
    XC_FEATUREMASK_PV_MAX,
    XC_FEATUREMASK_PV_DEF,
    XC_FEATUREMASK_HVM_SHADOW_MAX,
    XC_FEATUREMASK_HVM_SHADOW_DEF,
    XC_FEATUREMASK_HVM_HAP_MAX,
    XC_FEATUREMASK_HVM_HAP_DEF,
};
839 const uint32_t *xc_get_static_cpu_featuremask(enum xc_static_cpu_featuremask);
840 #endif /* __i386__ || __x86_64__ */
841 #endif /* XENGUEST_H */
842