1 /******************************************************************************
2  * xenguest.h
3  *
4  * A library for guest domain management in Xen.
5  *
6  * Copyright (c) 2003-2004, K A Fraser.
7  *
8  * This library is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public
10  * License as published by the Free Software Foundation;
11  * version 2.1 of the License.
12  *
13  * This library is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16  * Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public
19  * License along with this library; If not, see <http://www.gnu.org/licenses/>.
20  */
21 
22 #ifndef XENGUEST_H
23 #define XENGUEST_H
24 
25 #define XC_NUMA_NO_NODE   (~0U)
26 
27 #define XCFLAGS_LIVE      (1 << 0)
28 #define XCFLAGS_DEBUG     (1 << 1)
29 
30 #define X86_64_B_SIZE   64
31 #define X86_32_B_SIZE   32
32 
33 #define X86_HVM_NR_SPECIAL_PAGES    8
34 #define X86_HVM_END_SPECIAL_REGION  0xff000u
35 #define XG_MAX_MODULES 2
36 
37 /* --- typedefs and structs ---------------------------------------- */
38 
39 typedef uint64_t xen_vaddr_t;
40 typedef uint64_t xen_paddr_t;
41 
42 #define PRIpfn PRI_xen_pfn
43 
/*
 * A contiguous range of guest virtual address space together with the
 * pfns backing it.
 */
struct xc_dom_seg {
    xen_vaddr_t vstart;     /* segment start (guest virtual address) */
    xen_vaddr_t vend;       /* segment end (guest virtual address) */
    xen_pfn_t pfn;          /* first pfn backing the segment */
    xen_pfn_t pages;        /* number of pages in the segment */
};
50 
/*
 * A firmware blob (BIOS, ACPI table, SMBIOS structure, ...) to be
 * passed into the guest; see the system_firmware_module / acpi_modules /
 * smbios_module fields of struct xc_dom_image.
 */
struct xc_hvm_firmware_module {
    uint8_t  *data;            /* blob contents */
    uint32_t  length;          /* blob size in bytes */
    uint64_t  guest_addr_out;  /* output: guest address the blob was placed at */
};
56 
/*
 * One block in the domain builder's internal memory pool (see
 * xc_dom_malloc() and friends); blocks are chained via @next off
 * xc_dom_image.memblocks.
 */
struct xc_dom_mem {
    struct xc_dom_mem *next;    /* next block in the pool's list */
    void *ptr;                  /* start of the usable memory */
    enum {
        XC_DOM_MEM_TYPE_MALLOC_INTERNAL,
        XC_DOM_MEM_TYPE_MALLOC_EXTERNAL,
        XC_DOM_MEM_TYPE_MMAP,
    } type;
    /* NOTE(review): INTERNAL presumably stores its data in memory[]
     * below while EXTERNAL/MMAP point elsewhere — confirm against the
     * xc_dom_malloc*() implementations. */
    size_t len;                 /* size in bytes of the memory at ptr */
    /*
     * Trailing payload.  C99 flexible array member, replacing the
     * non-standard zero-length array idiom "memory[0]"; layout and
     * sizeof are unchanged.
     */
    unsigned char memory[];
};
68 
/*
 * Tracks one mapped range of guest pages; see xc_dom_pfn_to_ptr() and
 * xc_dom_image.phys_pages.
 */
struct xc_dom_phys {
    struct xc_dom_phys *next;   /* next range in the list */
    void *ptr;                  /* mapping of the range in our address space */
    xen_pfn_t first;            /* first pfn of the range */
    xen_pfn_t count;            /* number of pages in the range */
};
75 
/* A boot module (e.g. a ramdisk) to be loaded alongside the kernel. */
struct xc_dom_module {
    void *blob;         /* module image in memory */
    size_t size;        /* size of the image in bytes */
    void *cmdline;      /* optional module command line */
    /* If seg.vstart is non zero then the module will be loaded at that
     * address, otherwise it will automatically be placed.
     *
     * If automatic placement is used and the module is gzip
     * compressed then it will be decompressed as it is loaded. If the
     * module has been explicitly placed then it is loaded as is
     * otherwise decompressing risks undoing the manual placement.
     */
    struct xc_dom_seg seg;
};
90 
/*
 * State used by the domain builder while constructing a guest image.
 * Allocated by xc_dom_allocate() and released by xc_dom_release().
 */
struct xc_dom_image {
    /* files */
    void *kernel_blob;          /* kernel image (memory buffer or file mapping) */
    size_t kernel_size;
    unsigned int num_modules;   /* number of valid entries in modules[] */
    struct xc_dom_module modules[XG_MAX_MODULES];
    void *devicetree_blob;
    size_t devicetree_size;

    /*
     * Upper bounds accepted for the blobs above, guarding against
     * maliciously large images (see XC_DOM_DECOMPRESS_MAX / XSA-25
     * below and the xc_dom_*_max_size() setters).
     */
    size_t max_kernel_size;
    size_t max_module_size;
    size_t max_devicetree_size;

    /* arguments and parameters */
    char *cmdline;
    size_t cmdline_size;
    uint32_t f_requested[XENFEAT_NR_SUBMAPS];   /* guest features requested */

    /* info from (elf) kernel image */
    struct elf_dom_parms *parms;
    const char *guest_type;

    /* memory layout */
    struct xc_dom_seg kernel_seg;
    struct xc_dom_seg p2m_seg;
    struct xc_dom_seg pgtables_seg;
    struct xc_dom_seg devicetree_seg;
    struct xc_dom_seg start_info_seg;
    xen_pfn_t start_info_pfn;
    xen_pfn_t console_pfn;
    xen_pfn_t xenstore_pfn;
    xen_pfn_t shared_info_pfn;
    xen_pfn_t bootstack_pfn;
    /* allocation high-water mark; see the allocate hook below */
    xen_pfn_t pfn_alloc_end;
    xen_vaddr_t virt_alloc_end;
    xen_vaddr_t bsd_symtab_start;

    /*
     * initrd parameters as specified in start_info page
     * Depending on capabilities of the booted kernel this may be a virtual
     * address or a pfn. Type is neutral and large enough to hold a virtual
     * address of a 64 bit kernel even with 32 bit toolstack.
     */
    uint64_t initrd_start;
    uint64_t initrd_len;

    unsigned int alloc_bootstack;
    xen_vaddr_t virt_pgtab_end;

    /* other state info */
    uint32_t f_active[XENFEAT_NR_SUBMAPS];  /* guest features actually active */

    /*
     * pv_p2m is specific to x86 PV guests, and maps GFNs to MFNs.  It is
     * eventually copied into guest context.
     */
    xen_pfn_t *pv_p2m;
    xen_pfn_t p2m_size;         /* number of pfns covered by pv_p2m */

    /* physical memory
     *
     * An x86 PV guest has one or more blocks of physical RAM,
     * consisting of total_pages starting at 0. The start address and
     * size of each block is controlled by vNUMA structures.
     *
     * An ARM guest has GUEST_RAM_BANKS regions of RAM, with
     * rambank_size[i] pages in each. The lowest RAM address
     * is stored in rambase_pfn.
     */
    xen_pfn_t rambase_pfn;
    xen_pfn_t total_pages;
    struct xc_dom_phys *phys_pages;     /* list of currently mapped page ranges */
#if defined (__arm__) || defined(__aarch64__)
    xen_pfn_t rambank_size[GUEST_RAM_BANKS];
#endif

    /* malloc memory pool */
    struct xc_dom_mem *memblocks;

    /* memory footprint stats (see xc_dom_log_memory_footprint) */
    size_t alloc_malloc;
    size_t alloc_mem_map;
    size_t alloc_file_map;
    size_t alloc_domU_map;

    /* misc xen domain config stuff */
    unsigned long flags;            /* presumably XCFLAGS_* bits — confirm */
    unsigned int console_evtchn;
    unsigned int xenstore_evtchn;
    uint32_t console_domid;         /* backend domain for the console */
    uint32_t xenstore_domid;        /* backend domain for xenstore */
    xen_pfn_t shared_info_mfn;

    xc_interface *xch;              /* hypervisor interface handle */
    uint32_t guest_domid;           /* domid being built */
    int claim_enabled; /* 0 by default, 1 enables it */

    int xen_version;
    xen_capabilities_info_t xen_caps;

    /* kernel loader, arch hooks */
    struct xc_dom_loader *kernel_loader;
    void *private_loader;

    /* vNUMA information */
    xen_vmemrange_t *vmemranges;
    unsigned int nr_vmemranges;
    unsigned int *vnode_to_pnode;   /* virtual-node to physical-node map */
    unsigned int nr_vnodes;

    /* domain type/architecture specific data */
    void *arch_private;

    /* kernel loader */
    struct xc_dom_arch *arch_hooks;
    /* allocate up to pfn_alloc_end */
    int (*allocate) (struct xc_dom_image * dom);

    /* Container type (HVM or PV). */
    enum {
        XC_DOM_PV_CONTAINER,
        XC_DOM_HVM_CONTAINER,
    } container_type;

    /* HVM specific fields. */
    xen_pfn_t target_pages;
    xen_paddr_t mmio_start;
    xen_paddr_t mmio_size;
    xen_paddr_t lowmem_end;
    xen_paddr_t highmem_end;
    xen_pfn_t vga_hole_size;

    /* If unset disables the setup of the IOREQ pages. */
    bool device_model;

    /* BIOS/Firmware passed to HVMLOADER */
    struct xc_hvm_firmware_module system_firmware_module;

    /* Extra ACPI tables */
#define MAX_ACPI_MODULES        4
    struct xc_hvm_firmware_module acpi_modules[MAX_ACPI_MODULES];

    /* Extra SMBIOS structures passed to HVMLOADER */
    struct xc_hvm_firmware_module smbios_module;

#if defined(__i386__) || defined(__x86_64__)
    struct e820entry *e820;         /* guest e820 memory map */
    unsigned int e820_entries;
#endif

    xen_pfn_t vuart_gfn;

    /* Number of vCPUs */
    unsigned int max_vcpus;
};
246 
247 /* --- arch specific hooks ----------------------------------------- */
248 
/*
 * Per-architecture (and per-guest-type) hook table for the domain
 * builder.  Instances are registered via xc_dom_register_arch_hooks()
 * and selected by xc_dom_set_arch_hooks().  All hooks return 0 on
 * success, presumably negative on error — confirm in implementations.
 */
struct xc_dom_arch {
    int (*alloc_magic_pages) (struct xc_dom_image * dom);

    /* pagetable setup - x86 PV only */
    int (*alloc_pgtables) (struct xc_dom_image * dom);
    int (*alloc_p2m_list) (struct xc_dom_image * dom);
    int (*setup_pgtables) (struct xc_dom_image * dom);

    /* arch-specific data structs setup */
    /* in Mini-OS environment start_info might be a macro, avoid collision. */
#undef start_info
    int (*start_info) (struct xc_dom_image * dom);
    int (*shared_info) (struct xc_dom_image * dom, void *shared_info);
    int (*vcpu) (struct xc_dom_image * dom);
    int (*bootearly) (struct xc_dom_image * dom);
    int (*bootlate) (struct xc_dom_image * dom);

    /* arch-specific memory initialization. */
    int (*meminit) (struct xc_dom_image * dom);

    const char *guest_type;         /* guest type string this table serves */
    const char *native_protocol;
    int page_shift;                 /* log2 of the guest page size (see XC_DOM_PAGE_SIZE) */
    int sizeof_pfn;                 /* size of a guest pfn entry — presumably bytes, confirm */
    int p2m_base_supported;         /* boolean */
    int arch_private_size;          /* presumably bytes allocated for dom->arch_private — confirm */

    struct xc_dom_arch *next;       /* registration list link */
};
278 void xc_dom_register_arch_hooks(struct xc_dom_arch *hooks);
279 
280 #define XC_DOM_PAGE_SHIFT(dom)  ((dom)->arch_hooks->page_shift)
281 #define XC_DOM_PAGE_SIZE(dom)   (1LL << (dom)->arch_hooks->page_shift)
282 
283 /* --- main functions ---------------------------------------------- */
284 
285 struct xc_dom_image *xc_dom_allocate(xc_interface *xch,
286                                      const char *cmdline, const char *features);
287 void xc_dom_release_phys(struct xc_dom_image *dom);
288 void xc_dom_release(struct xc_dom_image *dom);
289 int xc_dom_rambase_init(struct xc_dom_image *dom, uint64_t rambase);
290 int xc_dom_mem_init(struct xc_dom_image *dom, unsigned int mem_mb);
291 
292 /* Set this larger if you have enormous modules/kernels. Note that
293  * you should trust all kernels not to be maliciously large (e.g. to
294  * exhaust all dom0 memory) if you do this (see CVE-2012-4544 /
295  * XSA-25). You can also set the default independently for
296  * modules/kernels in xc_dom_allocate() or call
297  * xc_dom_{kernel,module}_max_size.
298  */
299 #ifndef XC_DOM_DECOMPRESS_MAX
300 #define XC_DOM_DECOMPRESS_MAX (1024*1024*1024) /* 1GB */
301 #endif
302 
303 int xc_dom_kernel_check_size(struct xc_dom_image *dom, size_t sz);
304 int xc_dom_kernel_max_size(struct xc_dom_image *dom, size_t sz);
305 
306 int xc_dom_module_max_size(struct xc_dom_image *dom, size_t sz);
307 
308 int xc_dom_devicetree_max_size(struct xc_dom_image *dom, size_t sz);
309 
310 size_t xc_dom_check_gzip(xc_interface *xch,
311                      void *blob, size_t ziplen);
312 int xc_dom_do_gunzip(xc_interface *xch,
313                      void *src, size_t srclen, void *dst, size_t dstlen);
314 int xc_dom_try_gunzip(struct xc_dom_image *dom, void **blob, size_t * size);
315 
316 int xc_dom_kernel_file(struct xc_dom_image *dom, const char *filename);
317 int xc_dom_module_file(struct xc_dom_image *dom, const char *filename,
318                        const char *cmdline);
319 int xc_dom_kernel_mem(struct xc_dom_image *dom, const void *mem,
320                       size_t memsize);
321 int xc_dom_module_mem(struct xc_dom_image *dom, const void *mem,
322                        size_t memsize, const char *cmdline);
323 int xc_dom_devicetree_file(struct xc_dom_image *dom, const char *filename);
324 int xc_dom_devicetree_mem(struct xc_dom_image *dom, const void *mem,
325                           size_t memsize);
326 
327 int xc_dom_parse_image(struct xc_dom_image *dom);
328 int xc_dom_set_arch_hooks(struct xc_dom_image *dom);
329 int xc_dom_build_image(struct xc_dom_image *dom);
330 
331 int xc_dom_boot_xen_init(struct xc_dom_image *dom, xc_interface *xch,
332                          uint32_t domid);
333 int xc_dom_boot_mem_init(struct xc_dom_image *dom);
334 void *xc_dom_boot_domU_map(struct xc_dom_image *dom, xen_pfn_t pfn,
335                            xen_pfn_t count);
336 int xc_dom_boot_image(struct xc_dom_image *dom);
337 int xc_dom_compat_check(struct xc_dom_image *dom);
338 int xc_dom_gnttab_init(struct xc_dom_image *dom);
339 int xc_dom_gnttab_seed(xc_interface *xch, uint32_t guest_domid,
340                        bool is_hvm,
341                        xen_pfn_t console_gfn,
342                        xen_pfn_t xenstore_gfn,
343                        uint32_t console_domid,
344                        uint32_t xenstore_domid);
345 bool xc_dom_translated(const struct xc_dom_image *dom);
346 
347 /* --- debugging bits ---------------------------------------------- */
348 
349 int xc_dom_loginit(xc_interface *xch);
350 
351 void xc_dom_printf(xc_interface *xch, const char *fmt, ...)
352      __attribute__ ((format(printf, 2, 3)));
353 void xc_dom_panic_func(xc_interface *xch,
354                       const char *file, int line, xc_error_code err,
355                       const char *fmt, ...)
356     __attribute__ ((format(printf, 5, 6)));
357 
358 #define xc_dom_panic(xch, err, fmt, args...) \
359     xc_dom_panic_func(xch, __FILE__, __LINE__, err, fmt, ## args)
360 #define xc_dom_trace(mark) \
361     xc_dom_printf("%s:%d: trace %s\n", __FILE__, __LINE__, mark)
362 
363 void xc_dom_log_memory_footprint(struct xc_dom_image *dom);
364 
365 /* --- simple memory pool ------------------------------------------ */
366 
367 void *xc_dom_malloc(struct xc_dom_image *dom, size_t size);
368 int xc_dom_register_external(struct xc_dom_image *dom, void *ptr, size_t size);
369 void *xc_dom_malloc_page_aligned(struct xc_dom_image *dom, size_t size);
370 void *xc_dom_malloc_filemap(struct xc_dom_image *dom,
371                             const char *filename, size_t * size,
372                             const size_t max_size);
373 char *xc_dom_strdup(struct xc_dom_image *dom, const char *str);
374 
375 /* --- alloc memory pool ------------------------------------------- */
376 
377 xen_pfn_t xc_dom_alloc_page(struct xc_dom_image *dom, const char *name);
378 int xc_dom_alloc_segment(struct xc_dom_image *dom,
379                          struct xc_dom_seg *seg, const char *name,
380                          xen_vaddr_t start, xen_vaddr_t size);
381 
382 /* --- misc bits --------------------------------------------------- */
383 
384 void *xc_dom_pfn_to_ptr(struct xc_dom_image *dom, xen_pfn_t first,
385                         xen_pfn_t count);
386 void *xc_dom_pfn_to_ptr_retcount(struct xc_dom_image *dom, xen_pfn_t first,
387                                  xen_pfn_t count, xen_pfn_t *count_out);
388 void xc_dom_unmap_one(struct xc_dom_image *dom, xen_pfn_t pfn);
389 void xc_dom_unmap_all(struct xc_dom_image *dom);
390 void *xc_dom_vaddr_to_ptr(struct xc_dom_image *dom,
391                           xen_vaddr_t vaddr, size_t *safe_region_out);
392 uint64_t xc_dom_virt_base(struct xc_dom_image *dom);
393 uint64_t xc_dom_virt_entry(struct xc_dom_image *dom);
394 uint64_t xc_dom_virt_hypercall(struct xc_dom_image *dom);
395 char *xc_dom_guest_os(struct xc_dom_image *dom);
396 bool xc_dom_feature_get(struct xc_dom_image *dom, unsigned int nr);
397 
/*
 * Map the pages of @seg into our address space.
 *
 * Returns a pointer to the mapping, or NULL on failure.  On success
 * *pages_out is set to the number of pages mapped (seg->pages); on
 * failure it is set to 0.
 *
 * (The original first line was corrupted by a fused ctags index entry;
 * restored to a valid definition.)
 */
static inline void *xc_dom_seg_to_ptr_pages(struct xc_dom_image *dom,
                                            struct xc_dom_seg *seg,
                                            xen_pfn_t *pages_out)
{
    void *retval;

    retval = xc_dom_pfn_to_ptr(dom, seg->pfn, seg->pages);

    /* Do not report pages the caller cannot actually access. */
    *pages_out = retval ? seg->pages : 0;
    return retval;
}
409 
xc_dom_seg_to_ptr(struct xc_dom_image * dom,struct xc_dom_seg * seg)410 static inline void *xc_dom_seg_to_ptr(struct xc_dom_image *dom,
411                                       struct xc_dom_seg *seg)
412 {
413     xen_pfn_t dummy;
414 
415     return xc_dom_seg_to_ptr_pages(dom, seg, &dummy);
416 }
417 
/*
 * Translate a guest pfn to an mfn.
 *
 * For translated guests the pfn is returned unchanged.  For x86 PV
 * guests the pv_p2m table is consulted; pfns at or beyond
 * dom->total_pages yield INVALID_MFN.
 *
 * (The original first line was corrupted by a fused ctags index entry;
 * restored to a valid definition.)
 */
static inline xen_pfn_t xc_dom_p2m(struct xc_dom_image *dom, xen_pfn_t pfn)
{
    if ( xc_dom_translated(dom) )
        return pfn;

    /* x86 PV only now. */
    if ( pfn >= dom->total_pages )
        return INVALID_MFN;

    return dom->pv_p2m[pfn];
}
429 
/*
 * Users not using xc_suspend_* / xc_await_suspend may not want to
 * include the full libxenevtchn API here.
 */
434 struct xenevtchn_handle;
435 
436 /* For save's precopy_policy(). */
/* For save's precopy_policy(). */
struct precopy_stats
{
    unsigned int iteration;      /* current iteration of the precopy phase */
    unsigned long total_written; /* total written so far — units not stated here, confirm */
    long dirty_count; /* -1 if unknown */
};
443 
/*
 * A precopy_policy callback may not be running in the same address
 * space as libxc and so precopy_stats is passed by value.
 */
448 typedef int (*precopy_policy_t)(struct precopy_stats, void *);
449 
450 /* callbacks provided by xc_domain_save */
/* callbacks provided by xc_domain_save */
struct save_callbacks {
    /*
     * Called after expiration of checkpoint interval,
     * to suspend the guest.
     */
    int (*suspend)(void *data);

    /*
     * Called before and after every batch of page data sent during
     * the precopy phase of a live migration to ask the caller what
     * to do next based on the current state of the precopy migration.
     *
     * Should return one of the values listed below:
     */
#define XGS_POLICY_ABORT          (-1) /* Abandon the migration entirely
                                        * and tidy up. */
#define XGS_POLICY_CONTINUE_PRECOPY 0  /* Remain in the precopy phase. */
#define XGS_POLICY_STOP_AND_COPY    1  /* Immediately suspend and transmit the
                                        * remaining dirty pages. */
    precopy_policy_t precopy_policy;

    /*
     * Called after the guest's dirty pages have been
     *  copied into an output buffer.
     * Callback function resumes the guest & the device model,
     *  returns to xc_domain_save.
     * xc_domain_save then flushes the output buffer, while the
     *  guest continues to run.
     */
    int (*postcopy)(void *data);

    /*
     * Called after the memory checkpoint has been flushed
     * out into the network. Typical actions performed in this
     * callback include:
     *   (a) send the saved device model state (for HVM guests),
     *   (b) wait for checkpoint ack,
     *   (c) release the network output buffer pertaining to the acked checkpoint,
     *   (d) sleep for the checkpoint interval.
     *
     * returns:
     * 0: terminate checkpointing gracefully
     * 1: take another checkpoint
     */
    int (*checkpoint)(void *data);

    /*
     * Called after the checkpoint callback.
     *
     * returns:
     * 0: terminate checkpointing gracefully
     * 1: take another checkpoint
     */
    int (*wait_checkpoint)(void *data);

    /* Enable qemu-dm logging dirty pages to xen */
    int (*switch_qemu_logdirty)(uint32_t domid, unsigned enable, void *data); /* HVM only */

    /* to be provided as the last argument to each callback function */
    void *data;
};
512 
/* Type of stream.  Plain, or using a continuous replication protocol? */
typedef enum {
    XC_STREAM_PLAIN,    /* One-shot save/restore stream. */
    XC_STREAM_REMUS,    /* Remus continuous replication. */
    XC_STREAM_COLO,     /* COLO continuous replication. */
} xc_stream_type_t;
519 
520 /**
521  * This function will save a running domain.
522  *
523  * @param xch a handle to an open hypervisor interface
524  * @param io_fd the file descriptor to save a domain to
525  * @param dom the id of the domain
526  * @param flags XCFLAGS_xxx
527  * @param stream_type XC_STREAM_PLAIN if the far end of the stream
528  *        doesn't use checkpointing
529  * @param recv_fd Only used for XC_STREAM_COLO.  Contains backchannel from
530  *        the destination side.
531  * @return 0 on success, -1 on failure
532  */
533 int xc_domain_save(xc_interface *xch, int io_fd, uint32_t dom,
534                    uint32_t flags, struct save_callbacks *callbacks,
535                    xc_stream_type_t stream_type, int recv_fd);
536 
537 /* callbacks provided by xc_domain_restore */
/* callbacks provided by xc_domain_restore */
struct restore_callbacks {
    /*
     * Called once the STATIC_DATA_END record has been received/inferred.
     *
     * For compatibility with older streams, provides a list of static data
     * expected to be found in the stream, which was missing.  A higher level
     * toolstack is responsible for providing any necessary compatibility.
     */
#define XGR_SDD_MISSING_CPUID (1 << 0)
#define XGR_SDD_MISSING_MSR   (1 << 1)
    int (*static_data_done)(unsigned int missing, void *data);

    /* Called after a new checkpoint to suspend the guest. */
    int (*suspend)(void *data);

    /*
     * Called after the secondary vm is ready to resume.
     * Callback function resumes the guest & the device model,
     * returns to xc_domain_restore.
     */
    int (*postcopy)(void *data);

    /*
     * A checkpoint record has been found in the stream.
     * returns:
     */
#define XGR_CHECKPOINT_ERROR    0 /* Terminate processing */
#define XGR_CHECKPOINT_SUCCESS  1 /* Continue reading more data from the stream */
#define XGR_CHECKPOINT_FAILOVER 2 /* Failover and resume VM */
    int (*checkpoint)(void *data);

    /*
     * Called after the checkpoint callback.
     *
     * returns:
     * 0: terminate checkpointing gracefully
     * 1: take another checkpoint
     */
    int (*wait_checkpoint)(void *data);

    /*
     * Callback to send the store gfn and console gfn to xl
     * if we want to resume the vm before xc_domain_save()
     * exits.
     */
    void (*restore_results)(xen_pfn_t store_gfn, xen_pfn_t console_gfn,
                            void *data);

    /* to be provided as the last argument to each callback function */
    void *data;
};
589 
590 /**
591  * This function will restore a saved domain.
592  *
593  * Domain is restored in a suspended state ready to be unpaused.
594  *
595  * @param xch a handle to an open hypervisor interface
596  * @param io_fd the file descriptor to restore a domain from
597  * @param dom the id of the domain
598  * @param store_evtchn the xenstore event channel for this domain to use
599  * @param store_mfn filled with the gfn of the store page
600  * @param store_domid the backend domain for xenstore
601  * @param console_evtchn the console event channel for this domain to use
602  * @param console_mfn filled with the gfn of the console page
603  * @param console_domid the backend domain for xenconsole
604  * @param stream_type XC_STREAM_PLAIN if the far end of the stream is using
605  *        checkpointing
606  * @param callbacks non-NULL to receive a callback to restore toolstack
607  *        specific data
608  * @param send_back_fd Only used for XC_STREAM_COLO.  Contains backchannel to
609  *        the source side.
610  * @return 0 on success, -1 on failure
611  */
612 int xc_domain_restore(xc_interface *xch, int io_fd, uint32_t dom,
613                       unsigned int store_evtchn, unsigned long *store_mfn,
614                       uint32_t store_domid, unsigned int console_evtchn,
615                       unsigned long *console_mfn, uint32_t console_domid,
616                       xc_stream_type_t stream_type,
617                       struct restore_callbacks *callbacks, int send_back_fd);
618 
619 /**
620  * This function will create a domain for a paravirtualized Linux
621  * using file names pointing to kernel and ramdisk
622  *
623  * @parm xch a handle to an open hypervisor interface
624  * @parm domid the id of the domain
625  * @parm mem_mb memory size in megabytes
626  * @parm image_name name of the kernel image file
627  * @parm ramdisk_name name of the ramdisk image file
628  * @parm cmdline command line string
629  * @parm flags domain creation flags
630  * @parm store_evtchn the store event channel for this domain to use
631  * @parm store_mfn returned with the mfn of the store page
632  * @parm console_evtchn the console event channel for this domain to use
 * @parm console_mfn returned with the mfn of the console page
634  * @return 0 on success, -1 on failure
635  */
636 int xc_linux_build(xc_interface *xch,
637                    uint32_t domid,
638                    unsigned int mem_mb,
639                    const char *image_name,
640                    const char *ramdisk_name,
641                    const char *cmdline,
642                    const char *features,
643                    unsigned long flags,
644                    unsigned int store_evtchn,
645                    unsigned long *store_mfn,
646                    unsigned int console_evtchn,
647                    unsigned long *console_mfn);
648 
649 /*
650  * Sets *lockfd to -1.
651  * Has deallocated everything even on error.
652  */
653 int xc_suspend_evtchn_release(xc_interface *xch,
654                               struct xenevtchn_handle *xce,
655                               uint32_t domid, int suspend_evtchn, int *lockfd);
656 
657 /**
658  * This function eats the initial notification.
659  * xce must not be used for anything else
660  * See xc_suspend_evtchn_init_sane re lockfd.
661  */
662 int xc_suspend_evtchn_init_exclusive(xc_interface *xch,
663                                      struct xenevtchn_handle *xce,
664                                      uint32_t domid, int port, int *lockfd);
665 
666 /* xce must not be used for anything else */
667 int xc_await_suspend(xc_interface *xch, struct xenevtchn_handle *xce,
668                      int suspend_evtchn);
669 
670 /**
671  * The port will be signaled immediately after this call
672  * The caller should check the domain status and look for the next event
673  * On success, *lockfd will be set to >=0 and *lockfd must be preserved
674  * and fed to xc_suspend_evtchn_release.  (On error *lockfd is
675  * undefined and xc_suspend_evtchn_release is not allowed.)
676  */
677 int xc_suspend_evtchn_init_sane(xc_interface *xch,
678                                 struct xenevtchn_handle *xce,
679                                 uint32_t domid, int port, int *lockfd);
680 
681 int xc_mark_page_online(xc_interface *xch, unsigned long start,
682                         unsigned long end, uint32_t *status);
683 
684 int xc_mark_page_offline(xc_interface *xch, unsigned long start,
685                           unsigned long end, uint32_t *status);
686 
687 int xc_query_page_offline_status(xc_interface *xch, unsigned long start,
688                                  unsigned long end, uint32_t *status);
689 
690 int xc_exchange_page(xc_interface *xch, uint32_t domid, xen_pfn_t mfn);
691 
692 /**
693  * This function resumes a suspended domain. The domain should have
694  * been previously suspended.
695  *
 * Note that there is no 'xc_domain_suspend' as suspending a domain
 * is quite the endeavour.
698  *
699  * For the purpose of this explanation there are three guests:
 * PV (using hypercalls for privileged operations), HVM
701  * (fully hardware virtualized guests using emulated devices for everything),
702  * and PVHVM (PV aware with hardware virtualisation).
703  *
704  * HVM guest are the simplest - they suspend via S3 / S4 and resume from
705  * S3 / S4. Upon resume they have to re-negotiate with the emulated devices.
706  *
707  * PV and PVHVM communicate via hypercalls for suspend (and resume).
708  * For suspend the toolstack initiates the process by writing an value
709  * in XenBus "control/shutdown" with the string "suspend".
710  *
 * The PV guest stashes anything it deems necessary in 'struct
712  * start_info' in case of failure (PVHVM may ignore this) and calls
713  * the SCHEDOP_shutdown::SHUTDOWN_suspend hypercall (for PV as
714  * argument it passes the MFN to 'struct start_info').
715  *
716  * And then the guest is suspended.
717  *
718  * The checkpointing or notifying a guest that the suspend failed or
719  * cancelled (in case of checkpoint) is by having the
720  * SCHEDOP_shutdown::SHUTDOWN_suspend hypercall return a non-zero
721  * value.
722  *
723  * The PV and PVHVM resume path are similar. For PV it would be
724  * similar to bootup - figure out where the 'struct start_info' is (or
725  * if the suspend was cancelled aka checkpointed - reuse the saved
726  * values).
727  *
728  * From here on they differ depending whether the guest is PV or PVHVM
729  * in specifics but follow overall the same path:
730  *  - PV: Bringing up the vCPUS,
731  *  - PVHVM: Setup vector callback,
732  *  - Bring up vCPU runstates,
733  *  - Remap the grant tables if checkpointing or setup from scratch,
734  *
735  *
 * If the resume was not checkpointing (or if suspend was successful) we would
737  * setup the PV timers and the different PV events. Lastly the PV drivers
738  * re-negotiate with the backend.
739  *
740  * This function would return before the guest started resuming. That is
741  * the guest would be in non-running state and its vCPU context would be
 * in the SCHEDOP_shutdown::SHUTDOWN_suspend hypercall return path
 * (for PV and PVHVM). For HVM it would be in QEMU emulated
744  * BIOS handling S3 suspend.
745  *
746  * @parm xch a handle to an open hypervisor interface
747  * @parm domid the domain id to resume
748  * @parm fast use cooperative resume (guest must support this)
749  * return 0 on success, -1 on failure
750  */
751 int xc_domain_resume(xc_interface *xch,
752                      uint32_t domid,
753                      int fast);
754 
755 /**
756  * Memory related information, such as PFN types, the P2M table,
757  * the guest word width and the guest page table levels.
758  */
struct xc_domain_meminfo {
    unsigned int pt_levels;     /* number of guest page table levels */
    unsigned int guest_width;   /* guest word width — units presumably bytes, confirm */
    xen_pfn_t *pfn_type;        /* per-page type information */
    xen_pfn_t *p2m_table;       /* mapped P2M table */
    unsigned long p2m_size;     /* size of the P2M — presumably in entries, confirm */
    unsigned int p2m_frames;    /* number of frames backing p2m_table */
};
767 
768 int xc_map_domain_meminfo(xc_interface *xch, uint32_t domid,
769                           struct xc_domain_meminfo *minfo);
770 
771 int xc_unmap_domain_meminfo(xc_interface *xch, struct xc_domain_meminfo *mem);
772 
773 /**
774  * This function map m2p table
775  * @parm xch a handle to an open hypervisor interface
776  * @parm max_mfn the max pfn
777  * @parm prot the flags to map, such as read/write etc
778  * @parm mfn0 return the first mfn, can be NULL
779  * @return mapped m2p table on success, NULL on failure
780  */
781 xen_pfn_t *xc_map_m2p(xc_interface *xch,
782                       unsigned long max_mfn,
783                       int prot,
784                       unsigned long *mfn0);
785 
786 #if defined(__i386__) || defined(__x86_64__)
787 typedef struct xc_cpu_policy xc_cpu_policy_t;
788 
789 /* Create and free a xc_cpu_policy object. */
790 xc_cpu_policy_t *xc_cpu_policy_init(void);
791 void xc_cpu_policy_destroy(xc_cpu_policy_t *policy);
792 
793 /* Retrieve a system policy, or get/set a domains policy. */
794 int xc_cpu_policy_get_system(xc_interface *xch, unsigned int policy_idx,
795                              xc_cpu_policy_t *policy);
796 int xc_cpu_policy_get_domain(xc_interface *xch, uint32_t domid,
797                              xc_cpu_policy_t *policy);
798 int xc_cpu_policy_set_domain(xc_interface *xch, uint32_t domid,
799                              xc_cpu_policy_t *policy);
800 
801 /* Manipulate a policy via architectural representations. */
802 int xc_cpu_policy_serialise(xc_interface *xch, const xc_cpu_policy_t *policy,
803                             xen_cpuid_leaf_t *leaves, uint32_t *nr_leaves,
804                             xen_msr_entry_t *msrs, uint32_t *nr_msrs);
805 int xc_cpu_policy_update_cpuid(xc_interface *xch, xc_cpu_policy_t *policy,
806                                const xen_cpuid_leaf_t *leaves,
807                                uint32_t nr);
808 int xc_cpu_policy_update_msrs(xc_interface *xch, xc_cpu_policy_t *policy,
809                               const xen_msr_entry_t *msrs, uint32_t nr);
810 
811 /* Compatibility calculations. */
812 bool xc_cpu_policy_is_compatible(xc_interface *xch, xc_cpu_policy_t *host,
813                                  xc_cpu_policy_t *guest);
814 
815 int xc_get_cpu_levelling_caps(xc_interface *xch, uint32_t *caps);
816 int xc_get_cpu_featureset(xc_interface *xch, uint32_t index,
817                           uint32_t *nr_features, uint32_t *featureset);
818 
819 int xc_cpu_policy_get_size(xc_interface *xch, uint32_t *nr_leaves,
820                            uint32_t *nr_msrs);
821 int xc_set_domain_cpu_policy(xc_interface *xch, uint32_t domid,
822                              uint32_t nr_leaves, xen_cpuid_leaf_t *leaves,
823                              uint32_t nr_msrs, xen_msr_entry_t *msrs,
824                              uint32_t *err_leaf_p, uint32_t *err_subleaf_p,
825                              uint32_t *err_msr_p);
826 
827 uint32_t xc_get_cpu_featureset_size(void);
828 
/*
 * Selectors for xc_get_static_cpu_featuremask(), naming the static CPU
 * featuremasks built into the library.  MAX/DEF presumably denote the
 * maximum vs default featureset for each guest type — confirm in the
 * implementation.
 */
enum xc_static_cpu_featuremask {
    XC_FEATUREMASK_KNOWN,
    XC_FEATUREMASK_SPECIAL,
    XC_FEATUREMASK_PV_MAX,
    XC_FEATUREMASK_PV_DEF,
    XC_FEATUREMASK_HVM_SHADOW_MAX,
    XC_FEATUREMASK_HVM_SHADOW_DEF,
    XC_FEATUREMASK_HVM_HAP_MAX,
    XC_FEATUREMASK_HVM_HAP_DEF,
};
839 const uint32_t *xc_get_static_cpu_featuremask(enum xc_static_cpu_featuremask);
840 #endif /* __i386__ || __x86_64__ */
841 #endif /* XENGUEST_H */
842