1 #include "libxl_internal.h"
2 #include "libxl_arch.h"
3 #include <xen/arch-x86/cpuid.h>
4 
/*
 * Fill in the x86-specific parts of the domain creation hypercall
 * structure: the device emulation mask and misc flags.
 * Always returns 0.
 */
int libxl__arch_domain_prepare_config(libxl__gc *gc,
                                      libxl_domain_config *d_config,
                                      struct xen_domctl_createdomain *config)
{
    /* Select the emulation set matching the guest type. */
    switch (d_config->c_info.type) {
    case LIBXL_DOMAIN_TYPE_HVM:
        /* Everything except vPCI, which HVM guests don't use. */
        config->arch.emulation_flags = XEN_X86_EMU_ALL & ~XEN_X86_EMU_VPCI;
        /* Drop PIRQ routing when the config disabled it. */
        if (!libxl_defbool_val(d_config->b_info.u.hvm.pirq))
            config->arch.emulation_flags &= ~XEN_X86_EMU_USE_PIRQ;
        break;

    case LIBXL_DOMAIN_TYPE_PVH:
        /* PVH only gets an emulated local APIC. */
        config->arch.emulation_flags = XEN_X86_EMU_LAPIC;
        break;

    case LIBXL_DOMAIN_TYPE_PV:
        /* PV guests use no emulation at all. */
        config->arch.emulation_flags = 0;
        break;

    default:
        abort();
    }

    config->arch.misc_flags = 0;
    if (libxl_defbool_val(d_config->b_info.arch_x86.msr_relaxed))
        config->arch.misc_flags |= XEN_X86_MSR_RELAXED;

    return 0;
}
31 
/*
 * Hook to copy hypervisor-chosen settings back into d_config before it
 * is saved; x86 has nothing to record here, so this is a no-op.
 */
int libxl__arch_domain_save_config(libxl__gc *gc,
                                   libxl_domain_config *d_config,
                                   libxl__domain_build_state *state,
                                   const struct xen_domctl_createdomain *config)
{
    /* Nothing to do on x86. */
    return 0;
}
39 
e820_names(int type)40 static const char *e820_names(int type)
41 {
42     switch (type) {
43         case E820_RAM: return "RAM";
44         case E820_RESERVED: return "Reserved";
45         case E820_ACPI: return "ACPI";
46         case E820_NVS: return "ACPI NVS";
47         case E820_UNUSABLE: return "Unusable";
48         default: break;
49     }
50     return "Unknown";
51 }
52 
/*
 * Sanitize a host E820 map for use by a PV guest with e820_host=1.
 *
 * Builds a fresh map in 'e820' consisting of: one low RAM region sized
 * by map_limitkb (trimmed so it does not run into host non-RAM
 * regions), optionally an E820_UNUSABLE filler between the end of that
 * RAM and the first host reserved region, the host's non-RAM entries,
 * and finally a high RAM region holding any truncated plus ballooned
 * memory. The result is copied back over 'src' and *nr_entries is
 * updated.
 *
 * Returns 0 on success, ERROR_INVAL on bad arguments, ERROR_NOMEM if
 * the source map has more than E820MAX entries.
 */
static int e820_sanitize(libxl__gc *gc, struct e820entry src[],
                         uint32_t *nr_entries,
                         unsigned long map_limitkb,
                         unsigned long balloon_kb)
{
    uint64_t delta_kb = 0, start = 0, start_kb = 0, last = 0, ram_end;
    uint32_t i, idx = 0, nr;
    struct e820entry e820[E820MAX];

    if (!src || !map_limitkb || !nr_entries)
        return ERROR_INVAL;

    nr = *nr_entries;
    if (!nr)
        return ERROR_INVAL;

    if (nr > E820MAX)
        return ERROR_NOMEM;

    /* Weed out anything under 1MB */
    for (i = 0; i < nr; i++) {
        if (src[i].addr > 0x100000)
            continue;

        src[i].type = 0;
        src[i].size = 0;
        src[i].addr = -1ULL;
    }

    /* Find the lowest and highest entry in E820, skipping over
     * undesired entries. */
    start = -1ULL;
    last = 0;
    for (i = 0; i < nr; i++) {
        if ((src[i].type == E820_RAM) ||
            (src[i].type == E820_UNUSABLE) ||
            (src[i].type == 0))
            continue;

        start = src[i].addr < start ? src[i].addr : start;
        /*
         * Bug fix: keep the larger end *address*; the previous code
         * stored the boolean result of the comparison in 'last'.
         */
        last = src[i].addr + src[i].size > last ?
               src[i].addr + src[i].size : last;
    }
    if (start > 1024)
        start_kb = start >> 10;

    /* Add the memory RAM region for the guest */
    e820[idx].addr = 0;
    e820[idx].size = (uint64_t)map_limitkb << 10;
    e820[idx].type = E820_RAM;

    /* .. and trim if necessary */
    if (start_kb && map_limitkb > start_kb) {
        delta_kb = map_limitkb - start_kb;
        if (delta_kb)
            e820[idx].size -= (uint64_t)(delta_kb << 10);
    }
    /* Note: We don't touch balloon_kb here. Will add it at the end. */
    ram_end = e820[idx].addr + e820[idx].size;
    idx ++;

    LOG(DEBUG, "Memory: %"PRIu64"kB End of RAM: " \
        "0x%"PRIx64" (PFN) Delta: %"PRIu64"kB, PCI start: %"PRIu64"kB " \
        "(0x%"PRIx64" PFN), Balloon %"PRIu64"kB\n", (uint64_t)map_limitkb,
        ram_end >> 12, delta_kb, start_kb ,start >> 12,
        (uint64_t)balloon_kb);


    /* This whole code below is to guard against if the Intel IGD is passed into
     * the guest. If we don't pass in IGD, this whole code can be ignored.
     *
     * The reason for this code is that Intel boxes fill their E820 with
     * E820_RAM amongst E820_RESERVED and we can't just ditch those E820_RAM.
     * That is b/c any "gaps" in the E820 is considered PCI I/O space by
     * Linux and it would be utilized by the Intel IGD as I/O space while
     * in reality it was a RAM region.
     *
     * What this means is that we have to walk the E820 and for any region
     * that is RAM and below 4GB and above ram_end, needs to change its type
     * to E820_UNUSABLE. We also need to move some of the E820_RAM regions if
     * they overlap with ram_end. */
    for (i = 0; i < nr; i++) {
        uint64_t end = src[i].addr + src[i].size;

        /* We don't care about E820_UNUSABLE, but we need to
         * change the type to zero b/c the loop after this
         * sticks E820_UNUSABLE on the guest's E820 but ignores
         * the ones with type zero. */
        if ((src[i].type == E820_UNUSABLE) ||
            /* Any region that is within the "RAM region" can
             * be safely ditched. */
            (end < ram_end)) {
                src[i].type = 0;
                continue;
        }

        /* Look only at RAM regions. */
        if (src[i].type != E820_RAM)
            continue;

        /* We only care about RAM regions below 4GB. */
        if (src[i].addr >= (1ULL<<32))
            continue;

        /* E820_RAM overlaps with our RAM region. Move it */
        if (src[i].addr < ram_end) {
            uint64_t delta;

            src[i].type = E820_UNUSABLE;
            delta = ram_end - src[i].addr;
            /* The end < ram_end should weed this out */
            if (src[i].size < delta)
                src[i].type = 0;
            else {
                src[i].size -= delta;
                src[i].addr = ram_end;
            }
            if (src[i].addr + src[i].size != end) {
                /* We messed up somewhere */
                src[i].type = 0;
                LOGE(ERROR, "Computed E820 wrongly. Continuing on.");
            }
        }
        /* Lastly, convert the RAM to UNUSABLE. Look in the Linux kernel
           at git commit 2f14ddc3a7146ea4cd5a3d1ecd993f85f2e4f948
            "xen/setup: Inhibit resource API from using System RAM E820
           gaps as PCI mem gaps" for full explanation. */
        if (end > ram_end)
            src[i].type = E820_UNUSABLE;
    }

    /* Check if there is a region between ram_end and start. */
    if (start > ram_end) {
        int add_unusable = 1;
        for (i = 0; i < nr && add_unusable; i++) {
            if (src[i].type != E820_UNUSABLE)
                continue;
            if (ram_end != src[i].addr)
                continue;
            if (start != src[i].addr + src[i].size) {
                /* there is one, adjust it */
                src[i].size = start - src[i].addr;
            }
            add_unusable = 0;
        }
        /* .. and if not present, add it in. This is to guard against
           the Linux guest assuming that the gap between the end of
           RAM region and the start of the E820_[ACPI,NVS,RESERVED]
           is PCI I/O space. Which it certainly is _not_. */
        if (add_unusable) {
            e820[idx].type = E820_UNUSABLE;
            e820[idx].addr = ram_end;
            e820[idx].size = start - ram_end;
            idx++;
        }
    }
    /* Almost done: copy them over, ignoring the undesirable ones */
    for (i = 0; i < nr; i++) {
        if ((src[i].type == E820_RAM) ||
            (src[i].type == 0))
            continue;

        e820[idx].type = src[i].type;
        e820[idx].addr = src[i].addr;
        e820[idx].size = src[i].size;
        idx++;
    }
    /* At this point we have the mapped RAM + E820 entries from src. */
    if (balloon_kb || delta_kb) {
        /* and if we truncated the RAM region, then add it to the end. */
        e820[idx].type = E820_RAM;
        e820[idx].addr = (uint64_t)(1ULL << 32) > last ?
                         (uint64_t)(1ULL << 32) : last;
        /* also add the balloon memory to the end.
         * Widen balloon_kb *before* shifting: 'unsigned long' is 32 bits
         * on 32-bit toolstacks and the shift could overflow otherwise. */
        e820[idx].size = (uint64_t)(delta_kb << 10) +
                         ((uint64_t)balloon_kb << 10);
        idx++;

    }
    nr = idx;

    for (i = 0; i < nr; i++) {
      LOG(DEBUG, ":\t[%"PRIx64" -> %"PRIx64"] %s", e820[i].addr >> 12,
          (e820[i].addr + e820[i].size) >> 12, e820_names(e820[i].type));
    }

    /* Done: copy the sanitized version. */
    *nr_entries = nr;
    memcpy(src, e820, nr * sizeof(struct e820entry));
    return 0;
}
244 
/*
 * Fetch the host's machine memory map and sanitize it for the guest
 * described by b_info. On entry *nr holds the capacity of map[]; on
 * success it holds the number of sanitized entries.
 * Returns 0 on success, ERROR_FAIL/ERROR_* otherwise.
 */
static int e820_host_sanitize(libxl__gc *gc,
                              libxl_domain_build_info *b_info,
                              struct e820entry map[],
                              uint32_t *nr)
{
    int rc = xc_get_machine_memory_map(CTX->xch, map, *nr);

    if (rc < 0)
        return ERROR_FAIL;

    *nr = rc;

    /* Balloon area = (max - target) plus any PV slack.
     * NOTE(review): assumes max_memkb >= target_memkb — presumably
     * enforced by earlier validation; confirm against callers. */
    return e820_sanitize(gc, map, nr, b_info->target_memkb,
                         (b_info->max_memkb - b_info->target_memkb) +
                         b_info->u.pv.slack_memkb);
}
263 
/*
 * Install the sanitized host E820 map into a PV guest that requested
 * e820_host=1. Returns 0 on success, ERROR_INVAL for non-PV guests or
 * when e820_host is off, ERROR_FAIL on hypercall failure.
 */
static int libxl__e820_alloc(libxl__gc *gc, uint32_t domid,
        libxl_domain_config *d_config)
{
    libxl_ctx *ctx = libxl__gc_owner(gc);
    libxl_domain_build_info *b_info;
    struct e820entry map[E820MAX];
    uint32_t nr = E820MAX;
    int rc;

    /* Only PV guests can take the host-derived map. */
    if (d_config == NULL || d_config->c_info.type != LIBXL_DOMAIN_TYPE_PV)
        return ERROR_INVAL;

    b_info = &d_config->b_info;
    if (!libxl_defbool_val(b_info->u.pv.e820_host))
        return ERROR_INVAL;

    rc = e820_host_sanitize(gc, b_info, map, &nr);
    if (rc)
        return ERROR_FAIL;

    if (xc_domain_set_memory_map(ctx->xch, domid, map, nr) < 0)
        return ERROR_FAIL;

    return 0;
}
292 
timer_mode(const libxl_domain_build_info * info)293 static unsigned long timer_mode(const libxl_domain_build_info *info)
294 {
295     const libxl_timer_mode mode = info->timer_mode;
296     assert(mode >= LIBXL_TIMER_MODE_DELAY_FOR_MISSED_TICKS &&
297            mode <= LIBXL_TIMER_MODE_ONE_MISSED_TICK_PENDING);
298     return ((unsigned long)mode);
299 }
300 
/*
 * Translate the libxl viridian (Hyper-V enlightenment) configuration
 * into the HVM_PARAM_VIRIDIAN feature mask and set it on the domain.
 *
 * The effective enlightenment set is: defaults (when u.hvm.viridian is
 * true), plus viridian_enable bits, minus viridian_disable bits. A
 * group both enabled and disabled, or a non-empty set lacking the BASE
 * group, is a configuration error.
 *
 * Returns 0 on success, ERROR_FAIL on invalid config or if the
 * hypercall fails.
 */
static int hvm_set_viridian_features(libxl__gc *gc, uint32_t domid,
                                     const libxl_domain_build_info *info)
{
    libxl_bitmap enlightenments;
    libxl_viridian_enlightenment v;
    uint64_t mask = 0;

    libxl_bitmap_init(&enlightenments);
    libxl_bitmap_alloc(CTX, &enlightenments,
                       LIBXL_BUILDINFO_HVM_VIRIDIAN_ENABLE_DISABLE_WIDTH);

    if (libxl_defbool_val(info->u.hvm.viridian)) {
        /* Enable defaults */
        libxl_bitmap_set(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_BASE);
        libxl_bitmap_set(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_FREQ);
        libxl_bitmap_set(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_TIME_REF_COUNT);
        libxl_bitmap_set(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_APIC_ASSIST);
        libxl_bitmap_set(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_CRASH_CTL);
        libxl_bitmap_set(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_NO_VP_LIMIT);
        libxl_bitmap_set(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_CPU_HOTPLUG);
    }

    /* Explicit enables, rejecting any group that is also disabled. */
    libxl_for_each_set_bit(v, info->u.hvm.viridian_enable) {
        if (libxl_bitmap_test(&info->u.hvm.viridian_disable, v)) {
            LOG(ERROR, "%s group both enabled and disabled",
                libxl_viridian_enlightenment_to_string(v));
            goto err;
        }
        if (libxl_viridian_enlightenment_to_string(v)) /* check validity */
            libxl_bitmap_set(&enlightenments, v);
    }

    /* Explicit disables override the defaults set above. */
    libxl_for_each_set_bit(v, info->u.hvm.viridian_disable)
        if (libxl_viridian_enlightenment_to_string(v)) /* check validity */
            libxl_bitmap_reset(&enlightenments, v);

    /* The base set is a pre-requisite for all others */
    if (!libxl_bitmap_is_empty(&enlightenments) &&
        !libxl_bitmap_test(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_BASE)) {
        LOG(ERROR, "base group not enabled");
        goto err;
    }

    libxl_for_each_set_bit(v, enlightenments)
        LOG(DETAIL, "%s group enabled", libxl_viridian_enlightenment_to_string(v));

    /* Map each libxl enlightenment group onto its HVMPV_* mask bits. */
    if (libxl_bitmap_test(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_BASE)) {
        mask |= HVMPV_base_freq;

        /* FREQ is expressed as the absence of the no_freq bit. */
        if (!libxl_bitmap_test(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_FREQ))
            mask |= HVMPV_no_freq;
    }

    if (libxl_bitmap_test(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_TIME_REF_COUNT))
        mask |= HVMPV_time_ref_count;

    if (libxl_bitmap_test(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_REFERENCE_TSC))
        mask |= HVMPV_reference_tsc;

    if (libxl_bitmap_test(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_HCALL_REMOTE_TLB_FLUSH))
        mask |= HVMPV_hcall_remote_tlb_flush;

    if (libxl_bitmap_test(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_APIC_ASSIST))
        mask |= HVMPV_apic_assist;

    if (libxl_bitmap_test(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_CRASH_CTL))
        mask |= HVMPV_crash_ctl;

    if (libxl_bitmap_test(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_SYNIC))
        mask |= HVMPV_synic;

    /* STIMER pulls in its prerequisites (time_ref_count and synic). */
    if (libxl_bitmap_test(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_STIMER))
        mask |= HVMPV_time_ref_count | HVMPV_synic | HVMPV_stimer;

    if (libxl_bitmap_test(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_HCALL_IPI))
        mask |= HVMPV_hcall_ipi;

    if (libxl_bitmap_test(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_EX_PROCESSOR_MASKS))
        mask |= HVMPV_ex_processor_masks;

    if (libxl_bitmap_test(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_NO_VP_LIMIT))
        mask |= HVMPV_no_vp_limit;

    if (libxl_bitmap_test(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_CPU_HOTPLUG))
        mask |= HVMPV_cpu_hotplug;

    if (mask != 0 &&
        xc_hvm_param_set(CTX->xch,
                         domid,
                         HVM_PARAM_VIRIDIAN,
                         mask) != 0) {
        LOGE(ERROR, "Couldn't set viridian feature mask (0x%"PRIx64")", mask);
        goto err;
    }

    libxl_bitmap_dispose(&enlightenments);
    return 0;

err:
    libxl_bitmap_dispose(&enlightenments);
    return ERROR_FAIL;
}
403 
/*
 * Apply the HVM params (HPET, VPT alignment, MCA caps, timer mode)
 * derived from the build info. HVM guests get all of them; PVH guests
 * only the timer mode. Returns 0 on success, ERROR_FAIL otherwise.
 */
static int hvm_set_conf_params(libxl__gc *gc, uint32_t domid,
                               const libxl_domain_build_info *info)
{
    libxl_ctx *ctx = libxl__gc_owner(gc);
    xc_interface *xch = ctx->xch;
    int rc = ERROR_FAIL;

    switch (info->type) {
    case LIBXL_DOMAIN_TYPE_HVM:
        if (xc_hvm_param_set(xch, domid, HVM_PARAM_HPET_ENABLED,
                             libxl_defbool_val(info->u.hvm.hpet))) {
            LOG(ERROR, "Couldn't set HVM_PARAM_HPET_ENABLED");
            goto out;
        }

        if (xc_hvm_param_set(xch, domid, HVM_PARAM_VPT_ALIGN,
                             libxl_defbool_val(info->u.hvm.vpt_align))) {
            LOG(ERROR, "Couldn't set HVM_PARAM_VPT_ALIGN");
            goto out;
        }

        /* MCA capabilities are only set when explicitly configured. */
        if (info->u.hvm.mca_caps &&
            xc_hvm_param_set(xch, domid, HVM_PARAM_MCA_CAP,
                             info->u.hvm.mca_caps)) {
            LOG(ERROR, "Couldn't set HVM_PARAM_MCA_CAP");
            goto out;
        }

        /* Fallthrough */
    case LIBXL_DOMAIN_TYPE_PVH:
        /* Timer mode applies to both HVM and PVH. */
        if (xc_hvm_param_set(xch, domid, HVM_PARAM_TIMER_MODE,
                             timer_mode(info))) {
            LOG(ERROR, "Couldn't set HVM_PARAM_TIMER_MODE");
            goto out;
        }
        break;

    default:
        abort();
    }

    rc = 0;

 out:
    return rc;
}
448 
/*
 * x86-specific domain creation: HVM/PVH params, viridian features,
 * PV memmap limit, TSC mode, RTC offset, paging mempool size and the
 * optional host E820 map.
 *
 * Returns 0 on success or a libxl ERROR_* code.
 */
int libxl__arch_domain_create(libxl__gc *gc,
                              libxl_domain_config *d_config,
                              libxl__domain_build_state *state,
                              uint32_t domid)
{
    const libxl_domain_build_info *info = &d_config->b_info;
    int ret = 0;
    int tsc_mode;
    uint32_t rtc_timeoffset;
    libxl_ctx *ctx = libxl__gc_owner(gc);

    /* HVM params apply to HVM and PVH guests, not PV. */
    if (info->type != LIBXL_DOMAIN_TYPE_PV &&
        (ret = hvm_set_conf_params(gc, domid, info)) != 0)
        goto out;

    /* Viridian flags are already a part of the migration stream so set
     * them here only for brand new domains. */
    if (!state->restore &&
        info->type == LIBXL_DOMAIN_TYPE_HVM &&
        (ret = hvm_set_viridian_features(gc, domid, info)) != 0)
        goto out;

    /* Best effort: the return value is deliberately ignored here. */
    if (d_config->b_info.type == LIBXL_DOMAIN_TYPE_PV)
        xc_domain_set_memmap_limit(ctx->xch, domid,
                                   (d_config->b_info.max_memkb +
                                    d_config->b_info.u.pv.slack_memkb));

    /* Translate the libxl TSC mode into the hypervisor's constant. */
    switch (d_config->b_info.tsc_mode) {
    case LIBXL_TSC_MODE_DEFAULT:
        tsc_mode = XEN_CPUID_TSC_MODE_DEFAULT;
        break;
    case LIBXL_TSC_MODE_ALWAYS_EMULATE:
        tsc_mode = XEN_CPUID_TSC_MODE_ALWAYS_EMULATE;
        break;
    case LIBXL_TSC_MODE_NATIVE:
        tsc_mode = XEN_CPUID_TSC_MODE_NEVER_EMULATE;
        break;
    case LIBXL_TSC_MODE_NATIVE_PARAVIRT:
        LOGD(ERROR, domid, "TSC Mode native_paravirt (a.k.a PVRDTSCP) has been removed");
        ret = ERROR_FEATURE_REMOVED;
        goto out;
    default:
        abort();
    }

    if (xc_domain_set_tsc_info(ctx->xch, domid, tsc_mode, 0, 0, 0)) {
        LOGE(ERROR, "xc_domain_set_tsc_info() failed");
        ret = ERROR_FAIL;
        goto out;
    }

    /* With localtime=1 the RTC offset additionally tracks the host's
     * current UTC offset. */
    rtc_timeoffset = d_config->b_info.rtc_timeoffset;
    if (libxl_defbool_val(d_config->b_info.localtime)) {
        time_t t;
        struct tm *tm, result;

        t = time(NULL);
        tm = localtime_r(&t, &result);

        if (!tm) {
            LOGED(ERROR, domid, "Failed to call localtime_r");
            ret = ERROR_FAIL;
            goto out;
        }

        rtc_timeoffset += tm->tm_gmtoff;
    }

    if (rtc_timeoffset)
        xc_domain_set_time_offset(ctx->xch, domid, rtc_timeoffset);

    /* HVM/PVH guests need a paging mempool; PV does not use one. */
    if (d_config->b_info.type != LIBXL_DOMAIN_TYPE_PV) {
        ret = libxl__domain_set_paging_mempool_size(gc, d_config, domid);
        if (ret)
            goto out;
    }

    /* Optionally hand the sanitized host E820 to a PV guest. */
    if (d_config->c_info.type == LIBXL_DOMAIN_TYPE_PV &&
            libxl_defbool_val(d_config->b_info.u.pv.e820_host)) {
        ret = libxl__e820_alloc(gc, domid, d_config);
        if (ret) {
            LOGED(ERROR, domid, "Failed while collecting E820 with: %d (errno:%d)\n",
                 ret, errno);
        }
    }

out:
    return ret;
}
538 
/*
 * Report the architecture's extra memory overhead for a guest; on x86
 * this is the fixed LIBXL_MAXMEM_CONSTANT slack. Always returns 0.
 */
int libxl__arch_extra_memory(libxl__gc *gc,
                             const libxl_domain_build_info *info,
                             uint64_t *out)
{
    *out = LIBXL_MAXMEM_CONSTANT;
    return 0;
}
547 
/*
 * Hook to initialise an architecture hardware description (e.g. a
 * device tree on Arm); x86 has none, so this is a no-op.
 */
int libxl__arch_domain_init_hw_description(libxl__gc *gc,
                                           libxl_domain_config *d_config,
                                           libxl__domain_build_state *state,
                                           struct xc_dom_image *dom)
{
    /* Nothing to do on x86. */
    return 0;
}
555 
/*
 * Hook for architecture-specific work after the domain image has been
 * built; x86 needs none, so this is a no-op.
 */
int libxl__arch_build_dom_finish(libxl__gc *gc,
                                 libxl_domain_build_info *info,
                                 struct xc_dom_image *dom,
                                 libxl__domain_build_state *state)
{
    /* Nothing to do on x86. */
    return 0;
}
563 
564 /* Return 0 on success, ERROR_* on failure. */
int libxl__arch_vnuma_build_vmemrange(libxl__gc *gc,
                                      uint32_t domid,
                                      libxl_domain_build_info *b_info,
                                      libxl__domain_build_state *state)
{
    int nid, nr_vmemrange, rc;
    uint32_t nr_e820, e820_count;
    struct e820entry map[E820MAX];
    xen_vmemrange_t *vmemranges;
    unsigned int array_size;

    /* If e820_host is not set, call the generic function */
    if (!(b_info->type == LIBXL_DOMAIN_TYPE_PV &&
          libxl_defbool_val(b_info->u.pv.e820_host)))
        return libxl__vnuma_build_vmemrange_pv_generic(gc, domid, b_info,
                                                       state);

    assert(state->vmemranges == NULL);

    /* Start from the sanitized host E820 and carve each vnode's memory
     * out of its RAM regions, in order. */
    nr_e820 = E820MAX;
    rc = e820_host_sanitize(gc, b_info, map, &nr_e820);
    if (rc) goto out;

    e820_count = 0;
    nr_vmemrange = 0;
    vmemranges = NULL;
    array_size = 0;
    for (nid = 0; nid < b_info->num_vnuma_nodes; nid++) {
        libxl_vnode_info *p = &b_info->vnuma_nodes[nid];
        uint64_t remaining_bytes = (p->memkb << 10), bytes;

        /* Consume E820 RAM regions until this vnode is fully placed. */
        while (remaining_bytes > 0) {
            /* Ran out of E820 RAM before satisfying the vnode. */
            if (e820_count >= nr_e820) {
                rc = ERROR_NOMEM;
                goto out;
            }

            /* Skip non RAM region */
            if (map[e820_count].type != E820_RAM) {
                e820_count++;
                continue;
            }

            /* Grow the output array in chunks of 32 entries. */
            if (nr_vmemrange >= array_size) {
                array_size += 32;
                GCREALLOC_ARRAY(vmemranges, array_size);
            }

            /* Take at most what's left of this vnode from this region. */
            bytes = map[e820_count].size >= remaining_bytes ?
                remaining_bytes : map[e820_count].size;

            vmemranges[nr_vmemrange].start = map[e820_count].addr;
            vmemranges[nr_vmemrange].end = map[e820_count].addr + bytes;

            /* Region only partially used: shrink it in place so the
             * next vnode continues from the unconsumed remainder;
             * otherwise move on to the next region. */
            if (map[e820_count].size >= remaining_bytes) {
                map[e820_count].addr += bytes;
                map[e820_count].size -= bytes;
            } else {
                e820_count++;
            }

            remaining_bytes -= bytes;

            vmemranges[nr_vmemrange].flags = 0;
            vmemranges[nr_vmemrange].nid = nid;
            nr_vmemrange++;
        }
    }

    state->vmemranges = vmemranges;
    state->num_vmemranges = nr_vmemrange;

    rc = 0;
out:
    return rc;
}
641 
/*
 * Map a host IRQ into the domain as a pirq and grant the domain
 * permission to use it. Returns 0 on success, a libxc error otherwise.
 */
int libxl__arch_domain_map_irq(libxl__gc *gc, uint32_t domid, int irq)
{
    /* xc_physdev_map_pirq() rewrites 'irq' with the allocated pirq. */
    int rc = xc_physdev_map_pirq(CTX->xch, domid, irq, &irq);

    if (rc)
        return rc;

    return xc_domain_irq_permission(CTX->xch, domid, irq, 1);
}
654 
655 /*
656  * Here we're just trying to set these kinds of e820 mappings:
657  *
658  * #1. Low memory region
659  *
660  * Low RAM starts at least from 1M to make sure all standard regions
661  * of the PC memory map, like BIOS, VGA memory-mapped I/O and vgabios,
662  * have enough space.
663  * Note: Those stuffs below 1M are still constructed with multiple
664  * e820 entries by hvmloader. At this point we don't change anything.
665  *
666  * #2. RDM region if it exists
667  *
668  * #3. High memory region if it exists
669  *
670  * Note: these regions are not overlapping since we already check
671  * to adjust them. Please refer to libxl__domain_device_construct_rdm().
672  */
673 #define GUEST_LOW_MEM_START_DEFAULT 0x100000
static int domain_construct_memmap(libxl__gc *gc,
                                   libxl_domain_config *d_config,
                                   uint32_t domid,
                                   struct xc_dom_image *dom)
{
    int rc = 0;
    unsigned int nr = 0, i;
    /* We always own at least one lowmem entry. */
    unsigned int e820_entries = 1;
    struct e820entry *e820 = NULL;
    /* Memory above 4GiB, if any. */
    uint64_t highmem_size =
                    dom->highmem_end ? dom->highmem_end - (1ull << 32) : 0;
    /* With a device model the low 1MiB is left to hvmloader/firmware. */
    uint32_t lowmem_start = dom->device_model ? GUEST_LOW_MEM_START_DEFAULT : 0;
    unsigned page_size = XC_DOM_PAGE_SIZE(dom);

    /* First pass: count the entries so the array can be sized up front.
     * The counting logic below must mirror the filling logic exactly. */

    /* Add all rdm entries. */
    for (i = 0; i < d_config->num_rdms; i++)
        if (d_config->rdms[i].policy != LIBXL_RDM_RESERVE_POLICY_INVALID)
            e820_entries++;

    /* Add the HVM special pages to PVH memmap as RESERVED. */
    if (d_config->b_info.type == LIBXL_DOMAIN_TYPE_PVH)
        e820_entries++;

    /* If we should have a highmem range. */
    if (highmem_size)
        e820_entries++;

    for (i = 0; i < MAX_ACPI_MODULES; i++)
        if (dom->acpi_modules[i].length)
            e820_entries++;

    if (e820_entries >= E820MAX) {
        LOGD(ERROR, domid, "Ooops! Too many entries in the memory map!");
        rc = ERROR_INVAL;
        goto out;
    }

    e820 = libxl__malloc(gc, sizeof(struct e820entry) * e820_entries);

    /* Low memory */
    e820[nr].addr = lowmem_start;
    e820[nr].size = dom->lowmem_end - lowmem_start;
    e820[nr].type = E820_RAM;
    nr++;

    /* RDM mapping */
    for (i = 0; i < d_config->num_rdms; i++) {
        if (d_config->rdms[i].policy == LIBXL_RDM_RESERVE_POLICY_INVALID)
            continue;

        e820[nr].addr = d_config->rdms[i].start;
        e820[nr].size = d_config->rdms[i].size;
        e820[nr].type = E820_RESERVED;
        nr++;
    }

    /* HVM special pages */
    if (d_config->b_info.type == LIBXL_DOMAIN_TYPE_PVH) {
        e820[nr].addr = (X86_HVM_END_SPECIAL_REGION - X86_HVM_NR_SPECIAL_PAGES)
                        << XC_PAGE_SHIFT;
        e820[nr].size = X86_HVM_NR_SPECIAL_PAGES << XC_PAGE_SHIFT;
        e820[nr].type = E820_RESERVED;
        nr++;
    }

    /* ACPI modules: round the start down to a page boundary and grow
     * the size by the same amount so the whole module is covered. */
    for (i = 0; i < MAX_ACPI_MODULES; i++) {
        if (dom->acpi_modules[i].length) {
            e820[nr].addr = dom->acpi_modules[i].guest_addr_out & ~(page_size - 1);
            e820[nr].size = dom->acpi_modules[i].length +
                (dom->acpi_modules[i].guest_addr_out & (page_size - 1));
            e820[nr].type = E820_ACPI;
            nr++;
        }
    }

    /* High memory (last entry; nr is not needed afterwards) */
    if (highmem_size) {
        e820[nr].addr = ((uint64_t)1 << 32);
        e820[nr].size = highmem_size;
        e820[nr].type = E820_RAM;
    }

    if (xc_domain_set_memory_map(CTX->xch, domid, e820, e820_entries) != 0) {
        rc = ERROR_FAIL;
        goto out;
    }

    /* Keep the map around for later consumers (e.g. ACPI building). */
    dom->e820 = e820;
    dom->e820_entries = e820_entries;

out:
    return rc;
}
768 
/*
 * Finalise the guest's hardware description: load ACPI tables for PVH
 * guests (HVM guests get theirs via hvmloader) and install the guest
 * memory map. No-op for PV. Returns 0 on success or a libxl error.
 */
int libxl__arch_domain_finalise_hw_description(libxl__gc *gc,
                                               uint32_t domid,
                                               libxl_domain_config *d_config,
                                               struct xc_dom_image *dom)
{
    libxl_domain_build_info *const info = &d_config->b_info;
    int rc;

    /* PV guests do not use a constructed memory map. */
    if (info->type == LIBXL_DOMAIN_TYPE_PV)
        return 0;

    if (info->type == LIBXL_DOMAIN_TYPE_PVH) {
        rc = libxl__dom_load_acpi(gc, info, dom);
        if (rc != 0) {
            LOGE(ERROR, "libxl_dom_load_acpi failed");
            return rc;
        }
    }

    rc = domain_construct_memmap(gc, d_config, domid, dom);
    if (rc != 0)
        LOGE(ERROR, "setting domain memory map failed");

    return rc;
}
794 
/* No x86-specific defaults to apply to the create info. */
void libxl__arch_domain_create_info_setdefault(libxl__gc *gc,
                                               libxl_domain_create_info *c_info)
{
}
799 
/*
 * Apply x86 defaults to the build info: ACPI on, MSR relaxed off, and
 * translation of the legacy altp2mhvm option into the new altp2m field.
 * Always returns 0.
 */
int libxl__arch_domain_build_info_setdefault(libxl__gc *gc,
                                             libxl_domain_build_info *b_info,
                                             const libxl_physinfo *physinfo)
{
    libxl_defbool_setdefault(&b_info->acpi, true);
    libxl_defbool_setdefault(&b_info->arch_x86.msr_relaxed, false);

    /*
     * The config parameter "altp2m" replaces the parameter "altp2mhvm".
     * For legacy reasons, both parameters are accepted on x86 HVM guests.
     *
     * If the legacy field info->u.hvm.altp2m is set, activate altp2m.
     * Otherwise set altp2m based on the field info->altp2m.
     */
    libxl_defbool_setdefault(&b_info->u.hvm.altp2m, false);
    /* NOTE(review): assigning the defbool value (true == 1) relies on 1
     * being the intended non-disabled altp2m mode — confirm against the
     * libxl_altp2m_mode enum definition. */
    if (b_info->altp2m == LIBXL_ALTP2M_MODE_DISABLED &&
        libxl_defbool_val(b_info->u.hvm.altp2m))
        b_info->altp2m = libxl_defbool_val(b_info->u.hvm.altp2m);

    return 0;
}
821 
/*
 * Resolve the generic LIBXL_PASSTHROUGH_ENABLED setting into a concrete
 * x86 mode (share_pt when the IOMMU page tables can be shared with HAP,
 * sync_pt otherwise) and validate explicit user choices.
 * Returns 0 on success, ERROR_INVAL for unsupported combinations.
 */
int libxl__arch_passthrough_mode_setdefault(libxl__gc *gc,
                                            uint32_t domid,
                                            libxl_domain_config *d_config,
                                            const libxl_physinfo *physinfo)
{
    libxl_domain_create_info *const c_info = &d_config->c_info;
    const char *whynot_pt_share;

    /* PVH passthrough is not implemented. */
    if (c_info->passthrough != LIBXL_PASSTHROUGH_DISABLED &&
        c_info->type == LIBXL_DOMAIN_TYPE_PVH) {
        LOGD(ERROR, domid,
             "passthrough not yet supported for x86 PVH guests\n");
        return ERROR_INVAL;
    }

    /* Reason (if any) why share_pt cannot be used for this guest. */
    if (c_info->type == LIBXL_DOMAIN_TYPE_PV)
        whynot_pt_share = "not valid for PV domain";
    else if (!physinfo->cap_iommu_hap_pt_share)
        whynot_pt_share = "not supported on this platform";
    else if (!libxl_defbool_val(d_config->c_info.hap))
        whynot_pt_share = "only valid for HAP guests";
    else
        whynot_pt_share = NULL;

    /* "enabled" means: pick the best mode the platform supports. */
    if (c_info->passthrough == LIBXL_PASSTHROUGH_ENABLED)
        c_info->passthrough = whynot_pt_share
            ? LIBXL_PASSTHROUGH_SYNC_PT : LIBXL_PASSTHROUGH_SHARE_PT;

    /* An explicit share_pt request must actually be usable. */
    if (c_info->passthrough == LIBXL_PASSTHROUGH_SHARE_PT && whynot_pt_share) {
        LOGD(ERROR, domid,
             "passthrough=\"share_pt\" %s\n",
             whynot_pt_share);
        return ERROR_INVAL;
    }

    return 0;
}
861 
/*
 * Copy x86 settings that must travel with the domain configuration into
 * dst before it is saved or migrated.
 *
 * Force MSR relaxed and HVM pirq to be set (either to true or false) so
 * it's part of the domain configuration when saving or performing a
 * live-migration.
 *
 * Doing so allows the recovery side to figure out whether the flags should
 * be set to true in order to keep backwards compatibility with already
 * started domains.
 */
void libxl__arch_update_domain_config(libxl__gc *gc,
                                      libxl_domain_config *dst,
                                      const libxl_domain_config *src)
{
    libxl_defbool_setdefault(&dst->b_info.arch_x86.msr_relaxed,
                    libxl_defbool_val(src->b_info.arch_x86.msr_relaxed));

    /* pirq only exists in the HVM branch of the build-info union. */
    if (src->c_info.type == LIBXL_DOMAIN_TYPE_HVM)
        libxl_defbool_setdefault(&dst->b_info.u.hvm.pirq,
                                 libxl_defbool_val(src->b_info.u.hvm.pirq));
}
881 
882 /*
883  * Local variables:
884  * mode: C
885  * c-basic-offset: 4
886  * indent-tabs-mode: nil
887  * End:
888  */
889