#include "libxl_internal.h"
#include "libxl_arch.h"
#include <xen/arch-x86/cpuid.h>

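/*
 * Select the emulation and misc flags for a new domain based on its type:
 * HVM guests get the full emulation set minus vPCI (and minus PIRQ routing
 * when "pirq" is disabled), PVH guests get only the local APIC, and PV
 * guests get no emulation at all. XEN_X86_MSR_RELAXED is set in misc_flags
 * when msr_relaxed is enabled.
 */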
int libxl__arch_domain_prepare_config(libxl__gc *gc,
                                      libxl_domain_config *d_config,
                                      struct xen_domctl_createdomain *config)
{
    switch (d_config->c_info.type) {
    case LIBXL_DOMAIN_TYPE_HVM:
        config->arch.emulation_flags = (XEN_X86_EMU_ALL & ~XEN_X86_EMU_VPCI);
        if (!libxl_defbool_val(d_config->b_info.u.hvm.pirq))
            config->arch.emulation_flags &= ~XEN_X86_EMU_USE_PIRQ;
        break;
    case LIBXL_DOMAIN_TYPE_PVH:
        config->arch.emulation_flags = XEN_X86_EMU_LAPIC;
        break;
    case LIBXL_DOMAIN_TYPE_PV:
        config->arch.emulation_flags = 0;
        break;
    default:
        abort();
    }

    config->arch.misc_flags = 0;
    if (libxl_defbool_val(d_config->b_info.arch_x86.msr_relaxed))
        config->arch.misc_flags |= XEN_X86_MSR_RELAXED;

    return 0;
}

int libxl__arch_domain_save_config(libxl__gc *gc,
                                   libxl_domain_config *d_config,
                                   libxl__domain_build_state *state,
                                   const struct xen_domctl_createdomain *config)
{
    return 0;
}

static const char *e820_names(int type)
{
    switch (type) {
    case E820_RAM: return "RAM";
    case E820_RESERVED: return "Reserved";
    case E820_ACPI: return "ACPI";
    case E820_NVS: return "ACPI NVS";
    case E820_UNUSABLE: return "Unusable";
    default: break;
    }
    return "Unknown";
}

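/*
 * Rewrite the host E820 map in 'src' into a map suitable for a PV guest:
 * a single RAM region of map_limitkb starting at zero, followed by the
 * host's non-RAM regions, with any gap between the end of guest RAM and
 * the first reserved region marked Unusable so Linux does not treat it
 * as PCI I/O space.
 *
 * Illustrative (hypothetical) example for a guest whose map_limitkb
 * covers 2GB, with some balloon memory; host map on the left, result on
 * the right:
 *
 *   [0, 2.5G)  RAM           [0, 2G)     RAM
 *   [2.5G, 3G) Reserved  =>  [2G, 2.5G)  Unusable
 *   [3G, 4G)   RAM           [2.5G, 3G)  Reserved
 *                            [3G, 4G)    Unusable
 *                            [4G, ...)   RAM (truncated + balloon part)
 */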
static int e820_sanitize(libxl__gc *gc, struct e820entry src[],
                         uint32_t *nr_entries,
                         unsigned long map_limitkb,
                         unsigned long balloon_kb)
{
    uint64_t delta_kb = 0, start = 0, start_kb = 0, last = 0, ram_end;
    uint32_t i, idx = 0, nr;
    struct e820entry e820[E820MAX];

    if (!src || !map_limitkb || !nr_entries)
        return ERROR_INVAL;

    nr = *nr_entries;
    if (!nr)
        return ERROR_INVAL;

    if (nr > E820MAX)
        return ERROR_NOMEM;

    /* Weed out anything under 1MB */
    for (i = 0; i < nr; i++) {
        if (src[i].addr > 0x100000)
            continue;

        src[i].type = 0;
        src[i].size = 0;
        src[i].addr = -1ULL;
    }

    /* Find the lowest and highest entry in the E820, skipping over
     * undesired entries. */
    start = -1ULL;
    last = 0;
    for (i = 0; i < nr; i++) {
        if ((src[i].type == E820_RAM) ||
            (src[i].type == E820_UNUSABLE) ||
            (src[i].type == 0))
            continue;

        start = src[i].addr < start ? src[i].addr : start;
        last = src[i].addr + src[i].size > last ?
               src[i].addr + src[i].size : last;
    }
    if (start > 1024)
        start_kb = start >> 10;

    /* Add the memory RAM region for the guest */
    e820[idx].addr = 0;
    e820[idx].size = (uint64_t)map_limitkb << 10;
    e820[idx].type = E820_RAM;

    /* .. and trim if necessary */
    if (start_kb && map_limitkb > start_kb) {
        delta_kb = map_limitkb - start_kb;
        if (delta_kb)
            e820[idx].size -= (uint64_t)(delta_kb << 10);
    }
    /* Note: We don't touch balloon_kb here. Will add it at the end. */
    ram_end = e820[idx].addr + e820[idx].size;
    idx++;

    LOG(DEBUG, "Memory: %"PRIu64"kB End of RAM: " \
        "0x%"PRIx64" (PFN) Delta: %"PRIu64"kB, PCI start: %"PRIu64"kB " \
        "(0x%"PRIx64" PFN), Balloon %"PRIu64"kB", (uint64_t)map_limitkb,
        ram_end >> 12, delta_kb, start_kb, start >> 12,
        (uint64_t)balloon_kb);

    /* The whole code below guards against the case where an Intel IGD is
     * passed through to the guest. If we don't pass in an IGD, all of this
     * code can be ignored.
     *
     * The reason for this code is that Intel boxes fill their E820 with
     * E820_RAM amongst E820_RESERVED and we can't just ditch those E820_RAM
     * regions. That is b/c any "gaps" in the E820 are considered PCI I/O
     * space by Linux and would be utilized by the Intel IGD as I/O space
     * while in reality they are RAM regions.
     *
     * What this means is that we have to walk the E820 and change the type
     * of any region that is RAM, below 4GB, and above ram_end to
     * E820_UNUSABLE. We also need to move some of the E820_RAM regions if
     * they overlap with ram_end. */
    for (i = 0; i < nr; i++) {
        uint64_t end = src[i].addr + src[i].size;

        /* We don't care about E820_UNUSABLE, but we need to
         * change the type to zero b/c the loop after this
         * sticks E820_UNUSABLE on the guest's E820 but ignores
         * the ones with type zero. */
        if ((src[i].type == E820_UNUSABLE) ||
            /* Any region that is within the "RAM region" can
             * be safely ditched. */
            (end < ram_end)) {
            src[i].type = 0;
            continue;
        }

        /* Look only at RAM regions. */
        if (src[i].type != E820_RAM)
            continue;

        /* We only care about RAM regions below 4GB. */
        if (src[i].addr >= (1ULL<<32))
            continue;

        /* E820_RAM overlaps with our RAM region. Move it. */
        if (src[i].addr < ram_end) {
            uint64_t delta;

            src[i].type = E820_UNUSABLE;
            delta = ram_end - src[i].addr;
            /* The "end < ram_end" check above should weed this out. */
            if (src[i].size < delta)
                src[i].type = 0;
            else {
                src[i].size -= delta;
                src[i].addr = ram_end;
            }
            if (src[i].addr + src[i].size != end) {
                /* We messed up somewhere */
                src[i].type = 0;
                LOGE(ERROR, "Computed E820 wrongly. Continuing on.");
            }
        }
        /* Lastly, convert the RAM to UNUSABLE. Look in the Linux kernel
           at git commit 2f14ddc3a7146ea4cd5a3d1ecd993f85f2e4f948
           "xen/setup: Inhibit resource API from using System RAM E820
           gaps as PCI mem gaps" for the full explanation. */
        if (end > ram_end)
            src[i].type = E820_UNUSABLE;
    }

    /* Check if there is a region between ram_end and start. */
    if (start > ram_end) {
        int add_unusable = 1;
        for (i = 0; i < nr && add_unusable; i++) {
            if (src[i].type != E820_UNUSABLE)
                continue;
            if (ram_end != src[i].addr)
                continue;
            if (start != src[i].addr + src[i].size) {
                /* there is one, adjust it */
                src[i].size = start - src[i].addr;
            }
            add_unusable = 0;
        }
        /* .. and if not present, add it in. This is to guard against
           the Linux guest assuming that the gap between the end of the
           RAM region and the start of the E820_[ACPI,NVS,RESERVED]
           region is PCI I/O space, which it certainly is _not_. */
        if (add_unusable) {
            e820[idx].type = E820_UNUSABLE;
            e820[idx].addr = ram_end;
            e820[idx].size = start - ram_end;
            idx++;
        }
    }
    /* Almost done: copy them over, ignoring the undesirable ones */
    for (i = 0; i < nr; i++) {
        if ((src[i].type == E820_RAM) ||
            (src[i].type == 0))
            continue;

        e820[idx].type = src[i].type;
        e820[idx].addr = src[i].addr;
        e820[idx].size = src[i].size;
        idx++;
    }
    /* At this point we have the mapped RAM + E820 entries from src. */
    if (balloon_kb || delta_kb) {
        /* and if we truncated the RAM region, then add it to the end. */
        e820[idx].type = E820_RAM;
        e820[idx].addr = (uint64_t)(1ULL << 32) > last ?
                         (uint64_t)(1ULL << 32) : last;
        /* also add the balloon memory to the end. */
        e820[idx].size = (uint64_t)(delta_kb << 10) +
                         (uint64_t)(balloon_kb << 10);
        idx++;
    }
    nr = idx;

    for (i = 0; i < nr; i++) {
        LOG(DEBUG, ":\t[%"PRIx64" -> %"PRIx64"] %s", e820[i].addr >> 12,
            (e820[i].addr + e820[i].size) >> 12, e820_names(e820[i].type));
    }

    /* Done: copy the sanitized version. */
    *nr_entries = nr;
    memcpy(src, e820, nr * sizeof(struct e820entry));
    return 0;
}

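/*
 * Fetch the host memory map and sanitize it for the guest. The balloon
 * size passed to e820_sanitize() is the gap between maxmem and target
 * memory, plus any PV slack.
 */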
static int e820_host_sanitize(libxl__gc *gc,
                              libxl_domain_build_info *b_info,
                              struct e820entry map[],
                              uint32_t *nr)
{
    int rc;

    rc = xc_get_machine_memory_map(CTX->xch, map, *nr);
    if (rc < 0)
        return ERROR_FAIL;

    *nr = rc;

    rc = e820_sanitize(gc, map, nr, b_info->target_memkb,
                       (b_info->max_memkb - b_info->target_memkb) +
                       b_info->u.pv.slack_memkb);
    return rc;
}

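/*
 * Apply the sanitized host E820 to a PV guest. Only meaningful (and only
 * accepted) when the guest sets e820_host=1 in its configuration.
 */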
static int libxl__e820_alloc(libxl__gc *gc, uint32_t domid,
                             libxl_domain_config *d_config)
{
    libxl_ctx *ctx = libxl__gc_owner(gc);
    int rc;
    uint32_t nr;
    struct e820entry map[E820MAX];
    libxl_domain_build_info *b_info;

    if (d_config == NULL || d_config->c_info.type != LIBXL_DOMAIN_TYPE_PV)
        return ERROR_INVAL;

    b_info = &d_config->b_info;
    if (!libxl_defbool_val(b_info->u.pv.e820_host))
        return ERROR_INVAL;

    nr = E820MAX;
    rc = e820_host_sanitize(gc, b_info, map, &nr);
    if (rc)
        return ERROR_FAIL;

    rc = xc_domain_set_memory_map(ctx->xch, domid, map, nr);
    if (rc < 0)
        return ERROR_FAIL;

    return 0;
}

static unsigned long timer_mode(const libxl_domain_build_info *info)
{
    const libxl_timer_mode mode = info->timer_mode;
    assert(mode >= LIBXL_TIMER_MODE_DELAY_FOR_MISSED_TICKS &&
           mode <= LIBXL_TIMER_MODE_ONE_MISSED_TICK_PENDING);
    return ((unsigned long)mode);
}

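/*
 * Translate the libxl viridian (Hyper-V) enlightenment settings into the
 * HVMPV_* mask understood by HVM_PARAM_VIRIDIAN. "viridian=1" enables a
 * default set of groups; viridian_enable/viridian_disable fine-tune
 * individual groups, and the base group is a prerequisite for all others.
 */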
static int hvm_set_viridian_features(libxl__gc *gc, uint32_t domid,
                                     const libxl_domain_build_info *info)
{
    libxl_bitmap enlightenments;
    libxl_viridian_enlightenment v;
    uint64_t mask = 0;

    libxl_bitmap_init(&enlightenments);
    libxl_bitmap_alloc(CTX, &enlightenments,
                       LIBXL_BUILDINFO_HVM_VIRIDIAN_ENABLE_DISABLE_WIDTH);

    if (libxl_defbool_val(info->u.hvm.viridian)) {
        /* Enable defaults */
        libxl_bitmap_set(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_BASE);
        libxl_bitmap_set(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_FREQ);
        libxl_bitmap_set(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_TIME_REF_COUNT);
        libxl_bitmap_set(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_APIC_ASSIST);
        libxl_bitmap_set(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_CRASH_CTL);
        libxl_bitmap_set(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_NO_VP_LIMIT);
        libxl_bitmap_set(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_CPU_HOTPLUG);
    }

    libxl_for_each_set_bit(v, info->u.hvm.viridian_enable) {
        if (libxl_bitmap_test(&info->u.hvm.viridian_disable, v)) {
            LOG(ERROR, "%s group both enabled and disabled",
                libxl_viridian_enlightenment_to_string(v));
            goto err;
        }
        if (libxl_viridian_enlightenment_to_string(v)) /* check validity */
            libxl_bitmap_set(&enlightenments, v);
    }

    libxl_for_each_set_bit(v, info->u.hvm.viridian_disable)
        if (libxl_viridian_enlightenment_to_string(v)) /* check validity */
            libxl_bitmap_reset(&enlightenments, v);

    /* The base set is a pre-requisite for all others */
    if (!libxl_bitmap_is_empty(&enlightenments) &&
        !libxl_bitmap_test(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_BASE)) {
        LOG(ERROR, "base group not enabled");
        goto err;
    }

    libxl_for_each_set_bit(v, enlightenments)
        LOG(DETAIL, "%s group enabled", libxl_viridian_enlightenment_to_string(v));

    if (libxl_bitmap_test(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_BASE)) {
        mask |= HVMPV_base_freq;

        if (!libxl_bitmap_test(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_FREQ))
            mask |= HVMPV_no_freq;
    }

    if (libxl_bitmap_test(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_TIME_REF_COUNT))
        mask |= HVMPV_time_ref_count;

    if (libxl_bitmap_test(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_REFERENCE_TSC))
        mask |= HVMPV_reference_tsc;

    if (libxl_bitmap_test(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_HCALL_REMOTE_TLB_FLUSH))
        mask |= HVMPV_hcall_remote_tlb_flush;

    if (libxl_bitmap_test(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_APIC_ASSIST))
        mask |= HVMPV_apic_assist;

    if (libxl_bitmap_test(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_CRASH_CTL))
        mask |= HVMPV_crash_ctl;

    if (libxl_bitmap_test(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_SYNIC))
        mask |= HVMPV_synic;

    if (libxl_bitmap_test(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_STIMER))
        mask |= HVMPV_time_ref_count | HVMPV_synic | HVMPV_stimer;

    if (libxl_bitmap_test(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_HCALL_IPI))
        mask |= HVMPV_hcall_ipi;

    if (libxl_bitmap_test(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_EX_PROCESSOR_MASKS))
        mask |= HVMPV_ex_processor_masks;

    if (libxl_bitmap_test(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_NO_VP_LIMIT))
        mask |= HVMPV_no_vp_limit;

    if (libxl_bitmap_test(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_CPU_HOTPLUG))
        mask |= HVMPV_cpu_hotplug;

    if (mask != 0 &&
        xc_hvm_param_set(CTX->xch,
                         domid,
                         HVM_PARAM_VIRIDIAN,
                         mask) != 0) {
        LOGE(ERROR, "Couldn't set viridian feature mask (0x%"PRIx64")", mask);
        goto err;
    }

    libxl_bitmap_dispose(&enlightenments);
    return 0;

 err:
    libxl_bitmap_dispose(&enlightenments);
    return ERROR_FAIL;
}

static int hvm_set_conf_params(libxl__gc *gc, uint32_t domid,
                               const libxl_domain_build_info *info)
{
    libxl_ctx *ctx = libxl__gc_owner(gc);
    xc_interface *xch = ctx->xch;
    int ret = ERROR_FAIL;

    switch (info->type) {
    case LIBXL_DOMAIN_TYPE_HVM:
        if (xc_hvm_param_set(xch, domid, HVM_PARAM_HPET_ENABLED,
                             libxl_defbool_val(info->u.hvm.hpet))) {
            LOG(ERROR, "Couldn't set HVM_PARAM_HPET_ENABLED");
            goto out;
        }
        if (xc_hvm_param_set(xch, domid, HVM_PARAM_VPT_ALIGN,
                             libxl_defbool_val(info->u.hvm.vpt_align))) {
            LOG(ERROR, "Couldn't set HVM_PARAM_VPT_ALIGN");
            goto out;
        }
        if (info->u.hvm.mca_caps &&
            xc_hvm_param_set(CTX->xch, domid, HVM_PARAM_MCA_CAP,
                             info->u.hvm.mca_caps)) {
            LOG(ERROR, "Couldn't set HVM_PARAM_MCA_CAP");
            goto out;
        }

        /* Fallthrough */
    case LIBXL_DOMAIN_TYPE_PVH:
        if (xc_hvm_param_set(xch, domid, HVM_PARAM_TIMER_MODE,
                             timer_mode(info))) {
            LOG(ERROR, "Couldn't set HVM_PARAM_TIMER_MODE");
            goto out;
        }
        break;

    default:
        abort();
    }

    ret = 0;

 out:
    return ret;
}

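/*
 * Arch-specific domain creation hook. It applies the HVM/PVH params set
 * above, the viridian mask (for brand new domains only, since viridian
 * flags travel in the migration stream), the TSC mode, the RTC offset
 * (including a localtime adjustment), the paging mempool size for
 * translated guests, and the host E820 for PV guests with e820_host=1.
 */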
int libxl__arch_domain_create(libxl__gc *gc,
                              libxl_domain_config *d_config,
                              libxl__domain_build_state *state,
                              uint32_t domid)
{
    const libxl_domain_build_info *info = &d_config->b_info;
    int ret = 0;
    int tsc_mode;
    uint32_t rtc_timeoffset;
    libxl_ctx *ctx = libxl__gc_owner(gc);

    if (info->type != LIBXL_DOMAIN_TYPE_PV &&
        (ret = hvm_set_conf_params(gc, domid, info)) != 0)
        goto out;

    /* Viridian flags are already a part of the migration stream, so set
     * them here only for brand new domains. */
    if (!state->restore &&
        info->type == LIBXL_DOMAIN_TYPE_HVM &&
        (ret = hvm_set_viridian_features(gc, domid, info)) != 0)
        goto out;

    if (d_config->b_info.type == LIBXL_DOMAIN_TYPE_PV)
        xc_domain_set_memmap_limit(ctx->xch, domid,
                                   (d_config->b_info.max_memkb +
                                    d_config->b_info.u.pv.slack_memkb));

    switch (d_config->b_info.tsc_mode) {
    case LIBXL_TSC_MODE_DEFAULT:
        tsc_mode = XEN_CPUID_TSC_MODE_DEFAULT;
        break;
    case LIBXL_TSC_MODE_ALWAYS_EMULATE:
        tsc_mode = XEN_CPUID_TSC_MODE_ALWAYS_EMULATE;
        break;
    case LIBXL_TSC_MODE_NATIVE:
        tsc_mode = XEN_CPUID_TSC_MODE_NEVER_EMULATE;
        break;
    case LIBXL_TSC_MODE_NATIVE_PARAVIRT:
        LOGD(ERROR, domid,
             "TSC mode native_paravirt (a.k.a. PVRDTSCP) has been removed");
        ret = ERROR_FEATURE_REMOVED;
        goto out;
    default:
        abort();
    }

    if (xc_domain_set_tsc_info(ctx->xch, domid, tsc_mode, 0, 0, 0)) {
        LOGE(ERROR, "xc_domain_set_tsc_info() failed");
        ret = ERROR_FAIL;
        goto out;
    }

    rtc_timeoffset = d_config->b_info.rtc_timeoffset;
    if (libxl_defbool_val(d_config->b_info.localtime)) {
        time_t t;
        struct tm *tm, result;

        t = time(NULL);
        tm = localtime_r(&t, &result);

        if (!tm) {
            LOGED(ERROR, domid, "Failed to call localtime_r");
            ret = ERROR_FAIL;
            goto out;
        }

        rtc_timeoffset += tm->tm_gmtoff;
    }

    if (rtc_timeoffset)
        xc_domain_set_time_offset(ctx->xch, domid, rtc_timeoffset);

    if (d_config->b_info.type != LIBXL_DOMAIN_TYPE_PV) {
        ret = libxl__domain_set_paging_mempool_size(gc, d_config, domid);
        if (ret)
            goto out;
    }

    if (d_config->c_info.type == LIBXL_DOMAIN_TYPE_PV &&
        libxl_defbool_val(d_config->b_info.u.pv.e820_host)) {
        ret = libxl__e820_alloc(gc, domid, d_config);
        if (ret) {
            LOGED(ERROR, domid, "Failed while collecting E820 with: %d (errno:%d)",
                  ret, errno);
        }
    }

 out:
    return ret;
}

int libxl__arch_extra_memory(libxl__gc *gc,
                             const libxl_domain_build_info *info,
                             uint64_t *out)
{
    *out = LIBXL_MAXMEM_CONSTANT;

    return 0;
}

int libxl__arch_domain_init_hw_description(libxl__gc *gc,
                                           libxl_domain_config *d_config,
                                           libxl__domain_build_state *state,
                                           struct xc_dom_image *dom)
{
    return 0;
}

int libxl__arch_build_dom_finish(libxl__gc *gc,
                                 libxl_domain_build_info *info,
                                 struct xc_dom_image *dom,
                                 libxl__domain_build_state *state)
{
    return 0;
}

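/*
 * For PV guests with e820_host=1, carve each vNUMA node's memory out of
 * the sanitized host E820 RAM regions in order, splitting a region when a
 * node only partially consumes it; otherwise defer to the generic PV
 * vmemrange builder.
 */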
/* Return 0 on success, ERROR_* on failure. */
int libxl__arch_vnuma_build_vmemrange(libxl__gc *gc,
                                      uint32_t domid,
                                      libxl_domain_build_info *b_info,
                                      libxl__domain_build_state *state)
{
    int nid, nr_vmemrange, rc;
    uint32_t nr_e820, e820_count;
    struct e820entry map[E820MAX];
    xen_vmemrange_t *vmemranges;
    unsigned int array_size;

    /* If e820_host is not set, call the generic function */
    if (!(b_info->type == LIBXL_DOMAIN_TYPE_PV &&
          libxl_defbool_val(b_info->u.pv.e820_host)))
        return libxl__vnuma_build_vmemrange_pv_generic(gc, domid, b_info,
                                                       state);

    assert(state->vmemranges == NULL);

    nr_e820 = E820MAX;
    rc = e820_host_sanitize(gc, b_info, map, &nr_e820);
    if (rc) goto out;

    e820_count = 0;
    nr_vmemrange = 0;
    vmemranges = NULL;
    array_size = 0;
    for (nid = 0; nid < b_info->num_vnuma_nodes; nid++) {
        libxl_vnode_info *p = &b_info->vnuma_nodes[nid];
        uint64_t remaining_bytes = (p->memkb << 10), bytes;

        while (remaining_bytes > 0) {
            if (e820_count >= nr_e820) {
                rc = ERROR_NOMEM;
                goto out;
            }

            /* Skip non-RAM regions */
            if (map[e820_count].type != E820_RAM) {
                e820_count++;
                continue;
            }

            if (nr_vmemrange >= array_size) {
                array_size += 32;
                GCREALLOC_ARRAY(vmemranges, array_size);
            }

            bytes = map[e820_count].size >= remaining_bytes ?
                    remaining_bytes : map[e820_count].size;

            vmemranges[nr_vmemrange].start = map[e820_count].addr;
            vmemranges[nr_vmemrange].end = map[e820_count].addr + bytes;

            if (map[e820_count].size >= remaining_bytes) {
                map[e820_count].addr += bytes;
                map[e820_count].size -= bytes;
            } else {
                e820_count++;
            }

            remaining_bytes -= bytes;

            vmemranges[nr_vmemrange].flags = 0;
            vmemranges[nr_vmemrange].nid = nid;
            nr_vmemrange++;
        }
    }

    state->vmemranges = vmemranges;
    state->num_vmemranges = nr_vmemrange;

    rc = 0;
 out:
    return rc;
}

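/*
 * Map a host IRQ into the domain as a pirq, then grant the domain
 * permission to use it.
 */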
int libxl__arch_domain_map_irq(libxl__gc *gc, uint32_t domid, int irq)
{
    int ret;

    ret = xc_physdev_map_pirq(CTX->xch, domid, irq, &irq);
    if (ret)
        return ret;

    ret = xc_domain_irq_permission(CTX->xch, domid, irq, 1);

    return ret;
}

/*
 * Here we're just trying to set these kinds of e820 mappings:
 *
 * #1. Low memory region
 *
 * Low RAM starts at least from 1M to make sure all standard regions
 * of the PC memory map, like BIOS, VGA memory-mapped I/O and vgabios,
 * have enough space.
 * Note: the regions below 1M are still constructed with multiple
 * e820 entries by hvmloader. At this point we don't change anything.
 *
 * #2. RDM region if it exists
 *
 * #3. High memory region if it exists
 *
 * Note: these regions do not overlap, since we already adjust them to
 * avoid that. Please refer to libxl__domain_device_construct_rdm().
 */
#define GUEST_LOW_MEM_START_DEFAULT 0x100000
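/*
 * An illustrative (hypothetical) resulting layout for a PVH guest with
 * one RDM region, one ACPI module and memory above 4GB; actual addresses
 * depend entirely on the configuration:
 *
 *   [1M, lowmem_end)        RAM
 *   [rdm_start, +rdm_size)  Reserved
 *   [HVM special pages)     Reserved
 *   [ACPI module)           ACPI
 *   [4G, 4G+highmem_size)   RAM
 */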
static int domain_construct_memmap(libxl__gc *gc,
                                   libxl_domain_config *d_config,
                                   uint32_t domid,
                                   struct xc_dom_image *dom)
{
    int rc = 0;
    unsigned int nr = 0, i;
    /* We always own at least one lowmem entry. */
    unsigned int e820_entries = 1;
    struct e820entry *e820 = NULL;
    uint64_t highmem_size =
        dom->highmem_end ? dom->highmem_end - (1ull << 32) : 0;
    uint32_t lowmem_start = dom->device_model ? GUEST_LOW_MEM_START_DEFAULT : 0;
    unsigned page_size = XC_DOM_PAGE_SIZE(dom);

    /* Add all rdm entries. */
    for (i = 0; i < d_config->num_rdms; i++)
        if (d_config->rdms[i].policy != LIBXL_RDM_RESERVE_POLICY_INVALID)
            e820_entries++;

    /* Add the HVM special pages to the PVH memmap as RESERVED. */
    if (d_config->b_info.type == LIBXL_DOMAIN_TYPE_PVH)
        e820_entries++;

    /* If we should have a highmem range. */
    if (highmem_size)
        e820_entries++;

    for (i = 0; i < MAX_ACPI_MODULES; i++)
        if (dom->acpi_modules[i].length)
            e820_entries++;

    if (e820_entries >= E820MAX) {
        LOGD(ERROR, domid, "Ooops! Too many entries in the memory map!");
        rc = ERROR_INVAL;
        goto out;
    }

    e820 = libxl__malloc(gc, sizeof(struct e820entry) * e820_entries);

    /* Low memory */
    e820[nr].addr = lowmem_start;
    e820[nr].size = dom->lowmem_end - lowmem_start;
    e820[nr].type = E820_RAM;
    nr++;

    /* RDM mapping */
    for (i = 0; i < d_config->num_rdms; i++) {
        if (d_config->rdms[i].policy == LIBXL_RDM_RESERVE_POLICY_INVALID)
            continue;

        e820[nr].addr = d_config->rdms[i].start;
        e820[nr].size = d_config->rdms[i].size;
        e820[nr].type = E820_RESERVED;
        nr++;
    }

    /* HVM special pages */
    if (d_config->b_info.type == LIBXL_DOMAIN_TYPE_PVH) {
        e820[nr].addr = (X86_HVM_END_SPECIAL_REGION - X86_HVM_NR_SPECIAL_PAGES)
                        << XC_PAGE_SHIFT;
        e820[nr].size = X86_HVM_NR_SPECIAL_PAGES << XC_PAGE_SHIFT;
        e820[nr].type = E820_RESERVED;
        nr++;
    }

    for (i = 0; i < MAX_ACPI_MODULES; i++) {
        if (dom->acpi_modules[i].length) {
            e820[nr].addr = dom->acpi_modules[i].guest_addr_out & ~(page_size - 1);
            e820[nr].size = dom->acpi_modules[i].length +
                (dom->acpi_modules[i].guest_addr_out & (page_size - 1));
            e820[nr].type = E820_ACPI;
            nr++;
        }
    }

    /* High memory */
    if (highmem_size) {
        e820[nr].addr = ((uint64_t)1 << 32);
        e820[nr].size = highmem_size;
        e820[nr].type = E820_RAM;
    }

    if (xc_domain_set_memory_map(CTX->xch, domid, e820, e820_entries) != 0) {
        rc = ERROR_FAIL;
        goto out;
    }

    dom->e820 = e820;
    dom->e820_entries = e820_entries;

 out:
    return rc;
}

int libxl__arch_domain_finalise_hw_description(libxl__gc *gc,
                                               uint32_t domid,
                                               libxl_domain_config *d_config,
                                               struct xc_dom_image *dom)
{
    libxl_domain_build_info *const info = &d_config->b_info;
    int rc;

    if (info->type == LIBXL_DOMAIN_TYPE_PV)
        return 0;

    if (info->type == LIBXL_DOMAIN_TYPE_PVH) {
        rc = libxl__dom_load_acpi(gc, info, dom);
        if (rc != 0) {
            LOGE(ERROR, "libxl__dom_load_acpi failed");
            return rc;
        }
    }

    rc = domain_construct_memmap(gc, d_config, domid, dom);
    if (rc != 0)
        LOGE(ERROR, "setting domain memory map failed");

    return rc;
}

void libxl__arch_domain_create_info_setdefault(libxl__gc *gc,
                                               libxl_domain_create_info *c_info)
{
}

int libxl__arch_domain_build_info_setdefault(libxl__gc *gc,
                                             libxl_domain_build_info *b_info,
                                             const libxl_physinfo *physinfo)
{
    libxl_defbool_setdefault(&b_info->acpi, true);
    libxl_defbool_setdefault(&b_info->arch_x86.msr_relaxed, false);

    /*
     * The config parameter "altp2m" replaces the parameter "altp2mhvm".
     * For legacy reasons, both parameters are accepted on x86 HVM guests.
     *
     * If the legacy field info->u.hvm.altp2m is set, activate altp2m.
     * Otherwise set altp2m based on the field info->altp2m.
     */
    libxl_defbool_setdefault(&b_info->u.hvm.altp2m, false);
    if (b_info->altp2m == LIBXL_ALTP2M_MODE_DISABLED &&
        libxl_defbool_val(b_info->u.hvm.altp2m))
        b_info->altp2m = libxl_defbool_val(b_info->u.hvm.altp2m);

    return 0;
}

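/*
 * Resolve the user's "passthrough" setting into a concrete mode:
 * share_pt (the IOMMU shares the HAP page tables) when the platform and
 * guest type allow it, sync_pt otherwise. PVH guests cannot use
 * passthrough at all yet.
 */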
int libxl__arch_passthrough_mode_setdefault(libxl__gc *gc,
                                            uint32_t domid,
                                            libxl_domain_config *d_config,
                                            const libxl_physinfo *physinfo)
{
    int rc;
    libxl_domain_create_info *const c_info = &d_config->c_info;

    if (c_info->passthrough != LIBXL_PASSTHROUGH_DISABLED &&
        c_info->type == LIBXL_DOMAIN_TYPE_PVH) {
        LOGD(ERROR, domid,
             "passthrough not yet supported for x86 PVH guests");
        rc = ERROR_INVAL;
        goto out;
    }

    const char *whynot_pt_share =
        c_info->type == LIBXL_DOMAIN_TYPE_PV ? "not valid for PV domain" :
        !physinfo->cap_iommu_hap_pt_share ? "not supported on this platform" :
        !libxl_defbool_val(d_config->c_info.hap) ? "only valid for HAP guests" :
        NULL;

    if (c_info->passthrough == LIBXL_PASSTHROUGH_ENABLED) {
        c_info->passthrough = whynot_pt_share
            ? LIBXL_PASSTHROUGH_SYNC_PT : LIBXL_PASSTHROUGH_SHARE_PT;
    }

    if (c_info->passthrough == LIBXL_PASSTHROUGH_SHARE_PT && whynot_pt_share) {
        LOGD(ERROR, domid,
             "passthrough=\"share_pt\" %s",
             whynot_pt_share);
        rc = ERROR_INVAL;
        goto out;
    }

    rc = 0;
 out:
    return rc;
}

void libxl__arch_update_domain_config(libxl__gc *gc,
                                      libxl_domain_config *dst,
                                      const libxl_domain_config *src)
{
    /*
     * Force MSR relaxed and HVM pirq to be set (either to true or false) so
     * they are part of the domain configuration when saving or performing a
     * live-migration.
     *
     * Doing so allows the receiving side to figure out whether the flags
     * should be set to true in order to keep backwards compatibility with
     * already started domains.
     */
    libxl_defbool_setdefault(&dst->b_info.arch_x86.msr_relaxed,
                    libxl_defbool_val(src->b_info.arch_x86.msr_relaxed));
    if (src->c_info.type == LIBXL_DOMAIN_TYPE_HVM)
        libxl_defbool_setdefault(&dst->b_info.u.hvm.pirq,
                                 libxl_defbool_val(src->b_info.u.hvm.pirq));
}

/*
 * Local variables:
 * mode: C
 * c-basic-offset: 4
 * indent-tabs-mode: nil
 * End:
 */