1 /*
2 * Copyright (C) 2009 Citrix Ltd.
3 * Author Vincent Hanquez <vincent.hanquez@eu.citrix.com>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU Lesser General Public License as published
7 * by the Free Software Foundation; version 2.1 only. with the special
8 * exception on linking described in file LICENSE.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU Lesser General Public License for more details.
14 */
15
16 #include "libxl_osdeps.h" /* must come before any other headers */
17
18 #include <glob.h>
19
20 #include "libxl_internal.h"
21 #include "libxl_arch.h"
22
23 #include <xc_dom.h>
24 #include <xen/hvm/hvm_info_table.h>
25 #include <xen/hvm/hvm_xs_strings.h>
26 #include <xen/hvm/e820.h>
27
28 #include "_paths.h"
29
30 libxl_domain_type libxl__domain_type(libxl__gc *gc, uint32_t domid)
31 {
32 libxl_ctx *ctx = libxl__gc_owner(gc);
33 xc_domaininfo_t info;
34 int ret;
35
36 ret = xc_domain_getinfolist(ctx->xch, domid, 1, &info);
37 if (ret != 1 || info.domain != domid) {
38 LOG(ERROR, "unable to get domain type for domid=%"PRIu32, domid);
39 return LIBXL_DOMAIN_TYPE_INVALID;
40 }
41 if (info.flags & XEN_DOMINF_hvm_guest) {
42 const char *type_path = GCSPRINTF("%s/type",
43 libxl__xs_libxl_path(gc, domid));
44 const char *type;
45 libxl_domain_type t;
46 int rc;
47
48 rc = libxl__xs_read_mandatory(gc, XBT_NULL, type_path, &type);
49 if (rc) {
50 LOG(WARN,
51 "unable to get domain type for domid=%"PRIu32", assuming HVM",
52 domid);
53 return LIBXL_DOMAIN_TYPE_HVM;
54 }
55
56 rc = libxl_domain_type_from_string(type, &t);
57 if (rc) {
58 LOG(WARN,
59 "unable to get domain type for domid=%"PRIu32", assuming HVM",
60 domid);
61 return LIBXL_DOMAIN_TYPE_HVM;
62 }
63
64 return t;
65 } else
66 return LIBXL_DOMAIN_TYPE_PV;
67 }
68
69 int libxl__domain_cpupool(libxl__gc *gc, uint32_t domid)
70 {
71 xc_domaininfo_t info;
72 int ret;
73
74 ret = xc_domain_getinfolist(CTX->xch, domid, 1, &info);
75 if (ret != 1)
76 {
77 LOGE(ERROR, "getinfolist failed %d", ret);
78 return ERROR_FAIL;
79 }
80 if (info.domain != domid)
81 {
82 LOGE(ERROR, "got info for dom%d, wanted dom%d\n", info.domain, domid);
83 return ERROR_FAIL;
84 }
85 return info.cpupool;
86 }
87
88 libxl_scheduler libxl__domain_scheduler(libxl__gc *gc, uint32_t domid)
89 {
90 int cpupool = libxl__domain_cpupool(gc, domid);
91 libxl_cpupoolinfo poolinfo;
92 libxl_scheduler sched = LIBXL_SCHEDULER_UNKNOWN;
93 int rc;
94
95 if (cpupool < 0)
96 return sched;
97
98 libxl_cpupoolinfo_init(&poolinfo);
99 rc = libxl_cpupool_info(CTX, &poolinfo, cpupool);
100 if (rc < 0)
101 goto out;
102
103 sched = poolinfo.sched;
104
105 out:
106 libxl_cpupoolinfo_dispose(&poolinfo);
107 return sched;
108 }
109
110 /*
111 * Two NUMA placement candidates are compared by means of the following
112 * heuristics:
113 *
114 * - the number of vcpus runnable on the candidates is considered, and
115 * candidates with fewer of them are preferred. If two candidates have
116 * the same number of runnable vcpus,
117 * - the amount of free memory in the candidates is considered, and the
118 * candidate with the greater amount of it is preferred.
119 *
120 * In fact, leaving larger memory holes maximizes the probability of being
121 * able to put other domains on the node. That hopefully means many domains
122 * will benefit from local memory accesses, but also introduces the risk of
123 * overloading large (from a memory POV) nodes. That is precisely the effect
124 * that counting the vcpus able to run on the nodes tries to prevent.
125 *
126 * Note that this completely ignores the number of nodes each candidate spans,
127 * as the fact that fewer nodes is better is already accounted for in the
128 * algorithm.
129 */
130 static int numa_cmpf(const libxl__numa_candidate *c1,
131 const libxl__numa_candidate *c2)
132 {
133 if (c1->nr_vcpus != c2->nr_vcpus)
134 return c1->nr_vcpus - c2->nr_vcpus;
135
136 return c2->free_memkb - c1->free_memkb;
137 }
138
139 /* The actual automatic NUMA placement routine */
140 static int numa_place_domain(libxl__gc *gc, uint32_t domid,
141 libxl_domain_build_info *info)
142 {
143 int found;
144 libxl__numa_candidate candidate;
145 libxl_bitmap cpupool_nodemap;
146 libxl_cpupoolinfo cpupool_info;
147 int i, cpupool, rc = 0;
148 uint64_t memkb;
149
150 libxl__numa_candidate_init(&candidate);
151 libxl_bitmap_init(&cpupool_nodemap);
152 libxl_cpupoolinfo_init(&cpupool_info);
153
154 /*
155 * Extract the cpumap from the cpupool the domain belongs to. In fact,
156 * it only makes sense to consider the cpus/nodes that are in there
157 * for placement.
158 */
159 rc = cpupool = libxl__domain_cpupool(gc, domid);
160 if (rc < 0)
161 goto out;
162 rc = libxl_cpupool_info(CTX, &cpupool_info, cpupool);
163 if (rc)
164 goto out;
165
166 rc = libxl_domain_need_memory(CTX, info, &memkb);
167 if (rc)
168 goto out;
169 if (libxl_node_bitmap_alloc(CTX, &cpupool_nodemap, 0)) {
170 rc = ERROR_FAIL;
171 goto out;
172 }
173
174 /* Find the best candidate with enough free memory and at least
175 * as many pcpus as the domain has vcpus. */
176 rc = libxl__get_numa_candidate(gc, memkb, info->max_vcpus,
177 0, 0, &cpupool_info.cpumap,
178 numa_cmpf, &candidate, &found);
179 if (rc)
180 goto out;
181
182 /* No suitable placement candidate was found: leave the domain's
183 * info->cpumap alone. It will have affinity with all nodes/cpus. */
184 if (found == 0)
185 goto out;
186
187 /* Map the candidate's node map to the domain's info->nodemap */
188 libxl__numa_candidate_get_nodemap(gc, &candidate, &info->nodemap);
189
190 /* Avoid trying to set the affinity to nodes that might be in the
191 * candidate's nodemap but out of our cpupool. */
192 rc = libxl_cpumap_to_nodemap(CTX, &cpupool_info.cpumap,
193 &cpupool_nodemap);
194 if (rc)
195 goto out;
196
197 libxl_for_each_set_bit(i, info->nodemap) {
198 if (!libxl_bitmap_test(&cpupool_nodemap, i))
199 libxl_bitmap_reset(&info->nodemap, i);
200 }
201
202 LOG(DETAIL, "NUMA placement candidate with %d nodes, %d cpus and "
203 "%"PRIu64" KB free selected", candidate.nr_nodes,
204 candidate.nr_cpus, candidate.free_memkb / 1024);
205
206 out:
207 libxl__numa_candidate_dispose(&candidate);
208 libxl_bitmap_dispose(&cpupool_nodemap);
209 libxl_cpupoolinfo_dispose(&cpupool_info);
210 return rc;
211 }
212
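/* Sanity-check the configured timer mode and return it in the form
 * expected by HVM_PARAM_TIMER_MODE. */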
213 static unsigned long timer_mode(const libxl_domain_build_info *info)
214 {
215 const libxl_timer_mode mode = info->timer_mode;
216 assert(mode >= LIBXL_TIMER_MODE_DELAY_FOR_MISSED_TICKS &&
217 mode <= LIBXL_TIMER_MODE_ONE_MISSED_TICK_PENDING);
218 return ((unsigned long)mode);
219 }
220
221 #if defined(__i386__) || defined(__x86_64__)
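/* Fold the default and explicitly enabled/disabled viridian enlightenment
 * groups into a single mask, validate it, and program HVM_PARAM_VIRIDIAN. */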
222 static int hvm_set_viridian_features(libxl__gc *gc, uint32_t domid,
223 libxl_domain_build_info *const info)
224 {
225 libxl_bitmap enlightenments;
226 libxl_viridian_enlightenment v;
227 uint64_t mask = 0;
228
229 libxl_bitmap_init(&enlightenments);
230 libxl_bitmap_alloc(CTX, &enlightenments,
231 LIBXL_BUILDINFO_HVM_VIRIDIAN_ENABLE_DISABLE_WIDTH);
232
233 if (libxl_defbool_val(info->u.hvm.viridian)) {
234 /* Enable defaults */
235 libxl_bitmap_set(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_BASE);
236 libxl_bitmap_set(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_FREQ);
237 libxl_bitmap_set(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_TIME_REF_COUNT);
238 libxl_bitmap_set(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_APIC_ASSIST);
239 libxl_bitmap_set(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_CRASH_CTL);
240 }
241
242 libxl_for_each_set_bit(v, info->u.hvm.viridian_enable) {
243 if (libxl_bitmap_test(&info->u.hvm.viridian_disable, v)) {
244 LOG(ERROR, "%s group both enabled and disabled",
245 libxl_viridian_enlightenment_to_string(v));
246 goto err;
247 }
248 if (libxl_viridian_enlightenment_to_string(v)) /* check validity */
249 libxl_bitmap_set(&enlightenments, v);
250 }
251
252 libxl_for_each_set_bit(v, info->u.hvm.viridian_disable)
253 if (libxl_viridian_enlightenment_to_string(v)) /* check validity */
254 libxl_bitmap_reset(&enlightenments, v);
255
256 /* The base set is a pre-requisite for all others */
257 if (!libxl_bitmap_is_empty(&enlightenments) &&
258 !libxl_bitmap_test(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_BASE)) {
259 LOG(ERROR, "base group not enabled");
260 goto err;
261 }
262
263 libxl_for_each_set_bit(v, enlightenments)
264 LOG(DETAIL, "%s group enabled", libxl_viridian_enlightenment_to_string(v));
265
266 if (libxl_bitmap_test(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_BASE)) {
267 mask |= HVMPV_base_freq;
268
269 if (!libxl_bitmap_test(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_FREQ))
270 mask |= HVMPV_no_freq;
271 }
272
273 if (libxl_bitmap_test(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_TIME_REF_COUNT))
274 mask |= HVMPV_time_ref_count;
275
276 if (libxl_bitmap_test(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_REFERENCE_TSC))
277 mask |= HVMPV_reference_tsc;
278
279 if (libxl_bitmap_test(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_HCALL_REMOTE_TLB_FLUSH))
280 mask |= HVMPV_hcall_remote_tlb_flush;
281
282 if (libxl_bitmap_test(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_APIC_ASSIST))
283 mask |= HVMPV_apic_assist;
284
285 if (libxl_bitmap_test(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_CRASH_CTL))
286 mask |= HVMPV_crash_ctl;
287
288 if (mask != 0 &&
289 xc_hvm_param_set(CTX->xch,
290 domid,
291 HVM_PARAM_VIRIDIAN,
292 mask) != 0) {
293 LOGE(ERROR, "Couldn't set viridian feature mask (0x%"PRIx64")", mask);
294 goto err;
295 }
296
297 libxl_bitmap_dispose(&enlightenments);
298 return 0;
299
300 err:
301 libxl_bitmap_dispose(&enlightenments);
302 return ERROR_FAIL;
303 }
304
305 static int hvm_set_mca_capabilities(libxl__gc *gc, uint32_t domid,
306 libxl_domain_build_info *const info)
307 {
308 unsigned long caps = info->u.hvm.mca_caps;
309
310 if (!caps)
311 return 0;
312
313 return xc_hvm_param_set(CTX->xch, domid, HVM_PARAM_MCA_CAP, caps);
314 }
315 #endif
316
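/* Program the basic HVM parameters (PAE, timer mode, nested HVM and, for
 * HVM guests, HPET and VPT alignment) appropriate to the guest type. */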
317 static void hvm_set_conf_params(xc_interface *handle, uint32_t domid,
318 libxl_domain_build_info *const info)
319 {
320 switch(info->type) {
321 case LIBXL_DOMAIN_TYPE_PVH:
322 xc_hvm_param_set(handle, domid, HVM_PARAM_PAE_ENABLED, true);
323 xc_hvm_param_set(handle, domid, HVM_PARAM_TIMER_MODE,
324 timer_mode(info));
325 xc_hvm_param_set(handle, domid, HVM_PARAM_NESTEDHVM,
326 libxl_defbool_val(info->nested_hvm));
327 break;
328 case LIBXL_DOMAIN_TYPE_HVM:
329 xc_hvm_param_set(handle, domid, HVM_PARAM_PAE_ENABLED,
330 libxl_defbool_val(info->u.hvm.pae));
331 #if defined(__i386__) || defined(__x86_64__)
332 xc_hvm_param_set(handle, domid, HVM_PARAM_HPET_ENABLED,
333 libxl_defbool_val(info->u.hvm.hpet));
334 #endif
335 xc_hvm_param_set(handle, domid, HVM_PARAM_TIMER_MODE,
336 timer_mode(info));
337 xc_hvm_param_set(handle, domid, HVM_PARAM_VPT_ALIGN,
338 libxl_defbool_val(info->u.hvm.vpt_align));
339 xc_hvm_param_set(handle, domid, HVM_PARAM_NESTEDHVM,
340 libxl_defbool_val(info->nested_hvm));
341 break;
342 default:
343 abort();
344 }
345 }
346
347 int libxl__build_pre(libxl__gc *gc, uint32_t domid,
348 libxl_domain_config *d_config, libxl__domain_build_state *state)
349 {
350 libxl_domain_build_info *const info = &d_config->b_info;
351 libxl_ctx *ctx = libxl__gc_owner(gc);
352 char *xs_domid, *con_domid;
353 int rc;
354 uint64_t size;
355
356 if (xc_domain_max_vcpus(ctx->xch, domid, info->max_vcpus) != 0) {
357 LOG(ERROR, "Couldn't set max vcpu count");
358 return ERROR_FAIL;
359 }
360
361 if (xc_domain_set_gnttab_limits(ctx->xch, domid, info->max_grant_frames,
362 info->max_maptrack_frames) != 0) {
363 LOG(ERROR, "Couldn't set grant table limits");
364 return ERROR_FAIL;
365 }
366
367 /*
368 * Check if the domain has any CPU or node affinity already. If not, try
369 * to build up the latter via automatic NUMA placement. If
370 * numa_place_domain() manages to find a placement, info->nodemap is
371 * updated accordingly; if it does not, info->nodemap is just left
372 * alone. It is then the subsequent call to
373 * libxl_domain_set_nodeaffinity() that enacts the actual placement.
374 *
375 * As far as scheduling is concerned, we achieve NUMA-aware scheduling
376 * by having the results of placement affect the soft affinity of all
377 * the vcpus of the domain. Of course, we want that iff placement is
378 * enabled and actually happens, so we only change info->cpumap_soft to
379 * reflect the placement result if that is the case
380 */
381 if (libxl_defbool_val(info->numa_placement)) {
382 if (info->cpumap.size || info->num_vcpu_hard_affinity ||
383 info->num_vcpu_soft_affinity)
384 LOG(WARN, "Can't run NUMA placement, as an (hard or soft) "
385 "affinity has been specified explicitly");
386 else if (info->nodemap.size)
387 LOG(WARN, "Can't run NUMA placement, as the domain has "
388 "NUMA node affinity set already");
389 else {
390 libxl_bitmap cpumap_soft;
391
392 rc = libxl_node_bitmap_alloc(ctx, &info->nodemap, 0);
393 if (rc)
394 return rc;
395 libxl_bitmap_set_any(&info->nodemap);
396
397 rc = libxl_cpu_bitmap_alloc(ctx, &cpumap_soft, 0);
398 if (rc)
399 return rc;
400
401 rc = numa_place_domain(gc, domid, info);
402 if (rc) {
403 libxl_bitmap_dispose(&cpumap_soft);
404 return rc;
405 }
406
407 /*
408 * All we need to do now is convert the result of automatic
409 * placement from a nodemap to a cpumap, and then use that cpumap
410 * as the soft affinity for all the vcpus of the domain.
411 *
412 * When calling libxl_set_vcpuaffinity_all(), it is ok to use
413 * NULL as hard affinity, as we know we don't have one, or we
414 * won't be here.
415 */
416 libxl_nodemap_to_cpumap(ctx, &info->nodemap, &cpumap_soft);
417 libxl_set_vcpuaffinity_all(ctx, domid, info->max_vcpus,
418 NULL, &cpumap_soft);
419
420 libxl_bitmap_dispose(&cpumap_soft);
421
422 /*
423 * Placement has run, so prevent it from being re-run if the
424 * same config we are using and building here is ever re-used.
425 * This means that people re-using configs will get the same
426 * results, consistently, across every re-use, which is what
427 * we expect most people to want.
428 */
429 libxl_defbool_set(&info->numa_placement, false);
430 }
431 }
432
433 if (info->nodemap.size)
434 libxl_domain_set_nodeaffinity(ctx, domid, &info->nodemap);
435
436 if (info->num_vcpu_hard_affinity || info->num_vcpu_soft_affinity) {
437 libxl_bitmap *hard_affinity, *soft_affinity;
438 int i, n_vcpus;
439
440 n_vcpus = info->num_vcpu_hard_affinity > info->num_vcpu_soft_affinity ?
441 info->num_vcpu_hard_affinity : info->num_vcpu_soft_affinity;
442
443 for (i = 0; i < n_vcpus; i++) {
444 /*
445 * Prepare hard and soft affinity pointers in a way that allows
446 * us to issue only one call to libxl_set_vcpuaffinity(), setting,
447 * for each vcpu, both hard and soft affinity "atomically".
448 */
449 hard_affinity = NULL;
450 if (info->num_vcpu_hard_affinity &&
451 i < info->num_vcpu_hard_affinity)
452 hard_affinity = &info->vcpu_hard_affinity[i];
453
454 soft_affinity = NULL;
455 if (info->num_vcpu_soft_affinity &&
456 i < info->num_vcpu_soft_affinity)
457 soft_affinity = &info->vcpu_soft_affinity[i];
458
459 if (libxl_set_vcpuaffinity(ctx, domid, i,
460 hard_affinity, soft_affinity)) {
461 LOG(ERROR, "setting affinity failed on vcpu `%d'", i);
462 return ERROR_FAIL;
463 }
464 }
465 }
466
467
468 rc = libxl__arch_extra_memory(gc, info, &size);
469 if (rc < 0) {
470 LOGE(ERROR, "Couldn't get arch extra constant memory size");
471 return ERROR_FAIL;
472 }
473
474 if (xc_domain_setmaxmem(ctx->xch, domid, info->target_memkb + size) < 0) {
475 LOGE(ERROR, "Couldn't set max memory");
476 return ERROR_FAIL;
477 }
478
479 xs_domid = xs_read(ctx->xsh, XBT_NULL, "/tool/xenstored/domid", NULL);
480 state->store_domid = xs_domid ? atoi(xs_domid) : 0;
481 free(xs_domid);
482
483 con_domid = xs_read(ctx->xsh, XBT_NULL, "/tool/xenconsoled/domid", NULL);
484 state->console_domid = con_domid ? atoi(con_domid) : 0;
485 free(con_domid);
486
487 state->store_port = xc_evtchn_alloc_unbound(ctx->xch, domid, state->store_domid);
488 state->console_port = xc_evtchn_alloc_unbound(ctx->xch, domid, state->console_domid);
489
490 if (info->type != LIBXL_DOMAIN_TYPE_PV)
491 hvm_set_conf_params(ctx->xch, domid, info);
492
493 #if defined(__i386__) || defined(__x86_64__)
494 if (info->type == LIBXL_DOMAIN_TYPE_HVM) {
495 rc = hvm_set_viridian_features(gc, domid, info);
496 if (rc)
497 return rc;
498
499 rc = hvm_set_mca_capabilities(gc, domid, info);
500 if (rc)
501 return rc;
502 }
503 #endif
504
505 /* Alternate p2m support on x86 is available only for PVH/HVM guests. */
506 if (info->type == LIBXL_DOMAIN_TYPE_HVM) {
507 /* The config parameter "altp2m" replaces the parameter "altp2mhvm". For
508 * legacy reasons, both parameters are accepted on x86 HVM guests.
509 *
510 * If the legacy field info->u.hvm.altp2m is set, activate altp2m.
511 * Otherwise set altp2m based on the field info->altp2m. */
512 if (info->altp2m == LIBXL_ALTP2M_MODE_DISABLED &&
513 libxl_defbool_val(info->u.hvm.altp2m))
514 xc_hvm_param_set(ctx->xch, domid, HVM_PARAM_ALTP2M,
515 libxl_defbool_val(info->u.hvm.altp2m));
516 else
517 xc_hvm_param_set(ctx->xch, domid, HVM_PARAM_ALTP2M,
518 info->altp2m);
519 } else if (info->type == LIBXL_DOMAIN_TYPE_PVH) {
520 xc_hvm_param_set(ctx->xch, domid, HVM_PARAM_ALTP2M,
521 info->altp2m);
522 }
523
524 rc = libxl__arch_domain_create(gc, d_config, domid);
525
526 return rc;
527 }
528
529 static int set_vnuma_affinity(libxl__gc *gc, uint32_t domid,
530 libxl_domain_build_info *info)
531 {
532 libxl_bitmap cpumap;
533 libxl_vnode_info *v;
534 unsigned int i, j;
535 int rc = 0;
536
537 libxl_bitmap_init(&cpumap);
538
539 rc = libxl_cpu_bitmap_alloc(CTX, &cpumap, 0);
540 if (rc) {
541 LOG(ERROR, "Can't allocate nodemap");
542 goto out;
543 }
544
545 /*
546 * For each vcpu in each vnode, set its soft affinity to
547 * the pcpus belonging to the pnode the vnode is on
548 */
549 for (i = 0; i < info->num_vnuma_nodes; i++) {
550 v = &info->vnuma_nodes[i];
551
552 rc = libxl_node_to_cpumap(CTX, v->pnode, &cpumap);
553 if (rc) {
554 LOG(ERROR, "Can't get cpumap for vnode %d", i);
555 goto out;
556 }
557
558 libxl_for_each_set_bit(j, v->vcpus) {
559 rc = libxl_set_vcpuaffinity(CTX, domid, j, NULL, &cpumap);
560 if (rc) {
561 LOG(ERROR, "Can't set cpu affinity for %d", j);
562 goto out;
563 }
564 }
565 }
566
567 out:
568 libxl_bitmap_dispose(&cpumap);
569 return rc;
570 }
571
572 int libxl__build_post(libxl__gc *gc, uint32_t domid,
573 libxl_domain_build_info *info,
574 libxl__domain_build_state *state,
575 char **vms_ents, char **local_ents)
576 {
577 libxl_ctx *ctx = libxl__gc_owner(gc);
578 char *dom_path, *vm_path;
579 xs_transaction_t t;
580 char **ents;
581 int i, rc;
582
583 if (info->num_vnuma_nodes && !info->num_vcpu_soft_affinity) {
584 rc = set_vnuma_affinity(gc, domid, info);
585 if (rc)
586 return rc;
587 }
588
589 rc = libxl_domain_sched_params_set(CTX, domid, &info->sched_params);
590 if (rc)
591 return rc;
592
593 rc = xc_domain_set_max_evtchn(ctx->xch, domid, info->event_channels);
594 if (rc) {
595 LOG(ERROR, "Failed to set event channel limit to %d (%d)",
596 info->event_channels, rc);
597 return ERROR_FAIL;
598 }
599
600 libxl_cpuid_apply_policy(ctx, domid);
601 if (info->cpuid != NULL)
602 libxl_cpuid_set(ctx, domid, info->cpuid);
603
604 if (info->type == LIBXL_DOMAIN_TYPE_HVM
605 && !libxl_ms_vm_genid_is_zero(&info->u.hvm.ms_vm_genid)) {
606 rc = libxl__ms_vm_genid_set(gc, domid,
607 &info->u.hvm.ms_vm_genid);
608 if (rc) {
609 LOG(ERROR, "Failed to set VM Generation ID");
610 return rc;
611 }
612 }
613
614 ents = libxl__calloc(gc, 12 + (info->max_vcpus * 2) + 2, sizeof(char *));
615 ents[0] = "memory/static-max";
616 ents[1] = GCSPRINTF("%"PRId64, info->max_memkb);
617 ents[2] = "memory/target";
618 ents[3] = GCSPRINTF("%"PRId64, info->target_memkb -
619 libxl__get_targetmem_fudge(gc, info));
620 ents[4] = "memory/videoram";
621 ents[5] = GCSPRINTF("%"PRId64, info->video_memkb);
622 ents[6] = "domid";
623 ents[7] = GCSPRINTF("%d", domid);
624 ents[8] = "store/port";
625 ents[9] = GCSPRINTF("%"PRIu32, state->store_port);
626 ents[10] = "store/ring-ref";
627 ents[11] = GCSPRINTF("%lu", state->store_mfn);
628 for (i = 0; i < info->max_vcpus; i++) {
629 ents[12+(i*2)] = GCSPRINTF("cpu/%d/availability", i);
630 ents[12+(i*2)+1] = libxl_bitmap_test(&info->avail_vcpus, i)
631 ? "online" : "offline";
632 }
633
634 dom_path = libxl__xs_get_dompath(gc, domid);
635 if (!dom_path) {
636 return ERROR_FAIL;
637 }
638
639 vm_path = xs_read(ctx->xsh, XBT_NULL, GCSPRINTF("%s/vm", dom_path), NULL);
640 retry_transaction:
641 t = xs_transaction_start(ctx->xsh);
642
643 libxl__xs_writev(gc, t, dom_path, ents);
644 libxl__xs_writev(gc, t, dom_path, local_ents);
645 libxl__xs_writev(gc, t, vm_path, vms_ents);
646
647 if (!xs_transaction_end(ctx->xsh, t, 0))
648 if (errno == EAGAIN)
649 goto retry_transaction;
650 xs_introduce_domain(ctx->xsh, domid, state->store_mfn, state->store_port);
651 free(vm_path);
652 return 0;
653 }
654
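/* Flatten the vNUMA configuration into the vcpu-to-vnode, vnode-to-pnode
 * and distance arrays Xen expects and apply it with xc_domain_setvnuma(). */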
655 static int set_vnuma_info(libxl__gc *gc, uint32_t domid,
656 const libxl_domain_build_info *info,
657 const libxl__domain_build_state *state)
658 {
659 int rc = 0;
660 unsigned int i, nr_vdistance;
661 unsigned int *vcpu_to_vnode, *vnode_to_pnode, *vdistance = NULL;
662
663 vcpu_to_vnode = libxl__calloc(gc, info->max_vcpus,
664 sizeof(unsigned int));
665 vnode_to_pnode = libxl__calloc(gc, info->num_vnuma_nodes,
666 sizeof(unsigned int));
667
668 nr_vdistance = info->num_vnuma_nodes * info->num_vnuma_nodes;
669 vdistance = libxl__calloc(gc, nr_vdistance, sizeof(unsigned int));
670
671 for (i = 0; i < info->num_vnuma_nodes; i++) {
672 libxl_vnode_info *v = &info->vnuma_nodes[i];
673 int j;
674
675 /* vnode to pnode mapping */
676 vnode_to_pnode[i] = v->pnode;
677
678 /* vcpu to vnode mapping */
679 libxl_for_each_set_bit(j, v->vcpus)
680 vcpu_to_vnode[j] = i;
681
682 /* node distances */
683 assert(info->num_vnuma_nodes == v->num_distances);
684 memcpy(vdistance + (i * info->num_vnuma_nodes),
685 v->distances,
686 v->num_distances * sizeof(unsigned int));
687 }
688
689 if (xc_domain_setvnuma(CTX->xch, domid, info->num_vnuma_nodes,
690 state->num_vmemranges, info->max_vcpus,
691 state->vmemranges, vdistance,
692 vcpu_to_vnode, vnode_to_pnode) < 0) {
693 LOGE(ERROR, "xc_domain_setvnuma failed");
694 rc = ERROR_FAIL;
695 }
696
697 return rc;
698 }
699
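/* Common tail of the PV and PVH/HVM build paths: parse and load the guest
 * image, lay out and allocate guest memory, and initialise the grant table
 * through the libxc domain builder. */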
700 static int libxl__build_dom(libxl__gc *gc, uint32_t domid,
701 libxl_domain_build_info *info, libxl__domain_build_state *state,
702 struct xc_dom_image *dom)
703 {
704 uint64_t mem_kb;
705 int ret;
706
707 if ( (ret = xc_dom_boot_xen_init(dom, CTX->xch, domid)) != 0 ) {
708 LOGE(ERROR, "xc_dom_boot_xen_init failed");
709 goto out;
710 }
711 #ifdef GUEST_RAM_BASE
712 if ( (ret = xc_dom_rambase_init(dom, GUEST_RAM_BASE)) != 0 ) {
713 LOGE(ERROR, "xc_dom_rambase failed");
714 goto out;
715 }
716 #endif
717 if ( (ret = xc_dom_parse_image(dom)) != 0 ) {
718 LOGE(ERROR, "xc_dom_parse_image failed");
719 goto out;
720 }
721 if ( (ret = libxl__arch_domain_init_hw_description(gc, info, state, dom)) != 0 ) {
722 LOGE(ERROR, "libxl__arch_domain_init_hw_description failed");
723 goto out;
724 }
725
726 mem_kb = dom->container_type == XC_DOM_HVM_CONTAINER ?
727 (info->max_memkb - info->video_memkb) : info->target_memkb;
728 if ( (ret = xc_dom_mem_init(dom, mem_kb / 1024)) != 0 ) {
729 LOGE(ERROR, "xc_dom_mem_init failed");
730 goto out;
731 }
732 if ( (ret = xc_dom_boot_mem_init(dom)) != 0 ) {
733 LOGE(ERROR, "xc_dom_boot_mem_init failed");
734 goto out;
735 }
736 if ( (ret = libxl__arch_domain_finalise_hw_description(gc, info, dom)) != 0 ) {
737 LOGE(ERROR, "libxl__arch_domain_finalise_hw_description failed");
738 goto out;
739 }
740 if ( (ret = xc_dom_build_image(dom)) != 0 ) {
741 LOGE(ERROR, "xc_dom_build_image failed");
742 goto out;
743 }
744 if ( (ret = xc_dom_boot_image(dom)) != 0 ) {
745 LOGE(ERROR, "xc_dom_boot_image failed");
746 goto out;
747 }
748 if ( (ret = xc_dom_gnttab_init(dom)) != 0 ) {
749 LOGE(ERROR, "xc_dom_gnttab_init failed");
750 goto out;
751 }
752 if ((ret = libxl__arch_build_dom_finish(gc, info, dom, state)) != 0) {
753 LOGE(ERROR, "libxl__arch_build_dom_finish failed");
754 goto out;
755 }
756
757 out:
758 return ret != 0 ? ERROR_FAIL : 0;
759 }
760
761 int libxl__build_pv(libxl__gc *gc, uint32_t domid,
762 libxl_domain_build_info *info, libxl__domain_build_state *state)
763 {
764 libxl_ctx *ctx = libxl__gc_owner(gc);
765 struct xc_dom_image *dom;
766 int ret;
767 int flags = 0;
768
769 xc_dom_loginit(ctx->xch);
770
771 dom = xc_dom_allocate(ctx->xch, state->pv_cmdline, info->u.pv.features);
772 if (!dom) {
773 LOGE(ERROR, "xc_dom_allocate failed");
774 return ERROR_FAIL;
775 }
776
777 dom->container_type = XC_DOM_PV_CONTAINER;
778
779 LOG(DEBUG, "pv kernel mapped %d path %s", state->pv_kernel.mapped, state->pv_kernel.path);
780
781 if (state->pv_kernel.mapped) {
782 ret = xc_dom_kernel_mem(dom,
783 state->pv_kernel.data,
784 state->pv_kernel.size);
785 if ( ret != 0) {
786 LOGE(ERROR, "xc_dom_kernel_mem failed");
787 goto out;
788 }
789 } else {
790 ret = xc_dom_kernel_file(dom, state->pv_kernel.path);
791 if ( ret != 0) {
792 LOGE(ERROR, "xc_dom_kernel_file failed");
793 goto out;
794 }
795 }
796
797 if ( state->pv_ramdisk.path && strlen(state->pv_ramdisk.path) ) {
798 if (state->pv_ramdisk.mapped) {
799 if ( (ret = xc_dom_module_mem(dom, state->pv_ramdisk.data, state->pv_ramdisk.size, NULL)) != 0 ) {
800 LOGE(ERROR, "xc_dom_ramdisk_mem failed");
801 goto out;
802 }
803 } else {
804 if ( (ret = xc_dom_module_file(dom, state->pv_ramdisk.path, NULL)) != 0 ) {
805 LOGE(ERROR, "xc_dom_ramdisk_file failed");
806 goto out;
807 }
808 }
809 }
810
811 dom->flags = flags;
812 dom->console_evtchn = state->console_port;
813 dom->console_domid = state->console_domid;
814 dom->xenstore_evtchn = state->store_port;
815 dom->xenstore_domid = state->store_domid;
816 dom->claim_enabled = libxl_defbool_val(info->claim_mode);
817
818 if (info->num_vnuma_nodes != 0) {
819 unsigned int i;
820
821 ret = libxl__vnuma_build_vmemrange_pv(gc, domid, info, state);
822 if (ret) {
823 LOGE(ERROR, "cannot build vmemranges");
824 goto out;
825 }
826 ret = libxl__vnuma_config_check(gc, info, state);
827 if (ret) goto out;
828
829 ret = set_vnuma_info(gc, domid, info, state);
830 if (ret) goto out;
831
832 dom->nr_vmemranges = state->num_vmemranges;
833 dom->vmemranges = xc_dom_malloc(dom, sizeof(*dom->vmemranges) *
834 dom->nr_vmemranges);
835
836 for (i = 0; i < dom->nr_vmemranges; i++) {
837 dom->vmemranges[i].start = state->vmemranges[i].start;
838 dom->vmemranges[i].end = state->vmemranges[i].end;
839 dom->vmemranges[i].flags = state->vmemranges[i].flags;
840 dom->vmemranges[i].nid = state->vmemranges[i].nid;
841 }
842
843 dom->nr_vnodes = info->num_vnuma_nodes;
844 dom->vnode_to_pnode = xc_dom_malloc(dom, sizeof(*dom->vnode_to_pnode) *
845 dom->nr_vnodes);
846 for (i = 0; i < info->num_vnuma_nodes; i++)
847 dom->vnode_to_pnode[i] = info->vnuma_nodes[i].pnode;
848 }
849
850 ret = libxl__build_dom(gc, domid, info, state, dom);
851 if (ret != 0)
852 goto out;
853
854 if (xc_dom_translated(dom)) {
855 state->console_mfn = dom->console_pfn;
856 state->store_mfn = dom->xenstore_pfn;
857 state->vuart_gfn = dom->vuart_gfn;
858 } else {
859 state->console_mfn = xc_dom_p2m(dom, dom->console_pfn);
860 state->store_mfn = xc_dom_p2m(dom, dom->xenstore_pfn);
861 }
862
863 ret = 0;
864 out:
865 xc_dom_release(dom);
866 return ret == 0 ? 0 : ERROR_FAIL;
867 }
868
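/* Patch the hvm_info_table (APIC mode, vcpu count and availability) for
 * HVM guests, then wire up the xenstore and console event channels and
 * return the corresponding ring pages to the caller. */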
869 static int hvm_build_set_params(xc_interface *handle, uint32_t domid,
870 libxl_domain_build_info *info,
871 int store_evtchn, unsigned long *store_mfn,
872 int console_evtchn, unsigned long *console_mfn,
873 domid_t store_domid, domid_t console_domid)
874 {
875 struct hvm_info_table *va_hvm;
876 uint8_t *va_map, sum;
877 uint64_t str_mfn, cons_mfn;
878 int i;
879
880 if (info->type == LIBXL_DOMAIN_TYPE_HVM) {
881 va_map = xc_map_foreign_range(handle, domid,
882 XC_PAGE_SIZE, PROT_READ | PROT_WRITE,
883 HVM_INFO_PFN);
884 if (va_map == NULL)
885 return ERROR_FAIL;
886
887 va_hvm = (struct hvm_info_table *)(va_map + HVM_INFO_OFFSET);
888 va_hvm->apic_mode = libxl_defbool_val(info->apic);
889 va_hvm->nr_vcpus = info->max_vcpus;
890 memset(va_hvm->vcpu_online, 0, sizeof(va_hvm->vcpu_online));
891 memcpy(va_hvm->vcpu_online, info->avail_vcpus.map, info->avail_vcpus.size);
892 for (i = 0, sum = 0; i < va_hvm->length; i++)
893 sum += ((uint8_t *) va_hvm)[i];
894 va_hvm->checksum -= sum;
895 munmap(va_map, XC_PAGE_SIZE);
896 }
897
898 xc_hvm_param_get(handle, domid, HVM_PARAM_STORE_PFN, &str_mfn);
899 xc_hvm_param_get(handle, domid, HVM_PARAM_CONSOLE_PFN, &cons_mfn);
900 xc_hvm_param_set(handle, domid, HVM_PARAM_STORE_EVTCHN, store_evtchn);
901 xc_hvm_param_set(handle, domid, HVM_PARAM_CONSOLE_EVTCHN, console_evtchn);
902
903 *store_mfn = str_mfn;
904 *console_mfn = cons_mfn;
905
906 xc_dom_gnttab_hvm_seed(handle, domid, *console_mfn, *store_mfn, console_domid, store_domid);
907 return 0;
908 }
909
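/* Record in xenstore the guest addresses and lengths of any SMBIOS and
 * ACPI modules that the domain builder loaded for this guest. */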
910 static int hvm_build_set_xs_values(libxl__gc *gc,
911 uint32_t domid,
912 struct xc_dom_image *dom,
913 const libxl_domain_build_info *info)
914 {
915 char *path = NULL;
916 int ret = 0;
917
918 if (dom->smbios_module.guest_addr_out) {
919 path = GCSPRINTF("/local/domain/%d/"HVM_XS_SMBIOS_PT_ADDRESS, domid);
920
921 ret = libxl__xs_printf(gc, XBT_NULL, path, "0x%"PRIx64,
922 dom->smbios_module.guest_addr_out);
923 if (ret)
924 goto err;
925
926 path = GCSPRINTF("/local/domain/%d/"HVM_XS_SMBIOS_PT_LENGTH, domid);
927
928 ret = libxl__xs_printf(gc, XBT_NULL, path, "0x%x",
929 dom->smbios_module.length);
930 if (ret)
931 goto err;
932 }
933
934 /* Only one module can be passed. PVHv2 guests do not support this. */
935 if (dom->acpi_modules[0].guest_addr_out &&
936 info->type == LIBXL_DOMAIN_TYPE_HVM) {
937 path = GCSPRINTF("/local/domain/%d/"HVM_XS_ACPI_PT_ADDRESS, domid);
938
939 ret = libxl__xs_printf(gc, XBT_NULL, path, "0x%"PRIx64,
940 dom->acpi_modules[0].guest_addr_out);
941 if (ret)
942 goto err;
943
944 path = GCSPRINTF("/local/domain/%d/"HVM_XS_ACPI_PT_LENGTH, domid);
945
946 ret = libxl__xs_printf(gc, XBT_NULL, path, "0x%x",
947 dom->acpi_modules[0].length);
948 if (ret)
949 goto err;
950 }
951
952 return 0;
953
954 err:
955 LOG(ERROR, "failed to write firmware xenstore value, err: %d", ret);
956 return ret;
957 }
958
959 static int libxl__load_hvm_firmware_module(libxl__gc *gc,
960 const char *filename,
961 const char *what,
962 struct xc_hvm_firmware_module *m)
963 {
964 int datalen = 0;
965 void *data = NULL;
966 int r, rc;
967
968 LOG(DEBUG, "Loading %s: %s", what, filename);
969 r = libxl_read_file_contents(CTX, filename, &data, &datalen);
970 if (r) {
971 /*
972 * Print a message only on ENOENT, other errors are logged by the
973 * function libxl_read_file_contents().
974 */
975 if (r == ENOENT)
976 LOGEV(ERROR, r, "failed to read %s file", what);
977 rc = ERROR_FAIL;
978 goto out;
979 }
980 libxl__ptr_add(gc, data);
981 if (datalen) {
982 /* Only accept non-empty files */
983 m->data = data;
984 m->length = datalen;
985 } else {
986 LOG(ERROR, "file %s for %s is empty", filename, what);
987 rc = ERROR_INVAL;
988 goto out;
989 }
990 rc = 0;
991 out:
992 return rc;
993 }
994
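/* Load the images the guest boots from into the domain builder: hvmloader
 * (plus optional BIOS, SMBIOS and ACPI blobs) for HVM guests, or the
 * kernel (possibly behind the PV shim) and ramdisk for PVH guests. */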
995 static int libxl__domain_firmware(libxl__gc *gc,
996 libxl_domain_build_info *info,
997 libxl__domain_build_state *state,
998 struct xc_dom_image *dom)
999 {
1000 libxl_ctx *ctx = libxl__gc_owner(gc);
1001 const char *firmware = NULL;
1002 int e, rc;
1003 int datalen = 0;
1004 void *data;
1005 const char *bios_filename = NULL;
1006
1007 if (info->type == LIBXL_DOMAIN_TYPE_HVM) {
1008 if (info->u.hvm.firmware) {
1009 firmware = info->u.hvm.firmware;
1010 } else {
1011 switch (info->device_model_version)
1012 {
1013 case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN:
1014 case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN_TRADITIONAL:
1015 firmware = "hvmloader";
1016 break;
1017 default:
1018 LOG(ERROR, "invalid device model version %d",
1019 info->device_model_version);
1020 rc = ERROR_FAIL;
1021 goto out;
1022 }
1023 }
1024 }
1025
1026 if (state->pv_kernel.path != NULL &&
1027 info->type == LIBXL_DOMAIN_TYPE_PVH) {
1028
1029 if (state->shim_path) {
1030 rc = xc_dom_kernel_file(dom, state->shim_path);
1031 if (rc) {
1032 LOGE(ERROR, "xc_dom_kernel_file failed");
1033 goto out;
1034 }
1035
1036 /* We've loaded the shim, so load the kernel as a secondary module */
1037 if (state->pv_kernel.mapped) {
1038 LOG(WARN, "xc_dom_module_mem, cmdline %s",
1039 state->pv_cmdline);
1040 rc = xc_dom_module_mem(dom, state->pv_kernel.data,
1041 state->pv_kernel.size, state->pv_cmdline);
1042 if (rc) {
1043 LOGE(ERROR, "xc_dom_kernel_mem failed");
1044 goto out;
1045 }
1046 } else {
1047 LOG(WARN, "xc_dom_module_file, path %s cmdline %s",
1048 state->pv_kernel.path, state->pv_cmdline);
1049 rc = xc_dom_module_file(dom, state->pv_kernel.path, state->pv_cmdline);
1050 if (rc) {
1051 LOGE(ERROR, "xc_dom_kernel_file failed");
1052 goto out;
1053 }
1054 }
1055 } else {
1056 /* No shim, so load the kernel directly */
1057 if (state->pv_kernel.mapped) {
1058 rc = xc_dom_kernel_mem(dom, state->pv_kernel.data,
1059 state->pv_kernel.size);
1060 if (rc) {
1061 LOGE(ERROR, "xc_dom_kernel_mem failed");
1062 goto out;
1063 }
1064 } else {
1065 rc = xc_dom_kernel_file(dom, state->pv_kernel.path);
1066 if (rc) {
1067 LOGE(ERROR, "xc_dom_kernel_file failed");
1068 goto out;
1069 }
1070 }
1071 }
1072
1073 if (state->pv_ramdisk.path && strlen(state->pv_ramdisk.path)) {
1074 if (state->pv_ramdisk.mapped) {
1075 rc = xc_dom_module_mem(dom, state->pv_ramdisk.data,
1076 state->pv_ramdisk.size, NULL);
1077 if (rc) {
1078 LOGE(ERROR, "xc_dom_ramdisk_mem failed");
1079 goto out;
1080 }
1081 } else {
1082 rc = xc_dom_module_file(dom, state->pv_ramdisk.path, NULL);
1083 if (rc) {
1084 LOGE(ERROR, "xc_dom_ramdisk_file failed");
1085 goto out;
1086 }
1087 }
1088 }
1089 } else {
1090 /*
1091 * Only HVM guests should get here; PVH should always have a
1092 * kernel set at this point.
1093 */
1094 assert(info->type == LIBXL_DOMAIN_TYPE_HVM);
1095 rc = xc_dom_kernel_file(dom, libxl__abs_path(gc, firmware,
1096 libxl__xenfirmwaredir_path()));
1097 }
1098
1099 if (rc != 0) {
1100 LOGE(ERROR, "xc_dom_{kernel_file/ramdisk_file} failed");
1101 goto out;
1102 }
1103
1104 if (info->type == LIBXL_DOMAIN_TYPE_HVM &&
1105 info->device_model_version == LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN) {
1106 if (info->u.hvm.system_firmware) {
1107 bios_filename = info->u.hvm.system_firmware;
1108 } else {
1109 switch (info->u.hvm.bios) {
1110 case LIBXL_BIOS_TYPE_SEABIOS:
1111 bios_filename = libxl__seabios_path();
1112 break;
1113 case LIBXL_BIOS_TYPE_OVMF:
1114 bios_filename = libxl__ovmf_path();
1115 break;
1116 case LIBXL_BIOS_TYPE_ROMBIOS:
1117 default:
1118 abort();
1119 }
1120 }
1121 }
1122
1123 if (bios_filename) {
1124 rc = libxl__load_hvm_firmware_module(gc, bios_filename, "BIOS",
1125 &dom->system_firmware_module);
1126 if (rc) goto out;
1127 }
1128
1129 if (info->type == LIBXL_DOMAIN_TYPE_HVM &&
1130 info->u.hvm.smbios_firmware) {
1131 data = NULL;
1132 e = libxl_read_file_contents(ctx, info->u.hvm.smbios_firmware,
1133 &data, &datalen);
1134 if (e) {
1135 LOGEV(ERROR, e, "failed to read SMBIOS firmware file %s",
1136 info->u.hvm.smbios_firmware);
1137 rc = ERROR_FAIL;
1138 goto out;
1139 }
1140 libxl__ptr_add(gc, data);
1141 if (datalen) {
1142 /* Only accept non-empty files */
1143 dom->smbios_module.data = data;
1144 dom->smbios_module.length = (uint32_t)datalen;
1145 }
1146 }
1147
1148 if (info->type == LIBXL_DOMAIN_TYPE_HVM &&
1149 info->u.hvm.acpi_firmware) {
1150 data = NULL;
1151 e = libxl_read_file_contents(ctx, info->u.hvm.acpi_firmware,
1152 &data, &datalen);
1153 if (e) {
1154 LOGEV(ERROR, e, "failed to read ACPI firmware file %s",
1155 info->u.hvm.acpi_firmware);
1156 rc = ERROR_FAIL;
1157 goto out;
1158 }
1159 libxl__ptr_add(gc, data);
1160 if (datalen) {
1161 /* Only accept a non-empty file */
1162 dom->acpi_modules[0].data = data;
1163 dom->acpi_modules[0].length = (uint32_t)datalen;
1164 }
1165 }
1166
1167 return 0;
1168 out:
1169 assert(rc != 0);
1170 return rc;
1171 }
1172
1173 int libxl__build_hvm(libxl__gc *gc, uint32_t domid,
1174 libxl_domain_config *d_config,
1175 libxl__domain_build_state *state)
1176 {
1177 libxl_ctx *ctx = libxl__gc_owner(gc);
1178 int rc;
1179 uint64_t mmio_start, lowmem_end, highmem_end, mem_size;
1180 libxl_domain_build_info *const info = &d_config->b_info;
1181 struct xc_dom_image *dom = NULL;
1182 bool device_model = info->type == LIBXL_DOMAIN_TYPE_HVM ? true : false;
1183
1184 xc_dom_loginit(ctx->xch);
1185
1186 /*
1187 * If PVH and we have a shim override, use the shim cmdline.
1188 * If PVH and no shim override, use the pv cmdline.
1189 * If not PVH, use info->cmdline.
1190 */
1191 dom = xc_dom_allocate(ctx->xch, info->type == LIBXL_DOMAIN_TYPE_PVH ?
1192 (state->shim_path ? state->shim_cmdline : state->pv_cmdline) :
1193 info->cmdline, NULL);
1194 if (!dom) {
1195 LOGE(ERROR, "xc_dom_allocate failed");
1196 rc = ERROR_NOMEM;
1197 goto out;
1198 }
1199
1200 dom->container_type = XC_DOM_HVM_CONTAINER;
1201
1202 /* The params from the configuration file are in Mb, which are then
1203 * multiplied by 1 Kb. This was then divided off when calling
1204 * the old xc_hvm_build_target_mem() which then turned them to bytes.
1205 * Do all this in one step here...
1206 */
1207 mem_size = (uint64_t)(info->max_memkb - info->video_memkb) << 10;
1208 dom->target_pages = (uint64_t)(info->target_memkb - info->video_memkb) >> 2;
1209 dom->claim_enabled = libxl_defbool_val(info->claim_mode);
1210 if (info->u.hvm.mmio_hole_memkb) {
1211 uint64_t max_ram_below_4g = (1ULL << 32) -
1212 (info->u.hvm.mmio_hole_memkb << 10);
1213
1214 if (max_ram_below_4g < HVM_BELOW_4G_MMIO_START)
1215 dom->mmio_size = info->u.hvm.mmio_hole_memkb << 10;
1216 }
1217
1218 rc = libxl__domain_firmware(gc, info, state, dom);
1219 if (rc != 0) {
1220 LOG(ERROR, "initializing domain firmware failed");
1221 goto out;
1222 }
1223
1224 if (dom->target_pages == 0)
1225 dom->target_pages = mem_size >> XC_PAGE_SHIFT;
1226 if (dom->mmio_size == 0 && device_model)
1227 dom->mmio_size = HVM_BELOW_4G_MMIO_LENGTH;
1228 else if (dom->mmio_size == 0 && !device_model) {
1229 #if defined(__i386__) || defined(__x86_64__)
1230 if (libxl_defbool_val(info->apic)) {
1231 /* Make sure LAPIC_BASE_ADDRESS is below special pages */
1232 assert(((((X86_HVM_END_SPECIAL_REGION - X86_HVM_NR_SPECIAL_PAGES)
1233 << XC_PAGE_SHIFT) - LAPIC_BASE_ADDRESS)) >= XC_PAGE_SIZE);
1234 dom->mmio_size = GB(4) - LAPIC_BASE_ADDRESS;
1235 } else
1236 dom->mmio_size = GB(4) -
1237 ((X86_HVM_END_SPECIAL_REGION - X86_HVM_NR_SPECIAL_PAGES)
1238 << XC_PAGE_SHIFT);
1239 #else
1240 assert(1);
1241 #endif
1242 }
1243 lowmem_end = mem_size;
1244 highmem_end = 0;
1245 mmio_start = (1ull << 32) - dom->mmio_size;
1246 if (lowmem_end > mmio_start)
1247 {
1248 highmem_end = (1ull << 32) + (lowmem_end - mmio_start);
1249 lowmem_end = mmio_start;
1250 }
1251 dom->lowmem_end = lowmem_end;
1252 dom->highmem_end = highmem_end;
1253 dom->mmio_start = mmio_start;
1254 dom->vga_hole_size = device_model ? LIBXL_VGA_HOLE_SIZE : 0;
1255 dom->device_model = device_model;
1256
1257 rc = libxl__domain_device_construct_rdm(gc, d_config,
1258 info->u.hvm.rdm_mem_boundary_memkb*1024,
1259 dom);
1260 if (rc) {
1261 LOG(ERROR, "checking reserved device memory failed");
1262 goto out;
1263 }
1264
1265 if (info->num_vnuma_nodes != 0) {
1266 int i;
1267
1268 rc = libxl__vnuma_build_vmemrange_hvm(gc, domid, info, state, dom);
1269 if (rc != 0) {
1270 LOG(ERROR, "hvm build vmemranges failed");
1271 goto out;
1272 }
1273 rc = libxl__vnuma_config_check(gc, info, state);
1274 if (rc != 0) goto out;
1275 rc = set_vnuma_info(gc, domid, info, state);
1276 if (rc != 0) goto out;
1277
1278 dom->nr_vmemranges = state->num_vmemranges;
1279 dom->vmemranges = libxl__malloc(gc, sizeof(*dom->vmemranges) *
1280 dom->nr_vmemranges);
1281
1282 for (i = 0; i < dom->nr_vmemranges; i++) {
1283 dom->vmemranges[i].start = state->vmemranges[i].start;
1284 dom->vmemranges[i].end = state->vmemranges[i].end;
1285 dom->vmemranges[i].flags = state->vmemranges[i].flags;
1286 dom->vmemranges[i].nid = state->vmemranges[i].nid;
1287 }
1288
1289 dom->nr_vnodes = info->num_vnuma_nodes;
1290 dom->vnode_to_pnode = libxl__malloc(gc, sizeof(*dom->vnode_to_pnode) *
1291 dom->nr_vnodes);
1292 for (i = 0; i < dom->nr_vnodes; i++)
1293 dom->vnode_to_pnode[i] = info->vnuma_nodes[i].pnode;
1294 }
1295
1296 rc = libxl__build_dom(gc, domid, info, state, dom);
1297 if (rc != 0)
1298 goto out;
1299
1300 rc = libxl__arch_domain_construct_memmap(gc, d_config, domid, dom);
1301 if (rc != 0) {
1302 LOG(ERROR, "setting domain memory map failed");
1303 goto out;
1304 }
1305
1306 rc = hvm_build_set_params(ctx->xch, domid, info, state->store_port,
1307 &state->store_mfn, state->console_port,
1308 &state->console_mfn, state->store_domid,
1309 state->console_domid);
1310 if (rc != 0) {
1311 LOG(ERROR, "hvm build set params failed");
1312 goto out;
1313 }
1314
1315 rc = hvm_build_set_xs_values(gc, domid, dom, info);
1316 if (rc != 0) {
1317 LOG(ERROR, "hvm build set xenstore values failed");
1318 goto out;
1319 }
1320
1321 xc_dom_release(dom);
1322 return 0;
1323
1324 out:
1325 assert(rc != 0);
1326 if (dom != NULL) xc_dom_release(dom);
1327 return rc;
1328 }
1329
1330 int libxl__qemu_traditional_cmd(libxl__gc *gc, uint32_t domid,
1331 const char *cmd)
1332 {
1333 char *path = NULL;
1334 uint32_t dm_domid = libxl_get_stubdom_id(CTX, domid);
1335 path = DEVICE_MODEL_XS_PATH(gc, dm_domid, domid, "/command");
1336 return libxl__xs_printf(gc, XBT_NULL, path, "%s", cmd);
1337 }
1338
1339 /*==================== Miscellaneous ====================*/
1340
1341 char *libxl__uuid2string(libxl__gc *gc, const libxl_uuid uuid)
1342 {
1343 return GCSPRINTF(LIBXL_UUID_FMT, LIBXL_UUID_BYTES(uuid));
1344 }
1345
1346 const char *libxl__userdata_path(libxl__gc *gc, uint32_t domid,
1347 const char *userdata_userid,
1348 const char *wh)
1349 {
1350 libxl_ctx *ctx = libxl__gc_owner(gc);
1351 char *uuid_string, *path;
1352 libxl_dominfo info;
1353 int rc;
1354
1355 libxl_dominfo_init(&info);
1356
1357 rc = libxl_domain_info(ctx, &info, domid);
1358 if (rc) {
1359 LOGE(ERROR, "unable to find domain info for domain %"PRIu32, domid);
1360 path = NULL;
1361 goto out;
1362 }
1363 uuid_string = GCSPRINTF(LIBXL_UUID_FMT, LIBXL_UUID_BYTES(info.uuid));
1364 path = GCSPRINTF(XEN_LIB_DIR "/userdata-%s.%u.%s.%s",
1365 wh, domid, uuid_string, userdata_userid);
1366
1367 out:
1368 libxl_dominfo_dispose(&info);
1369 return path;
1370 }
1371
1372 static int userdata_delete(libxl__gc *gc, const char *path)
1373 {
1374 int r;
1375 r = unlink(path);
1376 if (r) {
1377 LOGE(ERROR, "remove failed for %s", path);
1378 return errno;
1379 }
1380 return 0;
1381 }
1382
1383 void libxl__userdata_destroyall(libxl__gc *gc, uint32_t domid)
1384 {
1385 const char *pattern;
1386 glob_t gl;
1387 int r, i;
1388
1389 pattern = libxl__userdata_path(gc, domid, "*", "?");
1390 if (!pattern)
1391 goto out;
1392
1393 gl.gl_pathc = 0;
1394 gl.gl_pathv = 0;
1395 gl.gl_offs = 0;
1396 r = glob(pattern, GLOB_ERR|GLOB_NOSORT|GLOB_MARK, 0, &gl);
1397 if (r == GLOB_NOMATCH)
1398 goto out;
1399 if (r)
1400 LOGE(ERROR, "glob failed for %s", pattern);
1401
1402 /* Note: don't delete domain-userdata-lock; it will be handled by
1403 * the unlock function.
1404 */
1405 for (i=0; i<gl.gl_pathc; i++) {
1406 if (!strstr(gl.gl_pathv[i], "domain-userdata-lock"))
1407 userdata_delete(gc, gl.gl_pathv[i]);
1408 }
1409 globfree(&gl);
1410 out:
1411 return;
1412 }
1413
1414 int libxl__userdata_store(libxl__gc *gc, uint32_t domid,
1415 const char *userdata_userid,
1416 const uint8_t *data, int datalen)
1417 {
1418 const char *filename;
1419 const char *newfilename;
1420 int e, rc;
1421 int fd = -1;
1422
1423 filename = libxl__userdata_path(gc, domid, userdata_userid, "d");
1424 if (!filename) {
1425 rc = ERROR_NOMEM;
1426 goto out;
1427 }
1428
1429 if (!datalen) {
1430 rc = userdata_delete(gc, filename);
1431 goto out;
1432 }
1433
1434 newfilename = libxl__userdata_path(gc, domid, userdata_userid, "n");
1435 if (!newfilename) {
1436 rc = ERROR_NOMEM;
1437 goto out;
1438 }
1439
1440 rc = ERROR_FAIL;
1441
1442 fd = open(newfilename, O_RDWR | O_CREAT | O_TRUNC, 0600);
1443 if (fd < 0)
1444 goto err;
1445
1446 if (libxl_write_exactly(CTX, fd, data, datalen, "userdata", newfilename))
1447 goto err;
1448
1449 if (close(fd) < 0) {
1450 fd = -1;
1451 goto err;
1452 }
1453 fd = -1;
1454
1455 if (rename(newfilename, filename))
1456 goto err;
1457
1458 rc = 0;
1459
1460 err:
1461 if (fd >= 0) {
1462 e = errno;
1463 close(fd);
1464 errno = e;
1465 }
1466
1467 if (rc)
1468 LOGE(ERROR, "cannot write/rename %s for %s", newfilename, filename);
1469 out:
1470 return rc;
1471 }
1472
1473 int libxl_userdata_store(libxl_ctx *ctx, uint32_t domid,
1474 const char *userdata_userid,
1475 const uint8_t *data, int datalen)
1476 {
1477 GC_INIT(ctx);
1478 int rc;
1479 libxl__domain_userdata_lock *lock;
1480
1481 CTX_LOCK;
1482 lock = libxl__lock_domain_userdata(gc, domid);
1483 if (!lock) {
1484 rc = ERROR_LOCK_FAIL;
1485 goto out;
1486 }
1487
1488 rc = libxl__userdata_store(gc, domid, userdata_userid,
1489 data, datalen);
1490
1491 libxl__unlock_domain_userdata(lock);
1492
1493 out:
1494 CTX_UNLOCK;
1495 GC_FREE;
1496 return rc;
1497 }
1498
1499 int libxl__userdata_retrieve(libxl__gc *gc, uint32_t domid,
1500 const char *userdata_userid,
1501 uint8_t **data_r, int *datalen_r)
1502 {
1503 const char *filename;
1504 int e, rc;
1505 int datalen = 0;
1506 void *data = 0;
1507
1508 filename = libxl__userdata_path(gc, domid, userdata_userid, "d");
1509 if (!filename) {
1510 rc = ERROR_NOMEM;
1511 goto out;
1512 }
1513
1514 e = libxl_read_file_contents(CTX, filename, data_r ? &data : 0, &datalen);
1515 if (e && errno != ENOENT) {
1516 rc = ERROR_FAIL;
1517 goto out;
1518 }
1519 if (!e && !datalen) {
1520 LOG(ERROR, "userdata file %s is empty", filename);
1521 if (data_r) assert(!*data_r);
1522 rc = ERROR_FAIL;
1523 goto out;
1524 }
1525
1526 if (data_r) *data_r = data;
1527 if (datalen_r) *datalen_r = datalen;
1528 rc = 0;
1529
1530 out:
1531 return rc;
1532 }
1533
1534 int libxl_userdata_retrieve(libxl_ctx *ctx, uint32_t domid,
1535 const char *userdata_userid,
1536 uint8_t **data_r, int *datalen_r)
1537 {
1538 GC_INIT(ctx);
1539 int rc;
1540 libxl__domain_userdata_lock *lock;
1541
1542 CTX_LOCK;
1543 lock = libxl__lock_domain_userdata(gc, domid);
1544 if (!lock) {
1545 rc = ERROR_LOCK_FAIL;
1546 goto out;
1547 }
1548
1549 rc = libxl__userdata_retrieve(gc, domid, userdata_userid,
1550 data_r, datalen_r);
1551
1552
1553 libxl__unlock_domain_userdata(lock);
1554 out:
1555 CTX_UNLOCK;
1556 GC_FREE;
1557 return rc;
1558 }
1559
1560 int libxl_userdata_unlink(libxl_ctx *ctx, uint32_t domid,
1561 const char *userdata_userid)
1562 {
1563 GC_INIT(ctx);
1564 CTX_LOCK;
1565
1566 int rc;
1567 libxl__domain_userdata_lock *lock = NULL;
1568 const char *filename;
1569
1570 lock = libxl__lock_domain_userdata(gc, domid);
1571 if (!lock) {
1572 rc = ERROR_LOCK_FAIL;
1573 goto out;
1574 }
1575
1576 filename = libxl__userdata_path(gc, domid, userdata_userid, "d");
1577 if (!filename) {
1578 rc = ERROR_FAIL;
1579 goto out;
1580 }
1581 if (unlink(filename)) {
1582 LOGE(ERROR, "error deleting userdata file: %s", filename);
1583 rc = ERROR_FAIL;
1584 goto out;
1585 }
1586
1587 rc = 0;
1588 out:
1589 if (lock)
1590 libxl__unlock_domain_userdata(lock);
1591 CTX_UNLOCK;
1592 GC_FREE;
1593 return rc;
1594 }
1595
1596 /*
1597 * Local variables:
1598 * mode: C
1599 * c-basic-offset: 4
1600 * indent-tabs-mode: nil
1601 * End:
1602 */
1603