1 /*
2 * Copyright (C) 2009 Citrix Ltd.
3 * Author Vincent Hanquez <vincent.hanquez@eu.citrix.com>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU Lesser General Public License as published
7 * by the Free Software Foundation; version 2.1 only, with the special
8 * exception on linking described in file LICENSE.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU Lesser General Public License for more details.
14 */
15
16 #include "libxl_osdeps.h" /* must come before any other headers */
17
18 #include <glob.h>
19
20 #include "libxl_internal.h"
21 #include "libxl_arch.h"
22
23 #include <xen/hvm/hvm_info_table.h>
24 #include <xen/hvm/hvm_xs_strings.h>
25 #include <xen/hvm/e820.h>
26
27 //#define DEBUG 1
28
29 libxl_domain_type libxl__domain_type(libxl__gc *gc, uint32_t domid)
30 {
31 libxl_ctx *ctx = libxl__gc_owner(gc);
32 xc_domaininfo_t info;
33 int ret;
34
35 ret = xc_domain_getinfo_single(ctx->xch, domid, &info);
36 if (ret < 0) {
37 LOGED(ERROR, domid, "unable to get dominfo");
38 return LIBXL_DOMAIN_TYPE_INVALID;
39 }
40 if (info.flags & XEN_DOMINF_hvm_guest) {
41 const char *type_path = GCSPRINTF("%s/type",
42 libxl__xs_libxl_path(gc, domid));
43 const char *type;
44 libxl_domain_type t;
45 int rc;
46
47 rc = libxl__xs_read_mandatory(gc, XBT_NULL, type_path, &type);
48 if (rc) {
49 LOG(WARN,
50 "unable to get domain type for domid=%"PRIu32", assuming HVM",
51 domid);
52 return LIBXL_DOMAIN_TYPE_HVM;
53 }
54
55 rc = libxl_domain_type_from_string(type, &t);
56 if (rc) {
57 LOG(WARN,
58 "unable to get domain type for domid=%"PRIu32", assuming HVM",
59 domid);
60 return LIBXL_DOMAIN_TYPE_HVM;
61 }
62
63 return t;
64 } else
65 return LIBXL_DOMAIN_TYPE_PV;
66 }
67
68 int libxl__domain_cpupool(libxl__gc *gc, uint32_t domid)
69 {
70 xc_domaininfo_t info;
71 int ret;
72
73 ret = xc_domain_getinfo_single(CTX->xch, domid, &info);
74 if (ret < 0)
75 {
76 LOGED(ERROR, domid, "get domaininfo failed");
77 return ERROR_FAIL;
78 }
79 return info.cpupool;
80 }
81
82 libxl_scheduler libxl__domain_scheduler(libxl__gc *gc, uint32_t domid)
83 {
84 int cpupool = libxl__domain_cpupool(gc, domid);
85 libxl_cpupoolinfo poolinfo;
86 libxl_scheduler sched = LIBXL_SCHEDULER_UNKNOWN;
87 int rc;
88
89 if (cpupool < 0)
90 return sched;
91
92 libxl_cpupoolinfo_init(&poolinfo);
93 rc = libxl_cpupool_info(CTX, &poolinfo, cpupool);
94 if (rc < 0)
95 goto out;
96
97 sched = poolinfo.sched;
98
99 out:
100 libxl_cpupoolinfo_dispose(&poolinfo);
101 return sched;
102 }
103
104 /*
105 * Two NUMA placement candidates are compared by means of the following
106 * heuristics:
107 *
108 * - the number of vcpus runnable on the candidates is considered, and
109 * candidates with fewer of them are preferred. If two candidates have
110 * the same number of runnable vcpus,
111 * - the amount of free memory in the candidates is considered, and the
112 * candidate with the greater amount of it is preferred.
113 *
114 * In fact, leaving larger memory holes maximizes the probability of being
115 * able to put other domains on the node. That hopefully means many domains
116 * will benefit from local memory accesses, but it also introduces the risk
117 * of overloading large (from a memory POV) nodes. That is exactly the
118 * effect that counting the vcpus able to run on the nodes tries to prevent.
119 *
120 * Note that this completely ignores the number of nodes each candidate
121 * spans, as the fact that fewer nodes is better is already accounted for
122 * in the algorithm.
123 */
124 static int numa_cmpf(const libxl__numa_candidate *c1,
125 const libxl__numa_candidate *c2)
126 {
127 if (c1->nr_vcpus != c2->nr_vcpus)
128 return c1->nr_vcpus - c2->nr_vcpus;
129
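    /* Break ties by preferring the candidate with more free memory */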
130 return c2->free_memkb - c1->free_memkb;
131 }
132
133 /* The actual automatic NUMA placement routine */
134 static int numa_place_domain(libxl__gc *gc, uint32_t domid,
135 libxl_domain_config *d_config)
136 {
137 libxl_domain_build_info *info = &d_config->b_info;
138 int found;
139 libxl__numa_candidate candidate;
140 libxl_bitmap cpumap, cpupool_nodemap, *map;
141 libxl_cpupoolinfo cpupool_info;
142 int i, cpupool, rc = 0;
143 uint64_t memkb;
144
145 libxl__numa_candidate_init(&candidate);
146 libxl_bitmap_init(&cpumap);
147 libxl_bitmap_init(&cpupool_nodemap);
148 libxl_cpupoolinfo_init(&cpupool_info);
149
150 /*
151 * Extract the cpumap from the cpupool the domain belongs to. In fact,
152 * it only makes sense to consider the cpus/nodes that are in there
153 * for placement.
154 */
155 rc = cpupool = libxl__domain_cpupool(gc, domid);
156 if (rc < 0)
157 goto out;
158 rc = libxl_cpupool_info(CTX, &cpupool_info, cpupool);
159 if (rc)
160 goto out;
161 map = &cpupool_info.cpumap;
162
163 /*
164 * If there's a well-defined hard affinity mask (i.e., the same one for all
165 * the vcpus), we can try to run the placement considering only the pcpus
166 * within such mask.
167 */
168 if (info->num_vcpu_hard_affinity)
169 {
170 #ifdef DEBUG
171 int j;
172
173 for (j = 0; j < info->num_vcpu_hard_affinity; j++)
174 assert(libxl_bitmap_equal(&info->vcpu_hard_affinity[0],
175 &info->vcpu_hard_affinity[j], 0));
176 #endif /* DEBUG */
177
178 rc = libxl_bitmap_and(CTX, &cpumap, &info->vcpu_hard_affinity[0],
179 &cpupool_info.cpumap);
180 if (rc)
181 goto out;
182
183 /* Hard affinity must contain at least one cpu of our cpupool */
184 if (libxl_bitmap_is_empty(&cpumap)) {
185 LOG(ERROR, "Hard affinity completely outside of domain's cpupool!");
186 rc = ERROR_INVAL;
187 goto out;
188 }
189 }
190
191 rc = libxl__domain_need_memory_calculate(gc, info, &memkb);
192 if (rc)
193 goto out;
194 if (libxl_node_bitmap_alloc(CTX, &cpupool_nodemap, 0)) {
195 rc = ERROR_FAIL;
196 goto out;
197 }
198
199 /* Find the best candidate with enough free memory and at least
200 * as many pcpus as the domain has vcpus. */
201 rc = libxl__get_numa_candidate(gc, memkb, info->max_vcpus,
202 0, 0, map, numa_cmpf, &candidate, &found);
203 if (rc)
204 goto out;
205
206 /* No suitable placement candidate was found! Let's just not touch the
207 * domain's info->cpumap. It will have affinity with all nodes/cpus. */
208 if (found == 0)
209 goto out;
210
211 /* Map the candidate's node map to the domain's info->nodemap */
212 libxl__numa_candidate_get_nodemap(gc, &candidate, &info->nodemap);
213
214 /* Avoid trying to set the affinity to nodes that might be in the
215 * candidate's nodemap but out of our cpupool. */
216 rc = libxl_cpumap_to_nodemap(CTX, &cpupool_info.cpumap,
217 &cpupool_nodemap);
218 if (rc)
219 goto out;
220
221 libxl_for_each_set_bit(i, info->nodemap) {
222 if (!libxl_bitmap_test(&cpupool_nodemap, i))
223 libxl_bitmap_reset(&info->nodemap, i);
224 }
225
226 LOG(DETAIL, "NUMA placement candidate with %d nodes, %d cpus and "
227 "%"PRIu64" KB free selected", candidate.nr_nodes,
228 candidate.nr_cpus, candidate.free_memkb / 1024);
229
230 out:
231 libxl__numa_candidate_dispose(&candidate);
232 libxl_bitmap_dispose(&cpupool_nodemap);
233 libxl_bitmap_dispose(&cpumap);
234 libxl_cpupoolinfo_dispose(&cpupool_info);
235 return rc;
236 }
237
238 int libxl__build_pre(libxl__gc *gc, uint32_t domid,
239 libxl_domain_config *d_config, libxl__domain_build_state *state)
240 {
241 libxl_domain_build_info *const info = &d_config->b_info;
242 libxl_ctx *ctx = libxl__gc_owner(gc);
243 char *xs_domid, *con_domid;
244 int rc;
245 uint64_t size;
246
247 if (xc_domain_max_vcpus(ctx->xch, domid, info->max_vcpus) != 0) {
248 LOG(ERROR, "Couldn't set max vcpu count");
249 return ERROR_FAIL;
250 }
251
252 /*
253 * Check if the domain has any CPU or node affinity already. If not, try
254 * to build up the latter via automatic NUMA placement. In fact, if
255 * numa_place_domain() manages to find a placement, info->nodemap is
256 * updated accordingly; if it does not manage to, info->nodemap is just
257 * left alone. It is then the subsequent call to
258 * libxl_domain_set_nodeaffinity() that enacts the actual placement.
259 *
260 * As far as scheduling is concerned, we achieve NUMA-aware scheduling
261 * by having the results of placement affect the soft affinity of all
262 * the vcpus of the domain. Of course, we want that iff placement is
263 * enabled and actually happens, so we only change info->cpumap_soft to
264 * reflect the placement result if that is the case.
265 */
266 if (libxl_defbool_val(info->numa_placement)) {
267 if (info->cpumap.size || info->num_vcpu_soft_affinity)
268 LOG(WARN, "Can't run NUMA placement, as a soft "
269 "affinity has been specified explicitly");
270 else if (info->nodemap.size)
271 LOG(WARN, "Can't run NUMA placement, as the domain has "
272 "NUMA node affinity set already");
273 else {
274 libxl_bitmap cpumap_soft;
275
276 rc = libxl_node_bitmap_alloc(ctx, &info->nodemap, 0);
277 if (rc)
278 return rc;
279 libxl_bitmap_set_any(&info->nodemap);
280
281 rc = libxl_cpu_bitmap_alloc(ctx, &cpumap_soft, 0);
282 if (rc)
283 return rc;
284
285 rc = numa_place_domain(gc, domid, d_config);
286 if (rc) {
287 libxl_bitmap_dispose(&cpumap_soft);
288 return rc;
289 }
290
291 /*
292 * All we need to do now is convert the result of automatic
293 * placement from a nodemap to a cpumap, and then use that cpumap
294 * as the soft affinity for all the vcpus of the domain.
295 *
296 * When calling libxl_set_vcpuaffinity_all(), it is ok to use
297 * NULL as hard affinity, as we know we don't have one, or we
298 * won't be here.
299 */
300 libxl_nodemap_to_cpumap(ctx, &info->nodemap, &cpumap_soft);
301 libxl_set_vcpuaffinity_all(ctx, domid, info->max_vcpus,
302 NULL, &cpumap_soft);
303
304 libxl_bitmap_dispose(&cpumap_soft);
305
306 /*
307 * Placement has run, so prevent it from being re-run if this
308 * same config we are using and building here is ever re-used.
309 * This means that people re-using configs will get the same
310 * results, consistently, across every re-use, which is what
311 * we expect most people to want.
312 */
313 libxl_defbool_set(&info->numa_placement, false);
314 }
315 }
316
317 if (info->nodemap.size)
318 libxl_domain_set_nodeaffinity(ctx, domid, &info->nodemap);
319
320 if (info->num_vcpu_hard_affinity || info->num_vcpu_soft_affinity) {
321 libxl_bitmap *hard_affinity, *soft_affinity;
322 int i, n_vcpus;
323
324 n_vcpus = info->num_vcpu_hard_affinity > info->num_vcpu_soft_affinity ?
325 info->num_vcpu_hard_affinity : info->num_vcpu_soft_affinity;
326
327 for (i = 0; i < n_vcpus; i++) {
328 /*
329 * Prepare hard and soft affinity pointers in a way that allows
330 * us to issue only one call to libxl_set_vcpuaffinity(), setting,
331 * for each vcpu, both hard and soft affinity "atomically".
332 */
333 hard_affinity = NULL;
334 if (info->num_vcpu_hard_affinity &&
335 i < info->num_vcpu_hard_affinity)
336 hard_affinity = &info->vcpu_hard_affinity[i];
337
338 soft_affinity = NULL;
339 if (info->num_vcpu_soft_affinity &&
340 i < info->num_vcpu_soft_affinity)
341 soft_affinity = &info->vcpu_soft_affinity[i];
342
343 if (libxl_set_vcpuaffinity(ctx, domid, i,
344 hard_affinity, soft_affinity)) {
345 LOG(ERROR, "setting affinity failed on vcpu `%d'", i);
346 return ERROR_FAIL;
347 }
348 }
349 }
350
351
352 rc = libxl__arch_extra_memory(gc, info, &size);
353 if (rc < 0) {
354 LOGE(ERROR, "Couldn't get arch extra constant memory size");
355 return ERROR_FAIL;
356 }
357
358 if (xc_domain_setmaxmem(ctx->xch, domid, info->target_memkb + size) < 0) {
359 LOGE(ERROR, "Couldn't set max memory");
360 return ERROR_FAIL;
361 }
362
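    /* Find out which domains run xenstored and xenconsoled; if the
     * corresponding xenstore keys are absent, assume domain 0. */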
363 xs_domid = xs_read(ctx->xsh, XBT_NULL, "/tool/xenstored/domid", NULL);
364 state->store_domid = xs_domid ? atoi(xs_domid) : 0;
365 free(xs_domid);
366
367 con_domid = xs_read(ctx->xsh, XBT_NULL, "/tool/xenconsoled/domid", NULL);
368 state->console_domid = con_domid ? atoi(con_domid) : 0;
369 free(con_domid);
370
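    /* Allocate unbound event channels in the new domain for the xenstore
     * and console rings; the remote ends are the respective backend domains. */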
371 state->store_port = xc_evtchn_alloc_unbound(ctx->xch, domid, state->store_domid);
372 state->console_port = xc_evtchn_alloc_unbound(ctx->xch, domid, state->console_domid);
373
374 rc = libxl__arch_domain_create(gc, d_config, state, domid);
375 if (rc) goto out;
376
377 /* Construct a CPUID policy, but only for brand new domains. Domains
378 * being migrated-in/restored have CPUID handled during the
379 * static_data_done() callback. */
380 if (!state->restore && !state->soft_reset)
381 rc = libxl__cpuid_legacy(ctx, domid, false, info);
382
383 out:
384 return rc;
385 }
386
387 static int set_vnuma_affinity(libxl__gc *gc, uint32_t domid,
388 libxl_domain_build_info *info)
389 {
390 libxl_bitmap cpumap;
391 libxl_vnode_info *v;
392 unsigned int i, j;
393 int rc = 0;
394
395 libxl_bitmap_init(&cpumap);
396
397 rc = libxl_cpu_bitmap_alloc(CTX, &cpumap, 0);
398 if (rc) {
399 LOG(ERROR, "Can't allocate nodemap");
400 goto out;
401 }
402
403 /*
404 * For each vcpu in each vnode, set its soft affinity to
405 * the pcpus belonging to the pnode the vnode is on
406 */
407 for (i = 0; i < info->num_vnuma_nodes; i++) {
408 v = &info->vnuma_nodes[i];
409
410 rc = libxl_node_to_cpumap(CTX, v->pnode, &cpumap);
411 if (rc) {
412 LOG(ERROR, "Can't get cpumap for vnode %d", i);
413 goto out;
414 }
415
416 libxl_for_each_set_bit(j, v->vcpus) {
417 rc = libxl_set_vcpuaffinity(CTX, domid, j, NULL, &cpumap);
418 if (rc) {
419 LOG(ERROR, "Can't set cpu affinity for %d", j);
420 goto out;
421 }
422 }
423 }
424
425 out:
426 libxl_bitmap_dispose(&cpumap);
427 return rc;
428 }
429
430 int libxl__build_post(libxl__gc *gc, uint32_t domid,
431 libxl_domain_build_info *info,
432 libxl__domain_build_state *state,
433 char **vms_ents, char **local_ents)
434 {
435 libxl_ctx *ctx = libxl__gc_owner(gc);
436 char *dom_path, *vm_path;
437 xs_transaction_t t;
438 char **ents;
439 int i, rc;
440
441 if (info->num_vnuma_nodes && !info->num_vcpu_soft_affinity) {
442 rc = set_vnuma_affinity(gc, domid, info);
443 if (rc)
444 return rc;
445 }
446
447 rc = libxl_domain_sched_params_set(CTX, domid, &info->sched_params);
448 if (rc)
449 return rc;
450
451 if (info->type == LIBXL_DOMAIN_TYPE_HVM
452 && !libxl_ms_vm_genid_is_zero(&info->u.hvm.ms_vm_genid)) {
453 rc = libxl__ms_vm_genid_set(gc, domid,
454 &info->u.hvm.ms_vm_genid);
455 if (rc) {
456 LOG(ERROR, "Failed to set VM Generation ID");
457 return rc;
458 }
459 }
460
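    /* Build the list of xenstore entries written under the domain path:
     * memory limits, domid, store ring details and one
     * cpu/N/availability node per vcpu. */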
461 ents = libxl__calloc(gc, 12 + (info->max_vcpus * 2) + 2, sizeof(char *));
462 ents[0] = "memory/static-max";
463 ents[1] = GCSPRINTF("%"PRId64, info->max_memkb);
464 ents[2] = "memory/target";
465 ents[3] = GCSPRINTF("%"PRId64, info->target_memkb -
466 libxl__get_targetmem_fudge(gc, info));
467 ents[4] = "memory/videoram";
468 ents[5] = GCSPRINTF("%"PRId64, info->video_memkb);
469 ents[6] = "domid";
470 ents[7] = GCSPRINTF("%d", domid);
471 ents[8] = "store/port";
472 ents[9] = GCSPRINTF("%"PRIu32, state->store_port);
473 ents[10] = "store/ring-ref";
474 ents[11] = GCSPRINTF("%lu", state->store_mfn);
475 for (i = 0; i < info->max_vcpus; i++) {
476 ents[12+(i*2)] = GCSPRINTF("cpu/%d/availability", i);
477 ents[12+(i*2)+1] = libxl_bitmap_test(&info->avail_vcpus, i)
478 ? "online" : "offline";
479 }
480
481 dom_path = libxl__xs_get_dompath(gc, domid);
482 if (!dom_path) {
483 return ERROR_FAIL;
484 }
485
486 vm_path = xs_read(ctx->xsh, XBT_NULL, GCSPRINTF("%s/vm", dom_path), NULL);
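    /* Write the domain, local and VM entries in a single xenstore
     * transaction, retrying if it fails with EAGAIN (i.e. it raced with
     * another writer). */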
487 retry_transaction:
488 t = xs_transaction_start(ctx->xsh);
489
490 libxl__xs_writev(gc, t, dom_path, ents);
491 libxl__xs_writev(gc, t, dom_path, local_ents);
492 libxl__xs_writev(gc, t, vm_path, vms_ents);
493
494 if (!xs_transaction_end(ctx->xsh, t, 0))
495 if (errno == EAGAIN)
496 goto retry_transaction;
497
498 if (info->xenstore_feature_mask != ~0U) {
499 unsigned int features;
500
501 if (xs_get_features_supported(ctx->xsh, &features) &&
502 !xs_set_features_domain(ctx->xsh, domid,
503 features & info->xenstore_feature_mask)) {
504 LOGED(ERROR, domid, "Failed to set Xenstore features");
505 rc = ERROR_FAIL;
506 goto out;
507 }
508 }
509
510 xs_introduce_domain(ctx->xsh, domid, state->store_mfn, state->store_port);
511
512 out:
513 free(vm_path);
514 return rc;
515 }
516
517 static int set_vnuma_info(libxl__gc *gc, uint32_t domid,
518 const libxl_domain_build_info *info,
519 const libxl__domain_build_state *state)
520 {
521 int rc = 0;
522 unsigned int i, nr_vdistance;
523 unsigned int *vcpu_to_vnode, *vnode_to_pnode, *vdistance = NULL;
524
525 vcpu_to_vnode = libxl__calloc(gc, info->max_vcpus,
526 sizeof(unsigned int));
527 vnode_to_pnode = libxl__calloc(gc, info->num_vnuma_nodes,
528 sizeof(unsigned int));
529
530 nr_vdistance = info->num_vnuma_nodes * info->num_vnuma_nodes;
531 vdistance = libxl__calloc(gc, nr_vdistance, sizeof(unsigned int));
532
533 for (i = 0; i < info->num_vnuma_nodes; i++) {
534 libxl_vnode_info *v = &info->vnuma_nodes[i];
535 int j;
536
537 /* vnode to pnode mapping */
538 vnode_to_pnode[i] = v->pnode;
539
540 /* vcpu to vnode mapping */
541 libxl_for_each_set_bit(j, v->vcpus)
542 vcpu_to_vnode[j] = i;
543
544 /* node distances */
545 assert(info->num_vnuma_nodes == v->num_distances);
546 memcpy(vdistance + (i * info->num_vnuma_nodes),
547 v->distances,
548 v->num_distances * sizeof(unsigned int));
549 }
550
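    /* Push the assembled vNUMA topology (memory ranges, distance table,
     * vcpu-to-vnode and vnode-to-pnode mappings) to the hypervisor. */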
551 if (xc_domain_setvnuma(CTX->xch, domid, info->num_vnuma_nodes,
552 state->num_vmemranges, info->max_vcpus,
553 state->vmemranges, vdistance,
554 vcpu_to_vnode, vnode_to_pnode) < 0) {
555 LOGE(ERROR, "xc_domain_setvnuma failed");
556 rc = ERROR_FAIL;
557 }
558
559 return rc;
560 }
561
562 static int libxl__build_dom(libxl__gc *gc, uint32_t domid,
563 libxl_domain_config *d_config, libxl__domain_build_state *state,
564 struct xc_dom_image *dom)
565 {
566 libxl_domain_build_info *const info = &d_config->b_info;
567 uint64_t mem_kb;
568 int ret;
569
570 if ( (ret = xc_dom_boot_xen_init(dom, CTX->xch, domid)) != 0 ) {
571 LOGE(ERROR, "xc_dom_boot_xen_init failed");
572 goto out;
573 }
574 #ifdef GUEST_RAM_BASE
575 if ( (ret = xc_dom_rambase_init(dom, GUEST_RAM_BASE)) != 0 ) {
576 LOGE(ERROR, "xc_dom_rambase failed");
577 goto out;
578 }
579 #endif
580 if ( (ret = xc_dom_parse_image(dom)) != 0 ) {
581 LOG(ERROR, "xc_dom_parse_image failed");
582 goto out;
583 }
584 if ( (ret = libxl__arch_domain_init_hw_description(gc, d_config, state, dom)) != 0 ) {
585 LOGE(ERROR, "libxl__arch_domain_init_hw_description failed");
586 goto out;
587 }
588
589 mem_kb = dom->container_type == XC_DOM_HVM_CONTAINER ?
590 (info->max_memkb - info->video_memkb) : info->target_memkb;
591 if ( (ret = xc_dom_mem_init(dom, mem_kb / 1024)) != 0 ) {
592 LOGE(ERROR, "xc_dom_mem_init failed");
593 goto out;
594 }
595 if ( (ret = xc_dom_boot_mem_init(dom)) != 0 ) {
596 LOGE(ERROR, "xc_dom_boot_mem_init failed");
597 goto out;
598 }
599 if ( (ret = libxl__arch_domain_finalise_hw_description(gc, domid, d_config, dom)) != 0 ) {
600 LOGE(ERROR, "libxl__arch_domain_finalise_hw_description failed");
601 goto out;
602 }
603 if ( (ret = xc_dom_build_image(dom)) != 0 ) {
604 LOGE(ERROR, "xc_dom_build_image failed");
605 goto out;
606 }
607 if ( (ret = xc_dom_boot_image(dom)) != 0 ) {
608 LOGE(ERROR, "xc_dom_boot_image failed");
609 goto out;
610 }
611 if ( (ret = xc_dom_gnttab_init(dom)) != 0 ) {
612 LOGE(ERROR, "xc_dom_gnttab_init failed");
613 goto out;
614 }
615 if ((ret = libxl__arch_build_dom_finish(gc, info, dom, state)) != 0) {
616 LOGE(ERROR, "libxl__arch_build_dom_finish failed");
617 goto out;
618 }
619
620 out:
621 return ret != 0 ? ERROR_FAIL : 0;
622 }
623
624 int libxl__build_pv(libxl__gc *gc, uint32_t domid,
625 libxl_domain_config *d_config, libxl__domain_build_state *state)
626 {
627 libxl_ctx *ctx = libxl__gc_owner(gc);
628 libxl_domain_build_info *const info = &d_config->b_info;
629 struct xc_dom_image *dom;
630 int ret;
631 int flags = 0;
632
633 xc_dom_loginit(ctx->xch);
634
635 dom = xc_dom_allocate(ctx->xch, state->pv_cmdline, info->u.pv.features);
636 if (!dom) {
637 LOGE(ERROR, "xc_dom_allocate failed");
638 return ERROR_FAIL;
639 }
640
641 dom->container_type = XC_DOM_PV_CONTAINER;
642
643 LOG(DEBUG, "pv kernel mapped %d path %s", state->pv_kernel.mapped, state->pv_kernel.path);
644
645 if (state->pv_kernel.mapped) {
646 ret = xc_dom_kernel_mem(dom,
647 state->pv_kernel.data,
648 state->pv_kernel.size);
649 if ( ret != 0) {
650 LOGE(ERROR, "xc_dom_kernel_mem failed");
651 goto out;
652 }
653 } else {
654 ret = xc_dom_kernel_file(dom, state->pv_kernel.path);
655 if ( ret != 0) {
656 LOGE(ERROR, "xc_dom_kernel_file failed");
657 goto out;
658 }
659 }
660
661 if ( state->pv_ramdisk.path && strlen(state->pv_ramdisk.path) ) {
662 if (state->pv_ramdisk.mapped) {
663 if ( (ret = xc_dom_module_mem(dom, state->pv_ramdisk.data, state->pv_ramdisk.size, NULL)) != 0 ) {
664 LOGE(ERROR, "xc_dom_ramdisk_mem failed");
665 goto out;
666 }
667 } else {
668 if ( (ret = xc_dom_module_file(dom, state->pv_ramdisk.path, NULL)) != 0 ) {
669 LOGE(ERROR, "xc_dom_ramdisk_file failed");
670 goto out;
671 }
672 }
673 }
674
675 dom->flags = flags;
676 dom->console_evtchn = state->console_port;
677 dom->console_domid = state->console_domid;
678 dom->xenstore_evtchn = state->store_port;
679 dom->xenstore_domid = state->store_domid;
680 dom->claim_enabled = libxl_defbool_val(info->claim_mode);
681 dom->max_vcpus = info->max_vcpus;
682
683 if (info->num_vnuma_nodes != 0) {
684 unsigned int i;
685
686 ret = libxl__vnuma_build_vmemrange_pv(gc, domid, info, state);
687 if (ret) {
688 LOGE(ERROR, "cannot build vmemranges");
689 goto out;
690 }
691 ret = libxl__vnuma_config_check(gc, info, state);
692 if (ret) goto out;
693
694 ret = set_vnuma_info(gc, domid, info, state);
695 if (ret) goto out;
696
697 dom->nr_vmemranges = state->num_vmemranges;
698 dom->vmemranges = xc_dom_malloc(dom, sizeof(*dom->vmemranges) *
699 dom->nr_vmemranges);
700
701 for (i = 0; i < dom->nr_vmemranges; i++) {
702 dom->vmemranges[i].start = state->vmemranges[i].start;
703 dom->vmemranges[i].end = state->vmemranges[i].end;
704 dom->vmemranges[i].flags = state->vmemranges[i].flags;
705 dom->vmemranges[i].nid = state->vmemranges[i].nid;
706 }
707
708 dom->nr_vnodes = info->num_vnuma_nodes;
709 dom->vnode_to_pnode = xc_dom_malloc(dom, sizeof(*dom->vnode_to_pnode) *
710 dom->nr_vnodes);
711 for (i = 0; i < info->num_vnuma_nodes; i++)
712 dom->vnode_to_pnode[i] = info->vnuma_nodes[i].pnode;
713 }
714
715 ret = libxl__build_dom(gc, domid, d_config, state, dom);
716 if (ret != 0)
717 goto out;
718
719 if (xc_dom_translated(dom)) {
720 state->console_mfn = dom->console_pfn;
721 state->store_mfn = dom->xenstore_pfn;
722 state->vuart_gfn = dom->vuart_gfn;
723 } else {
724 state->console_mfn = xc_dom_p2m(dom, dom->console_pfn);
725 state->store_mfn = xc_dom_p2m(dom, dom->xenstore_pfn);
726 }
727
728 ret = 0;
729 out:
730 xc_dom_release(dom);
731 return ret == 0 ? 0 : ERROR_FAIL;
732 }
733
734 static int hvm_build_set_params(xc_interface *handle, uint32_t domid,
735 libxl_domain_build_info *info)
736 {
737 struct hvm_info_table *va_hvm;
738 uint8_t *va_map, sum;
739 int i;
740
741 if (info->type == LIBXL_DOMAIN_TYPE_HVM) {
742 va_map = xc_map_foreign_range(handle, domid,
743 XC_PAGE_SIZE, PROT_READ | PROT_WRITE,
744 HVM_INFO_PFN);
745 if (va_map == NULL)
746 return ERROR_FAIL;
747
748 va_hvm = (struct hvm_info_table *)(va_map + HVM_INFO_OFFSET);
749 va_hvm->apic_mode = libxl_defbool_val(info->apic);
750 va_hvm->nr_vcpus = info->max_vcpus;
751 memset(va_hvm->vcpu_online, 0, sizeof(va_hvm->vcpu_online));
752 memcpy(va_hvm->vcpu_online, info->avail_vcpus.map, info->avail_vcpus.size);
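        /* Recompute the checksum so that the hvm_info_table bytes sum to
         * zero again after the modifications above. */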
753 for (i = 0, sum = 0; i < va_hvm->length; i++)
754 sum += ((uint8_t *) va_hvm)[i];
755 va_hvm->checksum -= sum;
756 munmap(va_map, XC_PAGE_SIZE);
757 }
758
759 return 0;
760 }
761
762 static int hvm_build_set_xs_values(libxl__gc *gc,
763 uint32_t domid,
764 struct xc_dom_image *dom,
765 const libxl_domain_build_info *info)
766 {
767 char *path = NULL;
768 int num_oem = 1;
769 int ret = 0;
770
771 if (dom->smbios_module.guest_addr_out) {
772 path = GCSPRINTF("/local/domain/%d/"HVM_XS_SMBIOS_PT_ADDRESS, domid);
773
774 ret = libxl__xs_printf(gc, XBT_NULL, path, "0x%"PRIx64,
775 dom->smbios_module.guest_addr_out);
776 if (ret)
777 goto err;
778
779 path = GCSPRINTF("/local/domain/%d/"HVM_XS_SMBIOS_PT_LENGTH, domid);
780
781 ret = libxl__xs_printf(gc, XBT_NULL, path, "0x%x",
782 dom->smbios_module.length);
783 if (ret)
784 goto err;
785 }
786
787 for (int i = 0; i < info->u.hvm.num_smbios; i++) {
788 char *p;
789 if (info->u.hvm.smbios[i].key == LIBXL_SMBIOS_TYPE_OEM) {
790 if (num_oem > 99) {
791 LOGD(ERROR, domid, "More than 99 SMBIOS OEM strings specified");
792 ret = ERROR_INVAL;
793 goto err;
794 }
795 path = GCSPRINTF("/local/domain/%d/"HVM_XS_OEM_STRINGS, domid,
796 num_oem);
797 num_oem++;
798 } else {
799 path = GCSPRINTF("/local/domain/%d/"HVM_XS_BIOS_STRINGS"/%s", domid,
800 libxl_smbios_type_to_string(info->u.hvm.smbios[i].key));
801 }
802
803 /* Convert libxl_smbios_type string to xenstore path that hvmloader
804 * will use, as defined by HVM_XS_*. That is, convert the '_' to '-'. */
805 p = strrchr(path, '/');
806 for ( ; *p; p++) {
807 if (*p == '_')
808 *p = '-';
809 }
810
811 LOGD(DEBUG, domid, "Writing %s = \"%s\"", path,
812 info->u.hvm.smbios[i].value);
813 ret = libxl__xs_printf(gc, XBT_NULL, path, "%s",
814 info->u.hvm.smbios[i].value);
815 if (ret)
816 goto err;
817 }
818
819 /* Only one module can be passed. PVHv2 guests do not support this. */
820 if (dom->acpi_modules[0].guest_addr_out &&
821 info->type == LIBXL_DOMAIN_TYPE_HVM) {
822 path = GCSPRINTF("/local/domain/%d/"HVM_XS_ACPI_PT_ADDRESS, domid);
823
824 ret = libxl__xs_printf(gc, XBT_NULL, path, "0x%"PRIx64,
825 dom->acpi_modules[0].guest_addr_out);
826 if (ret)
827 goto err;
828
829 path = GCSPRINTF("/local/domain/%d/"HVM_XS_ACPI_PT_LENGTH, domid);
830
831 ret = libxl__xs_printf(gc, XBT_NULL, path, "0x%x",
832 dom->acpi_modules[0].length);
833 if (ret)
834 goto err;
835 }
836
837 if (info->type == LIBXL_DOMAIN_TYPE_HVM) {
838 path = GCSPRINTF("/local/domain/%d/" HVM_XS_XEN_PLATFORM_PCI_BAR_UC,
839 domid);
840 ret = libxl__xs_printf(gc, XBT_NULL, path, "%d",
841 libxl_defbool_val(info->u.hvm.xen_platform_pci_bar_uc));
842 if (ret)
843 goto err;
844 }
845
846 return 0;
847
848 err:
849 LOG(ERROR, "failed to write firmware xenstore value, err: %d", ret);
850 return ret;
851 }
852
853 static int libxl__load_hvm_firmware_module(libxl__gc *gc,
854 const char *filename,
855 const char *what,
856 struct xc_hvm_firmware_module *m)
857 {
858 int datalen = 0;
859 void *data = NULL;
860 int r, rc;
861
862 LOG(DEBUG, "Loading %s: %s", what, filename);
863 r = libxl_read_file_contents(CTX, filename, &data, &datalen);
864 if (r) {
865 /*
866 * Print a message only on ENOENT, other errors are logged by the
867 * function libxl_read_file_contents().
868 */
869 if (r == ENOENT)
870 LOGEV(ERROR, r, "failed to read %s file", what);
871 rc = ERROR_FAIL;
872 goto out;
873 }
874 libxl__ptr_add(gc, data);
875 if (datalen) {
876 /* Only accept non-empty files */
877 m->data = data;
878 m->length = datalen;
879 } else {
880 LOG(ERROR, "file %s for %s is empty", filename, what);
881 rc = ERROR_INVAL;
882 goto out;
883 }
884 rc = 0;
885 out:
886 return rc;
887 }
888
889 static int libxl__domain_firmware(libxl__gc *gc,
890 libxl_domain_build_info *info,
891 libxl__domain_build_state *state,
892 struct xc_dom_image *dom)
893 {
894 libxl_ctx *ctx = libxl__gc_owner(gc);
895 const char *firmware = NULL;
896 int e, rc;
897 int datalen = 0;
898 void *data;
899 const char *bios_filename = NULL;
900
901 if (info->type == LIBXL_DOMAIN_TYPE_HVM) {
902 if (info->u.hvm.firmware) {
903 firmware = info->u.hvm.firmware;
904 } else {
905 switch (info->device_model_version)
906 {
907 case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN:
908 firmware = "hvmloader";
909 break;
910 default:
911 LOG(ERROR, "invalid device model version %d",
912 info->device_model_version);
913 rc = ERROR_FAIL;
914 goto out;
915 }
916 }
917 }
918
919 if (state->pv_kernel.path != NULL &&
920 info->type == LIBXL_DOMAIN_TYPE_PVH) {
921
922 if (state->shim_path) {
923 rc = xc_dom_kernel_file(dom, state->shim_path);
924 if (rc) {
925 LOGE(ERROR, "xc_dom_kernel_file failed");
926 goto out;
927 }
928
929 /* We've loaded the shim, so load the kernel as a secondary module */
930 if (state->pv_kernel.mapped) {
931 LOG(DEBUG, "xc_dom_module_mem, cmdline %s",
932 state->pv_cmdline);
933 rc = xc_dom_module_mem(dom, state->pv_kernel.data,
934 state->pv_kernel.size, state->pv_cmdline);
935 if (rc) {
936 LOGE(ERROR, "xc_dom_kernel_mem failed");
937 goto out;
938 }
939 } else {
940 LOG(DEBUG, "xc_dom_module_file, path %s cmdline %s",
941 state->pv_kernel.path, state->pv_cmdline);
942 rc = xc_dom_module_file(dom, state->pv_kernel.path, state->pv_cmdline);
943 if (rc) {
944 LOGE(ERROR, "xc_dom_kernel_file failed");
945 goto out;
946 }
947 }
948 } else {
949 /* No shim, so load the kernel directly */
950 if (state->pv_kernel.mapped) {
951 rc = xc_dom_kernel_mem(dom, state->pv_kernel.data,
952 state->pv_kernel.size);
953 if (rc) {
954 LOGE(ERROR, "xc_dom_kernel_mem failed");
955 goto out;
956 }
957 } else {
958 rc = xc_dom_kernel_file(dom, state->pv_kernel.path);
959 if (rc) {
960 LOGE(ERROR, "xc_dom_kernel_file failed");
961 goto out;
962 }
963 }
964 }
965
966 if (state->pv_ramdisk.path && strlen(state->pv_ramdisk.path)) {
967 if (state->pv_ramdisk.mapped) {
968 rc = xc_dom_module_mem(dom, state->pv_ramdisk.data,
969 state->pv_ramdisk.size, NULL);
970 if (rc) {
971 LOGE(ERROR, "xc_dom_ramdisk_mem failed");
972 goto out;
973 }
974 } else {
975 rc = xc_dom_module_file(dom, state->pv_ramdisk.path, NULL);
976 if (rc) {
977 LOGE(ERROR, "xc_dom_ramdisk_file failed");
978 goto out;
979 }
980 }
981 }
982 } else {
983 /*
984 * Only HVM guests should get here; PVH should always have a
985 * kernel set at this point.
986 */
987 assert(info->type == LIBXL_DOMAIN_TYPE_HVM);
988 rc = xc_dom_kernel_file(dom, libxl__abs_path(gc, firmware,
989 libxl__xenfirmwaredir_path()));
990 }
991
992 if (rc != 0) {
993 LOGE(ERROR, "xc_dom_{kernel_file/ramdisk_file} failed");
994 goto out;
995 }
996
997 if (info->type == LIBXL_DOMAIN_TYPE_HVM &&
998 info->device_model_version == LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN) {
999 if (info->u.hvm.system_firmware) {
1000 bios_filename = info->u.hvm.system_firmware;
1001 } else {
1002 switch (info->u.hvm.bios) {
1003 case LIBXL_BIOS_TYPE_SEABIOS:
1004 bios_filename = libxl__seabios_path();
1005 break;
1006 case LIBXL_BIOS_TYPE_OVMF:
1007 bios_filename = libxl__ovmf_path();
1008 break;
1009 case LIBXL_BIOS_TYPE_ROMBIOS:
1010 default:
1011 abort();
1012 }
1013 }
1014 }
1015
1016 if (bios_filename) {
1017 rc = libxl__load_hvm_firmware_module(gc, bios_filename, "BIOS",
1018 &dom->system_firmware_module);
1019 if (rc) goto out;
1020 }
1021
1022 if (info->type == LIBXL_DOMAIN_TYPE_HVM &&
1023 info->u.hvm.bios == LIBXL_BIOS_TYPE_ROMBIOS &&
1024 libxl__ipxe_path()) {
1025 const char *fp = libxl__ipxe_path();
1026 rc = xc_dom_module_file(dom, fp, "ipxe");
1027
1028 if (rc) {
1029 LOGE(ERROR, "failed to load IPXE %s (%d)", fp, rc);
1030 rc = ERROR_FAIL;
1031 goto out;
1032 }
1033 }
1034
1035 if (info->type == LIBXL_DOMAIN_TYPE_HVM &&
1036 info->u.hvm.smbios_firmware) {
1037 data = NULL;
1038 e = libxl_read_file_contents(ctx, info->u.hvm.smbios_firmware,
1039 &data, &datalen);
1040 if (e) {
1041 LOGEV(ERROR, e, "failed to read SMBIOS firmware file %s",
1042 info->u.hvm.smbios_firmware);
1043 rc = ERROR_FAIL;
1044 goto out;
1045 }
1046 libxl__ptr_add(gc, data);
1047 if (datalen) {
1048 /* Only accept non-empty files */
1049 dom->smbios_module.data = data;
1050 dom->smbios_module.length = (uint32_t)datalen;
1051 }
1052 }
1053
1054 if (info->type == LIBXL_DOMAIN_TYPE_HVM &&
1055 info->u.hvm.acpi_firmware) {
1056 data = NULL;
1057 e = libxl_read_file_contents(ctx, info->u.hvm.acpi_firmware,
1058 &data, &datalen);
1059 if (e) {
1060 LOGEV(ERROR, e, "failed to read ACPI firmware file %s",
1061 info->u.hvm.acpi_firmware);
1062 rc = ERROR_FAIL;
1063 goto out;
1064 }
1065 libxl__ptr_add(gc, data);
1066 if (datalen) {
1067 /* Only accept a non-empty file */
1068 dom->acpi_modules[0].data = data;
1069 dom->acpi_modules[0].length = (uint32_t)datalen;
1070 }
1071 }
1072
1073 return 0;
1074 out:
1075 assert(rc != 0);
1076 return rc;
1077 }
1078
1079 int libxl__build_hvm(libxl__gc *gc, uint32_t domid,
1080 libxl_domain_config *d_config,
1081 libxl__domain_build_state *state)
1082 {
1083 libxl_ctx *ctx = libxl__gc_owner(gc);
1084 int rc;
1085 uint64_t mmio_start, lowmem_end, highmem_end, mem_size;
1086 libxl_domain_build_info *const info = &d_config->b_info;
1087 struct xc_dom_image *dom = NULL;
1088 bool device_model = info->type == LIBXL_DOMAIN_TYPE_HVM ? true : false;
1089
1090 xc_dom_loginit(ctx->xch);
1091
1092 /*
1093 * If PVH and we have a shim override, use the shim cmdline.
1094 * If PVH and no shim override, use the pv cmdline.
1095 * If not PVH, use info->cmdline.
1096 */
1097 dom = xc_dom_allocate(ctx->xch, info->type == LIBXL_DOMAIN_TYPE_PVH ?
1098 (state->shim_path ? state->shim_cmdline : state->pv_cmdline) :
1099 info->cmdline, NULL);
1100 if (!dom) {
1101 LOGE(ERROR, "xc_dom_allocate failed");
1102 rc = ERROR_NOMEM;
1103 goto out;
1104 }
1105
1106 dom->container_type = XC_DOM_HVM_CONTAINER;
1107
1108 /* The memory params from the configuration file are given in MB and
1109 * have already been multiplied by 1 KB, i.e. they arrive here in kB.
1110 * The old xc_hvm_build_target_mem() used to divide that back out and
1111 * turn them into bytes. Do all this in one step here...
1112 */
1113 mem_size = (uint64_t)(info->max_memkb - info->video_memkb) << 10;
1114 dom->target_pages = (uint64_t)(info->target_memkb - info->video_memkb) >> 2;
1115 dom->claim_enabled = libxl_defbool_val(info->claim_mode);
1116 if (info->u.hvm.mmio_hole_memkb) {
1117 uint64_t max_ram_below_4g = (1ULL << 32) -
1118 (info->u.hvm.mmio_hole_memkb << 10);
1119
1120 if (max_ram_below_4g < HVM_BELOW_4G_MMIO_START)
1121 dom->mmio_size = info->u.hvm.mmio_hole_memkb << 10;
1122 }
1123
1124 rc = libxl__domain_firmware(gc, info, state, dom);
1125 if (rc != 0) {
1126 LOG(ERROR, "initializing domain firmware failed");
1127 goto out;
1128 }
1129
1130 if (dom->target_pages == 0)
1131 dom->target_pages = mem_size >> XC_PAGE_SHIFT;
1132 if (dom->mmio_size == 0 && device_model)
1133 dom->mmio_size = HVM_BELOW_4G_MMIO_LENGTH;
1134 else if (dom->mmio_size == 0 && !device_model) {
1135 #if defined(__i386__) || defined(__x86_64__)
1136 /*
1137 * Make sure the local APIC page, the ACPI tables and the special pages
1138 * are inside the MMIO hole.
1139 */
1140 xen_paddr_t start =
1141 (X86_HVM_END_SPECIAL_REGION - X86_HVM_NR_SPECIAL_PAGES) <<
1142 XC_PAGE_SHIFT;
1143
1144 start = min_t(xen_paddr_t, start, LAPIC_BASE_ADDRESS);
1145 start = min_t(xen_paddr_t, start, ACPI_INFO_PHYSICAL_ADDRESS);
1146 dom->mmio_size = GB(4) - start;
1147 #else
1148 assert(1);
1149 #endif
1150 }
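    /* Lay out guest memory around the MMIO hole: RAM that would overlap
     * the hole below 4GB is relocated above 4GB instead. */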
1151 lowmem_end = mem_size;
1152 highmem_end = 0;
1153 mmio_start = (1ull << 32) - dom->mmio_size;
1154 if (lowmem_end > mmio_start)
1155 {
1156 highmem_end = (1ull << 32) + (lowmem_end - mmio_start);
1157 lowmem_end = mmio_start;
1158 }
1159 dom->lowmem_end = lowmem_end;
1160 dom->highmem_end = highmem_end;
1161 dom->mmio_start = mmio_start;
1162 dom->vga_hole_size = device_model ? LIBXL_VGA_HOLE_SIZE : 0;
1163 dom->device_model = device_model;
1164 dom->max_vcpus = info->max_vcpus;
1165 dom->console_evtchn = state->console_port;
1166 dom->console_domid = state->console_domid;
1167 dom->xenstore_evtchn = state->store_port;
1168 dom->xenstore_domid = state->store_domid;
1169
1170 rc = libxl__domain_device_construct_rdm(gc, d_config,
1171 info->u.hvm.rdm_mem_boundary_memkb*1024,
1172 dom);
1173 if (rc) {
1174 LOG(ERROR, "checking reserved device memory failed");
1175 goto out;
1176 }
1177
1178 if (info->num_vnuma_nodes != 0) {
1179 int i;
1180
1181 rc = libxl__vnuma_build_vmemrange_hvm(gc, domid, info, state, dom);
1182 if (rc != 0) {
1183 LOG(ERROR, "hvm build vmemranges failed");
1184 goto out;
1185 }
1186 rc = libxl__vnuma_config_check(gc, info, state);
1187 if (rc != 0) goto out;
1188 rc = set_vnuma_info(gc, domid, info, state);
1189 if (rc != 0) goto out;
1190
1191 dom->nr_vmemranges = state->num_vmemranges;
1192 dom->vmemranges = libxl__malloc(gc, sizeof(*dom->vmemranges) *
1193 dom->nr_vmemranges);
1194
1195 for (i = 0; i < dom->nr_vmemranges; i++) {
1196 dom->vmemranges[i].start = state->vmemranges[i].start;
1197 dom->vmemranges[i].end = state->vmemranges[i].end;
1198 dom->vmemranges[i].flags = state->vmemranges[i].flags;
1199 dom->vmemranges[i].nid = state->vmemranges[i].nid;
1200 }
1201
1202 dom->nr_vnodes = info->num_vnuma_nodes;
1203 dom->vnode_to_pnode = libxl__malloc(gc, sizeof(*dom->vnode_to_pnode) *
1204 dom->nr_vnodes);
1205 for (i = 0; i < dom->nr_vnodes; i++)
1206 dom->vnode_to_pnode[i] = info->vnuma_nodes[i].pnode;
1207 }
1208
1209 rc = libxl__build_dom(gc, domid, d_config, state, dom);
1210 if (rc != 0)
1211 goto out;
1212
1213 rc = hvm_build_set_params(ctx->xch, domid, info);
1214 if (rc != 0) {
1215 LOG(ERROR, "hvm build set params failed");
1216 goto out;
1217 }
1218
1219 state->console_mfn = dom->console_pfn;
1220 state->store_mfn = dom->xenstore_pfn;
1221 state->vuart_gfn = dom->vuart_gfn;
1222
1223 rc = hvm_build_set_xs_values(gc, domid, dom, info);
1224 if (rc != 0) {
1225 LOG(ERROR, "hvm build set xenstore values failed");
1226 goto out;
1227 }
1228
1229 xc_dom_release(dom);
1230 return 0;
1231
1232 out:
1233 assert(rc != 0);
1234 if (dom != NULL) xc_dom_release(dom);
1235 return rc;
1236 }
1237
1238 /*==================== Miscellaneous ====================*/
1239
1240 char *libxl__uuid2string(libxl__gc *gc, const libxl_uuid uuid)
1241 {
1242 return GCSPRINTF(LIBXL_UUID_FMT, LIBXL_UUID_BYTES(uuid));
1243 }
1244
1245 const char *libxl__userdata_path(libxl__gc *gc, uint32_t domid,
1246 const char *userdata_userid,
1247 const char *wh)
1248 {
1249 libxl_ctx *ctx = libxl__gc_owner(gc);
1250 char *uuid_string, *path;
1251 libxl_dominfo info;
1252 int rc;
1253
1254 libxl_dominfo_init(&info);
1255
1256 rc = libxl_domain_info(ctx, &info, domid);
1257 if (rc) {
1258 LOGE(ERROR, "unable to find domain info for domain %"PRIu32, domid);
1259 path = NULL;
1260 goto out;
1261 }
1262 uuid_string = GCSPRINTF(LIBXL_UUID_FMT, LIBXL_UUID_BYTES(info.uuid));
1263 path = GCSPRINTF(XEN_LIB_DIR "/userdata-%s.%u.%s.%s",
1264 wh, domid, uuid_string, userdata_userid);
1265
1266 out:
1267 libxl_dominfo_dispose(&info);
1268 return path;
1269 }
1270
1271 static int userdata_delete(libxl__gc *gc, const char *path)
1272 {
1273 int r;
1274 r = unlink(path);
1275 if (r) {
1276 LOGE(ERROR, "remove failed for %s", path);
1277 return errno;
1278 }
1279 return 0;
1280 }
1281
1282 void libxl__userdata_destroyall(libxl__gc *gc, uint32_t domid)
1283 {
1284 const char *pattern;
1285 glob_t gl;
1286 int r, i;
1287
1288 pattern = libxl__userdata_path(gc, domid, "*", "?");
1289 if (!pattern)
1290 goto out;
1291
1292 gl.gl_pathc = 0;
1293 gl.gl_pathv = 0;
1294 gl.gl_offs = 0;
1295 r = glob(pattern, GLOB_ERR|GLOB_NOSORT|GLOB_MARK, 0, &gl);
1296 if (r == GLOB_NOMATCH)
1297 goto out;
1298 if (r)
1299 LOGE(ERROR, "glob failed for %s", pattern);
1300
1301 /* Note: don't delete domain-userdata-lock; it will be handled by
1302 * the unlock function.
1303 */
1304 for (i=0; i<gl.gl_pathc; i++) {
1305 if (!strstr(gl.gl_pathv[i], "domain-userdata-lock"))
1306 userdata_delete(gc, gl.gl_pathv[i]);
1307 }
1308 globfree(&gl);
1309 out:
1310 return;
1311 }
1312
1313 int libxl__userdata_store(libxl__gc *gc, uint32_t domid,
1314 const char *userdata_userid,
1315 const uint8_t *data, int datalen)
1316 {
1317 const char *filename;
1318 const char *newfilename;
1319 int e, rc;
1320 int fd = -1;
1321
1322 filename = libxl__userdata_path(gc, domid, userdata_userid, "d");
1323 if (!filename) {
1324 rc = ERROR_NOMEM;
1325 goto out;
1326 }
1327
1328 if (!datalen) {
1329 rc = userdata_delete(gc, filename);
1330 goto out;
1331 }
1332
1333 newfilename = libxl__userdata_path(gc, domid, userdata_userid, "n");
1334 if (!newfilename) {
1335 rc = ERROR_NOMEM;
1336 goto out;
1337 }
1338
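    /* Write the data to the "n" (new) file and then rename it over the
     * "d" (data) file, so readers never observe a partially written file. */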
1339 rc = ERROR_FAIL;
1340
1341 fd = open(newfilename, O_RDWR | O_CREAT | O_TRUNC, 0600);
1342 if (fd < 0)
1343 goto err;
1344
1345 if (libxl_write_exactly(CTX, fd, data, datalen, "userdata", newfilename))
1346 goto err;
1347
1348 if (close(fd) < 0) {
1349 fd = -1;
1350 goto err;
1351 }
1352 fd = -1;
1353
1354 if (rename(newfilename, filename))
1355 goto err;
1356
1357 rc = 0;
1358
1359 err:
1360 if (fd >= 0) {
1361 e = errno;
1362 close(fd);
1363 errno = e;
1364 }
1365
1366 if (rc)
1367 LOGE(ERROR, "cannot write/rename %s for %s", newfilename, filename);
1368 out:
1369 return rc;
1370 }
1371
1372 int libxl_userdata_store(libxl_ctx *ctx, uint32_t domid,
1373 const char *userdata_userid,
1374 const uint8_t *data, int datalen)
1375 {
1376 GC_INIT(ctx);
1377 int rc;
1378 libxl__flock *lock;
1379
1380 CTX_LOCK;
1381 lock = libxl__lock_domain_userdata(gc, domid);
1382 if (!lock) {
1383 rc = ERROR_LOCK_FAIL;
1384 goto out;
1385 }
1386
1387 rc = libxl__userdata_store(gc, domid, userdata_userid,
1388 data, datalen);
1389
1390 libxl__unlock_file(lock);
1391
1392 out:
1393 CTX_UNLOCK;
1394 GC_FREE;
1395 return rc;
1396 }
1397
1398 int libxl__userdata_retrieve(libxl__gc *gc, uint32_t domid,
1399 const char *userdata_userid,
1400 uint8_t **data_r, int *datalen_r)
1401 {
1402 const char *filename;
1403 int e, rc;
1404 int datalen = 0;
1405 void *data = 0;
1406
1407 filename = libxl__userdata_path(gc, domid, userdata_userid, "d");
1408 if (!filename) {
1409 rc = ERROR_NOMEM;
1410 goto out;
1411 }
1412
1413 e = libxl_read_file_contents(CTX, filename, data_r ? &data : 0, &datalen);
1414 if (e && errno != ENOENT) {
1415 rc = ERROR_FAIL;
1416 goto out;
1417 }
1418 if (!e && !datalen) {
1419 LOG(ERROR, "userdata file %s is empty", filename);
1420 if (data_r) assert(!*data_r);
1421 rc = ERROR_FAIL;
1422 goto out;
1423 }
1424
1425 if (data_r) *data_r = data;
1426 if (datalen_r) *datalen_r = datalen;
1427 rc = 0;
1428
1429 out:
1430 return rc;
1431 }
1432
1433 int libxl_userdata_retrieve(libxl_ctx *ctx, uint32_t domid,
1434 const char *userdata_userid,
1435 uint8_t **data_r, int *datalen_r)
1436 {
1437 GC_INIT(ctx);
1438 int rc;
1439 libxl__flock *lock;
1440
1441 CTX_LOCK;
1442 lock = libxl__lock_domain_userdata(gc, domid);
1443 if (!lock) {
1444 rc = ERROR_LOCK_FAIL;
1445 goto out;
1446 }
1447
1448 rc = libxl__userdata_retrieve(gc, domid, userdata_userid,
1449 data_r, datalen_r);
1450
1451
1452 libxl__unlock_file(lock);
1453 out:
1454 CTX_UNLOCK;
1455 GC_FREE;
1456 return rc;
1457 }
1458
1459 int libxl_userdata_unlink(libxl_ctx *ctx, uint32_t domid,
1460 const char *userdata_userid)
1461 {
1462 GC_INIT(ctx);
1463 CTX_LOCK;
1464
1465 int rc;
1466 libxl__flock *lock = NULL;
1467 const char *filename;
1468
1469 lock = libxl__lock_domain_userdata(gc, domid);
1470 if (!lock) {
1471 rc = ERROR_LOCK_FAIL;
1472 goto out;
1473 }
1474
1475 filename = libxl__userdata_path(gc, domid, userdata_userid, "d");
1476 if (!filename) {
1477 rc = ERROR_FAIL;
1478 goto out;
1479 }
1480 if (unlink(filename)) {
1481 LOGE(ERROR, "error deleting userdata file: %s", filename);
1482 rc = ERROR_FAIL;
1483 goto out;
1484 }
1485
1486 rc = 0;
1487 out:
1488 if (lock)
1489 libxl__unlock_file(lock);
1490 CTX_UNLOCK;
1491 GC_FREE;
1492 return rc;
1493 }
1494
1495 int libxl__domain_set_paging_mempool_size(
1496 libxl__gc *gc, libxl_domain_config *d_config, uint32_t domid)
1497 {
1498 uint64_t shadow_mem;
1499
1500 shadow_mem = d_config->b_info.shadow_memkb;
1501 shadow_mem <<= 10;
1502
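    /* shadow_memkb has just been shifted from kB to bytes; shifting back
     * and comparing detects overflow of the 64-bit byte count. */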
1503 if ((shadow_mem >> 10) != d_config->b_info.shadow_memkb) {
1504 LOGED(ERROR, domid,
1505 "shadow_memkb value %"PRIu64"kB too large",
1506 d_config->b_info.shadow_memkb);
1507 return ERROR_FAIL;
1508 }
1509
1510 int r = xc_set_paging_mempool_size(CTX->xch, domid, shadow_mem);
1511 if (r) {
1512 LOGED(ERROR, domid,
1513 "Failed to set paging mempool size to %"PRIu64"kB",
1514 d_config->b_info.shadow_memkb);
1515 return ERROR_FAIL;
1516 }
1517
1518 return 0;
1519 }
1520
1521 /*
1522 * Local variables:
1523 * mode: C
1524 * c-basic-offset: 4
1525 * indent-tabs-mode: nil
1526 * End:
1527 */
1528