1 /*
2 * Copyright (C) 2009 Citrix Ltd.
3 * Author Vincent Hanquez <vincent.hanquez@eu.citrix.com>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU Lesser General Public License as published
7 * by the Free Software Foundation; version 2.1 only. with the special
8 * exception on linking described in file LICENSE.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU Lesser General Public License for more details.
14 */
15
16 #include "libxl_osdeps.h" /* must come before any other headers */
17
18 #include <glob.h>
19
20 #include "libxl_internal.h"
21 #include "libxl_arch.h"
22
23 #include <xen/hvm/hvm_info_table.h>
24 #include <xen/hvm/hvm_xs_strings.h>
25 #include <xen/hvm/e820.h>
26
27 //#define DEBUG 1
28
29 libxl_domain_type libxl__domain_type(libxl__gc *gc, uint32_t domid)
30 {
31 libxl_ctx *ctx = libxl__gc_owner(gc);
32 xc_domaininfo_t info;
33 int ret;
34
35 ret = xc_domain_getinfo_single(ctx->xch, domid, &info);
36 if (ret < 0) {
37 LOGED(ERROR, domid, "unable to get dominfo");
38 return LIBXL_DOMAIN_TYPE_INVALID;
39 }
40 if (info.flags & XEN_DOMINF_hvm_guest) {
41 const char *type_path = GCSPRINTF("%s/type",
42 libxl__xs_libxl_path(gc, domid));
43 const char *type;
44 libxl_domain_type t;
45 int rc;
46
47 rc = libxl__xs_read_mandatory(gc, XBT_NULL, type_path, &type);
48 if (rc) {
49 LOG(WARN,
50 "unable to get domain type for domid=%"PRIu32", assuming HVM",
51 domid);
52 return LIBXL_DOMAIN_TYPE_HVM;
53 }
54
55 rc = libxl_domain_type_from_string(type, &t);
56 if (rc) {
57 LOG(WARN,
58 "unable to get domain type for domid=%"PRIu32", assuming HVM",
59 domid);
60 return LIBXL_DOMAIN_TYPE_HVM;
61 }
62
63 return t;
64 } else
65 return LIBXL_DOMAIN_TYPE_PV;
66 }
67
68 int libxl__domain_cpupool(libxl__gc *gc, uint32_t domid)
69 {
70 xc_domaininfo_t info;
71 int ret;
72
73 ret = xc_domain_getinfo_single(CTX->xch, domid, &info);
74 if (ret < 0)
75 {
76 LOGED(ERROR, domid, "get domaininfo failed");
77 return ERROR_FAIL;
78 }
79 return info.cpupool;
80 }
81
82 libxl_scheduler libxl__domain_scheduler(libxl__gc *gc, uint32_t domid)
83 {
84 int cpupool = libxl__domain_cpupool(gc, domid);
85 libxl_cpupoolinfo poolinfo;
86 libxl_scheduler sched = LIBXL_SCHEDULER_UNKNOWN;
87 int rc;
88
89 if (cpupool < 0)
90 return sched;
91
92 libxl_cpupoolinfo_init(&poolinfo);
93 rc = libxl_cpupool_info(CTX, &poolinfo, cpupool);
94 if (rc < 0)
95 goto out;
96
97 sched = poolinfo.sched;
98
99 out:
100 libxl_cpupoolinfo_dispose(&poolinfo);
101 return sched;
102 }
103
104 /*
105 * Two NUMA placement candidates are compared by means of the following
106 * heuristics:
107 *
108 * - the number of vcpus runnable on the candidates is considered, and
109 * candidates with fewer of them are preferred. If two candidates have
110 * the same number of runnable vcpus,
111 * - the amount of free memory in the candidates is considered, and the
112 * candidate with the greater amount of it is preferred.
113 *
114 * In fact, leaving larger memory holes maximizes the probability of being
115 * able to put other domains on the node. That hopefully means many domains
116 * will benefit from local memory accesses, but it also introduces the risk
117 * of overloading large (from a memory POV) nodes. That is exactly the effect
118 * that counting the vcpus able to run on the nodes tries to prevent.
119 *
120 * Note that this completely ignores the number of nodes each candidate
121 * spans, as the fact that fewer nodes is better is already accounted for
122 * in the algorithm.
123 */
124 static int numa_cmpf(const libxl__numa_candidate *c1,
125 const libxl__numa_candidate *c2)
126 {
127 if (c1->nr_vcpus != c2->nr_vcpus)
128 return c1->nr_vcpus - c2->nr_vcpus;
129
130 return c2->free_memkb - c1->free_memkb;
131 }
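/*
 * Illustrative example only (made-up values): with
 *   c1 = { .nr_vcpus = 2, .free_memkb = 4096 }
 *   c2 = { .nr_vcpus = 2, .free_memkb = 8192 }
 * numa_cmpf(&c1, &c2) returns 8192 - 4096 > 0, i.e. c2 sorts as the better
 * candidate (same number of runnable vcpus, more free memory). If the vcpu
 * counts differ, the candidate with fewer runnable vcpus always wins,
 * regardless of free memory.
 */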
132
133 /* The actual automatic NUMA placement routine */
134 static int numa_place_domain(libxl__gc *gc, uint32_t domid,
135 libxl_domain_config *d_config)
136 {
137 libxl_domain_build_info *info = &d_config->b_info;
138 int found;
139 libxl__numa_candidate candidate;
140 libxl_bitmap cpumap, cpupool_nodemap, *map;
141 libxl_cpupoolinfo cpupool_info;
142 int i, cpupool, rc = 0;
143 uint64_t memkb;
144
145 libxl__numa_candidate_init(&candidate);
146 libxl_bitmap_init(&cpumap);
147 libxl_bitmap_init(&cpupool_nodemap);
148 libxl_cpupoolinfo_init(&cpupool_info);
149
150 /*
151 * Extract the cpumap from the cpupool the domain belongs to. In fact,
152 * it only makes sense to consider the cpus/nodes that are in there
153 * for placement.
154 */
155 rc = cpupool = libxl__domain_cpupool(gc, domid);
156 if (rc < 0)
157 goto out;
158 rc = libxl_cpupool_info(CTX, &cpupool_info, cpupool);
159 if (rc)
160 goto out;
161 map = &cpupool_info.cpumap;
162
163 /*
164 * If there's a well defined hard affinity mask (i.e., the same one for all
165 * the vcpus), we can try to run the placement considering only the pcpus
166 * within such mask.
167 */
168 if (info->num_vcpu_hard_affinity)
169 {
170 #ifdef DEBUG
171 int j;
172
173 for (j = 0; j < info->num_vcpu_hard_affinity; j++)
174 assert(libxl_bitmap_equal(&info->vcpu_hard_affinity[0],
175 &info->vcpu_hard_affinity[j], 0));
176 #endif /* DEBUG */
177
178 rc = libxl_bitmap_and(CTX, &cpumap, &info->vcpu_hard_affinity[0],
179 &cpupool_info.cpumap);
180 if (rc)
181 goto out;
182
183 /* Hard affinity must contain at least one cpu of our cpupool */
184 if (libxl_bitmap_is_empty(&cpumap)) {
185 LOG(ERROR, "Hard affinity completely outside of domain's cpupool!");
186 rc = ERROR_INVAL;
187 goto out;
188 }
189 }
190
191 rc = libxl__domain_need_memory_calculate(gc, info, &memkb);
192 if (rc)
193 goto out;
194 if (libxl_node_bitmap_alloc(CTX, &cpupool_nodemap, 0)) {
195 rc = ERROR_FAIL;
196 goto out;
197 }
198
199 /* Find the best candidate with enough free memory and at least
200 * as many pcpus as the domain has vcpus. */
201 rc = libxl__get_numa_candidate(gc, memkb, info->max_vcpus,
202 0, 0, map, numa_cmpf, &candidate, &found);
203 if (rc)
204 goto out;
205
206 /* Not even a suitable placement candidate was found! Let's just not touch
207 * the domain's info->cpumap: it will have affinity with all nodes/cpus. */
208 if (found == 0)
209 goto out;
210
211 /* Map the candidate's node map to the domain's info->nodemap */
212 libxl__numa_candidate_get_nodemap(gc, &candidate, &info->nodemap);
213
214 /* Avoid trying to set the affinity to nodes that might be in the
215 * candidate's nodemap but out of our cpupool. */
216 rc = libxl_cpumap_to_nodemap(CTX, &cpupool_info.cpumap,
217 &cpupool_nodemap);
218 if (rc)
219 goto out;
220
221 libxl_for_each_set_bit(i, info->nodemap) {
222 if (!libxl_bitmap_test(&cpupool_nodemap, i))
223 libxl_bitmap_reset(&info->nodemap, i);
224 }
225
226 LOG(DETAIL, "NUMA placement candidate with %d nodes, %d cpus and "
227 "%"PRIu64" KB free selected", candidate.nr_nodes,
228 candidate.nr_cpus, candidate.free_memkb / 1024);
229
230 out:
231 libxl__numa_candidate_dispose(&candidate);
232 libxl_bitmap_dispose(&cpupool_nodemap);
233 libxl_bitmap_dispose(&cpumap);
234 libxl_cpupoolinfo_dispose(&cpupool_info);
235 return rc;
236 }
237
238 int libxl__build_pre(libxl__gc *gc, uint32_t domid,
239 libxl_domain_config *d_config, libxl__domain_build_state *state)
240 {
241 libxl_domain_build_info *const info = &d_config->b_info;
242 libxl_ctx *ctx = libxl__gc_owner(gc);
243 char *xs_domid, *con_domid;
244 int rc;
245 uint64_t size;
246
247 if (xc_domain_max_vcpus(ctx->xch, domid, info->max_vcpus) != 0) {
248 LOG(ERROR, "Couldn't set max vcpu count");
249 return ERROR_FAIL;
250 }
251
252 /*
253 * Check if the domain has any CPU or node affinity already. If not, try
254 * to build up the latter via automatic NUMA placement. In fact, in case
255 * numa_place_domain() manages to find a placement, info->nodemap is
256 * updated accordingly; if it does not, info->nodemap is just left
257 * alone. It is then the subsequent call to
258 * libxl_domain_set_nodeaffinity() that enacts the actual placement.
259 *
260 * As far as scheduling is concerned, we achieve NUMA-aware scheduling
261 * by having the results of placement affect the soft affinity of all
262 * the vcpus of the domain. Of course, we want that iff placement is
263 * enabled and actually happens, so we only change info->cpumap_soft to
264 * reflect the placement result if that is the case.
265 */
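/*
 * Note on the checks below: an explicit soft affinity or an explicit NUMA
 * node affinity disables automatic placement, whereas a hard affinity
 * alone does not: placement still runs, restricted to the hard-affinity
 * pcpus (see numa_place_domain()).
 */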
266 if (libxl_defbool_val(info->numa_placement)) {
267 if (info->cpumap.size || info->num_vcpu_soft_affinity)
268 LOG(WARN, "Can't run NUMA placement, as a soft "
269 "affinity has been specified explicitly");
270 else if (info->nodemap.size)
271 LOG(WARN, "Can't run NUMA placement, as the domain has "
272 "NUMA node affinity set already");
273 else {
274 libxl_bitmap cpumap_soft;
275
276 rc = libxl_node_bitmap_alloc(ctx, &info->nodemap, 0);
277 if (rc)
278 return rc;
279 libxl_bitmap_set_any(&info->nodemap);
280
281 rc = libxl_cpu_bitmap_alloc(ctx, &cpumap_soft, 0);
282 if (rc)
283 return rc;
284
285 rc = numa_place_domain(gc, domid, d_config);
286 if (rc) {
287 libxl_bitmap_dispose(&cpumap_soft);
288 return rc;
289 }
290
291 /*
292 * All we need to do now is convert the result of automatic
293 * placement from a nodemap to a cpumap, and then use that cpumap
294 * as the soft affinity for all the vcpus of the domain.
295 *
296 * When calling libxl_set_vcpuaffinity_all(), it is ok to use
297 * NULL as the hard affinity, as we know we don't have one, or we
298 * wouldn't be here.
299 */
300 libxl_nodemap_to_cpumap(ctx, &info->nodemap, &cpumap_soft);
301 libxl_set_vcpuaffinity_all(ctx, domid, info->max_vcpus,
302 NULL, &cpumap_soft);
303
304 libxl_bitmap_dispose(&cpumap_soft);
305
306 /*
307 * Placement has run, so prevent it from being re-run if this
308 * same config we are using and building here is ever re-used.
309 * This means that people re-using configs will get the same
310 * results, consistently, across every re-use, which is what
311 * we expect most people to want.
312 */
313 libxl_defbool_set(&info->numa_placement, false);
314 }
315 }
316
317 if (info->nodemap.size)
318 libxl_domain_set_nodeaffinity(ctx, domid, &info->nodemap);
319
320 if (info->num_vcpu_hard_affinity || info->num_vcpu_soft_affinity) {
321 libxl_bitmap *hard_affinity, *soft_affinity;
322 int i, n_vcpus;
323
324 n_vcpus = info->num_vcpu_hard_affinity > info->num_vcpu_soft_affinity ?
325 info->num_vcpu_hard_affinity : info->num_vcpu_soft_affinity;
326
327 for (i = 0; i < n_vcpus; i++) {
328 /*
329 * Prepare hard and soft affinity pointers in a way that allows
330 * us to issue only one call to libxl_set_vcpuaffinity(), setting,
331 * for each vcpu, both hard and soft affinity "atomically".
332 */
333 hard_affinity = NULL;
334 if (info->num_vcpu_hard_affinity &&
335 i < info->num_vcpu_hard_affinity)
336 hard_affinity = &info->vcpu_hard_affinity[i];
337
338 soft_affinity = NULL;
339 if (info->num_vcpu_soft_affinity &&
340 i < info->num_vcpu_soft_affinity)
341 soft_affinity = &info->vcpu_soft_affinity[i];
342
343 if (libxl_set_vcpuaffinity(ctx, domid, i,
344 hard_affinity, soft_affinity)) {
345 LOG(ERROR, "setting affinity failed on vcpu `%d'", i);
346 return ERROR_FAIL;
347 }
348 }
349 }
350
351
352 rc = libxl__arch_extra_memory(gc, info, &size);
353 if (rc < 0) {
354 LOGE(ERROR, "Couldn't get arch extra constant memory size");
355 return ERROR_FAIL;
356 }
357
358 if (xc_domain_setmaxmem(ctx->xch, domid, info->target_memkb + size) < 0) {
359 LOGE(ERROR, "Couldn't set max memory");
360 return ERROR_FAIL;
361 }
362
363 xs_domid = xs_read(ctx->xsh, XBT_NULL, "/tool/xenstored/domid", NULL);
364 state->store_domid = xs_domid ? atoi(xs_domid) : 0;
365 free(xs_domid);
366
367 con_domid = xs_read(ctx->xsh, XBT_NULL, "/tool/xenconsoled/domid", NULL);
368 state->console_domid = con_domid ? atoi(con_domid) : 0;
369 free(con_domid);
370
371 state->store_port = xc_evtchn_alloc_unbound(ctx->xch, domid, state->store_domid);
372 state->console_port = xc_evtchn_alloc_unbound(ctx->xch, domid, state->console_domid);
373
374 rc = libxl__arch_domain_create(gc, d_config, state, domid);
375 if (rc) goto out;
376
377 /* Construct a CPUID policy, but only for brand new domains. Domains
378 * being migrated-in/restored have CPUID handled during the
379 * static_data_done() callback. */
380 if (!state->restore && !state->soft_reset)
381 rc = libxl__cpuid_legacy(ctx, domid, false, info);
382
383 out:
384 return rc;
385 }
386
387 static int set_vnuma_affinity(libxl__gc *gc, uint32_t domid,
388 libxl_domain_build_info *info)
389 {
390 libxl_bitmap cpumap;
391 libxl_vnode_info *v;
392 unsigned int i, j;
393 int rc = 0;
394
395 libxl_bitmap_init(&cpumap);
396
397 rc = libxl_cpu_bitmap_alloc(CTX, &cpumap, 0);
398 if (rc) {
399 LOG(ERROR, "Can't allocate nodemap");
400 goto out;
401 }
402
403 /*
404 * For each vcpu in each vnode, set its soft affinity to
405 * the pcpus belonging to the pnode the vnode is on
406 */
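/*
 * E.g. (illustrative): if vnode 1 has been placed on pnode 3, every vcpu
 * in vnuma_nodes[1].vcpus gets its soft affinity set to the pcpus of
 * pnode 3, via libxl_node_to_cpumap() + libxl_set_vcpuaffinity().
 */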
407 for (i = 0; i < info->num_vnuma_nodes; i++) {
408 v = &info->vnuma_nodes[i];
409
410 rc = libxl_node_to_cpumap(CTX, v->pnode, &cpumap);
411 if (rc) {
412 LOG(ERROR, "Can't get cpumap for vnode %d", i);
413 goto out;
414 }
415
416 libxl_for_each_set_bit(j, v->vcpus) {
417 rc = libxl_set_vcpuaffinity(CTX, domid, j, NULL, &cpumap);
418 if (rc) {
419 LOG(ERROR, "Can't set cpu affinity for %d", j);
420 goto out;
421 }
422 }
423 }
424
425 out:
426 libxl_bitmap_dispose(&cpumap);
427 return rc;
428 }
429
430 int libxl__build_post(libxl__gc *gc, uint32_t domid,
431 libxl_domain_build_info *info,
432 libxl__domain_build_state *state,
433 char **vms_ents, char **local_ents)
434 {
435 libxl_ctx *ctx = libxl__gc_owner(gc);
436 char *dom_path, *vm_path;
437 xs_transaction_t t;
438 char **ents;
439 int i, rc;
440
441 if (info->num_vnuma_nodes && !info->num_vcpu_soft_affinity) {
442 rc = set_vnuma_affinity(gc, domid, info);
443 if (rc)
444 return rc;
445 }
446
447 rc = libxl_domain_sched_params_set(CTX, domid, &info->sched_params);
448 if (rc)
449 return rc;
450
451 if (info->type == LIBXL_DOMAIN_TYPE_HVM
452 && !libxl_ms_vm_genid_is_zero(&info->u.hvm.ms_vm_genid)) {
453 rc = libxl__ms_vm_genid_set(gc, domid,
454 &info->u.hvm.ms_vm_genid);
455 if (rc) {
456 LOG(ERROR, "Failed to set VM Generation ID");
457 return rc;
458 }
459 }
460
461 ents = libxl__calloc(gc, 12 + (info->max_vcpus * 2) + 2, sizeof(char *));
462 ents[0] = "memory/static-max";
463 ents[1] = GCSPRINTF("%"PRId64, info->max_memkb);
464 ents[2] = "memory/target";
465 ents[3] = GCSPRINTF("%"PRId64, info->target_memkb -
466 libxl__get_targetmem_fudge(gc, info));
467 ents[4] = "memory/videoram";
468 ents[5] = GCSPRINTF("%"PRId64, info->video_memkb);
469 ents[6] = "domid";
470 ents[7] = GCSPRINTF("%d", domid);
471 ents[8] = "store/port";
472 ents[9] = GCSPRINTF("%"PRIu32, state->store_port);
473 ents[10] = "store/ring-ref";
474 ents[11] = GCSPRINTF("%lu", state->store_mfn);
475 for (i = 0; i < info->max_vcpus; i++) {
476 ents[12+(i*2)] = GCSPRINTF("cpu/%d/availability", i);
477 ents[12+(i*2)+1] = libxl_bitmap_test(&info->avail_vcpus, i)
478 ? "online" : "offline";
479 }
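/*
 * Illustrative resulting layout (example values only, paths relative to
 * /local/domain/<domid>) for a 2-vcpu guest with vcpu 1 offline:
 *   memory/static-max  = "1048576"
 *   memory/target      = "1048576"
 *   memory/videoram    = "0"
 *   domid              = "7"
 *   store/port         = "1"
 *   store/ring-ref     = "262142"
 *   cpu/0/availability = "online"
 *   cpu/1/availability = "offline"
 */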
480
481 dom_path = libxl__xs_get_dompath(gc, domid);
482 if (!dom_path) {
483 return ERROR_FAIL;
484 }
485
486 vm_path = xs_read(ctx->xsh, XBT_NULL, GCSPRINTF("%s/vm", dom_path), NULL);
487 retry_transaction:
488 t = xs_transaction_start(ctx->xsh);
489
490 libxl__xs_writev(gc, t, dom_path, ents);
491 libxl__xs_writev(gc, t, dom_path, local_ents);
492 libxl__xs_writev(gc, t, vm_path, vms_ents);
493
494 if (!xs_transaction_end(ctx->xsh, t, 0))
495 if (errno == EAGAIN)
496 goto retry_transaction;
497 xs_introduce_domain(ctx->xsh, domid, state->store_mfn, state->store_port);
498 free(vm_path);
499 return 0;
500 }
501
502 static int set_vnuma_info(libxl__gc *gc, uint32_t domid,
503 const libxl_domain_build_info *info,
504 const libxl__domain_build_state *state)
505 {
506 int rc = 0;
507 unsigned int i, nr_vdistance;
508 unsigned int *vcpu_to_vnode, *vnode_to_pnode, *vdistance = NULL;
509
510 vcpu_to_vnode = libxl__calloc(gc, info->max_vcpus,
511 sizeof(unsigned int));
512 vnode_to_pnode = libxl__calloc(gc, info->num_vnuma_nodes,
513 sizeof(unsigned int));
514
515 nr_vdistance = info->num_vnuma_nodes * info->num_vnuma_nodes;
516 vdistance = libxl__calloc(gc, nr_vdistance, sizeof(unsigned int));
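/* vdistance is the flattened num_vnuma_nodes x num_vnuma_nodes matrix,
 * with vdistance[i * num_vnuma_nodes + j] holding the distance from
 * vnode i to vnode j; e.g. a typical 2-vnode setup would be
 * { 10, 20, 20, 10 } (illustrative values). */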
517
518 for (i = 0; i < info->num_vnuma_nodes; i++) {
519 libxl_vnode_info *v = &info->vnuma_nodes[i];
520 int j;
521
522 /* vnode to pnode mapping */
523 vnode_to_pnode[i] = v->pnode;
524
525 /* vcpu to vnode mapping */
526 libxl_for_each_set_bit(j, v->vcpus)
527 vcpu_to_vnode[j] = i;
528
529 /* node distances */
530 assert(info->num_vnuma_nodes == v->num_distances);
531 memcpy(vdistance + (i * info->num_vnuma_nodes),
532 v->distances,
533 v->num_distances * sizeof(unsigned int));
534 }
535
536 if (xc_domain_setvnuma(CTX->xch, domid, info->num_vnuma_nodes,
537 state->num_vmemranges, info->max_vcpus,
538 state->vmemranges, vdistance,
539 vcpu_to_vnode, vnode_to_pnode) < 0) {
540 LOGE(ERROR, "xc_domain_setvnuma failed");
541 rc = ERROR_FAIL;
542 }
543
544 return rc;
545 }
546
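/*
 * Drive the common xc_dom_image build pipeline shared by the PV and
 * HVM/PVH paths: parse the kernel image, size and populate guest memory,
 * lay out the architecture-specific hardware description, build and boot
 * the image, and finally initialise the grant tables.
 */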
547 static int libxl__build_dom(libxl__gc *gc, uint32_t domid,
548 libxl_domain_config *d_config, libxl__domain_build_state *state,
549 struct xc_dom_image *dom)
550 {
551 libxl_domain_build_info *const info = &d_config->b_info;
552 uint64_t mem_kb;
553 int ret;
554
555 if ( (ret = xc_dom_boot_xen_init(dom, CTX->xch, domid)) != 0 ) {
556 LOGE(ERROR, "xc_dom_boot_xen_init failed");
557 goto out;
558 }
559 #ifdef GUEST_RAM_BASE
560 if ( (ret = xc_dom_rambase_init(dom, GUEST_RAM_BASE)) != 0 ) {
561 LOGE(ERROR, "xc_dom_rambase failed");
562 goto out;
563 }
564 #endif
565 if ( (ret = xc_dom_parse_image(dom)) != 0 ) {
566 LOG(ERROR, "xc_dom_parse_image failed");
567 goto out;
568 }
569 if ( (ret = libxl__arch_domain_init_hw_description(gc, d_config, state, dom)) != 0 ) {
570 LOGE(ERROR, "libxl__arch_domain_init_hw_description failed");
571 goto out;
572 }
573
574 mem_kb = dom->container_type == XC_DOM_HVM_CONTAINER ?
575 (info->max_memkb - info->video_memkb) : info->target_memkb;
576 if ( (ret = xc_dom_mem_init(dom, mem_kb / 1024)) != 0 ) {
577 LOGE(ERROR, "xc_dom_mem_init failed");
578 goto out;
579 }
580 if ( (ret = xc_dom_boot_mem_init(dom)) != 0 ) {
581 LOGE(ERROR, "xc_dom_boot_mem_init failed");
582 goto out;
583 }
584 if ( (ret = libxl__arch_domain_finalise_hw_description(gc, domid, d_config, dom)) != 0 ) {
585 LOGE(ERROR, "libxl__arch_domain_finalise_hw_description failed");
586 goto out;
587 }
588 if ( (ret = xc_dom_build_image(dom)) != 0 ) {
589 LOGE(ERROR, "xc_dom_build_image failed");
590 goto out;
591 }
592 if ( (ret = xc_dom_boot_image(dom)) != 0 ) {
593 LOGE(ERROR, "xc_dom_boot_image failed");
594 goto out;
595 }
596 if ( (ret = xc_dom_gnttab_init(dom)) != 0 ) {
597 LOGE(ERROR, "xc_dom_gnttab_init failed");
598 goto out;
599 }
600 if ((ret = libxl__arch_build_dom_finish(gc, info, dom, state)) != 0) {
601 LOGE(ERROR, "libxl__arch_build_dom_finish failed");
602 goto out;
603 }
604
605 out:
606 return ret != 0 ? ERROR_FAIL : 0;
607 }
608
609 int libxl__build_pv(libxl__gc *gc, uint32_t domid,
610 libxl_domain_config *d_config, libxl__domain_build_state *state)
611 {
612 libxl_ctx *ctx = libxl__gc_owner(gc);
613 libxl_domain_build_info *const info = &d_config->b_info;
614 struct xc_dom_image *dom;
615 int ret;
616 int flags = 0;
617
618 xc_dom_loginit(ctx->xch);
619
620 dom = xc_dom_allocate(ctx->xch, state->pv_cmdline, info->u.pv.features);
621 if (!dom) {
622 LOGE(ERROR, "xc_dom_allocate failed");
623 return ERROR_FAIL;
624 }
625
626 dom->container_type = XC_DOM_PV_CONTAINER;
627
628 LOG(DEBUG, "pv kernel mapped %d path %s", state->pv_kernel.mapped, state->pv_kernel.path);
629
630 if (state->pv_kernel.mapped) {
631 ret = xc_dom_kernel_mem(dom,
632 state->pv_kernel.data,
633 state->pv_kernel.size);
634 if ( ret != 0) {
635 LOGE(ERROR, "xc_dom_kernel_mem failed");
636 goto out;
637 }
638 } else {
639 ret = xc_dom_kernel_file(dom, state->pv_kernel.path);
640 if ( ret != 0) {
641 LOGE(ERROR, "xc_dom_kernel_file failed");
642 goto out;
643 }
644 }
645
646 if ( state->pv_ramdisk.path && strlen(state->pv_ramdisk.path) ) {
647 if (state->pv_ramdisk.mapped) {
648 if ( (ret = xc_dom_module_mem(dom, state->pv_ramdisk.data, state->pv_ramdisk.size, NULL)) != 0 ) {
649 LOGE(ERROR, "xc_dom_ramdisk_mem failed");
650 goto out;
651 }
652 } else {
653 if ( (ret = xc_dom_module_file(dom, state->pv_ramdisk.path, NULL)) != 0 ) {
654 LOGE(ERROR, "xc_dom_ramdisk_file failed");
655 goto out;
656 }
657 }
658 }
659
660 dom->flags = flags;
661 dom->console_evtchn = state->console_port;
662 dom->console_domid = state->console_domid;
663 dom->xenstore_evtchn = state->store_port;
664 dom->xenstore_domid = state->store_domid;
665 dom->claim_enabled = libxl_defbool_val(info->claim_mode);
666 dom->max_vcpus = info->max_vcpus;
667
668 if (info->num_vnuma_nodes != 0) {
669 unsigned int i;
670
671 ret = libxl__vnuma_build_vmemrange_pv(gc, domid, info, state);
672 if (ret) {
673 LOGE(ERROR, "cannot build vmemranges");
674 goto out;
675 }
676 ret = libxl__vnuma_config_check(gc, info, state);
677 if (ret) goto out;
678
679 ret = set_vnuma_info(gc, domid, info, state);
680 if (ret) goto out;
681
682 dom->nr_vmemranges = state->num_vmemranges;
683 dom->vmemranges = xc_dom_malloc(dom, sizeof(*dom->vmemranges) *
684 dom->nr_vmemranges);
685
686 for (i = 0; i < dom->nr_vmemranges; i++) {
687 dom->vmemranges[i].start = state->vmemranges[i].start;
688 dom->vmemranges[i].end = state->vmemranges[i].end;
689 dom->vmemranges[i].flags = state->vmemranges[i].flags;
690 dom->vmemranges[i].nid = state->vmemranges[i].nid;
691 }
692
693 dom->nr_vnodes = info->num_vnuma_nodes;
694 dom->vnode_to_pnode = xc_dom_malloc(dom, sizeof(*dom->vnode_to_pnode) *
695 dom->nr_vnodes);
696 for (i = 0; i < info->num_vnuma_nodes; i++)
697 dom->vnode_to_pnode[i] = info->vnuma_nodes[i].pnode;
698 }
699
700 ret = libxl__build_dom(gc, domid, d_config, state, dom);
701 if (ret != 0)
702 goto out;
703
704 if (xc_dom_translated(dom)) {
705 state->console_mfn = dom->console_pfn;
706 state->store_mfn = dom->xenstore_pfn;
707 state->vuart_gfn = dom->vuart_gfn;
708 } else {
709 state->console_mfn = xc_dom_p2m(dom, dom->console_pfn);
710 state->store_mfn = xc_dom_p2m(dom, dom->xenstore_pfn);
711 }
712
713 ret = 0;
714 out:
715 xc_dom_release(dom);
716 return ret == 0 ? 0 : ERROR_FAIL;
717 }
718
719 static int hvm_build_set_params(xc_interface *handle, uint32_t domid,
720 libxl_domain_build_info *info)
721 {
722 struct hvm_info_table *va_hvm;
723 uint8_t *va_map, sum;
724 int i;
725
726 if (info->type == LIBXL_DOMAIN_TYPE_HVM) {
727 va_map = xc_map_foreign_range(handle, domid,
728 XC_PAGE_SIZE, PROT_READ | PROT_WRITE,
729 HVM_INFO_PFN);
730 if (va_map == NULL)
731 return ERROR_FAIL;
732
733 va_hvm = (struct hvm_info_table *)(va_map + HVM_INFO_OFFSET);
734 va_hvm->apic_mode = libxl_defbool_val(info->apic);
735 va_hvm->nr_vcpus = info->max_vcpus;
736 memset(va_hvm->vcpu_online, 0, sizeof(va_hvm->vcpu_online));
737 memcpy(va_hvm->vcpu_online, info->avail_vcpus.map, info->avail_vcpus.size);
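/* Recompute the checksum so the byte-wise sum of the whole
 * hvm_info_table is again 0 after our modifications. */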
738 for (i = 0, sum = 0; i < va_hvm->length; i++)
739 sum += ((uint8_t *) va_hvm)[i];
740 va_hvm->checksum -= sum;
741 munmap(va_map, XC_PAGE_SIZE);
742 }
743
744 return 0;
745 }
746
747 static int hvm_build_set_xs_values(libxl__gc *gc,
748 uint32_t domid,
749 struct xc_dom_image *dom,
750 const libxl_domain_build_info *info)
751 {
752 char *path = NULL;
753 int num_oem = 1;
754 int ret = 0;
755
756 if (dom->smbios_module.guest_addr_out) {
757 path = GCSPRINTF("/local/domain/%d/"HVM_XS_SMBIOS_PT_ADDRESS, domid);
758
759 ret = libxl__xs_printf(gc, XBT_NULL, path, "0x%"PRIx64,
760 dom->smbios_module.guest_addr_out);
761 if (ret)
762 goto err;
763
764 path = GCSPRINTF("/local/domain/%d/"HVM_XS_SMBIOS_PT_LENGTH, domid);
765
766 ret = libxl__xs_printf(gc, XBT_NULL, path, "0x%x",
767 dom->smbios_module.length);
768 if (ret)
769 goto err;
770 }
771
772 for (int i = 0; i < info->u.hvm.num_smbios; i++) {
773 char *p;
774 if (info->u.hvm.smbios[i].key == LIBXL_SMBIOS_TYPE_OEM) {
775 if (num_oem > 99) {
776 LOGD(ERROR, domid, "More than 99 SMBIOS OEM strings specified");
777 ret = ERROR_INVAL;
778 goto err;
779 }
780 path = GCSPRINTF("/local/domain/%d/"HVM_XS_OEM_STRINGS, domid,
781 num_oem);
782 num_oem++;
783 } else {
784 path = GCSPRINTF("/local/domain/%d/"HVM_XS_BIOS_STRINGS"/%s", domid,
785 libxl_smbios_type_to_string(info->u.hvm.smbios[i].key));
786 }
787
788 /* Convert the libxl_smbios_type string to the xenstore path that hvmloader
789 * will use, as defined by HVM_XS_*. That is, convert the '_' to '-'. */
790 p = strrchr(path, '/');
791 for ( ; *p; p++) {
792 if (*p == '_')
793 *p = '-';
794 }
795
796 LOGD(DEBUG, domid, "Writing %s = \"%s\"", path,
797 info->u.hvm.smbios[i].value);
798 ret = libxl__xs_printf(gc, XBT_NULL, path, "%s",
799 info->u.hvm.smbios[i].value);
800 if (ret)
801 goto err;
802 }
803
804 /* Only one module can be passed. PVHv2 guests do not support this. */
805 if (dom->acpi_modules[0].guest_addr_out &&
806 info->type == LIBXL_DOMAIN_TYPE_HVM) {
807 path = GCSPRINTF("/local/domain/%d/"HVM_XS_ACPI_PT_ADDRESS, domid);
808
809 ret = libxl__xs_printf(gc, XBT_NULL, path, "0x%"PRIx64,
810 dom->acpi_modules[0].guest_addr_out);
811 if (ret)
812 goto err;
813
814 path = GCSPRINTF("/local/domain/%d/"HVM_XS_ACPI_PT_LENGTH, domid);
815
816 ret = libxl__xs_printf(gc, XBT_NULL, path, "0x%x",
817 dom->acpi_modules[0].length);
818 if (ret)
819 goto err;
820 }
821
822 return 0;
823
824 err:
825 LOG(ERROR, "failed to write firmware xenstore value, err: %d", ret);
826 return ret;
827 }
828
829 static int libxl__load_hvm_firmware_module(libxl__gc *gc,
830 const char *filename,
831 const char *what,
832 struct xc_hvm_firmware_module *m)
833 {
834 int datalen = 0;
835 void *data = NULL;
836 int r, rc;
837
838 LOG(DEBUG, "Loading %s: %s", what, filename);
839 r = libxl_read_file_contents(CTX, filename, &data, &datalen);
840 if (r) {
841 /*
842 * Print a message only on ENOENT, other errors are logged by the
843 * function libxl_read_file_contents().
844 */
845 if (r == ENOENT)
846 LOGEV(ERROR, r, "failed to read %s file", what);
847 rc = ERROR_FAIL;
848 goto out;
849 }
850 libxl__ptr_add(gc, data);
851 if (datalen) {
852 /* Only accept non-empty files */
853 m->data = data;
854 m->length = datalen;
855 } else {
856 LOG(ERROR, "file %s for %s is empty", filename, what);
857 rc = ERROR_INVAL;
858 goto out;
859 }
860 rc = 0;
861 out:
862 return rc;
863 }
864
865 static int libxl__domain_firmware(libxl__gc *gc,
866 libxl_domain_build_info *info,
867 libxl__domain_build_state *state,
868 struct xc_dom_image *dom)
869 {
870 libxl_ctx *ctx = libxl__gc_owner(gc);
871 const char *firmware = NULL;
872 int e, rc;
873 int datalen = 0;
874 void *data;
875 const char *bios_filename = NULL;
876
877 if (info->type == LIBXL_DOMAIN_TYPE_HVM) {
878 if (info->u.hvm.firmware) {
879 firmware = info->u.hvm.firmware;
880 } else {
881 switch (info->device_model_version)
882 {
883 case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN:
884 case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN_TRADITIONAL:
885 firmware = "hvmloader";
886 break;
887 default:
888 LOG(ERROR, "invalid device model version %d",
889 info->device_model_version);
890 rc = ERROR_FAIL;
891 goto out;
892 }
893 }
894 }
895
896 if (state->pv_kernel.path != NULL &&
897 info->type == LIBXL_DOMAIN_TYPE_PVH) {
898
899 if (state->shim_path) {
900 rc = xc_dom_kernel_file(dom, state->shim_path);
901 if (rc) {
902 LOGE(ERROR, "xc_dom_kernel_file failed");
903 goto out;
904 }
905
906 /* We've loaded the shim, so load the kernel as a secondary module */
907 if (state->pv_kernel.mapped) {
908 LOG(DEBUG, "xc_dom_module_mem, cmdline %s",
909 state->pv_cmdline);
910 rc = xc_dom_module_mem(dom, state->pv_kernel.data,
911 state->pv_kernel.size, state->pv_cmdline);
912 if (rc) {
913 LOGE(ERROR, "xc_dom_kernel_mem failed");
914 goto out;
915 }
916 } else {
917 LOG(DEBUG, "xc_dom_module_file, path %s cmdline %s",
918 state->pv_kernel.path, state->pv_cmdline);
919 rc = xc_dom_module_file(dom, state->pv_kernel.path, state->pv_cmdline);
920 if (rc) {
921 LOGE(ERROR, "xc_dom_kernel_file failed");
922 goto out;
923 }
924 }
925 } else {
926 /* No shim, so load the kernel directly */
927 if (state->pv_kernel.mapped) {
928 rc = xc_dom_kernel_mem(dom, state->pv_kernel.data,
929 state->pv_kernel.size);
930 if (rc) {
931 LOGE(ERROR, "xc_dom_kernel_mem failed");
932 goto out;
933 }
934 } else {
935 rc = xc_dom_kernel_file(dom, state->pv_kernel.path);
936 if (rc) {
937 LOGE(ERROR, "xc_dom_kernel_file failed");
938 goto out;
939 }
940 }
941 }
942
943 if (state->pv_ramdisk.path && strlen(state->pv_ramdisk.path)) {
944 if (state->pv_ramdisk.mapped) {
945 rc = xc_dom_module_mem(dom, state->pv_ramdisk.data,
946 state->pv_ramdisk.size, NULL);
947 if (rc) {
948 LOGE(ERROR, "xc_dom_ramdisk_mem failed");
949 goto out;
950 }
951 } else {
952 rc = xc_dom_module_file(dom, state->pv_ramdisk.path, NULL);
953 if (rc) {
954 LOGE(ERROR, "xc_dom_ramdisk_file failed");
955 goto out;
956 }
957 }
958 }
959 } else {
960 /*
961 * Only HVM guests should get here; PVH should always have a
962 * kernel set at this point.
963 */
964 assert(info->type == LIBXL_DOMAIN_TYPE_HVM);
965 rc = xc_dom_kernel_file(dom, libxl__abs_path(gc, firmware,
966 libxl__xenfirmwaredir_path()));
967 }
968
969 if (rc != 0) {
970 LOGE(ERROR, "xc_dom_{kernel_file/ramdisk_file} failed");
971 goto out;
972 }
973
974 if (info->type == LIBXL_DOMAIN_TYPE_HVM &&
975 info->device_model_version == LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN) {
976 if (info->u.hvm.system_firmware) {
977 bios_filename = info->u.hvm.system_firmware;
978 } else {
979 switch (info->u.hvm.bios) {
980 case LIBXL_BIOS_TYPE_SEABIOS:
981 bios_filename = libxl__seabios_path();
982 break;
983 case LIBXL_BIOS_TYPE_OVMF:
984 bios_filename = libxl__ovmf_path();
985 break;
986 case LIBXL_BIOS_TYPE_ROMBIOS:
987 default:
988 abort();
989 }
990 }
991 }
992
993 if (bios_filename) {
994 rc = libxl__load_hvm_firmware_module(gc, bios_filename, "BIOS",
995 &dom->system_firmware_module);
996 if (rc) goto out;
997 }
998
999 if (info->type == LIBXL_DOMAIN_TYPE_HVM &&
1000 info->u.hvm.bios == LIBXL_BIOS_TYPE_ROMBIOS &&
1001 libxl__ipxe_path()) {
1002 const char *fp = libxl__ipxe_path();
1003 rc = xc_dom_module_file(dom, fp, "ipxe");
1004
1005 if (rc) {
1006 LOGE(ERROR, "failed to load IPXE %s (%d)", fp, rc);
1007 rc = ERROR_FAIL;
1008 goto out;
1009 }
1010 }
1011
1012 if (info->type == LIBXL_DOMAIN_TYPE_HVM &&
1013 info->u.hvm.smbios_firmware) {
1014 data = NULL;
1015 e = libxl_read_file_contents(ctx, info->u.hvm.smbios_firmware,
1016 &data, &datalen);
1017 if (e) {
1018 LOGEV(ERROR, e, "failed to read SMBIOS firmware file %s",
1019 info->u.hvm.smbios_firmware);
1020 rc = ERROR_FAIL;
1021 goto out;
1022 }
1023 libxl__ptr_add(gc, data);
1024 if (datalen) {
1025 /* Only accept non-empty files */
1026 dom->smbios_module.data = data;
1027 dom->smbios_module.length = (uint32_t)datalen;
1028 }
1029 }
1030
1031 if (info->type == LIBXL_DOMAIN_TYPE_HVM &&
1032 info->u.hvm.acpi_firmware) {
1033 data = NULL;
1034 e = libxl_read_file_contents(ctx, info->u.hvm.acpi_firmware,
1035 &data, &datalen);
1036 if (e) {
1037 LOGEV(ERROR, e, "failed to read ACPI firmware file %s",
1038 info->u.hvm.acpi_firmware);
1039 rc = ERROR_FAIL;
1040 goto out;
1041 }
1042 libxl__ptr_add(gc, data);
1043 if (datalen) {
1044 /* Only accept a non-empty file */
1045 dom->acpi_modules[0].data = data;
1046 dom->acpi_modules[0].length = (uint32_t)datalen;
1047 }
1048 }
1049
1050 return 0;
1051 out:
1052 assert(rc != 0);
1053 return rc;
1054 }
1055
1056 int libxl__build_hvm(libxl__gc *gc, uint32_t domid,
1057 libxl_domain_config *d_config,
1058 libxl__domain_build_state *state)
1059 {
1060 libxl_ctx *ctx = libxl__gc_owner(gc);
1061 int rc;
1062 uint64_t mmio_start, lowmem_end, highmem_end, mem_size;
1063 libxl_domain_build_info *const info = &d_config->b_info;
1064 struct xc_dom_image *dom = NULL;
1065 bool device_model = info->type == LIBXL_DOMAIN_TYPE_HVM ? true : false;
1066
1067 xc_dom_loginit(ctx->xch);
1068
1069 /*
1070 * If PVH and we have a shim override, use the shim cmdline.
1071 * If PVH and no shim override, use the pv cmdline.
1072 * If not PVH, use info->cmdline.
1073 */
1074 dom = xc_dom_allocate(ctx->xch, info->type == LIBXL_DOMAIN_TYPE_PVH ?
1075 (state->shim_path ? state->shim_cmdline : state->pv_cmdline) :
1076 info->cmdline, NULL);
1077 if (!dom) {
1078 LOGE(ERROR, "xc_dom_allocate failed");
1079 rc = ERROR_NOMEM;
1080 goto out;
1081 }
1082
1083 dom->container_type = XC_DOM_HVM_CONTAINER;
1084
1085 /* The params from the configuration file are in Mb, which are then
1086 * multiplied by 1 Kb. This was then divided off when calling
1087 * the old xc_hvm_build_target_mem() which then turned them to bytes.
1088 * Do all this in one step here...
1089 */
1090 mem_size = (uint64_t)(info->max_memkb - info->video_memkb) << 10;
1091 dom->target_pages = (uint64_t)(info->target_memkb - info->video_memkb) >> 2;
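/* target_memkb/video_memkb are in KiB; target_pages is in 4 KiB pages,
 * hence the >> 2. */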
1092 dom->claim_enabled = libxl_defbool_val(info->claim_mode);
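/*
 * Honour an explicit MMIO hole request only when it is larger than the
 * default hole, i.e. when it pushes RAM below HVM_BELOW_4G_MMIO_START.
 * E.g. (illustrative): a 1 GiB hole gives max_ram_below_4g = 3 GiB, below
 * the default ~3.75 GiB boundary, so it is used; a 128 MiB request would
 * be ignored.
 */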
1093 if (info->u.hvm.mmio_hole_memkb) {
1094 uint64_t max_ram_below_4g = (1ULL << 32) -
1095 (info->u.hvm.mmio_hole_memkb << 10);
1096
1097 if (max_ram_below_4g < HVM_BELOW_4G_MMIO_START)
1098 dom->mmio_size = info->u.hvm.mmio_hole_memkb << 10;
1099 }
1100
1101 rc = libxl__domain_firmware(gc, info, state, dom);
1102 if (rc != 0) {
1103 LOG(ERROR, "initializing domain firmware failed");
1104 goto out;
1105 }
1106
1107 if (dom->target_pages == 0)
1108 dom->target_pages = mem_size >> XC_PAGE_SHIFT;
1109 if (dom->mmio_size == 0 && device_model)
1110 dom->mmio_size = HVM_BELOW_4G_MMIO_LENGTH;
1111 else if (dom->mmio_size == 0 && !device_model) {
1112 #if defined(__i386__) || defined(__x86_64__)
1113 /*
1114 * Make sure the local APIC page, the ACPI tables and the special pages
1115 * are inside the MMIO hole.
1116 */
1117 xen_paddr_t start =
1118 (X86_HVM_END_SPECIAL_REGION - X86_HVM_NR_SPECIAL_PAGES) <<
1119 XC_PAGE_SHIFT;
1120
1121 start = min_t(xen_paddr_t, start, LAPIC_BASE_ADDRESS);
1122 start = min_t(xen_paddr_t, start, ACPI_INFO_PHYSICAL_ADDRESS);
1123 dom->mmio_size = GB(4) - start;
1124 #else
1125 assert(1);
1126 #endif
1127 }
1128 lowmem_end = mem_size;
1129 highmem_end = 0;
1130 mmio_start = (1ull << 32) - dom->mmio_size;
1131 if (lowmem_end > mmio_start)
1132 {
1133 highmem_end = (1ull << 32) + (lowmem_end - mmio_start);
1134 lowmem_end = mmio_start;
1135 }
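/*
 * E.g. (illustrative): a 6 GiB guest with a 2 GiB MMIO hole ends up with
 * lowmem_end = 2 GiB and highmem_end = 8 GiB, i.e. RAM at [0, 2 GiB) plus
 * [4 GiB, 8 GiB), with the hole at [2 GiB, 4 GiB).
 */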
1136 dom->lowmem_end = lowmem_end;
1137 dom->highmem_end = highmem_end;
1138 dom->mmio_start = mmio_start;
1139 dom->vga_hole_size = device_model ? LIBXL_VGA_HOLE_SIZE : 0;
1140 dom->device_model = device_model;
1141 dom->max_vcpus = info->max_vcpus;
1142 dom->console_evtchn = state->console_port;
1143 dom->console_domid = state->console_domid;
1144 dom->xenstore_evtchn = state->store_port;
1145 dom->xenstore_domid = state->store_domid;
1146
1147 rc = libxl__domain_device_construct_rdm(gc, d_config,
1148 info->u.hvm.rdm_mem_boundary_memkb*1024,
1149 dom);
1150 if (rc) {
1151 LOG(ERROR, "checking reserved device memory failed");
1152 goto out;
1153 }
1154
1155 if (info->num_vnuma_nodes != 0) {
1156 int i;
1157
1158 rc = libxl__vnuma_build_vmemrange_hvm(gc, domid, info, state, dom);
1159 if (rc != 0) {
1160 LOG(ERROR, "hvm build vmemranges failed");
1161 goto out;
1162 }
1163 rc = libxl__vnuma_config_check(gc, info, state);
1164 if (rc != 0) goto out;
1165 rc = set_vnuma_info(gc, domid, info, state);
1166 if (rc != 0) goto out;
1167
1168 dom->nr_vmemranges = state->num_vmemranges;
1169 dom->vmemranges = libxl__malloc(gc, sizeof(*dom->vmemranges) *
1170 dom->nr_vmemranges);
1171
1172 for (i = 0; i < dom->nr_vmemranges; i++) {
1173 dom->vmemranges[i].start = state->vmemranges[i].start;
1174 dom->vmemranges[i].end = state->vmemranges[i].end;
1175 dom->vmemranges[i].flags = state->vmemranges[i].flags;
1176 dom->vmemranges[i].nid = state->vmemranges[i].nid;
1177 }
1178
1179 dom->nr_vnodes = info->num_vnuma_nodes;
1180 dom->vnode_to_pnode = libxl__malloc(gc, sizeof(*dom->vnode_to_pnode) *
1181 dom->nr_vnodes);
1182 for (i = 0; i < dom->nr_vnodes; i++)
1183 dom->vnode_to_pnode[i] = info->vnuma_nodes[i].pnode;
1184 }
1185
1186 rc = libxl__build_dom(gc, domid, d_config, state, dom);
1187 if (rc != 0)
1188 goto out;
1189
1190 rc = hvm_build_set_params(ctx->xch, domid, info);
1191 if (rc != 0) {
1192 LOG(ERROR, "hvm build set params failed");
1193 goto out;
1194 }
1195
1196 state->console_mfn = dom->console_pfn;
1197 state->store_mfn = dom->xenstore_pfn;
1198 state->vuart_gfn = dom->vuart_gfn;
1199
1200 rc = hvm_build_set_xs_values(gc, domid, dom, info);
1201 if (rc != 0) {
1202 LOG(ERROR, "hvm build set xenstore values failed");
1203 goto out;
1204 }
1205
1206 xc_dom_release(dom);
1207 return 0;
1208
1209 out:
1210 assert(rc != 0);
1211 if (dom != NULL) xc_dom_release(dom);
1212 return rc;
1213 }
1214
1215 int libxl__qemu_traditional_cmd(libxl__gc *gc, uint32_t domid,
1216 const char *cmd)
1217 {
1218 char *path = NULL;
1219 uint32_t dm_domid = libxl_get_stubdom_id(CTX, domid);
1220 path = DEVICE_MODEL_XS_PATH(gc, dm_domid, domid, "/command");
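/* The resulting path is typically of the form
 * /local/domain/<dm_domid>/device-model/<domid>/command (illustrative). */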
1221 return libxl__xs_printf(gc, XBT_NULL, path, "%s", cmd);
1222 }
1223
1224 /*==================== Miscellaneous ====================*/
1225
1226 char *libxl__uuid2string(libxl__gc *gc, const libxl_uuid uuid)
1227 {
1228 return GCSPRINTF(LIBXL_UUID_FMT, LIBXL_UUID_BYTES(uuid));
1229 }
1230
1231 const char *libxl__userdata_path(libxl__gc *gc, uint32_t domid,
1232 const char *userdata_userid,
1233 const char *wh)
1234 {
1235 libxl_ctx *ctx = libxl__gc_owner(gc);
1236 char *uuid_string, *path;
1237 libxl_dominfo info;
1238 int rc;
1239
1240 libxl_dominfo_init(&info);
1241
1242 rc = libxl_domain_info(ctx, &info, domid);
1243 if (rc) {
1244 LOGE(ERROR, "unable to find domain info for domain %"PRIu32, domid);
1245 path = NULL;
1246 goto out;
1247 }
1248 uuid_string = GCSPRINTF(LIBXL_UUID_FMT, LIBXL_UUID_BYTES(info.uuid));
1249 path = GCSPRINTF(XEN_LIB_DIR "/userdata-%s.%u.%s.%s",
1250 wh, domid, uuid_string, userdata_userid);
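/* E.g. (illustrative, assuming XEN_LIB_DIR is /var/lib/xen):
 *   /var/lib/xen/userdata-d.7.<uuid>.xl */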
1251
1252 out:
1253 libxl_dominfo_dispose(&info);
1254 return path;
1255 }
1256
1257 static int userdata_delete(libxl__gc *gc, const char *path)
1258 {
1259 int r;
1260 r = unlink(path);
1261 if (r) {
1262 LOGE(ERROR, "remove failed for %s", path);
1263 return errno;
1264 }
1265 return 0;
1266 }
1267
1268 void libxl__userdata_destroyall(libxl__gc *gc, uint32_t domid)
1269 {
1270 const char *pattern;
1271 glob_t gl;
1272 int r, i;
1273
1274 pattern = libxl__userdata_path(gc, domid, "*", "?");
1275 if (!pattern)
1276 goto out;
1277
1278 gl.gl_pathc = 0;
1279 gl.gl_pathv = 0;
1280 gl.gl_offs = 0;
1281 r = glob(pattern, GLOB_ERR|GLOB_NOSORT|GLOB_MARK, 0, &gl);
1282 if (r == GLOB_NOMATCH)
1283 goto out;
1284 if (r)
1285 LOGE(ERROR, "glob failed for %s", pattern);
1286
1287 /* Note: don't delete domain-userdata-lock, it will be handled by
1288 * the unlock function.
1289 */
1290 for (i=0; i<gl.gl_pathc; i++) {
1291 if (!strstr(gl.gl_pathv[i], "domain-userdata-lock"))
1292 userdata_delete(gc, gl.gl_pathv[i]);
1293 }
1294 globfree(&gl);
1295 out:
1296 return;
1297 }
1298
1299 int libxl__userdata_store(libxl__gc *gc, uint32_t domid,
1300 const char *userdata_userid,
1301 const uint8_t *data, int datalen)
1302 {
1303 const char *filename;
1304 const char *newfilename;
1305 int e, rc;
1306 int fd = -1;
1307
1308 filename = libxl__userdata_path(gc, domid, userdata_userid, "d");
1309 if (!filename) {
1310 rc = ERROR_NOMEM;
1311 goto out;
1312 }
1313
1314 if (!datalen) {
1315 rc = userdata_delete(gc, filename);
1316 goto out;
1317 }
1318
1319 newfilename = libxl__userdata_path(gc, domid, userdata_userid, "n");
1320 if (!newfilename) {
1321 rc = ERROR_NOMEM;
1322 goto out;
1323 }
1324
1325 rc = ERROR_FAIL;
1326
1327 fd = open(newfilename, O_RDWR | O_CREAT | O_TRUNC, 0600);
1328 if (fd < 0)
1329 goto err;
1330
1331 if (libxl_write_exactly(CTX, fd, data, datalen, "userdata", newfilename))
1332 goto err;
1333
1334 if (close(fd) < 0) {
1335 fd = -1;
1336 goto err;
1337 }
1338 fd = -1;
1339
1340 if (rename(newfilename, filename))
1341 goto err;
1342
1343 rc = 0;
1344
1345 err:
1346 if (fd >= 0) {
1347 e = errno;
1348 close(fd);
1349 errno = e;
1350 }
1351
1352 if (rc)
1353 LOGE(ERROR, "cannot write/rename %s for %s", newfilename, filename);
1354 out:
1355 return rc;
1356 }
1357
1358 int libxl_userdata_store(libxl_ctx *ctx, uint32_t domid,
1359 const char *userdata_userid,
1360 const uint8_t *data, int datalen)
1361 {
1362 GC_INIT(ctx);
1363 int rc;
1364 libxl__flock *lock;
1365
1366 CTX_LOCK;
1367 lock = libxl__lock_domain_userdata(gc, domid);
1368 if (!lock) {
1369 rc = ERROR_LOCK_FAIL;
1370 goto out;
1371 }
1372
1373 rc = libxl__userdata_store(gc, domid, userdata_userid,
1374 data, datalen);
1375
1376 libxl__unlock_file(lock);
1377
1378 out:
1379 CTX_UNLOCK;
1380 GC_FREE;
1381 return rc;
1382 }
1383
1384 int libxl__userdata_retrieve(libxl__gc *gc, uint32_t domid,
1385 const char *userdata_userid,
1386 uint8_t **data_r, int *datalen_r)
1387 {
1388 const char *filename;
1389 int e, rc;
1390 int datalen = 0;
1391 void *data = 0;
1392
1393 filename = libxl__userdata_path(gc, domid, userdata_userid, "d");
1394 if (!filename) {
1395 rc = ERROR_NOMEM;
1396 goto out;
1397 }
1398
1399 e = libxl_read_file_contents(CTX, filename, data_r ? &data : 0, &datalen);
1400 if (e && errno != ENOENT) {
1401 rc = ERROR_FAIL;
1402 goto out;
1403 }
1404 if (!e && !datalen) {
1405 LOG(ERROR, "userdata file %s is empty", filename);
1406 if (data_r) assert(!*data_r);
1407 rc = ERROR_FAIL;
1408 goto out;
1409 }
1410
1411 if (data_r) *data_r = data;
1412 if (datalen_r) *datalen_r = datalen;
1413 rc = 0;
1414
1415 out:
1416 return rc;
1417 }
1418
1419 int libxl_userdata_retrieve(libxl_ctx *ctx, uint32_t domid,
1420 const char *userdata_userid,
1421 uint8_t **data_r, int *datalen_r)
1422 {
1423 GC_INIT(ctx);
1424 int rc;
1425 libxl__flock *lock;
1426
1427 CTX_LOCK;
1428 lock = libxl__lock_domain_userdata(gc, domid);
1429 if (!lock) {
1430 rc = ERROR_LOCK_FAIL;
1431 goto out;
1432 }
1433
1434 rc = libxl__userdata_retrieve(gc, domid, userdata_userid,
1435 data_r, datalen_r);
1436
1437
1438 libxl__unlock_file(lock);
1439 out:
1440 CTX_UNLOCK;
1441 GC_FREE;
1442 return rc;
1443 }
1444
1445 int libxl_userdata_unlink(libxl_ctx *ctx, uint32_t domid,
1446 const char *userdata_userid)
1447 {
1448 GC_INIT(ctx);
1449 CTX_LOCK;
1450
1451 int rc;
1452 libxl__flock *lock = NULL;
1453 const char *filename;
1454
1455 lock = libxl__lock_domain_userdata(gc, domid);
1456 if (!lock) {
1457 rc = ERROR_LOCK_FAIL;
1458 goto out;
1459 }
1460
1461 filename = libxl__userdata_path(gc, domid, userdata_userid, "d");
1462 if (!filename) {
1463 rc = ERROR_FAIL;
1464 goto out;
1465 }
1466 if (unlink(filename)) {
1467 LOGE(ERROR, "error deleting userdata file: %s", filename);
1468 rc = ERROR_FAIL;
1469 goto out;
1470 }
1471
1472 rc = 0;
1473 out:
1474 if (lock)
1475 libxl__unlock_file(lock);
1476 CTX_UNLOCK;
1477 GC_FREE;
1478 return rc;
1479 }
1480
1481 int libxl__domain_set_paging_mempool_size(
1482 libxl__gc *gc, libxl_domain_config *d_config, uint32_t domid)
1483 {
1484 uint64_t shadow_mem;
1485
1486 shadow_mem = d_config->b_info.shadow_memkb;
1487 shadow_mem <<= 10;
1488
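/* shadow_mem is now in bytes; if shifting back down does not recover the
 * original kB value, the << 10 overflowed 64 bits, so reject it. */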
1489 if ((shadow_mem >> 10) != d_config->b_info.shadow_memkb) {
1490 LOGED(ERROR, domid,
1491 "shadow_memkb value %"PRIu64"kB too large",
1492 d_config->b_info.shadow_memkb);
1493 return ERROR_FAIL;
1494 }
1495
1496 int r = xc_set_paging_mempool_size(CTX->xch, domid, shadow_mem);
1497 if (r) {
1498 LOGED(ERROR, domid,
1499 "Failed to set paging mempool size to %"PRIu64"kB",
1500 d_config->b_info.shadow_memkb);
1501 return ERROR_FAIL;
1502 }
1503
1504 return 0;
1505 }
1506
1507 /*
1508 * Local variables:
1509 * mode: C
1510 * c-basic-offset: 4
1511 * indent-tabs-mode: nil
1512 * End:
1513 */
1514