1 /*
2  * Copyright (C) 2009      Citrix Ltd.
3  * Author Vincent Hanquez <vincent.hanquez@eu.citrix.com>
4  *
5  * This program is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU Lesser General Public License as published
7  * by the Free Software Foundation; version 2.1 only. with the special
8  * exception on linking described in file LICENSE.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU Lesser General Public License for more details.
14  */
15 
16 #include "libxl_osdeps.h" /* must come before any other headers */
17 
18 #include <glob.h>
19 
20 #include "libxl_internal.h"
21 #include "libxl_arch.h"
22 
23 #include <xen/hvm/hvm_info_table.h>
24 #include <xen/hvm/hvm_xs_strings.h>
25 #include <xen/hvm/e820.h>
26 
27 //#define DEBUG 1
28 
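/*
 * Determine the type of an existing domain: query the hypervisor for the
 * domain info and, for HVM-capable guests, read the "type" node from the
 * domain's libxl xenstore path (falling back to HVM if it is unreadable).
 */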
29 libxl_domain_type libxl__domain_type(libxl__gc *gc, uint32_t domid)
30 {
31     libxl_ctx *ctx = libxl__gc_owner(gc);
32     xc_domaininfo_t info;
33     int ret;
34 
35     ret = xc_domain_getinfo_single(ctx->xch, domid, &info);
36     if (ret < 0) {
37         LOGED(ERROR, domid, "unable to get dominfo");
38         return LIBXL_DOMAIN_TYPE_INVALID;
39     }
40     if (info.flags & XEN_DOMINF_hvm_guest) {
41         const char *type_path = GCSPRINTF("%s/type",
42                                           libxl__xs_libxl_path(gc, domid));
43         const char *type;
44         libxl_domain_type t;
45         int rc;
46 
47         rc = libxl__xs_read_mandatory(gc, XBT_NULL, type_path, &type);
48         if (rc) {
49             LOG(WARN,
50             "unable to get domain type for domid=%"PRIu32", assuming HVM",
51                 domid);
52             return LIBXL_DOMAIN_TYPE_HVM;
53         }
54 
55         rc = libxl_domain_type_from_string(type, &t);
56         if (rc) {
57             LOG(WARN,
58             "unable to get domain type for domid=%"PRIu32", assuming HVM",
59                 domid);
60             return LIBXL_DOMAIN_TYPE_HVM;
61         }
62 
63         return t;
64     } else
65         return LIBXL_DOMAIN_TYPE_PV;
66 }
67 
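/* Return the id of the cpupool the domain is assigned to, or ERROR_FAIL. */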
68 int libxl__domain_cpupool(libxl__gc *gc, uint32_t domid)
69 {
70     xc_domaininfo_t info;
71     int ret;
72 
73     ret = xc_domain_getinfo_single(CTX->xch, domid, &info);
74     if (ret < 0)
75     {
76         LOGED(ERROR, domid, "get domaininfo failed");
77         return ERROR_FAIL;
78     }
79     return info.cpupool;
80 }
81 
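/*
 * Return the scheduler used by the domain's cpupool, or
 * LIBXL_SCHEDULER_UNKNOWN if the cpupool cannot be looked up.
 */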
82 libxl_scheduler libxl__domain_scheduler(libxl__gc *gc, uint32_t domid)
83 {
84     int cpupool = libxl__domain_cpupool(gc, domid);
85     libxl_cpupoolinfo poolinfo;
86     libxl_scheduler sched = LIBXL_SCHEDULER_UNKNOWN;
87     int rc;
88 
89     if (cpupool < 0)
90         return sched;
91 
92     libxl_cpupoolinfo_init(&poolinfo);
93     rc = libxl_cpupool_info(CTX, &poolinfo, cpupool);
94     if (rc < 0)
95         goto out;
96 
97     sched = poolinfo.sched;
98 
99 out:
100     libxl_cpupoolinfo_dispose(&poolinfo);
101     return sched;
102 }
103 
104 /*
105  * Two NUMA placement candidates are compared by means of the following
106  * heuristics:
107  *
108  *  - the number of vcpus runnable on the candidates is considered, and
109  *    candidates with fewer of them are preferred. If two candidates have
110  *    the same number of runnable vcpus,
111  *  - the amount of free memory in the candidates is considered, and the
112  *    candidate with the greater amount of it is preferred.
113  *
114  * In fact, leaving larger memory holes maximizes the probability of being
115  * able to put other domains on the node. That hopefully means many domains
116  * will benefit from local memory accesses, but also introduces the risk of
117  * overloading large (from a memory POV) nodes. That is exactly the effect
118  * that counting the vcpus able to run on the nodes tries to prevent.
119  *
120  * Note that this completely ignores the number of nodes each candidate spans,
121  * as the fact that fewer nodes is better is already accounted for in the
122  * algorithm.
123  */
124 static int numa_cmpf(const libxl__numa_candidate *c1,
125                      const libxl__numa_candidate *c2)
126 {
127     if (c1->nr_vcpus != c2->nr_vcpus)
128         return c1->nr_vcpus - c2->nr_vcpus;
129 
130     return c2->free_memkb - c1->free_memkb;
131 }
132 
133 /* The actual automatic NUMA placement routine */
134 static int numa_place_domain(libxl__gc *gc, uint32_t domid,
135                              libxl_domain_config *d_config)
136 {
137     libxl_domain_build_info *info = &d_config->b_info;
138     int found;
139     libxl__numa_candidate candidate;
140     libxl_bitmap cpumap, cpupool_nodemap, *map;
141     libxl_cpupoolinfo cpupool_info;
142     int i, cpupool, rc = 0;
143     uint64_t memkb;
144 
145     libxl__numa_candidate_init(&candidate);
146     libxl_bitmap_init(&cpumap);
147     libxl_bitmap_init(&cpupool_nodemap);
148     libxl_cpupoolinfo_init(&cpupool_info);
149 
150     /*
151      * Extract the cpumap from the cpupool the domain belongs to. In fact,
152      * it only makes sense to consider the cpus/nodes that are in there
153      * for placement.
154      */
155     rc = cpupool = libxl__domain_cpupool(gc, domid);
156     if (rc < 0)
157         goto out;
158     rc = libxl_cpupool_info(CTX, &cpupool_info, cpupool);
159     if (rc)
160         goto out;
161     map = &cpupool_info.cpumap;
162 
163     /*
164      * If there's a well defined hard affinity mask (i.e., the same one for all
165      * the vcpus), we can try to run the placement considering only the pcpus
166      * within such mask.
167      */
168     if (info->num_vcpu_hard_affinity)
169     {
170 #ifdef DEBUG
171         int j;
172 
173         for (j = 0; j < info->num_vcpu_hard_affinity; j++)
174             assert(libxl_bitmap_equal(&info->vcpu_hard_affinity[0],
175                                       &info->vcpu_hard_affinity[j], 0));
176 #endif /* DEBUG */
177 
178         rc = libxl_bitmap_and(CTX, &cpumap, &info->vcpu_hard_affinity[0],
179                               &cpupool_info.cpumap);
180         if (rc)
181             goto out;
182 
183         /* Hard affinity must contain at least one cpu of our cpupool */
184         if (libxl_bitmap_is_empty(&cpumap)) {
185             LOG(ERROR, "Hard affinity completely outside of domain's cpupool!");
186             rc = ERROR_INVAL;
187             goto out;
188         }
189     }
190 
191     rc = libxl__domain_need_memory_calculate(gc, info, &memkb);
192     if (rc)
193         goto out;
194     if (libxl_node_bitmap_alloc(CTX, &cpupool_nodemap, 0)) {
195         rc = ERROR_FAIL;
196         goto out;
197     }
198 
199     /* Find the best candidate with enough free memory and at least
200      * as many pcpus as the domain has vcpus.  */
201     rc = libxl__get_numa_candidate(gc, memkb, info->max_vcpus,
202                                    0, 0, map, numa_cmpf, &candidate, &found);
203     if (rc)
204         goto out;
205 
206     /* Not even a suitable placement candidate! Let's just not touch the
207      * domain's info->cpumap. It will have affinity with all nodes/cpus. */
208     if (found == 0)
209         goto out;
210 
211     /* Map the candidate's node map to the domain's info->nodemap */
212     libxl__numa_candidate_get_nodemap(gc, &candidate, &info->nodemap);
213 
214     /* Avoid trying to set the affinity to nodes that might be in the
215      * candidate's nodemap but out of our cpupool. */
216     rc = libxl_cpumap_to_nodemap(CTX, &cpupool_info.cpumap,
217                                  &cpupool_nodemap);
218     if (rc)
219         goto out;
220 
221     libxl_for_each_set_bit(i, info->nodemap) {
222         if (!libxl_bitmap_test(&cpupool_nodemap, i))
223             libxl_bitmap_reset(&info->nodemap, i);
224     }
225 
226     LOG(DETAIL, "NUMA placement candidate with %d nodes, %d cpus and "
227                 "%"PRIu64" KB free selected", candidate.nr_nodes,
228                 candidate.nr_cpus, candidate.free_memkb / 1024);
229 
230  out:
231     libxl__numa_candidate_dispose(&candidate);
232     libxl_bitmap_dispose(&cpupool_nodemap);
233     libxl_bitmap_dispose(&cpumap);
234     libxl_cpupoolinfo_dispose(&cpupool_info);
235     return rc;
236 }
237 
238 int libxl__build_pre(libxl__gc *gc, uint32_t domid,
239               libxl_domain_config *d_config, libxl__domain_build_state *state)
240 {
241     libxl_domain_build_info *const info = &d_config->b_info;
242     libxl_ctx *ctx = libxl__gc_owner(gc);
243     char *xs_domid, *con_domid;
244     int rc;
245     uint64_t size;
246 
247     if (xc_domain_max_vcpus(ctx->xch, domid, info->max_vcpus) != 0) {
248         LOG(ERROR, "Couldn't set max vcpu count");
249         return ERROR_FAIL;
250     }
251 
252     /*
253      * Check if the domain has any CPU or node affinity already. If not, try
254      * to build up the latter via automatic NUMA placement. In fact, in case
255      * numa_place_domain() manages to find a placement, info->nodemap is
256      * updated accordingly; if it does not, info->nodemap is just left
257      * alone. It is then the subsequent call to
258      * libxl_domain_set_nodeaffinity() that enacts the actual placement.
259      *
260      * As far as scheduling is concerned, we achieve NUMA-aware scheduling
261      * by having the results of placement affect the soft affinity of all
262      * the vcpus of the domain. Of course, we want that iff placement is
263      * enabled and actually happens, so we only change info->cpumap_soft to
264              * reflect the placement result if that is the case.
265      */
266     if (libxl_defbool_val(info->numa_placement)) {
267         if (info->cpumap.size || info->num_vcpu_soft_affinity)
268             LOG(WARN, "Can't run NUMA placement, as a soft "
269                       "affinity has been specified explicitly");
270         else if (info->nodemap.size)
271             LOG(WARN, "Can't run NUMA placement, as the domain has "
272                       "NUMA node affinity set already");
273         else {
274             libxl_bitmap cpumap_soft;
275 
276             rc = libxl_node_bitmap_alloc(ctx, &info->nodemap, 0);
277             if (rc)
278                 return rc;
279             libxl_bitmap_set_any(&info->nodemap);
280 
281             rc = libxl_cpu_bitmap_alloc(ctx, &cpumap_soft, 0);
282             if (rc)
283                 return rc;
284 
285             rc = numa_place_domain(gc, domid, d_config);
286             if (rc) {
287                 libxl_bitmap_dispose(&cpumap_soft);
288                 return rc;
289             }
290 
291             /*
292              * All we need to do now is convert the result of automatic
293              * placement from a nodemap to a cpumap, and then use that cpumap
294              * as the soft affinity for all the vcpus of the domain.
295              *
296              * When calling libxl_set_vcpuaffinity_all(), it is ok to use
297              * NULL as hard affinity, as we know we don't have one, or we
298              * won't be here.
299              */
300             libxl_nodemap_to_cpumap(ctx, &info->nodemap, &cpumap_soft);
301             libxl_set_vcpuaffinity_all(ctx, domid, info->max_vcpus,
302                                        NULL, &cpumap_soft);
303 
304             libxl_bitmap_dispose(&cpumap_soft);
305 
306             /*
307              * Placement has run, so avoid re-running it if this same
308              * config we are using and building here is ever re-used.
309              * This means that people re-using configs will get the same
310              * results, consistently, across every re-use, which is what
311              * we expect most people to want.
312              */
313             libxl_defbool_set(&info->numa_placement, false);
314         }
315     }
316 
317     if (info->nodemap.size)
318         libxl_domain_set_nodeaffinity(ctx, domid, &info->nodemap);
319 
320     if (info->num_vcpu_hard_affinity || info->num_vcpu_soft_affinity) {
321         libxl_bitmap *hard_affinity, *soft_affinity;
322         int i, n_vcpus;
323 
324         n_vcpus = info->num_vcpu_hard_affinity > info->num_vcpu_soft_affinity ?
325             info->num_vcpu_hard_affinity : info->num_vcpu_soft_affinity;
326 
327         for (i = 0; i < n_vcpus; i++) {
328             /*
329              * Prepare hard and soft affinity pointers in a way that allows
330              * us to issue only one call to libxl_set_vcpuaffinity(), setting,
331              * for each vcpu, both hard and soft affinity "atomically".
332              */
333             hard_affinity = NULL;
334             if (info->num_vcpu_hard_affinity &&
335                 i < info->num_vcpu_hard_affinity)
336                 hard_affinity = &info->vcpu_hard_affinity[i];
337 
338             soft_affinity = NULL;
339             if (info->num_vcpu_soft_affinity &&
340                 i < info->num_vcpu_soft_affinity)
341                 soft_affinity = &info->vcpu_soft_affinity[i];
342 
343             if (libxl_set_vcpuaffinity(ctx, domid, i,
344                                        hard_affinity, soft_affinity)) {
345                 LOG(ERROR, "setting affinity failed on vcpu `%d'", i);
346                 return ERROR_FAIL;
347             }
348         }
349     }
350 
351 
352     rc = libxl__arch_extra_memory(gc, info, &size);
353     if (rc < 0) {
354         LOGE(ERROR, "Couldn't get arch extra constant memory size");
355         return ERROR_FAIL;
356     }
357 
358     if (xc_domain_setmaxmem(ctx->xch, domid, info->target_memkb + size) < 0) {
359         LOGE(ERROR, "Couldn't set max memory");
360         return ERROR_FAIL;
361     }
362 
363     xs_domid = xs_read(ctx->xsh, XBT_NULL, "/tool/xenstored/domid", NULL);
364     state->store_domid = xs_domid ? atoi(xs_domid) : 0;
365     free(xs_domid);
366 
367     con_domid = xs_read(ctx->xsh, XBT_NULL, "/tool/xenconsoled/domid", NULL);
368     state->console_domid = con_domid ? atoi(con_domid) : 0;
369     free(con_domid);
370 
371     state->store_port = xc_evtchn_alloc_unbound(ctx->xch, domid, state->store_domid);
372     state->console_port = xc_evtchn_alloc_unbound(ctx->xch, domid, state->console_domid);
373 
374     rc = libxl__arch_domain_create(gc, d_config, state, domid);
375     if (rc) goto out;
376 
377     /* Construct a CPUID policy, but only for brand new domains.  Domains
378      * being migrated-in/restored have CPUID handled during the
379      * static_data_done() callback. */
380     if (!state->restore && !state->soft_reset)
381         rc = libxl__cpuid_legacy(ctx, domid, false, info);
382 
383 out:
384     return rc;
385 }
386 
387 static int set_vnuma_affinity(libxl__gc *gc, uint32_t domid,
388                               libxl_domain_build_info *info)
389 {
390     libxl_bitmap cpumap;
391     libxl_vnode_info *v;
392     unsigned int i, j;
393     int rc = 0;
394 
395     libxl_bitmap_init(&cpumap);
396 
397     rc = libxl_cpu_bitmap_alloc(CTX, &cpumap, 0);
398     if (rc) {
399         LOG(ERROR, "Can't allocate cpumap");
400         goto out;
401     }
402 
403     /*
404      * For each vcpu in each vnode, set its soft affinity to
405      * the pcpus belonging to the pnode the vnode is on
406      */
407     for (i = 0; i < info->num_vnuma_nodes; i++) {
408         v = &info->vnuma_nodes[i];
409 
410         rc = libxl_node_to_cpumap(CTX, v->pnode, &cpumap);
411         if (rc) {
412             LOG(ERROR, "Can't get cpumap for vnode %d", i);
413             goto out;
414         }
415 
416         libxl_for_each_set_bit(j, v->vcpus) {
417             rc = libxl_set_vcpuaffinity(CTX, domid, j, NULL, &cpumap);
418             if (rc) {
419                 LOG(ERROR, "Can't set cpu affinity for %d", j);
420                 goto out;
421             }
422         }
423     }
424 
425 out:
426     libxl_bitmap_dispose(&cpumap);
427     return rc;
428 }
429 
430 int libxl__build_post(libxl__gc *gc, uint32_t domid,
431                       libxl_domain_build_info *info,
432                       libxl__domain_build_state *state,
433                       char **vms_ents, char **local_ents)
434 {
435     libxl_ctx *ctx = libxl__gc_owner(gc);
436     char *dom_path, *vm_path;
437     xs_transaction_t t;
438     char **ents;
439     int i, rc;
440 
441     if (info->num_vnuma_nodes && !info->num_vcpu_soft_affinity) {
442         rc = set_vnuma_affinity(gc, domid, info);
443         if (rc)
444             return rc;
445     }
446 
447     rc = libxl_domain_sched_params_set(CTX, domid, &info->sched_params);
448     if (rc)
449         return rc;
450 
451     if (info->type == LIBXL_DOMAIN_TYPE_HVM
452         && !libxl_ms_vm_genid_is_zero(&info->u.hvm.ms_vm_genid)) {
453         rc = libxl__ms_vm_genid_set(gc, domid,
454                                     &info->u.hvm.ms_vm_genid);
455         if (rc) {
456             LOG(ERROR, "Failed to set VM Generation ID");
457             return rc;
458         }
459     }
460 
461     ents = libxl__calloc(gc, 12 + (info->max_vcpus * 2) + 2, sizeof(char *));
462     ents[0] = "memory/static-max";
463     ents[1] = GCSPRINTF("%"PRId64, info->max_memkb);
464     ents[2] = "memory/target";
465     ents[3] = GCSPRINTF("%"PRId64, info->target_memkb -
466                         libxl__get_targetmem_fudge(gc, info));
467     ents[4] = "memory/videoram";
468     ents[5] = GCSPRINTF("%"PRId64, info->video_memkb);
469     ents[6] = "domid";
470     ents[7] = GCSPRINTF("%d", domid);
471     ents[8] = "store/port";
472     ents[9] = GCSPRINTF("%"PRIu32, state->store_port);
473     ents[10] = "store/ring-ref";
474     ents[11] = GCSPRINTF("%lu", state->store_mfn);
475     for (i = 0; i < info->max_vcpus; i++) {
476         ents[12+(i*2)]   = GCSPRINTF("cpu/%d/availability", i);
477         ents[12+(i*2)+1] = libxl_bitmap_test(&info->avail_vcpus, i)
478                             ? "online" : "offline";
479     }
480 
481     dom_path = libxl__xs_get_dompath(gc, domid);
482     if (!dom_path) {
483         return ERROR_FAIL;
484     }
485 
486     vm_path = xs_read(ctx->xsh, XBT_NULL, GCSPRINTF("%s/vm", dom_path), NULL);
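    /*
     * Write the entries in a xenstore transaction, restarting it from
     * scratch if it fails with EAGAIN (i.e. another transaction raced us).
     */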
487 retry_transaction:
488     t = xs_transaction_start(ctx->xsh);
489 
490     libxl__xs_writev(gc, t, dom_path, ents);
491     libxl__xs_writev(gc, t, dom_path, local_ents);
492     libxl__xs_writev(gc, t, vm_path, vms_ents);
493 
494     if (!xs_transaction_end(ctx->xsh, t, 0))
495         if (errno == EAGAIN)
496             goto retry_transaction;
497     xs_introduce_domain(ctx->xsh, domid, state->store_mfn, state->store_port);
498     free(vm_path);
499     return 0;
500 }
501 
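/*
 * Plumb the vNUMA configuration into the hypervisor: build the
 * vnode-to-pnode and vcpu-to-vnode maps plus the distance matrix from
 * libxl_domain_build_info and hand them to xc_domain_setvnuma().
 */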
502 static int set_vnuma_info(libxl__gc *gc, uint32_t domid,
503                           const libxl_domain_build_info *info,
504                           const libxl__domain_build_state *state)
505 {
506     int rc = 0;
507     unsigned int i, nr_vdistance;
508     unsigned int *vcpu_to_vnode, *vnode_to_pnode, *vdistance = NULL;
509 
510     vcpu_to_vnode = libxl__calloc(gc, info->max_vcpus,
511                                   sizeof(unsigned int));
512     vnode_to_pnode = libxl__calloc(gc, info->num_vnuma_nodes,
513                                    sizeof(unsigned int));
514 
515     nr_vdistance = info->num_vnuma_nodes * info->num_vnuma_nodes;
516     vdistance = libxl__calloc(gc, nr_vdistance, sizeof(unsigned int));
517 
518     for (i = 0; i < info->num_vnuma_nodes; i++) {
519         libxl_vnode_info *v = &info->vnuma_nodes[i];
520         int j;
521 
522         /* vnode to pnode mapping */
523         vnode_to_pnode[i] = v->pnode;
524 
525         /* vcpu to vnode mapping */
526         libxl_for_each_set_bit(j, v->vcpus)
527             vcpu_to_vnode[j] = i;
528 
529         /* node distances */
530         assert(info->num_vnuma_nodes == v->num_distances);
531         memcpy(vdistance + (i * info->num_vnuma_nodes),
532                v->distances,
533                v->num_distances * sizeof(unsigned int));
534     }
535 
536     if (xc_domain_setvnuma(CTX->xch, domid, info->num_vnuma_nodes,
537                            state->num_vmemranges, info->max_vcpus,
538                            state->vmemranges, vdistance,
539                            vcpu_to_vnode, vnode_to_pnode) < 0) {
540         LOGE(ERROR, "xc_domain_setvnuma failed");
541         rc = ERROR_FAIL;
542     }
543 
544     return rc;
545 }
546 
547 static int libxl__build_dom(libxl__gc *gc, uint32_t domid,
548              libxl_domain_config *d_config, libxl__domain_build_state *state,
549              struct xc_dom_image *dom)
550 {
551     libxl_domain_build_info *const info = &d_config->b_info;
552     uint64_t mem_kb;
553     int ret;
554 
555     if ( (ret = xc_dom_boot_xen_init(dom, CTX->xch, domid)) != 0 ) {
556         LOGE(ERROR, "xc_dom_boot_xen_init failed");
557         goto out;
558     }
559 #ifdef GUEST_RAM_BASE
560     if ( (ret = xc_dom_rambase_init(dom, GUEST_RAM_BASE)) != 0 ) {
561         LOGE(ERROR, "xc_dom_rambase failed");
562         goto out;
563     }
564 #endif
565     if ( (ret = xc_dom_parse_image(dom)) != 0 ) {
566         LOG(ERROR, "xc_dom_parse_image failed");
567         goto out;
568     }
569     if ( (ret = libxl__arch_domain_init_hw_description(gc, d_config, state, dom)) != 0 ) {
570         LOGE(ERROR, "libxl__arch_domain_init_hw_description failed");
571         goto out;
572     }
573 
574     mem_kb = dom->container_type == XC_DOM_HVM_CONTAINER ?
575              (info->max_memkb - info->video_memkb) : info->target_memkb;
576     if ( (ret = xc_dom_mem_init(dom, mem_kb / 1024)) != 0 ) {
577         LOGE(ERROR, "xc_dom_mem_init failed");
578         goto out;
579     }
580     if ( (ret = xc_dom_boot_mem_init(dom)) != 0 ) {
581         LOGE(ERROR, "xc_dom_boot_mem_init failed");
582         goto out;
583     }
584     if ( (ret = libxl__arch_domain_finalise_hw_description(gc, domid, d_config, dom)) != 0 ) {
585         LOGE(ERROR, "libxl__arch_domain_finalise_hw_description failed");
586         goto out;
587     }
588     if ( (ret = xc_dom_build_image(dom)) != 0 ) {
589         LOGE(ERROR, "xc_dom_build_image failed");
590         goto out;
591     }
592     if ( (ret = xc_dom_boot_image(dom)) != 0 ) {
593         LOGE(ERROR, "xc_dom_boot_image failed");
594         goto out;
595     }
596     if ( (ret = xc_dom_gnttab_init(dom)) != 0 ) {
597         LOGE(ERROR, "xc_dom_gnttab_init failed");
598         goto out;
599     }
600     if ((ret = libxl__arch_build_dom_finish(gc, info, dom, state)) != 0) {
601         LOGE(ERROR, "libxl__arch_build_dom_finish failed");
602         goto out;
603     }
604 
605 out:
606     return ret != 0 ? ERROR_FAIL : 0;
607 }
608 
609 int libxl__build_pv(libxl__gc *gc, uint32_t domid,
610              libxl_domain_config *d_config, libxl__domain_build_state *state)
611 {
612     libxl_ctx *ctx = libxl__gc_owner(gc);
613     libxl_domain_build_info *const info = &d_config->b_info;
614     struct xc_dom_image *dom;
615     int ret;
616     int flags = 0;
617 
618     xc_dom_loginit(ctx->xch);
619 
620     dom = xc_dom_allocate(ctx->xch, state->pv_cmdline, info->u.pv.features);
621     if (!dom) {
622         LOGE(ERROR, "xc_dom_allocate failed");
623         return ERROR_FAIL;
624     }
625 
626     dom->container_type = XC_DOM_PV_CONTAINER;
627 
628     LOG(DEBUG, "pv kernel mapped %d path %s", state->pv_kernel.mapped, state->pv_kernel.path);
629 
630     if (state->pv_kernel.mapped) {
631         ret = xc_dom_kernel_mem(dom,
632                                 state->pv_kernel.data,
633                                 state->pv_kernel.size);
634         if ( ret != 0) {
635             LOGE(ERROR, "xc_dom_kernel_mem failed");
636             goto out;
637         }
638     } else {
639         ret = xc_dom_kernel_file(dom, state->pv_kernel.path);
640         if ( ret != 0) {
641             LOGE(ERROR, "xc_dom_kernel_file failed");
642             goto out;
643         }
644     }
645 
646     if ( state->pv_ramdisk.path && strlen(state->pv_ramdisk.path) ) {
647         if (state->pv_ramdisk.mapped) {
648             if ( (ret = xc_dom_module_mem(dom, state->pv_ramdisk.data, state->pv_ramdisk.size, NULL)) != 0 ) {
649                 LOGE(ERROR, "xc_dom_module_mem failed");
650                 goto out;
651             }
652         } else {
653             if ( (ret = xc_dom_module_file(dom, state->pv_ramdisk.path, NULL)) != 0 ) {
654                 LOGE(ERROR, "xc_dom_module_file failed");
655                 goto out;
656             }
657         }
658     }
659 
660     dom->flags = flags;
661     dom->console_evtchn = state->console_port;
662     dom->console_domid = state->console_domid;
663     dom->xenstore_evtchn = state->store_port;
664     dom->xenstore_domid = state->store_domid;
665     dom->claim_enabled = libxl_defbool_val(info->claim_mode);
666     dom->max_vcpus = info->max_vcpus;
667 
668     if (info->num_vnuma_nodes != 0) {
669         unsigned int i;
670 
671         ret = libxl__vnuma_build_vmemrange_pv(gc, domid, info, state);
672         if (ret) {
673             LOGE(ERROR, "cannot build vmemranges");
674             goto out;
675         }
676         ret = libxl__vnuma_config_check(gc, info, state);
677         if (ret) goto out;
678 
679         ret = set_vnuma_info(gc, domid, info, state);
680         if (ret) goto out;
681 
682         dom->nr_vmemranges = state->num_vmemranges;
683         dom->vmemranges = xc_dom_malloc(dom, sizeof(*dom->vmemranges) *
684                                         dom->nr_vmemranges);
685 
686         for (i = 0; i < dom->nr_vmemranges; i++) {
687             dom->vmemranges[i].start = state->vmemranges[i].start;
688             dom->vmemranges[i].end   = state->vmemranges[i].end;
689             dom->vmemranges[i].flags = state->vmemranges[i].flags;
690             dom->vmemranges[i].nid   = state->vmemranges[i].nid;
691         }
692 
693         dom->nr_vnodes = info->num_vnuma_nodes;
694         dom->vnode_to_pnode = xc_dom_malloc(dom, sizeof(*dom->vnode_to_pnode) *
695                                             dom->nr_vnodes);
696         for (i = 0; i < info->num_vnuma_nodes; i++)
697             dom->vnode_to_pnode[i] = info->vnuma_nodes[i].pnode;
698     }
699 
700     ret = libxl__build_dom(gc, domid, d_config, state, dom);
701     if (ret != 0)
702         goto out;
703 
704     if (xc_dom_translated(dom)) {
705         state->console_mfn = dom->console_pfn;
706         state->store_mfn = dom->xenstore_pfn;
707         state->vuart_gfn = dom->vuart_gfn;
708     } else {
709         state->console_mfn = xc_dom_p2m(dom, dom->console_pfn);
710         state->store_mfn = xc_dom_p2m(dom, dom->xenstore_pfn);
711     }
712 
713     ret = 0;
714 out:
715     xc_dom_release(dom);
716     return ret == 0 ? 0 : ERROR_FAIL;
717 }
718 
719 static int hvm_build_set_params(xc_interface *handle, uint32_t domid,
720                                 libxl_domain_build_info *info)
721 {
722     struct hvm_info_table *va_hvm;
723     uint8_t *va_map, sum;
724     int i;
725 
726     if (info->type == LIBXL_DOMAIN_TYPE_HVM) {
727         va_map = xc_map_foreign_range(handle, domid,
728                                       XC_PAGE_SIZE, PROT_READ | PROT_WRITE,
729                                       HVM_INFO_PFN);
730         if (va_map == NULL)
731             return ERROR_FAIL;
732 
733         va_hvm = (struct hvm_info_table *)(va_map + HVM_INFO_OFFSET);
734         va_hvm->apic_mode = libxl_defbool_val(info->apic);
735         va_hvm->nr_vcpus = info->max_vcpus;
736         memset(va_hvm->vcpu_online, 0, sizeof(va_hvm->vcpu_online));
737         memcpy(va_hvm->vcpu_online, info->avail_vcpus.map, info->avail_vcpus.size);
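        /*
         * Re-balance the checksum so the table's bytes still sum to zero
         * after the fields above were modified.
         */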
738         for (i = 0, sum = 0; i < va_hvm->length; i++)
739             sum += ((uint8_t *) va_hvm)[i];
740         va_hvm->checksum -= sum;
741         munmap(va_map, XC_PAGE_SIZE);
742     }
743 
744     return 0;
745 }
746 
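/*
 * Publish firmware-related data to xenstore for hvmloader: the guest
 * addresses/lengths of the SMBIOS and ACPI passthrough modules and any
 * extra SMBIOS strings from the configuration.
 */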
747 static int hvm_build_set_xs_values(libxl__gc *gc,
748                                    uint32_t domid,
749                                    struct xc_dom_image *dom,
750                                    const libxl_domain_build_info *info)
751 {
752     char *path = NULL;
753     int num_oem = 1;
754     int ret = 0;
755 
756     if (dom->smbios_module.guest_addr_out) {
757         path = GCSPRINTF("/local/domain/%d/"HVM_XS_SMBIOS_PT_ADDRESS, domid);
758 
759         ret = libxl__xs_printf(gc, XBT_NULL, path, "0x%"PRIx64,
760                                dom->smbios_module.guest_addr_out);
761         if (ret)
762             goto err;
763 
764         path = GCSPRINTF("/local/domain/%d/"HVM_XS_SMBIOS_PT_LENGTH, domid);
765 
766         ret = libxl__xs_printf(gc, XBT_NULL, path, "0x%x",
767                                dom->smbios_module.length);
768         if (ret)
769             goto err;
770     }
771 
772     for (int i = 0; i < info->u.hvm.num_smbios; i++) {
773         char *p;
774         if (info->u.hvm.smbios[i].key == LIBXL_SMBIOS_TYPE_OEM) {
775             if (num_oem > 99) {
776                 LOGD(ERROR, domid, "More than 99 SMBIOS OEM strings specified");
777                 ret = ERROR_INVAL;
778                 goto err;
779             }
780             path = GCSPRINTF("/local/domain/%d/"HVM_XS_OEM_STRINGS, domid,
781                              num_oem);
782             num_oem++;
783         } else {
784             path = GCSPRINTF("/local/domain/%d/"HVM_XS_BIOS_STRINGS"/%s", domid,
785                        libxl_smbios_type_to_string(info->u.hvm.smbios[i].key));
786         }
787 
788         /* Convert the libxl_smbios_type string to the xenstore path that
789          * hvmloader will use, as defined by HVM_XS_*. That is, convert '_' to '-'. */
790         p = strrchr(path, '/');
791         for ( ; *p; p++) {
792             if (*p == '_')
793                 *p = '-';
794         }
795 
796         LOGD(DEBUG, domid, "Writing %s = \"%s\"", path,
797              info->u.hvm.smbios[i].value);
798         ret = libxl__xs_printf(gc, XBT_NULL, path, "%s",
799                                info->u.hvm.smbios[i].value);
800         if (ret)
801             goto err;
802     }
803 
804     /* Only one module can be passed. PVHv2 guests do not support this. */
805     if (dom->acpi_modules[0].guest_addr_out &&
806         info->type == LIBXL_DOMAIN_TYPE_HVM) {
807         path = GCSPRINTF("/local/domain/%d/"HVM_XS_ACPI_PT_ADDRESS, domid);
808 
809         ret = libxl__xs_printf(gc, XBT_NULL, path, "0x%"PRIx64,
810                                dom->acpi_modules[0].guest_addr_out);
811         if (ret)
812             goto err;
813 
814         path = GCSPRINTF("/local/domain/%d/"HVM_XS_ACPI_PT_LENGTH, domid);
815 
816         ret = libxl__xs_printf(gc, XBT_NULL, path, "0x%x",
817                                dom->acpi_modules[0].length);
818         if (ret)
819             goto err;
820     }
821 
822     return 0;
823 
824 err:
825     LOG(ERROR, "failed to write firmware xenstore value, err: %d", ret);
826     return ret;
827 }
828 
829 static int libxl__load_hvm_firmware_module(libxl__gc *gc,
830                                            const char *filename,
831                                            const char *what,
832                                            struct xc_hvm_firmware_module *m)
833 {
834     int datalen = 0;
835     void *data = NULL;
836     int r, rc;
837 
838     LOG(DEBUG, "Loading %s: %s", what, filename);
839     r = libxl_read_file_contents(CTX, filename, &data, &datalen);
840     if (r) {
841         /*
842          * Print a message only on ENOENT, other errors are logged by the
843          * function libxl_read_file_contents().
844          */
845         if (r == ENOENT)
846             LOGEV(ERROR, r, "failed to read %s file", what);
847         rc =  ERROR_FAIL;
848         goto out;
849     }
850     libxl__ptr_add(gc, data);
851     if (datalen) {
852         /* Only accept non-empty files */
853         m->data = data;
854         m->length = datalen;
855     } else {
856         LOG(ERROR, "file %s for %s is empty", filename, what);
857         rc = ERROR_INVAL;
858         goto out;
859     }
860     rc = 0;
861 out:
862     return rc;
863 }
864 
865 static int libxl__domain_firmware(libxl__gc *gc,
866                                   libxl_domain_build_info *info,
867                                   libxl__domain_build_state *state,
868                                   struct xc_dom_image *dom)
869 {
870     libxl_ctx *ctx = libxl__gc_owner(gc);
871     const char *firmware = NULL;
872     int e, rc;
873     int datalen = 0;
874     void *data;
875     const char *bios_filename = NULL;
876 
877     if (info->type == LIBXL_DOMAIN_TYPE_HVM) {
878         if (info->u.hvm.firmware) {
879             firmware = info->u.hvm.firmware;
880         } else {
881             switch (info->device_model_version)
882             {
883             case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN:
884             case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN_TRADITIONAL:
885                 firmware = "hvmloader";
886                 break;
887             default:
888                 LOG(ERROR, "invalid device model version %d",
889                     info->device_model_version);
890                 rc = ERROR_FAIL;
891                 goto out;
892             }
893         }
894     }
895 
896     if (state->pv_kernel.path != NULL &&
897         info->type == LIBXL_DOMAIN_TYPE_PVH) {
898 
899         if (state->shim_path) {
900             rc = xc_dom_kernel_file(dom, state->shim_path);
901             if (rc) {
902                 LOGE(ERROR, "xc_dom_kernel_file failed");
903                 goto out;
904             }
905 
906             /* We've loaded the shim, so load the kernel as a secondary module */
907             if (state->pv_kernel.mapped) {
908                 LOG(DEBUG, "xc_dom_module_mem, cmdline %s",
909                     state->pv_cmdline);
910                 rc = xc_dom_module_mem(dom, state->pv_kernel.data,
911                                        state->pv_kernel.size, state->pv_cmdline);
912                 if (rc) {
913                     LOGE(ERROR, "xc_dom_module_mem failed");
914                     goto out;
915                 }
916             } else {
917                 LOG(DEBUG, "xc_dom_module_file, path %s cmdline %s",
918                     state->pv_kernel.path, state->pv_cmdline);
919                 rc = xc_dom_module_file(dom, state->pv_kernel.path, state->pv_cmdline);
920                 if (rc) {
921                     LOGE(ERROR, "xc_dom_module_file failed");
922                     goto out;
923                 }
924             }
925         } else {
926             /* No shim, so load the kernel directly */
927             if (state->pv_kernel.mapped) {
928                 rc = xc_dom_kernel_mem(dom, state->pv_kernel.data,
929                                        state->pv_kernel.size);
930                 if (rc) {
931                     LOGE(ERROR, "xc_dom_kernel_mem failed");
932                     goto out;
933                 }
934             } else {
935                 rc = xc_dom_kernel_file(dom, state->pv_kernel.path);
936                 if (rc) {
937                     LOGE(ERROR, "xc_dom_kernel_file failed");
938                     goto out;
939                 }
940             }
941         }
942 
943         if (state->pv_ramdisk.path && strlen(state->pv_ramdisk.path)) {
944             if (state->pv_ramdisk.mapped) {
945                 rc = xc_dom_module_mem(dom, state->pv_ramdisk.data,
946                                        state->pv_ramdisk.size, NULL);
947                 if (rc) {
948                     LOGE(ERROR, "xc_dom_module_mem failed");
949                     goto out;
950                 }
951             } else {
952                 rc = xc_dom_module_file(dom, state->pv_ramdisk.path, NULL);
953                 if (rc) {
954                     LOGE(ERROR, "xc_dom_module_file failed");
955                     goto out;
956                 }
957             }
958         }
959     } else {
960         /*
961          * Only HVM guests should get here; PVH guests should always have a
962          * kernel set at this point.
963          */
964         assert(info->type == LIBXL_DOMAIN_TYPE_HVM);
965         rc = xc_dom_kernel_file(dom, libxl__abs_path(gc, firmware,
966                                                  libxl__xenfirmwaredir_path()));
967     }
968 
969     if (rc != 0) {
970         LOGE(ERROR, "xc_dom_{kernel_file/ramdisk_file} failed");
971         goto out;
972     }
973 
974     if (info->type == LIBXL_DOMAIN_TYPE_HVM &&
975         info->device_model_version == LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN) {
976         if (info->u.hvm.system_firmware) {
977             bios_filename = info->u.hvm.system_firmware;
978         } else {
979             switch (info->u.hvm.bios) {
980             case LIBXL_BIOS_TYPE_SEABIOS:
981                 bios_filename = libxl__seabios_path();
982                 break;
983             case LIBXL_BIOS_TYPE_OVMF:
984                 bios_filename = libxl__ovmf_path();
985                 break;
986             case LIBXL_BIOS_TYPE_ROMBIOS:
987             default:
988                 abort();
989             }
990         }
991     }
992 
993     if (bios_filename) {
994         rc = libxl__load_hvm_firmware_module(gc, bios_filename, "BIOS",
995                                              &dom->system_firmware_module);
996         if (rc) goto out;
997     }
998 
999     if (info->type == LIBXL_DOMAIN_TYPE_HVM &&
1000         info->u.hvm.bios == LIBXL_BIOS_TYPE_ROMBIOS &&
1001         libxl__ipxe_path()) {
1002         const char *fp = libxl__ipxe_path();
1003         rc = xc_dom_module_file(dom, fp, "ipxe");
1004 
1005         if (rc) {
1006             LOGE(ERROR, "failed to load IPXE %s (%d)", fp, rc);
1007             rc = ERROR_FAIL;
1008             goto out;
1009         }
1010     }
1011 
1012     if (info->type == LIBXL_DOMAIN_TYPE_HVM &&
1013         info->u.hvm.smbios_firmware) {
1014         data = NULL;
1015         e = libxl_read_file_contents(ctx, info->u.hvm.smbios_firmware,
1016                                      &data, &datalen);
1017         if (e) {
1018             LOGEV(ERROR, e, "failed to read SMBIOS firmware file %s",
1019                 info->u.hvm.smbios_firmware);
1020             rc = ERROR_FAIL;
1021             goto out;
1022         }
1023         libxl__ptr_add(gc, data);
1024         if (datalen) {
1025             /* Only accept non-empty files */
1026             dom->smbios_module.data = data;
1027             dom->smbios_module.length = (uint32_t)datalen;
1028         }
1029     }
1030 
1031     if (info->type == LIBXL_DOMAIN_TYPE_HVM &&
1032         info->u.hvm.acpi_firmware) {
1033         data = NULL;
1034         e = libxl_read_file_contents(ctx, info->u.hvm.acpi_firmware,
1035                                      &data, &datalen);
1036         if (e) {
1037             LOGEV(ERROR, e, "failed to read ACPI firmware file %s",
1038                 info->u.hvm.acpi_firmware);
1039             rc = ERROR_FAIL;
1040             goto out;
1041         }
1042         libxl__ptr_add(gc, data);
1043         if (datalen) {
1044             /* Only accept a non-empty file */
1045             dom->acpi_modules[0].data = data;
1046             dom->acpi_modules[0].length = (uint32_t)datalen;
1047         }
1048     }
1049 
1050     return 0;
1051 out:
1052     assert(rc != 0);
1053     return rc;
1054 }
1055 
1056 int libxl__build_hvm(libxl__gc *gc, uint32_t domid,
1057               libxl_domain_config *d_config,
1058               libxl__domain_build_state *state)
1059 {
1060     libxl_ctx *ctx = libxl__gc_owner(gc);
1061     int rc;
1062     uint64_t mmio_start, lowmem_end, highmem_end, mem_size;
1063     libxl_domain_build_info *const info = &d_config->b_info;
1064     struct xc_dom_image *dom = NULL;
1065     bool device_model = info->type == LIBXL_DOMAIN_TYPE_HVM ? true : false;
1066 
1067     xc_dom_loginit(ctx->xch);
1068 
1069     /*
1070      * If PVH and we have a shim override, use the shim cmdline.
1071      * If PVH and no shim override, use the pv cmdline.
1072      * If not PVH, use info->cmdline.
1073      */
1074     dom = xc_dom_allocate(ctx->xch, info->type == LIBXL_DOMAIN_TYPE_PVH ?
1075                           (state->shim_path ? state->shim_cmdline : state->pv_cmdline) :
1076                           info->cmdline, NULL);
1077     if (!dom) {
1078         LOGE(ERROR, "xc_dom_allocate failed");
1079         rc = ERROR_NOMEM;
1080         goto out;
1081     }
1082 
1083     dom->container_type = XC_DOM_HVM_CONTAINER;
1084 
1085     /* The params from the configuration file are in MiB, which are then
1086      * multiplied by 1024 to give KiB. That factor was then divided off when
1087      * calling the old xc_hvm_build_target_mem(), which turned them into bytes.
1088      * Do all this in one step here...
1089      */
1090     mem_size = (uint64_t)(info->max_memkb - info->video_memkb) << 10;
1091     dom->target_pages = (uint64_t)(info->target_memkb - info->video_memkb) >> 2;
1092     dom->claim_enabled = libxl_defbool_val(info->claim_mode);
1093     if (info->u.hvm.mmio_hole_memkb) {
1094         uint64_t max_ram_below_4g = (1ULL << 32) -
1095             (info->u.hvm.mmio_hole_memkb << 10);
1096 
1097         if (max_ram_below_4g < HVM_BELOW_4G_MMIO_START)
1098             dom->mmio_size = info->u.hvm.mmio_hole_memkb << 10;
1099     }
1100 
1101     rc = libxl__domain_firmware(gc, info, state, dom);
1102     if (rc != 0) {
1103         LOG(ERROR, "initializing domain firmware failed");
1104         goto out;
1105     }
1106 
1107     if (dom->target_pages == 0)
1108         dom->target_pages = mem_size >> XC_PAGE_SHIFT;
1109     if (dom->mmio_size == 0 && device_model)
1110         dom->mmio_size = HVM_BELOW_4G_MMIO_LENGTH;
1111     else if (dom->mmio_size == 0 && !device_model) {
1112 #if defined(__i386__) || defined(__x86_64__)
1113         /*
1114          * Make sure the local APIC page, the ACPI tables and the special pages
1115          * are inside the MMIO hole.
1116          */
1117         xen_paddr_t start =
1118             (X86_HVM_END_SPECIAL_REGION - X86_HVM_NR_SPECIAL_PAGES) <<
1119             XC_PAGE_SHIFT;
1120 
1121         start = min_t(xen_paddr_t, start, LAPIC_BASE_ADDRESS);
1122         start = min_t(xen_paddr_t, start, ACPI_INFO_PHYSICAL_ADDRESS);
1123         dom->mmio_size = GB(4) - start;
1124 #else
1125         assert(1);
1126 #endif
1127     }
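    /*
     * Carve the MMIO hole out of the guest memory map: RAM that would
     * overlap the hole below 4 GiB is relocated just above 4 GiB instead.
     */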
1128     lowmem_end = mem_size;
1129     highmem_end = 0;
1130     mmio_start = (1ull << 32) - dom->mmio_size;
1131     if (lowmem_end > mmio_start)
1132     {
1133         highmem_end = (1ull << 32) + (lowmem_end - mmio_start);
1134         lowmem_end = mmio_start;
1135     }
1136     dom->lowmem_end = lowmem_end;
1137     dom->highmem_end = highmem_end;
1138     dom->mmio_start = mmio_start;
1139     dom->vga_hole_size = device_model ? LIBXL_VGA_HOLE_SIZE : 0;
1140     dom->device_model = device_model;
1141     dom->max_vcpus = info->max_vcpus;
1142     dom->console_evtchn = state->console_port;
1143     dom->console_domid = state->console_domid;
1144     dom->xenstore_evtchn = state->store_port;
1145     dom->xenstore_domid = state->store_domid;
1146 
1147     rc = libxl__domain_device_construct_rdm(gc, d_config,
1148                                             info->u.hvm.rdm_mem_boundary_memkb*1024,
1149                                             dom);
1150     if (rc) {
1151         LOG(ERROR, "checking reserved device memory failed");
1152         goto out;
1153     }
1154 
1155     if (info->num_vnuma_nodes != 0) {
1156         int i;
1157 
1158         rc = libxl__vnuma_build_vmemrange_hvm(gc, domid, info, state, dom);
1159         if (rc != 0) {
1160             LOG(ERROR, "hvm build vmemranges failed");
1161             goto out;
1162         }
1163         rc = libxl__vnuma_config_check(gc, info, state);
1164         if (rc != 0) goto out;
1165         rc = set_vnuma_info(gc, domid, info, state);
1166         if (rc != 0) goto out;
1167 
1168         dom->nr_vmemranges = state->num_vmemranges;
1169         dom->vmemranges = libxl__malloc(gc, sizeof(*dom->vmemranges) *
1170                                         dom->nr_vmemranges);
1171 
1172         for (i = 0; i < dom->nr_vmemranges; i++) {
1173             dom->vmemranges[i].start = state->vmemranges[i].start;
1174             dom->vmemranges[i].end   = state->vmemranges[i].end;
1175             dom->vmemranges[i].flags = state->vmemranges[i].flags;
1176             dom->vmemranges[i].nid   = state->vmemranges[i].nid;
1177         }
1178 
1179         dom->nr_vnodes = info->num_vnuma_nodes;
1180         dom->vnode_to_pnode = libxl__malloc(gc, sizeof(*dom->vnode_to_pnode) *
1181                                             dom->nr_vnodes);
1182         for (i = 0; i < dom->nr_vnodes; i++)
1183             dom->vnode_to_pnode[i] = info->vnuma_nodes[i].pnode;
1184     }
1185 
1186     rc = libxl__build_dom(gc, domid, d_config, state, dom);
1187     if (rc != 0)
1188         goto out;
1189 
1190     rc = hvm_build_set_params(ctx->xch, domid, info);
1191     if (rc != 0) {
1192         LOG(ERROR, "hvm build set params failed");
1193         goto out;
1194     }
1195 
1196     state->console_mfn = dom->console_pfn;
1197     state->store_mfn = dom->xenstore_pfn;
1198     state->vuart_gfn = dom->vuart_gfn;
1199 
1200     rc = hvm_build_set_xs_values(gc, domid, dom, info);
1201     if (rc != 0) {
1202         LOG(ERROR, "hvm build set xenstore values failed");
1203         goto out;
1204     }
1205 
1206     xc_dom_release(dom);
1207     return 0;
1208 
1209 out:
1210     assert(rc != 0);
1211     if (dom != NULL) xc_dom_release(dom);
1212     return rc;
1213 }
1214 
1215 int libxl__qemu_traditional_cmd(libxl__gc *gc, uint32_t domid,
1216                                 const char *cmd)
1217 {
1218     char *path = NULL;
1219     uint32_t dm_domid = libxl_get_stubdom_id(CTX, domid);
1220     path = DEVICE_MODEL_XS_PATH(gc, dm_domid, domid, "/command");
1221     return libxl__xs_printf(gc, XBT_NULL, path, "%s", cmd);
1222 }
1223 
1224 /*==================== Miscellaneous ====================*/
1225 
1226 char *libxl__uuid2string(libxl__gc *gc, const libxl_uuid uuid)
1227 {
1228     return GCSPRINTF(LIBXL_UUID_FMT, LIBXL_UUID_BYTES(uuid));
1229 }
1230 
1231 const char *libxl__userdata_path(libxl__gc *gc, uint32_t domid,
1232                                  const char *userdata_userid,
1233                                  const char *wh)
1234 {
1235     libxl_ctx *ctx = libxl__gc_owner(gc);
1236     char *uuid_string, *path;
1237     libxl_dominfo info;
1238     int rc;
1239 
1240     libxl_dominfo_init(&info);
1241 
1242     rc = libxl_domain_info(ctx, &info, domid);
1243     if (rc) {
1244         LOGE(ERROR, "unable to find domain info for domain %"PRIu32, domid);
1245         path = NULL;
1246         goto out;
1247     }
1248     uuid_string = GCSPRINTF(LIBXL_UUID_FMT, LIBXL_UUID_BYTES(info.uuid));
1249     path = GCSPRINTF(XEN_LIB_DIR "/userdata-%s.%u.%s.%s",
1250                      wh, domid, uuid_string, userdata_userid);
1251 
1252  out:
1253     libxl_dominfo_dispose(&info);
1254     return path;
1255 }
1256 
1257 static int userdata_delete(libxl__gc *gc, const char *path)
1258 {
1259     int r;
1260     r = unlink(path);
1261     if (r) {
1262         LOGE(ERROR, "remove failed for %s", path);
1263         return errno;
1264     }
1265     return 0;
1266 }
1267 
1268 void libxl__userdata_destroyall(libxl__gc *gc, uint32_t domid)
1269 {
1270     const char *pattern;
1271     glob_t gl;
1272     int r, i;
1273 
1274     pattern = libxl__userdata_path(gc, domid, "*", "?");
1275     if (!pattern)
1276         goto out;
1277 
1278     gl.gl_pathc = 0;
1279     gl.gl_pathv = 0;
1280     gl.gl_offs = 0;
1281     r = glob(pattern, GLOB_ERR|GLOB_NOSORT|GLOB_MARK, 0, &gl);
1282     if (r == GLOB_NOMATCH)
1283         goto out;
1284     if (r)
1285         LOGE(ERROR, "glob failed for %s", pattern);
1286 
1287     /* Note: don't delete domain-userdata-lock, it will be handled by
1288      * unlock function.
1289      */
1290     for (i=0; i<gl.gl_pathc; i++) {
1291         if (!strstr(gl.gl_pathv[i], "domain-userdata-lock"))
1292             userdata_delete(gc, gl.gl_pathv[i]);
1293     }
1294     globfree(&gl);
1295 out:
1296     return;
1297 }
1298 
1299 int libxl__userdata_store(libxl__gc *gc, uint32_t domid,
1300                           const char *userdata_userid,
1301                           const uint8_t *data, int datalen)
1302 {
1303     const char *filename;
1304     const char *newfilename;
1305     int e, rc;
1306     int fd = -1;
1307 
1308     filename = libxl__userdata_path(gc, domid, userdata_userid, "d");
1309     if (!filename) {
1310         rc = ERROR_NOMEM;
1311         goto out;
1312     }
1313 
1314     if (!datalen) {
1315         rc = userdata_delete(gc, filename);
1316         goto out;
1317     }
1318 
1319     newfilename = libxl__userdata_path(gc, domid, userdata_userid, "n");
1320     if (!newfilename) {
1321         rc = ERROR_NOMEM;
1322         goto out;
1323     }
1324 
1325     rc = ERROR_FAIL;
1326 
1327     fd = open(newfilename, O_RDWR | O_CREAT | O_TRUNC, 0600);
1328     if (fd < 0)
1329         goto err;
1330 
1331     if (libxl_write_exactly(CTX, fd, data, datalen, "userdata", newfilename))
1332         goto err;
1333 
1334     if (close(fd) < 0) {
1335         fd = -1;
1336         goto err;
1337     }
1338     fd = -1;
1339 
1340     if (rename(newfilename, filename))
1341         goto err;
1342 
1343     rc = 0;
1344 
1345 err:
1346     if (fd >= 0) {
1347         e = errno;
1348         close(fd);
1349         errno = e;
1350     }
1351 
1352     if (rc)
1353         LOGE(ERROR, "cannot write/rename %s for %s", newfilename, filename);
1354 out:
1355     return rc;
1356 }
1357 
1358 int libxl_userdata_store(libxl_ctx *ctx, uint32_t domid,
1359                               const char *userdata_userid,
1360                               const uint8_t *data, int datalen)
1361 {
1362     GC_INIT(ctx);
1363     int rc;
1364     libxl__flock *lock;
1365 
1366     CTX_LOCK;
1367     lock = libxl__lock_domain_userdata(gc, domid);
1368     if (!lock) {
1369         rc = ERROR_LOCK_FAIL;
1370         goto out;
1371     }
1372 
1373     rc = libxl__userdata_store(gc, domid, userdata_userid,
1374                                data, datalen);
1375 
1376     libxl__unlock_file(lock);
1377 
1378 out:
1379     CTX_UNLOCK;
1380     GC_FREE;
1381     return rc;
1382 }
1383 
1384 int libxl__userdata_retrieve(libxl__gc *gc, uint32_t domid,
1385                              const char *userdata_userid,
1386                              uint8_t **data_r, int *datalen_r)
1387 {
1388     const char *filename;
1389     int e, rc;
1390     int datalen = 0;
1391     void *data = 0;
1392 
1393     filename = libxl__userdata_path(gc, domid, userdata_userid, "d");
1394     if (!filename) {
1395         rc = ERROR_NOMEM;
1396         goto out;
1397     }
1398 
1399     e = libxl_read_file_contents(CTX, filename, data_r ? &data : 0, &datalen);
1400     if (e && errno != ENOENT) {
1401         rc = ERROR_FAIL;
1402         goto out;
1403     }
1404     if (!e && !datalen) {
1405         LOG(ERROR, "userdata file %s is empty", filename);
1406         if (data_r) assert(!*data_r);
1407         rc = ERROR_FAIL;
1408         goto out;
1409     }
1410 
1411     if (data_r) *data_r = data;
1412     if (datalen_r) *datalen_r = datalen;
1413     rc = 0;
1414 
1415 out:
1416     return rc;
1417 }
1418 
1419 int libxl_userdata_retrieve(libxl_ctx *ctx, uint32_t domid,
1420                                  const char *userdata_userid,
1421                                  uint8_t **data_r, int *datalen_r)
1422 {
1423     GC_INIT(ctx);
1424     int rc;
1425     libxl__flock *lock;
1426 
1427     CTX_LOCK;
1428     lock = libxl__lock_domain_userdata(gc, domid);
1429     if (!lock) {
1430         rc = ERROR_LOCK_FAIL;
1431         goto out;
1432     }
1433 
1434     rc = libxl__userdata_retrieve(gc, domid, userdata_userid,
1435                                   data_r, datalen_r);
1436 
1437 
1438     libxl__unlock_file(lock);
1439 out:
1440     CTX_UNLOCK;
1441     GC_FREE;
1442     return rc;
1443 }
1444 
1445 int libxl_userdata_unlink(libxl_ctx *ctx, uint32_t domid,
1446                           const char *userdata_userid)
1447 {
1448     GC_INIT(ctx);
1449     CTX_LOCK;
1450 
1451     int rc;
1452     libxl__flock *lock = NULL;
1453     const char *filename;
1454 
1455     lock = libxl__lock_domain_userdata(gc, domid);
1456     if (!lock) {
1457         rc = ERROR_LOCK_FAIL;
1458         goto out;
1459     }
1460 
1461     filename = libxl__userdata_path(gc, domid, userdata_userid, "d");
1462     if (!filename) {
1463         rc = ERROR_FAIL;
1464         goto out;
1465     }
1466     if (unlink(filename)) {
1467         LOGE(ERROR, "error deleting userdata file: %s", filename);
1468         rc = ERROR_FAIL;
1469         goto out;
1470     }
1471 
1472     rc = 0;
1473 out:
1474     if (lock)
1475         libxl__unlock_file(lock);
1476     CTX_UNLOCK;
1477     GC_FREE;
1478     return rc;
1479 }
1480 
1481 int libxl__domain_set_paging_mempool_size(
1482     libxl__gc *gc, libxl_domain_config *d_config, uint32_t domid)
1483 {
1484     uint64_t shadow_mem;
1485 
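    /*
     * shadow_memkb is in KiB; the hypercall takes bytes.  Do the shift in a
     * 64-bit variable and check it round-trips so an overflow is reported
     * rather than silently truncated.
     */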
1486     shadow_mem = d_config->b_info.shadow_memkb;
1487     shadow_mem <<= 10;
1488 
1489     if ((shadow_mem >> 10) != d_config->b_info.shadow_memkb) {
1490         LOGED(ERROR, domid,
1491               "shadow_memkb value %"PRIu64"kB too large",
1492               d_config->b_info.shadow_memkb);
1493         return ERROR_FAIL;
1494     }
1495 
1496     int r = xc_set_paging_mempool_size(CTX->xch, domid, shadow_mem);
1497     if (r) {
1498         LOGED(ERROR, domid,
1499               "Failed to set paging mempool size to %"PRIu64"kB",
1500               d_config->b_info.shadow_memkb);
1501         return ERROR_FAIL;
1502     }
1503 
1504     return 0;
1505 }
1506 
1507 /*
1508  * Local variables:
1509  * mode: C
1510  * c-basic-offset: 4
1511  * indent-tabs-mode: nil
1512  * End:
1513  */
1514