/*
 * Copyright (C) 2009      Citrix Ltd.
 * Author Vincent Hanquez <vincent.hanquez@eu.citrix.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation; version 2.1 only, with the special
 * exception on linking described in file LICENSE.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 */

#include "libxl_osdeps.h" /* must come before any other headers */

#include <glob.h>

#include "libxl_internal.h"
#include "libxl_arch.h"

#include <xen/hvm/hvm_info_table.h>
#include <xen/hvm/hvm_xs_strings.h>
#include <xen/hvm/e820.h>

//#define DEBUG 1

libxl_domain_type libxl__domain_type(libxl__gc *gc, uint32_t domid)
{
    libxl_ctx *ctx = libxl__gc_owner(gc);
    xc_domaininfo_t info;
    int ret;

    ret = xc_domain_getinfo_single(ctx->xch, domid, &info);
    if (ret < 0) {
        LOGED(ERROR, domid, "unable to get dominfo");
        return LIBXL_DOMAIN_TYPE_INVALID;
    }
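    /*
     * Both HVM and PVH guests have XEN_DOMINF_hvm_guest set, so the type
     * recorded under the domain's libxl xenstore path is what tells them
     * apart.
     */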
    if (info.flags & XEN_DOMINF_hvm_guest) {
        const char *type_path = GCSPRINTF("%s/type",
                                          libxl__xs_libxl_path(gc, domid));
        const char *type;
        libxl_domain_type t;
        int rc;

        rc = libxl__xs_read_mandatory(gc, XBT_NULL, type_path, &type);
        if (rc) {
            LOG(WARN,
            "unable to get domain type for domid=%"PRIu32", assuming HVM",
                domid);
            return LIBXL_DOMAIN_TYPE_HVM;
        }

        rc = libxl_domain_type_from_string(type, &t);
        if (rc) {
            LOG(WARN,
            "unable to get domain type for domid=%"PRIu32", assuming HVM",
                domid);
            return LIBXL_DOMAIN_TYPE_HVM;
        }

        return t;
    } else
        return LIBXL_DOMAIN_TYPE_PV;
}

int libxl__domain_cpupool(libxl__gc *gc, uint32_t domid)
{
    xc_domaininfo_t info;
    int ret;

    ret = xc_domain_getinfo_single(CTX->xch, domid, &info);
    if (ret < 0)
    {
        LOGED(ERROR, domid, "get domaininfo failed");
        return ERROR_FAIL;
    }
    return info.cpupool;
}

libxl_scheduler libxl__domain_scheduler(libxl__gc *gc, uint32_t domid)
{
    int cpupool = libxl__domain_cpupool(gc, domid);
    libxl_cpupoolinfo poolinfo;
    libxl_scheduler sched = LIBXL_SCHEDULER_UNKNOWN;
    int rc;

    if (cpupool < 0)
        return sched;

    libxl_cpupoolinfo_init(&poolinfo);
    rc = libxl_cpupool_info(CTX, &poolinfo, cpupool);
    if (rc < 0)
        goto out;

    sched = poolinfo.sched;

out:
    libxl_cpupoolinfo_dispose(&poolinfo);
    return sched;
}

/*
 * Two NUMA placement candidates are compared by means of the following
 * heuristics:
 *
 *  - the number of vcpus runnable on the candidates is considered, and
 *    candidates with fewer of them are preferred. If two candidates have
 *    the same number of runnable vcpus,
 *  - the amount of free memory in the candidates is considered, and the
 *    candidate with the greater amount of it is preferred.
 *
 * In fact, leaving larger memory holes maximizes the probability of being
 * able to put other domains on the node. That hopefully means many domains
 * will benefit from local memory accesses, but it also introduces the risk
 * of overloading large (from a memory POV) nodes. That is exactly the
 * effect that counting the vcpus able to run on the nodes tries to prevent.
 *
 * Note that this completely ignores the number of nodes each candidate
 * spans, as the fact that fewer nodes is better is already accounted for
 * in the algorithm.
 */
static int numa_cmpf(const libxl__numa_candidate *c1,
                     const libxl__numa_candidate *c2)
{
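    /*
     * Comparison convention: a negative result means c1 is the preferred
     * candidate. Fewer runnable vcpus win; free memory breaks ties in
     * favour of the candidate with more of it.
     */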
    if (c1->nr_vcpus != c2->nr_vcpus)
        return c1->nr_vcpus - c2->nr_vcpus;

    return c2->free_memkb - c1->free_memkb;
}

/* The actual automatic NUMA placement routine */
static int numa_place_domain(libxl__gc *gc, uint32_t domid,
                             libxl_domain_config *d_config)
{
    libxl_domain_build_info *info = &d_config->b_info;
    int found;
    libxl__numa_candidate candidate;
    libxl_bitmap cpumap, cpupool_nodemap, *map;
    libxl_cpupoolinfo cpupool_info;
    int i, cpupool, rc = 0;
    uint64_t memkb;

    libxl__numa_candidate_init(&candidate);
    libxl_bitmap_init(&cpumap);
    libxl_bitmap_init(&cpupool_nodemap);
    libxl_cpupoolinfo_init(&cpupool_info);

    /*
     * Extract the cpumap from the cpupool the domain belongs to. In fact,
     * it only makes sense to consider the cpus/nodes that are in there
     * for placement.
     */
    rc = cpupool = libxl__domain_cpupool(gc, domid);
    if (rc < 0)
        goto out;
    rc = libxl_cpupool_info(CTX, &cpupool_info, cpupool);
    if (rc)
        goto out;
    map = &cpupool_info.cpumap;

    /*
     * If there's a well defined hard affinity mask (i.e., the same one for
     * all the vcpus), we can try to run the placement considering only the
     * pcpus within that mask.
     */
    if (info->num_vcpu_hard_affinity)
    {
#ifdef DEBUG
        int j;

        for (j = 0; j < info->num_vcpu_hard_affinity; j++)
            assert(libxl_bitmap_equal(&info->vcpu_hard_affinity[0],
                                      &info->vcpu_hard_affinity[j], 0));
#endif /* DEBUG */

        rc = libxl_bitmap_and(CTX, &cpumap, &info->vcpu_hard_affinity[0],
                              &cpupool_info.cpumap);
        if (rc)
            goto out;

        /* Hard affinity must contain at least one cpu of our cpupool */
        if (libxl_bitmap_is_empty(&cpumap)) {
            LOG(ERROR, "Hard affinity completely outside of domain's cpupool!");
            rc = ERROR_INVAL;
            goto out;
        }
    }

    rc = libxl__domain_need_memory_calculate(gc, info, &memkb);
    if (rc)
        goto out;
    if (libxl_node_bitmap_alloc(CTX, &cpupool_nodemap, 0)) {
        rc = ERROR_FAIL;
        goto out;
    }

    /* Find the best candidate with enough free memory and at least
     * as many pcpus as the domain has vcpus.  */
    rc = libxl__get_numa_candidate(gc, memkb, info->max_vcpus,
                                   0, 0, map, numa_cmpf, &candidate, &found);
    if (rc)
        goto out;

    /* Not even a suitable placement candidate! Let's just not touch the
     * domain's info->nodemap. It will have affinity with all nodes/cpus. */
    if (found == 0)
        goto out;

    /* Map the candidate's node map to the domain's info->nodemap */
    libxl__numa_candidate_get_nodemap(gc, &candidate, &info->nodemap);

    /* Avoid trying to set the affinity to nodes that might be in the
     * candidate's nodemap but out of our cpupool. */
    rc = libxl_cpumap_to_nodemap(CTX, &cpupool_info.cpumap,
                                 &cpupool_nodemap);
    if (rc)
        goto out;

    libxl_for_each_set_bit(i, info->nodemap) {
        if (!libxl_bitmap_test(&cpupool_nodemap, i))
            libxl_bitmap_reset(&info->nodemap, i);
    }

    LOG(DETAIL, "NUMA placement candidate with %d nodes, %d cpus and "
                "%"PRIu64" MB free selected", candidate.nr_nodes,
                candidate.nr_cpus, candidate.free_memkb / 1024);

 out:
    libxl__numa_candidate_dispose(&candidate);
    libxl_bitmap_dispose(&cpupool_nodemap);
    libxl_bitmap_dispose(&cpumap);
    libxl_cpupoolinfo_dispose(&cpupool_info);
    return rc;
}

int libxl__build_pre(libxl__gc *gc, uint32_t domid,
              libxl_domain_config *d_config, libxl__domain_build_state *state)
{
    libxl_domain_build_info *const info = &d_config->b_info;
    libxl_ctx *ctx = libxl__gc_owner(gc);
    char *xs_domid, *con_domid;
    int rc;
    uint64_t size;

    if (xc_domain_max_vcpus(ctx->xch, domid, info->max_vcpus) != 0) {
        LOG(ERROR, "Couldn't set max vcpu count");
        return ERROR_FAIL;
    }

    /*
     * Check if the domain has any CPU or node affinity already. If not, try
     * to build up the latter via automatic NUMA placement. In fact, in case
     * numa_place_domain() manages to find a placement, info->nodemap is
     * updated accordingly; if it does not manage, info->nodemap is just left
     * alone. It is then the subsequent call to
     * libxl_domain_set_nodeaffinity() that enacts the actual placement.
     *
     * As far as scheduling is concerned, we achieve NUMA-aware scheduling
     * by having the results of placement affect the soft affinity of all
     * the vcpus of the domain. Of course, we want that iff placement is
     * enabled and actually happens, so we only change info->cpumap_soft to
     * reflect the placement result if that is the case.
     */
    if (libxl_defbool_val(info->numa_placement)) {
        if (info->cpumap.size || info->num_vcpu_soft_affinity)
            LOG(WARN, "Can't run NUMA placement, as a soft "
                      "affinity has been specified explicitly");
        else if (info->nodemap.size)
            LOG(WARN, "Can't run NUMA placement, as the domain has "
                      "NUMA node affinity set already");
        else {
            libxl_bitmap cpumap_soft;

            rc = libxl_node_bitmap_alloc(ctx, &info->nodemap, 0);
            if (rc)
                return rc;
            libxl_bitmap_set_any(&info->nodemap);

            rc = libxl_cpu_bitmap_alloc(ctx, &cpumap_soft, 0);
            if (rc)
                return rc;

            rc = numa_place_domain(gc, domid, d_config);
            if (rc) {
                libxl_bitmap_dispose(&cpumap_soft);
                return rc;
            }

            /*
             * All we need to do now is convert the result of automatic
             * placement from nodemap to cpumap, and then use that cpumap
             * as the soft affinity for all the vcpus of the domain.
             *
             * When calling libxl_set_vcpuaffinity_all(), it is ok to use
             * NULL as hard affinity, as we know we don't have one, or we
             * wouldn't be here.
             */
            libxl_nodemap_to_cpumap(ctx, &info->nodemap, &cpumap_soft);
            libxl_set_vcpuaffinity_all(ctx, domid, info->max_vcpus,
                                       NULL, &cpumap_soft);

            libxl_bitmap_dispose(&cpumap_soft);

            /*
             * Placement has run, so prevent it from being re-run, if this
             * same config we are using and building here is ever re-used.
             * This means that people re-using configs will get the same
             * results, consistently, across every re-use, which is what
             * we expect most people to want.
             */
            libxl_defbool_set(&info->numa_placement, false);
        }
    }

    if (info->nodemap.size)
        libxl_domain_set_nodeaffinity(ctx, domid, &info->nodemap);

    if (info->num_vcpu_hard_affinity || info->num_vcpu_soft_affinity) {
        libxl_bitmap *hard_affinity, *soft_affinity;
        int i, n_vcpus;

        n_vcpus = info->num_vcpu_hard_affinity > info->num_vcpu_soft_affinity ?
            info->num_vcpu_hard_affinity : info->num_vcpu_soft_affinity;

        for (i = 0; i < n_vcpus; i++) {
            /*
             * Prepare hard and soft affinity pointers in a way that allows
             * us to issue only one call to libxl_set_vcpuaffinity(), setting,
             * for each vcpu, both hard and soft affinity "atomically".
             */
            hard_affinity = NULL;
            if (info->num_vcpu_hard_affinity &&
                i < info->num_vcpu_hard_affinity)
                hard_affinity = &info->vcpu_hard_affinity[i];

            soft_affinity = NULL;
            if (info->num_vcpu_soft_affinity &&
                i < info->num_vcpu_soft_affinity)
                soft_affinity = &info->vcpu_soft_affinity[i];

            if (libxl_set_vcpuaffinity(ctx, domid, i,
                                       hard_affinity, soft_affinity)) {
                LOG(ERROR, "setting affinity failed on vcpu `%d'", i);
                return ERROR_FAIL;
            }
        }
    }

    rc = libxl__arch_extra_memory(gc, info, &size);
    if (rc < 0) {
        LOGE(ERROR, "Couldn't get arch extra constant memory size");
        return ERROR_FAIL;
    }

    if (xc_domain_setmaxmem(ctx->xch, domid, info->target_memkb + size) < 0) {
        LOGE(ERROR, "Couldn't set max memory");
        return ERROR_FAIL;
    }

    xs_domid = xs_read(ctx->xsh, XBT_NULL, "/tool/xenstored/domid", NULL);
    state->store_domid = xs_domid ? atoi(xs_domid) : 0;
    free(xs_domid);

    con_domid = xs_read(ctx->xsh, XBT_NULL, "/tool/xenconsoled/domid", NULL);
    state->console_domid = con_domid ? atoi(con_domid) : 0;
    free(con_domid);

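    /*
     * Allocate unbound event channels for the xenstore and console rings;
     * the remote ends belong to the xenstore and console backend domains
     * found above.
     */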
    state->store_port = xc_evtchn_alloc_unbound(ctx->xch, domid, state->store_domid);
    state->console_port = xc_evtchn_alloc_unbound(ctx->xch, domid, state->console_domid);

    rc = libxl__arch_domain_create(gc, d_config, state, domid);
    if (rc) goto out;

    /* Construct a CPUID policy, but only for brand new domains.  Domains
     * being migrated-in/restored have CPUID handled during the
     * static_data_done() callback. */
    if (!state->restore && !state->soft_reset)
        rc = libxl__cpuid_legacy(ctx, domid, false, info);

out:
    return rc;
}

static int set_vnuma_affinity(libxl__gc *gc, uint32_t domid,
                              libxl_domain_build_info *info)
{
    libxl_bitmap cpumap;
    libxl_vnode_info *v;
    unsigned int i, j;
    int rc = 0;

    libxl_bitmap_init(&cpumap);

    rc = libxl_cpu_bitmap_alloc(CTX, &cpumap, 0);
    if (rc) {
        LOG(ERROR, "Can't allocate cpumap");
        goto out;
    }

    /*
     * For each vcpu in each vnode, set its soft affinity to
     * the pcpus belonging to the pnode the vnode is on
     */
    for (i = 0; i < info->num_vnuma_nodes; i++) {
        v = &info->vnuma_nodes[i];

        rc = libxl_node_to_cpumap(CTX, v->pnode, &cpumap);
        if (rc) {
            LOG(ERROR, "Can't get cpumap for vnode %d", i);
            goto out;
        }

        libxl_for_each_set_bit(j, v->vcpus) {
            rc = libxl_set_vcpuaffinity(CTX, domid, j, NULL, &cpumap);
            if (rc) {
                LOG(ERROR, "Can't set cpu affinity for %d", j);
                goto out;
            }
        }
    }

out:
    libxl_bitmap_dispose(&cpumap);
    return rc;
}

int libxl__build_post(libxl__gc *gc, uint32_t domid,
                      libxl_domain_build_info *info,
                      libxl__domain_build_state *state,
                      char **vms_ents, char **local_ents)
{
    libxl_ctx *ctx = libxl__gc_owner(gc);
    char *dom_path, *vm_path;
    xs_transaction_t t;
    char **ents;
    int i, rc;

    if (info->num_vnuma_nodes && !info->num_vcpu_soft_affinity) {
        rc = set_vnuma_affinity(gc, domid, info);
        if (rc)
            return rc;
    }

    rc = libxl_domain_sched_params_set(CTX, domid, &info->sched_params);
    if (rc)
        return rc;

    if (info->type == LIBXL_DOMAIN_TYPE_HVM
        && !libxl_ms_vm_genid_is_zero(&info->u.hvm.ms_vm_genid)) {
        rc = libxl__ms_vm_genid_set(gc, domid,
                                    &info->u.hvm.ms_vm_genid);
        if (rc) {
            LOG(ERROR, "Failed to set VM Generation ID");
            return rc;
        }
    }

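    /*
     * Assemble the xenstore entries as a NULL-terminated array of
     * alternating key/value strings: 12 fixed entries, two per vcpu, and
     * the terminating NULLs courtesy of the zero-filled calloc.
     */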
    ents = libxl__calloc(gc, 12 + (info->max_vcpus * 2) + 2, sizeof(char *));
    ents[0] = "memory/static-max";
    ents[1] = GCSPRINTF("%"PRId64, info->max_memkb);
    ents[2] = "memory/target";
    ents[3] = GCSPRINTF("%"PRId64, info->target_memkb -
                        libxl__get_targetmem_fudge(gc, info));
    ents[4] = "memory/videoram";
    ents[5] = GCSPRINTF("%"PRId64, info->video_memkb);
    ents[6] = "domid";
    ents[7] = GCSPRINTF("%d", domid);
    ents[8] = "store/port";
    ents[9] = GCSPRINTF("%"PRIu32, state->store_port);
    ents[10] = "store/ring-ref";
    ents[11] = GCSPRINTF("%lu", state->store_mfn);
    for (i = 0; i < info->max_vcpus; i++) {
        ents[12+(i*2)]   = GCSPRINTF("cpu/%d/availability", i);
        ents[12+(i*2)+1] = libxl_bitmap_test(&info->avail_vcpus, i)
                            ? "online" : "offline";
    }

    dom_path = libxl__xs_get_dompath(gc, domid);
    if (!dom_path) {
        return ERROR_FAIL;
    }

    vm_path = xs_read(ctx->xsh, XBT_NULL, GCSPRINTF("%s/vm", dom_path), NULL);
retry_transaction:
    t = xs_transaction_start(ctx->xsh);

    libxl__xs_writev(gc, t, dom_path, ents);
    libxl__xs_writev(gc, t, dom_path, local_ents);
    libxl__xs_writev(gc, t, vm_path, vms_ents);

    if (!xs_transaction_end(ctx->xsh, t, 0))
        if (errno == EAGAIN)
            goto retry_transaction;

    if (info->xenstore_feature_mask != ~0U) {
        unsigned int features;

        if (xs_get_features_supported(ctx->xsh, &features) &&
            !xs_set_features_domain(ctx->xsh, domid,
                                    features & info->xenstore_feature_mask)) {
            LOGED(ERROR, domid, "Failed to set Xenstore features");
            rc = ERROR_FAIL;
            goto out;
        }
    }

    xs_introduce_domain(ctx->xsh, domid, state->store_mfn, state->store_port);

 out:
    free(vm_path);
    return rc;
}

static int set_vnuma_info(libxl__gc *gc, uint32_t domid,
                          const libxl_domain_build_info *info,
                          const libxl__domain_build_state *state)
{
    int rc = 0;
    unsigned int i, nr_vdistance;
    unsigned int *vcpu_to_vnode, *vnode_to_pnode, *vdistance = NULL;

    vcpu_to_vnode = libxl__calloc(gc, info->max_vcpus,
                                  sizeof(unsigned int));
    vnode_to_pnode = libxl__calloc(gc, info->num_vnuma_nodes,
                                   sizeof(unsigned int));

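    /* vdistance is a flattened square matrix: row i holds the distances
     * from vnode i to every vnode. */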
    nr_vdistance = info->num_vnuma_nodes * info->num_vnuma_nodes;
    vdistance = libxl__calloc(gc, nr_vdistance, sizeof(unsigned int));

    for (i = 0; i < info->num_vnuma_nodes; i++) {
        libxl_vnode_info *v = &info->vnuma_nodes[i];
        int j;

        /* vnode to pnode mapping */
        vnode_to_pnode[i] = v->pnode;

        /* vcpu to vnode mapping */
        libxl_for_each_set_bit(j, v->vcpus)
            vcpu_to_vnode[j] = i;

        /* node distances */
        assert(info->num_vnuma_nodes == v->num_distances);
        memcpy(vdistance + (i * info->num_vnuma_nodes),
               v->distances,
               v->num_distances * sizeof(unsigned int));
    }

    if (xc_domain_setvnuma(CTX->xch, domid, info->num_vnuma_nodes,
                           state->num_vmemranges, info->max_vcpus,
                           state->vmemranges, vdistance,
                           vcpu_to_vnode, vnode_to_pnode) < 0) {
        LOGE(ERROR, "xc_domain_setvnuma failed");
        rc = ERROR_FAIL;
    }

    return rc;
}

static int libxl__build_dom(libxl__gc *gc, uint32_t domid,
             libxl_domain_config *d_config, libxl__domain_build_state *state,
             struct xc_dom_image *dom)
{
    libxl_domain_build_info *const info = &d_config->b_info;
    uint64_t mem_kb;
    int ret;

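    /*
     * Drive the libxc domain builder through its stages in order: bind it
     * to the hypervisor, parse the kernel image, size and allocate the
     * guest memory, let the arch hooks emit the hardware description, then
     * build and boot the image and set up the grant tables.
     */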
    if ( (ret = xc_dom_boot_xen_init(dom, CTX->xch, domid)) != 0 ) {
        LOGE(ERROR, "xc_dom_boot_xen_init failed");
        goto out;
    }
#ifdef GUEST_RAM_BASE
    if ( (ret = xc_dom_rambase_init(dom, GUEST_RAM_BASE)) != 0 ) {
        LOGE(ERROR, "xc_dom_rambase failed");
        goto out;
    }
#endif
    if ( (ret = xc_dom_parse_image(dom)) != 0 ) {
        LOG(ERROR, "xc_dom_parse_image failed");
        goto out;
    }
    if ( (ret = libxl__arch_domain_init_hw_description(gc, d_config, state, dom)) != 0 ) {
        LOGE(ERROR, "libxl__arch_domain_init_hw_description failed");
        goto out;
    }

    mem_kb = dom->container_type == XC_DOM_HVM_CONTAINER ?
             (info->max_memkb - info->video_memkb) : info->target_memkb;
    if ( (ret = xc_dom_mem_init(dom, mem_kb / 1024)) != 0 ) {
        LOGE(ERROR, "xc_dom_mem_init failed");
        goto out;
    }
    if ( (ret = xc_dom_boot_mem_init(dom)) != 0 ) {
        LOGE(ERROR, "xc_dom_boot_mem_init failed");
        goto out;
    }
    if ( (ret = libxl__arch_domain_finalise_hw_description(gc, domid, d_config, dom)) != 0 ) {
        LOGE(ERROR, "libxl__arch_domain_finalise_hw_description failed");
        goto out;
    }
    if ( (ret = xc_dom_build_image(dom)) != 0 ) {
        LOGE(ERROR, "xc_dom_build_image failed");
        goto out;
    }
    if ( (ret = xc_dom_boot_image(dom)) != 0 ) {
        LOGE(ERROR, "xc_dom_boot_image failed");
        goto out;
    }
    if ( (ret = xc_dom_gnttab_init(dom)) != 0 ) {
        LOGE(ERROR, "xc_dom_gnttab_init failed");
        goto out;
    }
    if ((ret = libxl__arch_build_dom_finish(gc, info, dom, state)) != 0) {
        LOGE(ERROR, "libxl__arch_build_dom_finish failed");
        goto out;
    }

out:
    return ret != 0 ? ERROR_FAIL : 0;
}

int libxl__build_pv(libxl__gc *gc, uint32_t domid,
             libxl_domain_config *d_config, libxl__domain_build_state *state)
{
    libxl_ctx *ctx = libxl__gc_owner(gc);
    libxl_domain_build_info *const info = &d_config->b_info;
    struct xc_dom_image *dom;
    int ret;
    int flags = 0;

    xc_dom_loginit(ctx->xch);

    dom = xc_dom_allocate(ctx->xch, state->pv_cmdline, info->u.pv.features);
    if (!dom) {
        LOGE(ERROR, "xc_dom_allocate failed");
        return ERROR_FAIL;
    }

    dom->container_type = XC_DOM_PV_CONTAINER;

    LOG(DEBUG, "pv kernel mapped %d path %s", state->pv_kernel.mapped, state->pv_kernel.path);

    if (state->pv_kernel.mapped) {
        ret = xc_dom_kernel_mem(dom,
                                state->pv_kernel.data,
                                state->pv_kernel.size);
        if (ret != 0) {
            LOGE(ERROR, "xc_dom_kernel_mem failed");
            goto out;
        }
    } else {
        ret = xc_dom_kernel_file(dom, state->pv_kernel.path);
        if (ret != 0) {
            LOGE(ERROR, "xc_dom_kernel_file failed");
            goto out;
        }
    }

    if (state->pv_ramdisk.path && strlen(state->pv_ramdisk.path)) {
        if (state->pv_ramdisk.mapped) {
            if ( (ret = xc_dom_module_mem(dom, state->pv_ramdisk.data, state->pv_ramdisk.size, NULL)) != 0 ) {
                LOGE(ERROR, "xc_dom_module_mem failed");
                goto out;
            }
        } else {
            if ( (ret = xc_dom_module_file(dom, state->pv_ramdisk.path, NULL)) != 0 ) {
                LOGE(ERROR, "xc_dom_module_file failed");
                goto out;
            }
        }
    }

    dom->flags = flags;
    dom->console_evtchn = state->console_port;
    dom->console_domid = state->console_domid;
    dom->xenstore_evtchn = state->store_port;
    dom->xenstore_domid = state->store_domid;
    dom->claim_enabled = libxl_defbool_val(info->claim_mode);
    dom->max_vcpus = info->max_vcpus;

    if (info->num_vnuma_nodes != 0) {
        unsigned int i;

        ret = libxl__vnuma_build_vmemrange_pv(gc, domid, info, state);
        if (ret) {
            LOGE(ERROR, "cannot build vmemranges");
            goto out;
        }
        ret = libxl__vnuma_config_check(gc, info, state);
        if (ret) goto out;

        ret = set_vnuma_info(gc, domid, info, state);
        if (ret) goto out;

        dom->nr_vmemranges = state->num_vmemranges;
        dom->vmemranges = xc_dom_malloc(dom, sizeof(*dom->vmemranges) *
                                        dom->nr_vmemranges);

        for (i = 0; i < dom->nr_vmemranges; i++) {
            dom->vmemranges[i].start = state->vmemranges[i].start;
            dom->vmemranges[i].end   = state->vmemranges[i].end;
            dom->vmemranges[i].flags = state->vmemranges[i].flags;
            dom->vmemranges[i].nid   = state->vmemranges[i].nid;
        }

        dom->nr_vnodes = info->num_vnuma_nodes;
        dom->vnode_to_pnode = xc_dom_malloc(dom, sizeof(*dom->vnode_to_pnode) *
                                            dom->nr_vnodes);
        for (i = 0; i < info->num_vnuma_nodes; i++)
            dom->vnode_to_pnode[i] = info->vnuma_nodes[i].pnode;
    }

    ret = libxl__build_dom(gc, domid, d_config, state, dom);
    if (ret != 0)
        goto out;

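    /*
     * For auto-translated guests the console/xenstore ring addresses are
     * already guest frame numbers; for classic PV guests they must be
     * translated from pfn to mfn via the p2m.
     */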
    if (xc_dom_translated(dom)) {
        state->console_mfn = dom->console_pfn;
        state->store_mfn = dom->xenstore_pfn;
        state->vuart_gfn = dom->vuart_gfn;
    } else {
        state->console_mfn = xc_dom_p2m(dom, dom->console_pfn);
        state->store_mfn = xc_dom_p2m(dom, dom->xenstore_pfn);
    }

    ret = 0;
out:
    xc_dom_release(dom);
    return ret == 0 ? 0 : ERROR_FAIL;
}

static int hvm_build_set_params(xc_interface *handle, uint32_t domid,
                                libxl_domain_build_info *info)
{
    struct hvm_info_table *va_hvm;
    uint8_t *va_map, sum;
    int i;

    if (info->type == LIBXL_DOMAIN_TYPE_HVM) {
        va_map = xc_map_foreign_range(handle, domid,
                                      XC_PAGE_SIZE, PROT_READ | PROT_WRITE,
                                      HVM_INFO_PFN);
        if (va_map == NULL)
            return ERROR_FAIL;

        va_hvm = (struct hvm_info_table *)(va_map + HVM_INFO_OFFSET);
        va_hvm->apic_mode = libxl_defbool_val(info->apic);
        va_hvm->nr_vcpus = info->max_vcpus;
        memset(va_hvm->vcpu_online, 0, sizeof(va_hvm->vcpu_online));
        memcpy(va_hvm->vcpu_online, info->avail_vcpus.map, info->avail_vcpus.size);
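        /* Re-checksum: subtract the byte sum of the modified table so that
         * the whole table once again sums to zero (mod 256). */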
        for (i = 0, sum = 0; i < va_hvm->length; i++)
            sum += ((uint8_t *) va_hvm)[i];
        va_hvm->checksum -= sum;
        munmap(va_map, XC_PAGE_SIZE);
    }

    return 0;
}

static int hvm_build_set_xs_values(libxl__gc *gc,
                                   uint32_t domid,
                                   struct xc_dom_image *dom,
                                   const libxl_domain_build_info *info)
{
    char *path = NULL;
    int num_oem = 1;
    int ret = 0;

    if (dom->smbios_module.guest_addr_out) {
        path = GCSPRINTF("/local/domain/%d/"HVM_XS_SMBIOS_PT_ADDRESS, domid);

        ret = libxl__xs_printf(gc, XBT_NULL, path, "0x%"PRIx64,
                               dom->smbios_module.guest_addr_out);
        if (ret)
            goto err;

        path = GCSPRINTF("/local/domain/%d/"HVM_XS_SMBIOS_PT_LENGTH, domid);

        ret = libxl__xs_printf(gc, XBT_NULL, path, "0x%x",
                               dom->smbios_module.length);
        if (ret)
            goto err;
    }

    for (int i = 0; i < info->u.hvm.num_smbios; i++) {
        char *p;
        if (info->u.hvm.smbios[i].key == LIBXL_SMBIOS_TYPE_OEM) {
            if (num_oem > 99) {
                LOGD(ERROR, domid, "More than 99 SMBIOS OEM strings specified");
                ret = ERROR_INVAL;
                goto err;
            }
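            /* HVM_XS_OEM_STRINGS embeds a printf-style index directive,
             * consumed here by num_oem. */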
            path = GCSPRINTF("/local/domain/%d/"HVM_XS_OEM_STRINGS, domid,
                             num_oem);
            num_oem++;
        } else {
            path = GCSPRINTF("/local/domain/%d/"HVM_XS_BIOS_STRINGS"/%s", domid,
                       libxl_smbios_type_to_string(info->u.hvm.smbios[i].key));
        }

        /* Convert the libxl_smbios_type string to the xenstore path that
         * hvmloader will use, as defined by HVM_XS_*. That is, convert the
         * '_' to '-'. */
        p = strrchr(path, '/');
        for ( ; *p; p++) {
            if (*p == '_')
                *p = '-';
        }

        LOGD(DEBUG, domid, "Writing %s = \"%s\"", path,
             info->u.hvm.smbios[i].value);
        ret = libxl__xs_printf(gc, XBT_NULL, path, "%s",
                               info->u.hvm.smbios[i].value);
        if (ret)
            goto err;
    }

    /* Only one module can be passed. PVHv2 guests do not support this. */
    if (dom->acpi_modules[0].guest_addr_out &&
        info->type == LIBXL_DOMAIN_TYPE_HVM) {
        path = GCSPRINTF("/local/domain/%d/"HVM_XS_ACPI_PT_ADDRESS, domid);

        ret = libxl__xs_printf(gc, XBT_NULL, path, "0x%"PRIx64,
                               dom->acpi_modules[0].guest_addr_out);
        if (ret)
            goto err;

        path = GCSPRINTF("/local/domain/%d/"HVM_XS_ACPI_PT_LENGTH, domid);

        ret = libxl__xs_printf(gc, XBT_NULL, path, "0x%x",
                               dom->acpi_modules[0].length);
        if (ret)
            goto err;
    }

    if (info->type == LIBXL_DOMAIN_TYPE_HVM) {
        path = GCSPRINTF("/local/domain/%d/" HVM_XS_XEN_PLATFORM_PCI_BAR_UC,
                         domid);
        ret = libxl__xs_printf(gc, XBT_NULL, path, "%d",
            libxl_defbool_val(info->u.hvm.xen_platform_pci_bar_uc));
        if (ret)
            goto err;
    }

    return 0;

err:
    LOG(ERROR, "failed to write firmware xenstore value, err: %d", ret);
    return ret;
}

static int libxl__load_hvm_firmware_module(libxl__gc *gc,
                                           const char *filename,
                                           const char *what,
                                           struct xc_hvm_firmware_module *m)
{
    int datalen = 0;
    void *data = NULL;
    int r, rc;

    LOG(DEBUG, "Loading %s: %s", what, filename);
    r = libxl_read_file_contents(CTX, filename, &data, &datalen);
    if (r) {
        /*
         * Print a message only on ENOENT; other errors are logged by
         * libxl_read_file_contents() itself.
         */
        if (r == ENOENT)
            LOGEV(ERROR, r, "failed to read %s file", what);
        rc = ERROR_FAIL;
        goto out;
    }
    libxl__ptr_add(gc, data);
    if (datalen) {
        /* Only accept non-empty files */
        m->data = data;
        m->length = datalen;
    } else {
        LOG(ERROR, "file %s for %s is empty", filename, what);
        rc = ERROR_INVAL;
        goto out;
    }
    rc = 0;
out:
    return rc;
}

static int libxl__domain_firmware(libxl__gc *gc,
                                  libxl_domain_build_info *info,
                                  libxl__domain_build_state *state,
                                  struct xc_dom_image *dom)
{
    libxl_ctx *ctx = libxl__gc_owner(gc);
    const char *firmware = NULL;
    int e, rc;
    int datalen = 0;
    void *data;
    const char *bios_filename = NULL;

    if (info->type == LIBXL_DOMAIN_TYPE_HVM) {
        if (info->u.hvm.firmware) {
            firmware = info->u.hvm.firmware;
        } else {
            switch (info->device_model_version)
            {
            case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN:
                firmware = "hvmloader";
                break;
            default:
                LOG(ERROR, "invalid device model version %d",
                    info->device_model_version);
                rc = ERROR_FAIL;
                goto out;
            }
        }
    }

    if (state->pv_kernel.path != NULL &&
        info->type == LIBXL_DOMAIN_TYPE_PVH) {

        if (state->shim_path) {
            rc = xc_dom_kernel_file(dom, state->shim_path);
            if (rc) {
                LOGE(ERROR, "xc_dom_kernel_file failed");
                goto out;
            }

            /* We've loaded the shim, so load the kernel as a secondary module */
            if (state->pv_kernel.mapped) {
                LOG(DEBUG, "xc_dom_module_mem, cmdline %s",
                    state->pv_cmdline);
                rc = xc_dom_module_mem(dom, state->pv_kernel.data,
                                       state->pv_kernel.size, state->pv_cmdline);
                if (rc) {
                    LOGE(ERROR, "xc_dom_module_mem failed");
                    goto out;
                }
            } else {
                LOG(DEBUG, "xc_dom_module_file, path %s cmdline %s",
                    state->pv_kernel.path, state->pv_cmdline);
                rc = xc_dom_module_file(dom, state->pv_kernel.path, state->pv_cmdline);
                if (rc) {
                    LOGE(ERROR, "xc_dom_module_file failed");
                    goto out;
                }
            }
        } else {
            /* No shim, so load the kernel directly */
            if (state->pv_kernel.mapped) {
                rc = xc_dom_kernel_mem(dom, state->pv_kernel.data,
                                       state->pv_kernel.size);
                if (rc) {
                    LOGE(ERROR, "xc_dom_kernel_mem failed");
                    goto out;
                }
            } else {
                rc = xc_dom_kernel_file(dom, state->pv_kernel.path);
                if (rc) {
                    LOGE(ERROR, "xc_dom_kernel_file failed");
                    goto out;
                }
            }
        }

        if (state->pv_ramdisk.path && strlen(state->pv_ramdisk.path)) {
            if (state->pv_ramdisk.mapped) {
                rc = xc_dom_module_mem(dom, state->pv_ramdisk.data,
                                       state->pv_ramdisk.size, NULL);
                if (rc) {
                    LOGE(ERROR, "xc_dom_module_mem failed");
                    goto out;
                }
            } else {
                rc = xc_dom_module_file(dom, state->pv_ramdisk.path, NULL);
                if (rc) {
                    LOGE(ERROR, "xc_dom_module_file failed");
                    goto out;
                }
            }
        }
    } else {
        /*
         * Only HVM guests should get here; PVH should always have a set
         * kernel at this point.
         */
        assert(info->type == LIBXL_DOMAIN_TYPE_HVM);
        rc = xc_dom_kernel_file(dom, libxl__abs_path(gc, firmware,
                                                 libxl__xenfirmwaredir_path()));
    }

    if (rc != 0) {
        LOGE(ERROR, "xc_dom_{kernel_file/ramdisk_file} failed");
        goto out;
    }

    if (info->type == LIBXL_DOMAIN_TYPE_HVM &&
        info->device_model_version == LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN) {
        if (info->u.hvm.system_firmware) {
            bios_filename = info->u.hvm.system_firmware;
        } else {
            switch (info->u.hvm.bios) {
            case LIBXL_BIOS_TYPE_SEABIOS:
                bios_filename = libxl__seabios_path();
                break;
            case LIBXL_BIOS_TYPE_OVMF:
                bios_filename = libxl__ovmf_path();
                break;
            case LIBXL_BIOS_TYPE_ROMBIOS:
            default:
                abort();
            }
        }
    }

    if (bios_filename) {
        rc = libxl__load_hvm_firmware_module(gc, bios_filename, "BIOS",
                                             &dom->system_firmware_module);
        if (rc) goto out;
    }

    if (info->type == LIBXL_DOMAIN_TYPE_HVM &&
        info->u.hvm.bios == LIBXL_BIOS_TYPE_ROMBIOS &&
        libxl__ipxe_path()) {
        const char *fp = libxl__ipxe_path();
        rc = xc_dom_module_file(dom, fp, "ipxe");

        if (rc) {
            LOGE(ERROR, "failed to load IPXE %s (%d)", fp, rc);
            rc = ERROR_FAIL;
            goto out;
        }
    }

    if (info->type == LIBXL_DOMAIN_TYPE_HVM &&
        info->u.hvm.smbios_firmware) {
        data = NULL;
        e = libxl_read_file_contents(ctx, info->u.hvm.smbios_firmware,
                                     &data, &datalen);
        if (e) {
            LOGEV(ERROR, e, "failed to read SMBIOS firmware file %s",
                info->u.hvm.smbios_firmware);
            rc = ERROR_FAIL;
            goto out;
        }
        libxl__ptr_add(gc, data);
        if (datalen) {
            /* Only accept non-empty files */
            dom->smbios_module.data = data;
            dom->smbios_module.length = (uint32_t)datalen;
        }
    }

    if (info->type == LIBXL_DOMAIN_TYPE_HVM &&
        info->u.hvm.acpi_firmware) {
        data = NULL;
        e = libxl_read_file_contents(ctx, info->u.hvm.acpi_firmware,
                                     &data, &datalen);
        if (e) {
            LOGEV(ERROR, e, "failed to read ACPI firmware file %s",
                info->u.hvm.acpi_firmware);
            rc = ERROR_FAIL;
            goto out;
        }
        libxl__ptr_add(gc, data);
        if (datalen) {
            /* Only accept a non-empty file */
            dom->acpi_modules[0].data = data;
            dom->acpi_modules[0].length = (uint32_t)datalen;
        }
    }

    return 0;
out:
    assert(rc != 0);
    return rc;
}

int libxl__build_hvm(libxl__gc *gc, uint32_t domid,
              libxl_domain_config *d_config,
              libxl__domain_build_state *state)
{
    libxl_ctx *ctx = libxl__gc_owner(gc);
    int rc;
    uint64_t mmio_start, lowmem_end, highmem_end, mem_size;
    libxl_domain_build_info *const info = &d_config->b_info;
    struct xc_dom_image *dom = NULL;
    bool device_model = info->type == LIBXL_DOMAIN_TYPE_HVM;

    xc_dom_loginit(ctx->xch);

    /*
     * If PVH and we have a shim override, use the shim cmdline.
     * If PVH and no shim override, use the pv cmdline.
     * If not PVH, use info->cmdline.
     */
    dom = xc_dom_allocate(ctx->xch, info->type == LIBXL_DOMAIN_TYPE_PVH ?
                          (state->shim_path ? state->shim_cmdline : state->pv_cmdline) :
                          info->cmdline, NULL);
    if (!dom) {
        LOGE(ERROR, "xc_dom_allocate failed");
        rc = ERROR_NOMEM;
        goto out;
    }

    dom->container_type = XC_DOM_HVM_CONTAINER;

    /*
     * The memory params are in KiB here (the config file has them in MiB;
     * the old xc_hvm_build_target_mem() interface then converted them to
     * bytes). Do the whole conversion in one step: shift by 10 for KiB to
     * bytes, and by 2 for KiB to 4 KiB pages.
     */
    mem_size = (uint64_t)(info->max_memkb - info->video_memkb) << 10;
    dom->target_pages = (uint64_t)(info->target_memkb - info->video_memkb) >> 2;
    dom->claim_enabled = libxl_defbool_val(info->claim_mode);
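    /*
     * A user-requested MMIO hole is honoured only when it is larger than
     * the default, i.e. when it would push the top of RAM below
     * HVM_BELOW_4G_MMIO_START.
     */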
    if (info->u.hvm.mmio_hole_memkb) {
        uint64_t max_ram_below_4g = (1ULL << 32) -
            (info->u.hvm.mmio_hole_memkb << 10);

        if (max_ram_below_4g < HVM_BELOW_4G_MMIO_START)
            dom->mmio_size = info->u.hvm.mmio_hole_memkb << 10;
    }

    rc = libxl__domain_firmware(gc, info, state, dom);
    if (rc != 0) {
        LOG(ERROR, "initializing domain firmware failed");
        goto out;
    }

    if (dom->target_pages == 0)
        dom->target_pages = mem_size >> XC_PAGE_SHIFT;
    if (dom->mmio_size == 0 && device_model)
        dom->mmio_size = HVM_BELOW_4G_MMIO_LENGTH;
    else if (dom->mmio_size == 0 && !device_model) {
#if defined(__i386__) || defined(__x86_64__)
        /*
         * Make sure the local APIC page, the ACPI tables and the special pages
         * are inside the MMIO hole.
         */
        xen_paddr_t start =
            (X86_HVM_END_SPECIAL_REGION - X86_HVM_NR_SPECIAL_PAGES) <<
            XC_PAGE_SHIFT;

        start = min_t(xen_paddr_t, start, LAPIC_BASE_ADDRESS);
        start = min_t(xen_paddr_t, start, ACPI_INFO_PHYSICAL_ADDRESS);
        dom->mmio_size = GB(4) - start;
#else
        assert(1);
#endif
    }
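    /*
     * RAM that would collide with the MMIO hole below 4 GiB is relocated
     * above the 4 GiB boundary.
     */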
    lowmem_end = mem_size;
    highmem_end = 0;
    mmio_start = (1ull << 32) - dom->mmio_size;
    if (lowmem_end > mmio_start)
    {
        highmem_end = (1ull << 32) + (lowmem_end - mmio_start);
        lowmem_end = mmio_start;
    }
    dom->lowmem_end = lowmem_end;
    dom->highmem_end = highmem_end;
    dom->mmio_start = mmio_start;
    dom->vga_hole_size = device_model ? LIBXL_VGA_HOLE_SIZE : 0;
    dom->device_model = device_model;
    dom->max_vcpus = info->max_vcpus;
    dom->console_evtchn = state->console_port;
    dom->console_domid = state->console_domid;
    dom->xenstore_evtchn = state->store_port;
    dom->xenstore_domid = state->store_domid;

    rc = libxl__domain_device_construct_rdm(gc, d_config,
                                            info->u.hvm.rdm_mem_boundary_memkb*1024,
                                            dom);
    if (rc) {
        LOG(ERROR, "checking reserved device memory failed");
        goto out;
    }

    if (info->num_vnuma_nodes != 0) {
        int i;

        rc = libxl__vnuma_build_vmemrange_hvm(gc, domid, info, state, dom);
        if (rc != 0) {
            LOG(ERROR, "hvm build vmemranges failed");
            goto out;
        }
        rc = libxl__vnuma_config_check(gc, info, state);
        if (rc != 0) goto out;
        rc = set_vnuma_info(gc, domid, info, state);
        if (rc != 0) goto out;

        dom->nr_vmemranges = state->num_vmemranges;
        dom->vmemranges = libxl__malloc(gc, sizeof(*dom->vmemranges) *
                                        dom->nr_vmemranges);

        for (i = 0; i < dom->nr_vmemranges; i++) {
            dom->vmemranges[i].start = state->vmemranges[i].start;
            dom->vmemranges[i].end   = state->vmemranges[i].end;
            dom->vmemranges[i].flags = state->vmemranges[i].flags;
            dom->vmemranges[i].nid   = state->vmemranges[i].nid;
        }

        dom->nr_vnodes = info->num_vnuma_nodes;
        dom->vnode_to_pnode = libxl__malloc(gc, sizeof(*dom->vnode_to_pnode) *
                                            dom->nr_vnodes);
        for (i = 0; i < dom->nr_vnodes; i++)
            dom->vnode_to_pnode[i] = info->vnuma_nodes[i].pnode;
    }

    rc = libxl__build_dom(gc, domid, d_config, state, dom);
    if (rc != 0)
        goto out;

    rc = hvm_build_set_params(ctx->xch, domid, info);
    if (rc != 0) {
        LOG(ERROR, "hvm build set params failed");
        goto out;
    }

    state->console_mfn = dom->console_pfn;
    state->store_mfn = dom->xenstore_pfn;
    state->vuart_gfn = dom->vuart_gfn;

    rc = hvm_build_set_xs_values(gc, domid, dom, info);
    if (rc != 0) {
        LOG(ERROR, "hvm build set xenstore values failed");
        goto out;
    }

    xc_dom_release(dom);
    return 0;

out:
    assert(rc != 0);
    if (dom != NULL) xc_dom_release(dom);
    return rc;
}

/*==================== Miscellaneous ====================*/

char *libxl__uuid2string(libxl__gc *gc, const libxl_uuid uuid)
{
    return GCSPRINTF(LIBXL_UUID_FMT, LIBXL_UUID_BYTES(uuid));
}

const char *libxl__userdata_path(libxl__gc *gc, uint32_t domid,
                                 const char *userdata_userid,
                                 const char *wh)
{
    libxl_ctx *ctx = libxl__gc_owner(gc);
    char *uuid_string, *path;
    libxl_dominfo info;
    int rc;

    libxl_dominfo_init(&info);

    rc = libxl_domain_info(ctx, &info, domid);
    if (rc) {
        LOGE(ERROR, "unable to find domain info for domain %"PRIu32, domid);
        path = NULL;
        goto out;
    }
    uuid_string = GCSPRINTF(LIBXL_UUID_FMT, LIBXL_UUID_BYTES(info.uuid));
    path = GCSPRINTF(XEN_LIB_DIR "/userdata-%s.%u.%s.%s",
                     wh, domid, uuid_string, userdata_userid);

 out:
    libxl_dominfo_dispose(&info);
    return path;
}

static int userdata_delete(libxl__gc *gc, const char *path)
{
    int r;
    r = unlink(path);
    if (r) {
        LOGE(ERROR, "remove failed for %s", path);
        return errno;
    }
    return 0;
}

void libxl__userdata_destroyall(libxl__gc *gc, uint32_t domid)
{
    const char *pattern;
    glob_t gl;
    int r, i;

    pattern = libxl__userdata_path(gc, domid, "*", "?");
    if (!pattern)
        goto out;

    gl.gl_pathc = 0;
    gl.gl_pathv = 0;
    gl.gl_offs = 0;
    r = glob(pattern, GLOB_ERR|GLOB_NOSORT|GLOB_MARK, 0, &gl);
    if (r == GLOB_NOMATCH)
        goto out;
    if (r)
        LOGE(ERROR, "glob failed for %s", pattern);

    /* Note: don't delete domain-userdata-lock; it will be handled by the
     * unlock function.
     */
    for (i = 0; i < gl.gl_pathc; i++) {
        if (!strstr(gl.gl_pathv[i], "domain-userdata-lock"))
            userdata_delete(gc, gl.gl_pathv[i]);
    }
    globfree(&gl);
out:
    return;
}

int libxl__userdata_store(libxl__gc *gc, uint32_t domid,
                          const char *userdata_userid,
                          const uint8_t *data, int datalen)
{
    const char *filename;
    const char *newfilename;
    int e, rc;
    int fd = -1;

    filename = libxl__userdata_path(gc, domid, userdata_userid, "d");
    if (!filename) {
        rc = ERROR_NOMEM;
        goto out;
    }

    if (!datalen) {
        rc = userdata_delete(gc, filename);
        goto out;
    }

    newfilename = libxl__userdata_path(gc, domid, userdata_userid, "n");
    if (!newfilename) {
        rc = ERROR_NOMEM;
        goto out;
    }

    rc = ERROR_FAIL;

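    /*
     * Write to the "n" (new) file first and rename(2) it over the "d"
     * (data) file, so a reader never observes a partially written blob.
     */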
    fd = open(newfilename, O_RDWR | O_CREAT | O_TRUNC, 0600);
    if (fd < 0)
        goto err;

    if (libxl_write_exactly(CTX, fd, data, datalen, "userdata", newfilename))
        goto err;

    if (close(fd) < 0) {
        fd = -1;
        goto err;
    }
    fd = -1;

    if (rename(newfilename, filename))
        goto err;

    rc = 0;

err:
    if (fd >= 0) {
        e = errno;
        close(fd);
        errno = e;
    }

    if (rc)
        LOGE(ERROR, "cannot write/rename %s for %s", newfilename, filename);
out:
    return rc;
}

int libxl_userdata_store(libxl_ctx *ctx, uint32_t domid,
                              const char *userdata_userid,
                              const uint8_t *data, int datalen)
{
    GC_INIT(ctx);
    int rc;
    libxl__flock *lock;

    CTX_LOCK;
    lock = libxl__lock_domain_userdata(gc, domid);
    if (!lock) {
        rc = ERROR_LOCK_FAIL;
        goto out;
    }

    rc = libxl__userdata_store(gc, domid, userdata_userid,
                               data, datalen);

    libxl__unlock_file(lock);

out:
    CTX_UNLOCK;
    GC_FREE;
    return rc;
}

int libxl__userdata_retrieve(libxl__gc *gc, uint32_t domid,
                             const char *userdata_userid,
                             uint8_t **data_r, int *datalen_r)
{
    const char *filename;
    int e, rc;
    int datalen = 0;
    void *data = 0;

    filename = libxl__userdata_path(gc, domid, userdata_userid, "d");
    if (!filename) {
        rc = ERROR_NOMEM;
        goto out;
    }

    e = libxl_read_file_contents(CTX, filename, data_r ? &data : 0, &datalen);
    if (e && errno != ENOENT) {
        rc = ERROR_FAIL;
        goto out;
    }
    if (!e && !datalen) {
        LOG(ERROR, "userdata file %s is empty", filename);
        if (data_r) assert(!*data_r);
        rc = ERROR_FAIL;
        goto out;
    }

    if (data_r) *data_r = data;
    if (datalen_r) *datalen_r = datalen;
    rc = 0;

out:
    return rc;
}

int libxl_userdata_retrieve(libxl_ctx *ctx, uint32_t domid,
                                 const char *userdata_userid,
                                 uint8_t **data_r, int *datalen_r)
{
    GC_INIT(ctx);
    int rc;
    libxl__flock *lock;

    CTX_LOCK;
    lock = libxl__lock_domain_userdata(gc, domid);
    if (!lock) {
        rc = ERROR_LOCK_FAIL;
        goto out;
    }

    rc = libxl__userdata_retrieve(gc, domid, userdata_userid,
                                  data_r, datalen_r);

    libxl__unlock_file(lock);
out:
    CTX_UNLOCK;
    GC_FREE;
    return rc;
}

int libxl_userdata_unlink(libxl_ctx *ctx, uint32_t domid,
                          const char *userdata_userid)
{
    GC_INIT(ctx);
    CTX_LOCK;

    int rc;
    libxl__flock *lock = NULL;
    const char *filename;

    lock = libxl__lock_domain_userdata(gc, domid);
    if (!lock) {
        rc = ERROR_LOCK_FAIL;
        goto out;
    }

    filename = libxl__userdata_path(gc, domid, userdata_userid, "d");
    if (!filename) {
        rc = ERROR_FAIL;
        goto out;
    }
    if (unlink(filename)) {
        LOGE(ERROR, "error deleting userdata file: %s", filename);
        rc = ERROR_FAIL;
        goto out;
    }

    rc = 0;
out:
    if (lock)
        libxl__unlock_file(lock);
    CTX_UNLOCK;
    GC_FREE;
    return rc;
}

int libxl__domain_set_paging_mempool_size(
    libxl__gc *gc, libxl_domain_config *d_config, uint32_t domid)
{
    uint64_t shadow_mem;

    shadow_mem = d_config->b_info.shadow_memkb;
    shadow_mem <<= 10;

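    /* The round-trip check below detects overflow of the KiB-to-bytes
     * shift. */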
    if ((shadow_mem >> 10) != d_config->b_info.shadow_memkb) {
        LOGED(ERROR, domid,
              "shadow_memkb value %"PRIu64"kB too large",
              d_config->b_info.shadow_memkb);
        return ERROR_FAIL;
    }

    int r = xc_set_paging_mempool_size(CTX->xch, domid, shadow_mem);
    if (r) {
        LOGED(ERROR, domid,
              "Failed to set paging mempool size to %"PRIu64"kB",
              d_config->b_info.shadow_memkb);
        return ERROR_FAIL;
    }

    return 0;
}

/*
 * Local variables:
 * mode: C
 * c-basic-offset: 4
 * indent-tabs-mode: nil
 * End:
 */