1 /*
2  * Copyright (C) 2009      Citrix Ltd.
3  * Author Vincent Hanquez <vincent.hanquez@eu.citrix.com>
4  *
5  * This program is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU Lesser General Public License as published
7  * by the Free Software Foundation; version 2.1 only. with the special
8  * exception on linking described in file LICENSE.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU Lesser General Public License for more details.
14  */
15 
16 #include "libxl_osdeps.h" /* must come before any other headers */
17 
18 #include <glob.h>
19 
20 #include "libxl_internal.h"
21 #include "libxl_arch.h"
22 
23 #include <xc_dom.h>
24 #include <xen/hvm/hvm_info_table.h>
25 #include <xen/hvm/hvm_xs_strings.h>
26 #include <xen/hvm/e820.h>
27 
28 #include "_paths.h"
29 
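/*
 * Work out the type of an existing domain.  The hypervisor only tells
 * us whether the guest is PV or an HVM container; for the latter the
 * domain's libxl xenstore "type" node is consulted to distinguish HVM
 * from PVH.  If that node is missing or unparseable we fall back to
 * assuming HVM; LIBXL_DOMAIN_TYPE_INVALID is returned only when the
 * domain itself cannot be looked up.
 */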
30 libxl_domain_type libxl__domain_type(libxl__gc *gc, uint32_t domid)
31 {
32     libxl_ctx *ctx = libxl__gc_owner(gc);
33     xc_domaininfo_t info;
34     int ret;
35 
36     ret = xc_domain_getinfolist(ctx->xch, domid, 1, &info);
37     if (ret != 1 || info.domain != domid) {
38         LOG(ERROR, "unable to get domain type for domid=%"PRIu32, domid);
39         return LIBXL_DOMAIN_TYPE_INVALID;
40     }
41     if (info.flags & XEN_DOMINF_hvm_guest) {
42         const char *type_path = GCSPRINTF("%s/type",
43                                           libxl__xs_libxl_path(gc, domid));
44         const char *type;
45         libxl_domain_type t;
46         int rc;
47 
48         rc = libxl__xs_read_mandatory(gc, XBT_NULL, type_path, &type);
49         if (rc) {
50             LOG(WARN,
51             "unable to get domain type for domid=%"PRIu32", assuming HVM",
52                 domid);
53             return LIBXL_DOMAIN_TYPE_HVM;
54         }
55 
56         rc = libxl_domain_type_from_string(type, &t);
57         if (rc) {
58             LOG(WARN,
59             "unable to get domain type for domid=%"PRIu32", assuming HVM",
60                 domid);
61             return LIBXL_DOMAIN_TYPE_HVM;
62         }
63 
64         return t;
65     } else
66         return LIBXL_DOMAIN_TYPE_PV;
67 }
68 
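/*
 * Return the id of the cpupool the domain is assigned to, or
 * ERROR_FAIL if the domain information cannot be retrieved.
 */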
69 int libxl__domain_cpupool(libxl__gc *gc, uint32_t domid)
70 {
71     xc_domaininfo_t info;
72     int ret;
73 
74     ret = xc_domain_getinfolist(CTX->xch, domid, 1, &info);
75     if (ret != 1)
76     {
77         LOGE(ERROR, "getinfolist failed %d", ret);
78         return ERROR_FAIL;
79     }
80     if (info.domain != domid)
81     {
82         LOGE(ERROR, "got info for dom%d, wanted dom%d\n", info.domain, domid);
83         return ERROR_FAIL;
84     }
85     return info.cpupool;
86 }
87 
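/*
 * Return the scheduler driving the domain's cpupool, or
 * LIBXL_SCHEDULER_UNKNOWN if the cpupool (or its info) cannot be
 * obtained.
 */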
88 libxl_scheduler libxl__domain_scheduler(libxl__gc *gc, uint32_t domid)
89 {
90     int cpupool = libxl__domain_cpupool(gc, domid);
91     libxl_cpupoolinfo poolinfo;
92     libxl_scheduler sched = LIBXL_SCHEDULER_UNKNOWN;
93     int rc;
94 
95     if (cpupool < 0)
96         return sched;
97 
98     libxl_cpupoolinfo_init(&poolinfo);
99     rc = libxl_cpupool_info(CTX, &poolinfo, cpupool);
100     if (rc < 0)
101         goto out;
102 
103     sched = poolinfo.sched;
104 
105 out:
106     libxl_cpupoolinfo_dispose(&poolinfo);
107     return sched;
108 }
109 
110 /*
111  * Two NUMA placement candidates are compared by means of the following
112  * heuristics:
113  *
114  *  - the number of vcpus runnable on the candidates is considered, and
115  *    candidates with fewer of them are preferred. If two candidates have
116  *    the same number of runnable vcpus,
117  *  - the amount of free memory in the candidates is considered, and the
118  *    candidate with the greater amount of it is preferred.
119  *
120  * In fact, leaving larger memory holes maximizes the probability of being
121  * able to put other domains on the node. That hopefully means many domains
122  * will benefit from local memory accesses, but it also introduces the risk
123  * of overloading large (from a memory POV) nodes. That is exactly the
124  * effect that counting the vcpus able to run on the nodes tries to prevent.
125  *
126  * Note that this completely ignores the number of nodes each candidate
127  * spans, as the fact that fewer nodes is better is already accounted for
128  * in the algorithm.
129  */
130 static int numa_cmpf(const libxl__numa_candidate *c1,
131                      const libxl__numa_candidate *c2)
132 {
133     if (c1->nr_vcpus != c2->nr_vcpus)
134         return c1->nr_vcpus - c2->nr_vcpus;
135 
136     return c2->free_memkb - c1->free_memkb;
137 }
138 
139 /* The actual automatic NUMA placement routine */
140 static int numa_place_domain(libxl__gc *gc, uint32_t domid,
141                              libxl_domain_build_info *info)
142 {
143     int found;
144     libxl__numa_candidate candidate;
145     libxl_bitmap cpupool_nodemap;
146     libxl_cpupoolinfo cpupool_info;
147     int i, cpupool, rc = 0;
148     uint64_t memkb;
149 
150     libxl__numa_candidate_init(&candidate);
151     libxl_bitmap_init(&cpupool_nodemap);
152     libxl_cpupoolinfo_init(&cpupool_info);
153 
154     /*
155      * Extract the cpumap from the cpupool the domain belongs to. In fact,
156      * it only makes sense to consider the cpus/nodes that are in there
157      * for placement.
158      */
159     rc = cpupool = libxl__domain_cpupool(gc, domid);
160     if (rc < 0)
161         goto out;
162     rc = libxl_cpupool_info(CTX, &cpupool_info, cpupool);
163     if (rc)
164         goto out;
165 
166     rc = libxl_domain_need_memory(CTX, info, &memkb);
167     if (rc)
168         goto out;
169     if (libxl_node_bitmap_alloc(CTX, &cpupool_nodemap, 0)) {
170         rc = ERROR_FAIL;
171         goto out;
172     }
173 
174     /* Find the best candidate with enough free memory and at least
175      * as many pcpus as the domain has vcpus.  */
176     rc = libxl__get_numa_candidate(gc, memkb, info->max_vcpus,
177                                    0, 0, &cpupool_info.cpumap,
178                                    numa_cmpf, &candidate, &found);
179     if (rc)
180         goto out;
181 
182     /* Not even one suitable placement candidate! Let's just not touch the
183      * domain's info->cpumap. It will have affinity with all nodes/cpus. */
184     if (found == 0)
185         goto out;
186 
187     /* Map the candidate's node map to the domain's info->nodemap */
188     libxl__numa_candidate_get_nodemap(gc, &candidate, &info->nodemap);
189 
190     /* Avoid trying to set the affinity to nodes that might be in the
191      * candidate's nodemap but out of our cpupool. */
192     rc = libxl_cpumap_to_nodemap(CTX, &cpupool_info.cpumap,
193                                  &cpupool_nodemap);
194     if (rc)
195         goto out;
196 
197     libxl_for_each_set_bit(i, info->nodemap) {
198         if (!libxl_bitmap_test(&cpupool_nodemap, i))
199             libxl_bitmap_reset(&info->nodemap, i);
200     }
201 
202     LOG(DETAIL, "NUMA placement candidate with %d nodes, %d cpus and "
203                 "%"PRIu64" KB free selected", candidate.nr_nodes,
204                 candidate.nr_cpus, candidate.free_memkb / 1024);
205 
206  out:
207     libxl__numa_candidate_dispose(&candidate);
208     libxl_bitmap_dispose(&cpupool_nodemap);
209     libxl_cpupoolinfo_dispose(&cpupool_info);
210     return rc;
211 }
212 
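/*
 * Sanity check the configured timer mode and hand it back as the raw
 * value to be written into HVM_PARAM_TIMER_MODE.
 */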
213 static unsigned long timer_mode(const libxl_domain_build_info *info)
214 {
215     const libxl_timer_mode mode = info->timer_mode;
216     assert(mode >= LIBXL_TIMER_MODE_DELAY_FOR_MISSED_TICKS &&
217            mode <= LIBXL_TIMER_MODE_ONE_MISSED_TICK_PENDING);
218     return ((unsigned long)mode);
219 }
220 
221 #if defined(__i386__) || defined(__x86_64__)
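/*
 * Translate the viridian (Hyper-V) enlightenment configuration into the
 * HVM_PARAM_VIRIDIAN bit mask and set it on the domain.  The plain
 * boolean "viridian" option enables a default group of enlightenments;
 * the viridian_enable/viridian_disable bitmaps then refine that set.
 * Enabling and disabling the same group, or requesting any group
 * without the base group, is an error.
 */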
222 static int hvm_set_viridian_features(libxl__gc *gc, uint32_t domid,
223                                      libxl_domain_build_info *const info)
224 {
225     libxl_bitmap enlightenments;
226     libxl_viridian_enlightenment v;
227     uint64_t mask = 0;
228 
229     libxl_bitmap_init(&enlightenments);
230     libxl_bitmap_alloc(CTX, &enlightenments,
231                        LIBXL_BUILDINFO_HVM_VIRIDIAN_ENABLE_DISABLE_WIDTH);
232 
233     if (libxl_defbool_val(info->u.hvm.viridian)) {
234         /* Enable defaults */
235         libxl_bitmap_set(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_BASE);
236         libxl_bitmap_set(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_FREQ);
237         libxl_bitmap_set(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_TIME_REF_COUNT);
238         libxl_bitmap_set(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_APIC_ASSIST);
239         libxl_bitmap_set(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_CRASH_CTL);
240     }
241 
242     libxl_for_each_set_bit(v, info->u.hvm.viridian_enable) {
243         if (libxl_bitmap_test(&info->u.hvm.viridian_disable, v)) {
244             LOG(ERROR, "%s group both enabled and disabled",
245                 libxl_viridian_enlightenment_to_string(v));
246             goto err;
247         }
248         if (libxl_viridian_enlightenment_to_string(v)) /* check validity */
249             libxl_bitmap_set(&enlightenments, v);
250     }
251 
252     libxl_for_each_set_bit(v, info->u.hvm.viridian_disable)
253         if (libxl_viridian_enlightenment_to_string(v)) /* check validity */
254             libxl_bitmap_reset(&enlightenments, v);
255 
256     /* The base set is a pre-requisite for all others */
257     if (!libxl_bitmap_is_empty(&enlightenments) &&
258         !libxl_bitmap_test(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_BASE)) {
259         LOG(ERROR, "base group not enabled");
260         goto err;
261     }
262 
263     libxl_for_each_set_bit(v, enlightenments)
264         LOG(DETAIL, "%s group enabled", libxl_viridian_enlightenment_to_string(v));
265 
266     if (libxl_bitmap_test(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_BASE)) {
267         mask |= HVMPV_base_freq;
268 
269         if (!libxl_bitmap_test(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_FREQ))
270             mask |= HVMPV_no_freq;
271     }
272 
273     if (libxl_bitmap_test(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_TIME_REF_COUNT))
274         mask |= HVMPV_time_ref_count;
275 
276     if (libxl_bitmap_test(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_REFERENCE_TSC))
277         mask |= HVMPV_reference_tsc;
278 
279     if (libxl_bitmap_test(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_HCALL_REMOTE_TLB_FLUSH))
280         mask |= HVMPV_hcall_remote_tlb_flush;
281 
282     if (libxl_bitmap_test(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_APIC_ASSIST))
283         mask |= HVMPV_apic_assist;
284 
285     if (libxl_bitmap_test(&enlightenments, LIBXL_VIRIDIAN_ENLIGHTENMENT_CRASH_CTL))
286         mask |= HVMPV_crash_ctl;
287 
288     if (mask != 0 &&
289         xc_hvm_param_set(CTX->xch,
290                          domid,
291                          HVM_PARAM_VIRIDIAN,
292                          mask) != 0) {
293         LOGE(ERROR, "Couldn't set viridian feature mask (0x%"PRIx64")", mask);
294         goto err;
295     }
296 
297     libxl_bitmap_dispose(&enlightenments);
298     return 0;
299 
300 err:
301     libxl_bitmap_dispose(&enlightenments);
302     return ERROR_FAIL;
303 }
304 
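/* Forward the configured guest MCA capabilities (if any) to Xen. */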
305 static int hvm_set_mca_capabilities(libxl__gc *gc, uint32_t domid,
306                                     libxl_domain_build_info *const info)
307 {
308     unsigned long caps = info->u.hvm.mca_caps;
309 
310     if (!caps)
311         return 0;
312 
313     return xc_hvm_param_set(CTX->xch, domid, HVM_PARAM_MCA_CAP, caps);
314 }
315 #endif
316 
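/*
 * Set the HVM parameters (PAE, HPET, timer mode, VPT alignment, nested
 * HVM) that apply to HVM and PVH guests before the domain image is
 * built.
 */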
317 static void hvm_set_conf_params(xc_interface *handle, uint32_t domid,
318                                 libxl_domain_build_info *const info)
319 {
320     switch(info->type) {
321     case LIBXL_DOMAIN_TYPE_PVH:
322         xc_hvm_param_set(handle, domid, HVM_PARAM_PAE_ENABLED, true);
323         xc_hvm_param_set(handle, domid, HVM_PARAM_TIMER_MODE,
324                          timer_mode(info));
325         xc_hvm_param_set(handle, domid, HVM_PARAM_NESTEDHVM,
326                          libxl_defbool_val(info->nested_hvm));
327         break;
328     case LIBXL_DOMAIN_TYPE_HVM:
329         xc_hvm_param_set(handle, domid, HVM_PARAM_PAE_ENABLED,
330                          libxl_defbool_val(info->u.hvm.pae));
331 #if defined(__i386__) || defined(__x86_64__)
332         xc_hvm_param_set(handle, domid, HVM_PARAM_HPET_ENABLED,
333                          libxl_defbool_val(info->u.hvm.hpet));
334 #endif
335         xc_hvm_param_set(handle, domid, HVM_PARAM_TIMER_MODE,
336                          timer_mode(info));
337         xc_hvm_param_set(handle, domid, HVM_PARAM_VPT_ALIGN,
338                          libxl_defbool_val(info->u.hvm.vpt_align));
339         xc_hvm_param_set(handle, domid, HVM_PARAM_NESTEDHVM,
340                          libxl_defbool_val(info->nested_hvm));
341         break;
342     default:
343         abort();
344     }
345 }
346 
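/*
 * Pre-build setup common to all guest types: vcpu and grant table
 * limits, optional automatic NUMA placement, vcpu affinities, the
 * maximum memory, allocation of the xenstore/console event channels,
 * and the type specific HVM/altp2m parameters.
 */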
347 int libxl__build_pre(libxl__gc *gc, uint32_t domid,
348               libxl_domain_config *d_config, libxl__domain_build_state *state)
349 {
350     libxl_domain_build_info *const info = &d_config->b_info;
351     libxl_ctx *ctx = libxl__gc_owner(gc);
352     char *xs_domid, *con_domid;
353     int rc;
354     uint64_t size;
355 
356     if (xc_domain_max_vcpus(ctx->xch, domid, info->max_vcpus) != 0) {
357         LOG(ERROR, "Couldn't set max vcpu count");
358         return ERROR_FAIL;
359     }
360 
361     if (xc_domain_set_gnttab_limits(ctx->xch, domid, info->max_grant_frames,
362                                     info->max_maptrack_frames) != 0) {
363         LOG(ERROR, "Couldn't set grant table limits");
364         return ERROR_FAIL;
365     }
366 
367     /*
368      * Check if the domain has any CPU or node affinity already. If not, try
369      * to build up the latter via automatic NUMA placement. In fact, in case
370      * numa_place_domain() manages to find a placement, info->nodemap is
371      * updated accordingly; if it does not, info->nodemap is just left
372      * alone. It is then the subsequent call to
373      * libxl_domain_set_nodeaffinity() that enacts the actual placement.
374      *
375      * As far as scheduling is concerned, we achieve NUMA-aware scheduling
376      * by having the results of placement affect the soft affinity of all
377      * the vcpus of the domain. Of course, we want that iff placement is
378      * enabled and actually happens, so we only change info->cpumap_soft to
379      * reflect the placement result if that is the case
380      */
381     if (libxl_defbool_val(info->numa_placement)) {
382         if (info->cpumap.size || info->num_vcpu_hard_affinity ||
383             info->num_vcpu_soft_affinity)
384             LOG(WARN, "Can't run NUMA placement, as a (hard or soft) "
385                       "affinity has been specified explicitly");
386         else if (info->nodemap.size)
387             LOG(WARN, "Can't run NUMA placement, as the domain has "
388                       "NUMA node affinity set already");
389         else {
390             libxl_bitmap cpumap_soft;
391 
392             rc = libxl_node_bitmap_alloc(ctx, &info->nodemap, 0);
393             if (rc)
394                 return rc;
395             libxl_bitmap_set_any(&info->nodemap);
396 
397             rc = libxl_cpu_bitmap_alloc(ctx, &cpumap_soft, 0);
398             if (rc)
399                 return rc;
400 
401             rc = numa_place_domain(gc, domid, info);
402             if (rc) {
403                 libxl_bitmap_dispose(&cpumap_soft);
404                 return rc;
405             }
406 
407             /*
408              * All we need to do now is convert the result of automatic
409              * placement from nodemap to cpumap, and then use that cpumap
410              * as the soft affinity for all the vcpus of the domain.
411              *
412              * When calling libxl_set_vcpuaffinity_all(), it is ok to use
413              * NULL as hard affinity, as we know we don't have one, or we
414              * won't be here.
415              */
416             libxl_nodemap_to_cpumap(ctx, &info->nodemap, &cpumap_soft);
417             libxl_set_vcpuaffinity_all(ctx, domid, info->max_vcpus,
418                                        NULL, &cpumap_soft);
419 
420             libxl_bitmap_dispose(&cpumap_soft);
421 
422             /*
423              * Placement has run, so avoid it being re-run if this same
424              * config we are using and building here is ever re-used.
425              * This means that people re-using configs will get the same
426              * results, consistently, across every re-use, which is what
427              * we expect most people to want.
428              */
429             libxl_defbool_set(&info->numa_placement, false);
430         }
431     }
432 
433     if (info->nodemap.size)
434         libxl_domain_set_nodeaffinity(ctx, domid, &info->nodemap);
435 
436     if (info->num_vcpu_hard_affinity || info->num_vcpu_soft_affinity) {
437         libxl_bitmap *hard_affinity, *soft_affinity;
438         int i, n_vcpus;
439 
440         n_vcpus = info->num_vcpu_hard_affinity > info->num_vcpu_soft_affinity ?
441             info->num_vcpu_hard_affinity : info->num_vcpu_soft_affinity;
442 
443         for (i = 0; i < n_vcpus; i++) {
444             /*
445              * Prepare hard and soft affinity pointers in a way that allows
446              * us to issue only one call to libxl_set_vcpuaffinity(), setting,
447              * for each vcpu, both hard and soft affinity "atomically".
448              */
449             hard_affinity = NULL;
450             if (info->num_vcpu_hard_affinity &&
451                 i < info->num_vcpu_hard_affinity)
452                 hard_affinity = &info->vcpu_hard_affinity[i];
453 
454             soft_affinity = NULL;
455             if (info->num_vcpu_soft_affinity &&
456                 i < info->num_vcpu_soft_affinity)
457                 soft_affinity = &info->vcpu_soft_affinity[i];
458 
459             if (libxl_set_vcpuaffinity(ctx, domid, i,
460                                        hard_affinity, soft_affinity)) {
461                 LOG(ERROR, "setting affinity failed on vcpu `%d'", i);
462                 return ERROR_FAIL;
463             }
464         }
465     }
466 
467 
468     rc = libxl__arch_extra_memory(gc, info, &size);
469     if (rc < 0) {
470         LOGE(ERROR, "Couldn't get arch extra constant memory size");
471         return ERROR_FAIL;
472     }
473 
474     if (xc_domain_setmaxmem(ctx->xch, domid, info->target_memkb + size) < 0) {
475         LOGE(ERROR, "Couldn't set max memory");
476         return ERROR_FAIL;
477     }
478 
479     xs_domid = xs_read(ctx->xsh, XBT_NULL, "/tool/xenstored/domid", NULL);
480     state->store_domid = xs_domid ? atoi(xs_domid) : 0;
481     free(xs_domid);
482 
483     con_domid = xs_read(ctx->xsh, XBT_NULL, "/tool/xenconsoled/domid", NULL);
484     state->console_domid = con_domid ? atoi(con_domid) : 0;
485     free(con_domid);
486 
487     state->store_port = xc_evtchn_alloc_unbound(ctx->xch, domid, state->store_domid);
488     state->console_port = xc_evtchn_alloc_unbound(ctx->xch, domid, state->console_domid);
489 
490     if (info->type != LIBXL_DOMAIN_TYPE_PV)
491         hvm_set_conf_params(ctx->xch, domid, info);
492 
493 #if defined(__i386__) || defined(__x86_64__)
494     if (info->type == LIBXL_DOMAIN_TYPE_HVM) {
495         rc = hvm_set_viridian_features(gc, domid, info);
496         if (rc)
497             return rc;
498 
499         rc = hvm_set_mca_capabilities(gc, domid, info);
500         if (rc)
501             return rc;
502     }
503 #endif
504 
505     /* Alternate p2m support on x86 is available only for PVH/HVM guests. */
506     if (info->type == LIBXL_DOMAIN_TYPE_HVM) {
507         /* The config parameter "altp2m" replaces the parameter "altp2mhvm". For
508          * legacy reasons, both parameters are accepted on x86 HVM guests.
509          *
510          * If the legacy field info->u.hvm.altp2m is set, activate altp2m.
511          * Otherwise set altp2m based on the field info->altp2m. */
512         if (info->altp2m == LIBXL_ALTP2M_MODE_DISABLED &&
513             libxl_defbool_val(info->u.hvm.altp2m))
514             xc_hvm_param_set(ctx->xch, domid, HVM_PARAM_ALTP2M,
515                              libxl_defbool_val(info->u.hvm.altp2m));
516         else
517             xc_hvm_param_set(ctx->xch, domid, HVM_PARAM_ALTP2M,
518                              info->altp2m);
519     } else if (info->type == LIBXL_DOMAIN_TYPE_PVH) {
520         xc_hvm_param_set(ctx->xch, domid, HVM_PARAM_ALTP2M,
521                          info->altp2m);
522     }
523 
524     rc = libxl__arch_domain_create(gc, d_config, domid);
525 
526     return rc;
527 }
528 
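/*
 * Give every vcpu of a vNUMA enabled guest a soft affinity matching
 * the pcpus of the physical node its virtual node is placed on.
 */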
529 static int set_vnuma_affinity(libxl__gc *gc, uint32_t domid,
530                               libxl_domain_build_info *info)
531 {
532     libxl_bitmap cpumap;
533     libxl_vnode_info *v;
534     unsigned int i, j;
535     int rc = 0;
536 
537     libxl_bitmap_init(&cpumap);
538 
539     rc = libxl_cpu_bitmap_alloc(CTX, &cpumap, 0);
540     if (rc) {
541         LOG(ERROR, "Can't allocate nodemap");
542         goto out;
543     }
544 
545     /*
546      * For each vcpu in each vnode, set its soft affinity to
547      * the pcpus belonging to the pnode the vnode is on
548      */
549     for (i = 0; i < info->num_vnuma_nodes; i++) {
550         v = &info->vnuma_nodes[i];
551 
552         rc = libxl_node_to_cpumap(CTX, v->pnode, &cpumap);
553         if (rc) {
554             LOG(ERROR, "Can't get cpumap for vnode %d", i);
555             goto out;
556         }
557 
558         libxl_for_each_set_bit(j, v->vcpus) {
559             rc = libxl_set_vcpuaffinity(CTX, domid, j, NULL, &cpumap);
560             if (rc) {
561                 LOG(ERROR, "Can't set cpu affinity for %d", j);
562                 goto out;
563             }
564         }
565     }
566 
567 out:
568     libxl_bitmap_dispose(&cpumap);
569     return rc;
570 }
571 
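/*
 * Post-build steps common to all guest types: vNUMA based soft
 * affinities (unless explicit soft affinities were given), scheduler
 * parameters, the event channel limit, the CPUID policy, the VM
 * generation ID, and finally the initial xenstore entries (memory
 * targets, domid, store ring details and per-vcpu availability),
 * written in a single transaction before the domain is introduced to
 * xenstored.
 */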
572 int libxl__build_post(libxl__gc *gc, uint32_t domid,
573                       libxl_domain_build_info *info,
574                       libxl__domain_build_state *state,
575                       char **vms_ents, char **local_ents)
576 {
577     libxl_ctx *ctx = libxl__gc_owner(gc);
578     char *dom_path, *vm_path;
579     xs_transaction_t t;
580     char **ents;
581     int i, rc;
582 
583     if (info->num_vnuma_nodes && !info->num_vcpu_soft_affinity) {
584         rc = set_vnuma_affinity(gc, domid, info);
585         if (rc)
586             return rc;
587     }
588 
589     rc = libxl_domain_sched_params_set(CTX, domid, &info->sched_params);
590     if (rc)
591         return rc;
592 
593     rc = xc_domain_set_max_evtchn(ctx->xch, domid, info->event_channels);
594     if (rc) {
595         LOG(ERROR, "Failed to set event channel limit to %d (%d)",
596             info->event_channels, rc);
597         return ERROR_FAIL;
598     }
599 
600     libxl_cpuid_apply_policy(ctx, domid);
601     if (info->cpuid != NULL)
602         libxl_cpuid_set(ctx, domid, info->cpuid);
603 
604     if (info->type == LIBXL_DOMAIN_TYPE_HVM
605         && !libxl_ms_vm_genid_is_zero(&info->u.hvm.ms_vm_genid)) {
606         rc = libxl__ms_vm_genid_set(gc, domid,
607                                     &info->u.hvm.ms_vm_genid);
608         if (rc) {
609             LOG(ERROR, "Failed to set VM Generation ID");
610             return rc;
611         }
612     }
613 
614     ents = libxl__calloc(gc, 12 + (info->max_vcpus * 2) + 2, sizeof(char *));
615     ents[0] = "memory/static-max";
616     ents[1] = GCSPRINTF("%"PRId64, info->max_memkb);
617     ents[2] = "memory/target";
618     ents[3] = GCSPRINTF("%"PRId64, info->target_memkb -
619                         libxl__get_targetmem_fudge(gc, info));
620     ents[4] = "memory/videoram";
621     ents[5] = GCSPRINTF("%"PRId64, info->video_memkb);
622     ents[6] = "domid";
623     ents[7] = GCSPRINTF("%d", domid);
624     ents[8] = "store/port";
625     ents[9] = GCSPRINTF("%"PRIu32, state->store_port);
626     ents[10] = "store/ring-ref";
627     ents[11] = GCSPRINTF("%lu", state->store_mfn);
628     for (i = 0; i < info->max_vcpus; i++) {
629         ents[12+(i*2)]   = GCSPRINTF("cpu/%d/availability", i);
630         ents[12+(i*2)+1] = libxl_bitmap_test(&info->avail_vcpus, i)
631                             ? "online" : "offline";
632     }
633 
634     dom_path = libxl__xs_get_dompath(gc, domid);
635     if (!dom_path) {
636         return ERROR_FAIL;
637     }
638 
639     vm_path = xs_read(ctx->xsh, XBT_NULL, GCSPRINTF("%s/vm", dom_path), NULL);
640 retry_transaction:
641     t = xs_transaction_start(ctx->xsh);
642 
643     libxl__xs_writev(gc, t, dom_path, ents);
644     libxl__xs_writev(gc, t, dom_path, local_ents);
645     libxl__xs_writev(gc, t, vm_path, vms_ents);
646 
647     if (!xs_transaction_end(ctx->xsh, t, 0))
648         if (errno == EAGAIN)
649             goto retry_transaction;
650     xs_introduce_domain(ctx->xsh, domid, state->store_mfn, state->store_port);
651     free(vm_path);
652     return 0;
653 }
654 
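/*
 * Push the guest's vNUMA layout (vnode-to-pnode map, vcpu-to-vnode
 * map, distance table and memory ranges) to the hypervisor via
 * xc_domain_setvnuma().
 */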
655 static int set_vnuma_info(libxl__gc *gc, uint32_t domid,
656                           const libxl_domain_build_info *info,
657                           const libxl__domain_build_state *state)
658 {
659     int rc = 0;
660     unsigned int i, nr_vdistance;
661     unsigned int *vcpu_to_vnode, *vnode_to_pnode, *vdistance = NULL;
662 
663     vcpu_to_vnode = libxl__calloc(gc, info->max_vcpus,
664                                   sizeof(unsigned int));
665     vnode_to_pnode = libxl__calloc(gc, info->num_vnuma_nodes,
666                                    sizeof(unsigned int));
667 
668     nr_vdistance = info->num_vnuma_nodes * info->num_vnuma_nodes;
669     vdistance = libxl__calloc(gc, nr_vdistance, sizeof(unsigned int));
670 
671     for (i = 0; i < info->num_vnuma_nodes; i++) {
672         libxl_vnode_info *v = &info->vnuma_nodes[i];
673         int j;
674 
675         /* vnode to pnode mapping */
676         vnode_to_pnode[i] = v->pnode;
677 
678         /* vcpu to vnode mapping */
679         libxl_for_each_set_bit(j, v->vcpus)
680             vcpu_to_vnode[j] = i;
681 
682         /* node distances */
683         assert(info->num_vnuma_nodes == v->num_distances);
684         memcpy(vdistance + (i * info->num_vnuma_nodes),
685                v->distances,
686                v->num_distances * sizeof(unsigned int));
687     }
688 
689     if (xc_domain_setvnuma(CTX->xch, domid, info->num_vnuma_nodes,
690                            state->num_vmemranges, info->max_vcpus,
691                            state->vmemranges, vdistance,
692                            vcpu_to_vnode, vnode_to_pnode) < 0) {
693         LOGE(ERROR, "xc_domain_setvnuma failed");
694         rc = ERROR_FAIL;
695     }
696 
697     return rc;
698 }
699 
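/*
 * Drive libxc through the common domain building steps: parse the
 * kernel image, set up the guest memory layout, build and boot the
 * image, and initialise the grant tables.  Shared by the PV and
 * HVM/PVH build paths.
 */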
700 static int libxl__build_dom(libxl__gc *gc, uint32_t domid,
701              libxl_domain_build_info *info, libxl__domain_build_state *state,
702              struct xc_dom_image *dom)
703 {
704     uint64_t mem_kb;
705     int ret;
706 
707     if ( (ret = xc_dom_boot_xen_init(dom, CTX->xch, domid)) != 0 ) {
708         LOGE(ERROR, "xc_dom_boot_xen_init failed");
709         goto out;
710     }
711 #ifdef GUEST_RAM_BASE
712     if ( (ret = xc_dom_rambase_init(dom, GUEST_RAM_BASE)) != 0 ) {
713         LOGE(ERROR, "xc_dom_rambase failed");
714         goto out;
715     }
716 #endif
717     if ( (ret = xc_dom_parse_image(dom)) != 0 ) {
718         LOGE(ERROR, "xc_dom_parse_image failed");
719         goto out;
720     }
721     if ( (ret = libxl__arch_domain_init_hw_description(gc, info, state, dom)) != 0 ) {
722         LOGE(ERROR, "libxl__arch_domain_init_hw_description failed");
723         goto out;
724     }
725 
726     mem_kb = dom->container_type == XC_DOM_HVM_CONTAINER ?
727              (info->max_memkb - info->video_memkb) : info->target_memkb;
728     if ( (ret = xc_dom_mem_init(dom, mem_kb / 1024)) != 0 ) {
729         LOGE(ERROR, "xc_dom_mem_init failed");
730         goto out;
731     }
732     if ( (ret = xc_dom_boot_mem_init(dom)) != 0 ) {
733         LOGE(ERROR, "xc_dom_boot_mem_init failed");
734         goto out;
735     }
736     if ( (ret = libxl__arch_domain_finalise_hw_description(gc, info, dom)) != 0 ) {
737         LOGE(ERROR, "libxl__arch_domain_finalise_hw_description failed");
738         goto out;
739     }
740     if ( (ret = xc_dom_build_image(dom)) != 0 ) {
741         LOGE(ERROR, "xc_dom_build_image failed");
742         goto out;
743     }
744     if ( (ret = xc_dom_boot_image(dom)) != 0 ) {
745         LOGE(ERROR, "xc_dom_boot_image failed");
746         goto out;
747     }
748     if ( (ret = xc_dom_gnttab_init(dom)) != 0 ) {
749         LOGE(ERROR, "xc_dom_gnttab_init failed");
750         goto out;
751     }
752     if ((ret = libxl__arch_build_dom_finish(gc, info, dom, state)) != 0) {
753         LOGE(ERROR, "libxl__arch_build_dom_finish failed");
754         goto out;
755     }
756 
757 out:
758     return ret != 0 ? ERROR_FAIL : 0;
759 }
760 
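/*
 * Build a PV guest: load the kernel and optional ramdisk, wire up the
 * console and xenstore event channels, apply any vNUMA configuration,
 * and record the resulting console/xenstore frame numbers in the
 * build state.
 */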
761 int libxl__build_pv(libxl__gc *gc, uint32_t domid,
762              libxl_domain_build_info *info, libxl__domain_build_state *state)
763 {
764     libxl_ctx *ctx = libxl__gc_owner(gc);
765     struct xc_dom_image *dom;
766     int ret;
767     int flags = 0;
768 
769     xc_dom_loginit(ctx->xch);
770 
771     dom = xc_dom_allocate(ctx->xch, state->pv_cmdline, info->u.pv.features);
772     if (!dom) {
773         LOGE(ERROR, "xc_dom_allocate failed");
774         return ERROR_FAIL;
775     }
776 
777     dom->container_type = XC_DOM_PV_CONTAINER;
778 
779     LOG(DEBUG, "pv kernel mapped %d path %s", state->pv_kernel.mapped, state->pv_kernel.path);
780 
781     if (state->pv_kernel.mapped) {
782         ret = xc_dom_kernel_mem(dom,
783                                 state->pv_kernel.data,
784                                 state->pv_kernel.size);
785         if ( ret != 0) {
786             LOGE(ERROR, "xc_dom_kernel_mem failed");
787             goto out;
788         }
789     } else {
790         ret = xc_dom_kernel_file(dom, state->pv_kernel.path);
791         if ( ret != 0) {
792             LOGE(ERROR, "xc_dom_kernel_file failed");
793             goto out;
794         }
795     }
796 
797     if ( state->pv_ramdisk.path && strlen(state->pv_ramdisk.path) ) {
798         if (state->pv_ramdisk.mapped) {
799             if ( (ret = xc_dom_module_mem(dom, state->pv_ramdisk.data, state->pv_ramdisk.size, NULL)) != 0 ) {
800                 LOGE(ERROR, "xc_dom_ramdisk_mem failed");
801                 goto out;
802             }
803         } else {
804             if ( (ret = xc_dom_module_file(dom, state->pv_ramdisk.path, NULL)) != 0 ) {
805                 LOGE(ERROR, "xc_dom_ramdisk_file failed");
806                 goto out;
807             }
808         }
809     }
810 
811     dom->flags = flags;
812     dom->console_evtchn = state->console_port;
813     dom->console_domid = state->console_domid;
814     dom->xenstore_evtchn = state->store_port;
815     dom->xenstore_domid = state->store_domid;
816     dom->claim_enabled = libxl_defbool_val(info->claim_mode);
817 
818     if (info->num_vnuma_nodes != 0) {
819         unsigned int i;
820 
821         ret = libxl__vnuma_build_vmemrange_pv(gc, domid, info, state);
822         if (ret) {
823             LOGE(ERROR, "cannot build vmemranges");
824             goto out;
825         }
826         ret = libxl__vnuma_config_check(gc, info, state);
827         if (ret) goto out;
828 
829         ret = set_vnuma_info(gc, domid, info, state);
830         if (ret) goto out;
831 
832         dom->nr_vmemranges = state->num_vmemranges;
833         dom->vmemranges = xc_dom_malloc(dom, sizeof(*dom->vmemranges) *
834                                         dom->nr_vmemranges);
835 
836         for (i = 0; i < dom->nr_vmemranges; i++) {
837             dom->vmemranges[i].start = state->vmemranges[i].start;
838             dom->vmemranges[i].end   = state->vmemranges[i].end;
839             dom->vmemranges[i].flags = state->vmemranges[i].flags;
840             dom->vmemranges[i].nid   = state->vmemranges[i].nid;
841         }
842 
843         dom->nr_vnodes = info->num_vnuma_nodes;
844         dom->vnode_to_pnode = xc_dom_malloc(dom, sizeof(*dom->vnode_to_pnode) *
845                                             dom->nr_vnodes);
846         for (i = 0; i < info->num_vnuma_nodes; i++)
847             dom->vnode_to_pnode[i] = info->vnuma_nodes[i].pnode;
848     }
849 
850     ret = libxl__build_dom(gc, domid, info, state, dom);
851     if (ret != 0)
852         goto out;
853 
854     if (xc_dom_translated(dom)) {
855         state->console_mfn = dom->console_pfn;
856         state->store_mfn = dom->xenstore_pfn;
857         state->vuart_gfn = dom->vuart_gfn;
858     } else {
859         state->console_mfn = xc_dom_p2m(dom, dom->console_pfn);
860         state->store_mfn = xc_dom_p2m(dom, dom->xenstore_pfn);
861     }
862 
863     ret = 0;
864 out:
865     xc_dom_release(dom);
866     return ret == 0 ? 0 : ERROR_FAIL;
867 }
868 
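/*
 * Final parameter fixup for HVM-container guests: patch the
 * hvm_info_table (APIC mode, vcpu count/availability, checksum) for
 * plain HVM guests, record the xenstore/console event channels and
 * frame numbers in the HVM params, and seed the corresponding grant
 * table entries.
 */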
869 static int hvm_build_set_params(xc_interface *handle, uint32_t domid,
870                                 libxl_domain_build_info *info,
871                                 int store_evtchn, unsigned long *store_mfn,
872                                 int console_evtchn, unsigned long *console_mfn,
873                                 domid_t store_domid, domid_t console_domid)
874 {
875     struct hvm_info_table *va_hvm;
876     uint8_t *va_map, sum;
877     uint64_t str_mfn, cons_mfn;
878     int i;
879 
880     if (info->type == LIBXL_DOMAIN_TYPE_HVM) {
881         va_map = xc_map_foreign_range(handle, domid,
882                                       XC_PAGE_SIZE, PROT_READ | PROT_WRITE,
883                                       HVM_INFO_PFN);
884         if (va_map == NULL)
885             return ERROR_FAIL;
886 
887         va_hvm = (struct hvm_info_table *)(va_map + HVM_INFO_OFFSET);
888         va_hvm->apic_mode = libxl_defbool_val(info->apic);
889         va_hvm->nr_vcpus = info->max_vcpus;
890         memset(va_hvm->vcpu_online, 0, sizeof(va_hvm->vcpu_online));
891         memcpy(va_hvm->vcpu_online, info->avail_vcpus.map, info->avail_vcpus.size);
892         for (i = 0, sum = 0; i < va_hvm->length; i++)
893             sum += ((uint8_t *) va_hvm)[i];
894         va_hvm->checksum -= sum;
895         munmap(va_map, XC_PAGE_SIZE);
896     }
897 
898     xc_hvm_param_get(handle, domid, HVM_PARAM_STORE_PFN, &str_mfn);
899     xc_hvm_param_get(handle, domid, HVM_PARAM_CONSOLE_PFN, &cons_mfn);
900     xc_hvm_param_set(handle, domid, HVM_PARAM_STORE_EVTCHN, store_evtchn);
901     xc_hvm_param_set(handle, domid, HVM_PARAM_CONSOLE_EVTCHN, console_evtchn);
902 
903     *store_mfn = str_mfn;
904     *console_mfn = cons_mfn;
905 
906     xc_dom_gnttab_hvm_seed(handle, domid, *console_mfn, *store_mfn, console_domid, store_domid);
907     return 0;
908 }
909 
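/*
 * Publish the guest physical addresses and lengths of any passed-in
 * SMBIOS/ACPI firmware modules under the domain's xenstore tree so
 * the firmware (hvmloader) can pick them up.
 */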
910 static int hvm_build_set_xs_values(libxl__gc *gc,
911                                    uint32_t domid,
912                                    struct xc_dom_image *dom,
913                                    const libxl_domain_build_info *info)
914 {
915     char *path = NULL;
916     int ret = 0;
917 
918     if (dom->smbios_module.guest_addr_out) {
919         path = GCSPRINTF("/local/domain/%d/"HVM_XS_SMBIOS_PT_ADDRESS, domid);
920 
921         ret = libxl__xs_printf(gc, XBT_NULL, path, "0x%"PRIx64,
922                                dom->smbios_module.guest_addr_out);
923         if (ret)
924             goto err;
925 
926         path = GCSPRINTF("/local/domain/%d/"HVM_XS_SMBIOS_PT_LENGTH, domid);
927 
928         ret = libxl__xs_printf(gc, XBT_NULL, path, "0x%x",
929                                dom->smbios_module.length);
930         if (ret)
931             goto err;
932     }
933 
934     /* Only one module can be passed. PVHv2 guests do not support this. */
935     if (dom->acpi_modules[0].guest_addr_out &&
936         info->type == LIBXL_DOMAIN_TYPE_HVM) {
937         path = GCSPRINTF("/local/domain/%d/"HVM_XS_ACPI_PT_ADDRESS, domid);
938 
939         ret = libxl__xs_printf(gc, XBT_NULL, path, "0x%"PRIx64,
940                                dom->acpi_modules[0].guest_addr_out);
941         if (ret)
942             goto err;
943 
944         path = GCSPRINTF("/local/domain/%d/"HVM_XS_ACPI_PT_LENGTH, domid);
945 
946         ret = libxl__xs_printf(gc, XBT_NULL, path, "0x%x",
947                                dom->acpi_modules[0].length);
948         if (ret)
949             goto err;
950     }
951 
952     return 0;
953 
954 err:
955     LOG(ERROR, "failed to write firmware xenstore value, err: %d", ret);
956     return ret;
957 }
958 
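/*
 * Read a firmware blob (e.g. a system BIOS image) from disk into a
 * libxc firmware module, rejecting empty files.
 */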
959 static int libxl__load_hvm_firmware_module(libxl__gc *gc,
960                                            const char *filename,
961                                            const char *what,
962                                            struct xc_hvm_firmware_module *m)
963 {
964     int datalen = 0;
965     void *data = NULL;
966     int r, rc;
967 
968     LOG(DEBUG, "Loading %s: %s", what, filename);
969     r = libxl_read_file_contents(CTX, filename, &data, &datalen);
970     if (r) {
971         /*
972          * Print a message only on ENOENT, other errors are logged by the
973          * function libxl_read_file_contents().
974          */
975         if (r == ENOENT)
976             LOGEV(ERROR, r, "failed to read %s file", what);
977         rc =  ERROR_FAIL;
978         goto out;
979     }
980     libxl__ptr_add(gc, data);
981     if (datalen) {
982         /* Only accept non-empty files */
983         m->data = data;
984         m->length = datalen;
985     } else {
986         LOG(ERROR, "file %s for %s is empty", filename, what);
987         rc = ERROR_INVAL;
988         goto out;
989     }
990     rc = 0;
991 out:
992     return rc;
993 }
994 
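/*
 * Select and load the guest firmware.  For HVM guests this is the
 * "hvmloader" firmware plus, with qemu-xen, a separate system BIOS
 * (SeaBIOS or OVMF); for PVH guests it is the configured kernel,
 * optionally wrapped by the PV shim.  Optional SMBIOS and ACPI tables
 * are loaded as extra modules.
 */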
995 static int libxl__domain_firmware(libxl__gc *gc,
996                                   libxl_domain_build_info *info,
997                                   libxl__domain_build_state *state,
998                                   struct xc_dom_image *dom)
999 {
1000     libxl_ctx *ctx = libxl__gc_owner(gc);
1001     const char *firmware = NULL;
1002     int e, rc;
1003     int datalen = 0;
1004     void *data;
1005     const char *bios_filename = NULL;
1006 
1007     if (info->type == LIBXL_DOMAIN_TYPE_HVM) {
1008         if (info->u.hvm.firmware) {
1009             firmware = info->u.hvm.firmware;
1010         } else {
1011             switch (info->device_model_version)
1012             {
1013             case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN:
1014             case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN_TRADITIONAL:
1015                 firmware = "hvmloader";
1016                 break;
1017             default:
1018                 LOG(ERROR, "invalid device model version %d",
1019                     info->device_model_version);
1020                 rc = ERROR_FAIL;
1021                 goto out;
1022             }
1023         }
1024     }
1025 
1026     if (state->pv_kernel.path != NULL &&
1027         info->type == LIBXL_DOMAIN_TYPE_PVH) {
1028 
1029         if (state->shim_path) {
1030             rc = xc_dom_kernel_file(dom, state->shim_path);
1031             if (rc) {
1032                 LOGE(ERROR, "xc_dom_kernel_file failed");
1033                 goto out;
1034             }
1035 
1036             /* We've loaded the shim, so load the kernel as a secondary module */
1037             if (state->pv_kernel.mapped) {
1038                 LOG(WARN, "xc_dom_module_mem, cmdline %s",
1039                     state->pv_cmdline);
1040                 rc = xc_dom_module_mem(dom, state->pv_kernel.data,
1041                                        state->pv_kernel.size, state->pv_cmdline);
1042                 if (rc) {
1043                     LOGE(ERROR, "xc_dom_kernel_mem failed");
1044                     goto out;
1045                 }
1046             } else {
1047                 LOG(WARN, "xc_dom_module_file, path %s cmdline %s",
1048                     state->pv_kernel.path, state->pv_cmdline);
1049                 rc = xc_dom_module_file(dom, state->pv_kernel.path, state->pv_cmdline);
1050                 if (rc) {
1051                     LOGE(ERROR, "xc_dom_kernel_file failed");
1052                     goto out;
1053                 }
1054             }
1055         } else {
1056             /* No shim, so load the kernel directly */
1057             if (state->pv_kernel.mapped) {
1058                 rc = xc_dom_kernel_mem(dom, state->pv_kernel.data,
1059                                        state->pv_kernel.size);
1060                 if (rc) {
1061                     LOGE(ERROR, "xc_dom_kernel_mem failed");
1062                     goto out;
1063                 }
1064             } else {
1065                 rc = xc_dom_kernel_file(dom, state->pv_kernel.path);
1066                 if (rc) {
1067                     LOGE(ERROR, "xc_dom_kernel_file failed");
1068                     goto out;
1069                 }
1070             }
1071         }
1072 
1073         if (state->pv_ramdisk.path && strlen(state->pv_ramdisk.path)) {
1074             if (state->pv_ramdisk.mapped) {
1075                 rc = xc_dom_module_mem(dom, state->pv_ramdisk.data,
1076                                        state->pv_ramdisk.size, NULL);
1077                 if (rc) {
1078                     LOGE(ERROR, "xc_dom_ramdisk_mem failed");
1079                     goto out;
1080                 }
1081             } else {
1082                 rc = xc_dom_module_file(dom, state->pv_ramdisk.path, NULL);
1083                 if (rc) {
1084                     LOGE(ERROR, "xc_dom_ramdisk_file failed");
1085                     goto out;
1086                 }
1087             }
1088         }
1089     } else {
1090         /*
1091          * Only HVM guests should get here, PVH should always have a set
1092          * kernel at this point.
1093          */
1094         assert(info->type == LIBXL_DOMAIN_TYPE_HVM);
1095         rc = xc_dom_kernel_file(dom, libxl__abs_path(gc, firmware,
1096                                                  libxl__xenfirmwaredir_path()));
1097     }
1098 
1099     if (rc != 0) {
1100         LOGE(ERROR, "xc_dom_{kernel_file/ramdisk_file} failed");
1101         goto out;
1102     }
1103 
1104     if (info->type == LIBXL_DOMAIN_TYPE_HVM &&
1105         info->device_model_version == LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN) {
1106         if (info->u.hvm.system_firmware) {
1107             bios_filename = info->u.hvm.system_firmware;
1108         } else {
1109             switch (info->u.hvm.bios) {
1110             case LIBXL_BIOS_TYPE_SEABIOS:
1111                 bios_filename = libxl__seabios_path();
1112                 break;
1113             case LIBXL_BIOS_TYPE_OVMF:
1114                 bios_filename = libxl__ovmf_path();
1115                 break;
1116             case LIBXL_BIOS_TYPE_ROMBIOS:
1117             default:
1118                 abort();
1119             }
1120         }
1121     }
1122 
1123     if (bios_filename) {
1124         rc = libxl__load_hvm_firmware_module(gc, bios_filename, "BIOS",
1125                                              &dom->system_firmware_module);
1126         if (rc) goto out;
1127     }
1128 
1129     if (info->type == LIBXL_DOMAIN_TYPE_HVM &&
1130         info->u.hvm.smbios_firmware) {
1131         data = NULL;
1132         e = libxl_read_file_contents(ctx, info->u.hvm.smbios_firmware,
1133                                      &data, &datalen);
1134         if (e) {
1135             LOGEV(ERROR, e, "failed to read SMBIOS firmware file %s",
1136                 info->u.hvm.smbios_firmware);
1137             rc = ERROR_FAIL;
1138             goto out;
1139         }
1140         libxl__ptr_add(gc, data);
1141         if (datalen) {
1142             /* Only accept non-empty files */
1143             dom->smbios_module.data = data;
1144             dom->smbios_module.length = (uint32_t)datalen;
1145         }
1146     }
1147 
1148     if (info->type == LIBXL_DOMAIN_TYPE_HVM &&
1149         info->u.hvm.acpi_firmware) {
1150         data = NULL;
1151         e = libxl_read_file_contents(ctx, info->u.hvm.acpi_firmware,
1152                                      &data, &datalen);
1153         if (e) {
1154             LOGEV(ERROR, e, "failed to read ACPI firmware file %s",
1155                 info->u.hvm.acpi_firmware);
1156             rc = ERROR_FAIL;
1157             goto out;
1158         }
1159         libxl__ptr_add(gc, data);
1160         if (datalen) {
1161             /* Only accept a non-empty file */
1162             dom->acpi_modules[0].data = data;
1163             dom->acpi_modules[0].length = (uint32_t)datalen;
1164         }
1165     }
1166 
1167     return 0;
1168 out:
1169     assert(rc != 0);
1170     return rc;
1171 }
1172 
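/*
 * Build an HVM or PVH guest: compute the memory layout (low/high
 * memory split around the MMIO hole), load the firmware or kernel,
 * honour reserved device memory and vNUMA, and finally set the HVM
 * params and xenstore values the firmware expects.
 */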
1173 int libxl__build_hvm(libxl__gc *gc, uint32_t domid,
1174               libxl_domain_config *d_config,
1175               libxl__domain_build_state *state)
1176 {
1177     libxl_ctx *ctx = libxl__gc_owner(gc);
1178     int rc;
1179     uint64_t mmio_start, lowmem_end, highmem_end, mem_size;
1180     libxl_domain_build_info *const info = &d_config->b_info;
1181     struct xc_dom_image *dom = NULL;
1182     bool device_model = info->type == LIBXL_DOMAIN_TYPE_HVM ? true : false;
1183 
1184     xc_dom_loginit(ctx->xch);
1185 
1186     /*
1187      * If PVH and we have a shim override, use the shim cmdline.
1188      * If PVH and no shim override, use the pv cmdline.
1189      * If not PVH, use info->cmdline.
1190      */
1191     dom = xc_dom_allocate(ctx->xch, info->type == LIBXL_DOMAIN_TYPE_PVH ?
1192                           (state->shim_path ? state->shim_cmdline : state->pv_cmdline) :
1193                           info->cmdline, NULL);
1194     if (!dom) {
1195         LOGE(ERROR, "xc_dom_allocate failed");
1196         rc = ERROR_NOMEM;
1197         goto out;
1198     }
1199 
1200     dom->container_type = XC_DOM_HVM_CONTAINER;
1201 
1202     /* The params from the configuration file are in Mb, which are then
1203      * multiplied by 1 Kb. This was then divided off when calling
1204      * the old xc_hvm_build_target_mem() which then turned them to bytes.
1205      * Do all this in one step here...
1206      */
1207     mem_size = (uint64_t)(info->max_memkb - info->video_memkb) << 10;
1208     dom->target_pages = (uint64_t)(info->target_memkb - info->video_memkb) >> 2;
1209     dom->claim_enabled = libxl_defbool_val(info->claim_mode);
1210     if (info->u.hvm.mmio_hole_memkb) {
1211         uint64_t max_ram_below_4g = (1ULL << 32) -
1212             (info->u.hvm.mmio_hole_memkb << 10);
1213 
1214         if (max_ram_below_4g < HVM_BELOW_4G_MMIO_START)
1215             dom->mmio_size = info->u.hvm.mmio_hole_memkb << 10;
1216     }
1217 
1218     rc = libxl__domain_firmware(gc, info, state, dom);
1219     if (rc != 0) {
1220         LOG(ERROR, "initializing domain firmware failed");
1221         goto out;
1222     }
1223 
1224     if (dom->target_pages == 0)
1225         dom->target_pages = mem_size >> XC_PAGE_SHIFT;
1226     if (dom->mmio_size == 0 && device_model)
1227         dom->mmio_size = HVM_BELOW_4G_MMIO_LENGTH;
1228     else if (dom->mmio_size == 0 && !device_model) {
1229 #if defined(__i386__) || defined(__x86_64__)
1230         if (libxl_defbool_val(info->apic)) {
1231             /* Make sure LAPIC_BASE_ADDRESS is below special pages */
1232             assert(((((X86_HVM_END_SPECIAL_REGION - X86_HVM_NR_SPECIAL_PAGES)
1233                       << XC_PAGE_SHIFT) - LAPIC_BASE_ADDRESS)) >= XC_PAGE_SIZE);
1234             dom->mmio_size = GB(4) - LAPIC_BASE_ADDRESS;
1235         } else
1236             dom->mmio_size = GB(4) -
1237                 ((X86_HVM_END_SPECIAL_REGION - X86_HVM_NR_SPECIAL_PAGES)
1238                  << XC_PAGE_SHIFT);
1239 #else
1240         assert(1);
1241 #endif
1242     }
1243     lowmem_end = mem_size;
1244     highmem_end = 0;
1245     mmio_start = (1ull << 32) - dom->mmio_size;
1246     if (lowmem_end > mmio_start)
1247     {
1248         highmem_end = (1ull << 32) + (lowmem_end - mmio_start);
1249         lowmem_end = mmio_start;
1250     }
1251     dom->lowmem_end = lowmem_end;
1252     dom->highmem_end = highmem_end;
1253     dom->mmio_start = mmio_start;
1254     dom->vga_hole_size = device_model ? LIBXL_VGA_HOLE_SIZE : 0;
1255     dom->device_model = device_model;
1256 
1257     rc = libxl__domain_device_construct_rdm(gc, d_config,
1258                                             info->u.hvm.rdm_mem_boundary_memkb*1024,
1259                                             dom);
1260     if (rc) {
1261         LOG(ERROR, "checking reserved device memory failed");
1262         goto out;
1263     }
1264 
1265     if (info->num_vnuma_nodes != 0) {
1266         int i;
1267 
1268         rc = libxl__vnuma_build_vmemrange_hvm(gc, domid, info, state, dom);
1269         if (rc != 0) {
1270             LOG(ERROR, "hvm build vmemranges failed");
1271             goto out;
1272         }
1273         rc = libxl__vnuma_config_check(gc, info, state);
1274         if (rc != 0) goto out;
1275         rc = set_vnuma_info(gc, domid, info, state);
1276         if (rc != 0) goto out;
1277 
1278         dom->nr_vmemranges = state->num_vmemranges;
1279         dom->vmemranges = libxl__malloc(gc, sizeof(*dom->vmemranges) *
1280                                         dom->nr_vmemranges);
1281 
1282         for (i = 0; i < dom->nr_vmemranges; i++) {
1283             dom->vmemranges[i].start = state->vmemranges[i].start;
1284             dom->vmemranges[i].end   = state->vmemranges[i].end;
1285             dom->vmemranges[i].flags = state->vmemranges[i].flags;
1286             dom->vmemranges[i].nid   = state->vmemranges[i].nid;
1287         }
1288 
1289         dom->nr_vnodes = info->num_vnuma_nodes;
1290         dom->vnode_to_pnode = libxl__malloc(gc, sizeof(*dom->vnode_to_pnode) *
1291                                             dom->nr_vnodes);
1292         for (i = 0; i < dom->nr_vnodes; i++)
1293             dom->vnode_to_pnode[i] = info->vnuma_nodes[i].pnode;
1294     }
1295 
1296     rc = libxl__build_dom(gc, domid, info, state, dom);
1297     if (rc != 0)
1298         goto out;
1299 
1300     rc = libxl__arch_domain_construct_memmap(gc, d_config, domid, dom);
1301     if (rc != 0) {
1302         LOG(ERROR, "setting domain memory map failed");
1303         goto out;
1304     }
1305 
1306     rc = hvm_build_set_params(ctx->xch, domid, info, state->store_port,
1307                                &state->store_mfn, state->console_port,
1308                                &state->console_mfn, state->store_domid,
1309                                state->console_domid);
1310     if (rc != 0) {
1311         LOG(ERROR, "hvm build set params failed");
1312         goto out;
1313     }
1314 
1315     rc = hvm_build_set_xs_values(gc, domid, dom, info);
1316     if (rc != 0) {
1317         LOG(ERROR, "hvm build set xenstore values failed");
1318         goto out;
1319     }
1320 
1321     xc_dom_release(dom);
1322     return 0;
1323 
1324 out:
1325     assert(rc != 0);
1326     if (dom != NULL) xc_dom_release(dom);
1327     return rc;
1328 }
1329 
1330 int libxl__qemu_traditional_cmd(libxl__gc *gc, uint32_t domid,
1331                                 const char *cmd)
1332 {
1333     char *path = NULL;
1334     uint32_t dm_domid = libxl_get_stubdom_id(CTX, domid);
1335     path = DEVICE_MODEL_XS_PATH(gc, dm_domid, domid, "/command");
1336     return libxl__xs_printf(gc, XBT_NULL, path, "%s", cmd);
1337 }
1338 
1339 /*==================== Miscellaneous ====================*/
1340 
1341 char *libxl__uuid2string(libxl__gc *gc, const libxl_uuid uuid)
1342 {
1343     return GCSPRINTF(LIBXL_UUID_FMT, LIBXL_UUID_BYTES(uuid));
1344 }
1345 
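/*
 * Build the path of a userdata file for the given domain, of the form
 * XEN_LIB_DIR "/userdata-<wh>.<domid>.<uuid>.<userid>", where "wh" is
 * "d" for the data file itself and "n" for the temporary file used
 * while storing (see libxl__userdata_store).  Returns NULL if the
 * domain cannot be looked up.
 */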
1346 const char *libxl__userdata_path(libxl__gc *gc, uint32_t domid,
1347                                  const char *userdata_userid,
1348                                  const char *wh)
1349 {
1350     libxl_ctx *ctx = libxl__gc_owner(gc);
1351     char *uuid_string, *path;
1352     libxl_dominfo info;
1353     int rc;
1354 
1355     libxl_dominfo_init(&info);
1356 
1357     rc = libxl_domain_info(ctx, &info, domid);
1358     if (rc) {
1359         LOGE(ERROR, "unable to find domain info for domain %"PRIu32, domid);
1360         path = NULL;
1361         goto out;
1362     }
1363     uuid_string = GCSPRINTF(LIBXL_UUID_FMT, LIBXL_UUID_BYTES(info.uuid));
1364     path = GCSPRINTF(XEN_LIB_DIR "/userdata-%s.%u.%s.%s",
1365                      wh, domid, uuid_string, userdata_userid);
1366 
1367  out:
1368     libxl_dominfo_dispose(&info);
1369     return path;
1370 }
1371 
1372 static int userdata_delete(libxl__gc *gc, const char *path)
1373 {
1374     int r;
1375     r = unlink(path);
1376     if (r) {
1377         LOGE(ERROR, "remove failed for %s", path);
1378         return errno;
1379     }
1380     return 0;
1381 }
1382 
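/*
 * Remove all userdata files belonging to a domain, except the
 * domain-userdata-lock file, which the unlock path takes care of.
 */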
1383 void libxl__userdata_destroyall(libxl__gc *gc, uint32_t domid)
1384 {
1385     const char *pattern;
1386     glob_t gl;
1387     int r, i;
1388 
1389     pattern = libxl__userdata_path(gc, domid, "*", "?");
1390     if (!pattern)
1391         goto out;
1392 
1393     gl.gl_pathc = 0;
1394     gl.gl_pathv = 0;
1395     gl.gl_offs = 0;
1396     r = glob(pattern, GLOB_ERR|GLOB_NOSORT|GLOB_MARK, 0, &gl);
1397     if (r == GLOB_NOMATCH)
1398         goto out;
1399     if (r)
1400         LOGE(ERROR, "glob failed for %s", pattern);
1401 
1402     /* Note: don't delete domain-userdata-lock, it will be handled by
1403      * unlock function.
1404      */
1405     for (i=0; i<gl.gl_pathc; i++) {
1406         if (!strstr(gl.gl_pathv[i], "domain-userdata-lock"))
1407             userdata_delete(gc, gl.gl_pathv[i]);
1408     }
1409     globfree(&gl);
1410 out:
1411     return;
1412 }
1413 
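/*
 * Store (or, when datalen is zero, delete) a userdata blob for a
 * domain.  The data is written to a temporary "n" file and then
 * renamed over the real "d" file so readers never see a partial
 * write.
 */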
1414 int libxl__userdata_store(libxl__gc *gc, uint32_t domid,
1415                           const char *userdata_userid,
1416                           const uint8_t *data, int datalen)
1417 {
1418     const char *filename;
1419     const char *newfilename;
1420     int e, rc;
1421     int fd = -1;
1422 
1423     filename = libxl__userdata_path(gc, domid, userdata_userid, "d");
1424     if (!filename) {
1425         rc = ERROR_NOMEM;
1426         goto out;
1427     }
1428 
1429     if (!datalen) {
1430         rc = userdata_delete(gc, filename);
1431         goto out;
1432     }
1433 
1434     newfilename = libxl__userdata_path(gc, domid, userdata_userid, "n");
1435     if (!newfilename) {
1436         rc = ERROR_NOMEM;
1437         goto out;
1438     }
1439 
1440     rc = ERROR_FAIL;
1441 
1442     fd = open(newfilename, O_RDWR | O_CREAT | O_TRUNC, 0600);
1443     if (fd < 0)
1444         goto err;
1445 
1446     if (libxl_write_exactly(CTX, fd, data, datalen, "userdata", newfilename))
1447         goto err;
1448 
1449     if (close(fd) < 0) {
1450         fd = -1;
1451         goto err;
1452     }
1453     fd = -1;
1454 
1455     if (rename(newfilename, filename))
1456         goto err;
1457 
1458     rc = 0;
1459 
1460 err:
1461     if (fd >= 0) {
1462         e = errno;
1463         close(fd);
1464         errno = e;
1465     }
1466 
1467     if (rc)
1468         LOGE(ERROR, "cannot write/rename %s for %s", newfilename, filename);
1469 out:
1470     return rc;
1471 }
1472 
1473 int libxl_userdata_store(libxl_ctx *ctx, uint32_t domid,
1474                               const char *userdata_userid,
1475                               const uint8_t *data, int datalen)
1476 {
1477     GC_INIT(ctx);
1478     int rc;
1479     libxl__domain_userdata_lock *lock;
1480 
1481     CTX_LOCK;
1482     lock = libxl__lock_domain_userdata(gc, domid);
1483     if (!lock) {
1484         rc = ERROR_LOCK_FAIL;
1485         goto out;
1486     }
1487 
1488     rc = libxl__userdata_store(gc, domid, userdata_userid,
1489                                data, datalen);
1490 
1491     libxl__unlock_domain_userdata(lock);
1492 
1493 out:
1494     CTX_UNLOCK;
1495     GC_FREE;
1496     return rc;
1497 }
1498 
1499 int libxl__userdata_retrieve(libxl__gc *gc, uint32_t domid,
1500                              const char *userdata_userid,
1501                              uint8_t **data_r, int *datalen_r)
1502 {
1503     const char *filename;
1504     int e, rc;
1505     int datalen = 0;
1506     void *data = 0;
1507 
1508     filename = libxl__userdata_path(gc, domid, userdata_userid, "d");
1509     if (!filename) {
1510         rc = ERROR_NOMEM;
1511         goto out;
1512     }
1513 
1514     e = libxl_read_file_contents(CTX, filename, data_r ? &data : 0, &datalen);
1515     if (e && errno != ENOENT) {
1516         rc = ERROR_FAIL;
1517         goto out;
1518     }
1519     if (!e && !datalen) {
1520         LOG(ERROR, "userdata file %s is empty", filename);
1521         if (data_r) assert(!*data_r);
1522         rc = ERROR_FAIL;
1523         goto out;
1524     }
1525 
1526     if (data_r) *data_r = data;
1527     if (datalen_r) *datalen_r = datalen;
1528     rc = 0;
1529 
1530 out:
1531     return rc;
1532 }
1533 
1534 int libxl_userdata_retrieve(libxl_ctx *ctx, uint32_t domid,
1535                                  const char *userdata_userid,
1536                                  uint8_t **data_r, int *datalen_r)
1537 {
1538     GC_INIT(ctx);
1539     int rc;
1540     libxl__domain_userdata_lock *lock;
1541 
1542     CTX_LOCK;
1543     lock = libxl__lock_domain_userdata(gc, domid);
1544     if (!lock) {
1545         rc = ERROR_LOCK_FAIL;
1546         goto out;
1547     }
1548 
1549     rc = libxl__userdata_retrieve(gc, domid, userdata_userid,
1550                                   data_r, datalen_r);
1551 
1552 
1553     libxl__unlock_domain_userdata(lock);
1554 out:
1555     CTX_UNLOCK;
1556     GC_FREE;
1557     return rc;
1558 }
1559 
1560 int libxl_userdata_unlink(libxl_ctx *ctx, uint32_t domid,
1561                           const char *userdata_userid)
1562 {
1563     GC_INIT(ctx);
1564     CTX_LOCK;
1565 
1566     int rc;
1567     libxl__domain_userdata_lock *lock = NULL;
1568     const char *filename;
1569 
1570     lock = libxl__lock_domain_userdata(gc, domid);
1571     if (!lock) {
1572         rc = ERROR_LOCK_FAIL;
1573         goto out;
1574     }
1575 
1576     filename = libxl__userdata_path(gc, domid, userdata_userid, "d");
1577     if (!filename) {
1578         rc = ERROR_FAIL;
1579         goto out;
1580     }
1581     if (unlink(filename)) {
1582         LOGE(ERROR, "error deleting userdata file: %s", filename);
1583         rc = ERROR_FAIL;
1584         goto out;
1585     }
1586 
1587     rc = 0;
1588 out:
1589     if (lock)
1590         libxl__unlock_domain_userdata(lock);
1591     CTX_UNLOCK;
1592     GC_FREE;
1593     return rc;
1594 }
1595 
1596 /*
1597  * Local variables:
1598  * mode: C
1599  * c-basic-offset: 4
1600  * indent-tabs-mode: nil
1601  * End:
1602  */
1603