/*
 * Copyright (C) 2014 Citrix Ltd.
 * Author Wei Liu <wei.liu2@citrix.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation; version 2.1 only, with the special
 * exception on linking described in file LICENSE.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 */
#include "libxl_osdeps.h" /* must come before any other headers */
#include "libxl_internal.h"
#include "libxl_arch.h"
#include <stdlib.h>

#include <xc_dom.h>

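/* Report whether the guest has any virtual NUMA nodes configured. */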
bool libxl__vnuma_configured(const libxl_domain_build_info *b_info)
{
    return b_info->num_vnuma_nodes != 0;
}

/* Sort vmemranges in ascending order by "start" */
static int compare_vmemrange(const void *a, const void *b)
{
    const xen_vmemrange_t *x = a, *y = b;
    if (x->start < y->start)
        return -1;
    if (x->start > y->start)
        return 1;
    return 0;
}

/* Check if a vcpu has a hard (or soft) affinity set in such
 * a way that it does not match the pnode to which the vcpu
 * itself is assigned.
 */
static int check_vnuma_affinity(libxl__gc *gc,
                                unsigned int vcpu,
                                unsigned int pnode,
                                unsigned int num_affinity,
                                const libxl_bitmap *affinity,
                                const char *kind)
{
    libxl_bitmap nodemap;
    int rc = 0;

    libxl_bitmap_init(&nodemap);

    rc = libxl_node_bitmap_alloc(CTX, &nodemap, 0);
    if (rc) {
        LOG(ERROR, "Can't allocate nodemap");
        goto out;
    }

    rc = libxl_cpumap_to_nodemap(CTX, affinity, &nodemap);
    if (rc) {
        LOG(ERROR, "Can't convert Vcpu %d affinity to nodemap", vcpu);
        goto out;
    }

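    /* Warn (rather than fail) if the affinity spans more than one node,
     * or does not include the pnode this vcpu is assigned to: a mismatch
     * only has performance implications. */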
    if (libxl_bitmap_count_set(&nodemap) != 1 ||
        !libxl_bitmap_test(&nodemap, pnode))
        LOG(WARN, "Vcpu %d %s affinity and vnuma info mismatch", vcpu, kind);

 out:
    libxl_bitmap_dispose(&nodemap);
    return rc;
}

/* Check if vNUMA configuration is valid:
 * 1. all pnodes inside vnode_to_pnode array are valid
 * 2. each vcpu belongs to one and only one vnode
 * 3. each vmemrange is valid and doesn't overlap with any other
 * 4. local distance cannot be larger than remote distance
 *
 * Also check, if any hard or soft affinity is specified, whether
 * it matches the vNUMA related bits (namely the vcpu to vnode
 * mappings and the vnode to pnode association). If it does not,
 * just print a warning, as that has "only" performance
 * implications.
 */
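/*
 * As an illustration (hypothetical values), an xl configuration such as
 *
 *   maxmem = 4096
 *   vnuma = [ ["pnode=0","size=2048","vcpus=0-1","vdistances=10,20"],
 *             ["pnode=1","size=2048","vcpus=2-3","vdistances=20,10"] ]
 *
 * passes these checks on a host with at least two pnodes: the vnode
 * sizes sum to maxmem, every vcpu belongs to exactly one vnode, and no
 * local distance (10) exceeds a remote one (20).
 */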
int libxl__vnuma_config_check(libxl__gc *gc,
                              const libxl_domain_build_info *b_info,
                              const libxl__domain_build_state *state)
{
    int nr_nodes = 0, rc = ERROR_VNUMA_CONFIG_INVALID;
    unsigned int i, j;
    libxl_numainfo *ninfo = NULL;
    uint64_t total_memkb = 0;
    libxl_bitmap cpumap;
    libxl_vnode_info *v;

    libxl_bitmap_init(&cpumap);

    /* Check that each specified pnode is valid */
    ninfo = libxl_get_numainfo(CTX, &nr_nodes);
    if (!ninfo) {
        LOG(ERROR, "libxl_get_numainfo failed");
        goto out;
    }

    for (i = 0; i < b_info->num_vnuma_nodes; i++) {
        uint32_t pnode;

        v = &b_info->vnuma_nodes[i];
        pnode = v->pnode;

        /* The pnode specified is not valid? */
        if (pnode >= nr_nodes) {
            LOG(ERROR, "Invalid pnode %"PRIu32" specified", pnode);
            goto out;
        }

        total_memkb += v->memkb;
    }

    if (total_memkb != b_info->max_memkb) {
        LOG(ERROR, "Amount of memory mismatch (0x%"PRIx64" != 0x%"PRIx64")",
            total_memkb, b_info->max_memkb);
        goto out;
    }

    /* Check vcpu mapping */
    libxl_cpu_bitmap_alloc(CTX, &cpumap, b_info->max_vcpus);
    for (i = 0; i < b_info->num_vnuma_nodes; i++) {
        v = &b_info->vnuma_nodes[i];
        libxl_for_each_set_bit(j, v->vcpus) {
            if (!libxl_bitmap_test(&cpumap, j))
                libxl_bitmap_set(&cpumap, j);
            else {
                LOG(ERROR, "Vcpu %d assigned more than once", j);
                goto out;
            }
        }
    }

    for (i = 0; i < b_info->max_vcpus; i++) {
        if (!libxl_bitmap_test(&cpumap, i)) {
            LOG(ERROR, "Vcpu %d is not assigned to any vnode", i);
            goto out;
        }
    }

    /* Check whether vcpu affinity (if any) matches vnuma configuration */
    for (i = 0; i < b_info->num_vnuma_nodes; i++) {
        v = &b_info->vnuma_nodes[i];
        libxl_for_each_set_bit(j, v->vcpus) {
            if (b_info->num_vcpu_hard_affinity > j)
                check_vnuma_affinity(gc, j, v->pnode,
                                     b_info->num_vcpu_hard_affinity,
                                     &b_info->vcpu_hard_affinity[j],
                                     "hard");
            if (b_info->num_vcpu_soft_affinity > j)
                check_vnuma_affinity(gc, j, v->pnode,
                                     b_info->num_vcpu_soft_affinity,
                                     &b_info->vcpu_soft_affinity[j],
                                     "soft");
        }
    }

    /* Check vmemranges */
    qsort(state->vmemranges, state->num_vmemranges, sizeof(xen_vmemrange_t),
          compare_vmemrange);

    for (i = 0; i < state->num_vmemranges; i++) {
        if (state->vmemranges[i].end < state->vmemranges[i].start) {
            LOG(ERROR, "Vmemrange end < start");
            goto out;
        }
    }

    for (i = 0; i < state->num_vmemranges - 1; i++) {
        if (state->vmemranges[i].end > state->vmemranges[i+1].start) {
            LOG(ERROR,
                "Vmemranges overlap, 0x%"PRIx64"-0x%"PRIx64" and 0x%"PRIx64"-0x%"PRIx64,
                state->vmemranges[i].start, state->vmemranges[i].end,
                state->vmemranges[i+1].start, state->vmemranges[i+1].end);
            goto out;
        }
    }
    /* Check vdistances */
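    /* By convention (as in an ACPI SLIT) a node's distance to itself is
     * the smallest in its row, e.g. 10 locally vs. 20 remotely. */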
    for (i = 0; i < b_info->num_vnuma_nodes; i++) {
        uint32_t local_distance, remote_distance;

        v = &b_info->vnuma_nodes[i];
        local_distance = v->distances[i];

        for (j = 0; j < v->num_distances; j++) {
            if (i == j) continue;
            remote_distance = v->distances[j];
            if (local_distance > remote_distance) {
                LOG(ERROR,
                    "Distance from %u to %u smaller than %u's local distance",
                    i, j, i);
                goto out;
            }
        }
    }

    rc = 0;
 out:
    libxl_numainfo_list_free(ninfo, nr_nodes);
    libxl_bitmap_dispose(&cpumap);
    return rc;
}

int libxl__vnuma_build_vmemrange_pv_generic(libxl__gc *gc,
                                            uint32_t domid,
                                            libxl_domain_build_info *b_info,
                                            libxl__domain_build_state *state)
{
    int i;
    uint64_t next;
    xen_vmemrange_t *v = NULL;

    /* Generate one vmemrange for each virtual node. */
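    /* Nodes are laid out back to back starting at guest address 0; memkb
     * is in KiB, so "memkb << 10" converts it to bytes. */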
    GCREALLOC_ARRAY(v, b_info->num_vnuma_nodes);
    next = 0;
    for (i = 0; i < b_info->num_vnuma_nodes; i++) {
        libxl_vnode_info *p = &b_info->vnuma_nodes[i];

        v[i].start = next;
        v[i].end = next + (p->memkb << 10);
        v[i].flags = 0;
        v[i].nid = i;

        next = v[i].end;
    }

    state->vmemranges = v;
    state->num_vmemranges = i;

    return 0;
}

/* Build vmemranges for PV guest */
int libxl__vnuma_build_vmemrange_pv(libxl__gc *gc,
                                    uint32_t domid,
                                    libxl_domain_build_info *b_info,
                                    libxl__domain_build_state *state)
{
    assert(state->vmemranges == NULL);
    return libxl__arch_vnuma_build_vmemrange(gc, domid, b_info, state);
}

/* Build vmemranges for HVM guest */
int libxl__vnuma_build_vmemrange_hvm(libxl__gc *gc,
                                     uint32_t domid,
                                     libxl_domain_build_info *b_info,
                                     libxl__domain_build_state *state,
                                     struct xc_dom_image *dom)
{
    uint64_t hole_start, hole_end, next;
    int nid, nr_vmemrange;
    xen_vmemrange_t *vmemranges;
    int rc;

    /* Derive vmemranges from vnode size and memory hole.
     *
     * Guest physical address space layout:
     * [0, hole_start) [hole_start, hole_end) [hole_end, highmem_end)
     */
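    /* For illustration (hypothetical numbers): lowmem_end = mmio_start =
     * 0xc0000000 and mmio_size = 0x40000000 give a hole covering
     * [0xc0000000, 0x100000000), so any vnode memory that would fall in
     * it is emitted above 4GiB instead. */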
    hole_start = dom->lowmem_end < dom->mmio_start ?
        dom->lowmem_end : dom->mmio_start;
    hole_end = (dom->mmio_start + dom->mmio_size) > (1ULL << 32) ?
        (dom->mmio_start + dom->mmio_size) : (1ULL << 32);

    assert(state->vmemranges == NULL);

    next = 0;
    nr_vmemrange = 0;
    vmemranges = NULL;
    for (nid = 0; nid < b_info->num_vnuma_nodes; nid++) {
        libxl_vnode_info *p = &b_info->vnuma_nodes[nid];
        uint64_t remaining_bytes = p->memkb << 10;

        /* Video ram is considered to belong to vnode 0 */
        if (nid == 0) {
            if (p->memkb < b_info->video_memkb) {
                LOGD(ERROR, domid, "vnode 0 too small to contain video ram");
                rc = ERROR_INVAL;
                goto out;
            }
            remaining_bytes -= (b_info->video_memkb << 10);
        }

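        /* Carve this vnode's memory into vmemranges, stepping around the
         * hole: if the cursor is inside the hole, jump to hole_end; if a
         * range would cross into the hole, truncate it at hole_start and
         * emit the remainder above the hole on a later iteration. */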
        while (remaining_bytes > 0) {
            uint64_t count = remaining_bytes;

            if (next >= hole_start && next < hole_end)
                next = hole_end;
            if ((next < hole_start) && (next + remaining_bytes >= hole_start))
                count = hole_start - next;

            GCREALLOC_ARRAY(vmemranges, nr_vmemrange+1);
            vmemranges[nr_vmemrange].start = next;
            vmemranges[nr_vmemrange].end = next + count;
            vmemranges[nr_vmemrange].flags = 0;
            vmemranges[nr_vmemrange].nid = nid;

            nr_vmemrange++;
            remaining_bytes -= count;
            next += count;
        }
    }

    state->vmemranges = vmemranges;
    state->num_vmemranges = nr_vmemrange;

    rc = 0;
 out:
    return rc;
}

/*
 * Local variables:
 * mode: C
 * c-basic-offset: 4
 * indent-tabs-mode: nil
 * End:
 */