1 /*
2 * Copyright (C) 2014 Citrix Ltd.
3 * Author Wei Liu <wei.liu2@citrix.com>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU Lesser General Public License as published
7 * by the Free Software Foundation; version 2.1 only. with the special
8 * exception on linking described in file LICENSE.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU Lesser General Public License for more details.
14 */
15 #include "libxl_osdeps.h" /* must come before any other headers */
16 #include "libxl_internal.h"
17 #include "libxl_arch.h"
18 #include <stdlib.h>
19
libxl__vnuma_configured(const libxl_domain_build_info * b_info)20 bool libxl__vnuma_configured(const libxl_domain_build_info *b_info)
21 {
22 return b_info->num_vnuma_nodes != 0;
23 }
24
25 /* Sort vmemranges in ascending order with "start" */
compare_vmemrange(const void * a,const void * b)26 static int compare_vmemrange(const void *a, const void *b)
27 {
28 const xen_vmemrange_t *x = a, *y = b;
29 if (x->start < y->start)
30 return -1;
31 if (x->start > y->start)
32 return 1;
33 return 0;
34 }
35
36 /* Check if a vcpu has an hard (or soft) affinity set in such
37 * a way that it does not match the pnode to which the vcpu itself
38 * is assigned to.
39 */
static int check_vnuma_affinity(libxl__gc *gc,
                                unsigned int vcpu,
                                unsigned int pnode,
                                unsigned int num_affinity,
                                const libxl_bitmap *affinity,
                                const char *kind)
{
    libxl_bitmap nodemap;
    int rc;

    libxl_bitmap_init(&nodemap);

    rc = libxl_node_bitmap_alloc(CTX, &nodemap, 0);
    if (rc) {
        LOG(ERROR, "Can't allocate nodemap");
        goto out;
    }

    /* Project the vcpu's cpu affinity onto the set of physical nodes. */
    rc = libxl_cpumap_to_nodemap(CTX, affinity, &nodemap);
    if (rc) {
        LOG(ERROR, "Can't convert Vcpu %d affinity to nodemap", vcpu);
        goto out;
    }

    /*
     * The affinity matches the vNUMA placement iff it spans exactly one
     * node and that node is the pnode backing this vcpu's vnode.  A
     * mismatch only hurts performance, so it is a warning, not an error.
     */
    if (!(libxl_bitmap_count_set(&nodemap) == 1 &&
          libxl_bitmap_test(&nodemap, pnode)))
        LOG(WARN, "Vcpu %d %s affinity and vnuma info mismatch", vcpu, kind);

out:
    libxl_bitmap_dispose(&nodemap);
    return rc;
}
72
73 /* Check if vNUMA configuration is valid:
74 * 1. all pnodes inside vnode_to_pnode array are valid
75 * 2. each vcpu belongs to one and only one vnode
76 * 3. each vmemrange is valid and doesn't overlap with any other
77 * 4. local distance cannot be larger than remote distance
78 *
79 * Check also, if any hard or soft affinity is specified, whether
80 * they match with the vNUMA related bits (namely vcpus to vnodes
81 * mappings and vnodes to pnodes association). If that does not
82 * hold, however, just print a warning, as that has "only"
83 * performance implications.
84 */
int libxl__vnuma_config_check(libxl__gc *gc,
                              const libxl_domain_build_info *b_info,
                              const libxl__domain_build_state *state)
{
    int nr_nodes = 0, rc = ERROR_VNUMA_CONFIG_INVALID;
    unsigned int i, j;
    libxl_numainfo *ninfo = NULL;
    uint64_t total_memkb = 0;
    libxl_bitmap cpumap;
    libxl_vnode_info *v;

    libxl_bitmap_init(&cpumap);

    /* Check pnode specified is valid */
    ninfo = libxl_get_numainfo(CTX, &nr_nodes);
    if (!ninfo) {
        LOG(ERROR, "libxl_get_numainfo failed");
        goto out;
    }

    for (i = 0; i < b_info->num_vnuma_nodes; i++) {
        uint32_t pnode;

        v = &b_info->vnuma_nodes[i];
        pnode = v->pnode;

        /* The pnode specified is not valid?  (nr_nodes >= 0 is
         * guaranteed once libxl_get_numainfo has succeeded, so the
         * cast is safe and avoids a signed/unsigned comparison.) */
        if (pnode >= (uint32_t)nr_nodes) {
            LOG(ERROR, "Invalid pnode %"PRIu32" specified", pnode);
            goto out;
        }

        total_memkb += v->memkb;
    }

    /* The per-vnode sizes must account for exactly the guest's memory. */
    if (total_memkb != b_info->max_memkb) {
        LOG(ERROR, "Amount of memory mismatch (0x%"PRIx64" != 0x%"PRIx64")",
            total_memkb, b_info->max_memkb);
        goto out;
    }

    /* Check vcpu mapping.  The allocation can fail; using an
     * unallocated bitmap below would be invalid. */
    if (libxl_cpu_bitmap_alloc(CTX, &cpumap, b_info->max_vcpus)) {
        LOG(ERROR, "Can't allocate cpumap");
        goto out;
    }
    for (i = 0; i < b_info->num_vnuma_nodes; i++) {
        v = &b_info->vnuma_nodes[i];
        libxl_for_each_set_bit(j, v->vcpus) {
            if (!libxl_bitmap_test(&cpumap, j))
                libxl_bitmap_set(&cpumap, j);
            else {
                LOG(ERROR, "Vcpu %d assigned more than once", j);
                goto out;
            }
        }
    }

    for (i = 0; i < b_info->max_vcpus; i++) {
        if (!libxl_bitmap_test(&cpumap, i)) {
            LOG(ERROR, "Vcpu %d is not assigned to any vnode", i);
            goto out;
        }
    }

    /* Check whether vcpu affinity (if any) matches vnuma configuration */
    for (i = 0; i < b_info->num_vnuma_nodes; i++) {
        v = &b_info->vnuma_nodes[i];
        libxl_for_each_set_bit(j, v->vcpus) {
            if (b_info->num_vcpu_hard_affinity > j)
                check_vnuma_affinity(gc, j, v->pnode,
                                     b_info->num_vcpu_hard_affinity,
                                     &b_info->vcpu_hard_affinity[j],
                                     "hard");
            if (b_info->num_vcpu_soft_affinity > j)
                check_vnuma_affinity(gc, j, v->pnode,
                                     b_info->num_vcpu_soft_affinity,
                                     &b_info->vcpu_soft_affinity[j],
                                     "soft");
        }
    }

    /* Check vmemranges */
    qsort(state->vmemranges, state->num_vmemranges, sizeof(xen_vmemrange_t),
          compare_vmemrange);

    for (i = 0; i < state->num_vmemranges; i++) {
        if (state->vmemranges[i].end < state->vmemranges[i].start) {
            LOG(ERROR, "Vmemrange end < start");
            goto out;
        }
    }

    /* Adjacent (now sorted) ranges must not overlap.  Start at 1 so the
     * loop is safe when num_vmemranges == 0: the previous
     * "i < num_vmemranges - 1" bound underflows for an unsigned zero. */
    for (i = 1; i < state->num_vmemranges; i++) {
        if (state->vmemranges[i-1].end > state->vmemranges[i].start) {
            LOG(ERROR,
                "Vmemranges overlapped, 0x%"PRIx64"-0x%"PRIx64", 0x%"PRIx64"-0x%"PRIx64,
                state->vmemranges[i-1].start, state->vmemranges[i-1].end,
                state->vmemranges[i].start, state->vmemranges[i].end);
            goto out;
        }
    }

    /* Check vdistances: a node's local distance must not exceed its
     * distance to any other node. */
    for (i = 0; i < b_info->num_vnuma_nodes; i++) {
        uint32_t local_distance, remote_distance;

        v = &b_info->vnuma_nodes[i];
        local_distance = v->distances[i];

        for (j = 0; j < v->num_distances; j++) {
            if (i == j) continue;
            remote_distance = v->distances[j];
            if (local_distance > remote_distance) {
                LOG(ERROR,
                    "Distance from %u to %u smaller than %u's local distance",
                    i, j, i);
                goto out;
            }
        }
    }

    rc = 0;
out:
    libxl_numainfo_list_free(ninfo, nr_nodes);
    libxl_bitmap_dispose(&cpumap);
    return rc;
}
210
/*
 * Default (arch-independent) vmemrange layout for PV guests: one
 * contiguous range per virtual node, packed back-to-back starting at
 * guest physical address 0.  Fills state->vmemranges (gc-allocated)
 * and state->num_vmemranges; always returns 0.
 */
int libxl__vnuma_build_vmemrange_pv_generic(libxl__gc *gc,
                                            uint32_t domid,
                                            libxl_domain_build_info *b_info,
                                            libxl__domain_build_state *state)
{
    xen_vmemrange_t *ranges = NULL;
    uint64_t base = 0;
    int i;

    /* Generate one vmemrange for each virtual node. */
    GCREALLOC_ARRAY(ranges, b_info->num_vnuma_nodes);

    for (i = 0; i < b_info->num_vnuma_nodes; i++) {
        const libxl_vnode_info *node = &b_info->vnuma_nodes[i];

        ranges[i].start = base;
        /* memkb << 10 converts KiB to bytes. */
        ranges[i].end = base + (node->memkb << 10);
        ranges[i].flags = 0;
        ranges[i].nid = i;

        base = ranges[i].end;
    }

    state->vmemranges = ranges;
    state->num_vmemranges = i;

    return 0;
}
239
240 /* Build vmemranges for PV guest */
int libxl__vnuma_build_vmemrange_pv(libxl__gc *gc,
                                    uint32_t domid,
                                    libxl_domain_build_info *b_info,
                                    libxl__domain_build_state *state)
{
    /* Vmemranges must not have been generated for this domain yet. */
    assert(state->vmemranges == NULL);

    /* Delegate the actual layout to the arch-specific hook. */
    return libxl__arch_vnuma_build_vmemrange(gc, domid, b_info, state);
}
249
250 /* Build vmemranges for HVM guest */
int libxl__vnuma_build_vmemrange_hvm(libxl__gc *gc,
                                     uint32_t domid,
                                     libxl_domain_build_info *b_info,
                                     libxl__domain_build_state *state,
                                     struct xc_dom_image *dom)
{
    uint64_t hole_start, hole_end, next;
    int nid, nr_vmemrange;
    xen_vmemrange_t *vmemranges;
    int rc;

    /* Derive vmemranges from vnode size and memory hole.
     *
     * Guest physical address space layout:
     * [0, hole_start) [hole_start, hole_end) [hole_end, highmem_end)
     */
    /* The hole begins at whichever comes first: the end of guest lowmem
     * or the start of the MMIO region. */
    hole_start = dom->lowmem_end < dom->mmio_start ?
        dom->lowmem_end : dom->mmio_start;
    /* The hole ends at 4GiB, or at the end of the MMIO region if that
     * extends beyond the 4GiB boundary. */
    hole_end = (dom->mmio_start + dom->mmio_size) > (1ULL << 32) ?
        (dom->mmio_start + dom->mmio_size) : (1ULL << 32);

    /* Caller must not have populated vmemranges already. */
    assert(state->vmemranges == NULL);

    next = 0;           /* next guest physical address to assign */
    nr_vmemrange = 0;
    vmemranges = NULL;
    for (nid = 0; nid < b_info->num_vnuma_nodes; nid++) {
        libxl_vnode_info *p = &b_info->vnuma_nodes[nid];
        uint64_t remaining_bytes = p->memkb << 10;  /* KiB -> bytes */

        /* Consider video ram belongs to vnode 0 */
        if (nid == 0) {
            if (p->memkb < b_info->video_memkb) {
                LOGD(ERROR, domid, "vnode 0 too small to contain video ram");
                rc = ERROR_INVAL;
                goto out;
            }
            remaining_bytes -= (b_info->video_memkb << 10);
        }

        /* Emit one or more vmemranges for this vnode, splitting a node
         * that straddles the MMIO hole into a below-hole piece and an
         * above-hole piece. */
        while (remaining_bytes > 0) {
            uint64_t count = remaining_bytes;

            /* If the cursor sits inside the hole, jump past it. */
            if (next >= hole_start && next < hole_end)
                next = hole_end;
            /* If this chunk would cross into the hole, truncate it at
             * the hole boundary; the rest is emitted on the next
             * iteration, after the cursor has skipped the hole. */
            if ((next < hole_start) && (next + remaining_bytes >= hole_start))
                count = hole_start - next;

            GCREALLOC_ARRAY(vmemranges, nr_vmemrange+1);
            vmemranges[nr_vmemrange].start = next;
            vmemranges[nr_vmemrange].end = next + count;
            vmemranges[nr_vmemrange].flags = 0;
            vmemranges[nr_vmemrange].nid = nid;

            nr_vmemrange++;
            remaining_bytes -= count;
            next += count;
        }
    }

    state->vmemranges = vmemranges;
    state->num_vmemranges = nr_vmemrange;

    rc = 0;
out:
    return rc;
}
318
319 /*
320 * Local variables:
321 * mode: C
322 * c-basic-offset: 4
323 * indent-tabs-mode: nil
324 * End:
325 */
326