1 /*
2 * Copyright (C) 2009 Citrix Ltd.
3 * Author Vincent Hanquez <vincent.hanquez@eu.citrix.com>
4 * Author Stefano Stabellini <stefano.stabellini@eu.citrix.com>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU Lesser General Public License as published
8 * by the Free Software Foundation; version 2.1 only. with the special
9 * exception on linking described in file LICENSE.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU Lesser General Public License for more details.
15 */
16
17 #include "libxl_osdeps.h" /* must come before any other headers */
18
19 #include "libxl_internal.h"
20
21 #define PCI_BDF "%04x:%02x:%02x.%01x"
22 #define PCI_BDF_SHORT "%02x:%02x.%01x"
23 #define PCI_BDF_VDEVFN "%04x:%02x:%02x.%01x@%02x"
24 #define PCI_OPTIONS "msitranslate=%d,power_mgmt=%d"
25 #define PCI_BDF_XSPATH "%04x-%02x-%02x-%01x"
26 #define PCI_PT_QDEV_ID "pci-pt-%02x_%02x.%01x"
27
/*
 * Pack the address of @pci into a single integer: function in bits 0-2,
 * device in bits 3-7, bus in bits 8-15, and the domain shifted up to
 * bit 16 and above.
 */
static unsigned int pci_encode_bdf(libxl_device_pci *pci)
{
    unsigned int domain_bits = pci->domain << 16;
    unsigned int bus_bits = (pci->bus & 0xff) << 8;
    unsigned int devfn_bits = ((pci->dev & 0x1f) << 3) | (pci->func & 0x7);

    return domain_bits | bus_bits | devfn_bits;
}
39
/*
 * Store the given domain/bus/device/function numbers in @pci.
 * No other field of @pci is touched.
 */
static void pci_struct_fill(libxl_device_pci *pci, unsigned int domain,
                            unsigned int bus, unsigned int dev,
                            unsigned int func)
{
    pci->func = func;
    pci->dev = dev;
    pci->bus = bus;
    pci->domain = domain;
}
49
/*
 * Append the per-device backend entries for @pci to @back, using @num as
 * the index in the key names ("key-N", "dev-N", "opts-N", "state-N").
 *
 * "vdevfn-N" and "name-N" are only added when the corresponding field of
 * @pci is set. The per-device state is XenbusStateInitialising.
 */
static void libxl_create_pci_backend_device(libxl__gc *gc,
                                            flexarray_t *back,
                                            int num,
                                            const libxl_device_pci *pci)
{
    char *opts;

    /* Both "key-N" and "dev-N" carry the full BDF of the device. */
    flexarray_append_pair(back, GCSPRINTF("key-%d", num),
                          GCSPRINTF(PCI_BDF, pci->domain, pci->bus,
                                    pci->dev, pci->func));
    flexarray_append_pair(back, GCSPRINTF("dev-%d", num),
                          GCSPRINTF(PCI_BDF, pci->domain, pci->bus,
                                    pci->dev, pci->func));

    if (pci->vdevfn) {
        flexarray_append_pair(back, GCSPRINTF("vdevfn-%d", num),
                              GCSPRINTF("%x", pci->vdevfn));
    }

    if (pci->name) {
        flexarray_append_pair(back, GCSPRINTF("name-%d", num),
                              GCSPRINTF("%s", pci->name));
    }

    opts = GCSPRINTF("msitranslate=%d,power_mgmt=%d,permissive=%d,rdm_policy=%s",
                     pci->msitranslate, pci->power_mgmt,
                     pci->permissive,
                     libxl_rdm_reserve_policy_to_string(pci->rdm_policy));
    flexarray_append_pair(back, GCSPRINTF("opts-%d", num), opts);

    flexarray_append_pair(back, GCSPRINTF("state-%d", num),
                          GCSPRINTF("%d", XenbusStateInitialising));
}
70
/*
 * Fill @device with the identity of the PCI device of @domid:
 * frontend and backend kind are both LIBXL__DEVICE_KIND_PCI, the backend
 * domain id and both device ids are 0. @pci is not consulted.
 */
static void libxl__device_from_pci(libxl__gc *gc, uint32_t domid,
                                   const libxl_device_pci *pci,
                                   libxl__device *device)
{
    /* Frontend side */
    device->domid = domid;
    device->devid = 0;
    device->kind = LIBXL__DEVICE_KIND_PCI;

    /* Backend side */
    device->backend_domid = 0;
    device->backend_devid = 0;
    device->backend_kind = LIBXL__DEVICE_KIND_PCI;
}
82
/*
 * Create the xenstore backend and frontend directories used for PCI
 * passthrough to @domid, inside transaction @t.
 *
 * Each directory is removed, re-created, given its permissions and then
 * populated with its initial keys (state XenbusStateInitialising). The
 * return values of the xenstore calls are not checked. @pci is unused.
 */
static void libxl__create_pci_backend(libxl__gc *gc, xs_transaction_t t,
                                      uint32_t domid, const libxl_device_pci *pci)
{
    libxl_ctx *ctx = libxl__gc_owner(gc);
    struct xs_permissions be_perms[2], fe_perms[2];
    flexarray_t *back_kvs, *front_kvs;
    char *fe_path, *be_path;

    LOGD(DEBUG, domid, "Creating pci backend");

    fe_path = libxl__domain_device_frontend_path(gc, domid, 0,
                                                 LIBXL__DEVICE_KIND_PCI);
    be_path = libxl__domain_device_backend_path(gc, 0, domid, 0,
                                                LIBXL__DEVICE_KIND_PCI);

    /* Backend directory: first entry domain 0, guest gets read access. */
    back_kvs = flexarray_make(gc, 16, 1);
    flexarray_append_pair(back_kvs, "frontend", fe_path);
    flexarray_append_pair(back_kvs, "frontend-id", GCSPRINTF("%d", domid));
    flexarray_append_pair(back_kvs, "online", GCSPRINTF("%d", 1));
    flexarray_append_pair(back_kvs, "state",
                          GCSPRINTF("%d", XenbusStateInitialising));
    flexarray_append_pair(back_kvs, "domain", libxl__domid_to_name(gc, domid));

    be_perms[0].id = 0;
    be_perms[0].perms = XS_PERM_NONE;
    be_perms[1].id = domid;
    be_perms[1].perms = XS_PERM_READ;

    xs_rm(ctx->xsh, t, be_path);
    xs_mkdir(ctx->xsh, t, be_path);
    xs_set_permissions(ctx->xsh, t, be_path, be_perms, ARRAY_SIZE(be_perms));
    libxl__xs_writev(gc, t, be_path,
                     libxl__xs_kvs_of_flexarray(gc, back_kvs));

    /* Frontend directory: first entry the guest, domain 0 gets read access. */
    front_kvs = flexarray_make(gc, 16, 1);
    flexarray_append_pair(front_kvs, "backend", be_path);
    flexarray_append_pair(front_kvs, "backend-id", GCSPRINTF("%d", 0));
    flexarray_append_pair(front_kvs, "state",
                          GCSPRINTF("%d", XenbusStateInitialising));

    fe_perms[0].id = domid;
    fe_perms[0].perms = XS_PERM_NONE;
    fe_perms[1].id = 0;
    fe_perms[1].perms = XS_PERM_READ;

    xs_rm(ctx->xsh, t, fe_path);
    xs_mkdir(ctx->xsh, t, fe_path);
    xs_set_permissions(ctx->xsh, t, fe_path, fe_perms, ARRAY_SIZE(fe_perms));
    libxl__xs_writev(gc, t, fe_path,
                     libxl__xs_kvs_of_flexarray(gc, front_kvs));
}
133
/*
 * Add @pci to the PCI backend xenstore directory of @domid.
 *
 * If the backend has no "num_devs" node yet, the backend/frontend
 * directories are created first. When the domain is not a stubdomain and
 * @starting is false, the device is also added to the domain's stored
 * configuration, under the domain userdata lock.
 *
 * Returns 0 on success or a libxl error code. Every failure path leaves
 * through "out" so that the transaction, the lock and d_config are always
 * released.
 */
static int libxl__device_pci_add_xenstore(libxl__gc *gc,
                                          uint32_t domid,
                                          const libxl_device_pci *pci,
                                          bool starting)
{
    flexarray_t *back;
    char *num_devs, *be_path;
    int num = 0;
    xs_transaction_t t = XBT_NULL;
    int rc;
    libxl_domain_config d_config;
    libxl__flock *lock = NULL;
    libxl_domain_type domtype;
    bool is_stubdomain = libxl_is_stubdom(CTX, domid, NULL);

    /* Stubdomain doesn't have own config. */
    if (!is_stubdomain)
        libxl_domain_config_init(&d_config);

    be_path = libxl__domain_device_backend_path(gc, 0, domid, 0,
                                                LIBXL__DEVICE_KIND_PCI);
    num_devs = libxl__xs_read(gc, XBT_NULL, GCSPRINTF("%s/num_devs", be_path));

    domtype = libxl__domain_type(gc, domid);
    if (domtype == LIBXL_DOMAIN_TYPE_INVALID) {
        rc = ERROR_FAIL;
        goto out;
    }

    /* Wait is only needed if the backend already exists (num_devs != NULL) */
    if (num_devs && !starting && domtype == LIBXL_DOMAIN_TYPE_PV) {
        rc = libxl__wait_for_backend(gc, be_path,
                                     GCSPRINTF("%d", XenbusStateConnected));
        if (rc) goto out;
    }

    back = flexarray_make(gc, 16, 1);

    LOGD(DEBUG, domid, "Adding new pci device to xenstore");
    num = num_devs ? atoi(num_devs) : 0;
    libxl_create_pci_backend_device(gc, back, num, pci);
    flexarray_append_pair(back, "num_devs", GCSPRINTF("%d", num + 1));
    /* Hot-adding to a backend that already has devices: ask it to reconfigure. */
    if (num && !starting)
        flexarray_append_pair(back, "state", GCSPRINTF("%d", XenbusStateReconfiguring));

    /*
     * Stubdomain config is derived from its target domain, it doesn't have
     * its own file.
     */
    if (!is_stubdomain && !starting) {
        lock = libxl__lock_domain_userdata(gc, domid);
        if (!lock) {
            rc = ERROR_LOCK_FAIL;
            goto out;
        }

        rc = libxl__get_domain_configuration(gc, domid, &d_config);
        if (rc) goto out;

        LOGD(DEBUG, domid, "Adding new pci device to config");
        device_add_domain_config(gc, &d_config, &libxl__pci_devtype,
                                 pci);

        rc = libxl__dm_check_start(gc, &d_config, domid);
        if (rc) goto out;
    }

    for (;;) {
        rc = libxl__xs_transaction_start(gc, &t);
        if (rc) goto out;

        /* The stored configuration is only rewritten when we hold the lock. */
        if (lock) {
            rc = libxl__set_domain_configuration(gc, domid, &d_config);
            if (rc) goto out;
        }

        /* This is the first device, so create the backend */
        if (!num_devs)
            libxl__create_pci_backend(gc, t, domid, pci);

        libxl__xs_writev(gc, t, be_path, libxl__xs_kvs_of_flexarray(gc, back));

        /* 0: committed; negative: error; otherwise loop and retry. */
        rc = libxl__xs_transaction_commit(gc, &t);
        if (!rc) break;
        if (rc < 0) goto out;
    }

out:
    libxl__xs_transaction_abort(gc, &t);
    if (lock) libxl__unlock_file(lock);
    if (!is_stubdomain)
        libxl_domain_config_dispose(&d_config);
    return rc;
}
225
libxl__device_pci_remove_xenstore(libxl__gc * gc,uint32_t domid,libxl_device_pci * pci)226 static int libxl__device_pci_remove_xenstore(libxl__gc *gc, uint32_t domid, libxl_device_pci *pci)
227 {
228 libxl_ctx *ctx = libxl__gc_owner(gc);
229 char *be_path, *num_devs_path, *num_devs, *xsdev, *tmp, *tmppath;
230 int num, i, j;
231 xs_transaction_t t;
232
233 be_path = libxl__domain_device_backend_path(gc, 0, domid, 0,
234 LIBXL__DEVICE_KIND_PCI);
235 num_devs_path = GCSPRINTF("%s/num_devs", be_path);
236 num_devs = libxl__xs_read(gc, XBT_NULL, num_devs_path);
237 if (!num_devs)
238 return ERROR_INVAL;
239 num = atoi(num_devs);
240
241 libxl_domain_type domtype = libxl__domain_type(gc, domid);
242 if (domtype == LIBXL_DOMAIN_TYPE_INVALID)
243 return ERROR_FAIL;
244
245 if (domtype == LIBXL_DOMAIN_TYPE_PV) {
246 if (libxl__wait_for_backend(gc, be_path, GCSPRINTF("%d", XenbusStateConnected)) < 0) {
247 LOGD(DEBUG, domid, "pci backend at %s is not ready", be_path);
248 return ERROR_FAIL;
249 }
250 }
251
252 for (i = 0; i < num; i++) {
253 unsigned int domain = 0, bus = 0, dev = 0, func = 0;
254 xsdev = libxl__xs_read(gc, XBT_NULL, GCSPRINTF("%s/dev-%d", be_path, i));
255 sscanf(xsdev, PCI_BDF, &domain, &bus, &dev, &func);
256 if (domain == pci->domain && bus == pci->bus &&
257 pci->dev == dev && pci->func == func) {
258 break;
259 }
260 }
261 if (i == num) {
262 LOGD(ERROR, domid, "Couldn't find the device on xenstore");
263 return ERROR_INVAL;
264 }
265
266 retry_transaction:
267 t = xs_transaction_start(ctx->xsh);
268 xs_write(ctx->xsh, t, GCSPRINTF("%s/state-%d", be_path, i), GCSPRINTF("%d", XenbusStateClosing), 1);
269 xs_write(ctx->xsh, t, GCSPRINTF("%s/state", be_path), GCSPRINTF("%d", XenbusStateReconfiguring), 1);
270 if (!xs_transaction_end(ctx->xsh, t, 0))
271 if (errno == EAGAIN)
272 goto retry_transaction;
273
274 if (domtype == LIBXL_DOMAIN_TYPE_PV) {
275 if (libxl__wait_for_backend(gc, be_path, GCSPRINTF("%d", XenbusStateConnected)) < 0) {
276 LOGD(DEBUG, domid, "pci backend at %s is not ready", be_path);
277 return ERROR_FAIL;
278 }
279 }
280
281 retry_transaction2:
282 t = xs_transaction_start(ctx->xsh);
283 xs_rm(ctx->xsh, t, GCSPRINTF("%s/state-%d", be_path, i));
284 xs_rm(ctx->xsh, t, GCSPRINTF("%s/key-%d", be_path, i));
285 xs_rm(ctx->xsh, t, GCSPRINTF("%s/dev-%d", be_path, i));
286 xs_rm(ctx->xsh, t, GCSPRINTF("%s/vdev-%d", be_path, i));
287 xs_rm(ctx->xsh, t, GCSPRINTF("%s/opts-%d", be_path, i));
288 xs_rm(ctx->xsh, t, GCSPRINTF("%s/vdevfn-%d", be_path, i));
289 xs_rm(ctx->xsh, t, GCSPRINTF("%s/name-%d", be_path, i));
290 libxl__xs_printf(gc, t, num_devs_path, "%d", num - 1);
291 for (j = i + 1; j < num; j++) {
292 tmppath = GCSPRINTF("%s/state-%d", be_path, j);
293 tmp = libxl__xs_read(gc, t, tmppath);
294 xs_write(ctx->xsh, t, GCSPRINTF("%s/state-%d", be_path, j - 1), tmp, strlen(tmp));
295 xs_rm(ctx->xsh, t, tmppath);
296 tmppath = GCSPRINTF("%s/dev-%d", be_path, j);
297 tmp = libxl__xs_read(gc, t, tmppath);
298 xs_write(ctx->xsh, t, GCSPRINTF("%s/dev-%d", be_path, j - 1), tmp, strlen(tmp));
299 xs_rm(ctx->xsh, t, tmppath);
300 tmppath = GCSPRINTF("%s/key-%d", be_path, j);
301 tmp = libxl__xs_read(gc, t, tmppath);
302 xs_write(ctx->xsh, t, GCSPRINTF("%s/key-%d", be_path, j - 1), tmp, strlen(tmp));
303 xs_rm(ctx->xsh, t, tmppath);
304 tmppath = GCSPRINTF("%s/vdev-%d", be_path, j);
305 tmp = libxl__xs_read(gc, t, tmppath);
306 if (tmp) {
307 xs_write(ctx->xsh, t, GCSPRINTF("%s/vdev-%d", be_path, j - 1), tmp, strlen(tmp));
308 xs_rm(ctx->xsh, t, tmppath);
309 }
310 tmppath = GCSPRINTF("%s/opts-%d", be_path, j);
311 tmp = libxl__xs_read(gc, t, tmppath);
312 if (tmp) {
313 xs_write(ctx->xsh, t, GCSPRINTF("%s/opts-%d", be_path, j - 1), tmp, strlen(tmp));
314 xs_rm(ctx->xsh, t, tmppath);
315 }
316 tmppath = GCSPRINTF("%s/vdevfn-%d", be_path, j);
317 tmp = libxl__xs_read(gc, t, tmppath);
318 if (tmp) {
319 xs_write(ctx->xsh, t, GCSPRINTF("%s/vdevfn-%d", be_path, j - 1), tmp, strlen(tmp));
320 xs_rm(ctx->xsh, t, tmppath);
321 }
322 tmppath = GCSPRINTF("%s/name-%d", be_path, j);
323 tmp = libxl__xs_read(gc, t, tmppath);
324 if (tmp) {
325 xs_write(ctx->xsh, t, GCSPRINTF("%s/name-%d", be_path, j - 1), tmp, strlen(tmp));
326 xs_rm(ctx->xsh, t, tmppath);
327 }
328 }
329 if (!xs_transaction_end(ctx->xsh, t, 0))
330 if (errno == EAGAIN)
331 goto retry_transaction2;
332
333 if (num == 1) {
334 libxl__device dev;
335 if (libxl__parse_backend_path(gc, be_path, &dev) != 0)
336 return ERROR_FAIL;
337
338 dev.domid = domid;
339 dev.kind = LIBXL__DEVICE_KIND_PCI;
340 dev.devid = 0;
341
342 libxl__device_destroy(gc, &dev);
343 return 0;
344 }
345
346 return 0;
347 }
348
/*
 * Return true if one of the first @num entries of @pcis compares equal to
 * @pci according to COMPARE_PCI, false otherwise.
 */
static bool is_pci_in_array(libxl_device_pci *pcis, int num,
                            libxl_device_pci *pci)
{
    int idx = 0;

    while (idx < num) {
        if (COMPARE_PCI(pci, &pcis[idx]))
            return true;
        idx++;
    }

    return false;
}
361
362 /* Write the standard BDF into the sysfs path given by sysfs_path. */
sysfs_write_bdf(libxl__gc * gc,const char * sysfs_path,libxl_device_pci * pci)363 static int sysfs_write_bdf(libxl__gc *gc, const char * sysfs_path,
364 libxl_device_pci *pci)
365 {
366 int rc, fd;
367 char *buf;
368
369 fd = open(sysfs_path, O_WRONLY);
370 if (fd < 0) {
371 LOGE(ERROR, "Couldn't open %s", sysfs_path);
372 return ERROR_FAIL;
373 }
374
375 buf = GCSPRINTF(PCI_BDF, pci->domain, pci->bus,
376 pci->dev, pci->func);
377 rc = write(fd, buf, strlen(buf));
378 /* Annoying to have two if's, but we need the errno */
379 if (rc < 0)
380 LOGE(ERROR, "write to %s returned %d", sysfs_path, rc);
381 close(fd);
382
383 if (rc < 0)
384 return ERROR_FAIL;
385
386 return 0;
387 }
388
389 #define PCI_INFO_PATH "/libxl/pci"
390
/*
 * Build the xenstore path holding libxl's bookkeeping for @pci:
 * PCI_INFO_PATH/<BDF> when @node is NULL, PCI_INFO_PATH/<BDF>/<node>
 * otherwise. The string is produced by GCSPRINTF.
 */
static char *pci_info_xs_path(libxl__gc *gc, libxl_device_pci *pci,
                              const char *node)
{
    if (!node) {
        return GCSPRINTF(PCI_INFO_PATH"/"PCI_BDF_XSPATH,
                         pci->domain, pci->bus, pci->dev, pci->func);
    }

    return GCSPRINTF(PCI_INFO_PATH"/"PCI_BDF_XSPATH"/%s",
                     pci->domain, pci->bus, pci->dev, pci->func,
                     node);
}
401
402
/*
 * Write @val to the bookkeeping node @node of @pci, outside of any
 * transaction. A failure is logged as a warning; the result of
 * libxl__xs_printf is returned unchanged.
 */
static int pci_info_xs_write(libxl__gc *gc, libxl_device_pci *pci,
                             const char *node, const char *val)
{
    char *xs_path = pci_info_xs_path(gc, pci, node);
    int rc;

    rc = libxl__xs_printf(gc, XBT_NULL, xs_path, "%s", val);
    if (rc != 0)
        LOGE(WARN, "Write of %s to node %s failed.", val, xs_path);

    return rc;
}
413
/*
 * Read the bookkeeping node @node of @pci, outside of any transaction.
 * Returns whatever libxl__xs_read returns for that path.
 */
static char *pci_info_xs_read(libxl__gc *gc, libxl_device_pci *pci,
                              const char *node)
{
    return libxl__xs_read(gc, XBT_NULL, pci_info_xs_path(gc, pci, node));
}
421
/*
 * Remove the bookkeeping node @node of @pci from xenstore, outside of any
 * transaction. The result of xs_rm is not checked.
 */
static void pci_info_xs_remove(libxl__gc *gc, libxl_device_pci *pci,
                               const char *node)
{
    libxl_ctx *ctx = libxl__gc_owner(gc);
    char *xs_path = pci_info_xs_path(gc, pci, node);

    xs_rm(ctx->xsh, XBT_NULL, xs_path);
}
431
libxl_device_pci_assignable_list(libxl_ctx * ctx,int * num)432 libxl_device_pci *libxl_device_pci_assignable_list(libxl_ctx *ctx, int *num)
433 {
434 GC_INIT(ctx);
435 libxl_device_pci *pcis = NULL, *new;
436 struct dirent *de;
437 DIR *dir;
438
439 *num = 0;
440
441 dir = opendir(SYSFS_PCIBACK_DRIVER);
442 if (NULL == dir) {
443 if (errno == ENOENT) {
444 LOG(ERROR, "Looks like pciback driver not loaded");
445 } else {
446 LOGE(ERROR, "Couldn't open %s", SYSFS_PCIBACK_DRIVER);
447 }
448 goto out;
449 }
450
451 while((de = readdir(dir))) {
452 unsigned int dom, bus, dev, func;
453 char *name;
454
455 if (sscanf(de->d_name, PCI_BDF, &dom, &bus, &dev, &func) != 4)
456 continue;
457
458 new = realloc(pcis, ((*num) + 1) * sizeof(*new));
459 if (NULL == new)
460 continue;
461
462 pcis = new;
463 new = pcis + *num;
464
465 libxl_device_pci_init(new);
466 pci_struct_fill(new, dom, bus, dev, func);
467
468 if (pci_info_xs_read(gc, new, "domid")) /* already assigned */
469 continue;
470
471 name = pci_info_xs_read(gc, new, "name");
472 if (name) new->name = strdup(name);
473
474 (*num)++;
475 }
476
477 closedir(dir);
478 out:
479 GC_FREE;
480 return pcis;
481 }
482
/*
 * Dispose of the first @num entries of @list and then free the array
 * itself.
 */
void libxl_device_pci_assignable_list_free(libxl_device_pci *list, int num)
{
    libxl_device_pci *entry = list;
    int remaining;

    for (remaining = num; remaining > 0; remaining--, entry++)
        libxl_device_pci_dispose(entry);

    free(list);
}
492
493 /* Unbind device from its current driver, if any. If driver_path is non-NULL,
494 * store the path to the original driver in it. */
sysfs_dev_unbind(libxl__gc * gc,libxl_device_pci * pci,char ** driver_path)495 static int sysfs_dev_unbind(libxl__gc *gc, libxl_device_pci *pci,
496 char **driver_path)
497 {
498 char * spath, *dp = NULL;
499 struct stat st;
500
501 spath = GCSPRINTF(SYSFS_PCI_DEV"/"PCI_BDF"/driver",
502 pci->domain,
503 pci->bus,
504 pci->dev,
505 pci->func);
506 if ( !lstat(spath, &st) ) {
507 /* Find the canonical path to the driver. */
508 dp = libxl__zalloc(gc, PATH_MAX);
509 dp = realpath(spath, dp);
510 if ( !dp ) {
511 LOGE(ERROR, "realpath() failed");
512 return -1;
513 }
514
515 LOG(DEBUG, "Driver re-plug path: %s", dp);
516
517 /* Unbind from the old driver */
518 spath = GCSPRINTF("%s/unbind", dp);
519 if ( sysfs_write_bdf(gc, spath, pci) < 0 ) {
520 LOGE(ERROR, "Couldn't unbind device");
521 return -1;
522 }
523 }
524
525 if ( driver_path )
526 *driver_path = dp;
527
528 return 0;
529 }
530
/*
 * Read the "vendor" attribute of @pci from sysfs.
 *
 * Returns the value parsed from the "0x%hx" formatted file, or 0xffff if
 * the file cannot be opened or does not parse.
 */
static uint16_t sysfs_dev_get_vendor(libxl__gc *gc, libxl_device_pci *pci)
{
    char *pci_device_vendor_path =
            GCSPRINTF(SYSFS_PCI_DEV"/"PCI_BDF"/vendor",
                      pci->domain, pci->bus, pci->dev, pci->func);
    /* fscanf() returns an int and may return EOF, so keep it in an int. */
    int read_items;
    uint16_t pci_device_vendor;

    FILE *f = fopen(pci_device_vendor_path, "r");
    if (!f) {
        LOGE(ERROR,
             "pci device "PCI_BDF" does not have vendor attribute",
             pci->domain, pci->bus, pci->dev, pci->func);
        return 0xffff;
    }
    read_items = fscanf(f, "0x%hx\n", &pci_device_vendor);
    fclose(f);
    if (read_items != 1) {
        LOGE(ERROR,
             "cannot read vendor of pci device "PCI_BDF,
             pci->domain, pci->bus, pci->dev, pci->func);
        return 0xffff;
    }

    return pci_device_vendor;
}
557
/*
 * Read the "device" attribute of @pci from sysfs.
 *
 * Returns the value parsed from the "0x%hx" formatted file, or 0xffff if
 * the file cannot be opened or does not parse.
 */
static uint16_t sysfs_dev_get_device(libxl__gc *gc, libxl_device_pci *pci)
{
    char *pci_device_device_path =
            GCSPRINTF(SYSFS_PCI_DEV"/"PCI_BDF"/device",
                      pci->domain, pci->bus, pci->dev, pci->func);
    /* fscanf() returns an int and may return EOF, so keep it in an int. */
    int read_items;
    uint16_t pci_device_device;

    FILE *f = fopen(pci_device_device_path, "r");
    if (!f) {
        LOGE(ERROR,
             "pci device "PCI_BDF" does not have device attribute",
             pci->domain, pci->bus, pci->dev, pci->func);
        return 0xffff;
    }
    read_items = fscanf(f, "0x%hx\n", &pci_device_device);
    fclose(f);
    if (read_items != 1) {
        LOGE(ERROR,
             "cannot read device of pci device "PCI_BDF,
             pci->domain, pci->bus, pci->dev, pci->func);
        return 0xffff;
    }

    return pci_device_device;
}
584
/*
 * Read the "class" attribute of @pci from sysfs into *@class.
 *
 * Returns 0 on success, ERROR_FAIL if the file cannot be opened or does
 * not parse as "0x%lx".
 */
static int sysfs_dev_get_class(libxl__gc *gc, libxl_device_pci *pci,
                               unsigned long *class)
{
    char *class_path = GCSPRINTF(SYSFS_PCI_DEV"/"PCI_BDF"/class",
                                 pci->domain, pci->bus, pci->dev, pci->func);
    FILE *class_file;
    int parsed;

    class_file = fopen(class_path, "r");
    if (class_file == NULL) {
        LOGE(ERROR,
             "pci device "PCI_BDF" does not have class attribute",
             pci->domain, pci->bus, pci->dev, pci->func);
        return ERROR_FAIL;
    }

    parsed = fscanf(class_file, "0x%lx\n", class);
    fclose(class_file);

    if (parsed == 1)
        return 0;

    LOGE(ERROR,
         "cannot read class of pci device "PCI_BDF,
         pci->domain, pci->bus, pci->dev, pci->func);
    return ERROR_FAIL;
}
612
613 /*
614 * Some devices may need some ways to work well. Here like IGD,
615 * we have to pass a specific option to qemu.
616 */
libxl__is_igd_vga_passthru(libxl__gc * gc,const libxl_domain_config * d_config)617 bool libxl__is_igd_vga_passthru(libxl__gc *gc,
618 const libxl_domain_config *d_config)
619 {
620 unsigned int i;
621 uint16_t pt_vendor, pt_device;
622 unsigned long class;
623
624 for (i = 0 ; i < d_config->num_pcidevs ; i++) {
625 libxl_device_pci *pci = &d_config->pcidevs[i];
626 pt_vendor = sysfs_dev_get_vendor(gc, pci);
627 pt_device = sysfs_dev_get_device(gc, pci);
628
629 if (pt_vendor == 0xffff || pt_device == 0xffff ||
630 pt_vendor != 0x8086)
631 continue;
632
633 if (sysfs_dev_get_class(gc, pci, &class))
634 continue;
635 if (class == 0x030000)
636 return true;
637 }
638
639 return false;
640 }
641
642 /*
643 * A brief comment about slots. I don't know what slots are for; however,
644 * I have by experimentation determined:
645 * - Before a device can be bound to pciback, its BDF must first be listed
646 * in pciback/slots
647 * - The way to get the BDF listed there is to write BDF to
648 * pciback/new_slot
649 * - Writing the same BDF to pciback/new_slot is not idempotent; it results
650 * in two entries of the BDF in pciback/slots
651 * It's not clear whether having two entries in pciback/slots is a problem
652 * or not. Just to be safe, this code does the conservative thing, and
653 * first checks to see if there is a slot, adding one only if one does not
654 * already exist.
655 */
656
657 /* Scan through /sys/.../pciback/slots looking for pci's BDF */
pciback_dev_has_slot(libxl__gc * gc,libxl_device_pci * pci)658 static int pciback_dev_has_slot(libxl__gc *gc, libxl_device_pci *pci)
659 {
660 FILE *f;
661 int rc = 0;
662 unsigned dom, bus, dev, func;
663
664 f = fopen(SYSFS_PCIBACK_DRIVER"/slots", "r");
665
666 if (f == NULL) {
667 LOGE(ERROR, "Couldn't open %s", SYSFS_PCIBACK_DRIVER"/slots");
668 return ERROR_FAIL;
669 }
670
671 while (fscanf(f, "%x:%x:%x.%d\n", &dom, &bus, &dev, &func) == 4) {
672 if (dom == pci->domain
673 && bus == pci->bus
674 && dev == pci->dev
675 && func == pci->func) {
676 rc = 1;
677 goto out;
678 }
679 }
680 out:
681 fclose(f);
682 return rc;
683 }
684
/*
 * Check whether @pci has an entry under SYSFS_PCIBACK_DRIVER.
 *
 * Returns 1 if the entry exists, 0 if lstat() reports ENOENT for it, and
 * -1 if the driver directory is not accessible or lstat() fails for any
 * other reason.
 */
static int pciback_dev_is_assigned(libxl__gc *gc, libxl_device_pci *pci)
{
    struct stat st;
    char *dev_path;
    int rc;

    if (access(SYSFS_PCIBACK_DRIVER, F_OK) < 0) {
        if (errno == ENOENT)
            LOG(ERROR, "Looks like pciback driver is not loaded");
        else
            LOGE(ERROR, "Can't access "SYSFS_PCIBACK_DRIVER);
        return -1;
    }

    dev_path = GCSPRINTF(SYSFS_PCIBACK_DRIVER"/"PCI_BDF,
                         pci->domain, pci->bus,
                         pci->dev, pci->func);

    rc = lstat(dev_path, &st);
    if (rc == 0)
        return 1;

    if (rc > 0 || errno != ENOENT) {
        LOGE(ERROR, "Accessing %s", dev_path);
        return -1;
    }

    return 0;
}
712
/*
 * Bind @pci to pciback.
 *
 * If the device has no slot yet, its BDF is first written to pciback's
 * "new_slot" file; the BDF is then written to pciback's "bind" file.
 *
 * Returns 0 on success, ERROR_FAIL if the slot check or either write fails.
 */
static int pciback_dev_assign(libxl__gc *gc, libxl_device_pci *pci)
{
    int rc;

    if ( (rc = pciback_dev_has_slot(gc, pci)) < 0 ) {
        LOGE(ERROR, "Error checking for pciback slot");
        return ERROR_FAIL;
    } else if (rc == 0) {
        /* No slot for this BDF yet: add one before binding. */
        if ( sysfs_write_bdf(gc, SYSFS_PCIBACK_DRIVER"/new_slot",
                             pci) < 0 ) {
            LOGE(ERROR, "Couldn't add pciback slot");
            return ERROR_FAIL;
        }
    }

    if ( sysfs_write_bdf(gc, SYSFS_PCIBACK_DRIVER"/bind", pci) < 0 ) {
        LOGE(ERROR, "Couldn't bind device to pciback!");
        return ERROR_FAIL;
    }
    return 0;
}
734
/*
 * Unbind @pci from its current driver and, if pciback still lists a slot
 * for it, remove that slot.
 *
 * Returns 0 on success, ERROR_FAIL if the unbind or the slot removal
 * fails.
 */
static int pciback_dev_unassign(libxl__gc *gc, libxl_device_pci *pci)
{
    /* Remove from pciback */
    if (sysfs_dev_unbind(gc, pci, NULL) < 0) {
        LOG(ERROR, "Couldn't unbind device!");
        return ERROR_FAIL;
    }

    /* Nothing more to do unless a slot is still present. */
    if (pciback_dev_has_slot(gc, pci) <= 0)
        return 0;

    if (sysfs_write_bdf(gc, SYSFS_PCIBACK_DRIVER"/remove_slot", pci) < 0) {
        LOGE(ERROR, "Couldn't remove pciback slot");
        return ERROR_FAIL;
    }

    return 0;
}
753
/*
 * Make @pci assignable: validate its optional name, unbind it from its
 * current driver, bind it to pciback, record the name in xenstore and
 * assign the device to DOMID_IO.
 *
 * If @rebind is non-zero the path of the driver the device was unbound
 * from is stored in the "driver_path" bookkeeping node; otherwise that
 * node is removed. A device already bound to pciback skips the
 * unbind/bind steps.
 *
 * Returns 0 on success, ERROR_FAIL otherwise.
 */
static int libxl__device_pci_assignable_add(libxl__gc *gc,
                                            libxl_device_pci *pci,
                                            int rebind)
{
    libxl_ctx *ctx = libxl__gc_owner(gc);
    unsigned dom, bus, dev, func;
    char *spath, *driver_path = NULL;
    const char *name;
    int rc;
    struct stat st;

    /* Local copy for convenience */
    dom = pci->domain;
    bus = pci->bus;
    dev = pci->dev;
    func = pci->func;
    name = pci->name;

    /* Sanitise any name that is set */
    if (name) {
        unsigned int i, n = strlen(name);

        if (n > 64) { /* Reasonable upper bound on name length */
            LOG(ERROR, "Name too long");
            return ERROR_FAIL;
        }

        for (i = 0; i < n; i++) {
            /*
             * The <ctype.h> functions require a value representable as
             * unsigned char; a plain char may be negative.
             */
            if (!isgraph((unsigned char)name[i])) {
                LOG(ERROR, "Names may only include printable characters");
                return ERROR_FAIL;
            }
        }
    }

    /* See if the device exists */
    spath = GCSPRINTF(SYSFS_PCI_DEV"/"PCI_BDF, dom, bus, dev, func);
    if ( lstat(spath, &st) ) {
        LOGE(ERROR, "Couldn't lstat %s", spath);
        return ERROR_FAIL;
    }

    /* Check to see if it's already assigned to pciback */
    rc = pciback_dev_is_assigned(gc, pci);
    if ( rc < 0 ) {
        return ERROR_FAIL;
    }
    if ( rc ) {
        LOG(WARN, PCI_BDF" already assigned to pciback", dom, bus, dev, func);
        goto name;
    }

    /* Check to see if there's already a driver that we need to unbind from */
    if ( sysfs_dev_unbind(gc, pci, &driver_path ) ) {
        LOG(ERROR, "Couldn't unbind "PCI_BDF" from driver",
            dom, bus, dev, func);
        return ERROR_FAIL;
    }

    /* Store driver_path for rebinding to dom0 */
    if ( rebind ) {
        if ( driver_path ) {
            pci_info_xs_write(gc, pci, "driver_path", driver_path);
        } else if ( (driver_path =
                     pci_info_xs_read(gc, pci, "driver_path")) != NULL ) {
            LOG(INFO, PCI_BDF" not bound to a driver, will be rebound to %s",
                dom, bus, dev, func, driver_path);
        } else {
            LOG(WARN, PCI_BDF" not bound to a driver, will not be rebound.",
                dom, bus, dev, func);
        }
    } else {
        pci_info_xs_remove(gc, pci, "driver_path");
    }

    if ( pciback_dev_assign(gc, pci) ) {
        LOG(ERROR, "Couldn't bind device to pciback!");
        return ERROR_FAIL;
    }

name:
    /* Keep the xenstore "name" node in sync with pci->name. */
    if (name)
        pci_info_xs_write(gc, pci, "name", name);
    else
        pci_info_xs_remove(gc, pci, "name");

    /*
     * DOMID_IO is just a sentinel domain, without any actual mappings,
     * so always pass XEN_DOMCTL_DEV_RDM_RELAXED to avoid assignment being
     * unnecessarily denied.
     */
    rc = xc_assign_device(ctx->xch, DOMID_IO, pci_encode_bdf(pci),
                          XEN_DOMCTL_DEV_RDM_RELAXED);
    if ( rc < 0 ) {
        LOG(ERROR, "failed to quarantine "PCI_BDF, dom, bus, dev, func);
        return ERROR_FAIL;
    }

    return 0;
}
854
/*
 * Look up the BDF of the device whose "name" bookkeeping node equals
 * pci->name, by walking the entries under PCI_INFO_PATH.
 *
 * On a match the domain/bus/dev/func fields of @pci are filled in and 0 is
 * returned. Otherwise ERROR_NOTFOUND is returned and @pci is left
 * untouched.
 */
static int name2bdf(libxl__gc *gc, libxl_device_pci *pci)
{
    char **bdfs;
    unsigned int i, n;
    int rc = ERROR_NOTFOUND;

    bdfs = libxl__xs_directory(gc, XBT_NULL, PCI_INFO_PATH, &n);
    if (!bdfs || !n)
        goto out;

    for (i = 0; i < n; i++) {
        unsigned dom, bus, dev, func;
        libxl_device_pci candidate;
        char *name;

        if (sscanf(bdfs[i], PCI_BDF_XSPATH, &dom, &bus, &dev, &func) != 4)
            continue;

        /*
         * Probe with a scratch device so that the caller's BDF is only
         * overwritten once a matching name has actually been found.
         */
        libxl_device_pci_init(&candidate);
        pci_struct_fill(&candidate, dom, bus, dev, func);
        name = pci_info_xs_read(gc, &candidate, "name");
        libxl_device_pci_dispose(&candidate);

        if (name && !strcmp(name, pci->name)) {
            pci_struct_fill(pci, dom, bus, dev, func);
            rc = 0;
            break;
        }
    }

out:
    if (!rc)
        LOG(DETAIL, "'%s' -> " PCI_BDF, pci->name, pci->domain,
            pci->bus, pci->dev, pci->func);

    return rc;
}
888
/*
 * Undo libxl__device_pci_assignable_add for @pci: deassign the device from
 * DOMID_IO, unbind it from pciback and, if @rebind is non-zero and a
 * "driver_path" bookkeeping node exists, bind it to that driver again.
 * The "name" bookkeeping node is removed at the end.
 *
 * A device identified by name has its BDF looked up first.
 *
 * Returns 0 on success, the name2bdf error if the lookup fails, ERROR_FAIL
 * if de-quarantining or the pciback check fails, and -1 if rebinding fails.
 */
static int libxl__device_pci_assignable_remove(libxl__gc *gc,
                                               libxl_device_pci *pci,
                                               int rebind)
{
    libxl_ctx *ctx = libxl__gc_owner(gc);
    char *orig_driver;
    int rc;

    /* If the device is named then we need to look up the BDF */
    if (pci->name) {
        rc = name2bdf(gc, pci);
        if (rc)
            return rc;
    }

    /* De-quarantine */
    rc = xc_deassign_device(ctx->xch, DOMID_IO, pci_encode_bdf(pci));
    if (rc < 0) {
        LOG(ERROR, "failed to de-quarantine "PCI_BDF, pci->domain, pci->bus,
            pci->dev, pci->func);
        return ERROR_FAIL;
    }

    /* Unbind from pciback */
    rc = pciback_dev_is_assigned(gc, pci);
    if (rc < 0)
        return ERROR_FAIL;

    if (rc)
        pciback_dev_unassign(gc, pci);
    else
        LOG(WARN, "Not bound to pciback");

    /* Rebind if necessary */
    orig_driver = pci_info_xs_read(gc, pci, "driver_path");

    if (rebind) {
        if (!orig_driver) {
            LOG(WARN,
                "Couldn't find path for original driver; not rebinding");
        } else {
            LOG(INFO, "Rebinding to driver at %s", orig_driver);

            if (sysfs_write_bdf(gc, GCSPRINTF("%s/bind", orig_driver),
                                pci) < 0) {
                LOGE(ERROR, "Couldn't bind device to %s", orig_driver);
                return -1;
            }

            pci_info_xs_remove(gc, pci, "driver_path");
        }
    }

    pci_info_xs_remove(gc, pci, "name");

    return 0;
}
947
/*
 * Public entry point: set up a gc for @ctx, run
 * libxl__device_pci_assignable_add and release the gc again.
 * Returns that function's result.
 */
int libxl_device_pci_assignable_add(libxl_ctx *ctx, libxl_device_pci *pci,
                                    int rebind)
{
    GC_INIT(ctx);
    int rc = libxl__device_pci_assignable_add(gc, pci, rebind);

    GC_FREE;
    return rc;
}
959
960
/*
 * Public entry point: set up a gc for @ctx, run
 * libxl__device_pci_assignable_remove and release the gc again.
 * Returns that function's result.
 */
int libxl_device_pci_assignable_remove(libxl_ctx *ctx, libxl_device_pci *pci,
                                       int rebind)
{
    GC_INIT(ctx);
    int rc = libxl__device_pci_assignable_remove(gc, pci, rebind);

    GC_FREE;
    return rc;
}
972
973 /*
974 * This function checks that all functions of a device are bound to pciback
975 * driver. It also initialises a bit-mask of which function numbers are present
976 * on that device.
977 */
pci_multifunction_check(libxl__gc * gc,libxl_device_pci * pci,unsigned int * func_mask)978 static int pci_multifunction_check(libxl__gc *gc, libxl_device_pci *pci, unsigned int *func_mask)
979 {
980 struct dirent *de;
981 DIR *dir;
982
983 *func_mask = 0;
984
985 dir = opendir(SYSFS_PCI_DEV);
986 if ( NULL == dir ) {
987 LOGE(ERROR, "Couldn't open %s", SYSFS_PCI_DEV);
988 return -1;
989 }
990
991 while( (de = readdir(dir)) ) {
992 unsigned dom, bus, dev, func;
993 struct stat st;
994 char *path;
995
996 if ( sscanf(de->d_name, PCI_BDF, &dom, &bus, &dev, &func) != 4 )
997 continue;
998 if ( pci->domain != dom )
999 continue;
1000 if ( pci->bus != bus )
1001 continue;
1002 if ( pci->dev != dev )
1003 continue;
1004
1005 path = GCSPRINTF("%s/" PCI_BDF, SYSFS_PCIBACK_DRIVER, dom, bus, dev, func);
1006 if ( lstat(path, &st) ) {
1007 if ( errno == ENOENT )
1008 LOG(ERROR, PCI_BDF " is not assigned to pciback driver",
1009 dom, bus, dev, func);
1010 else
1011 LOGE(ERROR, "Couldn't lstat %s", path);
1012 closedir(dir);
1013 return -1;
1014 }
1015 (*func_mask) |= (1 << func);
1016 }
1017
1018 closedir(dir);
1019 return 0;
1020 }
1021
/*
 * Classify a device model state string while waiting for a PCI insert:
 *   -1  "pci-insert-failed"
 *    0  "pci-inserted"
 *    1  anything else
 *
 * @priv is not used: the caller passes the state seen before the request,
 * which may be NULL, and comparing against it never changed the result.
 */
static int pci_ins_check(libxl__gc *gc, uint32_t domid, const char *state, void *priv)
{
    if ( !strcmp(state, "pci-insert-failed") )
        return -1;
    if ( !strcmp(state, "pci-inserted") )
        return 0;

    return 1;
}
1035
/*
 * Ask a qemu-traditional device model to insert @pci via xenstore.
 *
 * The current device-model state is saved, the BDF (plus optional virtual
 * devfn) and options are written to the "/parameter" node, "pci-ins" is
 * issued and the result awaited with pci_ins_check().  On success the
 * virtual devfn reported back in "/parameter" is parsed into pci->vdevfn.
 * The saved state is written back to "/state" before returning.
 *
 * Returns the result of the wait, -1 if the reply cannot be parsed, or
 * ERROR_FAIL if the initial state cannot be read (nothing is sent to the
 * device model in that case, since the state could not be restored).
 */
static int qemu_pci_add_xenstore(libxl__gc *gc, uint32_t domid,
                                 libxl_device_pci *pci)
{
    libxl_ctx *ctx = libxl__gc_owner(gc);
    int rc = 0;
    char *path;
    char *state, *vdevfn;
    uint32_t dm_domid;

    dm_domid = libxl_get_stubdom_id(CTX, domid);
    path = DEVICE_MODEL_XS_PATH(gc, dm_domid, domid, "/state");
    state = libxl__xs_read(gc, XBT_NULL, path);
    if (!state) {
        /* Without the saved state we could neither compare nor restore. */
        LOGD(ERROR, domid, "unable to read device model state from %s", path);
        return ERROR_FAIL;
    }

    path = DEVICE_MODEL_XS_PATH(gc, dm_domid, domid, "/parameter");
    if (pci->vdevfn) {
        libxl__xs_printf(gc, XBT_NULL, path, PCI_BDF_VDEVFN","PCI_OPTIONS,
                         pci->domain, pci->bus, pci->dev,
                         pci->func, pci->vdevfn, pci->msitranslate,
                         pci->power_mgmt);
    } else {
        libxl__xs_printf(gc, XBT_NULL, path, PCI_BDF","PCI_OPTIONS,
                         pci->domain,  pci->bus, pci->dev,
                         pci->func, pci->msitranslate, pci->power_mgmt);
    }

    libxl__qemu_traditional_cmd(gc, domid, "pci-ins");
    rc = libxl__wait_for_device_model_deprecated(gc, domid, NULL, NULL,
                                                 pci_ins_check, state);

    /* The device model answers through the same "/parameter" node. */
    path = DEVICE_MODEL_XS_PATH(gc, dm_domid, domid, "/parameter");
    vdevfn = libxl__xs_read(gc, XBT_NULL, path);
    path = DEVICE_MODEL_XS_PATH(gc, dm_domid, domid, "/state");
    if ( rc < 0 ) {
        LOGD(ERROR, domid, "qemu refused to add device: %s",
             vdevfn ? vdevfn : "(no reply)");
    } else if ( !vdevfn || sscanf(vdevfn, "0x%x", &pci->vdevfn) != 1 ) {
        /* A missing reply is treated like an unparsable one. */
        LOGD(ERROR, domid, "wrong format for the vdevfn: '%s'",
             vdevfn ? vdevfn : "");
        rc = -1;
    }

    /* Put back the state that was current before the command. */
    xs_write(ctx->xsh, XBT_NULL, path, state, strlen(state));

    return rc;
}
1076
/*
 * Helper for xswait callbacks watching the device model "/state" node.
 *
 * - On an incoming error (@rc != 0) the watch is stopped and @rc is
 *   returned; a timeout is additionally logged.
 * - If the state is absent or not "running", ERROR_NOT_READY is returned
 *   and the watch is left in place.
 * - Otherwise the watch is stopped and 0 is returned.
 */
static int check_qemu_running(libxl__gc *gc,
                              libxl_domid domid,
                              libxl__xswait_state *xswa,
                              int rc,
                              const char *state)
{
    if (!rc) {
        /* No error yet: keep watching until qemu says "running". */
        if (!state || strcmp(state, "running"))
            return ERROR_NOT_READY;
    } else if (rc == ERROR_TIMEDOUT) {
        LOGD(ERROR, domid, "%s not ready", xswa->what);
    }

    libxl__xswait_stop(gc, xswa);
    return rc;
}
1097
/*
 * State carried through the asynchronous PCI add operation (do_pci_add and
 * the callbacks chained from it).
 */
typedef struct pci_add_state {
    /* filled by user of do_pci_add */
    libxl__ao_device *aodev;   /* its ao drives the operation; completed in
                                * device_pci_add_done */
    libxl_domid domid;         /* domain the device is being added to */
    bool starting;             /* forwarded to libxl__device_pci_add_xenstore */
    /* invoked by pci_add_dm_done once the current do_pci_add pass ends */
    void (*callback)(libxl__egc *, struct pci_add_state *, int rc);

    /* private to device_pci_add_stubdom_wait */
    libxl__ev_devstate pciback_ds;  /* waits for the backend state node */

    /* private to do_pci_add */
    libxl__xswait_state xswait;     /* watch on the device model "/state" */
    libxl__ev_qmp qmp;              /* QMP command in flight */
    libxl__ev_time timeout;         /* overall QMP timeout */
    libxl__ev_time timeout_retries; /* delay before a device_add retry */
    libxl_device_pci pci;           /* private copy of the device */
    libxl_domid pci_domid;          /* domid passed to do_pci_add */
    int retries;                    /* device_add retries so far */
} pci_add_state;
1117
/* Forward declarations of the callbacks and steps chained from do_pci_add. */
static void pci_add_qemu_trad_watch_state_cb(libxl__egc *egc,
    libxl__xswait_state *xswa, int rc, const char *state);
static void pci_add_qmp_device_add(libxl__egc *, pci_add_state *);
static void pci_add_qmp_device_add_cb(libxl__egc *,
    libxl__ev_qmp *, const libxl__json_object *, int rc);
static void pci_add_qmp_device_add_retry(libxl__egc *egc, libxl__ev_time *ev,
    const struct timeval *requested_abs, int rc);
static void pci_add_qmp_query_pci_cb(libxl__egc *,
    libxl__ev_qmp *, const libxl__json_object *, int rc);
static void pci_add_timeout(libxl__egc *egc, libxl__ev_time *ev,
    const struct timeval *requested_abs, int rc);
static void pci_add_dm_done(libxl__egc *,
                            pci_add_state *, int rc);
1131
/*
 * Start adding pas->pci to @domid.
 *
 * The private part of @pas is (re)initialised, then:
 *  - for an HVM domain with qemu-traditional, a watch on the device model
 *    "/state" node is started (continues in
 *    pci_add_qemu_trad_watch_state_cb);
 *  - for an HVM domain with qemu-xen, the QMP timeout is armed and
 *    "device_add" is sent (continues in pci_add_qmp_device_add_cb);
 *  - for an HVM domain with any other device model version the operation
 *    fails with ERROR_INVAL;
 *  - for non-HVM domains there is no device model step.
 *
 * Every path ends in pci_add_dm_done(), either directly from here or from
 * one of the callbacks.
 */
static void do_pci_add(libxl__egc *egc,
                       libxl_domid domid,
                       pci_add_state *pas)
{
    STATE_AO_GC(pas->aodev->ao);
    libxl_domain_type type = libxl__domain_type(gc, domid);
    int rc;

    /* init pci_add_state */
    libxl__xswait_init(&pas->xswait);
    libxl__ev_qmp_init(&pas->qmp);
    pas->pci_domid = domid;
    pas->retries = 0;
    libxl__ev_time_init(&pas->timeout);
    libxl__ev_time_init(&pas->timeout_retries);

    if (type == LIBXL_DOMAIN_TYPE_INVALID) {
        rc = ERROR_FAIL;
        goto out;
    }

    if (type == LIBXL_DOMAIN_TYPE_HVM) {
        switch (libxl__device_model_version_running(gc, domid)) {
            case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN_TRADITIONAL:
                pas->xswait.ao = ao;
                pas->xswait.what = "Device Model";
                pas->xswait.path = DEVICE_MODEL_XS_PATH(gc,
                    libxl_get_stubdom_id(CTX, domid), domid, "/state");
                pas->xswait.timeout_ms = LIBXL_DEVICE_MODEL_START_TIMEOUT * 1000;
                pas->xswait.callback = pci_add_qemu_trad_watch_state_cb;
                rc = libxl__xswait_start(gc, &pas->xswait);
                if (rc) goto out;
                return;
            case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN:
                rc = libxl__ev_time_register_rel(ao, &pas->timeout,
                                                 pci_add_timeout,
                                                 LIBXL_QMP_CMD_TIMEOUT * 1000);
                if (rc) goto out;

                pci_add_qmp_device_add(egc, pas); /* must be last */
                return;
            default:
                /*
                 * Jump straight to the exit: a plain "break" would fall
                 * into the "rc = 0" below and report success for an
                 * unsupported device model.
                 */
                rc = ERROR_INVAL;
                goto out;
        }
    }

    rc = 0;

out:
    pci_add_dm_done(egc, pas, rc); /* must be last */
}
1184
/*
 * xswait callback for the qemu-traditional add path.  Does nothing while
 * check_qemu_running() reports ERROR_NOT_READY; once the device model is
 * running the device is inserted through xenstore.  Any other outcome, or
 * the result of the insertion, is passed on to pci_add_dm_done().
 */
static void pci_add_qemu_trad_watch_state_cb(libxl__egc *egc,
                                             libxl__xswait_state *xswa,
                                             int rc,
                                             const char *state)
{
    pci_add_state *pas = CONTAINER_OF(xswa, *pas, xswait);
    STATE_AO_GC(pas->aodev->ao);

    rc = check_qemu_running(gc, pas->domid, xswa, rc, state);
    if (rc == ERROR_NOT_READY)
        return;

    if (!rc)
        rc = qemu_pci_add_xenstore(gc, pas->domid, &pas->pci);

    pci_add_dm_done(egc, pas, rc); /* must be last */
}
1207
/*
 * Build and send the QMP "device_add" command for pas->pci.  The reply is
 * handled by pci_add_qmp_device_add_cb(); if the command cannot be sent
 * the operation is finished through pci_add_dm_done().
 */
static void pci_add_qmp_device_add(libxl__egc *egc, pci_add_state *pas)
{
    STATE_AO_GC(pas->aodev->ao);
    libxl__json_object *qmp_args = NULL;
    libxl_device_pci *dev = &pas->pci;
    libxl__ev_qmp *const ev = &pas->qmp;
    int rc;

    libxl__qmp_param_add_string(gc, &qmp_args, "driver",
                                "xen-pci-passthrough");
    QMP_PARAMETERS_SPRINTF(&qmp_args, "id", PCI_PT_QDEV_ID,
                           dev->bus, dev->dev, dev->func);
    QMP_PARAMETERS_SPRINTF(&qmp_args, "hostaddr", PCI_BDF,
                           dev->domain, dev->bus, dev->dev, dev->func);

    /* Only request a specific guest address when one was chosen. */
    if (dev->vdevfn) {
        QMP_PARAMETERS_SPRINTF(&qmp_args, "addr", "%x.%x",
                               PCI_SLOT(dev->vdevfn),
                               PCI_FUNC(dev->vdevfn));
    }

    /*
     * QEMU versions older than the XSA-131 fix have no "permissive"
     * property and always behaved permissively; the fix made restricted
     * the default and introduced the property.  Sending the property only
     * when it is true therefore works with both old and new QEMU, and
     * users of old QEMU have no reason to set it.
     */
    if (dev->permissive)
        libxl__qmp_param_add_bool(gc, &qmp_args, "permissive", true);

    ev->ao = pas->aodev->ao;
    ev->domid = pas->domid;
    ev->payload_fd = -1;
    ev->callback = pci_add_qmp_device_add_cb;

    rc = libxl__ev_qmp_send(egc, ev, "device_add", qmp_args);
    if (!rc)
        return;

    pci_add_dm_done(egc, pas, rc); /* must be last */
}
1255
/*
 * Reply handler for QMP "device_add".
 *
 * On success "query-pci" is sent next (handled by
 * pci_add_qmp_query_pci_cb).  On failure, and only when the domain has a
 * stubdom, the command is retried after a one second timer, up to ten
 * times.  Everything else ends in pci_add_dm_done().
 */
static void pci_add_qmp_device_add_cb(libxl__egc *egc,
                                      libxl__ev_qmp *qmp,
                                      const libxl__json_object *response,
                                      int rc)
{
    EGC_GC;
    pci_add_state *pas = CONTAINER_OF(qmp, *pas, qmp);

    if (!rc) {
        qmp->callback = pci_add_qmp_query_pci_cb;
        rc = libxl__ev_qmp_send(egc, qmp, "query-pci", NULL);
        if (!rc)
            return;
        goto out;
    }

    /* Retry only applicable for HVM with stubdom. */
    if (libxl_get_stubdom_id(CTX, qmp->domid) == 0)
        goto out;

    /* The counter is bumped on every failure, including the last one. */
    if (pas->retries++ >= 10)
        goto out;

    LOGD(ERROR, qmp->domid, "Retrying PCI add %d", pas->retries);
    rc = libxl__ev_time_register_rel(pas->aodev->ao,
                                     &pas->timeout_retries,
                                     pci_add_qmp_device_add_retry,
                                     1000);
    if (rc)
        goto out;
    return; /* Wait for the timeout to then retry. */

out:
    pci_add_dm_done(egc, pas, rc); /* must be last */
}
1290
/*
 * Timer callback armed by pci_add_qmp_device_add_cb(): re-sends the
 * "device_add" command.  The @rc delivered by the timer is not examined.
 */
static void pci_add_qmp_device_add_retry(libxl__egc *egc, libxl__ev_time *ev,
                                         const struct timeval *requested_abs,
                                         int rc)
{
    pci_add_state *pas;

    pas = CONTAINER_OF(ev, *pas, timeout_retries);
    pci_add_qmp_device_add(egc, pas);
}
1299
/*
 * Reply handler for QMP "query-pci": locate the device that was just
 * added (matched by its qdev id) and record the slot/function QEMU gave
 * it in pci->vdevfn.
 *
 * Ends in pci_add_dm_done() with 0 when found, ERROR_FAIL when the id is
 * not in the reply, ERROR_QEMU_API when the reply lacks an expected
 * field, or the incoming @rc if that was already an error.
 */
static void pci_add_qmp_query_pci_cb(libxl__egc *egc,
                                     libxl__ev_qmp *qmp,
                                     const libxl__json_object *response,
                                     int rc)
{
    EGC_GC;
    pci_add_state *pas = CONTAINER_OF(qmp, *pas, qmp);
    libxl_device_pci *pci = &pas->pci;
    const libxl__json_object *bus_entry = NULL;
    const libxl__json_object *dev_list = NULL;
    const libxl__json_object *dev_entry = NULL;
    const libxl__json_object *field = NULL;
    const char *qdev_id = NULL;
    char *wanted_id;
    int slot, function;
    int bus_idx, dev_idx;

    if (rc) goto out;

    /*
     * Shape of the `query-pci' reply:
     * [
     *   {'bus': 'int',
     *    'devices': [
     *       {'bus': 'int', 'slot': 'int', 'function': 'int',
     *        'class_info': 'PciDeviceClass', 'id': 'PciDeviceId',
     *        '*irq': 'int', 'qdev_id': 'str',
     *        '*pci_bridge': 'PciBridgeInfo',
     *        'regions': ['PciMemoryRegion']
     *       }
     *    ]
     *   }
     * ]
     * (The structs not spelled out here are in qemu.git/qapi/.)
     */

    wanted_id = GCSPRINTF(PCI_PT_QDEV_ID,
                          pci->bus, pci->dev, pci->func);

    for (bus_idx = 0; ; bus_idx++) {
        bus_entry = libxl__json_array_get(response, bus_idx);
        if (!bus_entry)
            break;

        dev_list = libxl__json_map_get("devices", bus_entry, JSON_ARRAY);
        if (!dev_list)
            goto bad_response;

        for (dev_idx = 0; ; dev_idx++) {
            dev_entry = libxl__json_array_get(dev_list, dev_idx);
            if (!dev_entry)
                break;

            field = libxl__json_map_get("qdev_id", dev_entry, JSON_STRING);
            if (!field)
                goto bad_response;
            qdev_id = libxl__json_object_get_string(field);
            if (!qdev_id || strcmp(wanted_id, qdev_id))
                continue;

            /* This is our device: pick up where QEMU placed it. */
            field = libxl__json_map_get("slot", dev_entry, JSON_INTEGER);
            if (!field)
                goto bad_response;
            slot = libxl__json_object_get_integer(field);

            field = libxl__json_map_get("function", dev_entry, JSON_INTEGER);
            if (!field)
                goto bad_response;
            function = libxl__json_object_get_integer(field);

            pci->vdevfn = PCI_DEVFN(slot, function);

            rc = 0;
            goto out;
        }
    }

    rc = ERROR_FAIL;
    LOGD(ERROR, qmp->domid,
         "PCI device id '%s' wasn't found in QEMU's 'query-pci' response.",
         wanted_id);
    goto out;

bad_response:
    rc = ERROR_QEMU_API;

out:
    /* Also covers an ERROR_QEMU_API that was passed in by the caller. */
    if (rc == ERROR_QEMU_API) {
        LOGD(ERROR, qmp->domid,
             "Unexpected response to QMP cmd 'query-pci', received:\n%s",
             JSON(response));
    }
    pci_add_dm_done(egc, pas, rc); /* must be last */
}
1390
/*
 * Callback of the overall QMP timer armed in do_pci_add(): finishes the
 * add operation with the @rc delivered by the timer.
 */
static void pci_add_timeout(libxl__egc *egc, libxl__ev_time *ev,
                            const struct timeval *requested_abs,
                            int rc)
{
    pci_add_state *pas;

    pas = CONTAINER_OF(ev, *pas, timeout);
    pci_add_dm_done(egc, pas, rc);
}
1399
/*
 * Compile-time switch: true only when the build defines
 * CONFIG_PCI_SUPP_LEGACY_IRQ, false otherwise.
 */
static bool pci_supp_legacy_irq(void)
{
    bool supported = false;

#ifdef CONFIG_PCI_SUPP_LEGACY_IRQ
    supported = true;
#endif

    return supported;
}
1408
/*
 * Final step of one do_pci_add() pass, reached once the device model part
 * (if any) has finished with result @rc.
 *
 * When @rc is 0 this grants the domain access to the device's I/O port and
 * memory ranges listed in its sysfs "resource" file, maps and grants its
 * legacy IRQ (when pci_supp_legacy_irq()), optionally marks the device
 * permissive in pciback, assigns the device to the domain (unless the
 * domain is a stubdom) and, for domains without a stubdom, records the
 * device in xenstore.
 *
 * In all cases the QMP state is disposed, both timers are deregistered and
 * pas->callback is invoked with the final rc.
 */
static void pci_add_dm_done(libxl__egc *egc,
                            pci_add_state *pas,
                            int rc)
{
    STATE_AO_GC(pas->aodev->ao);
    libxl_ctx *ctx = libxl__gc_owner(gc);
    libxl_domid domid = pas->pci_domid;
    char *sysfs_path;
    FILE *f;
    unsigned long long start, end, flags, size;
    int irq, i;
    int r;
    uint32_t flag = XEN_DOMCTL_DEV_RDM_RELAXED;
    /* domainid is only used for log messages below; libxl_is_stubdom() may
     * update it (presumably to the domain served by the stubdom -- confirm
     * against libxl_is_stubdom). */
    uint32_t domainid = domid;
    bool isstubdom = libxl_is_stubdom(ctx, domid, &domainid);

    /* Convenience aliases */
    bool starting = pas->starting;
    libxl_device_pci *pci = &pas->pci;
    bool hvm = libxl__domain_type(gc, domid) == LIBXL_DOMAIN_TYPE_HVM;

    libxl__ev_qmp_dispose(gc, &pas->qmp);

    if (rc) goto out;

    /* stubdomain is always running by now, even at create time */
    if (isstubdom)
        starting = false;

    /* Step 1: grant access to every resource listed in sysfs. */
    sysfs_path = GCSPRINTF(SYSFS_PCI_DEV"/"PCI_BDF"/resource", pci->domain,
                           pci->bus, pci->dev, pci->func);
    f = fopen(sysfs_path, "r");
    start = end = flags = size = 0;
    irq = 0;

    if (f == NULL) {
        LOGED(ERROR, domainid, "Couldn't open %s", sysfs_path);
        rc = ERROR_FAIL;
        goto out;
    }
    for (i = 0; i < PROC_PCI_NUM_RESOURCES; i++) {
        /* Lines that do not parse as "start end flags" are skipped. */
        if (fscanf(f, "0x%llx 0x%llx 0x%llx\n", &start, &end, &flags) != 3)
            continue;
        size = end - start + 1;
        /* A zero start address is treated as "no resource here". */
        if (start) {
            if (flags & PCI_BAR_IO) {
                r = xc_domain_ioport_permission(ctx->xch, domid, start, size, 1);
                if (r < 0) {
                    LOGED(ERROR, domainid,
                          "xc_domain_ioport_permission 0x%llx/0x%llx (error %d)",
                          start, size, r);
                    fclose(f);
                    rc = ERROR_FAIL;
                    goto out;
                }
            } else {
                /* Memory ranges are granted in whole pages. */
                r = xc_domain_iomem_permission(ctx->xch, domid, start>>XC_PAGE_SHIFT,
                                                (size+(XC_PAGE_SIZE-1))>>XC_PAGE_SHIFT, 1);
                if (r < 0) {
                    LOGED(ERROR, domainid,
                          "xc_domain_iomem_permission 0x%llx/0x%llx (error %d)",
                          start, size, r);
                    fclose(f);
                    rc = ERROR_FAIL;
                    goto out;
                }
            }
        }
    }
    fclose(f);

    /* Step 2: legacy IRQ, only on builds that support it.
     * NOTE(review): both jumps to out_no_irq below also bypass the
     * "permissive" block that follows the IRQ handling -- confirm that
     * this is intended. */
    if (!pci_supp_legacy_irq())
        goto out_no_irq;
    sysfs_path = GCSPRINTF(SYSFS_PCI_DEV"/"PCI_BDF"/irq", pci->domain,
                                pci->bus, pci->dev, pci->func);
    f = fopen(sysfs_path, "r");
    if (f == NULL) {
        /* Not fatal: the device is still assigned, just without an IRQ. */
        LOGED(ERROR, domainid, "Couldn't open %s", sysfs_path);
        goto out_no_irq;
    }
    /* NOTE(review): irq is an int but is scanned with "%u". */
    if ((fscanf(f, "%u", &irq) == 1) && irq) {
        /* irq is both the input and, on return, the mapped pirq. */
        r = xc_physdev_map_pirq(ctx->xch, domid, irq, &irq);
        if (r < 0) {
            LOGED(ERROR, domainid, "xc_physdev_map_pirq irq=%d (error=%d)",
                  irq, r);
            fclose(f);
            rc = ERROR_FAIL;
            goto out;
        }
        r = xc_domain_irq_permission(ctx->xch, domid, irq, 1);
        if (r < 0) {
            LOGED(ERROR, domainid,
                  "xc_domain_irq_permission irq=%d (error=%d)", irq, r);
            fclose(f);
            rc = ERROR_FAIL;
            goto out;
        }
    }
    fclose(f);

    /* Don't restrict writes to the PCI config space from this VM */
    if (pci->permissive) {
        if ( sysfs_write_bdf(gc, SYSFS_PCIBACK_DRIVER"/permissive",
                             pci) < 0 ) {
            LOGD(ERROR, domainid, "Setting permissive for device");
            rc = ERROR_FAIL;
            goto out;
        }
    }

out_no_irq:
    /* Step 3: assign the device, except when adding to a stubdom. */
    if (!isstubdom) {
        if (pci->rdm_policy == LIBXL_RDM_RESERVE_POLICY_STRICT) {
            flag &= ~XEN_DOMCTL_DEV_RDM_RELAXED;
        } else if (pci->rdm_policy != LIBXL_RDM_RESERVE_POLICY_RELAXED) {
            LOGED(ERROR, domainid, "unknown rdm check flag.");
            rc = ERROR_FAIL;
            goto out;
        }
        r = xc_assign_device(ctx->xch, domid, pci_encode_bdf(pci), flag);
        /* For non-HVM domains an ENOSYS failure is tolerated. */
        if (r < 0 && (hvm || errno != ENOSYS)) {
            LOGED(ERROR, domainid, "xc_assign_device failed");
            rc = ERROR_FAIL;
            goto out;
        }
    }

    /* Step 4: the xenstore entry is only written for a domain that has no
     * stubdom of its own. */
    if (!libxl_get_stubdom_id(CTX, domid))
        rc = libxl__device_pci_add_xenstore(gc, domid, pci, starting);
    else
        rc = 0;
out:
    libxl__ev_time_deregister(gc, &pas->timeout);
    libxl__ev_time_deregister(gc, &pas->timeout_retries);
    pas->callback(egc, pas, rc);
}
1544
/*
 * Reset a PCI function through sysfs.
 *
 * pciback's "do_flr" node is tried first (the BDF is written to it); if
 * that node cannot be opened, the device's own "reset" node is tried
 * (a "1" is written to it).  Returns 0 when a write succeeded, the
 * negative write() result when the write failed, and -1 when neither node
 * could be opened.
 */
static int libxl__device_pci_reset(libxl__gc *gc, unsigned int domain, unsigned int bus,
                                   unsigned int dev, unsigned int func)
{
    char *node;
    int fd, rc;

    /* Preferred: function level reset via pciback. */
    node = GCSPRINTF("%s/do_flr", SYSFS_PCIBACK_DRIVER);
    fd = open(node, O_WRONLY);
    if (fd >= 0) {
        char *bdf = GCSPRINTF(PCI_BDF, domain, bus, dev, func);

        rc = write(fd, bdf, strlen(bdf));
        if (rc < 0)
            LOGE(ERROR, "write '%s' to %s failed", bdf, node);
        else
            rc = 0;
        close(fd);
        return rc;
    }
    if (errno != ENOENT)
        LOGE(ERROR, "Failed to access pciback path %s", node);

    /* Fallback: the generic per-device reset node. */
    node = GCSPRINTF("%s/"PCI_BDF"/reset", SYSFS_PCI_DEV, domain, bus, dev, func);
    fd = open(node, O_WRONLY);
    if (fd >= 0) {
        rc = write(fd, "1", 1);
        if (rc < 0)
            LOGE(ERROR, "write to %s failed", node);
        else
            rc = 0;
        close(fd);
        return rc;
    }

    if (errno != ENOENT) {
        LOGE(ERROR, "Failed to access reset path %s", node);
    } else {
        LOG(ERROR,
            "The kernel doesn't support reset from sysfs for PCI device "PCI_BDF,
            domain, bus, dev, func);
    }
    return -1;
}
1581
/*
 * Fill in defaults for @pci.  Only the RDM policy is touched: an unset
 * (INVALID) policy becomes STRICT, so that reserved device memory of the
 * device is force-reserved by default.  Always returns 0.
 */
int libxl__device_pci_setdefault(libxl__gc *gc, uint32_t domid,
                                 libxl_device_pci *pci, bool hotplug)
{
    switch (pci->rdm_policy) {
    case LIBXL_RDM_RESERVE_POLICY_INVALID:
        pci->rdm_policy = LIBXL_RDM_RESERVE_POLICY_STRICT;
        break;
    default:
        break;
    }

    return 0;
}
1590
/*
 * Public entry point for hot-adding a PCI device to @domid.  Creates the
 * asynchronous operation, prepares an ao device that completes through
 * device_addrm_aocomplete and requests a JSON update, then hands over to
 * libxl__device_pci_add() with starting == false.
 */
int libxl_device_pci_add(libxl_ctx *ctx, uint32_t domid,
                         libxl_device_pci *pci,
                         const libxl_asyncop_how *ao_how)
{
    AO_CREATE(ctx, domid, ao_how);
    libxl__ao_device *add_dev;

    GCNEW(add_dev);
    libxl__prepare_ao_device(ao, add_dev);
    add_dev->update_json = true;
    add_dev->callback = device_addrm_aocomplete;
    add_dev->action = LIBXL__DEVICE_ACTION_ADD;

    libxl__device_pci_add(egc, domid, pci, false, add_dev);

    return AO_INPROGRESS;
}
1606
/*
 * Tell whether @pci appears in the list returned by
 * libxl_device_pci_assignable_list().  The list is freed before returning.
 */
static bool libxl_pci_assignable(libxl_ctx *ctx, libxl_device_pci *pci)
{
    int count;
    libxl_device_pci *list = libxl_device_pci_assignable_list(ctx, &count);
    bool found = is_pci_in_array(list, count, pci);

    libxl_device_pci_assignable_list_free(list, count);

    return found;
}
1619
/* Forward declarations of the stages chained from libxl__device_pci_add. */
static void device_pci_add_stubdom_wait(libxl__egc *egc,
    pci_add_state *pas, int rc);
static void device_pci_add_stubdom_ready(libxl__egc *egc,
    libxl__ev_devstate *ds, int rc);
static void device_pci_add_stubdom_done(libxl__egc *egc,
    pci_add_state *, int rc);
static void device_pci_add_done(libxl__egc *egc,
    pci_add_state *, int rc);
1628
/*
 * Start the asynchronous addition of @pci to @domid.
 *
 * A private copy of the device is kept in a freshly allocated
 * pci_add_state.  After resolving a named device to its BDF, checking (for
 * HVM) that the device can be assigned, applying defaults, optionally
 * seizing the device for pciback and verifying it is assignable, the
 * owning domid is recorded and the device is reset (best effort; the
 * reset result is not checked).
 *
 * If the domain has a stubdom the device is first added to the stubdom
 * (continuing in device_pci_add_stubdom_wait), otherwise processing
 * continues directly in device_pci_add_stubdom_done.  Any failure ends in
 * device_pci_add_done(), which reports a libxl error code through
 * aodev->rc.
 */
void libxl__device_pci_add(libxl__egc *egc, uint32_t domid,
                           libxl_device_pci *pci, bool starting,
                           libxl__ao_device *aodev)
{
    STATE_AO_GC(aodev->ao);
    libxl_ctx *ctx = libxl__gc_owner(gc);
    int rc;
    int r; /* raw libxc result, never propagated as a libxl error code */
    int stubdomid = 0;
    pci_add_state *pas;

    GCNEW(pas);
    pas->aodev = aodev;
    pas->domid = domid;

    /* From here on work on the private copy only. */
    libxl_device_pci_copy(CTX, &pas->pci, pci);
    pci = &pas->pci;

    /* If the device is named then we need to look up the BDF */
    if (pci->name) {
        rc = name2bdf(gc, pci);
        if (rc) goto out;
    }

    pas->starting = starting;
    pas->callback = device_pci_add_stubdom_done;

    if (libxl__domain_type(gc, domid) == LIBXL_DOMAIN_TYPE_HVM) {
        r = xc_test_assign_device(ctx->xch, domid, pci_encode_bdf(pci));
        if (r) {
            LOGD(ERROR, domid,
                 "PCI device %04x:%02x:%02x.%u %s?",
                 pci->domain, pci->bus, pci->dev, pci->func,
                 errno == EOPNOTSUPP ? "cannot be assigned - no IOMMU"
                 : "already assigned to a different guest");
            /* Report a libxl error code rather than the libxc result. */
            rc = ERROR_FAIL;
            goto out;
        }
    }

    rc = libxl__device_pci_setdefault(gc, domid, pci, !starting);
    if (rc) goto out;

    if (pci->seize && !pciback_dev_is_assigned(gc, pci)) {
        rc = libxl__device_pci_assignable_add(gc, pci, 1);
        if ( rc )
            goto out;
    }

    if (!libxl_pci_assignable(ctx, pci)) {
        LOGD(ERROR, domid, "PCI device %x:%x:%x.%x is not assignable",
             pci->domain, pci->bus, pci->dev, pci->func);
        rc = ERROR_FAIL;
        goto out;
    }

    rc = pci_info_xs_write(gc, pci, "domid", GCSPRINTF("%u", domid));
    if (rc) goto out;

    libxl__device_pci_reset(gc, pci->domain, pci->bus, pci->dev, pci->func);

    stubdomid = libxl_get_stubdom_id(ctx, domid);
    if (stubdomid != 0) {
        /* The stubdom gets the device first; the guest follows later. */
        pas->callback = device_pci_add_stubdom_wait;

        do_pci_add(egc, stubdomid, pas); /* must be last */
        return;
    }

    device_pci_add_stubdom_done(egc, pas, 0); /* must be last */
    return;

out:
    device_pci_add_done(egc, pas, rc); /* must be last */
}
1702
/*
 * Called once the device has been added to the stubdom.  On success, wait
 * for the stubdom's PCI backend state node to reach XenbusStateConnected
 * (continuing in device_pci_add_stubdom_ready); on any error finish
 * through device_pci_add_done().
 */
static void device_pci_add_stubdom_wait(libxl__egc *egc,
                                        pci_add_state *pas,
                                        int rc)
{
    libxl__ao_device *aodev = pas->aodev;
    STATE_AO_GC(aodev->ao);
    int stubdomid = libxl_get_stubdom_id(CTX, pas->domid);
    char *backend_state;

    if (!rc) {
        /* The device model running in the stubdom will not find the
         * device unless it is actually connected, so wait for that. */
        backend_state = GCSPRINTF("%s/state",
            libxl__domain_device_backend_path(gc, 0, stubdomid, 0,
                                              LIBXL__DEVICE_KIND_PCI));
        rc = libxl__ev_devstate_wait(ao, &pas->pciback_ds,
                                     device_pci_add_stubdom_ready,
                                     backend_state, XenbusStateConnected,
                                     LIBXL_DEVICE_MODEL_START_TIMEOUT * 1000);
        if (!rc)
            return;
    }

    device_pci_add_done(egc, pas, rc); /* must be last */
}
1728
/*
 * devstate callback for the wait started in device_pci_add_stubdom_wait():
 * forwards the result to device_pci_add_stubdom_done().
 */
static void device_pci_add_stubdom_ready(libxl__egc *egc,
                                         libxl__ev_devstate *ds,
                                         int rc)
{
    pci_add_state *pas;

    pas = CONTAINER_OF(ds, *pas, pciback_ds);
    device_pci_add_stubdom_done(egc, pas, rc); /* must be last */
}
1737
/*
 * Stage run after the stubdom part (if any): work out which physical
 * functions are involved and start adding the device to the guest itself.
 *
 * With vfunc_mask == LIBXL_PCI_FUNC_ALL a virtual slot must have been
 * given and every function of the device must be bound to pciback.  The
 * loop starts do_pci_add() for the highest-numbered function present in
 * the mask and returns; completion then goes to device_pci_add_done().
 */
static void device_pci_add_stubdom_done(libxl__egc *egc,
                                        pci_add_state *pas,
                                        int rc)
{
    STATE_AO_GC(pas->aodev->ao);
    libxl_device_pci *pci = &pas->pci;
    libxl_domid domid = pas->domid;
    unsigned int slot_base, present_funcs;
    int fn;

    if (rc) goto out;

    /* Virtual devfn with the function bits cleared. */
    slot_base = pci->vdevfn & ~7U;

    if ( pci->vfunc_mask != LIBXL_PCI_FUNC_ALL ) {
        present_funcs = (1 << pci->func);
    } else {
        if ( !(pci->vdevfn >> 3) ) {
            LOGD(ERROR, domid, "Must specify a v-slot for multi-function devices");
            rc = ERROR_INVAL;
            goto out;
        }
        if ( pci_multifunction_check(gc, pci, &present_funcs) ) {
            rc = ERROR_FAIL;
            goto out;
        }
        /* After this vfunc_mask equals present_funcs. */
        pci->vfunc_mask &= present_funcs;
    }

    rc = 0;
    for (fn = 7; fn >= 0; --fn) {
        if ( !((1 << fn) & present_funcs) )
            continue;

        if ( pci->vfunc_mask == present_funcs ) {
            pci->func = fn;
            pci->vdevfn = slot_base | fn;
        } else {
            /* Not passing through a whole block of functions: always use
             * virtual function 0, otherwise the guest won't see the
             * device. */
            pci->vdevfn = slot_base;
        }
        pas->callback = device_pci_add_done;
        do_pci_add(egc, domid, pas); /* must be last */
        return;
    }

out:
    device_pci_add_done(egc, pas, rc);
}
1791
/*
 * End of the add operation.  On failure the error is logged (by name when
 * the device has one, by BDF otherwise) and the "domid" info node of the
 * device is removed.  The private device copy is disposed and the result
 * is delivered through aodev->callback.
 */
static void device_pci_add_done(libxl__egc *egc,
                                pci_add_state *pas,
                                int rc)
{
    EGC_GC;
    libxl_device_pci *pci = &pas->pci;
    libxl_domid domid = pas->domid;
    libxl__ao_device *aodev = pas->aodev;

    if (rc) {
        if (!pci->name) {
            LOGD(ERROR, domid,
                 "libxl__device_pci_add failed for PCI device %x:%x:%x.%x (rc %d)",
                 pci->domain, pci->bus, pci->dev, pci->func,
                 rc);
        } else {
            LOGD(ERROR, domid,
                 "libxl__device_pci_add failed for PCI device '%s' (rc %d)",
                 pci->name,
                 rc);
        }
        pci_info_xs_remove(gc, pci, "domid");
    }

    libxl_device_pci_dispose(pci);

    aodev->rc = rc;
    aodev->callback(egc, aodev);
}
1821
/*
 * State for adding all PCI devices of a domain configuration
 * (libxl__add_pcis): an inner multidev collects the per-device operations
 * and, once they are all done, completes outer_aodev in add_pcis_done.
 */
typedef struct {
    libxl__multidev multidev;       /* inner multidev, one aodev per device */
    libxl__ao_device *outer_aodev;  /* aodev prepared on the caller's multidev */
    libxl_domain_config *d_config;  /* configuration listing the devices */
    libxl_domid domid;              /* domain the devices are added to */
} add_pcis_state;

/* Completion callback of the inner multidev. */
static void add_pcis_done(libxl__egc *, libxl__multidev *, int rc);
1830
/*
 * Add every PCI device listed in @d_config to @domid (with
 * starting == true).  The per-device operations run on a private multidev;
 * its completion (add_pcis_done) finishes the single aodev prepared on the
 * caller's @multidev.
 */
static void libxl__add_pcis(libxl__egc *egc, libxl__ao *ao, uint32_t domid,
                            libxl_domain_config *d_config,
                            libxl__multidev *multidev)
{
    AO_GC;
    add_pcis_state *state;
    int idx = 0;

    /* A new multidev is started so that libxl__create_pci_backend can be
     * executed only once. */

    GCNEW(state);
    state->outer_aodev = libxl__multidev_prepare(multidev);
    state->domid = domid;
    state->d_config = d_config;
    state->multidev.callback = add_pcis_done;
    libxl__multidev_begin(ao, &state->multidev);

    while (idx < d_config->num_pcidevs) {
        libxl__ao_device *aodev = libxl__multidev_prepare(&state->multidev);

        libxl__device_pci_add(egc, domid, &d_config->pcidevs[idx],
                              true, aodev);
        idx++;
    }

    libxl__multidev_prepared(egc, &state->multidev, 0);
}
1857
/*
 * Completion of the inner multidev started by libxl__add_pcis(): pass the
 * combined result to the outer aodev and run its callback.
 */
static void add_pcis_done(libxl__egc *egc, libxl__multidev *multidev,
                          int rc)
{
    EGC_GC;
    add_pcis_state *state = CONTAINER_OF(multidev, *state, multidev);
    libxl__ao_device *outer = state->outer_aodev;

    outer->rc = rc;
    outer->callback(egc, outer);
}
1868
/*
 * Ask a qemu-traditional device model to remove @pci via xenstore.
 *
 * The current device-model state is saved and the BDF written to the
 * "/parameter" node.  Unless @force is set, "pci-rem" is issued for
 * virtual function 0 only and "pci-removed" is awaited.  Afterwards the
 * saved state is written back to "/state".
 *
 * Returns 0 on success, ERROR_FAIL if the device model did not confirm
 * the removal (the state is not restored in that case).
 */
static int qemu_pci_remove_xenstore(libxl__gc *gc, uint32_t domid,
                                    libxl_device_pci *pci, int force)
{
    libxl_ctx *ctx = libxl__gc_owner(gc);
    char *state;
    char *path;
    uint32_t dm_domid;

    dm_domid = libxl_get_stubdom_id(CTX, domid);

    path = DEVICE_MODEL_XS_PATH(gc, dm_domid, domid, "/state");
    state = libxl__xs_read(gc, XBT_NULL, path);
    path = DEVICE_MODEL_XS_PATH(gc, dm_domid, domid, "/parameter");
    libxl__xs_printf(gc, XBT_NULL, path, PCI_BDF, pci->domain,
                     pci->bus, pci->dev, pci->func);

    /* Remove all functions at once atomically by only signalling
     * device-model for function 0 */
    if ( !force && (pci->vdevfn & 0x7) == 0 ) {
        libxl__qemu_traditional_cmd(gc, domid, "pci-rem");
        if (libxl__wait_for_device_model_deprecated(gc, domid, "pci-removed",
                                                    NULL, NULL, NULL) < 0) {
            LOGD(ERROR, domid, "Device Model didn't respond in time");
            /* This depends on guest operating system acknowledging the
             * SCI, if it doesn't respond in time then we may wish to
             * force the removal.
             */
            return ERROR_FAIL;
        }
    }

    path = DEVICE_MODEL_XS_PATH(gc, dm_domid, domid, "/state");
    if (state) {
        xs_write(ctx->xsh, XBT_NULL, path, state, strlen(state));
    } else {
        /* The initial read failed, so there is nothing to put back;
         * calling strlen() on the NULL result would crash. */
        LOGD(ERROR, domid, "no saved device model state to restore at %s",
             path);
    }

    return 0;
}
1904
/*
 * State carried through the asynchronous PCI remove operation
 * (do_pci_remove and the callbacks chained from it).
 */
typedef struct pci_remove_state {
    libxl__ao_device *aodev;        /* its ao drives the operation */
    libxl_domid domid;              /* domain the device is removed from */
    libxl_device_pci pci;           /* device being removed */
    bool force;                     /* forwarded to qemu_pci_remove_xenstore */
    bool hvm;                       /* set by do_pci_remove for HVM domains */
    /* The next four fields are not used in the code visible here;
     * presumably they drive the per-function iteration and the stubdom
     * removal -- confirm against libxl__device_pci_remove_common. */
    unsigned int orig_vdev;
    unsigned int pfunc_mask;
    int next_func;
    libxl__ao_device stubdom_aodev;
    libxl__xswait_state xswait;     /* watch on the device model "/state" */
    libxl__ev_qmp qmp;              /* QMP command in flight */
    libxl__ev_time timeout;         /* overall QMP timeout */
    libxl__ev_time retry_timer;     /* timer for pci_remove_qmp_retry_timer_cb */
} pci_remove_state;
1920
/* Forward declarations of the stages and callbacks of the PCI remove
 * operation. */
static void libxl__device_pci_remove_common(libxl__egc *egc,
    uint32_t domid, libxl_device_pci *pci, bool force,
    libxl__ao_device *aodev);
static void device_pci_remove_common_next(libxl__egc *egc,
    pci_remove_state *prs, int rc);

static void pci_remove_qemu_trad_watch_state_cb(libxl__egc *egc,
    libxl__xswait_state *xswa, int rc, const char *state);
static void pci_remove_qmp_device_del(libxl__egc *egc,
    pci_remove_state *prs);
static void pci_remove_qmp_device_del_cb(libxl__egc *egc,
    libxl__ev_qmp *qmp, const libxl__json_object *response, int rc);
static void pci_remove_qmp_retry_timer_cb(libxl__egc *egc,
    libxl__ev_time *ev, const struct timeval *requested_abs, int rc);
static void pci_remove_qmp_query_cb(libxl__egc *egc,
    libxl__ev_qmp *qmp, const libxl__json_object *response, int rc);
static void pci_remove_timeout(libxl__egc *egc,
    libxl__ev_time *ev, const struct timeval *requested_abs, int rc);
static void pci_remove_detached(libxl__egc *egc,
    pci_remove_state *prs, int rc);
static void pci_remove_stubdom_done(libxl__egc *egc,
    libxl__ao_device *aodev);
static void pci_remove_done(libxl__egc *egc,
    pci_remove_state *prs, int rc);
1945
/*
 * Start removing prs->pci from prs->domid.
 *
 * Fails with ERROR_FAIL if the domain's PCI device list cannot be
 * obtained and with ERROR_INVAL if the device is not in it.  For HVM
 * domains the device model is asked to unplug the device first
 * (qemu-traditional through a xenstore watch, qemu-xen through QMP); any
 * other device model version is ERROR_INVAL.  Non-HVM domains, and every
 * error, go straight to pci_remove_detached().
 */
static void do_pci_remove(libxl__egc *egc, pci_remove_state *prs)
{
    STATE_AO_GC(prs->aodev->ao);
    libxl_ctx *ctx = libxl__gc_owner(gc);
    libxl_device_pci *attached_list;
    bool is_attached;
    uint32_t domid = prs->domid;
    libxl_domain_type type = libxl__domain_type(gc, domid);
    libxl_device_pci *pci = &prs->pci;
    int rc, count;

    attached_list = libxl_device_pci_list(ctx, domid, &count);
    if (!attached_list) {
        rc = ERROR_FAIL;
        goto out;
    }

    is_attached = is_pci_in_array(attached_list, count, pci);
    libxl_device_pci_list_free(attached_list, count);

    if (!is_attached) {
        LOGD(ERROR, domid, "PCI device not attached to this domain");
        rc = ERROR_INVAL;
        goto out;
    }

    if (type != LIBXL_DOMAIN_TYPE_HVM) {
        /* No device model involved. */
        rc = 0;
        goto out;
    }

    prs->hvm = true;
    switch (libxl__device_model_version_running(gc, domid)) {
    case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN_TRADITIONAL:
        prs->xswait.ao = ao;
        prs->xswait.what = "Device Model";
        prs->xswait.path = DEVICE_MODEL_XS_PATH(gc,
            libxl_get_stubdom_id(CTX, domid), domid, "/state");
        prs->xswait.timeout_ms = LIBXL_DEVICE_MODEL_START_TIMEOUT * 1000;
        prs->xswait.callback = pci_remove_qemu_trad_watch_state_cb;
        rc = libxl__xswait_start(gc, &prs->xswait);
        if (rc)
            break;
        return;
    case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN:
        pci_remove_qmp_device_del(egc, prs); /* must be last */
        return;
    default:
        rc = ERROR_INVAL;
        break;
    }

out:
    pci_remove_detached(egc, prs, rc); /* must be last */
}
1996
/*
 * xswait callback for the qemu-traditional remove path.  Does nothing
 * while check_qemu_running() reports ERROR_NOT_READY; once the device
 * model is running the device is removed through xenstore.  Any other
 * outcome, or the result of the removal, goes to pci_remove_detached().
 */
static void pci_remove_qemu_trad_watch_state_cb(libxl__egc *egc,
                                                libxl__xswait_state *xswa,
                                                int rc,
                                                const char *state)
{
    pci_remove_state *prs = CONTAINER_OF(xswa, *prs, xswait);
    STATE_AO_GC(prs->aodev->ao);

    rc = check_qemu_running(gc, prs->domid, xswa, rc, state);
    if (rc == ERROR_NOT_READY)
        return;

    if (!rc)
        rc = qemu_pci_remove_xenstore(gc, prs->domid, &prs->pci, prs->force);

    pci_remove_detached(egc, prs, rc);
}
2020
/*
 * Start removal through QMP: arm the overall command timeout, then send
 * "device_del" for the passthrough device's qdev id.
 *
 * On success the operation continues in pci_remove_qmp_device_del_cb() (or
 * pci_remove_timeout()).  If either step fails, the error is handed to
 * pci_remove_detached().
 */
static void pci_remove_qmp_device_del(libxl__egc *egc,
                                      pci_remove_state *prs)
{
    STATE_AO_GC(prs->aodev->ao);
    libxl__json_object *args = NULL;
    int rc;

    rc = libxl__ev_time_register_rel(ao, &prs->timeout,
                                     pci_remove_timeout,
                                     LIBXL_QMP_CMD_TIMEOUT * 1000);
    if (!rc) {
        QMP_PARAMETERS_SPRINTF(&args, "id", PCI_PT_QDEV_ID,
                               prs->pci.bus, prs->pci.dev, prs->pci.func);
        prs->qmp.callback = pci_remove_qmp_device_del_cb;
        rc = libxl__ev_qmp_send(egc, &prs->qmp, "device_del", args);
    }

    if (rc)
        pci_remove_detached(egc, prs, rc);
}
2046
/*
 * QMP callback for the "device_del" command.
 *
 * A failed command goes straight to pci_remove_detached().  Otherwise start
 * polling QEMU until the device has actually disappeared, by invoking the
 * retry-timer callback directly for the first query.
 */
static void pci_remove_qmp_device_del_cb(libxl__egc *egc,
                                         libxl__ev_qmp *qmp,
                                         const libxl__json_object *response,
                                         int rc)
{
    EGC_GC;
    pci_remove_state *prs = CONTAINER_OF(qmp, *prs, qmp);

    if (rc) {
        pci_remove_detached(egc, prs, rc);
        return;
    }

    /*
     * The command has been sent; now wait for QEMU to confirm that the
     * device is gone.
     *
     * TODO: rather than polling with { ev_timer ; query-pci }, it may be
     * possible to listen for the QMP event QEMU emits once the passthrough
     * pci-device has been removed.
     */
    pci_remove_qmp_retry_timer_cb(egc, &prs->retry_timer, NULL,
                                  ERROR_TIMEDOUT);
}
2069
/*
 * Retry-timer callback: issue a "query-pci" QMP command whose answer is
 * examined by pci_remove_qmp_query_cb().
 *
 * The rc passed in is not consulted; it is overwritten with the result of
 * sending the command.  A send failure is handed to pci_remove_detached().
 */
static void pci_remove_qmp_retry_timer_cb(libxl__egc *egc, libxl__ev_time *ev,
                                          const struct timeval *requested_abs,
                                          int rc)
{
    EGC_GC;
    pci_remove_state *prs = CONTAINER_OF(ev, *prs, retry_timer);

    prs->qmp.callback = pci_remove_qmp_query_cb;
    rc = libxl__ev_qmp_send(egc, &prs->qmp, "query-pci", NULL);
    if (!rc)
        return;

    pci_remove_detached(egc, prs, rc);
}
2085
/*
 * QMP callback for "query-pci".
 *
 * Walks every device of every bus in the response looking for the qdev id
 * of the device being removed.  If it is still listed, the retry timer is
 * armed for another query one second later.  If it is absent, or on any
 * error, control passes to pci_remove_detached() with the current status.
 */
static void pci_remove_qmp_query_cb(libxl__egc *egc,
                                    libxl__ev_qmp *qmp,
                                    const libxl__json_object *response,
                                    int rc)
{
    EGC_GC;
    pci_remove_state *prs = CONTAINER_OF(qmp, *prs, qmp);
    libxl__ao *const ao = prs->aodev->ao;
    const char *wanted_id;
    int bus_idx, dev_idx;

    if (rc) goto out;

    libxl__ev_qmp_dispose(gc, qmp);

    wanted_id = GCSPRINTF(PCI_PT_QDEV_ID,
                          prs->pci.bus, prs->pci.dev, prs->pci.func);

    /*
     * Shape of the query-pci response:
     *   [{ 'devices': [ 'qdev_id': 'str', ... ], ... }]
     */
    for (bus_idx = 0; ; bus_idx++) {
        const libxl__json_object *bus;
        const libxl__json_object *devices;

        bus = libxl__json_array_get(response, bus_idx);
        if (!bus)
            break;

        devices = libxl__json_map_get("devices", bus, JSON_ARRAY);
        if (!devices) {
            rc = ERROR_QEMU_API;
            goto out;
        }

        for (dev_idx = 0; ; dev_idx++) {
            const libxl__json_object *device;
            const libxl__json_object *id_obj;
            const char *id;

            device = libxl__json_array_get(devices, dev_idx);
            if (!device)
                break;

            id_obj = libxl__json_map_get("qdev_id", device, JSON_STRING);
            if (!id_obj) {
                rc = ERROR_QEMU_API;
                goto out;
            }

            id = libxl__json_object_get_string(id_obj);
            if (!id || strcmp(wanted_id, id))
                continue;

            /* The device is still known to QEMU: poll again later. */
            rc = libxl__ev_time_register_rel(ao, &prs->retry_timer,
                                             pci_remove_qmp_retry_timer_cb,
                                             1000);
            if (rc) goto out;
            return;
        }
    }

out:
    pci_remove_detached(egc, prs, rc); /* must be last */
}
2145
/*
 * Callback for the overall QMP removal timeout: log a warning naming the
 * qdev id, then continue with pci_remove_detached().
 */
static void pci_remove_timeout(libxl__egc *egc, libxl__ev_time *ev,
                               const struct timeval *requested_abs,
                               int rc)
{
    EGC_GC;
    pci_remove_state *prs = CONTAINER_OF(ev, *prs, timeout);

    LOGD(WARN, prs->domid, "timed out waiting for DM to remove "
         PCI_PT_QDEV_ID, prs->pci.bus, prs->pci.dev, prs->pci.func);

    /*
     * Even after a timeout the caller may want the device torn down anyway
     * (force == true), so the decision on how to treat the error is left to
     * pci_remove_detached().
     */
    pci_remove_detached(egc, prs, rc);
}
2164
pci_remove_detached(libxl__egc * egc,pci_remove_state * prs,int rc)2165 static void pci_remove_detached(libxl__egc *egc,
2166 pci_remove_state *prs,
2167 int rc)
2168 {
2169 STATE_AO_GC(prs->aodev->ao);
2170 libxl_ctx *ctx = libxl__gc_owner(gc);
2171 unsigned int start = 0, end = 0, flags = 0, size = 0;
2172 int irq = 0, i, stubdomid = 0;
2173 const char *sysfs_path;
2174 FILE *f;
2175 uint32_t domainid = prs->domid;
2176 bool isstubdom;
2177
2178 /* Convenience aliases */
2179 libxl_device_pci *const pci = &prs->pci;
2180 libxl_domid domid = prs->domid;
2181
2182 /* Cleaning QMP states ASAP */
2183 libxl__ev_qmp_dispose(gc, &prs->qmp);
2184 libxl__ev_time_deregister(gc, &prs->timeout);
2185 libxl__ev_time_deregister(gc, &prs->retry_timer);
2186
2187 if (rc && !prs->force)
2188 goto out;
2189
2190 /* Revoke the permissions */
2191 sysfs_path = GCSPRINTF(SYSFS_PCI_DEV"/"PCI_BDF"/resource",
2192 pci->domain, pci->bus, pci->dev, pci->func);
2193
2194 f = fopen(sysfs_path, "r");
2195 if (f == NULL) {
2196 LOGED(ERROR, domid, "Couldn't open %s", sysfs_path);
2197 goto skip_bar;
2198 }
2199
2200 for (i = 0; i < PROC_PCI_NUM_RESOURCES; i++) {
2201 if (fscanf(f, "0x%x 0x%x 0x%x\n", &start, &end, &flags) != 3)
2202 continue;
2203 size = end - start + 1;
2204 if (start) {
2205 if (flags & PCI_BAR_IO) {
2206 rc = xc_domain_ioport_permission(ctx->xch, domid, start,
2207 size, 0);
2208 if (rc < 0)
2209 LOGED(ERROR, domid,
2210 "xc_domain_ioport_permission error 0x%x/0x%x",
2211 start,
2212 size);
2213 } else {
2214 rc = xc_domain_iomem_permission(ctx->xch, domid,
2215 start >> XC_PAGE_SHIFT,
2216 (size + (XC_PAGE_SIZE - 1)) >> XC_PAGE_SHIFT,
2217 0);
2218 if (rc < 0)
2219 LOGED(ERROR, domid,
2220 "xc_domain_iomem_permission error 0x%x/0x%x",
2221 start,
2222 size);
2223 }
2224 }
2225 }
2226 fclose(f);
2227
2228 skip_bar:
2229 if (!pci_supp_legacy_irq())
2230 goto skip_legacy_irq;
2231
2232 sysfs_path = GCSPRINTF(SYSFS_PCI_DEV"/"PCI_BDF"/irq", pci->domain,
2233 pci->bus, pci->dev, pci->func);
2234
2235 f = fopen(sysfs_path, "r");
2236 if (f == NULL) {
2237 LOGED(ERROR, domid, "Couldn't open %s", sysfs_path);
2238 goto skip_legacy_irq;
2239 }
2240
2241 if ((fscanf(f, "%u", &irq) == 1) && irq) {
2242 rc = xc_physdev_unmap_pirq(ctx->xch, domid, irq);
2243 if (rc < 0) {
2244 /*
2245 * QEMU may have already unmapped the IRQ. So the error
2246 * may be spurious. For now, still print an error message as
2247 * it is not easy to distinguished between valid and
2248 * spurious error.
2249 */
2250 LOGED(ERROR, domid, "xc_physdev_unmap_pirq irq=%d", irq);
2251 }
2252 rc = xc_domain_irq_permission(ctx->xch, domid, irq, 0);
2253 if (rc < 0) {
2254 LOGED(ERROR, domid, "xc_domain_irq_permission irq=%d", irq);
2255 }
2256 }
2257
2258 fclose(f);
2259
2260 skip_legacy_irq:
2261
2262 isstubdom = libxl_is_stubdom(CTX, domid, &domainid);
2263
2264 /* don't do multiple resets while some functions are still passed through */
2265 if ((pci->vdevfn & 0x7) == 0) {
2266 libxl__device_pci_reset(gc, pci->domain, pci->bus, pci->dev, pci->func);
2267 }
2268
2269 if (!isstubdom) {
2270 rc = xc_deassign_device(CTX->xch, domid, pci_encode_bdf(pci));
2271 if (rc < 0 && (prs->hvm || errno != ENOSYS))
2272 LOGED(ERROR, domainid, "xc_deassign_device failed");
2273 }
2274
2275 stubdomid = libxl_get_stubdom_id(CTX, domid);
2276 if (stubdomid != 0) {
2277 libxl_device_pci *pci_s;
2278 libxl__ao_device *const stubdom_aodev = &prs->stubdom_aodev;
2279
2280 GCNEW(pci_s);
2281 libxl_device_pci_init(pci_s);
2282 libxl_device_pci_copy(CTX, pci_s, pci);
2283
2284 libxl__prepare_ao_device(ao, stubdom_aodev);
2285 stubdom_aodev->action = LIBXL__DEVICE_ACTION_REMOVE;
2286 stubdom_aodev->callback = pci_remove_stubdom_done;
2287 stubdom_aodev->update_json = prs->aodev->update_json;
2288 libxl__device_pci_remove_common(egc, stubdomid, pci_s,
2289 prs->force, stubdom_aodev);
2290 return;
2291 }
2292
2293 rc = 0;
2294 out:
2295 pci_remove_done(egc, prs, rc);
2296 }
2297
/*
 * Completion callback for the nested removal of the device from the
 * stubdomain.  The outer removal is finished with a success status; the
 * nested operation's own result is not examined here.
 */
static void pci_remove_stubdom_done(libxl__egc *egc,
                                    libxl__ao_device *aodev)
{
    pci_remove_state *prs = CONTAINER_OF(aodev, *prs, stubdom_aodev);
    const int rc = 0;

    pci_remove_done(egc, prs, rc);
}
2305
/*
 * Final step for one function of the device: on success drop its xenstore
 * entry, then in every case move on through device_pci_remove_common_next()
 * with the same status.
 */
static void pci_remove_done(libxl__egc *egc,
                            pci_remove_state *prs,
                            int rc)
{
    EGC_GC;

    if (!rc)
        libxl__device_pci_remove_xenstore(gc, prs->domid, &prs->pci);

    device_pci_remove_common_next(egc, prs, rc);
}
2318
/*
 * Entry point shared by all PCI removal paths.
 *
 * egc:   event generation context.
 * domid: domain the device is to be removed from.
 * pci:   device to remove; it is copied, so the caller keeps ownership of
 *        its own structure.
 * force: carry on with the teardown even if the device model reports an
 *        error.
 * aodev: completion is reported through aodev->rc and aodev->callback.
 *
 * Allocates and initialises the removal state, resolves a named device to
 * its BDF, works out which functions have to be removed and then starts
 * iterating over them with device_pci_remove_common_next().
 */
static void libxl__device_pci_remove_common(libxl__egc *egc,
                                            uint32_t domid,
                                            libxl_device_pci *pci,
                                            bool force,
                                            libxl__ao_device *aodev)
{
    STATE_AO_GC(aodev->ao);
    int rc;
    pci_remove_state *prs;

    GCNEW(prs);
    prs->aodev = aodev;
    prs->domid = domid;
    prs->force = force;

    /*
     * Initialise every event-related member before the first point that can
     * fail: device_pci_remove_common_next() disposes of / stops / deregisters
     * all of them on its exit path, including when it is entered with an
     * error, so they must never be left in their merely zeroed state.
     */
    libxl__xswait_init(&prs->xswait);
    libxl__ev_qmp_init(&prs->qmp);
    prs->qmp.ao = prs->aodev->ao;
    prs->qmp.domid = prs->domid;
    prs->qmp.payload_fd = -1;
    libxl__ev_time_init(&prs->timeout);
    libxl__ev_time_init(&prs->retry_timer);

    libxl_device_pci_copy(CTX, &prs->pci, pci);
    pci = &prs->pci;

    /* If the device is named then we need to look up the BDF */
    if (pci->name) {
        rc = name2bdf(gc, pci);
        if (rc) goto out;
    }

    /* Virtual slot with the function bits masked off. */
    prs->orig_vdev = pci->vdevfn & ~7U;

    if ( pci->vfunc_mask == LIBXL_PCI_FUNC_ALL ) {
        if ( pci_multifunction_check(gc, pci, &prs->pfunc_mask) ) {
            rc = ERROR_FAIL;
            goto out;
        }
        pci->vfunc_mask &= prs->pfunc_mask;
    } else {
        prs->pfunc_mask = (1 << pci->func);
    }

    rc = 0;
    /* Functions are visited from the highest number down to 0. */
    prs->next_func = 7;
out:
    device_pci_remove_common_next(egc, prs, rc);
}
2368
/*
 * Drive the per-function removal loop.
 *
 * With rc == 0, pick the next function (counting prs->next_func downwards)
 * whose bit is set in the physical function mask, set up pci->func and
 * pci->vdevfn for it and start do_pci_remove(); this function is entered
 * again once that removal has finished.
 *
 * When no function is left, or when rc is non-zero, release all event
 * resources held by the removal state, drop the "domid" info node on
 * success, dispose of the device copy and report rc through the aodev
 * callback.
 */
static void device_pci_remove_common_next(libxl__egc *egc,
                                          pci_remove_state *prs,
                                          int rc)
{
    EGC_GC;

    /* Convenience aliases */
    libxl_device_pci *const pci = &prs->pci;
    libxl__ao_device *const aodev = prs->aodev;
    const unsigned int pfunc_mask = prs->pfunc_mask;
    const unsigned int orig_vdev = prs->orig_vdev;

    if (!rc) {
        while (prs->next_func >= 0) {
            const int func = prs->next_func;

            prs->next_func--;
            if ( !((1 << func) & pfunc_mask) )
                continue;

            if ( pci->vfunc_mask == pfunc_mask ) {
                pci->func = func;
                pci->vdevfn = orig_vdev | func;
            } else {
                pci->vdevfn = orig_vdev;
            }
            do_pci_remove(egc, prs);
            return;
        }
    }

    /* Either every function has been handled or an error occurred. */
    libxl__ev_qmp_dispose(gc, &prs->qmp);
    libxl__xswait_stop(gc, &prs->xswait);
    libxl__ev_time_deregister(gc, &prs->timeout);
    libxl__ev_time_deregister(gc, &prs->retry_timer);

    if (rc == 0)
        pci_info_xs_remove(gc, pci, "domid");

    libxl_device_pci_dispose(pci);
    aodev->rc = rc;
    aodev->callback(egc, aodev);
}
2411
/*
 * Public API: start an asynchronous, non-forced removal of @pci from domain
 * @domid.  Completion is reported through device_addrm_aocomplete().
 */
int libxl_device_pci_remove(libxl_ctx *ctx, uint32_t domid,
                            libxl_device_pci *pci,
                            const libxl_asyncop_how *ao_how)
{
    AO_CREATE(ctx, domid, ao_how);
    libxl__ao_device *aodev;
    const bool force = false;

    GCNEW(aodev);
    libxl__prepare_ao_device(ao, aodev);
    aodev->update_json = true;
    aodev->callback = device_addrm_aocomplete;
    aodev->action = LIBXL__DEVICE_ACTION_REMOVE;

    libxl__device_pci_remove_common(egc, domid, pci, force, aodev);

    return AO_INPROGRESS;
}
2428
/*
 * Public API: start an asynchronous, forced removal of @pci from domain
 * @domid.  Completion is reported through device_addrm_aocomplete().
 */
int libxl_device_pci_destroy(libxl_ctx *ctx, uint32_t domid,
                             libxl_device_pci *pci,
                             const libxl_asyncop_how *ao_how)
{
    AO_CREATE(ctx, domid, ao_how);
    libxl__ao_device *aodev;
    const bool force = true;

    GCNEW(aodev);
    libxl__prepare_ao_device(ao, aodev);
    aodev->update_json = true;
    aodev->callback = device_addrm_aocomplete;
    aodev->action = LIBXL__DEVICE_ACTION_REMOVE;

    libxl__device_pci_remove_common(egc, domid, pci, force, aodev);

    return AO_INPROGRESS;
}
2444
/*
 * Fill a libxl_device_pci from the xenstore backend entries with index @nr
 * found under @be_path.
 *
 * gc:      allocation context for temporary strings.
 * be_path: xenstore path of the PCI backend directory.
 * nr:      index of the device within the backend ("dev-<nr>", ...).
 * data:    the libxl_device_pci to initialise and fill in.
 *
 * The "dev-<nr>" node holding the BDF is mandatory; "vdevfn-<nr>",
 * "name-<nr>" and "opts-<nr>" are optional.
 *
 * Returns 0 on success, or ERROR_FAIL if the BDF node is missing or cannot
 * be parsed.
 */
static int libxl__device_pci_from_xs_be(libxl__gc *gc,
                                        const char *be_path,
                                        libxl_devid nr, void *data)
{
    char *s;
    unsigned int domain = 0, bus = 0, dev = 0, func = 0;
    libxl_device_pci *pci = data;

    libxl_device_pci_init(pci);

    s = libxl__xs_read(gc, XBT_NULL, GCSPRINTF("%s/dev-%d", be_path, nr));
    if (!s) {
        /* Passing NULL to sscanf() would be undefined behaviour. */
        LOG(ERROR, "%s/dev-%d is missing", be_path, nr);
        return ERROR_FAIL;
    }
    if (sscanf(s, PCI_BDF, &domain, &bus, &dev, &func) != 4) {
        LOG(ERROR, "%s/dev-%d: cannot parse BDF \"%s\"", be_path, nr, s);
        return ERROR_FAIL;
    }

    pci_struct_fill(pci, domain, bus, dev, func);

    s = libxl__xs_read(gc, XBT_NULL, GCSPRINTF("%s/vdevfn-%d", be_path, nr));
    if (s)
        pci->vdevfn = strtol(s, (char **) NULL, 16);

    s = libxl__xs_read(gc, XBT_NULL, GCSPRINTF("%s/name-%d", be_path, nr));
    if (s)
        /* The structure outlives the gc, hence NOGC. */
        pci->name = libxl__strdup(NOGC, s);

    s = libxl__xs_read(gc, XBT_NULL, GCSPRINTF("%s/opts-%d", be_path, nr));
    if (s) {
        char *saveptr;
        char *key;

        /*
         * The option string is a list of key=value pairs separated by
         * commas.  Both separators are tokenised alike, so the token that
         * follows a recognised key is taken as its value.  An empty string
         * yields no token at all, and a trailing key without a value ends
         * the parsing instead of dereferencing NULL.
         */
        for (key = strtok_r(s, ",=", &saveptr);
             key != NULL;
             key = strtok_r(NULL, ",=", &saveptr)) {
            const char *val;

            while (*key == ' ')
                key++;

            if (!strcmp(key, "msitranslate")) {
                val = strtok_r(NULL, ",=", &saveptr);
                if (!val) break;
                pci->msitranslate = atoi(val);
            } else if (!strcmp(key, "power_mgmt")) {
                val = strtok_r(NULL, ",=", &saveptr);
                if (!val) break;
                pci->power_mgmt = atoi(val);
            } else if (!strcmp(key, "permissive")) {
                val = strtok_r(NULL, ",=", &saveptr);
                if (!val) break;
                pci->permissive = atoi(val);
            } else if (!strcmp(key, "rdm_policy")) {
                val = strtok_r(NULL, ",=", &saveptr);
                if (!val) break;
                libxl_rdm_reserve_policy_from_string(val, &pci->rdm_policy);
            }
        }
    }

    return 0;
}
2493
/*
 * Read the number of devices recorded in "<be_path>/num_devs".
 *
 * On success *num is set and 0 is returned.  If the node cannot be read,
 * ERROR_FAIL is returned and *num is left untouched.
 */
static int libxl__device_pci_get_num(libxl__gc *gc, const char *be_path,
                                     unsigned int *num)
{
    char *num_devs =
        libxl__xs_read(gc, XBT_NULL, GCSPRINTF("%s/num_devs", be_path));

    if (!num_devs)
        return ERROR_FAIL;

    *num = atoi(num_devs);
    return 0;
}
2508
/*
 * Store in *path the backend path returned by
 * libxl__domain_device_backend_path() for the PCI device kind of domain
 * @domid, with 0 passed for the other two numeric arguments.  Always
 * returns 0.
 */
static int libxl__device_pci_get_path(libxl__gc *gc, uint32_t domid,
                                      char **path)
{
    char *be_path = libxl__domain_device_backend_path(gc, 0, domid, 0,
                                                      LIBXL__DEVICE_KIND_PCI);

    *path = be_path;
    return 0;
}
2517
/*
 * Start a forced removal of every PCI device currently listed for @domid,
 * one multidev sub-operation per device.  Does nothing if the device list
 * cannot be obtained.
 */
void libxl__device_pci_destroy_all(libxl__egc *egc, uint32_t domid,
                                   libxl__multidev *multidev)
{
    STATE_AO_GC(multidev->ao);
    int num, idx;
    libxl_device_pci *pcis = libxl_device_pci_list(CTX, domid, &num);

    if (!pcis)
        return;

    /*
     * Removal is forced on shutdown: on HVM, qemu will not always respond
     * to the SCI interrupt, because the guest kernel has already shut the
     * devices down by the time we get here.
     */
    for (idx = 0; idx < num; idx++) {
        libxl__ao_device *aodev = libxl__multidev_prepare(multidev);

        libxl__device_pci_remove_common(egc, domid, &pcis[idx], true, aodev);
    }

    libxl_device_pci_list_free(pcis, num);
}
2541
/*
 * When gfx_passthru is enabled, grant access to the 0x20 pages starting at
 * 0xa0000 to both the id returned by libxl_get_stubdom_id() and the domain
 * itself, provided one of the configured PCI devices has class 0x030000
 * (VGA).  Only the first such device is considered.
 *
 * Returns 0 on success or when nothing had to be done, otherwise the
 * negative value returned by the failing xc_domain_iomem_permission() call.
 */
int libxl__grant_vga_iomem_permission(libxl__gc *gc, const uint32_t domid,
                                      libxl_domain_config *const d_config)
{
    int idx;

    if (!libxl_defbool_val(d_config->b_info.u.hvm.gfx_passthru))
        return 0;

    for (idx = 0 ; idx < d_config->num_pcidevs ; idx++) {
        uint64_t vga_iomem_start = 0xa0000 >> XC_PAGE_SHIFT;
        libxl_device_pci *pci = &d_config->pcidevs[idx];
        unsigned long pci_device_class;
        uint32_t stubdom_domid;
        int ret;

        /* Skip devices whose class is unreadable or not VGA. */
        if (sysfs_dev_get_class(gc, pci, &pci_device_class))
            continue;
        if (pci_device_class != 0x030000)
            continue;

        stubdom_domid = libxl_get_stubdom_id(CTX, domid);
        ret = xc_domain_iomem_permission(CTX->xch, stubdom_domid,
                                         vga_iomem_start, 0x20, 1);
        if (ret < 0) {
            LOGED(ERROR, domid,
                  "failed to give stubdom%d access to iomem range "
                  "%"PRIx64"-%"PRIx64" for VGA passthru",
                  stubdom_domid,
                  vga_iomem_start, (vga_iomem_start + 0x20 - 1));
            return ret;
        }

        ret = xc_domain_iomem_permission(CTX->xch, domid,
                                         vga_iomem_start, 0x20, 1);
        if (ret < 0) {
            LOGED(ERROR, domid,
                  "failed to give dom%d access to iomem range "
                  "%"PRIx64"-%"PRIx64" for VGA passthru",
                  domid, vga_iomem_start, (vga_iomem_start + 0x20 - 1));
            return ret;
        }

        /* The first VGA device has been handled; stop looking. */
        return 0;
    }

    return 0;
}
2586
/* Compare two PCI devices; the result is whatever COMPARE_PCI() yields. */
static int libxl_device_pci_compare(const libxl_device_pci *d1,
                                    const libxl_device_pci *d2)
{
    const int result = COMPARE_PCI(d1, d2);

    return result;
}
2592
/*
 * Instantiate the generic device-list helpers for the "pci" device type.
 * NOTE(review): presumably this is what provides libxl_device_pci_list() and
 * libxl_device_pci_list_free(), which are called elsewhere in this file -
 * confirm against the macro definition.
 */
LIBXL_DEFINE_DEVICE_LIST(pci)

/*
 * The update_devid name for PCI is defined as NULL.
 * NOTE(review): presumably consumed by DEFINE_DEVICE_TYPE_STRUCT below -
 * confirm.
 */
#define libxl__device_pci_update_devid NULL

/*
 * Device-type descriptor for PCI, wiring in the xenstore accessors
 * libxl__device_pci_get_num(), libxl__device_pci_get_path() and
 * libxl__device_pci_from_xs_be().
 */
DEFINE_DEVICE_TYPE_STRUCT(pci, PCI, pcidevs,
    .get_num = libxl__device_pci_get_num,
    .get_path = libxl__device_pci_get_path,
    .from_xenstore = libxl__device_pci_from_xs_be,
);
2602
2603 /*
2604 * Local variables:
2605 * mode: C
2606 * c-basic-offset: 4
2607 * indent-tabs-mode: nil
2608 * End:
2609 */
2610