1 /*
2 * Copyright (C) 2009 Citrix Ltd.
3 * Author Vincent Hanquez <vincent.hanquez@eu.citrix.com>
4 * Author Stefano Stabellini <stefano.stabellini@eu.citrix.com>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU Lesser General Public License as published
8 * by the Free Software Foundation; version 2.1 only. with the special
9 * exception on linking described in file LICENSE.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU Lesser General Public License for more details.
15 */
16
17 #include "libxl_osdeps.h" /* must come before any other headers */
18
19 #include "libxl_internal.h"
20
21 #define PCI_BDF "%04x:%02x:%02x.%01x"
22 #define PCI_BDF_SHORT "%02x:%02x.%01x"
23 #define PCI_BDF_VDEVFN "%04x:%02x:%02x.%01x@%02x"
24 #define PCI_OPTIONS "msitranslate=%d,power_mgmt=%d"
25 #define PCI_BDF_XSPATH "%04x-%02x-%02x-%01x"
26
/*
 * Pack the PCI address of pcidev into a single integer: the domain is
 * shifted up by 16 bits, the bus occupies bits 15-8, the device bits 7-3
 * and the function bits 2-0.
 */
static unsigned int pcidev_encode_bdf(libxl_device_pci *pcidev)
{
    unsigned int encoded = pcidev->domain << 16;

    encoded |= (pcidev->bus & 0xff) << 8;
    encoded |= (pcidev->dev & 0x1f) << 3;
    encoded |= (pcidev->func & 0x7);

    return encoded;
}
38
/*
 * Record a PCI address (domain, bus, dev, func) and a virtual devfn in
 * pcidev. No other member of the structure is written.
 */
static void pcidev_struct_fill(libxl_device_pci *pcidev, unsigned int domain,
                               unsigned int bus, unsigned int dev,
                               unsigned int func, unsigned int vdevfn)
{
    /* Physical address of the device. */
    pcidev->domain = domain;
    pcidev->bus    = bus;
    pcidev->dev    = dev;
    pcidev->func   = func;

    /* Requested virtual slot/function. */
    pcidev->vdevfn = vdevfn;
}
49
/*
 * Append to the key/value list `back` the backend entries describing device
 * number `num`: key-N and dev-N (both the BDF string), vdevfn-N (only when
 * pcidev->vdevfn is non-zero), opts-N and state-N (XenbusStateInitialising).
 */
static void libxl_create_pci_backend_device(libxl__gc *gc, flexarray_t *back, int num, libxl_device_pci *pcidev)
{
    /* key-N and dev-N carry the same BDF text, so format it once. */
    char *bdf = GCSPRINTF(PCI_BDF, pcidev->domain, pcidev->bus,
                          pcidev->dev, pcidev->func);
    char *opts = GCSPRINTF("msitranslate=%d,power_mgmt=%d,permissive=%d",
                           pcidev->msitranslate, pcidev->power_mgmt,
                           pcidev->permissive);

    flexarray_append_pair(back, GCSPRINTF("key-%d", num), bdf);
    flexarray_append_pair(back, GCSPRINTF("dev-%d", num), bdf);

    if (pcidev->vdevfn) {
        flexarray_append_pair(back, GCSPRINTF("vdevfn-%d", num),
                              GCSPRINTF("%x", pcidev->vdevfn));
    }

    flexarray_append_pair(back, GCSPRINTF("opts-%d", num), opts);
    flexarray_append_pair(back, GCSPRINTF("state-%d", num),
                          GCSPRINTF("%d", XenbusStateInitialising));
}
65
/*
 * Fill in `device` for the PCI device class of guest `domid`: both the
 * backend and the frontend kind are LIBXL__DEVICE_KIND_PCI, the backend
 * domain id and both device ids are 0. gc and pcidev are not consulted.
 */
static void libxl__device_from_pcidev(libxl__gc *gc, uint32_t domid,
                                      libxl_device_pci *pcidev,
                                      libxl__device *device)
{
    /* Frontend side. */
    device->domid = domid;
    device->devid = 0;
    device->kind = LIBXL__DEVICE_KIND_PCI;

    /* Backend side. */
    device->backend_domid = 0;
    device->backend_devid = 0;
    device->backend_kind = LIBXL__DEVICE_KIND_PCI;
}
77
/*
 * Create the PCI backend/frontend xenstore entries for domain `domid`.
 *
 * The backend list receives "frontend-id", "online", "state", "domain", the
 * per-device entries for the `num` devices starting at `pcidev`, and
 * "num_devs"; the frontend list receives "backend-id" and "state".
 * Returns whatever libxl__device_generic_add() returns.
 */
int libxl__create_pci_backend(libxl__gc *gc, uint32_t domid,
                              libxl_device_pci *pcidev, int num)
{
    libxl__device device;
    flexarray_t *front_kvs = flexarray_make(gc, 16, 1);
    flexarray_t *back_kvs = flexarray_make(gc, 16, 1);
    int idx;

    LOGD(DEBUG, domid, "Creating pci backend");

    /* add pci device */
    libxl__device_from_pcidev(gc, domid, pcidev, &device);

    /* Backend header entries. */
    flexarray_append_pair(back_kvs, "frontend-id", GCSPRINTF("%d", domid));
    flexarray_append_pair(back_kvs, "online", "1");
    flexarray_append_pair(back_kvs, "state",
                          GCSPRINTF("%d", XenbusStateInitialising));
    flexarray_append_pair(back_kvs, "domain",
                          libxl__domid_to_name(gc, domid));

    /* One group of entries per device in the array. */
    idx = 0;
    while (idx < num) {
        libxl_create_pci_backend_device(gc, back_kvs, idx, &pcidev[idx]);
        idx++;
    }

    flexarray_append_pair(back_kvs, "num_devs", GCSPRINTF("%d", num));

    /* Frontend entries. */
    flexarray_append_pair(front_kvs, "backend-id", GCSPRINTF("%d", 0));
    flexarray_append_pair(front_kvs, "state",
                          GCSPRINTF("%d", XenbusStateInitialising));

    return libxl__device_generic_add(gc, XBT_NULL, &device,
                                     libxl__xs_kvs_of_flexarray(gc, back_kvs),
                                     libxl__xs_kvs_of_flexarray(gc, front_kvs),
                                     NULL);
}
111
/*
 * Add `pcidev` to the xenstore PCI backend of domain `domid` and record it in
 * the stored domain configuration.
 *
 * If the backend has no "num_devs" node yet, the whole backend is created via
 * libxl__create_pci_backend() and its result is returned. Otherwise the
 * entries for the new device are written at index num_devs, "num_devs" is
 * incremented and, unless `starting` is set, the backend "state" is set to
 * XenbusStateReconfiguring. For a PV domain that is not starting, the
 * function first waits for the backend to reach XenbusStateConnected.
 *
 * Returns 0 on success or a libxl error code. Every exit goes through the
 * `out` label so that d_config and pcidev_saved are always disposed.
 */
static int libxl__device_pci_add_xenstore(libxl__gc *gc, uint32_t domid, libxl_device_pci *pcidev, int starting)
{
    flexarray_t *back;
    char *num_devs, *be_path;
    int num = 0;
    xs_transaction_t t = XBT_NULL;
    libxl__device *device;
    int rc;
    libxl_domain_type domtype;
    libxl_domain_config d_config;
    libxl_device_pci pcidev_saved;
    libxl__domain_userdata_lock *lock = NULL;

    libxl_domain_config_init(&d_config);
    libxl_device_pci_init(&pcidev_saved);
    /* Keep a private copy of the device for the domain configuration. */
    libxl_device_pci_copy(CTX, &pcidev_saved, pcidev);

    be_path = GCSPRINTF("%s/backend/pci/%d/0", libxl__xs_get_dompath(gc, 0), domid);
    num_devs = libxl__xs_read(gc, XBT_NULL, GCSPRINTF("%s/num_devs", be_path));
    if (!num_devs) {
        /* No backend yet: create it with this single device. */
        rc = libxl__create_pci_backend(gc, domid, pcidev, 1);
        goto out;
    }

    domtype = libxl__domain_type(gc, domid);
    if (domtype == LIBXL_DOMAIN_TYPE_INVALID) {
        rc = ERROR_FAIL;
        goto out;
    }

    if (!starting && domtype == LIBXL_DOMAIN_TYPE_PV) {
        if (libxl__wait_for_backend(gc, be_path, GCSPRINTF("%d", XenbusStateConnected)) < 0) {
            rc = ERROR_FAIL;
            goto out;
        }
    }

    back = flexarray_make(gc, 16, 1);

    LOGD(DEBUG, domid, "Adding new pci device to xenstore");
    num = atoi(num_devs);
    libxl_create_pci_backend_device(gc, back, num, pcidev);
    flexarray_append_pair(back, "num_devs", GCSPRINTF("%d", num + 1));
    if (!starting)
        flexarray_append_pair(back, "state", GCSPRINTF("%d", XenbusStateReconfiguring));

    GCNEW(device);
    libxl__device_from_pcidev(gc, domid, pcidev, device);

    lock = libxl__lock_domain_userdata(gc, domid);
    if (!lock) {
        rc = ERROR_LOCK_FAIL;
        goto out;
    }

    rc = libxl__get_domain_configuration(gc, domid, &d_config);
    if (rc) goto out;

    device_add_domain_config(gc, &d_config, &libxl__pcidev_devtype,
                             &pcidev_saved);

    rc = libxl__dm_check_start(gc, &d_config, domid);
    if (rc) goto out;

    /* Retry the transaction until it commits (rc == 0) or fails (rc < 0). */
    for (;;) {
        rc = libxl__xs_transaction_start(gc, &t);
        if (rc) goto out;

        rc = libxl__set_domain_configuration(gc, domid, &d_config);
        if (rc) goto out;

        libxl__xs_writev(gc, t, be_path, libxl__xs_kvs_of_flexarray(gc, back));

        rc = libxl__xs_transaction_commit(gc, &t);
        if (!rc) break;
        if (rc < 0) goto out;
    }

out:
    /*
     * NOTE(review): the early exits reach this with t == XBT_NULL; this
     * assumes libxl__xs_transaction_abort() is a no-op in that case --
     * confirm against libxl_internal.h.
     */
    libxl__xs_transaction_abort(gc, &t);
    if (lock) libxl__unlock_domain_userdata(lock);
    libxl_device_pci_dispose(&pcidev_saved);
    libxl_domain_config_dispose(&d_config);
    return rc;
}
190
libxl__device_pci_remove_xenstore(libxl__gc * gc,uint32_t domid,libxl_device_pci * pcidev)191 static int libxl__device_pci_remove_xenstore(libxl__gc *gc, uint32_t domid, libxl_device_pci *pcidev)
192 {
193 libxl_ctx *ctx = libxl__gc_owner(gc);
194 char *be_path, *num_devs_path, *num_devs, *xsdev, *tmp, *tmppath;
195 int num, i, j;
196 xs_transaction_t t;
197
198 be_path = GCSPRINTF("%s/backend/pci/%d/0", libxl__xs_get_dompath(gc, 0), domid);
199 num_devs_path = GCSPRINTF("%s/num_devs", be_path);
200 num_devs = libxl__xs_read(gc, XBT_NULL, num_devs_path);
201 if (!num_devs)
202 return ERROR_INVAL;
203 num = atoi(num_devs);
204
205 libxl_domain_type domtype = libxl__domain_type(gc, domid);
206 if (domtype == LIBXL_DOMAIN_TYPE_INVALID)
207 return ERROR_FAIL;
208
209 if (domtype == LIBXL_DOMAIN_TYPE_PV) {
210 if (libxl__wait_for_backend(gc, be_path, GCSPRINTF("%d", XenbusStateConnected)) < 0) {
211 LOGD(DEBUG, domid, "pci backend at %s is not ready", be_path);
212 return ERROR_FAIL;
213 }
214 }
215
216 for (i = 0; i < num; i++) {
217 unsigned int domain = 0, bus = 0, dev = 0, func = 0;
218 xsdev = libxl__xs_read(gc, XBT_NULL, GCSPRINTF("%s/dev-%d", be_path, i));
219 sscanf(xsdev, PCI_BDF, &domain, &bus, &dev, &func);
220 if (domain == pcidev->domain && bus == pcidev->bus &&
221 pcidev->dev == dev && pcidev->func == func) {
222 break;
223 }
224 }
225 if (i == num) {
226 LOGD(ERROR, domid, "Couldn't find the device on xenstore");
227 return ERROR_INVAL;
228 }
229
230 retry_transaction:
231 t = xs_transaction_start(ctx->xsh);
232 xs_write(ctx->xsh, t, GCSPRINTF("%s/state-%d", be_path, i), GCSPRINTF("%d", XenbusStateClosing), 1);
233 xs_write(ctx->xsh, t, GCSPRINTF("%s/state", be_path), GCSPRINTF("%d", XenbusStateReconfiguring), 1);
234 if (!xs_transaction_end(ctx->xsh, t, 0))
235 if (errno == EAGAIN)
236 goto retry_transaction;
237
238 if (domtype == LIBXL_DOMAIN_TYPE_PV) {
239 if (libxl__wait_for_backend(gc, be_path, GCSPRINTF("%d", XenbusStateConnected)) < 0) {
240 LOGD(DEBUG, domid, "pci backend at %s is not ready", be_path);
241 return ERROR_FAIL;
242 }
243 }
244
245 retry_transaction2:
246 t = xs_transaction_start(ctx->xsh);
247 xs_rm(ctx->xsh, t, GCSPRINTF("%s/state-%d", be_path, i));
248 xs_rm(ctx->xsh, t, GCSPRINTF("%s/key-%d", be_path, i));
249 xs_rm(ctx->xsh, t, GCSPRINTF("%s/dev-%d", be_path, i));
250 xs_rm(ctx->xsh, t, GCSPRINTF("%s/vdev-%d", be_path, i));
251 xs_rm(ctx->xsh, t, GCSPRINTF("%s/opts-%d", be_path, i));
252 xs_rm(ctx->xsh, t, GCSPRINTF("%s/vdevfn-%d", be_path, i));
253 libxl__xs_printf(gc, t, num_devs_path, "%d", num - 1);
254 for (j = i + 1; j < num; j++) {
255 tmppath = GCSPRINTF("%s/state-%d", be_path, j);
256 tmp = libxl__xs_read(gc, t, tmppath);
257 xs_write(ctx->xsh, t, GCSPRINTF("%s/state-%d", be_path, j - 1), tmp, strlen(tmp));
258 xs_rm(ctx->xsh, t, tmppath);
259 tmppath = GCSPRINTF("%s/dev-%d", be_path, j);
260 tmp = libxl__xs_read(gc, t, tmppath);
261 xs_write(ctx->xsh, t, GCSPRINTF("%s/dev-%d", be_path, j - 1), tmp, strlen(tmp));
262 xs_rm(ctx->xsh, t, tmppath);
263 tmppath = GCSPRINTF("%s/key-%d", be_path, j);
264 tmp = libxl__xs_read(gc, t, tmppath);
265 xs_write(ctx->xsh, t, GCSPRINTF("%s/key-%d", be_path, j - 1), tmp, strlen(tmp));
266 xs_rm(ctx->xsh, t, tmppath);
267 tmppath = GCSPRINTF("%s/vdev-%d", be_path, j);
268 tmp = libxl__xs_read(gc, t, tmppath);
269 if (tmp) {
270 xs_write(ctx->xsh, t, GCSPRINTF("%s/vdev-%d", be_path, j - 1), tmp, strlen(tmp));
271 xs_rm(ctx->xsh, t, tmppath);
272 }
273 tmppath = GCSPRINTF("%s/opts-%d", be_path, j);
274 tmp = libxl__xs_read(gc, t, tmppath);
275 if (tmp) {
276 xs_write(ctx->xsh, t, GCSPRINTF("%s/opts-%d", be_path, j - 1), tmp, strlen(tmp));
277 xs_rm(ctx->xsh, t, tmppath);
278 }
279 tmppath = GCSPRINTF("%s/vdevfn-%d", be_path, j);
280 tmp = libxl__xs_read(gc, t, tmppath);
281 if (tmp) {
282 xs_write(ctx->xsh, t, GCSPRINTF("%s/vdevfn-%d", be_path, j - 1), tmp, strlen(tmp));
283 xs_rm(ctx->xsh, t, tmppath);
284 }
285 }
286 if (!xs_transaction_end(ctx->xsh, t, 0))
287 if (errno == EAGAIN)
288 goto retry_transaction2;
289
290 if (num == 1) {
291 libxl__device dev;
292 if (libxl__parse_backend_path(gc, be_path, &dev) != 0)
293 return ERROR_FAIL;
294
295 dev.domid = domid;
296 dev.kind = LIBXL__DEVICE_KIND_PCI;
297 dev.devid = 0;
298
299 libxl__device_destroy(gc, &dev);
300 return 0;
301 }
302
303 return 0;
304 }
305
/*
 * Collect every PCI device listed under
 * /local/domain/0/backend/pci/<domid>/0/dev-N for all domains found in
 * /local/domain.
 *
 * On success returns 0; *list points to an array of *num entries (NULL when
 * there are none) that has been registered with the gc via libxl__ptr_add(),
 * so the caller must not free it. Only domain/bus/dev/func/vdevfn of each
 * entry are filled in.
 * On allocation failure returns ERROR_NOMEM with *list == NULL and
 * *num == 0; the partially built array is freed rather than leaked.
 */
static int get_all_assigned_devices(libxl__gc *gc, libxl_device_pci **list, int *num)
{
    char **domlist;
    unsigned int nd = 0, i;

    *list = NULL;
    *num = 0;

    domlist = libxl__xs_directory(gc, XBT_NULL, "/local/domain", &nd);
    /* Never index a NULL directory listing, whatever nd was left as. */
    if (!domlist)
        nd = 0;

    for (i = 0; i < nd; i++) {
        char *path, *num_devs;
        int ndev, j;

        path = GCSPRINTF("/local/domain/0/backend/pci/%s/0/num_devs", domlist[i]);
        num_devs = libxl__xs_read(gc, XBT_NULL, path);
        if (!num_devs)
            continue;

        ndev = atoi(num_devs);
        for (j = 0; j < ndev; j++) {
            char *devpath, *bdf;
            unsigned dom, bus, dev, func;
            libxl_device_pci *grown;

            devpath = GCSPRINTF("/local/domain/0/backend/pci/%s/0/dev-%d",
                                domlist[i], j);
            bdf = libxl__xs_read(gc, XBT_NULL, devpath);
            if (!bdf)
                continue;
            if (sscanf(bdf, PCI_BDF, &dom, &bus, &dev, &func) != 4)
                continue;

            /* Grow through a temporary so a failure cannot lose *list. */
            grown = realloc(*list, sizeof(**list) * ((*num) + 1));
            if (grown == NULL) {
                free(*list);
                *list = NULL;
                *num = 0;
                return ERROR_NOMEM;
            }
            *list = grown;
            pcidev_struct_fill(*list + *num, dom, bus, dev, func, 0);
            (*num)++;
        }
    }
    libxl__ptr_add(gc, *list);

    return 0;
}
346
/*
 * Return 1 if one of the first num_assigned entries of `assigned` has the
 * given domain, bus, device and function numbers, 0 otherwise.
 */
static int is_pcidev_in_array(libxl_device_pci *assigned, int num_assigned,
                              int dom, int bus, int dev, int func)
{
    int idx = 0;

    while (idx < num_assigned) {
        libxl_device_pci *cand = &assigned[idx];

        if (cand->domain == dom && cand->bus == bus &&
            cand->dev == dev && cand->func == func)
            return 1;

        idx++;
    }

    return 0;
}
366
367 /* Write the standard BDF into the sysfs path given by sysfs_path. */
sysfs_write_bdf(libxl__gc * gc,const char * sysfs_path,libxl_device_pci * pcidev)368 static int sysfs_write_bdf(libxl__gc *gc, const char * sysfs_path,
369 libxl_device_pci *pcidev)
370 {
371 int rc, fd;
372 char *buf;
373
374 fd = open(sysfs_path, O_WRONLY);
375 if (fd < 0) {
376 LOGE(ERROR, "Couldn't open %s", sysfs_path);
377 return ERROR_FAIL;
378 }
379
380 buf = GCSPRINTF(PCI_BDF, pcidev->domain, pcidev->bus,
381 pcidev->dev, pcidev->func);
382 rc = write(fd, buf, strlen(buf));
383 /* Annoying to have two if's, but we need the errno */
384 if (rc < 0)
385 LOGE(ERROR, "write to %s returned %d", sysfs_path, rc);
386 close(fd);
387
388 if (rc < 0)
389 return ERROR_FAIL;
390
391 return 0;
392 }
393
libxl_device_pci_assignable_list(libxl_ctx * ctx,int * num)394 libxl_device_pci *libxl_device_pci_assignable_list(libxl_ctx *ctx, int *num)
395 {
396 GC_INIT(ctx);
397 libxl_device_pci *pcidevs = NULL, *new, *assigned;
398 struct dirent *de;
399 DIR *dir;
400 int r, num_assigned;
401
402 *num = 0;
403
404 r = get_all_assigned_devices(gc, &assigned, &num_assigned);
405 if (r) goto out;
406
407 dir = opendir(SYSFS_PCIBACK_DRIVER);
408 if (NULL == dir) {
409 if (errno == ENOENT) {
410 LOG(ERROR, "Looks like pciback driver not loaded");
411 } else {
412 LOGE(ERROR, "Couldn't open %s", SYSFS_PCIBACK_DRIVER);
413 }
414 goto out;
415 }
416
417 while((de = readdir(dir))) {
418 unsigned dom, bus, dev, func;
419 if (sscanf(de->d_name, PCI_BDF, &dom, &bus, &dev, &func) != 4)
420 continue;
421
422 if (is_pcidev_in_array(assigned, num_assigned, dom, bus, dev, func))
423 continue;
424
425 new = realloc(pcidevs, ((*num) + 1) * sizeof(*new));
426 if (NULL == new)
427 continue;
428
429 pcidevs = new;
430 new = pcidevs + *num;
431
432 memset(new, 0, sizeof(*new));
433 pcidev_struct_fill(new, dom, bus, dev, func, 0);
434 (*num)++;
435 }
436
437 closedir(dir);
438 out:
439 GC_FREE;
440 return pcidevs;
441 }
442
443 /* Unbind device from its current driver, if any. If driver_path is non-NULL,
444 * store the path to the original driver in it. */
sysfs_dev_unbind(libxl__gc * gc,libxl_device_pci * pcidev,char ** driver_path)445 static int sysfs_dev_unbind(libxl__gc *gc, libxl_device_pci *pcidev,
446 char **driver_path)
447 {
448 char * spath, *dp = NULL;
449 struct stat st;
450
451 spath = GCSPRINTF(SYSFS_PCI_DEV"/"PCI_BDF"/driver",
452 pcidev->domain,
453 pcidev->bus,
454 pcidev->dev,
455 pcidev->func);
456 if ( !lstat(spath, &st) ) {
457 /* Find the canonical path to the driver. */
458 dp = libxl__zalloc(gc, PATH_MAX);
459 dp = realpath(spath, dp);
460 if ( !dp ) {
461 LOGE(ERROR, "realpath() failed");
462 return -1;
463 }
464
465 LOG(DEBUG, "Driver re-plug path: %s", dp);
466
467 /* Unbind from the old driver */
468 spath = GCSPRINTF("%s/unbind", dp);
469 if ( sysfs_write_bdf(gc, spath, pcidev) < 0 ) {
470 LOGE(ERROR, "Couldn't unbind device");
471 return -1;
472 }
473 }
474
475 if ( driver_path )
476 *driver_path = dp;
477
478 return 0;
479 }
480
/*
 * Read the PCI vendor id of pcidev from its sysfs "vendor" attribute, which
 * is expected to hold a "0x"-prefixed hexadecimal number.
 * Returns the id, or 0xffff if the attribute cannot be opened or parsed.
 */
static uint16_t sysfs_dev_get_vendor(libxl__gc *gc, libxl_device_pci *pcidev)
{
    char *pci_device_vendor_path =
            GCSPRINTF(SYSFS_PCI_DEV"/"PCI_BDF"/vendor",
                      pcidev->domain, pcidev->bus, pcidev->dev, pcidev->func);
    /* int, as returned by fscanf(): EOF must not be truncated to 16 bits. */
    int read_items;
    uint16_t pci_device_vendor;

    FILE *f = fopen(pci_device_vendor_path, "r");
    if (!f) {
        LOGE(ERROR,
             "pci device "PCI_BDF" does not have vendor attribute",
             pcidev->domain, pcidev->bus, pcidev->dev, pcidev->func);
        return 0xffff;
    }
    read_items = fscanf(f, "0x%hx\n", &pci_device_vendor);
    fclose(f);
    if (read_items != 1) {
        LOGE(ERROR,
             "cannot read vendor of pci device "PCI_BDF,
             pcidev->domain, pcidev->bus, pcidev->dev, pcidev->func);
        return 0xffff;
    }

    return pci_device_vendor;
}
507
/*
 * Read the PCI device id of pcidev from its sysfs "device" attribute, which
 * is expected to hold a "0x"-prefixed hexadecimal number.
 * Returns the id, or 0xffff if the attribute cannot be opened or parsed.
 */
static uint16_t sysfs_dev_get_device(libxl__gc *gc, libxl_device_pci *pcidev)
{
    char *pci_device_device_path =
            GCSPRINTF(SYSFS_PCI_DEV"/"PCI_BDF"/device",
                      pcidev->domain, pcidev->bus, pcidev->dev, pcidev->func);
    /* int, as returned by fscanf(): EOF must not be truncated to 16 bits. */
    int read_items;
    uint16_t pci_device_device;

    FILE *f = fopen(pci_device_device_path, "r");
    if (!f) {
        LOGE(ERROR,
             "pci device "PCI_BDF" does not have device attribute",
             pcidev->domain, pcidev->bus, pcidev->dev, pcidev->func);
        return 0xffff;
    }
    read_items = fscanf(f, "0x%hx\n", &pci_device_device);
    fclose(f);
    if (read_items != 1) {
        LOGE(ERROR,
             "cannot read device of pci device "PCI_BDF,
             pcidev->domain, pcidev->bus, pcidev->dev, pcidev->func);
        return 0xffff;
    }

    return pci_device_device;
}
534
/*
 * Read the PCI class code of pcidev from its sysfs "class" attribute
 * (a "0x"-prefixed hexadecimal number) into *class.
 * Returns 0 on success and ERROR_FAIL if the attribute cannot be opened or
 * parsed.
 */
static int sysfs_dev_get_class(libxl__gc *gc, libxl_device_pci *pcidev,
                               unsigned long *class)
{
    int parsed;
    FILE *attr;
    char *pci_device_class_path = GCSPRINTF(SYSFS_PCI_DEV"/"PCI_BDF"/class",
                     pcidev->domain, pcidev->bus, pcidev->dev, pcidev->func);

    attr = fopen(pci_device_class_path, "r");
    if (attr == NULL) {
        LOGE(ERROR,
             "pci device "PCI_BDF" does not have class attribute",
             pcidev->domain, pcidev->bus, pcidev->dev, pcidev->func);
        return ERROR_FAIL;
    }

    parsed = fscanf(attr, "0x%lx\n", class);
    fclose(attr);
    if (parsed == 1)
        return 0;

    LOGE(ERROR,
         "cannot read class of pci device "PCI_BDF,
         pcidev->domain, pcidev->bus, pcidev->dev, pcidev->func);
    return ERROR_FAIL;
}
562
563 /*
564 * Some devices may need some ways to work well. Here like IGD,
565 * we have to pass a specific option to qemu.
566 */
libxl__is_igd_vga_passthru(libxl__gc * gc,const libxl_domain_config * d_config)567 bool libxl__is_igd_vga_passthru(libxl__gc *gc,
568 const libxl_domain_config *d_config)
569 {
570 unsigned int i;
571 uint16_t pt_vendor, pt_device;
572 unsigned long class;
573
574 for (i = 0 ; i < d_config->num_pcidevs ; i++) {
575 libxl_device_pci *pcidev = &d_config->pcidevs[i];
576 pt_vendor = sysfs_dev_get_vendor(gc, pcidev);
577 pt_device = sysfs_dev_get_device(gc, pcidev);
578
579 if (pt_vendor == 0xffff || pt_device == 0xffff ||
580 pt_vendor != 0x8086)
581 continue;
582
583 if (sysfs_dev_get_class(gc, pcidev, &class))
584 continue;
585 if (class == 0x030000)
586 return true;
587 }
588
589 return false;
590 }
591
592 /*
593 * A brief comment about slots. I don't know what slots are for; however,
594 * I have by experimentation determined:
595 * - Before a device can be bound to pciback, its BDF must first be listed
596 * in pciback/slots
597 * - The way to get the BDF listed there is to write BDF to
598 * pciback/new_slot
599 * - Writing the same BDF to pciback/new_slot is not idempotent; it results
600 * in two entries of the BDF in pciback/slots
601 * It's not clear whether having two entries in pciback/slots is a problem
602 * or not. Just to be safe, this code does the conservative thing, and
603 * first checks to see if there is a slot, adding one only if one does not
604 * already exist.
605 */
606
607 /* Scan through /sys/.../pciback/slots looking for pcidev's BDF */
pciback_dev_has_slot(libxl__gc * gc,libxl_device_pci * pcidev)608 static int pciback_dev_has_slot(libxl__gc *gc, libxl_device_pci *pcidev)
609 {
610 FILE *f;
611 int rc = 0;
612 unsigned dom, bus, dev, func;
613
614 f = fopen(SYSFS_PCIBACK_DRIVER"/slots", "r");
615
616 if (f == NULL) {
617 LOGE(ERROR, "Couldn't open %s", SYSFS_PCIBACK_DRIVER"/slots");
618 return ERROR_FAIL;
619 }
620
621 while(fscanf(f, "%x:%x:%x.%d\n", &dom, &bus, &dev, &func)==4) {
622 if(dom == pcidev->domain
623 && bus == pcidev->bus
624 && dev == pcidev->dev
625 && func == pcidev->func) {
626 rc = 1;
627 goto out;
628 }
629 }
630 out:
631 fclose(f);
632 return rc;
633 }
634
/*
 * Tell whether pcidev has an entry in the pciback driver's sysfs directory.
 * Returns 1 if the entry exists, 0 if it does not (ENOENT), and -1 if the
 * pciback directory is inaccessible or lstat() fails for another reason.
 */
static int pciback_dev_is_assigned(libxl__gc *gc, libxl_device_pci *pcidev)
{
    struct stat st;
    char *entry;
    int rc;

    if (access(SYSFS_PCIBACK_DRIVER, F_OK) < 0) {
        if (errno != ENOENT) {
            LOGE(ERROR, "Can't access "SYSFS_PCIBACK_DRIVER);
        } else {
            LOG(ERROR, "Looks like pciback driver is not loaded");
        }
        return -1;
    }

    entry = GCSPRINTF(SYSFS_PCIBACK_DRIVER"/"PCI_BDF,
                      pcidev->domain, pcidev->bus,
                      pcidev->dev, pcidev->func);
    rc = lstat(entry, &st);

    if (!rc)
        return 1;

    /* Anything other than "no such entry" is a real error. */
    if (rc > 0 || errno != ENOENT) {
        LOGE(ERROR, "Accessing %s", entry);
        return -1;
    }

    return 0;
}
662
/*
 * Bind pcidev to pciback: add a pciback slot for it through "new_slot" if it
 * does not have one yet, then write its BDF to pciback's "bind" file.
 * Returns 0 on success and ERROR_FAIL on any failure.
 */
static int pciback_dev_assign(libxl__gc *gc, libxl_device_pci *pcidev)
{
    int has_slot = pciback_dev_has_slot(gc, pcidev);

    if (has_slot < 0) {
        LOGE(ERROR, "Error checking for pciback slot");
        return ERROR_FAIL;
    }

    /* Only create a slot when none exists. */
    if (has_slot == 0 &&
        sysfs_write_bdf(gc, SYSFS_PCIBACK_DRIVER"/new_slot", pcidev) < 0) {
        LOGE(ERROR, "Couldn't bind device to pciback!");
        return ERROR_FAIL;
    }

    if (sysfs_write_bdf(gc, SYSFS_PCIBACK_DRIVER"/bind", pcidev) < 0) {
        LOGE(ERROR, "Couldn't bind device to pciback!");
        return ERROR_FAIL;
    }

    return 0;
}
684
/*
 * Detach pcidev from pciback: unbind it from its current driver and, if a
 * pciback slot exists for it, remove that slot through "remove_slot".
 * Returns 0 on success and ERROR_FAIL if either step fails.
 */
static int pciback_dev_unassign(libxl__gc *gc, libxl_device_pci *pcidev)
{
    int unbound = sysfs_dev_unbind(gc, pcidev, NULL);

    /* Remove from pciback */
    if (unbound < 0) {
        LOG(ERROR, "Couldn't unbind device!");
        return ERROR_FAIL;
    }

    /* Remove slot if necessary */
    if (pciback_dev_has_slot(gc, pcidev) <= 0)
        return 0;

    if (sysfs_write_bdf(gc, SYSFS_PCIBACK_DRIVER"/remove_slot", pcidev) < 0) {
        LOGE(ERROR, "Couldn't remove pciback slot");
        return ERROR_FAIL;
    }

    return 0;
}
703
704 #define PCIBACK_INFO_PATH "/libxl/pciback"
705
/*
 * Store driver_path in xenstore at
 * PCIBACK_INFO_PATH/<dom>-<bus>-<dev>-<func>/driver_path.
 * A failed write is only logged as a warning.
 */
static void pci_assignable_driver_path_write(libxl__gc *gc,
                                            libxl_device_pci *pcidev,
                                            char *driver_path)
{
    char *node = GCSPRINTF(PCIBACK_INFO_PATH"/"PCI_BDF_XSPATH"/driver_path",
                           pcidev->domain,
                           pcidev->bus,
                           pcidev->dev,
                           pcidev->func);
    int rc = libxl__xs_printf(gc, XBT_NULL, node, "%s", driver_path);

    if (rc < 0) {
        LOGE(WARN, "Write of %s to node %s failed.", driver_path, node);
    }
}
721
/*
 * Read back the driver path stored for pcidev at
 * PCIBACK_INFO_PATH/<dom>-<bus>-<dev>-<func>/driver_path.
 * Returns the result of libxl__xs_read() unchanged.
 */
static char * pci_assignable_driver_path_read(libxl__gc *gc,
                                              libxl_device_pci *pcidev)
{
    char *node = GCSPRINTF(PCIBACK_INFO_PATH "/" PCI_BDF_XSPATH "/driver_path",
                           pcidev->domain,
                           pcidev->bus,
                           pcidev->dev,
                           pcidev->func);

    return libxl__xs_read(gc, XBT_NULL, node);
}
733
/*
 * Delete the xenstore node PCIBACK_INFO_PATH/<dom>-<bus>-<dev>-<func> that
 * holds the saved driver path of pcidev. The result of xs_rm() is ignored.
 */
static void pci_assignable_driver_path_remove(libxl__gc *gc,
                                             libxl_device_pci *pcidev)
{
    libxl_ctx *ctx = libxl__gc_owner(gc);
    char *node = GCSPRINTF(PCIBACK_INFO_PATH "/" PCI_BDF_XSPATH,
                           pcidev->domain,
                           pcidev->bus,
                           pcidev->dev,
                           pcidev->func);

    /* Remove the xenstore entry */
    xs_rm(ctx->xsh, XBT_NULL, node);
}
747
/*
 * Make pcidev assignable by binding it to pciback.
 *
 * The device must exist in sysfs. If it is already bound to pciback the
 * function only warns and returns 0. Otherwise the device is unbound from
 * its current driver; with `rebind` set the driver path is saved in
 * xenstore (or a previously saved one is reported), without it any saved
 * path is removed. Finally the device is bound to pciback.
 * Returns 0 on success and ERROR_FAIL on failure.
 */
static int libxl__device_pci_assignable_add(libxl__gc *gc,
                                            libxl_device_pci *pcidev,
                                            int rebind)
{
    /* Local copy for convenience */
    unsigned dom = pcidev->domain;
    unsigned bus = pcidev->bus;
    unsigned dev = pcidev->dev;
    unsigned func = pcidev->func;
    char *driver_path = NULL;
    char *dev_node;
    struct stat st;
    int assigned;

    /* See if the device exists */
    dev_node = GCSPRINTF(SYSFS_PCI_DEV"/"PCI_BDF, dom, bus, dev, func);
    if (lstat(dev_node, &st) != 0) {
        LOGE(ERROR, "Couldn't lstat %s", dev_node);
        return ERROR_FAIL;
    }

    /* Check to see if it's already assigned to pciback */
    assigned = pciback_dev_is_assigned(gc, pcidev);
    if (assigned < 0)
        return ERROR_FAIL;
    if (assigned != 0) {
        LOG(WARN, PCI_BDF" already assigned to pciback", dom, bus, dev, func);
        return 0;
    }

    /* Check to see if there's already a driver that we need to unbind from */
    if (sysfs_dev_unbind(gc, pcidev, &driver_path) != 0) {
        LOG(ERROR, "Couldn't unbind "PCI_BDF" from driver",
            dom, bus, dev, func);
        return ERROR_FAIL;
    }

    /* Store driver_path for rebinding to dom0 */
    if (!rebind) {
        pci_assignable_driver_path_remove(gc, pcidev);
    } else if (driver_path != NULL) {
        pci_assignable_driver_path_write(gc, pcidev, driver_path);
    } else {
        /* No driver bound right now: fall back to a previously saved path. */
        driver_path = pci_assignable_driver_path_read(gc, pcidev);
        if (driver_path != NULL) {
            LOG(INFO, PCI_BDF" not bound to a driver, will be rebound to %s",
                dom, bus, dev, func, driver_path);
        } else {
            LOG(WARN, PCI_BDF" not bound to a driver, will not be rebound.",
                dom, bus, dev, func);
        }
    }

    if (pciback_dev_assign(gc, pcidev) != 0) {
        LOG(ERROR, "Couldn't bind device to pciback!");
        return ERROR_FAIL;
    }

    return 0;
}
810
/*
 * Undo libxl__device_pci_assignable_add(): unassign pcidev from pciback
 * (warning if it was not bound there) and, when `rebind` is set and a driver
 * path was saved in xenstore, bind the device to that driver again and drop
 * the saved path.
 *
 * Returns 0 on success, ERROR_FAIL if the pciback state cannot be
 * determined, and -1 if rebinding to the saved driver fails. The result of
 * pciback_dev_unassign() is not checked.
 */
static int libxl__device_pci_assignable_remove(libxl__gc *gc,
                                               libxl_device_pci *pcidev,
                                               int rebind)
{
    char *driver_path;
    int assigned = pciback_dev_is_assigned(gc, pcidev);

    /* Unbind from pciback */
    if (assigned < 0)
        return ERROR_FAIL;

    if (assigned != 0) {
        pciback_dev_unassign(gc, pcidev);
    } else {
        LOG(WARN, "Not bound to pciback");
    }

    /* Rebind if necessary */
    driver_path = pci_assignable_driver_path_read(gc, pcidev);

    if (driver_path == NULL) {
        if (rebind) {
            LOG(WARN,
                "Couldn't find path for original driver; not rebinding");
        }
        return 0;
    }

    if (!rebind)
        return 0;

    LOG(INFO, "Rebinding to driver at %s", driver_path);

    if (sysfs_write_bdf(gc, GCSPRINTF("%s/bind", driver_path), pcidev) < 0) {
        LOGE(ERROR, "Couldn't bind device to %s", driver_path);
        return -1;
    }

    pci_assignable_driver_path_remove(gc, pcidev);

    return 0;
}
852
/*
 * Public entry point: set up a gc for ctx, run
 * libxl__device_pci_assignable_add() and return its result after releasing
 * the gc.
 */
int libxl_device_pci_assignable_add(libxl_ctx *ctx, libxl_device_pci *pcidev,
                                    int rebind)
{
    GC_INIT(ctx);
    int result = libxl__device_pci_assignable_add(gc, pcidev, rebind);

    GC_FREE;
    return result;
}
864
865
/*
 * Public entry point: set up a gc for ctx, run
 * libxl__device_pci_assignable_remove() and return its result after
 * releasing the gc.
 */
int libxl_device_pci_assignable_remove(libxl_ctx *ctx, libxl_device_pci *pcidev,
                                       int rebind)
{
    GC_INIT(ctx);
    int result = libxl__device_pci_assignable_remove(gc, pcidev, rebind);

    GC_FREE;
    return result;
}
877
878 /*
879 * This function checks that all functions of a device are bound to pciback
880 * driver. It also initialises a bit-mask of which function numbers are present
881 * on that device.
882 */
pci_multifunction_check(libxl__gc * gc,libxl_device_pci * pcidev,unsigned int * func_mask)883 static int pci_multifunction_check(libxl__gc *gc, libxl_device_pci *pcidev, unsigned int *func_mask)
884 {
885 struct dirent *de;
886 DIR *dir;
887
888 *func_mask = 0;
889
890 dir = opendir(SYSFS_PCI_DEV);
891 if ( NULL == dir ) {
892 LOGE(ERROR, "Couldn't open %s", SYSFS_PCI_DEV);
893 return -1;
894 }
895
896 while( (de = readdir(dir)) ) {
897 unsigned dom, bus, dev, func;
898 struct stat st;
899 char *path;
900
901 if ( sscanf(de->d_name, PCI_BDF, &dom, &bus, &dev, &func) != 4 )
902 continue;
903 if ( pcidev->domain != dom )
904 continue;
905 if ( pcidev->bus != bus )
906 continue;
907 if ( pcidev->dev != dev )
908 continue;
909
910 path = GCSPRINTF("%s/" PCI_BDF, SYSFS_PCIBACK_DRIVER, dom, bus, dev, func);
911 if ( lstat(path, &st) ) {
912 if ( errno == ENOENT )
913 LOG(ERROR, PCI_BDF " is not assigned to pciback driver",
914 dom, bus, dev, func);
915 else
916 LOGE(ERROR, "Couldn't lstat %s", path);
917 closedir(dir);
918 return -1;
919 }
920 (*func_mask) |= (1 << func);
921 }
922
923 closedir(dir);
924 return 0;
925 }
926
/*
 * Classify a device-model state string while waiting for a "pci-ins"
 * command: returns -1 for "pci-insert-failed", 0 for "pci-inserted" and 1
 * for any other state.
 *
 * priv is accepted for interface compatibility but not inspected: comparing
 * state against it could not change the result (both outcomes were 1) and
 * dereferenced it even when it was NULL.
 */
static int pci_ins_check(libxl__gc *gc, uint32_t domid, const char *state, void *priv)
{
    (void)priv;

    if ( !strcmp(state, "pci-insert-failed") )
        return -1;
    if ( !strcmp(state, "pci-inserted") )
        return 0;

    return 1;
}
940
/*
 * Ask a qemu-traditional device model to hot-plug pcidev.
 *
 * The device description is written to the device model's "/parameter"
 * node, the "pci-ins" command is issued, and the function waits for the
 * outcome via pci_ins_check(). On success the virtual devfn reported in
 * "/parameter" (as "0x<hex>") is stored in pcidev->vdevfn. The device
 * model's "/state" node is finally restored to the value it had on entry,
 * if one could be read.
 *
 * Returns 0 on success and a negative value if qemu refused the device or
 * the reported vdevfn is missing or malformed.
 */
static int qemu_pci_add_xenstore(libxl__gc *gc, uint32_t domid,
                                 libxl_device_pci *pcidev)
{
    libxl_ctx *ctx = libxl__gc_owner(gc);
    int rc = 0;
    char *path;
    char *state, *vdevfn;
    uint32_t dm_domid;

    dm_domid = libxl_get_stubdom_id(CTX, domid);
    path = DEVICE_MODEL_XS_PATH(gc, dm_domid, domid, "/state");
    state = libxl__xs_read(gc, XBT_NULL, path);
    path = DEVICE_MODEL_XS_PATH(gc, dm_domid, domid, "/parameter");
    if (pcidev->vdevfn) {
        libxl__xs_printf(gc, XBT_NULL, path, PCI_BDF_VDEVFN","PCI_OPTIONS,
                         pcidev->domain, pcidev->bus, pcidev->dev,
                         pcidev->func, pcidev->vdevfn, pcidev->msitranslate,
                         pcidev->power_mgmt);
    } else {
        libxl__xs_printf(gc, XBT_NULL, path, PCI_BDF","PCI_OPTIONS,
                         pcidev->domain,  pcidev->bus, pcidev->dev,
                         pcidev->func, pcidev->msitranslate, pcidev->power_mgmt);
    }

    libxl__qemu_traditional_cmd(gc, domid, "pci-ins");
    rc = libxl__wait_for_device_model_deprecated(gc, domid, NULL, NULL,
                                                 pci_ins_check, state);
    path = DEVICE_MODEL_XS_PATH(gc, dm_domid, domid, "/parameter");
    vdevfn = libxl__xs_read(gc, XBT_NULL, path);
    path = DEVICE_MODEL_XS_PATH(gc, dm_domid, domid, "/state");
    if ( rc < 0 ) {
        LOGD(ERROR, domid, "qemu refused to add device: %s",
             vdevfn ? vdevfn : "(null)");
    } else if ( !vdevfn || sscanf(vdevfn, "0x%x", &pcidev->vdevfn) != 1 ) {
        /* A missing "/parameter" node is treated like a malformed one. */
        LOGD(ERROR, domid, "wrong format for the vdevfn: '%s'",
             vdevfn ? vdevfn : "(null)");
        rc = -1;
    }
    /* Restore the original state only if there was one to read. */
    if ( state )
        xs_write(ctx->xsh, XBT_NULL, path, state, strlen(state));

    return rc;
}
981
/*
 * Give domain @domid access to the host PCI device described by @pcidev.
 *
 * Steps, in order:
 *  1. For HVM domains, wait for the device model to reach "running" and ask
 *     it to plug the device (through xenstore for qemu-traditional, through
 *     QMP for qemu-xen).
 *  2. Parse the device's sysfs "resource" file and grant @domid permission
 *     on every I/O-port or MMIO range with a non-zero start address.
 *  3. Parse the device's sysfs "irq" file; if it holds a non-zero IRQ, map it
 *     to a pirq and grant @domid permission on that pirq.
 *  4. If pcidev->permissive is set, write the BDF to pciback's "permissive"
 *     node.
 *  5. Unless libxl_is_stubdom() reports true for @domid, call
 *     xc_assign_device() with a flag derived from pcidev->rdm_policy.
 *  6. Unless @starting is non-zero, record the device in xenstore.
 *
 * Returns 0 on success, otherwise ERROR_FAIL / ERROR_INVAL or the result of
 * libxl__device_pci_add_xenstore().  Permissions granted before a failure
 * are not revoked by this function.
 */
static int do_pci_add(libxl__gc *gc, uint32_t domid, libxl_device_pci *pcidev, int starting)
{
    libxl_ctx *ctx = libxl__gc_owner(gc);
    libxl_domain_type type = libxl__domain_type(gc, domid);
    char *sysfs_path;
    FILE *f;
    unsigned long long start, end, flags, size;
    int irq, i, rc, hvm = 0;
    uint32_t flag = XEN_DOMCTL_DEV_RDM_RELAXED;
    /* domainid is only used as the domain id in log messages below.
     * NOTE(review): libxl_is_stubdom() may overwrite it (presumably with the
     * id of the guest served by the stubdomain) - confirm. */
    uint32_t domainid = domid;
    bool isstubdom = libxl_is_stubdom(ctx, domid, &domainid);

    if (type == LIBXL_DOMAIN_TYPE_INVALID)
        return ERROR_FAIL;

    if (type == LIBXL_DOMAIN_TYPE_HVM) {
        hvm = 1;
        if (libxl__wait_for_device_model_deprecated(gc, domid, "running",
                                                    NULL, NULL, NULL) < 0) {
            return ERROR_FAIL;
        }
        /* Ask whichever device model is running to plug the device. */
        switch (libxl__device_model_version_running(gc, domid)) {
            case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN_TRADITIONAL:
                rc = qemu_pci_add_xenstore(gc, domid, pcidev);
                break;
            case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN:
                rc = libxl__qmp_pci_add(gc, domid, pcidev);
                break;
            default:
                return ERROR_INVAL;
        }
        if ( rc )
            return ERROR_FAIL;
    }

    sysfs_path = GCSPRINTF(SYSFS_PCI_DEV"/"PCI_BDF"/resource", pcidev->domain,
                           pcidev->bus, pcidev->dev, pcidev->func);
    f = fopen(sysfs_path, "r");
    start = end = flags = size = 0;
    irq = 0;

    if (f == NULL) {
        LOGED(ERROR, domainid, "Couldn't open %s", sysfs_path);
        return ERROR_FAIL;
    }
    /* Each parsed line is "start end flags"; lines that do not parse are
     * skipped, as are entries whose start address is zero. */
    for (i = 0; i < PROC_PCI_NUM_RESOURCES; i++) {
        if (fscanf(f, "0x%llx 0x%llx 0x%llx\n", &start, &end, &flags) != 3)
            continue;
        size = end - start + 1;
        if (start) {
            if (flags & PCI_BAR_IO) {
                /* I/O-port range: permission is granted per port. */
                rc = xc_domain_ioport_permission(ctx->xch, domid, start, size, 1);
                if (rc < 0) {
                    LOGED(ERROR, domainid,
                          "Error: xc_domain_ioport_permission error 0x%llx/0x%llx",
                          start,
                          size);
                    fclose(f);
                    return ERROR_FAIL;
                }
            } else {
                /* Memory range: converted to a first page frame and a page
                 * count (size rounded up to whole pages). */
                rc = xc_domain_iomem_permission(ctx->xch, domid, start>>XC_PAGE_SHIFT,
                                                (size+(XC_PAGE_SIZE-1))>>XC_PAGE_SHIFT, 1);
                if (rc < 0) {
                    LOGED(ERROR, domainid,
                          "Error: xc_domain_iomem_permission error 0x%llx/0x%llx",
                          start,
                          size);
                    fclose(f);
                    return ERROR_FAIL;
                }
            }
        }
    }
    fclose(f);
    sysfs_path = GCSPRINTF(SYSFS_PCI_DEV"/"PCI_BDF"/irq", pcidev->domain,
                           pcidev->bus, pcidev->dev, pcidev->func);
    f = fopen(sysfs_path, "r");
    if (f == NULL) {
        /* Not fatal: continue with device assignment.  Note that this jump
         * also skips the "permissive" handling below. */
        LOGED(ERROR, domainid, "Couldn't open %s", sysfs_path);
        goto out;
    }
    if ((fscanf(f, "%u", &irq) == 1) && irq) {
        /* irq is both the input index and the output pirq of the mapping. */
        rc = xc_physdev_map_pirq(ctx->xch, domid, irq, &irq);
        if (rc < 0) {
            LOGED(ERROR, domainid, "Error: xc_physdev_map_pirq irq=%d", irq);
            fclose(f);
            return ERROR_FAIL;
        }
        rc = xc_domain_irq_permission(ctx->xch, domid, irq, 1);
        if (rc < 0) {
            LOGED(ERROR, domainid, "Error: xc_domain_irq_permission irq=%d", irq);
            fclose(f);
            return ERROR_FAIL;
        }
    }
    fclose(f);

    /* Don't restrict writes to the PCI config space from this VM */
    if (pcidev->permissive) {
        if ( sysfs_write_bdf(gc, SYSFS_PCIBACK_DRIVER"/permissive",
                             pcidev) < 0 ) {
            LOGD(ERROR, domainid, "Setting permissive for device");
            return ERROR_FAIL;
        }
    }

out:
    if (!isstubdom) {
        /* flag starts as RELAXED; a strict policy clears that bit, and any
         * policy other than strict or relaxed is rejected. */
        if (pcidev->rdm_policy == LIBXL_RDM_RESERVE_POLICY_STRICT) {
            flag &= ~XEN_DOMCTL_DEV_RDM_RELAXED;
        } else if (pcidev->rdm_policy != LIBXL_RDM_RESERVE_POLICY_RELAXED) {
            LOGED(ERROR, domainid, "unknown rdm check flag.");
            return ERROR_FAIL;
        }
        rc = xc_assign_device(ctx->xch, domid, pcidev_encode_bdf(pcidev), flag);
        /* For non-HVM domains an ENOSYS failure is tolerated. */
        if (rc < 0 && (hvm || errno != ENOSYS)) {
            LOGED(ERROR, domainid, "xc_assign_device failed");
            return ERROR_FAIL;
        }
    }

    /* Only a device added after domain start-up is recorded in xenstore
     * here. */
    if (!starting)
        rc = libxl__device_pci_add_xenstore(gc, domid, pcidev, starting);
    else
        rc = 0;
    return rc;
}
1110
/*
 * Reset a host PCI device through sysfs.
 *
 * First tries pciback's "do_flr" node, writing the device's BDF to it.  If
 * that node cannot be opened, falls back to writing "1" to the device's own
 * sysfs "reset" node.
 *
 * Returns 0 when a write succeeded, the negative write() result when a node
 * was opened but the write failed, and -1 when neither node could be opened.
 *
 * NOTE(review): the log macros below receive the PCI segment ("domain") as
 * their second argument, where other callers in this file pass a Xen domain
 * id.  Kept as-is because no domain id is available here.
 */
static int libxl__device_pci_reset(libxl__gc *gc, unsigned int domain, unsigned int bus,
                                   unsigned int dev, unsigned int func)
{
    char *reset;
    int fd, rc;

    reset = GCSPRINTF("%s/do_flr", SYSFS_PCIBACK_DRIVER);
    fd = open(reset, O_WRONLY);
    if (fd >= 0) {
        char *buf = GCSPRINTF(PCI_BDF, domain, bus, dev, func);
        rc = write(fd, buf, strlen(buf));
        /* Log errno as well, like the fallback path below does; it must be
         * reported before close() can overwrite it. */
        if (rc < 0)
            LOGED(ERROR, domain, "write to %s returned %d", reset, rc);
        close(fd);
        return rc < 0 ? rc : 0;
    }
    /* A missing do_flr node is expected and silently falls through to the
     * generic reset node; any other open() failure is reported. */
    if (errno != ENOENT)
        LOGED(ERROR, domain, "Failed to access pciback path %s", reset);
    reset = GCSPRINTF("%s/"PCI_BDF"/reset", SYSFS_PCI_DEV, domain, bus, dev, func);
    fd = open(reset, O_WRONLY);
    if (fd >= 0) {
        rc = write(fd, "1", 1);
        if (rc < 0)
            LOGED(ERROR, domain, "write to %s returned %d", reset, rc);
        close(fd);
        return rc < 0 ? rc : 0;
    }
    if (errno == ENOENT) {
        LOGD(ERROR, domain,
             "The kernel doesn't support reset from sysfs for PCI device "PCI_BDF,
             domain, bus, dev, func);
    } else {
        LOGED(ERROR, domain, "Failed to access reset path %s", reset);
    }
    return -1;
}
1147
/*
 * Fill in defaults for a PCI device description.
 *
 * The only field touched is rdm_policy: a value of
 * LIBXL_RDM_RESERVE_POLICY_INVALID is replaced by the strict policy.
 * @gc, @domid and @hotplug are not used.  Always returns 0.
 */
static int libxl__device_pci_setdefault(libxl__gc *gc, uint32_t domid,
                                        libxl_device_pci *pci, bool hotplug)
{
    /* A policy that has already been chosen is left untouched. */
    if (pci->rdm_policy != LIBXL_RDM_RESERVE_POLICY_INVALID)
        return 0;

    /* We'd like to force reserve rdm specific to a device by default. */
    pci->rdm_policy = LIBXL_RDM_RESERVE_POLICY_STRICT;
    return 0;
}
1156
/*
 * Public entry point: attach @pcidev to the running domain @domid.
 *
 * The work is done synchronously by libxl__device_pci_add() with
 * starting == 0; its result is handed to libxl__ao_complete() and the
 * function then returns AO_INPROGRESS.
 */
int libxl_device_pci_add(libxl_ctx *ctx, uint32_t domid,
                         libxl_device_pci *pcidev,
                         const libxl_asyncop_how *ao_how)
{
    AO_CREATE(ctx, domid, ao_how);
    const int add_rc = libxl__device_pci_add(gc, domid, pcidev, 0);

    libxl__ao_complete(egc, ao, add_rc);
    return AO_INPROGRESS;
}
1167
/*
 * Check whether @pcidev appears in the list returned by
 * libxl_device_pci_assignable_list().
 *
 * Devices are matched on domain, bus, dev and func.  Returns non-zero when
 * the scan stopped before the end of the list, 0 otherwise.
 */
static int libxl_pcidev_assignable(libxl_ctx *ctx, libxl_device_pci *pcidev)
{
    libxl_device_pci *list;
    int count, idx = 0;

    list = libxl_device_pci_assignable_list(ctx, &count);
    while (idx < count) {
        const libxl_device_pci *cand = &list[idx];

        /* Stop at the first entry with the same BDF. */
        if (!(cand->domain != pcidev->domain ||
              cand->bus != pcidev->bus ||
              cand->dev != pcidev->dev ||
              cand->func != pcidev->func))
            break;
        idx++;
    }
    free(list);

    return idx != count;
}
1184
/*
 * Validate a host PCI device and attach it to domain @domid.
 *
 * Checks performed before anything is attached:
 *  - for HVM domains, xc_test_assign_device() must succeed;
 *  - defaults are filled in, and if pcidev->seize is set and the device is
 *    not yet bound to pciback it is made assignable;
 *  - the device must be in the assignable list and must not already be
 *    listed by get_all_assigned_devices().
 *
 * The device is then reset (result ignored), added to the stubdomain if
 * libxl_get_stubdom_id() returns a non-zero id, and finally added to @domid
 * once per physical function selected by the function mask (functions are
 * walked from 7 down to 0).
 *
 * @starting is passed through to do_pci_add() for the @domid attachment.
 *
 * Returns 0 on success or a libxl error code.  pcidev->func, ->vdevfn and
 * ->vfunc_mask may be modified.
 */
int libxl__device_pci_add(libxl__gc *gc, uint32_t domid, libxl_device_pci *pcidev, int starting)
{
    libxl_ctx *ctx = libxl__gc_owner(gc);
    unsigned int orig_vdev, pfunc_mask;
    libxl_device_pci *assigned;
    int num_assigned, i, rc;
    int stubdomid = 0;

    if (libxl__domain_type(gc, domid) == LIBXL_DOMAIN_TYPE_HVM) {
        rc = xc_test_assign_device(ctx->xch, domid, pcidev_encode_bdf(pcidev));
        if (rc) {
            LOGD(ERROR, domid,
                 "PCI device %04x:%02x:%02x.%u %s?",
                 pcidev->domain, pcidev->bus, pcidev->dev, pcidev->func,
                 errno == ENOSYS ? "cannot be assigned - no IOMMU"
                 : "already assigned to a different guest");
            /* rc holds a raw libxc result here, not a libxl error code.
             * Translate it so callers that test "rc < 0" cannot mistake a
             * positive value for success. */
            rc = ERROR_FAIL;
            goto out;
        }
    }

    rc = libxl__device_pci_setdefault(gc, domid, pcidev, false);
    if (rc) goto out;

    if (pcidev->seize && !pciback_dev_is_assigned(gc, pcidev)) {
        rc = libxl__device_pci_assignable_add(gc, pcidev, 1);
        if ( rc )
            goto out;
    }

    if (!libxl_pcidev_assignable(ctx, pcidev)) {
        LOGD(ERROR, domid, "PCI device %x:%x:%x.%x is not assignable",
             pcidev->domain, pcidev->bus, pcidev->dev, pcidev->func);
        rc = ERROR_FAIL;
        goto out;
    }

    rc = get_all_assigned_devices(gc, &assigned, &num_assigned);
    if ( rc ) {
        LOGD(ERROR, domid,
             "cannot determine if device is assigned, refusing to continue");
        goto out;
    }
    if ( is_pcidev_in_array(assigned, num_assigned, pcidev->domain,
                            pcidev->bus, pcidev->dev, pcidev->func) ) {
        LOGD(ERROR, domid, "PCI device already attached to a domain");
        rc = ERROR_FAIL;
        goto out;
    }

    /* Best effort: a failed reset does not prevent the attachment. */
    libxl__device_pci_reset(gc, pcidev->domain, pcidev->bus, pcidev->dev, pcidev->func);

    stubdomid = libxl_get_stubdom_id(ctx, domid);
    if (stubdomid != 0) {
        /* Work on a copy so the stubdomain pass cannot alter *pcidev. */
        libxl_device_pci pcidev_s = *pcidev;
        /* stubdomain is always running by now, even at create time */
        rc = do_pci_add(gc, stubdomid, &pcidev_s, 0);
        if ( rc )
            goto out;
    }

    /* Virtual slot with the function bits cleared. */
    orig_vdev = pcidev->vdevfn & ~7U;

    if ( pcidev->vfunc_mask == LIBXL_PCI_FUNC_ALL ) {
        if ( !(pcidev->vdevfn >> 3) ) {
            LOGD(ERROR, domid, "Must specify a v-slot for multi-function devices");
            rc = ERROR_INVAL;
            goto out;
        }
        if ( pci_multifunction_check(gc, pcidev, &pfunc_mask) ) {
            rc = ERROR_FAIL;
            goto out;
        }
        pcidev->vfunc_mask &= pfunc_mask;
        /* so now vfunc_mask == pfunc_mask */
    } else {
        pfunc_mask = (1 << pcidev->func);
    }

    for (rc = 0, i = 7; i >= 0; --i) {
        if ( (1 << i) & pfunc_mask ) {
            if ( pcidev->vfunc_mask == pfunc_mask ) {
                pcidev->func = i;
                pcidev->vdevfn = orig_vdev | i;
            } else {
                /* if not passing through multiple devices in a block make
                 * sure that virtual function number 0 is always used otherwise
                 * guest won't see the device
                 */
                pcidev->vdevfn = orig_vdev;
            }
            /* Keep going after a failure so the remaining functions are
             * still attempted; the error is reported at the end. */
            if ( do_pci_add(gc, domid, pcidev, starting) )
                rc = ERROR_FAIL;
        }
    }

out:
    return rc;
}
1283
/*
 * Attach every PCI device listed in @d_config to @domid at domain start-up
 * and then create the PCI backend for them.
 *
 * Devices are added one by one with starting == 1; the first negative result
 * stops the sequence.  The final status is stored in the aodev obtained from
 * libxl__multidev_prepare() and that aodev's callback is invoked.
 */
static void libxl__add_pcidevs(libxl__egc *egc, libxl__ao *ao, uint32_t domid,
                               libxl_domain_config *d_config,
                               libxl__multidev *multidev)
{
    AO_GC;
    libxl__ao_device *aodev = libxl__multidev_prepare(multidev);
    int idx = 0, status = 0;

    while (idx < d_config->num_pcidevs) {
        status = libxl__device_pci_add(gc, domid, &d_config->pcidevs[idx], 1);
        if (status < 0) {
            LOGD(ERROR, domid, "libxl_device_pci_add failed: %d", status);
            goto done;
        }
        idx++;
    }

    /* The backend is only created when there is at least one device. */
    if (d_config->num_pcidevs > 0) {
        status = libxl__create_pci_backend(gc, domid, d_config->pcidevs,
                                           d_config->num_pcidevs);
        if (status < 0)
            LOGD(ERROR, domid, "libxl_create_pci_backend failed: %d", status);
    }

done:
    aodev->rc = status;
    aodev->callback(egc, aodev);
}
1313
/*
 * Ask a qemu-traditional device model, through xenstore, to unplug @pcidev
 * from @domid.
 *
 * The current device-model "state" node is saved, the BDF is written to the
 * "parameter" node and, unless @force is set or the virtual function number
 * is non-zero, a "pci-rem" command is issued and the function waits for the
 * device model to report "pci-removed".  Finally the saved state is written
 * back.
 *
 * Returns 0 on success, ERROR_FAIL if the device model did not respond.  In
 * that case the saved state is not written back.
 */
static int qemu_pci_remove_xenstore(libxl__gc *gc, uint32_t domid,
                                    libxl_device_pci *pcidev, int force)
{
    libxl_ctx *ctx = libxl__gc_owner(gc);
    char *state;
    char *path;
    uint32_t dm_domid;

    dm_domid = libxl_get_stubdom_id(CTX, domid);

    path = DEVICE_MODEL_XS_PATH(gc, dm_domid, domid, "/state");
    state = libxl__xs_read(gc, XBT_NULL, path);
    path = DEVICE_MODEL_XS_PATH(gc, dm_domid, domid, "/parameter");
    libxl__xs_printf(gc, XBT_NULL, path, PCI_BDF, pcidev->domain,
                     pcidev->bus, pcidev->dev, pcidev->func);

    /* Remove all functions at once atomically by only signalling
     * device-model for function 0 */
    if ( !force && (pcidev->vdevfn & 0x7) == 0 ) {
        libxl__qemu_traditional_cmd(gc, domid, "pci-rem");
        if (libxl__wait_for_device_model_deprecated(gc, domid, "pci-removed",
                                                    NULL, NULL, NULL) < 0) {
            LOGD(ERROR, domid, "Device Model didn't respond in time");
            /* This depends on guest operating system acknowledging the
             * SCI, if it doesn't respond in time then we may wish to
             * force the removal.
             */
            return ERROR_FAIL;
        }
    }

    /* The read above may have failed and left state NULL; in that case
     * there is nothing to restore and strlen() must not be called. */
    if (state) {
        path = DEVICE_MODEL_XS_PATH(gc, dm_domid, domid, "/state");
        xs_write(ctx->xsh, XBT_NULL, path, state, strlen(state));
    } else {
        LOGD(ERROR, domid,
             "Could not read device model state, not restoring it");
    }

    return 0;
}
1349
/*
 * Forward declaration: do_pci_remove() below calls this function for the
 * stubdomain, but its definition comes after do_pci_remove() in this file.
 */
static int libxl__device_pci_remove_common(libxl__gc *gc, uint32_t domid,
                                           libxl_device_pci *pcidev, int force);
1352
/*
 * Detach one function of a host PCI device from domain @domid.
 *
 * The device must be present in libxl_device_pci_list() for @domid,
 * otherwise ERROR_INVAL is returned.
 *
 * For HVM domains the device model is asked to unplug the device; a failure
 * there aborts the removal unless @force is set.  For other domains the
 * I/O-port, MMIO and IRQ permissions taken from the device's sysfs
 * "resource" and "irq" files are revoked; failures on that path are logged
 * and otherwise ignored.
 *
 * Afterwards the device is reset (only when the virtual function number is
 * 0), deassigned unless libxl_is_stubdom() reports true for @domid, removed
 * from the stubdomain if there is one, and removed from xenstore.
 *
 * Returns 0 on success, ERROR_FAIL or ERROR_INVAL on failure.
 */
static int do_pci_remove(libxl__gc *gc, uint32_t domid,
                         libxl_device_pci *pcidev, int force)
{
    libxl_ctx *ctx = libxl__gc_owner(gc);
    libxl_device_pci *assigned;
    libxl_domain_type type = libxl__domain_type(gc, domid);
    int hvm = 0, rc, num;
    int stubdomid = 0;
    /* domainid is only used as the domain id in log messages. */
    uint32_t domainid = domid;
    bool isstubdom = libxl_is_stubdom(ctx, domid, &domainid);

    assigned = libxl_device_pci_list(ctx, domid, &num);
    if ( assigned == NULL )
        return ERROR_FAIL;

    rc = ERROR_INVAL;
    if ( !is_pcidev_in_array(assigned, num, pcidev->domain,
                             pcidev->bus, pcidev->dev, pcidev->func) ) {
        LOGD(ERROR, domainid, "PCI device not attached to this domain");
        goto out_fail;
    }

    rc = ERROR_FAIL;
    if (type == LIBXL_DOMAIN_TYPE_HVM) {
        hvm = 1;
        if (libxl__wait_for_device_model_deprecated(gc, domid, "running",
                                                    NULL, NULL, NULL) < 0)
            goto out_fail;

        switch (libxl__device_model_version_running(gc, domid)) {
        case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN_TRADITIONAL:
            rc = qemu_pci_remove_xenstore(gc, domid, pcidev, force);
            break;
        case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN:
            rc = libxl__qmp_pci_del(gc, domid, pcidev);
            break;
        default:
            rc = ERROR_INVAL;
            goto out_fail;
        }
        /* A forced removal carries on even if the device model refused. */
        if (rc && !force) {
            rc = ERROR_FAIL;
            goto out_fail;
        }
    } else {
        assert(type == LIBXL_DOMAIN_TYPE_PV);

        char *sysfs_path = GCSPRINTF(SYSFS_PCI_DEV"/"PCI_BDF"/resource", pcidev->domain,
                                     pcidev->bus, pcidev->dev, pcidev->func);
        FILE *f = fopen(sysfs_path, "r");
        /* Wide enough for 64-bit BARs, matching the parsing of the same
         * file in do_pci_add(). */
        unsigned long long start = 0, end = 0, flags = 0, size = 0;
        int irq = 0;
        int i;

        if (f == NULL) {
            LOGED(ERROR, domainid, "Couldn't open %s", sysfs_path);
            goto skip1;
        }
        for (i = 0; i < PROC_PCI_NUM_RESOURCES; i++) {
            if (fscanf(f, "0x%llx 0x%llx 0x%llx\n", &start, &end, &flags) != 3)
                continue;
            size = end - start + 1;
            if (start) {
                if (flags & PCI_BAR_IO) {
                    rc = xc_domain_ioport_permission(ctx->xch, domid, start, size, 0);
                    if (rc < 0)
                        LOGED(ERROR, domainid,
                              "xc_domain_ioport_permission error 0x%llx/0x%llx",
                              start,
                              size);
                } else {
                    /* Memory range: first page frame and page count, with
                     * the size rounded up to whole pages. */
                    rc = xc_domain_iomem_permission(ctx->xch, domid, start>>XC_PAGE_SHIFT,
                                                    (size+(XC_PAGE_SIZE-1))>>XC_PAGE_SHIFT, 0);
                    if (rc < 0)
                        LOGED(ERROR, domainid,
                              "xc_domain_iomem_permission error 0x%llx/0x%llx",
                              start,
                              size);
                }
            }
        }
        fclose(f);
skip1:
        sysfs_path = GCSPRINTF(SYSFS_PCI_DEV"/"PCI_BDF"/irq", pcidev->domain,
                               pcidev->bus, pcidev->dev, pcidev->func);
        f = fopen(sysfs_path, "r");
        if (f == NULL) {
            LOGED(ERROR, domainid, "Couldn't open %s", sysfs_path);
            goto out;
        }
        if ((fscanf(f, "%u", &irq) == 1) && irq) {
            rc = xc_physdev_unmap_pirq(ctx->xch, domid, irq);
            if (rc < 0) {
                LOGED(ERROR, domainid, "xc_physdev_unmap_pirq irq=%d", irq);
            }
            rc = xc_domain_irq_permission(ctx->xch, domid, irq, 0);
            if (rc < 0) {
                LOGED(ERROR, domainid, "xc_domain_irq_permission irq=%d", irq);
            }
        }
        fclose(f);
    }
out:
    /* don't do multiple resets while some functions are still passed through */
    if ( (pcidev->vdevfn & 0x7) == 0 ) {
        libxl__device_pci_reset(gc, pcidev->domain, pcidev->bus, pcidev->dev, pcidev->func);
    }

    if (!isstubdom) {
        rc = xc_deassign_device(ctx->xch, domid, pcidev_encode_bdf(pcidev));
        /* Logged only; for non-HVM domains ENOSYS is not even logged. */
        if (rc < 0 && (hvm || errno != ENOSYS))
            LOGED(ERROR, domainid, "xc_deassign_device failed");
    }

    stubdomid = libxl_get_stubdom_id(ctx, domid);
    if (stubdomid != 0) {
        /* Work on a copy so the stubdomain pass cannot alter *pcidev. */
        libxl_device_pci pcidev_s = *pcidev;
        libxl__device_pci_remove_common(gc, stubdomid, &pcidev_s, force);
    }

    libxl__device_pci_remove_xenstore(gc, domid, pcidev);

    /* Everything past the device-model step is best effort. */
    rc = 0;
out_fail:
    free(assigned);
    return rc;
}
1482
/*
 * Detach @pcidev from @domid, one physical function at a time.
 *
 * When vfunc_mask is LIBXL_PCI_FUNC_ALL the set of functions comes from
 * pci_multifunction_check(); otherwise only pcidev->func is removed.
 * Functions are walked from 7 down to 0 and each is handed to
 * do_pci_remove().  pcidev->func, ->vdevfn and ->vfunc_mask may be modified.
 *
 * Returns 0 if every removal succeeded, ERROR_FAIL otherwise.
 */
static int libxl__device_pci_remove_common(libxl__gc *gc, uint32_t domid,
                                           libxl_device_pci *pcidev, int force)
{
    /* Virtual slot with the function bits cleared. */
    const unsigned int vslot = pcidev->vdevfn & ~7U;
    unsigned int funcs;
    int fn, result = 0;

    if ( pcidev->vfunc_mask != LIBXL_PCI_FUNC_ALL ) {
        funcs = (1 << pcidev->func);
    } else {
        if ( pci_multifunction_check(gc, pcidev, &funcs) )
            return ERROR_FAIL;
        pcidev->vfunc_mask &= funcs;
    }

    for (fn = 7; fn >= 0; fn--) {
        if ( !((1 << fn) & funcs) )
            continue;

        if ( pcidev->vfunc_mask == funcs ) {
            pcidev->func = fn;
            pcidev->vdevfn = vslot | fn;
        } else {
            pcidev->vdevfn = vslot;
        }

        /* A failure is remembered but does not stop the remaining
         * functions from being removed. */
        if ( do_pci_remove(gc, domid, pcidev, force) )
            result = ERROR_FAIL;
    }

    return result;
}
1517
/*
 * Public entry point: detach @pcidev from @domid without forcing.
 *
 * Calls libxl__device_pci_remove_common() with force == 0, hands its result
 * to libxl__ao_complete() and returns AO_INPROGRESS.
 */
int libxl_device_pci_remove(libxl_ctx *ctx, uint32_t domid,
                            libxl_device_pci *pcidev,
                            const libxl_asyncop_how *ao_how)
{
    AO_CREATE(ctx, domid, ao_how);
    const int remove_rc = libxl__device_pci_remove_common(gc, domid, pcidev, 0);

    libxl__ao_complete(egc, ao, remove_rc);
    return AO_INPROGRESS;
}
1531
/*
 * Public entry point: forcibly detach @pcidev from @domid.
 *
 * Calls libxl__device_pci_remove_common() with force == 1, hands its result
 * to libxl__ao_complete() and returns AO_INPROGRESS.
 */
int libxl_device_pci_destroy(libxl_ctx *ctx, uint32_t domid,
                             libxl_device_pci *pcidev,
                             const libxl_asyncop_how *ao_how)
{
    AO_CREATE(ctx, domid, ao_how);
    const int destroy_rc = libxl__device_pci_remove_common(gc, domid, pcidev, 1);

    libxl__ao_complete(egc, ao, destroy_rc);
    return AO_INPROGRESS;
}
1544
/*
 * Fill @pci from the xenstore backend entries of device number @nr under
 * @be_path.
 *
 * Reads three nodes:
 *   dev-<nr>     BDF in PCI_BDF format
 *   vdevfn-<nr>  virtual devfn, hexadecimal
 *   opts-<nr>    comma-separated key=value list; the keys msitranslate,
 *                power_mgmt and permissive are recognised
 *
 * A node that is missing or cannot be parsed leaves the corresponding
 * fields at zero (BDF, vdevfn) or untouched (options).
 */
static void libxl__device_pci_from_xs_be(libxl__gc *gc,
                                         const char *be_path,
                                         libxl_device_pci *pci,
                                         int nr)
{
    char *s;
    unsigned int domain = 0, bus = 0, dev = 0, func = 0, vdevfn = 0;

    s = libxl__xs_read(gc, XBT_NULL, GCSPRINTF("%s/dev-%d", be_path, nr));
    /* The read may fail; do not hand NULL to sscanf(). */
    if (s)
        sscanf(s, PCI_BDF, &domain, &bus, &dev, &func);

    s = libxl__xs_read(gc, XBT_NULL, GCSPRINTF("%s/vdevfn-%d", be_path, nr));
    if (s)
        vdevfn = strtol(s, (char **) NULL, 16);

    pcidev_struct_fill(pci, domain, bus, dev, func, vdevfn);

    s = libxl__xs_read(gc, XBT_NULL, GCSPRINTF("%s/opts-%d", be_path, nr));
    if (s) {
        char *saveptr;
        char *p;

        /* Tokens alternate between keys and values.  The loop stops as
         * soon as strtok_r() runs out of tokens, including when the string
         * is empty or a recognised key has no value after it. */
        for (p = strtok_r(s, ",=", &saveptr); p != NULL;
             p = strtok_r(NULL, ",=", &saveptr)) {
            while (*p == ' ')
                p++;
            if (!strcmp(p, "msitranslate")) {
                p = strtok_r(NULL, ",=", &saveptr);
                if (p == NULL)
                    break;
                pci->msitranslate = atoi(p);
            } else if (!strcmp(p, "power_mgmt")) {
                p = strtok_r(NULL, ",=", &saveptr);
                if (p == NULL)
                    break;
                pci->power_mgmt = atoi(p);
            } else if (!strcmp(p, "permissive")) {
                p = strtok_r(NULL, ",=", &saveptr);
                if (p == NULL)
                    break;
                pci->permissive = atoi(p);
            }
        }
    }
}
1582
libxl_device_pci_list(libxl_ctx * ctx,uint32_t domid,int * num)1583 libxl_device_pci *libxl_device_pci_list(libxl_ctx *ctx, uint32_t domid, int *num)
1584 {
1585 GC_INIT(ctx);
1586 char *be_path, *num_devs;
1587 int n, i;
1588 libxl_device_pci *pcidevs = NULL;
1589
1590 *num = 0;
1591
1592 be_path = GCSPRINTF("%s/backend/pci/%d/0", libxl__xs_get_dompath(gc, 0), domid);
1593 num_devs = libxl__xs_read(gc, XBT_NULL, GCSPRINTF("%s/num_devs", be_path));
1594 if (!num_devs)
1595 goto out;
1596
1597 n = atoi(num_devs);
1598 pcidevs = calloc(n, sizeof(libxl_device_pci));
1599
1600 for (i = 0; i < n; i++)
1601 libxl__device_pci_from_xs_be(gc, be_path, pcidevs + i, i);
1602
1603 *num = n;
1604 out:
1605 GC_FREE;
1606 return pcidevs;
1607 }
1608
/*
 * Forcibly detach every PCI device that libxl_device_pci_list() reports for
 * @domid.
 *
 * Returns 0 when the list is NULL or every removal succeeded, ERROR_FAIL if
 * any removal returned a negative value.
 */
int libxl__device_pci_destroy_all(libxl__gc *gc, uint32_t domid)
{
    libxl_ctx *ctx = libxl__gc_owner(gc);
    int count, idx, result = 0;
    libxl_device_pci *list = libxl_device_pci_list(ctx, domid, &count);

    if ( !list )
        return 0;

    for (idx = 0; idx < count; idx++) {
        libxl_device_pci *victim = &list[idx];

        /* Force remove on shutdown since, on HVM, qemu will not always
         * respond to SCI interrupt because the guest kernel has shut down the
         * devices by the time we even get here!
         */
        if (libxl__device_pci_remove_common(gc, domid, victim, 1) < 0)
            result = ERROR_FAIL;
    }

    free(list);
    return result;
}
1631
/*
 * When gfx_passthru is enabled, grant access to the legacy VGA memory range
 * for the first passed-through device whose class is 0x030000.
 *
 * The range starts at page frame 0xa0000 >> XC_PAGE_SHIFT and spans 0x20
 * pages.  Access is granted first to the domain id returned by
 * libxl_get_stubdom_id() and then to @domid.
 *
 * Returns 0 when gfx_passthru is off, no such device exists or both grants
 * succeeded; otherwise the negative value from xc_domain_iomem_permission().
 */
int libxl__grant_vga_iomem_permission(libxl__gc *gc, const uint32_t domid,
                                      libxl_domain_config *const d_config)
{
    int idx;

    if (!libxl_defbool_val(d_config->b_info.u.hvm.gfx_passthru))
        return 0;

    for (idx = 0; idx < d_config->num_pcidevs; idx++) {
        libxl_device_pci *candidate = &d_config->pcidevs[idx];
        uint64_t vga_iomem_start = 0xa0000 >> XC_PAGE_SHIFT;
        unsigned long class_code;
        uint32_t stubdom_domid;
        int ret;

        /* Skip devices whose class is unreadable or not VGA. */
        if (sysfs_dev_get_class(gc, candidate, &class_code) ||
            class_code != 0x030000) /* VGA class */
            continue;

        stubdom_domid = libxl_get_stubdom_id(CTX, domid);
        ret = xc_domain_iomem_permission(CTX->xch, stubdom_domid,
                                         vga_iomem_start, 0x20, 1);
        if (ret < 0) {
            LOGED(ERROR, domid,
                  "failed to give stubdom%d access to iomem range "
                  "%"PRIx64"-%"PRIx64" for VGA passthru",
                  stubdom_domid,
                  vga_iomem_start, (vga_iomem_start + 0x20 - 1));
            return ret;
        }

        ret = xc_domain_iomem_permission(CTX->xch, domid,
                                         vga_iomem_start, 0x20, 1);
        if (ret < 0) {
            LOGED(ERROR, domid,
                  "failed to give dom%d access to iomem range "
                  "%"PRIx64"-%"PRIx64" for VGA passthru",
                  domid, vga_iomem_start, (vga_iomem_start + 0x20 - 1));
            return ret;
        }

        /* Only the first VGA device is handled. */
        return 0;
    }

    return 0;
}
1676
/*
 * Compare two PCI device descriptions.
 *
 * Returns whatever the COMPARE_PCI() macro evaluates to for @d1 and @d2.
 * NOTE(review): COMPARE_PCI is defined outside this file section; it
 * presumably matches on the BDF fields - confirm.
 */
static int libxl_device_pci_compare(libxl_device_pci *d1,
                                    libxl_device_pci *d2)
{
    const int result = COMPARE_PCI(d1, d2);

    return result;
}
1682
/* PCI devices provide no devid-update hook: the name expands to NULL.
 * NOTE(review): presumably picked up by the DEFINE_DEVICE_TYPE_STRUCT_X
 * expansion below - confirm against the macro definition. */
#define libxl__device_pci_update_devid NULL

/* Instantiate the device-type descriptor for PCI devices. */
DEFINE_DEVICE_TYPE_STRUCT_X(pcidev, pci, pci);
1686
1687 /*
1688 * Local variables:
1689 * mode: C
1690 * c-basic-offset: 4
1691 * indent-tabs-mode: nil
1692 * End:
1693 */
1694