1 /*
2  * Copyright (C) 2009      Citrix Ltd.
3  * Author Vincent Hanquez <vincent.hanquez@eu.citrix.com>
4  * Author Stefano Stabellini <stefano.stabellini@eu.citrix.com>
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU Lesser General Public License as published
8  * by the Free Software Foundation; version 2.1 only. with the special
9  * exception on linking described in file LICENSE.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU Lesser General Public License for more details.
15  */
16 
17 #include "libxl_osdeps.h" /* must come before any other headers */
18 
19 #include "libxl_internal.h"
20 
/*
 * printf()/scanf()-style templates for textual PCI addresses.  Unless noted
 * otherwise the arguments are domain, bus, device, function, in that order.
 */

/* Full address, e.g. "0000:03:1f.7". */
#define PCI_BDF         "%04x:%02x:%02x.%01x"

/* Address without the domain part: bus, device, function. */
#define PCI_BDF_SHORT   "%02x:%02x.%01x"

/* Full address followed by '@' and one extra two-digit hex value. */
#define PCI_BDF_VDEVFN  "%04x:%02x:%02x.%01x@%02x"

/* Two integer options, comma separated. */
#define PCI_OPTIONS     "msitranslate=%d,power_mgmt=%d"

/* Full address with '-' as the only separator (no ':' or '.'). */
#define PCI_BDF_XSPATH  "%04x-%02x-%02x-%01x"

/* Takes three values; presumably bus, device, function -- confirm at users. */
#define PCI_PT_QDEV_ID  "pci-pt-%02x_%02x.%01x"
27 
/*
 * Pack the address held in @pci into one unsigned integer:
 *   domain   -> shifted left by 16
 *   bus      -> bits 15..8 (masked to 8 bits)
 *   device   -> bits 7..3  (masked to 5 bits)
 *   function -> bits 2..0  (masked to 3 bits)
 */
static unsigned int pci_encode_bdf(libxl_device_pci *pci)
{
    unsigned int encoded = pci->domain << 16;
    unsigned int bus_bits = (pci->bus & 0xff) << 8;
    unsigned int dev_bits = (pci->dev & 0x1f) << 3;
    unsigned int func_bits = (pci->func & 0x7);

    return encoded | bus_bits | dev_bits | func_bits;
}
39 
/*
 * Store the four address components into @pci.  No other member of the
 * structure is touched.
 */
static void pci_struct_fill(libxl_device_pci *pci, unsigned int domain,
                            unsigned int bus, unsigned int dev,
                            unsigned int func)
{
    pci->func = func;
    pci->dev = dev;
    pci->bus = bus;
    pci->domain = domain;
}
49 
/*
 * Append to @back the key/value pairs that describe device number @num in
 * the PCI backend directory:
 *   key-<num>, dev-<num>   the device address in PCI_BDF form
 *   vdevfn-<num>           only when pci->vdevfn is non-zero
 *   name-<num>             only when pci->name is set
 *   opts-<num>             msitranslate/power_mgmt/permissive/rdm_policy
 *   state-<num>            XenbusStateInitialising
 * All strings are allocated from @gc.
 */
static void libxl_create_pci_backend_device(libxl__gc *gc,
                                            flexarray_t *back,
                                            int num,
                                            const libxl_device_pci *pci)
{
    char *key_node = GCSPRINTF("key-%d", num);
    char *key_bdf = GCSPRINTF(PCI_BDF, pci->domain, pci->bus,
                              pci->dev, pci->func);
    char *dev_node = GCSPRINTF("dev-%d", num);
    char *dev_bdf = GCSPRINTF(PCI_BDF, pci->domain, pci->bus,
                              pci->dev, pci->func);
    char *opts_node, *opts;

    flexarray_append(back, key_node);
    flexarray_append(back, key_bdf);
    flexarray_append(back, dev_node);
    flexarray_append(back, dev_bdf);

    if (pci->vdevfn) {
        flexarray_append_pair(back, GCSPRINTF("vdevfn-%d", num),
                              GCSPRINTF("%x", pci->vdevfn));
    }

    if (pci->name) {
        flexarray_append_pair(back, GCSPRINTF("name-%d", num),
                              GCSPRINTF("%s", pci->name));
    }

    opts_node = GCSPRINTF("opts-%d", num);
    opts = GCSPRINTF("msitranslate=%d,power_mgmt=%d,permissive=%d,rdm_policy=%s",
                     pci->msitranslate, pci->power_mgmt, pci->permissive,
                     libxl_rdm_reserve_policy_to_string(pci->rdm_policy));
    flexarray_append(back, opts_node);
    flexarray_append(back, opts);

    flexarray_append_pair(back, GCSPRINTF("state-%d", num),
                          GCSPRINTF("%d", XenbusStateInitialising));
}
70 
/*
 * Fill @device for the PCI device bus of guest @domid.  Backend domain,
 * backend devid and frontend devid are all fixed at 0; both kinds are
 * LIBXL__DEVICE_KIND_PCI.  @gc and @pci are not used.
 */
static void libxl__device_from_pci(libxl__gc *gc, uint32_t domid,
                                   const libxl_device_pci *pci,
                                   libxl__device *device)
{
    /* Frontend side */
    device->domid = domid;
    device->devid = 0;
    device->kind = LIBXL__DEVICE_KIND_PCI;

    /* Backend side */
    device->backend_domid = 0;
    device->backend_devid = 0;
    device->backend_kind = LIBXL__DEVICE_KIND_PCI;
}
82 
/*
 * (Re)create, inside transaction @t, the xenstore directories of the PCI
 * backend (backend domain 0, devid 0) and of the matching frontend in guest
 * @domid.  Each directory is removed, recreated, given its permissions and
 * then populated with its initial nodes.
 *
 * Return values of the individual xenstore operations are not checked.
 * @pci is not used.
 */
static void libxl__create_pci_backend(libxl__gc *gc, xs_transaction_t t,
                                      uint32_t domid, const libxl_device_pci *pci)
{
    libxl_ctx *ctx = libxl__gc_owner(gc);
    /* Backend directory: entry 0 is domain 0, entry 1 lets the guest read. */
    struct xs_permissions be_perms[2] = {
        { .id = 0,     .perms = XS_PERM_NONE },
        { .id = domid, .perms = XS_PERM_READ },
    };
    /* Frontend directory: entry 0 is the guest, entry 1 lets domain 0 read. */
    struct xs_permissions fe_perms[2] = {
        { .id = domid, .perms = XS_PERM_NONE },
        { .id = 0,     .perms = XS_PERM_READ },
    };
    flexarray_t *back, *front;
    char *be_path, *fe_path;

    LOGD(DEBUG, domid, "Creating pci backend");

    fe_path = libxl__domain_device_frontend_path(gc, domid, 0,
                                                 LIBXL__DEVICE_KIND_PCI);
    be_path = libxl__domain_device_backend_path(gc, 0, domid, 0,
                                                LIBXL__DEVICE_KIND_PCI);

    /* Backend directory */
    back = flexarray_make(gc, 16, 1);
    flexarray_append_pair(back, "frontend", fe_path);
    flexarray_append_pair(back, "frontend-id", GCSPRINTF("%d", domid));
    flexarray_append_pair(back, "online", GCSPRINTF("%d", 1));
    flexarray_append_pair(back, "state",
                          GCSPRINTF("%d", XenbusStateInitialising));
    flexarray_append_pair(back, "domain", libxl__domid_to_name(gc, domid));

    xs_rm(ctx->xsh, t, be_path);
    xs_mkdir(ctx->xsh, t, be_path);
    xs_set_permissions(ctx->xsh, t, be_path, be_perms, ARRAY_SIZE(be_perms));
    libxl__xs_writev(gc, t, be_path, libxl__xs_kvs_of_flexarray(gc, back));

    /* Frontend directory */
    front = flexarray_make(gc, 16, 1);
    flexarray_append_pair(front, "backend", be_path);
    flexarray_append_pair(front, "backend-id", GCSPRINTF("%d", 0));
    flexarray_append_pair(front, "state",
                          GCSPRINTF("%d", XenbusStateInitialising));

    xs_rm(ctx->xsh, t, fe_path);
    xs_mkdir(ctx->xsh, t, fe_path);
    xs_set_permissions(ctx->xsh, t, fe_path, fe_perms, ARRAY_SIZE(fe_perms));
    libxl__xs_writev(gc, t, fe_path, libxl__xs_kvs_of_flexarray(gc, front));
}
133 
/*
 * Record @pci in the xenstore PCI backend directory of guest @domid and,
 * for a running non-stubdomain guest, in the guest's stored configuration.
 *
 * @starting: true while the domain is being built.  In that case the
 *            function neither waits for the backend, nor requests a
 *            reconfiguration, nor touches the stored domain configuration.
 *
 * The backend directory itself is created when no "num_devs" node exists
 * yet (i.e. this is the first device).
 *
 * Returns 0 on success or a libxl error code.  Every exit goes through
 * "out" so that the transaction, the userdata lock and d_config are always
 * released.
 */
static int libxl__device_pci_add_xenstore(libxl__gc *gc,
                                          uint32_t domid,
                                          const libxl_device_pci *pci,
                                          bool starting)
{
    flexarray_t *back;
    char *num_devs, *be_path;
    int num = 0;
    xs_transaction_t t = XBT_NULL;
    int rc;
    libxl_domain_config d_config;
    libxl__flock *lock = NULL;
    bool is_stubdomain = libxl_is_stubdom(CTX, domid, NULL);
    libxl_domain_type domtype;

    /* Stubdomain doesn't have own config. */
    if (!is_stubdomain)
        libxl_domain_config_init(&d_config);

    be_path = libxl__domain_device_backend_path(gc, 0, domid, 0,
                                                LIBXL__DEVICE_KIND_PCI);
    num_devs = libxl__xs_read(gc, XBT_NULL, GCSPRINTF("%s/num_devs", be_path));

    domtype = libxl__domain_type(gc, domid);
    if (domtype == LIBXL_DOMAIN_TYPE_INVALID) {
        rc = ERROR_FAIL;
        goto out;
    }

    /* Wait is only needed if the backend already exists (num_devs != NULL) */
    if (num_devs && !starting && domtype == LIBXL_DOMAIN_TYPE_PV) {
        rc = libxl__wait_for_backend(gc, be_path,
                                     GCSPRINTF("%d", XenbusStateConnected));
        if (rc) goto out;
    }

    back = flexarray_make(gc, 16, 1);

    LOGD(DEBUG, domid, "Adding new pci device to xenstore");
    num = num_devs ? atoi(num_devs) : 0;
    libxl_create_pci_backend_device(gc, back, num, pci);
    flexarray_append_pair(back, "num_devs", GCSPRINTF("%d", num + 1));
    /* Hot-adding to a backend that already has devices: ask it to rescan. */
    if (num && !starting)
        flexarray_append_pair(back, "state", GCSPRINTF("%d", XenbusStateReconfiguring));

    /*
     * Stubdomain config is derived from its target domain, it doesn't have
     * its own file.
     */
    if (!is_stubdomain && !starting) {
        lock = libxl__lock_domain_userdata(gc, domid);
        if (!lock) {
            rc = ERROR_LOCK_FAIL;
            goto out;
        }

        rc = libxl__get_domain_configuration(gc, domid, &d_config);
        if (rc) goto out;

        LOGD(DEBUG, domid, "Adding new pci device to config");
        device_add_domain_config(gc, &d_config, &libxl__pci_devtype,
                                 pci);

        rc = libxl__dm_check_start(gc, &d_config, domid);
        if (rc) goto out;
    }

    /* Repeat until the transaction commits (rc == 0) or fails (rc < 0). */
    for (;;) {
        rc = libxl__xs_transaction_start(gc, &t);
        if (rc) goto out;

        /* lock is only held when the stored configuration was updated */
        if (lock) {
            rc = libxl__set_domain_configuration(gc, domid, &d_config);
            if (rc) goto out;
        }

        /* This is the first device, so create the backend */
        if (!num_devs)
            libxl__create_pci_backend(gc, t, domid, pci);

        libxl__xs_writev(gc, t, be_path, libxl__xs_kvs_of_flexarray(gc, back));

        rc = libxl__xs_transaction_commit(gc, &t);
        if (!rc) break;
        if (rc < 0) goto out;
    }

out:
    libxl__xs_transaction_abort(gc, &t);
    if (lock) libxl__unlock_file(lock);
    if (!is_stubdomain)
        libxl_domain_config_dispose(&d_config);
    return rc;
}
225 
libxl__device_pci_remove_xenstore(libxl__gc * gc,uint32_t domid,libxl_device_pci * pci)226 static int libxl__device_pci_remove_xenstore(libxl__gc *gc, uint32_t domid, libxl_device_pci *pci)
227 {
228     libxl_ctx *ctx = libxl__gc_owner(gc);
229     char *be_path, *num_devs_path, *num_devs, *xsdev, *tmp, *tmppath;
230     int num, i, j;
231     xs_transaction_t t;
232 
233     be_path = libxl__domain_device_backend_path(gc, 0, domid, 0,
234                                                 LIBXL__DEVICE_KIND_PCI);
235     num_devs_path = GCSPRINTF("%s/num_devs", be_path);
236     num_devs = libxl__xs_read(gc, XBT_NULL, num_devs_path);
237     if (!num_devs)
238         return ERROR_INVAL;
239     num = atoi(num_devs);
240 
241     libxl_domain_type domtype = libxl__domain_type(gc, domid);
242     if (domtype == LIBXL_DOMAIN_TYPE_INVALID)
243         return ERROR_FAIL;
244 
245     if (domtype == LIBXL_DOMAIN_TYPE_PV) {
246         if (libxl__wait_for_backend(gc, be_path, GCSPRINTF("%d", XenbusStateConnected)) < 0) {
247             LOGD(DEBUG, domid, "pci backend at %s is not ready", be_path);
248             return ERROR_FAIL;
249         }
250     }
251 
252     for (i = 0; i < num; i++) {
253         unsigned int domain = 0, bus = 0, dev = 0, func = 0;
254         xsdev = libxl__xs_read(gc, XBT_NULL, GCSPRINTF("%s/dev-%d", be_path, i));
255         sscanf(xsdev, PCI_BDF, &domain, &bus, &dev, &func);
256         if (domain == pci->domain && bus == pci->bus &&
257             pci->dev == dev && pci->func == func) {
258             break;
259         }
260     }
261     if (i == num) {
262         LOGD(ERROR, domid, "Couldn't find the device on xenstore");
263         return ERROR_INVAL;
264     }
265 
266 retry_transaction:
267     t = xs_transaction_start(ctx->xsh);
268     xs_write(ctx->xsh, t, GCSPRINTF("%s/state-%d", be_path, i), GCSPRINTF("%d", XenbusStateClosing), 1);
269     xs_write(ctx->xsh, t, GCSPRINTF("%s/state", be_path), GCSPRINTF("%d", XenbusStateReconfiguring), 1);
270     if (!xs_transaction_end(ctx->xsh, t, 0))
271         if (errno == EAGAIN)
272             goto retry_transaction;
273 
274     if (domtype == LIBXL_DOMAIN_TYPE_PV) {
275         if (libxl__wait_for_backend(gc, be_path, GCSPRINTF("%d", XenbusStateConnected)) < 0) {
276             LOGD(DEBUG, domid, "pci backend at %s is not ready", be_path);
277             return ERROR_FAIL;
278         }
279     }
280 
281 retry_transaction2:
282     t = xs_transaction_start(ctx->xsh);
283     xs_rm(ctx->xsh, t, GCSPRINTF("%s/state-%d", be_path, i));
284     xs_rm(ctx->xsh, t, GCSPRINTF("%s/key-%d", be_path, i));
285     xs_rm(ctx->xsh, t, GCSPRINTF("%s/dev-%d", be_path, i));
286     xs_rm(ctx->xsh, t, GCSPRINTF("%s/vdev-%d", be_path, i));
287     xs_rm(ctx->xsh, t, GCSPRINTF("%s/opts-%d", be_path, i));
288     xs_rm(ctx->xsh, t, GCSPRINTF("%s/vdevfn-%d", be_path, i));
289     xs_rm(ctx->xsh, t, GCSPRINTF("%s/name-%d", be_path, i));
290     libxl__xs_printf(gc, t, num_devs_path, "%d", num - 1);
291     for (j = i + 1; j < num; j++) {
292         tmppath = GCSPRINTF("%s/state-%d", be_path, j);
293         tmp = libxl__xs_read(gc, t, tmppath);
294         xs_write(ctx->xsh, t, GCSPRINTF("%s/state-%d", be_path, j - 1), tmp, strlen(tmp));
295         xs_rm(ctx->xsh, t, tmppath);
296         tmppath = GCSPRINTF("%s/dev-%d", be_path, j);
297         tmp = libxl__xs_read(gc, t, tmppath);
298         xs_write(ctx->xsh, t, GCSPRINTF("%s/dev-%d", be_path, j - 1), tmp, strlen(tmp));
299         xs_rm(ctx->xsh, t, tmppath);
300         tmppath = GCSPRINTF("%s/key-%d", be_path, j);
301         tmp = libxl__xs_read(gc, t, tmppath);
302         xs_write(ctx->xsh, t, GCSPRINTF("%s/key-%d", be_path, j - 1), tmp, strlen(tmp));
303         xs_rm(ctx->xsh, t, tmppath);
304         tmppath = GCSPRINTF("%s/vdev-%d", be_path, j);
305         tmp = libxl__xs_read(gc, t, tmppath);
306         if (tmp) {
307             xs_write(ctx->xsh, t, GCSPRINTF("%s/vdev-%d", be_path, j - 1), tmp, strlen(tmp));
308             xs_rm(ctx->xsh, t, tmppath);
309         }
310         tmppath = GCSPRINTF("%s/opts-%d", be_path, j);
311         tmp = libxl__xs_read(gc, t, tmppath);
312         if (tmp) {
313             xs_write(ctx->xsh, t, GCSPRINTF("%s/opts-%d", be_path, j - 1), tmp, strlen(tmp));
314             xs_rm(ctx->xsh, t, tmppath);
315         }
316         tmppath = GCSPRINTF("%s/vdevfn-%d", be_path, j);
317         tmp = libxl__xs_read(gc, t, tmppath);
318         if (tmp) {
319             xs_write(ctx->xsh, t, GCSPRINTF("%s/vdevfn-%d", be_path, j - 1), tmp, strlen(tmp));
320             xs_rm(ctx->xsh, t, tmppath);
321         }
322         tmppath = GCSPRINTF("%s/name-%d", be_path, j);
323         tmp = libxl__xs_read(gc, t, tmppath);
324         if (tmp) {
325             xs_write(ctx->xsh, t, GCSPRINTF("%s/name-%d", be_path, j - 1), tmp, strlen(tmp));
326             xs_rm(ctx->xsh, t, tmppath);
327         }
328     }
329     if (!xs_transaction_end(ctx->xsh, t, 0))
330         if (errno == EAGAIN)
331             goto retry_transaction2;
332 
333     if (num == 1) {
334         libxl__device dev;
335         if (libxl__parse_backend_path(gc, be_path, &dev) != 0)
336             return ERROR_FAIL;
337 
338         dev.domid = domid;
339         dev.kind = LIBXL__DEVICE_KIND_PCI;
340         dev.devid = 0;
341 
342         libxl__device_destroy(gc, &dev);
343         return 0;
344     }
345 
346     return 0;
347 }
348 
/*
 * Return true when one of the first @num elements of @pcis compares equal
 * to @pci according to COMPARE_PCI, false otherwise (including num <= 0).
 */
static bool is_pci_in_array(libxl_device_pci *pcis, int num,
                            libxl_device_pci *pci)
{
    int idx;

    for (idx = 0; idx < num; idx++) {
        if (COMPARE_PCI(pci, &pcis[idx]))
            return true;
    }

    return false;
}
361 
362 /* Write the standard BDF into the sysfs path given by sysfs_path. */
sysfs_write_bdf(libxl__gc * gc,const char * sysfs_path,libxl_device_pci * pci)363 static int sysfs_write_bdf(libxl__gc *gc, const char * sysfs_path,
364                            libxl_device_pci *pci)
365 {
366     int rc, fd;
367     char *buf;
368 
369     fd = open(sysfs_path, O_WRONLY);
370     if (fd < 0) {
371         LOGE(ERROR, "Couldn't open %s", sysfs_path);
372         return ERROR_FAIL;
373     }
374 
375     buf = GCSPRINTF(PCI_BDF, pci->domain, pci->bus,
376                     pci->dev, pci->func);
377     rc = write(fd, buf, strlen(buf));
378     /* Annoying to have two if's, but we need the errno */
379     if (rc < 0)
380         LOGE(ERROR, "write to %s returned %d", sysfs_path, rc);
381     close(fd);
382 
383     if (rc < 0)
384         return ERROR_FAIL;
385 
386     return 0;
387 }
388 
389 #define PCI_INFO_PATH "/libxl/pci"
390 
pci_info_xs_path(libxl__gc * gc,libxl_device_pci * pci,const char * node)391 static char *pci_info_xs_path(libxl__gc *gc, libxl_device_pci *pci,
392                               const char *node)
393 {
394     return node ?
395         GCSPRINTF(PCI_INFO_PATH"/"PCI_BDF_XSPATH"/%s",
396                   pci->domain, pci->bus, pci->dev, pci->func,
397                   node) :
398         GCSPRINTF(PCI_INFO_PATH"/"PCI_BDF_XSPATH,
399                   pci->domain, pci->bus, pci->dev, pci->func);
400 }
401 
402 
/*
 * Write the string @val to node @node of the xenstore directory of @pci,
 * outside of any transaction.  A failure is logged as a warning.
 *
 * Returns the result of libxl__xs_printf() (0 on success).
 */
static int pci_info_xs_write(libxl__gc *gc, libxl_device_pci *pci,
                              const char *node, const char *val)
{
    int rc;
    char *xs_node = pci_info_xs_path(gc, pci, node);

    rc = libxl__xs_printf(gc, XBT_NULL, xs_node, "%s", val);
    if (rc != 0) {
        LOGE(WARN, "Write of %s to node %s failed.", val, xs_node);
    }

    return rc;
}
413 
/*
 * Read node @node of the xenstore directory of @pci, outside of any
 * transaction, and return whatever libxl__xs_read() yields for that path.
 */
static char *pci_info_xs_read(libxl__gc *gc, libxl_device_pci *pci,
                              const char *node)
{
    return libxl__xs_read(gc, XBT_NULL, pci_info_xs_path(gc, pci, node));
}
421 
/*
 * Delete node @node of the xenstore directory of @pci (or, with a NULL
 * @node, the device directory itself), outside of any transaction.  The
 * result of xs_rm() is not checked.
 */
static void pci_info_xs_remove(libxl__gc *gc, libxl_device_pci *pci,
                               const char *node)
{
    char *xs_node = pci_info_xs_path(gc, pci, node);

    xs_rm(libxl__gc_owner(gc)->xsh, XBT_NULL, xs_node);
}
431 
libxl_device_pci_assignable_list(libxl_ctx * ctx,int * num)432 libxl_device_pci *libxl_device_pci_assignable_list(libxl_ctx *ctx, int *num)
433 {
434     GC_INIT(ctx);
435     libxl_device_pci *pcis = NULL, *new;
436     struct dirent *de;
437     DIR *dir;
438 
439     *num = 0;
440 
441     dir = opendir(SYSFS_PCIBACK_DRIVER);
442     if (NULL == dir) {
443         if (errno == ENOENT) {
444             LOG(ERROR, "Looks like pciback driver not loaded");
445         } else {
446             LOGE(ERROR, "Couldn't open %s", SYSFS_PCIBACK_DRIVER);
447         }
448         goto out;
449     }
450 
451     while((de = readdir(dir))) {
452         unsigned int dom, bus, dev, func;
453         char *name;
454 
455         if (sscanf(de->d_name, PCI_BDF, &dom, &bus, &dev, &func) != 4)
456             continue;
457 
458         new = realloc(pcis, ((*num) + 1) * sizeof(*new));
459         if (NULL == new)
460             continue;
461 
462         pcis = new;
463         new = pcis + *num;
464 
465         libxl_device_pci_init(new);
466         pci_struct_fill(new, dom, bus, dev, func);
467 
468         if (pci_info_xs_read(gc, new, "domid")) /* already assigned */
469             continue;
470 
471         name = pci_info_xs_read(gc, new, "name");
472         if (name) new->name = strdup(name);
473 
474         (*num)++;
475     }
476 
477     closedir(dir);
478 out:
479     GC_FREE;
480     return pcis;
481 }
482 
/*
 * Dispose of the first @num elements of @list and then free() the array
 * itself.
 */
void libxl_device_pci_assignable_list_free(libxl_device_pci *list, int num)
{
    int idx = 0;

    while (idx < num) {
        libxl_device_pci_dispose(&list[idx]);
        idx++;
    }

    free(list);
}
492 
493 /* Unbind device from its current driver, if any.  If driver_path is non-NULL,
494  * store the path to the original driver in it. */
sysfs_dev_unbind(libxl__gc * gc,libxl_device_pci * pci,char ** driver_path)495 static int sysfs_dev_unbind(libxl__gc *gc, libxl_device_pci *pci,
496                             char **driver_path)
497 {
498     char * spath, *dp = NULL;
499     struct stat st;
500 
501     spath = GCSPRINTF(SYSFS_PCI_DEV"/"PCI_BDF"/driver",
502                            pci->domain,
503                            pci->bus,
504                            pci->dev,
505                            pci->func);
506     if ( !lstat(spath, &st) ) {
507         /* Find the canonical path to the driver. */
508         dp = libxl__zalloc(gc, PATH_MAX);
509         dp = realpath(spath, dp);
510         if ( !dp ) {
511             LOGE(ERROR, "realpath() failed");
512             return -1;
513         }
514 
515         LOG(DEBUG, "Driver re-plug path: %s", dp);
516 
517         /* Unbind from the old driver */
518         spath = GCSPRINTF("%s/unbind", dp);
519         if ( sysfs_write_bdf(gc, spath, pci) < 0 ) {
520             LOGE(ERROR, "Couldn't unbind device");
521             return -1;
522         }
523     }
524 
525     if ( driver_path )
526         *driver_path = dp;
527 
528     return 0;
529 }
530 
/*
 * Read the "vendor" sysfs attribute of @pci, which is expected to look
 * like "0x<hex>".
 *
 * Returns the vendor ID, or 0xffff when the attribute cannot be opened or
 * parsed.
 */
static uint16_t sysfs_dev_get_vendor(libxl__gc *gc, libxl_device_pci *pci)
{
    char *pci_device_vendor_path =
            GCSPRINTF(SYSFS_PCI_DEV"/"PCI_BDF"/vendor",
                      pci->domain, pci->bus, pci->dev, pci->func);
    /* fscanf() returns int (possibly EOF); keep it in an int. */
    int read_items;
    uint16_t pci_device_vendor;

    FILE *f = fopen(pci_device_vendor_path, "r");
    if (!f) {
        LOGE(ERROR,
             "pci device "PCI_BDF" does not have vendor attribute",
             pci->domain, pci->bus, pci->dev, pci->func);
        return 0xffff;
    }
    read_items = fscanf(f, "0x%hx\n", &pci_device_vendor);
    fclose(f);
    if (read_items != 1) {
        LOGE(ERROR,
             "cannot read vendor of pci device "PCI_BDF,
             pci->domain, pci->bus, pci->dev, pci->func);
        return 0xffff;
    }

    return pci_device_vendor;
}
557 
/*
 * Read the "device" sysfs attribute of @pci, which is expected to look
 * like "0x<hex>".
 *
 * Returns the device ID, or 0xffff when the attribute cannot be opened or
 * parsed.
 */
static uint16_t sysfs_dev_get_device(libxl__gc *gc, libxl_device_pci *pci)
{
    char *pci_device_device_path =
            GCSPRINTF(SYSFS_PCI_DEV"/"PCI_BDF"/device",
                      pci->domain, pci->bus, pci->dev, pci->func);
    /* fscanf() returns int (possibly EOF); keep it in an int. */
    int read_items;
    uint16_t pci_device_device;

    FILE *f = fopen(pci_device_device_path, "r");
    if (!f) {
        LOGE(ERROR,
             "pci device "PCI_BDF" does not have device attribute",
             pci->domain, pci->bus, pci->dev, pci->func);
        return 0xffff;
    }
    read_items = fscanf(f, "0x%hx\n", &pci_device_device);
    fclose(f);
    if (read_items != 1) {
        LOGE(ERROR,
             "cannot read device of pci device "PCI_BDF,
             pci->domain, pci->bus, pci->dev, pci->func);
        return 0xffff;
    }

    return pci_device_device;
}
584 
/*
 * Read the "class" sysfs attribute of @pci, which is expected to look like
 * "0x<hex>", into *@class.
 *
 * Returns 0 on success and ERROR_FAIL when the attribute cannot be opened
 * or parsed.
 */
static int sysfs_dev_get_class(libxl__gc *gc, libxl_device_pci *pci,
                               unsigned long *class)
{
    int matched;
    FILE *attr;
    char *attr_path = GCSPRINTF(SYSFS_PCI_DEV"/"PCI_BDF"/class",
                                pci->domain, pci->bus, pci->dev, pci->func);

    attr = fopen(attr_path, "r");
    if (attr == NULL) {
        LOGE(ERROR,
             "pci device "PCI_BDF" does not have class attribute",
             pci->domain, pci->bus, pci->dev, pci->func);
        return ERROR_FAIL;
    }

    matched = fscanf(attr, "0x%lx\n", class);
    fclose(attr);
    if (matched == 1)
        return 0;

    LOGE(ERROR,
         "cannot read class of pci device "PCI_BDF,
         pci->domain, pci->bus, pci->dev, pci->func);
    return ERROR_FAIL;
}
612 
613 /*
614  * Some devices may need some ways to work well. Here like IGD,
615  * we have to pass a specific option to qemu.
616  */
libxl__is_igd_vga_passthru(libxl__gc * gc,const libxl_domain_config * d_config)617 bool libxl__is_igd_vga_passthru(libxl__gc *gc,
618                                 const libxl_domain_config *d_config)
619 {
620     unsigned int i;
621     uint16_t pt_vendor, pt_device;
622     unsigned long class;
623 
624     for (i = 0 ; i < d_config->num_pcidevs ; i++) {
625         libxl_device_pci *pci = &d_config->pcidevs[i];
626         pt_vendor = sysfs_dev_get_vendor(gc, pci);
627         pt_device = sysfs_dev_get_device(gc, pci);
628 
629         if (pt_vendor == 0xffff || pt_device == 0xffff ||
630             pt_vendor != 0x8086)
631             continue;
632 
633         if (sysfs_dev_get_class(gc, pci, &class))
634             continue;
635         if (class == 0x030000)
636             return true;
637     }
638 
639     return false;
640 }
641 
642 /*
643  * A brief comment about slots.  I don't know what slots are for; however,
644  * I have by experimentation determined:
645  * - Before a device can be bound to pciback, its BDF must first be listed
646  *   in pciback/slots
647  * - The way to get the BDF listed there is to write BDF to
648  *   pciback/new_slot
649  * - Writing the same BDF to pciback/new_slot is not idempotent; it results
650  *   in two entries of the BDF in pciback/slots
651  * It's not clear whether having two entries in pciback/slots is a problem
652  * or not.  Just to be safe, this code does the conservative thing, and
653  * first checks to see if there is a slot, adding one only if one does not
654  * already exist.
655  */
656 
657 /* Scan through /sys/.../pciback/slots looking for pci's BDF */
pciback_dev_has_slot(libxl__gc * gc,libxl_device_pci * pci)658 static int pciback_dev_has_slot(libxl__gc *gc, libxl_device_pci *pci)
659 {
660     FILE *f;
661     int rc = 0;
662     unsigned dom, bus, dev, func;
663 
664     f = fopen(SYSFS_PCIBACK_DRIVER"/slots", "r");
665 
666     if (f == NULL) {
667         LOGE(ERROR, "Couldn't open %s", SYSFS_PCIBACK_DRIVER"/slots");
668         return ERROR_FAIL;
669     }
670 
671     while (fscanf(f, "%x:%x:%x.%d\n", &dom, &bus, &dev, &func) == 4) {
672         if (dom == pci->domain
673             && bus == pci->bus
674             && dev == pci->dev
675             && func == pci->func) {
676             rc = 1;
677             goto out;
678         }
679     }
680 out:
681     fclose(f);
682     return rc;
683 }
684 
/*
 * Check whether @pci has an entry below SYSFS_PCIBACK_DRIVER.
 *
 * Returns 1 when the entry exists, 0 when it does not, and -1 when the
 * pciback directory is not accessible or lstat() fails for any reason
 * other than ENOENT.
 */
static int pciback_dev_is_assigned(libxl__gc *gc, libxl_device_pci *pci)
{
    struct stat st;
    char *entry;

    if (access(SYSFS_PCIBACK_DRIVER, F_OK) < 0) {
        if (errno == ENOENT) {
            LOG(ERROR, "Looks like pciback driver is not loaded");
        } else {
            LOGE(ERROR, "Can't access "SYSFS_PCIBACK_DRIVER);
        }
        return -1;
    }

    entry = GCSPRINTF(SYSFS_PCIBACK_DRIVER"/"PCI_BDF,
                      pci->domain, pci->bus,
                      pci->dev, pci->func);

    if (lstat(entry, &st) == 0)
        return 1;

    if (errno == ENOENT)
        return 0;

    LOGE(ERROR, "Accessing %s", entry);
    return -1;
}
712 
/*
 * Bind @pci to pciback: make sure a slot exists for it (writing its address
 * to "new_slot" only when none is listed yet), then write its address to
 * "bind".
 *
 * Returns 0 on success and ERROR_FAIL on any failure.
 */
static int pciback_dev_assign(libxl__gc *gc, libxl_device_pci *pci)
{
    int rc;

    if ( (rc = pciback_dev_has_slot(gc, pci)) < 0 ) {
        LOGE(ERROR, "Error checking for pciback slot");
        return ERROR_FAIL;
    } else if (rc == 0) {
        if ( sysfs_write_bdf(gc, SYSFS_PCIBACK_DRIVER"/new_slot",
                             pci) < 0 ) {
            /* Distinct from the bind failure below: the slot write failed. */
            LOGE(ERROR, "Couldn't add pciback slot");
            return ERROR_FAIL;
        }
    }

    if ( sysfs_write_bdf(gc, SYSFS_PCIBACK_DRIVER"/bind", pci) < 0 ) {
        LOGE(ERROR, "Couldn't bind device to pciback!");
        return ERROR_FAIL;
    }
    return 0;
}
734 
/*
 * Detach @pci from pciback: unbind it from its current driver and, when a
 * slot is listed for it, write its address to "remove_slot".
 *
 * Returns 0 on success and ERROR_FAIL on failure.  A failure to read the
 * slot list is treated like "no slot".
 */
static int pciback_dev_unassign(libxl__gc *gc, libxl_device_pci *pci)
{
    /* Remove from pciback */
    int unbind_rc = sysfs_dev_unbind(gc, pci, NULL);

    if (unbind_rc < 0) {
        LOG(ERROR, "Couldn't unbind device!");
        return ERROR_FAIL;
    }

    /* Nothing more to do unless a slot is present. */
    if (pciback_dev_has_slot(gc, pci) <= 0)
        return 0;

    if (sysfs_write_bdf(gc, SYSFS_PCIBACK_DRIVER"/remove_slot", pci) < 0) {
        LOGE(ERROR, "Couldn't remove pciback slot");
        return ERROR_FAIL;
    }

    return 0;
}
753 
/*
 * Make @pci assignable to guests.
 *
 * Steps:
 *  1. validate pci->name, if set (at most 64 characters, all of them
 *     accepted by isgraph());
 *  2. check that the device exists in sysfs;
 *  3. unless it is already bound to pciback: unbind it from its current
 *     driver, record or forget that driver in xenstore depending on
 *     @rebind, and bind it to pciback;
 *  4. write or remove the "name" node in xenstore;
 *  5. assign the device to DOMID_IO.
 *
 * @rebind: non-zero to remember the original driver in the "driver_path"
 *          xenstore node; zero removes that node.
 *
 * Returns 0 on success and ERROR_FAIL on failure.
 */
static int libxl__device_pci_assignable_add(libxl__gc *gc,
                                            libxl_device_pci *pci,
                                            int rebind)
{
    libxl_ctx *ctx = libxl__gc_owner(gc);
    unsigned dom, bus, dev, func;
    char *spath, *driver_path = NULL;
    const char *name;
    int rc;
    struct stat st;

    /* Local copy for convenience */
    dom = pci->domain;
    bus = pci->bus;
    dev = pci->dev;
    func = pci->func;
    name = pci->name;

    /* Sanitise any name that is set */
    if (name) {
        size_t i, n = strlen(name);

        if (n > 64) { /* Reasonable upper bound on name length */
            LOG(ERROR, "Name too long");
            return ERROR_FAIL;
        }

        for (i = 0; i < n; i++) {
            /*
             * <ctype.h> functions require a value representable as
             * unsigned char (or EOF); a plain char may be negative.
             */
            if (!isgraph((unsigned char)name[i])) {
                LOG(ERROR, "Names may only include printable characters");
                return ERROR_FAIL;
            }
        }
    }

    /* See if the device exists */
    spath = GCSPRINTF(SYSFS_PCI_DEV"/"PCI_BDF, dom, bus, dev, func);
    if ( lstat(spath, &st) ) {
        LOGE(ERROR, "Couldn't lstat %s", spath);
        return ERROR_FAIL;
    }

    /* Check to see if it's already assigned to pciback */
    rc = pciback_dev_is_assigned(gc, pci);
    if ( rc < 0 ) {
        return ERROR_FAIL;
    }
    if ( rc ) {
        LOG(WARN, PCI_BDF" already assigned to pciback", dom, bus, dev, func);
        goto name;
    }

    /* Check to see if there's already a driver that we need to unbind from */
    if ( sysfs_dev_unbind(gc, pci, &driver_path ) ) {
        LOG(ERROR, "Couldn't unbind "PCI_BDF" from driver",
            dom, bus, dev, func);
        return ERROR_FAIL;
    }

    /* Store driver_path for rebinding to dom0 */
    if ( rebind ) {
        if ( driver_path ) {
            pci_info_xs_write(gc, pci, "driver_path", driver_path);
        } else if ( (driver_path =
                     pci_info_xs_read(gc, pci, "driver_path")) != NULL ) {
            /* No driver now, but an earlier call recorded one. */
            LOG(INFO, PCI_BDF" not bound to a driver, will be rebound to %s",
                dom, bus, dev, func, driver_path);
        } else {
            LOG(WARN, PCI_BDF" not bound to a driver, will not be rebound.",
                dom, bus, dev, func);
        }
    } else {
        pci_info_xs_remove(gc, pci, "driver_path");
    }

    if ( pciback_dev_assign(gc, pci) ) {
        LOG(ERROR, "Couldn't bind device to pciback!");
        return ERROR_FAIL;
    }

name:
    if (name)
        pci_info_xs_write(gc, pci, "name", name);
    else
        pci_info_xs_remove(gc, pci, "name");

    /*
     * DOMID_IO is just a sentinel domain, without any actual mappings,
     * so always pass XEN_DOMCTL_DEV_RDM_RELAXED to avoid assignment being
     * unnecessarily denied.
     */
    rc = xc_assign_device(ctx->xch, DOMID_IO, pci_encode_bdf(pci),
                          XEN_DOMCTL_DEV_RDM_RELAXED);
    if ( rc < 0 ) {
        LOG(ERROR, "failed to quarantine "PCI_BDF, dom, bus, dev, func);
        return ERROR_FAIL;
    }

    return 0;
}
854 
/*
 * Translate pci->name into a BDF.
 *
 * Lists the entries below PCI_INFO_PATH in xenstore, parses each entry name
 * as a PCI_BDF_XSPATH string and compares the "name" value stored for that
 * BDF with pci->name.
 *
 * Returns 0 when a matching entry is found (pci then holds its BDF), or
 * ERROR_NOTFOUND otherwise.  Note that the BDF fields of pci are overwritten
 * for every parsable entry that is examined, so on ERROR_NOTFOUND they may
 * hold the BDF of the last entry scanned.
 */
static int name2bdf(libxl__gc *gc, libxl_device_pci *pci)
{
    char **entries;
    unsigned int idx, count;

    entries = libxl__xs_directory(gc, XBT_NULL, PCI_INFO_PATH, &count);
    if (!entries || !count)
        return ERROR_NOTFOUND;

    for (idx = 0; idx < count; idx++) {
        unsigned int d, b, s, f;
        char *stored;

        /* Skip anything that does not look like a BDF node */
        if (sscanf(entries[idx], PCI_BDF_XSPATH, &d, &b, &s, &f) != 4)
            continue;

        /* The xenstore lookup below is keyed on the BDF held in pci */
        pci_struct_fill(pci, d, b, s, f);

        stored = pci_info_xs_read(gc, pci, "name");
        if (!stored || strcmp(stored, pci->name))
            continue;

        LOG(DETAIL, "'%s' -> " PCI_BDF, pci->name, pci->domain,
            pci->bus, pci->dev, pci->func);
        return 0;
    }

    return ERROR_NOTFOUND;
}
888 
/*
 * Make a device non-assignable again.
 *
 * Steps, in order: resolve pci->name to a BDF if a name was given,
 * deassign the device from DOMID_IO, unassign it from pciback if it is
 * currently assigned, optionally rebind it to the driver recorded under
 * "driver_path", and finally remove the stored "name".
 *
 * Returns 0 on success, the name2bdf() error if the name lookup fails,
 * ERROR_FAIL if de-quarantining or the pciback query fails, and -1 if
 * rebinding to the recorded driver fails.
 */
static int libxl__device_pci_assignable_remove(libxl__gc *gc,
                                               libxl_device_pci *pci,
                                               int rebind)
{
    libxl_ctx *ctx = libxl__gc_owner(gc);
    char *orig_driver;
    int assigned;
    int rc;

    /* A named device carries no BDF yet: look it up first */
    if (pci->name) {
        rc = name2bdf(gc, pci);
        if (rc) return rc;
    }

    /* Take the device out of quarantine */
    if (xc_deassign_device(ctx->xch, DOMID_IO, pci_encode_bdf(pci)) < 0) {
        LOG(ERROR, "failed to de-quarantine "PCI_BDF, pci->domain, pci->bus,
            pci->dev, pci->func);
        return ERROR_FAIL;
    }

    /* Detach from pciback, if it is attached at all */
    assigned = pciback_dev_is_assigned(gc, pci);
    if (assigned < 0)
        return ERROR_FAIL;

    if (assigned)
        pciback_dev_unassign(gc, pci);
    else
        LOG(WARN, "Not bound to pciback");

    /* Hand the device back to its original driver when asked to */
    orig_driver = pci_info_xs_read(gc, pci, "driver_path");

    if (rebind) {
        if (orig_driver) {
            LOG(INFO, "Rebinding to driver at %s", orig_driver);

            if (sysfs_write_bdf(gc, GCSPRINTF("%s/bind", orig_driver),
                                pci) < 0) {
                LOGE(ERROR, "Couldn't bind device to %s", orig_driver);
                return -1;
            }

            /* The record is only dropped after a successful rebind */
            pci_info_xs_remove(gc, pci, "driver_path");
        } else {
            LOG(WARN,
                "Couldn't find path for original driver; not rebinding");
        }
    }

    pci_info_xs_remove(gc, pci, "name");

    return 0;
}
947 
/*
 * Public entry point: sets up a gc for ctx, runs
 * libxl__device_pci_assignable_add() with it, frees the gc and returns
 * that function's result unchanged.
 */
int libxl_device_pci_assignable_add(libxl_ctx *ctx, libxl_device_pci *pci,
                                    int rebind)
{
    GC_INIT(ctx);
    int result = libxl__device_pci_assignable_add(gc, pci, rebind);

    GC_FREE;
    return result;
}
959 
960 
/*
 * Public entry point: sets up a gc for ctx, runs
 * libxl__device_pci_assignable_remove() with it, frees the gc and returns
 * that function's result unchanged.
 */
int libxl_device_pci_assignable_remove(libxl_ctx *ctx, libxl_device_pci *pci,
                                       int rebind)
{
    GC_INIT(ctx);
    int result = libxl__device_pci_assignable_remove(gc, pci, rebind);

    GC_FREE;
    return result;
}
972 
973 /*
974  * This function checks that all functions of a device are bound to pciback
975  * driver. It also initialises a bit-mask of which function numbers are present
976  * on that device.
977 */
pci_multifunction_check(libxl__gc * gc,libxl_device_pci * pci,unsigned int * func_mask)978 static int pci_multifunction_check(libxl__gc *gc, libxl_device_pci *pci, unsigned int *func_mask)
979 {
980     struct dirent *de;
981     DIR *dir;
982 
983     *func_mask = 0;
984 
985     dir = opendir(SYSFS_PCI_DEV);
986     if ( NULL == dir ) {
987         LOGE(ERROR, "Couldn't open %s", SYSFS_PCI_DEV);
988         return -1;
989     }
990 
991     while( (de = readdir(dir)) ) {
992         unsigned dom, bus, dev, func;
993         struct stat st;
994         char *path;
995 
996         if ( sscanf(de->d_name, PCI_BDF, &dom, &bus, &dev, &func) != 4 )
997             continue;
998         if ( pci->domain != dom )
999             continue;
1000         if ( pci->bus != bus )
1001             continue;
1002         if ( pci->dev != dev )
1003             continue;
1004 
1005         path = GCSPRINTF("%s/" PCI_BDF, SYSFS_PCIBACK_DRIVER, dom, bus, dev, func);
1006         if ( lstat(path, &st) ) {
1007             if ( errno == ENOENT )
1008                 LOG(ERROR, PCI_BDF " is not assigned to pciback driver",
1009                     dom, bus, dev, func);
1010             else
1011                 LOGE(ERROR, "Couldn't lstat %s", path);
1012             closedir(dir);
1013             return -1;
1014         }
1015         (*func_mask) |= (1 << func);
1016     }
1017 
1018     closedir(dir);
1019     return 0;
1020 }
1021 
/*
 * State predicate handed to libxl__wait_for_device_model_deprecated() after
 * a "pci-ins" command has been issued.
 *
 * state: current device model state string.
 * priv:  the state that was read before the command was sent.  It is not
 *        needed to take the decision and may be NULL when that read
 *        failed, so it is deliberately not dereferenced here.
 *
 * Returns -1 for "pci-insert-failed", 0 for "pci-inserted" and 1 for every
 * other state (presumably telling the waiter to keep waiting - confirm
 * against libxl__wait_for_device_model_deprecated()).
 */
static int pci_ins_check(libxl__gc *gc, uint32_t domid, const char *state, void *priv)
{
    if ( !strcmp(state, "pci-insert-failed") )
        return -1;
    if ( !strcmp(state, "pci-inserted") )
        return 0;

    /*
     * Any other state, including the original one passed in priv, yields
     * the same answer, so there is no need to compare against priv.
     */
    return 1;
}
1035 
/*
 * Ask a qemu-traditional device model to insert a PCI device, using its
 * xenstore command interface.
 *
 * The device description is written to the device model's "/parameter"
 * node, the "pci-ins" command is sent, and the function waits (through
 * pci_ins_check) for the result.  On success the virtual devfn reported
 * back in "/parameter" is stored in pci->vdevfn.  The "/state" node is
 * restored to the value it had before the command was sent.
 *
 * Returns ERROR_FAIL if the initial state cannot be read, a negative value
 * from the wait if the device model refused or the wait failed, -1 if the
 * reported vdevfn is missing or malformed, and otherwise the (non-negative)
 * result of the wait.
 */
static int qemu_pci_add_xenstore(libxl__gc *gc, uint32_t domid,
                                 libxl_device_pci *pci)
{
    libxl_ctx *ctx = libxl__gc_owner(gc);
    int rc = 0;
    char *state_path, *param_path;
    char *state, *vdevfn;
    uint32_t dm_domid;

    dm_domid = libxl_get_stubdom_id(CTX, domid);
    state_path = DEVICE_MODEL_XS_PATH(gc, dm_domid, domid, "/state");
    param_path = DEVICE_MODEL_XS_PATH(gc, dm_domid, domid, "/parameter");

    /*
     * The original state is needed both by the wait predicate and to
     * restore the node afterwards; without it the sequence cannot be
     * carried out, so bail out before touching the device model.
     */
    state = libxl__xs_read(gc, XBT_NULL, state_path);
    if (!state) {
        LOGD(ERROR, domid, "unable to read device model state from %s",
             state_path);
        return ERROR_FAIL;
    }

    if (pci->vdevfn) {
        libxl__xs_printf(gc, XBT_NULL, param_path,
                         PCI_BDF_VDEVFN","PCI_OPTIONS,
                         pci->domain, pci->bus, pci->dev,
                         pci->func, pci->vdevfn, pci->msitranslate,
                         pci->power_mgmt);
    } else {
        libxl__xs_printf(gc, XBT_NULL, param_path, PCI_BDF","PCI_OPTIONS,
                         pci->domain,  pci->bus, pci->dev,
                         pci->func, pci->msitranslate, pci->power_mgmt);
    }

    libxl__qemu_traditional_cmd(gc, domid, "pci-ins");
    rc = libxl__wait_for_device_model_deprecated(gc, domid, NULL, NULL,
                                      pci_ins_check, state);

    /* The device model answers through the same "/parameter" node */
    vdevfn = libxl__xs_read(gc, XBT_NULL, param_path);
    if ( rc < 0 )
        LOGD(ERROR, domid, "qemu refused to add device: %s",
             vdevfn ? vdevfn : "(no reply)");
    else if ( !vdevfn ) {
        /* Reading the answer failed: nothing to parse */
        LOGD(ERROR, domid, "unable to read back the vdevfn from %s",
             param_path);
        rc = -1;
    } else if ( sscanf(vdevfn, "0x%x", &pci->vdevfn) != 1 ) {
        LOGD(ERROR, domid, "wrong format for the vdevfn: '%s'", vdevfn);
        rc = -1;
    }

    /* Put the state node back to what it was before the command */
    xs_write(ctx->xsh, XBT_NULL, state_path, state, strlen(state));

    return rc;
}
1076 
/*
 * Helper for xswait callbacks watching a device model state node.
 *
 * rc and state are the values the xswait callback was invoked with.
 *  - rc == 0 and state is not "running" (or is NULL): returns
 *    ERROR_NOT_READY and leaves the xswait running.
 *  - otherwise: stops the xswait and returns rc (0 once the state is
 *    "running"); a message is logged when rc is ERROR_TIMEDOUT.
 */
static int check_qemu_running(libxl__gc *gc,
                              libxl_domid domid,
                              libxl__xswait_state *xswa,
                              int rc,
                              const char *state)
{
    /* No error yet, but the device model has not reached "running" */
    if (!rc && (!state || strcmp(state, "running")))
        return ERROR_NOT_READY;

    if (rc == ERROR_TIMEDOUT)
        LOGD(ERROR, domid, "%s not ready", xswa->what);

    libxl__xswait_stop(gc, xswa);
    return rc;
}
1097 
/*
 * State carried through the asynchronous "add a PCI device" operation.
 * The callbacks in this file recover it with CONTAINER_OF() from the
 * embedded event structures (xswait, qmp, timeout, timeout_retries).
 */
typedef struct pci_add_state {
    /* filled by user of do_pci_add */
    libxl__ao_device *aodev;
    libxl_domid domid; /* domain the device model callbacks talk to */
    bool starting;
    /* invoked by pci_add_dm_done() with the final rc of do_pci_add() */
    void (*callback)(libxl__egc *, struct pci_add_state *, int rc);

    /* private to device_pci_add_stubdom_wait */
    libxl__ev_devstate pciback_ds;

    /* private to do_pci_add */
    libxl__xswait_state xswait; /* qemu-traditional: watch on dm state */
    libxl__ev_qmp qmp; /* qemu-xen: device_add / query-pci commands */
    libxl__ev_time timeout; /* overall timeout of the QMP sequence */
    libxl__ev_time timeout_retries; /* delay before retrying device_add */
    libxl_device_pci pci; /* private copy of the device being added */
    libxl_domid pci_domid; /* domid argument of do_pci_add() */
    int retries; /* number of device_add retries so far */
} pci_add_state;
1117 
/*
 * Forward declarations of the steps and callbacks used by do_pci_add().
 * Every path eventually ends in pci_add_dm_done().
 */
static void pci_add_qemu_trad_watch_state_cb(libxl__egc *egc,
    libxl__xswait_state *xswa, int rc, const char *state);
static void pci_add_qmp_device_add(libxl__egc *, pci_add_state *);
static void pci_add_qmp_device_add_cb(libxl__egc *,
    libxl__ev_qmp *, const libxl__json_object *, int rc);
static void pci_add_qmp_device_add_retry(libxl__egc *egc, libxl__ev_time *ev,
    const struct timeval *requested_abs, int rc);
static void pci_add_qmp_query_pci_cb(libxl__egc *,
    libxl__ev_qmp *, const libxl__json_object *, int rc);
static void pci_add_timeout(libxl__egc *egc, libxl__ev_time *ev,
    const struct timeval *requested_abs, int rc);
static void pci_add_dm_done(libxl__egc *,
    pci_add_state *, int rc);
1131 
/*
 * Start adding the device described by pas->pci to domain domid.
 *
 * The caller must have filled in the "filled by user" part of pas; the
 * private part is initialised here.  For an HVM domain the device model
 * is involved first:
 *  - qemu-traditional: wait for the device model state node, then continue
 *    in pci_add_qemu_trad_watch_state_cb();
 *  - qemu-xen: arm a timeout and send the QMP "device_add" command.
 * For other domain types the device model step is skipped.
 *
 * Completion, successful or not, is always reported through
 * pci_add_dm_done(), which in turn calls pas->callback.
 */
static void do_pci_add(libxl__egc *egc,
                       libxl_domid domid,
                       pci_add_state *pas)
{
    STATE_AO_GC(pas->aodev->ao);
    libxl_domain_type type = libxl__domain_type(gc, domid);
    int rc;

    /* init pci_add_state */
    libxl__xswait_init(&pas->xswait);
    libxl__ev_qmp_init(&pas->qmp);
    pas->pci_domid = domid;
    pas->retries = 0;
    libxl__ev_time_init(&pas->timeout);
    libxl__ev_time_init(&pas->timeout_retries);

    if (type == LIBXL_DOMAIN_TYPE_INVALID) {
        rc = ERROR_FAIL;
        goto out;
    }

    if (type == LIBXL_DOMAIN_TYPE_HVM) {
        switch (libxl__device_model_version_running(gc, domid)) {
            case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN_TRADITIONAL:
                pas->xswait.ao = ao;
                pas->xswait.what = "Device Model";
                pas->xswait.path = DEVICE_MODEL_XS_PATH(gc,
                    libxl_get_stubdom_id(CTX, domid), domid, "/state");
                pas->xswait.timeout_ms = LIBXL_DEVICE_MODEL_START_TIMEOUT * 1000;
                pas->xswait.callback = pci_add_qemu_trad_watch_state_cb;
                rc = libxl__xswait_start(gc, &pas->xswait);
                if (rc) goto out;
                return;
            case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN:
                rc = libxl__ev_time_register_rel(ao, &pas->timeout,
                                                 pci_add_timeout,
                                                 LIBXL_QMP_CMD_TIMEOUT * 1000);
                if (rc) goto out;

                pci_add_qmp_device_add(egc, pas); /* must be last */
                return;
            default:
                /*
                 * Unrecognised device model version: report the error
                 * instead of falling through to the success path below.
                 */
                rc = ERROR_INVAL;
                goto out;
        }
    }

    /* No device model involvement needed */
    rc = 0;

out:
    pci_add_dm_done(egc, pas, rc); /* must be last */
}
1184 
/*
 * xswait callback for the qemu-traditional path of do_pci_add().
 *
 * Keeps waiting while check_qemu_running() reports ERROR_NOT_READY.  Once
 * the device model is running, the device is inserted through
 * qemu_pci_add_xenstore(); the outcome (or any earlier error) is passed to
 * pci_add_dm_done().
 */
static void pci_add_qemu_trad_watch_state_cb(libxl__egc *egc,
                                             libxl__xswait_state *xswa,
                                             int rc,
                                             const char *state)
{
    pci_add_state *pas = CONTAINER_OF(xswa, *pas, xswait);
    STATE_AO_GC(pas->aodev->ao);

    rc = check_qemu_running(gc, pas->domid, xswa, rc, state);
    if (rc == ERROR_NOT_READY)
        return; /* the watch stays armed; we will be called again */

    if (!rc)
        rc = qemu_pci_add_xenstore(gc, pas->domid, &pas->pci);

    pci_add_dm_done(egc, pas, rc); /* must be last */
}
1207 
/*
 * Build and send the QMP "device_add" command for pas->pci.
 *
 * The arguments are the "xen-pci-passthrough" driver, a qdev id derived
 * from the bus/dev/func, the host address, an optional guest "addr" when
 * pci->vdevfn is set and the optional "permissive" flag.  The reply is
 * handled by pci_add_qmp_device_add_cb(); a failure to send goes straight
 * to pci_add_dm_done().
 */
static void pci_add_qmp_device_add(libxl__egc *egc, pci_add_state *pas)
{
    STATE_AO_GC(pas->aodev->ao);
    libxl_device_pci *const pci = &pas->pci;
    libxl__ev_qmp *const qmp = &pas->qmp;
    libxl__json_object *args = NULL;
    int rc;

    libxl__qmp_param_add_string(gc, &args, "driver",
                                "xen-pci-passthrough");
    QMP_PARAMETERS_SPRINTF(&args, "id", PCI_PT_QDEV_ID,
                           pci->bus, pci->dev, pci->func);
    QMP_PARAMETERS_SPRINTF(&args, "hostaddr",
                           "%04x:%02x:%02x.%01x", pci->domain,
                           pci->bus, pci->dev, pci->func);

    /* Only request a specific guest slot when one was chosen */
    if (pci->vdevfn) {
        QMP_PARAMETERS_SPRINTF(&args, "addr", "%x.%x",
                               PCI_SLOT(pci->vdevfn),
                               PCI_FUNC(pci->vdevfn));
    }

    /*
     * QEMU versions predating the XSA-131 fix do not know the
     * "permissive" property and always behave permissively.  The fix
     * made restricted mode the default and introduced the property.
     *
     * To work with both, the property is only sent when it is true;
     * users of an older QEMU have no reason to set it.
     */
    if (pci->permissive)
        libxl__qmp_param_add_bool(gc, &args, "permissive", true);

    qmp->callback = pci_add_qmp_device_add_cb;
    qmp->payload_fd = -1;
    qmp->domid = pas->domid;
    qmp->ao = pas->aodev->ao;

    rc = libxl__ev_qmp_send(egc, qmp, "device_add", args);
    if (!rc)
        return;

    pci_add_dm_done(egc, pas, rc); /* must be last */
}
1255 
/*
 * Reply handler for the QMP "device_add" command.
 *
 * On success the "query-pci" command is sent next, to be handled by
 * pci_add_qmp_query_pci_cb().  On failure, and only when the domain has a
 * stubdomain, the command is retried after a one second delay, up to ten
 * times.  Every other outcome ends in pci_add_dm_done().
 */
static void pci_add_qmp_device_add_cb(libxl__egc *egc,
                                      libxl__ev_qmp *qmp,
                                      const libxl__json_object *response,
                                      int rc)
{
    EGC_GC;
    pci_add_state *pas = CONTAINER_OF(qmp, *pas, qmp);

    if (!rc) {
        /* Device accepted: ask QEMU where it ended up */
        qmp->callback = pci_add_qmp_query_pci_cb;
        rc = libxl__ev_qmp_send(egc, qmp, "query-pci", NULL);
        if (!rc)
            return;
    } else if (libxl_get_stubdom_id(CTX, qmp->domid) != 0 &&
               pas->retries++ < 10) {
        /* Retry only applicable for HVM with stubdom. */
        LOGD(ERROR, qmp->domid, "Retrying PCI add %d", pas->retries);
        rc = libxl__ev_time_register_rel(pas->aodev->ao,
                                         &pas->timeout_retries,
                                         pci_add_qmp_device_add_retry,
                                         1000);
        if (!rc)
            return; /* Wait for the timeout to then retry. */
    }

    pci_add_dm_done(egc, pas, rc); /* must be last */
}
1290 
/*
 * Timer callback armed by pci_add_qmp_device_add_cb(): sends the QMP
 * "device_add" command again.  The rc and requested_abs arguments are not
 * looked at.
 */
static void pci_add_qmp_device_add_retry(libxl__egc *egc, libxl__ev_time *ev,
                                         const struct timeval *requested_abs,
                                         int rc)
{
    pci_add_state *add_state = CONTAINER_OF(ev, *add_state, timeout_retries);

    pci_add_qmp_device_add(egc, add_state);
}
1299 
/*
 * Reply handler for the QMP "query-pci" command.
 *
 * Searches the reply for the device whose "qdev_id" equals the id that was
 * used in "device_add" and stores its slot/function in pas->pci.vdevfn.
 *
 * The result passed to pci_add_dm_done() is 0 when the device is found,
 * ERROR_FAIL when it is not, ERROR_QEMU_API when the reply lacks an
 * expected field (the reply is then logged), or the incoming rc when it is
 * already non-zero.
 */
static void pci_add_qmp_query_pci_cb(libxl__egc *egc,
                                     libxl__ev_qmp *qmp,
                                     const libxl__json_object *response,
                                     int rc)
{
    EGC_GC;
    pci_add_state *pas = CONTAINER_OF(qmp, *pas, qmp);
    libxl_device_pci *pci = &pas->pci;
    const libxl__json_object *bus_entry;
    const libxl__json_object *dev_list;
    const libxl__json_object *dev_entry;
    const libxl__json_object *field;
    const char *qdev_id;
    char *wanted_id;
    int bus_idx, dev_idx;
    int slot, function;

    if (rc) goto out;

    /*
     * Layout of the `query-pci' reply:
     * [
     *   {'bus': 'int',
     *    'devices': [
     *       {'bus': 'int', 'slot': 'int', 'function': 'int',
     *        'class_info': 'PciDeviceClass', 'id': 'PciDeviceId',
     *        '*irq': 'int', 'qdev_id': 'str',
     *        '*pci_bridge': 'PciBridgeInfo',
     *        'regions': ['PciMemoryRegion']
     *       }
     *    ]
     *   }
     * ]
     * (The structs not spelled out here are in qemu.git/qapi/)
     */

    wanted_id = GCSPRINTF(PCI_PT_QDEV_ID,
                          pci->bus, pci->dev, pci->func);

    for (bus_idx = 0; ; bus_idx++) {
        bus_entry = libxl__json_array_get(response, bus_idx);
        if (!bus_entry)
            break;

        dev_list = libxl__json_map_get("devices", bus_entry, JSON_ARRAY);
        if (!dev_list)
            goto bad_response;

        for (dev_idx = 0; ; dev_idx++) {
            dev_entry = libxl__json_array_get(dev_list, dev_idx);
            if (!dev_entry)
                break;

            field = libxl__json_map_get("qdev_id", dev_entry, JSON_STRING);
            if (!field)
                goto bad_response;

            /* Not the device we added: keep looking */
            qdev_id = libxl__json_object_get_string(field);
            if (!qdev_id || strcmp(wanted_id, qdev_id))
                continue;

            field = libxl__json_map_get("slot", dev_entry, JSON_INTEGER);
            if (!field)
                goto bad_response;
            slot = libxl__json_object_get_integer(field);

            field = libxl__json_map_get("function", dev_entry, JSON_INTEGER);
            if (!field)
                goto bad_response;
            function = libxl__json_object_get_integer(field);

            pci->vdevfn = PCI_DEVFN(slot, function);

            rc = 0;
            goto out;
        }
    }

    rc = ERROR_FAIL;
    LOGD(ERROR, qmp->domid,
         "PCI device id '%s' wasn't found in QEMU's 'query-pci' response.",
         wanted_id);
    goto out;

bad_response:
    rc = ERROR_QEMU_API;

out:
    if (rc == ERROR_QEMU_API) {
        LOGD(ERROR, qmp->domid,
             "Unexpected response to QMP cmd 'query-pci', received:\n%s",
             JSON(response));
    }
    pci_add_dm_done(egc, pas, rc); /* must be last */
}
1390 
/*
 * Callback of the overall timer armed in do_pci_add() for the QMP path:
 * finishes the operation by handing the rc it was called with to
 * pci_add_dm_done().
 */
static void pci_add_timeout(libxl__egc *egc, libxl__ev_time *ev,
                            const struct timeval *requested_abs,
                            int rc)
{
    pci_add_state *add_state = CONTAINER_OF(ev, *add_state, timeout);

    pci_add_dm_done(egc, add_state, rc);
}
1399 
/*
 * Compile-time switch: returns true when the build defines
 * CONFIG_PCI_SUPP_LEGACY_IRQ and false otherwise.
 */
static bool pci_supp_legacy_irq(void)
{
    bool supported = false;

#ifdef CONFIG_PCI_SUPP_LEGACY_IRQ
    supported = true;
#endif

    return supported;
}
1408 
/*
 * Common final step of do_pci_add(): called once the device model part has
 * finished (rc == 0) or failed (rc != 0).
 *
 * On success of the device model step this function, for the domain
 * pas->pci_domid:
 *  - grants access to the I/O port and memory ranges listed in the device's
 *    sysfs "resource" file;
 *  - when legacy IRQs are supported by the build, maps the IRQ from the
 *    sysfs "irq" file and grants access to it, then sets the pciback
 *    "permissive" flag if requested;
 *  - assigns the device with xc_assign_device(), except for a stubdomain;
 *  - adds the device to xenstore, unless the domain has a stubdomain.
 *
 * In every case both timers are deregistered and pas->callback is invoked
 * with the resulting rc.
 */
static void pci_add_dm_done(libxl__egc *egc,
                            pci_add_state *pas,
                            int rc)
{
    STATE_AO_GC(pas->aodev->ao);
    libxl_ctx *ctx = libxl__gc_owner(gc);
    libxl_domid domid = pas->pci_domid;
    char *sysfs_path;
    FILE *f;
    unsigned long long start, end, flags, size;
    int irq, i;
    int r;
    uint32_t flag = XEN_DOMCTL_DEV_RDM_RELAXED;
    /* domainid is only used as the domain reported in log messages;
     * presumably libxl_is_stubdom() replaces it with the target domain
     * when domid is a stubdomain - TODO confirm */
    uint32_t domainid = domid;
    bool isstubdom = libxl_is_stubdom(ctx, domid, &domainid);

    /* Convenience aliases */
    bool starting = pas->starting;
    libxl_device_pci *pci = &pas->pci;
    bool hvm = libxl__domain_type(gc, domid) == LIBXL_DOMAIN_TYPE_HVM;

    libxl__ev_qmp_dispose(gc, &pas->qmp);

    /* The device model step failed: only clean up and report */
    if (rc) goto out;

    /* stubdomain is always running by now, even at create time */
    if (isstubdom)
        starting = false;

    /* Each line of the "resource" file is "start end flags" in hex */
    sysfs_path = GCSPRINTF(SYSFS_PCI_DEV"/"PCI_BDF"/resource", pci->domain,
                           pci->bus, pci->dev, pci->func);
    f = fopen(sysfs_path, "r");
    start = end = flags = size = 0;
    irq = 0;

    if (f == NULL) {
        LOGED(ERROR, domainid, "Couldn't open %s", sysfs_path);
        rc = ERROR_FAIL;
        goto out;
    }
    for (i = 0; i < PROC_PCI_NUM_RESOURCES; i++) {
        /* Lines that do not parse are skipped, not treated as errors */
        if (fscanf(f, "0x%llx 0x%llx 0x%llx\n", &start, &end, &flags) != 3)
            continue;
        size = end - start + 1;
        /* A zero start address is treated as "no resource here" */
        if (start) {
            if (flags & PCI_BAR_IO) {
                r = xc_domain_ioport_permission(ctx->xch, domid, start, size, 1);
                if (r < 0) {
                    LOGED(ERROR, domainid,
                          "xc_domain_ioport_permission 0x%llx/0x%llx (error %d)",
                          start, size, r);
                    fclose(f);
                    rc = ERROR_FAIL;
                    goto out;
                }
            } else {
                /* Memory ranges are granted in whole pages: the start is
                 * converted to a frame number, the size rounded up */
                r = xc_domain_iomem_permission(ctx->xch, domid, start>>XC_PAGE_SHIFT,
                                                (size+(XC_PAGE_SIZE-1))>>XC_PAGE_SHIFT, 1);
                if (r < 0) {
                    LOGED(ERROR, domainid,
                          "xc_domain_iomem_permission 0x%llx/0x%llx (error %d)",
                          start, size, r);
                    fclose(f);
                    rc = ERROR_FAIL;
                    goto out;
                }
            }
        }
    }
    fclose(f);
    /* NOTE(review): both jumps to out_no_irq below also skip the
     * "permissive" handling further down - confirm this is intended */
    if (!pci_supp_legacy_irq())
        goto out_no_irq;
    sysfs_path = GCSPRINTF(SYSFS_PCI_DEV"/"PCI_BDF"/irq", pci->domain,
                                pci->bus, pci->dev, pci->func);
    f = fopen(sysfs_path, "r");
    if (f == NULL) {
        /* A missing "irq" file is logged but is not fatal */
        LOGED(ERROR, domainid, "Couldn't open %s", sysfs_path);
        goto out_no_irq;
    }
    if ((fscanf(f, "%u", &irq) == 1) && irq) {
        /* irq is passed both as the IRQ to map and as the in/out pirq */
        r = xc_physdev_map_pirq(ctx->xch, domid, irq, &irq);
        if (r < 0) {
            LOGED(ERROR, domainid, "xc_physdev_map_pirq irq=%d (error=%d)",
                  irq, r);
            fclose(f);
            rc = ERROR_FAIL;
            goto out;
        }
        r = xc_domain_irq_permission(ctx->xch, domid, irq, 1);
        if (r < 0) {
            LOGED(ERROR, domainid,
                  "xc_domain_irq_permission irq=%d (error=%d)", irq, r);
            fclose(f);
            rc = ERROR_FAIL;
            goto out;
        }
    }
    fclose(f);

    /* Don't restrict writes to the PCI config space from this VM */
    if (pci->permissive) {
        if ( sysfs_write_bdf(gc, SYSFS_PCIBACK_DRIVER"/permissive",
                             pci) < 0 ) {
            LOGD(ERROR, domainid, "Setting permissive for device");
            rc = ERROR_FAIL;
            goto out;
        }
    }

out_no_irq:
    /* The device is not assigned to a stubdomain */
    if (!isstubdom) {
        /* flag starts out as RELAXED; the strict policy clears it */
        if (pci->rdm_policy == LIBXL_RDM_RESERVE_POLICY_STRICT) {
            flag &= ~XEN_DOMCTL_DEV_RDM_RELAXED;
        } else if (pci->rdm_policy != LIBXL_RDM_RESERVE_POLICY_RELAXED) {
            LOGED(ERROR, domainid, "unknown rdm check flag.");
            rc = ERROR_FAIL;
            goto out;
        }
        r = xc_assign_device(ctx->xch, domid, pci_encode_bdf(pci), flag);
        /* For a non-HVM domain an ENOSYS failure is tolerated */
        if (r < 0 && (hvm || errno != ENOSYS)) {
            LOGED(ERROR, domainid, "xc_assign_device failed");
            rc = ERROR_FAIL;
            goto out;
        }
    }

    /* The xenstore entry is only written for a domain without stubdomain */
    if (!libxl_get_stubdom_id(CTX, domid))
        rc = libxl__device_pci_add_xenstore(gc, domid, pci, starting);
    else
        rc = 0;
out:
    libxl__ev_time_deregister(gc, &pas->timeout);
    libxl__ev_time_deregister(gc, &pas->timeout_retries);
    pas->callback(egc, pas, rc);
}
1544 
/*
 * Reset a PCI device through sysfs.
 *
 * First tries pciback's "do_flr" node, writing the device's BDF to it.  If
 * that node cannot be opened, falls back to writing "1" to the device's
 * own "reset" node below SYSFS_PCI_DEV.
 *
 * Returns 0 when a write succeeded, the negative write() result when the
 * write failed, and -1 when neither node could be opened.
 */
static int libxl__device_pci_reset(libxl__gc *gc, unsigned int domain, unsigned int bus,
                                   unsigned int dev, unsigned int func)
{
    char *path;
    int fd, rc;

    /* Preferred method: pciback's function level reset trigger */
    path = GCSPRINTF("%s/do_flr", SYSFS_PCIBACK_DRIVER);
    fd = open(path, O_WRONLY);
    if (fd < 0) {
        /* A missing node just means "try the other method" */
        if (errno != ENOENT)
            LOGE(ERROR, "Failed to access pciback path %s", path);
    } else {
        char *bdf = GCSPRINTF(PCI_BDF, domain, bus, dev, func);

        rc = write(fd, bdf, strlen(bdf));
        if (rc < 0)
            LOGE(ERROR, "write '%s' to %s failed", bdf, path);
        close(fd);
        return rc < 0 ? rc : 0;
    }

    /* Fallback: the per-device reset node */
    path = GCSPRINTF("%s/"PCI_BDF"/reset", SYSFS_PCI_DEV, domain, bus, dev, func);
    fd = open(path, O_WRONLY);
    if (fd < 0) {
        if (errno == ENOENT) {
            LOG(ERROR,
                "The kernel doesn't support reset from sysfs for PCI device "PCI_BDF,
                domain, bus, dev, func);
        } else {
            LOGE(ERROR, "Failed to access reset path %s", path);
        }
        return -1;
    }

    rc = write(fd, "1", 1);
    if (rc < 0)
        LOGE(ERROR, "write to %s failed", path);
    close(fd);
    return rc < 0 ? rc : 0;
}
1581 
/*
 * Fill in defaults for a PCI device: an unset (INVALID) rdm_policy becomes
 * STRICT, i.e. RDM reservation is enforced for the device by default.
 * Always returns 0; gc, domid and hotplug are not used.
 */
int libxl__device_pci_setdefault(libxl__gc *gc, uint32_t domid,
                                 libxl_device_pci *pci, bool hotplug)
{
    if (pci->rdm_policy != LIBXL_RDM_RESERVE_POLICY_INVALID)
        return 0;

    pci->rdm_policy = LIBXL_RDM_RESERVE_POLICY_STRICT;
    return 0;
}
1590 
/*
 * Public asynchronous entry point for adding a PCI device to a domain.
 *
 * Creates the ao and an ao_device set up for an "add" action that
 * completes through device_addrm_aocomplete and requests a JSON update,
 * then starts libxl__device_pci_add() with starting == false.
 * Returns AO_INPROGRESS.
 */
int libxl_device_pci_add(libxl_ctx *ctx, uint32_t domid,
                         libxl_device_pci *pci,
                         const libxl_asyncop_how *ao_how)
{
    AO_CREATE(ctx, domid, ao_how);
    libxl__ao_device *add_dev;

    GCNEW(add_dev);
    libxl__prepare_ao_device(ao, add_dev);
    add_dev->update_json = true;
    add_dev->callback = device_addrm_aocomplete;
    add_dev->action = LIBXL__DEVICE_ACTION_ADD;

    libxl__device_pci_add(egc, domid, pci, false, add_dev);
    return AO_INPROGRESS;
}
1606 
/*
 * Returns true when pci is contained in the list returned by
 * libxl_device_pci_assignable_list(), false otherwise.  The list is
 * freed before returning.
 */
static bool libxl_pci_assignable(libxl_ctx *ctx, libxl_device_pci *pci)
{
    int count;
    libxl_device_pci *list = libxl_device_pci_assignable_list(ctx, &count);
    bool found = is_pci_in_array(list, count, pci);

    libxl_device_pci_assignable_list_free(list, count);

    return found;
}
1619 
/*
 * Forward declarations of the continuation steps used by
 * libxl__device_pci_add().
 */
static void device_pci_add_stubdom_wait(libxl__egc *egc,
    pci_add_state *pas, int rc);
static void device_pci_add_stubdom_ready(libxl__egc *egc,
    libxl__ev_devstate *ds, int rc);
static void device_pci_add_stubdom_done(libxl__egc *egc,
    pci_add_state *, int rc);
static void device_pci_add_done(libxl__egc *egc,
    pci_add_state *, int rc);
1628 
/*
 * Start the asynchronous addition of a PCI device to domain domid.
 *
 * A pci_add_state is allocated and receives a private copy of *pci, on
 * which all further work is done.  The preparation steps are: resolve a
 * named device to its BDF, test that an HVM domain can be assigned the
 * device, apply defaults, seize the device for pciback when pci->seize is
 * set and it is not already assigned, check that the device is assignable,
 * record the owning domid in xenstore and reset the device.
 *
 * If the domain has a stubdomain, the device is first added to the
 * stubdomain (continuing in device_pci_add_stubdom_wait); otherwise
 * processing continues directly in device_pci_add_stubdom_done().  Any
 * failure of the preparation steps ends in device_pci_add_done().
 */
void libxl__device_pci_add(libxl__egc *egc, uint32_t domid,
                           libxl_device_pci *pci, bool starting,
                           libxl__ao_device *aodev)
{
    STATE_AO_GC(aodev->ao);
    libxl_ctx *ctx = libxl__gc_owner(gc);
    pci_add_state *pas;
    int stubdomid = 0;
    int rc;

    GCNEW(pas);
    pas->aodev = aodev;
    pas->domid = domid;

    /* From here on only the private copy is used */
    libxl_device_pci_copy(CTX, &pas->pci, pci);
    pci = &pas->pci;

    /* A named device carries no BDF yet: look it up first */
    if (pci->name) {
        rc = name2bdf(gc, pci);
        if (rc) goto out;
    }

    pas->starting = starting;
    pas->callback = device_pci_add_stubdom_done;

    if (libxl__domain_type(gc, domid) == LIBXL_DOMAIN_TYPE_HVM) {
        rc = xc_test_assign_device(ctx->xch, domid, pci_encode_bdf(pci));
        if (rc) {
            const char *reason = errno == EOPNOTSUPP
                ? "cannot be assigned - no IOMMU"
                : "already assigned to a different guest";

            LOGD(ERROR, domid,
                 "PCI device %04x:%02x:%02x.%u %s?",
                 pci->domain, pci->bus, pci->dev, pci->func,
                 reason);
            goto out;
        }
    }

    rc = libxl__device_pci_setdefault(gc, domid, pci, !starting);
    if (rc) goto out;

    /* Seize the device for pciback if requested and not yet done */
    if (pci->seize) {
        if (!pciback_dev_is_assigned(gc, pci)) {
            rc = libxl__device_pci_assignable_add(gc, pci, 1);
            if (rc) goto out;
        }
    }

    if (!libxl_pci_assignable(ctx, pci)) {
        LOGD(ERROR, domid, "PCI device %x:%x:%x.%x is not assignable",
             pci->domain, pci->bus, pci->dev, pci->func);
        rc = ERROR_FAIL;
        goto out;
    }

    rc = pci_info_xs_write(gc, pci, "domid", GCSPRINTF("%u", domid));
    if (rc) goto out;

    /* The result of the reset is not checked */
    libxl__device_pci_reset(gc, pci->domain, pci->bus, pci->dev, pci->func);

    stubdomid = libxl_get_stubdom_id(ctx, domid);
    if (stubdomid == 0) {
        device_pci_add_stubdom_done(egc, pas, 0); /* must be last */
        return;
    }

    /* The stubdomain gets the device first */
    pas->callback = device_pci_add_stubdom_wait;

    do_pci_add(egc, stubdomid, pas); /* must be last */
    return;

out:
    device_pci_add_done(egc, pas, rc); /* must be last */
}
1702 
/*
 * Callback run after the device has been added to the stubdomain.
 *
 * On success, registers a wait for the stubdomain's PCI backend "state"
 * node to reach XenbusStateConnected; device_pci_add_stubdom_ready() is
 * called when that wait finishes.  Any failure goes to
 * device_pci_add_done().
 */
static void device_pci_add_stubdom_wait(libxl__egc *egc,
                                        pci_add_state *pas,
                                        int rc)
{
    libxl__ao_device *aodev = pas->aodev;
    STATE_AO_GC(aodev->ao);
    int stubdomid = libxl_get_stubdom_id(CTX, pas->domid);

    if (!rc) {
        /* Wait for the device actually being connected, otherwise device
         * model running there will fail to find the device. */
        char *backend = libxl__domain_device_backend_path(gc, 0, stubdomid, 0,
                                                  LIBXL__DEVICE_KIND_PCI);
        char *state_path = GCSPRINTF("%s/state", backend);

        rc = libxl__ev_devstate_wait(ao, &pas->pciback_ds,
                                     device_pci_add_stubdom_ready,
                                     state_path, XenbusStateConnected,
                                     LIBXL_DEVICE_MODEL_START_TIMEOUT * 1000);
        if (!rc)
            return; /* device_pci_add_stubdom_ready() takes over */
    }

    device_pci_add_done(egc, pas, rc); /* must be last */
}
1728 
/*
 * Completion of the devstate wait started by device_pci_add_stubdom_wait():
 * recover the enclosing add state and carry on with the guest-side add,
 * forwarding the wait's result.
 */
static void device_pci_add_stubdom_ready(libxl__egc *egc,
                                         libxl__ev_devstate *ds,
                                         int rc)
{
    pci_add_state *const state = CONTAINER_OF(ds, *state, pciback_ds);

    device_pci_add_stubdom_done(egc, state, rc); /* must be last */
}
1737 
/*
 * Add the device to the guest itself, after any stubdomain step has
 * finished with result @rc.
 *
 * Works out the mask of physical functions concerned, picks the highest
 * function set in that mask, fixes up func/vdevfn accordingly and starts
 * do_pci_add() with device_pci_add_done() as the completion callback.
 * Errors, and an empty mask, end in device_pci_add_done().
 */
static void device_pci_add_stubdom_done(libxl__egc *egc,
                                        pci_add_state *pas,
                                        int rc)
{
    STATE_AO_GC(pas->aodev->ao);
    libxl_device_pci *pci = &pas->pci;
    libxl_domid domid = pas->domid;
    unsigned int slot_base, phys_mask;
    int fn;

    if (rc) goto finish;

    /* Virtual devfn with the function bits cleared. */
    slot_base = pci->vdevfn & ~7U;

    if (pci->vfunc_mask != LIBXL_PCI_FUNC_ALL) {
        phys_mask = (1 << pci->func);
    } else {
        if ((pci->vdevfn >> 3) == 0) {
            LOGD(ERROR, domid, "Must specify a v-slot for multi-function devices");
            rc = ERROR_INVAL;
            goto finish;
        }
        if (pci_multifunction_check(gc, pci, &phys_mask)) {
            rc = ERROR_FAIL;
            goto finish;
        }
        /* so now vfunc_mask == phys_mask */
        pci->vfunc_mask &= phys_mask;
    }

    rc = 0;
    for (fn = 7; fn >= 0; fn--) {
        if (!((1 << fn) & phys_mask))
            continue;

        if (pci->vfunc_mask == phys_mask) {
            pci->func = fn;
            pci->vdevfn = slot_base | fn;
        } else {
            /* if not passing through multiple devices in a block make
             * sure that virtual function number 0 is always used otherwise
             * guest won't see the device
             */
            pci->vdevfn = slot_base;
        }

        pas->callback = device_pci_add_done;
        do_pci_add(egc, domid, pas); /* must be last */
        return;
    }

finish:
    device_pci_add_done(egc, pas, rc);
}
1791 
/*
 * Final step of the add chain.
 *
 * On failure, logs the device (by name if it has one, by BDF otherwise)
 * and removes its "domid" info node.  In all cases the private device copy
 * is disposed, @rc is stored in the aodev and the aodev callback is run.
 */
static void device_pci_add_done(libxl__egc *egc,
                                pci_add_state *pas,
                                int rc)
{
    EGC_GC;
    libxl_device_pci *pci = &pas->pci;
    libxl_domid domid = pas->domid;
    libxl__ao_device *aodev = pas->aodev;

    if (rc != 0) {
        if (!pci->name) {
            LOGD(ERROR, domid,
                 "libxl__device_pci_add failed for "
                 "PCI device %x:%x:%x.%x (rc %d)",
                 pci->domain, pci->bus, pci->dev, pci->func,
                 rc);
        } else {
            LOGD(ERROR, domid,
                 "libxl__device_pci_add failed for "
                 "PCI device '%s' (rc %d)",
                 pci->name,
                 rc);
        }
        pci_info_xs_remove(gc, pci, "domid");
    }

    libxl_device_pci_dispose(pci);

    aodev->rc = rc;
    aodev->callback(egc, aodev);
}
1821 
/*
 * State for adding all PCI devices of a domain configuration
 * (libxl__add_pcis() / add_pcis_done()).
 */
typedef struct {
    /* Inner multidev; one aodev is prepared from it per PCI device. */
    libxl__multidev multidev;
    /* aodev prepared from the caller's multidev; its callback is run by
     * add_pcis_done() once the inner multidev has finished. */
    libxl__ao_device *outer_aodev;
    /* Domain configuration whose pcidevs[] entries are added. */
    libxl_domain_config *d_config;
    /* Domain the devices are added to. */
    libxl_domid domid;
} add_pcis_state;
1828 
1829 static void add_pcis_done(libxl__egc *, libxl__multidev *, int rc);
1830 
/*
 * Add every PCI device listed in @d_config to domain @domid.
 *
 * One aodev is reserved in the caller's @multidev to represent the whole
 * batch; the individual devices are tracked by a private multidev whose
 * completion (add_pcis_done()) finishes that outer aodev.
 */
static void libxl__add_pcis(libxl__egc *egc, libxl__ao *ao, uint32_t domid,
                            libxl_domain_config *d_config,
                            libxl__multidev *multidev)
{
    AO_GC;
    add_pcis_state *state;
    int idx;

    /* We need to start a new multidev in order to be able to execute
     * libxl__create_pci_backend only once. */

    GCNEW(state);
    state->domid = domid;
    state->d_config = d_config;
    state->outer_aodev = libxl__multidev_prepare(multidev);
    state->multidev.callback = add_pcis_done;
    libxl__multidev_begin(ao, &state->multidev);

    idx = 0;
    while (idx < d_config->num_pcidevs) {
        libxl__ao_device *dev_aodev =
            libxl__multidev_prepare(&state->multidev);

        libxl__device_pci_add(egc, domid, &d_config->pcidevs[idx],
                              true, dev_aodev);
        idx++;
    }

    libxl__multidev_prepared(egc, &state->multidev, 0);
}
1857 
/*
 * Completion callback of the inner multidev set up by libxl__add_pcis():
 * stores @rc in the outer aodev and runs that aodev's callback.
 */
static void add_pcis_done(libxl__egc *egc, libxl__multidev *multidev,
                          int rc)
{
    EGC_GC;
    add_pcis_state *state = CONTAINER_OF(multidev, *state, multidev);
    libxl__ao_device *outer = state->outer_aodev;

    outer->rc = rc;
    outer->callback(egc, outer);
}
1868 
/*
 * Ask a qemu-traditional device model, through xenstore, to unplug @pci.
 *
 * The device model "state" node is read first, the device BDF is written
 * to the "parameter" node, and, unless @force is set or the virtual
 * function is not 0, the "pci-rem" command is issued and its
 * acknowledgement awaited.  Finally the previously read state is written
 * back.
 *
 * Returns 0 on success, or ERROR_FAIL if the device model did not
 * acknowledge the removal (in which case the state node is not restored).
 */
static int qemu_pci_remove_xenstore(libxl__gc *gc, uint32_t domid,
                                    libxl_device_pci *pci, int force)
{
    libxl_ctx *ctx = libxl__gc_owner(gc);
    char *state;
    char *path;
    uint32_t dm_domid;

    dm_domid = libxl_get_stubdom_id(CTX, domid);

    /* Remember the current state so it can be put back afterwards. */
    path = DEVICE_MODEL_XS_PATH(gc, dm_domid, domid, "/state");
    state = libxl__xs_read(gc, XBT_NULL, path);
    path = DEVICE_MODEL_XS_PATH(gc, dm_domid, domid, "/parameter");
    libxl__xs_printf(gc, XBT_NULL, path, PCI_BDF, pci->domain,
                     pci->bus, pci->dev, pci->func);

    /* Remove all functions at once atomically by only signalling
     * device-model for function 0 */
    if ( !force && (pci->vdevfn & 0x7) == 0 ) {
        libxl__qemu_traditional_cmd(gc, domid, "pci-rem");
        if (libxl__wait_for_device_model_deprecated(gc, domid, "pci-removed",
                                         NULL, NULL, NULL) < 0) {
            LOGD(ERROR, domid, "Device Model didn't respond in time");
            /* This depends on guest operating system acknowledging the
             * SCI, if it doesn't respond in time then we may wish to
             * force the removal.
             */
            return ERROR_FAIL;
        }
    }

    /*
     * The initial read may have failed (NULL); in that case there is
     * nothing to restore, and strlen() must not be called on it.
     */
    if (state) {
        path = DEVICE_MODEL_XS_PATH(gc, dm_domid, domid, "/state");
        if (!xs_write(ctx->xsh, XBT_NULL, path, state, strlen(state)))
            LOGED(WARN, domid, "Couldn't restore device model state at %s",
                  path);
    } else {
        LOGD(DEBUG, domid,
             "Device model state was not readable, not restoring it");
    }

    return 0;
}
1904 
/*
 * State carried through the asynchronous removal of one PCI device
 * (allocated in libxl__device_pci_remove_common()).
 */
typedef struct pci_remove_state {
    /* aodev of the requester; its rc is set and its callback run when the
     * removal ends (device_pci_remove_common_next()). */
    libxl__ao_device *aodev;
    /* Domain the device is removed from. */
    libxl_domid domid;
    /* Private copy of the device; func/vdevfn are rewritten per function. */
    libxl_device_pci pci;
    /* When set, host-side cleanup proceeds even if detaching failed. */
    bool force;
    /* Set by do_pci_remove() when the domain type is HVM. */
    bool hvm;
    /* Original vdevfn with the function bits cleared. */
    unsigned int orig_vdev;
    /* Mask of physical functions to remove. */
    unsigned int pfunc_mask;
    /* Next function number to consider; counts down from 7. */
    int next_func;
    /* aodev used for the nested removal from the stubdomain. */
    libxl__ao_device stubdom_aodev;
    /* Wait on the device model state path (qemu-traditional path). */
    libxl__xswait_state xswait;
    /* QMP state used for "device_del" and "query-pci". */
    libxl__ev_qmp qmp;
    /* Timeout registered when the QMP removal starts. */
    libxl__ev_time timeout;
    /* Timer between successive "query-pci" polls. */
    libxl__ev_time retry_timer;
} pci_remove_state;
1920 
1921 static void libxl__device_pci_remove_common(libxl__egc *egc,
1922     uint32_t domid, libxl_device_pci *pci, bool force,
1923     libxl__ao_device *aodev);
1924 static void device_pci_remove_common_next(libxl__egc *egc,
1925     pci_remove_state *prs, int rc);
1926 
1927 static void pci_remove_qemu_trad_watch_state_cb(libxl__egc *egc,
1928     libxl__xswait_state *xswa, int rc, const char *state);
1929 static void pci_remove_qmp_device_del(libxl__egc *egc,
1930     pci_remove_state *prs);
1931 static void pci_remove_qmp_device_del_cb(libxl__egc *egc,
1932     libxl__ev_qmp *qmp, const libxl__json_object *response, int rc);
1933 static void pci_remove_qmp_retry_timer_cb(libxl__egc *egc,
1934     libxl__ev_time *ev, const struct timeval *requested_abs, int rc);
1935 static void pci_remove_qmp_query_cb(libxl__egc *egc,
1936     libxl__ev_qmp *qmp, const libxl__json_object *response, int rc);
1937 static void pci_remove_timeout(libxl__egc *egc,
1938     libxl__ev_time *ev, const struct timeval *requested_abs, int rc);
1939 static void pci_remove_detached(libxl__egc *egc,
1940     pci_remove_state *prs, int rc);
1941 static void pci_remove_stubdom_done(libxl__egc *egc,
1942     libxl__ao_device *aodev);
1943 static void pci_remove_done(libxl__egc *egc,
1944     pci_remove_state *prs, int rc);
1945 
/*
 * Remove the function currently described by prs->pci from the domain.
 *
 * Verifies that the device is in the domain's PCI list, then for HVM
 * domains asks the running device model to unplug it (xenstore watch for
 * qemu-traditional, QMP for qemu-xen).  Non-HVM domains, and all failures,
 * go straight to pci_remove_detached().
 */
static void do_pci_remove(libxl__egc *egc, pci_remove_state *prs)
{
    STATE_AO_GC(prs->aodev->ao);
    libxl_ctx *ctx = libxl__gc_owner(gc);
    uint32_t domid = prs->domid;
    libxl_domain_type type = libxl__domain_type(gc, domid);
    libxl_device_pci *pci = &prs->pci;
    libxl_device_pci *assigned;
    bool found;
    int count, rc;

    assigned = libxl_device_pci_list(ctx, domid, &count);
    if (assigned == NULL) {
        rc = ERROR_FAIL;
        goto detached;
    }

    found = is_pci_in_array(assigned, count, pci);
    libxl_device_pci_list_free(assigned, count);

    if (!found) {
        LOGD(ERROR, domid, "PCI device not attached to this domain");
        rc = ERROR_INVAL;
        goto detached;
    }

    if (type != LIBXL_DOMAIN_TYPE_HVM) {
        /* No device model to talk to. */
        rc = 0;
        goto detached;
    }

    prs->hvm = true;
    switch (libxl__device_model_version_running(gc, domid)) {
    case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN_TRADITIONAL:
        prs->xswait.ao = ao;
        prs->xswait.what = "Device Model";
        prs->xswait.path = DEVICE_MODEL_XS_PATH(gc,
            libxl_get_stubdom_id(CTX, domid), domid, "/state");
        prs->xswait.timeout_ms = LIBXL_DEVICE_MODEL_START_TIMEOUT * 1000;
        prs->xswait.callback = pci_remove_qemu_trad_watch_state_cb;
        rc = libxl__xswait_start(gc, &prs->xswait);
        if (!rc)
            return; /* pci_remove_qemu_trad_watch_state_cb() continues */
        break;
    case LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN:
        pci_remove_qmp_device_del(egc, prs); /* must be last */
        return;
    default:
        rc = ERROR_INVAL;
        break;
    }

detached:
    pci_remove_detached(egc, prs, rc); /* must be last */
}
1996 
/*
 * xswait callback for the qemu-traditional removal path.
 *
 * check_qemu_running() decides whether the device model is ready; while it
 * reports ERROR_NOT_READY nothing is done and the wait goes on.  Once
 * ready, the unplug is requested through xenstore.  The outcome (or any
 * error) is passed to pci_remove_detached().
 */
static void pci_remove_qemu_trad_watch_state_cb(libxl__egc *egc,
                                                libxl__xswait_state *xswa,
                                                int rc,
                                                const char *state)
{
    pci_remove_state *prs = CONTAINER_OF(xswa, *prs, xswait);
    STATE_AO_GC(prs->aodev->ao);
    libxl_domid domid = prs->domid;

    rc = check_qemu_running(gc, domid, xswa, rc, state);
    if (rc == ERROR_NOT_READY)
        return;

    if (!rc)
        rc = qemu_pci_remove_xenstore(gc, domid, &prs->pci, prs->force);

    pci_remove_detached(egc, prs, rc);
}
2020 
/*
 * Start the qemu-xen removal path: register the removal timeout, then send
 * the QMP "device_del" command for the device's qdev id.  The reply is
 * handled by pci_remove_qmp_device_del_cb(); a failure to start goes to
 * pci_remove_detached().
 */
static void pci_remove_qmp_device_del(libxl__egc *egc,
                                      pci_remove_state *prs)
{
    STATE_AO_GC(prs->aodev->ao);
    libxl_device_pci *const pci = &prs->pci;
    libxl__json_object *qmp_args = NULL;
    int rc;

    rc = libxl__ev_time_register_rel(ao, &prs->timeout,
                                     pci_remove_timeout,
                                     LIBXL_QMP_CMD_TIMEOUT * 1000);
    if (!rc) {
        QMP_PARAMETERS_SPRINTF(&qmp_args, "id", PCI_PT_QDEV_ID,
                               pci->bus, pci->dev, pci->func);
        prs->qmp.callback = pci_remove_qmp_device_del_cb;
        rc = libxl__ev_qmp_send(egc, &prs->qmp, "device_del", qmp_args);
        if (!rc)
            return; /* reply arrives in pci_remove_qmp_device_del_cb() */
    }

    pci_remove_detached(egc, prs, rc);
}
2046 
/*
 * Reply handler for the QMP "device_del" command.
 *
 * On error the removal moves on to pci_remove_detached().  On success the
 * first "query-pci" poll is started by calling the retry timer callback
 * directly.
 */
static void pci_remove_qmp_device_del_cb(libxl__egc *egc,
                                         libxl__ev_qmp *qmp,
                                         const libxl__json_object *response,
                                         int rc)
{
    EGC_GC;
    pci_remove_state *prs = CONTAINER_OF(qmp, *prs, qmp);

    if (rc) {
        pci_remove_detached(egc, prs, rc);
        return;
    }

    /* Now that the command is sent, we want to wait until QEMU has
     * confirmed that the device is removed. */
    /* TODO: Instead of using a poll loop { ev_timer ; query-pci }, it
     * could be possible to listen to events sent by QEMU via QMP in order
     * to wait for the passthrough pci-device to be removed from QEMU.  */
    pci_remove_qmp_retry_timer_cb(egc, &prs->retry_timer, NULL,
                                  ERROR_TIMEDOUT);
}
2069 
/*
 * Retry timer callback: send a QMP "query-pci" whose reply is handled by
 * pci_remove_qmp_query_cb().  The incoming @rc is not examined; it is
 * overwritten with the result of sending the command.
 */
static void pci_remove_qmp_retry_timer_cb(libxl__egc *egc, libxl__ev_time *ev,
                                          const struct timeval *requested_abs,
                                          int rc)
{
    EGC_GC;
    pci_remove_state *prs = CONTAINER_OF(ev, *prs, retry_timer);

    prs->qmp.callback = pci_remove_qmp_query_cb;
    rc = libxl__ev_qmp_send(egc, &prs->qmp, "query-pci", NULL);
    if (rc)
        pci_remove_detached(egc, prs, rc);
}
2085 
/*
 * Reply handler for QMP "query-pci".
 *
 * Searches every device of every bus in the reply for the qdev id of the
 * device being removed.  If it is still listed, the retry timer is armed
 * for another poll in 1000ms; otherwise (or on any error) the removal
 * continues in pci_remove_detached().
 */
static void pci_remove_qmp_query_cb(libxl__egc *egc,
                                    libxl__ev_qmp *qmp,
                                    const libxl__json_object *response,
                                    int rc)
{
    EGC_GC;
    pci_remove_state *prs = CONTAINER_OF(qmp, *prs, qmp);
    libxl__ao *const ao = prs->aodev->ao;
    libxl_device_pci *const pci = &prs->pci;
    const libxl__json_object *bus_obj;
    const char *wanted_id;
    int bus_idx, dev_idx;

    if (rc) goto done;

    libxl__ev_qmp_dispose(gc, qmp);

    wanted_id = GCSPRINTF(PCI_PT_QDEV_ID,
                          pci->bus, pci->dev, pci->func);

    /* query-pci response:
     * [{ 'devices': [ 'qdev_id': 'str', ...  ], ... }]
     * */

    for (bus_idx = 0; ; bus_idx++) {
        const libxl__json_object *dev_list;
        const libxl__json_object *dev_obj;

        bus_obj = libxl__json_array_get(response, bus_idx);
        if (!bus_obj)
            break;

        dev_list = libxl__json_map_get("devices", bus_obj, JSON_ARRAY);
        if (!dev_list) {
            rc = ERROR_QEMU_API;
            goto done;
        }

        for (dev_idx = 0; ; dev_idx++) {
            const libxl__json_object *id_obj;
            const char *found_id;

            dev_obj = libxl__json_array_get(dev_list, dev_idx);
            if (!dev_obj)
                break;

            id_obj = libxl__json_map_get("qdev_id", dev_obj, JSON_STRING);
            if (!id_obj) {
                rc = ERROR_QEMU_API;
                goto done;
            }

            found_id = libxl__json_object_get_string(id_obj);
            if (!found_id || strcmp(wanted_id, found_id) != 0)
                continue;

            /* Device still in QEMU, need to wait longer. */
            rc = libxl__ev_time_register_rel(ao, &prs->retry_timer,
                pci_remove_qmp_retry_timer_cb, 1000);
            if (rc) goto done;
            return;
        }
    }

done:
    pci_remove_detached(egc, prs, rc); /* must be last */
}
2145 
/*
 * Timeout callback for the QMP removal: logs a warning naming the device's
 * qdev id and passes @rc on to pci_remove_detached().
 */
static void pci_remove_timeout(libxl__egc *egc, libxl__ev_time *ev,
                               const struct timeval *requested_abs,
                               int rc)
{
    EGC_GC;
    pci_remove_state *prs = CONTAINER_OF(ev, *prs, timeout);
    libxl_device_pci *const dev = &prs->pci;

    LOGD(WARN, prs->domid, "timed out waiting for DM to remove "
         PCI_PT_QDEV_ID, dev->bus, dev->dev, dev->func);

    /* If we timed out, we might still want to keep destroying the device
     * (when force==true), so let the next function decide what to do on
     * error */
    pci_remove_detached(egc, prs, rc);
}
2164 
/*
 * Host-side cleanup after the device model step of a removal finished
 * with @rc.
 *
 * QMP and timer state is released first.  If @rc is an error and the
 * removal is not forced, nothing else is done and the error goes to
 * pci_remove_done().  Otherwise this function:
 *   - revokes the domain's access to the I/O port and memory ranges listed
 *     in the device's sysfs "resource" file;
 *   - where legacy IRQs are supported, unmaps the IRQ read from the sysfs
 *     "irq" file and revokes permission for it;
 *   - resets the device when the virtual function number is 0;
 *   - deassigns the device unless @domid is itself a stubdomain;
 *   - if the domain has a stubdomain, starts a nested removal there
 *     (completion: pci_remove_stubdom_done()), else calls
 *     pci_remove_done() with 0.
 * Failures of the individual revocation steps are logged and otherwise
 * ignored.
 */
static void pci_remove_detached(libxl__egc *egc,
                                pci_remove_state *prs,
                                int rc)
{
    STATE_AO_GC(prs->aodev->ao);
    libxl_ctx *ctx = libxl__gc_owner(gc);
    /*
     * The sysfs resource file holds 64-bit values; parsing them into
     * 32-bit variables would truncate BARs located above 4GiB.
     */
    unsigned long long start = 0, end = 0, flags = 0, size = 0;
    unsigned int irq = 0;
    int i, stubdomid = 0;
    int r; /* libxc return values, kept apart from rc */
    const char *sysfs_path;
    FILE *f;
    uint32_t domainid = prs->domid;
    bool isstubdom;

    /* Convenience aliases */
    libxl_device_pci *const pci = &prs->pci;
    libxl_domid domid = prs->domid;

    /* Cleaning QMP states ASAP */
    libxl__ev_qmp_dispose(gc, &prs->qmp);
    libxl__ev_time_deregister(gc, &prs->timeout);
    libxl__ev_time_deregister(gc, &prs->retry_timer);

    if (rc && !prs->force)
        goto out;

    /* Revoke the permissions */
    sysfs_path = GCSPRINTF(SYSFS_PCI_DEV"/"PCI_BDF"/resource",
                           pci->domain, pci->bus, pci->dev, pci->func);

    f = fopen(sysfs_path, "r");
    if (f == NULL) {
        LOGED(ERROR, domid, "Couldn't open %s", sysfs_path);
        goto skip_bar;
    }

    for (i = 0; i < PROC_PCI_NUM_RESOURCES; i++) {
        if (fscanf(f, "0x%llx 0x%llx 0x%llx\n", &start, &end, &flags) != 3)
            continue;
        size = end - start + 1;
        if (start) {
            if (flags & PCI_BAR_IO) {
                r = xc_domain_ioport_permission(ctx->xch, domid, start,
                                                size, 0);
                if (r < 0)
                    LOGED(ERROR, domid,
                          "xc_domain_ioport_permission error 0x%llx/0x%llx",
                          start,
                          size);
            } else {
                r = xc_domain_iomem_permission(ctx->xch, domid,
                                               start >> XC_PAGE_SHIFT,
                                               (size + (XC_PAGE_SIZE - 1)) >> XC_PAGE_SHIFT,
                                               0);
                if (r < 0)
                    LOGED(ERROR, domid,
                          "xc_domain_iomem_permission error 0x%llx/0x%llx",
                          start,
                          size);
            }
        }
    }
    fclose(f);

skip_bar:
    if (!pci_supp_legacy_irq())
        goto skip_legacy_irq;

    sysfs_path = GCSPRINTF(SYSFS_PCI_DEV"/"PCI_BDF"/irq", pci->domain,
                           pci->bus, pci->dev, pci->func);

    f = fopen(sysfs_path, "r");
    if (f == NULL) {
        LOGED(ERROR, domid, "Couldn't open %s", sysfs_path);
        goto skip_legacy_irq;
    }

    if ((fscanf(f, "%u", &irq) == 1) && irq) {
        r = xc_physdev_unmap_pirq(ctx->xch, domid, irq);
        if (r < 0) {
            /*
             * QEMU may have already unmapped the IRQ. So the error
             * may be spurious. For now, still print an error message as
             * it is not easy to distinguished between valid and
             * spurious error.
             */
            LOGED(ERROR, domid, "xc_physdev_unmap_pirq irq=%u", irq);
        }
        r = xc_domain_irq_permission(ctx->xch, domid, irq, 0);
        if (r < 0) {
            LOGED(ERROR, domid, "xc_domain_irq_permission irq=%u", irq);
        }
    }

    fclose(f);

skip_legacy_irq:

    isstubdom = libxl_is_stubdom(CTX, domid, &domainid);

    /* don't do multiple resets while some functions are still passed through */
    if ((pci->vdevfn & 0x7) == 0) {
        libxl__device_pci_reset(gc, pci->domain, pci->bus, pci->dev, pci->func);
    }

    if (!isstubdom) {
        r = xc_deassign_device(CTX->xch, domid, pci_encode_bdf(pci));
        if (r < 0 && (prs->hvm || errno != ENOSYS))
            LOGED(ERROR, domainid, "xc_deassign_device failed");
    }

    stubdomid = libxl_get_stubdom_id(CTX, domid);
    if (stubdomid != 0) {
        libxl__ao_device *const stubdom_aodev = &prs->stubdom_aodev;

        libxl__prepare_ao_device(ao, stubdom_aodev);
        stubdom_aodev->action = LIBXL__DEVICE_ACTION_REMOVE;
        stubdom_aodev->callback = pci_remove_stubdom_done;
        stubdom_aodev->update_json = prs->aodev->update_json;
        /*
         * libxl__device_pci_remove_common() takes its own copy of the
         * device on entry, so our copy can be passed directly; making a
         * further intermediate copy here would never be disposed.
         */
        libxl__device_pci_remove_common(egc, stubdomid, pci,
                                        prs->force, stubdom_aodev);
        return;
    }

    rc = 0;
out:
    pci_remove_done(egc, prs, rc);
}
2297 
/*
 * Completion callback of the nested stubdomain removal: recovers the outer
 * removal state and finishes it with rc 0 (the stubdomain aodev's own rc
 * is not consulted).
 */
static void pci_remove_stubdom_done(libxl__egc *egc,
                                    libxl__ao_device *aodev)
{
    pci_remove_state *const outer = CONTAINER_OF(aodev, *outer, stubdom_aodev);

    pci_remove_done(egc, outer, 0);
}
2305 
/*
 * End of the removal of one function: on success the device's xenstore
 * entries are removed; in either case control returns to
 * device_pci_remove_common_next() with @rc.
 */
static void pci_remove_done(libxl__egc *egc,
                            pci_remove_state *prs,
                            int rc)
{
    EGC_GC;

    if (!rc)
        libxl__device_pci_remove_xenstore(gc, prs->domid, &prs->pci);

    device_pci_remove_common_next(egc, prs, rc);
}
2318 
/*
 * Common entry point for removing (@force false) or destroying (@force
 * true) PCI device @pci of domain @domid.
 *
 * Allocates the removal state, takes a private copy of @pci, resolves a
 * named device to its BDF, computes the mask of physical functions to
 * remove and then starts iterating over them from function 7 downwards in
 * device_pci_remove_common_next().  Errors are reported through that same
 * function, which completes @aodev.
 */
static void libxl__device_pci_remove_common(libxl__egc *egc,
                                            uint32_t domid,
                                            libxl_device_pci *pci,
                                            bool force,
                                            libxl__ao_device *aodev)
{
    STATE_AO_GC(aodev->ao);
    int rc;
    pci_remove_state *prs;

    GCNEW(prs);
    prs->aodev = aodev;
    prs->domid = domid;
    prs->force = force;

    /*
     * device_pci_remove_common_next() disposes/deregisters all of these on
     * its exit path, including when it is reached through an early
     * `goto out' below, so they have to be initialised before the first
     * point of failure.
     */
    libxl__xswait_init(&prs->xswait);
    libxl__ev_qmp_init(&prs->qmp);
    prs->qmp.ao = prs->aodev->ao;
    prs->qmp.domid = prs->domid;
    prs->qmp.payload_fd = -1;
    libxl__ev_time_init(&prs->timeout);
    libxl__ev_time_init(&prs->retry_timer);

    /* From here on only the private copy is used. */
    libxl_device_pci_copy(CTX, &prs->pci, pci);
    pci = &prs->pci;

    /* If the device is named then we need to look up the BDF */
    if (pci->name) {
        rc = name2bdf(gc, pci);
        if (rc) goto out;
    }

    /* Virtual devfn with the function bits cleared. */
    prs->orig_vdev = pci->vdevfn & ~7U;

    if ( pci->vfunc_mask == LIBXL_PCI_FUNC_ALL ) {
        if ( pci_multifunction_check(gc, pci, &prs->pfunc_mask) ) {
            rc = ERROR_FAIL;
            goto out;
        }
        pci->vfunc_mask &= prs->pfunc_mask;
    } else {
        prs->pfunc_mask = (1 << pci->func);
    }

    rc = 0;
    prs->next_func = 7;
out:
    device_pci_remove_common_next(egc, prs, rc);
}
2368 
/*
 * Drive the per-function removal loop.
 *
 * While @rc is 0, takes the next function number (counting down from
 * prs->next_func) that is set in the physical function mask, adjusts
 * func/vdevfn and starts do_pci_remove() for it; this function is entered
 * again when that removal finishes.  When no function is left, or @rc is
 * an error, all event state is released, the "domid" info node is removed
 * on success, the private device copy is disposed and the aodev is
 * completed with the result.
 */
static void device_pci_remove_common_next(libxl__egc *egc,
                                          pci_remove_state *prs,
                                          int rc)
{
    EGC_GC;
    libxl__ao_device *const aodev = prs->aodev;
    libxl_device_pci *const pci = &prs->pci;
    const unsigned int phys_mask = prs->pfunc_mask;
    const unsigned int slot_base = prs->orig_vdev;

    if (!rc) {
        while (prs->next_func >= 0) {
            const int fn = prs->next_func--;

            if (!((1 << fn) & phys_mask))
                continue;

            if (pci->vfunc_mask == phys_mask) {
                pci->func = fn;
                pci->vdevfn = slot_base | fn;
            } else {
                pci->vdevfn = slot_base;
            }
            do_pci_remove(egc, prs);
            return;
        }
        /* Every function has been handled; rc is still 0 here. */
    }

    libxl__ev_qmp_dispose(gc, &prs->qmp);
    libxl__xswait_stop(gc, &prs->xswait);
    libxl__ev_time_deregister(gc, &prs->timeout);
    libxl__ev_time_deregister(gc, &prs->retry_timer);

    if (rc == 0)
        pci_info_xs_remove(gc, pci, "domid");

    libxl_device_pci_dispose(pci);
    aodev->rc = rc;
    aodev->callback(egc, aodev);
}
2411 
/*
 * Asynchronously remove PCI device @pci from domain @domid without
 * forcing.  Completion is reported through device_addrm_aocomplete();
 * returns AO_INPROGRESS.
 */
int libxl_device_pci_remove(libxl_ctx *ctx, uint32_t domid,
                            libxl_device_pci *pci,
                            const libxl_asyncop_how *ao_how)
{
    AO_CREATE(ctx, domid, ao_how);
    libxl__ao_device *remove_dev;

    GCNEW(remove_dev);
    libxl__prepare_ao_device(ao, remove_dev);
    remove_dev->update_json = true;
    remove_dev->callback = device_addrm_aocomplete;
    remove_dev->action = LIBXL__DEVICE_ACTION_REMOVE;

    libxl__device_pci_remove_common(egc, domid, pci, false, remove_dev);

    return AO_INPROGRESS;
}
2428 
/*
 * Asynchronously remove PCI device @pci from domain @domid with force set.
 * Completion is reported through device_addrm_aocomplete(); returns
 * AO_INPROGRESS.
 */
int libxl_device_pci_destroy(libxl_ctx *ctx, uint32_t domid,
                             libxl_device_pci *pci,
                             const libxl_asyncop_how *ao_how)
{
    AO_CREATE(ctx, domid, ao_how);
    libxl__ao_device *destroy_dev;

    GCNEW(destroy_dev);
    libxl__prepare_ao_device(ao, destroy_dev);
    destroy_dev->update_json = true;
    destroy_dev->callback = device_addrm_aocomplete;
    destroy_dev->action = LIBXL__DEVICE_ACTION_REMOVE;

    libxl__device_pci_remove_common(egc, domid, pci, true, destroy_dev);

    return AO_INPROGRESS;
}
2444 
/*
 * Fill the libxl_device_pci pointed to by @data from entry @nr of the PCI
 * backend directory @be_path.
 *
 * Reads "dev-<nr>" (mandatory, BDF), "vdevfn-<nr>" (hex), "name-<nr>" and
 * "opts-<nr>" (comma separated key=value list; the keys msitranslate,
 * power_mgmt, permissive and rdm_policy are recognised, others are
 * skipped).
 *
 * Returns 0 on success, ERROR_FAIL if "dev-<nr>" cannot be read,
 * ERROR_INVAL if it is not a valid BDF, or ERROR_NOMEM if the name cannot
 * be duplicated.
 */
static int libxl__device_pci_from_xs_be(libxl__gc *gc,
                                        const char *be_path,
                                        libxl_devid nr, void *data)
{
    char *s;
    unsigned int domain = 0, bus = 0, dev = 0, func = 0;
    libxl_device_pci *pci = data;

    libxl_device_pci_init(pci);

    s = libxl__xs_read(gc, XBT_NULL, GCSPRINTF("%s/dev-%d", be_path, nr));
    if (!s) {
        /* The read can fail; sscanf() must not be handed NULL. */
        LOG(ERROR, "Couldn't read %s/dev-%d", be_path, nr);
        return ERROR_FAIL;
    }
    if (sscanf(s, PCI_BDF, &domain, &bus, &dev, &func) != 4) {
        LOG(ERROR, "Malformed BDF '%s' in %s/dev-%d", s, be_path, nr);
        return ERROR_INVAL;
    }

    pci_struct_fill(pci, domain, bus, dev, func);

    s = libxl__xs_read(gc, XBT_NULL, GCSPRINTF("%s/vdevfn-%d", be_path, nr));
    if (s)
        pci->vdevfn = strtol(s, NULL, 16);

    s = libxl__xs_read(gc, XBT_NULL, GCSPRINTF("%s/name-%d", be_path, nr));
    if (s) {
        pci->name = strdup(s);
        if (!pci->name)
            return ERROR_NOMEM;
    }

    s = libxl__xs_read(gc, XBT_NULL, GCSPRINTF("%s/opts-%d", be_path, nr));
    if (s) {
        char *saveptr;
        char *key, *val;

        /*
         * strtok_r() returns NULL both for an empty option string and for
         * a key that has no value after it; neither may be dereferenced.
         */
        for (key = strtok_r(s, ",=", &saveptr);
             key != NULL;
             key = strtok_r(NULL, ",=", &saveptr)) {
            while (*key == ' ')
                key++;
            if (!strcmp(key, "msitranslate")) {
                val = strtok_r(NULL, ",=", &saveptr);
                if (!val) break;
                pci->msitranslate = atoi(val);
            } else if (!strcmp(key, "power_mgmt")) {
                val = strtok_r(NULL, ",=", &saveptr);
                if (!val) break;
                pci->power_mgmt = atoi(val);
            } else if (!strcmp(key, "permissive")) {
                val = strtok_r(NULL, ",=", &saveptr);
                if (!val) break;
                pci->permissive = atoi(val);
            } else if (!strcmp(key, "rdm_policy")) {
                val = strtok_r(NULL, ",=", &saveptr);
                if (!val) break;
                libxl_rdm_reserve_policy_from_string(val, &pci->rdm_policy);
            }
        }
    }

    return 0;
}
2493 
/*
 * Read the number of PCI device slots recorded under a backend path.
 *
 * gc      - allocation/logging context used for the xenstore read.
 * be_path - backend directory containing the "num_devs" node.
 * num     - receives the parsed count on success; untouched on failure.
 *
 * Returns 0 on success.  Returns ERROR_FAIL when "<be_path>/num_devs" is
 * absent, or when it does not start with a non-negative decimal number
 * that fits in an unsigned int.  A missing node is not logged; a
 * malformed one is.
 */
static int libxl__device_pci_get_num(libxl__gc *gc, const char *be_path,
                                     unsigned int *num)
{
    char *num_devs, *end;
    long parsed;

    num_devs = libxl__xs_read(gc, XBT_NULL, GCSPRINTF("%s/num_devs", be_path));
    if (!num_devs)
        return ERROR_FAIL;

    /*
     * strtol() rather than atoi(): a negative or overflowing value must
     * not be silently converted into a huge unsigned slot count.
     * Trailing characters after the digits are tolerated, as atoi() did.
     */
    errno = 0;
    parsed = strtol(num_devs, &end, 10);
    if (end == num_devs || errno == ERANGE || parsed < 0 ||
        (unsigned long)parsed != (unsigned int)parsed) {
        LOG(ERROR, "%s/num_devs holds invalid device count \"%s\"",
            be_path, num_devs);
        return ERROR_FAIL;
    }

    *num = parsed;

    return 0;
}
2508 
/*
 * Store in *path the PCI backend path for domid, as built by
 * libxl__domain_device_backend_path() with backend domain 0 and devid 0.
 *
 * Always returns 0.
 */
static int libxl__device_pci_get_path(libxl__gc *gc, uint32_t domid,
                                      char **path)
{
    char *be_path;

    be_path = libxl__domain_device_backend_path(gc, 0, domid, 0,
                                                LIBXL__DEVICE_KIND_PCI);
    *path = be_path;

    return 0;
}
2517 
/*
 * Start a forced removal of every PCI device that libxl_device_pci_list()
 * reports for domid.  One ao_device is prepared on multidev per device and
 * handed to libxl__device_pci_remove_common().  The list is freed before
 * returning.  Nothing is done when the list call returns NULL.
 */
void libxl__device_pci_destroy_all(libxl__egc *egc, uint32_t domid,
                                   libxl__multidev *multidev)
{
    STATE_AO_GC(multidev->ao);
    int count, idx;
    libxl_device_pci *devs = libxl_device_pci_list(CTX, domid, &count);

    if (!devs)
        return;

    idx = 0;
    while (idx < count) {
        /*
         * Removal is forced at shutdown: on HVM, qemu will not always
         * answer the SCI interrupt, because the guest kernel has already
         * shut the devices down by the time this code runs.
         */
        libxl__ao_device *aodev = libxl__multidev_prepare(multidev);

        libxl__device_pci_remove_common(egc, domid, &devs[idx], true, aodev);
        idx++;
    }

    libxl_device_pci_list_free(devs, count);
}
2541 
/*
 * Grant iomem permission on the VGA range for graphics passthrough.
 *
 * Does nothing unless the HVM gfx_passthru option in d_config is set.
 * Otherwise the configured PCI devices are scanned for the first one whose
 * sysfs class reads as 0x030000 (VGA); devices whose class cannot be read
 * are skipped.  If one is found, permission for 0x20 frames starting at
 * frame (0xa0000 >> XC_PAGE_SHIFT) is granted first to the domain returned
 * by libxl_get_stubdom_id() and then to domid itself.
 *
 * Returns 0 when passthrough is off, no VGA device is configured, or both
 * grants succeed; otherwise the negative value returned by the failing
 * xc_domain_iomem_permission() call.
 */
int libxl__grant_vga_iomem_permission(libxl__gc *gc, const uint32_t domid,
                                      libxl_domain_config *const d_config)
{
    const uint64_t vga_iomem_start = 0xa0000 >> XC_PAGE_SHIFT;
    const uint64_t vga_iomem_end = vga_iomem_start + 0x20 - 1;
    uint32_t stubdom_domid;
    int idx, rc;

    if (!libxl_defbool_val(d_config->b_info.u.hvm.gfx_passthru))
        return 0;

    /* Stop at the first device that is readable and of VGA class. */
    for (idx = 0; idx < d_config->num_pcidevs; idx++) {
        unsigned long dev_class;

        if (!sysfs_dev_get_class(gc, &d_config->pcidevs[idx], &dev_class) &&
            dev_class == 0x030000 /* VGA class */)
            break;
    }

    /* No VGA device among the configured ones: nothing to grant. */
    if (idx >= d_config->num_pcidevs)
        return 0;

    stubdom_domid = libxl_get_stubdom_id(CTX, domid);
    rc = xc_domain_iomem_permission(CTX->xch, stubdom_domid,
                                    vga_iomem_start, 0x20, 1);
    if (rc < 0) {
        LOGED(ERROR, domid,
              "failed to give stubdom%d access to iomem range "
              "%"PRIx64"-%"PRIx64" for VGA passthru",
              stubdom_domid, vga_iomem_start, vga_iomem_end);
        return rc;
    }

    rc = xc_domain_iomem_permission(CTX->xch, domid,
                                    vga_iomem_start, 0x20, 1);
    if (rc < 0) {
        LOGED(ERROR, domid,
              "failed to give dom%d access to iomem range "
              "%"PRIx64"-%"PRIx64" for VGA passthru",
              domid, vga_iomem_start, vga_iomem_end);
        return rc;
    }

    return 0;
}
2586 
/*
 * Compare two PCI devices by evaluating COMPARE_PCI(d1, d2) and returning
 * its value unchanged.
 */
static int libxl_device_pci_compare(const libxl_device_pci *d1,
                                    const libxl_device_pci *d2)
{
    int result = COMPARE_PCI(d1, d2);

    return result;
}
2592 
/*
 * Instantiate the generic device-list boilerplate for the "pci" type.
 * NOTE(review): presumably this is what defines libxl_device_pci_list()
 * and libxl_device_pci_list_free() used in this file - confirm against
 * the macro definition.
 */
LIBXL_DEFINE_DEVICE_LIST(pci)

/*
 * PCI devices supply no update_devid hook, so the name is defined as NULL.
 * NOTE(review): presumably DEFINE_DEVICE_TYPE_STRUCT below refers to
 * libxl__device_pci_update_devid by name - confirm.
 */
#define libxl__device_pci_update_devid NULL

/*
 * Device-type descriptor for PCI.  The hooks set here are the three static
 * helpers of this file: get_num reads the slot count, get_path yields the
 * backend path, and from_xenstore fills one libxl_device_pci per slot.
 */
DEFINE_DEVICE_TYPE_STRUCT(pci, PCI, pcidevs,
    .get_num = libxl__device_pci_get_num,
    .get_path = libxl__device_pci_get_path,
    .from_xenstore = libxl__device_pci_from_xs_be,
);
2602 
2603 /*
2604  * Local variables:
2605  * mode: C
2606  * c-basic-offset: 4
2607  * indent-tabs-mode: nil
2608  * End:
2609  */
2610