/*
 * Copyright (C) 2008,  Netronome Systems, Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, see <http://www.gnu.org/licenses/>.
 */

#include <xen/sched.h>
#include <xen/pci.h>
#include <xen/pci_regs.h>
#include <xen/list.h>
#include <xen/prefetch.h>
#include <xen/iommu.h>
#include <xen/irq.h>
#include <xen/vm_event.h>
#include <asm/hvm/irq.h>
#include <xen/delay.h>
#include <xen/keyhandler.h>
#include <xen/event.h>
#include <xen/guest_access.h>
#include <xen/paging.h>
#include <xen/radix-tree.h>
#include <xen/softirq.h>
#include <xen/tasklet.h>
#include <xsm/xsm.h>
#include <asm/msi.h>
#include "ats.h"

struct pci_seg {
    struct list_head alldevs_list;
    u16 nr;
    unsigned long *ro_map;
    /* bus2bridge_lock protects bus2bridge array */
    spinlock_t bus2bridge_lock;
#define MAX_BUSES 256
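    /*
     * Summary (added for clarity): bus2bridge[] is indexed by secondary bus
     * number; for every bus reachable through a PCIe-to-PCI or legacy PCI
     * bridge it records that bridge's bus/devfn. See alloc_pdev() for how
     * it is populated and find_upstream_bridge() for how it is walked.
     */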
    struct {
        u8 map;
        u8 bus;
        u8 devfn;
    } bus2bridge[MAX_BUSES];
};

static spinlock_t _pcidevs_lock = SPIN_LOCK_UNLOCKED;

void pcidevs_lock(void)
{
    spin_lock_recursive(&_pcidevs_lock);
}

void pcidevs_unlock(void)
{
    spin_unlock_recursive(&_pcidevs_lock);
}

bool_t pcidevs_locked(void)
{
    return !!spin_is_locked(&_pcidevs_lock);
}

bool_t pcidevs_trylock(void)
{
    return !!spin_trylock_recursive(&_pcidevs_lock);
}
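
/*
 * Illustrative usage of the recursive pcidevs lock (a sketch, not taken
 * from the original source):
 *
 *     pcidevs_lock();
 *     pdev = pci_get_pdev(seg, bus, devfn);
 *     if ( pdev )
 *         ...operate on pdev...
 *     pcidevs_unlock();
 *
 * The lock being recursive allows helpers that take it to be called from
 * paths that already hold it.
 */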

static struct radix_tree_root pci_segments;

static inline struct pci_seg *get_pseg(u16 seg)
{
    return radix_tree_lookup(&pci_segments, seg);
}

bool_t pci_known_segment(u16 seg)
{
    return get_pseg(seg) != NULL;
}

static struct pci_seg *alloc_pseg(u16 seg)
{
    struct pci_seg *pseg = get_pseg(seg);

    if ( pseg )
        return pseg;

    pseg = xzalloc(struct pci_seg);
    if ( !pseg )
        return NULL;

    pseg->nr = seg;
    INIT_LIST_HEAD(&pseg->alldevs_list);
    spin_lock_init(&pseg->bus2bridge_lock);

    if ( radix_tree_insert(&pci_segments, seg, pseg) )
    {
        xfree(pseg);
        pseg = NULL;
    }

    return pseg;
}

static int pci_segments_iterate(
    int (*handler)(struct pci_seg *, void *), void *arg)
{
    u16 seg = 0;
    int rc = 0;

    do {
        struct pci_seg *pseg;

        if ( !radix_tree_gang_lookup(&pci_segments, (void **)&pseg, seg, 1) )
            break;
        rc = handler(pseg, arg);
        seg = pseg->nr + 1;
    } while ( !rc && seg );

    return rc;
}

void __init pt_pci_init(void)
{
    radix_tree_init(&pci_segments);
    if ( !alloc_pseg(0) )
        panic("Could not initialize PCI segment 0");
}

int __init pci_add_segment(u16 seg)
{
    return alloc_pseg(seg) ? 0 : -ENOMEM;
}

const unsigned long *pci_get_ro_map(u16 seg)
{
    struct pci_seg *pseg = get_pseg(seg);

    return pseg ? pseg->ro_map : NULL;
}

static struct phantom_dev {
    u16 seg;
    u8 bus, slot, stride;
} phantom_devs[8];
static unsigned int nr_phantom_devs;

static int __init parse_phantom_dev(const char *str)
{
    const char *s;
    unsigned int seg, bus, slot;
    struct phantom_dev phantom;

    if ( !*str )
        return -EINVAL;
    if ( nr_phantom_devs >= ARRAY_SIZE(phantom_devs) )
        return -E2BIG;

    s = parse_pci(str, &seg, &bus, &slot, NULL);
    if ( !s || *s != ',' )
        return -EINVAL;

    phantom.seg = seg;
    phantom.bus = bus;
    phantom.slot = slot;

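    /*
     * Note (added for clarity): only strides of 1, 2, or 4 are accepted,
     * and the "if ( *s )" below deliberately falls through into the
     * "default:" label, so a valid stride followed by trailing junk is
     * rejected with -EINVAL as well.
     */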
    switch ( phantom.stride = simple_strtol(s + 1, &s, 0) )
    {
    case 1: case 2: case 4:
        if ( *s )
    default:
            return -EINVAL;
    }

    phantom_devs[nr_phantom_devs++] = phantom;

    return 0;
}
custom_param("pci-phantom", parse_phantom_dev);
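
/*
 * Example (illustrative; the exact syntax is whatever parse_pci() accepts,
 * roughly [<seg>:]<bus>:<slot>,<stride>): a command line containing
 * "pci-phantom=01:10,4" would record that the device in slot 10 of bus 01
 * exposes phantom functions at stride 4.
 */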

static u16 __read_mostly command_mask;
static u16 __read_mostly bridge_ctl_mask;

/*
 * The 'pci' parameter controls certain PCI device aspects.
 * Optional comma-separated values may contain:
 *
 *   serr                       don't suppress system errors (default)
 *   no-serr                    suppress system errors
 *   perr                       don't suppress parity errors (default)
 *   no-perr                    suppress parity errors
 */
static int __init parse_pci_param(const char *s)
{
    const char *ss;
    int rc = 0;

    do {
        bool_t on = !!strncmp(s, "no-", 3);
        u16 cmd_mask = 0, brctl_mask = 0;

        if ( !on )
            s += 3;

        ss = strchr(s, ',');
        if ( !ss )
            ss = strchr(s, '\0');

        if ( !strncmp(s, "serr", ss - s) )
        {
            cmd_mask = PCI_COMMAND_SERR;
            brctl_mask = PCI_BRIDGE_CTL_SERR | PCI_BRIDGE_CTL_DTMR_SERR;
        }
        else if ( !strncmp(s, "perr", ss - s) )
        {
            cmd_mask = PCI_COMMAND_PARITY;
            brctl_mask = PCI_BRIDGE_CTL_PARITY;
        }
        else
            rc = -EINVAL;

        if ( on )
        {
            command_mask &= ~cmd_mask;
            bridge_ctl_mask &= ~brctl_mask;
        }
        else
        {
            command_mask |= cmd_mask;
            bridge_ctl_mask |= brctl_mask;
        }

        s = ss + 1;
    } while ( *ss );

    return rc;
}
custom_param("pci", parse_pci_param);
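
/*
 * Example (illustrative): booting with "pci=no-serr,no-perr" sets both
 * mask bits, causing check_pdev() below to clear SERR and parity-error
 * reporting in the command register (and bridge control register, for
 * bridges) of each device as it is discovered.
 */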

static void check_pdev(const struct pci_dev *pdev)
{
#define PCI_STATUS_CHECK \
    (PCI_STATUS_PARITY | PCI_STATUS_SIG_TARGET_ABORT | \
     PCI_STATUS_REC_TARGET_ABORT | PCI_STATUS_REC_MASTER_ABORT | \
     PCI_STATUS_SIG_SYSTEM_ERROR | PCI_STATUS_DETECTED_PARITY)
    u16 seg = pdev->seg;
    u8 bus = pdev->bus;
    u8 dev = PCI_SLOT(pdev->devfn);
    u8 func = PCI_FUNC(pdev->devfn);
    u16 val;

    if ( command_mask )
    {
        val = pci_conf_read16(seg, bus, dev, func, PCI_COMMAND);
        if ( val & command_mask )
            pci_conf_write16(seg, bus, dev, func, PCI_COMMAND,
                             val & ~command_mask);
        val = pci_conf_read16(seg, bus, dev, func, PCI_STATUS);
        if ( val & PCI_STATUS_CHECK )
        {
            printk(XENLOG_INFO "%04x:%02x:%02x.%u status %04x -> %04x\n",
                   seg, bus, dev, func, val, val & ~PCI_STATUS_CHECK);
            pci_conf_write16(seg, bus, dev, func, PCI_STATUS,
                             val & PCI_STATUS_CHECK);
        }
    }

    switch ( pci_conf_read8(seg, bus, dev, func, PCI_HEADER_TYPE) & 0x7f )
    {
    case PCI_HEADER_TYPE_BRIDGE:
        if ( !bridge_ctl_mask )
            break;
        val = pci_conf_read16(seg, bus, dev, func, PCI_BRIDGE_CONTROL);
        if ( val & bridge_ctl_mask )
            pci_conf_write16(seg, bus, dev, func, PCI_BRIDGE_CONTROL,
                             val & ~bridge_ctl_mask);
        val = pci_conf_read16(seg, bus, dev, func, PCI_SEC_STATUS);
        if ( val & PCI_STATUS_CHECK )
        {
            printk(XENLOG_INFO
                   "%04x:%02x:%02x.%u secondary status %04x -> %04x\n",
                   seg, bus, dev, func, val, val & ~PCI_STATUS_CHECK);
            pci_conf_write16(seg, bus, dev, func, PCI_SEC_STATUS,
                             val & PCI_STATUS_CHECK);
        }
        break;

    case PCI_HEADER_TYPE_CARDBUS:
        /* TODO */
        break;
    }
#undef PCI_STATUS_CHECK
}

static struct pci_dev *alloc_pdev(struct pci_seg *pseg, u8 bus, u8 devfn)
{
    struct pci_dev *pdev;

    list_for_each_entry ( pdev, &pseg->alldevs_list, alldevs_list )
        if ( pdev->bus == bus && pdev->devfn == devfn )
            return pdev;

    pdev = xzalloc(struct pci_dev);
    if ( !pdev )
        return NULL;

    *(u16*) &pdev->seg = pseg->nr;
    *((u8*) &pdev->bus) = bus;
    *((u8*) &pdev->devfn) = devfn;
    pdev->domain = NULL;
    INIT_LIST_HEAD(&pdev->msi_list);

    if ( pci_find_cap_offset(pseg->nr, bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
                             PCI_CAP_ID_MSIX) )
    {
        struct arch_msix *msix = xzalloc(struct arch_msix);

        if ( !msix )
        {
            xfree(pdev);
            return NULL;
        }
        spin_lock_init(&msix->table_lock);
        pdev->msix = msix;
    }

    list_add(&pdev->alldevs_list, &pseg->alldevs_list);

    /* update bus2bridge */
    switch ( pdev->type = pdev_type(pseg->nr, bus, devfn) )
    {
        int pos;
        u16 cap;
        u8 sec_bus, sub_bus;

        case DEV_TYPE_PCIe2PCI_BRIDGE:
        case DEV_TYPE_LEGACY_PCI_BRIDGE:
            sec_bus = pci_conf_read8(pseg->nr, bus, PCI_SLOT(devfn),
                                     PCI_FUNC(devfn), PCI_SECONDARY_BUS);
            sub_bus = pci_conf_read8(pseg->nr, bus, PCI_SLOT(devfn),
                                     PCI_FUNC(devfn), PCI_SUBORDINATE_BUS);

            spin_lock(&pseg->bus2bridge_lock);
            for ( ; sec_bus <= sub_bus; sec_bus++ )
            {
                pseg->bus2bridge[sec_bus].map = 1;
                pseg->bus2bridge[sec_bus].bus = bus;
                pseg->bus2bridge[sec_bus].devfn = devfn;
            }
            spin_unlock(&pseg->bus2bridge_lock);
            break;

        case DEV_TYPE_PCIe_ENDPOINT:
            pos = pci_find_cap_offset(pseg->nr, bus, PCI_SLOT(devfn),
                                      PCI_FUNC(devfn), PCI_CAP_ID_EXP);
            BUG_ON(!pos);
            cap = pci_conf_read16(pseg->nr, bus, PCI_SLOT(devfn),
                                  PCI_FUNC(devfn), pos + PCI_EXP_DEVCAP);
            if ( cap & PCI_EXP_DEVCAP_PHANTOM )
            {
                pdev->phantom_stride = 8 >> MASK_EXTR(cap,
                                                      PCI_EXP_DEVCAP_PHANTOM);
                if ( PCI_FUNC(devfn) >= pdev->phantom_stride )
                    pdev->phantom_stride = 0;
            }
            else
            {
                unsigned int i;

                for ( i = 0; i < nr_phantom_devs; ++i )
                    if ( phantom_devs[i].seg == pseg->nr &&
                         phantom_devs[i].bus == bus &&
                         phantom_devs[i].slot == PCI_SLOT(devfn) &&
                         phantom_devs[i].stride > PCI_FUNC(devfn) )
                    {
                        pdev->phantom_stride = phantom_devs[i].stride;
                        break;
                    }
            }
            break;

        case DEV_TYPE_PCI:
        case DEV_TYPE_PCIe_BRIDGE:
        case DEV_TYPE_PCI_HOST_BRIDGE:
            break;

        default:
            printk(XENLOG_WARNING "%04x:%02x:%02x.%u: unknown type %d\n",
                   pseg->nr, bus, PCI_SLOT(devfn), PCI_FUNC(devfn), pdev->type);
            break;
    }

    check_pdev(pdev);

    return pdev;
}

static void free_pdev(struct pci_seg *pseg, struct pci_dev *pdev)
{
    /* update bus2bridge */
    switch ( pdev->type )
    {
        u8 dev, func, sec_bus, sub_bus;

        case DEV_TYPE_PCIe2PCI_BRIDGE:
        case DEV_TYPE_LEGACY_PCI_BRIDGE:
            dev = PCI_SLOT(pdev->devfn);
            func = PCI_FUNC(pdev->devfn);
            sec_bus = pci_conf_read8(pseg->nr, pdev->bus, dev, func,
                                     PCI_SECONDARY_BUS);
            sub_bus = pci_conf_read8(pseg->nr, pdev->bus, dev, func,
                                     PCI_SUBORDINATE_BUS);

            spin_lock(&pseg->bus2bridge_lock);
            for ( ; sec_bus <= sub_bus; sec_bus++ )
                pseg->bus2bridge[sec_bus] = pseg->bus2bridge[pdev->bus];
            spin_unlock(&pseg->bus2bridge_lock);
            break;

        default:
            break;
    }

    list_del(&pdev->alldevs_list);
    xfree(pdev->msix);
    xfree(pdev);
}

static void _pci_hide_device(struct pci_dev *pdev)
{
    if ( pdev->domain )
        return;
    pdev->domain = dom_xen;
    list_add(&pdev->domain_list, &dom_xen->arch.pdev_list);
}

int __init pci_hide_device(int bus, int devfn)
{
    struct pci_dev *pdev;
    int rc = -ENOMEM;

    pcidevs_lock();
    pdev = alloc_pdev(get_pseg(0), bus, devfn);
    if ( pdev )
    {
        _pci_hide_device(pdev);
        rc = 0;
    }
    pcidevs_unlock();

    return rc;
}

int __init pci_ro_device(int seg, int bus, int devfn)
{
    struct pci_seg *pseg = alloc_pseg(seg);
    struct pci_dev *pdev;

    if ( !pseg )
        return -ENOMEM;
    pdev = alloc_pdev(pseg, bus, devfn);
    if ( !pdev )
        return -ENOMEM;

    if ( !pseg->ro_map )
    {
        size_t sz = BITS_TO_LONGS(PCI_BDF(-1, -1, -1) + 1) * sizeof(long);

        pseg->ro_map = alloc_xenheap_pages(get_order_from_bytes(sz), 0);
        if ( !pseg->ro_map )
            return -ENOMEM;
        memset(pseg->ro_map, 0, sz);
    }

    __set_bit(PCI_BDF2(bus, devfn), pseg->ro_map);
    _pci_hide_device(pdev);

    return 0;
}

struct pci_dev *pci_get_pdev(int seg, int bus, int devfn)
{
    struct pci_seg *pseg = get_pseg(seg);
    struct pci_dev *pdev = NULL;

    ASSERT(pcidevs_locked());
    ASSERT(seg != -1 || bus == -1);
    ASSERT(bus != -1 || devfn == -1);

    if ( !pseg )
    {
        if ( seg == -1 )
            radix_tree_gang_lookup(&pci_segments, (void **)&pseg, 0, 1);
        if ( !pseg )
            return NULL;
    }

    do {
        list_for_each_entry ( pdev, &pseg->alldevs_list, alldevs_list )
            if ( (pdev->bus == bus || bus == -1) &&
                 (pdev->devfn == devfn || devfn == -1) )
                return pdev;
    } while ( radix_tree_gang_lookup(&pci_segments, (void **)&pseg,
                                     pseg->nr + 1, 1) );

    return NULL;
}

struct pci_dev *pci_get_real_pdev(int seg, int bus, int devfn)
{
    struct pci_dev *pdev;
    int stride;

    if ( seg < 0 || bus < 0 || devfn < 0 )
        return NULL;

    for ( pdev = pci_get_pdev(seg, bus, devfn), stride = 4;
          !pdev && stride; stride >>= 1 )
    {
        if ( !(devfn & (8 - stride)) )
            continue;
        pdev = pci_get_pdev(seg, bus, devfn & ~(8 - stride));
        if ( pdev && stride != pdev->phantom_stride )
            pdev = NULL;
    }

    return pdev;
}
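
/*
 * Worked example (added for clarity): a lookup of function 6 that finds no
 * device tries function 2 expecting phantom_stride 4, then function 0
 * expecting stride 2, then function 0 expecting stride 1, and only returns
 * a device whose recorded stride matches the candidate being probed.
 */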

struct pci_dev *pci_get_pdev_by_domain(const struct domain *d, int seg,
                                       int bus, int devfn)
{
    struct pci_seg *pseg = get_pseg(seg);
    struct pci_dev *pdev = NULL;

    ASSERT(seg != -1 || bus == -1);
    ASSERT(bus != -1 || devfn == -1);

    if ( !pseg )
    {
        if ( seg == -1 )
            radix_tree_gang_lookup(&pci_segments, (void **)&pseg, 0, 1);
        if ( !pseg )
            return NULL;
    }

    do {
        list_for_each_entry ( pdev, &pseg->alldevs_list, alldevs_list )
            if ( (pdev->bus == bus || bus == -1) &&
                 (pdev->devfn == devfn || devfn == -1) &&
                 (pdev->domain == d) )
                return pdev;
    } while ( radix_tree_gang_lookup(&pci_segments, (void **)&pseg,
                                     pseg->nr + 1, 1) );

    return NULL;
}

/**
 * pci_enable_acs - enable ACS if the hardware supports it
 * @pdev: the PCI device
 */
static void pci_enable_acs(struct pci_dev *pdev)
{
    int pos;
    u16 cap, ctrl, seg = pdev->seg;
    u8 bus = pdev->bus;
    u8 dev = PCI_SLOT(pdev->devfn);
    u8 func = PCI_FUNC(pdev->devfn);

    if ( !iommu_enabled )
        return;

    pos = pci_find_ext_capability(seg, bus, pdev->devfn, PCI_EXT_CAP_ID_ACS);
    if ( !pos )
        return;

    cap = pci_conf_read16(seg, bus, dev, func, pos + PCI_ACS_CAP);
    ctrl = pci_conf_read16(seg, bus, dev, func, pos + PCI_ACS_CTRL);

    /* Source Validation */
    ctrl |= (cap & PCI_ACS_SV);

    /* P2P Request Redirect */
    ctrl |= (cap & PCI_ACS_RR);

    /* P2P Completion Redirect */
    ctrl |= (cap & PCI_ACS_CR);

    /* Upstream Forwarding */
    ctrl |= (cap & PCI_ACS_UF);

    pci_conf_write16(seg, bus, dev, func, pos + PCI_ACS_CTRL, ctrl);
}
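
/*
 * Design note (added for clarity): enabling the supported subset of
 * SV/RR/CR/UF makes the device or switch validate and route peer-to-peer
 * requests and completions upstream rather than delivering them directly,
 * keeping such traffic subject to IOMMU translation.
 */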

static int iommu_add_device(struct pci_dev *pdev);
static int iommu_enable_device(struct pci_dev *pdev);
static int iommu_remove_device(struct pci_dev *pdev);

int pci_add_device(u16 seg, u8 bus, u8 devfn,
                   const struct pci_dev_info *info, nodeid_t node)
{
    struct pci_seg *pseg;
    struct pci_dev *pdev;
    unsigned int slot = PCI_SLOT(devfn), func = PCI_FUNC(devfn);
    const char *pdev_type;
    int ret;
    bool pf_is_extfn = false;

    if ( !info )
        pdev_type = "device";
    else if ( info->is_virtfn )
    {
        pcidevs_lock();
        pdev = pci_get_pdev(seg, info->physfn.bus, info->physfn.devfn);
        if ( pdev )
            pf_is_extfn = pdev->info.is_extfn;
        pcidevs_unlock();
        if ( !pdev )
            pci_add_device(seg, info->physfn.bus, info->physfn.devfn,
                           NULL, node);
        pdev_type = "virtual function";
    }
    else if ( info->is_extfn )
        pdev_type = "extended function";
    else
    {
        info = NULL;
        pdev_type = "device";
    }

    ret = xsm_resource_plug_pci(XSM_PRIV, (seg << 16) | (bus << 8) | devfn);
    if ( ret )
        return ret;

    ret = -ENOMEM;

    pcidevs_lock();
    pseg = alloc_pseg(seg);
    if ( !pseg )
        goto out;
    pdev = alloc_pdev(pseg, bus, devfn);
    if ( !pdev )
        goto out;

    pdev->node = node;

    if ( info )
    {
        pdev->info = *info;
        /*
         * VF's 'is_extfn' field is used to indicate whether its PF is an
         * extended function.
         */
        if ( pdev->info.is_virtfn )
            pdev->info.is_extfn = pf_is_extfn;
    }
    else if ( !pdev->vf_rlen[0] )
    {
        unsigned int pos = pci_find_ext_capability(seg, bus, devfn,
                                                   PCI_EXT_CAP_ID_SRIOV);
        u16 ctrl = pci_conf_read16(seg, bus, slot, func, pos + PCI_SRIOV_CTRL);

        if ( !pos )
            /* Nothing */;
        else if ( !(ctrl & (PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE)) )
        {
            unsigned int i;

            BUILD_BUG_ON(ARRAY_SIZE(pdev->vf_rlen) != PCI_SRIOV_NUM_BARS);
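            /*
             * The loop below sizes each VF BAR with the standard PCI
             * probing protocol: save the BAR, write all ones, read back
             * the resulting mask, then restore the saved value. vf_rlen[]
             * ends up holding each BAR's size in bytes (note added for
             * clarity).
             */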
            for ( i = 0; i < PCI_SRIOV_NUM_BARS; ++i )
            {
                unsigned int idx = pos + PCI_SRIOV_BAR + i * 4;
                u32 bar = pci_conf_read32(seg, bus, slot, func, idx);
                u32 hi = 0;

                if ( (bar & PCI_BASE_ADDRESS_SPACE) ==
                     PCI_BASE_ADDRESS_SPACE_IO )
                {
                    printk(XENLOG_WARNING
                           "SR-IOV device %04x:%02x:%02x.%u with vf BAR%u"
                           " in IO space\n",
                           seg, bus, slot, func, i);
                    continue;
                }
                pci_conf_write32(seg, bus, slot, func, idx, ~0);
                if ( (bar & PCI_BASE_ADDRESS_MEM_TYPE_MASK) ==
                     PCI_BASE_ADDRESS_MEM_TYPE_64 )
                {
                    /*
                     * A 64-bit BAR consumes two slots; in the last slot its
                     * upper half would sit beyond the BAR array, so it
                     * cannot be sized.
                     */
                    if ( i >= PCI_SRIOV_NUM_BARS - 1 )
                    {
                        printk(XENLOG_WARNING
                               "SR-IOV device %04x:%02x:%02x.%u with 64-bit"
                               " vf BAR in last slot\n",
                               seg, bus, slot, func);
                        break;
                    }
                    hi = pci_conf_read32(seg, bus, slot, func, idx + 4);
                    pci_conf_write32(seg, bus, slot, func, idx + 4, ~0);
                }
                pdev->vf_rlen[i] = pci_conf_read32(seg, bus, slot, func, idx) &
                                   PCI_BASE_ADDRESS_MEM_MASK;
                if ( (bar & PCI_BASE_ADDRESS_MEM_TYPE_MASK) ==
                     PCI_BASE_ADDRESS_MEM_TYPE_64 )
                {
                    pdev->vf_rlen[i] |= (u64)pci_conf_read32(seg, bus,
                                                             slot, func,
                                                             idx + 4) << 32;
                    pci_conf_write32(seg, bus, slot, func, idx + 4, hi);
                }
                else if ( pdev->vf_rlen[i] )
                    pdev->vf_rlen[i] |= (u64)~0 << 32;
                pci_conf_write32(seg, bus, slot, func, idx, bar);
                pdev->vf_rlen[i] = -pdev->vf_rlen[i];
                if ( (bar & PCI_BASE_ADDRESS_MEM_TYPE_MASK) ==
                     PCI_BASE_ADDRESS_MEM_TYPE_64 )
                    ++i;
            }
        }
        else
            printk(XENLOG_WARNING
                   "SR-IOV device %04x:%02x:%02x.%u has its virtual"
                   " functions already enabled (%04x)\n",
                   seg, bus, slot, func, ctrl);
    }

    check_pdev(pdev);

    ret = 0;
    if ( !pdev->domain )
    {
        pdev->domain = hardware_domain;
        ret = iommu_add_device(pdev);
        if ( ret )
        {
            pdev->domain = NULL;
            goto out;
        }

        list_add(&pdev->domain_list, &hardware_domain->arch.pdev_list);
    }
    else
        iommu_enable_device(pdev);

    pci_enable_acs(pdev);

out:
    pcidevs_unlock();
    if ( !ret )
    {
        printk(XENLOG_DEBUG "PCI add %s %04x:%02x:%02x.%u\n", pdev_type,
               seg, bus, slot, func);
        while ( pdev->phantom_stride )
        {
            func += pdev->phantom_stride;
            if ( PCI_SLOT(func) )
                break;
            printk(XENLOG_DEBUG "PCI phantom %04x:%02x:%02x.%u\n",
                   seg, bus, slot, func);
        }
    }
    return ret;
}

int pci_remove_device(u16 seg, u8 bus, u8 devfn)
{
    struct pci_seg *pseg = get_pseg(seg);
    struct pci_dev *pdev;
    int ret;

    ret = xsm_resource_unplug_pci(XSM_PRIV, (seg << 16) | (bus << 8) | devfn);
    if ( ret )
        return ret;

    ret = -ENODEV;

    if ( !pseg )
        return -ENODEV;

    pcidevs_lock();
    list_for_each_entry ( pdev, &pseg->alldevs_list, alldevs_list )
        if ( pdev->bus == bus && pdev->devfn == devfn )
        {
            ret = iommu_remove_device(pdev);
            if ( pdev->domain )
                list_del(&pdev->domain_list);
            pci_cleanup_msi(pdev);
            free_pdev(pseg, pdev);
            printk(XENLOG_DEBUG "PCI remove device %04x:%02x:%02x.%u\n",
                   seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
            break;
        }

    pcidevs_unlock();
    return ret;
}

static int pci_clean_dpci_irq(struct domain *d,
                              struct hvm_pirq_dpci *pirq_dpci, void *arg)
{
    struct dev_intx_gsi_link *digl, *tmp;

    pirq_guest_unbind(d, dpci_pirq(pirq_dpci));

    if ( pt_irq_need_timer(pirq_dpci->flags) )
        kill_timer(&pirq_dpci->timer);

    list_for_each_entry_safe ( digl, tmp, &pirq_dpci->digl_list, list )
    {
        list_del(&digl->list);
        xfree(digl);
    }

    return pt_pirq_softirq_active(pirq_dpci) ? -ERESTART : 0;
}

static int pci_clean_dpci_irqs(struct domain *d)
{
    struct hvm_irq_dpci *hvm_irq_dpci = NULL;

    if ( !iommu_enabled )
        return 0;

    if ( !is_hvm_domain(d) )
        return 0;

    spin_lock(&d->event_lock);
    hvm_irq_dpci = domain_get_irq_dpci(d);
    if ( hvm_irq_dpci != NULL )
    {
        int ret = pt_pirq_iterate(d, pci_clean_dpci_irq, NULL);

        if ( ret )
        {
            spin_unlock(&d->event_lock);
            return ret;
        }

        hvm_domain_irq(d)->dpci = NULL;
        free_hvm_irq_dpci(hvm_irq_dpci);
    }
    spin_unlock(&d->event_lock);
    return 0;
}

int pci_release_devices(struct domain *d)
{
    struct pci_dev *pdev;
    u8 bus, devfn;
    int ret;

    pcidevs_lock();
    ret = pci_clean_dpci_irqs(d);
    if ( ret )
    {
        pcidevs_unlock();
        return ret;
    }
    while ( (pdev = pci_get_pdev_by_domain(d, -1, -1, -1)) )
    {
        bus = pdev->bus;
        devfn = pdev->devfn;
        if ( deassign_device(d, pdev->seg, bus, devfn) )
            printk("domain %d: deassign device (%04x:%02x:%02x.%u) failed!\n",
                   d->domain_id, pdev->seg, bus,
                   PCI_SLOT(devfn), PCI_FUNC(devfn));
    }
    pcidevs_unlock();

    return 0;
}

#define PCI_CLASS_BRIDGE_HOST    0x0600
#define PCI_CLASS_BRIDGE_PCI     0x0604

enum pdev_type pdev_type(u16 seg, u8 bus, u8 devfn)
{
    u16 class_device, creg;
    u8 d = PCI_SLOT(devfn), f = PCI_FUNC(devfn);
    int pos = pci_find_cap_offset(seg, bus, d, f, PCI_CAP_ID_EXP);

    class_device = pci_conf_read16(seg, bus, d, f, PCI_CLASS_DEVICE);
    switch ( class_device )
    {
    case PCI_CLASS_BRIDGE_PCI:
        if ( !pos )
            return DEV_TYPE_LEGACY_PCI_BRIDGE;
        creg = pci_conf_read16(seg, bus, d, f, pos + PCI_EXP_FLAGS);
        switch ( (creg & PCI_EXP_FLAGS_TYPE) >> 4 )
        {
        case PCI_EXP_TYPE_PCI_BRIDGE:
            return DEV_TYPE_PCIe2PCI_BRIDGE;
        case PCI_EXP_TYPE_PCIE_BRIDGE:
            return DEV_TYPE_PCI2PCIe_BRIDGE;
        }
        return DEV_TYPE_PCIe_BRIDGE;
    case PCI_CLASS_BRIDGE_HOST:
        return DEV_TYPE_PCI_HOST_BRIDGE;

    case 0x0000: case 0xffff:
        return DEV_TYPE_PCI_UNKNOWN;
    }

    return pos ? DEV_TYPE_PCIe_ENDPOINT : DEV_TYPE_PCI;
}

/*
 * Find the upstream PCIe-to-PCI/PCI-X bridge or PCI legacy bridge.
 * Return 0: the device is an integrated PCI device or a PCIe device.
 * Return 1: a PCIe-to-PCI/PCI-X bridge or PCI legacy bridge was found.
 * Return -1: failure.
 */
int find_upstream_bridge(u16 seg, u8 *bus, u8 *devfn, u8 *secbus)
{
    struct pci_seg *pseg = get_pseg(seg);
    int ret = 0;
    int cnt = 0;

    if ( *bus == 0 )
        return 0;

    if ( !pseg )
        return -1;

    if ( !pseg->bus2bridge[*bus].map )
        return 0;

    ret = 1;
    spin_lock(&pseg->bus2bridge_lock);
    while ( pseg->bus2bridge[*bus].map )
    {
        *secbus = *bus;
        *devfn = pseg->bus2bridge[*bus].devfn;
        *bus = pseg->bus2bridge[*bus].bus;
        if ( cnt++ >= MAX_BUSES )
        {
            ret = -1;
            goto out;
        }
    }

out:
    spin_unlock(&pseg->bus2bridge_lock);
    return ret;
}

bool_t __init pci_device_detect(u16 seg, u8 bus, u8 dev, u8 func)
{
    u32 vendor;

    vendor = pci_conf_read32(seg, bus, dev, func, PCI_VENDOR_ID);
    /* some broken boards return 0 or ~0 if a slot is empty: */
    if ( (vendor == 0xffffffff) || (vendor == 0x00000000) ||
         (vendor == 0x0000ffff) || (vendor == 0xffff0000) )
        return 0;
    return 1;
}

void pci_check_disable_device(u16 seg, u8 bus, u8 devfn)
{
    struct pci_dev *pdev;
    s_time_t now = NOW();
    u16 cword;

    pcidevs_lock();
    pdev = pci_get_real_pdev(seg, bus, devfn);
    if ( pdev )
    {
        if ( now < pdev->fault.time ||
             now - pdev->fault.time > MILLISECS(10) )
            pdev->fault.count >>= 1;
        pdev->fault.time = now;
        if ( ++pdev->fault.count < PT_FAULT_THRESHOLD )
            pdev = NULL;
    }
    pcidevs_unlock();

    if ( !pdev )
        return;

    /* Tell the device to stop DMAing; we can't rely on the guest to
     * control it for us. */
    devfn = pdev->devfn;
    cword = pci_conf_read16(seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
                            PCI_COMMAND);
    pci_conf_write16(seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
                     PCI_COMMAND, cword & ~PCI_COMMAND_MASTER);
}

/*
 * Scan PCI devices, adding all existing devices to alldevs_list and
 * setting up the PCI hierarchy in the bus2bridge array.
 */
static int __init _scan_pci_devices(struct pci_seg *pseg, void *arg)
{
    struct pci_dev *pdev;
    int bus, dev, func;

    for ( bus = 0; bus < 256; bus++ )
    {
        for ( dev = 0; dev < 32; dev++ )
        {
            for ( func = 0; func < 8; func++ )
            {
                if ( !pci_device_detect(pseg->nr, bus, dev, func) )
                {
                    if ( !func )
                        break;
                    continue;
                }

                pdev = alloc_pdev(pseg, bus, PCI_DEVFN(dev, func));
                if ( !pdev )
                {
                    printk(XENLOG_WARNING "%04x:%02x:%02x.%u: alloc_pdev failed\n",
                           pseg->nr, bus, dev, func);
                    return -ENOMEM;
                }

                /* Bit 7 of the header type indicates a multi-function device. */
                if ( !func && !(pci_conf_read8(pseg->nr, bus, dev, func,
                                               PCI_HEADER_TYPE) & 0x80) )
                    break;
            }
        }
    }

    return 0;
}

int __init scan_pci_devices(void)
{
    int ret;

    pcidevs_lock();
    ret = pci_segments_iterate(_scan_pci_devices, NULL);
    pcidevs_unlock();

    return ret;
}

struct setup_hwdom {
    struct domain *d;
    int (*handler)(u8 devfn, struct pci_dev *);
};

static void __hwdom_init setup_one_hwdom_device(const struct setup_hwdom *ctxt,
                                                struct pci_dev *pdev)
{
    u8 devfn = pdev->devfn;

    do {
        int err = ctxt->handler(devfn, pdev);

        if ( err )
        {
            printk(XENLOG_ERR "setup %04x:%02x:%02x.%u for d%d failed (%d)\n",
                   pdev->seg, pdev->bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
                   ctxt->d->domain_id, err);
            if ( devfn == pdev->devfn )
                return;
        }
        devfn += pdev->phantom_stride;
    } while ( devfn != pdev->devfn &&
              PCI_SLOT(devfn) == PCI_SLOT(pdev->devfn) );
}

static int __hwdom_init _setup_hwdom_pci_devices(struct pci_seg *pseg, void *arg)
{
    struct setup_hwdom *ctxt = arg;
    int bus, devfn;

    for ( bus = 0; bus < 256; bus++ )
    {
        for ( devfn = 0; devfn < 256; devfn++ )
        {
            struct pci_dev *pdev = pci_get_pdev(pseg->nr, bus, devfn);

            if ( !pdev )
                continue;

            if ( !pdev->domain )
            {
                pdev->domain = ctxt->d;
                list_add(&pdev->domain_list, &ctxt->d->arch.pdev_list);
                setup_one_hwdom_device(ctxt, pdev);
            }
            else if ( pdev->domain == dom_xen )
            {
                pdev->domain = ctxt->d;
                setup_one_hwdom_device(ctxt, pdev);
                pdev->domain = dom_xen;
            }
            else if ( pdev->domain != ctxt->d )
                printk(XENLOG_WARNING "Dom%d owning %04x:%02x:%02x.%u?\n",
                       pdev->domain->domain_id, pseg->nr, bus,
                       PCI_SLOT(devfn), PCI_FUNC(devfn));

            if ( iommu_verbose )
            {
                pcidevs_unlock();
                process_pending_softirqs();
                pcidevs_lock();
            }
        }

        if ( !iommu_verbose )
        {
            pcidevs_unlock();
            process_pending_softirqs();
            pcidevs_lock();
        }
    }

    return 0;
}

void __hwdom_init setup_hwdom_pci_devices(
    struct domain *d, int (*handler)(u8 devfn, struct pci_dev *))
{
    struct setup_hwdom ctxt = { .d = d, .handler = handler };

    pcidevs_lock();
    pci_segments_iterate(_setup_hwdom_pci_devices, &ctxt);
    pcidevs_unlock();
}

#ifdef CONFIG_ACPI
#include <acpi/acpi.h>
#include <acpi/apei.h>

static int hest_match_pci(const struct acpi_hest_aer_common *p,
                          const struct pci_dev *pdev)
{
    return ACPI_HEST_SEGMENT(p->bus) == pdev->seg &&
           ACPI_HEST_BUS(p->bus)     == pdev->bus &&
           p->device                 == PCI_SLOT(pdev->devfn) &&
           p->function               == PCI_FUNC(pdev->devfn);
}

static bool_t hest_match_type(const struct acpi_hest_header *hest_hdr,
                              const struct pci_dev *pdev)
{
    unsigned int pos = pci_find_cap_offset(pdev->seg, pdev->bus,
                                           PCI_SLOT(pdev->devfn),
                                           PCI_FUNC(pdev->devfn),
                                           PCI_CAP_ID_EXP);
    u8 pcie = MASK_EXTR(pci_conf_read16(pdev->seg, pdev->bus,
                                        PCI_SLOT(pdev->devfn),
                                        PCI_FUNC(pdev->devfn),
                                        pos + PCI_EXP_FLAGS),
                        PCI_EXP_FLAGS_TYPE);

    switch ( hest_hdr->type )
    {
    case ACPI_HEST_TYPE_AER_ROOT_PORT:
        return pcie == PCI_EXP_TYPE_ROOT_PORT;
    case ACPI_HEST_TYPE_AER_ENDPOINT:
        return pcie == PCI_EXP_TYPE_ENDPOINT;
    case ACPI_HEST_TYPE_AER_BRIDGE:
        return pci_conf_read16(pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),
                               PCI_FUNC(pdev->devfn), PCI_CLASS_DEVICE) ==
               PCI_CLASS_BRIDGE_PCI;
    }

    return 0;
}

struct aer_hest_parse_info {
    const struct pci_dev *pdev;
    bool_t firmware_first;
};

static bool_t hest_source_is_pcie_aer(const struct acpi_hest_header *hest_hdr)
{
    if ( hest_hdr->type == ACPI_HEST_TYPE_AER_ROOT_PORT ||
         hest_hdr->type == ACPI_HEST_TYPE_AER_ENDPOINT ||
         hest_hdr->type == ACPI_HEST_TYPE_AER_BRIDGE )
        return 1;
    return 0;
}

static int aer_hest_parse(const struct acpi_hest_header *hest_hdr, void *data)
{
    struct aer_hest_parse_info *info = data;
    const struct acpi_hest_aer_common *p;
    bool_t ff;

    if ( !hest_source_is_pcie_aer(hest_hdr) )
        return 0;

    p = (const struct acpi_hest_aer_common *)(hest_hdr + 1);
    ff = !!(p->flags & ACPI_HEST_FIRMWARE_FIRST);

    /*
     * If no specific device is supplied, determine whether
     * FIRMWARE_FIRST is set for *any* PCIe device.
     */
    if ( !info->pdev )
    {
        info->firmware_first |= ff;
        return 0;
    }

    /* Otherwise, check the specific device */
    if ( p->flags & ACPI_HEST_GLOBAL ?
         hest_match_type(hest_hdr, info->pdev) :
         hest_match_pci(p, info->pdev) )
    {
        info->firmware_first = ff;
        return 1;
    }

    return 0;
}

bool_t pcie_aer_get_firmware_first(const struct pci_dev *pdev)
{
    struct aer_hest_parse_info info = { .pdev = pdev };

    return pci_find_cap_offset(pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),
                               PCI_FUNC(pdev->devfn), PCI_CAP_ID_EXP) &&
           apei_hest_parse(aer_hest_parse, &info) >= 0 &&
           info.firmware_first;
}
#endif

static int _dump_pci_devices(struct pci_seg *pseg, void *arg)
{
    struct pci_dev *pdev;
    struct msi_desc *msi;

    printk("==== segment %04x ====\n", pseg->nr);

    list_for_each_entry ( pdev, &pseg->alldevs_list, alldevs_list )
    {
        printk("%04x:%02x:%02x.%u - dom %-3d - node %-3d - MSIs < ",
               pseg->nr, pdev->bus,
               PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
               pdev->domain ? pdev->domain->domain_id : -1,
               (pdev->node != NUMA_NO_NODE) ? pdev->node : -1);
        list_for_each_entry ( msi, &pdev->msi_list, list )
               printk("%d ", msi->irq);
        printk(">\n");
    }

    return 0;
}

static void dump_pci_devices(unsigned char ch)
{
    printk("==== PCI devices ====\n");
    pcidevs_lock();
    pci_segments_iterate(_dump_pci_devices, NULL);
    pcidevs_unlock();
}

static int __init setup_dump_pcidevs(void)
{
    register_keyhandler('Q', dump_pci_devices, "dump PCI devices", 1);
    return 0;
}
__initcall(setup_dump_pcidevs);
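
/*
 * Note (added for clarity): like other debug-key handlers, the 'Q' key
 * registered above is typically reachable from dom0 via something like
 * "xl debug-keys Q" or over the serial console, and dumps every known
 * PCI device, per segment.
 */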

int iommu_update_ire_from_msi(
    struct msi_desc *msi_desc, struct msi_msg *msg)
{
    return iommu_intremap
           ? iommu_get_ops()->update_ire_from_msi(msi_desc, msg) : 0;
}

void iommu_read_msi_from_ire(
    struct msi_desc *msi_desc, struct msi_msg *msg)
{
    if ( iommu_intremap )
        iommu_get_ops()->read_msi_from_ire(msi_desc, msg);
}

static int iommu_add_device(struct pci_dev *pdev)
{
    const struct domain_iommu *hd;
    int rc;
    u8 devfn;

    if ( !pdev->domain )
        return -EINVAL;

    ASSERT(pcidevs_locked());

    hd = dom_iommu(pdev->domain);
    if ( !iommu_enabled || !hd->platform_ops )
        return 0;

    rc = hd->platform_ops->add_device(pdev->devfn, pci_to_dev(pdev));
    if ( rc || !pdev->phantom_stride )
        return rc;

    for ( devfn = pdev->devfn ; ; )
    {
        devfn += pdev->phantom_stride;
        if ( PCI_SLOT(devfn) != PCI_SLOT(pdev->devfn) )
            return 0;
        rc = hd->platform_ops->add_device(devfn, pci_to_dev(pdev));
        if ( rc )
            printk(XENLOG_WARNING "IOMMU: add %04x:%02x:%02x.%u failed (%d)\n",
                   pdev->seg, pdev->bus, PCI_SLOT(devfn), PCI_FUNC(devfn), rc);
    }
}

static int iommu_enable_device(struct pci_dev *pdev)
{
    const struct domain_iommu *hd;

    if ( !pdev->domain )
        return -EINVAL;

    ASSERT(pcidevs_locked());

    hd = dom_iommu(pdev->domain);
    if ( !iommu_enabled || !hd->platform_ops ||
         !hd->platform_ops->enable_device )
        return 0;

    return hd->platform_ops->enable_device(pci_to_dev(pdev));
}

static int iommu_remove_device(struct pci_dev *pdev)
{
    const struct domain_iommu *hd;
    u8 devfn;

    if ( !pdev->domain )
        return -EINVAL;

    hd = dom_iommu(pdev->domain);
    if ( !iommu_enabled || !hd->platform_ops )
        return 0;

    for ( devfn = pdev->devfn ; pdev->phantom_stride; )
    {
        int rc;

        devfn += pdev->phantom_stride;
        if ( PCI_SLOT(devfn) != PCI_SLOT(pdev->devfn) )
            break;
        rc = hd->platform_ops->remove_device(devfn, pci_to_dev(pdev));
        if ( !rc )
            continue;

        printk(XENLOG_ERR "IOMMU: remove %04x:%02x:%02x.%u failed (%d)\n",
               pdev->seg, pdev->bus, PCI_SLOT(devfn), PCI_FUNC(devfn), rc);
        return rc;
    }

    return hd->platform_ops->remove_device(pdev->devfn, pci_to_dev(pdev));
}

/*
 * If the device isn't owned by the hardware domain, it has either already
 * been assigned to another domain, or it doesn't exist.
 */
static int device_assigned(u16 seg, u8 bus, u8 devfn)
{
    struct pci_dev *pdev;

    pcidevs_lock();
    pdev = pci_get_pdev_by_domain(hardware_domain, seg, bus, devfn);
    pcidevs_unlock();

    return pdev ? 0 : -EBUSY;
}

static int assign_device(struct domain *d, u16 seg, u8 bus, u8 devfn, u32 flag)
{
    const struct domain_iommu *hd = dom_iommu(d);
    struct pci_dev *pdev;
    int rc = 0;

    if ( !iommu_enabled || !hd->platform_ops )
        return 0;

    /* Prevent device assignment if memory paging or memory sharing has been
     * enabled for this domain. */
    if ( unlikely(!need_iommu(d) &&
            (d->arch.hvm_domain.mem_sharing_enabled ||
             vm_event_check_ring(d->vm_event_paging) ||
             p2m_get_hostp2m(d)->global_logdirty)) )
        return -EXDEV;

    if ( !pcidevs_trylock() )
        return -ERESTART;

    rc = iommu_construct(d);
    if ( rc )
    {
        pcidevs_unlock();
        return rc;
    }

    pdev = pci_get_pdev_by_domain(hardware_domain, seg, bus, devfn);
    if ( !pdev )
    {
        rc = pci_get_pdev(seg, bus, devfn) ? -EBUSY : -ENODEV;
        goto done;
    }

    if ( pdev->msix )
        msixtbl_init(d);

    pdev->fault.count = 0;

    if ( (rc = hd->platform_ops->assign_device(d, devfn, pci_to_dev(pdev), flag)) )
        goto done;

    for ( ; pdev->phantom_stride; rc = 0 )
    {
        devfn += pdev->phantom_stride;
        if ( PCI_SLOT(devfn) != PCI_SLOT(pdev->devfn) )
            break;
        rc = hd->platform_ops->assign_device(d, devfn, pci_to_dev(pdev), flag);
        if ( rc )
            printk(XENLOG_G_WARNING "d%d: assign %04x:%02x:%02x.%u failed (%d)\n",
                   d->domain_id, seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
                   rc);
    }

 done:
    if ( !has_arch_pdevs(d) && need_iommu(d) )
        iommu_teardown(d);
    pcidevs_unlock();

    return rc;
}

/* caller should hold the pcidevs_lock */
int deassign_device(struct domain *d, u16 seg, u8 bus, u8 devfn)
{
    const struct domain_iommu *hd = dom_iommu(d);
    struct pci_dev *pdev = NULL;
    int ret = 0;

    if ( !iommu_enabled || !hd->platform_ops )
        return -EINVAL;

    ASSERT(pcidevs_locked());
    pdev = pci_get_pdev_by_domain(d, seg, bus, devfn);
    if ( !pdev )
        return -ENODEV;

    while ( pdev->phantom_stride )
    {
        devfn += pdev->phantom_stride;
        if ( PCI_SLOT(devfn) != PCI_SLOT(pdev->devfn) )
            break;
        ret = hd->platform_ops->reassign_device(d, hardware_domain, devfn,
                                                pci_to_dev(pdev));
        if ( !ret )
            continue;

        printk(XENLOG_G_ERR "d%d: deassign %04x:%02x:%02x.%u failed (%d)\n",
               d->domain_id, seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn), ret);
        return ret;
    }

    devfn = pdev->devfn;
    ret = hd->platform_ops->reassign_device(d, hardware_domain, devfn,
                                            pci_to_dev(pdev));
    if ( ret )
    {
        dprintk(XENLOG_G_ERR,
                "d%d: deassign device (%04x:%02x:%02x.%u) failed\n",
                d->domain_id, seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
        return ret;
    }

    pdev->fault.count = 0;

    if ( !has_arch_pdevs(d) && need_iommu(d) )
        iommu_teardown(d);

    return ret;
}

static int iommu_get_device_group(
    struct domain *d, u16 seg, u8 bus, u8 devfn,
    XEN_GUEST_HANDLE_64(uint32) buf, int max_sdevs)
{
    const struct domain_iommu *hd = dom_iommu(d);
    struct pci_dev *pdev;
    int group_id, sdev_id;
    u32 bdf;
    int i = 0;
    const struct iommu_ops *ops = hd->platform_ops;

    if ( !iommu_enabled || !ops || !ops->get_device_group_id )
        return 0;

    group_id = ops->get_device_group_id(seg, bus, devfn);

    pcidevs_lock();
    for_each_pdev( d, pdev )
    {
        if ( (pdev->seg != seg) ||
             ((pdev->bus == bus) && (pdev->devfn == devfn)) )
            continue;

        if ( xsm_get_device_group(XSM_HOOK, (seg << 16) | (pdev->bus << 8) | pdev->devfn) )
            continue;

        sdev_id = ops->get_device_group_id(seg, pdev->bus, pdev->devfn);
        if ( (sdev_id == group_id) && (i < max_sdevs) )
        {
            bdf = 0;
            bdf |= (pdev->bus & 0xff) << 16;
            bdf |= (pdev->devfn & 0xff) << 8;

            if ( unlikely(copy_to_guest_offset(buf, i, &bdf, 1)) )
            {
                pcidevs_unlock();
                return -1;
            }
            i++;
        }
    }

    pcidevs_unlock();

    return i;
}

void iommu_dev_iotlb_flush_timeout(struct domain *d, struct pci_dev *pdev)
{
    pcidevs_lock();

    disable_ats_device(pdev);

    ASSERT(pdev->domain);
    if ( d != pdev->domain )
    {
        pcidevs_unlock();
        return;
    }

    list_del(&pdev->domain_list);
    pdev->domain = NULL;
    _pci_hide_device(pdev);

    if ( !d->is_shutting_down && printk_ratelimit() )
        printk(XENLOG_ERR
               "dom%d: ATS device %04x:%02x:%02x.%u flush failed\n",
               d->domain_id, pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),
               PCI_FUNC(pdev->devfn));
    if ( !is_hardware_domain(d) )
        domain_crash(d);

    pcidevs_unlock();
}

int iommu_do_pci_domctl(
    struct xen_domctl *domctl, struct domain *d,
    XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
{
    u16 seg;
    u8 bus, devfn;
    int ret = 0;
    uint32_t machine_sbdf;

    switch ( domctl->cmd )
    {
        unsigned int flags;

    case XEN_DOMCTL_get_device_group:
    {
        u32 max_sdevs;
        XEN_GUEST_HANDLE_64(uint32) sdevs;

        ret = xsm_get_device_group(XSM_HOOK, domctl->u.get_device_group.machine_sbdf);
        if ( ret )
            break;

        seg = domctl->u.get_device_group.machine_sbdf >> 16;
        bus = PCI_BUS(domctl->u.get_device_group.machine_sbdf);
        devfn = PCI_DEVFN2(domctl->u.get_device_group.machine_sbdf);
        max_sdevs = domctl->u.get_device_group.max_sdevs;
        sdevs = domctl->u.get_device_group.sdev_array;

        ret = iommu_get_device_group(d, seg, bus, devfn, sdevs, max_sdevs);
        if ( ret < 0 )
        {
            dprintk(XENLOG_ERR, "iommu_get_device_group() failed!\n");
            ret = -EFAULT;
            domctl->u.get_device_group.num_sdevs = 0;
        }
        else
        {
            domctl->u.get_device_group.num_sdevs = ret;
            ret = 0;
        }
        if ( __copy_field_to_guest(u_domctl, domctl, u.get_device_group) )
            ret = -EFAULT;
    }
    break;

    case XEN_DOMCTL_assign_device:
        ASSERT(d);
        /* fall through */
    case XEN_DOMCTL_test_assign_device:
        /* Don't support self-assignment of devices. */
        if ( d == current->domain )
        {
            ret = -EINVAL;
            break;
        }

        ret = -ENODEV;
        if ( domctl->u.assign_device.dev != XEN_DOMCTL_DEV_PCI )
            break;

        ret = -EINVAL;
        flags = domctl->u.assign_device.flags;
        if ( domctl->cmd == XEN_DOMCTL_assign_device
             ? d->is_dying || (flags & ~XEN_DOMCTL_DEV_RDM_RELAXED)
             : flags )
            break;

        machine_sbdf = domctl->u.assign_device.u.pci.machine_sbdf;

        ret = xsm_assign_device(XSM_HOOK, d, machine_sbdf);
        if ( ret )
            break;

        seg = machine_sbdf >> 16;
        bus = PCI_BUS(machine_sbdf);
        devfn = PCI_DEVFN2(machine_sbdf);

        ret = device_assigned(seg, bus, devfn);
        if ( domctl->cmd == XEN_DOMCTL_test_assign_device )
        {
            if ( ret )
            {
                printk(XENLOG_G_INFO
                       "%04x:%02x:%02x.%u already assigned, or non-existent\n",
                       seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
                ret = -EINVAL;
            }
            break;
        }
        if ( !ret )
            ret = assign_device(d, seg, bus, devfn, flags);
        if ( ret == -ERESTART )
            ret = hypercall_create_continuation(__HYPERVISOR_domctl,
                                                "h", u_domctl);
        else if ( ret )
            printk(XENLOG_G_ERR "XEN_DOMCTL_assign_device: "
                   "assign %04x:%02x:%02x.%u to dom%d failed (%d)\n",
                   seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
                   d->domain_id, ret);

        break;

    case XEN_DOMCTL_deassign_device:
        /* Don't support self-deassignment of devices. */
        if ( d == current->domain )
        {
            ret = -EINVAL;
            break;
        }

        ret = -ENODEV;
        if ( domctl->u.assign_device.dev != XEN_DOMCTL_DEV_PCI )
            break;

        ret = -EINVAL;
        if ( domctl->u.assign_device.flags )
            break;

        machine_sbdf = domctl->u.assign_device.u.pci.machine_sbdf;

        ret = xsm_deassign_device(XSM_HOOK, d, machine_sbdf);
        if ( ret )
            break;

        seg = machine_sbdf >> 16;
        bus = PCI_BUS(machine_sbdf);
        devfn = PCI_DEVFN2(machine_sbdf);

        pcidevs_lock();
        ret = deassign_device(d, seg, bus, devfn);
        pcidevs_unlock();
        if ( ret )
            printk(XENLOG_G_ERR
                   "deassign %04x:%02x:%02x.%u from dom%d failed (%d)\n",
                   seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
                   d->domain_id, ret);

        break;

    default:
        ret = -ENOSYS;
        break;
    }

    return ret;
}

/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * indent-tabs-mode: nil
 * End:
 */