/*
 * Copyright (C) 2008, Netronome Systems, Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; If not, see <http://www.gnu.org/licenses/>.
 */

#include <xen/sched.h>
#include <xen/pci.h>
#include <xen/pci_regs.h>
#include <xen/list.h>
#include <xen/prefetch.h>
#include <xen/iommu.h>
#include <xen/irq.h>
#include <xen/vm_event.h>
#include <asm/hvm/irq.h>
#include <xen/delay.h>
#include <xen/keyhandler.h>
#include <xen/event.h>
#include <xen/guest_access.h>
#include <xen/paging.h>
#include <xen/radix-tree.h>
#include <xen/softirq.h>
#include <xen/tasklet.h>
#include <xsm/xsm.h>
#include <asm/msi.h>
#include "ats.h"

struct pci_seg {
    struct list_head alldevs_list;
    u16 nr;
    unsigned long *ro_map;
    /* bus2bridge_lock protects bus2bridge array */
    spinlock_t bus2bridge_lock;
#define MAX_BUSES 256
    struct {
        u8 map;
        u8 bus;
        u8 devfn;
    } bus2bridge[MAX_BUSES];
};
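
/*
 * _pcidevs_lock is a single global lock serializing updates to and
 * traversals of the per-segment device lists below.  The recursive
 * variant is used so that code paths which may already hold the lock
 * can safely nest another acquisition.
 */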
static spinlock_t _pcidevs_lock = SPIN_LOCK_UNLOCKED;

void pcidevs_lock(void)
{
    spin_lock_recursive(&_pcidevs_lock);
}

void pcidevs_unlock(void)
{
    spin_unlock_recursive(&_pcidevs_lock);
}

bool_t pcidevs_locked(void)
{
    return !!spin_is_locked(&_pcidevs_lock);
}

bool_t pcidevs_trylock(void)
{
    return !!spin_trylock_recursive(&_pcidevs_lock);
}

static struct radix_tree_root pci_segments;

static inline struct pci_seg *get_pseg(u16 seg)
{
    return radix_tree_lookup(&pci_segments, seg);
}

bool_t pci_known_segment(u16 seg)
{
    return get_pseg(seg) != NULL;
}

static struct pci_seg *alloc_pseg(u16 seg)
{
    struct pci_seg *pseg = get_pseg(seg);

    if ( pseg )
        return pseg;

    pseg = xzalloc(struct pci_seg);
    if ( !pseg )
        return NULL;

    pseg->nr = seg;
    INIT_LIST_HEAD(&pseg->alldevs_list);
    spin_lock_init(&pseg->bus2bridge_lock);

    if ( radix_tree_insert(&pci_segments, seg, pseg) )
    {
        xfree(pseg);
        pseg = NULL;
    }

    return pseg;
}
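
/*
 * Iterate over all allocated segments in ascending order.  Each gang
 * lookup returns the segment with the lowest number not below "seg";
 * stepping "seg" to pseg->nr + 1 therefore visits every segment exactly
 * once, and the "&& seg" loop condition stops after wrap-around past
 * segment 0xffff.
 */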
static int pci_segments_iterate(
    int (*handler)(struct pci_seg *, void *), void *arg)
{
    u16 seg = 0;
    int rc = 0;

    do {
        struct pci_seg *pseg;

        if ( !radix_tree_gang_lookup(&pci_segments, (void **)&pseg, seg, 1) )
            break;
        rc = handler(pseg, arg);
        seg = pseg->nr + 1;
    } while ( !rc && seg );

    return rc;
}

void __init pt_pci_init(void)
{
    radix_tree_init(&pci_segments);
    if ( !alloc_pseg(0) )
        panic("Could not initialize PCI segment 0");
}

int __init pci_add_segment(u16 seg)
{
    return alloc_pseg(seg) ? 0 : -ENOMEM;
}

const unsigned long *pci_get_ro_map(u16 seg)
{
    struct pci_seg *pseg = get_pseg(seg);

    return pseg ? pseg->ro_map : NULL;
}

static struct phantom_dev {
    u16 seg;
    u8 bus, slot, stride;
} phantom_devs[8];
static unsigned int nr_phantom_devs;
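
/*
 * Command line syntax (as accepted by the parser below):
 *   pci-phantom=[<seg>:]<bus>:<slot>,<stride>
 * where <stride> is 1, 2 or 4; this marks the non-zero functions of the
 * given slot, at the given stride, as phantom functions.
 */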
static int __init parse_phantom_dev(const char *str)
{
    const char *s;
    unsigned int seg, bus, slot;
    struct phantom_dev phantom;

    if ( !*str )
        return -EINVAL;
    if ( nr_phantom_devs >= ARRAY_SIZE(phantom_devs) )
        return -E2BIG;

    s = parse_pci(str, &seg, &bus, &slot, NULL);
    if ( !s || *s != ',' )
        return -EINVAL;

    phantom.seg = seg;
    phantom.bus = bus;
    phantom.slot = slot;

    switch ( phantom.stride = simple_strtol(s + 1, &s, 0) )
    {
    case 1: case 2: case 4:
        /*
         * A valid stride with no trailing characters falls out of the
         * switch; anything else lands on the default label and is
         * rejected.
         */
        if ( *s )
    default:
            return -EINVAL;
    }

    phantom_devs[nr_phantom_devs++] = phantom;

    return 0;
}
custom_param("pci-phantom", parse_phantom_dev);

static u16 __read_mostly command_mask;
static u16 __read_mostly bridge_ctl_mask;

/*
 * The 'pci' parameter controls certain PCI device aspects.
 * Optional comma-separated values may contain:
 *
 *   serr     don't suppress system errors (default)
 *   no-serr  suppress system errors
 *   perr     don't suppress parity errors (default)
 *   no-perr  suppress parity errors
 */
static int __init parse_pci_param(const char *s)
{
    const char *ss;
    int rc = 0;

    do {
        bool_t on = !!strncmp(s, "no-", 3);
        u16 cmd_mask = 0, brctl_mask = 0;

        if ( !on )
            s += 3;

        ss = strchr(s, ',');
        if ( !ss )
            ss = strchr(s, '\0');

        if ( !strncmp(s, "serr", ss - s) )
        {
            cmd_mask = PCI_COMMAND_SERR;
            brctl_mask = PCI_BRIDGE_CTL_SERR | PCI_BRIDGE_CTL_DTMR_SERR;
        }
        else if ( !strncmp(s, "perr", ss - s) )
        {
            cmd_mask = PCI_COMMAND_PARITY;
            brctl_mask = PCI_BRIDGE_CTL_PARITY;
        }
        else
            rc = -EINVAL;

        if ( on )
        {
            command_mask &= ~cmd_mask;
            bridge_ctl_mask &= ~brctl_mask;
        }
        else
        {
            command_mask |= cmd_mask;
            bridge_ctl_mask |= brctl_mask;
        }

        s = ss + 1;
    } while ( *ss );

    return rc;
}
custom_param("pci", parse_pci_param);
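
/*
 * The masks built up above take effect in check_pdev() below: bits set
 * in command_mask are cleared from a device's PCI_COMMAND register, and
 * bits set in bridge_ctl_mask are cleared from a bridge's
 * PCI_BRIDGE_CONTROL register.
 */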

static void check_pdev(const struct pci_dev *pdev)
{
#define PCI_STATUS_CHECK \
    (PCI_STATUS_PARITY | PCI_STATUS_SIG_TARGET_ABORT | \
     PCI_STATUS_REC_TARGET_ABORT | PCI_STATUS_REC_MASTER_ABORT | \
     PCI_STATUS_SIG_SYSTEM_ERROR | PCI_STATUS_DETECTED_PARITY)
    u16 seg = pdev->seg;
    u8 bus = pdev->bus;
    u8 dev = PCI_SLOT(pdev->devfn);
    u8 func = PCI_FUNC(pdev->devfn);
    u16 val;

    if ( command_mask )
    {
        val = pci_conf_read16(seg, bus, dev, func, PCI_COMMAND);
        if ( val & command_mask )
            pci_conf_write16(seg, bus, dev, func, PCI_COMMAND,
                             val & ~command_mask);
        val = pci_conf_read16(seg, bus, dev, func, PCI_STATUS);
        if ( val & PCI_STATUS_CHECK )
        {
            printk(XENLOG_INFO "%04x:%02x:%02x.%u status %04x -> %04x\n",
                   seg, bus, dev, func, val, val & ~PCI_STATUS_CHECK);
            /* Status bits are write-1-to-clear. */
            pci_conf_write16(seg, bus, dev, func, PCI_STATUS,
                             val & PCI_STATUS_CHECK);
        }
    }

    switch ( pci_conf_read8(seg, bus, dev, func, PCI_HEADER_TYPE) & 0x7f )
    {
    case PCI_HEADER_TYPE_BRIDGE:
        if ( !bridge_ctl_mask )
            break;
        val = pci_conf_read16(seg, bus, dev, func, PCI_BRIDGE_CONTROL);
        if ( val & bridge_ctl_mask )
            pci_conf_write16(seg, bus, dev, func, PCI_BRIDGE_CONTROL,
                             val & ~bridge_ctl_mask);
        val = pci_conf_read16(seg, bus, dev, func, PCI_SEC_STATUS);
        if ( val & PCI_STATUS_CHECK )
        {
            printk(XENLOG_INFO
                   "%04x:%02x:%02x.%u secondary status %04x -> %04x\n",
                   seg, bus, dev, func, val, val & ~PCI_STATUS_CHECK);
            pci_conf_write16(seg, bus, dev, func, PCI_SEC_STATUS,
                             val & PCI_STATUS_CHECK);
        }
        break;

    case PCI_HEADER_TYPE_CARDBUS:
        /* TODO */
        break;
    }
#undef PCI_STATUS_CHECK
}

static struct pci_dev *alloc_pdev(struct pci_seg *pseg, u8 bus, u8 devfn)
{
    struct pci_dev *pdev;

    list_for_each_entry ( pdev, &pseg->alldevs_list, alldevs_list )
        if ( pdev->bus == bus && pdev->devfn == devfn )
            return pdev;

    pdev = xzalloc(struct pci_dev);
    if ( !pdev )
        return NULL;

    *(u16*) &pdev->seg = pseg->nr;
    *((u8*) &pdev->bus) = bus;
    *((u8*) &pdev->devfn) = devfn;
    pdev->domain = NULL;
    INIT_LIST_HEAD(&pdev->msi_list);

    if ( pci_find_cap_offset(pseg->nr, bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
                             PCI_CAP_ID_MSIX) )
    {
        struct arch_msix *msix = xzalloc(struct arch_msix);

        if ( !msix )
        {
            xfree(pdev);
            return NULL;
        }
        spin_lock_init(&msix->table_lock);
        pdev->msix = msix;
    }

    list_add(&pdev->alldevs_list, &pseg->alldevs_list);

    /* update bus2bridge */
    switch ( pdev->type = pdev_type(pseg->nr, bus, devfn) )
    {
        int pos;
        u16 cap;
        u8 sec_bus, sub_bus;

    case DEV_TYPE_PCIe2PCI_BRIDGE:
    case DEV_TYPE_LEGACY_PCI_BRIDGE:
        sec_bus = pci_conf_read8(pseg->nr, bus, PCI_SLOT(devfn),
                                 PCI_FUNC(devfn), PCI_SECONDARY_BUS);
        sub_bus = pci_conf_read8(pseg->nr, bus, PCI_SLOT(devfn),
                                 PCI_FUNC(devfn), PCI_SUBORDINATE_BUS);

        spin_lock(&pseg->bus2bridge_lock);
        for ( ; sec_bus <= sub_bus; sec_bus++ )
        {
            pseg->bus2bridge[sec_bus].map = 1;
            pseg->bus2bridge[sec_bus].bus = bus;
            pseg->bus2bridge[sec_bus].devfn = devfn;
        }
        spin_unlock(&pseg->bus2bridge_lock);
        break;

    case DEV_TYPE_PCIe_ENDPOINT:
        pos = pci_find_cap_offset(pseg->nr, bus, PCI_SLOT(devfn),
                                  PCI_FUNC(devfn), PCI_CAP_ID_EXP);
        BUG_ON(!pos);
        cap = pci_conf_read16(pseg->nr, bus, PCI_SLOT(devfn),
                              PCI_FUNC(devfn), pos + PCI_EXP_DEVCAP);
        if ( cap & PCI_EXP_DEVCAP_PHANTOM )
        {
            pdev->phantom_stride = 8 >> MASK_EXTR(cap,
                                                  PCI_EXP_DEVCAP_PHANTOM);
            if ( PCI_FUNC(devfn) >= pdev->phantom_stride )
                pdev->phantom_stride = 0;
        }
        else
        {
            unsigned int i;

            for ( i = 0; i < nr_phantom_devs; ++i )
                if ( phantom_devs[i].seg == pseg->nr &&
                     phantom_devs[i].bus == bus &&
                     phantom_devs[i].slot == PCI_SLOT(devfn) &&
                     phantom_devs[i].stride > PCI_FUNC(devfn) )
                {
                    pdev->phantom_stride = phantom_devs[i].stride;
                    break;
                }
        }
        break;

    case DEV_TYPE_PCI:
    case DEV_TYPE_PCIe_BRIDGE:
    case DEV_TYPE_PCI_HOST_BRIDGE:
        break;

    default:
        printk(XENLOG_WARNING "%04x:%02x:%02x.%u: unknown type %d\n",
               pseg->nr, bus, PCI_SLOT(devfn), PCI_FUNC(devfn), pdev->type);
        break;
    }

    check_pdev(pdev);

    return pdev;
}
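
/*
 * Note on phantom functions: a PCIe endpoint advertising, e.g.,
 * PCI_EXP_DEVCAP_PHANTOM == 2 claims function numbers at a stride of
 * 8 >> 2 == 2, i.e. functions 2, 4 and 6 of its slot may issue DMA on
 * behalf of function 0.  pdev->phantom_stride records this for the
 * lookup and IOMMU paths below.
 */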

static void free_pdev(struct pci_seg *pseg, struct pci_dev *pdev)
{
    /* update bus2bridge */
    switch ( pdev->type )
    {
        u8 dev, func, sec_bus, sub_bus;

    case DEV_TYPE_PCIe2PCI_BRIDGE:
    case DEV_TYPE_LEGACY_PCI_BRIDGE:
        dev = PCI_SLOT(pdev->devfn);
        func = PCI_FUNC(pdev->devfn);
        sec_bus = pci_conf_read8(pseg->nr, pdev->bus, dev, func,
                                 PCI_SECONDARY_BUS);
        sub_bus = pci_conf_read8(pseg->nr, pdev->bus, dev, func,
                                 PCI_SUBORDINATE_BUS);

        spin_lock(&pseg->bus2bridge_lock);
        for ( ; sec_bus <= sub_bus; sec_bus++ )
            pseg->bus2bridge[sec_bus] = pseg->bus2bridge[pdev->bus];
        spin_unlock(&pseg->bus2bridge_lock);
        break;

    default:
        break;
    }

    list_del(&pdev->alldevs_list);
    xfree(pdev->msix);
    xfree(pdev);
}

static void _pci_hide_device(struct pci_dev *pdev)
{
    if ( pdev->domain )
        return;
    pdev->domain = dom_xen;
    list_add(&pdev->domain_list, &dom_xen->arch.pdev_list);
}

int __init pci_hide_device(int bus, int devfn)
{
    struct pci_dev *pdev;
    int rc = -ENOMEM;

    pcidevs_lock();
    pdev = alloc_pdev(get_pseg(0), bus, devfn);
    if ( pdev )
    {
        _pci_hide_device(pdev);
        rc = 0;
    }
    pcidevs_unlock();

    return rc;
}

int __init pci_ro_device(int seg, int bus, int devfn)
{
    struct pci_seg *pseg = alloc_pseg(seg);
    struct pci_dev *pdev;

    if ( !pseg )
        return -ENOMEM;
    pdev = alloc_pdev(pseg, bus, devfn);
    if ( !pdev )
        return -ENOMEM;

    if ( !pseg->ro_map )
    {
        size_t sz = BITS_TO_LONGS(PCI_BDF(-1, -1, -1) + 1) * sizeof(long);

        pseg->ro_map = alloc_xenheap_pages(get_order_from_bytes(sz), 0);
        if ( !pseg->ro_map )
            return -ENOMEM;
        memset(pseg->ro_map, 0, sz);
    }

    __set_bit(PCI_BDF2(bus, devfn), pseg->ro_map);
    _pci_hide_device(pdev);

    return 0;
}

struct pci_dev *pci_get_pdev(int seg, int bus, int devfn)
{
    struct pci_seg *pseg = get_pseg(seg);
    struct pci_dev *pdev = NULL;

    ASSERT(pcidevs_locked());
    ASSERT(seg != -1 || bus == -1);
    ASSERT(bus != -1 || devfn == -1);

    if ( !pseg )
    {
        if ( seg == -1 )
            radix_tree_gang_lookup(&pci_segments, (void **)&pseg, 0, 1);
        if ( !pseg )
            return NULL;
    }

    do {
        list_for_each_entry ( pdev, &pseg->alldevs_list, alldevs_list )
            if ( (pdev->bus == bus || bus == -1) &&
                 (pdev->devfn == devfn || devfn == -1) )
                return pdev;
    } while ( radix_tree_gang_lookup(&pci_segments, (void **)&pseg,
                                     pseg->nr + 1, 1) );

    return NULL;
}
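
/*
 * Map a possibly phantom (seg, bus, devfn) back to the pci_dev owning
 * it.  E.g. a lookup of 00:10.6 that finds no device directly will
 * probe 00:10.2 (whose phantom stride of 4 would cover function 6),
 * then 00:10.0 with stride 2, then 00:10.0 with stride 1, accepting the
 * first candidate whose recorded phantom_stride matches.
 */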
struct pci_dev *pci_get_real_pdev(int seg, int bus, int devfn)
{
    struct pci_dev *pdev;
    int stride;

    if ( seg < 0 || bus < 0 || devfn < 0 )
        return NULL;

    for ( pdev = pci_get_pdev(seg, bus, devfn), stride = 4;
          !pdev && stride; stride >>= 1 )
    {
        if ( !(devfn & (8 - stride)) )
            continue;
        pdev = pci_get_pdev(seg, bus, devfn & ~(8 - stride));
        if ( pdev && stride != pdev->phantom_stride )
            pdev = NULL;
    }

    return pdev;
}

struct pci_dev *pci_get_pdev_by_domain(const struct domain *d, int seg,
                                       int bus, int devfn)
{
    struct pci_seg *pseg = get_pseg(seg);
    struct pci_dev *pdev = NULL;

    ASSERT(seg != -1 || bus == -1);
    ASSERT(bus != -1 || devfn == -1);

    if ( !pseg )
    {
        if ( seg == -1 )
            radix_tree_gang_lookup(&pci_segments, (void **)&pseg, 0, 1);
        if ( !pseg )
            return NULL;
    }

    do {
        list_for_each_entry ( pdev, &pseg->alldevs_list, alldevs_list )
            if ( (pdev->bus == bus || bus == -1) &&
                 (pdev->devfn == devfn || devfn == -1) &&
                 (pdev->domain == d) )
                return pdev;
    } while ( radix_tree_gang_lookup(&pci_segments, (void **)&pseg,
                                     pseg->nr + 1, 1) );

    return NULL;
}

/**
 * pci_enable_acs - enable ACS if the hardware supports it
 * @pdev: the PCI device
 */
static void pci_enable_acs(struct pci_dev *pdev)
{
    int pos;
    u16 cap, ctrl, seg = pdev->seg;
    u8 bus = pdev->bus;
    u8 dev = PCI_SLOT(pdev->devfn);
    u8 func = PCI_FUNC(pdev->devfn);

    if ( !iommu_enabled )
        return;

    pos = pci_find_ext_capability(seg, bus, pdev->devfn, PCI_EXT_CAP_ID_ACS);
    if ( !pos )
        return;

    cap = pci_conf_read16(seg, bus, dev, func, pos + PCI_ACS_CAP);
    ctrl = pci_conf_read16(seg, bus, dev, func, pos + PCI_ACS_CTRL);

    /* Source Validation */
    ctrl |= (cap & PCI_ACS_SV);

    /* P2P Request Redirect */
    ctrl |= (cap & PCI_ACS_RR);

    /* P2P Completion Redirect */
    ctrl |= (cap & PCI_ACS_CR);

    /* Upstream Forwarding */
    ctrl |= (cap & PCI_ACS_UF);

    pci_conf_write16(seg, bus, dev, func, pos + PCI_ACS_CTRL, ctrl);
}

static int iommu_add_device(struct pci_dev *pdev);
static int iommu_enable_device(struct pci_dev *pdev);
static int iommu_remove_device(struct pci_dev *pdev);
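
/*
 * pci_add_device() also sizes the SR-IOV VF BARs the first time a PF is
 * seen: writing all-ones to a BAR and reading it back yields the region
 * size, which is only safe to do while VF Enable and VF Memory Space
 * Enable are still clear in the SR-IOV control register.
 */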
int pci_add_device(u16 seg, u8 bus, u8 devfn,
                   const struct pci_dev_info *info, nodeid_t node)
{
    struct pci_seg *pseg;
    struct pci_dev *pdev;
    unsigned int slot = PCI_SLOT(devfn), func = PCI_FUNC(devfn);
    const char *pdev_type;
    int ret;
    bool pf_is_extfn = false;

    if ( !info )
        pdev_type = "device";
    else if ( info->is_virtfn )
    {
        pcidevs_lock();
        pdev = pci_get_pdev(seg, info->physfn.bus, info->physfn.devfn);
        if ( pdev )
            pf_is_extfn = pdev->info.is_extfn;
        pcidevs_unlock();
        if ( !pdev )
            pci_add_device(seg, info->physfn.bus, info->physfn.devfn,
                           NULL, node);
        pdev_type = "virtual function";
    }
    else if ( info->is_extfn )
        pdev_type = "extended function";
    else
    {
        info = NULL;
        pdev_type = "device";
    }

    ret = xsm_resource_plug_pci(XSM_PRIV, (seg << 16) | (bus << 8) | devfn);
    if ( ret )
        return ret;

    ret = -ENOMEM;

    pcidevs_lock();
    pseg = alloc_pseg(seg);
    if ( !pseg )
        goto out;
    pdev = alloc_pdev(pseg, bus, devfn);
    if ( !pdev )
        goto out;

    pdev->node = node;

    if ( info )
    {
        pdev->info = *info;
        /*
         * VF's 'is_extfn' field is used to indicate whether its PF is an
         * extended function.
         */
        if ( pdev->info.is_virtfn )
            pdev->info.is_extfn = pf_is_extfn;
    }
    else if ( !pdev->vf_rlen[0] )
    {
        unsigned int pos = pci_find_ext_capability(seg, bus, devfn,
                                                   PCI_EXT_CAP_ID_SRIOV);
        u16 ctrl = pci_conf_read16(seg, bus, slot, func, pos + PCI_SRIOV_CTRL);

        if ( !pos )
            /* Nothing */;
        else if ( !(ctrl & (PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE)) )
        {
            unsigned int i;

            BUILD_BUG_ON(ARRAY_SIZE(pdev->vf_rlen) != PCI_SRIOV_NUM_BARS);
            for ( i = 0; i < PCI_SRIOV_NUM_BARS; ++i )
            {
                unsigned int idx = pos + PCI_SRIOV_BAR + i * 4;
                u32 bar = pci_conf_read32(seg, bus, slot, func, idx);
                u32 hi = 0;

                if ( (bar & PCI_BASE_ADDRESS_SPACE) ==
                     PCI_BASE_ADDRESS_SPACE_IO )
                {
                    printk(XENLOG_WARNING
                           "SR-IOV device %04x:%02x:%02x.%u with vf BAR%u"
                           " in IO space\n",
                           seg, bus, slot, func, i);
                    continue;
                }
                pci_conf_write32(seg, bus, slot, func, idx, ~0);
                if ( (bar & PCI_BASE_ADDRESS_MEM_TYPE_MASK) ==
                     PCI_BASE_ADDRESS_MEM_TYPE_64 )
                {
                    /* A 64-bit BAR consumes this slot and the next one. */
                    if ( i >= PCI_SRIOV_NUM_BARS - 1 )
                    {
                        printk(XENLOG_WARNING
                               "SR-IOV device %04x:%02x:%02x.%u with 64-bit"
                               " vf BAR in last slot\n",
                               seg, bus, slot, func);
                        break;
                    }
                    hi = pci_conf_read32(seg, bus, slot, func, idx + 4);
                    pci_conf_write32(seg, bus, slot, func, idx + 4, ~0);
                }
                pdev->vf_rlen[i] = pci_conf_read32(seg, bus, slot, func, idx) &
                                   PCI_BASE_ADDRESS_MEM_MASK;
                if ( (bar & PCI_BASE_ADDRESS_MEM_TYPE_MASK) ==
                     PCI_BASE_ADDRESS_MEM_TYPE_64 )
                {
                    pdev->vf_rlen[i] |= (u64)pci_conf_read32(seg, bus,
                                                             slot, func,
                                                             idx + 4) << 32;
                    pci_conf_write32(seg, bus, slot, func, idx + 4, hi);
                }
                else if ( pdev->vf_rlen[i] )
                    pdev->vf_rlen[i] |= (u64)~0 << 32;
                pci_conf_write32(seg, bus, slot, func, idx, bar);
                pdev->vf_rlen[i] = -pdev->vf_rlen[i];
                if ( (bar & PCI_BASE_ADDRESS_MEM_TYPE_MASK) ==
                     PCI_BASE_ADDRESS_MEM_TYPE_64 )
                    ++i;
            }
        }
        else
            printk(XENLOG_WARNING
                   "SR-IOV device %04x:%02x:%02x.%u has its virtual"
                   " functions already enabled (%04x)\n",
                   seg, bus, slot, func, ctrl);
    }

    check_pdev(pdev);

    ret = 0;
    if ( !pdev->domain )
    {
        pdev->domain = hardware_domain;
        ret = iommu_add_device(pdev);
        if ( ret )
        {
            pdev->domain = NULL;
            goto out;
        }

        list_add(&pdev->domain_list, &hardware_domain->arch.pdev_list);
    }
    else
        iommu_enable_device(pdev);

    pci_enable_acs(pdev);

 out:
    pcidevs_unlock();
    if ( !ret )
    {
        printk(XENLOG_DEBUG "PCI add %s %04x:%02x:%02x.%u\n", pdev_type,
               seg, bus, slot, func);
        while ( pdev->phantom_stride )
        {
            func += pdev->phantom_stride;
            if ( PCI_SLOT(func) )
                break;
            printk(XENLOG_DEBUG "PCI phantom %04x:%02x:%02x.%u\n",
                   seg, bus, slot, func);
        }
    }
    return ret;
}

int pci_remove_device(u16 seg, u8 bus, u8 devfn)
{
    struct pci_seg *pseg = get_pseg(seg);
    struct pci_dev *pdev;
    int ret;

    ret = xsm_resource_unplug_pci(XSM_PRIV, (seg << 16) | (bus << 8) | devfn);
    if ( ret )
        return ret;

    ret = -ENODEV;

    if ( !pseg )
        return -ENODEV;

    pcidevs_lock();
    list_for_each_entry ( pdev, &pseg->alldevs_list, alldevs_list )
        if ( pdev->bus == bus && pdev->devfn == devfn )
        {
            ret = iommu_remove_device(pdev);
            if ( pdev->domain )
                list_del(&pdev->domain_list);
            pci_cleanup_msi(pdev);
            free_pdev(pseg, pdev);
            printk(XENLOG_DEBUG "PCI remove device %04x:%02x:%02x.%u\n",
                   seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
            break;
        }

    pcidevs_unlock();
    return ret;
}

static int pci_clean_dpci_irq(struct domain *d,
                              struct hvm_pirq_dpci *pirq_dpci, void *arg)
{
    struct dev_intx_gsi_link *digl, *tmp;

    pirq_guest_unbind(d, dpci_pirq(pirq_dpci));

    if ( pt_irq_need_timer(pirq_dpci->flags) )
        kill_timer(&pirq_dpci->timer);

    list_for_each_entry_safe ( digl, tmp, &pirq_dpci->digl_list, list )
    {
        list_del(&digl->list);
        xfree(digl);
    }

    return pt_pirq_softirq_active(pirq_dpci) ? -ERESTART : 0;
}

static int pci_clean_dpci_irqs(struct domain *d)
{
    struct hvm_irq_dpci *hvm_irq_dpci = NULL;

    if ( !iommu_enabled )
        return 0;

    if ( !is_hvm_domain(d) )
        return 0;

    spin_lock(&d->event_lock);
    hvm_irq_dpci = domain_get_irq_dpci(d);
    if ( hvm_irq_dpci != NULL )
    {
        int ret = pt_pirq_iterate(d, pci_clean_dpci_irq, NULL);

        if ( ret )
        {
            spin_unlock(&d->event_lock);
            return ret;
        }

        hvm_domain_irq(d)->dpci = NULL;
        free_hvm_irq_dpci(hvm_irq_dpci);
    }
    spin_unlock(&d->event_lock);
    return 0;
}
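
/*
 * A -ERESTART result from pci_clean_dpci_irq() (an outstanding softirq
 * still references the pirq) propagates up through pci_release_devices(),
 * so domain destruction is retried later instead of freeing state that
 * is still in use.
 */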

int pci_release_devices(struct domain *d)
{
    struct pci_dev *pdev;
    u8 bus, devfn;
    int ret;

    pcidevs_lock();
    ret = pci_clean_dpci_irqs(d);
    if ( ret )
    {
        pcidevs_unlock();
        return ret;
    }
    while ( (pdev = pci_get_pdev_by_domain(d, -1, -1, -1)) )
    {
        bus = pdev->bus;
        devfn = pdev->devfn;
        if ( deassign_device(d, pdev->seg, bus, devfn) )
            printk("domain %d: deassign device (%04x:%02x:%02x.%u) failed!\n",
                   d->domain_id, pdev->seg, bus,
                   PCI_SLOT(devfn), PCI_FUNC(devfn));
    }
    pcidevs_unlock();

    return 0;
}

#define PCI_CLASS_BRIDGE_HOST    0x0600
#define PCI_CLASS_BRIDGE_PCI     0x0604

enum pdev_type pdev_type(u16 seg, u8 bus, u8 devfn)
{
    u16 class_device, creg;
    u8 d = PCI_SLOT(devfn), f = PCI_FUNC(devfn);
    int pos = pci_find_cap_offset(seg, bus, d, f, PCI_CAP_ID_EXP);

    class_device = pci_conf_read16(seg, bus, d, f, PCI_CLASS_DEVICE);
    switch ( class_device )
    {
    case PCI_CLASS_BRIDGE_PCI:
        if ( !pos )
            return DEV_TYPE_LEGACY_PCI_BRIDGE;
        creg = pci_conf_read16(seg, bus, d, f, pos + PCI_EXP_FLAGS);
        switch ( (creg & PCI_EXP_FLAGS_TYPE) >> 4 )
        {
        case PCI_EXP_TYPE_PCI_BRIDGE:
            return DEV_TYPE_PCIe2PCI_BRIDGE;
        case PCI_EXP_TYPE_PCIE_BRIDGE:
            return DEV_TYPE_PCI2PCIe_BRIDGE;
        }
        return DEV_TYPE_PCIe_BRIDGE;
    case PCI_CLASS_BRIDGE_HOST:
        return DEV_TYPE_PCI_HOST_BRIDGE;

    case 0x0000: case 0xffff:
        return DEV_TYPE_PCI_UNKNOWN;
    }

    return pos ? DEV_TYPE_PCIe_ENDPOINT : DEV_TYPE_PCI;
}

/*
 * Find the upstream PCIe-to-PCI/PCI-X bridge or PCI legacy bridge of the
 * given device.
 * Return 0: the device is an integrated PCI device or a PCIe device.
 * Return 1: a PCIe-to-PCI/PCI-X bridge or PCI legacy bridge was found.
 * Return -1: failure (unknown segment, or a cycle in bus2bridge).
 */
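/*
 * On success, *bus/*devfn identify the topmost bridge recorded in
 * bus2bridge and *secbus the bus number immediately behind it; IOMMU
 * code uses these when generating requests on behalf of devices sitting
 * behind such bridges.
 */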
int find_upstream_bridge(u16 seg, u8 *bus, u8 *devfn, u8 *secbus)
{
    struct pci_seg *pseg = get_pseg(seg);
    int ret = 0;
    int cnt = 0;

    if ( *bus == 0 )
        return 0;

    if ( !pseg )
        return -1;

    if ( !pseg->bus2bridge[*bus].map )
        return 0;

    ret = 1;
    spin_lock(&pseg->bus2bridge_lock);
    while ( pseg->bus2bridge[*bus].map )
    {
        *secbus = *bus;
        *devfn = pseg->bus2bridge[*bus].devfn;
        *bus = pseg->bus2bridge[*bus].bus;
        if ( cnt++ >= MAX_BUSES )
        {
            ret = -1;
            goto out;
        }
    }

 out:
    spin_unlock(&pseg->bus2bridge_lock);
    return ret;
}

bool_t __init pci_device_detect(u16 seg, u8 bus, u8 dev, u8 func)
{
    u32 vendor;

    vendor = pci_conf_read32(seg, bus, dev, func, PCI_VENDOR_ID);
    /* some broken boards return 0 or ~0 if a slot is empty: */
    if ( (vendor == 0xffffffff) || (vendor == 0x00000000) ||
         (vendor == 0x0000ffff) || (vendor == 0xffff0000) )
        return 0;
    return 1;
}

void pci_check_disable_device(u16 seg, u8 bus, u8 devfn)
{
    struct pci_dev *pdev;
    s_time_t now = NOW();
    u16 cword;

    pcidevs_lock();
    pdev = pci_get_real_pdev(seg, bus, devfn);
    if ( pdev )
    {
        if ( now < pdev->fault.time ||
             now - pdev->fault.time > MILLISECS(10) )
            pdev->fault.count >>= 1;
        pdev->fault.time = now;
        if ( ++pdev->fault.count < PT_FAULT_THRESHOLD )
            pdev = NULL;
    }
    pcidevs_unlock();

    if ( !pdev )
        return;

    /* Tell the device to stop DMAing; we can't rely on the guest to
     * control it for us. */
    devfn = pdev->devfn;
    cword = pci_conf_read16(seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
                            PCI_COMMAND);
    pci_conf_write16(seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
                     PCI_COMMAND, cword & ~PCI_COMMAND_MASTER);
}

/*
 * Scan PCI devices, adding all existing PCI devices to alldevs_list and
 * setting up the PCI hierarchy in the bus2bridge array.
 */
static int __init _scan_pci_devices(struct pci_seg *pseg, void *arg)
{
    struct pci_dev *pdev;
    int bus, dev, func;

    for ( bus = 0; bus < 256; bus++ )
    {
        for ( dev = 0; dev < 32; dev++ )
        {
            for ( func = 0; func < 8; func++ )
            {
                if ( !pci_device_detect(pseg->nr, bus, dev, func) )
                {
                    if ( !func )
                        break;
                    continue;
                }

                pdev = alloc_pdev(pseg, bus, PCI_DEVFN(dev, func));
                if ( !pdev )
                {
                    printk(XENLOG_WARNING "%04x:%02x:%02x.%u: alloc_pdev failed\n",
                           pseg->nr, bus, dev, func);
                    return -ENOMEM;
                }

                /* Skip remaining functions of single-function devices. */
                if ( !func && !(pci_conf_read8(pseg->nr, bus, dev, func,
                                               PCI_HEADER_TYPE) & 0x80) )
                    break;
            }
        }
    }

    return 0;
}

int __init scan_pci_devices(void)
{
    int ret;

    pcidevs_lock();
    ret = pci_segments_iterate(_scan_pci_devices, NULL);
    pcidevs_unlock();

    return ret;
}

struct setup_hwdom {
    struct domain *d;
    int (*handler)(u8 devfn, struct pci_dev *);
};

static void __hwdom_init setup_one_hwdom_device(const struct setup_hwdom *ctxt,
                                                struct pci_dev *pdev)
{
    u8 devfn = pdev->devfn;

    do {
        int err = ctxt->handler(devfn, pdev);

        if ( err )
        {
            printk(XENLOG_ERR "setup %04x:%02x:%02x.%u for d%d failed (%d)\n",
                   pdev->seg, pdev->bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
                   ctxt->d->domain_id, err);
            if ( devfn == pdev->devfn )
                return;
        }
        devfn += pdev->phantom_stride;
    } while ( devfn != pdev->devfn &&
              PCI_SLOT(devfn) == PCI_SLOT(pdev->devfn) );
}

static int __hwdom_init _setup_hwdom_pci_devices(struct pci_seg *pseg, void *arg)
{
    struct setup_hwdom *ctxt = arg;
    int bus, devfn;

    for ( bus = 0; bus < 256; bus++ )
    {
        for ( devfn = 0; devfn < 256; devfn++ )
        {
            struct pci_dev *pdev = pci_get_pdev(pseg->nr, bus, devfn);

            if ( !pdev )
                continue;

            if ( !pdev->domain )
            {
                pdev->domain = ctxt->d;
                list_add(&pdev->domain_list, &ctxt->d->arch.pdev_list);
                setup_one_hwdom_device(ctxt, pdev);
            }
            else if ( pdev->domain == dom_xen )
            {
                pdev->domain = ctxt->d;
                setup_one_hwdom_device(ctxt, pdev);
                pdev->domain = dom_xen;
            }
            else if ( pdev->domain != ctxt->d )
                printk(XENLOG_WARNING "Dom%d owning %04x:%02x:%02x.%u?\n",
                       pdev->domain->domain_id, pseg->nr, bus,
                       PCI_SLOT(devfn), PCI_FUNC(devfn));

            if ( iommu_verbose )
            {
                pcidevs_unlock();
                process_pending_softirqs();
                pcidevs_lock();
            }
        }

        if ( !iommu_verbose )
        {
            pcidevs_unlock();
            process_pending_softirqs();
            pcidevs_lock();
        }
    }

    return 0;
}

void __hwdom_init setup_hwdom_pci_devices(
    struct domain *d, int (*handler)(u8 devfn, struct pci_dev *))
{
    struct setup_hwdom ctxt = { .d = d, .handler = handler };

    pcidevs_lock();
    pci_segments_iterate(_setup_hwdom_pci_devices, &ctxt);
    pcidevs_unlock();
}

#ifdef CONFIG_ACPI
#include <acpi/acpi.h>
#include <acpi/apei.h>

static int hest_match_pci(const struct acpi_hest_aer_common *p,
                          const struct pci_dev *pdev)
{
    return ACPI_HEST_SEGMENT(p->bus) == pdev->seg &&
           ACPI_HEST_BUS(p->bus) == pdev->bus &&
           p->device == PCI_SLOT(pdev->devfn) &&
           p->function == PCI_FUNC(pdev->devfn);
}

static bool_t hest_match_type(const struct acpi_hest_header *hest_hdr,
                              const struct pci_dev *pdev)
{
    unsigned int pos = pci_find_cap_offset(pdev->seg, pdev->bus,
                                           PCI_SLOT(pdev->devfn),
                                           PCI_FUNC(pdev->devfn),
                                           PCI_CAP_ID_EXP);
    u8 pcie = MASK_EXTR(pci_conf_read16(pdev->seg, pdev->bus,
                                        PCI_SLOT(pdev->devfn),
                                        PCI_FUNC(pdev->devfn),
                                        pos + PCI_EXP_FLAGS),
                        PCI_EXP_FLAGS_TYPE);

    switch ( hest_hdr->type )
    {
    case ACPI_HEST_TYPE_AER_ROOT_PORT:
        return pcie == PCI_EXP_TYPE_ROOT_PORT;
    case ACPI_HEST_TYPE_AER_ENDPOINT:
        return pcie == PCI_EXP_TYPE_ENDPOINT;
    case ACPI_HEST_TYPE_AER_BRIDGE:
        return pci_conf_read16(pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),
                               PCI_FUNC(pdev->devfn), PCI_CLASS_DEVICE) ==
               PCI_CLASS_BRIDGE_PCI;
    }

    return 0;
}

struct aer_hest_parse_info {
    const struct pci_dev *pdev;
    bool_t firmware_first;
};

static bool_t hest_source_is_pcie_aer(const struct acpi_hest_header *hest_hdr)
{
    if ( hest_hdr->type == ACPI_HEST_TYPE_AER_ROOT_PORT ||
         hest_hdr->type == ACPI_HEST_TYPE_AER_ENDPOINT ||
         hest_hdr->type == ACPI_HEST_TYPE_AER_BRIDGE )
        return 1;
    return 0;
}

static int aer_hest_parse(const struct acpi_hest_header *hest_hdr, void *data)
{
    struct aer_hest_parse_info *info = data;
    const struct acpi_hest_aer_common *p;
    bool_t ff;

    if ( !hest_source_is_pcie_aer(hest_hdr) )
        return 0;

    p = (const struct acpi_hest_aer_common *)(hest_hdr + 1);
    ff = !!(p->flags & ACPI_HEST_FIRMWARE_FIRST);

    /*
     * If no specific device is supplied, determine whether
     * FIRMWARE_FIRST is set for *any* PCIe device.
     */
    if ( !info->pdev )
    {
        info->firmware_first |= ff;
        return 0;
    }

    /* Otherwise, check the specific device */
    if ( p->flags & ACPI_HEST_GLOBAL ?
         hest_match_type(hest_hdr, info->pdev) :
         hest_match_pci(p, info->pdev) )
    {
        info->firmware_first = ff;
        return 1;
    }

    return 0;
}

bool_t pcie_aer_get_firmware_first(const struct pci_dev *pdev)
{
    struct aer_hest_parse_info info = { .pdev = pdev };

    return pci_find_cap_offset(pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),
                               PCI_FUNC(pdev->devfn), PCI_CAP_ID_EXP) &&
           apei_hest_parse(aer_hest_parse, &info) >= 0 &&
           info.firmware_first;
}
#endif

static int _dump_pci_devices(struct pci_seg *pseg, void *arg)
{
    struct pci_dev *pdev;
    struct msi_desc *msi;

    printk("==== segment %04x ====\n", pseg->nr);

    list_for_each_entry ( pdev, &pseg->alldevs_list, alldevs_list )
    {
        printk("%04x:%02x:%02x.%u - dom %-3d - node %-3d - MSIs < ",
               pseg->nr, pdev->bus,
               PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
               pdev->domain ? pdev->domain->domain_id : -1,
               (pdev->node != NUMA_NO_NODE) ? pdev->node : -1);
        list_for_each_entry ( msi, &pdev->msi_list, list )
            printk("%d ", msi->irq);
        printk(">\n");
    }

    return 0;
}

static void dump_pci_devices(unsigned char ch)
{
    printk("==== PCI devices ====\n");
    pcidevs_lock();
    pci_segments_iterate(_dump_pci_devices, NULL);
    pcidevs_unlock();
}

static int __init setup_dump_pcidevs(void)
{
    register_keyhandler('Q', dump_pci_devices, "dump PCI devices", 1);
    return 0;
}
__initcall(setup_dump_pcidevs);

int iommu_update_ire_from_msi(
    struct msi_desc *msi_desc, struct msi_msg *msg)
{
    return iommu_intremap
           ? iommu_get_ops()->update_ire_from_msi(msi_desc, msg) : 0;
}

void iommu_read_msi_from_ire(
    struct msi_desc *msi_desc, struct msi_msg *msg)
{
    if ( iommu_intremap )
        iommu_get_ops()->read_msi_from_ire(msi_desc, msg);
}
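
/*
 * Add a device, together with any phantom functions it claims, to its
 * owning domain's IOMMU context.  Failures on phantom functions are only
 * logged; the base function remains added.
 */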
static int iommu_add_device(struct pci_dev *pdev)
{
    const struct domain_iommu *hd;
    int rc;
    u8 devfn;

    if ( !pdev->domain )
        return -EINVAL;

    ASSERT(pcidevs_locked());

    hd = dom_iommu(pdev->domain);
    if ( !iommu_enabled || !hd->platform_ops )
        return 0;

    rc = hd->platform_ops->add_device(pdev->devfn, pci_to_dev(pdev));
    if ( rc || !pdev->phantom_stride )
        return rc;

    for ( devfn = pdev->devfn ; ; )
    {
        devfn += pdev->phantom_stride;
        if ( PCI_SLOT(devfn) != PCI_SLOT(pdev->devfn) )
            return 0;
        rc = hd->platform_ops->add_device(devfn, pci_to_dev(pdev));
        if ( rc )
            printk(XENLOG_WARNING "IOMMU: add %04x:%02x:%02x.%u failed (%d)\n",
                   pdev->seg, pdev->bus, PCI_SLOT(devfn), PCI_FUNC(devfn), rc);
    }
}

static int iommu_enable_device(struct pci_dev *pdev)
{
    const struct domain_iommu *hd;

    if ( !pdev->domain )
        return -EINVAL;

    ASSERT(pcidevs_locked());

    hd = dom_iommu(pdev->domain);
    if ( !iommu_enabled || !hd->platform_ops ||
         !hd->platform_ops->enable_device )
        return 0;

    return hd->platform_ops->enable_device(pci_to_dev(pdev));
}

static int iommu_remove_device(struct pci_dev *pdev)
{
    const struct domain_iommu *hd;
    u8 devfn;

    if ( !pdev->domain )
        return -EINVAL;

    hd = dom_iommu(pdev->domain);
    if ( !iommu_enabled || !hd->platform_ops )
        return 0;

    for ( devfn = pdev->devfn ; pdev->phantom_stride; )
    {
        int rc;

        devfn += pdev->phantom_stride;
        if ( PCI_SLOT(devfn) != PCI_SLOT(pdev->devfn) )
            break;
        rc = hd->platform_ops->remove_device(devfn, pci_to_dev(pdev));
        if ( !rc )
            continue;

        printk(XENLOG_ERR "IOMMU: remove %04x:%02x:%02x.%u failed (%d)\n",
               pdev->seg, pdev->bus, PCI_SLOT(devfn), PCI_FUNC(devfn), rc);
        return rc;
    }

    return hd->platform_ops->remove_device(pdev->devfn, pci_to_dev(pdev));
}

/*
 * If the device isn't owned by the hardware domain, it means it has
 * already been assigned to another domain, or it doesn't exist.
 */
static int device_assigned(u16 seg, u8 bus, u8 devfn)
{
    struct pci_dev *pdev;

    pcidevs_lock();
    pdev = pci_get_pdev_by_domain(hardware_domain, seg, bus, devfn);
    pcidevs_unlock();

    return pdev ? 0 : -EBUSY;
}
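
/*
 * assign_device() must not block on the pcidevs lock: if the lock is
 * held elsewhere it returns -ERESTART, which the domctl handler turns
 * into a hypercall continuation, keeping the caller preemptible.
 */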
static int assign_device(struct domain *d, u16 seg, u8 bus, u8 devfn, u32 flag)
{
    const struct domain_iommu *hd = dom_iommu(d);
    struct pci_dev *pdev;
    int rc = 0;

    if ( !iommu_enabled || !hd->platform_ops )
        return 0;

    /* Prevent device assignment if mem paging or mem sharing has been
     * enabled for this domain. */
    if ( unlikely(!need_iommu(d) &&
                  (d->arch.hvm_domain.mem_sharing_enabled ||
                   vm_event_check_ring(d->vm_event_paging) ||
                   p2m_get_hostp2m(d)->global_logdirty)) )
        return -EXDEV;

    if ( !pcidevs_trylock() )
        return -ERESTART;

    rc = iommu_construct(d);
    if ( rc )
    {
        pcidevs_unlock();
        return rc;
    }

    pdev = pci_get_pdev_by_domain(hardware_domain, seg, bus, devfn);
    if ( !pdev )
    {
        rc = pci_get_pdev(seg, bus, devfn) ? -EBUSY : -ENODEV;
        goto done;
    }

    if ( pdev->msix )
        msixtbl_init(d);

    pdev->fault.count = 0;

    if ( (rc = hd->platform_ops->assign_device(d, devfn, pci_to_dev(pdev), flag)) )
        goto done;

    for ( ; pdev->phantom_stride; rc = 0 )
    {
        devfn += pdev->phantom_stride;
        if ( PCI_SLOT(devfn) != PCI_SLOT(pdev->devfn) )
            break;
        rc = hd->platform_ops->assign_device(d, devfn, pci_to_dev(pdev), flag);
        if ( rc )
            printk(XENLOG_G_WARNING "d%d: assign %04x:%02x:%02x.%u failed (%d)\n",
                   d->domain_id, seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
                   rc);
    }

 done:
    if ( !has_arch_pdevs(d) && need_iommu(d) )
        iommu_teardown(d);
    pcidevs_unlock();

    return rc;
}

/* The caller should hold the pcidevs_lock. */
int deassign_device(struct domain *d, u16 seg, u8 bus, u8 devfn)
{
    const struct domain_iommu *hd = dom_iommu(d);
    struct pci_dev *pdev = NULL;
    int ret = 0;

    if ( !iommu_enabled || !hd->platform_ops )
        return -EINVAL;

    ASSERT(pcidevs_locked());
    pdev = pci_get_pdev_by_domain(d, seg, bus, devfn);
    if ( !pdev )
        return -ENODEV;

    while ( pdev->phantom_stride )
    {
        devfn += pdev->phantom_stride;
        if ( PCI_SLOT(devfn) != PCI_SLOT(pdev->devfn) )
            break;
        ret = hd->platform_ops->reassign_device(d, hardware_domain, devfn,
                                                pci_to_dev(pdev));
        if ( !ret )
            continue;

        printk(XENLOG_G_ERR "d%d: deassign %04x:%02x:%02x.%u failed (%d)\n",
               d->domain_id, seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn), ret);
        return ret;
    }

    devfn = pdev->devfn;
    ret = hd->platform_ops->reassign_device(d, hardware_domain, devfn,
                                            pci_to_dev(pdev));
    if ( ret )
    {
        dprintk(XENLOG_G_ERR,
                "d%d: deassign device (%04x:%02x:%02x.%u) failed\n",
                d->domain_id, seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
        return ret;
    }

    pdev->fault.count = 0;

    if ( !has_arch_pdevs(d) && need_iommu(d) )
        iommu_teardown(d);

    return ret;
}

static int iommu_get_device_group(
    struct domain *d, u16 seg, u8 bus, u8 devfn,
    XEN_GUEST_HANDLE_64(uint32) buf, int max_sdevs)
{
    const struct domain_iommu *hd = dom_iommu(d);
    struct pci_dev *pdev;
    int group_id, sdev_id;
    u32 bdf;
    int i = 0;
    const struct iommu_ops *ops = hd->platform_ops;

    if ( !iommu_enabled || !ops || !ops->get_device_group_id )
        return 0;

    group_id = ops->get_device_group_id(seg, bus, devfn);

    pcidevs_lock();
    for_each_pdev( d, pdev )
    {
        if ( (pdev->seg != seg) ||
             ((pdev->bus == bus) && (pdev->devfn == devfn)) )
            continue;

        if ( xsm_get_device_group(XSM_HOOK, (seg << 16) | (pdev->bus << 8) | pdev->devfn) )
            continue;

        sdev_id = ops->get_device_group_id(seg, pdev->bus, pdev->devfn);
        if ( (sdev_id == group_id) && (i < max_sdevs) )
        {
            bdf = 0;
            bdf |= (pdev->bus & 0xff) << 16;
            bdf |= (pdev->devfn & 0xff) << 8;

            if ( unlikely(copy_to_guest_offset(buf, i, &bdf, 1)) )
            {
                pcidevs_unlock();
                return -1;
            }
            i++;
        }
    }

    pcidevs_unlock();

    return i;
}

void iommu_dev_iotlb_flush_timeout(struct domain *d, struct pci_dev *pdev)
{
    pcidevs_lock();

    disable_ats_device(pdev);

    ASSERT(pdev->domain);
    if ( d != pdev->domain )
    {
        pcidevs_unlock();
        return;
    }

    list_del(&pdev->domain_list);
    pdev->domain = NULL;
    _pci_hide_device(pdev);

    if ( !d->is_shutting_down && printk_ratelimit() )
        printk(XENLOG_ERR
               "dom%d: ATS device %04x:%02x:%02x.%u flush failed\n",
               d->domain_id, pdev->seg, pdev->bus, PCI_SLOT(pdev->devfn),
               PCI_FUNC(pdev->devfn));
    if ( !is_hardware_domain(d) )
        domain_crash(d);

    pcidevs_unlock();
}

int iommu_do_pci_domctl(
    struct xen_domctl *domctl, struct domain *d,
    XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
{
    u16 seg;
    u8 bus, devfn;
    int ret = 0;
    uint32_t machine_sbdf;

    switch ( domctl->cmd )
    {
        unsigned int flags;

    case XEN_DOMCTL_get_device_group:
    {
        u32 max_sdevs;
        XEN_GUEST_HANDLE_64(uint32) sdevs;

        ret = xsm_get_device_group(XSM_HOOK, domctl->u.get_device_group.machine_sbdf);
        if ( ret )
            break;

        seg = domctl->u.get_device_group.machine_sbdf >> 16;
        bus = PCI_BUS(domctl->u.get_device_group.machine_sbdf);
        devfn = PCI_DEVFN2(domctl->u.get_device_group.machine_sbdf);
        max_sdevs = domctl->u.get_device_group.max_sdevs;
        sdevs = domctl->u.get_device_group.sdev_array;

        ret = iommu_get_device_group(d, seg, bus, devfn, sdevs, max_sdevs);
        if ( ret < 0 )
        {
            dprintk(XENLOG_ERR, "iommu_get_device_group() failed!\n");
            ret = -EFAULT;
            domctl->u.get_device_group.num_sdevs = 0;
        }
        else
        {
            domctl->u.get_device_group.num_sdevs = ret;
            ret = 0;
        }
        if ( __copy_field_to_guest(u_domctl, domctl, u.get_device_group) )
            ret = -EFAULT;
    }
    break;

    case XEN_DOMCTL_assign_device:
        ASSERT(d);
        /* fall through */
    case XEN_DOMCTL_test_assign_device:
        /* Don't support self-assignment of devices. */
        if ( d == current->domain )
        {
            ret = -EINVAL;
            break;
        }

        ret = -ENODEV;
        if ( domctl->u.assign_device.dev != XEN_DOMCTL_DEV_PCI )
            break;

        ret = -EINVAL;
        flags = domctl->u.assign_device.flags;
        if ( domctl->cmd == XEN_DOMCTL_assign_device
             ? d->is_dying || (flags & ~XEN_DOMCTL_DEV_RDM_RELAXED)
             : flags )
            break;

        machine_sbdf = domctl->u.assign_device.u.pci.machine_sbdf;

        ret = xsm_assign_device(XSM_HOOK, d, machine_sbdf);
        if ( ret )
            break;

        seg = machine_sbdf >> 16;
        bus = PCI_BUS(machine_sbdf);
        devfn = PCI_DEVFN2(machine_sbdf);

        ret = device_assigned(seg, bus, devfn);
        if ( domctl->cmd == XEN_DOMCTL_test_assign_device )
        {
            if ( ret )
            {
                printk(XENLOG_G_INFO
                       "%04x:%02x:%02x.%u already assigned, or non-existent\n",
                       seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
                ret = -EINVAL;
            }
            break;
        }
        if ( !ret )
            ret = assign_device(d, seg, bus, devfn, flags);
        if ( ret == -ERESTART )
            ret = hypercall_create_continuation(__HYPERVISOR_domctl,
                                                "h", u_domctl);
        else if ( ret )
            printk(XENLOG_G_ERR "XEN_DOMCTL_assign_device: "
                   "assign %04x:%02x:%02x.%u to dom%d failed (%d)\n",
                   seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
                   d->domain_id, ret);

        break;

    case XEN_DOMCTL_deassign_device:
        /* Don't support self-deassignment of devices. */
        if ( d == current->domain )
        {
            ret = -EINVAL;
            break;
        }

        ret = -ENODEV;
        if ( domctl->u.assign_device.dev != XEN_DOMCTL_DEV_PCI )
            break;

        ret = -EINVAL;
        if ( domctl->u.assign_device.flags )
            break;

        machine_sbdf = domctl->u.assign_device.u.pci.machine_sbdf;

        ret = xsm_deassign_device(XSM_HOOK, d, machine_sbdf);
        if ( ret )
            break;

        seg = machine_sbdf >> 16;
        bus = PCI_BUS(machine_sbdf);
        devfn = PCI_DEVFN2(machine_sbdf);

        pcidevs_lock();
        ret = deassign_device(d, seg, bus, devfn);
        pcidevs_unlock();
        if ( ret )
            printk(XENLOG_G_ERR
                   "deassign %04x:%02x:%02x.%u from dom%d failed (%d)\n",
                   seg, bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
                   d->domain_id, ret);

        break;

    default:
        ret = -ENOSYS;
        break;
    }

    return ret;
}

/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * indent-tabs-mode: nil
 * End:
 */