/*
 *  Copyright (C) 2001  MandrakeSoft S.A.
 *
 *    MandrakeSoft S.A.
 *    43, rue d'Aboukir
 *    75002 Paris - France
 *    http://www.linux-mandrake.com/
 *    http://www.mandrakesoft.com/
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public
 *  License as published by the Free Software Foundation; either
 *  version 2 of the License, or (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public
 *  License along with this library; If not, see <http://www.gnu.org/licenses/>.
 *
 * Support for virtual MSI logic
 * Will be merged with the virtual IOAPIC logic, since most of it is the same.
*/

#include <xen/types.h>
#include <xen/mm.h>
#include <xen/xmalloc.h>
#include <xen/lib.h>
#include <xen/errno.h>
#include <xen/sched.h>
#include <xen/irq.h>
#include <public/hvm/ioreq.h>
#include <asm/hvm/io.h>
#include <asm/hvm/vpic.h>
#include <asm/hvm/vlapic.h>
#include <asm/hvm/support.h>
#include <asm/current.h>
#include <asm/event.h>
#include <asm/io_apic.h>

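/*
 * Inject a single MSI-originated interrupt into the given vLAPIC.  Only
 * Fixed and LowestPrio delivery modes are valid here; anything else is a
 * caller error.
 */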
static void vmsi_inj_irq(
    struct vlapic *target,
    uint8_t vector,
    uint8_t trig_mode,
    uint8_t delivery_mode)
{
    HVM_DBG_LOG(DBG_LEVEL_VLAPIC, "vmsi_inj_irq: vec %02x trig %d dm %d\n",
                vector, trig_mode, delivery_mode);

    switch ( delivery_mode )
    {
    case dest_Fixed:
    case dest_LowestPrio:
        vlapic_set_irq(target, vector, trig_mode);
        break;
    default:
        BUG();
    }
}

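/*
 * Deliver an MSI to the guest: resolve the destination (fixed or lowest
 * priority) to the target vLAPIC(s) and inject the vector.  Returns 0 on
 * success, -ESRCH if no destination matches, or -EINVAL for an unsupported
 * delivery mode.
 */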
int vmsi_deliver(
    struct domain *d, int vector,
    uint8_t dest, uint8_t dest_mode,
    uint8_t delivery_mode, uint8_t trig_mode)
{
    struct vlapic *target;
    struct vcpu *v;

    switch ( delivery_mode )
    {
    case dest_LowestPrio:
        target = vlapic_lowest_prio(d, NULL, 0, dest, dest_mode);
        if ( target != NULL )
        {
            vmsi_inj_irq(target, vector, trig_mode, delivery_mode);
            break;
        }
        HVM_DBG_LOG(DBG_LEVEL_VLAPIC, "null MSI round robin: vector=%02x\n",
                    vector);
        return -ESRCH;

    case dest_Fixed:
        for_each_vcpu ( d, v )
            if ( vlapic_match_dest(vcpu_vlapic(v), NULL,
                                   0, dest, dest_mode) )
                vmsi_inj_irq(vcpu_vlapic(v), vector,
                             trig_mode, delivery_mode);
        break;

    default:
        printk(XENLOG_G_WARNING
               "%pv: Unsupported MSI delivery mode %d for Dom%d\n",
               current, delivery_mode, d->domain_id);
        return -EINVAL;
    }

    return 0;
}

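/*
 * Deliver the guest MSI bound to a passed-through pIRQ: decode destination,
 * delivery mode and trigger mode from the cached gflags/gvec and hand the
 * interrupt to vmsi_deliver().
 */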
void vmsi_deliver_pirq(struct domain *d, const struct hvm_pirq_dpci *pirq_dpci)
{
    uint32_t flags = pirq_dpci->gmsi.gflags;
    int vector = pirq_dpci->gmsi.gvec;
    uint8_t dest = (uint8_t)flags;
    bool dest_mode = flags & XEN_DOMCTL_VMSI_X86_DM_MASK;
    uint8_t delivery_mode = MASK_EXTR(flags, XEN_DOMCTL_VMSI_X86_DELIV_MASK);
    bool trig_mode = flags & XEN_DOMCTL_VMSI_X86_TRIG_MASK;

    HVM_DBG_LOG(DBG_LEVEL_IOAPIC,
                "msi: dest=%x dest_mode=%x delivery_mode=%x "
                "vector=%x trig_mode=%x\n",
                dest, dest_mode, delivery_mode, vector, trig_mode);

    ASSERT(pirq_dpci->flags & HVM_IRQ_DPCI_GUEST_MSI);

    vmsi_deliver(d, vector, dest, dest_mode, delivery_mode, trig_mode);
}

/*
 * Return the vcpu_id of the single vCPU matching the destination, or -1 if
 * the destination maps to multiple vCPUs (or to none).
 */
int hvm_girq_dest_2_vcpu_id(struct domain *d, uint8_t dest, uint8_t dest_mode)
{
    int dest_vcpu_id = -1, w = 0;
    struct vcpu *v;

    if ( d->max_vcpus == 1 )
        return 0;

    for_each_vcpu ( d, v )
    {
        if ( vlapic_match_dest(vcpu_vlapic(v), NULL, 0, dest, dest_mode) )
        {
            w++;
            dest_vcpu_id = v->vcpu_id;
        }
    }
    if ( w > 1 )
        return -1;

    return dest_vcpu_id;
}

/* MSI-X mask bit hypervisor interception */
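/*
 * Per-device state for an intercepted MSI-X table.  The address/data words
 * of the first MAX_MSIX_ACC_ENTRIES entries are shadowed in gentries[]
 * (validity tracked via acc_valid), while table_flags records which entries
 * had their address/data modified and still need handling on unmask.
 */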
struct msixtbl_entry
{
    struct list_head list;
    atomic_t refcnt;    /* how many times bind_pt_irq was called for the device */

    /* TODO: resolve the potential race by destruction of pdev */
    struct pci_dev *pdev;
    unsigned long gtable;       /* gpa of msix table */
    DECLARE_BITMAP(table_flags, MAX_MSIX_TABLE_ENTRIES);
#define MAX_MSIX_ACC_ENTRIES 3
    unsigned int table_len;
    struct {
        uint32_t msi_ad[3];     /* Shadow of address low, high and data */
    } gentries[MAX_MSIX_ACC_ENTRIES];
    DECLARE_BITMAP(acc_valid, 3 * MAX_MSIX_ACC_ENTRIES);
#define acc_bit(what, ent, slot, idx) \
        what##_bit((slot) * 3 + (idx), (ent)->acc_valid)
    struct rcu_head rcu;
};

static DEFINE_RCU_READ_LOCK(msixtbl_rcu_lock);

/*
 * MSI-X table infrastructure is dynamically initialised when an MSI-X capable
 * device is passed through to a domain, rather than unconditionally for all
 * domains.
 */
static bool msixtbl_initialised(const struct domain *d)
{
    return !!d->arch.hvm_domain.msixtbl_list.next;
}

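/*
 * Find the msixtbl_entry whose guest MSI-X table range covers @addr.
 * Callers hold msixtbl_rcu_lock for reading.
 */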
static struct msixtbl_entry *msixtbl_find_entry(
    struct vcpu *v, unsigned long addr)
{
    struct msixtbl_entry *entry;
    struct domain *d = v->domain;

    list_for_each_entry( entry, &d->arch.hvm_domain.msixtbl_list, list )
        if ( addr >= entry->gtable &&
             addr < entry->gtable + entry->table_len )
            return entry;

    return NULL;
}

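/*
 * Translate an address within an intercepted MSI-X table into the
 * corresponding msi_desc of the physical device, if one exists.
 */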
static struct msi_desc *msixtbl_addr_to_desc(
    const struct msixtbl_entry *entry, unsigned long addr)
{
    unsigned int nr_entry;
    struct msi_desc *desc;

    if ( !entry || !entry->pdev )
        return NULL;

    nr_entry = (addr - entry->gtable) / PCI_MSIX_ENTRY_SIZE;

    list_for_each_entry( desc, &entry->pdev->msi_list, list )
        if ( desc->msi_attrib.type == PCI_CAP_ID_MSIX &&
             desc->msi_attrib.entry_nr == nr_entry )
            return desc;

    return NULL;
}

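/*
 * MMIO read handler for intercepted MSI-X tables.  Address/data words are
 * served from the per-entry shadow (only the first MAX_MSIX_ACC_ENTRIES
 * entries are shadowed, and only if previously written); the vector control
 * word reflects the guest's view of the mask bit.  Anything else is declined
 * as X86EMUL_UNHANDLEABLE.
 */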
static int msixtbl_read(const struct hvm_io_handler *handler,
                        uint64_t address, uint32_t len, uint64_t *pval)
{
    unsigned long offset;
    struct msixtbl_entry *entry;
    unsigned int nr_entry, index;
    int r = X86EMUL_UNHANDLEABLE;

    if ( (len != 4 && len != 8) || (address & (len - 1)) )
        return r;

    rcu_read_lock(&msixtbl_rcu_lock);

    entry = msixtbl_find_entry(current, address);
    if ( !entry )
        goto out;
    offset = address & (PCI_MSIX_ENTRY_SIZE - 1);

    if ( offset != PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET )
    {
        nr_entry = (address - entry->gtable) / PCI_MSIX_ENTRY_SIZE;
        index = offset / sizeof(uint32_t);
        if ( nr_entry >= MAX_MSIX_ACC_ENTRIES ||
             !acc_bit(test, entry, nr_entry, index) )
            goto out;
        *pval = entry->gentries[nr_entry].msi_ad[index];
        if ( len == 8 )
        {
            if ( index )
                offset = PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET;
            else if ( acc_bit(test, entry, nr_entry, 1) )
                *pval |= (u64)entry->gentries[nr_entry].msi_ad[1] << 32;
            else
                goto out;
        }
    }
    if ( offset == PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET )
    {
        const struct msi_desc *msi_desc = msixtbl_addr_to_desc(entry, address);

        if ( !msi_desc )
            goto out;
        if ( len == 4 )
            *pval = MASK_INSR(msi_desc->msi_attrib.guest_masked,
                              PCI_MSIX_VECTOR_BITMASK);
        else
            *pval |= (u64)MASK_INSR(msi_desc->msi_attrib.guest_masked,
                                    PCI_MSIX_VECTOR_BITMASK) << 32;
    }

    r = X86EMUL_OKAY;
out:
    rcu_read_unlock(&msixtbl_rcu_lock);
    return r;
}

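/*
 * Handle a guest write to an intercepted MSI-X table.  Address/data words
 * are stored in the shadow and the entry is flagged in table_flags; vector
 * control writes update the guest mask state of the physical IRQ.  If an
 * unmask is seen after the address/data pair was modified, the write is
 * left for the device model and the unmask is replayed later via
 * msix_write_completion() (msix_unmask_address).
 */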
static int msixtbl_write(struct vcpu *v, unsigned long address,
                         unsigned int len, unsigned long val)
{
    unsigned long offset;
    struct msixtbl_entry *entry;
    const struct msi_desc *msi_desc;
    unsigned int nr_entry, index;
    int r = X86EMUL_UNHANDLEABLE;
    unsigned long flags;
    struct irq_desc *desc;

    if ( (len != 4 && len != 8) || (address & (len - 1)) )
        return r;

    rcu_read_lock(&msixtbl_rcu_lock);

    entry = msixtbl_find_entry(v, address);
    if ( !entry )
        goto out;
    nr_entry = (address - entry->gtable) / PCI_MSIX_ENTRY_SIZE;

    offset = address & (PCI_MSIX_ENTRY_SIZE - 1);
    if ( offset != PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET )
    {
        index = offset / sizeof(uint32_t);
        if ( nr_entry < MAX_MSIX_ACC_ENTRIES )
        {
            entry->gentries[nr_entry].msi_ad[index] = val;
            acc_bit(set, entry, nr_entry, index);
            if ( len == 8 && !index )
            {
                entry->gentries[nr_entry].msi_ad[1] = val >> 32;
                acc_bit(set, entry, nr_entry, 1);
            }
        }
        set_bit(nr_entry, &entry->table_flags);
        if ( len != 8 || !index )
            goto out;
        val >>= 32;
        address += 4;
    }

    /* Exit to device model when unmasking and address/data got modified. */
    if ( !(val & PCI_MSIX_VECTOR_BITMASK) &&
         test_and_clear_bit(nr_entry, &entry->table_flags) )
    {
        v->arch.hvm_vcpu.hvm_io.msix_unmask_address = address;
        goto out;
    }

    msi_desc = msixtbl_addr_to_desc(entry, address);
    if ( !msi_desc || msi_desc->irq < 0 )
        goto out;

    desc = irq_to_desc(msi_desc->irq);
    if ( !desc )
        goto out;

    spin_lock_irqsave(&desc->lock, flags);

    if ( !desc->msi_desc )
        goto unlock;

    ASSERT(msi_desc == desc->msi_desc);

    guest_mask_msi_irq(desc, !!(val & PCI_MSIX_VECTOR_BITMASK));

unlock:
    spin_unlock_irqrestore(&desc->lock, flags);
    if ( len == 4 )
        r = X86EMUL_OKAY;

out:
    rcu_read_unlock(&msixtbl_rcu_lock);
    return r;
}

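/* hvm_io_ops write hook: dispatch to msixtbl_write() for the current vCPU. */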
static int _msixtbl_write(const struct hvm_io_handler *handler,
                          uint64_t address, uint32_t len, uint64_t val)
{
    return msixtbl_write(current, address, len, val);
}

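/*
 * Accept callback of the MSI-X MMIO handler: claim an access if it falls
 * within a registered MSI-X table entry.  Writes that cannot be claimed yet
 * are snooped for mask-bit clears (msix_snoop_address/msix_snoop_gpa), so a
 * later msixtbl_pt_register() / msix_write_completion() can replay the
 * unmask.
 */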
static bool_t msixtbl_range(const struct hvm_io_handler *handler,
                            const ioreq_t *r)
{
    struct vcpu *curr = current;
    unsigned long addr = r->addr;
    const struct msi_desc *desc;

    ASSERT(r->type == IOREQ_TYPE_COPY);

    rcu_read_lock(&msixtbl_rcu_lock);
    desc = msixtbl_addr_to_desc(msixtbl_find_entry(curr, addr), addr);
    rcu_read_unlock(&msixtbl_rcu_lock);

    if ( desc )
        return 1;

    if ( r->state == STATE_IOREQ_READY && r->dir == IOREQ_WRITE )
    {
        unsigned int size = r->size;

        if ( !r->data_is_ptr )
        {
            uint64_t data = r->data;

            if ( size == 8 )
            {
                BUILD_BUG_ON(!(PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET & 4));
                data >>= 32;
                addr += size = 4;
            }
            if ( size == 4 &&
                 ((addr & (PCI_MSIX_ENTRY_SIZE - 1)) ==
                  PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET) &&
                 !(data & PCI_MSIX_VECTOR_BITMASK) )
            {
                curr->arch.hvm_vcpu.hvm_io.msix_snoop_address = addr;
                curr->arch.hvm_vcpu.hvm_io.msix_snoop_gpa = 0;
            }
        }
        else if ( (size == 4 || size == 8) &&
                  /* Only support forward REP MOVS for now. */
                  !r->df &&
                  /*
                   * Only fully support accesses to a single table entry for
                   * now (if multiple ones get written to in one go, only the
                   * final one gets dealt with).
                   */
                  r->count && r->count <= PCI_MSIX_ENTRY_SIZE / size &&
                  !((addr + (size * r->count)) & (PCI_MSIX_ENTRY_SIZE - 1)) )
        {
            BUILD_BUG_ON((PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET + 4) &
                         (PCI_MSIX_ENTRY_SIZE - 1));

            curr->arch.hvm_vcpu.hvm_io.msix_snoop_address =
                addr + size * r->count - 4;
            curr->arch.hvm_vcpu.hvm_io.msix_snoop_gpa =
                r->data + size * r->count - 4;
        }
    }

    return 0;
}

static const struct hvm_io_ops msixtbl_mmio_ops = {
    .accept = msixtbl_range,
    .read = msixtbl_read,
    .write = _msixtbl_write,
};

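/*
 * Initialise a pre-allocated msixtbl_entry for @pdev's MSI-X table located
 * at guest physical address @gtable and publish it on the domain's list.
 */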
static void add_msixtbl_entry(struct domain *d,
                              struct pci_dev *pdev,
                              uint64_t gtable,
                              struct msixtbl_entry *entry)
{
    INIT_LIST_HEAD(&entry->list);
    INIT_RCU_HEAD(&entry->rcu);
    atomic_set(&entry->refcnt, 0);

    entry->table_len = pdev->msix->nr_entries * PCI_MSIX_ENTRY_SIZE;
    entry->pdev = pdev;
    entry->gtable = (unsigned long) gtable;

    list_add_rcu(&entry->list, &d->arch.hvm_domain.msixtbl_list);
}

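/* RCU callback: actually free an entry once readers are done with it. */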
static void free_msixtbl_entry(struct rcu_head *rcu)
{
    struct msixtbl_entry *entry;

    entry = container_of(rcu, struct msixtbl_entry, rcu);

    xfree(entry);
}

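/* Unlink an entry from the domain's list; freeing is deferred via RCU. */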
static void del_msixtbl_entry(struct msixtbl_entry *entry)
{
    list_del_rcu(&entry->list);
    call_rcu(&entry->rcu, free_msixtbl_entry);
}

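/*
 * Register MSI-X table interception for the device backing @pirq: look up
 * (or create) the per-device msixtbl_entry for the guest table at @gtable
 * and take a reference on it.  Any snooped unmask write to this entry from
 * a vCPU blocked in Xen is converted into a deferred unmask, to be replayed
 * by msix_write_completion().
 */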
int msixtbl_pt_register(struct domain *d, struct pirq *pirq, uint64_t gtable)
{
    struct irq_desc *irq_desc;
    struct msi_desc *msi_desc;
    struct pci_dev *pdev;
    struct msixtbl_entry *entry, *new_entry;
    int r = -EINVAL;

    ASSERT(pcidevs_locked());
    ASSERT(spin_is_locked(&d->event_lock));

    if ( !msixtbl_initialised(d) )
        return -ENODEV;

    /*
     * Calling xmalloc() with IRQs disabled would trip check_lock() on
     * xenpool->lock, so allocate the entry up front, before taking the
     * IRQ-disabling irq_desc lock below.
     */
    new_entry = xzalloc(struct msixtbl_entry);
    if ( !new_entry )
        return -ENOMEM;

    irq_desc = pirq_spin_lock_irq_desc(pirq, NULL);
    if ( !irq_desc )
    {
        xfree(new_entry);
        return r;
    }

    msi_desc = irq_desc->msi_desc;
    if ( !msi_desc )
        goto out;

    pdev = msi_desc->dev;

    list_for_each_entry( entry, &d->arch.hvm_domain.msixtbl_list, list )
        if ( pdev == entry->pdev )
            goto found;

    entry = new_entry;
    new_entry = NULL;
    add_msixtbl_entry(d, pdev, gtable, entry);

found:
    atomic_inc(&entry->refcnt);
    r = 0;

out:
    spin_unlock_irq(&irq_desc->lock);
    xfree(new_entry);

    if ( !r )
    {
        struct vcpu *v;

        for_each_vcpu ( d, v )
        {
            if ( (v->pause_flags & VPF_blocked_in_xen) &&
                 !v->arch.hvm_vcpu.hvm_io.msix_snoop_gpa &&
                 v->arch.hvm_vcpu.hvm_io.msix_snoop_address ==
                 (gtable + msi_desc->msi_attrib.entry_nr *
                           PCI_MSIX_ENTRY_SIZE +
                  PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET) )
                v->arch.hvm_vcpu.hvm_io.msix_unmask_address =
                    v->arch.hvm_vcpu.hvm_io.msix_snoop_address;
        }
    }

    return r;
}

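/*
 * Drop the reference on the MSI-X table interception entry that was taken
 * by msixtbl_pt_register() for @pirq's device.
 */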
void msixtbl_pt_unregister(struct domain *d, struct pirq *pirq)
{
    struct irq_desc *irq_desc;
    struct msi_desc *msi_desc;
    struct pci_dev *pdev;
    struct msixtbl_entry *entry;

    ASSERT(pcidevs_locked());
    ASSERT(spin_is_locked(&d->event_lock));

    if ( !msixtbl_initialised(d) )
        return;

    irq_desc = pirq_spin_lock_irq_desc(pirq, NULL);
    if ( !irq_desc )
        return;

    msi_desc = irq_desc->msi_desc;
    if ( !msi_desc )
        goto out;

    pdev = msi_desc->dev;

    list_for_each_entry( entry, &d->arch.hvm_domain.msixtbl_list, list )
        if ( pdev == entry->pdev )
            goto found;

out:
    spin_unlock_irq(&irq_desc->lock);
    return;

found:
    if ( !atomic_dec_and_test(&entry->refcnt) )
        del_msixtbl_entry(entry);

    spin_unlock_irq(&irq_desc->lock);
}

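/*
 * Set up MSI-X table interception for an HVM domain with a vLAPIC:
 * initialise the per-domain entry list and register the MMIO handler.
 */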
void msixtbl_init(struct domain *d)
{
    struct hvm_io_handler *handler;

    if ( !is_hvm_domain(d) || !has_vlapic(d) || msixtbl_initialised(d) )
        return;

    INIT_LIST_HEAD(&d->arch.hvm_domain.msixtbl_list);

    handler = hvm_next_io_handler(d);
    if ( handler )
    {
        handler->type = IOREQ_TYPE_COPY;
        handler->ops = &msixtbl_mmio_ops;
    }
}

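/* Drop all remaining MSI-X table interception entries of a domain. */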
void msixtbl_pt_cleanup(struct domain *d)
{
    struct msixtbl_entry *entry, *temp;

    if ( !msixtbl_initialised(d) )
        return;

    spin_lock(&d->event_lock);

    list_for_each_entry_safe( entry, temp,
                              &d->arch.hvm_domain.msixtbl_list, list )
        del_msixtbl_entry(entry);

    spin_unlock(&d->event_lock);
}

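/*
 * Called when a vCPU's outstanding (device model handled) MMIO write has
 * completed: replay a deferred or successfully snooped unmask of an MSI-X
 * vector control word, so the guest's mask state gets applied to the
 * physical interrupt.
 */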
void msix_write_completion(struct vcpu *v)
{
    unsigned long ctrl_address = v->arch.hvm_vcpu.hvm_io.msix_unmask_address;
    unsigned long snoop_addr = v->arch.hvm_vcpu.hvm_io.msix_snoop_address;

    v->arch.hvm_vcpu.hvm_io.msix_snoop_address = 0;

    if ( !ctrl_address && snoop_addr &&
         v->arch.hvm_vcpu.hvm_io.msix_snoop_gpa )
    {
        const struct msi_desc *desc;
        uint32_t data;

        rcu_read_lock(&msixtbl_rcu_lock);
        desc = msixtbl_addr_to_desc(msixtbl_find_entry(v, snoop_addr),
                                    snoop_addr);
        rcu_read_unlock(&msixtbl_rcu_lock);

        if ( desc &&
             hvm_copy_from_guest_phys(&data,
                                      v->arch.hvm_vcpu.hvm_io.msix_snoop_gpa,
                                      sizeof(data)) == HVMTRANS_okay &&
             !(data & PCI_MSIX_VECTOR_BITMASK) )
            ctrl_address = snoop_addr;
    }

    if ( !ctrl_address )
        return;

    v->arch.hvm_vcpu.hvm_io.msix_unmask_address = 0;
    if ( msixtbl_write(v, ctrl_address, 4, 0) != X86EMUL_OKAY )
        gdprintk(XENLOG_WARNING, "MSI-X write completion failure\n");
}