/*
 * Copyright (C) 2001 MandrakeSoft S.A.
 *
 * MandrakeSoft S.A.
 * 43, rue d'Aboukir
 * 75002 Paris - France
 * http://www.linux-mandrake.com/
 * http://www.mandrakesoft.com/
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; If not, see <http://www.gnu.org/licenses/>.
 *
 * Support for virtual MSI logic
 * Will be merged with the virtual IOAPIC logic, since most of it is the same.
 */

#include <xen/types.h>
#include <xen/mm.h>
#include <xen/xmalloc.h>
#include <xen/lib.h>
#include <xen/errno.h>
#include <xen/sched.h>
#include <xen/irq.h>
#include <public/hvm/ioreq.h>
#include <asm/hvm/io.h>
#include <asm/hvm/vpic.h>
#include <asm/hvm/vlapic.h>
#include <asm/hvm/support.h>
#include <asm/current.h>
#include <asm/event.h>
#include <asm/io_apic.h>

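/*
 * Inject an MSI-originated interrupt into the target vLAPIC.  Only the
 * Fixed and LowestPrio delivery modes are expected here; anything else
 * is a caller bug.
 */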
static void vmsi_inj_irq(
    struct vlapic *target,
    uint8_t vector,
    uint8_t trig_mode,
    uint8_t delivery_mode)
{
    HVM_DBG_LOG(DBG_LEVEL_VLAPIC, "vmsi_inj_irq: vec %02x trig %d dm %d\n",
                vector, trig_mode, delivery_mode);

    switch ( delivery_mode )
    {
    case dest_Fixed:
    case dest_LowestPrio:
        vlapic_set_irq(target, vector, trig_mode);
        break;
    default:
        BUG();
    }
}

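/*
 * Deliver a guest MSI to the vCPU(s) selected by dest/dest_mode.  Returns
 * 0 on success, -ESRCH if lowest-priority arbitration yields no target,
 * or -EINVAL for an unsupported delivery mode.
 */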
int vmsi_deliver(
    struct domain *d, int vector,
    uint8_t dest, uint8_t dest_mode,
    uint8_t delivery_mode, uint8_t trig_mode)
{
    struct vlapic *target;
    struct vcpu *v;

    switch ( delivery_mode )
    {
    case dest_LowestPrio:
        target = vlapic_lowest_prio(d, NULL, 0, dest, dest_mode);
        if ( target != NULL )
        {
            vmsi_inj_irq(target, vector, trig_mode, delivery_mode);
            break;
        }
        HVM_DBG_LOG(DBG_LEVEL_VLAPIC, "null MSI round robin: vector=%02x\n",
                    vector);
        return -ESRCH;

    case dest_Fixed:
        for_each_vcpu ( d, v )
            if ( vlapic_match_dest(vcpu_vlapic(v), NULL,
                                   0, dest, dest_mode) )
                vmsi_inj_irq(vcpu_vlapic(v), vector,
                             trig_mode, delivery_mode);
        break;

    default:
        printk(XENLOG_G_WARNING
               "%pv: Unsupported MSI delivery mode %d for Dom%d\n",
               current, delivery_mode, d->domain_id);
        return -EINVAL;
    }

    return 0;
}

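/*
 * Deliver the guest MSI described by a dpci binding: decode destination,
 * delivery mode and trigger mode from the cached gflags and hand the
 * vector to vmsi_deliver().
 */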
void vmsi_deliver_pirq(struct domain *d, const struct hvm_pirq_dpci *pirq_dpci)
{
    uint32_t flags = pirq_dpci->gmsi.gflags;
    int vector = pirq_dpci->gmsi.gvec;
    uint8_t dest = (uint8_t)flags;
    bool dest_mode = flags & XEN_DOMCTL_VMSI_X86_DM_MASK;
    uint8_t delivery_mode = MASK_EXTR(flags, XEN_DOMCTL_VMSI_X86_DELIV_MASK);
    bool trig_mode = flags & XEN_DOMCTL_VMSI_X86_TRIG_MASK;

    HVM_DBG_LOG(DBG_LEVEL_IOAPIC,
                "msi: dest=%x dest_mode=%x delivery_mode=%x "
                "vector=%x trig_mode=%x\n",
                dest, dest_mode, delivery_mode, vector, trig_mode);

    ASSERT(pirq_dpci->flags & HVM_IRQ_DPCI_GUEST_MSI);

    vmsi_deliver(d, vector, dest, dest_mode, delivery_mode, trig_mode);
}

/*
 * Return value: -1 if the destination maps to multiple vCPUs, otherwise the
 * matching dest_vcpu_id.
 */
int hvm_girq_dest_2_vcpu_id(struct domain *d, uint8_t dest, uint8_t dest_mode)
{
    int dest_vcpu_id = -1, w = 0;
    struct vcpu *v;

    if ( d->max_vcpus == 1 )
        return 0;

    for_each_vcpu ( d, v )
    {
        if ( vlapic_match_dest(vcpu_vlapic(v), NULL, 0, dest, dest_mode) )
        {
            w++;
            dest_vcpu_id = v->vcpu_id;
        }
    }
    if ( w > 1 )
        return -1;

    return dest_vcpu_id;
}

/* MSI-X mask bit hypervisor interception */
struct msixtbl_entry
{
    struct list_head list;
    atomic_t refcnt;            /* how many bind_pt_irq called for the device */

    /* TODO: resolve the potential race by destruction of pdev */
    struct pci_dev *pdev;
    unsigned long gtable;       /* gpa of msix table */
    DECLARE_BITMAP(table_flags, MAX_MSIX_TABLE_ENTRIES);
#define MAX_MSIX_ACC_ENTRIES 3
    unsigned int table_len;
    struct {
        uint32_t msi_ad[3];     /* Shadow of address low, high and data */
    } gentries[MAX_MSIX_ACC_ENTRIES];
    DECLARE_BITMAP(acc_valid, 3 * MAX_MSIX_ACC_ENTRIES);
#define acc_bit(what, ent, slot, idx) \
        what##_bit((slot) * 3 + (idx), (ent)->acc_valid)
    struct rcu_head rcu;
};

static DEFINE_RCU_READ_LOCK(msixtbl_rcu_lock);

/*
 * MSI-X table infrastructure is dynamically initialised when an MSI-X capable
 * device is passed through to a domain, rather than unconditionally for all
 * domains.
 */
static bool msixtbl_initialised(const struct domain *d)
{
    return !!d->arch.hvm_domain.msixtbl_list.next;
}

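/*
 * Look up the msixtbl_entry whose guest MSI-X table range covers @addr.
 * Callers are expected to hold the RCU read lock while using the result.
 */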
static struct msixtbl_entry *msixtbl_find_entry(
    struct vcpu *v, unsigned long addr)
{
    struct msixtbl_entry *entry;
    struct domain *d = v->domain;

    list_for_each_entry( entry, &d->arch.hvm_domain.msixtbl_list, list )
        if ( addr >= entry->gtable &&
             addr < entry->gtable + entry->table_len )
            return entry;

    return NULL;
}

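/*
 * Map a guest MSI-X table address to the msi_desc of the corresponding
 * physical MSI-X vector on the passed-through device, if any.
 */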
static struct msi_desc *msixtbl_addr_to_desc(
    const struct msixtbl_entry *entry, unsigned long addr)
{
    unsigned int nr_entry;
    struct msi_desc *desc;

    if ( !entry || !entry->pdev )
        return NULL;

    nr_entry = (addr - entry->gtable) / PCI_MSIX_ENTRY_SIZE;

    list_for_each_entry( desc, &entry->pdev->msi_list, list )
        if ( desc->msi_attrib.type == PCI_CAP_ID_MSIX &&
             desc->msi_attrib.entry_nr == nr_entry )
            return desc;

    return NULL;
}

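/*
 * Accelerated read handler for the virtual MSI-X table: address/data fields
 * are served from the per-entry shadow (when previously written and hence
 * valid), while the vector control word reflects the guest-visible mask bit
 * of the underlying msi_desc.
 */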
static int msixtbl_read(const struct hvm_io_handler *handler,
                        uint64_t address, uint32_t len, uint64_t *pval)
{
    unsigned long offset;
    struct msixtbl_entry *entry;
    unsigned int nr_entry, index;
    int r = X86EMUL_UNHANDLEABLE;

    if ( (len != 4 && len != 8) || (address & (len - 1)) )
        return r;

    rcu_read_lock(&msixtbl_rcu_lock);

    entry = msixtbl_find_entry(current, address);
    if ( !entry )
        goto out;
    offset = address & (PCI_MSIX_ENTRY_SIZE - 1);

    if ( offset != PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET )
    {
        nr_entry = (address - entry->gtable) / PCI_MSIX_ENTRY_SIZE;
        index = offset / sizeof(uint32_t);
        if ( nr_entry >= MAX_MSIX_ACC_ENTRIES ||
             !acc_bit(test, entry, nr_entry, index) )
            goto out;
        *pval = entry->gentries[nr_entry].msi_ad[index];
        if ( len == 8 )
        {
            if ( index )
                offset = PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET;
            else if ( acc_bit(test, entry, nr_entry, 1) )
                *pval |= (u64)entry->gentries[nr_entry].msi_ad[1] << 32;
            else
                goto out;
        }
    }
    if ( offset == PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET )
    {
        const struct msi_desc *msi_desc = msixtbl_addr_to_desc(entry, address);

        if ( !msi_desc )
            goto out;
        if ( len == 4 )
            *pval = MASK_INSR(msi_desc->msi_attrib.guest_masked,
                              PCI_MSIX_VECTOR_BITMASK);
        else
            *pval |= (u64)MASK_INSR(msi_desc->msi_attrib.guest_masked,
                                    PCI_MSIX_VECTOR_BITMASK) << 32;
    }

    r = X86EMUL_OKAY;
 out:
    rcu_read_unlock(&msixtbl_rcu_lock);
    return r;
}

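/*
 * Accelerated write handler for the virtual MSI-X table.  Writes to the
 * address/data fields are cached in the shadow and flagged in table_flags.
 * Writes to the vector control word update the guest mask bit directly,
 * unless the address/data pair was modified beforehand, in which case the
 * access is left to the device model and the address is recorded so the
 * unmask can be replayed later (see msix_write_completion()).
 */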
static int msixtbl_write(struct vcpu *v, unsigned long address,
                         unsigned int len, unsigned long val)
{
    unsigned long offset;
    struct msixtbl_entry *entry;
    const struct msi_desc *msi_desc;
    unsigned int nr_entry, index;
    int r = X86EMUL_UNHANDLEABLE;
    unsigned long flags;
    struct irq_desc *desc;

    if ( (len != 4 && len != 8) || (address & (len - 1)) )
        return r;

    rcu_read_lock(&msixtbl_rcu_lock);

    entry = msixtbl_find_entry(v, address);
    if ( !entry )
        goto out;
    nr_entry = (address - entry->gtable) / PCI_MSIX_ENTRY_SIZE;

    offset = address & (PCI_MSIX_ENTRY_SIZE - 1);
    if ( offset != PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET )
    {
        index = offset / sizeof(uint32_t);
        if ( nr_entry < MAX_MSIX_ACC_ENTRIES )
        {
            entry->gentries[nr_entry].msi_ad[index] = val;
            acc_bit(set, entry, nr_entry, index);
            if ( len == 8 && !index )
            {
                entry->gentries[nr_entry].msi_ad[1] = val >> 32;
                acc_bit(set, entry, nr_entry, 1);
            }
        }
        set_bit(nr_entry, &entry->table_flags);
        if ( len != 8 || !index )
            goto out;
        val >>= 32;
        address += 4;
    }

    /* Exit to device model when unmasking and address/data got modified. */
    if ( !(val & PCI_MSIX_VECTOR_BITMASK) &&
         test_and_clear_bit(nr_entry, &entry->table_flags) )
    {
        v->arch.hvm_vcpu.hvm_io.msix_unmask_address = address;
        goto out;
    }

    msi_desc = msixtbl_addr_to_desc(entry, address);
    if ( !msi_desc || msi_desc->irq < 0 )
        goto out;

    desc = irq_to_desc(msi_desc->irq);
    if ( !desc )
        goto out;

    spin_lock_irqsave(&desc->lock, flags);

    if ( !desc->msi_desc )
        goto unlock;

    ASSERT(msi_desc == desc->msi_desc);

    guest_mask_msi_irq(desc, !!(val & PCI_MSIX_VECTOR_BITMASK));

 unlock:
    spin_unlock_irqrestore(&desc->lock, flags);
    if ( len == 4 )
        r = X86EMUL_OKAY;

 out:
    rcu_read_unlock(&msixtbl_rcu_lock);
    return r;
}

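/* hvm_io_ops write hook: msixtbl_write() on behalf of the current vCPU. */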
static int _msixtbl_write(const struct hvm_io_handler *handler,
                          uint64_t address, uint32_t len, uint64_t val)
{
    return msixtbl_write(current, address, len, val);
}

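/*
 * Accept callback: an access is handled here only if it targets an MSI-X
 * table entry backed by a passed-through vector.  As a side effect, writes
 * which clear the mask bit are snooped so msix_write_completion() can pick
 * up changes made via the device model path.
 */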
static bool_t msixtbl_range(const struct hvm_io_handler *handler,
                            const ioreq_t *r)
{
    struct vcpu *curr = current;
    unsigned long addr = r->addr;
    const struct msi_desc *desc;

    ASSERT(r->type == IOREQ_TYPE_COPY);

    rcu_read_lock(&msixtbl_rcu_lock);
    desc = msixtbl_addr_to_desc(msixtbl_find_entry(curr, addr), addr);
    rcu_read_unlock(&msixtbl_rcu_lock);

    if ( desc )
        return 1;

    if ( r->state == STATE_IOREQ_READY && r->dir == IOREQ_WRITE )
    {
        unsigned int size = r->size;

        if ( !r->data_is_ptr )
        {
            uint64_t data = r->data;

            if ( size == 8 )
            {
                BUILD_BUG_ON(!(PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET & 4));
                data >>= 32;
                addr += size = 4;
            }
            if ( size == 4 &&
                 ((addr & (PCI_MSIX_ENTRY_SIZE - 1)) ==
                  PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET) &&
                 !(data & PCI_MSIX_VECTOR_BITMASK) )
            {
                curr->arch.hvm_vcpu.hvm_io.msix_snoop_address = addr;
                curr->arch.hvm_vcpu.hvm_io.msix_snoop_gpa = 0;
            }
        }
        else if ( (size == 4 || size == 8) &&
                  /* Only support forward REP MOVS for now. */
                  !r->df &&
                  /*
                   * Only fully support accesses to a single table entry for
                   * now (if multiple ones get written to in one go, only the
                   * final one gets dealt with).
                   */
                  r->count && r->count <= PCI_MSIX_ENTRY_SIZE / size &&
                  !((addr + (size * r->count)) & (PCI_MSIX_ENTRY_SIZE - 1)) )
        {
            BUILD_BUG_ON((PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET + 4) &
                         (PCI_MSIX_ENTRY_SIZE - 1));

            curr->arch.hvm_vcpu.hvm_io.msix_snoop_address =
                addr + size * r->count - 4;
            curr->arch.hvm_vcpu.hvm_io.msix_snoop_gpa =
                r->data + size * r->count - 4;
        }
    }

    return 0;
}

static const struct hvm_io_ops msixtbl_mmio_ops = {
    .accept = msixtbl_range,
    .read = msixtbl_read,
    .write = _msixtbl_write,
};

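/*
 * Initialise a pre-allocated msixtbl_entry for @pdev and add it to the
 * domain's RCU-protected list of intercepted MSI-X tables.
 */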
static void add_msixtbl_entry(struct domain *d,
                              struct pci_dev *pdev,
                              uint64_t gtable,
                              struct msixtbl_entry *entry)
{
    INIT_LIST_HEAD(&entry->list);
    INIT_RCU_HEAD(&entry->rcu);
    atomic_set(&entry->refcnt, 0);

    entry->table_len = pdev->msix->nr_entries * PCI_MSIX_ENTRY_SIZE;
    entry->pdev = pdev;
    entry->gtable = (unsigned long) gtable;

    list_add_rcu(&entry->list, &d->arch.hvm_domain.msixtbl_list);
}

static void free_msixtbl_entry(struct rcu_head *rcu)
{
    struct msixtbl_entry *entry;

    entry = container_of(rcu, struct msixtbl_entry, rcu);

    xfree(entry);
}

static void del_msixtbl_entry(struct msixtbl_entry *entry)
{
    list_del_rcu(&entry->list);
    call_rcu(&entry->rcu, free_msixtbl_entry);
}

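/*
 * Register interception of the guest MSI-X table for the device behind
 * @pirq, creating a new msixtbl_entry if the device has none yet.  Called
 * with the pcidevs lock and the domain's event lock held.
 */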
int msixtbl_pt_register(struct domain *d, struct pirq *pirq, uint64_t gtable)
{
    struct irq_desc *irq_desc;
    struct msi_desc *msi_desc;
    struct pci_dev *pdev;
    struct msixtbl_entry *entry, *new_entry;
    int r = -EINVAL;

    ASSERT(pcidevs_locked());
    ASSERT(spin_is_locked(&d->event_lock));

    if ( !msixtbl_initialised(d) )
        return -ENODEV;

    /*
     * xmalloc() with irq_disabled causes the failure of check_lock()
     * for xenpool->lock. So we allocate an entry beforehand.
     */
    new_entry = xzalloc(struct msixtbl_entry);
    if ( !new_entry )
        return -ENOMEM;

    irq_desc = pirq_spin_lock_irq_desc(pirq, NULL);
    if ( !irq_desc )
    {
        xfree(new_entry);
        return r;
    }

    msi_desc = irq_desc->msi_desc;
    if ( !msi_desc )
        goto out;

    pdev = msi_desc->dev;

    list_for_each_entry( entry, &d->arch.hvm_domain.msixtbl_list, list )
        if ( pdev == entry->pdev )
            goto found;

    entry = new_entry;
    new_entry = NULL;
    add_msixtbl_entry(d, pdev, gtable, entry);

 found:
    atomic_inc(&entry->refcnt);
    r = 0;

 out:
    spin_unlock_irq(&irq_desc->lock);
    xfree(new_entry);

    if ( !r )
    {
        struct vcpu *v;

        for_each_vcpu ( d, v )
        {
            if ( (v->pause_flags & VPF_blocked_in_xen) &&
                 !v->arch.hvm_vcpu.hvm_io.msix_snoop_gpa &&
                 v->arch.hvm_vcpu.hvm_io.msix_snoop_address ==
                 (gtable + msi_desc->msi_attrib.entry_nr *
                           PCI_MSIX_ENTRY_SIZE +
                  PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET) )
                v->arch.hvm_vcpu.hvm_io.msix_unmask_address =
                    v->arch.hvm_vcpu.hvm_io.msix_snoop_address;
        }
    }

    return r;
}

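/*
 * Undo a msixtbl_pt_register() for @pirq: drop the reference on the
 * device's table entry.  Called with the pcidevs lock and the domain's
 * event lock held.
 */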
void msixtbl_pt_unregister(struct domain *d, struct pirq *pirq)
{
    struct irq_desc *irq_desc;
    struct msi_desc *msi_desc;
    struct pci_dev *pdev;
    struct msixtbl_entry *entry;

    ASSERT(pcidevs_locked());
    ASSERT(spin_is_locked(&d->event_lock));

    if ( !msixtbl_initialised(d) )
        return;

    irq_desc = pirq_spin_lock_irq_desc(pirq, NULL);
    if ( !irq_desc )
        return;

    msi_desc = irq_desc->msi_desc;
    if ( !msi_desc )
        goto out;

    pdev = msi_desc->dev;

    list_for_each_entry( entry, &d->arch.hvm_domain.msixtbl_list, list )
        if ( pdev == entry->pdev )
            goto found;

 out:
    spin_unlock_irq(&irq_desc->lock);
    return;

 found:
    if ( !atomic_dec_and_test(&entry->refcnt) )
        del_msixtbl_entry(entry);

    spin_unlock_irq(&irq_desc->lock);
}

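/*
 * Set up the per-domain MSI-X table interception state and register the
 * MMIO handler.  Subsequent calls for an already initialised domain are
 * no-ops.
 */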
void msixtbl_init(struct domain *d)
{
    struct hvm_io_handler *handler;

    if ( !is_hvm_domain(d) || !has_vlapic(d) || msixtbl_initialised(d) )
        return;

    INIT_LIST_HEAD(&d->arch.hvm_domain.msixtbl_list);

    handler = hvm_next_io_handler(d);
    if ( handler )
    {
        handler->type = IOREQ_TYPE_COPY;
        handler->ops = &msixtbl_mmio_ops;
    }
}

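/* Tear down all remaining MSI-X table interception entries for @d. */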
void msixtbl_pt_cleanup(struct domain *d)
{
    struct msixtbl_entry *entry, *temp;

    if ( !msixtbl_initialised(d) )
        return;

    spin_lock(&d->event_lock);

    list_for_each_entry_safe( entry, temp,
                              &d->arch.hvm_domain.msixtbl_list, list )
        del_msixtbl_entry(entry);

    spin_unlock(&d->event_lock);
}

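/*
 * Completion hook for MSI-X table writes that were left to the device
 * model: if an unmask was deferred (msix_unmask_address) or detected via
 * the snooping done in msixtbl_range(), replay it through msixtbl_write().
 */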
void msix_write_completion(struct vcpu *v)
{
    unsigned long ctrl_address = v->arch.hvm_vcpu.hvm_io.msix_unmask_address;
    unsigned long snoop_addr = v->arch.hvm_vcpu.hvm_io.msix_snoop_address;

    v->arch.hvm_vcpu.hvm_io.msix_snoop_address = 0;

    if ( !ctrl_address && snoop_addr &&
         v->arch.hvm_vcpu.hvm_io.msix_snoop_gpa )
    {
        const struct msi_desc *desc;
        uint32_t data;

        rcu_read_lock(&msixtbl_rcu_lock);
        desc = msixtbl_addr_to_desc(msixtbl_find_entry(v, snoop_addr),
                                    snoop_addr);
        rcu_read_unlock(&msixtbl_rcu_lock);

        if ( desc &&
             hvm_copy_from_guest_phys(&data,
                                      v->arch.hvm_vcpu.hvm_io.msix_snoop_gpa,
                                      sizeof(data)) == HVMTRANS_okay &&
             !(data & PCI_MSIX_VECTOR_BITMASK) )
            ctrl_address = snoop_addr;
    }

    if ( !ctrl_address )
        return;

    v->arch.hvm_vcpu.hvm_io.msix_unmask_address = 0;
    if ( msixtbl_write(v, ctrl_address, 4, 0) != X86EMUL_OKAY )
        gdprintk(XENLOG_WARNING, "MSI-X write completion failure\n");
}