/*
 * xen/arch/arm/gic-v3-lpi.c
 *
 * ARM GICv3 Locality-specific Peripheral Interrupts (LPI) support
 *
 * Copyright (C) 2016,2017 - ARM Ltd
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; under version 2 of the License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; If not, see <http://www.gnu.org/licenses/>.
 */

#include <xen/lib.h>
#include <xen/mm.h>
#include <xen/sched.h>
#include <xen/sizes.h>
#include <xen/warning.h>
#include <asm/atomic.h>
#include <asm/domain.h>
#include <asm/gic.h>
#include <asm/gic_v3_defs.h>
#include <asm/gic_v3_its.h>
#include <asm/io.h>
#include <asm/page.h>

/*
 * There could be a lot of LPIs on the host side, and they always go to
 * a guest. So having a struct irq_desc for each of them would be wasteful
 * and useless.
 * Instead just store enough information to find the right VCPU to inject
 * those LPIs into, which just requires the virtual LPI number.
 * To avoid a global lock on this data structure, this is using a lockless
 * approach relying on the architectural atomicity of native data types:
 * We read or write the "data" view of this union atomically, then can
 * access the broken-down fields in our local copy.
 */
union host_lpi {
    uint64_t data;
    struct {
        uint32_t virt_lpi;
        uint16_t dom_id;
        uint16_t pad;
    };
};

#define LPI_PROPTABLE_NEEDS_FLUSHING (1U << 0)

/* Global state */
static struct {
    /* The global LPI property table, shared by all redistributors. */
    uint8_t *lpi_property;
    /*
     * A two-level table to look up LPIs firing on the host and find the
     * VCPU and virtual LPI number to inject.
     */
    union host_lpi **host_lpis;
    /*
     * Number of physical LPIs the host supports. This is a property of
     * the GIC hardware. We depart from the habit of naming these things
     * "physical" in Xen, as the GICv3/4 spec uses the term "physical LPI"
     * in a different context to differentiate them from "virtual LPIs".
     */
    unsigned long max_host_lpi_ids;
    /*
     * Protects allocation and deallocation of host LPIs and next_free_lpi,
     * but not the actual data stored in the host_lpi entry.
     */
    spinlock_t host_lpis_lock;
    uint32_t next_free_lpi;
    unsigned int flags;
} lpi_data;

struct lpi_redist_data {
    paddr_t redist_addr;
    unsigned int redist_id;
    void *pending_table;
};

static DEFINE_PER_CPU(struct lpi_redist_data, lpi_redist);

#define MAX_NR_HOST_LPIS (lpi_data.max_host_lpi_ids - LPI_OFFSET)
#define HOST_LPIS_PER_PAGE (PAGE_SIZE / sizeof(union host_lpi))
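
/*
 * Worked example of the two-level lookup (a sketch, assuming 4K pages and
 * the usual LPI_OFFSET of 8192): union host_lpi is 8 bytes, so each chunk
 * page holds 512 entries. Host LPI 8192 therefore lives in host_lpis[0][0],
 * host LPI 8704 in host_lpis[1][0], and so on.
 */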

static union host_lpi *gic_get_host_lpi(uint32_t plpi)
{
    union host_lpi *block;

    if ( !is_lpi(plpi) || plpi >= MAX_NR_HOST_LPIS + LPI_OFFSET )
        return NULL;

    ASSERT(plpi >= LPI_OFFSET);

    plpi -= LPI_OFFSET;

    block = lpi_data.host_lpis[plpi / HOST_LPIS_PER_PAGE];
    if ( !block )
        return NULL;

    /* Matches the write barrier in allocation code. */
    smp_rmb();

    return &block[plpi % HOST_LPIS_PER_PAGE];
}

/*
 * An ITS can refer to redistributors in two ways: either by an ID (possibly
 * the CPU number) or by its MMIO address. This is a hardware implementation
 * choice, so we have to cope with both approaches. The GICv3 code calculates
 * both values and calls this function to let the ITS store them when it's
 * later required to provide them. This is done in a per-CPU variable.
 */
void gicv3_set_redist_address(paddr_t address, unsigned int redist_id)
{
    this_cpu(lpi_redist).redist_addr = address;
    this_cpu(lpi_redist).redist_id = redist_id;
}

/*
 * Return a redistributor's identifier, either as its MMIO address or as its
 * plain ID, depending on what the ITS hardware expects (use_pta).
 * This must be (and is) called only after it has been set up by the
 * function above.
 */
uint64_t gicv3_get_redist_address(unsigned int cpu, bool use_pta)
{
    if ( use_pta )
        return per_cpu(lpi_redist, cpu).redist_addr & GENMASK(51, 16);
    else
        return per_cpu(lpi_redist, cpu).redist_id << 16;
}
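
/*
 * A note on the shifted ID above (an assumption based on the GICv3 ITS
 * command layout, not something spelled out in this file): the RDbase field
 * of ITS commands occupies bits [51:16], so both representations returned
 * here can be OR-ed into a command word as-is.
 */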

void vgic_vcpu_inject_lpi(struct domain *d, unsigned int virq)
{
    /*
     * TODO: this assumes that the struct pending_irq stays valid all of
     * the time. We cannot properly protect this with the current locking
     * scheme, but the future per-IRQ lock will solve this problem.
     */
    struct pending_irq *p = irq_to_pending(d->vcpu[0], virq);
    unsigned int vcpu_id;

    if ( !p )
        return;

    vcpu_id = ACCESS_ONCE(p->lpi_vcpu_id);
    if ( vcpu_id >= d->max_vcpus )
        return;

    vgic_vcpu_inject_irq(d->vcpu[vcpu_id], virq);
}

/*
 * Handle incoming LPIs, which are a bit special, because they are potentially
 * numerous and also only get injected into guests. Treat them specially here,
 * by just looking up their target vCPU and virtual LPI number and handing
 * them over to the injection function.
 * Please note that LPIs are edge-triggered only and have no active state,
 * so spurious interrupts on the host side are no issue (we can just ignore
 * them).
 * Also a guest cannot expect interrupts that have not been fully configured
 * yet to reach the CPU, so we don't need to care about that special case.
 */
void gicv3_do_LPI(unsigned int lpi)
{
    struct domain *d;
    union host_lpi *hlpip, hlpi;

    irq_enter();

    /* EOI the LPI already. */
    WRITE_SYSREG32(lpi, ICC_EOIR1_EL1);

    /* Find out if a guest mapped something to this physical LPI. */
    hlpip = gic_get_host_lpi(lpi);
    if ( !hlpip )
        goto out;

    hlpi.data = read_u64_atomic(&hlpip->data);

    /*
     * Unmapped events are marked with an invalid LPI ID. We can safely
     * ignore them, as they have no further state and no-one can expect
     * to see them if they have not been mapped.
     */
    if ( hlpi.virt_lpi == INVALID_LPI )
        goto out;

    d = rcu_lock_domain_by_id(hlpi.dom_id);
    if ( !d )
        goto out;

    /*
     * TODO: Investigate what to do here for potential interrupt storms.
     * As we keep all host LPIs enabled, for disabling LPIs we would need
     * to queue an ITS host command, which we avoid so far during a guest's
     * runtime. Also re-enabling would trigger a host command upon the
     * guest sending a command, which could be an attack vector for
     * hogging the host command queue.
     * See the thread around here for some background:
     * https://lists.xen.org/archives/html/xen-devel/2016-12/msg00003.html
     */
    vgic_vcpu_inject_lpi(d, hlpi.virt_lpi);

    rcu_unlock_domain(d);

out:
    irq_exit();
}

void gicv3_lpi_update_host_entry(uint32_t host_lpi, int domain_id,
                                 uint32_t virt_lpi)
{
    union host_lpi *hlpip, hlpi;

    ASSERT(host_lpi >= LPI_OFFSET);

    host_lpi -= LPI_OFFSET;

    hlpip = &lpi_data.host_lpis[host_lpi / HOST_LPIS_PER_PAGE]
                               [host_lpi % HOST_LPIS_PER_PAGE];

    hlpi.virt_lpi = virt_lpi;
    hlpi.dom_id = domain_id;

    write_u64_atomic(&hlpip->data, hlpi.data);
}

static int gicv3_lpi_allocate_pendtable(uint64_t *reg)
{
    uint64_t val;
    void *pendtable;

    if ( this_cpu(lpi_redist).pending_table )
        return -EBUSY;

    val = GIC_BASER_CACHE_RaWaWb << GICR_PENDBASER_INNER_CACHEABILITY_SHIFT;
    val |= GIC_BASER_CACHE_SameAsInner << GICR_PENDBASER_OUTER_CACHEABILITY_SHIFT;
    val |= GIC_BASER_InnerShareable << GICR_PENDBASER_SHAREABILITY_SHIFT;

    /*
     * The pending table holds one bit per LPI and even covers bits for
     * interrupt IDs below 8192, so we allocate the full range.
     * The GICv3 imposes a 64KB alignment requirement and also requires
     * physically contiguous memory.
     */
    pendtable = _xzalloc(lpi_data.max_host_lpi_ids / 8, SZ_64K);
    if ( !pendtable )
        return -ENOMEM;

    /* Make sure the physical address can be encoded in the register. */
    if ( virt_to_maddr(pendtable) & ~GENMASK(51, 16) )
    {
        xfree(pendtable);
        return -ERANGE;
    }
    clean_and_invalidate_dcache_va_range(pendtable,
                                         lpi_data.max_host_lpi_ids / 8);

    this_cpu(lpi_redist).pending_table = pendtable;

    val |= GICR_PENDBASER_PTZ;

    val |= virt_to_maddr(pendtable);

    *reg = val;

    return 0;
}

/*
 * Tell a redistributor about the (shared) property table, allocating one
 * if not already done.
 */
static int gicv3_lpi_set_proptable(void __iomem *rdist_base)
{
    uint64_t reg;

    reg = GIC_BASER_CACHE_RaWaWb << GICR_PROPBASER_INNER_CACHEABILITY_SHIFT;
    reg |= GIC_BASER_CACHE_SameAsInner << GICR_PROPBASER_OUTER_CACHEABILITY_SHIFT;
    reg |= GIC_BASER_InnerShareable << GICR_PROPBASER_SHAREABILITY_SHIFT;

    /*
     * The property table is shared across all redistributors, so allocate
     * it only once and reuse the same table on subsequent calls.
     */
    if ( !lpi_data.lpi_property )
    {
        /* The property table holds one byte per LPI. */
        void *table = _xmalloc(lpi_data.max_host_lpi_ids, SZ_4K);

        if ( !table )
            return -ENOMEM;

        /* Make sure the physical address can be encoded in the register. */
        if ( (virt_to_maddr(table) & ~GENMASK(51, 12)) )
        {
            xfree(table);
            return -ERANGE;
        }
        memset(table, GIC_PRI_IRQ | LPI_PROP_RES1, MAX_NR_HOST_LPIS);
        clean_and_invalidate_dcache_va_range(table, MAX_NR_HOST_LPIS);
        lpi_data.lpi_property = table;
    }

    /* Encode the number of ID bits needed, minus one. */
    reg |= fls(lpi_data.max_host_lpi_ids - 1) - 1;
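    /*
     * For example, with the default max_lpi_bits of 20 (and a GIC that
     * supports at least that many ID bits), max_host_lpi_ids is 1 << 20,
     * so this writes 19, which the redistributor reads as "20 ID bits".
     */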

    reg |= virt_to_maddr(lpi_data.lpi_property);

    writeq_relaxed(reg, rdist_base + GICR_PROPBASER);
    reg = readq_relaxed(rdist_base + GICR_PROPBASER);

    /* If we can't do shareable, we have to drop cacheability as well. */
    if ( !(reg & GICR_PROPBASER_SHAREABILITY_MASK) )
    {
        reg &= ~GICR_PROPBASER_INNER_CACHEABILITY_MASK;
        reg |= GIC_BASER_CACHE_nC << GICR_PROPBASER_INNER_CACHEABILITY_SHIFT;
    }

    /* Remember that we have to flush the property table if non-cacheable. */
    if ( (reg & GICR_PROPBASER_INNER_CACHEABILITY_MASK) <= GIC_BASER_CACHE_nC )
    {
        lpi_data.flags |= LPI_PROPTABLE_NEEDS_FLUSHING;
        /* Update the redistributor's knowledge about the attributes. */
        writeq_relaxed(reg, rdist_base + GICR_PROPBASER);
    }

    return 0;
}

int gicv3_lpi_init_rdist(void __iomem *rdist_base)
{
    uint32_t reg;
    uint64_t table_reg;
    int ret;

    /* We don't support LPIs without an ITS. */
    if ( !gicv3_its_host_has_its() )
        return -ENODEV;

    /* Make sure LPIs are disabled before setting up the tables. */
    reg = readl_relaxed(rdist_base + GICR_CTLR);
    if ( reg & GICR_CTLR_ENABLE_LPIS )
        return -EBUSY;

    ret = gicv3_lpi_allocate_pendtable(&table_reg);
    if ( ret )
        return ret;
    writeq_relaxed(table_reg, rdist_base + GICR_PENDBASER);
    table_reg = readq_relaxed(rdist_base + GICR_PENDBASER);

    /* If the hardware reports non-shareable, drop cacheability as well. */
    if ( !(table_reg & GICR_PENDBASER_SHAREABILITY_MASK) )
    {
        table_reg &= ~GICR_PENDBASER_INNER_CACHEABILITY_MASK;
        table_reg |= GIC_BASER_CACHE_nC << GICR_PENDBASER_INNER_CACHEABILITY_SHIFT;

        writeq_relaxed(table_reg, rdist_base + GICR_PENDBASER);
    }

    return gicv3_lpi_set_proptable(rdist_base);
}

static unsigned int max_lpi_bits = 20;
integer_param("max_lpi_bits", max_lpi_bits);

/*
 * Allocate the 2nd level array for host LPIs. This one holds pointers
 * to the pages with the actual "union host_lpi" entries. Our LPI limit
 * avoids excessive memory usage.
 */
int gicv3_lpi_init_host_lpis(unsigned int host_lpi_bits)
{
    unsigned int nr_lpi_ptrs;

    /* We rely on the data structure being atomically accessible. */
    BUILD_BUG_ON(sizeof(union host_lpi) > sizeof(unsigned long));

    /*
     * An implementation needs to support at least 14 bits of LPI IDs.
     * Tell the user if the requested value is out of range; the number
     * actually used is reported below.
     */
    if ( max_lpi_bits < 14 || max_lpi_bits > 32 )
        printk(XENLOG_WARNING "WARNING: max_lpi_bits must be between 14 and 32, adjusting.\n");

    max_lpi_bits = max(max_lpi_bits, 14U);
    lpi_data.max_host_lpi_ids = BIT(min(host_lpi_bits, max_lpi_bits));

    /*
     * Warn if the number of LPIs is quite high, as the user might not want
     * to waste megabytes of memory for a mostly empty table.
     * It's very unlikely that we need more than 24 bits worth of LPIs.
     */
    if ( lpi_data.max_host_lpi_ids > BIT(24) )
        warning_add("Using high number of LPIs, limit memory usage with max_lpi_bits\n");
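    /*
     * As a rough guide: with 24 bits of LPI IDs the shared property table
     * alone is 16MB and each redistributor's pending table another 2MB,
     * versus 1MB and 128KB respectively for the default 20 bits.
     */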

    spin_lock_init(&lpi_data.host_lpis_lock);
    lpi_data.next_free_lpi = 0;

    nr_lpi_ptrs = MAX_NR_HOST_LPIS / (PAGE_SIZE / sizeof(union host_lpi));
    lpi_data.host_lpis = xzalloc_array(union host_lpi *, nr_lpi_ptrs);
    if ( !lpi_data.host_lpis )
        return -ENOMEM;

    printk("GICv3: using at most %lu LPIs on the host.\n", MAX_NR_HOST_LPIS);

    return 0;
}

/*
 * Scan the host LPI table for an unused block, starting at the given chunk.
 * Returns the chunk number and stores the index within that chunk in
 * *index, or returns -1 if no free block was found from "start" onwards.
 */
static int find_unused_host_lpi(uint32_t start, uint32_t *index)
{
    unsigned int chunk;
    uint32_t i = *index;

    ASSERT(spin_is_locked(&lpi_data.host_lpis_lock));

    for ( chunk = start;
          chunk < MAX_NR_HOST_LPIS / HOST_LPIS_PER_PAGE;
          chunk++ )
    {
        /* If we hit an unallocated chunk, use entry 0 in that one. */
        if ( !lpi_data.host_lpis[chunk] )
        {
            *index = 0;
            return chunk;
        }

        /* Find an unallocated entry in this chunk. */
        for ( ; i < HOST_LPIS_PER_PAGE; i += LPI_BLOCK )
        {
            if ( lpi_data.host_lpis[chunk][i].dom_id == DOMID_INVALID )
            {
                *index = i;
                return chunk;
            }
        }
        i = 0;
    }

    return -1;
}

/*
 * Allocate a block of 32 host LPIs for the given domain and return the
 * first LPI number via "first_lpi". The entries are marked as belonging
 * to the domain, but are not yet connected to any virtual LPI or VCPU;
 * mapping them on the ITS (via MAPTI) is up to the caller.
 */
int gicv3_allocate_host_lpi_block(struct domain *d, uint32_t *first_lpi)
{
    uint32_t lpi, lpi_idx;
    int chunk;
    int i;

    spin_lock(&lpi_data.host_lpis_lock);
    lpi_idx = lpi_data.next_free_lpi % HOST_LPIS_PER_PAGE;
    chunk = find_unused_host_lpi(lpi_data.next_free_lpi / HOST_LPIS_PER_PAGE,
                                 &lpi_idx);

    if ( chunk == -1 ) /* rescan for a hole from the beginning */
    {
        lpi_idx = 0;
        chunk = find_unused_host_lpi(0, &lpi_idx);
        if ( chunk == -1 )
        {
            spin_unlock(&lpi_data.host_lpis_lock);
            return -ENOSPC;
        }
    }

    /* If we hit an unallocated chunk, we initialize it and use entry 0. */
    if ( !lpi_data.host_lpis[chunk] )
    {
        union host_lpi *new_chunk;

        /* TODO: NUMA locality for quicker IRQ path? */
        new_chunk = alloc_xenheap_page();
        if ( !new_chunk )
        {
            spin_unlock(&lpi_data.host_lpis_lock);
            return -ENOMEM;
        }

        for ( i = 0; i < HOST_LPIS_PER_PAGE; i += LPI_BLOCK )
            new_chunk[i].dom_id = DOMID_INVALID;

        /*
         * Make sure all slots are really marked empty before publishing the
         * new chunk.
         */
        smp_wmb();

        lpi_data.host_lpis[chunk] = new_chunk;
        lpi_idx = 0;
    }

    lpi = chunk * HOST_LPIS_PER_PAGE + lpi_idx;

    for ( i = 0; i < LPI_BLOCK; i++ )
    {
        union host_lpi hlpi;

        /*
         * Mark this host LPI as belonging to the domain, but don't assign
         * any virtual LPI or a VCPU yet.
         */
        hlpi.virt_lpi = INVALID_LPI;
        hlpi.dom_id = d->domain_id;
        write_u64_atomic(&lpi_data.host_lpis[chunk][lpi_idx + i].data,
                         hlpi.data);

        /*
         * Enable this host LPI, so we don't have to do this during the
         * guest's runtime.
         */
        lpi_data.lpi_property[lpi + i] |= LPI_PROP_ENABLED;
    }

    lpi_data.next_free_lpi = lpi + LPI_BLOCK;

    /*
     * We have allocated and initialized the host LPI entries, so it's safe
     * to drop the lock now. Access to the structures can be done concurrently
     * as it involves only an atomic uint64_t access.
     */
    spin_unlock(&lpi_data.host_lpis_lock);

    if ( lpi_data.flags & LPI_PROPTABLE_NEEDS_FLUSHING )
        clean_and_invalidate_dcache_va_range(&lpi_data.lpi_property[lpi],
                                             LPI_BLOCK);

    *first_lpi = lpi + LPI_OFFSET;

    return 0;
}
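
/*
 * A minimal usage sketch (hypothetical caller; the real users live in the
 * ITS code): allocate a block, map its LPIs, and free it on teardown.
 *
 *     uint32_t first_lpi;
 *
 *     if ( !gicv3_allocate_host_lpi_block(d, &first_lpi) )
 *     {
 *         // issue MAPTI for first_lpi .. first_lpi + LPI_BLOCK - 1
 *     }
 *     ...
 *     gicv3_free_host_lpi_block(first_lpi);
 */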

void gicv3_free_host_lpi_block(uint32_t first_lpi)
{
    union host_lpi *hlpi, empty_lpi = { .dom_id = DOMID_INVALID };
    int i;

    /* This should only be called with the beginning of a block. */
    ASSERT((first_lpi % LPI_BLOCK) == 0);

    hlpi = gic_get_host_lpi(first_lpi);
    if ( !hlpi )
        return; /* Nothing to free here. */

    spin_lock(&lpi_data.host_lpis_lock);

    for ( i = 0; i < LPI_BLOCK; i++ )
        write_u64_atomic(&hlpi[i].data, empty_lpi.data);

    /*
     * Make sure the next allocation can reuse this block, as we do only
     * forward scanning when finding an unused block.
     */
    if ( lpi_data.next_free_lpi > first_lpi )
        lpi_data.next_free_lpi = first_lpi;

    spin_unlock(&lpi_data.host_lpis_lock);

    return;
}

/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * indent-tabs-mode: nil
 * End:
 */