/*
 * FIFO event channel management.
 *
 * Copyright (C) 2013 Citrix Systems R&D Ltd.
 *
 * This source code is licensed under the GNU General Public License,
 * Version 2 or later.  See the file COPYING for more details.
 */

#include <xen/init.h>
#include <xen/lib.h>
#include <xen/errno.h>
#include <xen/sched.h>
#include <xen/event.h>
#include <xen/event_fifo.h>
#include <xen/paging.h>
#include <xen/mm.h>
#include <xen/domain_page.h>

#include <public/event_channel.h>
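
/*
 * Each event channel's state lives in a single event_word_t in a
 * guest-shared event array page.  As defined by the FIFO ABI in
 * public/event_channel.h, the word's top bits are the PENDING, MASKED,
 * LINKED and BUSY flags, and its low EVTCHN_FIFO_LINK_BITS hold the
 * LINK field: the port number of the next event on the same queue.
 */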
static inline event_word_t *evtchn_fifo_word_from_port(const struct domain *d,
                                                       unsigned int port)
{
    unsigned int p, w;

    if ( unlikely(port >= d->evtchn_fifo->num_evtchns) )
        return NULL;

    /*
     * Callers aren't required to hold d->event_lock, so we need to synchronize
     * with add_page_to_event_array().
     */
    smp_rmb();

    p = port / EVTCHN_FIFO_EVENT_WORDS_PER_PAGE;
    w = port % EVTCHN_FIFO_EVENT_WORDS_PER_PAGE;

    return d->evtchn_fifo->event_array[p] + w;
}

static void evtchn_fifo_init(struct domain *d, struct evtchn *evtchn)
{
    event_word_t *word;

    evtchn->priority = EVTCHN_FIFO_PRIORITY_DEFAULT;

    /*
     * If this event is still linked, the first event may be delivered
     * on the wrong VCPU or with an unexpected priority.
     */
    word = evtchn_fifo_word_from_port(d, evtchn->port);
    if ( word && test_bit(EVTCHN_FIFO_LINKED, word) )
        gdprintk(XENLOG_WARNING, "domain %d, port %d already on a queue\n",
                 d->domain_id, evtchn->port);
}
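
/*
 * The queue an event was last linked onto is identified by
 * evtchn->last_vcpu_id and evtchn->last_priority, but these may change
 * between reading them and taking the queue lock.  Take the lock, then
 * re-read and verify they still name the same queue, retrying a
 * bounded number of times before giving up and dropping the event.
 */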
static struct evtchn_fifo_queue *lock_old_queue(const struct domain *d,
                                                struct evtchn *evtchn,
                                                unsigned long *flags)
{
    struct vcpu *v;
    struct evtchn_fifo_queue *q, *old_q;
    unsigned int try;

    for ( try = 0; try < 3; try++ )
    {
        v = d->vcpu[evtchn->last_vcpu_id];
        old_q = &v->evtchn_fifo->queue[evtchn->last_priority];

        spin_lock_irqsave(&old_q->lock, *flags);

        v = d->vcpu[evtchn->last_vcpu_id];
        q = &v->evtchn_fifo->queue[evtchn->last_priority];

        if ( old_q == q )
            return old_q;

        spin_unlock_irqrestore(&old_q->lock, *flags);
    }

    gprintk(XENLOG_WARNING,
            "dom%d port %d lost event (too many queue changes)\n",
            d->domain_id, evtchn->port);
    return NULL;
}
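
/*
 * Attempt a single update of the LINK field.  Returns 0 if the event
 * is no longer LINKED (nothing to do), 1 if the LINK field was
 * successfully updated, or -EAGAIN if the cmpxchg() raced with a
 * guest update; on failure *w is refreshed with the current value of
 * the word so the caller may retry.
 */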
static int try_set_link(event_word_t *word, event_word_t *w, uint32_t link)
{
    event_word_t new, old;

    if ( !(*w & (1 << EVTCHN_FIFO_LINKED)) )
        return 0;

    old = *w;
    new = (old & ~((1 << EVTCHN_FIFO_BUSY) | EVTCHN_FIFO_LINK_MASK)) | link;
    *w = cmpxchg(word, old, new);
    if ( *w == old )
        return 1;

    return -EAGAIN;
}

/*
 * Atomically set the LINK field iff it is still LINKED.
 *
 * The guest is only permitted to make the following changes to a
 * LINKED event.
 *
 * - set MASKED
 * - clear MASKED
 * - clear PENDING
 * - clear LINKED (and LINK)
 *
 * We block unmasking by the guest by marking the tail word as BUSY;
 * therefore, the cmpxchg() may fail at most 4 times.
 */
static bool_t evtchn_fifo_set_link(const struct domain *d, event_word_t *word,
                                   uint32_t link)
{
    event_word_t w;
    unsigned int try;
    int ret;

    w = read_atomic(word);

    ret = try_set_link(word, &w, link);
    if ( ret >= 0 )
        return ret;

    /* Lock the word to prevent guest unmasking. */
    set_bit(EVTCHN_FIFO_BUSY, word);

    w = read_atomic(word);

    for ( try = 0; try < 4; try++ )
    {
        ret = try_set_link(word, &w, link);
        if ( ret >= 0 )
        {
            if ( ret == 0 )
                clear_bit(EVTCHN_FIFO_BUSY, word);
            return ret;
        }
    }
    gdprintk(XENLOG_WARNING, "domain %d, port %d not linked\n",
             d->domain_id, link);
    clear_bit(EVTCHN_FIFO_BUSY, word);
    return 1;
}
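
/*
 * Mark the event pending and, if it is unmasked and not already
 * linked, append it to the tail of the target VCPU's queue for the
 * event's priority.  If the queue was empty, also update its head
 * and, if the queue's ready bit was clear, set it and notify the
 * VCPU.
 */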
static void evtchn_fifo_set_pending(struct vcpu *v, struct evtchn *evtchn)
{
    struct domain *d = v->domain;
    unsigned int port;
    event_word_t *word;
    unsigned long flags;
    bool_t was_pending;

    port = evtchn->port;
    word = evtchn_fifo_word_from_port(d, port);

    /*
     * The event array page may not exist yet; save the pending state
     * for when the page is added.
     */
    if ( unlikely(!word) )
    {
        evtchn->pending = 1;
        return;
    }

    was_pending = test_and_set_bit(EVTCHN_FIFO_PENDING, word);

    /* Link the event if it is unmasked and not already linked. */
    if ( !test_bit(EVTCHN_FIFO_MASKED, word)
         && !test_bit(EVTCHN_FIFO_LINKED, word) )
    {
        struct evtchn_fifo_queue *q, *old_q;
        event_word_t *tail_word;
        bool_t linked = 0;

        /*
         * Control block not mapped.  The guest must not unmask an
         * event until the control block is initialized, so we can
         * just drop the event.
         */
        if ( unlikely(!v->evtchn_fifo->control_block) )
        {
            printk(XENLOG_G_WARNING
                   "%pv has no FIFO event channel control block\n", v);
            goto done;
        }

        /*
         * No locking around getting the queue.  This may race with
         * changing the priority, but we are allowed to signal the
         * event once on the old priority.
         */
        q = &v->evtchn_fifo->queue[evtchn->priority];

        old_q = lock_old_queue(d, evtchn, &flags);
        if ( !old_q )
            goto done;

        if ( test_and_set_bit(EVTCHN_FIFO_LINKED, word) )
        {
            spin_unlock_irqrestore(&old_q->lock, flags);
            goto done;
        }

        /*
         * If this event was a tail, the old queue is now empty and
         * its tail must be invalidated to prevent adding an event to
         * the old queue from corrupting the new queue.
         */
        if ( old_q->tail == port )
            old_q->tail = 0;

        /* Moved to a different queue? */
        if ( old_q != q )
        {
            evtchn->last_vcpu_id = evtchn->notify_vcpu_id;
            evtchn->last_priority = evtchn->priority;

            spin_unlock_irqrestore(&old_q->lock, flags);
            spin_lock_irqsave(&q->lock, flags);
        }

        /*
         * Atomically link the tail to port iff the tail is linked.
         * If the tail is unlinked the queue is empty.
         *
         * If port is the same as tail, the queue is empty but q->tail
         * will appear linked as we just set LINKED above.
         *
         * If the queue is empty (i.e., we haven't linked to the new
         * event), head must be updated.
         */
        if ( q->tail )
        {
            tail_word = evtchn_fifo_word_from_port(d, q->tail);
            linked = evtchn_fifo_set_link(d, tail_word, port);
        }
        if ( !linked )
            write_atomic(q->head, port);
        q->tail = port;

        spin_unlock_irqrestore(&q->lock, flags);

        if ( !linked
             && !test_and_set_bit(q->priority,
                                  &v->evtchn_fifo->control_block->ready) )
            vcpu_mark_events_pending(v);
    }
 done:
    if ( !was_pending )
        evtchn_check_pollers(d, port);
}

static void evtchn_fifo_clear_pending(struct domain *d, struct evtchn *evtchn)
{
    event_word_t *word;

    word = evtchn_fifo_word_from_port(d, evtchn->port);
    if ( unlikely(!word) )
        return;

    /*
     * Just clear the P bit.
     *
     * No need to unlink as the guest will unlink and ignore
     * non-pending events.
     */
    clear_bit(EVTCHN_FIFO_PENDING, word);
}

static void evtchn_fifo_unmask(struct domain *d, struct evtchn *evtchn)
{
    struct vcpu *v = d->vcpu[evtchn->notify_vcpu_id];
    event_word_t *word;

    word = evtchn_fifo_word_from_port(d, evtchn->port);
    if ( unlikely(!word) )
        return;

    clear_bit(EVTCHN_FIFO_MASKED, word);

    /* Relink if pending. */
    if ( test_bit(EVTCHN_FIFO_PENDING, word) )
        evtchn_fifo_set_pending(v, evtchn);
}

static bool_t evtchn_fifo_is_pending(const struct domain *d, evtchn_port_t port)
{
    const event_word_t *word = evtchn_fifo_word_from_port(d, port);

    return word && test_bit(EVTCHN_FIFO_PENDING, word);
}

static bool_t evtchn_fifo_is_masked(const struct domain *d, evtchn_port_t port)
{
    const event_word_t *word = evtchn_fifo_word_from_port(d, port);

    return !word || test_bit(EVTCHN_FIFO_MASKED, word);
}

static bool_t evtchn_fifo_is_busy(const struct domain *d, evtchn_port_t port)
{
    const event_word_t *word = evtchn_fifo_word_from_port(d, port);

    return word && test_bit(EVTCHN_FIFO_LINKED, word);
}

static int evtchn_fifo_set_priority(struct domain *d, struct evtchn *evtchn,
                                    unsigned int priority)
{
    if ( priority > EVTCHN_FIFO_PRIORITY_MIN )
        return -EINVAL;

    /*
     * Only need to switch to the new queue for future events.  If the
     * event is already pending or in the process of being linked it
     * will be on the old queue -- this is fine.
     */
    evtchn->priority = priority;

    return 0;
}

static void evtchn_fifo_print_state(struct domain *d,
                                    const struct evtchn *evtchn)
{
    event_word_t *word;

    word = evtchn_fifo_word_from_port(d, evtchn->port);
    if ( !word )
        printk("?     ");
    else if ( test_bit(EVTCHN_FIFO_LINKED, word) )
        printk("%c %-4u", test_bit(EVTCHN_FIFO_BUSY, word) ? 'B' : ' ',
               *word & EVTCHN_FIFO_LINK_MASK);
    else
        printk("%c -   ", test_bit(EVTCHN_FIFO_BUSY, word) ? 'B' : ' ');
}

static const struct evtchn_port_ops evtchn_port_ops_fifo =
{
    .init          = evtchn_fifo_init,
    .set_pending   = evtchn_fifo_set_pending,
    .clear_pending = evtchn_fifo_clear_pending,
    .unmask        = evtchn_fifo_unmask,
    .is_pending    = evtchn_fifo_is_pending,
    .is_masked     = evtchn_fifo_is_masked,
    .is_busy       = evtchn_fifo_is_busy,
    .set_priority  = evtchn_fifo_set_priority,
    .print_state   = evtchn_fifo_print_state,
};
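
/*
 * Map a guest page for writing by Xen.  A general reference and a
 * writable type reference are taken and held until unmap_guest_page()
 * releases them, preventing the guest from freeing or retyping the
 * page while Xen may still write to it.
 */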
static int map_guest_page(struct domain *d, uint64_t gfn, void **virt)
{
    struct page_info *p;

    p = get_page_from_gfn(d, gfn, NULL, P2M_ALLOC);
    if ( !p )
        return -EINVAL;

    if ( !get_page_type(p, PGT_writable_page) )
    {
        put_page(p);
        return -EINVAL;
    }

    *virt = __map_domain_page_global(p);
    if ( !*virt )
    {
        put_page_and_type(p);
        return -ENOMEM;
    }
    return 0;
}

static void unmap_guest_page(void *virt)
{
    struct page_info *page;

    if ( !virt )
        return;

    virt = (void *)((unsigned long)virt & PAGE_MASK);
    page = mfn_to_page(domain_page_map_to_mfn(virt));

    unmap_domain_page_global(virt);
    put_page_and_type(page);
}

static void init_queue(struct vcpu *v, struct evtchn_fifo_queue *q,
                       unsigned int i)
{
    spin_lock_init(&q->lock);
    q->priority = i;
}

static int setup_control_block(struct vcpu *v)
{
    struct evtchn_fifo_vcpu *efv;
    unsigned int i;

    efv = xzalloc(struct evtchn_fifo_vcpu);
    if ( !efv )
        return -ENOMEM;

    for ( i = 0; i <= EVTCHN_FIFO_PRIORITY_MIN; i++ )
        init_queue(v, &efv->queue[i], i);

    v->evtchn_fifo = efv;

    return 0;
}

static int map_control_block(struct vcpu *v, uint64_t gfn, uint32_t offset)
{
    void *virt;
    unsigned int i;
    int rc;

    if ( v->evtchn_fifo->control_block )
        return -EINVAL;

    rc = map_guest_page(v->domain, gfn, &virt);
    if ( rc < 0 )
        return rc;

    v->evtchn_fifo->control_block = virt + offset;

    for ( i = 0; i <= EVTCHN_FIFO_PRIORITY_MIN; i++ )
        v->evtchn_fifo->queue[i].head = &v->evtchn_fifo->control_block->head[i];

    return 0;
}

static void cleanup_control_block(struct vcpu *v)
{
    if ( !v->evtchn_fifo )
        return;

    unmap_guest_page(v->evtchn_fifo->control_block);
    xfree(v->evtchn_fifo);
    v->evtchn_fifo = NULL;
}

/*
 * Set up an event array with no pages.
 */
static int setup_event_array(struct domain *d)
{
    d->evtchn_fifo = xzalloc(struct evtchn_fifo_domain);
    if ( !d->evtchn_fifo )
        return -ENOMEM;

    return 0;
}

static void cleanup_event_array(struct domain *d)
{
    unsigned int i;

    if ( !d->evtchn_fifo )
        return;

    for ( i = 0; i < EVTCHN_FIFO_MAX_EVENT_ARRAY_PAGES; i++ )
        unmap_guest_page(d->evtchn_fifo->event_array[i]);
    xfree(d->evtchn_fifo);
    d->evtchn_fifo = NULL;
}

static void setup_ports(struct domain *d)
{
    unsigned int port;

    /*
     * For each port that is already bound:
     *
     * - save its pending state.
     * - set default priority.
     */
    for ( port = 1; port < d->max_evtchns; port++ )
    {
        struct evtchn *evtchn;

        if ( !port_is_valid(d, port) )
            break;

        evtchn = evtchn_from_port(d, port);

        if ( test_bit(port, &shared_info(d, evtchn_pending)) )
            evtchn->pending = 1;

        evtchn_fifo_set_priority(d, evtchn, EVTCHN_FIFO_PRIORITY_DEFAULT);
    }
}
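
/*
 * Handle the EVTCHNOP_init_control hypercall: map the given VCPU's
 * control block and, on first use, switch the domain from the default
 * 2-level ABI to the FIFO port operations.
 */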
int evtchn_fifo_init_control(struct evtchn_init_control *init_control)
{
    struct domain *d = current->domain;
    uint32_t vcpu_id;
    uint64_t gfn;
    uint32_t offset;
    struct vcpu *v;
    int rc;

    init_control->link_bits = EVTCHN_FIFO_LINK_BITS;

    vcpu_id = init_control->vcpu;
    gfn     = init_control->control_gfn;
    offset  = init_control->offset;

    if ( vcpu_id >= d->max_vcpus || !d->vcpu[vcpu_id] )
        return -ENOENT;
    v = d->vcpu[vcpu_id];

    /* Must not cross page boundary. */
    if ( offset > (PAGE_SIZE - sizeof(evtchn_fifo_control_block_t)) )
        return -EINVAL;

    /* Must be 8-byte aligned. */
    if ( offset & (8 - 1) )
        return -EINVAL;

    spin_lock(&d->event_lock);

    /*
     * If this is the first control block, setup an empty event array
     * and switch to the fifo port ops.
     */
    if ( !d->evtchn_fifo )
    {
        struct vcpu *vcb;

        for_each_vcpu ( d, vcb )
        {
            rc = setup_control_block(vcb);
            if ( rc < 0 )
                goto error;
        }

        rc = setup_event_array(d);
        if ( rc < 0 )
            goto error;

        rc = map_control_block(v, gfn, offset);
        if ( rc < 0 )
            goto error;

        d->evtchn_port_ops = &evtchn_port_ops_fifo;
        d->max_evtchns = EVTCHN_FIFO_NR_CHANNELS;
        setup_ports(d);
    }
    else
        rc = map_control_block(v, gfn, offset);

    spin_unlock(&d->event_lock);

    return rc;

 error:
    evtchn_fifo_destroy(d);
    spin_unlock(&d->event_lock);
    return rc;
}
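
/*
 * For illustration, a guest typically enables the FIFO ABI with one
 * EVTCHNOP_init_control call per VCPU.  A hypothetical, Linux-flavoured
 * guest-side sketch (helper names are the guest's, not Xen's):
 *
 *     struct evtchn_init_control init_control = {
 *         .control_gfn = virt_to_gfn(control_block),
 *         .offset      = 0,
 *         .vcpu        = cpu,
 *     };
 *     rc = HYPERVISOR_event_channel_op(EVTCHNOP_init_control,
 *                                      &init_control);
 */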

static int add_page_to_event_array(struct domain *d, unsigned long gfn)
{
    void *virt;
    unsigned int slot;
    unsigned int port = d->evtchn_fifo->num_evtchns;
    int rc;

    slot = d->evtchn_fifo->num_evtchns / EVTCHN_FIFO_EVENT_WORDS_PER_PAGE;
    if ( slot >= EVTCHN_FIFO_MAX_EVENT_ARRAY_PAGES )
        return -ENOSPC;

    rc = map_guest_page(d, gfn, &virt);
    if ( rc < 0 )
        return rc;

    d->evtchn_fifo->event_array[slot] = virt;

    /*
     * Synchronize with evtchn_fifo_word_from_port(): make the new
     * page visible before num_evtchns grows, so any reader that sees
     * the updated count also sees the array pointer.
     */
    smp_wmb();

    d->evtchn_fifo->num_evtchns += EVTCHN_FIFO_EVENT_WORDS_PER_PAGE;

    /*
     * Re-raise any events that were pending while this array page was
     * missing.
     */
    for ( ; port < d->evtchn_fifo->num_evtchns; port++ )
    {
        struct evtchn *evtchn;

        if ( !port_is_valid(d, port) )
            break;

        evtchn = evtchn_from_port(d, port);
        if ( evtchn->pending )
            evtchn_fifo_set_pending(d->vcpu[evtchn->notify_vcpu_id], evtchn);
    }

    return 0;
}
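
/*
 * Handle the EVTCHNOP_expand_array hypercall: the guest donates a
 * further page of event words once its bound ports outgrow the pages
 * mapped so far.
 */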
int evtchn_fifo_expand_array(const struct evtchn_expand_array *expand_array)
{
    struct domain *d = current->domain;
    int rc;

    if ( !d->evtchn_fifo )
        return -EOPNOTSUPP;

    spin_lock(&d->event_lock);
    rc = add_page_to_event_array(d, expand_array->array_gfn);
    spin_unlock(&d->event_lock);

    return rc;
}

void evtchn_fifo_destroy(struct domain *d)
{
    struct vcpu *v;

    for_each_vcpu ( d, v )
        cleanup_control_block(v);
    cleanup_event_array(d);
}

/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */