1 /******************************************************************************
2  * Argo : Hypervisor-Mediated data eXchange
3  *
4  * Derived from v4v, the version 2 of v2v.
5  *
6  * Copyright (c) 2010, Citrix Systems
7  * Copyright (c) 2018-2019 BAE Systems
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; If not, see <http://www.gnu.org/licenses/>.
16  */
17 
18 #include <xen/argo.h>
19 #include <xen/domain.h>
20 #include <xen/domain_page.h>
21 #include <xen/errno.h>
22 #include <xen/event.h>
23 #include <xen/guest_access.h>
24 #include <xen/lib.h>
25 #include <xen/nospec.h>
26 #include <xen/param.h>
27 #include <xen/sched.h>
28 #include <xen/sections.h>
29 #include <xen/time.h>
30 
31 #include <xsm/xsm.h>
32 
33 #include <asm/p2m.h>
34 
35 #include <public/argo.h>
36 
37 #ifdef CONFIG_COMPAT
38 #include <compat/argo.h>
39 CHECK_argo_addr;
40 #undef CHECK_argo_addr
41 #define CHECK_argo_addr struct xen_argo_addr
42 CHECK_argo_register_ring;
43 CHECK_argo_ring;
44 CHECK_argo_ring_data_ent;
45 #undef CHECK_argo_ring_data_ent
46 #define CHECK_argo_ring_data_ent struct xen_argo_ring_data_ent
47 CHECK_argo_ring_data;
48 CHECK_argo_ring_message_header;
49 CHECK_argo_unregister_ring;
50 CHECK_argo_send_addr;
51 #endif
52 
53 #define MAX_RINGS_PER_DOMAIN            128U
54 #define MAX_NOTIFY_COUNT                256U
55 #define MAX_PENDING_PER_RING             32U
56 
57 /* All messages on the ring are padded to a multiple of the slot size. */
58 #define ROUNDUP_MESSAGE(a) ROUNDUP((a), XEN_ARGO_MSG_SLOT_SIZE)
59 
60 /* The maximum size of a message that may be sent on the largest Argo ring. */
61 #define MAX_ARGO_MESSAGE_SIZE ((XEN_ARGO_MAX_RING_SIZE) - \
62         (sizeof(struct xen_argo_ring_message_header)) - ROUNDUP_MESSAGE(1))
63 
64 /* Number of PAGEs needed to hold a ring of a given size in bytes */
65 #define NPAGES_RING(ring_len) \
66     (ROUNDUP((ROUNDUP_MESSAGE(ring_len) + sizeof(xen_argo_ring_t)), PAGE_SIZE) \
67      >> PAGE_SHIFT)
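
/*
 * Worked example (illustrative only, assuming the 16-byte message slot size
 * and 4K pages from the public headers): a 10-byte payload occupies
 * ROUNDUP_MESSAGE(10) == 16 bytes of ring space, and registering a ring with
 * ring_len == 4096 requires NPAGES_RING(4096) == 2 pages, since the
 * xen_argo_ring_t header is stored in the same region as the ring data.
 */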
68 
69 DEFINE_XEN_GUEST_HANDLE(xen_argo_addr_t);
70 DEFINE_XEN_GUEST_HANDLE(xen_argo_gfn_t);
71 DEFINE_XEN_GUEST_HANDLE(xen_argo_iov_t);
72 DEFINE_XEN_GUEST_HANDLE(xen_argo_register_ring_t);
73 DEFINE_XEN_GUEST_HANDLE(xen_argo_ring_t);
74 DEFINE_XEN_GUEST_HANDLE(xen_argo_ring_data_t);
75 DEFINE_XEN_GUEST_HANDLE(xen_argo_ring_data_ent_t);
76 DEFINE_XEN_GUEST_HANDLE(xen_argo_send_addr_t);
77 DEFINE_XEN_GUEST_HANDLE(xen_argo_unregister_ring_t);
78 #ifdef CONFIG_COMPAT
79 DEFINE_COMPAT_HANDLE(compat_argo_iov_t);
80 #endif
81 
82 static bool __read_mostly opt_argo;
83 static bool __read_mostly opt_argo_mac_permissive;
84 
85 static int __init cf_check parse_argo(const char *s)
86 {
87     const char *ss;
88     int val, rc = 0;
89 
90     do {
91         ss = strchr(s, ',');
92         if ( !ss )
93             ss = strchr(s, '\0');
94 
95         if ( (val = parse_bool(s, ss)) >= 0 )
96             opt_argo = val;
97         else if ( (val = parse_boolean("mac-permissive", s, ss)) >= 0 )
98             opt_argo_mac_permissive = val;
99         else
100             rc = -EINVAL;
101 
102         s = ss + 1;
103     } while ( *ss );
104 
105     return rc;
106 }
107 custom_param("argo", parse_argo);
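
/*
 * Example usage (illustrative): booting Xen with "argo=1,mac-permissive" on
 * the command line enables Argo and selects the permissive MAC behaviour for
 * send operations, while "argo=1" alone enables Argo with the default, more
 * restrictive policy. Unknown tokens cause parse_argo() to return -EINVAL.
 */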
108 
109 typedef struct argo_ring_id
110 {
111     xen_argo_port_t aport;
112     domid_t partner_id;
113     domid_t domain_id;
114 } argo_ring_id;
115 
116 /* Data about a domain's own ring that it has registered */
117 struct argo_ring_info
118 {
119     /* next node in the hash, protected by rings_L2 */
120     struct list_head node;
121     /* this ring's id, protected by rings_L2 */
122     struct argo_ring_id id;
123     /* L3, the ring_info lock: protects the members of this struct below */
124     spinlock_t L3_lock;
125     /* length of the ring, protected by L3 */
126     unsigned int len;
127     /* number of pages translated into mfns, protected by L3 */
128     unsigned int nmfns;
129     /* cached tx pointer location, protected by L3 */
130     unsigned int tx_ptr;
131     /* mapped ring pages protected by L3 */
132     void **mfn_mapping;
133     /* list of mfns of guest ring, protected by L3 */
134     mfn_t *mfns;
135     /* list of struct pending_ent for this ring, protected by L3 */
136     struct list_head pending;
137     /* number of pending entries queued for this ring, protected by L3 */
138     unsigned int npending;
139 };
140 
141 /* Data about a single-sender ring, held by the sender (partner) domain */
142 struct argo_send_info
143 {
144     /* next node in the hash, protected by send_L2 */
145     struct list_head node;
146     /* this ring's id, protected by send_L2 */
147     struct argo_ring_id id;
148 };
149 
150 /* A space-available notification that is awaiting sufficient space */
151 struct pending_ent
152 {
153     /* List node within argo_ring_info's pending list */
154     struct list_head node;
155     /*
156      * List node within argo_domain's wildcard_pend_list. Only used if the
157      * ring is one with a wildcard partner (ie. that any domain may send to)
158      * to enable cancelling signals on wildcard rings on domain destroy.
159      */
160     struct list_head wildcard_node;
161     /*
162      * Pointer to the ring_info that this ent pertains to. Used to ensure that
163      * ring_info->npending is decremented when ents for wildcard rings are
164      * cancelled for domain destroy.
165      * Caution: Must hold the correct locks before accessing ring_info via this.
166      */
167     struct argo_ring_info *ring_info;
168     /* minimum ring space available that this signal is waiting upon */
169     unsigned int len;
170     /* domain to be notified when space is available */
171     domid_t domain_id;
172 };
173 
174 /*
175  * The value of the argo element in a struct domain is
176  * protected by L1_global_argo_rwlock
177  */
178 #define ARGO_HASHTABLE_SIZE 32
179 struct argo_domain
180 {
181     /* rings_L2 */
182     rwlock_t rings_L2_rwlock;
183     /*
184      * Hash table of argo_ring_info about rings this domain has registered.
185      * Protected by rings_L2.
186      */
187     struct list_head ring_hash[ARGO_HASHTABLE_SIZE];
188     /* Counter of rings registered by this domain. Protected by rings_L2. */
189     unsigned int ring_count;
190 
191     /* send_L2 */
192     spinlock_t send_L2_lock;
193     /*
194      * Hash table of argo_send_info about rings other domains have registered
195      * for this domain to send to. Single partner, non-wildcard rings.
196      * Protected by send_L2.
197      */
198     struct list_head send_hash[ARGO_HASHTABLE_SIZE];
199 
200     /* wildcard_L2 */
201     spinlock_t wildcard_L2_lock;
202     /*
203      * List of pending space-available signals for this domain about wildcard
204      * rings registered by other domains. Protected by wildcard_L2.
205      */
206     struct list_head wildcard_pend_list;
207 };
208 
209 /*
210  * Locking is organized as follows:
211  *
212  * Terminology: R(<lock>) means taking a read lock on the specified lock;
213  *              W(<lock>) means taking a write lock on it.
214  *
215  * == L1 : The global read/write lock: L1_global_argo_rwlock
216  * Protects the argo elements of all struct domain *d in the system.
217  *
218  * R(L1) does not protect any of the elements of d->argo; it protects their
219  * addresses. W(L1) protects those and more since it implies W on all the lower
220  * level locks - see the notes on those locks below.
221  *
222  * The destruction of an argo-enabled domain, which must have a non-NULL d->argo
223  * pointer, will need to free that d->argo pointer, which requires W(L1).
224  * Since holding R(L1) will block acquiring W(L1), it will ensure that
225  * no domain pointers that argo is interested in become invalid while either
226  * W(L1) or R(L1) are held.
227  */
228 
229 static DEFINE_RWLOCK(L1_global_argo_rwlock); /* L1 */
230 
231 /*
232  * == rings_L2 : The per-domain ring hash lock: d->argo->rings_L2_rwlock
233  *
234  * Holding a read lock on rings_L2 protects the ring hash table and
235  * the elements in the hash_table d->argo->ring_hash, and
236  * the node and id fields in struct argo_ring_info in the
237  * hash table.
238  * Holding a write lock on rings_L2 protects all of the elements of all the
239  * struct argo_ring_info belonging to this domain.
240  *
241  * To take rings_L2 you must already have R(L1). W(L1) implies W(rings_L2) and
242  * L3.
243  *
244  * == L3 : The individual ring_info lock: ring_info->L3_lock
245  *
246  * Protects all the fields within the argo_ring_info, aside from the ones that
247  * rings_L2 already protects: node, id, lock.
248  *
249  * To acquire L3 you must already have R(rings_L2). W(rings_L2) implies L3.
250  *
251  * == send_L2 : The per-domain single-sender partner rings lock:
252  *              d->argo->send_L2_lock
253  *
254  * Protects the per-domain send hash table : d->argo->send_hash
255  * and the elements in the hash table, and the node and id fields
256  * in struct argo_send_info in the hash table.
257  *
258  * To take send_L2, you must already have R(L1). W(L1) implies send_L2.
259  * Do not attempt to acquire a rings_L2 on any domain after taking and while
260  * holding a send_L2 lock -- acquire the rings_L2 (if one is needed) beforehand.
261  *
262  * == wildcard_L2 : The per-domain wildcard pending list lock:
263  *                  d->argo->wildcard_L2_lock
264  *
265  * Protects the per-domain list of outstanding signals for space availability
266  * on wildcard rings.
267  *
268  * To take wildcard_L2, you must already have R(L1). W(L1) implies wildcard_L2.
269  * No other locks are acquired after obtaining wildcard_L2.
270  */
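
/*
 * Illustrative nesting (a sketch, not a real code path in this file): an
 * operation acting on one ring belonging to domain d would typically acquire
 * the locks in this order and release them in reverse:
 *
 *   read_lock(&L1_global_argo_rwlock);          R(L1)
 *   read_lock(&d->argo->rings_L2_rwlock);       R(rings_L2)
 *   spin_lock(&ring_info->L3_lock);             L3
 *   ... operate on ring_info ...
 *   spin_unlock(&ring_info->L3_lock);
 *   read_unlock(&d->argo->rings_L2_rwlock);
 *   read_unlock(&L1_global_argo_rwlock);
 */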
271 
272 /*
273  * Lock state validations macros
274  *
275  * These macros encode the logic to verify that the locking has adhered to the
276  * locking discipline above.
277  * eg. On entry to logic that requires holding at least R(rings_L2), this:
278  *      ASSERT(LOCKING_Read_rings_L2(d));
279  *
280  * checks that the lock state is sufficient, validating that one of the
281  * following must be true when executed:       R(rings_L2) && R(L1)
282  *                                        or:  W(rings_L2) && R(L1)
283  *                                        or:  W(L1)
284  *
285  * The LOCKING macros defined below here are for use at verification points.
286  */
287 #define LOCKING_Write_L1 (rw_is_write_locked(&L1_global_argo_rwlock))
288 /*
289  * While LOCKING_Read_L1 will return true even if the lock is write-locked,
290  * that's OK because everywhere that a Read lock is needed with these macros,
291  * holding a Write lock there instead is OK too: we're checking that _at least_
292  * the specified level of locks are held.
293  */
294 #define LOCKING_Read_L1 (rw_is_locked(&L1_global_argo_rwlock))
295 
296 #define LOCKING_Write_rings_L2(d) \
297     ((LOCKING_Read_L1 && rw_is_write_locked(&(d)->argo->rings_L2_rwlock)) || \
298      LOCKING_Write_L1)
299 /*
300  * Skip checking LOCKING_Write_rings_L2(d) within this LOCKING_Read_rings_L2
301  * definition because the first clause that is testing R(L1) && R(L2) will also
302  * return true if R(L1) && W(L2) is true, because of the way that rw_is_locked
303  * behaves. This results in a slightly shorter and faster implementation.
304  */
305 #define LOCKING_Read_rings_L2(d) \
306     ((LOCKING_Read_L1 && rw_is_locked(&(d)->argo->rings_L2_rwlock)) || \
307      LOCKING_Write_L1)
308 /*
309  * Skip checking LOCKING_Write_L1 within this LOCKING_L3 definition because
310  * LOCKING_Write_rings_L2(d) will return true for that condition.
311  */
312 #define LOCKING_L3(d, r) \
313     ((LOCKING_Read_L1 && rw_is_locked(&(d)->argo->rings_L2_rwlock) \
314       && spin_is_locked(&(r)->L3_lock)) || LOCKING_Write_rings_L2(d))
315 
316 #define LOCKING_send_L2(d) \
317     ((LOCKING_Read_L1 && spin_is_locked(&(d)->argo->send_L2_lock)) || \
318      LOCKING_Write_L1)
319 
320 #define ARGO_DEBUG 0
321 #define argo_dprintk(fmt, args...)                      \
322     do {                                                \
323         if ( ARGO_DEBUG )                               \
324             printk(XENLOG_DEBUG "argo: " fmt, ##args);  \
325     } while ( 0 )
326 
327 /*
328  * This hash function is used to distribute rings within the per-domain
329  * hash tables (d->argo->ring_hash and d->argo->send_hash). The hash table
330  * will provide a struct if a match is found with an 'argo_ring_id' key:
331  * ie. the key is a (domain id, argo port, partner domain id) tuple.
332  * The algorithm approximates the string hashing function 'djb2'.
333  */
334 static unsigned int
335 hash_index(const struct argo_ring_id *id)
336 {
337     unsigned int hash = 5381; /* prime constant from djb2 */
338 
339     /* For each input: hash = hash * 33 + <new input character value> */
340     hash = ((hash << 5) + hash) +  (id->aport            & 0xff);
341     hash = ((hash << 5) + hash) + ((id->aport      >> 8) & 0xff);
342     hash = ((hash << 5) + hash) + ((id->aport     >> 16) & 0xff);
343     hash = ((hash << 5) + hash) + ((id->aport     >> 24) & 0xff);
344     hash = ((hash << 5) + hash) +  (id->domain_id        & 0xff);
345     hash = ((hash << 5) + hash) + ((id->domain_id  >> 8) & 0xff);
346     hash = ((hash << 5) + hash) +  (id->partner_id       & 0xff);
347     hash = ((hash << 5) + hash) + ((id->partner_id >> 8) & 0xff);
348 
349     /*
350      * Since ARGO_HASHTABLE_SIZE is small, use higher-order bits of the
351      * hash to contribute to the lower-order bits before masking off.
352      */
353     return (hash ^ (hash >> 15)) & (ARGO_HASHTABLE_SIZE - 1);
354 }
355 
356 static struct argo_ring_info *
357 find_ring_info(const struct domain *d, const struct argo_ring_id *id)
358 {
359     struct argo_ring_info *ring_info;
360     const struct list_head *bucket;
361 
362     ASSERT(LOCKING_Read_rings_L2(d));
363 
364     /* List is not modified here. Search and return the match if found. */
365     bucket = &d->argo->ring_hash[hash_index(id)];
366 
367     list_for_each_entry(ring_info, bucket, node)
368     {
369         const struct argo_ring_id *cmpid = &ring_info->id;
370 
371         if ( cmpid->aport == id->aport &&
372              cmpid->domain_id == id->domain_id &&
373              cmpid->partner_id == id->partner_id )
374         {
375             argo_dprintk("found ring_info for ring(%u:%x %u)\n",
376                          id->domain_id, id->aport, id->partner_id);
377             return ring_info;
378         }
379     }
380     argo_dprintk("no ring_info for ring(%u:%x %u)\n",
381                  id->domain_id, id->aport, id->partner_id);
382 
383     return NULL;
384 }
385 
386 static struct argo_ring_info *
387 find_ring_info_by_match(const struct domain *d, xen_argo_port_t aport,
388                         domid_t partner_id)
389 {
390     struct argo_ring_id id;
391     struct argo_ring_info *ring_info;
392 
393     ASSERT(LOCKING_Read_rings_L2(d));
394 
395     id.aport = aport;
396     id.domain_id = d->domain_id;
397     id.partner_id = partner_id;
398 
399     ring_info = find_ring_info(d, &id);
400     if ( ring_info )
401         return ring_info;
402 
403     id.partner_id = XEN_ARGO_DOMID_ANY;
404 
405     return find_ring_info(d, &id);
406 }
407 
408 static struct argo_send_info *
409 find_send_info(const struct domain *d, const struct argo_ring_id *id)
410 {
411     struct argo_send_info *send_info;
412     const struct list_head *bucket;
413 
414     ASSERT(LOCKING_send_L2(d));
415 
416     /* List is not modified here. Search and return the match if found. */
417     bucket = &d->argo->send_hash[hash_index(id)];
418 
419     list_for_each_entry(send_info, bucket, node)
420     {
421         const struct argo_ring_id *cmpid = &send_info->id;
422 
423         if ( cmpid->aport == id->aport &&
424              cmpid->domain_id == id->domain_id &&
425              cmpid->partner_id == id->partner_id )
426         {
427             argo_dprintk("found send_info for ring(%u:%x %u)\n",
428                          id->domain_id, id->aport, id->partner_id);
429             return send_info;
430         }
431     }
432     argo_dprintk("no send_info for ring(%u:%x %u)\n",
433                  id->domain_id, id->aport, id->partner_id);
434 
435     return NULL;
436 }
437 
438 static void
439 signal_domain(struct domain *d)
440 {
441     argo_dprintk("signalling domid:%u\n", d->domain_id);
442 
443     send_guest_domain_virq(d, VIRQ_ARGO);
444 }
445 
446 static void
447 signal_domid(domid_t domain_id)
448 {
449     struct domain *d = rcu_lock_domain_by_id(domain_id);
450 
451     if ( !d )
452         return;
453 
454     signal_domain(d);
455     rcu_unlock_domain(d);
456 }
457 
458 static void
459 ring_unmap(const struct domain *d, struct argo_ring_info *ring_info)
460 {
461     unsigned int i;
462 
463     ASSERT(LOCKING_L3(d, ring_info));
464 
465     if ( !ring_info->mfn_mapping )
466         return;
467 
468     ASSERT(!ring_info->nmfns || ring_info->mfns);
469 
470     for ( i = 0; i < ring_info->nmfns; i++ )
471     {
472         if ( !ring_info->mfn_mapping[i] )
473             continue;
474 
475         ASSERT(!mfn_eq(ring_info->mfns[i], INVALID_MFN));
476         argo_dprintk(XENLOG_ERR "argo: unmapping page %"PRI_mfn" from %p\n",
477                      mfn_x(ring_info->mfns[i]), ring_info->mfn_mapping[i]);
478 
479         unmap_domain_page_global(ring_info->mfn_mapping[i]);
480         ring_info->mfn_mapping[i] = NULL;
481     }
482 }
483 
484 static int
485 ring_map_page(const struct domain *d, struct argo_ring_info *ring_info,
486               unsigned int i, void **out_ptr)
487 {
488     ASSERT(LOCKING_L3(d, ring_info));
489 
490     /*
491      * FIXME: Investigate using vmap to create a single contiguous virtual
492      * address space mapping of the ring instead of using the array of single
493      * page mappings.
494      * Affects logic in memcpy_to_guest_ring, the mfn_mapping array data
495      * structure, and places where ring mappings are added or removed.
496      */
497 
498     if ( i >= ring_info->nmfns )
499     {
500         gprintk(XENLOG_ERR,
501                "argo: ring (vm%u:%x vm%u) %p attempted to map page %u of %u\n",
502                 ring_info->id.domain_id, ring_info->id.aport,
503                 ring_info->id.partner_id, ring_info, i, ring_info->nmfns);
504         return -ENOMEM;
505     }
506     i = array_index_nospec(i, ring_info->nmfns);
507 
508     if ( !ring_info->mfns || !ring_info->mfn_mapping )
509     {
510         ASSERT_UNREACHABLE();
511         ring_info->len = 0;
512         return -ENOMEM;
513     }
514 
515     if ( !ring_info->mfn_mapping[i] )
516     {
517         ring_info->mfn_mapping[i] = map_domain_page_global(ring_info->mfns[i]);
518         if ( !ring_info->mfn_mapping[i] )
519         {
520             gprintk(XENLOG_ERR, "argo: ring (vm%u:%x vm%u) %p attempted to map "
521                     "page %u of %u\n",
522                     ring_info->id.domain_id, ring_info->id.aport,
523                     ring_info->id.partner_id, ring_info, i, ring_info->nmfns);
524             return -ENOMEM;
525         }
526         argo_dprintk("mapping page %"PRI_mfn" to %p\n",
527                      mfn_x(ring_info->mfns[i]), ring_info->mfn_mapping[i]);
528     }
529 
530     if ( out_ptr )
531         *out_ptr = ring_info->mfn_mapping[i];
532 
533     return 0;
534 }
535 
536 static void
537 update_tx_ptr(const struct domain *d, struct argo_ring_info *ring_info,
538               uint32_t tx_ptr)
539 {
540     xen_argo_ring_t *ringp;
541 
542     ASSERT(LOCKING_L3(d, ring_info));
543     ASSERT(ring_info->mfn_mapping[0]);
544 
545     ring_info->tx_ptr = tx_ptr;
546     ringp = ring_info->mfn_mapping[0];
547 
548     write_atomic(&ringp->tx_ptr, tx_ptr);
549     smp_wmb();
550 }
551 
552 static int
553 memcpy_to_guest_ring(const struct domain *d, struct argo_ring_info *ring_info,
554                      unsigned int offset,
555                      const void *src, XEN_GUEST_HANDLE(uint8) src_hnd,
556                      unsigned int len)
557 {
558     unsigned int mfns_index = offset >> PAGE_SHIFT;
559     void *dst;
560     int ret;
561     unsigned int src_offset = 0;
562 
563     ASSERT(LOCKING_L3(d, ring_info));
564 
565     offset &= ~PAGE_MASK;
566 
567     if ( len + offset > XEN_ARGO_MAX_RING_SIZE )
568         return -EFAULT;
569 
570     while ( len )
571     {
572         unsigned int head_len = (offset + len) > PAGE_SIZE ? PAGE_SIZE - offset
573                                                            : len;
574 
575         ret = ring_map_page(d, ring_info, mfns_index, &dst);
576         if ( ret )
577             return ret;
578 
579         if ( src )
580         {
581             memcpy(dst + offset, src + src_offset, head_len);
582             src_offset += head_len;
583         }
584         else
585         {
586             if ( copy_from_guest(dst + offset, src_hnd, head_len) )
587                 return -EFAULT;
588 
589             guest_handle_add_offset(src_hnd, head_len);
590         }
591 
592         mfns_index++;
593         len -= head_len;
594         offset = 0;
595     }
596 
597     return 0;
598 }
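
/*
 * Worked example (illustrative, assuming 4K pages): a 100-byte copy starting
 * at ring offset 4090 is split by the loop above into a 6-byte write at the
 * end of mfns[0] (head_len == PAGE_SIZE - 4090), followed by a 94-byte write
 * at offset 0 of mfns[1].
 */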
599 
600 /*
601  * Use this with caution: rx_ptr is under guest control and may be bogus.
602  * See get_sanitized_ring for a safer alternative.
603  */
604 static int
605 get_rx_ptr(const struct domain *d, struct argo_ring_info *ring_info,
606            uint32_t *rx_ptr)
607 {
608     void *src;
609     xen_argo_ring_t *ringp;
610     int ret;
611 
612     ASSERT(LOCKING_L3(d, ring_info));
613 
614     if ( !ring_info->nmfns || ring_info->nmfns < NPAGES_RING(ring_info->len) )
615         return -EINVAL;
616 
617     ret = ring_map_page(d, ring_info, 0, &src);
618     if ( ret )
619         return ret;
620 
621     ringp = (xen_argo_ring_t *)src;
622 
623     *rx_ptr = read_atomic(&ringp->rx_ptr);
624 
625     return 0;
626 }
627 
628 /*
629  * get_sanitized_ring creates a modified copy of the ring pointers where
630  * the rx_ptr is rounded up to ensure it is aligned, and then ring
631  * wrap is handled. Simplifies safe use of the rx_ptr for available
632  * space calculation.
633  */
634 static int
635 get_sanitized_ring(const struct domain *d, xen_argo_ring_t *ring,
636                    struct argo_ring_info *ring_info)
637 {
638     uint32_t rx_ptr;
639     int ret;
640 
641     ASSERT(LOCKING_L3(d, ring_info));
642 
643     ret = get_rx_ptr(d, ring_info, &rx_ptr);
644     if ( ret )
645         return ret;
646 
647     ring->tx_ptr = ring_info->tx_ptr;
648 
649     rx_ptr = ROUNDUP_MESSAGE(rx_ptr);
650     if ( rx_ptr >= ring_info->len )
651         rx_ptr = 0;
652 
653     ring->rx_ptr = rx_ptr;
654 
655     return 0;
656 }
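
/*
 * Example of the sanitization above (illustrative): for a 4096-byte ring, a
 * guest-supplied rx_ptr of 4090 is rounded up to ROUNDUP_MESSAGE(4090) ==
 * 4096, which is >= ring_info->len, so the sanitized copy reports rx_ptr 0.
 */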
657 
658 static unsigned int
659 ringbuf_payload_space(const struct domain *d, struct argo_ring_info *ring_info)
660 {
661     xen_argo_ring_t ring;
662     unsigned int len;
663     int ret;
664 
665     ASSERT(LOCKING_L3(d, ring_info));
666 
667     len = ring_info->len;
668     if ( !len )
669         return 0;
670 
671     if ( get_sanitized_ring(d, &ring, ring_info) )
672         return 0;
673 
674     argo_dprintk("sanitized ringbuf_payload_space: tx_ptr=%u rx_ptr=%u\n",
675                  ring.tx_ptr, ring.rx_ptr);
676 
677     /*
678      * rx_ptr == tx_ptr means that the ring has been emptied.
679      * See message size checking logic in the entry to ringbuf_insert which
680      * ensures that there is always one message slot of size ROUNDUP_MESSAGE(1)
681      * left available, preventing a ring from being entirely filled.
682      * This ensures that matching ring indexes always indicate an empty ring
683      * and never a full one.
684      */
685     ret = ring.rx_ptr - ring.tx_ptr;
686     if ( ret <= 0 )
687         ret += len;
688 
689     /*
690      * In a sanitized ring, we can rely on:
691      *              (rx_ptr < ring_info->len)           &&
692      *              (tx_ptr < ring_info->len)           &&
693      *      (ring_info->len <= XEN_ARGO_MAX_RING_SIZE)
694      *
695      * and since: XEN_ARGO_MAX_RING_SIZE < INT32_MAX
696      * therefore right here: ret < INT32_MAX
697      * and we are safe to return it as an unsigned value from this function.
698      * The subtractions below cannot increase its value.
699      */
700 
701     /*
702      * The maximum size payload for a message that will be accepted is:
703      * (the available space between the ring indexes)
704      *    minus (space for a message header)
705      *    minus (space for one message slot)
706      * since ringbuf_insert requires that one message slot be left
707      * unfilled, to avoid filling the ring to capacity and confusing a full
708      * ring with an empty one.
709      * Since the ring indexes are sanitized, the value in ret is aligned, so
710      * the simple subtraction here works to return the aligned value needed:
711      */
712     ret -= sizeof(struct xen_argo_ring_message_header);
713     ret -= ROUNDUP_MESSAGE(1);
714 
715     return (ret < 0) ? 0 : ret;
716 }
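
/*
 * Worked example for the calculation above (illustrative): with a 4096-byte
 * ring, sanitized rx_ptr == 2048 and tx_ptr == 3072, ret is first
 * 2048 - 3072 == -1024, wraps to 3072, and after subtracting the 16-byte
 * message header and one 16-byte slot the reported payload space is 3040
 * bytes. With rx_ptr == tx_ptr the ring is empty and the result is
 * 4096 - 16 - 16 == 4064 bytes.
 */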
717 
718 /*
719  * iov_count returns its count on success via an out variable to avoid
720  * potential for a negative return value to be used incorrectly
721  * (eg. coerced into an unsigned variable resulting in a large incorrect value)
722  */
723 static int
724 iov_count(const xen_argo_iov_t *piov, unsigned int niov,
725           unsigned int *count)
726 {
727     unsigned int sum_iov_lens = 0;
728 
729     if ( niov > XEN_ARGO_MAXIOV )
730         return -EINVAL;
731 
732     for ( ; niov--; piov++ )
733     {
734         /* valid iovs must have the padding field set to zero */
735         if ( piov->pad )
736         {
737             argo_dprintk("invalid iov: padding is not zero\n");
738             return -EINVAL;
739         }
740 
741         /* check each to protect sum against integer overflow */
742         if ( piov->iov_len > MAX_ARGO_MESSAGE_SIZE )
743         {
744             argo_dprintk("invalid iov_len: too big (%u)>%llu\n",
745                          piov->iov_len, MAX_ARGO_MESSAGE_SIZE);
746             return -EINVAL;
747         }
748 
749         sum_iov_lens += piov->iov_len;
750 
751         /*
752          * Again protect sum from integer overflow
753          * and ensure total msg size will be within bounds.
754          */
755         if ( sum_iov_lens > MAX_ARGO_MESSAGE_SIZE )
756         {
757             argo_dprintk("invalid iov series: total message too big\n");
758             return -EMSGSIZE;
759         }
760     }
761 
762     *count = sum_iov_lens;
763 
764     return 0;
765 }
766 
767 static int
768 ringbuf_insert(const struct domain *d, struct argo_ring_info *ring_info,
769                const struct argo_ring_id *src_id, xen_argo_iov_t *iovs,
770                unsigned int niov, uint32_t message_type, unsigned int len)
771 {
772     xen_argo_ring_t ring;
773     struct xen_argo_ring_message_header mh = { };
774     int sp, ret;
775     xen_argo_iov_t *piov;
776     XEN_GUEST_HANDLE(uint8) NULL_hnd = { };
777 
778     ASSERT(LOCKING_L3(d, ring_info));
779 
780     /*
781      * Enforced below: no more than 'len' bytes of guest data
782      * (plus the message header) will be sent in this operation.
783      */
784 
785     /*
786      * Upper bound check the message len against the ring size.
787      * The message must not fill the ring; there must be at least one slot
788      * remaining so we can distinguish a full ring from an empty one.
789      * iov_count has already verified: len <= MAX_ARGO_MESSAGE_SIZE.
790      */
791     if ( ring_info->len <= (sizeof(struct xen_argo_ring_message_header) +
792                             ROUNDUP_MESSAGE(len)) )
793         return -EMSGSIZE;
794 
795     ret = get_sanitized_ring(d, &ring, ring_info);
796     if ( ret )
797         return ret;
798 
799     argo_dprintk("ring.tx_ptr=%u ring.rx_ptr=%u ring len=%u"
800                  " ring_info->tx_ptr=%u\n",
801                  ring.tx_ptr, ring.rx_ptr, ring_info->len, ring_info->tx_ptr);
802 
803     if ( ring.rx_ptr == ring.tx_ptr )
804         sp = ring_info->len;
805     else
806     {
807         sp = ring.rx_ptr - ring.tx_ptr;
808         if ( sp < 0 )
809             sp += ring_info->len;
810     }
811 
812     /*
813      * Size bounds check against currently available space in the ring.
814      * Again: the message must not fill the ring leaving no space remaining.
815      */
816     if ( (ROUNDUP_MESSAGE(len) +
817             sizeof(struct xen_argo_ring_message_header)) >= sp )
818     {
819         argo_dprintk("EAGAIN\n");
820         return -EAGAIN;
821     }
822 
823     mh.len = len + sizeof(struct xen_argo_ring_message_header);
824     mh.source.aport = src_id->aport;
825     mh.source.domain_id = src_id->domain_id;
826     mh.message_type = message_type;
827 
828     /*
829      * For this copy to the guest ring, tx_ptr is always 16-byte aligned
830      * and the message header is 16 bytes long.
831      */
832     BUILD_BUG_ON(
833         sizeof(struct xen_argo_ring_message_header) != ROUNDUP_MESSAGE(1));
834 
835     /*
836      * First data write into the destination ring: fixed size, message header.
837      * This cannot overrun because the available free space (value in 'sp')
838      * is checked above and must be at least this size.
839      */
840     ret = memcpy_to_guest_ring(d, ring_info,
841                                ring.tx_ptr + sizeof(xen_argo_ring_t),
842                                &mh, NULL_hnd, sizeof(mh));
843     if ( ret )
844     {
845         gprintk(XENLOG_ERR,
846                 "argo: failed to write message header to ring (vm%u:%x vm%u)\n",
847                 ring_info->id.domain_id, ring_info->id.aport,
848                 ring_info->id.partner_id);
849 
850         return ret;
851     }
852 
853     ring.tx_ptr += sizeof(mh);
854     if ( ring.tx_ptr == ring_info->len )
855         ring.tx_ptr = 0;
856 
857     for ( piov = iovs; niov--; piov++ )
858     {
859         XEN_GUEST_HANDLE(uint8) buf_hnd = piov->iov_hnd;
860         unsigned int iov_len = piov->iov_len;
861 
862         /* If no data is provided in this iov, moan and skip on to the next */
863         if ( !iov_len )
864         {
865             gprintk(XENLOG_WARNING,
866                     "argo: no data iov_len=0 iov_hnd=%p ring (vm%u:%x vm%u)\n",
867                     buf_hnd.p, ring_info->id.domain_id, ring_info->id.aport,
868                     ring_info->id.partner_id);
869 
870             continue;
871         }
872 
873         if ( unlikely(!guest_handle_okay(buf_hnd, iov_len)) )
874         {
875             gprintk(XENLOG_ERR,
876                     "argo: bad iov handle [%p, %u] (vm%u:%x vm%u)\n",
877                     buf_hnd.p, iov_len,
878                     ring_info->id.domain_id, ring_info->id.aport,
879                     ring_info->id.partner_id);
880 
881             return -EFAULT;
882         }
883 
884         sp = ring_info->len - ring.tx_ptr;
885 
886         /* Check: iov data size versus free space at the tail of the ring */
887         if ( iov_len > sp )
888         {
889             /*
890              * Second possible data write: ring-tail-wrap-write.
891              * Populate the ring tail and update the internal tx_ptr to handle
892              * wrapping at the end of ring.
893              * Size of data written here: sp
894              * which is the exact full amount of free space available at the
895              * tail of the ring, so this cannot overrun.
896              */
897             ret = memcpy_to_guest_ring(d, ring_info,
898                                        ring.tx_ptr + sizeof(xen_argo_ring_t),
899                                        NULL, buf_hnd, sp);
900             if ( ret )
901             {
902                 gprintk(XENLOG_ERR,
903                         "argo: failed to copy {%p, %d} (vm%u:%x vm%u)\n",
904                         buf_hnd.p, sp,
905                         ring_info->id.domain_id, ring_info->id.aport,
906                         ring_info->id.partner_id);
907 
908                 return ret;
909             }
910 
911             ring.tx_ptr = 0;
912             iov_len -= sp;
913             guest_handle_add_offset(buf_hnd, sp);
914 
915             ASSERT(iov_len <= ring_info->len);
916         }
917 
918         /*
919          * Third possible data write: all data remaining for this iov.
920          * Size of data written here: iov_len
921          *
922          * Case 1: if the ring-tail-wrap-write above was performed, then
923          *         iov_len has been decreased by 'sp' and ring.tx_ptr is zero.
924          *
925          *    We know from checking the result of iov_count:
926          *      len + sizeof(message_header) <= ring_info->len
927          *    We also know that len is the total of summing all iov_lens, so:
928          *       iov_len <= len
929          *    so by transitivity:
930          *       iov_len <= len <= (ring_info->len - sizeof(msgheader))
931          *    and therefore:
932          *       (iov_len + sizeof(msgheader) <= ring_info->len) &&
933          *       (ring.tx_ptr == 0)
934          *    so this write cannot overrun here.
935          *
936          * Case 2: ring-tail-wrap-write above was not performed
937          *    -> so iov_len is the guest-supplied value and: (iov_len <= sp)
938          *    ie. less than available space at the tail of the ring:
939          *        so this write cannot overrun.
940          */
941         ret = memcpy_to_guest_ring(d, ring_info,
942                                    ring.tx_ptr + sizeof(xen_argo_ring_t),
943                                    NULL, buf_hnd, iov_len);
944         if ( ret )
945         {
946             gprintk(XENLOG_ERR,
947                     "argo: failed to copy [%p, %u] (vm%u:%x vm%u)\n",
948                     buf_hnd.p, iov_len, ring_info->id.domain_id,
949                     ring_info->id.aport, ring_info->id.partner_id);
950 
951             return ret;
952         }
953 
954         ring.tx_ptr += iov_len;
955 
956         if ( ring.tx_ptr == ring_info->len )
957             ring.tx_ptr = 0;
958     }
959 
960     /*
961      * Finished writing data from all iovs into the ring: now need to round up
962      * tx_ptr to align to the next message boundary, and then wrap if necessary.
963      */
964     ring.tx_ptr = ROUNDUP_MESSAGE(ring.tx_ptr);
965 
966     if ( ring.tx_ptr >= ring_info->len )
967         ring.tx_ptr -= ring_info->len;
968 
969     update_tx_ptr(d, ring_info, ring.tx_ptr);
970 
971     /*
972      * At this point (and also on the error exit paths from this function) it is
973      * possible to unmap the ring_info, ie:
974      *   ring_unmap(d, ring_info);
975      * but performance should be improved by not doing so, and retaining
976      * the mapping.
977      * An XSM policy control over level of confidentiality required
978      * versus performance cost could be added to decide that here.
979      */
980 
981     return ret;
982 }
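
/*
 * Worked example (illustrative): inserting a single 10-byte payload with
 * ringbuf_insert() writes a 16-byte message header with mh.len == 26 at the
 * current tx_ptr, then the 10 payload bytes, and finally publishes a tx_ptr
 * advanced by ROUNDUP_MESSAGE(26) == 32 bytes (wrapping to 0 if it reaches
 * the end of the ring).
 */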
983 
984 static void
985 wildcard_pending_list_remove(domid_t domain_id, struct pending_ent *ent)
986 {
987     struct domain *d = rcu_lock_domain_by_id(domain_id);
988 
989     if ( !d )
990         return;
991 
992     ASSERT(LOCKING_Read_L1);
993 
994     if ( d->argo )
995     {
996         spin_lock(&d->argo->wildcard_L2_lock);
997         list_del(&ent->wildcard_node);
998         spin_unlock(&d->argo->wildcard_L2_lock);
999     }
1000     rcu_unlock_domain(d);
1001 }
1002 
1003 static void
1004 wildcard_pending_list_insert(domid_t domain_id, struct pending_ent *ent)
1005 {
1006     struct domain *d = rcu_lock_domain_by_id(domain_id);
1007 
1008     if ( !d )
1009         return;
1010 
1011     ASSERT(LOCKING_Read_L1);
1012 
1013     if ( d->argo )
1014     {
1015         spin_lock(&d->argo->wildcard_L2_lock);
1016         list_add(&ent->wildcard_node, &d->argo->wildcard_pend_list);
1017         spin_unlock(&d->argo->wildcard_L2_lock);
1018     }
1019     rcu_unlock_domain(d);
1020 }
1021 
1022 static void
1023 pending_remove_all(const struct domain *d, struct argo_ring_info *ring_info)
1024 {
1025     struct pending_ent *ent;
1026 
1027     ASSERT(LOCKING_L3(d, ring_info));
1028 
1029     /* Delete all pending notifications from this ring's list. */
1030     while ( (ent = list_first_entry_or_null(&ring_info->pending,
1031                                             struct pending_ent, node)) )
1032     {
1033         /* For wildcard rings, remove each from their wildcard list too. */
1034         if ( ring_info->id.partner_id == XEN_ARGO_DOMID_ANY )
1035             wildcard_pending_list_remove(ent->domain_id, ent);
1036         list_del(&ent->node);
1037         xfree(ent);
1038     }
1039     ring_info->npending = 0;
1040 }
1041 
1042 static void
1043 pending_notify(struct list_head *to_notify)
1044 {
1045     struct pending_ent *ent;
1046 
1047     ASSERT(LOCKING_Read_L1);
1048 
1049     /* Sending signals for all ents in this list, draining until it is empty. */
1050     while ( (ent = list_first_entry_or_null(to_notify, struct pending_ent,
1051                                             node)) )
1052     {
1053         list_del(&ent->node);
1054         signal_domid(ent->domain_id);
1055         xfree(ent);
1056     }
1057 }
1058 
1059 static void
1060 pending_find(const struct domain *d, struct argo_ring_info *ring_info,
1061              unsigned int payload_space, struct list_head *to_notify)
1062 {
1063     struct pending_ent *ent, *next;
1064 
1065     ASSERT(LOCKING_Read_rings_L2(d));
1066 
1067     /*
1068      * TODO: Current policy here is to signal _all_ of the waiting domains
1069      *       interested in sending a message of size less than payload_space.
1070      *
1071      * This is likely to be suboptimal, since once one of them has added
1072      * their message to the ring, there may well be insufficient room
1073      * available for any of the others to transmit, meaning that they were
1074      * woken in vain, which created extra work just to requeue their wait.
1075      *
1076      * Retain this simple policy for now since it at least avoids starving a
1077      * domain of available space notifications because of a policy that only
1078      * notified other domains instead. Improvement may be possible;
1079      * investigation required.
1080      */
1081     spin_lock(&ring_info->L3_lock);
1082 
1083     /* Remove matching ents from the ring list, and add them to "to_notify" */
1084     list_for_each_entry_safe(ent, next, &ring_info->pending, node)
1085     {
1086         if ( payload_space >= ent->len )
1087         {
1088             if ( ring_info->id.partner_id == XEN_ARGO_DOMID_ANY )
1089                 wildcard_pending_list_remove(ent->domain_id, ent);
1090 
1091             list_del(&ent->node);
1092             ring_info->npending--;
1093             list_add(&ent->node, to_notify);
1094         }
1095     }
1096 
1097     spin_unlock(&ring_info->L3_lock);
1098 }
1099 
1100 static int
1101 pending_queue(const struct domain *d, struct argo_ring_info *ring_info,
1102               domid_t src_id, unsigned int len)
1103 {
1104     struct pending_ent *ent;
1105 
1106     ASSERT(LOCKING_L3(d, ring_info));
1107 
1108     if ( ring_info->npending >= MAX_PENDING_PER_RING )
1109         return -EBUSY;
1110 
1111     ent = xmalloc(struct pending_ent);
1112     if ( !ent )
1113         return -ENOMEM;
1114 
1115     ent->len = len;
1116     ent->domain_id = src_id;
1117     ent->ring_info = ring_info;
1118 
1119     if ( ring_info->id.partner_id == XEN_ARGO_DOMID_ANY )
1120         wildcard_pending_list_insert(src_id, ent);
1121     list_add(&ent->node, &ring_info->pending);
1122     ring_info->npending++;
1123 
1124     return 0;
1125 }
1126 
1127 static int
1128 pending_requeue(const struct domain *d, struct argo_ring_info *ring_info,
1129                 domid_t src_id, unsigned int len)
1130 {
1131     struct pending_ent *ent;
1132 
1133     ASSERT(LOCKING_L3(d, ring_info));
1134 
1135     /* List structure is not modified here. Update len in a match if found. */
1136     list_for_each_entry(ent, &ring_info->pending, node)
1137     {
1138         if ( ent->domain_id == src_id )
1139         {
1140             /*
1141              * Reuse an existing queue entry for a notification rather than add
1142              * another. If the existing entry is waiting for a smaller size than
1143              * the current message then adjust the record to wait for the
1144              * current (larger) size to be available before triggering a
1145              * notification.
1146              * This assists the waiting sender by ensuring that whenever a
1147              * notification is triggered, there is sufficient space available
1148              * for (at least) any one of the messages awaiting transmission.
1149              */
1150             if ( ent->len < len )
1151                 ent->len = len;
1152 
1153             return 0;
1154         }
1155     }
1156 
1157     return pending_queue(d, ring_info, src_id, len);
1158 }
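
/*
 * Example of the requeue policy above (illustrative): if vm1 is already
 * queued on this ring waiting for 64 bytes of space and issues a new query
 * needing 128 bytes, the existing entry is updated to wait for 128 bytes
 * rather than adding a second entry; a later query for 32 bytes leaves the
 * entry at 128, so that any of the waiting messages can be sent when the
 * notification fires.
 */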
1159 
1160 static void
1161 pending_cancel(const struct domain *d, struct argo_ring_info *ring_info,
1162                domid_t src_id)
1163 {
1164     struct pending_ent *ent, *next;
1165 
1166     ASSERT(LOCKING_L3(d, ring_info));
1167 
1168     /* Remove all ents where domain_id matches src_id from the ring's list. */
1169     list_for_each_entry_safe(ent, next, &ring_info->pending, node)
1170     {
1171         if ( ent->domain_id == src_id )
1172         {
1173             /* For wildcard rings, remove each from their wildcard list too. */
1174             if ( ring_info->id.partner_id == XEN_ARGO_DOMID_ANY )
1175                 wildcard_pending_list_remove(ent->domain_id, ent);
1176             list_del(&ent->node);
1177             xfree(ent);
1178             ring_info->npending--;
1179         }
1180     }
1181 }
1182 
1183 static void
1184 wildcard_rings_pending_remove(struct domain *d)
1185 {
1186     struct pending_ent *ent;
1187 
1188     ASSERT(LOCKING_Write_L1);
1189 
1190     /* Delete all pending signals to the domain about wildcard rings. */
1191     while ( (ent = list_first_entry_or_null(&d->argo->wildcard_pend_list,
1192                                             struct pending_ent, node)) )
1193     {
1194         /*
1195          * The ent->node deleted here, and the npending value decreased,
1196          * belong to the ring_info of another domain, which is why this
1197          * function requires holding W(L1):
1198          * it implies the L3 lock that protects that ring_info struct.
1199          */
1200         ent->ring_info->npending--;
1201         list_del(&ent->node);
1202         list_del(&ent->wildcard_node);
1203         xfree(ent);
1204     }
1205 }
1206 
1207 static void
1208 ring_remove_mfns(const struct domain *d, struct argo_ring_info *ring_info)
1209 {
1210     unsigned int i;
1211 
1212     ASSERT(LOCKING_Write_rings_L2(d));
1213 
1214     if ( !ring_info->mfns )
1215         return;
1216 
1217     if ( !ring_info->mfn_mapping )
1218     {
1219         ASSERT_UNREACHABLE();
1220         return;
1221     }
1222 
1223     ring_unmap(d, ring_info);
1224 
1225     for ( i = 0; i < ring_info->nmfns; i++ )
1226         if ( !mfn_eq(ring_info->mfns[i], INVALID_MFN) )
1227             put_page_and_type(mfn_to_page(ring_info->mfns[i]));
1228 
1229     ring_info->nmfns = 0;
1230     XFREE(ring_info->mfns);
1231     XFREE(ring_info->mfn_mapping);
1232 }
1233 
1234 static void
1235 ring_remove_info(const struct domain *d, struct argo_ring_info *ring_info)
1236 {
1237     ASSERT(LOCKING_Write_rings_L2(d));
1238 
1239     pending_remove_all(d, ring_info);
1240     list_del(&ring_info->node);
1241     ring_remove_mfns(d, ring_info);
1242     xfree(ring_info);
1243 }
1244 
1245 static void
1246 domain_rings_remove_all(struct domain *d)
1247 {
1248     unsigned int i;
1249 
1250     ASSERT(LOCKING_Write_rings_L2(d));
1251 
1252     for ( i = 0; i < ARGO_HASHTABLE_SIZE; ++i )
1253     {
1254         struct argo_ring_info *ring_info;
1255         struct list_head *bucket = &d->argo->ring_hash[i];
1256 
1257         while ( (ring_info = list_first_entry_or_null(bucket,
1258                                                       struct argo_ring_info,
1259                                                       node)) )
1260             ring_remove_info(d, ring_info);
1261     }
1262     d->argo->ring_count = 0;
1263 }
1264 
1265 /*
1266  * Tear down all rings of other domains where src_d domain is the partner.
1267  * (ie. it is the single domain that can send to those rings.)
1268  * This will also cancel any pending notifications about those rings.
1269  */
1270 static void
1271 partner_rings_remove(struct domain *src_d)
1272 {
1273     unsigned int i;
1274 
1275     ASSERT(LOCKING_Write_L1);
1276 
1277     for ( i = 0; i < ARGO_HASHTABLE_SIZE; ++i )
1278     {
1279         struct argo_send_info *send_info;
1280         struct list_head *bucket = &src_d->argo->send_hash[i];
1281 
1282         /* Remove all ents from the send list. Take each off their ring list. */
1283         while ( (send_info = list_first_entry_or_null(bucket,
1284                                                       struct argo_send_info,
1285                                                       node)) )
1286         {
1287             struct domain *dst_d = rcu_lock_domain_by_id(send_info->id.domain_id);
1288 
1289             if ( dst_d && dst_d->argo )
1290             {
1291                 struct argo_ring_info *ring_info =
1292                     find_ring_info(dst_d, &send_info->id);
1293 
1294                 if ( ring_info )
1295                 {
1296                     ring_remove_info(dst_d, ring_info);
1297                     dst_d->argo->ring_count--;
1298                 }
1299                 else
1300                     ASSERT_UNREACHABLE();
1301             }
1302             else
1303                 argo_dprintk("%pd has entry for stale partner d%u\n",
1304                              src_d, send_info->id.domain_id);
1305 
1306             if ( dst_d )
1307                 rcu_unlock_domain(dst_d);
1308 
1309             list_del(&send_info->node);
1310             xfree(send_info);
1311         }
1312     }
1313 }
1314 
1315 static int
1316 fill_ring_data(const struct domain *currd,
1317                XEN_GUEST_HANDLE(xen_argo_ring_data_ent_t) data_ent_hnd)
1318 {
1319     xen_argo_ring_data_ent_t ent;
1320     struct domain *dst_d;
1321     struct argo_ring_info *ring_info;
1322     int ret = 0;
1323 
1324     ASSERT(currd == current->domain);
1325     ASSERT(LOCKING_Read_L1);
1326 
1327     if ( __copy_from_guest(&ent, data_ent_hnd, 1) )
1328         return -EFAULT;
1329 
1330     argo_dprintk("fill_ring_data: ent.ring.domain=%u,ent.ring.aport=%x\n",
1331                  ent.ring.domain_id, ent.ring.aport);
1332 
1333     ent.flags = 0;
1334 
1335     dst_d = rcu_lock_domain_by_id(ent.ring.domain_id);
1336     if ( !dst_d || !dst_d->argo )
1337         goto out;
1338 
1339     /*
1340      * Don't supply information about rings that a guest is not
1341      * allowed to send to.
1342      */
1343     ret = xsm_argo_send(currd, dst_d);
1344     if ( ret )
1345         goto out;
1346 
1347     read_lock(&dst_d->argo->rings_L2_rwlock);
1348 
1349     ring_info = find_ring_info_by_match(dst_d, ent.ring.aport,
1350                                         currd->domain_id);
1351     if ( ring_info )
1352     {
1353         unsigned int space_avail;
1354 
1355         ent.flags |= XEN_ARGO_RING_EXISTS;
1356 
1357         spin_lock(&ring_info->L3_lock);
1358 
1359         ent.max_message_size = ring_info->len -
1360                                    sizeof(struct xen_argo_ring_message_header) -
1361                                    ROUNDUP_MESSAGE(1);
1362 
1363         if ( ring_info->id.partner_id == XEN_ARGO_DOMID_ANY )
1364             ent.flags |= XEN_ARGO_RING_SHARED;
1365 
1366         space_avail = ringbuf_payload_space(dst_d, ring_info);
1367 
1368         argo_dprintk("fill_ring_data: aport=%x space_avail=%u"
1369                      " space_wanted=%u\n",
1370                      ring_info->id.aport, space_avail, ent.space_required);
1371 
1372         /* Do not queue a notification for an unachievable size */
1373         if ( ent.space_required > ent.max_message_size )
1374             ent.flags |= XEN_ARGO_RING_EMSGSIZE;
1375         else if ( space_avail >= ent.space_required )
1376         {
1377             pending_cancel(dst_d, ring_info, currd->domain_id);
1378             ent.flags |= XEN_ARGO_RING_SUFFICIENT;
1379         }
1380         else
1381         {
1382             ret = pending_requeue(dst_d, ring_info, currd->domain_id,
1383                                   ent.space_required);
1384             if ( ret == -EBUSY )
1385             {
1386                 /*
1387                  * Too many other domains are already awaiting notification
1388                  * about available space on this ring. Indicate this state via
1389                  * flag. No need to return an error to the caller; allow the
1390                  * processing of queries about other rings to continue.
1391                  */
1392                 ent.flags |= XEN_ARGO_RING_EBUSY;
1393                 ret = 0;
1394             }
1395         }
1396 
1397         spin_unlock(&ring_info->L3_lock);
1398 
1399         if ( space_avail == ent.max_message_size )
1400             ent.flags |= XEN_ARGO_RING_EMPTY;
1401 
1402     }
1403     read_unlock(&dst_d->argo->rings_L2_rwlock);
1404 
1405  out:
1406     if ( dst_d )
1407         rcu_unlock_domain(dst_d);
1408 
1409     if ( !ret && (__copy_field_to_guest(data_ent_hnd, &ent, flags) ||
1410                   __copy_field_to_guest(data_ent_hnd, &ent, max_message_size)) )
1411         return -EFAULT;
1412 
1413     return ret;
1414 }
1415 
1416 static int
1417 find_ring_mfn(struct domain *d, gfn_t gfn, mfn_t *mfn)
1418 {
1419     struct page_info *page;
1420     p2m_type_t p2mt;
1421     int ret;
1422 
1423     ret = check_get_page_from_gfn(d, gfn, false, &p2mt, &page);
1424     if ( unlikely(ret) )
1425         return ret;
1426 
1427     *mfn = page_to_mfn(page);
1428 
1429     switch ( p2mt )
1430     {
1431     case p2m_ram_rw:
1432         if ( !get_page_type(page, PGT_writable_page) )
1433             ret = -EINVAL;
1434         break;
1435 
1436 #ifdef CONFIG_X86
1437     case p2m_ram_logdirty:
1438         ret = -EAGAIN;
1439         break;
1440 #endif
1441 
1442     default:
1443         ret = -EINVAL;
1444         break;
1445     }
1446 
1447     if ( unlikely(ret) )
1448         put_page(page);
1449 
1450     return ret;
1451 }
1452 
1453 static int
1454 find_ring_mfns(struct domain *d, struct argo_ring_info *ring_info,
1455                const unsigned int npage,
1456                XEN_GUEST_HANDLE_PARAM(xen_argo_gfn_t) gfn_hnd,
1457                const unsigned int len)
1458 {
1459     unsigned int i;
1460     int ret = 0;
1461     mfn_t *mfns;
1462     void **mfn_mapping;
1463 
1464     ASSERT(LOCKING_Write_rings_L2(d));
1465 
1466     if ( ring_info->mfns )
1467     {
1468         /* Ring already existed: drop the previous mapping. */
1469         argo_dprintk("argo: vm%u re-register existing ring "
1470                      "(vm%u:%x vm%u) clears mapping\n",
1471                      d->domain_id, ring_info->id.domain_id,
1472                      ring_info->id.aport, ring_info->id.partner_id);
1473 
1474         ring_remove_mfns(d, ring_info);
1475         ASSERT(!ring_info->mfns);
1476     }
1477 
1478     mfns = xmalloc_array(mfn_t, npage);
1479     if ( !mfns )
1480         return -ENOMEM;
1481 
1482     for ( i = 0; i < npage; i++ )
1483         mfns[i] = INVALID_MFN;
1484 
1485     mfn_mapping = xzalloc_array(void *, npage);
1486     if ( !mfn_mapping )
1487     {
1488         xfree(mfns);
1489         return -ENOMEM;
1490     }
1491 
1492     ring_info->mfns = mfns;
1493     ring_info->mfn_mapping = mfn_mapping;
1494 
1495     for ( i = 0; i < npage; i++ )
1496     {
1497         mfn_t mfn;
1498         xen_argo_gfn_t argo_gfn;
1499 
1500         ret = __copy_from_guest_offset(&argo_gfn, gfn_hnd, i, 1) ? -EFAULT : 0;
1501         if ( ret )
1502             break;
1503 
1504         ret = find_ring_mfn(d, _gfn(argo_gfn), &mfn);
1505         if ( ret )
1506         {
1507             gprintk(XENLOG_ERR, "argo: vm%u: invalid gfn %"PRI_gfn" "
1508                     "r:(vm%u:%x vm%u) %p %u/%u\n",
1509                     d->domain_id, gfn_x(_gfn(argo_gfn)),
1510                     ring_info->id.domain_id, ring_info->id.aport,
1511                     ring_info->id.partner_id, ring_info, i, npage);
1512             break;
1513         }
1514 
1515         ring_info->mfns[i] = mfn;
1516 
1517         argo_dprintk("%u: %"PRI_gfn" -> %"PRI_mfn"\n",
1518                      i, gfn_x(_gfn(argo_gfn)), mfn_x(ring_info->mfns[i]));
1519     }
1520 
1521     ring_info->nmfns = i;
1522 
1523     if ( ret )
1524         ring_remove_mfns(d, ring_info);
1525     else
1526     {
1527         ASSERT(ring_info->nmfns == NPAGES_RING(len));
1528 
1529         argo_dprintk("argo: vm%u ring (vm%u:%x vm%u) %p "
1530                      "mfn_mapping %p len %u nmfns %u\n",
1531                      d->domain_id, ring_info->id.domain_id,
1532                      ring_info->id.aport, ring_info->id.partner_id, ring_info,
1533                      ring_info->mfn_mapping, ring_info->len, ring_info->nmfns);
1534     }
1535 
1536     return ret;
1537 }
1538 
1539 static long
1540 unregister_ring(struct domain *currd,
1541                 XEN_GUEST_HANDLE_PARAM(xen_argo_unregister_ring_t) unreg_hnd)
1542 {
1543     xen_argo_unregister_ring_t unreg;
1544     struct argo_ring_id ring_id;
1545     struct argo_ring_info *ring_info = NULL;
1546     struct argo_send_info *send_info = NULL;
1547     struct domain *dst_d = NULL;
1548 
1549     ASSERT(currd == current->domain);
1550 
1551     if ( copy_from_guest(&unreg, unreg_hnd, 1) )
1552         return -EFAULT;
1553 
1554     if ( unreg.pad )
1555         return -EINVAL;
1556 
1557     ring_id.partner_id = unreg.partner_id;
1558     ring_id.aport = unreg.aport;
1559     ring_id.domain_id = currd->domain_id;
1560 
1561     read_lock(&L1_global_argo_rwlock);
1562 
1563     if ( unlikely(!currd->argo) )
1564     {
1565         read_unlock(&L1_global_argo_rwlock);
1566         return -ENODEV;
1567     }
1568 
1569     write_lock(&currd->argo->rings_L2_rwlock);
1570 
1571     ring_info = find_ring_info(currd, &ring_id);
1572     if ( !ring_info )
1573         goto out;
1574 
1575     ring_remove_info(currd, ring_info);
1576     currd->argo->ring_count--;
1577 
1578     if ( ring_id.partner_id == XEN_ARGO_DOMID_ANY )
1579         goto out;
1580 
1581     dst_d = rcu_lock_domain_by_id(ring_id.partner_id);
1582     if ( !dst_d || !dst_d->argo )
1583     {
1584         ASSERT_UNREACHABLE();
1585         goto out;
1586     }
1587 
1588     spin_lock(&dst_d->argo->send_L2_lock);
1589 
1590     send_info = find_send_info(dst_d, &ring_id);
1591     if ( send_info )
1592         list_del(&send_info->node);
1593     else
1594         ASSERT_UNREACHABLE();
1595 
1596     spin_unlock(&dst_d->argo->send_L2_lock);
1597 
1598  out:
1599     write_unlock(&currd->argo->rings_L2_rwlock);
1600 
1601     read_unlock(&L1_global_argo_rwlock);
1602 
1603     if ( dst_d )
1604         rcu_unlock_domain(dst_d);
1605 
1606     xfree(send_info);
1607 
1608     if ( !ring_info )
1609     {
1610         argo_dprintk("unregister_ring: no ring_info found for ring(%u:%x %u)\n",
1611                      ring_id.domain_id, ring_id.aport, ring_id.partner_id);
1612         return -ENOENT;
1613     }
1614 
1615     return 0;
1616 }
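/*
 * Guest-side sketch (illustrative only, not hypervisor code): tearing down a
 * ring with XEN_ARGO_OP_unregister_ring. The HYPERVISOR_argo_op() wrapper
 * name is an assumption about the guest environment; the argument layout
 * (struct in arg1, remaining args null/zero) matches the
 * XEN_ARGO_OP_unregister_ring case in do_argo_op() below.
 *
 *     xen_argo_unregister_ring_t unreg = {
 *         .aport      = 0x10,                 // ring's Argo port
 *         .partner_id = XEN_ARGO_DOMID_ANY,   // as registered
 *         .pad        = 0,
 *     };
 *     ret = HYPERVISOR_argo_op(XEN_ARGO_OP_unregister_ring, &unreg,
 *                              NULL, 0, 0);   // 0 on success, -ENOENT if absent
 */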
1617 
1618 static long
1619 register_ring(struct domain *currd,
1620               XEN_GUEST_HANDLE_PARAM(xen_argo_register_ring_t) reg_hnd,
1621               XEN_GUEST_HANDLE_PARAM(xen_argo_gfn_t) gfn_hnd,
1622               unsigned int npage, unsigned int flags)
1623 {
1624     xen_argo_register_ring_t reg;
1625     struct argo_ring_id ring_id;
1626     void *map_ringp;
1627     xen_argo_ring_t *ringp;
1628     struct argo_ring_info *ring_info, *new_ring_info = NULL;
1629     struct argo_send_info *send_info = NULL;
1630     struct domain *dst_d = NULL;
1631     int ret = 0;
1632     unsigned int private_tx_ptr;
1633 
1634     ASSERT(currd == current->domain);
1635 
1636     /* flags: currently-undefined bits are reserved and must be zero. */
1637     if ( unlikely(flags & ~XEN_ARGO_REGISTER_FLAG_MASK) )
1638         return -EINVAL;
1639 
1640     if ( copy_from_guest(&reg, reg_hnd, 1) )
1641         return -EFAULT;
1642 
1643     /*
1644      * A ring must be large enough to transmit messages, so requires space for:
1645      * * 1 message header, plus
1646      * * 1 payload slot (payload is always rounded to a multiple of 16 bytes)
1647      *   for the message payload to be written into, plus
1648      * * 1 more slot, so that the ring cannot be filled to capacity with a
1649      *   single minimum-size message -- see the logic in ringbuf_insert --
1650      *   allowing for this ensures that there can be space remaining when a
1651      *   message is present.
1652      * The above determines the minimum acceptable ring size.
1653      */
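    /*
     * Worked example of the minimum (a sketch, assuming the 16-byte message
     * header and 16-byte message slot defined in the public Argo header):
     * 16 (header) + 16 (one payload slot) + 16 (one spare slot) = 48 bytes,
     * so any reg.len below that, above XEN_ARGO_MAX_RING_SIZE, or not a
     * multiple of the slot size is rejected below.
     */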
1654     if ( (reg.len < (sizeof(struct xen_argo_ring_message_header)
1655                       + ROUNDUP_MESSAGE(1) + ROUNDUP_MESSAGE(1))) ||
1656          (reg.len > XEN_ARGO_MAX_RING_SIZE) ||
1657          (reg.len != ROUNDUP_MESSAGE(reg.len)) ||
1658          (NPAGES_RING(reg.len) != npage) ||
1659          (reg.pad != 0) )
1660         return -EINVAL;
1661 
1662     ring_id.partner_id = reg.partner_id;
1663     ring_id.aport = reg.aport;
1664     ring_id.domain_id = currd->domain_id;
1665 
1666     if ( reg.partner_id == XEN_ARGO_DOMID_ANY )
1667     {
1668         ret = opt_argo_mac_permissive ? xsm_argo_register_any_source(currd) :
1669                                         -EPERM;
1670         if ( ret )
1671             return ret;
1672     }
1673     else
1674     {
1675         dst_d = rcu_lock_domain_by_id(reg.partner_id);
1676         if ( !dst_d )
1677         {
1678             argo_dprintk("!dst_d, ESRCH\n");
1679             return -ESRCH;
1680         }
1681 
1682         ret = xsm_argo_register_single_source(currd, dst_d);
1683         if ( ret )
1684             goto out;
1685 
1686         send_info = xzalloc(struct argo_send_info);
1687         if ( !send_info )
1688         {
1689             ret = -ENOMEM;
1690             goto out;
1691         }
1692         send_info->id = ring_id;
1693     }
1694 
1695     /*
1696      * Common case is that the ring doesn't already exist, so do the alloc here
1697      * before picking up any locks.
1698      */
1699     new_ring_info = xzalloc(struct argo_ring_info);
1700     if ( !new_ring_info )
1701     {
1702         ret = -ENOMEM;
1703         goto out;
1704     }
1705 
1706     read_lock(&L1_global_argo_rwlock);
1707 
1708     if ( !currd->argo )
1709     {
1710         ret = -ENODEV;
1711         goto out_unlock;
1712     }
1713 
1714     if ( dst_d && !dst_d->argo )
1715     {
1716         argo_dprintk("!dst_d->argo, ECONNREFUSED\n");
1717         ret = -ECONNREFUSED;
1718         goto out_unlock;
1719     }
1720 
1721     write_lock(&currd->argo->rings_L2_rwlock);
1722 
1723     if ( currd->argo->ring_count >= MAX_RINGS_PER_DOMAIN )
1724     {
1725         ret = -ENOSPC;
1726         goto out_unlock2;
1727     }
1728 
1729     ring_info = find_ring_info(currd, &ring_id);
1730     if ( !ring_info )
1731     {
1732         ring_info = new_ring_info;
1733         new_ring_info = NULL;
1734 
1735         spin_lock_init(&ring_info->L3_lock);
1736 
1737         ring_info->id = ring_id;
1738         INIT_LIST_HEAD(&ring_info->pending);
1739 
1740         list_add(&ring_info->node,
1741                  &currd->argo->ring_hash[hash_index(&ring_info->id)]);
1742 
1743         argo_dprintk("argo: vm%u registering ring (vm%u:%x vm%u)\n",
1744                      currd->domain_id, ring_id.domain_id, ring_id.aport,
1745                      ring_id.partner_id);
1746     }
1747     else if ( ring_info->len )
1748     {
1749         /*
1750          * If the caller specified that the ring must not already exist,
1751          * fail this attempt to re-register the completed ring that already exists.
1752          */
1753         if ( flags & XEN_ARGO_REGISTER_FLAG_FAIL_EXIST )
1754         {
1755             gprintk(XENLOG_ERR, "argo: vm%u disallowed reregistration of "
1756                     "existing ring (vm%u:%x vm%u)\n",
1757                     currd->domain_id, ring_id.domain_id, ring_id.aport,
1758                     ring_id.partner_id);
1759             ret = -EEXIST;
1760             goto out_unlock2;
1761         }
1762 
1763         if ( ring_info->len != reg.len )
1764         {
1765             /*
1766              * Change of ring size could result in entries on the pending
1767              * notifications list that will never trigger.
1768              * Simple blunt solution: disallow ring resize for now.
1769              * TODO: investigate enabling ring resize.
1770              */
1771             gprintk(XENLOG_ERR, "argo: vm%u attempted to change ring size "
1772                     "(vm%u:%x vm%u)\n",
1773                     currd->domain_id, ring_id.domain_id, ring_id.aport,
1774                     ring_id.partner_id);
1775             /*
1776              * Could return EINVAL here, but if the ring didn't already
1777              * exist then the arguments would have been valid, so: EEXIST.
1778              */
1779             ret = -EEXIST;
1780             goto out_unlock2;
1781         }
1782 
1783         argo_dprintk("argo: vm%u re-registering existing ring (vm%u:%x vm%u)\n",
1784                      currd->domain_id, ring_id.domain_id, ring_id.aport,
1785                      ring_id.partner_id);
1786     }
1787 
1788     ret = find_ring_mfns(currd, ring_info, npage, gfn_hnd, reg.len);
1789     if ( ret )
1790     {
1791         gprintk(XENLOG_ERR,
1792                 "argo: vm%u failed to find ring mfns (vm%u:%x vm%u)\n",
1793                 currd->domain_id, ring_id.domain_id, ring_id.aport,
1794                 ring_id.partner_id);
1795 
1796         ring_remove_info(currd, ring_info);
1797         goto out_unlock2;
1798     }
1799 
1800     /*
1801      * The first page of the memory supplied for the ring has the xen_argo_ring
1802      * structure at its head, which is where the ring indexes reside.
1803      */
1804     ret = ring_map_page(currd, ring_info, 0, &map_ringp);
1805     if ( ret )
1806     {
1807         gprintk(XENLOG_ERR,
1808                 "argo: vm%u failed to map ring mfn 0 (vm%u:%x vm%u)\n",
1809                 currd->domain_id, ring_id.domain_id, ring_id.aport,
1810                 ring_id.partner_id);
1811 
1812         ring_remove_info(currd, ring_info);
1813         goto out_unlock2;
1814     }
1815     ringp = map_ringp;
1816 
1817     private_tx_ptr = read_atomic(&ringp->tx_ptr);
1818 
1819     if ( (private_tx_ptr >= reg.len) ||
1820          (ROUNDUP_MESSAGE(private_tx_ptr) != private_tx_ptr) )
1821     {
1822         /*
1823          * Since the ring is a mess, attempt to flush the contents of it
1824          * here by setting the tx_ptr to the next aligned message slot past
1825          * the latest rx_ptr we have observed. Handle ring wrap correctly.
1826          */
1827         private_tx_ptr = ROUNDUP_MESSAGE(read_atomic(&ringp->rx_ptr));
1828 
1829         if ( private_tx_ptr >= reg.len )
1830             private_tx_ptr = 0;
1831 
1832         update_tx_ptr(currd, ring_info, private_tx_ptr);
1833     }
1834 
1835     ring_info->tx_ptr = private_tx_ptr;
1836     ring_info->len = reg.len;
1837     currd->argo->ring_count++;
1838 
1839     if ( send_info )
1840     {
1841         spin_lock(&dst_d->argo->send_L2_lock);
1842 
1843         list_add(&send_info->node,
1844                  &dst_d->argo->send_hash[hash_index(&send_info->id)]);
1845 
1846         spin_unlock(&dst_d->argo->send_L2_lock);
1847     }
1848 
1849  out_unlock2:
1850     write_unlock(&currd->argo->rings_L2_rwlock);
1851 
1852  out_unlock:
1853     read_unlock(&L1_global_argo_rwlock);
1854 
1855  out:
1856     if ( dst_d )
1857         rcu_unlock_domain(dst_d);
1858 
1859     if ( ret )
1860         xfree(send_info);
1861 
1862     xfree(new_ring_info);
1863 
1864     return ret;
1865 }
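/*
 * Guest-side sketch (illustrative only, not hypervisor code): registering a
 * small ring with XEN_ARGO_OP_register_ring. The HYPERVISOR_argo_op()
 * wrapper and the page allocation step are assumptions about the guest
 * environment; the argument layout (register struct, gfn array, page count,
 * flags) matches the XEN_ARGO_OP_register_ring case in do_argo_op() below.
 *
 *     xen_argo_register_ring_t reg = {
 *         .aport      = 0x10,
 *         .partner_id = 0,      // accept messages only from dom0
 *         .pad        = 0,
 *         .len        = 4096,   // multiple of the message slot size
 *     };
 *     xen_argo_gfn_t gfns[2];   // NPAGES_RING(4096) == 2: ring data + indexes
 *     // ... allocate two zeroed pages for the ring and fill gfns[] with
 *     //     their guest frame numbers ...
 *     ret = HYPERVISOR_argo_op(XEN_ARGO_OP_register_ring, &reg, gfns, 2,
 *                              XEN_ARGO_REGISTER_FLAG_FAIL_EXIST);
 */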
1866 
1867 static void
1868 notify_ring(const struct domain *d, struct argo_ring_info *ring_info,
1869             struct list_head *to_notify)
1870 {
1871     unsigned int space;
1872 
1873     ASSERT(LOCKING_Read_rings_L2(d));
1874 
1875     spin_lock(&ring_info->L3_lock);
1876 
1877     if ( ring_info->len )
1878         space = ringbuf_payload_space(d, ring_info);
1879     else
1880         space = 0;
1881 
1882     spin_unlock(&ring_info->L3_lock);
1883 
1884     if ( space )
1885         pending_find(d, ring_info, space, to_notify);
1886 }
1887 
1888 static void
1889 notify_check_pending(struct domain *d)
1890 {
1891     unsigned int i;
1892     LIST_HEAD(to_notify);
1893 
1894     ASSERT(LOCKING_Read_L1);
1895 
1896     read_lock(&d->argo->rings_L2_rwlock);
1897 
1898     /* Walk all rings, call notify_ring on each to populate to_notify list */
1899     for ( i = 0; i < ARGO_HASHTABLE_SIZE; i++ )
1900     {
1901         struct argo_ring_info *ring_info, *next;
1902         struct list_head *bucket = &d->argo->ring_hash[i];
1903 
1904         list_for_each_entry_safe(ring_info, next, bucket, node)
1905             notify_ring(d, ring_info, &to_notify);
1906     }
1907 
1908     read_unlock(&d->argo->rings_L2_rwlock);
1909 
1910     if ( !list_empty(&to_notify) )
1911         pending_notify(&to_notify);
1912 }
1913 
1914 static long
1915 notify(struct domain *currd,
1916        XEN_GUEST_HANDLE_PARAM(xen_argo_ring_data_t) ring_data_hnd)
1917 {
1918     XEN_GUEST_HANDLE(xen_argo_ring_data_ent_t) ent_hnd;
1919     xen_argo_ring_data_t ring_data;
1920     int ret = 0;
1921 
1922     ASSERT(currd == current->domain);
1923 
1924     read_lock(&L1_global_argo_rwlock);
1925 
1926     if ( !currd->argo )
1927     {
1928         argo_dprintk("!d->argo, ENODEV\n");
1929         ret = -ENODEV;
1930         goto out;
1931     }
1932 
1933     notify_check_pending(currd);
1934 
1935     if ( guest_handle_is_null(ring_data_hnd) )
1936         goto out;
1937 
1938     ret = copy_from_guest(&ring_data, ring_data_hnd, 1) ? -EFAULT : 0;
1939     if ( ret )
1940         goto out;
1941 
1942     if ( ring_data.nent > MAX_NOTIFY_COUNT )
1943     {
1944         gprintk(XENLOG_ERR, "argo: notify entry count(%u) exceeds max(%u)\n",
1945                 ring_data.nent, MAX_NOTIFY_COUNT);
1946         ret = -EACCES;
1947         goto out;
1948     }
1949 
1950     ent_hnd = guest_handle_for_field(ring_data_hnd,
1951                                      xen_argo_ring_data_ent_t, data[0]);
1952     if ( unlikely(!guest_handle_okay(ent_hnd, ring_data.nent)) )
1953     {
1954         ret = -EFAULT;
1955         goto out;
1956     }
1957 
1958     while ( !ret && ring_data.nent-- )
1959     {
1960         ret = fill_ring_data(currd, ent_hnd);
1961         guest_handle_add_offset(ent_hnd, 1);
1962     }
1963 
1964  out:
1965     read_unlock(&L1_global_argo_rwlock);
1966 
1967     return ret;
1968 }
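/*
 * Guest-side sketch (illustrative only, not hypervisor code): the simplest
 * use of XEN_ARGO_OP_notify, passing a null pointer so that only the
 * pending-notification check runs (see the guest_handle_is_null() test in
 * notify() above). The HYPERVISOR_argo_op() wrapper name is an assumption
 * about the guest environment; alternatively, a xen_argo_ring_data_t with
 * up to MAX_NOTIFY_COUNT entries may be passed in arg1 to query ring space.
 *
 *     ret = HYPERVISOR_argo_op(XEN_ARGO_OP_notify, NULL, NULL, 0, 0);
 */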
1969 
1970 static long
1971 sendv(struct domain *src_d, xen_argo_addr_t *src_addr,
1972       const xen_argo_addr_t *dst_addr, xen_argo_iov_t *iovs, unsigned int niov,
1973       uint32_t message_type)
1974 {
1975     struct domain *dst_d = NULL;
1976     struct argo_ring_id src_id;
1977     struct argo_ring_info *ring_info;
1978     int ret = 0;
1979     unsigned int len = 0;
1980 
1981     argo_dprintk("sendv: (%u:%x)->(%u:%x) niov:%u type:%x\n",
1982                  src_addr->domain_id, src_addr->aport, dst_addr->domain_id,
1983                  dst_addr->aport, niov, message_type);
1984 
1985     /* Check padding is zeroed. */
1986     if ( unlikely(src_addr->pad || dst_addr->pad) )
1987         return -EINVAL;
1988 
1989     if ( src_addr->domain_id == XEN_ARGO_DOMID_ANY )
1990          src_addr->domain_id = src_d->domain_id;
1991 
1992     /* No domain is currently authorized to send on behalf of another */
1993     if ( unlikely(src_addr->domain_id != src_d->domain_id) )
1994         return -EPERM;
1995 
1996     src_id.aport = src_addr->aport;
1997     src_id.domain_id = src_d->domain_id;
1998     src_id.partner_id = dst_addr->domain_id;
1999 
2000     dst_d = rcu_lock_domain_by_id(dst_addr->domain_id);
2001     if ( !dst_d )
2002         return -ESRCH;
2003 
2004     ret = xsm_argo_send(src_d, dst_d);
2005     if ( ret )
2006     {
2007         gprintk(XENLOG_ERR, "argo: XSM REJECTED %i -> %i\n",
2008                 src_d->domain_id, dst_d->domain_id);
2009 
2010         rcu_unlock_domain(dst_d);
2011 
2012         return ret;
2013     }
2014 
2015     read_lock(&L1_global_argo_rwlock);
2016 
2017     if ( !src_d->argo )
2018     {
2019         ret = -ENODEV;
2020         goto out_unlock;
2021     }
2022 
2023     if ( !dst_d->argo )
2024     {
2025         argo_dprintk("!dst_d->argo, ECONNREFUSED\n");
2026         ret = -ECONNREFUSED;
2027         goto out_unlock;
2028     }
2029 
2030     read_lock(&dst_d->argo->rings_L2_rwlock);
2031 
2032     ring_info = find_ring_info_by_match(dst_d, dst_addr->aport,
2033                                         src_id.domain_id);
2034     if ( !ring_info )
2035     {
2036         gprintk(XENLOG_ERR,
2037                 "argo: vm%u connection refused, src (vm%u:%x) dst (vm%u:%x)\n",
2038                 current->domain->domain_id, src_id.domain_id, src_id.aport,
2039                 dst_addr->domain_id, dst_addr->aport);
2040 
2041         ret = -ECONNREFUSED;
2042     }
2043     else
2044     {
2045         spin_lock(&ring_info->L3_lock);
2046 
2047         /*
2048          * Obtain the total size of data to transmit -- sets the 'len' variable
2049          * -- and sanity check that the iovs conform to size and number limits.
2050          */
2051         ret = iov_count(iovs, niov, &len);
2052         if ( !ret )
2053         {
2054             ret = ringbuf_insert(dst_d, ring_info, &src_id, iovs, niov,
2055                                  message_type, len);
2056             if ( ret == -EAGAIN )
2057             {
2058                 int rc;
2059 
2060                 argo_dprintk("argo_ringbuf_sendv failed, EAGAIN\n");
2061                 /* Requeue so a notification is sent when space becomes available. */
2062                 rc = pending_requeue(dst_d, ring_info, src_id.domain_id, len);
2063                 if ( rc )
2064                     ret = rc;
2065             }
2066         }
2067 
2068         spin_unlock(&ring_info->L3_lock);
2069     }
2070 
2071     read_unlock(&dst_d->argo->rings_L2_rwlock);
2072 
2073  out_unlock:
2074     read_unlock(&L1_global_argo_rwlock);
2075 
2076     if ( ret >= 0 )
2077         signal_domain(dst_d);
2078 
2079     if ( dst_d )
2080         rcu_unlock_domain(dst_d);
2081 
2082     return ( ret < 0 ) ? ret : len;
2083 }
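/*
 * Guest-side sketch (illustrative only, not hypervisor code): sending a
 * single-iov message with XEN_ARGO_OP_sendv. The HYPERVISOR_argo_op()
 * wrapper, the set_xen_guest_handle() helper and the iov_len field name
 * (per the public Argo header) are assumptions about the guest environment;
 * the argument layout (send addresses, iov array, niov, 32-bit message
 * type) matches the XEN_ARGO_OP_sendv case in do_argo_op() below.
 *
 *     uint8_t msg[] = "hello";
 *     xen_argo_iov_t iov = { .iov_len = sizeof(msg) };
 *     xen_argo_send_addr_t addr = {
 *         .src = { .aport = 0x10, .domain_id = XEN_ARGO_DOMID_ANY },
 *         .dst = { .aport = 0x10, .domain_id = 0 },  // deliver to dom0
 *     };
 *     set_xen_guest_handle(iov.iov_hnd, msg);
 *     ret = HYPERVISOR_argo_op(XEN_ARGO_OP_sendv, &addr, &iov, 1, 0);
 *     // On success the return value is the number of bytes transmitted.
 */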
2084 
2085 long
2086 do_argo_op(unsigned int cmd, XEN_GUEST_HANDLE_PARAM(void) arg1,
2087            XEN_GUEST_HANDLE_PARAM(void) arg2, unsigned long raw_arg3,
2088            unsigned long raw_arg4)
2089 {
2090     struct domain *currd = current->domain;
2091     long rc;
2092     unsigned int arg3 = raw_arg3, arg4 = raw_arg4;
2093 
2094     argo_dprintk("->do_argo_op(%u,%p,%p,%lu,0x%lx)\n", cmd,
2095                  (void *)arg1.p, (void *)arg2.p, raw_arg3, raw_arg4);
2096 
2097     /* Reject numeric hypercall args outside 32-bit range */
2098     if ( (arg3 != raw_arg3) || (arg4 != raw_arg4) )
2099         return -EINVAL;
2100 
2101     if ( unlikely(!opt_argo) )
2102         return -EOPNOTSUPP;
2103 
2104     rc = xsm_argo_enable(currd);
2105     if ( rc )
2106         return rc;
2107 
2108     switch ( cmd )
2109     {
2110     case XEN_ARGO_OP_register_ring:
2111     {
2112         XEN_GUEST_HANDLE_PARAM(xen_argo_register_ring_t) reg_hnd =
2113             guest_handle_cast(arg1, xen_argo_register_ring_t);
2114         XEN_GUEST_HANDLE_PARAM(xen_argo_gfn_t) gfn_hnd =
2115             guest_handle_cast(arg2, xen_argo_gfn_t);
2116         /* arg3: npage, arg4: flags */
2117 
2118         BUILD_BUG_ON(!IS_ALIGNED(XEN_ARGO_MAX_RING_SIZE, PAGE_SIZE));
2119 
2120         if ( unlikely(arg3 > (XEN_ARGO_MAX_RING_SIZE >> PAGE_SHIFT)) )
2121         {
2122             rc = -EINVAL;
2123             break;
2124         }
2125 
2126         /* Check array to allow use of the faster __copy operations later */
2127         if ( unlikely(!guest_handle_okay(gfn_hnd, arg3)) )
2128         {
2129             rc = -EFAULT;
2130             break;
2131         }
2132 
2133         rc = register_ring(currd, reg_hnd, gfn_hnd, arg3, arg4);
2134         break;
2135     }
2136 
2137     case XEN_ARGO_OP_unregister_ring:
2138     {
2139         XEN_GUEST_HANDLE_PARAM(xen_argo_unregister_ring_t) unreg_hnd =
2140             guest_handle_cast(arg1, xen_argo_unregister_ring_t);
2141 
2142         if ( unlikely((!guest_handle_is_null(arg2)) || arg3 || arg4) )
2143         {
2144             rc = -EINVAL;
2145             break;
2146         }
2147 
2148         rc = unregister_ring(currd, unreg_hnd);
2149         break;
2150     }
2151 
2152     case XEN_ARGO_OP_sendv:
2153     {
2154         xen_argo_send_addr_t send_addr;
2155         xen_argo_iov_t iovs[XEN_ARGO_MAXIOV];
2156         unsigned int niov;
2157 
2158         XEN_GUEST_HANDLE_PARAM(xen_argo_send_addr_t) send_addr_hnd =
2159             guest_handle_cast(arg1, xen_argo_send_addr_t);
2160         XEN_GUEST_HANDLE_PARAM(xen_argo_iov_t) iovs_hnd =
2161             guest_handle_cast(arg2, xen_argo_iov_t);
2162         /* arg3 is niov */
2163         /* arg4 is message_type. Must be a 32-bit value. */
2164 
2165         /* XEN_ARGO_MAXIOV value determines size of iov array on stack */
2166         BUILD_BUG_ON(XEN_ARGO_MAXIOV > 8);
2167 
2168         rc = copy_from_guest(&send_addr, send_addr_hnd, 1) ? -EFAULT : 0;
2169         if ( rc )
2170         {
2171             rc = -EFAULT;
2172             break;
2173         }
2174 
2175         /*
2176          * Reject niov values above the maximum limit, or message_type values
2177          * outside the 32-bit range.
2178          */
2179         if ( unlikely((arg3 > XEN_ARGO_MAXIOV) || (arg4 != (uint32_t)arg4)) )
2180         {
2181             rc = -EINVAL;
2182             break;
2183         }
2184         niov = array_index_nospec(arg3, XEN_ARGO_MAXIOV + 1);
2185 
2186         rc = copy_from_guest(iovs, iovs_hnd, niov) ? -EFAULT : 0;
2187         if ( rc )
2188         {
2189             rc = -EFAULT;
2190             break;
2191         }
2192 
2193         rc = sendv(currd, &send_addr.src, &send_addr.dst, iovs, niov, arg4);
2194         break;
2195     }
2196 
2197     case XEN_ARGO_OP_notify:
2198     {
2199         XEN_GUEST_HANDLE_PARAM(xen_argo_ring_data_t) ring_data_hnd =
2200                    guest_handle_cast(arg1, xen_argo_ring_data_t);
2201 
2202         if ( unlikely((!guest_handle_is_null(arg2)) || arg3 || arg4) )
2203         {
2204             rc = -EINVAL;
2205             break;
2206         }
2207 
2208         rc = notify(currd, ring_data_hnd);
2209         break;
2210     }
2211 
2212     default:
2213         rc = -EOPNOTSUPP;
2214         break;
2215     }
2216 
2217     argo_dprintk("<-do_argo_op(%u)=%ld\n", cmd, rc);
2218 
2219     return rc;
2220 }
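/*
 * Summary of the argument layout dispatched above (derived from the cases in
 * do_argo_op(); illustrative reference only):
 *
 *   op                arg1                           arg2       arg3   arg4
 *   register_ring     xen_argo_register_ring_t *     gfn array  npage  flags
 *   unregister_ring   xen_argo_unregister_ring_t *   NULL       0      0
 *   sendv             xen_argo_send_addr_t *         iov array  niov   msg type
 *   notify            xen_argo_ring_data_t * / NULL  NULL       0      0
 */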
2221 
2222 #ifdef CONFIG_COMPAT
2223 int
2224 compat_argo_op(unsigned int cmd, XEN_GUEST_HANDLE_PARAM(void) arg1,
2225                XEN_GUEST_HANDLE_PARAM(void) arg2, unsigned long arg3,
2226                unsigned long arg4)
2227 {
2228     struct domain *currd = current->domain;
2229     int rc;
2230     xen_argo_send_addr_t send_addr;
2231     xen_argo_iov_t iovs[XEN_ARGO_MAXIOV];
2232     compat_argo_iov_t compat_iovs[XEN_ARGO_MAXIOV];
2233     unsigned int i, niov;
2234     XEN_GUEST_HANDLE_PARAM(xen_argo_send_addr_t) send_addr_hnd;
2235 
2236     /* check XEN_ARGO_MAXIOV as it sizes stack arrays: iovs, compat_iovs */
2237     BUILD_BUG_ON(XEN_ARGO_MAXIOV > 8);
2238 
2239     /* Forward all ops besides sendv to the native handler. */
2240     if ( cmd != XEN_ARGO_OP_sendv )
2241         return do_argo_op(cmd, arg1, arg2, arg3, arg4);
2242 
2243     if ( unlikely(!opt_argo) )
2244         return -EOPNOTSUPP;
2245 
2246     rc = xsm_argo_enable(currd);
2247     if ( rc )
2248         return rc;
2249 
2250     argo_dprintk("->compat_argo_op(%u,%p,%p,%lu,0x%lx)\n", cmd,
2251                  (void *)arg1.p, (void *)arg2.p, arg3, arg4);
2252 
2253     send_addr_hnd = guest_handle_cast(arg1, xen_argo_send_addr_t);
2254     /* arg2: iovs, arg3: niov, arg4: message_type */
2255 
2256     rc = copy_from_guest(&send_addr, send_addr_hnd, 1) ? -EFAULT : 0;
2257     if ( rc )
2258         goto out;
2259 
2260     if ( unlikely(arg3 > XEN_ARGO_MAXIOV) )
2261     {
2262         rc = -EINVAL;
2263         goto out;
2264     }
2265     niov = array_index_nospec(arg3, XEN_ARGO_MAXIOV + 1);
2266 
2267     rc = copy_from_guest(compat_iovs, arg2, niov) ? -EFAULT : 0;
2268     if ( rc )
2269         goto out;
2270 
2271     for ( i = 0; i < niov; i++ )
2272     {
2273 #define XLAT_argo_iov_HNDL_iov_hnd(_d_, _s_) \
2274     guest_from_compat_handle((_d_)->iov_hnd, (_s_)->iov_hnd)
2275 
2276         XLAT_argo_iov(&iovs[i], &compat_iovs[i]);
2277 
2278 #undef XLAT_argo_iov_HNDL_iov_hnd
2279     }
2280 
2281     rc = sendv(currd, &send_addr.src, &send_addr.dst, iovs, niov, arg4);
2282  out:
2283     argo_dprintk("<-compat_argo_op(%u)=%d\n", cmd, rc);
2284 
2285     return rc;
2286 }
2287 #endif
2288 
2289 static void
2290 argo_domain_init(struct argo_domain *argo)
2291 {
2292     unsigned int i;
2293 
2294     rwlock_init(&argo->rings_L2_rwlock);
2295     spin_lock_init(&argo->send_L2_lock);
2296     spin_lock_init(&argo->wildcard_L2_lock);
2297 
2298     for ( i = 0; i < ARGO_HASHTABLE_SIZE; ++i )
2299     {
2300         INIT_LIST_HEAD(&argo->ring_hash[i]);
2301         INIT_LIST_HEAD(&argo->send_hash[i]);
2302     }
2303     INIT_LIST_HEAD(&argo->wildcard_pend_list);
2304 }
2305 
2306 int
2307 argo_init(struct domain *d)
2308 {
2309     struct argo_domain *argo;
2310 
2311     if ( !opt_argo || xsm_argo_enable(d) )
2312     {
2313         argo_dprintk("argo disabled, domid: %u\n", d->domain_id);
2314         return 0;
2315     }
2316 
2317     argo_dprintk("init: domid: %u\n", d->domain_id);
2318 
2319     argo = xzalloc(struct argo_domain);
2320     if ( !argo )
2321         return -ENOMEM;
2322 
2323     argo_domain_init(argo);
2324 
2325     write_lock(&L1_global_argo_rwlock);
2326 
2327     d->argo = argo;
2328 
2329     write_unlock(&L1_global_argo_rwlock);
2330 
2331     return 0;
2332 }
2333 
2334 void
2335 argo_destroy(struct domain *d)
2336 {
2337     BUG_ON(!d->is_dying);
2338 
2339     write_lock(&L1_global_argo_rwlock);
2340 
2341     argo_dprintk("destroy: domid %u d->argo=%p\n", d->domain_id, d->argo);
2342 
2343     if ( d->argo )
2344     {
2345         domain_rings_remove_all(d);
2346         partner_rings_remove(d);
2347         wildcard_rings_pending_remove(d);
2348         XFREE(d->argo);
2349     }
2350 
2351     write_unlock(&L1_global_argo_rwlock);
2352 }
2353 
2354 void
2355 argo_soft_reset(struct domain *d)
2356 {
2357     write_lock(&L1_global_argo_rwlock);
2358 
2359     argo_dprintk("soft reset d=%u d->argo=%p\n", d->domain_id, d->argo);
2360 
2361     if ( d->argo )
2362     {
2363         domain_rings_remove_all(d);
2364         partner_rings_remove(d);
2365         wildcard_rings_pending_remove(d);
2366 
2367         /*
2368          * Since neither opt_argo nor the verdict of xsm_argo_enable(d) can
2369          * change at runtime, if d->argo is set then Argo was permitted when
2370          * the domain was initialised, so init is allowed to proceed again here.
2371          */
2372         argo_domain_init(d->argo);
2373     }
2374 
2375     write_unlock(&L1_global_argo_rwlock);
2376 }
2377