1 /******************************************************************************
2  * Argo : Hypervisor-Mediated data eXchange
3  *
4  * Derived from v4v, the version 2 of v2v.
5  *
6  * Copyright (c) 2010, Citrix Systems
7  * Copyright (c) 2018-2019 BAE Systems
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; If not, see <http://www.gnu.org/licenses/>.
16  */
17 
18 #include <xen/argo.h>
19 #include <xen/domain.h>
20 #include <xen/domain_page.h>
21 #include <xen/errno.h>
22 #include <xen/event.h>
23 #include <xen/guest_access.h>
24 #include <xen/lib.h>
25 #include <xen/nospec.h>
26 #include <xen/param.h>
27 #include <xen/sched.h>
28 #include <xen/time.h>
29 
30 #include <xsm/xsm.h>
31 
32 #include <asm/p2m.h>
33 
34 #include <public/argo.h>
35 
36 #ifdef CONFIG_COMPAT
37 #include <compat/argo.h>
38 CHECK_argo_addr;
39 #undef CHECK_argo_addr
40 #define CHECK_argo_addr struct xen_argo_addr
41 CHECK_argo_register_ring;
42 CHECK_argo_ring;
43 CHECK_argo_ring_data_ent;
44 #undef CHECK_argo_ring_data_ent
45 #define CHECK_argo_ring_data_ent struct xen_argo_ring_data_ent
46 CHECK_argo_ring_data;
47 CHECK_argo_ring_message_header;
48 CHECK_argo_unregister_ring;
49 CHECK_argo_send_addr;
50 #endif
51 
52 #define MAX_RINGS_PER_DOMAIN            128U
53 #define MAX_NOTIFY_COUNT                256U
54 #define MAX_PENDING_PER_RING             32U
55 
56 /* All messages on the ring are padded to a multiple of the slot size. */
57 #define ROUNDUP_MESSAGE(a) ROUNDUP((a), XEN_ARGO_MSG_SLOT_SIZE)
58 
59 /* The maximum size of a message that may be sent on the largest Argo ring. */
60 #define MAX_ARGO_MESSAGE_SIZE ((XEN_ARGO_MAX_RING_SIZE) - \
61         (sizeof(struct xen_argo_ring_message_header)) - ROUNDUP_MESSAGE(1))
62 
63 /* Number of PAGEs needed to hold a ring of a given size in bytes */
64 #define NPAGES_RING(ring_len) \
65     (ROUNDUP((ROUNDUP_MESSAGE(ring_len) + sizeof(xen_argo_ring_t)), PAGE_SIZE) \
66      >> PAGE_SHIFT)
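/*
 * Worked example (illustrative, assuming XEN_ARGO_MSG_SLOT_SIZE is 16,
 * PAGE_SIZE is 4096 and the xen_argo_ring_t header occupies 64 bytes):
 * a 4096-byte ring needs
 *   NPAGES_RING(4096) = ROUNDUP(4096 + 64, 4096) >> PAGE_SHIFT = 2 pages,
 * since the ring header pushes the allocation just past a single page.
 */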
67 
68 DEFINE_XEN_GUEST_HANDLE(xen_argo_addr_t);
69 DEFINE_XEN_GUEST_HANDLE(xen_argo_gfn_t);
70 DEFINE_XEN_GUEST_HANDLE(xen_argo_iov_t);
71 DEFINE_XEN_GUEST_HANDLE(xen_argo_register_ring_t);
72 DEFINE_XEN_GUEST_HANDLE(xen_argo_ring_t);
73 DEFINE_XEN_GUEST_HANDLE(xen_argo_ring_data_t);
74 DEFINE_XEN_GUEST_HANDLE(xen_argo_ring_data_ent_t);
75 DEFINE_XEN_GUEST_HANDLE(xen_argo_send_addr_t);
76 DEFINE_XEN_GUEST_HANDLE(xen_argo_unregister_ring_t);
77 #ifdef CONFIG_COMPAT
78 DEFINE_COMPAT_HANDLE(compat_argo_iov_t);
79 #endif
80 
81 static bool __read_mostly opt_argo;
82 static bool __read_mostly opt_argo_mac_permissive;
83 
84 static int __init cf_check parse_argo(const char *s)
85 {
86     const char *ss;
87     int val, rc = 0;
88 
89     do {
90         ss = strchr(s, ',');
91         if ( !ss )
92             ss = strchr(s, '\0');
93 
94         if ( (val = parse_bool(s, ss)) >= 0 )
95             opt_argo = val;
96         else if ( (val = parse_boolean("mac-permissive", s, ss)) >= 0 )
97             opt_argo_mac_permissive = val;
98         else
99             rc = -EINVAL;
100 
101         s = ss + 1;
102     } while ( *ss );
103 
104     return rc;
105 }
106 custom_param("argo", parse_argo);
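/*
 * Illustrative usage (not part of this file): booting Xen with
 * "argo=1,mac-permissive" on the hypervisor command line enables Argo and
 * selects the permissive MAC behaviour for sends, while plain "argo=1"
 * enables Argo with the default (stricter) send access control; Argo stays
 * disabled when the option is absent.
 */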
107 
108 typedef struct argo_ring_id
109 {
110     xen_argo_port_t aport;
111     domid_t partner_id;
112     domid_t domain_id;
113 } argo_ring_id;
114 
115 /* Data about a domain's own ring that it has registered */
116 struct argo_ring_info
117 {
118     /* next node in the hash, protected by rings_L2 */
119     struct list_head node;
120     /* this ring's id, protected by rings_L2 */
121     struct argo_ring_id id;
122     /* L3, the ring_info lock: protects the members of this struct below */
123     spinlock_t L3_lock;
124     /* length of the ring, protected by L3 */
125     unsigned int len;
126     /* number of pages translated into mfns, protected by L3 */
127     unsigned int nmfns;
128     /* cached tx pointer location, protected by L3 */
129     unsigned int tx_ptr;
130     /* mapped ring pages protected by L3 */
131     void **mfn_mapping;
132     /* list of mfns of guest ring, protected by L3 */
133     mfn_t *mfns;
134     /* list of struct pending_ent for this ring, protected by L3 */
135     struct list_head pending;
136     /* number of pending entries queued for this ring, protected by L3 */
137     unsigned int npending;
138 };
139 
140 /* Data about a single-sender ring, held by the sender (partner) domain */
141 struct argo_send_info
142 {
143     /* next node in the hash, protected by send_L2 */
144     struct list_head node;
145     /* this ring's id, protected by send_L2 */
146     struct argo_ring_id id;
147 };
148 
149 /* A space-available notification that is awaiting sufficient space */
150 struct pending_ent
151 {
152     /* List node within argo_ring_info's pending list */
153     struct list_head node;
154     /*
155      * List node within argo_domain's wildcard_pend_list. Only used if the
156      * ring is one with a wildcard partner (ie. that any domain may send to)
157      * to enable cancelling signals on wildcard rings on domain destroy.
158      */
159     struct list_head wildcard_node;
160     /*
161      * Pointer to the ring_info that this ent pertains to. Used to ensure that
162      * ring_info->npending is decremented when ents for wildcard rings are
163      * cancelled for domain destroy.
164      * Caution: Must hold the correct locks before accessing ring_info via this.
165      */
166     struct argo_ring_info *ring_info;
167     /* minimum ring space available that this signal is waiting upon */
168     unsigned int len;
169     /* domain to be notified when space is available */
170     domid_t domain_id;
171 };
172 
173 /*
174  * The value of the argo element in a struct domain is
175  * protected by L1_global_argo_rwlock
176  */
177 #define ARGO_HASHTABLE_SIZE 32
178 struct argo_domain
179 {
180     /* rings_L2 */
181     rwlock_t rings_L2_rwlock;
182     /*
183      * Hash table of argo_ring_info about rings this domain has registered.
184      * Protected by rings_L2.
185      */
186     struct list_head ring_hash[ARGO_HASHTABLE_SIZE];
187     /* Counter of rings registered by this domain. Protected by rings_L2. */
188     unsigned int ring_count;
189 
190     /* send_L2 */
191     spinlock_t send_L2_lock;
192     /*
193      * Hash table of argo_send_info about rings other domains have registered
194      * for this domain to send to. Single partner, non-wildcard rings.
195      * Protected by send_L2.
196      */
197     struct list_head send_hash[ARGO_HASHTABLE_SIZE];
198 
199     /* wildcard_L2 */
200     spinlock_t wildcard_L2_lock;
201     /*
202      * List of pending space-available signals for this domain about wildcard
203      * rings registered by other domains. Protected by wildcard_L2.
204      */
205     struct list_head wildcard_pend_list;
206 };
207 
208 /*
209  * Locking is organized as follows:
210  *
211  * Terminology: R(<lock>) means taking a read lock on the specified lock;
212  *              W(<lock>) means taking a write lock on it.
213  *
214  * == L1 : The global read/write lock: L1_global_argo_rwlock
215  * Protects the argo elements of all struct domain *d in the system.
216  *
217  * R(L1) does not protect any of the elements of d->argo; it protects their
218  * addresses. W(L1) protects those and more since it implies W on all the lower
219  * level locks - see the notes on those locks below.
220  *
221  * The destruction of an argo-enabled domain, which must have a non-NULL d->argo
222  * pointer, will need to free that d->argo pointer, which requires W(L1).
223  * Since holding R(L1) will block acquiring W(L1), it will ensure that
224  * no domain pointers that argo is interested in become invalid while either
225  * W(L1) or R(L1) are held.
226  */
227 
228 static DEFINE_RWLOCK(L1_global_argo_rwlock); /* L1 */
229 
230 /*
231  * == rings_L2 : The per-domain ring hash lock: d->argo->rings_L2_rwlock
232  *
233  * Holding a read lock on rings_L2 protects the ring hash table and
234  * the elements in the hash_table d->argo->ring_hash, and
235  * the node and id fields in struct argo_ring_info in the
236  * hash table.
237  * Holding a write lock on rings_L2 protects all of the elements of all the
238  * struct argo_ring_info belonging to this domain.
239  *
240  * To take rings_L2 you must already have R(L1). W(L1) implies W(rings_L2) and
241  * L3.
242  *
243  * == L3 : The individual ring_info lock: ring_info->L3_lock
244  *
245  * Protects all the fields within the argo_ring_info, aside from the ones that
246  * rings_L2 already protects: node, id, lock.
247  *
248  * To acquire L3 you must already have R(rings_L2). W(rings_L2) implies L3.
249  *
250  * == send_L2 : The per-domain single-sender partner rings lock:
251  *              d->argo->send_L2_lock
252  *
253  * Protects the per-domain send hash table : d->argo->send_hash
254  * and the elements in the hash table, and the node and id fields
255  * in struct argo_send_info in the hash table.
256  *
257  * To take send_L2, you must already have R(L1). W(L1) implies send_L2.
258  * Do not attempt to acquire a rings_L2 on any domain after taking and while
259  * holding a send_L2 lock -- acquire the rings_L2 (if one is needed) beforehand.
260  *
261  * == wildcard_L2 : The per-domain wildcard pending list lock:
262  *                  d->argo->wildcard_L2_lock
263  *
264  * Protects the per-domain list of outstanding signals for space availability
265  * on wildcard rings.
266  *
267  * To take wildcard_L2, you must already have R(L1). W(L1) implies wildcard_L2.
268  * No other locks are acquired after obtaining wildcard_L2.
269  */
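/*
 * Illustrative sketch (not an actual code path in this file) of the nesting
 * that the rules above permit when examining a single ring belonging to
 * domain d:
 *
 *     read_lock(&L1_global_argo_rwlock);         R(L1)
 *     read_lock(&d->argo->rings_L2_rwlock);      R(rings_L2)
 *     spin_lock(&ring_info->L3_lock);            L3
 *     ... read or update ring_info fields ...
 *     spin_unlock(&ring_info->L3_lock);
 *     read_unlock(&d->argo->rings_L2_rwlock);
 *     read_unlock(&L1_global_argo_rwlock);
 *
 * Paths that add or remove rings take W(rings_L2) instead of R(rings_L2) and
 * then do not need L3; whole-domain teardown takes W(L1) alone.
 */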
270 
271 /*
272  * Lock state validations macros
273  *
274  * These macros encode the logic to verify that the locking has adhered to the
275  * locking discipline above.
276  * eg. On entry to logic that requires holding at least R(rings_L2), this:
277  *      ASSERT(LOCKING_Read_rings_L2(d));
278  *
279  * checks that the lock state is sufficient, validating that one of the
280  * following must be true when executed:       R(rings_L2) && R(L1)
281  *                                        or:  W(rings_L2) && R(L1)
282  *                                        or:  W(L1)
283  *
284  * The LOCKING macros defined below here are for use at verification points.
285  */
286 #define LOCKING_Write_L1 (rw_is_write_locked(&L1_global_argo_rwlock))
287 /*
288  * While LOCKING_Read_L1 will return true even if the lock is write-locked,
289  * that's OK because everywhere that a Read lock is needed with these macros,
290  * holding a Write lock there instead is OK too: we're checking that _at least_
291  * the specified level of locks are held.
292  */
293 #define LOCKING_Read_L1 (rw_is_locked(&L1_global_argo_rwlock))
294 
295 #define LOCKING_Write_rings_L2(d) \
296     ((LOCKING_Read_L1 && rw_is_write_locked(&(d)->argo->rings_L2_rwlock)) || \
297      LOCKING_Write_L1)
298 /*
299  * Skip checking LOCKING_Write_rings_L2(d) within this LOCKING_Read_rings_L2
300  * definition because the first clause that is testing R(L1) && R(L2) will also
301  * return true if R(L1) && W(L2) is true, because of the way that rw_is_locked
302  * behaves. This results in a slightly shorter and faster implementation.
303  */
304 #define LOCKING_Read_rings_L2(d) \
305     ((LOCKING_Read_L1 && rw_is_locked(&(d)->argo->rings_L2_rwlock)) || \
306      LOCKING_Write_L1)
307 /*
308  * Skip checking LOCKING_Write_L1 within this LOCKING_L3 definition because
309  * LOCKING_Write_rings_L2(d) will return true for that condition.
310  */
311 #define LOCKING_L3(d, r) \
312     ((LOCKING_Read_L1 && rw_is_locked(&(d)->argo->rings_L2_rwlock) \
313       && spin_is_locked(&(r)->L3_lock)) || LOCKING_Write_rings_L2(d))
314 
315 #define LOCKING_send_L2(d) \
316     ((LOCKING_Read_L1 && spin_is_locked(&(d)->argo->send_L2_lock)) || \
317      LOCKING_Write_L1)
318 
319 #define ARGO_DEBUG 0
320 #define argo_dprintk(fmt, args...)                      \
321     do {                                                \
322         if ( ARGO_DEBUG )                               \
323             printk(XENLOG_DEBUG "argo: " fmt, ##args);  \
324     } while ( 0 )
325 
326 /*
327  * This hash function is used to distribute rings within the per-domain
328  * hash tables (d->argo->ring_hash and d->argo->send_hash). The hash table
329  * will provide a struct if a match is found with an 'argo_ring_id' key:
330  * ie. the key is a (domain id, argo port, partner domain id) tuple.
331  * The algorithm approximates the string hashing function 'djb2'.
332  */
333 static unsigned int
334 hash_index(const struct argo_ring_id *id)
335 {
336     unsigned int hash = 5381; /* prime constant from djb2 */
337 
338     /* For each input: hash = hash * 33 + <new input character value> */
339     hash = ((hash << 5) + hash) +  (id->aport            & 0xff);
340     hash = ((hash << 5) + hash) + ((id->aport      >> 8) & 0xff);
341     hash = ((hash << 5) + hash) + ((id->aport     >> 16) & 0xff);
342     hash = ((hash << 5) + hash) + ((id->aport     >> 24) & 0xff);
343     hash = ((hash << 5) + hash) +  (id->domain_id        & 0xff);
344     hash = ((hash << 5) + hash) + ((id->domain_id  >> 8) & 0xff);
345     hash = ((hash << 5) + hash) +  (id->partner_id       & 0xff);
346     hash = ((hash << 5) + hash) + ((id->partner_id >> 8) & 0xff);
347 
348     /*
349      * Since ARGO_HASHTABLE_SIZE is small, use higher-order bits of the
350      * hash to contribute to the lower-order bits before masking off.
351      */
352     return (hash ^ (hash >> 15)) & (ARGO_HASHTABLE_SIZE - 1);
353 }
354 
355 static struct argo_ring_info *
356 find_ring_info(const struct domain *d, const struct argo_ring_id *id)
357 {
358     struct argo_ring_info *ring_info;
359     const struct list_head *bucket;
360 
361     ASSERT(LOCKING_Read_rings_L2(d));
362 
363     /* List is not modified here. Search and return the match if found. */
364     bucket = &d->argo->ring_hash[hash_index(id)];
365 
366     list_for_each_entry(ring_info, bucket, node)
367     {
368         const struct argo_ring_id *cmpid = &ring_info->id;
369 
370         if ( cmpid->aport == id->aport &&
371              cmpid->domain_id == id->domain_id &&
372              cmpid->partner_id == id->partner_id )
373         {
374             argo_dprintk("found ring_info for ring(%u:%x %u)\n",
375                          id->domain_id, id->aport, id->partner_id);
376             return ring_info;
377         }
378     }
379     argo_dprintk("no ring_info for ring(%u:%x %u)\n",
380                  id->domain_id, id->aport, id->partner_id);
381 
382     return NULL;
383 }
384 
385 static struct argo_ring_info *
386 find_ring_info_by_match(const struct domain *d, xen_argo_port_t aport,
387                         domid_t partner_id)
388 {
389     struct argo_ring_id id;
390     struct argo_ring_info *ring_info;
391 
392     ASSERT(LOCKING_Read_rings_L2(d));
393 
394     id.aport = aport;
395     id.domain_id = d->domain_id;
396     id.partner_id = partner_id;
397 
398     ring_info = find_ring_info(d, &id);
399     if ( ring_info )
400         return ring_info;
401 
402     id.partner_id = XEN_ARGO_DOMID_ANY;
403 
404     return find_ring_info(d, &id);
405 }
406 
407 static struct argo_send_info *
408 find_send_info(const struct domain *d, const struct argo_ring_id *id)
409 {
410     struct argo_send_info *send_info;
411     const struct list_head *bucket;
412 
413     ASSERT(LOCKING_send_L2(d));
414 
415     /* List is not modified here. Search and return the match if found. */
416     bucket = &d->argo->send_hash[hash_index(id)];
417 
418     list_for_each_entry(send_info, bucket, node)
419     {
420         const struct argo_ring_id *cmpid = &send_info->id;
421 
422         if ( cmpid->aport == id->aport &&
423              cmpid->domain_id == id->domain_id &&
424              cmpid->partner_id == id->partner_id )
425         {
426             argo_dprintk("found send_info for ring(%u:%x %u)\n",
427                          id->domain_id, id->aport, id->partner_id);
428             return send_info;
429         }
430     }
431     argo_dprintk("no send_info for ring(%u:%x %u)\n",
432                  id->domain_id, id->aport, id->partner_id);
433 
434     return NULL;
435 }
436 
437 static void
438 signal_domain(struct domain *d)
439 {
440     argo_dprintk("signalling domid:%u\n", d->domain_id);
441 
442     send_guest_global_virq(d, VIRQ_ARGO);
443 }
444 
445 static void
446 signal_domid(domid_t domain_id)
447 {
448     struct domain *d = rcu_lock_domain_by_id(domain_id);
449 
450     if ( !d )
451         return;
452 
453     signal_domain(d);
454     rcu_unlock_domain(d);
455 }
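/*
 * Illustrative note (guest-side expectation, not implemented here): a domain
 * receives these notifications by binding VIRQ_ARGO as a global virq, e.g.
 * via EVTCHNOP_bind_virq with { .virq = VIRQ_ARGO, .vcpu = 0 }, and then
 * servicing the resulting event channel.
 */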
456 
457 static void
458 ring_unmap(const struct domain *d, struct argo_ring_info *ring_info)
459 {
460     unsigned int i;
461 
462     ASSERT(LOCKING_L3(d, ring_info));
463 
464     if ( !ring_info->mfn_mapping )
465         return;
466 
467     ASSERT(!ring_info->nmfns || ring_info->mfns);
468 
469     for ( i = 0; i < ring_info->nmfns; i++ )
470     {
471         if ( !ring_info->mfn_mapping[i] )
472             continue;
473 
474         ASSERT(!mfn_eq(ring_info->mfns[i], INVALID_MFN));
475         argo_dprintk(XENLOG_ERR "argo: unmapping page %"PRI_mfn" from %p\n",
476                      mfn_x(ring_info->mfns[i]), ring_info->mfn_mapping[i]);
477 
478         unmap_domain_page_global(ring_info->mfn_mapping[i]);
479         ring_info->mfn_mapping[i] = NULL;
480     }
481 }
482 
483 static int
484 ring_map_page(const struct domain *d, struct argo_ring_info *ring_info,
485               unsigned int i, void **out_ptr)
486 {
487     ASSERT(LOCKING_L3(d, ring_info));
488 
489     /*
490      * FIXME: Investigate using vmap to create a single contiguous virtual
491      * address space mapping of the ring instead of using the array of single
492      * page mappings.
493      * Affects logic in memcpy_to_guest_ring, the mfn_mapping array data
494      * structure, and places where ring mappings are added or removed.
495      */
496 
497     if ( i >= ring_info->nmfns )
498     {
499         gprintk(XENLOG_ERR,
500                "argo: ring (vm%u:%x vm%u) %p attempted to map page %u of %u\n",
501                 ring_info->id.domain_id, ring_info->id.aport,
502                 ring_info->id.partner_id, ring_info, i, ring_info->nmfns);
503         return -ENOMEM;
504     }
505     i = array_index_nospec(i, ring_info->nmfns);
506 
507     if ( !ring_info->mfns || !ring_info->mfn_mapping )
508     {
509         ASSERT_UNREACHABLE();
510         ring_info->len = 0;
511         return -ENOMEM;
512     }
513 
514     if ( !ring_info->mfn_mapping[i] )
515     {
516         ring_info->mfn_mapping[i] = map_domain_page_global(ring_info->mfns[i]);
517         if ( !ring_info->mfn_mapping[i] )
518         {
519             gprintk(XENLOG_ERR, "argo: ring (vm%u:%x vm%u) %p attempted to map "
520                     "page %u of %u\n",
521                     ring_info->id.domain_id, ring_info->id.aport,
522                     ring_info->id.partner_id, ring_info, i, ring_info->nmfns);
523             return -ENOMEM;
524         }
525         argo_dprintk("mapping page %"PRI_mfn" to %p\n",
526                      mfn_x(ring_info->mfns[i]), ring_info->mfn_mapping[i]);
527     }
528 
529     if ( out_ptr )
530         *out_ptr = ring_info->mfn_mapping[i];
531 
532     return 0;
533 }
534 
535 static void
536 update_tx_ptr(const struct domain *d, struct argo_ring_info *ring_info,
537               uint32_t tx_ptr)
538 {
539     xen_argo_ring_t *ringp;
540 
541     ASSERT(LOCKING_L3(d, ring_info));
542     ASSERT(ring_info->mfn_mapping[0]);
543 
544     ring_info->tx_ptr = tx_ptr;
545     ringp = ring_info->mfn_mapping[0];
546 
547     write_atomic(&ringp->tx_ptr, tx_ptr);
548     smp_wmb();
549 }
550 
551 static int
552 memcpy_to_guest_ring(const struct domain *d, struct argo_ring_info *ring_info,
553                      unsigned int offset,
554                      const void *src, XEN_GUEST_HANDLE(uint8) src_hnd,
555                      unsigned int len)
556 {
557     unsigned int mfns_index = offset >> PAGE_SHIFT;
558     void *dst;
559     int ret;
560     unsigned int src_offset = 0;
561 
562     ASSERT(LOCKING_L3(d, ring_info));
563 
564     offset &= ~PAGE_MASK;
565 
566     if ( len + offset > XEN_ARGO_MAX_RING_SIZE )
567         return -EFAULT;
568 
569     while ( len )
570     {
571         unsigned int head_len = (offset + len) > PAGE_SIZE ? PAGE_SIZE - offset
572                                                            : len;
573 
574         ret = ring_map_page(d, ring_info, mfns_index, &dst);
575         if ( ret )
576             return ret;
577 
578         if ( src )
579         {
580             memcpy(dst + offset, src + src_offset, head_len);
581             src_offset += head_len;
582         }
583         else
584         {
585             if ( copy_from_guest(dst + offset, src_hnd, head_len) )
586                 return -EFAULT;
587 
588             guest_handle_add_offset(src_hnd, head_len);
589         }
590 
591         mfns_index++;
592         len -= head_len;
593         offset = 0;
594     }
595 
596     return 0;
597 }
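/*
 * Worked example (illustrative, assuming PAGE_SIZE is 4096): a copy of
 * len 100 at offset 4090 is split into two chunks: head_len is
 * 4096 - 4090 = 6 bytes written at the end of ring page 0, then the
 * remaining 94 bytes are written at offset 0 of ring page 1.
 */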
598 
599 /*
600  * Use this with caution: rx_ptr is under guest control and may be bogus.
601  * See get_sanitized_ring for a safer alternative.
602  */
603 static int
604 get_rx_ptr(const struct domain *d, struct argo_ring_info *ring_info,
605            uint32_t *rx_ptr)
606 {
607     void *src;
608     xen_argo_ring_t *ringp;
609     int ret;
610 
611     ASSERT(LOCKING_L3(d, ring_info));
612 
613     if ( !ring_info->nmfns || ring_info->nmfns < NPAGES_RING(ring_info->len) )
614         return -EINVAL;
615 
616     ret = ring_map_page(d, ring_info, 0, &src);
617     if ( ret )
618         return ret;
619 
620     ringp = (xen_argo_ring_t *)src;
621 
622     *rx_ptr = read_atomic(&ringp->rx_ptr);
623 
624     return 0;
625 }
626 
627 /*
628  * get_sanitized_ring creates a modified copy of the ring pointers where
629  * the rx_ptr is rounded up to ensure it is aligned, and then ring
630  * wrap is handled. Simplifies safe use of the rx_ptr for available
631  * space calculation.
632  */
633 static int
634 get_sanitized_ring(const struct domain *d, xen_argo_ring_t *ring,
635                    struct argo_ring_info *ring_info)
636 {
637     uint32_t rx_ptr;
638     int ret;
639 
640     ASSERT(LOCKING_L3(d, ring_info));
641 
642     ret = get_rx_ptr(d, ring_info, &rx_ptr);
643     if ( ret )
644         return ret;
645 
646     ring->tx_ptr = ring_info->tx_ptr;
647 
648     rx_ptr = ROUNDUP_MESSAGE(rx_ptr);
649     if ( rx_ptr >= ring_info->len )
650         rx_ptr = 0;
651 
652     ring->rx_ptr = rx_ptr;
653 
654     return 0;
655 }
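/*
 * Worked example (illustrative, 16-byte message slots, ring length 4096): a
 * guest-supplied rx_ptr of 35 is rounded up to 48 before use; a bogus rx_ptr
 * of 4090 rounds up to 4096, which is >= the ring length and so wraps to 0.
 */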
656 
657 static unsigned int
658 ringbuf_payload_space(const struct domain *d, struct argo_ring_info *ring_info)
659 {
660     xen_argo_ring_t ring;
661     unsigned int len;
662     int ret;
663 
664     ASSERT(LOCKING_L3(d, ring_info));
665 
666     len = ring_info->len;
667     if ( !len )
668         return 0;
669 
670     if ( get_sanitized_ring(d, &ring, ring_info) )
671         return 0;
672 
673     argo_dprintk("sanitized ringbuf_payload_space: tx_ptr=%u rx_ptr=%u\n",
674                  ring.tx_ptr, ring.rx_ptr);
675 
676     /*
677      * rx_ptr == tx_ptr means that the ring has been emptied.
678      * See message size checking logic in the entry to ringbuf_insert which
679      * ensures that there is always one message slot of size ROUNDUP_MESSAGE(1)
680      * left available, preventing a ring from being entirely filled.
681      * This ensures that matching ring indexes always indicate an empty ring
682      * and never a full one.
683      */
684     ret = ring.rx_ptr - ring.tx_ptr;
685     if ( ret <= 0 )
686         ret += len;
687 
688     /*
689      * In a sanitized ring, we can rely on:
690      *              (rx_ptr < ring_info->len)           &&
691      *              (tx_ptr < ring_info->len)           &&
692      *      (ring_info->len <= XEN_ARGO_MAX_RING_SIZE)
693      *
694      * and since: XEN_ARGO_MAX_RING_SIZE < INT32_MAX
695      * therefore right here: ret < INT32_MAX
696      * and we are safe to return it as an unsigned value from this function.
697      * The subtractions below cannot increase its value.
698      */
699 
700     /*
701      * The maximum size payload for a message that will be accepted is:
702      * (the available space between the ring indexes)
703      *    minus (space for a message header)
704      *    minus (space for one message slot)
705      * since ringbuf_insert requires that one message slot be left
706      * unfilled, to avoid filling the ring to capacity and confusing a full
707      * ring with an empty one.
708      * Since the ring indexes are sanitized, the value in ret is aligned, so
709      * the simple subtraction here works to return the aligned value needed:
710      */
711     ret -= sizeof(struct xen_argo_ring_message_header);
712     ret -= ROUNDUP_MESSAGE(1);
713 
714     return (ret < 0) ? 0 : ret;
715 }
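/*
 * Worked example (illustrative, 16-byte slots and a 16-byte message header):
 * for a 4096-byte ring with rx_ptr 0 and tx_ptr 64, the raw gap is
 * 0 - 64 + 4096 = 4032 bytes; subtracting one header and one slot leaves
 * 4000 bytes of payload space reported to would-be senders.
 */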
716 
717 /*
718  * iov_count returns its count on success via an out variable to avoid
719  * potential for a negative return value to be used incorrectly
720  * (eg. coerced into an unsigned variable resulting in a large incorrect value)
721  */
722 static int
723 iov_count(const xen_argo_iov_t *piov, unsigned int niov,
724           unsigned int *count)
725 {
726     unsigned int sum_iov_lens = 0;
727 
728     if ( niov > XEN_ARGO_MAXIOV )
729         return -EINVAL;
730 
731     for ( ; niov--; piov++ )
732     {
733         /* valid iovs must have the padding field set to zero */
734         if ( piov->pad )
735         {
736             argo_dprintk("invalid iov: padding is not zero\n");
737             return -EINVAL;
738         }
739 
740         /* check each to protect sum against integer overflow */
741         if ( piov->iov_len > MAX_ARGO_MESSAGE_SIZE )
742         {
743             argo_dprintk("invalid iov_len: too big (%u)>%llu\n",
744                          piov->iov_len, MAX_ARGO_MESSAGE_SIZE);
745             return -EINVAL;
746         }
747 
748         sum_iov_lens += piov->iov_len;
749 
750         /*
751          * Again protect sum from integer overflow
752          * and ensure total msg size will be within bounds.
753          */
754         if ( sum_iov_lens > MAX_ARGO_MESSAGE_SIZE )
755         {
756             argo_dprintk("invalid iov series: total message too big\n");
757             return -EMSGSIZE;
758         }
759     }
760 
761     *count = sum_iov_lens;
762 
763     return 0;
764 }
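/*
 * Illustrative note: each iov_len is individually capped at
 * MAX_ARGO_MESSAGE_SIZE before it is added, and the running total is
 * re-checked after every addition, so the unsigned sum in iov_count cannot
 * wrap. For example, two iovs of 100 and 60 bytes yield *count = 160.
 */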
765 
766 static int
767 ringbuf_insert(const struct domain *d, struct argo_ring_info *ring_info,
768                const struct argo_ring_id *src_id, xen_argo_iov_t *iovs,
769                unsigned int niov, uint32_t message_type, unsigned int len)
770 {
771     xen_argo_ring_t ring;
772     struct xen_argo_ring_message_header mh = { };
773     int sp, ret;
774     xen_argo_iov_t *piov;
775     XEN_GUEST_HANDLE(uint8) NULL_hnd = { };
776 
777     ASSERT(LOCKING_L3(d, ring_info));
778 
779     /*
780      * Enforced below: no more than 'len' bytes of guest data
781      * (plus the message header) will be sent in this operation.
782      */
783 
784     /*
785      * Upper bound check the message len against the ring size.
786      * The message must not fill the ring; there must be at least one slot
787      * remaining so we can distinguish a full ring from an empty one.
788      * iov_count has already verified: len <= MAX_ARGO_MESSAGE_SIZE.
789      */
790     if ( ring_info->len <= (sizeof(struct xen_argo_ring_message_header) +
791                             ROUNDUP_MESSAGE(len)) )
792         return -EMSGSIZE;
793 
794     ret = get_sanitized_ring(d, &ring, ring_info);
795     if ( ret )
796         return ret;
797 
798     argo_dprintk("ring.tx_ptr=%u ring.rx_ptr=%u ring len=%u"
799                  " ring_info->tx_ptr=%u\n",
800                  ring.tx_ptr, ring.rx_ptr, ring_info->len, ring_info->tx_ptr);
801 
802     if ( ring.rx_ptr == ring.tx_ptr )
803         sp = ring_info->len;
804     else
805     {
806         sp = ring.rx_ptr - ring.tx_ptr;
807         if ( sp < 0 )
808             sp += ring_info->len;
809     }
810 
811     /*
812      * Size bounds check against currently available space in the ring.
813      * Again: the message must not fill the ring leaving no space remaining.
814      */
815     if ( (ROUNDUP_MESSAGE(len) +
816             sizeof(struct xen_argo_ring_message_header)) >= sp )
817     {
818         argo_dprintk("EAGAIN\n");
819         return -EAGAIN;
820     }
821 
822     mh.len = len + sizeof(struct xen_argo_ring_message_header);
823     mh.source.aport = src_id->aport;
824     mh.source.domain_id = src_id->domain_id;
825     mh.message_type = message_type;
826 
827     /*
828      * For this copy to the guest ring, tx_ptr is always 16-byte aligned
829      * and the message header is 16 bytes long.
830      */
831     BUILD_BUG_ON(
832         sizeof(struct xen_argo_ring_message_header) != ROUNDUP_MESSAGE(1));
833 
834     /*
835      * First data write into the destination ring: fixed size, message header.
836      * This cannot overrun because the available free space (value in 'sp')
837      * is checked above and must be at least this size.
838      */
839     ret = memcpy_to_guest_ring(d, ring_info,
840                                ring.tx_ptr + sizeof(xen_argo_ring_t),
841                                &mh, NULL_hnd, sizeof(mh));
842     if ( ret )
843     {
844         gprintk(XENLOG_ERR,
845                 "argo: failed to write message header to ring (vm%u:%x vm%u)\n",
846                 ring_info->id.domain_id, ring_info->id.aport,
847                 ring_info->id.partner_id);
848 
849         return ret;
850     }
851 
852     ring.tx_ptr += sizeof(mh);
853     if ( ring.tx_ptr == ring_info->len )
854         ring.tx_ptr = 0;
855 
856     for ( piov = iovs; niov--; piov++ )
857     {
858         XEN_GUEST_HANDLE(uint8) buf_hnd = piov->iov_hnd;
859         unsigned int iov_len = piov->iov_len;
860 
861         /* If no data is provided in this iov, moan and skip on to the next */
862         if ( !iov_len )
863         {
864             gprintk(XENLOG_WARNING,
865                     "argo: no data iov_len=0 iov_hnd=%p ring (vm%u:%x vm%u)\n",
866                     buf_hnd.p, ring_info->id.domain_id, ring_info->id.aport,
867                     ring_info->id.partner_id);
868 
869             continue;
870         }
871 
872         if ( unlikely(!guest_handle_okay(buf_hnd, iov_len)) )
873         {
874             gprintk(XENLOG_ERR,
875                     "argo: bad iov handle [%p, %u] (vm%u:%x vm%u)\n",
876                     buf_hnd.p, iov_len,
877                     ring_info->id.domain_id, ring_info->id.aport,
878                     ring_info->id.partner_id);
879 
880             return -EFAULT;
881         }
882 
883         sp = ring_info->len - ring.tx_ptr;
884 
885         /* Check: iov data size versus free space at the tail of the ring */
886         if ( iov_len > sp )
887         {
888             /*
889              * Second possible data write: ring-tail-wrap-write.
890              * Populate the ring tail and update the internal tx_ptr to handle
891              * wrapping at the end of ring.
892              * Size of data written here: sp
893              * which is the exact full amount of free space available at the
894              * tail of the ring, so this cannot overrun.
895              */
896             ret = memcpy_to_guest_ring(d, ring_info,
897                                        ring.tx_ptr + sizeof(xen_argo_ring_t),
898                                        NULL, buf_hnd, sp);
899             if ( ret )
900             {
901                 gprintk(XENLOG_ERR,
902                         "argo: failed to copy {%p, %d} (vm%u:%x vm%u)\n",
903                         buf_hnd.p, sp,
904                         ring_info->id.domain_id, ring_info->id.aport,
905                         ring_info->id.partner_id);
906 
907                 return ret;
908             }
909 
910             ring.tx_ptr = 0;
911             iov_len -= sp;
912             guest_handle_add_offset(buf_hnd, sp);
913 
914             ASSERT(iov_len <= ring_info->len);
915         }
916 
917         /*
918          * Third possible data write: all data remaining for this iov.
919          * Size of data written here: iov_len
920          *
921          * Case 1: if the ring-tail-wrap-write above was performed, then
922          *         iov_len has been decreased by 'sp' and ring.tx_ptr is zero.
923          *
924          *    We know from checking the result of iov_count:
925          *      len + sizeof(message_header) <= ring_info->len
926          *    We also know that len is the total of summing all iov_lens, so:
927          *       iov_len <= len
928          *    so by transitivity:
929          *       iov_len <= len <= (ring_info->len - sizeof(msgheader))
930          *    and therefore:
931          *       (iov_len + sizeof(msgheader) <= ring_info->len) &&
932          *       (ring.tx_ptr == 0)
933          *    so this write cannot overrun here.
934          *
935          * Case 2: ring-tail-wrap-write above was not performed
936          *    -> so iov_len is the guest-supplied value and: (iov_len <= sp)
937          *    ie. less than available space at the tail of the ring:
938          *        so this write cannot overrun.
939          */
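        /*
         * Worked example (illustrative): with ring_info->len 4096 and
         * ring.tx_ptr 4080, an iov of 32 bytes is written as a 16-byte
         * tail-wrap chunk (sp == 16), after which tx_ptr becomes 0 and the
         * remaining 16 bytes are written at the start of the ring.
         */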
940         ret = memcpy_to_guest_ring(d, ring_info,
941                                    ring.tx_ptr + sizeof(xen_argo_ring_t),
942                                    NULL, buf_hnd, iov_len);
943         if ( ret )
944         {
945             gprintk(XENLOG_ERR,
946                     "argo: failed to copy [%p, %u] (vm%u:%x vm%u)\n",
947                     buf_hnd.p, iov_len, ring_info->id.domain_id,
948                     ring_info->id.aport, ring_info->id.partner_id);
949 
950             return ret;
951         }
952 
953         ring.tx_ptr += iov_len;
954 
955         if ( ring.tx_ptr == ring_info->len )
956             ring.tx_ptr = 0;
957     }
958 
959     /*
960      * Finished writing data from all iovs into the ring: now need to round up
961      * tx_ptr to align to the next message boundary, and then wrap if necessary.
962      */
963     ring.tx_ptr = ROUNDUP_MESSAGE(ring.tx_ptr);
964 
965     if ( ring.tx_ptr >= ring_info->len )
966         ring.tx_ptr -= ring_info->len;
967 
968     update_tx_ptr(d, ring_info, ring.tx_ptr);
969 
970     /*
971      * At this point (and also on error exit paths from this function) it is
972      * possible to unmap the ring_info, ie:
973      *   ring_unmap(d, ring_info);
974      * but performance should be improved by not doing so, and retaining
975      * the mapping.
976      * An XSM policy control over level of confidentiality required
977      * versus performance cost could be added to decide that here.
978      */
979 
980     return ret;
981 }
982 
983 static void
984 wildcard_pending_list_remove(domid_t domain_id, struct pending_ent *ent)
985 {
986     struct domain *d = rcu_lock_domain_by_id(domain_id);
987 
988     if ( !d )
989         return;
990 
991     ASSERT(LOCKING_Read_L1);
992 
993     if ( d->argo )
994     {
995         spin_lock(&d->argo->wildcard_L2_lock);
996         list_del(&ent->wildcard_node);
997         spin_unlock(&d->argo->wildcard_L2_lock);
998     }
999     rcu_unlock_domain(d);
1000 }
1001 
1002 static void
1003 wildcard_pending_list_insert(domid_t domain_id, struct pending_ent *ent)
1004 {
1005     struct domain *d = rcu_lock_domain_by_id(domain_id);
1006 
1007     if ( !d )
1008         return;
1009 
1010     ASSERT(LOCKING_Read_L1);
1011 
1012     if ( d->argo )
1013     {
1014         spin_lock(&d->argo->wildcard_L2_lock);
1015         list_add(&ent->wildcard_node, &d->argo->wildcard_pend_list);
1016         spin_unlock(&d->argo->wildcard_L2_lock);
1017     }
1018     rcu_unlock_domain(d);
1019 }
1020 
1021 static void
1022 pending_remove_all(const struct domain *d, struct argo_ring_info *ring_info)
1023 {
1024     struct pending_ent *ent;
1025 
1026     ASSERT(LOCKING_L3(d, ring_info));
1027 
1028     /* Delete all pending notifications from this ring's list. */
1029     while ( (ent = list_first_entry_or_null(&ring_info->pending,
1030                                             struct pending_ent, node)) )
1031     {
1032         /* For wildcard rings, remove each from their wildcard list too. */
1033         if ( ring_info->id.partner_id == XEN_ARGO_DOMID_ANY )
1034             wildcard_pending_list_remove(ent->domain_id, ent);
1035         list_del(&ent->node);
1036         xfree(ent);
1037     }
1038     ring_info->npending = 0;
1039 }
1040 
1041 static void
1042 pending_notify(struct list_head *to_notify)
1043 {
1044     struct pending_ent *ent;
1045 
1046     ASSERT(LOCKING_Read_L1);
1047 
1048     /* Sending signals for all ents in this list, draining until it is empty. */
1049     while ( (ent = list_first_entry_or_null(to_notify, struct pending_ent,
1050                                             node)) )
1051     {
1052         list_del(&ent->node);
1053         signal_domid(ent->domain_id);
1054         xfree(ent);
1055     }
1056 }
1057 
1058 static void
1059 pending_find(const struct domain *d, struct argo_ring_info *ring_info,
1060              unsigned int payload_space, struct list_head *to_notify)
1061 {
1062     struct pending_ent *ent, *next;
1063 
1064     ASSERT(LOCKING_Read_rings_L2(d));
1065 
1066     /*
1067      * TODO: Current policy here is to signal _all_ of the waiting domains
1068      *       interested in sending a message of size less than payload_space.
1069      *
1070      * This is likely to be suboptimal, since once one of them has added
1071      * their message to the ring, there may well be insufficient room
1072      * available for any of the others to transmit, meaning that they were
1073      * woken in vain, which created extra work just to requeue their wait.
1074      *
1075      * Retain this simple policy for now since it at least avoids starving a
1076      * domain of available space notifications because of a policy that only
1077      * notified other domains instead. Improvement may be possible;
1078      * investigation required.
1079      */
1080     spin_lock(&ring_info->L3_lock);
1081 
1082     /* Remove matching ents from the ring list, and add them to "to_notify" */
1083     list_for_each_entry_safe(ent, next, &ring_info->pending, node)
1084     {
1085         if ( payload_space >= ent->len )
1086         {
1087             if ( ring_info->id.partner_id == XEN_ARGO_DOMID_ANY )
1088                 wildcard_pending_list_remove(ent->domain_id, ent);
1089 
1090             list_del(&ent->node);
1091             ring_info->npending--;
1092             list_add(&ent->node, to_notify);
1093         }
1094     }
1095 
1096     spin_unlock(&ring_info->L3_lock);
1097 }
1098 
1099 static int
1100 pending_queue(const struct domain *d, struct argo_ring_info *ring_info,
1101               domid_t src_id, unsigned int len)
1102 {
1103     struct pending_ent *ent;
1104 
1105     ASSERT(LOCKING_L3(d, ring_info));
1106 
1107     if ( ring_info->npending >= MAX_PENDING_PER_RING )
1108         return -EBUSY;
1109 
1110     ent = xmalloc(struct pending_ent);
1111     if ( !ent )
1112         return -ENOMEM;
1113 
1114     ent->len = len;
1115     ent->domain_id = src_id;
1116     ent->ring_info = ring_info;
1117 
1118     if ( ring_info->id.partner_id == XEN_ARGO_DOMID_ANY )
1119         wildcard_pending_list_insert(src_id, ent);
1120     list_add(&ent->node, &ring_info->pending);
1121     ring_info->npending++;
1122 
1123     return 0;
1124 }
1125 
1126 static int
1127 pending_requeue(const struct domain *d, struct argo_ring_info *ring_info,
1128                 domid_t src_id, unsigned int len)
1129 {
1130     struct pending_ent *ent;
1131 
1132     ASSERT(LOCKING_L3(d, ring_info));
1133 
1134     /* List structure is not modified here. Update len in a match if found. */
1135     list_for_each_entry(ent, &ring_info->pending, node)
1136     {
1137         if ( ent->domain_id == src_id )
1138         {
1139             /*
1140              * Reuse an existing queue entry for a notification rather than add
1141              * another. If the existing entry is waiting for a smaller size than
1142              * the current message then adjust the record to wait for the
1143              * current (larger) size to be available before triggering a
1144              * notification.
1145              * This assists the waiting sender by ensuring that whenever a
1146              * notification is triggered, there is sufficient space available
1147              * for (at least) any one of the messages awaiting transmission.
1148              */
1149             if ( ent->len < len )
1150                 ent->len = len;
1151 
1152             return 0;
1153         }
1154     }
1155 
1156     return pending_queue(d, ring_info, src_id, len);
1157 }
1158 
1159 static void
1160 pending_cancel(const struct domain *d, struct argo_ring_info *ring_info,
1161                domid_t src_id)
1162 {
1163     struct pending_ent *ent, *next;
1164 
1165     ASSERT(LOCKING_L3(d, ring_info));
1166 
1167     /* Remove all ents where domain_id matches src_id from the ring's list. */
1168     list_for_each_entry_safe(ent, next, &ring_info->pending, node)
1169     {
1170         if ( ent->domain_id == src_id )
1171         {
1172             /* For wildcard rings, remove each from their wildcard list too. */
1173             if ( ring_info->id.partner_id == XEN_ARGO_DOMID_ANY )
1174                 wildcard_pending_list_remove(ent->domain_id, ent);
1175             list_del(&ent->node);
1176             xfree(ent);
1177             ring_info->npending--;
1178         }
1179     }
1180 }
1181 
1182 static void
1183 wildcard_rings_pending_remove(struct domain *d)
1184 {
1185     struct pending_ent *ent;
1186 
1187     ASSERT(LOCKING_Write_L1);
1188 
1189     /* Delete all pending signals to the domain about wildcard rings. */
1190     while ( (ent = list_first_entry_or_null(&d->argo->wildcard_pend_list,
1191                                             struct pending_ent, node)) )
1192     {
1193         /*
1194          * The ent->node deleted here, and the npending value decreased,
1195          * belong to the ring_info of another domain, which is why this
1196          * function requires holding W(L1):
1197          * it implies the L3 lock that protects that ring_info struct.
1198          */
1199         ent->ring_info->npending--;
1200         list_del(&ent->node);
1201         list_del(&ent->wildcard_node);
1202         xfree(ent);
1203     }
1204 }
1205 
1206 static void
1207 ring_remove_mfns(const struct domain *d, struct argo_ring_info *ring_info)
1208 {
1209     unsigned int i;
1210 
1211     ASSERT(LOCKING_Write_rings_L2(d));
1212 
1213     if ( !ring_info->mfns )
1214         return;
1215 
1216     if ( !ring_info->mfn_mapping )
1217     {
1218         ASSERT_UNREACHABLE();
1219         return;
1220     }
1221 
1222     ring_unmap(d, ring_info);
1223 
1224     for ( i = 0; i < ring_info->nmfns; i++ )
1225         if ( !mfn_eq(ring_info->mfns[i], INVALID_MFN) )
1226             put_page_and_type(mfn_to_page(ring_info->mfns[i]));
1227 
1228     ring_info->nmfns = 0;
1229     XFREE(ring_info->mfns);
1230     XFREE(ring_info->mfn_mapping);
1231 }
1232 
1233 static void
1234 ring_remove_info(const struct domain *d, struct argo_ring_info *ring_info)
1235 {
1236     ASSERT(LOCKING_Write_rings_L2(d));
1237 
1238     pending_remove_all(d, ring_info);
1239     list_del(&ring_info->node);
1240     ring_remove_mfns(d, ring_info);
1241     xfree(ring_info);
1242 }
1243 
1244 static void
1245 domain_rings_remove_all(struct domain *d)
1246 {
1247     unsigned int i;
1248 
1249     ASSERT(LOCKING_Write_rings_L2(d));
1250 
1251     for ( i = 0; i < ARGO_HASHTABLE_SIZE; ++i )
1252     {
1253         struct argo_ring_info *ring_info;
1254         struct list_head *bucket = &d->argo->ring_hash[i];
1255 
1256         while ( (ring_info = list_first_entry_or_null(bucket,
1257                                                       struct argo_ring_info,
1258                                                       node)) )
1259             ring_remove_info(d, ring_info);
1260     }
1261     d->argo->ring_count = 0;
1262 }
1263 
1264 /*
1265  * Tear down all rings of other domains where src_d domain is the partner.
1266  * (ie. it is the single domain that can send to those rings.)
1267  * This will also cancel any pending notifications about those rings.
1268  */
1269 static void
1270 partner_rings_remove(struct domain *src_d)
1271 {
1272     unsigned int i;
1273 
1274     ASSERT(LOCKING_Write_L1);
1275 
1276     for ( i = 0; i < ARGO_HASHTABLE_SIZE; ++i )
1277     {
1278         struct argo_send_info *send_info;
1279         struct list_head *bucket = &src_d->argo->send_hash[i];
1280 
1281         /* Remove all ents from the send list. Take each off their ring list. */
1282         while ( (send_info = list_first_entry_or_null(bucket,
1283                                                       struct argo_send_info,
1284                                                       node)) )
1285         {
1286             struct domain *dst_d = rcu_lock_domain_by_id(send_info->id.domain_id);
1287 
1288             if ( dst_d && dst_d->argo )
1289             {
1290                 struct argo_ring_info *ring_info =
1291                     find_ring_info(dst_d, &send_info->id);
1292 
1293                 if ( ring_info )
1294                 {
1295                     ring_remove_info(dst_d, ring_info);
1296                     dst_d->argo->ring_count--;
1297                 }
1298                 else
1299                     ASSERT_UNREACHABLE();
1300             }
1301             else
1302                 argo_dprintk("%pd has entry for stale partner d%u\n",
1303                              src_d, send_info->id.domain_id);
1304 
1305             if ( dst_d )
1306                 rcu_unlock_domain(dst_d);
1307 
1308             list_del(&send_info->node);
1309             xfree(send_info);
1310         }
1311     }
1312 }
1313 
1314 static int
1315 fill_ring_data(const struct domain *currd,
1316                XEN_GUEST_HANDLE(xen_argo_ring_data_ent_t) data_ent_hnd)
1317 {
1318     xen_argo_ring_data_ent_t ent;
1319     struct domain *dst_d;
1320     struct argo_ring_info *ring_info;
1321     int ret = 0;
1322 
1323     ASSERT(currd == current->domain);
1324     ASSERT(LOCKING_Read_L1);
1325 
1326     if ( __copy_from_guest(&ent, data_ent_hnd, 1) )
1327         return -EFAULT;
1328 
1329     argo_dprintk("fill_ring_data: ent.ring.domain=%u,ent.ring.aport=%x\n",
1330                  ent.ring.domain_id, ent.ring.aport);
1331 
1332     ent.flags = 0;
1333 
1334     dst_d = rcu_lock_domain_by_id(ent.ring.domain_id);
1335     if ( !dst_d || !dst_d->argo )
1336         goto out;
1337 
1338     /*
1339      * Don't supply information about rings that a guest is not
1340      * allowed to send to.
1341      */
1342     ret = xsm_argo_send(currd, dst_d);
1343     if ( ret )
1344         goto out;
1345 
1346     read_lock(&dst_d->argo->rings_L2_rwlock);
1347 
1348     ring_info = find_ring_info_by_match(dst_d, ent.ring.aport,
1349                                         currd->domain_id);
1350     if ( ring_info )
1351     {
1352         unsigned int space_avail;
1353 
1354         ent.flags |= XEN_ARGO_RING_EXISTS;
1355 
1356         spin_lock(&ring_info->L3_lock);
1357 
1358         ent.max_message_size = ring_info->len -
1359                                    sizeof(struct xen_argo_ring_message_header) -
1360                                    ROUNDUP_MESSAGE(1);
1361 
1362         if ( ring_info->id.partner_id == XEN_ARGO_DOMID_ANY )
1363             ent.flags |= XEN_ARGO_RING_SHARED;
1364 
1365         space_avail = ringbuf_payload_space(dst_d, ring_info);
1366 
1367         argo_dprintk("fill_ring_data: aport=%x space_avail=%u"
1368                      " space_wanted=%u\n",
1369                      ring_info->id.aport, space_avail, ent.space_required);
1370 
1371         /* Do not queue a notification for an unachievable size */
1372         if ( ent.space_required > ent.max_message_size )
1373             ent.flags |= XEN_ARGO_RING_EMSGSIZE;
1374         else if ( space_avail >= ent.space_required )
1375         {
1376             pending_cancel(dst_d, ring_info, currd->domain_id);
1377             ent.flags |= XEN_ARGO_RING_SUFFICIENT;
1378         }
1379         else
1380         {
1381             ret = pending_requeue(dst_d, ring_info, currd->domain_id,
1382                                   ent.space_required);
1383             if ( ret == -EBUSY )
1384             {
1385                 /*
1386                  * Too many other domains are already awaiting notification
1387                  * about available space on this ring. Indicate this state via
1388                  * flag. No need to return an error to the caller; allow the
1389                  * processing of queries about other rings to continue.
1390                  */
1391                 ent.flags |= XEN_ARGO_RING_EBUSY;
1392                 ret = 0;
1393             }
1394         }
1395 
1396         spin_unlock(&ring_info->L3_lock);
1397 
1398         if ( space_avail == ent.max_message_size )
1399             ent.flags |= XEN_ARGO_RING_EMPTY;
1400 
1401     }
1402     read_unlock(&dst_d->argo->rings_L2_rwlock);
1403 
1404  out:
1405     if ( dst_d )
1406         rcu_unlock_domain(dst_d);
1407 
1408     if ( !ret && (__copy_field_to_guest(data_ent_hnd, &ent, flags) ||
1409                   __copy_field_to_guest(data_ent_hnd, &ent, max_message_size)) )
1410         return -EFAULT;
1411 
1412     return ret;
1413 }
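/*
 * Illustrative outcomes (assuming a 4096-byte ring with 16-byte slots, so
 * max_message_size is 4064): a query with space_required 8192 is flagged
 * XEN_ARGO_RING_EMSGSIZE; one with space_required 64 while 4000 bytes are
 * free gets XEN_ARGO_RING_SUFFICIENT; if only 32 bytes were free, the
 * querying domain is instead queued for a later VIRQ_ARGO notification.
 */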
1414 
1415 static int
1416 find_ring_mfn(struct domain *d, gfn_t gfn, mfn_t *mfn)
1417 {
1418     struct page_info *page;
1419     p2m_type_t p2mt;
1420     int ret;
1421 
1422     ret = check_get_page_from_gfn(d, gfn, false, &p2mt, &page);
1423     if ( unlikely(ret) )
1424         return ret;
1425 
1426     *mfn = page_to_mfn(page);
1427 
1428     switch ( p2mt )
1429     {
1430     case p2m_ram_rw:
1431         if ( !get_page_type(page, PGT_writable_page) )
1432             ret = -EINVAL;
1433         break;
1434 
1435 #ifdef CONFIG_X86
1436     case p2m_ram_logdirty:
1437         ret = -EAGAIN;
1438         break;
1439 #endif
1440 
1441     default:
1442         ret = -EINVAL;
1443         break;
1444     }
1445 
1446     if ( unlikely(ret) )
1447         put_page(page);
1448 
1449     return ret;
1450 }
1451 
1452 static int
1453 find_ring_mfns(struct domain *d, struct argo_ring_info *ring_info,
1454                const unsigned int npage,
1455                XEN_GUEST_HANDLE_PARAM(xen_argo_gfn_t) gfn_hnd,
1456                const unsigned int len)
1457 {
1458     unsigned int i;
1459     int ret = 0;
1460     mfn_t *mfns;
1461     void **mfn_mapping;
1462 
1463     ASSERT(LOCKING_Write_rings_L2(d));
1464 
1465     if ( ring_info->mfns )
1466     {
1467         /* Ring already existed: drop the previous mapping. */
1468         argo_dprintk("argo: vm%u re-register existing ring "
1469                      "(vm%u:%x vm%u) clears mapping\n",
1470                      d->domain_id, ring_info->id.domain_id,
1471                      ring_info->id.aport, ring_info->id.partner_id);
1472 
1473         ring_remove_mfns(d, ring_info);
1474         ASSERT(!ring_info->mfns);
1475     }
1476 
1477     mfns = xmalloc_array(mfn_t, npage);
1478     if ( !mfns )
1479         return -ENOMEM;
1480 
1481     for ( i = 0; i < npage; i++ )
1482         mfns[i] = INVALID_MFN;
1483 
1484     mfn_mapping = xzalloc_array(void *, npage);
1485     if ( !mfn_mapping )
1486     {
1487         xfree(mfns);
1488         return -ENOMEM;
1489     }
1490 
1491     ring_info->mfns = mfns;
1492     ring_info->mfn_mapping = mfn_mapping;
1493 
1494     for ( i = 0; i < npage; i++ )
1495     {
1496         mfn_t mfn;
1497         xen_argo_gfn_t argo_gfn;
1498 
1499         ret = __copy_from_guest_offset(&argo_gfn, gfn_hnd, i, 1) ? -EFAULT : 0;
1500         if ( ret )
1501             break;
1502 
1503         ret = find_ring_mfn(d, _gfn(argo_gfn), &mfn);
1504         if ( ret )
1505         {
1506             gprintk(XENLOG_ERR, "argo: vm%u: invalid gfn %"PRI_gfn" "
1507                     "r:(vm%u:%x vm%u) %p %u/%u\n",
1508                     d->domain_id, gfn_x(_gfn(argo_gfn)),
1509                     ring_info->id.domain_id, ring_info->id.aport,
1510                     ring_info->id.partner_id, ring_info, i, npage);
1511             break;
1512         }
1513 
1514         ring_info->mfns[i] = mfn;
1515 
1516         argo_dprintk("%u: %"PRI_gfn" -> %"PRI_mfn"\n",
1517                      i, gfn_x(_gfn(argo_gfn)), mfn_x(ring_info->mfns[i]));
1518     }
1519 
1520     ring_info->nmfns = i;
1521 
1522     if ( ret )
1523         ring_remove_mfns(d, ring_info);
1524     else
1525     {
1526         ASSERT(ring_info->nmfns == NPAGES_RING(len));
1527 
1528         argo_dprintk("argo: vm%u ring (vm%u:%x vm%u) %p "
1529                      "mfn_mapping %p len %u nmfns %u\n",
1530                      d->domain_id, ring_info->id.domain_id,
1531                      ring_info->id.aport, ring_info->id.partner_id, ring_info,
1532                      ring_info->mfn_mapping, ring_info->len, ring_info->nmfns);
1533     }
1534 
1535     return ret;
1536 }
1537 
1538 static long
1539 unregister_ring(struct domain *currd,
1540                 XEN_GUEST_HANDLE_PARAM(xen_argo_unregister_ring_t) unreg_hnd)
1541 {
1542     xen_argo_unregister_ring_t unreg;
1543     struct argo_ring_id ring_id;
1544     struct argo_ring_info *ring_info = NULL;
1545     struct argo_send_info *send_info = NULL;
1546     struct domain *dst_d = NULL;
1547 
1548     ASSERT(currd == current->domain);
1549 
1550     if ( copy_from_guest(&unreg, unreg_hnd, 1) )
1551         return -EFAULT;
1552 
1553     if ( unreg.pad )
1554         return -EINVAL;
1555 
1556     ring_id.partner_id = unreg.partner_id;
1557     ring_id.aport = unreg.aport;
1558     ring_id.domain_id = currd->domain_id;
1559 
1560     read_lock(&L1_global_argo_rwlock);
1561 
1562     if ( unlikely(!currd->argo) )
1563     {
1564         read_unlock(&L1_global_argo_rwlock);
1565         return -ENODEV;
1566     }
1567 
1568     write_lock(&currd->argo->rings_L2_rwlock);
1569 
1570     ring_info = find_ring_info(currd, &ring_id);
1571     if ( !ring_info )
1572         goto out;
1573 
1574     ring_remove_info(currd, ring_info);
1575     currd->argo->ring_count--;
1576 
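    /*
     * Wildcard rings have no partner domain and therefore no send_info entry
     * to remove, so the cleanup below only applies to rings registered with a
     * specified partner.
     */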
    if ( ring_id.partner_id == XEN_ARGO_DOMID_ANY )
        goto out;

    dst_d = rcu_lock_domain_by_id(ring_id.partner_id);
    if ( !dst_d || !dst_d->argo )
    {
        ASSERT_UNREACHABLE();
        goto out;
    }

    spin_lock(&dst_d->argo->send_L2_lock);

    send_info = find_send_info(dst_d, &ring_id);
    if ( send_info )
        list_del(&send_info->node);
    else
        ASSERT_UNREACHABLE();

    spin_unlock(&dst_d->argo->send_L2_lock);

 out:
    write_unlock(&currd->argo->rings_L2_rwlock);

    read_unlock(&L1_global_argo_rwlock);

    if ( dst_d )
        rcu_unlock_domain(dst_d);

    xfree(send_info);

    if ( !ring_info )
    {
        argo_dprintk("unregister_ring: no ring_info found for ring(%u:%x %u)\n",
                     ring_id.domain_id, ring_id.aport, ring_id.partner_id);
        return -ENOENT;
    }

    return 0;
}

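/*
 * Register a ring for the calling domain: validate the request, take a
 * reference on each guest frame backing the ring, map the first frame to
 * initialise the ring indexes, and make the ring discoverable to senders.
 */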
static long
register_ring(struct domain *currd,
              XEN_GUEST_HANDLE_PARAM(xen_argo_register_ring_t) reg_hnd,
              XEN_GUEST_HANDLE_PARAM(xen_argo_gfn_t) gfn_hnd,
              unsigned int npage, unsigned int flags)
{
    xen_argo_register_ring_t reg;
    struct argo_ring_id ring_id;
    void *map_ringp;
    xen_argo_ring_t *ringp;
    struct argo_ring_info *ring_info, *new_ring_info = NULL;
    struct argo_send_info *send_info = NULL;
    struct domain *dst_d = NULL;
    int ret = 0;
    unsigned int private_tx_ptr;

    ASSERT(currd == current->domain);

    /* flags: reserve currently-undefined bits, require zero.  */
    if ( unlikely(flags & ~XEN_ARGO_REGISTER_FLAG_MASK) )
        return -EINVAL;

    if ( copy_from_guest(&reg, reg_hnd, 1) )
        return -EFAULT;

    /*
     * A ring must be large enough to transmit messages, so requires space for:
     * * 1 message header, plus
     * * 1 payload slot (payload is always rounded to a multiple of 16 bytes)
     *   for the message payload to be written into, plus
     * * 1 more slot, so that the ring cannot be filled to capacity with a
     *   single minimum-size message -- see the logic in ringbuf_insert --
     *   allowing for this ensures that there can be space remaining when a
     *   message is present.
     * The above determines the minimum acceptable ring size.
     */
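    /*
     * Note: with the 16-byte message slot size referred to above, this
     * minimum works out to the message header size plus two slots; the exact
     * byte count follows from the definitions in the public Argo header.
     */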
    if ( (reg.len < (sizeof(struct xen_argo_ring_message_header)
                      + ROUNDUP_MESSAGE(1) + ROUNDUP_MESSAGE(1))) ||
         (reg.len > XEN_ARGO_MAX_RING_SIZE) ||
         (reg.len != ROUNDUP_MESSAGE(reg.len)) ||
         (NPAGES_RING(reg.len) != npage) ||
         (reg.pad != 0) )
        return -EINVAL;

    ring_id.partner_id = reg.partner_id;
    ring_id.aport = reg.aport;
    ring_id.domain_id = currd->domain_id;

    if ( reg.partner_id == XEN_ARGO_DOMID_ANY )
    {
        ret = opt_argo_mac_permissive ? xsm_argo_register_any_source(currd) :
                                        -EPERM;
        if ( ret )
            return ret;
    }
    else
    {
        dst_d = rcu_lock_domain_by_id(reg.partner_id);
        if ( !dst_d )
        {
            argo_dprintk("!dst_d, ESRCH\n");
            return -ESRCH;
        }

        ret = xsm_argo_register_single_source(currd, dst_d);
        if ( ret )
            goto out;

        send_info = xzalloc(struct argo_send_info);
        if ( !send_info )
        {
            ret = -ENOMEM;
            goto out;
        }
        send_info->id = ring_id;
    }

    /*
     * Common case is that the ring doesn't already exist, so do the alloc here
     * before picking up any locks.
     */
    new_ring_info = xzalloc(struct argo_ring_info);
    if ( !new_ring_info )
    {
        ret = -ENOMEM;
        goto out;
    }

    read_lock(&L1_global_argo_rwlock);

    if ( !currd->argo )
    {
        ret = -ENODEV;
        goto out_unlock;
    }

    if ( dst_d && !dst_d->argo )
    {
        argo_dprintk("!dst_d->argo, ECONNREFUSED\n");
        ret = -ECONNREFUSED;
        goto out_unlock;
    }

    write_lock(&currd->argo->rings_L2_rwlock);

    if ( currd->argo->ring_count >= MAX_RINGS_PER_DOMAIN )
    {
        ret = -ENOSPC;
        goto out_unlock2;
    }

    ring_info = find_ring_info(currd, &ring_id);
    if ( !ring_info )
    {
        ring_info = new_ring_info;
        new_ring_info = NULL;

        spin_lock_init(&ring_info->L3_lock);

        ring_info->id = ring_id;
        INIT_LIST_HEAD(&ring_info->pending);

        list_add(&ring_info->node,
                 &currd->argo->ring_hash[hash_index(&ring_info->id)]);

        argo_dprintk("argo: vm%u registering ring (vm%u:%x vm%u)\n",
                     currd->domain_id, ring_id.domain_id, ring_id.aport,
                     ring_id.partner_id);
    }
    else if ( ring_info->len )
    {
        /*
         * If the caller specified that the ring must not already exist,
         * fail the attempt to re-add a completed ring that already exists.
         */
        if ( flags & XEN_ARGO_REGISTER_FLAG_FAIL_EXIST )
        {
            gprintk(XENLOG_ERR, "argo: vm%u disallowed reregistration of "
                    "existing ring (vm%u:%x vm%u)\n",
                    currd->domain_id, ring_id.domain_id, ring_id.aport,
                    ring_id.partner_id);
            ret = -EEXIST;
            goto out_unlock2;
        }

        if ( ring_info->len != reg.len )
        {
            /*
             * Change of ring size could result in entries on the pending
             * notifications list that will never trigger.
             * Simple blunt solution: disallow ring resize for now.
             * TODO: investigate enabling ring resize.
             */
            gprintk(XENLOG_ERR, "argo: vm%u attempted to change ring size "
                    "(vm%u:%x vm%u)\n",
                    currd->domain_id, ring_id.domain_id, ring_id.aport,
                    ring_id.partner_id);
            /*
             * Could return EINVAL here, but if the ring didn't already
             * exist then the arguments would have been valid, so: EEXIST.
             */
            ret = -EEXIST;
            goto out_unlock2;
        }

        argo_dprintk("argo: vm%u re-registering existing ring (vm%u:%x vm%u)\n",
                     currd->domain_id, ring_id.domain_id, ring_id.aport,
                     ring_id.partner_id);
    }

    ret = find_ring_mfns(currd, ring_info, npage, gfn_hnd, reg.len);
    if ( ret )
    {
        gprintk(XENLOG_ERR,
                "argo: vm%u failed to find ring mfns (vm%u:%x vm%u)\n",
                currd->domain_id, ring_id.domain_id, ring_id.aport,
                ring_id.partner_id);

        ring_remove_info(currd, ring_info);
        goto out_unlock2;
    }

    /*
     * The first page of the memory supplied for the ring has the xen_argo_ring
     * structure at its head, which is where the ring indexes reside.
     */
    ret = ring_map_page(currd, ring_info, 0, &map_ringp);
    if ( ret )
    {
        gprintk(XENLOG_ERR,
                "argo: vm%u failed to map ring mfn 0 (vm%u:%x vm%u)\n",
                currd->domain_id, ring_id.domain_id, ring_id.aport,
                ring_id.partner_id);

        ring_remove_info(currd, ring_info);
        goto out_unlock2;
    }
    ringp = map_ringp;

    private_tx_ptr = read_atomic(&ringp->tx_ptr);

    if ( (private_tx_ptr >= reg.len) ||
         (ROUNDUP_MESSAGE(private_tx_ptr) != private_tx_ptr) )
    {
        /*
         * The ring indexes are in an invalid state, so attempt to flush the
         * ring contents here by setting the tx_ptr to the next aligned
         * message slot past the latest rx_ptr we have observed, handling
         * ring wrap correctly.
         */
        private_tx_ptr = ROUNDUP_MESSAGE(read_atomic(&ringp->rx_ptr));

        if ( private_tx_ptr >= reg.len )
            private_tx_ptr = 0;

        update_tx_ptr(currd, ring_info, private_tx_ptr);
    }

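    /*
     * The ring is now validated and mapped: publish its length and tx_ptr,
     * and count it against the per-domain ring limit.
     */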
    ring_info->tx_ptr = private_tx_ptr;
    ring_info->len = reg.len;
    currd->argo->ring_count++;

    if ( send_info )
    {
        spin_lock(&dst_d->argo->send_L2_lock);

        list_add(&send_info->node,
                 &dst_d->argo->send_hash[hash_index(&send_info->id)]);

        spin_unlock(&dst_d->argo->send_L2_lock);
    }

 out_unlock2:
    write_unlock(&currd->argo->rings_L2_rwlock);

 out_unlock:
    read_unlock(&L1_global_argo_rwlock);

 out:
    if ( dst_d )
        rcu_unlock_domain(dst_d);

    if ( ret )
        xfree(send_info);

    xfree(new_ring_info);

    return ret;
}

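/*
 * Check a single ring for available payload space and, if there is any,
 * collect onto the to_notify list the pending notifications that can now be
 * satisfied.
 */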
static void
notify_ring(const struct domain *d, struct argo_ring_info *ring_info,
            struct list_head *to_notify)
{
    unsigned int space;

    ASSERT(LOCKING_Read_rings_L2(d));

    spin_lock(&ring_info->L3_lock);

    if ( ring_info->len )
        space = ringbuf_payload_space(d, ring_info);
    else
        space = 0;

    spin_unlock(&ring_info->L3_lock);

    if ( space )
        pending_find(d, ring_info, space, to_notify);
}

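/*
 * Scan all of the domain's rings for newly-available space and deliver any
 * notifications that are now due.
 */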
static void
notify_check_pending(struct domain *d)
{
    unsigned int i;
    LIST_HEAD(to_notify);

    ASSERT(LOCKING_Read_L1);

    read_lock(&d->argo->rings_L2_rwlock);

    /* Walk all rings, call notify_ring on each to populate to_notify list */
    for ( i = 0; i < ARGO_HASHTABLE_SIZE; i++ )
    {
        struct argo_ring_info *ring_info, *next;
        struct list_head *bucket = &d->argo->ring_hash[i];

        list_for_each_entry_safe(ring_info, next, bucket, node)
            notify_ring(d, ring_info, &to_notify);
    }

    read_unlock(&d->argo->rings_L2_rwlock);

    if ( !list_empty(&to_notify) )
        pending_notify(&to_notify);
}

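/*
 * Handle XEN_ARGO_OP_notify: flush any pending space notifications for the
 * calling domain and, if the caller supplied a ring_data structure, report
 * the current state of each ring it queries.
 */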
static long
notify(struct domain *currd,
       XEN_GUEST_HANDLE_PARAM(xen_argo_ring_data_t) ring_data_hnd)
{
    XEN_GUEST_HANDLE(xen_argo_ring_data_ent_t) ent_hnd;
    xen_argo_ring_data_t ring_data;
    int ret = 0;

    ASSERT(currd == current->domain);

    read_lock(&L1_global_argo_rwlock);

    if ( !currd->argo )
    {
        argo_dprintk("!d->argo, ENODEV\n");
        ret = -ENODEV;
        goto out;
    }

    notify_check_pending(currd);

    if ( guest_handle_is_null(ring_data_hnd) )
        goto out;

    ret = copy_from_guest(&ring_data, ring_data_hnd, 1) ? -EFAULT : 0;
    if ( ret )
        goto out;

    if ( ring_data.nent > MAX_NOTIFY_COUNT )
    {
        gprintk(XENLOG_ERR, "argo: notify entry count(%u) exceeds max(%u)\n",
                ring_data.nent, MAX_NOTIFY_COUNT);
        ret = -EACCES;
        goto out;
    }

    ent_hnd = guest_handle_for_field(ring_data_hnd,
                                     xen_argo_ring_data_ent_t, data[0]);
    if ( unlikely(!guest_handle_okay(ent_hnd, ring_data.nent)) )
    {
        ret = -EFAULT;
        goto out;
    }

    while ( !ret && ring_data.nent-- )
    {
        ret = fill_ring_data(currd, ent_hnd);
        guest_handle_add_offset(ent_hnd, 1);
    }

 out:
    read_unlock(&L1_global_argo_rwlock);

    return ret;
}

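/*
 * Send a message assembled from the caller's iovs to the destination ring.
 * On success the number of bytes written is returned; if the ring is full,
 * a notification request is queued so the sender can be told when space
 * becomes available, and -EAGAIN is returned.
 */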
static long
sendv(struct domain *src_d, xen_argo_addr_t *src_addr,
      const xen_argo_addr_t *dst_addr, xen_argo_iov_t *iovs, unsigned int niov,
      uint32_t message_type)
{
    struct domain *dst_d = NULL;
    struct argo_ring_id src_id;
    struct argo_ring_info *ring_info;
    int ret = 0;
    unsigned int len = 0;

    argo_dprintk("sendv: (%u:%x)->(%u:%x) niov:%u type:%x\n",
                 src_addr->domain_id, src_addr->aport, dst_addr->domain_id,
                 dst_addr->aport, niov, message_type);

    /* Check padding is zeroed. */
    if ( unlikely(src_addr->pad || dst_addr->pad) )
        return -EINVAL;

    if ( src_addr->domain_id == XEN_ARGO_DOMID_ANY )
         src_addr->domain_id = src_d->domain_id;

    /* No domain is currently authorized to send on behalf of another */
    if ( unlikely(src_addr->domain_id != src_d->domain_id) )
        return -EPERM;

    src_id.aport = src_addr->aport;
    src_id.domain_id = src_d->domain_id;
    src_id.partner_id = dst_addr->domain_id;

    dst_d = rcu_lock_domain_by_id(dst_addr->domain_id);
    if ( !dst_d )
        return -ESRCH;

    ret = xsm_argo_send(src_d, dst_d);
    if ( ret )
    {
        gprintk(XENLOG_ERR, "argo: XSM REJECTED %i -> %i\n",
                src_d->domain_id, dst_d->domain_id);

        rcu_unlock_domain(dst_d);

        return ret;
    }

    read_lock(&L1_global_argo_rwlock);

    if ( !src_d->argo )
    {
        ret = -ENODEV;
        goto out_unlock;
    }

    if ( !dst_d->argo )
    {
        argo_dprintk("!dst_d->argo, ECONNREFUSED\n");
        ret = -ECONNREFUSED;
        goto out_unlock;
    }

    read_lock(&dst_d->argo->rings_L2_rwlock);

    ring_info = find_ring_info_by_match(dst_d, dst_addr->aport,
                                        src_id.domain_id);
    if ( !ring_info )
    {
        gprintk(XENLOG_ERR,
                "argo: vm%u connection refused, src (vm%u:%x) dst (vm%u:%x)\n",
                current->domain->domain_id, src_id.domain_id, src_id.aport,
                dst_addr->domain_id, dst_addr->aport);

        ret = -ECONNREFUSED;
    }
    else
    {
        spin_lock(&ring_info->L3_lock);

        /*
         * Obtain the total size of data to transmit -- sets the 'len' variable
         * -- and sanity check that the iovs conform to size and number limits.
         */
        ret = iov_count(iovs, niov, &len);
        if ( !ret )
        {
            ret = ringbuf_insert(dst_d, ring_info, &src_id, iovs, niov,
                                 message_type, len);
            if ( ret == -EAGAIN )
            {
                int rc;

                argo_dprintk("argo_ringbuf_sendv failed, EAGAIN\n");
                /* Requeue so a notification is issued when space becomes available. */
                rc = pending_requeue(dst_d, ring_info, src_id.domain_id, len);
                if ( rc )
                    ret = rc;
            }
        }

        spin_unlock(&ring_info->L3_lock);
    }

    read_unlock(&dst_d->argo->rings_L2_rwlock);

 out_unlock:
    read_unlock(&L1_global_argo_rwlock);

    if ( ret >= 0 )
        signal_domain(dst_d);

    if ( dst_d )
        rcu_unlock_domain(dst_d);

    return ( ret < 0 ) ? ret : len;
}

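/*
 * Top-level Argo hypercall handler: validates the numeric arguments and
 * dispatches to the handler for the requested operation.
 */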
long
do_argo_op(unsigned int cmd, XEN_GUEST_HANDLE_PARAM(void) arg1,
           XEN_GUEST_HANDLE_PARAM(void) arg2, unsigned long raw_arg3,
           unsigned long raw_arg4)
{
    struct domain *currd = current->domain;
    long rc;
    unsigned int arg3 = raw_arg3, arg4 = raw_arg4;

    argo_dprintk("->do_argo_op(%u,%p,%p,%lu,0x%lx)\n", cmd,
                 (void *)arg1.p, (void *)arg2.p, raw_arg3, raw_arg4);

    /* Reject numeric hypercall args outside 32-bit range */
    if ( (arg3 != raw_arg3) || (arg4 != raw_arg4) )
        return -EINVAL;

    if ( unlikely(!opt_argo) )
        return -EOPNOTSUPP;

    rc = xsm_argo_enable(currd);
    if ( rc )
        return rc;

    switch ( cmd )
    {
    case XEN_ARGO_OP_register_ring:
    {
        XEN_GUEST_HANDLE_PARAM(xen_argo_register_ring_t) reg_hnd =
            guest_handle_cast(arg1, xen_argo_register_ring_t);
        XEN_GUEST_HANDLE_PARAM(xen_argo_gfn_t) gfn_hnd =
            guest_handle_cast(arg2, xen_argo_gfn_t);
        /* arg3: npage, arg4: flags */

        BUILD_BUG_ON(!IS_ALIGNED(XEN_ARGO_MAX_RING_SIZE, PAGE_SIZE));

        if ( unlikely(arg3 > (XEN_ARGO_MAX_RING_SIZE >> PAGE_SHIFT)) )
        {
            rc = -EINVAL;
            break;
        }

        /* Check array to allow use of the faster __copy operations later */
        if ( unlikely(!guest_handle_okay(gfn_hnd, arg3)) )
        {
            rc = -EFAULT;
            break;
        }

        rc = register_ring(currd, reg_hnd, gfn_hnd, arg3, arg4);
        break;
    }

    case XEN_ARGO_OP_unregister_ring:
    {
        XEN_GUEST_HANDLE_PARAM(xen_argo_unregister_ring_t) unreg_hnd =
            guest_handle_cast(arg1, xen_argo_unregister_ring_t);

        if ( unlikely((!guest_handle_is_null(arg2)) || arg3 || arg4) )
        {
            rc = -EINVAL;
            break;
        }

        rc = unregister_ring(currd, unreg_hnd);
        break;
    }

    case XEN_ARGO_OP_sendv:
    {
        xen_argo_send_addr_t send_addr;
        xen_argo_iov_t iovs[XEN_ARGO_MAXIOV];
        unsigned int niov;

        XEN_GUEST_HANDLE_PARAM(xen_argo_send_addr_t) send_addr_hnd =
            guest_handle_cast(arg1, xen_argo_send_addr_t);
        XEN_GUEST_HANDLE_PARAM(xen_argo_iov_t) iovs_hnd =
            guest_handle_cast(arg2, xen_argo_iov_t);
        /* arg3 is niov */
        /* arg4 is message_type. Must be a 32-bit value. */

        /* XEN_ARGO_MAXIOV value determines size of iov array on stack */
        BUILD_BUG_ON(XEN_ARGO_MAXIOV > 8);

        rc = copy_from_guest(&send_addr, send_addr_hnd, 1) ? -EFAULT : 0;
        if ( rc )
        {
            rc = -EFAULT;
            break;
        }

        /*
         * Reject niov above the maximum limit, and message_type values that
         * are outside the 32-bit range.
         */
        if ( unlikely((arg3 > XEN_ARGO_MAXIOV) || (arg4 != (uint32_t)arg4)) )
        {
            rc = -EINVAL;
            break;
        }
        niov = array_index_nospec(arg3, XEN_ARGO_MAXIOV + 1);

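        /*
         * niov was clamped with array_index_nospec above, so the copy below
         * cannot be executed speculatively with an out-of-range element
         * count for the on-stack iovs array.
         */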
        rc = copy_from_guest(iovs, iovs_hnd, niov) ? -EFAULT : 0;
        if ( rc )
        {
            rc = -EFAULT;
            break;
        }

        rc = sendv(currd, &send_addr.src, &send_addr.dst, iovs, niov, arg4);
        break;
    }

    case XEN_ARGO_OP_notify:
    {
        XEN_GUEST_HANDLE_PARAM(xen_argo_ring_data_t) ring_data_hnd =
                   guest_handle_cast(arg1, xen_argo_ring_data_t);

        if ( unlikely((!guest_handle_is_null(arg2)) || arg3 || arg4) )
        {
            rc = -EINVAL;
            break;
        }

        rc = notify(currd, ring_data_hnd);
        break;
    }

    default:
        rc = -EOPNOTSUPP;
        break;
    }

    argo_dprintk("<-do_argo_op(%u)=%ld\n", cmd, rc);

    return rc;
}

#ifdef CONFIG_COMPAT
int
compat_argo_op(unsigned int cmd, XEN_GUEST_HANDLE_PARAM(void) arg1,
               XEN_GUEST_HANDLE_PARAM(void) arg2, unsigned long arg3,
               unsigned long arg4)
{
    struct domain *currd = current->domain;
    int rc;
    xen_argo_send_addr_t send_addr;
    xen_argo_iov_t iovs[XEN_ARGO_MAXIOV];
    compat_argo_iov_t compat_iovs[XEN_ARGO_MAXIOV];
    unsigned int i, niov;
    XEN_GUEST_HANDLE_PARAM(xen_argo_send_addr_t) send_addr_hnd;

    /* check XEN_ARGO_MAXIOV as it sizes stack arrays: iovs, compat_iovs */
    BUILD_BUG_ON(XEN_ARGO_MAXIOV > 8);

    /* Forward all ops besides sendv to the native handler. */
    if ( cmd != XEN_ARGO_OP_sendv )
        return do_argo_op(cmd, arg1, arg2, arg3, arg4);

    if ( unlikely(!opt_argo) )
        return -EOPNOTSUPP;

    rc = xsm_argo_enable(currd);
    if ( rc )
        return rc;

    argo_dprintk("->compat_argo_op(%u,%p,%p,%lu,0x%lx)\n", cmd,
                 (void *)arg1.p, (void *)arg2.p, arg3, arg4);

    send_addr_hnd = guest_handle_cast(arg1, xen_argo_send_addr_t);
    /* arg2: iovs, arg3: niov, arg4: message_type */

    rc = copy_from_guest(&send_addr, send_addr_hnd, 1) ? -EFAULT : 0;
    if ( rc )
        goto out;

    if ( unlikely(arg3 > XEN_ARGO_MAXIOV) )
    {
        rc = -EINVAL;
        goto out;
    }
    niov = array_index_nospec(arg3, XEN_ARGO_MAXIOV + 1);

    rc = copy_from_guest(compat_iovs, arg2, niov) ? -EFAULT : 0;
    if ( rc )
        goto out;

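    /*
     * Translate each compat iov into the native format: only the embedded
     * guest handle differs, and guest_from_compat_handle widens it for the
     * native sendv path.
     */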
    for ( i = 0; i < niov; i++ )
    {
#define XLAT_argo_iov_HNDL_iov_hnd(_d_, _s_) \
    guest_from_compat_handle((_d_)->iov_hnd, (_s_)->iov_hnd)

        XLAT_argo_iov(&iovs[i], &compat_iovs[i]);

#undef XLAT_argo_iov_HNDL_iov_hnd
    }

    rc = sendv(currd, &send_addr.src, &send_addr.dst, iovs, niov, arg4);
 out:
    argo_dprintk("<-compat_argo_op(%u)=%d\n", cmd, rc);

    return rc;
}
#endif

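/*
 * Initialise the per-domain Argo locks and hash tables. Used both when Argo
 * is first enabled for a domain and when its state is reset.
 */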
static void
argo_domain_init(struct argo_domain *argo)
{
    unsigned int i;

    rwlock_init(&argo->rings_L2_rwlock);
    spin_lock_init(&argo->send_L2_lock);
    spin_lock_init(&argo->wildcard_L2_lock);

    for ( i = 0; i < ARGO_HASHTABLE_SIZE; ++i )
    {
        INIT_LIST_HEAD(&argo->ring_hash[i]);
        INIT_LIST_HEAD(&argo->send_hash[i]);
    }
    INIT_LIST_HEAD(&argo->wildcard_pend_list);
}

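/*
 * Allocate and publish a domain's Argo state. If Argo is disabled globally
 * or denied for this domain by XSM policy, no state is allocated and the
 * domain cannot use Argo.
 */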
int
argo_init(struct domain *d)
{
    struct argo_domain *argo;

    if ( !opt_argo || xsm_argo_enable(d) )
    {
        argo_dprintk("argo disabled, domid: %u\n", d->domain_id);
        return 0;
    }

    argo_dprintk("init: domid: %u\n", d->domain_id);

    argo = xzalloc(struct argo_domain);
    if ( !argo )
        return -ENOMEM;

    argo_domain_init(argo);

    write_lock(&L1_global_argo_rwlock);

    d->argo = argo;

    write_unlock(&L1_global_argo_rwlock);

    return 0;
}

void
argo_destroy(struct domain *d)
{
    BUG_ON(!d->is_dying);

    write_lock(&L1_global_argo_rwlock);

    argo_dprintk("destroy: domid %u d->argo=%p\n", d->domain_id, d->argo);

    if ( d->argo )
    {
        domain_rings_remove_all(d);
        partner_rings_remove(d);
        wildcard_rings_pending_remove(d);
        XFREE(d->argo);
    }

    write_unlock(&L1_global_argo_rwlock);
}

void
argo_soft_reset(struct domain *d)
{
    write_lock(&L1_global_argo_rwlock);

    argo_dprintk("soft reset d=%u d->argo=%p\n", d->domain_id, d->argo);

    if ( d->argo )
    {
        domain_rings_remove_all(d);
        partner_rings_remove(d);
        wildcard_rings_pending_remove(d);

        /*
         * Since neither opt_argo nor xsm_argo_enable(d) can change at
         * runtime, if d->argo is true then both opt_argo and
         * xsm_argo_enable(d) must be true, and we can assume that init is
         * allowed to proceed again here.
         */
        argo_domain_init(d->argo);
    }

    write_unlock(&L1_global_argo_rwlock);
}