1 /******************************************************************************
2 * Argo : Hypervisor-Mediated data eXchange
3 *
4 * Derived from v4v, the version 2 of v2v.
5 *
6 * Copyright (c) 2010, Citrix Systems
7 * Copyright (c) 2018-2019 BAE Systems
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; If not, see <http://www.gnu.org/licenses/>.
16 */
17
18 #include <xen/argo.h>
19 #include <xen/domain.h>
20 #include <xen/domain_page.h>
21 #include <xen/errno.h>
22 #include <xen/event.h>
23 #include <xen/guest_access.h>
24 #include <xen/lib.h>
25 #include <xen/nospec.h>
26 #include <xen/param.h>
27 #include <xen/sched.h>
28 #include <xen/sections.h>
29 #include <xen/time.h>
30
31 #include <xsm/xsm.h>
32
33 #include <asm/p2m.h>
34
35 #include <public/argo.h>
36
37 #ifdef CONFIG_COMPAT
38 #include <compat/argo.h>
39 CHECK_argo_addr;
40 #undef CHECK_argo_addr
41 #define CHECK_argo_addr struct xen_argo_addr
42 CHECK_argo_register_ring;
43 CHECK_argo_ring;
44 CHECK_argo_ring_data_ent;
45 #undef CHECK_argo_ring_data_ent
46 #define CHECK_argo_ring_data_ent struct xen_argo_ring_data_ent
47 CHECK_argo_ring_data;
48 CHECK_argo_ring_message_header;
49 CHECK_argo_unregister_ring;
50 CHECK_argo_send_addr;
51 #endif
52
53 #define MAX_RINGS_PER_DOMAIN 128U
54 #define MAX_NOTIFY_COUNT 256U
55 #define MAX_PENDING_PER_RING 32U
56
57 /* All messages on the ring are padded to a multiple of the slot size. */
58 #define ROUNDUP_MESSAGE(a) ROUNDUP((a), XEN_ARGO_MSG_SLOT_SIZE)
59
60 /* The maximum size of a message that may be sent on the largest Argo ring. */
61 #define MAX_ARGO_MESSAGE_SIZE ((XEN_ARGO_MAX_RING_SIZE) - \
62 (sizeof(struct xen_argo_ring_message_header)) - ROUNDUP_MESSAGE(1))
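/*
 * Worked example of the two macros above, assuming the 16-byte
 * XEN_ARGO_MSG_SLOT_SIZE from the public header: ROUNDUP_MESSAGE(5) is 16,
 * so a 5-byte payload consumes one 16-byte slot in addition to the 16-byte
 * message header, and the largest acceptable message is the maximum ring
 * size minus the message header minus one further slot (kept free so that a
 * full ring remains distinguishable from an empty one; see ringbuf_insert).
 */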
63
64 /* Number of PAGEs needed to hold a ring of a given size in bytes */
65 #define NPAGES_RING(ring_len) \
66 (ROUNDUP((ROUNDUP_MESSAGE(ring_len) + sizeof(xen_argo_ring_t)), PAGE_SIZE) \
67 >> PAGE_SHIFT)
68
69 DEFINE_XEN_GUEST_HANDLE(xen_argo_addr_t);
70 DEFINE_XEN_GUEST_HANDLE(xen_argo_gfn_t);
71 DEFINE_XEN_GUEST_HANDLE(xen_argo_iov_t);
72 DEFINE_XEN_GUEST_HANDLE(xen_argo_register_ring_t);
73 DEFINE_XEN_GUEST_HANDLE(xen_argo_ring_t);
74 DEFINE_XEN_GUEST_HANDLE(xen_argo_ring_data_t);
75 DEFINE_XEN_GUEST_HANDLE(xen_argo_ring_data_ent_t);
76 DEFINE_XEN_GUEST_HANDLE(xen_argo_send_addr_t);
77 DEFINE_XEN_GUEST_HANDLE(xen_argo_unregister_ring_t);
78 #ifdef CONFIG_COMPAT
79 DEFINE_COMPAT_HANDLE(compat_argo_iov_t);
80 #endif
81
82 static bool __read_mostly opt_argo;
83 static bool __read_mostly opt_argo_mac_permissive;
84
85 static int __init cf_check parse_argo(const char *s)
86 {
87 const char *ss;
88 int val, rc = 0;
89
90 do {
91 ss = strchr(s, ',');
92 if ( !ss )
93 ss = strchr(s, '\0');
94
95 if ( (val = parse_bool(s, ss)) >= 0 )
96 opt_argo = val;
97 else if ( (val = parse_boolean("mac-permissive", s, ss)) >= 0 )
98 opt_argo_mac_permissive = val;
99 else
100 rc = -EINVAL;
101
102 s = ss + 1;
103 } while ( *ss );
104
105 return rc;
106 }
107 custom_param("argo", parse_argo);
108
109 typedef struct argo_ring_id
110 {
111 xen_argo_port_t aport;
112 domid_t partner_id;
113 domid_t domain_id;
114 } argo_ring_id;
115
116 /* Data about a domain's own ring that it has registered */
117 struct argo_ring_info
118 {
119 /* next node in the hash, protected by rings_L2 */
120 struct list_head node;
121 /* this ring's id, protected by rings_L2 */
122 struct argo_ring_id id;
123 /* L3, the ring_info lock: protects the members of this struct below */
124 spinlock_t L3_lock;
125 /* length of the ring, protected by L3 */
126 unsigned int len;
127 /* number of pages translated into mfns, protected by L3 */
128 unsigned int nmfns;
129 /* cached tx pointer location, protected by L3 */
130 unsigned int tx_ptr;
131 /* mapped ring pages protected by L3 */
132 void **mfn_mapping;
133 /* list of mfns of guest ring, protected by L3 */
134 mfn_t *mfns;
135 /* list of struct pending_ent for this ring, protected by L3 */
136 struct list_head pending;
137 /* number of pending entries queued for this ring, protected by L3 */
138 unsigned int npending;
139 };
140
141 /* Data about a single-sender ring, held by the sender (partner) domain */
142 struct argo_send_info
143 {
144 /* next node in the hash, protected by send_L2 */
145 struct list_head node;
146 /* this ring's id, protected by send_L2 */
147 struct argo_ring_id id;
148 };
149
150 /* A space-available notification that is awaiting sufficient space */
151 struct pending_ent
152 {
153 /* List node within argo_ring_info's pending list */
154 struct list_head node;
155 /*
156 * List node within argo_domain's wildcard_pend_list. Only used if the
157 * ring is one with a wildcard partner (ie. that any domain may send to)
158 * to enable cancelling signals on wildcard rings on domain destroy.
159 */
160 struct list_head wildcard_node;
161 /*
162 * Pointer to the ring_info that this ent pertains to. Used to ensure that
163 * ring_info->npending is decremented when ents for wildcard rings are
164 * cancelled for domain destroy.
165 * Caution: Must hold the correct locks before accessing ring_info via this.
166 */
167 struct argo_ring_info *ring_info;
168 /* minimum ring space available that this signal is waiting upon */
169 unsigned int len;
170 /* domain to be notified when space is available */
171 domid_t domain_id;
172 };
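/*
 * Lifecycle note: a pending_ent is allocated by pending_queue() when a
 * prospective sender asks to be told once enough ring space is available,
 * and is freed either by pending_notify() after the signal is delivered, or
 * when it is cancelled via pending_cancel(), pending_remove_all() or
 * wildcard_rings_pending_remove().
 */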
173
174 /*
175 * The value of the argo element in a struct domain is
176 * protected by L1_global_argo_rwlock
177 */
178 #define ARGO_HASHTABLE_SIZE 32
179 struct argo_domain
180 {
181 /* rings_L2 */
182 rwlock_t rings_L2_rwlock;
183 /*
184 * Hash table of argo_ring_info about rings this domain has registered.
185 * Protected by rings_L2.
186 */
187 struct list_head ring_hash[ARGO_HASHTABLE_SIZE];
188 /* Counter of rings registered by this domain. Protected by rings_L2. */
189 unsigned int ring_count;
190
191 /* send_L2 */
192 spinlock_t send_L2_lock;
193 /*
194 * Hash table of argo_send_info about rings other domains have registered
195 * for this domain to send to. Single partner, non-wildcard rings.
196 * Protected by send_L2.
197 */
198 struct list_head send_hash[ARGO_HASHTABLE_SIZE];
199
200 /* wildcard_L2 */
201 spinlock_t wildcard_L2_lock;
202 /*
203 * List of pending space-available signals for this domain about wildcard
204 * rings registered by other domains. Protected by wildcard_L2.
205 */
206 struct list_head wildcard_pend_list;
207 };
208
209 /*
210 * Locking is organized as follows:
211 *
212 * Terminology: R(<lock>) means taking a read lock on the specified lock;
213 * W(<lock>) means taking a write lock on it.
214 *
215 * == L1 : The global read/write lock: L1_global_argo_rwlock
216 * Protects the argo elements of all struct domain *d in the system.
217 *
218 * R(L1) does not protect any of the elements of d->argo; it protects their
219 * addresses. W(L1) protects those and more since it implies W on all the lower
220 * level locks - see the notes on those locks below.
221 *
222 * The destruction of an argo-enabled domain, which must have a non-NULL d->argo
223 * pointer, will need to free that d->argo pointer, which requires W(L1).
224 * Since holding R(L1) will block acquiring W(L1), it will ensure that
225  * no domain pointers that argo is interested in become invalid while either
226  * W(L1) or R(L1) is held.
227 */
228
229 static DEFINE_RWLOCK(L1_global_argo_rwlock); /* L1 */
230
231 /*
232 * == rings_L2 : The per-domain ring hash lock: d->argo->rings_L2_rwlock
233 *
234 * Holding a read lock on rings_L2 protects the ring hash table and
235 * the elements in the hash_table d->argo->ring_hash, and
236 * the node and id fields in struct argo_ring_info in the
237 * hash table.
238 * Holding a write lock on rings_L2 protects all of the elements of all the
239 * struct argo_ring_info belonging to this domain.
240 *
241 * To take rings_L2 you must already have R(L1). W(L1) implies W(rings_L2) and
242 * L3.
243 *
244 * == L3 : The individual ring_info lock: ring_info->L3_lock
245 *
246 * Protects all the fields within the argo_ring_info, aside from the ones that
247 * rings_L2 already protects: node, id, lock.
248 *
249 * To acquire L3 you must already have R(rings_L2). W(rings_L2) implies L3.
250 *
251 * == send_L2 : The per-domain single-sender partner rings lock:
252 * d->argo->send_L2_lock
253 *
254 * Protects the per-domain send hash table : d->argo->send_hash
255 * and the elements in the hash table, and the node and id fields
256 * in struct argo_send_info in the hash table.
257 *
258 * To take send_L2, you must already have R(L1). W(L1) implies send_L2.
259 * Do not attempt to acquire a rings_L2 on any domain after taking and while
260 * holding a send_L2 lock -- acquire the rings_L2 (if one is needed) beforehand.
261 *
262 * == wildcard_L2 : The per-domain wildcard pending list lock:
263 * d->argo->wildcard_L2_lock
264 *
265 * Protects the per-domain list of outstanding signals for space availability
266 * on wildcard rings.
267 *
268 * To take wildcard_L2, you must already have R(L1). W(L1) implies wildcard_L2.
269 * No other locks are acquired after obtaining wildcard_L2.
270 */
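/*
 * Illustrative nesting for a typical cross-domain operation on a destination
 * ring, following the discipline above (a sketch only; the exact sequence
 * depends on the operation being performed):
 *
 *   read_lock(&L1_global_argo_rwlock);           R(L1)
 *   read_lock(&dst_d->argo->rings_L2_rwlock);    R(rings_L2)
 *   spin_lock(&ring_info->L3_lock);              L3
 *   ... operate on the ring ...
 *   spin_unlock(&ring_info->L3_lock);
 *   read_unlock(&dst_d->argo->rings_L2_rwlock);
 *   read_unlock(&L1_global_argo_rwlock);
 */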
271
272 /*
273 * Lock state validations macros
274 *
275 * These macros encode the logic to verify that the locking has adhered to the
276 * locking discipline above.
277 * eg. On entry to logic that requires holding at least R(rings_L2), this:
278 * ASSERT(LOCKING_Read_rings_L2(d));
279 *
280 * checks that the lock state is sufficient, validating that one of the
281 * following must be true when executed: R(rings_L2) && R(L1)
282 * or: W(rings_L2) && R(L1)
283 * or: W(L1)
284 *
285 * The LOCKING macros defined below here are for use at verification points.
286 */
287 #define LOCKING_Write_L1 (rw_is_write_locked(&L1_global_argo_rwlock))
288 /*
289 * While LOCKING_Read_L1 will return true even if the lock is write-locked,
290 * that's OK because everywhere that a Read lock is needed with these macros,
291 * holding a Write lock there instead is OK too: we're checking that _at least_
292 * the specified level of locks are held.
293 */
294 #define LOCKING_Read_L1 (rw_is_locked(&L1_global_argo_rwlock))
295
296 #define LOCKING_Write_rings_L2(d) \
297 ((LOCKING_Read_L1 && rw_is_write_locked(&(d)->argo->rings_L2_rwlock)) || \
298 LOCKING_Write_L1)
299 /*
300 * Skip checking LOCKING_Write_rings_L2(d) within this LOCKING_Read_rings_L2
301 * definition because the first clause that is testing R(L1) && R(L2) will also
302 * return true if R(L1) && W(L2) is true, because of the way that rw_is_locked
303 * behaves. This results in a slightly shorter and faster implementation.
304 */
305 #define LOCKING_Read_rings_L2(d) \
306 ((LOCKING_Read_L1 && rw_is_locked(&(d)->argo->rings_L2_rwlock)) || \
307 LOCKING_Write_L1)
308 /*
309 * Skip checking LOCKING_Write_L1 within this LOCKING_L3 definition because
310 * LOCKING_Write_rings_L2(d) will return true for that condition.
311 */
312 #define LOCKING_L3(d, r) \
313 ((LOCKING_Read_L1 && rw_is_locked(&(d)->argo->rings_L2_rwlock) \
314 && spin_is_locked(&(r)->L3_lock)) || LOCKING_Write_rings_L2(d))
315
316 #define LOCKING_send_L2(d) \
317 ((LOCKING_Read_L1 && spin_is_locked(&(d)->argo->send_L2_lock)) || \
318 LOCKING_Write_L1)
319
320 #define ARGO_DEBUG 0
321 #define argo_dprintk(fmt, args...) \
322 do { \
323 if ( ARGO_DEBUG ) \
324 printk(XENLOG_DEBUG "argo: " fmt, ##args); \
325 } while ( 0 )
326
327 /*
328 * This hash function is used to distribute rings within the per-domain
329  * hash tables (d->argo->ring_hash and d->argo->send_hash). The hash table
330  * will provide a struct if a match is found with an 'argo_ring_id' key:
331 * ie. the key is a (domain id, argo port, partner domain id) tuple.
332 * The algorithm approximates the string hashing function 'djb2'.
333 */
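/*
 * Example of the byte feed order below: for an id of
 * { .aport = 0x12345678, .domain_id = 1, .partner_id = 2 }, the bytes mixed
 * into the hash are 0x78 0x56 0x34 0x12, then 0x01 0x00, then 0x02 0x00.
 * An identical id therefore always selects the same bucket index, whether
 * used against the ring hash or the send hash.
 */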
334 static unsigned int
335 hash_index(const struct argo_ring_id *id)
336 {
337 unsigned int hash = 5381; /* prime constant from djb2 */
338
339 /* For each input: hash = hash * 33 + <new input character value> */
340 hash = ((hash << 5) + hash) + (id->aport & 0xff);
341 hash = ((hash << 5) + hash) + ((id->aport >> 8) & 0xff);
342 hash = ((hash << 5) + hash) + ((id->aport >> 16) & 0xff);
343 hash = ((hash << 5) + hash) + ((id->aport >> 24) & 0xff);
344 hash = ((hash << 5) + hash) + (id->domain_id & 0xff);
345 hash = ((hash << 5) + hash) + ((id->domain_id >> 8) & 0xff);
346 hash = ((hash << 5) + hash) + (id->partner_id & 0xff);
347 hash = ((hash << 5) + hash) + ((id->partner_id >> 8) & 0xff);
348
349 /*
350 * Since ARGO_HASHTABLE_SIZE is small, use higher-order bits of the
351 * hash to contribute to the lower-order bits before masking off.
352 */
353 return (hash ^ (hash >> 15)) & (ARGO_HASHTABLE_SIZE - 1);
354 }
355
356 static struct argo_ring_info *
357 find_ring_info(const struct domain *d, const struct argo_ring_id *id)
358 {
359 struct argo_ring_info *ring_info;
360 const struct list_head *bucket;
361
362 ASSERT(LOCKING_Read_rings_L2(d));
363
364 /* List is not modified here. Search and return the match if found. */
365 bucket = &d->argo->ring_hash[hash_index(id)];
366
367 list_for_each_entry(ring_info, bucket, node)
368 {
369 const struct argo_ring_id *cmpid = &ring_info->id;
370
371 if ( cmpid->aport == id->aport &&
372 cmpid->domain_id == id->domain_id &&
373 cmpid->partner_id == id->partner_id )
374 {
375 argo_dprintk("found ring_info for ring(%u:%x %u)\n",
376 id->domain_id, id->aport, id->partner_id);
377 return ring_info;
378 }
379 }
380 argo_dprintk("no ring_info for ring(%u:%x %u)\n",
381 id->domain_id, id->aport, id->partner_id);
382
383 return NULL;
384 }
385
386 static struct argo_ring_info *
387 find_ring_info_by_match(const struct domain *d, xen_argo_port_t aport,
388 domid_t partner_id)
389 {
390 struct argo_ring_id id;
391 struct argo_ring_info *ring_info;
392
393 ASSERT(LOCKING_Read_rings_L2(d));
394
395 id.aport = aport;
396 id.domain_id = d->domain_id;
397 id.partner_id = partner_id;
398
399 ring_info = find_ring_info(d, &id);
400 if ( ring_info )
401 return ring_info;
402
403 id.partner_id = XEN_ARGO_DOMID_ANY;
404
405 return find_ring_info(d, &id);
406 }
407
408 static struct argo_send_info *
409 find_send_info(const struct domain *d, const struct argo_ring_id *id)
410 {
411 struct argo_send_info *send_info;
412 const struct list_head *bucket;
413
414 ASSERT(LOCKING_send_L2(d));
415
416 /* List is not modified here. Search and return the match if found. */
417 bucket = &d->argo->send_hash[hash_index(id)];
418
419 list_for_each_entry(send_info, bucket, node)
420 {
421 const struct argo_ring_id *cmpid = &send_info->id;
422
423 if ( cmpid->aport == id->aport &&
424 cmpid->domain_id == id->domain_id &&
425 cmpid->partner_id == id->partner_id )
426 {
427 argo_dprintk("found send_info for ring(%u:%x %u)\n",
428 id->domain_id, id->aport, id->partner_id);
429 return send_info;
430 }
431 }
432 argo_dprintk("no send_info for ring(%u:%x %u)\n",
433 id->domain_id, id->aport, id->partner_id);
434
435 return NULL;
436 }
437
438 static void
439 signal_domain(struct domain *d)
440 {
441 argo_dprintk("signalling domid:%u\n", d->domain_id);
442
443 send_guest_domain_virq(d, VIRQ_ARGO);
444 }
445
446 static void
447 signal_domid(domid_t domain_id)
448 {
449 struct domain *d = rcu_lock_domain_by_id(domain_id);
450
451 if ( !d )
452 return;
453
454 signal_domain(d);
455 rcu_unlock_domain(d);
456 }
457
458 static void
459 ring_unmap(const struct domain *d, struct argo_ring_info *ring_info)
460 {
461 unsigned int i;
462
463 ASSERT(LOCKING_L3(d, ring_info));
464
465 if ( !ring_info->mfn_mapping )
466 return;
467
468 ASSERT(!ring_info->nmfns || ring_info->mfns);
469
470 for ( i = 0; i < ring_info->nmfns; i++ )
471 {
472 if ( !ring_info->mfn_mapping[i] )
473 continue;
474
475 ASSERT(!mfn_eq(ring_info->mfns[i], INVALID_MFN));
476 argo_dprintk(XENLOG_ERR "argo: unmapping page %"PRI_mfn" from %p\n",
477 mfn_x(ring_info->mfns[i]), ring_info->mfn_mapping[i]);
478
479 unmap_domain_page_global(ring_info->mfn_mapping[i]);
480 ring_info->mfn_mapping[i] = NULL;
481 }
482 }
483
484 static int
485 ring_map_page(const struct domain *d, struct argo_ring_info *ring_info,
486 unsigned int i, void **out_ptr)
487 {
488 ASSERT(LOCKING_L3(d, ring_info));
489
490 /*
491 * FIXME: Investigate using vmap to create a single contiguous virtual
492 * address space mapping of the ring instead of using the array of single
493 * page mappings.
494 * Affects logic in memcpy_to_guest_ring, the mfn_mapping array data
495 * structure, and places where ring mappings are added or removed.
496 */
497
498 if ( i >= ring_info->nmfns )
499 {
500 gprintk(XENLOG_ERR,
501 "argo: ring (vm%u:%x vm%u) %p attempted to map page %u of %u\n",
502 ring_info->id.domain_id, ring_info->id.aport,
503 ring_info->id.partner_id, ring_info, i, ring_info->nmfns);
504 return -ENOMEM;
505 }
506 i = array_index_nospec(i, ring_info->nmfns);
507
508 if ( !ring_info->mfns || !ring_info->mfn_mapping )
509 {
510 ASSERT_UNREACHABLE();
511 ring_info->len = 0;
512 return -ENOMEM;
513 }
514
515 if ( !ring_info->mfn_mapping[i] )
516 {
517 ring_info->mfn_mapping[i] = map_domain_page_global(ring_info->mfns[i]);
518 if ( !ring_info->mfn_mapping[i] )
519 {
520 gprintk(XENLOG_ERR, "argo: ring (vm%u:%x vm%u) %p attempted to map "
521 "page %u of %u\n",
522 ring_info->id.domain_id, ring_info->id.aport,
523 ring_info->id.partner_id, ring_info, i, ring_info->nmfns);
524 return -ENOMEM;
525 }
526 argo_dprintk("mapping page %"PRI_mfn" to %p\n",
527 mfn_x(ring_info->mfns[i]), ring_info->mfn_mapping[i]);
528 }
529
530 if ( out_ptr )
531 *out_ptr = ring_info->mfn_mapping[i];
532
533 return 0;
534 }
535
536 static void
537 update_tx_ptr(const struct domain *d, struct argo_ring_info *ring_info,
538 uint32_t tx_ptr)
539 {
540 xen_argo_ring_t *ringp;
541
542 ASSERT(LOCKING_L3(d, ring_info));
543 ASSERT(ring_info->mfn_mapping[0]);
544
545 ring_info->tx_ptr = tx_ptr;
546 ringp = ring_info->mfn_mapping[0];
547
548 write_atomic(&ringp->tx_ptr, tx_ptr);
549 smp_wmb();
550 }
551
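/*
 * Copy 'len' bytes into the mapped guest ring at byte 'offset' (an offset
 * into the ring memory that includes the xen_argo_ring_t header). Exactly one
 * data source is used per call: 'src' for a hypervisor buffer when non-NULL,
 * otherwise the guest handle 'src_hnd'. The copy is split across the ring's
 * individual per-page mappings.
 */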
552 static int
553 memcpy_to_guest_ring(const struct domain *d, struct argo_ring_info *ring_info,
554 unsigned int offset,
555 const void *src, XEN_GUEST_HANDLE(uint8) src_hnd,
556 unsigned int len)
557 {
558 unsigned int mfns_index = offset >> PAGE_SHIFT;
559 void *dst;
560 int ret;
561 unsigned int src_offset = 0;
562
563 ASSERT(LOCKING_L3(d, ring_info));
564
565 offset &= ~PAGE_MASK;
566
567 if ( len + offset > XEN_ARGO_MAX_RING_SIZE )
568 return -EFAULT;
569
570 while ( len )
571 {
572 unsigned int head_len = (offset + len) > PAGE_SIZE ? PAGE_SIZE - offset
573 : len;
574
575 ret = ring_map_page(d, ring_info, mfns_index, &dst);
576 if ( ret )
577 return ret;
578
579 if ( src )
580 {
581 memcpy(dst + offset, src + src_offset, head_len);
582 src_offset += head_len;
583 }
584 else
585 {
586 if ( copy_from_guest(dst + offset, src_hnd, head_len) )
587 return -EFAULT;
588
589 guest_handle_add_offset(src_hnd, head_len);
590 }
591
592 mfns_index++;
593 len -= head_len;
594 offset = 0;
595 }
596
597 return 0;
598 }
599
600 /*
601 * Use this with caution: rx_ptr is under guest control and may be bogus.
602 * See get_sanitized_ring for a safer alternative.
603 */
604 static int
605 get_rx_ptr(const struct domain *d, struct argo_ring_info *ring_info,
606 uint32_t *rx_ptr)
607 {
608 void *src;
609 xen_argo_ring_t *ringp;
610 int ret;
611
612 ASSERT(LOCKING_L3(d, ring_info));
613
614 if ( !ring_info->nmfns || ring_info->nmfns < NPAGES_RING(ring_info->len) )
615 return -EINVAL;
616
617 ret = ring_map_page(d, ring_info, 0, &src);
618 if ( ret )
619 return ret;
620
621 ringp = (xen_argo_ring_t *)src;
622
623 *rx_ptr = read_atomic(&ringp->rx_ptr);
624
625 return 0;
626 }
627
628 /*
629 * get_sanitized_ring creates a modified copy of the ring pointers where
630 * the rx_ptr is rounded up to ensure it is aligned, and then ring
631 * wrap is handled. Simplifies safe use of the rx_ptr for available
632 * space calculation.
633 */
634 static int
635 get_sanitized_ring(const struct domain *d, xen_argo_ring_t *ring,
636 struct argo_ring_info *ring_info)
637 {
638 uint32_t rx_ptr;
639 int ret;
640
641 ASSERT(LOCKING_L3(d, ring_info));
642
643 ret = get_rx_ptr(d, ring_info, &rx_ptr);
644 if ( ret )
645 return ret;
646
647 ring->tx_ptr = ring_info->tx_ptr;
648
649 rx_ptr = ROUNDUP_MESSAGE(rx_ptr);
650 if ( rx_ptr >= ring_info->len )
651 rx_ptr = 0;
652
653 ring->rx_ptr = rx_ptr;
654
655 return 0;
656 }
657
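/*
 * Return the largest payload that the ring can currently accept.
 * Sketch of the arithmetic, assuming a 16-byte message slot size: for an
 * empty 4096-byte ring (rx_ptr == tx_ptr) the free space is 4096 bytes, so
 * the value returned is 4096 - 16 (message header) - 16 (reserved slot),
 * ie. 4064 bytes.
 */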
658 static unsigned int
659 ringbuf_payload_space(const struct domain *d, struct argo_ring_info *ring_info)
660 {
661 xen_argo_ring_t ring;
662 unsigned int len;
663 int ret;
664
665 ASSERT(LOCKING_L3(d, ring_info));
666
667 len = ring_info->len;
668 if ( !len )
669 return 0;
670
671 if ( get_sanitized_ring(d, &ring, ring_info) )
672 return 0;
673
674 argo_dprintk("sanitized ringbuf_payload_space: tx_ptr=%u rx_ptr=%u\n",
675 ring.tx_ptr, ring.rx_ptr);
676
677 /*
678 * rx_ptr == tx_ptr means that the ring has been emptied.
679 * See message size checking logic in the entry to ringbuf_insert which
680 * ensures that there is always one message slot of size ROUNDUP_MESSAGE(1)
681 * left available, preventing a ring from being entirely filled.
682 * This ensures that matching ring indexes always indicate an empty ring
683 * and never a full one.
684 */
685 ret = ring.rx_ptr - ring.tx_ptr;
686 if ( ret <= 0 )
687 ret += len;
688
689 /*
690 * In a sanitized ring, we can rely on:
691 * (rx_ptr < ring_info->len) &&
692 * (tx_ptr < ring_info->len) &&
693 * (ring_info->len <= XEN_ARGO_MAX_RING_SIZE)
694 *
695 * and since: XEN_ARGO_MAX_RING_SIZE < INT32_MAX
696 * therefore right here: ret < INT32_MAX
697  * and we are safe to return it as an unsigned value from this function.
698 * The subtractions below cannot increase its value.
699 */
700
701 /*
702 * The maximum size payload for a message that will be accepted is:
703 * (the available space between the ring indexes)
704 * minus (space for a message header)
705 * minus (space for one message slot)
706 * since ringbuf_insert requires that one message slot be left
707 * unfilled, to avoid filling the ring to capacity and confusing a full
708 * ring with an empty one.
709 * Since the ring indexes are sanitized, the value in ret is aligned, so
710 * the simple subtraction here works to return the aligned value needed:
711 */
712 ret -= sizeof(struct xen_argo_ring_message_header);
713 ret -= ROUNDUP_MESSAGE(1);
714
715 return (ret < 0) ? 0 : ret;
716 }
717
718 /*
719 * iov_count returns its count on success via an out variable to avoid
720 * potential for a negative return value to be used incorrectly
721 * (eg. coerced into an unsigned variable resulting in a large incorrect value)
722 */
723 static int
724 iov_count(const xen_argo_iov_t *piov, unsigned int niov,
725 unsigned int *count)
726 {
727 unsigned int sum_iov_lens = 0;
728
729 if ( niov > XEN_ARGO_MAXIOV )
730 return -EINVAL;
731
732 for ( ; niov--; piov++ )
733 {
734 /* valid iovs must have the padding field set to zero */
735 if ( piov->pad )
736 {
737 argo_dprintk("invalid iov: padding is not zero\n");
738 return -EINVAL;
739 }
740
741 /* check each to protect sum against integer overflow */
742 if ( piov->iov_len > MAX_ARGO_MESSAGE_SIZE )
743 {
744 argo_dprintk("invalid iov_len: too big (%u)>%llu\n",
745 piov->iov_len, MAX_ARGO_MESSAGE_SIZE);
746 return -EINVAL;
747 }
748
749 sum_iov_lens += piov->iov_len;
750
751 /*
752 * Again protect sum from integer overflow
753 * and ensure total msg size will be within bounds.
754 */
755 if ( sum_iov_lens > MAX_ARGO_MESSAGE_SIZE )
756 {
757 argo_dprintk("invalid iov series: total message too big\n");
758 return -EMSGSIZE;
759 }
760 }
761
762 *count = sum_iov_lens;
763
764 return 0;
765 }
766
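/*
 * Write one message into the destination ring: a fixed-size message header
 * followed by the payload gathered from the caller's iovs, wrapping at the
 * end of the ring as needed. On success the sender-private tx_ptr is advanced
 * to the next message slot and published to the ring via update_tx_ptr().
 * Returns -EAGAIN if the ring does not currently have enough free space.
 */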
767 static int
768 ringbuf_insert(const struct domain *d, struct argo_ring_info *ring_info,
769 const struct argo_ring_id *src_id, xen_argo_iov_t *iovs,
770 unsigned int niov, uint32_t message_type, unsigned int len)
771 {
772 xen_argo_ring_t ring;
773 struct xen_argo_ring_message_header mh = { };
774 int sp, ret;
775 xen_argo_iov_t *piov;
776 XEN_GUEST_HANDLE(uint8) NULL_hnd = { };
777
778 ASSERT(LOCKING_L3(d, ring_info));
779
780 /*
781 * Enforced below: no more than 'len' bytes of guest data
782 * (plus the message header) will be sent in this operation.
783 */
784
785 /*
786 * Upper bound check the message len against the ring size.
787 * The message must not fill the ring; there must be at least one slot
788 * remaining so we can distinguish a full ring from an empty one.
789 * iov_count has already verified: len <= MAX_ARGO_MESSAGE_SIZE.
790 */
791 if ( ring_info->len <= (sizeof(struct xen_argo_ring_message_header) +
792 ROUNDUP_MESSAGE(len)) )
793 return -EMSGSIZE;
794
795 ret = get_sanitized_ring(d, &ring, ring_info);
796 if ( ret )
797 return ret;
798
799 argo_dprintk("ring.tx_ptr=%u ring.rx_ptr=%u ring len=%u"
800 " ring_info->tx_ptr=%u\n",
801 ring.tx_ptr, ring.rx_ptr, ring_info->len, ring_info->tx_ptr);
802
803 if ( ring.rx_ptr == ring.tx_ptr )
804 sp = ring_info->len;
805 else
806 {
807 sp = ring.rx_ptr - ring.tx_ptr;
808 if ( sp < 0 )
809 sp += ring_info->len;
810 }
811
812 /*
813 * Size bounds check against currently available space in the ring.
814 * Again: the message must not fill the ring leaving no space remaining.
815 */
816 if ( (ROUNDUP_MESSAGE(len) +
817 sizeof(struct xen_argo_ring_message_header)) >= sp )
818 {
819 argo_dprintk("EAGAIN\n");
820 return -EAGAIN;
821 }
822
823 mh.len = len + sizeof(struct xen_argo_ring_message_header);
824 mh.source.aport = src_id->aport;
825 mh.source.domain_id = src_id->domain_id;
826 mh.message_type = message_type;
827
828 /*
829 * For this copy to the guest ring, tx_ptr is always 16-byte aligned
830 * and the message header is 16 bytes long.
831 */
832 BUILD_BUG_ON(
833 sizeof(struct xen_argo_ring_message_header) != ROUNDUP_MESSAGE(1));
834
835 /*
836 * First data write into the destination ring: fixed size, message header.
837 * This cannot overrun because the available free space (value in 'sp')
838 * is checked above and must be at least this size.
839 */
840 ret = memcpy_to_guest_ring(d, ring_info,
841 ring.tx_ptr + sizeof(xen_argo_ring_t),
842 &mh, NULL_hnd, sizeof(mh));
843 if ( ret )
844 {
845 gprintk(XENLOG_ERR,
846 "argo: failed to write message header to ring (vm%u:%x vm%u)\n",
847 ring_info->id.domain_id, ring_info->id.aport,
848 ring_info->id.partner_id);
849
850 return ret;
851 }
852
853 ring.tx_ptr += sizeof(mh);
854 if ( ring.tx_ptr == ring_info->len )
855 ring.tx_ptr = 0;
856
857 for ( piov = iovs; niov--; piov++ )
858 {
859 XEN_GUEST_HANDLE(uint8) buf_hnd = piov->iov_hnd;
860 unsigned int iov_len = piov->iov_len;
861
862 /* If no data is provided in this iov, moan and skip on to the next */
863 if ( !iov_len )
864 {
865 gprintk(XENLOG_WARNING,
866 "argo: no data iov_len=0 iov_hnd=%p ring (vm%u:%x vm%u)\n",
867 buf_hnd.p, ring_info->id.domain_id, ring_info->id.aport,
868 ring_info->id.partner_id);
869
870 continue;
871 }
872
873 if ( unlikely(!guest_handle_okay(buf_hnd, iov_len)) )
874 {
875 gprintk(XENLOG_ERR,
876 "argo: bad iov handle [%p, %u] (vm%u:%x vm%u)\n",
877 buf_hnd.p, iov_len,
878 ring_info->id.domain_id, ring_info->id.aport,
879 ring_info->id.partner_id);
880
881 return -EFAULT;
882 }
883
884 sp = ring_info->len - ring.tx_ptr;
885
886 /* Check: iov data size versus free space at the tail of the ring */
887 if ( iov_len > sp )
888 {
889 /*
890 * Second possible data write: ring-tail-wrap-write.
891 * Populate the ring tail and update the internal tx_ptr to handle
892 * wrapping at the end of ring.
893 * Size of data written here: sp
894 * which is the exact full amount of free space available at the
895 * tail of the ring, so this cannot overrun.
896 */
897 ret = memcpy_to_guest_ring(d, ring_info,
898 ring.tx_ptr + sizeof(xen_argo_ring_t),
899 NULL, buf_hnd, sp);
900 if ( ret )
901 {
902 gprintk(XENLOG_ERR,
903 "argo: failed to copy {%p, %d} (vm%u:%x vm%u)\n",
904 buf_hnd.p, sp,
905 ring_info->id.domain_id, ring_info->id.aport,
906 ring_info->id.partner_id);
907
908 return ret;
909 }
910
911 ring.tx_ptr = 0;
912 iov_len -= sp;
913 guest_handle_add_offset(buf_hnd, sp);
914
915 ASSERT(iov_len <= ring_info->len);
916 }
917
918 /*
919 * Third possible data write: all data remaining for this iov.
920 * Size of data written here: iov_len
921 *
922 * Case 1: if the ring-tail-wrap-write above was performed, then
923 * iov_len has been decreased by 'sp' and ring.tx_ptr is zero.
924 *
925 * We know from checking the result of iov_count:
926 * len + sizeof(message_header) <= ring_info->len
927 * We also know that len is the total of summing all iov_lens, so:
928 * iov_len <= len
929 * so by transitivity:
930 * iov_len <= len <= (ring_info->len - sizeof(msgheader))
931 * and therefore:
932 * (iov_len + sizeof(msgheader) <= ring_info->len) &&
933 * (ring.tx_ptr == 0)
934 * so this write cannot overrun here.
935 *
936 * Case 2: ring-tail-wrap-write above was not performed
937 * -> so iov_len is the guest-supplied value and: (iov_len <= sp)
938 * ie. less than available space at the tail of the ring:
939 * so this write cannot overrun.
940 */
941 ret = memcpy_to_guest_ring(d, ring_info,
942 ring.tx_ptr + sizeof(xen_argo_ring_t),
943 NULL, buf_hnd, iov_len);
944 if ( ret )
945 {
946 gprintk(XENLOG_ERR,
947 "argo: failed to copy [%p, %u] (vm%u:%x vm%u)\n",
948 buf_hnd.p, iov_len, ring_info->id.domain_id,
949 ring_info->id.aport, ring_info->id.partner_id);
950
951 return ret;
952 }
953
954 ring.tx_ptr += iov_len;
955
956 if ( ring.tx_ptr == ring_info->len )
957 ring.tx_ptr = 0;
958 }
959
960 /*
961 * Finished writing data from all iovs into the ring: now need to round up
962 * tx_ptr to align to the next message boundary, and then wrap if necessary.
963 */
964 ring.tx_ptr = ROUNDUP_MESSAGE(ring.tx_ptr);
965
966 if ( ring.tx_ptr >= ring_info->len )
967 ring.tx_ptr -= ring_info->len;
968
969 update_tx_ptr(d, ring_info, ring.tx_ptr);
970
971 /*
972  * At this point (and also on error exit paths from this function) it is
973 * possible to unmap the ring_info, ie:
974 * ring_unmap(d, ring_info);
975 * but performance should be improved by not doing so, and retaining
976 * the mapping.
977 * An XSM policy control over level of confidentiality required
978 * versus performance cost could be added to decide that here.
979 */
980
981 return ret;
982 }
983
984 static void
985 wildcard_pending_list_remove(domid_t domain_id, struct pending_ent *ent)
986 {
987 struct domain *d = rcu_lock_domain_by_id(domain_id);
988
989 if ( !d )
990 return;
991
992 ASSERT(LOCKING_Read_L1);
993
994 if ( d->argo )
995 {
996 spin_lock(&d->argo->wildcard_L2_lock);
997 list_del(&ent->wildcard_node);
998 spin_unlock(&d->argo->wildcard_L2_lock);
999 }
1000 rcu_unlock_domain(d);
1001 }
1002
1003 static void
1004 wildcard_pending_list_insert(domid_t domain_id, struct pending_ent *ent)
1005 {
1006 struct domain *d = rcu_lock_domain_by_id(domain_id);
1007
1008 if ( !d )
1009 return;
1010
1011 ASSERT(LOCKING_Read_L1);
1012
1013 if ( d->argo )
1014 {
1015 spin_lock(&d->argo->wildcard_L2_lock);
1016 list_add(&ent->wildcard_node, &d->argo->wildcard_pend_list);
1017 spin_unlock(&d->argo->wildcard_L2_lock);
1018 }
1019 rcu_unlock_domain(d);
1020 }
1021
1022 static void
1023 pending_remove_all(const struct domain *d, struct argo_ring_info *ring_info)
1024 {
1025 struct pending_ent *ent;
1026
1027 ASSERT(LOCKING_L3(d, ring_info));
1028
1029 /* Delete all pending notifications from this ring's list. */
1030 while ( (ent = list_first_entry_or_null(&ring_info->pending,
1031 struct pending_ent, node)) )
1032 {
1033 /* For wildcard rings, remove each from their wildcard list too. */
1034 if ( ring_info->id.partner_id == XEN_ARGO_DOMID_ANY )
1035 wildcard_pending_list_remove(ent->domain_id, ent);
1036 list_del(&ent->node);
1037 xfree(ent);
1038 }
1039 ring_info->npending = 0;
1040 }
1041
1042 static void
1043 pending_notify(struct list_head *to_notify)
1044 {
1045 struct pending_ent *ent;
1046
1047 ASSERT(LOCKING_Read_L1);
1048
1049 /* Sending signals for all ents in this list, draining until it is empty. */
1050 while ( (ent = list_first_entry_or_null(to_notify, struct pending_ent,
1051 node)) )
1052 {
1053 list_del(&ent->node);
1054 signal_domid(ent->domain_id);
1055 xfree(ent);
1056 }
1057 }
1058
1059 static void
1060 pending_find(const struct domain *d, struct argo_ring_info *ring_info,
1061 unsigned int payload_space, struct list_head *to_notify)
1062 {
1063 struct pending_ent *ent, *next;
1064
1065 ASSERT(LOCKING_Read_rings_L2(d));
1066
1067 /*
1068 * TODO: Current policy here is to signal _all_ of the waiting domains
1069 * interested in sending a message of size less than payload_space.
1070 *
1071 * This is likely to be suboptimal, since once one of them has added
1072 * their message to the ring, there may well be insufficient room
1073 * available for any of the others to transmit, meaning that they were
1074 * woken in vain, which created extra work just to requeue their wait.
1075 *
1076 * Retain this simple policy for now since it at least avoids starving a
1077 * domain of available space notifications because of a policy that only
1078 * notified other domains instead. Improvement may be possible;
1079 * investigation required.
1080 */
1081 spin_lock(&ring_info->L3_lock);
1082
1083 /* Remove matching ents from the ring list, and add them to "to_notify" */
1084 list_for_each_entry_safe(ent, next, &ring_info->pending, node)
1085 {
1086 if ( payload_space >= ent->len )
1087 {
1088 if ( ring_info->id.partner_id == XEN_ARGO_DOMID_ANY )
1089 wildcard_pending_list_remove(ent->domain_id, ent);
1090
1091 list_del(&ent->node);
1092 ring_info->npending--;
1093 list_add(&ent->node, to_notify);
1094 }
1095 }
1096
1097 spin_unlock(&ring_info->L3_lock);
1098 }
1099
1100 static int
1101 pending_queue(const struct domain *d, struct argo_ring_info *ring_info,
1102 domid_t src_id, unsigned int len)
1103 {
1104 struct pending_ent *ent;
1105
1106 ASSERT(LOCKING_L3(d, ring_info));
1107
1108 if ( ring_info->npending >= MAX_PENDING_PER_RING )
1109 return -EBUSY;
1110
1111 ent = xmalloc(struct pending_ent);
1112 if ( !ent )
1113 return -ENOMEM;
1114
1115 ent->len = len;
1116 ent->domain_id = src_id;
1117 ent->ring_info = ring_info;
1118
1119 if ( ring_info->id.partner_id == XEN_ARGO_DOMID_ANY )
1120 wildcard_pending_list_insert(src_id, ent);
1121 list_add(&ent->node, &ring_info->pending);
1122 ring_info->npending++;
1123
1124 return 0;
1125 }
1126
1127 static int
1128 pending_requeue(const struct domain *d, struct argo_ring_info *ring_info,
1129 domid_t src_id, unsigned int len)
1130 {
1131 struct pending_ent *ent;
1132
1133 ASSERT(LOCKING_L3(d, ring_info));
1134
1135 /* List structure is not modified here. Update len in a match if found. */
1136 list_for_each_entry(ent, &ring_info->pending, node)
1137 {
1138 if ( ent->domain_id == src_id )
1139 {
1140 /*
1141 * Reuse an existing queue entry for a notification rather than add
1142 * another. If the existing entry is waiting for a smaller size than
1143 * the current message then adjust the record to wait for the
1144 * current (larger) size to be available before triggering a
1145 * notification.
1146 * This assists the waiting sender by ensuring that whenever a
1147 * notification is triggered, there is sufficient space available
1148 * for (at least) any one of the messages awaiting transmission.
1149 */
1150 if ( ent->len < len )
1151 ent->len = len;
1152
1153 return 0;
1154 }
1155 }
1156
1157 return pending_queue(d, ring_info, src_id, len);
1158 }
1159
1160 static void
1161 pending_cancel(const struct domain *d, struct argo_ring_info *ring_info,
1162 domid_t src_id)
1163 {
1164 struct pending_ent *ent, *next;
1165
1166 ASSERT(LOCKING_L3(d, ring_info));
1167
1168 /* Remove all ents where domain_id matches src_id from the ring's list. */
1169 list_for_each_entry_safe(ent, next, &ring_info->pending, node)
1170 {
1171 if ( ent->domain_id == src_id )
1172 {
1173 /* For wildcard rings, remove each from their wildcard list too. */
1174 if ( ring_info->id.partner_id == XEN_ARGO_DOMID_ANY )
1175 wildcard_pending_list_remove(ent->domain_id, ent);
1176 list_del(&ent->node);
1177 xfree(ent);
1178 ring_info->npending--;
1179 }
1180 }
1181 }
1182
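/*
 * Cancel every outstanding space-available signal owed to this domain for
 * wildcard rings registered by other domains. Requires W(L1) because the
 * ring_info structures whose npending counts are adjusted here belong to
 * those other domains.
 */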
1183 static void
1184 wildcard_rings_pending_remove(struct domain *d)
1185 {
1186 struct pending_ent *ent;
1187
1188 ASSERT(LOCKING_Write_L1);
1189
1190 /* Delete all pending signals to the domain about wildcard rings. */
1191 while ( (ent = list_first_entry_or_null(&d->argo->wildcard_pend_list,
1192 struct pending_ent, node)) )
1193 {
1194 /*
1195 * The ent->node deleted here, and the npending value decreased,
1196 * belong to the ring_info of another domain, which is why this
1197 * function requires holding W(L1):
1198 * it implies the L3 lock that protects that ring_info struct.
1199 */
1200 ent->ring_info->npending--;
1201 list_del(&ent->node);
1202 list_del(&ent->wildcard_node);
1203 xfree(ent);
1204 }
1205 }
1206
1207 static void
1208 ring_remove_mfns(const struct domain *d, struct argo_ring_info *ring_info)
1209 {
1210 unsigned int i;
1211
1212 ASSERT(LOCKING_Write_rings_L2(d));
1213
1214 if ( !ring_info->mfns )
1215 return;
1216
1217 if ( !ring_info->mfn_mapping )
1218 {
1219 ASSERT_UNREACHABLE();
1220 return;
1221 }
1222
1223 ring_unmap(d, ring_info);
1224
1225 for ( i = 0; i < ring_info->nmfns; i++ )
1226 if ( !mfn_eq(ring_info->mfns[i], INVALID_MFN) )
1227 put_page_and_type(mfn_to_page(ring_info->mfns[i]));
1228
1229 ring_info->nmfns = 0;
1230 XFREE(ring_info->mfns);
1231 XFREE(ring_info->mfn_mapping);
1232 }
1233
1234 static void
1235 ring_remove_info(const struct domain *d, struct argo_ring_info *ring_info)
1236 {
1237 ASSERT(LOCKING_Write_rings_L2(d));
1238
1239 pending_remove_all(d, ring_info);
1240 list_del(&ring_info->node);
1241 ring_remove_mfns(d, ring_info);
1242 xfree(ring_info);
1243 }
1244
1245 static void
1246 domain_rings_remove_all(struct domain *d)
1247 {
1248 unsigned int i;
1249
1250 ASSERT(LOCKING_Write_rings_L2(d));
1251
1252 for ( i = 0; i < ARGO_HASHTABLE_SIZE; ++i )
1253 {
1254 struct argo_ring_info *ring_info;
1255 struct list_head *bucket = &d->argo->ring_hash[i];
1256
1257 while ( (ring_info = list_first_entry_or_null(bucket,
1258 struct argo_ring_info,
1259 node)) )
1260 ring_remove_info(d, ring_info);
1261 }
1262 d->argo->ring_count = 0;
1263 }
1264
1265 /*
1266 * Tear down all rings of other domains where src_d domain is the partner.
1267 * (ie. it is the single domain that can send to those rings.)
1268 * This will also cancel any pending notifications about those rings.
1269 */
1270 static void
1271 partner_rings_remove(struct domain *src_d)
1272 {
1273 unsigned int i;
1274
1275 ASSERT(LOCKING_Write_L1);
1276
1277 for ( i = 0; i < ARGO_HASHTABLE_SIZE; ++i )
1278 {
1279 struct argo_send_info *send_info;
1280 struct list_head *bucket = &src_d->argo->send_hash[i];
1281
1282 /* Remove all ents from the send list. Take each off their ring list. */
1283 while ( (send_info = list_first_entry_or_null(bucket,
1284 struct argo_send_info,
1285 node)) )
1286 {
1287 struct domain *dst_d = rcu_lock_domain_by_id(send_info->id.domain_id);
1288
1289 if ( dst_d && dst_d->argo )
1290 {
1291 struct argo_ring_info *ring_info =
1292 find_ring_info(dst_d, &send_info->id);
1293
1294 if ( ring_info )
1295 {
1296 ring_remove_info(dst_d, ring_info);
1297 dst_d->argo->ring_count--;
1298 }
1299 else
1300 ASSERT_UNREACHABLE();
1301 }
1302 else
1303 argo_dprintk("%pd has entry for stale partner d%u\n",
1304 src_d, send_info->id.domain_id);
1305
1306 if ( dst_d )
1307 rcu_unlock_domain(dst_d);
1308
1309 list_del(&send_info->node);
1310 xfree(send_info);
1311 }
1312 }
1313 }
1314
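/*
 * Process a single entry of a notify query: report whether the matching ring
 * exists, its maximum message size, and whether the requested space is
 * available now; if it is not, queue (or update) a space-available
 * notification on behalf of the calling domain.
 */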
1315 static int
1316 fill_ring_data(const struct domain *currd,
1317 XEN_GUEST_HANDLE(xen_argo_ring_data_ent_t) data_ent_hnd)
1318 {
1319 xen_argo_ring_data_ent_t ent;
1320 struct domain *dst_d;
1321 struct argo_ring_info *ring_info;
1322 int ret = 0;
1323
1324 ASSERT(currd == current->domain);
1325 ASSERT(LOCKING_Read_L1);
1326
1327 if ( __copy_from_guest(&ent, data_ent_hnd, 1) )
1328 return -EFAULT;
1329
1330 argo_dprintk("fill_ring_data: ent.ring.domain=%u,ent.ring.aport=%x\n",
1331 ent.ring.domain_id, ent.ring.aport);
1332
1333 ent.flags = 0;
1334
1335 dst_d = rcu_lock_domain_by_id(ent.ring.domain_id);
1336 if ( !dst_d || !dst_d->argo )
1337 goto out;
1338
1339 /*
1340 * Don't supply information about rings that a guest is not
1341 * allowed to send to.
1342 */
1343 ret = xsm_argo_send(currd, dst_d);
1344 if ( ret )
1345 goto out;
1346
1347 read_lock(&dst_d->argo->rings_L2_rwlock);
1348
1349 ring_info = find_ring_info_by_match(dst_d, ent.ring.aport,
1350 currd->domain_id);
1351 if ( ring_info )
1352 {
1353 unsigned int space_avail;
1354
1355 ent.flags |= XEN_ARGO_RING_EXISTS;
1356
1357 spin_lock(&ring_info->L3_lock);
1358
1359 ent.max_message_size = ring_info->len -
1360 sizeof(struct xen_argo_ring_message_header) -
1361 ROUNDUP_MESSAGE(1);
1362
1363 if ( ring_info->id.partner_id == XEN_ARGO_DOMID_ANY )
1364 ent.flags |= XEN_ARGO_RING_SHARED;
1365
1366 space_avail = ringbuf_payload_space(dst_d, ring_info);
1367
1368 argo_dprintk("fill_ring_data: aport=%x space_avail=%u"
1369 " space_wanted=%u\n",
1370 ring_info->id.aport, space_avail, ent.space_required);
1371
1372 /* Do not queue a notification for an unachievable size */
1373 if ( ent.space_required > ent.max_message_size )
1374 ent.flags |= XEN_ARGO_RING_EMSGSIZE;
1375 else if ( space_avail >= ent.space_required )
1376 {
1377 pending_cancel(dst_d, ring_info, currd->domain_id);
1378 ent.flags |= XEN_ARGO_RING_SUFFICIENT;
1379 }
1380 else
1381 {
1382 ret = pending_requeue(dst_d, ring_info, currd->domain_id,
1383 ent.space_required);
1384 if ( ret == -EBUSY )
1385 {
1386 /*
1387 * Too many other domains are already awaiting notification
1388 * about available space on this ring. Indicate this state via
1389 * flag. No need to return an error to the caller; allow the
1390 * processing of queries about other rings to continue.
1391 */
1392 ent.flags |= XEN_ARGO_RING_EBUSY;
1393 ret = 0;
1394 }
1395 }
1396
1397 spin_unlock(&ring_info->L3_lock);
1398
1399 if ( space_avail == ent.max_message_size )
1400 ent.flags |= XEN_ARGO_RING_EMPTY;
1401
1402 }
1403 read_unlock(&dst_d->argo->rings_L2_rwlock);
1404
1405 out:
1406 if ( dst_d )
1407 rcu_unlock_domain(dst_d);
1408
1409 if ( !ret && (__copy_field_to_guest(data_ent_hnd, &ent, flags) ||
1410 __copy_field_to_guest(data_ent_hnd, &ent, max_message_size)) )
1411 return -EFAULT;
1412
1413 return ret;
1414 }
1415
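/*
 * Translate one gfn of the prospective ring into an mfn, taking a writable
 * page type reference. Only ordinary read-write RAM is accepted; on x86 a
 * page currently in logdirty mode yields -EAGAIN.
 */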
1416 static int
1417 find_ring_mfn(struct domain *d, gfn_t gfn, mfn_t *mfn)
1418 {
1419 struct page_info *page;
1420 p2m_type_t p2mt;
1421 int ret;
1422
1423 ret = check_get_page_from_gfn(d, gfn, false, &p2mt, &page);
1424 if ( unlikely(ret) )
1425 return ret;
1426
1427 *mfn = page_to_mfn(page);
1428
1429 switch ( p2mt )
1430 {
1431 case p2m_ram_rw:
1432 if ( !get_page_type(page, PGT_writable_page) )
1433 ret = -EINVAL;
1434 break;
1435
1436 #ifdef CONFIG_X86
1437 case p2m_ram_logdirty:
1438 ret = -EAGAIN;
1439 break;
1440 #endif
1441
1442 default:
1443 ret = -EINVAL;
1444 break;
1445 }
1446
1447 if ( unlikely(ret) )
1448 put_page(page);
1449
1450 return ret;
1451 }
1452
1453 static int
1454 find_ring_mfns(struct domain *d, struct argo_ring_info *ring_info,
1455 const unsigned int npage,
1456 XEN_GUEST_HANDLE_PARAM(xen_argo_gfn_t) gfn_hnd,
1457 const unsigned int len)
1458 {
1459 unsigned int i;
1460 int ret = 0;
1461 mfn_t *mfns;
1462 void **mfn_mapping;
1463
1464 ASSERT(LOCKING_Write_rings_L2(d));
1465
1466 if ( ring_info->mfns )
1467 {
1468 /* Ring already existed: drop the previous mapping. */
1469 argo_dprintk("argo: vm%u re-register existing ring "
1470 "(vm%u:%x vm%u) clears mapping\n",
1471 d->domain_id, ring_info->id.domain_id,
1472 ring_info->id.aport, ring_info->id.partner_id);
1473
1474 ring_remove_mfns(d, ring_info);
1475 ASSERT(!ring_info->mfns);
1476 }
1477
1478 mfns = xmalloc_array(mfn_t, npage);
1479 if ( !mfns )
1480 return -ENOMEM;
1481
1482 for ( i = 0; i < npage; i++ )
1483 mfns[i] = INVALID_MFN;
1484
1485 mfn_mapping = xzalloc_array(void *, npage);
1486 if ( !mfn_mapping )
1487 {
1488 xfree(mfns);
1489 return -ENOMEM;
1490 }
1491
1492 ring_info->mfns = mfns;
1493 ring_info->mfn_mapping = mfn_mapping;
1494
1495 for ( i = 0; i < npage; i++ )
1496 {
1497 mfn_t mfn;
1498 xen_argo_gfn_t argo_gfn;
1499
1500 ret = __copy_from_guest_offset(&argo_gfn, gfn_hnd, i, 1) ? -EFAULT : 0;
1501 if ( ret )
1502 break;
1503
1504 ret = find_ring_mfn(d, _gfn(argo_gfn), &mfn);
1505 if ( ret )
1506 {
1507 gprintk(XENLOG_ERR, "argo: vm%u: invalid gfn %"PRI_gfn" "
1508 "r:(vm%u:%x vm%u) %p %u/%u\n",
1509 d->domain_id, gfn_x(_gfn(argo_gfn)),
1510 ring_info->id.domain_id, ring_info->id.aport,
1511 ring_info->id.partner_id, ring_info, i, npage);
1512 break;
1513 }
1514
1515 ring_info->mfns[i] = mfn;
1516
1517 argo_dprintk("%u: %"PRI_gfn" -> %"PRI_mfn"\n",
1518 i, gfn_x(_gfn(argo_gfn)), mfn_x(ring_info->mfns[i]));
1519 }
1520
1521 ring_info->nmfns = i;
1522
1523 if ( ret )
1524 ring_remove_mfns(d, ring_info);
1525 else
1526 {
1527 ASSERT(ring_info->nmfns == NPAGES_RING(len));
1528
1529 argo_dprintk("argo: vm%u ring (vm%u:%x vm%u) %p "
1530 "mfn_mapping %p len %u nmfns %u\n",
1531 d->domain_id, ring_info->id.domain_id,
1532 ring_info->id.aport, ring_info->id.partner_id, ring_info,
1533 ring_info->mfn_mapping, ring_info->len, ring_info->nmfns);
1534 }
1535
1536 return ret;
1537 }
1538
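/*
 * Tear down the calling domain's ring identified by (aport, partner_id).
 * For a single-partner ring, also remove the partner domain's send_info
 * bookkeeping entry for this ring. Returns -ENOENT if no such ring is
 * registered.
 */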
1539 static long
1540 unregister_ring(struct domain *currd,
1541 XEN_GUEST_HANDLE_PARAM(xen_argo_unregister_ring_t) unreg_hnd)
1542 {
1543 xen_argo_unregister_ring_t unreg;
1544 struct argo_ring_id ring_id;
1545 struct argo_ring_info *ring_info = NULL;
1546 struct argo_send_info *send_info = NULL;
1547 struct domain *dst_d = NULL;
1548
1549 ASSERT(currd == current->domain);
1550
1551 if ( copy_from_guest(&unreg, unreg_hnd, 1) )
1552 return -EFAULT;
1553
1554 if ( unreg.pad )
1555 return -EINVAL;
1556
1557 ring_id.partner_id = unreg.partner_id;
1558 ring_id.aport = unreg.aport;
1559 ring_id.domain_id = currd->domain_id;
1560
1561 read_lock(&L1_global_argo_rwlock);
1562
1563 if ( unlikely(!currd->argo) )
1564 {
1565 read_unlock(&L1_global_argo_rwlock);
1566 return -ENODEV;
1567 }
1568
1569 write_lock(&currd->argo->rings_L2_rwlock);
1570
1571 ring_info = find_ring_info(currd, &ring_id);
1572 if ( !ring_info )
1573 goto out;
1574
1575 ring_remove_info(currd, ring_info);
1576 currd->argo->ring_count--;
1577
1578 if ( ring_id.partner_id == XEN_ARGO_DOMID_ANY )
1579 goto out;
1580
1581 dst_d = rcu_lock_domain_by_id(ring_id.partner_id);
1582 if ( !dst_d || !dst_d->argo )
1583 {
1584 ASSERT_UNREACHABLE();
1585 goto out;
1586 }
1587
1588 spin_lock(&dst_d->argo->send_L2_lock);
1589
1590 send_info = find_send_info(dst_d, &ring_id);
1591 if ( send_info )
1592 list_del(&send_info->node);
1593 else
1594 ASSERT_UNREACHABLE();
1595
1596 spin_unlock(&dst_d->argo->send_L2_lock);
1597
1598 out:
1599 write_unlock(&currd->argo->rings_L2_rwlock);
1600
1601 read_unlock(&L1_global_argo_rwlock);
1602
1603 if ( dst_d )
1604 rcu_unlock_domain(dst_d);
1605
1606 xfree(send_info);
1607
1608 if ( !ring_info )
1609 {
1610 argo_dprintk("unregister_ring: no ring_info found for ring(%u:%x %u)\n",
1611 ring_id.domain_id, ring_id.aport, ring_id.partner_id);
1612 return -ENOENT;
1613 }
1614
1615 return 0;
1616 }
1617
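/*
 * Register (or re-register) a ring for the calling domain: validate the
 * requested size and flags, check XSM policy for the chosen partner, pin and
 * map the supplied guest frames, and publish the ring in the domain's ring
 * hash. For a single-partner ring, a send_info entry is added to the partner
 * domain so the ring can be located later for cleanup.
 */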
1618 static long
1619 register_ring(struct domain *currd,
1620 XEN_GUEST_HANDLE_PARAM(xen_argo_register_ring_t) reg_hnd,
1621 XEN_GUEST_HANDLE_PARAM(xen_argo_gfn_t) gfn_hnd,
1622 unsigned int npage, unsigned int flags)
1623 {
1624 xen_argo_register_ring_t reg;
1625 struct argo_ring_id ring_id;
1626 void *map_ringp;
1627 xen_argo_ring_t *ringp;
1628 struct argo_ring_info *ring_info, *new_ring_info = NULL;
1629 struct argo_send_info *send_info = NULL;
1630 struct domain *dst_d = NULL;
1631 int ret = 0;
1632 unsigned int private_tx_ptr;
1633
1634 ASSERT(currd == current->domain);
1635
1636 /* flags: reserve currently-undefined bits, require zero. */
1637 if ( unlikely(flags & ~XEN_ARGO_REGISTER_FLAG_MASK) )
1638 return -EINVAL;
1639
1640     if ( copy_from_guest(&reg, reg_hnd, 1) )
1641 return -EFAULT;
1642
1643 /*
1644 * A ring must be large enough to transmit messages, so requires space for:
1645 * * 1 message header, plus
1646 * * 1 payload slot (payload is always rounded to a multiple of 16 bytes)
1647 * for the message payload to be written into, plus
1648 * * 1 more slot, so that the ring cannot be filled to capacity with a
1649 * single minimum-size message -- see the logic in ringbuf_insert --
1650 * allowing for this ensures that there can be space remaining when a
1651 * message is present.
1652 * The above determines the minimum acceptable ring size.
1653 */
1654 if ( (reg.len < (sizeof(struct xen_argo_ring_message_header)
1655 + ROUNDUP_MESSAGE(1) + ROUNDUP_MESSAGE(1))) ||
1656 (reg.len > XEN_ARGO_MAX_RING_SIZE) ||
1657 (reg.len != ROUNDUP_MESSAGE(reg.len)) ||
1658 (NPAGES_RING(reg.len) != npage) ||
1659 (reg.pad != 0) )
1660 return -EINVAL;
1661
1662 ring_id.partner_id = reg.partner_id;
1663 ring_id.aport = reg.aport;
1664 ring_id.domain_id = currd->domain_id;
1665
1666 if ( reg.partner_id == XEN_ARGO_DOMID_ANY )
1667 {
1668 ret = opt_argo_mac_permissive ? xsm_argo_register_any_source(currd) :
1669 -EPERM;
1670 if ( ret )
1671 return ret;
1672 }
1673 else
1674 {
1675 dst_d = rcu_lock_domain_by_id(reg.partner_id);
1676 if ( !dst_d )
1677 {
1678 argo_dprintk("!dst_d, ESRCH\n");
1679 return -ESRCH;
1680 }
1681
1682 ret = xsm_argo_register_single_source(currd, dst_d);
1683 if ( ret )
1684 goto out;
1685
1686 send_info = xzalloc(struct argo_send_info);
1687 if ( !send_info )
1688 {
1689 ret = -ENOMEM;
1690 goto out;
1691 }
1692 send_info->id = ring_id;
1693 }
1694
1695 /*
1696 * Common case is that the ring doesn't already exist, so do the alloc here
1697 * before picking up any locks.
1698 */
1699 new_ring_info = xzalloc(struct argo_ring_info);
1700 if ( !new_ring_info )
1701 {
1702 ret = -ENOMEM;
1703 goto out;
1704 }
1705
1706 read_lock(&L1_global_argo_rwlock);
1707
1708 if ( !currd->argo )
1709 {
1710 ret = -ENODEV;
1711 goto out_unlock;
1712 }
1713
1714 if ( dst_d && !dst_d->argo )
1715 {
1716 argo_dprintk("!dst_d->argo, ECONNREFUSED\n");
1717 ret = -ECONNREFUSED;
1718 goto out_unlock;
1719 }
1720
1721 write_lock(&currd->argo->rings_L2_rwlock);
1722
1723 if ( currd->argo->ring_count >= MAX_RINGS_PER_DOMAIN )
1724 {
1725 ret = -ENOSPC;
1726 goto out_unlock2;
1727 }
1728
1729 ring_info = find_ring_info(currd, &ring_id);
1730 if ( !ring_info )
1731 {
1732 ring_info = new_ring_info;
1733 new_ring_info = NULL;
1734
1735 spin_lock_init(&ring_info->L3_lock);
1736
1737 ring_info->id = ring_id;
1738 INIT_LIST_HEAD(&ring_info->pending);
1739
1740 list_add(&ring_info->node,
1741 &currd->argo->ring_hash[hash_index(&ring_info->id)]);
1742
1743 argo_dprintk("argo: vm%u registering ring (vm%u:%x vm%u)\n",
1744 currd->domain_id, ring_id.domain_id, ring_id.aport,
1745 ring_id.partner_id);
1746 }
1747 else if ( ring_info->len )
1748 {
1749 /*
1750 * If the caller specified that the ring must not already exist,
1751          * fail the attempt to add a completed ring which already exists.
1752 */
1753 if ( flags & XEN_ARGO_REGISTER_FLAG_FAIL_EXIST )
1754 {
1755 gprintk(XENLOG_ERR, "argo: vm%u disallowed reregistration of "
1756 "existing ring (vm%u:%x vm%u)\n",
1757 currd->domain_id, ring_id.domain_id, ring_id.aport,
1758 ring_id.partner_id);
1759 ret = -EEXIST;
1760 goto out_unlock2;
1761 }
1762
1763 if ( ring_info->len != reg.len )
1764 {
1765 /*
1766 * Change of ring size could result in entries on the pending
1767 * notifications list that will never trigger.
1768 * Simple blunt solution: disallow ring resize for now.
1769 * TODO: investigate enabling ring resize.
1770 */
1771 gprintk(XENLOG_ERR, "argo: vm%u attempted to change ring size "
1772 "(vm%u:%x vm%u)\n",
1773 currd->domain_id, ring_id.domain_id, ring_id.aport,
1774 ring_id.partner_id);
1775 /*
1776 * Could return EINVAL here, but if the ring didn't already
1777 * exist then the arguments would have been valid, so: EEXIST.
1778 */
1779 ret = -EEXIST;
1780 goto out_unlock2;
1781 }
1782
1783 argo_dprintk("argo: vm%u re-registering existing ring (vm%u:%x vm%u)\n",
1784 currd->domain_id, ring_id.domain_id, ring_id.aport,
1785 ring_id.partner_id);
1786 }
1787
1788 ret = find_ring_mfns(currd, ring_info, npage, gfn_hnd, reg.len);
1789 if ( ret )
1790 {
1791 gprintk(XENLOG_ERR,
1792 "argo: vm%u failed to find ring mfns (vm%u:%x vm%u)\n",
1793 currd->domain_id, ring_id.domain_id, ring_id.aport,
1794 ring_id.partner_id);
1795
1796 ring_remove_info(currd, ring_info);
1797 goto out_unlock2;
1798 }
1799
1800 /*
1801 * The first page of the memory supplied for the ring has the xen_argo_ring
1802 * structure at its head, which is where the ring indexes reside.
1803 */
1804 ret = ring_map_page(currd, ring_info, 0, &map_ringp);
1805 if ( ret )
1806 {
1807 gprintk(XENLOG_ERR,
1808 "argo: vm%u failed to map ring mfn 0 (vm%u:%x vm%u)\n",
1809 currd->domain_id, ring_id.domain_id, ring_id.aport,
1810 ring_id.partner_id);
1811
1812 ring_remove_info(currd, ring_info);
1813 goto out_unlock2;
1814 }
1815 ringp = map_ringp;
1816
1817 private_tx_ptr = read_atomic(&ringp->tx_ptr);
1818
1819 if ( (private_tx_ptr >= reg.len) ||
1820 (ROUNDUP_MESSAGE(private_tx_ptr) != private_tx_ptr) )
1821 {
1822 /*
1823 * Since the ring is a mess, attempt to flush the contents of it
1824 * here by setting the tx_ptr to the next aligned message slot past
1825 * the latest rx_ptr we have observed. Handle ring wrap correctly.
1826 */
1827 private_tx_ptr = ROUNDUP_MESSAGE(read_atomic(&ringp->rx_ptr));
1828
1829 if ( private_tx_ptr >= reg.len )
1830 private_tx_ptr = 0;
1831
1832 update_tx_ptr(currd, ring_info, private_tx_ptr);
1833 }
1834
1835 ring_info->tx_ptr = private_tx_ptr;
1836 ring_info->len = reg.len;
1837 currd->argo->ring_count++;
1838
1839 if ( send_info )
1840 {
1841 spin_lock(&dst_d->argo->send_L2_lock);
1842
1843 list_add(&send_info->node,
1844 &dst_d->argo->send_hash[hash_index(&send_info->id)]);
1845
1846 spin_unlock(&dst_d->argo->send_L2_lock);
1847 }
1848
1849 out_unlock2:
1850 write_unlock(&currd->argo->rings_L2_rwlock);
1851
1852 out_unlock:
1853 read_unlock(&L1_global_argo_rwlock);
1854
1855 out:
1856 if ( dst_d )
1857 rcu_unlock_domain(dst_d);
1858
1859 if ( ret )
1860 xfree(send_info);
1861
1862 xfree(new_ring_info);
1863
1864 return ret;
1865 }
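/*
 * For orientation, a guest-side view of the registration path handled
 * above, written as a non-authoritative sketch: HYPERVISOR_argo_op() is a
 * hypothetical guest hypercall wrapper, and the port/page values are
 * placeholders, not part of this file.
 *
 *   xen_argo_register_ring_t reg = {
 *       .aport      = my_port,              // port to receive on
 *       .partner_id = XEN_ARGO_DOMID_ANY,   // or a specific partner domid
 *       .len        = ring_len,             // ring size in bytes
 *   };
 *   xen_argo_gfn_t gfns[NPAGES];            // frames backing the ring
 *
 *   ret = HYPERVISOR_argo_op(XEN_ARGO_OP_register_ring, &reg, gfns,
 *                            NPAGES, XEN_ARGO_REGISTER_FLAG_FAIL_EXIST);
 *
 * On success the hypervisor delivers messages into the ring and the guest
 * advances rx_ptr as it consumes them.
 */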
1866
1867 static void
1868 notify_ring(const struct domain *d, struct argo_ring_info *ring_info,
1869 struct list_head *to_notify)
1870 {
1871 unsigned int space;
1872
1873 ASSERT(LOCKING_Read_rings_L2(d));
1874
1875 spin_lock(&ring_info->L3_lock);
1876
1877 if ( ring_info->len )
1878 space = ringbuf_payload_space(d, ring_info);
1879 else
1880 space = 0;
1881
1882 spin_unlock(&ring_info->L3_lock);
1883
1884 if ( space )
1885 pending_find(d, ring_info, space, to_notify);
1886 }
1887
1888 static void
1889 notify_check_pending(struct domain *d)
1890 {
1891 unsigned int i;
1892 LIST_HEAD(to_notify);
1893
1894 ASSERT(LOCKING_Read_L1);
1895
1896 read_lock(&d->argo->rings_L2_rwlock);
1897
1898 /* Walk all rings, call notify_ring on each to populate to_notify list */
1899 for ( i = 0; i < ARGO_HASHTABLE_SIZE; i++ )
1900 {
1901 struct argo_ring_info *ring_info, *next;
1902 struct list_head *bucket = &d->argo->ring_hash[i];
1903
1904 list_for_each_entry_safe(ring_info, next, bucket, node)
1905 notify_ring(d, ring_info, &to_notify);
1906 }
1907
1908 read_unlock(&d->argo->rings_L2_rwlock);
1909
1910 if ( !list_empty(&to_notify) )
1911 pending_notify(&to_notify);
1912 }
1913
1914 static long
1915 notify(struct domain *currd,
1916 XEN_GUEST_HANDLE_PARAM(xen_argo_ring_data_t) ring_data_hnd)
1917 {
1918 XEN_GUEST_HANDLE(xen_argo_ring_data_ent_t) ent_hnd;
1919 xen_argo_ring_data_t ring_data;
1920 int ret = 0;
1921
1922 ASSERT(currd == current->domain);
1923
1924 read_lock(&L1_global_argo_rwlock);
1925
1926 if ( !currd->argo )
1927 {
1928 argo_dprintk("!d->argo, ENODEV\n");
1929 ret = -ENODEV;
1930 goto out;
1931 }
1932
1933 notify_check_pending(currd);
1934
1935 if ( guest_handle_is_null(ring_data_hnd) )
1936 goto out;
1937
1938 ret = copy_from_guest(&ring_data, ring_data_hnd, 1) ? -EFAULT : 0;
1939 if ( ret )
1940 goto out;
1941
1942 if ( ring_data.nent > MAX_NOTIFY_COUNT )
1943 {
1944 gprintk(XENLOG_ERR, "argo: notify entry count(%u) exceeds max(%u)\n",
1945 ring_data.nent, MAX_NOTIFY_COUNT);
1946 ret = -EACCES;
1947 goto out;
1948 }
1949
1950 ent_hnd = guest_handle_for_field(ring_data_hnd,
1951 xen_argo_ring_data_ent_t, data[0]);
1952 if ( unlikely(!guest_handle_okay(ent_hnd, ring_data.nent)) )
1953 {
1954 ret = -EFAULT;
1955 goto out;
1956 }
1957
1958 while ( !ret && ring_data.nent-- )
1959 {
1960 ret = fill_ring_data(currd, ent_hnd);
1961 guest_handle_add_offset(ent_hnd, 1);
1962 }
1963
1964 out:
1965 read_unlock(&L1_global_argo_rwlock);
1966
1967 return ret;
1968 }
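/*
 * Guest-side view of XEN_ARGO_OP_notify, as a non-authoritative sketch
 * (wrapper and helper names are hypothetical; entry field names follow the
 * public Argo header). Passing a NULL ring_data pointer only triggers the
 * pending-notification scan performed above; a non-NULL pointer also
 * requests space information for 'nent' destination rings:
 *
 *   size_t sz = sizeof(xen_argo_ring_data_t) +
 *               sizeof(xen_argo_ring_data_ent_t);
 *   xen_argo_ring_data_t *rd = alloc_zeroed(sz);   // hypothetical helper
 *
 *   rd->nent = 1;
 *   rd->data[0].ring.domain_id = peer_domid;       // ring to query
 *   rd->data[0].ring.aport     = peer_port;
 *
 *   ret = HYPERVISOR_argo_op(XEN_ARGO_OP_notify, rd, NULL, 0, 0);
 *
 * fill_ring_data() then writes the queried ring's status back into each
 * data[] entry.
 */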
1969
1970 static long
1971 sendv(struct domain *src_d, xen_argo_addr_t *src_addr,
1972 const xen_argo_addr_t *dst_addr, xen_argo_iov_t *iovs, unsigned int niov,
1973 uint32_t message_type)
1974 {
1975 struct domain *dst_d = NULL;
1976 struct argo_ring_id src_id;
1977 struct argo_ring_info *ring_info;
1978 int ret = 0;
1979 unsigned int len = 0;
1980
1981 argo_dprintk("sendv: (%u:%x)->(%u:%x) niov:%u type:%x\n",
1982 src_addr->domain_id, src_addr->aport, dst_addr->domain_id,
1983 dst_addr->aport, niov, message_type);
1984
1985 /* Check padding is zeroed. */
1986 if ( unlikely(src_addr->pad || dst_addr->pad) )
1987 return -EINVAL;
1988
1989 if ( src_addr->domain_id == XEN_ARGO_DOMID_ANY )
1990 src_addr->domain_id = src_d->domain_id;
1991
1992 /* No domain is currently authorized to send on behalf of another */
1993 if ( unlikely(src_addr->domain_id != src_d->domain_id) )
1994 return -EPERM;
1995
1996 src_id.aport = src_addr->aport;
1997 src_id.domain_id = src_d->domain_id;
1998 src_id.partner_id = dst_addr->domain_id;
1999
2000 dst_d = rcu_lock_domain_by_id(dst_addr->domain_id);
2001 if ( !dst_d )
2002 return -ESRCH;
2003
2004 ret = xsm_argo_send(src_d, dst_d);
2005 if ( ret )
2006 {
2007 gprintk(XENLOG_ERR, "argo: XSM REJECTED %i -> %i\n",
2008 src_d->domain_id, dst_d->domain_id);
2009
2010 rcu_unlock_domain(dst_d);
2011
2012 return ret;
2013 }
2014
2015 read_lock(&L1_global_argo_rwlock);
2016
2017 if ( !src_d->argo )
2018 {
2019 ret = -ENODEV;
2020 goto out_unlock;
2021 }
2022
2023 if ( !dst_d->argo )
2024 {
2025 argo_dprintk("!dst_d->argo, ECONNREFUSED\n");
2026 ret = -ECONNREFUSED;
2027 goto out_unlock;
2028 }
2029
2030 read_lock(&dst_d->argo->rings_L2_rwlock);
2031
2032 ring_info = find_ring_info_by_match(dst_d, dst_addr->aport,
2033 src_id.domain_id);
2034 if ( !ring_info )
2035 {
2036 gprintk(XENLOG_ERR,
2037 "argo: vm%u connection refused, src (vm%u:%x) dst (vm%u:%x)\n",
2038 current->domain->domain_id, src_id.domain_id, src_id.aport,
2039 dst_addr->domain_id, dst_addr->aport);
2040
2041 ret = -ECONNREFUSED;
2042 }
2043 else
2044 {
2045 spin_lock(&ring_info->L3_lock);
2046
2047 /*
2048 * Obtain the total size of data to transmit -- sets the 'len' variable
2049 * -- and sanity check that the iovs conform to size and number limits.
2050 */
2051 ret = iov_count(iovs, niov, &len);
2052 if ( !ret )
2053 {
2054 ret = ringbuf_insert(dst_d, ring_info, &src_id, iovs, niov,
2055 message_type, len);
2056 if ( ret == -EAGAIN )
2057 {
2058 int rc;
2059
2060 argo_dprintk("argo_ringbuf_sendv failed, EAGAIN\n");
2061 /* Requeue to issue a notification when space becomes available. */
2062 rc = pending_requeue(dst_d, ring_info, src_id.domain_id, len);
2063 if ( rc )
2064 ret = rc;
2065 }
2066 }
2067
2068 spin_unlock(&ring_info->L3_lock);
2069 }
2070
2071 read_unlock(&dst_d->argo->rings_L2_rwlock);
2072
2073 out_unlock:
2074 read_unlock(&L1_global_argo_rwlock);
2075
2076 if ( ret >= 0 )
2077 signal_domain(dst_d);
2078
2079 if ( dst_d )
2080 rcu_unlock_domain(dst_d);
2081
2082 return ( ret < 0 ) ? ret : len;
2083 }
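/*
 * Guest-side view of a sendv, as a non-authoritative sketch (the hypercall
 * wrapper and buffer names are hypothetical):
 *
 *   xen_argo_send_addr_t addrs = {
 *       .src = { .domain_id = XEN_ARGO_DOMID_ANY, .aport = my_port   },
 *       .dst = { .domain_id = peer_domid,         .aport = peer_port },
 *   };
 *   xen_argo_iov_t iov = {
 *       .iov_hnd = payload_hnd,     // guest handle to the payload buffer
 *       .iov_len = payload_len,
 *   };
 *
 *   ret = HYPERVISOR_argo_op(XEN_ARGO_OP_sendv, &addrs, &iov, 1, msg_type);
 *
 * A XEN_ARGO_DOMID_ANY source domain id is rewritten above to the caller's
 * own id. The return value is the total number of bytes queued, or a
 * negative errno; -EAGAIN indicates the destination ring is currently full
 * and a space-available notification has been requested.
 */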
2084
2085 long
2086 do_argo_op(unsigned int cmd, XEN_GUEST_HANDLE_PARAM(void) arg1,
2087 XEN_GUEST_HANDLE_PARAM(void) arg2, unsigned long raw_arg3,
2088 unsigned long raw_arg4)
2089 {
2090 struct domain *currd = current->domain;
2091 long rc;
2092 unsigned int arg3 = raw_arg3, arg4 = raw_arg4;
2093
2094 argo_dprintk("->do_argo_op(%u,%p,%p,%lu,0x%lx)\n", cmd,
2095 (void *)arg1.p, (void *)arg2.p, raw_arg3, raw_arg4);
2096
2097 /* Reject numeric hypercall args outside 32-bit range */
2098 if ( (arg3 != raw_arg3) || (arg4 != raw_arg4) )
2099 return -EINVAL;
2100
2101 if ( unlikely(!opt_argo) )
2102 return -EOPNOTSUPP;
2103
2104 rc = xsm_argo_enable(currd);
2105 if ( rc )
2106 return rc;
2107
2108 switch ( cmd )
2109 {
2110 case XEN_ARGO_OP_register_ring:
2111 {
2112 XEN_GUEST_HANDLE_PARAM(xen_argo_register_ring_t) reg_hnd =
2113 guest_handle_cast(arg1, xen_argo_register_ring_t);
2114 XEN_GUEST_HANDLE_PARAM(xen_argo_gfn_t) gfn_hnd =
2115 guest_handle_cast(arg2, xen_argo_gfn_t);
2116 /* arg3: npage, arg4: flags */
2117
2118 BUILD_BUG_ON(!IS_ALIGNED(XEN_ARGO_MAX_RING_SIZE, PAGE_SIZE));
2119
2120 if ( unlikely(arg3 > (XEN_ARGO_MAX_RING_SIZE >> PAGE_SHIFT)) )
2121 {
2122 rc = -EINVAL;
2123 break;
2124 }
2125
2126 /* Check array to allow use of the faster __copy operations later */
2127 if ( unlikely(!guest_handle_okay(gfn_hnd, arg3)) )
2128 {
2129 rc = -EFAULT;
2130 break;
2131 }
2132
2133 rc = register_ring(currd, reg_hnd, gfn_hnd, arg3, arg4);
2134 break;
2135 }
2136
2137 case XEN_ARGO_OP_unregister_ring:
2138 {
2139 XEN_GUEST_HANDLE_PARAM(xen_argo_unregister_ring_t) unreg_hnd =
2140 guest_handle_cast(arg1, xen_argo_unregister_ring_t);
2141
2142 if ( unlikely((!guest_handle_is_null(arg2)) || arg3 || arg4) )
2143 {
2144 rc = -EINVAL;
2145 break;
2146 }
2147
2148 rc = unregister_ring(currd, unreg_hnd);
2149 break;
2150 }
2151
2152 case XEN_ARGO_OP_sendv:
2153 {
2154 xen_argo_send_addr_t send_addr;
2155 xen_argo_iov_t iovs[XEN_ARGO_MAXIOV];
2156 unsigned int niov;
2157
2158 XEN_GUEST_HANDLE_PARAM(xen_argo_send_addr_t) send_addr_hnd =
2159 guest_handle_cast(arg1, xen_argo_send_addr_t);
2160 XEN_GUEST_HANDLE_PARAM(xen_argo_iov_t) iovs_hnd =
2161 guest_handle_cast(arg2, xen_argo_iov_t);
2162 /* arg3 is niov */
2163 /* arg4 is message_type. Must be a 32-bit value. */
2164
2165 /* XEN_ARGO_MAXIOV value determines size of iov array on stack */
2166 BUILD_BUG_ON(XEN_ARGO_MAXIOV > 8);
2167
2168 rc = copy_from_guest(&send_addr, send_addr_hnd, 1) ? -EFAULT : 0;
2169 if ( rc )
2170 {
2171 rc = -EFAULT;
2172 break;
2173 }
2174
2175 /*
2176 * Reject an niov above the maximum limit, or a message_type outside
2177 * the 32-bit range.
2178 */
2179 if ( unlikely((arg3 > XEN_ARGO_MAXIOV) || (arg4 != (uint32_t)arg4)) )
2180 {
2181 rc = -EINVAL;
2182 break;
2183 }
2184 niov = array_index_nospec(arg3, XEN_ARGO_MAXIOV + 1);
2185
2186 rc = copy_from_guest(iovs, iovs_hnd, niov) ? -EFAULT : 0;
2187 if ( rc )
2188 {
2189 rc = -EFAULT;
2190 break;
2191 }
2192
2193 rc = sendv(currd, &send_addr.src, &send_addr.dst, iovs, niov, arg4);
2194 break;
2195 }
2196
2197 case XEN_ARGO_OP_notify:
2198 {
2199 XEN_GUEST_HANDLE_PARAM(xen_argo_ring_data_t) ring_data_hnd =
2200 guest_handle_cast(arg1, xen_argo_ring_data_t);
2201
2202 if ( unlikely((!guest_handle_is_null(arg2)) || arg3 || arg4) )
2203 {
2204 rc = -EINVAL;
2205 break;
2206 }
2207
2208 rc = notify(currd, ring_data_hnd);
2209 break;
2210 }
2211
2212 default:
2213 rc = -EOPNOTSUPP;
2214 break;
2215 }
2216
2217 argo_dprintk("<-do_argo_op(%u)=%ld\n", cmd, rc);
2218
2219 return rc;
2220 }
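/*
 * Summary of the argument conventions dispatched above (descriptive only):
 *
 *   cmd               arg1                          arg2              arg3   arg4
 *   register_ring     xen_argo_register_ring_t *    xen_argo_gfn_t *  npage  flags
 *   unregister_ring   xen_argo_unregister_ring_t *  NULL              0      0
 *   sendv             xen_argo_send_addr_t *        xen_argo_iov_t *  niov   message_type
 *   notify            xen_argo_ring_data_t *        NULL              0      0
 */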
2221
2222 #ifdef CONFIG_COMPAT
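/*
 * Only sendv needs special handling in the compat entry point below:
 * xen_argo_iov_t embeds a guest handle, whose representation differs for
 * 32-bit guests, so the iov array is read as compat_argo_iov_t and
 * translated with XLAT_argo_iov. Every other op is forwarded to
 * do_argo_op() unchanged.
 */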
2223 int
2224 compat_argo_op(unsigned int cmd, XEN_GUEST_HANDLE_PARAM(void) arg1,
2225 XEN_GUEST_HANDLE_PARAM(void) arg2, unsigned long arg3,
2226 unsigned long arg4)
2227 {
2228 struct domain *currd = current->domain;
2229 int rc;
2230 xen_argo_send_addr_t send_addr;
2231 xen_argo_iov_t iovs[XEN_ARGO_MAXIOV];
2232 compat_argo_iov_t compat_iovs[XEN_ARGO_MAXIOV];
2233 unsigned int i, niov;
2234 XEN_GUEST_HANDLE_PARAM(xen_argo_send_addr_t) send_addr_hnd;
2235
2236 /* check XEN_ARGO_MAXIOV as it sizes stack arrays: iovs, compat_iovs */
2237 BUILD_BUG_ON(XEN_ARGO_MAXIOV > 8);
2238
2239 /* Forward all ops besides sendv to the native handler. */
2240 if ( cmd != XEN_ARGO_OP_sendv )
2241 return do_argo_op(cmd, arg1, arg2, arg3, arg4);
2242
2243 if ( unlikely(!opt_argo) )
2244 return -EOPNOTSUPP;
2245
2246 rc = xsm_argo_enable(currd);
2247 if ( rc )
2248 return rc;
2249
2250 argo_dprintk("->compat_argo_op(%u,%p,%p,%lu,0x%lx)\n", cmd,
2251 (void *)arg1.p, (void *)arg2.p, arg3, arg4);
2252
2253 send_addr_hnd = guest_handle_cast(arg1, xen_argo_send_addr_t);
2254 /* arg2: iovs, arg3: niov, arg4: message_type */
2255
2256 rc = copy_from_guest(&send_addr, send_addr_hnd, 1) ? -EFAULT : 0;
2257 if ( rc )
2258 goto out;
2259
2260 if ( unlikely(arg3 > XEN_ARGO_MAXIOV) )
2261 {
2262 rc = -EINVAL;
2263 goto out;
2264 }
2265 niov = array_index_nospec(arg3, XEN_ARGO_MAXIOV + 1);
2266
2267 rc = copy_from_guest(compat_iovs, arg2, niov) ? -EFAULT : 0;
2268 if ( rc )
2269 goto out;
2270
2271 for ( i = 0; i < niov; i++ )
2272 {
2273 #define XLAT_argo_iov_HNDL_iov_hnd(_d_, _s_) \
2274 guest_from_compat_handle((_d_)->iov_hnd, (_s_)->iov_hnd)
2275
2276 XLAT_argo_iov(&iovs[i], &compat_iovs[i]);
2277
2278 #undef XLAT_argo_iov_HNDL_iov_hnd
2279 }
2280
2281 rc = sendv(currd, &send_addr.src, &send_addr.dst, iovs, niov, arg4);
2282 out:
2283 argo_dprintk("<-compat_argo_op(%u)=%d\n", cmd, rc);
2284
2285 return rc;
2286 }
2287 #endif
2288
2289 static void
2290 argo_domain_init(struct argo_domain *argo)
2291 {
2292 unsigned int i;
2293
2294 rwlock_init(&argo->rings_L2_rwlock);
2295 spin_lock_init(&argo->send_L2_lock);
2296 spin_lock_init(&argo->wildcard_L2_lock);
2297
2298 for ( i = 0; i < ARGO_HASHTABLE_SIZE; ++i )
2299 {
2300 INIT_LIST_HEAD(&argo->ring_hash[i]);
2301 INIT_LIST_HEAD(&argo->send_hash[i]);
2302 }
2303 INIT_LIST_HEAD(&argo->wildcard_pend_list);
2304 }
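/*
 * Locking summary, as a descriptive note inferred from the usage in this
 * file: L1_global_argo_rwlock protects each domain's ->argo pointer; the
 * per-domain rings_L2_rwlock protects that domain's ring_hash and
 * ring_count; send_L2_lock and wildcard_L2_lock protect the send_hash and
 * wildcard_pend_list respectively; each ring's L3_lock protects the state
 * of that individual ring. Locks are taken in L1 -> L2 -> L3 order.
 */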
2305
2306 int
2307 argo_init(struct domain *d)
2308 {
2309 struct argo_domain *argo;
2310
2311 if ( !opt_argo || xsm_argo_enable(d) )
2312 {
2313 argo_dprintk("argo disabled, domid: %u\n", d->domain_id);
2314 return 0;
2315 }
2316
2317 argo_dprintk("init: domid: %u\n", d->domain_id);
2318
2319 argo = xzalloc(struct argo_domain);
2320 if ( !argo )
2321 return -ENOMEM;
2322
2323 argo_domain_init(argo);
2324
2325 write_lock(&L1_global_argo_rwlock);
2326
2327 d->argo = argo;
2328
2329 write_unlock(&L1_global_argo_rwlock);
2330
2331 return 0;
2332 }
2333
2334 void
2335 argo_destroy(struct domain *d)
2336 {
2337 BUG_ON(!d->is_dying);
2338
2339 write_lock(&L1_global_argo_rwlock);
2340
2341 argo_dprintk("destroy: domid %u d->argo=%p\n", d->domain_id, d->argo);
2342
2343 if ( d->argo )
2344 {
2345 domain_rings_remove_all(d);
2346 partner_rings_remove(d);
2347 wildcard_rings_pending_remove(d);
2348 XFREE(d->argo);
2349 }
2350
2351 write_unlock(&L1_global_argo_rwlock);
2352 }
2353
2354 void
2355 argo_soft_reset(struct domain *d)
2356 {
2357 write_lock(&L1_global_argo_rwlock);
2358
2359 argo_dprintk("soft reset d=%u d->argo=%p\n", d->domain_id, d->argo);
2360
2361 if ( d->argo )
2362 {
2363 domain_rings_remove_all(d);
2364 partner_rings_remove(d);
2365 wildcard_rings_pending_remove(d);
2366
2367 /*
2368 * Since neither opt_argo nor xsm_argo_enable(d) can change at runtime,
2369 * if d->argo is true then both opt_argo and xsm_argo_enable(d) must be
2370 * true, and we can assume that init is allowed to proceed again here.
2371 */
2372 argo_domain_init(d->argo);
2373 }
2374
2375 write_unlock(&L1_global_argo_rwlock);
2376 }
2377