1 /******************************************************************************
2 * Argo : Hypervisor-Mediated data eXchange
3 *
4 * Derived from v4v, the version 2 of v2v.
5 *
6 * Copyright (c) 2010, Citrix Systems
7 * Copyright (c) 2018-2019 BAE Systems
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; If not, see <http://www.gnu.org/licenses/>.
16 */
17
18 #include <xen/argo.h>
19 #include <xen/domain.h>
20 #include <xen/domain_page.h>
21 #include <xen/errno.h>
22 #include <xen/event.h>
23 #include <xen/guest_access.h>
24 #include <xen/lib.h>
25 #include <xen/nospec.h>
26 #include <xen/param.h>
27 #include <xen/sched.h>
28 #include <xen/time.h>
29
30 #include <xsm/xsm.h>
31
32 #include <asm/p2m.h>
33
34 #include <public/argo.h>
35
36 #ifdef CONFIG_COMPAT
37 #include <compat/argo.h>
38 CHECK_argo_addr;
39 #undef CHECK_argo_addr
40 #define CHECK_argo_addr struct xen_argo_addr
41 CHECK_argo_register_ring;
42 CHECK_argo_ring;
43 CHECK_argo_ring_data_ent;
44 #undef CHECK_argo_ring_data_ent
45 #define CHECK_argo_ring_data_ent struct xen_argo_ring_data_ent
46 CHECK_argo_ring_data;
47 CHECK_argo_ring_message_header;
48 CHECK_argo_unregister_ring;
49 CHECK_argo_send_addr;
50 #endif
51
52 #define MAX_RINGS_PER_DOMAIN 128U
53 #define MAX_NOTIFY_COUNT 256U
54 #define MAX_PENDING_PER_RING 32U
55
56 /* All messages on the ring are padded to a multiple of the slot size. */
57 #define ROUNDUP_MESSAGE(a) ROUNDUP((a), XEN_ARGO_MSG_SLOT_SIZE)
58
59 /* The maximum size of a message that may be sent on the largest Argo ring. */
60 #define MAX_ARGO_MESSAGE_SIZE ((XEN_ARGO_MAX_RING_SIZE) - \
61 (sizeof(struct xen_argo_ring_message_header)) - ROUNDUP_MESSAGE(1))
62
63 /* Number of PAGEs needed to hold a ring of a given size in bytes */
64 #define NPAGES_RING(ring_len) \
65 (ROUNDUP((ROUNDUP_MESSAGE(ring_len) + sizeof(xen_argo_ring_t)), PAGE_SIZE) \
66 >> PAGE_SHIFT)
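/*
 * Illustrative arithmetic for the macros above (assuming 16-byte message
 * slots and 4 KiB pages; the BUILD_BUG_ON in ringbuf_insert ties the
 * message header size to one slot):
 *   ROUNDUP_MESSAGE(1)   == 16    one slot
 *   ROUNDUP_MESSAGE(100) == 112   seven slots
 *   NPAGES_RING(4096)    == 2     the ring bytes plus the xen_argo_ring_t
 *                                 header spill onto a second page
 */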
67
68 DEFINE_XEN_GUEST_HANDLE(xen_argo_addr_t);
69 DEFINE_XEN_GUEST_HANDLE(xen_argo_gfn_t);
70 DEFINE_XEN_GUEST_HANDLE(xen_argo_iov_t);
71 DEFINE_XEN_GUEST_HANDLE(xen_argo_register_ring_t);
72 DEFINE_XEN_GUEST_HANDLE(xen_argo_ring_t);
73 DEFINE_XEN_GUEST_HANDLE(xen_argo_ring_data_t);
74 DEFINE_XEN_GUEST_HANDLE(xen_argo_ring_data_ent_t);
75 DEFINE_XEN_GUEST_HANDLE(xen_argo_send_addr_t);
76 DEFINE_XEN_GUEST_HANDLE(xen_argo_unregister_ring_t);
77 #ifdef CONFIG_COMPAT
78 DEFINE_COMPAT_HANDLE(compat_argo_iov_t);
79 #endif
80
81 static bool __read_mostly opt_argo;
82 static bool __read_mostly opt_argo_mac_permissive;
83
84 static int __init cf_check parse_argo(const char *s)
85 {
86 const char *ss;
87 int val, rc = 0;
88
89 do {
90 ss = strchr(s, ',');
91 if ( !ss )
92 ss = strchr(s, '\0');
93
94 if ( (val = parse_bool(s, ss)) >= 0 )
95 opt_argo = val;
96 else if ( (val = parse_boolean("mac-permissive", s, ss)) >= 0 )
97 opt_argo_mac_permissive = val;
98 else
99 rc = -EINVAL;
100
101 s = ss + 1;
102 } while ( *ss );
103
104 return rc;
105 }
106 custom_param("argo", parse_argo);
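/*
 * Illustrative boot command line values accepted by the parser above
 * (see the Xen command line documentation for the authoritative list):
 *   argo=1                    enable Argo (it is disabled by default)
 *   argo=1,mac-permissive     also permit registration of wildcard
 *                             (any-sender) rings
 */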
107
108 typedef struct argo_ring_id
109 {
110 xen_argo_port_t aport;
111 domid_t partner_id;
112 domid_t domain_id;
113 } argo_ring_id;
114
115 /* Data about a domain's own ring that it has registered */
116 struct argo_ring_info
117 {
118 /* next node in the hash, protected by rings_L2 */
119 struct list_head node;
120 /* this ring's id, protected by rings_L2 */
121 struct argo_ring_id id;
122 /* L3, the ring_info lock: protects the members of this struct below */
123 spinlock_t L3_lock;
124 /* length of the ring, protected by L3 */
125 unsigned int len;
126 /* number of pages translated into mfns, protected by L3 */
127 unsigned int nmfns;
128 /* cached tx pointer location, protected by L3 */
129 unsigned int tx_ptr;
130 /* mapped ring pages protected by L3 */
131 void **mfn_mapping;
132 /* list of mfns of guest ring, protected by L3 */
133 mfn_t *mfns;
134 /* list of struct pending_ent for this ring, protected by L3 */
135 struct list_head pending;
136 /* number of pending entries queued for this ring, protected by L3 */
137 unsigned int npending;
138 };
139
140 /* Data about a single-sender ring, held by the sender (partner) domain */
141 struct argo_send_info
142 {
143 /* next node in the hash, protected by send_L2 */
144 struct list_head node;
145 /* this ring's id, protected by send_L2 */
146 struct argo_ring_id id;
147 };
148
149 /* A space-available notification that is awaiting sufficient space */
150 struct pending_ent
151 {
152 /* List node within argo_ring_info's pending list */
153 struct list_head node;
154 /*
155 * List node within argo_domain's wildcard_pend_list. Only used if the
156 * ring is one with a wildcard partner (ie. that any domain may send to)
157 * to enable cancelling signals on wildcard rings on domain destroy.
158 */
159 struct list_head wildcard_node;
160 /*
161 * Pointer to the ring_info that this ent pertains to. Used to ensure that
162 * ring_info->npending is decremented when ents for wildcard rings are
163 * cancelled for domain destroy.
164 * Caution: Must hold the correct locks before accessing ring_info via this.
165 */
166 struct argo_ring_info *ring_info;
167 /* minimum ring space available that this signal is waiting upon */
168 unsigned int len;
169 /* domain to be notified when space is available */
170 domid_t domain_id;
171 };
172
173 /*
174 * The value of the argo element in a struct domain is
175 * protected by L1_global_argo_rwlock
176 */
177 #define ARGO_HASHTABLE_SIZE 32
178 struct argo_domain
179 {
180 /* rings_L2 */
181 rwlock_t rings_L2_rwlock;
182 /*
183 * Hash table of argo_ring_info about rings this domain has registered.
184 * Protected by rings_L2.
185 */
186 struct list_head ring_hash[ARGO_HASHTABLE_SIZE];
187 /* Counter of rings registered by this domain. Protected by rings_L2. */
188 unsigned int ring_count;
189
190 /* send_L2 */
191 spinlock_t send_L2_lock;
192 /*
193 * Hash table of argo_send_info about rings other domains have registered
194 * for this domain to send to. Single partner, non-wildcard rings.
195 * Protected by send_L2.
196 */
197 struct list_head send_hash[ARGO_HASHTABLE_SIZE];
198
199 /* wildcard_L2 */
200 spinlock_t wildcard_L2_lock;
201 /*
202 * List of pending space-available signals for this domain about wildcard
203 * rings registered by other domains. Protected by wildcard_L2.
204 */
205 struct list_head wildcard_pend_list;
206 };
207
208 /*
209 * Locking is organized as follows:
210 *
211 * Terminology: R(<lock>) means taking a read lock on the specified lock;
212 * W(<lock>) means taking a write lock on it.
213 *
214 * == L1 : The global read/write lock: L1_global_argo_rwlock
215 * Protects the argo elements of all struct domain *d in the system.
216 *
217 * R(L1) does not protect any of the elements of d->argo; it protects their
218 * addresses. W(L1) protects those and more since it implies W on all the lower
219 * level locks - see the notes on those locks below.
220 *
221 * The destruction of an argo-enabled domain, which must have a non-NULL d->argo
222 * pointer, will need to free that d->argo pointer, which requires W(L1).
223 * Since holding R(L1) will block acquiring W(L1), it will ensure that
224 * no domain pointers that argo is interested in become invalid while either
225 * W(L1) or R(L1) are held.
226 */
227
228 static DEFINE_RWLOCK(L1_global_argo_rwlock); /* L1 */
229
230 /*
231 * == rings_L2 : The per-domain ring hash lock: d->argo->rings_L2_rwlock
232 *
233 * Holding a read lock on rings_L2 protects the ring hash table and
234 * the elements in the hash_table d->argo->ring_hash, and
235 * the node and id fields in struct argo_ring_info in the
236 * hash table.
237 * Holding a write lock on rings_L2 protects all of the elements of all the
238 * struct argo_ring_info belonging to this domain.
239 *
240 * To take rings_L2 you must already have R(L1). W(L1) implies W(rings_L2) and
241 * L3.
242 *
243 * == L3 : The individual ring_info lock: ring_info->L3_lock
244 *
245 * Protects all the fields within the argo_ring_info, aside from the ones that
246 * rings_L2 already protects: node, id, lock.
247 *
248 * To acquire L3 you must already have R(rings_L2). W(rings_L2) implies L3.
249 *
250 * == send_L2 : The per-domain single-sender partner rings lock:
251 * d->argo->send_L2_lock
252 *
253 * Protects the per-domain send hash table : d->argo->send_hash
254 * and the elements in the hash table, and the node and id fields
255 * in struct argo_send_info in the hash table.
256 *
257 * To take send_L2, you must already have R(L1). W(L1) implies send_L2.
258 * Do not attempt to acquire a rings_L2 on any domain after taking and while
259 * holding a send_L2 lock -- acquire the rings_L2 (if one is needed) beforehand.
260 *
261 * == wildcard_L2 : The per-domain wildcard pending list lock:
262 * d->argo->wildcard_L2_lock
263 *
264 * Protects the per-domain list of outstanding signals for space availability
265 * on wildcard rings.
266 *
267 * To take wildcard_L2, you must already have R(L1). W(L1) implies wildcard_L2.
268 * No other locks are acquired after obtaining wildcard_L2.
269 */
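/*
 * Illustrative acquisition orders taken in this file, as a summary of the
 * rules above (not additional requirements):
 *  - answering a notify query about another domain's ring:
 *      R(L1) -> R(dst_d->argo->rings_L2_rwlock) -> ring_info->L3_lock
 *  - registering a ring with a single-sender partner:
 *      R(L1) -> W(currd->argo->rings_L2_rwlock) -> dst_d->argo->send_L2_lock
 *  - tearing down an argo-enabled domain: W(L1) alone, which implies all of
 *    the lower-level locks.
 */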
270
271 /*
272 * Lock state validation macros
273 *
274 * These macros encode the logic to verify that the locking has adhered to the
275 * locking discipline above.
276 * eg. On entry to logic that requires holding at least R(rings_L2), this:
277 * ASSERT(LOCKING_Read_rings_L2(d));
278 *
279 * checks that the lock state is sufficient, validating that one of the
280 * following must be true when executed: R(rings_L2) && R(L1)
281 * or: W(rings_L2) && R(L1)
282 * or: W(L1)
283 *
284 * The LOCKING macros defined below here are for use at verification points.
285 */
286 #define LOCKING_Write_L1 (rw_is_write_locked(&L1_global_argo_rwlock))
287 /*
288 * While LOCKING_Read_L1 will return true even if the lock is write-locked,
289 * that's OK because everywhere that a Read lock is needed with these macros,
290 * holding a Write lock there instead is OK too: we're checking that _at least_
291 * the specified level of locks are held.
292 */
293 #define LOCKING_Read_L1 (rw_is_locked(&L1_global_argo_rwlock))
294
295 #define LOCKING_Write_rings_L2(d) \
296 ((LOCKING_Read_L1 && rw_is_write_locked(&(d)->argo->rings_L2_rwlock)) || \
297 LOCKING_Write_L1)
298 /*
299 * Skip checking LOCKING_Write_rings_L2(d) within this LOCKING_Read_rings_L2
300 * definition because the first clause that is testing R(L1) && R(L2) will also
301 * return true if R(L1) && W(L2) is true, because of the way that rw_is_locked
302 * behaves. This results in a slightly shorter and faster implementation.
303 */
304 #define LOCKING_Read_rings_L2(d) \
305 ((LOCKING_Read_L1 && rw_is_locked(&(d)->argo->rings_L2_rwlock)) || \
306 LOCKING_Write_L1)
307 /*
308 * Skip checking LOCKING_Write_L1 within this LOCKING_L3 definition because
309 * LOCKING_Write_rings_L2(d) will return true for that condition.
310 */
311 #define LOCKING_L3(d, r) \
312 ((LOCKING_Read_L1 && rw_is_locked(&(d)->argo->rings_L2_rwlock) \
313 && spin_is_locked(&(r)->L3_lock)) || LOCKING_Write_rings_L2(d))
314
315 #define LOCKING_send_L2(d) \
316 ((LOCKING_Read_L1 && spin_is_locked(&(d)->argo->send_L2_lock)) || \
317 LOCKING_Write_L1)
318
319 #define ARGO_DEBUG 0
320 #define argo_dprintk(fmt, args...) \
321 do { \
322 if ( ARGO_DEBUG ) \
323 printk(XENLOG_DEBUG "argo: " fmt, ##args); \
324 } while ( 0 )
325
326 /*
327 * This hash function is used to distribute rings within the per-domain
328 * hash tables (d->argo->ring_hash and d->argo->send_hash). The hash table
329 * will provide a struct if a match is found with an 'argo_ring_id' key:
330 * ie. the key is a (domain id, argo port, partner domain id) tuple.
331 * The algorithm approximates the string hashing function 'djb2'.
332 */
333 static unsigned int
334 hash_index(const struct argo_ring_id *id)
335 {
336 unsigned int hash = 5381; /* prime constant from djb2 */
337
338 /* For each input: hash = hash * 33 + <new input character value> */
339 hash = ((hash << 5) + hash) + (id->aport & 0xff);
340 hash = ((hash << 5) + hash) + ((id->aport >> 8) & 0xff);
341 hash = ((hash << 5) + hash) + ((id->aport >> 16) & 0xff);
342 hash = ((hash << 5) + hash) + ((id->aport >> 24) & 0xff);
343 hash = ((hash << 5) + hash) + (id->domain_id & 0xff);
344 hash = ((hash << 5) + hash) + ((id->domain_id >> 8) & 0xff);
345 hash = ((hash << 5) + hash) + (id->partner_id & 0xff);
346 hash = ((hash << 5) + hash) + ((id->partner_id >> 8) & 0xff);
347
348 /*
349 * Since ARGO_HASHTABLE_SIZE is small, use higher-order bits of the
350 * hash to contribute to the lower-order bits before masking off.
351 */
352 return (hash ^ (hash >> 15)) & (ARGO_HASHTABLE_SIZE - 1);
353 }
354
355 static struct argo_ring_info *
356 find_ring_info(const struct domain *d, const struct argo_ring_id *id)
357 {
358 struct argo_ring_info *ring_info;
359 const struct list_head *bucket;
360
361 ASSERT(LOCKING_Read_rings_L2(d));
362
363 /* List is not modified here. Search and return the match if found. */
364 bucket = &d->argo->ring_hash[hash_index(id)];
365
366 list_for_each_entry(ring_info, bucket, node)
367 {
368 const struct argo_ring_id *cmpid = &ring_info->id;
369
370 if ( cmpid->aport == id->aport &&
371 cmpid->domain_id == id->domain_id &&
372 cmpid->partner_id == id->partner_id )
373 {
374 argo_dprintk("found ring_info for ring(%u:%x %u)\n",
375 id->domain_id, id->aport, id->partner_id);
376 return ring_info;
377 }
378 }
379 argo_dprintk("no ring_info for ring(%u:%x %u)\n",
380 id->domain_id, id->aport, id->partner_id);
381
382 return NULL;
383 }
384
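/*
 * Look up a ring registered by 'd' on the given port for the specified
 * partner; if no partner-specific ring exists, fall back to a wildcard
 * ring (partner XEN_ARGO_DOMID_ANY) on the same port.
 */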
385 static struct argo_ring_info *
386 find_ring_info_by_match(const struct domain *d, xen_argo_port_t aport,
387 domid_t partner_id)
388 {
389 struct argo_ring_id id;
390 struct argo_ring_info *ring_info;
391
392 ASSERT(LOCKING_Read_rings_L2(d));
393
394 id.aport = aport;
395 id.domain_id = d->domain_id;
396 id.partner_id = partner_id;
397
398 ring_info = find_ring_info(d, &id);
399 if ( ring_info )
400 return ring_info;
401
402 id.partner_id = XEN_ARGO_DOMID_ANY;
403
404 return find_ring_info(d, &id);
405 }
406
407 static struct argo_send_info *
408 find_send_info(const struct domain *d, const struct argo_ring_id *id)
409 {
410 struct argo_send_info *send_info;
411 const struct list_head *bucket;
412
413 ASSERT(LOCKING_send_L2(d));
414
415 /* List is not modified here. Search and return the match if found. */
416 bucket = &d->argo->send_hash[hash_index(id)];
417
418 list_for_each_entry(send_info, bucket, node)
419 {
420 const struct argo_ring_id *cmpid = &send_info->id;
421
422 if ( cmpid->aport == id->aport &&
423 cmpid->domain_id == id->domain_id &&
424 cmpid->partner_id == id->partner_id )
425 {
426 argo_dprintk("found send_info for ring(%u:%x %u)\n",
427 id->domain_id, id->aport, id->partner_id);
428 return send_info;
429 }
430 }
431 argo_dprintk("no send_info for ring(%u:%x %u)\n",
432 id->domain_id, id->aport, id->partner_id);
433
434 return NULL;
435 }
436
437 static void
438 signal_domain(struct domain *d)
439 {
440 argo_dprintk("signalling domid:%u\n", d->domain_id);
441
442 send_guest_global_virq(d, VIRQ_ARGO);
443 }
444
445 static void
446 signal_domid(domid_t domain_id)
447 {
448 struct domain *d = rcu_lock_domain_by_id(domain_id);
449
450 if ( !d )
451 return;
452
453 signal_domain(d);
454 rcu_unlock_domain(d);
455 }
456
457 static void
458 ring_unmap(const struct domain *d, struct argo_ring_info *ring_info)
459 {
460 unsigned int i;
461
462 ASSERT(LOCKING_L3(d, ring_info));
463
464 if ( !ring_info->mfn_mapping )
465 return;
466
467 ASSERT(!ring_info->nmfns || ring_info->mfns);
468
469 for ( i = 0; i < ring_info->nmfns; i++ )
470 {
471 if ( !ring_info->mfn_mapping[i] )
472 continue;
473
474 ASSERT(!mfn_eq(ring_info->mfns[i], INVALID_MFN));
475 argo_dprintk("unmapping page %"PRI_mfn" from %p\n",
476 mfn_x(ring_info->mfns[i]), ring_info->mfn_mapping[i]);
477
478 unmap_domain_page_global(ring_info->mfn_mapping[i]);
479 ring_info->mfn_mapping[i] = NULL;
480 }
481 }
482
483 static int
484 ring_map_page(const struct domain *d, struct argo_ring_info *ring_info,
485 unsigned int i, void **out_ptr)
486 {
487 ASSERT(LOCKING_L3(d, ring_info));
488
489 /*
490 * FIXME: Investigate using vmap to create a single contiguous virtual
491 * address space mapping of the ring instead of using the array of single
492 * page mappings.
493 * Affects logic in memcpy_to_guest_ring, the mfn_mapping array data
494 * structure, and places where ring mappings are added or removed.
495 */
496
497 if ( i >= ring_info->nmfns )
498 {
499 gprintk(XENLOG_ERR,
500 "argo: ring (vm%u:%x vm%u) %p attempted to map page %u of %u\n",
501 ring_info->id.domain_id, ring_info->id.aport,
502 ring_info->id.partner_id, ring_info, i, ring_info->nmfns);
503 return -ENOMEM;
504 }
505 i = array_index_nospec(i, ring_info->nmfns);
506
507 if ( !ring_info->mfns || !ring_info->mfn_mapping )
508 {
509 ASSERT_UNREACHABLE();
510 ring_info->len = 0;
511 return -ENOMEM;
512 }
513
514 if ( !ring_info->mfn_mapping[i] )
515 {
516 ring_info->mfn_mapping[i] = map_domain_page_global(ring_info->mfns[i]);
517 if ( !ring_info->mfn_mapping[i] )
518 {
519 gprintk(XENLOG_ERR, "argo: ring (vm%u:%x vm%u) %p attempted to map "
520 "page %u of %u\n",
521 ring_info->id.domain_id, ring_info->id.aport,
522 ring_info->id.partner_id, ring_info, i, ring_info->nmfns);
523 return -ENOMEM;
524 }
525 argo_dprintk("mapping page %"PRI_mfn" to %p\n",
526 mfn_x(ring_info->mfns[i]), ring_info->mfn_mapping[i]);
527 }
528
529 if ( out_ptr )
530 *out_ptr = ring_info->mfn_mapping[i];
531
532 return 0;
533 }
534
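/*
 * Publish a new tx_ptr: update the cached value in ring_info and write it
 * into the shared xen_argo_ring header held in the first mapped ring page.
 */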
535 static void
536 update_tx_ptr(const struct domain *d, struct argo_ring_info *ring_info,
537 uint32_t tx_ptr)
538 {
539 xen_argo_ring_t *ringp;
540
541 ASSERT(LOCKING_L3(d, ring_info));
542 ASSERT(ring_info->mfn_mapping[0]);
543
544 ring_info->tx_ptr = tx_ptr;
545 ringp = ring_info->mfn_mapping[0];
546
547 write_atomic(&ringp->tx_ptr, tx_ptr);
548 smp_wmb();
549 }
550
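/*
 * Copy 'len' bytes into the ring at byte 'offset', taking the data either
 * from the hypervisor buffer 'src' or, when src is NULL, from the guest
 * handle 'src_hnd'. The ring is mapped one page at a time, so the copy is
 * split at page boundaries.
 */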
551 static int
552 memcpy_to_guest_ring(const struct domain *d, struct argo_ring_info *ring_info,
553 unsigned int offset,
554 const void *src, XEN_GUEST_HANDLE(uint8) src_hnd,
555 unsigned int len)
556 {
557 unsigned int mfns_index = offset >> PAGE_SHIFT;
558 void *dst;
559 int ret;
560 unsigned int src_offset = 0;
561
562 ASSERT(LOCKING_L3(d, ring_info));
563
564 offset &= ~PAGE_MASK;
565
566 if ( len + offset > XEN_ARGO_MAX_RING_SIZE )
567 return -EFAULT;
568
569 while ( len )
570 {
571 unsigned int head_len = (offset + len) > PAGE_SIZE ? PAGE_SIZE - offset
572 : len;
573
574 ret = ring_map_page(d, ring_info, mfns_index, &dst);
575 if ( ret )
576 return ret;
577
578 if ( src )
579 {
580 memcpy(dst + offset, src + src_offset, head_len);
581 src_offset += head_len;
582 }
583 else
584 {
585 if ( copy_from_guest(dst + offset, src_hnd, head_len) )
586 return -EFAULT;
587
588 guest_handle_add_offset(src_hnd, head_len);
589 }
590
591 mfns_index++;
592 len -= head_len;
593 offset = 0;
594 }
595
596 return 0;
597 }
598
599 /*
600 * Use this with caution: rx_ptr is under guest control and may be bogus.
601 * See get_sanitized_ring for a safer alternative.
602 */
603 static int
604 get_rx_ptr(const struct domain *d, struct argo_ring_info *ring_info,
605 uint32_t *rx_ptr)
606 {
607 void *src;
608 xen_argo_ring_t *ringp;
609 int ret;
610
611 ASSERT(LOCKING_L3(d, ring_info));
612
613 if ( !ring_info->nmfns || ring_info->nmfns < NPAGES_RING(ring_info->len) )
614 return -EINVAL;
615
616 ret = ring_map_page(d, ring_info, 0, &src);
617 if ( ret )
618 return ret;
619
620 ringp = (xen_argo_ring_t *)src;
621
622 *rx_ptr = read_atomic(&ringp->rx_ptr);
623
624 return 0;
625 }
626
627 /*
628 * get_sanitized_ring creates a modified copy of the ring pointers where
629 * the rx_ptr is rounded up to ensure it is aligned, and then ring
630 * wrap is handled. Simplifies safe use of the rx_ptr for available
631 * space calculation.
632 */
633 static int
634 get_sanitized_ring(const struct domain *d, xen_argo_ring_t *ring,
635 struct argo_ring_info *ring_info)
636 {
637 uint32_t rx_ptr;
638 int ret;
639
640 ASSERT(LOCKING_L3(d, ring_info));
641
642 ret = get_rx_ptr(d, ring_info, &rx_ptr);
643 if ( ret )
644 return ret;
645
646 ring->tx_ptr = ring_info->tx_ptr;
647
648 rx_ptr = ROUNDUP_MESSAGE(rx_ptr);
649 if ( rx_ptr >= ring_info->len )
650 rx_ptr = 0;
651
652 ring->rx_ptr = rx_ptr;
653
654 return 0;
655 }
656
657 static unsigned int
658 ringbuf_payload_space(const struct domain *d, struct argo_ring_info *ring_info)
659 {
660 xen_argo_ring_t ring;
661 unsigned int len;
662 int ret;
663
664 ASSERT(LOCKING_L3(d, ring_info));
665
666 len = ring_info->len;
667 if ( !len )
668 return 0;
669
670 if ( get_sanitized_ring(d, &ring, ring_info) )
671 return 0;
672
673 argo_dprintk("sanitized ringbuf_payload_space: tx_ptr=%u rx_ptr=%u\n",
674 ring.tx_ptr, ring.rx_ptr);
675
676 /*
677 * rx_ptr == tx_ptr means that the ring has been emptied.
678 * See message size checking logic in the entry to ringbuf_insert which
679 * ensures that there is always one message slot of size ROUNDUP_MESSAGE(1)
680 * left available, preventing a ring from being entirely filled.
681 * This ensures that matching ring indexes always indicate an empty ring
682 * and never a full one.
683 */
684 ret = ring.rx_ptr - ring.tx_ptr;
685 if ( ret <= 0 )
686 ret += len;
687
688 /*
689 * In a sanitized ring, we can rely on:
690 * (rx_ptr < ring_info->len) &&
691 * (tx_ptr < ring_info->len) &&
692 * (ring_info->len <= XEN_ARGO_MAX_RING_SIZE)
693 *
694 * and since: XEN_ARGO_MAX_RING_SIZE < INT32_MAX
695 * therefore right here: ret < INT32_MAX
696 * and we are safe to return it as an unsigned value from this function.
697 * The subtractions below cannot increase its value.
698 */
699
700 /*
701 * The maximum size payload for a message that will be accepted is:
702 * (the available space between the ring indexes)
703 * minus (space for a message header)
704 * minus (space for one message slot)
705 * since ringbuf_insert requires that one message slot be left
706 * unfilled, to avoid filling the ring to capacity and confusing a full
707 * ring with an empty one.
708 * Since the ring indexes are sanitized, the value in ret is aligned, so
709 * the simple subtraction here works to return the aligned value needed:
710 */
711 ret -= sizeof(struct xen_argo_ring_message_header);
712 ret -= ROUNDUP_MESSAGE(1);
713
714 return (ret < 0) ? 0 : ret;
715 }
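/*
 * Illustrative example of the calculation above, assuming 16-byte message
 * slots: for an empty 4096-byte ring (rx_ptr == tx_ptr), the distance is
 * taken as the full ring length, so the largest payload that will be
 * accepted is 4096 - 16 (message header) - 16 (reserved slot) = 4064 bytes.
 */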
716
717 /*
718 * iov_count returns its count on success via an out variable to avoid
719 * potential for a negative return value to be used incorrectly
720 * (eg. coerced into an unsigned variable resulting in a large incorrect value)
721 */
722 static int
723 iov_count(const xen_argo_iov_t *piov, unsigned int niov,
724 unsigned int *count)
725 {
726 unsigned int sum_iov_lens = 0;
727
728 if ( niov > XEN_ARGO_MAXIOV )
729 return -EINVAL;
730
731 for ( ; niov--; piov++ )
732 {
733 /* valid iovs must have the padding field set to zero */
734 if ( piov->pad )
735 {
736 argo_dprintk("invalid iov: padding is not zero\n");
737 return -EINVAL;
738 }
739
740 /* check each to protect sum against integer overflow */
741 if ( piov->iov_len > MAX_ARGO_MESSAGE_SIZE )
742 {
743 argo_dprintk("invalid iov_len: too big (%u)>%llu\n",
744 piov->iov_len, MAX_ARGO_MESSAGE_SIZE);
745 return -EINVAL;
746 }
747
748 sum_iov_lens += piov->iov_len;
749
750 /*
751 * Again protect sum from integer overflow
752 * and ensure total msg size will be within bounds.
753 */
754 if ( sum_iov_lens > MAX_ARGO_MESSAGE_SIZE )
755 {
756 argo_dprintk("invalid iov series: total message too big\n");
757 return -EMSGSIZE;
758 }
759 }
760
761 *count = sum_iov_lens;
762
763 return 0;
764 }
765
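/*
 * Write a single message -- a fixed-size header followed by the payload
 * gathered from 'iovs' -- into the destination ring at tx_ptr, wrapping at
 * the end of the ring as needed, then publish the updated tx_ptr.
 * Returns -EMSGSIZE if the message can never fit in this ring, or -EAGAIN
 * if there is currently insufficient space.
 */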
766 static int
767 ringbuf_insert(const struct domain *d, struct argo_ring_info *ring_info,
768 const struct argo_ring_id *src_id, xen_argo_iov_t *iovs,
769 unsigned int niov, uint32_t message_type, unsigned int len)
770 {
771 xen_argo_ring_t ring;
772 struct xen_argo_ring_message_header mh = { };
773 int sp, ret;
774 xen_argo_iov_t *piov;
775 XEN_GUEST_HANDLE(uint8) NULL_hnd = { };
776
777 ASSERT(LOCKING_L3(d, ring_info));
778
779 /*
780 * Enforced below: no more than 'len' bytes of guest data
781 * (plus the message header) will be sent in this operation.
782 */
783
784 /*
785 * Upper bound check the message len against the ring size.
786 * The message must not fill the ring; there must be at least one slot
787 * remaining so we can distinguish a full ring from an empty one.
788 * iov_count has already verified: len <= MAX_ARGO_MESSAGE_SIZE.
789 */
790 if ( ring_info->len <= (sizeof(struct xen_argo_ring_message_header) +
791 ROUNDUP_MESSAGE(len)) )
792 return -EMSGSIZE;
793
794 ret = get_sanitized_ring(d, &ring, ring_info);
795 if ( ret )
796 return ret;
797
798 argo_dprintk("ring.tx_ptr=%u ring.rx_ptr=%u ring len=%u"
799 " ring_info->tx_ptr=%u\n",
800 ring.tx_ptr, ring.rx_ptr, ring_info->len, ring_info->tx_ptr);
801
802 if ( ring.rx_ptr == ring.tx_ptr )
803 sp = ring_info->len;
804 else
805 {
806 sp = ring.rx_ptr - ring.tx_ptr;
807 if ( sp < 0 )
808 sp += ring_info->len;
809 }
810
811 /*
812 * Size bounds check against currently available space in the ring.
813 * Again: the message must not fill the ring leaving no space remaining.
814 */
815 if ( (ROUNDUP_MESSAGE(len) +
816 sizeof(struct xen_argo_ring_message_header)) >= sp )
817 {
818 argo_dprintk("EAGAIN\n");
819 return -EAGAIN;
820 }
821
822 mh.len = len + sizeof(struct xen_argo_ring_message_header);
823 mh.source.aport = src_id->aport;
824 mh.source.domain_id = src_id->domain_id;
825 mh.message_type = message_type;
826
827 /*
828 * For this copy to the guest ring, tx_ptr is always 16-byte aligned
829 * and the message header is 16 bytes long.
830 */
831 BUILD_BUG_ON(
832 sizeof(struct xen_argo_ring_message_header) != ROUNDUP_MESSAGE(1));
833
834 /*
835 * First data write into the destination ring: fixed size, message header.
836 * This cannot overrun because the available free space (value in 'sp')
837 * is checked above and must be at least this size.
838 */
839 ret = memcpy_to_guest_ring(d, ring_info,
840 ring.tx_ptr + sizeof(xen_argo_ring_t),
841 &mh, NULL_hnd, sizeof(mh));
842 if ( ret )
843 {
844 gprintk(XENLOG_ERR,
845 "argo: failed to write message header to ring (vm%u:%x vm%u)\n",
846 ring_info->id.domain_id, ring_info->id.aport,
847 ring_info->id.partner_id);
848
849 return ret;
850 }
851
852 ring.tx_ptr += sizeof(mh);
853 if ( ring.tx_ptr == ring_info->len )
854 ring.tx_ptr = 0;
855
856 for ( piov = iovs; niov--; piov++ )
857 {
858 XEN_GUEST_HANDLE(uint8) buf_hnd = piov->iov_hnd;
859 unsigned int iov_len = piov->iov_len;
860
861 /* If no data is provided in this iov, moan and skip on to the next */
862 if ( !iov_len )
863 {
864 gprintk(XENLOG_WARNING,
865 "argo: no data iov_len=0 iov_hnd=%p ring (vm%u:%x vm%u)\n",
866 buf_hnd.p, ring_info->id.domain_id, ring_info->id.aport,
867 ring_info->id.partner_id);
868
869 continue;
870 }
871
872 if ( unlikely(!guest_handle_okay(buf_hnd, iov_len)) )
873 {
874 gprintk(XENLOG_ERR,
875 "argo: bad iov handle [%p, %u] (vm%u:%x vm%u)\n",
876 buf_hnd.p, iov_len,
877 ring_info->id.domain_id, ring_info->id.aport,
878 ring_info->id.partner_id);
879
880 return -EFAULT;
881 }
882
883 sp = ring_info->len - ring.tx_ptr;
884
885 /* Check: iov data size versus free space at the tail of the ring */
886 if ( iov_len > sp )
887 {
888 /*
889 * Second possible data write: ring-tail-wrap-write.
890 * Populate the ring tail and update the internal tx_ptr to handle
891 * wrapping at the end of ring.
892 * Size of data written here: sp
893 * which is the exact full amount of free space available at the
894 * tail of the ring, so this cannot overrun.
895 */
896 ret = memcpy_to_guest_ring(d, ring_info,
897 ring.tx_ptr + sizeof(xen_argo_ring_t),
898 NULL, buf_hnd, sp);
899 if ( ret )
900 {
901 gprintk(XENLOG_ERR,
902 "argo: failed to copy {%p, %d} (vm%u:%x vm%u)\n",
903 buf_hnd.p, sp,
904 ring_info->id.domain_id, ring_info->id.aport,
905 ring_info->id.partner_id);
906
907 return ret;
908 }
909
910 ring.tx_ptr = 0;
911 iov_len -= sp;
912 guest_handle_add_offset(buf_hnd, sp);
913
914 ASSERT(iov_len <= ring_info->len);
915 }
916
917 /*
918 * Third possible data write: all data remaining for this iov.
919 * Size of data written here: iov_len
920 *
921 * Case 1: if the ring-tail-wrap-write above was performed, then
922 * iov_len has been decreased by 'sp' and ring.tx_ptr is zero.
923 *
924 * We know from checking the result of iov_count:
925 * len + sizeof(message_header) <= ring_info->len
926 * We also know that len is the total of summing all iov_lens, so:
927 * iov_len <= len
928 * so by transitivity:
929 * iov_len <= len <= (ring_info->len - sizeof(msgheader))
930 * and therefore:
931 * (iov_len + sizeof(msgheader) <= ring_info->len) &&
932 * (ring.tx_ptr == 0)
933 * so this write cannot overrun here.
934 *
935 * Case 2: ring-tail-wrap-write above was not performed
936 * -> so iov_len is the guest-supplied value and: (iov_len <= sp)
937 * ie. less than available space at the tail of the ring:
938 * so this write cannot overrun.
939 */
940 ret = memcpy_to_guest_ring(d, ring_info,
941 ring.tx_ptr + sizeof(xen_argo_ring_t),
942 NULL, buf_hnd, iov_len);
943 if ( ret )
944 {
945 gprintk(XENLOG_ERR,
946 "argo: failed to copy [%p, %u] (vm%u:%x vm%u)\n",
947 buf_hnd.p, iov_len, ring_info->id.domain_id,
948 ring_info->id.aport, ring_info->id.partner_id);
949
950 return ret;
951 }
952
953 ring.tx_ptr += iov_len;
954
955 if ( ring.tx_ptr == ring_info->len )
956 ring.tx_ptr = 0;
957 }
958
959 /*
960 * Finished writing data from all iovs into the ring: now need to round up
961 * tx_ptr to align to the next message boundary, and then wrap if necessary.
962 */
963 ring.tx_ptr = ROUNDUP_MESSAGE(ring.tx_ptr);
964
965 if ( ring.tx_ptr >= ring_info->len )
966 ring.tx_ptr -= ring_info->len;
967
968 update_tx_ptr(d, ring_info, ring.tx_ptr);
969
970 /*
971 * At this point (and also on the error exit paths from this function) it is
972 * possible to unmap the ring_info, ie:
973 * ring_unmap(d, ring_info);
974 * but performance should be improved by not doing so, and retaining
975 * the mapping.
976 * An XSM policy control over level of confidentiality required
977 * versus performance cost could be added to decide that here.
978 */
979
980 return ret;
981 }
982
983 static void
984 wildcard_pending_list_remove(domid_t domain_id, struct pending_ent *ent)
985 {
986 struct domain *d = rcu_lock_domain_by_id(domain_id);
987
988 if ( !d )
989 return;
990
991 ASSERT(LOCKING_Read_L1);
992
993 if ( d->argo )
994 {
995 spin_lock(&d->argo->wildcard_L2_lock);
996 list_del(&ent->wildcard_node);
997 spin_unlock(&d->argo->wildcard_L2_lock);
998 }
999 rcu_unlock_domain(d);
1000 }
1001
1002 static void
1003 wildcard_pending_list_insert(domid_t domain_id, struct pending_ent *ent)
1004 {
1005 struct domain *d = rcu_lock_domain_by_id(domain_id);
1006
1007 if ( !d )
1008 return;
1009
1010 ASSERT(LOCKING_Read_L1);
1011
1012 if ( d->argo )
1013 {
1014 spin_lock(&d->argo->wildcard_L2_lock);
1015 list_add(&ent->wildcard_node, &d->argo->wildcard_pend_list);
1016 spin_unlock(&d->argo->wildcard_L2_lock);
1017 }
1018 rcu_unlock_domain(d);
1019 }
1020
1021 static void
1022 pending_remove_all(const struct domain *d, struct argo_ring_info *ring_info)
1023 {
1024 struct pending_ent *ent;
1025
1026 ASSERT(LOCKING_L3(d, ring_info));
1027
1028 /* Delete all pending notifications from this ring's list. */
1029 while ( (ent = list_first_entry_or_null(&ring_info->pending,
1030 struct pending_ent, node)) )
1031 {
1032 /* For wildcard rings, remove each from their wildcard list too. */
1033 if ( ring_info->id.partner_id == XEN_ARGO_DOMID_ANY )
1034 wildcard_pending_list_remove(ent->domain_id, ent);
1035 list_del(&ent->node);
1036 xfree(ent);
1037 }
1038 ring_info->npending = 0;
1039 }
1040
1041 static void
1042 pending_notify(struct list_head *to_notify)
1043 {
1044 struct pending_ent *ent;
1045
1046 ASSERT(LOCKING_Read_L1);
1047
1048 /* Sending signals for all ents in this list, draining until it is empty. */
1049 while ( (ent = list_first_entry_or_null(to_notify, struct pending_ent,
1050 node)) )
1051 {
1052 list_del(&ent->node);
1053 signal_domid(ent->domain_id);
1054 xfree(ent);
1055 }
1056 }
1057
1058 static void
1059 pending_find(const struct domain *d, struct argo_ring_info *ring_info,
1060 unsigned int payload_space, struct list_head *to_notify)
1061 {
1062 struct pending_ent *ent, *next;
1063
1064 ASSERT(LOCKING_Read_rings_L2(d));
1065
1066 /*
1067 * TODO: Current policy here is to signal _all_ of the waiting domains
1068 * interested in sending a message of size less than payload_space.
1069 *
1070 * This is likely to be suboptimal, since once one of them has added
1071 * their message to the ring, there may well be insufficient room
1072 * available for any of the others to transmit, meaning that they were
1073 * woken in vain, which created extra work just to requeue their wait.
1074 *
1075 * Retain this simple policy for now since it at least avoids starving a
1076 * domain of available space notifications because of a policy that only
1077 * notified other domains instead. Improvement may be possible;
1078 * investigation required.
1079 */
1080 spin_lock(&ring_info->L3_lock);
1081
1082 /* Remove matching ents from the ring list, and add them to "to_notify" */
1083 list_for_each_entry_safe(ent, next, &ring_info->pending, node)
1084 {
1085 if ( payload_space >= ent->len )
1086 {
1087 if ( ring_info->id.partner_id == XEN_ARGO_DOMID_ANY )
1088 wildcard_pending_list_remove(ent->domain_id, ent);
1089
1090 list_del(&ent->node);
1091 ring_info->npending--;
1092 list_add(&ent->node, to_notify);
1093 }
1094 }
1095
1096 spin_unlock(&ring_info->L3_lock);
1097 }
1098
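/*
 * Queue a space-available notification request from src_id for 'len' bytes
 * on this ring, failing with -EBUSY once MAX_PENDING_PER_RING entries are
 * already queued. For wildcard rings the entry is also linked onto the
 * sender's wildcard_pend_list so it can be cancelled if that domain is
 * destroyed.
 */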
1099 static int
1100 pending_queue(const struct domain *d, struct argo_ring_info *ring_info,
1101 domid_t src_id, unsigned int len)
1102 {
1103 struct pending_ent *ent;
1104
1105 ASSERT(LOCKING_L3(d, ring_info));
1106
1107 if ( ring_info->npending >= MAX_PENDING_PER_RING )
1108 return -EBUSY;
1109
1110 ent = xmalloc(struct pending_ent);
1111 if ( !ent )
1112 return -ENOMEM;
1113
1114 ent->len = len;
1115 ent->domain_id = src_id;
1116 ent->ring_info = ring_info;
1117
1118 if ( ring_info->id.partner_id == XEN_ARGO_DOMID_ANY )
1119 wildcard_pending_list_insert(src_id, ent);
1120 list_add(&ent->node, &ring_info->pending);
1121 ring_info->npending++;
1122
1123 return 0;
1124 }
1125
1126 static int
1127 pending_requeue(const struct domain *d, struct argo_ring_info *ring_info,
1128 domid_t src_id, unsigned int len)
1129 {
1130 struct pending_ent *ent;
1131
1132 ASSERT(LOCKING_L3(d, ring_info));
1133
1134 /* List structure is not modified here. Update len in a match if found. */
1135 list_for_each_entry(ent, &ring_info->pending, node)
1136 {
1137 if ( ent->domain_id == src_id )
1138 {
1139 /*
1140 * Reuse an existing queue entry for a notification rather than add
1141 * another. If the existing entry is waiting for a smaller size than
1142 * the current message then adjust the record to wait for the
1143 * current (larger) size to be available before triggering a
1144 * notification.
1145 * This assists the waiting sender by ensuring that whenever a
1146 * notification is triggered, there is sufficient space available
1147 * for (at least) any one of the messages awaiting transmission.
1148 */
1149 if ( ent->len < len )
1150 ent->len = len;
1151
1152 return 0;
1153 }
1154 }
1155
1156 return pending_queue(d, ring_info, src_id, len);
1157 }
1158
1159 static void
1160 pending_cancel(const struct domain *d, struct argo_ring_info *ring_info,
1161 domid_t src_id)
1162 {
1163 struct pending_ent *ent, *next;
1164
1165 ASSERT(LOCKING_L3(d, ring_info));
1166
1167 /* Remove all ents where domain_id matches src_id from the ring's list. */
1168 list_for_each_entry_safe(ent, next, &ring_info->pending, node)
1169 {
1170 if ( ent->domain_id == src_id )
1171 {
1172 /* For wildcard rings, remove each from their wildcard list too. */
1173 if ( ring_info->id.partner_id == XEN_ARGO_DOMID_ANY )
1174 wildcard_pending_list_remove(ent->domain_id, ent);
1175 list_del(&ent->node);
1176 xfree(ent);
1177 ring_info->npending--;
1178 }
1179 }
1180 }
1181
1182 static void
1183 wildcard_rings_pending_remove(struct domain *d)
1184 {
1185 struct pending_ent *ent;
1186
1187 ASSERT(LOCKING_Write_L1);
1188
1189 /* Delete all pending signals to the domain about wildcard rings. */
1190 while ( (ent = list_first_entry_or_null(&d->argo->wildcard_pend_list,
1191 struct pending_ent, node)) )
1192 {
1193 /*
1194 * The ent->node deleted here, and the npending value decreased,
1195 * belong to the ring_info of another domain, which is why this
1196 * function requires holding W(L1):
1197 * it implies the L3 lock that protects that ring_info struct.
1198 */
1199 ent->ring_info->npending--;
1200 list_del(&ent->node);
1201 list_del(&ent->wildcard_node);
1202 xfree(ent);
1203 }
1204 }
1205
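/*
 * Unmap the ring and drop the page references taken on its memory, then
 * free the mfn tracking arrays. Called when a ring is unregistered,
 * re-registered with new memory, or its owning domain is torn down.
 */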
1206 static void
1207 ring_remove_mfns(const struct domain *d, struct argo_ring_info *ring_info)
1208 {
1209 unsigned int i;
1210
1211 ASSERT(LOCKING_Write_rings_L2(d));
1212
1213 if ( !ring_info->mfns )
1214 return;
1215
1216 if ( !ring_info->mfn_mapping )
1217 {
1218 ASSERT_UNREACHABLE();
1219 return;
1220 }
1221
1222 ring_unmap(d, ring_info);
1223
1224 for ( i = 0; i < ring_info->nmfns; i++ )
1225 if ( !mfn_eq(ring_info->mfns[i], INVALID_MFN) )
1226 put_page_and_type(mfn_to_page(ring_info->mfns[i]));
1227
1228 ring_info->nmfns = 0;
1229 XFREE(ring_info->mfns);
1230 XFREE(ring_info->mfn_mapping);
1231 }
1232
1233 static void
1234 ring_remove_info(const struct domain *d, struct argo_ring_info *ring_info)
1235 {
1236 ASSERT(LOCKING_Write_rings_L2(d));
1237
1238 pending_remove_all(d, ring_info);
1239 list_del(&ring_info->node);
1240 ring_remove_mfns(d, ring_info);
1241 xfree(ring_info);
1242 }
1243
1244 static void
1245 domain_rings_remove_all(struct domain *d)
1246 {
1247 unsigned int i;
1248
1249 ASSERT(LOCKING_Write_rings_L2(d));
1250
1251 for ( i = 0; i < ARGO_HASHTABLE_SIZE; ++i )
1252 {
1253 struct argo_ring_info *ring_info;
1254 struct list_head *bucket = &d->argo->ring_hash[i];
1255
1256 while ( (ring_info = list_first_entry_or_null(bucket,
1257 struct argo_ring_info,
1258 node)) )
1259 ring_remove_info(d, ring_info);
1260 }
1261 d->argo->ring_count = 0;
1262 }
1263
1264 /*
1265 * Tear down all rings of other domains where src_d domain is the partner.
1266 * (ie. it is the single domain that can send to those rings.)
1267 * This will also cancel any pending notifications about those rings.
1268 */
1269 static void
1270 partner_rings_remove(struct domain *src_d)
1271 {
1272 unsigned int i;
1273
1274 ASSERT(LOCKING_Write_L1);
1275
1276 for ( i = 0; i < ARGO_HASHTABLE_SIZE; ++i )
1277 {
1278 struct argo_send_info *send_info;
1279 struct list_head *bucket = &src_d->argo->send_hash[i];
1280
1281 /* Remove all ents from the send list. Take each off their ring list. */
1282 while ( (send_info = list_first_entry_or_null(bucket,
1283 struct argo_send_info,
1284 node)) )
1285 {
1286 struct domain *dst_d = rcu_lock_domain_by_id(send_info->id.domain_id);
1287
1288 if ( dst_d && dst_d->argo )
1289 {
1290 struct argo_ring_info *ring_info =
1291 find_ring_info(dst_d, &send_info->id);
1292
1293 if ( ring_info )
1294 {
1295 ring_remove_info(dst_d, ring_info);
1296 dst_d->argo->ring_count--;
1297 }
1298 else
1299 ASSERT_UNREACHABLE();
1300 }
1301 else
1302 argo_dprintk("%pd has entry for stale partner d%u\n",
1303 src_d, send_info->id.domain_id);
1304
1305 if ( dst_d )
1306 rcu_unlock_domain(dst_d);
1307
1308 list_del(&send_info->node);
1309 xfree(send_info);
1310 }
1311 }
1312 }
1313
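/*
 * Answer a single notify query: report whether the ring identified by the
 * supplied (domain, port) pair exists and its maximum message size, and
 * indicate via flags whether the requested space is currently available.
 * If it is not, queue a space-available notification for the caller.
 */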
1314 static int
1315 fill_ring_data(const struct domain *currd,
1316 XEN_GUEST_HANDLE(xen_argo_ring_data_ent_t) data_ent_hnd)
1317 {
1318 xen_argo_ring_data_ent_t ent;
1319 struct domain *dst_d;
1320 struct argo_ring_info *ring_info;
1321 int ret = 0;
1322
1323 ASSERT(currd == current->domain);
1324 ASSERT(LOCKING_Read_L1);
1325
1326 if ( __copy_from_guest(&ent, data_ent_hnd, 1) )
1327 return -EFAULT;
1328
1329 argo_dprintk("fill_ring_data: ent.ring.domain=%u,ent.ring.aport=%x\n",
1330 ent.ring.domain_id, ent.ring.aport);
1331
1332 ent.flags = 0;
1333
1334 dst_d = rcu_lock_domain_by_id(ent.ring.domain_id);
1335 if ( !dst_d || !dst_d->argo )
1336 goto out;
1337
1338 /*
1339 * Don't supply information about rings that a guest is not
1340 * allowed to send to.
1341 */
1342 ret = xsm_argo_send(currd, dst_d);
1343 if ( ret )
1344 goto out;
1345
1346 read_lock(&dst_d->argo->rings_L2_rwlock);
1347
1348 ring_info = find_ring_info_by_match(dst_d, ent.ring.aport,
1349 currd->domain_id);
1350 if ( ring_info )
1351 {
1352 unsigned int space_avail;
1353
1354 ent.flags |= XEN_ARGO_RING_EXISTS;
1355
1356 spin_lock(&ring_info->L3_lock);
1357
1358 ent.max_message_size = ring_info->len -
1359 sizeof(struct xen_argo_ring_message_header) -
1360 ROUNDUP_MESSAGE(1);
1361
1362 if ( ring_info->id.partner_id == XEN_ARGO_DOMID_ANY )
1363 ent.flags |= XEN_ARGO_RING_SHARED;
1364
1365 space_avail = ringbuf_payload_space(dst_d, ring_info);
1366
1367 argo_dprintk("fill_ring_data: aport=%x space_avail=%u"
1368 " space_wanted=%u\n",
1369 ring_info->id.aport, space_avail, ent.space_required);
1370
1371 /* Do not queue a notification for an unachievable size */
1372 if ( ent.space_required > ent.max_message_size )
1373 ent.flags |= XEN_ARGO_RING_EMSGSIZE;
1374 else if ( space_avail >= ent.space_required )
1375 {
1376 pending_cancel(dst_d, ring_info, currd->domain_id);
1377 ent.flags |= XEN_ARGO_RING_SUFFICIENT;
1378 }
1379 else
1380 {
1381 ret = pending_requeue(dst_d, ring_info, currd->domain_id,
1382 ent.space_required);
1383 if ( ret == -EBUSY )
1384 {
1385 /*
1386 * Too many other domains are already awaiting notification
1387 * about available space on this ring. Indicate this state via
1388 * flag. No need to return an error to the caller; allow the
1389 * processing of queries about other rings to continue.
1390 */
1391 ent.flags |= XEN_ARGO_RING_EBUSY;
1392 ret = 0;
1393 }
1394 }
1395
1396 spin_unlock(&ring_info->L3_lock);
1397
1398 if ( space_avail == ent.max_message_size )
1399 ent.flags |= XEN_ARGO_RING_EMPTY;
1400
1401 }
1402 read_unlock(&dst_d->argo->rings_L2_rwlock);
1403
1404 out:
1405 if ( dst_d )
1406 rcu_unlock_domain(dst_d);
1407
1408 if ( !ret && (__copy_field_to_guest(data_ent_hnd, &ent, flags) ||
1409 __copy_field_to_guest(data_ent_hnd, &ent, max_message_size)) )
1410 return -EFAULT;
1411
1412 return ret;
1413 }
1414
1415 static int
1416 find_ring_mfn(struct domain *d, gfn_t gfn, mfn_t *mfn)
1417 {
1418 struct page_info *page;
1419 p2m_type_t p2mt;
1420 int ret;
1421
1422 ret = check_get_page_from_gfn(d, gfn, false, &p2mt, &page);
1423 if ( unlikely(ret) )
1424 return ret;
1425
1426 *mfn = page_to_mfn(page);
1427
1428 switch ( p2mt )
1429 {
1430 case p2m_ram_rw:
1431 if ( !get_page_type(page, PGT_writable_page) )
1432 ret = -EINVAL;
1433 break;
1434
1435 #ifdef CONFIG_X86
1436 case p2m_ram_logdirty:
1437 ret = -EAGAIN;
1438 break;
1439 #endif
1440
1441 default:
1442 ret = -EINVAL;
1443 break;
1444 }
1445
1446 if ( unlikely(ret) )
1447 put_page(page);
1448
1449 return ret;
1450 }
1451
1452 static int
1453 find_ring_mfns(struct domain *d, struct argo_ring_info *ring_info,
1454 const unsigned int npage,
1455 XEN_GUEST_HANDLE_PARAM(xen_argo_gfn_t) gfn_hnd,
1456 const unsigned int len)
1457 {
1458 unsigned int i;
1459 int ret = 0;
1460 mfn_t *mfns;
1461 void **mfn_mapping;
1462
1463 ASSERT(LOCKING_Write_rings_L2(d));
1464
1465 if ( ring_info->mfns )
1466 {
1467 /* Ring already existed: drop the previous mapping. */
1468 argo_dprintk("argo: vm%u re-register existing ring "
1469 "(vm%u:%x vm%u) clears mapping\n",
1470 d->domain_id, ring_info->id.domain_id,
1471 ring_info->id.aport, ring_info->id.partner_id);
1472
1473 ring_remove_mfns(d, ring_info);
1474 ASSERT(!ring_info->mfns);
1475 }
1476
1477 mfns = xmalloc_array(mfn_t, npage);
1478 if ( !mfns )
1479 return -ENOMEM;
1480
1481 for ( i = 0; i < npage; i++ )
1482 mfns[i] = INVALID_MFN;
1483
1484 mfn_mapping = xzalloc_array(void *, npage);
1485 if ( !mfn_mapping )
1486 {
1487 xfree(mfns);
1488 return -ENOMEM;
1489 }
1490
1491 ring_info->mfns = mfns;
1492 ring_info->mfn_mapping = mfn_mapping;
1493
1494 for ( i = 0; i < npage; i++ )
1495 {
1496 mfn_t mfn;
1497 xen_argo_gfn_t argo_gfn;
1498
1499 ret = __copy_from_guest_offset(&argo_gfn, gfn_hnd, i, 1) ? -EFAULT : 0;
1500 if ( ret )
1501 break;
1502
1503 ret = find_ring_mfn(d, _gfn(argo_gfn), &mfn);
1504 if ( ret )
1505 {
1506 gprintk(XENLOG_ERR, "argo: vm%u: invalid gfn %"PRI_gfn" "
1507 "r:(vm%u:%x vm%u) %p %u/%u\n",
1508 d->domain_id, gfn_x(_gfn(argo_gfn)),
1509 ring_info->id.domain_id, ring_info->id.aport,
1510 ring_info->id.partner_id, ring_info, i, npage);
1511 break;
1512 }
1513
1514 ring_info->mfns[i] = mfn;
1515
1516 argo_dprintk("%u: %"PRI_gfn" -> %"PRI_mfn"\n",
1517 i, gfn_x(_gfn(argo_gfn)), mfn_x(ring_info->mfns[i]));
1518 }
1519
1520 ring_info->nmfns = i;
1521
1522 if ( ret )
1523 ring_remove_mfns(d, ring_info);
1524 else
1525 {
1526 ASSERT(ring_info->nmfns == NPAGES_RING(len));
1527
1528 argo_dprintk("argo: vm%u ring (vm%u:%x vm%u) %p "
1529 "mfn_mapping %p len %u nmfns %u\n",
1530 d->domain_id, ring_info->id.domain_id,
1531 ring_info->id.aport, ring_info->id.partner_id, ring_info,
1532 ring_info->mfn_mapping, ring_info->len, ring_info->nmfns);
1533 }
1534
1535 return ret;
1536 }
1537
1538 static long
1539 unregister_ring(struct domain *currd,
1540 XEN_GUEST_HANDLE_PARAM(xen_argo_unregister_ring_t) unreg_hnd)
1541 {
1542 xen_argo_unregister_ring_t unreg;
1543 struct argo_ring_id ring_id;
1544 struct argo_ring_info *ring_info = NULL;
1545 struct argo_send_info *send_info = NULL;
1546 struct domain *dst_d = NULL;
1547
1548 ASSERT(currd == current->domain);
1549
1550 if ( copy_from_guest(&unreg, unreg_hnd, 1) )
1551 return -EFAULT;
1552
1553 if ( unreg.pad )
1554 return -EINVAL;
1555
1556 ring_id.partner_id = unreg.partner_id;
1557 ring_id.aport = unreg.aport;
1558 ring_id.domain_id = currd->domain_id;
1559
1560 read_lock(&L1_global_argo_rwlock);
1561
1562 if ( unlikely(!currd->argo) )
1563 {
1564 read_unlock(&L1_global_argo_rwlock);
1565 return -ENODEV;
1566 }
1567
1568 write_lock(&currd->argo->rings_L2_rwlock);
1569
1570 ring_info = find_ring_info(currd, &ring_id);
1571 if ( !ring_info )
1572 goto out;
1573
1574 ring_remove_info(currd, ring_info);
1575 currd->argo->ring_count--;
1576
1577 if ( ring_id.partner_id == XEN_ARGO_DOMID_ANY )
1578 goto out;
1579
1580 dst_d = rcu_lock_domain_by_id(ring_id.partner_id);
1581 if ( !dst_d || !dst_d->argo )
1582 {
1583 ASSERT_UNREACHABLE();
1584 goto out;
1585 }
1586
1587 spin_lock(&dst_d->argo->send_L2_lock);
1588
1589 send_info = find_send_info(dst_d, &ring_id);
1590 if ( send_info )
1591 list_del(&send_info->node);
1592 else
1593 ASSERT_UNREACHABLE();
1594
1595 spin_unlock(&dst_d->argo->send_L2_lock);
1596
1597 out:
1598 write_unlock(&currd->argo->rings_L2_rwlock);
1599
1600 read_unlock(&L1_global_argo_rwlock);
1601
1602 if ( dst_d )
1603 rcu_unlock_domain(dst_d);
1604
1605 xfree(send_info);
1606
1607 if ( !ring_info )
1608 {
1609 argo_dprintk("unregister_ring: no ring_info found for ring(%u:%x %u)\n",
1610 ring_id.domain_id, ring_id.aport, ring_id.partner_id);
1611 return -ENOENT;
1612 }
1613
1614 return 0;
1615 }
1616
1617 static long
1618 register_ring(struct domain *currd,
1619 XEN_GUEST_HANDLE_PARAM(xen_argo_register_ring_t) reg_hnd,
1620 XEN_GUEST_HANDLE_PARAM(xen_argo_gfn_t) gfn_hnd,
1621 unsigned int npage, unsigned int flags)
1622 {
1623 xen_argo_register_ring_t reg;
1624 struct argo_ring_id ring_id;
1625 void *map_ringp;
1626 xen_argo_ring_t *ringp;
1627 struct argo_ring_info *ring_info, *new_ring_info = NULL;
1628 struct argo_send_info *send_info = NULL;
1629 struct domain *dst_d = NULL;
1630 int ret = 0;
1631 unsigned int private_tx_ptr;
1632
1633 ASSERT(currd == current->domain);
1634
1635 /* flags: reserve currently-undefined bits, require zero. */
1636 if ( unlikely(flags & ~XEN_ARGO_REGISTER_FLAG_MASK) )
1637 return -EINVAL;
1638
1639 if ( copy_from_guest(&reg, reg_hnd, 1) )
1640 return -EFAULT;
1641
1642 /*
1643 * A ring must be large enough to transmit messages, so requires space for:
1644 * * 1 message header, plus
1645 * * 1 payload slot (payload is always rounded to a multiple of 16 bytes)
1646 * for the message payload to be written into, plus
1647 * * 1 more slot, so that the ring cannot be filled to capacity with a
1648 * single minimum-size message -- see the logic in ringbuf_insert --
1649 * allowing for this ensures that there can be space remaining when a
1650 * message is present.
1651 * The above determines the minimum acceptable ring size.
1652 */
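/*
 * Illustrative: with 16-byte message slots (and a message header of the
 * same size, per the BUILD_BUG_ON in ringbuf_insert), the minimum
 * acceptable reg.len works out to 16 + 16 + 16 = 48 bytes.
 */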
1653 if ( (reg.len < (sizeof(struct xen_argo_ring_message_header)
1654 + ROUNDUP_MESSAGE(1) + ROUNDUP_MESSAGE(1))) ||
1655 (reg.len > XEN_ARGO_MAX_RING_SIZE) ||
1656 (reg.len != ROUNDUP_MESSAGE(reg.len)) ||
1657 (NPAGES_RING(reg.len) != npage) ||
1658 (reg.pad != 0) )
1659 return -EINVAL;
1660
1661 ring_id.partner_id = reg.partner_id;
1662 ring_id.aport = reg.aport;
1663 ring_id.domain_id = currd->domain_id;
1664
1665 if ( reg.partner_id == XEN_ARGO_DOMID_ANY )
1666 {
1667 ret = opt_argo_mac_permissive ? xsm_argo_register_any_source(currd) :
1668 -EPERM;
1669 if ( ret )
1670 return ret;
1671 }
1672 else
1673 {
1674 dst_d = rcu_lock_domain_by_id(reg.partner_id);
1675 if ( !dst_d )
1676 {
1677 argo_dprintk("!dst_d, ESRCH\n");
1678 return -ESRCH;
1679 }
1680
1681 ret = xsm_argo_register_single_source(currd, dst_d);
1682 if ( ret )
1683 goto out;
1684
1685 send_info = xzalloc(struct argo_send_info);
1686 if ( !send_info )
1687 {
1688 ret = -ENOMEM;
1689 goto out;
1690 }
1691 send_info->id = ring_id;
1692 }
1693
1694 /*
1695 * Common case is that the ring doesn't already exist, so do the alloc here
1696 * before picking up any locks.
1697 */
1698 new_ring_info = xzalloc(struct argo_ring_info);
1699 if ( !new_ring_info )
1700 {
1701 ret = -ENOMEM;
1702 goto out;
1703 }
1704
1705 read_lock(&L1_global_argo_rwlock);
1706
1707 if ( !currd->argo )
1708 {
1709 ret = -ENODEV;
1710 goto out_unlock;
1711 }
1712
1713 if ( dst_d && !dst_d->argo )
1714 {
1715 argo_dprintk("!dst_d->argo, ECONNREFUSED\n");
1716 ret = -ECONNREFUSED;
1717 goto out_unlock;
1718 }
1719
1720 write_lock(&currd->argo->rings_L2_rwlock);
1721
1722 if ( currd->argo->ring_count >= MAX_RINGS_PER_DOMAIN )
1723 {
1724 ret = -ENOSPC;
1725 goto out_unlock2;
1726 }
1727
1728 ring_info = find_ring_info(currd, &ring_id);
1729 if ( !ring_info )
1730 {
1731 ring_info = new_ring_info;
1732 new_ring_info = NULL;
1733
1734 spin_lock_init(&ring_info->L3_lock);
1735
1736 ring_info->id = ring_id;
1737 INIT_LIST_HEAD(&ring_info->pending);
1738
1739 list_add(&ring_info->node,
1740 &currd->argo->ring_hash[hash_index(&ring_info->id)]);
1741
1742 argo_dprintk("argo: vm%u registering ring (vm%u:%x vm%u)\n",
1743 currd->domain_id, ring_id.domain_id, ring_id.aport,
1744 ring_id.partner_id);
1745 }
1746 else if ( ring_info->len )
1747 {
1748 /*
1749 * If the caller specified that the ring must not already exist,
1750 * fail the attempt to add a completed ring which already exists.
1751 */
1752 if ( flags & XEN_ARGO_REGISTER_FLAG_FAIL_EXIST )
1753 {
1754 gprintk(XENLOG_ERR, "argo: vm%u disallowed reregistration of "
1755 "existing ring (vm%u:%x vm%u)\n",
1756 currd->domain_id, ring_id.domain_id, ring_id.aport,
1757 ring_id.partner_id);
1758 ret = -EEXIST;
1759 goto out_unlock2;
1760 }
1761
1762 if ( ring_info->len != reg.len )
1763 {
1764 /*
1765 * Change of ring size could result in entries on the pending
1766 * notifications list that will never trigger.
1767 * Simple blunt solution: disallow ring resize for now.
1768 * TODO: investigate enabling ring resize.
1769 */
1770 gprintk(XENLOG_ERR, "argo: vm%u attempted to change ring size "
1771 "(vm%u:%x vm%u)\n",
1772 currd->domain_id, ring_id.domain_id, ring_id.aport,
1773 ring_id.partner_id);
1774 /*
1775 * Could return EINVAL here, but if the ring didn't already
1776 * exist then the arguments would have been valid, so: EEXIST.
1777 */
1778 ret = -EEXIST;
1779 goto out_unlock2;
1780 }
1781
1782 argo_dprintk("argo: vm%u re-registering existing ring (vm%u:%x vm%u)\n",
1783 currd->domain_id, ring_id.domain_id, ring_id.aport,
1784 ring_id.partner_id);
1785 }
1786
1787 ret = find_ring_mfns(currd, ring_info, npage, gfn_hnd, reg.len);
1788 if ( ret )
1789 {
1790 gprintk(XENLOG_ERR,
1791 "argo: vm%u failed to find ring mfns (vm%u:%x vm%u)\n",
1792 currd->domain_id, ring_id.domain_id, ring_id.aport,
1793 ring_id.partner_id);
1794
1795 ring_remove_info(currd, ring_info);
1796 goto out_unlock2;
1797 }
1798
1799 /*
1800 * The first page of the memory supplied for the ring has the xen_argo_ring
1801 * structure at its head, which is where the ring indexes reside.
1802 */
1803 ret = ring_map_page(currd, ring_info, 0, &map_ringp);
1804 if ( ret )
1805 {
1806 gprintk(XENLOG_ERR,
1807 "argo: vm%u failed to map ring mfn 0 (vm%u:%x vm%u)\n",
1808 currd->domain_id, ring_id.domain_id, ring_id.aport,
1809 ring_id.partner_id);
1810
1811 ring_remove_info(currd, ring_info);
1812 goto out_unlock2;
1813 }
1814 ringp = map_ringp;
1815
1816 private_tx_ptr = read_atomic(&ringp->tx_ptr);
1817
1818 if ( (private_tx_ptr >= reg.len) ||
1819 (ROUNDUP_MESSAGE(private_tx_ptr) != private_tx_ptr) )
1820 {
1821 /*
1822 * Since the ring is a mess, attempt to flush the contents of it
1823 * here by setting the tx_ptr to the next aligned message slot past
1824 * the latest rx_ptr we have observed. Handle ring wrap correctly.
1825 */
1826 private_tx_ptr = ROUNDUP_MESSAGE(read_atomic(&ringp->rx_ptr));
1827
1828 if ( private_tx_ptr >= reg.len )
1829 private_tx_ptr = 0;
1830
1831 update_tx_ptr(currd, ring_info, private_tx_ptr);
1832 }
1833
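    /*
     * The ring is now usable: record its indexes and length, and count it
     * against the MAX_RINGS_PER_DOMAIN limit checked above.
     */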
1834 ring_info->tx_ptr = private_tx_ptr;
1835 ring_info->len = reg.len;
1836 currd->argo->ring_count++;
1837
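    /*
     * For a ring registered with a specific partner, link the new send_info
     * into the partner domain's send hash so the state for this ring can be
     * located from the partner's side (eg. for cleanup by
     * partner_rings_remove() when the partner domain is destroyed).
     */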
1838 if ( send_info )
1839 {
1840 spin_lock(&dst_d->argo->send_L2_lock);
1841
1842 list_add(&send_info->node,
1843 &dst_d->argo->send_hash[hash_index(&send_info->id)]);
1844
1845 spin_unlock(&dst_d->argo->send_L2_lock);
1846 }
1847
1848 out_unlock2:
1849 write_unlock(&currd->argo->rings_L2_rwlock);
1850
1851 out_unlock:
1852 read_unlock(&L1_global_argo_rwlock);
1853
1854 out:
1855 if ( dst_d )
1856 rcu_unlock_domain(dst_d);
1857
1858 if ( ret )
1859 xfree(send_info);
1860
1861 xfree(new_ring_info);
1862
1863 return ret;
1864 }
1865
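/*
 * Compute the payload space currently available on a single ring and, if
 * there is any, move the pending notification requests that can now be
 * satisfied onto the caller's to_notify list via pending_find().
 * The caller holds a read lock on the owning domain's rings_L2 lock; the
 * ring's L3 lock is taken here only while sampling the ring state.
 */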
1866 static void
1867 notify_ring(const struct domain *d, struct argo_ring_info *ring_info,
1868 struct list_head *to_notify)
1869 {
1870 unsigned int space;
1871
1872 ASSERT(LOCKING_Read_rings_L2(d));
1873
1874 spin_lock(&ring_info->L3_lock);
1875
1876 if ( ring_info->len )
1877 space = ringbuf_payload_space(d, ring_info);
1878 else
1879 space = 0;
1880
1881 spin_unlock(&ring_info->L3_lock);
1882
1883 if ( space )
1884 pending_find(d, ring_info, space, to_notify);
1885 }
1886
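/*
 * Walk every ring registered by domain 'd' under a read lock on its
 * rings_L2 lock, collecting pending notifications that can now be
 * satisfied, and deliver them via pending_notify() once the lock has been
 * dropped.
 */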
1887 static void
1888 notify_check_pending(struct domain *d)
1889 {
1890 unsigned int i;
1891 LIST_HEAD(to_notify);
1892
1893 ASSERT(LOCKING_Read_L1);
1894
1895 read_lock(&d->argo->rings_L2_rwlock);
1896
1897 /* Walk all rings, call notify_ring on each to populate to_notify list */
1898 for ( i = 0; i < ARGO_HASHTABLE_SIZE; i++ )
1899 {
1900 struct argo_ring_info *ring_info, *next;
1901 struct list_head *bucket = &d->argo->ring_hash[i];
1902
1903 list_for_each_entry_safe(ring_info, next, bucket, node)
1904 notify_ring(d, ring_info, &to_notify);
1905 }
1906
1907 read_unlock(&d->argo->rings_L2_rwlock);
1908
1909 if ( !list_empty(&to_notify) )
1910 pending_notify(&to_notify);
1911 }
1912
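/*
 * Handler for XEN_ARGO_OP_notify: deliver any of the calling domain's
 * pending space-available notifications that can now be satisfied, then,
 * if the guest supplied a xen_argo_ring_data_t, fill in the current status
 * of each of the (at most MAX_NOTIFY_COUNT) rings it describes via
 * fill_ring_data().
 */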
1913 static long
1914 notify(struct domain *currd,
1915 XEN_GUEST_HANDLE_PARAM(xen_argo_ring_data_t) ring_data_hnd)
1916 {
1917 XEN_GUEST_HANDLE(xen_argo_ring_data_ent_t) ent_hnd;
1918 xen_argo_ring_data_t ring_data;
1919 int ret = 0;
1920
1921 ASSERT(currd == current->domain);
1922
1923 read_lock(&L1_global_argo_rwlock);
1924
1925 if ( !currd->argo )
1926 {
1927 argo_dprintk("!d->argo, ENODEV\n");
1928 ret = -ENODEV;
1929 goto out;
1930 }
1931
1932 notify_check_pending(currd);
1933
1934 if ( guest_handle_is_null(ring_data_hnd) )
1935 goto out;
1936
1937 ret = copy_from_guest(&ring_data, ring_data_hnd, 1) ? -EFAULT : 0;
1938 if ( ret )
1939 goto out;
1940
1941 if ( ring_data.nent > MAX_NOTIFY_COUNT )
1942 {
1943 gprintk(XENLOG_ERR, "argo: notify entry count(%u) exceeds max(%u)\n",
1944 ring_data.nent, MAX_NOTIFY_COUNT);
1945 ret = -EACCES;
1946 goto out;
1947 }
1948
1949 ent_hnd = guest_handle_for_field(ring_data_hnd,
1950 xen_argo_ring_data_ent_t, data[0]);
1951 if ( unlikely(!guest_handle_okay(ent_hnd, ring_data.nent)) )
1952 {
1953 ret = -EFAULT;
1954 goto out;
1955 }
1956
1957 while ( !ret && ring_data.nent-- )
1958 {
1959 ret = fill_ring_data(currd, ent_hnd);
1960 guest_handle_add_offset(ent_hnd, 1);
1961 }
1962
1963 out:
1964 read_unlock(&L1_global_argo_rwlock);
1965
1966 return ret;
1967 }
1968
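/*
 * Handler for XEN_ARGO_OP_sendv: validate the source address, apply the
 * XSM send check, locate the destination ring matching (aport, sending
 * domain), and copy the iov payload into it with ringbuf_insert(). If the
 * ring is currently full (-EAGAIN), queue a request so that the sender is
 * notified when space becomes available. On success, the destination
 * domain is signalled and the number of bytes sent is returned.
 */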
1969 static long
1970 sendv(struct domain *src_d, xen_argo_addr_t *src_addr,
1971 const xen_argo_addr_t *dst_addr, xen_argo_iov_t *iovs, unsigned int niov,
1972 uint32_t message_type)
1973 {
1974 struct domain *dst_d = NULL;
1975 struct argo_ring_id src_id;
1976 struct argo_ring_info *ring_info;
1977 int ret = 0;
1978 unsigned int len = 0;
1979
1980 argo_dprintk("sendv: (%u:%x)->(%u:%x) niov:%u type:%x\n",
1981 src_addr->domain_id, src_addr->aport, dst_addr->domain_id,
1982 dst_addr->aport, niov, message_type);
1983
1984 /* Check padding is zeroed. */
1985 if ( unlikely(src_addr->pad || dst_addr->pad) )
1986 return -EINVAL;
1987
1988 if ( src_addr->domain_id == XEN_ARGO_DOMID_ANY )
1989 src_addr->domain_id = src_d->domain_id;
1990
1991 /* No domain is currently authorized to send on behalf of another */
1992 if ( unlikely(src_addr->domain_id != src_d->domain_id) )
1993 return -EPERM;
1994
1995 src_id.aport = src_addr->aport;
1996 src_id.domain_id = src_d->domain_id;
1997 src_id.partner_id = dst_addr->domain_id;
1998
1999 dst_d = rcu_lock_domain_by_id(dst_addr->domain_id);
2000 if ( !dst_d )
2001 return -ESRCH;
2002
2003 ret = xsm_argo_send(src_d, dst_d);
2004 if ( ret )
2005 {
2006 gprintk(XENLOG_ERR, "argo: XSM REJECTED %i -> %i\n",
2007 src_d->domain_id, dst_d->domain_id);
2008
2009 rcu_unlock_domain(dst_d);
2010
2011 return ret;
2012 }
2013
2014 read_lock(&L1_global_argo_rwlock);
2015
2016 if ( !src_d->argo )
2017 {
2018 ret = -ENODEV;
2019 goto out_unlock;
2020 }
2021
2022 if ( !dst_d->argo )
2023 {
2024 argo_dprintk("!dst_d->argo, ECONNREFUSED\n");
2025 ret = -ECONNREFUSED;
2026 goto out_unlock;
2027 }
2028
2029 read_lock(&dst_d->argo->rings_L2_rwlock);
2030
2031 ring_info = find_ring_info_by_match(dst_d, dst_addr->aport,
2032 src_id.domain_id);
2033 if ( !ring_info )
2034 {
2035 gprintk(XENLOG_ERR,
2036 "argo: vm%u connection refused, src (vm%u:%x) dst (vm%u:%x)\n",
2037 current->domain->domain_id, src_id.domain_id, src_id.aport,
2038 dst_addr->domain_id, dst_addr->aport);
2039
2040 ret = -ECONNREFUSED;
2041 }
2042 else
2043 {
2044 spin_lock(&ring_info->L3_lock);
2045
2046 /*
2047 * Obtain the total size of data to transmit -- sets the 'len' variable
2048 * -- and sanity check that the iovs conform to size and number limits.
2049 */
2050 ret = iov_count(iovs, niov, &len);
2051 if ( !ret )
2052 {
2053 ret = ringbuf_insert(dst_d, ring_info, &src_id, iovs, niov,
2054 message_type, len);
2055 if ( ret == -EAGAIN )
2056 {
2057 int rc;
2058
2059                 argo_dprintk("ringbuf_insert failed, EAGAIN\n");
2060                 /* requeue to issue a notification when space becomes available */
2061 rc = pending_requeue(dst_d, ring_info, src_id.domain_id, len);
2062 if ( rc )
2063 ret = rc;
2064 }
2065 }
2066
2067 spin_unlock(&ring_info->L3_lock);
2068 }
2069
2070 read_unlock(&dst_d->argo->rings_L2_rwlock);
2071
2072 out_unlock:
2073 read_unlock(&L1_global_argo_rwlock);
2074
2075 if ( ret >= 0 )
2076 signal_domain(dst_d);
2077
2078 if ( dst_d )
2079 rcu_unlock_domain(dst_d);
2080
2081 return ( ret < 0 ) ? ret : len;
2082 }
2083
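/*
 * Top-level Argo hypercall dispatcher. Argument usage per operation, as
 * enforced by the cases below:
 *
 *  XEN_ARGO_OP_register_ring:   arg1: xen_argo_register_ring_t,
 *                               arg2: xen_argo_gfn_t array,
 *                               arg3: npage, arg4: flags.
 *  XEN_ARGO_OP_unregister_ring: arg1: xen_argo_unregister_ring_t,
 *                               arg2-arg4: must be NULL / zero.
 *  XEN_ARGO_OP_sendv:           arg1: xen_argo_send_addr_t,
 *                               arg2: xen_argo_iov_t array,
 *                               arg3: niov,
 *                               arg4: message_type (must fit in 32 bits).
 *  XEN_ARGO_OP_notify:          arg1: xen_argo_ring_data_t (may be NULL),
 *                               arg2-arg4: must be NULL / zero.
 *
 * All operations require Argo to be enabled (opt_argo) and are subject to
 * an XSM check on the calling domain.
 */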
2084 long
2085 do_argo_op(unsigned int cmd, XEN_GUEST_HANDLE_PARAM(void) arg1,
2086 XEN_GUEST_HANDLE_PARAM(void) arg2, unsigned long raw_arg3,
2087 unsigned long raw_arg4)
2088 {
2089 struct domain *currd = current->domain;
2090 long rc;
2091 unsigned int arg3 = raw_arg3, arg4 = raw_arg4;
2092
2093 argo_dprintk("->do_argo_op(%u,%p,%p,%lu,0x%lx)\n", cmd,
2094 (void *)arg1.p, (void *)arg2.p, raw_arg3, raw_arg4);
2095
2096 /* Reject numeric hypercall args outside 32-bit range */
2097 if ( (arg3 != raw_arg3) || (arg4 != raw_arg4) )
2098 return -EINVAL;
2099
2100 if ( unlikely(!opt_argo) )
2101 return -EOPNOTSUPP;
2102
2103 rc = xsm_argo_enable(currd);
2104 if ( rc )
2105 return rc;
2106
2107 switch ( cmd )
2108 {
2109 case XEN_ARGO_OP_register_ring:
2110 {
2111 XEN_GUEST_HANDLE_PARAM(xen_argo_register_ring_t) reg_hnd =
2112 guest_handle_cast(arg1, xen_argo_register_ring_t);
2113 XEN_GUEST_HANDLE_PARAM(xen_argo_gfn_t) gfn_hnd =
2114 guest_handle_cast(arg2, xen_argo_gfn_t);
2115 /* arg3: npage, arg4: flags */
2116
2117 BUILD_BUG_ON(!IS_ALIGNED(XEN_ARGO_MAX_RING_SIZE, PAGE_SIZE));
2118
2119 if ( unlikely(arg3 > (XEN_ARGO_MAX_RING_SIZE >> PAGE_SHIFT)) )
2120 {
2121 rc = -EINVAL;
2122 break;
2123 }
2124
2125 /* Check array to allow use of the faster __copy operations later */
2126 if ( unlikely(!guest_handle_okay(gfn_hnd, arg3)) )
2127 {
2128 rc = -EFAULT;
2129 break;
2130 }
2131
2132 rc = register_ring(currd, reg_hnd, gfn_hnd, arg3, arg4);
2133 break;
2134 }
2135
2136 case XEN_ARGO_OP_unregister_ring:
2137 {
2138 XEN_GUEST_HANDLE_PARAM(xen_argo_unregister_ring_t) unreg_hnd =
2139 guest_handle_cast(arg1, xen_argo_unregister_ring_t);
2140
2141 if ( unlikely((!guest_handle_is_null(arg2)) || arg3 || arg4) )
2142 {
2143 rc = -EINVAL;
2144 break;
2145 }
2146
2147 rc = unregister_ring(currd, unreg_hnd);
2148 break;
2149 }
2150
2151 case XEN_ARGO_OP_sendv:
2152 {
2153 xen_argo_send_addr_t send_addr;
2154 xen_argo_iov_t iovs[XEN_ARGO_MAXIOV];
2155 unsigned int niov;
2156
2157 XEN_GUEST_HANDLE_PARAM(xen_argo_send_addr_t) send_addr_hnd =
2158 guest_handle_cast(arg1, xen_argo_send_addr_t);
2159 XEN_GUEST_HANDLE_PARAM(xen_argo_iov_t) iovs_hnd =
2160 guest_handle_cast(arg2, xen_argo_iov_t);
2161 /* arg3 is niov */
2162 /* arg4 is message_type. Must be a 32-bit value. */
2163
2164 /* XEN_ARGO_MAXIOV value determines size of iov array on stack */
2165 BUILD_BUG_ON(XEN_ARGO_MAXIOV > 8);
2166
2167 rc = copy_from_guest(&send_addr, send_addr_hnd, 1) ? -EFAULT : 0;
2168 if ( rc )
2169 {
2170 rc = -EFAULT;
2171 break;
2172 }
2173
2174 /*
2175          * Reject niov above the maximum limit, or a message_type that does
2176          * not fit in 32 bits.
2177 */
2178 if ( unlikely((arg3 > XEN_ARGO_MAXIOV) || (arg4 != (uint32_t)arg4)) )
2179 {
2180 rc = -EINVAL;
2181 break;
2182 }
2183 niov = array_index_nospec(arg3, XEN_ARGO_MAXIOV + 1);
2184
2185 rc = copy_from_guest(iovs, iovs_hnd, niov) ? -EFAULT : 0;
2186 if ( rc )
2187 {
2188 rc = -EFAULT;
2189 break;
2190 }
2191
2192 rc = sendv(currd, &send_addr.src, &send_addr.dst, iovs, niov, arg4);
2193 break;
2194 }
2195
2196 case XEN_ARGO_OP_notify:
2197 {
2198 XEN_GUEST_HANDLE_PARAM(xen_argo_ring_data_t) ring_data_hnd =
2199 guest_handle_cast(arg1, xen_argo_ring_data_t);
2200
2201 if ( unlikely((!guest_handle_is_null(arg2)) || arg3 || arg4) )
2202 {
2203 rc = -EINVAL;
2204 break;
2205 }
2206
2207 rc = notify(currd, ring_data_hnd);
2208 break;
2209 }
2210
2211 default:
2212 rc = -EOPNOTSUPP;
2213 break;
2214 }
2215
2216 argo_dprintk("<-do_argo_op(%u)=%ld\n", cmd, rc);
2217
2218 return rc;
2219 }
2220
2221 #ifdef CONFIG_COMPAT
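/*
 * Compat (32-bit guest) entry point. Only sendv requires translation here:
 * each compat_argo_iov_t carries a compat-width guest handle in iov_hnd, so
 * the array is converted to native xen_argo_iov_t entries with XLAT_argo_iov
 * before calling sendv(). All other operations are forwarded unchanged to
 * do_argo_op().
 */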
2222 int
2223 compat_argo_op(unsigned int cmd, XEN_GUEST_HANDLE_PARAM(void) arg1,
2224 XEN_GUEST_HANDLE_PARAM(void) arg2, unsigned long arg3,
2225 unsigned long arg4)
2226 {
2227 struct domain *currd = current->domain;
2228 int rc;
2229 xen_argo_send_addr_t send_addr;
2230 xen_argo_iov_t iovs[XEN_ARGO_MAXIOV];
2231 compat_argo_iov_t compat_iovs[XEN_ARGO_MAXIOV];
2232 unsigned int i, niov;
2233 XEN_GUEST_HANDLE_PARAM(xen_argo_send_addr_t) send_addr_hnd;
2234
2235 /* check XEN_ARGO_MAXIOV as it sizes stack arrays: iovs, compat_iovs */
2236 BUILD_BUG_ON(XEN_ARGO_MAXIOV > 8);
2237
2238 /* Forward all ops besides sendv to the native handler. */
2239 if ( cmd != XEN_ARGO_OP_sendv )
2240 return do_argo_op(cmd, arg1, arg2, arg3, arg4);
2241
2242 if ( unlikely(!opt_argo) )
2243 return -EOPNOTSUPP;
2244
2245 rc = xsm_argo_enable(currd);
2246 if ( rc )
2247 return rc;
2248
2249 argo_dprintk("->compat_argo_op(%u,%p,%p,%lu,0x%lx)\n", cmd,
2250 (void *)arg1.p, (void *)arg2.p, arg3, arg4);
2251
2252 send_addr_hnd = guest_handle_cast(arg1, xen_argo_send_addr_t);
2253 /* arg2: iovs, arg3: niov, arg4: message_type */
2254
2255 rc = copy_from_guest(&send_addr, send_addr_hnd, 1) ? -EFAULT : 0;
2256 if ( rc )
2257 goto out;
2258
2259 if ( unlikely(arg3 > XEN_ARGO_MAXIOV) )
2260 {
2261 rc = -EINVAL;
2262 goto out;
2263 }
2264 niov = array_index_nospec(arg3, XEN_ARGO_MAXIOV + 1);
2265
2266 rc = copy_from_guest(compat_iovs, arg2, niov) ? -EFAULT : 0;
2267 if ( rc )
2268 goto out;
2269
2270 for ( i = 0; i < niov; i++ )
2271 {
2272 #define XLAT_argo_iov_HNDL_iov_hnd(_d_, _s_) \
2273 guest_from_compat_handle((_d_)->iov_hnd, (_s_)->iov_hnd)
2274
2275 XLAT_argo_iov(&iovs[i], &compat_iovs[i]);
2276
2277 #undef XLAT_argo_iov_HNDL_iov_hnd
2278 }
2279
2280 rc = sendv(currd, &send_addr.src, &send_addr.dst, iovs, niov, arg4);
2281 out:
2282 argo_dprintk("<-compat_argo_op(%u)=%d\n", cmd, rc);
2283
2284 return rc;
2285 }
2286 #endif
2287
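/*
 * Initialize per-domain Argo state: the L2 locks, the ring and send hash
 * table buckets, and the wildcard pending list. Used both for first-time
 * setup in argo_init() and for reinitialization in argo_soft_reset().
 */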
2288 static void
2289 argo_domain_init(struct argo_domain *argo)
2290 {
2291 unsigned int i;
2292
2293 rwlock_init(&argo->rings_L2_rwlock);
2294 spin_lock_init(&argo->send_L2_lock);
2295 spin_lock_init(&argo->wildcard_L2_lock);
2296
2297 for ( i = 0; i < ARGO_HASHTABLE_SIZE; ++i )
2298 {
2299 INIT_LIST_HEAD(&argo->ring_hash[i]);
2300 INIT_LIST_HEAD(&argo->send_hash[i]);
2301 }
2302 INIT_LIST_HEAD(&argo->wildcard_pend_list);
2303 }
2304
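/*
 * Domain initialization hook: if Argo is disabled globally or denied to
 * this domain by XSM policy, leave d->argo NULL and report success.
 * Otherwise allocate the per-domain state and publish it in d->argo under
 * the L1 write lock.
 */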
2305 int
2306 argo_init(struct domain *d)
2307 {
2308 struct argo_domain *argo;
2309
2310 if ( !opt_argo || xsm_argo_enable(d) )
2311 {
2312 argo_dprintk("argo disabled, domid: %u\n", d->domain_id);
2313 return 0;
2314 }
2315
2316 argo_dprintk("init: domid: %u\n", d->domain_id);
2317
2318 argo = xzalloc(struct argo_domain);
2319 if ( !argo )
2320 return -ENOMEM;
2321
2322 argo_domain_init(argo);
2323
2324 write_lock(&L1_global_argo_rwlock);
2325
2326 d->argo = argo;
2327
2328 write_unlock(&L1_global_argo_rwlock);
2329
2330 return 0;
2331 }
2332
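/*
 * Domain teardown: under the L1 write lock, remove every ring the dying
 * domain registered, along with its state on other domains' rings (partner
 * send entries and wildcard pending entries), then free d->argo.
 */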
2333 void
2334 argo_destroy(struct domain *d)
2335 {
2336 BUG_ON(!d->is_dying);
2337
2338 write_lock(&L1_global_argo_rwlock);
2339
2340 argo_dprintk("destroy: domid %u d->argo=%p\n", d->domain_id, d->argo);
2341
2342 if ( d->argo )
2343 {
2344 domain_rings_remove_all(d);
2345 partner_rings_remove(d);
2346 wildcard_rings_pending_remove(d);
2347 XFREE(d->argo);
2348 }
2349
2350 write_unlock(&L1_global_argo_rwlock);
2351 }
2352
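/*
 * Soft reset: tear down the domain's Argo state exactly as on destruction,
 * but keep the d->argo allocation and reinitialize it so that rings can be
 * registered again once the reset completes.
 */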
2353 void
2354 argo_soft_reset(struct domain *d)
2355 {
2356 write_lock(&L1_global_argo_rwlock);
2357
2358 argo_dprintk("soft reset d=%u d->argo=%p\n", d->domain_id, d->argo);
2359
2360 if ( d->argo )
2361 {
2362 domain_rings_remove_all(d);
2363 partner_rings_remove(d);
2364 wildcard_rings_pending_remove(d);
2365
2366 /*
2367          * Since neither opt_argo nor xsm_argo_enable(d) can change at runtime,
2368 * if d->argo is true then both opt_argo and xsm_argo_enable(d) must be
2369 * true, and we can assume that init is allowed to proceed again here.
2370 */
2371 argo_domain_init(d->argo);
2372 }
2373
2374 write_unlock(&L1_global_argo_rwlock);
2375 }
2376