1 /******************************************************************************
2 * arch/x86/mm/mem_sharing.c
3 *
4 * Memory sharing support.
5 *
6 * Copyright (c) 2011 GridCentric, Inc. (Adin Scannell & Andres Lagar-Cavilla)
7 * Copyright (c) 2009 Citrix Systems, Inc. (Grzegorz Milos)
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program; If not, see <http://www.gnu.org/licenses/>.
21 */
22
23 #include <xen/types.h>
24 #include <xen/domain_page.h>
25 #include <xen/spinlock.h>
26 #include <xen/rwlock.h>
27 #include <xen/mm.h>
28 #include <xen/grant_table.h>
29 #include <xen/sched.h>
30 #include <xen/rcupdate.h>
31 #include <xen/guest_access.h>
32 #include <xen/vm_event.h>
33 #include <asm/page.h>
34 #include <asm/string.h>
35 #include <asm/p2m.h>
36 #include <asm/altp2m.h>
37 #include <asm/atomic.h>
38 #include <asm/event.h>
39 #include <xsm/xsm.h>
40
41 #include "mm-locks.h"
42
43 static shr_handle_t next_handle = 1;
44
45 typedef struct pg_lock_data {
46 int mm_unlock_level;
47 unsigned short recurse_count;
48 } pg_lock_data_t;
49
50 static DEFINE_PER_CPU(pg_lock_data_t, __pld);
51
52 #define MEM_SHARING_DEBUG(_f, _a...) \
53 debugtrace_printk("mem_sharing_debug: %s(): " _f, __func__, ##_a)
54
55 /* Reverse map defines */
56 #define RMAP_HASHTAB_ORDER 0
57 #define RMAP_HASHTAB_SIZE \
58 ((PAGE_SIZE << RMAP_HASHTAB_ORDER) / sizeof(struct list_head))
59 #define RMAP_USES_HASHTAB(page) \
60 ((page)->sharing->hash_table.flag == NULL)
61 #define RMAP_HEAVY_SHARED_PAGE RMAP_HASHTAB_SIZE
62 /* A bit of hysteresis. We don't want to be mutating between list and hash
63 * table constantly. */
64 #define RMAP_LIGHT_SHARED_PAGE (RMAP_HEAVY_SHARED_PAGE >> 2)
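/* With an order-0 hash table, 4KiB pages and 16-byte struct list_head (the
 * usual x86-64 case), RMAP_HASHTAB_SIZE works out to 256 buckets, so the
 * rmap converts to a hash table once it reaches 256 entries and falls back
 * to a plain list when it shrinks to 64 or fewer. */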
65
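/* With MEM_SHARING_AUDIT enabled, every shared page is also kept on a global,
 * RCU-protected audit list so audit() below can walk all shared frames; the
 * entry is freed via call_rcu() once the page stops being shared. */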
66 #if MEM_SHARING_AUDIT
67
68 static struct list_head shr_audit_list;
69 static spinlock_t shr_audit_lock;
70 static DEFINE_RCU_READ_LOCK(shr_audit_read_lock);
71
72 /* RCU delayed free of audit list entry */
static void _free_pg_shared_info(struct rcu_head *head)
74 {
75 xfree(container_of(head, struct page_sharing_info, rcu_head));
76 }
77
static inline void audit_add_list(struct page_info *page)
79 {
80 INIT_LIST_HEAD(&page->sharing->entry);
81 spin_lock(&shr_audit_lock);
82 list_add_rcu(&page->sharing->entry, &shr_audit_list);
83 spin_unlock(&shr_audit_lock);
84 }
85
86 /* Removes from the audit list and cleans up the page sharing metadata. */
static inline void page_sharing_dispose(struct page_info *page)
88 {
89 /* Unlikely given our thresholds, but we should be careful. */
90 if ( unlikely(RMAP_USES_HASHTAB(page)) )
91 free_xenheap_pages(page->sharing->hash_table.bucket,
92 RMAP_HASHTAB_ORDER);
93
94 spin_lock(&shr_audit_lock);
95 list_del_rcu(&page->sharing->entry);
96 spin_unlock(&shr_audit_lock);
97 INIT_RCU_HEAD(&page->sharing->rcu_head);
98 call_rcu(&page->sharing->rcu_head, _free_pg_shared_info);
99 }
100
101 #else
102
103 #define audit_add_list(p) ((void)0)
static inline void page_sharing_dispose(struct page_info *page)
105 {
106 /* Unlikely given our thresholds, but we should be careful. */
107 if ( unlikely(RMAP_USES_HASHTAB(page)) )
108 free_xenheap_pages(page->sharing->hash_table.bucket,
109 RMAP_HASHTAB_ORDER);
110 xfree(page->sharing);
111 }
112
113 #endif /* MEM_SHARING_AUDIT */
114
static inline int mem_sharing_page_lock(struct page_info *pg)
116 {
117 int rc;
118 pg_lock_data_t *pld = &(this_cpu(__pld));
119
120 page_sharing_mm_pre_lock();
121 rc = page_lock(pg);
122 if ( rc )
123 {
124 preempt_disable();
125 page_sharing_mm_post_lock(&pld->mm_unlock_level,
126 &pld->recurse_count);
127 }
128 return rc;
129 }
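/* A successful lock records the MM lock level in this CPU's pg_lock_data and
 * leaves preemption disabled, so the matching mem_sharing_page_unlock() runs
 * on the same CPU and can restore that state. */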
130
static inline void mem_sharing_page_unlock(struct page_info *pg)
132 {
133 pg_lock_data_t *pld = &(this_cpu(__pld));
134
135 page_sharing_mm_unlock(pld->mm_unlock_level,
136 &pld->recurse_count);
137 preempt_enable();
138 page_unlock(pg);
139 }
140
static inline shr_handle_t get_next_handle(void)
142 {
143 /* Get the next handle get_page style */
144 uint64_t x, y = next_handle;
145 do {
146 x = y;
147 }
148 while ( (y = cmpxchg(&next_handle, x, x + 1)) != x );
149 return x + 1;
150 }
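/* The cmpxchg loop above is an atomic fetch-and-increment of next_handle:
 * whichever caller wins the race returns the unique value x + 1 as the new
 * sharing handle. */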
151
152 #define mem_sharing_enabled(d) \
153 (is_hvm_domain(d) && (d)->arch.hvm_domain.mem_sharing_enabled)
154
155 #undef mfn_to_page
156 #define mfn_to_page(_m) __mfn_to_page(mfn_x(_m))
157 #undef page_to_mfn
158 #define page_to_mfn(_pg) _mfn(__page_to_mfn(_pg))
159
160 static atomic_t nr_saved_mfns = ATOMIC_INIT(0);
161 static atomic_t nr_shared_mfns = ATOMIC_INIT(0);
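/* nr_shared_mfns counts frames currently in the shared (dom_cow-owned) state;
 * nr_saved_mfns counts frames freed or avoided thanks to sharing, i.e. extra
 * <domain, gfn> mappings backed by an already shared frame. */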
162
163 /** Reverse map **/
164 /* Every shared frame keeps a reverse map (rmap) of <domain, gfn> tuples that
165 * this shared frame backs. For pages with a low degree of sharing, a O(n)
166 * search linked list is good enough. For pages with higher degree of sharing,
167 * we use a hash table instead. */
168
169 typedef struct gfn_info
170 {
171 unsigned long gfn;
172 domid_t domain;
173 struct list_head list;
174 } gfn_info_t;
175
176 static inline void
rmap_init(struct page_info *page)
178 {
179 /* We always start off as a doubly linked list. */
180 INIT_LIST_HEAD(&page->sharing->gfns);
181 }
182
183 /* Exceedingly simple "hash function" */
184 #define HASH(domain, gfn) \
185 (((gfn) + (domain)) % RMAP_HASHTAB_SIZE)
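/* For example, with 256 buckets <domain 1, gfn 0x1ff> hashes to
 * (0x1ff + 1) % 256 == 0; colliding tuples simply chain on the bucket's
 * list and are told apart by rmap_retrieve(). */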
186
187 /* Conversions. Tuned by the thresholds. Should only happen twice
188 * (once each) during the lifetime of a shared page */
189 static inline int
rmap_list_to_hash_table(struct page_info *page)
191 {
192 unsigned int i;
193 struct list_head *pos, *tmp, *b =
194 alloc_xenheap_pages(RMAP_HASHTAB_ORDER, 0);
195
196 if ( b == NULL )
197 return -ENOMEM;
198
199 for ( i = 0; i < RMAP_HASHTAB_SIZE; i++ )
200 INIT_LIST_HEAD(b + i);
201
202 list_for_each_safe(pos, tmp, &page->sharing->gfns)
203 {
204 gfn_info_t *gfn_info = list_entry(pos, gfn_info_t, list);
205 struct list_head *bucket = b + HASH(gfn_info->domain, gfn_info->gfn);
206 list_del(pos);
207 list_add(pos, bucket);
208 }
209
210 page->sharing->hash_table.bucket = b;
211 page->sharing->hash_table.flag = NULL;
212
213 return 0;
214 }
215
216 static inline void
rmap_hash_table_to_list(struct page_info *page)
218 {
219 unsigned int i;
220 struct list_head *bucket = page->sharing->hash_table.bucket;
221
222 INIT_LIST_HEAD(&page->sharing->gfns);
223
224 for ( i = 0; i < RMAP_HASHTAB_SIZE; i++ )
225 {
226 struct list_head *pos, *tmp, *head = bucket + i;
227 list_for_each_safe(pos, tmp, head)
228 {
229 list_del(pos);
230 list_add(pos, &page->sharing->gfns);
231 }
232 }
233
234 free_xenheap_pages(bucket, RMAP_HASHTAB_ORDER);
235 }
236
237 /* Generic accessors to the rmap */
238 static inline unsigned long
rmap_count(struct page_info *pg)
240 {
241 unsigned long count;
242 unsigned long t = read_atomic(&pg->u.inuse.type_info);
243 count = t & PGT_count_mask;
244 if ( t & PGT_locked )
245 count--;
246 return count;
247 }
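/* The shared page's type count carries one reference per rmap entry, plus one
 * extra while the page is PGT_locked, hence the adjustment above. */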
248
249 /* The page type count is always decreased after removing from the rmap.
250 * Use a convert flag to avoid mutating the rmap if in the middle of an
251 * iterator, or if the page will be soon destroyed anyways. */
252 static inline void
rmap_del(gfn_info_t *gfn_info, struct page_info *page, int convert)
254 {
255 if ( RMAP_USES_HASHTAB(page) && convert &&
256 (rmap_count(page) <= RMAP_LIGHT_SHARED_PAGE) )
257 rmap_hash_table_to_list(page);
258
259 /* Regardless of rmap type, same removal operation */
260 list_del(&gfn_info->list);
261 }
262
263 /* The page type count is always increased before adding to the rmap. */
264 static inline void
rmap_add(gfn_info_t *gfn_info, struct page_info *page)
266 {
267 struct list_head *head;
268
269 if ( !RMAP_USES_HASHTAB(page) &&
270 (rmap_count(page) >= RMAP_HEAVY_SHARED_PAGE) )
271 /* The conversion may fail with ENOMEM. We'll be less efficient,
272 * but no reason to panic. */
273 (void)rmap_list_to_hash_table(page);
274
275 head = (RMAP_USES_HASHTAB(page)) ?
276 page->sharing->hash_table.bucket +
277 HASH(gfn_info->domain, gfn_info->gfn) :
278 &page->sharing->gfns;
279
280 INIT_LIST_HEAD(&gfn_info->list);
281 list_add(&gfn_info->list, head);
282 }
283
284 static inline gfn_info_t *
rmap_retrieve(uint16_t domain_id, unsigned long gfn,
286 struct page_info *page)
287 {
288 gfn_info_t *gfn_info;
289 struct list_head *le, *head;
290
291 head = (RMAP_USES_HASHTAB(page)) ?
292 page->sharing->hash_table.bucket + HASH(domain_id, gfn) :
293 &page->sharing->gfns;
294
295 list_for_each(le, head)
296 {
297 gfn_info = list_entry(le, gfn_info_t, list);
298 if ( (gfn_info->gfn == gfn) && (gfn_info->domain == domain_id) )
299 return gfn_info;
300 }
301
302 /* Nothing was found */
303 return NULL;
304 }
305
306 /* Returns true if the rmap has only one entry. O(1) complexity. */
static inline int rmap_has_one_entry(struct page_info *page)
308 {
309 return (rmap_count(page) == 1);
310 }
311
312 /* Returns true if the rmap has any entries. O(1) complexity. */
static inline int rmap_has_entries(struct page_info *page)
314 {
315 return (rmap_count(page) != 0);
316 }
317
318 /* The iterator hides the details of how the rmap is implemented. This
319 * involves splitting the list_for_each_safe macro into two steps. */
320 struct rmap_iterator {
321 struct list_head *curr;
322 struct list_head *next;
323 unsigned int bucket;
324 };
325
326 static inline void
rmap_seed_iterator(struct page_info *page, struct rmap_iterator *ri)
328 {
329 ri->curr = (RMAP_USES_HASHTAB(page)) ?
330 page->sharing->hash_table.bucket :
331 &page->sharing->gfns;
332 ri->next = ri->curr->next;
333 ri->bucket = 0;
334 }
335
336 static inline gfn_info_t *
rmap_iterate(struct page_info *page, struct rmap_iterator *ri)
338 {
339 struct list_head *head = (RMAP_USES_HASHTAB(page)) ?
340 page->sharing->hash_table.bucket + ri->bucket :
341 &page->sharing->gfns;
342
343 retry:
344 if ( ri->next == head)
345 {
346 if ( RMAP_USES_HASHTAB(page) )
347 {
348 ri->bucket++;
349 if ( ri->bucket >= RMAP_HASHTAB_SIZE )
350 /* No more hash table buckets */
351 return NULL;
352 head = page->sharing->hash_table.bucket + ri->bucket;
353 ri->curr = head;
354 ri->next = ri->curr->next;
355 goto retry;
356 } else
357 /* List exhausted */
358 return NULL;
359 }
360
361 ri->curr = ri->next;
362 ri->next = ri->curr->next;
363
364 return list_entry(ri->curr, gfn_info_t, list);
365 }
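/* A typical walk (audit() and share_pages() below follow this pattern); since
 * 'next' is saved before an entry is returned, the caller may rmap_del() the
 * entry it just received:
 *
 *     struct rmap_iterator ri;
 *     gfn_info_t *g;
 *
 *     rmap_seed_iterator(page, &ri);
 *     while ( (g = rmap_iterate(page, &ri)) != NULL )
 *         ... use or remove g ...
 */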
366
static inline gfn_info_t *mem_sharing_gfn_alloc(struct page_info *page,
368 struct domain *d,
369 unsigned long gfn)
370 {
371 gfn_info_t *gfn_info = xmalloc(gfn_info_t);
372
373 if ( gfn_info == NULL )
374 return NULL;
375
376 gfn_info->gfn = gfn;
377 gfn_info->domain = d->domain_id;
378
379 rmap_add(gfn_info, page);
380
/* Increment our number of shared pages. */
382 atomic_inc(&d->shr_pages);
383
384 return gfn_info;
385 }
386
static inline void mem_sharing_gfn_destroy(struct page_info *page,
388 struct domain *d,
389 gfn_info_t *gfn_info)
390 {
391 /* Decrement the number of pages. */
392 atomic_dec(&d->shr_pages);
393
394 /* Free the gfn_info structure. */
395 rmap_del(gfn_info, page, 1);
396 xfree(gfn_info);
397 }
398
static struct page_info* mem_sharing_lookup(unsigned long mfn)
400 {
401 if ( mfn_valid(_mfn(mfn)) )
402 {
403 struct page_info* page = mfn_to_page(_mfn(mfn));
404 if ( page_get_owner(page) == dom_cow )
405 {
406 /* Count has to be at least two, because we're called
407 * with the mfn locked (1) and this is supposed to be
408 * a shared page (1). */
409 unsigned long t = read_atomic(&page->u.inuse.type_info);
410 ASSERT((t & PGT_type_mask) == PGT_shared_page);
411 ASSERT((t & PGT_count_mask) >= 2);
412 ASSERT(get_gpfn_from_mfn(mfn) == SHARED_M2P_ENTRY);
413 return page;
414 }
415 }
416
417 return NULL;
418 }
419
static int audit(void)
421 {
422 #if MEM_SHARING_AUDIT
423 int errors = 0;
424 unsigned long count_expected;
425 unsigned long count_found = 0;
426 struct list_head *ae;
427
428 count_expected = atomic_read(&nr_shared_mfns);
429
430 rcu_read_lock(&shr_audit_read_lock);
431
432 list_for_each_rcu(ae, &shr_audit_list)
433 {
434 struct page_sharing_info *pg_shared_info;
435 unsigned long nr_gfns = 0;
436 struct page_info *pg;
437 mfn_t mfn;
438 gfn_info_t *g;
439 struct rmap_iterator ri;
440
441 pg_shared_info = list_entry(ae, struct page_sharing_info, entry);
442 pg = pg_shared_info->pg;
443 mfn = page_to_mfn(pg);
444
445 /* If we can't lock it, it's definitely not a shared page */
446 if ( !mem_sharing_page_lock(pg) )
447 {
448 MEM_SHARING_DEBUG("mfn %lx in audit list, but cannot be locked (%lx)!\n",
449 mfn_x(mfn), pg->u.inuse.type_info);
450 errors++;
451 continue;
452 }
453
454 /* Check if the MFN has correct type, owner and handle. */
455 if ( (pg->u.inuse.type_info & PGT_type_mask) != PGT_shared_page )
456 {
457 MEM_SHARING_DEBUG("mfn %lx in audit list, but not PGT_shared_page (%lx)!\n",
458 mfn_x(mfn), pg->u.inuse.type_info & PGT_type_mask);
459 errors++;
460 continue;
461 }
462
463 /* Check the page owner. */
464 if ( page_get_owner(pg) != dom_cow )
465 {
466 MEM_SHARING_DEBUG("mfn %lx shared, but wrong owner (%hu)!\n",
467 mfn_x(mfn), page_get_owner(pg)->domain_id);
468 errors++;
469 }
470
471 /* Check the m2p entry */
472 if ( get_gpfn_from_mfn(mfn_x(mfn)) != SHARED_M2P_ENTRY )
473 {
474 MEM_SHARING_DEBUG("mfn %lx shared, but wrong m2p entry (%lx)!\n",
475 mfn_x(mfn), get_gpfn_from_mfn(mfn_x(mfn)));
476 errors++;
477 }
478
479 /* Check we have a list */
480 if ( (!pg->sharing) || !rmap_has_entries(pg) )
481 {
482 MEM_SHARING_DEBUG("mfn %lx shared, but empty gfn list!\n",
483 mfn_x(mfn));
484 errors++;
485 continue;
486 }
487
488 /* We've found a page that is shared */
489 count_found++;
490
491 /* Check if all GFNs map to the MFN, and the p2m types */
492 rmap_seed_iterator(pg, &ri);
493 while ( (g = rmap_iterate(pg, &ri)) != NULL )
494 {
495 struct domain *d;
496 p2m_type_t t;
497 mfn_t o_mfn;
498
499 d = get_domain_by_id(g->domain);
500 if ( d == NULL )
501 {
502 MEM_SHARING_DEBUG("Unknown dom: %hu, for PFN=%lx, MFN=%lx\n",
503 g->domain, g->gfn, mfn_x(mfn));
504 errors++;
505 continue;
506 }
507 o_mfn = get_gfn_query_unlocked(d, g->gfn, &t);
508 if ( mfn_x(o_mfn) != mfn_x(mfn) )
509 {
510 MEM_SHARING_DEBUG("Incorrect P2M for d=%hu, PFN=%lx."
511 "Expecting MFN=%lx, got %lx\n",
512 g->domain, g->gfn, mfn_x(mfn), mfn_x(o_mfn));
513 errors++;
514 }
515 if ( t != p2m_ram_shared )
516 {
517 MEM_SHARING_DEBUG("Incorrect P2M type for d=%hu, PFN=%lx MFN=%lx."
518 "Expecting t=%d, got %d\n",
519 g->domain, g->gfn, mfn_x(mfn), p2m_ram_shared, t);
520 errors++;
521 }
522 put_domain(d);
523 nr_gfns++;
524 }
525 /* The type count has an extra ref because we have locked the page */
526 if ( (nr_gfns + 1) != (pg->u.inuse.type_info & PGT_count_mask) )
527 {
528 MEM_SHARING_DEBUG("Mismatched counts for MFN=%lx."
529 "nr_gfns in list %lu, in type_info %lx\n",
530 mfn_x(mfn), nr_gfns,
531 (pg->u.inuse.type_info & PGT_count_mask));
532 errors++;
533 }
534
535 mem_sharing_page_unlock(pg);
536 }
537
538 rcu_read_unlock(&shr_audit_read_lock);
539
540 if ( count_found != count_expected )
541 {
542 MEM_SHARING_DEBUG("Expected %ld shared mfns, found %ld.",
543 count_expected, count_found);
544 errors++;
545 }
546
547 return errors;
548 #else
549 return -EOPNOTSUPP;
550 #endif
551 }
552
int mem_sharing_notify_enomem(struct domain *d, unsigned long gfn,
554 bool_t allow_sleep)
555 {
556 struct vcpu *v = current;
557 int rc;
558 vm_event_request_t req = {
559 .reason = VM_EVENT_REASON_MEM_SHARING,
560 .vcpu_id = v->vcpu_id,
561 .u.mem_sharing.gfn = gfn,
562 .u.mem_sharing.p2mt = p2m_ram_shared
563 };
564
565 if ( (rc = __vm_event_claim_slot(d,
566 d->vm_event_share, allow_sleep)) < 0 )
567 return rc;
568
569 if ( v->domain == d )
570 {
571 req.flags = VM_EVENT_FLAG_VCPU_PAUSED;
572 vm_event_vcpu_pause(v);
573 }
574
575 vm_event_put_request(d, d->vm_event_share, &req);
576
577 return 0;
578 }
579
unsigned int mem_sharing_get_nr_saved_mfns(void)
581 {
582 return ((unsigned int)atomic_read(&nr_saved_mfns));
583 }
584
unsigned int mem_sharing_get_nr_shared_mfns(void)
586 {
587 return (unsigned int)atomic_read(&nr_shared_mfns);
588 }
589
590 /* Functions that change a page's type and ownership */
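/* page_make_sharable() moves a page to dom_cow ownership and gives it the
 * PGT_shared_page type; page_make_private() below reverses that, handing the
 * frame back to the unsharing domain. Both adjust the domain's page list and
 * tot_pages accounting. */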
static int page_make_sharable(struct domain *d,
592 struct page_info *page,
593 int expected_refcnt)
594 {
595 bool_t drop_dom_ref;
596
597 spin_lock(&d->page_alloc_lock);
598
599 if ( d->is_dying )
600 {
601 spin_unlock(&d->page_alloc_lock);
602 return -EBUSY;
603 }
604
605 /* Change page type and count atomically */
606 if ( !get_page_and_type(page, d, PGT_shared_page) )
607 {
608 spin_unlock(&d->page_alloc_lock);
609 return -EINVAL;
610 }
611
612 /* Check it wasn't already sharable and undo if it was */
613 if ( (page->u.inuse.type_info & PGT_count_mask) != 1 )
614 {
615 spin_unlock(&d->page_alloc_lock);
616 put_page_and_type(page);
617 return -EEXIST;
618 }
619
/* Check that the reference count is 2 + expected_refcnt: one from
 * PGC_allocated, one from get_page_and_type() above, plus any references
 * the caller expects the page to already hold. */
622 if ( page->count_info != (PGC_allocated | (2 + expected_refcnt)) )
623 {
624 spin_unlock(&d->page_alloc_lock);
625 /* Return type count back to zero */
626 put_page_and_type(page);
627 return -E2BIG;
628 }
629
630 page_set_owner(page, dom_cow);
631 drop_dom_ref = !domain_adjust_tot_pages(d, -1);
632 page_list_del(page, &d->page_list);
633 spin_unlock(&d->page_alloc_lock);
634
635 if ( drop_dom_ref )
636 put_domain(d);
637 return 0;
638 }
639
static int page_make_private(struct domain *d, struct page_info *page)
641 {
642 unsigned long expected_type;
643
644 if ( !get_page(page, dom_cow) )
645 return -EINVAL;
646
647 spin_lock(&d->page_alloc_lock);
648
649 if ( d->is_dying )
650 {
651 spin_unlock(&d->page_alloc_lock);
652 put_page(page);
653 return -EBUSY;
654 }
655
656 /* We can only change the type if count is one */
657 /* Because we are locking pages individually, we need to drop
658 * the lock here, while the page is typed. We cannot risk the
659 * race of page_unlock and then put_page_type. */
660 expected_type = (PGT_shared_page | PGT_validated | PGT_locked | 2);
661 if ( page->u.inuse.type_info != expected_type )
662 {
663 spin_unlock(&d->page_alloc_lock);
664 put_page(page);
665 return -EEXIST;
666 }
667
668 /* Drop the final typecount */
669 put_page_and_type(page);
670
671 /* Now that we've dropped the type, we can unlock */
672 mem_sharing_page_unlock(page);
673
674 /* Change the owner */
675 ASSERT(page_get_owner(page) == dom_cow);
676 page_set_owner(page, d);
677
678 if ( domain_adjust_tot_pages(d, 1) == 1 )
679 get_knownalive_domain(d);
680 page_list_add_tail(page, &d->page_list);
681 spin_unlock(&d->page_alloc_lock);
682
683 put_page(page);
684
685 return 0;
686 }
687
static inline struct page_info *__grab_shared_page(mfn_t mfn)
689 {
690 struct page_info *pg = NULL;
691
692 if ( !mfn_valid(mfn) )
693 return NULL;
694 pg = mfn_to_page(mfn);
695
696 /* If the page is not validated we can't lock it, and if it's
697 * not validated it's obviously not shared. */
698 if ( !mem_sharing_page_lock(pg) )
699 return NULL;
700
701 if ( mem_sharing_lookup(mfn_x(mfn)) == NULL )
702 {
703 mem_sharing_page_unlock(pg);
704 return NULL;
705 }
706
707 return pg;
708 }
709
static int debug_mfn(mfn_t mfn)
711 {
712 struct page_info *page;
713 int num_refs;
714
715 if ( (page = __grab_shared_page(mfn)) == NULL)
716 {
717 gdprintk(XENLOG_ERR, "Invalid MFN=%lx\n", mfn_x(mfn));
718 return -EINVAL;
719 }
720
721 MEM_SHARING_DEBUG(
722 "Debug page: MFN=%lx is ci=%lx, ti=%lx, owner_id=%d\n",
723 mfn_x(page_to_mfn(page)),
724 page->count_info,
725 page->u.inuse.type_info,
726 page_get_owner(page)->domain_id);
727
728 /* -1 because the page is locked and that's an additional type ref */
729 num_refs = ((int) (page->u.inuse.type_info & PGT_count_mask)) - 1;
730 mem_sharing_page_unlock(page);
731 return num_refs;
732 }
733
static int debug_gfn(struct domain *d, gfn_t gfn)
735 {
736 p2m_type_t p2mt;
737 mfn_t mfn;
738 int num_refs;
739
740 mfn = get_gfn_query(d, gfn_x(gfn), &p2mt);
741
742 MEM_SHARING_DEBUG("Debug for dom%d, gfn=%" PRI_gfn "\n",
743 d->domain_id, gfn_x(gfn));
744 num_refs = debug_mfn(mfn);
745 put_gfn(d, gfn_x(gfn));
746
747 return num_refs;
748 }
749
static int debug_gref(struct domain *d, grant_ref_t ref)
751 {
752 int rc;
753 uint16_t status;
754 gfn_t gfn;
755
756 rc = mem_sharing_gref_to_gfn(d->grant_table, ref, &gfn, &status);
757 if ( rc )
758 {
759 MEM_SHARING_DEBUG("Asked to debug [dom=%d,gref=%u]: error %d.\n",
760 d->domain_id, ref, rc);
761 return rc;
762 }
763
764 MEM_SHARING_DEBUG(
765 "==> Grant [dom=%d,ref=%d], status=%x. ",
766 d->domain_id, ref, status);
767
768 return debug_gfn(d, gfn);
769 }
770
static int nominate_page(struct domain *d, gfn_t gfn,
772 int expected_refcnt, shr_handle_t *phandle)
773 {
774 struct p2m_domain *hp2m = p2m_get_hostp2m(d);
775 p2m_type_t p2mt;
776 p2m_access_t p2ma;
777 mfn_t mfn;
778 struct page_info *page = NULL; /* gcc... */
779 int ret;
780
781 *phandle = 0UL;
782
783 mfn = get_gfn_type_access(hp2m, gfn_x(gfn), &p2mt, &p2ma, 0, NULL);
784
785 /* Check if mfn is valid */
786 ret = -EINVAL;
787 if ( !mfn_valid(mfn) )
788 goto out;
789
790 /* Return the handle if the page is already shared */
791 if ( p2m_is_shared(p2mt) ) {
792 struct page_info *pg = __grab_shared_page(mfn);
793 if ( !pg )
794 {
795 gprintk(XENLOG_ERR,
796 "Shared p2m entry gfn %" PRI_gfn ", but could not grab mfn %" PRI_mfn " dom%d\n",
797 gfn_x(gfn), mfn_x(mfn), d->domain_id);
798 BUG();
799 }
800 *phandle = pg->sharing->handle;
801 ret = 0;
802 mem_sharing_page_unlock(pg);
803 goto out;
804 }
805
806 /* Check p2m type */
807 if ( !p2m_is_sharable(p2mt) )
808 goto out;
809
810 /* Check if there are mem_access/remapped altp2m entries for this page */
811 if ( altp2m_active(d) )
812 {
813 unsigned int i;
814 struct p2m_domain *ap2m;
815 mfn_t amfn;
816 p2m_type_t ap2mt;
817 p2m_access_t ap2ma;
818
819 altp2m_list_lock(d);
820
821 for ( i = 0; i < MAX_ALTP2M; i++ )
822 {
823 ap2m = d->arch.altp2m_p2m[i];
824 if ( !ap2m )
825 continue;
826
827 amfn = get_gfn_type_access(ap2m, gfn_x(gfn), &ap2mt, &ap2ma, 0, NULL);
828 if ( mfn_valid(amfn) && (!mfn_eq(amfn, mfn) || ap2ma != p2ma) )
829 {
830 altp2m_list_unlock(d);
831 goto out;
832 }
833 }
834
835 altp2m_list_unlock(d);
836 }
837
838 /* Try to convert the mfn to the sharable type */
839 page = mfn_to_page(mfn);
840 ret = page_make_sharable(d, page, expected_refcnt);
841 if ( ret )
842 goto out;
843
844 /* Now that the page is validated, we can lock it. There is no
845 * race because we're holding the p2m entry, so no one else
846 * could be nominating this gfn */
847 ret = -ENOENT;
848 if ( !mem_sharing_page_lock(page) )
849 goto out;
850
851 /* Initialize the shared state */
852 ret = -ENOMEM;
853 if ( (page->sharing =
854 xmalloc(struct page_sharing_info)) == NULL )
855 {
856 /* Making a page private atomically unlocks it */
857 BUG_ON(page_make_private(d, page) != 0);
858 goto out;
859 }
860 page->sharing->pg = page;
861 rmap_init(page);
862
863 /* Create the handle */
864 page->sharing->handle = get_next_handle();
865
866 /* Create the local gfn info */
867 if ( mem_sharing_gfn_alloc(page, d, gfn_x(gfn)) == NULL )
868 {
869 xfree(page->sharing);
870 page->sharing = NULL;
871 BUG_ON(page_make_private(d, page) != 0);
872 goto out;
873 }
874
875 /* Change the p2m type, should never fail with p2m locked. */
876 BUG_ON(p2m_change_type_one(d, gfn_x(gfn), p2mt, p2m_ram_shared));
877
878 /* Account for this page. */
879 atomic_inc(&nr_shared_mfns);
880
881 /* Update m2p entry to SHARED_M2P_ENTRY */
882 set_gpfn_from_mfn(mfn_x(mfn), SHARED_M2P_ENTRY);
883
884 *phandle = page->sharing->handle;
885 audit_add_list(page);
886 mem_sharing_page_unlock(page);
887 ret = 0;
888
889 out:
890 put_gfn(d, gfn_x(gfn));
891 return ret;
892 }
893
static int share_pages(struct domain *sd, gfn_t sgfn, shr_handle_t sh,
895 struct domain *cd, gfn_t cgfn, shr_handle_t ch)
896 {
897 struct page_info *spage, *cpage, *firstpg, *secondpg;
898 gfn_info_t *gfn;
899 struct domain *d;
900 int ret = -EINVAL;
901 mfn_t smfn, cmfn;
902 p2m_type_t smfn_type, cmfn_type;
903 struct two_gfns tg;
904 struct rmap_iterator ri;
905
906 get_two_gfns(sd, gfn_x(sgfn), &smfn_type, NULL, &smfn,
907 cd, gfn_x(cgfn), &cmfn_type, NULL, &cmfn,
908 0, &tg);
909
910 /* This tricky business is to avoid two callers deadlocking if
911 * grabbing pages in opposite client/source order */
912 if( mfn_x(smfn) == mfn_x(cmfn) )
913 {
914 /* The pages are already the same. We could return some
915 * kind of error here, but no matter how you look at it,
916 * the pages are already 'shared'. It possibly represents
917 * a big problem somewhere else, but as far as sharing is
918 * concerned: great success! */
919 ret = 0;
920 goto err_out;
921 }
922 else if ( mfn_x(smfn) < mfn_x(cmfn) )
923 {
924 ret = XENMEM_SHARING_OP_S_HANDLE_INVALID;
925 spage = firstpg = __grab_shared_page(smfn);
926 if ( spage == NULL )
927 goto err_out;
928
929 ret = XENMEM_SHARING_OP_C_HANDLE_INVALID;
930 cpage = secondpg = __grab_shared_page(cmfn);
931 if ( cpage == NULL )
932 {
933 mem_sharing_page_unlock(spage);
934 goto err_out;
935 }
936 } else {
937 ret = XENMEM_SHARING_OP_C_HANDLE_INVALID;
938 cpage = firstpg = __grab_shared_page(cmfn);
939 if ( cpage == NULL )
940 goto err_out;
941
942 ret = XENMEM_SHARING_OP_S_HANDLE_INVALID;
943 spage = secondpg = __grab_shared_page(smfn);
944 if ( spage == NULL )
945 {
946 mem_sharing_page_unlock(cpage);
947 goto err_out;
948 }
949 }
950
951 ASSERT(smfn_type == p2m_ram_shared);
952 ASSERT(cmfn_type == p2m_ram_shared);
953
954 /* Check that the handles match */
955 if ( spage->sharing->handle != sh )
956 {
957 ret = XENMEM_SHARING_OP_S_HANDLE_INVALID;
958 mem_sharing_page_unlock(secondpg);
959 mem_sharing_page_unlock(firstpg);
960 goto err_out;
961 }
962 if ( cpage->sharing->handle != ch )
963 {
964 ret = XENMEM_SHARING_OP_C_HANDLE_INVALID;
965 mem_sharing_page_unlock(secondpg);
966 mem_sharing_page_unlock(firstpg);
967 goto err_out;
968 }
969
970 /* Merge the lists together */
971 rmap_seed_iterator(cpage, &ri);
972 while ( (gfn = rmap_iterate(cpage, &ri)) != NULL)
973 {
974 /* Get the source page and type, this should never fail:
975 * we are under shr lock, and got a successful lookup */
976 BUG_ON(!get_page_and_type(spage, dom_cow, PGT_shared_page));
977 /* Move the gfn_info from client list to source list.
978 * Don't change the type of rmap for the client page. */
979 rmap_del(gfn, cpage, 0);
980 rmap_add(gfn, spage);
981 put_page_and_type(cpage);
982 d = get_domain_by_id(gfn->domain);
983 BUG_ON(!d);
984 BUG_ON(set_shared_p2m_entry(d, gfn->gfn, smfn));
985 put_domain(d);
986 }
987 ASSERT(list_empty(&cpage->sharing->gfns));
988
989 /* Clear the rest of the shared state */
990 page_sharing_dispose(cpage);
991 cpage->sharing = NULL;
992
993 mem_sharing_page_unlock(secondpg);
994 mem_sharing_page_unlock(firstpg);
995
996 /* Free the client page */
997 if(test_and_clear_bit(_PGC_allocated, &cpage->count_info))
998 put_page(cpage);
999
1000 /* We managed to free a domain page. */
1001 atomic_dec(&nr_shared_mfns);
1002 atomic_inc(&nr_saved_mfns);
1003 ret = 0;
1004
1005 err_out:
1006 put_two_gfns(&tg);
1007 return ret;
1008 }
1009
int mem_sharing_add_to_physmap(struct domain *sd, unsigned long sgfn, shr_handle_t sh,
1011 struct domain *cd, unsigned long cgfn)
1012 {
1013 struct page_info *spage;
1014 int ret = -EINVAL;
1015 mfn_t smfn, cmfn;
1016 p2m_type_t smfn_type, cmfn_type;
1017 struct gfn_info *gfn_info;
1018 struct p2m_domain *p2m = p2m_get_hostp2m(cd);
1019 p2m_access_t a;
1020 struct two_gfns tg;
1021
1022 get_two_gfns(sd, sgfn, &smfn_type, NULL, &smfn,
1023 cd, cgfn, &cmfn_type, &a, &cmfn,
1024 0, &tg);
1025
1026 /* Get the source shared page, check and lock */
1027 ret = XENMEM_SHARING_OP_S_HANDLE_INVALID;
1028 spage = __grab_shared_page(smfn);
1029 if ( spage == NULL )
1030 goto err_out;
1031 ASSERT(smfn_type == p2m_ram_shared);
1032
1033 /* Check that the handles match */
1034 if ( spage->sharing->handle != sh )
1035 goto err_unlock;
1036
1037 /* Make sure the target page is a hole in the physmap. These are typically
1038 * p2m_mmio_dm, but also accept p2m_invalid and paged out pages. See the
1039 * definition of p2m_is_hole in p2m.h. */
1040 if ( !p2m_is_hole(cmfn_type) )
1041 {
1042 ret = XENMEM_SHARING_OP_C_HANDLE_INVALID;
1043 goto err_unlock;
1044 }
1045
1046 /* This is simpler than regular sharing */
1047 BUG_ON(!get_page_and_type(spage, dom_cow, PGT_shared_page));
1048 if ( (gfn_info = mem_sharing_gfn_alloc(spage, cd, cgfn)) == NULL )
1049 {
1050 put_page_and_type(spage);
1051 ret = -ENOMEM;
1052 goto err_unlock;
1053 }
1054
1055 ret = p2m_set_entry(p2m, _gfn(cgfn), smfn, PAGE_ORDER_4K,
1056 p2m_ram_shared, a);
1057
1058 /* Tempted to turn this into an assert */
1059 if ( ret )
1060 {
1061 mem_sharing_gfn_destroy(spage, cd, gfn_info);
1062 put_page_and_type(spage);
1063 } else {
1064 /* There is a chance we're plugging a hole where a paged out page was */
1065 if ( p2m_is_paging(cmfn_type) && (cmfn_type != p2m_ram_paging_out) )
1066 {
1067 atomic_dec(&cd->paged_pages);
1068 /* Further, there is a chance this was a valid page. Don't leak it. */
1069 if ( mfn_valid(cmfn) )
1070 {
1071 struct page_info *cpage = mfn_to_page(cmfn);
1072 ASSERT(cpage != NULL);
1073 if ( test_and_clear_bit(_PGC_allocated, &cpage->count_info) )
1074 put_page(cpage);
1075 }
1076 }
1077 }
1078
1079 atomic_inc(&nr_saved_mfns);
1080
1081 err_unlock:
1082 mem_sharing_page_unlock(spage);
1083 err_out:
1084 put_two_gfns(&tg);
1085 return ret;
1086 }
1087
1088
1089 /* A note on the rationale for unshare error handling:
1090 * 1. Unshare can only fail with ENOMEM. Any other error conditions BUG_ON()'s
1091 * 2. We notify a potential dom0 helper through a vm_event ring. But we
 * allow the notification to not go to sleep. If the event ring is full
 * of ENOMEM warnings, the helper is already well aware of the situation.
1094 * 3. We cannot go to sleep until the unshare is resolved, because we might
1095 * be buried deep into locks (e.g. something -> copy_to_user -> __hvm_copy)
1096 * 4. So, we make sure we:
1097 * 4.1. return an error
1098 * 4.2. do not corrupt shared memory
1099 * 4.3. do not corrupt guest memory
1100 * 4.4. let the guest deal with it if the error propagation will reach it
1101 */
int __mem_sharing_unshare_page(struct domain *d,
1103 unsigned long gfn,
1104 uint16_t flags)
1105 {
1106 p2m_type_t p2mt;
1107 mfn_t mfn;
1108 struct page_info *page, *old_page;
1109 int last_gfn;
1110 gfn_info_t *gfn_info = NULL;
1111
1112 mfn = get_gfn(d, gfn, &p2mt);
1113
1114 /* Has someone already unshared it? */
1115 if ( !p2m_is_shared(p2mt) ) {
1116 put_gfn(d, gfn);
1117 return 0;
1118 }
1119
1120 page = __grab_shared_page(mfn);
1121 if ( page == NULL )
1122 {
1123 gdprintk(XENLOG_ERR, "Domain p2m is shared, but page is not: "
1124 "%lx\n", gfn);
1125 BUG();
1126 }
1127
1128 gfn_info = rmap_retrieve(d->domain_id, gfn, page);
1129 if ( unlikely(gfn_info == NULL) )
1130 {
1131 gdprintk(XENLOG_ERR, "Could not find gfn_info for shared gfn: "
1132 "%lx\n", gfn);
1133 BUG();
1134 }
1135
/* Do the accounting first. If anything fails below, we have bigger
 * fish to fry. First, remove the gfn from the list. */
1138 last_gfn = rmap_has_one_entry(page);
1139 if ( last_gfn )
1140 {
1141 /* Clean up shared state. Get rid of the <domid, gfn> tuple
1142 * before destroying the rmap. */
1143 mem_sharing_gfn_destroy(page, d, gfn_info);
1144 page_sharing_dispose(page);
1145 page->sharing = NULL;
1146 atomic_dec(&nr_shared_mfns);
1147 }
1148 else
1149 atomic_dec(&nr_saved_mfns);
1150
1151 /* If the GFN is getting destroyed drop the references to MFN
1152 * (possibly freeing the page), and exit early */
1153 if ( flags & MEM_SHARING_DESTROY_GFN )
1154 {
1155 if ( !last_gfn )
1156 mem_sharing_gfn_destroy(page, d, gfn_info);
1157 put_page_and_type(page);
1158 mem_sharing_page_unlock(page);
1159 if ( last_gfn &&
1160 test_and_clear_bit(_PGC_allocated, &page->count_info) )
1161 put_page(page);
1162 put_gfn(d, gfn);
1163
1164 return 0;
1165 }
1166
1167 if ( last_gfn )
1168 {
1169 /* Making a page private atomically unlocks it */
1170 BUG_ON(page_make_private(d, page) != 0);
1171 goto private_page_found;
1172 }
1173
1174 old_page = page;
1175 page = alloc_domheap_page(d, 0);
1176 if ( !page )
1177 {
1178 /* Undo dec of nr_saved_mfns, as the retry will decrease again. */
1179 atomic_inc(&nr_saved_mfns);
1180 mem_sharing_page_unlock(old_page);
1181 put_gfn(d, gfn);
1182 /* Caller is responsible for placing an event
1183 * in the ring */
1184 return -ENOMEM;
1185 }
1186
1187 copy_domain_page(page_to_mfn(page), page_to_mfn(old_page));
1188
1189 BUG_ON(set_shared_p2m_entry(d, gfn, page_to_mfn(page)));
1190 mem_sharing_gfn_destroy(old_page, d, gfn_info);
1191 mem_sharing_page_unlock(old_page);
1192 put_page_and_type(old_page);
1193
1194 private_page_found:
1195 if ( p2m_change_type_one(d, gfn, p2m_ram_shared, p2m_ram_rw) )
1196 {
1197 gdprintk(XENLOG_ERR, "Could not change p2m type d %hu gfn %lx.\n",
1198 d->domain_id, gfn);
1199 BUG();
1200 }
1201
1202 /* Update m2p entry */
1203 set_gpfn_from_mfn(mfn_x(page_to_mfn(page)), gfn);
1204
1205 /* Now that the gfn<->mfn map is properly established,
1206 * marking dirty is feasible */
1207 paging_mark_dirty(d, page_to_mfn(page));
1208 /* We do not need to unlock a private page */
1209 put_gfn(d, gfn);
1210 return 0;
1211 }
1212
int relinquish_shared_pages(struct domain *d)
1214 {
1215 int rc = 0;
1216 struct p2m_domain *p2m = p2m_get_hostp2m(d);
1217 unsigned long gfn, count = 0;
1218
1219 if ( p2m == NULL )
1220 return 0;
1221
1222 p2m_lock(p2m);
1223 for ( gfn = p2m->next_shared_gfn_to_relinquish;
1224 gfn <= p2m->max_mapped_pfn; gfn++ )
1225 {
1226 p2m_access_t a;
1227 p2m_type_t t;
1228 mfn_t mfn;
1229 int set_rc;
1230
1231 if ( atomic_read(&d->shr_pages) == 0 )
1232 break;
1233 mfn = p2m->get_entry(p2m, _gfn(gfn), &t, &a, 0, NULL, NULL);
1234 if ( mfn_valid(mfn) && (t == p2m_ram_shared) )
1235 {
1236 /* Does not fail with ENOMEM given the DESTROY flag */
1237 BUG_ON(__mem_sharing_unshare_page(d, gfn,
1238 MEM_SHARING_DESTROY_GFN));
1239 /* Clear out the p2m entry so no one else may try to
1240 * unshare. Must succeed: we just read the old entry and
1241 * we hold the p2m lock. */
1242 set_rc = p2m->set_entry(p2m, _gfn(gfn), _mfn(0), PAGE_ORDER_4K,
1243 p2m_invalid, p2m_access_rwx, -1);
1244 ASSERT(set_rc == 0);
1245 count += 0x10;
1246 }
1247 else
1248 ++count;
1249
1250 /* Preempt every 2MiB (shared) or 32MiB (unshared) - arbitrary. */
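/* count advances by 0x10 per shared gfn and by 1 otherwise, so the 0x2000
 * threshold corresponds to 512 shared pages (2MiB) or 8192 scanned gfns
 * (32MiB) with 4KiB pages. */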
1251 if ( count >= 0x2000 )
1252 {
1253 if ( hypercall_preempt_check() )
1254 {
1255 p2m->next_shared_gfn_to_relinquish = gfn + 1;
1256 rc = -ERESTART;
1257 break;
1258 }
1259 count = 0;
1260 }
1261 }
1262
1263 p2m_unlock(p2m);
1264 return rc;
1265 }
1266
static int range_share(struct domain *d, struct domain *cd,
1268 struct mem_sharing_op_range *range)
1269 {
1270 int rc = 0;
1271 shr_handle_t sh, ch;
1272 unsigned long start = range->opaque ?: range->first_gfn;
1273
1274 while ( range->last_gfn >= start )
1275 {
1276 /*
1277 * We only break out if we run out of memory as individual pages may
1278 * legitimately be unsharable and we just want to skip over those.
1279 */
1280 rc = nominate_page(d, _gfn(start), 0, &sh);
1281 if ( rc == -ENOMEM )
1282 break;
1283
1284 if ( !rc )
1285 {
1286 rc = nominate_page(cd, _gfn(start), 0, &ch);
1287 if ( rc == -ENOMEM )
1288 break;
1289
1290 if ( !rc )
1291 {
1292 /* If we get here this should be guaranteed to succeed. */
1293 rc = share_pages(d, _gfn(start), sh, cd, _gfn(start), ch);
1294 ASSERT(!rc);
1295 }
1296 }
1297
1298 /* Check for continuation if it's not the last iteration. */
1299 if ( range->last_gfn >= ++start && hypercall_preempt_check() )
1300 {
1301 rc = 1;
1302 break;
1303 }
1304 }
1305
1306 range->opaque = start;
1307
1308 /*
1309 * The last page may fail with -EINVAL, and for range sharing we don't
1310 * care about that.
1311 */
1312 if ( range->last_gfn < start && rc == -EINVAL )
1313 rc = 0;
1314
1315 return rc;
1316 }
1317
int mem_sharing_memop(XEN_GUEST_HANDLE_PARAM(xen_mem_sharing_op_t) arg)
1319 {
1320 int rc;
1321 xen_mem_sharing_op_t mso;
1322 struct domain *d;
1323
1324 rc = -EFAULT;
1325 if ( copy_from_guest(&mso, arg, 1) )
1326 return rc;
1327
1328 if ( mso.op == XENMEM_sharing_op_audit )
1329 return audit();
1330
1331 rc = rcu_lock_live_remote_domain_by_id(mso.domain, &d);
1332 if ( rc )
1333 return rc;
1334
1335 rc = xsm_mem_sharing(XSM_DM_PRIV, d);
1336 if ( rc )
1337 goto out;
1338
1339 /* Only HAP is supported */
1340 rc = -ENODEV;
1341 if ( !hap_enabled(d) || !d->arch.hvm_domain.mem_sharing_enabled )
1342 goto out;
1343
1344 switch ( mso.op )
1345 {
1346 case XENMEM_sharing_op_nominate_gfn:
1347 {
1348 shr_handle_t handle;
1349
1350 rc = -EINVAL;
1351 if ( !mem_sharing_enabled(d) )
1352 goto out;
1353
1354 rc = nominate_page(d, _gfn(mso.u.nominate.u.gfn), 0, &handle);
1355 mso.u.nominate.handle = handle;
1356 }
1357 break;
1358
1359 case XENMEM_sharing_op_nominate_gref:
1360 {
1361 grant_ref_t gref = mso.u.nominate.u.grant_ref;
1362 gfn_t gfn;
1363 shr_handle_t handle;
1364
1365 rc = -EINVAL;
1366 if ( !mem_sharing_enabled(d) )
1367 goto out;
1368 rc = mem_sharing_gref_to_gfn(d->grant_table, gref, &gfn, NULL);
1369 if ( rc < 0 )
1370 goto out;
1371
1372 rc = nominate_page(d, gfn, 3, &handle);
1373 mso.u.nominate.handle = handle;
1374 }
1375 break;
1376
1377 case XENMEM_sharing_op_share:
1378 {
1379 gfn_t sgfn, cgfn;
1380 struct domain *cd;
1381 shr_handle_t sh, ch;
1382
1383 rc = -EINVAL;
1384 if ( !mem_sharing_enabled(d) )
1385 goto out;
1386
1387 rc = rcu_lock_live_remote_domain_by_id(mso.u.share.client_domain,
1388 &cd);
1389 if ( rc )
1390 goto out;
1391
1392 rc = xsm_mem_sharing_op(XSM_DM_PRIV, d, cd, mso.op);
1393 if ( rc )
1394 {
1395 rcu_unlock_domain(cd);
1396 goto out;
1397 }
1398
1399 if ( !mem_sharing_enabled(cd) )
1400 {
1401 rcu_unlock_domain(cd);
1402 rc = -EINVAL;
1403 goto out;
1404 }
1405
1406 if ( XENMEM_SHARING_OP_FIELD_IS_GREF(mso.u.share.source_gfn) )
1407 {
1408 grant_ref_t gref = (grant_ref_t)
1409 (XENMEM_SHARING_OP_FIELD_GET_GREF(
1410 mso.u.share.source_gfn));
1411 rc = mem_sharing_gref_to_gfn(d->grant_table, gref, &sgfn,
1412 NULL);
1413 if ( rc < 0 )
1414 {
1415 rcu_unlock_domain(cd);
1416 goto out;
1417 }
1418 }
1419 else
1420 sgfn = _gfn(mso.u.share.source_gfn);
1421
1422 if ( XENMEM_SHARING_OP_FIELD_IS_GREF(mso.u.share.client_gfn) )
1423 {
1424 grant_ref_t gref = (grant_ref_t)
1425 (XENMEM_SHARING_OP_FIELD_GET_GREF(
1426 mso.u.share.client_gfn));
1427 rc = mem_sharing_gref_to_gfn(cd->grant_table, gref, &cgfn,
1428 NULL);
1429 if ( rc < 0 )
1430 {
1431 rcu_unlock_domain(cd);
1432 goto out;
1433 }
1434 }
1435 else
1436 cgfn = _gfn(mso.u.share.client_gfn);
1437
1438 sh = mso.u.share.source_handle;
1439 ch = mso.u.share.client_handle;
1440
1441 rc = share_pages(d, sgfn, sh, cd, cgfn, ch);
1442
1443 rcu_unlock_domain(cd);
1444 }
1445 break;
1446
1447 case XENMEM_sharing_op_add_physmap:
1448 {
1449 unsigned long sgfn, cgfn;
1450 struct domain *cd;
1451 shr_handle_t sh;
1452
1453 rc = -EINVAL;
1454 if ( !mem_sharing_enabled(d) )
1455 goto out;
1456
1457 rc = rcu_lock_live_remote_domain_by_id(mso.u.share.client_domain,
1458 &cd);
1459 if ( rc )
1460 goto out;
1461
1462 rc = xsm_mem_sharing_op(XSM_DM_PRIV, d, cd, mso.op);
1463 if ( rc )
1464 {
1465 rcu_unlock_domain(cd);
1466 goto out;
1467 }
1468
1469 if ( !mem_sharing_enabled(cd) )
1470 {
1471 rcu_unlock_domain(cd);
1472 rc = -EINVAL;
1473 goto out;
1474 }
1475
1476 if ( XENMEM_SHARING_OP_FIELD_IS_GREF(mso.u.share.source_gfn) )
1477 {
1478 /* Cannot add a gref to the physmap */
1479 rcu_unlock_domain(cd);
1480 rc = -EINVAL;
1481 goto out;
1482 }
1483
1484 sgfn = mso.u.share.source_gfn;
1485 sh = mso.u.share.source_handle;
1486 cgfn = mso.u.share.client_gfn;
1487
1488 rc = mem_sharing_add_to_physmap(d, sgfn, sh, cd, cgfn);
1489
1490 rcu_unlock_domain(cd);
1491 }
1492 break;
1493
1494 case XENMEM_sharing_op_range_share:
1495 {
1496 unsigned long max_sgfn, max_cgfn;
1497 struct domain *cd;
1498
1499 rc = -EINVAL;
1500 if ( mso.u.range._pad[0] || mso.u.range._pad[1] ||
1501 mso.u.range._pad[2] )
1502 goto out;
1503
1504 /*
1505 * We use opaque for the hypercall continuation value.
1506 * Ideally the user sets this to 0 in the beginning but
1507 * there is no good way of enforcing that here, so we just check
1508 * that it's at least in range.
1509 */
1510 if ( mso.u.range.opaque &&
1511 (mso.u.range.opaque < mso.u.range.first_gfn ||
1512 mso.u.range.opaque > mso.u.range.last_gfn) )
1513 goto out;
1514
1515 if ( !mem_sharing_enabled(d) )
1516 goto out;
1517
1518 rc = rcu_lock_live_remote_domain_by_id(mso.u.range.client_domain,
1519 &cd);
1520 if ( rc )
1521 goto out;
1522
1523 /*
1524 * We reuse XENMEM_sharing_op_share XSM check here as this is
1525 * essentially the same concept repeated over multiple pages.
1526 */
1527 rc = xsm_mem_sharing_op(XSM_DM_PRIV, d, cd,
1528 XENMEM_sharing_op_share);
1529 if ( rc )
1530 {
1531 rcu_unlock_domain(cd);
1532 goto out;
1533 }
1534
1535 if ( !mem_sharing_enabled(cd) )
1536 {
1537 rcu_unlock_domain(cd);
1538 rc = -EINVAL;
1539 goto out;
1540 }
1541
1542 /*
1543 * Sanity check only, the client should keep the domains paused for
1544 * the duration of this op.
1545 */
1546 if ( !atomic_read(&d->pause_count) ||
1547 !atomic_read(&cd->pause_count) )
1548 {
1549 rcu_unlock_domain(cd);
1550 rc = -EINVAL;
1551 goto out;
1552 }
1553
1554 max_sgfn = domain_get_maximum_gpfn(d);
1555 max_cgfn = domain_get_maximum_gpfn(cd);
1556
1557 if ( max_sgfn < mso.u.range.first_gfn ||
1558 max_sgfn < mso.u.range.last_gfn ||
1559 max_cgfn < mso.u.range.first_gfn ||
1560 max_cgfn < mso.u.range.last_gfn )
1561 {
1562 rcu_unlock_domain(cd);
1563 rc = -EINVAL;
1564 goto out;
1565 }
1566
1567 rc = range_share(d, cd, &mso.u.range);
1568 rcu_unlock_domain(cd);
1569
1570 if ( rc > 0 )
1571 {
1572 if ( __copy_to_guest(arg, &mso, 1) )
1573 rc = -EFAULT;
1574 else
1575 rc = hypercall_create_continuation(__HYPERVISOR_memory_op,
1576 "lh", XENMEM_sharing_op,
1577 arg);
1578 }
1579 else
1580 mso.u.range.opaque = 0;
1581 }
1582 break;
1583
1584 case XENMEM_sharing_op_debug_gfn:
1585 rc = debug_gfn(d, _gfn(mso.u.debug.u.gfn));
1586 break;
1587
1588 case XENMEM_sharing_op_debug_gref:
1589 rc = debug_gref(d, mso.u.debug.u.gref);
1590 break;
1591
1592 default:
1593 rc = -ENOSYS;
1594 break;
1595 }
1596
1597 if ( !rc && __copy_to_guest(arg, &mso, 1) )
1598 rc = -EFAULT;
1599
1600 out:
1601 rcu_unlock_domain(d);
1602 return rc;
1603 }
1604
int mem_sharing_domctl(struct domain *d, struct xen_domctl_mem_sharing_op *mec)
1606 {
1607 int rc;
1608
1609 /* Only HAP is supported */
1610 if ( !hap_enabled(d) )
1611 return -ENODEV;
1612
1613 switch(mec->op)
1614 {
1615 case XEN_DOMCTL_MEM_SHARING_CONTROL:
1616 {
1617 rc = 0;
1618 if ( unlikely(need_iommu(d) && mec->u.enable) )
1619 rc = -EXDEV;
1620 else
1621 d->arch.hvm_domain.mem_sharing_enabled = mec->u.enable;
1622 }
1623 break;
1624
1625 default:
1626 rc = -ENOSYS;
1627 }
1628
1629 return rc;
1630 }
1631
void __init mem_sharing_init(void)
1633 {
printk("Initializing memory sharing.\n");
1635 #if MEM_SHARING_AUDIT
1636 spin_lock_init(&shr_audit_lock);
1637 INIT_LIST_HEAD(&shr_audit_list);
1638 #endif
1639 }
1640
1641