1 /******************************************************************************
2  * arch/x86/mm/mem_sharing.c
3  *
4  * Memory sharing support.
5  *
6  * Copyright (c) 2011 GridCentric, Inc. (Adin Scannell & Andres Lagar-Cavilla)
7  * Copyright (c) 2009 Citrix Systems, Inc. (Grzegorz Milos)
8  *
9  * This program is free software; you can redistribute it and/or modify
10  * it under the terms of the GNU General Public License as published by
11  * the Free Software Foundation; either version 2 of the License, or
12  * (at your option) any later version.
13  *
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  * GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License
20  * along with this program; If not, see <http://www.gnu.org/licenses/>.
21  */
22 
23 #include <xen/types.h>
24 #include <xen/domain_page.h>
25 #include <xen/spinlock.h>
26 #include <xen/rwlock.h>
27 #include <xen/mm.h>
28 #include <xen/grant_table.h>
29 #include <xen/sched.h>
30 #include <xen/rcupdate.h>
31 #include <xen/guest_access.h>
32 #include <xen/vm_event.h>
33 #include <asm/page.h>
34 #include <asm/string.h>
35 #include <asm/p2m.h>
36 #include <asm/altp2m.h>
37 #include <asm/atomic.h>
38 #include <asm/event.h>
39 #include <xsm/xsm.h>
40 
41 #include "mm-locks.h"
42 
43 static shr_handle_t next_handle = 1;
44 
45 typedef struct pg_lock_data {
46     int mm_unlock_level;
47     unsigned short recurse_count;
48 } pg_lock_data_t;
49 
50 static DEFINE_PER_CPU(pg_lock_data_t, __pld);
51 
52 #define MEM_SHARING_DEBUG(_f, _a...)                                  \
53     debugtrace_printk("mem_sharing_debug: %s(): " _f, __func__, ##_a)
54 
55 /* Reverse map defines */
56 #define RMAP_HASHTAB_ORDER  0
57 #define RMAP_HASHTAB_SIZE   \
58         ((PAGE_SIZE << RMAP_HASHTAB_ORDER) / sizeof(struct list_head))
59 #define RMAP_USES_HASHTAB(page) \
60         ((page)->sharing->hash_table.flag == NULL)
61 #define RMAP_HEAVY_SHARED_PAGE   RMAP_HASHTAB_SIZE
62 /* A bit of hysteresis. We don't want to be mutating between list and hash
63  * table constantly. */
64 #define RMAP_LIGHT_SHARED_PAGE   (RMAP_HEAVY_SHARED_PAGE >> 2)
65 
66 #if MEM_SHARING_AUDIT
67 
68 static struct list_head shr_audit_list;
69 static spinlock_t shr_audit_lock;
70 static DEFINE_RCU_READ_LOCK(shr_audit_read_lock);
71 
72 /* RCU delayed free of audit list entry */
73 static void _free_pg_shared_info(struct rcu_head *head)
74 {
75     xfree(container_of(head, struct page_sharing_info, rcu_head));
76 }
77 
78 static inline void audit_add_list(struct page_info *page)
79 {
80     INIT_LIST_HEAD(&page->sharing->entry);
81     spin_lock(&shr_audit_lock);
82     list_add_rcu(&page->sharing->entry, &shr_audit_list);
83     spin_unlock(&shr_audit_lock);
84 }
85 
86 /* Removes from the audit list and cleans up the page sharing metadata. */
87 static inline void page_sharing_dispose(struct page_info *page)
88 {
89     /* Unlikely given our thresholds, but we should be careful. */
90     if ( unlikely(RMAP_USES_HASHTAB(page)) )
91         free_xenheap_pages(page->sharing->hash_table.bucket,
92                             RMAP_HASHTAB_ORDER);
93 
94     spin_lock(&shr_audit_lock);
95     list_del_rcu(&page->sharing->entry);
96     spin_unlock(&shr_audit_lock);
97     INIT_RCU_HEAD(&page->sharing->rcu_head);
98     call_rcu(&page->sharing->rcu_head, _free_pg_shared_info);
99 }
100 
101 #else
102 
103 #define audit_add_list(p)  ((void)0)
104 static inline void page_sharing_dispose(struct page_info *page)
105 {
106     /* Unlikely given our thresholds, but we should be careful. */
107     if ( unlikely(RMAP_USES_HASHTAB(page)) )
108         free_xenheap_pages(page->sharing->hash_table.bucket,
109                             RMAP_HASHTAB_ORDER);
110     xfree(page->sharing);
111 }
112 
113 #endif /* MEM_SHARING_AUDIT */
114 
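/* Page lock / unlock helpers. On top of page_lock(), these record the mm
 * lock ordering level via the per-cpu pg_lock_data and keep preemption
 * disabled for as long as the page lock is held. */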
115 static inline int mem_sharing_page_lock(struct page_info *pg)
116 {
117     int rc;
118     pg_lock_data_t *pld = &(this_cpu(__pld));
119 
120     page_sharing_mm_pre_lock();
121     rc = page_lock(pg);
122     if ( rc )
123     {
124         preempt_disable();
125         page_sharing_mm_post_lock(&pld->mm_unlock_level,
126                                   &pld->recurse_count);
127     }
128     return rc;
129 }
130 
131 static inline void mem_sharing_page_unlock(struct page_info *pg)
132 {
133     pg_lock_data_t *pld = &(this_cpu(__pld));
134 
135     page_sharing_mm_unlock(pld->mm_unlock_level,
136                            &pld->recurse_count);
137     preempt_enable();
138     page_unlock(pg);
139 }
140 
141 static inline shr_handle_t get_next_handle(void)
142 {
143     /* Get the next handle, get_page() style (lock-free cmpxchg loop). */
144     uint64_t x, y = next_handle;
145     do {
146         x = y;
147     }
148     while ( (y = cmpxchg(&next_handle, x, x + 1)) != x );
149     return x + 1;
150 }
151 
152 #define mem_sharing_enabled(d) \
153     (is_hvm_domain(d) && (d)->arch.hvm_domain.mem_sharing_enabled)
154 
155 #undef mfn_to_page
156 #define mfn_to_page(_m) __mfn_to_page(mfn_x(_m))
157 #undef page_to_mfn
158 #define page_to_mfn(_pg) _mfn(__page_to_mfn(_pg))
159 
160 static atomic_t nr_saved_mfns   = ATOMIC_INIT(0);
161 static atomic_t nr_shared_mfns  = ATOMIC_INIT(0);
162 
163 /** Reverse map **/
164 /* Every shared frame keeps a reverse map (rmap) of <domain, gfn> tuples that
165  * this shared frame backs. For pages with a low degree of sharing, an O(n)
166  * linked-list search is good enough. For pages with a higher degree of sharing,
167  * we use a hash table instead. */
168 
169 typedef struct gfn_info
170 {
171     unsigned long gfn;
172     domid_t domain;
173     struct list_head list;
174 } gfn_info_t;
175 
176 static inline void
177 rmap_init(struct page_info *page)
178 {
179     /* We always start off as a doubly linked list. */
180     INIT_LIST_HEAD(&page->sharing->gfns);
181 }
182 
183 /* Exceedingly simple "hash function" */
184 #define HASH(domain, gfn)       \
185     (((gfn) + (domain)) % RMAP_HASHTAB_SIZE)
186 
187 /* Conversions. Tuned by the thresholds. Should only happen twice
188  * (once each) during the lifetime of a shared page */
189 static inline int
190 rmap_list_to_hash_table(struct page_info *page)
191 {
192     unsigned int i;
193     struct list_head *pos, *tmp, *b =
194         alloc_xenheap_pages(RMAP_HASHTAB_ORDER, 0);
195 
196     if ( b == NULL )
197         return -ENOMEM;
198 
199     for ( i = 0; i < RMAP_HASHTAB_SIZE; i++ )
200         INIT_LIST_HEAD(b + i);
201 
202     list_for_each_safe(pos, tmp, &page->sharing->gfns)
203     {
204         gfn_info_t *gfn_info = list_entry(pos, gfn_info_t, list);
205         struct list_head *bucket = b + HASH(gfn_info->domain, gfn_info->gfn);
206         list_del(pos);
207         list_add(pos, bucket);
208     }
209 
210     page->sharing->hash_table.bucket = b;
211     page->sharing->hash_table.flag   = NULL;
212 
213     return 0;
214 }
215 
216 static inline void
217 rmap_hash_table_to_list(struct page_info *page)
218 {
219     unsigned int i;
220     struct list_head *bucket = page->sharing->hash_table.bucket;
221 
222     INIT_LIST_HEAD(&page->sharing->gfns);
223 
224     for ( i = 0; i < RMAP_HASHTAB_SIZE; i++ )
225     {
226         struct list_head *pos, *tmp, *head = bucket + i;
227         list_for_each_safe(pos, tmp, head)
228         {
229             list_del(pos);
230             list_add(pos, &page->sharing->gfns);
231         }
232     }
233 
234     free_xenheap_pages(bucket, RMAP_HASHTAB_ORDER);
235 }
236 
237 /* Generic accessors to the rmap */
238 static inline unsigned long
239 rmap_count(struct page_info *pg)
240 {
241     unsigned long count;
242     unsigned long t = read_atomic(&pg->u.inuse.type_info);
243     count = t & PGT_count_mask;
244     if ( t & PGT_locked )
245         count--;
246     return count;
247 }
248 
249 /* The page type count is always decreased after removing from the rmap.
250  * Use a convert flag to avoid mutating the rmap if in the middle of an
251  * iterator, or if the page will be soon destroyed anyways. */
252 static inline void
253 rmap_del(gfn_info_t *gfn_info, struct page_info *page, int convert)
254 {
255     if ( RMAP_USES_HASHTAB(page) && convert &&
256          (rmap_count(page) <= RMAP_LIGHT_SHARED_PAGE) )
257         rmap_hash_table_to_list(page);
258 
259     /* Regardless of rmap type, same removal operation */
260     list_del(&gfn_info->list);
261 }
262 
263 /* The page type count is always increased before adding to the rmap. */
264 static inline void
265 rmap_add(gfn_info_t *gfn_info, struct page_info *page)
266 {
267     struct list_head *head;
268 
269     if ( !RMAP_USES_HASHTAB(page) &&
270          (rmap_count(page) >= RMAP_HEAVY_SHARED_PAGE) )
271         /* The conversion may fail with ENOMEM. We'll be less efficient,
272          * but no reason to panic. */
273         (void)rmap_list_to_hash_table(page);
274 
275     head = (RMAP_USES_HASHTAB(page)) ?
276         page->sharing->hash_table.bucket +
277                             HASH(gfn_info->domain, gfn_info->gfn) :
278         &page->sharing->gfns;
279 
280     INIT_LIST_HEAD(&gfn_info->list);
281     list_add(&gfn_info->list, head);
282 }
283 
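/* Find the <domain, gfn> tuple for the given domain/gfn in the page's rmap,
 * or return NULL if it is not present. */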
284 static inline gfn_info_t *
285 rmap_retrieve(uint16_t domain_id, unsigned long gfn,
286                             struct page_info *page)
287 {
288     gfn_info_t *gfn_info;
289     struct list_head *le, *head;
290 
291     head = (RMAP_USES_HASHTAB(page)) ?
292         page->sharing->hash_table.bucket + HASH(domain_id, gfn) :
293         &page->sharing->gfns;
294 
295     list_for_each(le, head)
296     {
297         gfn_info = list_entry(le, gfn_info_t, list);
298         if ( (gfn_info->gfn == gfn) && (gfn_info->domain == domain_id) )
299             return gfn_info;
300     }
301 
302     /* Nothing was found */
303     return NULL;
304 }
305 
306 /* Returns true if the rmap has only one entry. O(1) complexity. */
307 static inline int rmap_has_one_entry(struct page_info *page)
308 {
309     return (rmap_count(page) == 1);
310 }
311 
312 /* Returns true if the rmap has any entries. O(1) complexity. */
313 static inline int rmap_has_entries(struct page_info *page)
314 {
315     return (rmap_count(page) != 0);
316 }
317 
318 /* The iterator hides the details of how the rmap is implemented. This
319  * involves splitting the list_for_each_safe macro into two steps. */
320 struct rmap_iterator {
321     struct list_head *curr;
322     struct list_head *next;
323     unsigned int bucket;
324 };
325 
326 static inline void
327 rmap_seed_iterator(struct page_info *page, struct rmap_iterator *ri)
328 {
329     ri->curr = (RMAP_USES_HASHTAB(page)) ?
330                 page->sharing->hash_table.bucket :
331                 &page->sharing->gfns;
332     ri->next = ri->curr->next;
333     ri->bucket = 0;
334 }
335 
336 static inline gfn_info_t *
337 rmap_iterate(struct page_info *page, struct rmap_iterator *ri)
338 {
339     struct list_head *head = (RMAP_USES_HASHTAB(page)) ?
340                 page->sharing->hash_table.bucket + ri->bucket :
341                 &page->sharing->gfns;
342 
343 retry:
344     if ( ri->next == head)
345     {
346         if ( RMAP_USES_HASHTAB(page) )
347         {
348             ri->bucket++;
349             if ( ri->bucket >= RMAP_HASHTAB_SIZE )
350                 /* No more hash table buckets */
351                 return NULL;
352             head = page->sharing->hash_table.bucket + ri->bucket;
353             ri->curr = head;
354             ri->next = ri->curr->next;
355             goto retry;
356         } else
357             /* List exhausted */
358             return NULL;
359     }
360 
361     ri->curr = ri->next;
362     ri->next = ri->curr->next;
363 
364     return list_entry(ri->curr, gfn_info_t, list);
365 }
366 
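/* Allocate a <domain, gfn> tuple, add it to the page's rmap and account for
 * it in the domain's shr_pages counter. */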
367 static inline gfn_info_t *mem_sharing_gfn_alloc(struct page_info *page,
368                                                 struct domain *d,
369                                                 unsigned long gfn)
370 {
371     gfn_info_t *gfn_info = xmalloc(gfn_info_t);
372 
373     if ( gfn_info == NULL )
374         return NULL;
375 
376     gfn_info->gfn = gfn;
377     gfn_info->domain = d->domain_id;
378 
379     rmap_add(gfn_info, page);
380 
381     /* Increment our number of shared pages. */
382     atomic_inc(&d->shr_pages);
383 
384     return gfn_info;
385 }
386 
387 static inline void mem_sharing_gfn_destroy(struct page_info *page,
388                                            struct domain *d,
389                                            gfn_info_t *gfn_info)
390 {
391     /* Decrement the number of pages. */
392     atomic_dec(&d->shr_pages);
393 
394     /* Free the gfn_info structure. */
395     rmap_del(gfn_info, page, 1);
396     xfree(gfn_info);
397 }
398 
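/* Return the page_info of an mfn if it is a shared page owned by dom_cow,
 * NULL otherwise. Expects the caller to already hold the page lock. */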
399 static struct page_info* mem_sharing_lookup(unsigned long mfn)
400 {
401     if ( mfn_valid(_mfn(mfn)) )
402     {
403         struct page_info* page = mfn_to_page(_mfn(mfn));
404         if ( page_get_owner(page) == dom_cow )
405         {
406             /* Count has to be at least two, because we're called
407              * with the mfn locked (1) and this is supposed to be
408              * a shared page (1). */
409             unsigned long t = read_atomic(&page->u.inuse.type_info);
410             ASSERT((t & PGT_type_mask) == PGT_shared_page);
411             ASSERT((t & PGT_count_mask) >= 2);
412             ASSERT(get_gpfn_from_mfn(mfn) == SHARED_M2P_ENTRY);
413             return page;
414         }
415     }
416 
417     return NULL;
418 }
419 
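/* Walk the global audit list and cross-check every shared page's type,
 * owner, m2p entry, rmap contents and the p2m entries of all domains that
 * map it. Returns the number of inconsistencies found, or -EOPNOTSUPP when
 * MEM_SHARING_AUDIT is compiled out. */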
420 static int audit(void)
421 {
422 #if MEM_SHARING_AUDIT
423     int errors = 0;
424     unsigned long count_expected;
425     unsigned long count_found = 0;
426     struct list_head *ae;
427 
428     count_expected = atomic_read(&nr_shared_mfns);
429 
430     rcu_read_lock(&shr_audit_read_lock);
431 
432     list_for_each_rcu(ae, &shr_audit_list)
433     {
434         struct page_sharing_info *pg_shared_info;
435         unsigned long nr_gfns = 0;
436         struct page_info *pg;
437         mfn_t mfn;
438         gfn_info_t *g;
439         struct rmap_iterator ri;
440 
441         pg_shared_info = list_entry(ae, struct page_sharing_info, entry);
442         pg = pg_shared_info->pg;
443         mfn = page_to_mfn(pg);
444 
445         /* If we can't lock it, it's definitely not a shared page */
446         if ( !mem_sharing_page_lock(pg) )
447         {
448            MEM_SHARING_DEBUG("mfn %lx in audit list, but cannot be locked (%lx)!\n",
449                               mfn_x(mfn), pg->u.inuse.type_info);
450            errors++;
451            continue;
452         }
453 
454         /* Check if the MFN has correct type, owner and handle. */
455         if ( (pg->u.inuse.type_info & PGT_type_mask) != PGT_shared_page )
456         {
457            MEM_SHARING_DEBUG("mfn %lx in audit list, but not PGT_shared_page (%lx)!\n",
458                               mfn_x(mfn), pg->u.inuse.type_info & PGT_type_mask);
459            errors++;
460            continue;
461         }
462 
463         /* Check the page owner. */
464         if ( page_get_owner(pg) != dom_cow )
465         {
466            MEM_SHARING_DEBUG("mfn %lx shared, but wrong owner (%hu)!\n",
467                              mfn_x(mfn), page_get_owner(pg)->domain_id);
468            errors++;
469         }
470 
471         /* Check the m2p entry */
472         if ( get_gpfn_from_mfn(mfn_x(mfn)) != SHARED_M2P_ENTRY )
473         {
474            MEM_SHARING_DEBUG("mfn %lx shared, but wrong m2p entry (%lx)!\n",
475                              mfn_x(mfn), get_gpfn_from_mfn(mfn_x(mfn)));
476            errors++;
477         }
478 
479         /* Check we have a list */
480         if ( (!pg->sharing) || !rmap_has_entries(pg) )
481         {
482            MEM_SHARING_DEBUG("mfn %lx shared, but empty gfn list!\n",
483                              mfn_x(mfn));
484            errors++;
485            continue;
486         }
487 
488         /* We've found a page that is shared */
489         count_found++;
490 
491         /* Check if all GFNs map to the MFN, and the p2m types */
492         rmap_seed_iterator(pg, &ri);
493         while ( (g = rmap_iterate(pg, &ri)) != NULL )
494         {
495             struct domain *d;
496             p2m_type_t t;
497             mfn_t o_mfn;
498 
499             d = get_domain_by_id(g->domain);
500             if ( d == NULL )
501             {
502                 MEM_SHARING_DEBUG("Unknown dom: %hu, for PFN=%lx, MFN=%lx\n",
503                                   g->domain, g->gfn, mfn_x(mfn));
504                 errors++;
505                 continue;
506             }
507             o_mfn = get_gfn_query_unlocked(d, g->gfn, &t);
508             if ( mfn_x(o_mfn) != mfn_x(mfn) )
509             {
510                 MEM_SHARING_DEBUG("Incorrect P2M for d=%hu, PFN=%lx. "
511                                   "Expecting MFN=%lx, got %lx\n",
512                                   g->domain, g->gfn, mfn_x(mfn), mfn_x(o_mfn));
513                 errors++;
514             }
515             if ( t != p2m_ram_shared )
516             {
517                 MEM_SHARING_DEBUG("Incorrect P2M type for d=%hu, PFN=%lx MFN=%lx. "
518                                   "Expecting t=%d, got %d\n",
519                                   g->domain, g->gfn, mfn_x(mfn), p2m_ram_shared, t);
520                 errors++;
521             }
522             put_domain(d);
523             nr_gfns++;
524         }
525         /* The type count has an extra ref because we have locked the page */
526         if ( (nr_gfns + 1) != (pg->u.inuse.type_info & PGT_count_mask) )
527         {
528             MEM_SHARING_DEBUG("Mismatched counts for MFN=%lx. "
529                               "nr_gfns in list %lu, in type_info %lx\n",
530                               mfn_x(mfn), nr_gfns,
531                               (pg->u.inuse.type_info & PGT_count_mask));
532             errors++;
533         }
534 
535         mem_sharing_page_unlock(pg);
536     }
537 
538     rcu_read_unlock(&shr_audit_read_lock);
539 
540     if ( count_found != count_expected )
541     {
542         MEM_SHARING_DEBUG("Expected %ld shared mfns, found %ld.\n",
543                           count_expected, count_found);
544         errors++;
545     }
546 
547     return errors;
548 #else
549     return -EOPNOTSUPP;
550 #endif
551 }
552 
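/* Post an ENOMEM notification on the domain's sharing vm_event ring so that
 * a helper (e.g. in dom0) can react, pausing the current vCPU if it belongs
 * to the target domain. */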
553 int mem_sharing_notify_enomem(struct domain *d, unsigned long gfn,
554                                 bool_t allow_sleep)
555 {
556     struct vcpu *v = current;
557     int rc;
558     vm_event_request_t req = {
559         .reason = VM_EVENT_REASON_MEM_SHARING,
560         .vcpu_id = v->vcpu_id,
561         .u.mem_sharing.gfn = gfn,
562         .u.mem_sharing.p2mt = p2m_ram_shared
563     };
564 
565     if ( (rc = __vm_event_claim_slot(d,
566                         d->vm_event_share, allow_sleep)) < 0 )
567         return rc;
568 
569     if ( v->domain == d )
570     {
571         req.flags = VM_EVENT_FLAG_VCPU_PAUSED;
572         vm_event_vcpu_pause(v);
573     }
574 
575     vm_event_put_request(d, d->vm_event_share, &req);
576 
577     return 0;
578 }
579 
580 unsigned int mem_sharing_get_nr_saved_mfns(void)
581 {
582     return ((unsigned int)atomic_read(&nr_saved_mfns));
583 }
584 
585 unsigned int mem_sharing_get_nr_shared_mfns(void)
586 {
587     return (unsigned int)atomic_read(&nr_shared_mfns);
588 }
589 
590 /* Functions that change a page's type and ownership */
591 static int page_make_sharable(struct domain *d,
592                        struct page_info *page,
593                        int expected_refcnt)
594 {
595     bool_t drop_dom_ref;
596 
597     spin_lock(&d->page_alloc_lock);
598 
599     if ( d->is_dying )
600     {
601         spin_unlock(&d->page_alloc_lock);
602         return -EBUSY;
603     }
604 
605     /* Change page type and count atomically */
606     if ( !get_page_and_type(page, d, PGT_shared_page) )
607     {
608         spin_unlock(&d->page_alloc_lock);
609         return -EINVAL;
610     }
611 
612     /* Check it wasn't already sharable and undo if it was */
613     if ( (page->u.inuse.type_info & PGT_count_mask) != 1 )
614     {
615         spin_unlock(&d->page_alloc_lock);
616         put_page_and_type(page);
617         return -EEXIST;
618     }
619 
620     /* Check that the ref count is 2 + expected_refcnt: one from PGC_allocated,
621      * one from get_page_and_type above, plus the caller's expected refs. */
622     if ( page->count_info != (PGC_allocated | (2 + expected_refcnt)) )
623     {
624         spin_unlock(&d->page_alloc_lock);
625         /* Return type count back to zero */
626         put_page_and_type(page);
627         return -E2BIG;
628     }
629 
630     page_set_owner(page, dom_cow);
631     drop_dom_ref = !domain_adjust_tot_pages(d, -1);
632     page_list_del(page, &d->page_list);
633     spin_unlock(&d->page_alloc_lock);
634 
635     if ( drop_dom_ref )
636         put_domain(d);
637     return 0;
638 }
639 
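/* Give a shared page back to domain d: drop the remaining type count,
 * transfer ownership from dom_cow to d and put the page back on d's page
 * list. The page must be locked on entry and is unlocked here. */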
640 static int page_make_private(struct domain *d, struct page_info *page)
641 {
642     unsigned long expected_type;
643 
644     if ( !get_page(page, dom_cow) )
645         return -EINVAL;
646 
647     spin_lock(&d->page_alloc_lock);
648 
649     if ( d->is_dying )
650     {
651         spin_unlock(&d->page_alloc_lock);
652         put_page(page);
653         return -EBUSY;
654     }
655 
656     /* We can only change the type if count is one */
657     /* Because we are locking pages individually, we need to drop
658      * the lock here, while the page is typed. We cannot risk the
659      * race of page_unlock and then put_page_type. */
660     expected_type = (PGT_shared_page | PGT_validated | PGT_locked | 2);
661     if ( page->u.inuse.type_info != expected_type )
662     {
663         spin_unlock(&d->page_alloc_lock);
664         put_page(page);
665         return -EEXIST;
666     }
667 
668     /* Drop the final typecount */
669     put_page_and_type(page);
670 
671     /* Now that we've dropped the type, we can unlock */
672     mem_sharing_page_unlock(page);
673 
674     /* Change the owner */
675     ASSERT(page_get_owner(page) == dom_cow);
676     page_set_owner(page, d);
677 
678     if ( domain_adjust_tot_pages(d, 1) == 1 )
679         get_knownalive_domain(d);
680     page_list_add_tail(page, &d->page_list);
681     spin_unlock(&d->page_alloc_lock);
682 
683     put_page(page);
684 
685     return 0;
686 }
687 
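/* Lock the page backing an mfn and verify that it really is a shared page.
 * Returns the locked page on success, NULL (with no lock held) otherwise. */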
688 static inline struct page_info *__grab_shared_page(mfn_t mfn)
689 {
690     struct page_info *pg = NULL;
691 
692     if ( !mfn_valid(mfn) )
693         return NULL;
694     pg = mfn_to_page(mfn);
695 
696     /* If the page is not validated we can't lock it, and if it's
697      * not validated it's obviously not shared. */
698     if ( !mem_sharing_page_lock(pg) )
699         return NULL;
700 
701     if ( mem_sharing_lookup(mfn_x(mfn)) == NULL )
702     {
703         mem_sharing_page_unlock(pg);
704         return NULL;
705     }
706 
707     return pg;
708 }
709 
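/* Debug helpers: dump count/type/owner information for a shared page,
 * looked up by mfn, by gfn, or via a grant reference. */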
710 static int debug_mfn(mfn_t mfn)
711 {
712     struct page_info *page;
713     int num_refs;
714 
715     if ( (page = __grab_shared_page(mfn)) == NULL)
716     {
717         gdprintk(XENLOG_ERR, "Invalid MFN=%lx\n", mfn_x(mfn));
718         return -EINVAL;
719     }
720 
721     MEM_SHARING_DEBUG(
722             "Debug page: MFN=%lx is ci=%lx, ti=%lx, owner_id=%d\n",
723             mfn_x(page_to_mfn(page)),
724             page->count_info,
725             page->u.inuse.type_info,
726             page_get_owner(page)->domain_id);
727 
728     /* -1 because the page is locked and that's an additional type ref */
729     num_refs = ((int) (page->u.inuse.type_info & PGT_count_mask)) - 1;
730     mem_sharing_page_unlock(page);
731     return num_refs;
732 }
733 
734 static int debug_gfn(struct domain *d, gfn_t gfn)
735 {
736     p2m_type_t p2mt;
737     mfn_t mfn;
738     int num_refs;
739 
740     mfn = get_gfn_query(d, gfn_x(gfn), &p2mt);
741 
742     MEM_SHARING_DEBUG("Debug for dom%d, gfn=%" PRI_gfn "\n",
743                       d->domain_id, gfn_x(gfn));
744     num_refs = debug_mfn(mfn);
745     put_gfn(d, gfn_x(gfn));
746 
747     return num_refs;
748 }
749 
750 static int debug_gref(struct domain *d, grant_ref_t ref)
751 {
752     int rc;
753     uint16_t status;
754     gfn_t gfn;
755 
756     rc = mem_sharing_gref_to_gfn(d->grant_table, ref, &gfn, &status);
757     if ( rc )
758     {
759         MEM_SHARING_DEBUG("Asked to debug [dom=%d,gref=%u]: error %d.\n",
760                           d->domain_id, ref, rc);
761         return rc;
762     }
763 
764     MEM_SHARING_DEBUG(
765             "==> Grant [dom=%d,ref=%d], status=%x. ",
766             d->domain_id, ref, status);
767 
768     return debug_gfn(d, gfn);
769 }
770 
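/* Nominate a gfn for sharing: convert the backing page to a PGT_shared_page
 * owned by dom_cow, set up the sharing metadata and rmap, switch the p2m
 * entry to p2m_ram_shared and return the handle that subsequent sharing
 * operations must present. */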
771 static int nominate_page(struct domain *d, gfn_t gfn,
772                          int expected_refcnt, shr_handle_t *phandle)
773 {
774     struct p2m_domain *hp2m = p2m_get_hostp2m(d);
775     p2m_type_t p2mt;
776     p2m_access_t p2ma;
777     mfn_t mfn;
778     struct page_info *page = NULL; /* gcc... */
779     int ret;
780 
781     *phandle = 0UL;
782 
783     mfn = get_gfn_type_access(hp2m, gfn_x(gfn), &p2mt, &p2ma, 0, NULL);
784 
785     /* Check if mfn is valid */
786     ret = -EINVAL;
787     if ( !mfn_valid(mfn) )
788         goto out;
789 
790     /* Return the handle if the page is already shared */
791     if ( p2m_is_shared(p2mt) ) {
792         struct page_info *pg = __grab_shared_page(mfn);
793         if ( !pg )
794         {
795             gprintk(XENLOG_ERR,
796                     "Shared p2m entry gfn %" PRI_gfn ", but could not grab mfn %" PRI_mfn " dom%d\n",
797                     gfn_x(gfn), mfn_x(mfn), d->domain_id);
798             BUG();
799         }
800         *phandle = pg->sharing->handle;
801         ret = 0;
802         mem_sharing_page_unlock(pg);
803         goto out;
804     }
805 
806     /* Check p2m type */
807     if ( !p2m_is_sharable(p2mt) )
808         goto out;
809 
810     /* Check if there are mem_access/remapped altp2m entries for this page */
811     if ( altp2m_active(d) )
812     {
813         unsigned int i;
814         struct p2m_domain *ap2m;
815         mfn_t amfn;
816         p2m_type_t ap2mt;
817         p2m_access_t ap2ma;
818 
819         altp2m_list_lock(d);
820 
821         for ( i = 0; i < MAX_ALTP2M; i++ )
822         {
823             ap2m = d->arch.altp2m_p2m[i];
824             if ( !ap2m )
825                 continue;
826 
827             amfn = get_gfn_type_access(ap2m, gfn_x(gfn), &ap2mt, &ap2ma, 0, NULL);
828             if ( mfn_valid(amfn) && (!mfn_eq(amfn, mfn) || ap2ma != p2ma) )
829             {
830                 altp2m_list_unlock(d);
831                 goto out;
832             }
833         }
834 
835         altp2m_list_unlock(d);
836     }
837 
838     /* Try to convert the mfn to the sharable type */
839     page = mfn_to_page(mfn);
840     ret = page_make_sharable(d, page, expected_refcnt);
841     if ( ret )
842         goto out;
843 
844     /* Now that the page is validated, we can lock it. There is no
845      * race because we're holding the p2m entry, so no one else
846      * could be nominating this gfn */
847     ret = -ENOENT;
848     if ( !mem_sharing_page_lock(page) )
849         goto out;
850 
851     /* Initialize the shared state */
852     ret = -ENOMEM;
853     if ( (page->sharing =
854             xmalloc(struct page_sharing_info)) == NULL )
855     {
856         /* Making a page private atomically unlocks it */
857         BUG_ON(page_make_private(d, page) != 0);
858         goto out;
859     }
860     page->sharing->pg = page;
861     rmap_init(page);
862 
863     /* Create the handle */
864     page->sharing->handle = get_next_handle();
865 
866     /* Create the local gfn info */
867     if ( mem_sharing_gfn_alloc(page, d, gfn_x(gfn)) == NULL )
868     {
869         xfree(page->sharing);
870         page->sharing = NULL;
871         BUG_ON(page_make_private(d, page) != 0);
872         goto out;
873     }
874 
875     /* Change the p2m type, should never fail with p2m locked. */
876     BUG_ON(p2m_change_type_one(d, gfn_x(gfn), p2mt, p2m_ram_shared));
877 
878     /* Account for this page. */
879     atomic_inc(&nr_shared_mfns);
880 
881     /* Update m2p entry to SHARED_M2P_ENTRY */
882     set_gpfn_from_mfn(mfn_x(mfn), SHARED_M2P_ENTRY);
883 
884     *phandle = page->sharing->handle;
885     audit_add_list(page);
886     mem_sharing_page_unlock(page);
887     ret = 0;
888 
889 out:
890     put_gfn(d, gfn_x(gfn));
891     return ret;
892 }
893 
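/* Share two nominated pages: move every <domain, gfn> tuple from the client
 * page's rmap onto the source page, repoint the corresponding p2m entries at
 * the source mfn, and free the now-unused client page. */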
894 static int share_pages(struct domain *sd, gfn_t sgfn, shr_handle_t sh,
895                        struct domain *cd, gfn_t cgfn, shr_handle_t ch)
896 {
897     struct page_info *spage, *cpage, *firstpg, *secondpg;
898     gfn_info_t *gfn;
899     struct domain *d;
900     int ret = -EINVAL;
901     mfn_t smfn, cmfn;
902     p2m_type_t smfn_type, cmfn_type;
903     struct two_gfns tg;
904     struct rmap_iterator ri;
905 
906     get_two_gfns(sd, gfn_x(sgfn), &smfn_type, NULL, &smfn,
907                  cd, gfn_x(cgfn), &cmfn_type, NULL, &cmfn,
908                  0, &tg);
909 
910     /* This tricky business is to avoid two callers deadlocking if
911      * grabbing pages in opposite client/source order */
912     if( mfn_x(smfn) == mfn_x(cmfn) )
913     {
914         /* The pages are already the same.  We could return some
915          * kind of error here, but no matter how you look at it,
916          * the pages are already 'shared'.  It possibly represents
917          * a big problem somewhere else, but as far as sharing is
918          * concerned: great success! */
919         ret = 0;
920         goto err_out;
921     }
922     else if ( mfn_x(smfn) < mfn_x(cmfn) )
923     {
924         ret = XENMEM_SHARING_OP_S_HANDLE_INVALID;
925         spage = firstpg = __grab_shared_page(smfn);
926         if ( spage == NULL )
927             goto err_out;
928 
929         ret = XENMEM_SHARING_OP_C_HANDLE_INVALID;
930         cpage = secondpg = __grab_shared_page(cmfn);
931         if ( cpage == NULL )
932         {
933             mem_sharing_page_unlock(spage);
934             goto err_out;
935         }
936     } else {
937         ret = XENMEM_SHARING_OP_C_HANDLE_INVALID;
938         cpage = firstpg = __grab_shared_page(cmfn);
939         if ( cpage == NULL )
940             goto err_out;
941 
942         ret = XENMEM_SHARING_OP_S_HANDLE_INVALID;
943         spage = secondpg = __grab_shared_page(smfn);
944         if ( spage == NULL )
945         {
946             mem_sharing_page_unlock(cpage);
947             goto err_out;
948         }
949     }
950 
951     ASSERT(smfn_type == p2m_ram_shared);
952     ASSERT(cmfn_type == p2m_ram_shared);
953 
954     /* Check that the handles match */
955     if ( spage->sharing->handle != sh )
956     {
957         ret = XENMEM_SHARING_OP_S_HANDLE_INVALID;
958         mem_sharing_page_unlock(secondpg);
959         mem_sharing_page_unlock(firstpg);
960         goto err_out;
961     }
962     if ( cpage->sharing->handle != ch )
963     {
964         ret = XENMEM_SHARING_OP_C_HANDLE_INVALID;
965         mem_sharing_page_unlock(secondpg);
966         mem_sharing_page_unlock(firstpg);
967         goto err_out;
968     }
969 
970     /* Merge the lists together */
971     rmap_seed_iterator(cpage, &ri);
972     while ( (gfn = rmap_iterate(cpage, &ri)) != NULL)
973     {
974         /* Get the source page and type, this should never fail:
975          * we are under shr lock, and got a successful lookup */
976         BUG_ON(!get_page_and_type(spage, dom_cow, PGT_shared_page));
977         /* Move the gfn_info from client list to source list.
978          * Don't change the type of rmap for the client page. */
979         rmap_del(gfn, cpage, 0);
980         rmap_add(gfn, spage);
981         put_page_and_type(cpage);
982         d = get_domain_by_id(gfn->domain);
983         BUG_ON(!d);
984         BUG_ON(set_shared_p2m_entry(d, gfn->gfn, smfn));
985         put_domain(d);
986     }
987     ASSERT(list_empty(&cpage->sharing->gfns));
988 
989     /* Clear the rest of the shared state */
990     page_sharing_dispose(cpage);
991     cpage->sharing = NULL;
992 
993     mem_sharing_page_unlock(secondpg);
994     mem_sharing_page_unlock(firstpg);
995 
996     /* Free the client page */
997     if(test_and_clear_bit(_PGC_allocated, &cpage->count_info))
998         put_page(cpage);
999 
1000     /* We managed to free a domain page. */
1001     atomic_dec(&nr_shared_mfns);
1002     atomic_inc(&nr_saved_mfns);
1003     ret = 0;
1004 
1005 err_out:
1006     put_two_gfns(&tg);
1007     return ret;
1008 }
1009 
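/* Map an already-shared source page into a hole in the client's physmap and
 * add the client <domain, gfn> to the source page's rmap. Unlike
 * share_pages(), the client gfn must not currently be backed by a page. */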
1010 int mem_sharing_add_to_physmap(struct domain *sd, unsigned long sgfn, shr_handle_t sh,
1011                             struct domain *cd, unsigned long cgfn)
1012 {
1013     struct page_info *spage;
1014     int ret = -EINVAL;
1015     mfn_t smfn, cmfn;
1016     p2m_type_t smfn_type, cmfn_type;
1017     struct gfn_info *gfn_info;
1018     struct p2m_domain *p2m = p2m_get_hostp2m(cd);
1019     p2m_access_t a;
1020     struct two_gfns tg;
1021 
1022     get_two_gfns(sd, sgfn, &smfn_type, NULL, &smfn,
1023                  cd, cgfn, &cmfn_type, &a, &cmfn,
1024                  0, &tg);
1025 
1026     /* Get the source shared page, check and lock */
1027     ret = XENMEM_SHARING_OP_S_HANDLE_INVALID;
1028     spage = __grab_shared_page(smfn);
1029     if ( spage == NULL )
1030         goto err_out;
1031     ASSERT(smfn_type == p2m_ram_shared);
1032 
1033     /* Check that the handles match */
1034     if ( spage->sharing->handle != sh )
1035         goto err_unlock;
1036 
1037     /* Make sure the target page is a hole in the physmap. These are typically
1038      * p2m_mmio_dm, but also accept p2m_invalid and paged out pages. See the
1039      * definition of p2m_is_hole in p2m.h. */
1040     if ( !p2m_is_hole(cmfn_type) )
1041     {
1042         ret = XENMEM_SHARING_OP_C_HANDLE_INVALID;
1043         goto err_unlock;
1044     }
1045 
1046     /* This is simpler than regular sharing */
1047     BUG_ON(!get_page_and_type(spage, dom_cow, PGT_shared_page));
1048     if ( (gfn_info = mem_sharing_gfn_alloc(spage, cd, cgfn)) == NULL )
1049     {
1050         put_page_and_type(spage);
1051         ret = -ENOMEM;
1052         goto err_unlock;
1053     }
1054 
1055     ret = p2m_set_entry(p2m, _gfn(cgfn), smfn, PAGE_ORDER_4K,
1056                         p2m_ram_shared, a);
1057 
1058     /* Tempted to turn this into an assert */
1059     if ( ret )
1060     {
1061         mem_sharing_gfn_destroy(spage, cd, gfn_info);
1062         put_page_and_type(spage);
1063     } else {
1064         /* There is a chance we're plugging a hole where a paged out page was */
1065         if ( p2m_is_paging(cmfn_type) && (cmfn_type != p2m_ram_paging_out) )
1066         {
1067             atomic_dec(&cd->paged_pages);
1068             /* Further, there is a chance this was a valid page. Don't leak it. */
1069             if ( mfn_valid(cmfn) )
1070             {
1071                 struct page_info *cpage = mfn_to_page(cmfn);
1072                 ASSERT(cpage != NULL);
1073                 if ( test_and_clear_bit(_PGC_allocated, &cpage->count_info) )
1074                     put_page(cpage);
1075             }
1076         }
1077     }
1078 
1079     atomic_inc(&nr_saved_mfns);
1080 
1081 err_unlock:
1082     mem_sharing_page_unlock(spage);
1083 err_out:
1084     put_two_gfns(&tg);
1085     return ret;
1086 }
1087 
1088 
1089 /* A note on the rationale for unshare error handling:
1090  *  1. Unshare can only fail with ENOMEM. Any other error conditions BUG_ON()'s
1091  *  2. We notify a potential dom0 helper through a vm_event ring. But we
1092  *     allow the notification to not go to sleep. If the event ring is full
1093  *     of ENOMEM warnings, then it's on the ball.
1094  *  3. We cannot go to sleep until the unshare is resolved, because we might
1095  *     be buried deep into locks (e.g. something -> copy_to_user -> __hvm_copy)
1096  *  4. So, we make sure we:
1097  *     4.1. return an error
1098  *     4.2. do not corrupt shared memory
1099  *     4.3. do not corrupt guest memory
1100  *     4.4. let the guest deal with it if the error propagation will reach it
1101  */
1102 int __mem_sharing_unshare_page(struct domain *d,
1103                              unsigned long gfn,
1104                              uint16_t flags)
1105 {
1106     p2m_type_t p2mt;
1107     mfn_t mfn;
1108     struct page_info *page, *old_page;
1109     int last_gfn;
1110     gfn_info_t *gfn_info = NULL;
1111 
1112     mfn = get_gfn(d, gfn, &p2mt);
1113 
1114     /* Has someone already unshared it? */
1115     if ( !p2m_is_shared(p2mt) ) {
1116         put_gfn(d, gfn);
1117         return 0;
1118     }
1119 
1120     page = __grab_shared_page(mfn);
1121     if ( page == NULL )
1122     {
1123         gdprintk(XENLOG_ERR, "Domain p2m is shared, but page is not: "
1124                                 "%lx\n", gfn);
1125         BUG();
1126     }
1127 
1128     gfn_info = rmap_retrieve(d->domain_id, gfn, page);
1129     if ( unlikely(gfn_info == NULL) )
1130     {
1131         gdprintk(XENLOG_ERR, "Could not find gfn_info for shared gfn: "
1132                                 "%lx\n", gfn);
1133         BUG();
1134     }
1135 
1136     /* Do the accounting first. If anything fails below, we have bigger
1137      * fish to fry. First, remove the gfn from the list. */
1138     last_gfn = rmap_has_one_entry(page);
1139     if ( last_gfn )
1140     {
1141         /* Clean up shared state. Get rid of the <domid, gfn> tuple
1142          * before destroying the rmap. */
1143         mem_sharing_gfn_destroy(page, d, gfn_info);
1144         page_sharing_dispose(page);
1145         page->sharing = NULL;
1146         atomic_dec(&nr_shared_mfns);
1147     }
1148     else
1149         atomic_dec(&nr_saved_mfns);
1150 
1151     /* If the GFN is getting destroyed drop the references to MFN
1152      * (possibly freeing the page), and exit early */
1153     if ( flags & MEM_SHARING_DESTROY_GFN )
1154     {
1155         if ( !last_gfn )
1156             mem_sharing_gfn_destroy(page, d, gfn_info);
1157         put_page_and_type(page);
1158         mem_sharing_page_unlock(page);
1159         if ( last_gfn &&
1160             test_and_clear_bit(_PGC_allocated, &page->count_info) )
1161             put_page(page);
1162         put_gfn(d, gfn);
1163 
1164         return 0;
1165     }
1166 
1167     if ( last_gfn )
1168     {
1169         /* Making a page private atomically unlocks it */
1170         BUG_ON(page_make_private(d, page) != 0);
1171         goto private_page_found;
1172     }
1173 
1174     old_page = page;
1175     page = alloc_domheap_page(d, 0);
1176     if ( !page )
1177     {
1178         /* Undo dec of nr_saved_mfns, as the retry will decrease again. */
1179         atomic_inc(&nr_saved_mfns);
1180         mem_sharing_page_unlock(old_page);
1181         put_gfn(d, gfn);
1182         /* Caller is responsible for placing an event
1183          * in the ring */
1184         return -ENOMEM;
1185     }
1186 
1187     copy_domain_page(page_to_mfn(page), page_to_mfn(old_page));
1188 
1189     BUG_ON(set_shared_p2m_entry(d, gfn, page_to_mfn(page)));
1190     mem_sharing_gfn_destroy(old_page, d, gfn_info);
1191     mem_sharing_page_unlock(old_page);
1192     put_page_and_type(old_page);
1193 
1194 private_page_found:
1195     if ( p2m_change_type_one(d, gfn, p2m_ram_shared, p2m_ram_rw) )
1196     {
1197         gdprintk(XENLOG_ERR, "Could not change p2m type d %hu gfn %lx.\n",
1198                                 d->domain_id, gfn);
1199         BUG();
1200     }
1201 
1202     /* Update m2p entry */
1203     set_gpfn_from_mfn(mfn_x(page_to_mfn(page)), gfn);
1204 
1205     /* Now that the gfn<->mfn map is properly established,
1206      * marking dirty is feasible */
1207     paging_mark_dirty(d, page_to_mfn(page));
1208     /* We do not need to unlock a private page */
1209     put_gfn(d, gfn);
1210     return 0;
1211 }
1212 
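/* Domain teardown: walk the p2m and forcibly unshare every remaining shared
 * gfn, clearing the p2m entries as we go. Preempts periodically and returns
 * -ERESTART when a continuation is needed. */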
1213 int relinquish_shared_pages(struct domain *d)
1214 {
1215     int rc = 0;
1216     struct p2m_domain *p2m = p2m_get_hostp2m(d);
1217     unsigned long gfn, count = 0;
1218 
1219     if ( p2m == NULL )
1220         return 0;
1221 
1222     p2m_lock(p2m);
1223     for ( gfn = p2m->next_shared_gfn_to_relinquish;
1224           gfn <= p2m->max_mapped_pfn; gfn++ )
1225     {
1226         p2m_access_t a;
1227         p2m_type_t t;
1228         mfn_t mfn;
1229         int set_rc;
1230 
1231         if ( atomic_read(&d->shr_pages) == 0 )
1232             break;
1233         mfn = p2m->get_entry(p2m, _gfn(gfn), &t, &a, 0, NULL, NULL);
1234         if ( mfn_valid(mfn) && (t == p2m_ram_shared) )
1235         {
1236             /* Does not fail with ENOMEM given the DESTROY flag */
1237             BUG_ON(__mem_sharing_unshare_page(d, gfn,
1238                     MEM_SHARING_DESTROY_GFN));
1239             /* Clear out the p2m entry so no one else may try to
1240              * unshare.  Must succeed: we just read the old entry and
1241              * we hold the p2m lock. */
1242             set_rc = p2m->set_entry(p2m, _gfn(gfn), _mfn(0), PAGE_ORDER_4K,
1243                                     p2m_invalid, p2m_access_rwx, -1);
1244             ASSERT(set_rc == 0);
1245             count += 0x10;
1246         }
1247         else
1248             ++count;
1249 
1250         /* Preempt every 2MiB (shared) or 32MiB (unshared) - arbitrary. */
1251         if ( count >= 0x2000 )
1252         {
1253             if ( hypercall_preempt_check() )
1254             {
1255                 p2m->next_shared_gfn_to_relinquish = gfn + 1;
1256                 rc = -ERESTART;
1257                 break;
1258             }
1259             count = 0;
1260         }
1261     }
1262 
1263     p2m_unlock(p2m);
1264     return rc;
1265 }
1266 
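/* Deduplicate a range of gfns between two domains by nominating and sharing
 * each gfn in turn. Progress is kept in range->opaque so the operation can
 * be restarted as a continuation; returns 1 when preempted. */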
1267 static int range_share(struct domain *d, struct domain *cd,
1268                        struct mem_sharing_op_range *range)
1269 {
1270     int rc = 0;
1271     shr_handle_t sh, ch;
1272     unsigned long start = range->opaque ?: range->first_gfn;
1273 
1274     while ( range->last_gfn >= start )
1275     {
1276         /*
1277          * We only break out if we run out of memory as individual pages may
1278          * legitimately be unsharable and we just want to skip over those.
1279          */
1280         rc = nominate_page(d, _gfn(start), 0, &sh);
1281         if ( rc == -ENOMEM )
1282             break;
1283 
1284         if ( !rc )
1285         {
1286             rc = nominate_page(cd, _gfn(start), 0, &ch);
1287             if ( rc == -ENOMEM )
1288                 break;
1289 
1290             if ( !rc )
1291             {
1292                 /* If we get here this should be guaranteed to succeed. */
1293                 rc = share_pages(d, _gfn(start), sh, cd, _gfn(start), ch);
1294                 ASSERT(!rc);
1295             }
1296         }
1297 
1298         /* Check for continuation if it's not the last iteration. */
1299         if ( range->last_gfn >= ++start && hypercall_preempt_check() )
1300         {
1301             rc = 1;
1302             break;
1303         }
1304     }
1305 
1306     range->opaque = start;
1307 
1308     /*
1309      * The last page may fail with -EINVAL, and for range sharing we don't
1310      * care about that.
1311      */
1312     if ( range->last_gfn < start && rc == -EINVAL )
1313         rc = 0;
1314 
1315     return rc;
1316 }
1317 
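/* Top-level dispatcher for the XENMEM_sharing_op_* subops of the memory_op
 * hypercall. */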
1318 int mem_sharing_memop(XEN_GUEST_HANDLE_PARAM(xen_mem_sharing_op_t) arg)
1319 {
1320     int rc;
1321     xen_mem_sharing_op_t mso;
1322     struct domain *d;
1323 
1324     rc = -EFAULT;
1325     if ( copy_from_guest(&mso, arg, 1) )
1326         return rc;
1327 
1328     if ( mso.op == XENMEM_sharing_op_audit )
1329         return audit();
1330 
1331     rc = rcu_lock_live_remote_domain_by_id(mso.domain, &d);
1332     if ( rc )
1333         return rc;
1334 
1335     rc = xsm_mem_sharing(XSM_DM_PRIV, d);
1336     if ( rc )
1337         goto out;
1338 
1339     /* Only HAP is supported */
1340     rc = -ENODEV;
1341     if ( !hap_enabled(d) || !d->arch.hvm_domain.mem_sharing_enabled )
1342         goto out;
1343 
1344     switch ( mso.op )
1345     {
1346         case XENMEM_sharing_op_nominate_gfn:
1347         {
1348             shr_handle_t handle;
1349 
1350             rc = -EINVAL;
1351             if ( !mem_sharing_enabled(d) )
1352                 goto out;
1353 
1354             rc = nominate_page(d, _gfn(mso.u.nominate.u.gfn), 0, &handle);
1355             mso.u.nominate.handle = handle;
1356         }
1357         break;
1358 
1359         case XENMEM_sharing_op_nominate_gref:
1360         {
1361             grant_ref_t gref = mso.u.nominate.u.grant_ref;
1362             gfn_t gfn;
1363             shr_handle_t handle;
1364 
1365             rc = -EINVAL;
1366             if ( !mem_sharing_enabled(d) )
1367                 goto out;
1368             rc = mem_sharing_gref_to_gfn(d->grant_table, gref, &gfn, NULL);
1369             if ( rc < 0 )
1370                 goto out;
1371 
1372             rc = nominate_page(d, gfn, 3, &handle);
1373             mso.u.nominate.handle = handle;
1374         }
1375         break;
1376 
1377         case XENMEM_sharing_op_share:
1378         {
1379             gfn_t sgfn, cgfn;
1380             struct domain *cd;
1381             shr_handle_t sh, ch;
1382 
1383             rc = -EINVAL;
1384             if ( !mem_sharing_enabled(d) )
1385                 goto out;
1386 
1387             rc = rcu_lock_live_remote_domain_by_id(mso.u.share.client_domain,
1388                                                    &cd);
1389             if ( rc )
1390                 goto out;
1391 
1392             rc = xsm_mem_sharing_op(XSM_DM_PRIV, d, cd, mso.op);
1393             if ( rc )
1394             {
1395                 rcu_unlock_domain(cd);
1396                 goto out;
1397             }
1398 
1399             if ( !mem_sharing_enabled(cd) )
1400             {
1401                 rcu_unlock_domain(cd);
1402                 rc = -EINVAL;
1403                 goto out;
1404             }
1405 
1406             if ( XENMEM_SHARING_OP_FIELD_IS_GREF(mso.u.share.source_gfn) )
1407             {
1408                 grant_ref_t gref = (grant_ref_t)
1409                                     (XENMEM_SHARING_OP_FIELD_GET_GREF(
1410                                         mso.u.share.source_gfn));
1411                 rc = mem_sharing_gref_to_gfn(d->grant_table, gref, &sgfn,
1412                                              NULL);
1413                 if ( rc < 0 )
1414                 {
1415                     rcu_unlock_domain(cd);
1416                     goto out;
1417                 }
1418             }
1419             else
1420                 sgfn = _gfn(mso.u.share.source_gfn);
1421 
1422             if ( XENMEM_SHARING_OP_FIELD_IS_GREF(mso.u.share.client_gfn) )
1423             {
1424                 grant_ref_t gref = (grant_ref_t)
1425                                     (XENMEM_SHARING_OP_FIELD_GET_GREF(
1426                                         mso.u.share.client_gfn));
1427                 rc = mem_sharing_gref_to_gfn(cd->grant_table, gref, &cgfn,
1428                                              NULL);
1429                 if ( rc < 0 )
1430                 {
1431                     rcu_unlock_domain(cd);
1432                     goto out;
1433                 }
1434             }
1435             else
1436                 cgfn = _gfn(mso.u.share.client_gfn);
1437 
1438             sh = mso.u.share.source_handle;
1439             ch = mso.u.share.client_handle;
1440 
1441             rc = share_pages(d, sgfn, sh, cd, cgfn, ch);
1442 
1443             rcu_unlock_domain(cd);
1444         }
1445         break;
1446 
1447         case XENMEM_sharing_op_add_physmap:
1448         {
1449             unsigned long sgfn, cgfn;
1450             struct domain *cd;
1451             shr_handle_t sh;
1452 
1453             rc = -EINVAL;
1454             if ( !mem_sharing_enabled(d) )
1455                 goto out;
1456 
1457             rc = rcu_lock_live_remote_domain_by_id(mso.u.share.client_domain,
1458                                                    &cd);
1459             if ( rc )
1460                 goto out;
1461 
1462             rc = xsm_mem_sharing_op(XSM_DM_PRIV, d, cd, mso.op);
1463             if ( rc )
1464             {
1465                 rcu_unlock_domain(cd);
1466                 goto out;
1467             }
1468 
1469             if ( !mem_sharing_enabled(cd) )
1470             {
1471                 rcu_unlock_domain(cd);
1472                 rc = -EINVAL;
1473                 goto out;
1474             }
1475 
1476             if ( XENMEM_SHARING_OP_FIELD_IS_GREF(mso.u.share.source_gfn) )
1477             {
1478                 /* Cannot add a gref to the physmap */
1479                 rcu_unlock_domain(cd);
1480                 rc = -EINVAL;
1481                 goto out;
1482             }
1483 
1484             sgfn    = mso.u.share.source_gfn;
1485             sh      = mso.u.share.source_handle;
1486             cgfn    = mso.u.share.client_gfn;
1487 
1488             rc = mem_sharing_add_to_physmap(d, sgfn, sh, cd, cgfn);
1489 
1490             rcu_unlock_domain(cd);
1491         }
1492         break;
1493 
1494         case XENMEM_sharing_op_range_share:
1495         {
1496             unsigned long max_sgfn, max_cgfn;
1497             struct domain *cd;
1498 
1499             rc = -EINVAL;
1500             if ( mso.u.range._pad[0] || mso.u.range._pad[1] ||
1501                  mso.u.range._pad[2] )
1502                  goto out;
1503 
1504             /*
1505              * We use opaque for the hypercall continuation value.
1506              * Ideally the user sets this to 0 in the beginning but
1507              * there is no good way of enforcing that here, so we just check
1508              * that it's at least in range.
1509              */
1510             if ( mso.u.range.opaque &&
1511                  (mso.u.range.opaque < mso.u.range.first_gfn ||
1512                   mso.u.range.opaque > mso.u.range.last_gfn) )
1513                 goto out;
1514 
1515             if ( !mem_sharing_enabled(d) )
1516                 goto out;
1517 
1518             rc = rcu_lock_live_remote_domain_by_id(mso.u.range.client_domain,
1519                                                    &cd);
1520             if ( rc )
1521                 goto out;
1522 
1523             /*
1524              * We reuse XENMEM_sharing_op_share XSM check here as this is
1525              * essentially the same concept repeated over multiple pages.
1526              */
1527             rc = xsm_mem_sharing_op(XSM_DM_PRIV, d, cd,
1528                                     XENMEM_sharing_op_share);
1529             if ( rc )
1530             {
1531                 rcu_unlock_domain(cd);
1532                 goto out;
1533             }
1534 
1535             if ( !mem_sharing_enabled(cd) )
1536             {
1537                 rcu_unlock_domain(cd);
1538                 rc = -EINVAL;
1539                 goto out;
1540             }
1541 
1542             /*
1543              * Sanity check only, the client should keep the domains paused for
1544              * the duration of this op.
1545              */
1546             if ( !atomic_read(&d->pause_count) ||
1547                  !atomic_read(&cd->pause_count) )
1548             {
1549                 rcu_unlock_domain(cd);
1550                 rc = -EINVAL;
1551                 goto out;
1552             }
1553 
1554             max_sgfn = domain_get_maximum_gpfn(d);
1555             max_cgfn = domain_get_maximum_gpfn(cd);
1556 
1557             if ( max_sgfn < mso.u.range.first_gfn ||
1558                  max_sgfn < mso.u.range.last_gfn ||
1559                  max_cgfn < mso.u.range.first_gfn ||
1560                  max_cgfn < mso.u.range.last_gfn )
1561             {
1562                 rcu_unlock_domain(cd);
1563                 rc = -EINVAL;
1564                 goto out;
1565             }
1566 
1567             rc = range_share(d, cd, &mso.u.range);
1568             rcu_unlock_domain(cd);
1569 
1570             if ( rc > 0 )
1571             {
1572                 if ( __copy_to_guest(arg, &mso, 1) )
1573                     rc = -EFAULT;
1574                 else
1575                     rc = hypercall_create_continuation(__HYPERVISOR_memory_op,
1576                                                        "lh", XENMEM_sharing_op,
1577                                                        arg);
1578             }
1579             else
1580                 mso.u.range.opaque = 0;
1581         }
1582         break;
1583 
1584         case XENMEM_sharing_op_debug_gfn:
1585             rc = debug_gfn(d, _gfn(mso.u.debug.u.gfn));
1586             break;
1587 
1588         case XENMEM_sharing_op_debug_gref:
1589             rc = debug_gref(d, mso.u.debug.u.gref);
1590             break;
1591 
1592         default:
1593             rc = -ENOSYS;
1594             break;
1595     }
1596 
1597     if ( !rc && __copy_to_guest(arg, &mso, 1) )
1598         rc = -EFAULT;
1599 
1600 out:
1601     rcu_unlock_domain(d);
1602     return rc;
1603 }
1604 
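/* XEN_DOMCTL_MEM_SHARING_CONTROL: enable or disable sharing for a HAP
 * domain (refused if the domain is using the IOMMU for passthrough). */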
1605 int mem_sharing_domctl(struct domain *d, struct xen_domctl_mem_sharing_op *mec)
1606 {
1607     int rc;
1608 
1609     /* Only HAP is supported */
1610     if ( !hap_enabled(d) )
1611          return -ENODEV;
1612 
1613     switch(mec->op)
1614     {
1615         case XEN_DOMCTL_MEM_SHARING_CONTROL:
1616         {
1617             rc = 0;
1618             if ( unlikely(need_iommu(d) && mec->u.enable) )
1619                 rc = -EXDEV;
1620             else
1621                 d->arch.hvm_domain.mem_sharing_enabled = mec->u.enable;
1622         }
1623         break;
1624 
1625         default:
1626             rc = -ENOSYS;
1627     }
1628 
1629     return rc;
1630 }
1631 
1632 void __init mem_sharing_init(void)
1633 {
1634     printk("Initing memory sharing.\n");
1635 #if MEM_SHARING_AUDIT
1636     spin_lock_init(&shr_audit_lock);
1637     INIT_LIST_HEAD(&shr_audit_list);
1638 #endif
1639 }
1640 
1641