/* SPDX-License-Identifier: GPL-2.0-or-later */
/******************************************************************************
 * arch/x86/mm/mem_paging.c
 *
 * Memory paging support.
 *
 * Copyright (c) 2009 Citrix Systems, Inc. (Patrick Colp)
 */


#include <asm/p2m.h>
#include <xen/guest_access.h>
#include <xen/vm_event.h>
#include <xsm/xsm.h>

#include "mm-locks.h"

/*
 * p2m_mem_paging_drop_page - Tell pager to drop its reference to a paged page
 * @d: guest domain
 * @gfn: guest page to drop
 *
 * p2m_mem_paging_drop_page() will notify the pager that a paged-out gfn was
 * released by the guest. The pager is supposed to drop its reference to the
 * gfn.
 */
void p2m_mem_paging_drop_page(struct domain *d, gfn_t gfn, p2m_type_t p2mt)
{
    vm_event_request_t req = {
        .reason = VM_EVENT_REASON_MEM_PAGING,
        .u.mem_paging.gfn = gfn_x(gfn)
    };

    /*
     * We allow no ring in this unique case, because it won't affect
     * correctness of the guest execution at this point.  If this is the only
     * page that happens to be paged-out, we'll be okay, but it's likely the
     * guest will crash shortly anyway.
     */
    int rc = vm_event_claim_slot(d, d->vm_event_paging);

    if ( rc < 0 )
        return;

    /* Send release notification to pager */
    req.u.mem_paging.flags = MEM_PAGING_DROP_PAGE;

    /* Update stats only if the page has already been evicted */
    if ( p2mt != p2m_ram_paging_out )
        atomic_dec(&d->paged_pages);
    else
        /* Evict will fail now, tag this request for pager */
        req.u.mem_paging.flags |= MEM_PAGING_EVICT_FAIL;

    vm_event_put_request(d, d->vm_event_paging, &req);
}
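
/*
 * Illustrative sketch (an assumption, not part of this file): a pager that
 * consumes the vm_event ring might react to the release notification along
 * these lines.  pager_cancel_evict() and pager_forget_gfn() are hypothetical
 * pager-side helpers, not Xen functions.
 *
 *     if ( req->u.mem_paging.flags & MEM_PAGING_DROP_PAGE )
 *     {
 *         if ( req->u.mem_paging.flags & MEM_PAGING_EVICT_FAIL )
 *             pager_cancel_evict(req->u.mem_paging.gfn);
 *         pager_forget_gfn(req->u.mem_paging.gfn);
 *     }
 *
 * No vcpu needs unpausing for a drop notification; the guest has already
 * released the page.
 */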

/*
 * p2m_mem_paging_populate - Tell pager to populate a paged page
 * @d: guest domain
 * @gfn: guest page in paging state
 *
 * p2m_mem_paging_populate() will notify the pager that a page in any of the
 * paging states needs to be written back into the guest.
 * This function needs to be called whenever gfn_to_mfn() returns any of the
 * p2m paging types, because the gfn may not be backed by a mfn.
 *
 * The gfn can be in any of the paging states, but the pager only needs to be
 * notified when the gfn is in the paging-out path (paging_out or paged).  This
 * function may be called more than once from several vcpus.  If the vcpu
 * belongs to the guest, the vcpu must be stopped and the pager notified that
 * the vcpu was stopped.  The pager needs to handle several requests for the
 * same gfn.
 *
 * If the gfn is not in the paging-out path and the vcpu does not belong to
 * the guest, nothing needs to be done and the function assumes that a request
 * was already sent to the pager.  In this case the caller has to try again
 * until the gfn is fully paged in again.
 */
void p2m_mem_paging_populate(struct domain *d, gfn_t gfn)
{
    struct vcpu *v = current;
    vm_event_request_t req = {
        .reason = VM_EVENT_REASON_MEM_PAGING,
        .u.mem_paging.gfn = gfn_x(gfn)
    };
    p2m_type_t p2mt;
    p2m_access_t a;
    mfn_t mfn;
    struct p2m_domain *p2m = p2m_get_hostp2m(d);
    int rc = vm_event_claim_slot(d, d->vm_event_paging);

    /* We're paging. There should be a ring. */
    if ( rc == -EOPNOTSUPP )
    {
        gdprintk(XENLOG_ERR, "%pd paging gfn %"PRI_gfn" yet no ring in place\n",
                 d, gfn_x(gfn));
        /* Prevent the vcpu from faulting repeatedly on the same gfn */
        if ( v->domain == d )
            vcpu_pause_nosync(v);
        domain_crash(d);
        return;
    }
    else if ( rc < 0 )
        return;

    /* Fix p2m mapping */
    gfn_lock(p2m, gfn, 0);
    mfn = p2m->get_entry(p2m, gfn, &p2mt, &a, 0, NULL, NULL);
    /* Allow only nominated or evicted pages to enter page-in path */
    if ( p2mt == p2m_ram_paging_out || p2mt == p2m_ram_paged )
    {
        /* Evict will fail now, tag this request for pager */
        if ( p2mt == p2m_ram_paging_out )
            req.u.mem_paging.flags |= MEM_PAGING_EVICT_FAIL;

        rc = p2m_set_entry(p2m, gfn, mfn, PAGE_ORDER_4K, p2m_ram_paging_in, a);
    }
    gfn_unlock(p2m, gfn, 0);
    if ( rc < 0 )
        goto out_cancel;

    /* Pause domain if request came from guest and gfn has paging type */
    if ( p2m_is_paging(p2mt) && v->domain == d )
    {
        vm_event_vcpu_pause(v);
        req.flags |= VM_EVENT_FLAG_VCPU_PAUSED;
    }
    /* No need to inform pager if the gfn is not in the page-out path */
    else if ( p2mt != p2m_ram_paging_out && p2mt != p2m_ram_paged )
    {
        /* gfn is already on its way back and vcpu is not paused */
    out_cancel:
        vm_event_cancel_slot(d, d->vm_event_paging);
        return;
    }

    /* Send request to pager */
    req.u.mem_paging.p2mt = p2mt;
    req.vcpu_id = v->vcpu_id;

    vm_event_put_request(d, d->vm_event_paging, &req);
}
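
/*
 * Illustrative sketch (an assumption, not part of this file): the caller
 * pattern described above, around a p2m lookup.  The exact lookup helper
 * varies by call site; get_gfn()/put_gfn() are used here for illustration.
 *
 *     mfn = get_gfn(d, gfn_x(gfn), &p2mt);
 *     if ( p2m_is_paging(p2mt) )
 *     {
 *         put_gfn(d, gfn_x(gfn));
 *         p2m_mem_paging_populate(d, gfn);
 *         return -EAGAIN;  (retry once the pager has paged the gfn back in)
 *     }
 */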

/*
 * p2m_mem_paging_resume - Resume guest gfn
 * @d: guest domain
 * @rsp: vm_event response received
 *
 * p2m_mem_paging_resume() will set the p2mt of a gfn back to p2m_ram_rw. It
 * is called by the pager.
 *
 * The gfn was previously either evicted and populated, or nominated and
 * populated. If the page was evicted the p2mt will be p2m_ram_paging_in. If
 * the page was just nominated the p2mt will be p2m_ram_paging_in_start because
 * the pager did not call prepare().
 *
 * If the gfn was dropped the vcpu needs to be unpaused.
 */
void p2m_mem_paging_resume(struct domain *d, vm_event_response_t *rsp)
{
    struct p2m_domain *p2m = p2m_get_hostp2m(d);
    p2m_type_t p2mt;
    p2m_access_t a;
    mfn_t mfn;

    /* Fix p2m entry if the page was not dropped */
    if ( !(rsp->u.mem_paging.flags & MEM_PAGING_DROP_PAGE) )
    {
        gfn_t gfn = _gfn(rsp->u.mem_paging.gfn);

        gfn_lock(p2m, gfn, 0);
        mfn = p2m->get_entry(p2m, gfn, &p2mt, &a, 0, NULL, NULL);
        /*
         * Allow only pages which were prepared properly, or pages which
         * were nominated but not evicted.
         */
        if ( mfn_valid(mfn) && (p2mt == p2m_ram_paging_in) )
        {
            int rc = p2m_set_entry(p2m, gfn, mfn, PAGE_ORDER_4K,
                                   paging_mode_log_dirty(d) ? p2m_ram_logdirty
                                                            : p2m_ram_rw, a);

            if ( !rc )
                set_gpfn_from_mfn(mfn_x(mfn), gfn_x(gfn));
        }
        gfn_unlock(p2m, gfn, 0);
    }
}
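
/*
 * Illustrative sketch (an assumption, not part of this file): after a
 * successful prepare(), the pager answers the original populate request by
 * placing a response on the ring, which is what ends up here.  Copying
 * VM_EVENT_FLAG_VCPU_PAUSED from the request lets the vm_event core unpause
 * the paused vcpu.
 *
 *     rsp.reason = VM_EVENT_REASON_MEM_PAGING;
 *     rsp.vcpu_id = req.vcpu_id;
 *     rsp.flags = req.flags & VM_EVENT_FLAG_VCPU_PAUSED;
 *     rsp.u.mem_paging.gfn = req.u.mem_paging.gfn;
 */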

/*
 * nominate - Mark a guest page as to-be-paged-out
 * @d: guest domain
 * @gfn: guest page to nominate
 *
 * Returns 0 for success or negative errno values if gfn is not pageable.
 *
 * nominate() is called by the pager and checks if a guest page can be paged
 * out. If the following conditions are met the p2mt will be changed:
 * - the gfn is backed by a mfn
 * - the p2mt of the gfn is pageable
 * - the mfn is not used for IO
 * - the mfn has exactly one user and has no special meaning
 *
 * Once the p2mt is changed the page is read-only for the guest.  On success
 * the pager can write the page contents to disk and later evict the page.
 */
static int nominate(struct domain *d, gfn_t gfn)
{
    struct page_info *page;
    struct p2m_domain *p2m = p2m_get_hostp2m(d);
    p2m_type_t p2mt;
    p2m_access_t a;
    mfn_t mfn;
    int ret = -EBUSY;

    gfn_lock(p2m, gfn, 0);

    mfn = p2m->get_entry(p2m, gfn, &p2mt, &a, 0, NULL, NULL);

    /* Check if mfn is valid */
    if ( !mfn_valid(mfn) )
        goto out;

    /* Check p2m type */
    if ( !p2m_is_pageable(p2mt) )
        goto out;

    /* Check for io memory page */
    if ( is_iomem_page(mfn) )
        goto out;

    /* Check page count and type: exactly one ref, held by the allocation */
    page = mfn_to_page(mfn);
    if ( (page->count_info & (PGC_count_mask | PGC_allocated)) !=
         (1 | PGC_allocated) )
        goto out;

    if ( (page->u.inuse.type_info & PGT_count_mask) != 0 )
        goto out;

    /* Fix p2m entry */
    ret = p2m_set_entry(p2m, gfn, mfn, PAGE_ORDER_4K, p2m_ram_paging_out, a);

 out:
    gfn_unlock(p2m, gfn, 0);
    return ret;
}
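
/*
 * Illustrative sketch (an assumption, not part of this file): nominate() is
 * reached through the XENMEM_paging_op memop; pagers normally go through the
 * libxenctrl wrapper, whose exact name and signature depend on the tools
 * version.
 *
 *     if ( xc_mem_paging_nominate(xch, domid, gfn) == 0 )
 *         the gfn is now read-only for the guest and can be saved to disk
 */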

/*
 * evict - Mark a guest page as paged-out
 * @d: guest domain
 * @gfn: guest page to evict
 *
 * Returns 0 for success or negative errno values if eviction is not possible.
 *
 * evict() is called by the pager and will free a guest page and release it
 * back to Xen. If the following conditions are met the page can be freed:
 * - the gfn is backed by a mfn
 * - the gfn was nominated
 * - the mfn still has exactly one user and has no special meaning
 *
 * After successful nomination some other process could have mapped the page.
 * In this case eviction cannot be done. If the gfn was populated before the
 * pager could evict it, eviction cannot be done either. In this case the gfn
 * is still backed by a mfn.
 */
static int evict(struct domain *d, gfn_t gfn)
{
    struct page_info *page;
    p2m_type_t p2mt;
    p2m_access_t a;
    mfn_t mfn;
    struct p2m_domain *p2m = p2m_get_hostp2m(d);
    int ret = -EBUSY;

    gfn_lock(p2m, gfn, 0);

    /* Get mfn */
    mfn = p2m->get_entry(p2m, gfn, &p2mt, &a, 0, NULL, NULL);
    if ( unlikely(!mfn_valid(mfn)) )
        goto out;

    /* Allow only nominated pages */
    if ( p2mt != p2m_ram_paging_out )
        goto out;

    /* Get the page so it doesn't get modified under Xen's feet */
    page = mfn_to_page(mfn);
    if ( unlikely(!get_page(page, d)) )
        goto out;

    /*
     * Check page count and type once more: the expected count is now 2, the
     * allocation reference plus the one get_page() just took.
     */
    if ( (page->count_info & (PGC_count_mask | PGC_allocated)) !=
         (2 | PGC_allocated) )
        goto out_put;

    if ( (page->u.inuse.type_info & PGT_count_mask) != 0 )
        goto out_put;

    /* Decrement guest domain's ref count of the page */
    put_page_alloc_ref(page);

    /* Remove mapping from p2m table */
    ret = p2m_set_entry(p2m, gfn, INVALID_MFN, PAGE_ORDER_4K,
                        p2m_ram_paged, a);

    /* Clear content before returning the page to Xen */
    scrub_one_page(page);

    /* Track number of paged gfns */
    atomic_inc(&d->paged_pages);

 out_put:
    /* Put the page back so it gets freed */
    put_page(page);

 out:
    gfn_unlock(p2m, gfn, 0);
    return ret;
}
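
/*
 * Illustrative sketch (an assumption, not part of this file): the pager-side
 * page-out sequence built on nominate() and evict(), in the style of the
 * classic xenpaging tool.  save_page_to_disk() is a hypothetical helper;
 * note the foreign mapping must be torn down again before evict(), or the
 * reference count check above will fail.
 *
 *     xc_mem_paging_nominate(xch, domid, gfn);   make the gfn read-only
 *     map = xc_map_foreign_range(...);           map to copy contents out
 *     save_page_to_disk(map);
 *     munmap(map, PAGE_SIZE);                    drop our extra reference
 *     xc_mem_paging_evict(xch, domid, gfn);      may fail if gfn was remapped
 */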

/*
 * prepare - Allocate a new page for the guest
 * @d: guest domain
 * @gfn: guest page in paging state
 *
 * prepare() will allocate a new page for the guest if the gfn is not backed
 * by a mfn. It is called by the pager.
 * It is required that the gfn was already populated. The gfn may already have
 * a mfn if populate was called for a gfn which was nominated but not evicted.
 * In this case only the p2mt needs to be forwarded.
 */
static int prepare(struct domain *d, gfn_t gfn,
                   XEN_GUEST_HANDLE_64(const_uint8) buffer)
{
    struct page_info *page = NULL;
    p2m_type_t p2mt;
    p2m_access_t a;
    mfn_t mfn;
    struct p2m_domain *p2m = p2m_get_hostp2m(d);
    int ret, page_extant = 1;

    if ( !guest_handle_okay(buffer, PAGE_SIZE) )
        return -EINVAL;

    gfn_lock(p2m, gfn, 0);

    mfn = p2m->get_entry(p2m, gfn, &p2mt, &a, 0, NULL, NULL);

    ret = -ENOENT;
    /* Allow missing pages */
    if ( (p2mt != p2m_ram_paging_in) && (p2mt != p2m_ram_paged) )
        goto out;

    /* Allocate a page if the gfn does not have one yet */
    if ( !mfn_valid(mfn) )
    {
        void *guest_map;

        /* If the user did not provide a buffer, we disallow */
        ret = -EINVAL;
        if ( unlikely(guest_handle_is_null(buffer)) )
            goto out;
        /* Get a free page */
        ret = -ENOMEM;
        page_alloc_mm_pre_lock(d);
        page = alloc_domheap_page(d, 0);
        if ( unlikely(page == NULL) )
            goto out;
        if ( unlikely(!get_page(page, d)) )
        {
            /*
             * The domain can't possibly know about this page yet, so failure
             * here is a clear indication of something fishy going on.
             */
            gprintk(XENLOG_ERR,
                    "%pd: fresh page for GFN %"PRI_gfn" in unexpected state\n",
                    d, gfn_x(gfn));
            domain_crash(d);
            page = NULL;
            goto out;
        }
        mfn = page_to_mfn(page);
        page_extant = 0;

        guest_map = map_domain_page(mfn);
        ret = copy_from_guest(guest_map, buffer, PAGE_SIZE);
        unmap_domain_page(guest_map);
        if ( ret )
        {
            ret = -EFAULT;
            goto out;
        }
    }

    /*
     * Make the page already guest-accessible. If the pager still has a
     * pending resume operation, it will be idempotent p2m entry-wise, but
     * will unpause the vcpu.
     */
    ret = p2m_set_entry(p2m, gfn, mfn, PAGE_ORDER_4K,
                        paging_mode_log_dirty(d) ? p2m_ram_logdirty
                                                 : p2m_ram_rw, a);
    if ( !ret )
    {
        set_gpfn_from_mfn(mfn_x(mfn), gfn_x(gfn));

        if ( !page_extant )
            atomic_dec(&d->paged_pages);
    }

 out:
    gfn_unlock(p2m, gfn, 0);

    if ( page )
    {
        /*
         * Free the page on error.  Drop our temporary reference in all
         * cases.
         */
        if ( ret )
            put_page_alloc_ref(page);
        put_page(page);
    }

    return ret;
}
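
/*
 * Illustrative sketch (an assumption, not part of this file): the pager-side
 * page-in path.  On a populate request the pager reads the saved contents
 * back and hands them to prepare() via the memop buffer;
 * read_page_from_disk() is a hypothetical helper, xc_mem_paging_load() the
 * usual libxenctrl wrapper for XENMEM_paging_op_prep.
 *
 *     read_page_from_disk(domid, gfn, buf);
 *     xc_mem_paging_load(xch, domid, gfn, buf);
 *     ...then put a response on the ring (see p2m_mem_paging_resume()).
 */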

int mem_paging_memop(XEN_GUEST_HANDLE_PARAM(xen_mem_paging_op_t) arg)
{
    int rc;
    xen_mem_paging_op_t mpo;
    struct domain *d;
    bool copyback = false;

    if ( copy_from_guest(&mpo, arg, 1) )
        return -EFAULT;

    rc = rcu_lock_live_remote_domain_by_id(mpo.domain, &d);
    if ( rc )
        return rc;

    rc = xsm_mem_paging(XSM_DM_PRIV, d);
    if ( rc )
        goto out;

    rc = -ENODEV;
    if ( unlikely(!vm_event_check_ring(d->vm_event_paging)) )
        goto out;

    switch ( mpo.op )
    {
    case XENMEM_paging_op_nominate:
        rc = nominate(d, _gfn(mpo.gfn));
        break;

    case XENMEM_paging_op_evict:
        rc = evict(d, _gfn(mpo.gfn));
        break;

    case XENMEM_paging_op_prep:
        rc = prepare(d, _gfn(mpo.gfn), mpo.buffer);
        if ( !rc )
            copyback = true;
        break;

    default:
        rc = -ENOSYS;
        break;
    }

    if ( copyback && __copy_to_guest(arg, &mpo, 1) )
        rc = -EFAULT;

 out:
    rcu_unlock_domain(d);
    return rc;
}
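
/*
 * Illustrative sketch (an assumption, not part of this file): how
 * mem_paging_memop() is reached from the tools side.  The libxenctrl
 * wrappers marshal a xen_mem_paging_op_t and issue the XENMEM_paging_op
 * memory op roughly as follows.
 *
 *     xen_mem_paging_op_t mpo = {
 *         .op     = XENMEM_paging_op_nominate,
 *         .domain = domid,
 *         .gfn    = gfn,
 *     };
 *     rc = do_memory_op(xch, XENMEM_paging_op, &mpo, sizeof(mpo));
 */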


/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * indent-tabs-mode: nil
 * End:
 */