/* SPDX-License-Identifier: GPL-2.0-or-later */
/******************************************************************************
 * arch/x86/mm/mem_paging.c
 *
 * Memory paging support.
 *
 * Copyright (c) 2009 Citrix Systems, Inc. (Patrick Colp)
 */


#include <asm/p2m.h>
#include <xen/guest_access.h>
#include <xen/vm_event.h>
#include <xsm/xsm.h>

#include "mm-locks.h"

/*
 * p2m_mem_paging_drop_page - Tell pager to drop its reference to a paged page
 * @d: guest domain
 * @gfn: guest page to drop
 *
 * p2m_mem_paging_drop_page() will notify the pager that a paged-out gfn was
 * released by the guest. The pager is supposed to drop its reference to the
 * gfn.
 */
void p2m_mem_paging_drop_page(struct domain *d, gfn_t gfn, p2m_type_t p2mt)
{
    vm_event_request_t req = {
        .reason = VM_EVENT_REASON_MEM_PAGING,
        .u.mem_paging.gfn = gfn_x(gfn)
    };

    /*
     * We allow no ring in this unique case, because it won't affect
     * correctness of the guest's execution at this point. If this is the only
     * page that happens to be paged out, we'll be okay, but it's likely the
     * guest will crash shortly anyway.
     */
    int rc = vm_event_claim_slot(d, d->vm_event_paging);

    if ( rc < 0 )
        return;

    /* Send release notification to pager */
    req.u.mem_paging.flags = MEM_PAGING_DROP_PAGE;

    /* Update stats only if the page has already been evicted */
    if ( p2mt != p2m_ram_paging_out )
        atomic_dec(&d->paged_pages);
    else
        /* Evict will fail now, tag this request for pager */
        req.u.mem_paging.flags |= MEM_PAGING_EVICT_FAIL;

    vm_event_put_request(d, d->vm_event_paging, &req);
}

/*
 * p2m_mem_paging_populate - Tell pager to populate a paged page
 * @d: guest domain
 * @gfn: guest page in paging state
 *
 * p2m_mem_paging_populate() will notify the pager that a page in any of the
 * paging states needs to be written back into the guest.
 * This function needs to be called whenever gfn_to_mfn() returns any of the
 * p2m paging types, because the gfn may not be backed by an mfn.
 *
 * The gfn can be in any of the paging states, but the pager only needs to be
 * notified when the gfn is in the paging-out path (paging_out or paged). This
 * function may be called more than once from several vcpus. If the vcpu
 * belongs to the guest, the vcpu must be stopped and the pager notified that
 * the vcpu was stopped. The pager needs to handle several requests for the
 * same gfn.
 *
 * If the gfn is not in the paging-out path and the vcpu does not belong to
 * the guest, nothing needs to be done and the function assumes that a request
 * was already sent to the pager. In this case the caller has to try again
 * until the gfn is fully paged in again.
 */
void p2m_mem_paging_populate(struct domain *d, gfn_t gfn)
{
    struct vcpu *v = current;
    vm_event_request_t req = {
        .reason = VM_EVENT_REASON_MEM_PAGING,
        .u.mem_paging.gfn = gfn_x(gfn)
    };
    p2m_type_t p2mt;
    p2m_access_t a;
    mfn_t mfn;
    struct p2m_domain *p2m = p2m_get_hostp2m(d);
    int rc = vm_event_claim_slot(d, d->vm_event_paging);

    /* We're paging. There should be a ring. */
    if ( rc == -EOPNOTSUPP )
    {
        gdprintk(XENLOG_ERR, "%pd paging gfn %"PRI_gfn" yet no ring in place\n",
                 d, gfn_x(gfn));
        /* Prevent the vcpu from faulting repeatedly on the same gfn */
        if ( v->domain == d )
            vcpu_pause_nosync(v);
        domain_crash(d);
        return;
    }
    else if ( rc < 0 )
        return;

    /* Fix p2m mapping */
    gfn_lock(p2m, gfn, 0);
    mfn = p2m->get_entry(p2m, gfn, &p2mt, &a, 0, NULL, NULL);
    /* Allow only nominated or evicted pages to enter page-in path */
    if ( p2mt == p2m_ram_paging_out || p2mt == p2m_ram_paged )
    {
        /* Evict will fail now, tag this request for pager */
        if ( p2mt == p2m_ram_paging_out )
            req.u.mem_paging.flags |= MEM_PAGING_EVICT_FAIL;

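        /* Move the gfn onto the page-in path; the pager is asked below to
         * restore its contents */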
        rc = p2m_set_entry(p2m, gfn, mfn, PAGE_ORDER_4K, p2m_ram_paging_in, a);
    }
    gfn_unlock(p2m, gfn, 0);
    if ( rc < 0 )
        goto out_cancel;

    /*
     * Pause the vcpu if the request came from the guest and the gfn has a
     * paging type.
     */
    if ( p2m_is_paging(p2mt) && v->domain == d )
    {
        vm_event_vcpu_pause(v);
        req.flags |= VM_EVENT_FLAG_VCPU_PAUSED;
    }
    /* No need to inform pager if the gfn is not in the page-out path */
    else if ( p2mt != p2m_ram_paging_out && p2mt != p2m_ram_paged )
    {
        /* gfn is already on its way back and vcpu is not paused */
    out_cancel:
        vm_event_cancel_slot(d, d->vm_event_paging);
        return;
    }

    /* Send request to pager */
    req.u.mem_paging.p2mt = p2mt;
    req.vcpu_id = v->vcpu_id;

    vm_event_put_request(d, d->vm_event_paging, &req);
}

/*
 * p2m_mem_paging_resume - Resume guest gfn
 * @d: guest domain
 * @rsp: vm_event response received
 *
 * p2m_mem_paging_resume() will advance the p2mt of a gfn to p2m_ram_rw (or
 * p2m_ram_logdirty if log-dirty mode is active). It is called by the pager.
 *
 * The gfn was previously either evicted and populated, or nominated and
 * populated. In both cases populate() moved the gfn to p2m_ram_paging_in; if
 * the page was merely nominated, its original mfn is still in place, so the
 * pager did not need to call prepare().
 *
 * If the gfn was dropped the vcpu needs to be unpaused.
 */
void p2m_mem_paging_resume(struct domain *d, vm_event_response_t *rsp)
{
    struct p2m_domain *p2m = p2m_get_hostp2m(d);
    p2m_type_t p2mt;
    p2m_access_t a;
    mfn_t mfn;

    /* Fix p2m entry if the page was not dropped */
    if ( !(rsp->u.mem_paging.flags & MEM_PAGING_DROP_PAGE) )
    {
        gfn_t gfn = _gfn(rsp->u.mem_paging.gfn);

        gfn_lock(p2m, gfn, 0);
        mfn = p2m->get_entry(p2m, gfn, &p2mt, &a, 0, NULL, NULL);
        /*
         * Allow only pages which were prepared properly, or pages which
         * were nominated but not evicted.
         */
        if ( mfn_valid(mfn) && (p2mt == p2m_ram_paging_in) )
        {
            int rc = p2m_set_entry(p2m, gfn, mfn, PAGE_ORDER_4K,
                                   paging_mode_log_dirty(d) ? p2m_ram_logdirty
                                                            : p2m_ram_rw, a);

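            /* The gfn is backed again, so re-create its M2P entry as well */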
            if ( !rc )
                set_gpfn_from_mfn(mfn_x(mfn), gfn_x(gfn));
        }
        gfn_unlock(p2m, gfn, 0);
    }
}

/*
 * nominate - Mark a guest page as to-be-paged-out
 * @d: guest domain
 * @gfn: guest page to nominate
 *
 * Returns 0 for success or a negative errno value if the gfn is not pageable.
 *
 * nominate() is called by the pager and checks if a guest page can be paged
 * out. If the following conditions are met the p2mt will be changed:
 * - the gfn is backed by an mfn
 * - the p2mt of the gfn is pageable
 * - the mfn is not used for IO
 * - the mfn has exactly one user and has no special meaning
 *
 * Once the p2mt is changed the page is read-only for the guest. On success
 * the pager can write the page contents to disk and later evict the page.
 */
static int nominate(struct domain *d, gfn_t gfn)
{
    struct page_info *page;
    struct p2m_domain *p2m = p2m_get_hostp2m(d);
    p2m_type_t p2mt;
    p2m_access_t a;
    mfn_t mfn;
    int ret = -EBUSY;

    gfn_lock(p2m, gfn, 0);

    mfn = p2m->get_entry(p2m, gfn, &p2mt, &a, 0, NULL, NULL);

    /* Check if mfn is valid */
    if ( !mfn_valid(mfn) )
        goto out;

    /* Check p2m type */
    if ( !p2m_is_pageable(p2mt) )
        goto out;

    /* Check for io memory page */
    if ( is_iomem_page(mfn) )
        goto out;

    /* Check page count and type */
    page = mfn_to_page(mfn);
    if ( (page->count_info & (PGC_count_mask | PGC_allocated)) !=
         (1 | PGC_allocated) )
        goto out;

    if ( (page->u.inuse.type_info & PGT_count_mask) != 0 )
        goto out;

    /* Fix p2m entry */
    ret = p2m_set_entry(p2m, gfn, mfn, PAGE_ORDER_4K, p2m_ram_paging_out, a);

 out:
    gfn_unlock(p2m, gfn, 0);
    return ret;
}

/*
 * evict - Mark a guest page as paged-out
 * @d: guest domain
 * @gfn: guest page to evict
 *
 * Returns 0 for success or a negative errno value if eviction is not
 * possible.
 *
 * evict() is called by the pager and will free a guest page and release it
 * back to Xen. If the following conditions are met the page can be freed:
 * - the gfn is backed by an mfn
 * - the gfn was nominated
 * - the mfn still has exactly one user and has no special meaning
 *
 * After a successful nomination some other process could have mapped the
 * page. In this case eviction cannot be done. If the gfn was populated before
 * the pager could evict it, eviction cannot be done either. In this case the
 * gfn is still backed by an mfn.
 */
static int evict(struct domain *d, gfn_t gfn)
{
    struct page_info *page;
    p2m_type_t p2mt;
    p2m_access_t a;
    mfn_t mfn;
    struct p2m_domain *p2m = p2m_get_hostp2m(d);
    int ret = -EBUSY;

    gfn_lock(p2m, gfn, 0);

    /* Get mfn */
    mfn = p2m->get_entry(p2m, gfn, &p2mt, &a, 0, NULL, NULL);
    if ( unlikely(!mfn_valid(mfn)) )
        goto out;

    /* Allow only nominated pages */
    if ( p2mt != p2m_ram_paging_out )
        goto out;

    /* Get the page so it doesn't get modified under Xen's feet */
    page = mfn_to_page(mfn);
    if ( unlikely(!get_page(page, d)) )
        goto out;

    /* Check page count and type once more */
    if ( (page->count_info & (PGC_count_mask | PGC_allocated)) !=
         (2 | PGC_allocated) )
        goto out_put;

    if ( (page->u.inuse.type_info & PGT_count_mask) != 0 )
        goto out_put;

    /* Decrement guest domain's ref count of the page */
    put_page_alloc_ref(page);

    /* Remove mapping from p2m table */
    ret = p2m_set_entry(p2m, gfn, INVALID_MFN, PAGE_ORDER_4K,
                        p2m_ram_paged, a);

    /* Clear content before returning the page to Xen */
    scrub_one_page(page);

    /* Track number of paged gfns */
    atomic_inc(&d->paged_pages);

 out_put:
    /* Put the page back so it gets freed */
    put_page(page);

 out:
    gfn_unlock(p2m, gfn, 0);
    return ret;
}

/*
 * prepare - Allocate a new page for the guest
 * @d: guest domain
 * @gfn: guest page in paging state
 *
 * prepare() will allocate a new page for the guest if the gfn is not backed
 * by an mfn. It is called by the pager.
 * It is required that the gfn was already populated. The gfn may already have
 * an mfn if populate was called for a gfn which was nominated but not
 * evicted. In this case only the p2mt needs to be forwarded.
 */
static int prepare(struct domain *d, gfn_t gfn,
                   XEN_GUEST_HANDLE_64(const_uint8) buffer)
{
    struct page_info *page = NULL;
    p2m_type_t p2mt;
    p2m_access_t a;
    mfn_t mfn;
    struct p2m_domain *p2m = p2m_get_hostp2m(d);
    int ret, page_extant = 1;

    if ( !guest_handle_okay(buffer, PAGE_SIZE) )
        return -EINVAL;

    gfn_lock(p2m, gfn, 0);

    mfn = p2m->get_entry(p2m, gfn, &p2mt, &a, 0, NULL, NULL);

    ret = -ENOENT;
    /* Allow missing pages */
    if ( (p2mt != p2m_ram_paging_in) && (p2mt != p2m_ram_paged) )
        goto out;

    /* Allocate a page if the gfn does not have one yet */
    if ( !mfn_valid(mfn) )
    {
        void *guest_map;

        /* If the user did not provide a buffer, we disallow */
        ret = -EINVAL;
        if ( unlikely(guest_handle_is_null(buffer)) )
            goto out;
        /* Get a free page */
        ret = -ENOMEM;
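        /* Satisfy the mm lock-ordering checks before calling the allocator
         * with the gfn lock held */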
        page_alloc_mm_pre_lock(d);
        page = alloc_domheap_page(d, 0);
        if ( unlikely(page == NULL) )
            goto out;
        if ( unlikely(!get_page(page, d)) )
        {
            /*
             * The domain can't possibly know about this page yet, so failure
             * here is a clear indication of something fishy going on.
             */
            gprintk(XENLOG_ERR,
                    "%pd: fresh page for GFN %"PRI_gfn" in unexpected state\n",
                    d, gfn_x(gfn));
            domain_crash(d);
            page = NULL;
            goto out;
        }
        mfn = page_to_mfn(page);
        page_extant = 0;

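        /* Fill the fresh page with the contents supplied by the pager */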
        guest_map = map_domain_page(mfn);
        ret = copy_from_guest(guest_map, buffer, PAGE_SIZE);
        unmap_domain_page(guest_map);
        if ( ret )
        {
            ret = -EFAULT;
            goto out;
        }
    }

    /*
     * Make the page already guest-accessible. If the pager still has a
     * pending resume operation, it will be idempotent p2m entry-wise, but
     * will unpause the vcpu.
     */
    ret = p2m_set_entry(p2m, gfn, mfn, PAGE_ORDER_4K,
                        paging_mode_log_dirty(d) ? p2m_ram_logdirty
                                                 : p2m_ram_rw, a);
    if ( !ret )
    {
        set_gpfn_from_mfn(mfn_x(mfn), gfn_x(gfn));

        if ( !page_extant )
            atomic_dec(&d->paged_pages);
    }

 out:
    gfn_unlock(p2m, gfn, 0);

    if ( page )
    {
        /*
         * Free the page on error. Drop our temporary reference in all
         * cases.
         */
        if ( ret )
            put_page_alloc_ref(page);
        put_page(page);
    }

    return ret;
}

int mem_paging_memop(XEN_GUEST_HANDLE_PARAM(xen_mem_paging_op_t) arg)
{
    int rc;
    xen_mem_paging_op_t mpo;
    struct domain *d;
    bool copyback = false;

    if ( copy_from_guest(&mpo, arg, 1) )
        return -EFAULT;

    rc = rcu_lock_live_remote_domain_by_id(mpo.domain, &d);
    if ( rc )
        return rc;

    rc = xsm_mem_paging(XSM_DM_PRIV, d);
    if ( rc )
        goto out;

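    /* Paging ops are only valid once the paging ring has been set up */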
    rc = -ENODEV;
    if ( unlikely(!vm_event_check_ring(d->vm_event_paging)) )
        goto out;

    switch ( mpo.op )
    {
    case XENMEM_paging_op_nominate:
        rc = nominate(d, _gfn(mpo.gfn));
        break;

    case XENMEM_paging_op_evict:
        rc = evict(d, _gfn(mpo.gfn));
        break;

    case XENMEM_paging_op_prep:
        rc = prepare(d, _gfn(mpo.gfn), mpo.buffer);
        if ( !rc )
            copyback = true;
        break;

    default:
        rc = -ENOSYS;
        break;
    }

    if ( copyback && __copy_to_guest(arg, &mpo, 1) )
        rc = -EFAULT;

 out:
    rcu_unlock_domain(d);
    return rc;
}


/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * indent-tabs-mode: nil
 * End:
 */