/******************************************************************************
 * arch/x86/paging.c
 *
 * x86 specific paging support
 * Copyright (c) 2007 Advanced Micro Devices (Wei Huang)
 * Copyright (c) 2007 XenSource Inc.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; If not, see <http://www.gnu.org/licenses/>.
 */

#include <xen/init.h>
#include <xen/guest_access.h>
#include <asm/paging.h>
#include <asm/shadow.h>
#include <asm/p2m.h>
#include <asm/hap.h>
#include <asm/event.h>
#include <asm/hvm/nestedhvm.h>
#include <xen/numa.h>
#include <xsm/xsm.h>
#include <public/sched.h> /* SHUTDOWN_suspend */

#include "mm-locks.h"

/* Printouts */
#define PAGING_PRINTK(_f, _a...)                                     \
    debugtrace_printk("pg: %s(): " _f, __func__, ##_a)
#define PAGING_ERROR(_f, _a...)                                      \
    printk("pg error: %s(): " _f, __func__, ##_a)
#define PAGING_DEBUG(flag, _f, _a...)                                \
    do {                                                             \
        if (PAGING_DEBUG_ ## flag)                                   \
            debugtrace_printk("pgdebug: %s(): " _f, __func__, ##_a); \
    } while (0)

/* Per-CPU variable for enforcing the lock ordering */
DEFINE_PER_CPU(int, mm_lock_level);

/* Override macros from asm/page.h to make them work with mfn_t */
#undef mfn_to_page
#define mfn_to_page(_m) __mfn_to_page(mfn_x(_m))
#undef page_to_mfn
#define page_to_mfn(_pg) _mfn(__page_to_mfn(_pg))

/************************************************/
/*              LOG DIRTY SUPPORT               */
/************************************************/
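/*
 * The log-dirty bitmap is kept as a sparse trie of domheap pages rooted at
 * d->arch.paging.log_dirty.top.  The top (L4), L3 and L2 levels are pages
 * holding LOGDIRTY_NODE_ENTRIES mfn_t entries (INVALID_MFN where a subtree
 * has not been allocated yet); the L1 leaves are plain bitmaps with one bit
 * per guest pfn.  The Lx_LOGDIRTY_IDX() macros split a pfn into the
 * per-level indices used to walk the trie.
 */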

static mfn_t paging_new_log_dirty_page(struct domain *d)
{
    struct page_info *page;

    page = d->arch.paging.alloc_page(d);
    if ( unlikely(page == NULL) )
    {
        d->arch.paging.log_dirty.failed_allocs++;
        return INVALID_MFN;
    }

    d->arch.paging.log_dirty.allocs++;

    return page_to_mfn(page);
}

/* Alloc and init a new leaf node */
static mfn_t paging_new_log_dirty_leaf(struct domain *d)
{
    mfn_t mfn = paging_new_log_dirty_page(d);

    if ( mfn_valid(mfn) )
        clear_domain_page(mfn);

    return mfn;
}

/* Alloc and init a new non-leaf node */
static mfn_t paging_new_log_dirty_node(struct domain *d)
{
    mfn_t mfn = paging_new_log_dirty_page(d);
    if ( mfn_valid(mfn) )
    {
        int i;
        mfn_t *node = map_domain_page(mfn);
        for ( i = 0; i < LOGDIRTY_NODE_ENTRIES; i++ )
            node[i] = INVALID_MFN;
        unmap_domain_page(node);
    }
    return mfn;
}

/* get the top of the log-dirty bitmap trie */
static mfn_t *paging_map_log_dirty_bitmap(struct domain *d)
{
    if ( likely(mfn_valid(d->arch.paging.log_dirty.top)) )
        return map_domain_page(d->arch.paging.log_dirty.top);
    return NULL;
}

static void paging_free_log_dirty_page(struct domain *d, mfn_t mfn)
{
    d->arch.paging.log_dirty.allocs--;
    d->arch.paging.free_page(d, mfn_to_page(mfn));
}

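/*
 * Free the whole log-dirty trie.  The walk may be too long to complete in
 * one go, so partial progress is parked in d->arch.paging.preempt.log_dirty
 * and -ERESTART returned; the caller is expected to retry the same
 * operation until something other than -ERESTART comes back.
 */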
static int paging_free_log_dirty_bitmap(struct domain *d, int rc)
{
    mfn_t *l4, *l3, *l2;
    int i4, i3, i2;

    paging_lock(d);

    if ( !mfn_valid(d->arch.paging.log_dirty.top) )
    {
        paging_unlock(d);
        return 0;
    }

    if ( !d->arch.paging.preempt.dom )
    {
        memset(&d->arch.paging.preempt.log_dirty, 0,
               sizeof(d->arch.paging.preempt.log_dirty));
        ASSERT(rc <= 0);
        d->arch.paging.preempt.log_dirty.done = -rc;
    }
    else if ( d->arch.paging.preempt.dom != current->domain ||
              d->arch.paging.preempt.op != XEN_DOMCTL_SHADOW_OP_OFF )
    {
        paging_unlock(d);
        return -EBUSY;
    }

    l4 = map_domain_page(d->arch.paging.log_dirty.top);
    i4 = d->arch.paging.preempt.log_dirty.i4;
    i3 = d->arch.paging.preempt.log_dirty.i3;
    rc = 0;

    for ( ; i4 < LOGDIRTY_NODE_ENTRIES; i4++, i3 = 0 )
    {
        if ( !mfn_valid(l4[i4]) )
            continue;

        l3 = map_domain_page(l4[i4]);

        for ( ; i3 < LOGDIRTY_NODE_ENTRIES; i3++ )
        {
            if ( !mfn_valid(l3[i3]) )
                continue;

            l2 = map_domain_page(l3[i3]);

            for ( i2 = 0; i2 < LOGDIRTY_NODE_ENTRIES; i2++ )
                if ( mfn_valid(l2[i2]) )
                    paging_free_log_dirty_page(d, l2[i2]);

            unmap_domain_page(l2);
            paging_free_log_dirty_page(d, l3[i3]);
            l3[i3] = INVALID_MFN;

            if ( i3 < LOGDIRTY_NODE_ENTRIES - 1 && hypercall_preempt_check() )
            {
                d->arch.paging.preempt.log_dirty.i3 = i3 + 1;
                d->arch.paging.preempt.log_dirty.i4 = i4;
                rc = -ERESTART;
                break;
            }
        }

        unmap_domain_page(l3);
        if ( rc )
            break;
        paging_free_log_dirty_page(d, l4[i4]);
        l4[i4] = INVALID_MFN;

        if ( i4 < LOGDIRTY_NODE_ENTRIES - 1 && hypercall_preempt_check() )
        {
            d->arch.paging.preempt.log_dirty.i3 = 0;
            d->arch.paging.preempt.log_dirty.i4 = i4 + 1;
            rc = -ERESTART;
            break;
        }
    }

    unmap_domain_page(l4);

    if ( !rc )
    {
        paging_free_log_dirty_page(d, d->arch.paging.log_dirty.top);
        d->arch.paging.log_dirty.top = INVALID_MFN;

        ASSERT(d->arch.paging.log_dirty.allocs == 0);
        d->arch.paging.log_dirty.failed_allocs = 0;

        rc = -d->arch.paging.preempt.log_dirty.done;
        d->arch.paging.preempt.dom = NULL;
    }
    else
    {
        d->arch.paging.preempt.dom = current->domain;
        d->arch.paging.preempt.op = XEN_DOMCTL_SHADOW_OP_OFF;
    }

    paging_unlock(d);

    return rc;
}

int paging_log_dirty_enable(struct domain *d, bool_t log_global)
{
    int ret;

    if ( need_iommu(d) && log_global )
    {
        /*
         * Refuse to turn on global log-dirty mode
         * if the domain is using the IOMMU.
         */
        return -EINVAL;
    }

    if ( paging_mode_log_dirty(d) )
        return -EINVAL;

    domain_pause(d);
    ret = d->arch.paging.log_dirty.ops->enable(d, log_global);
    domain_unpause(d);

    return ret;
}

static int paging_log_dirty_disable(struct domain *d, bool_t resuming)
{
    int ret = 1;

    if ( !resuming )
    {
        domain_pause(d);
        /* Safe because the domain is paused. */
        if ( paging_mode_log_dirty(d) )
        {
            ret = d->arch.paging.log_dirty.ops->disable(d);
            ASSERT(ret <= 0);
        }
    }

    ret = paging_free_log_dirty_bitmap(d, ret);
    if ( ret == -ERESTART )
        return ret;

    domain_unpause(d);

    return ret;
}

/* Mark a page as dirty, taking the guest pfn as parameter. */
void paging_mark_pfn_dirty(struct domain *d, pfn_t pfn)
{
    bool changed;
    mfn_t mfn, *l4, *l3, *l2;
    unsigned long *l1;
    unsigned int i1, i2, i3, i4;

    if ( !paging_mode_log_dirty(d) )
        return;

    /* Shared MFNs should NEVER be marked dirty */
    BUG_ON(paging_mode_translate(d) && SHARED_M2P(pfn_x(pfn)));

    /*
     * Values with the MSB set denote MFNs that aren't really part of the
     * domain's pseudo-physical memory map (e.g., the shared info frame).
     * Nothing to do here...
     */
    if ( unlikely(!VALID_M2P(pfn_x(pfn))) )
        return;

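    /* Split the pfn into the per-level indices of the dirty-bitmap trie. */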
    i1 = L1_LOGDIRTY_IDX(pfn);
    i2 = L2_LOGDIRTY_IDX(pfn);
    i3 = L3_LOGDIRTY_IDX(pfn);
    i4 = L4_LOGDIRTY_IDX(pfn);

    /* Recursive: this is called from inside the shadow code */
    paging_lock_recursive(d);

    if ( unlikely(!mfn_valid(d->arch.paging.log_dirty.top)) )
    {
        d->arch.paging.log_dirty.top = paging_new_log_dirty_node(d);
        if ( unlikely(!mfn_valid(d->arch.paging.log_dirty.top)) )
            goto out;
    }

    l4 = paging_map_log_dirty_bitmap(d);
    mfn = l4[i4];
    if ( !mfn_valid(mfn) )
        l4[i4] = mfn = paging_new_log_dirty_node(d);
    unmap_domain_page(l4);
    if ( !mfn_valid(mfn) )
        goto out;

    l3 = map_domain_page(mfn);
    mfn = l3[i3];
    if ( !mfn_valid(mfn) )
        l3[i3] = mfn = paging_new_log_dirty_node(d);
    unmap_domain_page(l3);
    if ( !mfn_valid(mfn) )
        goto out;

    l2 = map_domain_page(mfn);
    mfn = l2[i2];
    if ( !mfn_valid(mfn) )
        l2[i2] = mfn = paging_new_log_dirty_leaf(d);
    unmap_domain_page(l2);
    if ( !mfn_valid(mfn) )
        goto out;

    l1 = map_domain_page(mfn);
    changed = !__test_and_set_bit(i1, l1);
    unmap_domain_page(l1);
    if ( changed )
    {
        PAGING_DEBUG(LOGDIRTY,
                     "d%d: marked mfn %" PRI_mfn " (pfn %" PRI_pfn ")\n",
                     d->domain_id, mfn_x(mfn), pfn_x(pfn));
        d->arch.paging.log_dirty.dirty_count++;
    }

 out:
    /* We've already recorded any failed allocations */
    paging_unlock(d);
    return;
}

/* Mark a page as dirty */
void paging_mark_dirty(struct domain *d, mfn_t gmfn)
{
    pfn_t pfn;

    if ( !paging_mode_log_dirty(d) || !mfn_valid(gmfn) ||
         page_get_owner(mfn_to_page(gmfn)) != d )
        return;

    /* We /really/ mean PFN here, even for non-translated guests. */
    pfn = _pfn(get_gpfn_from_mfn(mfn_x(gmfn)));

    paging_mark_pfn_dirty(d, pfn);
}


/* Is this guest page dirty? */
int paging_mfn_is_dirty(struct domain *d, mfn_t gmfn)
{
    pfn_t pfn;
    mfn_t mfn, *l4, *l3, *l2;
    unsigned long *l1;
    int rv;

    ASSERT(paging_locked_by_me(d));
    ASSERT(paging_mode_log_dirty(d));

    /* We /really/ mean PFN here, even for non-translated guests. */
    pfn = _pfn(get_gpfn_from_mfn(mfn_x(gmfn)));
    /* Shared pages are always read-only; invalid pages can't be dirty. */
    if ( unlikely(SHARED_M2P(pfn_x(pfn)) || !VALID_M2P(pfn_x(pfn))) )
        return 0;

    mfn = d->arch.paging.log_dirty.top;
    if ( !mfn_valid(mfn) )
        return 0;

    l4 = map_domain_page(mfn);
    mfn = l4[L4_LOGDIRTY_IDX(pfn)];
    unmap_domain_page(l4);
    if ( !mfn_valid(mfn) )
        return 0;

    l3 = map_domain_page(mfn);
    mfn = l3[L3_LOGDIRTY_IDX(pfn)];
    unmap_domain_page(l3);
    if ( !mfn_valid(mfn) )
        return 0;

    l2 = map_domain_page(mfn);
    mfn = l2[L2_LOGDIRTY_IDX(pfn)];
    unmap_domain_page(l2);
    if ( !mfn_valid(mfn) )
        return 0;

    l1 = map_domain_page(mfn);
    rv = test_bit(L1_LOGDIRTY_IDX(pfn), l1);
    unmap_domain_page(l1);
    return rv;
}


/* Read a domain's log-dirty bitmap and stats.  If the operation is a CLEAN,
 * clear the bitmap and stats as well. */
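/*
 * The scan is bounded by sc->pages and may be preempted: the number of bits
 * already copied out is recorded in d->arch.paging.preempt.log_dirty.done so
 * that a continuation can resume where the previous invocation stopped.
 */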
static int paging_log_dirty_op(struct domain *d,
                               struct xen_domctl_shadow_op *sc,
                               bool_t resuming)
{
    int rv = 0, clean = 0, peek = 1;
    unsigned long pages = 0;
    mfn_t *l4 = NULL, *l3 = NULL, *l2 = NULL;
    unsigned long *l1 = NULL;
    int i4, i3, i2;

    if ( !resuming )
    {
        /*
         * Mark dirty all currently write-mapped pages on e.g. the
         * final iteration of a save operation.
         */
        if ( is_hvm_domain(d) &&
             (sc->mode & XEN_DOMCTL_SHADOW_LOGDIRTY_FINAL) )
            hvm_mapped_guest_frames_mark_dirty(d);

        domain_pause(d);

        /*
         * Flush dirty GFNs potentially cached by hardware.  We only need
         * to flush when not resuming, as the domain was already paused in
         * the resuming case, so no new dirty pages can have appeared since
         * the previous iteration.
         */
        p2m_flush_hardware_cached_dirty(d);
    }

    paging_lock(d);

    if ( !d->arch.paging.preempt.dom )
        memset(&d->arch.paging.preempt.log_dirty, 0,
               sizeof(d->arch.paging.preempt.log_dirty));
    else if ( d->arch.paging.preempt.dom != current->domain ||
              d->arch.paging.preempt.op != sc->op )
    {
        paging_unlock(d);
        ASSERT(!resuming);
        domain_unpause(d);
        return -EBUSY;
    }

    clean = (sc->op == XEN_DOMCTL_SHADOW_OP_CLEAN);

    PAGING_DEBUG(LOGDIRTY, "log-dirty %s: dom %u faults=%u dirty=%u\n",
                 (clean) ? "clean" : "peek",
                 d->domain_id,
                 d->arch.paging.log_dirty.fault_count,
                 d->arch.paging.log_dirty.dirty_count);

    sc->stats.fault_count = d->arch.paging.log_dirty.fault_count;
    sc->stats.dirty_count = d->arch.paging.log_dirty.dirty_count;

    if ( guest_handle_is_null(sc->dirty_bitmap) )
        /* caller may have wanted just to clean the state or access stats. */
        peek = 0;

    if ( unlikely(d->arch.paging.log_dirty.failed_allocs) )
    {
        printk(XENLOG_WARNING
               "%u failed page allocs while logging dirty pages of d%d\n",
               d->arch.paging.log_dirty.failed_allocs, d->domain_id);
        rv = -ENOMEM;
        goto out;
    }

    l4 = paging_map_log_dirty_bitmap(d);
    i4 = d->arch.paging.preempt.log_dirty.i4;
    i3 = d->arch.paging.preempt.log_dirty.i3;
    pages = d->arch.paging.preempt.log_dirty.done;

    for ( ; (pages < sc->pages) && (i4 < LOGDIRTY_NODE_ENTRIES); i4++, i3 = 0 )
    {
        l3 = (l4 && mfn_valid(l4[i4])) ? map_domain_page(l4[i4]) : NULL;
        for ( ; (pages < sc->pages) && (i3 < LOGDIRTY_NODE_ENTRIES); i3++ )
        {
            l2 = ((l3 && mfn_valid(l3[i3])) ?
                  map_domain_page(l3[i3]) : NULL);
            for ( i2 = 0;
                  (pages < sc->pages) && (i2 < LOGDIRTY_NODE_ENTRIES);
                  i2++ )
            {
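                /*
                 * Copy out at most one leaf page (PAGE_SIZE bytes, i.e.
                 * PAGE_SIZE * 8 pfns) per iteration, clipped to the number
                 * of bits the caller still wants, rounded up to whole bytes.
                 */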
                unsigned int bytes = PAGE_SIZE;
                l1 = ((l2 && mfn_valid(l2[i2])) ?
                      map_domain_page(l2[i2]) : NULL);
                if ( unlikely(((sc->pages - pages + 7) >> 3) < bytes) )
                    bytes = (unsigned int)((sc->pages - pages + 7) >> 3);
                if ( likely(peek) )
                {
                    if ( (l1 ? copy_to_guest_offset(sc->dirty_bitmap,
                                                    pages >> 3, (uint8_t *)l1,
                                                    bytes)
                             : clear_guest_offset(sc->dirty_bitmap,
                                                  pages >> 3, bytes)) != 0 )
                    {
                        rv = -EFAULT;
                        goto out;
                    }
                }
                pages += bytes << 3;
                if ( l1 )
                {
                    if ( clean )
                        clear_page(l1);
                    unmap_domain_page(l1);
                }
            }
            if ( l2 )
                unmap_domain_page(l2);

            if ( i3 < LOGDIRTY_NODE_ENTRIES - 1 && hypercall_preempt_check() )
            {
                d->arch.paging.preempt.log_dirty.i4 = i4;
                d->arch.paging.preempt.log_dirty.i3 = i3 + 1;
                rv = -ERESTART;
                break;
            }
        }
        if ( l3 )
            unmap_domain_page(l3);

        if ( !rv && i4 < LOGDIRTY_NODE_ENTRIES - 1 &&
             hypercall_preempt_check() )
        {
            d->arch.paging.preempt.log_dirty.i4 = i4 + 1;
            d->arch.paging.preempt.log_dirty.i3 = 0;
            rv = -ERESTART;
        }
        if ( rv )
            break;
    }
    if ( l4 )
        unmap_domain_page(l4);

    if ( !rv )
    {
        d->arch.paging.preempt.dom = NULL;
        if ( clean )
        {
            d->arch.paging.log_dirty.fault_count = 0;
            d->arch.paging.log_dirty.dirty_count = 0;
        }
    }
    else
    {
        d->arch.paging.preempt.dom = current->domain;
        d->arch.paging.preempt.op = sc->op;
        d->arch.paging.preempt.log_dirty.done = pages;
    }

    paging_unlock(d);

    if ( rv )
    {
        /* Never leave the domain paused on real errors. */
        ASSERT(rv == -ERESTART);
        return rv;
    }

    if ( pages < sc->pages )
        sc->pages = pages;
    if ( clean )
    {
        /* We also need to call the clean_dirty_bitmap() hook of the
         * specific paging mode (shadow or hap).  Safe because the domain
         * is paused. */
        d->arch.paging.log_dirty.ops->clean(d);
    }
    domain_unpause(d);
    return rv;

 out:
    d->arch.paging.preempt.dom = NULL;
    paging_unlock(d);
    domain_unpause(d);

    if ( l1 )
        unmap_domain_page(l1);
    if ( l2 )
        unmap_domain_page(l2);
    if ( l3 )
        unmap_domain_page(l3);
    if ( l4 )
        unmap_domain_page(l4);

    return rv;
}

void paging_log_dirty_range(struct domain *d,
                            unsigned long begin_pfn,
                            unsigned long nr,
                            uint8_t *dirty_bitmap)
{
    struct p2m_domain *p2m = p2m_get_hostp2m(d);
    int i;
    unsigned long pfn;

    /*
     * Set l1e entries of P2M table to be read-only.
     *
     * On first write, it page faults, its entry is changed to read-write,
     * and on retry the write succeeds.
     *
     * We populate dirty_bitmap by looking for entries that have been
     * switched to read-write.
     */

    p2m_lock(p2m);

    for ( i = 0, pfn = begin_pfn; pfn < begin_pfn + nr; i++, pfn++ )
        if ( !p2m_change_type_one(d, pfn, p2m_ram_rw, p2m_ram_logdirty) )
            dirty_bitmap[i >> 3] |= (1 << (i & 7));

    p2m_unlock(p2m);

    flush_tlb_mask(d->domain_dirty_cpumask);
}

/*
 * Callers must supply log_dirty_ops for the log-dirty code to call.  This
 * function is usually invoked when paging is enabled; see shadow_enable()
 * and hap_enable() for reference.
 *
 * These function pointers must not be followed with the log-dirty lock held.
 */
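/*
 * Illustrative sketch only (the real handler names live in the shadow and
 * HAP code): a paging implementation is expected to register its handlers
 * from its enable/init path, roughly as
 *
 *     static const struct log_dirty_ops sh_ops = {
 *         .enable  = sh_enable_log_dirty,
 *         .disable = sh_disable_log_dirty,
 *         .clean   = sh_clean_dirty_bitmap,
 *     };
 *     ...
 *     paging_log_dirty_init(d, &sh_ops);
 */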
void paging_log_dirty_init(struct domain *d, const struct log_dirty_ops *ops)
{
    d->arch.paging.log_dirty.ops = ops;
}

/************************************************/
/*           CODE FOR PAGING SUPPORT            */
/************************************************/
/* Domain paging struct initialization. */
int paging_domain_init(struct domain *d, unsigned int domcr_flags)
{
    int rc;

    if ( (rc = p2m_init(d)) != 0 )
        return rc;

    mm_lock_init(&d->arch.paging.lock);

    /* This must be initialized separately from the rest of the
     * log-dirty init code as that can be called more than once and we
     * don't want to leak any active log-dirty bitmaps */
    d->arch.paging.log_dirty.top = INVALID_MFN;

    /*
     * Shadow pagetables are the default, but we will use
     * hardware assistance if it's available and enabled.
     */
    if ( hap_enabled(d) )
        hap_domain_init(d);
    else
        rc = shadow_domain_init(d, domcr_flags);

    return rc;
}

/* vcpu paging struct initialization goes here */
void paging_vcpu_init(struct vcpu *v)
{
    if ( hap_enabled(v->domain) )
        hap_vcpu_init(v);
    else
        shadow_vcpu_init(v);
}


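/*
 * Top-level handler for XEN_DOMCTL_shadow_op.  Log-dirty operations are
 * dealt with here; everything else is handed on to the HAP or shadow
 * specific domctl handler.
 */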
int paging_domctl(struct domain *d, struct xen_domctl_shadow_op *sc,
                  XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl,
                  bool_t resuming)
{
    int rc;

    if ( unlikely(d == current->domain) )
    {
        gdprintk(XENLOG_INFO, "Tried to do a paging op on itself.\n");
        return -EINVAL;
    }

    if ( unlikely(d->is_dying) )
    {
        gdprintk(XENLOG_INFO, "Ignoring paging op on dying domain %u\n",
                 d->domain_id);
        return 0;
    }

    if ( unlikely(d->vcpu == NULL) || unlikely(d->vcpu[0] == NULL) )
    {
        gdprintk(XENLOG_DEBUG, "Paging op on a domain (%u) with no vcpus\n",
                 d->domain_id);
        return -EINVAL;
    }

    if ( resuming
         ? (d->arch.paging.preempt.dom != current->domain ||
            d->arch.paging.preempt.op != sc->op)
         : (d->arch.paging.preempt.dom &&
            sc->op != XEN_DOMCTL_SHADOW_OP_GET_ALLOCATION) )
    {
        printk(XENLOG_G_DEBUG
               "%pv: Paging op %#x on Dom%u with unfinished prior op %#x by Dom%u\n",
               current, sc->op, d->domain_id, d->arch.paging.preempt.op,
               d->arch.paging.preempt.dom
               ? d->arch.paging.preempt.dom->domain_id : DOMID_INVALID);
        return -EBUSY;
    }

    rc = xsm_shadow_control(XSM_HOOK, d, sc->op);
    if ( rc )
        return rc;

    /* Code to handle log-dirty.  Note that some log-dirty operations
     * piggy-back on shadow operations.  For example, when
     * XEN_DOMCTL_SHADOW_OP_OFF is called, it first checks whether log-dirty
     * mode is enabled.  If it is, we disable log-dirty mode and continue
     * with the shadow code.  For this reason, we need to further dispatch
     * the domctl to the next-level paging code (shadow or hap).
     */
    switch ( sc->op )
    {

    case XEN_DOMCTL_SHADOW_OP_ENABLE:
        if ( !(sc->mode & XEN_DOMCTL_SHADOW_ENABLE_LOG_DIRTY) )
            break;
        /* Else fall through... */
    case XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY:
        return paging_log_dirty_enable(d, 1);

    case XEN_DOMCTL_SHADOW_OP_OFF:
        if ( (rc = paging_log_dirty_disable(d, resuming)) != 0 )
            return rc;
        break;

    case XEN_DOMCTL_SHADOW_OP_CLEAN:
    case XEN_DOMCTL_SHADOW_OP_PEEK:
        if ( sc->mode & ~XEN_DOMCTL_SHADOW_LOGDIRTY_FINAL )
            return -EINVAL;
        return paging_log_dirty_op(d, sc, resuming);
    }

    /* Here, dispatch domctl to the appropriate paging code */
    if ( hap_enabled(d) )
        return hap_domctl(d, sc, u_domctl);
    else
        return shadow_domctl(d, sc, u_domctl);
}

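/*
 * Continuation handler for preempted shadow ops: reached via the
 * __HYPERVISOR_arch_1 continuation created below, it simply re-runs
 * paging_domctl() with resuming set until the operation completes.
 */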
long paging_domctl_continuation(XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
{
    struct xen_domctl op;
    struct domain *d;
    int ret;

    if ( copy_from_guest(&op, u_domctl, 1) )
        return -EFAULT;

    if ( op.interface_version != XEN_DOMCTL_INTERFACE_VERSION ||
         op.cmd != XEN_DOMCTL_shadow_op )
        return -EOPNOTSUPP;

    d = rcu_lock_domain_by_id(op.domain);
    if ( d == NULL )
        return -ESRCH;

    ret = xsm_domctl(XSM_OTHER, d, op.cmd);
    if ( !ret )
    {
        if ( domctl_lock_acquire() )
        {
            ret = paging_domctl(d, &op.u.shadow_op, u_domctl, 1);

            domctl_lock_release();
        }
        else
            ret = -ERESTART;
    }

    rcu_unlock_domain(d);

    if ( ret == -ERESTART )
        ret = hypercall_create_continuation(__HYPERVISOR_arch_1,
                                            "h", u_domctl);
    else if ( __copy_field_to_guest(u_domctl, &op, u.shadow_op) )
        ret = -EFAULT;

    return ret;
}

/* Call when destroying a domain */
int paging_teardown(struct domain *d)
{
    int rc;
    bool preempted = false;

    if ( hap_enabled(d) )
        hap_teardown(d, &preempted);
    else
        shadow_teardown(d, &preempted);

    if ( preempted )
        return -ERESTART;

    /* Clean up log-dirty resources. */
    rc = paging_free_log_dirty_bitmap(d, 0);
    if ( rc == -ERESTART )
        return rc;

    /* Move populate-on-demand cache back to domain_list for destruction */
    rc = p2m_pod_empty_cache(d);

    return rc;
}

/* Call once all of the references to the domain have gone away */
void paging_final_teardown(struct domain *d)
{
    if ( hap_enabled(d) )
        hap_final_teardown(d);
    else
        shadow_final_teardown(d);

    p2m_final_teardown(d);
}

/* Enable an arbitrary paging-assistance mode.  Call once at domain
 * creation. */
int paging_enable(struct domain *d, u32 mode)
{
    /* Unrecognised paging mode? */
    if ( mode & ~PG_MASK )
        return -EINVAL;

    /* All of external|translate|refcounts, or none. */
    switch ( mode & (PG_external | PG_translate | PG_refcounts) )
    {
    case 0:
    case PG_external | PG_translate | PG_refcounts:
        break;
    default:
        return -EINVAL;
    }

    if ( hap_enabled(d) )
        return hap_enable(d, mode);
    else
        return shadow_enable(d, mode);
}

/* Called from the guest to indicate that a process is being torn down
 * and therefore its pagetables will soon be discarded */
void pagetable_dying(struct domain *d, paddr_t gpa)
{
#ifdef CONFIG_SHADOW_PAGING
    struct vcpu *v;

    ASSERT(paging_mode_shadow(d));

    v = d->vcpu[0];
    v->arch.paging.mode->shadow.pagetable_dying(v, gpa);
#else
    BUG();
#endif
}

/* Print paging-assistance info to the console */
void paging_dump_domain_info(struct domain *d)
{
    if ( paging_mode_enabled(d) )
    {
        printk(" paging assistance: ");
        if ( paging_mode_shadow(d) )
            printk("shadow ");
        if ( paging_mode_hap(d) )
            printk("hap ");
        if ( paging_mode_refcounts(d) )
            printk("refcounts ");
        if ( paging_mode_log_dirty(d) )
            printk("log_dirty ");
        if ( paging_mode_translate(d) )
            printk("translate ");
        if ( paging_mode_external(d) )
            printk("external ");
        printk("\n");
    }
}

void paging_dump_vcpu_info(struct vcpu *v)
{
    if ( paging_mode_enabled(v->domain) )
    {
        printk(" paging assistance: ");
        if ( paging_mode_shadow(v->domain) )
        {
            if ( paging_get_hostmode(v) )
                printk("shadowed %u-on-%u\n",
                       paging_get_hostmode(v)->guest_levels,
                       paging_get_hostmode(v)->shadow.shadow_levels);
            else
                printk("not shadowed\n");
        }
        else if ( paging_mode_hap(v->domain) && paging_get_hostmode(v) )
            printk("hap, %u levels\n",
                   paging_get_hostmode(v)->guest_levels);
        else
            printk("none\n");
    }
}

const struct paging_mode *paging_get_mode(struct vcpu *v)
{
    if (!nestedhvm_is_n2(v))
        return paging_get_hostmode(v);

    return paging_get_nestedmode(v);
}

void paging_update_nestedmode(struct vcpu *v)
{
    ASSERT(nestedhvm_enabled(v->domain));
    if (nestedhvm_paging_mode_hap(v))
        /* nested-on-nested */
        v->arch.paging.nestedmode = hap_paging_get_mode(v);
    else
        /* TODO: shadow-on-shadow */
        v->arch.paging.nestedmode = NULL;
    hvm_asid_flush_vcpu(v);
}

void paging_write_p2m_entry(struct p2m_domain *p2m, unsigned long gfn,
                            l1_pgentry_t *p, l1_pgentry_t new,
                            unsigned int level)
{
    struct domain *d = p2m->domain;
    struct vcpu *v = current;
    if ( v->domain != d )
        v = d->vcpu ? d->vcpu[0] : NULL;
    if ( likely(v && paging_mode_enabled(d) && paging_get_hostmode(v) != NULL) )
        paging_get_hostmode(v)->write_p2m_entry(d, gfn, p, new, level);
    else
        safe_write_pte(p, new);
}

int paging_set_allocation(struct domain *d, unsigned int pages, bool *preempted)
{
    int rc;

    ASSERT(paging_mode_enabled(d));

    paging_lock(d);
    if ( hap_enabled(d) )
        rc = hap_set_allocation(d, pages, preempted);
    else
        rc = shadow_set_allocation(d, pages, preempted);
    paging_unlock(d);

    return rc;
}

/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * indent-tabs-mode: nil
 * End:
 */