/******************************************************************************
 * include/asm-x86/paging.h
 *
 * Common interface for paging support
 * Copyright (c) 2007 Advanced Micro Devices (Wei Huang)
 * Parts of this code are Copyright (c) 2006 by XenSource Inc.
 * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
 * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; If not, see <http://www.gnu.org/licenses/>.
 */

#ifndef _XEN_PAGING_H
#define _XEN_PAGING_H

#include <xen/mm.h>
#include <public/domctl.h>
#include <xen/sched.h>
#include <xen/perfc.h>
#include <xen/domain_page.h>
#include <asm/flushtlb.h>
#include <asm/domain.h>
/*****************************************************************************
 * Macros to tell which paging mode a domain is in */

#define PG_SH_shift    20
#define PG_HAP_shift   21
/* We're in one of the shadow modes */
#ifdef CONFIG_SHADOW_PAGING
#define PG_SH_enable   (1U << PG_SH_shift)
#else
#define PG_SH_enable   0
#endif
#define PG_HAP_enable  (1U << PG_HAP_shift)
/* common paging mode bits */
#define PG_mode_shift  10
/* Refcounts based on shadow tables instead of guest tables */
#define PG_refcounts   (XEN_DOMCTL_SHADOW_ENABLE_REFCOUNT << PG_mode_shift)
/* Enable log dirty mode */
#define PG_log_dirty   (XEN_DOMCTL_SHADOW_ENABLE_LOG_DIRTY << PG_mode_shift)
/* Xen does p2m translation, not guest */
#define PG_translate   (XEN_DOMCTL_SHADOW_ENABLE_TRANSLATE << PG_mode_shift)
/* Xen does not steal address space from the domain for its own bookkeeping;
 * requires VT or similar mechanisms */
#define PG_external   (XEN_DOMCTL_SHADOW_ENABLE_EXTERNAL << PG_mode_shift)

/* All paging modes. */
#define PG_MASK       (PG_refcounts | PG_log_dirty | PG_translate | PG_external)

#define paging_mode_enabled(_d)   (!!(_d)->arch.paging.mode)
#define paging_mode_shadow(_d)    (!!((_d)->arch.paging.mode & PG_SH_enable))
#define paging_mode_hap(_d)       (!!((_d)->arch.paging.mode & PG_HAP_enable))

#define paging_mode_refcounts(_d) (!!((_d)->arch.paging.mode & PG_refcounts))
#define paging_mode_log_dirty(_d) (!!((_d)->arch.paging.mode & PG_log_dirty))
#define paging_mode_translate(_d) (!!((_d)->arch.paging.mode & PG_translate))
#define paging_mode_external(_d)  (!!((_d)->arch.paging.mode & PG_external))
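
/*
 * Illustrative sketch (not part of this interface): callers are expected to
 * use the predicates above rather than poke at arch.paging.mode directly.
 * The domain pointer "d" and the surrounding code are hypothetical.
 *
 *   if ( paging_mode_hap(d) )
 *       ...              // hardware-assisted (nested) paging path
 *   else if ( paging_mode_shadow(d) )
 *       ...              // shadow pagetable path
 *
 *   if ( paging_mode_log_dirty(d) )
 *       ...              // writes to guest memory must be reflected in the
 *                        // log-dirty bitmap
 */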

/* flags used for paging debug */
#define PAGING_DEBUG_LOGDIRTY 0

/*****************************************************************************
 * Mode-specific entry points into the shadow code.
 *
 * These shouldn't be used directly by callers; rather use the functions
 * below which will indirect through this table as appropriate. */

struct sh_emulate_ctxt;
struct shadow_paging_mode {
#ifdef CONFIG_SHADOW_PAGING
    void          (*detach_old_tables     )(struct vcpu *v);
    int           (*x86_emulate_write     )(struct vcpu *v, unsigned long va,
                                            void *src, u32 bytes,
                                            struct sh_emulate_ctxt *sh_ctxt);
    int           (*x86_emulate_cmpxchg   )(struct vcpu *v, unsigned long va,
                                            unsigned long old,
                                            unsigned long new,
                                            unsigned int bytes,
                                            struct sh_emulate_ctxt *sh_ctxt);
    bool          (*write_guest_entry     )(struct vcpu *v, intpte_t *p,
                                            intpte_t new, mfn_t gmfn);
    bool          (*cmpxchg_guest_entry   )(struct vcpu *v, intpte_t *p,
                                            intpte_t *old, intpte_t new,
                                            mfn_t gmfn);
    mfn_t         (*make_monitor_table    )(struct vcpu *v);
    void          (*destroy_monitor_table )(struct vcpu *v, mfn_t mmfn);
    int           (*guess_wrmap           )(struct vcpu *v,
                                            unsigned long vaddr, mfn_t gmfn);
    void          (*pagetable_dying       )(struct vcpu *v, paddr_t gpa);
#endif
    /* For outsiders to tell what mode we're in */
    unsigned int shadow_levels;
};


/************************************************/
/*          common paging interface             */
/************************************************/
struct paging_mode {
    int           (*page_fault            )(struct vcpu *v, unsigned long va,
                                            struct cpu_user_regs *regs);
    bool          (*invlpg                )(struct vcpu *v, unsigned long va);
    unsigned long (*gva_to_gfn            )(struct vcpu *v,
                                            struct p2m_domain *p2m,
                                            unsigned long va,
                                            uint32_t *pfec);
    unsigned long (*p2m_ga_to_gfn         )(struct vcpu *v,
                                            struct p2m_domain *p2m,
                                            unsigned long cr3,
                                            paddr_t ga, uint32_t *pfec,
                                            unsigned int *page_order);
    void          (*update_cr3            )(struct vcpu *v, int do_locking);
    void          (*update_paging_modes   )(struct vcpu *v);
    void          (*write_p2m_entry       )(struct domain *d, unsigned long gfn,
                                            l1_pgentry_t *p, l1_pgentry_t new,
                                            unsigned int level);

    unsigned int guest_levels;

    /* paging support extension */
    struct shadow_paging_mode shadow;
};

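/*
 * Illustrative sketch (hedged): each paging implementation provides one of
 * these tables per supported number of guest levels, and the wrapper
 * functions further down dispatch through it.  Every name in this fragment
 * is hypothetical.
 *
 *   static const struct paging_mode example_paging_mode = {
 *       .page_fault          = example_page_fault,
 *       .invlpg              = example_invlpg,
 *       .gva_to_gfn          = example_gva_to_gfn,
 *       .update_cr3          = example_update_cr3,
 *       .update_paging_modes = example_update_paging_modes,
 *       .guest_levels        = 4,
 *   };
 */
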
/*****************************************************************************
 * Log dirty code */

/* get the dirty bitmap for a specific range of pfns */
void paging_log_dirty_range(struct domain *d,
                            unsigned long begin_pfn,
                            unsigned long nr,
                            uint8_t *dirty_bitmap);

/* enable log dirty */
int paging_log_dirty_enable(struct domain *d, bool_t log_global);

/* log dirty initialization */
void paging_log_dirty_init(struct domain *d, const struct log_dirty_ops *ops);

/* mark a page as dirty */
void paging_mark_dirty(struct domain *d, mfn_t gmfn);
/* mark a page as dirty, taking a guest pfn as parameter */
void paging_mark_pfn_dirty(struct domain *d, pfn_t pfn);

/* is this guest page dirty?
 * This is called from inside paging code, with the paging lock held. */
int paging_mfn_is_dirty(struct domain *d, mfn_t gmfn);
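
/*
 * Illustrative sketch (hedged, not from this file): a typical producer of
 * dirty information is a completed or emulated write to guest memory, e.g.
 *
 *   paging_mark_dirty(d, mfn);            // machine frame number is known
 *   paging_mark_pfn_dirty(d, _pfn(pfn));  // only the guest frame is known
 *
 * while consumers (dirty-VRAM tracking, migration logic) read the
 * accumulated bitmap back via paging_log_dirty_range() or the log-dirty
 * shadow-op domctls handled by paging_domctl() below.
 */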

/*
 * Log-dirty radix tree indexing:
 *   All tree nodes are PAGE_SIZE bytes, mapped on-demand.
 *   Leaf nodes are simple bitmaps; 1 bit per guest pfn.
 *   Interior nodes are arrays of LOGDIRTY_NODE_ENTRIES mfns.
 * TODO: Dynamic radix tree height. Most guests will only need 2 levels.
 *       The fourth level is basically unusable on 32-bit Xen.
 * TODO2: Abstract out the radix-tree mechanics?
 */
#define LOGDIRTY_NODE_ENTRIES (1 << PAGETABLE_ORDER)
#define L1_LOGDIRTY_IDX(pfn) (pfn_x(pfn) & ((1 << (PAGE_SHIFT + 3)) - 1))
#define L2_LOGDIRTY_IDX(pfn) ((pfn_x(pfn) >> (PAGE_SHIFT + 3)) & \
                              (LOGDIRTY_NODE_ENTRIES - 1))
#define L3_LOGDIRTY_IDX(pfn) ((pfn_x(pfn) >> (PAGE_SHIFT + 3 + PAGETABLE_ORDER)) & \
                              (LOGDIRTY_NODE_ENTRIES - 1))
#define L4_LOGDIRTY_IDX(pfn) ((pfn_x(pfn) >> (PAGE_SHIFT + 3 + PAGETABLE_ORDER * 2)) & \
                              (LOGDIRTY_NODE_ENTRIES - 1))
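
/*
 * Worked example (assuming the usual x86 values PAGE_SHIFT = 12 and
 * PAGETABLE_ORDER = 9, i.e. 4 KiB nodes and 512-entry interior nodes):
 * each leaf bitmap covers 4096 * 8 = 32768 pfns, so for pfn = 0x2345678:
 *
 *   L1_LOGDIRTY_IDX = 0x2345678 & 0x7fff        = 0x5678  (bit within leaf)
 *   L2_LOGDIRTY_IDX = (0x2345678 >> 15) & 0x1ff = 0x68    (leaf slot in L2)
 *   L3_LOGDIRTY_IDX = (0x2345678 >> 24) & 0x1ff = 0x2     (L2 slot in L3)
 *   L4_LOGDIRTY_IDX = (0x2345678 >> 33) & 0x1ff = 0x0     (L3 slot in L4)
 */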

/* VRAM dirty tracking support */
struct sh_dirty_vram {
    unsigned long begin_pfn;
    unsigned long end_pfn;
    paddr_t *sl1ma;
    uint8_t *dirty_bitmap;
    s_time_t last_dirty;
};

/*****************************************************************************
 * Entry points into the paging-assistance code */

/* Initialize the paging resource for vcpu struct. It is called by
 * vcpu_initialise() in domain.c */
void paging_vcpu_init(struct vcpu *v);

/* Set up the paging-assistance-specific parts of a domain struct at
 * start of day.  Called for every domain from arch_domain_create() */
int paging_domain_init(struct domain *d, unsigned int domcr_flags);

/* Handler for paging-control ops: operations from user-space to enable
 * and disable ephemeral shadow modes (test mode and log-dirty mode) and
 * manipulate the log-dirty bitmap. */
int paging_domctl(struct domain *d, struct xen_domctl_shadow_op *sc,
                  XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl,
                  bool_t resuming);

/* Helper hypercall for dealing with continuations. */
long paging_domctl_continuation(XEN_GUEST_HANDLE_PARAM(xen_domctl_t));

/* Call when destroying a domain */
int paging_teardown(struct domain *d);

/* Call once all of the references to the domain have gone away */
void paging_final_teardown(struct domain *d);

/* Enable an arbitrary paging-assistance mode.  Call once at domain
 * creation. */
int paging_enable(struct domain *d, u32 mode);

#define paging_get_hostmode(v)    ((v)->arch.paging.mode)
#define paging_get_nestedmode(v)  ((v)->arch.paging.nestedmode)
const struct paging_mode *paging_get_mode(struct vcpu *v);
void paging_update_nestedmode(struct vcpu *v);

/* Page fault handler
 * Called from pagefault handler in Xen, and from the HVM trap handlers
 * for pagefaults.  Returns 1 if this fault was an artefact of the
 * paging code (and the guest should retry) or 0 if it is not (and the
 * fault should be handled elsewhere or passed to the guest).
 *
 * Note: under shadow paging, this function handles all page faults;
 * however, for hardware-assisted paging, this function handles only
 * host page faults (i.e. nested page faults). */
static inline int
paging_fault(unsigned long va, struct cpu_user_regs *regs)
{
    struct vcpu *v = current;
    return paging_get_hostmode(v)->page_fault(v, va, regs);
}

/* Handle invlpg requests on vcpus. */
void paging_invlpg(struct vcpu *v, unsigned long va);

/*
 * Translate a guest virtual address to the frame number that the
 * *guest* pagetables would map it to.  Returns INVALID_GFN if the guest
 * tables don't map this address for this kind of access.
 * *pfec is used to determine which kind of access this is when
 * walking the tables.  The caller should set the PFEC_page_present bit
 * in *pfec; in the failure case, that bit will be cleared if appropriate.
 *
 * SDM Intel 64 Volume 3, Chapter Paging, PAGE-FAULT EXCEPTIONS:
 * The PFEC_insn_fetch flag is set only when NX or SMEP are enabled.
 */
unsigned long paging_gva_to_gfn(struct vcpu *v,
                                unsigned long va,
                                uint32_t *pfec);
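
/*
 * Illustrative sketch (hedged): a caller wanting to emulate a guest data
 * access might translate the virtual address like this; variable names are
 * hypothetical and error handling is elided.
 *
 *   uint32_t pfec = PFEC_page_present;
 *   unsigned long gfn = paging_gva_to_gfn(v, va, &pfec);
 *
 *   if ( gfn == gfn_x(INVALID_GFN) )
 *       ...   // inject a page fault to the guest, using the returned pfec
 */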

/* Translate a guest address using a particular CR3 value.  This is used
 * by nested HAP code, to walk the guest-supplied NPT tables as if
 * they were pagetables.
 * Use 'paddr_t' for the guest address so it won't overflow when
 * l1 or l2 guest is in 32bit PAE mode.
 * If the GFN returned is not INVALID_GFN, *page_order gives
 * the size of the superpage (if any) it was found in. */
static inline unsigned long paging_ga_to_gfn_cr3(struct vcpu *v,
                                                 unsigned long cr3,
                                                 paddr_t ga,
                                                 uint32_t *pfec,
                                                 unsigned int *page_order)
{
    struct p2m_domain *p2m = v->domain->arch.p2m;
    return paging_get_hostmode(v)->p2m_ga_to_gfn(v, p2m, cr3, ga, pfec,
                                                 page_order);
}

/* Update all the things that are derived from the guest's CR3.
 * Called when the guest changes CR3; the caller can then use v->arch.cr3
 * as the value to load into the host CR3 to schedule this vcpu */
static inline void paging_update_cr3(struct vcpu *v)
{
    paging_get_hostmode(v)->update_cr3(v, 1);
}
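
/*
 * Illustrative sketch (hedged): a CR3-write intercept typically only needs
 * paging_update_cr3(), whereas toggling paging-related CR0/CR4 bits needs
 * the heavier paging_update_paging_modes() below.
 *
 *   // after recording the new guest CR3 value in the vcpu state:
 *   paging_update_cr3(v);
 */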

/* Update all the things that are derived from the guest's CR0/CR3/CR4.
 * Called to initialize paging structures if the paging mode
 * has changed, and when bringing up a VCPU for the first time. */
static inline void paging_update_paging_modes(struct vcpu *v)
{
    paging_get_hostmode(v)->update_paging_modes(v);
}


/*
 * Write a new value into the guest pagetable, and update the
 * paging-assistance state appropriately.  Returns false if we page-faulted,
 * true for success.
 */
static inline bool paging_write_guest_entry(
    struct vcpu *v, intpte_t *p, intpte_t new, mfn_t gmfn)
{
#ifdef CONFIG_SHADOW_PAGING
    if ( unlikely(paging_mode_shadow(v->domain)) && paging_get_hostmode(v) )
        return paging_get_hostmode(v)->shadow.write_guest_entry(v, p, new,
                                                                gmfn);
#endif
    return !__copy_to_user(p, &new, sizeof(new));
}


/*
 * Cmpxchg a new value into the guest pagetable, and update the
 * paging-assistance state appropriately.  Returns false if we page-faulted,
 * true if not.  N.B. caller should check the value of "old" to see if the
 * cmpxchg itself was successful.
 */
static inline bool paging_cmpxchg_guest_entry(
    struct vcpu *v, intpte_t *p, intpte_t *old, intpte_t new, mfn_t gmfn)
{
#ifdef CONFIG_SHADOW_PAGING
    if ( unlikely(paging_mode_shadow(v->domain)) && paging_get_hostmode(v) )
        return paging_get_hostmode(v)->shadow.cmpxchg_guest_entry(v, p, old,
                                                                  new, gmfn);
#endif
    return !cmpxchg_user(p, *old, new);
}
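
/*
 * Illustrative sketch (hedged): the return value only reports whether the
 * access faulted; whether the exchange actually took effect is inferred from
 * "old", which is updated to the value observed in the entry.
 *
 *   intpte_t old = expected;
 *
 *   if ( !paging_cmpxchg_guest_entry(v, p, &old, new, gmfn) )
 *       ...   // faulted while accessing the entry
 *   else if ( old != expected )
 *       ...   // entry changed under our feet; the cmpxchg did not happen
 */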

/* Helper function that writes a pte in such a way that a concurrent read
 * never sees a half-written entry that has _PAGE_PRESENT set */
static inline void safe_write_pte(l1_pgentry_t *p, l1_pgentry_t new)
{
    *p = new;
}

/* Atomically write a P2M entry and update the paging-assistance state
 * appropriately.
 * Arguments: the domain in question, the GFN whose mapping is being updated,
 * a pointer to the entry to be written, the MFN in which the entry resides,
 * the new contents of the entry, and the level in the p2m tree at which
 * we are writing. */
struct p2m_domain;

void paging_write_p2m_entry(struct p2m_domain *p2m, unsigned long gfn,
                            l1_pgentry_t *p, l1_pgentry_t new,
                            unsigned int level);

/* Called from the guest to indicate that a process is being
 * torn down and its pagetables will soon be discarded */
void pagetable_dying(struct domain *d, paddr_t gpa);

/* Print paging-assistance info to the console */
void paging_dump_domain_info(struct domain *d);
void paging_dump_vcpu_info(struct vcpu *v);

/* Set the pool of shadow pages to the required number of pages.
 * Input may be rounded up to a minimum number of pages, plus
 * space for the p2m table.
 * Returns 0 for success, non-zero for failure. */
int paging_set_allocation(struct domain *d, unsigned int pages,
                          bool *preempted);

/* Is gfn within maxphysaddr for the domain? */
static inline bool gfn_valid(const struct domain *d, gfn_t gfn)
{
    return !(gfn_x(gfn) >> (d->arch.cpuid->extd.maxphysaddr - PAGE_SHIFT));
}
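
/*
 * Worked example: with maxphysaddr = 40 and PAGE_SHIFT = 12 the shift is 28,
 * so any gfn below 2^28 (i.e. any guest physical address below 1 TiB) is
 * accepted and anything at or above it is rejected.
 */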

/* Maxphysaddr supportable by the paging infrastructure. */
static inline unsigned int paging_max_paddr_bits(const struct domain *d)
{
    unsigned int bits = paging_mode_hap(d) ? hap_paddr_bits : paddr_bits;

    if ( !IS_ENABLED(CONFIG_BIGMEM) && paging_mode_shadow(d) && !is_pv_domain(d) )
    {
        /* Shadowed superpages store GFNs in 32-bit page_info fields. */
        bits = min(bits, 32U + PAGE_SHIFT);
    }

    return bits;
}

#endif /* _XEN_PAGING_H */

/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * indent-tabs-mode: nil
 * End:
 */