/* SPDX-License-Identifier: GPL-2.0-or-later */
/******************************************************************************
 * include/asm-x86/paging.h
 *
 * Common interface for paging support
 * Copyright (c) 2007 Advanced Micro Devices (Wei Huang)
 * Parts of this code are Copyright (c) 2006 by XenSource Inc.
 * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
 * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
 */

#ifndef _XEN_PAGING_H
#define _XEN_PAGING_H

#include <xen/mm.h>
#include <public/domctl.h>
#include <xen/sched.h>
#include <xen/perfc.h>
#include <xen/domain_page.h>
#include <asm/flushtlb.h>
#include <asm/domain.h>

/*****************************************************************************
 * Macros to tell which paging mode a domain is in */

#define PG_SH_shift    20
#define PG_HAP_shift   21
#define PG_SHF_shift   22
/* We're in one of the shadow modes */
#ifdef CONFIG_SHADOW_PAGING
#define PG_SH_enable   (1U << PG_SH_shift)
#define PG_SH_forced   (1U << PG_SHF_shift)
#else
#define PG_SH_enable   0
#define PG_SH_forced   0
#endif
#ifdef CONFIG_HVM
#define PG_HAP_enable  (1U << PG_HAP_shift)
#else
#define PG_HAP_enable  0
#endif

/* common paging mode bits */
#define PG_mode_shift  10
#ifdef CONFIG_HVM
/* Refcounts based on shadow tables instead of guest tables */
#define PG_refcounts   (XEN_DOMCTL_SHADOW_ENABLE_REFCOUNT << PG_mode_shift)
/* Xen does p2m translation, not guest */
#define PG_translate   (XEN_DOMCTL_SHADOW_ENABLE_TRANSLATE << PG_mode_shift)
/* Xen does not steal address space from the domain for its own bookkeeping;
 * requires VT or similar mechanisms */
#define PG_external   (XEN_DOMCTL_SHADOW_ENABLE_EXTERNAL << PG_mode_shift)
#else
#define PG_refcounts   0
#define PG_translate   0
#define PG_external    0
#endif
#ifdef CONFIG_PAGING
/* Enable log dirty mode */
#define PG_log_dirty   (XEN_DOMCTL_SHADOW_ENABLE_LOG_DIRTY << PG_mode_shift)
#else
#define PG_log_dirty   0
#endif

/* All paging modes. */
#define PG_MASK (PG_refcounts | PG_log_dirty | PG_translate | PG_external)

#define paging_mode_enabled(_d)   (!!(_d)->arch.paging.mode)
#define paging_mode_shadow(_d)    (!!((_d)->arch.paging.mode & PG_SH_enable))
#define paging_mode_sh_forced(_d) (!!((_d)->arch.paging.mode & PG_SH_forced))
#define paging_mode_hap(_d)       (!!((_d)->arch.paging.mode & PG_HAP_enable))

#define paging_mode_refcounts(_d) (!!((_d)->arch.paging.mode & PG_refcounts))
#define paging_mode_log_dirty(_d) (!!((_d)->arch.paging.mode & PG_log_dirty))
#define paging_mode_translate(_d) (!!((_d)->arch.paging.mode & PG_translate))
#define paging_mode_external(_d)  (!!((_d)->arch.paging.mode & PG_external))
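
/*
 * Illustrative sketch (not part of the interface): callers normally branch
 * on these predicates rather than inspecting the mode word directly, e.g. a
 * hypothetical helper deciding whether a gfn must be translated through the
 * p2m might do
 *
 *     if ( paging_mode_translate(d) )
 *         // look the gfn up in the p2m
 *     else
 *         // gfn and mfn share one space for non-translated (PV) guests
 *
 * where 'd' is the domain being examined.
 */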

/* flags used for paging debug */
#define PAGING_DEBUG_LOGDIRTY 0

/*****************************************************************************
 * Mode-specific entry points into the shadow code.
 *
 * These shouldn't be used directly by callers; rather use the functions
 * below which will indirect through this table as appropriate. */

struct shadow_paging_mode {
#ifdef CONFIG_SHADOW_PAGING
#ifdef CONFIG_HVM
    int           (*guess_wrmap           )(struct vcpu *v,
                                            unsigned long vaddr, mfn_t gmfn);
    void          (*pagetable_dying       )(paddr_t gpa);
    void          (*trace_emul_write_val  )(const void *ptr, unsigned long vaddr,
                                            const void *src, unsigned int bytes);
#endif
#endif
    /* For outsiders to tell what mode we're in */
    unsigned int shadow_levels;
};


/************************************************/
/*          common paging interface             */
/************************************************/
struct paging_mode {
    int           (*page_fault            )(struct vcpu *v, unsigned long va,
                                            struct cpu_user_regs *regs);
    bool          (*invlpg                )(struct vcpu *v,
                                            unsigned long linear);
#ifdef CONFIG_HVM
    unsigned long (*gva_to_gfn            )(struct vcpu *v,
                                            struct p2m_domain *p2m,
                                            unsigned long va,
                                            uint32_t *pfec);
    unsigned long (*p2m_ga_to_gfn         )(struct vcpu *v,
                                            struct p2m_domain *p2m,
                                            unsigned long cr3,
                                            paddr_t ga, uint32_t *pfec,
                                            unsigned int *page_order);
#endif
    pagetable_t   (*update_cr3            )(struct vcpu *v, bool noflush);

    unsigned int guest_levels;

    /* paging support extension */
    struct shadow_paging_mode shadow;
};

/*****************************************************************************
 * Log dirty code */

#define paging_logdirty_levels() \
    (DIV_ROUND_UP(PADDR_BITS - PAGE_SHIFT - (PAGE_SHIFT + 3), \
                  PAGE_SHIFT - ilog2(sizeof(mfn_t))) + 1)
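
/*
 * Worked example (assuming the usual x86-64 values PADDR_BITS = 52,
 * PAGE_SHIFT = 12 and sizeof(mfn_t) = 8): a leaf page covers
 * PAGE_SHIFT + 3 = 15 bits of pfn space and each interior node a further
 * PAGE_SHIFT - 3 = 9 bits, so the macro evaluates to
 * DIV_ROUND_UP(52 - 12 - 15, 9) + 1 = DIV_ROUND_UP(25, 9) + 1 = 4 levels.
 */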

#if PG_log_dirty

/* get the dirty bitmap for a specific range of pfns */
void paging_log_dirty_range(struct domain *d,
                            unsigned long begin_pfn,
                            unsigned long nr,
                            uint8_t *dirty_bitmap);

/* log dirty initialization */
void paging_log_dirty_init(struct domain *d, const struct log_dirty_ops *ops);

/* mark a page as dirty */
void paging_mark_dirty(struct domain *d, mfn_t gmfn);
/* mark a page as dirty, taking the guest pfn as parameter */
void paging_mark_pfn_dirty(struct domain *d, pfn_t pfn);
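
/*
 * Illustrative sketch: a hypothetical path that has just written into guest
 * memory records the write with whichever call matches the handle it
 * already holds:
 *
 *     paging_mark_dirty(d, gmfn);                  // machine frame in hand
 *     paging_mark_pfn_dirty(d, _pfn(gfn_x(gfn)));  // guest frame in hand
 *
 * The pfn variant can skip the mfn-to-pfn lookup that the mfn variant
 * otherwise has to perform.
 */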

/* is this guest page dirty?
 * This is called from inside paging code, with the paging lock held. */
bool paging_mfn_is_dirty(const struct domain *d, mfn_t gmfn);

/*
 * Log-dirty radix tree indexing:
 * All tree nodes are PAGE_SIZE bytes, mapped on-demand.
 * Leaf nodes are simple bitmaps; 1 bit per guest pfn.
 * Interior nodes are arrays of LOGDIRTY_NODE_ENTRIES mfns.
 * TODO: Dynamic radix tree height. Most guests will only need 2 levels.
 *       The fourth level is basically unusable on 32-bit Xen.
 * TODO2: Abstract out the radix-tree mechanics?
 */
#define LOGDIRTY_NODE_ENTRIES (1 << PAGETABLE_ORDER)
#define L1_LOGDIRTY_IDX(pfn) (pfn_x(pfn) & ((1 << (PAGE_SHIFT + 3)) - 1))
#define L2_LOGDIRTY_IDX(pfn) ((pfn_x(pfn) >> (PAGE_SHIFT + 3)) & \
                              (LOGDIRTY_NODE_ENTRIES - 1))
#define L3_LOGDIRTY_IDX(pfn) ((pfn_x(pfn) >> (PAGE_SHIFT + 3 + PAGETABLE_ORDER)) & \
                              (LOGDIRTY_NODE_ENTRIES - 1))
#define L4_LOGDIRTY_IDX(pfn) ((pfn_x(pfn) >> (PAGE_SHIFT + 3 + PAGETABLE_ORDER * 2)) & \
                              (LOGDIRTY_NODE_ENTRIES - 1))
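
/*
 * Worked example (assuming PAGE_SHIFT = 12 and PAGETABLE_ORDER = 9):
 * a given pfn selects bit (pfn & 0x7fff) of the leaf bitmap, entry
 * (pfn >> 15) & 0x1ff of the L2 node, (pfn >> 24) & 0x1ff of the L3 node,
 * and (pfn >> 33) & 0x1ff of the L4 node.
 */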

#ifdef CONFIG_HVM
/* VRAM dirty tracking support */
struct sh_dirty_vram {
    unsigned long begin_pfn;
    unsigned long end_pfn;
#ifdef CONFIG_SHADOW_PAGING
    paddr_t *sl1ma;
    uint8_t *dirty_bitmap;
    s_time_t last_dirty;
#endif
};
#endif

#else /* !PG_log_dirty */

static inline void paging_log_dirty_init(struct domain *d,
                                         const struct log_dirty_ops *ops) {}
static inline void paging_mark_dirty(struct domain *d, mfn_t gmfn) {}
static inline void paging_mark_pfn_dirty(struct domain *d, pfn_t pfn) {}
static inline bool paging_mfn_is_dirty(const struct domain *d, mfn_t gmfn) { return false; }

#endif /* PG_log_dirty */

/*****************************************************************************
 * Entry points into the paging-assistance code */

/* Initialize the paging resources for a vcpu struct. Called by
 * vcpu_initialise() in domain.c */
void paging_vcpu_init(struct vcpu *v);

/* Set up the paging-assistance-specific parts of a domain struct at
 * start of day. Called for every domain from arch_domain_create() */
int paging_domain_init(struct domain *d);

/* Handler for paging-control ops: operations from user-space to enable
 * and disable ephemeral shadow modes (test mode and log-dirty mode) and
 * manipulate the log-dirty bitmap. */
int paging_domctl(struct domain *d, struct xen_domctl_shadow_op *sc,
                  XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl,
                  bool resuming);

/* Call when destroying a vcpu/domain */
void paging_vcpu_teardown(struct vcpu *v);
int paging_teardown(struct domain *d);

/* Call once all of the references to the domain have gone away */
void paging_final_teardown(struct domain *d);

/* Enable an arbitrary paging-assistance mode. Call once at domain
 * creation. */
int paging_enable(struct domain *d, u32 mode);

#define paging_get_hostmode(v)   ((v)->arch.paging.mode)
#define paging_get_nestedmode(v) ((v)->arch.paging.nestedmode)
void paging_update_nestedmode(struct vcpu *v);

/* Page fault handler
 * Called from the pagefault handler in Xen, and from the HVM trap handlers
 * for pagefaults. Returns 1 if this fault was an artefact of the
 * paging code (and the guest should retry) or 0 if it is not (and the
 * fault should be handled elsewhere or passed to the guest).
 *
 * Note: under shadow paging, this function handles all page faults;
 * however, for hardware-assisted paging, this function handles only
 * host page faults (i.e. nested page faults). */
static inline int
paging_fault(unsigned long va, struct cpu_user_regs *regs)
{
    struct vcpu *v = current;
    return paging_get_hostmode(v)->page_fault(v, va, regs);
}

/* Handle invlpg requests on vcpus. */
void paging_invlpg(struct vcpu *v, unsigned long linear);

/*
 * Translate a guest virtual address to the frame number that the
 * *guest* pagetables would map it to. Returns INVALID_GFN if the guest
 * tables don't map this address for this kind of access.
 * *pfec is used to determine which kind of access this is when
 * walking the tables. The caller should set the PFEC_page_present bit
 * in *pfec; in the failure case, that bit will be cleared if appropriate.
 *
 * SDM Intel 64 Volume 3, Chapter Paging, PAGE-FAULT EXCEPTIONS:
 * The PFEC_insn_fetch flag is set only when NX or SMEP are enabled.
 */
unsigned long paging_gva_to_gfn(struct vcpu *v,
                                unsigned long va,
                                uint32_t *pfec);
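
/*
 * Illustrative sketch of a caller (names such as 'va' are placeholders; the
 * PFEC_* bits to request depend on the access being emulated):
 *
 *     uint32_t pfec = PFEC_page_present | PFEC_write_access;
 *     unsigned long gfn = paging_gva_to_gfn(v, va, &pfec);
 *
 *     if ( gfn == gfn_x(INVALID_GFN) )
 *         // inspect pfec to see why the walk failed, e.g. to inject #PF
 */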

#ifdef CONFIG_HVM

/* Translate a guest address using a particular CR3 value. This is used
 * by nested HAP code, to walk the guest-supplied NPT tables as if
 * they were pagetables.
 * Use 'paddr_t' for the guest address so it won't overflow when
 * an L1 or L2 guest is in 32-bit PAE mode.
 * If the GFN returned is not INVALID_GFN, *page_order gives
 * the size of the superpage (if any) it was found in. */
static inline unsigned long paging_ga_to_gfn_cr3(struct vcpu *v,
                                                 unsigned long cr3,
                                                 paddr_t ga,
                                                 uint32_t *pfec,
                                                 unsigned int *page_order)
{
    struct p2m_domain *p2m = v->domain->arch.p2m;
    return paging_get_hostmode(v)->p2m_ga_to_gfn(v, p2m, cr3, ga, pfec,
                                                 page_order);
}

/* Flush selected vCPUs' TLBs. NULL for all. */
static inline bool paging_flush_tlb(const unsigned long *vcpu_bitmap)
{
    return current->domain->arch.paging.flush_tlb(vcpu_bitmap);
}

#endif /* CONFIG_HVM */

/* Update all the things that are derived from the guest's CR3.
 * Called when the guest changes CR3; the caller can then use v->arch.cr3
 * as the value to load into the host CR3 to schedule this vcpu */
static inline pagetable_t paging_update_cr3(struct vcpu *v, bool noflush)
{
    return paging_get_hostmode(v)->update_cr3(v, noflush);
}

/* Update all the things that are derived from the guest's CR0/CR3/CR4.
 * Called to initialize paging structures if the paging mode
 * has changed, and when bringing up a VCPU for the first time. */
static inline void paging_update_paging_modes(struct vcpu *v)
{
    v->domain->arch.paging.update_paging_modes(v);
}

/* Helper function that writes a pte in such a way that a concurrent read
 * never sees a half-written entry that has _PAGE_PRESENT set */
static inline void safe_write_pte(l1_pgentry_t *p, l1_pgentry_t new)
{
    *p = new;
}
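
/*
 * Rationale sketch (an assumption about why the plain store above suffices):
 * on 64-bit Xen an l1_pgentry_t is a single naturally aligned 64-bit word,
 * so the assignment is not seen torn by a concurrent reader. Hypothetical
 * use, assuming 'sl1e' points at a live entry:
 *
 *     safe_write_pte(sl1e, l1e_empty());
 */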

/*
 * Called from the guest to indicate that a process is being
 * torn down and its pagetables will soon be discarded.
 */
void pagetable_dying(paddr_t gpa);

/* Print paging-assistance info to the console */
void paging_dump_domain_info(struct domain *d);
void paging_dump_vcpu_info(struct vcpu *v);

/* Set the pool of shadow pages to the required number of pages.
 * Input might be rounded up to a minimum amount of pages, plus
 * space for the p2m table.
 * Returns 0 for success, non-zero for failure. */
int paging_set_allocation(struct domain *d, unsigned int pages,
                          bool *preempted);
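
/*
 * Illustrative sketch of the intended calling pattern (hypothetical caller,
 * loosely modelled on the dom0 construction path): when *preempted comes
 * back true, the operation was interrupted and should be retried after
 * letting softirqs run.
 *
 *     bool preempted;
 *
 *     do {
 *         preempted = false;
 *         rc = paging_set_allocation(d, nr_pages, &preempted);
 *         if ( preempted )
 *             process_pending_softirqs();
 *     } while ( preempted );
 */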

/* Is gfn within maxphysaddr for the domain? */
static inline bool gfn_valid(const struct domain *d, gfn_t gfn)
{
    return !(gfn_x(gfn) >> (d->arch.cpuid->extd.maxphysaddr - PAGE_SHIFT));
}
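
/*
 * Worked example (assuming PAGE_SHIFT = 12): with maxphysaddr = 40 the
 * shift is by 28 bits, so gfns 0 ... 2^28 - 1 (i.e. guest physical
 * addresses below 1 TiB) are considered valid.
 */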

#endif /* _XEN_PAGING_H */

/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * indent-tabs-mode: nil
 * End:
 */