/* SPDX-License-Identifier: GPL-2.0-or-later */
/******************************************************************************
 * include/asm-x86/paging.h
 *
 * Common interface for paging support
 * Copyright (c) 2007 Advanced Micro Devices (Wei Huang)
 * Parts of this code are Copyright (c) 2006 by XenSource Inc.
 * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
 * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
 */

#ifndef _XEN_PAGING_H
#define _XEN_PAGING_H

#include <xen/mm.h>
#include <public/domctl.h>
#include <xen/sched.h>
#include <xen/perfc.h>
#include <xen/domain_page.h>
#include <asm/flushtlb.h>
#include <asm/domain.h>

/*****************************************************************************
 * Macros to tell which paging mode a domain is in */

#define PG_SH_shift    20
#define PG_HAP_shift   21
#define PG_SHF_shift   22
/* We're in one of the shadow modes */
#ifdef CONFIG_SHADOW_PAGING
#define PG_SH_enable   (1U << PG_SH_shift)
#define PG_SH_forced   (1U << PG_SHF_shift)
#else
#define PG_SH_enable   0
#define PG_SH_forced   0
#endif
#ifdef CONFIG_HVM
#define PG_HAP_enable  (1U << PG_HAP_shift)
#else
#define PG_HAP_enable  0
#endif

/* common paging mode bits */
#define PG_mode_shift  10
#ifdef CONFIG_HVM
/* Refcounts based on shadow tables instead of guest tables */
#define PG_refcounts   (XEN_DOMCTL_SHADOW_ENABLE_REFCOUNT << PG_mode_shift)
/* Xen does p2m translation, not the guest */
#define PG_translate   (XEN_DOMCTL_SHADOW_ENABLE_TRANSLATE << PG_mode_shift)
/* Xen does not steal address space from the domain for its own bookkeeping;
 * requires VT or similar mechanisms */
#define PG_external    (XEN_DOMCTL_SHADOW_ENABLE_EXTERNAL << PG_mode_shift)
#else
#define PG_refcounts   0
#define PG_translate   0
#define PG_external    0
#endif
#ifdef CONFIG_PAGING
/* Enable log dirty mode */
#define PG_log_dirty   (XEN_DOMCTL_SHADOW_ENABLE_LOG_DIRTY << PG_mode_shift)
#else
#define PG_log_dirty   0
#endif


/* All paging modes. */
#define PG_MASK (PG_refcounts | PG_log_dirty | PG_translate | PG_external)

#define paging_mode_enabled(_d)   (!!(_d)->arch.paging.mode)
#define paging_mode_shadow(_d)    (!!((_d)->arch.paging.mode & PG_SH_enable))
#define paging_mode_sh_forced(_d) (!!((_d)->arch.paging.mode & PG_SH_forced))
#define paging_mode_hap(_d)       (!!((_d)->arch.paging.mode & PG_HAP_enable))

#define paging_mode_refcounts(_d) (!!((_d)->arch.paging.mode & PG_refcounts))
#define paging_mode_log_dirty(_d) (!!((_d)->arch.paging.mode & PG_log_dirty))
#define paging_mode_translate(_d) (!!((_d)->arch.paging.mode & PG_translate))
#define paging_mode_external(_d)  (!!((_d)->arch.paging.mode & PG_external))
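
/*
 * Illustrative sketch (not taken from any particular caller): the predicates
 * above are the intended way to ask what kind of paging assistance a domain
 * has, e.g. before touching the dirty log or the p2m:
 *
 *     if ( paging_mode_log_dirty(d) )
 *         paging_mark_dirty(d, mfn);          // declared further down
 *
 *     if ( paging_mode_translate(d) )
 *         // gfn -> mfn lookups must go through the p2m, not 1:1
 */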

/* flags used for paging debug */
#define PAGING_DEBUG_LOGDIRTY 0

/*****************************************************************************
 * Mode-specific entry points into the shadow code.
 *
 * These shouldn't be used directly by callers; rather use the functions
 * below which will indirect through this table as appropriate. */

struct shadow_paging_mode {
#ifdef CONFIG_SHADOW_PAGING
#ifdef CONFIG_HVM
    int           (*guess_wrmap           )(struct vcpu *v,
                                            unsigned long vaddr, mfn_t gmfn);
    void          (*pagetable_dying       )(paddr_t gpa);
    void          (*trace_emul_write_val  )(const void *ptr, unsigned long vaddr,
                                            const void *src, unsigned int bytes);
#endif
#endif
    /* For outsiders to tell what mode we're in */
    unsigned int shadow_levels;
};


/************************************************/
/*        common paging interface               */
/************************************************/
struct paging_mode {
    int           (*page_fault            )(struct vcpu *v, unsigned long va,
                                            struct cpu_user_regs *regs);
    bool          (*invlpg                )(struct vcpu *v,
                                            unsigned long linear);
#ifdef CONFIG_HVM
    unsigned long (*gva_to_gfn            )(struct vcpu *v,
                                            struct p2m_domain *p2m,
                                            unsigned long va,
                                            uint32_t *pfec);
    unsigned long (*p2m_ga_to_gfn         )(struct vcpu *v,
                                            struct p2m_domain *p2m,
                                            unsigned long cr3,
                                            paddr_t ga, uint32_t *pfec,
                                            unsigned int *page_order);
#endif
    pagetable_t   (*update_cr3            )(struct vcpu *v, bool noflush);

    unsigned int guest_levels;

    /* paging support extension */
    struct shadow_paging_mode shadow;
};

/*****************************************************************************
 * Log dirty code */

#define paging_logdirty_levels() \
    (DIV_ROUND_UP(PADDR_BITS - PAGE_SHIFT - (PAGE_SHIFT + 3), \
                  PAGE_SHIFT - ilog2(sizeof(mfn_t))) + 1)
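
/*
 * Worked example (assuming the usual x86-64 values PADDR_BITS == 52,
 * PAGE_SHIFT == 12 and sizeof(mfn_t) == 8, i.e. ilog2 == 3):
 *   leaf bits = PAGE_SHIFT + 3                      = 15 (pfn bits per bitmap page)
 *   node bits = PAGE_SHIFT - ilog2(sizeof(mfn_t))   = 9  (pfn bits per interior node)
 *   levels    = DIV_ROUND_UP(52 - 12 - 15, 9) + 1   = 3 + 1 = 4
 * so a 52-bit physical address space needs a 4-level log-dirty tree.
 */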

#if PG_log_dirty

/* get the dirty bitmap for a specific range of pfns */
void paging_log_dirty_range(struct domain *d,
                            unsigned long begin_pfn,
                            unsigned long nr,
                            uint8_t *dirty_bitmap);

/* log dirty initialization */
void paging_log_dirty_init(struct domain *d, const struct log_dirty_ops *ops);

/* mark a page as dirty */
void paging_mark_dirty(struct domain *d, mfn_t gmfn);
/* mark a page as dirty, taking the guest pfn as parameter */
void paging_mark_pfn_dirty(struct domain *d, pfn_t pfn);

/* is this guest page dirty?
 * This is called from inside paging code, with the paging lock held. */
bool paging_mfn_is_dirty(const struct domain *d, mfn_t gmfn);

/*
 * Log-dirty radix tree indexing:
 *   All tree nodes are PAGE_SIZE bytes, mapped on-demand.
 *   Leaf nodes are simple bitmaps; 1 bit per guest pfn.
 *   Interior nodes are arrays of LOGDIRTY_NODE_ENTRIES mfns.
 * TODO: Dynamic radix tree height. Most guests will only need 2 levels.
 *       The fourth level is basically unusable on 32-bit Xen.
 * TODO2: Abstract out the radix-tree mechanics?
 */
#define LOGDIRTY_NODE_ENTRIES (1 << PAGETABLE_ORDER)
#define L1_LOGDIRTY_IDX(pfn) (pfn_x(pfn) & ((1 << (PAGE_SHIFT + 3)) - 1))
#define L2_LOGDIRTY_IDX(pfn) ((pfn_x(pfn) >> (PAGE_SHIFT + 3)) & \
                              (LOGDIRTY_NODE_ENTRIES-1))
#define L3_LOGDIRTY_IDX(pfn) ((pfn_x(pfn) >> (PAGE_SHIFT + 3 + PAGETABLE_ORDER)) & \
                              (LOGDIRTY_NODE_ENTRIES-1))
#define L4_LOGDIRTY_IDX(pfn) ((pfn_x(pfn) >> (PAGE_SHIFT + 3 + PAGETABLE_ORDER * 2)) & \
                              (LOGDIRTY_NODE_ENTRIES-1))
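
/*
 * Worked example (assuming PAGE_SHIFT == 12 and PAGETABLE_ORDER == 9, the
 * usual x86 values): each leaf page holds 1 << 15 = 32768 bits and each
 * interior node holds 512 mfns.  For pfn 0x123456:
 *   L1_LOGDIRTY_IDX = 0x123456 & 0x7fff        = 0x3456 (bit within the leaf)
 *   L2_LOGDIRTY_IDX = (0x123456 >> 15) & 0x1ff = 36
 *   L3_LOGDIRTY_IDX = (0x123456 >> 24) & 0x1ff = 0
 *   L4_LOGDIRTY_IDX = (0x123456 >> 33) & 0x1ff = 0
 */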

#ifdef CONFIG_HVM
/* VRAM dirty tracking support */
struct sh_dirty_vram {
    unsigned long begin_pfn;
    unsigned long end_pfn;
#ifdef CONFIG_SHADOW_PAGING
    paddr_t *sl1ma;
    uint8_t *dirty_bitmap;
    s_time_t last_dirty;
#endif
};
#endif

#else /* !PG_log_dirty */

static inline void paging_log_dirty_init(struct domain *d,
                                         const struct log_dirty_ops *ops) {}
static inline void paging_mark_dirty(struct domain *d, mfn_t gmfn) {}
static inline void paging_mark_pfn_dirty(struct domain *d, pfn_t pfn) {}
static inline bool paging_mfn_is_dirty(const struct domain *d, mfn_t gmfn)
{ return false; }

#endif /* PG_log_dirty */

/*****************************************************************************
 * Entry points into the paging-assistance code */

/* Initialize the paging resource for vcpu struct. It is called by
 * vcpu_initialise() in domain.c */
void paging_vcpu_init(struct vcpu *v);

/* Set up the paging-assistance-specific parts of a domain struct at
 * start of day.  Called for every domain from arch_domain_create() */
int paging_domain_init(struct domain *d);

/* Handler for paging-control ops: operations from user-space to enable
 * and disable ephemeral shadow modes (test mode and log-dirty mode) and
 * manipulate the log-dirty bitmap. */
int paging_domctl(struct domain *d, struct xen_domctl_shadow_op *sc,
                  XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl,
                  bool resuming);

/* Call when destroying a vcpu/domain */
void paging_vcpu_teardown(struct vcpu *v);
int paging_teardown(struct domain *d);

/* Call once all of the references to the domain have gone away */
void paging_final_teardown(struct domain *d);

/* Enable an arbitrary paging-assistance mode.  Call once at domain
 * creation. */
int paging_enable(struct domain *d, u32 mode);

#define paging_get_hostmode(v)          ((v)->arch.paging.mode)
#define paging_get_nestedmode(v)        ((v)->arch.paging.nestedmode)
void paging_update_nestedmode(struct vcpu *v);

/* Page fault handler
 * Called from pagefault handler in Xen, and from the HVM trap handlers
 * for pagefaults.  Returns 1 if this fault was an artefact of the
 * paging code (and the guest should retry) or 0 if it is not (and the
 * fault should be handled elsewhere or passed to the guest).
 *
 * Note: under shadow paging, this function handles all page faults;
 * however, for hardware-assisted paging, this function handles only
 * host page faults (i.e. nested page faults). */
static inline int
paging_fault(unsigned long va, struct cpu_user_regs *regs)
{
    struct vcpu *v = current;
    return paging_get_hostmode(v)->page_fault(v, va, regs);
}
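
/*
 * Illustrative caller sketch (an assumption, not a copy of Xen's actual #PF
 * handlers): give the paging code first refusal on a fault, and only handle
 * or inject it elsewhere if paging_fault() says the fault was not its doing.
 *
 *     if ( paging_mode_enabled(current->domain) &&
 *          paging_fault(addr, regs) )
 *         return;    // artefact of the paging code; guest will retry
 *     // otherwise: fix up in Xen or pass the fault to the guest
 */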

/* Handle invlpg requests on vcpus. */
void paging_invlpg(struct vcpu *v, unsigned long linear);

/*
 * Translate a guest virtual address to the frame number that the
 * *guest* pagetables would map it to.  Returns INVALID_GFN if the guest
 * tables don't map this address for this kind of access.
 * *pfec is used to determine which kind of access this is when
 * walking the tables.  The caller should set the PFEC_page_present bit
 * in *pfec; in the failure case, that bit will be cleared if appropriate.
 *
 * SDM Intel 64 Volume 3, Chapter Paging, PAGE-FAULT EXCEPTIONS:
 * The PFEC_insn_fetch flag is set only when NX or SMEP are enabled.
 */
unsigned long paging_gva_to_gfn(struct vcpu *v,
                                unsigned long va,
                                uint32_t *pfec);
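
/*
 * Illustrative sketch (an assumption, not lifted from an actual caller):
 * probing whether a guest linear address is mapped for a write.  The caller
 * seeds *pfec with the access type plus PFEC_page_present; on failure the
 * returned value compares equal to gfn_x(INVALID_GFN) and *pfec describes
 * the fault that would be raised.
 *
 *     uint32_t pfec = PFEC_page_present | PFEC_write_access;
 *     unsigned long gfn = paging_gva_to_gfn(v, va, &pfec);
 *
 *     if ( gfn == gfn_x(INVALID_GFN) )
 *         hvm_inject_page_fault(pfec, va);   // e.g. reflect it to the guest
 */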

#ifdef CONFIG_HVM

/* Translate a guest address using a particular CR3 value.  This is used
 * by nested HAP code, to walk the guest-supplied NPT tables as if
 * they were pagetables.
 * Use 'paddr_t' for the guest address so it won't overflow when
 * l1 or l2 guest is in 32bit PAE mode.
 * If the GFN returned is not INVALID_GFN, *page_order gives
 * the size of the superpage (if any) it was found in. */
static inline unsigned long paging_ga_to_gfn_cr3(struct vcpu *v,
                                                 unsigned long cr3,
                                                 paddr_t ga,
                                                 uint32_t *pfec,
                                                 unsigned int *page_order)
{
    struct p2m_domain *p2m = v->domain->arch.p2m;
    return paging_get_hostmode(v)->p2m_ga_to_gfn(v, p2m, cr3, ga, pfec,
        page_order);
}

/* Flush selected vCPUs' TLBs.  NULL for all. */
static inline bool paging_flush_tlb(const unsigned long *vcpu_bitmap)
{
    return current->domain->arch.paging.flush_tlb(vcpu_bitmap);
}

#endif /* CONFIG_HVM */


/* Update all the things that are derived from the guest's CR3.
 * Called when the guest changes CR3; the caller can then use v->arch.cr3
 * as the value to load into the host CR3 to schedule this vcpu */
static inline pagetable_t paging_update_cr3(struct vcpu *v, bool noflush)
{
    return paging_get_hostmode(v)->update_cr3(v, noflush);
}

/* Update all the things that are derived from the guest's CR0/CR3/CR4.
 * Called to initialize paging structures if the paging mode
 * has changed, and when bringing up a VCPU for the first time. */
static inline void paging_update_paging_modes(struct vcpu *v)
{
    v->domain->arch.paging.update_paging_modes(v);
}

/* Helper function that writes a pte in such a way that a concurrent read
 * never sees a half-written entry that has _PAGE_PRESENT set */
static inline void safe_write_pte(l1_pgentry_t *p, l1_pgentry_t new)
{
    *p = new;
}

/*
 * Called from the guest to indicate that a process is being
 * torn down and its pagetables will soon be discarded.
 */
void pagetable_dying(paddr_t gpa);

/* Print paging-assistance info to the console */
void paging_dump_domain_info(struct domain *d);
void paging_dump_vcpu_info(struct vcpu *v);

/* Set the pool of shadow pages to the required number of pages.
 * Input might be rounded up to a minimum number of pages, plus
 * space for the p2m table.
 * Returns 0 for success, non-zero for failure. */
int paging_set_allocation(struct domain *d, unsigned int pages,
                          bool *preempted);
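
/*
 * Illustrative sketch (an assumption, loosely modelled on how domain
 * construction sizes the paging pool; nothing here prescribes the exact
 * caller): the operation can be preempted, so callers retry until done.
 *
 *     bool preempted;
 *     int rc;
 *
 *     do {
 *         preempted = false;
 *         rc = paging_set_allocation(d, pages, &preempted);
 *         process_pending_softirqs();   // or set up a hypercall continuation
 *     } while ( !rc && preempted );
 */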

/* Is gfn within maxphysaddr for the domain? */
static inline bool gfn_valid(const struct domain *d, gfn_t gfn)
{
    return !(gfn_x(gfn) >> (d->arch.cpuid->extd.maxphysaddr - PAGE_SHIFT));
}

#endif /* XEN_PAGING_H */

/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * indent-tabs-mode: nil
 * End:
 */