/******************************************************************************
 * include/asm-x86/paging.h
 *
 * Common interface for paging support
 * Copyright (c) 2007 Advanced Micro Devices (Wei Huang)
 * Parts of this code are Copyright (c) 2006 by XenSource Inc.
 * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
 * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; If not, see <http://www.gnu.org/licenses/>.
 */

#ifndef _XEN_PAGING_H
#define _XEN_PAGING_H

#include <xen/mm.h>
#include <public/domctl.h>
#include <xen/sched.h>
#include <xen/perfc.h>
#include <xen/domain_page.h>
#include <asm/flushtlb.h>
#include <asm/domain.h>

/*****************************************************************************
 * Macros to tell which paging mode a domain is in */

#define PG_SH_shift    20
#define PG_HAP_shift   21
/* We're in one of the shadow modes */
#ifdef CONFIG_SHADOW_PAGING
#define PG_SH_enable   (1U << PG_SH_shift)
#else
#define PG_SH_enable   0
#endif
#define PG_HAP_enable  (1U << PG_HAP_shift)

/* common paging mode bits */
#define PG_mode_shift  10
/* Refcounts based on shadow tables instead of guest tables */
#define PG_refcounts   (XEN_DOMCTL_SHADOW_ENABLE_REFCOUNT << PG_mode_shift)
/* Enable log dirty mode */
#define PG_log_dirty   (XEN_DOMCTL_SHADOW_ENABLE_LOG_DIRTY << PG_mode_shift)
/* Xen does p2m translation, not guest */
#define PG_translate   (XEN_DOMCTL_SHADOW_ENABLE_TRANSLATE << PG_mode_shift)
/* Xen does not steal address space from the domain for its own booking;
 * requires VT or similar mechanisms */
#define PG_external   (XEN_DOMCTL_SHADOW_ENABLE_EXTERNAL << PG_mode_shift)

/* All paging modes. */
#define PG_MASK (PG_refcounts | PG_log_dirty | PG_translate | PG_external)

#define paging_mode_enabled(_d)   (!!(_d)->arch.paging.mode)
#define paging_mode_shadow(_d)    (!!((_d)->arch.paging.mode & PG_SH_enable))
#define paging_mode_hap(_d)       (!!((_d)->arch.paging.mode & PG_HAP_enable))

#define paging_mode_refcounts(_d) (!!((_d)->arch.paging.mode & PG_refcounts))
#define paging_mode_log_dirty(_d) (!!((_d)->arch.paging.mode & PG_log_dirty))
#define paging_mode_translate(_d) (!!((_d)->arch.paging.mode & PG_translate))
#define paging_mode_external(_d)  (!!((_d)->arch.paging.mode & PG_external))

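/*
 * Usage sketch (illustrative only, not part of this interface): callers
 * normally test these predicates instead of reading d->arch.paging.mode
 * directly, e.g.
 *
 *     if ( paging_mode_translate(d) )
 *         mfn = get_gfn(d, gfn, &p2mt);   // Xen owns the p2m: look it up
 *     else
 *         mfn = _mfn(gfn);                // PV-style: gfn is already a machine frame
 *
 * The get_gfn()/p2mt names above only stand in for whatever p2m lookup the
 * caller actually performs.
 */
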
/* flags used for paging debug */
#define PAGING_DEBUG_LOGDIRTY 0

/*****************************************************************************
 * Mode-specific entry points into the shadow code.
 *
 * These shouldn't be used directly by callers; rather use the functions
 * below which will indirect through this table as appropriate. */

struct sh_emulate_ctxt;
struct shadow_paging_mode {
#ifdef CONFIG_SHADOW_PAGING
    void          (*detach_old_tables     )(struct vcpu *v);
    int           (*x86_emulate_write     )(struct vcpu *v, unsigned long va,
                                            void *src, u32 bytes,
                                            struct sh_emulate_ctxt *sh_ctxt);
    int           (*x86_emulate_cmpxchg   )(struct vcpu *v, unsigned long va,
                                            unsigned long old,
                                            unsigned long new,
                                            unsigned int bytes,
                                            struct sh_emulate_ctxt *sh_ctxt);
    bool          (*write_guest_entry     )(struct vcpu *v, intpte_t *p,
                                            intpte_t new, mfn_t gmfn);
    bool          (*cmpxchg_guest_entry   )(struct vcpu *v, intpte_t *p,
                                            intpte_t *old, intpte_t new,
                                            mfn_t gmfn);
    mfn_t         (*make_monitor_table    )(struct vcpu *v);
    void          (*destroy_monitor_table )(struct vcpu *v, mfn_t mmfn);
    int           (*guess_wrmap           )(struct vcpu *v,
                                            unsigned long vaddr, mfn_t gmfn);
    void          (*pagetable_dying       )(struct vcpu *v, paddr_t gpa);
#endif
    /* For outsiders to tell what mode we're in */
    unsigned int shadow_levels;
};


/************************************************/
/*        common paging interface               */
/************************************************/
struct paging_mode {
    int           (*page_fault            )(struct vcpu *v, unsigned long va,
                                            struct cpu_user_regs *regs);
    bool          (*invlpg                )(struct vcpu *v, unsigned long va);
    unsigned long (*gva_to_gfn            )(struct vcpu *v,
                                            struct p2m_domain *p2m,
                                            unsigned long va,
                                            uint32_t *pfec);
    unsigned long (*p2m_ga_to_gfn         )(struct vcpu *v,
                                            struct p2m_domain *p2m,
                                            unsigned long cr3,
                                            paddr_t ga, uint32_t *pfec,
                                            unsigned int *page_order);
    void          (*update_cr3            )(struct vcpu *v, int do_locking);
    void          (*update_paging_modes   )(struct vcpu *v);
    void          (*write_p2m_entry       )(struct domain *d, unsigned long gfn,
                                            l1_pgentry_t *p, l1_pgentry_t new,
                                            unsigned int level);

    unsigned int guest_levels;

    /* paging support extension */
    struct shadow_paging_mode shadow;
};

/*****************************************************************************
 * Log dirty code */

/* get the dirty bitmap for a specific range of pfns */
void paging_log_dirty_range(struct domain *d,
                            unsigned long begin_pfn,
                            unsigned long nr,
                            uint8_t *dirty_bitmap);

/* enable log dirty */
int paging_log_dirty_enable(struct domain *d, bool_t log_global);

/* log dirty initialization */
void paging_log_dirty_init(struct domain *d, const struct log_dirty_ops *ops);

/* mark a page as dirty */
void paging_mark_dirty(struct domain *d, mfn_t gmfn);
/* mark a page as dirty, specifying it by guest pfn */
void paging_mark_pfn_dirty(struct domain *d, pfn_t pfn);

/* is this guest page dirty?
 * This is called from inside paging code, with the paging lock held. */
int paging_mfn_is_dirty(struct domain *d, mfn_t gmfn);

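/*
 * Usage sketch (illustrative): code that emulates or completes a guest
 * write typically marks the target frame dirty so that the next
 * XEN_DOMCTL_SHADOW_OP_PEEK/_CLEAN pass reports it.  A minimal example,
 * assuming mfn already holds the frame that was written:
 *
 *     if ( unlikely(paging_mode_log_dirty(d)) )
 *         paging_mark_dirty(d, mfn);
 *
 * The mode check merely avoids the call when tracking is off.
 */
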
/*
 * Log-dirty radix tree indexing:
 *   All tree nodes are PAGE_SIZE bytes, mapped on-demand.
 *   Leaf nodes are simple bitmaps; 1 bit per guest pfn.
 *   Interior nodes are arrays of LOGDIRTY_NODE_ENTRIES mfns.
 * TODO: Dynamic radix tree height. Most guests will only need 2 levels.
 *       The fourth level is basically unusable on 32-bit Xen.
 * TODO2: Abstract out the radix-tree mechanics?
 */
#define LOGDIRTY_NODE_ENTRIES (1 << PAGETABLE_ORDER)
#define L1_LOGDIRTY_IDX(pfn) (pfn_x(pfn) & ((1 << (PAGE_SHIFT + 3)) - 1))
#define L2_LOGDIRTY_IDX(pfn) ((pfn_x(pfn) >> (PAGE_SHIFT + 3)) & \
                              (LOGDIRTY_NODE_ENTRIES-1))
#define L3_LOGDIRTY_IDX(pfn) ((pfn_x(pfn) >> (PAGE_SHIFT + 3 + PAGETABLE_ORDER)) & \
                              (LOGDIRTY_NODE_ENTRIES-1))
#define L4_LOGDIRTY_IDX(pfn) ((pfn_x(pfn) >> (PAGE_SHIFT + 3 + PAGETABLE_ORDER * 2)) & \
                              (LOGDIRTY_NODE_ENTRIES-1))

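/*
 * Lookup sketch (illustrative): finding the dirty bit for a pfn walks one
 * interior node per level down to the leaf bitmap, mapping each node on
 * demand.  map_node() below is a hypothetical stand-in for mapping the
 * page named by an interior-node mfn (e.g. via map_domain_page()):
 *
 *     mfn   = top[L4_LOGDIRTY_IDX(pfn)];          // root node
 *     node  = map_node(mfn);
 *     mfn   = node[L3_LOGDIRTY_IDX(pfn)];
 *     node  = map_node(mfn);
 *     mfn   = node[L2_LOGDIRTY_IDX(pfn)];
 *     leaf  = map_node(mfn);                      // leaf bitmap page
 *     dirty = test_bit(L1_LOGDIRTY_IDX(pfn), leaf);
 */
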
/* VRAM dirty tracking support */
struct sh_dirty_vram {
    unsigned long begin_pfn;
    unsigned long end_pfn;
    paddr_t *sl1ma;
    uint8_t *dirty_bitmap;
    s_time_t last_dirty;
};

/*****************************************************************************
 * Entry points into the paging-assistance code */

/* Initialize the paging resources for a vcpu struct. It is called by
 * vcpu_initialise() in domain.c */
void paging_vcpu_init(struct vcpu *v);

/* Set up the paging-assistance-specific parts of a domain struct at
 * start of day.  Called for every domain from arch_domain_create() */
int paging_domain_init(struct domain *d, unsigned int domcr_flags);

/* Handler for paging-control ops: operations from user-space to enable
 * and disable ephemeral shadow modes (test mode and log-dirty mode) and
 * manipulate the log-dirty bitmap. */
int paging_domctl(struct domain *d, struct xen_domctl_shadow_op *sc,
                  XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl,
                  bool_t resuming);

/* Helper hypercall for dealing with continuations. */
long paging_domctl_continuation(XEN_GUEST_HANDLE_PARAM(xen_domctl_t));

/* Call when destroying a domain */
int paging_teardown(struct domain *d);

/* Call once all of the references to the domain have gone away */
void paging_final_teardown(struct domain *d);

/* Enable an arbitrary paging-assistance mode.  Call once at domain
 * creation. */
int paging_enable(struct domain *d, u32 mode);

#define paging_get_hostmode(v)      ((v)->arch.paging.mode)
#define paging_get_nestedmode(v)    ((v)->arch.paging.nestedmode)
const struct paging_mode *paging_get_mode(struct vcpu *v);
void paging_update_nestedmode(struct vcpu *v);

/* Page fault handler
 * Called from pagefault handler in Xen, and from the HVM trap handlers
 * for pagefaults.  Returns 1 if this fault was an artefact of the
 * paging code (and the guest should retry) or 0 if it is not (and the
 * fault should be handled elsewhere or passed to the guest).
 *
 * Note: under shadow paging, this function handles all page faults;
 * however, for hardware-assisted paging, this function handles only
 * host page faults (i.e. nested page faults). */
static inline int
paging_fault(unsigned long va, struct cpu_user_regs *regs)
{
    struct vcpu *v = current;
    return paging_get_hostmode(v)->page_fault(v, va, regs);
}

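/*
 * Call-site sketch (illustrative): a fault handler passes the faulting
 * address and register state through, and retires the fault if the
 * paging code fixed it up:
 *
 *     if ( paging_mode_enabled(current->domain) &&
 *          paging_fault(addr, regs) )
 *         return;    // handled by shadow/HAP code; the guest retries
 */
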
/* Handle invlpg requests on vcpus. */
void paging_invlpg(struct vcpu *v, unsigned long va);

/*
 * Translate a guest virtual address to the frame number that the
 * *guest* pagetables would map it to.  Returns INVALID_GFN if the guest
 * tables don't map this address for this kind of access.
 * *pfec is used to determine which kind of access this is when
 * walking the tables.  The caller should set the PFEC_page_present bit
 * in *pfec; in the failure case, that bit will be cleared if appropriate.
 *
 * SDM Intel 64 Volume 3, Chapter Paging, PAGE-FAULT EXCEPTIONS:
 * The PFEC_insn_fetch flag is set only when NX or SMEP are enabled.
 */
unsigned long paging_gva_to_gfn(struct vcpu *v,
                                unsigned long va,
                                uint32_t *pfec);

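/*
 * Usage sketch (illustrative), e.g. translating a guest virtual address
 * for an emulated write access:
 *
 *     uint32_t pfec = PFEC_page_present | PFEC_write_access;
 *     unsigned long gfn = paging_gva_to_gfn(v, va, &pfec);
 *
 *     if ( gfn == gfn_x(INVALID_GFN) )
 *         // inject #PF, using the error code left behind in pfec
 *
 * PFEC_* are the architectural page-fault error-code bits; the caller
 * primes pfec with the kind of access being attempted.
 */
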
/* Translate a guest address using a particular CR3 value.  This is used
 * by nested HAP code, to walk the guest-supplied NPT tables as if
 * they were pagetables.
 * Use 'paddr_t' for the guest address so it won't overflow when
 * l1 or l2 guest is in 32bit PAE mode.
 * If the GFN returned is not INVALID_GFN, *page_order gives
 * the size of the superpage (if any) it was found in. */
static inline unsigned long paging_ga_to_gfn_cr3(struct vcpu *v,
                                                 unsigned long cr3,
                                                 paddr_t ga,
                                                 uint32_t *pfec,
                                                 unsigned int *page_order)
{
    struct p2m_domain *p2m = v->domain->arch.p2m;
    return paging_get_hostmode(v)->p2m_ga_to_gfn(v, p2m, cr3, ga, pfec,
        page_order);
}

/* Update all the things that are derived from the guest's CR3.
 * Called when the guest changes CR3; the caller can then use v->arch.cr3
 * as the value to load into the host CR3 to schedule this vcpu */
static inline void paging_update_cr3(struct vcpu *v)
{
    paging_get_hostmode(v)->update_cr3(v, 1);
}

/* Update all the things that are derived from the guest's CR0/CR3/CR4.
 * Called to initialize paging structures if the paging mode
 * has changed, and when bringing up a VCPU for the first time. */
static inline void paging_update_paging_modes(struct vcpu *v)
{
    paging_get_hostmode(v)->update_paging_modes(v);
}

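/*
 * Illustrative trigger (hypothetical locals, not a real call site): an
 * emulated control-register write that flips a paging-relevant bit, such
 * as CR0.PG, would typically finish with:
 *
 *     if ( (old_cr0 ^ new_cr0) & X86_CR0_PG )
 *         paging_update_paging_modes(v);
 */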

/*
 * Write a new value into the guest pagetable, and update the
 * paging-assistance state appropriately.  Returns false if we page-faulted,
 * true for success.
 */
static inline bool paging_write_guest_entry(
    struct vcpu *v, intpte_t *p, intpte_t new, mfn_t gmfn)
{
#ifdef CONFIG_SHADOW_PAGING
    if ( unlikely(paging_mode_shadow(v->domain)) && paging_get_hostmode(v) )
        return paging_get_hostmode(v)->shadow.write_guest_entry(v, p, new,
                                                                gmfn);
#endif
    return !__copy_to_user(p, &new, sizeof(new));
}


/*
 * Cmpxchg a new value into the guest pagetable, and update the
 * paging-assistance state appropriately.  Returns false if we page-faulted,
 * true if not.  N.B. caller should check the value of "old" to see if the
 * cmpxchg itself was successful.
 */
static inline bool paging_cmpxchg_guest_entry(
    struct vcpu *v, intpte_t *p, intpte_t *old, intpte_t new, mfn_t gmfn)
{
#ifdef CONFIG_SHADOW_PAGING
    if ( unlikely(paging_mode_shadow(v->domain)) && paging_get_hostmode(v) )
        return paging_get_hostmode(v)->shadow.cmpxchg_guest_entry(v, p, old,
                                                                  new, gmfn);
#endif
    return !cmpxchg_user(p, *old, new);
}

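/*
 * Usage sketch (illustrative): a "true" return only means the access did
 * not fault; the caller still compares *old against the value it expected
 * to learn whether the exchange actually happened:
 *
 *     intpte_t cur = expected;
 *
 *     if ( !paging_cmpxchg_guest_entry(v, p, &cur, new, gmfn) )
 *         // the access faulted: propagate the error
 *     else if ( cur != expected )
 *         // the entry changed under our feet: retry or bail out
 */
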
/* Helper function that writes a pte in such a way that a concurrent read
 * never sees a half-written entry that has _PAGE_PRESENT set */
static inline void safe_write_pte(l1_pgentry_t *p, l1_pgentry_t new)
{
    /* An aligned, pte-sized store is atomic on x86, so a plain write
     * suffices: readers see either the old or the new entry, never a mix. */
    *p = new;
}

/* Atomically write a P2M entry and update the paging-assistance state
 * appropriately.
 * Arguments: the domain in question, the GFN whose mapping is being updated,
 * a pointer to the entry to be written, the MFN in which the entry resides,
 * the new contents of the entry, and the level in the p2m tree at which
 * we are writing. */
struct p2m_domain;

void paging_write_p2m_entry(struct p2m_domain *p2m, unsigned long gfn,
                            l1_pgentry_t *p, l1_pgentry_t new,
                            unsigned int level);

/* Called from the guest to indicate that a process is being
 * torn down and its pagetables will soon be discarded */
void pagetable_dying(struct domain *d, paddr_t gpa);

/* Print paging-assistance info to the console */
void paging_dump_domain_info(struct domain *d);
void paging_dump_vcpu_info(struct vcpu *v);

/* Set the pool of shadow pages to the required number of pages.
 * Input may be rounded up to a minimum number of pages, plus
 * space for the p2m table.
 * Returns 0 for success, non-zero for failure. */
int paging_set_allocation(struct domain *d, unsigned int pages,
                          bool *preempted);

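/*
 * Usage sketch (illustrative): resizing the pool can be preempted, so
 * hypercall-context callers generally loop on the out-parameter:
 *
 *     bool preempted = false;
 *
 *     do {
 *         rc = paging_set_allocation(d, pages, &preempted);
 *         if ( preempted )
 *             // yield / set up a continuation before retrying
 *     } while ( preempted && !rc );
 *
 * The exact continuation mechanism is the caller's business; the loop only
 * shows how "preempted" is meant to be consumed.
 */
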
/* Is gfn within maxphysaddr for the domain? */
static inline bool gfn_valid(const struct domain *d, gfn_t gfn)
{
    return !(gfn_x(gfn) >> (d->arch.cpuid->extd.maxphysaddr - PAGE_SHIFT));
}

/* Maxphysaddr supportable by the paging infrastructure. */
static inline unsigned int paging_max_paddr_bits(const struct domain *d)
{
    unsigned int bits = paging_mode_hap(d) ? hap_paddr_bits : paddr_bits;

    if ( !IS_ENABLED(CONFIG_BIGMEM) && paging_mode_shadow(d) && !is_pv_domain(d) )
    {
        /* Shadowed superpages store GFNs in 32-bit page_info fields. */
        bits = min(bits, 32U + PAGE_SHIFT);
    }

    return bits;
}

#endif /* _XEN_PAGING_H */

/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * indent-tabs-mode: nil
 * End:
 */