/******************************************************************************
* include/asm-x86/paging.h
*
* Common interface for paging support
* Copyright (c) 2007 Advanced Micro Devices (Wei Huang)
* Parts of this code are Copyright (c) 2006 by XenSource Inc.
* Parts of this code are Copyright (c) 2006 by Michael A Fetterman
* Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
 * along with this program; If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _XEN_PAGING_H
#define _XEN_PAGING_H
#include <xen/mm.h>
#include <public/domctl.h>
#include <xen/sched.h>
#include <xen/perfc.h>
#include <xen/domain_page.h>
#include <asm/flushtlb.h>
#include <asm/domain.h>
/*****************************************************************************
* Macros to tell which paging mode a domain is in */
#define PG_SH_shift 20
#define PG_HAP_shift 21
/* We're in one of the shadow modes */
#ifdef CONFIG_SHADOW_PAGING
#define PG_SH_enable (1U << PG_SH_shift)
#else
#define PG_SH_enable 0
#endif
#define PG_HAP_enable (1U << PG_HAP_shift)
/* common paging mode bits */
#define PG_mode_shift 10
/* Refcounts based on shadow tables instead of guest tables */
#define PG_refcounts (XEN_DOMCTL_SHADOW_ENABLE_REFCOUNT << PG_mode_shift)
/* Enable log dirty mode */
#define PG_log_dirty (XEN_DOMCTL_SHADOW_ENABLE_LOG_DIRTY << PG_mode_shift)
/* Xen does p2m translation, not guest */
#define PG_translate (XEN_DOMCTL_SHADOW_ENABLE_TRANSLATE << PG_mode_shift)
/* Xen does not steal address space from the domain for its own bookkeeping;
* requires VT or similar mechanisms */
#define PG_external (XEN_DOMCTL_SHADOW_ENABLE_EXTERNAL << PG_mode_shift)
/* All paging modes. */
#define PG_MASK (PG_refcounts | PG_log_dirty | PG_translate | PG_external)
#define paging_mode_enabled(_d) (!!(_d)->arch.paging.mode)
#define paging_mode_shadow(_d) (!!((_d)->arch.paging.mode & PG_SH_enable))
#define paging_mode_hap(_d) (!!((_d)->arch.paging.mode & PG_HAP_enable))
#define paging_mode_refcounts(_d) (!!((_d)->arch.paging.mode & PG_refcounts))
#define paging_mode_log_dirty(_d) (!!((_d)->arch.paging.mode & PG_log_dirty))
#define paging_mode_translate(_d) (!!((_d)->arch.paging.mode & PG_translate))
#define paging_mode_external(_d) (!!((_d)->arch.paging.mode & PG_external))
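/*
 * Example (illustrative sketch only, not part of this interface): code that
 * must behave differently under shadow and HAP paging can test the mode bits
 * of a domain "d" along these lines; do_hap_specific_work() and
 * do_shadow_specific_work() are hypothetical placeholders.
 *
 *     if ( paging_mode_enabled(d) )
 *     {
 *         if ( paging_mode_hap(d) )
 *             do_hap_specific_work(d);      (hardware-assisted paging path)
 *         else if ( paging_mode_shadow(d) )
 *             do_shadow_specific_work(d);   (shadow pagetable path)
 *     }
 */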
/* flags used for paging debug */
#define PAGING_DEBUG_LOGDIRTY 0
/*****************************************************************************
* Mode-specific entry points into the shadow code.
*
* These shouldn't be used directly by callers; rather use the functions
* below which will indirect through this table as appropriate. */
struct sh_emulate_ctxt;
struct shadow_paging_mode {
#ifdef CONFIG_SHADOW_PAGING
void (*detach_old_tables )(struct vcpu *v);
int (*x86_emulate_write )(struct vcpu *v, unsigned long va,
void *src, u32 bytes,
struct sh_emulate_ctxt *sh_ctxt);
int (*x86_emulate_cmpxchg )(struct vcpu *v, unsigned long va,
unsigned long old,
unsigned long new,
unsigned int bytes,
struct sh_emulate_ctxt *sh_ctxt);
bool (*write_guest_entry )(struct vcpu *v, intpte_t *p,
intpte_t new, mfn_t gmfn);
bool (*cmpxchg_guest_entry )(struct vcpu *v, intpte_t *p,
intpte_t *old, intpte_t new,
mfn_t gmfn);
mfn_t (*make_monitor_table )(struct vcpu *v);
void (*destroy_monitor_table )(struct vcpu *v, mfn_t mmfn);
int (*guess_wrmap )(struct vcpu *v,
unsigned long vaddr, mfn_t gmfn);
void (*pagetable_dying )(struct vcpu *v, paddr_t gpa);
#endif
/* For outsiders to tell what mode we're in */
unsigned int shadow_levels;
};
/************************************************/
/* common paging interface */
/************************************************/
struct paging_mode {
int (*page_fault )(struct vcpu *v, unsigned long va,
struct cpu_user_regs *regs);
bool (*invlpg )(struct vcpu *v, unsigned long va);
unsigned long (*gva_to_gfn )(struct vcpu *v,
struct p2m_domain *p2m,
unsigned long va,
uint32_t *pfec);
unsigned long (*p2m_ga_to_gfn )(struct vcpu *v,
struct p2m_domain *p2m,
unsigned long cr3,
paddr_t ga, uint32_t *pfec,
unsigned int *page_order);
void (*update_cr3 )(struct vcpu *v, int do_locking);
void (*update_paging_modes )(struct vcpu *v);
void (*write_p2m_entry )(struct domain *d, unsigned long gfn,
l1_pgentry_t *p, l1_pgentry_t new,
unsigned int level);
unsigned int guest_levels;
/* paging support extension */
struct shadow_paging_mode shadow;
};
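/*
 * Illustrative sketch (hypothetical, not the actual shadow or HAP tables):
 * each paging implementation provides a filled-in struct paging_mode, roughly
 * along these lines; every identifier beginning with "example_" is a
 * placeholder.
 *
 *     static const struct paging_mode example_paging_mode = {
 *         .page_fault            = example_page_fault,
 *         .invlpg                = example_invlpg,
 *         .gva_to_gfn            = example_gva_to_gfn,
 *         .update_cr3            = example_update_cr3,
 *         .update_paging_modes   = example_update_paging_modes,
 *         .write_p2m_entry       = example_write_p2m_entry,
 *         .guest_levels          = 4,
 *         .shadow.shadow_levels  = 4,
 *     };
 */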
/*****************************************************************************
* Log dirty code */
/* get the dirty bitmap for a specific range of pfns */
void paging_log_dirty_range(struct domain *d,
unsigned long begin_pfn,
unsigned long nr,
uint8_t *dirty_bitmap);
/* enable log dirty */
int paging_log_dirty_enable(struct domain *d, bool_t log_global);
/* log dirty initialization */
void paging_log_dirty_init(struct domain *d, const struct log_dirty_ops *ops);
/* mark a page as dirty */
void paging_mark_dirty(struct domain *d, mfn_t gmfn);
/* mark a page as dirty, taking the guest pfn as parameter */
void paging_mark_pfn_dirty(struct domain *d, pfn_t pfn);
/* is this guest page dirty?
* This is called from inside paging code, with the paging lock held. */
int paging_mfn_is_dirty(struct domain *d, mfn_t gmfn);
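/*
 * Example (illustrative sketch only): a typical producer of dirty information
 * records a guest write only while log-dirty mode is active; "d" and "gmfn"
 * are assumed to identify the domain and the machine frame that was written.
 *
 *     if ( paging_mode_log_dirty(d) )
 *         paging_mark_dirty(d, gmfn);
 */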
/*
 * Log-dirty radix tree indexing:
 *   All tree nodes are PAGE_SIZE bytes, mapped on-demand.
 *   Leaf nodes are simple bitmaps; 1 bit per guest pfn.
 *   Interior nodes are arrays of LOGDIRTY_NODE_ENTRIES mfns.
 * TODO: Dynamic radix tree height. Most guests will only need 2 levels.
 *       The fourth level is basically unusable on 32-bit Xen.
 * TODO2: Abstract out the radix-tree mechanics?
*/
#define LOGDIRTY_NODE_ENTRIES (1 << PAGETABLE_ORDER)
#define L1_LOGDIRTY_IDX(pfn) (pfn_x(pfn) & ((1 << (PAGE_SHIFT + 3)) - 1))
#define L2_LOGDIRTY_IDX(pfn) ((pfn_x(pfn) >> (PAGE_SHIFT + 3)) & \
(LOGDIRTY_NODE_ENTRIES-1))
#define L3_LOGDIRTY_IDX(pfn) ((pfn_x(pfn) >> (PAGE_SHIFT + 3 + PAGETABLE_ORDER)) & \
(LOGDIRTY_NODE_ENTRIES-1))
#define L4_LOGDIRTY_IDX(pfn) ((pfn_x(pfn) >> (PAGE_SHIFT + 3 + PAGETABLE_ORDER * 2)) & \
(LOGDIRTY_NODE_ENTRIES-1))
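/*
 * Worked example (assuming PAGE_SHIFT == 12 and PAGETABLE_ORDER == 9, as on
 * x86): a leaf bitmap page holds 4096 * 8 = 32768 bits, so L1_LOGDIRTY_IDX()
 * selects a bit using the low 15 bits of the pfn, and each interior node
 * holds 512 entries, so L2/L3/L4_LOGDIRTY_IDX() consume 9 bits each, e.g.
 *
 *     pfn 0x123456:  L1 = 0x3456, L2 = 0x24, L3 = 0x0, L4 = 0x0
 */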
/* VRAM dirty tracking support */
struct sh_dirty_vram {
unsigned long begin_pfn;
unsigned long end_pfn;
paddr_t *sl1ma;
uint8_t *dirty_bitmap;
s_time_t last_dirty;
};
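/*
 * Sizing sketch (assumption, for illustration only): the dirty bitmap is
 * expected to carry one bit per pfn in [begin_pfn, end_pfn), and sl1ma one
 * shadow l1 entry address per pfn in that range, i.e. roughly
 *
 *     unsigned long nr = dirty_vram->end_pfn - dirty_vram->begin_pfn;
 *     size_t bitmap_bytes = (nr + 7) / 8;
 */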
/*****************************************************************************
* Entry points into the paging-assistance code */
/* Initialize the paging resources for a vcpu struct. It is called by
* vcpu_initialise() in domain.c */
void paging_vcpu_init(struct vcpu *v);
/* Set up the paging-assistance-specific parts of a domain struct at
* start of day. Called for every domain from arch_domain_create() */
int paging_domain_init(struct domain *d, unsigned int domcr_flags);
/* Handler for paging-control ops: operations from user-space to enable
* and disable ephemeral shadow modes (test mode and log-dirty mode) and
* manipulate the log-dirty bitmap. */
int paging_domctl(struct domain *d, struct xen_domctl_shadow_op *sc,
XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl,
bool_t resuming);
/* Helper hypercall for dealing with continuations. */
long paging_domctl_continuation(XEN_GUEST_HANDLE_PARAM(xen_domctl_t));
/* Call when destroying a domain */
int paging_teardown(struct domain *d);
/* Call once all of the references to the domain have gone away */
void paging_final_teardown(struct domain *d);
/* Enable an arbitrary paging-assistance mode. Call once at domain
* creation. */
int paging_enable(struct domain *d, u32 mode);
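/*
 * Illustrative lifecycle sketch (assumption about typical ordering, not a
 * specification): paging_domain_init() runs at domain creation and
 * paging_vcpu_init() per vcpu; paging_enable() then selects a mode, and at
 * the end of the domain's life paging_teardown() runs first, with
 * paging_final_teardown() once all references to the domain are gone.
 * The flag combination below is one plausible choice for an HVM-style
 * guest, not a mandated value.
 *
 *     rc = paging_domain_init(d, domcr_flags);
 *     if ( !rc )
 *         rc = paging_enable(d, PG_refcounts | PG_translate | PG_external);
 */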
#define paging_get_hostmode(v) ((v)->arch.paging.mode)
#define paging_get_nestedmode(v) ((v)->arch.paging.nestedmode)
const struct paging_mode *paging_get_mode(struct vcpu *v);
void paging_update_nestedmode(struct vcpu *v);
/* Page fault handler
* Called from pagefault handler in Xen, and from the HVM trap handlers
* for pagefaults. Returns 1 if this fault was an artefact of the
* paging code (and the guest should retry) or 0 if it is not (and the
* fault should be handled elsewhere or passed to the guest).
*
* Note: under shadow paging, this function handles all page faults;
* however, for hardware-assisted paging, this function handles only
* host page faults (i.e. nested page faults). */
static inline int
paging_fault(unsigned long va, struct cpu_user_regs *regs)
{
struct vcpu *v = current;
return paging_get_hostmode(v)->page_fault(v, va, regs);
}
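/*
 * Example (illustrative sketch only): a fault handler forwards guest page
 * faults here first and falls back to its own handling only when the paging
 * code did not consume the fault; "addr", "regs" and
 * handle_fault_elsewhere() are hypothetical placeholders.
 *
 *     if ( paging_fault(addr, regs) )
 *         return;                  (handled by paging code; guest retries)
 *     handle_fault_elsewhere(addr, regs);
 */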
/* Handle invlpg requests on vcpus. */
void paging_invlpg(struct vcpu *v, unsigned long va);
/*
* Translate a guest virtual address to the frame number that the
* *guest* pagetables would map it to. Returns INVALID_GFN if the guest
 * tables don't map this address for this kind of access.
* *pfec is used to determine which kind of access this is when
* walking the tables. The caller should set the PFEC_page_present bit
 * in *pfec; in the failure case, that bit will be cleared if appropriate.
*
* SDM Intel 64 Volume 3, Chapter Paging, PAGE-FAULT EXCEPTIONS:
 * The PFEC_insn_fetch flag is set only when NX or SMEP is enabled.
*/
unsigned long paging_gva_to_gfn(struct vcpu *v,
unsigned long va,
uint32_t *pfec);
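/*
 * Example (illustrative sketch only): translating a guest virtual address for
 * a write access; PFEC_page_present must be set on entry as described above,
 * and the failure value is assumed here to be gfn_x(INVALID_GFN).
 *
 *     uint32_t pfec = PFEC_page_present | PFEC_write_access;
 *     unsigned long gfn = paging_gva_to_gfn(v, va, &pfec);
 *
 *     if ( gfn == gfn_x(INVALID_GFN) )
 *         ...                      (inject #PF into the guest using pfec)
 */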
/* Translate a guest address using a particular CR3 value. This is used
 * by the nested HAP code, to walk the guest-supplied NPT tables as if
 * they were pagetables.
 * Use 'paddr_t' for the guest address so it won't overflow when
 * an l1 or l2 guest is in 32-bit PAE mode.
 * If the GFN returned is not INVALID_GFN, *page_order gives
 * the size of the superpage (if any) it was found in. */
static inline unsigned long paging_ga_to_gfn_cr3(struct vcpu *v,
unsigned long cr3,
paddr_t ga,
uint32_t *pfec,
unsigned int *page_order)
{
struct p2m_domain *p2m = v->domain->arch.p2m;
return paging_get_hostmode(v)->p2m_ga_to_gfn(v, p2m, cr3, ga, pfec,
page_order);
}
/* Update all the things that are derived from the guest's CR3.
* Called when the guest changes CR3; the caller can then use v->arch.cr3
 * as the value to load into the host CR3 to schedule this vcpu */
static inline void paging_update_cr3(struct vcpu *v)
{
paging_get_hostmode(v)->update_cr3(v, 1);
}
/* Update all the things that are derived from the guest's CR0/CR3/CR4.
* Called to initialize paging structures if the paging mode
* has changed, and when bringing up a VCPU for the first time. */
static inline void paging_update_paging_modes(struct vcpu *v)
{
paging_get_hostmode(v)->update_paging_modes(v);
}
/*
* Write a new value into the guest pagetable, and update the
* paging-assistance state appropriately. Returns false if we page-faulted,
* true for success.
*/
static inline bool paging_write_guest_entry(
struct vcpu *v, intpte_t *p, intpte_t new, mfn_t gmfn)
{
#ifdef CONFIG_SHADOW_PAGING
if ( unlikely(paging_mode_shadow(v->domain)) && paging_get_hostmode(v) )
return paging_get_hostmode(v)->shadow.write_guest_entry(v, p, new,
gmfn);
#endif
return !__copy_to_user(p, &new, sizeof(new));
}
/*
* Cmpxchg a new value into the guest pagetable, and update the
* paging-assistance state appropriately. Returns false if we page-faulted,
* true if not. N.B. caller should check the value of "old" to see if the
* cmpxchg itself was successful.
*/
static inline bool paging_cmpxchg_guest_entry(
struct vcpu *v, intpte_t *p, intpte_t *old, intpte_t new, mfn_t gmfn)
{
#ifdef CONFIG_SHADOW_PAGING
if ( unlikely(paging_mode_shadow(v->domain)) && paging_get_hostmode(v) )
return paging_get_hostmode(v)->shadow.cmpxchg_guest_entry(v, p, old,
new, gmfn);
#endif
return !cmpxchg_user(p, *old, new);
}
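/*
 * Example (illustrative sketch only): because "old" is updated with the value
 * actually found in the guest entry, a caller can tell a failed compare from
 * a fault; old_entry and new_entry are hypothetical placeholders.
 *
 *     intpte_t expected = old_entry, seen = expected;
 *
 *     if ( !paging_cmpxchg_guest_entry(v, p, &seen, new_entry, gmfn) )
 *         ...                 (page fault while accessing the entry)
 *     else if ( seen != expected )
 *         ...                 (entry changed under our feet; retry or bail)
 */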
/* Helper function that writes a pte in such a way that a concurrent read
* never sees a half-written entry that has _PAGE_PRESENT set */
static inline void safe_write_pte(l1_pgentry_t *p, l1_pgentry_t new)
{
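    /*
     * On x86-64, l1_pgentry_t is a naturally aligned 8-byte value, so the
     * plain store below is a single write: a concurrent reader cannot
     * observe a half-written entry with _PAGE_PRESENT set.
     */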
*p = new;
}
/* Atomically write a P2M entry and update the paging-assistance state
* appropriately.
* Arguments: the domain in question, the GFN whose mapping is being updated,
* a pointer to the entry to be written, the MFN in which the entry resides,
* the new contents of the entry, and the level in the p2m tree at which
* we are writing. */
struct p2m_domain;
void paging_write_p2m_entry(struct p2m_domain *p2m, unsigned long gfn,
l1_pgentry_t *p, l1_pgentry_t new,
unsigned int level);
/* Called from the guest to indicate that a process is being
* torn down and its pagetables will soon be discarded */
void pagetable_dying(struct domain *d, paddr_t gpa);
/* Print paging-assistance info to the console */
void paging_dump_domain_info(struct domain *d);
void paging_dump_vcpu_info(struct vcpu *v);
/* Set the pool of shadow pages to the required number of pages.
 * The input might be rounded up to a minimum number of pages, plus
* space for the p2m table.
* Returns 0 for success, non-zero for failure. */
int paging_set_allocation(struct domain *d, unsigned int pages,
bool *preempted);
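/*
 * Example (illustrative sketch only): callers are expected to cope with
 * preemption by retrying until the full allocation has been processed;
 * "pages" is a placeholder and the softirq processing reflects typical
 * hypervisor-internal use, not a requirement of this interface.
 *
 *     bool preempted;
 *
 *     do {
 *         preempted = false;
 *         rc = paging_set_allocation(d, pages, &preempted);
 *         process_pending_softirqs();
 *     } while ( preempted );
 */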
/* Is gfn within maxphysaddr for the domain? */
static inline bool gfn_valid(const struct domain *d, gfn_t gfn)
{
return !(gfn_x(gfn) >> (d->arch.cpuid->extd.maxphysaddr - PAGE_SHIFT));
}
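/*
 * Worked example (assuming PAGE_SHIFT == 12): for a domain reporting a
 * maxphysaddr of 40 bits, gfn_valid() accepts gfns below 1UL << (40 - 12),
 * i.e. anything under 2^28, and rejects everything at or above that.
 */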
/* Maxphysaddr supportable by the paging infrastructure. */
static inline unsigned int paging_max_paddr_bits(const struct domain *d)
{
unsigned int bits = paging_mode_hap(d) ? hap_paddr_bits : paddr_bits;
    if ( !IS_ENABLED(CONFIG_BIGMEM) && paging_mode_shadow(d) && !is_pv_domain(d) )
{
/* Shadowed superpages store GFNs in 32-bit page_info fields. */
bits = min(bits, 32U + PAGE_SHIFT);
}
return bits;
}
#endif /* _XEN_PAGING_H */
/*
* Local variables:
* mode: C
* c-file-style: "BSD"
* c-basic-offset: 4
* indent-tabs-mode: nil
* End:
*/