/******************************************************************************
 * arch/x86/mm/p2m-pt.c
 *
 * Implementation of p2m datastructures as pagetables, for use by
 * NPT and shadow-pagetable code
 *
 * Parts of this code are Copyright (c) 2009-2011 by Citrix Systems, Inc.
 * Parts of this code are Copyright (c) 2007 by Advanced Micro Devices.
 * Parts of this code are Copyright (c) 2006-2007 by XenSource Inc.
 * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
 * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; If not, see <http://www.gnu.org/licenses/>.
 */

#include <xen/iommu.h>
#include <xen/vm_event.h>
#include <xen/event.h>
#include <xen/trace.h>
#include <public/vm_event.h>
#include <asm/domain.h>
#include <asm/page.h>
#include <asm/paging.h>
#include <asm/p2m.h>
#include <asm/mem_sharing.h>
#include <asm/hvm/nestedhvm.h>
#include <asm/hvm/svm/amd-iommu-proto.h>

#include "mm-locks.h"

/* Override macros from asm/page.h to make them work with mfn_t */
#undef mfn_to_page
#define mfn_to_page(_m) __mfn_to_page(mfn_x(_m))
#undef page_to_mfn
#define page_to_mfn(_pg) _mfn(__page_to_mfn(_pg))

/*
 * We may store INVALID_MFN in PTEs.  We need to clip this to avoid trampling
 * over higher-order bits (NX, p2m type, IOMMU flags).  We seem to not need
 * to unclip on the read path, as callers are concerned only with p2m type in
 * such cases.
 */
#define p2m_l1e_from_pfn(pfn, flags)    \
    l1e_from_pfn((pfn) & (PADDR_MASK >> PAGE_SHIFT), (flags))
#define p2m_l2e_from_pfn(pfn, flags)    \
    l2e_from_pfn((pfn) & ((PADDR_MASK & ~(_PAGE_PSE_PAT | 0UL)) \
                          >> PAGE_SHIFT), (flags) | _PAGE_PSE)
#define p2m_l3e_from_pfn(pfn, flags)    \
    l3e_from_pfn((pfn) & ((PADDR_MASK & ~(_PAGE_PSE_PAT | 0UL)) \
                          >> PAGE_SHIFT), (flags) | _PAGE_PSE)

/* PTE flags for the various types of p2m entry */
#define P2M_BASE_FLAGS \
        (_PAGE_PRESENT | _PAGE_USER | _PAGE_DIRTY | _PAGE_ACCESSED)

#define RECALC_FLAGS (_PAGE_USER|_PAGE_ACCESSED)
#define set_recalc(level, ent) level##e_remove_flags(ent, RECALC_FLAGS)
#define clear_recalc(level, ent) level##e_add_flags(ent, RECALC_FLAGS)
#define _needs_recalc(flags) (!((flags) & _PAGE_USER))
#define needs_recalc(level, ent) _needs_recalc(level##e_get_flags(ent))
#define valid_recalc(level, ent) (!(level##e_get_flags(ent) & _PAGE_ACCESSED))
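
/*
 * Editor's illustration (not part of the original code): how the recalc
 * marker encodes in the PTE flag bits.  set_recalc() clears _PAGE_USER and
 * _PAGE_ACCESSED together, and clear_recalc() restores both.  An entry with
 * U clear but A set is therefore inconsistent, which is exactly what the
 * !valid_recalc() debug checks below report as "bogus recalc state":
 *
 *     l1_pgentry_t e = l1e_from_pfn(pfn, P2M_BASE_FLAGS);
 *     set_recalc(l1, e);            // U,A cleared: recalc pending
 *     ASSERT(needs_recalc(l1, e));  // _PAGE_USER clear
 *     ASSERT(valid_recalc(l1, e));  // _PAGE_ACCESSED clear as well
 *     clear_recalc(l1, e);          // U,A restored: entry normal again
 */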
static unsigned long p2m_type_to_flags(const struct p2m_domain *p2m,
                                       p2m_type_t t,
                                       mfn_t mfn,
                                       unsigned int level)
{
    unsigned long flags;

    /*
     * AMD IOMMU: When we share the p2m table with the IOMMU, bits 9-11 are
     * used by the hardware to encode the next IO page level, and bits 59-62
     * are used for IOMMU flags.  We cannot use any of these bits to store
     * p2m types.
     */
    flags = (unsigned long)(t & 0x7f) << 12;

    switch ( t )
    {
    case p2m_invalid:
    case p2m_mmio_dm:
    case p2m_populate_on_demand:
    case p2m_ram_paging_out:
    case p2m_ram_paged:
    case p2m_ram_paging_in:
    default:
        return flags | _PAGE_NX_BIT;
    case p2m_grant_map_ro:
        return flags | P2M_BASE_FLAGS | _PAGE_NX_BIT;
    case p2m_ioreq_server:
        flags |= P2M_BASE_FLAGS | _PAGE_RW | _PAGE_NX_BIT;
        if ( p2m->ioreq.flags & XEN_DMOP_IOREQ_MEM_ACCESS_WRITE )
            return flags & ~_PAGE_RW;
        return flags;
    case p2m_ram_ro:
    case p2m_ram_logdirty:
    case p2m_ram_shared:
        return flags | P2M_BASE_FLAGS;
    case p2m_ram_rw:
        return flags | P2M_BASE_FLAGS | _PAGE_RW;
    case p2m_grant_map_rw:
    case p2m_map_foreign:
        return flags | P2M_BASE_FLAGS | _PAGE_RW | _PAGE_NX_BIT;
    case p2m_mmio_direct:
        if ( !rangeset_contains_singleton(mmio_ro_ranges, mfn_x(mfn)) )
            flags |= _PAGE_RW;
        else
        {
            flags |= _PAGE_PWT;
            ASSERT(!level);
        }
        return flags | P2M_BASE_FLAGS | _PAGE_PCD;
    }
}

// Find the next level's P2M entry, checking for out-of-range GFNs...
// Returns NULL on error.
//
static l1_pgentry_t *
p2m_find_entry(void *table, unsigned long *gfn_remainder,
               unsigned long gfn, uint32_t shift, uint32_t max)
{
    u32 index;

    index = *gfn_remainder >> shift;
    if ( index >= max )
    {
        P2M_DEBUG("gfn=%#lx out of range "
                  "(gfn_remainder=%#lx shift=%d index=%#x max=%#x)\n",
                  gfn, *gfn_remainder, shift, index, max);
        return NULL;
    }
    *gfn_remainder &= (1 << shift) - 1;
    return (l1_pgentry_t *)table + index;
}

/* Free intermediate tables from a p2m sub-tree */
static void
p2m_free_entry(struct p2m_domain *p2m, l1_pgentry_t *p2m_entry, int page_order)
{
    /* End if the entry is a leaf entry. */
    if ( page_order == PAGE_ORDER_4K
         || !(l1e_get_flags(*p2m_entry) & _PAGE_PRESENT)
         || (l1e_get_flags(*p2m_entry) & _PAGE_PSE) )
        return;

    if ( page_order > PAGE_ORDER_2M )
    {
        l1_pgentry_t *l3_table = map_domain_page(l1e_get_mfn(*p2m_entry));

        for ( int i = 0; i < L3_PAGETABLE_ENTRIES; i++ )
            p2m_free_entry(p2m, l3_table + i, page_order - 9);

        unmap_domain_page(l3_table);
    }

    p2m_free_ptp(p2m, l1e_get_page(*p2m_entry));
}

// Walk one level of the P2M table, allocating a new table if required.
// Returns: 0 for success, -errno for failure.
//

/* AMD IOMMU: Convert next-level bits and r/w bits into 24-bit p2m flags */
#define iommu_nlevel_to_flags(nl, f) ((((nl) & 0x7) << 9 )|(((f) & 0x3) << 21))

static void p2m_add_iommu_flags(l1_pgentry_t *p2m_entry,
                                unsigned int nlevel, unsigned int flags)
{
    if ( iommu_hap_pt_share )
        l1e_add_flags(*p2m_entry, iommu_nlevel_to_flags(nlevel, flags));
}
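
/*
 * Editor's illustration (assumed bit layout, not from the original source):
 * Xen packs PTE flag bits 12 and up into PTE bits 52 and up when a flags
 * value is combined with a frame number, so the fields used by this file
 * line up as follows in a shared AMD IOMMU/p2m table:
 *
 *     flag bits  9-11  ->  PTE bits  9-11 : IOMMU next IO page level
 *     flag bits 12-18  ->  PTE bits 52-58 : p2m type ((t & 0x7f) << 12)
 *     flag bits 21-22  ->  PTE bits 61-62 : IOMMU read/write permissions
 *
 * e.g. iommu_nlevel_to_flags(1, IOMMUF_readable | IOMMUF_writable)
 *      == (1 << 9) | (3 << 21).
 */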
/* Returns: 0 for success, -errno for failure */
static int
p2m_next_level(struct p2m_domain *p2m, void **table,
               unsigned long *gfn_remainder, unsigned long gfn, u32 shift,
               u32 max, unsigned int level, bool_t unmap)
{
    l1_pgentry_t *p2m_entry, new_entry;
    void *next;
    unsigned int flags;

    if ( !(p2m_entry = p2m_find_entry(*table, gfn_remainder, gfn,
                                      shift, max)) )
        return -ENOENT;

    flags = l1e_get_flags(*p2m_entry);

    /* PoD/paging: Not present doesn't imply empty. */
    if ( !flags )
    {
        mfn_t mfn = p2m_alloc_ptp(p2m, level);

        if ( mfn_eq(mfn, INVALID_MFN) )
            return -ENOMEM;

        new_entry = l1e_from_mfn(mfn, P2M_BASE_FLAGS | _PAGE_RW);

        p2m_add_iommu_flags(&new_entry, level,
                            IOMMUF_readable|IOMMUF_writable);
        p2m->write_p2m_entry(p2m, gfn, p2m_entry, new_entry, level + 1);
    }
    else if ( flags & _PAGE_PSE )
    {
        /* Split superpages into smaller ones. */
        unsigned long pfn = l1e_get_pfn(*p2m_entry);
        mfn_t mfn;
        l1_pgentry_t *l1_entry;
        unsigned int i;

        switch ( level )
        {
        case 2:
            break;

        case 1:
            /*
             * New splintered mappings inherit the flags of the old superpage,
             * with a little reorganisation for the _PAGE_PSE_PAT bit.
             */
            if ( pfn & 1 )           /* ==> _PAGE_PSE_PAT was set */
                pfn -= 1;            /* Clear it; _PAGE_PSE becomes _PAGE_PAT */
            else
                flags &= ~_PAGE_PSE; /* Clear _PAGE_PSE (== _PAGE_PAT) */
            break;

        default:
            ASSERT_UNREACHABLE();
            return -EINVAL;
        }

        mfn = p2m_alloc_ptp(p2m, level);
        if ( mfn_eq(mfn, INVALID_MFN) )
            return -ENOMEM;

        l1_entry = map_domain_page(mfn);

        /* Inherit original IOMMU permissions, but update Next Level. */
        if ( iommu_hap_pt_share )
        {
            flags &= ~iommu_nlevel_to_flags(~0, 0);
            flags |= iommu_nlevel_to_flags(level - 1, 0);
        }

        for ( i = 0; i < (1u << PAGETABLE_ORDER); i++ )
        {
            new_entry = l1e_from_pfn(pfn | (i << ((level - 1) *
                                                  PAGETABLE_ORDER)),
                                     flags);
            p2m->write_p2m_entry(p2m, gfn, l1_entry + i, new_entry, level);
        }

        unmap_domain_page(l1_entry);

        new_entry = l1e_from_mfn(mfn, P2M_BASE_FLAGS | _PAGE_RW);
        p2m_add_iommu_flags(&new_entry, level,
                            IOMMUF_readable|IOMMUF_writable);
        p2m->write_p2m_entry(p2m, gfn, p2m_entry, new_entry, level + 1);
    }
    else
        ASSERT(flags & _PAGE_PRESENT);

    next = map_domain_page(l1e_get_mfn(*p2m_entry));
    if ( unmap )
        unmap_domain_page(*table);
    *table = next;

    return 0;
}

/*
 * Mark (via clearing the U flag) as needing P2M type re-calculation all valid
 * present entries at the targeted level for the passed in GFN range, which is
 * guaranteed to not cross a page (table) boundary at that level.
 */
static int p2m_pt_set_recalc_range(struct p2m_domain *p2m,
                                   unsigned int level,
                                   unsigned long first_gfn,
                                   unsigned long last_gfn)
{
    void *table;
    unsigned long gfn_remainder = first_gfn, remainder;
    unsigned int i;
    l1_pgentry_t *pent, *plast;
    int err = 0;

    table = map_domain_page(pagetable_get_mfn(p2m_get_pagetable(p2m)));
    for ( i = 4; i-- > level; )
    {
        remainder = gfn_remainder;
        pent = p2m_find_entry(table, &remainder, first_gfn,
                              i * PAGETABLE_ORDER, 1 << PAGETABLE_ORDER);
        if ( !pent )
        {
            err = -EINVAL;
            goto out;
        }

        if ( !(l1e_get_flags(*pent) & _PAGE_PRESENT) )
            goto out;

        err = p2m_next_level(p2m, &table, &gfn_remainder, first_gfn,
                             i * PAGETABLE_ORDER, 1 << PAGETABLE_ORDER,
                             i, 1);
        if ( err )
            goto out;
    }

    remainder = gfn_remainder + (last_gfn - first_gfn);
    pent = p2m_find_entry(table, &gfn_remainder, first_gfn,
                          i * PAGETABLE_ORDER, 1 << PAGETABLE_ORDER);
    plast = p2m_find_entry(table, &remainder, last_gfn,
                           i * PAGETABLE_ORDER, 1 << PAGETABLE_ORDER);
    if ( pent && plast )
        for ( ; pent <= plast; ++pent )
        {
            l1_pgentry_t e = *pent;

            if ( (l1e_get_flags(e) & _PAGE_PRESENT) && !needs_recalc(l1, e) )
            {
                set_recalc(l1, e);
                p2m->write_p2m_entry(p2m, first_gfn, pent, e, level);
            }
            first_gfn += 1UL << (i * PAGETABLE_ORDER);
        }
    else
        err = -EIO;

 out:
    unmap_domain_page(table);

    return err;
}
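
/*
 * Editor's worked example (not from the original source) for the superpage
 * mask arithmetic used by do_recalc() below.  For an entry at 'level',
 * mask = ~0UL << (level * PAGETABLE_ORDER) selects the GFN bits translated
 * above that level, so [gfn & mask, gfn | ~mask] spans exactly the superpage
 * containing gfn.  E.g. for gfn 0x12345 at level 1 (a 2M page):
 *
 *     mask        = ~0UL << 9  =  ...fffffe00
 *     gfn & mask  = 0x12200       (first GFN of the 2M page)
 *     gfn | ~mask = 0x123ff       (last GFN of the 2M page)
 */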
/*
 * Handle possibly necessary P2M type re-calculation (U flag clear for a
 * present entry) for the entries in the page table hierarchy for the given
 * GFN.  Propagate the re-calculation flag down to the next page table level
 * for entries not involved in the translation of the given GFN.
 */
static int do_recalc(struct p2m_domain *p2m, unsigned long gfn)
{
    void *table;
    unsigned long gfn_remainder = gfn;
    unsigned int level = 4;
    l1_pgentry_t *pent;
    int err = 0;

    table = map_domain_page(pagetable_get_mfn(p2m_get_pagetable(p2m)));
    while ( --level )
    {
        unsigned long remainder = gfn_remainder;

        pent = p2m_find_entry(table, &remainder, gfn,
                              level * PAGETABLE_ORDER, 1 << PAGETABLE_ORDER);
        if ( !pent || !(l1e_get_flags(*pent) & _PAGE_PRESENT) )
            goto out;

        if ( l1e_get_flags(*pent) & _PAGE_PSE )
        {
            unsigned long mask = ~0UL << (level * PAGETABLE_ORDER);

            ASSERT(p2m_flags_to_type(l1e_get_flags(*pent)) !=
                   p2m_ioreq_server);
            if ( !needs_recalc(l1, *pent) ||
                 !p2m_is_changeable(p2m_flags_to_type(l1e_get_flags(*pent))) ||
                 p2m_is_logdirty_range(p2m, gfn & mask, gfn | ~mask) >= 0 )
                break;
        }

        err = p2m_next_level(p2m, &table, &gfn_remainder, gfn,
                             level * PAGETABLE_ORDER, 1 << PAGETABLE_ORDER,
                             level, 0);
        if ( err )
            goto out;

        if ( needs_recalc(l1, *pent) )
        {
            l1_pgentry_t e = *pent, *ptab = table;
            unsigned int i;

            if ( !valid_recalc(l1, e) )
                P2M_DEBUG("bogus recalc state at d%d:%lx:%u\n",
                          p2m->domain->domain_id, gfn, level);
            remainder = gfn_remainder;
            for ( i = 0; i < (1 << PAGETABLE_ORDER); ++i )
            {
                l1_pgentry_t ent = ptab[i];

                if ( (l1e_get_flags(ent) & _PAGE_PRESENT) &&
                     !needs_recalc(l1, ent) )
                {
                    set_recalc(l1, ent);
                    p2m->write_p2m_entry(p2m, gfn - remainder, &ptab[i],
                                         ent, level);
                }
                remainder -= 1UL << ((level - 1) * PAGETABLE_ORDER);
            }
            smp_wmb();
            clear_recalc(l1, e);
            p2m->write_p2m_entry(p2m, gfn, pent, e, level + 1);
        }
        unmap_domain_page((void *)((unsigned long)pent & PAGE_MASK));
    }

    pent = p2m_find_entry(table, &gfn_remainder, gfn,
                          level * PAGETABLE_ORDER, 1 << PAGETABLE_ORDER);
    if ( pent && (l1e_get_flags(*pent) & _PAGE_PRESENT) &&
         needs_recalc(l1, *pent) )
    {
        l1_pgentry_t e = *pent;
        p2m_type_t ot, nt;
        unsigned long mask = ~0UL << (level * PAGETABLE_ORDER);

        if ( !valid_recalc(l1, e) )
            P2M_DEBUG("bogus recalc leaf at d%d:%lx:%u\n",
                      p2m->domain->domain_id, gfn, level);
        ot = p2m_flags_to_type(l1e_get_flags(e));
        nt = p2m_recalc_type_range(true, ot, p2m, gfn & mask, gfn | ~mask);
        if ( nt != ot )
        {
            unsigned long mfn = l1e_get_pfn(e);
            unsigned long flags = p2m_type_to_flags(p2m, nt,
                                                    _mfn(mfn), level);

            if ( level )
            {
                if ( flags & _PAGE_PAT )
                {
                    BUILD_BUG_ON(_PAGE_PAT != _PAGE_PSE);
                    mfn |= _PAGE_PSE_PAT >> PAGE_SHIFT;
                }
                else
                    mfn &= ~((unsigned long)_PAGE_PSE_PAT >> PAGE_SHIFT);
                flags |= _PAGE_PSE;
            }

            if ( ot == p2m_ioreq_server )
            {
                ASSERT(p2m->ioreq.entry_count > 0);
                ASSERT(level == 0);
                p2m->ioreq.entry_count--;
            }

            e = l1e_from_pfn(mfn, flags);
            p2m_add_iommu_flags(&e, level,
                                (nt == p2m_ram_rw)
                                ? IOMMUF_readable|IOMMUF_writable : 0);
            ASSERT(!needs_recalc(l1, e));
        }
        else
            clear_recalc(l1, e);
        p2m->write_p2m_entry(p2m, gfn, pent, e, level + 1);
    }

 out:
    unmap_domain_page(table);

    return err;
}

int p2m_pt_handle_deferred_changes(uint64_t gpa)
{
    struct p2m_domain *p2m = p2m_get_hostp2m(current->domain);
    int rc;

    p2m_lock(p2m);
    rc = do_recalc(p2m, PFN_DOWN(gpa));
    p2m_unlock(p2m);

    return rc;
}
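
/*
 * Editor's illustration (not from the original source): which leaf level
 * p2m_pt_set_entry() below installs for each supported order:
 *
 *     page_order == PAGE_ORDER_4K (0)  ->  L1 entry
 *     page_order == PAGE_ORDER_2M (9)  ->  L2 entry with _PAGE_PSE
 *     page_order == PAGE_ORDER_1G (18) ->  L3 entry with _PAGE_PSE
 *
 * Other orders are not handled here; callers are expected to split
 * requests into these sizes.
 */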
/* Returns: 0 for success, -errno for failure */
static int
p2m_pt_set_entry(struct p2m_domain *p2m, gfn_t gfn_, mfn_t mfn,
                 unsigned int page_order, p2m_type_t p2mt, p2m_access_t p2ma,
                 int sve)
{
    /* XXX -- this might be able to be faster iff current->domain == d */
    void *table;
    unsigned long gfn = gfn_x(gfn_);
    unsigned long i, gfn_remainder = gfn;
    l1_pgentry_t *p2m_entry, entry_content;
    /* Intermediate table to free if we're replacing it with a superpage. */
    l1_pgentry_t intermediate_entry = l1e_empty();
    l2_pgentry_t l2e_content;
    l3_pgentry_t l3e_content;
    int rc;
    unsigned int iommu_pte_flags = p2m_get_iommu_flags(p2mt, mfn);
    /*
     * old_mfn and iommu_old_flags control possible flush/update needs on the
     * IOMMU: We need to flush when MFN or flags (i.e. permissions) change.
     * iommu_old_flags being initialized to zero covers the case of the entry
     * getting replaced being a non-present (leaf or intermediate) one.  For
     * present leaf entries the real value will get calculated below, while
     * for present intermediate entries ~0 (guaranteed != iommu_pte_flags)
     * will be used (to cover all cases of what the leaf entries underneath
     * the intermediate one might be).
     */
    unsigned int flags, iommu_old_flags = 0;
    unsigned long old_mfn = mfn_x(INVALID_MFN);

    ASSERT(sve != 0);

    if ( tb_init_done )
    {
        struct {
            u64 gfn, mfn;
            int p2mt;
            int d:16, order:16;
        } t;

        t.gfn = gfn;
        t.mfn = mfn_x(mfn);
        t.p2mt = p2mt;
        t.d = p2m->domain->domain_id;
        t.order = page_order;

        __trace_var(TRC_MEM_SET_P2M_ENTRY, 0, sizeof(t), &t);
    }

    if ( unlikely(p2m_is_foreign(p2mt)) )
    {
        /* hvm fixme: foreign types are only supported on ept at present */
        gdprintk(XENLOG_WARNING, "Unimplemented foreign p2m type.\n");
        return -EINVAL;
    }

    /* Carry out any pending earlier changes first. */
    rc = do_recalc(p2m, gfn);
    if ( rc < 0 )
        return rc;

    table = map_domain_page(pagetable_get_mfn(p2m_get_pagetable(p2m)));
    rc = p2m_next_level(p2m, &table, &gfn_remainder, gfn,
                        L4_PAGETABLE_SHIFT - PAGE_SHIFT,
                        L4_PAGETABLE_ENTRIES, 3, 1);
    if ( rc )
        goto out;

    /*
     * Try to allocate 1GB page table if this feature is supported.
     */
    if ( page_order == PAGE_ORDER_1G )
    {
        p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn,
                                   L3_PAGETABLE_SHIFT - PAGE_SHIFT,
                                   L3_PAGETABLE_ENTRIES);
        ASSERT(p2m_entry);
        flags = l1e_get_flags(*p2m_entry);
        if ( flags & _PAGE_PRESENT )
        {
            if ( flags & _PAGE_PSE )
            {
                old_mfn = l1e_get_pfn(*p2m_entry);
                iommu_old_flags =
                    p2m_get_iommu_flags(p2m_flags_to_type(flags),
                                        _mfn(old_mfn));
            }
            else
            {
                iommu_old_flags = ~0;
                intermediate_entry = *p2m_entry;
            }
        }

        ASSERT(p2m_flags_to_type(flags) != p2m_ioreq_server);
        ASSERT(!mfn_valid(mfn) || p2mt != p2m_mmio_direct);
        l3e_content = mfn_valid(mfn) || p2m_allows_invalid_mfn(p2mt)
            ? p2m_l3e_from_pfn(mfn_x(mfn),
                               p2m_type_to_flags(p2m, p2mt, mfn, 2))
            : l3e_empty();
        entry_content.l1 = l3e_content.l3;

        if ( entry_content.l1 != 0 )
            p2m_add_iommu_flags(&entry_content, 0, iommu_pte_flags);

        p2m->write_p2m_entry(p2m, gfn, p2m_entry, entry_content, 3);
        /* NB: paging_write_p2m_entry() handles tlb flushes properly */
    }
    else
    {
        rc = p2m_next_level(p2m, &table, &gfn_remainder, gfn,
                            L3_PAGETABLE_SHIFT - PAGE_SHIFT,
                            L3_PAGETABLE_ENTRIES, 2, 1);
        if ( rc )
            goto out;
    }

    if ( page_order == PAGE_ORDER_4K )
    {
        p2m_type_t p2mt_old;

        rc = p2m_next_level(p2m, &table, &gfn_remainder, gfn,
                            L2_PAGETABLE_SHIFT - PAGE_SHIFT,
                            L2_PAGETABLE_ENTRIES, 1, 1);
        if ( rc )
            goto out;

        p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn,
                                   0, L1_PAGETABLE_ENTRIES);
        ASSERT(p2m_entry);
        old_mfn = l1e_get_pfn(*p2m_entry);
        iommu_old_flags =
            p2m_get_iommu_flags(p2m_flags_to_type(l1e_get_flags(*p2m_entry)),
                                _mfn(old_mfn));

        if ( mfn_valid(mfn) || p2m_allows_invalid_mfn(p2mt) )
            entry_content = p2m_l1e_from_pfn(mfn_x(mfn),
                                             p2m_type_to_flags(p2m, p2mt,
                                                               mfn, 0));
        else
            entry_content = l1e_empty();

        if ( entry_content.l1 != 0 )
            p2m_add_iommu_flags(&entry_content, 0, iommu_pte_flags);

        p2mt_old = p2m_flags_to_type(l1e_get_flags(*p2m_entry));

        /*
         * p2m_ioreq_server is only used for 4K pages, so
         * the count is only done for level 1 entries.
         */
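        /*
         * Editor's note (illustration): when both the old and the new type
         * are p2m_ioreq_server, the increment below and the decrement that
         * follows cancel out, leaving entry_count unchanged, as befits a
         * mapping that stays of that type.
         */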
        if ( p2mt == p2m_ioreq_server )
            p2m->ioreq.entry_count++;

        if ( p2mt_old == p2m_ioreq_server )
        {
            ASSERT(p2m->ioreq.entry_count > 0);
            p2m->ioreq.entry_count--;
        }

        /* level 1 entry */
        p2m->write_p2m_entry(p2m, gfn, p2m_entry, entry_content, 1);
        /* NB: paging_write_p2m_entry() handles tlb flushes properly */
    }
    else if ( page_order == PAGE_ORDER_2M )
    {
        p2m_entry = p2m_find_entry(table, &gfn_remainder, gfn,
                                   L2_PAGETABLE_SHIFT - PAGE_SHIFT,
                                   L2_PAGETABLE_ENTRIES);
        ASSERT(p2m_entry);
        flags = l1e_get_flags(*p2m_entry);
        if ( flags & _PAGE_PRESENT )
        {
            if ( flags & _PAGE_PSE )
            {
                old_mfn = l1e_get_pfn(*p2m_entry);
                iommu_old_flags =
                    p2m_get_iommu_flags(p2m_flags_to_type(flags),
                                        _mfn(old_mfn));
            }
            else
            {
                iommu_old_flags = ~0;
                intermediate_entry = *p2m_entry;
            }
        }

        ASSERT(p2m_flags_to_type(flags) != p2m_ioreq_server);
        ASSERT(!mfn_valid(mfn) || p2mt != p2m_mmio_direct);
        l2e_content = mfn_valid(mfn) || p2m_allows_invalid_mfn(p2mt)
            ? p2m_l2e_from_pfn(mfn_x(mfn),
                               p2m_type_to_flags(p2m, p2mt, mfn, 1))
            : l2e_empty();
        entry_content.l1 = l2e_content.l2;

        if ( entry_content.l1 != 0 )
            p2m_add_iommu_flags(&entry_content, 0, iommu_pte_flags);

        p2m->write_p2m_entry(p2m, gfn, p2m_entry, entry_content, 2);
        /* NB: paging_write_p2m_entry() handles tlb flushes properly */
    }

    /* Track the highest gfn for which we have ever had a valid mapping */
    if ( p2mt != p2m_invalid
         && (gfn + (1UL << page_order) - 1 > p2m->max_mapped_pfn) )
        p2m->max_mapped_pfn = gfn + (1UL << page_order) - 1;

    if ( iommu_enabled && need_iommu(p2m->domain) &&
         (iommu_old_flags != iommu_pte_flags || old_mfn != mfn_x(mfn)) )
    {
        ASSERT(rc == 0);

        if ( iommu_use_hap_pt(p2m->domain) )
        {
            if ( iommu_old_flags )
                amd_iommu_flush_pages(p2m->domain, gfn, page_order);
        }
        else if ( iommu_pte_flags )
            for ( i = 0; i < (1UL << page_order); i++ )
            {
                rc = iommu_map_page(p2m->domain, gfn + i, mfn_x(mfn) + i,
                                    iommu_pte_flags);
                if ( unlikely(rc) )
                {
                    while ( i-- )
                        /* If statement to satisfy __must_check. */
                        if ( iommu_unmap_page(p2m->domain, gfn + i) )
                            continue;

                    break;
                }
            }
        else
            for ( i = 0; i < (1UL << page_order); i++ )
            {
                int ret = iommu_unmap_page(p2m->domain, gfn + i);

                if ( !rc )
                    rc = ret;
            }
    }

    /*
     * Free old intermediate tables if necessary.  This has to be the
     * last thing we do, after removal from the IOMMU tables, so as to
     * avoid a potential use-after-free.
     */
    if ( l1e_get_flags(intermediate_entry) & _PAGE_PRESENT )
        p2m_free_entry(p2m, &intermediate_entry, page_order);

 out:
    unmap_domain_page(table);
    return rc;
}
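
/*
 * Editor's illustration (not from the original source): how a GFN is
 * decomposed during the lookup walk in p2m_pt_get_entry() below.  With
 * addr = gfn << PAGE_SHIFT, each l<N>_table_offset() extracts 9 bits:
 *
 *     gfn = 0x12345 (addr = 0x12345000):
 *         l4_table_offset(addr) = 0     (addr bits 47-39)
 *         l3_table_offset(addr) = 0     (addr bits 38-30)
 *         l2_table_offset(addr) = 0x91  (addr bits 29-21)
 *         l1_table_offset(addr) = 0x145 (addr bits 20-12)
 */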
static mfn_t
p2m_pt_get_entry(struct p2m_domain *p2m, gfn_t gfn_,
                 p2m_type_t *t, p2m_access_t *a, p2m_query_t q,
                 unsigned int *page_order, bool_t *sve)
{
    mfn_t mfn;
    unsigned long gfn = gfn_x(gfn_);
    paddr_t addr = ((paddr_t)gfn) << PAGE_SHIFT;
    l2_pgentry_t *l2e;
    l1_pgentry_t *l1e;
    unsigned int flags;
    p2m_type_t l1t;
    bool_t recalc;

    ASSERT(paging_mode_translate(p2m->domain));

    if ( sve )
        *sve = 1;

    /* XXX This is for compatibility with the old model, where anything not
     * XXX marked as RAM was considered to be emulated MMIO space.
     * XXX Once we start explicitly registering MMIO regions in the p2m
     * XXX we will return p2m_invalid for unmapped gfns */
    *t = p2m_mmio_dm;
    /* Not implemented except with EPT */
    *a = p2m_access_rwx;

    if ( gfn > p2m->max_mapped_pfn )
    {
        /* This pfn is higher than the highest the p2m map currently holds */
        if ( page_order )
        {
            for ( *page_order = 3 * PAGETABLE_ORDER; *page_order;
                  *page_order -= PAGETABLE_ORDER )
                if ( (gfn & ~((1UL << *page_order) - 1)) >
                     p2m->max_mapped_pfn )
                    break;
        }
        return INVALID_MFN;
    }

    mfn = pagetable_get_mfn(p2m_get_pagetable(p2m));

    {
        l4_pgentry_t *l4e = map_domain_page(mfn);

        l4e += l4_table_offset(addr);
        if ( page_order )
            *page_order = 3 * PAGETABLE_ORDER;

        if ( (l4e_get_flags(*l4e) & _PAGE_PRESENT) == 0 )
        {
            unmap_domain_page(l4e);
            return INVALID_MFN;
        }
        mfn = l4e_get_mfn(*l4e);
        recalc = needs_recalc(l4, *l4e);
        unmap_domain_page(l4e);
    }
    {
        l3_pgentry_t *l3e = map_domain_page(mfn);

        l3e += l3_table_offset(addr);
        if ( page_order )
            *page_order = 2 * PAGETABLE_ORDER;

 pod_retry_l3:
        flags = l3e_get_flags(*l3e);
        if ( !(flags & _PAGE_PRESENT) )
        {
            if ( p2m_flags_to_type(flags) == p2m_populate_on_demand )
            {
                if ( q & P2M_ALLOC )
                {
                    if ( p2m_pod_demand_populate(p2m, gfn_, PAGE_ORDER_1G) )
                        goto pod_retry_l3;
                    gdprintk(XENLOG_ERR, "%s: Allocate 1GB failed!\n",
                             __func__);
                }
                else
                    *t = p2m_populate_on_demand;
            }
            unmap_domain_page(l3e);
            return INVALID_MFN;
        }
        if ( flags & _PAGE_PSE )
        {
            mfn = _mfn(l3e_get_pfn(*l3e) +
                       l2_table_offset(addr) * L1_PAGETABLE_ENTRIES +
                       l1_table_offset(addr));
            *t = p2m_recalc_type(recalc || _needs_recalc(flags),
                                 p2m_flags_to_type(flags), p2m, gfn);
            unmap_domain_page(l3e);

            ASSERT(mfn_valid(mfn) || !p2m_is_ram(*t));
            return (p2m_is_valid(*t)) ? mfn : INVALID_MFN;
        }

        mfn = l3e_get_mfn(*l3e);
        if ( _needs_recalc(flags) )
            recalc = 1;
        unmap_domain_page(l3e);
    }

    l2e = map_domain_page(mfn);
    l2e += l2_table_offset(addr);
    if ( page_order )
        *page_order = PAGETABLE_ORDER;

 pod_retry_l2:
    flags = l2e_get_flags(*l2e);
    if ( !(flags & _PAGE_PRESENT) )
    {
        /* PoD: Try to populate a 2-meg chunk */
        if ( p2m_flags_to_type(flags) == p2m_populate_on_demand )
        {
            if ( q & P2M_ALLOC )
            {
                if ( p2m_pod_demand_populate(p2m, gfn_, PAGE_ORDER_2M) )
                    goto pod_retry_l2;
            }
            else
                *t = p2m_populate_on_demand;
        }

        unmap_domain_page(l2e);
        return INVALID_MFN;
    }
    if ( flags & _PAGE_PSE )
    {
        mfn = _mfn(l2e_get_pfn(*l2e) + l1_table_offset(addr));
        *t = p2m_recalc_type(recalc || _needs_recalc(flags),
                             p2m_flags_to_type(flags), p2m, gfn);
        unmap_domain_page(l2e);

        ASSERT(mfn_valid(mfn) || !p2m_is_ram(*t));
        return (p2m_is_valid(*t)) ? mfn : INVALID_MFN;
    }

    mfn = l2e_get_mfn(*l2e);
    if ( needs_recalc(l2, *l2e) )
        recalc = 1;
    unmap_domain_page(l2e);

    l1e = map_domain_page(mfn);
    l1e += l1_table_offset(addr);
    if ( page_order )
        *page_order = 0;

 pod_retry_l1:
    flags = l1e_get_flags(*l1e);
    l1t = p2m_flags_to_type(flags);
    if ( !(flags & _PAGE_PRESENT) && !p2m_is_paging(l1t) )
    {
        /* PoD: Try to populate */
        if ( l1t == p2m_populate_on_demand )
        {
            if ( q & P2M_ALLOC )
            {
                if ( p2m_pod_demand_populate(p2m, gfn_, PAGE_ORDER_4K) )
                    goto pod_retry_l1;
            }
            else
                *t = p2m_populate_on_demand;
        }

        unmap_domain_page(l1e);
        return INVALID_MFN;
    }
    mfn = l1e_get_mfn(*l1e);
    *t = p2m_recalc_type(recalc || _needs_recalc(flags), l1t, p2m, gfn);
    unmap_domain_page(l1e);

    ASSERT(mfn_valid(mfn) || !p2m_is_ram(*t) || p2m_is_paging(*t));
    return (p2m_is_valid(*t) || p2m_is_grant(*t)) ? mfn : INVALID_MFN;
}
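
/*
 * Editor's note (illustration, not from the original source): global type
 * changes are lazy.  p2m_pt_change_entry_type_global() below only marks the
 * 512 L4 entries for re-calculation; the actual ot -> nt conversion of leaf
 * entries happens later, when do_recalc() visits them on a subsequent access
 * or deferred-change flush.  Hence ot/nt are not consumed here but recovered
 * at recalc time via p2m_recalc_type_range().
 */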
static void p2m_pt_change_entry_type_global(struct p2m_domain *p2m,
                                            p2m_type_t ot, p2m_type_t nt)
{
    l1_pgentry_t *tab;
    unsigned long gfn = 0;
    unsigned int i, changed;

    if ( pagetable_get_pfn(p2m_get_pagetable(p2m)) == 0 )
        return;

    ASSERT(hap_enabled(p2m->domain));

    tab = map_domain_page(pagetable_get_mfn(p2m_get_pagetable(p2m)));
    for ( changed = i = 0; i < (1 << PAGETABLE_ORDER); ++i )
    {
        l1_pgentry_t e = tab[i];

        if ( (l1e_get_flags(e) & _PAGE_PRESENT) &&
             !needs_recalc(l1, e) )
        {
            set_recalc(l1, e);
            p2m->write_p2m_entry(p2m, gfn, &tab[i], e, 4);
            ++changed;
        }
        gfn += 1UL << (L4_PAGETABLE_SHIFT - PAGE_SHIFT);
    }
    unmap_domain_page(tab);

    if ( changed )
        flush_tlb_mask(p2m->domain->domain_dirty_cpumask);
}

static int p2m_pt_change_entry_type_range(struct p2m_domain *p2m,
                                          p2m_type_t ot, p2m_type_t nt,
                                          unsigned long first_gfn,
                                          unsigned long last_gfn)
{
    unsigned long mask = (1 << PAGETABLE_ORDER) - 1;
    unsigned int i;
    int err = 0;

    ASSERT(hap_enabled(p2m->domain));

    for ( i = 1; i <= 4; )
    {
        if ( first_gfn & mask )
        {
            unsigned long end_gfn = min(first_gfn | mask, last_gfn);

            err = p2m_pt_set_recalc_range(p2m, i, first_gfn, end_gfn);
            if ( err || end_gfn >= last_gfn )
                break;
            first_gfn = end_gfn + 1;
        }
        else if ( (last_gfn & mask) != mask )
        {
            unsigned long start_gfn = max(first_gfn, last_gfn & ~mask);

            err = p2m_pt_set_recalc_range(p2m, i, start_gfn, last_gfn);
            if ( err || start_gfn <= first_gfn )
                break;
            last_gfn = start_gfn - 1;
        }
        else
        {
            ++i;
            mask |= mask << PAGETABLE_ORDER;
        }
    }

    return err;
}
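
/*
 * Editor's worked example (not from the original source) for the loop in
 * p2m_pt_change_entry_type_range() above, with first_gfn = 5 and
 * last_gfn = 0x1000:
 *
 *     i = 1, mask = 0x1ff:   unaligned head [5, 0x1ff]       -> level 1
 *     i = 1, mask = 0x1ff:   unaligned tail [0x1000, 0x1000] -> level 1
 *     i = 2, mask = 0x3ffff: remaining body [0x200, 0xfff]   -> level 2
 *                            (7 L2 entries), then the loop breaks
 *
 * Each iteration peels an unaligned head or tail off the range at the
 * current level; once both ends are aligned, the level (and mask) widen.
 */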
#if P2M_AUDIT
long p2m_pt_audit_p2m(struct p2m_domain *p2m)
{
    unsigned long entry_count = 0, pmbad = 0;
    unsigned long mfn, gfn, m2pfn;

    ASSERT(p2m_locked_by_me(p2m));
    ASSERT(pod_locked_by_me(p2m));

    /* Audit part one: walk the domain's p2m table, checking the entries. */
    if ( pagetable_get_pfn(p2m_get_pagetable(p2m)) != 0 )
    {
        l2_pgentry_t *l2e;
        l1_pgentry_t *l1e;
        int i1, i2;

        l4_pgentry_t *l4e;
        l3_pgentry_t *l3e;
        int i4, i3;

        l4e = map_domain_page(pagetable_get_mfn(p2m_get_pagetable(p2m)));

        gfn = 0;
        for ( i4 = 0; i4 < L4_PAGETABLE_ENTRIES; i4++ )
        {
            if ( !(l4e_get_flags(l4e[i4]) & _PAGE_PRESENT) )
            {
                gfn += 1 << (L4_PAGETABLE_SHIFT - PAGE_SHIFT);
                continue;
            }
            l3e = map_l3t_from_l4e(l4e[i4]);
            for ( i3 = 0; i3 < L3_PAGETABLE_ENTRIES; i3++ )
            {
                if ( !(l3e_get_flags(l3e[i3]) & _PAGE_PRESENT) )
                {
                    gfn += 1 << (L3_PAGETABLE_SHIFT - PAGE_SHIFT);
                    continue;
                }

                /* check for 1GB super page */
                if ( l3e_get_flags(l3e[i3]) & _PAGE_PSE )
                {
                    mfn = l3e_get_pfn(l3e[i3]);
                    ASSERT(mfn_valid(_mfn(mfn)));
                    /* we have to cover 512x512 4K pages */
                    for ( i2 = 0;
                          i2 < (L2_PAGETABLE_ENTRIES * L1_PAGETABLE_ENTRIES);
                          i2++ )
                    {
                        m2pfn = get_gpfn_from_mfn(mfn+i2);
                        if ( m2pfn != (gfn + i2) )
                        {
                            pmbad++;
                            P2M_PRINTK("mismatch: gfn %#lx -> mfn %#lx"
                                       " -> gfn %#lx\n", gfn+i2, mfn+i2,
                                       m2pfn);
                            BUG();
                        }
                    }
                    gfn += 1 << (L3_PAGETABLE_SHIFT - PAGE_SHIFT);
                    continue;
                }

                l2e = map_l2t_from_l3e(l3e[i3]);
                for ( i2 = 0; i2 < L2_PAGETABLE_ENTRIES; i2++ )
                {
                    if ( !(l2e_get_flags(l2e[i2]) & _PAGE_PRESENT) )
                    {
                        if ( (l2e_get_flags(l2e[i2]) & _PAGE_PSE)
                             && ( p2m_flags_to_type(l2e_get_flags(l2e[i2]))
                                  == p2m_populate_on_demand ) )
                            entry_count += SUPERPAGE_PAGES;
                        gfn += 1 << (L2_PAGETABLE_SHIFT - PAGE_SHIFT);
                        continue;
                    }

                    /* check for super page */
                    if ( l2e_get_flags(l2e[i2]) & _PAGE_PSE )
                    {
                        mfn = l2e_get_pfn(l2e[i2]);
                        ASSERT(mfn_valid(_mfn(mfn)));
                        for ( i1 = 0; i1 < L1_PAGETABLE_ENTRIES; i1++ )
                        {
                            m2pfn = get_gpfn_from_mfn(mfn+i1);
                            /* Allow shared M2Ps */
                            if ( (m2pfn != (gfn + i1)) &&
                                 (m2pfn != SHARED_M2P_ENTRY) )
                            {
                                pmbad++;
                                P2M_PRINTK("mismatch: gfn %#lx -> mfn %#lx"
                                           " -> gfn %#lx\n", gfn+i1, mfn+i1,
                                           m2pfn);
                                BUG();
                            }
                        }
                        gfn += 1 << (L2_PAGETABLE_SHIFT - PAGE_SHIFT);
                        continue;
                    }

                    l1e = map_l1t_from_l2e(l2e[i2]);

                    for ( i1 = 0; i1 < L1_PAGETABLE_ENTRIES; i1++, gfn++ )
                    {
                        p2m_type_t type;

                        type = p2m_flags_to_type(l1e_get_flags(l1e[i1]));
                        if ( !(l1e_get_flags(l1e[i1]) & _PAGE_PRESENT) )
                        {
                            if ( type == p2m_populate_on_demand )
                                entry_count++;
                            continue;
                        }
                        mfn = l1e_get_pfn(l1e[i1]);
                        ASSERT(mfn_valid(_mfn(mfn)));
                        m2pfn = get_gpfn_from_mfn(mfn);
                        if ( m2pfn != gfn &&
                             type != p2m_mmio_direct &&
                             !p2m_is_grant(type) &&
                             !p2m_is_shared(type) )
                        {
                            pmbad++;
                            printk("mismatch: gfn %#lx -> mfn %#lx"
                                   " -> gfn %#lx\n", gfn, mfn, m2pfn);
                            P2M_PRINTK("mismatch: gfn %#lx -> mfn %#lx"
                                       " -> gfn %#lx\n", gfn, mfn, m2pfn);
                            BUG();
                        }
                    }
                    unmap_domain_page(l1e);
                }
                unmap_domain_page(l2e);
            }
            unmap_domain_page(l3e);
        }

        unmap_domain_page(l4e);
    }

    if ( entry_count != p2m->pod.entry_count )
    {
        printk("%s: refcounted entry count %ld, audit count %lu!\n",
               __func__, p2m->pod.entry_count, entry_count);
        BUG();
    }

    return pmbad;
}
#endif /* P2M_AUDIT */

/* Set up the p2m function pointers for pagetable format */
void p2m_pt_init(struct p2m_domain *p2m)
{
    p2m->set_entry = p2m_pt_set_entry;
    p2m->get_entry = p2m_pt_get_entry;
    p2m->recalc = do_recalc;
    p2m->change_entry_type_global = p2m_pt_change_entry_type_global;
    p2m->change_entry_type_range = p2m_pt_change_entry_type_range;
    p2m->write_p2m_entry = paging_write_p2m_entry;
#if P2M_AUDIT
    p2m->audit_p2m = p2m_pt_audit_p2m;
#else
    p2m->audit_p2m = NULL;
#endif
}
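
/*
 * Editor's usage sketch (not from the original source): callers do not
 * invoke the functions above directly; they go through the hooks installed
 * by p2m_pt_init().  A hypothetical caller holding the p2m lock might do:
 *
 *     rc = p2m->set_entry(p2m, _gfn(gfn), _mfn(mfn), PAGE_ORDER_4K,
 *                         p2m_ram_rw, p2m->default_access, -1);
 *
 * which dispatches to p2m_pt_set_entry() for NPT/shadow domains, while EPT
 * domains get the equivalent hook from ept_p2m_init() instead.
 */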