/******************************************************************************
* common/grant_table.c
*
* Mechanism for granting foreign access to page frames, and receiving
* page-ownership transfers.
*
* Copyright (c) 2005-2006 Christopher Clark
* Copyright (c) 2004 K A Fraser
* Copyright (c) 2005 Andrew Warfield
* Modifications by Geoffrey Lefebvre are (c) Intel Research Cambridge
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; If not, see <http://www.gnu.org/licenses/>.
*/
#include <xen/err.h>
#include <xen/iocap.h>
#include <xen/lib.h>
#include <xen/sched.h>
#include <xen/mm.h>
#include <xen/event.h>
#include <xen/trace.h>
#include <xen/grant_table.h>
#include <xen/guest_access.h>
#include <xen/domain_page.h>
#include <xen/iommu.h>
#include <xen/paging.h>
#include <xen/keyhandler.h>
#include <xen/vmap.h>
#include <xsm/xsm.h>
#include <asm/flushtlb.h>
/* Per-domain grant information. */
struct grant_table {
/*
* Lock protecting updates to grant table state (version, active
* entry list, etc.)
*/
percpu_rwlock_t lock;
/* Lock protecting the maptrack limit */
spinlock_t maptrack_lock;
/*
* The defined versions are 1 and 2. Set to 0 if we don't know
* what version to use yet.
*/
unsigned int gt_version;
/* Resource limits of the domain. */
unsigned int max_grant_frames;
unsigned int max_maptrack_frames;
/* Table size. Number of frames shared with guest */
unsigned int nr_grant_frames;
/* Number of grant status frames shared with guest (for version 2) */
unsigned int nr_status_frames;
/* Number of available maptrack entries. */
unsigned int maptrack_limit;
/* Shared grant table (see include/public/grant_table.h). */
union {
void **shared_raw;
struct grant_entry_v1 **shared_v1;
union grant_entry_v2 **shared_v2;
};
/* State grant table (see include/public/grant_table.h). */
grant_status_t **status;
/* Active grant table. */
struct active_grant_entry **active;
/* Mapping tracking table per vcpu. */
struct grant_mapping **maptrack;
/* Domain to which this struct grant_table belongs. */
const struct domain *domain;
struct grant_table_arch arch;
};
#ifndef DEFAULT_MAX_NR_GRANT_FRAMES /* to allow arch to override */
/* Default maximum size of a grant table. [POLICY] */
#define DEFAULT_MAX_NR_GRANT_FRAMES 64
#endif
static unsigned int __read_mostly max_grant_frames =
DEFAULT_MAX_NR_GRANT_FRAMES;
integer_runtime_param("gnttab_max_frames", max_grant_frames);
#define DEFAULT_MAX_MAPTRACK_FRAMES 1024
static unsigned int __read_mostly max_maptrack_frames =
DEFAULT_MAX_MAPTRACK_FRAMES;
integer_runtime_param("gnttab_max_maptrack_frames", max_maptrack_frames);
/*
* Note that the three values below are effectively part of the ABI, even if
* we don't need to make them a formal part of it: A guest suspended for
* migration in the middle of a continuation would fail to work if resumed on
* a hypervisor using different values.
*/
#define GNTTABOP_CONTINUATION_ARG_SHIFT 12
#define GNTTABOP_CMD_MASK ((1 << GNTTABOP_CONTINUATION_ARG_SHIFT) - 1)
#define GNTTABOP_ARG_MASK (~GNTTABOP_CMD_MASK)
/*
* The first two members of a grant entry are updated as a combined pair.
* The following union allows that to happen in an endian-neutral fashion.
*/
union grant_combo {
    uint32_t word;
    struct {
        uint16_t flags;
        domid_t  domid;
    } shorts;
};
/* Used to share code between unmap_grant_ref() and unmap_and_replace(). */
struct gnttab_unmap_common {
    /* Input */
    uint64_t host_addr;
    uint64_t dev_bus_addr;
    uint64_t new_addr;
    grant_handle_t handle;
    /* Return */
    int16_t status;
    /* Shared state between *_unmap and *_unmap_complete */
    uint16_t done;
    unsigned long frame;
    struct domain *rd;
    grant_ref_t ref;
};
/* Number of unmap operations that are done between each TLB flush. */
#define GNTTAB_UNMAP_BATCH_SIZE 32
#define PIN_FAIL(_lbl, _rc, _f, _a...)          \
    do {                                        \
        gdprintk(XENLOG_WARNING, _f, ## _a );   \
        rc = (_rc);                             \
        goto _lbl;                              \
    } while ( 0 )
/*
* Tracks a mapping of another domain's grant reference. Each domain has a
* table of these, indexes into which are returned as a 'mapping handle'.
*/
struct grant_mapping {
    grant_ref_t ref;        /* grant ref */
    uint16_t flags;         /* 0-4: GNTMAP_* ; 5-15: unused */
    domid_t  domid;         /* granting domain */
    uint32_t vcpu;          /* vcpu which created the grant mapping */
    uint32_t pad;           /* round size to a power of 2 */
};
/* Number of grant table frames. Caller must hold d's grant table lock. */
static inline unsigned int nr_grant_frames(const struct grant_table *gt)
{
    return gt->nr_grant_frames;
}
/* Number of status grant table frames. Caller must hold d's gr. table lock.*/
static inline unsigned int nr_status_frames(const struct grant_table *gt)
{
return gt->nr_status_frames;
}
#define MAPTRACK_PER_PAGE (PAGE_SIZE / sizeof(struct grant_mapping))
#define maptrack_entry(t, e) \
((t)->maptrack[(e)/MAPTRACK_PER_PAGE][(e)%MAPTRACK_PER_PAGE])
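/*
* Illustrative note: a maptrack handle indexes a two-level array, frame
* first, then slot within the frame. Assuming 4KiB pages and a 16-byte
* struct grant_mapping, MAPTRACK_PER_PAGE is 256, so e.g. handle 300
* resolves to maptrack[1][44].
*/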
static inline unsigned int
nr_maptrack_frames(struct grant_table *t)
{
return t->maptrack_limit / MAPTRACK_PER_PAGE;
}
#define MAPTRACK_TAIL (~0u)
#define SHGNT_PER_PAGE_V1 (PAGE_SIZE / sizeof(grant_entry_v1_t))
#define shared_entry_v1(t, e) \
((t)->shared_v1[(e)/SHGNT_PER_PAGE_V1][(e)%SHGNT_PER_PAGE_V1])
#define SHGNT_PER_PAGE_V2 (PAGE_SIZE / sizeof(grant_entry_v2_t))
#define shared_entry_v2(t, e) \
((t)->shared_v2[(e)/SHGNT_PER_PAGE_V2][(e)%SHGNT_PER_PAGE_V2])
#define STGNT_PER_PAGE (PAGE_SIZE / sizeof(grant_status_t))
#define status_entry(t, e) \
((t)->status[(e)/STGNT_PER_PAGE][(e)%STGNT_PER_PAGE])
static grant_entry_header_t *
shared_entry_header(struct grant_table *t, grant_ref_t ref)
{
ASSERT(t->gt_version != 0);
if ( t->gt_version == 1 )
return (grant_entry_header_t*)&shared_entry_v1(t, ref);
else
return &shared_entry_v2(t, ref).hdr;
}
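/*
* The cast above is safe because a v1 entry begins with the same
* flags/domid pair that makes up grant_entry_header_t, so callers can view
* either layout's header through the same pointer type.
*/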
/* Active grant entry - used for shadowing GTF_permit_access grants. */
struct active_grant_entry {
uint32_t pin; /* Reference count information: */
/* Count of writable host-CPU mappings. */
#define GNTPIN_hstw_shift 0
#define GNTPIN_hstw_inc (1U << GNTPIN_hstw_shift)
#define GNTPIN_hstw_mask (0xFFU << GNTPIN_hstw_shift)
/* Count of read-only host-CPU mappings. */
#define GNTPIN_hstr_shift 8
#define GNTPIN_hstr_inc (1U << GNTPIN_hstr_shift)
#define GNTPIN_hstr_mask (0xFFU << GNTPIN_hstr_shift)
/* Count of writable device-bus mappings. */
#define GNTPIN_devw_shift 16
#define GNTPIN_devw_inc (1U << GNTPIN_devw_shift)
#define GNTPIN_devw_mask (0xFFU << GNTPIN_devw_shift)
/* Count of read-only device-bus mappings. */
#define GNTPIN_devr_shift 24
#define GNTPIN_devr_inc (1U << GNTPIN_devr_shift)
#define GNTPIN_devr_mask (0xFFU << GNTPIN_devr_shift)
domid_t domid; /* Domain being granted access. */
unsigned int start:15; /* For sub-page grants, the start offset
in the page. */
bool is_sub_page:1; /* True if this is a sub-page grant. */
unsigned int length:16; /* For sub-page grants, the length of the
grant. */
grant_ref_t trans_gref;
struct domain *trans_domain;
unsigned long frame; /* Frame being granted. */
#ifndef NDEBUG
gfn_t gfn; /* Guest's idea of the frame being granted. */
#endif
spinlock_t lock; /* lock to protect access of this entry.
see docs/misc/grant-tables.txt for
locking protocol */
};
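/*
* Illustrative note: the pin word packs four 8-bit reference counts, e.g. a
* value of 0x00000102 means two writable host mappings (hstw) and one
* read-only host mapping (hstr). The 0x80808080 checks later in this file
* reject pins that could overflow any of the four counters.
*/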
#define ACGNT_PER_PAGE (PAGE_SIZE / sizeof(struct active_grant_entry))
#define _active_entry(t, e) \
((t)->active[(e)/ACGNT_PER_PAGE][(e)%ACGNT_PER_PAGE])
static inline void act_set_gfn(struct active_grant_entry *act, gfn_t gfn)
{
#ifndef NDEBUG
act->gfn = gfn;
#endif
}
static DEFINE_PERCPU_RWLOCK_GLOBAL(grant_rwlock);
static inline void grant_read_lock(struct grant_table *gt)
{
percpu_read_lock(grant_rwlock, &gt->lock);
}
static inline void grant_read_unlock(struct grant_table *gt)
{
percpu_read_unlock(grant_rwlock, &gt->lock);
}
static inline void grant_write_lock(struct grant_table *gt)
{
percpu_write_lock(grant_rwlock, &gt->lock);
}
static inline void grant_write_unlock(struct grant_table *gt)
{
percpu_write_unlock(grant_rwlock, &gt->lock);
}
static inline void gnttab_flush_tlb(const struct domain *d)
{
if ( !paging_mode_external(d) )
flush_tlb_mask(d->domain_dirty_cpumask);
}
static inline unsigned int
num_act_frames_from_sha_frames(const unsigned int num)
{
/*
* How many frames are needed for the active grant table,
* given the size of the shared grant table?
*/
unsigned int sha_per_page = PAGE_SIZE / sizeof(grant_entry_v1_t);
return DIV_ROUND_UP(num * sha_per_page, ACGNT_PER_PAGE);
}
#define max_nr_active_grant_frames(gt) \
num_act_frames_from_sha_frames((gt)->max_grant_frames)
static inline unsigned int
nr_active_grant_frames(struct grant_table *gt)
{
return num_act_frames_from_sha_frames(nr_grant_frames(gt));
}
static inline struct active_grant_entry *
active_entry_acquire(struct grant_table *t, grant_ref_t e)
{
struct active_grant_entry *act;
/*
* The grant table for the active entry should be locked but the
* percpu rwlock cannot be checked for read lock without race conditions
* or high overhead so we cannot use an ASSERT
*
* ASSERT(rw_is_locked(&t->lock));
*/
act = &_active_entry(t, e);
spin_lock(&act->lock);
return act;
}
static inline void active_entry_release(struct active_grant_entry *act)
{
spin_unlock(&act->lock);
}
#define GRANT_STATUS_PER_PAGE (PAGE_SIZE / sizeof(grant_status_t))
#define GRANT_PER_PAGE (PAGE_SIZE / sizeof(grant_entry_v2_t))
/* Number of grant table status entries. Caller must hold d's gr. table lock.*/
static inline unsigned int grant_to_status_frames(unsigned int grant_frames)
{
return DIV_ROUND_UP(grant_frames * GRANT_PER_PAGE, GRANT_STATUS_PER_PAGE);
}
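/*
* Worked example, assuming 4KiB pages: a v2 grant entry is 16 bytes and a
* status entry 2 bytes, so GRANT_PER_PAGE is 256 and GRANT_STATUS_PER_PAGE
* is 2048; grant_to_status_frames(n) therefore equals DIV_ROUND_UP(n, 8),
* i.e. one status frame serves up to eight shared frames.
*/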
/* Check if the page has been paged out, or needs unsharing.
If rc == GNTST_okay, *page contains the page struct with a ref taken.
Caller must do put_page(*page).
If any error, *page = NULL, *frame = INVALID_MFN, no ref taken. */
static int get_paged_frame(unsigned long gfn, unsigned long *frame,
struct page_info **page, bool readonly,
struct domain *rd)
{
int rc = GNTST_okay;
p2m_type_t p2mt;
*frame = mfn_x(INVALID_MFN);
*page = get_page_from_gfn(rd, gfn, &p2mt,
readonly ? P2M_ALLOC : P2M_UNSHARE);
if ( !*page )
{
#ifdef P2M_SHARED_TYPES
if ( p2m_is_shared(p2mt) )
return GNTST_eagain;
#endif
#ifdef P2M_PAGES_TYPES
if ( p2m_is_paging(p2mt) )
{
p2m_mem_paging_populate(rd, gfn);
return GNTST_eagain;
}
#endif
return GNTST_bad_page;
}
if ( p2m_is_foreign(p2mt) )
{
put_page(*page);
*page = NULL;
return GNTST_bad_page;
}
*frame = page_to_mfn(*page);
return rc;
}
static inline void
double_gt_lock(struct grant_table *lgt, struct grant_table *rgt)
{
/*
* See mapkind() for why the write lock is also required for the
* remote domain.
*/
if ( lgt < rgt )
{
grant_write_lock(lgt);
grant_write_lock(rgt);
}
else
{
if ( lgt != rgt )
grant_write_lock(rgt);
grant_write_lock(lgt);
}
}
static inline void
double_gt_unlock(struct grant_table *lgt, struct grant_table *rgt)
{
grant_write_unlock(lgt);
if ( lgt != rgt )
grant_write_unlock(rgt);
}
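/*
* Note: double_gt_lock() takes the two write locks in ascending pointer
* order (and only once when lgt == rgt), so any two callers locking the
* same pair of tables acquire them in the same global order and cannot
* deadlock against each other.
*/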
#define INVALID_MAPTRACK_HANDLE UINT_MAX
static inline grant_handle_t
_get_maptrack_handle(struct grant_table *t, struct vcpu *v)
{
unsigned int head, next, prev_head;
spin_lock(&v->maptrack_freelist_lock);
do {
/* No maptrack pages allocated for this VCPU yet? */
head = read_atomic(&v->maptrack_head);
if ( unlikely(head == MAPTRACK_TAIL) )
{
spin_unlock(&v->maptrack_freelist_lock);
return INVALID_MAPTRACK_HANDLE;
}
/*
* Always keep one entry in the free list to make it easier to
* add free entries to the tail.
*/
next = read_atomic(&maptrack_entry(t, head).ref);
if ( unlikely(next == MAPTRACK_TAIL) )
{
spin_unlock(&v->maptrack_freelist_lock);
return INVALID_MAPTRACK_HANDLE;
}
prev_head = head;
head = cmpxchg(&v->maptrack_head, prev_head, next);
} while ( head != prev_head );
spin_unlock(&v->maptrack_freelist_lock);
return head;
}
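/*
* Illustrative note: each vCPU keeps a singly linked free list of maptrack
* entries threaded through the entries' .ref fields, running from
* v->maptrack_head to v->maptrack_tail, the tail entry's .ref being
* MAPTRACK_TAIL. _get_maptrack_handle() pops from the head but always
* leaves the tail entry in place, so put_maptrack_handle() can append to
* the tail without ever racing over an empty list.
*/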
/*
* Try to "steal" a free maptrack entry from another VCPU.
*
* A stolen entry is transferred to the thief, so the number of
* entries for each VCPU should tend to the usage pattern.
*
* To avoid having to atomically count the number of free entries on
* each VCPU and to avoid two VCPUs repeatedly stealing entries from
* each other, the initial victim VCPU is selected randomly.
*/
static grant_handle_t steal_maptrack_handle(struct grant_table *t,
const struct vcpu *curr)
{
const struct domain *currd = curr->domain;
unsigned int first, i;
/* Find an initial victim. */
first = i = get_random() % currd->max_vcpus;
do {
if ( currd->vcpu[i] )
{
grant_handle_t handle;
handle = _get_maptrack_handle(t, currd->vcpu[i]);
if ( handle != INVALID_MAPTRACK_HANDLE )
{
maptrack_entry(t, handle).vcpu = curr->vcpu_id;
return handle;
}
}
i++;
if ( i == currd->max_vcpus )
i = 0;
} while ( i != first );
/* No free handles on any VCPU. */
return INVALID_MAPTRACK_HANDLE;
}
static inline void
put_maptrack_handle(
struct grant_table *t, grant_handle_t handle)
{
struct domain *currd = current->domain;
struct vcpu *v;
unsigned int prev_tail, cur_tail;
/* 1. Set entry to be a tail. */
maptrack_entry(t, handle).ref = MAPTRACK_TAIL;
/* 2. Add entry to the tail of the list on the original VCPU. */
v = currd->vcpu[maptrack_entry(t, handle).vcpu];
spin_lock(&v->maptrack_freelist_lock);
cur_tail = read_atomic(&v->maptrack_tail);
do {
prev_tail = cur_tail;
cur_tail = cmpxchg(&v->maptrack_tail, prev_tail, handle);
} while ( cur_tail != prev_tail );
/* 3. Update the old tail entry to point to the new entry. */
write_atomic(&maptrack_entry(t, prev_tail).ref, handle);
spin_unlock(&v->maptrack_freelist_lock);
}
static inline grant_handle_t
get_maptrack_handle(
struct grant_table *lgt)
{
struct vcpu *curr = current;
unsigned int i, head;
grant_handle_t handle;
struct grant_mapping *new_mt = NULL;
handle = _get_maptrack_handle(lgt, curr);
if ( likely(handle != INVALID_MAPTRACK_HANDLE) )
return handle;
spin_lock(&lgt->maptrack_lock);
/*
* If we've run out of handles and still have frame headroom, try
* allocating a new maptrack frame. If there is no headroom, or we're
* out of memory, try stealing an entry from another VCPU (in case the
* guest isn't mapping across its VCPUs evenly).
*/
if ( nr_maptrack_frames(lgt) < lgt->max_maptrack_frames )
new_mt = alloc_xenheap_page();
if ( !new_mt )
{
spin_unlock(&lgt->maptrack_lock);
/*
* Uninitialized free list? Steal an extra entry for the tail
* sentinel.
*/
if ( curr->maptrack_tail == MAPTRACK_TAIL )
{
handle = steal_maptrack_handle(lgt, curr);
if ( handle == INVALID_MAPTRACK_HANDLE )
return handle;
spin_lock(&curr->maptrack_freelist_lock);
maptrack_entry(lgt, handle).ref = MAPTRACK_TAIL;
curr->maptrack_tail = handle;
if ( curr->maptrack_head == MAPTRACK_TAIL )
write_atomic(&curr->maptrack_head, handle);
spin_unlock(&curr->maptrack_freelist_lock);
}
return steal_maptrack_handle(lgt, curr);
}
clear_page(new_mt);
/*
* Use the first new entry and add the remaining entries to the
* head of the free list.
*/
handle = lgt->maptrack_limit;
for ( i = 0; i < MAPTRACK_PER_PAGE; i++ )
{
BUILD_BUG_ON(sizeof(new_mt->ref) < sizeof(handle));
new_mt[i].ref = handle + i + 1;
new_mt[i].vcpu = curr->vcpu_id;
}
/* Set tail directly if this is the first page for this VCPU. */
if ( curr->maptrack_tail == MAPTRACK_TAIL )
curr->maptrack_tail = handle + MAPTRACK_PER_PAGE - 1;
lgt->maptrack[nr_maptrack_frames(lgt)] = new_mt;
smp_wmb();
lgt->maptrack_limit += MAPTRACK_PER_PAGE;
spin_unlock(&lgt->maptrack_lock);
spin_lock(&curr->maptrack_freelist_lock);
do {
new_mt[i - 1].ref = read_atomic(&curr->maptrack_head);
head = cmpxchg(&curr->maptrack_head, new_mt[i - 1].ref, handle + 1);
} while ( head != new_mt[i - 1].ref );
spin_unlock(&curr->maptrack_freelist_lock);
return handle;
}
/* Number of grant table entries. Caller must hold d's grant table lock. */
static unsigned int nr_grant_entries(struct grant_table *gt)
{
switch ( gt->gt_version )
{
#define f2e(nr, ver) (((nr) << PAGE_SHIFT) / sizeof(grant_entry_v##ver##_t))
case 1:
BUILD_BUG_ON(f2e(INITIAL_NR_GRANT_FRAMES, 1) <
GNTTAB_NR_RESERVED_ENTRIES);
return f2e(nr_grant_frames(gt), 1);
case 2:
BUILD_BUG_ON(f2e(INITIAL_NR_GRANT_FRAMES, 2) <
GNTTAB_NR_RESERVED_ENTRIES);
return f2e(nr_grant_frames(gt), 2);
#undef f2e
}
return 0;
}
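/*
* Worked example, assuming 4KiB pages: f2e(n, 1) is n * 512 (8-byte v1
* entries) and f2e(n, 2) is n * 256 (16-byte v2 entries), so a table of 4
* frames exposes 2048 v1 or 1024 v2 grant references.
*/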
static int _set_status_v1(domid_t domid,
int readonly,
int mapflag,
grant_entry_header_t *shah,
struct active_grant_entry *act)
{
int rc = GNTST_okay;
union grant_combo scombo, prev_scombo, new_scombo;
uint16_t mask = GTF_type_mask;
/*
* We bound the number of times we retry CMPXCHG on memory locations that
* we share with a guest OS. The reason is that the guest can modify that
* location at a higher rate than we can read-modify-CMPXCHG, so the guest
* could cause us to livelock. There are a few cases where it is valid for
* the guest to race our updates (e.g., to change the GTF_readonly flag),
* so we allow a few retries before failing.
*/
int retries = 0;
/* if this is a grant mapping operation we should ensure GTF_sub_page
is not set */
if ( mapflag )
mask |= GTF_sub_page;
scombo.word = *(u32 *)shah;
/*
* This loop attempts to set the access (reading/writing) flags
* in the grant table entry. It tries a cmpxchg on the field
* up to five times, and then fails under the assumption that
* the guest is misbehaving.
*/
for ( ; ; )
{
/* If not already pinned, check the grant domid and type. */
if ( !act->pin &&
(((scombo.shorts.flags & mask) !=
GTF_permit_access) ||
(scombo.shorts.domid != domid)) )
PIN_FAIL(done, GNTST_general_error,
"Bad flags (%x) or dom (%d); expected d%d\n",
scombo.shorts.flags, scombo.shorts.domid,
domid);
new_scombo = scombo;
new_scombo.shorts.flags |= GTF_reading;
if ( !readonly )
{
new_scombo.shorts.flags |= GTF_writing;
if ( unlikely(scombo.shorts.flags & GTF_readonly) )
PIN_FAIL(done, GNTST_general_error,
"Attempt to write-pin a r/o grant entry\n");
}
prev_scombo.word = cmpxchg((u32 *)shah,
scombo.word, new_scombo.word);
if ( likely(prev_scombo.word == scombo.word) )
break;
if ( retries++ == 4 )
PIN_FAIL(done, GNTST_general_error,
"Shared grant entry is unstable\n");
scombo = prev_scombo;
}
done:
return rc;
}
static int _set_status_v2(domid_t domid,
int readonly,
int mapflag,
grant_entry_header_t *shah,
struct active_grant_entry *act,
grant_status_t *status)
{
int rc = GNTST_okay;
union grant_combo scombo;
uint16_t flags = shah->flags;
domid_t id = shah->domid;
uint16_t mask = GTF_type_mask;
/* We read flags and domid in a single memory access.
This avoids the need for another memory barrier to
ensure accesses to these fields are not reordered. */
scombo.word = *(u32 *)shah;
barrier(); /* but we still need to stop the compiler from turning
it back into two reads */
flags = scombo.shorts.flags;
id = scombo.shorts.domid;
/* if this is a grant mapping operation we should ensure GTF_sub_page
is not set */
if ( mapflag )
mask |= GTF_sub_page;
/* If not already pinned, check the grant domid and type. */
if ( !act->pin &&
( (((flags & mask) != GTF_permit_access) &&
((flags & mask) != GTF_transitive)) ||
(id != domid)) )
PIN_FAIL(done, GNTST_general_error,
"Bad flags (%x) or dom (%d); expected d%d, flags %x\n",
flags, id, domid, mask);
if ( readonly )
{
*status |= GTF_reading;
}
else
{
if ( unlikely(flags & GTF_readonly) )
PIN_FAIL(done, GNTST_general_error,
"Attempt to write-pin a r/o grant entry\n");
*status |= GTF_reading | GTF_writing;
}
/* Make sure guest sees status update before checking if flags are
still valid */
smp_mb();
scombo.word = *(u32 *)shah;
barrier();
flags = scombo.shorts.flags;
id = scombo.shorts.domid;
if ( !act->pin )
{
if ( (((flags & mask) != GTF_permit_access) &&
((flags & mask) != GTF_transitive)) ||
(id != domid) ||
(!readonly && (flags & GTF_readonly)) )
{
gnttab_clear_flag(_GTF_writing, status);
gnttab_clear_flag(_GTF_reading, status);
PIN_FAIL(done, GNTST_general_error,
"Unstable flags (%x) or dom (%d); expected d%d (r/w: %d)\n",
flags, id, domid, !readonly);
}
}
else
{
if ( unlikely(flags & GTF_readonly) )
{
gnttab_clear_flag(_GTF_writing, status);
PIN_FAIL(done, GNTST_general_error,
"Unstable grant readonly flag\n");
}
}
done:
return rc;
}
static int _set_status(unsigned gt_version,
domid_t domid,
int readonly,
int mapflag,
grant_entry_header_t *shah,
struct active_grant_entry *act,
grant_status_t *status)
{
if ( gt_version == 1 )
return _set_status_v1(domid, readonly, mapflag, shah, act);
else
return _set_status_v2(domid, readonly, mapflag, shah, act, status);
}
static struct active_grant_entry *grant_map_exists(const struct domain *ld,
struct grant_table *rgt,
unsigned long mfn,
grant_ref_t *cur_ref)
{
grant_ref_t ref, max_iter;
/*
* The remote grant table should be locked but the percpu rwlock
* cannot be checked for read lock without race conditions or high
* overhead so we cannot use an ASSERT
*
* ASSERT(rw_is_locked(&rgt->lock));
*/
max_iter = min(*cur_ref + (1 << GNTTABOP_CONTINUATION_ARG_SHIFT),
nr_grant_entries(rgt));
for ( ref = *cur_ref; ref < max_iter; ref++ )
{
struct active_grant_entry *act = active_entry_acquire(rgt, ref);
if ( act->pin && act->domid == ld->domain_id && act->frame == mfn )
return act;
active_entry_release(act);
}
if ( ref < nr_grant_entries(rgt) )
{
*cur_ref = ref;
return NULL;
}
return ERR_PTR(-EINVAL);
}
#define MAPKIND_READ 1
#define MAPKIND_WRITE 2
static unsigned int mapkind(
struct grant_table *lgt, const struct domain *rd, unsigned long mfn)
{
struct grant_mapping *map;
grant_handle_t handle, limit = lgt->maptrack_limit;
unsigned int kind = 0;
/*
* Must have the local domain's grant table write lock when
* iterating over its maptrack entries.
*/
ASSERT(percpu_rw_is_write_locked(&lgt->lock));
/*
* Must have the remote domain's grant table write lock while
* counting its active entries.
*/
ASSERT(percpu_rw_is_write_locked(&rd->grant_table->lock));
smp_rmb();
for ( handle = 0; !(kind & MAPKIND_WRITE) && handle < limit; handle++ )
{
map = &maptrack_entry(lgt, handle);
if ( !(map->flags & (GNTMAP_device_map|GNTMAP_host_map)) ||
map->domid != rd->domain_id )
continue;
if ( _active_entry(rd->grant_table, map->ref).frame == mfn )
kind |= map->flags & GNTMAP_readonly ?
MAPKIND_READ : MAPKIND_WRITE;
}
return kind;
}
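/*
* Note: mapkind() scans up to every maptrack entry of the local domain, so
* it is O(maptrack_limit) in the worst case; it is only used on the
* IOMMU-mapping paths, where the result decides between installing a
* read-only or a writable 1:1 IOMMU entry for the frame.
*/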
/*
* Returns 0 if TLB flush / invalidate required by caller.
* va will indicate the address to be invalidated.
*
* addr is _either_ a host virtual address, or the address of the pte to
* update, as indicated by the GNTMAP_contains_pte flag.
*/
static void
map_grant_ref(
struct gnttab_map_grant_ref *op)
{
struct domain *ld, *rd, *owner = NULL;
struct grant_table *lgt, *rgt;
struct vcpu *led;
grant_handle_t handle;
unsigned long frame = 0;
struct page_info *pg = NULL;
int rc = GNTST_okay;
u32 old_pin;
u32 act_pin;
unsigned int cache_flags, refcnt = 0, typecnt = 0;
bool host_map_created = false;
struct active_grant_entry *act = NULL;
struct grant_mapping *mt;
grant_entry_header_t *shah;
uint16_t *status;
bool_t need_iommu;
led = current;
ld = led->domain;
if ( unlikely((op->flags & (GNTMAP_device_map|GNTMAP_host_map)) == 0) )
{
gdprintk(XENLOG_INFO, "Bad flags in grant map op: %x\n", op->flags);
op->status = GNTST_bad_gntref;
return;
}
if ( unlikely(paging_mode_external(ld) &&
(op->flags & (GNTMAP_device_map|GNTMAP_application_map|
GNTMAP_contains_pte))) )
{
gdprintk(XENLOG_INFO, "No device mapping in HVM domain\n");
op->status = GNTST_general_error;
return;
}
if ( unlikely((rd = rcu_lock_domain_by_id(op->dom)) == NULL) )
{
gdprintk(XENLOG_INFO, "Could not find domain %d\n", op->dom);
op->status = GNTST_bad_domain;
return;
}
rc = xsm_grant_mapref(XSM_HOOK, ld, rd, op->flags);
if ( rc )
{
rcu_unlock_domain(rd);
op->status = GNTST_permission_denied;
return;
}
lgt = ld->grant_table;
handle = get_maptrack_handle(lgt);
if ( unlikely(handle == INVALID_MAPTRACK_HANDLE) )
{
rcu_unlock_domain(rd);
gdprintk(XENLOG_INFO, "Failed to obtain maptrack handle\n");
op->status = GNTST_no_device_space;
return;
}
rgt = rd->grant_table;
grant_read_lock(rgt);
/* Bounds check on the grant ref */
if ( unlikely(op->ref >= nr_grant_entries(rgt)))
PIN_FAIL(unlock_out, GNTST_bad_gntref, "Bad ref %#x for d%d\n",
op->ref, rgt->domain->domain_id);
act = active_entry_acquire(rgt, op->ref);
shah = shared_entry_header(rgt, op->ref);
status = rgt->gt_version == 1 ? &shah->flags : &status_entry(rgt, op->ref);
/* If already pinned, check the active domid and avoid refcnt overflow. */
if ( act->pin &&
((act->domid != ld->domain_id) ||
(act->pin & 0x80808080U) != 0 ||
(act->is_sub_page)) )
PIN_FAIL(act_release_out, GNTST_general_error,
"Bad domain (%d != %d), or risk of counter overflow %08x, or subpage %d\n",
act->domid, ld->domain_id, act->pin, act->is_sub_page);
if ( !act->pin ||
(!(op->flags & GNTMAP_readonly) &&
!(act->pin & (GNTPIN_hstw_mask|GNTPIN_devw_mask))) )
{
if ( (rc = _set_status(rgt->gt_version, ld->domain_id,
op->flags & GNTMAP_readonly,
1, shah, act, status) ) != GNTST_okay )
goto act_release_out;
if ( !act->pin )
{
unsigned long gfn = rgt->gt_version == 1 ?
shared_entry_v1(rgt, op->ref).frame :
shared_entry_v2(rgt, op->ref).full_page.frame;
rc = get_paged_frame(gfn, &frame, &pg,
op->flags & GNTMAP_readonly, rd);
if ( rc != GNTST_okay )
goto unlock_out_clear;
act_set_gfn(act, _gfn(gfn));
act->domid = ld->domain_id;
act->frame = frame;
act->start = 0;
act->length = PAGE_SIZE;
act->is_sub_page = false;
act->trans_domain = rd;
act->trans_gref = op->ref;
}
}
old_pin = act->pin;
if ( op->flags & GNTMAP_device_map )
act->pin += (op->flags & GNTMAP_readonly) ?
GNTPIN_devr_inc : GNTPIN_devw_inc;
if ( op->flags & GNTMAP_host_map )
act->pin += (op->flags & GNTMAP_readonly) ?
GNTPIN_hstr_inc : GNTPIN_hstw_inc;
frame = act->frame;
act_pin = act->pin;
cache_flags = (shah->flags & (GTF_PAT | GTF_PWT | GTF_PCD) );
active_entry_release(act);
grant_read_unlock(rgt);
/* pg may be set, with a refcount included, from get_paged_frame(). */
if ( !pg )
{
pg = mfn_valid(_mfn(frame)) ? mfn_to_page(frame) : NULL;
if ( pg )
owner = page_get_owner_and_reference(pg);
}
else
owner = page_get_owner(pg);
if ( owner )
refcnt++;
if ( !pg || (owner == dom_io) )
{
/* Only needed the reference to confirm dom_io ownership. */
if ( pg )
{
put_page(pg);
refcnt--;
}
if ( paging_mode_external(ld) )
{
gdprintk(XENLOG_WARNING, "HVM guests can't grant map iomem\n");
rc = GNTST_general_error;
goto undo_out;
}
if ( !iomem_access_permitted(rd, frame, frame) )
{
gdprintk(XENLOG_WARNING,
"Iomem mapping not permitted %lx (domain %d)\n",
frame, rd->domain_id);
rc = GNTST_general_error;
goto undo_out;
}
if ( op->flags & GNTMAP_host_map )
{
rc = create_grant_host_mapping(op->host_addr, frame, op->flags,
cache_flags);
if ( rc != GNTST_okay )
goto undo_out;
host_map_created = true;
}
}
else if ( owner == rd || owner == dom_cow )
{
if ( (op->flags & GNTMAP_device_map) && !(op->flags & GNTMAP_readonly) )
{
if ( (owner == dom_cow) ||
!get_page_type(pg, PGT_writable_page) )
goto could_not_pin;
typecnt++;
}
if ( op->flags & GNTMAP_host_map )
{
/*
* Only need to grab another reference if device_map claimed
* the other one.
*/
if ( op->flags & GNTMAP_device_map )
{
if ( !get_page(pg, rd) )
goto could_not_pin;
refcnt++;
}
if ( gnttab_host_mapping_get_page_type(op->flags & GNTMAP_readonly,
ld, rd) )
{
if ( (owner == dom_cow) ||
!get_page_type(pg, PGT_writable_page) )
goto could_not_pin;
typecnt++;
}
rc = create_grant_host_mapping(op->host_addr, frame, op->flags, 0);
if ( rc != GNTST_okay )
goto undo_out;
host_map_created = true;
}
}
else
{
could_not_pin:
if ( !rd->is_dying )
gdprintk(XENLOG_WARNING, "Could not pin grant frame %lx\n",
frame);
rc = GNTST_general_error;
goto undo_out;
}
need_iommu = gnttab_need_iommu_mapping(ld);
if ( need_iommu )
{
unsigned int kind;
int err = 0;
double_gt_lock(lgt, rgt);
/* We're not translated, so we know that gmfns and mfns are
the same things, so the IOMMU entry is always 1-to-1. */
kind = mapkind(lgt, rd, frame);
if ( (act_pin & (GNTPIN_hstw_mask|GNTPIN_devw_mask)) &&
!(old_pin & (GNTPIN_hstw_mask|GNTPIN_devw_mask)) )
{
if ( !(kind & MAPKIND_WRITE) )
err = iommu_map_page(ld, frame, frame,
IOMMUF_readable|IOMMUF_writable);
}
else if ( act_pin && !old_pin )
{
if ( !kind )
err = iommu_map_page(ld, frame, frame, IOMMUF_readable);
}
if ( err )
{
double_gt_unlock(lgt, rgt);
rc = GNTST_general_error;
goto undo_out;
}
}
TRACE_1D(TRC_MEM_PAGE_GRANT_MAP, op->dom);
/*
* All maptrack entry users check mt->flags first before using the
* other fields so just ensure the flags field is stored last.
*
* However, if gnttab_need_iommu_mapping() then this would race
* with a concurrent mapcount() call (on an unmap, for example)
* and a lock is required.
*/
mt = &maptrack_entry(lgt, handle);
mt->domid = op->dom;
mt->ref = op->ref;
smp_wmb();
write_atomic(&mt->flags, op->flags);
if ( need_iommu )
double_gt_unlock(lgt, rgt);
op->dev_bus_addr = (u64)frame << PAGE_SHIFT;
op->handle = handle;
op->status = GNTST_okay;
rcu_unlock_domain(rd);
return;
undo_out:
if ( host_map_created )
{
replace_grant_host_mapping(op->host_addr, frame, 0, op->flags);
gnttab_flush_tlb(ld);
}
while ( typecnt-- )
put_page_type(pg);
while ( refcnt-- )
put_page(pg);
grant_read_lock(rgt);
act = active_entry_acquire(rgt, op->ref);
if ( op->flags & GNTMAP_device_map )
act->pin -= (op->flags & GNTMAP_readonly) ?
GNTPIN_devr_inc : GNTPIN_devw_inc;
if ( op->flags & GNTMAP_host_map )
act->pin -= (op->flags & GNTMAP_readonly) ?
GNTPIN_hstr_inc : GNTPIN_hstw_inc;
unlock_out_clear:
if ( !(op->flags & GNTMAP_readonly) &&
!(act->pin & (GNTPIN_hstw_mask|GNTPIN_devw_mask)) )
gnttab_clear_flag(_GTF_writing, status);
if ( !act->pin )
gnttab_clear_flag(_GTF_reading, status);
act_release_out:
active_entry_release(act);
unlock_out:
grant_read_unlock(rgt);
op->status = rc;
put_maptrack_handle(lgt, handle);
rcu_unlock_domain(rd);
}
static long
gnttab_map_grant_ref(
XEN_GUEST_HANDLE_PARAM(gnttab_map_grant_ref_t) uop, unsigned int count)
{
int i;
struct gnttab_map_grant_ref op;
for ( i = 0; i < count; i++ )
{
if ( i && hypercall_preempt_check() )
return i;
if ( unlikely(__copy_from_guest_offset(&op, uop, i, 1)) )
return -EFAULT;
map_grant_ref(&op);
if ( unlikely(__copy_to_guest_offset(uop, i, &op, 1)) )
return -EFAULT;
}
return 0;
}
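/*
* Guest-side usage sketch (illustration only, not part of this handler): a
* frontend typically fills a gnttab_map_grant_ref_t with the granting
* domain, the grant reference it was given, GNTMAP_host_map (plus
* GNTMAP_readonly where appropriate) and a free host address, then issues
* GNTTABOP_map_grant_ref; on return it checks op.status against GNTST_okay
* and keeps op.handle for the eventual GNTTABOP_unmap_grant_ref.
*/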
static void
unmap_common(
struct gnttab_unmap_common *op)
{
domid_t dom;
struct domain *ld, *rd;
struct grant_table *lgt, *rgt;
struct active_grant_entry *act;
s16 rc = 0;
struct grant_mapping *map;
unsigned int flags;
bool put_handle = false;
ld = current->domain;
lgt = ld->grant_table;
if ( unlikely(op->handle >= lgt->maptrack_limit) )
{
gdprintk(XENLOG_INFO, "Bad d%d handle %#x\n",
lgt->domain->domain_id, op->handle);
op->status = GNTST_bad_handle;
return;
}
smp_rmb();
map = &maptrack_entry(lgt, op->handle);
if ( unlikely(!read_atomic(&map->flags)) )
{
gdprintk(XENLOG_INFO, "Zero flags for d%d handle %#x\n",
lgt->domain->domain_id, op->handle);
op->status = GNTST_bad_handle;
return;
}
dom = map->domid;
if ( unlikely((rd = rcu_lock_domain_by_id(dom)) == NULL) )
{
/* This can happen when a grant is implicitly unmapped. */
gdprintk(XENLOG_INFO, "Could not find domain %d\n", dom);
domain_crash(ld); /* naughty... */
return;
}
rc = xsm_grant_unmapref(XSM_HOOK, ld, rd);
if ( rc )
{
rcu_unlock_domain(rd);
op->status = GNTST_permission_denied;
return;
}
TRACE_1D(TRC_MEM_PAGE_GRANT_UNMAP, dom);
rgt = rd->grant_table;
grant_read_lock(rgt);
if ( rgt->gt_version == 0 )
{
/*
* This ought to be impossible, as such a mapping should not have
* been established (see the nr_grant_entries(rgt) bounds check in
* gnttab_map_grant_ref()). Doing this check only in
* gnttab_unmap_common_complete() - as it used to be done - would,
* however, be too late.
*/
rc = GNTST_bad_gntref;
flags = 0;
goto unlock_out;
}
op->rd = rd;
op->ref = map->ref;
/*
* We can't assume there was no racing unmap for this maptrack entry,
* and hence we can't assume map->ref is valid for rd. While the checks
* below (with the active entry lock held) will reject any such racing
* requests, we still need to make sure we don't attempt to acquire an
* invalid lock.
*/
smp_rmb();
if ( unlikely(op->ref >= nr_grant_entries(rgt)) )
{
gdprintk(XENLOG_WARNING, "Unstable d%d handle %#x\n",
rgt->domain->domain_id, op->handle);
rc = GNTST_bad_handle;
flags = 0;
goto unlock_out;
}
act = active_entry_acquire(rgt, op->ref);
/*
* Note that we (ab)use the active entry lock here to protect against
* multiple unmaps of the same mapping here. We don't want to hold lgt's
* lock, and we only hold rgt's lock for reading (but the latter wouldn't
* be the right one anyway). Hence the easiest is to rely on a lock we
* hold anyway; see docs/misc/grant-tables.txt's "Locking" section.
*/
flags = read_atomic(&map->flags);
smp_rmb();
if ( unlikely(!flags) || unlikely(map->domid != dom) ||
unlikely(map->ref != op->ref) )
{
gdprintk(XENLOG_WARNING, "Unstable handle %#x\n", op->handle);
rc = GNTST_bad_handle;
goto act_release_out;
}
op->frame = act->frame;
if ( op->dev_bus_addr &&
unlikely(op->dev_bus_addr != pfn_to_paddr(act->frame)) )
PIN_FAIL(act_release_out, GNTST_general_error,
"Bus address doesn't match gntref (%"PRIx64" != %"PRIpaddr")\n",
op->dev_bus_addr, pfn_to_paddr(act->frame));
if ( op->host_addr && (flags & GNTMAP_host_map) )
{
if ( (rc = replace_grant_host_mapping(op->host_addr,
op->frame, op->new_addr,
flags)) < 0 )
goto act_release_out;
map->flags &= ~GNTMAP_host_map;
op->done |= GNTMAP_host_map | (flags & GNTMAP_readonly);
}
if ( op->dev_bus_addr && (flags & GNTMAP_device_map) )
{
map->flags &= ~GNTMAP_device_map;
op->done |= GNTMAP_device_map | (flags & GNTMAP_readonly);
}
if ( !(map->flags & (GNTMAP_device_map|GNTMAP_host_map)) )
{
map->flags = 0;
put_handle = true;
}
act_release_out:
active_entry_release(act);
unlock_out:
grant_read_unlock(rgt);
if ( put_handle )
put_maptrack_handle(lgt, op->handle);
if ( rc == GNTST_okay && gnttab_need_iommu_mapping(ld) )
{
unsigned int kind;
int err = 0;
double_gt_lock(lgt, rgt);
kind = mapkind(lgt, rd, op->frame);
if ( !kind )
err = iommu_unmap_page(ld, op->frame);
else if ( !(kind & MAPKIND_WRITE) )
err = iommu_map_page(ld, op->frame, op->frame, IOMMUF_readable);
double_gt_unlock(lgt, rgt);
if ( err )
rc = GNTST_general_error;
}
/* If just unmapped a writable mapping, mark as dirtied */
if ( rc == GNTST_okay && !(flags & GNTMAP_readonly) )
gnttab_mark_dirty(rd, op->frame);
op->status = rc;
rcu_unlock_domain(rd);
}
static void
unmap_common_complete(struct gnttab_unmap_common *op)
{
struct domain *ld, *rd = op->rd;
struct grant_table *rgt;
struct active_grant_entry *act;
grant_entry_header_t *sha;
struct page_info *pg;
uint16_t *status;
if ( !op->done )
{
/* unmap_common() didn't do anything - nothing to complete. */
return;
}
ld = current->domain;
rcu_lock_domain(rd);
rgt = rd->grant_table;
grant_read_lock(rgt);
act = active_entry_acquire(rgt, op->ref);
sha = shared_entry_header(rgt, op->ref);
if ( rgt->gt_version == 1 )
status = &sha->flags;
else
status = &status_entry(rgt, op->ref);
pg = mfn_to_page(op->frame);
if ( op->done & GNTMAP_device_map )
{
if ( !is_iomem_page(_mfn(act->frame)) )
{
if ( op->done & GNTMAP_readonly )
put_page(pg);
else
put_page_and_type(pg);
}
ASSERT(act->pin & (GNTPIN_devw_mask | GNTPIN_devr_mask));
if ( op->done & GNTMAP_readonly )
act->pin -= GNTPIN_devr_inc;
else
act->pin -= GNTPIN_devw_inc;
}
if ( op->done & GNTMAP_host_map )
{
if ( !is_iomem_page(_mfn(op->frame)) )
{
if ( gnttab_host_mapping_get_page_type(op->done & GNTMAP_readonly,
ld, rd) )
put_page_type(pg);
put_page(pg);
}
ASSERT(act->pin & (GNTPIN_hstw_mask | GNTPIN_hstr_mask));
if ( op->done & GNTMAP_readonly )
act->pin -= GNTPIN_hstr_inc;
else
act->pin -= GNTPIN_hstw_inc;
}
if ( ((act->pin & (GNTPIN_devw_mask|GNTPIN_hstw_mask)) == 0) &&
!(op->done & GNTMAP_readonly) )
gnttab_clear_flag(_GTF_writing, status);
if ( act->pin == 0 )
gnttab_clear_flag(_GTF_reading, status);
active_entry_release(act);
grant_read_unlock(rgt);
rcu_unlock_domain(rd);
}
static void
unmap_grant_ref(
struct gnttab_unmap_grant_ref *op,
struct gnttab_unmap_common *common)
{
common->host_addr = op->host_addr;
common->dev_bus_addr = op->dev_bus_addr;
common->handle = op->handle;
/* Initialise these in case common contains old state */
common->done = 0;
common->new_addr = 0;
common->rd = NULL;
common->frame = 0;
unmap_common(common);
op->status = common->status;
}
static long
gnttab_unmap_grant_ref(
XEN_GUEST_HANDLE_PARAM(gnttab_unmap_grant_ref_t) uop, unsigned int count)
{
int i, c, partial_done, done = 0;
struct gnttab_unmap_grant_ref op;
struct gnttab_unmap_common common[GNTTAB_UNMAP_BATCH_SIZE];
while ( count != 0 )
{
c = min(count, (unsigned int)GNTTAB_UNMAP_BATCH_SIZE);
partial_done = 0;
for ( i = 0; i < c; i++ )
{
if ( unlikely(__copy_from_guest(&op, uop, 1)) )
goto fault;
unmap_grant_ref(&op, &common[i]);
++partial_done;
if ( unlikely(__copy_field_to_guest(uop, &op, status)) )
goto fault;
guest_handle_add_offset(uop, 1);
}
gnttab_flush_tlb(current->domain);
for ( i = 0; i < partial_done; i++ )
unmap_common_complete(&common[i]);
count -= c;
done += c;
if ( count && hypercall_preempt_check() )
return done;
}
return 0;
fault:
gnttab_flush_tlb(current->domain);
for ( i = 0; i < partial_done; i++ )
unmap_common_complete(&common[i]);
return -EFAULT;
}
static void
unmap_and_replace(
struct gnttab_unmap_and_replace *op,
struct gnttab_unmap_common *common)
{
common->host_addr = op->host_addr;
common->new_addr = op->new_addr;
common->handle = op->handle;
/* Initialise these in case common contains old state */
common->done = 0;
common->dev_bus_addr = 0;
common->rd = NULL;
common->frame = 0;
unmap_common(common);
op->status = common->status;
}
static long
gnttab_unmap_and_replace(
XEN_GUEST_HANDLE_PARAM(gnttab_unmap_and_replace_t) uop, unsigned int count)
{
int i, c, partial_done, done = 0;
struct gnttab_unmap_and_replace op;
struct gnttab_unmap_common common[GNTTAB_UNMAP_BATCH_SIZE];
while ( count != 0 )
{
c = min(count, (unsigned int)GNTTAB_UNMAP_BATCH_SIZE);
partial_done = 0;
for ( i = 0; i < c; i++ )
{
if ( unlikely(__copy_from_guest(&op, uop, 1)) )
goto fault;
unmap_and_replace(&op, &common[i]);
++partial_done;
if ( unlikely(__copy_field_to_guest(uop, &op, status)) )
goto fault;
guest_handle_add_offset(uop, 1);
}
gnttab_flush_tlb(current->domain);
for ( i = 0; i < partial_done; i++ )
unmap_common_complete(&common[i]);
count -= c;
done += c;
if ( count && hypercall_preempt_check() )
return done;
}
return 0;
fault:
gnttab_flush_tlb(current->domain);
for ( i = 0; i < partial_done; i++ )
unmap_common_complete(&common[i]);
return -EFAULT;
}
static int
gnttab_populate_status_frames(struct domain *d, struct grant_table *gt,
unsigned int req_nr_frames)
{
unsigned i;
unsigned req_status_frames;
req_status_frames = grant_to_status_frames(req_nr_frames);
for ( i = nr_status_frames(gt); i < req_status_frames; i++ )
{
if ( (gt->status[i] = alloc_xenheap_page()) == NULL )
goto status_alloc_failed;
clear_page(gt->status[i]);
}
/* Share the new status frames with the recipient domain */
for ( i = nr_status_frames(gt); i < req_status_frames; i++ )
gnttab_create_status_page(d, gt, i);
gt->nr_status_frames = req_status_frames;
return 0;
status_alloc_failed:
for ( i = nr_status_frames(gt); i < req_status_frames; i++ )
{
free_xenheap_page(gt->status[i]);
gt->status[i] = NULL;
}
return -ENOMEM;
}
static void
gnttab_unpopulate_status_frames(struct domain *d, struct grant_table *gt)
{
int i;
for ( i = 0; i < nr_status_frames(gt); i++ )
{
struct page_info *pg = virt_to_page(gt->status[i]);
BUG_ON(page_get_owner(pg) != d);
if ( test_and_clear_bit(_PGC_allocated, &pg->count_info) )
put_page(pg);
BUG_ON(pg->count_info & ~PGC_xen_heap);
free_xenheap_page(gt->status[i]);
gt->status[i] = NULL;
}
gt->nr_status_frames = 0;
}
/*
* Grow the grant table. The caller must hold the grant table's
* write lock before calling this function.
*/
static int
gnttab_grow_table(struct domain *d, unsigned int req_nr_frames)
{
struct grant_table *gt = d->grant_table;
unsigned int i, j;
if ( unlikely(!gt->active) )
{
gprintk(XENLOG_WARNING, "grant_table_set_limits() call missing\n");
return -ENODEV;
}
if ( req_nr_frames < INITIAL_NR_GRANT_FRAMES )
req_nr_frames = INITIAL_NR_GRANT_FRAMES;
ASSERT(req_nr_frames <= gt->max_grant_frames);
gdprintk(XENLOG_INFO,
"Expanding d%d grant table from %u to %u frames\n",
d->domain_id, nr_grant_frames(gt), req_nr_frames);
/* Active */
for ( i = nr_active_grant_frames(gt);
i < num_act_frames_from_sha_frames(req_nr_frames); i++ )
{
if ( (gt->active[i] = alloc_xenheap_page()) == NULL )
goto active_alloc_failed;
clear_page(gt->active[i]);
for ( j = 0; j < ACGNT_PER_PAGE; j++ )
spin_lock_init(&gt->active[i][j].lock);
}
/* Shared */
for ( i = nr_grant_frames(gt); i < req_nr_frames; i++ )
{
if ( (gt->shared_raw[i] = alloc_xenheap_page()) == NULL )
goto shared_alloc_failed;
clear_page(gt->shared_raw[i]);
}
/* Status pages - version 2 */
if ( gt->gt_version > 1 )
{
if ( gnttab_populate_status_frames(d, gt, req_nr_frames) )
goto shared_alloc_failed;
}
/* Share the new shared frames with the recipient domain */
for ( i = nr_grant_frames(gt); i < req_nr_frames; i++ )
gnttab_create_shared_page(d, gt, i);
gt->nr_grant_frames = req_nr_frames;
return 0;
shared_alloc_failed:
for ( i = nr_grant_frames(gt); i < req_nr_frames; i++ )
{
free_xenheap_page(gt->shared_raw[i]);
gt->shared_raw[i] = NULL;
}
active_alloc_failed:
for ( i = nr_active_grant_frames(gt);
i < num_act_frames_from_sha_frames(req_nr_frames); i++ )
{
free_xenheap_page(gt->active[i]);
gt->active[i] = NULL;
}
gdprintk(XENLOG_INFO, "Allocation failure when expanding d%d grant table\n",
d->domain_id);
return -ENOMEM;
}
static int
grant_table_init(struct domain *d, struct grant_table *gt,
unsigned int grant_frames, unsigned int maptrack_frames)
{
int ret = -ENOMEM;
grant_write_lock(gt);
if ( gt->active )
{
ret = -EBUSY;
goto out_no_cleanup;
}
gt->max_grant_frames = grant_frames;
gt->max_maptrack_frames = maptrack_frames;
/* Active grant table. */
gt->active = xzalloc_array(struct active_grant_entry *,
max_nr_active_grant_frames(gt));
if ( gt->active == NULL )
goto out;
/* Tracking of mapped foreign frames table */
if ( gt->max_maptrack_frames )
{
gt->maptrack = vzalloc(gt->max_maptrack_frames * sizeof(*gt->maptrack));
if ( gt->maptrack == NULL )
goto out;
}
/* Shared grant table. */
gt->shared_raw = xzalloc_array(void *, gt->max_grant_frames);
if ( gt->shared_raw == NULL )
goto out;
/* Status pages for grant table - for version 2 */
gt->status = xzalloc_array(grant_status_t *,
grant_to_status_frames(gt->max_grant_frames));
if ( gt->status == NULL )
goto out;
ret = gnttab_init_arch(gt);
if ( ret )
goto out;
/* gnttab_grow_table() allocates a min number of frames, so 0 is okay. */
ret = gnttab_grow_table(d, 0);
out:
if ( ret )
{
gnttab_destroy_arch(gt);
xfree(gt->status);
gt->status = NULL;
xfree(gt->shared_raw);
gt->shared_raw = NULL;
vfree(gt->maptrack);
gt->maptrack = NULL;
xfree(gt->active);
gt->active = NULL;
}
out_no_cleanup:
grant_write_unlock(gt);
return ret;
}
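/*
* Note: grant_table_init() only sizes the metadata arrays (active,
* maptrack, shared_raw, status) for the domain's limits; the backing
* frames themselves come from gnttab_grow_table(), which is invoked here
* with 0 so that just INITIAL_NR_GRANT_FRAMES are populated up front, and
* later grows on demand up to max_grant_frames.
*/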
static long
gnttab_setup_table(
XEN_GUEST_HANDLE_PARAM(gnttab_setup_table_t) uop, unsigned int count,
unsigned int limit_max)
{
struct vcpu *curr = current;
struct gnttab_setup_table op;
struct domain *d = NULL;
struct grant_table *gt;
unsigned int i;
if ( count != 1 )
return -EINVAL;
if ( unlikely(copy_from_guest(&op, uop, 1)) )
return -EFAULT;
if ( !guest_handle_okay(op.frame_list, op.nr_frames) )
return -EFAULT;
d = rcu_lock_domain_by_any_id(op.dom);
if ( d == NULL )
{
op.status = GNTST_bad_domain;
goto out;
}
if ( xsm_grant_setup(XSM_TARGET, curr->domain, d) )
{
op.status = GNTST_permission_denied;
goto out;
}
gt = d->grant_table;
grant_write_lock(gt);
if ( unlikely(op.nr_frames > gt->max_grant_frames) )
{
gdprintk(XENLOG_INFO, "d%d is limited to %u grant-table frames\n",
d->domain_id, gt->max_grant_frames);
op.status = GNTST_general_error;
goto unlock;
}
if ( unlikely(limit_max < op.nr_frames) )
{
gdprintk(XENLOG_WARNING, "nr_frames for d%d is too large (%u,%u)\n",
d->domain_id, op.nr_frames, limit_max);
op.status = GNTST_general_error;
goto unlock;
}
if ( gt->gt_version == 0 )
gt->gt_version = 1;
if ( (op.nr_frames > nr_grant_frames(gt) ||
((gt->gt_version > 1) &&
(grant_to_status_frames(op.nr_frames) > nr_status_frames(gt)))) &&
gnttab_grow_table(d, op.nr_frames) )
{
gdprintk(XENLOG_INFO,
"Expand grant table of d%d to %u failed. Current: %u Max: %u\n",
d->domain_id, op.nr_frames, nr_grant_frames(gt),
gt->max_grant_frames);
op.status = GNTST_general_error;
goto unlock;
}
op.status = GNTST_okay;
for ( i = 0; i < op.nr_frames; i++ )
{
xen_pfn_t gmfn = gnttab_shared_gmfn(d, gt, i);
/* Grant tables cannot be shared */
BUG_ON(SHARED_M2P(gmfn));
if ( __copy_to_guest_offset(op.frame_list, i, &gmfn, 1) )
op.status = GNTST_bad_virt_addr;
}
unlock:
grant_write_unlock(gt);
out:
if ( d )
rcu_unlock_domain(d);
if ( unlikely(__copy_field_to_guest(uop, &op, status)) )
return -EFAULT;
return 0;
}
static long
gnttab_query_size(
XEN_GUEST_HANDLE_PARAM(gnttab_query_size_t) uop, unsigned int count)
{
struct gnttab_query_size op;
struct domain *d;
struct grant_table *gt;
if ( count != 1 )
return -EINVAL;
if ( unlikely(copy_from_guest(&op, uop, 1)) )
return -EFAULT;
d = rcu_lock_domain_by_any_id(op.dom);
if ( d == NULL )
{
op.status = GNTST_bad_domain;
goto out;
}
if ( xsm_grant_query_size(XSM_TARGET, current->domain, d) )
{
op.status = GNTST_permission_denied;
goto out;
}
gt = d->grant_table;
grant_read_lock(gt);
op.nr_frames = nr_grant_frames(gt);
op.max_nr_frames = gt->max_grant_frames;
op.status = GNTST_okay;
grant_read_unlock(gt);
out:
if ( d )
rcu_unlock_domain(d);
if ( unlikely(__copy_to_guest(uop, &op, 1)) )
return -EFAULT;
return 0;
}
/*
* Check that the given grant reference (rd,ref) allows 'ld' to transfer
* ownership of a page frame. If so, lock down the grant entry.
*/
static int
gnttab_prepare_for_transfer(
struct domain *rd, struct domain *ld, grant_ref_t ref)
{
struct grant_table *rgt = rd->grant_table;
grant_entry_header_t *sha;
union grant_combo scombo, prev_scombo, new_scombo;
int retries = 0;
grant_read_lock(rgt);
if ( unlikely(ref >= nr_grant_entries(rgt)) )
{
gdprintk(XENLOG_INFO,
"Bad grant reference %#x for transfer to d%d\n",
ref, rd->domain_id);
goto fail;
}
sha = shared_entry_header(rgt, ref);
scombo.word = *(u32 *)&sha->flags;
for ( ; ; )
{
if ( unlikely(scombo.shorts.flags != GTF_accept_transfer) ||
unlikely(scombo.shorts.domid != ld->domain_id) )
{
gdprintk(XENLOG_INFO,
"Bad flags (%x) or dom (%d); expected d%d\n",
scombo.shorts.flags, scombo.shorts.domid,
ld->domain_id);
goto fail;
}
new_scombo = scombo;
new_scombo.shorts.flags |= GTF_transfer_committed;
prev_scombo.word = cmpxchg((u32 *)&sha->flags,
scombo.word, new_scombo.word);
if ( likely(prev_scombo.word == scombo.word) )
break;
if ( retries++ == 4 )
{
gdprintk(XENLOG_WARNING, "Shared grant entry is unstable\n");
goto fail;
}
scombo = prev_scombo;
}
grant_read_unlock(rgt);
return 1;
fail:
grant_read_unlock(rgt);
return 0;
}
static long
gnttab_transfer(
XEN_GUEST_HANDLE_PARAM(gnttab_transfer_t) uop, unsigned int count)
{
struct domain *d = current->domain;
struct domain *e;
struct page_info *page;
int i;
struct gnttab_transfer gop;
unsigned long mfn;
unsigned int max_bitsize;
struct active_grant_entry *act;
for ( i = 0; i < count; i++ )
{
bool_t okay;
int rc;
if ( i && hypercall_preempt_check() )
return i;
/* Read from caller address space. */
if ( unlikely(__copy_from_guest(&gop, uop, 1)) )
{
gdprintk(XENLOG_INFO, "error reading req %d/%u\n",
i, count);
return -EFAULT;
}
#ifdef CONFIG_X86
{
p2m_type_t p2mt;
mfn = mfn_x(get_gfn_unshare(d, gop.mfn, &p2mt));
if ( p2m_is_shared(p2mt) || !p2m_is_valid(p2mt) )
mfn = mfn_x(INVALID_MFN);
}
#else
mfn = mfn_x(gfn_to_mfn(d, _gfn(gop.mfn)));
#endif
/* Check the passed page frame for basic validity. */
if ( unlikely(!mfn_valid(_mfn(mfn))) )
{
put_gfn(d, gop.mfn);
gdprintk(XENLOG_INFO, "out-of-range %lx\n", (unsigned long)gop.mfn);
gop.status = GNTST_bad_page;
goto copyback;
}
page = mfn_to_page(mfn);
if ( (rc = steal_page(d, page, 0)) < 0 )
{
put_gfn(d, gop.mfn);
gop.status = rc == -EINVAL ? GNTST_bad_page : GNTST_general_error;
goto copyback;
}
rc = guest_physmap_remove_page(d, _gfn(gop.mfn), _mfn(mfn), 0);
gnttab_flush_tlb(d);
if ( rc )
{
gdprintk(XENLOG_INFO, "can't remove GFN %"PRI_xen_pfn" (MFN %lx)\n",
gop.mfn, mfn);
gop.status = GNTST_general_error;
goto put_gfn_and_copyback;
}
/* Find the target domain. */
if ( unlikely((e = rcu_lock_domain_by_id(gop.domid)) == NULL) )
{
gdprintk(XENLOG_INFO, "can't find d%d\n", gop.domid);
gop.status = GNTST_bad_domain;
goto put_gfn_and_copyback;
}
if ( xsm_grant_transfer(XSM_HOOK, d, e) )
{
gop.status = GNTST_permission_denied;
unlock_and_copyback:
rcu_unlock_domain(e);
put_gfn_and_copyback:
put_gfn(d, gop.mfn);
page->count_info &= ~(PGC_count_mask|PGC_allocated);
free_domheap_page(page);
goto copyback;
}
max_bitsize = domain_clamp_alloc_bitsize(
e, e->grant_table->gt_version > 1 || paging_mode_translate(e)
? BITS_PER_LONG + PAGE_SHIFT : 32 + PAGE_SHIFT);
if ( max_bitsize < BITS_PER_LONG + PAGE_SHIFT &&
(mfn >> (max_bitsize - PAGE_SHIFT)) )
{
struct page_info *new_page;
new_page = alloc_domheap_page(e, MEMF_no_owner |
MEMF_bits(max_bitsize));
if ( new_page == NULL )
{
gop.status = GNTST_address_too_big;
goto unlock_and_copyback;
}
copy_domain_page(_mfn(page_to_mfn(new_page)), _mfn(mfn));
page->count_info &= ~(PGC_count_mask|PGC_allocated);
free_domheap_page(page);
page = new_page;
}
spin_lock(&e->page_alloc_lock);
/*
* Check that 'e' will accept the page and has reservation
* headroom. Also, a domain mustn't have PGC_allocated
* pages when it is dying.
*/
if ( unlikely(e->is_dying) ||
unlikely(e->tot_pages >= e->max_pages) )
{
spin_unlock(&e->page_alloc_lock);
if ( e->is_dying )
gdprintk(XENLOG_INFO, "Transferee d%d is dying\n",
e->domain_id);
else
gdprintk(XENLOG_INFO,
"Transferee d%d has no headroom (tot %u, max %u)\n",
e->domain_id, e->tot_pages, e->max_pages);
gop.status = GNTST_general_error;
goto unlock_and_copyback;
}
/* Okay, add the page to 'e'. */
if ( unlikely(domain_adjust_tot_pages(e, 1) == 1) )
get_knownalive_domain(e);
/*
* We must drop the lock to avoid a possible deadlock in
* gnttab_prepare_for_transfer(). We have reserved a page in e so we can
* safely drop the lock and re-acquire it later to add the page to the
* pagelist.
*/
spin_unlock(&e->page_alloc_lock);
okay = gnttab_prepare_for_transfer(e, d, gop.ref);
spin_lock(&e->page_alloc_lock);
if ( unlikely(!okay) || unlikely(e->is_dying) )
{
bool_t drop_dom_ref = !domain_adjust_tot_pages(e, -1);
spin_unlock(&e->page_alloc_lock);
if ( okay /* i.e. e->is_dying due to the surrounding if() */ )
gdprintk(XENLOG_INFO, "Transferee d%d is now dying\n",
e->domain_id);
if ( drop_dom_ref )
put_domain(e);
gop.status = GNTST_general_error;
goto unlock_and_copyback;
}
page_list_add_tail(page, &e->page_list);
page_set_owner(page, e);
spin_unlock(&e->page_alloc_lock);
put_gfn(d, gop.mfn);
TRACE_1D(TRC_MEM_PAGE_GRANT_TRANSFER, e->domain_id);
/* Tell the guest about its new page frame. */
grant_read_lock(e->grant_table);
act = active_entry_acquire(e->grant_table, gop.ref);
if ( e->grant_table->gt_version == 1 )
{
grant_entry_v1_t *sha = &shared_entry_v1(e->grant_table, gop.ref);
guest_physmap_add_page(e, _gfn(sha->frame), _mfn(mfn), 0);
if ( !paging_mode_translate(e) )
sha->frame = mfn;
}
else
{
grant_entry_v2_t *sha = &shared_entry_v2(e->grant_table, gop.ref);
guest_physmap_add_page(e, _gfn(sha->full_page.frame),
_mfn(mfn), 0);
if ( !paging_mode_translate(e) )
sha->full_page.frame = mfn;
}
smp_wmb();
shared_entry_header(e->grant_table, gop.ref)->flags |=
GTF_transfer_completed;
active_entry_release(act);
grant_read_unlock(e->grant_table);
rcu_unlock_domain(e);
gop.status = GNTST_okay;
copyback:
if ( unlikely(__copy_field_to_guest(uop, &gop, status)) )
{
gdprintk(XENLOG_INFO, "error writing resp %d/%u\n", i, count);
return -EFAULT;
}
guest_handle_add_offset(uop, 1);
}
return 0;
}
/*
* Undo acquire_grant_for_copy(). This has no effect on page type and
* reference counts.
*/
static void
release_grant_for_copy(
struct domain *rd, grant_ref_t gref, bool readonly)
{
struct grant_table *rgt = rd->grant_table;
grant_entry_header_t *sha;
struct active_grant_entry *act;
unsigned long r_frame;
uint16_t *status;
grant_ref_t trans_gref;
struct domain *td;
grant_read_lock(rgt);
act = active_entry_acquire(rgt, gref);
sha = shared_entry_header(rgt, gref);
r_frame = act->frame;
if ( rgt->gt_version == 1 )
{
status = &sha->flags;
td = rd;
trans_gref = gref;
}
else
{
status = &status_entry(rgt, gref);
td = act->trans_domain;
trans_gref = act->trans_gref;
}
if ( readonly )
{
act->pin -= GNTPIN_hstr_inc;
}
else
{
gnttab_mark_dirty(rd, r_frame);
act->pin -= GNTPIN_hstw_inc;
if ( !(act->pin & (GNTPIN_devw_mask|GNTPIN_hstw_mask)) )
gnttab_clear_flag(_GTF_writing, status);
}
if ( !act->pin )
gnttab_clear_flag(_GTF_reading, status);
active_entry_release(act);
grant_read_unlock(rgt);
if ( td != rd )
{
/*
* Recursive call, but it is bounded (acquire permits only a single
* level of transitivity), so it's okay.
*/
release_grant_for_copy(td, trans_gref, readonly);
rcu_unlock_domain(td);
}
}
/* The status for a grant indicates that we're taking more access than
the pin requires. Fix up the status to match the pin. Called
under the domain's grant table lock. */
/* Only safe on transitive grants. Even then, note that we don't
attempt to drop any pin on the referent grant. */
static void fixup_status_for_copy_pin(const struct active_grant_entry *act,
uint16_t *status)
{
if ( !(act->pin & (GNTPIN_hstw_mask | GNTPIN_devw_mask)) )
gnttab_clear_flag(_GTF_writing, status);
if ( !act->pin )
gnttab_clear_flag(_GTF_reading, status);
}
/* Grab a frame number from a grant entry and update the flags and pin
count as appropriate. If rc == GNTST_okay, note that this *does*
take one ref count on the target page, stored in *page.
If there is any error, *page = NULL, no ref taken. */
static int
acquire_grant_for_copy(
struct domain *rd, grant_ref_t gref, domid_t ldom, bool readonly,
unsigned long *frame, struct page_info **page,
uint16_t *page_off, uint16_t *length, bool allow_transitive)
{
struct grant_table *rgt = rd->grant_table;
grant_entry_v2_t *sha2;
grant_entry_header_t *shah;
struct active_grant_entry *act;
grant_status_t *status;
uint32_t old_pin;
domid_t trans_domid;
grant_ref_t trans_gref;
struct domain *td;
unsigned long grant_frame;
uint16_t trans_page_off;
uint16_t trans_length;
bool is_sub_page;
s16 rc = GNTST_okay;
*page = NULL;
grant_read_lock(rgt);
if ( unlikely(gref >= nr_grant_entries(rgt)) )
PIN_FAIL(gt_unlock_out, GNTST_bad_gntref,
"Bad grant reference %#x\n", gref);
act = active_entry_acquire(rgt, gref);
shah = shared_entry_header(rgt, gref);
if ( rgt->gt_version == 1 )
{
sha2 = NULL;
status = &shah->flags;
}
else
{
sha2 = &shared_entry_v2(rgt, gref);
status = &status_entry(rgt, gref);
}
/* If already pinned, check the active domid and avoid refcnt overflow. */
if ( act->pin && ((act->domid != ldom) || (act->pin & 0x80808080U) != 0) )
PIN_FAIL(unlock_out, GNTST_general_error,
"Bad domain (%d != %d), or risk of counter overflow %08x\n",
act->domid, ldom, act->pin);
old_pin = act->pin;
if ( sha2 && (shah->flags & GTF_type_mask) == GTF_transitive )
{
if ( (!old_pin || (!readonly &&
!(old_pin & (GNTPIN_devw_mask|GNTPIN_hstw_mask)))) &&
(rc = _set_status_v2(ldom, readonly, 0, shah, act,
status)) != GNTST_okay )
goto unlock_out;
if ( !allow_transitive )
PIN_FAIL(unlock_out_clear, GNTST_general_error,
"transitive grant when transitivity not allowed\n");
trans_domid = sha2->transitive.trans_domid;
trans_gref = sha2->transitive.gref;
barrier(); /* Stop the compiler from re-loading
trans_domid from shared memory */
if ( trans_domid == rd->domain_id )
PIN_FAIL(unlock_out_clear, GNTST_general_error,
"transitive grants cannot be self-referential\n");
/*
* We allow the trans_domid == ldom case, which corresponds to a
* grant being issued by one domain, sent to another one, and then
* transitively granted back to the original domain. Allowing it
* is easy, and means that you don't need to go out of your way to
* avoid it in the guest.
*/
/* We need to leave the rrd locked during the grant copy. */
td = rcu_lock_domain_by_id(trans_domid);
if ( td == NULL )
PIN_FAIL(unlock_out_clear, GNTST_general_error,
"transitive grant referenced bad domain %d\n",
trans_domid);
/*
* acquire_grant_for_copy() could take the lock on the
* remote table (if rd == td), so we have to drop the lock
* here and reacquire.
*/
active_entry_release(act);
grant_read_unlock(rgt);
rc = acquire_grant_for_copy(td, trans_gref, rd->domain_id,
readonly, &grant_frame, page,
&trans_page_off, &trans_length,
false);
grant_read_lock(rgt);
act = active_entry_acquire(rgt, gref);
if ( rc != GNTST_okay )
{
fixup_status_for_copy_pin(act, status);
rcu_unlock_domain(td);
active_entry_release(act);
grant_read_unlock(rgt);
return rc;
}
/*
* We dropped the lock, so we have to check that the grant didn't
* change, and that nobody else tried to pin/unpin it. If anything
* changed, just give up and tell the caller to retry.
*/
if ( rgt->gt_version != 2 ||
act->pin != old_pin ||
(old_pin && (act->domid != ldom || act->frame != grant_frame ||
act->start != trans_page_off ||
act->length != trans_length ||
act->trans_domain != td ||
act->trans_gref != trans_gref ||
!act->is_sub_page)) )
{
release_grant_for_copy(td, trans_gref, readonly);
fixup_status_for_copy_pin(act, status);
rcu_unlock_domain(td);
active_entry_release(act);
grant_read_unlock(rgt);
put_page(*page);
*page = NULL;
return ERESTART;
}
if ( !old_pin )
{
act->domid = ldom;
act->start = trans_page_off;
act->length = trans_length;
act->trans_domain = td;
act->trans_gref = trans_gref;
act->frame = grant_frame;
act_set_gfn(act, INVALID_GFN);
/*
* The actual remote grant may or may not be a sub-page,
* but we always treat it as one because that blocks mappings of
* transitive grants.
*/
act->is_sub_page = true;
}
}
else if ( !old_pin ||
(!readonly && !(old_pin & (GNTPIN_devw_mask|GNTPIN_hstw_mask))) )
{
if ( (rc = _set_status(rgt->gt_version, ldom,
readonly, 0, shah, act,
status) ) != GNTST_okay )
goto unlock_out;
td = rd;
trans_gref = gref;
if ( !sha2 )
{
unsigned long gfn = shared_entry_v1(rgt, gref).frame;
rc = get_paged_frame(gfn, &grant_frame, page, readonly, rd);
if ( rc != GNTST_okay )
goto unlock_out_clear;
act_set_gfn(act, _gfn(gfn));
is_sub_page = false;
trans_page_off = 0;
trans_length = PAGE_SIZE;
}
else if ( !(sha2->hdr.flags & GTF_sub_page) )
{
rc = get_paged_frame(sha2->full_page.frame, &grant_frame, page,
readonly, rd);
if ( rc != GNTST_okay )
goto unlock_out_clear;
act_set_gfn(act, _gfn(sha2->full_page.frame));
is_sub_page = false;
trans_page_off = 0;
trans_length = PAGE_SIZE;
}
else
{
rc = get_paged_frame(sha2->sub_page.frame, &grant_frame, page,
readonly, rd);
if ( rc != GNTST_okay )
goto unlock_out_clear;
act_set_gfn(act, _gfn(sha2->sub_page.frame));
is_sub_page = true;
trans_page_off = sha2->sub_page.page_off;
trans_length = sha2->sub_page.length;
}
if ( !act->pin )
{
act->domid = ldom;
act->is_sub_page = is_sub_page;
act->start = trans_page_off;
act->length = trans_length;
act->trans_domain = td;
act->trans_gref = trans_gref;
act->frame = grant_frame;
}
}
else
{
ASSERT(mfn_valid(_mfn(act->frame)));
*page = mfn_to_page(act->frame);
td = page_get_owner_and_reference(*page);
/*
* act->pin being non-zero should guarantee that the page has a
* non-zero refcount and hence a valid owner (matching the one on
* record), with one exception: if the owning domain is dying we
* must not draw conclusions from the pin count (map_grant_ref(),
* for example, updates pin counts before obtaining page
* references).
*/
if ( td != rd || rd->is_dying )
{
if ( td )
put_page(*page);
*page = NULL;
rc = GNTST_bad_domain;
goto unlock_out_clear;
}
}
act->pin += readonly ? GNTPIN_hstr_inc : GNTPIN_hstw_inc;
*page_off = act->start;
*length = act->length;
*frame = act->frame;
active_entry_release(act);
grant_read_unlock(rgt);
return rc;
unlock_out_clear:
if ( !(readonly) &&
!(act->pin & (GNTPIN_hstw_mask | GNTPIN_devw_mask)) )
gnttab_clear_flag(_GTF_writing, status);
if ( !act->pin )
gnttab_clear_flag(_GTF_reading, status);
unlock_out:
active_entry_release(act);
gt_unlock_out:
grant_read_unlock(rgt);
return rc;
}
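/*
 * Helper state for GNTTABOP_copy: one descriptor per side (source and
 * destination) of a copy, caching the RCU-locked domain, the underlying
 * frame/page and its mapping so that consecutive sub-ops referring to the
 * same grant or frame can reuse them instead of re-acquiring everything.
 */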
struct gnttab_copy_buf {
/* Guest provided. */
struct gnttab_copy_ptr ptr;
uint16_t len;
/* Mapped etc. */
struct domain *domain;
unsigned long frame;
struct page_info *page;
void *virt;
bool_t read_only;
bool_t have_grant;
bool_t have_type;
};
static int gnttab_copy_lock_domain(domid_t domid, bool is_gref,
struct gnttab_copy_buf *buf)
{
/* Only DOMID_SELF may reference via frame. */
if ( domid != DOMID_SELF && !is_gref )
return GNTST_permission_denied;
buf->domain = rcu_lock_domain_by_any_id(domid);
if ( !buf->domain )
return GNTST_bad_domain;
buf->ptr.domid = domid;
return GNTST_okay;
}
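/* Drop the RCU domain references taken by gnttab_copy_lock_domain(). */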
static void gnttab_copy_unlock_domains(struct gnttab_copy_buf *src,
struct gnttab_copy_buf *dest)
{
if ( src->domain )
{
rcu_unlock_domain(src->domain);
src->domain = NULL;
}
if ( dest->domain )
{
rcu_unlock_domain(dest->domain);
dest->domain = NULL;
}
}
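/*
 * RCU-lock both domains involved in a copy and apply the XSM grant-copy
 * check. On failure any domain already locked is released again.
 */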
static int gnttab_copy_lock_domains(const struct gnttab_copy *op,
struct gnttab_copy_buf *src,
struct gnttab_copy_buf *dest)
{
int rc;
rc = gnttab_copy_lock_domain(op->source.domid,
op->flags & GNTCOPY_source_gref, src);
if ( rc < 0 )
goto error;
rc = gnttab_copy_lock_domain(op->dest.domid,
op->flags & GNTCOPY_dest_gref, dest);
if ( rc < 0 )
goto error;
rc = xsm_grant_copy(XSM_HOOK, src->domain, dest->domain);
if ( rc < 0 )
{
rc = GNTST_permission_denied;
goto error;
}
return 0;
error:
gnttab_copy_unlock_domains(src, dest);
return rc;
}
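/*
 * Undo gnttab_copy_claim_buf(): unmap the frame and drop the grant pin,
 * page type and page references that were taken.
 */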
static void gnttab_copy_release_buf(struct gnttab_copy_buf *buf)
{
if ( buf->virt )
{
unmap_domain_page(buf->virt);
buf->virt = NULL;
}
if ( buf->have_grant )
{
release_grant_for_copy(buf->domain, buf->ptr.u.ref, buf->read_only);
buf->have_grant = 0;
}
if ( buf->have_type )
{
put_page_type(buf->page);
buf->have_type = 0;
}
if ( buf->page )
{
put_page(buf->page);
buf->page = NULL;
}
}
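/*
 * Resolve one side of a copy to (frame, page, mapping), either by acquiring
 * the referenced grant or by looking up the guest frame directly. A writable
 * page type reference is taken when claiming the destination side.
 */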
static int gnttab_copy_claim_buf(const struct gnttab_copy *op,
const struct gnttab_copy_ptr *ptr,
struct gnttab_copy_buf *buf,
unsigned int gref_flag)
{
int rc;
buf->read_only = gref_flag == GNTCOPY_source_gref;
if ( op->flags & gref_flag )
{
rc = acquire_grant_for_copy(buf->domain, ptr->u.ref,
current->domain->domain_id,
buf->read_only,
&buf->frame, &buf->page,
&buf->ptr.offset, &buf->len, true);
if ( rc != GNTST_okay )
goto out;
buf->ptr.u.ref = ptr->u.ref;
buf->have_grant = 1;
}
else
{
rc = get_paged_frame(ptr->u.gmfn, &buf->frame, &buf->page,
buf->read_only, buf->domain);
if ( rc != GNTST_okay )
PIN_FAIL(out, rc,
"frame %"PRI_xen_pfn" invalid\n", ptr->u.gmfn);
buf->ptr.u.gmfn = ptr->u.gmfn;
buf->ptr.offset = 0;
buf->len = PAGE_SIZE;
}
if ( !buf->read_only )
{
if ( !get_page_type(buf->page, PGT_writable_page) )
{
if ( !buf->domain->is_dying )
gdprintk(XENLOG_WARNING, "Could not get writable frame %lx\n",
buf->frame);
rc = GNTST_general_error;
goto out;
}
buf->have_type = 1;
}
buf->virt = map_domain_page(_mfn(buf->frame));
rc = GNTST_okay;
out:
return rc;
}
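/* Does the cached buffer still match the gref/gmfn the guest specified? */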
static bool_t gnttab_copy_buf_valid(const struct gnttab_copy_ptr *p,
const struct gnttab_copy_buf *b,
bool_t has_gref)
{
if ( !b->virt )
return 0;
if ( has_gref )
return b->have_grant && p->u.ref == b->ptr.u.ref;
return p->u.gmfn == b->ptr.u.gmfn;
}
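/*
 * Perform the actual copy for one sub-op, after checking that the requested
 * offsets and length lie within the page and the claimed regions.
 */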
static int gnttab_copy_buf(const struct gnttab_copy *op,
struct gnttab_copy_buf *dest,
const struct gnttab_copy_buf *src)
{
int rc;
if ( ((op->source.offset + op->len) > PAGE_SIZE) ||
((op->dest.offset + op->len) > PAGE_SIZE) )
PIN_FAIL(out, GNTST_bad_copy_arg, "copy beyond page area\n");
if ( op->source.offset < src->ptr.offset ||
op->source.offset + op->len > src->ptr.offset + src->len )
PIN_FAIL(out, GNTST_general_error,
"copy source out of bounds: %d < %d || %d > %d\n",
op->source.offset, src->ptr.offset,
op->len, src->len);
if ( op->dest.offset < dest->ptr.offset ||
op->dest.offset + op->len > dest->ptr.offset + dest->len )
PIN_FAIL(out, GNTST_general_error,
"copy dest out of bounds: %d < %d || %d > %d\n",
op->dest.offset, dest->ptr.offset,
op->len, dest->len);
memcpy(dest->virt + op->dest.offset, src->virt + op->source.offset,
op->len);
gnttab_mark_dirty(dest->domain, dest->frame);
rc = GNTST_okay;
out:
return rc;
}
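/*
 * Handle a single copy sub-op: re-lock domains and re-claim the source/
 * destination buffers only where they differ from the previous sub-op,
 * then carry out the copy.
 */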
static int gnttab_copy_one(const struct gnttab_copy *op,
struct gnttab_copy_buf *dest,
struct gnttab_copy_buf *src)
{
int rc;
if ( !src->domain || op->source.domid != src->ptr.domid ||
!dest->domain || op->dest.domid != dest->ptr.domid )
{
gnttab_copy_release_buf(src);
gnttab_copy_release_buf(dest);
gnttab_copy_unlock_domains(src, dest);
rc = gnttab_copy_lock_domains(op, src, dest);
if ( rc < 0 )
goto out;
}
/* Different source? */
if ( !gnttab_copy_buf_valid(&op->source, src,
op->flags & GNTCOPY_source_gref) )
{
gnttab_copy_release_buf(src);
rc = gnttab_copy_claim_buf(op, &op->source, src, GNTCOPY_source_gref);
if ( rc )
goto out;
}
/* Different dest? */
if ( !gnttab_copy_buf_valid(&op->dest, dest,
op->flags & GNTCOPY_dest_gref) )
{
gnttab_copy_release_buf(dest);
rc = gnttab_copy_claim_buf(op, &op->dest, dest, GNTCOPY_dest_gref);
if ( rc )
goto out;
}
rc = gnttab_copy_buf(op, dest, src);
out:
return rc;
}
/*
* gnttab_copy(), unlike the various other helpers of
* do_grant_table_op(), returns (besides possible error indicators)
* "count - i" rather than "i", to ensure that even if no progress
* was made at all (perhaps because gnttab_copy_one() returned a
* positive value) a non-zero value is handed back (zero must be
* avoided, as it means "success, all done").
*/
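/*
 * Purely illustrative numbers: with count == 8 and a preemption after 3
 * completed ops, gnttab_copy() returns 8 - 3 == 5; do_grant_table_op()
 * then advances the guest handle by 8 - 5 == 3 and creates a continuation
 * covering the remaining 5 ops.
 */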
static long gnttab_copy(
XEN_GUEST_HANDLE_PARAM(gnttab_copy_t) uop, unsigned int count)
{
unsigned int i;
struct gnttab_copy op;
struct gnttab_copy_buf src = {};
struct gnttab_copy_buf dest = {};
long rc = 0;
for ( i = 0; i < count; i++ )
{
if ( i && hypercall_preempt_check() )
{
rc = count - i;
break;
}
if ( unlikely(__copy_from_guest(&op, uop, 1)) )
{
rc = -EFAULT;
break;
}
rc = gnttab_copy_one(&op, &dest, &src);
if ( rc > 0 )
{
rc = count - i;
break;
}
if ( rc != GNTST_okay )
{
gnttab_copy_release_buf(&src);
gnttab_copy_release_buf(&dest);
}
op.status = rc;
rc = 0;
if ( unlikely(__copy_field_to_guest(uop, &op, status)) )
{
rc = -EFAULT;
break;
}
guest_handle_add_offset(uop, 1);
}
gnttab_copy_release_buf(&src);
gnttab_copy_release_buf(&dest);
gnttab_copy_unlock_domains(&src, &dest);
return rc;
}
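/*
 * GNTTABOP_set_version: switch the calling domain's grant table between the
 * v1 and v2 layouts. The switch is refused while any entry beyond the
 * reserved ones is in use; the reserved entries themselves are preserved
 * across the conversion.
 */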
static long
gnttab_set_version(XEN_GUEST_HANDLE_PARAM(gnttab_set_version_t) uop)
{
gnttab_set_version_t op;
struct domain *currd = current->domain;
struct grant_table *gt = currd->grant_table;
grant_entry_v1_t reserved_entries[GNTTAB_NR_RESERVED_ENTRIES];
int res;
unsigned int i;
if ( copy_from_guest(&op, uop, 1) )
return -EFAULT;
res = -EINVAL;
if ( op.version != 1 && op.version != 2 )
goto out;
res = 0;
if ( gt->gt_version == op.version )
goto out;
grant_write_lock(gt);
/*
* Make sure that the grant table isn't currently in use when we
* change the version number, except for the first 8 entries which
* are allowed to be in use (xenstore/xenconsole keeps them mapped).
* (Changing the version number is needed e.g. for kexec.)
*/
for ( i = GNTTAB_NR_RESERVED_ENTRIES; i < nr_grant_entries(gt); i++ )
{
if ( read_atomic(&_active_entry(gt, i).pin) != 0 )
{
gdprintk(XENLOG_WARNING,
"tried to change grant table version from %u to %u, but some grant entries still in use\n",
gt->gt_version, op.version);
res = -EBUSY;
goto out_unlock;
}
}
switch ( gt->gt_version )
{
case 0:
if ( op.version == 2 )
{
case 1:
/* XXX: We could maybe shrink the active grant table here. */
res = gnttab_populate_status_frames(currd, gt, nr_grant_frames(gt));
if ( res < 0 )
goto out_unlock;
}
break;
case 2:
for ( i = 0; i < GNTTAB_NR_RESERVED_ENTRIES; i++ )
{
switch ( shared_entry_v2(gt, i).hdr.flags & GTF_type_mask )
{
case GTF_permit_access:
if ( !(shared_entry_v2(gt, i).full_page.frame >> 32) )
break;
/* fall through */
case GTF_transitive:
gdprintk(XENLOG_WARNING,
"tried to change grant table version to 1 with non-representable entries\n");
res = -ERANGE;
goto out_unlock;
}
}
break;
}
/* Preserve the first 8 entries (toolstack reserved grants). */
switch ( gt->gt_version )
{
case 1:
memcpy(reserved_entries, &shared_entry_v1(gt, 0),
sizeof(reserved_entries));
break;
case 2:
for ( i = 0; i < GNTTAB_NR_RESERVED_ENTRIES; i++ )
{
unsigned int flags = shared_entry_v2(gt, i).hdr.flags;
switch ( flags & GTF_type_mask )
{
case GTF_permit_access:
reserved_entries[i].flags = flags | status_entry(gt, i);
reserved_entries[i].domid = shared_entry_v2(gt, i).hdr.domid;
reserved_entries[i].frame = shared_entry_v2(gt, i).full_page.frame;
break;
default:
gdprintk(XENLOG_INFO,
"bad flags %#x in grant %#x when switching version\n",
flags, i);
/* fall through */
case GTF_invalid:
memset(&reserved_entries[i], 0, sizeof(reserved_entries[i]));
break;
}
}
break;
}
if ( op.version < 2 && gt->gt_version == 2 )
gnttab_unpopulate_status_frames(currd, gt);
/* Make sure there's no crud left over from the old version. */
for ( i = 0; i < nr_grant_frames(gt); i++ )
clear_page(gt->shared_raw[i]);
/* Restore the first 8 entries (toolstack reserved grants). */
if ( gt->gt_version )
{
switch ( op.version )
{
case 1:
memcpy(&shared_entry_v1(gt, 0), reserved_entries, sizeof(reserved_entries));
break;
case 2:
for ( i = 0; i < GNTTAB_NR_RESERVED_ENTRIES; i++ )
{
status_entry(gt, i) =
reserved_entries[i].flags & (GTF_reading | GTF_writing);
shared_entry_v2(gt, i).hdr.flags =
reserved_entries[i].flags & ~(GTF_reading | GTF_writing);
shared_entry_v2(gt, i).hdr.domid =
reserved_entries[i].domid;
shared_entry_v2(gt, i).full_page.frame =
reserved_entries[i].frame;
}
break;
}
}
gt->gt_version = op.version;
out_unlock:
grant_write_unlock(gt);
out:
op.version = gt->gt_version;
if ( __copy_to_guest(uop, &op, 1) )
res = -EFAULT;
return res;
}
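/*
 * GNTTABOP_get_status_frames: report the guest frame numbers backing a
 * domain's grant status frames (only populated for grant table v2).
 */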
static long
gnttab_get_status_frames(XEN_GUEST_HANDLE_PARAM(gnttab_get_status_frames_t) uop,
unsigned int count, unsigned int limit_max)
{
gnttab_get_status_frames_t op;
struct domain *d;
struct grant_table *gt;
uint64_t gmfn;
int i;
int rc;
if ( count != 1 )
return -EINVAL;
if ( unlikely(copy_from_guest(&op, uop, 1) != 0) )
{
gdprintk(XENLOG_INFO,
"Fault while reading gnttab_get_status_frames_t\n");
return -EFAULT;
}
d = rcu_lock_domain_by_any_id(op.dom);
if ( d == NULL )
{
op.status = GNTST_bad_domain;
goto out1;
}
rc = xsm_grant_setup(XSM_TARGET, current->domain, d);
if ( rc )
{
op.status = GNTST_permission_denied;
goto out2;
}
gt = d->grant_table;
op.status = GNTST_okay;
grant_read_lock(gt);
if ( unlikely(op.nr_frames > nr_status_frames(gt)) )
{
gdprintk(XENLOG_INFO, "Requested addresses of d%d for %u grant "
"status frames, but it has only %u\n",
d->domain_id, op.nr_frames, nr_status_frames(gt));
op.status = GNTST_general_error;
goto unlock;
}
if ( unlikely(limit_max < grant_to_status_frames(op.nr_frames)) )
{
gdprintk(XENLOG_WARNING,
"grant_to_status_frames(%u) for d%d is too large (%u,%u)\n",
op.nr_frames, d->domain_id,
grant_to_status_frames(op.nr_frames), limit_max);
op.status = GNTST_general_error;
goto unlock;
}
for ( i = 0; i < op.nr_frames; i++ )
{
gmfn = gnttab_status_gmfn(d, gt, i);
if ( copy_to_guest_offset(op.frame_list, i, &gmfn, 1) )
op.status = GNTST_bad_virt_addr;
}
unlock:
grant_read_unlock(gt);
out2:
rcu_unlock_domain(d);
out1:
if ( unlikely(__copy_field_to_guest(uop, &op, status)) )
return -EFAULT;
return 0;
}
static long
gnttab_get_version(XEN_GUEST_HANDLE_PARAM(gnttab_get_version_t) uop)
{
gnttab_get_version_t op;
struct domain *d;
int rc;
if ( copy_from_guest(&op, uop, 1) )
return -EFAULT;
d = rcu_lock_domain_by_any_id(op.dom);
if ( d == NULL )
return -ESRCH;
rc = xsm_grant_query_size(XSM_TARGET, current->domain, d);
if ( rc )
{
rcu_unlock_domain(d);
return rc;
}
op.version = d->grant_table->gt_version;
rcu_unlock_domain(d);
if ( __copy_field_to_guest(uop, &op, version) )
return -EFAULT;
return 0;
}
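/*
 * Swap the shared (and, for v2, status) entries of two grant references.
 * Fails with GNTST_eagain if either reference is currently pinned.
 */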
static s16
swap_grant_ref(grant_ref_t ref_a, grant_ref_t ref_b)
{
struct domain *d = rcu_lock_current_domain();
struct grant_table *gt = d->grant_table;
struct active_grant_entry *act_a = NULL;
struct active_grant_entry *act_b = NULL;
s16 rc = GNTST_okay;
grant_write_lock(gt);
/* Bounds check on the grant refs */
if ( unlikely(ref_a >= nr_grant_entries(d->grant_table)) )
PIN_FAIL(out, GNTST_bad_gntref, "Bad ref-a %#x\n", ref_a);
if ( unlikely(ref_b >= nr_grant_entries(d->grant_table)) )
PIN_FAIL(out, GNTST_bad_gntref, "Bad ref-b %#x\n", ref_b);
/* Swapping the same ref is a no-op. */
if ( ref_a == ref_b )
goto out;
act_a = active_entry_acquire(gt, ref_a);
if ( act_a->pin )
PIN_FAIL(out, GNTST_eagain, "ref a %#x busy\n", ref_a);
act_b = active_entry_acquire(gt, ref_b);
if ( act_b->pin )
PIN_FAIL(out, GNTST_eagain, "ref b %#x busy\n", ref_b);
if ( gt->gt_version == 1 )
{
grant_entry_v1_t shared;
shared = shared_entry_v1(gt, ref_a);
shared_entry_v1(gt, ref_a) = shared_entry_v1(gt, ref_b);
shared_entry_v1(gt, ref_b) = shared;
}
else
{
grant_entry_v2_t shared;
grant_status_t status;
shared = shared_entry_v2(gt, ref_a);
status = status_entry(gt, ref_a);
shared_entry_v2(gt, ref_a) = shared_entry_v2(gt, ref_b);
status_entry(gt, ref_a) = status_entry(gt, ref_b);
shared_entry_v2(gt, ref_b) = shared;
status_entry(gt, ref_b) = status;
}
out:
if ( act_b != NULL )
active_entry_release(act_b);
if ( act_a != NULL )
active_entry_release(act_a);
grant_write_unlock(gt);
rcu_unlock_domain(d);
return rc;
}
static long
gnttab_swap_grant_ref(XEN_GUEST_HANDLE_PARAM(gnttab_swap_grant_ref_t) uop,
unsigned int count)
{
int i;
gnttab_swap_grant_ref_t op;
for ( i = 0; i < count; i++ )
{
if ( i && hypercall_preempt_check() )
return i;
if ( unlikely(__copy_from_guest(&op, uop, 1)) )
return -EFAULT;
op.status = swap_grant_ref(op.ref_a, op.ref_b);
if ( unlikely(__copy_field_to_guest(uop, &op, status)) )
return -EFAULT;
guest_handle_add_offset(uop, 1);
}
return 0;
}
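/*
 * Clean and/or invalidate the data cache for a sub-page range of the frame
 * named by dev_bus_addr. If the caller doesn't own the page, the owner must
 * have granted it to the caller (and the grant be in active use); a positive
 * return value means the preemptible lookup must be continued via *cur_ref.
 */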
static int cache_flush(const gnttab_cache_flush_t *cflush, grant_ref_t *cur_ref)
{
struct domain *d, *owner;
struct page_info *page;
unsigned long mfn;
struct active_grant_entry *act = NULL;
void *v;
int ret;
if ( (cflush->offset >= PAGE_SIZE) ||
(cflush->length > PAGE_SIZE) ||
(cflush->offset + cflush->length > PAGE_SIZE) ||
(cflush->op & ~(GNTTAB_CACHE_INVAL | GNTTAB_CACHE_CLEAN)) )
return -EINVAL;
if ( cflush->length == 0 || cflush->op == 0 )
return !*cur_ref ? 0 : -EILSEQ;
/* currently unimplemented */
if ( cflush->op & GNTTAB_CACHE_SOURCE_GREF )
return -EOPNOTSUPP;
d = rcu_lock_current_domain();
mfn = cflush->a.dev_bus_addr >> PAGE_SHIFT;
if ( !mfn_valid(_mfn(mfn)) )
{
rcu_unlock_domain(d);
return -EINVAL;
}
page = mfn_to_page(mfn);
owner = page_get_owner_and_reference(page);
if ( !owner || !owner->grant_table )
{
rcu_unlock_domain(d);
return -EPERM;
}
if ( d != owner )
{
grant_read_lock(owner->grant_table);
act = grant_map_exists(d, owner->grant_table, mfn, cur_ref);
if ( IS_ERR_OR_NULL(act) )
{
grant_read_unlock(owner->grant_table);
rcu_unlock_domain(d);
put_page(page);
return act ? PTR_ERR(act) : 1;
}
}
v = map_domain_page(_mfn(mfn));
v += cflush->offset;
if ( (cflush->op & GNTTAB_CACHE_INVAL) && (cflush->op & GNTTAB_CACHE_CLEAN) )
ret = clean_and_invalidate_dcache_va_range(v, cflush->length);
else if ( cflush->op & GNTTAB_CACHE_INVAL )
ret = invalidate_dcache_va_range(v, cflush->length);
else if ( cflush->op & GNTTAB_CACHE_CLEAN )
ret = clean_dcache_va_range(v, cflush->length);
else
ret = 0;
if ( d != owner )
{
active_entry_release(act);
grant_read_unlock(owner->grant_table);
}
unmap_domain_page(v);
put_page(page);
return ret;
}
static long
gnttab_cache_flush(XEN_GUEST_HANDLE_PARAM(gnttab_cache_flush_t) uop,
grant_ref_t *cur_ref,
unsigned int count)
{
unsigned int i;
gnttab_cache_flush_t op;
for ( i = 0; i < count; i++ )
{
if ( i && hypercall_preempt_check() )
return i;
if ( unlikely(__copy_from_guest(&op, uop, 1)) )
return -EFAULT;
for ( ; ; )
{
int ret = cache_flush(&op, cur_ref);
if ( ret < 0 )
return ret;
if ( ret == 0 )
break;
if ( hypercall_preempt_check() )
return i;
}
*cur_ref = 0;
guest_handle_add_offset(uop, 1);
}
*cur_ref = 0;
return 0;
}
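/*
 * GNTTABOP hypercall entry point. The low bits of cmd select the operation;
 * the upper bits carry a continuation argument, currently only used by
 * GNTTABOP_cache_flush. A positive return from a handler results in a
 * continuation covering the not yet processed elements.
 */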
long
do_grant_table_op(
unsigned int cmd, XEN_GUEST_HANDLE_PARAM(void) uop, unsigned int count)
{
long rc;
unsigned int opaque_in = cmd & GNTTABOP_ARG_MASK, opaque_out = 0;
if ( (int)count < 0 )
return -EINVAL;
if ( (cmd &= GNTTABOP_CMD_MASK) != GNTTABOP_cache_flush && opaque_in )
return -EINVAL;
rc = -EFAULT;
switch ( cmd )
{
case GNTTABOP_map_grant_ref:
{
XEN_GUEST_HANDLE_PARAM(gnttab_map_grant_ref_t) map =
guest_handle_cast(uop, gnttab_map_grant_ref_t);
if ( unlikely(!guest_handle_okay(map, count)) )
goto out;
rc = gnttab_map_grant_ref(map, count);
if ( rc > 0 )
{
guest_handle_add_offset(map, rc);
uop = guest_handle_cast(map, void);
}
break;
}
case GNTTABOP_unmap_grant_ref:
{
XEN_GUEST_HANDLE_PARAM(gnttab_unmap_grant_ref_t) unmap =
guest_handle_cast(uop, gnttab_unmap_grant_ref_t);
if ( unlikely(!guest_handle_okay(unmap, count)) )
goto out;
rc = gnttab_unmap_grant_ref(unmap, count);
if ( rc > 0 )
{
guest_handle_add_offset(unmap, rc);
uop = guest_handle_cast(unmap, void);
}
break;
}
case GNTTABOP_unmap_and_replace:
{
XEN_GUEST_HANDLE_PARAM(gnttab_unmap_and_replace_t) unmap =
guest_handle_cast(uop, gnttab_unmap_and_replace_t);
if ( unlikely(!guest_handle_okay(unmap, count)) )
goto out;
rc = -ENOSYS;
if ( unlikely(!replace_grant_supported()) )
goto out;
rc = gnttab_unmap_and_replace(unmap, count);
if ( rc > 0 )
{
guest_handle_add_offset(unmap, rc);
uop = guest_handle_cast(unmap, void);
}
break;
}
case GNTTABOP_setup_table:
rc = gnttab_setup_table(
guest_handle_cast(uop, gnttab_setup_table_t), count, UINT_MAX);
ASSERT(rc <= 0);
break;
case GNTTABOP_transfer:
{
XEN_GUEST_HANDLE_PARAM(gnttab_transfer_t) transfer =
guest_handle_cast(uop, gnttab_transfer_t);
if ( unlikely(!guest_handle_okay(transfer, count)) )
goto out;
rc = gnttab_transfer(transfer, count);
if ( rc > 0 )
{
guest_handle_add_offset(transfer, rc);
uop = guest_handle_cast(transfer, void);
}
break;
}
case GNTTABOP_copy:
{
XEN_GUEST_HANDLE_PARAM(gnttab_copy_t) copy =
guest_handle_cast(uop, gnttab_copy_t);
if ( unlikely(!guest_handle_okay(copy, count)) )
goto out;
rc = gnttab_copy(copy, count);
if ( rc > 0 )
{
rc = count - rc;
guest_handle_add_offset(copy, rc);
uop = guest_handle_cast(copy, void);
}
break;
}
case GNTTABOP_query_size:
rc = gnttab_query_size(
guest_handle_cast(uop, gnttab_query_size_t), count);
ASSERT(rc <= 0);
break;
case GNTTABOP_set_version:
rc = gnttab_set_version(guest_handle_cast(uop, gnttab_set_version_t));
break;
case GNTTABOP_get_status_frames:
rc = gnttab_get_status_frames(
guest_handle_cast(uop, gnttab_get_status_frames_t), count,
UINT_MAX);
break;
case GNTTABOP_get_version:
rc = gnttab_get_version(guest_handle_cast(uop, gnttab_get_version_t));
break;
case GNTTABOP_swap_grant_ref:
{
XEN_GUEST_HANDLE_PARAM(gnttab_swap_grant_ref_t) swap =
guest_handle_cast(uop, gnttab_swap_grant_ref_t);
if ( unlikely(!guest_handle_okay(swap, count)) )
goto out;
rc = gnttab_swap_grant_ref(swap, count);
if ( rc > 0 )
{
guest_handle_add_offset(swap, rc);
uop = guest_handle_cast(swap, void);
}
break;
}
case GNTTABOP_cache_flush:
{
XEN_GUEST_HANDLE_PARAM(gnttab_cache_flush_t) cflush =
guest_handle_cast(uop, gnttab_cache_flush_t);
if ( unlikely(!guest_handle_okay(cflush, count)) )
goto out;
rc = gnttab_cache_flush(cflush, &opaque_in, count);
if ( rc > 0 )
{
guest_handle_add_offset(cflush, rc);
uop = guest_handle_cast(cflush, void);
}
opaque_out = opaque_in;
break;
}
default:
rc = -ENOSYS;
break;
}
out:
if ( rc > 0 || opaque_out != 0 )
{
ASSERT(rc < count);
ASSERT((opaque_out & GNTTABOP_CMD_MASK) == 0);
rc = hypercall_create_continuation(__HYPERVISOR_grant_table_op, "ihi",
opaque_out | cmd, uop, count - rc);
}
return rc;
}
#ifdef CONFIG_COMPAT
#include "compat/grant_table.c"
#endif
int
grant_table_create(
struct domain *d)
{
struct grant_table *t;
int ret = 0;
if ( (t = xzalloc(struct grant_table)) == NULL )
return -ENOMEM;
/* Simple stuff. */
percpu_rwlock_resource_init(&t->lock, grant_rwlock);
spin_lock_init(&t->maptrack_lock);
/* Okay, install the structure. */
t->domain = d;
d->grant_table = t;
if ( d->domain_id == 0 )
ret = grant_table_init(d, t, gnttab_dom0_frames(), max_maptrack_frames);
return ret;
}
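/*
 * Tear down, for a dying domain, every grant mapping still recorded in its
 * maptrack table: drop the pins and page references taken at map time and
 * clear the writing/reading status flags in the granting domain's table.
 */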
void
gnttab_release_mappings(
struct domain *d)
{
struct grant_table *gt = d->grant_table, *rgt;
struct grant_mapping *map;
grant_ref_t ref;
grant_handle_t handle;
struct domain *rd;
struct active_grant_entry *act;
grant_entry_header_t *sha;
uint16_t *status;
struct page_info *pg;
BUG_ON(!d->is_dying);
for ( handle = 0; handle < gt->maptrack_limit; handle++ )
{
map = &maptrack_entry(gt, handle);
if ( !(map->flags & (GNTMAP_device_map|GNTMAP_host_map)) )
continue;
ref = map->ref;
gdprintk(XENLOG_INFO, "Grant release %#x ref %#x flags %#x d%d\n",
handle, ref, map->flags, map->domid);
rd = rcu_lock_domain_by_id(map->domid);
if ( rd == NULL )
{
/* Nothing to clear up... */
map->flags = 0;
continue;
}
rgt = rd->grant_table;
grant_read_lock(rgt);
act = active_entry_acquire(rgt, ref);
sha = shared_entry_header(rgt, ref);
if ( rgt->gt_version == 1 )
status = &sha->flags;
else
status = &status_entry(rgt, ref);
pg = mfn_to_page(act->frame);
if ( map->flags & GNTMAP_readonly )
{
if ( map->flags & GNTMAP_device_map )
{
BUG_ON(!(act->pin & GNTPIN_devr_mask));
act->pin -= GNTPIN_devr_inc;
if ( !is_iomem_page(_mfn(act->frame)) )
put_page(pg);
}
if ( map->flags & GNTMAP_host_map )
{
BUG_ON(!(act->pin & GNTPIN_hstr_mask));
act->pin -= GNTPIN_hstr_inc;
if ( gnttab_release_host_mappings(d) &&
!is_iomem_page(_mfn(act->frame)) )
put_page(pg);
}
}
else
{
if ( map->flags & GNTMAP_device_map )
{
BUG_ON(!(act->pin & GNTPIN_devw_mask));
act->pin -= GNTPIN_devw_inc;
if ( !is_iomem_page(_mfn(act->frame)) )
put_page_and_type(pg);
}
if ( map->flags & GNTMAP_host_map )
{
BUG_ON(!(act->pin & GNTPIN_hstw_mask));
act->pin -= GNTPIN_hstw_inc;
if ( gnttab_release_host_mappings(d) &&
!is_iomem_page(_mfn(act->frame)) )
{
if ( gnttab_host_mapping_get_page_type((map->flags &
GNTMAP_readonly),
d, rd) )
put_page_type(pg);
put_page(pg);
}
}
if ( (act->pin & (GNTPIN_devw_mask|GNTPIN_hstw_mask)) == 0 )
gnttab_clear_flag(_GTF_writing, status);
}
if ( act->pin == 0 )
gnttab_clear_flag(_GTF_reading, status);
active_entry_release(act);
grant_read_unlock(rgt);
rcu_unlock_domain(rd);
map->flags = 0;
}
}
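/* Log (a bounded number of) grant entries which are still in use. */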
void grant_table_warn_active_grants(struct domain *d)
{
struct grant_table *gt = d->grant_table;
struct active_grant_entry *act;
grant_ref_t ref;
unsigned int nr_active = 0;
#define WARN_GRANT_MAX 10
grant_read_lock(gt);
for ( ref = 0; ref != nr_grant_entries(gt); ref++ )
{
act = active_entry_acquire(gt, ref);
if ( !act->pin )
{
active_entry_release(act);
continue;
}
nr_active++;
if ( nr_active <= WARN_GRANT_MAX )
printk(XENLOG_G_DEBUG "d%d has active grant %x ("
#ifndef NDEBUG
"GFN %lx, "
#endif
"MFN: %lx)\n",
d->domain_id, ref,
#ifndef NDEBUG
gfn_x(act->gfn),
#endif
act->frame);
active_entry_release(act);
}
if ( nr_active > WARN_GRANT_MAX )
printk(XENLOG_G_DEBUG "d%d has too many (%d) active grants to report\n",
d->domain_id, nr_active);
grant_read_unlock(gt);
#undef WARN_GRANT_MAX
}
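/* Free all grant table state (shared, status, active, maptrack frames). */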
void
grant_table_destroy(
struct domain *d)
{
struct grant_table *t = d->grant_table;
int i;
if ( t == NULL )
return;
gnttab_destroy_arch(t);
for ( i = 0; i < nr_grant_frames(t); i++ )
free_xenheap_page(t->shared_raw[i]);
xfree(t->shared_raw);
for ( i = 0; i < nr_maptrack_frames(t); i++ )
free_xenheap_page(t->maptrack[i]);
vfree(t->maptrack);
for ( i = 0; i < nr_active_grant_frames(t); i++ )
free_xenheap_page(t->active[i]);
xfree(t->active);
for ( i = 0; i < nr_status_frames(t); i++ )
free_xenheap_page(t->status[i]);
xfree(t->status);
xfree(t);
d->grant_table = NULL;
}
void grant_table_init_vcpu(struct vcpu *v)
{
spin_lock_init(&v->maptrack_freelist_lock);
v->maptrack_head = MAPTRACK_TAIL;
v->maptrack_tail = MAPTRACK_TAIL;
}
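/*
 * Apply the per-domain grant/maptrack frame limits requested by the
 * toolstack, after validating them against the global maxima, and set up
 * the domain's tables accordingly.
 */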
int grant_table_set_limits(struct domain *d, unsigned int grant_frames,
unsigned int maptrack_frames)
{
struct grant_table *gt = d->grant_table;
if ( grant_frames < INITIAL_NR_GRANT_FRAMES ||
grant_frames > max_grant_frames ||
maptrack_frames > max_maptrack_frames )
return -EINVAL;
if ( !gt )
return -ENOENT;
/* Set limits. */
return grant_table_init(d, gt, grant_frames, maptrack_frames);
}
#ifdef CONFIG_HAS_MEM_SHARING
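/*
 * Translate a grant reference into the GFN it grants access to, optionally
 * also returning its status. Fails unless the entry is of type
 * GTF_permit_access.
 */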
int mem_sharing_gref_to_gfn(struct grant_table *gt, grant_ref_t ref,
gfn_t *gfn, uint16_t *status)
{
int rc = 0;
uint16_t flags = 0;
grant_read_lock(gt);
if ( gt->gt_version < 1 )
rc = -EINVAL;
else if ( ref >= nr_grant_entries(gt) )
rc = -ENOENT;
else if ( gt->gt_version == 1 )
{
const grant_entry_v1_t *sha1 = &shared_entry_v1(gt, ref);
flags = sha1->flags;
*gfn = _gfn(sha1->frame);
}
else
{
const grant_entry_v2_t *sha2 = &shared_entry_v2(gt, ref);
flags = sha2->hdr.flags;
if ( flags & GTF_sub_page )
*gfn = _gfn(sha2->sub_page.frame);
else
*gfn = _gfn(sha2->full_page.frame);
}
if ( !rc && (flags & GTF_type_mask) != GTF_permit_access )
rc = -ENXIO;
else if ( !rc && status )
{
if ( gt->gt_version == 1 )
*status = flags;
else
*status = status_entry(gt, ref);
}
grant_read_unlock(gt);
return rc;
}
#endif
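/*
 * Map a grant table frame index (as used by XENMEM_add_to_physmap) to the
 * MFN of the corresponding shared frame, growing the table on demand, or,
 * with XENMAPIDX_grant_table_status set on a v2 table, of the corresponding
 * status frame. On success the frame's GFN is recorded.
 */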
int gnttab_map_frame(struct domain *d, unsigned long idx, gfn_t gfn,
mfn_t *mfn)
{
int rc = 0;
struct grant_table *gt = d->grant_table;
grant_write_lock(gt);
if ( gt->gt_version == 0 )
gt->gt_version = 1;
if ( gt->gt_version == 2 &&
(idx & XENMAPIDX_grant_table_status) )
{
idx &= ~XENMAPIDX_grant_table_status;
if ( idx < nr_status_frames(gt) )
*mfn = _mfn(virt_to_mfn(gt->status[idx]));
else
rc = -EINVAL;
}
else
{
if ( (idx >= nr_grant_frames(gt)) && (idx < gt->max_grant_frames) )
gnttab_grow_table(d, idx + 1);
if ( idx < nr_grant_frames(gt) )
*mfn = _mfn(virt_to_mfn(gt->shared_raw[idx]));
else
rc = -EINVAL;
}
if ( !rc )
gnttab_set_frame_gfn(gt, idx, gfn);
grant_write_unlock(gt);
return rc;
}
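/* Dump all in-use grant entries of one domain (used by the 'g' keyhandler). */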
static void gnttab_usage_print(struct domain *rd)
{
int first = 1;
grant_ref_t ref;
struct grant_table *gt = rd->grant_table;
printk(" -------- active -------- -------- shared --------\n");
printk("[ref] localdom mfn pin localdom gmfn flags\n");
grant_read_lock(gt);
printk("grant-table for remote d%d (v%u)\n"
" %u frames (%u max), %u maptrack frames (%u max)\n",
rd->domain_id, gt->gt_version,
nr_grant_frames(gt), gt->max_grant_frames,
nr_maptrack_frames(gt), gt->max_maptrack_frames);
for ( ref = 0; ref != nr_grant_entries(gt); ref++ )
{
struct active_grant_entry *act;
struct grant_entry_header *sha;
uint16_t status;
uint64_t frame;
act = active_entry_acquire(gt, ref);
if ( !act->pin )
{
active_entry_release(act);
continue;
}
sha = shared_entry_header(gt, ref);
if ( gt->gt_version == 1 )
{
status = sha->flags;
frame = shared_entry_v1(gt, ref).frame;
}
else
{
frame = shared_entry_v2(gt, ref).full_page.frame;
status = status_entry(gt, ref);
}
first = 0;
/* [0xXXX] ddddd 0xXXXXXX 0xXXXXXXXX ddddd 0xXXXXXX 0xXX */
printk("[0x%03x] %5d 0x%06lx 0x%08x %5d 0x%06"PRIx64" 0x%02x\n",
ref, act->domid, act->frame, act->pin,
sha->domid, frame, status);
active_entry_release(act);
}
grant_read_unlock(gt);
if ( first )
printk("no active grant table entries\n");
}
static void gnttab_usage_print_all(unsigned char key)
{
struct domain *d;
printk("%s [ key '%c' pressed\n", __func__, key);
for_each_domain ( d )
gnttab_usage_print(d);
printk("%s ] done\n", __func__);
}
static int __init gnttab_usage_init(void)
{
register_keyhandler('g', gnttab_usage_print_all,
"print grant table usage", 1);
return 0;
}
__initcall(gnttab_usage_init);
unsigned int __init gnttab_dom0_frames(void)
{
return min(max_grant_frames, gnttab_dom0_max());
}
/*
* Local variables:
* mode: C
* c-file-style: "BSD"
* c-basic-offset: 4
* tab-width: 4
* indent-tabs-mode: nil
* End:
*/