1 /******************************************************************************
 * include/asm-x86/p2m.h
3  *
4  * physical-to-machine mappings for automatically-translated domains.
5  *
6  * Copyright (c) 2011 GridCentric Inc. (Andres Lagar-Cavilla)
7  * Copyright (c) 2007 Advanced Micro Devices (Wei Huang)
8  * Parts of this code are Copyright (c) 2006-2007 by XenSource Inc.
9  * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
10  * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
11  *
12  * This program is free software; you can redistribute it and/or modify
13  * it under the terms of the GNU General Public License as published by
14  * the Free Software Foundation; either version 2 of the License, or
15  * (at your option) any later version.
16  *
17  * This program is distributed in the hope that it will be useful,
18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20  * GNU General Public License for more details.
21  *
22  * You should have received a copy of the GNU General Public License
23  * along with this program; If not, see <http://www.gnu.org/licenses/>.
24  */
25 
26 #ifndef _XEN_ASM_X86_P2M_H
27 #define _XEN_ASM_X86_P2M_H
28 
29 #include <xen/paging.h>
30 #include <xen/p2m-common.h>
31 #include <xen/mem_access.h>
32 #include <asm/mem_sharing.h>
33 #include <asm/page.h>    /* for pagetable_t */
34 
35 extern bool_t opt_hap_1gb, opt_hap_2mb;
36 
37 /*
38  * The upper levels of the p2m pagetable always contain full rights; all
39  * variation in the access control bits is made in the level-1 PTEs.
40  *
41  * In addition to the phys-to-machine translation, each p2m PTE contains
42  * *type* information about the gfn it translates, helping Xen to decide
43  * on the correct course of action when handling a page-fault to that
44  * guest frame.  We store the type in the "available" bits of the PTEs
45  * in the table, which gives us 8 possible types on 32-bit systems.
46  * Further expansions of the type system will only be supported on
47  * 64-bit Xen.
48  */
49 
/*
 * AMD IOMMU: When the p2m table is shared with the IOMMU, bits 52-58 of a
 * PTE must be zero; otherwise the hardware generates I/O page faults when a
 * device accesses those pages. Therefore, p2m_ram_rw has to be defined as 0.
 */
/*
 * Type of a p2m entry.  The numeric values are stored in PTEs, so they are
 * spelled out explicitly; p2m_ram_rw in particular has to stay 0 so that a
 * plain RAM entry keeps its type bits clear when the table is shared with
 * an AMD IOMMU.
 */
typedef enum {
    p2m_ram_rw             = 0,  /* Normal read/write guest RAM */
    p2m_invalid            = 1,  /* Nothing mapped here */
    p2m_ram_logdirty       = 2,  /* Temporarily read-only for log-dirty */
    p2m_ram_ro             = 3,  /* Read-only; writes are silently dropped */
    p2m_mmio_dm            = 4,  /* Reads and writes go to the device model */
    p2m_mmio_direct        = 5,  /* Read/write mapping of genuine MMIO area */
    p2m_populate_on_demand = 6,  /* Place-holder for empty memory */

    /*
     * The types below are defined in all builds, but they can only be
     * used in 64-bit builds.
     */
    p2m_grant_map_rw       = 7,  /* Read/write grant mapping */
    p2m_grant_map_ro       = 8,  /* Read-only grant mapping */
    p2m_ram_paging_out     = 9,  /* Memory that is being paged out */
    p2m_ram_paged          = 10, /* Memory that has been paged out */
    p2m_ram_paging_in      = 11, /* Memory that is being paged in */
    p2m_ram_shared         = 12, /* Shared or sharable memory */
    p2m_ram_broken         = 13, /* Broken page, access causes domain crash */
    p2m_map_foreign        = 14, /* RAM pages from a foreign domain */
    p2m_ioreq_server       = 15, /* gfns whose emulation is handled by an
                                  * ioreq server */
} p2m_type_t;
76 
/* Modifier flags for a p2m query; combine with bitwise OR. */
typedef unsigned int p2m_query_t;
#define P2M_ALLOC    (1u << 0)  /* Populate PoD and paged-out entries */
#define P2M_UNSHARE  (1u << 1)  /* Break CoW sharing */
81 
/* Groups of types are handled as bitmaps/masks: one bit per type value. */
#define p2m_to_mask(t) (1UL << (t))
84 
/* RAM types, which map to real machine frames */
#define P2M_RAM_TYPES (p2m_to_mask(p2m_ram_rw) |            \
                       p2m_to_mask(p2m_ram_logdirty) |      \
                       p2m_to_mask(p2m_ram_ro) |            \
                       p2m_to_mask(p2m_ram_paging_out) |    \
                       p2m_to_mask(p2m_ram_paged) |         \
                       p2m_to_mask(p2m_ram_paging_in) |     \
                       p2m_to_mask(p2m_ram_shared) |        \
                       p2m_to_mask(p2m_ioreq_server))

/*
 * Types that represent a physmap hole that is ok to replace with a shared
 * entry
 */
#define P2M_HOLE_TYPES (p2m_to_mask(p2m_mmio_dm) |          \
                        p2m_to_mask(p2m_invalid) |          \
                        p2m_to_mask(p2m_ram_paging_in) |    \
                        p2m_to_mask(p2m_ram_paged))

/* Grant mapping types, which map to a real machine frame in another VM */
#define P2M_GRANT_TYPES (p2m_to_mask(p2m_grant_map_rw) |    \
                         p2m_to_mask(p2m_grant_map_ro))

/* MMIO types, which don't have to map to anything in the frametable */
#define P2M_MMIO_TYPES (p2m_to_mask(p2m_mmio_dm) |          \
                        p2m_to_mask(p2m_mmio_direct))

/* Read-only types, which must have the _PAGE_RW bit clear in their PTEs */
#define P2M_RO_TYPES (p2m_to_mask(p2m_ram_logdirty) |       \
                      p2m_to_mask(p2m_ram_ro) |             \
                      p2m_to_mask(p2m_grant_map_ro) |       \
                      p2m_to_mask(p2m_ram_shared))

/* Write-discard types, for which write operations should be discarded */
#define P2M_DISCARD_WRITE_TYPES (p2m_to_mask(p2m_ram_ro) |  \
                                 p2m_to_mask(p2m_grant_map_ro))

/* Types that can be subject to bulk transitions. */
#define P2M_CHANGEABLE_TYPES (p2m_to_mask(p2m_ram_rw) |         \
                              p2m_to_mask(p2m_ram_logdirty) |   \
                              p2m_to_mask(p2m_ioreq_server))

/* Populate-on-demand types */
#define P2M_POD_TYPES (p2m_to_mask(p2m_populate_on_demand))

/* Pageable types */
#define P2M_PAGEABLE_TYPES (p2m_to_mask(p2m_ram_rw) |       \
                            p2m_to_mask(p2m_ram_logdirty))

/* Types of entries in any stage of paging */
#define P2M_PAGING_TYPES (p2m_to_mask(p2m_ram_paging_out) | \
                          p2m_to_mask(p2m_ram_paged) |      \
                          p2m_to_mask(p2m_ram_paging_in))

/* Types of entries that have been paged out */
#define P2M_PAGED_TYPES (p2m_to_mask(p2m_ram_paged))

/*
 * Shared types
 * XXX: Sharable types could include p2m_ram_ro too, but we would need to
 * reinit the type correctly after fault
 */
#define P2M_SHARABLE_TYPES (p2m_to_mask(p2m_ram_rw) |       \
                            p2m_to_mask(p2m_ram_logdirty))
#define P2M_SHARED_TYPES   (p2m_to_mask(p2m_ram_shared))

/* Valid types not necessarily associated with a (valid) MFN. */
#define P2M_INVALID_MFN_TYPES (P2M_POD_TYPES |                  \
                               p2m_to_mask(p2m_mmio_direct) |   \
                               P2M_PAGING_TYPES)

/*
 * Broken type: the frame backing this pfn has failed in hardware
 * and must not be touched.
 */
#define P2M_BROKEN_TYPES (p2m_to_mask(p2m_ram_broken))
153 
/*
 * Useful predicates.  Each one evaluates to a non-zero mask when the given
 * type belongs to the named group, and to 0 otherwise.
 */
#define p2m_is_ram(t)           (p2m_to_mask(t) & P2M_RAM_TYPES)
#define p2m_is_hole(t)          (p2m_to_mask(t) & P2M_HOLE_TYPES)
#define p2m_is_mmio(t)          (p2m_to_mask(t) & P2M_MMIO_TYPES)
#define p2m_is_readonly(t)      (p2m_to_mask(t) & P2M_RO_TYPES)
#define p2m_is_discard_write(t) (p2m_to_mask(t) & P2M_DISCARD_WRITE_TYPES)
#define p2m_is_changeable(t)    (p2m_to_mask(t) & P2M_CHANGEABLE_TYPES)
#define p2m_is_pod(t)           (p2m_to_mask(t) & P2M_POD_TYPES)
#define p2m_is_grant(t)         (p2m_to_mask(t) & P2M_GRANT_TYPES)
/*
 * Grant types are *not* considered valid, because they can be
 * unmapped at any time and, unless you happen to be the shadow or p2m
 * implementations, there's no way of synchronising against that.
 */
#define p2m_is_valid(t) \
    (p2m_to_mask(t) & (P2M_RAM_TYPES | P2M_MMIO_TYPES))
#define p2m_has_emt(t) \
    (p2m_to_mask(t) & (P2M_RAM_TYPES | p2m_to_mask(p2m_mmio_direct)))
#define p2m_is_pageable(t)      (p2m_to_mask(t) & P2M_PAGEABLE_TYPES)
#define p2m_is_paging(t)        (p2m_to_mask(t) & P2M_PAGING_TYPES)
#define p2m_is_paged(t)         (p2m_to_mask(t) & P2M_PAGED_TYPES)
#define p2m_is_sharable(t)      (p2m_to_mask(t) & P2M_SHARABLE_TYPES)
#define p2m_is_shared(t)        (p2m_to_mask(t) & P2M_SHARED_TYPES)
#define p2m_is_broken(t)        (p2m_to_mask(t) & P2M_BROKEN_TYPES)
#define p2m_is_foreign(t)       (p2m_to_mask(t) & p2m_to_mask(p2m_map_foreign))

/* RAM in the widest sense: ordinary RAM types, grant and foreign mappings */
#define p2m_is_any_ram(t) \
    (p2m_to_mask(t) & \
     (P2M_RAM_TYPES | P2M_GRANT_TYPES | p2m_to_mask(p2m_map_foreign)))

#define p2m_allows_invalid_mfn(t) (p2m_to_mask(t) & P2M_INVALID_MFN_TYPES)
181 
/* Class of a p2m table; stored in struct p2m_domain's p2m_class field. */
typedef enum {
    p2m_host,       /* host p2m */
    p2m_nested,     /* nested p2m */
    p2m_alternate,  /* alternate p2m */
} p2m_class_t;
187 
188 /* Per-p2m-table state */
189 struct p2m_domain {
190     /* Lock that protects updates to the p2m */
191     mm_rwlock_t           lock;
192 
193     /* Shadow translated domain: p2m mapping */
194     pagetable_t        phys_table;
195 
196     /* Same as domain_dirty_cpumask but limited to
197      * this p2m and those physical cpus whose vcpu's are in
198      * guestmode.
199      */
200     cpumask_var_t      dirty_cpumask;
201 
202     struct domain     *domain;   /* back pointer to domain */
203 
204     p2m_class_t       p2m_class; /* host/nested/alternate */
205 
206     /* Nested p2ms only: nested p2m base value that this p2m shadows.
207      * This can be cleared to P2M_BASE_EADDR under the per-p2m lock but
208      * needs both the per-p2m lock and the per-domain nestedp2m lock
209      * to set it to any other value. */
210 #define P2M_BASE_EADDR     (~0ULL)
211     uint64_t           np2m_base;
212     uint64_t           np2m_generation;
213 
214     /* Nested p2ms: linked list of n2pms allocated to this domain.
215      * The host p2m hasolds the head of the list and the np2ms are
216      * threaded on in LRU order. */
217     struct list_head   np2m_list;
218 
219     /* Host p2m: Log-dirty ranges registered for the domain. */
220     struct rangeset   *logdirty_ranges;
221 
222     /* Host p2m: Global log-dirty mode enabled for the domain. */
223     bool_t             global_logdirty;
224 
225     /* Host p2m: when this flag is set, don't flush all the nested-p2m
226      * tables on every host-p2m change.  The setter of this flag
227      * is responsible for performing the full flush before releasing the
228      * host p2m's lock. */
229     int                defer_nested_flush;
230 
231     /* Alternate p2m: count of vcpu's currently using this p2m. */
232     atomic_t           active_vcpus;
233 
234     /* Pages used to construct the p2m */
235     struct page_list_head pages;
236 
237     int                (*set_entry)(struct p2m_domain *p2m,
238                                     gfn_t gfn,
239                                     mfn_t mfn, unsigned int page_order,
240                                     p2m_type_t p2mt,
241                                     p2m_access_t p2ma,
242                                     int sve);
243     mfn_t              (*get_entry)(struct p2m_domain *p2m,
244                                     gfn_t gfn,
245                                     p2m_type_t *p2mt,
246                                     p2m_access_t *p2ma,
247                                     p2m_query_t q,
248                                     unsigned int *page_order,
249                                     bool_t *sve);
250     int                (*recalc)(struct p2m_domain *p2m,
251                                  unsigned long gfn);
252     void               (*enable_hardware_log_dirty)(struct p2m_domain *p2m);
253     void               (*disable_hardware_log_dirty)(struct p2m_domain *p2m);
254     void               (*flush_hardware_cached_dirty)(struct p2m_domain *p2m);
255     void               (*change_entry_type_global)(struct p2m_domain *p2m,
256                                                    p2m_type_t ot,
257                                                    p2m_type_t nt);
258     int                (*change_entry_type_range)(struct p2m_domain *p2m,
259                                                   p2m_type_t ot, p2m_type_t nt,
260                                                   unsigned long first_gfn,
261                                                   unsigned long last_gfn);
262     void               (*memory_type_changed)(struct p2m_domain *p2m);
263 
264     void               (*write_p2m_entry)(struct p2m_domain *p2m,
265                                           unsigned long gfn, l1_pgentry_t *p,
266                                           l1_pgentry_t new, unsigned int level);
267     long               (*audit_p2m)(struct p2m_domain *p2m);
268 
269     /*
270      * P2M updates may require TLBs to be flushed (invalidated).
271      *
272      * If 'defer_flush' is set, flushes may be deferred by setting
273      * 'need_flush' and then flushing in 'tlb_flush()'.
274      *
275      * 'tlb_flush()' is only called if 'need_flush' was set.
276      *
277      * If a flush may be being deferred but an immediate flush is
278      * required (e.g., if a page is being freed to pool other than the
279      * domheap), call p2m_tlb_flush_sync().
280      */
281     void (*tlb_flush)(struct p2m_domain *p2m);
282     unsigned int defer_flush;
283     bool_t need_flush;
284 
285     /* Default P2M access type for each page in the the domain: new pages,
286      * swapped in pages, cleared pages, and pages that are ambiguously
287      * retyped get this access type.  See definition of p2m_access_t. */
288     p2m_access_t default_access;
289 
290     /* If true, and an access fault comes in and there is no vm_event listener,
291      * pause domain.  Otherwise, remove access restrictions. */
292     bool_t       access_required;
293 
294     /* Highest guest frame that's ever been mapped in the p2m */
295     unsigned long max_mapped_pfn;
296 
297     /*
298      * Alternate p2m's only: range of gfn's for which underlying
299      * mfn may have duplicate mappings
300      */
301     unsigned long min_remapped_gfn;
302     unsigned long max_remapped_gfn;
303 
304     /* When releasing shared gfn's in a preemptible manner, recall where
305      * to resume the search */
306     unsigned long next_shared_gfn_to_relinquish;
307 
308     /* Populate-on-demand variables
309      * All variables are protected with the pod lock. We cannot rely on
310      * the p2m lock if it's turned into a fine-grained lock.
311      * We only use the domain page_alloc lock for additions and
312      * deletions to the domain's page list. Because we use it nested
313      * within the PoD lock, we enforce it's ordering (by remembering
314      * the unlock level in the arch_domain sub struct). */
315     struct {
316         struct page_list_head super,   /* List of superpages                */
317                          single;       /* Non-super lists                   */
318         long             count,        /* # of pages in cache lists         */
319                          entry_count;  /* # of pages in p2m marked pod      */
320         gfn_t            reclaim_single; /* Last gfn of a scan */
321         gfn_t            max_guest;    /* gfn of max guest demand-populate */
322 
323         /*
324          * Tracking of the most recently populated PoD pages, for eager
325          * reclamation.
326          */
327         struct pod_mrp_list {
328 #define NR_POD_MRP_ENTRIES 32
329 
330 /* Encode ORDER_2M superpage in top bit of GFN */
331 #define POD_LAST_SUPERPAGE (gfn_x(INVALID_GFN) & ~(gfn_x(INVALID_GFN) >> 1))
332 
333             unsigned long list[NR_POD_MRP_ENTRIES];
334             unsigned int idx;
335         } mrp;
336         mm_lock_t        lock;         /* Locking of private pod structs,   *
337                                         * not relying on the p2m lock.      */
338     } pod;
339     union {
340         struct ept_data ept;
341         /* NPT-equivalent structure could be added here. */
342     };
343 
344      struct {
345          spinlock_t lock;
346          /*
347           * ioreq server who's responsible for the emulation of
348           * gfns with specific p2m type(for now, p2m_ioreq_server).
349           */
350          struct hvm_ioreq_server *server;
351          /*
352           * flags specifies whether read, write or both operations
353           * are to be emulated by an ioreq server.
354           */
355          unsigned int flags;
356          unsigned long entry_count;
357      } ioreq;
358 };
359 
/* Fetch the host p2m table of domain 'd' (its arch.p2m field). */
#define p2m_get_hostp2m(d) ((d)->arch.p2m)
362 
/*
 * Updates vCPU's np2m to match its np2m_base in VMCx12 and returns that np2m.
 */
struct p2m_domain *p2m_get_nestedp2m(struct vcpu *v);

/* Similar to the above, except that the returned p2m is still write-locked */
struct p2m_domain *p2m_get_nestedp2m_locked(struct vcpu *v);

/*
 * If vcpu is in host mode then behaviour matches p2m_get_hostp2m().
 * If vcpu is in guest mode then behaviour matches p2m_get_nestedp2m().
 */
struct p2m_domain *p2m_get_p2m(struct vcpu *v);

/* Values for the 'dir' argument of np2m_schedule() */
#define NP2M_SCHEDLE_IN  0
#define NP2M_SCHEDLE_OUT 1

void np2m_schedule(int dir);
379 
p2m_is_hostp2m(const struct p2m_domain * p2m)380 static inline bool_t p2m_is_hostp2m(const struct p2m_domain *p2m)
381 {
382     return p2m->p2m_class == p2m_host;
383 }
384 
p2m_is_nestedp2m(const struct p2m_domain * p2m)385 static inline bool_t p2m_is_nestedp2m(const struct p2m_domain *p2m)
386 {
387     return p2m->p2m_class == p2m_nested;
388 }
389 
p2m_is_altp2m(const struct p2m_domain * p2m)390 static inline bool_t p2m_is_altp2m(const struct p2m_domain *p2m)
391 {
392     return p2m->p2m_class == p2m_alternate;
393 }
394 
/* Accessor for the phys_table field of a p2m. */
#define p2m_get_pagetable(p2m) ((p2m)->phys_table)
396 
397 /*
398  * Ensure any deferred p2m TLB flush has been completed on all VCPUs.
399  */
400 void p2m_tlb_flush_sync(struct p2m_domain *p2m);
401 void p2m_unlock_and_tlb_flush(struct p2m_domain *p2m);
402 
403 /**** p2m query accessors. They lock p2m_lock, and thus serialize
404  * lookups wrt modifications. They _do not_ release the lock on exit.
405  * After calling any of the variants below, caller needs to use
406  * put_gfn. ****/
407 
408 mfn_t __nonnull(3, 4) __get_gfn_type_access(
409     struct p2m_domain *p2m, unsigned long gfn, p2m_type_t *t,
410     p2m_access_t *a, p2m_query_t q, unsigned int *page_order, bool_t locked);
411 
412 /* Read a particular P2M table, mapping pages as we go.  Most callers
413  * should _not_ call this directly; use the other get_gfn* functions
414  * below unless you know you want to walk a p2m that isn't a domain's
415  * main one.
416  * If the lookup succeeds, the return value is != INVALID_MFN and
417  * *page_order is filled in with the order of the superpage (if any) that
418  * the entry was found in.  */
get_gfn_type_access(struct p2m_domain * p2m,unsigned long gfn,p2m_type_t * t,p2m_access_t * a,p2m_query_t q,unsigned int * page_order)419 static inline mfn_t __nonnull(3, 4) get_gfn_type_access(
420     struct p2m_domain *p2m, unsigned long gfn, p2m_type_t *t,
421     p2m_access_t *a, p2m_query_t q, unsigned int *page_order)
422 {
423     return __get_gfn_type_access(p2m, gfn, t, a, q, page_order, true);
424 }
425 
426 /* General conversion function from gfn to mfn */
get_gfn_type(struct domain * d,unsigned long gfn,p2m_type_t * t,p2m_query_t q)427 static inline mfn_t __nonnull(3) get_gfn_type(
428     struct domain *d, unsigned long gfn, p2m_type_t *t, p2m_query_t q)
429 {
430     p2m_access_t a;
431     return get_gfn_type_access(p2m_get_hostp2m(d), gfn, t, &a, q, NULL);
432 }
433 
/*
 * Syntactic sugar: most callers will use one of these.
 * N.B. get_gfn_query() is the _only_ one guaranteed not to take the
 * p2m lock; none of the others can be called with the p2m or paging
 * lock held.
 * NOTE(review): all three expand to get_gfn_type(), which ends up passing
 * locked=true to __get_gfn_type_access() -- confirm the N.B. still holds.
 */
#define get_gfn(d, g, t) \
    get_gfn_type((d), (g), (t), P2M_ALLOC)
#define get_gfn_query(d, g, t) \
    get_gfn_type((d), (g), (t), 0)
#define get_gfn_unshare(d, g, t) \
    get_gfn_type((d), (g), (t), P2M_ALLOC | P2M_UNSHARE)

/* Will release the p2m_lock for this gfn entry. */
void __put_gfn(struct p2m_domain *p2m, unsigned long gfn);

/* As __put_gfn(), operating on the host p2m of domain 'd'. */
#define put_gfn(d, gfn) __put_gfn(p2m_get_hostp2m((d)), (gfn))
447 
448 /* The intent of the "unlocked" accessor is to have the caller not worry about
449  * put_gfn. They apply to very specific situations: debug printk's, dumps
450  * during a domain crash, or to peek at a p2m entry/type. Caller is not
451  * holding the p2m entry exclusively during or after calling this.
452  *
453  * This is also used in the shadow code whenever the paging lock is
454  * held -- in those cases, the caller is protected against concurrent
455  * p2m updates by the fact that shadow_write_p2m_entry() also takes
456  * the paging lock.
457  *
458  * Note that an unlocked accessor only makes sense for a "query" lookup.
459  * Any other type of query can cause a change in the p2m and may need to
460  * perform locking.
461  */
get_gfn_query_unlocked(struct domain * d,unsigned long gfn,p2m_type_t * t)462 static inline mfn_t get_gfn_query_unlocked(struct domain *d,
463                                            unsigned long gfn,
464                                            p2m_type_t *t)
465 {
466     p2m_access_t a;
467     return __get_gfn_type_access(p2m_get_hostp2m(d), gfn, t, &a, 0, NULL, 0);
468 }
469 
470 /* Atomically look up a GFN and take a reference count on the backing page.
471  * This makes sure the page doesn't get freed (or shared) underfoot,
472  * and should be used by any path that intends to write to the backing page.
473  * Returns NULL if the page is not backed by RAM.
474  * The caller is responsible for calling put_page() afterwards. */
475 struct page_info *p2m_get_page_from_gfn(struct p2m_domain *p2m, gfn_t gfn,
476                                         p2m_type_t *t, p2m_access_t *a,
477                                         p2m_query_t q);
478 
/*
 * Look up 'gfn' in domain 'd' and take a reference on the backing page.
 *
 * Translated domains are handled by p2m_get_page_from_gfn() on the host
 * p2m.  Otherwise 'gfn' is used directly as the mfn; if 't' is non-NULL it
 * is set to p2m_ram_rw, or to p2m_mmio_direct when 'd' is dom_io.
 *
 * Returns the page, or NULL when the mfn is not valid or get_page() fails.
 */
static inline struct page_info *get_page_from_gfn(
    struct domain *d, unsigned long gfn, p2m_type_t *t, p2m_query_t q)
{
    struct page_info *page;

    if ( paging_mode_translate(d) )
        return p2m_get_page_from_gfn(p2m_get_hostp2m(d), _gfn(gfn), t, NULL, q);

    /* Non-translated guests see 1-1 RAM / MMIO mappings everywhere */
    if ( t )
    {
        if ( likely(d != dom_io) )
            *t = p2m_ram_rw;
        else
            *t = p2m_mmio_direct;
    }

    page = __mfn_to_page(gfn);

    /* get_page() is only attempted once the mfn is known to be valid. */
    if ( !mfn_valid(_mfn(gfn)) || !get_page(page, d) )
        return NULL;

    return page;
}
493 
494 
495 /* General conversion function from mfn to gfn */
mfn_to_gfn(struct domain * d,mfn_t mfn)496 static inline unsigned long mfn_to_gfn(struct domain *d, mfn_t mfn)
497 {
498     if ( paging_mode_translate(d) )
499         return get_gpfn_from_mfn(mfn_x(mfn));
500     else
501         return mfn_x(mfn);
502 }
503 
/*
 * Deadlock-avoidance scheme when calling get_gfn on different gfns:
 * records the two (domain, gfn) pairs in the order they were looked up.
 */
struct two_gfns {
    struct domain *first_domain;   /* domain of the first lookup */
    unsigned long  first_gfn;      /* gfn of the first lookup */
    struct domain *second_domain;  /* domain of the second lookup */
    unsigned long  second_gfn;     /* gfn of the second lookup */
};
511 
512 /* Returns mfn, type and access for potential caller consumption, but any
513  * of those can be NULL */
get_two_gfns(struct domain * rd,unsigned long rgfn,p2m_type_t * rt,p2m_access_t * ra,mfn_t * rmfn,struct domain * ld,unsigned long lgfn,p2m_type_t * lt,p2m_access_t * la,mfn_t * lmfn,p2m_query_t q,struct two_gfns * rval)514 static inline void get_two_gfns(struct domain *rd, unsigned long rgfn,
515         p2m_type_t *rt, p2m_access_t *ra, mfn_t *rmfn, struct domain *ld,
516         unsigned long lgfn, p2m_type_t *lt, p2m_access_t *la, mfn_t *lmfn,
517         p2m_query_t q, struct two_gfns *rval)
518 {
519     mfn_t           *first_mfn, *second_mfn, scratch_mfn;
520     p2m_access_t    *first_a, *second_a, scratch_a;
521     p2m_type_t      *first_t, *second_t, scratch_t;
522 
523     /* Sort by domain, if same domain by gfn */
524 
525 #define assign_pointers(dest, source)                   \
526 do {                                                    \
527     rval-> dest ## _domain = source ## d;               \
528     rval-> dest ## _gfn = source ## gfn;                \
529     dest ## _mfn = (source ## mfn) ?: &scratch_mfn;     \
530     dest ## _a   = (source ## a)   ?: &scratch_a;       \
531     dest ## _t   = (source ## t)   ?: &scratch_t;       \
532 } while (0)
533 
534     if ( (rd->domain_id <= ld->domain_id) || ((rd == ld) && (rgfn <= lgfn)) )
535     {
536         assign_pointers(first, r);
537         assign_pointers(second, l);
538     } else {
539         assign_pointers(first, l);
540         assign_pointers(second, r);
541     }
542 
543 #undef assign_pointers
544 
545     /* Now do the gets */
546     *first_mfn  = get_gfn_type_access(p2m_get_hostp2m(rval->first_domain),
547                                       rval->first_gfn, first_t, first_a, q, NULL);
548     *second_mfn = get_gfn_type_access(p2m_get_hostp2m(rval->second_domain),
549                                       rval->second_gfn, second_t, second_a, q, NULL);
550 }
551 
put_two_gfns(struct two_gfns * arg)552 static inline void put_two_gfns(struct two_gfns *arg)
553 {
554     if ( !arg )
555         return;
556 
557     put_gfn(arg->second_domain, arg->second_gfn);
558     put_gfn(arg->first_domain, arg->first_gfn);
559 }
560 
561 /* Init the datastructures for later use by the p2m code */
562 int p2m_init(struct domain *d);
563 
564 /* Allocate a new p2m table for a domain.
565  *
566  * Returns 0 for success or -errno. */
567 int p2m_alloc_table(struct p2m_domain *p2m);
568 
569 /* Return all the p2m resources to Xen. */
570 void p2m_teardown(struct p2m_domain *p2m);
571 void p2m_final_teardown(struct domain *d);
572 
573 /* Add a page to a domain's p2m table */
574 int guest_physmap_add_entry(struct domain *d, gfn_t gfn,
575                             mfn_t mfn, unsigned int page_order,
576                             p2m_type_t t);
577 
578 /* Untyped version for RAM only, for compatibility */
guest_physmap_add_page(struct domain * d,gfn_t gfn,mfn_t mfn,unsigned int page_order)579 static inline int guest_physmap_add_page(struct domain *d,
580                                          gfn_t gfn,
581                                          mfn_t mfn,
582                                          unsigned int page_order)
583 {
584     return guest_physmap_add_entry(d, gfn, mfn, page_order, p2m_ram_rw);
585 }
586 
587 /* Set a p2m range as populate-on-demand */
588 int guest_physmap_mark_populate_on_demand(struct domain *d, unsigned long gfn,
589                                           unsigned int order);
590 /* Enable hardware-assisted log-dirty. */
591 void p2m_enable_hardware_log_dirty(struct domain *d);
592 
593 /* Disable hardware-assisted log-dirty */
594 void p2m_disable_hardware_log_dirty(struct domain *d);
595 
596 /* Flush hardware cached dirty GFNs */
597 void p2m_flush_hardware_cached_dirty(struct domain *d);
598 
599 /* Change types across all p2m entries in a domain */
600 void p2m_change_entry_type_global(struct domain *d,
601                                   p2m_type_t ot, p2m_type_t nt);
602 
603 /* Change types across a range of p2m entries (start ... end-1) */
604 void p2m_change_type_range(struct domain *d,
605                            unsigned long start, unsigned long end,
606                            p2m_type_t ot, p2m_type_t nt);
607 
608 /* Compare-exchange the type of a single p2m entry */
609 int p2m_change_type_one(struct domain *d, unsigned long gfn,
610                         p2m_type_t ot, p2m_type_t nt);
611 
612 /* Synchronously change the p2m type for a range of gfns */
613 int p2m_finish_type_change(struct domain *d,
614                            gfn_t first_gfn,
615                            unsigned long max_nr);
616 
617 /* Report a change affecting memory types. */
618 void p2m_memory_type_changed(struct domain *d);
619 
620 int p2m_is_logdirty_range(struct p2m_domain *, unsigned long start,
621                           unsigned long end);
622 
623 /* Set mmio addresses in the p2m table (for pass-through) */
624 int set_mmio_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn,
625                        unsigned int order, p2m_access_t access);
626 int clear_mmio_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn,
627                          unsigned int order);
628 
629 /* Set identity addresses in the p2m table (for pass-through) */
630 int set_identity_p2m_entry(struct domain *d, unsigned long gfn,
631                            p2m_access_t p2ma, unsigned int flag);
632 int clear_identity_p2m_entry(struct domain *d, unsigned long gfn);
633 
634 /* Add foreign mapping to the guest's p2m table. */
635 int p2m_add_foreign(struct domain *tdom, unsigned long fgfn,
636                     unsigned long gpfn, domid_t foreign_domid);
637 
/*
 * Populate-on-demand
 */

/* Dump PoD information about the domain */
void p2m_pod_dump_data(struct domain *d);

/*
 * Move all pages from the populate-on-demand cache to the domain page_list
 * (usually in preparation for domain destruction)
 */
int p2m_pod_empty_cache(struct domain *d);

/*
 * Set populate-on-demand cache size so that the total memory allocated to a
 * domain matches target
 */
int p2m_pod_set_mem_target(struct domain *d, unsigned long target);

/* Scan pod cache when offline/broken page triggered */
int p2m_pod_offline_or_broken_hit(struct page_info *p);

/* Replace pod cache when offline/broken page triggered */
void p2m_pod_offline_or_broken_replace(struct page_info *p);
660 
661 
662 /*
663  * Paging to disk and page-sharing
664  */
665 
666 /* Modify p2m table for shared gfn */
667 int set_shared_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn);
668 
669 /* Check if a nominated gfn is valid to be paged out */
670 int p2m_mem_paging_nominate(struct domain *d, unsigned long gfn);
671 /* Evict a frame */
672 int p2m_mem_paging_evict(struct domain *d, unsigned long gfn);
673 /* Tell xenpaging to drop a paged out frame */
674 void p2m_mem_paging_drop_page(struct domain *d, unsigned long gfn,
675                                 p2m_type_t p2mt);
676 /* Start populating a paged out frame */
677 void p2m_mem_paging_populate(struct domain *d, unsigned long gfn);
678 /* Prepare the p2m for paging a frame in */
679 int p2m_mem_paging_prep(struct domain *d, unsigned long gfn, uint64_t buffer);
680 /* Resume normal operation (in case a domain was paused) */
681 void p2m_mem_paging_resume(struct domain *d, vm_event_response_t *rsp);
682 
683 /*
684  * Internal functions, only called by other p2m code
685  */
686 
687 mfn_t p2m_alloc_ptp(struct p2m_domain *p2m, unsigned int level);
688 void p2m_free_ptp(struct p2m_domain *p2m, struct page_info *pg);
689 
690 /* Directly set a p2m entry: only for use by p2m code. Does not need
691  * a call to put_gfn afterwards/ */
692 int p2m_set_entry(struct p2m_domain *p2m, gfn_t gfn, mfn_t mfn,
693                   unsigned int page_order, p2m_type_t p2mt, p2m_access_t p2ma);
694 
695 /* Set up function pointers for PT implementation: only for use by p2m code */
696 extern void p2m_pt_init(struct p2m_domain *p2m);
697 
698 void *map_domain_gfn(struct p2m_domain *p2m, gfn_t gfn, mfn_t *mfn,
699                      p2m_type_t *p2mt, p2m_query_t q, uint32_t *pfec);
700 
/*
 * Debugging and auditing of the P2M code.
 *
 * P2M_AUDIT is 0 when NDEBUG is defined and 1 otherwise.
 * P2M_DEBUGGING is fixed at 0.
 */
#ifdef NDEBUG
#define P2M_AUDIT     0
#else
#define P2M_AUDIT     1
#endif
#define P2M_DEBUGGING 0
708 
709 #if P2M_AUDIT
710 extern void audit_p2m(struct domain *d,
711                       uint64_t *orphans,
712                       uint64_t *m2p_bad,
713                       uint64_t *p2m_bad);
714 #endif /* P2M_AUDIT */
715 
/*
 * Printouts.
 *
 * Each message is prefixed with a tag and the name of the calling function.
 * P2M_PRINTK hands its message to debugtrace_printk(), P2M_ERROR hands it
 * to printk() with XENLOG_G_ERR.  P2M_DEBUG hands it to debugtrace_printk()
 * only when P2M_DEBUGGING is non-zero; otherwise it expands to a statement
 * that merely references the format and drops the remaining arguments
 * without evaluating them.
 */
#define P2M_PRINTK(fmt, args...)                           \
    debugtrace_printk("p2m: %s(): " fmt, __func__, ##args)

#define P2M_ERROR(fmt, args...)                            \
    printk(XENLOG_G_ERR "pg error: %s(): " fmt, __func__, ##args)

#if P2M_DEBUGGING
#define P2M_DEBUG(fmt, args...)                            \
    debugtrace_printk("p2mdebug: %s(): " fmt, __func__, ##args)
#else
#define P2M_DEBUG(fmt, args...) do { (void)(fmt); } while ( 0 )
#endif
727 
728 /* Called by p2m code when demand-populating a PoD page */
729 bool
730 p2m_pod_demand_populate(struct p2m_domain *p2m, gfn_t gfn, unsigned int order);
731 
732 /*
733  * Functions specific to the p2m-pt implementation
734  */
735 
736 /* Extract the type from the PTE flags that store it */
p2m_flags_to_type(unsigned long flags)737 static inline p2m_type_t p2m_flags_to_type(unsigned long flags)
738 {
739     /* For AMD IOMMUs we need to use type 0 for plain RAM, but we need
740      * to make sure that an entirely empty PTE doesn't have RAM type */
741     if ( flags == 0 )
742         return p2m_invalid;
743     /* AMD IOMMUs use bits 9-11 to encode next io page level and bits
744      * 59-62 for iommu flags so we can't use them to store p2m type info. */
745     return (flags >> 12) & 0x7f;
746 }
747 
/*
 * Work out the type an entry should have after a pending recalculation.
 *
 * @t is returned untouched when @recalc is false, when @t is not a
 * changeable type, or when @t is p2m_ioreq_server and @p2m has an ioreq
 * server set.  In every other case the result is p2m_ram_logdirty if
 * p2m_is_logdirty_range() reports true for [@gfn_start, @gfn_end], and
 * p2m_ram_rw if it does not.
 */
static inline p2m_type_t p2m_recalc_type_range(bool recalc, p2m_type_t t,
                                               struct p2m_domain *p2m,
                                               unsigned long gfn_start,
                                               unsigned long gfn_end)
{
    /* No recalculation requested, or the type is not one that changes. */
    if ( !(recalc && p2m_is_changeable(t)) )
        return t;

    /* ioreq_server entries keep their type while a server is set. */
    if ( t == p2m_ioreq_server && p2m->ioreq.server )
        return t;

    if ( p2m_is_logdirty_range(p2m, gfn_start, gfn_end) )
        return p2m_ram_logdirty;

    return p2m_ram_rw;
}
762 
/*
 * Single-gfn form of p2m_recalc_type_range(): the range handed on both
 * starts and ends at @gfn.
 */
static inline p2m_type_t p2m_recalc_type(bool recalc, p2m_type_t t,
                                         struct p2m_domain *p2m,
                                         unsigned long gfn)
{
    const unsigned long first = gfn, last = gfn;

    return p2m_recalc_type_range(recalc, t, p2m, first, last);
}
769 
770 int p2m_pt_handle_deferred_changes(uint64_t gpa);
771 
772 /*
773  * Nested p2m: shadow p2m tables used for nested HVM virtualization
774  */
775 
776 /* Flushes specified p2m table */
777 void p2m_flush(struct vcpu *v, struct p2m_domain *p2m);
778 /* Flushes all nested p2m tables */
779 void p2m_flush_nestedp2m(struct domain *d);
780 /* Flushes the np2m specified by np2m_base (if it exists) */
781 void np2m_flush_base(struct vcpu *v, unsigned long np2m_base);
782 
783 void nestedp2m_write_p2m_entry(struct p2m_domain *p2m, unsigned long gfn,
784     l1_pgentry_t *p, l1_pgentry_t new, unsigned int level);
785 
786 /*
787  * Alternate p2m: shadow p2m tables used for alternate memory views
788  */
789 
790 /* get current alternate p2m table */
p2m_get_altp2m(struct vcpu * v)791 static inline struct p2m_domain *p2m_get_altp2m(struct vcpu *v)
792 {
793     unsigned int index = vcpu_altp2m(v).p2midx;
794 
795     if ( index == INVALID_ALTP2M )
796         return NULL;
797 
798     BUG_ON(index >= MAX_ALTP2M);
799 
800     return v->domain->arch.altp2m_p2m[index];
801 }
802 
803 /* Switch alternate p2m for a single vcpu */
804 bool_t p2m_switch_vcpu_altp2m_by_id(struct vcpu *v, unsigned int idx);
805 
806 /* Check to see if vcpu should be switched to a different p2m. */
807 void p2m_altp2m_check(struct vcpu *v, uint16_t idx);
808 
809 /* Flush all the alternate p2m's for a domain */
810 void p2m_flush_altp2m(struct domain *d);
811 
812 /* Alternate p2m paging */
813 bool_t p2m_altp2m_lazy_copy(struct vcpu *v, paddr_t gpa,
814     unsigned long gla, struct npfec npfec, struct p2m_domain **ap2m);
815 
816 /* Make a specific alternate p2m valid */
817 int p2m_init_altp2m_by_id(struct domain *d, unsigned int idx);
818 
819 /* Find an available alternate p2m and make it valid */
820 int p2m_init_next_altp2m(struct domain *d, uint16_t *idx);
821 
822 /* Make a specific alternate p2m invalid */
823 int p2m_destroy_altp2m_by_id(struct domain *d, unsigned int idx);
824 
825 /* Switch alternate p2m for entire domain */
826 int p2m_switch_domain_altp2m_by_id(struct domain *d, unsigned int idx);
827 
828 /* Change a gfn->mfn mapping */
829 int p2m_change_altp2m_gfn(struct domain *d, unsigned int idx,
830                           gfn_t old_gfn, gfn_t new_gfn);
831 
832 /* Propagate a host p2m change to all alternate p2m's */
833 void p2m_altp2m_propagate_change(struct domain *d, gfn_t gfn,
834                                  mfn_t mfn, unsigned int page_order,
835                                  p2m_type_t p2mt, p2m_access_t p2ma);
836 
837 /*
838  * p2m type to IOMMU flags
839  */
/*
 * Map a p2m type (and, for direct MMIO, the mfn) to IOMMUF_* flags.
 *
 *  - p2m_ram_rw, p2m_grant_map_rw, p2m_ram_logdirty, p2m_map_foreign:
 *    readable and writable.
 *  - p2m_ram_ro, p2m_grant_map_ro: readable only.
 *  - p2m_mmio_direct: readable, plus writable unless @mfn is contained in
 *    mmio_ro_ranges.
 *  - every other type: 0.
 */
static inline unsigned int p2m_get_iommu_flags(p2m_type_t p2mt, mfn_t mfn)
{
    switch ( p2mt )
    {
    case p2m_ram_rw:
    case p2m_grant_map_rw:
    case p2m_ram_logdirty:
    case p2m_map_foreign:
        return IOMMUF_readable | IOMMUF_writable;

    case p2m_ram_ro:
    case p2m_grant_map_ro:
        return IOMMUF_readable;

    case p2m_mmio_direct:
        /* Frames listed in mmio_ro_ranges do not get write permission. */
        if ( rangeset_contains_singleton(mmio_ro_ranges, mfn_x(mfn)) )
            return IOMMUF_readable;

        return IOMMUF_readable | IOMMUF_writable;

    default:
        return 0;
    }
}
868 
869 int p2m_set_ioreq_server(struct domain *d, unsigned int flags,
870                          struct hvm_ioreq_server *s);
871 struct hvm_ioreq_server *p2m_get_ioreq_server(struct domain *d,
872                                               unsigned int *flags);
873 
874 #endif /* _XEN_ASM_X86_P2M_H */
875 
876 /*
877  * Local variables:
878  * mode: C
879  * c-file-style: "BSD"
880  * c-basic-offset: 4
881  * indent-tabs-mode: nil
882  * End:
883  */
884