/* SPDX-License-Identifier: GPL-2.0-or-later */
/******************************************************************************
 * arch/x86/mm/shadow/set.c
 *
 * Simple, mostly-synchronous shadow page tables.
 * Parts of this code are Copyright (c) 2006 by XenSource Inc.
 * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
 * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
 */

#define GUEST_PAGING_LEVELS 0

#include <xen/sched.h>
#include <xsm/xsm.h>
#include <asm/shadow.h>
#include "private.h"
#include "types.h"

/*
 * These functions update shadow entries (and do bookkeeping on the shadow
 * tables they are in).  It is intended that they are the only
 * functions which ever write (non-zero) data onto a shadow page.
 */

static inline void
shadow_write_entries(void *d, const void *s, unsigned int entries, mfn_t mfn)
/*
 * This function does the actual writes to shadow pages.
 * It must not be called directly, since it doesn't do the bookkeeping
 * that shadow_set_l*e() functions do.
 *
 * Copy PTEs safely when processors might be running on the
 * destination pagetable.  This does *not* give safety against
 * concurrent writes (that's what the paging lock is for), just
 * stops the hardware picking up partially written entries.
 */
{
    shadow_l1e_t *dst = d;
    const shadow_l1e_t *src = s;
    void *map = NULL;
    unsigned int i = 0;

    /*
     * Because we mirror access rights at all levels in the shadow, an
     * l2 (or higher) entry with the RW bit cleared will leave us with
     * no write access through the linear map.
     * We detect that by writing to the shadow with put_unsafe() and
     * using map_domain_page() to get a writeable mapping if we need to.
     */
    if ( put_unsafe(*src, dst) )
    {
        perfc_incr(shadow_linear_map_failed);
        map = map_domain_page(mfn);
        dst = map + PAGE_OFFSET(dst);
    }
    else
    {
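        /*
         * put_unsafe() has already stored the first entry through the
         * linear mapping, so skip it and copy the remaining ones below.
         */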
        ++src;
        ++dst;
        i = 1;
    }

    ASSERT(IS_ALIGNED((unsigned long)dst, sizeof(*dst)));

    for ( ; i < entries; i++ )
        write_atomic(&dst++->l1, src++->l1);

    unmap_domain_page(map);
}

/*
 * "type" is only used to distinguish grant map pages from ordinary RAM
 * i.e. non-p2m_is_grant() pages are treated as p2m_ram_rw.
 */
static inline int
shadow_get_page_from_l1e(shadow_l1e_t sl1e, struct domain *d, p2m_type_t type)
{
    int res;
    mfn_t mfn = shadow_l1e_get_mfn(sl1e);
    const struct page_info *pg = NULL;
    struct domain *owner = NULL;

    ASSERT(!sh_l1e_is_magic(sl1e));
    ASSERT(shadow_mode_refcounts(d));

    if ( mfn_valid(mfn) )
    {
        pg = mfn_to_page(mfn);
        owner = page_get_owner(pg);
    }

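    /*
     * Pages owned by dom_io (e.g. direct MMIO mappings) are treated as
     * un-owned here, so they take the ordinary get_page_from_l1e() path
     * below rather than the privileged-mapping one.
     */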
    if ( owner == dom_io )
        owner = NULL;

    /*
     * If a privileged domain is attempting to install a map of a page it does
     * not own, we let it succeed anyway.
     */
    if ( owner && (d != owner) &&
         !(res = xsm_priv_mapping(XSM_TARGET, d, owner)) )
    {
        res = get_page_from_l1e(sl1e, d, owner);
        SHADOW_PRINTK("privileged %pd installs map of %pd's mfn %"PRI_mfn": %s\n",
                      d, owner, mfn_x(mfn),
                      res >= 0 ? "success" : "failed");
    }
    /* Okay, it might still be a grant mapping PTE.  Try it. */
    else if ( owner &&
              (type == p2m_grant_map_rw ||
               (type == p2m_grant_map_ro &&
                !(shadow_l1e_get_flags(sl1e) & _PAGE_RW))) )
    {
        /*
         * It's a grant mapping.  The grant table implementation will
         * already have checked that we're supposed to have access, so
         * we can just grab a reference directly.
         */
        res = get_page_from_l1e(sl1e, d, owner);
    }
    else
        res = get_page_from_l1e(sl1e, d, d);

    if ( unlikely(res < 0) )
    {
        perfc_incr(shadow_get_page_fail);
        SHADOW_PRINTK("failed: l1e=" SH_PRI_pte "\n", sl1e.l1);
    }

    return res;
}

int shadow_set_l4e(struct domain *d, shadow_l4e_t *sl4e,
                   shadow_l4e_t new_sl4e, mfn_t sl4mfn)
{
    int flags = 0;
    shadow_l4e_t old_sl4e;
    paddr_t paddr;

    ASSERT(sl4e != NULL);
    old_sl4e = *sl4e;

    if ( old_sl4e.l4 == new_sl4e.l4 ) return 0; /* Nothing to do */

    paddr = mfn_to_maddr(sl4mfn) | PAGE_OFFSET(sl4e);

    if ( shadow_l4e_get_flags(new_sl4e) & _PAGE_PRESENT )
    {
        /* About to install a new reference */
        mfn_t sl3mfn = shadow_l4e_get_mfn(new_sl4e);

        if ( !sh_get_ref(d, sl3mfn, paddr) )
        {
            domain_crash(d);
            return SHADOW_SET_ERROR;
        }

        /* Are we pinning l3 shadows to handle weird Linux behaviour? */
        if ( sh_type_is_pinnable(d, SH_type_l3_64_shadow) )
            sh_pin(d, sl3mfn);
    }

    /* Write the new entry */
    shadow_write_entries(sl4e, &new_sl4e, 1, sl4mfn);
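    /*
     * L4 updates may also need to reach per-vCPU copies of the root page
     * table (e.g. for PV guests running with XPTI), hence the explicit
     * flush below.
     */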
    flush_root_pgtbl_domain(d);

    flags |= SHADOW_SET_CHANGED;

    if ( shadow_l4e_get_flags(old_sl4e) & _PAGE_PRESENT )
    {
        /* We lost a reference to an old mfn. */
        mfn_t osl3mfn = shadow_l4e_get_mfn(old_sl4e);

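        /*
         * The caller needs to flush TLBs unless the slot still points at
         * the same shadow and the new entry's permissions are strictly
         * wider than the old ones.
         */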
        if ( !mfn_eq(osl3mfn, shadow_l4e_get_mfn(new_sl4e)) ||
             !perms_strictly_increased(shadow_l4e_get_flags(old_sl4e),
                                       shadow_l4e_get_flags(new_sl4e)) )
            flags |= SHADOW_SET_FLUSH;

        sh_put_ref(d, osl3mfn, paddr);
    }

    return flags;
}

int shadow_set_l3e(struct domain *d, shadow_l3e_t *sl3e,
                   shadow_l3e_t new_sl3e, mfn_t sl3mfn)
{
    int flags = 0;
    shadow_l3e_t old_sl3e;
    paddr_t paddr;

    ASSERT(sl3e != NULL);
    old_sl3e = *sl3e;

    if ( old_sl3e.l3 == new_sl3e.l3 ) return 0; /* Nothing to do */

    paddr = mfn_to_maddr(sl3mfn) | PAGE_OFFSET(sl3e);

    if ( shadow_l3e_get_flags(new_sl3e) & _PAGE_PRESENT )
    {
        /* About to install a new reference */
        if ( !sh_get_ref(d, shadow_l3e_get_mfn(new_sl3e), paddr) )
        {
            domain_crash(d);
            return SHADOW_SET_ERROR;
        }
    }

    /* Write the new entry */
    shadow_write_entries(sl3e, &new_sl3e, 1, sl3mfn);
    flags |= SHADOW_SET_CHANGED;

    if ( shadow_l3e_get_flags(old_sl3e) & _PAGE_PRESENT )
    {
        /* We lost a reference to an old mfn. */
        mfn_t osl2mfn = shadow_l3e_get_mfn(old_sl3e);

        if ( !mfn_eq(osl2mfn, shadow_l3e_get_mfn(new_sl3e)) ||
             !perms_strictly_increased(shadow_l3e_get_flags(old_sl3e),
                                       shadow_l3e_get_flags(new_sl3e)) )
            flags |= SHADOW_SET_FLUSH;

        sh_put_ref(d, osl2mfn, paddr);
    }

    return flags;
}

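/*
 * next_page is non-NULL only in the 2-on-3 case, where an l1 shadow spans
 * two pages and l2 entries are managed in pairs; it returns the mfn of the
 * second page of such a shadow.  type_fl1_shadow is the shadow type that
 * denotes fl1 shadows for the caller's guest paging levels, used only by
 * the out-of-sync check below.
 */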
int shadow_set_l2e(struct domain *d, shadow_l2e_t *sl2e,
                   shadow_l2e_t new_sl2e, mfn_t sl2mfn,
                   unsigned int type_fl1_shadow,
                   mfn_t (*next_page)(mfn_t smfn))
{
    int flags = 0;
    shadow_l2e_t old_sl2e;
    paddr_t paddr;
    /*
     * In 2-on-3 we work with pairs of l2es pointing at two-page
     * shadows.  Reference counting and up-pointers track from the first
     * page of the shadow to the first l2e, so make sure that we're
     * working with those:
     * Start with a pair of identical entries.
     */
    shadow_l2e_t pair[2] = { new_sl2e, new_sl2e };

    if ( next_page )
    {
        /* Align the pointer down so it's pointing at the first of the pair */
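        /*
         * sl2e is aligned to sizeof(shadow_l2e_t), so clearing that bit
         * rounds it down to the first (even) slot of the pair.
         */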
        sl2e = (shadow_l2e_t *)((unsigned long)sl2e & ~sizeof(shadow_l2e_t));
    }

    ASSERT(sl2e != NULL);
    old_sl2e = *sl2e;

    if ( old_sl2e.l2 == new_sl2e.l2 ) return 0; /* Nothing to do */

    paddr = mfn_to_maddr(sl2mfn) | PAGE_OFFSET(sl2e);

    if ( shadow_l2e_get_flags(new_sl2e) & _PAGE_PRESENT )
    {
        mfn_t sl1mfn = shadow_l2e_get_mfn(new_sl2e);
        ASSERT(mfn_to_page(sl1mfn)->u.sh.head);

        /* About to install a new reference */
        if ( !sh_get_ref(d, sl1mfn, paddr) )
        {
            domain_crash(d);
            return SHADOW_SET_ERROR;
        }

#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
        {
            struct page_info *sp = mfn_to_page(sl1mfn);
            mfn_t gl1mfn;

            ASSERT(sp->u.sh.head);
            gl1mfn = backpointer(sp);
            /*
             * If the shadow is a fl1 then the backpointer contains the
             * GFN instead of the GMFN, and it's definitely not OOS.
             */
            if ( (sp->u.sh.type != type_fl1_shadow) && mfn_valid(gl1mfn)
                 && mfn_is_out_of_sync(gl1mfn) )
                sh_resync(d, gl1mfn);
        }
#endif

        if ( next_page )
        {
            /* Update the second entry to point to the second half of the l1 */
            sl1mfn = next_page(sl1mfn);
            pair[1] = shadow_l2e_from_mfn(sl1mfn,
                                          shadow_l2e_get_flags(new_sl2e));
        }
    }

    /* Write the new entry / entries */
    shadow_write_entries(sl2e, &pair, !next_page ? 1 : 2, sl2mfn);

    flags |= SHADOW_SET_CHANGED;

    if ( shadow_l2e_get_flags(old_sl2e) & _PAGE_PRESENT )
    {
        /* We lost a reference to an old mfn. */
        mfn_t osl1mfn = shadow_l2e_get_mfn(old_sl2e);

        if ( !mfn_eq(osl1mfn, shadow_l2e_get_mfn(new_sl2e)) ||
             !perms_strictly_increased(shadow_l2e_get_flags(old_sl2e),
                                       shadow_l2e_get_flags(new_sl2e)) )
            flags |= SHADOW_SET_FLUSH;

        sh_put_ref(d, osl1mfn, paddr);
    }

    return flags;
}

int shadow_set_l1e(struct domain *d, shadow_l1e_t *sl1e,
                   shadow_l1e_t new_sl1e, p2m_type_t new_type,
                   mfn_t sl1mfn)
{
    int flags = 0;
    shadow_l1e_t old_sl1e;
    unsigned int old_sl1f;
#if SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC
    mfn_t new_gmfn = shadow_l1e_get_mfn(new_sl1e);
#endif

    ASSERT(sl1e != NULL);

#if SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC
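    /*
     * When installing a writeable mapping of a page that may be shadowed
     * out of sync, record it in the OOS fixup table so the writeable
     * mapping can be found and removed again at resync time.
     */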
    if ( mfn_valid(new_gmfn) && mfn_oos_may_write(new_gmfn) &&
         ((shadow_l1e_get_flags(new_sl1e) & (_PAGE_RW | _PAGE_PRESENT)) ==
          (_PAGE_RW | _PAGE_PRESENT)) )
        oos_fixup_add(d, new_gmfn, sl1mfn, pgentry_ptr_to_slot(sl1e));
#endif

    old_sl1e = *sl1e;

    if ( old_sl1e.l1 == new_sl1e.l1 ) return 0; /* Nothing to do */

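    /*
     * Magic l1es (the fast-path not-present and MMIO encodings) don't
     * reference a real frame, so no refcount is taken for them below.
     */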
    if ( (shadow_l1e_get_flags(new_sl1e) & _PAGE_PRESENT) &&
         !sh_l1e_is_magic(new_sl1e) )
    {
        /* About to install a new reference */
        if ( shadow_mode_refcounts(d) )
        {
#define PAGE_FLIPPABLE (_PAGE_RW | _PAGE_PWT | _PAGE_PCD | _PAGE_PAT)
            int rc;

            TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_SHADOW_L1_GET_REF);
            switch ( rc = shadow_get_page_from_l1e(new_sl1e, d, new_type) )
            {
            default:
                /* Doesn't look like a pagetable. */
                flags |= SHADOW_SET_ERROR;
                new_sl1e = shadow_l1e_empty();
                break;
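            /*
             * Positive return values are a mask of flags to flip in the
             * entry; the case range below catches any value from
             * PAGE_FLIPPABLE's lowest set bit up to the full mask, and the
             * ASSERT checks that no unexpected bits are set.
             */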
            case PAGE_FLIPPABLE & -PAGE_FLIPPABLE ... PAGE_FLIPPABLE:
                ASSERT(!(rc & ~PAGE_FLIPPABLE));
                new_sl1e = shadow_l1e_flip_flags(new_sl1e, rc);
                /* fall through */
            case 0:
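                /*
                 * Keep the dirty-VRAM tracking bookkeeping up to date for
                 * mappings that might cover video RAM.
                 */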
                shadow_vram_get_mfn(shadow_l1e_get_mfn(new_sl1e),
                                    shadow_l1e_get_flags(new_sl1e),
                                    sl1mfn, sl1e, d);
                break;
            }
#undef PAGE_FLIPPABLE
        }
    }

    /* Write the new entry */
    shadow_write_entries(sl1e, &new_sl1e, 1, sl1mfn);
    flags |= SHADOW_SET_CHANGED;

    old_sl1f = shadow_l1e_get_flags(old_sl1e);
    if ( (old_sl1f & _PAGE_PRESENT) && !sh_l1e_is_magic(old_sl1e) &&
         shadow_mode_refcounts(d) )
    {
        /*
         * We lost a reference to an old mfn.
         *
         * N.B. Unlike the higher-level set functions, we never need an
         * extra flush when writing an l1e.  Because it points to the same
         * guest frame as the guest l1e did, it's the guest's responsibility
         * to trigger a flush later.
         */
        shadow_vram_put_mfn(shadow_l1e_get_mfn(old_sl1e), old_sl1f,
                            sl1mfn, sl1e, d);
        shadow_put_page_from_l1e(old_sl1e, d);
        TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_SHADOW_L1_PUT_REF);
    }

    return flags;
}

/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * indent-tabs-mode: nil
 * End:
 */