/* SPDX-License-Identifier: GPL-2.0-or-later */
/******************************************************************************
 * arch/x86/mm/shadow/set.c
 *
 * Simple, mostly-synchronous shadow page tables.
 * Parts of this code are Copyright (c) 2006 by XenSource Inc.
 * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
 * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
 */

#define GUEST_PAGING_LEVELS 0
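/*
 * The code in this file only ever operates on shadow entries, so (unlike
 * multi.c, which is built once per guest paging mode) it does not need a
 * real GUEST_PAGING_LEVELS value; 0 acts as a dummy here.
 */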

#include <xen/sched.h>
#include <xsm/xsm.h>
#include <asm/shadow.h>
#include "private.h"
#include "types.h"

/*
 * These functions update shadow entries (and do bookkeeping on the shadow
 * tables they are in).  It is intended that they are the only
 * functions which ever write (non-zero) data onto a shadow page.
 */
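/*
 * The shadow_set_l[1-4]e() functions below return a combination of
 * SHADOW_SET_CHANGED, SHADOW_SET_FLUSH and SHADOW_SET_ERROR, so that
 * callers know whether a TLB flush (or error handling) is needed after
 * the update.
 */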

static inline void
shadow_write_entries(void *d, const void *s, unsigned int entries, mfn_t mfn)
/*
 * This function does the actual writes to shadow pages.
 * It must not be called directly, since it doesn't do the bookkeeping
 * that shadow_set_l*e() functions do.
 *
 * Copy PTEs safely when processors might be running on the
 * destination pagetable.  This does *not* give safety against
 * concurrent writes (that's what the paging lock is for), just
 * stops the hardware picking up partially written entries.
 */
{
    shadow_l1e_t *dst = d;
    const shadow_l1e_t *src = s;
    void *map = NULL;
    unsigned int i = 0;

    /*
     * Because we mirror access rights at all levels in the shadow, an
     * l2 (or higher) entry with the RW bit cleared will leave us with
     * no write access through the linear map.
     * We detect that by writing to the shadow with put_unsafe() and
     * using map_domain_page() to get a writeable mapping if we need to.
     */
    if ( put_unsafe(*src, dst) )
    {
        perfc_incr(shadow_linear_map_failed);
        map = map_domain_page(mfn);
        dst = map + PAGE_OFFSET(dst);
    }
    else
    {
        ++src;
        ++dst;
        i = 1;
    }

    ASSERT(IS_ALIGNED((unsigned long)dst, sizeof(*dst)));

    for ( ; i < entries; i++ )
        write_atomic(&dst++->l1, src++->l1);

    unmap_domain_page(map);
}
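/*
 * Typical usage, as in the shadow_set_l*e() helpers below: write one
 * (already refcounted) entry in place, e.g.
 *
 *     shadow_write_entries(sl1e, &new_sl1e, 1, sl1mfn);
 *
 * or a pair of adjacent l2 entries in the 2-on-3 case.
 */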

/*
 * "type" is only used to distinguish grant map pages from ordinary RAM
 * i.e. non-p2m_is_grant() pages are treated as p2m_ram_rw.
 */
static inline int
shadow_get_page_from_l1e(shadow_l1e_t sl1e, struct domain *d, p2m_type_t type)
{
    int res;
    mfn_t mfn = shadow_l1e_get_mfn(sl1e);
    const struct page_info *pg = NULL;
    struct domain *owner = NULL;

    ASSERT(!sh_l1e_is_magic(sl1e));
    ASSERT(shadow_mode_refcounts(d));

    if ( mfn_valid(mfn) )
    {
        pg = mfn_to_page(mfn);
        owner = page_get_owner(pg);
    }

    if ( owner == dom_io )
        owner = NULL;
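    /*
     * With owner cleared, dom_io pages take the plain
     * get_page_from_l1e(sl1e, d, d) path below instead of the
     * foreign-mapping or grant-mapping paths.
     */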

    /*
     * If a privileged domain is attempting to install a map of a page it does
     * not own, we let it succeed anyway.
     */
    if ( owner && (d != owner) &&
         !(res = xsm_priv_mapping(XSM_TARGET, d, owner)) )
    {
        res = get_page_from_l1e(sl1e, d, owner);
        SHADOW_PRINTK("privileged %pd installs map of %pd's mfn %"PRI_mfn": %s\n",
                      d, owner, mfn_x(mfn),
                      res >= 0 ? "success" : "failed");
    }
    /* Okay, it might still be a grant mapping PTE.  Try it. */
    else if ( owner &&
              (type == p2m_grant_map_rw ||
               (type == p2m_grant_map_ro &&
                !(shadow_l1e_get_flags(sl1e) & _PAGE_RW))) )
    {
        /*
         * It's a grant mapping.  The grant table implementation will
         * already have checked that we're supposed to have access, so
         * we can just grab a reference directly.
         */
        res = get_page_from_l1e(sl1e, d, owner);
    }
    else
        res = get_page_from_l1e(sl1e, d, d);

    if ( unlikely(res < 0) )
    {
        perfc_incr(shadow_get_page_fail);
        SHADOW_PRINTK("failed: l1e=" SH_PRI_pte "\n", sl1e.l1);
    }

    return res;
}

int shadow_set_l4e(struct domain *d, shadow_l4e_t *sl4e,
                   shadow_l4e_t new_sl4e, mfn_t sl4mfn)
{
    int flags = 0;
    shadow_l4e_t old_sl4e;
    paddr_t paddr;

    ASSERT(sl4e != NULL);
    old_sl4e = *sl4e;

    if ( old_sl4e.l4 == new_sl4e.l4 ) return 0; /* Nothing to do */

    paddr = mfn_to_maddr(sl4mfn) | PAGE_OFFSET(sl4e);

    if ( shadow_l4e_get_flags(new_sl4e) & _PAGE_PRESENT )
    {
        /* About to install a new reference */
        mfn_t sl3mfn = shadow_l4e_get_mfn(new_sl4e);

        if ( !sh_get_ref(d, sl3mfn, paddr) )
        {
            domain_crash(d);
            return SHADOW_SET_ERROR;
        }

        /* Are we pinning l3 shadows to handle weird Linux behaviour? */
        if ( sh_type_is_pinnable(d, SH_type_l3_64_shadow) )
            sh_pin(d, sl3mfn);
    }

    /* Write the new entry */
    shadow_write_entries(sl4e, &new_sl4e, 1, sl4mfn);
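    /*
     * Assumed rationale for the call below: an L4 change may need to be
     * reflected in any per-vCPU copies of the root page table (e.g. under
     * XPTI), which is what flush_root_pgtbl_domain() requests.
     */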
    flush_root_pgtbl_domain(d);

    flags |= SHADOW_SET_CHANGED;

    if ( shadow_l4e_get_flags(old_sl4e) & _PAGE_PRESENT )
    {
        /* We lost a reference to an old mfn. */
        mfn_t osl3mfn = shadow_l4e_get_mfn(old_sl4e);

        if ( !mfn_eq(osl3mfn, shadow_l4e_get_mfn(new_sl4e)) ||
             !perms_strictly_increased(shadow_l4e_get_flags(old_sl4e),
                                       shadow_l4e_get_flags(new_sl4e)) )
            flags |= SHADOW_SET_FLUSH;

        sh_put_ref(d, osl3mfn, paddr);
    }

    return flags;
}

int shadow_set_l3e(struct domain *d, shadow_l3e_t *sl3e,
                   shadow_l3e_t new_sl3e, mfn_t sl3mfn)
{
    int flags = 0;
    shadow_l3e_t old_sl3e;
    paddr_t paddr;

    ASSERT(sl3e != NULL);
    old_sl3e = *sl3e;

    if ( old_sl3e.l3 == new_sl3e.l3 ) return 0; /* Nothing to do */

    paddr = mfn_to_maddr(sl3mfn) | PAGE_OFFSET(sl3e);

    if ( shadow_l3e_get_flags(new_sl3e) & _PAGE_PRESENT )
    {
        /* About to install a new reference */
        if ( !sh_get_ref(d, shadow_l3e_get_mfn(new_sl3e), paddr) )
        {
            domain_crash(d);
            return SHADOW_SET_ERROR;
        }
    }

    /* Write the new entry */
    shadow_write_entries(sl3e, &new_sl3e, 1, sl3mfn);
    flags |= SHADOW_SET_CHANGED;

    if ( shadow_l3e_get_flags(old_sl3e) & _PAGE_PRESENT )
    {
        /* We lost a reference to an old mfn. */
        mfn_t osl2mfn = shadow_l3e_get_mfn(old_sl3e);

        if ( !mfn_eq(osl2mfn, shadow_l3e_get_mfn(new_sl3e)) ||
             !perms_strictly_increased(shadow_l3e_get_flags(old_sl3e),
                                       shadow_l3e_get_flags(new_sl3e)) )
            flags |= SHADOW_SET_FLUSH;

        sh_put_ref(d, osl2mfn, paddr);
    }

    return flags;
}

int shadow_set_l2e(struct domain *d, shadow_l2e_t *sl2e,
                   shadow_l2e_t new_sl2e, mfn_t sl2mfn,
                   unsigned int type_fl1_shadow,
                   mfn_t (*next_page)(mfn_t smfn))
{
    int flags = 0;
    shadow_l2e_t old_sl2e;
    paddr_t paddr;
    /*
     * In 2-on-3 we work with pairs of l2es pointing at two-page
     * shadows.  Reference counting and up-pointers track from the first
     * page of the shadow to the first l2e, so make sure that we're
     * working with those:
     * Start with a pair of identical entries.
     */
    shadow_l2e_t pair[2] = { new_sl2e, new_sl2e };

    if ( next_page )
    {
        /* Align the pointer down so it's pointing at the first of the pair */
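        /*
         * (Entries are 8 bytes, so clearing bit 3 of an already
         * entry-aligned pointer yields the even-indexed slot of the pair.)
         */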
        sl2e = (shadow_l2e_t *)((unsigned long)sl2e & ~sizeof(shadow_l2e_t));
    }

    ASSERT(sl2e != NULL);
    old_sl2e = *sl2e;

    if ( old_sl2e.l2 == new_sl2e.l2 ) return 0; /* Nothing to do */

    paddr = mfn_to_maddr(sl2mfn) | PAGE_OFFSET(sl2e);

    if ( shadow_l2e_get_flags(new_sl2e) & _PAGE_PRESENT )
    {
        mfn_t sl1mfn = shadow_l2e_get_mfn(new_sl2e);
        ASSERT(mfn_to_page(sl1mfn)->u.sh.head);

        /* About to install a new reference */
        if ( !sh_get_ref(d, sl1mfn, paddr) )
        {
            domain_crash(d);
            return SHADOW_SET_ERROR;
        }

#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
        {
            struct page_info *sp = mfn_to_page(sl1mfn);
            mfn_t gl1mfn;

            ASSERT(sp->u.sh.head);
            gl1mfn = backpointer(sp);
            /*
             * If the shadow is a fl1 then the backpointer contains the
             * GFN instead of the GMFN, and it's definitely not OOS.
             */
            if ( (sp->u.sh.type != type_fl1_shadow) && mfn_valid(gl1mfn) &&
                 mfn_is_out_of_sync(gl1mfn) )
                sh_resync(d, gl1mfn);
        }
#endif

        if ( next_page )
        {
            /* Update the second entry to point to the second half of the l1 */
            sl1mfn = next_page(sl1mfn);
            pair[1] = shadow_l2e_from_mfn(sl1mfn,
                                          shadow_l2e_get_flags(new_sl2e));
        }
    }

    /* Write the new entry / entries */
    shadow_write_entries(sl2e, &pair, !next_page ? 1 : 2, sl2mfn);

    flags |= SHADOW_SET_CHANGED;

    if ( shadow_l2e_get_flags(old_sl2e) & _PAGE_PRESENT )
    {
        /* We lost a reference to an old mfn. */
        mfn_t osl1mfn = shadow_l2e_get_mfn(old_sl2e);

        if ( !mfn_eq(osl1mfn, shadow_l2e_get_mfn(new_sl2e)) ||
             !perms_strictly_increased(shadow_l2e_get_flags(old_sl2e),
                                       shadow_l2e_get_flags(new_sl2e)) )
            flags |= SHADOW_SET_FLUSH;

        sh_put_ref(d, osl1mfn, paddr);
    }

    return flags;
}

int shadow_set_l1e(struct domain *d, shadow_l1e_t *sl1e,
                   shadow_l1e_t new_sl1e, p2m_type_t new_type,
                   mfn_t sl1mfn)
{
    int flags = 0;
    shadow_l1e_t old_sl1e;
    unsigned int old_sl1f;
#if SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC
    mfn_t new_gmfn = shadow_l1e_get_mfn(new_sl1e);
#endif

    ASSERT(sl1e != NULL);

#if SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC
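    /*
     * If we're installing a writable mapping of a frame which may go out of
     * sync, record where this mapping lives so that the OOS machinery can
     * find and fix it up when the frame is resynchronised.
     */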
    if ( mfn_valid(new_gmfn) && mfn_oos_may_write(new_gmfn) &&
         ((shadow_l1e_get_flags(new_sl1e) & (_PAGE_RW | _PAGE_PRESENT)) ==
          (_PAGE_RW | _PAGE_PRESENT)) )
        oos_fixup_add(d, new_gmfn, sl1mfn, pgentry_ptr_to_slot(sl1e));
#endif

    old_sl1e = *sl1e;

    if ( old_sl1e.l1 == new_sl1e.l1 ) return 0; /* Nothing to do */

    if ( (shadow_l1e_get_flags(new_sl1e) & _PAGE_PRESENT) &&
         !sh_l1e_is_magic(new_sl1e) )
    {
        /* About to install a new reference */
        if ( shadow_mode_refcounts(d) )
        {
#define PAGE_FLIPPABLE (_PAGE_RW | _PAGE_PWT | _PAGE_PCD | _PAGE_PAT)
            int rc;

            TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_SHADOW_L1_GET_REF);
            switch ( rc = shadow_get_page_from_l1e(new_sl1e, d, new_type) )
            {
            default:
                /* Doesn't look like a pagetable. */
                flags |= SHADOW_SET_ERROR;
                new_sl1e = shadow_l1e_empty();
                break;
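            /*
             * Positive return values up to PAGE_FLIPPABLE are a mask of
             * flags which get_page_from_l1e() wants flipped (e.g. RW or
             * cacheability bits) in the entry we actually install.
             */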
            case PAGE_FLIPPABLE & -PAGE_FLIPPABLE ... PAGE_FLIPPABLE:
                ASSERT(!(rc & ~PAGE_FLIPPABLE));
                new_sl1e = shadow_l1e_flip_flags(new_sl1e, rc);
                /* fall through */
            case 0:
                shadow_vram_get_mfn(shadow_l1e_get_mfn(new_sl1e),
                                    shadow_l1e_get_flags(new_sl1e),
                                    sl1mfn, sl1e, d);
                break;
            }
#undef PAGE_FLIPPABLE
        }
    }

    /* Write the new entry */
    shadow_write_entries(sl1e, &new_sl1e, 1, sl1mfn);
    flags |= SHADOW_SET_CHANGED;

    old_sl1f = shadow_l1e_get_flags(old_sl1e);
    if ( (old_sl1f & _PAGE_PRESENT) && !sh_l1e_is_magic(old_sl1e) &&
         shadow_mode_refcounts(d) )
    {
        /*
         * We lost a reference to an old mfn.
         *
         * N.B. Unlike higher-level sets, we never need an extra flush when
         * writing an l1e.  Because it points to the same guest frame as the
         * guest l1e did, it's the guest's responsibility to trigger a flush
         * later.
         */
        shadow_vram_put_mfn(shadow_l1e_get_mfn(old_sl1e), old_sl1f,
                            sl1mfn, sl1e, d);
        shadow_put_page_from_l1e(old_sl1e, d);
        TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_SHADOW_L1_PUT_REF);
    }

    return flags;
}

/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * indent-tabs-mode: nil
 * End:
 */