/******************************************************************************
 * include/asm-x86/shadow.h
 *
 * Parts of this code are Copyright (c) 2006 by XenSource Inc.
 * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
 * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; If not, see <http://www.gnu.org/licenses/>.
 */

#ifndef _XEN_SHADOW_H
#define _XEN_SHADOW_H

#include <xen/sched.h>
#include <xen/perfc.h>
#include <xen/domain_page.h>
#include <asm/flushtlb.h>
#include <asm/paging.h>
#include <asm/p2m.h>
#include <asm/spec_ctrl.h>

#include <public/domctl.h>

/*****************************************************************************
 * Macros to tell which shadow paging mode a domain is in. */

#define shadow_mode_enabled(_d)    paging_mode_shadow(_d)
#define shadow_mode_refcounts(_d) (paging_mode_shadow(_d) && \
                                   paging_mode_refcounts(_d))
#define shadow_mode_log_dirty(_d) (paging_mode_shadow(_d) && \
                                   paging_mode_log_dirty(_d))
#define shadow_mode_translate(_d) (paging_mode_shadow(_d) && \
                                   paging_mode_translate(_d))
#define shadow_mode_external(_d)  (paging_mode_shadow(_d) && \
                                   paging_mode_external(_d))

/*****************************************************************************
 * Entry points into the shadow code */

/* Set up the shadow-specific parts of a domain struct at start of day.
 * Called from paging_domain_init(). */
int shadow_domain_init(struct domain *d);

/* Set up the shadow-specific parts of a vcpu struct.  Called by
 * paging_vcpu_init() in paging.c. */
void shadow_vcpu_init(struct vcpu *v);

#ifdef CONFIG_SHADOW_PAGING

/* Enable an arbitrary shadow mode.  Call once at domain creation. */
int shadow_enable(struct domain *d, u32 mode);

/* Enable VRAM dirty bit tracking. */
int shadow_track_dirty_vram(struct domain *d,
                            unsigned long first_pfn,
                            unsigned int nr_frames,
                            XEN_GUEST_HANDLE(void) guest_dirty_bitmap);

/* Handler for shadow control ops: operations from user-space to enable
 * and disable ephemeral shadow modes (test mode and log-dirty mode) and
 * manipulate the log-dirty bitmap. */
int shadow_domctl(struct domain *d,
                  struct xen_domctl_shadow_op *sc,
                  XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl);

/* Call when destroying a vcpu/domain */
void shadow_vcpu_teardown(struct vcpu *v);
void shadow_teardown(struct domain *d, bool *preempted);

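/* Remove the shadows of the given guest frame.  fast != 0 means only try the
 * quick heuristics rather than a thorough search; all != 0 means removal of
 * every shadow must succeed (see shadow_remove_all_shadows() below). */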
void sh_remove_shadows(struct domain *d, mfn_t gmfn, int fast, int all);

/* Adjust shadows ready for a guest page to change its type. */
void shadow_prepare_page_type_change(struct domain *d,
                                     const struct page_info *page);

/* Discard _all_ mappings from the domain's shadows. */
void shadow_blow_tables_per_domain(struct domain *d);

/* Set the pool of shadow pages to the required number of pages.
 * Input will be rounded up to at least shadow_min_acceptable_pages(),
 * plus space for the p2m table.
 * Returns 0 for success, non-zero for failure. */
int shadow_set_allocation(struct domain *d, unsigned int pages,
                          bool *preempted);

/* Helper to invoke for deferred releasing of a top-level shadow's reference. */
void shadow_put_top_level(struct domain *d, pagetable_t old);

#else /* !CONFIG_SHADOW_PAGING */

#define shadow_vcpu_teardown(v) ASSERT(is_pv_vcpu(v))
#define shadow_teardown(d, p) ASSERT(is_pv_domain(d))
#define shadow_final_teardown(d) ASSERT(is_pv_domain(d))
#define shadow_enable(d, mode) \
    ({ ASSERT(is_pv_domain(d)); -EOPNOTSUPP; })
#define shadow_track_dirty_vram(d, begin_pfn, nr, bitmap) \
    ({ ASSERT_UNREACHABLE(); -EOPNOTSUPP; })
#define shadow_set_allocation(d, pages, preempted) \
    ({ ASSERT_UNREACHABLE(); -EOPNOTSUPP; })

static inline void sh_remove_shadows(struct domain *d, mfn_t gmfn,
                                     int fast, int all) {}

static inline void shadow_prepare_page_type_change(struct domain *d,
                                                   const struct page_info *page) {}

static inline void shadow_blow_tables_per_domain(struct domain *d) {}

static inline void shadow_put_top_level(struct domain *d, pagetable_t old)
{
    ASSERT_UNREACHABLE();
}

static inline int shadow_domctl(struct domain *d,
                                struct xen_domctl_shadow_op *sc,
                                XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
{
    return -EINVAL;
}

#endif /* CONFIG_SHADOW_PAGING */

/*
 * Mitigations for L1TF / CVE-2018-3620 for PV guests.
 *
 * We cannot alter an architecturally-legitimate PTE which a PV guest has
 * chosen to write, as traditional paged-out metadata is L1TF-vulnerable.
 * What we can do is force a PV guest which writes a vulnerable PTE into
 * shadow mode, so Xen controls the pagetables which are reachable by the CPU
 * pagewalk.
 *
 * The core of the L1TF vulnerability is that the address bits of the PTE
 * (accounting for PSE and factoring in the level-relevant part of the linear
 * access) are sent for an L1D lookup (to retrieve the next-level PTE, or
 * eventual memory address) before the Present or reserved bits (which would
 * cause a terminal fault) are accounted for.  If an L1D hit occurs, the
 * resulting data is available for potentially dependent instructions.
 *
 * For Present PTEs, the PV type-count safety logic ensures that the address
 * bits always point at a guest-accessible frame, which is safe WRT L1TF from
 * Xen's point of view.  In practice, a PV guest should be unable to set any
 * reserved bits, so should be unable to create any present L1TF-vulnerable
 * PTEs at all.
 *
 * Therefore, these safety checks apply to Not-Present PTEs only, where
 * traditionally, Xen would have let the guest write any value it chose.
 *
 * The all-zero PTE potentially leaks mfn 0.  All software on the system is
 * expected to cooperate and not put any secrets there.  In a Xen system,
 * neither Xen nor dom0 are expected to touch mfn 0, as it typically contains
 * the real mode IVT and BIOS Data Area.  Therefore, mfn 0 is considered safe.
 *
 * Any PTE whose address is higher than the maximum cacheable address is safe,
 * as it won't get an L1D hit.
 *
 * Speculative superpages also need accounting for, as PSE is considered
 * irrespective of Present.  We disallow PSE being set, as it allows an
 * attacker to leak 2M or 1G of data starting from mfn 0.  Also, because of
 * recursive/linear pagetables, we must consider PSE even at L4, as hardware
 * will interpret an L4e as an L3e during a recursive walk.
 */

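/* A (not-present) PTE's address bits are considered L1TF-safe if they point
 * at mfn 0 (expected to hold no secrets, per the rationale above) or at or
 * above the highest cacheable address, where no L1D hit can occur. */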
static inline bool is_l1tf_safe_maddr(intpte_t pte)
{
    paddr_t maddr = pte & l1tf_addr_mask;

    return maddr == 0 || maddr >= l1tf_safe_maddr;
}

#ifdef CONFIG_PV

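/* Check a not-present PTE being written by a PV guest at the given pagetable
 * level.  Returns true if the value is L1TF-vulnerable and mitigating action
 * has been taken: the domain is scheduled to be forced into shadow mode, or,
 * with shadow paging compiled out, crashed. */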
static inline bool pv_l1tf_check_pte(struct domain *d, unsigned int level,
                                     intpte_t pte)
{
    ASSERT(is_pv_domain(d));
    ASSERT(!(pte & _PAGE_PRESENT));

    if ( d->arch.pv.check_l1tf && !paging_mode_sh_forced(d) &&
         (((level > 1) && (pte & _PAGE_PSE)) || !is_l1tf_safe_maddr(pte)) )
    {
#ifdef CONFIG_SHADOW_PAGING
        struct tasklet *t = &d->arch.paging.shadow.pv_l1tf_tasklet;

        printk(XENLOG_G_WARNING
               "d%d L1TF-vulnerable L%ue %016"PRIx64" - Shadowing\n",
               d->domain_id, level, pte);
        /*
         * Safety consideration for accessing tasklet.scheduled_on without the
         * tasklet lock.  This is a singleshot tasklet with the side effect of
         * setting PG_SH_forced (checked just above).  Multiple vcpus can race
         * to schedule the tasklet, but if we observe it scheduled anywhere,
         * that is good enough.
         */
        smp_rmb();
        if ( !tasklet_is_scheduled(t) )
            tasklet_schedule(t);
#else
        printk(XENLOG_G_ERR
               "d%d L1TF-vulnerable L%ue %016"PRIx64" - Crashing\n",
               d->domain_id, level, pte);
        domain_crash(d);
#endif
        return true;
    }

    return false;
}

static inline bool pv_l1tf_check_l1e(struct domain *d, l1_pgentry_t l1e)
{
    return pv_l1tf_check_pte(d, 1, l1e.l1);
}

static inline bool pv_l1tf_check_l2e(struct domain *d, l2_pgentry_t l2e)
{
    return pv_l1tf_check_pte(d, 2, l2e.l2);
}

static inline bool pv_l1tf_check_l3e(struct domain *d, l3_pgentry_t l3e)
{
    return pv_l1tf_check_pte(d, 3, l3e.l3);
}

static inline bool pv_l1tf_check_l4e(struct domain *d, l4_pgentry_t l4e)
{
    return pv_l1tf_check_pte(d, 4, l4e.l4);
}

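/* Tasklet body: completes the deferred switch of an offending PV domain into
 * shadow mode (setting PG_SH_forced), scheduled by pv_l1tf_check_pte(). */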
void cf_check pv_l1tf_tasklet(void *data);

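/* Domain creation/destruction hooks for the L1TF mitigation state: the
 * per-domain policy flag and, when shadow paging is built in, the tasklet
 * used to force the domain into shadow mode. */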
static inline void pv_l1tf_domain_init(struct domain *d)
{
    d->arch.pv.check_l1tf = is_hardware_domain(d) ? opt_pv_l1tf_hwdom
                                                  : opt_pv_l1tf_domu;

#ifdef CONFIG_SHADOW_PAGING
    tasklet_init(&d->arch.paging.shadow.pv_l1tf_tasklet, pv_l1tf_tasklet, d);
#endif
}

static inline void pv_l1tf_domain_destroy(struct domain *d)
{
#ifdef CONFIG_SHADOW_PAGING
    tasklet_kill(&d->arch.paging.shadow.pv_l1tf_tasklet);
#endif
}

/* Functions that atomically write PV guest PT entries */
void shadow_write_guest_entry(
    struct vcpu *v, intpte_t *p, intpte_t new, mfn_t gmfn);
intpte_t shadow_cmpxchg_guest_entry(
    struct vcpu *v, intpte_t *p, intpte_t old, intpte_t new, mfn_t gmfn);

#endif /* CONFIG_PV */

/* Remove all shadows of the guest mfn. */
static inline void shadow_remove_all_shadows(struct domain *d, mfn_t gmfn)
{
    /* See the comment about locking in sh_remove_shadows */
    sh_remove_shadows(d, gmfn, 0 /* Be thorough */, 1 /* Must succeed */);
}

#endif /* _XEN_SHADOW_H */

/*
 * Local variables:
 * mode: C
 * c-file-style: "BSD"
 * c-basic-offset: 4
 * indent-tabs-mode: nil
 * End:
 */