1 /******************************************************************************
2  * arch/x86/mm/hap/nested_hap.c
3  *
4  * Code for Nested Virtualization
5  * Copyright (c) 2011 Advanced Micro Devices
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; If not, see <http://www.gnu.org/licenses/>.
19  */
20 
21 #include <xen/vm_event.h>
22 #include <xen/event.h>
23 #include <public/vm_event.h>
24 #include <asm/domain.h>
25 #include <asm/page.h>
26 #include <asm/paging.h>
27 #include <asm/p2m.h>
28 #include <asm/mem_sharing.h>
29 #include <asm/hap.h>
30 #include <asm/hvm/support.h>
31 
32 #include <asm/hvm/nestedhvm.h>
33 
34 #include "private.h"
35 
/* ALGORITHM for NESTED PAGE FAULT
37  *
38  * NOTATION
39  * Levels: L0, L1, L2
40  * Guests: L1 guest, L2 guest
41  * Hypervisor: L0 hypervisor
42  * Addresses: L2-GVA, L2-GPA, L1-GVA, L1-GPA, MPA
43  *
44  * On L0, when #NPF happens, the handler function should do:
45  * hap_page_fault(GPA)
46  * {
47  *    1. If #NPF is from L1 guest, then we crash the guest VM (same as old
48  *       code)
49  *    2. If #NPF is from L2 guest, then we continue from (3)
50  *    3. Get np2m base from L1 guest. Map np2m base into L0 hypervisor address
51  *       space.
 *    4. Walk the np2m's page table
53  *    5.    - if not present or permission check failure, then we inject #NPF
54  *            back to L1 guest and
55  *            re-launch L1 guest (L1 guest will either treat this #NPF as MMIO,
56  *            or fix its p2m table for L2 guest)
 *    6.    - if present, then we will get a new translated value L1-GPA
 *            (points to L1 machine memory)
59  *    7.        * Use L1-GPA to walk L0 P2M table
60  *    8.            - if not present, then crash the guest (should not happen)
61  *    9.            - if present, then we get a new translated value MPA
62  *                    (points to real machine memory)
63  *   10.                * Finally, use GPA and MPA to walk nested_p2m
64  *                        and fix the bits.
65  * }
66  *
67  */
68 
69 
/********************************************/
/*        NESTED VIRT P2M FUNCTIONS         */
/********************************************/
/* Override macros from asm/page.h to make them work with mfn_t */
#undef page_to_mfn
#define page_to_mfn(_pg) _mfn(__page_to_mfn(_pg))
76 
77 void
nestedp2m_write_p2m_entry(struct p2m_domain * p2m,unsigned long gfn,l1_pgentry_t * p,l1_pgentry_t new,unsigned int level)78 nestedp2m_write_p2m_entry(struct p2m_domain *p2m, unsigned long gfn,
79     l1_pgentry_t *p, l1_pgentry_t new, unsigned int level)
80 {
81     struct domain *d = p2m->domain;
82     uint32_t old_flags;
83 
84     paging_lock(d);
85 
86     old_flags = l1e_get_flags(*p);
87     safe_write_pte(p, new);
88 
89     if (old_flags & _PAGE_PRESENT)
90         flush_tlb_mask(p2m->dirty_cpumask);
91 
92     paging_unlock(d);
93 }
94 
95 /********************************************/
96 /*          NESTED VIRT FUNCTIONS           */
97 /********************************************/
98 static void
nestedhap_fix_p2m(struct vcpu * v,struct p2m_domain * p2m,paddr_t L2_gpa,paddr_t L0_gpa,unsigned int page_order,p2m_type_t p2mt,p2m_access_t p2ma)99 nestedhap_fix_p2m(struct vcpu *v, struct p2m_domain *p2m,
100                   paddr_t L2_gpa, paddr_t L0_gpa,
101                   unsigned int page_order, p2m_type_t p2mt, p2m_access_t p2ma)
102 {
103     int rc = 0;
104     unsigned long gfn, mask;
105     mfn_t mfn;
106 
107     ASSERT(p2m);
108     ASSERT(p2m->set_entry);
109     ASSERT(p2m_locked_by_me(p2m));
110 
111     /*
112      * If this is a superpage mapping, round down both addresses to
113      * the start of the superpage.
114      */
115     mask = ~((1UL << page_order) - 1);
116     gfn = (L2_gpa >> PAGE_SHIFT) & mask;
117     mfn = _mfn((L0_gpa >> PAGE_SHIFT) & mask);
118 
119     rc = p2m_set_entry(p2m, _gfn(gfn), mfn, page_order, p2mt, p2ma);
120 
121     if ( rc )
122     {
123         gdprintk(XENLOG_ERR,
124                  "failed to set entry for %#"PRIx64" -> %#"PRIx64" rc:%d\n",
125                  L2_gpa, L0_gpa, rc);
126         domain_crash(p2m->domain);
127     }
128 }
129 
130 /* This function uses L2_gpa to walk the P2M page table in L1. If the
131  * walk is successful, the translated value is returned in
132  * L1_gpa. The result value tells what to do next.
133  */
134 int
nestedhap_walk_L1_p2m(struct vcpu * v,paddr_t L2_gpa,paddr_t * L1_gpa,unsigned int * page_order,uint8_t * p2m_acc,bool_t access_r,bool_t access_w,bool_t access_x)135 nestedhap_walk_L1_p2m(struct vcpu *v, paddr_t L2_gpa, paddr_t *L1_gpa,
136                       unsigned int *page_order, uint8_t *p2m_acc,
137                       bool_t access_r, bool_t access_w, bool_t access_x)
138 {
139     ASSERT(hvm_funcs.nhvm_hap_walk_L1_p2m);
140 
141     return hvm_funcs.nhvm_hap_walk_L1_p2m(v, L2_gpa, L1_gpa, page_order,
142         p2m_acc, access_r, access_w, access_x);
143 }
144 
145 
/* This function uses L1_gpa to walk the P2M table in L0 hypervisor. If the
 * walk is successful, the translated value is returned in L0_gpa. The return
 * value tells the upper level what to do.
 */
static int
nestedhap_walk_L0_p2m(struct p2m_domain *p2m, paddr_t L1_gpa, paddr_t *L0_gpa,
                      p2m_type_t *p2mt, p2m_access_t *p2ma,
                      unsigned int *page_order,
                      bool_t access_r, bool_t access_w, bool_t access_x)
{
    mfn_t mfn;
    int rc;

    /* walk L0 P2M table; takes a gfn reference that must be dropped below */
    mfn = get_gfn_type_access(p2m, L1_gpa >> PAGE_SHIFT, p2mt, p2ma,
                              0, page_order);

    /* Direct MMIO: hand the translated address straight back. */
    rc = NESTEDHVM_PAGEFAULT_DIRECT_MMIO;
    if ( *p2mt == p2m_mmio_direct )
        goto direct_mmio_out;
    /* Emulated MMIO: no translation to report. */
    rc = NESTEDHVM_PAGEFAULT_MMIO;
    if ( *p2mt == p2m_mmio_dm )
        goto out;

    /* Anything that is not plain, present, writable-as-needed RAM
     * is an L0 error. */
    rc = NESTEDHVM_PAGEFAULT_L0_ERROR;
    if ( access_w && p2m_is_readonly(*p2mt) )
        goto out;

    if ( p2m_is_paging(*p2mt) || p2m_is_shared(*p2mt) || !p2m_is_ram(*p2mt) )
        goto out;

    if ( !mfn_valid(mfn) )
        goto out;

    rc = NESTEDHVM_PAGEFAULT_DONE;
direct_mmio_out:
    /* Combine the frame with the page offset of the original L1-GPA. */
    *L0_gpa = (mfn_x(mfn) << PAGE_SHIFT) + (L1_gpa & ~PAGE_MASK);
out:
    /* Drop the reference taken by get_gfn_type_access() on every path. */
    __put_gfn(p2m, L1_gpa >> PAGE_SHIFT);
    return rc;
}
187 
188 /*
189  * The following function, nestedhap_page_fault(), is for steps (3)--(10).
190  *
191  * Returns:
192  */
193 int
nestedhvm_hap_nested_page_fault(struct vcpu * v,paddr_t * L2_gpa,bool_t access_r,bool_t access_w,bool_t access_x)194 nestedhvm_hap_nested_page_fault(struct vcpu *v, paddr_t *L2_gpa,
195     bool_t access_r, bool_t access_w, bool_t access_x)
196 {
197     int rv;
198     paddr_t L1_gpa, L0_gpa;
199     struct domain *d = v->domain;
200     struct p2m_domain *p2m, *nested_p2m;
201     unsigned int page_order_21, page_order_10, page_order_20;
202     p2m_type_t p2mt_10;
203     p2m_access_t p2ma_10 = p2m_access_rwx;
204     uint8_t p2ma_21 = p2m_access_rwx;
205 
206     p2m = p2m_get_hostp2m(d); /* L0 p2m */
207 
208     /* walk the L1 P2M table */
209     rv = nestedhap_walk_L1_p2m(v, *L2_gpa, &L1_gpa, &page_order_21, &p2ma_21,
210         access_r, access_w, access_x);
211 
212     /* let caller to handle these two cases */
213     switch (rv) {
214     case NESTEDHVM_PAGEFAULT_INJECT:
215     case NESTEDHVM_PAGEFAULT_RETRY:
216     case NESTEDHVM_PAGEFAULT_L1_ERROR:
217         return rv;
218     case NESTEDHVM_PAGEFAULT_DONE:
219         break;
220     default:
221         BUG();
222         break;
223     }
224 
225     /* ==> we have to walk L0 P2M */
226     rv = nestedhap_walk_L0_p2m(p2m, L1_gpa, &L0_gpa,
227         &p2mt_10, &p2ma_10, &page_order_10,
228         access_r, access_w, access_x);
229 
230     /* let upper level caller to handle these two cases */
231     switch (rv) {
232     case NESTEDHVM_PAGEFAULT_INJECT:
233         return rv;
234     case NESTEDHVM_PAGEFAULT_L0_ERROR:
235         *L2_gpa = L1_gpa;
236         return rv;
237     case NESTEDHVM_PAGEFAULT_DONE:
238         break;
239     case NESTEDHVM_PAGEFAULT_MMIO:
240         return rv;
241     case NESTEDHVM_PAGEFAULT_DIRECT_MMIO:
242         break;
243     default:
244         BUG();
245         break;
246     }
247 
248     page_order_20 = min(page_order_21, page_order_10);
249 
250     ASSERT(p2ma_10 <= p2m_access_n2rwx);
251     /*NOTE: if assert fails, needs to handle new access type here */
252 
253     switch ( p2ma_10 )
254     {
255     case p2m_access_n ... p2m_access_rwx:
256         break;
257     case p2m_access_rx2rw:
258         p2ma_10 = p2m_access_rx;
259         break;
260     case p2m_access_n2rwx:
261         p2ma_10 = p2m_access_n;
262         break;
263     default:
264         p2ma_10 = p2m_access_n;
265         /* For safety, remove all permissions. */
266         gdprintk(XENLOG_ERR, "Unhandled p2m access type:%d\n", p2ma_10);
267     }
268     /* Use minimal permission for nested p2m. */
269     p2ma_10 &= (p2m_access_t)p2ma_21;
270 
271     /* fix p2m_get_pagetable(nested_p2m) */
272     nested_p2m = p2m_get_nestedp2m_locked(v);
273     nestedhap_fix_p2m(v, nested_p2m, *L2_gpa, L0_gpa, page_order_20,
274         p2mt_10, p2ma_10);
275     p2m_unlock(nested_p2m);
276 
277     return NESTEDHVM_PAGEFAULT_DONE;
278 }
279 
280 /********************************************/
281 /*     NESTED VIRT INITIALIZATION FUNCS     */
282 /********************************************/
283 
284 /*
285  * Local variables:
286  * mode: C
287  * c-file-style: "BSD"
288  * c-basic-offset: 4
289  * tab-width: 4
290  * indent-tabs-mode: nil
291  * End:
292  */
293