/******************************************************************************
 * domain_page.c
 *
 * Allow temporary mapping of domain pages.
 *
 * Copyright (c) 2003-2006, Keir Fraser <keir@xensource.com>
 */

#include <xen/domain_page.h>
#include <xen/efi.h>
#include <xen/mm.h>
#include <xen/perfc.h>
#include <xen/pfn.h>
#include <xen/sched.h>
#include <xen/vmap.h>
#include <asm/current.h>
#include <asm/flushtlb.h>
#include <asm/hardirq.h>
#include <asm/setup.h>

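/*
 * Per-CPU override of the vCPU whose mapcache map_domain_page() uses;
 * set via mapcache_override_current() below for the (boot-time) cases
 * where 'current' is not the right choice.
 */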
static DEFINE_PER_CPU(struct vcpu *, override);

static inline struct vcpu *mapcache_current_vcpu(void)
{
    /* In the common case we use the mapcache of the running VCPU. */
    struct vcpu *v = this_cpu(override) ?: current;

    /*
     * When current isn't properly set up yet, this is equivalent to
     * running in an idle vCPU (callers must check for NULL).
     */
    if ( v == INVALID_VCPU )
        return NULL;

    /*
     * When using efi runtime page tables, we have the equivalent of the idle
     * domain's page tables but current may point at another domain's VCPU.
     * Return NULL as though current is not properly set up yet.
     */
    if ( efi_rs_using_pgtables() )
        return NULL;

    /*
     * If guest_table is NULL, and we are running a paravirtualised guest,
     * then it means we are running on the idle domain's page table and must
     * therefore use its mapcache.
     */
    if ( unlikely(pagetable_is_null(v->arch.guest_table)) && is_pv_vcpu(v) )
    {
        /* If we really are idling, perform lazy context switch now. */
        if ( (v = idle_vcpu[smp_processor_id()]) == current )
            sync_local_execstate();
        /* We must now be running on the idle page table. */
        ASSERT(read_cr3() == __pa(idle_pg_table));
    }

    return v;
}

void __init mapcache_override_current(struct vcpu *v)
{
    this_cpu(override) = v;
}

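/*
 * Layout helpers: mapcache entry 'idx' occupies the 4k page at
 * MAPCACHE_VIRT_START + idx * PAGE_SIZE, MAPCACHE_L1ENT(idx) is the
 * corresponding slot in the linear L1 page table, and mapcache_l2_entry()
 * yields the L2 entry covering a given mapcache entry.
 */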
#define mapcache_l2_entry(e) ((e) >> PAGETABLE_ORDER)
#define MAPCACHE_L2_ENTRIES (mapcache_l2_entry(MAPCACHE_ENTRIES - 1) + 1)
#define MAPCACHE_L1ENT(idx) \
    __linear_l1_table[l1_linear_offset(MAPCACHE_VIRT_START + pfn_to_paddr(idx))]

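/*
 * Illustrative usage sketch (not code from this file): a caller holding a
 * reference on machine frame 'mfn' can do
 *
 *     void *p = map_domain_page(mfn);
 *     ... read or write the page through p ...
 *     unmap_domain_page(p);
 *
 * The mapping is only guaranteed to be usable by the current vCPU and must
 * be dropped with unmap_domain_page() once it is no longer needed.
 */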
void *map_domain_page(mfn_t mfn)
{
    unsigned long flags;
    unsigned int idx, i;
    struct vcpu *v;
    struct mapcache_domain *dcache;
    struct mapcache_vcpu *vcache;
    struct vcpu_maphash_entry *hashent;

#ifdef NDEBUG
    if ( mfn_x(mfn) <= PFN_DOWN(__pa(HYPERVISOR_VIRT_END - 1)) )
        return mfn_to_virt(mfn_x(mfn));
#endif

    v = mapcache_current_vcpu();
    if ( !v || !is_pv_vcpu(v) )
        return mfn_to_virt(mfn_x(mfn));

    dcache = &v->domain->arch.pv_domain.mapcache;
    vcache = &v->arch.pv_vcpu.mapcache;
    if ( !dcache->inuse )
        return mfn_to_virt(mfn_x(mfn));

    perfc_incr(map_domain_page_count);

    local_irq_save(flags);

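    /*
     * Fast path: if this MFN is already cached in the per-vCPU maphash,
     * reuse its slot and just take another reference.
     */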
    hashent = &vcache->hash[MAPHASH_HASHFN(mfn_x(mfn))];
    if ( hashent->mfn == mfn_x(mfn) )
    {
        idx = hashent->idx;
        ASSERT(idx < dcache->entries);
        hashent->refcnt++;
        ASSERT(hashent->refcnt);
        ASSERT(l1e_get_pfn(MAPCACHE_L1ENT(idx)) == mfn_x(mfn));
        goto out;
    }

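    /*
     * Slow path: allocate a fresh slot from the domain-wide mapcache,
     * serialised by dcache->lock.
     */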
    spin_lock(&dcache->lock);

    /* Has some other CPU caused a wrap? We must flush if so. */
    if ( unlikely(dcache->epoch != vcache->shadow_epoch) )
    {
        vcache->shadow_epoch = dcache->epoch;
        if ( NEED_FLUSH(this_cpu(tlbflush_time), dcache->tlbflush_timestamp) )
        {
            perfc_incr(domain_page_tlb_flush);
            flush_tlb_local();
        }
    }

    idx = find_next_zero_bit(dcache->inuse, dcache->entries, dcache->cursor);
    if ( unlikely(idx >= dcache->entries) )
    {
        unsigned long accum = 0, prev = 0;

        /* /First/, clean the garbage map and update the inuse list. */
        for ( i = 0; i < BITS_TO_LONGS(dcache->entries); i++ )
        {
            accum |= prev;
            dcache->inuse[i] &= ~xchg(&dcache->garbage[i], 0);
            prev = ~dcache->inuse[i];
        }

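        /*
         * accum | (prev & ...) is non-zero iff the sweep above exposed at
         * least one free slot (ignoring bits beyond dcache->entries in the
         * final bitmap word).  If so, rescan the bitmap from the start;
         * otherwise fall back to recycling an unreferenced maphash entry.
         */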
        if ( accum | (prev & BITMAP_LAST_WORD_MASK(dcache->entries)) )
            idx = find_first_zero_bit(dcache->inuse, dcache->entries);
        else
        {
            /* Replace a hash entry instead. */
            i = MAPHASH_HASHFN(mfn_x(mfn));
            do {
                hashent = &vcache->hash[i];
                if ( hashent->idx != MAPHASHENT_NOTINUSE && !hashent->refcnt )
                {
                    idx = hashent->idx;
                    ASSERT(l1e_get_pfn(MAPCACHE_L1ENT(idx)) == hashent->mfn);
                    l1e_write(&MAPCACHE_L1ENT(idx), l1e_empty());
                    hashent->idx = MAPHASHENT_NOTINUSE;
                    hashent->mfn = ~0UL;
                    break;
                }
                if ( ++i == MAPHASH_ENTRIES )
                    i = 0;
            } while ( i != MAPHASH_HASHFN(mfn_x(mfn)) );
        }
        BUG_ON(idx >= dcache->entries);

        /* /Second/, flush TLBs. */
        perfc_incr(domain_page_tlb_flush);
        flush_tlb_local();
        vcache->shadow_epoch = ++dcache->epoch;
        dcache->tlbflush_timestamp = tlbflush_current_time();
    }

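    /*
     * Claim the slot while still holding the lock; the PTE write further
     * down can happen after the unlock, since 'idx' is now ours alone.
     */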
    set_bit(idx, dcache->inuse);
    dcache->cursor = idx + 1;

    spin_unlock(&dcache->lock);

    l1e_write(&MAPCACHE_L1ENT(idx), l1e_from_mfn(mfn, __PAGE_HYPERVISOR_RW));

 out:
    local_irq_restore(flags);
    return (void *)MAPCACHE_VIRT_START + pfn_to_paddr(idx);
}

void unmap_domain_page(const void *ptr)
{
    unsigned int idx;
    struct vcpu *v;
    struct mapcache_domain *dcache;
    unsigned long va = (unsigned long)ptr, mfn, flags;
    struct vcpu_maphash_entry *hashent;

    if ( va >= DIRECTMAP_VIRT_START )
        return;

    ASSERT(va >= MAPCACHE_VIRT_START && va < MAPCACHE_VIRT_END);

    v = mapcache_current_vcpu();
    ASSERT(v && is_pv_vcpu(v));

    dcache = &v->domain->arch.pv_domain.mapcache;
    ASSERT(dcache->inuse);

    idx = PFN_DOWN(va - MAPCACHE_VIRT_START);
    mfn = l1e_get_pfn(MAPCACHE_L1ENT(idx));
    hashent = &v->arch.pv_vcpu.mapcache.hash[MAPHASH_HASHFN(mfn)];

    local_irq_save(flags);

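    /*
     * Three cases: the mapping being dropped is already the maphash entry
     * for this MFN (just drop a reference); the home bucket is free (retire
     * its old mapping, if any, and cache this one there); or the bucket is
     * busy (zap the PTE and defer the slot to the garbage bitmap, which
     * map_domain_page() reclaims lazily).
     */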
    if ( hashent->idx == idx )
    {
        ASSERT(hashent->mfn == mfn);
        ASSERT(hashent->refcnt);
        hashent->refcnt--;
    }
    else if ( !hashent->refcnt )
    {
        if ( hashent->idx != MAPHASHENT_NOTINUSE )
        {
            /* /First/, zap the PTE. */
            ASSERT(l1e_get_pfn(MAPCACHE_L1ENT(hashent->idx)) ==
                   hashent->mfn);
            l1e_write(&MAPCACHE_L1ENT(hashent->idx), l1e_empty());
            /* /Second/, mark as garbage. */
            set_bit(hashent->idx, dcache->garbage);
        }

        /* Add newly-freed mapping to the maphash. */
        hashent->mfn = mfn;
        hashent->idx = idx;
    }
    else
    {
        /* /First/, zap the PTE. */
        l1e_write(&MAPCACHE_L1ENT(idx), l1e_empty());
        /* /Second/, mark as garbage. */
        set_bit(idx, dcache->garbage);
    }

    local_irq_restore(flags);
}

int mapcache_domain_init(struct domain *d)
{
    struct mapcache_domain *dcache = &d->arch.pv_domain.mapcache;
    unsigned int bitmap_pages;

    if ( !is_pv_domain(d) || is_idle_domain(d) )
        return 0;

#ifdef NDEBUG
    if ( !mem_hotplug && max_page <= PFN_DOWN(__pa(HYPERVISOR_VIRT_END - 1)) )
        return 0;
#endif

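    /*
     * The inuse and garbage bitmaps live above MAPCACHE_VIRT_END, with a
     * page left out between the mapcache and 'inuse' and another between
     * 'inuse' and 'garbage'; the BUILD_BUG_ON checks that this layout still
     * fits within the per-domain slot.
     */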
    BUILD_BUG_ON(MAPCACHE_VIRT_END + PAGE_SIZE * (3 +
                 2 * PFN_UP(BITS_TO_LONGS(MAPCACHE_ENTRIES) * sizeof(long))) >
                 MAPCACHE_VIRT_START + (PERDOMAIN_SLOT_MBYTES << 20));
    bitmap_pages = PFN_UP(BITS_TO_LONGS(MAPCACHE_ENTRIES) * sizeof(long));
    dcache->inuse = (void *)MAPCACHE_VIRT_END + PAGE_SIZE;
    dcache->garbage = dcache->inuse +
                      (bitmap_pages + 1) * PAGE_SIZE / sizeof(long);

    spin_lock_init(&dcache->lock);

    return create_perdomain_mapping(d, (unsigned long)dcache->inuse,
                                    2 * bitmap_pages + 1,
                                    NIL(l1_pgentry_t *), NULL);
}

int mapcache_vcpu_init(struct vcpu *v)
{
    struct domain *d = v->domain;
    struct mapcache_domain *dcache = &d->arch.pv_domain.mapcache;
    unsigned long i;
    unsigned int ents = d->max_vcpus * MAPCACHE_VCPU_ENTRIES;
    unsigned int nr = PFN_UP(BITS_TO_LONGS(ents) * sizeof(long));

    if ( !is_pv_vcpu(v) || !dcache->inuse )
        return 0;

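    /*
     * The mapcache scales with the number of vCPUs: each vCPU contributes
     * MAPCACHE_VCPU_ENTRIES slots, so extend the page tables and bitmaps
     * the first time a vCPU pushes 'ents' beyond the current size.
     */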
    if ( ents > dcache->entries )
    {
        /* Populate page tables. */
        int rc = create_perdomain_mapping(d, MAPCACHE_VIRT_START, ents,
                                          NIL(l1_pgentry_t *), NULL);

        /* Populate bit maps. */
        if ( !rc )
            rc = create_perdomain_mapping(d, (unsigned long)dcache->inuse,
                                          nr, NULL, NIL(struct page_info *));
        if ( !rc )
            rc = create_perdomain_mapping(d, (unsigned long)dcache->garbage,
                                          nr, NULL, NIL(struct page_info *));

        if ( rc )
            return rc;

        dcache->entries = ents;
    }

    /* Mark all maphash entries as not in use. */
    BUILD_BUG_ON(MAPHASHENT_NOTINUSE < MAPCACHE_ENTRIES);
    for ( i = 0; i < MAPHASH_ENTRIES; i++ )
    {
        struct vcpu_maphash_entry *hashent = &v->arch.pv_vcpu.mapcache.hash[i];

        hashent->mfn = ~0UL; /* never valid to map */
        hashent->idx = MAPHASHENT_NOTINUSE;
    }

    return 0;
}

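/*
 * Global mappings are visible in all address spaces rather than just the
 * mapping vCPU's: they are backed by vmap() (or by the directmap in release
 * builds, when the frame is low enough), so they can be retained across
 * context switches and unmapped from any context.
 */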
void *map_domain_page_global(mfn_t mfn)
{
    ASSERT(!in_irq() &&
           ((system_state >= SYS_STATE_boot &&
             system_state < SYS_STATE_active) ||
            local_irq_is_enabled()));

#ifdef NDEBUG
    if ( mfn_x(mfn) <= PFN_DOWN(__pa(HYPERVISOR_VIRT_END - 1)) )
        return mfn_to_virt(mfn_x(mfn));
#endif

    return vmap(&mfn, 1);
}

void unmap_domain_page_global(const void *ptr)
{
    unsigned long va = (unsigned long)ptr;

    if ( va >= DIRECTMAP_VIRT_START )
        return;

    ASSERT(va >= VMAP_VIRT_START && va < VMAP_VIRT_END);

    vunmap(ptr);
}

/* Translate a map-domain-page'd address to the underlying MFN */
unsigned long domain_page_map_to_mfn(const void *ptr)
{
    unsigned long va = (unsigned long)ptr;
    const l1_pgentry_t *pl1e;

    if ( va >= DIRECTMAP_VIRT_START )
        return virt_to_mfn(ptr);

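    /*
     * Global (vmap) mappings need a page-table walk; mapcache addresses can
     * be resolved directly through the linear page table.
     */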
    if ( va >= VMAP_VIRT_START && va < VMAP_VIRT_END )
    {
        pl1e = virt_to_xen_l1e(va);
        BUG_ON(!pl1e);
    }
    else
    {
        ASSERT(va >= MAPCACHE_VIRT_START && va < MAPCACHE_VIRT_END);
        pl1e = &__linear_l1_table[l1_linear_offset(va)];
    }

    return l1e_get_pfn(*pl1e);
}