1 /******************************************************************************
2  * hvm/emulate.c
3  *
4  * HVM instruction emulation. Used for MMIO and VMX real mode.
5  *
6  * Copyright (c) 2008, Citrix Systems, Inc.
7  *
8  * Authors:
9  *    Keir Fraser <keir@xen.org>
10  */
11 
12 #include <xen/init.h>
13 #include <xen/lib.h>
14 #include <xen/sched.h>
15 #include <xen/paging.h>
16 #include <xen/trace.h>
17 #include <xen/vm_event.h>
18 #include <asm/event.h>
19 #include <asm/i387.h>
20 #include <asm/xstate.h>
21 #include <asm/hvm/emulate.h>
22 #include <asm/hvm/hvm.h>
23 #include <asm/hvm/ioreq.h>
24 #include <asm/hvm/monitor.h>
25 #include <asm/hvm/trace.h>
26 #include <asm/hvm/support.h>
27 #include <asm/hvm/svm/svm.h>
28 #include <asm/vm_event.h>
29 
30 static void hvmtrace_io_assist(const ioreq_t *p)
31 {
32     unsigned int size, event;
33     unsigned char buffer[12];
34 
35     if ( likely(!tb_init_done) )
36         return;
37 
38     if ( p->type == IOREQ_TYPE_COPY )
39         event = p->dir ? TRC_HVM_IOMEM_READ : TRC_HVM_IOMEM_WRITE;
40     else
41         event = p->dir ? TRC_HVM_IOPORT_READ : TRC_HVM_IOPORT_WRITE;
42 
43     *(uint64_t *)buffer = p->addr;
44     size = (p->addr != (u32)p->addr) ? 8 : 4;
45     if ( size == 8 )
46         event |= TRC_64_FLAG;
47 
48     if ( !p->data_is_ptr )
49     {
50         *(uint32_t *)&buffer[size] = p->data;
51         size += 4;
52     }
53 
54     trace_var(event, 0/*!cycles*/, size, buffer);
55 }
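/*
 * Editor's illustration (not part of the original file): for a 4-byte MMIO
 * read of address 0x100000000 with inline data, the record built above is
 * 12 bytes: an 8-byte address (with TRC_64_FLAG set, since the address does
 * not fit in 32 bits) followed by the 4-byte data value.
 */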
56 
57 static int null_read(const struct hvm_io_handler *io_handler,
58                      uint64_t addr,
59                      uint32_t size,
60                      uint64_t *data)
61 {
62     *data = ~0ul;
63     return X86EMUL_OKAY;
64 }
65 
66 static int null_write(const struct hvm_io_handler *handler,
67                       uint64_t addr,
68                       uint32_t size,
69                       uint64_t data)
70 {
71     return X86EMUL_OKAY;
72 }
73 
74 static int set_context_data(void *buffer, unsigned int size)
75 {
76     struct vcpu *curr = current;
77 
78     if ( curr->arch.vm_event )
79     {
80         unsigned int safe_size =
81             min(size, curr->arch.vm_event->emul.read.size);
82 
83         memcpy(buffer, curr->arch.vm_event->emul.read.data, safe_size);
84         memset(buffer + safe_size, 0, size - safe_size);
85         return X86EMUL_OKAY;
86     }
87 
88     return X86EMUL_UNHANDLEABLE;
89 }
90 
91 static const struct hvm_io_ops null_ops = {
92     .read = null_read,
93     .write = null_write
94 };
95 
96 static const struct hvm_io_handler null_handler = {
97     .ops = &null_ops
98 };
99 
100 static int ioreq_server_read(const struct hvm_io_handler *io_handler,
101                     uint64_t addr,
102                     uint32_t size,
103                     uint64_t *data)
104 {
105     if ( hvm_copy_from_guest_phys(data, addr, size) != HVMTRANS_okay )
106         return X86EMUL_UNHANDLEABLE;
107 
108     return X86EMUL_OKAY;
109 }
110 
111 static const struct hvm_io_ops ioreq_server_ops = {
112     .read = ioreq_server_read,
113     .write = null_write
114 };
115 
116 static const struct hvm_io_handler ioreq_server_handler = {
117     .ops = &ioreq_server_ops
118 };
119 
120 static int hvmemul_do_io(
121     bool_t is_mmio, paddr_t addr, unsigned long *reps, unsigned int size,
122     uint8_t dir, bool_t df, bool_t data_is_addr, uintptr_t data)
123 {
124     struct vcpu *curr = current;
125     struct domain *currd = curr->domain;
126     struct hvm_vcpu_io *vio = &curr->arch.hvm_vcpu.hvm_io;
127     ioreq_t p = {
128         .type = is_mmio ? IOREQ_TYPE_COPY : IOREQ_TYPE_PIO,
129         .addr = addr,
130         .size = size,
131         .count = *reps,
132         .dir = dir,
133         .df = df,
134         .data = data_is_addr ? data : 0,
135         .data_is_ptr = data_is_addr, /* ioreq_t field name is misleading */
136         .state = STATE_IOREQ_READY,
137     };
138     void *p_data = (void *)data;
139     int rc;
140 
141     /*
142      * Weird-sized accesses have undefined behaviour: we discard writes
143      * and read all-ones.
144      */
145     if ( unlikely((size > sizeof(long)) || (size & (size - 1))) )
146     {
147         gdprintk(XENLOG_WARNING, "bad mmio size %d\n", size);
148         return X86EMUL_UNHANDLEABLE;
149     }
150 
151     switch ( vio->io_req.state )
152     {
153     case STATE_IOREQ_NONE:
154         break;
155     case STATE_IORESP_READY:
156         vio->io_req.state = STATE_IOREQ_NONE;
157         p = vio->io_req;
158 
159         /* Verify the emulation request has been correctly re-issued */
160         if ( (p.type != (is_mmio ? IOREQ_TYPE_COPY : IOREQ_TYPE_PIO)) ||
161              (p.addr != addr) ||
162              (p.size != size) ||
163              (p.count > *reps) ||
164              (p.dir != dir) ||
165              (p.df != df) ||
166              (p.data_is_ptr != data_is_addr) )
167             domain_crash(currd);
168 
169         if ( data_is_addr )
170             return X86EMUL_UNHANDLEABLE;
171 
172         *reps = p.count;
173         goto finish_access;
174     default:
175         return X86EMUL_UNHANDLEABLE;
176     }
177 
178     if ( dir == IOREQ_WRITE )
179     {
180         if ( !data_is_addr )
181             memcpy(&p.data, p_data, size);
182 
183         hvmtrace_io_assist(&p);
184     }
185 
186     vio->io_req = p;
187 
188     rc = hvm_io_intercept(&p);
189 
190     /*
191      * p.count may have been reduced (see hvm_process_io_intercept()); inform
192      * our callers and mirror this into the latched state.
193      */
194     ASSERT(p.count <= *reps);
195     *reps = vio->io_req.count = p.count;
196 
197     switch ( rc )
198     {
199     case X86EMUL_OKAY:
200         vio->io_req.state = STATE_IOREQ_NONE;
201         break;
202     case X86EMUL_UNHANDLEABLE:
203     {
204         /*
205          * Xen isn't emulating the instruction internally, so see if there's
206          * an ioreq server that can handle it.
207          *
208          * Rules:
209          * A> PIO or MMIO accesses run through hvm_select_ioreq_server() to
210          * choose the ioreq server by range. If no server is found, the access
211          * is ignored.
212          *
213          * B> p2m_ioreq_server accesses are handled by the designated
214          * ioreq server for the domain, but there are some corner cases:
215          *
216          *   - If the domain ioreq server is NULL, it's likely we suffer from
217          *   a race with an unmap operation on the ioreq server, so re-try the
218          *   instruction.
219          *
220          *   - If the access is a read, it could be part of a
221          *   read-modify-write instruction, so emulate the read first.
222          *
223          * Note: Even when an ioreq server is found, its value could become
224          * stale later, because it is possible that
225          *
226          *   - the PIO or MMIO address is removed from the rangeset of the
227          *   ioreq server, before the event is delivered to the device model.
228          *
229          *   - the p2m_ioreq_server type is unmapped from the ioreq server,
230          *   before the event is delivered to the device model.
231          *
232          * However, there's no cheap approach to avoid the above situations in Xen,
233          * so the device model side needs to check the incoming ioreq event.
234          */
235         struct hvm_ioreq_server *s = NULL;
236         p2m_type_t p2mt = p2m_invalid;
237 
238         if ( is_mmio )
239         {
240             unsigned long gmfn = paddr_to_pfn(addr);
241 
242             get_gfn_query_unlocked(currd, gmfn, &p2mt);
243 
244             if ( p2mt == p2m_ioreq_server )
245             {
246                 unsigned int flags;
247 
248                 s = p2m_get_ioreq_server(currd, &flags);
249 
250                 if ( s == NULL )
251                 {
252                     rc = X86EMUL_RETRY;
253                     vio->io_req.state = STATE_IOREQ_NONE;
254                     break;
255                 }
256 
257                 /*
258                  * This is part of a read-modify-write instruction.
259                  * Emulate the read part so we have the value available.
260                  */
261                 if ( dir == IOREQ_READ )
262                 {
263                     rc = hvm_process_io_intercept(&ioreq_server_handler, &p);
264                     vio->io_req.state = STATE_IOREQ_NONE;
265                     break;
266                 }
267             }
268         }
269 
270         if ( !s )
271             s = hvm_select_ioreq_server(currd, &p);
272 
273         /* If there is no suitable backing DM, just ignore accesses */
274         if ( !s )
275         {
276             rc = hvm_process_io_intercept(&null_handler, &p);
277             vio->io_req.state = STATE_IOREQ_NONE;
278         }
279         else
280         {
281             rc = hvm_send_ioreq(s, &p, 0);
282             if ( rc != X86EMUL_RETRY || currd->is_shutting_down )
283                 vio->io_req.state = STATE_IOREQ_NONE;
284             else if ( data_is_addr )
285                 rc = X86EMUL_OKAY;
286         }
287         break;
288     }
289     case X86EMUL_UNIMPLEMENTED:
290         ASSERT_UNREACHABLE();
291         /* Fall-through */
292     default:
293         BUG();
294     }
295 
296     ASSERT(rc != X86EMUL_UNIMPLEMENTED);
297 
298     if ( rc != X86EMUL_OKAY )
299         return rc;
300 
301  finish_access:
302     if ( dir == IOREQ_READ )
303     {
304         hvmtrace_io_assist(&p);
305 
306         if ( !data_is_addr )
307             memcpy(p_data, &p.data, size);
308     }
309 
310     return X86EMUL_OKAY;
311 }
312 
313 static int hvmemul_do_io_buffer(
314     bool_t is_mmio, paddr_t addr, unsigned long *reps, unsigned int size,
315     uint8_t dir, bool_t df, void *buffer)
316 {
317     int rc;
318 
319     BUG_ON(buffer == NULL);
320 
321     rc = hvmemul_do_io(is_mmio, addr, reps, size, dir, df, 0,
322                        (uintptr_t)buffer);
323 
324     ASSERT(rc != X86EMUL_UNIMPLEMENTED);
325 
326     if ( rc == X86EMUL_UNHANDLEABLE && dir == IOREQ_READ )
327         memset(buffer, 0xff, size);
328 
329     return rc;
330 }
331 
332 static int hvmemul_acquire_page(unsigned long gmfn, struct page_info **page)
333 {
334     struct domain *curr_d = current->domain;
335     p2m_type_t p2mt;
336 
337     *page = get_page_from_gfn(curr_d, gmfn, &p2mt, P2M_UNSHARE);
338 
339     if ( *page == NULL )
340         return X86EMUL_UNHANDLEABLE;
341 
342     if ( p2m_is_paging(p2mt) )
343     {
344         put_page(*page);
345         p2m_mem_paging_populate(curr_d, gmfn);
346         return X86EMUL_RETRY;
347     }
348 
349     if ( p2m_is_shared(p2mt) )
350     {
351         put_page(*page);
352         return X86EMUL_RETRY;
353     }
354 
355     /* This code should not be reached if the gmfn is not RAM */
356     if ( p2m_is_mmio(p2mt) )
357     {
358         domain_crash(curr_d);
359 
360         put_page(*page);
361         return X86EMUL_UNHANDLEABLE;
362     }
363 
364     return X86EMUL_OKAY;
365 }
366 
367 static inline void hvmemul_release_page(struct page_info *page)
368 {
369     put_page(page);
370 }
371 
372 static int hvmemul_do_io_addr(
373     bool_t is_mmio, paddr_t addr, unsigned long *reps,
374     unsigned int size, uint8_t dir, bool_t df, paddr_t ram_gpa)
375 {
376     struct vcpu *v = current;
377     unsigned long ram_gmfn = paddr_to_pfn(ram_gpa);
378     unsigned int page_off = ram_gpa & (PAGE_SIZE - 1);
379     struct page_info *ram_page[2];
380     unsigned int nr_pages = 0;
381     unsigned long count;
382     int rc;
383 
384     rc = hvmemul_acquire_page(ram_gmfn, &ram_page[nr_pages]);
385     if ( rc != X86EMUL_OKAY )
386         goto out;
387 
388     nr_pages++;
389 
390     /* Determine how many reps will fit within this page */
391     count = min_t(unsigned long,
392                   *reps,
393                   df ?
394                   ((page_off + size - 1) & ~PAGE_MASK) / size :
395                   (PAGE_SIZE - page_off) / size);
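    /*
     * Worked example (editor's note): for a forward access with
     * page_off == 0xff8 and size == 4, at most (PAGE_SIZE - 0xff8) / 4 == 2
     * reps fit in this page; a backward (df) access at the same offset could
     * fit ((0xff8 + 3) & ~PAGE_MASK) / 4 == 1022 reps.
     */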
396 
397     if ( count == 0 )
398     {
399         /*
400          * This access must span two pages, so grab a reference to
401          * the next page and do a single rep.
402          * It is safe to assume multiple pages are physically
403          * contiguous at this point as hvmemul_linear_to_phys() will
404          * ensure this is the case.
405          */
406         rc = hvmemul_acquire_page(df ? ram_gmfn - 1 : ram_gmfn + 1,
407                                   &ram_page[nr_pages]);
408         if ( rc != X86EMUL_OKAY )
409             goto out;
410 
411         nr_pages++;
412         count = 1;
413     }
414 
415     rc = hvmemul_do_io(is_mmio, addr, &count, size, dir, df, 1,
416                        ram_gpa);
417 
418     ASSERT(rc != X86EMUL_UNIMPLEMENTED);
419 
420     if ( rc == X86EMUL_OKAY )
421         v->arch.hvm_vcpu.hvm_io.mmio_retry = (count < *reps);
422 
423     *reps = count;
424 
425  out:
426     while ( nr_pages )
427         hvmemul_release_page(ram_page[--nr_pages]);
428 
429     return rc;
430 }
431 
432 /*
433  * Perform I/O between <port> and <buffer>. <dir> indicates the
434  * direction: IOREQ_READ means a read from <port> to <buffer> and
435  * IOREQ_WRITE means a write from <buffer> to <port>. Each access has
436  * width <size>.
437  */
438 int hvmemul_do_pio_buffer(uint16_t port,
439                           unsigned int size,
440                           uint8_t dir,
441                           void *buffer)
442 {
443     unsigned long one_rep = 1;
444 
445     return hvmemul_do_io_buffer(0, port, &one_rep, size, dir, 0, buffer);
446 }
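/*
 * Usage sketch (editor's note, not part of the original file): a caller
 * emulating a single one-byte port read such as "in %al, $0x71" could use
 * the buffer variant directly; 'val' below is a hypothetical local variable.
 *
 *     uint8_t val;
 *     int rc = hvmemul_do_pio_buffer(0x71, sizeof(val), IOREQ_READ, &val);
 *     // On X86EMUL_OKAY, 'val' holds the emulated port value.
 */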
447 
448 /*
449  * Perform I/O between <port> and guest RAM starting at <ram_addr>.
450  * <dir> indicates the direction: IOREQ_READ means a read from <port> to
451  * RAM and IOREQ_WRITE means a write from RAM to <port>. Each access has
452  * width <size> and up to *<reps> accesses will be performed. If
453  * X86EMUL_OKAY is returned then <reps> will be updated with the number
454  * of accesses actually performed.
455  * Each access will be done to/from successive RAM addresses, increasing
456  * if <df> is 0 or decreasing if <df> is 1.
457  */
458 static int hvmemul_do_pio_addr(uint16_t port,
459                                unsigned long *reps,
460                                unsigned int size,
461                                uint8_t dir,
462                                bool_t df,
463                                paddr_t ram_addr)
464 {
465     return hvmemul_do_io_addr(0, port, reps, size, dir, df, ram_addr);
466 }
467 
468 /*
469  * Perform I/O between MMIO space starting at <mmio_gpa> and <buffer>.
470  * <dir> indicates the direction: IOREQ_READ means a read from MMIO to
471  * <buffer> and IOREQ_WRITE means a write from <buffer> to MMIO. Each
472  * access has width <size> and up to *<reps> accesses will be performed.
473  * If X86EMUL_OKAY is returned then <reps> will be updated with the number
474  * of accesses actually performed.
475  * Each access will be done to/from successive MMIO addresses, increasing
476  * if <df> is 0 or decreasing if <df> is 1.
477  *
478  * NOTE: If *<reps> is greater than 1, each access will use the
479  *       <buffer> pointer; there is no implicit iteration over a
480  *       block of memory starting at <buffer>.
481  */
482 static int hvmemul_do_mmio_buffer(paddr_t mmio_gpa,
483                                   unsigned long *reps,
484                                   unsigned int size,
485                                   uint8_t dir,
486                                   bool_t df,
487                                   void *buffer)
488 {
489     return hvmemul_do_io_buffer(1, mmio_gpa, reps, size, dir, df, buffer);
490 }
491 
492 /*
493  * Perform I/O between MMIO space starting at <mmio_gpa> and guest RAM
494  * starting at <ram_gpa>. <dir> indicates the direction: IOREQ_READ
495  * means a read from MMIO to RAM and IOREQ_WRITE means a write from RAM
496  * to MMIO. Each access has width <size> and up to *<reps> accesses will
497  * be performed. If X86EMUL_OKAY is returned then <reps> will be updated
498  * with the number of accesses actually performed.
499  * Each access will be done to/from successive RAM *and* MMIO addresses,
500  * increasing if <df> is 0 or decreasing if <df> is 1.
501  */
502 static int hvmemul_do_mmio_addr(paddr_t mmio_gpa,
503                                 unsigned long *reps,
504                                 unsigned int size,
505                                 uint8_t dir,
506                                 bool_t df,
507                                 paddr_t ram_gpa)
508 {
509     return hvmemul_do_io_addr(1, mmio_gpa, reps, size, dir, df, ram_gpa);
510 }
511 
512 /*
513  * Map the frame(s) covering an individual linear access, for writeable
514  * access.  May return NULL for MMIO, or ERR_PTR(~X86EMUL_*) for other errors
515  * including ERR_PTR(~X86EMUL_OKAY) for write-discard mappings.
516  *
517  * In debug builds, map() checks that each slot in hvmemul_ctxt->mfn[] is
518  * clean before use, and poisons unused slots with INVALID_MFN.
519  */
520 static void *hvmemul_map_linear_addr(
521     unsigned long linear, unsigned int bytes, uint32_t pfec,
522     struct hvm_emulate_ctxt *hvmemul_ctxt)
523 {
524     struct vcpu *curr = current;
525     void *err, *mapping;
526     unsigned int nr_frames = ((linear + bytes - !!bytes) >> PAGE_SHIFT) -
527         (linear >> PAGE_SHIFT) + 1;
528     unsigned int i;
529 
530     /*
531      * mfn points to the next free slot.  All used slots have a page reference
532      * held on them.
533      */
534     mfn_t *mfn = &hvmemul_ctxt->mfn[0];
535 
536     /*
537      * The caller has no legitimate reason for trying a zero-byte write, but
538      * all other code here is written to work if the check below was dropped.
539      *
540      * The maximum write size depends on the number of adjacent mfns[] which
541      * can be vmap()'d, accounting for possible misalignment within the region.
542      * The higher level emulation callers are responsible for ensuring that
543      * mfns[] is large enough for the requested write size.
544      */
545     if ( bytes == 0 ||
546          nr_frames > ARRAY_SIZE(hvmemul_ctxt->mfn) )
547     {
548         ASSERT_UNREACHABLE();
549         goto unhandleable;
550     }
551 
552     for ( i = 0; i < nr_frames; i++ )
553     {
554         enum hvm_translation_result res;
555         struct page_info *page;
556         pagefault_info_t pfinfo;
557         p2m_type_t p2mt;
558         unsigned long addr = i ? (linear + (i << PAGE_SHIFT)) & PAGE_MASK : linear;
559 
560         if ( hvmemul_ctxt->ctxt.addr_size < 64 )
561             addr = (uint32_t)addr;
562 
563         /* Error checking.  Confirm that the current slot is clean. */
564         ASSERT(mfn_x(*mfn) == 0);
565 
566         res = hvm_translate_get_page(curr, addr, true, pfec,
567                                      &pfinfo, &page, NULL, &p2mt);
568 
569         switch ( res )
570         {
571         case HVMTRANS_okay:
572             break;
573 
574         case HVMTRANS_bad_linear_to_gfn:
575             ASSERT(pfinfo.linear == addr);
576             x86_emul_pagefault(pfinfo.ec, pfinfo.linear, &hvmemul_ctxt->ctxt);
577             err = ERR_PTR(~X86EMUL_EXCEPTION);
578             goto out;
579 
580         case HVMTRANS_bad_gfn_to_mfn:
581             err = NULL;
582             goto out;
583 
584         case HVMTRANS_gfn_paged_out:
585         case HVMTRANS_gfn_shared:
586             err = ERR_PTR(~X86EMUL_RETRY);
587             goto out;
588 
589         default:
590             goto unhandleable;
591         }
592 
593         *mfn++ = _mfn(page_to_mfn(page));
594 
595         if ( p2m_is_discard_write(p2mt) )
596         {
597             err = ERR_PTR(~X86EMUL_OKAY);
598             goto out;
599         }
600     }
601 
602     /* Entire access within a single frame? */
603     if ( nr_frames == 1 )
604         mapping = map_domain_page(hvmemul_ctxt->mfn[0]);
605     /* Multiple frames? Need to vmap(). */
606     else if ( (mapping = vmap(hvmemul_ctxt->mfn,
607                               nr_frames)) == NULL )
608         goto unhandleable;
609 
610 #ifndef NDEBUG /* Poison unused mfn[]s with INVALID_MFN. */
611     while ( mfn < hvmemul_ctxt->mfn + ARRAY_SIZE(hvmemul_ctxt->mfn) )
612     {
613         ASSERT(mfn_x(*mfn) == 0);
614         *mfn++ = INVALID_MFN;
615     }
616 #endif
617     return mapping + (linear & ~PAGE_MASK);
618 
619  unhandleable:
620     err = ERR_PTR(~X86EMUL_UNHANDLEABLE);
621 
622  out:
623     /* Drop all held references. */
624     while ( mfn-- > hvmemul_ctxt->mfn )
625         put_page(mfn_to_page(mfn_x(*mfn)));
626 
627     return err;
628 }
629 
630 static void hvmemul_unmap_linear_addr(
631     void *mapping, unsigned long linear, unsigned int bytes,
632     struct hvm_emulate_ctxt *hvmemul_ctxt)
633 {
634     struct domain *currd = current->domain;
635     unsigned int nr_frames = ((linear + bytes - !!bytes) >> PAGE_SHIFT) -
636         (linear >> PAGE_SHIFT) + 1;
637     unsigned int i;
638     mfn_t *mfn = &hvmemul_ctxt->mfn[0];
639 
640     ASSERT(bytes > 0);
641 
642     if ( nr_frames == 1 )
643         unmap_domain_page(mapping);
644     else
645         vunmap(mapping);
646 
647     for ( i = 0; i < nr_frames; i++ )
648     {
649         ASSERT(mfn_valid(*mfn));
650         paging_mark_dirty(currd, *mfn);
651         put_page(mfn_to_page(mfn_x(*mfn)));
652 
653         *mfn++ = _mfn(0); /* Clean slot for map()'s error checking. */
654     }
655 
656 #ifndef NDEBUG /* Check (and clean) all unused mfns. */
657     while ( mfn < hvmemul_ctxt->mfn + ARRAY_SIZE(hvmemul_ctxt->mfn) )
658     {
659         ASSERT(mfn_eq(*mfn, INVALID_MFN));
660         *mfn++ = _mfn(0);
661     }
662 #endif
663 }
664 
665 /*
666  * Convert addr from linear to physical form, valid over the range
667  * [addr, addr + *reps * bytes_per_rep]. *reps is adjusted according to
668  * the valid computed range. It is always >0 when X86EMUL_OKAY is returned.
669  * @pfec indicates the access checks to be performed during page-table walks.
670  */
671 static int hvmemul_linear_to_phys(
672     unsigned long addr,
673     paddr_t *paddr,
674     unsigned int bytes_per_rep,
675     unsigned long *reps,
676     uint32_t pfec,
677     struct hvm_emulate_ctxt *hvmemul_ctxt)
678 {
679     struct vcpu *curr = current;
680     unsigned long pfn, npfn, done, todo, i, offset = addr & ~PAGE_MASK;
681     int reverse;
682 
683     /*
684      * Clip repetitions to a sensible maximum. This avoids extensive looping in
685      * this function while still amortising the cost of I/O trap-and-emulate.
686      */
687     *reps = min_t(unsigned long, *reps, 4096);
688 
689     /* With no paging it's easy: linear == physical. */
690     if ( !(curr->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PG) )
691     {
692         *paddr = addr;
693         return X86EMUL_OKAY;
694     }
695 
696     /* Reverse mode if this is a backwards multi-iteration string operation. */
697     reverse = (hvmemul_ctxt->ctxt.regs->eflags & X86_EFLAGS_DF) && (*reps > 1);
698 
699     if ( reverse && ((PAGE_SIZE - offset) < bytes_per_rep) )
700     {
701         /* Do page-straddling first iteration forwards via recursion. */
702         paddr_t _paddr;
703         unsigned long one_rep = 1;
704         int rc = hvmemul_linear_to_phys(
705             addr, &_paddr, bytes_per_rep, &one_rep, pfec, hvmemul_ctxt);
706         if ( rc != X86EMUL_OKAY )
707             return rc;
708         pfn = _paddr >> PAGE_SHIFT;
709     }
710     else if ( (pfn = paging_gva_to_gfn(curr, addr, &pfec)) == gfn_x(INVALID_GFN) )
711     {
712         if ( pfec & (PFEC_page_paged | PFEC_page_shared) )
713             return X86EMUL_RETRY;
714         *reps = 0;
715         x86_emul_pagefault(pfec, addr, &hvmemul_ctxt->ctxt);
716         return X86EMUL_EXCEPTION;
717     }
718 
719     done = reverse ? bytes_per_rep + offset : PAGE_SIZE - offset;
720     todo = *reps * bytes_per_rep;
721     for ( i = 1; done < todo; i++ )
722     {
723         /* Get the next PFN in the range. */
724         addr += reverse ? -PAGE_SIZE : PAGE_SIZE;
725         npfn = paging_gva_to_gfn(curr, addr, &pfec);
726 
727         /* Is it contiguous with the preceding PFNs? If not then we're done. */
728         if ( (npfn == gfn_x(INVALID_GFN)) ||
729              (npfn != (pfn + (reverse ? -i : i))) )
730         {
731             if ( pfec & (PFEC_page_paged | PFEC_page_shared) )
732                 return X86EMUL_RETRY;
733             done /= bytes_per_rep;
734             if ( done == 0 )
735             {
736                 ASSERT(!reverse);
737                 if ( npfn != gfn_x(INVALID_GFN) )
738                     return X86EMUL_UNHANDLEABLE;
739                 *reps = 0;
740                 x86_emul_pagefault(pfec, addr & PAGE_MASK, &hvmemul_ctxt->ctxt);
741                 return X86EMUL_EXCEPTION;
742             }
743             *reps = done;
744             break;
745         }
746 
747         done += PAGE_SIZE;
748     }
749 
750     *paddr = ((paddr_t)pfn << PAGE_SHIFT) | offset;
751     return X86EMUL_OKAY;
752 }
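/*
 * Worked example (editor's note): with *reps == 8, bytes_per_rep == 4 and
 * offset == 0xff0, the first page covers PAGE_SIZE - 0xff0 == 16 bytes. If
 * the next page's PFN is not contiguous (and no paging/sharing fault is
 * pending), the loop above reduces *reps to 16 / 4 == 4 and still returns
 * X86EMUL_OKAY, so only the first four iterations are handled in this batch.
 */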
753 
754 
755 static int hvmemul_virtual_to_linear(
756     enum x86_segment seg,
757     unsigned long offset,
758     unsigned int bytes_per_rep,
759     unsigned long *reps,
760     enum hvm_access_type access_type,
761     struct hvm_emulate_ctxt *hvmemul_ctxt,
762     unsigned long *linear)
763 {
764     struct segment_register *reg;
765     int okay;
766     unsigned long max_reps = 4096;
767 
768     if ( seg == x86_seg_none )
769     {
770         *linear = offset;
771         return X86EMUL_OKAY;
772     }
773 
774     /*
775      * If introspection has been enabled for this domain, and we're emulating
776      * because a vm_event reply asked us to (i.e. not doing regular IO), reps
777      * should be at most 1, since the optimization might otherwise cause a
778      * single vm_event to be triggered for repeated writes to a whole page.
779      */
780     if ( unlikely(current->domain->arch.mem_access_emulate_each_rep) &&
781          current->arch.vm_event->emulate_flags != 0 )
782        max_reps = 1;
783 
784     /*
785      * Clip repetitions to avoid overflow when multiplying by @bytes_per_rep.
786      * The chosen maximum is very conservative but it's what we use in
787      * hvmemul_linear_to_phys() so there is no point in using a larger value.
788      */
789     *reps = min_t(unsigned long, *reps, max_reps);
790 
791     reg = hvmemul_get_seg_reg(seg, hvmemul_ctxt);
792     if ( IS_ERR(reg) )
793         return -PTR_ERR(reg);
794 
795     if ( (hvmemul_ctxt->ctxt.regs->eflags & X86_EFLAGS_DF) && (*reps > 1) )
796     {
797         /*
798          * x86_emulate() clips the repetition count to ensure we don't wrap
799          * the effective-address index register. Hence this assertion holds.
800          */
801         ASSERT(offset >= ((*reps - 1) * bytes_per_rep));
802         okay = hvm_virtual_to_linear_addr(
803             seg, reg, offset - (*reps - 1) * bytes_per_rep,
804             *reps * bytes_per_rep, access_type,
805             hvmemul_get_seg_reg(x86_seg_cs, hvmemul_ctxt), linear);
806         *linear += (*reps - 1) * bytes_per_rep;
807         if ( hvmemul_ctxt->ctxt.addr_size != 64 )
808             *linear = (uint32_t)*linear;
809     }
810     else
811     {
812         okay = hvm_virtual_to_linear_addr(
813             seg, reg, offset, *reps * bytes_per_rep, access_type,
814             hvmemul_get_seg_reg(x86_seg_cs, hvmemul_ctxt), linear);
815     }
816 
817     if ( okay )
818         return X86EMUL_OKAY;
819 
820     /* If this is a string operation, emulate each iteration separately. */
821     if ( *reps != 1 )
822         return X86EMUL_UNHANDLEABLE;
823 
824     /*
825      * Leave exception injection to the caller for non-user segments: We
826      * neither know the exact error code to be used, nor can we easily
827      * determine the kind of exception (#GP or #TS) in that case.
828      */
829     *reps = 0;
830     if ( is_x86_user_segment(seg) )
831         x86_emul_hw_exception((seg == x86_seg_ss)
832                               ? TRAP_stack_error
833                               : TRAP_gp_fault, 0, &hvmemul_ctxt->ctxt);
834 
835     return X86EMUL_EXCEPTION;
836 }
837 
838 static int hvmemul_phys_mmio_access(
839     struct hvm_mmio_cache *cache, paddr_t gpa, unsigned int size, uint8_t dir,
840     uint8_t *buffer, unsigned int offset)
841 {
842     unsigned long one_rep = 1;
843     unsigned int chunk;
844     int rc = X86EMUL_OKAY;
845 
846     /* Accesses must fall within a page. */
847     BUG_ON((gpa & ~PAGE_MASK) + size > PAGE_SIZE);
848 
849     /*
850      * hvmemul_do_io() cannot handle non-power-of-2 accesses or
851      * accesses larger than sizeof(long), so choose the highest power
852      * of 2 not exceeding sizeof(long) as the 'chunk' size.
853      */
854     ASSERT(size != 0);
855     chunk = 1u << (fls(size) - 1);
856     if ( chunk > sizeof (long) )
857         chunk = sizeof (long);
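    /*
     * Example (editor's note): a 6-byte access starts with chunk == 4 (the
     * highest power of 2 not exceeding 6, capped at sizeof(long)); after the
     * first iteration 2 bytes remain, so the loop below shrinks chunk to 2.
     */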
858 
859     for ( ;; )
860     {
861         /* Have we already done this chunk? */
862         if ( offset < cache->size )
863         {
864             ASSERT((offset + chunk) <= cache->size);
865 
866             if ( dir == IOREQ_READ )
867                 memcpy(&buffer[offset], &cache->buffer[offset], chunk);
868             else if ( memcmp(&buffer[offset], &cache->buffer[offset], chunk) != 0 )
869                 domain_crash(current->domain);
870         }
871         else
872         {
873             ASSERT(offset == cache->size);
874 
875             rc = hvmemul_do_mmio_buffer(gpa, &one_rep, chunk, dir, 0,
876                                         &buffer[offset]);
877             if ( rc != X86EMUL_OKAY )
878                 break;
879 
880             /* Note that we have now done this chunk. */
881             memcpy(&cache->buffer[offset], &buffer[offset], chunk);
882             cache->size += chunk;
883         }
884 
885         /* Advance to the next chunk. */
886         gpa += chunk;
887         offset += chunk;
888         size -= chunk;
889 
890         if ( size == 0 )
891             break;
892 
893         /*
894          * If the chunk now exceeds the remaining size, choose the next
895          * lowest power of 2 that will fit.
896          */
897         while ( chunk > size )
898             chunk >>= 1;
899     }
900 
901     return rc;
902 }
903 
904 /*
905  * Multi-cycle MMIO handling is based upon the assumption that emulation
906  * of the same instruction will not access the same MMIO region more
907  * than once. Hence we can deal with re-emulation (for secondary or
908  * subsequent cycles) by looking up the result or previous I/O in a
909  * cache indexed by linear MMIO address.
910  */
911 static struct hvm_mmio_cache *hvmemul_find_mmio_cache(
912     struct hvm_vcpu_io *vio, unsigned long gla, uint8_t dir)
913 {
914     unsigned int i;
915     struct hvm_mmio_cache *cache;
916 
917     for ( i = 0; i < vio->mmio_cache_count; i ++ )
918     {
919         cache = &vio->mmio_cache[i];
920 
921         if ( gla == cache->gla &&
922              dir == cache->dir )
923             return cache;
924     }
925 
926     i = vio->mmio_cache_count++;
927     if ( i == ARRAY_SIZE(vio->mmio_cache) )
928     {
929         domain_crash(current->domain);
930         return NULL;
931     }
932 
933     cache = &vio->mmio_cache[i];
934     memset(cache, 0, sizeof (*cache));
935 
936     cache->gla = gla;
937     cache->dir = dir;
938 
939     return cache;
940 }
941 
942 static void latch_linear_to_phys(struct hvm_vcpu_io *vio, unsigned long gla,
943                                  unsigned long gpa, bool_t write)
944 {
945     if ( vio->mmio_access.gla_valid )
946         return;
947 
948     vio->mmio_gla = gla & PAGE_MASK;
949     vio->mmio_gpfn = PFN_DOWN(gpa);
950     vio->mmio_access = (struct npfec){ .gla_valid = 1,
951                                        .read_access = 1,
952                                        .write_access = write };
953 }
954 
955 static int hvmemul_linear_mmio_access(
956     unsigned long gla, unsigned int size, uint8_t dir, void *buffer,
957     uint32_t pfec, struct hvm_emulate_ctxt *hvmemul_ctxt, bool_t known_gpfn)
958 {
959     struct hvm_vcpu_io *vio = &current->arch.hvm_vcpu.hvm_io;
960     unsigned long offset = gla & ~PAGE_MASK;
961     struct hvm_mmio_cache *cache = hvmemul_find_mmio_cache(vio, gla, dir);
962     unsigned int chunk, buffer_offset = 0;
963     paddr_t gpa;
964     unsigned long one_rep = 1;
965     int rc;
966 
967     if ( cache == NULL )
968         return X86EMUL_UNHANDLEABLE;
969 
970     chunk = min_t(unsigned int, size, PAGE_SIZE - offset);
971 
972     if ( known_gpfn )
973         gpa = pfn_to_paddr(vio->mmio_gpfn) | offset;
974     else
975     {
976         rc = hvmemul_linear_to_phys(gla, &gpa, chunk, &one_rep, pfec,
977                                     hvmemul_ctxt);
978         if ( rc != X86EMUL_OKAY )
979             return rc;
980 
981         latch_linear_to_phys(vio, gla, gpa, dir == IOREQ_WRITE);
982     }
983 
984     for ( ;; )
985     {
986         rc = hvmemul_phys_mmio_access(cache, gpa, chunk, dir, buffer, buffer_offset);
987         if ( rc != X86EMUL_OKAY )
988             break;
989 
990         gla += chunk;
991         buffer_offset += chunk;
992         size -= chunk;
993 
994         if ( size == 0 )
995             break;
996 
997         chunk = min_t(unsigned int, size, PAGE_SIZE);
998         rc = hvmemul_linear_to_phys(gla, &gpa, chunk, &one_rep, pfec,
999                                     hvmemul_ctxt);
1000         if ( rc != X86EMUL_OKAY )
1001             return rc;
1002     }
1003 
1004     return rc;
1005 }
1006 
1007 static inline int hvmemul_linear_mmio_read(
1008     unsigned long gla, unsigned int size, void *buffer,
1009     uint32_t pfec, struct hvm_emulate_ctxt *hvmemul_ctxt,
1010     bool_t translate)
1011 {
1012     return hvmemul_linear_mmio_access(gla, size, IOREQ_READ, buffer,
1013                                       pfec, hvmemul_ctxt, translate);
1014 }
1015 
1016 static inline int hvmemul_linear_mmio_write(
1017     unsigned long gla, unsigned int size, void *buffer,
1018     uint32_t pfec, struct hvm_emulate_ctxt *hvmemul_ctxt,
1019     bool_t translate)
1020 {
1021     return hvmemul_linear_mmio_access(gla, size, IOREQ_WRITE, buffer,
1022                                       pfec, hvmemul_ctxt, translate);
1023 }
1024 
1025 static int __hvmemul_read(
1026     enum x86_segment seg,
1027     unsigned long offset,
1028     void *p_data,
1029     unsigned int bytes,
1030     enum hvm_access_type access_type,
1031     struct hvm_emulate_ctxt *hvmemul_ctxt)
1032 {
1033     struct vcpu *curr = current;
1034     pagefault_info_t pfinfo;
1035     unsigned long addr, reps = 1;
1036     uint32_t pfec = PFEC_page_present;
1037     struct hvm_vcpu_io *vio = &curr->arch.hvm_vcpu.hvm_io;
1038     int rc;
1039 
1040     if ( is_x86_system_segment(seg) )
1041         pfec |= PFEC_implicit;
1042     else if ( hvmemul_ctxt->seg_reg[x86_seg_ss].dpl == 3 )
1043         pfec |= PFEC_user_mode;
1044 
1045     rc = hvmemul_virtual_to_linear(
1046         seg, offset, bytes, &reps, access_type, hvmemul_ctxt, &addr);
1047     if ( rc != X86EMUL_OKAY || !bytes )
1048         return rc;
1049     if ( ((access_type != hvm_access_insn_fetch
1050            ? vio->mmio_access.read_access
1051            : vio->mmio_access.insn_fetch)) &&
1052          (vio->mmio_gla == (addr & PAGE_MASK)) )
1053         return hvmemul_linear_mmio_read(addr, bytes, p_data, pfec, hvmemul_ctxt, 1);
1054 
1055     rc = ((access_type == hvm_access_insn_fetch) ?
1056           hvm_fetch_from_guest_linear(p_data, addr, bytes, pfec, &pfinfo) :
1057           hvm_copy_from_guest_linear(p_data, addr, bytes, pfec, &pfinfo));
1058 
1059     switch ( rc )
1060     {
1061     case HVMTRANS_okay:
1062         break;
1063     case HVMTRANS_bad_linear_to_gfn:
1064         x86_emul_pagefault(pfinfo.ec, pfinfo.linear, &hvmemul_ctxt->ctxt);
1065         return X86EMUL_EXCEPTION;
1066     case HVMTRANS_bad_gfn_to_mfn:
1067         if ( access_type == hvm_access_insn_fetch )
1068             return X86EMUL_UNHANDLEABLE;
1069 
1070         return hvmemul_linear_mmio_read(addr, bytes, p_data, pfec, hvmemul_ctxt, 0);
1071     case HVMTRANS_gfn_paged_out:
1072     case HVMTRANS_gfn_shared:
1073         return X86EMUL_RETRY;
1074     default:
1075         return X86EMUL_UNHANDLEABLE;
1076     }
1077 
1078     return X86EMUL_OKAY;
1079 }
1080 
1081 static int hvmemul_read(
1082     enum x86_segment seg,
1083     unsigned long offset,
1084     void *p_data,
1085     unsigned int bytes,
1086     struct x86_emulate_ctxt *ctxt)
1087 {
1088     struct hvm_emulate_ctxt *hvmemul_ctxt =
1089         container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
1090 
1091     if ( unlikely(hvmemul_ctxt->set_context) )
1092         return set_context_data(p_data, bytes);
1093 
1094     return __hvmemul_read(
1095         seg, offset, p_data, bytes, hvm_access_read,
1096         container_of(ctxt, struct hvm_emulate_ctxt, ctxt));
1097 }
1098 
1099 int hvmemul_insn_fetch(
1100     enum x86_segment seg,
1101     unsigned long offset,
1102     void *p_data,
1103     unsigned int bytes,
1104     struct x86_emulate_ctxt *ctxt)
1105 {
1106     struct hvm_emulate_ctxt *hvmemul_ctxt =
1107         container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
1108     /* Careful, as offset can wrap or truncate WRT insn_buf_eip. */
1109     uint8_t insn_off = offset - hvmemul_ctxt->insn_buf_eip;
1110 
1111     /*
1112      * Fall back if requested bytes are not in the prefetch cache.
1113      * But always perform the (fake) read when bytes == 0.
1114      */
1115     if ( !bytes ||
1116          unlikely((insn_off + bytes) > hvmemul_ctxt->insn_buf_bytes) )
1117     {
1118         int rc = __hvmemul_read(seg, offset, p_data, bytes,
1119                                 hvm_access_insn_fetch, hvmemul_ctxt);
1120 
1121         if ( rc == X86EMUL_OKAY && bytes )
1122         {
1123             /*
1124              * Will we overflow insn_buf[]?  This shouldn't be able to happen,
1125              * which means something went wrong with instruction decoding...
1126              */
1127             if ( insn_off >= sizeof(hvmemul_ctxt->insn_buf) ||
1128                  insn_off + bytes > sizeof(hvmemul_ctxt->insn_buf) )
1129             {
1130                 ASSERT_UNREACHABLE();
1131                 return X86EMUL_UNHANDLEABLE;
1132             }
1133 
1134             memcpy(&hvmemul_ctxt->insn_buf[insn_off], p_data, bytes);
1135             hvmemul_ctxt->insn_buf_bytes = insn_off + bytes;
1136         }
1137 
1138         return rc;
1139     }
1140 
1141     /* Hit the cache. Simple memcpy. */
1142     memcpy(p_data, &hvmemul_ctxt->insn_buf[insn_off], bytes);
1143     return X86EMUL_OKAY;
1144 }
1145 
1146 static int hvmemul_write(
1147     enum x86_segment seg,
1148     unsigned long offset,
1149     void *p_data,
1150     unsigned int bytes,
1151     struct x86_emulate_ctxt *ctxt)
1152 {
1153     struct hvm_emulate_ctxt *hvmemul_ctxt =
1154         container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
1155     struct vcpu *curr = current;
1156     unsigned long addr, reps = 1;
1157     uint32_t pfec = PFEC_page_present | PFEC_write_access;
1158     struct hvm_vcpu_io *vio = &curr->arch.hvm_vcpu.hvm_io;
1159     int rc;
1160     void *mapping;
1161 
1162     if ( is_x86_system_segment(seg) )
1163         pfec |= PFEC_implicit;
1164     else if ( hvmemul_ctxt->seg_reg[x86_seg_ss].dpl == 3 )
1165         pfec |= PFEC_user_mode;
1166 
1167     rc = hvmemul_virtual_to_linear(
1168         seg, offset, bytes, &reps, hvm_access_write, hvmemul_ctxt, &addr);
1169     if ( rc != X86EMUL_OKAY || !bytes )
1170         return rc;
1171 
1172     if ( vio->mmio_access.write_access &&
1173          (vio->mmio_gla == (addr & PAGE_MASK)) )
1174         return hvmemul_linear_mmio_write(addr, bytes, p_data, pfec, hvmemul_ctxt, 1);
1175 
1176     mapping = hvmemul_map_linear_addr(addr, bytes, pfec, hvmemul_ctxt);
1177     if ( IS_ERR(mapping) )
1178         return ~PTR_ERR(mapping);
1179 
1180     if ( !mapping )
1181         return hvmemul_linear_mmio_write(addr, bytes, p_data, pfec, hvmemul_ctxt, 0);
1182 
1183     memcpy(mapping, p_data, bytes);
1184 
1185     hvmemul_unmap_linear_addr(mapping, addr, bytes, hvmemul_ctxt);
1186 
1187     return X86EMUL_OKAY;
1188 }
1189 
1190 static int hvmemul_write_discard(
1191     enum x86_segment seg,
1192     unsigned long offset,
1193     void *p_data,
1194     unsigned int bytes,
1195     struct x86_emulate_ctxt *ctxt)
1196 {
1197     /* Discarding the write. */
1198     return X86EMUL_OKAY;
1199 }
1200 
1201 static int hvmemul_rep_ins_discard(
1202     uint16_t src_port,
1203     enum x86_segment dst_seg,
1204     unsigned long dst_offset,
1205     unsigned int bytes_per_rep,
1206     unsigned long *reps,
1207     struct x86_emulate_ctxt *ctxt)
1208 {
1209     return X86EMUL_OKAY;
1210 }
1211 
1212 static int hvmemul_rep_movs_discard(
1213    enum x86_segment src_seg,
1214    unsigned long src_offset,
1215    enum x86_segment dst_seg,
1216    unsigned long dst_offset,
1217    unsigned int bytes_per_rep,
1218    unsigned long *reps,
1219    struct x86_emulate_ctxt *ctxt)
1220 {
1221     return X86EMUL_OKAY;
1222 }
1223 
1224 static int hvmemul_rep_stos_discard(
1225     void *p_data,
1226     enum x86_segment seg,
1227     unsigned long offset,
1228     unsigned int bytes_per_rep,
1229     unsigned long *reps,
1230     struct x86_emulate_ctxt *ctxt)
1231 {
1232     return X86EMUL_OKAY;
1233 }
1234 
1235 static int hvmemul_rep_outs_discard(
1236     enum x86_segment src_seg,
1237     unsigned long src_offset,
1238     uint16_t dst_port,
1239     unsigned int bytes_per_rep,
1240     unsigned long *reps,
1241     struct x86_emulate_ctxt *ctxt)
1242 {
1243     return X86EMUL_OKAY;
1244 }
1245 
1246 static int hvmemul_cmpxchg_discard(
1247     enum x86_segment seg,
1248     unsigned long offset,
1249     void *p_old,
1250     void *p_new,
1251     unsigned int bytes,
1252     struct x86_emulate_ctxt *ctxt)
1253 {
1254     return X86EMUL_OKAY;
1255 }
1256 
1257 static int hvmemul_read_io_discard(
1258     unsigned int port,
1259     unsigned int bytes,
1260     unsigned long *val,
1261     struct x86_emulate_ctxt *ctxt)
1262 {
1263     return X86EMUL_OKAY;
1264 }
1265 
1266 static int hvmemul_write_io_discard(
1267     unsigned int port,
1268     unsigned int bytes,
1269     unsigned long val,
1270     struct x86_emulate_ctxt *ctxt)
1271 {
1272     return X86EMUL_OKAY;
1273 }
1274 
1275 static int hvmemul_write_msr_discard(
1276     unsigned int reg,
1277     uint64_t val,
1278     struct x86_emulate_ctxt *ctxt)
1279 {
1280     return X86EMUL_OKAY;
1281 }
1282 
1283 static int hvmemul_wbinvd_discard(
1284     struct x86_emulate_ctxt *ctxt)
1285 {
1286     return X86EMUL_OKAY;
1287 }
1288 
1289 static int hvmemul_cmpxchg(
1290     enum x86_segment seg,
1291     unsigned long offset,
1292     void *p_old,
1293     void *p_new,
1294     unsigned int bytes,
1295     struct x86_emulate_ctxt *ctxt)
1296 {
1297     /* Fix this in case the guest is really relying on r-m-w atomicity. */
1298     return hvmemul_write(seg, offset, p_new, bytes, ctxt);
1299 }
1300 
1301 static int hvmemul_validate(
1302     const struct x86_emulate_state *state,
1303     struct x86_emulate_ctxt *ctxt)
1304 {
1305     const struct hvm_emulate_ctxt *hvmemul_ctxt =
1306         container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
1307 
1308     return !hvmemul_ctxt->validate || hvmemul_ctxt->validate(state, ctxt)
1309            ? X86EMUL_OKAY : X86EMUL_UNHANDLEABLE;
1310 }
1311 
1312 static int hvmemul_rep_ins(
1313     uint16_t src_port,
1314     enum x86_segment dst_seg,
1315     unsigned long dst_offset,
1316     unsigned int bytes_per_rep,
1317     unsigned long *reps,
1318     struct x86_emulate_ctxt *ctxt)
1319 {
1320     struct hvm_emulate_ctxt *hvmemul_ctxt =
1321         container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
1322     unsigned long addr;
1323     uint32_t pfec = PFEC_page_present | PFEC_write_access;
1324     paddr_t gpa;
1325     p2m_type_t p2mt;
1326     int rc;
1327 
1328     rc = hvmemul_virtual_to_linear(
1329         dst_seg, dst_offset, bytes_per_rep, reps, hvm_access_write,
1330         hvmemul_ctxt, &addr);
1331     if ( rc != X86EMUL_OKAY )
1332         return rc;
1333 
1334     if ( hvmemul_ctxt->seg_reg[x86_seg_ss].dpl == 3 )
1335         pfec |= PFEC_user_mode;
1336 
1337     rc = hvmemul_linear_to_phys(
1338         addr, &gpa, bytes_per_rep, reps, pfec, hvmemul_ctxt);
1339     if ( rc != X86EMUL_OKAY )
1340         return rc;
1341 
1342     (void) get_gfn_query_unlocked(current->domain, gpa >> PAGE_SHIFT, &p2mt);
1343     if ( p2mt == p2m_mmio_direct || p2mt == p2m_mmio_dm )
1344         return X86EMUL_UNHANDLEABLE;
1345 
1346     return hvmemul_do_pio_addr(src_port, reps, bytes_per_rep, IOREQ_READ,
1347                                !!(ctxt->regs->eflags & X86_EFLAGS_DF), gpa);
1348 }
1349 
1350 static int hvmemul_rep_outs_set_context(
1351     enum x86_segment src_seg,
1352     unsigned long src_offset,
1353     uint16_t dst_port,
1354     unsigned int bytes_per_rep,
1355     unsigned long *reps,
1356     struct x86_emulate_ctxt *ctxt)
1357 {
1358     unsigned int bytes = *reps * bytes_per_rep;
1359     char *buf;
1360     int rc;
1361 
1362     buf = xmalloc_array(char, bytes);
1363 
1364     if ( buf == NULL )
1365         return X86EMUL_UNHANDLEABLE;
1366 
1367     rc = set_context_data(buf, bytes);
1368 
1369     if ( rc == X86EMUL_OKAY )
1370         rc = hvmemul_do_pio_buffer(dst_port, bytes, IOREQ_WRITE, buf);
1371 
1372     xfree(buf);
1373 
1374     return rc;
1375 }
1376 
1377 static int hvmemul_rep_outs(
1378     enum x86_segment src_seg,
1379     unsigned long src_offset,
1380     uint16_t dst_port,
1381     unsigned int bytes_per_rep,
1382     unsigned long *reps,
1383     struct x86_emulate_ctxt *ctxt)
1384 {
1385     struct hvm_emulate_ctxt *hvmemul_ctxt =
1386         container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
1387     unsigned long addr;
1388     uint32_t pfec = PFEC_page_present;
1389     paddr_t gpa;
1390     p2m_type_t p2mt;
1391     int rc;
1392 
1393     if ( unlikely(hvmemul_ctxt->set_context) )
1394         return hvmemul_rep_outs_set_context(src_seg, src_offset, dst_port,
1395                                             bytes_per_rep, reps, ctxt);
1396 
1397     rc = hvmemul_virtual_to_linear(
1398         src_seg, src_offset, bytes_per_rep, reps, hvm_access_read,
1399         hvmemul_ctxt, &addr);
1400     if ( rc != X86EMUL_OKAY )
1401         return rc;
1402 
1403     if ( hvmemul_ctxt->seg_reg[x86_seg_ss].dpl == 3 )
1404         pfec |= PFEC_user_mode;
1405 
1406     rc = hvmemul_linear_to_phys(
1407         addr, &gpa, bytes_per_rep, reps, pfec, hvmemul_ctxt);
1408     if ( rc != X86EMUL_OKAY )
1409         return rc;
1410 
1411     (void) get_gfn_query_unlocked(current->domain, gpa >> PAGE_SHIFT, &p2mt);
1412     if ( p2mt == p2m_mmio_direct || p2mt == p2m_mmio_dm )
1413         return X86EMUL_UNHANDLEABLE;
1414 
1415     return hvmemul_do_pio_addr(dst_port, reps, bytes_per_rep, IOREQ_WRITE,
1416                                !!(ctxt->regs->eflags & X86_EFLAGS_DF), gpa);
1417 }
1418 
1419 static int hvmemul_rep_movs(
1420    enum x86_segment src_seg,
1421    unsigned long src_offset,
1422    enum x86_segment dst_seg,
1423    unsigned long dst_offset,
1424    unsigned int bytes_per_rep,
1425    unsigned long *reps,
1426    struct x86_emulate_ctxt *ctxt)
1427 {
1428     struct hvm_emulate_ctxt *hvmemul_ctxt =
1429         container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
1430     struct hvm_vcpu_io *vio = &current->arch.hvm_vcpu.hvm_io;
1431     unsigned long saddr, daddr, bytes;
1432     paddr_t sgpa, dgpa;
1433     uint32_t pfec = PFEC_page_present;
1434     p2m_type_t sp2mt, dp2mt;
1435     int rc, df = !!(ctxt->regs->eflags & X86_EFLAGS_DF);
1436     char *buf;
1437 
1438     rc = hvmemul_virtual_to_linear(
1439         src_seg, src_offset, bytes_per_rep, reps, hvm_access_read,
1440         hvmemul_ctxt, &saddr);
1441     if ( rc != X86EMUL_OKAY )
1442         return rc;
1443 
1444     rc = hvmemul_virtual_to_linear(
1445         dst_seg, dst_offset, bytes_per_rep, reps, hvm_access_write,
1446         hvmemul_ctxt, &daddr);
1447     if ( rc != X86EMUL_OKAY )
1448         return rc;
1449 
1450     if ( hvmemul_ctxt->seg_reg[x86_seg_ss].dpl == 3 )
1451         pfec |= PFEC_user_mode;
1452 
1453     if ( vio->mmio_access.read_access &&
1454          (vio->mmio_gla == (saddr & PAGE_MASK)) &&
1455          /*
1456           * Upon initial invocation don't truncate large batches just because
1457           * of a hit for the translation: Doing the guest page table walk is
1458           * cheaper than multiple round trips through the device model. Yet
1459           * when processing a response we can always re-use the translation.
1460           */
1461          (vio->io_req.state == STATE_IORESP_READY ||
1462           ((!df || *reps == 1) &&
1463            PAGE_SIZE - (saddr & ~PAGE_MASK) >= *reps * bytes_per_rep)) )
1464         sgpa = pfn_to_paddr(vio->mmio_gpfn) | (saddr & ~PAGE_MASK);
1465     else
1466     {
1467         rc = hvmemul_linear_to_phys(saddr, &sgpa, bytes_per_rep, reps, pfec,
1468                                     hvmemul_ctxt);
1469         if ( rc != X86EMUL_OKAY )
1470             return rc;
1471     }
1472 
1473     bytes = PAGE_SIZE - (daddr & ~PAGE_MASK);
1474     if ( vio->mmio_access.write_access &&
1475          (vio->mmio_gla == (daddr & PAGE_MASK)) &&
1476          /* See comment above. */
1477          (vio->io_req.state == STATE_IORESP_READY ||
1478           ((!df || *reps == 1) &&
1479            PAGE_SIZE - (daddr & ~PAGE_MASK) >= *reps * bytes_per_rep)) )
1480         dgpa = pfn_to_paddr(vio->mmio_gpfn) | (daddr & ~PAGE_MASK);
1481     else
1482     {
1483         rc = hvmemul_linear_to_phys(daddr, &dgpa, bytes_per_rep, reps,
1484                                     pfec | PFEC_write_access, hvmemul_ctxt);
1485         if ( rc != X86EMUL_OKAY )
1486             return rc;
1487     }
1488 
1489     /* Check for MMIO ops */
1490     (void) get_gfn_query_unlocked(current->domain, sgpa >> PAGE_SHIFT, &sp2mt);
1491     (void) get_gfn_query_unlocked(current->domain, dgpa >> PAGE_SHIFT, &dp2mt);
1492 
1493     if ( sp2mt == p2m_mmio_direct || dp2mt == p2m_mmio_direct ||
1494          (sp2mt == p2m_mmio_dm && dp2mt == p2m_mmio_dm) )
1495         return X86EMUL_UNHANDLEABLE;
1496 
1497     if ( sp2mt == p2m_mmio_dm )
1498     {
1499         latch_linear_to_phys(vio, saddr, sgpa, 0);
1500         return hvmemul_do_mmio_addr(
1501             sgpa, reps, bytes_per_rep, IOREQ_READ, df, dgpa);
1502     }
1503 
1504     if ( dp2mt == p2m_mmio_dm )
1505     {
1506         latch_linear_to_phys(vio, daddr, dgpa, 1);
1507         return hvmemul_do_mmio_addr(
1508             dgpa, reps, bytes_per_rep, IOREQ_WRITE, df, sgpa);
1509     }
1510 
1511     /* RAM-to-RAM copy: emulate as equivalent of memmove(dgpa, sgpa, bytes). */
1512     bytes = *reps * bytes_per_rep;
1513 
1514     /* Adjust source address for reverse copy. */
1515     if ( df )
1516         sgpa -= bytes - bytes_per_rep;
1517 
1518     /*
1519      * Will first iteration copy fall within source range? If not then entire
1520      * copy does not corrupt itself. If so, then this is more complex than
1521      * can be emulated by a source-to-buffer-to-destination block copy.
1522      */
1523     if ( ((dgpa + bytes_per_rep) > sgpa) && (dgpa < (sgpa + bytes)) )
1524         return X86EMUL_UNHANDLEABLE;
1525 
1526     /* Adjust destination address for reverse copy. */
1527     if ( df )
1528         dgpa -= bytes - bytes_per_rep;
1529 
1530     /* Allocate temporary buffer. Fall back to slow emulation if this fails. */
1531     buf = xmalloc_bytes(bytes);
1532     if ( buf == NULL )
1533         return X86EMUL_UNHANDLEABLE;
1534 
1535     if ( unlikely(hvmemul_ctxt->set_context) )
1536     {
1537         rc = set_context_data(buf, bytes);
1538 
1539         if ( rc != X86EMUL_OKAY )
1540         {
1541             xfree(buf);
1542             return rc;
1543         }
1544 
1545         rc = HVMTRANS_okay;
1546     }
1547     else
1548         /*
1549          * We do a modicum of checking here, just for paranoia's sake and to
1550          * definitely avoid copying an uninitialised buffer into guest address
1551          * space.
1552          */
1553         rc = hvm_copy_from_guest_phys(buf, sgpa, bytes);
1554 
1555     if ( rc == HVMTRANS_okay )
1556         rc = hvm_copy_to_guest_phys(dgpa, buf, bytes, current);
1557 
1558     xfree(buf);
1559 
1560     if ( rc == HVMTRANS_gfn_paged_out )
1561         return X86EMUL_RETRY;
1562     if ( rc == HVMTRANS_gfn_shared )
1563         return X86EMUL_RETRY;
1564     if ( rc != HVMTRANS_okay )
1565     {
1566         gdprintk(XENLOG_WARNING, "Failed memory-to-memory REP MOVS: sgpa=%"
1567                  PRIpaddr" dgpa=%"PRIpaddr" reps=%lu bytes_per_rep=%u\n",
1568                  sgpa, dgpa, *reps, bytes_per_rep);
1569         return X86EMUL_UNHANDLEABLE;
1570     }
1571 
1572     return X86EMUL_OKAY;
1573 }
1574 
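/*
 * Emulate REP STOS.  The destination is translated to a guest physical
 * address (re-using a latched MMIO translation where possible); stores to
 * device-model MMIO are forwarded as an MMIO buffer request, while stores
 * to RAM are expanded into a temporary buffer and written out in one go.
 */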
1575 static int hvmemul_rep_stos(
1576     void *p_data,
1577     enum x86_segment seg,
1578     unsigned long offset,
1579     unsigned int bytes_per_rep,
1580     unsigned long *reps,
1581     struct x86_emulate_ctxt *ctxt)
1582 {
1583     struct hvm_emulate_ctxt *hvmemul_ctxt =
1584         container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
1585     struct hvm_vcpu_io *vio = &current->arch.hvm_vcpu.hvm_io;
1586     unsigned long addr, bytes;
1587     paddr_t gpa;
1588     p2m_type_t p2mt;
1589     bool_t df = !!(ctxt->regs->eflags & X86_EFLAGS_DF);
1590     int rc = hvmemul_virtual_to_linear(seg, offset, bytes_per_rep, reps,
1591                                        hvm_access_write, hvmemul_ctxt, &addr);
1592 
1593     if ( rc != X86EMUL_OKAY )
1594         return rc;
1595 
1596     bytes = PAGE_SIZE - (addr & ~PAGE_MASK);
1597     if ( vio->mmio_access.write_access &&
1598          (vio->mmio_gla == (addr & PAGE_MASK)) &&
1599          /* See respective comment in MOVS processing. */
1600          (vio->io_req.state == STATE_IORESP_READY ||
1601           ((!df || *reps == 1) &&
1602            PAGE_SIZE - (addr & ~PAGE_MASK) >= *reps * bytes_per_rep)) )
1603         gpa = pfn_to_paddr(vio->mmio_gpfn) | (addr & ~PAGE_MASK);
1604     else
1605     {
1606         uint32_t pfec = PFEC_page_present | PFEC_write_access;
1607 
1608         if ( hvmemul_ctxt->seg_reg[x86_seg_ss].dpl == 3 )
1609             pfec |= PFEC_user_mode;
1610 
1611         rc = hvmemul_linear_to_phys(addr, &gpa, bytes_per_rep, reps, pfec,
1612                                     hvmemul_ctxt);
1613         if ( rc != X86EMUL_OKAY )
1614             return rc;
1615     }
1616 
1617     /* Check for MMIO op */
1618     (void)get_gfn_query_unlocked(current->domain, gpa >> PAGE_SHIFT, &p2mt);
1619 
1620     switch ( p2mt )
1621     {
1622         unsigned long bytes;
1623         void *buf;
1624 
1625     default:
1626         /* Allocate temporary buffer. */
1627         for ( ; ; )
1628         {
1629             bytes = *reps * bytes_per_rep;
1630             buf = xmalloc_bytes(bytes);
1631             if ( buf || *reps <= 1 )
1632                 break;
1633             *reps >>= 1;
1634         }
1635 
1636         if ( !buf )
1637             buf = p_data;
1638         else
1639             switch ( bytes_per_rep )
1640             {
1641                 unsigned long dummy;
1642 
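            /*
             * Expand the 1/2/4/8-byte pattern into the temporary buffer with
             * a native "rep stos", so the whole block can then be written to
             * the guest with a single hvm_copy_to_guest_phys() call.
             */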
1643 #define CASE(bits, suffix)                                     \
1644             case (bits) / 8:                                   \
1645                 asm ( "rep stos" #suffix                       \
1646                       : "=m" (*(char (*)[bytes])buf),          \
1647                         "=D" (dummy), "=c" (dummy)             \
1648                       : "a" (*(const uint##bits##_t *)p_data), \
1649                          "1" (buf), "2" (*reps) );             \
1650                 break
1651             CASE(8, b);
1652             CASE(16, w);
1653             CASE(32, l);
1654             CASE(64, q);
1655 #undef CASE
1656 
1657             default:
1658                 ASSERT_UNREACHABLE();
1659                 xfree(buf);
1660                 return X86EMUL_UNHANDLEABLE;
1661             }
1662 
1663         /* Adjust address for reverse store. */
1664         if ( df )
1665             gpa -= bytes - bytes_per_rep;
1666 
1667         rc = hvm_copy_to_guest_phys(gpa, buf, bytes, current);
1668 
1669         if ( buf != p_data )
1670             xfree(buf);
1671 
1672         switch ( rc )
1673         {
1674         case HVMTRANS_gfn_paged_out:
1675         case HVMTRANS_gfn_shared:
1676             return X86EMUL_RETRY;
1677         case HVMTRANS_okay:
1678             return X86EMUL_OKAY;
1679         }
1680 
1681         gdprintk(XENLOG_WARNING,
1682                  "Failed REP STOS: gpa=%"PRIpaddr" reps=%lu bytes_per_rep=%u\n",
1683                  gpa, *reps, bytes_per_rep);
1684         /* fall through */
1685     case p2m_mmio_direct:
1686         return X86EMUL_UNHANDLEABLE;
1687 
1688     case p2m_mmio_dm:
1689         latch_linear_to_phys(vio, addr, gpa, 1);
1690         return hvmemul_do_mmio_buffer(gpa, reps, bytes_per_rep, IOREQ_WRITE, df,
1691                                       p_data);
1692     }
1693 }
1694 
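/* Copy the (lazily cached) state of a segment register out of the context. */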
1695 static int hvmemul_read_segment(
1696     enum x86_segment seg,
1697     struct segment_register *reg,
1698     struct x86_emulate_ctxt *ctxt)
1699 {
1700     struct hvm_emulate_ctxt *hvmemul_ctxt =
1701         container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
1702     struct segment_register *sreg = hvmemul_get_seg_reg(seg, hvmemul_ctxt);
1703 
1704     if ( IS_ERR(sreg) )
1705          return -PTR_ERR(sreg);
1706 
1707     *reg = *sreg;
1708 
1709     return X86EMUL_OKAY;
1710 }
1711 
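/*
 * Update a segment register in the emulation context and mark it dirty so
 * that hvm_emulate_writeback() propagates the change to the vCPU.
 */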
1712 static int hvmemul_write_segment(
1713     enum x86_segment seg,
1714     const struct segment_register *reg,
1715     struct x86_emulate_ctxt *ctxt)
1716 {
1717     struct hvm_emulate_ctxt *hvmemul_ctxt =
1718         container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
1719     unsigned int idx = seg;
1720 
1721     if ( idx >= ARRAY_SIZE(hvmemul_ctxt->seg_reg) )
1722         return X86EMUL_UNHANDLEABLE;
1723 
1724     hvmemul_ctxt->seg_reg[idx] = *reg;
1725     __set_bit(idx, &hvmemul_ctxt->seg_reg_accessed);
1726     __set_bit(idx, &hvmemul_ctxt->seg_reg_dirty);
1727 
1728     return X86EMUL_OKAY;
1729 }
1730 
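/*
 * Port I/O read.  A vm_event response may supply the value to return
 * (set_context); otherwise the access goes through the normal PIO path.
 */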
1731 static int hvmemul_read_io(
1732     unsigned int port,
1733     unsigned int bytes,
1734     unsigned long *val,
1735     struct x86_emulate_ctxt *ctxt)
1736 {
1737     struct hvm_emulate_ctxt *hvmemul_ctxt =
1738         container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
1739 
1740     *val = 0;
1741 
1742     if ( unlikely(hvmemul_ctxt->set_context) )
1743         return set_context_data(val, bytes);
1744 
1745     return hvmemul_do_pio_buffer(port, bytes, IOREQ_READ, val);
1746 }
1747 
1748 static int hvmemul_write_io(
1749     unsigned int port,
1750     unsigned int bytes,
1751     unsigned long val,
1752     struct x86_emulate_ctxt *ctxt)
1753 {
1754     return hvmemul_do_pio_buffer(port, bytes, IOREQ_WRITE, &val);
1755 }
1756 
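/*
 * Read CR0, CR2, CR3 or CR4 from the vCPU's cached guest_cr[] values; other
 * control registers are not handled here.
 */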
1757 static int hvmemul_read_cr(
1758     unsigned int reg,
1759     unsigned long *val,
1760     struct x86_emulate_ctxt *ctxt)
1761 {
1762     switch ( reg )
1763     {
1764     case 0:
1765     case 2:
1766     case 3:
1767     case 4:
1768         *val = current->arch.hvm_vcpu.guest_cr[reg];
1769         HVMTRACE_LONG_2D(CR_READ, reg, TRC_PAR_LONG(*val));
1770         return X86EMUL_OKAY;
1771     default:
1772         break;
1773     }
1774 
1775     return X86EMUL_UNHANDLEABLE;
1776 }
1777 
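/*
 * Write a control register.  X86EMUL_EXCEPTION returned by the hvm_set_cr*()
 * helpers is converted into #GP(0) for the guest.
 */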
1778 static int hvmemul_write_cr(
1779     unsigned int reg,
1780     unsigned long val,
1781     struct x86_emulate_ctxt *ctxt)
1782 {
1783     int rc;
1784 
1785     HVMTRACE_LONG_2D(CR_WRITE, reg, TRC_PAR_LONG(val));
1786     switch ( reg )
1787     {
1788     case 0:
1789         rc = hvm_set_cr0(val, 1);
1790         break;
1791 
1792     case 2:
1793         current->arch.hvm_vcpu.guest_cr[2] = val;
1794         rc = X86EMUL_OKAY;
1795         break;
1796 
1797     case 3:
1798         rc = hvm_set_cr3(val, 1);
1799         break;
1800 
1801     case 4:
1802         rc = hvm_set_cr4(val, 1);
1803         break;
1804 
1805     default:
1806         rc = X86EMUL_UNHANDLEABLE;
1807         break;
1808     }
1809 
1810     if ( rc == X86EMUL_EXCEPTION )
1811         x86_emul_hw_exception(TRAP_gp_fault, 0, ctxt);
1812 
1813     return rc;
1814 }
1815 
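/*
 * MSR read via the normal intercept path; X86EMUL_EXCEPTION from the handler
 * is reflected to the guest as #GP(0).
 */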
1816 static int hvmemul_read_msr(
1817     unsigned int reg,
1818     uint64_t *val,
1819     struct x86_emulate_ctxt *ctxt)
1820 {
1821     int rc = hvm_msr_read_intercept(reg, val);
1822 
1823     if ( rc == X86EMUL_EXCEPTION )
1824         x86_emul_hw_exception(TRAP_gp_fault, 0, ctxt);
1825 
1826     return rc;
1827 }
1828 
1829 static int hvmemul_write_msr(
1830     unsigned int reg,
1831     uint64_t val,
1832     struct x86_emulate_ctxt *ctxt)
1833 {
1834     int rc = hvm_msr_write_intercept(reg, val, 1);
1835 
1836     if ( rc == X86EMUL_EXCEPTION )
1837         x86_emul_hw_exception(TRAP_gp_fault, 0, ctxt);
1838 
1839     return rc;
1840 }
1841 
1842 static int hvmemul_wbinvd(
1843     struct x86_emulate_ctxt *ctxt)
1844 {
1845     hvm_funcs.wbinvd_intercept();
1846     return X86EMUL_OKAY;
1847 }
1848 
1849 int hvmemul_cpuid(uint32_t leaf, uint32_t subleaf,
1850                   struct cpuid_leaf *res, struct x86_emulate_ctxt *ctxt)
1851 {
1852     guest_cpuid(current, leaf, subleaf, res);
1853     return X86EMUL_OKAY;
1854 }
1855 
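/*
 * Prepare the FPU for use by the emulator: reject state components the vCPU
 * doesn't have enabled, make the guest's FPU state resident, and for plain
 * FPU instructions latch the current register file so a failed memory write
 * can be backed out later (see hvmemul_put_fpu()).
 */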
1856 static int hvmemul_get_fpu(
1857     void (*exception_callback)(void *, struct cpu_user_regs *),
1858     void *exception_callback_arg,
1859     enum x86_emulate_fpu_type type,
1860     struct x86_emulate_ctxt *ctxt)
1861 {
1862     struct vcpu *curr = current;
1863 
1864     switch ( type )
1865     {
1866     case X86EMUL_FPU_fpu:
1867     case X86EMUL_FPU_wait:
1868     case X86EMUL_FPU_mmx:
1869     case X86EMUL_FPU_xmm:
1870         break;
1871     case X86EMUL_FPU_ymm:
1872         if ( !(curr->arch.xcr0 & XSTATE_SSE) ||
1873              !(curr->arch.xcr0 & XSTATE_YMM) )
1874             return X86EMUL_UNHANDLEABLE;
1875         break;
1876     default:
1877         return X86EMUL_UNHANDLEABLE;
1878     }
1879 
1880     if ( !curr->fpu_dirtied )
1881         hvm_funcs.fpu_dirty_intercept();
1882     else if ( type == X86EMUL_FPU_fpu )
1883     {
1884         const typeof(curr->arch.xsave_area->fpu_sse) *fpu_ctxt =
1885             curr->arch.fpu_ctxt;
1886 
1887         /*
1888          * Latch current register state so that we can back out changes
1889          * if needed (namely when a memory write fails after register state
1890          * has already been updated).
1891          * NB: We don't really need the "enable" part of the called function
1892          * (->fpu_dirtied set implies CR0.TS clear), but the additional
1893          * overhead should be low enough to not warrant introduction of yet
1894          * another slightly different function. However, we need to undo the
1895          * ->fpu_dirtied clearing the function does as well as the possible
1896          * masking of all exceptions by FNSTENV.
1897          */
1898         save_fpu_enable();
1899         curr->fpu_dirtied = true;
1900         if ( (fpu_ctxt->fcw & 0x3f) != 0x3f )
1901         {
1902             uint16_t fcw;
1903 
1904             asm ( "fnstcw %0" : "=m" (fcw) );
1905             if ( (fcw & 0x3f) == 0x3f )
1906                 asm ( "fldcw %0" :: "m" (fpu_ctxt->fcw) );
1907             else
1908                 ASSERT(fcw == fpu_ctxt->fcw);
1909         }
1910     }
1911 
1912     curr->arch.hvm_vcpu.fpu_exception_callback = exception_callback;
1913     curr->arch.hvm_vcpu.fpu_exception_callback_arg = exception_callback_arg;
1914 
1915     return X86EMUL_OKAY;
1916 }
1917 
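/*
 * Release the FPU after emulation.  When @aux is supplied, the guest-visible
 * FIP/FDP/FOP values are replaced with those of the emulated instruction;
 * when a back-out is requested, the state saved by hvmemul_get_fpu() will be
 * reloaded on the guest's next FPU use.
 */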
1918 static void hvmemul_put_fpu(
1919     struct x86_emulate_ctxt *ctxt,
1920     enum x86_emulate_fpu_type backout,
1921     const struct x86_emul_fpu_aux *aux)
1922 {
1923     struct vcpu *curr = current;
1924 
1925     curr->arch.hvm_vcpu.fpu_exception_callback = NULL;
1926 
1927     if ( aux )
1928     {
1929         typeof(curr->arch.xsave_area->fpu_sse) *fpu_ctxt = curr->arch.fpu_ctxt;
1930         bool dval = aux->dval;
1931         int mode = hvm_guest_x86_mode(curr);
1932 
1933         ASSERT(backout == X86EMUL_FPU_none);
1934         /*
1935          * Latch current register state so that we can replace FIP/FDP/FOP
1936          * (which have values resulting from our own invocation of the FPU
1937          * instruction during emulation).
1938          * NB: See also the comment in hvmemul_get_fpu(); we don't need to
1939          * set ->fpu_dirtied here as it is going to be cleared below, and
1940          * we also don't need to reload FCW as we're forcing full state to
1941          * be reloaded anyway.
1942          */
1943         save_fpu_enable();
1944 
1945         if ( boot_cpu_has(X86_FEATURE_FDP_EXCP_ONLY) &&
1946              !(fpu_ctxt->fsw & ~fpu_ctxt->fcw & 0x003f) )
1947             dval = false;
1948 
1949         switch ( mode )
1950         {
1951         case 8:
1952             fpu_ctxt->fip.addr = aux->ip;
1953             if ( dval )
1954                 fpu_ctxt->fdp.addr = aux->dp;
1955             fpu_ctxt->x[FPU_WORD_SIZE_OFFSET] = 8;
1956             break;
1957 
1958         case 4: case 2:
1959             fpu_ctxt->fip.offs = aux->ip;
1960             fpu_ctxt->fip.sel  = aux->cs;
1961             if ( dval )
1962             {
1963                 fpu_ctxt->fdp.offs = aux->dp;
1964                 fpu_ctxt->fdp.sel  = aux->ds;
1965             }
1966             fpu_ctxt->x[FPU_WORD_SIZE_OFFSET] = mode;
1967             break;
1968 
1969         case 0: case 1:
1970             fpu_ctxt->fip.addr = aux->ip | (aux->cs << 4);
1971             if ( dval )
1972                 fpu_ctxt->fdp.addr = aux->dp | (aux->ds << 4);
1973             fpu_ctxt->x[FPU_WORD_SIZE_OFFSET] = 2;
1974             break;
1975 
1976         default:
1977             ASSERT_UNREACHABLE();
1978             return;
1979         }
1980 
1981         fpu_ctxt->fop = aux->op;
1982 
1983         /* Re-use backout code below. */
1984         backout = X86EMUL_FPU_fpu;
1985     }
1986 
1987     if ( backout == X86EMUL_FPU_fpu )
1988     {
1989         /*
1990          * To back out changes to the register file simply adjust state such
1991          * that upon next FPU insn use by the guest we'll reload the state
1992          * saved (or freshly loaded) by hvmemul_get_fpu().
1993          */
1994         curr->fpu_dirtied = false;
1995         stts();
1996         hvm_funcs.fpu_leave(curr);
1997     }
1998 }
1999 
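/*
 * Emulate INVLPG: compute the linear address, squash segmentation faults
 * from that calculation (see below), and pass valid addresses on to
 * paging_invlpg().
 */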
2000 static int hvmemul_invlpg(
2001     enum x86_segment seg,
2002     unsigned long offset,
2003     struct x86_emulate_ctxt *ctxt)
2004 {
2005     struct hvm_emulate_ctxt *hvmemul_ctxt =
2006         container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
2007     unsigned long addr, reps = 1;
2008     int rc;
2009 
2010     rc = hvmemul_virtual_to_linear(
2011         seg, offset, 1, &reps, hvm_access_none, hvmemul_ctxt, &addr);
2012 
2013     if ( rc == X86EMUL_EXCEPTION )
2014     {
2015         /*
2016          * `invlpg` takes segment bases into account, but is not subject to
2017          * faults from segment type/limit checks, and is specified as a NOP
2018          * when issued on non-canonical addresses.
2019          *
2020          * hvmemul_virtual_to_linear() raises exceptions for type/limit
2021          * violations, so squash them.
2022          */
2023         x86_emul_reset_event(ctxt);
2024         rc = X86EMUL_OKAY;
2025     }
2026 
2027     if ( rc == X86EMUL_OKAY )
2028         paging_invlpg(current, addr);
2029 
2030     return rc;
2031 }
2032 
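/*
 * Emulate VMFUNC through the altp2m hook, raising #UD if the hook reports
 * an exception.
 */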
2033 static int hvmemul_vmfunc(
2034     struct x86_emulate_ctxt *ctxt)
2035 {
2036     int rc;
2037 
2038     if ( !hvm_funcs.altp2m_vcpu_emulate_vmfunc )
2039         return X86EMUL_UNHANDLEABLE;
2040     rc = hvm_funcs.altp2m_vcpu_emulate_vmfunc(ctxt->regs);
2041     if ( rc == X86EMUL_EXCEPTION )
2042         x86_emul_hw_exception(TRAP_invalid_op, X86_EVENT_NO_EC, ctxt);
2043 
2044     return rc;
2045 }
2046 
2047 static const struct x86_emulate_ops hvm_emulate_ops = {
2048     .read          = hvmemul_read,
2049     .insn_fetch    = hvmemul_insn_fetch,
2050     .write         = hvmemul_write,
2051     .cmpxchg       = hvmemul_cmpxchg,
2052     .validate      = hvmemul_validate,
2053     .rep_ins       = hvmemul_rep_ins,
2054     .rep_outs      = hvmemul_rep_outs,
2055     .rep_movs      = hvmemul_rep_movs,
2056     .rep_stos      = hvmemul_rep_stos,
2057     .read_segment  = hvmemul_read_segment,
2058     .write_segment = hvmemul_write_segment,
2059     .read_io       = hvmemul_read_io,
2060     .write_io      = hvmemul_write_io,
2061     .read_cr       = hvmemul_read_cr,
2062     .write_cr      = hvmemul_write_cr,
2063     .read_msr      = hvmemul_read_msr,
2064     .write_msr     = hvmemul_write_msr,
2065     .wbinvd        = hvmemul_wbinvd,
2066     .cpuid         = hvmemul_cpuid,
2067     .get_fpu       = hvmemul_get_fpu,
2068     .put_fpu       = hvmemul_put_fpu,
2069     .invlpg        = hvmemul_invlpg,
2070     .vmfunc        = hvmemul_vmfunc,
2071 };
2072 
2073 static const struct x86_emulate_ops hvm_emulate_ops_no_write = {
2074     .read          = hvmemul_read,
2075     .insn_fetch    = hvmemul_insn_fetch,
2076     .write         = hvmemul_write_discard,
2077     .cmpxchg       = hvmemul_cmpxchg_discard,
2078     .rep_ins       = hvmemul_rep_ins_discard,
2079     .rep_outs      = hvmemul_rep_outs_discard,
2080     .rep_movs      = hvmemul_rep_movs_discard,
2081     .rep_stos      = hvmemul_rep_stos_discard,
2082     .read_segment  = hvmemul_read_segment,
2083     .write_segment = hvmemul_write_segment,
2084     .read_io       = hvmemul_read_io_discard,
2085     .write_io      = hvmemul_write_io_discard,
2086     .read_cr       = hvmemul_read_cr,
2087     .write_cr      = hvmemul_write_cr,
2088     .read_msr      = hvmemul_read_msr,
2089     .write_msr     = hvmemul_write_msr_discard,
2090     .wbinvd        = hvmemul_wbinvd_discard,
2091     .cpuid         = hvmemul_cpuid,
2092     .get_fpu       = hvmemul_get_fpu,
2093     .put_fpu       = hvmemul_put_fpu,
2094     .invlpg        = hvmemul_invlpg,
2095     .vmfunc        = hvmemul_vmfunc,
2096 };
2097 
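/*
 * Common emulation entry point: run x86_emulate() with the given ops,
 * maintain the MMIO retry/instruction caches, and apply retirement side
 * effects (singlestep #DB, interrupt shadow updates, HLT).
 */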
2098 static int _hvm_emulate_one(struct hvm_emulate_ctxt *hvmemul_ctxt,
2099     const struct x86_emulate_ops *ops)
2100 {
2101     const struct cpu_user_regs *regs = hvmemul_ctxt->ctxt.regs;
2102     struct vcpu *curr = current;
2103     uint32_t new_intr_shadow;
2104     struct hvm_vcpu_io *vio = &curr->arch.hvm_vcpu.hvm_io;
2105     int rc;
2106 
2107     hvm_emulate_init_per_insn(hvmemul_ctxt, vio->mmio_insn,
2108                               vio->mmio_insn_bytes);
2109 
2110     vio->mmio_retry = 0;
2111 
2112     switch ( rc = x86_emulate(&hvmemul_ctxt->ctxt, ops) )
2113     {
2114     case X86EMUL_OKAY:
2115         if ( vio->mmio_retry )
2116             rc = X86EMUL_RETRY;
2117         /* fall through */
2118     default:
2119         vio->mmio_cache_count = 0;
2120         vio->mmio_insn_bytes = 0;
2121         break;
2122 
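    /*
     * Stash the fetched instruction bytes so that, when emulation is retried
     * after the pending I/O completes, the very same instruction is decoded
     * again without re-fetching it from guest memory.
     */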
2123     case X86EMUL_RETRY:
2124         BUILD_BUG_ON(sizeof(vio->mmio_insn) < sizeof(hvmemul_ctxt->insn_buf));
2125         vio->mmio_insn_bytes = hvmemul_ctxt->insn_buf_bytes;
2126         memcpy(vio->mmio_insn, hvmemul_ctxt->insn_buf, vio->mmio_insn_bytes);
2127         break;
2128     }
2129 
2130     if ( hvmemul_ctxt->ctxt.retire.singlestep )
2131         hvm_inject_hw_exception(TRAP_debug, X86_EVENT_NO_EC);
2132 
2133     new_intr_shadow = hvmemul_ctxt->intr_shadow;
2134 
2135     /* MOV-SS instruction toggles MOV-SS shadow, else we just clear it. */
2136     if ( hvmemul_ctxt->ctxt.retire.mov_ss )
2137         new_intr_shadow ^= HVM_INTR_SHADOW_MOV_SS;
2138     else if ( rc != X86EMUL_RETRY )
2139         new_intr_shadow &= ~HVM_INTR_SHADOW_MOV_SS;
2140 
2141     /* STI instruction toggles STI shadow, else we just clear it. */
2142     if ( hvmemul_ctxt->ctxt.retire.sti )
2143         new_intr_shadow ^= HVM_INTR_SHADOW_STI;
2144     else if ( rc != X86EMUL_RETRY )
2145         new_intr_shadow &= ~HVM_INTR_SHADOW_STI;
2146 
2147     /* IRET, if valid in the given context, clears NMI blocking. */
2148     if ( hvmemul_ctxt->ctxt.retire.unblock_nmi )
2149         new_intr_shadow &= ~HVM_INTR_SHADOW_NMI;
2150 
2151     if ( hvmemul_ctxt->intr_shadow != new_intr_shadow )
2152     {
2153         hvmemul_ctxt->intr_shadow = new_intr_shadow;
2154         hvm_funcs.set_interrupt_shadow(curr, new_intr_shadow);
2155     }
2156 
2157     if ( hvmemul_ctxt->ctxt.retire.hlt &&
2158          !hvm_local_events_need_delivery(curr) )
2159     {
2160         hvm_hlt(regs->eflags);
2161     }
2162 
2163     return rc;
2164 }
2165 
2166 int hvm_emulate_one(
2167     struct hvm_emulate_ctxt *hvmemul_ctxt)
2168 {
2169     return _hvm_emulate_one(hvmemul_ctxt, &hvm_emulate_ops);
2170 }
2171 
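/*
 * Emulate a single instruction which wrote to a read-only MMIO or MMCFG
 * page, using dedicated write handlers instead of the normal MMIO path.
 */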
2172 int hvm_emulate_one_mmio(unsigned long mfn, unsigned long gla)
2173 {
2174     static const struct x86_emulate_ops hvm_intercept_ops_mmcfg = {
2175         .read       = x86emul_unhandleable_rw,
2176         .insn_fetch = hvmemul_insn_fetch,
2177         .write      = mmcfg_intercept_write,
2178         .cpuid      = hvmemul_cpuid,
2179     };
2180     static const struct x86_emulate_ops hvm_ro_emulate_ops_mmio = {
2181         .read       = x86emul_unhandleable_rw,
2182         .insn_fetch = hvmemul_insn_fetch,
2183         .write      = mmio_ro_emulated_write,
2184         .cpuid      = hvmemul_cpuid,
2185     };
2186     struct mmio_ro_emulate_ctxt mmio_ro_ctxt = { .cr2 = gla };
2187     struct hvm_emulate_ctxt ctxt;
2188     const struct x86_emulate_ops *ops;
2189     unsigned int seg, bdf;
2190     int rc;
2191 
2192     if ( pci_ro_mmcfg_decode(mfn, &seg, &bdf) )
2193     {
2194         mmio_ro_ctxt.seg = seg;
2195         mmio_ro_ctxt.bdf = bdf;
2196         ops = &hvm_intercept_ops_mmcfg;
2197     }
2198     else
2199         ops = &hvm_ro_emulate_ops_mmio;
2200 
2201     hvm_emulate_init_once(&ctxt, x86_insn_is_mem_write,
2202                           guest_cpu_user_regs());
2203     ctxt.ctxt.data = &mmio_ro_ctxt;
2204     rc = _hvm_emulate_one(&ctxt, ops);
2205     switch ( rc )
2206     {
2207     case X86EMUL_UNHANDLEABLE:
2208     case X86EMUL_UNIMPLEMENTED:
2209         hvm_dump_emulation_state(XENLOG_G_WARNING, "MMCFG", &ctxt, rc);
2210         break;
2211     case X86EMUL_EXCEPTION:
2212         hvm_inject_event(&ctxt.ctxt.event);
2213         /* fallthrough */
2214     default:
2215         hvm_emulate_writeback(&ctxt);
2216     }
2217 
2218     return rc;
2219 }
2220 
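/*
 * Emulate in response to a vm_event reply.  EMUL_KIND_NOWRITE discards
 * memory writes, EMUL_KIND_SET_CONTEXT_INSN emulates the instruction bytes
 * supplied by the monitor, and EMUL_KIND_SET_CONTEXT_DATA substitutes the
 * supplied data for emulated reads.
 */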
2221 void hvm_emulate_one_vm_event(enum emul_kind kind, unsigned int trapnr,
2222     unsigned int errcode)
2223 {
2224     struct hvm_emulate_ctxt ctx = {{ 0 }};
2225     int rc;
2226 
2227     hvm_emulate_init_once(&ctx, NULL, guest_cpu_user_regs());
2228 
2229     switch ( kind )
2230     {
2231     case EMUL_KIND_NOWRITE:
2232         rc = _hvm_emulate_one(&ctx, &hvm_emulate_ops_no_write);
2233         break;
2234     case EMUL_KIND_SET_CONTEXT_INSN: {
2235         struct vcpu *curr = current;
2236         struct hvm_vcpu_io *vio = &curr->arch.hvm_vcpu.hvm_io;
2237 
2238         BUILD_BUG_ON(sizeof(vio->mmio_insn) !=
2239                      sizeof(curr->arch.vm_event->emul.insn.data));
2240         ASSERT(!vio->mmio_insn_bytes);
2241 
2242         /*
2243          * Stash insn buffer into mmio buffer here instead of ctx
2244          * to avoid having to add more logic to hvm_emulate_one.
2245          */
2246         vio->mmio_insn_bytes = sizeof(vio->mmio_insn);
2247         memcpy(vio->mmio_insn, curr->arch.vm_event->emul.insn.data,
2248                vio->mmio_insn_bytes);
2249     }
2250     /* Fall-through */
2251     default:
2252         ctx.set_context = (kind == EMUL_KIND_SET_CONTEXT_DATA);
2253         rc = hvm_emulate_one(&ctx);
2254     }
2255 
2256     switch ( rc )
2257     {
2258     case X86EMUL_RETRY:
2259         /*
2260          * This function is called when handling an EPT-related vm_event
2261          * reply. As such, nothing else needs to be done here, since simply
2262          * returning makes the current instruction cause a page fault again,
2263          * consistent with X86EMUL_RETRY.
2264          */
2265         return;
2266     case X86EMUL_UNIMPLEMENTED:
2267         if ( hvm_monitor_emul_unimplemented() )
2268             return;
2269         /* fall-through */
2270     case X86EMUL_UNHANDLEABLE:
2271         hvm_dump_emulation_state(XENLOG_G_DEBUG, "Mem event", &ctx, rc);
2272         hvm_inject_hw_exception(trapnr, errcode);
2273         break;
2274     case X86EMUL_EXCEPTION:
2275         hvm_inject_event(&ctx.ctxt.event);
2276         break;
2277     }
2278 
2279     hvm_emulate_writeback(&ctx);
2280 }
2281 
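/*
 * One-time initialisation of an emulation context: capture the interrupt
 * shadow, pre-load CS and SS, and record the register frame and CPU vendor.
 */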
2282 void hvm_emulate_init_once(
2283     struct hvm_emulate_ctxt *hvmemul_ctxt,
2284     hvm_emulate_validate_t *validate,
2285     struct cpu_user_regs *regs)
2286 {
2287     struct vcpu *curr = current;
2288 
2289     memset(hvmemul_ctxt, 0, sizeof(*hvmemul_ctxt));
2290 
2291     hvmemul_ctxt->intr_shadow = hvm_funcs.get_interrupt_shadow(curr);
2292     hvmemul_get_seg_reg(x86_seg_cs, hvmemul_ctxt);
2293     hvmemul_get_seg_reg(x86_seg_ss, hvmemul_ctxt);
2294 
2295     hvmemul_ctxt->validate = validate;
2296     hvmemul_ctxt->ctxt.regs = regs;
2297     hvmemul_ctxt->ctxt.vendor = curr->domain->arch.cpuid->x86_vendor;
2298     hvmemul_ctxt->ctxt.force_writeback = true;
2299 }
2300 
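/*
 * Per-instruction initialisation: work out address and stack-pointer sizes
 * from the current mode and, unless instruction bytes were supplied by the
 * caller, (re-)fetch them from the guest.
 */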
2301 void hvm_emulate_init_per_insn(
2302     struct hvm_emulate_ctxt *hvmemul_ctxt,
2303     const unsigned char *insn_buf,
2304     unsigned int insn_bytes)
2305 {
2306     struct vcpu *curr = current;
2307     unsigned int pfec = PFEC_page_present;
2308     unsigned long addr;
2309 
2310     hvmemul_ctxt->ctxt.lma = hvm_long_mode_active(curr);
2311 
2312     if ( hvmemul_ctxt->ctxt.lma &&
2313          hvmemul_ctxt->seg_reg[x86_seg_cs].l )
2314         hvmemul_ctxt->ctxt.addr_size = hvmemul_ctxt->ctxt.sp_size = 64;
2315     else
2316     {
2317         hvmemul_ctxt->ctxt.addr_size =
2318             hvmemul_ctxt->seg_reg[x86_seg_cs].db ? 32 : 16;
2319         hvmemul_ctxt->ctxt.sp_size =
2320             hvmemul_ctxt->seg_reg[x86_seg_ss].db ? 32 : 16;
2321     }
2322 
2323     if ( hvmemul_ctxt->seg_reg[x86_seg_ss].dpl == 3 )
2324         pfec |= PFEC_user_mode;
2325 
2326     hvmemul_ctxt->insn_buf_eip = hvmemul_ctxt->ctxt.regs->rip;
2327     if ( !insn_bytes )
2328     {
2329         hvmemul_ctxt->insn_buf_bytes =
2330             hvm_get_insn_bytes(curr, hvmemul_ctxt->insn_buf) ?:
2331             (hvm_virtual_to_linear_addr(x86_seg_cs,
2332                                         &hvmemul_ctxt->seg_reg[x86_seg_cs],
2333                                         hvmemul_ctxt->insn_buf_eip,
2334                                         sizeof(hvmemul_ctxt->insn_buf),
2335                                         hvm_access_insn_fetch,
2336                                         &hvmemul_ctxt->seg_reg[x86_seg_cs],
2337                                         &addr) &&
2338              hvm_fetch_from_guest_linear(hvmemul_ctxt->insn_buf, addr,
2339                                          sizeof(hvmemul_ctxt->insn_buf),
2340                                          pfec, NULL) == HVMTRANS_okay) ?
2341             sizeof(hvmemul_ctxt->insn_buf) : 0;
2342     }
2343     else
2344     {
2345         hvmemul_ctxt->insn_buf_bytes = insn_bytes;
2346         memcpy(hvmemul_ctxt->insn_buf, insn_buf, insn_bytes);
2347     }
2348 }
2349 
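/* Flush all segment registers dirtied during emulation back to the vCPU. */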
2350 void hvm_emulate_writeback(
2351     struct hvm_emulate_ctxt *hvmemul_ctxt)
2352 {
2353     enum x86_segment seg;
2354 
2355     seg = find_first_bit(&hvmemul_ctxt->seg_reg_dirty,
2356                          ARRAY_SIZE(hvmemul_ctxt->seg_reg));
2357 
2358     while ( seg < ARRAY_SIZE(hvmemul_ctxt->seg_reg) )
2359     {
2360         hvm_set_segment_register(current, seg, &hvmemul_ctxt->seg_reg[seg]);
2361         seg = find_next_bit(&hvmemul_ctxt->seg_reg_dirty,
2362                             ARRAY_SIZE(hvmemul_ctxt->seg_reg),
2363                             seg+1);
2364     }
2365 }
2366 
2367 /*
2368  * Callers which pass a known in-range x86_segment can rely on the return
2369  * pointer being valid.  Other callers must explicitly check for errors.
2370  */
2371 struct segment_register *hvmemul_get_seg_reg(
2372     enum x86_segment seg,
2373     struct hvm_emulate_ctxt *hvmemul_ctxt)
2374 {
2375     unsigned int idx = seg;
2376 
2377     if ( idx >= ARRAY_SIZE(hvmemul_ctxt->seg_reg) )
2378         return ERR_PTR(-X86EMUL_UNHANDLEABLE);
2379 
2380     if ( !__test_and_set_bit(idx, &hvmemul_ctxt->seg_reg_accessed) )
2381         hvm_get_segment_register(current, idx, &hvmemul_ctxt->seg_reg[idx]);
2382     return &hvmemul_ctxt->seg_reg[idx];
2383 }
2384 
2385 static const char *guest_x86_mode_to_str(int mode)
2386 {
2387     switch ( mode )
2388     {
2389     case 0:  return "Real";
2390     case 1:  return "v86";
2391     case 2:  return "16bit";
2392     case 4:  return "32bit";
2393     case 8:  return "64bit";
2394     default: return "Unknown";
2395     }
2396 }
2397 
2398 void hvm_dump_emulation_state(const char *loglvl, const char *prefix,
2399                               struct hvm_emulate_ctxt *hvmemul_ctxt, int rc)
2400 {
2401     struct vcpu *curr = current;
2402     const char *mode_str = guest_x86_mode_to_str(hvm_guest_x86_mode(curr));
2403     const struct segment_register *cs =
2404         hvmemul_get_seg_reg(x86_seg_cs, hvmemul_ctxt);
2405 
2406     printk("%s%s emulation failed (%d): %pv %s @ %04x:%08lx -> %*ph\n",
2407            loglvl, prefix, rc, curr, mode_str, cs->sel,
2408            hvmemul_ctxt->insn_buf_eip, hvmemul_ctxt->insn_buf_bytes,
2409            hvmemul_ctxt->insn_buf);
2410 }
2411 
2412 /*
2413  * Local variables:
2414  * mode: C
2415  * c-file-style: "BSD"
2416  * c-basic-offset: 4
2417  * tab-width: 4
2418  * indent-tabs-mode: nil
2419  * End:
2420  */
2421