1 /******************************************************************************
2 * hvm/emulate.c
3 *
4 * HVM instruction emulation. Used for MMIO and VMX real mode.
5 *
6 * Copyright (c) 2008, Citrix Systems, Inc.
7 *
8 * Authors:
9 * Keir Fraser <keir@xen.org>
10 */
11
12 #include <xen/init.h>
13 #include <xen/lib.h>
14 #include <xen/sched.h>
15 #include <xen/paging.h>
16 #include <xen/trace.h>
17 #include <xen/vm_event.h>
18 #include <asm/event.h>
19 #include <asm/i387.h>
20 #include <asm/xstate.h>
21 #include <asm/hvm/emulate.h>
22 #include <asm/hvm/hvm.h>
23 #include <asm/hvm/ioreq.h>
24 #include <asm/hvm/monitor.h>
25 #include <asm/hvm/trace.h>
26 #include <asm/hvm/support.h>
27 #include <asm/hvm/svm/svm.h>
28 #include <asm/vm_event.h>
29
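/*
 * Emit a trace record for a completed ioreq: the event class is derived
 * from the request type and direction, and the payload is the access
 * address (4 or 8 bytes, depending on whether it fits in 32 bits),
 * optionally followed by 4 bytes of data when the request carries
 * immediate data rather than a pointer.
 */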
30 static void hvmtrace_io_assist(const ioreq_t *p)
31 {
32 unsigned int size, event;
33 unsigned char buffer[12];
34
35 if ( likely(!tb_init_done) )
36 return;
37
38 if ( p->type == IOREQ_TYPE_COPY )
39 event = p->dir ? TRC_HVM_IOMEM_READ : TRC_HVM_IOMEM_WRITE;
40 else
41 event = p->dir ? TRC_HVM_IOPORT_READ : TRC_HVM_IOPORT_WRITE;
42
43 *(uint64_t *)buffer = p->addr;
44 size = (p->addr != (u32)p->addr) ? 8 : 4;
45 if ( size == 8 )
46 event |= TRC_64_FLAG;
47
48 if ( !p->data_is_ptr )
49 {
50 *(uint32_t *)&buffer[size] = p->data;
51 size += 4;
52 }
53
54 trace_var(event, 0/*!cycles*/, size, buffer);
55 }
56
57 static int null_read(const struct hvm_io_handler *io_handler,
58 uint64_t addr,
59 uint32_t size,
60 uint64_t *data)
61 {
62 *data = ~0ul;
63 return X86EMUL_OKAY;
64 }
65
66 static int null_write(const struct hvm_io_handler *handler,
67 uint64_t addr,
68 uint32_t size,
69 uint64_t data)
70 {
71 return X86EMUL_OKAY;
72 }
73
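/*
 * Fill @buffer with data supplied by a vm_event (introspection) response:
 * copy as much as the agent provided (emul.read.data) and zero-pad the
 * remainder. Fails if no vm_event state is attached to this vCPU.
 */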
74 static int set_context_data(void *buffer, unsigned int size)
75 {
76 struct vcpu *curr = current;
77
78 if ( curr->arch.vm_event )
79 {
80 unsigned int safe_size =
81 min(size, curr->arch.vm_event->emul.read.size);
82
83 memcpy(buffer, curr->arch.vm_event->emul.read.data, safe_size);
84 memset(buffer + safe_size, 0, size - safe_size);
85 return X86EMUL_OKAY;
86 }
87
88 return X86EMUL_UNHANDLEABLE;
89 }
90
91 static const struct hvm_io_ops null_ops = {
92 .read = null_read,
93 .write = null_write
94 };
95
96 static const struct hvm_io_handler null_handler = {
97 .ops = &null_ops
98 };
99
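/*
 * Used for the read half of a read-modify-write access to a
 * p2m_ioreq_server page: the data is fetched directly from guest RAM
 * rather than being forwarded to the ioreq server.
 */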
100 static int ioreq_server_read(const struct hvm_io_handler *io_handler,
101 uint64_t addr,
102 uint32_t size,
103 uint64_t *data)
104 {
105 if ( hvm_copy_from_guest_phys(data, addr, size) != HVMTRANS_okay )
106 return X86EMUL_UNHANDLEABLE;
107
108 return X86EMUL_OKAY;
109 }
110
111 static const struct hvm_io_ops ioreq_server_ops = {
112 .read = ioreq_server_read,
113 .write = null_write
114 };
115
116 static const struct hvm_io_handler ioreq_server_handler = {
117 .ops = &ioreq_server_ops
118 };
119
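/*
 * Core I/O dispatcher. @is_mmio selects MMIO vs. port I/O, @dir is
 * IOREQ_READ or IOREQ_WRITE, @df mirrors EFLAGS.DF for string operations,
 * and @data is either a pointer to a hypervisor buffer (!@data_is_addr)
 * or a guest physical address acting as the source/destination of the
 * access. In-flight and completed state is tracked in the vCPU's
 * hvm_io.io_req so that re-emulation after an ioreq server response can
 * pick up where it left off.
 */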
120 static int hvmemul_do_io(
121 bool_t is_mmio, paddr_t addr, unsigned long *reps, unsigned int size,
122 uint8_t dir, bool_t df, bool_t data_is_addr, uintptr_t data)
123 {
124 struct vcpu *curr = current;
125 struct domain *currd = curr->domain;
126 struct hvm_vcpu_io *vio = &curr->arch.hvm_vcpu.hvm_io;
127 ioreq_t p = {
128 .type = is_mmio ? IOREQ_TYPE_COPY : IOREQ_TYPE_PIO,
129 .addr = addr,
130 .size = size,
131 .count = *reps,
132 .dir = dir,
133 .df = df,
134 .data = data_is_addr ? data : 0,
135 .data_is_ptr = data_is_addr, /* ioreq_t field name is misleading */
136 .state = STATE_IOREQ_READY,
137 };
138 void *p_data = (void *)data;
139 int rc;
140
141 /*
142 * Weird-sized accesses have undefined behaviour: we discard writes
143 * and read all-ones.
144 */
145 if ( unlikely((size > sizeof(long)) || (size & (size - 1))) )
146 {
147 gdprintk(XENLOG_WARNING, "bad mmio size %d\n", size);
148 return X86EMUL_UNHANDLEABLE;
149 }
150
151 switch ( vio->io_req.state )
152 {
153 case STATE_IOREQ_NONE:
154 break;
155 case STATE_IORESP_READY:
156 vio->io_req.state = STATE_IOREQ_NONE;
157 p = vio->io_req;
158
159 /* Verify the emulation request has been correctly re-issued */
160 if ( (p.type != (is_mmio ? IOREQ_TYPE_COPY : IOREQ_TYPE_PIO)) ||
161 (p.addr != addr) ||
162 (p.size != size) ||
163 (p.count > *reps) ||
164 (p.dir != dir) ||
165 (p.df != df) ||
166 (p.data_is_ptr != data_is_addr) )
167 domain_crash(currd);
168
169 if ( data_is_addr )
170 return X86EMUL_UNHANDLEABLE;
171
172 *reps = p.count;
173 goto finish_access;
174 default:
175 return X86EMUL_UNHANDLEABLE;
176 }
177
178 if ( dir == IOREQ_WRITE )
179 {
180 if ( !data_is_addr )
181 memcpy(&p.data, p_data, size);
182
183 hvmtrace_io_assist(&p);
184 }
185
186 vio->io_req = p;
187
188 rc = hvm_io_intercept(&p);
189
190 /*
191 * p.count may have got reduced (see hvm_process_io_intercept()) - inform
192 * our callers and mirror this into latched state.
193 */
194 ASSERT(p.count <= *reps);
195 *reps = vio->io_req.count = p.count;
196
197 switch ( rc )
198 {
199 case X86EMUL_OKAY:
200 vio->io_req.state = STATE_IOREQ_NONE;
201 break;
202 case X86EMUL_UNHANDLEABLE:
203 {
204 /*
205 * Xen isn't emulating the instruction internally, so see if there's
206 * an ioreq server that can handle it.
207 *
208 * Rules:
209 * A> PIO or MMIO accesses run through hvm_select_ioreq_server() to
210 * choose the ioreq server by range. If no server is found, the access
211 * is ignored.
212 *
213 * B> p2m_ioreq_server accesses are handled by the designated
214 * ioreq server for the domain, but there are some corner cases:
215 *
216 * - If the domain ioreq server is NULL, it's likely we suffer from
217 * a race with an unmap operation on the ioreq server, so re-try the
218 * instruction.
219 *
220      *    - If the access is a read, it could be part of a
221      *      read-modify-write instruction; emulate the read first.
222 *
223 * Note: Even when an ioreq server is found, its value could become
224 * stale later, because it is possible that
225 *
226 * - the PIO or MMIO address is removed from the rangeset of the
227 * ioreq server, before the event is delivered to the device model.
228 *
229 * - the p2m_ioreq_server type is unmapped from the ioreq server,
230 * before the event is delivered to the device model.
231 *
232      * However, there's no cheap way to avoid the above situations in Xen,
233 * so the device model side needs to check the incoming ioreq event.
234 */
235 struct hvm_ioreq_server *s = NULL;
236 p2m_type_t p2mt = p2m_invalid;
237
238 if ( is_mmio )
239 {
240 unsigned long gmfn = paddr_to_pfn(addr);
241
242 get_gfn_query_unlocked(currd, gmfn, &p2mt);
243
244 if ( p2mt == p2m_ioreq_server )
245 {
246 unsigned int flags;
247
248 s = p2m_get_ioreq_server(currd, &flags);
249
250 if ( s == NULL )
251 {
252 rc = X86EMUL_RETRY;
253 vio->io_req.state = STATE_IOREQ_NONE;
254 break;
255 }
256
257 /*
258 * This is part of a read-modify-write instruction.
259 * Emulate the read part so we have the value available.
260 */
261 if ( dir == IOREQ_READ )
262 {
263 rc = hvm_process_io_intercept(&ioreq_server_handler, &p);
264 vio->io_req.state = STATE_IOREQ_NONE;
265 break;
266 }
267 }
268 }
269
270 if ( !s )
271 s = hvm_select_ioreq_server(currd, &p);
272
273 /* If there is no suitable backing DM, just ignore accesses */
274 if ( !s )
275 {
276 rc = hvm_process_io_intercept(&null_handler, &p);
277 vio->io_req.state = STATE_IOREQ_NONE;
278 }
279 else
280 {
281 rc = hvm_send_ioreq(s, &p, 0);
282 if ( rc != X86EMUL_RETRY || currd->is_shutting_down )
283 vio->io_req.state = STATE_IOREQ_NONE;
284 else if ( data_is_addr )
285 rc = X86EMUL_OKAY;
286 }
287 break;
288 }
289 case X86EMUL_UNIMPLEMENTED:
290 ASSERT_UNREACHABLE();
291 /* Fall-through */
292 default:
293 BUG();
294 }
295
296 ASSERT(rc != X86EMUL_UNIMPLEMENTED);
297
298 if ( rc != X86EMUL_OKAY )
299 return rc;
300
301 finish_access:
302 if ( dir == IOREQ_READ )
303 {
304 hvmtrace_io_assist(&p);
305
306 if ( !data_is_addr )
307 memcpy(p_data, &p.data, size);
308 }
309
310 return X86EMUL_OKAY;
311 }
312
313 static int hvmemul_do_io_buffer(
314 bool_t is_mmio, paddr_t addr, unsigned long *reps, unsigned int size,
315 uint8_t dir, bool_t df, void *buffer)
316 {
317 int rc;
318
319 BUG_ON(buffer == NULL);
320
321 rc = hvmemul_do_io(is_mmio, addr, reps, size, dir, df, 0,
322 (uintptr_t)buffer);
323
324 ASSERT(rc != X86EMUL_UNIMPLEMENTED);
325
326 if ( rc == X86EMUL_UNHANDLEABLE && dir == IOREQ_READ )
327 memset(buffer, 0xff, size);
328
329 return rc;
330 }
331
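/*
 * Take a page reference on the frame backing @gmfn, unsharing it if
 * necessary. Paged-out frames are kicked back to the pager and, like
 * still-shared frames, yield X86EMUL_RETRY; MMIO frames should never be
 * seen here and crash the domain.
 */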
332 static int hvmemul_acquire_page(unsigned long gmfn, struct page_info **page)
333 {
334 struct domain *curr_d = current->domain;
335 p2m_type_t p2mt;
336
337 *page = get_page_from_gfn(curr_d, gmfn, &p2mt, P2M_UNSHARE);
338
339 if ( *page == NULL )
340 return X86EMUL_UNHANDLEABLE;
341
342 if ( p2m_is_paging(p2mt) )
343 {
344 put_page(*page);
345 p2m_mem_paging_populate(curr_d, gmfn);
346 return X86EMUL_RETRY;
347 }
348
349 if ( p2m_is_shared(p2mt) )
350 {
351 put_page(*page);
352 return X86EMUL_RETRY;
353 }
354
355 /* This code should not be reached if the gmfn is not RAM */
356 if ( p2m_is_mmio(p2mt) )
357 {
358 domain_crash(curr_d);
359
360 put_page(*page);
361 return X86EMUL_UNHANDLEABLE;
362 }
363
364 return X86EMUL_OKAY;
365 }
366
367 static inline void hvmemul_release_page(struct page_info *page)
368 {
369 put_page(page);
370 }
371
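/*
 * As hvmemul_do_io(), but the data lives in guest RAM at @ram_gpa rather
 * than in a hypervisor buffer. References are held on the RAM page(s) for
 * the duration of the access, and the rep count is clipped so the batch
 * stays within one page (falling back to a single rep, with a second page
 * reference, when an individual element straddles a page boundary).
 */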
372 static int hvmemul_do_io_addr(
373 bool_t is_mmio, paddr_t addr, unsigned long *reps,
374 unsigned int size, uint8_t dir, bool_t df, paddr_t ram_gpa)
375 {
376 struct vcpu *v = current;
377 unsigned long ram_gmfn = paddr_to_pfn(ram_gpa);
378 unsigned int page_off = ram_gpa & (PAGE_SIZE - 1);
379 struct page_info *ram_page[2];
380 unsigned int nr_pages = 0;
381 unsigned long count;
382 int rc;
383
384 rc = hvmemul_acquire_page(ram_gmfn, &ram_page[nr_pages]);
385 if ( rc != X86EMUL_OKAY )
386 goto out;
387
388 nr_pages++;
389
390     /* Determine how many reps will fit within this page */
391 count = min_t(unsigned long,
392 *reps,
393 df ?
394 ((page_off + size - 1) & ~PAGE_MASK) / size :
395 (PAGE_SIZE - page_off) / size);
396
397 if ( count == 0 )
398 {
399 /*
400 * This access must span two pages, so grab a reference to
401 * the next page and do a single rep.
402 * It is safe to assume multiple pages are physically
403 * contiguous at this point as hvmemul_linear_to_phys() will
404 * ensure this is the case.
405 */
406 rc = hvmemul_acquire_page(df ? ram_gmfn - 1 : ram_gmfn + 1,
407 &ram_page[nr_pages]);
408 if ( rc != X86EMUL_OKAY )
409 goto out;
410
411 nr_pages++;
412 count = 1;
413 }
414
415 rc = hvmemul_do_io(is_mmio, addr, &count, size, dir, df, 1,
416 ram_gpa);
417
418 ASSERT(rc != X86EMUL_UNIMPLEMENTED);
419
420 if ( rc == X86EMUL_OKAY )
421 v->arch.hvm_vcpu.hvm_io.mmio_retry = (count < *reps);
422
423 *reps = count;
424
425 out:
426 while ( nr_pages )
427 hvmemul_release_page(ram_page[--nr_pages]);
428
429 return rc;
430 }
431
432 /*
433 * Perform I/O between <port> and <buffer>. <dir> indicates the
434 * direction: IOREQ_READ means a read from <port> to <buffer> and
435 * IOREQ_WRITE means a write from <buffer> to <port>. Each access has
436 * width <size>.
437 */
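/*
 * For example (hypothetical caller), emulating "in al, 0x64" would read a
 * single byte from port 0x64 into a local buffer, roughly:
 *
 *     uint8_t val;
 *     int rc = hvmemul_do_pio_buffer(0x64, 1, IOREQ_READ, &val);
 */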
438 int hvmemul_do_pio_buffer(uint16_t port,
439 unsigned int size,
440 uint8_t dir,
441 void *buffer)
442 {
443 unsigned long one_rep = 1;
444
445 return hvmemul_do_io_buffer(0, port, &one_rep, size, dir, 0, buffer);
446 }
447
448 /*
449 * Perform I/O between <port> and guest RAM starting at <ram_addr>.
450 * <dir> indicates the direction: IOREQ_READ means a read from <port> to
451 * RAM and IOREQ_WRITE means a write from RAM to <port>. Each access has
452 * width <size> and up to *<reps> accesses will be performed. If
453 * X86EMUL_OKAY is returned then <reps> will be updated with the number
454 * of accesses actually performed.
455 * Each access will be done to/from successive RAM addresses, increasing
456 * if <df> is 0 or decreasing if <df> is 1.
457 */
458 static int hvmemul_do_pio_addr(uint16_t port,
459 unsigned long *reps,
460 unsigned int size,
461 uint8_t dir,
462 bool_t df,
463 paddr_t ram_addr)
464 {
465 return hvmemul_do_io_addr(0, port, reps, size, dir, df, ram_addr);
466 }
467
468 /*
469 * Perform I/O between MMIO space starting at <mmio_gpa> and <buffer>.
470 * <dir> indicates the direction: IOREQ_READ means a read from MMIO to
471 * <buffer> and IOREQ_WRITE means a write from <buffer> to MMIO. Each
472 * access has width <size> and up to *<reps> accesses will be performed.
473 * If X86EMUL_OKAY is returned then <reps> will be updated with the number
474 * of accesses actually performed.
475 * Each access will be done to/from successive MMIO addresses, increasing
476 * if <df> is 0 or decreasing if <df> is 1.
477 *
478 * NOTE: If *<reps> is greater than 1, each access will use the
479  *       <buffer> pointer; there is no implicit iteration over a
480 * block of memory starting at <buffer>.
481 */
482 static int hvmemul_do_mmio_buffer(paddr_t mmio_gpa,
483 unsigned long *reps,
484 unsigned int size,
485 uint8_t dir,
486 bool_t df,
487 void *buffer)
488 {
489 return hvmemul_do_io_buffer(1, mmio_gpa, reps, size, dir, df, buffer);
490 }
491
492 /*
493 * Perform I/O between MMIO space starting at <mmio_gpa> and guest RAM
494 * starting at <ram_gpa>. <dir> indicates the direction: IOREQ_READ
495 * means a read from MMIO to RAM and IOREQ_WRITE means a write from RAM
496 * to MMIO. Each access has width <size> and up to *<reps> accesses will
497 * be performed. If X86EMUL_OKAY is returned then <reps> will be updated
498 * with the number of accesses actually performed.
499 * Each access will be done to/from successive RAM *and* MMIO addresses,
500 * increasing if <df> is 0 or decreasing if <df> is 1.
501 */
502 static int hvmemul_do_mmio_addr(paddr_t mmio_gpa,
503 unsigned long *reps,
504 unsigned int size,
505 uint8_t dir,
506 bool_t df,
507 paddr_t ram_gpa)
508 {
509 return hvmemul_do_io_addr(1, mmio_gpa, reps, size, dir, df, ram_gpa);
510 }
511
512 /*
513 * Map the frame(s) covering an individual linear access, for writeable
514 * access. May return NULL for MMIO, or ERR_PTR(~X86EMUL_*) for other errors
515 * including ERR_PTR(~X86EMUL_OKAY) for write-discard mappings.
516 *
517 * In debug builds, map() checks that each slot in hvmemul_ctxt->mfn[] is
518  * clean before use, and poisons unused slots with INVALID_MFN.
519 */
520 static void *hvmemul_map_linear_addr(
521 unsigned long linear, unsigned int bytes, uint32_t pfec,
522 struct hvm_emulate_ctxt *hvmemul_ctxt)
523 {
524 struct vcpu *curr = current;
525 void *err, *mapping;
526 unsigned int nr_frames = ((linear + bytes - !!bytes) >> PAGE_SHIFT) -
527 (linear >> PAGE_SHIFT) + 1;
528 unsigned int i;
529
530 /*
531 * mfn points to the next free slot. All used slots have a page reference
532 * held on them.
533 */
534 mfn_t *mfn = &hvmemul_ctxt->mfn[0];
535
536 /*
537 * The caller has no legitimate reason for trying a zero-byte write, but
538 * all other code here is written to work if the check below was dropped.
539 *
540 * The maximum write size depends on the number of adjacent mfns[] which
541      * can be vmap()'d, accounting for possible misalignment within the region.
542 * The higher level emulation callers are responsible for ensuring that
543 * mfns[] is large enough for the requested write size.
544 */
545 if ( bytes == 0 ||
546 nr_frames > ARRAY_SIZE(hvmemul_ctxt->mfn) )
547 {
548 ASSERT_UNREACHABLE();
549 goto unhandleable;
550 }
551
552 for ( i = 0; i < nr_frames; i++ )
553 {
554 enum hvm_translation_result res;
555 struct page_info *page;
556 pagefault_info_t pfinfo;
557 p2m_type_t p2mt;
558 unsigned long addr = i ? (linear + (i << PAGE_SHIFT)) & PAGE_MASK : linear;
559
560 if ( hvmemul_ctxt->ctxt.addr_size < 64 )
561 addr = (uint32_t)addr;
562
563 /* Error checking. Confirm that the current slot is clean. */
564 ASSERT(mfn_x(*mfn) == 0);
565
566 res = hvm_translate_get_page(curr, addr, true, pfec,
567 &pfinfo, &page, NULL, &p2mt);
568
569 switch ( res )
570 {
571 case HVMTRANS_okay:
572 break;
573
574 case HVMTRANS_bad_linear_to_gfn:
575 ASSERT(pfinfo.linear == addr);
576 x86_emul_pagefault(pfinfo.ec, pfinfo.linear, &hvmemul_ctxt->ctxt);
577 err = ERR_PTR(~X86EMUL_EXCEPTION);
578 goto out;
579
580 case HVMTRANS_bad_gfn_to_mfn:
581 err = NULL;
582 goto out;
583
584 case HVMTRANS_gfn_paged_out:
585 case HVMTRANS_gfn_shared:
586 err = ERR_PTR(~X86EMUL_RETRY);
587 goto out;
588
589 default:
590 goto unhandleable;
591 }
592
593 *mfn++ = _mfn(page_to_mfn(page));
594
595 if ( p2m_is_discard_write(p2mt) )
596 {
597 err = ERR_PTR(~X86EMUL_OKAY);
598 goto out;
599 }
600 }
601
602 /* Entire access within a single frame? */
603 if ( nr_frames == 1 )
604 mapping = map_domain_page(hvmemul_ctxt->mfn[0]);
605 /* Multiple frames? Need to vmap(). */
606 else if ( (mapping = vmap(hvmemul_ctxt->mfn,
607 nr_frames)) == NULL )
608 goto unhandleable;
609
610 #ifndef NDEBUG /* Poison unused mfn[]s with INVALID_MFN. */
611 while ( mfn < hvmemul_ctxt->mfn + ARRAY_SIZE(hvmemul_ctxt->mfn) )
612 {
613 ASSERT(mfn_x(*mfn) == 0);
614 *mfn++ = INVALID_MFN;
615 }
616 #endif
617 return mapping + (linear & ~PAGE_MASK);
618
619 unhandleable:
620 err = ERR_PTR(~X86EMUL_UNHANDLEABLE);
621
622 out:
623 /* Drop all held references. */
624 while ( mfn-- > hvmemul_ctxt->mfn )
625 put_page(mfn_to_page(mfn_x(*mfn)));
626
627 return err;
628 }
629
630 static void hvmemul_unmap_linear_addr(
631 void *mapping, unsigned long linear, unsigned int bytes,
632 struct hvm_emulate_ctxt *hvmemul_ctxt)
633 {
634 struct domain *currd = current->domain;
635 unsigned int nr_frames = ((linear + bytes - !!bytes) >> PAGE_SHIFT) -
636 (linear >> PAGE_SHIFT) + 1;
637 unsigned int i;
638 mfn_t *mfn = &hvmemul_ctxt->mfn[0];
639
640 ASSERT(bytes > 0);
641
642 if ( nr_frames == 1 )
643 unmap_domain_page(mapping);
644 else
645 vunmap(mapping);
646
647 for ( i = 0; i < nr_frames; i++ )
648 {
649 ASSERT(mfn_valid(*mfn));
650 paging_mark_dirty(currd, *mfn);
651 put_page(mfn_to_page(mfn_x(*mfn)));
652
653 *mfn++ = _mfn(0); /* Clean slot for map()'s error checking. */
654 }
655
656 #ifndef NDEBUG /* Check (and clean) all unused mfns. */
657 while ( mfn < hvmemul_ctxt->mfn + ARRAY_SIZE(hvmemul_ctxt->mfn) )
658 {
659 ASSERT(mfn_eq(*mfn, INVALID_MFN));
660 *mfn++ = _mfn(0);
661 }
662 #endif
663 }
664
665 /*
666 * Convert addr from linear to physical form, valid over the range
667 * [addr, addr + *reps * bytes_per_rep]. *reps is adjusted according to
668 * the valid computed range. It is always >0 when X86EMUL_OKAY is returned.
669 * @pfec indicates the access checks to be performed during page-table walks.
670 */
671 static int hvmemul_linear_to_phys(
672 unsigned long addr,
673 paddr_t *paddr,
674 unsigned int bytes_per_rep,
675 unsigned long *reps,
676 uint32_t pfec,
677 struct hvm_emulate_ctxt *hvmemul_ctxt)
678 {
679 struct vcpu *curr = current;
680 unsigned long pfn, npfn, done, todo, i, offset = addr & ~PAGE_MASK;
681 int reverse;
682
683 /*
684 * Clip repetitions to a sensible maximum. This avoids extensive looping in
685 * this function while still amortising the cost of I/O trap-and-emulate.
686 */
687 *reps = min_t(unsigned long, *reps, 4096);
688
689 /* With no paging it's easy: linear == physical. */
690 if ( !(curr->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PG) )
691 {
692 *paddr = addr;
693 return X86EMUL_OKAY;
694 }
695
696 /* Reverse mode if this is a backwards multi-iteration string operation. */
697 reverse = (hvmemul_ctxt->ctxt.regs->eflags & X86_EFLAGS_DF) && (*reps > 1);
698
699 if ( reverse && ((PAGE_SIZE - offset) < bytes_per_rep) )
700 {
701 /* Do page-straddling first iteration forwards via recursion. */
702 paddr_t _paddr;
703 unsigned long one_rep = 1;
704 int rc = hvmemul_linear_to_phys(
705 addr, &_paddr, bytes_per_rep, &one_rep, pfec, hvmemul_ctxt);
706 if ( rc != X86EMUL_OKAY )
707 return rc;
708 pfn = _paddr >> PAGE_SHIFT;
709 }
710 else if ( (pfn = paging_gva_to_gfn(curr, addr, &pfec)) == gfn_x(INVALID_GFN) )
711 {
712 if ( pfec & (PFEC_page_paged | PFEC_page_shared) )
713 return X86EMUL_RETRY;
714 *reps = 0;
715 x86_emul_pagefault(pfec, addr, &hvmemul_ctxt->ctxt);
716 return X86EMUL_EXCEPTION;
717 }
718
719 done = reverse ? bytes_per_rep + offset : PAGE_SIZE - offset;
720 todo = *reps * bytes_per_rep;
721 for ( i = 1; done < todo; i++ )
722 {
723 /* Get the next PFN in the range. */
724 addr += reverse ? -PAGE_SIZE : PAGE_SIZE;
725 npfn = paging_gva_to_gfn(curr, addr, &pfec);
726
727 /* Is it contiguous with the preceding PFNs? If not then we're done. */
728 if ( (npfn == gfn_x(INVALID_GFN)) ||
729 (npfn != (pfn + (reverse ? -i : i))) )
730 {
731 if ( pfec & (PFEC_page_paged | PFEC_page_shared) )
732 return X86EMUL_RETRY;
733 done /= bytes_per_rep;
734 if ( done == 0 )
735 {
736 ASSERT(!reverse);
737 if ( npfn != gfn_x(INVALID_GFN) )
738 return X86EMUL_UNHANDLEABLE;
739 *reps = 0;
740 x86_emul_pagefault(pfec, addr & PAGE_MASK, &hvmemul_ctxt->ctxt);
741 return X86EMUL_EXCEPTION;
742 }
743 *reps = done;
744 break;
745 }
746
747 done += PAGE_SIZE;
748 }
749
750 *paddr = ((paddr_t)pfn << PAGE_SHIFT) | offset;
751 return X86EMUL_OKAY;
752 }
753
754
755 static int hvmemul_virtual_to_linear(
756 enum x86_segment seg,
757 unsigned long offset,
758 unsigned int bytes_per_rep,
759 unsigned long *reps,
760 enum hvm_access_type access_type,
761 struct hvm_emulate_ctxt *hvmemul_ctxt,
762 unsigned long *linear)
763 {
764 struct segment_register *reg;
765 int okay;
766 unsigned long max_reps = 4096;
767
768 if ( seg == x86_seg_none )
769 {
770 *linear = offset;
771 return X86EMUL_OKAY;
772 }
773
774 /*
775 * If introspection has been enabled for this domain, and we're emulating
776      * because a vm_event reply asked us to (i.e. not doing regular IO), reps
777      * should be at most 1, since the optimization might otherwise cause only a
778      * single vm_event to be triggered for repeated writes to a whole page.
779 */
780 if ( unlikely(current->domain->arch.mem_access_emulate_each_rep) &&
781 current->arch.vm_event->emulate_flags != 0 )
782 max_reps = 1;
783
784 /*
785 * Clip repetitions to avoid overflow when multiplying by @bytes_per_rep.
786 * The chosen maximum is very conservative but it's what we use in
787 * hvmemul_linear_to_phys() so there is no point in using a larger value.
788 */
789 *reps = min_t(unsigned long, *reps, max_reps);
790
791 reg = hvmemul_get_seg_reg(seg, hvmemul_ctxt);
792 if ( IS_ERR(reg) )
793 return -PTR_ERR(reg);
794
795 if ( (hvmemul_ctxt->ctxt.regs->eflags & X86_EFLAGS_DF) && (*reps > 1) )
796 {
797 /*
798 * x86_emulate() clips the repetition count to ensure we don't wrap
799 * the effective-address index register. Hence this assertion holds.
800 */
801 ASSERT(offset >= ((*reps - 1) * bytes_per_rep));
802 okay = hvm_virtual_to_linear_addr(
803 seg, reg, offset - (*reps - 1) * bytes_per_rep,
804 *reps * bytes_per_rep, access_type,
805 hvmemul_get_seg_reg(x86_seg_cs, hvmemul_ctxt), linear);
806 *linear += (*reps - 1) * bytes_per_rep;
807 if ( hvmemul_ctxt->ctxt.addr_size != 64 )
808 *linear = (uint32_t)*linear;
809 }
810 else
811 {
812 okay = hvm_virtual_to_linear_addr(
813 seg, reg, offset, *reps * bytes_per_rep, access_type,
814 hvmemul_get_seg_reg(x86_seg_cs, hvmemul_ctxt), linear);
815 }
816
817 if ( okay )
818 return X86EMUL_OKAY;
819
820 /* If this is a string operation, emulate each iteration separately. */
821 if ( *reps != 1 )
822 return X86EMUL_UNHANDLEABLE;
823
824 /*
825 * Leave exception injection to the caller for non-user segments: We
826 * neither know the exact error code to be used, nor can we easily
827 * determine the kind of exception (#GP or #TS) in that case.
828 */
829 *reps = 0;
830 if ( is_x86_user_segment(seg) )
831 x86_emul_hw_exception((seg == x86_seg_ss)
832 ? TRAP_stack_error
833 : TRAP_gp_fault, 0, &hvmemul_ctxt->ctxt);
834
835 return X86EMUL_EXCEPTION;
836 }
837
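/*
 * Perform (or replay) an MMIO access confined to a single page, splitting
 * it into power-of-2 chunks no larger than sizeof(long); e.g. a 6-byte
 * access is issued as a 4-byte chunk followed by a 2-byte chunk. Chunks
 * already recorded in @cache (by an earlier emulation pass over the same
 * instruction) are satisfied from, or verified against, the cache instead
 * of being re-issued.
 */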
838 static int hvmemul_phys_mmio_access(
839 struct hvm_mmio_cache *cache, paddr_t gpa, unsigned int size, uint8_t dir,
840 uint8_t *buffer, unsigned int offset)
841 {
842 unsigned long one_rep = 1;
843 unsigned int chunk;
844 int rc = X86EMUL_OKAY;
845
846 /* Accesses must fall within a page. */
847 BUG_ON((gpa & ~PAGE_MASK) + size > PAGE_SIZE);
848
849 /*
850 * hvmemul_do_io() cannot handle non-power-of-2 accesses or
851 * accesses larger than sizeof(long), so choose the highest power
852 * of 2 not exceeding sizeof(long) as the 'chunk' size.
853 */
854 ASSERT(size != 0);
855 chunk = 1u << (fls(size) - 1);
856 if ( chunk > sizeof (long) )
857 chunk = sizeof (long);
858
859 for ( ;; )
860 {
861 /* Have we already done this chunk? */
862 if ( offset < cache->size )
863 {
864 ASSERT((offset + chunk) <= cache->size);
865
866 if ( dir == IOREQ_READ )
867 memcpy(&buffer[offset], &cache->buffer[offset], chunk);
868 else if ( memcmp(&buffer[offset], &cache->buffer[offset], chunk) != 0 )
869 domain_crash(current->domain);
870 }
871 else
872 {
873 ASSERT(offset == cache->size);
874
875 rc = hvmemul_do_mmio_buffer(gpa, &one_rep, chunk, dir, 0,
876 &buffer[offset]);
877 if ( rc != X86EMUL_OKAY )
878 break;
879
880 /* Note that we have now done this chunk. */
881 memcpy(&cache->buffer[offset], &buffer[offset], chunk);
882 cache->size += chunk;
883 }
884
885 /* Advance to the next chunk. */
886 gpa += chunk;
887 offset += chunk;
888 size -= chunk;
889
890 if ( size == 0 )
891 break;
892
893 /*
894 * If the chunk now exceeds the remaining size, choose the next
895 * lowest power of 2 that will fit.
896 */
897 while ( chunk > size )
898 chunk >>= 1;
899 }
900
901 return rc;
902 }
903
904 /*
905 * Multi-cycle MMIO handling is based upon the assumption that emulation
906 * of the same instruction will not access the same MMIO region more
907 * than once. Hence we can deal with re-emulation (for secondary or
908 * subsequent cycles) by looking up the result or previous I/O in a
909 * cache indexed by linear MMIO address.
910 */
911 static struct hvm_mmio_cache *hvmemul_find_mmio_cache(
912 struct hvm_vcpu_io *vio, unsigned long gla, uint8_t dir)
913 {
914 unsigned int i;
915 struct hvm_mmio_cache *cache;
916
917 for ( i = 0; i < vio->mmio_cache_count; i ++ )
918 {
919 cache = &vio->mmio_cache[i];
920
921 if ( gla == cache->gla &&
922 dir == cache->dir )
923 return cache;
924 }
925
926 i = vio->mmio_cache_count++;
927 if( i == ARRAY_SIZE(vio->mmio_cache) )
928 {
929 domain_crash(current->domain);
930 return NULL;
931 }
932
933 cache = &vio->mmio_cache[i];
934 memset(cache, 0, sizeof (*cache));
935
936 cache->gla = gla;
937 cache->dir = dir;
938
939 return cache;
940 }
941
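/*
 * Record the linear->physical translation of an MMIO access in the vCPU's
 * I/O state so that re-emulation, or a later access hitting the same page,
 * can reuse it without another page-table walk. Only the first translation
 * is latched; subsequent calls are no-ops.
 */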
942 static void latch_linear_to_phys(struct hvm_vcpu_io *vio, unsigned long gla,
943 unsigned long gpa, bool_t write)
944 {
945 if ( vio->mmio_access.gla_valid )
946 return;
947
948 vio->mmio_gla = gla & PAGE_MASK;
949 vio->mmio_gpfn = PFN_DOWN(gpa);
950 vio->mmio_access = (struct npfec){ .gla_valid = 1,
951 .read_access = 1,
952 .write_access = write };
953 }
954
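/*
 * Carry out a possibly page-crossing MMIO access on a linear address:
 * translate each page (reusing the latched gpfn for the first page when
 * @known_gpfn is set) and hand the per-page pieces to
 * hvmemul_phys_mmio_access().
 */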
955 static int hvmemul_linear_mmio_access(
956 unsigned long gla, unsigned int size, uint8_t dir, void *buffer,
957 uint32_t pfec, struct hvm_emulate_ctxt *hvmemul_ctxt, bool_t known_gpfn)
958 {
959     struct hvm_vcpu_io *vio = &current->arch.hvm_vcpu.hvm_io;
960 unsigned long offset = gla & ~PAGE_MASK;
961 struct hvm_mmio_cache *cache = hvmemul_find_mmio_cache(vio, gla, dir);
962 unsigned int chunk, buffer_offset = 0;
963 paddr_t gpa;
964 unsigned long one_rep = 1;
965 int rc;
966
967 if ( cache == NULL )
968 return X86EMUL_UNHANDLEABLE;
969
970 chunk = min_t(unsigned int, size, PAGE_SIZE - offset);
971
972 if ( known_gpfn )
973 gpa = pfn_to_paddr(vio->mmio_gpfn) | offset;
974 else
975 {
976 rc = hvmemul_linear_to_phys(gla, &gpa, chunk, &one_rep, pfec,
977 hvmemul_ctxt);
978 if ( rc != X86EMUL_OKAY )
979 return rc;
980
981 latch_linear_to_phys(vio, gla, gpa, dir == IOREQ_WRITE);
982 }
983
984 for ( ;; )
985 {
986 rc = hvmemul_phys_mmio_access(cache, gpa, chunk, dir, buffer, buffer_offset);
987 if ( rc != X86EMUL_OKAY )
988 break;
989
990 gla += chunk;
991 buffer_offset += chunk;
992 size -= chunk;
993
994 if ( size == 0 )
995 break;
996
997 chunk = min_t(unsigned int, size, PAGE_SIZE);
998 rc = hvmemul_linear_to_phys(gla, &gpa, chunk, &one_rep, pfec,
999 hvmemul_ctxt);
1000 if ( rc != X86EMUL_OKAY )
1001 return rc;
1002 }
1003
1004 return rc;
1005 }
1006
1007 static inline int hvmemul_linear_mmio_read(
1008 unsigned long gla, unsigned int size, void *buffer,
1009 uint32_t pfec, struct hvm_emulate_ctxt *hvmemul_ctxt,
1010 bool_t translate)
1011 {
1012 return hvmemul_linear_mmio_access(gla, size, IOREQ_READ, buffer,
1013 pfec, hvmemul_ctxt, translate);
1014 }
1015
1016 static inline int hvmemul_linear_mmio_write(
1017 unsigned long gla, unsigned int size, void *buffer,
1018 uint32_t pfec, struct hvm_emulate_ctxt *hvmemul_ctxt,
1019 bool_t translate)
1020 {
1021 return hvmemul_linear_mmio_access(gla, size, IOREQ_WRITE, buffer,
1022 pfec, hvmemul_ctxt, translate);
1023 }
1024
1025 static int __hvmemul_read(
1026 enum x86_segment seg,
1027 unsigned long offset,
1028 void *p_data,
1029 unsigned int bytes,
1030 enum hvm_access_type access_type,
1031 struct hvm_emulate_ctxt *hvmemul_ctxt)
1032 {
1033 struct vcpu *curr = current;
1034 pagefault_info_t pfinfo;
1035 unsigned long addr, reps = 1;
1036 uint32_t pfec = PFEC_page_present;
1037 struct hvm_vcpu_io *vio = &curr->arch.hvm_vcpu.hvm_io;
1038 int rc;
1039
1040 if ( is_x86_system_segment(seg) )
1041 pfec |= PFEC_implicit;
1042 else if ( hvmemul_ctxt->seg_reg[x86_seg_ss].dpl == 3 )
1043 pfec |= PFEC_user_mode;
1044
1045 rc = hvmemul_virtual_to_linear(
1046 seg, offset, bytes, &reps, access_type, hvmemul_ctxt, &addr);
1047 if ( rc != X86EMUL_OKAY || !bytes )
1048 return rc;
1049 if ( ((access_type != hvm_access_insn_fetch
1050 ? vio->mmio_access.read_access
1051 : vio->mmio_access.insn_fetch)) &&
1052 (vio->mmio_gla == (addr & PAGE_MASK)) )
1053 return hvmemul_linear_mmio_read(addr, bytes, p_data, pfec, hvmemul_ctxt, 1);
1054
1055 rc = ((access_type == hvm_access_insn_fetch) ?
1056 hvm_fetch_from_guest_linear(p_data, addr, bytes, pfec, &pfinfo) :
1057 hvm_copy_from_guest_linear(p_data, addr, bytes, pfec, &pfinfo));
1058
1059 switch ( rc )
1060 {
1061 case HVMTRANS_okay:
1062 break;
1063 case HVMTRANS_bad_linear_to_gfn:
1064 x86_emul_pagefault(pfinfo.ec, pfinfo.linear, &hvmemul_ctxt->ctxt);
1065 return X86EMUL_EXCEPTION;
1066 case HVMTRANS_bad_gfn_to_mfn:
1067 if ( access_type == hvm_access_insn_fetch )
1068 return X86EMUL_UNHANDLEABLE;
1069
1070 return hvmemul_linear_mmio_read(addr, bytes, p_data, pfec, hvmemul_ctxt, 0);
1071 case HVMTRANS_gfn_paged_out:
1072 case HVMTRANS_gfn_shared:
1073 return X86EMUL_RETRY;
1074 default:
1075 return X86EMUL_UNHANDLEABLE;
1076 }
1077
1078 return X86EMUL_OKAY;
1079 }
1080
1081 static int hvmemul_read(
1082 enum x86_segment seg,
1083 unsigned long offset,
1084 void *p_data,
1085 unsigned int bytes,
1086 struct x86_emulate_ctxt *ctxt)
1087 {
1088 struct hvm_emulate_ctxt *hvmemul_ctxt =
1089 container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
1090
1091 if ( unlikely(hvmemul_ctxt->set_context) )
1092 return set_context_data(p_data, bytes);
1093
1094 return __hvmemul_read(
1095 seg, offset, p_data, bytes, hvm_access_read,
1096 container_of(ctxt, struct hvm_emulate_ctxt, ctxt));
1097 }
1098
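/*
 * Instruction fetches are normally satisfied from the prefetch buffer
 * (insn_buf) captured when emulation started; only on a miss do we go back
 * out through __hvmemul_read() and then refill the buffer.
 */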
1099 int hvmemul_insn_fetch(
1100 enum x86_segment seg,
1101 unsigned long offset,
1102 void *p_data,
1103 unsigned int bytes,
1104 struct x86_emulate_ctxt *ctxt)
1105 {
1106 struct hvm_emulate_ctxt *hvmemul_ctxt =
1107 container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
1108 /* Careful, as offset can wrap or truncate WRT insn_buf_eip. */
1109 uint8_t insn_off = offset - hvmemul_ctxt->insn_buf_eip;
1110
1111 /*
1112 * Fall back if requested bytes are not in the prefetch cache.
1113 * But always perform the (fake) read when bytes == 0.
1114 */
1115 if ( !bytes ||
1116 unlikely((insn_off + bytes) > hvmemul_ctxt->insn_buf_bytes) )
1117 {
1118 int rc = __hvmemul_read(seg, offset, p_data, bytes,
1119 hvm_access_insn_fetch, hvmemul_ctxt);
1120
1121 if ( rc == X86EMUL_OKAY && bytes )
1122 {
1123 /*
1124 * Will we overflow insn_buf[]? This shouldn't be able to happen,
1125 * which means something went wrong with instruction decoding...
1126 */
1127 if ( insn_off >= sizeof(hvmemul_ctxt->insn_buf) ||
1128 insn_off + bytes > sizeof(hvmemul_ctxt->insn_buf) )
1129 {
1130 ASSERT_UNREACHABLE();
1131 return X86EMUL_UNHANDLEABLE;
1132 }
1133
1134 memcpy(&hvmemul_ctxt->insn_buf[insn_off], p_data, bytes);
1135 hvmemul_ctxt->insn_buf_bytes = insn_off + bytes;
1136 }
1137
1138 return rc;
1139 }
1140
1141 /* Hit the cache. Simple memcpy. */
1142 memcpy(p_data, &hvmemul_ctxt->insn_buf[insn_off], bytes);
1143 return X86EMUL_OKAY;
1144 }
1145
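/*
 * Ordinary (RAM) writes are performed through a writable mapping of the
 * guest frame(s); a NULL mapping (e.g. because the target is MMIO) sends
 * the write down the MMIO emulation path instead.
 */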
1146 static int hvmemul_write(
1147 enum x86_segment seg,
1148 unsigned long offset,
1149 void *p_data,
1150 unsigned int bytes,
1151 struct x86_emulate_ctxt *ctxt)
1152 {
1153 struct hvm_emulate_ctxt *hvmemul_ctxt =
1154 container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
1155 struct vcpu *curr = current;
1156 unsigned long addr, reps = 1;
1157 uint32_t pfec = PFEC_page_present | PFEC_write_access;
1158 struct hvm_vcpu_io *vio = &curr->arch.hvm_vcpu.hvm_io;
1159 int rc;
1160 void *mapping;
1161
1162 if ( is_x86_system_segment(seg) )
1163 pfec |= PFEC_implicit;
1164 else if ( hvmemul_ctxt->seg_reg[x86_seg_ss].dpl == 3 )
1165 pfec |= PFEC_user_mode;
1166
1167 rc = hvmemul_virtual_to_linear(
1168 seg, offset, bytes, &reps, hvm_access_write, hvmemul_ctxt, &addr);
1169 if ( rc != X86EMUL_OKAY || !bytes )
1170 return rc;
1171
1172 if ( vio->mmio_access.write_access &&
1173 (vio->mmio_gla == (addr & PAGE_MASK)) )
1174 return hvmemul_linear_mmio_write(addr, bytes, p_data, pfec, hvmemul_ctxt, 1);
1175
1176 mapping = hvmemul_map_linear_addr(addr, bytes, pfec, hvmemul_ctxt);
1177 if ( IS_ERR(mapping) )
1178 return ~PTR_ERR(mapping);
1179
1180 if ( !mapping )
1181 return hvmemul_linear_mmio_write(addr, bytes, p_data, pfec, hvmemul_ctxt, 0);
1182
1183 memcpy(mapping, p_data, bytes);
1184
1185 hvmemul_unmap_linear_addr(mapping, addr, bytes, hvmemul_ctxt);
1186
1187 return X86EMUL_OKAY;
1188 }
1189
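/*
 * The *_discard handlers below accept and ignore their respective
 * operations; they are used when an instruction is emulated only for its
 * register-state side effects and its memory, port, and MSR writes must
 * not reach the guest or the device model.
 */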
1190 static int hvmemul_write_discard(
1191 enum x86_segment seg,
1192 unsigned long offset,
1193 void *p_data,
1194 unsigned int bytes,
1195 struct x86_emulate_ctxt *ctxt)
1196 {
1197 /* Discarding the write. */
1198 return X86EMUL_OKAY;
1199 }
1200
1201 static int hvmemul_rep_ins_discard(
1202 uint16_t src_port,
1203 enum x86_segment dst_seg,
1204 unsigned long dst_offset,
1205 unsigned int bytes_per_rep,
1206 unsigned long *reps,
1207 struct x86_emulate_ctxt *ctxt)
1208 {
1209 return X86EMUL_OKAY;
1210 }
1211
1212 static int hvmemul_rep_movs_discard(
1213 enum x86_segment src_seg,
1214 unsigned long src_offset,
1215 enum x86_segment dst_seg,
1216 unsigned long dst_offset,
1217 unsigned int bytes_per_rep,
1218 unsigned long *reps,
1219 struct x86_emulate_ctxt *ctxt)
1220 {
1221 return X86EMUL_OKAY;
1222 }
1223
1224 static int hvmemul_rep_stos_discard(
1225 void *p_data,
1226 enum x86_segment seg,
1227 unsigned long offset,
1228 unsigned int bytes_per_rep,
1229 unsigned long *reps,
1230 struct x86_emulate_ctxt *ctxt)
1231 {
1232 return X86EMUL_OKAY;
1233 }
1234
1235 static int hvmemul_rep_outs_discard(
1236 enum x86_segment src_seg,
1237 unsigned long src_offset,
1238 uint16_t dst_port,
1239 unsigned int bytes_per_rep,
1240 unsigned long *reps,
1241 struct x86_emulate_ctxt *ctxt)
1242 {
1243 return X86EMUL_OKAY;
1244 }
1245
1246 static int hvmemul_cmpxchg_discard(
1247 enum x86_segment seg,
1248 unsigned long offset,
1249 void *p_old,
1250 void *p_new,
1251 unsigned int bytes,
1252 struct x86_emulate_ctxt *ctxt)
1253 {
1254 return X86EMUL_OKAY;
1255 }
1256
1257 static int hvmemul_read_io_discard(
1258 unsigned int port,
1259 unsigned int bytes,
1260 unsigned long *val,
1261 struct x86_emulate_ctxt *ctxt)
1262 {
1263 return X86EMUL_OKAY;
1264 }
1265
1266 static int hvmemul_write_io_discard(
1267 unsigned int port,
1268 unsigned int bytes,
1269 unsigned long val,
1270 struct x86_emulate_ctxt *ctxt)
1271 {
1272 return X86EMUL_OKAY;
1273 }
1274
1275 static int hvmemul_write_msr_discard(
1276 unsigned int reg,
1277 uint64_t val,
1278 struct x86_emulate_ctxt *ctxt)
1279 {
1280 return X86EMUL_OKAY;
1281 }
1282
1283 static int hvmemul_wbinvd_discard(
1284 struct x86_emulate_ctxt *ctxt)
1285 {
1286 return X86EMUL_OKAY;
1287 }
1288
1289 static int hvmemul_cmpxchg(
1290 enum x86_segment seg,
1291 unsigned long offset,
1292 void *p_old,
1293 void *p_new,
1294 unsigned int bytes,
1295 struct x86_emulate_ctxt *ctxt)
1296 {
1297 /* Fix this in case the guest is really relying on r-m-w atomicity. */
1298 return hvmemul_write(seg, offset, p_new, bytes, ctxt);
1299 }
1300
1301 static int hvmemul_validate(
1302 const struct x86_emulate_state *state,
1303 struct x86_emulate_ctxt *ctxt)
1304 {
1305 const struct hvm_emulate_ctxt *hvmemul_ctxt =
1306 container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
1307
1308 return !hvmemul_ctxt->validate || hvmemul_ctxt->validate(state, ctxt)
1309 ? X86EMUL_OKAY : X86EMUL_UNHANDLEABLE;
1310 }
1311
1312 static int hvmemul_rep_ins(
1313 uint16_t src_port,
1314 enum x86_segment dst_seg,
1315 unsigned long dst_offset,
1316 unsigned int bytes_per_rep,
1317 unsigned long *reps,
1318 struct x86_emulate_ctxt *ctxt)
1319 {
1320 struct hvm_emulate_ctxt *hvmemul_ctxt =
1321 container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
1322 unsigned long addr;
1323 uint32_t pfec = PFEC_page_present | PFEC_write_access;
1324 paddr_t gpa;
1325 p2m_type_t p2mt;
1326 int rc;
1327
1328 rc = hvmemul_virtual_to_linear(
1329 dst_seg, dst_offset, bytes_per_rep, reps, hvm_access_write,
1330 hvmemul_ctxt, &addr);
1331 if ( rc != X86EMUL_OKAY )
1332 return rc;
1333
1334 if ( hvmemul_ctxt->seg_reg[x86_seg_ss].dpl == 3 )
1335 pfec |= PFEC_user_mode;
1336
1337 rc = hvmemul_linear_to_phys(
1338 addr, &gpa, bytes_per_rep, reps, pfec, hvmemul_ctxt);
1339 if ( rc != X86EMUL_OKAY )
1340 return rc;
1341
1342 (void) get_gfn_query_unlocked(current->domain, gpa >> PAGE_SHIFT, &p2mt);
1343 if ( p2mt == p2m_mmio_direct || p2mt == p2m_mmio_dm )
1344 return X86EMUL_UNHANDLEABLE;
1345
1346 return hvmemul_do_pio_addr(src_port, reps, bytes_per_rep, IOREQ_READ,
1347 !!(ctxt->regs->eflags & X86_EFLAGS_DF), gpa);
1348 }
1349
1350 static int hvmemul_rep_outs_set_context(
1351 enum x86_segment src_seg,
1352 unsigned long src_offset,
1353 uint16_t dst_port,
1354 unsigned int bytes_per_rep,
1355 unsigned long *reps,
1356 struct x86_emulate_ctxt *ctxt)
1357 {
1358 unsigned int bytes = *reps * bytes_per_rep;
1359 char *buf;
1360 int rc;
1361
1362 buf = xmalloc_array(char, bytes);
1363
1364 if ( buf == NULL )
1365 return X86EMUL_UNHANDLEABLE;
1366
1367 rc = set_context_data(buf, bytes);
1368
1369 if ( rc == X86EMUL_OKAY )
1370 rc = hvmemul_do_pio_buffer(dst_port, bytes, IOREQ_WRITE, buf);
1371
1372 xfree(buf);
1373
1374 return rc;
1375 }
1376
1377 static int hvmemul_rep_outs(
1378 enum x86_segment src_seg,
1379 unsigned long src_offset,
1380 uint16_t dst_port,
1381 unsigned int bytes_per_rep,
1382 unsigned long *reps,
1383 struct x86_emulate_ctxt *ctxt)
1384 {
1385 struct hvm_emulate_ctxt *hvmemul_ctxt =
1386 container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
1387 unsigned long addr;
1388 uint32_t pfec = PFEC_page_present;
1389 paddr_t gpa;
1390 p2m_type_t p2mt;
1391 int rc;
1392
1393 if ( unlikely(hvmemul_ctxt->set_context) )
1394 return hvmemul_rep_outs_set_context(src_seg, src_offset, dst_port,
1395 bytes_per_rep, reps, ctxt);
1396
1397 rc = hvmemul_virtual_to_linear(
1398 src_seg, src_offset, bytes_per_rep, reps, hvm_access_read,
1399 hvmemul_ctxt, &addr);
1400 if ( rc != X86EMUL_OKAY )
1401 return rc;
1402
1403 if ( hvmemul_ctxt->seg_reg[x86_seg_ss].dpl == 3 )
1404 pfec |= PFEC_user_mode;
1405
1406 rc = hvmemul_linear_to_phys(
1407 addr, &gpa, bytes_per_rep, reps, pfec, hvmemul_ctxt);
1408 if ( rc != X86EMUL_OKAY )
1409 return rc;
1410
1411 (void) get_gfn_query_unlocked(current->domain, gpa >> PAGE_SHIFT, &p2mt);
1412 if ( p2mt == p2m_mmio_direct || p2mt == p2m_mmio_dm )
1413 return X86EMUL_UNHANDLEABLE;
1414
1415 return hvmemul_do_pio_addr(dst_port, reps, bytes_per_rep, IOREQ_WRITE,
1416 !!(ctxt->regs->eflags & X86_EFLAGS_DF), gpa);
1417 }
1418
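/*
 * REP MOVS: translate both source and destination. If exactly one side is
 * device-model MMIO, the operation is forwarded as a single rep-capable
 * ioreq with the other side supplied as the RAM address; direct MMIO on
 * either side, or device-model MMIO on both, is unhandleable. A RAM-to-RAM
 * move is emulated as a memmove() through a temporary buffer, bailing out
 * for overlaps that a source-to-buffer-to-destination copy cannot
 * reproduce.
 */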
1419 static int hvmemul_rep_movs(
1420 enum x86_segment src_seg,
1421 unsigned long src_offset,
1422 enum x86_segment dst_seg,
1423 unsigned long dst_offset,
1424 unsigned int bytes_per_rep,
1425 unsigned long *reps,
1426 struct x86_emulate_ctxt *ctxt)
1427 {
1428 struct hvm_emulate_ctxt *hvmemul_ctxt =
1429 container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
1430     struct hvm_vcpu_io *vio = &current->arch.hvm_vcpu.hvm_io;
1431 unsigned long saddr, daddr, bytes;
1432 paddr_t sgpa, dgpa;
1433 uint32_t pfec = PFEC_page_present;
1434 p2m_type_t sp2mt, dp2mt;
1435 int rc, df = !!(ctxt->regs->eflags & X86_EFLAGS_DF);
1436 char *buf;
1437
1438 rc = hvmemul_virtual_to_linear(
1439 src_seg, src_offset, bytes_per_rep, reps, hvm_access_read,
1440 hvmemul_ctxt, &saddr);
1441 if ( rc != X86EMUL_OKAY )
1442 return rc;
1443
1444 rc = hvmemul_virtual_to_linear(
1445 dst_seg, dst_offset, bytes_per_rep, reps, hvm_access_write,
1446 hvmemul_ctxt, &daddr);
1447 if ( rc != X86EMUL_OKAY )
1448 return rc;
1449
1450 if ( hvmemul_ctxt->seg_reg[x86_seg_ss].dpl == 3 )
1451 pfec |= PFEC_user_mode;
1452
1453 if ( vio->mmio_access.read_access &&
1454 (vio->mmio_gla == (saddr & PAGE_MASK)) &&
1455 /*
1456 * Upon initial invocation don't truncate large batches just because
1457 * of a hit for the translation: Doing the guest page table walk is
1458 * cheaper than multiple round trips through the device model. Yet
1459 * when processing a response we can always re-use the translation.
1460 */
1461 (vio->io_req.state == STATE_IORESP_READY ||
1462 ((!df || *reps == 1) &&
1463 PAGE_SIZE - (saddr & ~PAGE_MASK) >= *reps * bytes_per_rep)) )
1464 sgpa = pfn_to_paddr(vio->mmio_gpfn) | (saddr & ~PAGE_MASK);
1465 else
1466 {
1467 rc = hvmemul_linear_to_phys(saddr, &sgpa, bytes_per_rep, reps, pfec,
1468 hvmemul_ctxt);
1469 if ( rc != X86EMUL_OKAY )
1470 return rc;
1471 }
1472
1473 bytes = PAGE_SIZE - (daddr & ~PAGE_MASK);
1474 if ( vio->mmio_access.write_access &&
1475 (vio->mmio_gla == (daddr & PAGE_MASK)) &&
1476 /* See comment above. */
1477 (vio->io_req.state == STATE_IORESP_READY ||
1478 ((!df || *reps == 1) &&
1479 PAGE_SIZE - (daddr & ~PAGE_MASK) >= *reps * bytes_per_rep)) )
1480 dgpa = pfn_to_paddr(vio->mmio_gpfn) | (daddr & ~PAGE_MASK);
1481 else
1482 {
1483 rc = hvmemul_linear_to_phys(daddr, &dgpa, bytes_per_rep, reps,
1484 pfec | PFEC_write_access, hvmemul_ctxt);
1485 if ( rc != X86EMUL_OKAY )
1486 return rc;
1487 }
1488
1489 /* Check for MMIO ops */
1490 (void) get_gfn_query_unlocked(current->domain, sgpa >> PAGE_SHIFT, &sp2mt);
1491 (void) get_gfn_query_unlocked(current->domain, dgpa >> PAGE_SHIFT, &dp2mt);
1492
1493 if ( sp2mt == p2m_mmio_direct || dp2mt == p2m_mmio_direct ||
1494 (sp2mt == p2m_mmio_dm && dp2mt == p2m_mmio_dm) )
1495 return X86EMUL_UNHANDLEABLE;
1496
1497 if ( sp2mt == p2m_mmio_dm )
1498 {
1499 latch_linear_to_phys(vio, saddr, sgpa, 0);
1500 return hvmemul_do_mmio_addr(
1501 sgpa, reps, bytes_per_rep, IOREQ_READ, df, dgpa);
1502 }
1503
1504 if ( dp2mt == p2m_mmio_dm )
1505 {
1506 latch_linear_to_phys(vio, daddr, dgpa, 1);
1507 return hvmemul_do_mmio_addr(
1508 dgpa, reps, bytes_per_rep, IOREQ_WRITE, df, sgpa);
1509 }
1510
1511 /* RAM-to-RAM copy: emulate as equivalent of memmove(dgpa, sgpa, bytes). */
1512 bytes = *reps * bytes_per_rep;
1513
1514 /* Adjust source address for reverse copy. */
1515 if ( df )
1516 sgpa -= bytes - bytes_per_rep;
1517
1518 /*
1519      * Will the first iteration's copy fall within the source range? If not,
1520      * the entire copy cannot corrupt itself. If it does, this is more complex
1521      * than can be emulated by a source-to-buffer-to-destination block copy.
1522 */
1523 if ( ((dgpa + bytes_per_rep) > sgpa) && (dgpa < (sgpa + bytes)) )
1524 return X86EMUL_UNHANDLEABLE;
1525
1526 /* Adjust destination address for reverse copy. */
1527 if ( df )
1528 dgpa -= bytes - bytes_per_rep;
1529
1530 /* Allocate temporary buffer. Fall back to slow emulation if this fails. */
1531 buf = xmalloc_bytes(bytes);
1532 if ( buf == NULL )
1533 return X86EMUL_UNHANDLEABLE;
1534
1535 if ( unlikely(hvmemul_ctxt->set_context) )
1536 {
1537 rc = set_context_data(buf, bytes);
1538
1539 if ( rc != X86EMUL_OKAY)
1540 {
1541 xfree(buf);
1542 return rc;
1543 }
1544
1545 rc = HVMTRANS_okay;
1546 }
1547 else
1548 /*
1549 * We do a modicum of checking here, just for paranoia's sake and to
1550          * definitely avoid copying an uninitialised buffer into guest address
1551 * space.
1552 */
1553 rc = hvm_copy_from_guest_phys(buf, sgpa, bytes);
1554
1555 if ( rc == HVMTRANS_okay )
1556 rc = hvm_copy_to_guest_phys(dgpa, buf, bytes, current);
1557
1558 xfree(buf);
1559
1560 if ( rc == HVMTRANS_gfn_paged_out )
1561 return X86EMUL_RETRY;
1562 if ( rc == HVMTRANS_gfn_shared )
1563 return X86EMUL_RETRY;
1564 if ( rc != HVMTRANS_okay )
1565 {
1566 gdprintk(XENLOG_WARNING, "Failed memory-to-memory REP MOVS: sgpa=%"
1567 PRIpaddr" dgpa=%"PRIpaddr" reps=%lu bytes_per_rep=%u\n",
1568 sgpa, dgpa, *reps, bytes_per_rep);
1569 return X86EMUL_UNHANDLEABLE;
1570 }
1571
1572 return X86EMUL_OKAY;
1573 }
1574
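/*
 * REP STOS: for RAM targets the pattern is expanded into a temporary
 * buffer with a native "rep stos" and written out in one go; for
 * device-model MMIO the request is forwarded as a rep ioreq; direct MMIO
 * is unhandleable.
 */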
1575 static int hvmemul_rep_stos(
1576 void *p_data,
1577 enum x86_segment seg,
1578 unsigned long offset,
1579 unsigned int bytes_per_rep,
1580 unsigned long *reps,
1581 struct x86_emulate_ctxt *ctxt)
1582 {
1583 struct hvm_emulate_ctxt *hvmemul_ctxt =
1584 container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
1585     struct hvm_vcpu_io *vio = &current->arch.hvm_vcpu.hvm_io;
1586 unsigned long addr, bytes;
1587 paddr_t gpa;
1588 p2m_type_t p2mt;
1589 bool_t df = !!(ctxt->regs->eflags & X86_EFLAGS_DF);
1590 int rc = hvmemul_virtual_to_linear(seg, offset, bytes_per_rep, reps,
1591 hvm_access_write, hvmemul_ctxt, &addr);
1592
1593 if ( rc != X86EMUL_OKAY )
1594 return rc;
1595
1596 bytes = PAGE_SIZE - (addr & ~PAGE_MASK);
1597 if ( vio->mmio_access.write_access &&
1598 (vio->mmio_gla == (addr & PAGE_MASK)) &&
1599 /* See respective comment in MOVS processing. */
1600 (vio->io_req.state == STATE_IORESP_READY ||
1601 ((!df || *reps == 1) &&
1602 PAGE_SIZE - (addr & ~PAGE_MASK) >= *reps * bytes_per_rep)) )
1603 gpa = pfn_to_paddr(vio->mmio_gpfn) | (addr & ~PAGE_MASK);
1604 else
1605 {
1606 uint32_t pfec = PFEC_page_present | PFEC_write_access;
1607
1608 if ( hvmemul_ctxt->seg_reg[x86_seg_ss].dpl == 3 )
1609 pfec |= PFEC_user_mode;
1610
1611 rc = hvmemul_linear_to_phys(addr, &gpa, bytes_per_rep, reps, pfec,
1612 hvmemul_ctxt);
1613 if ( rc != X86EMUL_OKAY )
1614 return rc;
1615 }
1616
1617 /* Check for MMIO op */
1618 (void)get_gfn_query_unlocked(current->domain, gpa >> PAGE_SHIFT, &p2mt);
1619
1620 switch ( p2mt )
1621 {
1622 unsigned long bytes;
1623 void *buf;
1624
1625 default:
1626 /* Allocate temporary buffer. */
1627 for ( ; ; )
1628 {
1629 bytes = *reps * bytes_per_rep;
1630 buf = xmalloc_bytes(bytes);
1631 if ( buf || *reps <= 1 )
1632 break;
1633 *reps >>= 1;
1634 }
1635
1636 if ( !buf )
1637 buf = p_data;
1638 else
1639 switch ( bytes_per_rep )
1640 {
1641 unsigned long dummy;
1642
1643 #define CASE(bits, suffix) \
1644 case (bits) / 8: \
1645 asm ( "rep stos" #suffix \
1646 : "=m" (*(char (*)[bytes])buf), \
1647 "=D" (dummy), "=c" (dummy) \
1648 : "a" (*(const uint##bits##_t *)p_data), \
1649 "1" (buf), "2" (*reps) ); \
1650 break
1651 CASE(8, b);
1652 CASE(16, w);
1653 CASE(32, l);
1654 CASE(64, q);
1655 #undef CASE
1656
1657 default:
1658 ASSERT_UNREACHABLE();
1659 xfree(buf);
1660 return X86EMUL_UNHANDLEABLE;
1661 }
1662
1663 /* Adjust address for reverse store. */
1664 if ( df )
1665 gpa -= bytes - bytes_per_rep;
1666
1667 rc = hvm_copy_to_guest_phys(gpa, buf, bytes, current);
1668
1669 if ( buf != p_data )
1670 xfree(buf);
1671
1672 switch ( rc )
1673 {
1674 case HVMTRANS_gfn_paged_out:
1675 case HVMTRANS_gfn_shared:
1676 return X86EMUL_RETRY;
1677 case HVMTRANS_okay:
1678 return X86EMUL_OKAY;
1679 }
1680
1681 gdprintk(XENLOG_WARNING,
1682 "Failed REP STOS: gpa=%"PRIpaddr" reps=%lu bytes_per_rep=%u\n",
1683 gpa, *reps, bytes_per_rep);
1684 /* fall through */
1685 case p2m_mmio_direct:
1686 return X86EMUL_UNHANDLEABLE;
1687
1688 case p2m_mmio_dm:
1689 latch_linear_to_phys(vio, addr, gpa, 1);
1690 return hvmemul_do_mmio_buffer(gpa, reps, bytes_per_rep, IOREQ_WRITE, df,
1691 p_data);
1692 }
1693 }
1694
1695 static int hvmemul_read_segment(
1696 enum x86_segment seg,
1697 struct segment_register *reg,
1698 struct x86_emulate_ctxt *ctxt)
1699 {
1700 struct hvm_emulate_ctxt *hvmemul_ctxt =
1701 container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
1702 struct segment_register *sreg = hvmemul_get_seg_reg(seg, hvmemul_ctxt);
1703
1704 if ( IS_ERR(sreg) )
1705 return -PTR_ERR(sreg);
1706
1707 *reg = *sreg;
1708
1709 return X86EMUL_OKAY;
1710 }
1711
1712 static int hvmemul_write_segment(
1713 enum x86_segment seg,
1714 const struct segment_register *reg,
1715 struct x86_emulate_ctxt *ctxt)
1716 {
1717 struct hvm_emulate_ctxt *hvmemul_ctxt =
1718 container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
1719 unsigned int idx = seg;
1720
1721 if ( idx >= ARRAY_SIZE(hvmemul_ctxt->seg_reg) )
1722 return X86EMUL_UNHANDLEABLE;
1723
1724 hvmemul_ctxt->seg_reg[idx] = *reg;
1725 __set_bit(idx, &hvmemul_ctxt->seg_reg_accessed);
1726 __set_bit(idx, &hvmemul_ctxt->seg_reg_dirty);
1727
1728 return X86EMUL_OKAY;
1729 }
1730
1731 static int hvmemul_read_io(
1732 unsigned int port,
1733 unsigned int bytes,
1734 unsigned long *val,
1735 struct x86_emulate_ctxt *ctxt)
1736 {
1737 struct hvm_emulate_ctxt *hvmemul_ctxt =
1738 container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
1739
1740 *val = 0;
1741
1742 if ( unlikely(hvmemul_ctxt->set_context) )
1743 return set_context_data(val, bytes);
1744
1745 return hvmemul_do_pio_buffer(port, bytes, IOREQ_READ, val);
1746 }
1747
1748 static int hvmemul_write_io(
1749 unsigned int port,
1750 unsigned int bytes,
1751 unsigned long val,
1752 struct x86_emulate_ctxt *ctxt)
1753 {
1754 return hvmemul_do_pio_buffer(port, bytes, IOREQ_WRITE, &val);
1755 }
1756
1757 static int hvmemul_read_cr(
1758 unsigned int reg,
1759 unsigned long *val,
1760 struct x86_emulate_ctxt *ctxt)
1761 {
1762 switch ( reg )
1763 {
1764 case 0:
1765 case 2:
1766 case 3:
1767 case 4:
1768 *val = current->arch.hvm_vcpu.guest_cr[reg];
1769 HVMTRACE_LONG_2D(CR_READ, reg, TRC_PAR_LONG(*val));
1770 return X86EMUL_OKAY;
1771 default:
1772 break;
1773 }
1774
1775 return X86EMUL_UNHANDLEABLE;
1776 }
1777
1778 static int hvmemul_write_cr(
1779 unsigned int reg,
1780 unsigned long val,
1781 struct x86_emulate_ctxt *ctxt)
1782 {
1783 int rc;
1784
1785 HVMTRACE_LONG_2D(CR_WRITE, reg, TRC_PAR_LONG(val));
1786 switch ( reg )
1787 {
1788 case 0:
1789 rc = hvm_set_cr0(val, 1);
1790 break;
1791
1792 case 2:
1793 current->arch.hvm_vcpu.guest_cr[2] = val;
1794 rc = X86EMUL_OKAY;
1795 break;
1796
1797 case 3:
1798 rc = hvm_set_cr3(val, 1);
1799 break;
1800
1801 case 4:
1802 rc = hvm_set_cr4(val, 1);
1803 break;
1804
1805 default:
1806 rc = X86EMUL_UNHANDLEABLE;
1807 break;
1808 }
1809
1810 if ( rc == X86EMUL_EXCEPTION )
1811 x86_emul_hw_exception(TRAP_gp_fault, 0, ctxt);
1812
1813 return rc;
1814 }
1815
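/*
 * MSR reads and writes go through the regular intercept handlers; an
 * X86EMUL_EXCEPTION result is reported back as #GP(0).
 */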
1816 static int hvmemul_read_msr(
1817 unsigned int reg,
1818 uint64_t *val,
1819 struct x86_emulate_ctxt *ctxt)
1820 {
1821 int rc = hvm_msr_read_intercept(reg, val);
1822
1823 if ( rc == X86EMUL_EXCEPTION )
1824 x86_emul_hw_exception(TRAP_gp_fault, 0, ctxt);
1825
1826 return rc;
1827 }
1828
1829 static int hvmemul_write_msr(
1830 unsigned int reg,
1831 uint64_t val,
1832 struct x86_emulate_ctxt *ctxt)
1833 {
1834 int rc = hvm_msr_write_intercept(reg, val, 1);
1835
1836 if ( rc == X86EMUL_EXCEPTION )
1837 x86_emul_hw_exception(TRAP_gp_fault, 0, ctxt);
1838
1839 return rc;
1840 }
1841
1842 static int hvmemul_wbinvd(
1843 struct x86_emulate_ctxt *ctxt)
1844 {
1845 hvm_funcs.wbinvd_intercept();
1846 return X86EMUL_OKAY;
1847 }
1848
1849 int hvmemul_cpuid(uint32_t leaf, uint32_t subleaf,
1850 struct cpuid_leaf *res, struct x86_emulate_ctxt *ctxt)
1851 {
1852 guest_cpuid(current, leaf, subleaf, res);
1853 return X86EMUL_OKAY;
1854 }
1855
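/*
 * Make the FPU available for use by the emulator: refuse state the guest
 * has not enabled (YMM requires both XSTATE_SSE and XSTATE_YMM in XCR0),
 * latch the current x87 state so a failed memory write can be backed out,
 * and register the exception callback.
 */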
1856 static int hvmemul_get_fpu(
1857 void (*exception_callback)(void *, struct cpu_user_regs *),
1858 void *exception_callback_arg,
1859 enum x86_emulate_fpu_type type,
1860 struct x86_emulate_ctxt *ctxt)
1861 {
1862 struct vcpu *curr = current;
1863
1864 switch ( type )
1865 {
1866 case X86EMUL_FPU_fpu:
1867 case X86EMUL_FPU_wait:
1868 case X86EMUL_FPU_mmx:
1869 case X86EMUL_FPU_xmm:
1870 break;
1871 case X86EMUL_FPU_ymm:
1872 if ( !(curr->arch.xcr0 & XSTATE_SSE) ||
1873 !(curr->arch.xcr0 & XSTATE_YMM) )
1874 return X86EMUL_UNHANDLEABLE;
1875 break;
1876 default:
1877 return X86EMUL_UNHANDLEABLE;
1878 }
1879
1880 if ( !curr->fpu_dirtied )
1881 hvm_funcs.fpu_dirty_intercept();
1882 else if ( type == X86EMUL_FPU_fpu )
1883 {
1884 const typeof(curr->arch.xsave_area->fpu_sse) *fpu_ctxt =
1885 curr->arch.fpu_ctxt;
1886
1887 /*
1888 * Latch current register state so that we can back out changes
1889 * if needed (namely when a memory write fails after register state
1890 * has already been updated).
1891 * NB: We don't really need the "enable" part of the called function
1892 * (->fpu_dirtied set implies CR0.TS clear), but the additional
1893 * overhead should be low enough to not warrant introduction of yet
1894 * another slightly different function. However, we need to undo the
1895 * ->fpu_dirtied clearing the function does as well as the possible
1896          * masking of all exceptions by FNSTENV.
1897 */
1898 save_fpu_enable();
1899 curr->fpu_dirtied = true;
1900 if ( (fpu_ctxt->fcw & 0x3f) != 0x3f )
1901 {
1902 uint16_t fcw;
1903
1904 asm ( "fnstcw %0" : "=m" (fcw) );
1905 if ( (fcw & 0x3f) == 0x3f )
1906 asm ( "fldcw %0" :: "m" (fpu_ctxt->fcw) );
1907 else
1908 ASSERT(fcw == fpu_ctxt->fcw);
1909 }
1910 }
1911
1912 curr->arch.hvm_vcpu.fpu_exception_callback = exception_callback;
1913 curr->arch.hvm_vcpu.fpu_exception_callback_arg = exception_callback_arg;
1914
1915 return X86EMUL_OKAY;
1916 }
1917
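/*
 * Release the FPU after emulation.  When @aux is supplied, FIP/FDP/FOP are
 * rewritten to reflect the guest's instruction (rather than Xen's own use
 * of the FPU), after which full state is forced to be reloaded on the
 * guest's next FPU use; @backout requests the same reload in order to
 * discard partially applied changes.
 */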
1918 static void hvmemul_put_fpu(
1919 struct x86_emulate_ctxt *ctxt,
1920 enum x86_emulate_fpu_type backout,
1921 const struct x86_emul_fpu_aux *aux)
1922 {
1923 struct vcpu *curr = current;
1924
1925 curr->arch.hvm_vcpu.fpu_exception_callback = NULL;
1926
1927 if ( aux )
1928 {
1929 typeof(curr->arch.xsave_area->fpu_sse) *fpu_ctxt = curr->arch.fpu_ctxt;
1930 bool dval = aux->dval;
1931 int mode = hvm_guest_x86_mode(curr);
1932
1933 ASSERT(backout == X86EMUL_FPU_none);
1934 /*
1935 * Latch current register state so that we can replace FIP/FDP/FOP
1936 * (which have values resulting from our own invocation of the FPU
1937 * instruction during emulation).
1938 * NB: See also the comment in hvmemul_get_fpu(); we don't need to
1939 * set ->fpu_dirtied here as it is going to be cleared below, and
1940 * we also don't need to reload FCW as we're forcing full state to
1941 * be reloaded anyway.
1942 */
1943 save_fpu_enable();
1944
1945 if ( boot_cpu_has(X86_FEATURE_FDP_EXCP_ONLY) &&
1946 !(fpu_ctxt->fsw & ~fpu_ctxt->fcw & 0x003f) )
1947 dval = false;
1948
1949 switch ( mode )
1950 {
1951 case 8:
1952 fpu_ctxt->fip.addr = aux->ip;
1953 if ( dval )
1954 fpu_ctxt->fdp.addr = aux->dp;
1955 fpu_ctxt->x[FPU_WORD_SIZE_OFFSET] = 8;
1956 break;
1957
1958 case 4: case 2:
1959 fpu_ctxt->fip.offs = aux->ip;
1960 fpu_ctxt->fip.sel = aux->cs;
1961 if ( dval )
1962 {
1963 fpu_ctxt->fdp.offs = aux->dp;
1964 fpu_ctxt->fdp.sel = aux->ds;
1965 }
1966 fpu_ctxt->x[FPU_WORD_SIZE_OFFSET] = mode;
1967 break;
1968
1969 case 0: case 1:
1970 fpu_ctxt->fip.addr = aux->ip | (aux->cs << 4);
1971 if ( dval )
1972 fpu_ctxt->fdp.addr = aux->dp | (aux->ds << 4);
1973 fpu_ctxt->x[FPU_WORD_SIZE_OFFSET] = 2;
1974 break;
1975
1976 default:
1977 ASSERT_UNREACHABLE();
1978 return;
1979 }
1980
1981 fpu_ctxt->fop = aux->op;
1982
1983 /* Re-use backout code below. */
1984 backout = X86EMUL_FPU_fpu;
1985 }
1986
1987 if ( backout == X86EMUL_FPU_fpu )
1988 {
1989 /*
1990 * To back out changes to the register file simply adjust state such
1991 * that upon next FPU insn use by the guest we'll reload the state
1992 * saved (or freshly loaded) by hvmemul_get_fpu().
1993 */
1994 curr->fpu_dirtied = false;
1995 stts();
1996 hvm_funcs.fpu_leave(curr);
1997 }
1998 }
1999
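/*
 * Emulate INVLPG: translate seg:offset to a linear address (squashing
 * type/limit faults, see below) and invalidate that mapping via
 * paging_invlpg().
 */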
2000 static int hvmemul_invlpg(
2001 enum x86_segment seg,
2002 unsigned long offset,
2003 struct x86_emulate_ctxt *ctxt)
2004 {
2005 struct hvm_emulate_ctxt *hvmemul_ctxt =
2006 container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
2007 unsigned long addr, reps = 1;
2008 int rc;
2009
2010 rc = hvmemul_virtual_to_linear(
2011 seg, offset, 1, &reps, hvm_access_none, hvmemul_ctxt, &addr);
2012
2013 if ( rc == X86EMUL_EXCEPTION )
2014 {
2015 /*
2016 * `invlpg` takes segment bases into account, but is not subject to
2017 * faults from segment type/limit checks, and is specified as a NOP
2018 * when issued on non-canonical addresses.
2019 *
2020 * hvmemul_virtual_to_linear() raises exceptions for type/limit
2021 * violations, so squash them.
2022 */
2023 x86_emul_reset_event(ctxt);
2024 rc = X86EMUL_OKAY;
2025 }
2026
2027 if ( rc == X86EMUL_OKAY )
2028 paging_invlpg(current, addr);
2029
2030 return rc;
2031 }
2032
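/*
 * VMFUNC is only handled when the altp2m hook is implemented; a failure
 * reported as X86EMUL_EXCEPTION becomes #UD.
 */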
2033 static int hvmemul_vmfunc(
2034 struct x86_emulate_ctxt *ctxt)
2035 {
2036 int rc;
2037
2038 if ( !hvm_funcs.altp2m_vcpu_emulate_vmfunc )
2039 return X86EMUL_UNHANDLEABLE;
2040 rc = hvm_funcs.altp2m_vcpu_emulate_vmfunc(ctxt->regs);
2041 if ( rc == X86EMUL_EXCEPTION )
2042 x86_emul_hw_exception(TRAP_invalid_op, X86_EVENT_NO_EC, ctxt);
2043
2044 return rc;
2045 }
2046
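/* Callback table for the normal (full) emulation path. */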
2047 static const struct x86_emulate_ops hvm_emulate_ops = {
2048 .read = hvmemul_read,
2049 .insn_fetch = hvmemul_insn_fetch,
2050 .write = hvmemul_write,
2051 .cmpxchg = hvmemul_cmpxchg,
2052 .validate = hvmemul_validate,
2053 .rep_ins = hvmemul_rep_ins,
2054 .rep_outs = hvmemul_rep_outs,
2055 .rep_movs = hvmemul_rep_movs,
2056 .rep_stos = hvmemul_rep_stos,
2057 .read_segment = hvmemul_read_segment,
2058 .write_segment = hvmemul_write_segment,
2059 .read_io = hvmemul_read_io,
2060 .write_io = hvmemul_write_io,
2061 .read_cr = hvmemul_read_cr,
2062 .write_cr = hvmemul_write_cr,
2063 .read_msr = hvmemul_read_msr,
2064 .write_msr = hvmemul_write_msr,
2065 .wbinvd = hvmemul_wbinvd,
2066 .cpuid = hvmemul_cpuid,
2067 .get_fpu = hvmemul_get_fpu,
2068 .put_fpu = hvmemul_put_fpu,
2069 .invlpg = hvmemul_invlpg,
2070 .vmfunc = hvmemul_vmfunc,
2071 };
2072
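/*
 * Variant used for EMUL_KIND_NOWRITE vm_event replies: memory writes,
 * port/string I/O, MSR writes and WBINVD are discarded, while other
 * operations use the normal handlers.
 */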
2073 static const struct x86_emulate_ops hvm_emulate_ops_no_write = {
2074 .read = hvmemul_read,
2075 .insn_fetch = hvmemul_insn_fetch,
2076 .write = hvmemul_write_discard,
2077 .cmpxchg = hvmemul_cmpxchg_discard,
2078 .rep_ins = hvmemul_rep_ins_discard,
2079 .rep_outs = hvmemul_rep_outs_discard,
2080 .rep_movs = hvmemul_rep_movs_discard,
2081 .rep_stos = hvmemul_rep_stos_discard,
2082 .read_segment = hvmemul_read_segment,
2083 .write_segment = hvmemul_write_segment,
2084 .read_io = hvmemul_read_io_discard,
2085 .write_io = hvmemul_write_io_discard,
2086 .read_cr = hvmemul_read_cr,
2087 .write_cr = hvmemul_write_cr,
2088 .read_msr = hvmemul_read_msr,
2089 .write_msr = hvmemul_write_msr_discard,
2090 .wbinvd = hvmemul_wbinvd_discard,
2091 .cpuid = hvmemul_cpuid,
2092 .get_fpu = hvmemul_get_fpu,
2093 .put_fpu = hvmemul_put_fpu,
2094 .invlpg = hvmemul_invlpg,
2095 .vmfunc = hvmemul_vmfunc,
2096 };
2097
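/*
 * Core emulation helper: run x86_emulate() with the given ops, maintain the
 * cached MMIO instruction bytes across X86EMUL_RETRY, and propagate the
 * retire flags (singlestep, MOV SS/STI shadows, NMI unblock, HLT) back into
 * the vCPU state.
 */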
2098 static int _hvm_emulate_one(struct hvm_emulate_ctxt *hvmemul_ctxt,
2099 const struct x86_emulate_ops *ops)
2100 {
2101 const struct cpu_user_regs *regs = hvmemul_ctxt->ctxt.regs;
2102 struct vcpu *curr = current;
2103 uint32_t new_intr_shadow;
2104 struct hvm_vcpu_io *vio = &curr->arch.hvm_vcpu.hvm_io;
2105 int rc;
2106
2107 hvm_emulate_init_per_insn(hvmemul_ctxt, vio->mmio_insn,
2108 vio->mmio_insn_bytes);
2109
2110 vio->mmio_retry = 0;
2111
2112 switch ( rc = x86_emulate(&hvmemul_ctxt->ctxt, ops) )
2113 {
2114 case X86EMUL_OKAY:
2115 if ( vio->mmio_retry )
2116 rc = X86EMUL_RETRY;
2117 /* fall through */
2118 default:
2119 vio->mmio_cache_count = 0;
2120 vio->mmio_insn_bytes = 0;
2121 break;
2122
2123 case X86EMUL_RETRY:
2124 BUILD_BUG_ON(sizeof(vio->mmio_insn) < sizeof(hvmemul_ctxt->insn_buf));
2125 vio->mmio_insn_bytes = hvmemul_ctxt->insn_buf_bytes;
2126 memcpy(vio->mmio_insn, hvmemul_ctxt->insn_buf, vio->mmio_insn_bytes);
2127 break;
2128 }
2129
2130 if ( hvmemul_ctxt->ctxt.retire.singlestep )
2131 hvm_inject_hw_exception(TRAP_debug, X86_EVENT_NO_EC);
2132
2133 new_intr_shadow = hvmemul_ctxt->intr_shadow;
2134
2135 /* MOV-SS instruction toggles MOV-SS shadow, else we just clear it. */
2136 if ( hvmemul_ctxt->ctxt.retire.mov_ss )
2137 new_intr_shadow ^= HVM_INTR_SHADOW_MOV_SS;
2138 else if ( rc != X86EMUL_RETRY )
2139 new_intr_shadow &= ~HVM_INTR_SHADOW_MOV_SS;
2140
2141 /* STI instruction toggles STI shadow, else we just clear it. */
2142 if ( hvmemul_ctxt->ctxt.retire.sti )
2143 new_intr_shadow ^= HVM_INTR_SHADOW_STI;
2144 else if ( rc != X86EMUL_RETRY )
2145 new_intr_shadow &= ~HVM_INTR_SHADOW_STI;
2146
2147 /* IRET, if valid in the given context, clears NMI blocking. */
2148 if ( hvmemul_ctxt->ctxt.retire.unblock_nmi )
2149 new_intr_shadow &= ~HVM_INTR_SHADOW_NMI;
2150
2151 if ( hvmemul_ctxt->intr_shadow != new_intr_shadow )
2152 {
2153 hvmemul_ctxt->intr_shadow = new_intr_shadow;
2154 hvm_funcs.set_interrupt_shadow(curr, new_intr_shadow);
2155 }
2156
2157 if ( hvmemul_ctxt->ctxt.retire.hlt &&
2158 !hvm_local_events_need_delivery(curr) )
2159 {
2160 hvm_hlt(regs->eflags);
2161 }
2162
2163 return rc;
2164 }
2165
2166 int hvm_emulate_one(
2167 struct hvm_emulate_ctxt *hvmemul_ctxt)
2168 {
2169 return _hvm_emulate_one(hvmemul_ctxt, &hvm_emulate_ops);
2170 }
2171
2172 int hvm_emulate_one_mmio(unsigned long mfn, unsigned long gla)
2173 {
2174 static const struct x86_emulate_ops hvm_intercept_ops_mmcfg = {
2175 .read = x86emul_unhandleable_rw,
2176 .insn_fetch = hvmemul_insn_fetch,
2177 .write = mmcfg_intercept_write,
2178 .cpuid = hvmemul_cpuid,
2179 };
2180 static const struct x86_emulate_ops hvm_ro_emulate_ops_mmio = {
2181 .read = x86emul_unhandleable_rw,
2182 .insn_fetch = hvmemul_insn_fetch,
2183 .write = mmio_ro_emulated_write,
2184 .cpuid = hvmemul_cpuid,
2185 };
2186 struct mmio_ro_emulate_ctxt mmio_ro_ctxt = { .cr2 = gla };
2187 struct hvm_emulate_ctxt ctxt;
2188 const struct x86_emulate_ops *ops;
2189 unsigned int seg, bdf;
2190 int rc;
2191
2192 if ( pci_ro_mmcfg_decode(mfn, &seg, &bdf) )
2193 {
2194 mmio_ro_ctxt.seg = seg;
2195 mmio_ro_ctxt.bdf = bdf;
2196 ops = &hvm_intercept_ops_mmcfg;
2197 }
2198 else
2199 ops = &hvm_ro_emulate_ops_mmio;
2200
2201 hvm_emulate_init_once(&ctxt, x86_insn_is_mem_write,
2202 guest_cpu_user_regs());
2203 ctxt.ctxt.data = &mmio_ro_ctxt;
2204 rc = _hvm_emulate_one(&ctxt, ops);
2205 switch ( rc )
2206 {
2207 case X86EMUL_UNHANDLEABLE:
2208 case X86EMUL_UNIMPLEMENTED:
2209 hvm_dump_emulation_state(XENLOG_G_WARNING, "MMCFG", &ctxt, rc);
2210 break;
2211 case X86EMUL_EXCEPTION:
2212 hvm_inject_event(&ctxt.ctxt.event);
2213 /* fallthrough */
2214 default:
2215 hvm_emulate_writeback(&ctxt);
2216 }
2217
2218 return rc;
2219 }
2220
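/*
 * Emulate the current instruction as requested by a vm_event reply,
 * optionally suppressing writes or substituting introspection-provided
 * instruction bytes / data.
 */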
2221 void hvm_emulate_one_vm_event(enum emul_kind kind, unsigned int trapnr,
2222 unsigned int errcode)
2223 {
2224 struct hvm_emulate_ctxt ctx = {{ 0 }};
2225 int rc;
2226
2227 hvm_emulate_init_once(&ctx, NULL, guest_cpu_user_regs());
2228
2229 switch ( kind )
2230 {
2231 case EMUL_KIND_NOWRITE:
2232 rc = _hvm_emulate_one(&ctx, &hvm_emulate_ops_no_write);
2233 break;
2234 case EMUL_KIND_SET_CONTEXT_INSN: {
2235 struct vcpu *curr = current;
2236 struct hvm_vcpu_io *vio = &curr->arch.hvm_vcpu.hvm_io;
2237
2238 BUILD_BUG_ON(sizeof(vio->mmio_insn) !=
2239 sizeof(curr->arch.vm_event->emul.insn.data));
2240 ASSERT(!vio->mmio_insn_bytes);
2241
2242 /*
2243 * Stash insn buffer into mmio buffer here instead of ctx
2244 * to avoid having to add more logic to hvm_emulate_one.
2245 */
2246 vio->mmio_insn_bytes = sizeof(vio->mmio_insn);
2247 memcpy(vio->mmio_insn, curr->arch.vm_event->emul.insn.data,
2248 vio->mmio_insn_bytes);
2249 }
2250 /* Fall-through */
2251 default:
2252 ctx.set_context = (kind == EMUL_KIND_SET_CONTEXT_DATA);
2253 rc = hvm_emulate_one(&ctx);
2254 }
2255
2256 switch ( rc )
2257 {
2258 case X86EMUL_RETRY:
2259 /*
2260 * This function is called when handling an EPT-related vm_event
2261 * reply. As such, nothing else needs to be done here, since simply
2262 * returning makes the current instruction cause a page fault again,
2263 * consistent with X86EMUL_RETRY.
2264 */
2265 return;
2266 case X86EMUL_UNIMPLEMENTED:
2267 if ( hvm_monitor_emul_unimplemented() )
2268 return;
2269 /* fall-through */
2270 case X86EMUL_UNHANDLEABLE:
2271 hvm_dump_emulation_state(XENLOG_G_DEBUG, "Mem event", &ctx, rc);
2272 hvm_inject_hw_exception(trapnr, errcode);
2273 break;
2274 case X86EMUL_EXCEPTION:
2275 hvm_inject_event(&ctx.ctxt.event);
2276 break;
2277 }
2278
2279 hvm_emulate_writeback(&ctx);
2280 }
2281
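/*
 * One-time setup of an emulation context: latch the interrupt shadow and
 * the CS/SS registers, and record the register frame, CPU vendor and
 * validation hook.
 */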
2282 void hvm_emulate_init_once(
2283 struct hvm_emulate_ctxt *hvmemul_ctxt,
2284 hvm_emulate_validate_t *validate,
2285 struct cpu_user_regs *regs)
2286 {
2287 struct vcpu *curr = current;
2288
2289 memset(hvmemul_ctxt, 0, sizeof(*hvmemul_ctxt));
2290
2291 hvmemul_ctxt->intr_shadow = hvm_funcs.get_interrupt_shadow(curr);
2292 hvmemul_get_seg_reg(x86_seg_cs, hvmemul_ctxt);
2293 hvmemul_get_seg_reg(x86_seg_ss, hvmemul_ctxt);
2294
2295 hvmemul_ctxt->validate = validate;
2296 hvmemul_ctxt->ctxt.regs = regs;
2297 hvmemul_ctxt->ctxt.vendor = curr->domain->arch.cpuid->x86_vendor;
2298 hvmemul_ctxt->ctxt.force_writeback = true;
2299 }
2300
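/*
 * Per-instruction (re)initialisation: derive address/stack sizes from the
 * CS/SS attributes and fill the instruction byte buffer, either from the
 * caller, from hvm_get_insn_bytes(), or by fetching from guest memory.
 */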
2301 void hvm_emulate_init_per_insn(
2302 struct hvm_emulate_ctxt *hvmemul_ctxt,
2303 const unsigned char *insn_buf,
2304 unsigned int insn_bytes)
2305 {
2306 struct vcpu *curr = current;
2307 unsigned int pfec = PFEC_page_present;
2308 unsigned long addr;
2309
2310 hvmemul_ctxt->ctxt.lma = hvm_long_mode_active(curr);
2311
2312 if ( hvmemul_ctxt->ctxt.lma &&
2313 hvmemul_ctxt->seg_reg[x86_seg_cs].l )
2314 hvmemul_ctxt->ctxt.addr_size = hvmemul_ctxt->ctxt.sp_size = 64;
2315 else
2316 {
2317 hvmemul_ctxt->ctxt.addr_size =
2318 hvmemul_ctxt->seg_reg[x86_seg_cs].db ? 32 : 16;
2319 hvmemul_ctxt->ctxt.sp_size =
2320 hvmemul_ctxt->seg_reg[x86_seg_ss].db ? 32 : 16;
2321 }
2322
2323 if ( hvmemul_ctxt->seg_reg[x86_seg_ss].dpl == 3 )
2324 pfec |= PFEC_user_mode;
2325
2326 hvmemul_ctxt->insn_buf_eip = hvmemul_ctxt->ctxt.regs->rip;
2327 if ( !insn_bytes )
2328 {
2329 hvmemul_ctxt->insn_buf_bytes =
2330 hvm_get_insn_bytes(curr, hvmemul_ctxt->insn_buf) ?:
2331 (hvm_virtual_to_linear_addr(x86_seg_cs,
2332 &hvmemul_ctxt->seg_reg[x86_seg_cs],
2333 hvmemul_ctxt->insn_buf_eip,
2334 sizeof(hvmemul_ctxt->insn_buf),
2335 hvm_access_insn_fetch,
2336 &hvmemul_ctxt->seg_reg[x86_seg_cs],
2337 &addr) &&
2338 hvm_fetch_from_guest_linear(hvmemul_ctxt->insn_buf, addr,
2339 sizeof(hvmemul_ctxt->insn_buf),
2340 pfec, NULL) == HVMTRANS_okay) ?
2341 sizeof(hvmemul_ctxt->insn_buf) : 0;
2342 }
2343 else
2344 {
2345 hvmemul_ctxt->insn_buf_bytes = insn_bytes;
2346 memcpy(hvmemul_ctxt->insn_buf, insn_buf, insn_bytes);
2347 }
2348 }
2349
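/* Commit any segment registers dirtied during emulation back to the vCPU. */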
2350 void hvm_emulate_writeback(
2351 struct hvm_emulate_ctxt *hvmemul_ctxt)
2352 {
2353 enum x86_segment seg;
2354
2355 seg = find_first_bit(&hvmemul_ctxt->seg_reg_dirty,
2356 ARRAY_SIZE(hvmemul_ctxt->seg_reg));
2357
2358 while ( seg < ARRAY_SIZE(hvmemul_ctxt->seg_reg) )
2359 {
2360 hvm_set_segment_register(current, seg, &hvmemul_ctxt->seg_reg[seg]);
2361 seg = find_next_bit(&hvmemul_ctxt->seg_reg_dirty,
2362 ARRAY_SIZE(hvmemul_ctxt->seg_reg),
2363 seg+1);
2364 }
2365 }
2366
2367 /*
2368 * Callers which pass a known in-range x86_segment can rely on the return
2369 * pointer being valid. Other callers must explicitly check for errors.
2370 */
2371 struct segment_register *hvmemul_get_seg_reg(
2372 enum x86_segment seg,
2373 struct hvm_emulate_ctxt *hvmemul_ctxt)
2374 {
2375 unsigned int idx = seg;
2376
2377 if ( idx >= ARRAY_SIZE(hvmemul_ctxt->seg_reg) )
2378 return ERR_PTR(-X86EMUL_UNHANDLEABLE);
2379
2380 if ( !__test_and_set_bit(idx, &hvmemul_ctxt->seg_reg_accessed) )
2381 hvm_get_segment_register(current, idx, &hvmemul_ctxt->seg_reg[idx]);
2382 return &hvmemul_ctxt->seg_reg[idx];
2383 }
2384
2385 static const char *guest_x86_mode_to_str(int mode)
2386 {
2387 switch ( mode )
2388 {
2389 case 0: return "Real";
2390 case 1: return "v86";
2391 case 2: return "16bit";
2392 case 4: return "32bit";
2393 case 8: return "64bit";
2394 default: return "Unknown";
2395 }
2396 }
2397
2398 void hvm_dump_emulation_state(const char *loglvl, const char *prefix,
2399 struct hvm_emulate_ctxt *hvmemul_ctxt, int rc)
2400 {
2401 struct vcpu *curr = current;
2402 const char *mode_str = guest_x86_mode_to_str(hvm_guest_x86_mode(curr));
2403 const struct segment_register *cs =
2404 hvmemul_get_seg_reg(x86_seg_cs, hvmemul_ctxt);
2405
2406 printk("%s%s emulation failed (%d): %pv %s @ %04x:%08lx -> %*ph\n",
2407 loglvl, prefix, rc, curr, mode_str, cs->sel,
2408 hvmemul_ctxt->insn_buf_eip, hvmemul_ctxt->insn_buf_bytes,
2409 hvmemul_ctxt->insn_buf);
2410 }
2411
2412 /*
2413 * Local variables:
2414 * mode: C
2415 * c-file-style: "BSD"
2416 * c-basic-offset: 4
2417 * tab-width: 4
2418 * indent-tabs-mode: nil
2419 * End:
2420 */
2421