/******************************************************************************
 * flushtlb.c
 *
 * TLB flushes are timestamped using a global virtual 'clock' which ticks
 * on any TLB flush on any processor.
 *
 * Copyright (c) 2003-2006, K A Fraser
 */

#include <xen/sched.h>
#include <xen/softirq.h>
#include <asm/flushtlb.h>
#include <asm/page.h>

/* Debug builds: Wrap frequently to stress-test the wrap logic. */
#ifdef NDEBUG
#define WRAP_MASK (0xFFFFFFFFU)
#else
#define WRAP_MASK (0x000003FFU)
#endif

u32 tlbflush_clock = 1U;
DEFINE_PER_CPU(u32, tlbflush_time);
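
/*
 * How the clock and the per-CPU stamps are consumed (illustrative sketch
 * only; the real check is NEED_FLUSH() in asm/flushtlb.h and may differ in
 * detail): a page's last-use stamp is compared against a CPU's
 * tlbflush_time.  If the CPU has ticked the clock (i.e. flushed) since the
 * page was last used, no stale mapping can remain cached and no IPI is
 * needed:
 *
 *   need_flush = (curr_time == 0) ||               (clock wrap in progress)
 *                ((cpu_stamp <= lastuse_stamp) &&
 *                 (lastuse_stamp <= curr_time));
 */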

/*
 * pre_flush(): Increment the virtual TLB-flush clock. Returns new clock value.
 *
 * This must happen *before* we flush the TLB. If we do it after, we race other
 * CPUs invalidating PTEs. For example, a page invalidated after the flush
 * might get the old timestamp, but this CPU can speculatively fetch the
 * mapping into its TLB after the flush but before incrementing the clock.
 */
static u32 pre_flush(void)
{
    u32 t, t1, t2;

    t = tlbflush_clock;
    do {
        t1 = t2 = t;
        /* Clock wrapped: someone else is leading a global TLB shootdown. */
        if ( unlikely(t1 == 0) )
            goto skip_clocktick;
        t2 = (t + 1) & WRAP_MASK;
    }
    while ( unlikely((t = cmpxchg(&tlbflush_clock, t1, t2)) != t1) );

    /* Clock wrapped: we will lead a global TLB shootdown. */
    if ( unlikely(t2 == 0) )
        raise_softirq(NEW_TLBFLUSH_CLOCK_PERIOD_SOFTIRQ);

 skip_clocktick:
    return t2;
}
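
/*
 * Worked example of the wrap handling above, using the debug WRAP_MASK of
 * 0x3FF: the CPU that ticks the clock from 0x3FF to 0 (t2 == 0) raises
 * NEW_TLBFLUSH_CLOCK_PERIOD_SOFTIRQ and leads a global shootdown; any other
 * CPU that meanwhile reads the clock as 0 skips the tick (t1 == 0, goto
 * skip_clocktick) and returns 0, deferring to that leader.
 */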

/*
 * post_flush(): Update this CPU's timestamp with specified clock value.
 *
 * Note that this happens *after* flushing the TLB, as otherwise we can race a
 * NEED_FLUSH() test on another CPU. (e.g., other CPU sees the updated CPU
 * stamp and so does not force a synchronous TLB flush, but the flush in this
 * function hasn't yet occurred and so the TLB might be stale). The ordering
 * would only actually matter if this function were interruptible, and
 * something that abuses the stale mapping could exist in an interrupt
 * handler. In fact neither of these is the case, so really we are being ultra
 * paranoid.
 */
static void post_flush(u32 t)
{
    this_cpu(tlbflush_time) = t;
}

void write_cr3(unsigned long cr3)
{
    unsigned long flags, cr4 = read_cr4();
    u32 t;

    /* This non-reentrant function is sometimes called in interrupt context. */
    local_irq_save(flags);

    t = pre_flush();

    hvm_flush_guest_tlbs();
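
    /*
     * Clearing and then restoring CR4.PGE flushes the entire TLB, including
     * global entries, which the CR3 write below would otherwise leave
     * intact.
     */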

    write_cr4(cr4 & ~X86_CR4_PGE);
    asm volatile ( "mov %0, %%cr3" : : "r" (cr3) : "memory" );
    write_cr4(cr4);

    post_flush(t);

    local_irq_restore(flags);
}

/*
 * The return value of this function is the passed-in "flags" argument with
 * those bits cleared that have been handled fully (i.e. system-wide) and
 * hence require no further action on remote CPUs. (See the illustrative
 * caller sketch after this function.)
 */
unsigned int flush_area_local(const void *va, unsigned int flags)
{
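    /*
     * The flush order is encoded into 'flags' by FLUSH_ORDER() in
     * asm/flushtlb.h (as order + 1, hence the -1 when decoding it here);
     * order 0 means a single 4K page.
     */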
    unsigned int order = (flags - 1) & FLUSH_ORDER_MASK;
    unsigned long irqfl;

    /* This non-reentrant function is sometimes called in interrupt context. */
    local_irq_save(irqfl);

    if ( flags & (FLUSH_TLB|FLUSH_TLB_GLOBAL) )
    {
        if ( order == 0 )
        {
            /*
             * We don't INVLPG multi-page regions because the 2M/4M/1G
             * region may not have been mapped with a superpage. Also there
             * are various errata surrounding INVLPG usage on superpages, and
             * a full flush is in any case not *that* expensive.
             */
            asm volatile ( "invlpg %0"
                           : : "m" (*(const char *)(va)) : "memory" );
        }
        else
        {
            u32 t = pre_flush();
            unsigned long cr4 = read_cr4();

            hvm_flush_guest_tlbs();

            write_cr4(cr4 & ~X86_CR4_PGE);
            barrier();
            write_cr4(cr4);

            post_flush(t);
        }
    }

    if ( flags & FLUSH_CACHE )
    {
        const struct cpuinfo_x86 *c = &current_cpu_data;
        unsigned long i, sz = 0;

        if ( order < (BITS_PER_LONG - PAGE_SHIFT) )
            sz = 1UL << (order + PAGE_SHIFT);

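        /*
         * Prefer a CLFLUSH loop over WBINVD only when it is safe and cheap:
         * either no TLB flush was requested or the VA remains validly
         * mapped (FLUSH_VA_VALID), the CLFLUSH line size and cache size are
         * known, and the region is smaller than the cache.  Otherwise fall
         * back to a full WBINVD below.
         */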
        if ( (!(flags & (FLUSH_TLB|FLUSH_TLB_GLOBAL)) ||
              (flags & FLUSH_VA_VALID)) &&
             c->x86_clflush_size && c->x86_cache_size && sz &&
             ((sz >> 10) < c->x86_cache_size) )
        {
            alternative(ASM_NOP3, "sfence", X86_FEATURE_CLFLUSHOPT);
            for ( i = 0; i < sz; i += c->x86_clflush_size )
                alternative_input(".byte " __stringify(NOP_DS_PREFIX) ";"
                                  " clflush %0",
                                  "data16 clflush %0", /* clflushopt */
                                  X86_FEATURE_CLFLUSHOPT,
                                  "m" (((const char *)va)[i]));
            flags &= ~FLUSH_CACHE;
        }
        else
        {
            wbinvd();
        }
    }

    local_irq_restore(irqfl);

    return flags;
}
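
/*
 * Illustrative caller-side sketch (not part of this file; the real remote
 * flush path is flush_area_mask() and may differ): only the bits still set
 * in the returned value need propagating to other CPUs, e.g.
 *
 *   flags = flush_area_local(va, flags);
 *   if ( flags && !cpumask_empty(&remote_mask) )
 *       send_remote_flush_ipi(&remote_mask, va, flags);    (hypothetical)
 */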