/******************************************************************************
 * flushtlb.c
 *
 * TLB flushes are timestamped using a global virtual 'clock' which ticks
 * on any TLB flush on any processor.
 *
 * Copyright (c) 2003-2006, K A Fraser
 */

#include <xen/sched.h>
#include <xen/softirq.h>
#include <asm/flushtlb.h>
#include <asm/page.h>

/* Debug builds: Wrap frequently to stress-test the wrap logic. */
#ifdef NDEBUG
#define WRAP_MASK (0xFFFFFFFFU)
#else
#define WRAP_MASK (0x000003FFU)
#endif

u32 tlbflush_clock = 1U;
DEFINE_PER_CPU(u32, tlbflush_time);
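
/*
 * tlbflush_clock ticks (in pre_flush()) on every full TLB flush on any CPU,
 * and tlbflush_time records, per CPU, the clock value of the last flush that
 * CPU completed (set in post_flush()).  A CPU can still hold a stale
 * translation for a page only if its tlbflush_time is no newer than the
 * page's time-of-last-use stamp -- illustratively, something like
 *     per_cpu(tlbflush_time, cpu) <= page_timestamp
 * (the real NEED_FLUSH() test in the flushtlb header also copes with the
 * clock having wrapped).
 */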

/*
 * pre_flush(): Increment the virtual TLB-flush clock. Returns new clock value.
 *
 * This must happen *before* we flush the TLB. If we do it after, we race other
 * CPUs invalidating PTEs. For example, a page invalidated after the flush
 * might get the old timestamp, but this CPU can speculatively fetch the
 * mapping into its TLB after the flush but before incrementing the clock.
 */
static u32 pre_flush(void)
{
    u32 t, t1, t2;

    t = tlbflush_clock;
    do {
        t1 = t2 = t;
        /* Clock wrapped: someone else is leading a global TLB shootdown. */
        if ( unlikely(t1 == 0) )
            goto skip_clocktick;
        t2 = (t + 1) & WRAP_MASK;
    }
    while ( unlikely((t = cmpxchg(&tlbflush_clock, t1, t2)) != t1) );

    /* Clock wrapped: we will lead a global TLB shootdown. */
    if ( unlikely(t2 == 0) )
        raise_softirq(NEW_TLBFLUSH_CLOCK_PERIOD_SOFTIRQ);

 skip_clocktick:
    return t2;
}
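
/*
 * When the clock wraps to zero, the CPU that drove it to zero raises
 * NEW_TLBFLUSH_CLOCK_PERIOD_SOFTIRQ; the handler is expected to flush every
 * CPU and restart the clock at 1 (matching its initialiser above), while any
 * CPU that meanwhile observes a zero clock skips ticking it altogether.
 */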

/*
 * post_flush(): Update this CPU's timestamp with specified clock value.
 *
 * Note that this happens *after* flushing the TLB, as otherwise we can race a
 * NEED_FLUSH() test on another CPU. (e.g., the other CPU sees the updated CPU
 * stamp and so does not force a synchronous TLB flush, but the flush on this
 * CPU hasn't yet occurred and so its TLB might be stale). The ordering would
 * only actually matter if this function were interruptible, and something
 * that abuses the stale mapping could exist in an interrupt handler. In fact
 * neither of these is the case, so really we are being ultra paranoid.
 */
static void post_flush(u32 t)
{
    this_cpu(tlbflush_time) = t;
}

void write_cr3(unsigned long cr3)
{
    unsigned long flags, cr4 = read_cr4();
    u32 t;

    /* This non-reentrant function is sometimes called in interrupt context. */
    local_irq_save(flags);

    t = pre_flush();

    hvm_flush_guest_tlbs();

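    /*
     * Clearing and then restoring CR4.PGE flushes every TLB entry, including
     * global entries, which the CR3 write alone would leave in place.
     */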
    write_cr4(cr4 & ~X86_CR4_PGE);
    asm volatile ( "mov %0, %%cr3" : : "r" (cr3) : "memory" );
    write_cr4(cr4);

    post_flush(t);

    local_irq_restore(flags);
}

/*
 * The return value of this function is the passed in "flags" argument with
 * bits cleared that have been fully (i.e. system-wide) taken care of, i.e.
 * that do not require any further action on remote CPUs.
 */
unsigned int flush_area_local(const void *va, unsigned int flags)
{
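    /*
     * The order is stored biased by one in the low flag bits (FLUSH_ORDER()
     * in the flushtlb header adds 1), hence the subtraction below.
     */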
    unsigned int order = (flags - 1) & FLUSH_ORDER_MASK;
    unsigned long irqfl;

    /* This non-reentrant function is sometimes called in interrupt context. */
    local_irq_save(irqfl);

    if ( flags & (FLUSH_TLB|FLUSH_TLB_GLOBAL) )
    {
        if ( order == 0 )
        {
            /*
             * We don't INVLPG multi-page regions because the 2M/4M/1G
             * region may not have been mapped with a superpage. Also there
             * are various errata surrounding INVLPG usage on superpages, and
             * a full flush is in any case not *that* expensive.
             */
            asm volatile ( "invlpg %0"
                           : : "m" (*(const char *)(va)) : "memory" );
        }
        else
        {
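            /*
             * Multi-page (or global) flush: toggle CR4.PGE to flush the
             * whole TLB, global entries included, and tick the flush clock
             * around it just as write_cr3() does.
             */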
            u32 t = pre_flush();
            unsigned long cr4 = read_cr4();

            hvm_flush_guest_tlbs();

            write_cr4(cr4 & ~X86_CR4_PGE);
            barrier();
            write_cr4(cr4);

            post_flush(t);
        }
    }

    if ( flags & FLUSH_CACHE )
    {
        const struct cpuinfo_x86 *c = &current_cpu_data;
        unsigned long i, sz = 0;

        if ( order < (BITS_PER_LONG - PAGE_SHIFT) )
            sz = 1UL << (order + PAGE_SHIFT);

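        /*
         * Use targeted cache-line flushes only when the linear address range
         * is valid for data accesses (no TLB-flush flags, or FLUSH_VA_VALID
         * set), the CPU reports its CLFLUSH line and cache sizes, and the
         * region is smaller than the cache (sz >> 10 converts to KiB for the
         * comparison); otherwise fall back to WBINVD.
         */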
        if ( (!(flags & (FLUSH_TLB|FLUSH_TLB_GLOBAL)) ||
              (flags & FLUSH_VA_VALID)) &&
             c->x86_clflush_size && c->x86_cache_size && sz &&
             ((sz >> 10) < c->x86_cache_size) )
        {
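            /*
             * CLFLUSHOPT is only weakly ordered with respect to stores to
             * other cache lines, so when it is in use an SFENCE is patched
             * in below to order the line flushes after preceding writes;
             * plain CLFLUSH is already ordered with respect to writes and
             * needs no fence here.
             */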
            alternative(ASM_NOP3, "sfence", X86_FEATURE_CLFLUSHOPT);
            for ( i = 0; i < sz; i += c->x86_clflush_size )
                alternative_input(".byte " __stringify(NOP_DS_PREFIX) ";"
                                  " clflush %0",
                                  "data16 clflush %0",      /* clflushopt */
                                  X86_FEATURE_CLFLUSHOPT,
                                  "m" (((const char *)va)[i]));
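            /*
             * CLFLUSH/CLFLUSHOPT evict the lines throughout the cache
             * coherence domain, so the cache part of the request is fully
             * handled and remote CPUs need not repeat it; WBINVD below only
             * affects this CPU's caches, so in that case FLUSH_CACHE stays
             * set in the returned flags.
             */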
            flags &= ~FLUSH_CACHE;
        }
        else
        {
            wbinvd();
        }
    }

    local_irq_restore(irqfl);

    return flags;
}