/*
 * Intel SMP support routines.
 *
 * (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
 * (c) 1998-99, 2000 Ingo Molnar <mingo@redhat.com>
 *
 * This code is released under the GNU General Public License version 2 or
 * later.
 */

#include <xen/cpu.h>
#include <xen/irq.h>
#include <xen/sched.h>
#include <xen/delay.h>
#include <xen/perfc.h>
#include <xen/spinlock.h>

#include <asm/apic.h>
#include <asm/current.h>
#include <asm/genapic.h>
#include <asm/guest.h>
#include <asm/irq-vectors.h>
#include <asm/smp.h>
#include <asm/mc146818rtc.h>
#include <asm/flushtlb.h>
#include <asm/hardirq.h>
#include <asm/hpet.h>
#include <asm/setup.h>

/* Helper functions to prepare APIC register values. */
static unsigned int prepare_ICR(unsigned int shortcut, int vector)
{
    return APIC_DM_FIXED | shortcut | vector;
}

static unsigned int prepare_ICR2(unsigned int mask)
{
    return SET_xAPIC_DEST_FIELD(mask);
}

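/*
 * Wait for the ICR Delivery Status bit to clear (xAPIC mode only).  The
 * x2APIC ICR has no such bit, so there is nothing to poll in that mode.
 */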
void apic_wait_icr_idle(void)
{
    if ( x2apic_enabled )
        return;

    while ( apic_read(APIC_ICR) & APIC_ICR_BUSY )
        cpu_relax();
}

/* Helper for sending APIC IPIs using a shorthand. */
static void send_IPI_shortcut(unsigned int shortcut, int vector,
                              unsigned int dest)
{
    unsigned int cfg;

    /* Wait for idle. */
    apic_wait_icr_idle();
    /* Prepare target chip field. */
    cfg = prepare_ICR(shortcut, vector) | dest;
    /* Send the IPI. The write to APIC_ICR fires this off. */
    apic_write(APIC_ICR, cfg);
}

/*
 * send_IPI_mask(cpumask, vector): sends @vector IPI to CPUs in @cpumask,
 * excluding the local CPU. @cpumask may be empty.
 */

void send_IPI_mask(const cpumask_t *mask, int vector)
{
    bool cpus_locked = false;
    cpumask_t *scratch = this_cpu(send_ipi_cpumask);

    if ( in_irq() || in_mce_handler() || in_nmi_handler() )
    {
        /*
         * When in IRQ, NMI or #MC context, fall back to the old (and
         * simpler) IPI sending routine, and avoid any performance
         * optimizations (like using a shorthand), so as not to use the
         * scratch cpumask, which cannot be used in interrupt context.
         */
        alternative_vcall(genapic.send_IPI_mask, mask, vector);
        return;
    }

    /*
     * This can only be safely used when no CPU hotplug or unplug operations
     * are taking place, there are no offline CPUs (unless those have been
     * onlined and parked), there are no disabled CPUs, and all possible CPUs
     * in the system have been accounted for.
     */
    if ( system_state > SYS_STATE_smp_boot &&
         !unaccounted_cpus && !disabled_cpus && !cpu_in_hotplug_context() &&
         /* NB: get_cpu_maps lock requires enabled interrupts. */
         local_irq_is_enabled() && (cpus_locked = get_cpu_maps()) &&
         (park_offline_cpus ||
          cpumask_equal(&cpu_online_map, &cpu_present_map)) )
        cpumask_or(scratch, mask, cpumask_of(smp_processor_id()));
    else
    {
        if ( cpus_locked )
        {
            put_cpu_maps();
            cpus_locked = false;
        }
        cpumask_clear(scratch);
    }

    if ( cpumask_equal(scratch, &cpu_online_map) )
        send_IPI_shortcut(APIC_DEST_ALLBUT, vector, APIC_DEST_PHYSICAL);
    else
        alternative_vcall(genapic.send_IPI_mask, mask, vector);

    if ( cpus_locked )
        put_cpu_maps();
}

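/* Send an IPI to the local CPU only. */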
void send_IPI_self(int vector)
{
    alternative_vcall(genapic.send_IPI_self, vector);
}

/*
 * Some notes on x86 processor bugs affecting SMP operation:
 *
 * Pentium, Pentium Pro, II, III (and all CPUs) have bugs.
 * The Linux implications for SMP are handled as follows:
 *
 * Pentium III / [Xeon]
 *   None of the E1AP-E3AP errata are visible to the user.
 *
 *   E1AP. see PII A1AP
 *   E2AP. see PII A2AP
 *   E3AP. see PII A3AP
 *
 * Pentium II / [Xeon]
 *   None of the A1AP-A3AP errata are visible to the user.
 *
 *   A1AP. see PPro 1AP
 *   A2AP. see PPro 2AP
 *   A3AP. see PPro 7AP
 *
 * Pentium Pro
 *   None of the 1AP-9AP errata are visible to the normal user,
 *   except occasional delivery of 'spurious interrupt' as trap #15.
 *   This is very rare and a non-problem.
 *
 *   1AP. Linux maps APIC as non-cacheable
 *   2AP. worked around in hardware
 *   3AP. fixed in C0 and above steppings microcode update.
 *        Linux does not use excessive STARTUP_IPIs.
 *   4AP. worked around in hardware
 *   5AP. symmetric IO mode (normal Linux operation) not affected.
 *        'noapic' mode has vector 0xf filled out properly.
 *   6AP. 'noapic' mode might be affected - fixed in later steppings
 *   7AP. We do not assume writes to the LVT deasserting IRQs
 *   8AP. We do not enable low power mode (deep sleep) during MP bootup
 *   9AP. We do not use mixed mode
 */

/*
 * The following functions deal with sending IPIs between CPUs.
 */

void cf_check send_IPI_self_legacy(uint8_t vector)
{
    /* NMI continuation handling relies on using a shorthand here. */
    send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL);
}

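/*
 * Send @vector to the online CPUs in @cpumask (excluding the local CPU),
 * using a single ICR write in flat (logical) destination mode.
 */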
void cf_check send_IPI_mask_flat(const cpumask_t *cpumask, int vector)
{
    unsigned long mask = cpumask_bits(cpumask)[0];
    unsigned long cfg;
    unsigned long flags;

    mask &= cpumask_bits(&cpu_online_map)[0];
    mask &= ~(1UL << smp_processor_id());
    if ( mask == 0 )
        return;

    local_irq_save(flags);

    /*
     * Wait for idle.
     */
    apic_wait_icr_idle();

    /*
     * prepare target chip field
     */
    cfg = prepare_ICR2(mask);
    apic_write(APIC_ICR2, cfg);

    /*
     * program the ICR
     */
    cfg = prepare_ICR(0, vector) | APIC_DEST_LOGICAL;

    /*
     * Send the IPI. The write to APIC_ICR fires this off.
     */
    apic_write(APIC_ICR, cfg);

    local_irq_restore(flags);
}

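/*
 * Send @vector to each online CPU in @mask (excluding the local CPU) in
 * turn, using physical destination mode.
 */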
void cf_check send_IPI_mask_phys(const cpumask_t *mask, int vector)
{
    unsigned long cfg, flags;
    unsigned int query_cpu;

    local_irq_save(flags);

    for_each_cpu ( query_cpu, mask )
    {
        if ( !cpu_online(query_cpu) || (query_cpu == smp_processor_id()) )
            continue;

        /*
         * Wait for idle.
         */
        apic_wait_icr_idle();

        /*
         * prepare target chip field
         */
        cfg = prepare_ICR2(cpu_physical_id(query_cpu));
        apic_write(APIC_ICR2, cfg);

        /*
         * program the ICR
         */
        cfg = prepare_ICR(0, vector) | APIC_DEST_PHYSICAL;

        /*
         * Send the IPI. The write to APIC_ICR fires this off.
         */
        apic_write(APIC_ICR, cfg);
    }

    local_irq_restore(flags);
}

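/*
 * Parameters of the pending remote TLB flush, set by the initiating CPU
 * while holding flush_lock and consumed by invalidate_interrupt().
 */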
static DEFINE_SPINLOCK(flush_lock);
static cpumask_t flush_cpumask;
static const void *flush_va;
static unsigned int flush_flags;

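/*
 * Handler for INVALIDATE_TLB_VECTOR: perform the requested flush locally
 * and signal completion by clearing our bit in flush_cpumask.
 */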
void cf_check invalidate_interrupt(void)
{
    unsigned int flags = flush_flags;
    ack_APIC_irq();
    perfc_incr(ipis);
    if ( (flags & FLUSH_VCPU_STATE) && __sync_local_execstate() )
        flags &= ~(FLUSH_TLB | FLUSH_TLB_GLOBAL | FLUSH_ROOT_PGTBL);
    if ( flags & ~(FLUSH_VCPU_STATE | FLUSH_ORDER_MASK) )
        flush_area_local(flush_va, flags);
    cpumask_clear_cpu(smp_processor_id(), &flush_cpumask);
}

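/*
 * Flush the area described by @va and @flags on all CPUs in @mask: the
 * local CPU is flushed directly, remote CPUs via INVALIDATE_TLB_VECTOR
 * (or an assisted flush when running on top of a hypervisor).
 */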
void flush_area_mask(const cpumask_t *mask, const void *va, unsigned int flags)
{
    unsigned int cpu = smp_processor_id();

    /* Local flushes can be performed with interrupts disabled. */
    ASSERT(local_irq_is_enabled() || cpumask_subset(mask, cpumask_of(cpu)));
    /* Exclude use of FLUSH_VCPU_STATE for the local CPU. */
    ASSERT(!cpumask_test_cpu(cpu, mask) || !(flags & FLUSH_VCPU_STATE));

    if ( (flags & ~(FLUSH_VCPU_STATE | FLUSH_ORDER_MASK)) &&
         cpumask_test_cpu(cpu, mask) )
        flags = flush_area_local(va, flags);

    if ( (flags & ~FLUSH_ORDER_MASK) &&
         !cpumask_subset(mask, cpumask_of(cpu)) )
    {
        if ( cpu_has_hypervisor &&
             !(flags & ~(FLUSH_TLB | FLUSH_TLB_GLOBAL | FLUSH_VA_VALID |
                         FLUSH_ORDER_MASK)) &&
             !hypervisor_flush_tlb(mask, va, flags) )
            return;

        spin_lock(&flush_lock);
        cpumask_and(&flush_cpumask, mask, &cpu_online_map);
        cpumask_clear_cpu(cpu, &flush_cpumask);
        flush_va = va;
        flush_flags = flags;
        send_IPI_mask(&flush_cpumask, INVALIDATE_TLB_VECTOR);
        while ( !cpumask_empty(&flush_cpumask) )
            cpu_relax();
        spin_unlock(&flush_lock);
    }
}

/* Call with no locks held and interrupts enabled (e.g., softirq context). */
void cf_check new_tlbflush_clock_period(void)
{
    cpumask_t allbutself;

    /* Flush everyone else. We definitely flushed just before entry. */
    cpumask_andnot(&allbutself, &cpu_online_map,
                   cpumask_of(smp_processor_id()));
    flush_mask(&allbutself, FLUSH_TLB);

    /* No need for atomicity: we are the only possible updater. */
    ASSERT(tlbflush_clock == 0);
    tlbflush_clock++;
}

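/*
 * Wake the CPUs in @mask so they notice pending work; the IPI itself
 * carries no payload (see event_check_interrupt()).
 */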
void smp_send_event_check_mask(const cpumask_t *mask)
{
    send_IPI_mask(mask, EVENT_CHECK_VECTOR);
}

void smp_send_call_function_mask(const cpumask_t *mask)
{
    send_IPI_mask(mask, CALL_FUNCTION_VECTOR);

    if ( cpumask_test_cpu(smp_processor_id(), mask) )
    {
        local_irq_disable();
        smp_call_function_interrupt();
        local_irq_enable();
    }
}

void __stop_this_cpu(void)
{
    ASSERT(!local_irq_is_enabled());

    disable_local_APIC();

    hvm_cpu_down();

    /*
     * Clear FPU, zapping any pending exceptions. Needed for warm reset with
     * some BIOSes.
     */
    clts();
    asm volatile ( "fninit" );

    cpumask_clear_cpu(smp_processor_id(), &cpu_online_map);
}

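/*
 * IPI callback used by smp_send_stop(): wait for the stop signal, then
 * offline this CPU and halt forever.
 */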
static void noreturn cf_check stop_this_cpu(void *dummy)
{
    const bool *stop_aps = dummy;

    while ( !*stop_aps )
        cpu_relax();

    __stop_this_cpu();
    for ( ; ; )
        halt();
}

/*
 * Stop all CPUs and turn off local APICs and the IO-APIC, so other OSs see a
 * clean IRQ state.
 */
void smp_send_stop(void)
{
    unsigned int cpu = smp_processor_id();
    bool stop_aps = false;

    /*
     * Perform AP offlining and disabling of interrupt controllers with all
     * CPUs in the system having interrupts disabled, to prevent interrupt
     * delivery errors. On AMD systems a "Receive accept error" is broadcast
     * to local APICs if an interrupt targets an offline CPU.
     */
    if ( num_online_cpus() > 1 )
        smp_call_function(stop_this_cpu, &stop_aps, 0);

    local_irq_disable();
    pci_disable_msi_all();
    disable_IO_APIC();
    hpet_disable();
    iommu_quiesce();

    if ( num_online_cpus() > 1 )
    {
        int timeout = 10;

        /* Signal APs to stop. */
        stop_aps = true;

        /* Wait 10ms for all other CPUs to go offline. */
        while ( (num_online_cpus() > 1) && (timeout-- > 0) )
            mdelay(1);
    }

    if ( cpu_online(cpu) )
    {
        __stop_this_cpu();
        x2apic_enabled = (current_local_apic_mode() == APIC_MODE_X2APIC);
    }
    local_irq_enable();
}

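/* Send an NMI to every online CPU; send_IPI_mask() excludes the local CPU. */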
void smp_send_nmi_allbutself(void)
{
    send_IPI_mask(&cpu_online_map, APIC_DM_NMI);
}

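/*
 * Handler for EVENT_CHECK_VECTOR: nothing to do beyond acknowledging the
 * IPI and accounting for it.
 */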
void cf_check event_check_interrupt(void)
{
    ack_APIC_irq();
    perfc_incr(ipis);
    this_cpu(irq_count)++;
}

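/* Handler for CALL_FUNCTION_VECTOR: run the queued cross-CPU function calls. */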
void cf_check call_function_interrupt(void)
{
    ack_APIC_irq();
    perfc_incr(ipis);
    smp_call_function_interrupt();
}

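/*
 * Bring a CPU online, retrying once on -EBUSY.  If SMT has been disabled
 * (!opt_smt), a hyperthread sibling that was just onlined is immediately
 * taken back offline.
 */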
long cf_check cpu_up_helper(void *data)
{
    unsigned int cpu = (unsigned long)data;
    int ret = cpu_up(cpu);

    /* Have one more go on EBUSY. */
    if ( ret == -EBUSY )
        ret = cpu_up(cpu);

    if ( !ret && !opt_smt &&
         cpu_data[cpu].compute_unit_id == INVALID_CUID &&
         cpumask_weight(per_cpu(cpu_sibling_mask, cpu)) > 1 )
    {
        ret = cpu_down_helper(data);
        if ( ret )
            printk("Could not re-offline CPU%u (%d)\n", cpu, ret);
        else
            ret = -EPERM;
    }

    return ret;
}

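/* Take a CPU offline, retrying once on -EBUSY. */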
long cf_check cpu_down_helper(void *data)
{
    int cpu = (unsigned long)data;
    int ret = cpu_down(cpu);
    /* Have one more go on EBUSY. */
    if ( ret == -EBUSY )
        ret = cpu_down(cpu);
    return ret;
}