/*
 *	Intel SMP support routines.
 *
 *	(c) 1995 Alan Cox, Building #3 <alan@redhat.com>
 *	(c) 1998-99, 2000 Ingo Molnar <mingo@redhat.com>
 *
 *	This code is released under the GNU General Public License version 2 or
 *	later.
 */

#include <xen/cpu.h>
#include <xen/irq.h>
#include <xen/sched.h>
#include <xen/delay.h>
#include <xen/perfc.h>
#include <xen/spinlock.h>

#include <asm/apic.h>
#include <asm/current.h>
#include <asm/genapic.h>
#include <asm/guest.h>
#include <asm/irq-vectors.h>
#include <asm/smp.h>
#include <asm/mc146818rtc.h>
#include <asm/flushtlb.h>
#include <asm/hardirq.h>
#include <asm/hpet.h>
#include <asm/setup.h>

/* Helper functions to prepare APIC register values. */
static unsigned int prepare_ICR(unsigned int shortcut, int vector)
{
    return APIC_DM_FIXED | shortcut | vector;
}

static unsigned int prepare_ICR2(unsigned int mask)
{
    return SET_xAPIC_DEST_FIELD(mask);
}

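/*
 * Wait for the ICR to become idle before issuing another IPI.  In x2APIC
 * mode the ICR has no delivery-status (busy) bit, so no waiting is needed.
 */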
void apic_wait_icr_idle(void)
{
    if ( x2apic_enabled )
        return;

    while ( apic_read(APIC_ICR) & APIC_ICR_BUSY )
        cpu_relax();
}

/* Helper for sending APIC IPIs using a shorthand. */
static void send_IPI_shortcut(unsigned int shortcut, int vector,
                              unsigned int dest)
{
    unsigned int cfg;

    /* Wait for idle. */
    apic_wait_icr_idle();
    /* Prepare target chip field. */
    cfg = prepare_ICR(shortcut, vector) | dest;
    /* Send the IPI. The write to APIC_ICR fires this off. */
    apic_write(APIC_ICR, cfg);
}

/*
 * send_IPI_mask(cpumask, vector): sends @vector IPI to CPUs in @cpumask,
 * excluding the local CPU. @cpumask may be empty.
 */

void send_IPI_mask(const cpumask_t *mask, int vector)
{
    bool cpus_locked = false;
    cpumask_t *scratch = this_cpu(send_ipi_cpumask);

    if ( in_irq() || in_mce_handler() || in_nmi_handler() )
    {
        /*
         * When in IRQ, NMI or #MC context, fall back to the old (and
         * simpler) IPI sending routine, and avoid performance optimizations
         * (like using a shorthand) so as not to touch the scratch cpumask,
         * which cannot be used in interrupt context.
         */
        alternative_vcall(genapic.send_IPI_mask, mask, vector);
        return;
    }

    /*
     * This can only be safely used when no CPU hotplug or unplug operations
     * are taking place, there are no offline CPUs (unless those have been
     * onlined and parked), there are no disabled CPUs and all possible CPUs in
     * the system have been accounted for.
     */
    if ( system_state > SYS_STATE_smp_boot &&
         !unaccounted_cpus && !disabled_cpus && !cpu_in_hotplug_context() &&
         /* NB: get_cpu_maps lock requires enabled interrupts. */
         local_irq_is_enabled() && (cpus_locked = get_cpu_maps()) &&
         (park_offline_cpus ||
          cpumask_equal(&cpu_online_map, &cpu_present_map)) )
        cpumask_or(scratch, mask, cpumask_of(smp_processor_id()));
    else
    {
        if ( cpus_locked )
        {
            put_cpu_maps();
            cpus_locked = false;
        }
        cpumask_clear(scratch);
    }

    if ( cpumask_equal(scratch, &cpu_online_map) )
        send_IPI_shortcut(APIC_DEST_ALLBUT, vector, APIC_DEST_PHYSICAL);
    else
        alternative_vcall(genapic.send_IPI_mask, mask, vector);

    if ( cpus_locked )
        put_cpu_maps();
}

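/* Send an IPI to the local CPU via the active genapic implementation. */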
void send_IPI_self(int vector)
{
    alternative_vcall(genapic.send_IPI_self, vector);
}

/*
 *	Some notes on x86 processor bugs affecting SMP operation:
 *
 *	Pentium, Pentium Pro, II, III (and all CPUs) have bugs.
 *	The Linux implications for SMP are handled as follows:
 *
 *	Pentium III / [Xeon]
 *		None of the E1AP-E3AP errata are visible to the user.
 *
 *	E1AP.	see PII A1AP
 *	E2AP.	see PII A2AP
 *	E3AP.	see PII A3AP
 *
 *	Pentium II / [Xeon]
 *		None of the A1AP-A3AP errata are visible to the user.
 *
 *	A1AP.	see PPro 1AP
 *	A2AP.	see PPro 2AP
 *	A3AP.	see PPro 7AP
 *
 *	Pentium Pro
 *		None of the 1AP-9AP errata are visible to the normal user,
 *	except occasional delivery of 'spurious interrupt' as trap #15.
 *	This is very rare and a non-problem.
 *
 *	1AP.	Linux maps APIC as non-cacheable
 *	2AP.	worked around in hardware
 *	3AP.	fixed in C0 and above steppings microcode update.
 *		Linux does not use excessive STARTUP_IPIs.
 *	4AP.	worked around in hardware
 *	5AP.	symmetric IO mode (normal Linux operation) not affected.
 *		'noapic' mode has vector 0xf filled out properly.
 *	6AP.	'noapic' mode might be affected - fixed in later steppings
 *	7AP.	We do not assume writes to the LVT deasserting IRQs
 *	8AP.	We do not enable low power mode (deep sleep) during MP bootup
 *	9AP.	We do not use mixed mode
 */

/*
 * The following functions deal with sending IPIs between CPUs.
 */

void cf_check send_IPI_self_legacy(uint8_t vector)
{
    /* NMI continuation handling relies on using a shorthand here. */
    send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL);
}

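/*
 * Legacy xAPIC IPI sending using the 'flat' logical destination mode: the
 * online target CPUs (excluding the sender) form a single logical
 * destination bitmap written to ICR2.
 */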
void cf_check send_IPI_mask_flat(const cpumask_t *cpumask, int vector)
{
    unsigned long mask = cpumask_bits(cpumask)[0];
    unsigned long cfg;
    unsigned long flags;

    mask &= cpumask_bits(&cpu_online_map)[0];
    mask &= ~(1UL << smp_processor_id());
    if ( mask == 0 )
        return;

    local_irq_save(flags);

    /* Wait for idle. */
    apic_wait_icr_idle();

    /* Prepare target chip field. */
    cfg = prepare_ICR2(mask);
    apic_write(APIC_ICR2, cfg);

    /* Program the ICR. */
    cfg = prepare_ICR(0, vector) | APIC_DEST_LOGICAL;

    /* Send the IPI. The write to APIC_ICR fires this off. */
    apic_write(APIC_ICR, cfg);

    local_irq_restore(flags);
}

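/*
 * Legacy xAPIC IPI sending using physical destination mode: iterate over
 * the mask and send one IPI per online target CPU, skipping the sender.
 */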
void cf_check send_IPI_mask_phys(const cpumask_t *mask, int vector)
{
    unsigned long cfg, flags;
    unsigned int query_cpu;

    local_irq_save(flags);

    for_each_cpu ( query_cpu, mask )
    {
        if ( !cpu_online(query_cpu) || (query_cpu == smp_processor_id()) )
            continue;

        /* Wait for idle. */
        apic_wait_icr_idle();

        /* Prepare target chip field. */
        cfg = prepare_ICR2(cpu_physical_id(query_cpu));
        apic_write(APIC_ICR2, cfg);

        /* Program the ICR. */
        cfg = prepare_ICR(0, vector) | APIC_DEST_PHYSICAL;

        /* Send the IPI. The write to APIC_ICR fires this off. */
        apic_write(APIC_ICR, cfg);
    }

    local_irq_restore(flags);
}

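/*
 * State shared between flush_area_mask() and invalidate_interrupt().  The
 * initiator holds flush_lock while a request is in flight; responders
 * acknowledge by clearing their bit in flush_cpumask.
 */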
static DEFINE_SPINLOCK(flush_lock);
static cpumask_t flush_cpumask;
static const void *flush_va;
static unsigned int flush_flags;

void cf_check invalidate_interrupt(void)
{
    unsigned int flags = flush_flags;

    ack_APIC_irq();
    perfc_incr(ipis);
    if ( (flags & FLUSH_VCPU_STATE) && __sync_local_execstate() )
        flags &= ~(FLUSH_TLB | FLUSH_TLB_GLOBAL | FLUSH_ROOT_PGTBL);
    if ( flags & ~(FLUSH_VCPU_STATE | FLUSH_ORDER_MASK) )
        flush_area_local(flush_va, flags);
    cpumask_clear_cpu(smp_processor_id(), &flush_cpumask);
}

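/*
 * Flush @va (or everything, depending on @flags) on all CPUs in @mask.  The
 * local CPU is flushed directly; remote CPUs are flushed via IPI, or via a
 * hypervisor assist when running virtualized and only TLB flushing is
 * requested.
 */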
void flush_area_mask(const cpumask_t *mask, const void *va, unsigned int flags)
{
    unsigned int cpu = smp_processor_id();

    /* Local flushes can be performed with interrupts disabled. */
    ASSERT(local_irq_is_enabled() || cpumask_subset(mask, cpumask_of(cpu)));
    /* Exclude use of FLUSH_VCPU_STATE for the local CPU. */
    ASSERT(!cpumask_test_cpu(cpu, mask) || !(flags & FLUSH_VCPU_STATE));

    if ( (flags & ~(FLUSH_VCPU_STATE | FLUSH_ORDER_MASK)) &&
         cpumask_test_cpu(cpu, mask) )
        flags = flush_area_local(va, flags);

    if ( (flags & ~FLUSH_ORDER_MASK) &&
         !cpumask_subset(mask, cpumask_of(cpu)) )
    {
        if ( cpu_has_hypervisor &&
             !(flags & ~(FLUSH_TLB | FLUSH_TLB_GLOBAL | FLUSH_VA_VALID |
                         FLUSH_ORDER_MASK)) &&
             !hypervisor_flush_tlb(mask, va, flags) )
            return;

        spin_lock(&flush_lock);
        cpumask_and(&flush_cpumask, mask, &cpu_online_map);
        cpumask_clear_cpu(cpu, &flush_cpumask);
        flush_va      = va;
        flush_flags   = flags;
        send_IPI_mask(&flush_cpumask, INVALIDATE_TLB_VECTOR);
        while ( !cpumask_empty(&flush_cpumask) )
            cpu_relax();
        spin_unlock(&flush_lock);
    }
}

/* Call with no locks held and interrupts enabled (e.g., softirq context). */
void cf_check new_tlbflush_clock_period(void)
{
    cpumask_t allbutself;

    /* Flush everyone else. We definitely flushed just before entry. */
    cpumask_andnot(&allbutself, &cpu_online_map,
                   cpumask_of(smp_processor_id()));
    flush_mask(&allbutself, FLUSH_TLB);

    /* No need for atomicity: we are the only possible updater. */
    ASSERT(tlbflush_clock == 0);
    tlbflush_clock++;
}

void smp_send_event_check_mask(const cpumask_t *mask)
{
    send_IPI_mask(mask, EVENT_CHECK_VECTOR);
}

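/*
 * Send a call-function IPI to @mask.  If the local CPU is part of the mask
 * the handler is invoked directly, with interrupts disabled to mimic IPI
 * context.
 */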
void smp_send_call_function_mask(const cpumask_t *mask)
{
    send_IPI_mask(mask, CALL_FUNCTION_VECTOR);

    if ( cpumask_test_cpu(smp_processor_id(), mask) )
    {
        local_irq_disable();
        smp_call_function_interrupt();
        local_irq_enable();
    }
}

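/*
 * Take the calling CPU down to a quiescent state: local APIC and HVM
 * support disabled, FPU state cleared, and the CPU marked offline.  Must
 * be called with interrupts disabled.
 */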
void __stop_this_cpu(void)
{
    ASSERT(!local_irq_is_enabled());

    disable_local_APIC();

    hvm_cpu_down();

    /*
     * Clear FPU, zapping any pending exceptions. Needed for warm reset with
     * some BIOSes.
     */
    clts();
    asm volatile ( "fninit" );

    cpumask_clear_cpu(smp_processor_id(), &cpu_online_map);
}

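/*
 * Callback run on every other CPU via smp_call_function(): wait for the
 * stop signal, then offline the CPU and halt forever.
 */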
static void noreturn cf_check stop_this_cpu(void *dummy)
{
    const bool *stop_aps = dummy;

    while ( !*stop_aps )
        cpu_relax();

    __stop_this_cpu();
    for ( ; ; )
        halt();
}

/*
 * Stop all CPUs and turn off local APICs and the IO-APIC, so other OSs see a
 * clean IRQ state.
 */
void smp_send_stop(void)
{
    unsigned int cpu = smp_processor_id();
    bool stop_aps = false;

    /*
     * Perform AP offlining and disabling of interrupt controllers with all
     * CPUs on the system having interrupts disabled to prevent interrupt
     * delivery errors.  On AMD systems "Receive accept error" will be
     * broadcast to local APICs if interrupts target CPUs that are offline.
     */
    if ( num_online_cpus() > 1 )
        smp_call_function(stop_this_cpu, &stop_aps, 0);

    local_irq_disable();
    pci_disable_msi_all();
    disable_IO_APIC();
    hpet_disable();
    iommu_quiesce();

    if ( num_online_cpus() > 1 )
    {
        int timeout = 10;

        /* Signal APs to stop. */
        stop_aps = true;

        /* Wait 10ms for all other CPUs to go offline. */
        while ( (num_online_cpus() > 1) && (timeout-- > 0) )
            mdelay(1);
    }

    if ( cpu_online(cpu) )
    {
        __stop_this_cpu();
        x2apic_enabled = (current_local_apic_mode() == APIC_MODE_X2APIC);
    }
    local_irq_enable();
}

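/*
 * Send an NMI to all other online CPUs.  APIC_DM_NMI is passed in place of
 * a fixed vector so the NMI delivery mode bits get ORed into the ICR; the
 * local CPU is excluded by send_IPI_mask() itself.
 */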
void smp_send_nmi_allbutself(void)
{
    send_IPI_mask(&cpu_online_map, APIC_DM_NMI);
}

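/*
 * Handler for EVENT_CHECK_VECTOR IPIs.  Nothing to do beyond the ack and
 * accounting: the interrupt itself kicks the target CPU out of its idle or
 * guest context so pending work is noticed on the return path.
 */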
void cf_check event_check_interrupt(void)
{
    ack_APIC_irq();
    perfc_incr(ipis);
    this_cpu(irq_count)++;
}

void cf_check call_function_interrupt(void)
{
    ack_APIC_irq();
    perfc_incr(ipis);
    smp_call_function_interrupt();
}

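/*
 * Helper for bringing a CPU online: retry once on -EBUSY, and re-offline
 * secondary SMT threads when booted with SMT disabled (!opt_smt).
 */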
long cf_check cpu_up_helper(void *data)
{
    unsigned int cpu = (unsigned long)data;
    int ret = cpu_up(cpu);

    /* Have one more go on EBUSY. */
    if ( ret == -EBUSY )
        ret = cpu_up(cpu);

    if ( !ret && !opt_smt &&
         cpu_data[cpu].compute_unit_id == INVALID_CUID &&
         cpumask_weight(per_cpu(cpu_sibling_mask, cpu)) > 1 )
    {
        ret = cpu_down_helper(data);
        if ( ret )
            printk("Could not re-offline CPU%u (%d)\n", cpu, ret);
        else
            ret = -EPERM;
    }

    return ret;
}

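/* Helper for taking a CPU offline, retrying once on -EBUSY. */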
long cf_check cpu_down_helper(void *data)
{
    int cpu = (unsigned long)data;
    int ret = cpu_down(cpu);

    /* Have one more go on EBUSY. */
    if ( ret == -EBUSY )
        ret = cpu_down(cpu);

    return ret;
}