1 /*
2 * based on linux-2.6.17.13/arch/i386/kernel/apic.c
3 *
4 * Local APIC handling, local APIC timers
5 *
6 * (c) 1999, 2000 Ingo Molnar <mingo@redhat.com>
7 *
8 * Fixes
9 * Maciej W. Rozycki : Bits for genuine 82489DX APICs;
10 * thanks to Eric Gilmore
11 * and Rolf G. Tews
12 * for testing these extensively.
13 * Maciej W. Rozycki : Various updates and fixes.
14 * Mikael Pettersson : Power Management for UP-APIC.
15 * Pavel Machek and
16 * Mikael Pettersson : PM converted to driver model.
17 */
18
19 #include <xen/perfc.h>
20 #include <xen/errno.h>
21 #include <xen/init.h>
22 #include <xen/mm.h>
23 #include <xen/param.h>
24 #include <xen/sched.h>
25 #include <xen/irq.h>
26 #include <xen/delay.h>
27 #include <xen/smp.h>
28 #include <xen/softirq.h>
29 #include <asm/mc146818rtc.h>
30 #include <asm/microcode.h>
31 #include <asm/msr.h>
32 #include <asm/atomic.h>
33 #include <asm/mpspec.h>
34 #include <asm/flushtlb.h>
35 #include <asm/hardirq.h>
36 #include <asm/apic.h>
37 #include <asm/io_apic.h>
38 #include <mach_apic.h>
39 #include <io_ports.h>
40 #include <irq_vectors.h>
41 #include <xen/kexec.h>
42 #include <asm/guest.h>
43 #include <asm/nmi.h>
44 #include <asm/time.h>
45
/* Runtime state: the local APIC timer is in TSC-deadline mode. */
static bool __read_mostly tdt_enabled;
/* Command line knob "tdt=<bool>": willingness to use TSC-deadline mode. */
static bool __initdata tdt_enable = true;
boolean_param("tdt", tdt_enable);

/* Set once the IOMMU has successfully been put into x2APIC (IR) mode. */
bool __read_mostly iommu_x2apic_enabled;

/* Local APIC register state saved by lapic_suspend(), restored by lapic_resume(). */
static struct {
    int active;                 /* non-zero once apic_pm_activate() has run */
    /* r/w apic fields */
    unsigned int apic_id;
    unsigned int apic_taskpri;
    unsigned int apic_ldr;
    unsigned int apic_dfr;
    unsigned int apic_spiv;
    unsigned int apic_lvtt;
    unsigned int apic_lvtpc;
    unsigned int apic_lvtcmci;
    unsigned int apic_lvt0;
    unsigned int apic_lvt1;
    unsigned int apic_lvterr;
    unsigned int apic_tmict;
    unsigned int apic_tdcr;
    unsigned int apic_thmr;
} apic_pm_state;

/*
 * Knob to control our willingness to enable the local APIC.
 */
static s8 __initdata enable_local_apic; /* -1=force-disable, +1=force-enable */

/*
 * Debug level
 */
u8 __read_mostly apic_verbosity;

/* Command line knob "x2apic=<bool>": willingness to enable x2APIC mode. */
static bool __initdata opt_x2apic = true;
boolean_param("x2apic", opt_x2apic);

/*
 * Bootstrap processor local APIC boot mode - so we can undo our changes
 * to the APIC state.
 */
static enum apic_mode apic_boot_mode = APIC_MODE_INVALID;

/* Runtime state: x2APIC mode active; directed (suppressed-broadcast) EOI in use. */
bool __read_mostly x2apic_enabled;
bool __read_mostly directed_eoi_enabled;
92
modern_apic(void)93 static int modern_apic(void)
94 {
95 unsigned int lvr, version;
96 /* AMD systems use old APIC versions, so check the CPU */
97 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
98 boot_cpu_data.x86 >= 0xf)
99 return 1;
100
101 /* Hygon systems use modern APIC */
102 if (boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)
103 return 1;
104
105 lvr = apic_read(APIC_LVR);
106 version = GET_APIC_VERSION(lvr);
107 return version >= 0x14;
108 }
109
110 /*
111 * 'what should we do if we get a hw irq event on an illegal vector'.
112 * each architecture has to answer this themselves.
113 */
ack_bad_irq(unsigned int irq)114 void ack_bad_irq(unsigned int irq)
115 {
116 printk("unexpected IRQ trap at irq %02x\n", irq);
117 /*
118 * Currently unexpected vectors happen only on SMP and APIC.
119 * We _must_ ack these because every local APIC has only N
120 * irq slots per priority level, and a 'hanging, unacked' IRQ
121 * holds up an irq slot - in excessive cases (when multiple
122 * unexpected vectors occur) that might lock up the APIC
123 * completely.
124 * But only ack when the APIC is enabled -AK
125 */
126 if (cpu_has_apic)
127 ack_APIC_irq();
128 }
129
/* Using APIC to generate smp_local_timer_interrupt? */
static bool __read_mostly using_apic_timer;

/* Set if detect_init_APIC() had to enable the LAPIC via MSR_APIC_BASE. */
static bool __read_mostly enabled_via_apicbase;
134
/*
 * Physical-mode broadcast destination ID: 0xff on modern (8-bit ID)
 * APICs, 0xf on old 82489DX-style (4-bit ID) parts.
 */
int get_physical_broadcast(void)
{
    return modern_apic() ? 0xff : 0xf;
}
142
get_maxlvt(void)143 int get_maxlvt(void)
144 {
145 unsigned int v = apic_read(APIC_LVR);
146
147 return GET_APIC_MAXLVT(v);
148 }
149
/*
 * Mask all LVT entries and return the local APIC to a quiescent state.
 * The ordering below matters: timer first, then LVTERR, then the rest
 * (masks only), and only afterwards the destructive writes.
 */
void clear_local_APIC(void)
{
    int maxlvt;
    unsigned long v;

    maxlvt = get_maxlvt();

    /* Work around AMD Erratum 411. This is a nice thing to do anyway. */
    apic_write(APIC_TMICT, 0);

    /*
     * Masking an LVT entry on a P6 can trigger a local APIC error
     * if the vector is zero. Mask LVTERR first to prevent this.
     */
    if (maxlvt >= 3) {
        v = ERROR_APIC_VECTOR; /* any non-zero vector will do */
        apic_write(APIC_LVTERR, v | APIC_LVT_MASKED);
    }
    /*
     * Careful: we have to set masks only first to deassert
     * any level-triggered sources.
     */
    v = apic_read(APIC_LVTT);
    apic_write(APIC_LVTT, v | APIC_LVT_MASKED);
    v = apic_read(APIC_LVT0);
    apic_write(APIC_LVT0, v | APIC_LVT_MASKED);
    v = apic_read(APIC_LVT1);
    apic_write(APIC_LVT1, v | APIC_LVT_MASKED);
    if (maxlvt >= 4) {
        v = apic_read(APIC_LVTPC);
        apic_write(APIC_LVTPC, v | APIC_LVT_MASKED);
    }
    if (maxlvt >= 5) {
        v = apic_read(APIC_LVTTHMR);
        apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED);
    }
    if (maxlvt >= 6) {
        v = apic_read(APIC_CMCI);
        apic_write(APIC_CMCI, v | APIC_LVT_MASKED);
    }

    /*
     * Clean APIC state for other OSs:
     */
    apic_write(APIC_LVTT, APIC_LVT_MASKED);
    apic_write(APIC_LVT0, APIC_LVT_MASKED);
    apic_write(APIC_LVT1, APIC_LVT_MASKED);
    if (maxlvt >= 3)
        apic_write(APIC_LVTERR, APIC_LVT_MASKED);
    if (maxlvt >= 4)
        apic_write(APIC_LVTPC, APIC_LVT_MASKED);
    if (maxlvt >= 5)
        apic_write(APIC_LVTTHMR, APIC_LVT_MASKED);
    if (maxlvt >= 6)
        apic_write(APIC_CMCI, APIC_LVT_MASKED);
    /* LDR is read-only in x2APIC mode, so only clear it in xAPIC mode. */
    if (!x2apic_enabled) {
        v = apic_read(APIC_LDR) & ~APIC_LDR_MASK;
        apic_write(APIC_LDR, v);
    }

    if (maxlvt > 3) /* Due to Pentium errata 3AP and 11AP. */
        apic_write(APIC_ESR, 0);
    apic_read(APIC_ESR);
}
214
/*
 * Route interrupts through the BSP's local APIC.  On boards running in
 * PIC mode this means programming the IMCR (ports 0x22/0x23) to connect
 * the APIC to the INT and NMI lines.
 */
void __init connect_bsp_APIC(void)
{
    if (pic_mode) {
        /*
         * Do not trust the local APIC being empty at bootup.
         */
        clear_local_APIC();
        /*
         * PIC mode, enable APIC mode in the IMCR, i.e.
         * connect BSP's local APIC to INT and NMI lines.
         */
        apic_printk(APIC_VERBOSE, "leaving PIC mode, "
                    "enabling APIC mode.\n");
        /* Select IMCR (register 0x70) via port 0x22, write 0x01 to 0x23. */
        outb(0x70, 0x22);
        outb(0x01, 0x23);
    }

    printk("Enabling APIC mode. Using %d I/O APICs\n", nr_ioapics);
    enable_apic_mode();
}
235
/*
 * Undo connect_bsp_APIC(): either restore PIC mode via the IMCR, or put
 * the local APIC back into Virtual Wire compatibility mode.
 *
 * virt_wire_setup: non-zero if the virtual wire runs through the IO-APIC,
 * in which case LVT0 (ExtINT) must not be re-enabled here.
 */
void disconnect_bsp_APIC(int virt_wire_setup)
{
    if (pic_mode) {
        /*
         * Put the board back into PIC mode (has an effect
         * only on certain older boards).  Note that APIC
         * interrupts, including IPIs, won't work beyond
         * this point!  The only exception are INIT IPIs.
         */
        apic_printk(APIC_VERBOSE, "disabling APIC mode, "
                    "entering PIC mode.\n");
        /* Select IMCR (register 0x70) via port 0x22, write 0x00 to 0x23. */
        outb(0x70, 0x22);
        outb(0x00, 0x23);
    }
    else {
        /* Go back to Virtual Wire compatibility mode */
        unsigned long value;

        clear_local_APIC();

        /* For the spurious interrupt use vector F, and enable it */
        value = apic_read(APIC_SPIV);
        value &= ~APIC_VECTOR_MASK;
        value |= APIC_SPIV_APIC_ENABLED;
        value |= 0xf;
        apic_write(APIC_SPIV, value);

        if (!virt_wire_setup) {
            /* For LVT0 make it edge triggered, active high, external and enabled */
            value = apic_read(APIC_LVT0);
            value &= ~(APIC_DM_MASK | APIC_SEND_PENDING |
                       APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
                       APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED );
            value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING | APIC_DM_EXTINT;
            apic_write(APIC_LVT0, value);
        }

        /* For LVT1 make it edge triggered, active high, nmi and enabled */
        value = apic_read(APIC_LVT1);
        value &= ~(
            APIC_DM_MASK | APIC_SEND_PENDING |
            APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
            APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
        value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING | APIC_DM_NMI;
        apic_write(APIC_LVT1, value);
    }
}
283
/*
 * Fully disable the local APIC, and - when kexecing - revert it to the
 * mode (disabled/xAPIC/x2APIC) that firmware handed us at boot, so the
 * next kernel finds the hardware in its expected state.
 */
void disable_local_APIC(void)
{
    clear_local_APIC();

    /*
     * Disable APIC (implies clearing of registers
     * for 82489DX!).
     */
    apic_write(APIC_SPIV, apic_read(APIC_SPIV) & ~APIC_SPIV_APIC_ENABLED);

    if (enabled_via_apicbase) {
        uint64_t msr_content;
        rdmsrl(MSR_APIC_BASE, msr_content);
        wrmsrl(MSR_APIC_BASE, msr_content &
               ~(APIC_BASE_ENABLE | APIC_BASE_EXTD));
    }

    if ( kexecing && (current_local_apic_mode() != apic_boot_mode) )
    {
        uint64_t msr_content;
        /* First force the APIC all the way off ... */
        rdmsrl(MSR_APIC_BASE, msr_content);
        msr_content &= ~(APIC_BASE_ENABLE | APIC_BASE_EXTD);
        wrmsrl(MSR_APIC_BASE, msr_content);

        /* ... then step back up to the boot-time mode. */
        switch ( apic_boot_mode )
        {
        case APIC_MODE_DISABLED:
            break; /* Nothing to do - we did this above */
        case APIC_MODE_XAPIC:
            msr_content |= APIC_BASE_ENABLE;
            wrmsrl(MSR_APIC_BASE, msr_content);
            break;
        case APIC_MODE_X2APIC:
            /* EN must be set before (or with) EXTD - hence two writes. */
            msr_content |= APIC_BASE_ENABLE;
            wrmsrl(MSR_APIC_BASE, msr_content);
            msr_content |= APIC_BASE_EXTD;
            wrmsrl(MSR_APIC_BASE, msr_content);
            break;
        default:
            printk("Default case when reverting #%d lapic to boot state\n",
                   smp_processor_id());
            break;
        }
    }

}
330
331 /*
332 * This is to verify that we're looking at a real local APIC.
333 * Check these against your board if the CPUs aren't getting
334 * started for no apparent reason.
335 */
/*
 * Probe the registers to confirm we are looking at a genuine local APIC,
 * and detect/enable directed-EOI support on the BSP as a side effect.
 * Returns 1 if the APIC looks real, 0 otherwise.
 */
int __init verify_local_APIC(void)
{
    unsigned int reg0, reg1;

    /*
     * The version register is read-only in a real APIC.
     */
    reg0 = apic_read(APIC_LVR);
    apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg0);

    /* We don't try writing LVR in x2APIC mode since that incurs #GP. */
    if ( !x2apic_enabled )
        apic_write(APIC_LVR, reg0 ^ APIC_LVR_MASK);
    reg1 = apic_read(APIC_LVR);
    apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg1);

    /*
     * The two version reads above should print the same
     * numbers. If the second one is different, then we
     * poke at a non-APIC.
     */
    if (reg1 != reg0)
        return 0;

    /*
     * Check if the version looks reasonably.
     */
    reg1 = GET_APIC_VERSION(reg0);
    if (reg1 == 0x00 || reg1 == 0xff)
        return 0;
    reg1 = get_maxlvt();
    if (reg1 < 0x02 || reg1 == 0xff)
        return 0;

    /*
     * Detecting directed EOI on BSP:
     * If having directed EOI support in lapic, force to use ioapic_ack_old,
     * and enable the directed EOI for intr handling.
     */
    if ( reg0 & APIC_LVR_DIRECTED_EOI )
    {
        if ( ioapic_ack_new && ioapic_ack_forced )
            printk("Not enabling directed EOI because ioapic_ack_new has been "
                   "forced on the command line\n");
        else
        {
            ioapic_ack_new = false;
            directed_eoi_enabled = true;
            printk("Enabled directed EOI with ioapic_ack_old on!\n");
        }
    }

    /*
     * The ID register is read/write in a real APIC.
     */
    reg0 = apic_read(APIC_ID);
    apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0);

    /*
     * The next two are just to see if we have sane values.
     * They're only really relevant if we're in Virtual Wire
     * compatibility mode, but most boxes aren't in that mode
     * anymore.
     */
    reg0 = apic_read(APIC_LVT0);
    apic_printk(APIC_DEBUG, "Getting LVT0: %x\n", reg0);
    reg1 = apic_read(APIC_LVT1);
    apic_printk(APIC_DEBUG, "Getting LVT1: %x\n", reg1);

    return 1;
}
406
/*
 * Synchronize APIC bus arbitration IDs by broadcasting a level-triggered
 * INIT to all-including-self.  Only needed on old (pre-0x14) APICs.
 */
void __init sync_Arb_IDs(void)
{
    /* Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1
       And not needed on AMD */
    if (modern_apic())
        return;
    /*
     * Wait for idle.
     */
    apic_wait_icr_idle();

    apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n");
    apic_write(APIC_ICR, APIC_DEST_ALLINC | APIC_INT_LEVELTRIG | APIC_DM_INIT);
}
421
422 /*
423 * An initial setup of the virtual wire mode.
424 */
/*
 * An initial setup of the virtual wire mode: enable the BSP's APIC and
 * route LINT0 as ExtINT and LINT1 as NMI.  Skipped on SMP-capable BIOSes
 * where the IO-APIC may already carry the virtual wire.
 */
void __init init_bsp_APIC(void)
{
    unsigned long value;

    /*
     * Don't do the setup now if we have a SMP BIOS as the
     * through-I/O-APIC virtual wire mode might be active.
     */
    if (smp_found_config || !cpu_has_apic)
        return;

    /*
     * Do not trust the local APIC being empty at bootup.
     */
    clear_local_APIC();

    /*
     * Enable APIC.
     */
    value = apic_read(APIC_SPIV);
    value &= ~APIC_VECTOR_MASK;
    value |= APIC_SPIV_APIC_ENABLED;

    /* This bit is reserved on P4/Xeon and should be cleared */
    if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && (boot_cpu_data.x86 == 15))
        value &= ~APIC_SPIV_FOCUS_DISABLED;
    else
        value |= APIC_SPIV_FOCUS_DISABLED;
    value |= SPURIOUS_APIC_VECTOR;
    apic_write(APIC_SPIV, value);

    /*
     * Set up the virtual wire mode.
     */
    apic_write(APIC_LVT0, APIC_DM_EXTINT);
    apic_write(APIC_LVT1, APIC_DM_NMI);
}
462
/* Mark the lapic suspend/resume state as in use (see apic_pm_state). */
static void apic_pm_activate(void)
{
    apic_pm_state.active = 1;
}
467
/* Switch this CPU's APIC into x2APIC mode, if not already there. */
static void __enable_x2apic(void)
{
    uint64_t msr_content;

    rdmsrl(MSR_APIC_BASE, msr_content);
    if ( !(msr_content & APIC_BASE_EXTD) )
    {
        msr_content |= APIC_BASE_ENABLE | APIC_BASE_EXTD;
        /*
         * NOTE(review): the cast deliberately truncates to the low 32 bits
         * before the write-back - presumably to clear any bits above the
         * 4GiB boundary in the base address; confirm intent.
         */
        msr_content = (uint32_t)msr_content;
        wrmsrl(MSR_APIC_BASE, msr_content);
    }
}
480
/*
 * Re-enter x2APIC mode after suspend: the IOMMU (interrupt remapping)
 * must be re-enabled before the APIC itself.
 */
static void resume_x2apic(void)
{
    if ( iommu_x2apic_enabled )
        iommu_enable_x2apic();
    __enable_x2apic();
}
487
/*
 * Bring the calling CPU's local APIC into fully operational state:
 * program LDR/TPR, drain any stale in-service bits (crash/kexec path),
 * enable the APIC via SPIV, wire up LVT0/LVT1/LVTERR, and start the NMI
 * watchdog on APs.
 *
 * bsp: true when called on the boot processor - affects ExtINT/LINT1
 * routing and message verbosity.
 */
void setup_local_APIC(bool bsp)
{
    unsigned long oldvalue, value, maxlvt;
    int i, j;

    /* The low nibble of the spurious vector is hardwired to 1s on old APICs. */
    BUILD_BUG_ON((SPURIOUS_APIC_VECTOR & 0x0f) != 0x0f);

    /*
     * Double-check whether this APIC is really registered.
     */
    if (!apic_id_registered())
        BUG();

    /*
     * Intel recommends to set DFR, LDR and TPR before enabling
     * an APIC.  See e.g. "AP-388 82489DX User's Manual" (Intel
     * document number 292116).  So here it goes...
     */
    init_apic_ldr();

    /*
     * Set Task Priority to reject any interrupts below FIRST_IRQ_VECTOR.
     */
    apic_write(APIC_TASKPRI, (FIRST_IRQ_VECTOR & 0xF0) - 0x10);

    /*
     * After a crash, we no longer service the interrupts and a pending
     * interrupt from previous kernel might still have ISR bit set.
     *
     * Most probably by now CPU has serviced that pending interrupt and
     * it might not have done the ack_APIC_irq() because it thought,
     * interrupt came from i8259 as ExtInt. LAPIC did not get EOI so it
     * does not clear the ISR bit and cpu thinks it has already serviced
     * the interrupt. Hence a vector might get locked. It was noticed
     * for timer irq (vector 0x31). Issue an extra EOI to clear ISR.
     */
    for (i = APIC_ISR_NR - 1; i >= 0; i--) {
        value = apic_read(APIC_ISR + i*0x10);
        for (j = 31; j >= 0; j--) {
            if (value & (1u << j))
                ack_APIC_irq();
        }
    }

    /*
     * Now that we are all set up, enable the APIC
     */
    value = apic_read(APIC_SPIV);
    value &= ~APIC_VECTOR_MASK;
    /*
     * Enable APIC
     */
    value |= APIC_SPIV_APIC_ENABLED;

    /*
     * Some unknown Intel IO/APIC (or APIC) errata is biting us with
     * certain networking cards. If high frequency interrupts are
     * happening on a particular IOAPIC pin, plus the IOAPIC routing
     * entry is masked/unmasked at a high rate as well then sooner or
     * later IOAPIC line gets 'stuck', no more interrupts are received
     * from the device. If focus CPU is disabled then the hang goes
     * away, oh well :-(
     *
     * [ This bug can be reproduced easily with a level-triggered
     *   PCI Ne2000 networking cards and PII/PIII processors, dual
     *   BX chipset. ]
     */
    /*
     * Actually disabling the focus CPU check just makes the hang less
     * frequent as it makes the interrupt distribution model be more
     * like LRU than MRU (the short-term load is more even across CPUs).
     * See also the comment in end_level_ioapic_irq(). --macro
     */
#if 1
    /* Enable focus processor (bit==0) */
    value &= ~APIC_SPIV_FOCUS_DISABLED;
#else
    /* Disable focus processor (bit==1) */
    value |= APIC_SPIV_FOCUS_DISABLED;
#endif
    /*
     * Set spurious IRQ vector
     */
    value |= SPURIOUS_APIC_VECTOR;

    /*
     * Enable directed EOI
     */
    if ( directed_eoi_enabled )
    {
        value |= APIC_SPIV_DIRECTED_EOI;
        if ( bsp )
            apic_printk(APIC_VERBOSE, "Suppressing EOI broadcast\n");
    }

    apic_write(APIC_SPIV, value);

    /*
     * Set up LVT0, LVT1:
     *
     * set up through-local-APIC on the BP's LINT0. This is not
     * strictly necessary in pure symmetric-IO mode, but sometimes
     * we delegate interrupts to the 8259A.
     */
    /*
     * TODO: set up through-local-APIC from through-I/O-APIC? --macro
     */
    value = apic_read(APIC_LVT0) & APIC_LVT_MASKED;
    if (bsp && (pic_mode || !value)) {
        value = APIC_DM_EXTINT;
        apic_printk(APIC_VERBOSE, "enabled ExtINT on CPU#%d\n",
                    smp_processor_id());
    } else {
        value = APIC_DM_EXTINT | APIC_LVT_MASKED;
        if (bsp)
            apic_printk(APIC_VERBOSE, "masked ExtINT on CPU#%d\n",
                        smp_processor_id());
    }
    apic_write(APIC_LVT0, value);

    /*
     * only the BP should see the LINT1 NMI signal, obviously.
     */
    if (bsp)
        value = APIC_DM_NMI;
    else
        value = APIC_DM_NMI | APIC_LVT_MASKED;
    apic_write(APIC_LVT1, value);

    maxlvt = get_maxlvt();
    if (maxlvt > 3)        /* Due to the Pentium erratum 3AP. */
        apic_write(APIC_ESR, 0);
    oldvalue = apic_read(APIC_ESR);

    value = ERROR_APIC_VECTOR;      // enables sending errors
    apic_write(APIC_LVTERR, value);
    /* spec says clear errors after enabling vector. */
    if (maxlvt > 3)
        apic_write(APIC_ESR, 0);
    value = apic_read(APIC_ESR);
    if (value != oldvalue)
        apic_printk(APIC_VERBOSE,
                    "ESR value before enabling vector: %#lx after: %#lx\n",
                    oldvalue, value);

    /* The BSP's watchdog is set up elsewhere; only APs do it here. */
    if (nmi_watchdog == NMI_LOCAL_APIC && !bsp)
        setup_apic_nmi_watchdog();
    apic_pm_activate();
}
637
/*
 * Save all r/w local APIC registers into apic_pm_state and shut the APIC
 * (and, if used, the IOMMU x2APIC support) down ahead of suspend.
 * Returns 0 (also when nothing was active and there was nothing to do).
 */
int lapic_suspend(void)
{
    unsigned long flags;
    int maxlvt = get_maxlvt();
    if (!apic_pm_state.active)
        return 0;

    apic_pm_state.apic_id = apic_read(APIC_ID);
    apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI);
    apic_pm_state.apic_ldr = apic_read(APIC_LDR);
    apic_pm_state.apic_dfr = apic_read(APIC_DFR);
    apic_pm_state.apic_spiv = apic_read(APIC_SPIV);
    apic_pm_state.apic_lvtt = apic_read(APIC_LVTT);
    if (maxlvt >= 4)
        apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC);

    if (maxlvt >= 6) {
        apic_pm_state.apic_lvtcmci = apic_read(APIC_CMCI);
    }

    apic_pm_state.apic_lvt0 = apic_read(APIC_LVT0);
    apic_pm_state.apic_lvt1 = apic_read(APIC_LVT1);
    apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR);
    apic_pm_state.apic_tmict = apic_read(APIC_TMICT);
    apic_pm_state.apic_tdcr = apic_read(APIC_TDCR);
    if (maxlvt >= 5)
        apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR);

    /* Tear down with interrupts off; APIC first, then the IOMMU IR. */
    local_irq_save(flags);
    disable_local_APIC();
    if ( iommu_x2apic_enabled )
        iommu_disable_x2apic();
    local_irq_restore(flags);
    return 0;
}
673
/*
 * Restore the local APIC register state saved by lapic_suspend().  The
 * restore order mirrors Linux: LVTERR is masked first so nothing fires
 * while intermediate state is being written.  Returns 0.
 */
int lapic_resume(void)
{
    uint64_t msr_content;
    unsigned long flags;
    int maxlvt;

    if (!apic_pm_state.active)
        return 0;

    local_irq_save(flags);

    /*
     * Make sure the APICBASE points to the right address
     *
     * FIXME! This will be wrong if we ever support suspend on
     * SMP! We'll need to do this as part of the CPU restore!
     */
    if ( !x2apic_enabled )
    {
        rdmsrl(MSR_APIC_BASE, msr_content);
        msr_content &= ~APIC_BASE_ADDR_MASK;
        wrmsrl(MSR_APIC_BASE,
               msr_content | APIC_BASE_ENABLE | mp_lapic_addr);
    }
    else
        resume_x2apic();

    maxlvt = get_maxlvt();
    /* Keep LVTERR masked until everything else is back in place. */
    apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED);
    apic_write(APIC_ID, apic_pm_state.apic_id);
    apic_write(APIC_DFR, apic_pm_state.apic_dfr);
    apic_write(APIC_LDR, apic_pm_state.apic_ldr);
    apic_write(APIC_TASKPRI, apic_pm_state.apic_taskpri);
    apic_write(APIC_SPIV, apic_pm_state.apic_spiv);
    apic_write(APIC_LVT0, apic_pm_state.apic_lvt0);
    apic_write(APIC_LVT1, apic_pm_state.apic_lvt1);
    if (maxlvt >= 5)
        apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr);

    if (maxlvt >= 6) {
        apic_write(APIC_CMCI, apic_pm_state.apic_lvtcmci);
    }

    if (maxlvt >= 4)
        apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc);
    apic_write(APIC_LVTT, apic_pm_state.apic_lvtt);
    apic_write(APIC_TDCR, apic_pm_state.apic_tdcr);
    apic_write(APIC_TMICT, apic_pm_state.apic_tmict);
    /* Flush any accumulated errors, then unmask LVTERR and flush again. */
    apic_write(APIC_ESR, 0);
    apic_read(APIC_ESR);
    apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr);
    apic_write(APIC_ESR, 0);
    apic_read(APIC_ESR);
    local_irq_restore(flags);
    return 0;
}
730
731
732 /*
733 * Detect and enable local APICs on non-SMP boards.
734 * Original code written by Keir Fraser.
735 */
736
lapic_disable(const char * str)737 static int __init cf_check lapic_disable(const char *str)
738 {
739 enable_local_apic = -1;
740 setup_clear_cpu_cap(X86_FEATURE_APIC);
741 return 0;
742 }
743 custom_param("nolapic", lapic_disable);
744 boolean_param("lapic", enable_local_apic);
745
apic_set_verbosity(const char * str)746 static int __init cf_check apic_set_verbosity(const char *str)
747 {
748 if (strcmp("debug", str) == 0)
749 apic_verbosity = APIC_DEBUG;
750 else if (strcmp("verbose", str) == 0)
751 apic_verbosity = APIC_VERBOSE;
752 else
753 return -EINVAL;
754
755 return 0;
756 }
757 custom_param("apic_verbosity", apic_set_verbosity);
758
detect_init_APIC(void)759 static int __init detect_init_APIC (void)
760 {
761 uint64_t msr_content;
762
763 /* Disabled by kernel option? */
764 if (enable_local_apic < 0)
765 return -1;
766
767 if ( rdmsr_safe(MSR_APIC_BASE, msr_content) )
768 {
769 printk("No local APIC present\n");
770 return -1;
771 }
772
773 if (!cpu_has_apic) {
774 /*
775 * Over-ride BIOS and try to enable the local
776 * APIC only if "lapic" specified.
777 */
778 if (enable_local_apic <= 0) {
779 printk("Local APIC disabled by BIOS -- "
780 "you can enable it with \"lapic\"\n");
781 return -1;
782 }
783 /*
784 * Some BIOSes disable the local APIC in the
785 * APIC_BASE MSR. This can only be done in
786 * software for Intel P6 or later and AMD K7
787 * (Model > 1) or later.
788 */
789 if ( !(msr_content & APIC_BASE_ENABLE) )
790 {
791 printk("Local APIC disabled by BIOS -- reenabling.\n");
792 msr_content &= ~APIC_BASE_ADDR_MASK;
793 msr_content |= APIC_BASE_ENABLE | APIC_DEFAULT_PHYS_BASE;
794 wrmsrl(MSR_APIC_BASE, msr_content);
795 enabled_via_apicbase = true;
796 }
797 }
798 /*
799 * The APIC feature bit should now be enabled
800 * in `cpuid'
801 */
802 if (!(cpuid_edx(1) & cpufeat_mask(X86_FEATURE_APIC))) {
803 printk("Could not enable APIC!\n");
804 return -1;
805 }
806
807 setup_force_cpu_cap(X86_FEATURE_APIC);
808 mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
809
810 /* The BIOS may have set up the APIC at some other address */
811 if ( msr_content & APIC_BASE_ENABLE )
812 mp_lapic_addr = msr_content & APIC_BASE_ADDR_MASK;
813
814 if (nmi_watchdog != NMI_NONE)
815 nmi_watchdog = NMI_LOCAL_APIC;
816
817 printk("Found and enabled local APIC!\n");
818
819 apic_pm_activate();
820
821 return 0;
822 }
823
x2apic_ap_setup(void)824 void x2apic_ap_setup(void)
825 {
826 if ( x2apic_enabled )
827 __enable_x2apic();
828 }
829
/*
 * BSP-side x2APIC bring-up: honour the "x2apic" command line knob, put
 * the IOMMU into interrupt-remapping mode first (saving/masking IO-APIC
 * RTEs and the 8259 across the transition), then enable x2APIC on the
 * CPU and switch to the matching genapic driver.  Uses goto-based
 * cleanup to restore IO-APIC/8259 state on all exit paths.
 */
void __init x2apic_bsp_setup(void)
{
    struct IO_APIC_route_entry **ioapic_entries = NULL;
    bool iommu_x2apic;
    const char *orig_name;

    if ( !cpu_has_x2apic )
        return;

    if ( !opt_x2apic )
    {
        if ( !x2apic_enabled )
        {
            printk("Not enabling x2APIC: disabled by cmdline.\n");
            return;
        }
        /* Firmware-enabled x2APIC cannot be undone here. */
        printk("x2APIC: Already enabled by BIOS: Ignoring cmdline disable.\n");
    }

    iommu_x2apic = iommu_supports_x2apic();
    if ( iommu_x2apic )
    {
        if ( (ioapic_entries = alloc_ioapic_entries()) == NULL )
        {
            printk("Allocate ioapic_entries failed\n");
            goto out;
        }

        if ( save_IO_APIC_setup(ioapic_entries) )
        {
            printk("Saving IO-APIC state failed\n");
            goto out;
        }

        /* Quiesce interrupt sources while remapping is switched on. */
        mask_8259A();
        mask_IO_APIC_setup(ioapic_entries);

        switch ( iommu_enable_x2apic() )
        {
        case 0:
            iommu_x2apic_enabled = true;
            break;

        case -ENXIO: /* ACPI_DMAR_X2APIC_OPT_OUT set */
            if ( x2apic_enabled )
                panic("IOMMU requests xAPIC mode, but x2APIC already enabled by firmware\n");

            printk("Not enabling x2APIC (upon firmware request)\n");
            iommu_x2apic_enabled = false;
            goto restore_out;

        default:
            printk(XENLOG_ERR "Failed to enable Interrupt Remapping\n");
            iommu_x2apic_enabled = false;
            break;
        }

        if ( iommu_x2apic_enabled )
            force_iommu = 1;
    }

    if ( !x2apic_enabled )
    {
        x2apic_enabled = true;
        __enable_x2apic();
    }

    /* Re-probe the APIC driver now that x2APIC mode is active. */
    orig_name = genapic.name;
    genapic = *apic_x2apic_probe();
    if ( genapic.name != orig_name )
        printk("Switched to APIC driver %s\n", genapic.name);

restore_out:
    /*
     * iommu_x2apic_enabled and iommu_supports_x2apic() cannot be used here
     * in the error case.
     */
    if ( iommu_x2apic )
    {
        /*
         * NB: do not use raw mode when restoring entries if the iommu has
         * been enabled during the process, because the entries need to be
         * translated and added to the remapping table in that case.
         */
        restore_IO_APIC_setup(ioapic_entries, !iommu_x2apic_enabled);
        unmask_8259A();
    }

out:
    if ( ioapic_entries )
        free_ioapic_entries(ioapic_entries);
}
922
init_apic_mappings(void)923 void __init init_apic_mappings(void)
924 {
925 unsigned long apic_phys;
926
927 if ( x2apic_enabled )
928 goto __next;
929 /*
930 * If no local APIC can be found then set up a fake all
931 * zeroes page to simulate the local APIC and another
932 * one for the IO-APIC.
933 */
934 if (!smp_found_config && detect_init_APIC()) {
935 apic_phys = __pa(alloc_xenheap_page());
936 clear_page(__va(apic_phys));
937 } else
938 apic_phys = mp_lapic_addr;
939
940 set_fixmap_nocache(FIX_APIC_BASE, apic_phys);
941 apic_printk(APIC_VERBOSE, "mapped APIC to %08Lx (%08lx)\n", APIC_BASE,
942 apic_phys);
943
944 __next:
945 /*
946 * Fetch the APIC ID of the BSP in case we have a
947 * default configuration (or the MP table is broken).
948 */
949 if (boot_cpu_physical_apicid == -1U)
950 boot_cpu_physical_apicid = get_apic_id();
951 x86_cpu_to_apicid[0] = get_apic_id();
952
953 ioapic_init();
954 }
955
956 /*****************************************************************************
957 * APIC calibration
958 *
959 * The APIC is programmed in bus cycles.
960 * Timeout values should specified in real time units.
961 * The "cheapest" time source is the cyclecounter.
962 *
963 * Thus, we need a mappings from: bus cycles <- cycle counter <- system time
964 *
965 * The calibration is currently a bit shoddy since it requires the external
966 * timer chip to generate periodic timer interupts.
967 *****************************************************************************/
968
969 /* used for system time scaling */
970 static u32 __read_mostly bus_scale; /* scaling factor: ns -> bus cycles */
971
972 /*
973 * The timer chip is already set up at HZ interrupts per second here,
974 * but we do not accept timer interrupts yet. We only allow the BP
975 * to calibrate.
976 */
get_8254_timer_count(void)977 static unsigned int __init get_8254_timer_count(void)
978 {
979 /*extern spinlock_t i8253_lock;*/
980 /*unsigned long flags;*/
981
982 unsigned int count;
983
984 /*spin_lock_irqsave(&i8253_lock, flags);*/
985
986 outb_p(PIT_LTCH_CH(0), PIT_MODE);
987 count = inb_p(PIT_CH0);
988 count |= inb_p(PIT_CH0) << 8;
989
990 /*spin_unlock_irqrestore(&i8253_lock, flags);*/
991
992 return count;
993 }
994
995 /* next tick in 8254 can be caught by catching timer wraparound */
wait_8254_wraparound(void)996 static void __init wait_8254_wraparound(void)
997 {
998 unsigned int curr_count, prev_count;
999
1000 curr_count = get_8254_timer_count();
1001 do {
1002 prev_count = curr_count;
1003 curr_count = get_8254_timer_count();
1004 } while (prev_count >= curr_count);
1005 }
1006
1007 /*
1008 * This function sets up the local APIC timer, with a timeout of
1009 * 'clocks' APIC bus clock. During calibration we actually call
1010 * this function twice on the boot CPU, once with a bogus timeout
1011 * value, second time for real. The other (noncalibrating) CPUs
1012 * call this function only once, with the real, calibrated value.
1013 */
1014
1015 #define APIC_DIVISOR 1
1016
/*
 * Program the local APIC timer LVT entry.  In TSC-deadline mode only the
 * LVTT is written (the deadline itself comes via MSR writes elsewhere);
 * otherwise the timer is set up in one-shot mode with an initial count
 * of 'clocks' bus cycles.
 */
static void __setup_APIC_LVTT(unsigned int clocks)
{
    unsigned int lvtt_value, tmp_value;

    if ( tdt_enabled )
    {
        lvtt_value = APIC_TIMER_MODE_TSC_DEADLINE | LOCAL_TIMER_VECTOR;
        apic_write(APIC_LVTT, lvtt_value);

        /*
         * See Intel SDM: TSC-Deadline Mode chapter. In xAPIC mode,
         * writing to the APIC LVTT and TSC_DEADLINE MSR isn't serialized.
         * According to Intel, MFENCE can do the serialization here.
         */
        asm volatile( "mfence" : : : "memory" );

        return;
    }

    /* NB. Xen uses local APIC timer in one-shot mode. */
    lvtt_value = APIC_TIMER_MODE_ONESHOT | LOCAL_TIMER_VECTOR;
    apic_write(APIC_LVTT, lvtt_value);

    /* Program the divide configuration, preserving the other TDCR bits. */
    tmp_value = apic_read(APIC_TDCR) & ~APIC_TDR_DIV_MASK;
    apic_write(APIC_TDCR, tmp_value | PASTE(APIC_TDR_DIV_, APIC_DIVISOR));

    apic_write(APIC_TMICT, clocks / APIC_DIVISOR);
}
1045
/*
 * Initialise this CPU's APIC timer LVT with interrupts disabled.  A zero
 * initial count means no timeout is armed yet.
 */
static void setup_APIC_timer(void)
{
    unsigned long flags;
    local_irq_save(flags);
    __setup_APIC_LVTT(0);
    local_irq_restore(flags);
}
1053
/*
 * Match entry for deadline_match[]: Intel family 6, model m, with
 * TSC-deadline advertised.  'fr' is either a fixed minimum microcode
 * revision or a function returning one per stepping (distinguished in
 * check_deadline_errata() by the pointer's sign bit).
 */
#define DEADLINE_MODEL_MATCH(m, fr) \
    { .vendor = X86_VENDOR_INTEL, .family = 6, .model = (m), \
      .feature = X86_FEATURE_TSC_DEADLINE, \
      .driver_data = (void *)(unsigned long)(fr) }
1058
hsx_deadline_rev(void)1059 static unsigned int __init hsx_deadline_rev(void)
1060 {
1061 switch ( boot_cpu_data.x86_mask )
1062 {
1063 case 0x02: return 0x3a; /* EP */
1064 case 0x04: return 0x0f; /* EX */
1065 }
1066
1067 return ~0U;
1068 }
1069
bdx_deadline_rev(void)1070 static unsigned int __init bdx_deadline_rev(void)
1071 {
1072 switch ( boot_cpu_data.x86_mask )
1073 {
1074 case 0x02: return 0x00000011;
1075 case 0x03: return 0x0700000e;
1076 case 0x04: return 0x0f00000c;
1077 case 0x05: return 0x0e000003;
1078 }
1079
1080 return ~0U;
1081 }
1082
skx_deadline_rev(void)1083 static unsigned int __init skx_deadline_rev(void)
1084 {
1085 switch ( boot_cpu_data.x86_mask )
1086 {
1087 case 0x00 ... 0x02: return ~0U;
1088 case 0x03: return 0x01000136;
1089 case 0x04: return 0x02000014;
1090 }
1091
1092 return 0;
1093 }
1094
/*
 * CPUs whose TSC-deadline timer is broken below a given microcode
 * revision.  Entries carry either a fixed revision or a per-stepping
 * lookup function; see check_deadline_errata().
 */
static const struct x86_cpu_id __initconstrel deadline_match[] = {
    DEADLINE_MODEL_MATCH(0x3c, 0x22),             /* Haswell */
    DEADLINE_MODEL_MATCH(0x3f, hsx_deadline_rev), /* Haswell EP/EX */
    DEADLINE_MODEL_MATCH(0x45, 0x20),             /* Haswell D */
    DEADLINE_MODEL_MATCH(0x46, 0x17),             /* Haswell H */

    DEADLINE_MODEL_MATCH(0x3d, 0x25),             /* Broadwell */
    DEADLINE_MODEL_MATCH(0x47, 0x17),             /* Broadwell H */
    DEADLINE_MODEL_MATCH(0x4f, 0x0b000020),       /* Broadwell EP/EX */
    DEADLINE_MODEL_MATCH(0x56, bdx_deadline_rev), /* Broadwell D */

    DEADLINE_MODEL_MATCH(0x4e, 0xb2),             /* Skylake M */
    DEADLINE_MODEL_MATCH(0x55, skx_deadline_rev), /* Skylake X */
    DEADLINE_MODEL_MATCH(0x5e, 0xb2),             /* Skylake D */

    DEADLINE_MODEL_MATCH(0x8e, 0x52),             /* Kabylake M */
    DEADLINE_MODEL_MATCH(0x9e, 0x52),             /* Kabylake D */

    {}
};
1115
/*
 * Disable the TSC-deadline feature when the running microcode is older
 * than the revision known to fix the relevant erratum for this CPU.
 * Skipped entirely when virtualized, where microcode revisions are not
 * meaningful.
 */
static void __init check_deadline_errata(void)
{
    const struct x86_cpu_id *m;
    unsigned int rev;

    if ( cpu_has_hypervisor )
        return;

    m = x86_match_cpu(deadline_match);
    if ( !m )
        return;

    /*
     * Function pointers will have the MSB set due to address layout,
     * immediate revisions will not.
     */
    if ( (long)m->driver_data < 0 )
        rev = ((unsigned int (*)(void))(m->driver_data))();
    else
        rev = (unsigned long)m->driver_data;

    if ( this_cpu(cpu_sig).rev >= rev )
        return;

    setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE);
    printk(XENLOG_WARNING "TSC_DEADLINE disabled due to Errata; "
           "please update microcode to version %#x (or later)\n", rev);
}
1144
apic_tmcct_read(void)1145 uint32_t __init apic_tmcct_read(void)
1146 {
1147 if ( x2apic_enabled )
1148 {
1149 /*
1150 * Have a barrier here just like in rdtsc_ordered() as it's
1151 * unclear whether this non-serializing RDMSR also can be
1152 * executed speculatively (like RDTSC can).
1153 */
1154 alternative("lfence", "mfence", X86_FEATURE_MFENCE_RDTSC);
1155 return apic_rdmsr(APIC_TMCCT);
1156 }
1157
1158 return apic_mem_read(APIC_TMCCT);
1159 }
1160
1161 /*
1162 * In this function we calibrate APIC bus clocks to the external
1163 * timer. Unfortunately we cannot use jiffies and the timer irq
1164 * to calibrate, since some later bootup code depends on getting
1165 * the first irq? Ugh.
1166 *
1167 * We want to do the calibration only once since we
 * want to have local timer irqs in sync. CPUs connected
1169 * by the same APIC bus have the very same bus frequency.
1170 * And we want to have irqs off anyways, no accidental
1171 * APIC irq that way.
1172 */
1173
1174 #define BUS_SCALE_SHIFT 18
1175
/*
 * Measure the APIC bus clock frequency and derive bus_scale, the
 * multiplier reprogram_timer() uses to convert nanoseconds into APIC
 * timer ticks.  Prefers an externally supplied frequency from
 * calibrate_apic_timer(); falls back to timing against the 8254 PIT.
 */
static void __init calibrate_APIC_clock(void)
{
    unsigned long bus_freq; /* KAF: pointer-size avoids compile warns. */
    unsigned int bus_cycle; /* length of one bus cycle in pico-seconds */
#define LOOPS_FRAC 10U      /* measure for one tenth of a second */

    apic_printk(APIC_VERBOSE, "calibrating APIC timer ...\n");

    /*
     * Setup the APIC counter to maximum. There is no way the lapic
     * can underflow in the 100ms detection time frame.
     */
    __setup_APIC_LVTT(0xffffffffU);

    bus_freq = calibrate_apic_timer();
    if ( !bus_freq )
    {
        unsigned int i, tt1, tt2;
        unsigned long t1, t2;

        /* Only the native path may fall back to poking the PIT. */
        ASSERT(!xen_guest);

        /*
         * The timer chip counts down to zero. Let's wait for a wraparound to
         * start exact measurement (the current tick might have been already
         * half done):
         */
        wait_8254_wraparound();

        /* We wrapped around just now. Let's start: */
        t1 = rdtsc_ordered();
        tt1 = apic_read(APIC_TMCCT);

        /* Let's wait HZ / LOOPS_FRAC ticks: */
        for ( i = 0; i < HZ / LOOPS_FRAC; ++i )
            wait_8254_wraparound();

        t2 = rdtsc_ordered();
        tt2 = apic_read(APIC_TMCCT);

        /* APIC ticks consumed over 1/LOOPS_FRAC s, scaled up to ticks/s. */
        bus_freq = (tt1 - tt2) * APIC_DIVISOR * LOOPS_FRAC;

        apic_printk(APIC_VERBOSE, "..... CPU clock speed is %lu.%04lu MHz.\n",
                    ((t2 - t1) * LOOPS_FRAC) / 1000000,
                    (((t2 - t1) * LOOPS_FRAC) / 100) % 10000);
    }

    apic_printk(APIC_VERBOSE, "..... host bus clock speed is %ld.%04ld MHz.\n",
                bus_freq / 1000000, (bus_freq / 100) % 10000);

    /* set up multipliers for accurate timer code */
    bus_cycle = 1000000000000UL / bus_freq; /* in pico seconds */
    /* Round to nearest rather than truncating. */
    bus_cycle += (1000000000000UL % bus_freq) * 2 > bus_freq;
    bus_scale = (1000 << BUS_SCALE_SHIFT) / bus_cycle;
    bus_scale += ((1000 << BUS_SCALE_SHIFT) % bus_cycle) * 2 > bus_cycle;

    apic_printk(APIC_VERBOSE, "..... bus_scale = %#x\n", bus_scale);
    /* reset APIC to zero timeout value */
    __setup_APIC_LVTT(0);

#undef LOOPS_FRAC
}
1238
/*
 * Configure the boot CPU's local APIC timer: use TSC-deadline mode when
 * the hardware supports it (and "tdt" wasn't disabled on the command
 * line), otherwise calibrate the bus clock for one-shot operation.
 */
void __init setup_boot_APIC_clock(void)
{
    unsigned long flags;
    apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n");
    using_apic_timer = true;

    /* May clear X86_FEATURE_TSC_DEADLINE on affected steppings. */
    check_deadline_errata();

    if ( !boot_cpu_has(X86_FEATURE_TSC_DEADLINE) )
        tdt_enable = false;

    local_irq_save(flags);

    /*
     * Calibration isn't needed in TSC-deadline mode, but still run it
     * when verbose so the frequency diagnostics get logged.
     */
    if ( !tdt_enable || apic_verbosity )
        calibrate_APIC_clock();

    if ( tdt_enable )
    {
        printk(KERN_DEBUG "TSC deadline timer enabled\n");
        tdt_enabled = true;
    }

    setup_APIC_timer();

    local_irq_restore(flags);
}
1265
/* Secondary CPUs reuse the calibration already done on the boot CPU. */
void setup_secondary_APIC_clock(void)
{
    setup_APIC_timer();
}
1270
disable_APIC_timer(void)1271 void disable_APIC_timer(void)
1272 {
1273 if (using_apic_timer) {
1274 unsigned long v;
1275
1276 /* Work around AMD Erratum 411. This is a nice thing to do anyway. */
1277 apic_write(APIC_TMICT, 0);
1278
1279 v = apic_read(APIC_LVTT);
1280 apic_write(APIC_LVTT, v | APIC_LVT_MASKED);
1281 }
1282 }
1283
enable_APIC_timer(void)1284 void enable_APIC_timer(void)
1285 {
1286 if (using_apic_timer) {
1287 unsigned long v;
1288
1289 v = apic_read(APIC_LVTT);
1290 apic_write(APIC_LVTT, v & ~APIC_LVT_MASKED);
1291 }
1292 }
1293
1294 #undef APIC_DIVISOR
1295
1296 /*
1297 * reprogram_timer: Reprogram the APIC timer.
1298 * Timeout is a Xen system time (nanoseconds since boot); 0 disables the timer.
1299 * Returns 1 on success; 0 if the timeout is too soon or is in the past.
1300 */
int reprogram_timer(s_time_t timeout)
{
    uint32_t count = 0;

    /* No local APIC: timer list is polled via the PIT interrupt. */
    if ( !cpu_has_apic )
        return 1;

    if ( tdt_enabled )
    {
        /* TSC-deadline mode: a deadline of zero disarms the timer. */
        wrmsrl(MSR_IA32_TSC_DEADLINE, timeout ? stime2tsc(timeout) : 0);
        return 1;
    }

    if ( timeout )
    {
        s_time_t delta = timeout - NOW();

        /* Convert the remaining time to APIC ticks, saturating at 2^32-1. */
        if ( delta > 0 )
            count = min_t(uint64_t, (bus_scale * delta) >> BUS_SCALE_SHIFT,
                          UINT32_MAX);
    }

    /* A zero initial count leaves the one-shot timer stopped. */
    apic_write(APIC_TMICT, count);

    return count || !timeout;
}
1324
/* Local APIC timer tick: acknowledge and defer the work to a softirq. */
static void cf_check apic_timer_interrupt(void)
{
    ack_APIC_irq();
    perfc_incr(apic_timer);
    raise_softirq(TIMER_SOFTIRQ);
}
1331
/* Set when a remote CPU has asked this CPU to dump its state. */
static DEFINE_PER_CPU(bool, state_dump_pending);

/*
 * Ask @cpu to dump its execution state.  The flag must be set before the
 * IPI is sent, so the receiving spurious-vector handler sees it.
 */
void smp_send_state_dump(unsigned int cpu)
{
    /* We overload the spurious interrupt handler to handle the dump. */
    per_cpu(state_dump_pending, cpu) = true;
    send_IPI_mask(cpumask_of(cpu), SPURIOUS_APIC_VECTOR);
}
1340
1341 /*
1342 * Spurious interrupts should _never_ happen with our APIC/SMP architecture.
1343 */
spurious_interrupt(void)1344 static void cf_check spurious_interrupt(void)
1345 {
1346 /*
1347 * Check if this is a vectored interrupt (most likely, as this is probably
1348 * a request to dump local CPU state or to continue NMI handling).
1349 * Vectored interrupts are ACKed; spurious interrupts are not.
1350 */
1351 if (apic_isr_read(SPURIOUS_APIC_VECTOR)) {
1352 bool is_spurious;
1353
1354 ack_APIC_irq();
1355 is_spurious = !nmi_check_continuation();
1356 if (this_cpu(state_dump_pending)) {
1357 this_cpu(state_dump_pending) = false;
1358 dump_execstate(get_irq_regs());
1359 is_spurious = false;
1360 }
1361
1362 if ( !is_spurious )
1363 return;
1364 }
1365
1366 /* see sw-dev-man vol 3, chapter 7.4.13.5 */
1367 printk(KERN_INFO "spurious APIC interrupt on CPU#%d, should "
1368 "never happen.\n", smp_processor_id());
1369 }
1370
1371 /*
1372 * This interrupt should never happen with our APIC/SMP architecture
1373 */
1374
/* Handler for ERROR_APIC_VECTOR: log the APIC's error status bits. */
static void cf_check error_interrupt(void)
{
    /* One description per APIC_ESR bit, lowest bit first. */
    static const char *const esr_fields[] = {
        ", Send CS error",
        ", Receive CS error",
        ", Send accept error",
        ", Receive accept error",
        ", Redirectable IPI",
        ", Send illegal vector",
        ", Received illegal vector",
        ", Illegal register address",
    };
    const char *entries[ARRAY_SIZE(esr_fields)];
    unsigned int v, v1;
    unsigned int i;

    /* First tickle the hardware, only then report what went on. -- REW */
    v = apic_read(APIC_ESR);
    /* Writing ESR latches the currently pending errors for the re-read. */
    apic_write(APIC_ESR, 0);
    v1 = apic_read(APIC_ESR);
    ack_APIC_irq();

    /* Translate the freshly latched value (v1) into printable fragments. */
    for ( i = 0; i < ARRAY_SIZE(entries); ++i )
        entries[i] = v1 & (1 << i) ? esr_fields[i] : "";
    printk(XENLOG_DEBUG
           "APIC error on CPU%u: %02x(%02x)%s%s%s%s%s%s%s%s\n",
           smp_processor_id(), v, v1,
           entries[7], entries[6], entries[5], entries[4],
           entries[3], entries[2], entries[1], entries[0]);
}
1405
1406 /*
1407 * This interrupt handles performance counters interrupt
1408 */
1409
/* Performance counter interrupt: acknowledge and hand off to the vPMU. */
static void cf_check pmu_interrupt(void)
{
    ack_APIC_irq();
    vpmu_do_interrupt();
}
1415
/* Register handlers for the directly handled local APIC vectors. */
void __init apic_intr_init(void)
{
    smp_intr_init();

    /* self generated IPI for local APIC timer */
    set_direct_apic_vector(LOCAL_TIMER_VECTOR, apic_timer_interrupt);

    /* IPI vectors for APIC spurious and error interrupts */
    set_direct_apic_vector(SPURIOUS_APIC_VECTOR, spurious_interrupt);
    set_direct_apic_vector(ERROR_APIC_VECTOR, error_interrupt);

    /* Performance Counters Interrupt */
    set_direct_apic_vector(PMU_APIC_VECTOR, pmu_interrupt);
}
1430
1431 /*
1432 * This initializes the IO-APIC and APIC hardware if this is
1433 * a UP kernel.
1434 */
/*
 * Initialize the local APIC (and, where present, the IO-APICs) on the
 * boot processor.  Returns 0 on success, -1 when no usable local APIC
 * exists (IO-APIC setup is then skipped as well).
 */
int __init APIC_init_uniprocessor (void)
{
    /* Honour a force-disable request (enable_local_apic < 0). */
    if (enable_local_apic < 0)
        setup_clear_cpu_cap(X86_FEATURE_APIC);

    if (!smp_found_config && !cpu_has_apic) {
        skip_ioapic_setup = true;
        return -1;
    }

    /*
     * Complain if the BIOS pretends there is one.
     */
    if (!cpu_has_apic) {
        printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n",
               boot_cpu_physical_apicid);
        skip_ioapic_setup = true;
        return -1;
    }

    verify_local_APIC();

    connect_bsp_APIC();

    /*
     * Hack: In case of kdump, after a crash, kernel might be booting
     * on a cpu with non-zero lapic id. But boot_cpu_physical_apicid
     * might be zero if read from MP tables. Get it from LAPIC.
     */
#ifdef CONFIG_CRASH_DUMP
    boot_cpu_physical_apicid = get_apic_id();
#endif
    physids_clear(phys_cpu_present_map);
    physid_set(boot_cpu_physical_apicid, phys_cpu_present_map);

    if ( !skip_ioapic_setup && nr_ioapics )
        /* Sanitize the IO-APIC pins before enabling the lapic LVTERR/ESR. */
        enable_IO_APIC();

    setup_local_APIC(true);

    /* The NMI watchdog relies on the now-programmed local APIC. */
    if (nmi_watchdog == NMI_LOCAL_APIC)
        check_nmi_watchdog();

    if (smp_found_config)
        if (!skip_ioapic_setup && nr_ioapics)
            setup_IO_APIC();

    setup_boot_APIC_clock();

    return 0;
}
1487
apic_mode_to_str(const enum apic_mode mode)1488 static const char * __init apic_mode_to_str(const enum apic_mode mode)
1489 {
1490 switch ( mode )
1491 {
1492 case APIC_MODE_INVALID:
1493 return "invalid";
1494 case APIC_MODE_DISABLED:
1495 return "disabled";
1496 case APIC_MODE_XAPIC:
1497 return "xapic";
1498 case APIC_MODE_X2APIC:
1499 return "x2apic";
1500 default:
1501 return "unrecognised";
1502 }
1503 }
1504
1505 /* Needs to be called during startup. It records the state the BIOS
1506 * leaves the local APIC so we can undo upon kexec.
1507 */
record_boot_APIC_mode(void)1508 void __init record_boot_APIC_mode(void)
1509 {
1510 /* Sanity check - we should only ever run once, but could possibly
1511 * be called several times */
1512 if ( APIC_MODE_INVALID != apic_boot_mode )
1513 return;
1514
1515 apic_boot_mode = current_local_apic_mode();
1516
1517 apic_printk(APIC_DEBUG, "APIC boot state is '%s'\n",
1518 apic_mode_to_str(apic_boot_mode));
1519 }
1520
1521 /* Look at the bits in MSR_APIC_BASE and work out which APIC mode we are in */
current_local_apic_mode(void)1522 enum apic_mode current_local_apic_mode(void)
1523 {
1524 u64 msr_contents;
1525
1526 rdmsrl(MSR_APIC_BASE, msr_contents);
1527
1528 /* Reading EXTD bit from the MSR is only valid if CPUID
1529 * says so, else reserved */
1530 if ( boot_cpu_has(X86_FEATURE_X2APIC) && (msr_contents & APIC_BASE_EXTD) )
1531 return APIC_MODE_X2APIC;
1532
1533 /* EN bit should always be valid as long as we can read the MSR
1534 */
1535 if ( msr_contents & APIC_BASE_ENABLE )
1536 return APIC_MODE_XAPIC;
1537
1538 return APIC_MODE_DISABLED;
1539 }
1540
1541
/* BUG if the given vector is still marked in-service in the local APIC. */
void check_for_unexpected_msi(unsigned int vector)
{
    BUG_ON(apic_isr_read(vector));
}
1546