1 /*
2  *      based on linux-2.6.17.13/arch/i386/kernel/apic.c
3  *
4  *  Local APIC handling, local APIC timers
5  *
6  *  (c) 1999, 2000 Ingo Molnar <mingo@redhat.com>
7  *
8  *  Fixes
9  *  Maciej W. Rozycki   :   Bits for genuine 82489DX APICs;
10  *                  thanks to Eric Gilmore
11  *                  and Rolf G. Tews
12  *                  for testing these extensively.
13  *    Maciej W. Rozycki :   Various updates and fixes.
14  *    Mikael Pettersson :   Power Management for UP-APIC.
15  *    Pavel Machek and
16  *    Mikael Pettersson    :    PM converted to driver model.
17  */
18 
19 #include <xen/perfc.h>
20 #include <xen/errno.h>
21 #include <xen/init.h>
22 #include <xen/mm.h>
23 #include <xen/param.h>
24 #include <xen/sched.h>
25 #include <xen/irq.h>
26 #include <xen/delay.h>
27 #include <xen/smp.h>
28 #include <xen/softirq.h>
29 #include <asm/mc146818rtc.h>
30 #include <asm/microcode.h>
31 #include <asm/msr.h>
32 #include <asm/atomic.h>
33 #include <asm/mpspec.h>
34 #include <asm/flushtlb.h>
35 #include <asm/hardirq.h>
36 #include <asm/apic.h>
37 #include <asm/io_apic.h>
38 #include <mach_apic.h>
39 #include <io_ports.h>
40 #include <irq_vectors.h>
41 #include <xen/kexec.h>
42 #include <asm/guest.h>
43 #include <asm/nmi.h>
44 #include <asm/time.h>
45 
/*
 * TSC-deadline timer control.  When tdt_enabled is set, __setup_APIC_LVTT()
 * programs the LVT timer entry in TSC-deadline mode.  tdt_enable is the
 * boot-time "tdt" command line switch and defaults to true.
 */
static bool __read_mostly tdt_enabled;
static bool __initdata tdt_enable = true;
boolean_param("tdt", tdt_enable);

/*
 * Set by x2apic_bsp_setup() once iommu_enable_x2apic() has succeeded;
 * consulted again by lapic_suspend() and resume_x2apic().
 */
bool __read_mostly iommu_x2apic_enabled;

/*
 * Local APIC register snapshot used across suspend/resume.  lapic_suspend()
 * fills it in and lapic_resume() writes it back; both do nothing unless
 * 'active' has been set via apic_pm_activate().
 */
static struct {
    int active;
    /* r/w apic fields */
    unsigned int apic_id;
    unsigned int apic_taskpri;
    unsigned int apic_ldr;
    unsigned int apic_dfr;
    unsigned int apic_spiv;
    unsigned int apic_lvtt;
    unsigned int apic_lvtpc;
    unsigned int apic_lvtcmci;
    unsigned int apic_lvt0;
    unsigned int apic_lvt1;
    unsigned int apic_lvterr;
    unsigned int apic_tmict;
    unsigned int apic_tdcr;
    unsigned int apic_thmr;
} apic_pm_state;

/*
 * Knob to control our willingness to enable the local APIC.
 * Forced to -1 by the "nolapic" option and also exposed as the boolean
 * "lapic" option; detect_init_APIC() acts on it.
 */
static s8 __initdata enable_local_apic; /* -1=force-disable, +1=force-enable */

/*
 * Debug level, selected with the "apic_verbosity" option
 * (APIC_VERBOSE or APIC_DEBUG).
 */
u8 __read_mostly apic_verbosity;

/* "x2apic" command line switch, checked by x2apic_bsp_setup(). */
static bool __initdata opt_x2apic = true;
boolean_param("x2apic", opt_x2apic);

/*
 * Bootstrap processor local APIC boot mode - so we can undo our changes
 * to the APIC state.  disable_local_APIC() compares against it when
 * kexecing.
 */
static enum apic_mode apic_boot_mode = APIC_MODE_INVALID;

/* True once the local APIC is being operated in x2APIC mode. */
bool __read_mostly x2apic_enabled;
/* Set by verify_local_APIC() when directed EOI is to be used. */
bool __read_mostly directed_eoi_enabled;
92 
modern_apic(void)93 static int modern_apic(void)
94 {
95     unsigned int lvr, version;
96     /* AMD systems use old APIC versions, so check the CPU */
97     if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
98         boot_cpu_data.x86 >= 0xf)
99         return 1;
100 
101     /* Hygon systems use modern APIC */
102     if (boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)
103         return 1;
104 
105     lvr = apic_read(APIC_LVR);
106     version = GET_APIC_VERSION(lvr);
107     return version >= 0x14;
108 }
109 
110 /*
111  * 'what should we do if we get a hw irq event on an illegal vector'.
112  * each architecture has to answer this themselves.
113  */
ack_bad_irq(unsigned int irq)114 void ack_bad_irq(unsigned int irq)
115 {
116     printk("unexpected IRQ trap at irq %02x\n", irq);
117     /*
118      * Currently unexpected vectors happen only on SMP and APIC.
119      * We _must_ ack these because every local APIC has only N
120      * irq slots per priority level, and a 'hanging, unacked' IRQ
121      * holds up an irq slot - in excessive cases (when multiple
122      * unexpected vectors occur) that might lock up the APIC
123      * completely.
124      * But only ack when the APIC is enabled -AK
125      */
126     if (cpu_has_apic)
127         ack_APIC_irq();
128 }
129 
/* Using APIC to generate smp_local_timer_interrupt? */
static bool __read_mostly using_apic_timer;

/*
 * Set by detect_init_APIC() when it had to switch the local APIC on through
 * MSR_APIC_BASE; disable_local_APIC() then clears the enable bits in that
 * MSR again.
 */
static bool __read_mostly enabled_via_apicbase;
134 
/*
 * Return the physical-mode broadcast APIC ID: 0xff when modern_apic()
 * reports a modern APIC, 0xf otherwise.
 */
int get_physical_broadcast(void)
{
    return modern_apic() ? 0xff : 0xf;
}
142 
get_maxlvt(void)143 int get_maxlvt(void)
144 {
145     unsigned int v = apic_read(APIC_LVR);
146 
147     return GET_APIC_MAXLVT(v);
148 }
149 
/*
 * Quiesce the local APIC of the current CPU.
 *
 * The timer initial count is zeroed, every LVT entry the hardware
 * implements (as reported by get_maxlvt()) is first masked and then reset
 * to a bare "masked" value, the LDR is cleared unless in x2APIC mode, and
 * finally the error status register is cleared and read back.
 *
 * The order of the register accesses below is deliberate; see the
 * individual comments.
 */
void clear_local_APIC(void)
{
    int maxlvt;
    unsigned long v;

    maxlvt = get_maxlvt();

    /* Work around AMD Erratum 411. This is a nice thing to do anyway. */
    apic_write(APIC_TMICT, 0);

    /*
     * Masking an LVT entry on a P6 can trigger a local APIC error
     * if the vector is zero. Mask LVTERR first to prevent this.
     */
    if (maxlvt >= 3) {
        v = ERROR_APIC_VECTOR; /* any non-zero vector will do */
        apic_write(APIC_LVTERR, v | APIC_LVT_MASKED);
    }
    /*
     * Careful: we have to set masks only first to deassert
     * any level-triggered sources.
     */
    v = apic_read(APIC_LVTT);
    apic_write(APIC_LVTT, v | APIC_LVT_MASKED);
    v = apic_read(APIC_LVT0);
    apic_write(APIC_LVT0, v | APIC_LVT_MASKED);
    v = apic_read(APIC_LVT1);
    apic_write(APIC_LVT1, v | APIC_LVT_MASKED);
    /* The remaining LVT entries only exist on APICs with enough LVTs. */
    if (maxlvt >= 4) {
        v = apic_read(APIC_LVTPC);
        apic_write(APIC_LVTPC, v | APIC_LVT_MASKED);
    }
    if (maxlvt >= 5) {
        v = apic_read(APIC_LVTTHMR);
        apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED);
    }
    if (maxlvt >= 6) {
        v = apic_read(APIC_CMCI);
        apic_write(APIC_CMCI, v | APIC_LVT_MASKED);
    }

    /*
     * Clean APIC state for other OSs:
     * second pass, drop everything but the mask bit from each entry.
     */
    apic_write(APIC_LVTT, APIC_LVT_MASKED);
    apic_write(APIC_LVT0, APIC_LVT_MASKED);
    apic_write(APIC_LVT1, APIC_LVT_MASKED);
    if (maxlvt >= 3)
        apic_write(APIC_LVTERR, APIC_LVT_MASKED);
    if (maxlvt >= 4)
        apic_write(APIC_LVTPC, APIC_LVT_MASKED);
    if (maxlvt >= 5)
        apic_write(APIC_LVTTHMR, APIC_LVT_MASKED);
    if (maxlvt >= 6)
        apic_write(APIC_CMCI, APIC_LVT_MASKED);
    /* The LDR is only rewritten when not in x2APIC mode. */
    if (!x2apic_enabled) {
        v = apic_read(APIC_LDR) & ~APIC_LDR_MASK;
        apic_write(APIC_LDR, v);
    }

    if (maxlvt > 3)        /* Due to Pentium errata 3AP and 11AP. */
        apic_write(APIC_ESR, 0);
    apic_read(APIC_ESR);
}
214 
connect_bsp_APIC(void)215 void __init connect_bsp_APIC(void)
216 {
217     if (pic_mode) {
218         /*
219          * Do not trust the local APIC being empty at bootup.
220          */
221         clear_local_APIC();
222         /*
223          * PIC mode, enable APIC mode in the IMCR, i.e.
224          * connect BSP's local APIC to INT and NMI lines.
225          */
226         apic_printk(APIC_VERBOSE, "leaving PIC mode, "
227                     "enabling APIC mode.\n");
228         outb(0x70, 0x22);
229         outb(0x01, 0x23);
230     }
231 
232     printk("Enabling APIC mode.  Using %d I/O APICs\n", nr_ioapics);
233     enable_apic_mode();
234 }
235 
/*
 * Detach the boot processor's local APIC again.
 *
 * On PIC-mode boards the IMCR is switched back to PIC mode.  Otherwise the
 * local APIC is cleared and reprogrammed for virtual wire compatibility
 * mode: spurious vector 0xf with the APIC enabled, LINT1 delivering NMI,
 * and - unless virt_wire_setup is non-zero - LINT0 delivering ExtINT.
 *
 * virt_wire_setup: when non-zero, LVT0 is left as clear_local_APIC() set it.
 */
void disconnect_bsp_APIC(int virt_wire_setup)
{
    if (pic_mode) {
        /*
         * Put the board back into PIC mode (has an effect
         * only on certain older boards).  Note that APIC
         * interrupts, including IPIs, won't work beyond
         * this point!  The only exception are INIT IPIs.
         */
        apic_printk(APIC_VERBOSE, "disabling APIC mode, "
                    "entering PIC mode.\n");
        outb(0x70, 0x22);
        outb(0x00, 0x23);
    }
    else {
        /* Go back to Virtual Wire compatibility mode */
        unsigned long value;

        clear_local_APIC();

        /* For the spurious interrupt use vector F, and enable it */
        value = apic_read(APIC_SPIV);
        value &= ~APIC_VECTOR_MASK;
        value |= APIC_SPIV_APIC_ENABLED;
        value |= 0xf;
        apic_write(APIC_SPIV, value);

        if (!virt_wire_setup) {
            /* For LVT0 make it edge triggered, active high, external and enabled */
            value = apic_read(APIC_LVT0);
            value &= ~(APIC_DM_MASK | APIC_SEND_PENDING |
                       APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
                       APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED );
            value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING | APIC_DM_EXTINT;
            apic_write(APIC_LVT0, value);
        }

        /* For LVT1 make it edge triggered, active high, nmi and enabled */
        value = apic_read(APIC_LVT1);
        value &= ~(
            APIC_DM_MASK | APIC_SEND_PENDING |
            APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
            APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
        value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING | APIC_DM_NMI;
        apic_write(APIC_LVT1, value);
    }
}
283 
/*
 * Switch the local APIC of the current CPU off.
 *
 * Steps, in order:
 *  1. clear_local_APIC() to quiesce all LVT entries;
 *  2. clear the software-enable bit in SPIV;
 *  3. if the APIC had been enabled through MSR_APIC_BASE by
 *     detect_init_APIC(), clear the ENABLE and EXTD bits in that MSR;
 *  4. when kexecing and the current APIC mode differs from the mode seen
 *     at boot (apic_boot_mode), put MSR_APIC_BASE back into the boot mode.
 */
void disable_local_APIC(void)
{
    clear_local_APIC();

    /*
     * Disable APIC (implies clearing of registers
     * for 82489DX!).
     */
    apic_write(APIC_SPIV, apic_read(APIC_SPIV) & ~APIC_SPIV_APIC_ENABLED);

    if (enabled_via_apicbase) {
        uint64_t msr_content;
        rdmsrl(MSR_APIC_BASE, msr_content);
        wrmsrl(MSR_APIC_BASE, msr_content &
               ~(APIC_BASE_ENABLE | APIC_BASE_EXTD));
    }

    if ( kexecing && (current_local_apic_mode() != apic_boot_mode) )
    {
        uint64_t msr_content;
        /* Start from a fully disabled APIC, then rebuild the boot mode. */
        rdmsrl(MSR_APIC_BASE, msr_content);
        msr_content &= ~(APIC_BASE_ENABLE | APIC_BASE_EXTD);
        wrmsrl(MSR_APIC_BASE, msr_content);

        switch ( apic_boot_mode )
        {
        case APIC_MODE_DISABLED:
            break; /* Nothing to do - we did this above */
        case APIC_MODE_XAPIC:
            msr_content |= APIC_BASE_ENABLE;
            wrmsrl(MSR_APIC_BASE, msr_content);
            break;
        case APIC_MODE_X2APIC:
            /* Two separate writes: ENABLE first, then EXTD on top of it. */
            msr_content |= APIC_BASE_ENABLE;
            wrmsrl(MSR_APIC_BASE, msr_content);
            msr_content |= APIC_BASE_EXTD;
            wrmsrl(MSR_APIC_BASE, msr_content);
            break;
        default:
            printk("Default case when reverting #%d lapic to boot state\n",
                   smp_processor_id());
            break;
        }
    }

}
330 
331 /*
332  * This is to verify that we're looking at a real local APIC.
333  * Check these against your board if the CPUs aren't getting
334  * started for no apparent reason.
335  */
verify_local_APIC(void)336 int __init verify_local_APIC(void)
337 {
338     unsigned int reg0, reg1;
339 
340     /*
341      * The version register is read-only in a real APIC.
342      */
343     reg0 = apic_read(APIC_LVR);
344     apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg0);
345 
346     /* We don't try writing LVR in x2APIC mode since that incurs #GP. */
347     if ( !x2apic_enabled )
348         apic_write(APIC_LVR, reg0 ^ APIC_LVR_MASK);
349     reg1 = apic_read(APIC_LVR);
350     apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg1);
351 
352     /*
353      * The two version reads above should print the same
354      * numbers.  If the second one is different, then we
355      * poke at a non-APIC.
356      */
357     if (reg1 != reg0)
358         return 0;
359 
360     /*
361      * Check if the version looks reasonably.
362      */
363     reg1 = GET_APIC_VERSION(reg0);
364     if (reg1 == 0x00 || reg1 == 0xff)
365         return 0;
366     reg1 = get_maxlvt();
367     if (reg1 < 0x02 || reg1 == 0xff)
368         return 0;
369 
370     /*
371      * Detecting directed EOI on BSP:
372      * If having directed EOI support in lapic, force to use ioapic_ack_old,
373      * and enable the directed EOI for intr handling.
374      */
375     if ( reg0 & APIC_LVR_DIRECTED_EOI )
376     {
377         if ( ioapic_ack_new && ioapic_ack_forced )
378             printk("Not enabling directed EOI because ioapic_ack_new has been "
379                    "forced on the command line\n");
380         else
381         {
382             ioapic_ack_new = false;
383             directed_eoi_enabled = true;
384             printk("Enabled directed EOI with ioapic_ack_old on!\n");
385         }
386     }
387 
388     /*
389      * The ID register is read/write in a real APIC.
390      */
391     reg0 = apic_read(APIC_ID);
392     apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0);
393 
394     /*
395      * The next two are just to see if we have sane values.
396      * They're only really relevant if we're in Virtual Wire
397      * compatibility mode, but most boxes are anymore.
398      */
399     reg0 = apic_read(APIC_LVT0);
400     apic_printk(APIC_DEBUG, "Getting LVT0: %x\n", reg0);
401     reg1 = apic_read(APIC_LVT1);
402     apic_printk(APIC_DEBUG, "Getting LVT1: %x\n", reg1);
403 
404     return 1;
405 }
406 
sync_Arb_IDs(void)407 void __init sync_Arb_IDs(void)
408 {
409     /* Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1
410        And not needed on AMD */
411     if (modern_apic())
412         return;
413     /*
414      * Wait for idle.
415      */
416     apic_wait_icr_idle();
417 
418     apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n");
419     apic_write(APIC_ICR, APIC_DEST_ALLINC | APIC_INT_LEVELTRIG | APIC_DM_INIT);
420 }
421 
422 /*
423  * An initial setup of the virtual wire mode.
424  */
init_bsp_APIC(void)425 void __init init_bsp_APIC(void)
426 {
427     unsigned long value;
428 
429     /*
430      * Don't do the setup now if we have a SMP BIOS as the
431      * through-I/O-APIC virtual wire mode might be active.
432      */
433     if (smp_found_config || !cpu_has_apic)
434         return;
435 
436     /*
437      * Do not trust the local APIC being empty at bootup.
438      */
439     clear_local_APIC();
440 
441     /*
442      * Enable APIC.
443      */
444     value = apic_read(APIC_SPIV);
445     value &= ~APIC_VECTOR_MASK;
446     value |= APIC_SPIV_APIC_ENABLED;
447 
448     /* This bit is reserved on P4/Xeon and should be cleared */
449     if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && (boot_cpu_data.x86 == 15))
450         value &= ~APIC_SPIV_FOCUS_DISABLED;
451     else
452         value |= APIC_SPIV_FOCUS_DISABLED;
453     value |= SPURIOUS_APIC_VECTOR;
454     apic_write(APIC_SPIV, value);
455 
456     /*
457      * Set up the virtual wire mode.
458      */
459     apic_write(APIC_LVT0, APIC_DM_EXTINT);
460     apic_write(APIC_LVT1, APIC_DM_NMI);
461 }
462 
apic_pm_activate(void)463 static void apic_pm_activate(void)
464 {
465     apic_pm_state.active = 1;
466 }
467 
__enable_x2apic(void)468 static void __enable_x2apic(void)
469 {
470     uint64_t msr_content;
471 
472     rdmsrl(MSR_APIC_BASE, msr_content);
473     if ( !(msr_content & APIC_BASE_EXTD) )
474     {
475         msr_content |= APIC_BASE_ENABLE | APIC_BASE_EXTD;
476         msr_content = (uint32_t)msr_content;
477         wrmsrl(MSR_APIC_BASE, msr_content);
478     }
479 }
480 
resume_x2apic(void)481 static void resume_x2apic(void)
482 {
483     if ( iommu_x2apic_enabled )
484         iommu_enable_x2apic();
485     __enable_x2apic();
486 }
487 
/*
 * Program the local APIC of the current CPU for normal operation.
 *
 * In order: set up the logical destination via init_apic_ldr(), set the
 * task priority, EOI any in-service bits left over from a previous kernel,
 * enable the APIC through SPIV (optionally with directed EOI), program
 * LVT0/LVT1, enable the error vector, and finally arm the local-APIC NMI
 * watchdog on non-boot CPUs and mark the PM state as active.
 *
 * bsp: true when called for the boot processor.  Only the boot processor
 *      may get an unmasked ExtINT on LINT0 and an unmasked NMI on LINT1.
 *
 * BUG()s if apic_id_registered() reports that this APIC is not registered.
 */
void setup_local_APIC(bool bsp)
{
    unsigned long oldvalue, value, maxlvt;
    int i, j;

    /* The low nibble of the spurious vector must be all ones. */
    BUILD_BUG_ON((SPURIOUS_APIC_VECTOR & 0x0f) != 0x0f);

    /*
     * Double-check whether this APIC is really registered.
     */
    if (!apic_id_registered())
        BUG();

    /*
     * Intel recommends to set DFR, LDR and TPR before enabling
     * an APIC.  See e.g. "AP-388 82489DX User's Manual" (Intel
     * document number 292116).  So here it goes...
     */
    init_apic_ldr();

    /*
     * Set Task Priority to reject any interrupts below FIRST_IRQ_VECTOR.
     */
    apic_write(APIC_TASKPRI, (FIRST_IRQ_VECTOR & 0xF0) - 0x10);

    /*
     * After a crash, we no longer service the interrupts and a pending
     * interrupt from previous kernel might still have ISR bit set.
     *
     * Most probably by now CPU has serviced that pending interrupt and
     * it might not have done the ack_APIC_irq() because it thought,
     * interrupt came from i8259 as ExtInt. LAPIC did not get EOI so it
     * does not clear the ISR bit and cpu thinks it has already serviced
     * the interrupt. Hence a vector might get locked. It was noticed
     * for timer irq (vector 0x31). Issue an extra EOI to clear ISR.
     *
     * One EOI is issued per set bit, walking the ISR registers from the
     * highest to the lowest.
     */
    for (i = APIC_ISR_NR - 1; i >= 0; i--) {
        value = apic_read(APIC_ISR + i*0x10);
        for (j = 31; j >= 0; j--) {
            if (value & (1u << j))
                ack_APIC_irq();
        }
    }

    /*
     * Now that we are all set up, enable the APIC
     */
    value = apic_read(APIC_SPIV);
    value &= ~APIC_VECTOR_MASK;
    /*
     * Enable APIC
     */
    value |= APIC_SPIV_APIC_ENABLED;

    /*
     * Some unknown Intel IO/APIC (or APIC) errata is biting us with
     * certain networking cards. If high frequency interrupts are
     * happening on a particular IOAPIC pin, plus the IOAPIC routing
     * entry is masked/unmasked at a high rate as well then sooner or
     * later IOAPIC line gets 'stuck', no more interrupts are received
     * from the device. If focus CPU is disabled then the hang goes
     * away, oh well :-(
     *
     * [ This bug can be reproduced easily with a level-triggered
     *   PCI Ne2000 networking cards and PII/PIII processors, dual
     *   BX chipset. ]
     */
    /*
     * Actually disabling the focus CPU check just makes the hang less
     * frequent as it makes the interrupt distribution model be more
     * like LRU than MRU (the short-term load is more even across CPUs).
     * See also the comment in end_level_ioapic_irq().  --macro
     */
#if 1
    /* Enable focus processor (bit==0) */
    value &= ~APIC_SPIV_FOCUS_DISABLED;
#else
    /* Disable focus processor (bit==1) */
    value |= APIC_SPIV_FOCUS_DISABLED;
#endif
    /*
     * Set spurious IRQ vector
     */
    value |= SPURIOUS_APIC_VECTOR;

    /*
     * Enable directed EOI
     */
    if ( directed_eoi_enabled )
    {
        value |= APIC_SPIV_DIRECTED_EOI;
        if ( bsp )
            apic_printk(APIC_VERBOSE, "Suppressing EOI broadcast\n");
    }

    apic_write(APIC_SPIV, value);

    /*
     * Set up LVT0, LVT1:
     *
     * set up through-local-APIC on the BP's LINT0. This is not
     * strictly necessary in pure symmetric-IO mode, but sometimes
     * we delegate interrupts to the 8259A.
     */
    /*
     * TODO: set up through-local-APIC from through-I/O-APIC? --macro
     */
    /* 'value' is non-zero here iff LVT0 is currently masked. */
    value = apic_read(APIC_LVT0) & APIC_LVT_MASKED;
    if (bsp && (pic_mode || !value)) {
        value = APIC_DM_EXTINT;
        apic_printk(APIC_VERBOSE, "enabled ExtINT on CPU#%d\n",
                    smp_processor_id());
    } else {
        value = APIC_DM_EXTINT | APIC_LVT_MASKED;
        if (bsp)
            apic_printk(APIC_VERBOSE, "masked ExtINT on CPU#%d\n",
                        smp_processor_id());
    }
    apic_write(APIC_LVT0, value);

    /*
     * only the BP should see the LINT1 NMI signal, obviously.
     */
    if (bsp)
        value = APIC_DM_NMI;
    else
        value = APIC_DM_NMI | APIC_LVT_MASKED;
    apic_write(APIC_LVT1, value);

    maxlvt = get_maxlvt();
    if (maxlvt > 3)     /* Due to the Pentium erratum 3AP. */
        apic_write(APIC_ESR, 0);
    oldvalue = apic_read(APIC_ESR);

    value = ERROR_APIC_VECTOR;      /* enables sending errors */
    apic_write(APIC_LVTERR, value);
    /* spec says clear errors after enabling vector. */
    if (maxlvt > 3)
        apic_write(APIC_ESR, 0);
    value = apic_read(APIC_ESR);
    if (value != oldvalue)
        apic_printk(APIC_VERBOSE,
                    "ESR value before enabling vector: %#lx  after: %#lx\n",
                    oldvalue, value);

    /* The watchdog is set up here for non-boot CPUs only. */
    if (nmi_watchdog == NMI_LOCAL_APIC && !bsp)
        setup_apic_nmi_watchdog();
    apic_pm_activate();
}
637 
/*
 * Save the local APIC state into apic_pm_state and disable the APIC.
 *
 * Does nothing (beyond reading the max LVT value) when apic_pm_state has
 * not been activated.  LVT entries that depend on the number of LVTs
 * (LVTPC, LVTTHMR, CMCI) are only saved when get_maxlvt() says they exist.
 * The APIC - and x2APIC support in the IOMMU, if it was enabled - is then
 * disabled with interrupts off.
 *
 * Always returns 0.
 */
int lapic_suspend(void)
{
    unsigned long flags;
    int maxlvt = get_maxlvt();
    if (!apic_pm_state.active)
        return 0;

    apic_pm_state.apic_id = apic_read(APIC_ID);
    apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI);
    apic_pm_state.apic_ldr = apic_read(APIC_LDR);
    apic_pm_state.apic_dfr = apic_read(APIC_DFR);
    apic_pm_state.apic_spiv = apic_read(APIC_SPIV);
    apic_pm_state.apic_lvtt = apic_read(APIC_LVTT);
    if (maxlvt >= 4)
        apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC);

    if (maxlvt >= 6) {
        apic_pm_state.apic_lvtcmci = apic_read(APIC_CMCI);
    }

    apic_pm_state.apic_lvt0 = apic_read(APIC_LVT0);
    apic_pm_state.apic_lvt1 = apic_read(APIC_LVT1);
    apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR);
    apic_pm_state.apic_tmict = apic_read(APIC_TMICT);
    apic_pm_state.apic_tdcr = apic_read(APIC_TDCR);
    if (maxlvt >= 5)
        apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR);

    /* Shut everything down with interrupts disabled. */
    local_irq_save(flags);
    disable_local_APIC();
    if ( iommu_x2apic_enabled )
        iommu_disable_x2apic();
    local_irq_restore(flags);
    return 0;
}
673 
/*
 * Restore the local APIC state saved by lapic_suspend().
 *
 * Does nothing when apic_pm_state has not been activated.  Otherwise, with
 * interrupts disabled, the APIC is re-enabled (via MSR_APIC_BASE in xAPIC
 * mode, via resume_x2apic() in x2APIC mode) and the saved registers are
 * written back.  LVTERR is kept masked while the other registers are being
 * restored and is written last, with the ESR cleared before and after.
 *
 * Always returns 0.
 */
int lapic_resume(void)
{
    uint64_t msr_content;
    unsigned long flags;
    int maxlvt;

    if (!apic_pm_state.active)
        return 0;

    local_irq_save(flags);

    /*
     * Make sure the APICBASE points to the right address
     *
     * FIXME! This will be wrong if we ever support suspend on
     * SMP! We'll need to do this as part of the CPU restore!
     */
    if ( !x2apic_enabled )
    {
        rdmsrl(MSR_APIC_BASE, msr_content);
        msr_content &= ~APIC_BASE_ADDR_MASK;
        wrmsrl(MSR_APIC_BASE,
               msr_content | APIC_BASE_ENABLE | mp_lapic_addr);
    }
    else
        resume_x2apic();

    maxlvt = get_maxlvt();
    /* Keep error interrupts masked while the state is being rebuilt. */
    apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED);
    apic_write(APIC_ID, apic_pm_state.apic_id);
    apic_write(APIC_DFR, apic_pm_state.apic_dfr);
    apic_write(APIC_LDR, apic_pm_state.apic_ldr);
    apic_write(APIC_TASKPRI, apic_pm_state.apic_taskpri);
    apic_write(APIC_SPIV, apic_pm_state.apic_spiv);
    apic_write(APIC_LVT0, apic_pm_state.apic_lvt0);
    apic_write(APIC_LVT1, apic_pm_state.apic_lvt1);
    if (maxlvt >= 5)
        apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr);

    if (maxlvt >= 6) {
        apic_write(APIC_CMCI, apic_pm_state.apic_lvtcmci);
    }

    if (maxlvt >= 4)
        apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc);
    apic_write(APIC_LVTT, apic_pm_state.apic_lvtt);
    apic_write(APIC_TDCR, apic_pm_state.apic_tdcr);
    apic_write(APIC_TMICT, apic_pm_state.apic_tmict);
    /* Clear the ESR, restore the saved LVTERR, then clear the ESR again. */
    apic_write(APIC_ESR, 0);
    apic_read(APIC_ESR);
    apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr);
    apic_write(APIC_ESR, 0);
    apic_read(APIC_ESR);
    local_irq_restore(flags);
    return 0;
}
730 
731 
732 /*
733  * Detect and enable local APICs on non-SMP boards.
734  * Original code written by Keir Fraser.
735  */
736 
lapic_disable(const char * str)737 static int __init cf_check lapic_disable(const char *str)
738 {
739     enable_local_apic = -1;
740     setup_clear_cpu_cap(X86_FEATURE_APIC);
741     return 0;
742 }
743 custom_param("nolapic", lapic_disable);
744 boolean_param("lapic", enable_local_apic);
745 
apic_set_verbosity(const char * str)746 static int __init cf_check apic_set_verbosity(const char *str)
747 {
748     if (strcmp("debug", str) == 0)
749         apic_verbosity = APIC_DEBUG;
750     else if (strcmp("verbose", str) == 0)
751         apic_verbosity = APIC_VERBOSE;
752     else
753         return -EINVAL;
754 
755     return 0;
756 }
757 custom_param("apic_verbosity", apic_set_verbosity);
758 
detect_init_APIC(void)759 static int __init detect_init_APIC (void)
760 {
761     uint64_t msr_content;
762 
763     /* Disabled by kernel option? */
764     if (enable_local_apic < 0)
765         return -1;
766 
767     if ( rdmsr_safe(MSR_APIC_BASE, msr_content) )
768     {
769         printk("No local APIC present\n");
770         return -1;
771     }
772 
773     if (!cpu_has_apic) {
774         /*
775          * Over-ride BIOS and try to enable the local
776          * APIC only if "lapic" specified.
777          */
778         if (enable_local_apic <= 0) {
779             printk("Local APIC disabled by BIOS -- "
780                    "you can enable it with \"lapic\"\n");
781             return -1;
782         }
783         /*
784          * Some BIOSes disable the local APIC in the
785          * APIC_BASE MSR. This can only be done in
786          * software for Intel P6 or later and AMD K7
787          * (Model > 1) or later.
788          */
789         if ( !(msr_content & APIC_BASE_ENABLE) )
790         {
791             printk("Local APIC disabled by BIOS -- reenabling.\n");
792             msr_content &= ~APIC_BASE_ADDR_MASK;
793             msr_content |= APIC_BASE_ENABLE | APIC_DEFAULT_PHYS_BASE;
794             wrmsrl(MSR_APIC_BASE, msr_content);
795             enabled_via_apicbase = true;
796         }
797     }
798     /*
799      * The APIC feature bit should now be enabled
800      * in `cpuid'
801      */
802     if (!(cpuid_edx(1) & cpufeat_mask(X86_FEATURE_APIC))) {
803         printk("Could not enable APIC!\n");
804         return -1;
805     }
806 
807     setup_force_cpu_cap(X86_FEATURE_APIC);
808     mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
809 
810     /* The BIOS may have set up the APIC at some other address */
811     if ( msr_content & APIC_BASE_ENABLE )
812         mp_lapic_addr = msr_content & APIC_BASE_ADDR_MASK;
813 
814     if (nmi_watchdog != NMI_NONE)
815         nmi_watchdog = NMI_LOCAL_APIC;
816 
817     printk("Found and enabled local APIC!\n");
818 
819     apic_pm_activate();
820 
821     return 0;
822 }
823 
x2apic_ap_setup(void)824 void x2apic_ap_setup(void)
825 {
826     if ( x2apic_enabled )
827         __enable_x2apic();
828 }
829 
/*
 * Decide on, and switch to, x2APIC mode on the boot processor.
 *
 * Nothing happens when the CPU lacks x2APIC, or when "x2apic" was disabled
 * on the command line and x2APIC is not already on.  If the IOMMU supports
 * x2APIC, the IO-APIC state is saved and masked (together with the 8259A)
 * while iommu_enable_x2apic() runs, and is restored afterwards.
 *
 * Exit paths:
 *  - out:         failed to allocate or save the IO-APIC entries; x2APIC is
 *                 left as it was.
 *  - restore_out: the IOMMU asked for xAPIC mode (-ENXIO); the IO-APIC
 *                 state is restored and x2APIC is not enabled.  Panics if
 *                 x2APIC had already been enabled.
 *  - fallthrough: x2APIC is enabled (if it was not yet) and the APIC driver
 *                 is switched to the one returned by apic_x2apic_probe().
 */
void __init x2apic_bsp_setup(void)
{
    struct IO_APIC_route_entry **ioapic_entries = NULL;
    bool iommu_x2apic;
    const char *orig_name;

    if ( !cpu_has_x2apic )
        return;

    if ( !opt_x2apic )
    {
        if ( !x2apic_enabled )
        {
            printk("Not enabling x2APIC: disabled by cmdline.\n");
            return;
        }
        printk("x2APIC: Already enabled by BIOS: Ignoring cmdline disable.\n");
    }

    iommu_x2apic = iommu_supports_x2apic();
    if ( iommu_x2apic )
    {
        if ( (ioapic_entries = alloc_ioapic_entries()) == NULL )
        {
            printk("Allocate ioapic_entries failed\n");
            goto out;
        }

        if ( save_IO_APIC_setup(ioapic_entries) )
        {
            printk("Saving IO-APIC state failed\n");
            goto out;
        }

        /* Keep interrupt sources quiet while the IOMMU is reconfigured. */
        mask_8259A();
        mask_IO_APIC_setup(ioapic_entries);

        switch ( iommu_enable_x2apic() )
        {
        case 0:
            iommu_x2apic_enabled = true;
            break;

        case -ENXIO: /* ACPI_DMAR_X2APIC_OPT_OUT set */
            if ( x2apic_enabled )
                panic("IOMMU requests xAPIC mode, but x2APIC already enabled by firmware\n");

            printk("Not enabling x2APIC (upon firmware request)\n");
            iommu_x2apic_enabled = false;
            goto restore_out;

        default:
            /* x2APIC is still enabled below, just without the IOMMU part. */
            printk(XENLOG_ERR "Failed to enable Interrupt Remapping\n");
            iommu_x2apic_enabled = false;
            break;
        }

        if ( iommu_x2apic_enabled )
            force_iommu = 1;
    }

    if ( !x2apic_enabled )
    {
        x2apic_enabled = true;
        __enable_x2apic();
    }

    orig_name = genapic.name;
    genapic = *apic_x2apic_probe();
    if ( genapic.name != orig_name )
        printk("Switched to APIC driver %s\n", genapic.name);

restore_out:
    /*
     * iommu_x2apic_enabled and iommu_supports_x2apic() cannot be used here
     * in the error case.
     */
    if ( iommu_x2apic )
    {
        /*
         * NB: do not use raw mode when restoring entries if the iommu has
         * been enabled during the process, because the entries need to be
         * translated and added to the remapping table in that case.
         */
        restore_IO_APIC_setup(ioapic_entries, !iommu_x2apic_enabled);
        unmask_8259A();
    }

out:
    if ( ioapic_entries )
        free_ioapic_entries(ioapic_entries);
}
922 
init_apic_mappings(void)923 void __init init_apic_mappings(void)
924 {
925     unsigned long apic_phys;
926 
927     if ( x2apic_enabled )
928         goto __next;
929     /*
930      * If no local APIC can be found then set up a fake all
931      * zeroes page to simulate the local APIC and another
932      * one for the IO-APIC.
933      */
934     if (!smp_found_config && detect_init_APIC()) {
935         apic_phys = __pa(alloc_xenheap_page());
936         clear_page(__va(apic_phys));
937     } else
938         apic_phys = mp_lapic_addr;
939 
940     set_fixmap_nocache(FIX_APIC_BASE, apic_phys);
941     apic_printk(APIC_VERBOSE, "mapped APIC to %08Lx (%08lx)\n", APIC_BASE,
942                 apic_phys);
943 
944 __next:
945     /*
946      * Fetch the APIC ID of the BSP in case we have a
947      * default configuration (or the MP table is broken).
948      */
949     if (boot_cpu_physical_apicid == -1U)
950         boot_cpu_physical_apicid = get_apic_id();
951     x86_cpu_to_apicid[0] = get_apic_id();
952 
953     ioapic_init();
954 }
955 
/*****************************************************************************
 * APIC calibration
 *
 * The APIC is programmed in bus cycles.
 * Timeout values should be specified in real time units.
 * The "cheapest" time source is the cycle counter.
 *
 * Thus, we need a mapping from: bus cycles <- cycle counter <- system time
 *
 * The calibration is currently a bit shoddy since it requires the external
 * timer chip to generate periodic timer interrupts.
 *****************************************************************************/
968 
/* Used for system time scaling: converts nanoseconds into APIC bus cycles. */
static u32 __read_mostly bus_scale; /* scaling factor: ns -> bus cycles */
971 
972 /*
973  * The timer chip is already set up at HZ interrupts per second here,
974  * but we do not accept timer interrupts yet. We only allow the BP
975  * to calibrate.
976  */
get_8254_timer_count(void)977 static unsigned int __init get_8254_timer_count(void)
978 {
979     /*extern spinlock_t i8253_lock;*/
980     /*unsigned long flags;*/
981 
982     unsigned int count;
983 
984     /*spin_lock_irqsave(&i8253_lock, flags);*/
985 
986     outb_p(PIT_LTCH_CH(0), PIT_MODE);
987     count = inb_p(PIT_CH0);
988     count |= inb_p(PIT_CH0) << 8;
989 
990     /*spin_unlock_irqrestore(&i8253_lock, flags);*/
991 
992     return count;
993 }
994 
995 /* next tick in 8254 can be caught by catching timer wraparound */
wait_8254_wraparound(void)996 static void __init wait_8254_wraparound(void)
997 {
998     unsigned int curr_count, prev_count;
999 
1000     curr_count = get_8254_timer_count();
1001     do {
1002         prev_count = curr_count;
1003         curr_count = get_8254_timer_count();
1004     } while (prev_count >= curr_count);
1005 }
1006 
1007 /*
1008  * This function sets up the local APIC timer, with a timeout of
1009  * 'clocks' APIC bus clock. During calibration we actually call
1010  * this function twice on the boot CPU, once with a bogus timeout
1011  * value, second time for real. The other (noncalibrating) CPUs
1012  * call this function only once, with the real, calibrated value.
1013  */
1014 
1015 #define APIC_DIVISOR 1
1016 
__setup_APIC_LVTT(unsigned int clocks)1017 static void __setup_APIC_LVTT(unsigned int clocks)
1018 {
1019     unsigned int lvtt_value, tmp_value;
1020 
1021     if ( tdt_enabled )
1022     {
1023         lvtt_value = APIC_TIMER_MODE_TSC_DEADLINE | LOCAL_TIMER_VECTOR;
1024         apic_write(APIC_LVTT, lvtt_value);
1025 
1026         /*
1027          * See Intel SDM: TSC-Deadline Mode chapter. In xAPIC mode,
1028          * writing to the APIC LVTT and TSC_DEADLINE MSR isn't serialized.
1029          * According to Intel, MFENCE can do the serialization here.
1030          */
1031         asm volatile( "mfence" : : : "memory" );
1032 
1033         return;
1034     }
1035 
1036     /* NB. Xen uses local APIC timer in one-shot mode. */
1037     lvtt_value = APIC_TIMER_MODE_ONESHOT | LOCAL_TIMER_VECTOR;
1038     apic_write(APIC_LVTT, lvtt_value);
1039 
1040     tmp_value = apic_read(APIC_TDCR) & ~APIC_TDR_DIV_MASK;
1041     apic_write(APIC_TDCR, tmp_value | PASTE(APIC_TDR_DIV_, APIC_DIVISOR));
1042 
1043     apic_write(APIC_TMICT, clocks / APIC_DIVISOR);
1044 }
1045 
/*
 * Program the timer LVT entry with a zero initial count, with local
 * interrupts disabled for the duration of the register writes.
 */
static void setup_APIC_timer(void)
{
    unsigned long irq_state;

    local_irq_save(irq_state);
    __setup_APIC_LVTT(0);
    local_irq_restore(irq_state);
}
1053 
/*
 * Build one x86_cpu_id entry: Intel, family 6, model 'm', matching only
 * when X86_FEATURE_TSC_DEADLINE is present.  'fr' is stored in driver_data
 * and is either an immediate microcode revision or a function returning
 * one; check_deadline_errata() tells the two apart.
 */
#define DEADLINE_MODEL_MATCH(m, fr) \
    { .vendor = X86_VENDOR_INTEL, .family = 6, .model = (m), \
      .feature = X86_FEATURE_TSC_DEADLINE, \
      .driver_data = (void *)(unsigned long)(fr) }
1058 
hsx_deadline_rev(void)1059 static unsigned int __init hsx_deadline_rev(void)
1060 {
1061     switch ( boot_cpu_data.x86_mask )
1062     {
1063     case 0x02: return 0x3a; /* EP */
1064     case 0x04: return 0x0f; /* EX */
1065     }
1066 
1067     return ~0U;
1068 }
1069 
bdx_deadline_rev(void)1070 static unsigned int __init bdx_deadline_rev(void)
1071 {
1072     switch ( boot_cpu_data.x86_mask )
1073     {
1074     case 0x02: return 0x00000011;
1075     case 0x03: return 0x0700000e;
1076     case 0x04: return 0x0f00000c;
1077     case 0x05: return 0x0e000003;
1078     }
1079 
1080     return ~0U;
1081 }
1082 
skx_deadline_rev(void)1083 static unsigned int __init skx_deadline_rev(void)
1084 {
1085     switch ( boot_cpu_data.x86_mask )
1086     {
1087     case 0x00 ... 0x02: return ~0U;
1088     case 0x03: return 0x01000136;
1089     case 0x04: return 0x02000014;
1090     }
1091 
1092     return 0;
1093 }
1094 
/*
 * CPU models consulted by check_deadline_errata().  The second argument of
 * each entry is the microcode revision the running revision is compared
 * against, given either as an immediate value or as a function that picks
 * the value by stepping.  The table is terminated by an empty entry.
 */
static const struct x86_cpu_id __initconstrel deadline_match[] = {
    DEADLINE_MODEL_MATCH(0x3c, 0x22),             /* Haswell */
    DEADLINE_MODEL_MATCH(0x3f, hsx_deadline_rev), /* Haswell EP/EX */
    DEADLINE_MODEL_MATCH(0x45, 0x20),             /* Haswell D */
    DEADLINE_MODEL_MATCH(0x46, 0x17),             /* Haswell H */

    DEADLINE_MODEL_MATCH(0x3d, 0x25),             /* Broadwell */
    DEADLINE_MODEL_MATCH(0x47, 0x17),             /* Broadwell H */
    DEADLINE_MODEL_MATCH(0x4f, 0x0b000020),       /* Broadwell EP/EX */
    DEADLINE_MODEL_MATCH(0x56, bdx_deadline_rev), /* Broadwell D */

    DEADLINE_MODEL_MATCH(0x4e, 0xb2),             /* Skylake M */
    DEADLINE_MODEL_MATCH(0x55, skx_deadline_rev), /* Skylake X */
    DEADLINE_MODEL_MATCH(0x5e, 0xb2),             /* Skylake D */

    DEADLINE_MODEL_MATCH(0x8e, 0x52),             /* Kabylake M */
    DEADLINE_MODEL_MATCH(0x9e, 0x52),             /* Kabylake D */

    {}
};
1115 
check_deadline_errata(void)1116 static void __init check_deadline_errata(void)
1117 {
1118     const struct x86_cpu_id *m;
1119     unsigned int rev;
1120 
1121     if ( cpu_has_hypervisor )
1122         return;
1123 
1124     m = x86_match_cpu(deadline_match);
1125     if ( !m )
1126         return;
1127 
1128     /*
1129      * Function pointers will have the MSB set due to address layout,
1130      * immediate revisions will not.
1131      */
1132     if ( (long)m->driver_data < 0 )
1133         rev = ((unsigned int (*)(void))(m->driver_data))();
1134     else
1135         rev = (unsigned long)m->driver_data;
1136 
1137     if ( this_cpu(cpu_sig).rev >= rev )
1138         return;
1139 
1140     setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE);
1141     printk(XENLOG_WARNING "TSC_DEADLINE disabled due to Errata; "
1142            "please update microcode to version %#x (or later)\n", rev);
1143 }
1144 
apic_tmcct_read(void)1145 uint32_t __init apic_tmcct_read(void)
1146 {
1147     if ( x2apic_enabled )
1148     {
1149         /*
1150          * Have a barrier here just like in rdtsc_ordered() as it's
1151          * unclear whether this non-serializing RDMSR also can be
1152          * executed speculatively (like RDTSC can).
1153          */
1154         alternative("lfence", "mfence", X86_FEATURE_MFENCE_RDTSC);
1155         return apic_rdmsr(APIC_TMCCT);
1156     }
1157 
1158     return apic_mem_read(APIC_TMCCT);
1159 }
1160 
/*
 * In this function we calibrate APIC bus clocks to the external
 * timer. Unfortunately we cannot use jiffies and the timer irq
 * to calibrate, since some later bootup code depends on getting
 * the first irq? Ugh.
 *
 * We want to do the calibration only once since we
 * want to have local timer irqs synchronised. CPUs connected
 * by the same APIC bus have the very same bus frequency.
 * And we want to have irqs off anyway, no accidental
 * APIC irq that way.
 */
1173 
1174 #define BUS_SCALE_SHIFT 18
1175 
calibrate_APIC_clock(void)1176 static void __init calibrate_APIC_clock(void)
1177 {
1178     unsigned long bus_freq; /* KAF: pointer-size avoids compile warns. */
1179     unsigned int bus_cycle; /* length of one bus cycle in pico-seconds */
1180 #define LOOPS_FRAC 10U      /* measure for one tenth of a second */
1181 
1182     apic_printk(APIC_VERBOSE, "calibrating APIC timer ...\n");
1183 
1184     /*
1185      * Setup the APIC counter to maximum. There is no way the lapic
1186      * can underflow in the 100ms detection time frame.
1187      */
1188     __setup_APIC_LVTT(0xffffffffU);
1189 
1190     bus_freq = calibrate_apic_timer();
1191     if ( !bus_freq )
1192     {
1193         unsigned int i, tt1, tt2;
1194         unsigned long t1, t2;
1195 
1196         ASSERT(!xen_guest);
1197 
1198         /*
1199          * The timer chip counts down to zero. Let's wait for a wraparound to
1200          * start exact measurement (the current tick might have been already
1201          * half done):
1202          */
1203         wait_8254_wraparound();
1204 
1205         /* We wrapped around just now. Let's start: */
1206         t1 = rdtsc_ordered();
1207         tt1 = apic_read(APIC_TMCCT);
1208 
1209         /* Let's wait HZ / LOOPS_FRAC ticks: */
1210         for ( i = 0; i < HZ / LOOPS_FRAC; ++i )
1211             wait_8254_wraparound();
1212 
1213         t2 = rdtsc_ordered();
1214         tt2 = apic_read(APIC_TMCCT);
1215 
1216         bus_freq = (tt1 - tt2) * APIC_DIVISOR * LOOPS_FRAC;
1217 
1218         apic_printk(APIC_VERBOSE, "..... CPU clock speed is %lu.%04lu MHz.\n",
1219                     ((t2 - t1) * LOOPS_FRAC) / 1000000,
1220                     (((t2 - t1) * LOOPS_FRAC) / 100) % 10000);
1221     }
1222 
1223     apic_printk(APIC_VERBOSE, "..... host bus clock speed is %ld.%04ld MHz.\n",
1224                 bus_freq / 1000000, (bus_freq / 100) % 10000);
1225 
1226     /* set up multipliers for accurate timer code */
1227     bus_cycle  = 1000000000000UL / bus_freq; /* in pico seconds */
1228     bus_cycle += (1000000000000UL % bus_freq) * 2 > bus_freq;
1229     bus_scale  = (1000 << BUS_SCALE_SHIFT) / bus_cycle;
1230     bus_scale += ((1000 << BUS_SCALE_SHIFT) % bus_cycle) * 2 > bus_cycle;
1231 
1232     apic_printk(APIC_VERBOSE, "..... bus_scale = %#x\n", bus_scale);
1233     /* reset APIC to zero timeout value */
1234     __setup_APIC_LVTT(0);
1235 
1236 #undef LOOPS_FRAC
1237 }
1238 
setup_boot_APIC_clock(void)1239 void __init setup_boot_APIC_clock(void)
1240 {
1241     unsigned long flags;
1242     apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n");
1243     using_apic_timer = true;
1244 
1245     check_deadline_errata();
1246 
1247     if ( !boot_cpu_has(X86_FEATURE_TSC_DEADLINE) )
1248         tdt_enable = false;
1249 
1250     local_irq_save(flags);
1251 
1252     if ( !tdt_enable || apic_verbosity )
1253         calibrate_APIC_clock();
1254 
1255     if ( tdt_enable )
1256     {
1257         printk(KERN_DEBUG "TSC deadline timer enabled\n");
1258         tdt_enabled = true;
1259     }
1260 
1261     setup_APIC_timer();
1262 
1263     local_irq_restore(flags);
1264 }
1265 
/*
 * Timer setup for CPUs other than the boot CPU: only the timer LVT entry
 * is programmed via setup_APIC_timer(); no calibration is performed here.
 */
void setup_secondary_APIC_clock(void)
{
    setup_APIC_timer();
}
1270 
disable_APIC_timer(void)1271 void disable_APIC_timer(void)
1272 {
1273     if (using_apic_timer) {
1274         unsigned long v;
1275 
1276         /* Work around AMD Erratum 411. This is a nice thing to do anyway. */
1277         apic_write(APIC_TMICT, 0);
1278 
1279         v = apic_read(APIC_LVTT);
1280         apic_write(APIC_LVTT, v | APIC_LVT_MASKED);
1281     }
1282 }
1283 
enable_APIC_timer(void)1284 void enable_APIC_timer(void)
1285 {
1286     if (using_apic_timer) {
1287         unsigned long v;
1288 
1289         v = apic_read(APIC_LVTT);
1290         apic_write(APIC_LVTT, v & ~APIC_LVT_MASKED);
1291     }
1292 }
1293 
1294 #undef APIC_DIVISOR
1295 
1296 /*
1297  * reprogram_timer: Reprogram the APIC timer.
1298  * Timeout is a Xen system time (nanoseconds since boot); 0 disables the timer.
1299  * Returns 1 on success; 0 if the timeout is too soon or is in the past.
1300  */
reprogram_timer(s_time_t timeout)1301 int reprogram_timer(s_time_t timeout)
1302 {
1303     s_time_t expire;
1304     u32 apic_tmict = 0;
1305 
1306     /* No local APIC: timer list is polled via the PIT interrupt. */
1307     if ( !cpu_has_apic )
1308         return 1;
1309 
1310     if ( tdt_enabled )
1311     {
1312         wrmsrl(MSR_IA32_TSC_DEADLINE, timeout ? stime2tsc(timeout) : 0);
1313         return 1;
1314     }
1315 
1316     if ( timeout && ((expire = timeout - NOW()) > 0) )
1317         apic_tmict = min_t(uint64_t, (bus_scale * expire) >> BUS_SCALE_SHIFT,
1318                            UINT32_MAX);
1319 
1320     apic_write(APIC_TMICT, (unsigned long)apic_tmict);
1321 
1322     return apic_tmict || !timeout;
1323 }
1324 
/*
 * Handler installed for LOCAL_TIMER_VECTOR by apic_intr_init(): calls
 * ack_APIC_irq(), bumps the apic_timer perf counter and raises
 * TIMER_SOFTIRQ.
 */
static void cf_check apic_timer_interrupt(void)
{
    ack_APIC_irq();
    perfc_incr(apic_timer);
    raise_softirq(TIMER_SOFTIRQ);
}
1331 
/*
 * Per-CPU flag: set by smp_send_state_dump() for the target CPU, then
 * tested and cleared by spurious_interrupt() on that CPU.
 */
static DEFINE_PER_CPU(bool, state_dump_pending);
1333 
/*
 * Ask 'cpu' to dump its execution state: mark the request in its per-CPU
 * state_dump_pending flag, then send it an IPI on SPURIOUS_APIC_VECTOR.
 */
void smp_send_state_dump(unsigned int cpu)
{
    /* We overload the spurious interrupt handler to handle the dump. */
    per_cpu(state_dump_pending, cpu) = true;
    send_IPI_mask(cpumask_of(cpu), SPURIOUS_APIC_VECTOR);
}
1340 
1341 /*
1342  * Spurious interrupts should _never_ happen with our APIC/SMP architecture.
1343  */
spurious_interrupt(void)1344 static void cf_check spurious_interrupt(void)
1345 {
1346     /*
1347      * Check if this is a vectored interrupt (most likely, as this is probably
1348      * a request to dump local CPU state or to continue NMI handling).
1349      * Vectored interrupts are ACKed; spurious interrupts are not.
1350      */
1351     if (apic_isr_read(SPURIOUS_APIC_VECTOR)) {
1352         bool is_spurious;
1353 
1354         ack_APIC_irq();
1355         is_spurious = !nmi_check_continuation();
1356         if (this_cpu(state_dump_pending)) {
1357             this_cpu(state_dump_pending) = false;
1358             dump_execstate(get_irq_regs());
1359             is_spurious = false;
1360         }
1361 
1362         if ( !is_spurious )
1363             return;
1364     }
1365 
1366     /* see sw-dev-man vol 3, chapter 7.4.13.5 */
1367     printk(KERN_INFO "spurious APIC interrupt on CPU#%d, should "
1368            "never happen.\n", smp_processor_id());
1369 }
1370 
1371 /*
1372  * This interrupt should never happen with our APIC/SMP architecture
1373  */
1374 
error_interrupt(void)1375 static void cf_check error_interrupt(void)
1376 {
1377     static const char *const esr_fields[] = {
1378         ", Send CS error",
1379         ", Receive CS error",
1380         ", Send accept error",
1381         ", Receive accept error",
1382         ", Redirectable IPI",
1383         ", Send illegal vector",
1384         ", Received illegal vector",
1385         ", Illegal register address",
1386     };
1387     const char *entries[ARRAY_SIZE(esr_fields)];
1388     unsigned int v, v1;
1389     unsigned int i;
1390 
1391     /* First tickle the hardware, only then report what went on. -- REW */
1392     v = apic_read(APIC_ESR);
1393     apic_write(APIC_ESR, 0);
1394     v1 = apic_read(APIC_ESR);
1395     ack_APIC_irq();
1396 
1397     for ( i = 0; i < ARRAY_SIZE(entries); ++i )
1398         entries[i] = v1 & (1 << i) ? esr_fields[i] : "";
1399     printk(XENLOG_DEBUG
1400            "APIC error on CPU%u: %02x(%02x)%s%s%s%s%s%s%s%s\n",
1401            smp_processor_id(), v, v1,
1402            entries[7], entries[6], entries[5], entries[4],
1403            entries[3], entries[2], entries[1], entries[0]);
1404 }
1405 
/*
 * Handler for the performance counter interrupt (installed on
 * PMU_APIC_VECTOR by apic_intr_init()): calls ack_APIC_irq() and then
 * hands off to vpmu_do_interrupt().
 */

static void cf_check pmu_interrupt(void)
{
    ack_APIC_irq();
    vpmu_do_interrupt();
}
1415 
/*
 * Call smp_intr_init(), then register this file's handlers for the local
 * timer, spurious, error and performance counter vectors via
 * set_direct_apic_vector().
 */
void __init apic_intr_init(void)
{
    smp_intr_init();

    /* self generated IPI for local APIC timer */
    set_direct_apic_vector(LOCAL_TIMER_VECTOR, apic_timer_interrupt);

    /* IPI vectors for APIC spurious and error interrupts */
    set_direct_apic_vector(SPURIOUS_APIC_VECTOR, spurious_interrupt);
    set_direct_apic_vector(ERROR_APIC_VECTOR, error_interrupt);

    /* Performance Counters Interrupt */
    set_direct_apic_vector(PMU_APIC_VECTOR, pmu_interrupt);
}
1430 
1431 /*
1432  * This initializes the IO-APIC and APIC hardware if this is
1433  * a UP kernel.
1434  */
APIC_init_uniprocessor(void)1435 int __init APIC_init_uniprocessor (void)
1436 {
1437     if (enable_local_apic < 0)
1438         setup_clear_cpu_cap(X86_FEATURE_APIC);
1439 
1440     if (!smp_found_config && !cpu_has_apic) {
1441         skip_ioapic_setup = true;
1442         return -1;
1443     }
1444 
1445     /*
1446      * Complain if the BIOS pretends there is one.
1447      */
1448     if (!cpu_has_apic) {
1449         printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n",
1450                boot_cpu_physical_apicid);
1451         skip_ioapic_setup = true;
1452         return -1;
1453     }
1454 
1455     verify_local_APIC();
1456 
1457     connect_bsp_APIC();
1458 
1459     /*
1460      * Hack: In case of kdump, after a crash, kernel might be booting
1461      * on a cpu with non-zero lapic id. But boot_cpu_physical_apicid
1462      * might be zero if read from MP tables. Get it from LAPIC.
1463      */
1464 #ifdef CONFIG_CRASH_DUMP
1465     boot_cpu_physical_apicid = get_apic_id();
1466 #endif
1467     physids_clear(phys_cpu_present_map);
1468     physid_set(boot_cpu_physical_apicid, phys_cpu_present_map);
1469 
1470     if ( !skip_ioapic_setup && nr_ioapics )
1471         /* Sanitize the IO-APIC pins before enabling the lapic LVTERR/ESR. */
1472         enable_IO_APIC();
1473 
1474     setup_local_APIC(true);
1475 
1476     if (nmi_watchdog == NMI_LOCAL_APIC)
1477         check_nmi_watchdog();
1478 
1479     if (smp_found_config)
1480         if (!skip_ioapic_setup && nr_ioapics)
1481             setup_IO_APIC();
1482 
1483     setup_boot_APIC_clock();
1484 
1485     return 0;
1486 }
1487 
apic_mode_to_str(const enum apic_mode mode)1488 static const char * __init apic_mode_to_str(const enum apic_mode mode)
1489 {
1490     switch ( mode )
1491     {
1492         case APIC_MODE_INVALID:
1493             return "invalid";
1494         case APIC_MODE_DISABLED:
1495             return "disabled";
1496         case APIC_MODE_XAPIC:
1497             return "xapic";
1498         case APIC_MODE_X2APIC:
1499             return "x2apic";
1500         default:
1501             return "unrecognised";
1502     }
1503 }
1504 
1505 /* Needs to be called during startup.  It records the state the BIOS
1506  * leaves the local APIC so we can undo upon kexec.
1507  */
record_boot_APIC_mode(void)1508 void __init record_boot_APIC_mode(void)
1509 {
1510     /* Sanity check - we should only ever run once, but could possibly
1511      * be called several times */
1512     if ( APIC_MODE_INVALID != apic_boot_mode )
1513         return;
1514 
1515     apic_boot_mode = current_local_apic_mode();
1516 
1517     apic_printk(APIC_DEBUG, "APIC boot state is '%s'\n",
1518                 apic_mode_to_str(apic_boot_mode));
1519 }
1520 
1521 /* Look at the bits in MSR_APIC_BASE and work out which APIC mode we are in */
current_local_apic_mode(void)1522 enum apic_mode current_local_apic_mode(void)
1523 {
1524     u64 msr_contents;
1525 
1526     rdmsrl(MSR_APIC_BASE, msr_contents);
1527 
1528     /* Reading EXTD bit from the MSR is only valid if CPUID
1529      * says so, else reserved */
1530     if ( boot_cpu_has(X86_FEATURE_X2APIC) && (msr_contents & APIC_BASE_EXTD) )
1531         return APIC_MODE_X2APIC;
1532 
1533     /* EN bit should always be valid as long as we can read the MSR
1534      */
1535     if ( msr_contents & APIC_BASE_ENABLE )
1536         return APIC_MODE_XAPIC;
1537 
1538     return APIC_MODE_DISABLED;
1539 }
1540 
1541 
/*
 * Sanity check: trigger BUG_ON() if apic_isr_read() reports a non-zero
 * value for 'vector'.
 */
void check_for_unexpected_msi(unsigned int vector)
{
    BUG_ON(apic_isr_read(vector));
}
1546