1 /**
2  * \file
 * \brief Performance Monitoring using P5/P6 Measurement Counters.
4  *
 * Define exactly one of CPU_PENTIUM, CPU_P6 or CPU_K7.
6  */
7 /*
8  * (c) 2008-2009 Adam Lackorzynski <adam@os.inf.tu-dresden.de>,
9  *               Frank Mehnert <fm3@os.inf.tu-dresden.de>,
10  *               Lars Reuther <reuther@os.inf.tu-dresden.de>
11  *     economic rights: Technische Universität Dresden (Germany)
12  * This file is part of TUD:OS and distributed under the terms of the
13  * GNU Lesser General Public License 2.1.
14  * Please see the COPYING-LGPL-2.1 file for details.
15  */
16 
17 #ifndef __L4UTIL_PERFORM_H
18 #define __L4UTIL_PERFORM_H
19 
20 #include <l4/sys/types.h>
21 #include <l4/sys/compiler.h>
22 
23 EXTERN_C_BEGIN
24 
25 extern const char*strp6pmc_event(l4_uint32_t event);
26 
27 #ifndef CONFIG_PERFORM_ONLY_PROTOTYPES
28 
29 #if ! (defined CPU_PENTIUM  ^ defined CPU_P6 ^ defined CPU_K7)
30 
31 #error You must define your target architecture.
32 #error Define EITHER CPU_PENTIUM for Intel Pentium or CPU_P6 for Intel PPro/PII/PIII.
33 
34 #else
35 
36 /* P5/P6/K7 section */
37 
/* Macros for access to model specific registers (MSR) */
39 
/**
 * Write a 64-bit Model Specific Register (WRMSR).
 *
 * \param reg  Number of the register; handed to the instruction in ECX.
 * \param val  Address of the 64-bit value. Its low 32 bits are passed in
 *             EAX, its high 32 bits in EDX.
 *
 * This can only be called at privilege level 0.  With L4, the kernel
 * emulates the WRMSR when calling in PL 3.
 */
static inline void l4_i586_wrmsr(unsigned reg, unsigned long long *val)
{
  /* Split the value arithmetically: reading *val through an `unsigned *`
     would break the strict-aliasing rules. */
  unsigned eax = (unsigned)(*val & 0xffffffffULL);
  unsigned edx = (unsigned)(*val >> 32);
  unsigned ecx = reg;

  /* All three registers are read-write operands, so the compiler does not
     rely on their contents after the instruction. */
  __asm__ __volatile__(
        ".byte 0xf; .byte 0x30\n"	/* wrmsr */
        : "+a" (eax), "+d" (edx), "+c" (ecx)
        );
}
53 
/**
 * Read a 64-bit Model Specific Register (RDMSR).
 *
 * \param reg      Number of the register; handed to the instruction in ECX.
 * \retval val     Receives the 64-bit value (EDX is the high half, EAX the
 *                 low half).
 *
 * This can only be called at privilege level 0.  With L4, the kernel
 * emulates the RDMSR when calling in PL 3.
 */
static inline void l4_i586_rdmsr(unsigned reg, unsigned long long *val)
{
  unsigned lo, hi;
  unsigned ecx = reg;

  __asm__ __volatile__(
        ".byte 0xf; .byte 0x32\n"	/* rdmsr */
        : "=a" (lo), "=d" (hi), "+c" (ecx)
        );

  /* Assemble the result in C instead of storing through `unsigned *`
     aliases of the 64-bit object. */
  *val = ((unsigned long long)hi << 32) | lo;
}
68 
69 
70 #ifdef CPU_PENTIUM
71 /* Pentium section */
72 
/* Functions and events defined here are only usable on Pentium
   processors. The P6 architecture does NOT support this kind of measuring
   and these events; it has its own counters and its own events.
   See the P6 section for details. */
77 
78 /* from l4linux/arch/l4-i386/include/perform.h */
79 
/**
 * Reset both Pentium event counters by writing 0 to the MSRs 0x12 and 0x13.
 *
 * Uses WRMSR, see l4_i586_wrmsr() for the privilege requirements.
 */
static inline void
l4_i586_reset_event_counter(void)
{
  unsigned ecx = 0x12;

  /* EDX:EAX (the value to write) stay 0 for both writes; only ECX (the MSR
     number) changes in between, hence it is a read-write operand. */
  __asm__ __volatile__(
		".byte 0x0f, 0x30\n"		/* wrmsr: MSR 0x12 <- 0 */
		"movl $0x13, %%ecx\n"
		".byte 0x0f, 0x30\n"		/* wrmsr: MSR 0x13 <- 0 */
		: "+c" (ecx)
		: "a" (0), "d" (0)
		);
}
91 
/**
 * Read both Pentium event counters with their full width.
 *
 * \retval counter0  Receives the 64-bit content of MSR 0x12.
 * \retval counter1  Receives the 64-bit content of MSR 0x13.
 *
 * Event counting is not stopped before the counters are read.
 */
static inline void
l4_i586_read_event_counter_long(long long *counter0, long long *counter1)
{
  unsigned lo0, hi0, lo1, hi1;
  unsigned ecx0 = 0x12, ecx1 = 0x13;

  /* The results are returned as asm outputs and stored from C, so the
     compiler knows that *counter0 and *counter1 are modified. */
  __asm__ __volatile__(
	       ".byte 0x0f, 0x32\n"		/* rdmsr 0x12 */
	       : "=a" (lo0), "=d" (hi0), "+c" (ecx0)
	       );
  __asm__ __volatile__(
	       ".byte 0x0f, 0x32\n"		/* rdmsr 0x13 */
	       : "=a" (lo1), "=d" (hi1), "+c" (ecx1)
	       );

  *counter0 = (long long)(((unsigned long long)hi0 << 32) | lo0);
  *counter1 = (long long)(((unsigned long long)hi1 << 32) | lo1);
}
112 
/**
 * Clear the event selection and read the low halves of both counters.
 *
 * First writes 0 to MSR 0x11 (the register that l4_i586_select_event()
 * programs; presumably this stops counting -- confirm against the CPU
 * manual), then reads MSR 0x12 and MSR 0x13.
 *
 * \retval counter0  Receives the low 32 bits of MSR 0x12.
 * \retval counter1  Receives the low 32 bits of MSR 0x13.
 */
static inline void
l4_i586_read_event_counter(int *counter0, int *counter1)
{
  /* EDX:EAX = 0 is the value written to MSR 0x11; EDX is set explicitly so
     that no stale register content ends up in the upper half. */
  unsigned eax = 0, edx = 0, ecx = 0x11;
  unsigned ctr0;

  /* ECX, EAX and EDX are all rewritten inside the sequence and therefore
     declared as read-write operands. */
  __asm__ __volatile__(
	       ".byte 0x0f, 0x30\n"		/* wrmsr: MSR 0x11 <- 0 */
	       "movl $0x12, %%ecx\n"
	       ".byte 0x0f, 0x32\n"		/* rdmsr 0x12 */
	       "movl %%eax, %0\n"
	       "movl $0x13, %%ecx\n"
	       ".byte 0x0f, 0x32\n"		/* rdmsr 0x13 */
	       : "=&r" (ctr0), "+a" (eax), "+d" (edx), "+c" (ecx)
	       );

  *counter0 = (int)ctr0;
  *counter1 = (int)eax;
}
128 
/**
 * Program the events to be counted by the two Pentium counters.
 *
 * Writes event0 + (event1 << 16) to the low half of MSR 0x11 and 0 to its
 * high half.
 *
 * \param event0  Selection for the first counter (P5_* event code, possibly
 *                combined with a P5_CNT_* value).
 * \param event1  Selection for the second counter; placed in bits 16 and up.
 */
static inline void
l4_i586_select_event(int event0, int event1)
{
  /* Combine in unsigned arithmetic so that the shift cannot overflow a
     signed int. */
  unsigned selection = (unsigned)event0 + ((unsigned)event1 << 16);

  __asm__ __volatile__(
		".byte 0x0f, 0x30\n"		/* wrmsr */
		:
		: "a" (selection), "d" (0), "c" (0x11)
		);
}
140 
/* Cache miss events.  The binary number is the 6-bit event encoding. */
#define P5_RD_MISS          0x003	/* 000011B */
#define P5_WR_MISS          0x004	/* 000100B */
#define P5_RW_MISS          0x029	/* 101001B */
#define P5_EX_MISS          0x00e	/* 001110B */
145 
/* Pentium event codes; each comment repeats the code in binary. */
#define P5_D_WBACK          0x06	/* 000110B */

#define P5_RW_TLB           0x02	/* 00010B */
#define P5_EX_TLB           0x0d	/* 01101B */

#define P5_A_STALL          0x1f	/* 11111B */
#define P5_W_STALL          0x19	/* 11001B */
#define P5_R_STALL          0x1a	/* 11010B */
#define P5_X_STALL          0x1b	/* 11011B */

/* same code as P5_A_STALL */
#define P5_AGI_STALL        0x1f	/* 11111B */

#define P5_PIPLINE_FLUSH    0x15	/* 10101B */

/* P5_NON_CACHE_RD and P5_NCACHE_REFS are two names for one code */
#define P5_NON_CACHE_RD     0x1e	/* 11110B */
#define P5_NCACHE_REFS      0x1e	/* 11110B */
#define P5_LOCKED_BUS       0x1c	/* 11100B */

#define P5_MEM2PIPE         0x09	/* 01001B */
#define P5_BANK_CONF        0x0a	/* 01010B */


#define P5_INSTRS_EX        0x16	/* 10110B */
#define P5_INSTRS_EX_V      0x17	/* 10111B */


/* Counter control values, placed above the 6-bit event code. */
#define P5_CNT_NOTHING      (0x0 << 6)	/* 00B << 6 */
#define P5_CNT_EVENT_PL0    (0x1 << 6)	/* 01B << 6 */
#define P5_CNT_EVENT_PL3    (0x2 << 6)	/* 10B << 6 */
#define P5_CNT_EVENT        (0x3 << 6)	/* 11B << 6 */
#define P5_CNT_CLOCKS_PL0   (0x5 << 6)	/* 101B << 6 */
#define P5_CNT_CLOCKS_PL3   (0x6 << 6)	/* 110B << 6 */
#define P5_CNT_CLOCKS       (0x7 << 6)	/* 111B << 6 */
179 
180 
181 #else
182 #if defined CPU_P6
183 /* PPro/PII/PIII section */
184 
185 /*-
186  * Copyright (c) 1997 The President and Fellows of Harvard College.
187  * All rights reserved.
188  * Copyright (c) 1997 Aaron B. Brown.
189  *
190  * Redistribution and use in source and binary forms, with or without
191  * modification, are permitted provided that the following conditions
192  * are met:
193  * 1. Redistributions of source code must retain the above copyright
194  *    notice, this list of conditions and the following disclaimer.
195  * 2. Redistributions in binary form must reproduce the above copyright
196  *    notice, this list of conditions and the following disclaimer in the
197  *    documentation and/or other materials provided with the distribution.
198  * 3. All advertising materials mentioning features or use of this software
199  *    must display the following acknowledgement:
200  *      This product includes software developed by Harvard University
201  *      and its contributors.
202  * 4. Neither the name of the University nor the names of its contributors
203  *    may be used to endorse or promote products derived from this software
204  *    without specific prior written permission.
205  *
206  * THIS SOFTWARE IS PROVIDED BY HARVARD AND CONTRIBUTORS ``AS IS'' AND
207  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
208  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
209  * ARE DISCLAIMED.  IN NO EVENT SHALL HARVARD UNIVERSITY OR CONTRIBUTORS BE
210  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
211  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
212  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
213  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
214  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
215  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
216  * POSSIBILITY OF SUCH DAMAGE.
217  */
218 
219 /*********************************************************************
220  ** Symbolic names for counter numbers (used in select_p6counter()) **
221  *********************************************************************
222  *
 * These correspond in order to the Pentium Pro counters. Add new counters at
 * the end. These agree with the mnemonics in the Pentium Pro Family
 * Developer's Manual, vol 3.
226  *
227  * Those events marked with a $ require a MESI unit field; those marked with
228  * a @ require a self/any unit field. Those marked with a 0 are only supported
229  * in counter 0; those marked with 1 are only supported in counter 1.
230  */
231 
/* Data cache unit (DCU) */
#define P6_DATA_MEM_REFS        0x43  /* all memory references */
#define P6_DCU_LINES_IN         0x45  /* every line allocated in the cache unit */
#define P6_DCU_M_LINES_IN       0x46  /* M lines allocated in the cache unit */
#define P6_DCU_M_LINES_OUT      0x47  /* M lines evicted from the cache */
#define P6_DCU_MISS_OUTSTANDING 0x48  /* cycles during which a miss is outstanding */

/* Instruction fetch unit (IFU) */
#define P6_IFU_IFETCH           0x80  /* instruction fetches */
#define P6_IFU_IFETCH_MISS      0x81  /* instruction fetches that missed */
#define P6_ITLB_MISS            0x85  /* misses in the ITLB */
#define P6_IFU_MEM_STALL        0x86  /* cycles the IFU is stalled */
#define P6_ILD_STALL            0x87  /* stalls in the instruction length decoder */

/* L2 cache -- events tagged ($) need a MESI unit field */
#define P6_L2_IFETCH            0x28  /* ($) L2 instruction fetches */
#define P6_L2_LD                0x29  /* ($) L2 data loads */
#define P6_L2_ST                0x2a  /* ($) L2 data stores */
#define P6_L2_LINES_IN          0x24  /* lines allocated in L2 */
#define P6_L2_LINES_OUT         0x26  /* lines removed from L2 */
#define P6_L2_M_LINES_INM       0x25  /* modified lines allocated in L2 */
#define P6_L2_M_LINES_OUTM      0x27  /* modified lines removed from L2 */
#define P6_L2_RQSTS             0x2e  /* ($) L2 requests */
#define P6_L2_ADS               0x21  /* L2 address strobes */
#define P6_L2_DBUS_BUSY         0x22  /* cycles the data bus is busy */
#define P6_L2_DBUS_BUSY_RD      0x23  /* bus cycles transferring L2 -> CPU */
258 
/* External bus logic -- events tagged (@) need a self/any unit field */
#define P6_BUS_DRDY_CLOCKS      0x62  /* (@) clocks with DRDY asserted */
#define P6_BUS_LOCK_CLOCKS      0x63  /* (@) clocks with LOCK asserted */
#define P6_BUS_REQ_OUTSTANDING  0x60  /* outstanding bus requests */
#define P6_BUS_TRAN_BRD         0x65  /* (@) burst read transactions */
#define P6_BUS_TRAN_RFO         0x66  /* (@) read-for-ownership transactions */
#define P6_BUS_TRAN_WB          0x67  /* (@) writeback transactions */
#define P6_BUS_TRAN_IFETCH      0x68  /* (@) instruction fetch transactions */
#define P6_BUS_TRAN_INVAL       0x69  /* (@) invalidate transactions */
#define P6_BUS_TRAN_PWR         0x6a  /* (@) partial write transactions */
#define P6_BUS_TRANS_P          0x6b  /* (@) partial transactions */
#define P6_BUS_TRANS_IO         0x6c  /* (@) I/O transactions */
#define P6_BUS_TRAN_DEF         0x6d  /* (@) deferred transactions */
#define P6_BUS_TRAN_BURST       0x6e  /* (@) burst transactions */
#define P6_BUS_TRAN_ANY         0x70  /* (@) all bus transactions */
#define P6_BUS_TRAN_MEM         0x6f  /* (@) all memory transactions */
#define P6_BUS_DATA_RCV         0x64  /* bus clocks the CPU is receiving data */
#define P6_BUS_BNR_DRV          0x61  /* bus clocks the CPU drives the BNR pin */
#define P6_BUS_HIT_DRV          0x7a  /* bus clocks the CPU drives the HIT pin */
#define P6_BUS_HITM_DRV         0x7b  /* bus clocks the CPU drives the HITM pin */
#define P6_BUS_SNOOP_STALL      0x7e  /* clock cycles the bus is snoop-stalled */
280 
/* FPU -- (0)/(1): event is only supported in counter 0 / counter 1 */
#define P6_FLOPS                0xc1  /* (0) FP operations retired */
#define P6_FP_COMP_OPS          0x10  /* (0) computational FP operations executed */
#define P6_FP_ASSIST            0x11  /* (1) FP exceptions handled in microcode */
#define P6_MUL                  0x12  /* (1) FP multiplies */
#define P6_DIV                  0x13  /* (1) FP divides */
#define P6_CYCLES_DIV_BUSY      0x14  /* (0) cycles the divider is busy */

/* Memory ordering */
#define P6_LD_BLOCKS            0x03  /* store buffer blocks */
#define P6_SB_DRAINS            0x04  /* store buffer drain cycles */
#define P6_MISALING_MEM_REF     0x05  /* misaligned data memory references */

/* Instruction decoding and retirement */
#define P6_INST_RETIRED         0xc0  /* instructions retired */
#define P6_UOPS_RETIRED         0xc2  /* micro-ops retired */
#define P6_INST_DECODER         0xd0  /* instructions decoded */

/* Interrupts */
#define P6_HW_INT_RX            0xc8  /* hardware interrupts */
#define P6_CYCLES_INT_MASKED    0xc6  /* cycles hardware interrupts are masked */
#define P6_CYCLES_INT_PENDING_AND_MASKED 0xc7 /* cycles masked while pending */

/* Branches */
#define P6_BR_INST_RETIRED      0xc4  /* branch instructions retired */
#define P6_BR_MISS_PRED_RETIRED 0xc5  /* mispredicted branches retired */
#define P6_BR_TAKEN_RETIRED     0xc9  /* taken branches retired */
#define P6_BR_MISS_PRED_TAKEN_RET 0xca /* taken mispredicted branches retired */
#define P6_BR_INST_DECODED      0xe0  /* branch instructions decoded */
#define P6_BTB_MISSES           0xe2  /* branches that missed in the BTB */
#define P6_BR_BOGUS             0xe4  /* bogus branches */
#define P6_BACLEARS             0xe6  /* times BACLEAR is asserted */

/* Stalls */
#define P6_RESOURCE_STALLS      0xa2  /* resource-related stall cycles */
#define P6_PARTIAL_RAT_STALLS   0xd2  /* cycles/events for partial stalls */

/* Segment register loads */
#define P6_SEGMENT_REG_LOADS    0x06  /* segment register loads */

/* Clocks */
#define P6_CPU_CLK_UNHALTED     0x79  /* clocks the CPU is not halted */
323 
/* Unit field tags: one bit per MESI state ... */
#define P6_UNIT_M               0x0800
#define P6_UNIT_E               0x0400
#define P6_UNIT_S               0x0200
#define P6_UNIT_I               0x0100
#define P6_UNIT_MESI            0x0f00  /* all four state bits */

/* ... and the self/any selection */
#define P6_UNIT_SELF            0x0000
#define P6_UNIT_ANY             0x2000
333 
/****************************************************************************
 ** Flag bit definitions (used for the 'flag' field in select_p6counter()) **
 ****************************************************************************
 *
 * The driver accepts fully-formed counter specifications from user-level.
 * The following flags are mnemonics for the bits that get set in the
 * PerfEvtSel0 and PerfEvtSel1 MSRs.
 *
 */
#define P6CNT_U  0x00010000	/* monitor user-level events */
#define P6CNT_K  0x00020000	/* monitor kernel-level events */
#define P6CNT_E  0x00040000	/* edge detect: count state transitions */
#define P6CNT_PC 0x00080000	/* pin control: ?? */
#define P6CNT_IE 0x00100000	/* int enable: raise an interrupt on overflow */
#define P6CNT_F  0x00200000	/* freeze counter (handled in software) */
#define P6CNT_EN 0x00400000	/* enable counters (in PerfEvtSel0) */
#define P6CNT_IV 0x00800000	/* invert the counter mask comparison result */
351 
/*****************************
 ** Miscellaneous constants **
 *****************************
 *
 * Number of programmable hardware counters of the Pentium Pro.
 */
#define NUM_P6HWC 2
359 
360 /*****************************************************************************
361 *
362 * End of Copyright by Harvard College
363 *
364 *****************************************************************************/
365 
366 
/* MSR numbers of the two P6 event select registers ... */
#define MSR_P6_EVNTSEL0 0x0186
#define MSR_P6_EVNTSEL1 0x0187
/* ... and of the two P6 performance counters */
#define MSR_P6_PERFCTR0 0x00c1
#define MSR_P6_PERFCTR1 0x00c2
371 
/* P6-specific macros to manipulate and read counters */
373 
/* Read the 40 bit performance monitoring counter. This requires
   the PCE-flag in CR4 to be set. Otherwise GP0 is raised. Works only
   at P6.

   cntr  is the counter number (passed to RDPMC in ECX).
   res_p points to the result: the low order 32 bits (EAX) are stored at
         ((int *)res_p)[0], the high order 32 bits (EDX) at
         ((int *)res_p)[1].

   Both macro arguments are fully parenthesized, so arbitrary expressions
   may be passed.
   */
#define l4_i686_rdpmc(cntr, res_p) \
  __asm__ __volatile__(						\
	 ".byte 0xf; .byte 0x33	# RDPMC instruction"		\
	: "=a" (*(int *)(res_p)), "=d" (*((int *)(res_p) + 1))	\
	: "c" (cntr))
387 
l4_i686_rdpmc_32(int cntr)388 static inline l4_uint32_t l4_i686_rdpmc_32(int cntr){
389   l4_uint32_t x;
390 
391   __asm__ __volatile__(
392 	 ".byte 0xf; .byte 0x33	# RDPMC instruction"
393 	: "=a" (x)
394 	: "c" (cntr)
395 	: "ecx", "eax", "edx");
396   return x;
397 }
398 
l4_i686_select_perfctr_event(int counter,unsigned long long val)399 static inline void l4_i686_select_perfctr_event(int counter,
400                                                 unsigned long long val){
401   l4_i586_wrmsr(MSR_P6_EVNTSEL0+counter, &val);
402 }
403 
l4_i686_select_perfctr0_event(long long * val)404 static inline void l4_i686_select_perfctr0_event(long long *val){
405   asm volatile(
406                "movl $MSR_P6_EVNTSEL0, %%ecx\n"
407                "movl (%%ebx), %%eax\n"
408                "movl 4(%%ebx), %%edx\n"
409                //".byte 0xcc, 0xeb, 0x01, 0x21\n"
410                ".byte 0x0f, 0x30\n"	// wrmsr
411                //".byte 0xcc, 0xeb, 0x01, 0x21\n"
412                : /* no output */
413                : "b" (val)
414                : "ax", "cx", "dx", "bx"
415                );
416 
417 }
418 
419 /* end of P6 section */
420 #else
421 
/* K7 section */

/* Flag bits of the K7 event select registers */
#define K7CNT_U  0x00010000	/* monitor user-level events */
#define K7CNT_K  0x00020000	/* monitor kernel-level events */
#define K7CNT_E  0x00040000	/* edge detect: count state transitions */
#define K7CNT_PC 0x00080000	/* pin control: ?? */
#define K7CNT_IE 0x00100000	/* int enable: raise an interrupt on overflow */
#define K7CNT_F  0x00200000	/* freeze counter (handled in software) */
#define K7CNT_EN 0x00400000	/* enable counters (in PerfEvtSel0) */
#define K7CNT_IV 0x00800000	/* invert the counter mask comparison result */

/* MSR numbers: four event select registers followed by four counters */
#define MSR_K7_EVNTSEL0 0xc0010000
#define MSR_K7_EVNTSEL1 0xc0010001
#define MSR_K7_EVNTSEL2 0xc0010002
#define MSR_K7_EVNTSEL3 0xc0010003
#define MSR_K7_PERFCTR0 0xc0010004
#define MSR_K7_PERFCTR1 0xc0010005
#define MSR_K7_PERFCTR2 0xc0010006
#define MSR_K7_PERFCTR3 0xc0010007
439 
440 #endif
441 
442 #endif
443 
444 /* end of P5/P6/K7 section*/
445 #endif
446 
447 /* end of not only lib-prototypes section */
448 #endif
449 
450 EXTERN_C_END
451 
452 #endif
453