Main Page   File List  

perform.h

00001 /* 
00002  * $Id: perform.h 28276 2007-04-10 12:44:30Z adam $
00003  *
00004  * Performance Monitoring using P5/P6/K7 Measurement Counters.
00005  *
00006  * Define exactly one of CPU_PENTIUM, CPU_P6 or CPU_K7
00007  *
00008  */
00009 
00010 #ifndef __L4UTIL_PERFORM_H
00011 #define __L4UTIL_PERFORM_H
00012 
00013 #include <l4/sys/types.h>
00014 #include <l4/sys/compiler.h>
00015 
00016 EXTERN_C_BEGIN
00017 
00018 extern const char*strp6pmc_event(l4_uint32_t event);
00019 
00020 #ifndef CONFIG_PERFORM_ONLY_PROTOTYPES
00021 
00022 #if ! (defined CPU_PENTIUM  ^ defined CPU_P6 ^ defined CPU_K7)
00023 
00024 #error You must define your target architecture.
00025 #error Define EITHER CPU_PENTIUM for Intel Pentium or CPU_P6 for Intel PPro/PII/PIII.
00026 
00027 #else
00028 
00029 /* P5/P6/K7 section */
00030 
00031 /* Helpers for access to model specific registers (MSR) */
00032 
00033 /* Write the 64-Bit Model Specific Register. First argument is the register,
00034    second the 64-Bit value. This can only be called at priviledge level 0.
00035    With L4, the kernel emulates the WRMSR when calling in PL 3.
00036    */
00037 static inline void l4_i586_wrmsr(unsigned reg,unsigned long long*val){
00038   unsigned long dummyeax, dummyecx, dummyedx;
00039 
00040   asm volatile(
00041         ".byte 0xf; .byte 0x30\n"       /* wrmsr */
00042         : "=a" (dummyeax), "=d" (dummyedx), "=c" (dummyecx)
00043         : "2" (reg), "0" (*(unsigned *)val), "1" (*((unsigned *)val+1))
00044         );
00045 }
00046 
00047 /* Read the 64-Bit Model Specific Register. First argument is the register,
00048    second the address to a 64-Bit value. This can only be called at
00049    priviledge level 0.  With L4, the kernel emulates the RDMSR when calling
00050    in PL 3.
00051    */
00052 static inline void l4_i586_rdmsr(unsigned reg,unsigned long long*val){
00053   unsigned dummy;
00054 
00055   asm volatile(
00056         ".byte 0xf; .byte 0x32\n"       /* rdmsr */
00057         : "=a" (*(unsigned *)val), "=d" (*((unsigned *)val+1)), "=c" (dummy)
00058         : "2" (reg)
00059         );
00060 }
00061 
00062 
00063 #ifdef CPU_PENTIUM
00064 /* Pentium section */
00065 
00066 /* The functions and events defined here are only usable on Pentium
00067    processors. The P6 architecture does NOT support this kind of measuring
00068    or these events; it has its own counters and its own events.
00069    See the P6 section for details. */
00070 
00071 /* from l4linux/arch/l4-i386/include/perform.h */
00072 
00073 static inline void 
00074 l4_i586_reset_event_counter(void){
00075    asm volatile("xor %%rax, %%rax\n"
00076                 "xor %%rdx, %%rdx\n"
00077                 "mov $0x12, %%rcx\n"
00078                 ".byte 0x0f, 0x30\n"
00079                 "movl $0x13, %%rcx\n"
00080                 ".byte 0x0f, 0x30\n"
00081                 : : : "cx", "ax", "dx" 
00082                 );
00083 };
00084 
00085 static inline void
00086 l4_i586_read_event_counter_long(long long *counter0, long long *counter1)
00087 {
00088   asm volatile(
00089                /*              "movl    $0, %%eax\n"
00090                "movl    $0x11, %%ecx\n"
00091                ".byte 0x0f, 0x30\n" *//* stop event counting */
00092                "mov  $0x12, %%rcx\n"
00093                ".byte 0x0f, 0x32\n"
00094                "mov %%rax, (%%rbx)\n"
00095                "mov %%rdx, 4(%%rbx)\n"
00096                "mov $0x13, %%ecx\n"
00097                ".byte 0x0f, 0x32\n"
00098                "mov %%rax, (%%rsi)\n"
00099                "mov %%rdx, 4(%%rsi)\n"
00100                : /* no output */
00101                : "b" (counter0), "S" (counter1)
00102                : "ax", "cx", "dx"
00103                );
00104 }
00105 
00106 static inline void
00107 l4_i586_read_event_counter(int *counter0, int *counter1)
00108 {
00109   asm volatile("push %%rdx              \n"
00110                ".byte  0x0f, 0x30       \n"
00111                "mov  $0x12, %%rcx       \n"
00112                ".byte 0x0f, 0x32        \n"
00113                "mov  %%rax, %%rbx       \n"
00114                "movl $0x13, %%rcx       \n"
00115                ".byte 0x0f, 0x32\n"
00116                "popl    %%edx\n"
00117                : "=b" (*counter0), "=a" (*counter1)
00118                : "1" (0), "c" (0x11)
00119                );
00120 }
00121 
00122 static inline void 
00123 l4_i586_select_event(int event0, int event1)
00124 {
00125    asm volatile(".byte 0x0f, 0x30\n"
00126                 :
00127                 :
00128                 "a" (event0 + (event1 << 16)),
00129                 "d" (0),
00130                 "c" (0x11)
00131                 );
00132 };
00133 
00134 #define P5_RD_MISS          0x003       /* 000011B */
00135 #define P5_WR_MISS          0x008       /* 000100B */
00136 #define P5_RW_MISS          0x029       /* 101001B */
00137 #define P5_EX_MISS          0x00e       /* 001110B */
00138 
00139 #define P5_D_WBACK          0x006       /* 000110B */
00140 
00141 #define P5_RW_TLB           0x002       /* 00010B */
00142 #define P5_EX_TLB           0x00d       /* 01101B */
00143 
00144 #define P5_A_STALL          0x01f       /* 11111B */
00145 #define P5_W_STALL          0x019       /* 11001B */
00146 #define P5_R_STALL          0x01a       /* 11010B */
00147 #define P5_X_STALL          0x01b       /* 11011B */
00148 
00149 #define P5_AGI_STALL        0x01f       /* 11111B */
00150 
00151 #define P5_PIPLINE_FLUSH    0x015       /* 10101B */
00152 
00153 #define P5_NON_CACHE_RD     0x01e       /* 11110B */
00154 #define P5_NCACHE_REFS      0x01e       /* 11110B */
00155 #define P5_LOCKED_BUS       0x01c       /* 11100B */
00156 
00157 #define P5_MEM2PIPE         0x009       /* 01001B */
00158 #define P5_BANK_CONF        0x00a       /* 01010B */
00159 
00160 
00161 #define P5_INSTRS_EX        0x016       /* 10110B */
00162 #define P5_INSTRS_EX_V      0x017       /* 10111B */
00163 
00164 
00165 #define P5_CNT_NOTHING      (0x00 << 6) /* 00B << 6 */
00166 #define P5_CNT_EVENT_PL0    (0x01 << 6) /* 01B << 6 */
00167 #define P5_CNT_EVENT_PL3    (0x02 << 6) /* 10B << 6 */
00168 #define P5_CNT_EVENT        (0x03 << 6) /* 11B << 6 */
00169 #define P5_CNT_CLOCKS_PL0   (0x05 << 6) /* 101B << 6 */
00170 #define P5_CNT_CLOCKS_PL3   (0x06 << 6) /* 110B << 6 */
00171 #define P5_CNT_CLOCKS       (0x07 << 6) /* 111B << 6 */
00172 
00173 
00174 #else
00175 #if defined CPU_P6
00176 /* PPro/PII/PIII section */
00177 
00178 /*-
00179  * Copyright (c) 1997 The President and Fellows of Harvard College.
00180  * All rights reserved.
00181  * Copyright (c) 1997 Aaron B. Brown.
00182  *
00183  * Redistribution and use in source and binary forms, with or without
00184  * modification, are permitted provided that the following conditions
00185  * are met:
00186  * 1. Redistributions of source code must retain the above copyright
00187  *    notice, this list of conditions and the following disclaimer.
00188  * 2. Redistributions in binary form must reproduce the above copyright
00189  *    notice, this list of conditions and the following disclaimer in the
00190  *    documentation and/or other materials provided with the distribution.
00191  * 3. All advertising materials mentioning features or use of this software
00192  *    must display the following acknowledgement:
00193  *      This product includes software developed by Harvard University
00194  *      and its contributors.
00195  * 4. Neither the name of the University nor the names of its contributors
00196  *    may be used to endorse or promote products derived from this software
00197  *    without specific prior written permission.
00198  *
00199  * THIS SOFTWARE IS PROVIDED BY HARVARD AND CONTRIBUTORS ``AS IS'' AND
00200  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00201  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
00202  * ARE DISCLAIMED.  IN NO EVENT SHALL HARVARD UNIVERSITY OR CONTRIBUTORS BE
00203  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
00204  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
00205  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
00206  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
00207  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
00208  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
00209  * POSSIBILITY OF SUCH DAMAGE.
00210  */
00211 
00212 /*********************************************************************
00213  ** Symbolic names for counter numbers (used in select_p6counter()) **
00214  *********************************************************************
00215  *
00216  * These correspond in order to the Pentium Pro counters. Add new counters at
00217  * the end. These agree with the mnemonics in the Pentium Pro Family
00218  * Developer's Manual, vol 3.
00219  *
00220  * Those events marked with a $ require a MESI unit field; those marked with
00221  * a @ require a self/any unit field. Those marked with a 0 are only supported
00222  * in counter 0; those marked with 1 are only supported in counter 1.
00223  */
00224 
00225 /* Data cache unit */
00226 #define P6_DATA_MEM_REFS        0x43    /* total memory refs */
00227 #define P6_DCU_LINES_IN         0x45    /* all lines allocated in cache unit */
00228 #define P6_DCU_M_LINES_IN       0x46    /* M lines allocated in cache unit */
00229 #define P6_DCU_M_LINES_OUT      0x47    /* M lines evicted from cache */
00230 #define P6_DCU_MISS_OUTSTANDING 0x48    /* #cycles a miss is outstanding */
00231 
00232 /* Instruction fetch unit */
00233 #define P6_IFU_IFETCH           0x80    /* instruction fetches */
00234 #define P6_IFU_IFETCH_MISS      0x81    /* instruction fetch misses */
00235 #define P6_ITLB_MISS            0x85    /* ITLB misses */
00236 #define P6_IFU_MEM_STALL        0x86    /* number of cycles IFU is stalled */
00237 #define P6_ILD_STALL            0x87    /* #stalls in instr length decode */
00238 
00239 /* L2 Cache */
00240 #define P6_L2_IFETCH            0x28    /* ($) l2 ifetches */
00241 #define P6_L2_LD                0x29    /* ($) l2 data loads */
00242 #define P6_L2_ST                0x2a    /* ($) l2 data stores */
00243 #define P6_L2_LINES_IN          0x24    /* lines allocated in l2 */
00244 #define P6_L2_LINES_OUT         0x26    /* lines removed from l2 */
00245 #define P6_L2_M_LINES_INM       0x25    /* modified lines allocated in L2 */
00246 #define P6_L2_M_LINES_OUTM      0x27    /* modified lines removed from L2 */
00247 #define P6_L2_RQSTS             0x2e    /* ($) number of l2 requests */
00248 #define P6_L2_ADS               0x21    /* number of l2 addr strobes */
00249 #define P6_L2_DBUS_BUSY         0x22    /* number of data bus busy cycles */
00250 #define P6_L2_DBUS_BUSY_RD      0x23    /* #bus cycles xferring l2->cpu */
00251 
00252 /* External bus logic */
00253 #define P6_BUS_DRDY_CLOCKS      0x62    /* (@) #clocks DRDY is asserted */
00254 #define P6_BUS_LOCK_CLOCKS      0x63    /* (@) #clocks LOCK is asserted */
00255 #define P6_BUS_REQ_OUTSTANDING  0x60    /* #bus requests outstanding */
00256 #define P6_BUS_TRAN_BRD         0x65    /* (@) bus burst read txns */
00257 #define P6_BUS_TRAN_RFO         0x66    /* (@) bus read for ownership txns */
00258 #define P6_BUS_TRAN_WB          0x67    /* (@) bus writeback txns */
00259 #define P6_BUS_TRAN_IFETCH      0x68    /* (@) bus instr fetch txns */
00260 #define P6_BUS_TRAN_INVAL       0x69    /* (@) bus invalidate txns */
00261 #define P6_BUS_TRAN_PWR         0x6a    /* (@) bus partial write txns */
00262 #define P6_BUS_TRANS_P          0x6b    /* (@) bus partial txns */
00263 #define P6_BUS_TRANS_IO         0x6c    /* (@) bus I/O txns */
00264 #define P6_BUS_TRAN_DEF         0x6d    /* (@) bus deferred txns */
00265 #define P6_BUS_TRAN_BURST       0x6e    /* (@) bus burst txns */
00266 #define P6_BUS_TRAN_ANY         0x70    /* (@) total bus txns */
00267 #define P6_BUS_TRAN_MEM         0x6f    /* (@) total memory txns */
00268 #define P6_BUS_DATA_RCV         0x64    /* #busclocks CPU is receiving data */
00269 #define P6_BUS_BNR_DRV          0x61    /* #busclocks CPU is driving BNR pin */
00270 #define P6_BUS_HIT_DRV          0x7a    /* #busclocks CPU is driving HIT pin */
00271 #define P6_BUS_HITM_DRV         0x7b    /* #busclocks CPU is driving HITM pin*/
00272 #define P6_BUS_SNOOP_STALL      0x7e    /* #clkcycles bus is snoop-stalled */
00273 
00274 /* FPU */
00275 #define P6_FLOPS                0xc1    /* (0) number of FP ops retired */
00276 #define P6_FP_COMP_OPS          0x10    /* (0) computational FPOPS exec'd */
00277 #define P6_FP_ASSIST            0x11    /* (1) FP excep's handled in ucode */
00278 #define P6_MUL                  0x12    /* (1) number of FP multiplies */
00279 #define P6_DIV                  0x13    /* (1) number of FP divides */
00280 #define P6_CYCLES_DIV_BUSY      0x14    /* (0) number of cycles divider busy */
00281 
00282 /* Memory ordering */
00283 #define P6_LD_BLOCKS            0x03    /* number of store buffer blocks */
00284 #define P6_SB_DRAINS            0x04    /* # of store buffer drain cycles */
00285 #define P6_MISALING_MEM_REF     0x05    /* # misaligned data memory refs */
00286 
00287 /* Instruction decoding and retirement */
00288 #define P6_INST_RETIRED         0xc0    /* number of instrs retired */
00289 #define P6_UOPS_RETIRED         0xc2    /* number of micro-ops retired */
00290 #define P6_INST_DECODER         0xd0    /* number of instructions decoded */
00291 
00292 /* Interrupts */
00293 #define P6_HW_INT_RX            0xc8    /* number of hardware interrupts */
00294 #define P6_CYCLES_INT_MASKED    0xc6    /* number of cycles hardints masked */
00295 #define P6_CYCLES_INT_PENDING_AND_MASKED 0xc7 /* #cycles masked but pending */
00296 
00297 /* Branches */
00298 #define P6_BR_INST_RETIRED      0xc4    /* number of branch instrs retired */
00299 #define P6_BR_MISS_PRED_RETIRED 0xc5    /* number of mispred'd brs retired */
00300 #define P6_BR_TAKEN_RETIRED     0xc9    /* number of taken branches retired */
00301 #define P6_BR_MISS_PRED_TAKEN_RET 0xca  /* #taken mispredictions br's retired*/
00302 #define P6_BR_INST_DECODED      0xe0    /* number of branch instrs decoded */
00303 #define P6_BTB_MISSES           0xe2    /* # of branches that missed in BTB */
00304 #define P6_BR_BOGUS             0xe4    /* number of bogus branches */
00305 #define P6_BACLEARS             0xe6    /* # times BACLEAR is asserted */
00306 
00307 /* Stalls */
00308 #define P6_RESOURCE_STALLS      0xa2    /* # resource-related stall cycles */
00309 #define P6_PARTIAL_RAT_STALLS   0xd2    /* # cycles/events for partial stalls*/
00310 
00311 /* Segment register loads */
00312 #define P6_SEGMENT_REG_LOADS    0x06    /* number of segment register loads */
00313 
00314 /* Clocks */
00315 #define P6_CPU_CLK_UNHALTED     0x79    /* #clocks CPU is not halted */
00316 
00317 /* Unit field tags */
00318 #define P6_UNIT_M               0x0800
00319 #define P6_UNIT_E               0x0400
00320 #define P6_UNIT_S               0x0200
00321 #define P6_UNIT_I               0x0100
00322 #define P6_UNIT_MESI            0x0f00
00323 
00324 #define P6_UNIT_SELF            0x0000
00325 #define P6_UNIT_ANY             0x2000
00326 
00327 /****************************************************************************
00328  ** Flag bit definitions (used for the 'flag' field in select_p6counter()) **
00329  ****************************************************************************
00330  *
00331  * The driver accepts fully-formed counter specifications from user-level.
00332  * The following flags are mnemonics for the bits that get set in the
00333  * PerfEvtSel0 and PerfEvtSel1 MSR's
00334  *
00335  */
00336 #define P6CNT_U  0x010000       /* Monitor user-level events */
00337 #define P6CNT_K  0x020000       /* Monitor kernel-level events */
00338 #define P6CNT_E  0x040000       /* Edge detect: count state transitions */
00339 #define P6CNT_PC 0x080000       /* Pin control: ?? */
00340 #define P6CNT_IE 0x100000       /* Int enable: enable interrupt on overflow */
00341 #define P6CNT_F  0x200000       /* Freeze counter (handled in software) */
00342 #define P6CNT_EN 0x400000       /* enable counters (in PerfEvtSel0) */
00343 #define P6CNT_IV 0x800000       /* Invert counter mask comparison result */
00344 
00345 /*****************************
00346  ** Miscellaneous constants **
00347  *****************************
00348  *
00349  * Number of Pentium Pro programmable hardware counters. 
00350  */
00351 #define NUM_P6HWC 2
00352 
00353 /*****************************************************************************
00354 *
00355 * End of Copyright by Harvard College
00356 *
00357 *****************************************************************************/
00358 
00359 
00360 #define MSR_P6_EVNTSEL0 0x186
00361 #define MSR_P6_EVNTSEL1 0x187
00362 #define MSR_P6_PERFCTR0 0xc1
00363 #define MSR_P6_PERFCTR1 0xc2
00364 
00365 /* P6-specific macros and functions to manipulate and read counters */
00366 
00367 /* Read the 40 bit performance monitoring counter. This requires 
00368    the PCE-flag in CR4 to be set. Otherwise GP0 is raised. Works only
00369    at P6.
00370    */
00371 #define l4_i686_rdpmc(cntr, res_p) \
00372   __asm __volatile(                                             \
00373          "mov %2, %%rcx         # put counter number in         \n\
00374          .byte 0xf; .byte 0x33  # RDPMC instruction             \n\
00375          mov %%rdx, %1          # High order 32 bits            \n\
00376          mov %%rax, %0          # Low order 32 bits"            \
00377         : "=g" (*(int *)(res_p)), "=g" (*(((int *)res_p)+1))    \
00378         : "g" (cntr)                                            \
00379         : "ecx", "eax", "edx")
00380 
00381 static inline l4_uint32_t l4_i686_rdpmc_32(int cntr){
00382   l4_uint32_t x;
00383   
00384   __asm__ __volatile__(
00385          ".byte 0xf; .byte 0x33 # RDPMC instruction"
00386         : "=a" (x)
00387         : "c" (cntr)
00388         : "rcx", "rax", "rdx");
00389   return x;
00390 }
00391 
00392 static inline void l4_i686_select_perfctr_event(int counter, 
00393                                                 unsigned long long val){
00394   l4_i586_wrmsr(MSR_P6_EVNTSEL0+counter, &val);
00395 }
00396 
00397 static inline void l4_i686_select_perfctr0_event(long long *val){
00398   asm volatile(
00399                "mov $MSR_P6_EVNTSEL0, %%rcx\n"
00400                "mov (%%rbx), %%rax\n"
00401                "mov 4(%%rbx), %%rdx\n"
00402                //".byte 0xcc, 0xeb, 0x01, 0x21\n"
00403                ".byte 0x0f, 0x30\n"     // wrmsr
00404                //".byte 0xcc, 0xeb, 0x01, 0x21\n"
00405                : /* no output */
00406                : "b" (val)
00407                : "ax", "cx", "dx", "bx"
00408                );
00409 
00410 }
00411 
00412 /* end of P6 section */
00413 #else
00414 
00415 #define K7CNT_U  0x010000       /* Monitor user-level events */
00416 #define K7CNT_K  0x020000       /* Monitor kernel-level events */
00417 #define K7CNT_E  0x040000       /* Edge detect: count state transitions */
00418 #define K7CNT_PC 0x080000       /* Pin control: ?? */
00419 #define K7CNT_IE 0x100000       /* Int enable: enable interrupt on overflow */
00420 #define K7CNT_F  0x200000       /* Freeze counter (handled in software) */
00421 #define K7CNT_EN 0x400000       /* enable counters (in PerfEvtSel0) */
00422 #define K7CNT_IV 0x800000       /* Invert counter mask comparison result */
00423 
00424 #define MSR_K7_EVNTSEL0 0xC0010000
00425 #define MSR_K7_EVNTSEL1 0xC0010001
00426 #define MSR_K7_EVNTSEL2 0xC0010002
00427 #define MSR_K7_EVNTSEL3 0xC0010003
00428 #define MSR_K7_PERFCTR0 0xC0010004
00429 #define MSR_K7_PERFCTR1 0xC0010005
00430 #define MSR_K7_PERFCTR2 0xC0010006
00431 #define MSR_K7_PERFCTR3 0xC0010007
00432 
00433 #endif
00434 
00435 #endif
00436 
00437 /* end of P5/P6/K7 section*/
00438 #endif
00439 
00440 /* end of not only lib-prototypes section */
00441 #endif
00442 
00443 EXTERN_C_END
00444 
00445 #endif

L4 Utilities, part of DROPS  © 2000-2003