1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. 23 */ 24 25 /* 26 * This file contains preset event names from the Performance Application 27 * Programming Interface v3.5 which included the following notice: 28 * 29 * Copyright (c) 2005,6 30 * Innovative Computing Labs 31 * Computer Science Department, 32 * University of Tennessee, 33 * Knoxville, TN. 34 * All Rights Reserved. 35 * 36 * 37 * Redistribution and use in source and binary forms, with or without 38 * modification, are permitted provided that the following conditions are met: 39 * 40 * * Redistributions of source code must retain the above copyright notice, 41 * this list of conditions and the following disclaimer. 42 * * Redistributions in binary form must reproduce the above copyright 43 * notice, this list of conditions and the following disclaimer in the 44 * documentation and/or other materials provided with the distribution. 
45 * * Neither the name of the University of Tennessee nor the names of its 46 * contributors may be used to endorse or promote products derived from 47 * this software without specific prior written permission. 48 * 49 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 50 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 52 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 53 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 54 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 55 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 56 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 57 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 58 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 59 * POSSIBILITY OF SUCH DAMAGE. 60 * 61 * 62 * This open source software license conforms to the BSD License template. 63 */ 64 65 66 /* 67 * Performance Counter Back-End for Intel processors supporting Architectural 68 * Performance Monitoring. 
69 */ 70 71 #include <sys/cpuvar.h> 72 #include <sys/param.h> 73 #include <sys/cpc_impl.h> 74 #include <sys/cpc_pcbe.h> 75 #include <sys/modctl.h> 76 #include <sys/inttypes.h> 77 #include <sys/systm.h> 78 #include <sys/cmn_err.h> 79 #include <sys/x86_archext.h> 80 #include <sys/sdt.h> 81 #include <sys/archsystm.h> 82 #include <sys/privregs.h> 83 #include <sys/ddi.h> 84 #include <sys/sunddi.h> 85 #include <sys/cred.h> 86 #include <sys/policy.h> 87 88 static int core_pcbe_init(void); 89 static uint_t core_pcbe_ncounters(void); 90 static const char *core_pcbe_impl_name(void); 91 static const char *core_pcbe_cpuref(void); 92 static char *core_pcbe_list_events(uint_t picnum); 93 static char *core_pcbe_list_attrs(void); 94 static uint64_t core_pcbe_event_coverage(char *event); 95 static uint64_t core_pcbe_overflow_bitmap(void); 96 static int core_pcbe_configure(uint_t picnum, char *event, uint64_t preset, 97 uint32_t flags, uint_t nattrs, kcpc_attr_t *attrs, void **data, 98 void *token); 99 static void core_pcbe_program(void *token); 100 static void core_pcbe_allstop(void); 101 static void core_pcbe_sample(void *token); 102 static void core_pcbe_free(void *config); 103 104 #define FALSE 0 105 #define TRUE 1 106 107 /* Counter Type */ 108 #define CORE_GPC 0 /* General-Purpose Counter (GPC) */ 109 #define CORE_FFC 1 /* Fixed-Function Counter (FFC) */ 110 111 /* MSR Addresses */ 112 #define GPC_BASE_PMC 0x00c1 /* First GPC */ 113 #define GPC_BASE_PES 0x0186 /* First GPC Event Select register */ 114 #define FFC_BASE_PMC 0x0309 /* First FFC */ 115 #define PERF_FIXED_CTR_CTRL 0x038d /* Used to enable/disable FFCs */ 116 #define PERF_GLOBAL_STATUS 0x038e /* Overflow status register */ 117 #define PERF_GLOBAL_CTRL 0x038f /* Used to enable/disable counting */ 118 #define PERF_GLOBAL_OVF_CTRL 0x0390 /* Used to clear overflow status */ 119 120 /* 121 * Processor Event Select register fields 122 */ 123 #define CORE_USR (1ULL << 16) /* Count while not in ring 0 */ 124 #define CORE_OS 
(1ULL << 17) /* Count while in ring 0 */ 125 #define CORE_EDGE (1ULL << 18) /* Enable edge detection */ 126 #define CORE_PC (1ULL << 19) /* Enable pin control */ 127 #define CORE_INT (1ULL << 20) /* Enable interrupt on overflow */ 128 #define CORE_EN (1ULL << 22) /* Enable counting */ 129 #define CORE_INV (1ULL << 23) /* Invert the CMASK */ 130 #define CORE_ANYTHR (1ULL << 21) /* Count event for any thread on core */ 131 132 #define CORE_UMASK_SHIFT 8 133 #define CORE_UMASK_MASK 0xffu 134 #define CORE_CMASK_SHIFT 24 135 #define CORE_CMASK_MASK 0xffu 136 137 /* 138 * Fixed-function counter attributes 139 */ 140 #define CORE_FFC_OS_EN (1ULL << 0) /* Count while in ring 0 */ 141 #define CORE_FFC_USR_EN (1ULL << 1) /* Count while not in ring 0 */ 142 #define CORE_FFC_ANYTHR (1ULL << 2) /* Count event for any thread on core */ 143 #define CORE_FFC_PMI (1ULL << 3) /* Enable interrupt on overflow */ 144 145 /* 146 * Number of bits for specifying each FFC's attributes in the control register 147 */ 148 #define CORE_FFC_ATTR_SIZE 4 149 150 /* 151 * CondChgd and OvfBuffer fields of global status and overflow control registers 152 */ 153 #define CONDCHGD (1ULL << 63) 154 #define OVFBUFFER (1ULL << 62) 155 #define MASK_CONDCHGD_OVFBUFFER (CONDCHGD | OVFBUFFER) 156 157 #define ALL_STOPPED 0ULL 158 159 #define BITMASK_XBITS(x) ((1ull << (x)) - 1ull) 160 161 /* 162 * Only the lower 32-bits can be written to in the general-purpose 163 * counters. The higher bits are extended from bit 31; all ones if 164 * bit 31 is one and all zeros otherwise. 165 * 166 * The fixed-function counters do not have this restriction. 
167 */ 168 #define BITS_EXTENDED_FROM_31 (BITMASK_XBITS(width_gpc) & ~BITMASK_XBITS(31)) 169 /* WRMSR/RDMSR pair every MSR access with a DTrace probe. Each is wrapped in do/while (0) so it expands to exactly one statement (safe under an unbraced if/else). Both arguments are evaluated twice, so they must be free of side effects. */ 170 #define WRMSR(msr, value) \ 171 do { wrmsr((msr), (value)); \ 172 DTRACE_PROBE2(wrmsr, uint64_t, (msr), uint64_t, (value)); } while (0) 173 174 #define RDMSR(msr, value) \ 175 do { (value) = rdmsr((msr)); \ 176 DTRACE_PROBE2(rdmsr, uint64_t, (msr), uint64_t, (value)); } while (0) 177 178 typedef struct core_pcbe_config { 179 uint64_t core_rawpic; 180 uint64_t core_ctl; /* Event Select bits */ 181 uint64_t core_pmc; /* Counter register address */ 182 uint64_t core_pes; /* Event Select register address */ 183 uint_t core_picno; 184 uint8_t core_pictype; /* CORE_GPC or CORE_FFC */ 185 } core_pcbe_config_t; 186 187 pcbe_ops_t core_pcbe_ops = { 188 PCBE_VER_1, /* pcbe_ver */ 189 CPC_CAP_OVERFLOW_INTERRUPT | CPC_CAP_OVERFLOW_PRECISE, /* pcbe_caps */ 190 core_pcbe_ncounters, /* pcbe_ncounters */ 191 core_pcbe_impl_name, /* pcbe_impl_name */ 192 core_pcbe_cpuref, /* pcbe_cpuref */ 193 core_pcbe_list_events, /* pcbe_list_events */ 194 core_pcbe_list_attrs, /* pcbe_list_attrs */ 195 core_pcbe_event_coverage, /* pcbe_event_coverage */ 196 core_pcbe_overflow_bitmap, /* pcbe_overflow_bitmap */ 197 core_pcbe_configure, /* pcbe_configure */ 198 core_pcbe_program, /* pcbe_program */ 199 core_pcbe_allstop, /* pcbe_allstop */ 200 core_pcbe_sample, /* pcbe_sample */ 201 core_pcbe_free /* pcbe_free */ 202 }; 203 204 struct nametable_core_uarch { 205 const char *name; 206 uint64_t restricted_bits; 207 uint8_t event_num; 208 }; 209 210 #define NT_END 0xFF /* event_num value that terminates an event table */ 211 212 /* 213 * Counting an event for all cores or all bus agents requires cpc_cpu privileges 214 */ 215 #define ALL_CORES (1ULL << 15) 216 #define ALL_AGENTS (1ULL << 13) 217 218 struct generic_events { 219 const char *name; 220 uint8_t event_num; 221 uint8_t umask; 222 }; 223 224 static const struct generic_events cmn_generic_events[] = { 225 { "PAPI_tot_cyc", 0x3c, 0x00 }, /* cpu_clk_unhalted.thread_p/core */ 226 { "PAPI_tot_ins", 0xc0, 0x00 }, /* inst_retired.any_p */ 
227 { "PAPI_br_ins", 0xc4, 0x0c }, /* br_inst_retired.taken */ 228 { "PAPI_br_msp", 0xc5, 0x00 }, /* br_inst_retired.mispred */ 229 { "PAPI_br_ntk", 0xc4, 0x03 }, 230 /* br_inst_retired.pred_not_taken|mispred_not_taken */ 231 { "PAPI_br_prc", 0xc4, 0x05 }, 232 /* br_inst_retired.pred_not_taken|pred_taken */ 233 { "PAPI_hw_int", 0xc8, 0x00 }, /* hw_int_rcv */ 234 { "PAPI_tot_iis", 0xaa, 0x01 }, /* macro_insts.decoded */ 235 { "PAPI_l1_dca", 0x43, 0x01 }, /* l1d_all_ref */ 236 { "PAPI_l1_icm", 0x81, 0x00 }, /* l1i_misses */ 237 { "PAPI_l1_icr", 0x80, 0x00 }, /* l1i_reads */ 238 { "PAPI_l1_tcw", 0x41, 0x0f }, /* l1d_cache_st.mesi */ 239 { "PAPI_l2_stm", 0x2a, 0x41 }, /* l2_st.self.i_state */ 240 { "PAPI_l2_tca", 0x2e, 0x4f }, /* l2_rqsts.self.demand.mesi */ 241 { "PAPI_l2_tch", 0x2e, 0x4e }, /* l2_rqsts.mes */ 242 { "PAPI_l2_tcm", 0x2e, 0x41 }, /* l2_rqsts.self.demand.i_state */ 243 { "PAPI_l2_tcw", 0x2a, 0x4f }, /* l2_st.self.mesi */ 244 { "PAPI_ld_ins", 0xc0, 0x01 }, /* inst_retired.loads */ 245 { "PAPI_lst_ins", 0xc0, 0x03 }, /* inst_retired.loads|stores */ 246 { "PAPI_sr_ins", 0xc0, 0x02 }, /* inst_retired.stores */ 247 { "PAPI_tlb_dm", 0x08, 0x01 }, /* dtlb_misses.any */ 248 { "PAPI_tlb_im", 0x82, 0x12 }, /* itlb.small_miss|large_miss */ 249 { "PAPI_tlb_tl", 0x0c, 0x03 }, /* page_walks */ 250 { "", NT_END, 0 } 251 }; 252 253 static const struct generic_events generic_events_pic0[] = { 254 { "PAPI_l1_dcm", 0xcb, 0x01 }, /* mem_load_retired.l1d_miss */ 255 { "", NT_END, 0 } 256 }; 257 258 /* 259 * The events listed in the following table can be counted on all 260 * general-purpose counters on processors that are of Penryn and Merom Family 261 */ 262 static const struct nametable_core_uarch cmn_gpc_events_core_uarch[] = { 263 /* Alphabetical order of event name */ 264 265 { "baclears", 0x0, 0xe6 }, 266 { "bogus_br", 0x0, 0xe4 }, 267 { "br_bac_missp_exec", 0x0, 0x8a }, 268 269 { "br_call_exec", 0x0, 0x92 }, 270 { "br_call_missp_exec", 0x0, 0x93 }, 271 { "br_cnd_exec", 0x0, 
0x8b }, 272 273 { "br_cnd_missp_exec", 0x0, 0x8c }, 274 { "br_ind_call_exec", 0x0, 0x94 }, 275 { "br_ind_exec", 0x0, 0x8d }, 276 277 { "br_ind_missp_exec", 0x0, 0x8e }, 278 { "br_inst_decoded", 0x0, 0xe0 }, 279 { "br_inst_exec", 0x0, 0x88 }, 280 281 { "br_inst_retired", 0x0, 0xc4 }, 282 { "br_inst_retired_mispred", 0x0, 0xc5 }, 283 { "br_missp_exec", 0x0, 0x89 }, 284 285 { "br_ret_bac_missp_exec", 0x0, 0x91 }, 286 { "br_ret_exec", 0x0, 0x8f }, 287 { "br_ret_missp_exec", 0x0, 0x90 }, 288 289 { "br_tkn_bubble_1", 0x0, 0x97 }, 290 { "br_tkn_bubble_2", 0x0, 0x98 }, 291 { "bus_bnr_drv", ALL_AGENTS, 0x61 }, 292 293 { "bus_data_rcv", ALL_CORES, 0x64 }, 294 { "bus_drdy_clocks", ALL_AGENTS, 0x62 }, 295 { "bus_hit_drv", ALL_AGENTS, 0x7a }, 296 297 { "bus_hitm_drv", ALL_AGENTS, 0x7b }, 298 { "bus_io_wait", ALL_CORES, 0x7f }, 299 { "bus_lock_clocks", ALL_CORES | ALL_AGENTS, 0x63 }, 300 301 { "bus_request_outstanding", ALL_CORES | ALL_AGENTS, 0x60 }, 302 { "bus_trans_any", ALL_CORES | ALL_AGENTS, 0x70 }, 303 { "bus_trans_brd", ALL_CORES | ALL_AGENTS, 0x65 }, 304 305 { "bus_trans_burst", ALL_CORES | ALL_AGENTS, 0x6e }, 306 { "bus_trans_def", ALL_CORES | ALL_AGENTS, 0x6d }, 307 { "bus_trans_ifetch", ALL_CORES | ALL_AGENTS, 0x68 }, 308 309 { "bus_trans_inval", ALL_CORES | ALL_AGENTS, 0x69 }, 310 { "bus_trans_io", ALL_CORES | ALL_AGENTS, 0x6c }, 311 { "bus_trans_mem", ALL_CORES | ALL_AGENTS, 0x6f }, 312 313 { "bus_trans_p", ALL_CORES | ALL_AGENTS, 0x6b }, 314 { "bus_trans_pwr", ALL_CORES | ALL_AGENTS, 0x6a }, 315 { "bus_trans_rfo", ALL_CORES | ALL_AGENTS, 0x66 }, 316 317 { "bus_trans_wb", ALL_CORES | ALL_AGENTS, 0x67 }, 318 { "busq_empty", ALL_CORES, 0x7d }, 319 { "cmp_snoop", ALL_CORES, 0x78 }, 320 321 { "cpu_clk_unhalted", 0x0, 0x3c }, 322 { "cycles_int", 0x0, 0xc6 }, 323 { "cycles_l1i_mem_stalled", 0x0, 0x86 }, 324 325 { "dtlb_misses", 0x0, 0x08 }, 326 { "eist_trans", 0x0, 0x3a }, 327 { "esp", 0x0, 0xab }, 328 329 { "ext_snoop", ALL_AGENTS, 0x77 }, 330 { "fp_mmx_trans", 0x0, 
0xcc }, 331 { "hw_int_rcv", 0x0, 0xc8 }, 332 333 { "ild_stall", 0x0, 0x87 }, 334 { "inst_queue", 0x0, 0x83 }, 335 { "inst_retired", 0x0, 0xc0 }, 336 337 { "itlb", 0x0, 0x82 }, 338 { "itlb_miss_retired", 0x0, 0xc9 }, 339 { "l1d_all_ref", 0x0, 0x43 }, 340 341 { "l1d_cache_ld", 0x0, 0x40 }, 342 { "l1d_cache_lock", 0x0, 0x42 }, 343 { "l1d_cache_st", 0x0, 0x41 }, 344 345 { "l1d_m_evict", 0x0, 0x47 }, 346 { "l1d_m_repl", 0x0, 0x46 }, 347 { "l1d_pend_miss", 0x0, 0x48 }, 348 349 { "l1d_prefetch", 0x0, 0x4e }, 350 { "l1d_repl", 0x0, 0x45 }, 351 { "l1d_split", 0x0, 0x49 }, 352 353 { "l1i_misses", 0x0, 0x81 }, 354 { "l1i_reads", 0x0, 0x80 }, 355 { "l2_ads", ALL_CORES, 0x21 }, 356 357 { "l2_dbus_busy_rd", ALL_CORES, 0x23 }, 358 { "l2_ifetch", ALL_CORES, 0x28 }, 359 { "l2_ld", ALL_CORES, 0x29 }, 360 361 { "l2_lines_in", ALL_CORES, 0x24 }, 362 { "l2_lines_out", ALL_CORES, 0x26 }, 363 { "l2_lock", ALL_CORES, 0x2b }, 364 365 { "l2_m_lines_in", ALL_CORES, 0x25 }, 366 { "l2_m_lines_out", ALL_CORES, 0x27 }, 367 { "l2_no_req", ALL_CORES, 0x32 }, 368 369 { "l2_reject_busq", ALL_CORES, 0x30 }, 370 { "l2_rqsts", ALL_CORES, 0x2e }, 371 { "l2_st", ALL_CORES, 0x2a }, 372 373 { "load_block", 0x0, 0x03 }, 374 { "load_hit_pre", 0x0, 0x4c }, 375 { "machine_nukes", 0x0, 0xc3 }, 376 377 { "macro_insts", 0x0, 0xaa }, 378 { "memory_disambiguation", 0x0, 0x09 }, 379 { "misalign_mem_ref", 0x0, 0x05 }, 380 { "page_walks", 0x0, 0x0c }, 381 382 { "pref_rqsts_dn", 0x0, 0xf8 }, 383 { "pref_rqsts_up", 0x0, 0xf0 }, 384 { "rat_stalls", 0x0, 0xd2 }, 385 386 { "resource_stalls", 0x0, 0xdc }, 387 { "rs_uops_dispatched", 0x0, 0xa0 }, 388 { "seg_reg_renames", 0x0, 0xd5 }, 389 390 { "seg_rename_stalls", 0x0, 0xd4 }, 391 { "segment_reg_loads", 0x0, 0x06 }, 392 { "simd_assist", 0x0, 0xcd }, 393 394 { "simd_comp_inst_retired", 0x0, 0xca }, 395 { "simd_inst_retired", 0x0, 0xc7 }, 396 { "simd_instr_retired", 0x0, 0xce }, 397 398 { "simd_sat_instr_retired", 0x0, 0xcf }, 399 { "simd_sat_uop_exec", 0x0, 0xb1 }, 400 { 
"simd_uop_type_exec", 0x0, 0xb3 }, 401 402 { "simd_uops_exec", 0x0, 0xb0 }, 403 { "snoop_stall_drv", ALL_CORES | ALL_AGENTS, 0x7e }, 404 { "sse_pre_exec", 0x0, 0x07 }, 405 406 { "sse_pre_miss", 0x0, 0x4b }, 407 { "store_block", 0x0, 0x04 }, 408 { "thermal_trip", 0x0, 0x3b }, 409 410 { "uops_retired", 0x0, 0xc2 }, 411 { "x87_ops_retired", 0x0, 0xc1 }, 412 { "", 0x0, NT_END } 413 }; 414 415 /* 416 * If any of the pic specific events require privileges, make sure to add a 417 * check in configure_gpc() to find whether an event hard-coded as a number by 418 * the user has any privilege requirements 419 */ 420 static const struct nametable_core_uarch pic0_events[] = { 421 /* Alphabetical order of event name */ 422 423 { "cycles_div_busy", 0x0, 0x14 }, 424 { "fp_comp_ops_exe", 0x0, 0x10 }, 425 { "idle_during_div", 0x0, 0x18 }, 426 427 { "mem_load_retired", 0x0, 0xcb }, 428 { "rs_uops_dispatched_port", 0x0, 0xa1 }, 429 { "", 0x0, NT_END } 430 }; 431 432 static const struct nametable_core_uarch pic1_events[] = { 433 /* Alphabetical order of event name */ 434 435 { "delayed_bypass", 0x0, 0x19 }, 436 { "div", 0x0, 0x13 }, 437 { "fp_assist", 0x0, 0x11 }, 438 439 { "mul", 0x0, 0x12 }, 440 { "", 0x0, NT_END } 441 }; 442 443 /* FFC entries must be in order */ 444 static char *ffc_names_non_htt[] = { 445 "instr_retired.any", 446 "cpu_clk_unhalted.core", 447 "cpu_clk_unhalted.ref", 448 NULL 449 }; 450 451 static char *ffc_names_htt[] = { 452 "instr_retired.any", 453 "cpu_clk_unhalted.thread", 454 "cpu_clk_unhalted.ref", 455 NULL 456 }; 457 458 static char *ffc_genericnames[] = { 459 "PAPI_tot_ins", 460 "PAPI_tot_cyc", 461 "", 462 NULL 463 }; 464 465 static char **ffc_names = NULL; 466 static char **ffc_allnames = NULL; 467 static char **gpc_names = NULL; 468 static uint32_t versionid; 469 static uint64_t num_gpc; 470 static uint64_t width_gpc; 471 static uint64_t mask_gpc; 472 static uint64_t num_ffc; 473 static uint64_t width_ffc; 474 static uint64_t mask_ffc; 475 static uint_t 
total_pmc; 476 static uint64_t control_ffc; 477 static uint64_t control_gpc; 478 static uint64_t control_mask; 479 static uint32_t arch_events_vector; 480 481 #define IMPL_NAME_LEN 100 482 static char core_impl_name[IMPL_NAME_LEN]; 483 484 static const char *core_cpuref = 485 "See Appendix A of the \"Intel 64 and IA-32 Architectures Software" \ 486 " Developer's Manual Volume 3B: System Programming Guide, Part 2\"" \ 487 " Order Number: 253669-026US, February 2008"; 488 489 struct events_table_t { 490 uint8_t eventselect; 491 uint8_t unitmask; 492 uint64_t supported_counters; 493 const char *name; 494 }; 495 496 /* Used to describe which counters support an event. C(x) is a 64-bit mask with only bit x set, matching the uint64_t supported_counters field; C_ALL selects every counter. */ 497 #define C(x) (1ULL << (x)) 498 #define C0 C(0) 499 #define C1 C(1) 500 #define C2 C(2) 501 #define C3 C(3) 502 #define C_ALL 0xFFFFFFFFFFFFFFFFULL 503 504 /* Architectural events */ 505 #define ARCH_EVENTS_COMMON \ 506 { 0xc0, 0x00, C_ALL, "inst_retired.any_p" }, \ 507 { 0x3c, 0x01, C_ALL, "cpu_clk_unhalted.ref_p" }, \ 508 { 0x2e, 0x4f, C_ALL, "longest_lat_cache.reference" }, \ 509 { 0x2e, 0x41, C_ALL, "longest_lat_cache.miss" }, \ 510 { 0xc4, 0x00, C_ALL, "br_inst_retired.all_branches" }, \ 511 { 0xc5, 0x00, C_ALL, "br_misp_retired.all_branches" } 512 513 static const struct events_table_t arch_events_table_non_htt[] = { 514 { 0x3c, 0x00, C_ALL, "cpu_clk_unhalted.core" }, 515 ARCH_EVENTS_COMMON 516 }; 517 518 static const struct events_table_t arch_events_table_htt[] = { 519 { 0x3c, 0x00, C_ALL, "cpu_clk_unhalted.thread_p" }, 520 ARCH_EVENTS_COMMON 521 }; 522 523 static char *arch_genevents_table[] = { 524 "PAPI_tot_cyc", /* cpu_clk_unhalted.thread_p/core */ 525 "PAPI_tot_ins", /* inst_retired.any_p */ 526 "", /* cpu_clk_unhalted.ref_p */ 527 "", /* longest_lat_cache.reference */ 528 "", /* longest_lat_cache.miss */ 529 "", /* br_inst_retired.all_branches */ 530 "", /* br_misp_retired.all_branches */ 531 }; 532 533 static const struct events_table_t *arch_events_table = NULL; 534 static uint64_t 
known_arch_events; 535 static uint64_t known_ffc_num; 536 /* Generic (PAPI) event presets for the NHM table: { event select, unit mask, counters allowed, name }. The counter mask of a preset must agree with the raw event row in EVENTS_FAM6_NHM that has the same event select/unit mask. */ 537 #define GENERICEVENTS_FAM6_NHM \ 538 { 0xc4, 0x01, C0|C1|C2|C3, "PAPI_br_cn" }, /* br_inst_retired.conditional */ \ 539 { 0x1d, 0x01, C0|C1|C2|C3, "PAPI_hw_int" }, /* hw_int.rcv */ \ 540 { 0x17, 0x01, C0|C1|C2|C3, "PAPI_tot_iis" }, /* inst_queue_writes */ \ 541 { 0x43, 0x01, C0|C1, "PAPI_l1_dca" }, /* l1d_all_ref.any */ \ 542 { 0x24, 0x03, C0|C1|C2|C3, "PAPI_l1_dcm" }, /* l2_rqsts.loads; NOTE(review): this comment used to read "loads and rfos", which would be umask 0x0f - confirm which is intended */ \ 543 { 0x40, 0x0f, C0|C1, "PAPI_l1_dcr" }, /* l1d_cache_ld.mesi (counters 0 and 1 only) */ \ 544 { 0x41, 0x0f, C0|C1, "PAPI_l1_dcw" }, /* l1d_cache_st.mesi (counters 0 and 1 only) */ \ 545 { 0x80, 0x03, C0|C1|C2|C3, "PAPI_l1_ica" }, /* l1i.reads */ \ 546 { 0x80, 0x01, C0|C1|C2|C3, "PAPI_l1_ich" }, /* l1i.hits */ \ 547 { 0x80, 0x02, C0|C1|C2|C3, "PAPI_l1_icm" }, /* l1i.misses */ \ 548 { 0x80, 0x03, C0|C1|C2|C3, "PAPI_l1_icr" }, /* l1i.reads */ \ 549 { 0x24, 0x33, C0|C1|C2|C3, "PAPI_l1_ldm" }, /* l2_rqsts. loads and ifetches */\ 550 { 0x24, 0xff, C0|C1|C2|C3, "PAPI_l1_tcm" }, /* l2_rqsts.references */ \ 551 { 0x24, 0x02, C0|C1|C2|C3, "PAPI_l2_ldm" }, /* l2_rqsts.ld_miss */ \ 552 { 0x24, 0x08, C0|C1|C2|C3, "PAPI_l2_stm" }, /* l2_rqsts.rfo_miss */ \ 553 { 0x24, 0x3f, C0|C1|C2|C3, "PAPI_l2_tca" }, \ 554 /* l2_rqsts. loads, rfos and ifetches */ \ 555 { 0x24, 0x15, C0|C1|C2|C3, "PAPI_l2_tch" }, \ 556 /* l2_rqsts. ld_hit, rfo_hit and ifetch_hit */ \ 557 { 0x24, 0x2a, C0|C1|C2|C3, "PAPI_l2_tcm" }, \ 558 /* l2_rqsts. ld_miss, rfo_miss and ifetch_miss */ \ 559 { 0x24, 0x33, C0|C1|C2|C3, "PAPI_l2_tcr" }, /* l2_rqsts. 
loads and ifetches */\ 560 { 0x24, 0x0c, C0|C1|C2|C3, "PAPI_l2_tcw" }, /* l2_rqsts.rfos */ \ 561 { 0x2e, 0x4f, C0|C1|C2|C3, "PAPI_l3_tca" }, /* l3_lat_cache.reference */ \ 562 { 0x2e, 0x41, C0|C1|C2|C3, "PAPI_l3_tcm" }, /* l3_lat_cache.misses */ \ 563 { 0x0b, 0x01, C0|C1|C2|C3, "PAPI_ld_ins" }, /* mem_inst_retired.loads */ \ 564 { 0x0b, 0x03, C0|C1|C2|C3, "PAPI_lst_ins" }, \ 565 /* mem_inst_retired.loads and stores */ \ 566 { 0x26, 0xf0, C0|C1|C2|C3, "PAPI_prf_dm" }, /* l2_data_rqsts.prefetch.mesi */ \ 567 { 0x0b, 0x02, C0|C1|C2|C3, "PAPI_sr_ins" }, /* mem_inst_retired.stores */ \ 568 { 0x49, 0x01, C0|C1|C2|C3, "PAPI_tlb_dm" }, /* dtlb_misses.any */ \ 569 { 0x85, 0x01, C0|C1|C2|C3, "PAPI_tlb_im" } /* itlb_misses.any */ 570 571 572 #define EVENTS_FAM6_NHM \ 573 \ 574 { 0x80, 0x04, C0|C1|C2|C3, "l1i.cycles_stalled" }, \ 575 { 0x80, 0x01, C0|C1|C2|C3, "l1i.hits" }, \ 576 { 0x80, 0x02, C0|C1|C2|C3, "l1i.misses" }, \ 577 \ 578 { 0x80, 0x03, C0|C1|C2|C3, "l1i.reads" }, \ 579 { 0x82, 0x01, C0|C1|C2|C3, "large_itlb.hit" }, \ 580 { 0x87, 0x0F, C0|C1|C2|C3, "ild_stall.any" }, \ 581 \ 582 { 0x87, 0x04, C0|C1|C2|C3, "ild_stall.iq_full" }, \ 583 { 0x87, 0x01, C0|C1|C2|C3, "ild_stall.lcp" }, \ 584 { 0x87, 0x02, C0|C1|C2|C3, "ild_stall.mru" }, \ 585 \ 586 { 0x87, 0x08, C0|C1|C2|C3, "ild_stall.regen" }, \ 587 { 0xE6, 0x02, C0|C1|C2|C3, "baclear.bad_target" }, \ 588 { 0xE6, 0x01, C0|C1|C2|C3, "baclear.clear" }, \ 589 \ 590 { 0xE8, 0x01, C0|C1|C2|C3, "bpu_clears.early" }, \ 591 { 0xE8, 0x02, C0|C1|C2|C3, "bpu_clears.late" }, \ 592 { 0xE5, 0x01, C0|C1|C2|C3, "bpu_missed_call_ret" }, \ 593 \ 594 { 0xE0, 0x01, C0|C1|C2|C3, "br_inst_decoded" }, \ 595 { 0x88, 0x7F, C0|C1|C2|C3, "br_inst_exec.any" }, \ 596 { 0x88, 0x01, C0|C1|C2|C3, "br_inst_exec.cond" }, \ 597 \ 598 { 0x88, 0x02, C0|C1|C2|C3, "br_inst_exec.direct" }, \ 599 { 0x88, 0x10, C0|C1|C2|C3, "br_inst_exec.direct_near_call" }, \ 600 { 0x88, 0x20, C0|C1|C2|C3, "br_inst_exec.indirect_near_call" }, \ 601 \ 602 { 0x88, 0x04, 
C0|C1|C2|C3, "br_inst_exec.indirect_non_call" }, \ 603 { 0x88, 0x30, C0|C1|C2|C3, "br_inst_exec.near_calls" }, \ 604 { 0x88, 0x07, C0|C1|C2|C3, "br_inst_exec.non_calls" }, \ 605 \ 606 { 0x88, 0x08, C0|C1|C2|C3, "br_inst_exec.return_near" }, \ 607 { 0x88, 0x40, C0|C1|C2|C3, "br_inst_exec.taken" }, \ 608 { 0x89, 0x7F, C0|C1|C2|C3, "br_misp_exec.any" }, \ 609 \ 610 { 0x89, 0x01, C0|C1|C2|C3, "br_misp_exec.cond" }, \ 611 { 0x89, 0x02, C0|C1|C2|C3, "br_misp_exec.direct" }, \ 612 { 0x89, 0x10, C0|C1|C2|C3, "br_misp_exec.direct_near_call" }, \ 613 \ 614 { 0x89, 0x20, C0|C1|C2|C3, "br_misp_exec.indirect_near_call" }, \ 615 { 0x89, 0x04, C0|C1|C2|C3, "br_misp_exec.indirect_non_call" }, \ 616 { 0x89, 0x30, C0|C1|C2|C3, "br_misp_exec.near_calls" }, \ 617 \ 618 { 0x89, 0x07, C0|C1|C2|C3, "br_misp_exec.non_calls" }, \ 619 { 0x89, 0x08, C0|C1|C2|C3, "br_misp_exec.return_near" }, \ 620 { 0x89, 0x40, C0|C1|C2|C3, "br_misp_exec.taken" }, \ 621 \ 622 { 0x17, 0x01, C0|C1|C2|C3, "inst_queue_writes" }, \ 623 { 0x1E, 0x01, C0|C1|C2|C3, "inst_queue_write_cycles" }, \ 624 { 0xA7, 0x01, C0|C1|C2|C3, "baclear_force_iq" }, \ 625 \ 626 { 0xD0, 0x01, C0|C1|C2|C3, "macro_insts.decoded" }, \ 627 { 0xA6, 0x01, C0|C1|C2|C3, "macro_insts.fusions_decoded" }, \ 628 { 0x19, 0x01, C0|C1|C2|C3, "two_uop_insts_decoded" }, \ 629 \ 630 { 0x18, 0x01, C0|C1|C2|C3, "inst_decoded.dec0" }, \ 631 { 0xD1, 0x04, C0|C1|C2|C3, "uops_decoded.esp_folding" }, \ 632 { 0xD1, 0x08, C0|C1|C2|C3, "uops_decoded.esp_sync" }, \ 633 \ 634 { 0xD1, 0x02, C0|C1|C2|C3, "uops_decoded.ms" }, \ 635 { 0x20, 0x01, C0|C1|C2|C3, "lsd_overflow" }, \ 636 { 0x0E, 0x01, C0|C1|C2|C3, "uops_issued.any" }, \ 637 \ 638 { 0x0E, 0x02, C0|C1|C2|C3, "uops_issued.fused" }, \ 639 { 0xA2, 0x20, C0|C1|C2|C3, "resource_stalls.fpcw" }, \ 640 { 0xA2, 0x02, C0|C1|C2|C3, "resource_stalls.load" }, \ 641 \ 642 { 0xA2, 0x40, C0|C1|C2|C3, "resource_stalls.mxcsr" }, \ 643 { 0xA2, 0x04, C0|C1|C2|C3, "resource_stalls.rs_full" }, \ 644 { 0xA2, 0x08, C0|C1|C2|C3, 
"resource_stalls.store" }, \ 645 \ 646 { 0xA2, 0x01, C0|C1|C2|C3, "resource_stalls.any" }, \ 647 { 0xD2, 0x01, C0|C1|C2|C3, "rat_stalls.flags" }, \ 648 { 0xD2, 0x02, C0|C1|C2|C3, "rat_stalls.registers" }, \ 649 \ 650 { 0xD2, 0x04, C0|C1|C2|C3, "rat_stalls.rob_read_port" }, \ 651 { 0xD2, 0x08, C0|C1|C2|C3, "rat_stalls.scoreboard" }, \ 652 { 0xD2, 0x0F, C0|C1|C2|C3, "rat_stalls.any" }, \ 653 \ 654 { 0xD4, 0x01, C0|C1|C2|C3, "seg_rename_stalls" }, \ 655 { 0xD5, 0x01, C0|C1|C2|C3, "es_reg_renames" }, \ 656 { 0x10, 0x02, C0|C1|C2|C3, "fp_comp_ops_exe.mmx" }, \ 657 \ 658 { 0x10, 0x80, C0|C1|C2|C3, "fp_comp_ops_exe.sse_double_precision" }, \ 659 { 0x10, 0x04, C0|C1|C2|C3, "fp_comp_ops_exe.sse_fp" }, \ 660 { 0x10, 0x10, C0|C1|C2|C3, "fp_comp_ops_exe.sse_fp_packed" }, \ 661 \ 662 { 0x10, 0x20, C0|C1|C2|C3, "fp_comp_ops_exe.sse_fp_scalar" }, \ 663 { 0x10, 0x40, C0|C1|C2|C3, "fp_comp_ops_exe.sse_single_precision" }, \ 664 { 0x10, 0x08, C0|C1|C2|C3, "fp_comp_ops_exe.sse2_integer" }, \ 665 \ 666 { 0x10, 0x01, C0|C1|C2|C3, "fp_comp_ops_exe.x87" }, \ 667 { 0x14, 0x01, C0|C1|C2|C3, "arith.cycles_div_busy" }, \ 668 { 0x14, 0x02, C0|C1|C2|C3, "arith.mul" }, \ 669 \ 670 { 0x12, 0x04, C0|C1|C2|C3, "simd_int_128.pack" }, \ 671 { 0x12, 0x20, C0|C1|C2|C3, "simd_int_128.packed_arith" }, \ 672 { 0x12, 0x10, C0|C1|C2|C3, "simd_int_128.packed_logical" }, \ 673 \ 674 { 0x12, 0x01, C0|C1|C2|C3, "simd_int_128.packed_mpy" }, \ 675 { 0x12, 0x02, C0|C1|C2|C3, "simd_int_128.packed_shift" }, \ 676 { 0x12, 0x40, C0|C1|C2|C3, "simd_int_128.shuffle_move" }, \ 677 \ 678 { 0x12, 0x08, C0|C1|C2|C3, "simd_int_128.unpack" }, \ 679 { 0xFD, 0x04, C0|C1|C2|C3, "simd_int_64.pack" }, \ 680 { 0xFD, 0x20, C0|C1|C2|C3, "simd_int_64.packed_arith" }, \ 681 \ 682 { 0xFD, 0x10, C0|C1|C2|C3, "simd_int_64.packed_logical" }, \ 683 { 0xFD, 0x01, C0|C1|C2|C3, "simd_int_64.packed_mpy" }, \ 684 { 0xFD, 0x02, C0|C1|C2|C3, "simd_int_64.packed_shift" }, \ 685 \ 686 { 0xFD, 0x40, C0|C1|C2|C3, "simd_int_64.shuffle_move" }, \ 687 { 
0xFD, 0x08, C0|C1|C2|C3, "simd_int_64.unpack" }, \ 688 { 0xB1, 0x01, C0|C1|C2|C3, "uops_executed.port0" }, \ 689 \ 690 { 0xB1, 0x02, C0|C1|C2|C3, "uops_executed.port1" }, \ 691 { 0x40, 0x04, C0|C1, "l1d_cache_ld.e_state" }, \ 692 { 0x40, 0x01, C0|C1, "l1d_cache_ld.i_state" }, \ 693 \ 694 { 0x40, 0x08, C0|C1, "l1d_cache_ld.m_state" }, \ 695 { 0x40, 0x0F, C0|C1, "l1d_cache_ld.mesi" }, \ 696 { 0x40, 0x02, C0|C1, "l1d_cache_ld.s_state" }, \ 697 \ 698 { 0x41, 0x04, C0|C1, "l1d_cache_st.e_state" }, \ 699 { 0x41, 0x08, C0|C1, "l1d_cache_st.m_state" }, \ 700 { 0x41, 0x0F, C0|C1, "l1d_cache_st.mesi" }, \ 701 \ 702 { 0x41, 0x02, C0|C1, "l1d_cache_st.s_state" }, \ 703 { 0x42, 0x04, C0|C1, "l1d_cache_lock.e_state" }, \ 704 { 0x42, 0x01, C0|C1, "l1d_cache_lock.hit" }, \ 705 \ 706 { 0x42, 0x08, C0|C1, "l1d_cache_lock.m_state" }, \ 707 { 0x42, 0x02, C0|C1, "l1d_cache_lock.s_state" }, \ 708 { 0x43, 0x01, C0|C1, "l1d_all_ref.any" }, \ 709 \ 710 { 0x43, 0x02, C0|C1, "l1d_all_ref.cacheable" }, \ 711 { 0x4B, 0x01, C0|C1, "mmx2_mem_exec.nta" }, \ 712 { 0x4C, 0x01, C0|C1, "load_hit_pre" }, \ 713 \ 714 { 0x4E, 0x02, C0|C1, "l1d_prefetch.miss" }, \ 715 { 0x4E, 0x01, C0|C1, "l1d_prefetch.requests" }, \ 716 { 0x4E, 0x04, C0|C1, "l1d_prefetch.triggers" }, \ 717 \ 718 { 0x51, 0x04, C0|C1, "l1d.m_evict" }, \ 719 { 0x51, 0x02, C0|C1, "l1d.m_repl" }, \ 720 { 0x51, 0x08, C0|C1, "l1d.m_snoop_evict" }, \ 721 \ 722 { 0x51, 0x01, C0|C1, "l1d.repl" }, \ 723 { 0x52, 0x01, C0|C1, "l1d_cache_prefetch_lock_fb_hit" }, \ 724 { 0x53, 0x01, C0|C1, "l1d_cache_lock_fb_hit" }, \ 725 \ 726 { 0x63, 0x02, C0|C1, "cache_lock_cycles.l1d" }, \ 727 { 0x63, 0x01, C0|C1, "cache_lock_cycles.l1d_l2" }, \ 728 { 0x06, 0x04, C0|C1|C2|C3, "store_blocks.at_ret" }, \ 729 \ 730 { 0x06, 0x08, C0|C1|C2|C3, "store_blocks.l1d_block" }, \ 731 { 0x06, 0x01, C0|C1|C2|C3, "store_blocks.not_sta" }, \ 732 { 0x06, 0x02, C0|C1|C2|C3, "store_blocks.sta" }, \ 733 \ 734 { 0x13, 0x07, C0|C1|C2|C3, "load_dispatch.any" }, \ 735 { 0x13, 0x04, 
C0|C1|C2|C3, "load_dispatch.mob" }, \ 736 { 0x13, 0x01, C0|C1|C2|C3, "load_dispatch.rs" }, \ 737 \ 738 { 0x13, 0x02, C0|C1|C2|C3, "load_dispatch.rs_delayed" }, \ 739 { 0x08, 0x01, C0|C1|C2|C3, "dtlb_load_misses.any" }, \ 740 { 0x08, 0x20, C0|C1|C2|C3, "dtlb_load_misses.pde_miss" }, \ 741 \ 742 { 0x08, 0x02, C0|C1|C2|C3, "dtlb_load_misses.walk_completed" }, \ 743 { 0x49, 0x01, C0|C1|C2|C3, "dtlb_misses.any" }, \ 744 { 0x49, 0x10, C0|C1|C2|C3, "dtlb_misses.stlb_hit" }, \ 745 \ 746 { 0x49, 0x02, C0|C1|C2|C3, "dtlb_misses.walk_completed" }, \ 747 { 0x4F, 0x02, C0|C1|C2|C3, "ept.epde_miss" }, \ 748 { 0x4F, 0x08, C0|C1|C2|C3, "ept.epdpe_miss" }, \ 749 \ 750 { 0x85, 0x01, C0|C1|C2|C3, "itlb_misses.any" }, \ 751 { 0x85, 0x02, C0|C1|C2|C3, "itlb_misses.walk_completed" }, \ 752 { 0x24, 0xAA, C0|C1|C2|C3, "l2_rqsts.miss" }, \ 753 \ 754 { 0x24, 0xFF, C0|C1|C2|C3, "l2_rqsts.references" }, \ 755 { 0x24, 0x10, C0|C1|C2|C3, "l2_rqsts.ifetch_hit" }, \ 756 { 0x24, 0x20, C0|C1|C2|C3, "l2_rqsts.ifetch_miss" }, \ 757 \ 758 { 0x24, 0x30, C0|C1|C2|C3, "l2_rqsts.ifetches" }, \ 759 { 0x24, 0x01, C0|C1|C2|C3, "l2_rqsts.ld_hit" }, \ 760 { 0x24, 0x02, C0|C1|C2|C3, "l2_rqsts.ld_miss" }, \ 761 \ 762 { 0x24, 0x03, C0|C1|C2|C3, "l2_rqsts.loads" }, \ 763 { 0x24, 0x40, C0|C1|C2|C3, "l2_rqsts.prefetch_hit" }, \ 764 { 0x24, 0x80, C0|C1|C2|C3, "l2_rqsts.prefetch_miss" }, \ 765 \ 766 { 0x24, 0xC0, C0|C1|C2|C3, "l2_rqsts.prefetches" }, \ 767 { 0x24, 0x04, C0|C1|C2|C3, "l2_rqsts.rfo_hit" }, \ 768 { 0x24, 0x08, C0|C1|C2|C3, "l2_rqsts.rfo_miss" }, \ 769 \ 770 { 0x24, 0x0C, C0|C1|C2|C3, "l2_rqsts.rfos" }, \ 771 { 0x26, 0xFF, C0|C1|C2|C3, "l2_data_rqsts.any" }, \ 772 { 0x26, 0x04, C0|C1|C2|C3, "l2_data_rqsts.demand.e_state" }, \ 773 \ 774 { 0x26, 0x01, C0|C1|C2|C3, "l2_data_rqsts.demand.i_state" }, \ 775 { 0x26, 0x08, C0|C1|C2|C3, "l2_data_rqsts.demand.m_state" }, \ 776 { 0x26, 0x0F, C0|C1|C2|C3, "l2_data_rqsts.demand.mesi" }, \ 777 \ 778 { 0x26, 0x02, C0|C1|C2|C3, "l2_data_rqsts.demand.s_state" }, \ 779 { 
0x26, 0x40, C0|C1|C2|C3, "l2_data_rqsts.prefetch.e_state" }, \ 780 { 0x26, 0x10, C0|C1|C2|C3, "l2_data_rqsts.prefetch.i_state" }, \ 781 \ 782 { 0x26, 0x80, C0|C1|C2|C3, "l2_data_rqsts.prefetch.m_state" }, \ 783 { 0x26, 0xF0, C0|C1|C2|C3, "l2_data_rqsts.prefetch.mesi" }, \ 784 { 0x26, 0x20, C0|C1|C2|C3, "l2_data_rqsts.prefetch.s_state" }, \ 785 \ 786 { 0x27, 0x40, C0|C1|C2|C3, "l2_write.lock.e_state" }, \ 787 { 0x27, 0x10, C0|C1|C2|C3, "l2_write.lock.i_state" }, \ 788 { 0x27, 0x20, C0|C1|C2|C3, "l2_write.lock.s_state" }, \ 789 \ 790 { 0x27, 0x0E, C0|C1|C2|C3, "l2_write.rfo.hit" }, \ 791 { 0x27, 0x01, C0|C1|C2|C3, "l2_write.rfo.i_state" }, \ 792 { 0x27, 0x08, C0|C1|C2|C3, "l2_write.rfo.m_state" }, \ 793 \ 794 { 0x27, 0x0F, C0|C1|C2|C3, "l2_write.rfo.mesi" }, \ 795 { 0x27, 0x02, C0|C1|C2|C3, "l2_write.rfo.s_state" }, \ 796 { 0x28, 0x04, C0|C1|C2|C3, "l1d_wb_l2.e_state" }, \ 797 \ 798 { 0x28, 0x01, C0|C1|C2|C3, "l1d_wb_l2.i_state" }, \ 799 { 0x28, 0x08, C0|C1|C2|C3, "l1d_wb_l2.m_state" }, \ 800 { 0xF0, 0x80, C0|C1|C2|C3, "l2_transactions.any" }, \ 801 \ 802 { 0xF0, 0x20, C0|C1|C2|C3, "l2_transactions.fill" }, \ 803 { 0xF0, 0x04, C0|C1|C2|C3, "l2_transactions.ifetch" }, \ 804 { 0xF0, 0x10, C0|C1|C2|C3, "l2_transactions.l1d_wb" }, \ 805 \ 806 { 0xF0, 0x01, C0|C1|C2|C3, "l2_transactions.load" }, \ 807 { 0xF0, 0x08, C0|C1|C2|C3, "l2_transactions.prefetch" }, \ 808 { 0xF0, 0x02, C0|C1|C2|C3, "l2_transactions.rfo" }, \ 809 \ 810 { 0xF0, 0x40, C0|C1|C2|C3, "l2_transactions.wb" }, \ 811 { 0xF1, 0x07, C0|C1|C2|C3, "l2_lines_in.any" }, \ 812 { 0xF1, 0x04, C0|C1|C2|C3, "l2_lines_in.e_state" }, \ 813 \ 814 { 0xF1, 0x02, C0|C1|C2|C3, "l2_lines_in.s_state" }, \ 815 { 0xF2, 0x0F, C0|C1|C2|C3, "l2_lines_out.any" }, \ 816 { 0xF2, 0x01, C0|C1|C2|C3, "l2_lines_out.demand_clean" }, \ 817 \ 818 { 0xF2, 0x02, C0|C1|C2|C3, "l2_lines_out.demand_dirty" }, \ 819 { 0xF2, 0x04, C0|C1|C2|C3, "l2_lines_out.prefetch_clean" }, \ 820 { 0x6C, 0x01, C0|C1|C2|C3, "io_transactions" }, \ 821 \ 822 { 0xB0, 
0x80, C0|C1|C2|C3, "offcore_requests.any" }, \ 823 { 0xB0, 0x10, C0|C1|C2|C3, "offcore_requests.any.rfo" }, \ 824 { 0xB0, 0x40, C0|C1|C2|C3, "offcore_requests.l1d_writeback" }, \ 825 \ 826 { 0xB8, 0x01, C0|C1|C2|C3, "snoop_response.hit" }, \ 827 { 0xB8, 0x02, C0|C1|C2|C3, "snoop_response.hite" }, \ 828 { 0xB8, 0x04, C0|C1|C2|C3, "snoop_response.hitm" }, \ 829 \ 830 { 0xF4, 0x10, C0|C1|C2|C3, "sq_misc.split_lock" }, \ 831 { 0x0B, 0x01, C0|C1|C2|C3, "mem_inst_retired.loads" }, \ 832 { 0x0B, 0x02, C0|C1|C2|C3, "mem_inst_retired.stores" }, \ 833 \ 834 { 0xC0, 0x04, C0|C1|C2|C3, "inst_retired.mmx" }, \ 835 { 0xC0, 0x02, C0|C1|C2|C3, "inst_retired.x87" }, \ 836 { 0xC7, 0x04, C0|C1|C2|C3, "ssex_uops_retired.packed_double" }, \ 837 \ 838 { 0xC7, 0x01, C0|C1|C2|C3, "ssex_uops_retired.packed_single" }, \ 839 { 0xC7, 0x08, C0|C1|C2|C3, "ssex_uops_retired.scalar_double" }, \ 840 { 0xC7, 0x02, C0|C1|C2|C3, "ssex_uops_retired.scalar_single" }, \ 841 \ 842 { 0xC7, 0x10, C0|C1|C2|C3, "ssex_uops_retired.vector_integer" }, \ 843 { 0xC2, 0x01, C0|C1|C2|C3, "uops_retired.any" }, \ 844 { 0xC2, 0x04, C0|C1|C2|C3, "uops_retired.macro_fused" }, \ 845 \ 846 { 0xC8, 0x20, C0|C1|C2|C3, "itlb_miss_retired" }, \ 847 { 0xCB, 0x80, C0|C1|C2|C3, "mem_load_retired.dtlb_miss" }, \ 848 { 0xCB, 0x40, C0|C1|C2|C3, "mem_load_retired.hit_lfb" }, \ 849 \ 850 { 0xCB, 0x01, C0|C1|C2|C3, "mem_load_retired.l1d_hit" }, \ 851 { 0xCB, 0x02, C0|C1|C2|C3, "mem_load_retired.l2_hit" }, \ 852 { 0xCB, 0x10, C0|C1|C2|C3, "mem_load_retired.llc_miss" }, \ 853 \ 854 { 0xCB, 0x04, C0|C1|C2|C3, "mem_load_retired.llc_unshared_hit" }, \ 855 { 0xCB, 0x08, C0|C1|C2|C3, "mem_load_retired.other_core_l2_hit_hitm" }, \ 856 { 0x0F, 0x02, C0|C1|C2|C3, "mem_uncore_retired.other_core_l2_hitm" }, \ 857 \ 858 { 0x0F, 0x08, C0|C1|C2|C3, "mem_uncore_retired.remote_cache_local_home_hit" },\ 859 { 0x0F, 0x10, C0|C1|C2|C3, "mem_uncore_retired.remote_dram" }, \ 860 { 0x0F, 0x20, C0|C1|C2|C3, "mem_uncore_retired.local_dram" }, \ 861 \ 862 { 
0x0C, 0x01, C0|C1|C2|C3, "mem_store_retired.dtlb_miss" },		\
	{ 0xC4, 0x01, C0|C1|C2|C3, "br_inst_retired.conditional" },	\
	{ 0xC4, 0x02, C0|C1|C2|C3, "br_inst_retired.near_call" },	\
									\
	{ 0xC5, 0x02, C0|C1|C2|C3, "br_misp_retired.near_call" },	\
	{ 0xDB, 0x01, C0|C1|C2|C3, "uop_unfusion" },			\
	{ 0xF7, 0x01, C0|C1|C2|C3, "fp_assist.all" },			\
									\
	{ 0xF7, 0x04, C0|C1|C2|C3, "fp_assist.input" },			\
	{ 0xF7, 0x02, C0|C1|C2|C3, "fp_assist.output" },		\
	{ 0xCC, 0x03, C0|C1|C2|C3, "fp_mmx_trans.any" },		\
									\
	{ 0xCC, 0x01, C0|C1|C2|C3, "fp_mmx_trans.to_fp" },		\
	{ 0xCC, 0x02, C0|C1|C2|C3, "fp_mmx_trans.to_mmx" },		\
	{ 0xC3, 0x04, C0|C1|C2|C3, "machine_clears.smc" }

/*
 * Generic (PAPI preset) events for Family 6 Model 28.
 *
 * Each initializer is { event select, unit mask, supported-counter bitmap,
 * name }.  The trailing comment on each entry names the native event(s) the
 * preset corresponds to; where two native events are joined with '|' the unit
 * mask is the OR of their unit masks in EVENTS_FAM6_MOD28.
 */
#define	GENERICEVENTS_FAM6_MOD28					       \
	{ 0xc4, 0x00, C0|C1, "PAPI_br_ins" }, /* br_inst_retired.any */	       \
	{ 0xc5, 0x00, C0|C1, "PAPI_br_msp" }, /* br_inst_retired.mispred */   \
	{ 0xc4, 0x03, C0|C1, "PAPI_br_ntk" },				       \
		/* br_inst_retired.pred_not_taken|mispred_not_taken */	       \
	{ 0xc4, 0x05, C0|C1, "PAPI_br_prc" },				       \
		/* br_inst_retired.pred_not_taken|pred_taken */		       \
	{ 0xc8, 0x00, C0|C1, "PAPI_hw_int" }, /* hw_int_rcv */		       \
	{ 0xaa, 0x03, C0|C1, "PAPI_tot_iis" }, /* macro_insts.all_decoded */  \
	{ 0x40, 0x23, C0|C1, "PAPI_l1_dca" }, /* l1d_cache.ld|st */	       \
	{ 0x2a, 0x41, C0|C1, "PAPI_l2_stm" }, /* l2_st.self.i_state */	       \
	{ 0x2e, 0x4f, C0|C1, "PAPI_l2_tca" }, /* longest_lat_cache.reference */ \
	{ 0x2e, 0x4e, C0|C1, "PAPI_l2_tch" }, /* l2_rqsts.mes */	       \
	{ 0x2e, 0x41, C0|C1, "PAPI_l2_tcm" }, /* longest_lat_cache.miss */    \
	{ 0x2a, 0x4f, C0|C1, "PAPI_l2_tcw" }, /* l2_st.self.mesi */	       \
	{ 0x08, 0x07, C0|C1, "PAPI_tlb_dm" }, /* data_tlb_misses.dtlb_miss */ \
	{ 0x82, 0x02, C0|C1, "PAPI_tlb_im" } /* itlb.misses */


/*
 * Model-specific (non-architectural) events for Family 6 Model 28.
 *
 * Same entry layout as above: { event select, unit mask, supported-counter
 * bitmap, name }.  Every event here is countable on counters 0 and 1 only
 * (C0|C1).  The list has no terminator; the table that expands this macro
 * appends the NT_END entry.
 */
#define	EVENTS_FAM6_MOD28						\
	{ 0x2, 0x81, C0|C1, "store_forwards.good" },			\
	{ 0x6, 0x0, C0|C1, "segment_reg_loads.any" },			\
	{ 0x7, 0x1, C0|C1, "prefetch.prefetcht0" },			\
	{ 0x7, 0x6, C0|C1, "prefetch.sw_l2" },				\
	{ 0x7, 0x8, C0|C1, "prefetch.prefetchnta" },			\
	{ 0x8, 0x7, C0|C1, "data_tlb_misses.dtlb_miss" },		\
	{ 0x8, 0x5, C0|C1, "data_tlb_misses.dtlb_miss_ld" },		\
	{ 0x8, 0x9, C0|C1, "data_tlb_misses.l0_dtlb_miss_ld" },		\
	{ 0x8, 0x6, C0|C1, "data_tlb_misses.dtlb_miss_st" },		\
	{ 0xC, 0x3, C0|C1, "page_walks.cycles" },			\
	{ 0x10, 0x1, C0|C1, "x87_comp_ops_exe.any.s" },			\
	{ 0x10, 0x81, C0|C1, "x87_comp_ops_exe.any.ar" },		\
	{ 0x11, 0x1, C0|C1, "fp_assist" },				\
	{ 0x11, 0x81, C0|C1, "fp_assist.ar" },				\
	{ 0x12, 0x1, C0|C1, "mul.s" },					\
	{ 0x12, 0x81, C0|C1, "mul.ar" },				\
	{ 0x13, 0x1, C0|C1, "div.s" },					\
	{ 0x13, 0x81, C0|C1, "div.ar" },				\
	{ 0x14, 0x1, C0|C1, "cycles_div_busy" },			\
	{ 0x21, 0x0, C0|C1, "l2_ads" },					\
	{ 0x22, 0x0, C0|C1, "l2_dbus_busy" },				\
	{ 0x24, 0x0, C0|C1, "l2_lines_in" },				\
	{ 0x25, 0x0, C0|C1, "l2_m_lines_in" },				\
	{ 0x26, 0x0, C0|C1, "l2_lines_out" },				\
	{ 0x27, 0x0, C0|C1, "l2_m_lines_out" },				\
	{ 0x28, 0x0, C0|C1, "l2_ifetch" },				\
	{ 0x29, 0x0, C0|C1, "l2_ld" },					\
	{ 0x2A, 0x0, C0|C1, "l2_st" },					\
	{ 0x2B, 0x0, C0|C1, "l2_lock" },				\
	{ 0x2E, 0x0, C0|C1, "l2_rqsts" },				\
	{ 0x2E, 0x41, C0|C1, "l2_rqsts.self.demand.i_state" },		\
	{ 0x2E, 0x4F, C0|C1, "l2_rqsts.self.demand.mesi" },		\
	{ 0x30, 0x0, C0|C1, "l2_reject_bus_q" },			\
	{ 0x32, 0x0, C0|C1, "l2_no_req" },				\
	{ 0x3A, 0x0, C0|C1, "eist_trans" },				\
	{ 0x3B, 0xC0, C0|C1, "thermal_trip" },				\
	{ 0x3C, 0x0, C0|C1, "cpu_clk_unhalted.core_p" },		\
	{ 0x3C, 0x1, C0|C1, "cpu_clk_unhalted.bus" },			\
	{ 0x3C, 0x2, C0|C1, "cpu_clk_unhalted.no_other" },		\
	{ 0x40, 0x21, C0|C1, "l1d_cache.ld" },				\
	{ 0x40, 0x22, C0|C1, "l1d_cache.st" },				\
	{ 0x60, 0x0, C0|C1, "bus_request_outstanding" },		\
	{ 0x61, 0x0, C0|C1, "bus_bnr_drv" },				\
	{ 0x62, 0x0, C0|C1, "bus_drdy_clocks" },			\
	{ 0x63, 0x0, C0|C1, "bus_lock_clocks" },			\
	{ 0x64, 0x0, C0|C1, "bus_data_rcv" },				\
	{ 0x65, 0x0, C0|C1, "bus_trans_brd" },				\
	{ 0x66, 0x0, C0|C1, "bus_trans_rfo" },				\
	{ 0x67, 0x0, C0|C1, "bus_trans_wb" },				\
	{ 0x68, 0x0, C0|C1, "bus_trans_ifetch" },			\
	{ 0x69, 0x0, C0|C1, "bus_trans_inval" },			\
	{ 0x6A, 0x0, C0|C1, "bus_trans_pwr" },				\
	{ 0x6B, 0x0, C0|C1, "bus_trans_p" },				\
	{ 0x6C, 0x0, C0|C1, "bus_trans_io" },				\
	{ 0x6D, 0x0, C0|C1, "bus_trans_def" },				\
	{ 0x6E, 0x0, C0|C1, "bus_trans_burst" },			\
	{ 0x6F, 0x0, C0|C1, "bus_trans_mem" },				\
	{ 0x70, 0x0, C0|C1, "bus_trans_any" },				\
	{ 0x77, 0x0, C0|C1, "ext_snoop" },				\
	{ 0x7A, 0x0, C0|C1, "bus_hit_drv" },				\
	{ 0x7B, 0x0, C0|C1, "bus_hitm_drv" },				\
	{ 0x7D, 0x0, C0|C1, "busq_empty" },				\
	{ 0x7E, 0x0, C0|C1, "snoop_stall_drv" },			\
	{ 0x7F, 0x0, C0|C1, "bus_io_wait" },				\
	{ 0x80, 0x3, C0|C1, "icache.accesses" },			\
	{ 0x80, 0x2, C0|C1, "icache.misses" },				\
	{ 0x82, 0x4, C0|C1, "itlb.flush" },				\
	{ 0x82, 0x2, C0|C1, "itlb.misses" },				\
	{ 0xAA, 0x2, C0|C1, "macro_insts.cisc_decoded" },		\
	{ 0xAA, 0x3, C0|C1, "macro_insts.all_decoded" },		\
	{ 0xB0, 0x0, C0|C1, "simd_uops_exec.s" },			\
	{ 0xB0, 0x80, C0|C1, "simd_uops_exec.ar" },			\
	{ 0xB1, 0x0, C0|C1, "simd_sat_uop_exec.s" },			\
	{ 0xB1, 0x80, C0|C1, "simd_sat_uop_exec.ar" },			\
	{ 0xB3, 0x1, C0|C1, "simd_uop_type_exec.mul.s" },		\
	{ 0xB3, 0x81, C0|C1, "simd_uop_type_exec.mul.ar" },		\
	{ 0xB3, 0x02, C0|C1, "simd_uop_type_exec.shift.s" },		\
	{ 0xB3, 0x82, C0|C1, "simd_uop_type_exec.shift.ar" },		\
	{ 0xB3, 0x04, C0|C1, "simd_uop_type_exec.pack.s" },		\
	{ 0xB3, 0x84, C0|C1, "simd_uop_type_exec.pack.ar" },		\
	{ 0xB3, 0x08, C0|C1, "simd_uop_type_exec.unpack.s" },		\
	{ 0xB3, 0x88, C0|C1, "simd_uop_type_exec.unpack.ar" },		\
	{ 0xB3, 0x10, C0|C1, "simd_uop_type_exec.logical.s" },		\
	{ 0xB3, 0x90, C0|C1, "simd_uop_type_exec.logical.ar" },		\
	{ 0xB3, 0x20, C0|C1, "simd_uop_type_exec.arithmetic.s" },	\
	{ 0xB3, 0xA0, C0|C1, "simd_uop_type_exec.arithmetic.ar" },	\
	{ 0xC2, 0x10, C0|C1, "uops_retired.any" },			\
	{ 0xC3, 0x1, C0|C1, "machine_clears.smc" },			\
	{ 0xC4, 0x0, C0|C1, "br_inst_retired.any" },			\
	{ 0xC4, 0x1, C0|C1, "br_inst_retired.pred_not_taken" },		\
	{ 0xC4, 0x2, C0|C1, "br_inst_retired.mispred_not_taken" },	\
	{ 0xC4, 0x4, C0|C1, "br_inst_retired.pred_taken" },		\
	{ 0xC4, 0x8, C0|C1, "br_inst_retired.mispred_taken" },		\
	{ 0xC4, 0xA, C0|C1, "br_inst_retired.mispred" },		\
	{ 0xC4, 0xC, C0|C1, "br_inst_retired.taken" },			\
	{ 0xC4, 0xF, C0|C1, "br_inst_retired.any1" },			\
	{ 0xC6, 0x1, C0|C1, "cycles_int_masked.cycles_int_masked" },	\
	{ 0xC6, 0x2, C0|C1,						\
		"cycles_int_masked.cycles_int_pending_and_masked" },	\
	{ 0xC7, 0x1, C0|C1, "simd_inst_retired.packed_single" },	\
	{ 0xC7, 0x2, C0|C1, "simd_inst_retired.scalar_single" },	\
	{ 0xC7, 0x4, C0|C1, "simd_inst_retired.packed_double" },	\
	{ 0xC7, 0x8, C0|C1, "simd_inst_retired.scalar_double" },	\
	{ 0xC7, 0x10, C0|C1, "simd_inst_retired.vector" },		\
	{ 0xC7, 0x1F, C0|C1, "simd_inst_retired.any" },			\
	{ 0xC8, 0x00, C0|C1, "hw_int_rcv" },				\
	{ 0xCA, 0x1, C0|C1, "simd_comp_inst_retired.packed_single" },	\
	{ 0xCA, 0x2, C0|C1, "simd_comp_inst_retired.scalar_single" },	\
	{ 0xCA, 0x4, C0|C1, "simd_comp_inst_retired.packed_double" },	\
	{ 0xCA, 0x8, C0|C1, "simd_comp_inst_retired.scalar_double" },	\
	{ 0xCB, 0x1, C0|C1, "mem_load_retired.l2_hit" },		\
	{ 0xCB, 0x2, C0|C1, "mem_load_retired.l2_miss" },		\
	{ 0xCB, 0x4, C0|C1, "mem_load_retired.dtlb_miss" },		\
	{ 0xCD, 0x0, C0|C1, "simd_assist" },				\
	{ 0xCE, 0x0, C0|C1, "simd_instr_retired" },			\
	{ 0xCF, 0x0, C0|C1, "simd_sat_instr_retired" },			\
	{ 0xE0, 0x1, C0|C1, "br_inst_decoded" },			\
	{ 0xE4, 0x1, C0|C1, "bogus_br" },				\
	{ 0xE6, 0x1, C0|C1, "baclears.any" }

/*
 * Model-specific event table in use on this system.  Remains NULL unless
 * core_pcbe_init() recognizes the CPU model, in which case only the
 * architectural events are offered on the general-purpose counters.
 */
static const struct events_table_t *events_table = NULL;

/*
 * Event table used for the models core_pcbe_init() labels Nehalem, Westmere
 * and Nehalem-EX: generic presets, then native events, then the NT_END
 * terminator.
 */
const struct events_table_t
events_fam6_nhm[] = { 1021 GENERICEVENTS_FAM6_NHM, 1022 EVENTS_FAM6_NHM, 1023 { NT_END, 0, 0, "" } 1024 }; 1025 1026 const struct events_table_t events_fam6_mod28[] = { 1027 GENERICEVENTS_FAM6_MOD28, 1028 EVENTS_FAM6_MOD28, 1029 { NT_END, 0, 0, "" } 1030 }; 1031 1032 /* 1033 * Initialize string containing list of supported general-purpose counter 1034 * events for processors of Penryn and Merom Family 1035 */ 1036 static void 1037 pcbe_init_core_uarch() 1038 { 1039 const struct nametable_core_uarch *n; 1040 const struct generic_events *k; 1041 const struct nametable_core_uarch *picspecific_events; 1042 const struct generic_events *picspecific_genericevents; 1043 size_t common_size; 1044 size_t size; 1045 uint64_t i; 1046 1047 gpc_names = kmem_alloc(num_gpc * sizeof (char *), KM_SLEEP); 1048 1049 /* Calculate space needed to save all the common event names */ 1050 common_size = 0; 1051 for (n = cmn_gpc_events_core_uarch; n->event_num != NT_END; n++) { 1052 common_size += strlen(n->name) + 1; 1053 } 1054 1055 for (k = cmn_generic_events; k->event_num != NT_END; k++) { 1056 common_size += strlen(k->name) + 1; 1057 } 1058 1059 for (i = 0; i < num_gpc; i++) { 1060 size = 0; 1061 picspecific_genericevents = NULL; 1062 1063 switch (i) { 1064 case 0: 1065 picspecific_events = pic0_events; 1066 picspecific_genericevents = generic_events_pic0; 1067 break; 1068 case 1: 1069 picspecific_events = pic1_events; 1070 break; 1071 default: 1072 picspecific_events = NULL; 1073 break; 1074 } 1075 if (picspecific_events != NULL) { 1076 for (n = picspecific_events; 1077 n->event_num != NT_END; 1078 n++) { 1079 size += strlen(n->name) + 1; 1080 } 1081 } 1082 if (picspecific_genericevents != NULL) { 1083 for (k = picspecific_genericevents; 1084 k->event_num != NT_END; k++) { 1085 size += strlen(k->name) + 1; 1086 } 1087 } 1088 1089 gpc_names[i] = 1090 kmem_alloc(size + common_size + 1, KM_SLEEP); 1091 1092 gpc_names[i][0] = '\0'; 1093 if (picspecific_events != NULL) { 1094 for (n = 
picspecific_events; 1095 n->event_num != NT_END; n++) { 1096 (void) strcat(gpc_names[i], n->name); 1097 (void) strcat(gpc_names[i], ","); 1098 } 1099 } 1100 if (picspecific_genericevents != NULL) { 1101 for (k = picspecific_genericevents; 1102 k->event_num != NT_END; k++) { 1103 (void) strcat(gpc_names[i], k->name); 1104 (void) strcat(gpc_names[i], ","); 1105 } 1106 } 1107 for (n = cmn_gpc_events_core_uarch; n->event_num != NT_END; 1108 n++) { 1109 (void) strcat(gpc_names[i], n->name); 1110 (void) strcat(gpc_names[i], ","); 1111 } 1112 for (k = cmn_generic_events; k->event_num != NT_END; k++) { 1113 (void) strcat(gpc_names[i], k->name); 1114 (void) strcat(gpc_names[i], ","); 1115 } 1116 1117 /* 1118 * Remove trailing comma. 1119 */ 1120 gpc_names[i][common_size + size - 1] = '\0'; 1121 } 1122 } 1123 1124 static int 1125 core_pcbe_init(void) 1126 { 1127 struct cpuid_regs cp; 1128 size_t size; 1129 uint64_t i; 1130 uint64_t j; 1131 uint64_t arch_events_vector_length; 1132 size_t arch_events_string_length; 1133 uint_t model; 1134 1135 if (cpuid_getvendor(CPU) != X86_VENDOR_Intel) 1136 return (-1); 1137 1138 /* Obtain Basic CPUID information */ 1139 cp.cp_eax = 0x0; 1140 (void) __cpuid_insn(&cp); 1141 1142 /* No Architectural Performance Monitoring Leaf returned by CPUID */ 1143 if (cp.cp_eax < 0xa) { 1144 return (-1); 1145 } 1146 1147 /* Obtain the Architectural Performance Monitoring Leaf */ 1148 cp.cp_eax = 0xa; 1149 (void) __cpuid_insn(&cp); 1150 1151 versionid = cp.cp_eax & 0xFF; 1152 1153 /* 1154 * Fixed-Function Counters (FFC) 1155 * 1156 * All Family 6 Model 15 and Model 23 processors have fixed-function 1157 * counters. These counters were made Architectural with 1158 * Family 6 Model 15 Stepping 9. 
1159 */ 1160 switch (versionid) { 1161 1162 case 0: 1163 return (-1); 1164 1165 case 2: 1166 num_ffc = cp.cp_edx & 0x1F; 1167 width_ffc = (cp.cp_edx >> 5) & 0xFF; 1168 1169 /* 1170 * Some processors have an errata (AW34) where 1171 * versionid is reported as 2 when actually 1. 1172 * In this case, fixed-function counters are 1173 * model-specific as in Version 1. 1174 */ 1175 if (num_ffc != 0) { 1176 break; 1177 } 1178 /* FALLTHROUGH */ 1179 case 1: 1180 num_ffc = 3; 1181 width_ffc = 40; 1182 versionid = 1; 1183 break; 1184 1185 default: 1186 num_ffc = cp.cp_edx & 0x1F; 1187 width_ffc = (cp.cp_edx >> 5) & 0xFF; 1188 break; 1189 } 1190 1191 1192 if (num_ffc >= 64) 1193 return (-1); 1194 1195 /* Set HTT-specific names of architectural & FFC events */ 1196 if (is_x86_feature(x86_featureset, X86FSET_HTT)) { 1197 ffc_names = ffc_names_htt; 1198 arch_events_table = arch_events_table_htt; 1199 known_arch_events = 1200 sizeof (arch_events_table_htt) / 1201 sizeof (struct events_table_t); 1202 known_ffc_num = 1203 sizeof (ffc_names_htt) / sizeof (char *); 1204 } else { 1205 ffc_names = ffc_names_non_htt; 1206 arch_events_table = arch_events_table_non_htt; 1207 known_arch_events = 1208 sizeof (arch_events_table_non_htt) / 1209 sizeof (struct events_table_t); 1210 known_ffc_num = 1211 sizeof (ffc_names_non_htt) / sizeof (char *); 1212 } 1213 1214 if (num_ffc >= known_ffc_num) { 1215 /* 1216 * The system seems to have more fixed-function counters than 1217 * what this PCBE is able to handle correctly. Default to the 1218 * maximum number of fixed-function counters that this driver 1219 * is aware of. 
1220 */ 1221 num_ffc = known_ffc_num - 1; 1222 } 1223 1224 mask_ffc = BITMASK_XBITS(width_ffc); 1225 control_ffc = BITMASK_XBITS(num_ffc); 1226 1227 /* 1228 * General Purpose Counters (GPC) 1229 */ 1230 num_gpc = (cp.cp_eax >> 8) & 0xFF; 1231 width_gpc = (cp.cp_eax >> 16) & 0xFF; 1232 1233 if (num_gpc >= 64) 1234 return (-1); 1235 1236 mask_gpc = BITMASK_XBITS(width_gpc); 1237 1238 control_gpc = BITMASK_XBITS(num_gpc); 1239 1240 control_mask = (control_ffc << 32) | control_gpc; 1241 1242 total_pmc = num_gpc + num_ffc; 1243 if (total_pmc > 64) { 1244 /* Too wide for the overflow bitmap */ 1245 return (-1); 1246 } 1247 1248 /* FFC names */ 1249 ffc_allnames = kmem_alloc(num_ffc * sizeof (char *), KM_SLEEP); 1250 for (i = 0; i < num_ffc; i++) { 1251 ffc_allnames[i] = kmem_alloc( 1252 strlen(ffc_names[i]) + strlen(ffc_genericnames[i]) + 2, 1253 KM_SLEEP); 1254 1255 ffc_allnames[i][0] = '\0'; 1256 (void) strcat(ffc_allnames[i], ffc_names[i]); 1257 1258 /* Check if this ffc has a generic name */ 1259 if (strcmp(ffc_genericnames[i], "") != 0) { 1260 (void) strcat(ffc_allnames[i], ","); 1261 (void) strcat(ffc_allnames[i], ffc_genericnames[i]); 1262 } 1263 } 1264 1265 /* GPC events for Family 6 Models 15, 23 and 29 only */ 1266 if ((cpuid_getfamily(CPU) == 6) && 1267 ((cpuid_getmodel(CPU) == 15) || (cpuid_getmodel(CPU) == 23) || 1268 (cpuid_getmodel(CPU) == 29))) { 1269 (void) snprintf(core_impl_name, IMPL_NAME_LEN, 1270 "Core Microarchitecture"); 1271 pcbe_init_core_uarch(); 1272 return (0); 1273 } 1274 1275 (void) snprintf(core_impl_name, IMPL_NAME_LEN, 1276 "Intel Arch PerfMon v%d on Family %d Model %d", 1277 versionid, cpuid_getfamily(CPU), cpuid_getmodel(CPU)); 1278 1279 /* 1280 * Architectural events 1281 */ 1282 arch_events_vector_length = (cp.cp_eax >> 24) & 0xFF; 1283 1284 ASSERT(known_arch_events == arch_events_vector_length); 1285 1286 /* 1287 * To handle the case where a new performance monitoring setup is run 1288 * on a non-debug kernel 1289 */ 1290 if 
(known_arch_events > arch_events_vector_length) { 1291 known_arch_events = arch_events_vector_length; 1292 } else { 1293 arch_events_vector_length = known_arch_events; 1294 } 1295 1296 arch_events_vector = cp.cp_ebx & 1297 BITMASK_XBITS(arch_events_vector_length); 1298 1299 /* 1300 * Process architectural and non-architectural events using GPC 1301 */ 1302 if (num_gpc > 0) { 1303 1304 gpc_names = kmem_alloc(num_gpc * sizeof (char *), KM_SLEEP); 1305 1306 /* Calculate space required for the architectural gpc events */ 1307 arch_events_string_length = 0; 1308 for (i = 0; i < known_arch_events; i++) { 1309 if (((1U << i) & arch_events_vector) == 0) { 1310 arch_events_string_length += 1311 strlen(arch_events_table[i].name) + 1; 1312 if (strcmp(arch_genevents_table[i], "") != 0) { 1313 arch_events_string_length += 1314 strlen(arch_genevents_table[i]) + 1; 1315 } 1316 } 1317 } 1318 1319 /* Non-architectural events list */ 1320 model = cpuid_getmodel(CPU); 1321 switch (model) { 1322 /* Nehalem */ 1323 case 26: 1324 case 30: 1325 case 31: 1326 /* Westmere */ 1327 case 37: 1328 case 44: 1329 /* Nehalem-EX */ 1330 case 46: 1331 case 47: 1332 events_table = events_fam6_nhm; 1333 break; 1334 case 28: 1335 events_table = events_fam6_mod28; 1336 break; 1337 } 1338 1339 for (i = 0; i < num_gpc; i++) { 1340 1341 /* 1342 * Determine length of all supported event names 1343 * (architectural + non-architectural) 1344 */ 1345 size = arch_events_string_length; 1346 for (j = 0; events_table != NULL && 1347 events_table[j].eventselect != NT_END; 1348 j++) { 1349 if (C(i) & events_table[j].supported_counters) { 1350 size += strlen(events_table[j].name) + 1351 1; 1352 } 1353 } 1354 1355 /* Allocate memory for this pics list */ 1356 gpc_names[i] = kmem_alloc(size + 1, KM_SLEEP); 1357 gpc_names[i][0] = '\0'; 1358 if (size == 0) { 1359 continue; 1360 } 1361 1362 /* 1363 * Create the list of all supported events 1364 * (architectural + non-architectural) 1365 */ 1366 for (j = 0; j < 
known_arch_events; j++) { 1367 if (((1U << j) & arch_events_vector) == 0) { 1368 (void) strcat(gpc_names[i], 1369 arch_events_table[j].name); 1370 (void) strcat(gpc_names[i], ","); 1371 if (strcmp( 1372 arch_genevents_table[j], "") 1373 != 0) { 1374 (void) strcat(gpc_names[i], 1375 arch_genevents_table[j]); 1376 (void) strcat(gpc_names[i], 1377 ","); 1378 } 1379 } 1380 } 1381 1382 for (j = 0; events_table != NULL && 1383 events_table[j].eventselect != NT_END; 1384 j++) { 1385 if (C(i) & events_table[j].supported_counters) { 1386 (void) strcat(gpc_names[i], 1387 events_table[j].name); 1388 (void) strcat(gpc_names[i], ","); 1389 } 1390 } 1391 1392 /* Remove trailing comma */ 1393 gpc_names[i][size - 1] = '\0'; 1394 } 1395 } 1396 1397 return (0); 1398 } 1399 1400 static uint_t core_pcbe_ncounters() 1401 { 1402 return (total_pmc); 1403 } 1404 1405 static const char *core_pcbe_impl_name(void) 1406 { 1407 return (core_impl_name); 1408 } 1409 1410 static const char *core_pcbe_cpuref(void) 1411 { 1412 return (core_cpuref); 1413 } 1414 1415 static char *core_pcbe_list_events(uint_t picnum) 1416 { 1417 ASSERT(picnum < cpc_ncounters); 1418 1419 if (picnum < num_gpc) { 1420 return (gpc_names[picnum]); 1421 } else { 1422 return (ffc_allnames[picnum - num_gpc]); 1423 } 1424 } 1425 1426 static char *core_pcbe_list_attrs(void) 1427 { 1428 if (versionid >= 3) { 1429 return ("edge,inv,umask,cmask,anythr"); 1430 } else { 1431 return ("edge,pc,inv,umask,cmask"); 1432 } 1433 } 1434 1435 static const struct nametable_core_uarch * 1436 find_gpcevent_core_uarch(char *name, 1437 const struct nametable_core_uarch *nametable) 1438 { 1439 const struct nametable_core_uarch *n; 1440 int compare_result = -1; 1441 1442 for (n = nametable; n->event_num != NT_END; n++) { 1443 compare_result = strcmp(name, n->name); 1444 if (compare_result <= 0) { 1445 break; 1446 } 1447 } 1448 1449 if (compare_result == 0) { 1450 return (n); 1451 } 1452 1453 return (NULL); 1454 } 1455 1456 static const struct 
generic_events * 1457 find_generic_events(char *name, const struct generic_events *table) 1458 { 1459 const struct generic_events *n; 1460 1461 for (n = table; n->event_num != NT_END; n++) { 1462 if (strcmp(name, n->name) == 0) { 1463 return (n); 1464 }; 1465 } 1466 1467 return (NULL); 1468 } 1469 1470 static const struct events_table_t * 1471 find_gpcevent(char *name) 1472 { 1473 int i; 1474 1475 /* Search architectural events */ 1476 for (i = 0; i < known_arch_events; i++) { 1477 if (strcmp(name, arch_events_table[i].name) == 0 || 1478 strcmp(name, arch_genevents_table[i]) == 0) { 1479 if (((1U << i) & arch_events_vector) == 0) { 1480 return (&arch_events_table[i]); 1481 } 1482 } 1483 } 1484 1485 /* Search non-architectural events */ 1486 if (events_table != NULL) { 1487 for (i = 0; events_table[i].eventselect != NT_END; i++) { 1488 if (strcmp(name, events_table[i].name) == 0) { 1489 return (&events_table[i]); 1490 } 1491 } 1492 } 1493 1494 return (NULL); 1495 } 1496 1497 static uint64_t 1498 core_pcbe_event_coverage(char *event) 1499 { 1500 uint64_t bitmap; 1501 uint64_t bitmask; 1502 const struct events_table_t *n; 1503 int i; 1504 1505 bitmap = 0; 1506 1507 /* Is it an event that a GPC can track? 
*/ 1508 if (versionid >= 3) { 1509 n = find_gpcevent(event); 1510 if (n != NULL) { 1511 bitmap |= (n->supported_counters & 1512 BITMASK_XBITS(num_gpc)); 1513 } 1514 } else { 1515 if (find_generic_events(event, cmn_generic_events) != NULL) { 1516 bitmap |= BITMASK_XBITS(num_gpc); 1517 } if (find_generic_events(event, generic_events_pic0) != NULL) { 1518 bitmap |= 1ULL; 1519 } else if (find_gpcevent_core_uarch(event, 1520 cmn_gpc_events_core_uarch) != NULL) { 1521 bitmap |= BITMASK_XBITS(num_gpc); 1522 } else if (find_gpcevent_core_uarch(event, pic0_events) != 1523 NULL) { 1524 bitmap |= 1ULL; 1525 } else if (find_gpcevent_core_uarch(event, pic1_events) != 1526 NULL) { 1527 bitmap |= 1ULL << 1; 1528 } 1529 } 1530 1531 /* Check if the event can be counted in the fixed-function counters */ 1532 if (num_ffc > 0) { 1533 bitmask = 1ULL << num_gpc; 1534 for (i = 0; i < num_ffc; i++) { 1535 if (strcmp(event, ffc_names[i]) == 0) { 1536 bitmap |= bitmask; 1537 } else if (strcmp(event, ffc_genericnames[i]) == 0) { 1538 bitmap |= bitmask; 1539 } 1540 bitmask = bitmask << 1; 1541 } 1542 } 1543 1544 return (bitmap); 1545 } 1546 1547 static uint64_t 1548 core_pcbe_overflow_bitmap(void) 1549 { 1550 uint64_t interrupt_status; 1551 uint64_t intrbits_ffc; 1552 uint64_t intrbits_gpc; 1553 extern int kcpc_hw_overflow_intr_installed; 1554 uint64_t overflow_bitmap; 1555 1556 RDMSR(PERF_GLOBAL_STATUS, interrupt_status); 1557 WRMSR(PERF_GLOBAL_OVF_CTRL, interrupt_status); 1558 1559 interrupt_status = interrupt_status & control_mask; 1560 intrbits_ffc = (interrupt_status >> 32) & control_ffc; 1561 intrbits_gpc = interrupt_status & control_gpc; 1562 overflow_bitmap = (intrbits_ffc << num_gpc) | intrbits_gpc; 1563 1564 ASSERT(kcpc_hw_overflow_intr_installed); 1565 (*kcpc_hw_enable_cpc_intr)(); 1566 1567 return (overflow_bitmap); 1568 } 1569 1570 static int 1571 check_cpc_securitypolicy(core_pcbe_config_t *conf, 1572 const struct nametable_core_uarch *n) 1573 { 1574 if (conf->core_ctl & 
n->restricted_bits) { 1575 if (secpolicy_cpc_cpu(crgetcred()) != 0) { 1576 return (CPC_ATTR_REQUIRES_PRIVILEGE); 1577 } 1578 } 1579 return (0); 1580 } 1581 1582 static int 1583 configure_gpc(uint_t picnum, char *event, uint64_t preset, uint32_t flags, 1584 uint_t nattrs, kcpc_attr_t *attrs, void **data) 1585 { 1586 core_pcbe_config_t conf; 1587 const struct nametable_core_uarch *n; 1588 const struct generic_events *k = NULL; 1589 const struct nametable_core_uarch *m; 1590 const struct nametable_core_uarch *picspecific_events; 1591 struct nametable_core_uarch nt_raw = { "", 0x0, 0x0 }; 1592 uint_t i; 1593 long event_num; 1594 const struct events_table_t *eventcode; 1595 1596 if (((preset & BITS_EXTENDED_FROM_31) != 0) && 1597 ((preset & BITS_EXTENDED_FROM_31) != 1598 BITS_EXTENDED_FROM_31)) { 1599 1600 /* 1601 * Bits beyond bit-31 in the general-purpose counters can only 1602 * be written to by extension of bit 31. We cannot preset 1603 * these bits to any value other than all 1s or all 0s. 
1604 */ 1605 return (CPC_ATTRIBUTE_OUT_OF_RANGE); 1606 } 1607 1608 if (versionid >= 3) { 1609 eventcode = find_gpcevent(event); 1610 if (eventcode != NULL) { 1611 if ((C(picnum) & eventcode->supported_counters) == 0) { 1612 return (CPC_PIC_NOT_CAPABLE); 1613 } 1614 if (nattrs > 0 && 1615 (strncmp("PAPI_", event, 5) == 0)) { 1616 return (CPC_ATTRIBUTE_OUT_OF_RANGE); 1617 } 1618 conf.core_ctl = eventcode->eventselect; 1619 conf.core_ctl |= eventcode->unitmask << 1620 CORE_UMASK_SHIFT; 1621 } else { 1622 /* Event specified as raw event code */ 1623 if (ddi_strtol(event, NULL, 0, &event_num) != 0) { 1624 return (CPC_INVALID_EVENT); 1625 } 1626 conf.core_ctl = event_num & 0xFF; 1627 } 1628 } else { 1629 if ((k = find_generic_events(event, cmn_generic_events)) != 1630 NULL || 1631 (picnum == 0 && 1632 (k = find_generic_events(event, generic_events_pic0)) != 1633 NULL)) { 1634 if (nattrs > 0) { 1635 return (CPC_ATTRIBUTE_OUT_OF_RANGE); 1636 } 1637 conf.core_ctl = k->event_num; 1638 conf.core_ctl |= k->umask << CORE_UMASK_SHIFT; 1639 } else { 1640 /* Not a generic event */ 1641 1642 n = find_gpcevent_core_uarch(event, 1643 cmn_gpc_events_core_uarch); 1644 if (n == NULL) { 1645 switch (picnum) { 1646 case 0: 1647 picspecific_events = 1648 pic0_events; 1649 break; 1650 case 1: 1651 picspecific_events = 1652 pic1_events; 1653 break; 1654 default: 1655 picspecific_events = NULL; 1656 break; 1657 } 1658 if (picspecific_events != NULL) { 1659 n = find_gpcevent_core_uarch(event, 1660 picspecific_events); 1661 } 1662 } 1663 if (n == NULL) { 1664 1665 /* 1666 * Check if this is a case where the event was 1667 * specified directly by its event number 1668 * instead of its name string. 1669 */ 1670 if (ddi_strtol(event, NULL, 0, &event_num) != 1671 0) { 1672 return (CPC_INVALID_EVENT); 1673 } 1674 1675 event_num = event_num & 0xFF; 1676 1677 /* 1678 * Search the event table to find out if the 1679 * event specified has an privilege 1680 * requirements. 
Currently none of the 1681 * pic-specific counters have any privilege 1682 * requirements. Hence only the table 1683 * cmn_gpc_events_core_uarch is searched. 1684 */ 1685 for (m = cmn_gpc_events_core_uarch; 1686 m->event_num != NT_END; 1687 m++) { 1688 if (event_num == m->event_num) { 1689 break; 1690 } 1691 } 1692 if (m->event_num == NT_END) { 1693 nt_raw.event_num = (uint8_t)event_num; 1694 n = &nt_raw; 1695 } else { 1696 n = m; 1697 } 1698 } 1699 conf.core_ctl = n->event_num; /* Event Select */ 1700 } 1701 } 1702 1703 1704 conf.core_picno = picnum; 1705 conf.core_pictype = CORE_GPC; 1706 conf.core_rawpic = preset & mask_gpc; 1707 1708 conf.core_pes = GPC_BASE_PES + picnum; 1709 conf.core_pmc = GPC_BASE_PMC + picnum; 1710 1711 for (i = 0; i < nattrs; i++) { 1712 if (strncmp(attrs[i].ka_name, "umask", 6) == 0) { 1713 if ((attrs[i].ka_val | CORE_UMASK_MASK) != 1714 CORE_UMASK_MASK) { 1715 return (CPC_ATTRIBUTE_OUT_OF_RANGE); 1716 } 1717 /* Clear out the default umask */ 1718 conf.core_ctl &= ~ (CORE_UMASK_MASK << 1719 CORE_UMASK_SHIFT); 1720 /* Use the user provided umask */ 1721 conf.core_ctl |= attrs[i].ka_val << 1722 CORE_UMASK_SHIFT; 1723 } else if (strncmp(attrs[i].ka_name, "edge", 6) == 0) { 1724 if (attrs[i].ka_val != 0) 1725 conf.core_ctl |= CORE_EDGE; 1726 } else if (strncmp(attrs[i].ka_name, "inv", 4) == 0) { 1727 if (attrs[i].ka_val != 0) 1728 conf.core_ctl |= CORE_INV; 1729 } else if (strncmp(attrs[i].ka_name, "cmask", 6) == 0) { 1730 if ((attrs[i].ka_val | CORE_CMASK_MASK) != 1731 CORE_CMASK_MASK) { 1732 return (CPC_ATTRIBUTE_OUT_OF_RANGE); 1733 } 1734 conf.core_ctl |= attrs[i].ka_val << 1735 CORE_CMASK_SHIFT; 1736 } else if (strncmp(attrs[i].ka_name, "anythr", 7) == 1737 0) { 1738 if (versionid < 3) 1739 return (CPC_INVALID_ATTRIBUTE); 1740 if (secpolicy_cpc_cpu(crgetcred()) != 0) { 1741 return (CPC_ATTR_REQUIRES_PRIVILEGE); 1742 } 1743 if (attrs[i].ka_val != 0) 1744 conf.core_ctl |= CORE_ANYTHR; 1745 } else { 1746 return (CPC_INVALID_ATTRIBUTE); 1747 
} 1748 } 1749 1750 if (flags & CPC_COUNT_USER) 1751 conf.core_ctl |= CORE_USR; 1752 if (flags & CPC_COUNT_SYSTEM) 1753 conf.core_ctl |= CORE_OS; 1754 if (flags & CPC_OVF_NOTIFY_EMT) 1755 conf.core_ctl |= CORE_INT; 1756 conf.core_ctl |= CORE_EN; 1757 1758 if (versionid < 3 && k == NULL) { 1759 if (check_cpc_securitypolicy(&conf, n) != 0) { 1760 return (CPC_ATTR_REQUIRES_PRIVILEGE); 1761 } 1762 } 1763 1764 *data = kmem_alloc(sizeof (core_pcbe_config_t), KM_SLEEP); 1765 *((core_pcbe_config_t *)*data) = conf; 1766 1767 return (0); 1768 } 1769 1770 static int 1771 configure_ffc(uint_t picnum, char *event, uint64_t preset, uint32_t flags, 1772 uint_t nattrs, kcpc_attr_t *attrs, void **data) 1773 { 1774 core_pcbe_config_t *conf; 1775 uint_t i; 1776 1777 if (picnum - num_gpc >= num_ffc) { 1778 return (CPC_INVALID_PICNUM); 1779 } 1780 1781 if ((strcmp(ffc_names[picnum-num_gpc], event) != 0) && 1782 (strcmp(ffc_genericnames[picnum-num_gpc], event) != 0)) { 1783 return (CPC_INVALID_EVENT); 1784 } 1785 1786 if ((versionid < 3) && (nattrs != 0)) { 1787 return (CPC_INVALID_ATTRIBUTE); 1788 } 1789 1790 conf = kmem_alloc(sizeof (core_pcbe_config_t), KM_SLEEP); 1791 conf->core_ctl = 0; 1792 1793 for (i = 0; i < nattrs; i++) { 1794 if (strncmp(attrs[i].ka_name, "anythr", 7) == 0) { 1795 if (secpolicy_cpc_cpu(crgetcred()) != 0) { 1796 kmem_free(conf, sizeof (core_pcbe_config_t)); 1797 return (CPC_ATTR_REQUIRES_PRIVILEGE); 1798 } 1799 if (attrs[i].ka_val != 0) { 1800 conf->core_ctl |= CORE_FFC_ANYTHR; 1801 } 1802 } else { 1803 kmem_free(conf, sizeof (core_pcbe_config_t)); 1804 return (CPC_INVALID_ATTRIBUTE); 1805 } 1806 } 1807 1808 conf->core_picno = picnum; 1809 conf->core_pictype = CORE_FFC; 1810 conf->core_rawpic = preset & mask_ffc; 1811 conf->core_pmc = FFC_BASE_PMC + (picnum - num_gpc); 1812 1813 /* All fixed-function counters have the same control register */ 1814 conf->core_pes = PERF_FIXED_CTR_CTRL; 1815 1816 if (flags & CPC_COUNT_USER) 1817 conf->core_ctl |= CORE_FFC_USR_EN; 
if (flags & CPC_COUNT_SYSTEM)
		conf->core_ctl |= CORE_FFC_OS_EN;
	if (flags & CPC_OVF_NOTIFY_EMT)
		conf->core_ctl |= CORE_FFC_PMI;

	*data = conf;
	return (0);
}

/*
 * Configure counter picnum for event, or re-preset an existing
 * configuration.
 *
 * When *data already points at a configuration, only its starting counter
 * value is replaced (masked to the counter width) and 0 is returned.
 * Otherwise the request is handed to configure_gpc() for counters below
 * num_gpc and to configure_ffc() for the rest, and their result is
 * returned.  CPC_INVALID_PICNUM is returned for a counter number at or
 * beyond total_pmc.  token is unused.
 */
/*ARGSUSED*/
static int
core_pcbe_configure(uint_t picnum, char *event, uint64_t preset,
    uint32_t flags, uint_t nattrs, kcpc_attr_t *attrs, void **data,
    void *token)
{
	int			ret;
	core_pcbe_config_t	*conf;

	/*
	 * If we've been handed an existing configuration, we need only preset
	 * the counter value.
	 */
	if (*data != NULL) {
		conf = *data;
		ASSERT(conf->core_pictype == CORE_GPC ||
		    conf->core_pictype == CORE_FFC);
		if (conf->core_pictype == CORE_GPC)
			conf->core_rawpic = preset & mask_gpc;
		else /* CORE_FFC */
			conf->core_rawpic = preset & mask_ffc;
		return (0);
	}

	if (picnum >= total_pmc) {
		return (CPC_INVALID_PICNUM);
	}

	if (picnum < num_gpc) {
		ret = configure_gpc(picnum, event, preset, flags,
		    nattrs, attrs, data);
	} else {
		ret = configure_ffc(picnum, event, preset, flags,
		    nattrs, attrs, data);
	}
	return (ret);
}

/*
 * Load every configuration attached to token into the hardware and start
 * counting.
 *
 * Order of operations: stop all counters, set or clear CR4.PCE according to
 * kcpc_allow_nonpriv(), clear pending overflow indicators, write each
 * counter's starting value (and, for general-purpose counters, its
 * event-select register), then write the combined fixed-counter control
 * word and finally the global enable mask.
 */
static void
core_pcbe_program(void *token)
{
	core_pcbe_config_t	*cfg;
	uint64_t		perf_global_ctrl;
	uint64_t		perf_fixed_ctr_ctrl;
	uint64_t		curcr4;

	core_pcbe_allstop();

	curcr4 = getcr4();
	if (kcpc_allow_nonpriv(token))
		/* Allow RDPMC at any ring level */
		setcr4(curcr4 | CR4_PCE);
	else
		/* Allow RDPMC only at ring 0 */
		setcr4(curcr4 & ~CR4_PCE);

	/* Clear any overflow indicators before programming the counters */
	WRMSR(PERF_GLOBAL_OVF_CTRL, MASK_CONDCHGD_OVFBUFFER | control_mask);

	cfg = NULL;
	perf_global_ctrl = 0;
	perf_fixed_ctr_ctrl = 0;
	/* Passing NULL as the current config yields the first one */
	cfg = (core_pcbe_config_t *)kcpc_next_config(token, cfg, NULL);
	while (cfg != NULL) {
		ASSERT(cfg->core_pictype == CORE_GPC ||
		    cfg->core_pictype == CORE_FFC);

		if (cfg->core_pictype == CORE_GPC) {
			/*
			 * General-purpose counter registers have write
			 * restrictions where only the lower 32-bits can be
			 * written to.  The rest of the relevant bits are
			 * written to by extension from bit 31 (all ZEROS if
			 * bit-31 is ZERO and all ONE if bit-31 is ONE).  This
			 * makes it possible to write to the counter register
			 * only values that have all ONEs or all ZEROs in the
			 * higher bits.
			 */
			if (((cfg->core_rawpic & BITS_EXTENDED_FROM_31) == 0) ||
			    ((cfg->core_rawpic & BITS_EXTENDED_FROM_31) ==
			    BITS_EXTENDED_FROM_31)) {
				/*
				 * Straightforward case where the higher bits
				 * are all ZEROs or all ONEs.
				 */
				WRMSR(cfg->core_pmc,
				    (cfg->core_rawpic & mask_gpc));
			} else {
				/*
				 * The high order bits are not all the same.
				 * We save what is currently in the registers
				 * and do not write to it.  When we want to do
				 * a read from this register later (in
				 * core_pcbe_sample()), we subtract the value
				 * we save here to get the actual event count.
				 *
				 * NOTE: As a result, we will not get overflow
				 * interrupts as expected.
				 */
				RDMSR(cfg->core_pmc, cfg->core_rawpic);
				cfg->core_rawpic = cfg->core_rawpic & mask_gpc;
			}
			WRMSR(cfg->core_pes, cfg->core_ctl);
			/* Global enable bit i belongs to GPC i */
			perf_global_ctrl |= 1ull << cfg->core_picno;
		} else {
			/*
			 * Unlike the general-purpose counters, all relevant
			 * bits of fixed-function counters can be written to.
			 */
			WRMSR(cfg->core_pmc, cfg->core_rawpic & mask_ffc);

			/*
			 * Collect the control bits for all the
			 * fixed-function counters and write it at one shot
			 * later in this function
			 */
			perf_fixed_ctr_ctrl |= cfg->core_ctl <<
			    ((cfg->core_picno - num_gpc) * CORE_FFC_ATTR_SIZE);
			/* Global enable bits for the FFCs start at bit 32 */
			perf_global_ctrl |=
			    1ull << (cfg->core_picno - num_gpc + 32);
		}

		cfg = (core_pcbe_config_t *)
		    kcpc_next_config(token, cfg, NULL);
	}

	/* Enable all the counters */
	WRMSR(PERF_FIXED_CTR_CTRL, perf_fixed_ctr_ctrl);
	WRMSR(PERF_GLOBAL_CTRL, perf_global_ctrl);
}

/*
 * Stop every counter with a single write to the global control register and
 * clear CR4.PCE.
 */
static void
core_pcbe_allstop(void)
{
	/* Disable all the counters together */
	WRMSR(PERF_GLOBAL_CTRL, ALL_STOPPED);

	setcr4(getcr4() & ~CR4_PCE);
}

/*
 * Read every counter attached to token and add the events counted since the
 * previous sample to the caller's 64-bit accumulator (*daddr).
 *
 * core_rawpic holds the counter value, masked to the counter width, that
 * corresponds to the current accumulator; the delta to the value just read
 * is what has been counted since.  A reading below core_rawpic is treated
 * as one wrap of the counter.
 */
static void
core_pcbe_sample(void *token)
{
	uint64_t		*daddr;
	uint64_t		curpic;
	core_pcbe_config_t	*cfg;
	uint64_t		counter_mask;

	cfg = (core_pcbe_config_t *)kcpc_next_config(token, NULL, &daddr);
	while (cfg != NULL) {
		ASSERT(cfg->core_pictype == CORE_GPC ||
		    cfg->core_pictype == CORE_FFC);

		curpic = rdmsr(cfg->core_pmc);

		DTRACE_PROBE4(core__pcbe__sample,
		    uint64_t, cfg->core_pmc,
		    uint64_t, curpic,
		    uint64_t, cfg->core_rawpic,
		    uint64_t, *daddr);

		/* The two counter kinds can have different widths */
		if (cfg->core_pictype == CORE_GPC) {
			counter_mask = mask_gpc;
		} else {
			counter_mask = mask_ffc;
		}
		curpic = curpic & counter_mask;
		if (curpic >= cfg->core_rawpic) {
			*daddr += curpic - cfg->core_rawpic;
		} else {
			/* Counter overflowed since our last sample */
			*daddr += counter_mask - (cfg->core_rawpic - curpic) +
			    1;
		}
		/* New baseline for the next sample */
		cfg->core_rawpic = *daddr & counter_mask;

		cfg =
		    (core_pcbe_config_t *)kcpc_next_config(token, cfg, &daddr);
	}
}

static void
core_pcbe_free(void *config)
{
	kmem_free(config, sizeof
(core_pcbe_config_t)); 2012 } 2013 2014 static struct modlpcbe core_modlpcbe = { 2015 &mod_pcbeops, 2016 "Core Performance Counters", 2017 &core_pcbe_ops 2018 }; 2019 2020 static struct modlinkage core_modl = { 2021 MODREV_1, 2022 &core_modlpcbe, 2023 }; 2024 2025 int 2026 _init(void) 2027 { 2028 if (core_pcbe_init() != 0) { 2029 return (ENOTSUP); 2030 } 2031 return (mod_install(&core_modl)); 2032 } 2033 2034 int 2035 _fini(void) 2036 { 2037 return (mod_remove(&core_modl)); 2038 } 2039 2040 int 2041 _info(struct modinfo *mi) 2042 { 2043 return (mod_info(&core_modl, mi)); 2044 }