1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
  23  */
  24 
  25 /*
  26  * This file contains preset event names from the Performance Application
  27  * Programming Interface v3.5 which included the following notice:
  28  *
  29  *                             Copyright (c) 2005,6
  30  *                           Innovative Computing Labs
  31  *                         Computer Science Department,
  32  *                            University of Tennessee,
  33  *                                 Knoxville, TN.
  34  *                              All Rights Reserved.
  35  *
  36  *
  37  * Redistribution and use in source and binary forms, with or without
  38  * modification, are permitted provided that the following conditions are met:
  39  *
  40  *    * Redistributions of source code must retain the above copyright notice,
  41  *      this list of conditions and the following disclaimer.
  42  *    * Redistributions in binary form must reproduce the above copyright
  43  *      notice, this list of conditions and the following disclaimer in the
  44  *      documentation and/or other materials provided with the distribution.
  45  *    * Neither the name of the University of Tennessee nor the names of its
  46  *      contributors may be used to endorse or promote products derived from
  47  *      this software without specific prior written permission.
  48  *
  49  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  50  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  51  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  52  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  53  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  54  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  55  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  56  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  57  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  58  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  59  * POSSIBILITY OF SUCH DAMAGE.
  60  *
  61  *
  62  * This open source software license conforms to the BSD License template.
  63  */
  64 
  65 
  66 /*
  67  * Performance Counter Back-End for Intel processors supporting Architectural
  68  * Performance Monitoring.
  69  */
  70 
  71 #include <sys/cpuvar.h>
  72 #include <sys/param.h>
  73 #include <sys/cpc_impl.h>
  74 #include <sys/cpc_pcbe.h>
  75 #include <sys/modctl.h>
  76 #include <sys/inttypes.h>
  77 #include <sys/systm.h>
  78 #include <sys/cmn_err.h>
  79 #include <sys/x86_archext.h>
  80 #include <sys/sdt.h>
  81 #include <sys/archsystm.h>
  82 #include <sys/privregs.h>
  83 #include <sys/ddi.h>
  84 #include <sys/sunddi.h>
  85 #include <sys/cred.h>
  86 #include <sys/policy.h>
  87 
  88 static int core_pcbe_init(void);
  89 static uint_t core_pcbe_ncounters(void);
  90 static const char *core_pcbe_impl_name(void);
  91 static const char *core_pcbe_cpuref(void);
  92 static char *core_pcbe_list_events(uint_t picnum);
  93 static char *core_pcbe_list_attrs(void);
  94 static uint64_t core_pcbe_event_coverage(char *event);
  95 static uint64_t core_pcbe_overflow_bitmap(void);
  96 static int core_pcbe_configure(uint_t picnum, char *event, uint64_t preset,
  97     uint32_t flags, uint_t nattrs, kcpc_attr_t *attrs, void **data,
  98     void *token);
  99 static void core_pcbe_program(void *token);
 100 static void core_pcbe_allstop(void);
 101 static void core_pcbe_sample(void *token);
 102 static void core_pcbe_free(void *config);
 103 
 104 #define FALSE   0
 105 #define TRUE    1
 106 
 107 /* Counter Type */
 108 #define CORE_GPC        0       /* General-Purpose Counter (GPC) */
 109 #define CORE_FFC        1       /* Fixed-Function Counter (FFC) */
 110 
 111 /* MSR Addresses */
 112 #define GPC_BASE_PMC            0x00c1  /* First GPC */
 113 #define GPC_BASE_PES            0x0186  /* First GPC Event Select register */
 114 #define FFC_BASE_PMC            0x0309  /* First FFC */
 115 #define PERF_FIXED_CTR_CTRL     0x038d  /* Used to enable/disable FFCs */
 116 #define PERF_GLOBAL_STATUS      0x038e  /* Overflow status register */
 117 #define PERF_GLOBAL_CTRL        0x038f  /* Used to enable/disable counting */
 118 #define PERF_GLOBAL_OVF_CTRL    0x0390  /* Used to clear overflow status */
 119 
 120 /*
 121  * Processor Event Select register fields
 122  */
 123 #define CORE_USR        (1ULL << 16)      /* Count while not in ring 0 */
 124 #define CORE_OS         (1ULL << 17)      /* Count while in ring 0 */
 125 #define CORE_EDGE       (1ULL << 18)      /* Enable edge detection */
 126 #define CORE_PC         (1ULL << 19)      /* Enable pin control */
 127 #define CORE_INT        (1ULL << 20)      /* Enable interrupt on overflow */
 128 #define CORE_EN         (1ULL << 22)      /* Enable counting */
 129 #define CORE_INV        (1ULL << 23)      /* Invert the CMASK */
 130 #define CORE_ANYTHR     (1ULL << 21)      /* Count event for any thread on core */
 131 
 132 #define CORE_UMASK_SHIFT        8
 133 #define CORE_UMASK_MASK         0xffu
 134 #define CORE_CMASK_SHIFT        24
 135 #define CORE_CMASK_MASK         0xffu
 136 
 137 /*
 138  * Fixed-function counter attributes
 139  */
 140 #define CORE_FFC_OS_EN  (1ULL << 0)       /* Count while not in ring 0 */
 141 #define CORE_FFC_USR_EN (1ULL << 1)       /* Count while in ring 1 */
 142 #define CORE_FFC_ANYTHR (1ULL << 2)       /* Count event for any thread on core */
 143 #define CORE_FFC_PMI    (1ULL << 3)       /* Enable interrupt on overflow */
 144 
 145 /*
 146  * Number of bits for specifying each FFC's attributes in the control register
 147  */
 148 #define CORE_FFC_ATTR_SIZE      4
 149 
 150 /*
 151  * CondChgd and OvfBuffer fields of global status and overflow control registers
 152  */
 153 #define CONDCHGD        (1ULL << 63)
 154 #define OVFBUFFER       (1ULL << 62)
 155 #define MASK_CONDCHGD_OVFBUFFER (CONDCHGD | OVFBUFFER)
 156 
 157 #define ALL_STOPPED     0ULL
 158 
 159 #define BITMASK_XBITS(x)        ((1ull << (x)) - 1ull)
 160 
 161 /*
 162  * Only the lower 32-bits can be written to in the general-purpose
 163  * counters.  The higher bits are extended from bit 31; all ones if
 164  * bit 31 is one and all zeros otherwise.
 165  *
 166  * The fixed-function counters do not have this restriction.
 167  */
 168 #define BITS_EXTENDED_FROM_31   (BITMASK_XBITS(width_gpc) & ~BITMASK_XBITS(31))
 169 
 170 #define WRMSR(msr, value)                                               \
 171         wrmsr((msr), (value));                                          \
 172         DTRACE_PROBE2(wrmsr, uint64_t, (msr), uint64_t, (value));
 173 
 174 #define RDMSR(msr, value)                                               \
 175         (value) = rdmsr((msr));                                         \
 176         DTRACE_PROBE2(rdmsr, uint64_t, (msr), uint64_t, (value));
 177 
 178 typedef struct core_pcbe_config {
 179         uint64_t        core_rawpic;
 180         uint64_t        core_ctl;       /* Event Select bits */
 181         uint64_t        core_pmc;       /* Counter register address */
 182         uint64_t        core_pes;       /* Event Select register address */
 183         uint_t          core_picno;
 184         uint8_t         core_pictype;   /* CORE_GPC or CORE_FFC */
 185 } core_pcbe_config_t;
 186 
 187 pcbe_ops_t core_pcbe_ops = {
 188         PCBE_VER_1,                     /* pcbe_ver */
 189         CPC_CAP_OVERFLOW_INTERRUPT | CPC_CAP_OVERFLOW_PRECISE,  /* pcbe_caps */
 190         core_pcbe_ncounters,            /* pcbe_ncounters */
 191         core_pcbe_impl_name,            /* pcbe_impl_name */
 192         core_pcbe_cpuref,               /* pcbe_cpuref */
 193         core_pcbe_list_events,          /* pcbe_list_events */
 194         core_pcbe_list_attrs,           /* pcbe_list_attrs */
 195         core_pcbe_event_coverage,       /* pcbe_event_coverage */
 196         core_pcbe_overflow_bitmap,      /* pcbe_overflow_bitmap */
 197         core_pcbe_configure,            /* pcbe_configure */
 198         core_pcbe_program,              /* pcbe_program */
 199         core_pcbe_allstop,              /* pcbe_allstop */
 200         core_pcbe_sample,               /* pcbe_sample */
 201         core_pcbe_free                  /* pcbe_free */
 202 };
 203 
 204 struct nametable_core_uarch {
 205         const char      *name;
 206         uint64_t        restricted_bits;
 207         uint8_t         event_num;
 208 };
 209 
 210 #define NT_END  0xFF
 211 
 212 /*
 213  * Counting an event for all cores or all bus agents requires cpc_cpu privileges
 214  */
 215 #define ALL_CORES       (1ULL << 15)
 216 #define ALL_AGENTS      (1ULL << 13)
 217 
 218 struct generic_events {
 219         const char      *name;
 220         uint8_t         event_num;
 221         uint8_t         umask;
 222 };
 223 
 224 static const struct generic_events cmn_generic_events[] = {
 225         { "PAPI_tot_cyc", 0x3c, 0x00 }, /* cpu_clk_unhalted.thread_p/core */
 226         { "PAPI_tot_ins", 0xc0, 0x00 }, /* inst_retired.any_p             */
 227         { "PAPI_br_ins",  0xc4, 0x0c }, /* br_inst_retired.taken          */
 228         { "PAPI_br_msp",  0xc5, 0x00 }, /* br_inst_retired.mispred        */
 229         { "PAPI_br_ntk",  0xc4, 0x03 },
 230                                 /* br_inst_retired.pred_not_taken|pred_taken */
 231         { "PAPI_br_prc",  0xc4, 0x05 },
 232                                 /* br_inst_retired.pred_not_taken|pred_taken */
 233         { "PAPI_hw_int",  0xc8, 0x00 }, /* hw_int_rvc                     */
 234         { "PAPI_tot_iis", 0xaa, 0x01 }, /* macro_insts.decoded            */
 235         { "PAPI_l1_dca",  0x43, 0x01 }, /* l1d_all_ref                    */
 236         { "PAPI_l1_icm",  0x81, 0x00 }, /* l1i_misses                     */
 237         { "PAPI_l1_icr",  0x80, 0x00 }, /* l1i_reads                      */
 238         { "PAPI_l1_tcw",  0x41, 0x0f }, /* l1d_cache_st.mesi              */
 239         { "PAPI_l2_stm",  0x2a, 0x41 }, /* l2_st.self.i_state             */
 240         { "PAPI_l2_tca",  0x2e, 0x4f }, /* l2_rqsts.self.demand.mesi      */
 241         { "PAPI_l2_tch",  0x2e, 0x4e }, /* l2_rqsts.mes                   */
 242         { "PAPI_l2_tcm",  0x2e, 0x41 }, /* l2_rqsts.self.demand.i_state   */
 243         { "PAPI_l2_tcw",  0x2a, 0x4f }, /* l2_st.self.mesi                */
 244         { "PAPI_ld_ins",  0xc0, 0x01 }, /* inst_retired.loads             */
 245         { "PAPI_lst_ins", 0xc0, 0x03 }, /* inst_retired.loads|stores      */
 246         { "PAPI_sr_ins",  0xc0, 0x02 }, /* inst_retired.stores            */
 247         { "PAPI_tlb_dm",  0x08, 0x01 }, /* dtlb_misses.any                */
 248         { "PAPI_tlb_im",  0x82, 0x12 }, /* itlb.small_miss|large_miss     */
 249         { "PAPI_tlb_tl",  0x0c, 0x03 }, /* page_walks                     */
 250         { "",             NT_END, 0  }
 251 };
 252 
 253 static const struct generic_events generic_events_pic0[] = {
 254         { "PAPI_l1_dcm",  0xcb, 0x01 }, /* mem_load_retired.l1d_miss */
 255         { "",             NT_END, 0  }
 256 };
 257 
 258 /*
 259  * The events listed in the following table can be counted on all
 260  * general-purpose counters on processors that are of Penryn and Merom Family
 261  */
 262 static const struct nametable_core_uarch cmn_gpc_events_core_uarch[] = {
 263         /* Alphabetical order of event name */
 264 
 265         { "baclears",                   0x0,    0xe6 },
 266         { "bogus_br",                   0x0,    0xe4 },
 267         { "br_bac_missp_exec",          0x0,    0x8a },
 268 
 269         { "br_call_exec",               0x0,    0x92 },
 270         { "br_call_missp_exec",         0x0,    0x93 },
 271         { "br_cnd_exec",                0x0,    0x8b },
 272 
 273         { "br_cnd_missp_exec",          0x0,    0x8c },
 274         { "br_ind_call_exec",           0x0,    0x94 },
 275         { "br_ind_exec",                0x0,    0x8d },
 276 
 277         { "br_ind_missp_exec",          0x0,    0x8e },
 278         { "br_inst_decoded",            0x0,    0xe0 },
 279         { "br_inst_exec",               0x0,    0x88 },
 280 
 281         { "br_inst_retired",            0x0,    0xc4 },
 282         { "br_inst_retired_mispred",    0x0,    0xc5 },
 283         { "br_missp_exec",              0x0,    0x89 },
 284 
 285         { "br_ret_bac_missp_exec",      0x0,    0x91 },
 286         { "br_ret_exec",                0x0,    0x8f },
 287         { "br_ret_missp_exec",          0x0,    0x90 },
 288 
 289         { "br_tkn_bubble_1",            0x0,    0x97 },
 290         { "br_tkn_bubble_2",            0x0,    0x98 },
 291         { "bus_bnr_drv",                ALL_AGENTS,     0x61 },
 292 
 293         { "bus_data_rcv",               ALL_CORES,      0x64 },
 294         { "bus_drdy_clocks",            ALL_AGENTS,     0x62 },
 295         { "bus_hit_drv",                ALL_AGENTS,     0x7a },
 296 
 297         { "bus_hitm_drv",               ALL_AGENTS,     0x7b },
 298         { "bus_io_wait",                ALL_CORES,      0x7f },
 299         { "bus_lock_clocks",            ALL_CORES | ALL_AGENTS, 0x63 },
 300 
 301         { "bus_request_outstanding",    ALL_CORES | ALL_AGENTS, 0x60 },
 302         { "bus_trans_any",              ALL_CORES | ALL_AGENTS, 0x70 },
 303         { "bus_trans_brd",              ALL_CORES | ALL_AGENTS, 0x65 },
 304 
 305         { "bus_trans_burst",            ALL_CORES | ALL_AGENTS, 0x6e },
 306         { "bus_trans_def",              ALL_CORES | ALL_AGENTS, 0x6d },
 307         { "bus_trans_ifetch",           ALL_CORES | ALL_AGENTS, 0x68 },
 308 
 309         { "bus_trans_inval",            ALL_CORES | ALL_AGENTS, 0x69 },
 310         { "bus_trans_io",               ALL_CORES | ALL_AGENTS, 0x6c },
 311         { "bus_trans_mem",              ALL_CORES | ALL_AGENTS, 0x6f },
 312 
 313         { "bus_trans_p",                ALL_CORES | ALL_AGENTS, 0x6b },
 314         { "bus_trans_pwr",              ALL_CORES | ALL_AGENTS, 0x6a },
 315         { "bus_trans_rfo",              ALL_CORES | ALL_AGENTS, 0x66 },
 316 
 317         { "bus_trans_wb",               ALL_CORES | ALL_AGENTS, 0x67 },
 318         { "busq_empty",                 ALL_CORES,      0x7d },
 319         { "cmp_snoop",                  ALL_CORES,      0x78 },
 320 
 321         { "cpu_clk_unhalted",           0x0,    0x3c },
 322         { "cycles_int",                 0x0,    0xc6 },
 323         { "cycles_l1i_mem_stalled",     0x0,    0x86 },
 324 
 325         { "dtlb_misses",                0x0,    0x08 },
 326         { "eist_trans",                 0x0,    0x3a },
 327         { "esp",                        0x0,    0xab },
 328 
 329         { "ext_snoop",                  ALL_AGENTS,     0x77 },
 330         { "fp_mmx_trans",               0x0,    0xcc },
 331         { "hw_int_rcv",                 0x0,    0xc8 },
 332 
 333         { "ild_stall",                  0x0,    0x87 },
 334         { "inst_queue",                 0x0,    0x83 },
 335         { "inst_retired",               0x0,    0xc0 },
 336 
 337         { "itlb",                       0x0,    0x82 },
 338         { "itlb_miss_retired",          0x0,    0xc9 },
 339         { "l1d_all_ref",                0x0,    0x43 },
 340 
 341         { "l1d_cache_ld",               0x0,    0x40 },
 342         { "l1d_cache_lock",             0x0,    0x42 },
 343         { "l1d_cache_st",               0x0,    0x41 },
 344 
 345         { "l1d_m_evict",                0x0,    0x47 },
 346         { "l1d_m_repl",                 0x0,    0x46 },
 347         { "l1d_pend_miss",              0x0,    0x48 },
 348 
 349         { "l1d_prefetch",               0x0,    0x4e },
 350         { "l1d_repl",                   0x0,    0x45 },
 351         { "l1d_split",                  0x0,    0x49 },
 352 
 353         { "l1i_misses",                 0x0,    0x81 },
 354         { "l1i_reads",                  0x0,    0x80 },
 355         { "l2_ads",                     ALL_CORES,      0x21 },
 356 
 357         { "l2_dbus_busy_rd",            ALL_CORES,      0x23 },
 358         { "l2_ifetch",                  ALL_CORES,      0x28 },
 359         { "l2_ld",                      ALL_CORES,      0x29 },
 360 
 361         { "l2_lines_in",                ALL_CORES,      0x24 },
 362         { "l2_lines_out",               ALL_CORES,      0x26 },
 363         { "l2_lock",                    ALL_CORES,      0x2b },
 364 
 365         { "l2_m_lines_in",              ALL_CORES,      0x25 },
 366         { "l2_m_lines_out",             ALL_CORES,      0x27 },
 367         { "l2_no_req",                  ALL_CORES,      0x32 },
 368 
 369         { "l2_reject_busq",             ALL_CORES,      0x30 },
 370         { "l2_rqsts",                   ALL_CORES,      0x2e },
 371         { "l2_st",                      ALL_CORES,      0x2a },
 372 
 373         { "load_block",                 0x0,    0x03 },
 374         { "load_hit_pre",               0x0,    0x4c },
 375         { "machine_nukes",              0x0,    0xc3 },
 376 
 377         { "macro_insts",                0x0,    0xaa },
 378         { "memory_disambiguation",      0x0,    0x09 },
 379         { "misalign_mem_ref",           0x0,    0x05 },
 380         { "page_walks",                 0x0,    0x0c },
 381 
 382         { "pref_rqsts_dn",              0x0,    0xf8 },
 383         { "pref_rqsts_up",              0x0,    0xf0 },
 384         { "rat_stalls",                 0x0,    0xd2 },
 385 
 386         { "resource_stalls",            0x0,    0xdc },
 387         { "rs_uops_dispatched",         0x0,    0xa0 },
 388         { "seg_reg_renames",            0x0,    0xd5 },
 389 
 390         { "seg_rename_stalls",          0x0,    0xd4 },
 391         { "segment_reg_loads",          0x0,    0x06 },
 392         { "simd_assist",                0x0,    0xcd },
 393 
 394         { "simd_comp_inst_retired",     0x0,    0xca },
 395         { "simd_inst_retired",          0x0,    0xc7 },
 396         { "simd_instr_retired",         0x0,    0xce },
 397 
 398         { "simd_sat_instr_retired",     0x0,    0xcf },
 399         { "simd_sat_uop_exec",          0x0,    0xb1 },
 400         { "simd_uop_type_exec",         0x0,    0xb3 },
 401 
 402         { "simd_uops_exec",             0x0,    0xb0 },
 403         { "snoop_stall_drv",            ALL_CORES | ALL_AGENTS, 0x7e },
 404         { "sse_pre_exec",               0x0,    0x07 },
 405 
 406         { "sse_pre_miss",               0x0,    0x4b },
 407         { "store_block",                0x0,    0x04 },
 408         { "thermal_trip",               0x0,    0x3b },
 409 
 410         { "uops_retired",               0x0,    0xc2 },
 411         { "x87_ops_retired",            0x0,    0xc1 },
 412         { "",                           0x0,    NT_END }
 413 };
 414 
 415 /*
 416  * If any of the pic specific events require privileges, make sure to add a
 417  * check in configure_gpc() to find whether an event hard-coded as a number by
 418  * the user has any privilege requirements
 419  */
 420 static const struct nametable_core_uarch pic0_events[] = {
 421         /* Alphabetical order of event name */
 422 
 423         { "cycles_div_busy",            0x0,    0x14 },
 424         { "fp_comp_ops_exe",            0x0,    0x10 },
 425         { "idle_during_div",            0x0,    0x18 },
 426 
 427         { "mem_load_retired",           0x0,    0xcb },
 428         { "rs_uops_dispatched_port",    0x0,    0xa1 },
 429         { "",                           0x0,    NT_END }
 430 };
 431 
 432 static const struct nametable_core_uarch pic1_events[] = {
 433         /* Alphabetical order of event name */
 434 
 435         { "delayed_bypass",     0x0,    0x19 },
 436         { "div",                0x0,    0x13 },
 437         { "fp_assist",          0x0,    0x11 },
 438 
 439         { "mul",                0x0,    0x12 },
 440         { "",                   0x0,    NT_END }
 441 };
 442 
 443 /* FFC entries must be in order */
 444 static char *ffc_names_non_htt[] = {
 445         "instr_retired.any",
 446         "cpu_clk_unhalted.core",
 447         "cpu_clk_unhalted.ref",
 448         NULL
 449 };
 450 
 451 static char *ffc_names_htt[] = {
 452         "instr_retired.any",
 453         "cpu_clk_unhalted.thread",
 454         "cpu_clk_unhalted.ref",
 455         NULL
 456 };
 457 
 458 static char *ffc_genericnames[] = {
 459         "PAPI_tot_ins",
 460         "PAPI_tot_cyc",
 461         "",
 462         NULL
 463 };
 464 
 465 static char     **ffc_names = NULL;
 466 static char     **ffc_allnames = NULL;
 467 static char     **gpc_names = NULL;
 468 static uint32_t versionid;
 469 static uint64_t num_gpc;
 470 static uint64_t width_gpc;
 471 static uint64_t mask_gpc;
 472 static uint64_t num_ffc;
 473 static uint64_t width_ffc;
 474 static uint64_t mask_ffc;
 475 static uint_t   total_pmc;
 476 static uint64_t control_ffc;
 477 static uint64_t control_gpc;
 478 static uint64_t control_mask;
 479 static uint32_t arch_events_vector;
 480 
 481 #define IMPL_NAME_LEN 100
 482 static char core_impl_name[IMPL_NAME_LEN];
 483 
 484 static const char *core_cpuref =
 485         "See Appendix A of the \"Intel 64 and IA-32 Architectures Software" \
 486         " Developer's Manual Volume 3B: System Programming Guide, Part 2\"" \
 487         " Order Number: 253669-026US, Februrary 2008";
 488 
 489 struct events_table_t {
 490         uint8_t         eventselect;
 491         uint8_t         unitmask;
 492         uint64_t        supported_counters;
 493         const char      *name;
 494 };
 495 
 496 /* Used to describe which counters support an event */
 497 #define C(x) (1 << (x))
 498 #define C0 C(0)
 499 #define C1 C(1)
 500 #define C2 C(2)
 501 #define C3 C(3)
 502 #define C_ALL 0xFFFFFFFFFFFFFFFF
 503 
 504 /* Architectural events */
 505 #define ARCH_EVENTS_COMMON                                      \
 506         { 0xc0, 0x00, C_ALL, "inst_retired.any_p" },            \
 507         { 0x3c, 0x01, C_ALL, "cpu_clk_unhalted.ref_p" },        \
 508         { 0x2e, 0x4f, C_ALL, "longest_lat_cache.reference" },   \
 509         { 0x2e, 0x41, C_ALL, "longest_lat_cache.miss" },        \
 510         { 0xc4, 0x00, C_ALL, "br_inst_retired.all_branches" },  \
 511         { 0xc5, 0x00, C_ALL, "br_misp_retired.all_branches" }
 512 
 513 static const struct events_table_t arch_events_table_non_htt[] = {
 514         { 0x3c, 0x00, C_ALL, "cpu_clk_unhalted.core" },
 515         ARCH_EVENTS_COMMON
 516 };
 517 
 518 static const struct events_table_t arch_events_table_htt[] = {
 519         { 0x3c, 0x00, C_ALL, "cpu_clk_unhalted.thread_p" },
 520         ARCH_EVENTS_COMMON
 521 };
 522 
 523 static char *arch_genevents_table[] = {
 524         "PAPI_tot_cyc", /* cpu_clk_unhalted.thread_p/core */
 525         "PAPI_tot_ins", /* inst_retired.any_p             */
 526         "",             /* cpu_clk_unhalted.ref_p         */
 527         "",             /* longest_lat_cache.reference    */
 528         "",             /* longest_lat_cache.miss         */
 529         "",             /* br_inst_retired.all_branches   */
 530         "",             /* br_misp_retired.all_branches   */
 531 };
 532 
 533 static const struct events_table_t *arch_events_table = NULL;
 534 static uint64_t known_arch_events;
 535 static uint64_t known_ffc_num;
 536 
 537 #define GENERICEVENTS_FAM6_NHM                                                 \
 538 { 0xc4, 0x01, C0|C1|C2|C3, "PAPI_br_cn" },   /* br_inst_retired.conditional */ \
 539 { 0x1d, 0x01, C0|C1|C2|C3, "PAPI_hw_int" },  /* hw_int.rcx                  */ \
 540 { 0x17, 0x01, C0|C1|C2|C3, "PAPI_tot_iis" }, /* inst_queue_writes           */ \
 541 { 0x43, 0x01, C0|C1,       "PAPI_l1_dca" },  /* l1d_all_ref.any             */ \
 542 { 0x24, 0x03, C0|C1|C2|C3, "PAPI_l1_dcm" },  /* l2_rqsts. loads and rfos    */ \
 543 { 0x40, 0x0f, C0|C1|C2|C3, "PAPI_l1_dcr" },  /* l1d_cache_ld.mesi           */ \
 544 { 0x41, 0x0f, C0|C1|C2|C3, "PAPI_l1_dcw" },  /* l1d_cache_st.mesi           */ \
 545 { 0x80, 0x03, C0|C1|C2|C3, "PAPI_l1_ica" },  /* l1i.reads                   */ \
 546 { 0x80, 0x01, C0|C1|C2|C3, "PAPI_l1_ich" },  /* l1i.hits                    */ \
 547 { 0x80, 0x02, C0|C1|C2|C3, "PAPI_l1_icm" },  /* l1i.misses                  */ \
 548 { 0x80, 0x03, C0|C1|C2|C3, "PAPI_l1_icr" },  /* l1i.reads                   */ \
 549 { 0x24, 0x33, C0|C1|C2|C3, "PAPI_l1_ldm" },  /* l2_rqsts. loads and ifetches */\
 550 { 0x24, 0xff, C0|C1|C2|C3, "PAPI_l1_tcm" },  /* l2_rqsts.references         */ \
 551 { 0x24, 0x02, C0|C1|C2|C3, "PAPI_l2_ldm" },  /* l2_rqsts.ld_miss            */ \
 552 { 0x24, 0x08, C0|C1|C2|C3, "PAPI_l2_stm" },  /* l2_rqsts.rfo_miss           */ \
 553 { 0x24, 0x3f, C0|C1|C2|C3, "PAPI_l2_tca" },                                    \
 554                                 /* l2_rqsts. loads, rfos and ifetches */       \
 555 { 0x24, 0x15, C0|C1|C2|C3, "PAPI_l2_tch" },                                    \
 556                                 /* l2_rqsts. ld_hit, rfo_hit and ifetch_hit */ \
 557 { 0x24, 0x2a, C0|C1|C2|C3, "PAPI_l2_tcm" },                                    \
 558                         /* l2_rqsts. ld_miss, rfo_miss and ifetch_miss */      \
 559 { 0x24, 0x33, C0|C1|C2|C3, "PAPI_l2_tcr" },  /* l2_rqsts. loads and ifetches */\
 560 { 0x24, 0x0c, C0|C1|C2|C3, "PAPI_l2_tcw" },  /* l2_rqsts.rfos               */ \
 561 { 0x2e, 0x4f, C0|C1|C2|C3, "PAPI_l3_tca" },  /* l3_lat_cache.reference      */ \
 562 { 0x2e, 0x41, C0|C1|C2|C3, "PAPI_l3_tcm" },  /* l3_lat_cache.misses         */ \
 563 { 0x0b, 0x01, C0|C1|C2|C3, "PAPI_ld_ins" },  /* mem_inst_retired.loads      */ \
 564 { 0x0b, 0x03, C0|C1|C2|C3, "PAPI_lst_ins" },                                   \
 565                                 /* mem_inst_retired.loads and stores        */ \
 566 { 0x26, 0xf0, C0|C1|C2|C3, "PAPI_prf_dm" },  /* l2_data_rqsts.prefetch.mesi */ \
 567 { 0x0b, 0x02, C0|C1|C2|C3, "PAPI_sr_ins" },  /* mem_inst_retired.stores     */ \
 568 { 0x49, 0x01, C0|C1|C2|C3, "PAPI_tlb_dm" },  /* dtlb_misses.any             */ \
 569 { 0x85, 0x01, C0|C1|C2|C3, "PAPI_tlb_im" }   /* itlb_misses.any             */
 570 
 571 
 572 #define EVENTS_FAM6_NHM                                                 \
 573                                                                         \
 574 { 0x80, 0x04, C0|C1|C2|C3, "l1i.cycles_stalled" },                      \
 575 { 0x80, 0x01, C0|C1|C2|C3, "l1i.hits" },                                \
 576 { 0x80, 0x02, C0|C1|C2|C3, "l1i.misses" },                              \
 577                                                                         \
 578 { 0x80, 0x03, C0|C1|C2|C3, "l1i.reads" },                               \
 579 { 0x82, 0x01, C0|C1|C2|C3, "large_itlb.hit" },                          \
 580 { 0x87, 0x0F, C0|C1|C2|C3, "ild_stall.any" },                           \
 581                                                                         \
 582 { 0x87, 0x04, C0|C1|C2|C3, "ild_stall.iq_full" },                       \
 583 { 0x87, 0x01, C0|C1|C2|C3, "ild_stall.lcp" },                           \
 584 { 0x87, 0x02, C0|C1|C2|C3, "ild_stall.mru" },                           \
 585                                                                         \
 586 { 0x87, 0x08, C0|C1|C2|C3, "ild_stall.regen" },                         \
 587 { 0xE6, 0x02, C0|C1|C2|C3, "baclear.bad_target" },                      \
 588 { 0xE6, 0x01, C0|C1|C2|C3, "baclear.clear" },                           \
 589                                                                         \
 590 { 0xE8, 0x01, C0|C1|C2|C3, "bpu_clears.early" },                        \
 591 { 0xE8, 0x02, C0|C1|C2|C3, "bpu_clears.late" },                         \
 592 { 0xE5, 0x01, C0|C1|C2|C3, "bpu_missed_call_ret" },                     \
 593                                                                         \
 594 { 0xE0, 0x01, C0|C1|C2|C3, "br_inst_decoded" },                         \
 595 { 0x88, 0x7F, C0|C1|C2|C3, "br_inst_exec.any" },                        \
 596 { 0x88, 0x01, C0|C1|C2|C3, "br_inst_exec.cond" },                       \
 597                                                                         \
 598 { 0x88, 0x02, C0|C1|C2|C3, "br_inst_exec.direct" },                     \
 599 { 0x88, 0x10, C0|C1|C2|C3, "br_inst_exec.direct_near_call" },           \
 600 { 0x88, 0x20, C0|C1|C2|C3, "br_inst_exec.indirect_near_call" },         \
 601                                                                         \
 602 { 0x88, 0x04, C0|C1|C2|C3, "br_inst_exec.indirect_non_call" },          \
 603 { 0x88, 0x30, C0|C1|C2|C3, "br_inst_exec.near_calls" },                 \
 604 { 0x88, 0x07, C0|C1|C2|C3, "br_inst_exec.non_calls" },                  \
 605                                                                         \
 606 { 0x88, 0x08, C0|C1|C2|C3, "br_inst_exec.return_near" },                \
 607 { 0x88, 0x40, C0|C1|C2|C3, "br_inst_exec.taken" },                      \
 608 { 0x89, 0x7F, C0|C1|C2|C3, "br_misp_exec.any" },                        \
 609                                                                         \
 610 { 0x89, 0x01, C0|C1|C2|C3, "br_misp_exec.cond" },                       \
 611 { 0x89, 0x02, C0|C1|C2|C3, "br_misp_exec.direct" },                     \
 612 { 0x89, 0x10, C0|C1|C2|C3, "br_misp_exec.direct_near_call" },           \
 613                                                                         \
 614 { 0x89, 0x20, C0|C1|C2|C3, "br_misp_exec.indirect_near_call" },         \
 615 { 0x89, 0x04, C0|C1|C2|C3, "br_misp_exec.indirect_non_call" },          \
 616 { 0x89, 0x30, C0|C1|C2|C3, "br_misp_exec.near_calls" },                 \
 617                                                                         \
 618 { 0x89, 0x07, C0|C1|C2|C3, "br_misp_exec.non_calls" },                  \
 619 { 0x89, 0x08, C0|C1|C2|C3, "br_misp_exec.return_near" },                \
 620 { 0x89, 0x40, C0|C1|C2|C3, "br_misp_exec.taken" },                      \
 621                                                                         \
 622 { 0x17, 0x01, C0|C1|C2|C3, "inst_queue_writes" },                       \
 623 { 0x1E, 0x01, C0|C1|C2|C3, "inst_queue_write_cycles" },                 \
 624 { 0xA7, 0x01, C0|C1|C2|C3, "baclear_force_iq" },                        \
 625                                                                         \
 626 { 0xD0, 0x01, C0|C1|C2|C3, "macro_insts.decoded" },                     \
 627 { 0xA6, 0x01, C0|C1|C2|C3, "macro_insts.fusions_decoded" },             \
 628 { 0x19, 0x01, C0|C1|C2|C3, "two_uop_insts_decoded" },                   \
 629                                                                         \
 630 { 0x18, 0x01, C0|C1|C2|C3, "inst_decoded.dec0" },                       \
 631 { 0xD1, 0x04, C0|C1|C2|C3, "uops_decoded.esp_folding" },                \
 632 { 0xD1, 0x08, C0|C1|C2|C3, "uops_decoded.esp_sync" },                   \
 633                                                                         \
 634 { 0xD1, 0x02, C0|C1|C2|C3, "uops_decoded.ms" },                         \
 635 { 0x20, 0x01, C0|C1|C2|C3, "lsd_overflow" },                            \
 636 { 0x0E, 0x01, C0|C1|C2|C3, "uops_issued.any" },                         \
 637                                                                         \
 638 { 0x0E, 0x02, C0|C1|C2|C3, "uops_issued.fused" },                       \
 639 { 0xA2, 0x20, C0|C1|C2|C3, "resource_stalls.fpcw" },                    \
 640 { 0xA2, 0x02, C0|C1|C2|C3, "resource_stalls.load" },                    \
 641                                                                         \
 642 { 0xA2, 0x40, C0|C1|C2|C3, "resource_stalls.mxcsr" },                   \
 643 { 0xA2, 0x04, C0|C1|C2|C3, "resource_stalls.rs_full" },                 \
 644 { 0xA2, 0x08, C0|C1|C2|C3, "resource_stalls.store" },                   \
 645                                                                         \
 646 { 0xA2, 0x01, C0|C1|C2|C3, "resource_stalls.any" },                     \
 647 { 0xD2, 0x01, C0|C1|C2|C3, "rat_stalls.flags" },                        \
 648 { 0xD2, 0x02, C0|C1|C2|C3, "rat_stalls.registers" },                    \
 649                                                                         \
 650 { 0xD2, 0x04, C0|C1|C2|C3, "rat_stalls.rob_read_port" },                \
 651 { 0xD2, 0x08, C0|C1|C2|C3, "rat_stalls.scoreboard" },                   \
 652 { 0xD2, 0x0F, C0|C1|C2|C3, "rat_stalls.any" },                          \
 653                                                                         \
 654 { 0xD4, 0x01, C0|C1|C2|C3, "seg_rename_stalls" },                       \
 655 { 0xD5, 0x01, C0|C1|C2|C3, "es_reg_renames" },                          \
 656 { 0x10, 0x02, C0|C1|C2|C3, "fp_comp_ops_exe.mmx" },                     \
 657                                                                         \
 658 { 0x10, 0x80, C0|C1|C2|C3, "fp_comp_ops_exe.sse_double_precision" },    \
 659 { 0x10, 0x04, C0|C1|C2|C3, "fp_comp_ops_exe.sse_fp" },                  \
 660 { 0x10, 0x10, C0|C1|C2|C3, "fp_comp_ops_exe.sse_fp_packed" },           \
 661                                                                         \
 662 { 0x10, 0x20, C0|C1|C2|C3, "fp_comp_ops_exe.sse_fp_scalar" },           \
 663 { 0x10, 0x40, C0|C1|C2|C3, "fp_comp_ops_exe.sse_single_precision" },    \
 664 { 0x10, 0x08, C0|C1|C2|C3, "fp_comp_ops_exe.sse2_integer" },            \
 665                                                                         \
 666 { 0x10, 0x01, C0|C1|C2|C3, "fp_comp_ops_exe.x87" },                     \
 667 { 0x14, 0x01, C0|C1|C2|C3, "arith.cycles_div_busy" },                   \
 668 { 0x14, 0x02, C0|C1|C2|C3, "arith.mul" },                               \
 669                                                                         \
 670 { 0x12, 0x04, C0|C1|C2|C3, "simd_int_128.pack" },                       \
 671 { 0x12, 0x20, C0|C1|C2|C3, "simd_int_128.packed_arith" },               \
 672 { 0x12, 0x10, C0|C1|C2|C3, "simd_int_128.packed_logical" },             \
 673                                                                         \
 674 { 0x12, 0x01, C0|C1|C2|C3, "simd_int_128.packed_mpy" },                 \
 675 { 0x12, 0x02, C0|C1|C2|C3, "simd_int_128.packed_shift" },               \
 676 { 0x12, 0x40, C0|C1|C2|C3, "simd_int_128.shuffle_move" },               \
 677                                                                         \
 678 { 0x12, 0x08, C0|C1|C2|C3, "simd_int_128.unpack" },                     \
 679 { 0xFD, 0x04, C0|C1|C2|C3, "simd_int_64.pack" },                        \
 680 { 0xFD, 0x20, C0|C1|C2|C3, "simd_int_64.packed_arith" },                \
 681                                                                         \
 682 { 0xFD, 0x10, C0|C1|C2|C3, "simd_int_64.packed_logical" },              \
 683 { 0xFD, 0x01, C0|C1|C2|C3, "simd_int_64.packed_mpy" },                  \
 684 { 0xFD, 0x02, C0|C1|C2|C3, "simd_int_64.packed_shift" },                \
 685                                                                         \
 686 { 0xFD, 0x40, C0|C1|C2|C3, "simd_int_64.shuffle_move" },                \
 687 { 0xFD, 0x08, C0|C1|C2|C3, "simd_int_64.unpack" },                      \
 688 { 0xB1, 0x01, C0|C1|C2|C3, "uops_executed.port0" },                     \
 689                                                                         \
 690 { 0xB1, 0x02, C0|C1|C2|C3, "uops_executed.port1" },                     \
 691 { 0x40, 0x04, C0|C1, "l1d_cache_ld.e_state" },                          \
 692 { 0x40, 0x01, C0|C1, "l1d_cache_ld.i_state" },                          \
 693                                                                         \
 694 { 0x40, 0x08, C0|C1, "l1d_cache_ld.m_state" },                          \
 695 { 0x40, 0x0F, C0|C1, "l1d_cache_ld.mesi" },                             \
 696 { 0x40, 0x02, C0|C1, "l1d_cache_ld.s_state" },                          \
 697                                                                         \
 698 { 0x41, 0x04, C0|C1, "l1d_cache_st.e_state" },                          \
 699 { 0x41, 0x08, C0|C1, "l1d_cache_st.m_state" },                          \
 700 { 0x41, 0x0F, C0|C1, "l1d_cache_st.mesi" },                             \
 701                                                                         \
 702 { 0x41, 0x02, C0|C1, "l1d_cache_st.s_state" },                          \
 703 { 0x42, 0x04, C0|C1, "l1d_cache_lock.e_state" },                        \
 704 { 0x42, 0x01, C0|C1, "l1d_cache_lock.hit" },                            \
 705                                                                         \
 706 { 0x42, 0x08, C0|C1, "l1d_cache_lock.m_state" },                        \
 707 { 0x42, 0x02, C0|C1, "l1d_cache_lock.s_state" },                        \
 708 { 0x43, 0x01, C0|C1, "l1d_all_ref.any" },                               \
 709                                                                         \
 710 { 0x43, 0x02, C0|C1, "l1d_all_ref.cacheable" },                         \
 711 { 0x4B, 0x01, C0|C1, "mmx2_mem_exec.nta" },                             \
 712 { 0x4C, 0x01, C0|C1, "load_hit_pre" },                                  \
 713                                                                         \
 714 { 0x4E, 0x02, C0|C1, "l1d_prefetch.miss" },                             \
 715 { 0x4E, 0x01, C0|C1, "l1d_prefetch.requests" },                         \
 716 { 0x4E, 0x04, C0|C1, "l1d_prefetch.triggers" },                         \
 717                                                                         \
 718 { 0x51, 0x04, C0|C1, "l1d.m_evict" },                                   \
 719 { 0x51, 0x02, C0|C1, "l1d.m_repl" },                                    \
 720 { 0x51, 0x08, C0|C1, "l1d.m_snoop_evict" },                             \
 721                                                                         \
 722 { 0x51, 0x01, C0|C1, "l1d.repl" },                                      \
 723 { 0x52, 0x01, C0|C1, "l1d_cache_prefetch_lock_fb_hit" },                \
 724 { 0x53, 0x01, C0|C1, "l1d_cache_lock_fb_hit" },                         \
 725                                                                         \
 726 { 0x63, 0x02, C0|C1, "cache_lock_cycles.l1d" },                         \
 727 { 0x63, 0x01, C0|C1, "cache_lock_cycles.l1d_l2" },                      \
 728 { 0x06, 0x04, C0|C1|C2|C3, "store_blocks.at_ret" },                     \
 729                                                                         \
 730 { 0x06, 0x08, C0|C1|C2|C3, "store_blocks.l1d_block" },                  \
 731 { 0x06, 0x01, C0|C1|C2|C3, "store_blocks.not_sta" },                    \
 732 { 0x06, 0x02, C0|C1|C2|C3, "store_blocks.sta" },                        \
 733                                                                         \
 734 { 0x13, 0x07, C0|C1|C2|C3, "load_dispatch.any" },                       \
 735 { 0x13, 0x04, C0|C1|C2|C3, "load_dispatch.mob" },                       \
 736 { 0x13, 0x01, C0|C1|C2|C3, "load_dispatch.rs" },                        \
 737                                                                         \
 738 { 0x13, 0x02, C0|C1|C2|C3, "load_dispatch.rs_delayed" },                \
 739 { 0x08, 0x01, C0|C1|C2|C3, "dtlb_load_misses.any" },                    \
 740 { 0x08, 0x20, C0|C1|C2|C3, "dtlb_load_misses.pde_miss" },               \
 741                                                                         \
 742 { 0x08, 0x02, C0|C1|C2|C3, "dtlb_load_misses.walk_completed" },         \
 743 { 0x49, 0x01, C0|C1|C2|C3, "dtlb_misses.any" },                         \
 744 { 0x49, 0x10, C0|C1|C2|C3, "dtlb_misses.stlb_hit" },                    \
 745                                                                         \
 746 { 0x49, 0x02, C0|C1|C2|C3, "dtlb_misses.walk_completed" },              \
 747 { 0x4F, 0x02, C0|C1|C2|C3, "ept.epde_miss" },                           \
 748 { 0x4F, 0x08, C0|C1|C2|C3, "ept.epdpe_miss" },                          \
 749                                                                         \
 750 { 0x85, 0x01, C0|C1|C2|C3, "itlb_misses.any" },                         \
 751 { 0x85, 0x02, C0|C1|C2|C3, "itlb_misses.walk_completed" },              \
 752 { 0x24, 0xAA, C0|C1|C2|C3, "l2_rqsts.miss" },                           \
 753                                                                         \
 754 { 0x24, 0xFF, C0|C1|C2|C3, "l2_rqsts.references" },                     \
 755 { 0x24, 0x10, C0|C1|C2|C3, "l2_rqsts.ifetch_hit" },                     \
 756 { 0x24, 0x20, C0|C1|C2|C3, "l2_rqsts.ifetch_miss" },                    \
 757                                                                         \
 758 { 0x24, 0x30, C0|C1|C2|C3, "l2_rqsts.ifetches" },                       \
 759 { 0x24, 0x01, C0|C1|C2|C3, "l2_rqsts.ld_hit" },                         \
 760 { 0x24, 0x02, C0|C1|C2|C3, "l2_rqsts.ld_miss" },                        \
 761                                                                         \
 762 { 0x24, 0x03, C0|C1|C2|C3, "l2_rqsts.loads" },                          \
 763 { 0x24, 0x40, C0|C1|C2|C3, "l2_rqsts.prefetch_hit" },                   \
 764 { 0x24, 0x80, C0|C1|C2|C3, "l2_rqsts.prefetch_miss" },                  \
 765                                                                         \
 766 { 0x24, 0xC0, C0|C1|C2|C3, "l2_rqsts.prefetches" },                     \
 767 { 0x24, 0x04, C0|C1|C2|C3, "l2_rqsts.rfo_hit" },                        \
 768 { 0x24, 0x08, C0|C1|C2|C3, "l2_rqsts.rfo_miss" },                       \
 769                                                                         \
 770 { 0x24, 0x0C, C0|C1|C2|C3, "l2_rqsts.rfos" },                           \
 771 { 0x26, 0xFF, C0|C1|C2|C3, "l2_data_rqsts.any" },                       \
 772 { 0x26, 0x04, C0|C1|C2|C3, "l2_data_rqsts.demand.e_state" },            \
 773                                                                         \
 774 { 0x26, 0x01, C0|C1|C2|C3, "l2_data_rqsts.demand.i_state" },            \
 775 { 0x26, 0x08, C0|C1|C2|C3, "l2_data_rqsts.demand.m_state" },            \
 776 { 0x26, 0x0F, C0|C1|C2|C3, "l2_data_rqsts.demand.mesi" },               \
 777                                                                         \
 778 { 0x26, 0x02, C0|C1|C2|C3, "l2_data_rqsts.demand.s_state" },            \
 779 { 0x26, 0x40, C0|C1|C2|C3, "l2_data_rqsts.prefetch.e_state" },          \
 780 { 0x26, 0x10, C0|C1|C2|C3, "l2_data_rqsts.prefetch.i_state" },          \
 781                                                                         \
 782 { 0x26, 0x80, C0|C1|C2|C3, "l2_data_rqsts.prefetch.m_state" },          \
 783 { 0x26, 0xF0, C0|C1|C2|C3, "l2_data_rqsts.prefetch.mesi" },             \
 784 { 0x26, 0x20, C0|C1|C2|C3, "l2_data_rqsts.prefetch.s_state" },          \
 785                                                                         \
 786 { 0x27, 0x40, C0|C1|C2|C3, "l2_write.lock.e_state" },                   \
 787 { 0x27, 0x10, C0|C1|C2|C3, "l2_write.lock.i_state" },                   \
 788 { 0x27, 0x20, C0|C1|C2|C3, "l2_write.lock.s_state" },                   \
 789                                                                         \
 790 { 0x27, 0x0E, C0|C1|C2|C3, "l2_write.rfo.hit" },                        \
 791 { 0x27, 0x01, C0|C1|C2|C3, "l2_write.rfo.i_state" },                    \
 792 { 0x27, 0x08, C0|C1|C2|C3, "l2_write.rfo.m_state" },                    \
 793                                                                         \
 794 { 0x27, 0x0F, C0|C1|C2|C3, "l2_write.rfo.mesi" },                       \
 795 { 0x27, 0x02, C0|C1|C2|C3, "l2_write.rfo.s_state" },                    \
 796 { 0x28, 0x04, C0|C1|C2|C3, "l1d_wb_l2.e_state" },                       \
 797                                                                         \
 798 { 0x28, 0x01, C0|C1|C2|C3, "l1d_wb_l2.i_state" },                       \
 799 { 0x28, 0x08, C0|C1|C2|C3, "l1d_wb_l2.m_state" },                       \
 800 { 0xF0, 0x80, C0|C1|C2|C3, "l2_transactions.any" },                     \
 801                                                                         \
 802 { 0xF0, 0x20, C0|C1|C2|C3, "l2_transactions.fill" },                    \
 803 { 0xF0, 0x04, C0|C1|C2|C3, "l2_transactions.ifetch" },                  \
 804 { 0xF0, 0x10, C0|C1|C2|C3, "l2_transactions.l1d_wb" },                  \
 805                                                                         \
 806 { 0xF0, 0x01, C0|C1|C2|C3, "l2_transactions.load" },                    \
 807 { 0xF0, 0x08, C0|C1|C2|C3, "l2_transactions.prefetch" },                \
 808 { 0xF0, 0x02, C0|C1|C2|C3, "l2_transactions.rfo" },                     \
 809                                                                         \
 810 { 0xF0, 0x40, C0|C1|C2|C3, "l2_transactions.wb" },                      \
 811 { 0xF1, 0x07, C0|C1|C2|C3, "l2_lines_in.any" },                         \
 812 { 0xF1, 0x04, C0|C1|C2|C3, "l2_lines_in.e_state" },                     \
 813                                                                         \
 814 { 0xF1, 0x02, C0|C1|C2|C3, "l2_lines_in.s_state" },                     \
 815 { 0xF2, 0x0F, C0|C1|C2|C3, "l2_lines_out.any" },                        \
 816 { 0xF2, 0x01, C0|C1|C2|C3, "l2_lines_out.demand_clean" },               \
 817                                                                         \
 818 { 0xF2, 0x02, C0|C1|C2|C3, "l2_lines_out.demand_dirty" },               \
 819 { 0xF2, 0x04, C0|C1|C2|C3, "l2_lines_out.prefetch_clean" },             \
 820 { 0x6C, 0x01, C0|C1|C2|C3, "io_transactions" },                         \
 821                                                                         \
 822 { 0xB0, 0x80, C0|C1|C2|C3, "offcore_requests.any" },                    \
 823 { 0xB0, 0x10, C0|C1|C2|C3, "offcore_requests.any.rfo" },                \
 824 { 0xB0, 0x40, C0|C1|C2|C3, "offcore_requests.l1d_writeback" },          \
 825                                                                         \
 826 { 0xB8, 0x01, C0|C1|C2|C3, "snoop_response.hit" },                      \
 827 { 0xB8, 0x02, C0|C1|C2|C3, "snoop_response.hite" },                     \
 828 { 0xB8, 0x04, C0|C1|C2|C3, "snoop_response.hitm" },                     \
 829                                                                         \
 830 { 0xF4, 0x10, C0|C1|C2|C3, "sq_misc.split_lock" },                      \
 831 { 0x0B, 0x01, C0|C1|C2|C3, "mem_inst_retired.loads" },                  \
 832 { 0x0B, 0x02, C0|C1|C2|C3, "mem_inst_retired.stores" },                 \
 833                                                                         \
 834 { 0xC0, 0x04, C0|C1|C2|C3, "inst_retired.mmx" },                        \
 835 { 0xC0, 0x02, C0|C1|C2|C3, "inst_retired.x87" },                        \
 836 { 0xC7, 0x04, C0|C1|C2|C3, "ssex_uops_retired.packed_double" },         \
 837                                                                         \
 838 { 0xC7, 0x01, C0|C1|C2|C3, "ssex_uops_retired.packed_single" },         \
 839 { 0xC7, 0x08, C0|C1|C2|C3, "ssex_uops_retired.scalar_double" },         \
 840 { 0xC7, 0x02, C0|C1|C2|C3, "ssex_uops_retired.scalar_single" },         \
 841                                                                         \
 842 { 0xC7, 0x10, C0|C1|C2|C3, "ssex_uops_retired.vector_integer" },        \
 843 { 0xC2, 0x01, C0|C1|C2|C3, "uops_retired.any" },                        \
 844 { 0xC2, 0x04, C0|C1|C2|C3, "uops_retired.macro_fused" },                \
 845                                                                         \
 846 { 0xC8, 0x20, C0|C1|C2|C3, "itlb_miss_retired" },                       \
 847 { 0xCB, 0x80, C0|C1|C2|C3, "mem_load_retired.dtlb_miss" },              \
 848 { 0xCB, 0x40, C0|C1|C2|C3, "mem_load_retired.hit_lfb" },                \
 849                                                                         \
 850 { 0xCB, 0x01, C0|C1|C2|C3, "mem_load_retired.l1d_hit" },                \
 851 { 0xCB, 0x02, C0|C1|C2|C3, "mem_load_retired.l2_hit" },                 \
 852 { 0xCB, 0x10, C0|C1|C2|C3, "mem_load_retired.llc_miss" },               \
 853                                                                         \
 854 { 0xCB, 0x04, C0|C1|C2|C3, "mem_load_retired.llc_unshared_hit" },       \
 855 { 0xCB, 0x08, C0|C1|C2|C3, "mem_load_retired.other_core_l2_hit_hitm" }, \
 856 { 0x0F, 0x02, C0|C1|C2|C3, "mem_uncore_retired.other_core_l2_hitm" },   \
 857                                                                         \
 858 { 0x0F, 0x08, C0|C1|C2|C3, "mem_uncore_retired.remote_cache_local_home_hit" },\
 859 { 0x0F, 0x10, C0|C1|C2|C3, "mem_uncore_retired.remote_dram" },          \
 860 { 0x0F, 0x20, C0|C1|C2|C3, "mem_uncore_retired.local_dram" },           \
 861                                                                         \
 862 { 0x0C, 0x01, C0|C1|C2|C3, "mem_store_retired.dtlb_miss" },             \
 863 { 0xC4, 0x01, C0|C1|C2|C3, "br_inst_retired.conditional" },             \
 864 { 0xC4, 0x02, C0|C1|C2|C3, "br_inst_retired.near_call" },               \
 865                                                                         \
 866 { 0xC5, 0x02, C0|C1|C2|C3, "br_misp_retired.near_call" },               \
 867 { 0xDB, 0x01, C0|C1|C2|C3, "uop_unfusion" },                            \
 868 { 0xF7, 0x01, C0|C1|C2|C3, "fp_assist.all" },                           \
 869                                                                         \
 870 { 0xF7, 0x04, C0|C1|C2|C3, "fp_assist.input" },                         \
 871 { 0xF7, 0x02, C0|C1|C2|C3, "fp_assist.output" },                        \
 872 { 0xCC, 0x03, C0|C1|C2|C3, "fp_mmx_trans.any" },                        \
 873                                                                         \
 874 { 0xCC, 0x01, C0|C1|C2|C3, "fp_mmx_trans.to_fp" },                      \
 875 { 0xCC, 0x02, C0|C1|C2|C3, "fp_mmx_trans.to_mmx" },                     \
 876 { 0xC3, 0x04, C0|C1|C2|C3, "machine_clears.smc" }
 877 
 /*
  * Generic event list for the FAM6_MOD28 table: the PAPI preset names this
  * table offers, each paired with the codes of the native event (or OR of
  * native events) named in the trailing comment.
  *
  * The macro expands to brace-enclosed initializers separated by commas, with
  * no comma after the last one, so the array that uses it
  * (events_fam6_mod28[]) writes that comma itself.  Each initializer is
  * { code, code, counter set, name }; presumably the two codes are the event
  * select and unit mask and C0|C1 is a bitmask of the counters that can
  * count the event -- TODO confirm against struct events_table_t.
  *
  * Cross-check against EVENTS_FAM6_MOD28: where a comment joins two native
  * events with '|', the second code is the OR of their second codes (e.g.
  * l1d_cache.ld 0x21 | l1d_cache.st 0x22 = 0x23 for PAPI_l1_dca).
  *
  * NOTE(review): PAPI_br_msp's 0xc5/0x00 pair has no entry of its own in
  * EVENTS_FAM6_MOD28, which lists br_inst_retired.mispred as 0xC4/0xA;
  * presumably an alternate encoding of the same event -- TODO confirm
  * against the processor manual.
  */
 878 #define GENERICEVENTS_FAM6_MOD28                                               \
 879 { 0xc4, 0x00, C0|C1, "PAPI_br_ins" },   /* br_inst_retired.any */              \
 880 { 0xc5, 0x00, C0|C1, "PAPI_br_msp" },   /* br_inst_retired.mispred */          \
 881 { 0xc4, 0x03, C0|C1, "PAPI_br_ntk" },                                          \
 882                         /* br_inst_retired.pred_not_taken|mispred_not_taken */ \
 883 { 0xc4, 0x05, C0|C1, "PAPI_br_prc" },                                          \
 884                         /* br_inst_retired.pred_not_taken|pred_taken */        \
 885 { 0xc8, 0x00, C0|C1, "PAPI_hw_int" },   /* hw_int_rcv */                       \
 886 { 0xaa, 0x03, C0|C1, "PAPI_tot_iis" },  /* macro_insts.all_decoded */          \
 887 { 0x40, 0x23, C0|C1, "PAPI_l1_dca" },   /* l1d_cache.ld|st */                  \
 888 { 0x2a, 0x41, C0|C1, "PAPI_l2_stm" },   /* l2_st.self.i_state */               \
 889 { 0x2e, 0x4f, C0|C1, "PAPI_l2_tca" },   /* longest_lat_cache.reference */      \
 890 { 0x2e, 0x4e, C0|C1, "PAPI_l2_tch" },   /* l2_rqsts.mes */                     \
 891 { 0x2e, 0x41, C0|C1, "PAPI_l2_tcm" },   /* longest_lat_cache.miss */           \
 892 { 0x2a, 0x4f, C0|C1, "PAPI_l2_tcw" },   /* l2_st.self.mesi */                  \
 893 { 0x08, 0x07, C0|C1, "PAPI_tlb_dm" },   /* data_tlb_misses.dtlb_miss */        \
 894 { 0x82, 0x02, C0|C1, "PAPI_tlb_im" }    /* itlb.misses */
 895 
 896 
 /*
  * Native (vendor-named) event list for the FAM6_MOD28 table.
  *
  * The macro expands to brace-enclosed initializers separated by commas, with
  * no comma after the last one; events_fam6_mod28[] places it after
  * GENERICEVENTS_FAM6_MOD28 and appends the NT_END terminator entry itself.
  *
  * Each initializer is { code, code, counter set, name }; presumably the two
  * codes are the event select and unit mask -- TODO confirm against struct
  * events_table_t.  Every entry in this list is tagged C0|C1.
  *
  * One first code may appear several times with different second codes, each
  * under its own name (e.g. 0xC4 for the br_inst_retired.* family).  Where an
  * event has both a ".s" and an ".ar" variant, the two differ only in bit
  * 0x80 of the second code.
  */
 897 #define EVENTS_FAM6_MOD28                                               \
 898         { 0x2,  0x81, C0|C1, "store_forwards.good" },                   \
 899         { 0x6,  0x0,  C0|C1, "segment_reg_loads.any" },                 \
 900         { 0x7,  0x1,  C0|C1, "prefetch.prefetcht0" },                   \
 901         { 0x7,  0x6,  C0|C1, "prefetch.sw_l2" },                        \
 902         { 0x7,  0x8,  C0|C1, "prefetch.prefetchnta" },                  \
 903         { 0x8,  0x7,  C0|C1, "data_tlb_misses.dtlb_miss" },             \
 904         { 0x8,  0x5,  C0|C1, "data_tlb_misses.dtlb_miss_ld" },          \
 905         { 0x8,  0x9,  C0|C1, "data_tlb_misses.l0_dtlb_miss_ld" },       \
 906         { 0x8,  0x6,  C0|C1, "data_tlb_misses.dtlb_miss_st" },          \
 907         { 0xC,  0x3,  C0|C1, "page_walks.cycles" },                     \
 908         { 0x10, 0x1,  C0|C1, "x87_comp_ops_exe.any.s" },                \
 909         { 0x10, 0x81, C0|C1, "x87_comp_ops_exe.any.ar" },               \
 910         { 0x11, 0x1,  C0|C1, "fp_assist" },                             \
 911         { 0x11, 0x81, C0|C1, "fp_assist.ar" },                          \
 912         { 0x12, 0x1,  C0|C1, "mul.s" },                                 \
 913         { 0x12, 0x81, C0|C1, "mul.ar" },                                \
 914         { 0x13, 0x1,  C0|C1, "div.s" },                                 \
 915         { 0x13, 0x81, C0|C1, "div.ar" },                                \
 916         { 0x14, 0x1,  C0|C1, "cycles_div_busy" },                       \
 917         { 0x21, 0x0,  C0|C1, "l2_ads" },                                \
 918         { 0x22, 0x0,  C0|C1, "l2_dbus_busy" },                          \
 919         { 0x24, 0x0,  C0|C1, "l2_lines_in" },                           \
 920         { 0x25, 0x0,  C0|C1, "l2_m_lines_in" },                         \
 921         { 0x26, 0x0,  C0|C1, "l2_lines_out" },                          \
 922         { 0x27, 0x0,  C0|C1, "l2_m_lines_out" },                        \
 923         { 0x28, 0x0,  C0|C1, "l2_ifetch" },                             \
 924         { 0x29, 0x0,  C0|C1, "l2_ld" },                                 \
 925         { 0x2A, 0x0,  C0|C1, "l2_st" },                                 \
 926         { 0x2B, 0x0,  C0|C1, "l2_lock" },                               \
 927         { 0x2E, 0x0,  C0|C1, "l2_rqsts" },                              \
 928         { 0x2E, 0x41, C0|C1, "l2_rqsts.self.demand.i_state" },          \
 929         { 0x2E, 0x4F, C0|C1, "l2_rqsts.self.demand.mesi" },             \
 930         { 0x30, 0x0,  C0|C1, "l2_reject_bus_q" },                       \
 931         { 0x32, 0x0,  C0|C1, "l2_no_req" },                             \
 932         { 0x3A, 0x0,  C0|C1, "eist_trans" },                            \
 933         { 0x3B, 0xC0, C0|C1, "thermal_trip" },                          \
 934         { 0x3C, 0x0,  C0|C1, "cpu_clk_unhalted.core_p" },               \
 935         { 0x3C, 0x1,  C0|C1, "cpu_clk_unhalted.bus" },                  \
 936         { 0x3C, 0x2,  C0|C1, "cpu_clk_unhalted.no_other" },             \
 937         { 0x40, 0x21, C0|C1, "l1d_cache.ld" },                          \
 938         { 0x40, 0x22, C0|C1, "l1d_cache.st" },                          \
 939         { 0x60, 0x0,  C0|C1, "bus_request_outstanding" },               \
 940         { 0x61, 0x0,  C0|C1, "bus_bnr_drv" },                           \
 941         { 0x62, 0x0,  C0|C1, "bus_drdy_clocks" },                       \
 942         { 0x63, 0x0,  C0|C1, "bus_lock_clocks" },                       \
 943         { 0x64, 0x0,  C0|C1, "bus_data_rcv" },                          \
 944         { 0x65, 0x0,  C0|C1, "bus_trans_brd" },                         \
 945         { 0x66, 0x0,  C0|C1, "bus_trans_rfo" },                         \
 946         { 0x67, 0x0,  C0|C1, "bus_trans_wb" },                          \
 947         { 0x68, 0x0,  C0|C1, "bus_trans_ifetch" },                      \
 948         { 0x69, 0x0,  C0|C1, "bus_trans_inval" },                       \
 949         { 0x6A, 0x0,  C0|C1, "bus_trans_pwr" },                         \
 950         { 0x6B, 0x0,  C0|C1, "bus_trans_p" },                           \
 951         { 0x6C, 0x0,  C0|C1, "bus_trans_io" },                          \
 952         { 0x6D, 0x0,  C0|C1, "bus_trans_def" },                         \
 953         { 0x6E, 0x0,  C0|C1, "bus_trans_burst" },                       \
 954         { 0x6F, 0x0,  C0|C1, "bus_trans_mem" },                         \
 955         { 0x70, 0x0,  C0|C1, "bus_trans_any" },                         \
 956         { 0x77, 0x0,  C0|C1, "ext_snoop" },                             \
 957         { 0x7A, 0x0,  C0|C1, "bus_hit_drv" },                           \
 958         { 0x7B, 0x0,  C0|C1, "bus_hitm_drv" },                          \
 959         { 0x7D, 0x0,  C0|C1, "busq_empty" },                            \
 960         { 0x7E, 0x0,  C0|C1, "snoop_stall_drv" },                       \
 961         { 0x7F, 0x0,  C0|C1, "bus_io_wait" },                           \
 962         { 0x80, 0x3,  C0|C1, "icache.accesses" },                       \
 963         { 0x80, 0x2,  C0|C1, "icache.misses" },                         \
 964         { 0x82, 0x4,  C0|C1, "itlb.flush" },                            \
 965         { 0x82, 0x2,  C0|C1, "itlb.misses" },                           \
 966         { 0xAA, 0x2,  C0|C1, "macro_insts.cisc_decoded" },              \
 967         { 0xAA, 0x3,  C0|C1, "macro_insts.all_decoded" },               \
 968         { 0xB0, 0x0,  C0|C1, "simd_uops_exec.s" },                      \
 969         { 0xB0, 0x80, C0|C1, "simd_uops_exec.ar" },                     \
 970         { 0xB1, 0x0,  C0|C1, "simd_sat_uop_exec.s" },                   \
 971         { 0xB1, 0x80, C0|C1, "simd_sat_uop_exec.ar" },                  \
 972         { 0xB3, 0x1,  C0|C1, "simd_uop_type_exec.mul.s" },              \
 973         { 0xB3, 0x81, C0|C1, "simd_uop_type_exec.mul.ar" },             \
 974         { 0xB3, 0x02, C0|C1, "simd_uop_type_exec.shift.s" },            \
 975         { 0xB3, 0x82, C0|C1, "simd_uop_type_exec.shift.ar" },           \
 976         { 0xB3, 0x04, C0|C1, "simd_uop_type_exec.pack.s" },             \
 977         { 0xB3, 0x84, C0|C1, "simd_uop_type_exec.pack.ar" },            \
 978         { 0xB3, 0x08, C0|C1, "simd_uop_type_exec.unpack.s" },           \
 979         { 0xB3, 0x88, C0|C1, "simd_uop_type_exec.unpack.ar" },          \
 980         { 0xB3, 0x10, C0|C1, "simd_uop_type_exec.logical.s" },          \
 981         { 0xB3, 0x90, C0|C1, "simd_uop_type_exec.logical.ar" },         \
 982         { 0xB3, 0x20, C0|C1, "simd_uop_type_exec.arithmetic.s" },       \
 983         { 0xB3, 0xA0, C0|C1, "simd_uop_type_exec.arithmetic.ar" },      \
 984         { 0xC2, 0x10, C0|C1, "uops_retired.any" },                      \
 985         { 0xC3, 0x1,  C0|C1, "machine_clears.smc" },                    \
 986         { 0xC4, 0x0,  C0|C1, "br_inst_retired.any" },                   \
 987         { 0xC4, 0x1,  C0|C1, "br_inst_retired.pred_not_taken" },        \
 988         { 0xC4, 0x2,  C0|C1, "br_inst_retired.mispred_not_taken" },     \
 989         { 0xC4, 0x4,  C0|C1, "br_inst_retired.pred_taken" },            \
 990         { 0xC4, 0x8,  C0|C1, "br_inst_retired.mispred_taken" },         \
 991         { 0xC4, 0xA,  C0|C1, "br_inst_retired.mispred" },               \
 992         { 0xC4, 0xC,  C0|C1, "br_inst_retired.taken" },                 \
 993         { 0xC4, 0xF,  C0|C1, "br_inst_retired.any1" },                  \
 994         { 0xC6, 0x1,  C0|C1, "cycles_int_masked.cycles_int_masked" },   \
 995         { 0xC6, 0x2,  C0|C1,                                            \
 996                 "cycles_int_masked.cycles_int_pending_and_masked" },    \
 997         { 0xC7, 0x1,  C0|C1, "simd_inst_retired.packed_single" },       \
 998         { 0xC7, 0x2,  C0|C1, "simd_inst_retired.scalar_single" },       \
 999         { 0xC7, 0x4,  C0|C1, "simd_inst_retired.packed_double" },       \
1000         { 0xC7, 0x8,  C0|C1, "simd_inst_retired.scalar_double" },       \
1001         { 0xC7, 0x10, C0|C1, "simd_inst_retired.vector" },              \
1002         { 0xC7, 0x1F, C0|C1, "simd_inst_retired.any" },                 \
1003         { 0xC8, 0x00, C0|C1, "hw_int_rcv" },                            \
1004         { 0xCA, 0x1,  C0|C1, "simd_comp_inst_retired.packed_single" },  \
1005         { 0xCA, 0x2,  C0|C1, "simd_comp_inst_retired.scalar_single" },  \
1006         { 0xCA, 0x4,  C0|C1, "simd_comp_inst_retired.packed_double" },  \
1007         { 0xCA, 0x8,  C0|C1, "simd_comp_inst_retired.scalar_double" },  \
1008         { 0xCB, 0x1,  C0|C1, "mem_load_retired.l2_hit" },               \
1009         { 0xCB, 0x2,  C0|C1, "mem_load_retired.l2_miss" },              \
1010         { 0xCB, 0x4,  C0|C1, "mem_load_retired.dtlb_miss" },            \
1011         { 0xCD, 0x0,  C0|C1, "simd_assist" },                           \
1012         { 0xCE, 0x0,  C0|C1, "simd_instr_retired" },                    \
1013         { 0xCF, 0x0,  C0|C1, "simd_sat_instr_retired" },                \
1014         { 0xE0, 0x1,  C0|C1, "br_inst_decoded" },                       \
1015         { 0xE4, 0x1,  C0|C1, "bogus_br" },                              \
1016         { 0xE6, 0x1,  C0|C1, "baclears.any" }
1017 
/*
 * Event table currently in use by this file; starts out NULL.  The code that
 * assigns it is not in this part of the file -- presumably it is pointed at
 * one of the events_fam6_* arrays once the processor model is known (TODO
 * confirm).
 */
1018 static const struct events_table_t *events_table = NULL;
1019 
/*
 * Event table assembled from the FAM6_NHM macros: the generic entries from
 * GENERICEVENTS_FAM6_NHM first, then the native entries from
 * EVENTS_FAM6_NHM.  The final entry, whose first field is NT_END and whose
 * name is empty, terminates the table; presumably readers stop at the first
 * NT_END entry, as the other name-table loops in this file do.
 */
1020 const struct events_table_t events_fam6_nhm[] = {
1021         GENERICEVENTS_FAM6_NHM,
1022         EVENTS_FAM6_NHM,
1023         { NT_END, 0, 0, "" }
1024 };
1025 
/*
 * Event table assembled from the FAM6_MOD28 macros: the generic entries from
 * GENERICEVENTS_FAM6_MOD28 first, then the native entries from
 * EVENTS_FAM6_MOD28.  The final entry, whose first field is NT_END and whose
 * name is empty, terminates the table; presumably readers stop at the first
 * NT_END entry, as the other name-table loops in this file do.
 */
1026 const struct events_table_t events_fam6_mod28[] = {
1027         GENERICEVENTS_FAM6_MOD28,
1028         EVENTS_FAM6_MOD28,
1029         { NT_END, 0, 0, "" }
1030 };
1031 
1032 /*
1033  * Initialize the per-counter strings that list the supported general-purpose
1034  * counter events for processors of the Penryn and Merom families.
1035  */
1036 static void
1037 pcbe_init_core_uarch()
1038 {
1039         const struct nametable_core_uarch       *n;
1040         const struct generic_events             *k;
1041         const struct nametable_core_uarch       *picspecific_events;
1042         const struct generic_events             *picspecific_genericevents;
1043         size_t                  common_size;
1044         size_t                  size;
1045         uint64_t                i;
1046 
1047         gpc_names = kmem_alloc(num_gpc * sizeof (char *), KM_SLEEP);
1048 
1049         /* Calculate space needed to save all the common event names */
1050         common_size = 0;
1051         for (n = cmn_gpc_events_core_uarch; n->event_num != NT_END; n++) {
1052                 common_size += strlen(n->name) + 1;
1053         }
1054 
1055         for (k = cmn_generic_events; k->event_num != NT_END; k++) {
1056                 common_size += strlen(k->name) + 1;
1057         }
1058 
1059         for (i = 0; i < num_gpc; i++) {
1060                 size = 0;
1061                 picspecific_genericevents = NULL;
1062 
1063                 switch (i) {
1064                         case 0:
1065                                 picspecific_events = pic0_events;
1066                                 picspecific_genericevents = generic_events_pic0;
1067                                 break;
1068                         case 1:
1069                                 picspecific_events = pic1_events;
1070                                 break;
1071                         default:
1072                                 picspecific_events = NULL;
1073                                 break;
1074                 }
1075                 if (picspecific_events != NULL) {
1076                         for (n = picspecific_events;
1077                             n->event_num != NT_END;
1078                             n++) {
1079                                 size += strlen(n->name) + 1;
1080                         }
1081                 }
1082                 if (picspecific_genericevents != NULL) {
1083                         for (k = picspecific_genericevents;
1084                             k->event_num != NT_END; k++) {
1085                                 size += strlen(k->name) + 1;
1086                         }
1087                 }
1088 
1089                 gpc_names[i] =
1090                     kmem_alloc(size + common_size + 1, KM_SLEEP);
1091 
1092                 gpc_names[i][0] = '\0';
1093                 if (picspecific_events != NULL) {
1094                         for (n = picspecific_events;
1095                             n->event_num != NT_END; n++) {
1096                                 (void) strcat(gpc_names[i], n->name);
1097                                 (void) strcat(gpc_names[i], ",");
1098                         }
1099                 }
1100                 if (picspecific_genericevents != NULL) {
1101                         for (k = picspecific_genericevents;
1102                             k->event_num != NT_END; k++) {
1103                                 (void) strcat(gpc_names[i], k->name);
1104                                 (void) strcat(gpc_names[i], ",");
1105                         }
1106                 }
1107                 for (n = cmn_gpc_events_core_uarch; n->event_num != NT_END;
1108                     n++) {
1109                         (void) strcat(gpc_names[i], n->name);
1110                         (void) strcat(gpc_names[i], ",");
1111                 }
1112                 for (k = cmn_generic_events; k->event_num != NT_END; k++) {
1113                         (void) strcat(gpc_names[i], k->name);
1114                         (void) strcat(gpc_names[i], ",");
1115                 }
1116 
1117                 /*
1118                  * Remove trailing comma.
1119                  */
1120                 gpc_names[i][common_size + size - 1] = '\0';
1121         }
1122 }
1123 
/*
 * Probe the CPU's performance-monitoring capabilities through CPUID and
 * build the per-counter event name lists used by the rest of this backend.
 *
 * Returns -1 when the CPU vendor is not Intel, when CPUID does not report
 * leaf 0xa, when the reported version id is 0, or when the counter counts
 * fall outside what the 64-bit bitmaps used here can describe.  Returns 0
 * otherwise.
 *
 * On the way it fills in the file-level state describing the counters
 * (versionid, num_ffc/width_ffc, num_gpc/width_gpc, the mask_* and
 * control_* values, total_pmc), selects the HTT or non-HTT name tables,
 * allocates ffc_allnames[], and allocates gpc_names[] either through
 * pcbe_init_core_uarch() (Family 6 Models 15, 23 and 29) or from the
 * architectural plus model-specific event tables.
 */
static int
core_pcbe_init(void)
{
        struct cpuid_regs       cp;
        size_t                  size;
        uint64_t                i;
        uint64_t                j;
        uint64_t                arch_events_vector_length;
        size_t                  arch_events_string_length;
        uint_t                  model;

        if (cpuid_getvendor(CPU) != X86_VENDOR_Intel)
                return (-1);

        /* Obtain Basic CPUID information */
        cp.cp_eax = 0x0;
        (void) __cpuid_insn(&cp);

        /* No Architectural Performance Monitoring Leaf returned by CPUID */
        if (cp.cp_eax < 0xa) {
                return (-1);
        }

        /* Obtain the Architectural Performance Monitoring Leaf */
        cp.cp_eax = 0xa;
        (void) __cpuid_insn(&cp);

        /* The version id is the low byte of EAX from leaf 0xa */
        versionid = cp.cp_eax & 0xFF;

        /*
         * Fixed-Function Counters (FFC)
         *
         * All Family 6 Model 15 and Model 23 processors have fixed-function
         * counters.  These counters were made Architectural with
         * Family 6 Model 15 Stepping 9.
         */
        switch (versionid) {

                case 0:
                        return (-1);

                case 2:
                        num_ffc = cp.cp_edx & 0x1F;
                        width_ffc = (cp.cp_edx >> 5) & 0xFF;

                        /*
                         * Some processors have an errata (AW34) where
                         * versionid is reported as 2 when actually 1.
                         * In this case, fixed-function counters are
                         * model-specific as in Version 1.
                         */
                        if (num_ffc != 0) {
                                break;
                        }
                        /* FALLTHROUGH */
                case 1:
                        /* Version 1: use fixed values instead of CPUID's */
                        num_ffc = 3;
                        width_ffc = 40;
                        versionid = 1;
                        break;

                default:
                        num_ffc = cp.cp_edx & 0x1F;
                        width_ffc = (cp.cp_edx >> 5) & 0xFF;
                        break;
        }


        if (num_ffc >= 64)
                return (-1);

        /* Set HTT-specific names of architectural & FFC events */
        if (is_x86_feature(x86_featureset, X86FSET_HTT)) {
                ffc_names = ffc_names_htt;
                arch_events_table = arch_events_table_htt;
                known_arch_events =
                    sizeof (arch_events_table_htt) /
                    sizeof (struct events_table_t);
                known_ffc_num =
                    sizeof (ffc_names_htt) / sizeof (char *);
        } else {
                ffc_names = ffc_names_non_htt;
                arch_events_table = arch_events_table_non_htt;
                known_arch_events =
                    sizeof (arch_events_table_non_htt) /
                    sizeof (struct events_table_t);
                known_ffc_num =
                    sizeof (ffc_names_non_htt) / sizeof (char *);
        }

        if (num_ffc >= known_ffc_num) {
                /*
                 * The system seems to have more fixed-function counters than
                 * what this PCBE is able to handle correctly.  Default to the
                 * maximum number of fixed-function counters that this driver
                 * is aware of.
                 */
                num_ffc = known_ffc_num - 1;
        }

        mask_ffc = BITMASK_XBITS(width_ffc);
        control_ffc = BITMASK_XBITS(num_ffc);

        /*
         * General Purpose Counters (GPC)
         */
        num_gpc = (cp.cp_eax >> 8) & 0xFF;
        width_gpc = (cp.cp_eax >> 16) & 0xFF;

        if (num_gpc >= 64)
                return (-1);

        mask_gpc = BITMASK_XBITS(width_gpc);

        control_gpc = BITMASK_XBITS(num_gpc);

        /* FFC bits are placed above bit 31, GPC bits in the low bits */
        control_mask = (control_ffc << 32) | control_gpc;

        total_pmc = num_gpc + num_ffc;
        if (total_pmc > 64) {
                /* Too wide for the overflow bitmap */
                return (-1);
        }

        /*
         * FFC names: each entry is the FFC's own name, followed by a comma
         * and its generic name when the generic name is not empty.
         */
        ffc_allnames = kmem_alloc(num_ffc * sizeof (char *), KM_SLEEP);
        for (i = 0; i < num_ffc; i++) {
                /* +2 covers the separating comma and the terminator */
                ffc_allnames[i] = kmem_alloc(
                    strlen(ffc_names[i]) + strlen(ffc_genericnames[i]) + 2,
                    KM_SLEEP);

                ffc_allnames[i][0] = '\0';
                (void) strcat(ffc_allnames[i], ffc_names[i]);

                /* Check if this ffc has a generic name */
                if (strcmp(ffc_genericnames[i], "") != 0) {
                        (void) strcat(ffc_allnames[i], ",");
                        (void) strcat(ffc_allnames[i], ffc_genericnames[i]);
                }
        }

        /* GPC events for Family 6 Models 15, 23 and 29 only */
        if ((cpuid_getfamily(CPU) == 6) &&
            ((cpuid_getmodel(CPU) == 15) || (cpuid_getmodel(CPU) == 23) ||
            (cpuid_getmodel(CPU) == 29))) {
                (void) snprintf(core_impl_name, IMPL_NAME_LEN,
                    "Core Microarchitecture");
                pcbe_init_core_uarch();
                return (0);
        }

        (void) snprintf(core_impl_name, IMPL_NAME_LEN,
            "Intel Arch PerfMon v%d on Family %d Model %d",
            versionid, cpuid_getfamily(CPU), cpuid_getmodel(CPU));

        /*
         * Architectural events
         */
        arch_events_vector_length = (cp.cp_eax >> 24) & 0xFF;

        ASSERT(known_arch_events == arch_events_vector_length);

        /*
         * To handle the case where a new performance monitoring setup is run
         * on a non-debug kernel: use the smaller of the two lengths for both.
         */
        if (known_arch_events > arch_events_vector_length) {
                known_arch_events = arch_events_vector_length;
        } else {
                arch_events_vector_length = known_arch_events;
        }

        arch_events_vector = cp.cp_ebx &
            BITMASK_XBITS(arch_events_vector_length);

        /*
         * Process architectural and non-architectural events using GPC
         */
        if (num_gpc > 0) {

                gpc_names = kmem_alloc(num_gpc * sizeof (char *), KM_SLEEP);

                /*
                 * Calculate space required for the architectural gpc events.
                 * Only events whose bit in arch_events_vector is clear are
                 * counted; each name is followed by one separator.
                 */
                arch_events_string_length = 0;
                for (i = 0; i < known_arch_events; i++) {
                        if (((1U << i) & arch_events_vector) == 0) {
                                arch_events_string_length +=
                                    strlen(arch_events_table[i].name) + 1;
                                if (strcmp(arch_genevents_table[i], "") != 0) {
                                        arch_events_string_length +=
                                            strlen(arch_genevents_table[i]) + 1;
                                }
                        }
                }

                /*
                 * Non-architectural events list.  Models not named here
                 * leave events_table unchanged.
                 */
                model = cpuid_getmodel(CPU);
                switch (model) {
                        /* Nehalem */
                        case 26:
                        case 30:
                        case 31:
                        /* Westmere */
                        case 37:
                        case 44:
                        /* Nehalem-EX */
                        case 46:
                        case 47:
                                events_table = events_fam6_nhm;
                                break;
                        case 28:
                                events_table = events_fam6_mod28;
                                break;
                }

                for (i = 0; i < num_gpc; i++) {

                        /*
                         * Determine length of all supported event names
                         * (architectural + non-architectural)
                         */
                        size = arch_events_string_length;
                        for (j = 0; events_table != NULL &&
                            events_table[j].eventselect != NT_END;
                            j++) {
                                if (C(i) & events_table[j].supported_counters) {
                                        size += strlen(events_table[j].name) +
                                            1;
                                }
                        }

                        /* Allocate memory for this pics list */
                        gpc_names[i] = kmem_alloc(size + 1, KM_SLEEP);
                        gpc_names[i][0] = '\0';
                        /* No events at all: leave the list empty */
                        if (size == 0) {
                                continue;
                        }

                        /*
                         * Create the list of all supported events
                         * (architectural + non-architectural)
                         */
                        for (j = 0; j < known_arch_events; j++) {
                                if (((1U << j) & arch_events_vector) == 0) {
                                        (void) strcat(gpc_names[i],
                                            arch_events_table[j].name);
                                        (void) strcat(gpc_names[i], ",");
                                        if (strcmp(
                                            arch_genevents_table[j], "")
                                            != 0) {
                                                (void) strcat(gpc_names[i],
                                                    arch_genevents_table[j]);
                                                (void) strcat(gpc_names[i],
                                                    ",");
                                        }
                                }
                        }

                        for (j = 0; events_table != NULL &&
                            events_table[j].eventselect != NT_END;
                            j++) {
                                if (C(i) & events_table[j].supported_counters) {
                                        (void) strcat(gpc_names[i],
                                            events_table[j].name);
                                        (void) strcat(gpc_names[i], ",");
                                }
                        }

                        /* Remove trailing comma */
                        gpc_names[i][size - 1] = '\0';
                }
        }

        return (0);
}
1399 
1400 static uint_t core_pcbe_ncounters()
1401 {
1402         return (total_pmc);
1403 }
1404 
1405 static const char *core_pcbe_impl_name(void)
1406 {
1407         return (core_impl_name);
1408 }
1409 
1410 static const char *core_pcbe_cpuref(void)
1411 {
1412         return (core_cpuref);
1413 }
1414 
1415 static char *core_pcbe_list_events(uint_t picnum)
1416 {
1417         ASSERT(picnum < cpc_ncounters);
1418 
1419         if (picnum < num_gpc) {
1420                 return (gpc_names[picnum]);
1421         } else {
1422                 return (ffc_allnames[picnum - num_gpc]);
1423         }
1424 }
1425 
1426 static char *core_pcbe_list_attrs(void)
1427 {
1428         if (versionid >= 3) {
1429                 return ("edge,inv,umask,cmask,anythr");
1430         } else {
1431                 return ("edge,pc,inv,umask,cmask");
1432         }
1433 }
1434 
1435 static const struct nametable_core_uarch *
1436 find_gpcevent_core_uarch(char *name,
1437     const struct nametable_core_uarch *nametable)
1438 {
1439         const struct nametable_core_uarch *n;
1440         int compare_result = -1;
1441 
1442         for (n = nametable; n->event_num != NT_END; n++) {
1443                 compare_result = strcmp(name, n->name);
1444                 if (compare_result <= 0) {
1445                         break;
1446                 }
1447         }
1448 
1449         if (compare_result == 0) {
1450                 return (n);
1451         }
1452 
1453         return (NULL);
1454 }
1455 
1456 static const struct generic_events *
1457 find_generic_events(char *name, const struct generic_events *table)
1458 {
1459         const struct generic_events *n;
1460 
1461         for (n = table; n->event_num != NT_END; n++) {
1462                 if (strcmp(name, n->name) == 0) {
1463                         return (n);
1464                 };
1465         }
1466 
1467         return (NULL);
1468 }
1469 
1470 static const struct events_table_t *
1471 find_gpcevent(char *name)
1472 {
1473         int i;
1474 
1475         /* Search architectural events */
1476         for (i = 0; i < known_arch_events; i++) {
1477                 if (strcmp(name, arch_events_table[i].name) == 0 ||
1478                     strcmp(name, arch_genevents_table[i]) == 0) {
1479                         if (((1U << i) & arch_events_vector) == 0) {
1480                                 return (&arch_events_table[i]);
1481                         }
1482                 }
1483         }
1484 
1485         /* Search non-architectural events */
1486         if (events_table != NULL) {
1487                 for (i = 0; events_table[i].eventselect != NT_END; i++) {
1488                         if (strcmp(name, events_table[i].name) == 0) {
1489                                 return (&events_table[i]);
1490                         }
1491                 }
1492         }
1493 
1494         return (NULL);
1495 }
1496 
1497 static uint64_t
1498 core_pcbe_event_coverage(char *event)
1499 {
1500         uint64_t bitmap;
1501         uint64_t bitmask;
1502         const struct events_table_t *n;
1503         int i;
1504 
1505         bitmap = 0;
1506 
1507         /* Is it an event that a GPC can track? */
1508         if (versionid >= 3) {
1509                 n = find_gpcevent(event);
1510                 if (n != NULL) {
1511                         bitmap |= (n->supported_counters &
1512                             BITMASK_XBITS(num_gpc));
1513                 }
1514         } else {
1515                 if (find_generic_events(event, cmn_generic_events) != NULL) {
1516                         bitmap |= BITMASK_XBITS(num_gpc);
1517                 } if (find_generic_events(event, generic_events_pic0) != NULL) {
1518                         bitmap |= 1ULL;
1519                 } else if (find_gpcevent_core_uarch(event,
1520                     cmn_gpc_events_core_uarch) != NULL) {
1521                         bitmap |= BITMASK_XBITS(num_gpc);
1522                 } else if (find_gpcevent_core_uarch(event, pic0_events) !=
1523                     NULL) {
1524                         bitmap |= 1ULL;
1525                 } else if (find_gpcevent_core_uarch(event, pic1_events) !=
1526                     NULL) {
1527                         bitmap |= 1ULL << 1;
1528                 }
1529         }
1530 
1531         /* Check if the event can be counted in the fixed-function counters */
1532         if (num_ffc > 0) {
1533                 bitmask = 1ULL << num_gpc;
1534                 for (i = 0; i < num_ffc; i++) {
1535                         if (strcmp(event, ffc_names[i]) == 0) {
1536                                 bitmap |= bitmask;
1537                         } else if (strcmp(event, ffc_genericnames[i]) == 0) {
1538                                 bitmap |= bitmask;
1539                         }
1540                         bitmask = bitmask << 1;
1541                 }
1542         }
1543 
1544         return (bitmap);
1545 }
1546 
1547 static uint64_t
1548 core_pcbe_overflow_bitmap(void)
1549 {
1550         uint64_t interrupt_status;
1551         uint64_t intrbits_ffc;
1552         uint64_t intrbits_gpc;
1553         extern int kcpc_hw_overflow_intr_installed;
1554         uint64_t overflow_bitmap;
1555 
1556         RDMSR(PERF_GLOBAL_STATUS, interrupt_status);
1557         WRMSR(PERF_GLOBAL_OVF_CTRL, interrupt_status);
1558 
1559         interrupt_status = interrupt_status & control_mask;
1560         intrbits_ffc = (interrupt_status >> 32) & control_ffc;
1561         intrbits_gpc = interrupt_status & control_gpc;
1562         overflow_bitmap = (intrbits_ffc << num_gpc) | intrbits_gpc;
1563 
1564         ASSERT(kcpc_hw_overflow_intr_installed);
1565         (*kcpc_hw_enable_cpc_intr)();
1566 
1567         return (overflow_bitmap);
1568 }
1569 
1570 static int
1571 check_cpc_securitypolicy(core_pcbe_config_t *conf,
1572     const struct nametable_core_uarch *n)
1573 {
1574         if (conf->core_ctl & n->restricted_bits) {
1575                 if (secpolicy_cpc_cpu(crgetcred()) != 0) {
1576                         return (CPC_ATTR_REQUIRES_PRIVILEGE);
1577                 }
1578         }
1579         return (0);
1580 }
1581 
1582 static int
1583 configure_gpc(uint_t picnum, char *event, uint64_t preset, uint32_t flags,
1584     uint_t nattrs, kcpc_attr_t *attrs, void **data)
1585 {
1586         core_pcbe_config_t      conf;
1587         const struct nametable_core_uarch       *n;
1588         const struct generic_events *k = NULL;
1589         const struct nametable_core_uarch       *m;
1590         const struct nametable_core_uarch       *picspecific_events;
1591         struct nametable_core_uarch     nt_raw = { "", 0x0, 0x0 };
1592         uint_t                  i;
1593         long                    event_num;
1594         const struct events_table_t *eventcode;
1595 
1596         if (((preset & BITS_EXTENDED_FROM_31) != 0) &&
1597             ((preset & BITS_EXTENDED_FROM_31) !=
1598             BITS_EXTENDED_FROM_31)) {
1599 
1600                 /*
1601                  * Bits beyond bit-31 in the general-purpose counters can only
1602                  * be written to by extension of bit 31.  We cannot preset
1603                  * these bits to any value other than all 1s or all 0s.
1604                  */
1605                 return (CPC_ATTRIBUTE_OUT_OF_RANGE);
1606         }
1607 
1608         if (versionid >= 3) {
1609                 eventcode = find_gpcevent(event);
1610                 if (eventcode != NULL) {
1611                         if ((C(picnum) & eventcode->supported_counters) == 0) {
1612                                 return (CPC_PIC_NOT_CAPABLE);
1613                         }
1614                         if (nattrs > 0 &&
1615                             (strncmp("PAPI_", event, 5) == 0)) {
1616                                 return (CPC_ATTRIBUTE_OUT_OF_RANGE);
1617                         }
1618                         conf.core_ctl = eventcode->eventselect;
1619                         conf.core_ctl |= eventcode->unitmask <<
1620                             CORE_UMASK_SHIFT;
1621                 } else {
1622                         /* Event specified as raw event code */
1623                         if (ddi_strtol(event, NULL, 0, &event_num) != 0) {
1624                                 return (CPC_INVALID_EVENT);
1625                         }
1626                         conf.core_ctl = event_num & 0xFF;
1627                 }
1628         } else {
1629                 if ((k = find_generic_events(event, cmn_generic_events)) !=
1630                     NULL ||
1631                     (picnum == 0 &&
1632                     (k = find_generic_events(event, generic_events_pic0)) !=
1633                     NULL)) {
1634                         if (nattrs > 0) {
1635                                 return (CPC_ATTRIBUTE_OUT_OF_RANGE);
1636                         }
1637                         conf.core_ctl = k->event_num;
1638                         conf.core_ctl |= k->umask << CORE_UMASK_SHIFT;
1639                 } else {
1640                         /* Not a generic event */
1641 
1642                         n = find_gpcevent_core_uarch(event,
1643                             cmn_gpc_events_core_uarch);
1644                         if (n == NULL) {
1645                                 switch (picnum) {
1646                                         case 0:
1647                                                 picspecific_events =
1648                                                     pic0_events;
1649                                                 break;
1650                                         case 1:
1651                                                 picspecific_events =
1652                                                     pic1_events;
1653                                                 break;
1654                                         default:
1655                                                 picspecific_events = NULL;
1656                                                 break;
1657                                 }
1658                                 if (picspecific_events != NULL) {
1659                                         n = find_gpcevent_core_uarch(event,
1660                                             picspecific_events);
1661                                 }
1662                         }
1663                         if (n == NULL) {
1664 
1665                                 /*
1666                                  * Check if this is a case where the event was
1667                                  * specified directly by its event number
1668                                  * instead of its name string.
1669                                  */
1670                                 if (ddi_strtol(event, NULL, 0, &event_num) !=
1671                                     0) {
1672                                         return (CPC_INVALID_EVENT);
1673                                 }
1674 
1675                                 event_num = event_num & 0xFF;
1676 
1677                                 /*
1678                                  * Search the event table to find out if the
1679                                  * event specified has an privilege
1680                                  * requirements.  Currently none of the
1681                                  * pic-specific counters have any privilege
1682                                  * requirements.  Hence only the table
1683                                  * cmn_gpc_events_core_uarch is searched.
1684                                  */
1685                                 for (m = cmn_gpc_events_core_uarch;
1686                                     m->event_num != NT_END;
1687                                     m++) {
1688                                         if (event_num == m->event_num) {
1689                                                 break;
1690                                         }
1691                                 }
1692                                 if (m->event_num == NT_END) {
1693                                         nt_raw.event_num = (uint8_t)event_num;
1694                                         n = &nt_raw;
1695                                 } else {
1696                                         n = m;
1697                                 }
1698                         }
1699                         conf.core_ctl = n->event_num; /* Event Select */
1700                 }
1701         }
1702 
1703 
1704         conf.core_picno = picnum;
1705         conf.core_pictype = CORE_GPC;
1706         conf.core_rawpic = preset & mask_gpc;
1707 
1708         conf.core_pes = GPC_BASE_PES + picnum;
1709         conf.core_pmc = GPC_BASE_PMC + picnum;
1710 
1711         for (i = 0; i < nattrs; i++) {
1712                 if (strncmp(attrs[i].ka_name, "umask", 6) == 0) {
1713                         if ((attrs[i].ka_val | CORE_UMASK_MASK) !=
1714                             CORE_UMASK_MASK) {
1715                                 return (CPC_ATTRIBUTE_OUT_OF_RANGE);
1716                         }
1717                         /* Clear out the default umask */
1718                         conf.core_ctl &= ~ (CORE_UMASK_MASK <<
1719                             CORE_UMASK_SHIFT);
1720                         /* Use the user provided umask */
1721                         conf.core_ctl |= attrs[i].ka_val <<
1722                             CORE_UMASK_SHIFT;
1723                 } else  if (strncmp(attrs[i].ka_name, "edge", 6) == 0) {
1724                         if (attrs[i].ka_val != 0)
1725                                 conf.core_ctl |= CORE_EDGE;
1726                 } else if (strncmp(attrs[i].ka_name, "inv", 4) == 0) {
1727                         if (attrs[i].ka_val != 0)
1728                                 conf.core_ctl |= CORE_INV;
1729                 } else if (strncmp(attrs[i].ka_name, "cmask", 6) == 0) {
1730                         if ((attrs[i].ka_val | CORE_CMASK_MASK) !=
1731                             CORE_CMASK_MASK) {
1732                                 return (CPC_ATTRIBUTE_OUT_OF_RANGE);
1733                         }
1734                         conf.core_ctl |= attrs[i].ka_val <<
1735                             CORE_CMASK_SHIFT;
1736                 } else if (strncmp(attrs[i].ka_name, "anythr", 7) ==
1737                     0) {
1738                         if (versionid < 3)
1739                                 return (CPC_INVALID_ATTRIBUTE);
1740                         if (secpolicy_cpc_cpu(crgetcred()) != 0) {
1741                                 return (CPC_ATTR_REQUIRES_PRIVILEGE);
1742                         }
1743                         if (attrs[i].ka_val != 0)
1744                                 conf.core_ctl |= CORE_ANYTHR;
1745                 } else {
1746                         return (CPC_INVALID_ATTRIBUTE);
1747                 }
1748         }
1749 
1750         if (flags & CPC_COUNT_USER)
1751                 conf.core_ctl |= CORE_USR;
1752         if (flags & CPC_COUNT_SYSTEM)
1753                 conf.core_ctl |= CORE_OS;
1754         if (flags & CPC_OVF_NOTIFY_EMT)
1755                 conf.core_ctl |= CORE_INT;
1756         conf.core_ctl |= CORE_EN;
1757 
1758         if (versionid < 3 && k == NULL) {
1759                 if (check_cpc_securitypolicy(&conf, n) != 0) {
1760                         return (CPC_ATTR_REQUIRES_PRIVILEGE);
1761                 }
1762         }
1763 
1764         *data = kmem_alloc(sizeof (core_pcbe_config_t), KM_SLEEP);
1765         *((core_pcbe_config_t *)*data) = conf;
1766 
1767         return (0);
1768 }
1769 
1770 static int
1771 configure_ffc(uint_t picnum, char *event, uint64_t preset, uint32_t flags,
1772     uint_t nattrs, kcpc_attr_t *attrs, void **data)
1773 {
1774         core_pcbe_config_t      *conf;
1775         uint_t                  i;
1776 
1777         if (picnum - num_gpc >= num_ffc) {
1778                 return (CPC_INVALID_PICNUM);
1779         }
1780 
1781         if ((strcmp(ffc_names[picnum-num_gpc], event) != 0) &&
1782             (strcmp(ffc_genericnames[picnum-num_gpc], event) != 0)) {
1783                 return (CPC_INVALID_EVENT);
1784         }
1785 
1786         if ((versionid < 3) && (nattrs != 0)) {
1787                 return (CPC_INVALID_ATTRIBUTE);
1788         }
1789 
1790         conf = kmem_alloc(sizeof (core_pcbe_config_t), KM_SLEEP);
1791         conf->core_ctl = 0;
1792 
1793         for (i = 0; i < nattrs; i++) {
1794                 if (strncmp(attrs[i].ka_name, "anythr", 7) == 0) {
1795                         if (secpolicy_cpc_cpu(crgetcred()) != 0) {
1796                                 kmem_free(conf, sizeof (core_pcbe_config_t));
1797                                 return (CPC_ATTR_REQUIRES_PRIVILEGE);
1798                         }
1799                         if (attrs[i].ka_val != 0) {
1800                                 conf->core_ctl |= CORE_FFC_ANYTHR;
1801                         }
1802                 } else {
1803                         kmem_free(conf, sizeof (core_pcbe_config_t));
1804                         return (CPC_INVALID_ATTRIBUTE);
1805                 }
1806         }
1807 
1808         conf->core_picno = picnum;
1809         conf->core_pictype = CORE_FFC;
1810         conf->core_rawpic = preset & mask_ffc;
1811         conf->core_pmc = FFC_BASE_PMC + (picnum - num_gpc);
1812 
1813         /* All fixed-function counters have the same control register */
1814         conf->core_pes = PERF_FIXED_CTR_CTRL;
1815 
1816         if (flags & CPC_COUNT_USER)
1817                 conf->core_ctl |= CORE_FFC_USR_EN;
1818         if (flags & CPC_COUNT_SYSTEM)
1819                 conf->core_ctl |= CORE_FFC_OS_EN;
1820         if (flags & CPC_OVF_NOTIFY_EMT)
1821                 conf->core_ctl |= CORE_FFC_PMI;
1822 
1823         *data = conf;
1824         return (0);
1825 }
1826 
1827 /*ARGSUSED*/
1828 static int
1829 core_pcbe_configure(uint_t picnum, char *event, uint64_t preset,
1830     uint32_t flags, uint_t nattrs, kcpc_attr_t *attrs, void **data,
1831     void *token)
1832 {
1833         int                     ret;
1834         core_pcbe_config_t      *conf;
1835 
1836         /*
1837          * If we've been handed an existing configuration, we need only preset
1838          * the counter value.
1839          */
1840         if (*data != NULL) {
1841                 conf = *data;
1842                 ASSERT(conf->core_pictype == CORE_GPC ||
1843                     conf->core_pictype == CORE_FFC);
1844                 if (conf->core_pictype == CORE_GPC)
1845                         conf->core_rawpic = preset & mask_gpc;
1846                 else /* CORE_FFC */
1847                         conf->core_rawpic = preset & mask_ffc;
1848                 return (0);
1849         }
1850 
1851         if (picnum >= total_pmc) {
1852                 return (CPC_INVALID_PICNUM);
1853         }
1854 
1855         if (picnum < num_gpc) {
1856                 ret = configure_gpc(picnum, event, preset, flags,
1857                     nattrs, attrs, data);
1858         } else {
1859                 ret = configure_ffc(picnum, event, preset, flags,
1860                     nattrs, attrs, data);
1861         }
1862         return (ret);
1863 }
1864 
1865 static void
1866 core_pcbe_program(void *token)
1867 {
1868         core_pcbe_config_t      *cfg;
1869         uint64_t                perf_global_ctrl;
1870         uint64_t                perf_fixed_ctr_ctrl;
1871         uint64_t                curcr4;
1872 
1873         core_pcbe_allstop();
1874 
1875         curcr4 = getcr4();
1876         if (kcpc_allow_nonpriv(token))
1877                 /* Allow RDPMC at any ring level */
1878                 setcr4(curcr4 | CR4_PCE);
1879         else
1880                 /* Allow RDPMC only at ring 0 */
1881                 setcr4(curcr4 & ~CR4_PCE);
1882 
1883         /* Clear any overflow indicators before programming the counters */
1884         WRMSR(PERF_GLOBAL_OVF_CTRL, MASK_CONDCHGD_OVFBUFFER | control_mask);
1885 
1886         cfg = NULL;
1887         perf_global_ctrl = 0;
1888         perf_fixed_ctr_ctrl = 0;
1889         cfg = (core_pcbe_config_t *)kcpc_next_config(token, cfg, NULL);
1890         while (cfg != NULL) {
1891                 ASSERT(cfg->core_pictype == CORE_GPC ||
1892                     cfg->core_pictype == CORE_FFC);
1893 
1894                 if (cfg->core_pictype == CORE_GPC) {
1895                         /*
1896                          * General-purpose counter registers have write
1897                          * restrictions where only the lower 32-bits can be
1898                          * written to.  The rest of the relevant bits are
1899                          * written to by extension from bit 31 (all ZEROS if
1900                          * bit-31 is ZERO and all ONE if bit-31 is ONE).  This
1901                          * makes it possible to write to the counter register
1902                          * only values that have all ONEs or all ZEROs in the
1903                          * higher bits.
1904                          */
1905                         if (((cfg->core_rawpic & BITS_EXTENDED_FROM_31) == 0) ||
1906                             ((cfg->core_rawpic & BITS_EXTENDED_FROM_31) ==
1907                             BITS_EXTENDED_FROM_31)) {
1908                                 /*
1909                                  * Straighforward case where the higher bits
1910                                  * are all ZEROs or all ONEs.
1911                                  */
1912                                 WRMSR(cfg->core_pmc,
1913                                     (cfg->core_rawpic & mask_gpc));
1914                         } else {
1915                                 /*
1916                                  * The high order bits are not all the same.
1917                                  * We save what is currently in the registers
1918                                  * and do not write to it.  When we want to do
1919                                  * a read from this register later (in
1920                                  * core_pcbe_sample()), we subtract the value
1921                                  * we save here to get the actual event count.
1922                                  *
1923                                  * NOTE: As a result, we will not get overflow
1924                                  * interrupts as expected.
1925                                  */
1926                                 RDMSR(cfg->core_pmc, cfg->core_rawpic);
1927                                 cfg->core_rawpic = cfg->core_rawpic & mask_gpc;
1928                         }
1929                         WRMSR(cfg->core_pes, cfg->core_ctl);
1930                         perf_global_ctrl |= 1ull << cfg->core_picno;
1931                 } else {
1932                         /*
1933                          * Unlike the general-purpose counters, all relevant
1934                          * bits of fixed-function counters can be written to.
1935                          */
1936                         WRMSR(cfg->core_pmc, cfg->core_rawpic & mask_ffc);
1937 
1938                         /*
1939                          * Collect the control bits for all the
1940                          * fixed-function counters and write it at one shot
1941                          * later in this function
1942                          */
1943                         perf_fixed_ctr_ctrl |= cfg->core_ctl <<
1944                             ((cfg->core_picno - num_gpc) * CORE_FFC_ATTR_SIZE);
1945                         perf_global_ctrl |=
1946                             1ull << (cfg->core_picno - num_gpc + 32);
1947                 }
1948 
1949                 cfg = (core_pcbe_config_t *)
1950                     kcpc_next_config(token, cfg, NULL);
1951         }
1952 
1953         /* Enable all the counters */
1954         WRMSR(PERF_FIXED_CTR_CTRL, perf_fixed_ctr_ctrl);
1955         WRMSR(PERF_GLOBAL_CTRL, perf_global_ctrl);
1956 }
1957 
1958 static void
1959 core_pcbe_allstop(void)
1960 {
1961         /* Disable all the counters together */
1962         WRMSR(PERF_GLOBAL_CTRL, ALL_STOPPED);
1963 
1964         setcr4(getcr4() & ~CR4_PCE);
1965 }
1966 
1967 static void
1968 core_pcbe_sample(void *token)
1969 {
1970         uint64_t                *daddr;
1971         uint64_t                curpic;
1972         core_pcbe_config_t      *cfg;
1973         uint64_t                        counter_mask;
1974 
1975         cfg = (core_pcbe_config_t *)kcpc_next_config(token, NULL, &daddr);
1976         while (cfg != NULL) {
1977                 ASSERT(cfg->core_pictype == CORE_GPC ||
1978                     cfg->core_pictype == CORE_FFC);
1979 
1980                 curpic = rdmsr(cfg->core_pmc);
1981 
1982                 DTRACE_PROBE4(core__pcbe__sample,
1983                     uint64_t, cfg->core_pmc,
1984                     uint64_t, curpic,
1985                     uint64_t, cfg->core_rawpic,
1986                     uint64_t, *daddr);
1987 
1988                 if (cfg->core_pictype == CORE_GPC) {
1989                         counter_mask = mask_gpc;
1990                 } else {
1991                         counter_mask = mask_ffc;
1992                 }
1993                 curpic = curpic & counter_mask;
1994                 if (curpic >= cfg->core_rawpic) {
1995                         *daddr += curpic - cfg->core_rawpic;
1996                 } else {
1997                         /* Counter overflowed since our last sample */
1998                         *daddr += counter_mask - (cfg->core_rawpic - curpic) +
1999                             1;
2000                 }
2001                 cfg->core_rawpic = *daddr & counter_mask;
2002 
2003                 cfg =
2004                     (core_pcbe_config_t *)kcpc_next_config(token, cfg, &daddr);
2005         }
2006 }
2007 
2008 static void
2009 core_pcbe_free(void *config)
2010 {
2011         kmem_free(config, sizeof (core_pcbe_config_t));
2012 }
2013 
2014 static struct modlpcbe core_modlpcbe = {
2015         &mod_pcbeops,
2016         "Core Performance Counters",
2017         &core_pcbe_ops
2018 };
2019 
2020 static struct modlinkage core_modl = {
2021         MODREV_1,
2022         &core_modlpcbe,
2023 };
2024 
2025 int
2026 _init(void)
2027 {
2028         if (core_pcbe_init() != 0) {
2029                 return (ENOTSUP);
2030         }
2031         return (mod_install(&core_modl));
2032 }
2033 
2034 int
2035 _fini(void)
2036 {
2037         return (mod_remove(&core_modl));
2038 }
2039 
2040 int
2041 _info(struct modinfo *mi)
2042 {
2043         return (mod_info(&core_modl, mi));
2044 }