1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright 2019 Joyent, Inc.
  24  */
  25 
  26 /*
  27  * This file contains preset event names from the Performance Application
  28  * Programming Interface v3.5 which included the following notice:
  29  *
  30  *                             Copyright (c) 2005,6
  31  *                           Innovative Computing Labs
  32  *                         Computer Science Department,
  33  *                            University of Tennessee,
  34  *                                 Knoxville, TN.
  35  *                              All Rights Reserved.
  36  *
  37  *
  38  * Redistribution and use in source and binary forms, with or without
  39  * modification, are permitted provided that the following conditions are met:
  40  *
  41  *    * Redistributions of source code must retain the above copyright notice,
  42  *      this list of conditions and the following disclaimer.
  43  *    * Redistributions in binary form must reproduce the above copyright
  44  *      notice, this list of conditions and the following disclaimer in the
  45  *      documentation and/or other materials provided with the distribution.
  46  *    * Neither the name of the University of Tennessee nor the names of its
  47  *      contributors may be used to endorse or promote products derived from
  48  *      this software without specific prior written permission.
  49  *
  50  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  51  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  52  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  53  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  54  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  55  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  56  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  57  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  58  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  59  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  60  * POSSIBILITY OF SUCH DAMAGE.
  61  *
  62  *
  63  * This open source software license conforms to the BSD License template.
  64  */
  65 
  66 
  67 /*
  68  * Performance Counter Back-End for Intel processors supporting Architectural
  69  * Performance Monitoring.
  70  */
  71 
  72 #include <sys/cpuvar.h>
  73 #include <sys/param.h>
  74 #include <sys/cpc_impl.h>
  75 #include <sys/cpc_pcbe.h>
  76 #include <sys/modctl.h>
  77 #include <sys/inttypes.h>
  78 #include <sys/systm.h>
  79 #include <sys/cmn_err.h>
  80 #include <sys/x86_archext.h>
  81 #include <sys/sdt.h>
  82 #include <sys/archsystm.h>
  83 #include <sys/privregs.h>
  84 #include <sys/ddi.h>
  85 #include <sys/sunddi.h>
  86 #include <sys/cred.h>
  87 #include <sys/policy.h>
  88 
  89 #include "core_pcbe_table.h"
  90 #include <core_pcbe_cpcgen.h>
  91 
/*
 * Forward declarations of the backend routines defined in this file.  All
 * but core_pcbe_init() are installed in the core_pcbe_ops vector below.
 */
static int core_pcbe_init(void);
static uint_t core_pcbe_ncounters(void);
static const char *core_pcbe_impl_name(void);
static const char *core_pcbe_cpuref(void);
static char *core_pcbe_list_events(uint_t picnum);
static char *core_pcbe_list_attrs(void);
static uint64_t core_pcbe_event_coverage(char *event);
static uint64_t core_pcbe_overflow_bitmap(void);
static int core_pcbe_configure(uint_t picnum, char *event, uint64_t preset,
    uint32_t flags, uint_t nattrs, kcpc_attr_t *attrs, void **data,
    void *token);
static void core_pcbe_program(void *token);
static void core_pcbe_allstop(void);
static void core_pcbe_sample(void *token);
static void core_pcbe_free(void *config);
 107 
#define FALSE   0
#define TRUE    1

/* Counter Type (the values kept in core_pcbe_config_t's core_pictype) */
#define CORE_GPC        0       /* General-Purpose Counter (GPC) */
#define CORE_FFC        1       /* Fixed-Function Counter (FFC) */

/* MSR Addresses */
#define GPC_BASE_PMC            0x00c1  /* First GPC */
#define GPC_BASE_PES            0x0186  /* First GPC Event Select register */
#define FFC_BASE_PMC            0x0309  /* First FFC */
#define PERF_FIXED_CTR_CTRL     0x038d  /* Used to enable/disable FFCs */
#define PERF_GLOBAL_STATUS      0x038e  /* Overflow status register */
#define PERF_GLOBAL_CTRL        0x038f  /* Used to enable/disable counting */
#define PERF_GLOBAL_OVF_CTRL    0x0390  /* Used to clear overflow status */

/*
 * Processor Event Select register fields.  The definitions are not in strict
 * bit order: CORE_ANYTHR (bit 21) is listed after CORE_INV (bit 23).
 */
#define CORE_USR        (1ULL << 16)      /* Count while not in ring 0 */
#define CORE_OS         (1ULL << 17)      /* Count while in ring 0 */
#define CORE_EDGE       (1ULL << 18)      /* Enable edge detection */
#define CORE_PC         (1ULL << 19)      /* Enable pin control */
#define CORE_INT        (1ULL << 20)      /* Enable interrupt on overflow */
#define CORE_EN         (1ULL << 22)      /* Enable counting */
#define CORE_INV        (1ULL << 23)      /* Invert the CMASK */
#define CORE_ANYTHR     (1ULL << 21)      /* Count event for any thread on core */

/*
 * The unit mask is an 8-bit field starting at bit 8 and the counter mask
 * (CMASK) is an 8-bit field starting at bit 24.  Each *_MASK value is the
 * unshifted width of its field.
 */
#define CORE_UMASK_SHIFT        8
#define CORE_UMASK_MASK         0xffu
#define CORE_CMASK_SHIFT        24
#define CORE_CMASK_MASK         0xffu
 140 
/*
 * Fixed-function counter attributes.  The OS/USR naming follows the
 * CORE_OS/CORE_USR event select bits: "OS" means ring 0 and "USR" means
 * every other ring.
 */
#define CORE_FFC_OS_EN  (1ULL << 0)       /* Count while in ring 0 */
#define CORE_FFC_USR_EN (1ULL << 1)       /* Count while not in ring 0 */
#define CORE_FFC_ANYTHR (1ULL << 2)       /* Count event for any thread on core */
#define CORE_FFC_PMI    (1ULL << 3)       /* Enable interrupt on overflow */
 148 
/*
 * Number of bits for specifying each FFC's attributes in the control register.
 * This matches the four CORE_FFC_* attribute bits (bits 0 through 3).
 */
#define CORE_FFC_ATTR_SIZE      4

/*
 * CondChgd and OvfBuffer fields of global status and overflow control registers
 * (the two most significant bits of the 64-bit register value).
 */
#define CONDCHGD        (1ULL << 63)
#define OVFBUFFER       (1ULL << 62)
#define MASK_CONDCHGD_OVFBUFFER (CONDCHGD | OVFBUFFER)

/*
 * NOTE(review): presumably the value written to PERF_GLOBAL_CTRL to disable
 * every counter at once; confirm at the use sites.
 */
#define ALL_STOPPED     0ULL
 162 
 163 #define BITMASK_XBITS(x)        ((1ull << (x)) - 1ull)
 164 
/*
 * Only the lower 32-bits can be written to in the general-purpose
 * counters.  The higher bits are extended from bit 31; all ones if
 * bit 31 is one and all zeros otherwise.
 *
 * The fixed-function counters do not have this restriction.
 *
 * The mask below covers bits 31 through width_gpc - 1.  It expands to a
 * reference to the file-scope width_gpc variable, so it is only meaningful
 * once core_pcbe_init() has read the counter width from CPUID.
 */
#define BITS_EXTENDED_FROM_31   (BITMASK_XBITS(width_gpc) & ~BITMASK_XBITS(31))
 173 
 174 #define WRMSR(msr, value)                                               \
 175         wrmsr((msr), (value));                                          \
 176         DTRACE_PROBE2(wrmsr, uint64_t, (msr), uint64_t, (value));
 177 
 178 #define RDMSR(msr, value)                                               \
 179         (value) = rdmsr((msr));                                         \
 180         DTRACE_PROBE2(rdmsr, uint64_t, (msr), uint64_t, (value));
 181 
/*
 * Per-counter configuration record.  core_pictype tells whether the record
 * describes a general-purpose or a fixed-function counter.
 */
typedef struct core_pcbe_config {
        /* NOTE(review): presumably the raw counter value; confirm at users */
        uint64_t        core_rawpic;
        uint64_t        core_ctl;       /* Event Select bits */
        uint64_t        core_pmc;       /* Counter register address */
        uint64_t        core_pes;       /* Event Select register address */
        /* NOTE(review): presumably the counter (pic) index; confirm at users */
        uint_t          core_picno;
        uint8_t         core_pictype;   /* CORE_GPC or CORE_FFC */
} core_pcbe_config_t;
 190 
/*
 * Operations vector for this backend.  The initializer is positional, so the
 * entries must stay in the order of the pcbe_ops_t members named in the
 * trailing comments.
 */
pcbe_ops_t core_pcbe_ops = {
        PCBE_VER_1,                     /* pcbe_ver */
        CPC_CAP_OVERFLOW_INTERRUPT | CPC_CAP_OVERFLOW_PRECISE,  /* pcbe_caps */
        core_pcbe_ncounters,            /* pcbe_ncounters */
        core_pcbe_impl_name,            /* pcbe_impl_name */
        core_pcbe_cpuref,               /* pcbe_cpuref */
        core_pcbe_list_events,          /* pcbe_list_events */
        core_pcbe_list_attrs,           /* pcbe_list_attrs */
        core_pcbe_event_coverage,       /* pcbe_event_coverage */
        core_pcbe_overflow_bitmap,      /* pcbe_overflow_bitmap */
        core_pcbe_configure,            /* pcbe_configure */
        core_pcbe_program,              /* pcbe_program */
        core_pcbe_allstop,              /* pcbe_allstop */
        core_pcbe_sample,               /* pcbe_sample */
        core_pcbe_free                  /* pcbe_free */
};
 207 
/*
 * One entry of a Penryn/Merom event name table.  The tables of this type in
 * this file end with an entry whose event_num is NT_END.
 */
struct nametable_core_uarch {
        const char      *name;                  /* Event name */
        uint64_t        restricted_bits;        /* ALL_CORES and/or ALL_AGENTS */
        uint8_t         event_num;              /* Event number, or NT_END */
};
 213 
/*
 * Counting an event for all cores or all bus agents requires cpc_cpu privileges.
 * These are the values stored in the restricted_bits member of
 * struct nametable_core_uarch.
 */
#define ALL_CORES       (1ULL << 15)
#define ALL_AGENTS      (1ULL << 13)
 219 
/*
 * Maps a generic (PAPI_*) event name to an event number and unit mask.  The
 * tables of this type in this file end with an entry whose event_num is
 * NT_END.
 */
struct generic_events {
        const char      *name;          /* Generic event name */
        uint8_t         event_num;      /* Event number, or NT_END */
        uint8_t         umask;          /* Unit mask */
};
 225 
/*
 * Generic (PAPI) events that pcbe_init_core_uarch() lists for every
 * general-purpose counter, with the event number and unit mask each name
 * maps to.  The trailing comments give the corresponding platform event.
 */
static const struct generic_events cmn_generic_events[] = {
        { "PAPI_tot_cyc", 0x3c, 0x00 }, /* cpu_clk_unhalted.thread_p/core */
        { "PAPI_tot_ins", 0xc0, 0x00 }, /* inst_retired.any_p             */
        { "PAPI_br_ins",  0xc4, 0x0c }, /* br_inst_retired.taken          */
        { "PAPI_br_msp",  0xc5, 0x00 }, /* br_inst_retired.mispred        */
        { "PAPI_br_ntk",  0xc4, 0x03 },
                                /*
                                 * NOTE(review): this entry used to carry the
                                 * same description as PAPI_br_prc below even
                                 * though the unit mask differs (0x03 vs 0x05).
                                 * Presumably
                                 * br_inst_retired.pred_not_taken|
                                 * mispred_not_taken; confirm against the
                                 * event reference.
                                 */
        { "PAPI_br_prc",  0xc4, 0x05 },
                                /* br_inst_retired.pred_not_taken|pred_taken */
        { "PAPI_hw_int",  0xc8, 0x00 }, /* hw_int_rcv                     */
        { "PAPI_tot_iis", 0xaa, 0x01 }, /* macro_insts.decoded            */
        { "PAPI_l1_dca",  0x43, 0x01 }, /* l1d_all_ref                    */
        { "PAPI_l1_icm",  0x81, 0x00 }, /* l1i_misses                     */
        { "PAPI_l1_icr",  0x80, 0x00 }, /* l1i_reads                      */
        { "PAPI_l1_tcw",  0x41, 0x0f }, /* l1d_cache_st.mesi              */
        { "PAPI_l2_stm",  0x2a, 0x41 }, /* l2_st.self.i_state             */
        { "PAPI_l2_tca",  0x2e, 0x4f }, /* l2_rqsts.self.demand.mesi      */
        { "PAPI_l2_tch",  0x2e, 0x4e }, /* l2_rqsts.mes                   */
        { "PAPI_l2_tcm",  0x2e, 0x41 }, /* l2_rqsts.self.demand.i_state   */
        { "PAPI_l2_tcw",  0x2a, 0x4f }, /* l2_st.self.mesi                */
        { "PAPI_ld_ins",  0xc0, 0x01 }, /* inst_retired.loads             */
        { "PAPI_lst_ins", 0xc0, 0x03 }, /* inst_retired.loads|stores      */
        { "PAPI_sr_ins",  0xc0, 0x02 }, /* inst_retired.stores            */
        { "PAPI_tlb_dm",  0x08, 0x01 }, /* dtlb_misses.any                */
        { "PAPI_tlb_im",  0x82, 0x12 }, /* itlb.small_miss|large_miss     */
        { "PAPI_tlb_tl",  0x0c, 0x03 }, /* page_walks                     */
        { "",             NT_END, 0  }
};
 254 
/*
 * Generic events that pcbe_init_core_uarch() lists for general-purpose
 * counter 0 only.
 */
static const struct generic_events generic_events_pic0[] = {
        { "PAPI_l1_dcm",  0xcb, 0x01 }, /* mem_load_retired.l1d_miss */
        { "",             NT_END, 0  }
};
 259 
/*
 * The events listed in the following table can be counted on all
 * general-purpose counters on processors that are of Penryn and Merom Family
 *
 * Columns: event name, restricted bits (ALL_CORES and/or ALL_AGENTS, or 0x0
 * when there are none), and event number.  The table ends at the entry whose
 * event number is NT_END.
 */
static const struct nametable_core_uarch cmn_gpc_events_core_uarch[] = {
        /* Alphabetical order of event name */

        { "baclears",                   0x0,    0xe6 },
        { "bogus_br",                   0x0,    0xe4 },
        { "br_bac_missp_exec",          0x0,    0x8a },

        { "br_call_exec",               0x0,    0x92 },
        { "br_call_missp_exec",         0x0,    0x93 },
        { "br_cnd_exec",                0x0,    0x8b },

        { "br_cnd_missp_exec",          0x0,    0x8c },
        { "br_ind_call_exec",           0x0,    0x94 },
        { "br_ind_exec",                0x0,    0x8d },

        { "br_ind_missp_exec",          0x0,    0x8e },
        { "br_inst_decoded",            0x0,    0xe0 },
        { "br_inst_exec",               0x0,    0x88 },

        { "br_inst_retired",            0x0,    0xc4 },
        { "br_inst_retired_mispred",    0x0,    0xc5 },
        { "br_missp_exec",              0x0,    0x89 },

        { "br_ret_bac_missp_exec",      0x0,    0x91 },
        { "br_ret_exec",                0x0,    0x8f },
        { "br_ret_missp_exec",          0x0,    0x90 },

        { "br_tkn_bubble_1",            0x0,    0x97 },
        { "br_tkn_bubble_2",            0x0,    0x98 },
        { "bus_bnr_drv",                ALL_AGENTS,     0x61 },

        { "bus_data_rcv",               ALL_CORES,      0x64 },
        { "bus_drdy_clocks",            ALL_AGENTS,     0x62 },
        { "bus_hit_drv",                ALL_AGENTS,     0x7a },

        { "bus_hitm_drv",               ALL_AGENTS,     0x7b },
        { "bus_io_wait",                ALL_CORES,      0x7f },
        { "bus_lock_clocks",            ALL_CORES | ALL_AGENTS, 0x63 },

        { "bus_request_outstanding",    ALL_CORES | ALL_AGENTS, 0x60 },
        { "bus_trans_any",              ALL_CORES | ALL_AGENTS, 0x70 },
        { "bus_trans_brd",              ALL_CORES | ALL_AGENTS, 0x65 },

        { "bus_trans_burst",            ALL_CORES | ALL_AGENTS, 0x6e },
        { "bus_trans_def",              ALL_CORES | ALL_AGENTS, 0x6d },
        { "bus_trans_ifetch",           ALL_CORES | ALL_AGENTS, 0x68 },

        { "bus_trans_inval",            ALL_CORES | ALL_AGENTS, 0x69 },
        { "bus_trans_io",               ALL_CORES | ALL_AGENTS, 0x6c },
        { "bus_trans_mem",              ALL_CORES | ALL_AGENTS, 0x6f },

        { "bus_trans_p",                ALL_CORES | ALL_AGENTS, 0x6b },
        { "bus_trans_pwr",              ALL_CORES | ALL_AGENTS, 0x6a },
        { "bus_trans_rfo",              ALL_CORES | ALL_AGENTS, 0x66 },

        { "bus_trans_wb",               ALL_CORES | ALL_AGENTS, 0x67 },
        { "busq_empty",                 ALL_CORES,      0x7d },
        { "cmp_snoop",                  ALL_CORES,      0x78 },

        { "cpu_clk_unhalted",           0x0,    0x3c },
        { "cycles_int",                 0x0,    0xc6 },
        { "cycles_l1i_mem_stalled",     0x0,    0x86 },

        { "dtlb_misses",                0x0,    0x08 },
        { "eist_trans",                 0x0,    0x3a },
        { "esp",                        0x0,    0xab },

        { "ext_snoop",                  ALL_AGENTS,     0x77 },
        { "fp_mmx_trans",               0x0,    0xcc },
        { "hw_int_rcv",                 0x0,    0xc8 },

        { "ild_stall",                  0x0,    0x87 },
        { "inst_queue",                 0x0,    0x83 },
        { "inst_retired",               0x0,    0xc0 },

        { "itlb",                       0x0,    0x82 },
        { "itlb_miss_retired",          0x0,    0xc9 },
        { "l1d_all_ref",                0x0,    0x43 },

        { "l1d_cache_ld",               0x0,    0x40 },
        { "l1d_cache_lock",             0x0,    0x42 },
        { "l1d_cache_st",               0x0,    0x41 },

        { "l1d_m_evict",                0x0,    0x47 },
        { "l1d_m_repl",                 0x0,    0x46 },
        { "l1d_pend_miss",              0x0,    0x48 },

        { "l1d_prefetch",               0x0,    0x4e },
        { "l1d_repl",                   0x0,    0x45 },
        { "l1d_split",                  0x0,    0x49 },

        { "l1i_misses",                 0x0,    0x81 },
        { "l1i_reads",                  0x0,    0x80 },
        { "l2_ads",                     ALL_CORES,      0x21 },

        { "l2_dbus_busy_rd",            ALL_CORES,      0x23 },
        { "l2_ifetch",                  ALL_CORES,      0x28 },
        { "l2_ld",                      ALL_CORES,      0x29 },

        { "l2_lines_in",                ALL_CORES,      0x24 },
        { "l2_lines_out",               ALL_CORES,      0x26 },
        { "l2_lock",                    ALL_CORES,      0x2b },

        { "l2_m_lines_in",              ALL_CORES,      0x25 },
        { "l2_m_lines_out",             ALL_CORES,      0x27 },
        { "l2_no_req",                  ALL_CORES,      0x32 },

        { "l2_reject_busq",             ALL_CORES,      0x30 },
        { "l2_rqsts",                   ALL_CORES,      0x2e },
        { "l2_st",                      ALL_CORES,      0x2a },

        { "load_block",                 0x0,    0x03 },
        { "load_hit_pre",               0x0,    0x4c },
        { "machine_nukes",              0x0,    0xc3 },

        { "macro_insts",                0x0,    0xaa },
        { "memory_disambiguation",      0x0,    0x09 },
        { "misalign_mem_ref",           0x0,    0x05 },
        { "page_walks",                 0x0,    0x0c },

        { "pref_rqsts_dn",              0x0,    0xf8 },
        { "pref_rqsts_up",              0x0,    0xf0 },
        { "rat_stalls",                 0x0,    0xd2 },

        { "resource_stalls",            0x0,    0xdc },
        { "rs_uops_dispatched",         0x0,    0xa0 },
        { "seg_reg_renames",            0x0,    0xd5 },

        { "seg_rename_stalls",          0x0,    0xd4 },
        { "segment_reg_loads",          0x0,    0x06 },
        { "simd_assist",                0x0,    0xcd },

        { "simd_comp_inst_retired",     0x0,    0xca },
        { "simd_inst_retired",          0x0,    0xc7 },
        { "simd_instr_retired",         0x0,    0xce },

        { "simd_sat_instr_retired",     0x0,    0xcf },
        { "simd_sat_uop_exec",          0x0,    0xb1 },
        { "simd_uop_type_exec",         0x0,    0xb3 },

        { "simd_uops_exec",             0x0,    0xb0 },
        { "snoop_stall_drv",            ALL_CORES | ALL_AGENTS, 0x7e },
        { "sse_pre_exec",               0x0,    0x07 },

        { "sse_pre_miss",               0x0,    0x4b },
        { "store_block",                0x0,    0x04 },
        { "thermal_trip",               0x0,    0x3b },

        { "uops_retired",               0x0,    0xc2 },
        { "x87_ops_retired",            0x0,    0xc1 },
        { "",                           0x0,    NT_END }
};
 416 
/*
 * Events that pcbe_init_core_uarch() lists for general-purpose counter 0
 * only.  None of them currently carries restricted bits.
 *
 * If any of the pic specific events require privileges, make sure to add a
 * check in configure_gpc() to find whether an event hard-coded as a number by
 * the user has any privilege requirements
 */
static const struct nametable_core_uarch pic0_events[] = {
        /* Alphabetical order of event name */

        { "cycles_div_busy",            0x0,    0x14 },
        { "fp_comp_ops_exe",            0x0,    0x10 },
        { "idle_during_div",            0x0,    0x18 },

        { "mem_load_retired",           0x0,    0xcb },
        { "rs_uops_dispatched_port",    0x0,    0xa1 },
        { "",                           0x0,    NT_END }
};
 433 
/*
 * Events that pcbe_init_core_uarch() lists for general-purpose counter 1
 * only.  None of them currently carries restricted bits.
 */
static const struct nametable_core_uarch pic1_events[] = {
        /* Alphabetical order of event name */

        { "delayed_bypass",     0x0,    0x19 },
        { "div",                0x0,    0x13 },
        { "fp_assist",          0x0,    0x11 },

        { "mul",                0x0,    0x12 },
        { "",                   0x0,    NT_END }
};
 444 
/*
 * FFC entries must be in order: each array is indexed by fixed-function
 * counter number.
 *
 * core_pcbe_init() selects the _htt or the _non_htt name array depending on
 * whether X86FSET_HTT is present; the two differ only in the name of
 * counter 1.  The arrays are NULL-terminated, and core_pcbe_init() counts
 * that terminator when it derives known_ffc_num with sizeof.
 */
static char *ffc_names_non_htt[] = {
        "instr_retired.any",
        "cpu_clk_unhalted.core",
        "cpu_clk_unhalted.ref",
        NULL
};

static char *ffc_names_htt[] = {
        "instr_retired.any",
        "cpu_clk_unhalted.thread",
        "cpu_clk_unhalted.ref",
        NULL
};

/*
 * Generic (PAPI) name of the FFC at the same index, or "" when the counter
 * has none; core_pcbe_init() appends only the non-empty names.
 */
static char *ffc_genericnames[] = {
        "PAPI_tot_ins",
        "PAPI_tot_cyc",
        "",
        NULL
};
 466 
/*
 * File-scope state.  Unless noted otherwise, the values are established by
 * core_pcbe_init() from CPUID leaf 0xa.
 */
static char     **ffc_names = NULL;     /* ffc_names_htt or ffc_names_non_htt */
static char     **ffc_allnames = NULL;  /* Per FFC: "name[,generic name]" */
static char     **gpc_names = NULL;     /* Per GPC: comma-separated events */
static uint32_t versionid;              /* Low byte of %eax */
static uint64_t num_gpc;                /* Bits 8-15 of %eax */
static uint64_t width_gpc;              /* Bits 16-23 of %eax */
static uint64_t mask_gpc;               /* BITMASK_XBITS(width_gpc) */
static uint64_t num_ffc;                /* Capped below known_ffc_num */
static uint64_t width_ffc;              /* FFC width in bits */
static uint64_t mask_ffc;               /* BITMASK_XBITS(width_ffc) */
static uint_t   total_pmc;              /* num_gpc + num_ffc */
static uint64_t control_ffc;            /* BITMASK_XBITS(num_ffc) */
static uint64_t control_gpc;            /* BITMASK_XBITS(num_gpc) */
static uint64_t control_mask;           /* (control_ffc << 32) | control_gpc */
/* NOTE(review): set outside the code visible here; confirm its source */
static uint32_t arch_events_vector;

/* Size of, and storage for, the implementation name string */
#define IMPL_NAME_LEN 100
static char core_impl_name[IMPL_NAME_LEN];
 485 
 486 static const char *core_cpuref =
 487         "See https://download.01.org/perfmon/index/ or Chapers 18 and 19 " \
 488         "of the \"Intel 64 and IA-32 Architectures Software Developer's " \
 489         "Manual Volume 3: System Programming Guide\" Order Number: " \
 490         "325384-062US, March 2017.";
 491 
 492 
/*
 * Architectural events
 *
 * ARCH_EVENTS_COMMON expands to the initializers shared by the two tables
 * below, which differ only in their first entry (the name given to event
 * 0x3c with unit mask 0x00).  The macro body ends without a trailing comma.
 *
 * NOTE(review): the initializer columns are presumably event number, unit
 * mask, supported-counter set and event name; confirm against the
 * definition of struct events_table_t.
 */
#define ARCH_EVENTS_COMMON                                      \
        { 0xc0, 0x00, C_ALL, "inst_retired.any_p" },            \
        { 0x3c, 0x01, C_ALL, "cpu_clk_unhalted.ref_p" },        \
        { 0x2e, 0x4f, C_ALL, "longest_lat_cache.reference" },   \
        { 0x2e, 0x41, C_ALL, "longest_lat_cache.miss" },        \
        { 0xc4, 0x00, C_ALL, "br_inst_retired.all_branches" },  \
        { 0xc5, 0x00, C_ALL, "br_misp_retired.all_branches" }

static const struct events_table_t arch_events_table_non_htt[] = {
        { 0x3c, 0x00, C_ALL, "cpu_clk_unhalted.core" },
        ARCH_EVENTS_COMMON
};

static const struct events_table_t arch_events_table_htt[] = {
        { 0x3c, 0x00, C_ALL, "cpu_clk_unhalted.thread_p" },
        ARCH_EVENTS_COMMON
};

/*
 * Generic (PAPI) names laid out in the same order as the architectural event
 * tables above, as the per-entry comments indicate.  An empty string marks
 * an architectural event without a generic name.
 */
static char *arch_genevents_table[] = {
        "PAPI_tot_cyc", /* cpu_clk_unhalted.thread_p/core */
        "PAPI_tot_ins", /* inst_retired.any_p             */
        "",             /* cpu_clk_unhalted.ref_p         */
        "",             /* longest_lat_cache.reference    */
        "",             /* longest_lat_cache.miss         */
        "",             /* br_inst_retired.all_branches   */
        "",             /* br_misp_retired.all_branches   */
};
 521 
/* Set by core_pcbe_init() to arch_events_table_htt or the _non_htt table */
static const struct events_table_t *arch_events_table = NULL;
/* Number of entries in the table arch_events_table points to */
static uint64_t known_arch_events;
/* Number of elements in the selected ffc_names array, NULL included */
static uint64_t known_ffc_num;
/* NOTE(review): assigned outside the code visible here; confirm its source */
static const struct events_table_t *events_table = NULL;
 526 
 527 /*
 528  * Initialize string containing list of supported general-purpose counter
 529  * events for processors of Penryn and Merom Family
 530  */
 531 static void
 532 pcbe_init_core_uarch()
 533 {
 534         const struct nametable_core_uarch       *n;
 535         const struct generic_events             *k;
 536         const struct nametable_core_uarch       *picspecific_events;
 537         const struct generic_events             *picspecific_genericevents;
 538         size_t                  common_size;
 539         size_t                  size;
 540         uint64_t                i;
 541 
 542         gpc_names = kmem_alloc(num_gpc * sizeof (char *), KM_SLEEP);
 543 
 544         /* Calculate space needed to save all the common event names */
 545         common_size = 0;
 546         for (n = cmn_gpc_events_core_uarch; n->event_num != NT_END; n++) {
 547                 common_size += strlen(n->name) + 1;
 548         }
 549 
 550         for (k = cmn_generic_events; k->event_num != NT_END; k++) {
 551                 common_size += strlen(k->name) + 1;
 552         }
 553 
 554         for (i = 0; i < num_gpc; i++) {
 555                 size = 0;
 556                 picspecific_genericevents = NULL;
 557 
 558                 switch (i) {
 559                         case 0:
 560                                 picspecific_events = pic0_events;
 561                                 picspecific_genericevents = generic_events_pic0;
 562                                 break;
 563                         case 1:
 564                                 picspecific_events = pic1_events;
 565                                 break;
 566                         default:
 567                                 picspecific_events = NULL;
 568                                 break;
 569                 }
 570                 if (picspecific_events != NULL) {
 571                         for (n = picspecific_events;
 572                             n->event_num != NT_END;
 573                             n++) {
 574                                 size += strlen(n->name) + 1;
 575                         }
 576                 }
 577                 if (picspecific_genericevents != NULL) {
 578                         for (k = picspecific_genericevents;
 579                             k->event_num != NT_END; k++) {
 580                                 size += strlen(k->name) + 1;
 581                         }
 582                 }
 583 
 584                 gpc_names[i] =
 585                     kmem_alloc(size + common_size + 1, KM_SLEEP);
 586 
 587                 gpc_names[i][0] = '\0';
 588                 if (picspecific_events != NULL) {
 589                         for (n = picspecific_events;
 590                             n->event_num != NT_END; n++) {
 591                                 (void) strcat(gpc_names[i], n->name);
 592                                 (void) strcat(gpc_names[i], ",");
 593                         }
 594                 }
 595                 if (picspecific_genericevents != NULL) {
 596                         for (k = picspecific_genericevents;
 597                             k->event_num != NT_END; k++) {
 598                                 (void) strcat(gpc_names[i], k->name);
 599                                 (void) strcat(gpc_names[i], ",");
 600                         }
 601                 }
 602                 for (n = cmn_gpc_events_core_uarch; n->event_num != NT_END;
 603                     n++) {
 604                         (void) strcat(gpc_names[i], n->name);
 605                         (void) strcat(gpc_names[i], ",");
 606                 }
 607                 for (k = cmn_generic_events; k->event_num != NT_END; k++) {
 608                         (void) strcat(gpc_names[i], k->name);
 609                         (void) strcat(gpc_names[i], ",");
 610                 }
 611 
 612                 /*
 613                  * Remove trailing comma.
 614                  */
 615                 gpc_names[i][common_size + size - 1] = '\0';
 616         }
 617 }
 618 
 619 static int
 620 core_pcbe_init(void)
 621 {
 622         struct cpuid_regs       cp;
 623         size_t                  size;
 624         uint64_t                i;
 625         uint64_t                j;
 626         uint64_t                arch_events_vector_length;
 627         size_t                  arch_events_string_length;
 628         uint_t                  model;
 629 
 630         if (cpuid_getvendor(CPU) != X86_VENDOR_Intel)
 631                 return (-1);
 632 
 633         /* Obtain Basic CPUID information */
 634         cp.cp_eax = 0x0;
 635         (void) __cpuid_insn(&cp);
 636 
 637         /* No Architectural Performance Monitoring Leaf returned by CPUID */
 638         if (cp.cp_eax < 0xa) {
 639                 return (-1);
 640         }
 641 
 642         /* Obtain the Architectural Performance Monitoring Leaf */
 643         cp.cp_eax = 0xa;
 644         (void) __cpuid_insn(&cp);
 645 
 646         versionid = cp.cp_eax & 0xFF;
 647 
 648         /*
 649          * Fixed-Function Counters (FFC)
 650          *
 651          * All Family 6 Model 15 and Model 23 processors have fixed-function
 652          * counters.  These counters were made Architectural with
 653          * Family 6 Model 15 Stepping 9.
 654          */
 655         switch (versionid) {
 656 
 657                 case 0:
 658                         return (-1);
 659 
 660                 case 2:
 661                         num_ffc = cp.cp_edx & 0x1F;
 662                         width_ffc = (cp.cp_edx >> 5) & 0xFF;
 663 
 664                         /*
 665                          * Some processors have an errata (AW34) where
 666                          * versionid is reported as 2 when actually 1.
 667                          * In this case, fixed-function counters are
 668                          * model-specific as in Version 1.
 669                          */
 670                         if (num_ffc != 0) {
 671                                 break;
 672                         }
 673                         /* FALLTHROUGH */
 674                 case 1:
 675                         num_ffc = 3;
 676                         width_ffc = 40;
 677                         versionid = 1;
 678                         break;
 679 
 680                 default:
 681                         num_ffc = cp.cp_edx & 0x1F;
 682                         width_ffc = (cp.cp_edx >> 5) & 0xFF;
 683                         break;
 684         }
 685 
 686 
 687         if (num_ffc >= 64)
 688                 return (-1);
 689 
 690         /* Set HTT-specific names of architectural & FFC events */
 691         if (is_x86_feature(x86_featureset, X86FSET_HTT)) {
 692                 ffc_names = ffc_names_htt;
 693                 arch_events_table = arch_events_table_htt;
 694                 known_arch_events =
 695                     sizeof (arch_events_table_htt) /
 696                     sizeof (struct events_table_t);
 697                 known_ffc_num =
 698                     sizeof (ffc_names_htt) / sizeof (char *);
 699         } else {
 700                 ffc_names = ffc_names_non_htt;
 701                 arch_events_table = arch_events_table_non_htt;
 702                 known_arch_events =
 703                     sizeof (arch_events_table_non_htt) /
 704                     sizeof (struct events_table_t);
 705                 known_ffc_num =
 706                     sizeof (ffc_names_non_htt) / sizeof (char *);
 707         }
 708 
 709         if (num_ffc >= known_ffc_num) {
 710                 /*
 711                  * The system seems to have more fixed-function counters than
 712                  * what this PCBE is able to handle correctly.  Default to the
 713                  * maximum number of fixed-function counters that this driver
 714                  * is aware of.
 715                  */
 716                 num_ffc = known_ffc_num - 1;
 717         }
 718 
 719         mask_ffc = BITMASK_XBITS(width_ffc);
 720         control_ffc = BITMASK_XBITS(num_ffc);
 721 
 722         /*
 723          * General Purpose Counters (GPC)
 724          */
 725         num_gpc = (cp.cp_eax >> 8) & 0xFF;
 726         width_gpc = (cp.cp_eax >> 16) & 0xFF;
 727 
 728         if (num_gpc >= 64)
 729                 return (-1);
 730 
 731         mask_gpc = BITMASK_XBITS(width_gpc);
 732 
 733         control_gpc = BITMASK_XBITS(num_gpc);
 734 
 735         control_mask = (control_ffc << 32) | control_gpc;
 736 
 737         total_pmc = num_gpc + num_ffc;
 738         if (total_pmc > 64) {
 739                 /* Too wide for the overflow bitmap */
 740                 return (-1);
 741         }
 742 
 743         /* FFC names */
 744         ffc_allnames = kmem_alloc(num_ffc * sizeof (char *), KM_SLEEP);
 745         for (i = 0; i < num_ffc; i++) {
 746                 ffc_allnames[i] = kmem_alloc(
 747                     strlen(ffc_names[i]) + strlen(ffc_genericnames[i]) + 2,
 748                     KM_SLEEP);
 749 
 750                 ffc_allnames[i][0] = '\0';
 751                 (void) strcat(ffc_allnames[i], ffc_names[i]);
 752 
 753                 /* Check if this ffc has a generic name */
 754                 if (strcmp(ffc_genericnames[i], "") != 0) {
 755                         (void) strcat(ffc_allnames[i], ",");
 756                         (void) strcat(ffc_allnames[i], ffc_genericnames[i]);
 757                 }
 758         }
 759 
 760         /* GPC events for Family 6 Models 15, 23 and 29 only */
 761         if ((cpuid_getfamily(CPU) == 6) &&
 762             ((cpuid_getmodel(CPU) == 15) || (cpuid_getmodel(CPU) == 23) ||
 763             (cpuid_getmodel(CPU) == 29))) {
 764                 (void) snprintf(core_impl_name, IMPL_NAME_LEN,
 765                     "Core Microarchitecture");
 766                 pcbe_init_core_uarch();
 767                 return (0);
 768         }
 769 
 770         (void) snprintf(core_impl_name, IMPL_NAME_LEN,
 771             "Intel Arch PerfMon v%d on Family %d Model %d",
 772             versionid, cpuid_getfamily(CPU), cpuid_getmodel(CPU));
 773 
 774         /*
 775          * Architectural events
 776          */
 777         arch_events_vector_length = (cp.cp_eax >> 24) & 0xFF;
 778 
 779         ASSERT(known_arch_events == arch_events_vector_length);
 780 
 781         /*
 782          * To handle the case where a new performance monitoring setup is run
 783          * on a non-debug kernel
 784          */
 785         if (known_arch_events > arch_events_vector_length) {
 786                 known_arch_events = arch_events_vector_length;
 787         } else {
 788                 arch_events_vector_length = known_arch_events;
 789         }
 790 
 791         arch_events_vector = cp.cp_ebx &
 792             BITMASK_XBITS(arch_events_vector_length);
 793 
 794         /*
 795          * Process architectural and non-architectural events using GPC
 796          */
 797         if (num_gpc > 0) {
 798 
 799                 gpc_names = kmem_alloc(num_gpc * sizeof (char *), KM_SLEEP);
 800 
 801                 /* Calculate space required for the architectural gpc events */
 802                 arch_events_string_length = 0;
 803                 for (i = 0; i < known_arch_events; i++) {
 804                         if (((1U << i) & arch_events_vector) == 0) {
 805                                 arch_events_string_length +=
 806                                     strlen(arch_events_table[i].name) + 1;
 807                                 if (strcmp(arch_genevents_table[i], "") != 0) {
 808                                         arch_events_string_length +=
 809                                             strlen(arch_genevents_table[i]) + 1;
 810                                 }
 811                         }
 812                 }
 813 
 814                 /* Non-architectural events list */
 815                 model = cpuid_getmodel(CPU);
 816                 events_table = core_cpcgen_table(model);
 817 
 818                 for (i = 0; i < num_gpc; i++) {
 819 
 820                         /*
 821                          * Determine length of all supported event names
 822                          * (architectural + non-architectural)
 823                          */
 824                         size = arch_events_string_length;
 825                         for (j = 0; events_table != NULL &&
 826                             events_table[j].eventselect != NT_END;
 827                             j++) {
 828                                 if (C(i) & events_table[j].supported_counters) {
 829                                         size += strlen(events_table[j].name) +
 830                                             1;
 831                                 }
 832                         }
 833 
 834                         /* Allocate memory for this pics list */
 835                         gpc_names[i] = kmem_alloc(size + 1, KM_SLEEP);
 836                         gpc_names[i][0] = '\0';
 837                         if (size == 0) {
 838                                 continue;
 839                         }
 840 
 841                         /*
 842                          * Create the list of all supported events
 843                          * (architectural + non-architectural)
 844                          */
 845                         for (j = 0; j < known_arch_events; j++) {
 846                                 if (((1U << j) & arch_events_vector) == 0) {
 847                                         (void) strcat(gpc_names[i],
 848                                             arch_events_table[j].name);
 849                                         (void) strcat(gpc_names[i], ",");
 850                                         if (strcmp(
 851                                             arch_genevents_table[j], "")
 852                                             != 0) {
 853                                                 (void) strcat(gpc_names[i],
 854                                                     arch_genevents_table[j]);
 855                                                 (void) strcat(gpc_names[i],
 856                                                     ",");
 857                                         }
 858                                 }
 859                         }
 860 
 861                         for (j = 0; events_table != NULL &&
 862                             events_table[j].eventselect != NT_END;
 863                             j++) {
 864                                 if (C(i) & events_table[j].supported_counters) {
 865                                         (void) strcat(gpc_names[i],
 866                                             events_table[j].name);
 867                                         (void) strcat(gpc_names[i], ",");
 868                                 }
 869                         }
 870 
 871                         /* Remove trailing comma */
 872                         gpc_names[i][size - 1] = '\0';
 873                 }
 874         }
 875 
 876         return (0);
 877 }
 878 
 879 static uint_t core_pcbe_ncounters()
 880 {
 881         return (total_pmc);
 882 }
 883 
 884 static const char *core_pcbe_impl_name(void)
 885 {
 886         return (core_impl_name);
 887 }
 888 
 889 static const char *core_pcbe_cpuref(void)
 890 {
 891         return (core_cpuref);
 892 }
 893 
 894 static char *core_pcbe_list_events(uint_t picnum)
 895 {
 896         ASSERT(picnum < cpc_ncounters);
 897 
 898         if (picnum < num_gpc) {
 899                 return (gpc_names[picnum]);
 900         } else {
 901                 return (ffc_allnames[picnum - num_gpc]);
 902         }
 903 }
 904 
 905 static char *core_pcbe_list_attrs(void)
 906 {
 907         if (versionid >= 3) {
 908                 return ("edge,inv,umask,cmask,anythr");
 909         } else {
 910                 return ("edge,pc,inv,umask,cmask");
 911         }
 912 }
 913 
 914 static const struct nametable_core_uarch *
 915 find_gpcevent_core_uarch(char *name,
 916     const struct nametable_core_uarch *nametable)
 917 {
 918         const struct nametable_core_uarch *n;
 919         int compare_result = -1;
 920 
 921         for (n = nametable; n->event_num != NT_END; n++) {
 922                 compare_result = strcmp(name, n->name);
 923                 if (compare_result <= 0) {
 924                         break;
 925                 }
 926         }
 927 
 928         if (compare_result == 0) {
 929                 return (n);
 930         }
 931 
 932         return (NULL);
 933 }
 934 
 935 static const struct generic_events *
 936 find_generic_events(char *name, const struct generic_events *table)
 937 {
 938         const struct generic_events *n;
 939 
 940         for (n = table; n->event_num != NT_END; n++) {
 941                 if (strcmp(name, n->name) == 0) {
 942                         return (n);
 943                 };
 944         }
 945 
 946         return (NULL);
 947 }
 948 
 949 static const struct events_table_t *
 950 find_gpcevent(char *name)
 951 {
 952         int i;
 953 
 954         /* Search architectural events */
 955         for (i = 0; i < known_arch_events; i++) {
 956                 if (strcmp(name, arch_events_table[i].name) == 0 ||
 957                     strcmp(name, arch_genevents_table[i]) == 0) {
 958                         if (((1U << i) & arch_events_vector) == 0) {
 959                                 return (&arch_events_table[i]);
 960                         }
 961                 }
 962         }
 963 
 964         /* Search non-architectural events */
 965         if (events_table != NULL) {
 966                 for (i = 0; events_table[i].eventselect != NT_END; i++) {
 967                         if (strcmp(name, events_table[i].name) == 0) {
 968                                 return (&events_table[i]);
 969                         }
 970                 }
 971         }
 972 
 973         return (NULL);
 974 }
 975 
 976 static uint64_t
 977 core_pcbe_event_coverage(char *event)
 978 {
 979         uint64_t bitmap;
 980         uint64_t bitmask;
 981         const struct events_table_t *n;
 982         int i;
 983 
 984         bitmap = 0;
 985 
 986         /* Is it an event that a GPC can track? */
 987         if (versionid >= 3) {
 988                 n = find_gpcevent(event);
 989                 if (n != NULL) {
 990                         bitmap |= (n->supported_counters &
 991                             BITMASK_XBITS(num_gpc));
 992                 }
 993         } else {
 994                 if (find_generic_events(event, cmn_generic_events) != NULL) {
 995                         bitmap |= BITMASK_XBITS(num_gpc);
 996                 } else if (find_generic_events(event,
 997                     generic_events_pic0) != NULL) {
 998                         bitmap |= 1ULL;
 999                 } else if (find_gpcevent_core_uarch(event,
1000                     cmn_gpc_events_core_uarch) != NULL) {
1001                         bitmap |= BITMASK_XBITS(num_gpc);
1002                 } else if (find_gpcevent_core_uarch(event, pic0_events) !=
1003                     NULL) {
1004                         bitmap |= 1ULL;
1005                 } else if (find_gpcevent_core_uarch(event, pic1_events) !=
1006                     NULL) {
1007                         bitmap |= 1ULL << 1;
1008                 }
1009         }
1010 
1011         /* Check if the event can be counted in the fixed-function counters */
1012         if (num_ffc > 0) {
1013                 bitmask = 1ULL << num_gpc;
1014                 for (i = 0; i < num_ffc; i++) {
1015                         if (strcmp(event, ffc_names[i]) == 0) {
1016                                 bitmap |= bitmask;
1017                         } else if (strcmp(event, ffc_genericnames[i]) == 0) {
1018                                 bitmap |= bitmask;
1019                         }
1020                         bitmask = bitmask << 1;
1021                 }
1022         }
1023 
1024         return (bitmap);
1025 }
1026 
1027 static uint64_t
1028 core_pcbe_overflow_bitmap(void)
1029 {
1030         uint64_t interrupt_status;
1031         uint64_t intrbits_ffc;
1032         uint64_t intrbits_gpc;
1033         extern int kcpc_hw_overflow_intr_installed;
1034         uint64_t overflow_bitmap;
1035 
1036         RDMSR(PERF_GLOBAL_STATUS, interrupt_status);
1037         WRMSR(PERF_GLOBAL_OVF_CTRL, interrupt_status);
1038 
1039         interrupt_status = interrupt_status & control_mask;
1040         intrbits_ffc = (interrupt_status >> 32) & control_ffc;
1041         intrbits_gpc = interrupt_status & control_gpc;
1042         overflow_bitmap = (intrbits_ffc << num_gpc) | intrbits_gpc;
1043 
1044         ASSERT(kcpc_hw_overflow_intr_installed);
1045         (*kcpc_hw_enable_cpc_intr)();
1046 
1047         return (overflow_bitmap);
1048 }
1049 
1050 static int
1051 check_cpc_securitypolicy(core_pcbe_config_t *conf,
1052     const struct nametable_core_uarch *n)
1053 {
1054         if (conf->core_ctl & n->restricted_bits) {
1055                 if (secpolicy_cpc_cpu(crgetcred()) != 0) {
1056                         return (CPC_ATTR_REQUIRES_PRIVILEGE);
1057                 }
1058         }
1059         return (0);
1060 }
1061 
1062 static int
1063 configure_gpc(uint_t picnum, char *event, uint64_t preset, uint32_t flags,
1064     uint_t nattrs, kcpc_attr_t *attrs, void **data)
1065 {
1066         core_pcbe_config_t      conf;
1067         const struct nametable_core_uarch       *n;
1068         const struct generic_events *k = NULL;
1069         const struct nametable_core_uarch       *m;
1070         const struct nametable_core_uarch       *picspecific_events;
1071         struct nametable_core_uarch     nt_raw = { "", 0x0, 0x0 };
1072         uint_t                  i;
1073         long                    event_num;
1074         const struct events_table_t *eventcode;
1075 
1076         if (((preset & BITS_EXTENDED_FROM_31) != 0) &&
1077             ((preset & BITS_EXTENDED_FROM_31) !=
1078             BITS_EXTENDED_FROM_31)) {
1079 
1080                 /*
1081                  * Bits beyond bit-31 in the general-purpose counters can only
1082                  * be written to by extension of bit 31.  We cannot preset
1083                  * these bits to any value other than all 1s or all 0s.
1084                  */
1085                 return (CPC_ATTRIBUTE_OUT_OF_RANGE);
1086         }
1087 
1088         if (versionid >= 3) {
1089                 eventcode = find_gpcevent(event);
1090                 if (eventcode != NULL) {
1091                         if ((C(picnum) & eventcode->supported_counters) == 0) {
1092                                 return (CPC_PIC_NOT_CAPABLE);
1093                         }
1094                         if (nattrs > 0 &&
1095                             (strncmp("PAPI_", event, 5) == 0)) {
1096                                 return (CPC_ATTRIBUTE_OUT_OF_RANGE);
1097                         }
1098                         conf.core_ctl = eventcode->eventselect;
1099                         conf.core_ctl |= eventcode->unitmask <<
1100                             CORE_UMASK_SHIFT;
1101                 } else {
1102                         /* Event specified as raw event code */
1103                         if (ddi_strtol(event, NULL, 0, &event_num) != 0) {
1104                                 return (CPC_INVALID_EVENT);
1105                         }
1106                         conf.core_ctl = event_num & 0xFF;
1107                 }
1108         } else {
1109                 if ((k = find_generic_events(event, cmn_generic_events)) !=
1110                     NULL ||
1111                     (picnum == 0 &&
1112                     (k = find_generic_events(event, generic_events_pic0)) !=
1113                     NULL)) {
1114                         if (nattrs > 0) {
1115                                 return (CPC_ATTRIBUTE_OUT_OF_RANGE);
1116                         }
1117                         conf.core_ctl = k->event_num;
1118                         conf.core_ctl |= k->umask << CORE_UMASK_SHIFT;
1119                 } else {
1120                         /* Not a generic event */
1121 
1122                         n = find_gpcevent_core_uarch(event,
1123                             cmn_gpc_events_core_uarch);
1124                         if (n == NULL) {
1125                                 switch (picnum) {
1126                                         case 0:
1127                                                 picspecific_events =
1128                                                     pic0_events;
1129                                                 break;
1130                                         case 1:
1131                                                 picspecific_events =
1132                                                     pic1_events;
1133                                                 break;
1134                                         default:
1135                                                 picspecific_events = NULL;
1136                                                 break;
1137                                 }
1138                                 if (picspecific_events != NULL) {
1139                                         n = find_gpcevent_core_uarch(event,
1140                                             picspecific_events);
1141                                 }
1142                         }
1143                         if (n == NULL) {
1144 
1145                                 /*
1146                                  * Check if this is a case where the event was
1147                                  * specified directly by its event number
1148                                  * instead of its name string.
1149                                  */
1150                                 if (ddi_strtol(event, NULL, 0, &event_num) !=
1151                                     0) {
1152                                         return (CPC_INVALID_EVENT);
1153                                 }
1154 
1155                                 event_num = event_num & 0xFF;
1156 
1157                                 /*
1158                                  * Search the event table to find out if the
1159                                  * event specified has an privilege
1160                                  * requirements.  Currently none of the
1161                                  * pic-specific counters have any privilege
1162                                  * requirements.  Hence only the table
1163                                  * cmn_gpc_events_core_uarch is searched.
1164                                  */
1165                                 for (m = cmn_gpc_events_core_uarch;
1166                                     m->event_num != NT_END;
1167                                     m++) {
1168                                         if (event_num == m->event_num) {
1169                                                 break;
1170                                         }
1171                                 }
1172                                 if (m->event_num == NT_END) {
1173                                         nt_raw.event_num = (uint8_t)event_num;
1174                                         n = &nt_raw;
1175                                 } else {
1176                                         n = m;
1177                                 }
1178                         }
1179                         conf.core_ctl = n->event_num; /* Event Select */
1180                 }
1181         }
1182 
1183 
1184         conf.core_picno = picnum;
1185         conf.core_pictype = CORE_GPC;
1186         conf.core_rawpic = preset & mask_gpc;
1187 
1188         conf.core_pes = GPC_BASE_PES + picnum;
1189         conf.core_pmc = GPC_BASE_PMC + picnum;
1190 
1191         for (i = 0; i < nattrs; i++) {
1192                 if (strncmp(attrs[i].ka_name, "umask", 6) == 0) {
1193                         if ((attrs[i].ka_val | CORE_UMASK_MASK) !=
1194                             CORE_UMASK_MASK) {
1195                                 return (CPC_ATTRIBUTE_OUT_OF_RANGE);
1196                         }
1197                         /* Clear out the default umask */
1198                         conf.core_ctl &= ~ (CORE_UMASK_MASK <<
1199                             CORE_UMASK_SHIFT);
1200                         /* Use the user provided umask */
1201                         conf.core_ctl |= attrs[i].ka_val <<
1202                             CORE_UMASK_SHIFT;
1203                 } else  if (strncmp(attrs[i].ka_name, "edge", 6) == 0) {
1204                         if (attrs[i].ka_val != 0)
1205                                 conf.core_ctl |= CORE_EDGE;
1206                 } else if (strncmp(attrs[i].ka_name, "inv", 4) == 0) {
1207                         if (attrs[i].ka_val != 0)
1208                                 conf.core_ctl |= CORE_INV;
1209                 } else if (strncmp(attrs[i].ka_name, "cmask", 6) == 0) {
1210                         if ((attrs[i].ka_val | CORE_CMASK_MASK) !=
1211                             CORE_CMASK_MASK) {
1212                                 return (CPC_ATTRIBUTE_OUT_OF_RANGE);
1213                         }
1214                         conf.core_ctl |= attrs[i].ka_val <<
1215                             CORE_CMASK_SHIFT;
1216                 } else if (strncmp(attrs[i].ka_name, "anythr", 7) ==
1217                     0) {
1218                         if (versionid < 3)
1219                                 return (CPC_INVALID_ATTRIBUTE);
1220                         if (secpolicy_cpc_cpu(crgetcred()) != 0) {
1221                                 return (CPC_ATTR_REQUIRES_PRIVILEGE);
1222                         }
1223                         if (attrs[i].ka_val != 0)
1224                                 conf.core_ctl |= CORE_ANYTHR;
1225                 } else {
1226                         return (CPC_INVALID_ATTRIBUTE);
1227                 }
1228         }
1229 
1230         if (flags & CPC_COUNT_USER)
1231                 conf.core_ctl |= CORE_USR;
1232         if (flags & CPC_COUNT_SYSTEM)
1233                 conf.core_ctl |= CORE_OS;
1234         if (flags & CPC_OVF_NOTIFY_EMT)
1235                 conf.core_ctl |= CORE_INT;
1236         conf.core_ctl |= CORE_EN;
1237 
1238         if (versionid < 3 && k == NULL) {
1239                 if (check_cpc_securitypolicy(&conf, n) != 0) {
1240                         return (CPC_ATTR_REQUIRES_PRIVILEGE);
1241                 }
1242         }
1243 
1244         *data = kmem_alloc(sizeof (core_pcbe_config_t), KM_SLEEP);
1245         *((core_pcbe_config_t *)*data) = conf;
1246 
1247         return (0);
1248 }
1249 
1250 static int
1251 configure_ffc(uint_t picnum, char *event, uint64_t preset, uint32_t flags,
1252     uint_t nattrs, kcpc_attr_t *attrs, void **data)
1253 {
1254         core_pcbe_config_t      *conf;
1255         uint_t                  i;
1256 
1257         if (picnum - num_gpc >= num_ffc) {
1258                 return (CPC_INVALID_PICNUM);
1259         }
1260 
1261         if ((strcmp(ffc_names[picnum-num_gpc], event) != 0) &&
1262             (strcmp(ffc_genericnames[picnum-num_gpc], event) != 0)) {
1263                 return (CPC_INVALID_EVENT);
1264         }
1265 
1266         if ((versionid < 3) && (nattrs != 0)) {
1267                 return (CPC_INVALID_ATTRIBUTE);
1268         }
1269 
1270         conf = kmem_alloc(sizeof (core_pcbe_config_t), KM_SLEEP);
1271         conf->core_ctl = 0;
1272 
1273         for (i = 0; i < nattrs; i++) {
1274                 if (strncmp(attrs[i].ka_name, "anythr", 7) == 0) {
1275                         if (secpolicy_cpc_cpu(crgetcred()) != 0) {
1276                                 kmem_free(conf, sizeof (core_pcbe_config_t));
1277                                 return (CPC_ATTR_REQUIRES_PRIVILEGE);
1278                         }
1279                         if (attrs[i].ka_val != 0) {
1280                                 conf->core_ctl |= CORE_FFC_ANYTHR;
1281                         }
1282                 } else {
1283                         kmem_free(conf, sizeof (core_pcbe_config_t));
1284                         return (CPC_INVALID_ATTRIBUTE);
1285                 }
1286         }
1287 
1288         conf->core_picno = picnum;
1289         conf->core_pictype = CORE_FFC;
1290         conf->core_rawpic = preset & mask_ffc;
1291         conf->core_pmc = FFC_BASE_PMC + (picnum - num_gpc);
1292 
1293         /* All fixed-function counters have the same control register */
1294         conf->core_pes = PERF_FIXED_CTR_CTRL;
1295 
1296         if (flags & CPC_COUNT_USER)
1297                 conf->core_ctl |= CORE_FFC_USR_EN;
1298         if (flags & CPC_COUNT_SYSTEM)
1299                 conf->core_ctl |= CORE_FFC_OS_EN;
1300         if (flags & CPC_OVF_NOTIFY_EMT)
1301                 conf->core_ctl |= CORE_FFC_PMI;
1302 
1303         *data = conf;
1304         return (0);
1305 }
1306 
1307 /*ARGSUSED*/
1308 static int
1309 core_pcbe_configure(uint_t picnum, char *event, uint64_t preset,
1310     uint32_t flags, uint_t nattrs, kcpc_attr_t *attrs, void **data,
1311     void *token)
1312 {
1313         int                     ret;
1314         core_pcbe_config_t      *conf;
1315 
1316         /*
1317          * If we've been handed an existing configuration, we need only preset
1318          * the counter value.
1319          */
1320         if (*data != NULL) {
1321                 conf = *data;
1322                 ASSERT(conf->core_pictype == CORE_GPC ||
1323                     conf->core_pictype == CORE_FFC);
1324                 if (conf->core_pictype == CORE_GPC)
1325                         conf->core_rawpic = preset & mask_gpc;
1326                 else /* CORE_FFC */
1327                         conf->core_rawpic = preset & mask_ffc;
1328                 return (0);
1329         }
1330 
1331         if (picnum >= total_pmc) {
1332                 return (CPC_INVALID_PICNUM);
1333         }
1334 
1335         if (picnum < num_gpc) {
1336                 ret = configure_gpc(picnum, event, preset, flags,
1337                     nattrs, attrs, data);
1338         } else {
1339                 ret = configure_ffc(picnum, event, preset, flags,
1340                     nattrs, attrs, data);
1341         }
1342         return (ret);
1343 }
1344 
1345 static void
1346 core_pcbe_program(void *token)
1347 {
1348         core_pcbe_config_t      *cfg;
1349         uint64_t                perf_global_ctrl;
1350         uint64_t                perf_fixed_ctr_ctrl;
1351         uint64_t                curcr4;
1352 
1353         core_pcbe_allstop();
1354 
1355         curcr4 = getcr4();
1356         if (kcpc_allow_nonpriv(token))
1357                 /* Allow RDPMC at any ring level */
1358                 setcr4(curcr4 | CR4_PCE);
1359         else
1360                 /* Allow RDPMC only at ring 0 */
1361                 setcr4(curcr4 & ~CR4_PCE);
1362 
1363         /* Clear any overflow indicators before programming the counters */
1364         WRMSR(PERF_GLOBAL_OVF_CTRL, MASK_CONDCHGD_OVFBUFFER | control_mask);
1365 
1366         cfg = NULL;
1367         perf_global_ctrl = 0;
1368         perf_fixed_ctr_ctrl = 0;
1369         cfg = (core_pcbe_config_t *)kcpc_next_config(token, cfg, NULL);
1370         while (cfg != NULL) {
1371                 ASSERT(cfg->core_pictype == CORE_GPC ||
1372                     cfg->core_pictype == CORE_FFC);
1373 
1374                 if (cfg->core_pictype == CORE_GPC) {
1375                         /*
1376                          * General-purpose counter registers have write
1377                          * restrictions where only the lower 32-bits can be
1378                          * written to.  The rest of the relevant bits are
1379                          * written to by extension from bit 31 (all ZEROS if
1380                          * bit-31 is ZERO and all ONE if bit-31 is ONE).  This
1381                          * makes it possible to write to the counter register
1382                          * only values that have all ONEs or all ZEROs in the
1383                          * higher bits.
1384                          */
1385                         if (((cfg->core_rawpic & BITS_EXTENDED_FROM_31) == 0) ||
1386                             ((cfg->core_rawpic & BITS_EXTENDED_FROM_31) ==
1387                             BITS_EXTENDED_FROM_31)) {
1388                                 /*
1389                                  * Straighforward case where the higher bits
1390                                  * are all ZEROs or all ONEs.
1391                                  */
1392                                 WRMSR(cfg->core_pmc,
1393                                     (cfg->core_rawpic & mask_gpc));
1394                         } else {
1395                                 /*
1396                                  * The high order bits are not all the same.
1397                                  * We save what is currently in the registers
1398                                  * and do not write to it.  When we want to do
1399                                  * a read from this register later (in
1400                                  * core_pcbe_sample()), we subtract the value
1401                                  * we save here to get the actual event count.
1402                                  *
1403                                  * NOTE: As a result, we will not get overflow
1404                                  * interrupts as expected.
1405                                  */
1406                                 RDMSR(cfg->core_pmc, cfg->core_rawpic);
1407                                 cfg->core_rawpic = cfg->core_rawpic & mask_gpc;
1408                         }
1409                         WRMSR(cfg->core_pes, cfg->core_ctl);
1410                         perf_global_ctrl |= 1ull << cfg->core_picno;
1411                 } else {
1412                         /*
1413                          * Unlike the general-purpose counters, all relevant
1414                          * bits of fixed-function counters can be written to.
1415                          */
1416                         WRMSR(cfg->core_pmc, cfg->core_rawpic & mask_ffc);
1417 
1418                         /*
1419                          * Collect the control bits for all the
1420                          * fixed-function counters and write it at one shot
1421                          * later in this function
1422                          */
1423                         perf_fixed_ctr_ctrl |= cfg->core_ctl <<
1424                             ((cfg->core_picno - num_gpc) * CORE_FFC_ATTR_SIZE);
1425                         perf_global_ctrl |=
1426                             1ull << (cfg->core_picno - num_gpc + 32);
1427                 }
1428 
1429                 cfg = (core_pcbe_config_t *)
1430                     kcpc_next_config(token, cfg, NULL);
1431         }
1432 
1433         /* Enable all the counters */
1434         WRMSR(PERF_FIXED_CTR_CTRL, perf_fixed_ctr_ctrl);
1435         WRMSR(PERF_GLOBAL_CTRL, perf_global_ctrl);
1436 }
1437 
/*
 * Stop all performance counters.
 *
 * Writes ALL_STOPPED to the PERF_GLOBAL_CTRL MSR so that every counter is
 * disabled by a single register write, and then clears the CR4_PCE bit in
 * %cr4.  Takes no arguments and returns nothing.
 */
static void
core_pcbe_allstop(void)
{
        /* Disable all the counters together */
        WRMSR(PERF_GLOBAL_CTRL, ALL_STOPPED);

        /*
         * Read-modify-write of %cr4 that drops only CR4_PCE.
         * NOTE(review): CR4.PCE is the architectural bit that permits RDPMC
         * outside ring 0; presumably it is set when counters are programmed
         * (not visible in this part of the file) -- confirm.
         */
        setcr4(getcr4() & ~CR4_PCE);
}
1446 
1447 static void
1448 core_pcbe_sample(void *token)
1449 {
1450         uint64_t                *daddr;
1451         uint64_t                curpic;
1452         core_pcbe_config_t      *cfg;
1453         uint64_t                        counter_mask;
1454 
1455         cfg = (core_pcbe_config_t *)kcpc_next_config(token, NULL, &daddr);
1456         while (cfg != NULL) {
1457                 ASSERT(cfg->core_pictype == CORE_GPC ||
1458                     cfg->core_pictype == CORE_FFC);
1459 
1460                 curpic = rdmsr(cfg->core_pmc);
1461 
1462                 DTRACE_PROBE4(core__pcbe__sample,
1463                     uint64_t, cfg->core_pmc,
1464                     uint64_t, curpic,
1465                     uint64_t, cfg->core_rawpic,
1466                     uint64_t, *daddr);
1467 
1468                 if (cfg->core_pictype == CORE_GPC) {
1469                         counter_mask = mask_gpc;
1470                 } else {
1471                         counter_mask = mask_ffc;
1472                 }
1473                 curpic = curpic & counter_mask;
1474                 if (curpic >= cfg->core_rawpic) {
1475                         *daddr += curpic - cfg->core_rawpic;
1476                 } else {
1477                         /* Counter overflowed since our last sample */
1478                         *daddr += counter_mask - (cfg->core_rawpic - curpic) +
1479                             1;
1480                 }
1481                 cfg->core_rawpic = *daddr & counter_mask;
1482 
1483                 cfg =
1484                     (core_pcbe_config_t *)kcpc_next_config(token, cfg, &daddr);
1485         }
1486 }
1487 
1488 static void
1489 core_pcbe_free(void *config)
1490 {
1491         kmem_free(config, sizeof (core_pcbe_config_t));
1492 }
1493 
/*
 * Linkage record describing this module as a PCBE.  The initializers are
 * positional: the address of mod_pcbeops, the descriptive string for the
 * module, and the address of this file's core_pcbe_ops table.
 */
static struct modlpcbe core_modlpcbe = {
        &mod_pcbeops,
        "Core Performance Counters",
        &core_pcbe_ops
};
1499 
/*
 * Module linkage passed to mod_install(), mod_remove() and mod_info() by the
 * entry points below.  It carries the revision MODREV_1 and a pointer to
 * core_modlpcbe; any members not listed are implicitly zero-initialized.
 */
static struct modlinkage core_modl = {
        MODREV_1,
        &core_modlpcbe,
};
1504 
1505 int
1506 _init(void)
1507 {
1508         if (core_pcbe_init() != 0) {
1509                 return (ENOTSUP);
1510         }
1511         return (mod_install(&core_modl));
1512 }
1513 
1514 int
1515 _fini(void)
1516 {
1517         return (mod_remove(&core_modl));
1518 }
1519 
1520 int
1521 _info(struct modinfo *mi)
1522 {
1523         return (mod_info(&core_modl, mi));
1524 }