1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
  23  */
  24 
  25 /*
  26  * This file contains preset event names from the Performance Application
  27  * Programming Interface v3.5 which included the following notice:
  28  *
  29  *                             Copyright (c) 2005,6
  30  *                           Innovative Computing Labs
  31  *                         Computer Science Department,
  32  *                            University of Tennessee,
  33  *                                 Knoxville, TN.
  34  *                              All Rights Reserved.
  35  *
  36  *
  37  * Redistribution and use in source and binary forms, with or without
  38  * modification, are permitted provided that the following conditions are met:
  39  *
  40  *    * Redistributions of source code must retain the above copyright notice,
  41  *      this list of conditions and the following disclaimer.
  42  *    * Redistributions in binary form must reproduce the above copyright
  43  *      notice, this list of conditions and the following disclaimer in the
  44  *      documentation and/or other materials provided with the distribution.
  45  *    * Neither the name of the University of Tennessee nor the names of its
  46  *      contributors may be used to endorse or promote products derived from
  47  *      this software without specific prior written permission.
  48  *
  49  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  50  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  51  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  52  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  53  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  54  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  55  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  56  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  57  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  58  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  59  * POSSIBILITY OF SUCH DAMAGE.
  60  *
  61  *
  62  * This open source software license conforms to the BSD License template.
  63  */
  64 
  65 /*
  66  * Performance Counter Back-End for Pentiums I, II, and III.
  67  */
  68 
  69 #include <sys/cpuvar.h>
  70 #include <sys/param.h>
  71 #include <sys/cpc_impl.h>
  72 #include <sys/cpc_pcbe.h>
  73 #include <sys/modctl.h>
  74 #include <sys/inttypes.h>
  75 #include <sys/systm.h>
  76 #include <sys/cmn_err.h>
  77 #include <sys/x86_archext.h>
  78 #include <sys/sdt.h>
  79 #include <sys/archsystm.h>
  80 #include <sys/privregs.h>
  81 #include <sys/ddi.h>
  82 #include <sys/sunddi.h>
  83 
/*
 * Counter-value helpers; only the prototypes are visible at this point.
 * trunc3931() is applied to a caller-supplied preset before the result is
 * stored in a configuration's ptm_rawpic.
 * NOTE(review): the "3931" suffix presumably names the counter bits these
 * helpers treat specially -- confirm against the definitions.
 */
static int64_t diff3931(uint64_t sample, uint64_t old);
static uint64_t trunc3931(uint64_t value);

/*
 * Entry points of this back-end.  All of them except ptm_pcbe_init() and
 * ptm_pcbe_pic_index() are installed in ptm_pcbe_ops.
 * NOTE(review): no definition or use of ptm_pcbe_pic_index() is visible in
 * this part of the file -- confirm whether the prototype is still needed.
 */
static int ptm_pcbe_init(void);
static uint_t ptm_pcbe_ncounters(void);
static const char *ptm_pcbe_impl_name(void);
static const char *ptm_pcbe_cpuref(void);
static char *ptm_pcbe_list_events(uint_t picnum);
static char *ptm_pcbe_list_attrs(void);
static uint64_t ptm_pcbe_event_coverage(char *event);
static int ptm_pcbe_pic_index(char *picname);
static uint64_t ptm_pcbe_overflow_bitmap(void);
static int ptm_pcbe_configure(uint_t picnum, char *event, uint64_t preset,
    uint32_t flags, uint_t nattrs, kcpc_attr_t *attrs, void **data,
    void *token);
static void ptm_pcbe_program(void *token);
static void ptm_pcbe_allstop(void);
static void ptm_pcbe_sample(void *token);
static void ptm_pcbe_free(void *config);
 103 
/*
 * Operations vector describing this back-end.  The initializer is
 * positional, so the entries must stay in the member order of pcbe_ops_t,
 * which is declared outside this file.
 * NOTE(review): the second slot appears to be pcbe_caps, which
 * ptm_pcbe_init() overwrites with CPC_CAP_OVERFLOW_INTERRUPT on family 6
 * processors -- confirm against the pcbe_ops_t declaration.
 */
pcbe_ops_t ptm_pcbe_ops = {
        PCBE_VER_1,
        0,
        ptm_pcbe_ncounters,
        ptm_pcbe_impl_name,
        ptm_pcbe_cpuref,
        ptm_pcbe_list_events,
        ptm_pcbe_list_attrs,
        ptm_pcbe_event_coverage,
        ptm_pcbe_overflow_bitmap,
        ptm_pcbe_configure,
        ptm_pcbe_program,
        ptm_pcbe_allstop,
        ptm_pcbe_sample,
        ptm_pcbe_free
};
 120 
/*
 * Counter architecture selected by ptm_pcbe_init(): cpuid family 5 maps to
 * PTM_VER_P5 and family 6 maps to PTM_VER_P6.
 */
typedef enum _ptm_ver {
        PTM_VER_P5,
        PTM_VER_P6
} ptm_ver_t;
 125 
/*
 * Module state filled in by ptm_pcbe_init():
 *   ptm_ver        counter architecture of the running processor
 *   ptm_impl_name  string handed back by ptm_pcbe_impl_name()
 *   ptm_cpuref     string handed back by ptm_pcbe_cpuref()
 *   pic_events     per-counter comma-separated event lists handed back by
 *                  ptm_pcbe_list_events(); NULL until they have been built
 */
static ptm_ver_t ptm_ver;
static const char *ptm_impl_name;
static const char *ptm_cpuref;
static char *pic_events[2] = { NULL, NULL };

/*
 * Indicates whether the "rdpmc" instruction is available on this processor.
 * ptm_pcbe_init() sets it to 1 when the X86FSET_MMX feature is present.
 */
static int ptm_rdpmc_avail = 0;

/*
 * NOTE(review): no use of ALL_STOPPED is visible in this part of the file;
 * presumably it is the control value written to stop both counters --
 * confirm.
 */
#define ALL_STOPPED     0ULL
 137 
/*
 * Per-counter configuration allocated by ptm_pcbe_configure() and handed
 * back to the caller through its "data" argument.  ptm_rawpic holds the
 * preset after it has been passed through trunc3931().
 */
typedef struct _ptm_pcbe_config {
        uint8_t         ptm_picno;      /* 0 for pic0 or 1 for pic1 */
        uint32_t        ptm_ctl;    /* P6: PerfEventSelect; P5: cesr, shifted */
        uint64_t        ptm_rawpic;
} ptm_pcbe_config_t;
 143 
/*
 * One native event: the event code that is placed in the control value and
 * the name by which the event is requested.  Tables of these are terminated
 * by an entry whose bits field equals NT_END.
 */
struct nametable {
        uint8_t         bits;
        const char      *name;
};
 148 
/*
 * One generic event: "name" is the generic (PAPI_*) name, "event" is the
 * name of the native event it maps to (looked up with find_event()), and
 * "umask" is ORed into the control value, shifted by
 * CPC_P6_PES_UMASK_SHIFT, when the processor is a P6.
 */
typedef struct _ptm_generic_events {
        char *name;
        char *event;
        uint8_t umask;
} ptm_generic_event_t;

/*
 * Table terminators: NT_END is the "bits" value that ends a nametable, and
 * CPC_GEN_END is the entry (NULL name) that ends a generic event table.
 */
#define NT_END 0xFF
#define CPC_GEN_END { NULL, NULL }
 157 
/*
 * Basic Pentium events
 *
 * These entries are shared by the tables for both counters.  The macro
 * expands to a bare initializer list with no terminator: each table that
 * uses it appends its counter-specific entries and the NT_END sentinel.
 */
#define P5_EVENTS                               \
        {0x0,   "data_read"},                   \
        {0x1,   "data_write"},                  \
        {0x2,   "data_tlb_miss"},               \
        {0x3,   "data_read_miss"},              \
        {0x4,   "data_write_miss"},             \
        {0x5,   "write_hit_to_M_or_E"},         \
        {0x6,   "dcache_lines_wrback"},         \
        {0x7,   "external_snoops"},             \
        {0x8,   "external_dcache_snoop_hits"},  \
        {0x9,   "memory_access_in_both_pipes"}, \
        {0xa,   "bank_conflicts"},              \
        {0xb,   "misaligned_ref"},              \
        {0xc,   "code_read"},                   \
        {0xd,   "code_tlb_miss"},               \
        {0xe,   "code_cache_miss"},             \
        {0xf,   "any_segreg_loaded"},           \
        {0x12,  "branches"},                    \
        {0x13,  "btb_hits"},                    \
        {0x14,  "taken_or_btb_hit"},            \
        {0x15,  "pipeline_flushes"},            \
        {0x16,  "instr_exec"},                  \
        {0x17,  "instr_exec_V_pipe"},           \
        {0x18,  "clks_bus_cycle"},              \
        {0x19,  "clks_full_wbufs"},             \
        {0x1a,  "pipe_stall_read"},             \
        {0x1b,  "stall_on_write_ME"},           \
        {0x1c,  "locked_bus_cycle"},            \
        {0x1d,  "io_rw_cycles"},                \
        {0x1e,  "reads_noncache_mem"},          \
        {0x1f,  "pipeline_agi_stalls"},         \
        {0x22,  "flops"},                       \
        {0x23,  "bp_match_dr0"},                \
        {0x24,  "bp_match_dr1"},                \
        {0x25,  "bp_match_dr2"},                \
        {0x26,  "bp_match_dr3"},                \
        {0x27,  "hw_intrs"},                    \
        {0x28,  "data_rw"},                     \
        {0x29,  "data_rw_miss"}
 200 
/*
 * Native events for Pentium counter 0: the shared P5_EVENTS followed by
 * codes 0x2a-0x3b, which name different events on counter 1 (see
 * P5mmx_names1).
 */
static const struct nametable P5mmx_names0[] = {
        P5_EVENTS,
        {0x2a,  "bus_ownership_latency"},
        {0x2b,  "mmx_instr_upipe"},
        {0x2c,  "cache_M_line_sharing"},
        {0x2d,  "emms_instr"},
        {0x2e,  "bus_util_processor"},
        {0x2f,  "sat_mmx_instr"},
        {0x30,  "clks_not_HLT"},
        {0x31,  "mmx_data_read"},
        {0x32,  "clks_fp_stall"},
        {0x33,  "d1_starv_fifo_0"},
        {0x34,  "mmx_data_write"},
        {0x35,  "pipe_flush_wbp"},
        {0x36,  "mmx_misalign_data_refs"},
        {0x37,  "rets_pred_incorrect"},
        {0x38,  "mmx_multiply_unit_interlock"},
        {0x39,  "rets"},
        {0x3a,  "btb_false_entries"},
        {0x3b,  "clocks_stall_full_wb"},
        {NT_END, ""}
};
 223 
/*
 * Native events for Pentium counter 1: the shared P5_EVENTS followed by
 * codes 0x2a-0x3b, which name different events on counter 0 (see
 * P5mmx_names0).
 */
static const struct nametable P5mmx_names1[] = {
        P5_EVENTS,
        {0x2a,  "bus_ownership_transfers"},
        {0x2b,  "mmx_instr_vpipe"},
        {0x2c,  "cache_lint_sharing"},
        {0x2d,  "mmx_fp_transitions"},
        {0x2e,  "writes_noncache_mem"},
        {0x2f,  "sats_performed"},
        {0x30,  "clks_dcache_tlb_miss"},
        {0x31,  "mmx_data_read_miss"},
        {0x32,  "taken_br"},
        {0x33,  "d1_starv_fifo_1"},
        {0x34,  "mmx_data_write_miss"},
        {0x35,  "pipe_flush_wbp_wb"},
        {0x36,  "mmx_pipe_stall_data_read"},
        {0x37,  "rets_pred"},
        {0x38,  "movd_movq_stall"},
        {0x39,  "rsb_overflow"},
        {0x3a,  "btb_mispred_nt"},
        {0x3b,  "mmx_stall_write_ME"},
        {NT_END, ""}
};
 246 
/*
 * Native event tables for family 5, indexed by counter number.
 * ptm_pcbe_init() points "events" at this array.
 */
static const struct nametable *P5mmx_names[2] = {
        P5mmx_names0,
        P5mmx_names1
};
 251 
/*
 * Pentium Pro and Pentium II events
 *
 * A single table serves both counters (see P6_names).
 * NOTE(review): the "0 only" / "1 only" remarks below presumably name the
 * one counter able to count that event; nothing visible in this part of the
 * file enforces the restriction, since both counters share this table --
 * confirm.
 */
static const struct nametable _P6_names[] = {
        /*
         * Data cache unit
         */
        {0x43,  "data_mem_refs"},
        {0x45,  "dcu_lines_in"},
        {0x46,  "dcu_m_lines_in"},
        {0x47,  "dcu_m_lines_out"},
        {0x48,  "dcu_miss_outstanding"},

        /*
         * Instruction fetch unit
         */
        {0x80,  "ifu_ifetch"},
        {0x81,  "ifu_ifetch_miss"},
        {0x85,  "itlb_miss"},
        {0x86,  "ifu_mem_stall"},
        {0x87,  "ild_stall"},

        /*
         * L2 cache
         */
        {0x28,  "l2_ifetch"},
        {0x29,  "l2_ld"},
        {0x2a,  "l2_st"},
        {0x24,  "l2_lines_in"},
        {0x26,  "l2_lines_out"},
        {0x25,  "l2_m_lines_inm"},
        {0x27,  "l2_m_lines_outm"},
        {0x2e,  "l2_rqsts"},
        {0x21,  "l2_ads"},
        {0x22,  "l2_dbus_busy"},
        {0x23,  "l2_dbus_busy_rd"},

        /*
         * External bus logic
         */
        {0x62,  "bus_drdy_clocks"},
        {0x63,  "bus_lock_clocks"},
        {0x60,  "bus_req_outstanding"},
        {0x65,  "bus_tran_brd"},
        {0x66,  "bus_tran_rfo"},
        {0x67,  "bus_trans_wb"},
        {0x68,  "bus_tran_ifetch"},
        {0x69,  "bus_tran_inval"},
        {0x6a,  "bus_tran_pwr"},
        {0x6b,  "bus_trans_p"},
        {0x6c,  "bus_trans_io"},
        {0x6d,  "bus_tran_def"},
        {0x6e,  "bus_tran_burst"},
        {0x70,  "bus_tran_any"},
        {0x6f,  "bus_tran_mem"},
        {0x64,  "bus_data_rcv"},
        {0x61,  "bus_bnr_drv"},
        {0x7a,  "bus_hit_drv"},
        {0x7b,  "bus_hitm_drv"},
        {0x7e,  "bus_snoop_stall"},

        /*
         * Floating point unit
         */
        {0xc1,  "flops"},               /* 0 only */
        {0x10,  "fp_comp_ops_exe"},     /* 0 only */
        {0x11,  "fp_assist"},           /* 1 only */
        {0x12,  "mul"},                 /* 1 only */
        {0x13,  "div"},                 /* 1 only */
        {0x14,  "cycles_div_busy"},     /* 0 only */

        /*
         * Memory ordering
         */
        {0x3,   "ld_blocks"},
        {0x4,   "sb_drains"},
        {0x5,   "misalign_mem_ref"},

        /*
         * Instruction decoding and retirement
         */
        {0xc0,  "inst_retired"},
        {0xc2,  "uops_retired"},
        {0xd0,  "inst_decoder"},

        /*
         * Interrupts
         */
        {0xc8,  "hw_int_rx"},
        {0xc6,  "cycles_int_masked"},
        {0xc7,  "cycles_int_pending_and_masked"},

        /*
         * Branches
         */
        {0xc4,  "br_inst_retired"},
        {0xc5,  "br_miss_pred_retired"},
        {0xc9,  "br_taken_retired"},
        {0xca,  "br_miss_pred_taken_ret"},
        {0xe0,  "br_inst_decoded"},
        {0xe2,  "btb_misses"},
        {0xe4,  "br_bogus"},
        {0xe6,  "baclears"},

        /*
         * Stalls
         */
        {0xa2,  "resource_stalls"},
        {0xd2,  "partial_rat_stalls"},

        /*
         * Segment register loads
         */
        {0x6,   "segment_reg_loads"},

        /*
         * Clocks
         */
        {0x79,  "cpu_clk_unhalted"},

        /*
         * MMX
         */
        {0xb0,  "mmx_instr_exec"},
        {0xb1,  "mmx_sat_instr_exec"},
        {0xb2,  "mmx_uops_exec"},
        {0xb3,  "mmx_instr_type_exec"},
        {0xcc,  "fp_mmx_trans"},
        {0xcd,  "mmx_assists"},
        {0xce,  "mmx_instr_ret"},
        {0xd4,  "seg_rename_stalls"},
        {0xd5,  "seg_reg_renames"},
        {0xd6,  "ret_seg_renames"},

        {NT_END, ""}
};
 388 
/*
 * Native event tables for family 6, indexed by counter number.  Both
 * counters share _P6_names.  ptm_pcbe_init() points "events" at this array.
 */
static const struct nametable *P6_names[2] = {
        _P6_names,
        _P6_names
};
 393 
 394 #define P5_GENERIC_EVENTS                                       \
 395         { "PAPI_tot_ins",       "instr_exec",    0x0 },         \
 396         { "PAPI_tlb_dm",        "data_tlb_miss", 0x0 },         \
 397         { "PAPI_tlb_im",        "code_tlb_miss", 0x0 },         \
 398         { "PAPI_fp_ops",        "flops" }
 399 
/*
 * Generic events for Pentium counter 0: the shared list plus PAPI_tot_cyc,
 * whose native event "clks_not_HLT" appears only in P5mmx_names0.
 */
static const ptm_generic_event_t P5mmx_generic_names0[] = {
        P5_GENERIC_EVENTS,
        { "PAPI_tot_cyc",       "clks_not_HLT", 0x0 },
        CPC_GEN_END
};

/*
 * Generic events for Pentium counter 1: the shared list plus PAPI_br_ins,
 * whose native event "taken_br" appears only in P5mmx_names1.
 */
static const ptm_generic_event_t P5mmx_generic_names1[] = {
        P5_GENERIC_EVENTS,
        { "PAPI_br_ins",        "taken_br",     0x0 },
        CPC_GEN_END
};

/*
 * Generic event tables for family 5, indexed by counter number.
 * ptm_pcbe_init() points "generic_events" at this array.
 */
static const ptm_generic_event_t *P5mmx_generic_names[2] = {
        P5mmx_generic_names0,
        P5mmx_generic_names1
};
 416 
/*
 * Generic (PAPI_*) events for family 6.  Each row gives the generic name,
 * the name of the native event in _P6_names it maps to, and the unit mask
 * that ptm_pcbe_configure() ORs into the control value for that event.
 * Several generic names map to the same native event.
 */
static const ptm_generic_event_t _P6_generic_names[] = {
        { "PAPI_ca_shr",        "l2_ifetch",            0xf },
        { "PAPI_ca_cln",        "bus_tran_rfo",         0x0 },
        { "PAPI_ca_itv",        "bus_tran_inval",       0x0 },
        { "PAPI_tlb_im",        "itlb_miss",            0x0 },
        { "PAPI_btac_m",        "btb_misses",           0x0 },
        { "PAPI_hw_int",        "hw_int_rx",            0x0 },
        { "PAPI_br_cn",         "br_inst_retired",      0x0 },
        { "PAPI_br_tkn",        "br_taken_retired",     0x0 },
        { "PAPI_br_msp",        "br_miss_pred_taken_ret", 0x0 },
        { "PAPI_br_ins",        "br_inst_retired",      0x0 },
        { "PAPI_res_stl",       "resource_stalls",      0x0 },
        { "PAPI_tot_iis",       "inst_decoder",         0x0 },
        { "PAPI_tot_ins",       "inst_retired",         0x0 },
        { "PAPI_tot_cyc",       "cpu_clk_unhalted",     0x0 },
        { "PAPI_l1_dcm",        "dcu_lines_in",         0x0 },
        { "PAPI_l1_icm",        "l2_ifetch",            0xf },
        { "PAPI_l1_tcm",        "l2_rqsts",             0xf },
        { "PAPI_l1_dca",        "data_mem_refs",        0x0 },
        { "PAPI_l1_stm",        "l2_st",                0xf },
        { "PAPI_l2_icm",        "bus_tran_ifetch",      0x0 },
        { "PAPI_l2_dcr",        "l2_ld",                0xf },
        { "PAPI_l2_dcw",        "l2_st",                0xf },
        { "PAPI_l2_tcm",        "l2_lines_in",          0x0 },
        { "PAPI_l2_tca",        "l2_rqsts",             0xf },
        { "PAPI_l2_tcw",        "l2_st",                0xf },
        { "PAPI_l2_stm",        "l2_m_lines_inm",       0x0 },
        { "PAPI_fp_ins",        "flops",                0x0 },
        { "PAPI_fp_ops",        "flops",                0x0 },
        { "PAPI_fml_ins",       "mul",                  0x0 },
        { "PAPI_fdv_ins",       "div",                  0x0 },
        CPC_GEN_END
};
 450 
/*
 * Generic event tables for family 6, indexed by counter number.  Both
 * counters share _P6_generic_names.
 */
static const ptm_generic_event_t *P6_generic_names[2] = {
        _P6_generic_names,
        _P6_generic_names
};

/*
 * Tables in effect for the running processor, each indexed by counter
 * number.  ptm_pcbe_init() sets both before any lookup is possible.
 */
static const struct nametable **events;
static const ptm_generic_event_t **generic_events;
 458 
/*
 * Extract bits u..l (inclusive, u >= l) of v, shifted down to bit 0.
 * The mask is built from the int constant 1, so the field width
 * (1 + u - l) must be smaller than the width of int.
 */
#define BITS(v, u, l)   \
        (((v) >> (l)) & ((1 << (1 + (u) - (l))) - 1))
 461 
/*
 * "Well known" bit fields in the Pentium CES register
 * The interfaces in libcpc should make these #defines uninteresting.
 *
 * The ES shift/mask pairs locate the event-select field of each counter.
 * The remaining values are bit positions; the counter 1 positions are the
 * counter 0 positions plus 16.  ptm_pcbe_configure() sets the USR bit for
 * CPC_COUNT_USER, the OS bit for CPC_COUNT_SYSTEM, the CLK bit for the
 * "noedge" attribute and the PC bit for the "pc" attribute.
 */
#define CPC_P5_CESR_ES0_SHIFT   0
#define CPC_P5_CESR_ES0_MASK    0x3f
#define CPC_P5_CESR_ES1_SHIFT   16
#define CPC_P5_CESR_ES1_MASK    0x3f

#define CPC_P5_CESR_OS0         6
#define CPC_P5_CESR_USR0        7
#define CPC_P5_CESR_CLK0        8
#define CPC_P5_CESR_PC0         9
#define CPC_P5_CESR_OS1         (CPC_P5_CESR_OS0 + 16)
#define CPC_P5_CESR_USR1        (CPC_P5_CESR_USR0 + 16)
#define CPC_P5_CESR_CLK1        (CPC_P5_CESR_CLK0 + 16)
#define CPC_P5_CESR_PC1         (CPC_P5_CESR_PC0 + 16)
 479 
/*
 * "Well known" bit fields in the Pentium Pro PerfEvtSel registers
 * The interfaces in libcpc should make these #defines uninteresting.
 *
 * The first group gives bit positions of single-bit flags.
 */
#define CPC_P6_PES_INV          23
#define CPC_P6_PES_EN           22
#define CPC_P6_PES_INT          20
#define CPC_P6_PES_PC           19
#define CPC_P6_PES_E            18
#define CPC_P6_PES_OS           17
#define CPC_P6_PES_USR          16

/*
 * Unit mask field: shift, and mask of the unshifted value.
 */
#define CPC_P6_PES_UMASK_SHIFT  8
#define CPC_P6_PES_UMASK_MASK   (0xffu)

/*
 * Counter mask field: shift, and mask of the unshifted value.
 */
#define CPC_P6_PES_CMASK_SHIFT  24
#define CPC_P6_PES_CMASK_MASK   (0xffu)

/*
 * Masks for the event code of each counter; ptm_pcbe_configure() applies
 * the PIC0 mask to raw (numeric) event codes.
 */
#define CPC_P6_PES_PIC0_MASK    (0xffu)
#define CPC_P6_PES_PIC1_MASK    (0xffu)

/*
 * 32-bit masks for the flag bits this file tests or sets directly.
 */
#define P6_PES_EN       (UINT32_C(1) << CPC_P6_PES_EN)
#define P6_PES_INT      (UINT32_C(1) << CPC_P6_PES_INT)
#define P6_PES_OS       (UINT32_C(1) << CPC_P6_PES_OS)
 504 
/*
 * Pentium 5 attributes
 *
 * Software-only flag bits that ptm_pcbe_configure() collects while parsing
 * the attribute list, before translating them into control register bits.
 */
#define P5_NOEDGE       0x1     /* "noedge"     - no edge detection */
#define P5_PC           0x2     /* "pc"         - pin control */

/*
 * Pentium 6 attributes
 *
 * Same role as the P5 flags; "noedge" and "pc" keep the same values.
 */
#define P6_NOEDGE       0x1
#define P6_PC           0x2
#define P6_INV          0x4     /* "inv" - count inverted transitions */
#define P6_INT          0x8     /* "int" - interrupt on overflow */
 518 
/*
 * CPU reference strings
 *
 * ptm_pcbe_init() stores one of these in ptm_cpuref, and ptm_pcbe_cpuref()
 * returns it.  They differ only in the appendix cited.
 */

#define P5_CPUREF       "See Appendix A.4 of the \"IA-32 Intel Architecture "  \
                        "Software Developer's Manual Volume 3: System "        \
                        "Programming Guide,\" Order # 245472-012, 2003"

#define P6_CPUREF       "See Appendix A.3 of the \"IA-32 Intel Architecture "  \
                        "Software Developer's Manual Volume 3: System "        \
                        "Programming Guide,\" Order # 245472-012, 2003"
 530 
 531 static int
 532 ptm_pcbe_init(void)
 533 {
 534         const struct nametable          *n;
 535         const ptm_generic_event_t       *gevp;
 536         int                             i;
 537         size_t                          size;
 538 
 539         if (is_x86_feature(x86_featureset, X86FSET_MMX))
 540                 ptm_rdpmc_avail = 1;
 541 
 542         /*
 543          * Discover type of CPU and set events pointer appropriately.
 544          *
 545          * Map family and model into the performance
 546          * counter architectures we currently understand.
 547          *
 548          * See application note AP485 (from developer.intel.com)
 549          * for further explanation.
 550          */
 551         if (cpuid_getvendor(CPU) != X86_VENDOR_Intel)
 552                 return (-1);
 553         switch (cpuid_getfamily(CPU)) {
 554         case 5:         /* Pentium and Pentium with MMX */
 555                 events = P5mmx_names;
 556                 generic_events = P5mmx_generic_names;
 557                 ptm_ver = PTM_VER_P5;
 558                 ptm_cpuref = P5_CPUREF;
 559                 if (cpuid_getmodel(CPU) < 4)
 560                         ptm_impl_name = "Pentium";
 561                 else
 562                         ptm_impl_name = "Pentium with MMX";
 563                 break;
 564         case 6:         /* Pentium Pro and Pentium II and III */
 565                 events = P6_names;
 566                 generic_events = P6_generic_names;
 567                 ptm_ver = PTM_VER_P6;
 568                 ptm_cpuref = P6_CPUREF;
 569                 ptm_pcbe_ops.pcbe_caps = CPC_CAP_OVERFLOW_INTERRUPT;
 570                 if (is_x86_feature(x86_featureset, X86FSET_MMX))
 571                         ptm_impl_name = "Pentium Pro with MMX, Pentium II";
 572                 else
 573                         ptm_impl_name = "Pentium Pro, Pentium II";
 574                 break;
 575         default:
 576                 return (-1);
 577         }
 578 
 579         /*
 580          * Initialize the list of events for each PIC.
 581          * Do two passes: one to compute the size necessary and another
 582          * to copy the strings. Need room for event, comma, and NULL terminator.
 583          */
 584         for (i = 0; i < 2; i++) {
 585                 size = 0;
 586                 for (n = events[i]; n->bits != NT_END; n++)
 587                         size += strlen(n->name) + 1;
 588                 for (gevp = generic_events[i]; gevp->name != NULL; gevp++)
 589                         size += strlen(gevp->name) + 1;
 590                 pic_events[i] = kmem_alloc(size + 1, KM_SLEEP);
 591                 *pic_events[i] = '\0';
 592                 for (n = events[i]; n->bits != NT_END; n++) {
 593                         (void) strcat(pic_events[i], n->name);
 594                         (void) strcat(pic_events[i], ",");
 595                 }
 596                 for (gevp = generic_events[i]; gevp->name != NULL; gevp++) {
 597                         (void) strcat(pic_events[i], gevp->name);
 598                         (void) strcat(pic_events[i], ",");
 599                 }
 600 
 601                 /*
 602                  * Remove trailing comma.
 603                  */
 604                 pic_events[i][size - 1] = '\0';
 605         }
 606 
 607         return (0);
 608 }
 609 
 610 static uint_t
 611 ptm_pcbe_ncounters(void)
 612 {
 613         return (2);
 614 }
 615 
 616 static const char *
 617 ptm_pcbe_impl_name(void)
 618 {
 619         return (ptm_impl_name);
 620 }
 621 
 622 static const char *
 623 ptm_pcbe_cpuref(void)
 624 {
 625         return (ptm_cpuref);
 626 }
 627 
 628 static char *
 629 ptm_pcbe_list_events(uint_t picnum)
 630 {
 631         ASSERT(picnum >= 0 && picnum < cpc_ncounters);
 632 
 633         if (pic_events[0] == NULL) {
 634                 ASSERT(pic_events[1] == NULL);
 635         }
 636 
 637         return (pic_events[picnum]);
 638 }
 639 
 640 static char *
 641 ptm_pcbe_list_attrs(void)
 642 {
 643         if (ptm_ver == PTM_VER_P5)
 644                 return ("noedge,pc");
 645         else
 646                 return ("noedge,pc,inv,int,umask,cmask");
 647 }
 648 
 649 static const ptm_generic_event_t *
 650 find_generic_event(int regno, char *name)
 651 {
 652         const ptm_generic_event_t       *gevp;
 653 
 654         for (gevp = generic_events[regno]; gevp->name != NULL; gevp++)
 655                 if (strcmp(name, gevp->name) == 0)
 656                         return (gevp);
 657 
 658         return (NULL);
 659 }
 660 
 661 static const struct nametable *
 662 find_event(int regno, char *name)
 663 {
 664         const struct nametable *n;
 665 
 666         n = events[regno];
 667 
 668         for (; n->bits != NT_END; n++)
 669                 if (strcmp(name, n->name) == 0)
 670                         return (n);
 671 
 672         return (NULL);
 673 }
 674 
 675 static uint64_t
 676 ptm_pcbe_event_coverage(char *event)
 677 {
 678         uint64_t bitmap = 0;
 679 
 680         if ((find_event(0, event) != NULL) ||
 681             (find_generic_event(0, event) != NULL))
 682                 bitmap = 0x1;
 683         if ((find_event(1, event) != NULL) ||
 684             (find_generic_event(1, event) != NULL))
 685                 bitmap |= 0x2;
 686 
 687         return (bitmap);
 688 }
 689 
 690 static uint64_t
 691 ptm_pcbe_overflow_bitmap(void)
 692 {
 693         uint64_t        ret = 0;
 694         uint64_t        pes[2];
 695 
 696         /*
 697          * P5 is not capable of generating interrupts.
 698          */
 699         ASSERT(ptm_ver == PTM_VER_P6);
 700 
 701         /*
 702          * CPC could have caused an interrupt provided that
 703          *
 704          * 1) Counters are enabled
 705          * 2) Either counter has requested an interrupt
 706          */
 707 
 708         pes[0] = rdmsr(REG_PERFEVNT0);
 709         if (((uint32_t)pes[0] & P6_PES_EN) != P6_PES_EN)
 710                 return (0);
 711 
 712         /*
 713          * If a particular counter requested an interrupt, assume it caused
 714          * this interrupt. There is no way to determine which counter overflowed
 715          * on this hardware other than by using unreliable heuristics.
 716          */
 717 
 718         pes[1] = rdmsr(REG_PERFEVNT1);
 719         if ((uint32_t)pes[0] & P6_PES_INT)
 720                 ret |= 0x1;
 721         if ((uint32_t)pes[1] & P6_PES_INT)
 722                 ret |= 0x2;
 723 
 724         return (ret);
 725 }
 726 
 727 /*ARGSUSED*/
 728 static int
 729 ptm_pcbe_configure(uint_t picnum, char *eventname, uint64_t preset,
 730     uint32_t flags, uint_t nattrs, kcpc_attr_t *attrs, void **data,
 731     void *token)
 732 {
 733         ptm_pcbe_config_t               *conf;
 734         const struct nametable          *n;
 735         const ptm_generic_event_t       *gevp;
 736         struct nametable                nt_raw = { 0, "raw" };
 737         int                             i;
 738         int                             ptm_flags = 0;
 739 
 740         /*
 741          * If we've been handed an existing configuration, we need only preset
 742          * the counter value.
 743          */
 744         if (*data != NULL) {
 745                 conf = *data;
 746                 conf->ptm_rawpic = trunc3931(preset);
 747                 return (0);
 748         }
 749 
 750         if (picnum != 0 && picnum != 1)
 751                 return (CPC_INVALID_PICNUM);
 752 
 753         conf = kmem_alloc(sizeof (ptm_pcbe_config_t), KM_SLEEP);
 754 
 755         conf->ptm_picno = picnum;
 756         conf->ptm_rawpic = trunc3931(preset);
 757         conf->ptm_ctl = 0;
 758 
 759         if ((n = find_event(picnum, eventname)) == NULL) {
 760                 if ((gevp = find_generic_event(picnum, eventname)) != NULL) {
 761                         n = find_event(picnum, gevp->event);
 762                         ASSERT(n != NULL);
 763 
 764                         if (nattrs > 0) {
 765                                 kmem_free(conf, sizeof (ptm_pcbe_config_t));
 766                                 return (CPC_ATTRIBUTE_OUT_OF_RANGE);
 767                         }
 768 
 769                         if (ptm_ver == PTM_VER_P6)
 770                                 conf->ptm_ctl |= gevp->umask <<
 771                                     CPC_P6_PES_UMASK_SHIFT;
 772                 } else {
 773                         long tmp;
 774 
 775                         /*
 776                          * If ddi_strtol() likes this event, use it as a raw
 777                          * event code.
 778                          */
 779                         if (ddi_strtol(eventname, NULL, 0, &tmp) != 0) {
 780                                 kmem_free(conf, sizeof (ptm_pcbe_config_t));
 781                                 return (CPC_INVALID_EVENT);
 782                         }
 783 
 784                         nt_raw.bits = tmp;
 785 
 786                         if (ptm_ver == PTM_VER_P5)
 787                                 nt_raw.bits &= CPC_P5_CESR_ES0_MASK;
 788                         else
 789                                 nt_raw.bits &= CPC_P6_PES_PIC0_MASK;
 790 
 791                         n = &nt_raw;
 792                 }
 793         }
 794 
 795         if (ptm_ver == PTM_VER_P5) {
 796                 int picshift;
 797                 picshift = (picnum == 0) ? 0 : 16;
 798 
 799                 for (i = 0; i < nattrs; i++) {
 800                         /*
 801                          * Value of these attributes is ignored; their presence
 802                          * alone tells us to set the corresponding flag.
 803                          */
 804                         if (strncmp(attrs[i].ka_name, "noedge", 7) == 0) {
 805                                 if (attrs[i].ka_val != 0)
 806                                         ptm_flags |= P5_NOEDGE;
 807                         } else if (strncmp(attrs[i].ka_name, "pc", 3) == 0) {
 808                                 if (attrs[i].ka_val != 0)
 809                                         ptm_flags |= P5_PC;
 810                         } else {
 811                                 kmem_free(conf, sizeof (ptm_pcbe_config_t));
 812                                 return (CPC_INVALID_ATTRIBUTE);
 813                         }
 814                 }
 815 
 816                 if (flags & CPC_COUNT_USER)
 817                         conf->ptm_ctl |= (1 << (CPC_P5_CESR_USR0 + picshift));
 818                 if (flags & CPC_COUNT_SYSTEM)
 819                         conf->ptm_ctl |= (1 << (CPC_P5_CESR_OS0 + picshift));
 820                 if (ptm_flags & P5_NOEDGE)
 821                         conf->ptm_ctl |= (1 << (CPC_P5_CESR_CLK0 + picshift));
 822                 if (ptm_flags & P5_PC)
 823                         conf->ptm_ctl |= (1 << (CPC_P5_CESR_PC0 + picshift));
 824 
 825                 ASSERT((n->bits | CPC_P5_CESR_ES0_MASK) ==
 826                     CPC_P5_CESR_ES0_MASK);
 827 
 828                 conf->ptm_ctl |= (n->bits << picshift);
 829         } else {
 830                 for (i = 0; i < nattrs; i++) {
 831                         if (strncmp(attrs[i].ka_name, "noedge", 6) == 0) {
 832                                 if (attrs[i].ka_val != 0)
 833                                         ptm_flags |= P6_NOEDGE;
 834                         } else if (strncmp(attrs[i].ka_name, "pc", 2) == 0) {
 835                                 if (attrs[i].ka_val != 0)
 836                                         ptm_flags |= P6_PC;
 837                         } else if (strncmp(attrs[i].ka_name, "inv", 3) == 0) {
 838                                 if (attrs[i].ka_val != 0)
 839                                         ptm_flags |= P6_INV;
 840                         } else if (strncmp(attrs[i].ka_name, "umask", 5) == 0) {
 841                                 if ((attrs[i].ka_val | CPC_P6_PES_UMASK_MASK) !=
 842                                     CPC_P6_PES_UMASK_MASK) {
 843                                         kmem_free(conf,
 844                                             sizeof (ptm_pcbe_config_t));
 845                                         return (CPC_ATTRIBUTE_OUT_OF_RANGE);
 846                                 }
 847                                 conf->ptm_ctl |= (uint8_t)attrs[i].ka_val <<
 848                                     CPC_P6_PES_UMASK_SHIFT;
 849                         } else if (strncmp(attrs[i].ka_name, "cmask", 5) == 0) {
 850                                 if ((attrs[i].ka_val | CPC_P6_PES_CMASK_MASK) !=
 851                                     CPC_P6_PES_CMASK_MASK) {
 852                                         kmem_free(conf,
 853                                             sizeof (ptm_pcbe_config_t));
 854                                         return (CPC_ATTRIBUTE_OUT_OF_RANGE);
 855                                 }
 856                                 conf->ptm_ctl |= (uint8_t)attrs[i].ka_val <<
 857                                     CPC_P6_PES_CMASK_SHIFT;
 858                         } else if (strncmp(attrs[i].ka_name, "int", 3) == 0) {
 859                                 if (attrs[i].ka_val != 0)
 860                                         ptm_flags |= P6_INT;
 861                         } else {
 862                                 kmem_free(conf, sizeof (ptm_pcbe_config_t));
 863                                 return (CPC_INVALID_ATTRIBUTE);
 864                         }
 865                 }
 866 
 867                 if (flags & CPC_OVF_NOTIFY_EMT)
 868                         /*
 869                          * If the user has requested notification of overflows,
 870                          * we automatically program the hardware to generate
 871                          * overflow interrupts.
 872                          */
 873                         ptm_flags |= P6_INT;
 874                 if (flags & CPC_COUNT_USER)
 875                         conf->ptm_ctl |= (1 << CPC_P6_PES_USR);
 876                 if (flags & CPC_COUNT_SYSTEM)
 877                         conf->ptm_ctl |= (1 << CPC_P6_PES_OS);
 878                 if ((ptm_flags & P6_NOEDGE) == 0)
 879                         conf->ptm_ctl |= (1 << CPC_P6_PES_E);
 880                 if (ptm_flags & P6_PC)
 881                         conf->ptm_ctl |= (1 << CPC_P6_PES_PC);
 882                 if (ptm_flags & P6_INV)
 883                         conf->ptm_ctl |= (1 << CPC_P6_PES_INV);
 884                 if (ptm_flags & P6_INT)
 885                         conf->ptm_ctl |= (1 << CPC_P6_PES_INT);
 886 
 887                 ASSERT((n->bits | CPC_P6_PES_PIC0_MASK) ==
 888                     CPC_P6_PES_PIC0_MASK);
 889 
 890                 conf->ptm_ctl |= n->bits;
 891         }
 892 
 893         *data = conf;
 894         return (0);
 895 }
 896 
 897 static void
 898 ptm_pcbe_program(void *token)
 899 {
 900         ptm_pcbe_config_t       *pic0;
 901         ptm_pcbe_config_t       *pic1;
 902         ptm_pcbe_config_t       *tmp;
 903         ptm_pcbe_config_t       empty = { 1, 0, 0 }; /* assume pic1 to start */
 904 
 905         if ((pic0 = kcpc_next_config(token, NULL, NULL)) == NULL)
 906                 panic("ptm_pcbe: token %p has no configs", token);
 907 
 908         if ((pic1 = kcpc_next_config(token, pic0, NULL)) == NULL)
 909                 pic1 = &empty;
 910 
 911         if (pic0->ptm_picno != 0) {
 912                 empty.ptm_picno = 0;
 913                 tmp = pic1;
 914                 pic1 = pic0;
 915                 pic0 = tmp;
 916         }
 917 
 918         ASSERT(pic0->ptm_picno == 0 && pic1->ptm_picno == 1);
 919 
 920         if (ptm_rdpmc_avail) {
 921                 ulong_t curcr4 = getcr4();
 922                 if (kcpc_allow_nonpriv(token))
 923                         setcr4(curcr4 | CR4_PCE);
 924                 else
 925                         setcr4(curcr4 & ~CR4_PCE);
 926         }
 927 
 928         if (ptm_ver == PTM_VER_P5) {
 929                 wrmsr(P5_CESR, ALL_STOPPED);
 930                 wrmsr(P5_CTR0, pic0->ptm_rawpic);
 931                 wrmsr(P5_CTR1, pic1->ptm_rawpic);
 932                 wrmsr(P5_CESR, pic0->ptm_ctl | pic1->ptm_ctl);
 933                 pic0->ptm_rawpic = rdmsr(P5_CTR0);
 934                 pic1->ptm_rawpic = rdmsr(P5_CTR1);
 935         } else {
 936                 uint64_t        pes;
 937                 wrmsr(REG_PERFEVNT0, ALL_STOPPED);
 938                 wrmsr(REG_PERFCTR0, pic0->ptm_rawpic);
 939                 wrmsr(REG_PERFCTR1, pic1->ptm_rawpic);
 940                 pes = pic1->ptm_ctl;
 941                 DTRACE_PROBE1(ptm__pes1, uint64_t, pes);
 942                 wrmsr(REG_PERFEVNT1, pes);
 943                 pes = pic0->ptm_ctl | (1 << CPC_P6_PES_EN);
 944                 DTRACE_PROBE1(ptm__pes0, uint64_t, pes);
 945                 wrmsr(REG_PERFEVNT0, pes);
 946         }
 947 }
 948 
 949 static void
 950 ptm_pcbe_allstop(void)
 951 {
 952         if (ptm_ver == PTM_VER_P5)
 953                 wrmsr(P5_CESR, ALL_STOPPED);
 954         else {
 955                 wrmsr(REG_PERFEVNT0, ALL_STOPPED);
 956                 setcr4(getcr4() & ~CR4_PCE);
 957         }
 958 }
 959 
 960 static void
 961 ptm_pcbe_sample(void *token)
 962 {
 963         ptm_pcbe_config_t       *pic0;
 964         ptm_pcbe_config_t       *pic1;
 965         ptm_pcbe_config_t       *swap;
 966         ptm_pcbe_config_t       empty = { 1, 0, 0 }; /* assume pic1 to start */
 967         uint64_t                tmp;
 968         uint64_t                *pic0_data;
 969         uint64_t                *pic1_data;
 970         uint64_t                *dtmp;
 971         uint64_t                curpic[2];
 972 
 973         if ((pic0 = kcpc_next_config(token, NULL, &pic0_data)) == NULL)
 974                 panic("ptm_pcbe: token %p has no configs", token);
 975 
 976         if ((pic1 = kcpc_next_config(token, pic0, &pic1_data)) == NULL) {
 977                 pic1 = &empty;
 978                 pic1_data = &tmp;
 979         }
 980 
 981         if (pic0->ptm_picno != 0) {
 982                 empty.ptm_picno = 0;
 983                 swap = pic0;
 984                 pic0 = pic1;
 985                 pic1 = swap;
 986                 dtmp = pic0_data;
 987                 pic0_data = pic1_data;
 988                 pic1_data = dtmp;
 989         }
 990 
 991         ASSERT(pic0->ptm_picno == 0 && pic1->ptm_picno == 1);
 992 
 993         if (ptm_ver == PTM_VER_P5) {
 994                 curpic[0] = rdmsr(P5_CTR0);
 995                 curpic[1] = rdmsr(P5_CTR1);
 996         } else {
 997                 curpic[0] = rdmsr(REG_PERFCTR0);
 998                 curpic[1] = rdmsr(REG_PERFCTR1);
 999         }
1000 
1001         DTRACE_PROBE1(ptm__curpic0, uint64_t, curpic[0]);
1002         DTRACE_PROBE1(ptm__curpic1, uint64_t, curpic[1]);
1003 
1004         *pic0_data += diff3931(curpic[0], pic0->ptm_rawpic);
1005         pic0->ptm_rawpic = trunc3931(*pic0_data);
1006 
1007         *pic1_data += diff3931(curpic[1], pic1->ptm_rawpic);
1008         pic1->ptm_rawpic = trunc3931(*pic1_data);
1009 }
1010 
1011 static void
1012 ptm_pcbe_free(void *config)
1013 {
1014         kmem_free(config, sizeof (ptm_pcbe_config_t));
1015 }
1016 
/*
 * Virtualizes the 40-bit field of the %pic
 * register into a 64-bit software register.
 *
 * We can retrieve 40 (signed) bits from the counters,
 * but we can set only 32 (signed) bits into the counters.
 * This makes virtualizing more than 31 bits of the registers
 * quite tricky.
 *
 * If bits 39 to 31 are all set in the virtualized pic register,
 * then we can preset the counter to this value using the fact
 * that wrmsr sign-extends bit 31.  Though it might look easier
 * to use only the bottom 31 bits of the register, we have to allow
 * the full 40 bits to be used to perform overflow profiling.
 */
1032 
/* Low 40 bits, low 31 bits, and bits 39..31 of a counter value. */
#define MASK40          UINT64_C(0xffffffffff)
#define MASK31          UINT64_C(0x7fffffff)
#define BITS_39_31      UINT64_C(0xff80000000)

/*
 * Return the number of events between the previously recorded raw value
 * (old) and a fresh hardware reading (sample).
 *
 * When bits 39..31 of old are all set, the comparison is done on 40 bits
 * and a negative difference wraps by 2^40; otherwise only the low 31 bits
 * of sample are used and a negative difference wraps by 2^31.
 */
static int64_t
diff3931(uint64_t sample, uint64_t old)
{
        const int       wide = ((old & BITS_39_31) == BITS_39_31);
        const uint64_t  field = wide ? MASK40 : MASK31;
        const uint64_t  modulus = UINT64_C(1) << (wide ? 40 : 31);
        int64_t         delta;

        delta = (sample & field) - old;
        if (delta < 0)
                delta += modulus;

        return (delta);
}

/*
 * Reduce a 64-bit virtualized value to a raw counter value: keep the low
 * 40 bits when bits 39..31 are all set, otherwise keep only the low 31.
 */
static uint64_t
trunc3931(uint64_t value)
{
        uint64_t keep = MASK31;

        if ((value & BITS_39_31) == BITS_39_31)
                keep = MASK40;

        return (value & keep);
}
1061 
/*
 * Module linkage record for this PCBE: it references mod_pcbeops, carries
 * the description string for the module, and points at this backend's
 * ptm_pcbe_ops table.
 */
static struct modlpcbe modlpcbe = {
        &mod_pcbeops,
        "Pentium Performance Counters",
        &ptm_pcbe_ops
};
1067 
/*
 * Linkage structure handed to mod_install(), mod_remove() and mod_info()
 * by _init(), _fini() and _info(); its only entry is the modlpcbe record.
 */
static struct modlinkage modl = {
        MODREV_1,
        &modlpcbe,
};
1072 
1073 int
1074 _init(void)
1075 {
1076         if (ptm_pcbe_init() != 0)
1077                 return (ENOTSUP);
1078         return (mod_install(&modl));
1079 }
1080 
1081 int
1082 _fini(void)
1083 {
1084         return (mod_remove(&modl));
1085 }
1086 
1087 int
1088 _info(struct modinfo *mi)
1089 {
1090         return (mod_info(&modl, mi));
1091 }