1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
  23  */
  24 
  25 /*
  26  * This file contains preset event names from the Performance Application
  27  * Programming Interface v3.5 which included the following notice:
  28  *
  29  *                             Copyright (c) 2005,6
  30  *                           Innovative Computing Labs
  31  *                         Computer Science Department,
  32  *                            University of Tennessee,
  33  *                                 Knoxville, TN.
  34  *                              All Rights Reserved.
  35  *
  36  *
  37  * Redistribution and use in source and binary forms, with or without
  38  * modification, are permitted provided that the following conditions are met:
  39  *
  40  *    * Redistributions of source code must retain the above copyright notice,
  41  *      this list of conditions and the following disclaimer.
  42  *    * Redistributions in binary form must reproduce the above copyright
  43  *      notice, this list of conditions and the following disclaimer in the
  44  *      documentation and/or other materials provided with the distribution.
  45  *    * Neither the name of the University of Tennessee nor the names of its
  46  *      contributors may be used to endorse or promote products derived from
  47  *      this software without specific prior written permission.
  48  *
  49  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  50  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  51  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  52  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  53  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  54  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  55  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  56  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  57  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  58  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  59  * POSSIBILITY OF SUCH DAMAGE.
  60  *
  61  *
  62  * This open source software license conforms to the BSD License template.
  63  */
  64 
  65 /*
  66  * Performance Counter Back-End for Pentium 4.
  67  */
  68 
  69 #include <sys/cpuvar.h>
  70 #include <sys/param.h>
  71 #include <sys/cpc_impl.h>
  72 #include <sys/cpc_pcbe.h>
  73 #include <sys/inttypes.h>
  74 #include <sys/errno.h>
  75 #include <sys/systm.h>
  76 #include <sys/archsystm.h>
  77 #include <sys/x86_archext.h>
  78 #include <sys/modctl.h>
  79 #include <sys/sdt.h>
  80 #include <sys/cred.h>
  81 #include <sys/policy.h>
  82 #include <sys/privregs.h>
  83 
/*
 * Forward declarations of this back-end's entry points. Every function
 * below except p4_pcbe_init() is installed in the p4_pcbe_ops table that
 * follows.
 */
static int p4_pcbe_init(void);
static uint_t p4_pcbe_ncounters(void);
static const char *p4_pcbe_impl_name(void);
static const char *p4_pcbe_cpuref(void);
static char *p4_pcbe_list_events(uint_t picnum);
static char *p4_pcbe_list_attrs(void);
static uint64_t p4_pcbe_event_coverage(char *event);
static uint64_t p4_pcbe_overflow_bitmap(void);
static int p4_pcbe_configure(uint_t picnum, char *event, uint64_t preset,
    uint32_t flags, uint_t nattrs, kcpc_attr_t *attrs, void **data,
    void *token);
static void p4_pcbe_program(void *token);
static void p4_pcbe_allstop(void);
static void p4_pcbe_sample(void *token);
static void p4_pcbe_free(void *config);

/* Not defined in this file. */
extern int cpuid_get_clogid(cpu_t *);
 101 
/*
 * Operations vector for this back-end: an interface version, the capability
 * flags (overflow interrupt, precise overflow), and then the entry points
 * declared above, in the order required by pcbe_ops_t.
 */
static pcbe_ops_t p4_pcbe_ops = {
        PCBE_VER_1,
        CPC_CAP_OVERFLOW_INTERRUPT | CPC_CAP_OVERFLOW_PRECISE,
        p4_pcbe_ncounters,
        p4_pcbe_impl_name,
        p4_pcbe_cpuref,
        p4_pcbe_list_events,
        p4_pcbe_list_attrs,
        p4_pcbe_event_coverage,
        p4_pcbe_overflow_bitmap,
        p4_pcbe_configure,
        p4_pcbe_program,
        p4_pcbe_allstop,
        p4_pcbe_sample,
        p4_pcbe_free
};
 118 
/*
 * P4 Configuration Flags (bit values stored in p4_pcbe_config_t.p4_flags).
 */
#define P4_THIS_USR     0x1 /* HTT: Measure usr events on this logical CPU */
#define P4_THIS_SYS     0x2 /* HTT: Measure os events on this logical CPU */
#define P4_SIBLING_USR  0x4 /* HTT: Measure usr events on other logical CPU */
#define P4_SIBLING_SYS  0x8 /* HTT: Measure os events on other logical CPU */
#define P4_PMI          0x10 /* HTT: Set PMI bit for local logical CPU */
 127 
/*
 * Per-counter configuration. p4_pcbe_configure() stores the preset value,
 * masked to 40 bits, in p4_rawpic.
 */
typedef struct _p4_pcbe_config {
        uint8_t         p4_flags;       /* P4_* configuration flags */
        uint8_t         p4_picno;       /* From 0 to 17 */
        uint8_t         p4_escr_ndx;    /* Which ESCR to use */
        uint32_t        p4_escr;        /* Value to program in selected ESCR */
        uint32_t        p4_cccr;        /* Value to program in counter's CCCR */
        uint64_t        p4_rawpic;      /* Counter value (40 bits used) */
} p4_pcbe_config_t;
 136 
typedef uint32_t cntr_map_t;

/*
 * Description of one Event Selection and Control Register (ESCR); one entry
 * of the p4_escrs[] table.
 */
typedef struct _p4_escr {
        int             pe_num;         /* ESCR number within its group */
                                        /* NOTE(review): presumably the CCCR */
                                        /* ESCR-select value; confirm */
        uint32_t        pe_addr;        /* ESCR MSR address */
        uint32_t        pe_map; /* bitmap of counters; bit 0 (0x1) is ctr 0 */
} p4_escr_t;
 144 
/* Low 40 bits set; used to truncate counter preset values. */
#define MASK40                  UINT64_C(0xffffffffff)

/*
 * CCCR field definitions: bit positions (*_SHIFT), field limits, and the
 * single-bit masks derived from the shifts.
 *
 * Note that the Intel Developer's Manual states that the reserved field at
 * bit location 16 and 17 must be set to 11. (??)
 *
 * CCCR_OVF_PMI and CCCR_OVF_PMI_T0 name the same bit (26); CCCR_OVF_PMI_T1
 * is bit 27.
 */
#define CCCR_ENABLE_SHIFT       12
#define CCCR_ESCR_SEL_SHIFT     13
#define CCCR_ACTV_THR_SHIFT     16
#define CCCR_COMPARE_SHIFT      18
#define CCCR_COMPLEMENT_SHIFT   19
#define CCCR_THRESHOLD_SHIFT    20
#define CCCR_EDGE_SHIFT         24
#define CCCR_OVF_PMI_SHIFT      26
#define CCCR_OVF_PMI_T0_SHIFT   26
#define CCCR_OVF_PMI_T1_SHIFT   27
#define CCCR_OVF_SHIFT          31
#define CCCR_ACTV_THR_MASK      0x3
#define CCCR_THRESHOLD_MAX      0xF
#define CCCR_ENABLE             (1U << CCCR_ENABLE_SHIFT)
#define CCCR_COMPARE            (1U << CCCR_COMPARE_SHIFT)
#define CCCR_COMPLEMENT         (1U << CCCR_COMPLEMENT_SHIFT)
#define CCCR_EDGE               (1U << CCCR_EDGE_SHIFT)
#define CCCR_OVF_PMI            (1U << CCCR_OVF_PMI_SHIFT)
#define CCCR_OVF_PMI_T0         (1U << CCCR_OVF_PMI_T0_SHIFT)
#define CCCR_OVF_PMI_T1         (1U << CCCR_OVF_PMI_T1_SHIFT)
#define CCCR_INIT               CCCR_ENABLE
#define CCCR_OVF                (1U << CCCR_OVF_SHIFT)
 175 
/*
 * ESCR field definitions: bit positions, the maximum tag value, and the
 * single-bit masks for the usr/os/tag-enable bits.
 */
#define ESCR_EVSEL_SHIFT        25
#define ESCR_EVMASK_SHIFT       9
#define ESCR_TAG_VALUE_SHIFT    5
#define ESCR_TAG_VALUE_MAX      0xF
#define ESCR_TAG_ENABLE_SHIFT   4
#define ESCR_USR_SHIFT          2
#define ESCR_OS_SHIFT           3
#define ESCR_USR                (1U << ESCR_USR_SHIFT)
#define ESCR_OS                 (1U << ESCR_OS_SHIFT)
#define ESCR_TAG_ENABLE         (1U << ESCR_TAG_ENABLE_SHIFT)

/*
 * HyperThreaded ESCR fields. The T0 bits occupy the same positions as the
 * non-HyperThreaded ESCR_OS/ESCR_USR bits above (3 and 2); the T1 bits are
 * bits 1 and 0.
 */
#define ESCR_T0_OS_SHIFT        3
#define ESCR_T0_USR_SHIFT       2
#define ESCR_T1_OS_SHIFT        1
#define ESCR_T1_USR_SHIFT       0
#define ESCR_T0_OS              (1U << ESCR_T0_OS_SHIFT)
#define ESCR_T0_USR             (1U << ESCR_T0_USR_SHIFT)
#define ESCR_T1_OS              (1U << ESCR_T1_OS_SHIFT)
#define ESCR_T1_USR             (1U << ESCR_T1_USR_SHIFT)
 198 
/*
 * ESCRs are grouped by counter; each group of ESCRs is associated with a
 * distinct group of counters. Use these macros to fill in the table below.
 *
 * Each macro is a bitmap in which bit N set means counter N belongs to the
 * group (so 0x1 is counter 0).
 */
#define BPU0_map        (0x1 | 0x2)             /* Counters 0 and 1 */
#define BPU2_map        (0x4 | 0x8)             /* Counters 2 and 3 */
#define MS0_map         (0x10 | 0x20)           /* Counters 4 and 5 */
#define MS2_map         (0x40 | 0x80)           /* Counters 6 and 7 */
#define FLAME0_map      (0x100 | 0x200)         /* Counters 8 and 9 */
#define FLAME2_map      (0x400 | 0x800)         /* Counters 10 and 11 */
#define IQ0_map         (0x1000 | 0x2000 | 0x10000) /* Counters 12, 13, 16 */
#define IQ2_map         (0x4000 | 0x8000 | 0x20000) /* Counters 14, 15, 17 */
 211 
/*
 * Table describing the 45 Event Selection and Control Registers (ESCRs).
 *
 * Each entry is { pe_num, pe_addr, pe_map }. The #define in front of each
 * entry names the bit corresponding to that entry's index in the table
 * (noted in the trailing comment); those bits make up the 64-bit ESCR
 * bitmaps stored in p4_ctr_t.pc_map and p4_event_t.pe_escr_map.
 */
const p4_escr_t p4_escrs[] = {
#define BPU0 (1)
        { 0, 0x3B2, BPU0_map },         /* 0 */
#define IS0 (1ULL << 1)
        { 1, 0x3B4, BPU0_map },         /* 1 */
#define MOB0 (1ULL << 2)
        { 2, 0x3AA, BPU0_map },         /* 2 */
#define ITLB0 (1ULL << 3)
        { 3, 0x3B6, BPU0_map },         /* 3 */
#define PMH0 (1ULL << 4)
        { 4, 0x3AC, BPU0_map },         /* 4 */
#define IX0 (1ULL << 5)
        { 5, 0x3C8, BPU0_map },         /* 5 */
#define FSB0 (1ULL << 6)
        { 6, 0x3A2, BPU0_map },         /* 6 */
#define BSU0 (1ULL << 7)
        { 7, 0x3A0, BPU0_map },         /* 7 */
#define BPU1 (1ULL << 8)
        { 0, 0x3B3, BPU2_map },         /* 8 */
#define IS1 (1ULL << 9)
        { 1, 0x3B5, BPU2_map },         /* 9 */
#define MOB1 (1ULL << 10)
        { 2, 0x3AB, BPU2_map },         /* 10 */
#define ITLB1 (1ULL << 11)
        { 3, 0x3B7, BPU2_map },         /* 11 */
#define PMH1 (1ULL << 12)
        { 4, 0x3AD, BPU2_map },         /* 12 */
#define IX1 (1ULL << 13)
        { 5, 0x3C9, BPU2_map },         /* 13 */
#define FSB1 (1ULL << 14)
        { 6, 0x3A3, BPU2_map },         /* 14 */
#define BSU1 (1ULL << 15)
        { 7, 0x3A1, BPU2_map },         /* 15 */
#define MS0 (1ULL << 16)
        { 0, 0x3C0, MS0_map },          /* 16 */
#define TC0 (1ULL << 17)
        { 1, 0x3C4, MS0_map },          /* 17 */
#define TBPU0 (1ULL << 18)
        { 2, 0x3C2, MS0_map },          /* 18 */
#define MS1 (1ULL << 19)
        { 0, 0x3C1, MS2_map },          /* 19 */
#define TC1 (1ULL << 20)
        { 1, 0x3C5, MS2_map },          /* 20 */
#define TBPU1 (1ULL << 21)
        { 2, 0x3C3, MS2_map },          /* 21 */
#define FLAME0 (1ULL << 22)
        { 0, 0x3A6, FLAME0_map },       /* 22 */
#define FIRM0 (1ULL << 23)
        { 1, 0x3A4, FLAME0_map },       /* 23 */
#define SAAT0 (1ULL << 24)
        { 2, 0x3AE, FLAME0_map },       /* 24 */
#define U2L0 (1ULL << 25)
        { 3, 0x3B0, FLAME0_map },       /* 25 */
#define DAC0 (1ULL << 26)
        { 5, 0x3A8, FLAME0_map },       /* 26 */
#define FLAME1 (1ULL << 27)
        { 0, 0x3A7, FLAME2_map },       /* 27 */
#define FIRM1 (1ULL << 28)
        { 1, 0x3A5, FLAME2_map },       /* 28 */
#define SAAT1 (1ULL << 29)
        { 2, 0x3AF, FLAME2_map },       /* 29 */
#define U2L1 (1ULL << 30)
        { 3, 0x3B1, FLAME2_map },       /* 30 */
#define DAC1 (1ULL << 31)
        { 5, 0x3A9, FLAME2_map },       /* 31 */
#define IQ0 (1ULL << 32)
        { 0, 0x3BA, IQ0_map },          /* 32 */
#define ALF0 (1ULL << 33)
        { 1, 0x3CA, IQ0_map },          /* 33 */
#define RAT0 (1ULL << 34)
        { 2, 0x3BC, IQ0_map },          /* 34 */
#define SSU0 (1ULL << 35)
        { 3, 0x3BE, IQ0_map },          /* 35 */
#define CRU0 (1ULL << 36)
        { 4, 0x3B8, IQ0_map },          /* 36 */
#define CRU2 (1ULL << 37)
        { 5, 0x3CC, IQ0_map },          /* 37 */
#define CRU4 (1ULL << 38)
        { 6, 0x3E0, IQ0_map },          /* 38 */
#define IQ1 (1ULL << 39)
        { 0, 0x3BB, IQ2_map },          /* 39 */
#define ALF1 (1ULL << 40)
        { 1, 0x3CB, IQ2_map },          /* 40 */
#define RAT1 (1ULL << 41)
        { 2, 0x3BD, IQ2_map },          /* 41 */
#define CRU1 (1ULL << 42)
        { 4, 0x3B9, IQ2_map },          /* 42 */
#define CRU3 (1ULL << 43)
        { 5, 0x3CD, IQ2_map },          /* 43 */
#define CRU5 (1ULL << 44)
        { 6, 0x3E1, IQ2_map }           /* 44 */
};

/*
 * Index of the last entry in p4_escrs[] (the table has 45 entries, 0..44).
 * NOTE(review): p4_pcbe_configure() iterates with "< ESCR_MAX_INDEX", which
 * never examines index 44 (CRU5); no event in p4_events[] currently maps to
 * CRU5, but confirm the bound is intended.
 */
#define ESCR_MAX_INDEX 44
 309 
/*
 * Description of one performance counter; one entry of the p4_ctrs[] table.
 */
typedef struct _p4_ctr {
        uint32_t        pc_caddr;       /* counter MSR address */
        uint32_t        pc_ctladdr;     /* counter's CCCR MSR address */
        uint64_t        pc_map;         /* bitmap of ESCRs controlling ctr */
} p4_ctr_t;
 315 
/*
 * The 18 performance counters, indexed by counter (pic) number. Each entry
 * is { counter MSR address, CCCR MSR address, bitmap of ESCRs }, where the
 * ESCR bitmap is built from the per-ESCR bit names defined alongside
 * p4_escrs[].
 */
const p4_ctr_t p4_ctrs[18] = {
{ /* BPU_COUNTER0 */ 0x300, 0x360, BSU0|FSB0|MOB0|PMH0|BPU0|IS0|ITLB0|IX0},
{ /* BPU_COUNTER1 */ 0x301, 0x361, BSU0|FSB0|MOB0|PMH0|BPU0|IS0|ITLB0|IX0},
{ /* BPU_COUNTER2 */ 0x302, 0x362, BSU1|FSB1|MOB1|PMH1|BPU1|IS1|ITLB1|IX1},
{ /* BPU_COUNTER3 */ 0x303, 0x363, BSU1|FSB1|MOB1|PMH1|BPU1|IS1|ITLB1|IX1},
{ /* MS_COUNTER0 */  0x304, 0x364, MS0|TBPU0|TC0 },
{ /* MS_COUNTER1 */  0x305, 0x365, MS0|TBPU0|TC0 },
{ /* MS_COUNTER2 */  0x306, 0x366, MS1|TBPU1|TC1 },
{ /* MS_COUNTER3 */  0x307, 0x367, MS1|TBPU1|TC1 },
{ /* FLAME_COUNTER0 */ 0x308, 0x368, FIRM0|FLAME0|DAC0|SAAT0|U2L0 },
{ /* FLAME_COUNTER1 */ 0x309, 0x369, FIRM0|FLAME0|DAC0|SAAT0|U2L0 },
{ /* FLAME_COUNTER2 */ 0x30A, 0x36A, FIRM1|FLAME1|DAC1|SAAT1|U2L1 },
{ /* FLAME_COUNTER3 */ 0x30B, 0x36B, FIRM1|FLAME1|DAC1|SAAT1|U2L1 },
{ /* IQ_COUNTER0 */  0x30C, 0x36C, CRU0|CRU2|CRU4|IQ0|RAT0|SSU0|ALF0 },
{ /* IQ_COUNTER1 */  0x30D, 0x36D, CRU0|CRU2|CRU4|IQ0|RAT0|SSU0|ALF0 },
{ /* IQ_COUNTER2 */  0x30E, 0x36E, CRU1|CRU3|CRU5|IQ1|RAT1|ALF1 },
{ /* IQ_COUNTER3 */  0x30F, 0x36F, CRU1|CRU3|CRU5|IQ1|RAT1|ALF1 },
{ /* IQ_COUNTER4 */  0x310, 0x370, CRU0|CRU2|CRU4|IQ0|RAT0|SSU0|ALF0 },
{ /* IQ_COUNTER5 */  0x311, 0x371, CRU1|CRU3|CRU5|IQ1|RAT1|ALF1 }
};
 336 
/*
 * Description of one native event; one entry of the p4_events[] table.
 */
typedef struct _p4_event {
        char            *pe_name;       /* Name of event according to docs */
        uint64_t        pe_escr_map;    /* Bitmap of ESCRs capable of event */
        uint32_t        pe_escr_mask;   /* permissible ESCR event mask */
        uint8_t         pe_ev;          /* ESCR event select value */
        uint16_t        pe_cccr;        /* CCCR select value */
        uint32_t        pe_ctr_mask;    /* Bitmap of capable counters */
} p4_event_t;

/*
 * Description of one generic event; one entry of the p4_generic_events[]
 * table. A generic event is an alias for a native event with a fixed event
 * mask.
 */
typedef struct _p4_generic_event {
        char            *name;          /* Generic event name */
        char            *event;         /* Name of the p4_events[] entry used */
        uint16_t        emask;          /* Event mask applied for this alias */
        uint32_t        ctr_mask;       /* Bitmap of capable counters */
} p4_generic_event_t;
 352 
 353 #define C(n) (1 << n)
 354 #define GEN_EVT_END { NULL, NULL, 0x0, 0x0 }
 355 
/*
 * Native event table, terminated by an entry whose name is NULL.
 *
 * Columns: event name, bitmap of capable ESCRs, permissible event mask bits,
 * ESCR event select value, CCCR select value, bitmap of capable counters.
 *
 * NOTE(review): "branch_retired" and "mispred_branch_retired" list counters
 * 12-16 but not 17, whereas the other CRU0-CRU3 events list 12-17; confirm
 * whether the omission is intentional.
 */
p4_event_t p4_events[] = {
{ "branch_retired", CRU2|CRU3, 0xF, 0x6, 0x5, C(12)|C(13)|C(14)|C(15)|C(16) },
{ "mispred_branch_retired", CRU0|CRU1, 0x1, 0x3, 0x4,
        C(12)|C(13)|C(14)|C(15)|C(16) },
{ "TC_deliver_mode", TC0|TC1, 0xFF, 0x1, 0x1, C(4)|C(5)|C(6)|C(7) },
{ "BPU_fetch_request", BPU0|BPU1, 0x1, 0x3, 0x0, C(0)|C(1)|C(2)|C(3) },
{ "ITLB_reference", ITLB0|ITLB1, 0x7, 0x18, 0x3, C(0)|C(1)|C(2)|C(3) },
{ "memory_cancel", DAC0|DAC1, 0x6, 0x2, 0x5, C(8)|C(9)|C(10)|C(11) },
{ "memory_complete", SAAT0|SAAT1, 0x3, 0x8, 0x2, C(8)|C(9)|C(10)|C(11) },
{ "load_port_replay", SAAT0|SAAT1, 0x1, 0x4, 0x2, C(8)|C(9)|C(10)|C(11) },
{ "store_port_replay", SAAT0|SAAT1, 0x1, 0x5, 0x2, C(8)|C(9)|C(10)|C(11) },
{ "MOB_load_replay", MOB0|MOB1, 0x35, 0x3, 0x2, C(0)|C(1)|C(2)|C(3) },
{ "page_walk_type", PMH0|PMH1, 0x3, 0x1, 0x4, C(0)|C(1)|C(2)|C(3) },
{ "BSQ_cache_reference", BSU0|BSU1, 0x73F, 0xC, 0x7, C(0)|C(1)|C(2)|C(3) },
{ "IOQ_allocation", FSB0, 0xEFFF, 0x3, 0x6, C(0)|C(1) },
{ "IOQ_active_entries", FSB1, 0xEFFF, 0x1A, 0x6, C(2)|C(3) },
{ "FSB_data_activity", FSB0|FSB1, 0x3F, 0x17, 0x6, C(0)|C(1)|C(2)|C(3) },
{ "BSQ_allocation", BSU0, 0x3FEF, 0x5, 0x7, C(0)|C(1) },
{ "bsq_active_entries", BSU1, 0x3FEF, 0x6, 0x7, C(2)|C(3) },
{ "x87_assist", CRU2|CRU3, 0x1F, 0x3, 0x5, C(12)|C(13)|C(14)|C(15)|C(16)|C(17)},
{ "SSE_input_assist", FIRM0|FIRM1, 0x8000, 0x34, 0x1, C(8)|C(9)|C(10)|C(11) },
{ "packed_SP_uop", FIRM0|FIRM1, 0x8000, 0x8, 0x1, C(8)|C(9)|C(10)|C(11) },
{ "packed_DP_uop", FIRM0|FIRM1, 0x8000, 0xC, 0x1, C(8)|C(9)|C(10)|C(11) },
{ "scalar_SP_uop", FIRM0|FIRM1, 0x8000, 0xA, 0x1, C(8)|C(9)|C(10)|C(11) },
{ "scalar_DP_uop", FIRM0|FIRM1, 0x8000, 0xE, 0x1, C(8)|C(9)|C(10)|C(11) },
{ "64bit_MMX_uop", FIRM0|FIRM1, 0x8000, 0x2, 0x1, C(8)|C(9)|C(10)|C(11) },
{ "128bit_MMX_uop", FIRM0|FIRM1, 0x8000, 0x1A, 0x1, C(8)|C(9)|C(10)|C(11) },
{ "x87_FP_uop", FIRM0|FIRM1, 0x8000, 0x4, 0x1, C(8)|C(9)|C(10)|C(11) },
{ "x87_SIMD_moves_uop", FIRM0|FIRM1, 0x18, 0x2E, 0x1, C(8)|C(9)|C(10)|C(11) },
{ "machine_clear", CRU2|CRU3, 0xD, 0x2, 0x5,
        C(12)|C(13)|C(14)|C(15)|C(16)|C(17)},
{ "global_power_events", FSB0|FSB1, 0x1, 0x13, 0x6, C(0)|C(1)|C(2)|C(3) },
{ "tc_ms_xfer", MS0|MS1, 0x1, 0x5, 0x0, C(4)|C(5)|C(6)|C(7) },
{ "uop_queue_writes", MS0|MS1, 0x7, 0x9, 0x0, C(4)|C(5)|C(6)|C(7) },
{ "front_end_event", CRU2|CRU3, 0x3, 0x8, 0x5,
        C(12)|C(13)|C(14)|C(15)|C(16)|C(17)},
{ "execution_event", CRU2|CRU3, 0xFF, 0xC, 0x5,
        C(12)|C(13)|C(14)|C(15)|C(16)|C(17)},
{ "replay_event", CRU2|CRU3, 0x3, 0x9, 0x5,
        C(12)|C(13)|C(14)|C(15)|C(16)|C(17)},
{ "instr_retired", CRU0|CRU1, 0xF, 0x2, 0x4,
        C(12)|C(13)|C(14)|C(15)|C(16)|C(17)},
{ "uops_retired", CRU0|CRU1, 0x3, 0x1, 0x4,
        C(12)|C(13)|C(14)|C(15)|C(16)|C(17)},
{ "uop_type", RAT0|RAT1, 0x3, 0x2, 0x2, C(12)|C(13)|C(14)|C(15)|C(16)|C(17)},
{ "retired_mispred_branch_type", TBPU0|TBPU1, 0x1F, 0x5, 0x2,
        C(4)|C(5)|C(6)|C(7)},
{ "retired_branch_type", TBPU0|TBPU1, 0x1F, 0x4, 0x2, C(4)|C(5)|C(6)|C(7) },
{ NULL, 0, 0, 0, 0 }
};
 406 
/*
 * Generic (PAPI preset) event table, terminated by GEN_EVT_END.
 *
 * Columns: generic name, name of the native event in p4_events[] that
 * implements it, event mask to use, bitmap of capable counters.
 */
static p4_generic_event_t p4_generic_events[] = {
{ "PAPI_br_msp", "branch_retired", 0xa, C(12)|C(13)|C(14)|C(15)|C(16) },
{ "PAPI_br_ins", "branch_retired", 0xf, C(12)|C(13)|C(14)|C(15)|C(16) },
{ "PAPI_br_tkn", "branch_retired", 0xc, C(12)|C(13)|C(14)|C(15)|C(16) },
{ "PAPI_br_ntk", "branch_retired", 0x3, C(12)|C(13)|C(14)|C(15)|C(16) },
{ "PAPI_br_prc", "branch_retired", 0x5, C(12)|C(13)|C(14)|C(15)|C(16) },
{ "PAPI_tot_ins", "instr_retired", 0x3, C(12)|C(13)|C(14)|C(15)|C(16)|C(17) },
{ "PAPI_tot_cyc", "global_power_events", 0x1, C(0)|C(1)|C(2)|C(3) },
{ "PAPI_tlb_dm", "page_walk_type", 0x1, C(0)|C(1)|C(2)|C(3) },
{ "PAPI_tlb_im", "page_walk_type", 0x2, C(0)|C(1)|C(2)|C(3) },
{ "PAPI_tlb_tm", "page_walk_type", 0x3, C(0)|C(1)|C(2)|C(3) },
{ "PAPI_l1_icm", "BPU_fetch_request", 0x1, C(0)|C(1)|C(2)|C(3) },
{ "PAPI_l2_ldm", "BSQ_cache_reference", 0x100, C(0)|C(1)|C(2)|C(3) },
{ "PAPI_l2_stm", "BSQ_cache_reference", 0x400, C(0)|C(1)|C(2)|C(3) },
{ "PAPI_l2_tcm", "BSQ_cache_reference", 0x500, C(0)|C(1)|C(2)|C(3) },
GEN_EVT_END
};
 424 
/*
 * Indicates whether the "rdpmc" instruction is available on this processor.
 * Set by p4_pcbe_init().
 */
static int p4_rdpmc_avail = 0;

/*
 * Zero CCCR value.
 * NOTE(review): presumably written to a CCCR to stop its counter; the use is
 * not visible in this part of the file.
 */
static const uint64_t p4_cccrstop = 0;

/*
 * Per-counter event lists: p4_eventlist[i] is a comma-separated string of
 * the native and generic event names that counter i can count. Built by
 * p4_pcbe_init() and returned by p4_pcbe_list_events().
 */
static char *p4_eventlist[18];

/*
 * If set, this processor has HyperThreading. Set by p4_pcbe_init().
 */
static int p4_htt = 0;

/* CPU family value required by p4_pcbe_init(). */
#define P4_FAMILY       0xF
 440 
 441 static int
 442 p4_pcbe_init(void)
 443 {
 444         int                     i;
 445         size_t                  size;
 446         p4_event_t              *ev;
 447         p4_generic_event_t      *gevp;
 448 
 449         /*
 450          * If we're not running on a P4, refuse to load.
 451          */
 452         if (cpuid_getvendor(CPU) != X86_VENDOR_Intel ||
 453             cpuid_getfamily(CPU) != P4_FAMILY)
 454                 return (-1);
 455 
 456         /*
 457          * Set up the event lists for each counter.
 458          *
 459          * First pass calculates the size of the event list, and the second
 460          * pass copies each event name into the event list.
 461          */
 462         for (i = 0; i < 18; i++) {
 463                 size = 0;
 464 
 465                 for (ev = p4_events; ev->pe_name != NULL; ev++) {
 466                         if (ev->pe_ctr_mask & C(i))
 467                                 size += strlen(ev->pe_name) + 1;
 468                 }
 469 
 470                 for (gevp = p4_generic_events; gevp->name != NULL; gevp++) {
 471                         if (gevp->ctr_mask & C(i))
 472                                 size += strlen(gevp->name) + 1;
 473                 }
 474 
 475                 /*
 476                  * We use 'size + 1' here to ensure room for the final
 477                  * strcat when it terminates the string.
 478                  */
 479                 p4_eventlist[i] = (char *)kmem_alloc(size + 1, KM_SLEEP);
 480                 *p4_eventlist[i] = '\0';
 481 
 482                 for (ev = p4_events; ev->pe_name != NULL; ev++) {
 483                         if (ev->pe_ctr_mask & C(i)) {
 484                                 (void) strcat(p4_eventlist[i], ev->pe_name);
 485                                 (void) strcat(p4_eventlist[i], ",");
 486                         }
 487                 }
 488 
 489                 for (gevp = p4_generic_events; gevp->name != NULL; gevp++) {
 490                         if (gevp->ctr_mask & C(i)) {
 491                                 (void) strcat(p4_eventlist[i], gevp->name);
 492                                 (void) strcat(p4_eventlist[i], ",");
 493                         }
 494                 }
 495 
 496                 /*
 497                  * Remove trailing ','
 498                  */
 499                 p4_eventlist[i][size - 1] = '\0';
 500         }
 501 
 502         if (is_x86_feature(x86_featureset, X86FSET_MMX))
 503                 p4_rdpmc_avail = 1;
 504         /*
 505          * The X86_HTT flag may disappear soon, so we'll isolate the impact of
 506          * its demise to the following if().
 507          */
 508         if (is_x86_feature(x86_featureset, X86FSET_HTT))
 509                 p4_htt = 1;
 510 
 511         return (0);
 512 }
 513 
 514 static uint_t
 515 p4_pcbe_ncounters(void)
 516 {
 517         return (18);
 518 }
 519 
 520 static const char *
 521 p4_pcbe_impl_name(void)
 522 {
 523         if (p4_htt)
 524                 return (PCBE_IMPL_NAME_P4HT);
 525         return ("Pentium 4");
 526 }
 527 
 528 static const char *
 529 p4_pcbe_cpuref(void)
 530 {
 531         return ("See Appendix A.1 of the \"IA-32 Intel Architecture Software " \
 532             "Developer's Manual Volume 3: System Programming Guide,\" "        \
 533             "Order # 245472-012, 2003");
 534 }
 535 
 536 static char *
 537 p4_pcbe_list_events(uint_t picnum)
 538 {
 539         ASSERT(picnum >= 0 && picnum < 18);
 540 
 541         return (p4_eventlist[picnum]);
 542 }
 543 
 544 #define P4_ATTRS "emask,tag,compare,complement,threshold,edge"
 545 
 546 static char *
 547 p4_pcbe_list_attrs(void)
 548 {
 549         if (p4_htt)
 550                 return (P4_ATTRS ",active_thread,count_sibling_usr,"
 551                     "count_sibling_sys");
 552         return (P4_ATTRS);
 553 }
 554 
 555 static p4_generic_event_t *
 556 find_generic_event(char *name)
 557 {
 558         p4_generic_event_t      *gevp;
 559 
 560         for (gevp = p4_generic_events; gevp->name != NULL; gevp++)
 561                 if (strcmp(name, gevp->name) == 0)
 562                         return (gevp);
 563 
 564         return (NULL);
 565 }
 566 
 567 static p4_event_t *
 568 find_event(char *name)
 569 {
 570         p4_event_t              *evp;
 571 
 572         for (evp = p4_events; evp->pe_name != NULL; evp++)
 573                 if (strcmp(name, evp->pe_name) == 0)
 574                         return (evp);
 575 
 576         return (NULL);
 577 }
 578 
 579 static uint64_t
 580 p4_pcbe_event_coverage(char *event)
 581 {
 582         p4_event_t              *ev;
 583         p4_generic_event_t      *gevp;
 584 
 585         if ((ev = find_event(event)) == NULL) {
 586                 if ((gevp = find_generic_event(event)) != NULL)
 587                         return (gevp->ctr_mask);
 588                 else
 589                         return (0);
 590         }
 591 
 592         return (ev->pe_ctr_mask);
 593 }
 594 
 595 static uint64_t
 596 p4_pcbe_overflow_bitmap(void)
 597 {
 598         extern int      kcpc_hw_overflow_intr_installed;
 599         uint64_t        ret = 0;
 600         int             i;
 601 
 602         /*
 603          * The CCCR's OVF bit indicates that the corresponding counter has
 604          * overflowed. It must be explicitly cleared by software, so it is
 605          * safe to read the CCCR values here.
 606          */
 607         for (i = 0; i < 18; i++) {
 608                 if (rdmsr(p4_ctrs[i].pc_ctladdr) & CCCR_OVF)
 609                         ret |= (1 << i);
 610         }
 611 
 612         /*
 613          * Pentium 4 and Xeon turn off the CPC interrupt mask bit in the LVT at
 614          * every overflow. Turn it back on here.
 615          */
 616         ASSERT(kcpc_hw_overflow_intr_installed);
 617         (*kcpc_hw_enable_cpc_intr)();
 618 
 619         return (ret);
 620 }
 621 
 622 static int
 623 p4_escr_inuse(p4_pcbe_config_t **cfgs, int escr_ndx)
 624 {
 625         int i;
 626 
 627         for (i = 0; i < 18; i++) {
 628                 if (cfgs[i] == NULL)
 629                         continue;
 630                 if (cfgs[i]->p4_escr_ndx == escr_ndx)
 631                         return (1);
 632         }
 633 
 634         return (0);
 635 }
 636 
 637 static void
 638 build_cfgs(p4_pcbe_config_t *cfgs[18], uint64_t *data[18], void *token)
 639 {
 640         p4_pcbe_config_t        *cfg = NULL;
 641         uint64_t                *daddr;
 642 
 643         bzero(cfgs, 18 * sizeof (p4_pcbe_config_t *));
 644 
 645         do {
 646                 cfg = (p4_pcbe_config_t *)kcpc_next_config(token, cfg, &daddr);
 647 
 648                 if (cfg != NULL) {
 649                         ASSERT(cfg->p4_picno < 18);
 650                         cfgs[cfg->p4_picno] = cfg;
 651                         if (data != NULL) {
 652                                 ASSERT(daddr != NULL);
 653                                 data[cfg->p4_picno] = daddr;
 654                         }
 655                 }
 656         } while (cfg != NULL);
 657 }
 658 
 659 /*
 660  * Programming a counter:
 661  *
 662  * Select event.
 663  * Choose an ESCR capable of counting that event.
 664  * Set up the ESCR with the desired parameters (usr, sys, tag).
 665  * Set up the CCCR to point to the selected ESCR.
 666  * Set the CCCR parameters (overflow, cascade, edge, etc).
 667  */
 668 static int
 669 p4_pcbe_configure(uint_t picnum, char *eventname, uint64_t preset,
 670     uint32_t flags, uint_t nattrs, kcpc_attr_t *attrs, void **data,
 671     void *token)
 672 {
 673         p4_pcbe_config_t        *cfgs[18];
 674         p4_pcbe_config_t        *cfg;
 675         p4_event_t              *ev;
 676         p4_generic_event_t      *gevp;
 677         int                     escr_ndx;
 678         int                     i;
 679         uint16_t                emask = 0;
 680         uint8_t                 tag;
 681         int                     use_tag = 0;
 682         int                     active_thread = 0x3; /* default is "any" */
 683         int                     compare = 0;
 684         int                     complement = 0;
 685         int                     threshold = 0;
 686         int                     edge = 0;
 687         int                     sibling_usr = 0; /* count usr on other cpu */
 688         int                     sibling_sys = 0; /* count sys on other cpu */
 689         int                     invalid_attr = 0;
 690 
 691         /*
 692          * If we've been handed an existing configuration, we need only preset
 693          * the counter value.
 694          */
 695         if (*data != NULL) {
 696                 cfg = *data;
 697                 cfg->p4_rawpic = preset & MASK40;
 698                 return (0);
 699         }
 700 
 701         if (picnum < 0 || picnum >= 18)
 702                 return (CPC_INVALID_PICNUM);
 703 
 704         if ((ev = find_event(eventname)) == NULL) {
 705                 if ((gevp = find_generic_event(eventname)) != NULL) {
 706                         ev = find_event(gevp->event);
 707                         ASSERT(ev != NULL);
 708 
 709                         /*
 710                          * For generic events a HTT processor is only allowed
 711                          * to specify the 'active_thread', 'count_sibling_usr'
 712                          * and 'count_sibling_sys' attributes.
 713                          */
 714                         if (p4_htt)
 715                                 for (i = 0; i < nattrs; i++)
 716                                         if (strstr(P4_ATTRS,
 717                                             attrs[i].ka_name) != NULL)
 718                                                 invalid_attr = 1;
 719 
 720                         if ((p4_htt && invalid_attr) ||
 721                             (!p4_htt && nattrs > 0))
 722                                 return (CPC_ATTRIBUTE_OUT_OF_RANGE);
 723 
 724                         emask = gevp->emask;
 725                 } else {
 726                         return (CPC_INVALID_EVENT);
 727                 }
 728         }
 729 
 730         build_cfgs(cfgs, NULL, token);
 731 
 732         /*
 733          * Find an ESCR capable of counting this event.
 734          */
 735         for (escr_ndx = 0; escr_ndx < ESCR_MAX_INDEX; escr_ndx++) {
 736                 if ((ev->pe_escr_map & (1ULL << escr_ndx)) &&
 737                     p4_escr_inuse(cfgs, escr_ndx) == 0)
 738                         break;
 739         }
 740 
 741         /*
 742          * All ESCRs capable of counting this event are already being
 743          * used.
 744          */
 745         if (escr_ndx == ESCR_MAX_INDEX)
 746                 return (CPC_RESOURCE_UNAVAIL);
 747 
 748         /*
 749          * At this point, ev points to the desired event and escr is the index
 750          * of a capable and available ESCR.
 751          *
 752          * Now process and verify the attributes.
 753          */
 754         for (i = 0; i < nattrs; i++) {
 755                 if (strcmp("emask", attrs[i].ka_name) == 0) {
 756                         if ((attrs[i].ka_val | ev->pe_escr_mask)
 757                             != ev->pe_escr_mask)
 758                                 return (CPC_ATTRIBUTE_OUT_OF_RANGE);
 759                         emask = attrs[i].ka_val;
 760                         continue;
 761                 } else if (strcmp("tag", attrs[i].ka_name) == 0) {
 762                         if (attrs[i].ka_val > ESCR_TAG_VALUE_MAX)
 763                                 return (CPC_ATTRIBUTE_OUT_OF_RANGE);
 764                         tag = attrs[i].ka_val;
 765                         use_tag = 1;
 766                         continue;
 767                 } else if (strcmp("compare", attrs[i].ka_name) == 0) {
 768                         if (attrs[i].ka_val != 0)
 769                                 compare = 1;
 770                         continue;
 771                 } else if (strcmp("complement", attrs[i].ka_name) == 0) {
 772                         if (attrs[i].ka_val != 0)
 773                                 complement = 1;
 774                         continue;
 775                 } else if (strcmp("threshold", attrs[i].ka_name) == 0) {
 776                         if (attrs[i].ka_val > CCCR_THRESHOLD_MAX)
 777                                 return (CPC_ATTRIBUTE_OUT_OF_RANGE);
 778                         threshold = attrs[i].ka_val;
 779                         continue;
 780                 } else if (strcmp("edge", attrs[i].ka_name) == 0) {
 781                         if (attrs[i].ka_val != 0)
 782                                 edge = 1;
 783                         continue;
 784                 }
 785 
 786                 /*
 787                  * The remaining attributes are valid only on HyperThreaded P4s
 788                  * for processes with the "cpc_cpu" privilege.
 789                  */
 790                 if (p4_htt == 0)
 791                         return (CPC_INVALID_ATTRIBUTE);
 792 
 793                 if (secpolicy_cpc_cpu(crgetcred()) != 0)
 794                         return (CPC_ATTR_REQUIRES_PRIVILEGE);
 795 
 796                 if (strcmp("active_thread", attrs[i].ka_name) == 0) {
 797                         if ((attrs[i].ka_val | CCCR_ACTV_THR_MASK) !=
 798                             CCCR_ACTV_THR_MASK)
 799                                 return (CPC_ATTRIBUTE_OUT_OF_RANGE);
 800                         active_thread = (int)attrs[i].ka_val;
 801                 } else if (strcmp("count_sibling_usr", attrs[i].ka_name) == 0) {
 802                         if (attrs[i].ka_val != 0)
 803                                 sibling_usr = 1;
 804                 } else if (strcmp("count_sibling_sys", attrs[i].ka_name) == 0) {
 805                         if (attrs[i].ka_val != 0)
 806                                 sibling_sys = 1;
 807                 } else
 808                         return (CPC_INVALID_ATTRIBUTE);
 809         }
 810 
 811         /*
 812          * Make sure the counter can count this event
 813          */
 814         if ((ev->pe_ctr_mask & C(picnum)) == 0)
 815                 return (CPC_PIC_NOT_CAPABLE);
 816 
 817         /*
 818          * Find an ESCR that lines up with the event _and_ the counter.
 819          */
 820         for (escr_ndx = 0; escr_ndx < ESCR_MAX_INDEX; escr_ndx++) {
 821                 if ((ev->pe_escr_map & (1ULL << escr_ndx)) &&
 822                     (p4_escrs[escr_ndx].pe_map & (1 << picnum)) &&
 823                     p4_escr_inuse(cfgs, escr_ndx) == 0)
 824                         break;
 825         }
 826         if (escr_ndx == ESCR_MAX_INDEX)
 827                 return (CPC_RESOURCE_UNAVAIL);
 828 
 829         cfg = (p4_pcbe_config_t *)kmem_alloc(sizeof (p4_pcbe_config_t),
 830             KM_SLEEP);
 831 
 832         cfg->p4_flags = 0;
 833         cfg->p4_picno = picnum;
 834         cfg->p4_escr_ndx = escr_ndx;
 835         cfg->p4_escr = (ev->pe_ev << ESCR_EVSEL_SHIFT) |
 836             (emask << ESCR_EVMASK_SHIFT);
 837 
 838         if (use_tag == 1) {
 839                 cfg->p4_escr |= tag << ESCR_TAG_VALUE_SHIFT;
 840                 cfg->p4_escr |= ESCR_TAG_ENABLE;
 841         }
 842 
 843         if (p4_htt) {
 844                 /*
 845                  * This is a HyperThreaded P4.  Since we don't know which
 846                  * logical CPU this configuration will eventually be programmed
 847                  * on, we can't yet decide which fields of the ESCR to select.
 848                  *
 849                  * Record the necessary information in the flags for later.
 850                  */
 851                 if (flags & CPC_COUNT_USER)
 852                         cfg->p4_flags |= P4_THIS_USR;
 853                 if (flags & CPC_COUNT_SYSTEM)
 854                         cfg->p4_flags |= P4_THIS_SYS;
 855                 if (p4_htt && sibling_usr)
 856                         cfg->p4_flags |= P4_SIBLING_USR;
 857                 if (p4_htt && sibling_sys)
 858                         cfg->p4_flags |= P4_SIBLING_SYS;
 859         } else {
 860                 /*
 861                  * This is not HyperThreaded, so we can determine the exact
 862                  * ESCR value necessary now.
 863                  */
 864                 if (flags & CPC_COUNT_USER)
 865                         cfg->p4_escr |= ESCR_USR;
 866                 if (flags & CPC_COUNT_SYSTEM)
 867                         cfg->p4_escr |= ESCR_OS;
 868         }
 869 
 870         cfg->p4_rawpic = preset & MASK40;
 871 
 872         /*
 873          * Even on non-HT P4s, Intel states the active_thread field (marked as
 874          * "reserved" for the non-HT chips) must be set to all 1s.
 875          */
 876         cfg->p4_cccr = CCCR_INIT | (active_thread << CCCR_ACTV_THR_SHIFT);
 877         if (compare)
 878                 cfg->p4_cccr |= CCCR_COMPARE;
 879         if (complement)
 880                 cfg->p4_cccr |= CCCR_COMPLEMENT;
 881         cfg->p4_cccr |= threshold << CCCR_THRESHOLD_SHIFT;
 882         if (edge)
 883                 cfg->p4_cccr |= CCCR_EDGE;
 884         cfg->p4_cccr |= p4_escrs[cfg->p4_escr_ndx].pe_num
 885             << CCCR_ESCR_SEL_SHIFT;
 886         if (flags & CPC_OVF_NOTIFY_EMT) {
 887                 if (p4_htt)
 888                         cfg->p4_flags |= P4_PMI;
 889                 else {
 890                         /*
 891                          * If the user has asked for notification of overflows,
 892                          * we automatically program the hardware to generate an
 893                          * interrupt on overflow.
 894                          *
 895                          * This can only be programmed now if this P4 doesn't
 896                          * have HyperThreading. If it does, we must wait until
 897                          * we know which logical CPU we'll be programming.
 898                          */
 899                         cfg->p4_cccr |= CCCR_OVF_PMI;
 900                 }
 901         }
 902 
 903         *data = cfg;
 904 
 905         return (0);
 906 }
 907 
 908 static void
 909 p4_pcbe_program(void *token)
 910 {
 911         int                     i;
 912         uint64_t                cccr;
 913         p4_pcbe_config_t        *cfgs[18];
 914 
 915         p4_pcbe_allstop();
 916 
 917         build_cfgs(cfgs, NULL, token);
 918 
 919         if (p4_rdpmc_avail) {
 920                 ulong_t curcr4 = getcr4();
 921                 if (kcpc_allow_nonpriv(token))
 922                         setcr4(curcr4 | CR4_PCE);
 923                 else
 924                         setcr4(curcr4 & ~CR4_PCE);
 925         }
 926 
 927         /*
 928          * Ideally we would start all counters with a single operation, but in
 929          * P4 each counter is enabled individually via its CCCR. To minimize the
 930          * probe effect of enabling the counters, we do it in two passes: the
 931          * first programs the counter and ESCR, and the second programs the
 932          * CCCR (and thus enables the counter).
 933          */
 934         if (p4_htt) {
 935                 int     lid = cpuid_get_clogid(CPU); /* Logical ID of CPU */
 936 
 937                 for (i = 0; i < 18; i++) {
 938                         uint64_t escr;
 939 
 940                         if (cfgs[i] == NULL)
 941                                 continue;
 942                         escr = (uint64_t)cfgs[i]->p4_escr;
 943 
 944                         if (cfgs[i]->p4_flags & P4_THIS_USR)
 945                                 escr |= (lid == 0) ? ESCR_T0_USR : ESCR_T1_USR;
 946                         if (cfgs[i]->p4_flags & P4_THIS_SYS)
 947                                 escr |= (lid == 0) ? ESCR_T0_OS : ESCR_T1_OS;
 948                         if (cfgs[i]->p4_flags & P4_SIBLING_USR)
 949                                 escr |= (lid == 0) ? ESCR_T1_USR : ESCR_T0_USR;
 950                         if (cfgs[i]->p4_flags & P4_SIBLING_SYS)
 951                                 escr |= (lid == 0) ? ESCR_T1_OS : ESCR_T0_OS;
 952 
 953                         wrmsr(p4_ctrs[i].pc_caddr, cfgs[i]->p4_rawpic);
 954                         wrmsr(p4_escrs[cfgs[i]->p4_escr_ndx].pe_addr, escr);
 955                 }
 956 
 957                 for (i = 0; i < 18; i++) {
 958                         if (cfgs[i] == NULL)
 959                                 continue;
 960                         cccr = (uint64_t)cfgs[i]->p4_cccr;
 961                         /*
 962                          * We always target the overflow interrupt at the
 963                          * logical CPU which is doing the counting.
 964                          */
 965                         if (cfgs[i]->p4_flags & P4_PMI)
 966                                 cccr |= (lid == 0) ?
 967                                     CCCR_OVF_PMI_T0 : CCCR_OVF_PMI_T1;
 968                         wrmsr(p4_ctrs[i].pc_ctladdr, cccr);
 969                 }
 970         } else {
 971                 for (i = 0; i < 18; i++) {
 972                         if (cfgs[i] == NULL)
 973                                 continue;
 974                         wrmsr(p4_ctrs[i].pc_caddr, cfgs[i]->p4_rawpic);
 975                         wrmsr(p4_escrs[cfgs[i]->p4_escr_ndx].pe_addr,
 976                             (uint64_t)cfgs[i]->p4_escr);
 977                 }
 978 
 979                 for (i = 0; i < 18; i++) {
 980                         if (cfgs[i] == NULL)
 981                                 continue;
 982                         wrmsr(p4_ctrs[i].pc_ctladdr,
 983                             (uint64_t)cfgs[i]->p4_cccr);
 984                 }
 985         }
 986 }
 987 
 988 static void
 989 p4_pcbe_allstop(void)
 990 {
 991         int             i;
 992 
 993         for (i = 0; i < 18; i++)
 994                 wrmsr(p4_ctrs[i].pc_ctladdr, 0ULL);
 995 
 996         setcr4(getcr4() & ~CR4_PCE);
 997 }
 998 
 999 
1000 static void
1001 p4_pcbe_sample(void *token)
1002 {
1003         p4_pcbe_config_t        *cfgs[18];
1004         uint64_t                *addrs[18];
1005         uint64_t                curpic[18];
1006         int64_t                 diff;
1007         int                     i;
1008 
1009         for (i = 0; i < 18; i++)
1010                 curpic[i] = rdmsr(p4_ctrs[i].pc_caddr);
1011 
1012         build_cfgs(cfgs, addrs, token);
1013 
1014         for (i = 0; i < 18; i++) {
1015                 if (cfgs[i] == NULL)
1016                         continue;
1017                 diff = curpic[i] - cfgs[i]->p4_rawpic;
1018                 if (diff < 0)
1019                         diff += (1ll << 40);
1020                 *addrs[i] += diff;
1021                 DTRACE_PROBE4(p4__pcbe__sample, int, i, uint64_t, *addrs[i],
1022                     uint64_t, curpic[i], uint64_t, cfgs[i]->p4_rawpic);
1023                 cfgs[i]->p4_rawpic = *addrs[i] & MASK40;
1024         }
1025 }
1026 
1027 static void
1028 p4_pcbe_free(void *config)
1029 {
1030         kmem_free(config, sizeof (p4_pcbe_config_t));
1031 }
1032 
1033 static struct modlpcbe modlpcbe = {
1034         &mod_pcbeops,
1035         "Pentium 4 Performance Counters",
1036         &p4_pcbe_ops
1037 };
1038 
1039 static struct modlinkage modl = {
1040         MODREV_1,
1041         { &modlpcbe, NULL }
1042 };
1043 
1044 int
1045 _init(void)
1046 {
1047         if (p4_pcbe_init() != 0)
1048                 return (ENOTSUP);
1049         return (mod_install(&modl));
1050 }
1051 
1052 int
1053 _fini(void)
1054 {
1055         return (mod_remove(&modl));
1056 }
1057 
1058 int
1059 _info(struct modinfo *mi)
1060 {
1061         return (mod_info(&modl, mi));
1062 }