1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 /*
  27  * This file contains preset event names from the Performance Application
  28  * Programming Interface v3.5 which included the following notice:
  29  *
  30  *                             Copyright (c) 2005,6
  31  *                           Innovative Computing Labs
  32  *                         Computer Science Department,
  33  *                            University of Tennessee,
  34  *                                 Knoxville, TN.
  35  *                              All Rights Reserved.
  36  *
  37  *
  38  * Redistribution and use in source and binary forms, with or without
  39  * modification, are permitted provided that the following conditions are met:
  40  *
  41  *    * Redistributions of source code must retain the above copyright notice,
  42  *      this list of conditions and the following disclaimer.
  43  *    * Redistributions in binary form must reproduce the above copyright
  44  *      notice, this list of conditions and the following disclaimer in the
  45  *      documentation and/or other materials provided with the distribution.
  46  *    * Neither the name of the University of Tennessee nor the names of its
  47  *      contributors may be used to endorse or promote products derived from
  48  *      this software without specific prior written permission.
  49  *
  50  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  51  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  52  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  53  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  54  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  55  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  56  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  57  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  58  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  59  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  60  * POSSIBILITY OF SUCH DAMAGE.
  61  *
  62  *
  63  * This open source software license conforms to the BSD License template.
  64  */
  65 
  66 /*
  67  * Portions Copyright 2009 Advanced Micro Devices, Inc.
  68  */
  69 
  70 /*
  71  * Performance Counter Back-End for AMD Opteron and AMD Athlon 64 processors.
  72  */
  73 
  74 #include <sys/cpuvar.h>
  75 #include <sys/param.h>
  76 #include <sys/systm.h>
  77 #include <sys/cpc_pcbe.h>
  78 #include <sys/kmem.h>
  79 #include <sys/sdt.h>
  80 #include <sys/modctl.h>
  81 #include <sys/errno.h>
  82 #include <sys/debug.h>
  83 #include <sys/archsystm.h>
  84 #include <sys/x86_archext.h>
  85 #include <sys/privregs.h>
  86 #include <sys/ddi.h>
  87 #include <sys/sunddi.h>
  88 
/*
 * Forward declarations for this backend's entry points.  All of them except
 * opt_pcbe_init() are referenced by the opt_pcbe_ops initializer that
 * follows, so they must be declared before it.
 */
static int opt_pcbe_init(void);
static uint_t opt_pcbe_ncounters(void);
static const char *opt_pcbe_impl_name(void);
static const char *opt_pcbe_cpuref(void);
static char *opt_pcbe_list_events(uint_t picnum);
static char *opt_pcbe_list_attrs(void);
static uint64_t opt_pcbe_event_coverage(char *event);
static uint64_t opt_pcbe_overflow_bitmap(void);
static int opt_pcbe_configure(uint_t picnum, char *event, uint64_t preset,
    uint32_t flags, uint_t nattrs, kcpc_attr_t *attrs, void **data,
    void *token);
static void opt_pcbe_program(void *token);
static void opt_pcbe_allstop(void);
static void opt_pcbe_sample(void *token);
static void opt_pcbe_free(void *config);
 104 
/*
 * Operations vector for this backend.  The initializer is positional, so the
 * order of the entries must match the member order of pcbe_ops_t.
 * NOTE(review): pcbe_ops_t is declared outside this file (presumably in
 * <sys/cpc_pcbe.h>); confirm the member order there before reordering.
 */
static pcbe_ops_t opt_pcbe_ops = {
        PCBE_VER_1,
        CPC_CAP_OVERFLOW_INTERRUPT,
        opt_pcbe_ncounters,
        opt_pcbe_impl_name,
        opt_pcbe_cpuref,
        opt_pcbe_list_events,
        opt_pcbe_list_attrs,
        opt_pcbe_event_coverage,
        opt_pcbe_overflow_bitmap,
        opt_pcbe_configure,
        opt_pcbe_program,
        opt_pcbe_allstop,
        opt_pcbe_sample,
        opt_pcbe_free
};
 121 
 122 /*
 123  * Define offsets and masks for the fields in the Performance
 124  * Event-Select (PES) registers.
 125  */
/* Bit positions of the individual fields within the 64-bit PES value. */
#define OPT_PES_HOST_SHIFT      41
#define OPT_PES_GUEST_SHIFT     40
#define OPT_PES_CMASK_SHIFT     24
/* Unshifted width mask for the cmask field (range-checked before shifting) */
#define OPT_PES_CMASK_MASK      0xFF
#define OPT_PES_INV_SHIFT       23
#define OPT_PES_ENABLE_SHIFT    22
#define OPT_PES_INT_SHIFT       20
#define OPT_PES_PC_SHIFT        19
#define OPT_PES_EDGE_SHIFT      18
#define OPT_PES_OS_SHIFT        17
#define OPT_PES_USR_SHIFT       16
#define OPT_PES_UMASK_SHIFT     8
/* Unshifted width mask for the umask field (range-checked before shifting) */
#define OPT_PES_UMASK_MASK      0xFF

/*
 * Single-bit flags built from the shifts above.  They are 64-bit constants
 * (1ULL) because the Host and Guest bits lie above bit 31.
 */
#define OPT_PES_INV             (1ULL << OPT_PES_INV_SHIFT)
#define OPT_PES_ENABLE          (1ULL << OPT_PES_ENABLE_SHIFT)
#define OPT_PES_INT             (1ULL << OPT_PES_INT_SHIFT)
#define OPT_PES_PC              (1ULL << OPT_PES_PC_SHIFT)
#define OPT_PES_EDGE            (1ULL << OPT_PES_EDGE_SHIFT)
#define OPT_PES_OS              (1ULL << OPT_PES_OS_SHIFT)
#define OPT_PES_USR             (1ULL << OPT_PES_USR_SHIFT)
#define OPT_PES_HOST            (1ULL << OPT_PES_HOST_SHIFT)
#define OPT_PES_GUEST           (1ULL << OPT_PES_GUEST_SHIFT)
 149 
/*
 * Configuration of a single counter.  opt_pcbe_configure() allocates one of
 * these per request and hands it back through its 'data' argument; the
 * counter value it stores in opt_rawpic is limited to the low 48 bits
 * (MASK48).
 */
typedef struct _opt_pcbe_config {
        uint8_t         opt_picno;      /* Counter number: 0, 1, 2, or 3 */
        uint64_t        opt_evsel;      /* Event Selection register */
        uint64_t        opt_rawpic;     /* Raw counter value */
} opt_pcbe_config_t;
 155 
/*
 * One placeholder configuration per counter number, each with a zero event
 * select value and a zero counter value.  opt_pcbe_program() initializes its
 * per-counter configuration pointers to these entries.
 */
opt_pcbe_config_t nullcfgs[4] = {
        { 0, 0, 0 },
        { 1, 0, 0 },
        { 2, 0, 0 },
        { 3, 0, 0 }
};
 162 
/*
 * A native (processor-specific) event.  opt_pcbe_configure() places bits
 * [7:0] of emask in bits [7:0] of the event select value and bits [11:8] of
 * emask in bits [35:32]; bits [15:12] of emask are not used.
 */
typedef struct _amd_event {
        char            *name;
        uint16_t        emask;          /* Event mask setting */
} amd_event_t;
 167 
/*
 * A generic (PAPI-named) event, expressed as an alias for a native event
 * plus a fixed unit mask.
 */
typedef struct _amd_generic_event {
        char *name;     /* generic event name, e.g. "PAPI_tot_cyc" */
        char *event;    /* name of the native event, resolved via find_event() */
        uint8_t umask;  /* unit mask placed at OPT_PES_UMASK_SHIFT */
} amd_generic_event_t;
 173 
 174 /*
 175  * Base MSR addresses for the PerfEvtSel registers and the counters themselves.
 176  * Add counter number to base address to get corresponding MSR address.
 177  */
#define PES_BASE_ADDR   0xC0010000
#define PIC_BASE_ADDR   0xC0010004

/* Low 48 bits; applied to counter preset values before they are stored. */
#define MASK48          0xFFFFFFFFFFFF

/*
 * Sentinel entries terminating the amd_event_t and amd_generic_event_t
 * tables; table walks stop at the first entry whose name is NULL.
 */
#define EV_END {NULL, 0}
#define GEN_EV_END {NULL, NULL, 0 }
 185 
/*
 * Native events placed at the start of every per-family event table
 * (family_f_events, family_10h_events and family_11h_events).  Each entry is
 * an amd_event_t initializer: { name, emask }.  The list has no trailing
 * comma so that it can be followed by further initializers.
 */
#define AMD_cmn_events                                          \
        { "FP_dispatched_fpu_ops",                      0x0 },  \
        { "FP_cycles_no_fpu_ops_retired",               0x1 },  \
        { "FP_dispatched_fpu_ops_ff",                   0x2 },  \
        { "LS_seg_reg_load",                            0x20 }, \
        { "LS_uarch_resync_self_modify",                0x21 }, \
        { "LS_uarch_resync_snoop",                      0x22 }, \
        { "LS_buffer_2_full",                           0x23 }, \
        { "LS_locked_operation",                        0x24 }, \
        { "LS_retired_cflush",                          0x26 }, \
        { "LS_retired_cpuid",                           0x27 }, \
        { "DC_access",                                  0x40 }, \
        { "DC_miss",                                    0x41 }, \
        { "DC_refill_from_L2",                          0x42 }, \
        { "DC_refill_from_system",                      0x43 }, \
        { "DC_copyback",                                0x44 }, \
        { "DC_dtlb_L1_miss_L2_hit",                     0x45 }, \
        { "DC_dtlb_L1_miss_L2_miss",                    0x46 }, \
        { "DC_misaligned_data_ref",                     0x47 }, \
        { "DC_uarch_late_cancel_access",                0x48 }, \
        { "DC_uarch_early_cancel_access",               0x49 }, \
        { "DC_1bit_ecc_error_found",                    0x4A }, \
        { "DC_dispatched_prefetch_instr",               0x4B }, \
        { "DC_dcache_accesses_by_locks",                0x4C }, \
        { "BU_memory_requests",                         0x65 }, \
        { "BU_data_prefetch",                           0x67 }, \
        { "BU_system_read_responses",                   0x6C }, \
        { "BU_cpu_clk_unhalted",                        0x76 }, \
        { "BU_internal_L2_req",                         0x7D }, \
        { "BU_fill_req_missed_L2",                      0x7E }, \
        { "BU_fill_into_L2",                            0x7F }, \
        { "IC_fetch",                                   0x80 }, \
        { "IC_miss",                                    0x81 }, \
        { "IC_refill_from_L2",                          0x82 }, \
        { "IC_refill_from_system",                      0x83 }, \
        { "IC_itlb_L1_miss_L2_hit",                     0x84 }, \
        { "IC_itlb_L1_miss_L2_miss",                    0x85 }, \
        { "IC_uarch_resync_snoop",                      0x86 }, \
        { "IC_instr_fetch_stall",                       0x87 }, \
        { "IC_return_stack_hit",                        0x88 }, \
        { "IC_return_stack_overflow",                   0x89 }, \
        { "FR_retired_x86_instr_w_excp_intr",           0xC0 }, \
        { "FR_retired_uops",                            0xC1 }, \
        { "FR_retired_branches_w_excp_intr",            0xC2 }, \
        { "FR_retired_branches_mispred",                0xC3 }, \
        { "FR_retired_taken_branches",                  0xC4 }, \
        { "FR_retired_taken_branches_mispred",          0xC5 }, \
        { "FR_retired_far_ctl_transfer",                0xC6 }, \
        { "FR_retired_resyncs",                         0xC7 }, \
        { "FR_retired_near_rets",                       0xC8 }, \
        { "FR_retired_near_rets_mispred",               0xC9 }, \
        { "FR_retired_taken_branches_mispred_addr_miscomp",     0xCA },\
        { "FR_retired_fastpath_double_op_instr",        0xCC }, \
        { "FR_intr_masked_cycles",                      0xCD }, \
        { "FR_intr_masked_while_pending_cycles",        0xCE }, \
        { "FR_taken_hardware_intrs",                    0xCF }, \
        { "FR_nothing_to_dispatch",                     0xD0 }, \
        { "FR_dispatch_stalls",                         0xD1 }, \
        { "FR_dispatch_stall_branch_abort_to_retire",   0xD2 }, \
        { "FR_dispatch_stall_serialization",            0xD3 }, \
        { "FR_dispatch_stall_segment_load",             0xD4 }, \
        { "FR_dispatch_stall_reorder_buffer_full",      0xD5 }, \
        { "FR_dispatch_stall_resv_stations_full",       0xD6 }, \
        { "FR_dispatch_stall_fpu_full",                 0xD7 }, \
        { "FR_dispatch_stall_ls_full",                  0xD8 }, \
        { "FR_dispatch_stall_waiting_all_quiet",        0xD9 }, \
        { "FR_dispatch_stall_far_ctl_trsfr_resync_branch_pend", 0xDA },\
        { "FR_fpu_exception",                           0xDB }, \
        { "FR_num_brkpts_dr0",                          0xDC }, \
        { "FR_num_brkpts_dr1",                          0xDD }, \
        { "FR_num_brkpts_dr2",                          0xDE }, \
        { "FR_num_brkpts_dr3",                          0xDF }, \
        { "NB_mem_ctrlr_page_access",                   0xE0 }, \
        { "NB_mem_ctrlr_turnaround",                    0xE3 }, \
        { "NB_mem_ctrlr_bypass_counter_saturation",     0xE4 }, \
        { "NB_cpu_io_to_mem_io",                        0xE9 }, \
        { "NB_cache_block_commands",                    0xEA }, \
        { "NB_sized_commands",                          0xEB }, \
        { "NB_ht_bus0_bandwidth",                       0xF6 }
 265 
/*
 * Native events appended to AMD_cmn_events in family_f_events.  That table
 * is used for family 0xf and also as the fallback for families this module
 * does not explicitly support.
 */
#define AMD_FAMILY_f_events                                     \
        { "BU_quadwords_written_to_system",             0x6D }, \
        { "FR_retired_fpu_instr",                       0xCB }, \
        { "NB_mem_ctrlr_page_table_overflow",           0xE1 }, \
        { "NB_sized_blocks",                            0xE5 }, \
        { "NB_ECC_errors",                              0xE8 }, \
        { "NB_probe_result",                            0xEC }, \
        { "NB_gart_events",                             0xEE }, \
        { "NB_ht_bus1_bandwidth",                       0xF7 }, \
        { "NB_ht_bus2_bandwidth",                       0xF8 }
 276 
/*
 * Native events appended to AMD_cmn_events in family_10h_events.  Several
 * codes here are wider than 8 bits (e.g. 0x165, 0x1C0, 0x4E0); for those,
 * opt_pcbe_configure() moves bits [11:8] of the code into bits [35:32] of
 * the event select value.
 */
#define AMD_FAMILY_10h_events                                   \
        { "FP_retired_sse_ops",                         0x3 },  \
        { "FP_retired_move_ops",                        0x4 },  \
        { "FP_retired_serialize_ops",                   0x5 },  \
        { "FP_serialize_ops_cycles",                    0x6 },  \
        { "LS_cancelled_store_to_load_fwd_ops",         0x2A }, \
        { "LS_smi_received",                            0x2B }, \
        { "DC_dtlb_L1_hit",                             0x4D }, \
        { "LS_ineffective_prefetch",                    0x52 }, \
        { "LS_global_tlb_flush",                        0x54 }, \
        { "BU_octwords_written_to_system",              0x6D }, \
        { "Page_size_mismatches",                       0x165 },        \
        { "IC_eviction",                                0x8B }, \
        { "IC_cache_lines_invalidate",                  0x8C }, \
        { "IC_itlb_reload",                             0x99 }, \
        { "IC_itlb_reload_aborted",                     0x9A }, \
        { "FR_retired_mmx_sse_fp_instr",                0xCB }, \
        { "Retired_x87_fp_ops",                         0x1C0 },        \
        { "IBS_ops_tagged",                             0x1CF },        \
        { "LFENCE_inst_retired",                        0x1D3 },        \
        { "SFENCE_inst_retired",                        0x1D4 },        \
        { "MFENCE_inst_retired",                        0x1D5 },        \
        { "NB_mem_ctrlr_page_table_overflow",           0xE1 }, \
        { "NB_mem_ctrlr_dram_cmd_slots_missed",         0xE2 }, \
        { "NB_thermal_status",                          0xE8 }, \
        { "NB_probe_results_upstream_req",              0xEC }, \
        { "NB_gart_events",                             0xEE }, \
        { "NB_mem_ctrlr_req",                           0x1F0 },        \
        { "CB_cpu_to_dram_req_to_target",               0x1E0 },        \
        { "CB_io_to_dram_req_to_target",                0x1E1 },        \
        { "CB_cpu_read_cmd_latency_to_target_0_to_3",   0x1E2 },        \
        { "CB_cpu_read_cmd_req_to_target_0_to_3",       0x1E3 },        \
        { "CB_cpu_read_cmd_latency_to_target_4_to_7",   0x1E4 },        \
        { "CB_cpu_read_cmd_req_to_target_4_to_7",       0x1E5 },        \
        { "CB_cpu_cmd_latency_to_target_0_to_7",        0x1E6 },        \
        { "CB_cpu_req_to_target_0_to_7",                0x1E7 },        \
        { "NB_ht_bus1_bandwidth",                       0xF7 }, \
        { "NB_ht_bus2_bandwidth",                       0xF8 }, \
        { "NB_ht_bus3_bandwidth",                       0x1F9 },        \
        { "L3_read_req",                                0x4E0 },        \
        { "L3_miss",                                    0x4E1 },        \
        { "L3_l2_eviction_l3_fill",                     0x4E2 },        \
        { "L3_eviction",                                0x4E3 }
 320 
/*
 * Native events appended to AMD_cmn_events in family_11h_events.
 *
 * NOTE(review): this family has no "FR_retired_fpu_instr" entry (event 0xCB
 * is named "FR_retired_mmx_fp_instr" here), yet opt_pcbe_init() pairs family
 * 0x11 with opt_generic_events, whose PAPI_fp_ins and PAPI_vec_ins aliases
 * refer to "FR_retired_fpu_instr".  Those two aliases therefore cannot be
 * resolved by find_event() on family 0x11.
 */
#define AMD_FAMILY_11h_events                                   \
        { "BU_quadwords_written_to_system",             0x6D }, \
        { "FR_retired_mmx_fp_instr",                    0xCB }, \
        { "NB_mem_ctrlr_page_table_events",             0xE1 }, \
        { "NB_thermal_status",                          0xE8 }, \
        { "NB_probe_results_upstream_req",              0xEC }, \
        { "NB_dev_events",                              0xEE }, \
        { "NB_mem_ctrlr_req",                           0x1F0 }
 329 
/*
 * Generic (PAPI-named) events placed at the start of both generic event
 * tables.  Each entry is an amd_generic_event_t initializer:
 * { generic name, native event name, unit mask }.  Every native event named
 * here appears in AMD_cmn_events.
 */
#define AMD_cmn_generic_events                                          \
        { "PAPI_br_ins",        "FR_retired_branches_w_excp_intr", 0x0 },\
        { "PAPI_br_msp",        "FR_retired_branches_mispred",  0x0 },  \
        { "PAPI_br_tkn",        "FR_retired_taken_branches",    0x0 },  \
        { "PAPI_fp_ops",        "FP_dispatched_fpu_ops",        0x3 },  \
        { "PAPI_fad_ins",       "FP_dispatched_fpu_ops",        0x1 },  \
        { "PAPI_fml_ins",       "FP_dispatched_fpu_ops",        0x2 },  \
        { "PAPI_fpu_idl",       "FP_cycles_no_fpu_ops_retired", 0x0 },  \
        { "PAPI_tot_cyc",       "BU_cpu_clk_unhalted",          0x0 },  \
        { "PAPI_tot_ins",       "FR_retired_x86_instr_w_excp_intr", 0x0 }, \
        { "PAPI_l1_dca",        "DC_access",                    0x0 },  \
        { "PAPI_l1_dcm",        "DC_miss",                      0x0 },  \
        { "PAPI_l1_ldm",        "DC_refill_from_L2",            0xe },  \
        { "PAPI_l1_stm",        "DC_refill_from_L2",            0x10 }, \
        { "PAPI_l1_ica",        "IC_fetch",                     0x0 },  \
        { "PAPI_l1_icm",        "IC_miss",                      0x0 },  \
        { "PAPI_l1_icr",        "IC_fetch",                     0x0 },  \
        { "PAPI_l2_dch",        "DC_refill_from_L2",            0x1e }, \
        { "PAPI_l2_dcm",        "DC_refill_from_system",        0x1e }, \
        { "PAPI_l2_dcr",        "DC_refill_from_L2",            0xe },  \
        { "PAPI_l2_dcw",        "DC_refill_from_L2",            0x10 }, \
        { "PAPI_l2_ich",        "IC_refill_from_L2",            0x0 },  \
        { "PAPI_l2_icm",        "IC_refill_from_system",        0x0 },  \
        { "PAPI_l2_ldm",        "DC_refill_from_system",        0xe },  \
        { "PAPI_l2_stm",        "DC_refill_from_system",        0x10 }, \
        { "PAPI_res_stl",       "FR_dispatch_stalls",           0x0 },  \
        { "PAPI_stl_icy",       "FR_nothing_to_dispatch",       0x0 },  \
        { "PAPI_hw_int",        "FR_taken_hardware_intrs",      0x0 }
 358 
/*
 * Generic events appended to AMD_cmn_generic_events in opt_generic_events.
 * "FR_retired_fpu_instr" is defined only in AMD_FAMILY_f_events, so the last
 * two aliases can be resolved only when family_f_events is the active native
 * event table.
 */
#define OPT_cmn_generic_events                                          \
        { "PAPI_tlb_dm",        "DC_dtlb_L1_miss_L2_miss",      0x0 },  \
        { "PAPI_tlb_im",        "IC_itlb_L1_miss_L2_miss",      0x0 },  \
        { "PAPI_fp_ins",        "FR_retired_fpu_instr",         0xd },  \
        { "PAPI_vec_ins",       "FR_retired_fpu_instr",         0x4 }
 364 
/*
 * Generic events appended to AMD_cmn_generic_events in
 * family_10h_generic_events.  The L3_* native events they refer to are
 * defined in AMD_FAMILY_10h_events.
 */
#define AMD_FAMILY_10h_generic_events                                   \
        { "PAPI_tlb_dm",        "DC_dtlb_L1_miss_L2_miss",      0x7 },  \
        { "PAPI_tlb_im",        "IC_itlb_L1_miss_L2_miss",      0x3 },  \
        { "PAPI_l3_dcr",        "L3_read_req",                  0xf1 }, \
        { "PAPI_l3_icr",        "L3_read_req",                  0xf2 }, \
        { "PAPI_l3_tcr",        "L3_read_req",                  0xf7 }, \
        { "PAPI_l3_stm",        "L3_miss",                      0xf4 }, \
        { "PAPI_l3_ldm",        "L3_miss",                      0xf3 }, \
        { "PAPI_l3_tcm",        "L3_miss",                      0xf7 }
 374 
 375 #define AMD_PCBE_SUPPORTED(family) (((family) >= 0xf) && ((family) <= 0x11))
 376 
/*
 * Per-family native event tables.  Each one is the common event list
 * followed by the family-specific list and terminated by EV_END.
 */
static amd_event_t family_f_events[] = {
        AMD_cmn_events,
        AMD_FAMILY_f_events,
        EV_END
};

static amd_event_t family_10h_events[] = {
        AMD_cmn_events,
        AMD_FAMILY_10h_events,
        EV_END
};

static amd_event_t family_11h_events[] = {
        AMD_cmn_events,
        AMD_FAMILY_11h_events,
        EV_END
};

/*
 * Generic event tables, terminated by GEN_EV_END.  opt_pcbe_init() selects
 * family_10h_generic_events for family 0x10 and opt_generic_events for every
 * other family.
 */
static amd_generic_event_t opt_generic_events[] = {
        AMD_cmn_generic_events,
        OPT_cmn_generic_events,
        GEN_EV_END
};

static amd_generic_event_t family_10h_generic_events[] = {
        AMD_cmn_generic_events,
        AMD_FAMILY_10h_generic_events,
        GEN_EV_END
};
 406 
/*
 * Module state filled in by opt_pcbe_init().
 */
static char     *evlist;        /* comma-separated list of all event names */
static size_t   evlist_sz;      /* sum of (name length + 1) over all events */
static amd_event_t *amd_events = NULL;  /* active native event table */
static uint_t amd_family;               /* value of cpuid_getfamily(CPU) */
static amd_generic_event_t *amd_generic_events = NULL; /* active generic tbl */

/*
 * Reference-documentation strings returned by opt_pcbe_cpuref().  The fixed
 * strings cover the explicitly supported families; amd_generic_bkdg is a
 * buffer that opt_pcbe_init() formats for any other family.
 */
#define AMD_CPUREF_SIZE 256
static char amd_generic_bkdg[AMD_CPUREF_SIZE];
static char amd_fam_f_rev_ae_bkdg[] = "See \"BIOS and Kernel Developer's " \
"Guide for AMD Athlon 64 and AMD Opteron Processors\" (AMD publication 26094)";
static char amd_fam_f_NPT_bkdg[] = "See \"BIOS and Kernel Developer's Guide " \
"for AMD NPT Family 0Fh Processors\" (AMD publication 32559)";
static char amd_fam_10h_bkdg[] = "See \"BIOS and Kernel Developer's Guide " \
"(BKDG) For AMD Family 10h Processors\" (AMD publication 31116)";
static char amd_fam_11h_bkdg[] = "See \"BIOS and Kernel Developer's Guide " \
"(BKDG) For AMD Family 11h Processors\" (AMD publication 41256)";

/* Implementation name returned by opt_pcbe_impl_name(). */
static char amd_pcbe_impl_name[64];
/* Points at whichever reference string above applies to this processor. */
static char *amd_pcbe_cpuref;
 426 
 427 
 428 #define BITS(v, u, l)   \
 429         (((v) >> (l)) & ((1 << (1 + (u) - (l))) - 1))
 430 
 431 
 432 static int
 433 opt_pcbe_init(void)
 434 {
 435         amd_event_t             *evp;
 436         amd_generic_event_t     *gevp;
 437 
 438         amd_family = cpuid_getfamily(CPU);
 439 
 440         /*
 441          * Make sure this really _is_ an Opteron or Athlon 64 system. The kernel
 442          * loads this module based on its name in the module directory, but it
 443          * could have been renamed.
 444          */
 445         if (cpuid_getvendor(CPU) != X86_VENDOR_AMD || amd_family < 0xf)
 446                 return (-1);
 447 
 448         if (amd_family == 0xf)
 449                 /* Some tools expect this string for family 0fh */
 450                 (void) snprintf(amd_pcbe_impl_name, sizeof (amd_pcbe_impl_name),
 451                     "AMD Opteron & Athlon64");
 452         else
 453                 (void) snprintf(amd_pcbe_impl_name, sizeof (amd_pcbe_impl_name),
 454                     "AMD Family %02xh%s", amd_family,
 455                     AMD_PCBE_SUPPORTED(amd_family) ? "" :" (unsupported)");
 456 
 457         /*
 458          * Figure out processor revision here and assign appropriate
 459          * event configuration.
 460          */
 461 
 462         if (amd_family == 0xf) {
 463                 uint32_t rev;
 464 
 465                 rev = cpuid_getchiprev(CPU);
 466 
 467                 if (X86_CHIPREV_ATLEAST(rev, X86_CHIPREV_AMD_F_REV_F))
 468                         amd_pcbe_cpuref = amd_fam_f_NPT_bkdg;
 469                 else
 470                         amd_pcbe_cpuref = amd_fam_f_rev_ae_bkdg;
 471                 amd_events = family_f_events;
 472                 amd_generic_events = opt_generic_events;
 473         } else if (amd_family == 0x10) {
 474                 amd_pcbe_cpuref = amd_fam_10h_bkdg;
 475                 amd_events = family_10h_events;
 476                 amd_generic_events = family_10h_generic_events;
 477         } else if (amd_family == 0x11) {
 478                 amd_pcbe_cpuref = amd_fam_11h_bkdg;
 479                 amd_events = family_11h_events;
 480                 amd_generic_events = opt_generic_events;
 481         } else {
 482 
 483                 amd_pcbe_cpuref = amd_generic_bkdg;
 484                 (void) snprintf(amd_pcbe_cpuref, AMD_CPUREF_SIZE,
 485                     "See BIOS and Kernel Developer's Guide "    \
 486                     "(BKDG) For AMD Family %02xh Processors. "  \
 487                     "(Note that this pcbe does not explicitly " \
 488                     "support this family)", amd_family);
 489 
 490                 /*
 491                  * For families that are not explicitly supported we'll use
 492                  * events for family 0xf. Even if they are not quite right,
 493                  * it's OK --- we state that pcbe is unsupported.
 494                  */
 495                 amd_events = family_f_events;
 496                 amd_generic_events = opt_generic_events;
 497         }
 498 
 499         /*
 500          * Construct event list.
 501          *
 502          * First pass:  Calculate size needed. We'll need an additional byte
 503          *              for the NULL pointer during the last strcat.
 504          *
 505          * Second pass: Copy strings.
 506          */
 507         for (evp = amd_events; evp->name != NULL; evp++)
 508                 evlist_sz += strlen(evp->name) + 1;
 509 
 510         for (gevp = amd_generic_events; gevp->name != NULL; gevp++)
 511                 evlist_sz += strlen(gevp->name) + 1;
 512 
 513         evlist = kmem_alloc(evlist_sz + 1, KM_SLEEP);
 514         evlist[0] = '\0';
 515 
 516         for (evp = amd_events; evp->name != NULL; evp++) {
 517                 (void) strcat(evlist, evp->name);
 518                 (void) strcat(evlist, ",");
 519         }
 520 
 521         for (gevp = amd_generic_events; gevp->name != NULL; gevp++) {
 522                 (void) strcat(evlist, gevp->name);
 523                 (void) strcat(evlist, ",");
 524         }
 525 
 526         /*
 527          * Remove trailing comma.
 528          */
 529         evlist[evlist_sz - 1] = '\0';
 530 
 531         return (0);
 532 }
 533 
 534 static uint_t
 535 opt_pcbe_ncounters(void)
 536 {
 537         return (4);
 538 }
 539 
 540 static const char *
 541 opt_pcbe_impl_name(void)
 542 {
 543         return (amd_pcbe_impl_name);
 544 }
 545 
 546 static const char *
 547 opt_pcbe_cpuref(void)
 548 {
 549 
 550         return (amd_pcbe_cpuref);
 551 }
 552 
 553 /*ARGSUSED*/
 554 static char *
 555 opt_pcbe_list_events(uint_t picnum)
 556 {
 557         return (evlist);
 558 }
 559 
 560 static char *
 561 opt_pcbe_list_attrs(void)
 562 {
 563         return ("edge,pc,inv,cmask,umask");
 564 }
 565 
 566 static amd_generic_event_t *
 567 find_generic_event(char *name)
 568 {
 569         amd_generic_event_t     *gevp;
 570 
 571         for (gevp = amd_generic_events; gevp->name != NULL; gevp++)
 572                 if (strcmp(name, gevp->name) == 0)
 573                         return (gevp);
 574 
 575         return (NULL);
 576 }
 577 
 578 static amd_event_t *
 579 find_event(char *name)
 580 {
 581         amd_event_t             *evp;
 582 
 583         for (evp = amd_events; evp->name != NULL; evp++)
 584                 if (strcmp(name, evp->name) == 0)
 585                         return (evp);
 586 
 587         return (NULL);
 588 }
 589 
 590 /*ARGSUSED*/
 591 static uint64_t
 592 opt_pcbe_event_coverage(char *event)
 593 {
 594         /*
 595          * Check whether counter event is supported
 596          */
 597         if (find_event(event) == NULL && find_generic_event(event) == NULL)
 598                 return (0);
 599 
 600         /*
 601          * Fortunately, all counters can count all events.
 602          */
 603         return (0xF);
 604 }
 605 
 606 static uint64_t
 607 opt_pcbe_overflow_bitmap(void)
 608 {
 609         /*
 610          * Unfortunately, this chip cannot detect which counter overflowed, so
 611          * we must act as if they all did.
 612          */
 613         return (0xF);
 614 }
 615 
 616 /*ARGSUSED*/
 617 static int
 618 opt_pcbe_configure(uint_t picnum, char *event, uint64_t preset, uint32_t flags,
 619     uint_t nattrs, kcpc_attr_t *attrs, void **data, void *token)
 620 {
 621         opt_pcbe_config_t       *cfg;
 622         amd_event_t             *evp;
 623         amd_event_t             ev_raw = { "raw", 0};
 624         amd_generic_event_t     *gevp;
 625         int                     i;
 626         uint64_t                evsel = 0, evsel_tmp = 0;
 627 
 628         /*
 629          * If we've been handed an existing configuration, we need only preset
 630          * the counter value.
 631          */
 632         if (*data != NULL) {
 633                 cfg = *data;
 634                 cfg->opt_rawpic = preset & MASK48;
 635                 return (0);
 636         }
 637 
 638         if (picnum >= 4)
 639                 return (CPC_INVALID_PICNUM);
 640 
 641         if ((evp = find_event(event)) == NULL) {
 642                 if ((gevp = find_generic_event(event)) != NULL) {
 643                         evp = find_event(gevp->event);
 644                         ASSERT(evp != NULL);
 645 
 646                         if (nattrs > 0)
 647                                 return (CPC_ATTRIBUTE_OUT_OF_RANGE);
 648 
 649                         evsel |= gevp->umask << OPT_PES_UMASK_SHIFT;
 650                 } else {
 651                         long tmp;
 652 
 653                         /*
 654                          * If ddi_strtol() likes this event, use it as a raw
 655                          * event code.
 656                          */
 657                         if (ddi_strtol(event, NULL, 0, &tmp) != 0)
 658                                 return (CPC_INVALID_EVENT);
 659 
 660                         ev_raw.emask = tmp;
 661                         evp = &ev_raw;
 662                 }
 663         }
 664 
 665         /*
 666          * Configuration of EventSelect register. While on some families
 667          * certain bits might not be supported (e.g. Guest/Host on family
 668          * 11h), setting these bits is harmless
 669          */
 670 
 671         /* Set GuestOnly bit to 0 and HostOnly bit to 1 */
 672         evsel &= ~OPT_PES_HOST;
 673         evsel &= ~OPT_PES_GUEST;
 674 
 675         /* Set bits [35:32] for extended part of Event Select field */
 676         evsel_tmp = evp->emask & 0x0f00;
 677         evsel |= evsel_tmp << 24;
 678 
 679         evsel |= evp->emask & 0x00ff;
 680 
 681         if (flags & CPC_COUNT_USER)
 682                 evsel |= OPT_PES_USR;
 683         if (flags & CPC_COUNT_SYSTEM)
 684                 evsel |= OPT_PES_OS;
 685         if (flags & CPC_OVF_NOTIFY_EMT)
 686                 evsel |= OPT_PES_INT;
 687 
 688         for (i = 0; i < nattrs; i++) {
 689                 if (strcmp(attrs[i].ka_name, "edge") == 0) {
 690                         if (attrs[i].ka_val != 0)
 691                                 evsel |= OPT_PES_EDGE;
 692                 } else if (strcmp(attrs[i].ka_name, "pc") == 0) {
 693                         if (attrs[i].ka_val != 0)
 694                                 evsel |= OPT_PES_PC;
 695                 } else if (strcmp(attrs[i].ka_name, "inv") == 0) {
 696                         if (attrs[i].ka_val != 0)
 697                                 evsel |= OPT_PES_INV;
 698                 } else if (strcmp(attrs[i].ka_name, "cmask") == 0) {
 699                         if ((attrs[i].ka_val | OPT_PES_CMASK_MASK) !=
 700                             OPT_PES_CMASK_MASK)
 701                                 return (CPC_ATTRIBUTE_OUT_OF_RANGE);
 702                         evsel |= attrs[i].ka_val << OPT_PES_CMASK_SHIFT;
 703                 } else if (strcmp(attrs[i].ka_name, "umask") == 0) {
 704                         if ((attrs[i].ka_val | OPT_PES_UMASK_MASK) !=
 705                             OPT_PES_UMASK_MASK)
 706                                 return (CPC_ATTRIBUTE_OUT_OF_RANGE);
 707                         evsel |= attrs[i].ka_val << OPT_PES_UMASK_SHIFT;
 708                 } else
 709                         return (CPC_INVALID_ATTRIBUTE);
 710         }
 711 
 712         cfg = kmem_alloc(sizeof (*cfg), KM_SLEEP);
 713 
 714         cfg->opt_picno = picnum;
 715         cfg->opt_evsel = evsel;
 716         cfg->opt_rawpic = preset & MASK48;
 717 
 718         *data = cfg;
 719         return (0);
 720 }
 721 
 722 static void
 723 opt_pcbe_program(void *token)
 724 {
 725         opt_pcbe_config_t       *cfgs[4] = { &nullcfgs[0], &nullcfgs[1],
 726                                                 &nullcfgs[2], &nullcfgs[3] };
 727         opt_pcbe_config_t       *pcfg = NULL;
 728         int                     i;
 729         ulong_t                 curcr4 = getcr4();
 730 
 731         /*
 732          * Allow nonprivileged code to read the performance counters if desired.
 733          */
 734         if (kcpc_allow_nonpriv(token))
 735                 setcr4(curcr4 | CR4_PCE);
 736         else
 737                 setcr4(curcr4 & ~CR4_PCE);
 738 
 739         /*
 740          * Query kernel for all configs which will be co-programmed.
 741          */
 742         do {
 743                 pcfg = (opt_pcbe_config_t *)kcpc_next_config(token, pcfg, NULL);
 744 
 745                 if (pcfg != NULL) {
 746                         ASSERT(pcfg->opt_picno < 4);
 747                         cfgs[pcfg->opt_picno] = pcfg;
 748                 }
 749         } while (pcfg != NULL);
 750 
 751         /*
 752          * Program in two loops. The first configures and presets the counter,
 753          * and the second loop enables the counters. This ensures that the
 754          * counters are all enabled as closely together in time as possible.
 755          */
 756 
 757         for (i = 0; i < 4; i++) {
 758                 wrmsr(PES_BASE_ADDR + i, cfgs[i]->opt_evsel);
 759                 wrmsr(PIC_BASE_ADDR + i, cfgs[i]->opt_rawpic);
 760         }
 761 
 762         for (i = 0; i < 4; i++) {
 763                 wrmsr(PES_BASE_ADDR + i, cfgs[i]->opt_evsel |
 764                     (uint64_t)(uintptr_t)OPT_PES_ENABLE);
 765         }
 766 }
 767 
 768 static void
 769 opt_pcbe_allstop(void)
 770 {
 771         int             i;
 772 
 773         for (i = 0; i < 4; i++)
 774                 wrmsr(PES_BASE_ADDR + i, 0ULL);
 775 
 776         /*
 777          * Disable non-privileged access to the counter registers.
 778          */
 779         setcr4(getcr4() & ~CR4_PCE);
 780 }
 781 
 782 static void
 783 opt_pcbe_sample(void *token)
 784 {
 785         opt_pcbe_config_t       *cfgs[4] = { NULL, NULL, NULL, NULL };
 786         opt_pcbe_config_t       *pcfg = NULL;
 787         int                     i;
 788         uint64_t                curpic[4];
 789         uint64_t                *addrs[4];
 790         uint64_t                *tmp;
 791         int64_t                 diff;
 792 
 793         for (i = 0; i < 4; i++)
 794                 curpic[i] = rdmsr(PIC_BASE_ADDR + i);
 795 
 796         /*
 797          * Query kernel for all configs which are co-programmed.
 798          */
 799         do {
 800                 pcfg = (opt_pcbe_config_t *)kcpc_next_config(token, pcfg, &tmp);
 801 
 802                 if (pcfg != NULL) {
 803                         ASSERT(pcfg->opt_picno < 4);
 804                         cfgs[pcfg->opt_picno] = pcfg;
 805                         addrs[pcfg->opt_picno] = tmp;
 806                 }
 807         } while (pcfg != NULL);
 808 
 809         for (i = 0; i < 4; i++) {
 810                 if (cfgs[i] == NULL)
 811                         continue;
 812 
 813                 diff = (curpic[i] - cfgs[i]->opt_rawpic) & MASK48;
 814                 *addrs[i] += diff;
 815                 DTRACE_PROBE4(opt__pcbe__sample, int, i, uint64_t, *addrs[i],
 816                     uint64_t, curpic[i], uint64_t, cfgs[i]->opt_rawpic);
 817                 cfgs[i]->opt_rawpic = *addrs[i] & MASK48;
 818         }
 819 }
 820 
 821 static void
 822 opt_pcbe_free(void *config)
 823 {
 824         kmem_free(config, sizeof (opt_pcbe_config_t));
 825 }
 826 
 827 
 828 static struct modlpcbe modlpcbe = {
 829         &mod_pcbeops,
 830         "AMD Performance Counters",
 831         &opt_pcbe_ops
 832 };
 833 
 834 static struct modlinkage modl = {
 835         MODREV_1,
 836         { &modlpcbe, NULL }
 837 };
 838 
 839 int
 840 _init(void)
 841 {
 842         int ret;
 843 
 844         if (opt_pcbe_init() != 0)
 845                 return (ENOTSUP);
 846 
 847         if ((ret = mod_install(&modl)) != 0)
 848                 kmem_free(evlist, evlist_sz + 1);
 849 
 850         return (ret);
 851 }
 852 
 853 int
 854 _fini(void)
 855 {
 856         int ret;
 857 
 858         if ((ret = mod_remove(&modl)) == 0)
 859                 kmem_free(evlist, evlist_sz + 1);
 860         return (ret);
 861 }
 862 
 863 int
 864 _info(struct modinfo *mi)
 865 {
 866         return (mod_info(&modl, mi));
 867 }