7127 remove -Wno-missing-braces from Makefile.uts
--- old/usr/src/uts/intel/pcbe/core_pcbe.c
+++ new/usr/src/uts/intel/pcbe/core_pcbe.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
23 23 */
24 24
25 25 /*
26 26 * This file contains preset event names from the Performance Application
27 27 * Programming Interface v3.5 which included the following notice:
28 28 *
29 29 * Copyright (c) 2005,6
30 30 * Innovative Computing Labs
31 31 * Computer Science Department,
32 32 * University of Tennessee,
33 33 * Knoxville, TN.
34 34 * All Rights Reserved.
35 35 *
36 36 *
37 37 * Redistribution and use in source and binary forms, with or without
38 38 * modification, are permitted provided that the following conditions are met:
39 39 *
40 40 * * Redistributions of source code must retain the above copyright notice,
41 41 * this list of conditions and the following disclaimer.
42 42 * * Redistributions in binary form must reproduce the above copyright
43 43 * notice, this list of conditions and the following disclaimer in the
44 44 * documentation and/or other materials provided with the distribution.
45 45 * * Neither the name of the University of Tennessee nor the names of its
46 46 * contributors may be used to endorse or promote products derived from
47 47 * this software without specific prior written permission.
48 48 *
49 49 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
50 50 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51 51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52 52 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
53 53 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
54 54 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
55 55 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
56 56 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
57 57 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
58 58 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
59 59 * POSSIBILITY OF SUCH DAMAGE.
60 60 *
61 61 *
62 62 * This open source software license conforms to the BSD License template.
63 63 */
64 64
65 65
66 66 /*
67 67 * Performance Counter Back-End for Intel processors supporting Architectural
68 68 * Performance Monitoring.
69 69 */
70 70
71 71 #include <sys/cpuvar.h>
72 72 #include <sys/param.h>
73 73 #include <sys/cpc_impl.h>
74 74 #include <sys/cpc_pcbe.h>
75 75 #include <sys/modctl.h>
76 76 #include <sys/inttypes.h>
77 77 #include <sys/systm.h>
78 78 #include <sys/cmn_err.h>
79 79 #include <sys/x86_archext.h>
80 80 #include <sys/sdt.h>
81 81 #include <sys/archsystm.h>
82 82 #include <sys/privregs.h>
83 83 #include <sys/ddi.h>
84 84 #include <sys/sunddi.h>
85 85 #include <sys/cred.h>
86 86 #include <sys/policy.h>
87 87
88 88 static int core_pcbe_init(void);
89 89 static uint_t core_pcbe_ncounters(void);
90 90 static const char *core_pcbe_impl_name(void);
91 91 static const char *core_pcbe_cpuref(void);
92 92 static char *core_pcbe_list_events(uint_t picnum);
93 93 static char *core_pcbe_list_attrs(void);
94 94 static uint64_t core_pcbe_event_coverage(char *event);
95 95 static uint64_t core_pcbe_overflow_bitmap(void);
96 96 static int core_pcbe_configure(uint_t picnum, char *event, uint64_t preset,
97 97 uint32_t flags, uint_t nattrs, kcpc_attr_t *attrs, void **data,
98 98 void *token);
99 99 static void core_pcbe_program(void *token);
100 100 static void core_pcbe_allstop(void);
101 101 static void core_pcbe_sample(void *token);
102 102 static void core_pcbe_free(void *config);
103 103
104 104 #define FALSE 0
105 105 #define TRUE 1
106 106
107 107 /* Counter Type */
108 108 #define CORE_GPC 0 /* General-Purpose Counter (GPC) */
109 109 #define CORE_FFC 1 /* Fixed-Function Counter (FFC) */
110 110
111 111 /* MSR Addresses */
112 112 #define GPC_BASE_PMC 0x00c1 /* First GPC */
113 113 #define GPC_BASE_PES 0x0186 /* First GPC Event Select register */
114 114 #define FFC_BASE_PMC 0x0309 /* First FFC */
115 115 #define PERF_FIXED_CTR_CTRL 0x038d /* Used to enable/disable FFCs */
116 116 #define PERF_GLOBAL_STATUS 0x038e /* Overflow status register */
117 117 #define PERF_GLOBAL_CTRL 0x038f /* Used to enable/disable counting */
118 118 #define PERF_GLOBAL_OVF_CTRL 0x0390 /* Used to clear overflow status */
119 119
120 120 /*
121 121 * Processor Event Select register fields
122 122 */
123 123 #define CORE_USR (1ULL << 16) /* Count while not in ring 0 */
124 124 #define CORE_OS (1ULL << 17) /* Count while in ring 0 */
125 125 #define CORE_EDGE (1ULL << 18) /* Enable edge detection */
126 126 #define CORE_PC (1ULL << 19) /* Enable pin control */
127 127 #define CORE_INT (1ULL << 20) /* Enable interrupt on overflow */
128 128 #define CORE_EN (1ULL << 22) /* Enable counting */
129 129 #define CORE_INV (1ULL << 23) /* Invert the CMASK */
130 130 #define CORE_ANYTHR (1ULL << 21) /* Count event for any thread on core */
131 131
132 132 #define CORE_UMASK_SHIFT 8
133 133 #define CORE_UMASK_MASK 0xffu
134 134 #define CORE_CMASK_SHIFT 24
135 135 #define CORE_CMASK_MASK 0xffu
136 136
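/*
 * Illustrative sketch, not part of the original file: how a raw event-select
 * value can be composed from the fields defined above. The function name and
 * parameters are hypothetical; e.g. example_gpc_event_select(0x3c, 0x00, 0)
 * would count unhalted core cycles in both user and kernel mode.
 */
static uint64_t
example_gpc_event_select(uint8_t event_num, uint8_t umask, uint8_t cmask)
{
	return ((uint64_t)event_num |
	    (((uint64_t)umask & CORE_UMASK_MASK) << CORE_UMASK_SHIFT) |
	    (((uint64_t)cmask & CORE_CMASK_MASK) << CORE_CMASK_SHIFT) |
	    CORE_USR | CORE_OS | CORE_EN);
}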
137 137 /*
138 138 * Fixed-function counter attributes
139 139 */
140 140 #define CORE_FFC_OS_EN (1ULL << 0) /* Count while in ring 0 */
141 141 #define CORE_FFC_USR_EN (1ULL << 1) /* Count while not in ring 0 */
142 142 #define CORE_FFC_ANYTHR (1ULL << 2) /* Count event for any thread on core */
143 143 #define CORE_FFC_PMI (1ULL << 3) /* Enable interrupt on overflow */
144 144
145 145 /*
146 146 * Number of bits for specifying each FFC's attributes in the control register
147 147 */
148 148 #define CORE_FFC_ATTR_SIZE 4
149 149
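/*
 * Illustrative sketch, not part of the original file: each FFC owns
 * CORE_FFC_ATTR_SIZE bits in PERF_FIXED_CTR_CTRL, so the attribute bits for
 * FFC "picno" are shifted into place as shown (hypothetical helper).
 */
static uint64_t
example_ffc_ctrl_bits(uint_t picno, uint64_t attrs)
{
	/* e.g. attrs = CORE_FFC_OS_EN | CORE_FFC_USR_EN | CORE_FFC_PMI */
	return (attrs << (picno * CORE_FFC_ATTR_SIZE));
}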
150 150 /*
151 151 * CondChgd and OvfBuffer fields of global status and overflow control registers
152 152 */
153 153 #define CONDCHGD (1ULL << 63)
154 154 #define OVFBUFFER (1ULL << 62)
155 155 #define MASK_CONDCHGD_OVFBUFFER (CONDCHGD | OVFBUFFER)
156 156
157 157 #define ALL_STOPPED 0ULL
158 158
159 159 #define BITMASK_XBITS(x) ((1ull << (x)) - 1ull)
160 160
161 161 /*
162 162 * Only the lower 32 bits of the general-purpose counters can be
163 163 * written to. The higher bits are extended from bit 31; all ones if
164 164 * bit 31 is one and all zeros otherwise.
165 165 *
166 166 * The fixed-function counters do not have this restriction.
167 167 */
168 168 #define BITS_EXTENDED_FROM_31 (BITMASK_XBITS(width_gpc) & ~BITMASK_XBITS(31))
169 169
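/*
 * Illustrative sketch, not part of the original file: the value a GPC holds
 * after a write, given the bit-31 extension described above (hypothetical
 * helper; "width" stands in for width_gpc).
 */
static uint64_t
example_gpc_written_value(uint64_t requested, uint_t width)
{
	uint64_t value = requested & 0xFFFFFFFFULL;	/* only the low 32 bits reach the MSR */

	if (value & (1ULL << 31))
		value |= BITMASK_XBITS(width) & ~BITMASK_XBITS(32);
	return (value);
}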
170 170 #define WRMSR(msr, value) \
171 171 wrmsr((msr), (value)); \
172 172 DTRACE_PROBE2(wrmsr, uint64_t, (msr), uint64_t, (value));
173 173
174 174 #define RDMSR(msr, value) \
175 175 (value) = rdmsr((msr)); \
176 176 DTRACE_PROBE2(rdmsr, uint64_t, (msr), uint64_t, (value));
177 177
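/*
 * Illustrative sketch, not part of the original file: how a PCBE typically
 * derives the set of overflowed counters from PERF_GLOBAL_STATUS, ignoring
 * the CondChgd/OvfBuffer status flags (hypothetical helper).
 */
static uint64_t
example_read_overflow_status(void)
{
	uint64_t status;

	RDMSR(PERF_GLOBAL_STATUS, status);
	return (status & ~MASK_CONDCHGD_OVFBUFFER);
}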
178 178 typedef struct core_pcbe_config {
179 179 uint64_t core_rawpic;
180 180 uint64_t core_ctl; /* Event Select bits */
181 181 uint64_t core_pmc; /* Counter register address */
182 182 uint64_t core_pes; /* Event Select register address */
183 183 uint_t core_picno;
184 184 uint8_t core_pictype; /* CORE_GPC or CORE_FFC */
185 185 } core_pcbe_config_t;
186 186
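/*
 * Illustrative sketch, not part of the original file: the per-counter MSRs
 * are consecutive, so a configuration for general-purpose counter "picno"
 * would typically be filled in as below. This is a hypothetical helper; the
 * real setup is done by core_pcbe_configure()/configure_gpc().
 */
static void
example_fill_gpc_config(core_pcbe_config_t *cfg, uint_t picno, uint64_t ctl)
{
	cfg->core_picno = picno;
	cfg->core_pictype = CORE_GPC;
	cfg->core_ctl = ctl | CORE_EN;			/* event-select bits */
	cfg->core_pmc = GPC_BASE_PMC + picno;		/* IA32_PMCx */
	cfg->core_pes = GPC_BASE_PES + picno;		/* IA32_PERFEVTSELx */
	cfg->core_rawpic = 0;
}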
187 187 pcbe_ops_t core_pcbe_ops = {
188 188 PCBE_VER_1, /* pcbe_ver */
189 189 CPC_CAP_OVERFLOW_INTERRUPT | CPC_CAP_OVERFLOW_PRECISE, /* pcbe_caps */
190 190 core_pcbe_ncounters, /* pcbe_ncounters */
191 191 core_pcbe_impl_name, /* pcbe_impl_name */
192 192 core_pcbe_cpuref, /* pcbe_cpuref */
193 193 core_pcbe_list_events, /* pcbe_list_events */
194 194 core_pcbe_list_attrs, /* pcbe_list_attrs */
195 195 core_pcbe_event_coverage, /* pcbe_event_coverage */
196 196 core_pcbe_overflow_bitmap, /* pcbe_overflow_bitmap */
197 197 core_pcbe_configure, /* pcbe_configure */
198 198 core_pcbe_program, /* pcbe_program */
199 199 core_pcbe_allstop, /* pcbe_allstop */
200 200 core_pcbe_sample, /* pcbe_sample */
201 201 core_pcbe_free /* pcbe_free */
202 202 };
203 203
204 204 struct nametable_core_uarch {
205 205 const char *name;
206 206 uint64_t restricted_bits;
207 207 uint8_t event_num;
208 208 };
209 209
210 210 #define NT_END 0xFF
211 211
212 212 /*
213 213 * Counting an event for all cores or all bus agents requires cpc_cpu privileges
214 214 */
215 215 #define ALL_CORES (1ULL << 15)
216 216 #define ALL_AGENTS (1ULL << 13)
217 217
218 218 struct generic_events {
219 219 const char *name;
220 220 uint8_t event_num;
221 221 uint8_t umask;
222 222 };
223 223
224 224 static const struct generic_events cmn_generic_events[] = {
225 225 { "PAPI_tot_cyc", 0x3c, 0x00 }, /* cpu_clk_unhalted.thread_p/core */
226 226 { "PAPI_tot_ins", 0xc0, 0x00 }, /* inst_retired.any_p */
227 227 { "PAPI_br_ins", 0xc4, 0x0c }, /* br_inst_retired.taken */
228 228 { "PAPI_br_msp", 0xc5, 0x00 }, /* br_inst_retired.mispred */
229 229 { "PAPI_br_ntk", 0xc4, 0x03 },
230 230 /* br_inst_retired.pred_not_taken|mispred_not_taken */
231 231 { "PAPI_br_prc", 0xc4, 0x05 },
232 232 /* br_inst_retired.pred_not_taken|pred_taken */
233 233 { "PAPI_hw_int", 0xc8, 0x00 }, /* hw_int_rvc */
234 234 { "PAPI_tot_iis", 0xaa, 0x01 }, /* macro_insts.decoded */
235 235 { "PAPI_l1_dca", 0x43, 0x01 }, /* l1d_all_ref */
236 236 { "PAPI_l1_icm", 0x81, 0x00 }, /* l1i_misses */
237 237 { "PAPI_l1_icr", 0x80, 0x00 }, /* l1i_reads */
238 238 { "PAPI_l1_tcw", 0x41, 0x0f }, /* l1d_cache_st.mesi */
239 239 { "PAPI_l2_stm", 0x2a, 0x41 }, /* l2_st.self.i_state */
240 240 { "PAPI_l2_tca", 0x2e, 0x4f }, /* l2_rqsts.self.demand.mesi */
241 241 { "PAPI_l2_tch", 0x2e, 0x4e }, /* l2_rqsts.mes */
242 242 { "PAPI_l2_tcm", 0x2e, 0x41 }, /* l2_rqsts.self.demand.i_state */
243 243 { "PAPI_l2_tcw", 0x2a, 0x4f }, /* l2_st.self.mesi */
244 244 { "PAPI_ld_ins", 0xc0, 0x01 }, /* inst_retired.loads */
245 245 { "PAPI_lst_ins", 0xc0, 0x03 }, /* inst_retired.loads|stores */
246 246 { "PAPI_sr_ins", 0xc0, 0x02 }, /* inst_retired.stores */
247 247 { "PAPI_tlb_dm", 0x08, 0x01 }, /* dtlb_misses.any */
248 248 { "PAPI_tlb_im", 0x82, 0x12 }, /* itlb.small_miss|large_miss */
249 249 { "PAPI_tlb_tl", 0x0c, 0x03 }, /* page_walks */
250 250 { "", NT_END, 0 }
251 251 };
252 252
253 253 static const struct generic_events generic_events_pic0[] = {
254 254 { "PAPI_l1_dcm", 0xcb, 0x01 }, /* mem_load_retired.l1d_miss */
255 255 { "", NT_END, 0 }
256 256 };
257 257
258 258 /*
259 259 * The events listed in the following table can be counted on all
260 260 * general-purpose counters on processors of the Penryn and Merom families
261 261 */
262 262 static const struct nametable_core_uarch cmn_gpc_events_core_uarch[] = {
263 263 /* Alphabetical order of event name */
264 264
265 265 { "baclears", 0x0, 0xe6 },
266 266 { "bogus_br", 0x0, 0xe4 },
267 267 { "br_bac_missp_exec", 0x0, 0x8a },
268 268
269 269 { "br_call_exec", 0x0, 0x92 },
270 270 { "br_call_missp_exec", 0x0, 0x93 },
271 271 { "br_cnd_exec", 0x0, 0x8b },
272 272
273 273 { "br_cnd_missp_exec", 0x0, 0x8c },
274 274 { "br_ind_call_exec", 0x0, 0x94 },
275 275 { "br_ind_exec", 0x0, 0x8d },
276 276
277 277 { "br_ind_missp_exec", 0x0, 0x8e },
278 278 { "br_inst_decoded", 0x0, 0xe0 },
279 279 { "br_inst_exec", 0x0, 0x88 },
280 280
281 281 { "br_inst_retired", 0x0, 0xc4 },
282 282 { "br_inst_retired_mispred", 0x0, 0xc5 },
283 283 { "br_missp_exec", 0x0, 0x89 },
284 284
285 285 { "br_ret_bac_missp_exec", 0x0, 0x91 },
286 286 { "br_ret_exec", 0x0, 0x8f },
287 287 { "br_ret_missp_exec", 0x0, 0x90 },
288 288
289 289 { "br_tkn_bubble_1", 0x0, 0x97 },
290 290 { "br_tkn_bubble_2", 0x0, 0x98 },
291 291 { "bus_bnr_drv", ALL_AGENTS, 0x61 },
292 292
293 293 { "bus_data_rcv", ALL_CORES, 0x64 },
294 294 { "bus_drdy_clocks", ALL_AGENTS, 0x62 },
295 295 { "bus_hit_drv", ALL_AGENTS, 0x7a },
296 296
297 297 { "bus_hitm_drv", ALL_AGENTS, 0x7b },
298 298 { "bus_io_wait", ALL_CORES, 0x7f },
299 299 { "bus_lock_clocks", ALL_CORES | ALL_AGENTS, 0x63 },
300 300
301 301 { "bus_request_outstanding", ALL_CORES | ALL_AGENTS, 0x60 },
302 302 { "bus_trans_any", ALL_CORES | ALL_AGENTS, 0x70 },
303 303 { "bus_trans_brd", ALL_CORES | ALL_AGENTS, 0x65 },
304 304
305 305 { "bus_trans_burst", ALL_CORES | ALL_AGENTS, 0x6e },
306 306 { "bus_trans_def", ALL_CORES | ALL_AGENTS, 0x6d },
307 307 { "bus_trans_ifetch", ALL_CORES | ALL_AGENTS, 0x68 },
308 308
309 309 { "bus_trans_inval", ALL_CORES | ALL_AGENTS, 0x69 },
310 310 { "bus_trans_io", ALL_CORES | ALL_AGENTS, 0x6c },
311 311 { "bus_trans_mem", ALL_CORES | ALL_AGENTS, 0x6f },
312 312
313 313 { "bus_trans_p", ALL_CORES | ALL_AGENTS, 0x6b },
314 314 { "bus_trans_pwr", ALL_CORES | ALL_AGENTS, 0x6a },
315 315 { "bus_trans_rfo", ALL_CORES | ALL_AGENTS, 0x66 },
316 316
317 317 { "bus_trans_wb", ALL_CORES | ALL_AGENTS, 0x67 },
318 318 { "busq_empty", ALL_CORES, 0x7d },
319 319 { "cmp_snoop", ALL_CORES, 0x78 },
320 320
321 321 { "cpu_clk_unhalted", 0x0, 0x3c },
322 322 { "cycles_int", 0x0, 0xc6 },
323 323 { "cycles_l1i_mem_stalled", 0x0, 0x86 },
324 324
325 325 { "dtlb_misses", 0x0, 0x08 },
326 326 { "eist_trans", 0x0, 0x3a },
327 327 { "esp", 0x0, 0xab },
328 328
329 329 { "ext_snoop", ALL_AGENTS, 0x77 },
330 330 { "fp_mmx_trans", 0x0, 0xcc },
331 331 { "hw_int_rcv", 0x0, 0xc8 },
332 332
333 333 { "ild_stall", 0x0, 0x87 },
334 334 { "inst_queue", 0x0, 0x83 },
335 335 { "inst_retired", 0x0, 0xc0 },
336 336
337 337 { "itlb", 0x0, 0x82 },
338 338 { "itlb_miss_retired", 0x0, 0xc9 },
339 339 { "l1d_all_ref", 0x0, 0x43 },
340 340
341 341 { "l1d_cache_ld", 0x0, 0x40 },
342 342 { "l1d_cache_lock", 0x0, 0x42 },
343 343 { "l1d_cache_st", 0x0, 0x41 },
344 344
345 345 { "l1d_m_evict", 0x0, 0x47 },
346 346 { "l1d_m_repl", 0x0, 0x46 },
347 347 { "l1d_pend_miss", 0x0, 0x48 },
348 348
349 349 { "l1d_prefetch", 0x0, 0x4e },
350 350 { "l1d_repl", 0x0, 0x45 },
351 351 { "l1d_split", 0x0, 0x49 },
352 352
353 353 { "l1i_misses", 0x0, 0x81 },
354 354 { "l1i_reads", 0x0, 0x80 },
355 355 { "l2_ads", ALL_CORES, 0x21 },
356 356
357 357 { "l2_dbus_busy_rd", ALL_CORES, 0x23 },
358 358 { "l2_ifetch", ALL_CORES, 0x28 },
359 359 { "l2_ld", ALL_CORES, 0x29 },
360 360
361 361 { "l2_lines_in", ALL_CORES, 0x24 },
362 362 { "l2_lines_out", ALL_CORES, 0x26 },
363 363 { "l2_lock", ALL_CORES, 0x2b },
364 364
365 365 { "l2_m_lines_in", ALL_CORES, 0x25 },
366 366 { "l2_m_lines_out", ALL_CORES, 0x27 },
367 367 { "l2_no_req", ALL_CORES, 0x32 },
368 368
369 369 { "l2_reject_busq", ALL_CORES, 0x30 },
370 370 { "l2_rqsts", ALL_CORES, 0x2e },
371 371 { "l2_st", ALL_CORES, 0x2a },
372 372
373 373 { "load_block", 0x0, 0x03 },
374 374 { "load_hit_pre", 0x0, 0x4c },
375 375 { "machine_nukes", 0x0, 0xc3 },
376 376
377 377 { "macro_insts", 0x0, 0xaa },
378 378 { "memory_disambiguation", 0x0, 0x09 },
379 379 { "misalign_mem_ref", 0x0, 0x05 },
380 380 { "page_walks", 0x0, 0x0c },
381 381
382 382 { "pref_rqsts_dn", 0x0, 0xf8 },
383 383 { "pref_rqsts_up", 0x0, 0xf0 },
384 384 { "rat_stalls", 0x0, 0xd2 },
385 385
386 386 { "resource_stalls", 0x0, 0xdc },
387 387 { "rs_uops_dispatched", 0x0, 0xa0 },
388 388 { "seg_reg_renames", 0x0, 0xd5 },
389 389
390 390 { "seg_rename_stalls", 0x0, 0xd4 },
391 391 { "segment_reg_loads", 0x0, 0x06 },
392 392 { "simd_assist", 0x0, 0xcd },
393 393
394 394 { "simd_comp_inst_retired", 0x0, 0xca },
395 395 { "simd_inst_retired", 0x0, 0xc7 },
396 396 { "simd_instr_retired", 0x0, 0xce },
397 397
398 398 { "simd_sat_instr_retired", 0x0, 0xcf },
399 399 { "simd_sat_uop_exec", 0x0, 0xb1 },
400 400 { "simd_uop_type_exec", 0x0, 0xb3 },
401 401
402 402 { "simd_uops_exec", 0x0, 0xb0 },
403 403 { "snoop_stall_drv", ALL_CORES | ALL_AGENTS, 0x7e },
404 404 { "sse_pre_exec", 0x0, 0x07 },
405 405
406 406 { "sse_pre_miss", 0x0, 0x4b },
407 407 { "store_block", 0x0, 0x04 },
408 408 { "thermal_trip", 0x0, 0x3b },
409 409
410 410 { "uops_retired", 0x0, 0xc2 },
411 411 { "x87_ops_retired", 0x0, 0xc1 },
412 412 { "", 0x0, NT_END }
413 413 };
414 414
415 415 /*
416 416 * If any of the pic-specific events require privileges, make sure to add a
417 417 * check in configure_gpc() to determine whether an event hard-coded as a
418 418 * number by the user has any privilege requirements
419 419 */
420 420 static const struct nametable_core_uarch pic0_events[] = {
421 421 /* Alphabetical order of event name */
422 422
423 423 { "cycles_div_busy", 0x0, 0x14 },
424 424 { "fp_comp_ops_exe", 0x0, 0x10 },
425 425 { "idle_during_div", 0x0, 0x18 },
426 426
427 427 { "mem_load_retired", 0x0, 0xcb },
428 428 { "rs_uops_dispatched_port", 0x0, 0xa1 },
429 429 { "", 0x0, NT_END }
430 430 };
431 431
432 432 static const struct nametable_core_uarch pic1_events[] = {
433 433 /* Alphabetical order of event name */
434 434
435 435 { "delayed_bypass", 0x0, 0x19 },
436 436 { "div", 0x0, 0x13 },
437 437 { "fp_assist", 0x0, 0x11 },
438 438
439 439 { "mul", 0x0, 0x12 },
440 440 { "", 0x0, NT_END }
441 441 };
442 442
443 443 /* FFC entries must be in order */
444 444 static char *ffc_names_non_htt[] = {
445 445 "instr_retired.any",
446 446 "cpu_clk_unhalted.core",
447 447 "cpu_clk_unhalted.ref",
448 448 NULL
449 449 };
450 450
451 451 static char *ffc_names_htt[] = {
452 452 "instr_retired.any",
453 453 "cpu_clk_unhalted.thread",
454 454 "cpu_clk_unhalted.ref",
455 455 NULL
456 456 };
457 457
458 458 static char *ffc_genericnames[] = {
459 459 "PAPI_tot_ins",
460 460 "PAPI_tot_cyc",
461 461 "",
462 462 NULL
463 463 };
464 464
465 465 static char **ffc_names = NULL;
466 466 static char **ffc_allnames = NULL;
467 467 static char **gpc_names = NULL;
468 468 static uint32_t versionid;
469 469 static uint64_t num_gpc;
470 470 static uint64_t width_gpc;
471 471 static uint64_t mask_gpc;
472 472 static uint64_t num_ffc;
473 473 static uint64_t width_ffc;
474 474 static uint64_t mask_ffc;
475 475 static uint_t total_pmc;
476 476 static uint64_t control_ffc;
477 477 static uint64_t control_gpc;
478 478 static uint64_t control_mask;
479 479 static uint32_t arch_events_vector;
480 480
481 481 #define IMPL_NAME_LEN 100
482 482 static char core_impl_name[IMPL_NAME_LEN];
483 483
484 484 static const char *core_cpuref =
485 485 "See Appendix A of the \"Intel 64 and IA-32 Architectures Software" \
486 486 " Developer's Manual Volume 3B: System Programming Guide, Part 2\"" \
487 487 " Order Number: 253669-026US, Februrary 2008";
488 488
489 489 struct events_table_t {
490 490 uint8_t eventselect;
491 491 uint8_t unitmask;
492 492 uint64_t supported_counters;
493 493 const char *name;
494 494 };
495 495
496 496 /* Used to describe which counters support an event */
497 497 #define C(x) (1 << (x))
498 498 #define C0 C(0)
499 499 #define C1 C(1)
500 500 #define C2 C(2)
501 501 #define C3 C(3)
502 502 #define C_ALL 0xFFFFFFFFFFFFFFFF
503 503
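/*
 * Illustrative sketch, not part of the original file: testing whether
 * general-purpose counter "picnum" supports a given table entry, using the
 * C() bitmask defined above (hypothetical helper).
 */
static int
example_counter_supports(const struct events_table_t *e, uint_t picnum)
{
	return ((C(picnum) & e->supported_counters) != 0);
}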
504 504 /* Architectural events */
505 505 #define ARCH_EVENTS_COMMON \
506 506 { 0xc0, 0x00, C_ALL, "inst_retired.any_p" }, \
507 507 { 0x3c, 0x01, C_ALL, "cpu_clk_unhalted.ref_p" }, \
508 508 { 0x2e, 0x4f, C_ALL, "longest_lat_cache.reference" }, \
509 509 { 0x2e, 0x41, C_ALL, "longest_lat_cache.miss" }, \
510 510 { 0xc4, 0x00, C_ALL, "br_inst_retired.all_branches" }, \
511 511 { 0xc5, 0x00, C_ALL, "br_misp_retired.all_branches" }
512 512
513 513 static const struct events_table_t arch_events_table_non_htt[] = {
514 514 { 0x3c, 0x00, C_ALL, "cpu_clk_unhalted.core" },
515 515 ARCH_EVENTS_COMMON
516 516 };
517 517
518 518 static const struct events_table_t arch_events_table_htt[] = {
519 519 { 0x3c, 0x00, C_ALL, "cpu_clk_unhalted.thread_p" },
520 520 ARCH_EVENTS_COMMON
521 521 };
522 522
523 523 static char *arch_genevents_table[] = {
524 524 "PAPI_tot_cyc", /* cpu_clk_unhalted.thread_p/core */
525 525 "PAPI_tot_ins", /* inst_retired.any_p */
526 526 "", /* cpu_clk_unhalted.ref_p */
527 527 "", /* longest_lat_cache.reference */
528 528 "", /* longest_lat_cache.miss */
529 529 "", /* br_inst_retired.all_branches */
530 530 "", /* br_misp_retired.all_branches */
531 531 };
532 532
533 533 static const struct events_table_t *arch_events_table = NULL;
534 534 static uint64_t known_arch_events;
535 535 static uint64_t known_ffc_num;
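/*
 * Illustrative sketch, not part of the original file: CPUID leaf 0xA reports
 * architectural event availability in EBX, where a set bit means the event
 * is NOT available, hence the "== 0" checks in core_pcbe_init() below
 * (hypothetical helper).
 */
static int
example_arch_event_available(uint32_t events_vector, uint_t idx)
{
	return (((1U << idx) & events_vector) == 0);
}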
536 536
537 537 #define GENERICEVENTS_FAM6_NHM \
538 538 { 0xc4, 0x01, C0|C1|C2|C3, "PAPI_br_cn" }, /* br_inst_retired.conditional */ \
539 539 { 0x1d, 0x01, C0|C1|C2|C3, "PAPI_hw_int" }, /* hw_int.rcv */ \
540 540 { 0x17, 0x01, C0|C1|C2|C3, "PAPI_tot_iis" }, /* inst_queue_writes */ \
541 541 { 0x43, 0x01, C0|C1, "PAPI_l1_dca" }, /* l1d_all_ref.any */ \
542 542 { 0x24, 0x03, C0|C1|C2|C3, "PAPI_l1_dcm" }, /* l2_rqsts. loads and rfos */ \
543 543 { 0x40, 0x0f, C0|C1|C2|C3, "PAPI_l1_dcr" }, /* l1d_cache_ld.mesi */ \
544 544 { 0x41, 0x0f, C0|C1|C2|C3, "PAPI_l1_dcw" }, /* l1d_cache_st.mesi */ \
545 545 { 0x80, 0x03, C0|C1|C2|C3, "PAPI_l1_ica" }, /* l1i.reads */ \
546 546 { 0x80, 0x01, C0|C1|C2|C3, "PAPI_l1_ich" }, /* l1i.hits */ \
547 547 { 0x80, 0x02, C0|C1|C2|C3, "PAPI_l1_icm" }, /* l1i.misses */ \
548 548 { 0x80, 0x03, C0|C1|C2|C3, "PAPI_l1_icr" }, /* l1i.reads */ \
549 549 { 0x24, 0x33, C0|C1|C2|C3, "PAPI_l1_ldm" }, /* l2_rqsts. loads and ifetches */\
550 550 { 0x24, 0xff, C0|C1|C2|C3, "PAPI_l1_tcm" }, /* l2_rqsts.references */ \
551 551 { 0x24, 0x02, C0|C1|C2|C3, "PAPI_l2_ldm" }, /* l2_rqsts.ld_miss */ \
552 552 { 0x24, 0x08, C0|C1|C2|C3, "PAPI_l2_stm" }, /* l2_rqsts.rfo_miss */ \
553 553 { 0x24, 0x3f, C0|C1|C2|C3, "PAPI_l2_tca" }, \
554 554 /* l2_rqsts. loads, rfos and ifetches */ \
555 555 { 0x24, 0x15, C0|C1|C2|C3, "PAPI_l2_tch" }, \
556 556 /* l2_rqsts. ld_hit, rfo_hit and ifetch_hit */ \
557 557 { 0x24, 0x2a, C0|C1|C2|C3, "PAPI_l2_tcm" }, \
558 558 /* l2_rqsts. ld_miss, rfo_miss and ifetch_miss */ \
559 559 { 0x24, 0x33, C0|C1|C2|C3, "PAPI_l2_tcr" }, /* l2_rqsts. loads and ifetches */\
560 560 { 0x24, 0x0c, C0|C1|C2|C3, "PAPI_l2_tcw" }, /* l2_rqsts.rfos */ \
561 561 { 0x2e, 0x4f, C0|C1|C2|C3, "PAPI_l3_tca" }, /* l3_lat_cache.reference */ \
562 562 { 0x2e, 0x41, C0|C1|C2|C3, "PAPI_l3_tcm" }, /* l3_lat_cache.misses */ \
563 563 { 0x0b, 0x01, C0|C1|C2|C3, "PAPI_ld_ins" }, /* mem_inst_retired.loads */ \
564 564 { 0x0b, 0x03, C0|C1|C2|C3, "PAPI_lst_ins" }, \
565 565 /* mem_inst_retired.loads and stores */ \
566 566 { 0x26, 0xf0, C0|C1|C2|C3, "PAPI_prf_dm" }, /* l2_data_rqsts.prefetch.mesi */ \
567 567 { 0x0b, 0x02, C0|C1|C2|C3, "PAPI_sr_ins" }, /* mem_inst_retired.stores */ \
568 568 { 0x49, 0x01, C0|C1|C2|C3, "PAPI_tlb_dm" }, /* dtlb_misses.any */ \
569 569 { 0x85, 0x01, C0|C1|C2|C3, "PAPI_tlb_im" } /* itlb_misses.any */
570 570
571 571
572 572 #define EVENTS_FAM6_NHM \
573 573 \
574 574 { 0x80, 0x04, C0|C1|C2|C3, "l1i.cycles_stalled" }, \
575 575 { 0x80, 0x01, C0|C1|C2|C3, "l1i.hits" }, \
576 576 { 0x80, 0x02, C0|C1|C2|C3, "l1i.misses" }, \
577 577 \
578 578 { 0x80, 0x03, C0|C1|C2|C3, "l1i.reads" }, \
579 579 { 0x82, 0x01, C0|C1|C2|C3, "large_itlb.hit" }, \
580 580 { 0x87, 0x0F, C0|C1|C2|C3, "ild_stall.any" }, \
581 581 \
582 582 { 0x87, 0x04, C0|C1|C2|C3, "ild_stall.iq_full" }, \
583 583 { 0x87, 0x01, C0|C1|C2|C3, "ild_stall.lcp" }, \
584 584 { 0x87, 0x02, C0|C1|C2|C3, "ild_stall.mru" }, \
585 585 \
586 586 { 0x87, 0x08, C0|C1|C2|C3, "ild_stall.regen" }, \
587 587 { 0xE6, 0x02, C0|C1|C2|C3, "baclear.bad_target" }, \
588 588 { 0xE6, 0x01, C0|C1|C2|C3, "baclear.clear" }, \
589 589 \
590 590 { 0xE8, 0x01, C0|C1|C2|C3, "bpu_clears.early" }, \
591 591 { 0xE8, 0x02, C0|C1|C2|C3, "bpu_clears.late" }, \
592 592 { 0xE5, 0x01, C0|C1|C2|C3, "bpu_missed_call_ret" }, \
593 593 \
594 594 { 0xE0, 0x01, C0|C1|C2|C3, "br_inst_decoded" }, \
595 595 { 0x88, 0x7F, C0|C1|C2|C3, "br_inst_exec.any" }, \
596 596 { 0x88, 0x01, C0|C1|C2|C3, "br_inst_exec.cond" }, \
597 597 \
598 598 { 0x88, 0x02, C0|C1|C2|C3, "br_inst_exec.direct" }, \
599 599 { 0x88, 0x10, C0|C1|C2|C3, "br_inst_exec.direct_near_call" }, \
600 600 { 0x88, 0x20, C0|C1|C2|C3, "br_inst_exec.indirect_near_call" }, \
601 601 \
602 602 { 0x88, 0x04, C0|C1|C2|C3, "br_inst_exec.indirect_non_call" }, \
603 603 { 0x88, 0x30, C0|C1|C2|C3, "br_inst_exec.near_calls" }, \
604 604 { 0x88, 0x07, C0|C1|C2|C3, "br_inst_exec.non_calls" }, \
605 605 \
606 606 { 0x88, 0x08, C0|C1|C2|C3, "br_inst_exec.return_near" }, \
607 607 { 0x88, 0x40, C0|C1|C2|C3, "br_inst_exec.taken" }, \
608 608 { 0x89, 0x7F, C0|C1|C2|C3, "br_misp_exec.any" }, \
609 609 \
610 610 { 0x89, 0x01, C0|C1|C2|C3, "br_misp_exec.cond" }, \
611 611 { 0x89, 0x02, C0|C1|C2|C3, "br_misp_exec.direct" }, \
612 612 { 0x89, 0x10, C0|C1|C2|C3, "br_misp_exec.direct_near_call" }, \
613 613 \
614 614 { 0x89, 0x20, C0|C1|C2|C3, "br_misp_exec.indirect_near_call" }, \
615 615 { 0x89, 0x04, C0|C1|C2|C3, "br_misp_exec.indirect_non_call" }, \
616 616 { 0x89, 0x30, C0|C1|C2|C3, "br_misp_exec.near_calls" }, \
617 617 \
618 618 { 0x89, 0x07, C0|C1|C2|C3, "br_misp_exec.non_calls" }, \
619 619 { 0x89, 0x08, C0|C1|C2|C3, "br_misp_exec.return_near" }, \
620 620 { 0x89, 0x40, C0|C1|C2|C3, "br_misp_exec.taken" }, \
621 621 \
622 622 { 0x17, 0x01, C0|C1|C2|C3, "inst_queue_writes" }, \
623 623 { 0x1E, 0x01, C0|C1|C2|C3, "inst_queue_write_cycles" }, \
624 624 { 0xA7, 0x01, C0|C1|C2|C3, "baclear_force_iq" }, \
625 625 \
626 626 { 0xD0, 0x01, C0|C1|C2|C3, "macro_insts.decoded" }, \
627 627 { 0xA6, 0x01, C0|C1|C2|C3, "macro_insts.fusions_decoded" }, \
628 628 { 0x19, 0x01, C0|C1|C2|C3, "two_uop_insts_decoded" }, \
629 629 \
630 630 { 0x18, 0x01, C0|C1|C2|C3, "inst_decoded.dec0" }, \
631 631 { 0xD1, 0x04, C0|C1|C2|C3, "uops_decoded.esp_folding" }, \
632 632 { 0xD1, 0x08, C0|C1|C2|C3, "uops_decoded.esp_sync" }, \
633 633 \
634 634 { 0xD1, 0x02, C0|C1|C2|C3, "uops_decoded.ms" }, \
635 635 { 0x20, 0x01, C0|C1|C2|C3, "lsd_overflow" }, \
636 636 { 0x0E, 0x01, C0|C1|C2|C3, "uops_issued.any" }, \
637 637 \
638 638 { 0x0E, 0x02, C0|C1|C2|C3, "uops_issued.fused" }, \
639 639 { 0xA2, 0x20, C0|C1|C2|C3, "resource_stalls.fpcw" }, \
640 640 { 0xA2, 0x02, C0|C1|C2|C3, "resource_stalls.load" }, \
641 641 \
642 642 { 0xA2, 0x40, C0|C1|C2|C3, "resource_stalls.mxcsr" }, \
643 643 { 0xA2, 0x04, C0|C1|C2|C3, "resource_stalls.rs_full" }, \
644 644 { 0xA2, 0x08, C0|C1|C2|C3, "resource_stalls.store" }, \
645 645 \
646 646 { 0xA2, 0x01, C0|C1|C2|C3, "resource_stalls.any" }, \
647 647 { 0xD2, 0x01, C0|C1|C2|C3, "rat_stalls.flags" }, \
648 648 { 0xD2, 0x02, C0|C1|C2|C3, "rat_stalls.registers" }, \
649 649 \
650 650 { 0xD2, 0x04, C0|C1|C2|C3, "rat_stalls.rob_read_port" }, \
651 651 { 0xD2, 0x08, C0|C1|C2|C3, "rat_stalls.scoreboard" }, \
652 652 { 0xD2, 0x0F, C0|C1|C2|C3, "rat_stalls.any" }, \
653 653 \
654 654 { 0xD4, 0x01, C0|C1|C2|C3, "seg_rename_stalls" }, \
655 655 { 0xD5, 0x01, C0|C1|C2|C3, "es_reg_renames" }, \
656 656 { 0x10, 0x02, C0|C1|C2|C3, "fp_comp_ops_exe.mmx" }, \
657 657 \
658 658 { 0x10, 0x80, C0|C1|C2|C3, "fp_comp_ops_exe.sse_double_precision" }, \
659 659 { 0x10, 0x04, C0|C1|C2|C3, "fp_comp_ops_exe.sse_fp" }, \
660 660 { 0x10, 0x10, C0|C1|C2|C3, "fp_comp_ops_exe.sse_fp_packed" }, \
661 661 \
662 662 { 0x10, 0x20, C0|C1|C2|C3, "fp_comp_ops_exe.sse_fp_scalar" }, \
663 663 { 0x10, 0x40, C0|C1|C2|C3, "fp_comp_ops_exe.sse_single_precision" }, \
664 664 { 0x10, 0x08, C0|C1|C2|C3, "fp_comp_ops_exe.sse2_integer" }, \
665 665 \
666 666 { 0x10, 0x01, C0|C1|C2|C3, "fp_comp_ops_exe.x87" }, \
667 667 { 0x14, 0x01, C0|C1|C2|C3, "arith.cycles_div_busy" }, \
668 668 { 0x14, 0x02, C0|C1|C2|C3, "arith.mul" }, \
669 669 \
670 670 { 0x12, 0x04, C0|C1|C2|C3, "simd_int_128.pack" }, \
671 671 { 0x12, 0x20, C0|C1|C2|C3, "simd_int_128.packed_arith" }, \
672 672 { 0x12, 0x10, C0|C1|C2|C3, "simd_int_128.packed_logical" }, \
673 673 \
674 674 { 0x12, 0x01, C0|C1|C2|C3, "simd_int_128.packed_mpy" }, \
675 675 { 0x12, 0x02, C0|C1|C2|C3, "simd_int_128.packed_shift" }, \
676 676 { 0x12, 0x40, C0|C1|C2|C3, "simd_int_128.shuffle_move" }, \
677 677 \
678 678 { 0x12, 0x08, C0|C1|C2|C3, "simd_int_128.unpack" }, \
679 679 { 0xFD, 0x04, C0|C1|C2|C3, "simd_int_64.pack" }, \
680 680 { 0xFD, 0x20, C0|C1|C2|C3, "simd_int_64.packed_arith" }, \
681 681 \
682 682 { 0xFD, 0x10, C0|C1|C2|C3, "simd_int_64.packed_logical" }, \
683 683 { 0xFD, 0x01, C0|C1|C2|C3, "simd_int_64.packed_mpy" }, \
684 684 { 0xFD, 0x02, C0|C1|C2|C3, "simd_int_64.packed_shift" }, \
685 685 \
686 686 { 0xFD, 0x40, C0|C1|C2|C3, "simd_int_64.shuffle_move" }, \
687 687 { 0xFD, 0x08, C0|C1|C2|C3, "simd_int_64.unpack" }, \
688 688 { 0xB1, 0x01, C0|C1|C2|C3, "uops_executed.port0" }, \
689 689 \
690 690 { 0xB1, 0x02, C0|C1|C2|C3, "uops_executed.port1" }, \
691 691 { 0x40, 0x04, C0|C1, "l1d_cache_ld.e_state" }, \
692 692 { 0x40, 0x01, C0|C1, "l1d_cache_ld.i_state" }, \
693 693 \
694 694 { 0x40, 0x08, C0|C1, "l1d_cache_ld.m_state" }, \
695 695 { 0x40, 0x0F, C0|C1, "l1d_cache_ld.mesi" }, \
696 696 { 0x40, 0x02, C0|C1, "l1d_cache_ld.s_state" }, \
697 697 \
698 698 { 0x41, 0x04, C0|C1, "l1d_cache_st.e_state" }, \
699 699 { 0x41, 0x08, C0|C1, "l1d_cache_st.m_state" }, \
700 700 { 0x41, 0x0F, C0|C1, "l1d_cache_st.mesi" }, \
701 701 \
702 702 { 0x41, 0x02, C0|C1, "l1d_cache_st.s_state" }, \
703 703 { 0x42, 0x04, C0|C1, "l1d_cache_lock.e_state" }, \
704 704 { 0x42, 0x01, C0|C1, "l1d_cache_lock.hit" }, \
705 705 \
706 706 { 0x42, 0x08, C0|C1, "l1d_cache_lock.m_state" }, \
707 707 { 0x42, 0x02, C0|C1, "l1d_cache_lock.s_state" }, \
708 708 { 0x43, 0x01, C0|C1, "l1d_all_ref.any" }, \
709 709 \
710 710 { 0x43, 0x02, C0|C1, "l1d_all_ref.cacheable" }, \
711 711 { 0x4B, 0x01, C0|C1, "mmx2_mem_exec.nta" }, \
712 712 { 0x4C, 0x01, C0|C1, "load_hit_pre" }, \
713 713 \
714 714 { 0x4E, 0x02, C0|C1, "l1d_prefetch.miss" }, \
715 715 { 0x4E, 0x01, C0|C1, "l1d_prefetch.requests" }, \
716 716 { 0x4E, 0x04, C0|C1, "l1d_prefetch.triggers" }, \
717 717 \
718 718 { 0x51, 0x04, C0|C1, "l1d.m_evict" }, \
719 719 { 0x51, 0x02, C0|C1, "l1d.m_repl" }, \
720 720 { 0x51, 0x08, C0|C1, "l1d.m_snoop_evict" }, \
721 721 \
722 722 { 0x51, 0x01, C0|C1, "l1d.repl" }, \
723 723 { 0x52, 0x01, C0|C1, "l1d_cache_prefetch_lock_fb_hit" }, \
724 724 { 0x53, 0x01, C0|C1, "l1d_cache_lock_fb_hit" }, \
725 725 \
726 726 { 0x63, 0x02, C0|C1, "cache_lock_cycles.l1d" }, \
727 727 { 0x63, 0x01, C0|C1, "cache_lock_cycles.l1d_l2" }, \
728 728 { 0x06, 0x04, C0|C1|C2|C3, "store_blocks.at_ret" }, \
729 729 \
730 730 { 0x06, 0x08, C0|C1|C2|C3, "store_blocks.l1d_block" }, \
731 731 { 0x06, 0x01, C0|C1|C2|C3, "store_blocks.not_sta" }, \
732 732 { 0x06, 0x02, C0|C1|C2|C3, "store_blocks.sta" }, \
733 733 \
734 734 { 0x13, 0x07, C0|C1|C2|C3, "load_dispatch.any" }, \
735 735 { 0x13, 0x04, C0|C1|C2|C3, "load_dispatch.mob" }, \
736 736 { 0x13, 0x01, C0|C1|C2|C3, "load_dispatch.rs" }, \
737 737 \
738 738 { 0x13, 0x02, C0|C1|C2|C3, "load_dispatch.rs_delayed" }, \
739 739 { 0x08, 0x01, C0|C1|C2|C3, "dtlb_load_misses.any" }, \
740 740 { 0x08, 0x20, C0|C1|C2|C3, "dtlb_load_misses.pde_miss" }, \
741 741 \
742 742 { 0x08, 0x02, C0|C1|C2|C3, "dtlb_load_misses.walk_completed" }, \
743 743 { 0x49, 0x01, C0|C1|C2|C3, "dtlb_misses.any" }, \
744 744 { 0x49, 0x10, C0|C1|C2|C3, "dtlb_misses.stlb_hit" }, \
745 745 \
746 746 { 0x49, 0x02, C0|C1|C2|C3, "dtlb_misses.walk_completed" }, \
747 747 { 0x4F, 0x02, C0|C1|C2|C3, "ept.epde_miss" }, \
748 748 { 0x4F, 0x08, C0|C1|C2|C3, "ept.epdpe_miss" }, \
749 749 \
750 750 { 0x85, 0x01, C0|C1|C2|C3, "itlb_misses.any" }, \
751 751 { 0x85, 0x02, C0|C1|C2|C3, "itlb_misses.walk_completed" }, \
752 752 { 0x24, 0xAA, C0|C1|C2|C3, "l2_rqsts.miss" }, \
753 753 \
754 754 { 0x24, 0xFF, C0|C1|C2|C3, "l2_rqsts.references" }, \
755 755 { 0x24, 0x10, C0|C1|C2|C3, "l2_rqsts.ifetch_hit" }, \
756 756 { 0x24, 0x20, C0|C1|C2|C3, "l2_rqsts.ifetch_miss" }, \
757 757 \
758 758 { 0x24, 0x30, C0|C1|C2|C3, "l2_rqsts.ifetches" }, \
759 759 { 0x24, 0x01, C0|C1|C2|C3, "l2_rqsts.ld_hit" }, \
760 760 { 0x24, 0x02, C0|C1|C2|C3, "l2_rqsts.ld_miss" }, \
761 761 \
762 762 { 0x24, 0x03, C0|C1|C2|C3, "l2_rqsts.loads" }, \
763 763 { 0x24, 0x40, C0|C1|C2|C3, "l2_rqsts.prefetch_hit" }, \
764 764 { 0x24, 0x80, C0|C1|C2|C3, "l2_rqsts.prefetch_miss" }, \
765 765 \
766 766 { 0x24, 0xC0, C0|C1|C2|C3, "l2_rqsts.prefetches" }, \
767 767 { 0x24, 0x04, C0|C1|C2|C3, "l2_rqsts.rfo_hit" }, \
768 768 { 0x24, 0x08, C0|C1|C2|C3, "l2_rqsts.rfo_miss" }, \
769 769 \
770 770 { 0x24, 0x0C, C0|C1|C2|C3, "l2_rqsts.rfos" }, \
771 771 { 0x26, 0xFF, C0|C1|C2|C3, "l2_data_rqsts.any" }, \
772 772 { 0x26, 0x04, C0|C1|C2|C3, "l2_data_rqsts.demand.e_state" }, \
773 773 \
774 774 { 0x26, 0x01, C0|C1|C2|C3, "l2_data_rqsts.demand.i_state" }, \
775 775 { 0x26, 0x08, C0|C1|C2|C3, "l2_data_rqsts.demand.m_state" }, \
776 776 { 0x26, 0x0F, C0|C1|C2|C3, "l2_data_rqsts.demand.mesi" }, \
777 777 \
778 778 { 0x26, 0x02, C0|C1|C2|C3, "l2_data_rqsts.demand.s_state" }, \
779 779 { 0x26, 0x40, C0|C1|C2|C3, "l2_data_rqsts.prefetch.e_state" }, \
780 780 { 0x26, 0x10, C0|C1|C2|C3, "l2_data_rqsts.prefetch.i_state" }, \
781 781 \
782 782 { 0x26, 0x80, C0|C1|C2|C3, "l2_data_rqsts.prefetch.m_state" }, \
783 783 { 0x26, 0xF0, C0|C1|C2|C3, "l2_data_rqsts.prefetch.mesi" }, \
784 784 { 0x26, 0x20, C0|C1|C2|C3, "l2_data_rqsts.prefetch.s_state" }, \
785 785 \
786 786 { 0x27, 0x40, C0|C1|C2|C3, "l2_write.lock.e_state" }, \
787 787 { 0x27, 0x10, C0|C1|C2|C3, "l2_write.lock.i_state" }, \
788 788 { 0x27, 0x20, C0|C1|C2|C3, "l2_write.lock.s_state" }, \
789 789 \
790 790 { 0x27, 0x0E, C0|C1|C2|C3, "l2_write.rfo.hit" }, \
791 791 { 0x27, 0x01, C0|C1|C2|C3, "l2_write.rfo.i_state" }, \
792 792 { 0x27, 0x08, C0|C1|C2|C3, "l2_write.rfo.m_state" }, \
793 793 \
794 794 { 0x27, 0x0F, C0|C1|C2|C3, "l2_write.rfo.mesi" }, \
795 795 { 0x27, 0x02, C0|C1|C2|C3, "l2_write.rfo.s_state" }, \
796 796 { 0x28, 0x04, C0|C1|C2|C3, "l1d_wb_l2.e_state" }, \
797 797 \
798 798 { 0x28, 0x01, C0|C1|C2|C3, "l1d_wb_l2.i_state" }, \
799 799 { 0x28, 0x08, C0|C1|C2|C3, "l1d_wb_l2.m_state" }, \
800 800 { 0xF0, 0x80, C0|C1|C2|C3, "l2_transactions.any" }, \
801 801 \
802 802 { 0xF0, 0x20, C0|C1|C2|C3, "l2_transactions.fill" }, \
803 803 { 0xF0, 0x04, C0|C1|C2|C3, "l2_transactions.ifetch" }, \
804 804 { 0xF0, 0x10, C0|C1|C2|C3, "l2_transactions.l1d_wb" }, \
805 805 \
806 806 { 0xF0, 0x01, C0|C1|C2|C3, "l2_transactions.load" }, \
807 807 { 0xF0, 0x08, C0|C1|C2|C3, "l2_transactions.prefetch" }, \
808 808 { 0xF0, 0x02, C0|C1|C2|C3, "l2_transactions.rfo" }, \
809 809 \
810 810 { 0xF0, 0x40, C0|C1|C2|C3, "l2_transactions.wb" }, \
811 811 { 0xF1, 0x07, C0|C1|C2|C3, "l2_lines_in.any" }, \
812 812 { 0xF1, 0x04, C0|C1|C2|C3, "l2_lines_in.e_state" }, \
813 813 \
814 814 { 0xF1, 0x02, C0|C1|C2|C3, "l2_lines_in.s_state" }, \
815 815 { 0xF2, 0x0F, C0|C1|C2|C3, "l2_lines_out.any" }, \
816 816 { 0xF2, 0x01, C0|C1|C2|C3, "l2_lines_out.demand_clean" }, \
817 817 \
818 818 { 0xF2, 0x02, C0|C1|C2|C3, "l2_lines_out.demand_dirty" }, \
819 819 { 0xF2, 0x04, C0|C1|C2|C3, "l2_lines_out.prefetch_clean" }, \
820 820 { 0x6C, 0x01, C0|C1|C2|C3, "io_transactions" }, \
821 821 \
822 822 { 0xB0, 0x80, C0|C1|C2|C3, "offcore_requests.any" }, \
823 823 { 0xB0, 0x10, C0|C1|C2|C3, "offcore_requests.any.rfo" }, \
824 824 { 0xB0, 0x40, C0|C1|C2|C3, "offcore_requests.l1d_writeback" }, \
825 825 \
826 826 { 0xB8, 0x01, C0|C1|C2|C3, "snoop_response.hit" }, \
827 827 { 0xB8, 0x02, C0|C1|C2|C3, "snoop_response.hite" }, \
828 828 { 0xB8, 0x04, C0|C1|C2|C3, "snoop_response.hitm" }, \
829 829 \
830 830 { 0xF4, 0x10, C0|C1|C2|C3, "sq_misc.split_lock" }, \
831 831 { 0x0B, 0x01, C0|C1|C2|C3, "mem_inst_retired.loads" }, \
832 832 { 0x0B, 0x02, C0|C1|C2|C3, "mem_inst_retired.stores" }, \
833 833 \
834 834 { 0xC0, 0x04, C0|C1|C2|C3, "inst_retired.mmx" }, \
835 835 { 0xC0, 0x02, C0|C1|C2|C3, "inst_retired.x87" }, \
836 836 { 0xC7, 0x04, C0|C1|C2|C3, "ssex_uops_retired.packed_double" }, \
837 837 \
838 838 { 0xC7, 0x01, C0|C1|C2|C3, "ssex_uops_retired.packed_single" }, \
839 839 { 0xC7, 0x08, C0|C1|C2|C3, "ssex_uops_retired.scalar_double" }, \
840 840 { 0xC7, 0x02, C0|C1|C2|C3, "ssex_uops_retired.scalar_single" }, \
841 841 \
842 842 { 0xC7, 0x10, C0|C1|C2|C3, "ssex_uops_retired.vector_integer" }, \
843 843 { 0xC2, 0x01, C0|C1|C2|C3, "uops_retired.any" }, \
844 844 { 0xC2, 0x04, C0|C1|C2|C3, "uops_retired.macro_fused" }, \
845 845 \
846 846 { 0xC8, 0x20, C0|C1|C2|C3, "itlb_miss_retired" }, \
847 847 { 0xCB, 0x80, C0|C1|C2|C3, "mem_load_retired.dtlb_miss" }, \
848 848 { 0xCB, 0x40, C0|C1|C2|C3, "mem_load_retired.hit_lfb" }, \
849 849 \
850 850 { 0xCB, 0x01, C0|C1|C2|C3, "mem_load_retired.l1d_hit" }, \
851 851 { 0xCB, 0x02, C0|C1|C2|C3, "mem_load_retired.l2_hit" }, \
852 852 { 0xCB, 0x10, C0|C1|C2|C3, "mem_load_retired.llc_miss" }, \
853 853 \
854 854 { 0xCB, 0x04, C0|C1|C2|C3, "mem_load_retired.llc_unshared_hit" }, \
855 855 { 0xCB, 0x08, C0|C1|C2|C3, "mem_load_retired.other_core_l2_hit_hitm" }, \
856 856 { 0x0F, 0x02, C0|C1|C2|C3, "mem_uncore_retired.other_core_l2_hitm" }, \
857 857 \
858 858 { 0x0F, 0x08, C0|C1|C2|C3, "mem_uncore_retired.remote_cache_local_home_hit" },\
859 859 { 0x0F, 0x10, C0|C1|C2|C3, "mem_uncore_retired.remote_dram" }, \
860 860 { 0x0F, 0x20, C0|C1|C2|C3, "mem_uncore_retired.local_dram" }, \
861 861 \
862 862 { 0x0C, 0x01, C0|C1|C2|C3, "mem_store_retired.dtlb_miss" }, \
863 863 { 0xC4, 0x01, C0|C1|C2|C3, "br_inst_retired.conditional" }, \
864 864 { 0xC4, 0x02, C0|C1|C2|C3, "br_inst_retired.near_call" }, \
865 865 \
866 866 { 0xC5, 0x02, C0|C1|C2|C3, "br_misp_retired.near_call" }, \
867 867 { 0xDB, 0x01, C0|C1|C2|C3, "uop_unfusion" }, \
868 868 { 0xF7, 0x01, C0|C1|C2|C3, "fp_assist.all" }, \
869 869 \
870 870 { 0xF7, 0x04, C0|C1|C2|C3, "fp_assist.input" }, \
871 871 { 0xF7, 0x02, C0|C1|C2|C3, "fp_assist.output" }, \
872 872 { 0xCC, 0x03, C0|C1|C2|C3, "fp_mmx_trans.any" }, \
873 873 \
874 874 { 0xCC, 0x01, C0|C1|C2|C3, "fp_mmx_trans.to_fp" }, \
875 875 { 0xCC, 0x02, C0|C1|C2|C3, "fp_mmx_trans.to_mmx" }, \
876 876 { 0xC3, 0x04, C0|C1|C2|C3, "machine_clears.smc" }
877 877
878 878 #define GENERICEVENTS_FAM6_MOD28 \
879 879 { 0xc4, 0x00, C0|C1, "PAPI_br_ins" }, /* br_inst_retired.any */ \
880 880 { 0xc5, 0x00, C0|C1, "PAPI_br_msp" }, /* br_inst_retired.mispred */ \
881 881 { 0xc4, 0x03, C0|C1, "PAPI_br_ntk" }, \
882 882 /* br_inst_retired.pred_not_taken|mispred_not_taken */ \
883 883 { 0xc4, 0x05, C0|C1, "PAPI_br_prc" }, \
884 884 /* br_inst_retired.pred_not_taken|pred_taken */ \
885 885 { 0xc8, 0x00, C0|C1, "PAPI_hw_int" }, /* hw_int_rcv */ \
886 886 { 0xaa, 0x03, C0|C1, "PAPI_tot_iis" }, /* macro_insts.all_decoded */ \
887 887 { 0x40, 0x23, C0|C1, "PAPI_l1_dca" }, /* l1d_cache.l1|st */ \
888 888 { 0x2a, 0x41, C0|C1, "PAPI_l2_stm" }, /* l2_st.self.i_state */ \
889 889 { 0x2e, 0x4f, C0|C1, "PAPI_l2_tca" }, /* longest_lat_cache.reference */ \
890 890 { 0x2e, 0x4e, C0|C1, "PAPI_l2_tch" }, /* l2_rqsts.mes */ \
891 891 { 0x2e, 0x41, C0|C1, "PAPI_l2_tcm" }, /* longest_lat_cache.miss */ \
892 892 { 0x2a, 0x4f, C0|C1, "PAPI_l2_tcw" }, /* l2_st.self.mesi */ \
893 893 { 0x08, 0x07, C0|C1, "PAPI_tlb_dm" }, /* data_tlb_misses.dtlb.miss */ \
894 894 { 0x82, 0x02, C0|C1, "PAPI_tlb_im" } /* itlb.misses */
895 895
896 896
897 897 #define EVENTS_FAM6_MOD28 \
898 898 { 0x2, 0x81, C0|C1, "store_forwards.good" }, \
899 899 { 0x6, 0x0, C0|C1, "segment_reg_loads.any" }, \
900 900 { 0x7, 0x1, C0|C1, "prefetch.prefetcht0" }, \
901 901 { 0x7, 0x6, C0|C1, "prefetch.sw_l2" }, \
902 902 { 0x7, 0x8, C0|C1, "prefetch.prefetchnta" }, \
903 903 { 0x8, 0x7, C0|C1, "data_tlb_misses.dtlb_miss" }, \
904 904 { 0x8, 0x5, C0|C1, "data_tlb_misses.dtlb_miss_ld" }, \
905 905 { 0x8, 0x9, C0|C1, "data_tlb_misses.l0_dtlb_miss_ld" }, \
906 906 { 0x8, 0x6, C0|C1, "data_tlb_misses.dtlb_miss_st" }, \
907 907 { 0xC, 0x3, C0|C1, "page_walks.cycles" }, \
908 908 { 0x10, 0x1, C0|C1, "x87_comp_ops_exe.any.s" }, \
909 909 { 0x10, 0x81, C0|C1, "x87_comp_ops_exe.any.ar" }, \
910 910 { 0x11, 0x1, C0|C1, "fp_assist" }, \
911 911 { 0x11, 0x81, C0|C1, "fp_assist.ar" }, \
912 912 { 0x12, 0x1, C0|C1, "mul.s" }, \
913 913 { 0x12, 0x81, C0|C1, "mul.ar" }, \
914 914 { 0x13, 0x1, C0|C1, "div.s" }, \
915 915 { 0x13, 0x81, C0|C1, "div.ar" }, \
916 916 { 0x14, 0x1, C0|C1, "cycles_div_busy" }, \
917 917 { 0x21, 0x0, C0|C1, "l2_ads" }, \
918 918 { 0x22, 0x0, C0|C1, "l2_dbus_busy" }, \
919 919 { 0x24, 0x0, C0|C1, "l2_lines_in" }, \
920 920 { 0x25, 0x0, C0|C1, "l2_m_lines_in" }, \
921 921 { 0x26, 0x0, C0|C1, "l2_lines_out" }, \
922 922 { 0x27, 0x0, C0|C1, "l2_m_lines_out" }, \
923 923 { 0x28, 0x0, C0|C1, "l2_ifetch" }, \
924 924 { 0x29, 0x0, C0|C1, "l2_ld" }, \
925 925 { 0x2A, 0x0, C0|C1, "l2_st" }, \
926 926 { 0x2B, 0x0, C0|C1, "l2_lock" }, \
927 927 { 0x2E, 0x0, C0|C1, "l2_rqsts" }, \
928 928 { 0x2E, 0x41, C0|C1, "l2_rqsts.self.demand.i_state" }, \
929 929 { 0x2E, 0x4F, C0|C1, "l2_rqsts.self.demand.mesi" }, \
930 930 { 0x30, 0x0, C0|C1, "l2_reject_bus_q" }, \
931 931 { 0x32, 0x0, C0|C1, "l2_no_req" }, \
932 932 { 0x3A, 0x0, C0|C1, "eist_trans" }, \
933 933 { 0x3B, 0xC0, C0|C1, "thermal_trip" }, \
934 934 { 0x3C, 0x0, C0|C1, "cpu_clk_unhalted.core_p" }, \
935 935 { 0x3C, 0x1, C0|C1, "cpu_clk_unhalted.bus" }, \
936 936 { 0x3C, 0x2, C0|C1, "cpu_clk_unhalted.no_other" }, \
937 937 { 0x40, 0x21, C0|C1, "l1d_cache.ld" }, \
938 938 { 0x40, 0x22, C0|C1, "l1d_cache.st" }, \
939 939 { 0x60, 0x0, C0|C1, "bus_request_outstanding" }, \
940 940 { 0x61, 0x0, C0|C1, "bus_bnr_drv" }, \
941 941 { 0x62, 0x0, C0|C1, "bus_drdy_clocks" }, \
942 942 { 0x63, 0x0, C0|C1, "bus_lock_clocks" }, \
943 943 { 0x64, 0x0, C0|C1, "bus_data_rcv" }, \
944 944 { 0x65, 0x0, C0|C1, "bus_trans_brd" }, \
945 945 { 0x66, 0x0, C0|C1, "bus_trans_rfo" }, \
946 946 { 0x67, 0x0, C0|C1, "bus_trans_wb" }, \
947 947 { 0x68, 0x0, C0|C1, "bus_trans_ifetch" }, \
948 948 { 0x69, 0x0, C0|C1, "bus_trans_inval" }, \
949 949 { 0x6A, 0x0, C0|C1, "bus_trans_pwr" }, \
950 950 { 0x6B, 0x0, C0|C1, "bus_trans_p" }, \
951 951 { 0x6C, 0x0, C0|C1, "bus_trans_io" }, \
952 952 { 0x6D, 0x0, C0|C1, "bus_trans_def" }, \
953 953 { 0x6E, 0x0, C0|C1, "bus_trans_burst" }, \
954 954 { 0x6F, 0x0, C0|C1, "bus_trans_mem" }, \
955 955 { 0x70, 0x0, C0|C1, "bus_trans_any" }, \
956 956 { 0x77, 0x0, C0|C1, "ext_snoop" }, \
957 957 { 0x7A, 0x0, C0|C1, "bus_hit_drv" }, \
958 958 { 0x7B, 0x0, C0|C1, "bus_hitm_drv" }, \
959 959 { 0x7D, 0x0, C0|C1, "busq_empty" }, \
960 960 { 0x7E, 0x0, C0|C1, "snoop_stall_drv" }, \
961 961 { 0x7F, 0x0, C0|C1, "bus_io_wait" }, \
962 962 { 0x80, 0x3, C0|C1, "icache.accesses" }, \
963 963 { 0x80, 0x2, C0|C1, "icache.misses" }, \
964 964 { 0x82, 0x4, C0|C1, "itlb.flush" }, \
965 965 { 0x82, 0x2, C0|C1, "itlb.misses" }, \
966 966 { 0xAA, 0x2, C0|C1, "macro_insts.cisc_decoded" }, \
967 967 { 0xAA, 0x3, C0|C1, "macro_insts.all_decoded" }, \
968 968 { 0xB0, 0x0, C0|C1, "simd_uops_exec.s" }, \
969 969 { 0xB0, 0x80, C0|C1, "simd_uops_exec.ar" }, \
970 970 { 0xB1, 0x0, C0|C1, "simd_sat_uop_exec.s" }, \
971 971 { 0xB1, 0x80, C0|C1, "simd_sat_uop_exec.ar" }, \
972 972 { 0xB3, 0x1, C0|C1, "simd_uop_type_exec.mul.s" }, \
973 973 { 0xB3, 0x81, C0|C1, "simd_uop_type_exec.mul.ar" }, \
974 974 { 0xB3, 0x02, C0|C1, "simd_uop_type_exec.shift.s" }, \
975 975 { 0xB3, 0x82, C0|C1, "simd_uop_type_exec.shift.ar" }, \
976 976 { 0xB3, 0x04, C0|C1, "simd_uop_type_exec.pack.s" }, \
977 977 { 0xB3, 0x84, C0|C1, "simd_uop_type_exec.pack.ar" }, \
978 978 { 0xB3, 0x08, C0|C1, "simd_uop_type_exec.unpack.s" }, \
979 979 { 0xB3, 0x88, C0|C1, "simd_uop_type_exec.unpack.ar" }, \
980 980 { 0xB3, 0x10, C0|C1, "simd_uop_type_exec.logical.s" }, \
981 981 { 0xB3, 0x90, C0|C1, "simd_uop_type_exec.logical.ar" }, \
982 982 { 0xB3, 0x20, C0|C1, "simd_uop_type_exec.arithmetic.s" }, \
983 983 { 0xB3, 0xA0, C0|C1, "simd_uop_type_exec.arithmetic.ar" }, \
984 984 { 0xC2, 0x10, C0|C1, "uops_retired.any" }, \
985 985 { 0xC3, 0x1, C0|C1, "machine_clears.smc" }, \
986 986 { 0xC4, 0x0, C0|C1, "br_inst_retired.any" }, \
987 987 { 0xC4, 0x1, C0|C1, "br_inst_retired.pred_not_taken" }, \
988 988 { 0xC4, 0x2, C0|C1, "br_inst_retired.mispred_not_taken" }, \
989 989 { 0xC4, 0x4, C0|C1, "br_inst_retired.pred_taken" }, \
990 990 { 0xC4, 0x8, C0|C1, "br_inst_retired.mispred_taken" }, \
991 991 { 0xC4, 0xA, C0|C1, "br_inst_retired.mispred" }, \
992 992 { 0xC4, 0xC, C0|C1, "br_inst_retired.taken" }, \
993 993 { 0xC4, 0xF, C0|C1, "br_inst_retired.any1" }, \
994 994 { 0xC6, 0x1, C0|C1, "cycles_int_masked.cycles_int_masked" }, \
995 995 { 0xC6, 0x2, C0|C1, \
996 996 "cycles_int_masked.cycles_int_pending_and_masked" }, \
997 997 { 0xC7, 0x1, C0|C1, "simd_inst_retired.packed_single" }, \
998 998 { 0xC7, 0x2, C0|C1, "simd_inst_retired.scalar_single" }, \
999 999 { 0xC7, 0x4, C0|C1, "simd_inst_retired.packed_double" }, \
1000 1000 { 0xC7, 0x8, C0|C1, "simd_inst_retired.scalar_double" }, \
1001 1001 { 0xC7, 0x10, C0|C1, "simd_inst_retired.vector" }, \
1002 1002 { 0xC7, 0x1F, C0|C1, "simd_inst_retired.any" }, \
1003 1003 { 0xC8, 0x00, C0|C1, "hw_int_rcv" }, \
1004 1004 { 0xCA, 0x1, C0|C1, "simd_comp_inst_retired.packed_single" }, \
1005 1005 { 0xCA, 0x2, C0|C1, "simd_comp_inst_retired.scalar_single" }, \
1006 1006 { 0xCA, 0x4, C0|C1, "simd_comp_inst_retired.packed_double" }, \
1007 1007 { 0xCA, 0x8, C0|C1, "simd_comp_inst_retired.scalar_double" }, \
1008 1008 { 0xCB, 0x1, C0|C1, "mem_load_retired.l2_hit" }, \
1009 1009 { 0xCB, 0x2, C0|C1, "mem_load_retired.l2_miss" }, \
1010 1010 { 0xCB, 0x4, C0|C1, "mem_load_retired.dtlb_miss" }, \
1011 1011 { 0xCD, 0x0, C0|C1, "simd_assist" }, \
1012 1012 { 0xCE, 0x0, C0|C1, "simd_instr_retired" }, \
1013 1013 { 0xCF, 0x0, C0|C1, "simd_sat_instr_retired" }, \
1014 1014 { 0xE0, 0x1, C0|C1, "br_inst_decoded" }, \
1015 1015 { 0xE4, 0x1, C0|C1, "bogus_br" }, \
1016 1016 { 0xE6, 0x1, C0|C1, "baclears.any" }
1017 1017
1018 1018 static const struct events_table_t *events_table = NULL;
1019 1019
1020 1020 const struct events_table_t events_fam6_nhm[] = {
1021 1021 GENERICEVENTS_FAM6_NHM,
1022 1022 EVENTS_FAM6_NHM,
1023 1023 { NT_END, 0, 0, "" }
1024 1024 };
1025 1025
1026 1026 const struct events_table_t events_fam6_mod28[] = {
1027 1027 GENERICEVENTS_FAM6_MOD28,
1028 1028 EVENTS_FAM6_MOD28,
1029 1029 { NT_END, 0, 0, "" }
1030 1030 };
1031 1031
1032 1032 /*
1033 1033 * Initialize string containing list of supported general-purpose counter
1034 1034 * events for processors of the Penryn and Merom families
1035 1035 */
1036 1036 static void
1037 1037 pcbe_init_core_uarch()
1038 1038 {
1039 1039 const struct nametable_core_uarch *n;
1040 1040 const struct generic_events *k;
1041 1041 const struct nametable_core_uarch *picspecific_events;
1042 1042 const struct generic_events *picspecific_genericevents;
1043 1043 size_t common_size;
1044 1044 size_t size;
1045 1045 uint64_t i;
1046 1046
1047 1047 gpc_names = kmem_alloc(num_gpc * sizeof (char *), KM_SLEEP);
1048 1048
1049 1049 /* Calculate space needed to save all the common event names */
1050 1050 common_size = 0;
1051 1051 for (n = cmn_gpc_events_core_uarch; n->event_num != NT_END; n++) {
1052 1052 common_size += strlen(n->name) + 1;
1053 1053 }
1054 1054
1055 1055 for (k = cmn_generic_events; k->event_num != NT_END; k++) {
1056 1056 common_size += strlen(k->name) + 1;
1057 1057 }
1058 1058
1059 1059 for (i = 0; i < num_gpc; i++) {
1060 1060 size = 0;
1061 1061 picspecific_genericevents = NULL;
1062 1062
1063 1063 switch (i) {
1064 1064 case 0:
1065 1065 picspecific_events = pic0_events;
1066 1066 picspecific_genericevents = generic_events_pic0;
1067 1067 break;
1068 1068 case 1:
1069 1069 picspecific_events = pic1_events;
1070 1070 break;
1071 1071 default:
1072 1072 picspecific_events = NULL;
1073 1073 break;
1074 1074 }
1075 1075 if (picspecific_events != NULL) {
1076 1076 for (n = picspecific_events;
1077 1077 n->event_num != NT_END;
1078 1078 n++) {
1079 1079 size += strlen(n->name) + 1;
1080 1080 }
1081 1081 }
1082 1082 if (picspecific_genericevents != NULL) {
1083 1083 for (k = picspecific_genericevents;
1084 1084 k->event_num != NT_END; k++) {
1085 1085 size += strlen(k->name) + 1;
1086 1086 }
1087 1087 }
1088 1088
1089 1089 gpc_names[i] =
1090 1090 kmem_alloc(size + common_size + 1, KM_SLEEP);
1091 1091
1092 1092 gpc_names[i][0] = '\0';
1093 1093 if (picspecific_events != NULL) {
1094 1094 for (n = picspecific_events;
1095 1095 n->event_num != NT_END; n++) {
1096 1096 (void) strcat(gpc_names[i], n->name);
1097 1097 (void) strcat(gpc_names[i], ",");
1098 1098 }
1099 1099 }
1100 1100 if (picspecific_genericevents != NULL) {
1101 1101 for (k = picspecific_genericevents;
1102 1102 k->event_num != NT_END; k++) {
1103 1103 (void) strcat(gpc_names[i], k->name);
1104 1104 (void) strcat(gpc_names[i], ",");
1105 1105 }
1106 1106 }
1107 1107 for (n = cmn_gpc_events_core_uarch; n->event_num != NT_END;
1108 1108 n++) {
1109 1109 (void) strcat(gpc_names[i], n->name);
1110 1110 (void) strcat(gpc_names[i], ",");
1111 1111 }
1112 1112 for (k = cmn_generic_events; k->event_num != NT_END; k++) {
1113 1113 (void) strcat(gpc_names[i], k->name);
1114 1114 (void) strcat(gpc_names[i], ",");
1115 1115 }
1116 1116
1117 1117 /*
1118 1118 * Remove trailing comma.
1119 1119 */
1120 1120 gpc_names[i][common_size + size - 1] = '\0';
1121 1121 }
1122 1122 }
1123 1123
1124 1124 static int
1125 1125 core_pcbe_init(void)
1126 1126 {
1127 1127 struct cpuid_regs cp;
1128 1128 size_t size;
1129 1129 uint64_t i;
1130 1130 uint64_t j;
1131 1131 uint64_t arch_events_vector_length;
1132 1132 size_t arch_events_string_length;
1133 1133 uint_t model;
1134 1134
1135 1135 if (cpuid_getvendor(CPU) != X86_VENDOR_Intel)
1136 1136 return (-1);
1137 1137
1138 1138 /* Obtain Basic CPUID information */
1139 1139 cp.cp_eax = 0x0;
1140 1140 (void) __cpuid_insn(&cp);
1141 1141
1142 1142 /* No Architectural Performance Monitoring Leaf returned by CPUID */
1143 1143 if (cp.cp_eax < 0xa) {
1144 1144 return (-1);
1145 1145 }
1146 1146
1147 1147 /* Obtain the Architectural Performance Monitoring Leaf */
1148 1148 cp.cp_eax = 0xa;
1149 1149 (void) __cpuid_insn(&cp);
1150 1150
1151 1151 versionid = cp.cp_eax & 0xFF;
1152 1152
1153 1153 /*
1154 1154 * Fixed-Function Counters (FFC)
1155 1155 *
1156 1156 * All Family 6 Model 15 and Model 23 processors have fixed-function
1157 1157 * counters. These counters were made Architectural with
1158 1158 * Family 6 Model 15 Stepping 9.
1159 1159 */
1160 1160 switch (versionid) {
1161 1161
1162 1162 case 0:
1163 1163 return (-1);
1164 1164
1165 1165 case 2:
1166 1166 num_ffc = cp.cp_edx & 0x1F;
1167 1167 width_ffc = (cp.cp_edx >> 5) & 0xFF;
1168 1168
1169 1169 /*
1170 1170 * Some processors have an errata (AW34) where
1171 1171 * versionid is reported as 2 when it is actually 1.
1172 1172 * In this case, fixed-function counters are
1173 1173 * model-specific as in Version 1.
1174 1174 */
1175 1175 if (num_ffc != 0) {
1176 1176 break;
1177 1177 }
1178 1178 /* FALLTHROUGH */
1179 1179 case 1:
1180 1180 num_ffc = 3;
1181 1181 width_ffc = 40;
1182 1182 versionid = 1;
1183 1183 break;
1184 1184
1185 1185 default:
1186 1186 num_ffc = cp.cp_edx & 0x1F;
1187 1187 width_ffc = (cp.cp_edx >> 5) & 0xFF;
1188 1188 break;
1189 1189 }
1190 1190
1191 1191
1192 1192 if (num_ffc >= 64)
1193 1193 return (-1);
1194 1194
1195 1195 /* Set HTT-specific names of architectural & FFC events */
1196 1196 if (is_x86_feature(x86_featureset, X86FSET_HTT)) {
1197 1197 ffc_names = ffc_names_htt;
1198 1198 arch_events_table = arch_events_table_htt;
1199 1199 known_arch_events =
1200 1200 sizeof (arch_events_table_htt) /
1201 1201 sizeof (struct events_table_t);
1202 1202 known_ffc_num =
1203 1203 sizeof (ffc_names_htt) / sizeof (char *);
1204 1204 } else {
1205 1205 ffc_names = ffc_names_non_htt;
1206 1206 arch_events_table = arch_events_table_non_htt;
1207 1207 known_arch_events =
1208 1208 sizeof (arch_events_table_non_htt) /
1209 1209 sizeof (struct events_table_t);
1210 1210 known_ffc_num =
1211 1211 sizeof (ffc_names_non_htt) / sizeof (char *);
1212 1212 }
1213 1213
1214 1214 if (num_ffc >= known_ffc_num) {
1215 1215 /*
1216 1216 * The system seems to have more fixed-function counters than
1217 1217 * this PCBE is able to handle correctly. Default to the
1218 1218 * maximum number of fixed-function counters that this driver
1219 1219 * is aware of.
1220 1220 */
1221 1221 num_ffc = known_ffc_num - 1;
1222 1222 }
1223 1223
1224 1224 mask_ffc = BITMASK_XBITS(width_ffc);
1225 1225 control_ffc = BITMASK_XBITS(num_ffc);
1226 1226
1227 1227 /*
1228 1228 * General Purpose Counters (GPC)
1229 1229 */
1230 1230 num_gpc = (cp.cp_eax >> 8) & 0xFF;
1231 1231 width_gpc = (cp.cp_eax >> 16) & 0xFF;
1232 1232
1233 1233 if (num_gpc >= 64)
1234 1234 return (-1);
1235 1235
1236 1236 mask_gpc = BITMASK_XBITS(width_gpc);
1237 1237
1238 1238 control_gpc = BITMASK_XBITS(num_gpc);
1239 1239
1240 1240 control_mask = (control_ffc << 32) | control_gpc;
1241 1241
1242 1242 total_pmc = num_gpc + num_ffc;
1243 1243 if (total_pmc > 64) {
1244 1244 /* Too wide for the overflow bitmap */
1245 1245 return (-1);
1246 1246 }
1247 1247
1248 1248 /* FFC names */
1249 1249 ffc_allnames = kmem_alloc(num_ffc * sizeof (char *), KM_SLEEP);
1250 1250 for (i = 0; i < num_ffc; i++) {
1251 1251 ffc_allnames[i] = kmem_alloc(
1252 1252 strlen(ffc_names[i]) + strlen(ffc_genericnames[i]) + 2,
1253 1253 KM_SLEEP);
1254 1254
1255 1255 ffc_allnames[i][0] = '\0';
1256 1256 (void) strcat(ffc_allnames[i], ffc_names[i]);
1257 1257
1258 1258 /* Check if this ffc has a generic name */
1259 1259 if (strcmp(ffc_genericnames[i], "") != 0) {
1260 1260 (void) strcat(ffc_allnames[i], ",");
1261 1261 (void) strcat(ffc_allnames[i], ffc_genericnames[i]);
1262 1262 }
1263 1263 }
1264 1264
1265 1265 /* GPC events for Family 6 Models 15, 23 and 29 only */
1266 1266 if ((cpuid_getfamily(CPU) == 6) &&
1267 1267 ((cpuid_getmodel(CPU) == 15) || (cpuid_getmodel(CPU) == 23) ||
1268 1268 (cpuid_getmodel(CPU) == 29))) {
1269 1269 (void) snprintf(core_impl_name, IMPL_NAME_LEN,
1270 1270 "Core Microarchitecture");
1271 1271 pcbe_init_core_uarch();
1272 1272 return (0);
1273 1273 }
1274 1274
1275 1275 (void) snprintf(core_impl_name, IMPL_NAME_LEN,
1276 1276 "Intel Arch PerfMon v%d on Family %d Model %d",
1277 1277 versionid, cpuid_getfamily(CPU), cpuid_getmodel(CPU));
1278 1278
1279 1279 /*
1280 1280 * Architectural events
1281 1281 */
1282 1282 arch_events_vector_length = (cp.cp_eax >> 24) & 0xFF;
1283 1283
1284 1284 ASSERT(known_arch_events == arch_events_vector_length);
1285 1285
1286 1286 /*
1287 1287 * Handle the case where a newer performance monitoring setup, with more
1288 1288 * architectural events than this PCBE knows about, is run on a non-debug kernel
1289 1289 */
1290 1290 if (known_arch_events > arch_events_vector_length) {
1291 1291 known_arch_events = arch_events_vector_length;
1292 1292 } else {
1293 1293 arch_events_vector_length = known_arch_events;
1294 1294 }
1295 1295
1296 1296 arch_events_vector = cp.cp_ebx &
1297 1297 BITMASK_XBITS(arch_events_vector_length);
1298 1298
1299 1299 /*
1300 1300 * Process architectural and non-architectural events using GPC
1301 1301 */
1302 1302 if (num_gpc > 0) {
1303 1303
1304 1304 gpc_names = kmem_alloc(num_gpc * sizeof (char *), KM_SLEEP);
1305 1305
1306 1306 /* Calculate space required for the architectural gpc events */
1307 1307 arch_events_string_length = 0;
1308 1308 for (i = 0; i < known_arch_events; i++) {
1309 1309 if (((1U << i) & arch_events_vector) == 0) {
1310 1310 arch_events_string_length +=
1311 1311 strlen(arch_events_table[i].name) + 1;
1312 1312 if (strcmp(arch_genevents_table[i], "") != 0) {
1313 1313 arch_events_string_length +=
1314 1314 strlen(arch_genevents_table[i]) + 1;
1315 1315 }
1316 1316 }
1317 1317 }
1318 1318
1319 1319 /* Non-architectural events list */
1320 1320 model = cpuid_getmodel(CPU);
1321 1321 switch (model) {
1322 1322 /* Nehalem */
1323 1323 case 26:
1324 1324 case 30:
1325 1325 case 31:
1326 1326 /* Westmere */
1327 1327 case 37:
1328 1328 case 44:
1329 1329 /* Nehalem-EX */
1330 1330 case 46:
1331 1331 case 47:
1332 1332 events_table = events_fam6_nhm;
1333 1333 break;
1334 1334 case 28:
1335 1335 events_table = events_fam6_mod28;
1336 1336 break;
1337 1337 }
1338 1338
1339 1339 for (i = 0; i < num_gpc; i++) {
1340 1340
1341 1341 /*
1342 1342 * Determine length of all supported event names
1343 1343 * (architectural + non-architectural)
1344 1344 */
1345 1345 size = arch_events_string_length;
1346 1346 for (j = 0; events_table != NULL &&
1347 1347 events_table[j].eventselect != NT_END;
1348 1348 j++) {
1349 1349 if (C(i) & events_table[j].supported_counters) {
1350 1350 size += strlen(events_table[j].name) +
1351 1351 1;
1352 1352 }
1353 1353 }
1354 1354
1355 1355 /* Allocate memory for this pics list */
1356 1356 gpc_names[i] = kmem_alloc(size + 1, KM_SLEEP);
1357 1357 gpc_names[i][0] = '\0';
1358 1358 if (size == 0) {
1359 1359 continue;
1360 1360 }
1361 1361
1362 1362 /*
1363 1363 * Create the list of all supported events
1364 1364 * (architectural + non-architectural)
1365 1365 */
1366 1366 for (j = 0; j < known_arch_events; j++) {
1367 1367 if (((1U << j) & arch_events_vector) == 0) {
1368 1368 (void) strcat(gpc_names[i],
1369 1369 arch_events_table[j].name);
1370 1370 (void) strcat(gpc_names[i], ",");
1371 1371 if (strcmp(
1372 1372 arch_genevents_table[j], "")
1373 1373 != 0) {
1374 1374 (void) strcat(gpc_names[i],
1375 1375 arch_genevents_table[j]);
1376 1376 (void) strcat(gpc_names[i],
1377 1377 ",");
1378 1378 }
1379 1379 }
1380 1380 }
1381 1381
1382 1382 for (j = 0; events_table != NULL &&
1383 1383 events_table[j].eventselect != NT_END;
1384 1384 j++) {
1385 1385 if (C(i) & events_table[j].supported_counters) {
1386 1386 (void) strcat(gpc_names[i],
1387 1387 events_table[j].name);
1388 1388 (void) strcat(gpc_names[i], ",");
1389 1389 }
1390 1390 }
1391 1391
1392 1392 /* Remove trailing comma */
1393 1393 gpc_names[i][size - 1] = '\0';
1394 1394 }
1395 1395 }
1396 1396
1397 1397 return (0);
1398 1398 }
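
The name-list construction in core_pcbe_init() follows a two-pass pattern: size the buffer first (each event name plus one byte for its comma), concatenate every name with a trailing comma, then overwrite the final comma with a NUL. A minimal user-space sketch of the same pattern, with malloc() standing in for kmem_alloc() and a made-up name table:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int
main(void)
{
	const char *names[] = { "branches", "branch-misses", "cycles" };
	size_t n = sizeof (names) / sizeof (names[0]);
	size_t size = 0;
	size_t i;
	char *list;

	/* Pass 1: each name contributes strlen(name) + 1 (for the comma). */
	for (i = 0; i < n; i++)
		size += strlen(names[i]) + 1;

	/* +1 so the buffer still holds a NUL even when n == 0. */
	list = malloc(size + 1);
	if (list == NULL)
		return (1);
	list[0] = '\0';

	/* Pass 2: append "name," for every entry. */
	for (i = 0; i < n; i++) {
		(void) strcat(list, names[i]);
		(void) strcat(list, ",");
	}

	/* Overwrite the trailing comma, as core_pcbe_init() does. */
	if (size > 0)
		list[size - 1] = '\0';

	(void) printf("%s\n", list);	/* branches,branch-misses,cycles */
	free(list);
	return (0);
}
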
1399 1399
1400 1400 static uint_t core_pcbe_ncounters(void)
1401 1401 {
1402 1402 return (total_pmc);
1403 1403 }
1404 1404
1405 1405 static const char *core_pcbe_impl_name(void)
1406 1406 {
1407 1407 return (core_impl_name);
1408 1408 }
1409 1409
1410 1410 static const char *core_pcbe_cpuref(void)
1411 1411 {
1412 1412 return (core_cpuref);
1413 1413 }
1414 1414
1415 1415 static char *core_pcbe_list_events(uint_t picnum)
1416 1416 {
1417 1417 ASSERT(picnum < cpc_ncounters);
1418 1418
1419 1419 if (picnum < num_gpc) {
1420 1420 return (gpc_names[picnum]);
1421 1421 } else {
1422 1422 return (ffc_allnames[picnum - num_gpc]);
1423 1423 }
1424 1424 }
1425 1425
1426 1426 static char *core_pcbe_list_attrs(void)
1427 1427 {
1428 1428 if (versionid >= 3) {
1429 1429 return ("edge,inv,umask,cmask,anythr");
1430 1430 } else {
1431 1431 return ("edge,pc,inv,umask,cmask");
1432 1432 }
1433 1433 }
1434 1434
1435 1435 static const struct nametable_core_uarch *
1436 1436 find_gpcevent_core_uarch(char *name,
1437 1437 const struct nametable_core_uarch *nametable)
1438 1438 {
1439 1439 const struct nametable_core_uarch *n;
1440 1440 int compare_result = -1;
1441 1441
1442 1442 for (n = nametable; n->event_num != NT_END; n++) {
1443 1443 compare_result = strcmp(name, n->name);
1444 1444 if (compare_result <= 0) {
1445 1445 break;
1446 1446 }
1447 1447 }
1448 1448
1449 1449 if (compare_result == 0) {
1450 1450 return (n);
1451 1451 }
1452 1452
1453 1453 return (NULL);
1454 1454 }
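
find_gpcevent_core_uarch() stops scanning as soon as strcmp() returns a value less than or equal to zero, which is only correct because the nametables passed to it are sorted by event name. A small user-space sketch of the same early-exit search; the table contents here are illustrative, not quoted from the driver:

#include <stdio.h>
#include <string.h>

struct entry {
	const char *name;
	int code;
};

/* Sorted by name, terminated by a NULL name (stand-in for NT_END). */
static const struct entry table[] = {
	{ "baclears", 0xe6 },
	{ "cpu_clk_unhalted.core", 0x3c },
	{ "inst_retired.any_p", 0xc0 },
	{ NULL, 0 }
};

static const struct entry *
lookup(const char *name)
{
	const struct entry *e;
	int cmp = -1;

	for (e = table; e->name != NULL; e++) {
		cmp = strcmp(name, e->name);
		if (cmp <= 0)
			break;		/* past the insertion point */
	}
	return (cmp == 0 ? e : NULL);
}

int
main(void)
{
	const struct entry *e = lookup("inst_retired.any_p");

	(void) printf("%s -> %#x\n", e ? e->name : "none", e ? e->code : 0);
	(void) printf("%s\n", lookup("zzz") == NULL ? "zzz: not found" : "found");
	return (0);
}
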
1455 1455
1456 1456 static const struct generic_events *
1457 1457 find_generic_events(char *name, const struct generic_events *table)
1458 1458 {
1459 1459 const struct generic_events *n;
1460 1460
1461 1461 for (n = table; n->event_num != NT_END; n++) {
1462 1462 if (strcmp(name, n->name) == 0) {
1463 1463 return (n);
1464 1464 		}
1465 1465 }
1466 1466
1467 1467 return (NULL);
1468 1468 }
1469 1469
1470 1470 static const struct events_table_t *
1471 1471 find_gpcevent(char *name)
1472 1472 {
1473 1473 int i;
1474 1474
1475 1475 /* Search architectural events */
1476 1476 for (i = 0; i < known_arch_events; i++) {
1477 1477 if (strcmp(name, arch_events_table[i].name) == 0 ||
1478 1478 strcmp(name, arch_genevents_table[i]) == 0) {
1479 1479 if (((1U << i) & arch_events_vector) == 0) {
1480 1480 return (&arch_events_table[i]);
1481 1481 }
1482 1482 }
1483 1483 }
1484 1484
1485 1485 /* Search non-architectural events */
1486 1486 if (events_table != NULL) {
1487 1487 for (i = 0; events_table[i].eventselect != NT_END; i++) {
1488 1488 if (strcmp(name, events_table[i].name) == 0) {
1489 1489 return (&events_table[i]);
1490 1490 }
1491 1491 }
1492 1492 }
1493 1493
1494 1494 return (NULL);
1495 1495 }
1496 1496
1497 1497 static uint64_t
1498 1498 core_pcbe_event_coverage(char *event)
1499 1499 {
1500 1500 uint64_t bitmap;
1501 1501 uint64_t bitmask;
1502 1502 const struct events_table_t *n;
1503 1503 int i;
1504 1504
1505 1505 bitmap = 0;
1506 1506
1507 1507 /* Is it an event that a GPC can track? */
1508 1508 if (versionid >= 3) {
1509 1509 n = find_gpcevent(event);
1510 1510 if (n != NULL) {
1511 1511 bitmap |= (n->supported_counters &
1512 1512 BITMASK_XBITS(num_gpc));
1513 1513 }
1514 1514 } else {
1515 1515 if (find_generic_events(event, cmn_generic_events) != NULL) {
1516 1516 bitmap |= BITMASK_XBITS(num_gpc);
1517 1517 		} else if (find_generic_events(event, generic_events_pic0) != NULL) {
1518 1518 bitmap |= 1ULL;
1519 1519 } else if (find_gpcevent_core_uarch(event,
1520 1520 cmn_gpc_events_core_uarch) != NULL) {
1521 1521 bitmap |= BITMASK_XBITS(num_gpc);
1522 1522 } else if (find_gpcevent_core_uarch(event, pic0_events) !=
1523 1523 NULL) {
1524 1524 bitmap |= 1ULL;
1525 1525 } else if (find_gpcevent_core_uarch(event, pic1_events) !=
1526 1526 NULL) {
1527 1527 bitmap |= 1ULL << 1;
1528 1528 }
1529 1529 }
1530 1530
1531 1531 /* Check if the event can be counted in the fixed-function counters */
1532 1532 if (num_ffc > 0) {
1533 1533 bitmask = 1ULL << num_gpc;
1534 1534 for (i = 0; i < num_ffc; i++) {
1535 1535 if (strcmp(event, ffc_names[i]) == 0) {
1536 1536 bitmap |= bitmask;
1537 1537 } else if (strcmp(event, ffc_genericnames[i]) == 0) {
1538 1538 bitmap |= bitmask;
1539 1539 }
1540 1540 bitmask = bitmask << 1;
1541 1541 }
1542 1542 }
1543 1543
1544 1544 return (bitmap);
1545 1545 }
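
core_pcbe_event_coverage() packs its answer into one bitmap: bit i is set when general-purpose counter i can count the event, and fixed-function counter j occupies bit num_gpc + j. A short arithmetic sketch of that layout, assuming num_gpc = 4 and num_ffc = 3 and using a local stand-in for the BITMASK_XBITS() macro:

#include <stdio.h>
#include <inttypes.h>

/* Local stand-in for the BITMASK_XBITS() macro used by the PCBE. */
#define	XBITS(x)	((1ULL << (x)) - 1ULL)

int
main(void)
{
	unsigned int num_gpc = 4, num_ffc = 3;
	uint64_t bitmap;

	/* An event every GPC can count: bits 0..num_gpc-1. */
	bitmap = XBITS(num_gpc);
	(void) printf("GPC-only event: %#" PRIx64 "\n", bitmap);	/* 0xf */

	/* Additionally countable on fixed-function counter 1. */
	bitmap |= 1ULL << (num_gpc + 1);
	(void) printf("plus FFC1:      %#" PRIx64 "\n", bitmap);	/* 0x2f */

	/* An event only the fixed-function counters can count. */
	bitmap = XBITS(num_ffc) << num_gpc;
	(void) printf("FFC-only event: %#" PRIx64 "\n", bitmap);	/* 0x70 */
	return (0);
}
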
1546 1546
1547 1547 static uint64_t
1548 1548 core_pcbe_overflow_bitmap(void)
1549 1549 {
1550 1550 uint64_t interrupt_status;
1551 1551 uint64_t intrbits_ffc;
1552 1552 uint64_t intrbits_gpc;
1553 1553 extern int kcpc_hw_overflow_intr_installed;
1554 1554 uint64_t overflow_bitmap;
1555 1555
1556 1556 RDMSR(PERF_GLOBAL_STATUS, interrupt_status);
1557 1557 WRMSR(PERF_GLOBAL_OVF_CTRL, interrupt_status);
1558 1558
1559 1559 interrupt_status = interrupt_status & control_mask;
1560 1560 intrbits_ffc = (interrupt_status >> 32) & control_ffc;
1561 1561 intrbits_gpc = interrupt_status & control_gpc;
1562 1562 overflow_bitmap = (intrbits_ffc << num_gpc) | intrbits_gpc;
1563 1563
1564 1564 ASSERT(kcpc_hw_overflow_intr_installed);
1565 1565 (*kcpc_hw_enable_cpc_intr)();
1566 1566
1567 1567 return (overflow_bitmap);
1568 1568 }
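
In the overflow handler above, the general-purpose overflow flags sit in the low word of PERF_GLOBAL_STATUS while the fixed-function flags start at bit 32; the FFC bits are then folded down so that FFC j lands at bit num_gpc + j of the bitmap returned to the kcpc framework. A user-space sketch of just the bit manipulation, with made-up control masks and no MSR access:

#include <stdio.h>
#include <inttypes.h>

int
main(void)
{
	unsigned int num_gpc = 4;
	uint64_t control_gpc = 0xf;		/* GPCs 0-3 in use */
	uint64_t control_ffc = 0x7;		/* FFCs 0-2 in use */
	uint64_t control_mask = (control_ffc << 32) | control_gpc;
	uint64_t status, intrbits_gpc, intrbits_ffc, overflow_bitmap;

	/* Pretend GPC2 and FFC1 overflowed: bits 2 and 32 + 1. */
	status = (1ULL << 2) | (1ULL << (32 + 1));

	status &= control_mask;
	intrbits_ffc = (status >> 32) & control_ffc;
	intrbits_gpc = status & control_gpc;
	overflow_bitmap = (intrbits_ffc << num_gpc) | intrbits_gpc;

	/* GPC2 -> bit 2, FFC1 -> bit num_gpc + 1 = 5, i.e. 0x24. */
	(void) printf("overflow bitmap: %#" PRIx64 "\n", overflow_bitmap);
	return (0);
}
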
1569 1569
1570 1570 static int
1571 1571 check_cpc_securitypolicy(core_pcbe_config_t *conf,
1572 1572 const struct nametable_core_uarch *n)
1573 1573 {
1574 1574 if (conf->core_ctl & n->restricted_bits) {
1575 1575 if (secpolicy_cpc_cpu(crgetcred()) != 0) {
1576 1576 return (CPC_ATTR_REQUIRES_PRIVILEGE);
1577 1577 }
1578 1578 }
1579 1579 return (0);
1580 1580 }
1581 1581
1582 1582 static int
1583 1583 configure_gpc(uint_t picnum, char *event, uint64_t preset, uint32_t flags,
1584 1584 uint_t nattrs, kcpc_attr_t *attrs, void **data)
1585 1585 {
1586 1586 core_pcbe_config_t conf;
1587 1587 const struct nametable_core_uarch *n;
1588 1588 const struct generic_events *k = NULL;
1589 1589 const struct nametable_core_uarch *m;
1590 1590 const struct nametable_core_uarch *picspecific_events;
1591 1591 struct nametable_core_uarch nt_raw = { "", 0x0, 0x0 };
1592 1592 uint_t i;
1593 1593 long event_num;
1594 1594 const struct events_table_t *eventcode;
1595 1595
1596 1596 if (((preset & BITS_EXTENDED_FROM_31) != 0) &&
1597 1597 ((preset & BITS_EXTENDED_FROM_31) !=
1598 1598 BITS_EXTENDED_FROM_31)) {
1599 1599
1600 1600 /*
1601 1601 * Bits beyond bit-31 in the general-purpose counters can only
1602 1602 * be written to by extension of bit 31. We cannot preset
1603 1603 * these bits to any value other than all 1s or all 0s.
1604 1604 */
1605 1605 return (CPC_ATTRIBUTE_OUT_OF_RANGE);
1606 1606 }
1607 1607
1608 1608 if (versionid >= 3) {
1609 1609 eventcode = find_gpcevent(event);
1610 1610 if (eventcode != NULL) {
1611 1611 if ((C(picnum) & eventcode->supported_counters) == 0) {
1612 1612 return (CPC_PIC_NOT_CAPABLE);
1613 1613 }
1614 1614 if (nattrs > 0 &&
1615 1615 (strncmp("PAPI_", event, 5) == 0)) {
1616 1616 return (CPC_ATTRIBUTE_OUT_OF_RANGE);
1617 1617 }
1618 1618 conf.core_ctl = eventcode->eventselect;
1619 1619 conf.core_ctl |= eventcode->unitmask <<
1620 1620 CORE_UMASK_SHIFT;
1621 1621 } else {
1622 1622 /* Event specified as raw event code */
1623 1623 if (ddi_strtol(event, NULL, 0, &event_num) != 0) {
1624 1624 return (CPC_INVALID_EVENT);
1625 1625 }
1626 1626 conf.core_ctl = event_num & 0xFF;
1627 1627 }
1628 1628 } else {
1629 1629 if ((k = find_generic_events(event, cmn_generic_events)) !=
1630 1630 NULL ||
1631 1631 (picnum == 0 &&
1632 1632 (k = find_generic_events(event, generic_events_pic0)) !=
1633 1633 NULL)) {
1634 1634 if (nattrs > 0) {
1635 1635 return (CPC_ATTRIBUTE_OUT_OF_RANGE);
1636 1636 }
1637 1637 conf.core_ctl = k->event_num;
1638 1638 conf.core_ctl |= k->umask << CORE_UMASK_SHIFT;
1639 1639 } else {
1640 1640 /* Not a generic event */
1641 1641
1642 1642 n = find_gpcevent_core_uarch(event,
1643 1643 cmn_gpc_events_core_uarch);
1644 1644 if (n == NULL) {
1645 1645 switch (picnum) {
1646 1646 case 0:
1647 1647 picspecific_events =
1648 1648 pic0_events;
1649 1649 break;
1650 1650 case 1:
1651 1651 picspecific_events =
1652 1652 pic1_events;
1653 1653 break;
1654 1654 default:
1655 1655 picspecific_events = NULL;
1656 1656 break;
1657 1657 }
1658 1658 if (picspecific_events != NULL) {
1659 1659 n = find_gpcevent_core_uarch(event,
1660 1660 picspecific_events);
1661 1661 }
1662 1662 }
1663 1663 if (n == NULL) {
1664 1664
1665 1665 /*
1666 1666 * Check if this is a case where the event was
1667 1667 * specified directly by its event number
1668 1668 * instead of its name string.
1669 1669 */
1670 1670 if (ddi_strtol(event, NULL, 0, &event_num) !=
1671 1671 0) {
1672 1672 return (CPC_INVALID_EVENT);
1673 1673 }
1674 1674
1675 1675 event_num = event_num & 0xFF;
1676 1676
1677 1677 /*
1678 1678 * Search the event table to find out if the
1679 1679 				 * event specified has any privilege
1680 1680 * requirements. Currently none of the
1681 1681 * pic-specific counters have any privilege
1682 1682 * requirements. Hence only the table
1683 1683 * cmn_gpc_events_core_uarch is searched.
1684 1684 */
1685 1685 for (m = cmn_gpc_events_core_uarch;
1686 1686 m->event_num != NT_END;
1687 1687 m++) {
1688 1688 if (event_num == m->event_num) {
1689 1689 break;
1690 1690 }
1691 1691 }
1692 1692 if (m->event_num == NT_END) {
1693 1693 nt_raw.event_num = (uint8_t)event_num;
1694 1694 n = &nt_raw;
1695 1695 } else {
1696 1696 n = m;
1697 1697 }
1698 1698 }
1699 1699 conf.core_ctl = n->event_num; /* Event Select */
1700 1700 }
1701 1701 }
1702 1702
1703 1703
1704 1704 conf.core_picno = picnum;
1705 1705 conf.core_pictype = CORE_GPC;
1706 1706 conf.core_rawpic = preset & mask_gpc;
1707 1707
1708 1708 conf.core_pes = GPC_BASE_PES + picnum;
1709 1709 conf.core_pmc = GPC_BASE_PMC + picnum;
1710 1710
1711 1711 for (i = 0; i < nattrs; i++) {
1712 1712 if (strncmp(attrs[i].ka_name, "umask", 6) == 0) {
1713 1713 if ((attrs[i].ka_val | CORE_UMASK_MASK) !=
1714 1714 CORE_UMASK_MASK) {
1715 1715 return (CPC_ATTRIBUTE_OUT_OF_RANGE);
1716 1716 }
1717 1717 /* Clear out the default umask */
1718 1718 conf.core_ctl &= ~ (CORE_UMASK_MASK <<
1719 1719 CORE_UMASK_SHIFT);
1720 1720 /* Use the user provided umask */
1721 1721 conf.core_ctl |= attrs[i].ka_val <<
1722 1722 CORE_UMASK_SHIFT;
1723 1723 } else if (strncmp(attrs[i].ka_name, "edge", 6) == 0) {
1724 1724 if (attrs[i].ka_val != 0)
1725 1725 conf.core_ctl |= CORE_EDGE;
1726 1726 } else if (strncmp(attrs[i].ka_name, "inv", 4) == 0) {
1727 1727 if (attrs[i].ka_val != 0)
1728 1728 conf.core_ctl |= CORE_INV;
1729 1729 } else if (strncmp(attrs[i].ka_name, "cmask", 6) == 0) {
1730 1730 if ((attrs[i].ka_val | CORE_CMASK_MASK) !=
1731 1731 CORE_CMASK_MASK) {
1732 1732 return (CPC_ATTRIBUTE_OUT_OF_RANGE);
1733 1733 }
1734 1734 conf.core_ctl |= attrs[i].ka_val <<
1735 1735 CORE_CMASK_SHIFT;
1736 1736 } else if (strncmp(attrs[i].ka_name, "anythr", 7) ==
1737 1737 0) {
1738 1738 if (versionid < 3)
1739 1739 return (CPC_INVALID_ATTRIBUTE);
1740 1740 if (secpolicy_cpc_cpu(crgetcred()) != 0) {
1741 1741 return (CPC_ATTR_REQUIRES_PRIVILEGE);
1742 1742 }
1743 1743 if (attrs[i].ka_val != 0)
1744 1744 conf.core_ctl |= CORE_ANYTHR;
1745 1745 } else {
1746 1746 return (CPC_INVALID_ATTRIBUTE);
1747 1747 }
1748 1748 }
1749 1749
1750 1750 if (flags & CPC_COUNT_USER)
1751 1751 conf.core_ctl |= CORE_USR;
1752 1752 if (flags & CPC_COUNT_SYSTEM)
1753 1753 conf.core_ctl |= CORE_OS;
1754 1754 if (flags & CPC_OVF_NOTIFY_EMT)
1755 1755 conf.core_ctl |= CORE_INT;
1756 1756 conf.core_ctl |= CORE_EN;
1757 1757
1758 1758 if (versionid < 3 && k == NULL) {
1759 1759 if (check_cpc_securitypolicy(&conf, n) != 0) {
1760 1760 return (CPC_ATTR_REQUIRES_PRIVILEGE);
1761 1761 }
1762 1762 }
1763 1763
1764 1764 *data = kmem_alloc(sizeof (core_pcbe_config_t), KM_SLEEP);
1765 1765 *((core_pcbe_config_t *)*data) = conf;
1766 1766
1767 1767 return (0);
1768 1768 }
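
The attribute loop in configure_gpc() range-checks umask and cmask with the idiom (value | MASK) != MASK, which is true exactly when the value has a bit set outside MASK. A small sketch of that check; the 8-bit mask below is illustrative, and the real CORE_UMASK_MASK and CORE_CMASK_MASK are defined in the PCBE headers:

#include <stdio.h>
#include <inttypes.h>

/* Illustrative 8-bit mask, standing in for CORE_UMASK_MASK. */
#define	UMASK_MASK	0xffULL

static int
fits_in_mask(uint64_t val, uint64_t mask)
{
	/* True when no bit of val falls outside mask. */
	return ((val | mask) == mask);
}

int
main(void)
{
	(void) printf("0x0f  -> %d\n", fits_in_mask(0x0f, UMASK_MASK));		/* 1 */
	(void) printf("0xff  -> %d\n", fits_in_mask(0xff, UMASK_MASK));		/* 1 */
	(void) printf("0x100 -> %d\n", fits_in_mask(0x100, UMASK_MASK));	/* 0 */
	return (0);
}
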
1769 1769
1770 1770 static int
1771 1771 configure_ffc(uint_t picnum, char *event, uint64_t preset, uint32_t flags,
1772 1772 uint_t nattrs, kcpc_attr_t *attrs, void **data)
1773 1773 {
1774 1774 core_pcbe_config_t *conf;
1775 1775 uint_t i;
1776 1776
1777 1777 if (picnum - num_gpc >= num_ffc) {
1778 1778 return (CPC_INVALID_PICNUM);
1779 1779 }
1780 1780
1781 1781 if ((strcmp(ffc_names[picnum-num_gpc], event) != 0) &&
1782 1782 (strcmp(ffc_genericnames[picnum-num_gpc], event) != 0)) {
1783 1783 return (CPC_INVALID_EVENT);
1784 1784 }
1785 1785
1786 1786 if ((versionid < 3) && (nattrs != 0)) {
1787 1787 return (CPC_INVALID_ATTRIBUTE);
1788 1788 }
1789 1789
1790 1790 conf = kmem_alloc(sizeof (core_pcbe_config_t), KM_SLEEP);
1791 1791 conf->core_ctl = 0;
1792 1792
1793 1793 for (i = 0; i < nattrs; i++) {
1794 1794 if (strncmp(attrs[i].ka_name, "anythr", 7) == 0) {
1795 1795 if (secpolicy_cpc_cpu(crgetcred()) != 0) {
1796 1796 kmem_free(conf, sizeof (core_pcbe_config_t));
1797 1797 return (CPC_ATTR_REQUIRES_PRIVILEGE);
1798 1798 }
1799 1799 if (attrs[i].ka_val != 0) {
1800 1800 conf->core_ctl |= CORE_FFC_ANYTHR;
1801 1801 }
1802 1802 } else {
1803 1803 kmem_free(conf, sizeof (core_pcbe_config_t));
1804 1804 return (CPC_INVALID_ATTRIBUTE);
1805 1805 }
1806 1806 }
1807 1807
1808 1808 conf->core_picno = picnum;
1809 1809 conf->core_pictype = CORE_FFC;
1810 1810 conf->core_rawpic = preset & mask_ffc;
1811 1811 conf->core_pmc = FFC_BASE_PMC + (picnum - num_gpc);
1812 1812
1813 1813 /* All fixed-function counters have the same control register */
1814 1814 conf->core_pes = PERF_FIXED_CTR_CTRL;
1815 1815
1816 1816 if (flags & CPC_COUNT_USER)
1817 1817 conf->core_ctl |= CORE_FFC_USR_EN;
1818 1818 if (flags & CPC_COUNT_SYSTEM)
1819 1819 conf->core_ctl |= CORE_FFC_OS_EN;
1820 1820 if (flags & CPC_OVF_NOTIFY_EMT)
1821 1821 conf->core_ctl |= CORE_FFC_PMI;
1822 1822
1823 1823 *data = conf;
1824 1824 return (0);
1825 1825 }
1826 1826
1827 1827 /*ARGSUSED*/
1828 1828 static int
1829 1829 core_pcbe_configure(uint_t picnum, char *event, uint64_t preset,
1830 1830 uint32_t flags, uint_t nattrs, kcpc_attr_t *attrs, void **data,
1831 1831 void *token)
1832 1832 {
1833 1833 int ret;
1834 1834 core_pcbe_config_t *conf;
1835 1835
1836 1836 /*
1837 1837 * If we've been handed an existing configuration, we need only preset
1838 1838 * the counter value.
1839 1839 */
1840 1840 if (*data != NULL) {
1841 1841 conf = *data;
1842 1842 ASSERT(conf->core_pictype == CORE_GPC ||
1843 1843 conf->core_pictype == CORE_FFC);
1844 1844 if (conf->core_pictype == CORE_GPC)
1845 1845 conf->core_rawpic = preset & mask_gpc;
1846 1846 else /* CORE_FFC */
1847 1847 conf->core_rawpic = preset & mask_ffc;
1848 1848 return (0);
1849 1849 }
1850 1850
1851 1851 if (picnum >= total_pmc) {
1852 1852 return (CPC_INVALID_PICNUM);
1853 1853 }
1854 1854
1855 1855 if (picnum < num_gpc) {
1856 1856 ret = configure_gpc(picnum, event, preset, flags,
1857 1857 nattrs, attrs, data);
1858 1858 } else {
1859 1859 ret = configure_ffc(picnum, event, preset, flags,
1860 1860 nattrs, attrs, data);
1861 1861 }
1862 1862 return (ret);
1863 1863 }
1864 1864
1865 1865 static void
1866 1866 core_pcbe_program(void *token)
1867 1867 {
1868 1868 core_pcbe_config_t *cfg;
1869 1869 uint64_t perf_global_ctrl;
1870 1870 uint64_t perf_fixed_ctr_ctrl;
1871 1871 uint64_t curcr4;
1872 1872
1873 1873 core_pcbe_allstop();
1874 1874
1875 1875 curcr4 = getcr4();
1876 1876 if (kcpc_allow_nonpriv(token))
1877 1877 /* Allow RDPMC at any ring level */
1878 1878 setcr4(curcr4 | CR4_PCE);
1879 1879 else
1880 1880 /* Allow RDPMC only at ring 0 */
1881 1881 setcr4(curcr4 & ~CR4_PCE);
1882 1882
1883 1883 /* Clear any overflow indicators before programming the counters */
1884 1884 WRMSR(PERF_GLOBAL_OVF_CTRL, MASK_CONDCHGD_OVFBUFFER | control_mask);
1885 1885
1886 1886 cfg = NULL;
1887 1887 perf_global_ctrl = 0;
1888 1888 perf_fixed_ctr_ctrl = 0;
1889 1889 cfg = (core_pcbe_config_t *)kcpc_next_config(token, cfg, NULL);
1890 1890 while (cfg != NULL) {
1891 1891 ASSERT(cfg->core_pictype == CORE_GPC ||
1892 1892 cfg->core_pictype == CORE_FFC);
1893 1893
1894 1894 if (cfg->core_pictype == CORE_GPC) {
1895 1895 /*
1896 1896 * General-purpose counter registers have write
1897 1897 * restrictions where only the lower 32-bits can be
1898 1898 * written to. The rest of the relevant bits are
1899 1899 			 * written to by extension from bit 31 (all ZEROs if
1900 1900 			 * bit-31 is ZERO and all ONEs if bit-31 is ONE). This
1901 1901 * makes it possible to write to the counter register
1902 1902 * only values that have all ONEs or all ZEROs in the
1903 1903 * higher bits.
1904 1904 */
1905 1905 if (((cfg->core_rawpic & BITS_EXTENDED_FROM_31) == 0) ||
1906 1906 ((cfg->core_rawpic & BITS_EXTENDED_FROM_31) ==
1907 1907 BITS_EXTENDED_FROM_31)) {
1908 1908 /*
1909 1909 				 * Straightforward case where the higher bits
1910 1910 * are all ZEROs or all ONEs.
1911 1911 */
1912 1912 WRMSR(cfg->core_pmc,
1913 1913 (cfg->core_rawpic & mask_gpc));
1914 1914 } else {
1915 1915 /*
1916 1916 * The high order bits are not all the same.
1917 1917 * We save what is currently in the registers
1918 1918 * and do not write to it. When we want to do
1919 1919 * a read from this register later (in
1920 1920 * core_pcbe_sample()), we subtract the value
1921 1921 * we save here to get the actual event count.
1922 1922 *
1923 1923 * NOTE: As a result, we will not get overflow
1924 1924 * interrupts as expected.
1925 1925 */
1926 1926 RDMSR(cfg->core_pmc, cfg->core_rawpic);
1927 1927 cfg->core_rawpic = cfg->core_rawpic & mask_gpc;
1928 1928 }
1929 1929 WRMSR(cfg->core_pes, cfg->core_ctl);
1930 1930 perf_global_ctrl |= 1ull << cfg->core_picno;
1931 1931 } else {
1932 1932 /*
1933 1933 * Unlike the general-purpose counters, all relevant
1934 1934 * bits of fixed-function counters can be written to.
1935 1935 */
1936 1936 WRMSR(cfg->core_pmc, cfg->core_rawpic & mask_ffc);
1937 1937
1938 1938 /*
1939 1939 * Collect the control bits for all the
1940 1940 			 * fixed-function counters and write them in one shot
1941 1941 * later in this function
1942 1942 */
1943 1943 perf_fixed_ctr_ctrl |= cfg->core_ctl <<
1944 1944 ((cfg->core_picno - num_gpc) * CORE_FFC_ATTR_SIZE);
1945 1945 perf_global_ctrl |=
1946 1946 1ull << (cfg->core_picno - num_gpc + 32);
1947 1947 }
1948 1948
1949 1949 cfg = (core_pcbe_config_t *)
1950 1950 kcpc_next_config(token, cfg, NULL);
1951 1951 }
1952 1952
1953 1953 /* Enable all the counters */
1954 1954 WRMSR(PERF_FIXED_CTR_CTRL, perf_fixed_ctr_ctrl);
1955 1955 WRMSR(PERF_GLOBAL_CTRL, perf_global_ctrl);
1956 1956 }
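
configure_gpc() and core_pcbe_program() both depend on the same property: a 64-bit value can be written into a general-purpose counter MSR only when every counter bit from 31 upward is a copy of bit 31, because the hardware fills those bits by extending bit 31. A user-space sketch of that predicate, with a hypothetical 48-bit counter width and a local stand-in for BITS_EXTENDED_FROM_31:

#include <stdio.h>
#include <inttypes.h>

#define	WIDTH_GPC	48	/* hypothetical counter width from CPUID */
#define	XBITS(x)	((1ULL << (x)) - 1ULL)
/* Bit 31 plus every counter bit above it (stand-in for the real macro). */
#define	EXT_FROM_31	(XBITS(WIDTH_GPC) & ~XBITS(31))

static int
preset_writable(uint64_t preset)
{
	uint64_t hi = preset & EXT_FROM_31;

	/* Writable only if bits 31..47 are all zeros or all ones. */
	return (hi == 0 || hi == EXT_FROM_31);
}

int
main(void)
{
	(void) printf("%d\n", preset_writable(0xDEADULL));		/* 1 */
	(void) printf("%d\n", preset_writable(XBITS(WIDTH_GPC)));	/* 1 */
	(void) printf("%d\n", preset_writable(1ULL << 33));		/* 0 */
	return (0);
}
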
1957 1957
1958 1958 static void
1959 1959 core_pcbe_allstop(void)
1960 1960 {
1961 1961 /* Disable all the counters together */
1962 1962 WRMSR(PERF_GLOBAL_CTRL, ALL_STOPPED);
1963 1963
1964 1964 setcr4(getcr4() & ~CR4_PCE);
1965 1965 }
1966 1966
1967 1967 static void
1968 1968 core_pcbe_sample(void *token)
1969 1969 {
1970 1970 uint64_t *daddr;
1971 1971 uint64_t curpic;
1972 1972 core_pcbe_config_t *cfg;
1973 1973 uint64_t counter_mask;
1974 1974
1975 1975 cfg = (core_pcbe_config_t *)kcpc_next_config(token, NULL, &daddr);
1976 1976 while (cfg != NULL) {
1977 1977 ASSERT(cfg->core_pictype == CORE_GPC ||
1978 1978 cfg->core_pictype == CORE_FFC);
1979 1979
1980 1980 curpic = rdmsr(cfg->core_pmc);
1981 1981
1982 1982 DTRACE_PROBE4(core__pcbe__sample,
1983 1983 uint64_t, cfg->core_pmc,
1984 1984 uint64_t, curpic,
1985 1985 uint64_t, cfg->core_rawpic,
1986 1986 uint64_t, *daddr);
1987 1987
1988 1988 if (cfg->core_pictype == CORE_GPC) {
1989 1989 counter_mask = mask_gpc;
1990 1990 } else {
1991 1991 counter_mask = mask_ffc;
1992 1992 }
1993 1993 curpic = curpic & counter_mask;
1994 1994 if (curpic >= cfg->core_rawpic) {
1995 1995 *daddr += curpic - cfg->core_rawpic;
1996 1996 } else {
1997 1997 /* Counter overflowed since our last sample */
1998 1998 *daddr += counter_mask - (cfg->core_rawpic - curpic) +
1999 1999 1;
2000 2000 }
2001 2001 cfg->core_rawpic = *daddr & counter_mask;
2002 2002
2003 2003 cfg =
2004 2004 (core_pcbe_config_t *)kcpc_next_config(token, cfg, &daddr);
2005 2005 }
2006 2006 }
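
The delta computation in core_pcbe_sample() must tolerate a counter that wrapped between samples: when the current reading is below the saved raw value, the counter's full range (counter_mask + 1) is added back in. A user-space sketch with a hypothetical 48-bit counter:

#include <stdio.h>
#include <inttypes.h>

#define	MASK_48		((1ULL << 48) - 1)	/* hypothetical counter mask */

static uint64_t
delta(uint64_t rawpic, uint64_t curpic, uint64_t counter_mask)
{
	curpic &= counter_mask;
	if (curpic >= rawpic)
		return (curpic - rawpic);
	/* Counter wrapped: add back the counter's full range. */
	return (counter_mask - (rawpic - curpic) + 1);
}

int
main(void)
{
	/* No wrap: simple difference. */
	(void) printf("%" PRIu64 "\n", delta(100, 150, MASK_48));	/* 50 */
	/* Wrapped from 6 below the top back around to 10. */
	(void) printf("%" PRIu64 "\n", delta(MASK_48 - 5, 10, MASK_48));	/* 16 */
	return (0);
}
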
2007 2007
2008 2008 static void
2009 2009 core_pcbe_free(void *config)
2010 2010 {
2011 2011 kmem_free(config, sizeof (core_pcbe_config_t));
2012 2012 }
2013 2013
2014 2014 static struct modlpcbe core_modlpcbe = {
2015 2015 &mod_pcbeops,
2016 2016 "Core Performance Counters",
2017 2017 &core_pcbe_ops
2018 2018 };
2019 2019
2020 2020 static struct modlinkage core_modl = {
2021 2021 MODREV_1,
2022 - &core_modlpcbe,
2022 + { &core_modlpcbe, NULL }
2023 2023 };
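
The added braces above are the point of this review: struct modlinkage ends in an array member (ml_linkage[]), so initializing it with a bare pointer relies on brace elision, which GCC's -Wmissing-braces diagnoses; wrapping the sub-initializer in its own braces produces the same object without the warning, letting the Makefile drop -Wno-missing-braces. A stand-alone sketch of the two forms; the field names echo struct modlinkage but the array size here is illustrative:

#include <stdio.h>

struct fake_modlinkage {
	int	ml_rev;
	void	*ml_linkage[4];		/* size is illustrative only */
};

static int dummy;

/* Brace elision: valid C, but -Wmissing-braces warns about it. */
static struct fake_modlinkage elided = {
	1,
	&dummy,
};

/* Fully braced: same initialization, no warning. */
static struct fake_modlinkage braced = {
	1,
	{ &dummy, NULL }
};

int
main(void)
{
	(void) printf("%d %d\n",
	    elided.ml_linkage[0] == braced.ml_linkage[0],
	    elided.ml_linkage[1] == braced.ml_linkage[1]);	/* 1 1 */
	return (0);
}
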
2024 2024
2025 2025 int
2026 2026 _init(void)
2027 2027 {
2028 2028 if (core_pcbe_init() != 0) {
2029 2029 return (ENOTSUP);
2030 2030 }
2031 2031 return (mod_install(&core_modl));
2032 2032 }
2033 2033
2034 2034 int
2035 2035 _fini(void)
2036 2036 {
2037 2037 return (mod_remove(&core_modl));
2038 2038 }
2039 2039
2040 2040 int
2041 2041 _info(struct modinfo *mi)
2042 2042 {
2043 2043 return (mod_info(&core_modl, mi));
2044 2044 }