Print this page
7127 remove -Wno-missing-braces from Makefile.uts
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/intel/pcbe/p123_pcbe.c
+++ new/usr/src/uts/intel/pcbe/p123_pcbe.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
23 23 */
24 24
25 25 /*
26 26 * This file contains preset event names from the Performance Application
27 27 * Programming Interface v3.5 which included the following notice:
28 28 *
29 29 * Copyright (c) 2005,6
30 30 * Innovative Computing Labs
31 31 * Computer Science Department,
32 32 * University of Tennessee,
33 33 * Knoxville, TN.
34 34 * All Rights Reserved.
35 35 *
36 36 *
37 37 * Redistribution and use in source and binary forms, with or without
38 38 * modification, are permitted provided that the following conditions are met:
39 39 *
40 40 * * Redistributions of source code must retain the above copyright notice,
41 41 * this list of conditions and the following disclaimer.
42 42 * * Redistributions in binary form must reproduce the above copyright
43 43 * notice, this list of conditions and the following disclaimer in the
44 44 * documentation and/or other materials provided with the distribution.
45 45 * * Neither the name of the University of Tennessee nor the names of its
46 46 * contributors may be used to endorse or promote products derived from
47 47 * this software without specific prior written permission.
48 48 *
49 49 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
50 50 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51 51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52 52 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
53 53 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
54 54 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
55 55 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
56 56 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
57 57 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
58 58 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
59 59 * POSSIBILITY OF SUCH DAMAGE.
60 60 *
61 61 *
62 62 * This open source software license conforms to the BSD License template.
63 63 */
64 64
65 65 /*
66 66 * Performance Counter Back-End for Pentiums I, II, and III.
67 67 */
68 68
69 69 #include <sys/cpuvar.h>
70 70 #include <sys/param.h>
71 71 #include <sys/cpc_impl.h>
72 72 #include <sys/cpc_pcbe.h>
73 73 #include <sys/modctl.h>
74 74 #include <sys/inttypes.h>
75 75 #include <sys/systm.h>
76 76 #include <sys/cmn_err.h>
77 77 #include <sys/x86_archext.h>
78 78 #include <sys/sdt.h>
79 79 #include <sys/archsystm.h>
80 80 #include <sys/privregs.h>
81 81 #include <sys/ddi.h>
82 82 #include <sys/sunddi.h>
83 83
84 84 static int64_t diff3931(uint64_t sample, uint64_t old);
85 85 static uint64_t trunc3931(uint64_t value);
86 86
87 87 static int ptm_pcbe_init(void);
88 88 static uint_t ptm_pcbe_ncounters(void);
89 89 static const char *ptm_pcbe_impl_name(void);
90 90 static const char *ptm_pcbe_cpuref(void);
91 91 static char *ptm_pcbe_list_events(uint_t picnum);
92 92 static char *ptm_pcbe_list_attrs(void);
93 93 static uint64_t ptm_pcbe_event_coverage(char *event);
94 94 static int ptm_pcbe_pic_index(char *picname);
95 95 static uint64_t ptm_pcbe_overflow_bitmap(void);
96 96 static int ptm_pcbe_configure(uint_t picnum, char *event, uint64_t preset,
97 97 uint32_t flags, uint_t nattrs, kcpc_attr_t *attrs, void **data,
98 98 void *token);
99 99 static void ptm_pcbe_program(void *token);
100 100 static void ptm_pcbe_allstop(void);
101 101 static void ptm_pcbe_sample(void *token);
102 102 static void ptm_pcbe_free(void *config);
103 103
104 104 pcbe_ops_t ptm_pcbe_ops = {
105 105 PCBE_VER_1,
106 106 0,
107 107 ptm_pcbe_ncounters,
108 108 ptm_pcbe_impl_name,
109 109 ptm_pcbe_cpuref,
110 110 ptm_pcbe_list_events,
111 111 ptm_pcbe_list_attrs,
112 112 ptm_pcbe_event_coverage,
113 113 ptm_pcbe_overflow_bitmap,
114 114 ptm_pcbe_configure,
115 115 ptm_pcbe_program,
116 116 ptm_pcbe_allstop,
117 117 ptm_pcbe_sample,
118 118 ptm_pcbe_free
119 119 };
120 120
121 121 typedef enum _ptm_ver {
122 122 PTM_VER_P5,
123 123 PTM_VER_P6
124 124 } ptm_ver_t;
125 125
126 126 static ptm_ver_t ptm_ver;
127 127 static const char *ptm_impl_name;
128 128 static const char *ptm_cpuref;
129 129 static char *pic_events[2] = { NULL, NULL };
130 130
131 131 /*
132 132 * Indicates whether the "rdpmc" instruction is available on this processor.
133 133 */
134 134 static int ptm_rdpmc_avail = 0;
135 135
136 136 #define ALL_STOPPED 0ULL
137 137
/*
 * Per-counter configuration built by ptm_pcbe_configure() and consumed by
 * ptm_pcbe_program() and ptm_pcbe_sample().
 */
typedef struct _ptm_pcbe_config {
	/* Counter this configuration belongs to: 0 for pic0, 1 for pic1. */
	uint8_t		ptm_picno;
	/* Control bits. P6: PerfEventSelect value; P5: CESR bits, shifted. */
	uint32_t	ptm_ctl;
	/* Last raw counter value preset into, or recorded for, this pic. */
	uint64_t	ptm_rawpic;
} ptm_pcbe_config_t;
143 143
/*
 * One entry of a hardware event table: the event-select code and the name
 * it is known by.  Tables are terminated by an entry whose bits is NT_END.
 */
struct nametable {
	uint8_t		bits;	/* event select code */
	const char	*name;	/* event name */
};
148 148
/*
 * Maps a generic (PAPI preset) event name onto one of this processor's
 * native events, plus the unit mask to program with it.  The unit mask is
 * only applied on P6-class processors.
 */
typedef struct _ptm_generic_events {
	char	*name;		/* generic event name, e.g. "PAPI_tot_ins" */
	char	*event;		/* native event name it maps to */
	uint8_t	umask;		/* unit mask to use with the native event */
} ptm_generic_event_t;

/* Sentinel 'bits' value terminating a struct nametable array. */
#define	NT_END		0xFF

/*
 * Sentinel terminating a ptm_generic_event_t array (name == NULL).
 * Every field is initialized explicitly so the terminator does not rely on
 * implicit zero-filling of a trailing member.
 */
#define	CPC_GEN_END	{ NULL, NULL, 0 }
157 157
158 158 /*
159 159 * Basic Pentium events
160 160 */
161 161 #define P5_EVENTS \
162 162 {0x0, "data_read"}, \
163 163 {0x1, "data_write"}, \
164 164 {0x2, "data_tlb_miss"}, \
165 165 {0x3, "data_read_miss"}, \
166 166 {0x4, "data_write_miss"}, \
167 167 {0x5, "write_hit_to_M_or_E"}, \
168 168 {0x6, "dcache_lines_wrback"}, \
169 169 {0x7, "external_snoops"}, \
170 170 {0x8, "external_dcache_snoop_hits"}, \
171 171 {0x9, "memory_access_in_both_pipes"}, \
172 172 {0xa, "bank_conflicts"}, \
173 173 {0xb, "misaligned_ref"}, \
174 174 {0xc, "code_read"}, \
175 175 {0xd, "code_tlb_miss"}, \
176 176 {0xe, "code_cache_miss"}, \
177 177 {0xf, "any_segreg_loaded"}, \
178 178 {0x12, "branches"}, \
179 179 {0x13, "btb_hits"}, \
180 180 {0x14, "taken_or_btb_hit"}, \
181 181 {0x15, "pipeline_flushes"}, \
182 182 {0x16, "instr_exec"}, \
183 183 {0x17, "instr_exec_V_pipe"}, \
184 184 {0x18, "clks_bus_cycle"}, \
185 185 {0x19, "clks_full_wbufs"}, \
186 186 {0x1a, "pipe_stall_read"}, \
187 187 {0x1b, "stall_on_write_ME"}, \
188 188 {0x1c, "locked_bus_cycle"}, \
189 189 {0x1d, "io_rw_cycles"}, \
190 190 {0x1e, "reads_noncache_mem"}, \
191 191 {0x1f, "pipeline_agi_stalls"}, \
192 192 {0x22, "flops"}, \
193 193 {0x23, "bp_match_dr0"}, \
194 194 {0x24, "bp_match_dr1"}, \
195 195 {0x25, "bp_match_dr2"}, \
196 196 {0x26, "bp_match_dr3"}, \
197 197 {0x27, "hw_intrs"}, \
198 198 {0x28, "data_rw"}, \
199 199 {0x29, "data_rw_miss"}
200 200
201 201 static const struct nametable P5mmx_names0[] = {
202 202 P5_EVENTS,
203 203 {0x2a, "bus_ownership_latency"},
204 204 {0x2b, "mmx_instr_upipe"},
205 205 {0x2c, "cache_M_line_sharing"},
206 206 {0x2d, "emms_instr"},
207 207 {0x2e, "bus_util_processor"},
208 208 {0x2f, "sat_mmx_instr"},
209 209 {0x30, "clks_not_HLT"},
210 210 {0x31, "mmx_data_read"},
211 211 {0x32, "clks_fp_stall"},
212 212 {0x33, "d1_starv_fifo_0"},
213 213 {0x34, "mmx_data_write"},
214 214 {0x35, "pipe_flush_wbp"},
215 215 {0x36, "mmx_misalign_data_refs"},
216 216 {0x37, "rets_pred_incorrect"},
217 217 {0x38, "mmx_multiply_unit_interlock"},
218 218 {0x39, "rets"},
219 219 {0x3a, "btb_false_entries"},
220 220 {0x3b, "clocks_stall_full_wb"},
221 221 {NT_END, ""}
222 222 };
223 223
224 224 static const struct nametable P5mmx_names1[] = {
225 225 P5_EVENTS,
226 226 {0x2a, "bus_ownership_transfers"},
227 227 {0x2b, "mmx_instr_vpipe"},
228 228 {0x2c, "cache_lint_sharing"},
229 229 {0x2d, "mmx_fp_transitions"},
230 230 {0x2e, "writes_noncache_mem"},
231 231 {0x2f, "sats_performed"},
232 232 {0x30, "clks_dcache_tlb_miss"},
233 233 {0x31, "mmx_data_read_miss"},
234 234 {0x32, "taken_br"},
235 235 {0x33, "d1_starv_fifo_1"},
236 236 {0x34, "mmx_data_write_miss"},
237 237 {0x35, "pipe_flush_wbp_wb"},
238 238 {0x36, "mmx_pipe_stall_data_read"},
239 239 {0x37, "rets_pred"},
240 240 {0x38, "movd_movq_stall"},
241 241 {0x39, "rsb_overflow"},
242 242 {0x3a, "btb_mispred_nt"},
243 243 {0x3b, "mmx_stall_write_ME"},
244 244 {NT_END, ""}
245 245 };
246 246
247 247 static const struct nametable *P5mmx_names[2] = {
248 248 P5mmx_names0,
249 249 P5mmx_names1
250 250 };
251 251
252 252 /*
253 253 * Pentium Pro and Pentium II events
254 254 */
255 255 static const struct nametable _P6_names[] = {
256 256 /*
257 257 * Data cache unit
258 258 */
259 259 {0x43, "data_mem_refs"},
260 260 {0x45, "dcu_lines_in"},
261 261 {0x46, "dcu_m_lines_in"},
262 262 {0x47, "dcu_m_lines_out"},
263 263 {0x48, "dcu_miss_outstanding"},
264 264
265 265 /*
266 266 * Instruction fetch unit
267 267 */
268 268 {0x80, "ifu_ifetch"},
269 269 {0x81, "ifu_ifetch_miss"},
270 270 {0x85, "itlb_miss"},
271 271 {0x86, "ifu_mem_stall"},
272 272 {0x87, "ild_stall"},
273 273
274 274 /*
275 275 * L2 cache
276 276 */
277 277 {0x28, "l2_ifetch"},
278 278 {0x29, "l2_ld"},
279 279 {0x2a, "l2_st"},
280 280 {0x24, "l2_lines_in"},
281 281 {0x26, "l2_lines_out"},
282 282 {0x25, "l2_m_lines_inm"},
283 283 {0x27, "l2_m_lines_outm"},
284 284 {0x2e, "l2_rqsts"},
285 285 {0x21, "l2_ads"},
286 286 {0x22, "l2_dbus_busy"},
287 287 {0x23, "l2_dbus_busy_rd"},
288 288
289 289 /*
290 290 * External bus logic
291 291 */
292 292 {0x62, "bus_drdy_clocks"},
293 293 {0x63, "bus_lock_clocks"},
294 294 {0x60, "bus_req_outstanding"},
295 295 {0x65, "bus_tran_brd"},
296 296 {0x66, "bus_tran_rfo"},
297 297 {0x67, "bus_trans_wb"},
298 298 {0x68, "bus_tran_ifetch"},
299 299 {0x69, "bus_tran_inval"},
300 300 {0x6a, "bus_tran_pwr"},
301 301 {0x6b, "bus_trans_p"},
302 302 {0x6c, "bus_trans_io"},
303 303 {0x6d, "bus_tran_def"},
304 304 {0x6e, "bus_tran_burst"},
305 305 {0x70, "bus_tran_any"},
306 306 {0x6f, "bus_tran_mem"},
307 307 {0x64, "bus_data_rcv"},
308 308 {0x61, "bus_bnr_drv"},
309 309 {0x7a, "bus_hit_drv"},
310 310 {0x7b, "bus_hitm_drv"},
311 311 {0x7e, "bus_snoop_stall"},
312 312
313 313 /*
314 314 * Floating point unit
315 315 */
316 316 {0xc1, "flops"}, /* 0 only */
317 317 {0x10, "fp_comp_ops_exe"}, /* 0 only */
318 318 {0x11, "fp_assist"}, /* 1 only */
319 319 {0x12, "mul"}, /* 1 only */
320 320 {0x13, "div"}, /* 1 only */
321 321 {0x14, "cycles_div_busy"}, /* 0 only */
322 322
323 323 /*
324 324 * Memory ordering
325 325 */
326 326 {0x3, "ld_blocks"},
327 327 {0x4, "sb_drains"},
328 328 {0x5, "misalign_mem_ref"},
329 329
330 330 /*
331 331 * Instruction decoding and retirement
332 332 */
333 333 {0xc0, "inst_retired"},
334 334 {0xc2, "uops_retired"},
335 335 {0xd0, "inst_decoder"},
336 336
337 337 /*
338 338 * Interrupts
339 339 */
340 340 {0xc8, "hw_int_rx"},
341 341 {0xc6, "cycles_int_masked"},
342 342 {0xc7, "cycles_int_pending_and_masked"},
343 343
344 344 /*
345 345 * Branches
346 346 */
347 347 {0xc4, "br_inst_retired"},
348 348 {0xc5, "br_miss_pred_retired"},
349 349 {0xc9, "br_taken_retired"},
350 350 {0xca, "br_miss_pred_taken_ret"},
351 351 {0xe0, "br_inst_decoded"},
352 352 {0xe2, "btb_misses"},
353 353 {0xe4, "br_bogus"},
354 354 {0xe6, "baclears"},
355 355
356 356 /*
357 357 * Stalls
358 358 */
359 359 {0xa2, "resource_stalls"},
360 360 {0xd2, "partial_rat_stalls"},
361 361
362 362 /*
363 363 * Segment register loads
364 364 */
365 365 {0x6, "segment_reg_loads"},
366 366
367 367 /*
368 368 * Clocks
369 369 */
370 370 {0x79, "cpu_clk_unhalted"},
371 371
372 372 /*
373 373 * MMX
374 374 */
375 375 {0xb0, "mmx_instr_exec"},
376 376 {0xb1, "mmx_sat_instr_exec"},
377 377 {0xb2, "mmx_uops_exec"},
378 378 {0xb3, "mmx_instr_type_exec"},
379 379 {0xcc, "fp_mmx_trans"},
380 380 {0xcd, "mmx_assists"},
381 381 {0xce, "mmx_instr_ret"},
382 382 {0xd4, "seg_rename_stalls"},
383 383 {0xd5, "seg_reg_renames"},
384 384 {0xd6, "ret_seg_renames"},
385 385
386 386 {NT_END, ""}
387 387 };
388 388
389 389 static const struct nametable *P6_names[2] = {
390 390 _P6_names,
391 391 _P6_names
392 392 };
393 393
/*
 * Generic (PAPI preset) events common to both P5 counters.  Each entry is
 * { generic name, native event name, umask }; the umask is spelled out for
 * every entry, including PAPI_fp_ops, so that no member is left to implicit
 * zero-initialization.
 */
#define	P5_GENERIC_EVENTS						\
	{ "PAPI_tot_ins",	"instr_exec",		0x0 },		\
	{ "PAPI_tlb_dm",	"data_tlb_miss",	0x0 },		\
	{ "PAPI_tlb_im",	"code_tlb_miss",	0x0 },		\
	{ "PAPI_fp_ops",	"flops",		0x0 }
399 399
400 400 static const ptm_generic_event_t P5mmx_generic_names0[] = {
401 401 P5_GENERIC_EVENTS,
402 402 { "PAPI_tot_cyc", "clks_not_HLT", 0x0 },
403 403 CPC_GEN_END
404 404 };
405 405
406 406 static const ptm_generic_event_t P5mmx_generic_names1[] = {
407 407 P5_GENERIC_EVENTS,
408 408 { "PAPI_br_ins", "taken_br", 0x0 },
409 409 CPC_GEN_END
410 410 };
411 411
412 412 static const ptm_generic_event_t *P5mmx_generic_names[2] = {
413 413 P5mmx_generic_names0,
414 414 P5mmx_generic_names1
415 415 };
416 416
417 417 static const ptm_generic_event_t _P6_generic_names[] = {
418 418 { "PAPI_ca_shr", "l2_ifetch", 0xf },
419 419 { "PAPI_ca_cln", "bus_tran_rfo", 0x0 },
420 420 { "PAPI_ca_itv", "bus_tran_inval", 0x0 },
421 421 { "PAPI_tlb_im", "itlb_miss", 0x0 },
422 422 { "PAPI_btac_m", "btb_misses", 0x0 },
423 423 { "PAPI_hw_int", "hw_int_rx", 0x0 },
424 424 { "PAPI_br_cn", "br_inst_retired", 0x0 },
425 425 { "PAPI_br_tkn", "br_taken_retired", 0x0 },
426 426 { "PAPI_br_msp", "br_miss_pred_taken_ret", 0x0 },
427 427 { "PAPI_br_ins", "br_inst_retired", 0x0 },
428 428 { "PAPI_res_stl", "resource_stalls", 0x0 },
429 429 { "PAPI_tot_iis", "inst_decoder", 0x0 },
430 430 { "PAPI_tot_ins", "inst_retired", 0x0 },
431 431 { "PAPI_tot_cyc", "cpu_clk_unhalted", 0x0 },
432 432 { "PAPI_l1_dcm", "dcu_lines_in", 0x0 },
433 433 { "PAPI_l1_icm", "l2_ifetch", 0xf },
434 434 { "PAPI_l1_tcm", "l2_rqsts", 0xf },
435 435 { "PAPI_l1_dca", "data_mem_refs", 0x0 },
436 436 { "PAPI_l1_stm", "l2_st", 0xf },
437 437 { "PAPI_l2_icm", "bus_tran_ifetch", 0x0 },
438 438 { "PAPI_l2_dcr", "l2_ld", 0xf },
439 439 { "PAPI_l2_dcw", "l2_st", 0xf },
440 440 { "PAPI_l2_tcm", "l2_lines_in", 0x0 },
441 441 { "PAPI_l2_tca", "l2_rqsts", 0xf },
442 442 { "PAPI_l2_tcw", "l2_st", 0xf },
443 443 { "PAPI_l2_stm", "l2_m_lines_inm", 0x0 },
444 444 { "PAPI_fp_ins", "flops", 0x0 },
445 445 { "PAPI_fp_ops", "flops", 0x0 },
446 446 { "PAPI_fml_ins", "mul", 0x0 },
447 447 { "PAPI_fdv_ins", "div", 0x0 },
448 448 CPC_GEN_END
449 449 };
450 450
451 451 static const ptm_generic_event_t *P6_generic_names[2] = {
452 452 _P6_generic_names,
453 453 _P6_generic_names
454 454 };
455 455
456 456 static const struct nametable **events;
457 457 static const ptm_generic_event_t **generic_events;
458 458
459 459 #define BITS(v, u, l) \
460 460 (((v) >> (l)) & ((1 << (1 + (u) - (l))) - 1))
461 461
462 462 /*
463 463 * "Well known" bit fields in the Pentium CES register
464 464 * The interfaces in libcpc should make these #defines uninteresting.
465 465 */
466 466 #define CPC_P5_CESR_ES0_SHIFT 0
467 467 #define CPC_P5_CESR_ES0_MASK 0x3f
468 468 #define CPC_P5_CESR_ES1_SHIFT 16
469 469 #define CPC_P5_CESR_ES1_MASK 0x3f
470 470
471 471 #define CPC_P5_CESR_OS0 6
472 472 #define CPC_P5_CESR_USR0 7
473 473 #define CPC_P5_CESR_CLK0 8
474 474 #define CPC_P5_CESR_PC0 9
475 475 #define CPC_P5_CESR_OS1 (CPC_P5_CESR_OS0 + 16)
476 476 #define CPC_P5_CESR_USR1 (CPC_P5_CESR_USR0 + 16)
477 477 #define CPC_P5_CESR_CLK1 (CPC_P5_CESR_CLK0 + 16)
478 478 #define CPC_P5_CESR_PC1 (CPC_P5_CESR_PC0 + 16)
479 479
480 480 /*
481 481 * "Well known" bit fields in the Pentium Pro PerfEvtSel registers
482 482 * The interfaces in libcpc should make these #defines uninteresting.
483 483 */
484 484 #define CPC_P6_PES_INV 23
485 485 #define CPC_P6_PES_EN 22
486 486 #define CPC_P6_PES_INT 20
487 487 #define CPC_P6_PES_PC 19
488 488 #define CPC_P6_PES_E 18
489 489 #define CPC_P6_PES_OS 17
490 490 #define CPC_P6_PES_USR 16
491 491
492 492 #define CPC_P6_PES_UMASK_SHIFT 8
493 493 #define CPC_P6_PES_UMASK_MASK (0xffu)
494 494
495 495 #define CPC_P6_PES_CMASK_SHIFT 24
496 496 #define CPC_P6_PES_CMASK_MASK (0xffu)
497 497
498 498 #define CPC_P6_PES_PIC0_MASK (0xffu)
499 499 #define CPC_P6_PES_PIC1_MASK (0xffu)
500 500
501 501 #define P6_PES_EN (UINT32_C(1) << CPC_P6_PES_EN)
502 502 #define P6_PES_INT (UINT32_C(1) << CPC_P6_PES_INT)
503 503 #define P6_PES_OS (UINT32_C(1) << CPC_P6_PES_OS)
504 504
505 505 /*
506 506 * Pentium 5 attributes
507 507 */
508 508 #define P5_NOEDGE 0x1 /* "noedge" - no edge detection */
509 509 #define P5_PC 0x2 /* "pc" - pin control */
510 510
511 511 /*
512 512 * Pentium 6 attributes
513 513 */
514 514 #define P6_NOEDGE 0x1
515 515 #define P6_PC 0x2
516 516 #define P6_INV 0x4 /* "inv" - count inverted transitions */
517 517 #define P6_INT 0x8 /* "int" - interrupt on overflow */
518 518
519 519 /*
520 520 * CPU reference strings
521 521 */
522 522
523 523 #define P5_CPUREF "See Appendix A.4 of the \"IA-32 Intel Architecture " \
524 524 "Software Developer's Manual Volume 3: System " \
525 525 "Programming Guide,\" Order # 245472-012, 2003"
526 526
527 527 #define P6_CPUREF "See Appendix A.3 of the \"IA-32 Intel Architecture " \
528 528 "Software Developer's Manual Volume 3: System " \
529 529 "Programming Guide,\" Order # 245472-012, 2003"
530 530
531 531 static int
532 532 ptm_pcbe_init(void)
533 533 {
534 534 const struct nametable *n;
535 535 const ptm_generic_event_t *gevp;
536 536 int i;
537 537 size_t size;
538 538
539 539 if (is_x86_feature(x86_featureset, X86FSET_MMX))
540 540 ptm_rdpmc_avail = 1;
541 541
542 542 /*
543 543 * Discover type of CPU and set events pointer appropriately.
544 544 *
545 545 * Map family and model into the performance
546 546 * counter architectures we currently understand.
547 547 *
548 548 * See application note AP485 (from developer.intel.com)
549 549 * for further explanation.
550 550 */
551 551 if (cpuid_getvendor(CPU) != X86_VENDOR_Intel)
552 552 return (-1);
553 553 switch (cpuid_getfamily(CPU)) {
554 554 case 5: /* Pentium and Pentium with MMX */
555 555 events = P5mmx_names;
556 556 generic_events = P5mmx_generic_names;
557 557 ptm_ver = PTM_VER_P5;
558 558 ptm_cpuref = P5_CPUREF;
559 559 if (cpuid_getmodel(CPU) < 4)
560 560 ptm_impl_name = "Pentium";
561 561 else
562 562 ptm_impl_name = "Pentium with MMX";
563 563 break;
564 564 case 6: /* Pentium Pro and Pentium II and III */
565 565 events = P6_names;
566 566 generic_events = P6_generic_names;
567 567 ptm_ver = PTM_VER_P6;
568 568 ptm_cpuref = P6_CPUREF;
569 569 ptm_pcbe_ops.pcbe_caps = CPC_CAP_OVERFLOW_INTERRUPT;
570 570 if (is_x86_feature(x86_featureset, X86FSET_MMX))
571 571 ptm_impl_name = "Pentium Pro with MMX, Pentium II";
572 572 else
573 573 ptm_impl_name = "Pentium Pro, Pentium II";
574 574 break;
575 575 default:
576 576 return (-1);
577 577 }
578 578
579 579 /*
580 580 * Initialize the list of events for each PIC.
581 581 * Do two passes: one to compute the size necessary and another
582 582 * to copy the strings. Need room for event, comma, and NULL terminator.
583 583 */
584 584 for (i = 0; i < 2; i++) {
585 585 size = 0;
586 586 for (n = events[i]; n->bits != NT_END; n++)
587 587 size += strlen(n->name) + 1;
588 588 for (gevp = generic_events[i]; gevp->name != NULL; gevp++)
589 589 size += strlen(gevp->name) + 1;
590 590 pic_events[i] = kmem_alloc(size + 1, KM_SLEEP);
591 591 *pic_events[i] = '\0';
592 592 for (n = events[i]; n->bits != NT_END; n++) {
593 593 (void) strcat(pic_events[i], n->name);
594 594 (void) strcat(pic_events[i], ",");
595 595 }
596 596 for (gevp = generic_events[i]; gevp->name != NULL; gevp++) {
597 597 (void) strcat(pic_events[i], gevp->name);
598 598 (void) strcat(pic_events[i], ",");
599 599 }
600 600
601 601 /*
602 602 * Remove trailing comma.
603 603 */
604 604 pic_events[i][size - 1] = '\0';
605 605 }
606 606
607 607 return (0);
608 608 }
609 609
610 610 static uint_t
611 611 ptm_pcbe_ncounters(void)
612 612 {
613 613 return (2);
614 614 }
615 615
616 616 static const char *
617 617 ptm_pcbe_impl_name(void)
618 618 {
619 619 return (ptm_impl_name);
620 620 }
621 621
622 622 static const char *
623 623 ptm_pcbe_cpuref(void)
624 624 {
625 625 return (ptm_cpuref);
626 626 }
627 627
628 628 static char *
629 629 ptm_pcbe_list_events(uint_t picnum)
630 630 {
631 631 ASSERT(picnum >= 0 && picnum < cpc_ncounters);
632 632
633 633 if (pic_events[0] == NULL) {
634 634 ASSERT(pic_events[1] == NULL);
635 635 }
636 636
637 637 return (pic_events[picnum]);
638 638 }
639 639
640 640 static char *
641 641 ptm_pcbe_list_attrs(void)
642 642 {
643 643 if (ptm_ver == PTM_VER_P5)
644 644 return ("noedge,pc");
645 645 else
646 646 return ("noedge,pc,inv,int,umask,cmask");
647 647 }
648 648
649 649 static const ptm_generic_event_t *
650 650 find_generic_event(int regno, char *name)
651 651 {
652 652 const ptm_generic_event_t *gevp;
653 653
654 654 for (gevp = generic_events[regno]; gevp->name != NULL; gevp++)
655 655 if (strcmp(name, gevp->name) == 0)
656 656 return (gevp);
657 657
658 658 return (NULL);
659 659 }
660 660
661 661 static const struct nametable *
662 662 find_event(int regno, char *name)
663 663 {
664 664 const struct nametable *n;
665 665
666 666 n = events[regno];
667 667
668 668 for (; n->bits != NT_END; n++)
669 669 if (strcmp(name, n->name) == 0)
670 670 return (n);
671 671
672 672 return (NULL);
673 673 }
674 674
/*
 * Return a bitmap of the counters able to count 'event': bit 0 for pic0,
 * bit 1 for pic1.  Both native and generic event names are recognized.
 */
static uint64_t
ptm_pcbe_event_coverage(char *event)
{
	uint64_t coverage = 0;
	int pic;

	for (pic = 0; pic < 2; pic++) {
		if (find_event(pic, event) != NULL ||
		    find_generic_event(pic, event) != NULL)
			coverage |= (UINT64_C(1) << pic);
	}

	return (coverage);
}
689 689
690 690 static uint64_t
691 691 ptm_pcbe_overflow_bitmap(void)
692 692 {
693 693 uint64_t ret = 0;
694 694 uint64_t pes[2];
695 695
696 696 /*
697 697 * P5 is not capable of generating interrupts.
698 698 */
699 699 ASSERT(ptm_ver == PTM_VER_P6);
700 700
701 701 /*
702 702 * CPC could have caused an interrupt provided that
703 703 *
704 704 * 1) Counters are enabled
705 705 * 2) Either counter has requested an interrupt
706 706 */
707 707
708 708 pes[0] = rdmsr(REG_PERFEVNT0);
709 709 if (((uint32_t)pes[0] & P6_PES_EN) != P6_PES_EN)
710 710 return (0);
711 711
712 712 /*
713 713 * If a particular counter requested an interrupt, assume it caused
714 714 * this interrupt. There is no way to determine which counter overflowed
715 715 * on this hardware other than by using unreliable heuristics.
716 716 */
717 717
718 718 pes[1] = rdmsr(REG_PERFEVNT1);
719 719 if ((uint32_t)pes[0] & P6_PES_INT)
720 720 ret |= 0x1;
721 721 if ((uint32_t)pes[1] & P6_PES_INT)
722 722 ret |= 0x2;
723 723
724 724 return (ret);
725 725 }
726 726
727 727 /*ARGSUSED*/
728 728 static int
729 729 ptm_pcbe_configure(uint_t picnum, char *eventname, uint64_t preset,
730 730 uint32_t flags, uint_t nattrs, kcpc_attr_t *attrs, void **data,
731 731 void *token)
732 732 {
733 733 ptm_pcbe_config_t *conf;
734 734 const struct nametable *n;
735 735 const ptm_generic_event_t *gevp;
736 736 struct nametable nt_raw = { 0, "raw" };
737 737 int i;
738 738 int ptm_flags = 0;
739 739
740 740 /*
741 741 * If we've been handed an existing configuration, we need only preset
742 742 * the counter value.
743 743 */
744 744 if (*data != NULL) {
745 745 conf = *data;
746 746 conf->ptm_rawpic = trunc3931(preset);
747 747 return (0);
748 748 }
749 749
750 750 if (picnum != 0 && picnum != 1)
751 751 return (CPC_INVALID_PICNUM);
752 752
753 753 conf = kmem_alloc(sizeof (ptm_pcbe_config_t), KM_SLEEP);
754 754
755 755 conf->ptm_picno = picnum;
756 756 conf->ptm_rawpic = trunc3931(preset);
757 757 conf->ptm_ctl = 0;
758 758
759 759 if ((n = find_event(picnum, eventname)) == NULL) {
760 760 if ((gevp = find_generic_event(picnum, eventname)) != NULL) {
761 761 n = find_event(picnum, gevp->event);
762 762 ASSERT(n != NULL);
763 763
764 764 if (nattrs > 0) {
765 765 kmem_free(conf, sizeof (ptm_pcbe_config_t));
766 766 return (CPC_ATTRIBUTE_OUT_OF_RANGE);
767 767 }
768 768
769 769 if (ptm_ver == PTM_VER_P6)
770 770 conf->ptm_ctl |= gevp->umask <<
771 771 CPC_P6_PES_UMASK_SHIFT;
772 772 } else {
773 773 long tmp;
774 774
775 775 /*
776 776 * If ddi_strtol() likes this event, use it as a raw
777 777 * event code.
778 778 */
779 779 if (ddi_strtol(eventname, NULL, 0, &tmp) != 0) {
780 780 kmem_free(conf, sizeof (ptm_pcbe_config_t));
781 781 return (CPC_INVALID_EVENT);
782 782 }
783 783
784 784 nt_raw.bits = tmp;
785 785
786 786 if (ptm_ver == PTM_VER_P5)
787 787 nt_raw.bits &= CPC_P5_CESR_ES0_MASK;
788 788 else
789 789 nt_raw.bits &= CPC_P6_PES_PIC0_MASK;
790 790
791 791 n = &nt_raw;
792 792 }
793 793 }
794 794
795 795 if (ptm_ver == PTM_VER_P5) {
796 796 int picshift;
797 797 picshift = (picnum == 0) ? 0 : 16;
798 798
799 799 for (i = 0; i < nattrs; i++) {
800 800 /*
801 801 * Value of these attributes is ignored; their presence
802 802 * alone tells us to set the corresponding flag.
803 803 */
804 804 if (strncmp(attrs[i].ka_name, "noedge", 7) == 0) {
805 805 if (attrs[i].ka_val != 0)
806 806 ptm_flags |= P5_NOEDGE;
807 807 } else if (strncmp(attrs[i].ka_name, "pc", 3) == 0) {
808 808 if (attrs[i].ka_val != 0)
809 809 ptm_flags |= P5_PC;
810 810 } else {
811 811 kmem_free(conf, sizeof (ptm_pcbe_config_t));
812 812 return (CPC_INVALID_ATTRIBUTE);
813 813 }
814 814 }
815 815
816 816 if (flags & CPC_COUNT_USER)
817 817 conf->ptm_ctl |= (1 << (CPC_P5_CESR_USR0 + picshift));
818 818 if (flags & CPC_COUNT_SYSTEM)
819 819 conf->ptm_ctl |= (1 << (CPC_P5_CESR_OS0 + picshift));
820 820 if (ptm_flags & P5_NOEDGE)
821 821 conf->ptm_ctl |= (1 << (CPC_P5_CESR_CLK0 + picshift));
822 822 if (ptm_flags & P5_PC)
823 823 conf->ptm_ctl |= (1 << (CPC_P5_CESR_PC0 + picshift));
824 824
825 825 ASSERT((n->bits | CPC_P5_CESR_ES0_MASK) ==
826 826 CPC_P5_CESR_ES0_MASK);
827 827
828 828 conf->ptm_ctl |= (n->bits << picshift);
829 829 } else {
830 830 for (i = 0; i < nattrs; i++) {
831 831 if (strncmp(attrs[i].ka_name, "noedge", 6) == 0) {
832 832 if (attrs[i].ka_val != 0)
833 833 ptm_flags |= P6_NOEDGE;
834 834 } else if (strncmp(attrs[i].ka_name, "pc", 2) == 0) {
835 835 if (attrs[i].ka_val != 0)
836 836 ptm_flags |= P6_PC;
837 837 } else if (strncmp(attrs[i].ka_name, "inv", 3) == 0) {
838 838 if (attrs[i].ka_val != 0)
839 839 ptm_flags |= P6_INV;
840 840 } else if (strncmp(attrs[i].ka_name, "umask", 5) == 0) {
841 841 if ((attrs[i].ka_val | CPC_P6_PES_UMASK_MASK) !=
842 842 CPC_P6_PES_UMASK_MASK) {
843 843 kmem_free(conf,
844 844 sizeof (ptm_pcbe_config_t));
845 845 return (CPC_ATTRIBUTE_OUT_OF_RANGE);
846 846 }
847 847 conf->ptm_ctl |= (uint8_t)attrs[i].ka_val <<
848 848 CPC_P6_PES_UMASK_SHIFT;
849 849 } else if (strncmp(attrs[i].ka_name, "cmask", 5) == 0) {
850 850 if ((attrs[i].ka_val | CPC_P6_PES_CMASK_MASK) !=
851 851 CPC_P6_PES_CMASK_MASK) {
852 852 kmem_free(conf,
853 853 sizeof (ptm_pcbe_config_t));
854 854 return (CPC_ATTRIBUTE_OUT_OF_RANGE);
855 855 }
856 856 conf->ptm_ctl |= (uint8_t)attrs[i].ka_val <<
857 857 CPC_P6_PES_CMASK_SHIFT;
858 858 } else if (strncmp(attrs[i].ka_name, "int", 3) == 0) {
859 859 if (attrs[i].ka_val != 0)
860 860 ptm_flags |= P6_INT;
861 861 } else {
862 862 kmem_free(conf, sizeof (ptm_pcbe_config_t));
863 863 return (CPC_INVALID_ATTRIBUTE);
864 864 }
865 865 }
866 866
867 867 if (flags & CPC_OVF_NOTIFY_EMT)
868 868 /*
869 869 * If the user has requested notification of overflows,
870 870 * we automatically program the hardware to generate
871 871 * overflow interrupts.
872 872 */
873 873 ptm_flags |= P6_INT;
874 874 if (flags & CPC_COUNT_USER)
875 875 conf->ptm_ctl |= (1 << CPC_P6_PES_USR);
876 876 if (flags & CPC_COUNT_SYSTEM)
877 877 conf->ptm_ctl |= (1 << CPC_P6_PES_OS);
878 878 if ((ptm_flags & P6_NOEDGE) == 0)
879 879 conf->ptm_ctl |= (1 << CPC_P6_PES_E);
880 880 if (ptm_flags & P6_PC)
881 881 conf->ptm_ctl |= (1 << CPC_P6_PES_PC);
882 882 if (ptm_flags & P6_INV)
883 883 conf->ptm_ctl |= (1 << CPC_P6_PES_INV);
884 884 if (ptm_flags & P6_INT)
885 885 conf->ptm_ctl |= (1 << CPC_P6_PES_INT);
886 886
887 887 ASSERT((n->bits | CPC_P6_PES_PIC0_MASK) ==
888 888 CPC_P6_PES_PIC0_MASK);
889 889
890 890 conf->ptm_ctl |= n->bits;
891 891 }
892 892
893 893 *data = conf;
894 894 return (0);
895 895 }
896 896
897 897 static void
898 898 ptm_pcbe_program(void *token)
899 899 {
900 900 ptm_pcbe_config_t *pic0;
901 901 ptm_pcbe_config_t *pic1;
902 902 ptm_pcbe_config_t *tmp;
903 903 ptm_pcbe_config_t empty = { 1, 0, 0 }; /* assume pic1 to start */
904 904
905 905 if ((pic0 = kcpc_next_config(token, NULL, NULL)) == NULL)
906 906 panic("ptm_pcbe: token %p has no configs", token);
907 907
908 908 if ((pic1 = kcpc_next_config(token, pic0, NULL)) == NULL)
909 909 pic1 = ∅
910 910
911 911 if (pic0->ptm_picno != 0) {
912 912 empty.ptm_picno = 0;
913 913 tmp = pic1;
914 914 pic1 = pic0;
915 915 pic0 = tmp;
916 916 }
917 917
918 918 ASSERT(pic0->ptm_picno == 0 && pic1->ptm_picno == 1);
919 919
920 920 if (ptm_rdpmc_avail) {
921 921 ulong_t curcr4 = getcr4();
922 922 if (kcpc_allow_nonpriv(token))
923 923 setcr4(curcr4 | CR4_PCE);
924 924 else
925 925 setcr4(curcr4 & ~CR4_PCE);
926 926 }
927 927
928 928 if (ptm_ver == PTM_VER_P5) {
929 929 wrmsr(P5_CESR, ALL_STOPPED);
930 930 wrmsr(P5_CTR0, pic0->ptm_rawpic);
931 931 wrmsr(P5_CTR1, pic1->ptm_rawpic);
932 932 wrmsr(P5_CESR, pic0->ptm_ctl | pic1->ptm_ctl);
933 933 pic0->ptm_rawpic = rdmsr(P5_CTR0);
934 934 pic1->ptm_rawpic = rdmsr(P5_CTR1);
935 935 } else {
936 936 uint64_t pes;
937 937 wrmsr(REG_PERFEVNT0, ALL_STOPPED);
938 938 wrmsr(REG_PERFCTR0, pic0->ptm_rawpic);
939 939 wrmsr(REG_PERFCTR1, pic1->ptm_rawpic);
940 940 pes = pic1->ptm_ctl;
941 941 DTRACE_PROBE1(ptm__pes1, uint64_t, pes);
942 942 wrmsr(REG_PERFEVNT1, pes);
943 943 pes = pic0->ptm_ctl | (1 << CPC_P6_PES_EN);
944 944 DTRACE_PROBE1(ptm__pes0, uint64_t, pes);
945 945 wrmsr(REG_PERFEVNT0, pes);
946 946 }
947 947 }
948 948
949 949 static void
950 950 ptm_pcbe_allstop(void)
951 951 {
952 952 if (ptm_ver == PTM_VER_P5)
953 953 wrmsr(P5_CESR, ALL_STOPPED);
954 954 else {
955 955 wrmsr(REG_PERFEVNT0, ALL_STOPPED);
956 956 setcr4(getcr4() & ~CR4_PCE);
957 957 }
958 958 }
959 959
960 960 static void
961 961 ptm_pcbe_sample(void *token)
962 962 {
963 963 ptm_pcbe_config_t *pic0;
964 964 ptm_pcbe_config_t *pic1;
965 965 ptm_pcbe_config_t *swap;
966 966 ptm_pcbe_config_t empty = { 1, 0, 0 }; /* assume pic1 to start */
967 967 uint64_t tmp;
968 968 uint64_t *pic0_data;
969 969 uint64_t *pic1_data;
970 970 uint64_t *dtmp;
971 971 uint64_t curpic[2];
972 972
973 973 if ((pic0 = kcpc_next_config(token, NULL, &pic0_data)) == NULL)
974 974 panic("ptm_pcbe: token %p has no configs", token);
975 975
976 976 if ((pic1 = kcpc_next_config(token, pic0, &pic1_data)) == NULL) {
977 977 pic1 = ∅
978 978 pic1_data = &tmp;
979 979 }
980 980
981 981 if (pic0->ptm_picno != 0) {
982 982 empty.ptm_picno = 0;
983 983 swap = pic0;
984 984 pic0 = pic1;
985 985 pic1 = swap;
986 986 dtmp = pic0_data;
987 987 pic0_data = pic1_data;
988 988 pic1_data = dtmp;
989 989 }
990 990
991 991 ASSERT(pic0->ptm_picno == 0 && pic1->ptm_picno == 1);
992 992
993 993 if (ptm_ver == PTM_VER_P5) {
994 994 curpic[0] = rdmsr(P5_CTR0);
995 995 curpic[1] = rdmsr(P5_CTR1);
996 996 } else {
997 997 curpic[0] = rdmsr(REG_PERFCTR0);
998 998 curpic[1] = rdmsr(REG_PERFCTR1);
999 999 }
1000 1000
1001 1001 DTRACE_PROBE1(ptm__curpic0, uint64_t, curpic[0]);
1002 1002 DTRACE_PROBE1(ptm__curpic1, uint64_t, curpic[1]);
1003 1003
1004 1004 *pic0_data += diff3931(curpic[0], pic0->ptm_rawpic);
1005 1005 pic0->ptm_rawpic = trunc3931(*pic0_data);
1006 1006
1007 1007 *pic1_data += diff3931(curpic[1], pic1->ptm_rawpic);
1008 1008 pic1->ptm_rawpic = trunc3931(*pic1_data);
1009 1009 }
1010 1010
1011 1011 static void
1012 1012 ptm_pcbe_free(void *config)
1013 1013 {
1014 1014 kmem_free(config, sizeof (ptm_pcbe_config_t));
1015 1015 }
1016 1016
1017 1017 /*
1018 1018 * Virtualizes the 40-bit field of the %pic
1019 1019 * register into a 64-bit software register.
1020 1020 *
1021 1021 * We can retrieve 40 (signed) bits from the counters,
1022 1022 * but we can set only 32 (signed) bits into the counters.
1023 1023 * This makes virtualizing more than 31-bits of registers
1024 1024 * quite tricky.
1025 1025 *
1026 1026 * If bits 39 to 31 are set in the virtualized pic register,
1027 1027 * then we can preset the counter to this value using the fact
1028 1028 * that wrmsr sign extends bit 31. Though it might look easier
1029 1029 * to only use the bottom 31-bits of the register, we have to allow
1030 1030 * the full 40-bits to be used to perform overflow profiling.
1031 1031 */
1032 1032
#define	MASK40		UINT64_C(0xffffffffff)
#define	MASK31		UINT64_C(0x7fffffff)
#define	BITS_39_31	UINT64_C(0xff80000000)

/*
 * Return how far the counter advanced from 'old' to 'sample'.
 *
 * If bits 39..31 of 'old' are all set, the counter is treated as 40 bits
 * wide; otherwise as 31 bits wide.  'sample' is masked to that width before
 * subtracting, and a negative difference is taken to mean the counter
 * wrapped once, so the width's modulus is added back.
 */
static int64_t
diff3931(uint64_t sample, uint64_t old)
{
	uint64_t mask;
	uint64_t modulus;
	int64_t delta;

	if ((old & BITS_39_31) == BITS_39_31) {
		mask = MASK40;
		modulus = UINT64_C(1) << 40;
	} else {
		mask = MASK31;
		modulus = UINT64_C(1) << 31;
	}

	delta = (int64_t)((sample & mask) - old);
	if (delta < 0)
		delta = (int64_t)((uint64_t)delta + modulus);

	return (delta);
}

/*
 * Reduce a 64-bit virtualized count to a raw counter value: keep the low
 * 40 bits when bits 39..31 are all set, otherwise keep the low 31 bits.
 */
static uint64_t
trunc3931(uint64_t value)
{
	uint64_t mask;

	mask = ((value & BITS_39_31) == BITS_39_31) ? MASK40 : MASK31;
	return (value & mask);
}
1061 1061
1062 1062 static struct modlpcbe modlpcbe = {
1063 1063 &mod_pcbeops,
1064 1064 "Pentium Performance Counters",
1065 1065 &ptm_pcbe_ops
1066 1066 };
1067 1067
1068 1068 static struct modlinkage modl = {
1069 1069 MODREV_1,
1070 - &modlpcbe,
1070 + { &modlpcbe, NULL }
1071 1071 };
1072 1072
1073 1073 int
1074 1074 _init(void)
1075 1075 {
1076 1076 if (ptm_pcbe_init() != 0)
1077 1077 return (ENOTSUP);
1078 1078 return (mod_install(&modl));
1079 1079 }
1080 1080
1081 1081 int
1082 1082 _fini(void)
1083 1083 {
1084 1084 return (mod_remove(&modl));
1085 1085 }
1086 1086
1087 1087 int
1088 1088 _info(struct modinfo *mi)
1089 1089 {
1090 1090 return (mod_info(&modl, mi));
1091 1091 }
↓ open down ↓ |
11 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX