Print this page
10597 would like a way to set NMI behavior at boot
Reviewed by: Robert Mustacchi <rm@joyent.com>
Reviewed by: Andy Fiddaman <andy@omniosce.org>
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/i86pc/io/pcplusmp/apic_common.c
+++ new/usr/src/uts/i86pc/io/pcplusmp/apic_common.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
↓ open down ↓ |
15 lines elided |
↑ open up ↑ |
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
24 24 */
25 25 /*
26 - * Copyright 2018 Joyent, Inc.
26 + * Copyright 2019, Joyent, Inc.
27 27 * Copyright (c) 2016, 2017 by Delphix. All rights reserved.
28 28 */
29 29
30 30 /*
31 31 * PSMI 1.1 extensions are supported only in 2.6 and later versions.
32 32 * PSMI 1.2 extensions are supported only in 2.7 and later versions.
33 33 * PSMI 1.3 and 1.4 extensions are supported in Solaris 10.
34 34 * PSMI 1.5 extensions are supported in Solaris Nevada.
35 35 * PSMI 1.6 extensions are supported in Solaris Nevada.
36 36 * PSMI 1.7 extensions are supported in Solaris Nevada.
37 37 */
38 38 #define PSMI_1_7
39 39
40 40 #include <sys/processor.h>
41 41 #include <sys/time.h>
42 42 #include <sys/psm.h>
43 43 #include <sys/smp_impldefs.h>
44 44 #include <sys/cram.h>
45 45 #include <sys/acpi/acpi.h>
46 46 #include <sys/acpica.h>
47 47 #include <sys/psm_common.h>
48 48 #include <sys/apic.h>
49 49 #include <sys/pit.h>
50 50 #include <sys/ddi.h>
51 51 #include <sys/sunddi.h>
52 52 #include <sys/ddi_impldefs.h>
53 53 #include <sys/pci.h>
54 54 #include <sys/promif.h>
55 55 #include <sys/x86_archext.h>
56 56 #include <sys/cpc_impl.h>
57 57 #include <sys/uadmin.h>
58 58 #include <sys/panic.h>
59 59 #include <sys/debug.h>
60 60 #include <sys/archsystm.h>
61 61 #include <sys/trap.h>
62 62 #include <sys/machsystm.h>
63 63 #include <sys/sysmacros.h>
64 64 #include <sys/cpuvar.h>
65 65 #include <sys/rm_platter.h>
66 66 #include <sys/privregs.h>
67 67 #include <sys/note.h>
68 68 #include <sys/pci_intr_lib.h>
69 69 #include <sys/spl.h>
70 70 #include <sys/clock.h>
71 71 #include <sys/dditypes.h>
72 72 #include <sys/sunddi.h>
73 73 #include <sys/x_call.h>
74 74 #include <sys/reboot.h>
75 75 #include <sys/hpet.h>
76 76 #include <sys/apic_common.h>
77 77 #include <sys/apic_timer.h>
78 78
79 79 static void apic_record_ioapic_rdt(void *intrmap_private,
80 80 ioapic_rdt_t *irdt);
81 81 static void apic_record_msi(void *intrmap_private, msi_regs_t *mregs);
82 82
83 83 /*
84 84 * Common routines between pcplusmp & apix (taken from apic.c).
85 85 */
86 86
87 87 int apic_clkinit(int);
88 88 hrtime_t apic_gethrtime(void);
89 89 void apic_send_ipi(int, int);
90 90 void apic_set_idlecpu(processorid_t);
91 91 void apic_unset_idlecpu(processorid_t);
92 92 void apic_shutdown(int, int);
93 93 void apic_preshutdown(int, int);
94 94 processorid_t apic_get_next_processorid(processorid_t);
95 95
96 96 hrtime_t apic_gettime();
97 97
98 98 enum apic_ioapic_method_type apix_mul_ioapic_method = APIC_MUL_IOAPIC_PCPLUSMP;
99 99
100 100 /* Now the ones for Dynamic Interrupt distribution */
101 101 int apic_enable_dynamic_migration = 0;
102 102
103 103 /* maximum loop count when sending Start IPIs. */
104 104 int apic_sipi_max_loop_count = 0x1000;
105 105
106 106 /*
107 107 * These variables are frequently accessed in apic_intr_enter(),
108 108 * apic_intr_exit and apic_setspl, so group them together
109 109 */
110 110 volatile uint32_t *apicadr = NULL; /* virtual addr of local APIC */
111 111 int apic_setspl_delay = 1; /* apic_setspl - delay enable */
112 112 int apic_clkvect;
113 113
114 114 /* vector at which error interrupts come in */
115 115 int apic_errvect;
116 116 int apic_enable_error_intr = 1;
117 117 int apic_error_display_delay = 100;
118 118
119 119 /* vector at which performance counter overflow interrupts come in */
120 120 int apic_cpcovf_vect;
121 121 int apic_enable_cpcovf_intr = 1;
122 122
123 123 /* vector at which CMCI interrupts come in */
124 124 int apic_cmci_vect;
125 125 extern void cmi_cmci_trap(void);
126 126
127 127 lock_t apic_mode_switch_lock;
128 128
129 129 int apic_pir_vect;
130 130
131 131 /*
132 132 * Patchable global variables.
133 133 */
134 134 int apic_forceload = 0;
135 135
136 136 int apic_coarse_hrtime = 1; /* 0 - use accurate slow gethrtime() */
137 137
138 138 int apic_flat_model = 0; /* 0 - clustered. 1 - flat */
139 139 int apic_panic_on_nmi = 0;
140 140 int apic_panic_on_apic_error = 0;
141 141
142 142 int apic_verbose = 0; /* 0x1ff */
143 143
144 144 #ifdef DEBUG
145 145 int apic_debug = 0;
146 146 int apic_restrict_vector = 0;
147 147
148 148 int apic_debug_msgbuf[APIC_DEBUG_MSGBUFSIZE];
149 149 int apic_debug_msgbufindex = 0;
150 150
151 151 #endif /* DEBUG */
152 152
153 153 uint_t apic_nticks = 0;
154 154 uint_t apic_skipped_redistribute = 0;
155 155
156 156 uint_t last_count_read = 0;
157 157 lock_t apic_gethrtime_lock;
158 158 volatile int apic_hrtime_stamp = 0;
159 159 volatile hrtime_t apic_nsec_since_boot = 0;
160 160
161 161 static hrtime_t apic_last_hrtime = 0;
162 162 int apic_hrtime_error = 0;
163 163 int apic_remote_hrterr = 0;
164 164 int apic_num_nmis = 0;
165 165 int apic_apic_error = 0;
166 166 int apic_num_apic_errors = 0;
167 167 int apic_num_cksum_errors = 0;
168 168
169 169 int apic_error = 0;
170 170
171 171 static int apic_cmos_ssb_set = 0;
172 172
173 173 /* use to make sure only one cpu handles the nmi */
174 174 lock_t apic_nmi_lock;
175 175 /* use to make sure only one cpu handles the error interrupt */
176 176 lock_t apic_error_lock;
177 177
178 178 static struct {
179 179 uchar_t cntl;
180 180 uchar_t data;
181 181 } aspen_bmc[] = {
182 182 { CC_SMS_WR_START, 0x18 }, /* NetFn/LUN */
183 183 { CC_SMS_WR_NEXT, 0x24 }, /* Cmd SET_WATCHDOG_TIMER */
184 184 { CC_SMS_WR_NEXT, 0x84 }, /* DataByte 1: SMS/OS no log */
185 185 { CC_SMS_WR_NEXT, 0x2 }, /* DataByte 2: Power Down */
186 186 { CC_SMS_WR_NEXT, 0x0 }, /* DataByte 3: no pre-timeout */
187 187 { CC_SMS_WR_NEXT, 0x0 }, /* DataByte 4: timer expir. */
188 188 { CC_SMS_WR_NEXT, 0xa }, /* DataByte 5: init countdown */
189 189 { CC_SMS_WR_END, 0x0 }, /* DataByte 6: init countdown */
190 190
191 191 { CC_SMS_WR_START, 0x18 }, /* NetFn/LUN */
192 192 { CC_SMS_WR_END, 0x22 } /* Cmd RESET_WATCHDOG_TIMER */
193 193 };
194 194
195 195 static struct {
196 196 int port;
197 197 uchar_t data;
198 198 } sitka_bmc[] = {
199 199 { SMS_COMMAND_REGISTER, SMS_WRITE_START },
200 200 { SMS_DATA_REGISTER, 0x18 }, /* NetFn/LUN */
201 201 { SMS_DATA_REGISTER, 0x24 }, /* Cmd SET_WATCHDOG_TIMER */
202 202 { SMS_DATA_REGISTER, 0x84 }, /* DataByte 1: SMS/OS no log */
203 203 { SMS_DATA_REGISTER, 0x2 }, /* DataByte 2: Power Down */
204 204 { SMS_DATA_REGISTER, 0x0 }, /* DataByte 3: no pre-timeout */
205 205 { SMS_DATA_REGISTER, 0x0 }, /* DataByte 4: timer expir. */
206 206 { SMS_DATA_REGISTER, 0xa }, /* DataByte 5: init countdown */
207 207 { SMS_COMMAND_REGISTER, SMS_WRITE_END },
208 208 { SMS_DATA_REGISTER, 0x0 }, /* DataByte 6: init countdown */
209 209
210 210 { SMS_COMMAND_REGISTER, SMS_WRITE_START },
211 211 { SMS_DATA_REGISTER, 0x18 }, /* NetFn/LUN */
212 212 { SMS_COMMAND_REGISTER, SMS_WRITE_END },
213 213 { SMS_DATA_REGISTER, 0x22 } /* Cmd RESET_WATCHDOG_TIMER */
214 214 };
215 215
216 216 /* Patchable global variables. */
217 217 int apic_kmdb_on_nmi = 0; /* 0 - no, 1 - yes enter kmdb */
218 218 uint32_t apic_divide_reg_init = 0; /* 0 - divide by 2 */
219 219
220 220 /* default apic ops without interrupt remapping */
221 221 static apic_intrmap_ops_t apic_nointrmap_ops = {
222 222 (int (*)(int))return_instr,
223 223 (void (*)(int))return_instr,
224 224 (void (*)(void **, dev_info_t *, uint16_t, int, uchar_t))return_instr,
225 225 (void (*)(void *, void *, uint16_t, int))return_instr,
226 226 (void (*)(void **))return_instr,
227 227 apic_record_ioapic_rdt,
228 228 apic_record_msi,
229 229 };
230 230
231 231 apic_intrmap_ops_t *apic_vt_ops = &apic_nointrmap_ops;
232 232 apic_cpus_info_t *apic_cpus = NULL;
233 233 cpuset_t apic_cpumask;
234 234 uint_t apic_picinit_called;
235 235
236 236 /* Flag to indicate that we need to shut down all processors */
237 237 static uint_t apic_shutdown_processors;
238 238
239 239 /*
240 240 * Probe the ioapic method for apix module. Called in apic_probe_common()
241 241 */
242 242 int
243 243 apic_ioapic_method_probe()
244 244 {
245 245 if (apix_enable == 0)
246 246 return (PSM_SUCCESS);
247 247
248 248 /*
249 249 * Set IOAPIC EOI handling method. The priority from low to high is:
250 250 * 1. IOxAPIC: with EOI register
251 251 * 2. IOMMU interrupt mapping
252 252 * 3. Mask-Before-EOI method for systems without boot
253 253 * interrupt routing, such as systems with only one IOAPIC;
254 254 * NVIDIA CK8-04/MCP55 systems; systems with bridge solution
255 255 * which disables the boot interrupt routing already.
256 256 * 4. Directed EOI
257 257 */
258 258 if (apic_io_ver[0] >= 0x20)
259 259 apix_mul_ioapic_method = APIC_MUL_IOAPIC_IOXAPIC;
260 260 if ((apic_io_max == 1) || (apic_nvidia_io_max == apic_io_max))
261 261 apix_mul_ioapic_method = APIC_MUL_IOAPIC_MASK;
262 262 if (apic_directed_EOI_supported())
263 263 apix_mul_ioapic_method = APIC_MUL_IOAPIC_DEOI;
264 264
265 265 /* fall back to pcplusmp */
266 266 if (apix_mul_ioapic_method == APIC_MUL_IOAPIC_PCPLUSMP) {
267 267 /* make sure apix is after pcplusmp in /etc/mach */
268 268 apix_enable = 0; /* go ahead with pcplusmp install next */
269 269 return (PSM_FAILURE);
270 270 }
271 271
272 272 return (PSM_SUCCESS);
273 273 }
274 274
275 275 /*
276 276 * handler for APIC Error interrupt. Just print a warning and continue
277 277 */
278 278 int
279 279 apic_error_intr()
280 280 {
281 281 uint_t error0, error1, error;
282 282 uint_t i;
283 283
284 284 /*
285 285 * We need to write before read as per 7.4.17 of system prog manual.
286 286 * We do both and or the results to be safe
287 287 */
288 288 error0 = apic_reg_ops->apic_read(APIC_ERROR_STATUS);
289 289 apic_reg_ops->apic_write(APIC_ERROR_STATUS, 0);
290 290 error1 = apic_reg_ops->apic_read(APIC_ERROR_STATUS);
291 291 error = error0 | error1;
292 292
293 293 /*
294 294 * Clear the APIC error status (do this on all cpus that enter here)
295 295 * (two writes are required due to the semantics of accessing the
296 296 * error status register.)
297 297 */
298 298 apic_reg_ops->apic_write(APIC_ERROR_STATUS, 0);
299 299 apic_reg_ops->apic_write(APIC_ERROR_STATUS, 0);
300 300
301 301 /*
302 302 * Prevent more than 1 CPU from handling error interrupt causing
303 303 * double printing (interleave of characters from multiple
304 304 * CPU's when using prom_printf)
305 305 */
306 306 if (lock_try(&apic_error_lock) == 0)
307 307 return (error ? DDI_INTR_CLAIMED : DDI_INTR_UNCLAIMED);
308 308 if (error) {
309 309 #if DEBUG
310 310 if (apic_debug)
311 311 debug_enter("pcplusmp: APIC Error interrupt received");
312 312 #endif /* DEBUG */
313 313 if (apic_panic_on_apic_error)
314 314 cmn_err(CE_PANIC,
315 315 "APIC Error interrupt on CPU %d. Status = %x",
316 316 psm_get_cpu_id(), error);
317 317 else {
318 318 if ((error & ~APIC_CS_ERRORS) == 0) {
319 319 /* cksum error only */
320 320 apic_error |= APIC_ERR_APIC_ERROR;
321 321 apic_apic_error |= error;
322 322 apic_num_apic_errors++;
323 323 apic_num_cksum_errors++;
324 324 } else {
325 325 /*
326 326 * prom_printf is the best shot we have of
327 327 * something which is problem free from
328 328 * high level/NMI type of interrupts
329 329 */
330 330 prom_printf("APIC Error interrupt on CPU %d. "
331 331 "Status 0 = %x, Status 1 = %x\n",
332 332 psm_get_cpu_id(), error0, error1);
333 333 apic_error |= APIC_ERR_APIC_ERROR;
334 334 apic_apic_error |= error;
335 335 apic_num_apic_errors++;
336 336 for (i = 0; i < apic_error_display_delay; i++) {
337 337 tenmicrosec();
338 338 }
339 339 /*
340 340 * provide more delay next time limited to
341 341 * roughly 1 clock tick time
342 342 */
343 343 if (apic_error_display_delay < 500)
344 344 apic_error_display_delay *= 2;
345 345 }
346 346 }
347 347 lock_clear(&apic_error_lock);
348 348 return (DDI_INTR_CLAIMED);
349 349 } else {
350 350 lock_clear(&apic_error_lock);
351 351 return (DDI_INTR_UNCLAIMED);
352 352 }
353 353 }
354 354
355 355 /*
356 356 * Turn off the mask bit in the performance counter Local Vector Table entry.
357 357 */
358 358 void
359 359 apic_cpcovf_mask_clear(void)
360 360 {
361 361 apic_reg_ops->apic_write(APIC_PCINT_VECT,
362 362 (apic_reg_ops->apic_read(APIC_PCINT_VECT) & ~APIC_LVT_MASK));
363 363 }
364 364
365 365 /*ARGSUSED*/
366 366 static int
367 367 apic_cmci_enable(xc_arg_t arg1, xc_arg_t arg2, xc_arg_t arg3)
368 368 {
369 369 apic_reg_ops->apic_write(APIC_CMCI_VECT, apic_cmci_vect);
370 370 return (0);
371 371 }
372 372
373 373 /*ARGSUSED*/
374 374 static int
375 375 apic_cmci_disable(xc_arg_t arg1, xc_arg_t arg2, xc_arg_t arg3)
376 376 {
377 377 apic_reg_ops->apic_write(APIC_CMCI_VECT, apic_cmci_vect | AV_MASK);
378 378 return (0);
379 379 }
380 380
381 381 void
382 382 apic_cmci_setup(processorid_t cpuid, boolean_t enable)
383 383 {
384 384 cpuset_t cpu_set;
385 385
386 386 CPUSET_ONLY(cpu_set, cpuid);
387 387
388 388 if (enable) {
389 389 xc_call(NULL, NULL, NULL, CPUSET2BV(cpu_set),
390 390 (xc_func_t)apic_cmci_enable);
391 391 } else {
392 392 xc_call(NULL, NULL, NULL, CPUSET2BV(cpu_set),
393 393 (xc_func_t)apic_cmci_disable);
394 394 }
395 395 }
396 396
397 397 static void
398 398 apic_disable_local_apic(void)
399 399 {
400 400 apic_reg_ops->apic_write_task_reg(APIC_MASK_ALL);
401 401 apic_reg_ops->apic_write(APIC_LOCAL_TIMER, AV_MASK);
402 402
403 403 /* local intr reg 0 */
404 404 apic_reg_ops->apic_write(APIC_INT_VECT0, AV_MASK);
405 405
406 406 /* disable NMI */
407 407 apic_reg_ops->apic_write(APIC_INT_VECT1, AV_MASK);
408 408
409 409 /* and error interrupt */
410 410 apic_reg_ops->apic_write(APIC_ERR_VECT, AV_MASK);
411 411
412 412 /* and perf counter intr */
413 413 apic_reg_ops->apic_write(APIC_PCINT_VECT, AV_MASK);
414 414
415 415 apic_reg_ops->apic_write(APIC_SPUR_INT_REG, APIC_SPUR_INTR);
416 416 }
417 417
418 418 static void
419 419 apic_cpu_send_SIPI(processorid_t cpun, boolean_t start)
420 420 {
421 421 int loop_count;
422 422 uint32_t vector;
423 423 uint_t apicid;
424 424 ulong_t iflag;
425 425
426 426 apicid = apic_cpus[cpun].aci_local_id;
427 427
428 428 /*
429 429 * Interrupts on current CPU will be disabled during the
430 430 * steps in order to avoid unwanted side effects from
431 431 * executing interrupt handlers on a problematic BIOS.
432 432 */
433 433 iflag = intr_clear();
434 434
435 435 if (start) {
436 436 outb(CMOS_ADDR, SSB);
437 437 outb(CMOS_DATA, BIOS_SHUTDOWN);
438 438 }
439 439
440 440 /*
441 441 * According to X2APIC specification in section '2.3.5.1' of
442 442 * Interrupt Command Register Semantics, the semantics of
443 443 * programming the Interrupt Command Register to dispatch an interrupt
444 444 * is simplified. A single MSR write to the 64-bit ICR is required
445 445 * for dispatching an interrupt. Specifically, with the 64-bit MSR
446 446 * interface to ICR, system software is not required to check the
447 447 * status of the delivery status bit prior to writing to the ICR
448 448 * to send an IPI. With the removal of the Delivery Status bit,
449 449 * system software no longer has a reason to read the ICR. It remains
450 450 * readable only to aid in debugging.
451 451 */
452 452 #ifdef DEBUG
453 453 APIC_AV_PENDING_SET();
454 454 #else
455 455 if (apic_mode == LOCAL_APIC) {
456 456 APIC_AV_PENDING_SET();
457 457 }
458 458 #endif /* DEBUG */
459 459
460 460 /* for integrated - make sure there is one INIT IPI in buffer */
461 461 /* for external - it will wake up the cpu */
462 462 apic_reg_ops->apic_write_int_cmd(apicid, AV_ASSERT | AV_RESET);
463 463
464 464 /* If only 1 CPU is installed, PENDING bit will not go low */
465 465 for (loop_count = apic_sipi_max_loop_count; loop_count; loop_count--) {
466 466 if (apic_mode == LOCAL_APIC &&
467 467 apic_reg_ops->apic_read(APIC_INT_CMD1) & AV_PENDING)
468 468 apic_ret();
469 469 else
470 470 break;
471 471 }
472 472
473 473 apic_reg_ops->apic_write_int_cmd(apicid, AV_DEASSERT | AV_RESET);
474 474 drv_usecwait(20000); /* 20 milli sec */
475 475
476 476 if (apic_cpus[cpun].aci_local_ver >= APIC_INTEGRATED_VERS) {
477 477 /* integrated apic */
478 478
479 479 vector = (rm_platter_pa >> MMU_PAGESHIFT) &
480 480 (APIC_VECTOR_MASK | APIC_IPL_MASK);
481 481
482 482 /* to offset the INIT IPI queue up in the buffer */
483 483 apic_reg_ops->apic_write_int_cmd(apicid, vector | AV_STARTUP);
484 484 drv_usecwait(200); /* 20 micro sec */
485 485
486 486 /*
487 487 * send the second SIPI (Startup IPI) as recommended by Intel
488 488 * software development manual.
489 489 */
490 490 apic_reg_ops->apic_write_int_cmd(apicid, vector | AV_STARTUP);
491 491 drv_usecwait(200); /* 20 micro sec */
492 492 }
493 493
494 494 intr_restore(iflag);
495 495 }
496 496
497 497 /*ARGSUSED1*/
498 498 int
499 499 apic_cpu_start(processorid_t cpun, caddr_t arg)
500 500 {
501 501 ASSERT(MUTEX_HELD(&cpu_lock));
502 502
503 503 if (!apic_cpu_in_range(cpun)) {
504 504 return (EINVAL);
505 505 }
506 506
507 507 /*
508 508 * Switch to apic_common_send_ipi for safety during starting other CPUs.
509 509 */
510 510 if (apic_mode == LOCAL_X2APIC) {
511 511 apic_switch_ipi_callback(B_TRUE);
512 512 }
513 513
514 514 apic_cmos_ssb_set = 1;
515 515 apic_cpu_send_SIPI(cpun, B_TRUE);
516 516
517 517 return (0);
518 518 }
519 519
520 520 /*
521 521 * Put CPU into halted state with interrupts disabled.
522 522 */
523 523 /*ARGSUSED1*/
524 524 int
525 525 apic_cpu_stop(processorid_t cpun, caddr_t arg)
526 526 {
527 527 int rc;
528 528 cpu_t *cp;
529 529 extern cpuset_t cpu_ready_set;
530 530 extern void cpu_idle_intercept_cpu(cpu_t *cp);
531 531
532 532 ASSERT(MUTEX_HELD(&cpu_lock));
533 533
534 534 if (!apic_cpu_in_range(cpun)) {
535 535 return (EINVAL);
536 536 }
537 537 if (apic_cpus[cpun].aci_local_ver < APIC_INTEGRATED_VERS) {
538 538 return (ENOTSUP);
539 539 }
540 540
541 541 cp = cpu_get(cpun);
542 542 ASSERT(cp != NULL);
543 543 ASSERT((cp->cpu_flags & CPU_OFFLINE) != 0);
544 544 ASSERT((cp->cpu_flags & CPU_QUIESCED) != 0);
545 545 ASSERT((cp->cpu_flags & CPU_ENABLE) == 0);
546 546
547 547 /* Clear CPU_READY flag to disable cross calls. */
548 548 cp->cpu_flags &= ~CPU_READY;
549 549 CPUSET_ATOMIC_DEL(cpu_ready_set, cpun);
550 550 rc = xc_flush_cpu(cp);
551 551 if (rc != 0) {
552 552 CPUSET_ATOMIC_ADD(cpu_ready_set, cpun);
553 553 cp->cpu_flags |= CPU_READY;
554 554 return (rc);
555 555 }
556 556
557 557 /* Intercept target CPU at a safe point before powering it off. */
558 558 cpu_idle_intercept_cpu(cp);
559 559
560 560 apic_cpu_send_SIPI(cpun, B_FALSE);
561 561 cp->cpu_flags &= ~CPU_RUNNING;
562 562
563 563 return (0);
564 564 }
565 565
566 566 int
567 567 apic_cpu_ops(psm_cpu_request_t *reqp)
568 568 {
569 569 if (reqp == NULL) {
570 570 return (EINVAL);
571 571 }
572 572
573 573 switch (reqp->pcr_cmd) {
574 574 case PSM_CPU_ADD:
575 575 return (apic_cpu_add(reqp));
576 576
577 577 case PSM_CPU_REMOVE:
578 578 return (apic_cpu_remove(reqp));
579 579
580 580 case PSM_CPU_STOP:
581 581 return (apic_cpu_stop(reqp->req.cpu_stop.cpuid,
582 582 reqp->req.cpu_stop.ctx));
583 583
584 584 default:
585 585 return (ENOTSUP);
586 586 }
587 587 }
588 588
#ifdef DEBUG
/* Debugging tunables; only present on DEBUG kernels. */
int	apic_break_on_cpu = 9;
int	apic_stretch_interrupts = 0;
int	apic_stretch_ISR = 1 << 3;	/* IPL of 3 matches nothing now */
#endif /* DEBUG */
594 594
595 595 /*
596 596 * generates an interprocessor interrupt to another CPU. Any changes made to
597 597 * this routine must be accompanied by similar changes to
598 598 * apic_common_send_ipi().
599 599 */
600 600 void
601 601 apic_send_ipi(int cpun, int ipl)
602 602 {
603 603 int vector;
604 604 ulong_t flag;
605 605
606 606 vector = apic_resv_vector[ipl];
607 607
608 608 ASSERT((vector >= APIC_BASE_VECT) && (vector <= APIC_SPUR_INTR));
609 609
610 610 flag = intr_clear();
611 611
612 612 APIC_AV_PENDING_SET();
613 613
614 614 apic_reg_ops->apic_write_int_cmd(apic_cpus[cpun].aci_local_id,
615 615 vector);
616 616
617 617 intr_restore(flag);
618 618 }
619 619
620 620 void
621 621 apic_send_pir_ipi(processorid_t cpun)
622 622 {
623 623 const int vector = apic_pir_vect;
624 624 ulong_t flag;
625 625
626 626 ASSERT((vector >= APIC_BASE_VECT) && (vector <= APIC_SPUR_INTR));
627 627
628 628 flag = intr_clear();
629 629
630 630 /* Self-IPI for inducing PIR makes no sense. */
631 631 if ((cpun != psm_get_cpu_id())) {
632 632 APIC_AV_PENDING_SET();
633 633 apic_reg_ops->apic_write_int_cmd(apic_cpus[cpun].aci_local_id,
634 634 vector);
635 635 }
636 636
637 637 intr_restore(flag);
638 638 }
639 639
640 640 int
641 641 apic_get_pir_ipivect(void)
642 642 {
643 643 return (apic_pir_vect);
644 644 }
645 645
646 646 /*ARGSUSED*/
647 647 void
648 648 apic_set_idlecpu(processorid_t cpun)
649 649 {
650 650 }
651 651
652 652 /*ARGSUSED*/
653 653 void
654 654 apic_unset_idlecpu(processorid_t cpun)
655 655 {
656 656 }
657 657
658 658
/*
 * Empty function; the busy-wait loops in this file call it on each
 * iteration.
 */
void
apic_ret()
{
}
663 663
664 664 /*
665 665 * If apic_coarse_time == 1, then apic_gettime() is used instead of
666 666 * apic_gethrtime(). This is used for performance instead of accuracy.
667 667 */
668 668
669 669 hrtime_t
670 670 apic_gettime()
671 671 {
672 672 int old_hrtime_stamp;
673 673 hrtime_t temp;
674 674
675 675 /*
676 676 * In one-shot mode, we do not keep time, so if anyone
677 677 * calls psm_gettime() directly, we vector over to
678 678 * gethrtime().
679 679 * one-shot mode MUST NOT be enabled if this psm is the source of
680 680 * hrtime.
681 681 */
682 682
683 683 if (apic_oneshot)
684 684 return (gethrtime());
685 685
686 686
687 687 gettime_again:
688 688 while ((old_hrtime_stamp = apic_hrtime_stamp) & 1)
689 689 apic_ret();
690 690
691 691 temp = apic_nsec_since_boot;
692 692
693 693 if (apic_hrtime_stamp != old_hrtime_stamp) { /* got an interrupt */
694 694 goto gettime_again;
695 695 }
696 696 return (temp);
697 697 }
698 698
699 699 /*
700 700 * Here we return the number of nanoseconds since booting. Note every
701 701 * clock interrupt increments apic_nsec_since_boot by the appropriate
702 702 * amount.
703 703 */
704 704 hrtime_t
705 705 apic_gethrtime(void)
706 706 {
707 707 int curr_timeval, countval, elapsed_ticks;
708 708 int old_hrtime_stamp, status;
709 709 hrtime_t temp;
710 710 uint32_t cpun;
711 711 ulong_t oflags;
712 712
713 713 /*
714 714 * In one-shot mode, we do not keep time, so if anyone
715 715 * calls psm_gethrtime() directly, we vector over to
716 716 * gethrtime().
717 717 * one-shot mode MUST NOT be enabled if this psm is the source of
718 718 * hrtime.
719 719 */
720 720
721 721 if (apic_oneshot)
722 722 return (gethrtime());
723 723
724 724 oflags = intr_clear(); /* prevent migration */
725 725
726 726 cpun = apic_reg_ops->apic_read(APIC_LID_REG);
727 727 if (apic_mode == LOCAL_APIC)
728 728 cpun >>= APIC_ID_BIT_OFFSET;
729 729
730 730 lock_set(&apic_gethrtime_lock);
731 731
732 732 gethrtime_again:
733 733 while ((old_hrtime_stamp = apic_hrtime_stamp) & 1)
734 734 apic_ret();
735 735
736 736 /*
737 737 * Check to see which CPU we are on. Note the time is kept on
738 738 * the local APIC of CPU 0. If on CPU 0, simply read the current
739 739 * counter. If on another CPU, issue a remote read command to CPU 0.
740 740 */
741 741 if (cpun == apic_cpus[0].aci_local_id) {
742 742 countval = apic_reg_ops->apic_read(APIC_CURR_COUNT);
743 743 } else {
744 744 #ifdef DEBUG
745 745 APIC_AV_PENDING_SET();
746 746 #else
747 747 if (apic_mode == LOCAL_APIC)
748 748 APIC_AV_PENDING_SET();
749 749 #endif /* DEBUG */
750 750
751 751 apic_reg_ops->apic_write_int_cmd(
752 752 apic_cpus[0].aci_local_id, APIC_CURR_ADD | AV_REMOTE);
753 753
754 754 while ((status = apic_reg_ops->apic_read(APIC_INT_CMD1))
755 755 & AV_READ_PENDING) {
756 756 apic_ret();
757 757 }
758 758
759 759 if (status & AV_REMOTE_STATUS) /* 1 = valid */
760 760 countval = apic_reg_ops->apic_read(APIC_REMOTE_READ);
761 761 else { /* 0 = invalid */
762 762 apic_remote_hrterr++;
763 763 /*
764 764 * return last hrtime right now, will need more
765 765 * testing if change to retry
766 766 */
767 767 temp = apic_last_hrtime;
768 768
769 769 lock_clear(&apic_gethrtime_lock);
770 770
771 771 intr_restore(oflags);
772 772
773 773 return (temp);
774 774 }
775 775 }
776 776 if (countval > last_count_read)
777 777 countval = 0;
778 778 else
779 779 last_count_read = countval;
780 780
781 781 elapsed_ticks = apic_hertz_count - countval;
782 782
783 783 curr_timeval = APIC_TICKS_TO_NSECS(elapsed_ticks);
784 784 temp = apic_nsec_since_boot + curr_timeval;
785 785
786 786 if (apic_hrtime_stamp != old_hrtime_stamp) { /* got an interrupt */
787 787 /* we might have clobbered last_count_read. Restore it */
788 788 last_count_read = apic_hertz_count;
789 789 goto gethrtime_again;
790 790 }
791 791
792 792 if (temp < apic_last_hrtime) {
793 793 /* return last hrtime if error occurs */
794 794 apic_hrtime_error++;
795 795 temp = apic_last_hrtime;
796 796 }
797 797 else
798 798 apic_last_hrtime = temp;
799 799
800 800 lock_clear(&apic_gethrtime_lock);
↓ open down ↓ |
764 lines elided |
↑ open up ↑ |
801 801 intr_restore(oflags);
802 802
803 803 return (temp);
804 804 }
805 805
806 806 /* apic NMI handler */
807 807 /*ARGSUSED*/
808 808 void
809 809 apic_nmi_intr(caddr_t arg, struct regs *rp)
810 810 {
811 + nmi_action_t action = nmi_action;
812 +
811 813 if (apic_shutdown_processors) {
812 814 apic_disable_local_apic();
813 815 return;
814 816 }
815 817
816 818 apic_error |= APIC_ERR_NMI;
817 819
818 820 if (!lock_try(&apic_nmi_lock))
819 821 return;
820 822 apic_num_nmis++;
821 823
822 - if (apic_kmdb_on_nmi && psm_debugger()) {
823 - debug_enter("NMI received: entering kmdb\n");
824 - } else if (apic_panic_on_nmi) {
825 - /* Keep panic from entering kmdb. */
826 - nopanicdebug = 1;
827 - panic("NMI received\n");
828 - } else {
824 + /*
825 + * "nmi_action" always over-rides the older way of doing this, unless we
826 + * can't actually drop into kmdb when requested.
827 + */
828 + if (action == NMI_ACTION_KMDB && !psm_debugger())
829 + action = NMI_ACTION_UNSET;
830 +
831 + if (action == NMI_ACTION_UNSET) {
832 + if (apic_kmdb_on_nmi && psm_debugger())
833 + action = NMI_ACTION_KMDB;
834 + else if (apic_panic_on_nmi)
835 + action = NMI_ACTION_PANIC;
836 + else
837 + action = NMI_ACTION_IGNORE;
838 + }
839 +
840 + switch (action) {
841 + case NMI_ACTION_IGNORE:
829 842 /*
830 843 * prom_printf is the best shot we have of something which is
831 844 * problem free from high level/NMI type of interrupts
832 845 */
833 846 prom_printf("NMI received\n");
847 + break;
848 +
849 + case NMI_ACTION_PANIC:
850 + /* Keep panic from entering kmdb. */
851 + nopanicdebug = 1;
852 + panic("NMI received\n");
853 + break;
854 +
855 + case NMI_ACTION_KMDB:
856 + default:
857 + debug_enter("NMI received: entering kmdb\n");
858 + break;
834 859 }
835 860
836 861 lock_clear(&apic_nmi_lock);
837 862 }
838 863
839 864 processorid_t
840 865 apic_get_next_processorid(processorid_t cpu_id)
841 866 {
842 867
843 868 int i;
844 869
845 870 if (cpu_id == -1)
846 871 return ((processorid_t)0);
847 872
848 873 for (i = cpu_id + 1; i < NCPU; i++) {
849 874 if (apic_cpu_in_range(i))
850 875 return (i);
851 876 }
852 877
853 878 return ((processorid_t)-1);
854 879 }
855 880
856 881 int
857 882 apic_cpu_add(psm_cpu_request_t *reqp)
858 883 {
859 884 int i, rv = 0;
860 885 ulong_t iflag;
861 886 boolean_t first = B_TRUE;
862 887 uchar_t localver = 0;
863 888 uint32_t localid, procid;
864 889 processorid_t cpuid = (processorid_t)-1;
865 890 mach_cpu_add_arg_t *ap;
866 891
867 892 ASSERT(reqp != NULL);
868 893 reqp->req.cpu_add.cpuid = (processorid_t)-1;
869 894
870 895 /* Check whether CPU hotplug is supported. */
871 896 if (!plat_dr_support_cpu() || apic_max_nproc == -1) {
872 897 return (ENOTSUP);
873 898 }
874 899
875 900 ap = (mach_cpu_add_arg_t *)reqp->req.cpu_add.argp;
876 901 switch (ap->type) {
877 902 case MACH_CPU_ARG_LOCAL_APIC:
878 903 localid = ap->arg.apic.apic_id;
879 904 procid = ap->arg.apic.proc_id;
880 905 if (localid >= 255 || procid > 255) {
881 906 cmn_err(CE_WARN,
882 907 "!apic: apicid(%u) or procid(%u) is invalid.",
883 908 localid, procid);
884 909 return (EINVAL);
885 910 }
886 911 break;
887 912
888 913 case MACH_CPU_ARG_LOCAL_X2APIC:
889 914 localid = ap->arg.apic.apic_id;
890 915 procid = ap->arg.apic.proc_id;
891 916 if (localid >= UINT32_MAX) {
892 917 cmn_err(CE_WARN,
893 918 "!apic: x2apicid(%u) is invalid.", localid);
894 919 return (EINVAL);
895 920 } else if (localid >= 255 && apic_mode == LOCAL_APIC) {
896 921 cmn_err(CE_WARN, "!apic: system is in APIC mode, "
897 922 "can't support x2APIC processor.");
898 923 return (ENOTSUP);
899 924 }
900 925 break;
901 926
902 927 default:
903 928 cmn_err(CE_WARN,
904 929 "!apic: unknown argument type %d to apic_cpu_add().",
905 930 ap->type);
906 931 return (EINVAL);
907 932 }
908 933
909 934 /* Use apic_ioapic_lock to sync with apic_get_next_bind_cpu. */
910 935 iflag = intr_clear();
911 936 lock_set(&apic_ioapic_lock);
912 937
913 938 /* Check whether local APIC id already exists. */
914 939 for (i = 0; i < apic_nproc; i++) {
915 940 if (!CPU_IN_SET(apic_cpumask, i))
916 941 continue;
917 942 if (apic_cpus[i].aci_local_id == localid) {
918 943 lock_clear(&apic_ioapic_lock);
919 944 intr_restore(iflag);
920 945 cmn_err(CE_WARN,
921 946 "!apic: local apic id %u already exists.",
922 947 localid);
923 948 return (EEXIST);
924 949 } else if (apic_cpus[i].aci_processor_id == procid) {
925 950 lock_clear(&apic_ioapic_lock);
926 951 intr_restore(iflag);
927 952 cmn_err(CE_WARN,
928 953 "!apic: processor id %u already exists.",
929 954 (int)procid);
930 955 return (EEXIST);
931 956 }
932 957
933 958 /*
934 959 * There's no local APIC version number available in MADT table,
935 960 * so assume that all CPUs are homogeneous and use local APIC
936 961 * version number of the first existing CPU.
937 962 */
938 963 if (first) {
939 964 first = B_FALSE;
940 965 localver = apic_cpus[i].aci_local_ver;
941 966 }
942 967 }
943 968 ASSERT(first == B_FALSE);
944 969
945 970 /*
946 971 * Try to assign the same cpuid if APIC id exists in the dirty cache.
947 972 */
948 973 for (i = 0; i < apic_max_nproc; i++) {
949 974 if (CPU_IN_SET(apic_cpumask, i)) {
950 975 ASSERT((apic_cpus[i].aci_status & APIC_CPU_FREE) == 0);
951 976 continue;
952 977 }
953 978 ASSERT(apic_cpus[i].aci_status & APIC_CPU_FREE);
954 979 if ((apic_cpus[i].aci_status & APIC_CPU_DIRTY) &&
955 980 apic_cpus[i].aci_local_id == localid &&
956 981 apic_cpus[i].aci_processor_id == procid) {
957 982 cpuid = i;
958 983 break;
959 984 }
960 985 }
961 986
962 987 /* Avoid the dirty cache and allocate fresh slot if possible. */
963 988 if (cpuid == (processorid_t)-1) {
964 989 for (i = 0; i < apic_max_nproc; i++) {
965 990 if ((apic_cpus[i].aci_status & APIC_CPU_FREE) &&
966 991 (apic_cpus[i].aci_status & APIC_CPU_DIRTY) == 0) {
967 992 cpuid = i;
968 993 break;
969 994 }
970 995 }
971 996 }
972 997
973 998 /* Try to find any free slot as last resort. */
974 999 if (cpuid == (processorid_t)-1) {
975 1000 for (i = 0; i < apic_max_nproc; i++) {
976 1001 if (apic_cpus[i].aci_status & APIC_CPU_FREE) {
977 1002 cpuid = i;
978 1003 break;
979 1004 }
980 1005 }
981 1006 }
982 1007
983 1008 if (cpuid == (processorid_t)-1) {
984 1009 lock_clear(&apic_ioapic_lock);
985 1010 intr_restore(iflag);
986 1011 cmn_err(CE_NOTE,
987 1012 "!apic: failed to allocate cpu id for processor %u.",
988 1013 procid);
989 1014 rv = EAGAIN;
990 1015 } else if (ACPI_FAILURE(acpica_map_cpu(cpuid, procid))) {
991 1016 lock_clear(&apic_ioapic_lock);
992 1017 intr_restore(iflag);
993 1018 cmn_err(CE_NOTE,
994 1019 "!apic: failed to build mapping for processor %u.",
995 1020 procid);
996 1021 rv = EBUSY;
997 1022 } else {
998 1023 ASSERT(cpuid >= 0 && cpuid < NCPU);
999 1024 ASSERT(cpuid < apic_max_nproc && cpuid < max_ncpus);
1000 1025 bzero(&apic_cpus[cpuid], sizeof (apic_cpus[0]));
1001 1026 apic_cpus[cpuid].aci_processor_id = procid;
1002 1027 apic_cpus[cpuid].aci_local_id = localid;
1003 1028 apic_cpus[cpuid].aci_local_ver = localver;
1004 1029 CPUSET_ATOMIC_ADD(apic_cpumask, cpuid);
1005 1030 if (cpuid >= apic_nproc) {
1006 1031 apic_nproc = cpuid + 1;
1007 1032 }
1008 1033 lock_clear(&apic_ioapic_lock);
1009 1034 intr_restore(iflag);
1010 1035 reqp->req.cpu_add.cpuid = cpuid;
1011 1036 }
1012 1037
1013 1038 return (rv);
1014 1039 }
1015 1040
1016 1041 int
1017 1042 apic_cpu_remove(psm_cpu_request_t *reqp)
1018 1043 {
1019 1044 int i;
1020 1045 ulong_t iflag;
1021 1046 processorid_t cpuid;
1022 1047
1023 1048 /* Check whether CPU hotplug is supported. */
1024 1049 if (!plat_dr_support_cpu() || apic_max_nproc == -1) {
1025 1050 return (ENOTSUP);
1026 1051 }
1027 1052
1028 1053 cpuid = reqp->req.cpu_remove.cpuid;
1029 1054
1030 1055 /* Use apic_ioapic_lock to sync with apic_get_next_bind_cpu. */
1031 1056 iflag = intr_clear();
1032 1057 lock_set(&apic_ioapic_lock);
1033 1058
1034 1059 if (!apic_cpu_in_range(cpuid)) {
1035 1060 lock_clear(&apic_ioapic_lock);
1036 1061 intr_restore(iflag);
1037 1062 cmn_err(CE_WARN,
1038 1063 "!apic: cpuid %d doesn't exist in apic_cpus array.",
1039 1064 cpuid);
1040 1065 return (ENODEV);
1041 1066 }
1042 1067 ASSERT((apic_cpus[cpuid].aci_status & APIC_CPU_FREE) == 0);
1043 1068
1044 1069 if (ACPI_FAILURE(acpica_unmap_cpu(cpuid))) {
1045 1070 lock_clear(&apic_ioapic_lock);
1046 1071 intr_restore(iflag);
1047 1072 return (ENOENT);
1048 1073 }
1049 1074
1050 1075 if (cpuid == apic_nproc - 1) {
1051 1076 /*
1052 1077 * We are removing the highest numbered cpuid so we need to
1053 1078 * find the next highest cpuid as the new value for apic_nproc.
1054 1079 */
1055 1080 for (i = apic_nproc; i > 0; i--) {
1056 1081 if (CPU_IN_SET(apic_cpumask, i - 1)) {
1057 1082 apic_nproc = i;
1058 1083 break;
1059 1084 }
1060 1085 }
1061 1086 /* at least one CPU left */
1062 1087 ASSERT(i > 0);
1063 1088 }
1064 1089 CPUSET_ATOMIC_DEL(apic_cpumask, cpuid);
1065 1090 /* mark slot as free and keep it in the dirty cache */
1066 1091 apic_cpus[cpuid].aci_status = APIC_CPU_FREE | APIC_CPU_DIRTY;
1067 1092
1068 1093 lock_clear(&apic_ioapic_lock);
1069 1094 intr_restore(iflag);
1070 1095
1071 1096 return (0);
1072 1097 }
1073 1098
1074 1099 /*
1075 1100 * Return the number of ticks the APIC decrements in SF nanoseconds.
1076 1101 * The fixed-frequency PIT (aka 8254) is used for the measurement.
1077 1102 */
1078 1103 static uint64_t
1079 1104 apic_calibrate_impl()
1080 1105 {
1081 1106 uint8_t pit_tick_lo;
1082 1107 uint16_t pit_tick, target_pit_tick, pit_ticks_adj;
1083 1108 uint32_t pit_ticks;
1084 1109 uint32_t start_apic_tick, end_apic_tick, apic_ticks;
1085 1110 ulong_t iflag;
1086 1111
1087 1112 apic_reg_ops->apic_write(APIC_DIVIDE_REG, apic_divide_reg_init);
1088 1113 apic_reg_ops->apic_write(APIC_INIT_COUNT, APIC_MAXVAL);
1089 1114
1090 1115 iflag = intr_clear();
1091 1116
1092 1117 do {
1093 1118 pit_tick_lo = inb(PITCTR0_PORT);
1094 1119 pit_tick = (inb(PITCTR0_PORT) << 8) | pit_tick_lo;
1095 1120 } while (pit_tick < APIC_TIME_MIN ||
1096 1121 pit_tick_lo <= APIC_LB_MIN || pit_tick_lo >= APIC_LB_MAX);
1097 1122
1098 1123 /*
1099 1124 * Wait for the PIT to decrement by 5 ticks to ensure
1100 1125 * we didn't start in the middle of a tick.
1101 1126 * Compare with 0x10 for the wrap around case.
1102 1127 */
1103 1128 target_pit_tick = pit_tick - 5;
1104 1129 do {
1105 1130 pit_tick_lo = inb(PITCTR0_PORT);
1106 1131 pit_tick = (inb(PITCTR0_PORT) << 8) | pit_tick_lo;
1107 1132 } while (pit_tick > target_pit_tick || pit_tick_lo < 0x10);
1108 1133
1109 1134 start_apic_tick = apic_reg_ops->apic_read(APIC_CURR_COUNT);
1110 1135
1111 1136 /*
1112 1137 * Wait for the PIT to decrement by APIC_TIME_COUNT ticks
1113 1138 */
1114 1139 target_pit_tick = pit_tick - APIC_TIME_COUNT;
1115 1140 do {
1116 1141 pit_tick_lo = inb(PITCTR0_PORT);
1117 1142 pit_tick = (inb(PITCTR0_PORT) << 8) | pit_tick_lo;
1118 1143 } while (pit_tick > target_pit_tick || pit_tick_lo < 0x10);
1119 1144
1120 1145 end_apic_tick = apic_reg_ops->apic_read(APIC_CURR_COUNT);
1121 1146
1122 1147 intr_restore(iflag);
1123 1148
1124 1149 apic_ticks = start_apic_tick - end_apic_tick;
1125 1150
1126 1151 /* The PIT might have decremented by more ticks than planned */
1127 1152 pit_ticks_adj = target_pit_tick - pit_tick;
1128 1153 /* total number of PIT ticks corresponding to apic_ticks */
1129 1154 pit_ticks = APIC_TIME_COUNT + pit_ticks_adj;
1130 1155
1131 1156 /*
1132 1157 * Determine the number of nanoseconds per APIC clock tick
1133 1158 * and then determine how many APIC ticks to interrupt at the
1134 1159 * desired frequency
1135 1160 * apic_ticks / (pitticks / PIT_HZ) = apic_ticks_per_s
1136 1161 * (apic_ticks * PIT_HZ) / pitticks = apic_ticks_per_s
1137 1162 * apic_ticks_per_ns = (apic_ticks * PIT_HZ) / (pitticks * 10^9)
1138 1163 * apic_ticks_per_SFns =
1139 1164 * (SF * apic_ticks * PIT_HZ) / (pitticks * 10^9)
1140 1165 */
1141 1166 return ((SF * apic_ticks * PIT_HZ) / ((uint64_t)pit_ticks * NANOSEC));
1142 1167 }
1143 1168
1144 1169 /*
1145 1170 * It was found empirically that 5 measurements seem sufficient to give a good
1146 1171 * accuracy. Most spurious measurements are higher than the target value thus
1147 1172 * we eliminate up to 2/5 spurious measurements.
1148 1173 */
1149 1174 #define APIC_CALIBRATE_MEASUREMENTS 5
1150 1175
1151 1176 #define APIC_CALIBRATE_PERCENT_OFF_WARNING 10
1152 1177
1153 1178 /*
1154 1179 * Return the number of ticks the APIC decrements in SF nanoseconds.
1155 1180 * Several measurements are taken to filter out outliers.
1156 1181 */
1157 1182 uint64_t
1158 1183 apic_calibrate()
1159 1184 {
1160 1185 uint64_t measurements[APIC_CALIBRATE_MEASUREMENTS];
1161 1186 int median_idx;
1162 1187 uint64_t median;
1163 1188
1164 1189 /*
1165 1190 * When running under a virtual machine, the emulated PIT and APIC
1166 1191 * counters do not always return the right values and can roll over.
1167 1192 * Those spurious measurements are relatively rare but could
1168 1193 * significantly affect the calibration.
1169 1194 * Therefore we take several measurements and then keep the median.
1170 1195 * The median is preferred to the average here as we only want to
1171 1196 * discard outliers.
1172 1197 */
1173 1198 for (int i = 0; i < APIC_CALIBRATE_MEASUREMENTS; i++)
1174 1199 measurements[i] = apic_calibrate_impl();
1175 1200
1176 1201 /*
1177 1202 * sort results and retrieve median.
1178 1203 */
1179 1204 for (int i = 0; i < APIC_CALIBRATE_MEASUREMENTS; i++) {
1180 1205 for (int j = i + 1; j < APIC_CALIBRATE_MEASUREMENTS; j++) {
1181 1206 if (measurements[j] < measurements[i]) {
1182 1207 uint64_t tmp = measurements[i];
1183 1208 measurements[i] = measurements[j];
1184 1209 measurements[j] = tmp;
1185 1210 }
1186 1211 }
1187 1212 }
1188 1213 median_idx = APIC_CALIBRATE_MEASUREMENTS / 2;
1189 1214 median = measurements[median_idx];
1190 1215
1191 1216 #if (APIC_CALIBRATE_MEASUREMENTS >= 3)
1192 1217 /*
1193 1218 * Check that measurements are consistent. Post a warning
1194 1219 * if the three middle values are not close to each other.
1195 1220 */
1196 1221 uint64_t delta_warn = median *
1197 1222 APIC_CALIBRATE_PERCENT_OFF_WARNING / 100;
1198 1223 if ((median - measurements[median_idx - 1]) > delta_warn ||
1199 1224 (measurements[median_idx + 1] - median) > delta_warn) {
1200 1225 cmn_err(CE_WARN, "apic_calibrate measurements lack "
1201 1226 "precision: %llu, %llu, %llu.",
1202 1227 (u_longlong_t)measurements[median_idx - 1],
1203 1228 (u_longlong_t)median,
1204 1229 (u_longlong_t)measurements[median_idx + 1]);
1205 1230 }
1206 1231 #endif
1207 1232
1208 1233 return (median);
1209 1234 }
1210 1235
1211 1236 /*
1212 1237 * Initialise the APIC timer on the local APIC of CPU 0 to the desired
1213 1238 * frequency. Note at this stage in the boot sequence, the boot processor
1214 1239 * is the only active processor.
1215 1240 * hertz value of 0 indicates a one-shot mode request. In this case
1216 1241 * the function returns the resolution (in nanoseconds) for the hardware
1217 1242 * timer interrupt. If one-shot mode capability is not available,
1218 1243 * the return value will be 0. apic_enable_oneshot is a global switch
1219 1244 * for disabling the functionality.
1220 1245 * A non-zero positive value for hertz indicates a periodic mode request.
1221 1246 * In this case the hardware will be programmed to generate clock interrupts
1222 1247 * at hertz frequency and returns the resolution of interrupts in
1223 1248 * nanosecond.
1224 1249 */
1225 1250
1226 1251 int
1227 1252 apic_clkinit(int hertz)
1228 1253 {
1229 1254 int ret;
1230 1255
1231 1256 apic_int_busy_mark = (apic_int_busy_mark *
1232 1257 apic_sample_factor_redistribution) / 100;
1233 1258 apic_int_free_mark = (apic_int_free_mark *
1234 1259 apic_sample_factor_redistribution) / 100;
1235 1260 apic_diff_for_redistribution = (apic_diff_for_redistribution *
1236 1261 apic_sample_factor_redistribution) / 100;
1237 1262
1238 1263 ret = apic_timer_init(hertz);
1239 1264 return (ret);
1240 1265
1241 1266 }
1242 1267
1243 1268 /*
1244 1269 * apic_preshutdown:
1245 1270 * Called early in shutdown whilst we can still access filesystems to do
1246 1271 * things like loading modules which will be required to complete shutdown
1247 1272 * after filesystems are all unmounted.
1248 1273 */
1249 1274 void
1250 1275 apic_preshutdown(int cmd, int fcn)
1251 1276 {
1252 1277 APIC_VERBOSE_POWEROFF(("apic_preshutdown(%d,%d); m=%d a=%d\n",
1253 1278 cmd, fcn, apic_poweroff_method, apic_enable_acpi));
1254 1279 }
1255 1280
1256 1281 void
1257 1282 apic_shutdown(int cmd, int fcn)
1258 1283 {
1259 1284 int restarts, attempts;
1260 1285 int i;
1261 1286 uchar_t byte;
1262 1287 ulong_t iflag;
1263 1288
1264 1289 hpet_acpi_fini();
1265 1290
1266 1291 /* Send NMI to all CPUs except self to do per processor shutdown */
1267 1292 iflag = intr_clear();
1268 1293 #ifdef DEBUG
1269 1294 APIC_AV_PENDING_SET();
1270 1295 #else
1271 1296 if (apic_mode == LOCAL_APIC)
1272 1297 APIC_AV_PENDING_SET();
1273 1298 #endif /* DEBUG */
1274 1299 apic_shutdown_processors = 1;
1275 1300 apic_reg_ops->apic_write(APIC_INT_CMD1,
1276 1301 AV_NMI | AV_LEVEL | AV_SH_ALL_EXCSELF);
1277 1302
1278 1303 /* restore cmos shutdown byte before reboot */
1279 1304 if (apic_cmos_ssb_set) {
1280 1305 outb(CMOS_ADDR, SSB);
1281 1306 outb(CMOS_DATA, 0);
1282 1307 }
1283 1308
1284 1309 ioapic_disable_redirection();
1285 1310
1286 1311 /* disable apic mode if imcr present */
1287 1312 if (apic_imcrp) {
1288 1313 outb(APIC_IMCR_P1, (uchar_t)APIC_IMCR_SELECT);
1289 1314 outb(APIC_IMCR_P2, (uchar_t)APIC_IMCR_PIC);
1290 1315 }
1291 1316
1292 1317 apic_disable_local_apic();
1293 1318
1294 1319 intr_restore(iflag);
1295 1320
1296 1321 /* remainder of function is for shutdown cases only */
1297 1322 if (cmd != A_SHUTDOWN)
1298 1323 return;
1299 1324
1300 1325 /*
1301 1326 * Switch system back into Legacy-Mode if using ACPI and
1302 1327 * not powering-off. Some BIOSes need to remain in ACPI-mode
1303 1328 * for power-off to succeed (Dell Dimension 4600)
1304 1329 * Do not disable ACPI while doing fastreboot
1305 1330 */
1306 1331 if (apic_enable_acpi && fcn != AD_POWEROFF && fcn != AD_FASTREBOOT)
1307 1332 (void) AcpiDisable();
1308 1333
1309 1334 if (fcn == AD_FASTREBOOT) {
1310 1335 apic_reg_ops->apic_write(APIC_INT_CMD1,
1311 1336 AV_ASSERT | AV_RESET | AV_SH_ALL_EXCSELF);
1312 1337 }
1313 1338
1314 1339 /* remainder of function is for shutdown+poweroff case only */
1315 1340 if (fcn != AD_POWEROFF)
1316 1341 return;
1317 1342
1318 1343 switch (apic_poweroff_method) {
1319 1344 case APIC_POWEROFF_VIA_RTC:
1320 1345
1321 1346 /* select the extended NVRAM bank in the RTC */
1322 1347 outb(CMOS_ADDR, RTC_REGA);
1323 1348 byte = inb(CMOS_DATA);
1324 1349 outb(CMOS_DATA, (byte | EXT_BANK));
1325 1350
1326 1351 outb(CMOS_ADDR, PFR_REG);
1327 1352
1328 1353 /* for Predator must toggle the PAB bit */
1329 1354 byte = inb(CMOS_DATA);
1330 1355
1331 1356 /*
1332 1357 * clear power active bar, wakeup alarm and
1333 1358 * kickstart
1334 1359 */
1335 1360 byte &= ~(PAB_CBIT | WF_FLAG | KS_FLAG);
1336 1361 outb(CMOS_DATA, byte);
1337 1362
1338 1363 /* delay before next write */
1339 1364 drv_usecwait(1000);
1340 1365
1341 1366 /* for S40 the following would suffice */
1342 1367 byte = inb(CMOS_DATA);
1343 1368
1344 1369 /* power active bar control bit */
1345 1370 byte |= PAB_CBIT;
1346 1371 outb(CMOS_DATA, byte);
1347 1372
1348 1373 break;
1349 1374
1350 1375 case APIC_POWEROFF_VIA_ASPEN_BMC:
1351 1376 restarts = 0;
1352 1377 restart_aspen_bmc:
1353 1378 if (++restarts == 3)
1354 1379 break;
1355 1380 attempts = 0;
1356 1381 do {
1357 1382 byte = inb(MISMIC_FLAG_REGISTER);
1358 1383 byte &= MISMIC_BUSY_MASK;
1359 1384 if (byte != 0) {
1360 1385 drv_usecwait(1000);
1361 1386 if (attempts >= 3)
1362 1387 goto restart_aspen_bmc;
1363 1388 ++attempts;
1364 1389 }
1365 1390 } while (byte != 0);
1366 1391 outb(MISMIC_CNTL_REGISTER, CC_SMS_GET_STATUS);
1367 1392 byte = inb(MISMIC_FLAG_REGISTER);
1368 1393 byte |= 0x1;
1369 1394 outb(MISMIC_FLAG_REGISTER, byte);
1370 1395 i = 0;
1371 1396 for (; i < (sizeof (aspen_bmc)/sizeof (aspen_bmc[0]));
1372 1397 i++) {
1373 1398 attempts = 0;
1374 1399 do {
1375 1400 byte = inb(MISMIC_FLAG_REGISTER);
1376 1401 byte &= MISMIC_BUSY_MASK;
1377 1402 if (byte != 0) {
1378 1403 drv_usecwait(1000);
1379 1404 if (attempts >= 3)
1380 1405 goto restart_aspen_bmc;
1381 1406 ++attempts;
1382 1407 }
1383 1408 } while (byte != 0);
1384 1409 outb(MISMIC_CNTL_REGISTER, aspen_bmc[i].cntl);
1385 1410 outb(MISMIC_DATA_REGISTER, aspen_bmc[i].data);
1386 1411 byte = inb(MISMIC_FLAG_REGISTER);
1387 1412 byte |= 0x1;
1388 1413 outb(MISMIC_FLAG_REGISTER, byte);
1389 1414 }
1390 1415 break;
1391 1416
1392 1417 case APIC_POWEROFF_VIA_SITKA_BMC:
1393 1418 restarts = 0;
1394 1419 restart_sitka_bmc:
1395 1420 if (++restarts == 3)
1396 1421 break;
1397 1422 attempts = 0;
1398 1423 do {
1399 1424 byte = inb(SMS_STATUS_REGISTER);
1400 1425 byte &= SMS_STATE_MASK;
1401 1426 if ((byte == SMS_READ_STATE) ||
1402 1427 (byte == SMS_WRITE_STATE)) {
1403 1428 drv_usecwait(1000);
1404 1429 if (attempts >= 3)
1405 1430 goto restart_sitka_bmc;
1406 1431 ++attempts;
1407 1432 }
1408 1433 } while ((byte == SMS_READ_STATE) ||
1409 1434 (byte == SMS_WRITE_STATE));
1410 1435 outb(SMS_COMMAND_REGISTER, SMS_GET_STATUS);
1411 1436 i = 0;
1412 1437 for (; i < (sizeof (sitka_bmc)/sizeof (sitka_bmc[0]));
1413 1438 i++) {
1414 1439 attempts = 0;
1415 1440 do {
1416 1441 byte = inb(SMS_STATUS_REGISTER);
1417 1442 byte &= SMS_IBF_MASK;
1418 1443 if (byte != 0) {
1419 1444 drv_usecwait(1000);
1420 1445 if (attempts >= 3)
1421 1446 goto restart_sitka_bmc;
1422 1447 ++attempts;
1423 1448 }
1424 1449 } while (byte != 0);
1425 1450 outb(sitka_bmc[i].port, sitka_bmc[i].data);
1426 1451 }
1427 1452 break;
1428 1453
1429 1454 case APIC_POWEROFF_NONE:
1430 1455
1431 1456 /* If no APIC direct method, we will try using ACPI */
1432 1457 if (apic_enable_acpi) {
1433 1458 if (acpi_poweroff() == 1)
1434 1459 return;
1435 1460 } else
1436 1461 return;
1437 1462
1438 1463 break;
1439 1464 }
1440 1465 /*
1441 1466 * Wait a limited time here for power to go off.
1442 1467 * If the power does not go off, then there was a
1443 1468 * problem and we should continue to the halt which
1444 1469 * prints a message for the user to press a key to
1445 1470 * reboot.
1446 1471 */
1447 1472 drv_usecwait(7000000); /* wait seven seconds */
1448 1473
1449 1474 }
1450 1475
1451 1476 cyclic_id_t apic_cyclic_id;
1452 1477
1453 1478 /*
1454 1479 * The following functions are in the platform specific file so that they
1455 1480 * can be different functions depending on whether we are running on
1456 1481 * bare metal or a hypervisor.
1457 1482 */
1458 1483
1459 1484 /*
1460 1485 * map an apic for memory-mapped access
1461 1486 */
1462 1487 uint32_t *
1463 1488 mapin_apic(uint32_t addr, size_t len, int flags)
1464 1489 {
1465 1490 return ((void *)psm_map_phys(addr, len, flags));
1466 1491 }
1467 1492
/*
 * Map an I/O APIC for memory-mapped access; simply defers to mapin_apic().
 */
uint32_t *
mapin_ioapic(uint32_t addr, size_t len, int flags)
{
	uint32_t *mapped = mapin_apic(addr, len, flags);

	return (mapped);
}
1473 1498
1474 1499 /*
1475 1500 * unmap an apic
1476 1501 */
1477 1502 void
1478 1503 mapout_apic(caddr_t addr, size_t len)
1479 1504 {
1480 1505 psm_unmap_phys(addr, len);
1481 1506 }
1482 1507
1483 1508 void
1484 1509 mapout_ioapic(caddr_t addr, size_t len)
1485 1510 {
1486 1511 mapout_apic(addr, len);
1487 1512 }
1488 1513
1489 1514 uint32_t
1490 1515 ioapic_read(int ioapic_ix, uint32_t reg)
1491 1516 {
1492 1517 volatile uint32_t *ioapic;
1493 1518
1494 1519 ioapic = apicioadr[ioapic_ix];
1495 1520 ioapic[APIC_IO_REG] = reg;
1496 1521 return (ioapic[APIC_IO_DATA]);
1497 1522 }
1498 1523
1499 1524 void
1500 1525 ioapic_write(int ioapic_ix, uint32_t reg, uint32_t value)
1501 1526 {
1502 1527 volatile uint32_t *ioapic;
1503 1528
1504 1529 ioapic = apicioadr[ioapic_ix];
1505 1530 ioapic[APIC_IO_REG] = reg;
1506 1531 ioapic[APIC_IO_DATA] = value;
1507 1532 }
1508 1533
1509 1534 void
1510 1535 ioapic_write_eoi(int ioapic_ix, uint32_t value)
1511 1536 {
1512 1537 volatile uint32_t *ioapic;
1513 1538
1514 1539 ioapic = apicioadr[ioapic_ix];
1515 1540 ioapic[APIC_IO_EOI] = value;
1516 1541 }
1517 1542
1518 1543 /*
1519 1544 * Round-robin algorithm to find the next CPU with interrupts enabled.
1520 1545 * It can't share the same static variable apic_next_bind_cpu with
1521 1546 * apic_get_next_bind_cpu(), since that will cause all interrupts to be
1522 1547 * bound to CPU1 at boot time. During boot, only CPU0 is online with
1523 1548 * interrupts enabled when apic_get_next_bind_cpu() and apic_find_cpu()
1524 1549 * are called. However, the pcplusmp driver assumes that there will be
1525 1550 * boot_ncpus CPUs configured eventually so it tries to distribute all
1526 1551 * interrupts among CPU0 - CPU[boot_ncpus - 1]. Thus to prevent all
1527 1552 * interrupts being targetted at CPU1, we need to use a dedicated static
1528 1553 * variable for find_next_cpu() instead of sharing apic_next_bind_cpu.
1529 1554 */
1530 1555
1531 1556 processorid_t
1532 1557 apic_find_cpu(int flag)
1533 1558 {
1534 1559 int i;
1535 1560 static processorid_t acid = 0;
1536 1561
1537 1562 /* Find the first CPU with the passed-in flag set */
1538 1563 for (i = 0; i < apic_nproc; i++) {
1539 1564 if (++acid >= apic_nproc) {
1540 1565 acid = 0;
1541 1566 }
1542 1567 if (apic_cpu_in_range(acid) &&
1543 1568 (apic_cpus[acid].aci_status & flag)) {
1544 1569 break;
1545 1570 }
1546 1571 }
1547 1572
1548 1573 ASSERT((apic_cpus[acid].aci_status & flag) != 0);
1549 1574 return (acid);
1550 1575 }
1551 1576
1552 1577 void
1553 1578 apic_intrmap_init(int apic_mode)
1554 1579 {
1555 1580 int suppress_brdcst_eoi = 0;
1556 1581
1557 1582 /*
1558 1583 * Intel Software Developer's Manual 3A, 10.12.7:
1559 1584 *
1560 1585 * Routing of device interrupts to local APIC units operating in
1561 1586 * x2APIC mode requires use of the interrupt-remapping architecture
1562 1587 * specified in the Intel Virtualization Technology for Directed
1563 1588 * I/O, Revision 1.3. Because of this, BIOS must enumerate support
1564 1589 * for and software must enable this interrupt remapping with
1565 1590 * Extended Interrupt Mode Enabled before it enabling x2APIC mode in
1566 1591 * the local APIC units.
1567 1592 *
1568 1593 *
1569 1594 * In other words, to use the APIC in x2APIC mode, we need interrupt
1570 1595 * remapping. Since we don't start up the IOMMU by default, we
1571 1596 * won't be able to do any interrupt remapping and therefore have to
1572 1597 * use the APIC in traditional 'local APIC' mode with memory mapped
1573 1598 * I/O.
1574 1599 */
1575 1600
1576 1601 if (psm_vt_ops != NULL) {
1577 1602 if (((apic_intrmap_ops_t *)psm_vt_ops)->
1578 1603 apic_intrmap_init(apic_mode) == DDI_SUCCESS) {
1579 1604
1580 1605 apic_vt_ops = psm_vt_ops;
1581 1606
1582 1607 /*
1583 1608 * We leverage the interrupt remapping engine to
1584 1609 * suppress broadcast EOI; thus we must send the
1585 1610 * directed EOI with the directed-EOI handler.
1586 1611 */
1587 1612 if (apic_directed_EOI_supported() == 0) {
1588 1613 suppress_brdcst_eoi = 1;
1589 1614 }
1590 1615
1591 1616 apic_vt_ops->apic_intrmap_enable(suppress_brdcst_eoi);
1592 1617
1593 1618 if (apic_detect_x2apic()) {
1594 1619 apic_enable_x2apic();
1595 1620 }
1596 1621
1597 1622 if (apic_directed_EOI_supported() == 0) {
1598 1623 apic_set_directed_EOI_handler();
1599 1624 }
1600 1625 }
1601 1626 }
1602 1627 }
1603 1628
1604 1629 /*ARGSUSED*/
1605 1630 static void
1606 1631 apic_record_ioapic_rdt(void *intrmap_private, ioapic_rdt_t *irdt)
1607 1632 {
1608 1633 irdt->ir_hi <<= APIC_ID_BIT_OFFSET;
1609 1634 }
1610 1635
1611 1636 /*ARGSUSED*/
1612 1637 static void
1613 1638 apic_record_msi(void *intrmap_private, msi_regs_t *mregs)
1614 1639 {
1615 1640 mregs->mr_addr = MSI_ADDR_HDR |
1616 1641 (MSI_ADDR_RH_FIXED << MSI_ADDR_RH_SHIFT) |
1617 1642 (MSI_ADDR_DM_PHYSICAL << MSI_ADDR_DM_SHIFT) |
1618 1643 (mregs->mr_addr << MSI_ADDR_DEST_SHIFT);
1619 1644 mregs->mr_data = (MSI_DATA_TM_EDGE << MSI_DATA_TM_SHIFT) |
1620 1645 mregs->mr_data;
1621 1646 }
1622 1647
1623 1648 /*
1624 1649 * Functions from apic_introp.c
1625 1650 *
1626 1651 * Those functions are used by apic_intr_ops().
1627 1652 */
1628 1653
1629 1654 /*
1630 1655 * MSI support flag:
1631 1656 * reflects whether MSI is supported at APIC level
1632 1657 * it can also be patched through /etc/system
1633 1658 *
1634 1659 * 0 = default value - don't know and need to call apic_check_msi_support()
1635 1660 * to find out then set it accordingly
1636 1661 * 1 = supported
1637 1662 * -1 = not supported
1638 1663 */
1639 1664 int apic_support_msi = 0;
1640 1665
1641 1666 /* Multiple vector support for MSI-X */
1642 1667 int apic_msix_enable = 1;
1643 1668
1644 1669 /* Multiple vector support for MSI */
1645 1670 int apic_multi_msi_enable = 1;
1646 1671
1647 1672 /*
1648 1673 * Check whether the system supports MSI.
1649 1674 *
1650 1675 * MSI is required for PCI-E and for PCI versions later than 2.2, so if we find
1651 1676 * a PCI-E bus or we find a PCI bus whose version we know is >= 2.2, then we
1652 1677 * return PSM_SUCCESS to indicate this system supports MSI.
1653 1678 *
1654 1679 * (Currently the only way we check whether a given PCI bus supports >= 2.2 is
1655 1680 * by detecting if we are running inside the KVM hypervisor, which guarantees
1656 1681 * this version number.)
1657 1682 */
1658 1683 int
1659 1684 apic_check_msi_support()
1660 1685 {
1661 1686 dev_info_t *cdip;
1662 1687 char dev_type[16];
1663 1688 int dev_len;
1664 1689 int hwenv = get_hwenv();
1665 1690
1666 1691 DDI_INTR_IMPLDBG((CE_CONT, "apic_check_msi_support:\n"));
1667 1692
1668 1693 /*
1669 1694 * check whether the first level children of root_node have
1670 1695 * PCI-E or PCI capability.
1671 1696 */
1672 1697 for (cdip = ddi_get_child(ddi_root_node()); cdip != NULL;
1673 1698 cdip = ddi_get_next_sibling(cdip)) {
1674 1699
1675 1700 DDI_INTR_IMPLDBG((CE_CONT, "apic_check_msi_support: cdip: 0x%p,"
1676 1701 " driver: %s, binding: %s, nodename: %s\n", (void *)cdip,
1677 1702 ddi_driver_name(cdip), ddi_binding_name(cdip),
1678 1703 ddi_node_name(cdip)));
1679 1704 dev_len = sizeof (dev_type);
1680 1705 if (ddi_getlongprop_buf(DDI_DEV_T_ANY, cdip, DDI_PROP_DONTPASS,
1681 1706 "device_type", (caddr_t)dev_type, &dev_len)
1682 1707 != DDI_PROP_SUCCESS)
1683 1708 continue;
1684 1709 if (strcmp(dev_type, "pciex") == 0)
1685 1710 return (PSM_SUCCESS);
1686 1711 if (strcmp(dev_type, "pci") == 0 &&
1687 1712 (hwenv == HW_KVM || hwenv == HW_BHYVE))
1688 1713 return (PSM_SUCCESS);
1689 1714 }
1690 1715
1691 1716 /* MSI is not supported on this system */
1692 1717 DDI_INTR_IMPLDBG((CE_CONT, "apic_check_msi_support: no 'pciex' "
1693 1718 "device_type found\n"));
1694 1719 return (PSM_FAILURE);
1695 1720 }
1696 1721
1697 1722 /*
1698 1723 * apic_pci_msi_unconfigure:
1699 1724 *
1700 1725 * This and next two interfaces are copied from pci_intr_lib.c
1701 1726 * Do ensure that these two files stay in sync.
1702 1727 * These needed to be copied over here to avoid a deadlock situation on
1703 1728 * certain mp systems that use MSI interrupts.
1704 1729 *
1705 1730 * IMPORTANT regards next three interfaces:
1706 1731 * i) are called only for MSI/X interrupts.
1707 1732 * ii) called with interrupts disabled, and must not block
1708 1733 */
1709 1734 void
1710 1735 apic_pci_msi_unconfigure(dev_info_t *rdip, int type, int inum)
1711 1736 {
1712 1737 ushort_t msi_ctrl;
1713 1738 int cap_ptr = i_ddi_get_msi_msix_cap_ptr(rdip);
1714 1739 ddi_acc_handle_t handle = i_ddi_get_pci_config_handle(rdip);
1715 1740
1716 1741 ASSERT((handle != NULL) && (cap_ptr != 0));
1717 1742
1718 1743 if (type == DDI_INTR_TYPE_MSI) {
1719 1744 msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL);
1720 1745 msi_ctrl &= (~PCI_MSI_MME_MASK);
1721 1746 pci_config_put16(handle, cap_ptr + PCI_MSI_CTRL, msi_ctrl);
1722 1747 pci_config_put32(handle, cap_ptr + PCI_MSI_ADDR_OFFSET, 0);
1723 1748
1724 1749 if (msi_ctrl & PCI_MSI_64BIT_MASK) {
1725 1750 pci_config_put16(handle,
1726 1751 cap_ptr + PCI_MSI_64BIT_DATA, 0);
1727 1752 pci_config_put32(handle,
1728 1753 cap_ptr + PCI_MSI_ADDR_OFFSET + 4, 0);
1729 1754 } else {
1730 1755 pci_config_put16(handle,
1731 1756 cap_ptr + PCI_MSI_32BIT_DATA, 0);
1732 1757 }
1733 1758
1734 1759 } else if (type == DDI_INTR_TYPE_MSIX) {
1735 1760 uintptr_t off;
1736 1761 uint32_t mask;
1737 1762 ddi_intr_msix_t *msix_p = i_ddi_get_msix(rdip);
1738 1763
1739 1764 ASSERT(msix_p != NULL);
1740 1765
1741 1766 /* Offset into "inum"th entry in the MSI-X table & mask it */
1742 1767 off = (uintptr_t)msix_p->msix_tbl_addr + (inum *
1743 1768 PCI_MSIX_VECTOR_SIZE) + PCI_MSIX_VECTOR_CTRL_OFFSET;
1744 1769
1745 1770 mask = ddi_get32(msix_p->msix_tbl_hdl, (uint32_t *)off);
1746 1771
1747 1772 ddi_put32(msix_p->msix_tbl_hdl, (uint32_t *)off, (mask | 1));
1748 1773
1749 1774 /* Offset into the "inum"th entry in the MSI-X table */
1750 1775 off = (uintptr_t)msix_p->msix_tbl_addr +
1751 1776 (inum * PCI_MSIX_VECTOR_SIZE);
1752 1777
1753 1778 /* Reset the "data" and "addr" bits */
1754 1779 ddi_put32(msix_p->msix_tbl_hdl,
1755 1780 (uint32_t *)(off + PCI_MSIX_DATA_OFFSET), 0);
1756 1781 ddi_put64(msix_p->msix_tbl_hdl, (uint64_t *)off, 0);
1757 1782 }
1758 1783 }
1759 1784
1760 1785 /*
1761 1786 * apic_pci_msi_disable_mode:
1762 1787 */
1763 1788 void
1764 1789 apic_pci_msi_disable_mode(dev_info_t *rdip, int type)
1765 1790 {
1766 1791 ushort_t msi_ctrl;
1767 1792 int cap_ptr = i_ddi_get_msi_msix_cap_ptr(rdip);
1768 1793 ddi_acc_handle_t handle = i_ddi_get_pci_config_handle(rdip);
1769 1794
1770 1795 ASSERT((handle != NULL) && (cap_ptr != 0));
1771 1796
1772 1797 if (type == DDI_INTR_TYPE_MSI) {
1773 1798 msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL);
1774 1799 if (!(msi_ctrl & PCI_MSI_ENABLE_BIT))
1775 1800 return;
1776 1801
1777 1802 msi_ctrl &= ~PCI_MSI_ENABLE_BIT; /* MSI disable */
1778 1803 pci_config_put16(handle, cap_ptr + PCI_MSI_CTRL, msi_ctrl);
1779 1804
1780 1805 } else if (type == DDI_INTR_TYPE_MSIX) {
1781 1806 msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSIX_CTRL);
1782 1807 if (msi_ctrl & PCI_MSIX_ENABLE_BIT) {
1783 1808 msi_ctrl &= ~PCI_MSIX_ENABLE_BIT;
1784 1809 pci_config_put16(handle, cap_ptr + PCI_MSIX_CTRL,
1785 1810 msi_ctrl);
1786 1811 }
1787 1812 }
1788 1813 }
1789 1814
1790 1815 uint32_t
1791 1816 apic_get_localapicid(uint32_t cpuid)
1792 1817 {
1793 1818 ASSERT(cpuid < apic_nproc && apic_cpus != NULL);
1794 1819
1795 1820 return (apic_cpus[cpuid].aci_local_id);
1796 1821 }
1797 1822
1798 1823 uchar_t
1799 1824 apic_get_ioapicid(uchar_t ioapicindex)
1800 1825 {
1801 1826 ASSERT(ioapicindex < MAX_IO_APIC);
1802 1827
1803 1828 return (apic_io_id[ioapicindex]);
1804 1829 }
↓ open down ↓ |
961 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX