3605 Xen HVM hangs during boot if apix is enabled
--- old/usr/src/uts/i86pc/io/apix/apix.c
+++ new/usr/src/uts/i86pc/io/apix/apix.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
24 24 */
25 25 /*
26 26 * Copyright (c) 2010, Intel Corporation.
27 27 * All rights reserved.
28 28 */
29 29 /*
30 30 * Copyright (c) 2017, Joyent, Inc. All rights reserved.
31 31 */
32 32
33 33 /*
34 34 * To understand how the apix module interacts with the interrupt subsystem, read
35 35 * the theory statement in uts/i86pc/os/intr.c.
36 36 */
37 37
38 38 /*
39 39 * PSMI 1.1 extensions are supported only in 2.6 and later versions.
40 40 * PSMI 1.2 extensions are supported only in 2.7 and later versions.
41 41 * PSMI 1.3 and 1.4 extensions are supported in Solaris 10.
42 42 * PSMI 1.5 extensions are supported in Solaris Nevada.
43 43 * PSMI 1.6 extensions are supported in Solaris Nevada.
44 44 * PSMI 1.7 extensions are supported in Solaris Nevada.
45 45 */
46 46 #define PSMI_1_7
47 47
48 48 #include <sys/processor.h>
49 49 #include <sys/time.h>
50 50 #include <sys/psm.h>
51 51 #include <sys/smp_impldefs.h>
52 52 #include <sys/cram.h>
53 53 #include <sys/acpi/acpi.h>
54 54 #include <sys/acpica.h>
55 55 #include <sys/psm_common.h>
56 56 #include <sys/pit.h>
57 57 #include <sys/ddi.h>
58 58 #include <sys/sunddi.h>
59 59 #include <sys/ddi_impldefs.h>
60 60 #include <sys/pci.h>
61 61 #include <sys/promif.h>
62 62 #include <sys/x86_archext.h>
63 63 #include <sys/cpc_impl.h>
64 64 #include <sys/uadmin.h>
65 65 #include <sys/panic.h>
66 66 #include <sys/debug.h>
67 67 #include <sys/archsystm.h>
68 68 #include <sys/trap.h>
69 69 #include <sys/machsystm.h>
70 70 #include <sys/sysmacros.h>
71 71 #include <sys/cpuvar.h>
72 72 #include <sys/rm_platter.h>
73 73 #include <sys/privregs.h>
74 74 #include <sys/note.h>
75 75 #include <sys/pci_intr_lib.h>
76 76 #include <sys/spl.h>
77 77 #include <sys/clock.h>
78 78 #include <sys/cyclic.h>
79 79 #include <sys/dditypes.h>
80 80 #include <sys/sunddi.h>
81 81 #include <sys/x_call.h>
82 82 #include <sys/reboot.h>
83 83 #include <sys/mach_intr.h>
84 84 #include <sys/apix.h>
85 85 #include <sys/apix_irm_impl.h>
86 86
87 87 static int apix_probe();
88 88 static void apix_init();
89 89 static void apix_picinit(void);
90 90 static int apix_intr_enter(int, int *);
91 91 static void apix_intr_exit(int, int);
92 92 static void apix_setspl(int);
93 93 static int apix_disable_intr(processorid_t);
94 94 static void apix_enable_intr(processorid_t);
95 95 static int apix_get_clkvect(int);
96 96 static int apix_get_ipivect(int, int);
97 97 static void apix_post_cyclic_setup(void *);
98 98 static int apix_post_cpu_start();
99 99 static int apix_intr_ops(dev_info_t *, ddi_intr_handle_impl_t *,
100 100 psm_intr_op_t, int *);
101 101
102 102 /*
103 103 * Helper functions for apix_intr_ops()
104 104 */
105 105 static void apix_redistribute_compute(void);
106 106 static int apix_get_pending(apix_vector_t *);
107 107 static apix_vector_t *apix_get_req_vector(ddi_intr_handle_impl_t *, ushort_t);
108 108 static int apix_get_intr_info(ddi_intr_handle_impl_t *, apic_get_intr_t *);
109 109 static char *apix_get_apic_type(void);
110 110 static int apix_intx_get_pending(int);
111 111 static void apix_intx_set_mask(int irqno);
112 112 static void apix_intx_clear_mask(int irqno);
113 113 static int apix_intx_get_shared(int irqno);
114 114 static void apix_intx_set_shared(int irqno, int delta);
115 115 static apix_vector_t *apix_intx_xlate_vector(dev_info_t *, int,
116 116 struct intrspec *);
117 -static int apix_intx_alloc_vector(dev_info_t *, int, struct intrspec *);
117 +static int apix_intx_alloc_vector(dev_info_t *, ddi_intr_handle_impl_t *,
118 + struct intrspec *);
118 119
119 120 extern int apic_clkinit(int);
120 121
121 122 /* IRM initialization for APIX PSM module */
122 123 extern void apix_irm_init(void);
123 124
124 125 extern int irm_enable;
125 126
126 127 /*
127 128 * Local static data
128 129 */
129 130 static struct psm_ops apix_ops = {
130 131 apix_probe,
131 132
132 133 apix_init,
133 134 apix_picinit,
134 135 apix_intr_enter,
135 136 apix_intr_exit,
136 137 apix_setspl,
137 138 apix_addspl,
138 139 apix_delspl,
139 140 apix_disable_intr,
140 141 apix_enable_intr,
141 142 NULL, /* psm_softlvl_to_irq */
142 143 NULL, /* psm_set_softintr */
143 144
144 145 apic_set_idlecpu,
145 146 apic_unset_idlecpu,
146 147
147 148 apic_clkinit,
148 149 apix_get_clkvect,
149 150 NULL, /* psm_hrtimeinit */
150 151 apic_gethrtime,
151 152
152 153 apic_get_next_processorid,
153 154 apic_cpu_start,
154 155 apix_post_cpu_start,
155 156 apic_shutdown,
156 157 apix_get_ipivect,
157 158 apic_send_ipi,
158 159
159 160 NULL, /* psm_translate_irq */
160 161 NULL, /* psm_notify_error */
161 162 NULL, /* psm_notify_func */
162 163 apic_timer_reprogram,
163 164 apic_timer_enable,
164 165 apic_timer_disable,
165 166 apix_post_cyclic_setup,
166 167 apic_preshutdown,
167 168 apix_intr_ops, /* Advanced DDI Interrupt framework */
168 169 apic_state, /* save, restore apic state for S3 */
169 170 apic_cpu_ops, /* CPU control interface. */
170 171 };
171 172
172 173 struct psm_ops *psmops = &apix_ops;
173 174
174 175 static struct psm_info apix_psm_info = {
175 176 PSM_INFO_VER01_7, /* version */
176 177 PSM_OWN_EXCLUSIVE, /* ownership */
177 178 &apix_ops, /* operation */
178 179 APIX_NAME, /* machine name */
179 180 "apix MPv1.4 compatible",
180 181 };
181 182
182 183 static void *apix_hdlp;
183 184
184 185 static int apix_is_enabled = 0;
185 186
186 187 /*
187 188 * Flag to indicate if APIX is to be enabled only for platforms
188 189 * with specific hw feature(s).
189 190 */
190 191 int apix_hw_chk_enable = 1;
191 192
192 193 /*
193 194 * Hw features that are checked for enabling APIX support.
194 195 */
195 196 #define APIX_SUPPORT_X2APIC 0x00000001
196 197 uint_t apix_supported_hw = APIX_SUPPORT_X2APIC;
197 198
198 199 /*
199 200 * apix_lock is used for cpu selection and vector re-binding
200 201 */
201 202 lock_t apix_lock;
202 203 apix_impl_t *apixs[NCPU];
203 204 /*
204 205 * Mapping between device interrupt and the allocated vector. Indexed
205 206 * by major number.
206 207 */
207 208 apix_dev_vector_t **apix_dev_vector;
208 209 /*
209 210 * Mapping between device major number and cpu id. It is used
210 211 * when the round-robin-with-affinity interrupt binding policy
211 212 * is applied. With that policy, devices with the same major number
212 213 * will be bound to the same CPU.
213 214 */
214 215 processorid_t *apix_major_to_cpu; /* major to cpu mapping */
215 216 kmutex_t apix_mutex; /* for apix_dev_vector & apix_major_to_cpu */
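
As a sketch of how the round-robin-with-affinity policy consults this table (illustrative only; pick_next_cpu_round_robin() is a hypothetical stand-in for the allocator's CPU-selection step, not a function in this file):

	static processorid_t
	cpu_for_major(major_t major)
	{
		processorid_t cpu;

		mutex_enter(&apix_mutex);
		cpu = apix_major_to_cpu[major];
		if (cpu == IRQ_UNINIT) {
			/* first vector for this driver: pick and record a CPU */
			cpu = pick_next_cpu_round_robin();	/* hypothetical */
			apix_major_to_cpu[major] = cpu;
		}
		mutex_exit(&apix_mutex);

		return (cpu);
	}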
216 217
217 218 int apix_nipis = 16; /* Maximum number of IPIs */
218 219 /*
219 220 * Maximum number of vectors per CPU that can be used for interrupt
220 221 * allocation (including IPIs and the reserved vectors).
221 222 */
222 223 int apix_cpu_nvectors = APIX_NVECTOR;
223 224
224 225 /* number of CPUs in power-on transition state */
225 226 static int apic_poweron_cnt = 0;
226 227
227 228 /* gcpu.h */
228 229
229 230 extern void apic_do_interrupt(struct regs *rp, trap_trace_rec_t *ttp);
230 231 extern void apic_change_eoi();
231 232
232 233 /*
233 234 * This is the loadable module wrapper
234 235 */
235 236
236 237 int
237 238 _init(void)
238 239 {
239 240 if (apic_coarse_hrtime)
240 241 apix_ops.psm_gethrtime = &apic_gettime;
241 242 return (psm_mod_init(&apix_hdlp, &apix_psm_info));
242 243 }
243 244
244 245 int
245 246 _fini(void)
246 247 {
247 248 return (psm_mod_fini(&apix_hdlp, &apix_psm_info));
248 249 }
249 250
250 251 int
251 252 _info(struct modinfo *modinfop)
252 253 {
253 254 return (psm_mod_info(&apix_hdlp, &apix_psm_info, modinfop));
254 255 }
255 256
256 257 static int
257 258 apix_probe()
258 259 {
259 260 int rval;
260 261
261 262 if (apix_enable == 0)
262 263 return (PSM_FAILURE);
263 264
264 - /*
265 - * FIXME Temporarily disable apix module on Xen HVM platform due to
266 - * known hang during boot (see #3605).
267 - *
268 - * Please remove when/if the issue is resolved.
269 - */
270 - if (get_hwenv() == HW_XEN_HVM)
271 - return (PSM_FAILURE);
272 -
273 265 /* check for hw features if specified */
274 266 if (apix_hw_chk_enable) {
275 267 /* check if x2APIC mode is supported */
276 268 if ((apix_supported_hw & APIX_SUPPORT_X2APIC) ==
277 269 APIX_SUPPORT_X2APIC) {
278 270 if (apic_local_mode() == LOCAL_X2APIC) {
279 271 /* x2APIC mode activated by BIOS, switch ops */
280 272 apic_mode = LOCAL_X2APIC;
281 273 apic_change_ops();
282 274 } else if (!apic_detect_x2apic()) {
283 275 /* x2APIC mode is not supported in the hw */
284 276 apix_enable = 0;
285 277 }
286 278 }
287 279 if (apix_enable == 0)
288 280 return (PSM_FAILURE);
289 281 }
290 282
291 283 rval = apic_probe_common(apix_psm_info.p_mach_idstring);
292 284 if (rval == PSM_SUCCESS)
293 285 apix_is_enabled = 1;
294 286 else
295 287 apix_is_enabled = 0;
296 288 return (rval);
297 289 }
298 290
299 291 /*
300 292 * Initialize the data structures needed by the pcplusmpx module.
301 293 * Specifically, the data structures used by addspl() and delspl()
302 294 * routines.
303 295 */
304 296 static void
305 297 apix_softinit()
306 298 {
307 299 int i, *iptr;
308 300 apix_impl_t *hdlp;
309 301 int nproc;
310 302
311 303 nproc = max(apic_nproc, apic_max_nproc);
312 304
313 305 hdlp = kmem_zalloc(nproc * sizeof (apix_impl_t), KM_SLEEP);
314 306 for (i = 0; i < nproc; i++) {
315 307 apixs[i] = &hdlp[i];
316 308 apixs[i]->x_cpuid = i;
317 309 LOCK_INIT_CLEAR(&apixs[i]->x_lock);
318 310 }
319 311
320 312 /* cpu 0 is always up (for now) */
321 313 apic_cpus[0].aci_status = APIC_CPU_ONLINE | APIC_CPU_INTR_ENABLE;
322 314
323 315 iptr = (int *)&apic_irq_table[0];
324 316 for (i = 0; i <= APIC_MAX_VECTOR; i++) {
325 317 apic_level_intr[i] = 0;
326 318 *iptr++ = NULL;
327 319 }
328 320 mutex_init(&airq_mutex, NULL, MUTEX_DEFAULT, NULL);
329 321
330 322 apix_dev_vector = kmem_zalloc(sizeof (apix_dev_vector_t *) * devcnt,
331 323 KM_SLEEP);
332 324
333 325 if (apic_intr_policy == INTR_ROUND_ROBIN_WITH_AFFINITY) {
334 326 apix_major_to_cpu = kmem_zalloc(sizeof (int) * devcnt,
335 327 KM_SLEEP);
336 328 for (i = 0; i < devcnt; i++)
337 329 apix_major_to_cpu[i] = IRQ_UNINIT;
338 330 }
339 331
340 332 mutex_init(&apix_mutex, NULL, MUTEX_DEFAULT, NULL);
341 333 }
342 334
343 335 static int
344 336 apix_get_pending_spl(void)
345 337 {
346 338 int cpuid = CPU->cpu_id;
347 339
348 340 return (bsrw_insn(apixs[cpuid]->x_intr_pending));
349 341 }
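
bsrw_insn() is a bit-scan-reverse: the index of the highest set bit in the 16-bit x_intr_pending mask is the highest pending IPL. A portable sketch of the same computation (illustrative only; the kernel uses the bsr instruction directly):

	static int
	highest_pending_ipl(unsigned short pending)
	{
		int ipl = -1;

		/* index of the most significant set bit, or -1 if none set */
		while (pending != 0) {
			ipl++;
			pending >>= 1;
		}

		return (ipl);
	}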
350 342
351 343 static uintptr_t
352 344 apix_get_intr_handler(int cpu, short vec)
353 345 {
354 346 apix_vector_t *apix_vector;
355 347
356 348 ASSERT(cpu < apic_nproc && vec < APIX_NVECTOR);
357 349 if (cpu >= apic_nproc)
358 350 return (NULL);
359 351
360 352 apix_vector = apixs[cpu]->x_vectbl[vec];
361 353
362 354 return ((uintptr_t)(apix_vector->v_autovect));
363 355 }
364 356
365 357 static void
366 358 apix_init()
367 359 {
368 360 extern void (*do_interrupt_common)(struct regs *, trap_trace_rec_t *);
369 361
370 362 APIC_VERBOSE(INIT, (CE_CONT, "apix: psm_softinit\n"));
371 363
372 364 do_interrupt_common = apix_do_interrupt;
373 365 addintr = apix_add_avintr;
374 366 remintr = apix_rem_avintr;
375 367 get_pending_spl = apix_get_pending_spl;
376 368 get_intr_handler = apix_get_intr_handler;
377 369 psm_get_localapicid = apic_get_localapicid;
378 370 psm_get_ioapicid = apic_get_ioapicid;
379 371
380 372 apix_softinit();
381 373
382 374 #if !defined(__amd64)
383 375 if (cpuid_have_cr8access(CPU))
384 376 apic_have_32bit_cr8 = 1;
385 377 #endif
386 378
387 379 /*
388 380 * Initialize IRM pool parameters
389 381 */
390 382 if (irm_enable) {
391 383 int i;
392 384 int lowest_irq;
393 385 int highest_irq;
394 386
395 387 /* number of CPUs present */
396 388 apix_irminfo.apix_ncpus = apic_nproc;
397 389 /* total number of entries in all of the IOAPICs present */
398 390 lowest_irq = apic_io_vectbase[0];
399 391 highest_irq = apic_io_vectend[0];
400 392 for (i = 1; i < apic_io_max; i++) {
401 393 if (apic_io_vectbase[i] < lowest_irq)
402 394 lowest_irq = apic_io_vectbase[i];
403 395 if (apic_io_vectend[i] > highest_irq)
404 396 highest_irq = apic_io_vectend[i];
405 397 }
406 398 apix_irminfo.apix_ioapic_max_vectors =
407 399 highest_irq - lowest_irq + 1;
408 400 /*
409 401 * Number of available per-CPU vectors excluding
410 402 * reserved vectors for Dtrace, int80, system-call,
411 403 * fast-trap, etc.
412 404 */
413 405 apix_irminfo.apix_per_cpu_vectors = APIX_NAVINTR -
414 406 APIX_SW_RESERVED_VECTORS;
415 407
416 408 /* Number of vectors (pre) allocated (SCI and HPET) */
417 409 apix_irminfo.apix_vectors_allocated = 0;
418 410 if (apic_hpet_vect != -1)
419 411 apix_irminfo.apix_vectors_allocated++;
420 412 if (apic_sci_vect != -1)
421 413 apix_irminfo.apix_vectors_allocated++;
422 414 }
423 415 }
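
Worked example for the pool sizing above: with two IOAPICs covering GSIs 0-23 and 24-55, lowest_irq is 0 and highest_irq is 55, so apix_ioapic_max_vectors = 55 - 0 + 1 = 56.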
424 416
425 417 static void
426 418 apix_init_intr()
427 419 {
428 420 processorid_t cpun = psm_get_cpu_id();
429 421 uint_t nlvt;
430 422 uint32_t svr = AV_UNIT_ENABLE | APIC_SPUR_INTR;
431 423 extern void cmi_cmci_trap(void);
432 424
433 425 apic_reg_ops->apic_write_task_reg(APIC_MASK_ALL);
434 426
435 427 if (apic_mode == LOCAL_APIC) {
436 428 /*
437 429 * We are running APIC in MMIO mode.
438 430 */
439 431 if (apic_flat_model) {
440 432 apic_reg_ops->apic_write(APIC_FORMAT_REG,
441 433 APIC_FLAT_MODEL);
442 434 } else {
443 435 apic_reg_ops->apic_write(APIC_FORMAT_REG,
444 436 APIC_CLUSTER_MODEL);
445 437 }
446 438
447 439 apic_reg_ops->apic_write(APIC_DEST_REG,
448 440 AV_HIGH_ORDER >> cpun);
449 441 }
450 442
451 443 if (apic_directed_EOI_supported()) {
452 444 /*
453 445 * Setting the 12th bit in the Spurious Interrupt Vector
454 446 * Register suppresses broadcast EOIs generated by the local
455 447 * APIC. The suppression of broadcast EOIs happens only when
456 448 * interrupts are level-triggered.
457 449 */
458 450 svr |= APIC_SVR_SUPPRESS_BROADCAST_EOI;
459 451 }
460 452
461 453 /* need to enable APIC before unmasking NMI */
462 454 apic_reg_ops->apic_write(APIC_SPUR_INT_REG, svr);
463 455
464 456 /*
465 457 * Presence of an invalid vector with delivery mode AV_FIXED can
466 458 * cause an error interrupt, even if the entry is masked...so
467 459 * write a valid vector to LVT entries along with the mask bit
468 460 */
469 461
470 462 /* All APICs have timer and LINT0/1 */
471 463 apic_reg_ops->apic_write(APIC_LOCAL_TIMER, AV_MASK|APIC_RESV_IRQ);
472 464 apic_reg_ops->apic_write(APIC_INT_VECT0, AV_MASK|APIC_RESV_IRQ);
473 465 apic_reg_ops->apic_write(APIC_INT_VECT1, AV_NMI); /* enable NMI */
474 466
475 467 /*
476 468 * On integrated APICs, the number of LVT entries is
477 469 * 'Max LVT entry' + 1; on 82489DX's (non-integrated
478 470 * APICs), nlvt is "3" (LINT0, LINT1, and timer)
479 471 */
480 472
481 473 if (apic_cpus[cpun].aci_local_ver < APIC_INTEGRATED_VERS) {
482 474 nlvt = 3;
483 475 } else {
484 476 nlvt = ((apic_reg_ops->apic_read(APIC_VERS_REG) >> 16) &
485 477 0xFF) + 1;
486 478 }
487 479
488 480 if (nlvt >= 5) {
489 481 /* Enable performance counter overflow interrupt */
490 482
491 483 if (!is_x86_feature(x86_featureset, X86FSET_MSR))
492 484 apic_enable_cpcovf_intr = 0;
493 485 if (apic_enable_cpcovf_intr) {
494 486 if (apic_cpcovf_vect == 0) {
495 487 int ipl = APIC_PCINT_IPL;
496 488
497 489 apic_cpcovf_vect = apix_get_ipivect(ipl, -1);
498 490 ASSERT(apic_cpcovf_vect);
499 491
500 492 (void) add_avintr(NULL, ipl,
501 493 (avfunc)kcpc_hw_overflow_intr,
502 494 "apic pcint", apic_cpcovf_vect,
503 495 NULL, NULL, NULL, NULL);
504 496 kcpc_hw_overflow_intr_installed = 1;
505 497 kcpc_hw_enable_cpc_intr =
506 498 apic_cpcovf_mask_clear;
507 499 }
508 500 apic_reg_ops->apic_write(APIC_PCINT_VECT,
509 501 apic_cpcovf_vect);
510 502 }
511 503 }
512 504
513 505 if (nlvt >= 6) {
514 506 /* Only mask TM intr if the BIOS apparently doesn't use it */
515 507
516 508 uint32_t lvtval;
517 509
518 510 lvtval = apic_reg_ops->apic_read(APIC_THERM_VECT);
519 511 if (((lvtval & AV_MASK) == AV_MASK) ||
520 512 ((lvtval & AV_DELIV_MODE) != AV_SMI)) {
521 513 apic_reg_ops->apic_write(APIC_THERM_VECT,
522 514 AV_MASK|APIC_RESV_IRQ);
523 515 }
524 516 }
525 517
526 518 /* Enable error interrupt */
527 519
528 520 if (nlvt >= 4 && apic_enable_error_intr) {
529 521 if (apic_errvect == 0) {
530 522 int ipl = 0xf; /* get highest priority intr */
531 523 apic_errvect = apix_get_ipivect(ipl, -1);
532 524 ASSERT(apic_errvect);
533 525 /*
534 526 * Not PSMI compliant, but we are going to merge
535 527 * with ON anyway
536 528 */
537 529 (void) add_avintr(NULL, ipl,
538 530 (avfunc)apic_error_intr, "apic error intr",
539 531 apic_errvect, NULL, NULL, NULL, NULL);
540 532 }
541 533 apic_reg_ops->apic_write(APIC_ERR_VECT, apic_errvect);
542 534 apic_reg_ops->apic_write(APIC_ERROR_STATUS, 0);
543 535 apic_reg_ops->apic_write(APIC_ERROR_STATUS, 0);
544 536 }
545 537
546 538 /* Enable CMCI interrupt */
547 539 if (cmi_enable_cmci) {
548 540 mutex_enter(&cmci_cpu_setup_lock);
549 541 if (cmci_cpu_setup_registered == 0) {
550 542 mutex_enter(&cpu_lock);
551 543 register_cpu_setup_func(cmci_cpu_setup, NULL);
552 544 mutex_exit(&cpu_lock);
553 545 cmci_cpu_setup_registered = 1;
554 546 }
555 547 mutex_exit(&cmci_cpu_setup_lock);
556 548
557 549 if (apic_cmci_vect == 0) {
558 550 int ipl = 0x2;
559 551 apic_cmci_vect = apix_get_ipivect(ipl, -1);
560 552 ASSERT(apic_cmci_vect);
561 553
562 554 (void) add_avintr(NULL, ipl,
563 555 (avfunc)cmi_cmci_trap, "apic cmci intr",
564 556 apic_cmci_vect, NULL, NULL, NULL, NULL);
565 557 }
566 558 apic_reg_ops->apic_write(APIC_CMCI_VECT, apic_cmci_vect);
567 559 }
568 560
569 561 apic_reg_ops->apic_write_task_reg(0);
570 562 }
571 563
572 564 static void
573 565 apix_picinit(void)
574 566 {
575 567 int i, j;
576 568 uint_t isr;
577 569
578 570 APIC_VERBOSE(INIT, (CE_CONT, "apix: psm_picinit\n"));
579 571
580 572 /*
581 573 * initialize interrupt remapping before apic
582 574 * hardware initialization
583 575 */
584 576 apic_intrmap_init(apic_mode);
585 577 if (apic_vt_ops == psm_vt_ops)
586 578 apix_mul_ioapic_method = APIC_MUL_IOAPIC_IIR;
587 579
588 580 /*
589 581 * On UniSys Model 6520, the BIOS leaves vector 0x20 isr
590 582 * bit on without clearing it with EOI. Since softint
591 583 * uses vector 0x20 to interrupt itself, softint will
592 584 * not work on this machine. To fix this problem,
593 585 * a check is made to verify all the isr bits are clear.
594 586 * If not, EOIs are issued to clear the bits.
595 587 */
596 588 for (i = 7; i >= 1; i--) {
597 589 isr = apic_reg_ops->apic_read(APIC_ISR_REG + (i * 4));
598 590 if (isr != 0)
599 591 for (j = 0; ((j < 32) && (isr != 0)); j++)
600 592 if (isr & (1 << j)) {
601 593 apic_reg_ops->apic_write(
602 594 APIC_EOI_REG, 0);
603 595 isr &= ~(1 << j);
604 596 apic_error |= APIC_ERR_BOOT_EOI;
605 597 }
606 598 }
607 599
608 600 /* set a flag so we know we have run apic_picinit() */
609 601 apic_picinit_called = 1;
610 602 LOCK_INIT_CLEAR(&apic_gethrtime_lock);
611 603 LOCK_INIT_CLEAR(&apic_ioapic_lock);
612 604 LOCK_INIT_CLEAR(&apic_error_lock);
613 605 LOCK_INIT_CLEAR(&apic_mode_switch_lock);
614 606
615 607 picsetup(); /* initialise the 8259 */
616 608
617 609 /* add nmi handler - least priority nmi handler */
618 610 LOCK_INIT_CLEAR(&apic_nmi_lock);
619 611
620 612 if (!psm_add_nmintr(0, (avfunc) apic_nmi_intr,
621 613 "apix NMI handler", (caddr_t)NULL))
622 614 cmn_err(CE_WARN, "apix: Unable to add nmi handler");
623 615
624 616 apix_init_intr();
625 617
626 618 /* enable apic mode if imcr present */
627 619 if (apic_imcrp) {
628 620 outb(APIC_IMCR_P1, (uchar_t)APIC_IMCR_SELECT);
629 621 outb(APIC_IMCR_P2, (uchar_t)APIC_IMCR_APIC);
630 622 }
631 623
632 624 ioapix_init_intr(IOAPIC_MASK);
633 625
634 626 /* setup global IRM pool if applicable */
635 627 if (irm_enable)
636 628 apix_irm_init();
637 629 }
638 630
639 631 static __inline__ void
640 632 apix_send_eoi(void)
641 633 {
642 634 if (apic_mode == LOCAL_APIC)
643 635 LOCAL_APIC_WRITE_REG(APIC_EOI_REG, 0);
644 636 else
645 637 X2APIC_WRITE(APIC_EOI_REG, 0);
646 638 }
647 639
648 640 /*
649 641 * platform_intr_enter
650 642 *
651 643 * Called at the beginning of the interrupt service routine, but unlike
652 644 * pcplusmp, does not mask interrupts. An EOI is given to the interrupt
653 645 * controller to enable other HW interrupts but interrupts are still
654 646 * masked by the IF flag.
655 647 *
656 648 * Return -1 for spurious interrupts
657 649 *
658 650 */
659 651 static int
660 652 apix_intr_enter(int ipl, int *vectorp)
661 653 {
662 654 struct cpu *cpu = CPU;
663 655 uint32_t cpuid = CPU->cpu_id;
664 656 apic_cpus_info_t *cpu_infop;
665 657 uchar_t vector;
666 658 apix_vector_t *vecp;
667 659 int nipl = -1;
668 660
669 661 /*
670 662 * The real vector delivered is (*vectorp + 0x20), but our caller
671 663 * subtracts 0x20 from the vector before passing it to us.
672 664 * (That's why APIC_BASE_VECT is 0x20.)
673 665 */
674 666 vector = *vectorp = (uchar_t)*vectorp + APIC_BASE_VECT;
675 667
676 668 cpu_infop = &apic_cpus[cpuid];
677 669 if (vector == APIC_SPUR_INTR) {
678 670 cpu_infop->aci_spur_cnt++;
679 671 return (APIC_INT_SPURIOUS);
680 672 }
681 673
682 674 vecp = xv_vector(cpuid, vector);
683 675 if (vecp == NULL) {
684 676 if (APIX_IS_FAKE_INTR(vector))
685 677 nipl = apix_rebindinfo.i_pri;
686 678 apix_send_eoi();
687 679 return (nipl);
688 680 }
689 681 nipl = vecp->v_pri;
690 682
691 683 /* if interrupted by the clock, increment apic_nsec_since_boot */
692 684 if (vector == (apic_clkvect + APIC_BASE_VECT)) {
693 685 if (!apic_oneshot) {
694 686 /* NOTE: this is not MT aware */
695 687 apic_hrtime_stamp++;
696 688 apic_nsec_since_boot += apic_nsec_per_intr;
697 689 apic_hrtime_stamp++;
698 690 last_count_read = apic_hertz_count;
699 691 apix_redistribute_compute();
700 692 }
701 693
702 694 apix_send_eoi();
703 695
704 696 return (nipl);
705 697 }
706 698
707 699 ASSERT(vecp->v_state != APIX_STATE_OBSOLETED);
708 700
709 701 /* pre-EOI handling for level-triggered interrupts */
710 702 if (!APIX_IS_DIRECTED_EOI(apix_mul_ioapic_method) &&
711 703 (vecp->v_type & APIX_TYPE_FIXED) && apic_level_intr[vecp->v_inum])
712 704 apix_level_intr_pre_eoi(vecp->v_inum);
713 705
714 706 /* send back EOI */
715 707 apix_send_eoi();
716 708
717 709 cpu_infop->aci_current[nipl] = vector;
718 710 if ((nipl > ipl) && (nipl > cpu->cpu_base_spl)) {
719 711 cpu_infop->aci_curipl = (uchar_t)nipl;
720 712 cpu_infop->aci_ISR_in_progress |= 1 << nipl;
721 713 }
722 714
723 715 #ifdef DEBUG
724 716 if (vector >= APIX_IPI_MIN)
725 717 return (nipl); /* skip IPI */
726 718
727 719 APIC_DEBUG_BUF_PUT(vector);
728 720 APIC_DEBUG_BUF_PUT(vecp->v_inum);
729 721 APIC_DEBUG_BUF_PUT(nipl);
730 722 APIC_DEBUG_BUF_PUT(psm_get_cpu_id());
731 723 if ((apic_stretch_interrupts) && (apic_stretch_ISR & (1 << nipl)))
732 724 drv_usecwait(apic_stretch_interrupts);
733 725 #endif /* DEBUG */
734 726
735 727 return (nipl);
736 728 }
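
To make the vector adjustment concrete: when hardware vector 0x45 is delivered, the low-level handler passes 0x25 in *vectorp; adding APIC_BASE_VECT (0x20) restores 0x45, which is also written back through vectorp for the caller.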
737 729
738 730 /*
739 731 * Any changes made to this function must also change X2APIC
740 732 * version of intr_exit.
741 733 */
742 734 static void
743 735 apix_intr_exit(int prev_ipl, int arg2)
744 736 {
745 737 int cpuid = psm_get_cpu_id();
746 738 apic_cpus_info_t *cpu_infop = &apic_cpus[cpuid];
747 739 apix_impl_t *apixp = apixs[cpuid];
748 740
749 741 UNREFERENCED_1PARAMETER(arg2);
750 742
751 743 cpu_infop->aci_curipl = (uchar_t)prev_ipl;
752 744 /* ISR above current pri could not be in progress */
753 745 cpu_infop->aci_ISR_in_progress &= (2 << prev_ipl) - 1;
754 746
755 747 if (apixp->x_obsoletes != NULL) {
756 748 if (APIX_CPU_LOCK_HELD(cpuid))
757 749 return;
758 750
759 751 APIX_ENTER_CPU_LOCK(cpuid);
760 752 (void) apix_obsolete_vector(apixp->x_obsoletes);
761 753 APIX_LEAVE_CPU_LOCK(cpuid);
762 754 }
763 755 }
764 756
765 757 /*
766 758 * The pcplusmp setspl code uses the TPR to mask all interrupts at or below the
767 759 * given ipl, but apix never uses the TPR and we never mask a subset of the
768 760 * interrupts. They are either all blocked by the IF flag or all can come in.
769 761 *
770 762 * For setspl, we mask all interrupts for XC_HI_PIL (15), otherwise, interrupts
771 763 * can come in if currently enabled by the IF flag. This table shows the state
772 764 * of the IF flag when we leave this function.
773 765 *
774 766 * curr IF | ipl == 15 ipl != 15
775 767 * --------+---------------------------
776 768 * 0 | 0 0
777 769 * 1 | 0 1
778 770 */
779 771 static void
780 772 apix_setspl(int ipl)
781 773 {
782 774 /*
783 775 * Interrupts at ipl above this cannot be in progress, so the following
784 776 * mask is ok.
785 777 */
786 778 apic_cpus[psm_get_cpu_id()].aci_ISR_in_progress &= (2 << ipl) - 1;
787 779
788 780 if (ipl == XC_HI_PIL)
789 781 cli();
790 782 }
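
Both apix_intr_exit() and apix_setspl() prune aci_ISR_in_progress with (2 << ipl) - 1, which keeps bits 0 through ipl and clears everything above. A standalone check of that arithmetic (illustrative only):

	#include <assert.h>

	int
	main(void)
	{
		assert(((2 << 4) - 1) == 0x1f);		/* ipl 4: bits 0..4 survive */
		assert(((2 << 15) - 1) == 0xffff);	/* ipl 15: all 16 level bits */

		return (0);
	}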
791 783
792 784 int
793 785 apix_addspl(int virtvec, int ipl, int min_ipl, int max_ipl)
794 786 {
795 787 uint32_t cpuid = APIX_VIRTVEC_CPU(virtvec);
796 788 uchar_t vector = (uchar_t)APIX_VIRTVEC_VECTOR(virtvec);
797 789 apix_vector_t *vecp = xv_vector(cpuid, vector);
798 790
799 791 UNREFERENCED_3PARAMETER(ipl, min_ipl, max_ipl);
800 792 ASSERT(vecp != NULL && LOCK_HELD(&apix_lock));
801 793
802 794 if (vecp->v_type == APIX_TYPE_FIXED)
803 795 apix_intx_set_shared(vecp->v_inum, 1);
804 796
805 797 /* There are more interrupts, so it's already been enabled */
806 798 if (vecp->v_share > 1)
807 799 return (PSM_SUCCESS);
808 800
809 801 /* return if it is not hardware interrupt */
810 802 if (vecp->v_type == APIX_TYPE_IPI)
811 803 return (PSM_SUCCESS);
812 804
813 805 /*
814 806 * if apix_picinit() has not been called yet, just return.
815 807 * At the end of apix_picinit(), we will call setup_io_intr().
816 808 */
817 809 if (!apic_picinit_called)
818 810 return (PSM_SUCCESS);
819 811
820 812 (void) apix_setup_io_intr(vecp);
821 813
822 814 return (PSM_SUCCESS);
823 815 }
824 816
825 817 int
826 818 apix_delspl(int virtvec, int ipl, int min_ipl, int max_ipl)
827 819 {
828 820 uint32_t cpuid = APIX_VIRTVEC_CPU(virtvec);
829 821 uchar_t vector = (uchar_t)APIX_VIRTVEC_VECTOR(virtvec);
830 822 apix_vector_t *vecp = xv_vector(cpuid, vector);
831 823
832 824 UNREFERENCED_3PARAMETER(ipl, min_ipl, max_ipl);
833 825 ASSERT(vecp != NULL && LOCK_HELD(&apix_lock));
834 826
835 827 if (vecp->v_type == APIX_TYPE_FIXED)
836 828 apix_intx_set_shared(vecp->v_inum, -1);
837 829
838 830 /* There are more interrupts */
839 831 if (vecp->v_share > 1)
840 832 return (PSM_SUCCESS);
841 833
842 834 /* return if it is not hardware interrupt */
843 835 if (vecp->v_type == APIX_TYPE_IPI)
844 836 return (PSM_SUCCESS);
845 837
846 838 if (!apic_picinit_called) {
847 839 cmn_err(CE_WARN, "apix: delete 0x%x before apic init",
848 840 virtvec);
849 841 return (PSM_SUCCESS);
850 842 }
851 843
852 844 apix_disable_vector(vecp);
853 845
854 846 return (PSM_SUCCESS);
855 847 }
856 848
857 849 /*
858 850 * Try to disable all interrupts. We just assign interrupts to other
859 851 * processors based on policy. If any were bound by user request, we
860 852 * let them continue and return failure. We do not bother to check
861 853 * for cache affinity while rebinding.
862 854 */
863 855 static int
864 856 apix_disable_intr(processorid_t cpun)
865 857 {
866 858 apix_impl_t *apixp = apixs[cpun];
867 859 apix_vector_t *vecp, *newp;
868 860 int bindcpu, i, hardbound = 0, errbound = 0, ret, loop, type;
869 861
870 862 lock_set(&apix_lock);
871 863
872 864 apic_cpus[cpun].aci_status &= ~APIC_CPU_INTR_ENABLE;
873 865 apic_cpus[cpun].aci_curipl = 0;
874 866
875 867 /* if this is for SUSPEND operation, skip rebinding */
876 868 if (apic_cpus[cpun].aci_status & APIC_CPU_SUSPEND) {
877 869 for (i = APIX_AVINTR_MIN; i <= APIX_AVINTR_MAX; i++) {
878 870 vecp = apixp->x_vectbl[i];
879 871 if (!IS_VECT_ENABLED(vecp))
880 872 continue;
881 873
882 874 apix_disable_vector(vecp);
883 875 }
884 876 lock_clear(&apix_lock);
885 877 return (PSM_SUCCESS);
886 878 }
887 879
888 880 for (i = APIX_AVINTR_MIN; i <= APIX_AVINTR_MAX; i++) {
889 881 vecp = apixp->x_vectbl[i];
890 882 if (!IS_VECT_ENABLED(vecp))
891 883 continue;
892 884
893 885 if (vecp->v_flags & APIX_VECT_USER_BOUND) {
894 886 hardbound++;
895 887 continue;
896 888 }
897 889 type = vecp->v_type;
898 890
899 891 /*
900 892 * If there are bound interrupts on this cpu, then
901 893 * rebind them to other processors.
902 894 */
903 895 loop = 0;
904 896 do {
905 897 bindcpu = apic_find_cpu(APIC_CPU_INTR_ENABLE);
906 898
907 899 if (type != APIX_TYPE_MSI)
908 900 newp = apix_set_cpu(vecp, bindcpu, &ret);
909 901 else
910 902 newp = apix_grp_set_cpu(vecp, bindcpu, &ret);
911 903 } while ((newp == NULL) && (loop++ < apic_nproc));
912 904
913 905 if (loop >= apic_nproc) {
914 906 errbound++;
915 907 cmn_err(CE_WARN, "apix: failed to rebind vector %x/%x",
916 908 vecp->v_cpuid, vecp->v_vector);
917 909 }
918 910 }
919 911
920 912 lock_clear(&apix_lock);
921 913
922 914 if (hardbound || errbound) {
923 915 cmn_err(CE_WARN, "Could not disable interrupts on %d "
924 916 "due to user bound interrupts or failed operation",
925 917 cpun);
926 918 return (PSM_FAILURE);
927 919 }
928 920
929 921 return (PSM_SUCCESS);
930 922 }
931 923
932 924 /*
933 925 * Bind interrupts to specified CPU
934 926 */
935 927 static void
936 928 apix_enable_intr(processorid_t cpun)
937 929 {
938 930 apix_vector_t *vecp;
939 931 int i, ret;
940 932 processorid_t n;
941 933
942 934 lock_set(&apix_lock);
943 935
944 936 apic_cpus[cpun].aci_status |= APIC_CPU_INTR_ENABLE;
945 937
946 938 /* interrupt enabling for system resume */
947 939 if (apic_cpus[cpun].aci_status & APIC_CPU_SUSPEND) {
948 940 for (i = APIX_AVINTR_MIN; i <= APIX_AVINTR_MAX; i++) {
949 941 vecp = xv_vector(cpun, i);
950 942 if (!IS_VECT_ENABLED(vecp))
951 943 continue;
952 944
953 945 apix_enable_vector(vecp);
954 946 }
955 947 apic_cpus[cpun].aci_status &= ~APIC_CPU_SUSPEND;
956 948 }
957 949
958 950 for (n = 0; n < apic_nproc; n++) {
959 951 if (!apic_cpu_in_range(n) || n == cpun ||
960 952 (apic_cpus[n].aci_status & APIC_CPU_INTR_ENABLE) == 0)
961 953 continue;
962 954
963 955 for (i = APIX_AVINTR_MIN; i <= APIX_AVINTR_MAX; i++) {
964 956 vecp = xv_vector(n, i);
965 957 if (!IS_VECT_ENABLED(vecp) ||
966 958 vecp->v_bound_cpuid != cpun)
967 959 continue;
968 960
969 961 if (vecp->v_type != APIX_TYPE_MSI)
970 962 (void) apix_set_cpu(vecp, cpun, &ret);
971 963 else
972 964 (void) apix_grp_set_cpu(vecp, cpun, &ret);
973 965 }
974 966 }
975 967
976 968 lock_clear(&apix_lock);
977 969 }
978 970
979 971 /*
980 972 * Allocate vector for IPI
981 973 * type == -1 indicates it is an internal request. Do not change
982 974 * resv_vector for these requests.
983 975 */
984 976 static int
985 977 apix_get_ipivect(int ipl, int type)
986 978 {
987 979 uchar_t vector;
988 980
989 981 if ((vector = apix_alloc_ipi(ipl)) > 0) {
990 982 if (type != -1)
991 983 apic_resv_vector[ipl] = vector;
992 984 return (vector);
993 985 }
994 986 apic_error |= APIC_ERR_GET_IPIVECT_FAIL;
995 987 return (-1); /* shouldn't happen */
996 988 }
997 989
998 990 static int
999 991 apix_get_clkvect(int ipl)
1000 992 {
1001 993 int vector;
1002 994
1003 995 if ((vector = apix_get_ipivect(ipl, -1)) == -1)
1004 996 return (-1);
1005 997
1006 998 apic_clkvect = vector - APIC_BASE_VECT;
1007 999 APIC_VERBOSE(IPI, (CE_CONT, "apix: clock vector = %x\n",
1008 1000 apic_clkvect));
1009 1001 return (vector);
1010 1002 }
1011 1003
1012 1004 static int
1013 1005 apix_post_cpu_start()
1014 1006 {
1015 1007 int cpun;
1016 1008 static int cpus_started = 1;
1017 1009
1018 1010 /* We know this CPU + BSP started successfully. */
1019 1011 cpus_started++;
1020 1012
1021 1013 /*
1022 1014 * On the BSP we would have enabled X2APIC, if supported by the
1023 1015 * processor, in acpi_probe(), but on an AP we do it here.
1024 1016 *
1025 1017 * We enable X2APIC mode only if BSP is running in X2APIC & the
1026 1018 * local APIC mode of the current CPU is MMIO (xAPIC).
1027 1019 */
1028 1020 if (apic_mode == LOCAL_X2APIC && apic_detect_x2apic() &&
1029 1021 apic_local_mode() == LOCAL_APIC) {
1030 1022 apic_enable_x2apic();
1031 1023 }
1032 1024
1033 1025 /*
1034 1026 * Switch back to x2apic IPI sending method for performance when target
1035 1027 * CPU has entered x2apic mode.
1036 1028 */
1037 1029 if (apic_mode == LOCAL_X2APIC) {
1038 1030 apic_switch_ipi_callback(B_FALSE);
1039 1031 }
1040 1032
1041 1033 splx(ipltospl(LOCK_LEVEL));
1042 1034 apix_init_intr();
1043 1035
1044 1036 /*
1045 1037 * Some systems don't enable the internal cache on the non-boot
1046 1038 * cpus, so we have to enable it here.
1047 1039 */
1048 1040 setcr0(getcr0() & ~(CR0_CD | CR0_NW));
1049 1041
1050 1042 #ifdef DEBUG
1051 1043 APIC_AV_PENDING_SET();
1052 1044 #else
1053 1045 if (apic_mode == LOCAL_APIC)
1054 1046 APIC_AV_PENDING_SET();
1055 1047 #endif /* DEBUG */
1056 1048
1057 1049 /*
1058 1050 * We may be booting, or resuming from suspend; aci_status will
1059 1051 * be APIC_CPU_INTR_ENABLE if coming from suspend, so we add the
1060 1052 * APIC_CPU_ONLINE flag here rather than setting aci_status completely.
1061 1053 */
1062 1054 cpun = psm_get_cpu_id();
1063 1055 apic_cpus[cpun].aci_status |= APIC_CPU_ONLINE;
1064 1056
1065 1057 apic_reg_ops->apic_write(APIC_DIVIDE_REG, apic_divide_reg_init);
1066 1058
1067 1059 return (PSM_SUCCESS);
1068 1060 }
1069 1061
1070 1062 /*
1071 1063 * If this module needs a periodic handler for the interrupt distribution, it
1072 1064 * can be added here. The argument to the periodic handler is not currently
1073 1065 * used, but is reserved for future use.
1074 1066 */
1075 1067 static void
1076 1068 apix_post_cyclic_setup(void *arg)
1077 1069 {
1078 1070 UNREFERENCED_1PARAMETER(arg);
1079 1071
1080 1072 cyc_handler_t cyh;
1081 1073 cyc_time_t cyt;
1082 1074
1083 1075 /* cpu_lock is held */
1084 1076 /* set up a periodic handler for intr redistribution */
1085 1077
1086 1078 /*
1087 1079 * In periodic mode, intr redistribution processing is done in
1088 1080 * apix_intr_enter during clock intr processing.
1089 1081 */
1090 1082 if (!apic_oneshot)
1091 1083 return;
1092 1084
1093 1085 /*
1094 1086 * Register a periodic handler for the redistribution processing.
1095 1087 * Though we would generally prefer to use the DDI interface for
1096 1088 * periodic handler invocation, ddi_periodic_add(9F), we are
1097 1089 * unfortunately already holding cpu_lock, which ddi_periodic_add will
1098 1090 * attempt to take for us. Thus, we add our own cyclic directly:
1099 1091 */
1100 1092 cyh.cyh_func = (void (*)(void *))apix_redistribute_compute;
1101 1093 cyh.cyh_arg = NULL;
1102 1094 cyh.cyh_level = CY_LOW_LEVEL;
1103 1095
1104 1096 cyt.cyt_when = 0;
1105 1097 cyt.cyt_interval = apic_redistribute_sample_interval;
1106 1098
1107 1099 apic_cyclic_id = cyclic_add(&cyh, &cyt);
1108 1100 }
1109 1101
1110 1102 /*
1111 1103 * Called the first time we enable x2apic mode on this cpu.
1112 1104 * Update some of the function pointers to use x2apic routines.
1113 1105 */
1114 1106 void
1115 1107 x2apic_update_psm()
1116 1108 {
1117 1109 struct psm_ops *pops = &apix_ops;
1118 1110
1119 1111 ASSERT(pops != NULL);
1120 1112
1121 1113 /*
1122 1114 * The pcplusmp module's version of x2apic_update_psm makes additional
1123 1115 * changes that we do not have to make here. It needs to make those
1124 1116 * changes because pcplusmp relies on the TPR register and the means of
1125 1117 * addressing that changes when using the local apic versus the x2apic.
1126 1118 * It's also worth noting that the apix driver-specific functions end up
1127 1119 * being apix_foo as opposed to apic_foo and x2apic_foo.
1128 1120 */
1129 1121 pops->psm_send_ipi = x2apic_send_ipi;
1130 1122
1131 1123 send_dirintf = pops->psm_send_ipi;
1132 1124
1133 1125 apic_mode = LOCAL_X2APIC;
1134 1126 apic_change_ops();
1135 1127 }
1136 1128
1137 1129 /*
1138 1130 * This function provides external interface to the nexus for all
1139 1131 * functionalities related to the new DDI interrupt framework.
1140 1132 *
1141 1133 * Input:
1142 1134 * dip - pointer to the dev_info structure of the requested device
1143 1135 * hdlp - pointer to the internal interrupt handle structure for the
1144 1136 * requested interrupt
1145 1137 * intr_op - opcode for this call
1146 1138 * result - pointer to the integer that will hold the result to be
1147 1139 * passed back if return value is PSM_SUCCESS
1148 1140 *
1149 1141 * Output:
1150 1142 * return value is either PSM_SUCCESS or PSM_FAILURE
1151 1143 */
1152 1144 static int
1153 1145 apix_intr_ops(dev_info_t *dip, ddi_intr_handle_impl_t *hdlp,
1154 1146 psm_intr_op_t intr_op, int *result)
1155 1147 {
1156 1148 int cap;
1157 1149 apix_vector_t *vecp, *newvecp;
1158 1150 struct intrspec *ispec, intr_spec;
1159 1151 processorid_t target;
1160 1152
1161 1153 ispec = &intr_spec;
1162 1154 ispec->intrspec_pri = hdlp->ih_pri;
1163 1155 ispec->intrspec_vec = hdlp->ih_inum;
1164 1156 ispec->intrspec_func = hdlp->ih_cb_func;
1165 1157
1166 1158 switch (intr_op) {
1167 1159 case PSM_INTR_OP_ALLOC_VECTORS:
1168 1160 switch (hdlp->ih_type) {
1169 1161 case DDI_INTR_TYPE_MSI:
1170 1162 /* allocate MSI vectors */
1171 1163 *result = apix_alloc_msi(dip, hdlp->ih_inum,
1172 1164 hdlp->ih_scratch1,
1173 1165 (int)(uintptr_t)hdlp->ih_scratch2);
1174 1166 break;
1175 1167 case DDI_INTR_TYPE_MSIX:
1176 1168 /* allocate MSI-X vectors */
1177 1169 *result = apix_alloc_msix(dip, hdlp->ih_inum,
1178 1170 hdlp->ih_scratch1,
1179 1171 (int)(uintptr_t)hdlp->ih_scratch2);
1180 1172 break;
1181 1173 case DDI_INTR_TYPE_FIXED:
1182 1174 /* allocate or share vector for fixed */
1183 1175 if ((ihdl_plat_t *)hdlp->ih_private == NULL) {
1184 1176 return (PSM_FAILURE);
1185 1177 }
1186 1178 ispec = ((ihdl_plat_t *)hdlp->ih_private)->ip_ispecp;
1187 - *result = apix_intx_alloc_vector(dip, hdlp->ih_inum,
1188 - ispec);
1179 + *result = apix_intx_alloc_vector(dip, hdlp, ispec);
1189 1180 break;
1190 1181 default:
1191 1182 return (PSM_FAILURE);
1192 1183 }
1193 1184 break;
1194 1185 case PSM_INTR_OP_FREE_VECTORS:
1195 1186 apix_free_vectors(dip, hdlp->ih_inum, hdlp->ih_scratch1,
1196 1187 hdlp->ih_type);
1197 1188 break;
1198 1189 case PSM_INTR_OP_XLATE_VECTOR:
1199 1190 /*
1200 1191 * Vectors are allocated by ALLOC and freed by FREE.
1201 1192 * XLATE finds and returns APIX_VIRTVEC_VECTOR(cpu, vector).
1202 1193 */
1203 1194 *result = APIX_INVALID_VECT;
1204 1195 vecp = apix_get_dev_map(dip, hdlp->ih_inum, hdlp->ih_type);
1205 1196 if (vecp != NULL) {
1206 1197 *result = APIX_VIRTVECTOR(vecp->v_cpuid,
1207 1198 vecp->v_vector);
1208 1199 break;
1209 1200 }
1210 1201
1211 1202 /*
1212 1203 * No vector-to-device mapping exists. If this is FIXED type,
1213 1204 * check whether this IRQ is already mapped for another device;
1214 1205 * if so, return its vector number (i.e. the shared IRQ case).
1215 1206 * Otherwise, return PSM_FAILURE.
1216 1207 */
1217 1208 if (hdlp->ih_type == DDI_INTR_TYPE_FIXED) {
1218 1209 vecp = apix_intx_xlate_vector(dip, hdlp->ih_inum,
1219 1210 ispec);
1220 1211 *result = (vecp == NULL) ? APIX_INVALID_VECT :
1221 1212 APIX_VIRTVECTOR(vecp->v_cpuid, vecp->v_vector);
1222 1213 }
1223 1214 if (*result == APIX_INVALID_VECT)
1224 1215 return (PSM_FAILURE);
1225 1216 break;
1226 1217 case PSM_INTR_OP_GET_PENDING:
1227 1218 vecp = apix_get_dev_map(dip, hdlp->ih_inum, hdlp->ih_type);
1228 1219 if (vecp == NULL)
1229 1220 return (PSM_FAILURE);
1230 1221
1231 1222 *result = apix_get_pending(vecp);
1232 1223 break;
1233 1224 case PSM_INTR_OP_CLEAR_MASK:
1234 1225 if (hdlp->ih_type != DDI_INTR_TYPE_FIXED)
1235 1226 return (PSM_FAILURE);
1236 1227
1237 1228 vecp = apix_get_dev_map(dip, hdlp->ih_inum, hdlp->ih_type);
1238 1229 if (vecp == NULL)
1239 1230 return (PSM_FAILURE);
1240 1231
1241 1232 apix_intx_clear_mask(vecp->v_inum);
1242 1233 break;
1243 1234 case PSM_INTR_OP_SET_MASK:
1244 1235 if (hdlp->ih_type != DDI_INTR_TYPE_FIXED)
1245 1236 return (PSM_FAILURE);
1246 1237
1247 1238 vecp = apix_get_dev_map(dip, hdlp->ih_inum, hdlp->ih_type);
1248 1239 if (vecp == NULL)
1249 1240 return (PSM_FAILURE);
1250 1241
1251 1242 apix_intx_set_mask(vecp->v_inum);
1252 1243 break;
1253 1244 case PSM_INTR_OP_GET_SHARED:
1254 1245 if (hdlp->ih_type != DDI_INTR_TYPE_FIXED)
1255 1246 return (PSM_FAILURE);
1256 1247
1257 1248 vecp = apix_get_dev_map(dip, hdlp->ih_inum, hdlp->ih_type);
1258 1249 if (vecp == NULL)
1259 1250 return (PSM_FAILURE);
1260 1251
1261 1252 *result = apix_intx_get_shared(vecp->v_inum);
1262 1253 break;
1263 1254 case PSM_INTR_OP_SET_PRI:
1264 1255 /*
1265 1256 * Called prior to adding the interrupt handler or when
1266 1257 * an interrupt handler is unassigned.
1267 1258 */
1268 1259 if (hdlp->ih_type == DDI_INTR_TYPE_FIXED)
1269 1260 return (PSM_SUCCESS);
1270 1261
1271 1262 if (apix_get_dev_map(dip, hdlp->ih_inum, hdlp->ih_type) == NULL)
1272 1263 return (PSM_FAILURE);
1273 1264
1274 1265 break;
1275 1266 case PSM_INTR_OP_SET_CPU:
1276 1267 case PSM_INTR_OP_GRP_SET_CPU:
1277 1268 /*
1278 1269 * The interrupt handle given here has been allocated
1279 1270 * specifically for this command, and ih_private carries
1280 1271 * a CPU value.
1281 1272 */
1282 1273 *result = EINVAL;
1283 1274 target = (int)(intptr_t)hdlp->ih_private;
1284 1275 if (!apic_cpu_in_range(target)) {
1285 1276 DDI_INTR_IMPLDBG((CE_WARN,
1286 1277 "[grp_]set_cpu: cpu out of range: %d\n", target));
1287 1278 return (PSM_FAILURE);
1288 1279 }
1289 1280
1290 1281 lock_set(&apix_lock);
1291 1282
1292 1283 vecp = apix_get_req_vector(hdlp, hdlp->ih_flags);
1293 1284 if (!IS_VECT_ENABLED(vecp)) {
1294 1285 DDI_INTR_IMPLDBG((CE_WARN,
1295 1286 "[grp]_set_cpu: invalid vector 0x%x\n",
1296 1287 hdlp->ih_vector));
1297 1288 lock_clear(&apix_lock);
1298 1289 return (PSM_FAILURE);
1299 1290 }
1300 1291
1301 1292 *result = 0;
1302 1293
1303 1294 if (intr_op == PSM_INTR_OP_SET_CPU)
1304 1295 newvecp = apix_set_cpu(vecp, target, result);
1305 1296 else
1306 1297 newvecp = apix_grp_set_cpu(vecp, target, result);
1307 1298
1308 1299 lock_clear(&apix_lock);
1309 1300
1310 1301 if (newvecp == NULL) {
1311 1302 *result = EIO;
1312 1303 return (PSM_FAILURE);
1313 1304 }
1314 1305 newvecp->v_bound_cpuid = target;
1315 1306 hdlp->ih_vector = APIX_VIRTVECTOR(newvecp->v_cpuid,
1316 1307 newvecp->v_vector);
1317 1308 break;
1318 1309
1319 1310 case PSM_INTR_OP_GET_INTR:
1320 1311 /*
1321 1312 * The interrupt handle given here has been allocated
1322 1313 * specifically for this command, and ih_private carries
1323 1314 * a pointer to an apic_get_intr_t.
1324 1315 */
1325 1316 if (apix_get_intr_info(hdlp, hdlp->ih_private) != PSM_SUCCESS)
1326 1317 return (PSM_FAILURE);
1327 1318 break;
1328 1319
1329 1320 case PSM_INTR_OP_CHECK_MSI:
1330 1321 /*
1331 1322 * Check whether MSI/X is supported at the APIC level and
1332 1323 * mask off the MSI/X bits in hdlp->ih_type if not
1333 1324 * supported before returning. If MSI/X is supported,
1334 1325 * leave the ih_type unchanged and return.
1335 1326 *
1336 1327 * hdlp->ih_type passed in from the nexus has all the
1337 1328 * interrupt types supported by the device.
1338 1329 */
1339 1330 if (apic_support_msi == 0) { /* uninitialized */
1340 1331 /*
1341 1332 * if apic_support_msi is not set, call
1342 1333 * apic_check_msi_support() to check whether msi
1343 1334 * is supported first
1344 1335 */
1345 1336 if (apic_check_msi_support() == PSM_SUCCESS)
1346 1337 apic_support_msi = 1; /* supported */
1347 1338 else
1348 1339 apic_support_msi = -1; /* not-supported */
1349 1340 }
1350 1341 if (apic_support_msi == 1) {
1351 1342 if (apic_msix_enable)
1352 1343 *result = hdlp->ih_type;
1353 1344 else
1354 1345 *result = hdlp->ih_type & ~DDI_INTR_TYPE_MSIX;
1355 1346 } else
1356 1347 *result = hdlp->ih_type & ~(DDI_INTR_TYPE_MSI |
1357 1348 DDI_INTR_TYPE_MSIX);
1358 1349 break;
1359 1350 case PSM_INTR_OP_GET_CAP:
1360 1351 cap = DDI_INTR_FLAG_PENDING;
1361 1352 if (hdlp->ih_type == DDI_INTR_TYPE_FIXED)
1362 1353 cap |= DDI_INTR_FLAG_MASKABLE;
1363 1354 *result = cap;
1364 1355 break;
1365 1356 case PSM_INTR_OP_APIC_TYPE:
1366 1357 ((apic_get_type_t *)(hdlp->ih_private))->avgi_type =
1367 1358 apix_get_apic_type();
1368 1359 ((apic_get_type_t *)(hdlp->ih_private))->avgi_num_intr =
1369 1360 APIX_IPI_MIN;
1370 1361 ((apic_get_type_t *)(hdlp->ih_private))->avgi_num_cpu =
1371 1362 apic_nproc;
1372 1363 hdlp->ih_ver = apic_get_apic_version();
1373 1364 break;
1374 1365 case PSM_INTR_OP_SET_CAP:
1375 1366 default:
1376 1367 return (PSM_FAILURE);
1377 1368 }
1378 1369
1379 1370 return (PSM_SUCCESS);
1380 1371 }
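
A hedged sketch of reaching one of these ops, here PSM_INTR_OP_XLATE_VECTOR, from a caller's perspective (illustrative only; real callers dispatch through psm_intr_ops, and a FIXED-type translate additionally needs ih_private set up as in the ALLOC path above):

	ddi_intr_handle_impl_t hdl;
	int virt_vec;

	bzero(&hdl, sizeof (hdl));
	hdl.ih_type = DDI_INTR_TYPE_MSI;
	hdl.ih_inum = 0;

	if (apix_intr_ops(dip, &hdl, PSM_INTR_OP_XLATE_VECTOR,
	    &virt_vec) == PSM_SUCCESS) {
		cmn_err(CE_CONT, "cpu %d, vector 0x%x\n",
		    APIX_VIRTVEC_CPU(virt_vec), APIX_VIRTVEC_VECTOR(virt_vec));
	}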
1381 1372
1382 1373 static void
1383 1374 apix_cleanup_busy(void)
1384 1375 {
1385 1376 int i, j;
1386 1377 apix_vector_t *vecp;
1387 1378
1388 1379 for (i = 0; i < apic_nproc; i++) {
1389 1380 if (!apic_cpu_in_range(i))
1390 1381 continue;
1391 1382 apic_cpus[i].aci_busy = 0;
1392 1383 for (j = APIX_AVINTR_MIN; j < APIX_AVINTR_MAX; j++) {
1393 1384 if ((vecp = xv_vector(i, j)) != NULL)
1394 1385 vecp->v_busy = 0;
1395 1386 }
1396 1387 }
1397 1388 }
1398 1389
1399 1390 static void
1400 1391 apix_redistribute_compute(void)
1401 1392 {
1402 1393 int i, j, max_busy;
1403 1394
1404 1395 if (!apic_enable_dynamic_migration)
1405 1396 return;
1406 1397
1407 1398 if (++apic_nticks == apic_sample_factor_redistribution) {
1408 1399 /*
1409 1400 * Time to call apic_intr_redistribute().
1410 1401 * Reset apic_nticks. This will cause max_busy
1411 1402 * to be calculated below and if it is more than
1412 1403 * apic_int_busy, we will do the whole thing
1413 1404 */
1414 1405 apic_nticks = 0;
1415 1406 }
1416 1407 max_busy = 0;
1417 1408 for (i = 0; i < apic_nproc; i++) {
1418 1409 if (!apic_cpu_in_range(i))
1419 1410 continue;
1420 1411 /*
1421 1412 * Check if curipl is non zero & if ISR is in
1422 1413 * progress
1423 1414 */
1424 1415 if (((j = apic_cpus[i].aci_curipl) != 0) &&
1425 1416 (apic_cpus[i].aci_ISR_in_progress & (1 << j))) {
1426 1417
1427 1418 int vect;
1428 1419 apic_cpus[i].aci_busy++;
1429 1420 vect = apic_cpus[i].aci_current[j];
1430 1421 apixs[i]->x_vectbl[vect]->v_busy++;
1431 1422 }
1432 1423
1433 1424 if (!apic_nticks &&
1434 1425 (apic_cpus[i].aci_busy > max_busy))
1435 1426 max_busy = apic_cpus[i].aci_busy;
1436 1427 }
1437 1428 if (!apic_nticks) {
1438 1429 if (max_busy > apic_int_busy_mark) {
1439 1430 /*
1440 1431 * We could make the following check be
1441 1432 * skipped > 1 in which case, we get a
1442 1433 * redistribution at half the busy mark (due to
1443 1434 * double interval). Need to be able to collect
1444 1435 * more empirical data to decide if that is a
1445 1436 * good strategy. Punt for now.
1446 1437 */
1447 1438 apix_cleanup_busy();
1448 1439 apic_skipped_redistribute = 0;
1449 1440 } else
1450 1441 apic_skipped_redistribute++;
1451 1442 }
1452 1443 }
1453 1444
1454 1445 /*
1455 1446 * intr_ops() service routines
1456 1447 */
1457 1448
1458 1449 static int
1459 1450 apix_get_pending(apix_vector_t *vecp)
1460 1451 {
1461 1452 int bit, index, irr, pending;
1462 1453
1463 1454 /* need to get on the bound cpu */
1464 1455 mutex_enter(&cpu_lock);
1465 1456 affinity_set(vecp->v_cpuid);
1466 1457
1467 1458 index = vecp->v_vector / 32;
1468 1459 bit = vecp->v_vector % 32;
1469 1460 irr = apic_reg_ops->apic_read(APIC_IRR_REG + index);
1470 1461
1471 1462 affinity_clear();
1472 1463 mutex_exit(&cpu_lock);
1473 1464
1474 1465 pending = (irr & (1 << bit)) ? 1 : 0;
1475 1466 if (!pending && vecp->v_type == APIX_TYPE_FIXED)
1476 1467 pending = apix_intx_get_pending(vecp->v_inum);
1477 1468
1478 1469 return (pending);
1479 1470 }
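
Worked example for the IRR lookup above: vector 0x45 (decimal 69) gives index = 69 / 32 = 2 and bit = 69 % 32 = 5, so the pending state is bit 5 of the 32-bit register at APIC_IRR_REG + 2.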
1480 1471
1481 1472 static apix_vector_t *
1482 1473 apix_get_req_vector(ddi_intr_handle_impl_t *hdlp, ushort_t flags)
1483 1474 {
1484 1475 apix_vector_t *vecp;
1485 1476 processorid_t cpuid;
1486 1477 int32_t virt_vec = 0;
1487 1478
1488 1479 switch (flags & PSMGI_INTRBY_FLAGS) {
1489 1480 case PSMGI_INTRBY_IRQ:
1490 1481 return (apix_intx_get_vector(hdlp->ih_vector));
1491 1482 case PSMGI_INTRBY_VEC:
1492 1483 virt_vec = (virt_vec == 0) ? hdlp->ih_vector : virt_vec;
1493 1484
1494 1485 cpuid = APIX_VIRTVEC_CPU(virt_vec);
1495 1486 if (!apic_cpu_in_range(cpuid))
1496 1487 return (NULL);
1497 1488
1498 1489 vecp = xv_vector(cpuid, APIX_VIRTVEC_VECTOR(virt_vec));
1499 1490 break;
1500 1491 case PSMGI_INTRBY_DEFAULT:
1501 1492 vecp = apix_get_dev_map(hdlp->ih_dip, hdlp->ih_inum,
1502 1493 hdlp->ih_type);
1503 1494 break;
1504 1495 default:
1505 1496 return (NULL);
1506 1497 }
1507 1498
1508 1499 return (vecp);
1509 1500 }
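
The packed "virtual vector" decoded here carries both a CPU id and a vector. Assuming the conventional encoding in apix.h (CPU in the upper bits, vector in the low byte; an assumption, so check the header), the macros round-trip like this:

	int32_t vv = APIX_VIRTVECTOR(3, 0x45);	/* pack cpu 3, vector 0x45 */

	ASSERT(APIX_VIRTVEC_CPU(vv) == 3);
	ASSERT(APIX_VIRTVEC_VECTOR(vv) == 0x45);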
1510 1501
1511 1502 static int
1512 1503 apix_get_intr_info(ddi_intr_handle_impl_t *hdlp,
1513 1504 apic_get_intr_t *intr_params_p)
1514 1505 {
1515 1506 apix_vector_t *vecp;
1516 1507 struct autovec *av_dev;
1517 1508 int i;
1518 1509
1519 1510 vecp = apix_get_req_vector(hdlp, intr_params_p->avgi_req_flags);
1520 1511 if (IS_VECT_FREE(vecp)) {
1521 1512 intr_params_p->avgi_num_devs = 0;
1522 1513 intr_params_p->avgi_cpu_id = 0;
1523 1514 intr_params_p->avgi_req_flags = 0;
1524 1515 return (PSM_SUCCESS);
1525 1516 }
1526 1517
1527 1518 if (intr_params_p->avgi_req_flags & PSMGI_REQ_CPUID) {
1528 1519 intr_params_p->avgi_cpu_id = vecp->v_cpuid;
1529 1520
1530 1521 /* Return user bound info for intrd. */
1531 1522 if (intr_params_p->avgi_cpu_id & IRQ_USER_BOUND) {
1532 1523 intr_params_p->avgi_cpu_id &= ~IRQ_USER_BOUND;
1533 1524 intr_params_p->avgi_cpu_id |= PSMGI_CPU_USER_BOUND;
1534 1525 }
1535 1526 }
1536 1527
1537 1528 if (intr_params_p->avgi_req_flags & PSMGI_REQ_VECTOR)
1538 1529 intr_params_p->avgi_vector = vecp->v_vector;
1539 1530
1540 1531 if (intr_params_p->avgi_req_flags &
1541 1532 (PSMGI_REQ_NUM_DEVS | PSMGI_REQ_GET_DEVS))
1542 1533 /* Get number of devices from apic_irq table shared field. */
1543 1534 intr_params_p->avgi_num_devs = vecp->v_share;
1544 1535
1545 1536 if (intr_params_p->avgi_req_flags & PSMGI_REQ_GET_DEVS) {
1546 1537
1547 1538 intr_params_p->avgi_req_flags |= PSMGI_REQ_NUM_DEVS;
1548 1539
1549 1540 /* Some devices have NULL dip. Don't count these. */
1550 1541 if (intr_params_p->avgi_num_devs > 0) {
1551 1542 for (i = 0, av_dev = vecp->v_autovect; av_dev;
1552 1543 av_dev = av_dev->av_link) {
1553 1544 if (av_dev->av_vector && av_dev->av_dip)
1554 1545 i++;
1555 1546 }
1556 1547 intr_params_p->avgi_num_devs =
1557 1548 (uint8_t)MIN(intr_params_p->avgi_num_devs, i);
1558 1549 }
1559 1550
1560 1551 /* There are no viable dips to return. */
1561 1552 if (intr_params_p->avgi_num_devs == 0) {
1562 1553 intr_params_p->avgi_dip_list = NULL;
1563 1554
1564 1555 } else { /* Return list of dips */
1565 1556
1566 1557 /* Allocate space in array for that number of devs. */
1567 1558 intr_params_p->avgi_dip_list = kmem_zalloc(
1568 1559 intr_params_p->avgi_num_devs *
1569 1560 sizeof (dev_info_t *),
1570 1561 KM_NOSLEEP);
1571 1562 if (intr_params_p->avgi_dip_list == NULL) {
1572 1563 DDI_INTR_IMPLDBG((CE_WARN,
1573 1564 "apix_get_vector_intr_info: no memory"));
1574 1565 return (PSM_FAILURE);
1575 1566 }
1576 1567
1577 1568 /*
1578 1569 * Loop through the device list of the autovec table
1579 1570 * filling in the dip array.
1580 1571 *
1581 1572 * Note that the autovect table may have some special
1582 1573 * entries which contain NULL dips. These will be
1583 1574 * ignored.
1584 1575 */
1585 1576 for (i = 0, av_dev = vecp->v_autovect; av_dev;
1586 1577 av_dev = av_dev->av_link) {
1587 1578 if (av_dev->av_vector && av_dev->av_dip)
1588 1579 intr_params_p->avgi_dip_list[i++] =
1589 1580 av_dev->av_dip;
1590 1581 }
1591 1582 }
1592 1583 }
1593 1584
1594 1585 return (PSM_SUCCESS);
1595 1586 }
1596 1587
1597 1588 static char *
1598 1589 apix_get_apic_type(void)
1599 1590 {
1600 1591 return (apix_psm_info.p_mach_idstring);
1601 1592 }
1602 1593
1603 1594 apix_vector_t *
1604 1595 apix_set_cpu(apix_vector_t *vecp, int new_cpu, int *result)
1605 1596 {
1606 1597 apix_vector_t *newp = NULL;
1607 1598 dev_info_t *dip;
1608 1599 int inum, cap_ptr;
1609 1600 ddi_acc_handle_t handle;
1610 1601 ddi_intr_msix_t *msix_p = NULL;
1611 1602 ushort_t msix_ctrl;
1612 1603 uintptr_t off = 0;
1613 1604 uint32_t mask = 0;
1614 1605
1615 1606 ASSERT(LOCK_HELD(&apix_lock));
1616 1607 *result = ENXIO;
1617 1608
1618 1609 /* Fail if this is an MSI intr and is part of a group. */
1619 1610 if (vecp->v_type == APIX_TYPE_MSI) {
1620 1611 if (i_ddi_intr_get_current_nintrs(APIX_GET_DIP(vecp)) > 1)
1621 1612 return (NULL);
1622 1613 else
1623 1614 return (apix_grp_set_cpu(vecp, new_cpu, result));
1624 1615 }
1625 1616
1626 1617 /*
1627 1618 * Mask MSI-X. It's unmasked when MSI-X gets enabled.
1628 1619 */
1629 1620 if (vecp->v_type == APIX_TYPE_MSIX && IS_VECT_ENABLED(vecp)) {
1630 1621 if ((dip = APIX_GET_DIP(vecp)) == NULL)
1631 1622 return (NULL);
1632 1623 inum = vecp->v_devp->dv_inum;
1633 1624
1634 1625 handle = i_ddi_get_pci_config_handle(dip);
1635 1626 cap_ptr = i_ddi_get_msi_msix_cap_ptr(dip);
1636 1627 msix_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSIX_CTRL);
1637 1628 if ((msix_ctrl & PCI_MSIX_FUNCTION_MASK) == 0) {
1638 1629 /*
1639 1630 * The function is not masked, so mask the "inum"th
1640 1631 * entry in the MSI-X table.
1641 1632 */
1642 1633 msix_p = i_ddi_get_msix(dip);
1643 1634 off = (uintptr_t)msix_p->msix_tbl_addr + (inum *
1644 1635 PCI_MSIX_VECTOR_SIZE) + PCI_MSIX_VECTOR_CTRL_OFFSET;
1645 1636 mask = ddi_get32(msix_p->msix_tbl_hdl, (uint32_t *)off);
1646 1637 ddi_put32(msix_p->msix_tbl_hdl, (uint32_t *)off,
1647 1638 mask | 1);
1648 1639 }
1649 1640 }
1650 1641
1651 1642 *result = 0;
1652 1643 if ((newp = apix_rebind(vecp, new_cpu, 1)) == NULL)
1653 1644 *result = EIO;
1654 1645
1655 1646 /* Restore mask bit */
1656 1647 if (msix_p != NULL)
1657 1648 ddi_put32(msix_p->msix_tbl_hdl, (uint32_t *)off, mask);
1658 1649
1659 1650 return (newp);
1660 1651 }
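
The table-entry arithmetic above follows the standard MSI-X layout: 16-byte entries with the 32-bit vector-control word at offset 12, whose bit 0 is the per-entry mask. For inum = 2, the control word sits at msix_tbl_addr + 2 * 16 + 12 = msix_tbl_addr + 44, and writing (mask | 1) there masks just that vector while the rebind is in flight.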
1661 1652
1662 1653 /*
1663 1654 * Set cpu for MSIs
1664 1655 */
1665 1656 apix_vector_t *
1666 1657 apix_grp_set_cpu(apix_vector_t *vecp, int new_cpu, int *result)
1667 1658 {
1668 1659 apix_vector_t *newp, *vp;
1669 1660 uint32_t orig_cpu = vecp->v_cpuid;
1670 1661 int orig_vect = vecp->v_vector;
1671 1662 int i, num_vectors, cap_ptr, msi_mask_off = 0;
1672 1663 uint32_t msi_pvm = 0;
1673 1664 ushort_t msi_ctrl;
1674 1665 ddi_acc_handle_t handle;
1675 1666 dev_info_t *dip;
1676 1667
1677 1668 APIC_VERBOSE(INTR, (CE_CONT, "apix_grp_set_cpu: oldcpu: %x, vector: %x,"
1678 1669 " newcpu:%x\n", vecp->v_cpuid, vecp->v_vector, new_cpu));
1679 1670
1680 1671 ASSERT(LOCK_HELD(&apix_lock));
1681 1672
1682 1673 *result = ENXIO;
1683 1674
1684 1675 if (vecp->v_type != APIX_TYPE_MSI) {
1685 1676 DDI_INTR_IMPLDBG((CE_WARN, "set_grp: intr not MSI\n"));
1686 1677 return (NULL);
1687 1678 }
1688 1679
1689 1680 if ((dip = APIX_GET_DIP(vecp)) == NULL)
1690 1681 return (NULL);
1691 1682
1692 1683 num_vectors = i_ddi_intr_get_current_nintrs(dip);
1693 1684 if ((num_vectors < 1) || ((num_vectors - 1) & orig_vect)) {
1694 1685 APIC_VERBOSE(INTR, (CE_WARN,
1695 1686 "set_grp: base vec not part of a grp or not aligned: "
1696 1687 "vec:0x%x, num_vec:0x%x\n", orig_vect, num_vectors));
1697 1688 return (NULL);
1698 1689 }
1699 1690
1700 1691 if (vecp->v_inum != apix_get_min_dev_inum(dip, vecp->v_type))
1701 1692 return (NULL);
1702 1693
1703 1694 *result = EIO;
1704 1695 for (i = 1; i < num_vectors; i++) {
1705 1696 if ((vp = xv_vector(orig_cpu, orig_vect + i)) == NULL)
1706 1697 return (NULL);
1707 1698 #ifdef DEBUG
1708 1699 /*
1709 1700 * Sanity check: CPU and dip are the same for all entries.
1710 1701 * May be called when the first MSI is being enabled; at that
1711 1702 * point add_avintr() has not been called for the other MSIs.
1712 1703 */
1713 1704 if ((vp->v_share != 0) &&
1714 1705 ((APIX_GET_DIP(vp) != dip) ||
1715 1706 (vp->v_cpuid != vecp->v_cpuid))) {
1716 1707 APIC_VERBOSE(INTR, (CE_WARN,
1717 1708 "set_grp: cpu or dip for vec 0x%x difft than for "
1718 1709 "vec 0x%x\n", orig_vect, orig_vect + i));
1719 1710 APIC_VERBOSE(INTR, (CE_WARN,
1720 1711 " cpu: %d vs %d, dip: 0x%p vs 0x%p\n", orig_cpu,
1721 1712 vp->v_cpuid, (void *)dip,
1722 1713 (void *)APIX_GET_DIP(vp)));
1723 1714 return (NULL);
1724 1715 }
1725 1716 #endif /* DEBUG */
1726 1717 }
1727 1718
1728 1719 cap_ptr = i_ddi_get_msi_msix_cap_ptr(dip);
1729 1720 handle = i_ddi_get_pci_config_handle(dip);
1730 1721 msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL);
1731 1722
1732 1723 	/* If per-vector masking is supported, mask all the vectors. */
1733 1724 if (msi_ctrl & PCI_MSI_PVM_MASK) {
1734 1725 if (msi_ctrl & PCI_MSI_64BIT_MASK)
1735 1726 msi_mask_off = cap_ptr + PCI_MSI_64BIT_MASKBITS;
1736 1727 else
1737 1728 msi_mask_off = cap_ptr + PCI_MSI_32BIT_MASK;
1738 1729 msi_pvm = pci_config_get32(handle, msi_mask_off);
1739 1730 pci_config_put32(handle, msi_mask_off, (uint32_t)-1);
1740 1731 APIC_VERBOSE(INTR, (CE_CONT,
1741 1732 "set_grp: pvm supported. Mask set to 0x%x\n",
1742 1733 pci_config_get32(handle, msi_mask_off)));
1743 1734 }
1744 1735
1745 1736 if ((newp = apix_rebind(vecp, new_cpu, num_vectors)) != NULL)
1746 1737 *result = 0;
1747 1738
1748 1739 /* Reenable vectors if per vector masking is supported. */
1749 1740 if (msi_ctrl & PCI_MSI_PVM_MASK) {
1750 1741 pci_config_put32(handle, msi_mask_off, msi_pvm);
1751 1742 APIC_VERBOSE(INTR, (CE_CONT,
1752 1743 "set_grp: pvm supported. Mask restored to 0x%x\n",
1753 1744 pci_config_get32(handle, msi_mask_off)));
1754 1745 }
1755 1746
1756 1747 return (newp);
1757 1748 }
1758 1749
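/*
 * Record a new CPU/vector binding for a fixed interrupt in
 * apic_irq_table and refresh its cached RDT entry.
 */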
1759 1750 void
1760 1751 apix_intx_set_vector(int irqno, uint32_t cpuid, uchar_t vector)
1761 1752 {
1762 1753 apic_irq_t *irqp;
1763 1754
1764 1755 mutex_enter(&airq_mutex);
1765 1756 irqp = apic_irq_table[irqno];
1766 1757 irqp->airq_cpu = cpuid;
1767 1758 irqp->airq_vector = vector;
1768 1759 apic_record_rdt_entry(irqp, irqno);
1769 1760 mutex_exit(&airq_mutex);
1770 1761 }
1771 1762
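/*
 * Look up the apix vector currently bound to irqno; returns NULL if the
 * IRQ is free or not yet bound to a CPU.
 */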
1772 1763 apix_vector_t *
1773 1764 apix_intx_get_vector(int irqno)
1774 1765 {
1775 1766 apic_irq_t *irqp;
1776 1767 uint32_t cpuid;
1777 1768 uchar_t vector;
1778 1769
1779 1770 mutex_enter(&airq_mutex);
1780 1771 irqp = apic_irq_table[irqno & 0xff];
1781 1772 if (IS_IRQ_FREE(irqp) || (irqp->airq_cpu == IRQ_UNINIT)) {
1782 1773 mutex_exit(&airq_mutex);
1783 1774 return (NULL);
1784 1775 }
1785 1776 cpuid = irqp->airq_cpu;
1786 1777 vector = irqp->airq_vector;
1787 1778 mutex_exit(&airq_mutex);
1788 1779
1789 1780 return (xv_vector(cpuid, vector));
1790 1781 }
1791 1782
1792 1783 /*
1793 1784  * Must be called with interrupts disabled and apic_ioapic_lock held
1794 1785 */
1795 1786 void
1796 1787 apix_intx_enable(int irqno)
1797 1788 {
1798 1789 uchar_t ioapicindex, intin;
1799 1790 apic_irq_t *irqp = apic_irq_table[irqno];
1800 1791 ioapic_rdt_t irdt;
1801 1792 apic_cpus_info_t *cpu_infop;
1802 1793 apix_vector_t *vecp = xv_vector(irqp->airq_cpu, irqp->airq_vector);
1803 1794
1804 1795 ASSERT(LOCK_HELD(&apic_ioapic_lock) && !IS_IRQ_FREE(irqp));
1805 1796
1806 1797 ioapicindex = irqp->airq_ioapicindex;
1807 1798 intin = irqp->airq_intin_no;
1808 1799 cpu_infop = &apic_cpus[irqp->airq_cpu];
1809 1800
1810 1801 irdt.ir_lo = AV_PDEST | AV_FIXED | irqp->airq_rdt_entry;
1811 1802 irdt.ir_hi = cpu_infop->aci_local_id;
1812 1803
1813 1804 apic_vt_ops->apic_intrmap_alloc_entry(&vecp->v_intrmap_private, NULL,
1814 1805 vecp->v_type, 1, ioapicindex);
1815 1806 apic_vt_ops->apic_intrmap_map_entry(vecp->v_intrmap_private,
1816 1807 (void *)&irdt, vecp->v_type, 1);
1817 1808 apic_vt_ops->apic_intrmap_record_rdt(vecp->v_intrmap_private, &irdt);
1818 1809
1819 1810 /* write RDT entry high dword - destination */
1820 1811 WRITE_IOAPIC_RDT_ENTRY_HIGH_DWORD(ioapicindex, intin,
1821 1812 irdt.ir_hi);
1822 1813
1823 1814 /* Write the vector, trigger, and polarity portion of the RDT */
1824 1815 WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapicindex, intin, irdt.ir_lo);
1825 1816
1826 1817 vecp->v_state = APIX_STATE_ENABLED;
1827 1818
1828 1819 APIC_VERBOSE_IOAPIC((CE_CONT, "apix_intx_enable: ioapic 0x%x"
1829 1820 " intin 0x%x rdt_low 0x%x rdt_high 0x%x\n",
1830 1821 ioapicindex, intin, irdt.ir_lo, irdt.ir_hi));
1831 1822 }
1832 1823
1833 1824 /*
1834 1825  * Must be called with interrupts disabled and apic_ioapic_lock held
1835 1826 */
1836 1827 void
1837 1828 apix_intx_disable(int irqno)
1838 1829 {
1839 1830 apic_irq_t *irqp = apic_irq_table[irqno];
1840 1831 int ioapicindex, intin;
1841 1832
1842 1833 ASSERT(LOCK_HELD(&apic_ioapic_lock) && !IS_IRQ_FREE(irqp));
1843 1834 /*
1844 1835 * The assumption here is that this is safe, even for
1845 1836 * systems with IOAPICs that suffer from the hardware
1846 1837 * erratum because all devices have been quiesced before
1847 1838 * they unregister their interrupt handlers. If that
1848 1839 * assumption turns out to be false, this mask operation
1849 1840 * can induce the same erratum result we're trying to
1850 1841 * avoid.
1851 1842 */
1852 1843 ioapicindex = irqp->airq_ioapicindex;
1853 1844 intin = irqp->airq_intin_no;
1854 1845 ioapic_write(ioapicindex, APIC_RDT_CMD + 2 * intin, AV_MASK);
1855 1846
1856 1847 APIC_VERBOSE_IOAPIC((CE_CONT, "apix_intx_disable: ioapic 0x%x"
1857 1848 " intin 0x%x\n", ioapicindex, intin));
1858 1849 }
1859 1850
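/*
 * Mark the IRQ table entry for irqno free and invalidate its CPU and
 * vector bindings.
 */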
1860 1851 void
1861 1852 apix_intx_free(int irqno)
1862 1853 {
1863 1854 apic_irq_t *irqp;
1864 1855
1865 1856 mutex_enter(&airq_mutex);
1866 1857 irqp = apic_irq_table[irqno];
1867 1858
1868 1859 if (IS_IRQ_FREE(irqp)) {
1869 1860 mutex_exit(&airq_mutex);
1870 1861 return;
1871 1862 }
1872 1863
1873 1864 irqp->airq_mps_intr_index = FREE_INDEX;
1874 1865 irqp->airq_cpu = IRQ_UNINIT;
1875 1866 irqp->airq_vector = APIX_INVALID_VECT;
1876 1867 mutex_exit(&airq_mutex);
1877 1868 }
1878 1869
1879 1870 #ifdef DEBUG
1880 1871 int apix_intr_deliver_timeouts = 0;
1881 1872 int apix_intr_rirr_timeouts = 0;
1882 1873 int apix_intr_rirr_reset_failure = 0;
1883 1874 #endif
1884 1875 int apix_max_reps_irr_pending = 10;
1885 1876
1886 1877 #define GET_RDT_BITS(ioapic, intin, bits) \
1887 1878 (READ_IOAPIC_RDT_ENTRY_LOW_DWORD((ioapic), (intin)) & (bits))
1888 1879 #define APIX_CHECK_IRR_DELAY drv_usectohz(5000)
1889 1880
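/*
 * Retarget a fixed (IO-APIC routed) interrupt: wait for any in-flight
 * delivery to drain, mask a level-triggered RDT entry and wait for its
 * Remote IRR bit to clear, then rewrite apic_irq_table and reprogram
 * the RDT entry for the new CPU and vector. Returns 0 on success, 1 if
 * the Remote IRR bit could not be cleared.
 */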
1890 1881 int
1891 1882 apix_intx_rebind(int irqno, processorid_t cpuid, uchar_t vector)
1892 1883 {
1893 1884 apic_irq_t *irqp = apic_irq_table[irqno];
1894 1885 ulong_t iflag;
1895 1886 int waited, ioapic_ix, intin_no, level, repeats, rdt_entry, masked;
1896 1887
1897 1888 ASSERT(irqp != NULL);
1898 1889
1899 1890 iflag = intr_clear();
1900 1891 lock_set(&apic_ioapic_lock);
1901 1892
1902 1893 ioapic_ix = irqp->airq_ioapicindex;
1903 1894 intin_no = irqp->airq_intin_no;
1904 1895 level = apic_level_intr[irqno];
1905 1896
1906 1897 /*
1907 1898 * Wait for the delivery status bit to be cleared. This should
1908 1899 * be a very small amount of time.
1909 1900 */
1910 1901 repeats = 0;
1911 1902 do {
1912 1903 repeats++;
1913 1904
1914 1905 for (waited = 0; waited < apic_max_reps_clear_pending;
1915 1906 waited++) {
1916 1907 if (GET_RDT_BITS(ioapic_ix, intin_no, AV_PENDING) == 0)
1917 1908 break;
1918 1909 }
1919 1910 if (!level)
1920 1911 break;
1921 1912
1922 1913 /*
1923 1914 * Mask the RDT entry for level-triggered interrupts.
1924 1915 */
1925 1916 irqp->airq_rdt_entry |= AV_MASK;
1926 1917 rdt_entry = READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic_ix,
1927 1918 intin_no);
1928 1919 if ((masked = (rdt_entry & AV_MASK)) == 0) {
1929 1920 /* Mask it */
1930 1921 WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic_ix, intin_no,
1931 1922 AV_MASK | rdt_entry);
1932 1923 }
1933 1924
1934 1925 /*
1935 1926 * If there was a race and an interrupt was injected
1936 1927 * just before we masked, check for that case here.
1937 1928 * Then, unmask the RDT entry and try again. If we're
1938 1929 * on our last try, don't unmask (because we want the
1939 1930 * RDT entry to remain masked for the rest of the
1940 1931 * function).
1941 1932 */
1942 1933 rdt_entry = READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic_ix,
1943 1934 intin_no);
1944 1935 if ((masked == 0) && ((rdt_entry & AV_PENDING) != 0) &&
1945 1936 (repeats < apic_max_reps_clear_pending)) {
1946 1937 /* Unmask it */
1947 1938 WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic_ix,
1948 1939 intin_no, rdt_entry & ~AV_MASK);
1949 1940 irqp->airq_rdt_entry &= ~AV_MASK;
1950 1941 }
1951 1942 } while ((rdt_entry & AV_PENDING) &&
1952 1943 (repeats < apic_max_reps_clear_pending));
1953 1944
1954 1945 #ifdef DEBUG
1955 1946 if (GET_RDT_BITS(ioapic_ix, intin_no, AV_PENDING) != 0)
1956 1947 apix_intr_deliver_timeouts++;
1957 1948 #endif
1958 1949
1959 1950 if (!level || !APIX_IS_MASK_RDT(apix_mul_ioapic_method))
1960 1951 goto done;
1961 1952
1962 1953 /*
1963 1954 * wait for remote IRR to be cleared for level-triggered
1964 1955 * interrupts
1965 1956 */
1966 1957 repeats = 0;
1967 1958 do {
1968 1959 repeats++;
1969 1960
1970 1961 for (waited = 0; waited < apic_max_reps_clear_pending;
1971 1962 waited++) {
1972 1963 if (GET_RDT_BITS(ioapic_ix, intin_no, AV_REMOTE_IRR)
1973 1964 == 0)
1974 1965 break;
1975 1966 }
1976 1967
1977 1968 if (GET_RDT_BITS(ioapic_ix, intin_no, AV_REMOTE_IRR) != 0) {
1978 1969 lock_clear(&apic_ioapic_lock);
1979 1970 intr_restore(iflag);
1980 1971
1981 1972 delay(APIX_CHECK_IRR_DELAY);
1982 1973
1983 1974 iflag = intr_clear();
1984 1975 lock_set(&apic_ioapic_lock);
1985 1976 }
1986 1977 } while (repeats < apix_max_reps_irr_pending);
1987 1978
1988 1979 if (repeats >= apix_max_reps_irr_pending) {
1989 1980 #ifdef DEBUG
1990 1981 apix_intr_rirr_timeouts++;
1991 1982 #endif
1992 1983
1993 1984 /*
1994 1985 		 * If we've polled apix_max_reps_irr_pending times and the
1995 1986 		 * Remote IRR bit is still not cleared, try the last-ditch
1996 1987 		 * workaround:
1997 1988 */
1998 1989 if (GET_RDT_BITS(ioapic_ix, intin_no, AV_REMOTE_IRR) != 0) {
1999 1990 /*
2000 1991 * Trying to clear the bit through normal
2001 1992 * channels has failed. So as a last-ditch
2002 1993 * effort, try to set the trigger mode to
2003 1994 * edge, then to level. This has been
2004 1995 * observed to work on many systems.
2005 1996 */
2006 1997 WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic_ix,
2007 1998 intin_no,
2008 1999 READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic_ix,
2009 2000 intin_no) & ~AV_LEVEL);
2010 2001 WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic_ix,
2011 2002 intin_no,
2012 2003 READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapic_ix,
2013 2004 intin_no) | AV_LEVEL);
2014 2005 }
2015 2006
2016 2007 if (GET_RDT_BITS(ioapic_ix, intin_no, AV_REMOTE_IRR) != 0) {
2017 2008 #ifdef DEBUG
2018 2009 apix_intr_rirr_reset_failure++;
2019 2010 #endif
2020 2011 lock_clear(&apic_ioapic_lock);
2021 2012 intr_restore(iflag);
2022 2013 prom_printf("apix: Remote IRR still "
2023 2014 "not clear for IOAPIC %d intin %d.\n"
2024 2015 "\tInterrupts to this pin may cease "
2025 2016 "functioning.\n", ioapic_ix, intin_no);
2026 2017 return (1); /* return failure */
2027 2018 }
2028 2019 }
2029 2020
2030 2021 done:
2031 2022 /* change apic_irq_table */
2032 2023 lock_clear(&apic_ioapic_lock);
2033 2024 intr_restore(iflag);
2034 2025 apix_intx_set_vector(irqno, cpuid, vector);
2035 2026 iflag = intr_clear();
2036 2027 lock_set(&apic_ioapic_lock);
2037 2028
2038 2029 	/* reprogram the IO-APIC RDT entry */
2039 2030 apix_intx_enable(irqno);
2040 2031
2041 2032 lock_clear(&apic_ioapic_lock);
2042 2033 intr_restore(iflag);
2043 2034
2044 2035 return (0);
2045 2036 }
2046 2037
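/*
 * Return 1 if the interrupt is pending at the IO-APIC (its RDT delivery
 * status bit is set), 0 otherwise.
 */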
2047 2038 static int
2048 2039 apix_intx_get_pending(int irqno)
2049 2040 {
2050 2041 apic_irq_t *irqp;
2051 2042 int intin, ioapicindex, pending;
2052 2043 ulong_t iflag;
2053 2044
2054 2045 mutex_enter(&airq_mutex);
2055 2046 irqp = apic_irq_table[irqno];
2056 2047 if (IS_IRQ_FREE(irqp)) {
2057 2048 mutex_exit(&airq_mutex);
2058 2049 return (0);
2059 2050 }
2060 2051
2061 2052 /* check IO-APIC delivery status */
2062 2053 intin = irqp->airq_intin_no;
2063 2054 ioapicindex = irqp->airq_ioapicindex;
2064 2055 mutex_exit(&airq_mutex);
2065 2056
2066 2057 iflag = intr_clear();
2067 2058 lock_set(&apic_ioapic_lock);
2068 2059
2069 2060 pending = (READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapicindex, intin) &
2070 2061 AV_PENDING) ? 1 : 0;
2071 2062
2072 2063 lock_clear(&apic_ioapic_lock);
2073 2064 intr_restore(iflag);
2074 2065
2075 2066 return (pending);
2076 2067 }
2077 2068
2078 2069 /*
2079 2070 * This function will mask the interrupt on the I/O APIC
2080 2071 */
2081 2072 static void
2082 2073 apix_intx_set_mask(int irqno)
2083 2074 {
2084 2075 int intin, ioapixindex, rdt_entry;
2085 2076 ulong_t iflag;
2086 2077 apic_irq_t *irqp;
2087 2078
2088 2079 mutex_enter(&airq_mutex);
2089 2080 irqp = apic_irq_table[irqno];
2090 2081
2091 2082 ASSERT(irqp->airq_mps_intr_index != FREE_INDEX);
2092 2083
2093 2084 intin = irqp->airq_intin_no;
2094 2085 ioapixindex = irqp->airq_ioapicindex;
2095 2086 mutex_exit(&airq_mutex);
2096 2087
2097 2088 iflag = intr_clear();
2098 2089 lock_set(&apic_ioapic_lock);
2099 2090
2100 2091 rdt_entry = READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapixindex, intin);
2101 2092
2102 2093 	/* set mask */
2103 2094 WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapixindex, intin,
2104 2095 (AV_MASK | rdt_entry));
2105 2096
2106 2097 lock_clear(&apic_ioapic_lock);
2107 2098 intr_restore(iflag);
2108 2099 }
2109 2100
2110 2101 /*
2111 2102 * This function will clear the mask for the interrupt on the I/O APIC
2112 2103 */
2113 2104 static void
2114 2105 apix_intx_clear_mask(int irqno)
2115 2106 {
2116 2107 int intin, ioapixindex, rdt_entry;
2117 2108 ulong_t iflag;
2118 2109 apic_irq_t *irqp;
2119 2110
2120 2111 mutex_enter(&airq_mutex);
2121 2112 irqp = apic_irq_table[irqno];
2122 2113
2123 2114 ASSERT(irqp->airq_mps_intr_index != FREE_INDEX);
2124 2115
2125 2116 intin = irqp->airq_intin_no;
2126 2117 ioapixindex = irqp->airq_ioapicindex;
2127 2118 mutex_exit(&airq_mutex);
2128 2119
2129 2120 iflag = intr_clear();
2130 2121 lock_set(&apic_ioapic_lock);
2131 2122
2132 2123 rdt_entry = READ_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapixindex, intin);
2133 2124
2134 2125 /* clear mask */
2135 2126 WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(ioapixindex, intin,
2136 2127 ((~AV_MASK) & rdt_entry));
2137 2128
2138 2129 lock_clear(&apic_ioapic_lock);
2139 2130 intr_restore(iflag);
2140 2131 }
2141 2132
2142 2133 /*
2143 2134  * For a level-triggered interrupt, mask the IRQ line. Masking means
2144 2135  * new interrupts will not be delivered. An interrupt already
2145 2136  * accepted by a local APIC is not affected.
2146 2137 */
2147 2138 void
2148 2139 apix_level_intr_pre_eoi(int irq)
2149 2140 {
2150 2141 apic_irq_t *irqp = apic_irq_table[irq];
2151 2142 int apic_ix, intin_ix;
2152 2143
2153 2144 if (irqp == NULL)
2154 2145 return;
2155 2146
2156 2147 ASSERT(apic_level_intr[irq] == TRIGGER_MODE_LEVEL);
2157 2148
2158 2149 lock_set(&apic_ioapic_lock);
2159 2150
2160 2151 intin_ix = irqp->airq_intin_no;
2161 2152 apic_ix = irqp->airq_ioapicindex;
2162 2153
2163 2154 if (irqp->airq_cpu != CPU->cpu_id) {
2164 2155 if (!APIX_IS_MASK_RDT(apix_mul_ioapic_method))
2165 2156 ioapic_write_eoi(apic_ix, irqp->airq_vector);
2166 2157 lock_clear(&apic_ioapic_lock);
2167 2158 return;
2168 2159 }
2169 2160
2170 2161 if (apix_mul_ioapic_method == APIC_MUL_IOAPIC_IOXAPIC) {
2171 2162 /*
2172 2163 		 * This is an IOxAPIC and there is an EOI register:
2173 2164 		 * change the vector to a reserved, unused vector so that
2174 2165 		 * the EOI from the local APIC won't clear the Remote IRR for
2175 2166 		 * this level-triggered interrupt. Instead, we'll clear it
2176 2167 		 * manually in apix_post_hardint() after ISR handling.
2177 2168 */
2178 2169 WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(apic_ix, intin_ix,
2179 2170 (irqp->airq_rdt_entry & (~0xff)) | APIX_RESV_VECTOR);
2180 2171 } else {
2181 2172 WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(apic_ix, intin_ix,
2182 2173 AV_MASK | irqp->airq_rdt_entry);
2183 2174 }
2184 2175
2185 2176 lock_clear(&apic_ioapic_lock);
2186 2177 }
2187 2178
2188 2179 /*
2189 2180  * For a level-triggered interrupt, unmask the IRQ line
2190 2181 * or restore the original vector number.
2191 2182 */
2192 2183 void
2193 2184 apix_level_intr_post_dispatch(int irq)
2194 2185 {
2195 2186 apic_irq_t *irqp = apic_irq_table[irq];
2196 2187 int apic_ix, intin_ix;
2197 2188
2198 2189 if (irqp == NULL)
2199 2190 return;
2200 2191
2201 2192 lock_set(&apic_ioapic_lock);
2202 2193
2203 2194 intin_ix = irqp->airq_intin_no;
2204 2195 apic_ix = irqp->airq_ioapicindex;
2205 2196
2206 2197 if (APIX_IS_DIRECTED_EOI(apix_mul_ioapic_method)) {
2207 2198 /*
2208 2199 * Already sent EOI back to Local APIC.
2209 2200 * Send EOI to IO-APIC
2210 2201 */
2211 2202 ioapic_write_eoi(apic_ix, irqp->airq_vector);
2212 2203 } else {
2213 2204 /* clear the mask or restore the vector */
2214 2205 WRITE_IOAPIC_RDT_ENTRY_LOW_DWORD(apic_ix, intin_ix,
2215 2206 irqp->airq_rdt_entry);
2216 2207
2217 2208 /* send EOI to IOxAPIC */
2218 2209 if (apix_mul_ioapic_method == APIC_MUL_IOAPIC_IOXAPIC)
2219 2210 ioapic_write_eoi(apic_ix, irqp->airq_vector);
2220 2211 }
2221 2212
2222 2213 lock_clear(&apic_ioapic_lock);
2223 2214 }
2224 2215
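/*
 * Return the share count for irqno, or 0 if the IRQ is free or not yet
 * bound to a CPU.
 */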
2225 2216 static int
2226 2217 apix_intx_get_shared(int irqno)
2227 2218 {
2228 2219 apic_irq_t *irqp;
2229 2220 int share;
2230 2221
2231 2222 mutex_enter(&airq_mutex);
2232 2223 irqp = apic_irq_table[irqno];
2233 2224 if (IS_IRQ_FREE(irqp) || (irqp->airq_cpu == IRQ_UNINIT)) {
2234 2225 mutex_exit(&airq_mutex);
2235 2226 return (0);
2236 2227 }
2237 2228 share = irqp->airq_share;
2238 2229 mutex_exit(&airq_mutex);
2239 2230
2240 2231 return (share);
2241 2232 }
2242 2233
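/*
 * Adjust the share count for irqno by delta (no-op if the IRQ is free).
 */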
2243 2234 static void
2244 2235 apix_intx_set_shared(int irqno, int delta)
2245 2236 {
2246 2237 apic_irq_t *irqp;
2247 2238
2248 2239 mutex_enter(&airq_mutex);
2249 2240 irqp = apic_irq_table[irqno];
2250 2241 if (IS_IRQ_FREE(irqp)) {
2251 2242 mutex_exit(&airq_mutex);
2252 2243 return;
2253 2244 }
2254 2245 irqp->airq_share += delta;
2255 2246 mutex_exit(&airq_mutex);
2256 2247 }
2257 2248
2258 2249 /*
2259 2250  * Set up the IRQ table entry. Return the IRQ number or -1 on failure
2260 2251 */
2261 2252 static int
2262 2253 apix_intx_setup(dev_info_t *dip, int inum, int irqno,
2263 2254 struct apic_io_intr *intrp, struct intrspec *ispec, iflag_t *iflagp)
2264 2255 {
2265 2256 int origirq = ispec->intrspec_vec;
2266 2257 int newirq;
2267 2258 short intr_index;
2268 2259 uchar_t ipin, ioapic, ioapicindex;
2269 2260 apic_irq_t *irqp;
2270 2261
2271 2262 UNREFERENCED_1PARAMETER(inum);
2272 2263
2273 2264 if (intrp != NULL) {
2274 2265 intr_index = (short)(intrp - apic_io_intrp);
2275 2266 ioapic = intrp->intr_destid;
2276 2267 ipin = intrp->intr_destintin;
2277 2268
2278 2269 /* Find ioapicindex. If destid was ALL, we will exit with 0. */
2279 2270 for (ioapicindex = apic_io_max - 1; ioapicindex; ioapicindex--)
2280 2271 if (apic_io_id[ioapicindex] == ioapic)
2281 2272 break;
2282 2273 ASSERT((ioapic == apic_io_id[ioapicindex]) ||
2283 2274 (ioapic == INTR_ALL_APIC));
2284 2275
2285 2276 /* check whether this intin# has been used by another irqno */
2286 2277 if ((newirq = apic_find_intin(ioapicindex, ipin)) != -1)
2287 2278 return (newirq);
2288 2279
2289 2280 } else if (iflagp != NULL) { /* ACPI */
2290 2281 intr_index = ACPI_INDEX;
2291 2282 ioapicindex = acpi_find_ioapic(irqno);
2292 2283 ASSERT(ioapicindex != 0xFF);
2293 2284 ioapic = apic_io_id[ioapicindex];
2294 2285 ipin = irqno - apic_io_vectbase[ioapicindex];
2295 2286
2296 2287 if (apic_irq_table[irqno] &&
2297 2288 apic_irq_table[irqno]->airq_mps_intr_index == ACPI_INDEX) {
2298 2289 ASSERT(apic_irq_table[irqno]->airq_intin_no == ipin &&
2299 2290 apic_irq_table[irqno]->airq_ioapicindex ==
2300 2291 ioapicindex);
2301 2292 return (irqno);
2302 2293 }
2303 2294
2304 2295 } else { /* default configuration */
2305 2296 intr_index = DEFAULT_INDEX;
2306 2297 ioapicindex = 0;
2307 2298 ioapic = apic_io_id[ioapicindex];
2308 2299 ipin = (uchar_t)irqno;
2309 2300 }
2310 2301
2311 2302 /* allocate a new IRQ no */
2312 2303 if ((irqp = apic_irq_table[irqno]) == NULL) {
2313 2304 irqp = kmem_zalloc(sizeof (apic_irq_t), KM_SLEEP);
2314 2305 apic_irq_table[irqno] = irqp;
2315 2306 } else {
2316 2307 if (irqp->airq_mps_intr_index != FREE_INDEX) {
2317 2308 newirq = apic_allocate_irq(apic_first_avail_irq);
2318 2309 if (newirq == -1) {
2319 2310 return (-1);
2320 2311 }
2321 2312 irqno = newirq;
2322 2313 irqp = apic_irq_table[irqno];
2323 2314 ASSERT(irqp != NULL);
2324 2315 }
2325 2316 }
2326 2317 apic_max_device_irq = max(irqno, apic_max_device_irq);
2327 2318 apic_min_device_irq = min(irqno, apic_min_device_irq);
2328 2319
2329 2320 irqp->airq_mps_intr_index = intr_index;
2330 2321 irqp->airq_ioapicindex = ioapicindex;
2331 2322 irqp->airq_intin_no = ipin;
2332 2323 irqp->airq_dip = dip;
2333 2324 irqp->airq_origirq = (uchar_t)origirq;
2334 2325 if (iflagp != NULL)
2335 2326 irqp->airq_iflag = *iflagp;
2336 2327 irqp->airq_cpu = IRQ_UNINIT;
2337 2328 irqp->airq_vector = 0;
2338 2329
2339 2330 return (irqno);
2340 2331 }
2341 2332
2342 2333 /*
2343 2334  * Set up the IRQ table for non-PCI devices. Return the IRQ number or -1 on error
2344 2335 */
2345 2336 static int
2346 2337 apix_intx_setup_nonpci(dev_info_t *dip, int inum, int bustype,
2347 2338 struct intrspec *ispec)
2348 2339 {
2349 2340 int irqno = ispec->intrspec_vec;
2350 2341 int newirq, i;
2351 2342 iflag_t intr_flag;
2352 2343 ACPI_SUBTABLE_HEADER *hp;
2353 2344 ACPI_MADT_INTERRUPT_OVERRIDE *isop;
2354 2345 struct apic_io_intr *intrp;
2355 2346
2356 2347 if (!apic_enable_acpi || apic_use_acpi_madt_only) {
2357 2348 int busid;
2358 2349
2359 2350 if (bustype == 0)
2360 2351 bustype = eisa_level_intr_mask ? BUS_EISA : BUS_ISA;
2361 2352
2362 2353 /* loop checking BUS_ISA/BUS_EISA */
2363 2354 for (i = 0; i < 2; i++) {
2364 2355 if (((busid = apic_find_bus_id(bustype)) != -1) &&
2365 2356 ((intrp = apic_find_io_intr_w_busid(irqno, busid))
2366 2357 != NULL)) {
2367 2358 return (apix_intx_setup(dip, inum, irqno,
2368 2359 intrp, ispec, NULL));
2369 2360 }
2370 2361 bustype = (bustype == BUS_EISA) ? BUS_ISA : BUS_EISA;
2371 2362 }
2372 2363
2373 2364 /* fall back to default configuration */
2374 2365 return (-1);
2375 2366 }
2376 2367
2377 2368 /* search iso entries first */
2378 2369 if (acpi_iso_cnt != 0) {
2379 2370 hp = (ACPI_SUBTABLE_HEADER *)acpi_isop;
2380 2371 i = 0;
2381 2372 while (i < acpi_iso_cnt) {
2382 2373 if (hp->Type == ACPI_MADT_TYPE_INTERRUPT_OVERRIDE) {
2383 2374 isop = (ACPI_MADT_INTERRUPT_OVERRIDE *) hp;
2384 2375 if (isop->Bus == 0 &&
2385 2376 isop->SourceIrq == irqno) {
2386 2377 newirq = isop->GlobalIrq;
2387 2378 intr_flag.intr_po = isop->IntiFlags &
2388 2379 ACPI_MADT_POLARITY_MASK;
2389 2380 intr_flag.intr_el = (isop->IntiFlags &
2390 2381 ACPI_MADT_TRIGGER_MASK) >> 2;
2391 2382 intr_flag.bustype = BUS_ISA;
2392 2383
2393 2384 return (apix_intx_setup(dip, inum,
2394 2385 newirq, NULL, ispec, &intr_flag));
2395 2386 }
2396 2387 i++;
2397 2388 }
2398 2389 hp = (ACPI_SUBTABLE_HEADER *)(((char *)hp) +
2399 2390 hp->Length);
2400 2391 }
2401 2392 }
2402 2393 intr_flag.intr_po = INTR_PO_ACTIVE_HIGH;
2403 2394 intr_flag.intr_el = INTR_EL_EDGE;
2404 2395 intr_flag.bustype = BUS_ISA;
2405 2396 return (apix_intx_setup(dip, inum, irqno, NULL, ispec, &intr_flag));
2406 2397 }
2407 2398
2408 2399
2409 2400 /*
2410 2401  * Set up the IRQ table for PCI devices. Return the IRQ number or -1 on error
2411 2402 */
2412 2403 static int
2413 2404 apix_intx_setup_pci(dev_info_t *dip, int inum, int bustype,
2414 2405 struct intrspec *ispec)
2415 2406 {
2416 2407 int busid, devid, pci_irq;
2417 2408 ddi_acc_handle_t cfg_handle;
2418 2409 uchar_t ipin;
2419 2410 iflag_t intr_flag;
2420 2411 struct apic_io_intr *intrp;
2421 2412
2422 2413 if (acpica_get_bdf(dip, &busid, &devid, NULL) != 0)
2423 2414 return (-1);
2424 2415
2425 2416 if (busid == 0 && apic_pci_bus_total == 1)
2426 2417 busid = (int)apic_single_pci_busid;
2427 2418
2428 2419 if (pci_config_setup(dip, &cfg_handle) != DDI_SUCCESS)
2429 2420 return (-1);
2430 2421 ipin = pci_config_get8(cfg_handle, PCI_CONF_IPIN) - PCI_INTA;
2431 2422 pci_config_teardown(&cfg_handle);
2432 2423
2433 2424 if (apic_enable_acpi && !apic_use_acpi_madt_only) { /* ACPI */
2434 2425 if (apic_acpi_translate_pci_irq(dip, busid, devid,
2435 2426 ipin, &pci_irq, &intr_flag) != ACPI_PSM_SUCCESS)
2436 2427 return (-1);
2437 2428
2438 2429 intr_flag.bustype = (uchar_t)bustype;
2439 2430 return (apix_intx_setup(dip, inum, pci_irq, NULL, ispec,
2440 2431 &intr_flag));
2441 2432 }
2442 2433
2443 2434 	/* MP configuration table: source-bus IRQ is (device << 2) | INTx pin */
2444 2435 pci_irq = ((devid & 0x1f) << 2) | (ipin & 0x3);
2445 2436 if ((intrp = apic_find_io_intr_w_busid(pci_irq, busid)) == NULL) {
2446 2437 pci_irq = apic_handle_pci_pci_bridge(dip, devid, ipin, &intrp);
2447 2438 if (pci_irq == -1)
2448 2439 return (-1);
2449 2440 }
2450 2441
2451 2442 return (apix_intx_setup(dip, inum, pci_irq, intrp, ispec, NULL));
2452 2443 }
2453 2444
2454 2445 /*
2455 2446  * Translate and return the IRQ number
2456 2447 */
2457 2448 static int
2458 2449 apix_intx_xlate_irq(dev_info_t *dip, int inum, struct intrspec *ispec)
2459 2450 {
2460 2451 int newirq, irqno = ispec->intrspec_vec;
2461 2452 int parent_is_pci_or_pciex = 0, child_is_pciex = 0;
2462 2453 int bustype = 0, dev_len;
2463 2454 char dev_type[16];
2464 2455
2465 2456 if (apic_defconf) {
2466 2457 mutex_enter(&airq_mutex);
2467 2458 goto defconf;
2468 2459 }
2469 2460
2470 2461 if ((dip == NULL) || (!apic_irq_translate && !apic_enable_acpi)) {
2471 2462 mutex_enter(&airq_mutex);
2472 2463 goto nonpci;
2473 2464 }
2474 2465
2475 2466 /*
2476 2467 * use ddi_getlongprop_buf() instead of ddi_prop_lookup_string()
2477 2468 * to avoid extra buffer allocation.
2478 2469 */
2479 2470 dev_len = sizeof (dev_type);
2480 2471 if (ddi_getlongprop_buf(DDI_DEV_T_ANY, ddi_get_parent(dip),
2481 2472 DDI_PROP_DONTPASS, "device_type", (caddr_t)dev_type,
2482 2473 &dev_len) == DDI_PROP_SUCCESS) {
2483 2474 if ((strcmp(dev_type, "pci") == 0) ||
2484 2475 (strcmp(dev_type, "pciex") == 0))
2485 2476 parent_is_pci_or_pciex = 1;
2486 2477 }
2487 2478
2488 2479 if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip,
2489 2480 DDI_PROP_DONTPASS, "compatible", (caddr_t)dev_type,
2490 2481 &dev_len) == DDI_PROP_SUCCESS) {
2491 2482 if (strstr(dev_type, "pciex"))
2492 2483 child_is_pciex = 1;
2493 2484 }
2494 2485
2495 2486 mutex_enter(&airq_mutex);
2496 2487
2497 2488 if (parent_is_pci_or_pciex) {
2498 2489 bustype = child_is_pciex ? BUS_PCIE : BUS_PCI;
2499 2490 newirq = apix_intx_setup_pci(dip, inum, bustype, ispec);
2500 2491 if (newirq != -1)
2501 2492 goto done;
2502 2493 bustype = 0;
2503 2494 } else if (strcmp(dev_type, "isa") == 0)
2504 2495 bustype = BUS_ISA;
2505 2496 else if (strcmp(dev_type, "eisa") == 0)
2506 2497 bustype = BUS_EISA;
2507 2498
2508 2499 nonpci:
2509 2500 newirq = apix_intx_setup_nonpci(dip, inum, bustype, ispec);
2510 2501 if (newirq != -1)
2511 2502 goto done;
2512 2503
2513 2504 defconf:
2514 2505 newirq = apix_intx_setup(dip, inum, irqno, NULL, ispec, NULL);
2515 2506 if (newirq == -1) {
2516 2507 mutex_exit(&airq_mutex);
2517 2508 return (-1);
2518 2509 }
2519 2510 done:
2520 2511 ASSERT(apic_irq_table[newirq]);
2521 2512 mutex_exit(&airq_mutex);
2522 2513 return (newirq);
2523 2514 }
2524 2515
2525 2516 static int
2526 -apix_intx_alloc_vector(dev_info_t *dip, int inum, struct intrspec *ispec)
2517 +apix_intx_alloc_vector(dev_info_t *dip, ddi_intr_handle_impl_t *hdlp,
2518 + struct intrspec *ispec)
2527 2519 {
2528 2520 int irqno;
2529 2521 apix_vector_t *vecp;
2530 2522
2531 - if ((irqno = apix_intx_xlate_irq(dip, inum, ispec)) == -1)
2523 + if ((irqno = apix_intx_xlate_irq(dip, hdlp->ih_inum, ispec)) == -1)
2532 2524 return (0);
2533 2525
2534 - if ((vecp = apix_alloc_intx(dip, inum, irqno)) == NULL)
2526 + if ((vecp = apix_alloc_intx(dip, hdlp->ih_inum, irqno)) == NULL)
2535 2527 return (0);
2536 2528
2529 + hdlp->ih_irq = irqno;
2530 +
2537 2531 DDI_INTR_IMPLDBG((CE_CONT, "apix_intx_alloc_vector: dip=0x%p name=%s "
2538 2532 "irqno=0x%x cpuid=%d vector=0x%x\n",
2539 2533 (void *)dip, ddi_driver_name(dip), irqno,
2540 2534 vecp->v_cpuid, vecp->v_vector));
2541 2535
2542 2536 return (1);
2543 2537 }
2544 2538
2545 2539 /*
2546 2540  * Return the vector if the translated IRQ for this device has a
2547 2541  * vector mapping set up. If no IRQ setup exists or no vector is
2548 2542  * allocated to it, return NULL.
2549 2543 */
2550 2544 static apix_vector_t *
2551 2545 apix_intx_xlate_vector(dev_info_t *dip, int inum, struct intrspec *ispec)
2552 2546 {
2553 2547 int irqno;
2554 2548 apix_vector_t *vecp;
2555 2549
2556 2550 /* get the IRQ number */
2557 2551 if ((irqno = apix_intx_xlate_irq(dip, inum, ispec)) == -1)
2558 2552 return (NULL);
2559 2553
2560 2554 /* get the vector number if a vector is allocated to this irqno */
2561 2555 vecp = apix_intx_get_vector(irqno);
2562 2556
2563 2557 return (vecp);
2564 2558 }
2565 2559
2566 2560 /*
2567 2561  * Switch between the safe and the x2APIC IPI sending methods.
2568 2562  * A CPU may power on in xAPIC or x2APIC mode. If a CPU needs to send
2569 2563  * an IPI to other CPUs before entering x2APIC mode, it still needs to use the
2570 2564  * xAPIC method. Before sending a StartIPI to the target CPU, psm_send_ipi will
2571 2565  * be changed to apic_common_send_ipi, which detects the current local APIC mode
2572 2566  * and uses the right method to send an IPI. If some CPUs fail to start up,
2573 2567  * apic_poweron_cnt won't return to zero, so apic_common_send_ipi will always be
2574 2568  * used. psm_send_ipi can't simply be changed back to x2apic_send_ipi if some
2575 2569  * CPUs failed to start up, because those CPUs may recover later at an
2576 2570  * unpredictable time.
2577 2571 */
2578 2572 void
2579 2573 apic_switch_ipi_callback(boolean_t enter)
2580 2574 {
2581 2575 ulong_t iflag;
2582 2576 struct psm_ops *pops = psmops;
2583 2577
2584 2578 iflag = intr_clear();
2585 2579 lock_set(&apic_mode_switch_lock);
2586 2580 if (enter) {
2587 2581 ASSERT(apic_poweron_cnt >= 0);
2588 2582 if (apic_poweron_cnt == 0) {
2589 2583 pops->psm_send_ipi = apic_common_send_ipi;
2590 2584 send_dirintf = pops->psm_send_ipi;
2591 2585 }
2592 2586 apic_poweron_cnt++;
2593 2587 } else {
2594 2588 ASSERT(apic_poweron_cnt > 0);
2595 2589 apic_poweron_cnt--;
2596 2590 if (apic_poweron_cnt == 0) {
2597 2591 pops->psm_send_ipi = x2apic_send_ipi;
2598 2592 send_dirintf = pops->psm_send_ipi;
2599 2593 }
2600 2594 }
2601 2595 lock_clear(&apic_mode_switch_lock);
2602 2596 intr_restore(iflag);
2603 2597 }
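
/*
 * Callers are expected to bracket CPU power-on with
 * apic_switch_ipi_callback(B_TRUE) before sending the StartIPI and
 * apic_switch_ipi_callback(B_FALSE) once the CPU is online;
 * apic_poweron_cnt refcounts overlapping power-on attempts.
 */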
2604 2598
2605 2599 /* stub function */
2606 2600 int
2607 2601 apix_loaded(void)
2608 2602 {
2609 2603 return (apix_is_enabled);
2610 2604 }