/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
 */
/*
 * Copyright (c) 2010, Intel Corporation.
 * All rights reserved.
 */
/*
 * Copyright 2013 Nexenta Systems, Inc. All rights reserved.
 */

#include <sys/processor.h>
#include <sys/time.h>
#include <sys/psm.h>
#include <sys/smp_impldefs.h>
#include <sys/cram.h>
#include <sys/acpi/acpi.h>
#include <sys/acpica.h>
#include <sys/psm_common.h>
#include <sys/pit.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/ddi_impldefs.h>
#include <sys/pci.h>
#include <sys/promif.h>
#include <sys/x86_archext.h>
#include <sys/cpc_impl.h>
#include <sys/uadmin.h>
#include <sys/panic.h>
#include <sys/debug.h>
#include <sys/archsystm.h>
#include <sys/trap.h>
#include <sys/machsystm.h>
#include <sys/sysmacros.h>
#include <sys/cpuvar.h>
#include <sys/rm_platter.h>
#include <sys/privregs.h>
#include <sys/note.h>
#include <sys/pci_intr_lib.h>
#include <sys/spl.h>
#include <sys/clock.h>
#include <sys/dditypes.h>
#include <sys/sunddi.h>
#include <sys/x_call.h>
#include <sys/reboot.h>
#include <sys/apix.h>

static int apix_get_avail_vector_oncpu(uint32_t, int, int);
static apix_vector_t *apix_init_vector(processorid_t, uchar_t);
static void apix_cleanup_vector(apix_vector_t *);
static void apix_insert_av(apix_vector_t *, void *, avfunc, caddr_t, caddr_t,
    uint64_t *, int, dev_info_t *);
static void apix_remove_av(apix_vector_t *, struct autovec *);
static void apix_clear_dev_map(dev_info_t *, int, int);
static boolean_t apix_is_cpu_enabled(processorid_t);
static void apix_wait_till_seen(processorid_t, int);

#define	GET_INTR_INUM(ihdlp)		\
	(((ihdlp) != NULL) ? ((ddi_intr_handle_impl_t *)(ihdlp))->ih_inum : 0)

apix_rebind_info_t apix_rebindinfo = {0, 0, 0, NULL, 0, NULL};

/*
 * Allocate IPI
 *
 * Return vector number or 0 on error
 */
uchar_t
apix_alloc_ipi(int ipl)
{
	apix_vector_t *vecp;
	uchar_t vector;
	int cpun;
	int nproc;

	APIX_ENTER_CPU_LOCK(0);

	vector = apix_get_avail_vector_oncpu(0, APIX_IPI_MIN, APIX_IPI_MAX);
	if (vector == 0) {
		APIX_LEAVE_CPU_LOCK(0);
		cmn_err(CE_WARN, "apix: no available IPI\n");
		apic_error |= APIC_ERR_GET_IPIVECT_FAIL;
		return (0);
	}

	nproc = max(apic_nproc, apic_max_nproc);
	for (cpun = 0; cpun < nproc; cpun++) {
		vecp = xv_vector(cpun, vector);
		if (vecp == NULL) {
			vecp = kmem_zalloc(sizeof (apix_vector_t), KM_NOSLEEP);
			if (vecp == NULL) {
				cmn_err(CE_WARN, "apix: No memory for ipi");
				goto fail;
			}
			xv_vector(cpun, vector) = vecp;
		}
		vecp->v_state = APIX_STATE_ALLOCED;
		vecp->v_type = APIX_TYPE_IPI;
		vecp->v_cpuid = vecp->v_bound_cpuid = cpun;
		vecp->v_vector = vector;
		vecp->v_pri = ipl;
	}
	APIX_LEAVE_CPU_LOCK(0);
	return (vector);

fail:
	while (--cpun >= 0)
		apix_cleanup_vector(xv_vector(cpun, vector));
	APIX_LEAVE_CPU_LOCK(0);
	return (0);
}
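
/*
 * Usage sketch (illustrative only, not part of this file): a caller
 * would typically pair apix_alloc_ipi() with apix_add_avintr(), which
 * hands vectors >= APIX_IPI_MIN to apix_add_ipi() below.  The handler
 * my_ipi_handler and the IPL chosen here are hypothetical.
 *
 *	uchar_t v;
 *
 *	if ((v = apix_alloc_ipi(XC_HI_PIL)) != 0)
 *		(void) apix_add_avintr(NULL, XC_HI_PIL, my_ipi_handler,
 *		    "my_ipi", v, NULL, NULL, NULL, NULL);
 */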

/*
 * Add IPI service routine
 */
static int
apix_add_ipi(int ipl, avfunc xxintr, char *name, int vector,
    caddr_t arg1, caddr_t arg2)
{
	int cpun;
	apix_vector_t *vecp;
	int nproc;

	ASSERT(vector >= APIX_IPI_MIN && vector <= APIX_IPI_MAX);

	nproc = max(apic_nproc, apic_max_nproc);
	for (cpun = 0; cpun < nproc; cpun++) {
		APIX_ENTER_CPU_LOCK(cpun);
		vecp = xv_vector(cpun, vector);
		apix_insert_av(vecp, NULL, xxintr, arg1, arg2, NULL, ipl, NULL);
		vecp->v_state = APIX_STATE_ENABLED;
		APIX_LEAVE_CPU_LOCK(cpun);
	}

	APIC_VERBOSE(IPI, (CE_CONT, "apix: add ipi for %s, vector %x "
	    "ipl %x\n", name, vector, ipl));

	return (1);
}

/*
 * Find and return the first free vector in the range [start, end]
 */
static int
apix_get_avail_vector_oncpu(uint32_t cpuid, int start, int end)
{
	int i;
	apix_impl_t *apixp = apixs[cpuid];

	for (i = start; i <= end; i++) {
		if (APIC_CHECK_RESERVE_VECTORS(i))
			continue;
		if (IS_VECT_FREE(apixp->x_vectbl[i]))
			return (i);
	}

	return (0);
}

/*
 * Allocate a vector on the specified cpu
 *
 * Return NULL on error
 */
static apix_vector_t *
apix_alloc_vector_oncpu(uint32_t cpuid, dev_info_t *dip, int inum, int type)
{
	processorid_t tocpu = cpuid & ~IRQ_USER_BOUND;
	apix_vector_t *vecp;
	int vector;

	ASSERT(APIX_CPU_LOCK_HELD(tocpu));

	/* find free vector */
	vector = apix_get_avail_vector_oncpu(tocpu, APIX_AVINTR_MIN,
	    APIX_AVINTR_MAX);
	if (vector == 0)
		return (NULL);

	vecp = apix_init_vector(tocpu, vector);
	vecp->v_type = (ushort_t)type;
	vecp->v_inum = inum;
	vecp->v_flags = (cpuid & IRQ_USER_BOUND) ? APIX_VECT_USER_BOUND : 0;

	if (dip != NULL)
		apix_set_dev_map(vecp, dip, inum);

	return (vecp);
}

/*
 * Allocates "count" contiguous MSI vectors starting at the proper alignment.
 * The caller must ensure that count is a power of 2 and not less than 1.
 *
 * Return the first vector, or NULL on failure.
 */
apix_vector_t *
apix_alloc_nvectors_oncpu(uint32_t cpuid, dev_info_t *dip, int inum,
    int count, int type)
{
	int i, msibits, start = 0, navail = 0;
	apix_vector_t *vecp, *startp = NULL;
	processorid_t tocpu = cpuid & ~IRQ_USER_BOUND;
	uint_t flags;

	ASSERT(APIX_CPU_LOCK_HELD(tocpu));

	/*
	 * msibits is the number of low-order message data bits for the
	 * allocated MSI vectors and is used to calculate the aligned
	 * starting vector
	 */
	msibits = count - 1;

	/* The allocation has to be contiguous */
	for (i = APIX_AVINTR_MIN; i <= APIX_AVINTR_MAX; i++) {
		if (!IS_VECT_FREE(xv_vector(tocpu, i)))
			continue;

		/*
		 * starting vector has to be aligned accordingly for
		 * multiple MSIs
		 */
		if (msibits)
			i = (i + msibits) & ~msibits;

		for (navail = 0, start = i; i <= APIX_AVINTR_MAX; i++) {
			if (!IS_VECT_FREE(xv_vector(tocpu, i)))
				break;
			if (APIC_CHECK_RESERVE_VECTORS(i))
				break;
			if (++navail == count)
				goto done;
		}
	}

	return (NULL);

done:
	flags = (cpuid & IRQ_USER_BOUND) ? APIX_VECT_USER_BOUND : 0;

	for (i = 0; i < count; i++) {
		if ((vecp = apix_init_vector(tocpu, start + i)) == NULL)
			goto fail;

		vecp->v_type = (ushort_t)type;
		vecp->v_inum = inum + i;
		vecp->v_flags = flags;

		if (dip != NULL)
			apix_set_dev_map(vecp, dip, inum + i);

		if (i == 0)
			startp = vecp;
	}

	return (startp);

fail:
	while (i-- > 0) {	/* Free allocated vectors */
		vecp = xv_vector(tocpu, start + i);
		apix_clear_dev_map(dip, inum + i, type);
		apix_cleanup_vector(vecp);
	}
	return (NULL);
}
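
/*
 * Worked example of the alignment logic above (illustrative): for
 * count = 4, msibits = 3.  If the first free vector found is 0x35,
 * the start is rounded up to (0x35 + 3) & ~3 = 0x38, and vectors
 * 0x38 through 0x3b are then checked for availability.  This matches
 * MSI semantics, where the low-order bits of the message data select
 * a vector within the aligned block.
 */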

#define	APIX_WRITE_MSI_DATA(_hdl, _cap, _ctrl, _v)\
do {\
	if ((_ctrl) & PCI_MSI_64BIT_MASK)\
		pci_config_put16((_hdl), (_cap) + PCI_MSI_64BIT_DATA, (_v));\
	else\
		pci_config_put16((_hdl), (_cap) + PCI_MSI_32BIT_DATA, (_v));\
_NOTE(CONSTCOND)} while (0)

static void
apix_pci_msi_enable_vector(apix_vector_t *vecp, dev_info_t *dip, int type,
    int inum, int count, uchar_t vector, int target_apic_id)
{
	uint64_t msi_addr, msi_data;
	ushort_t msi_ctrl;
	int i, cap_ptr = i_ddi_get_msi_msix_cap_ptr(dip);
	ddi_acc_handle_t handle = i_ddi_get_pci_config_handle(dip);
	msi_regs_t msi_regs;
	void *intrmap_tbl[PCI_MSI_MAX_INTRS];

	DDI_INTR_IMPLDBG((CE_CONT, "apix_pci_msi_enable_vector: dip=0x%p\n"
	    "\tdriver = %s, inum=0x%x vector=0x%x apicid=0x%x\n", (void *)dip,
	    ddi_driver_name(dip), inum, vector, target_apic_id));

	ASSERT((handle != NULL) && (cap_ptr != 0));

	msi_regs.mr_data = vector;
	msi_regs.mr_addr = target_apic_id;

	for (i = 0; i < count; i++)
		intrmap_tbl[i] = xv_intrmap_private(vecp->v_cpuid, vector + i);
	apic_vt_ops->apic_intrmap_alloc_entry(intrmap_tbl, dip, type,
	    count, 0xff);
	for (i = 0; i < count; i++)
		xv_intrmap_private(vecp->v_cpuid, vector + i) = intrmap_tbl[i];

	apic_vt_ops->apic_intrmap_map_entry(vecp->v_intrmap_private,
	    (void *)&msi_regs, type, count);
	apic_vt_ops->apic_intrmap_record_msi(vecp->v_intrmap_private,
	    &msi_regs);

	/* MSI Address */
	msi_addr = msi_regs.mr_addr;

	/* MSI Data: MSI is edge triggered according to spec */
	msi_data = msi_regs.mr_data;

	DDI_INTR_IMPLDBG((CE_CONT, "apix_pci_msi_enable_vector: addr=0x%lx "
	    "data=0x%lx\n", (long)msi_addr, (long)msi_data));

	if (type == APIX_TYPE_MSI) {
		msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL);

		/* Set the bits to inform how many MSIs are enabled */
		msi_ctrl |= ((highbit(count) - 1) << PCI_MSI_MME_SHIFT);
		pci_config_put16(handle, cap_ptr + PCI_MSI_CTRL, msi_ctrl);

		if ((vecp->v_flags & APIX_VECT_MASKABLE) == 0)
			APIX_WRITE_MSI_DATA(handle, cap_ptr, msi_ctrl,
			    APIX_RESV_VECTOR);

		pci_config_put32(handle,
		    cap_ptr + PCI_MSI_ADDR_OFFSET, msi_addr);
		if (msi_ctrl & PCI_MSI_64BIT_MASK)
			pci_config_put32(handle,
			    cap_ptr + PCI_MSI_ADDR_OFFSET + 4, msi_addr >> 32);

		APIX_WRITE_MSI_DATA(handle, cap_ptr, msi_ctrl, msi_data);
	} else if (type == APIX_TYPE_MSIX) {
		uintptr_t off;
		ddi_intr_msix_t *msix_p = i_ddi_get_msix(dip);

		/* Offset into the "inum"th entry in the MSI-X table */
		off = (uintptr_t)msix_p->msix_tbl_addr +
		    (inum * PCI_MSIX_VECTOR_SIZE);

		ddi_put32(msix_p->msix_tbl_hdl,
		    (uint32_t *)(off + PCI_MSIX_DATA_OFFSET), msi_data);
		ddi_put64(msix_p->msix_tbl_hdl,
		    (uint64_t *)(off + PCI_MSIX_LOWER_ADDR_OFFSET), msi_addr);
	}
}

static void
apix_pci_msi_enable_mode(dev_info_t *dip, int type, int inum)
{
	ushort_t msi_ctrl;
	int cap_ptr = i_ddi_get_msi_msix_cap_ptr(dip);
	ddi_acc_handle_t handle = i_ddi_get_pci_config_handle(dip);

	ASSERT((handle != NULL) && (cap_ptr != 0));

	if (type == APIX_TYPE_MSI) {
		msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL);
		if ((msi_ctrl & PCI_MSI_ENABLE_BIT))
			return;

		msi_ctrl |= PCI_MSI_ENABLE_BIT;
		pci_config_put16(handle, cap_ptr + PCI_MSI_CTRL, msi_ctrl);

	} else if (type == APIX_TYPE_MSIX) {
		uintptr_t off;
		uint32_t mask;
		ddi_intr_msix_t *msix_p;

		msix_p = i_ddi_get_msix(dip);

		/* Offset into "inum"th entry in the MSI-X table & clear mask */
		off = (uintptr_t)msix_p->msix_tbl_addr + (inum *
		    PCI_MSIX_VECTOR_SIZE) + PCI_MSIX_VECTOR_CTRL_OFFSET;

		mask = ddi_get32(msix_p->msix_tbl_hdl, (uint32_t *)off);

		ddi_put32(msix_p->msix_tbl_hdl, (uint32_t *)off, (mask & ~1));

		msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSIX_CTRL);

		if (!(msi_ctrl & PCI_MSIX_ENABLE_BIT)) {
			msi_ctrl |= PCI_MSIX_ENABLE_BIT;
			pci_config_put16(handle, cap_ptr + PCI_MSIX_CTRL,
			    msi_ctrl);
		}
	}
}
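
/*
 * MSI-X table layout assumed by the two routines above (per the PCI
 * specification): each table entry is PCI_MSIX_VECTOR_SIZE (16) bytes,
 * holding the message address at offset 0, message data at offset 8
 * and vector control at offset 12.  For inum = 2, for instance, the
 * entry starts at msix_tbl_addr + 32, and the per-vector mask cleared
 * in apix_pci_msi_enable_mode() is bit 0 of its vector control word.
 */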

/*
 * Set up the interrupt, programming the IO-APIC or MSI/X address/data.
 */
void
apix_enable_vector(apix_vector_t *vecp)
{
	int tocpu = vecp->v_cpuid, type = vecp->v_type;
	apic_cpus_info_t *cpu_infop;
	ulong_t iflag;

	ASSERT(tocpu < apic_nproc);

	cpu_infop = &apic_cpus[tocpu];
	if (vecp->v_flags & APIX_VECT_USER_BOUND)
		cpu_infop->aci_bound++;
	else
		cpu_infop->aci_temp_bound++;

	iflag = intr_clear();
	lock_set(&apic_ioapic_lock);

	if (!DDI_INTR_IS_MSI_OR_MSIX(type)) {	/* fixed */
		apix_intx_enable(vecp->v_inum);
	} else {
		int inum = vecp->v_inum;
		dev_info_t *dip = APIX_GET_DIP(vecp);
		int count = i_ddi_intr_get_current_nintrs(dip);

		if (type == APIX_TYPE_MSI) {	/* MSI */
			if (inum == apix_get_max_dev_inum(dip, type)) {
				/* last one */
				uchar_t start_inum = inum + 1 - count;
				uchar_t start_vect = vecp->v_vector + 1 - count;
				apix_vector_t *start_vecp =
				    xv_vector(vecp->v_cpuid, start_vect);

				APIC_VERBOSE(INTR, (CE_CONT, "apix: call "
				    "apix_pci_msi_enable_vector\n"));
				apix_pci_msi_enable_vector(start_vecp, dip,
				    type, start_inum, count, start_vect,
				    cpu_infop->aci_local_id);

				APIC_VERBOSE(INTR, (CE_CONT, "apix: call "
				    "apix_pci_msi_enable_mode\n"));
				apix_pci_msi_enable_mode(dip, type, inum);
			}
		} else {	/* MSI-X */
			apix_pci_msi_enable_vector(vecp, dip,
			    type, inum, 1, vecp->v_vector,
			    cpu_infop->aci_local_id);
			apix_pci_msi_enable_mode(dip, type, inum);
		}
	}
	vecp->v_state = APIX_STATE_ENABLED;
	apic_redist_cpu_skip &= ~(1 << tocpu);

	lock_clear(&apic_ioapic_lock);
	intr_restore(iflag);
}
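
/*
 * Sketch of the multi-MSI sequence above (illustrative): for a device
 * with a 4-vector MSI group, apix_enable_vector() runs once per
 * vector, but hardware programming is deferred until the member with
 * the highest inum shows up.  At that point start_inum = inum + 1 -
 * count and start_vect = v_vector + 1 - count point back at the first
 * member, and the whole block is programmed and enabled in one shot,
 * since MSI address/data are written once for the entire group.
 */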

/*
 * Disable the interrupt
 */
void
apix_disable_vector(apix_vector_t *vecp)
{
	struct autovec *avp = vecp->v_autovect;
	ulong_t iflag;

	ASSERT(avp != NULL);

	iflag = intr_clear();
	lock_set(&apic_ioapic_lock);

	switch (vecp->v_type) {
	case APIX_TYPE_MSI:
		ASSERT(avp->av_vector != NULL && avp->av_dip != NULL);
		/*
		 * Disable the MSI vector.  Make sure we only disable on
		 * the last of the multi-MSI support.
		 */
		if (i_ddi_intr_get_current_nenables(avp->av_dip) == 1) {
			apic_pci_msi_disable_mode(avp->av_dip,
			    DDI_INTR_TYPE_MSI);
		}
		break;
	case APIX_TYPE_MSIX:
		ASSERT(avp->av_vector != NULL && avp->av_dip != NULL);
		/*
		 * Disable the MSI-X vector; clear the mask and addr/data
		 * for each MSI-X.
		 */
		apic_pci_msi_unconfigure(avp->av_dip, DDI_INTR_TYPE_MSIX,
		    vecp->v_inum);
		/*
		 * Make sure we only disable on the last MSI-X
		 */
		if (i_ddi_intr_get_current_nenables(avp->av_dip) == 1) {
			apic_pci_msi_disable_mode(avp->av_dip,
			    DDI_INTR_TYPE_MSIX);
		}
		break;
	default:
		apix_intx_disable(vecp->v_inum);
		break;
	}

	if (!(apic_cpus[vecp->v_cpuid].aci_status & APIC_CPU_SUSPEND))
		vecp->v_state = APIX_STATE_DISABLED;
	apic_vt_ops->apic_intrmap_free_entry(&vecp->v_intrmap_private);
	vecp->v_intrmap_private = NULL;

	lock_clear(&apic_ioapic_lock);
	intr_restore(iflag);
}

/*
 * Mark a vector as obsoleted or freed.  The vector is marked
 * obsoleted if there are pending requests on it.  Otherwise,
 * free the vector.  The obsoleted vectors get freed after
 * being serviced.
 *
 * Return 1 if the vector was obsoleted, 0 if it was freed.
 */
#define	INTR_BUSY(_avp)\
	((((volatile ushort_t)(_avp)->av_flags) &\
	    (AV_PENTRY_PEND | AV_PENTRY_ONPROC)) != 0)
#define	LOCAL_WITH_INTR_DISABLED(_cpuid)\
	((_cpuid) == psm_get_cpu_id() && !interrupts_enabled())
static uint64_t dummy_tick;

int
apix_obsolete_vector(apix_vector_t *vecp)
{
	struct autovec *avp = vecp->v_autovect;
	int repeats, tries, ipl, busy = 0, cpuid = vecp->v_cpuid;
	apix_impl_t *apixp = apixs[cpuid];

	ASSERT(APIX_CPU_LOCK_HELD(cpuid));

	for (avp = vecp->v_autovect; avp != NULL; avp = avp->av_link) {
		if (avp->av_vector == NULL)
			continue;

		if (LOCAL_WITH_INTR_DISABLED(cpuid)) {
			int bit, index, irr;

			if (INTR_BUSY(avp)) {
				busy++;
				continue;
			}

			/* check IRR for pending interrupts */
			index = vecp->v_vector / 32;
			bit = vecp->v_vector % 32;
			irr = apic_reg_ops->apic_read(APIC_IRR_REG + index);
			if ((irr & (1 << bit)) != 0)
				busy++;

			if (!busy)
				apix_remove_av(vecp, avp);

			continue;
		}

		repeats = 0;
		do {
			repeats++;
			for (tries = 0; tries < apic_max_reps_clear_pending;
			    tries++)
				if (!INTR_BUSY(avp))
					break;
		} while (INTR_BUSY(avp) &&
		    (repeats < apic_max_reps_clear_pending));

		if (INTR_BUSY(avp))
			busy++;
		else {
			/*
			 * The interrupt is not in the pending list or being
			 * serviced.  However, it might be cached in the
			 * local APIC's IRR register.  We cannot read another
			 * CPU's IRR, so wait until all lower priority levels
			 * have finished running.
			 */
			for (ipl = 1; ipl < MIN(LOCK_LEVEL, vecp->v_pri); ipl++)
				apix_wait_till_seen(cpuid, ipl);
			if (INTR_BUSY(avp))
				busy++;
		}

		if (!busy)
			apix_remove_av(vecp, avp);
	}

	if (busy) {
		apix_vector_t *tp = apixp->x_obsoletes;

		if (vecp->v_state == APIX_STATE_OBSOLETED)
			return (1);

		vecp->v_state = APIX_STATE_OBSOLETED;
		vecp->v_next = NULL;
		if (tp == NULL)
			apixp->x_obsoletes = vecp;
		else {
			while (tp->v_next != NULL)
				tp = tp->v_next;
			tp->v_next = vecp;
		}
		return (1);
	}

	/* interrupt is not busy */
	if (vecp->v_state == APIX_STATE_OBSOLETED) {
		/* remove from obsoleted list */
		apixp->x_obsoletes = vecp->v_next;
		vecp->v_next = NULL;
	}
	apix_cleanup_vector(vecp);
	return (0);
}
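
/*
 * Worked example of the IRR probe above (illustrative): for vector
 * 0x60, index = 0x60 / 32 = 3 and bit = 0, so the code reads
 * APIC_IRR_REG + 3 and tests bit 0.  The IRR is 256 bits wide, one
 * bit per vector, exposed as eight 32-bit registers.
 */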

/*
 * Duplicate a range of contiguous vectors to the specified target vectors.
 */
static void
apix_dup_vectors(apix_vector_t *oldp, apix_vector_t *newp, int count)
{
	struct autovec *avp;
	apix_vector_t *fromp, *top;
	processorid_t oldcpu = oldp->v_cpuid, newcpu = newp->v_cpuid;
	uchar_t oldvec = oldp->v_vector, newvec = newp->v_vector;
	int i, inum;

	ASSERT(oldp->v_type != APIX_TYPE_IPI);

	for (i = 0; i < count; i++) {
		fromp = xv_vector(oldcpu, oldvec + i);
		top = xv_vector(newcpu, newvec + i);
		ASSERT(fromp != NULL && top != NULL);

		/* copy over original one */
		top->v_state = fromp->v_state;
		top->v_type = fromp->v_type;
		top->v_bound_cpuid = fromp->v_bound_cpuid;
		top->v_inum = fromp->v_inum;
		top->v_flags = fromp->v_flags;
		top->v_intrmap_private = fromp->v_intrmap_private;

		for (avp = fromp->v_autovect; avp != NULL; avp = avp->av_link) {
			if (avp->av_vector == NULL)
				continue;

			apix_insert_av(top, avp->av_intr_id, avp->av_vector,
			    avp->av_intarg1, avp->av_intarg2, avp->av_ticksp,
			    avp->av_prilevel, avp->av_dip);

			if (fromp->v_type == APIX_TYPE_FIXED &&
			    avp->av_dip != NULL) {
				inum = GET_INTR_INUM(avp->av_intr_id);
				apix_set_dev_map(top, avp->av_dip, inum);
			}
		}

		if (DDI_INTR_IS_MSI_OR_MSIX(fromp->v_type) &&
		    fromp->v_devp != NULL)
			apix_set_dev_map(top, fromp->v_devp->dv_dip,
			    fromp->v_devp->dv_inum);
	}
}

static apix_vector_t *
apix_init_vector(processorid_t cpuid, uchar_t vector)
{
	apix_impl_t *apixp = apixs[cpuid];
	apix_vector_t *vecp = apixp->x_vectbl[vector];

	ASSERT(IS_VECT_FREE(vecp));

	if (vecp == NULL) {
		vecp = kmem_zalloc(sizeof (apix_vector_t), KM_NOSLEEP);
		if (vecp == NULL) {
			cmn_err(CE_WARN, "apix: no memory to allocate vector");
			return (NULL);
		}
		apixp->x_vectbl[vector] = vecp;
	}
	vecp->v_state = APIX_STATE_ALLOCED;
	vecp->v_cpuid = vecp->v_bound_cpuid = cpuid;
	vecp->v_vector = vector;

	return (vecp);
}

static void
apix_cleanup_vector(apix_vector_t *vecp)
{
	ASSERT(vecp->v_share == 0);
	vecp->v_bound_cpuid = IRQ_UNINIT;
	vecp->v_state = APIX_STATE_FREED;
	vecp->v_type = 0;
	vecp->v_flags = 0;
	vecp->v_busy = 0;
	vecp->v_intrmap_private = NULL;
}

static void
apix_dprint_vector(apix_vector_t *vecp, dev_info_t *dip, int count)
{
#ifdef DEBUG
	major_t major;
	char *name, *drv_name;
	int instance, len, t_len;
	char mesg[1024] = "apix: ";

	t_len = sizeof (mesg);
	len = strlen(mesg);
	if (dip != NULL) {
		name = ddi_get_name(dip);
		major = ddi_name_to_major(name);
		drv_name = ddi_major_to_name(major);
		instance = ddi_get_instance(dip);
		(void) snprintf(mesg + len, t_len - len, "%s (%s) instance %d ",
		    name, drv_name, instance);
	}
	len = strlen(mesg);

	switch (vecp->v_type) {
	case APIX_TYPE_FIXED:
		(void) snprintf(mesg + len, t_len - len, "irqno %d",
		    vecp->v_inum);
		break;
	case APIX_TYPE_MSI:
		(void) snprintf(mesg + len, t_len - len,
		    "msi inum %d (count %d)", vecp->v_inum, count);
		break;
	case APIX_TYPE_MSIX:
		(void) snprintf(mesg + len, t_len - len, "msi-x inum %d",
		    vecp->v_inum);
		break;
	default:
		break;
	}

	APIC_VERBOSE(ALLOC, (CE_CONT, "%s allocated with vector 0x%x on "
	    "cpu %d\n", mesg, vecp->v_vector, vecp->v_cpuid));
#endif	/* DEBUG */
}

/*
 * Operations on avintr
 */

#define	INIT_AUTOVEC(p, intr_id, f, arg1, arg2, ticksp, ipl, dip)	\
do {	\
	(p)->av_intr_id = intr_id;	\
	(p)->av_vector = f;	\
	(p)->av_intarg1 = arg1;	\
	(p)->av_intarg2 = arg2;	\
	(p)->av_ticksp = ticksp;	\
	(p)->av_prilevel = ipl;	\
	(p)->av_dip = dip;	\
	(p)->av_flags = 0;	\
_NOTE(CONSTCOND)} while (0)

/*
 * Insert an interrupt service routine into the chain, ordered by
 * priority from high to low
 */
static void
apix_insert_av(apix_vector_t *vecp, void *intr_id, avfunc f, caddr_t arg1,
    caddr_t arg2, uint64_t *ticksp, int ipl, dev_info_t *dip)
{
	struct autovec *p, *prep, *mem;

	APIC_VERBOSE(INTR, (CE_CONT, "apix_insert_av: dip %p, vector 0x%x, "
	    "cpu %d\n", (void *)dip, vecp->v_vector, vecp->v_cpuid));

	mem = kmem_zalloc(sizeof (struct autovec), KM_SLEEP);
	INIT_AUTOVEC(mem, intr_id, f, arg1, arg2, ticksp, ipl, dip);
	if (vecp->v_type == APIX_TYPE_FIXED && apic_level_intr[vecp->v_inum])
		mem->av_flags |= AV_PENTRY_LEVEL;

	vecp->v_share++;
	vecp->v_pri = (ipl > vecp->v_pri) ? ipl : vecp->v_pri;
	if (vecp->v_autovect == NULL) {	/* Nothing on list - put it at head */
		vecp->v_autovect = mem;
		return;
	}

	if (DDI_INTR_IS_MSI_OR_MSIX(vecp->v_type)) {	/* MSI/X */
		ASSERT(vecp->v_share == 1);	/* No sharing for MSI/X */

		INIT_AUTOVEC(vecp->v_autovect, intr_id, f, arg1, arg2, ticksp,
		    ipl, dip);
		prep = vecp->v_autovect->av_link;
		vecp->v_autovect->av_link = NULL;

		/* Free the following autovect chain */
		while (prep != NULL) {
			ASSERT(prep->av_vector == NULL);

			p = prep;
			prep = prep->av_link;
			kmem_free(p, sizeof (struct autovec));
		}

		kmem_free(mem, sizeof (struct autovec));
		return;
	}

	/* find where it goes in list */
	prep = NULL;
	for (p = vecp->v_autovect; p != NULL; p = p->av_link) {
		if (p->av_vector && p->av_prilevel <= ipl)
			break;
		prep = p;
	}
	if (prep != NULL) {
		if (prep->av_vector == NULL) {	/* freed struct available */
			INIT_AUTOVEC(prep, intr_id, f, arg1, arg2,
			    ticksp, ipl, dip);
			prep->av_flags = mem->av_flags;
			kmem_free(mem, sizeof (struct autovec));
			return;
		}

		mem->av_link = prep->av_link;
		prep->av_link = mem;
	} else {
		/* insert new interrupt at beginning of chain */
		mem->av_link = vecp->v_autovect;
		vecp->v_autovect = mem;
	}
}
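
/*
 * Example of the resulting ordering (illustrative): inserting a
 * handler at ipl 5 into a chain whose live entries are at ipls 9, 5
 * and 1 yields 9, 5 (new), 5, 1; the loop above breaks at the first
 * entry with av_prilevel <= the new ipl, so the chain stays sorted
 * from high to low priority.
 */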

/*
 * After having made a change to an autovector list, wait until we have
 * seen the specified cpu not executing an interrupt at that level, so we
 * know our change has taken effect completely (no old state in registers,
 * etc).
 */
#define	APIX_CPU_ENABLED(_cp) \
	(quiesce_active == 0 && \
	(((_cp)->cpu_flags & (CPU_QUIESCED|CPU_OFFLINE)) == 0))

static void
apix_wait_till_seen(processorid_t cpuid, int ipl)
{
	struct cpu *cp = cpu[cpuid];

	if (cp == NULL || LOCAL_WITH_INTR_DISABLED(cpuid))
		return;

	/*
	 * Don't wait if the CPU is quiesced or offlined.  This can happen
	 * when a CPU is running the pause thread but hardware triggered an
	 * interrupt and the interrupt gets queued.
	 */
	for (;;) {
		if (!INTR_ACTIVE((volatile struct cpu *)cpu[cpuid], ipl) &&
		    (!APIX_CPU_ENABLED(cp) ||
		    !INTR_PENDING((volatile apix_impl_t *)apixs[cpuid], ipl)))
			return;
	}
}

static void
apix_remove_av(apix_vector_t *vecp, struct autovec *target)
{
	int hi_pri = 0;
	struct autovec *p;

	if (target == NULL)
		return;

	APIC_VERBOSE(INTR, (CE_CONT, "apix_remove_av: dip %p, vector 0x%x, "
	    "cpu %d\n", (void *)target->av_dip, vecp->v_vector, vecp->v_cpuid));

	for (p = vecp->v_autovect; p; p = p->av_link) {
		if (p == target || p->av_vector == NULL)
			continue;
		hi_pri = (p->av_prilevel > hi_pri) ? p->av_prilevel : hi_pri;
	}

	vecp->v_share--;
	vecp->v_pri = hi_pri;

	/*
	 * This drops the handler from the chain, it can no longer be called.
	 * However, there is no guarantee that the handler is not currently
	 * still executing.
	 */
	target->av_vector = NULL;
	/*
	 * There is a race where we could be just about to pick up the ticksp
	 * pointer to increment it after returning from the service routine
	 * in av_dispatch_autovect.  Rather than NULL it out let's just point
	 * it off to something safe so that any final tick update attempt
	 * won't fault.
	 */
	target->av_ticksp = &dummy_tick;
	apix_wait_till_seen(vecp->v_cpuid, target->av_prilevel);
}

static struct autovec *
apix_find_av(apix_vector_t *vecp, void *intr_id, avfunc f)
{
	struct autovec *p;

	for (p = vecp->v_autovect; p; p = p->av_link) {
		if ((p->av_vector == f) && (p->av_intr_id == intr_id)) {
			/* found the handler */
			return (p);
		}
	}

	return (NULL);
}

static apix_vector_t *
apix_find_vector_by_avintr(void *intr_id, avfunc f)
{
	apix_vector_t *vecp;
	processorid_t n;
	uchar_t v;

	for (n = 0; n < apic_nproc; n++) {
		if (!apix_is_cpu_enabled(n))
			continue;

		for (v = APIX_AVINTR_MIN; v <= APIX_AVINTR_MAX; v++) {
			vecp = xv_vector(n, v);
			if (vecp == NULL ||
			    vecp->v_state <= APIX_STATE_OBSOLETED)
				continue;

			if (apix_find_av(vecp, intr_id, f) != NULL)
				return (vecp);
		}
	}

	return (NULL);
}

/*
 * Add an interrupt service routine.
 *
 * For legacy interrupts (HPET timer, ACPI SCI), the vector is actually
 * an IRQ number, and a vector is then allocated.  Otherwise, the vector
 * has already been allocated.  The input argument virt_vect is a virtual
 * vector of the form APIX_VIRTVECTOR(cpuid, vector).
 *
 * Return 1 on success, 0 on failure.
 */
int
apix_add_avintr(void *intr_id, int ipl, avfunc xxintr, char *name,
    int virt_vect, caddr_t arg1, caddr_t arg2, uint64_t *ticksp,
    dev_info_t *dip)
{
	int cpuid;
	uchar_t v = (uchar_t)APIX_VIRTVEC_VECTOR(virt_vect);
	apix_vector_t *vecp;

	if (xxintr == NULL) {
		cmn_err(CE_WARN, "Attempt to add null for %s "
		    "on vector 0x%x,0x%x", name,
		    APIX_VIRTVEC_CPU(virt_vect),
		    APIX_VIRTVEC_VECTOR(virt_vect));
		return (0);
	}

	if (v >= APIX_IPI_MIN)	/* IPIs */
		return (apix_add_ipi(ipl, xxintr, name, v, arg1, arg2));

	if (!APIX_IS_VIRTVEC(virt_vect)) {	/* got irq */
		int irqno = virt_vect;
		int inum = GET_INTR_INUM(intr_id);

		/*
		 * Scenarios include:
		 * a. add_avintr() is called before irqp initialized (legacy)
		 * b. irqp is initialized, vector is not allocated (fixed)
		 * c. irqp is initialized, vector is allocated (fixed & shared)
		 */
		if ((vecp = apix_alloc_intx(dip, inum, irqno)) == NULL)
			return (0);

		cpuid = vecp->v_cpuid;
		v = vecp->v_vector;
		virt_vect = APIX_VIRTVECTOR(cpuid, v);
	} else {	/* got virtual vector */
		cpuid = APIX_VIRTVEC_CPU(virt_vect);
		vecp = xv_vector(cpuid, v);
		ASSERT(vecp != NULL);
	}

	lock_set(&apix_lock);
	if (vecp->v_state <= APIX_STATE_OBSOLETED) {
		vecp = NULL;

		/*
		 * Interrupts that are allocated but not enabled will not
		 * normally be re-targeted.  However, MSIs in the allocated
		 * state can be re-targeted by a group re-target.
		 */
		if (intr_id != NULL && dip != NULL) {
			ddi_intr_handle_impl_t *hdlp = intr_id;
			vecp = apix_get_dev_map(dip, hdlp->ih_inum,
			    hdlp->ih_type);
			ASSERT(vecp->v_state == APIX_STATE_ALLOCED);
		}
		if (vecp == NULL) {
			lock_clear(&apix_lock);
			cmn_err(CE_WARN, "Invalid interrupt 0x%x,0x%x "
			    "for %p to add", cpuid, v, intr_id);
			return (0);
		}
		cpuid = vecp->v_cpuid;
		virt_vect = APIX_VIRTVECTOR(cpuid, vecp->v_vector);
	}

	APIX_ENTER_CPU_LOCK(cpuid);
	apix_insert_av(vecp, intr_id, xxintr, arg1, arg2, ticksp, ipl, dip);
	APIX_LEAVE_CPU_LOCK(cpuid);

	(void) apix_addspl(virt_vect, ipl, 0, 0);

	lock_clear(&apix_lock);

	return (1);
}
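
/*
 * Usage sketch (illustrative): a caller registering a handler on ISA
 * IRQ 9 at ipl 5 would look roughly like this; my_intr, sc and dip
 * are hypothetical:
 *
 *	(void) apix_add_avintr(NULL, 5, my_intr, "mydev", 9,
 *	    (caddr_t)sc, NULL, NULL, dip);
 *
 * Since 9 is not a virtual vector, the irq path above allocates (or
 * shares) the fixed vector via apix_alloc_intx().
 */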

/*
 * Remove an avintr.
 *
 * For fixed interrupts, if it's the last one sharing the IRQ line, free
 * the vector.  For MSI/X, only disable the interrupt but do not free the
 * vector, which is freed by PSM_XXX_FREE_XXX.
 */
void
apix_rem_avintr(void *intr_id, int ipl, avfunc xxintr, int virt_vect)
{
	avfunc f;
	apix_vector_t *vecp;
	struct autovec *avp;
	processorid_t cpuid;

	if ((f = xxintr) == NULL)
		return;

	lock_set(&apix_lock);

	if (!APIX_IS_VIRTVEC(virt_vect)) {	/* got irq */
		if ((vecp = apix_intx_get_vector(virt_vect)) != NULL)
			virt_vect = APIX_VIRTVECTOR(vecp->v_cpuid,
			    vecp->v_vector);
	} else {	/* got virtual vector */
		vecp = xv_vector(APIX_VIRTVEC_CPU(virt_vect),
		    APIX_VIRTVEC_VECTOR(virt_vect));
	}

	if (vecp == NULL) {
		lock_clear(&apix_lock);
		cmn_err(CE_CONT, "Invalid interrupt 0x%x,0x%x to remove",
		    APIX_VIRTVEC_CPU(virt_vect),
		    APIX_VIRTVEC_VECTOR(virt_vect));
		return;
	}

	if (vecp->v_state <= APIX_STATE_OBSOLETED ||
	    ((avp = apix_find_av(vecp, intr_id, f)) == NULL)) {
		/*
		 * It's possible that the interrupt is rebound to a
		 * different cpu before rem_avintr() is called.  Search
		 * through all vectors when that happens.
		 */
		if ((vecp = apix_find_vector_by_avintr(intr_id, f))
		    == NULL) {
			lock_clear(&apix_lock);
			cmn_err(CE_CONT, "Unknown interrupt 0x%x,0x%x "
			    "for %p to remove", APIX_VIRTVEC_CPU(virt_vect),
			    APIX_VIRTVEC_VECTOR(virt_vect), intr_id);
			return;
		}
		virt_vect = APIX_VIRTVECTOR(vecp->v_cpuid, vecp->v_vector);
		avp = apix_find_av(vecp, intr_id, f);
	}
	cpuid = vecp->v_cpuid;

	/* disable interrupt */
	(void) apix_delspl(virt_vect, ipl, 0, 0);

	/* remove ISR entry */
	APIX_ENTER_CPU_LOCK(cpuid);
	apix_remove_av(vecp, avp);
	APIX_LEAVE_CPU_LOCK(cpuid);

	lock_clear(&apix_lock);
}

/*
 * Device to vector mapping table
 */

static void
apix_clear_dev_map(dev_info_t *dip, int inum, int type)
{
	char *name;
	major_t major;
	apix_dev_vector_t *dvp, *prev = NULL;
	int found = 0;

	name = ddi_get_name(dip);
	major = ddi_name_to_major(name);

	mutex_enter(&apix_mutex);

	for (dvp = apix_dev_vector[major]; dvp != NULL;
	    prev = dvp, dvp = dvp->dv_next) {
		if (dvp->dv_dip == dip && dvp->dv_inum == inum &&
		    dvp->dv_type == type) {
			found++;
			break;
		}
	}

	if (!found) {
		mutex_exit(&apix_mutex);
		return;
	}

	if (prev != NULL)
		prev->dv_next = dvp->dv_next;

	if (apix_dev_vector[major] == dvp)
		apix_dev_vector[major] = dvp->dv_next;

	dvp->dv_vector->v_devp = NULL;

	mutex_exit(&apix_mutex);

	kmem_free(dvp, sizeof (apix_dev_vector_t));
}

void
apix_set_dev_map(apix_vector_t *vecp, dev_info_t *dip, int inum)
{
	apix_dev_vector_t *dvp;
	char *name;
	major_t major;
	uint32_t found = 0;

	ASSERT(dip != NULL);
	name = ddi_get_name(dip);
	major = ddi_name_to_major(name);

	mutex_enter(&apix_mutex);

	for (dvp = apix_dev_vector[major]; dvp != NULL;
	    dvp = dvp->dv_next) {
		if (dvp->dv_dip == dip && dvp->dv_inum == inum &&
		    dvp->dv_type == vecp->v_type) {
			found++;
			break;
		}
	}

	if (found == 0) {	/* not found */
		dvp = kmem_zalloc(sizeof (apix_dev_vector_t), KM_SLEEP);
		dvp->dv_dip = dip;
		dvp->dv_inum = inum;
		dvp->dv_type = vecp->v_type;

		dvp->dv_next = apix_dev_vector[major];
		apix_dev_vector[major] = dvp;
	}
	dvp->dv_vector = vecp;
	vecp->v_devp = dvp;

	mutex_exit(&apix_mutex);

	DDI_INTR_IMPLDBG((CE_CONT, "apix_set_dev_map: dip=0x%p "
	    "inum=0x%x vector=0x%x/0x%x\n",
	    (void *)dip, inum, vecp->v_cpuid, vecp->v_vector));
}

apix_vector_t *
apix_get_dev_map(dev_info_t *dip, int inum, int type)
{
	char *name;
	major_t major;
	apix_dev_vector_t *dvp;
	apix_vector_t *vecp;

	name = ddi_get_name(dip);
	if ((major = ddi_name_to_major(name)) == DDI_MAJOR_T_NONE)
		return (NULL);

	mutex_enter(&apix_mutex);
	for (dvp = apix_dev_vector[major]; dvp != NULL;
	    dvp = dvp->dv_next) {
		if (dvp->dv_dip == dip && dvp->dv_inum == inum &&
		    dvp->dv_type == type) {
			vecp = dvp->dv_vector;
			mutex_exit(&apix_mutex);
			return (vecp);
		}
	}
	mutex_exit(&apix_mutex);

	return (NULL);
}

/*
 * Get the minimum inum for the specified device, used for MSI
 */
int
apix_get_min_dev_inum(dev_info_t *dip, int type)
{
	char *name;
	major_t major;
	apix_dev_vector_t *dvp;
	int inum = -1;

	name = ddi_get_name(dip);
	major = ddi_name_to_major(name);

	mutex_enter(&apix_mutex);
	for (dvp = apix_dev_vector[major]; dvp != NULL;
	    dvp = dvp->dv_next) {
		if (dvp->dv_dip == dip && dvp->dv_type == type) {
			if (inum == -1)
				inum = dvp->dv_inum;
			else
				inum = (dvp->dv_inum < inum) ?
				    dvp->dv_inum : inum;
		}
	}
	mutex_exit(&apix_mutex);

	return (inum);
}

int
apix_get_max_dev_inum(dev_info_t *dip, int type)
{
	char *name;
	major_t major;
	apix_dev_vector_t *dvp;
	int inum = -1;

	name = ddi_get_name(dip);
	major = ddi_name_to_major(name);

	mutex_enter(&apix_mutex);
	for (dvp = apix_dev_vector[major]; dvp != NULL;
	    dvp = dvp->dv_next) {
		if (dvp->dv_dip == dip && dvp->dv_type == type) {
			if (inum == -1)
				inum = dvp->dv_inum;
			else
				inum = (dvp->dv_inum > inum) ?
				    dvp->dv_inum : inum;
		}
	}
	mutex_exit(&apix_mutex);

	return (inum);
}

/*
 * Major to cpu binding, for INTR_ROUND_ROBIN_WITH_AFFINITY cpu
 * binding policy
 */

static uint32_t
apix_get_dev_binding(dev_info_t *dip)
{
	major_t major;
	char *name;
	uint32_t cpu = IRQ_UNINIT;

	name = ddi_get_name(dip);
	major = ddi_name_to_major(name);
	if (major < devcnt) {
		mutex_enter(&apix_mutex);
		cpu = apix_major_to_cpu[major];
		mutex_exit(&apix_mutex);
	}

	return (cpu);
}

static void
apix_set_dev_binding(dev_info_t *dip, uint32_t cpu)
{
	major_t major;
	char *name;

	/* setup major to cpu mapping */
	name = ddi_get_name(dip);
	major = ddi_name_to_major(name);
	if (apix_major_to_cpu[major] == IRQ_UNINIT) {
		mutex_enter(&apix_mutex);
		apix_major_to_cpu[major] = cpu;
		mutex_exit(&apix_mutex);
	}
}

/*
 * Return the cpu to which this intr should be bound.
 * Check properties or any other mechanism to see if the user wants it
 * bound to a specific CPU.  If so, return the cpu id with the high bit
 * set.  If not, use the policy to choose a cpu and return the id.
 */
uint32_t
apix_bind_cpu(dev_info_t *dip)
{
	int instance, instno, prop_len, bind_cpu, count;
	uint_t i, rc;
	major_t major;
	char *name, *drv_name, *prop_val, *cptr;
	char prop_name[32];

	lock_set(&apix_lock);

	if (apic_intr_policy == INTR_LOWEST_PRIORITY) {
		cmn_err(CE_WARN, "apix: unsupported interrupt binding policy "
		    "LOWEST PRIORITY, use ROUND ROBIN instead");
		apic_intr_policy = INTR_ROUND_ROBIN;
	}

	if (apic_nproc == 1) {
		lock_clear(&apix_lock);
		return (0);
	}

	drv_name = NULL;
	rc = DDI_PROP_NOT_FOUND;
	major = (major_t)-1;
	if (dip != NULL) {
		name = ddi_get_name(dip);
		major = ddi_name_to_major(name);
		drv_name = ddi_major_to_name(major);
		instance = ddi_get_instance(dip);
		if (apic_intr_policy == INTR_ROUND_ROBIN_WITH_AFFINITY) {
			bind_cpu = apix_get_dev_binding(dip);
			if (bind_cpu != IRQ_UNINIT) {
				lock_clear(&apix_lock);
				return (bind_cpu);
			}
		}
		/*
		 * search for "drvname"_intpt_bind_cpus property first, the
		 * syntax of the property should be "a[,b,c,...]" where
		 * instance 0 binds to cpu a, instance 1 binds to cpu b,
		 * instance 2 binds to cpu c...
		 * ddi_getlongprop() will search /option first, then /
		 * if "drvname"_intpt_bind_cpus doesn't exist, then find
		 * intpt_bind_cpus property.  The syntax is the same, and
		 * it applies to all the devices if its "drvname" specific
		 * property doesn't exist
		 */
		(void) strcpy(prop_name, drv_name);
		(void) strcat(prop_name, "_intpt_bind_cpus");
		rc = ddi_getlongprop(DDI_DEV_T_ANY, dip, 0, prop_name,
		    (caddr_t)&prop_val, &prop_len);
		if (rc != DDI_PROP_SUCCESS) {
			rc = ddi_getlongprop(DDI_DEV_T_ANY, dip, 0,
			    "intpt_bind_cpus", (caddr_t)&prop_val, &prop_len);
		}
	}
	if (rc == DDI_PROP_SUCCESS) {
		for (i = count = 0; i < (prop_len - 1); i++)
			if (prop_val[i] == ',')
				count++;
		if (prop_val[i - 1] != ',')
			count++;
		/*
		 * if somehow the binding instances defined in the
		 * property are not enough for this instance, then
		 * reuse the pattern for the next instances until
		 * it reaches the requested instance
		 */
		instno = instance % count;
		i = 0;
		cptr = prop_val;
		while (i < instno)
			if (*cptr++ == ',')
				i++;
		bind_cpu = stoi(&cptr);
		/* if specific cpu is bogus, then default to cpu 0 */
		if (bind_cpu >= apic_nproc) {
			cmn_err(CE_WARN, "apix: %s=%s: CPU %d not present",
			    prop_name, prop_val, bind_cpu);
			bind_cpu = 0;
		} else {
			/* indicate that we are bound at user request */
			bind_cpu |= IRQ_USER_BOUND;
		}
		kmem_free(prop_val, prop_len);
		/*
		 * no need to check apic_cpus[].aci_status, if specific cpu is
		 * not up, then post_cpu_start will handle it.
		 */
	} else {
		bind_cpu = apic_get_next_bind_cpu();
	}

	lock_clear(&apix_lock);

	return ((uint32_t)bind_cpu);
}
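
/*
 * Property example (hypothetical driver "mydrv"): a line such as
 *
 *	mydrv_intpt_bind_cpus="0,2,3";
 *
 * binds instance 0 to cpu 0, instance 1 to cpu 2 and instance 2 to
 * cpu 3.  With count = 3, instance 4 wraps around via instno =
 * instance % count and lands on cpu 2.  A cpu chosen this way is
 * returned with IRQ_USER_BOUND set so callers can tell user binding
 * from policy binding.
 */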

static boolean_t
apix_is_cpu_enabled(processorid_t cpuid)
{
	apic_cpus_info_t *cpu_infop;

	cpu_infop = &apic_cpus[cpuid];

	if ((cpu_infop->aci_status & APIC_CPU_INTR_ENABLE) == 0)
		return (B_FALSE);

	return (B_TRUE);
}

/*
 * Must be called with apix_lock held.  This function can be
 * called from above lock level by apix_intr_redistribute().
 *
 * Arguments:
 *	vecp  : Vector to be rebound
 *	newcpu: Target cpu
 *	count : Number of contiguous vectors
 *
 * Return the new vector bound to, or NULL on failure.
 */
apix_vector_t *
apix_rebind(apix_vector_t *vecp, processorid_t newcpu, int count)
{
	apix_vector_t *newp, *oldp;
	processorid_t oldcpu = vecp->v_cpuid;
	uchar_t newvec, oldvec = vecp->v_vector;
	int i;

	ASSERT(LOCK_HELD(&apix_lock) && count > 0);

	if (!apix_is_cpu_enabled(newcpu))
		return (NULL);

	if (vecp->v_cpuid == newcpu)	/* rebind to the same cpu */
		return (vecp);

	APIX_ENTER_CPU_LOCK(oldcpu);
	APIX_ENTER_CPU_LOCK(newcpu);

	/* allocate vector */
	if (count == 1)
		newp = apix_alloc_vector_oncpu(newcpu, NULL, 0, vecp->v_type);
	else {
		ASSERT(vecp->v_type == APIX_TYPE_MSI);
		newp = apix_alloc_nvectors_oncpu(newcpu, NULL, 0, count,
		    vecp->v_type);
	}
	if (newp == NULL) {
		APIX_LEAVE_CPU_LOCK(newcpu);
		APIX_LEAVE_CPU_LOCK(oldcpu);
		return (NULL);
	}

	newvec = newp->v_vector;
	apix_dup_vectors(vecp, newp, count);

	APIX_LEAVE_CPU_LOCK(newcpu);
	APIX_LEAVE_CPU_LOCK(oldcpu);

	if (!DDI_INTR_IS_MSI_OR_MSIX(vecp->v_type)) {
		ASSERT(count == 1);
		if (apix_intx_rebind(vecp->v_inum, newcpu, newvec) != 0) {
			struct autovec *avp;
			int inum;

			/* undo duplication */
			APIX_ENTER_CPU_LOCK(oldcpu);
			APIX_ENTER_CPU_LOCK(newcpu);
			for (avp = newp->v_autovect; avp != NULL;
			    avp = avp->av_link) {
				if (avp->av_dip != NULL) {
					inum = GET_INTR_INUM(avp->av_intr_id);
					apix_set_dev_map(vecp, avp->av_dip,
					    inum);
				}
				apix_remove_av(newp, avp);
			}
			apix_cleanup_vector(newp);
			APIX_LEAVE_CPU_LOCK(newcpu);
			APIX_LEAVE_CPU_LOCK(oldcpu);
			APIC_VERBOSE(REBIND, (CE_CONT, "apix: rebind fixed "
			    "interrupt 0x%x to cpu %d failed\n",
			    vecp->v_inum, newcpu));
			return (NULL);
		}

		APIX_ENTER_CPU_LOCK(oldcpu);
		(void) apix_obsolete_vector(vecp);
		APIX_LEAVE_CPU_LOCK(oldcpu);
		APIC_VERBOSE(REBIND, (CE_CONT, "apix: rebind fixed interrupt"
		    " 0x%x/0x%x to 0x%x/0x%x\n",
		    oldcpu, oldvec, newcpu, newvec));
		return (newp);
	}

	for (i = 0; i < count; i++) {
		oldp = xv_vector(oldcpu, oldvec + i);
		newp = xv_vector(newcpu, newvec + i);

		if (newp->v_share > 0) {
			APIX_SET_REBIND_INFO(oldp, newp);

			apix_enable_vector(newp);

			APIX_CLR_REBIND_INFO();
		}

		APIX_ENTER_CPU_LOCK(oldcpu);
		(void) apix_obsolete_vector(oldp);
		APIX_LEAVE_CPU_LOCK(oldcpu);
	}
	APIC_VERBOSE(REBIND, (CE_CONT, "apix: rebind vector 0x%x/0x%x "
	    "to 0x%x/0x%x, count=%d\n",
	    oldcpu, oldvec, newcpu, newvec, count));

	return (xv_vector(newcpu, newvec));
}

/*
 * Scenarios include:
 * a. add_avintr() is called before irqp initialized (legacy)
 * b. irqp is initialized, vector is not allocated (fixed interrupts)
 * c. irqp is initialized, vector is allocated (shared interrupts)
 */
apix_vector_t *
apix_alloc_intx(dev_info_t *dip, int inum, int irqno)
{
	apic_irq_t *irqp;
	apix_vector_t *vecp;

	/*
	 * Allocate the IRQ.  The caller is later responsible for the
	 * initialization.
	 */
	mutex_enter(&airq_mutex);
	if ((irqp = apic_irq_table[irqno]) == NULL) {
		/* allocate irq */
		irqp = kmem_zalloc(sizeof (apic_irq_t), KM_SLEEP);
		irqp->airq_mps_intr_index = FREE_INDEX;
		apic_irq_table[irqno] = irqp;
	}
	if (irqp->airq_mps_intr_index == FREE_INDEX) {
		irqp->airq_mps_intr_index = DEFAULT_INDEX;
		irqp->airq_cpu = IRQ_UNINIT;
		irqp->airq_origirq = (uchar_t)irqno;
	}

	mutex_exit(&airq_mutex);

	/*
	 * allocate vector
	 */
	if (irqp->airq_cpu == IRQ_UNINIT) {
		uint32_t bindcpu, cpuid;

		/* select cpu by system policy */
		bindcpu = apix_bind_cpu(dip);
		cpuid = bindcpu & ~IRQ_USER_BOUND;

		/* allocate vector */
		APIX_ENTER_CPU_LOCK(cpuid);

		if ((vecp = apix_alloc_vector_oncpu(bindcpu, dip, inum,
		    APIX_TYPE_FIXED)) == NULL) {
			cmn_err(CE_WARN, "No interrupt vector for irq %x",
			    irqno);
			APIX_LEAVE_CPU_LOCK(cpuid);
			return (NULL);
		}
		vecp->v_inum = irqno;
		vecp->v_flags |= APIX_VECT_MASKABLE;

		apix_intx_set_vector(irqno, vecp->v_cpuid, vecp->v_vector);

		APIX_LEAVE_CPU_LOCK(cpuid);
	} else {
		vecp = xv_vector(irqp->airq_cpu, irqp->airq_vector);
		ASSERT(!IS_VECT_FREE(vecp));

		if (dip != NULL)
			apix_set_dev_map(vecp, dip, inum);
	}

	if ((dip != NULL) &&
	    (apic_intr_policy == INTR_ROUND_ROBIN_WITH_AFFINITY) &&
	    ((vecp->v_flags & APIX_VECT_USER_BOUND) == 0))
		apix_set_dev_binding(dip, vecp->v_cpuid);

	apix_dprint_vector(vecp, dip, 1);

	return (vecp);
}

int
apix_alloc_msi(dev_info_t *dip, int inum, int count, int behavior)
{
	int i, cap_ptr, rcount = count;
	apix_vector_t *vecp;
	processorid_t bindcpu, cpuid;
	ushort_t msi_ctrl;
	ddi_acc_handle_t handle;

	DDI_INTR_IMPLDBG((CE_CONT, "apix_alloc_msi_vectors: dip=0x%p "
	    "inum=0x%x count=0x%x behavior=%d\n",
	    (void *)dip, inum, count, behavior));

	if (count > 1) {
		if (behavior == DDI_INTR_ALLOC_STRICT &&
		    apic_multi_msi_enable == 0)
			return (0);
		if (apic_multi_msi_enable == 0)
			count = 1;
	}

	/* Check whether it supports per-vector masking */
	cap_ptr = i_ddi_get_msi_msix_cap_ptr(dip);
	handle = i_ddi_get_pci_config_handle(dip);
	msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL);

	/* bind to cpu */
	bindcpu = apix_bind_cpu(dip);
	cpuid = bindcpu & ~IRQ_USER_BOUND;

	/* if count is not a power of 2, round it down */
	if (!ISP2(rcount))
		rcount = 1 << (highbit(rcount) - 1);

	APIX_ENTER_CPU_LOCK(cpuid);
	for (vecp = NULL; rcount > 0; rcount >>= 1) {
		vecp = apix_alloc_nvectors_oncpu(bindcpu, dip, inum, rcount,
		    APIX_TYPE_MSI);
		if (vecp != NULL || behavior == DDI_INTR_ALLOC_STRICT)
			break;
	}
	for (i = 0; vecp && i < rcount; i++)
		xv_vector(vecp->v_cpuid, vecp->v_vector + i)->v_flags |=
		    (msi_ctrl & PCI_MSI_PVM_MASK) ? APIX_VECT_MASKABLE : 0;
	APIX_LEAVE_CPU_LOCK(cpuid);
	if (vecp == NULL) {
		APIC_VERBOSE(INTR, (CE_CONT,
		    "apix_alloc_msi: no %d cont vectors found on cpu 0x%x\n",
		    count, bindcpu));
		return (0);
	}

	/* major to cpu binding */
	if ((apic_intr_policy == INTR_ROUND_ROBIN_WITH_AFFINITY) &&
	    ((vecp->v_flags & APIX_VECT_USER_BOUND) == 0))
		apix_set_dev_binding(dip, vecp->v_cpuid);

	apix_dprint_vector(vecp, dip, rcount);

	return (rcount);
}
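
/*
 * Worked example of the count handling above (illustrative): a request
 * for count = 6 is first rounded down to rcount = 1 << (highbit(6) -
 * 1) = 4, since MSI counts must be powers of 2.  Under
 * DDI_INTR_ALLOC_NORMAL the loop then retries with 4, 2 and finally 1
 * contiguous vectors; under DDI_INTR_ALLOC_STRICT it gives up after
 * the first failed attempt.
 */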

int
apix_alloc_msix(dev_info_t *dip, int inum, int count, int behavior)
{
	apix_vector_t *vecp;
	processorid_t bindcpu, cpuid;
	int i;

	for (i = 0; i < count; i++) {
		/* select cpu by system policy */
		bindcpu = apix_bind_cpu(dip);
		cpuid = bindcpu & ~IRQ_USER_BOUND;

		/* allocate vector */
		APIX_ENTER_CPU_LOCK(cpuid);
		if ((vecp = apix_alloc_vector_oncpu(bindcpu, dip, inum + i,
		    APIX_TYPE_MSIX)) == NULL) {
			APIX_LEAVE_CPU_LOCK(cpuid);
			APIC_VERBOSE(INTR, (CE_CONT, "apix_alloc_msix: "
			    "allocate msix for device dip=%p, inum=%d on"
			    " cpu %d failed", (void *)dip, inum + i, bindcpu));
			break;
		}
		vecp->v_flags |= APIX_VECT_MASKABLE;
		APIX_LEAVE_CPU_LOCK(cpuid);

		/* major to cpu mapping */
		if ((i == 0) &&
		    (apic_intr_policy == INTR_ROUND_ROBIN_WITH_AFFINITY) &&
		    ((vecp->v_flags & APIX_VECT_USER_BOUND) == 0))
			apix_set_dev_binding(dip, vecp->v_cpuid);

		apix_dprint_vector(vecp, dip, 1);
	}

	if (i < count && behavior == DDI_INTR_ALLOC_STRICT) {
		APIC_VERBOSE(INTR, (CE_WARN, "apix_alloc_msix: "
		    "strictly allocate %d vectors failed, got %d\n",
		    count, i));
		apix_free_vectors(dip, inum, i, APIX_TYPE_MSIX);
		i = 0;
	}

	return (i);
}

/*
 * A rollback free for vectors allocated by apix_alloc_xxx().
 */
void
apix_free_vectors(dev_info_t *dip, int inum, int count, int type)
{
	int i, cpuid;
	apix_vector_t *vecp;

	DDI_INTR_IMPLDBG((CE_CONT, "apix_free_vectors: dip: %p inum: %x "
	    "count: %x type: %x\n",
	    (void *)dip, inum, count, type));

	lock_set(&apix_lock);

	for (i = 0; i < count; i++, inum++) {
		if ((vecp = apix_get_dev_map(dip, inum, type)) == NULL) {
			DDI_INTR_IMPLDBG((CE_CONT, "apix_free_vectors: "
			    "dip=0x%p inum=0x%x type=0x%x apix_get_dev_map() "
			    "failed\n", (void *)dip, inum, type));
			continue;
		}

		APIX_ENTER_CPU_LOCK(vecp->v_cpuid);
		cpuid = vecp->v_cpuid;

		DDI_INTR_IMPLDBG((CE_CONT, "apix_free_vectors: "
		    "dip=0x%p inum=0x%x type=0x%x vector 0x%x (share %d)\n",
		    (void *)dip, inum, type, vecp->v_vector, vecp->v_share));

		/* tear down device interrupt to vector mapping */
		apix_clear_dev_map(dip, inum, type);

		if (vecp->v_type == APIX_TYPE_FIXED) {
			if (vecp->v_share > 0) {	/* share IRQ line */
				APIX_LEAVE_CPU_LOCK(cpuid);
				continue;
			}

			/* Free apic_irq_table entry */
			apix_intx_free(vecp->v_inum);
		}

		/* free vector */
		apix_cleanup_vector(vecp);

		APIX_LEAVE_CPU_LOCK(cpuid);
	}

	lock_clear(&apix_lock);
}

/*
 * Must be called with apix_lock held
 */
apix_vector_t *
apix_setup_io_intr(apix_vector_t *vecp)
{
	processorid_t bindcpu;
	int ret;

	ASSERT(LOCK_HELD(&apix_lock));

	/*
	 * If interrupts are enabled on the CPU, program the IOAPIC RDT
	 * entry or MSI/X address/data to enable the interrupt.
	 */
	if (apix_is_cpu_enabled(vecp->v_cpuid)) {
		apix_enable_vector(vecp);
		return (vecp);
	}

	/*
	 * CPU is not up or interrupts are disabled.  Fall back to the
	 * first available CPU.
	 */
	bindcpu = apic_find_cpu(APIC_CPU_INTR_ENABLE);

	if (vecp->v_type == APIX_TYPE_MSI)
		return (apix_grp_set_cpu(vecp, bindcpu, &ret));

	return (apix_set_cpu(vecp, bindcpu, &ret));
}

/*
 * For interrupts which call add_avintr() before apic is initialized.
 * ioapix_setup_intr() will
 *    - allocate vector
 *    - copy over ISR
 */
static void
ioapix_setup_intr(int irqno, iflag_t *flagp)
{
	extern struct av_head autovect[];
	apix_vector_t *vecp;
	apic_irq_t *irqp;
	uchar_t ioapicindex, ipin;
	ulong_t iflag;
	struct autovec *avp;

	ioapicindex = acpi_find_ioapic(irqno);
	ASSERT(ioapicindex != 0xFF);
	ipin = irqno - apic_io_vectbase[ioapicindex];

	mutex_enter(&airq_mutex);
	irqp = apic_irq_table[irqno];

	/*
	 * The irq table entry should not exist unless the interrupts
	 * are shared.  In that case, make sure it matches what we would
	 * initialize it to.
	 */
	if (irqp != NULL) {
		ASSERT(irqp->airq_mps_intr_index == ACPI_INDEX);
		ASSERT(irqp->airq_intin_no == ipin &&
		    irqp->airq_ioapicindex == ioapicindex);
		vecp = xv_vector(irqp->airq_cpu, irqp->airq_vector);
		ASSERT(!IS_VECT_FREE(vecp));
		mutex_exit(&airq_mutex);
	} else {
		irqp = kmem_zalloc(sizeof (apic_irq_t), KM_SLEEP);

		irqp->airq_cpu = IRQ_UNINIT;
		irqp->airq_origirq = (uchar_t)irqno;
		irqp->airq_mps_intr_index = ACPI_INDEX;
		irqp->airq_ioapicindex = ioapicindex;
		irqp->airq_intin_no = ipin;
		irqp->airq_iflag = *flagp;
		irqp->airq_share++;

		apic_irq_table[irqno] = irqp;
		mutex_exit(&airq_mutex);

		vecp = apix_alloc_intx(NULL, 0, irqno);
	}

	/* copy over autovect */
	for (avp = autovect[irqno].avh_link; avp; avp = avp->av_link)
		apix_insert_av(vecp, avp->av_intr_id, avp->av_vector,
		    avp->av_intarg1, avp->av_intarg2, avp->av_ticksp,
		    avp->av_prilevel, avp->av_dip);

	/* Program I/O APIC */
	iflag = intr_clear();
	lock_set(&apix_lock);

	(void) apix_setup_io_intr(vecp);

	lock_clear(&apix_lock);
	intr_restore(iflag);

	APIC_VERBOSE_IOAPIC((CE_CONT, "apix: setup ioapic, irqno %x "
	    "(ioapic %x, ipin %x) is bound to cpu %x, vector %x\n",
	    irqno, ioapicindex, ipin, irqp->airq_cpu, irqp->airq_vector));
}

void
ioapix_init_intr(int mask_apic)
{
	int ioapicindex;
	int i, j;

	/* mask interrupt vectors */
	for (j = 0; j < apic_io_max && mask_apic; j++) {
		int intin_max;

		ioapicindex = j;
		/* Bits 23-16 define the maximum redirection entries */
		intin_max = (ioapic_read(ioapicindex, APIC_VERS_CMD) >> 16)
		    & 0xff;
		for (i = 0; i <= intin_max; i++)
			ioapic_write(ioapicindex, APIC_RDT_CMD + 2 * i,
			    AV_MASK);
	}
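
	/*
	 * Worked example (illustrative): if ioapic_read() above returned
	 * 0x00170011 for APIC_VERS_CMD, bits 23:16 give intin_max =
	 * 0x17 (23), so redirection entries 0 through 23 are masked.
	 * Each RDT entry occupies two 32-bit I/O APIC registers, hence
	 * the "2 * i" stride when writing AV_MASK.
	 */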

	/*
	 * Hack alert: deal with ACPI SCI interrupt chicken/egg here
	 */
	if (apic_sci_vect > 0)
		ioapix_setup_intr(apic_sci_vect, &apic_sci_flags);

	/*
	 * Hack alert: deal with ACPI HPET interrupt chicken/egg here.
	 */
	if (apic_hpet_vect > 0)
		ioapix_setup_intr(apic_hpet_vect, &apic_hpet_flags);
}