OS-7125 Need mitigation of L1TF (CVE-2018-3646)
Reviewed by: Robert Mustacchi <rm@joyent.com>
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>

   1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
  24  */
  25 /*
  26  * Copyright (c) 2010, Intel Corporation.
  27  * All rights reserved.
  28  */
  29 /*
  30  * Copyright 2013 Nexenta Systems, Inc.  All rights reserved.
  31  * Copyright 2013 Pluribus Networks, Inc.

  32  */
  33 
  34 #include <sys/processor.h>
  35 #include <sys/time.h>
  36 #include <sys/psm.h>
  37 #include <sys/smp_impldefs.h>
  38 #include <sys/cram.h>
  39 #include <sys/acpi/acpi.h>
  40 #include <sys/acpica.h>
  41 #include <sys/psm_common.h>
  42 #include <sys/pit.h>
  43 #include <sys/ddi.h>
  44 #include <sys/sunddi.h>
  45 #include <sys/ddi_impldefs.h>
  46 #include <sys/pci.h>
  47 #include <sys/promif.h>
  48 #include <sys/x86_archext.h>
  49 #include <sys/cpc_impl.h>
  50 #include <sys/uadmin.h>
  51 #include <sys/panic.h>
  52 #include <sys/debug.h>
  53 #include <sys/archsystm.h>
  54 #include <sys/trap.h>
  55 #include <sys/machsystm.h>
  56 #include <sys/sysmacros.h>
  57 #include <sys/cpuvar.h>
  58 #include <sys/rm_platter.h>
  59 #include <sys/privregs.h>
  60 #include <sys/note.h>
  61 #include <sys/pci_intr_lib.h>
  62 #include <sys/spl.h>
  63 #include <sys/clock.h>
  64 #include <sys/dditypes.h>
  65 #include <sys/sunddi.h>
  66 #include <sys/x_call.h>
  67 #include <sys/reboot.h>
  68 #include <sys/apix.h>

  69 
  70 static int apix_get_avail_vector_oncpu(uint32_t, int, int);
  71 static apix_vector_t *apix_init_vector(processorid_t, uchar_t);
  72 static void apix_cleanup_vector(apix_vector_t *);
  73 static void apix_insert_av(apix_vector_t *, void *, avfunc, caddr_t, caddr_t,
  74     uint64_t *, int, dev_info_t *);
  75 static void apix_remove_av(apix_vector_t *, struct autovec *);
  76 static void apix_clear_dev_map(dev_info_t *, int, int);
  77 static boolean_t apix_is_cpu_enabled(processorid_t);
  78 static void apix_wait_till_seen(processorid_t, int);
  79 
     /*
      * Fetch the interrupt number (ih_inum) from an interrupt handle that is
      * carried around as an untyped pointer.  A NULL handle yields 0.
      */
  80 #define GET_INTR_INUM(ihdlp)            \
  81         (((ihdlp) != NULL) ? ((ddi_intr_handle_impl_t *)(ihdlp))->ih_inum : 0)
  82 
     /* Global rebind state; every field starts out as zero or NULL. */
  83 apix_rebind_info_t apix_rebindinfo = {0, 0, 0, NULL, 0, NULL};
  84 
  85 /*
  86  * Allocate IPI
      *
      * Picks the first free vector in [APIX_IPI_MIN, APIX_IPI_MAX] from CPU 0's
      * table and then claims that same vector number for every CPU index below
      * max(apic_nproc, apic_max_nproc), allocating a table entry wherever one
      * does not exist yet.  Each entry is marked APIX_STATE_ALLOCED with type
      * APIX_TYPE_IPI and priority "ipl".  The whole operation runs between
      * APIX_ENTER_CPU_LOCK(0) and APIX_LEAVE_CPU_LOCK(0).
  87  *
  88  * Return vector number or 0 on error
  89  */
  90 uchar_t
  91 apix_alloc_ipi(int ipl)
  92 {
  93         apix_vector_t *vecp;
  94         uchar_t vector;
  95         int cpun;
  96         int nproc;
  97 
  98         APIX_ENTER_CPU_LOCK(0);
  99 
             /* Only CPU 0's table is consulted when choosing the vector. */
 100         vector = apix_get_avail_vector_oncpu(0, APIX_IPI_MIN, APIX_IPI_MAX);
 101         if (vector == 0) {
 102                 APIX_LEAVE_CPU_LOCK(0);
 103                 cmn_err(CE_WARN, "apix: no available IPI\n");
 104                 apic_error |= APIC_ERR_GET_IPIVECT_FAIL;
 105                 return (0);
 106         }
 107 
             /* Cover every CPU slot, using the larger of the two CPU counts. */
 108         nproc = max(apic_nproc, apic_max_nproc);
 109         for (cpun = 0; cpun < nproc; cpun++) {
 110                 vecp = xv_vector(cpun, vector);
 111                 if (vecp == NULL) {
                             /* No entry yet: allocate without sleeping. */
 112                         vecp = kmem_zalloc(sizeof (apix_vector_t), KM_NOSLEEP);
 113                         if (vecp == NULL) {
 114                                 cmn_err(CE_WARN, "apix: No memory for ipi");
 115                                 goto fail;
 116                         }
 117                         xv_vector(cpun, vector) = vecp;
 118                 }
 119                 vecp->v_state = APIX_STATE_ALLOCED;
 120                 vecp->v_type = APIX_TYPE_IPI;
 121                 vecp->v_cpuid = vecp->v_bound_cpuid = cpun;
 122                 vecp->v_vector = vector;
 123                 vecp->v_pri = ipl;
 124         }
 125         APIX_LEAVE_CPU_LOCK(0);
 126         return (vector);
 127 
 128 fail:
             /*
              * Undo the CPUs handled before the failing one.  The entries are
              * reset through apix_cleanup_vector(); they are not freed here.
              */
 129         while (--cpun >= 0)
 130                 apix_cleanup_vector(xv_vector(cpun, vector));
 131         APIX_LEAVE_CPU_LOCK(0);
 132         return (0);
 133 }
 134 
 135 /*
 136  * Add IPI service routine
      *
      * Installs handler "xxintr" (with arg1/arg2, at priority "ipl") on the
      * given IPI vector of every CPU index below max(apic_nproc,
      * apic_max_nproc) and marks each entry APIX_STATE_ENABLED.  Each CPU's
      * entry is updated between APIX_ENTER_CPU_LOCK and APIX_LEAVE_CPU_LOCK
      * for that CPU.  "name" is only used in the verbose message.
      *
      * Always returns 1.
 137  */
 138 static int
 139 apix_add_ipi(int ipl, avfunc xxintr, char *name, int vector,
 140     caddr_t arg1, caddr_t arg2)
 141 {
 142         int cpun;
 143         apix_vector_t *vecp;
 144         int nproc;
 145 
 146         ASSERT(vector >= APIX_IPI_MIN && vector <= APIX_IPI_MAX);
 147 
 148         nproc = max(apic_nproc, apic_max_nproc);
 149         for (cpun = 0; cpun < nproc; cpun++) {
 150                 APIX_ENTER_CPU_LOCK(cpun);
                     /*
                      * NOTE(review): the entry is used without a NULL check;
                      * presumably apix_alloc_ipi() has populated it for every
                      * CPU beforehand -- confirm against callers.
                      */
 151                 vecp = xv_vector(cpun, vector);
 152                 apix_insert_av(vecp, NULL, xxintr, arg1, arg2, NULL, ipl, NULL);
 153                 vecp->v_state = APIX_STATE_ENABLED;
 154                 APIX_LEAVE_CPU_LOCK(cpun);
 155         }
 156 
 157         APIC_VERBOSE(IPI, (CE_CONT, "apix: add ipi for %s, vector %x "
 158             "ipl %x\n", name, vector, ipl));
 159 
 160         return (1);
 161 }
 162 
 163 /*
 164  * Find and return first free vector in the inclusive range [start, end]
      * of the given cpu's vector table.  Vectors for which
      * APIC_CHECK_RESERVE_VECTORS() is true are never returned.
      *
      * Return the vector number, or 0 when no vector in the range is free.
 165  */
 166 static int
 167 apix_get_avail_vector_oncpu(uint32_t cpuid, int start, int end)
 168 {
 169         int i;
 170         apix_impl_t *apixp = apixs[cpuid];
 171 
 172         for (i = start; i <= end; i++) {
                     /* Reserved vectors are skipped regardless of their state. */
 173                 if (APIC_CHECK_RESERVE_VECTORS(i))
 174                         continue;
 175                 if (IS_VECT_FREE(apixp->x_vectbl[i]))
 176                         return (i);
 177         }
 178 
 179         return (0);
 180 }
 181 
 182 /*
 183  * Allocate a vector on specified cpu
      *
      * "cpuid" may carry the IRQ_USER_BOUND flag; the flag is stripped to get
      * the target cpu and, when present, recorded on the vector as
      * APIX_VECT_USER_BOUND.  The first free vector in
      * [APIX_AVINTR_MIN, APIX_AVINTR_MAX] is initialised with the given type
      * and inum, and associated with "dip" when dip is not NULL.  The caller
      * must hold the target cpu's lock.
 184  *
 185  * Return NULL on error: either no vector is free or the vector entry
      * could not be allocated.
 186  */
 187 static apix_vector_t *
 188 apix_alloc_vector_oncpu(uint32_t cpuid, dev_info_t *dip, int inum, int type)
 189 {
 190         processorid_t tocpu = cpuid & ~IRQ_USER_BOUND;
 191         apix_vector_t *vecp;
 192         int vector;
 193 
 194         ASSERT(APIX_CPU_LOCK_HELD(tocpu));
 195 
 196         /* find free vector */
 197         vector = apix_get_avail_vector_oncpu(tocpu, APIX_AVINTR_MIN,
 198             APIX_AVINTR_MAX);
 199         if (vector == 0)
 200                 return (NULL);
 201 
             /*
              * apix_init_vector() allocates with KM_NOSLEEP and returns NULL on
              * failure; nothing has been claimed yet, so just report the error.
              */
 202         vecp = apix_init_vector(tocpu, vector);
             if (vecp == NULL)
                     return (NULL);

 203         vecp->v_type = (ushort_t)type;
 204         vecp->v_inum = inum;
 205         vecp->v_flags = (cpuid & IRQ_USER_BOUND) ? APIX_VECT_USER_BOUND : 0;
 206 
 207         if (dip != NULL)
 208                 apix_set_dev_map(vecp, dip, inum);
 209 
 210         return (vecp);
 211 }
 212 
 213 /*
 214  * Allocates "count" contiguous MSI vectors starting at the proper alignment.
 215  * Caller needs to make sure that count has to be power of 2 and should not
 216  * be < 1.
      *
      * "cpuid" may carry the IRQ_USER_BOUND flag, which is stripped to get the
      * target cpu and recorded on each vector as APIX_VECT_USER_BOUND.  Vector
      * k of the run is given inum + k and, when dip is not NULL, mapped to the
      * device.  The caller must hold the target cpu's lock.
 217  *
 218  * Return a pointer to the first vector of the run, or NULL when no
      * suitable run of free vectors exists or a vector entry could not be
      * allocated.  On failure every vector set up so far is released again.
 219  */
 220 apix_vector_t *
 221 apix_alloc_nvectors_oncpu(uint32_t cpuid, dev_info_t *dip, int inum,
 222     int count, int type)
 223 {
 224         int i, msibits, start = 0, navail = 0;
 225         apix_vector_t *vecp, *startp = NULL;
 226         processorid_t tocpu = cpuid & ~IRQ_USER_BOUND;
 227         uint_t flags;
 228 
 229         ASSERT(APIX_CPU_LOCK_HELD(tocpu));
 230 
 231         /*
 232          * msibits is the no. of lower order message data bits for the
 233          * allocated MSI vectors and is used to calculate the aligned
 234          * starting vector
 235          */
 236         msibits = count - 1;
 237 
 238         /* It has to be contiguous */
 239         for (i = APIX_AVINTR_MIN; i <= APIX_AVINTR_MAX; i++) {
 240                 if (!IS_VECT_FREE(xv_vector(tocpu, i)))
 241                         continue;
 242 
 243                 /*
 244                  * starting vector has to be aligned accordingly for
 245                  * multiple MSIs
 246                  */
 247                 if (msibits)
 248                         i = (i + msibits) & ~msibits;
 249 
                     /*
                      * Count consecutive free, non-reserved vectors from the
                      * aligned start.  When the run is cut short the outer
                      * loop resumes just past the vector that ended it.
                      */
 250                 for (navail = 0, start = i; i <= APIX_AVINTR_MAX; i++) {
 251                         if (!IS_VECT_FREE(xv_vector(tocpu, i)))
 252                                 break;
 253                         if (APIC_CHECK_RESERVE_VECTORS(i))
 254                                 break;
 255                         if (++navail == count)
 256                                 goto done;
 257                 }
 258         }
 259 
 260         return (NULL);
 261 
 262 done:
 263         flags = (cpuid & IRQ_USER_BOUND) ? APIX_VECT_USER_BOUND : 0;
 264 
 265         for (i = 0; i < count; i++) {
 266                 if ((vecp = apix_init_vector(tocpu, start + i)) == NULL)
 267                         goto fail;
 268 
 269                 vecp->v_type = (ushort_t)type;
 270                 vecp->v_inum = inum + i;
 271                 vecp->v_flags = flags;
 272 
 273                 if (dip != NULL)
 274                         apix_set_dev_map(vecp, dip, inum + i);
 275 
 276                 if (i == 0)
 277                         startp = vecp;
 278         }
 279 
 280         return (startp);
 281 
 282 fail:
 283         while (i-- > 0) {    /* Free allocated vectors */
 284                 vecp = xv_vector(tocpu, start + i);
                     /*
                      * A device mapping was only created when dip was supplied,
                      * so only tear it down in that case.
                      */
                     if (dip != NULL)
 285                         apix_clear_dev_map(dip, inum + i, type);
 286                 apix_cleanup_vector(vecp);
 287         }
 288         return (NULL);
 289 }
 290 
     /*
      * Write the 16-bit value _v to the MSI data register of the capability at
      * offset _cap, using config handle _hdl.  The register offset depends on
      * whether the control word _ctrl has PCI_MSI_64BIT_MASK set.
      */
 291 #define APIX_WRITE_MSI_DATA(_hdl, _cap, _ctrl, _v)\
 292 do {\
 293         if ((_ctrl) & PCI_MSI_64BIT_MASK)\
 294                 pci_config_put16((_hdl), (_cap) + PCI_MSI_64BIT_DATA, (_v));\
 295         else\
 296                 pci_config_put16((_hdl), (_cap) + PCI_MSI_32BIT_DATA, (_v));\
 297 _NOTE(CONSTCOND)} while (0)
 298 
     /*
      * Program the MSI or MSI-X address/data of a device.
      *
      * vecp            first vector of the group being programmed
      * dip             device whose capability / MSI-X table is written
      * type            APIX_TYPE_MSI or APIX_TYPE_MSIX; other values only go
      *                 through the interrupt-remapping calls
      * inum            index of the MSI-X table entry (unused for MSI)
      * count           number of vectors in the group
      * vector          first vector number, used as the initial message data
      * target_apic_id  used as the initial message address
      *
      * The address/data pair is first handed to the apic_vt_ops interrupt
      * remapping hooks, which may rewrite it, and the result is then written
      * to the device.
      */
 299 static void
 300 apix_pci_msi_enable_vector(apix_vector_t *vecp, dev_info_t *dip, int type,
 301     int inum, int count, uchar_t vector, int target_apic_id)
 302 {
 303         uint64_t                msi_addr, msi_data;
 304         ushort_t                msi_ctrl;
 305         int                     i, cap_ptr = i_ddi_get_msi_msix_cap_ptr(dip);
 306         ddi_acc_handle_t        handle = i_ddi_get_pci_config_handle(dip);
 307         msi_regs_t              msi_regs;
             /* NOTE(review): count is not checked against PCI_MSI_MAX_INTRS. */
 308         void                    *intrmap_tbl[PCI_MSI_MAX_INTRS];
 309 
 310         DDI_INTR_IMPLDBG((CE_CONT, "apix_pci_msi_enable_vector: dip=0x%p\n"
 311             "\tdriver = %s, inum=0x%x vector=0x%x apicid=0x%x\n", (void *)dip,
 312             ddi_driver_name(dip), inum, vector, target_apic_id));
 313 
 314         ASSERT((handle != NULL) && (cap_ptr != 0));
 315 
 316         msi_regs.mr_data = vector;
 317         msi_regs.mr_addr = target_apic_id;
 318 
             /*
              * Collect the per-vector remapping handles, let the remapping
              * layer allocate entries for the whole group, then store the
              * (possibly updated) handles back.
              */
 319         for (i = 0; i < count; i++)
 320                 intrmap_tbl[i] = xv_intrmap_private(vecp->v_cpuid, vector + i);
 321         apic_vt_ops->apic_intrmap_alloc_entry(intrmap_tbl, dip, type,
 322             count, 0xff);
 323         for (i = 0; i < count; i++)
 324                 xv_intrmap_private(vecp->v_cpuid, vector + i) = intrmap_tbl[i];
 325 
 326         apic_vt_ops->apic_intrmap_map_entry(vecp->v_intrmap_private,
 327             (void *)&msi_regs, type, count);
 328         apic_vt_ops->apic_intrmap_record_msi(vecp->v_intrmap_private,
 329             &msi_regs);
 330 
 331         /* MSI Address */
 332         msi_addr = msi_regs.mr_addr;
 333 
 334         /* MSI Data: MSI is edge triggered according to spec */
 335         msi_data = msi_regs.mr_data;
 336 
 337         DDI_INTR_IMPLDBG((CE_CONT, "apix_pci_msi_enable_vector: addr=0x%lx "
 338             "data=0x%lx\n", (long)msi_addr, (long)msi_data));
 339 
 340         if (type == APIX_TYPE_MSI) {
 341                 msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL);
 342 
 343                 /* Set the bits to inform how many MSIs are enabled */
                     /*
                      * NOTE(review): the bits are OR-ed in without clearing a
                      * previous value -- confirm the field is zero on entry.
                      */
 344                 msi_ctrl |= ((highbit(count) - 1) << PCI_MSI_MME_SHIFT);
 345                 pci_config_put16(handle, cap_ptr + PCI_MSI_CTRL, msi_ctrl);
 346 
                     /*
                      * For vectors without APIX_VECT_MASKABLE, park the data
                      * register on APIX_RESV_VECTOR before touching the
                      * address.  Presumably this keeps a half-updated
                      * address/data pair from hitting a live vector -- TODO
                      * confirm.
                      */
 347                 if ((vecp->v_flags & APIX_VECT_MASKABLE) == 0)
 348                         APIX_WRITE_MSI_DATA(handle, cap_ptr, msi_ctrl,
 349                             APIX_RESV_VECTOR);
 350 
 351                 pci_config_put32(handle,
 352                     cap_ptr + PCI_MSI_ADDR_OFFSET, msi_addr);
                     /* Upper address half exists only in the 64-bit layout. */
 353                 if (msi_ctrl &  PCI_MSI_64BIT_MASK)
 354                         pci_config_put32(handle,
 355                             cap_ptr + PCI_MSI_ADDR_OFFSET + 4, msi_addr >> 32);
 356 
 357                 APIX_WRITE_MSI_DATA(handle, cap_ptr, msi_ctrl, msi_data);
 358         } else if (type == APIX_TYPE_MSIX) {
 359                 uintptr_t       off;
 360                 ddi_intr_msix_t *msix_p = i_ddi_get_msix(dip);
 361 
 362                 /* Offset into the "inum"th entry in the MSI-X table */
 363                 off = (uintptr_t)msix_p->msix_tbl_addr +
 364                     (inum * PCI_MSIX_VECTOR_SIZE);
 365 
                     /* Data is written first, then the two address halves. */
 366                 ddi_put32(msix_p->msix_tbl_hdl,
 367                     (uint32_t *)(off + PCI_MSIX_DATA_OFFSET), msi_data);
 368                 ddi_put32(msix_p->msix_tbl_hdl,
 369                     (uint32_t *)(off + PCI_MSIX_LOWER_ADDR_OFFSET), msi_addr);
 370                 ddi_put32(msix_p->msix_tbl_hdl,
 371                     (uint32_t *)(off + PCI_MSIX_UPPER_ADDR_OFFSET),
 372                     msi_addr >> 32);
 373         }
 374 }
 375 
     /*
      * Turn on MSI or MSI-X delivery for a device.
      *
      * For MSI the enable bit in the control word is set unless it is already
      * set.  For MSI-X, bit 0 of the vector control word of table entry "inum"
      * is cleared first, and then the MSI-X enable bit in the control word is
      * set if it is not set yet.  Any other type is a no-op.
      */
 376 static void
 377 apix_pci_msi_enable_mode(dev_info_t *dip, int type, int inum)
 378 {
 379         ushort_t                msi_ctrl;
 380         int                     cap_ptr = i_ddi_get_msi_msix_cap_ptr(dip);
 381         ddi_acc_handle_t        handle = i_ddi_get_pci_config_handle(dip);
 382 
 383         ASSERT((handle != NULL) && (cap_ptr != 0));
 384 
 385         if (type == APIX_TYPE_MSI) {
 386                 msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL);
                     /* Already enabled: nothing to write. */
 387                 if ((msi_ctrl & PCI_MSI_ENABLE_BIT))
 388                         return;
 389 
 390                 msi_ctrl |= PCI_MSI_ENABLE_BIT;
 391                 pci_config_put16(handle, cap_ptr + PCI_MSI_CTRL, msi_ctrl);
 392 
             /*
              * NOTE(review): this branch tests DDI_INTR_TYPE_MSIX while the
              * branch above tests APIX_TYPE_MSI; presumably APIX_TYPE_MSIX has
              * the same value as DDI_INTR_TYPE_MSIX -- confirm.
              */
 393         } else if (type == DDI_INTR_TYPE_MSIX) {
 394                 uintptr_t       off;
 395                 uint32_t        mask;
 396                 ddi_intr_msix_t *msix_p;
 397 
 398                 msix_p = i_ddi_get_msix(dip);
 399 
 400                 /* Offset into "inum"th entry in the MSI-X table & clear mask */
 401                 off = (uintptr_t)msix_p->msix_tbl_addr + (inum *
 402                     PCI_MSIX_VECTOR_SIZE) + PCI_MSIX_VECTOR_CTRL_OFFSET;
 403 
 404                 mask = ddi_get32(msix_p->msix_tbl_hdl, (uint32_t *)off);
 405 
                     /* Read-modify-write so only bit 0 changes. */
 406                 ddi_put32(msix_p->msix_tbl_hdl, (uint32_t *)off, (mask & ~1));
 407 
 408                 msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSIX_CTRL);
 409 
 410                 if (!(msi_ctrl & PCI_MSIX_ENABLE_BIT)) {
 411                         msi_ctrl |= PCI_MSIX_ENABLE_BIT;
 412                         pci_config_put16(handle, cap_ptr + PCI_MSIX_CTRL,
 413                             msi_ctrl);
 414                 }
 415         }
 416 }
 417 
 418 /*
 419  * Setup interrupt, programming IO-APIC or MSI/X address/data.
      *
      * Counts the vector against its cpu (aci_bound for user-bound vectors,
      * aci_temp_bound otherwise), then, with interrupts disabled and
      * apic_ioapic_lock held, programs the source according to the vector
      * type:
      *   - fixed: apix_intx_enable() on v_inum;
      *   - MSI:   nothing happens until the vector whose inum equals
      *            apix_get_max_dev_inum() is enabled; at that point the whole
      *            group is programmed from its first vector and MSI is turned
      *            on;
      *   - MSI-X: each vector is programmed and unmasked individually.
      * Finally the vector is marked APIX_STATE_ENABLED and the cpu's bit in
      * apic_redist_cpu_skip is cleared.
 420  */
 421 void
 422 apix_enable_vector(apix_vector_t *vecp)
 423 {
 424         int tocpu = vecp->v_cpuid, type = vecp->v_type;
 425         apic_cpus_info_t *cpu_infop;
 426         ulong_t iflag;
 427 
 428         ASSERT(tocpu < apic_nproc);
 429 
 430         cpu_infop = &apic_cpus[tocpu];
 431         if (vecp->v_flags & APIX_VECT_USER_BOUND)
 432                 cpu_infop->aci_bound++;
 433         else
 434                 cpu_infop->aci_temp_bound++;
 435 
 436         iflag = intr_clear();
 437         lock_set(&apic_ioapic_lock);
 438 
 439         if (!DDI_INTR_IS_MSI_OR_MSIX(type)) {   /* fixed */
 440                 apix_intx_enable(vecp->v_inum);
 441         } else {
 442                 int inum = vecp->v_inum;
 443                 dev_info_t *dip = APIX_GET_DIP(vecp);
 444                 int count = i_ddi_intr_get_current_nintrs(dip);
 445 
 446                 if (type == APIX_TYPE_MSI) {    /* MSI */
 447                         if (inum == apix_get_max_dev_inum(dip, type)) {
 448                                 /* last one */
                                     /*
                                      * Step back count - 1 from this vector to
                                      * find the first inum and vector of the
                                      * group.
                                      */
 449                                 uchar_t start_inum = inum + 1 - count;
 450                                 uchar_t start_vect = vecp->v_vector + 1 - count;
 451                                 apix_vector_t *start_vecp =
 452                                     xv_vector(vecp->v_cpuid, start_vect);
 453 
 454                                 APIC_VERBOSE(INTR, (CE_CONT, "apix: call "
 455                                     "apix_pci_msi_enable_vector\n"));
 456                                 apix_pci_msi_enable_vector(start_vecp, dip,
 457                                     type, start_inum, count, start_vect,
 458                                     cpu_infop->aci_local_id);
 459 
 460                                 APIC_VERBOSE(INTR, (CE_CONT, "apix: call "
 461                                     "apix_pci_msi_enable_mode\n"));
 462                                 apix_pci_msi_enable_mode(dip, type, inum);
 463                         }
 464                 } else {                                /* MSI-X */
 465                         apix_pci_msi_enable_vector(vecp, dip,
 466                             type, inum, 1, vecp->v_vector,
 467                             cpu_infop->aci_local_id);
 468                         apix_pci_msi_enable_mode(dip, type, inum);
 469                 }
 470         }
 471         vecp->v_state = APIX_STATE_ENABLED;
             /*
              * NOTE(review): "1 << tocpu" is an int shift, so this assumes
              * tocpu is smaller than the bit width of int -- confirm.
              */
 472         apic_redist_cpu_skip &= ~(1 << tocpu);
 473 
 474         lock_clear(&apic_ioapic_lock);
 475         intr_restore(iflag);
 476 }
 477 
 478 /*
 479  * Disable the interrupt
      *
      * With interrupts disabled and apic_ioapic_lock held, stops delivery for
      * the vector according to its type (MSI, MSI-X, or anything else via
      * apix_intx_disable()), then releases the vector's interrupt-remapping
      * entry.  The vector is marked APIX_STATE_DISABLED unless its cpu has
      * APIC_CPU_SUSPEND set in aci_status.
      *
      * The vector must have a handler chain (v_autovect != NULL).
 480  */
 481 void
 482 apix_disable_vector(apix_vector_t *vecp)
 483 {
 484         struct autovec *avp = vecp->v_autovect;
 485         ulong_t iflag;
 486 
 487         ASSERT(avp != NULL);
 488 
 489         iflag = intr_clear();
 490         lock_set(&apic_ioapic_lock);
 491 
 492         switch (vecp->v_type) {
 493         case APIX_TYPE_MSI:
 494                 ASSERT(avp->av_vector != NULL && avp->av_dip != NULL);
 495                 /*
 496                  * Disable the MSI vector
 497                  * Make sure we only disable on the last
 498                  * of the multi-MSI support
 499                  */
 500                 if (i_ddi_intr_get_current_nenables(avp->av_dip) == 1) {
 501                         apic_pci_msi_disable_mode(avp->av_dip,
 502                             DDI_INTR_TYPE_MSI);
 503                 }
 504                 break;
 505         case APIX_TYPE_MSIX:
 506                 ASSERT(avp->av_vector != NULL && avp->av_dip != NULL);
 507                 /*
 508                  * Disable the MSI-X vector
 509                  * needs to clear its mask and addr/data for each MSI-X
 510                  */
 511                 apic_pci_msi_unconfigure(avp->av_dip, DDI_INTR_TYPE_MSIX,
 512                     vecp->v_inum);
 513                 /*
 514                  * Make sure we only disable on the last MSI-X
 515                  */
 516                 if (i_ddi_intr_get_current_nenables(avp->av_dip) == 1) {
 517                         apic_pci_msi_disable_mode(avp->av_dip,
 518                             DDI_INTR_TYPE_MSIX);
 519                 }
 520                 break;
 521         default:
                     /* Everything that is not MSI/MSI-X goes through intx. */
 522                 apix_intx_disable(vecp->v_inum);
 523                 break;
 524         }
 525 
             /* The state is left unchanged while the cpu is being suspended. */
 526         if (!(apic_cpus[vecp->v_cpuid].aci_status & APIC_CPU_SUSPEND))
 527                 vecp->v_state = APIX_STATE_DISABLED;
             /* Release the remapping entry and drop our reference to it. */
 528         apic_vt_ops->apic_intrmap_free_entry(&vecp->v_intrmap_private);
 529         vecp->v_intrmap_private = NULL;
 530 
 531         lock_clear(&apic_ioapic_lock);
 532         intr_restore(iflag);
 533 }
 534 
 535 /*
 536  * Mark vector as obsoleted or freed. The vector is marked
 537  * obsoleted if there are pending requests on it. Otherwise,
 538  * free the vector. The obsoleted vectors get freed after
 539  * being serviced.
      *
      * The caller must hold the lock of the vector's cpu.  Obsoleted vectors
      * are appended to the tail of that cpu's x_obsoletes list.
 540  *
 541  * Return 1 on being obsoleted and 0 on being freed.
 542  */
     /* True while the handler is flagged as pending or as currently running. */
 543 #define INTR_BUSY(_avp)\
 544         ((((volatile ushort_t)(_avp)->av_flags) &\
 545         (AV_PENTRY_PEND | AV_PENTRY_ONPROC)) != 0)
     /* True when _cpuid is the executing cpu and it has interrupts disabled. */
 546 #define LOCAL_WITH_INTR_DISABLED(_cpuid)\
 547         ((_cpuid) == psm_get_cpu_id() && !interrupts_enabled())
 548 static uint64_t dummy_tick;
 549 
 550 int
 551 apix_obsolete_vector(apix_vector_t *vecp)
 552 {
 553         struct autovec *avp = vecp->v_autovect;
 554         int repeats, tries, ipl, busy = 0, cpuid = vecp->v_cpuid;
 555         apix_impl_t *apixp = apixs[cpuid];
 556 
 557         ASSERT(APIX_CPU_LOCK_HELD(cpuid));
 558 
             /*
              * Walk every installed handler.  "busy" accumulates over the whole
              * walk, so once one handler is found busy no later handler is
              * removed in this call.
              */
 559         for (avp = vecp->v_autovect; avp != NULL; avp = avp->av_link) {
 560                 if (avp->av_vector == NULL)
 561                         continue;
 562 
                     /*
                      * Target is this cpu with interrupts off: take a single
                      * snapshot of the handler flags and the local IRR instead
                      * of polling.
                      */
 563                 if (LOCAL_WITH_INTR_DISABLED(cpuid)) {
 564                         int bit, index, irr;
 565 
 566                         if (INTR_BUSY(avp)) {
 567                                 busy++;
 568                                 continue;
 569                         }
 570 
 571                         /* check IRR for pending interrupts */
                             /* One IRR register is read per 32 vectors. */
 572                         index = vecp->v_vector / 32;
 573                         bit = vecp->v_vector % 32;
 574                         irr = apic_reg_ops->apic_read(APIC_IRR_REG + index);
 575                         if ((irr & (1 << bit)) != 0)
 576                                 busy++;
 577 
 578                         if (!busy)
 579                                 apix_remove_av(vecp, avp);
 580 
 581                         continue;
 582                 }
 583 
                     /*
                      * Otherwise poll the handler flags; the poll is bounded by
                      * apic_max_reps_clear_pending in both the inner and the
                      * outer loop.
                      */
 584                 repeats = 0;
 585                 do {
 586                         repeats++;
 587                         for (tries = 0; tries < apic_max_reps_clear_pending;
 588                             tries++)
 589                                 if (!INTR_BUSY(avp))
 590                                         break;
 591                 } while (INTR_BUSY(avp) &&
 592                     (repeats < apic_max_reps_clear_pending));
 593 
 594                 if (INTR_BUSY(avp))
 595                         busy++;
 596                 else {
 597                         /*
 598                          * Interrupt is not in pending list or being serviced.
 599                          * However it might be cached in Local APIC's IRR
 600                          * register. It's impossible to check another CPU's
 601                          * IRR register. Then wait till lower levels finish
 602                          * running.
 603                          */
 604                         for (ipl = 1; ipl < MIN(LOCK_LEVEL, vecp->v_pri); ipl++)
 605                                 apix_wait_till_seen(cpuid, ipl);
                             /* Re-check: it may have become busy while waiting. */
 606                         if (INTR_BUSY(avp))
 607                                 busy++;
 608                 }
 609 
 610                 if (!busy)
 611                         apix_remove_av(vecp, avp);
 612         }
 613 
 614         if (busy) {
 615                 apix_vector_t *tp = apixp->x_obsoletes;
 616 
                     /* Already marked obsoleted: do not queue it a second time. */
 617                 if (vecp->v_state == APIX_STATE_OBSOLETED)
 618                         return (1);
 619 
 620                 vecp->v_state = APIX_STATE_OBSOLETED;
 621                 vecp->v_next = NULL;
 622                 if (tp == NULL)
 623                         apixp->x_obsoletes = vecp;
 624                 else {
                             /* Append at the tail of the obsoleted list. */
 625                         while (tp->v_next != NULL)
 626                                 tp = tp->v_next;
 627                         tp->v_next = vecp;
 628                 }
 629                 return (1);
 630         }
 631 
 632         /* interrupt is not busy */
 633         if (vecp->v_state == APIX_STATE_OBSOLETED) {
 634                 /* remove from obsoleted list */
                     /*
                      * NOTE(review): this replaces the list head, so it is only
                      * correct when vecp is the first element of x_obsoletes --
                      * confirm that callers retry obsoleted vectors from the
                      * head only.
                      */
 635                 apixp->x_obsoletes = vecp->v_next;
 636                 vecp->v_next = NULL;
 637         }
 638         apix_cleanup_vector(vecp);
 639         return (0);
 640 }
 641 
 642 /*
 643  * Duplicate number of contiguous vectors to specified target vectors.
      *
      * For each of the "count" vectors starting at oldp (source) and newp
      * (target), copies state, type, bound cpu, inum, flags and the
      * interrupt-remapping handle, re-installs every live handler on the
      * target, and re-creates the device mapping for the target vector.
      * Both the source and the target entries must already exist.  Must not
      * be used on IPI vectors.
 644  */
 645 static void
 646 apix_dup_vectors(apix_vector_t *oldp, apix_vector_t *newp, int count)
 647 {
 648         struct autovec *avp;
 649         apix_vector_t *fromp, *top;
 650         processorid_t oldcpu = oldp->v_cpuid, newcpu = newp->v_cpuid;
 651         uchar_t oldvec = oldp->v_vector, newvec = newp->v_vector;
 652         int i, inum;
 653 
 654         ASSERT(oldp->v_type != APIX_TYPE_IPI);
 655 
 656         for (i = 0; i < count; i++) {
 657                 fromp = xv_vector(oldcpu, oldvec + i);
 658                 top = xv_vector(newcpu, newvec + i);
 659                 ASSERT(fromp != NULL && top != NULL);
 660 
 661                 /* copy over original one */
                     /* v_cpuid and v_vector of the target are left as they are. */
 662                 top->v_state = fromp->v_state;
 663                 top->v_type = fromp->v_type;
 664                 top->v_bound_cpuid = fromp->v_bound_cpuid;
 665                 top->v_inum = fromp->v_inum;
 666                 top->v_flags = fromp->v_flags;
 667                 top->v_intrmap_private = fromp->v_intrmap_private;
 668 
 669                 for (avp = fromp->v_autovect; avp != NULL; avp = avp->av_link) {
                             /* Skip slots whose handler has been removed. */
 670                         if (avp->av_vector == NULL)
 671                                 continue;
 672 
 673                         apix_insert_av(top, avp->av_intr_id, avp->av_vector,
 674                             avp->av_intarg1, avp->av_intarg2, avp->av_ticksp,
 675                             avp->av_prilevel, avp->av_dip);
 676 
                             /*
                              * Fixed interrupts are mapped per handler, using
                              * the inum stored in the handler's handle.
                              */
 677                         if (fromp->v_type == APIX_TYPE_FIXED &&
 678                             avp->av_dip != NULL) {
 679                                 inum = GET_INTR_INUM(avp->av_intr_id);
 680                                 apix_set_dev_map(top, avp->av_dip, inum);
 681                         }
 682                 }
 683 
                     /* MSI/MSI-X vectors are mapped once, from v_devp. */
 684                 if (DDI_INTR_IS_MSI_OR_MSIX(fromp->v_type) &&
 685                     fromp->v_devp != NULL)
 686                         apix_set_dev_map(top, fromp->v_devp->dv_dip,
 687                             fromp->v_devp->dv_inum);
 688         }
 689 }
 690 
     /*
      * Claim the table entry for "vector" on "cpuid".
      *
      * The entry must currently be free.  If no entry has been allocated for
      * the slot yet, one is allocated (zeroed, KM_NOSLEEP) and stored in the
      * cpu's vector table.  The entry is then marked APIX_STATE_ALLOCED and
      * tagged with the cpu (as both v_cpuid and v_bound_cpuid) and the vector
      * number.
      *
      * Return the entry, or NULL if the allocation failed.
      */
 691 static apix_vector_t *
 692 apix_init_vector(processorid_t cpuid, uchar_t vector)
 693 {
 694         apix_impl_t *apixp = apixs[cpuid];
 695         apix_vector_t *vecp = apixp->x_vectbl[vector];
 696 
 697         ASSERT(IS_VECT_FREE(vecp));
 698 
 699         if (vecp == NULL) {
 700                 vecp = kmem_zalloc(sizeof (apix_vector_t), KM_NOSLEEP);
 701                 if (vecp == NULL) {
 702                         cmn_err(CE_WARN, "apix: no memory to allocate vector");
 703                         return (NULL);
 704                 }
 705                 apixp->x_vectbl[vector] = vecp;
 706         }
 707         vecp->v_state = APIX_STATE_ALLOCED;
 708         vecp->v_cpuid = vecp->v_bound_cpuid = cpuid;
 709         vecp->v_vector = vector;
 710 
 711         return (vecp);
 712 }
 713 
     /*
      * Return a vector entry to the free state.
      *
      * Resets the bound cpu, state, type, flags, busy count and the
      * interrupt-remapping handle.  The entry itself is not freed, and other
      * fields are left untouched.  The vector must have no sharers left
      * (v_share == 0).
      */
 714 static void
 715 apix_cleanup_vector(apix_vector_t *vecp)
 716 {
 717         ASSERT(vecp->v_share == 0);
 718         vecp->v_bound_cpuid = IRQ_UNINIT;
 719         vecp->v_state = APIX_STATE_FREED;
 720         vecp->v_type = 0;
 721         vecp->v_flags = 0;
 722         vecp->v_busy = 0;
 723         vecp->v_intrmap_private = NULL;
 724 }
 725 
     /*
      * Emit a verbose (ALLOC) message describing a freshly allocated vector.
      *
      * The message names the device (node name, driver name, instance) when
      * dip is not NULL, followed by a type-specific description of the
      * interrupt and the vector/cpu it was given.  "count" is only printed
      * for MSI.  The body is compiled only when DEBUG is defined.
      */
 726 static void
 727 apix_dprint_vector(apix_vector_t *vecp, dev_info_t *dip, int count)
 728 {
 729 #ifdef DEBUG
 730         major_t major;
 731         char *name, *drv_name;
 732         int instance, len, t_len;
 733         char mesg[1024] = "apix: ";
 734 
             /* t_len is the buffer size, len the number of bytes used so far. */
 735         t_len = sizeof (mesg);
 736         len = strlen(mesg);
 737         if (dip != NULL) {
 738                 name = ddi_get_name(dip);
 739                 major = ddi_name_to_major(name);
 740                 drv_name = ddi_major_to_name(major);
 741                 instance = ddi_get_instance(dip);
 742                 (void) snprintf(mesg + len, t_len - len, "%s (%s) instance %d ",
 743                     name, drv_name, instance);
 744         }
 745         len = strlen(mesg);
 746 
 747         switch (vecp->v_type) {
 748         case APIX_TYPE_FIXED:
 749                 (void) snprintf(mesg + len, t_len - len, "irqno %d",
 750                     vecp->v_inum);
 751                 break;
 752         case APIX_TYPE_MSI:
 753                 (void) snprintf(mesg + len, t_len - len,
 754                     "msi inum %d (count %d)", vecp->v_inum, count);
 755                 break;
 756         case APIX_TYPE_MSIX:
 757                 (void) snprintf(mesg + len, t_len - len, "msi-x inum %d",
 758                     vecp->v_inum);
 759                 break;
 760         default:
                     /* Other types add nothing to the message. */
 761                 break;
 762 
 763         }
 764 
 765         APIC_VERBOSE(ALLOC, (CE_CONT, "%s allocated with vector 0x%x on "
 766             "cpu %d\n", mesg, vecp->v_vector, vecp->v_cpuid));
 767 #endif  /* DEBUG */
 768 }
 769 
 770 /*
 771  * Operations on avintr
 772  */
 773 
     /*
      * Fill in a struct autovec with a handler, its arguments, tick counter,
      * priority and device.  av_flags is reset to 0; av_link is not touched.
      */
 774 #define INIT_AUTOVEC(p, intr_id, f, arg1, arg2, ticksp, ipl, dip)       \
 775 do { \
 776         (p)->av_intr_id = intr_id;   \
 777         (p)->av_vector = f;          \
 778         (p)->av_intarg1 = arg1;              \
 779         (p)->av_intarg2 = arg2;              \
 780         (p)->av_ticksp = ticksp;     \
 781         (p)->av_prilevel = ipl;              \
 782         (p)->av_dip = dip;           \
 783         (p)->av_flags = 0;           \
 784 _NOTE(CONSTCOND)} while (0)
 785 
 786 /*
 787  * Insert an interrupt service routine into chain by its priority from
 788  * high to low
      *
      * A new autovec is allocated with KM_SLEEP, so this may block.  The
      * vector's share count is incremented and its priority raised to "ipl"
      * when ipl is higher.  The handler then ends up in one of these places:
      *   - at the head, when the chain is empty;
      *   - for MSI/MSI-X, in the existing head entry, which is overwritten
      *     while the rest of the chain and the new allocation are freed;
      *   - in a released slot (av_vector == NULL) found immediately before
      *     the insertion point, in which case the new allocation is freed;
      *   - otherwise linked in before the first live handler whose priority
      *     is not higher than ipl.
 789  */
 790 static void
 791 apix_insert_av(apix_vector_t *vecp, void *intr_id, avfunc f, caddr_t arg1,
 792     caddr_t arg2, uint64_t *ticksp, int ipl, dev_info_t *dip)
 793 {
 794         struct autovec *p, *prep, *mem;
 795 
 796         APIC_VERBOSE(INTR, (CE_CONT, "apix_insert_av: dip %p, vector 0x%x, "
 797             "cpu %d\n", (void *)dip, vecp->v_vector, vecp->v_cpuid));
 798 
 799         mem = kmem_zalloc(sizeof (struct autovec), KM_SLEEP);
 800         INIT_AUTOVEC(mem, intr_id, f, arg1, arg2, ticksp, ipl, dip);
             /* Fixed interrupts listed in apic_level_intr[] are tagged as level. */
 801         if (vecp->v_type == APIX_TYPE_FIXED && apic_level_intr[vecp->v_inum])
 802                 mem->av_flags |= AV_PENTRY_LEVEL;
 803 
 804         vecp->v_share++;
 805         vecp->v_pri = (ipl > vecp->v_pri) ? ipl : vecp->v_pri;



 806         if (vecp->v_autovect == NULL) {      /* Nothing on list - put it at head */
 807                 vecp->v_autovect = mem;
 808                 return;
 809         }
 810 
 811         if (DDI_INTR_IS_MSI_OR_MSIX(vecp->v_type)) { /* MSI/X */
 812                 ASSERT(vecp->v_share == 1);  /* No sharing for MSI/X */
 813 
                     /* Reuse the head entry in place; av_flags is reset to 0. */
 814                 INIT_AUTOVEC(vecp->v_autovect, intr_id, f, arg1, arg2, ticksp,
 815                     ipl, dip);
 816                 prep = vecp->v_autovect->av_link;
 817                 vecp->v_autovect->av_link = NULL;
 818 
 819                 /* Free the following autovect chain */
 820                 while (prep != NULL) {
 821                         ASSERT(prep->av_vector == NULL);
 822 
 823                         p = prep;
 824                         prep = prep->av_link;
 825                         kmem_free(p, sizeof (struct autovec));
 826                 }
 827 
 828                 kmem_free(mem, sizeof (struct autovec));
 829                 return;
 830         }
 831 
 832         /* find where it goes in list */
             /*
              * Stop at the first live handler with priority <= ipl; prep is
              * the entry just before it (NULL when that is the head).
              */
 833         prep = NULL;
 834         for (p = vecp->v_autovect; p != NULL; p = p->av_link) {
 835                 if (p->av_vector && p->av_prilevel <= ipl)
 836                         break;
 837                 prep = p;
 838         }
 839         if (prep != NULL) {
 840                 if (prep->av_vector == NULL) {       /* freed struct available */
 841                         INIT_AUTOVEC(prep, intr_id, f, arg1, arg2,
 842                             ticksp, ipl, dip);
                             /* Carry over the flags computed for mem above. */
 843                         prep->av_flags = mem->av_flags;
 844                         kmem_free(mem, sizeof (struct autovec));
 845                         return;
 846                 }
 847 
 848                 mem->av_link = prep->av_link;
 849                 prep->av_link = mem;
 850         } else {
 851                 /* insert new intpt at beginning of chain */
 852                 mem->av_link = vecp->v_autovect;
 853                 vecp->v_autovect = mem;
 854         }
 855 }
 856 
 857 /*
 858  * After having made a change to an autovector list, wait until we have
 859  * seen specified cpu not executing an interrupt at that level--so we
 860  * know our change has taken effect completely (no old state in registers,
 861  * etc).
 862  */
 863 #define APIX_CPU_ENABLED(_cp) \
 864         (quiesce_active == 0 && \
 865         (((_cp)->cpu_flags & (CPU_QUIESCED|CPU_OFFLINE)) == 0))
 866 
 867 static void
 868 apix_wait_till_seen(processorid_t cpuid, int ipl)
 869 {
 870         struct cpu *cp = cpu[cpuid];
 871 
 872         if (cp == NULL || LOCAL_WITH_INTR_DISABLED(cpuid))
 873                 return;
 874 
 875         /*
 876          * Don't wait if the CPU is quiesced or offlined. This can happen
 877          * when a CPU is running pause thread but hardware triggered an
 878          * interrupt and the interrupt gets queued.
 879          */
 880         for (;;) {
 881                 if (!INTR_ACTIVE((volatile struct cpu *)cpu[cpuid], ipl) &&
 882                     (!APIX_CPU_ENABLED(cp) ||
 883                     !INTR_PENDING((volatile apix_impl_t *)apixs[cpuid], ipl)))
 884                         return;
 885         }
 886 }
 887 
 888 static void
 889 apix_remove_av(apix_vector_t *vecp, struct autovec *target)
 890 {
 891         int hi_pri = 0;
 892         struct autovec *p;
 893 
 894         if (target == NULL)
 895                 return;
 896 
 897         APIC_VERBOSE(INTR, (CE_CONT, "apix_remove_av: dip %p, vector 0x%x, "
 898             "cpu %d\n", (void *)target->av_dip, vecp->v_vector, vecp->v_cpuid));
 899 
 900         for (p = vecp->v_autovect; p; p = p->av_link) {
 901                 if (p == target || p->av_vector == NULL)
 902                         continue;
 903                 hi_pri = (p->av_prilevel > hi_pri) ? p->av_prilevel : hi_pri;
 904         }
 905 
 906         vecp->v_share--;
 907         vecp->v_pri = hi_pri;
 908 
 909         /*
 910          * This drops the handler from the chain, it can no longer be called.
 911          * However, there is no guarantee that the handler is not currently
 912          * still executing.
 913          */
 914         target->av_vector = NULL;
 915         /*
 916          * There is a race where we could be just about to pick up the ticksp
 917          * pointer to increment it after returning from the service routine
 918          * in av_dispatch_autovect.  Rather than NULL it out let's just point
 919          * it off to something safe so that any final tick update attempt
 920          * won't fault.
 921          */
 922         target->av_ticksp = &dummy_tick;
 923         apix_wait_till_seen(vecp->v_cpuid, target->av_prilevel);
 924 }
 925 
 926 static struct autovec *
 927 apix_find_av(apix_vector_t *vecp, void *intr_id, avfunc f)
 928 {
 929         struct autovec *p;
 930 
 931         for (p = vecp->v_autovect; p; p = p->av_link) {
 932                 if ((p->av_vector == f) && (p->av_intr_id == intr_id)) {
 933                         /* found the handler */
 934                         return (p);
 935                 }
 936         }
 937 
 938         return (NULL);
 939 }
 940 
 941 static apix_vector_t *
 942 apix_find_vector_by_avintr(void *intr_id, avfunc f)
 943 {
 944         apix_vector_t *vecp;
 945         processorid_t n;
 946         uchar_t v;
 947 
 948         for (n = 0; n < apic_nproc; n++) {
 949                 if (!apix_is_cpu_enabled(n))
 950                         continue;
 951 
 952                 for (v = APIX_AVINTR_MIN; v <= APIX_AVINTR_MIN; v++) {
 953                         vecp = xv_vector(n, v);
 954                         if (vecp == NULL ||
 955                             vecp->v_state <= APIX_STATE_OBSOLETED)
 956                                 continue;
 957 
 958                         if (apix_find_av(vecp, intr_id, f) != NULL)
 959                                 return (vecp);
 960                 }
 961         }
 962 
 963         return (NULL);
 964 }
 965 
 966 /*
 967  * Add interrupt service routine.
 968  *
 969  * For legacy interrupts (HPET timer, ACPI SCI), the vector is actually
 970  * IRQ no. A vector is then allocated. Otherwise, the vector is already
 971  * allocated. The input argument virt_vect is virtual vector of format
 972  * APIX_VIRTVEC_VECTOR(cpuid, vector).
 973  *
 974  * Return 1 on success, 0 on failure.
 975  */
 976 int
 977 apix_add_avintr(void *intr_id, int ipl, avfunc xxintr, char *name,
 978     int virt_vect, caddr_t arg1, caddr_t arg2, uint64_t *ticksp,
 979     dev_info_t *dip)
 980 {
 981         int cpuid;
 982         uchar_t v = (uchar_t)APIX_VIRTVEC_VECTOR(virt_vect);
 983         apix_vector_t *vecp;
 984 
 985         if (xxintr == NULL) {
 986                 cmn_err(CE_WARN, "Attempt to add null for %s "
 987                     "on vector 0x%x,0x%x", name,
 988                     APIX_VIRTVEC_CPU(virt_vect),
 989                     APIX_VIRTVEC_VECTOR(virt_vect));
 990                 return (0);
 991         }
 992 
 993         if (v >= APIX_IPI_MIN)       /* IPIs */
 994                 return (apix_add_ipi(ipl, xxintr, name, v, arg1, arg2));
 995 
 996         if (!APIX_IS_VIRTVEC(virt_vect)) {      /* got irq */
 997                 int irqno = virt_vect;
 998                 int inum = GET_INTR_INUM(intr_id);
 999 
1000                 /*
1001                  * Senarios include:
1002                  * a. add_avintr() is called before irqp initialized (legacy)
1003                  * b. irqp is initialized, vector is not allocated (fixed)
1004                  * c. irqp is initialized, vector is allocated (fixed & shared)
1005                  */
1006                 if ((vecp = apix_alloc_intx(dip, inum, irqno)) == NULL)
1007                         return (0);
1008 
1009                 cpuid = vecp->v_cpuid;
1010                 v = vecp->v_vector;
1011                 virt_vect = APIX_VIRTVECTOR(cpuid, v);
1012         } else {        /* got virtual vector */
1013                 cpuid = APIX_VIRTVEC_CPU(virt_vect);
1014                 vecp = xv_vector(cpuid, v);
1015                 ASSERT(vecp != NULL);
1016         }
1017 
1018         lock_set(&apix_lock);
1019         if (vecp->v_state <= APIX_STATE_OBSOLETED) {
1020                 vecp = NULL;
1021 
1022                 /*
1023                  * Basically the allocated but not enabled interrupts
1024                  * will not get re-targeted. But MSIs in allocated state
1025                  * could be re-targeted due to group re-targeting.
1026                  */
1027                 if (intr_id != NULL && dip != NULL) {
1028                         ddi_intr_handle_impl_t *hdlp = intr_id;
1029                         vecp = apix_get_dev_map(dip, hdlp->ih_inum,
1030                             hdlp->ih_type);
1031                         ASSERT(vecp->v_state == APIX_STATE_ALLOCED);
1032                 }
1033                 if (vecp == NULL) {
1034                         lock_clear(&apix_lock);
1035                         cmn_err(CE_WARN, "Invalid interrupt 0x%x,0x%x "
1036                             " for %p to add", cpuid, v, intr_id);
1037                         return (0);
1038                 }
1039                 cpuid = vecp->v_cpuid;
1040                 virt_vect = APIX_VIRTVECTOR(cpuid, vecp->v_vector);
1041         }
1042 
1043         APIX_ENTER_CPU_LOCK(cpuid);
1044         apix_insert_av(vecp, intr_id, xxintr, arg1, arg2, ticksp, ipl, dip);
1045         APIX_LEAVE_CPU_LOCK(cpuid);
1046 
1047         (void) apix_addspl(virt_vect, ipl, 0, 0);
1048 
1049         lock_clear(&apix_lock);
1050 
1051         return (1);
1052 }
1053 
1054 /*
1055  * Remove avintr
1056  *
1057  * For fixed, if it's the last one of shared interrupts, free the vector.
1058  * For msi/x, only disable the interrupt but not free the vector, which
1059  * is freed by PSM_XXX_FREE_XXX.
1060  */
1061 void
1062 apix_rem_avintr(void *intr_id, int ipl, avfunc xxintr, int virt_vect)
1063 {
1064         avfunc f;
1065         apix_vector_t *vecp;
1066         struct autovec *avp;
1067         processorid_t cpuid;
1068 
1069         if ((f = xxintr) == NULL)
1070                 return;
1071 
1072         lock_set(&apix_lock);
1073 
1074         if (!APIX_IS_VIRTVEC(virt_vect)) {      /* got irq */
1075                 vecp = apix_intx_get_vector(virt_vect);
1076                 virt_vect = APIX_VIRTVECTOR(vecp->v_cpuid, vecp->v_vector);
1077         } else  /* got virtual vector */
1078                 vecp = xv_vector(APIX_VIRTVEC_CPU(virt_vect),
1079                     APIX_VIRTVEC_VECTOR(virt_vect));
1080 
1081         if (vecp == NULL) {
1082                 lock_clear(&apix_lock);
1083                 cmn_err(CE_CONT, "Invalid interrupt 0x%x,0x%x to remove",
1084                     APIX_VIRTVEC_CPU(virt_vect),
1085                     APIX_VIRTVEC_VECTOR(virt_vect));
1086                 return;
1087         }
1088 
1089         if (vecp->v_state <= APIX_STATE_OBSOLETED ||
1090             ((avp = apix_find_av(vecp, intr_id, f)) == NULL)) {
1091                 /*
1092                  * It's possible that the interrupt is rebound to a
1093                  * different cpu before rem_avintr() is called. Search
1094                  * through all vectors once it happens.
1095                  */
1096                 if ((vecp = apix_find_vector_by_avintr(intr_id, f))
1097                     == NULL) {
1098                         lock_clear(&apix_lock);
1099                         cmn_err(CE_CONT, "Unknown interrupt 0x%x,0x%x "
1100                             "for %p to remove", APIX_VIRTVEC_CPU(virt_vect),
1101                             APIX_VIRTVEC_VECTOR(virt_vect), intr_id);
1102                         return;
1103                 }
1104                 virt_vect = APIX_VIRTVECTOR(vecp->v_cpuid, vecp->v_vector);
1105                 avp = apix_find_av(vecp, intr_id, f);
1106         }
1107         cpuid = vecp->v_cpuid;
1108 
1109         /* disable interrupt */
1110         (void) apix_delspl(virt_vect, ipl, 0, 0);
1111 
1112         /* remove ISR entry */
1113         APIX_ENTER_CPU_LOCK(cpuid);
1114         apix_remove_av(vecp, avp);
1115         APIX_LEAVE_CPU_LOCK(cpuid);
1116 
1117         lock_clear(&apix_lock);
1118 }
1119 
1120 /*
1121  * Device to vector mapping table
1122  */
1123 
1124 static void
1125 apix_clear_dev_map(dev_info_t *dip, int inum, int type)
1126 {
1127         char *name;
1128         major_t major;
1129         apix_dev_vector_t *dvp, *prev = NULL;
1130         int found = 0;
1131 
1132         name = ddi_get_name(dip);
1133         major = ddi_name_to_major(name);
1134 
1135         mutex_enter(&apix_mutex);
1136 
1137         for (dvp = apix_dev_vector[major]; dvp != NULL;
1138             prev = dvp, dvp = dvp->dv_next) {
1139                 if (dvp->dv_dip == dip && dvp->dv_inum == inum &&
1140                     dvp->dv_type == type) {
1141                         found++;
1142                         break;
1143                 }
1144         }
1145 
1146         if (!found) {
1147                 mutex_exit(&apix_mutex);
1148                 return;
1149         }
1150 
1151         if (prev != NULL)
1152                 prev->dv_next = dvp->dv_next;
1153 
1154         if (apix_dev_vector[major] == dvp)
1155                 apix_dev_vector[major] = dvp->dv_next;
1156 
1157         dvp->dv_vector->v_devp = NULL;
1158 
1159         mutex_exit(&apix_mutex);
1160 
1161         kmem_free(dvp, sizeof (apix_dev_vector_t));
1162 }
1163 
1164 void
1165 apix_set_dev_map(apix_vector_t *vecp, dev_info_t *dip, int inum)
1166 {
1167         apix_dev_vector_t *dvp;
1168         char *name;
1169         major_t major;
1170         uint32_t found = 0;
1171 
1172         ASSERT(dip != NULL);
1173         name = ddi_get_name(dip);
1174         major = ddi_name_to_major(name);
1175 
1176         mutex_enter(&apix_mutex);
1177 
1178         for (dvp = apix_dev_vector[major]; dvp != NULL;
1179             dvp = dvp->dv_next) {
1180                 if (dvp->dv_dip == dip && dvp->dv_inum == inum &&
1181                     dvp->dv_type == vecp->v_type) {
1182                         found++;
1183                         break;
1184                 }
1185         }
1186 
1187         if (found == 0) {       /* not found */
1188                 dvp = kmem_zalloc(sizeof (apix_dev_vector_t), KM_SLEEP);
1189                 dvp->dv_dip = dip;
1190                 dvp->dv_inum = inum;
1191                 dvp->dv_type = vecp->v_type;
1192 
1193                 dvp->dv_next = apix_dev_vector[major];
1194                 apix_dev_vector[major] = dvp;
1195         }
1196         dvp->dv_vector = vecp;
1197         vecp->v_devp = dvp;
1198 
1199         mutex_exit(&apix_mutex);
1200 
1201         DDI_INTR_IMPLDBG((CE_CONT, "apix_set_dev_map: dip=0x%p "
1202             "inum=0x%x  vector=0x%x/0x%x\n",
1203             (void *)dip, inum, vecp->v_cpuid, vecp->v_vector));
1204 }
1205 
1206 apix_vector_t *
1207 apix_get_dev_map(dev_info_t *dip, int inum, int type)
1208 {
1209         char *name;
1210         major_t major;
1211         apix_dev_vector_t *dvp;
1212         apix_vector_t *vecp;
1213 
1214         name = ddi_get_name(dip);
1215         if ((major = ddi_name_to_major(name)) == DDI_MAJOR_T_NONE)
1216                 return (NULL);
1217 
1218         mutex_enter(&apix_mutex);
1219         for (dvp = apix_dev_vector[major]; dvp != NULL;
1220             dvp = dvp->dv_next) {
1221                 if (dvp->dv_dip == dip && dvp->dv_inum == inum &&
1222                     dvp->dv_type == type) {
1223                         vecp = dvp->dv_vector;
1224                         mutex_exit(&apix_mutex);
1225                         return (vecp);
1226                 }
1227         }
1228         mutex_exit(&apix_mutex);
1229 
1230         return (NULL);
1231 }
1232 
1233 /*
1234  * Get minimum inum for specified device, used for MSI
1235  */
1236 int
1237 apix_get_min_dev_inum(dev_info_t *dip, int type)
1238 {
1239         char *name;
1240         major_t major;
1241         apix_dev_vector_t *dvp;
1242         int inum = -1;
1243 
1244         name = ddi_get_name(dip);
1245         major = ddi_name_to_major(name);
1246 
1247         mutex_enter(&apix_mutex);
1248         for (dvp = apix_dev_vector[major]; dvp != NULL;
1249             dvp = dvp->dv_next) {
1250                 if (dvp->dv_dip == dip && dvp->dv_type == type) {
1251                         if (inum == -1)
1252                                 inum = dvp->dv_inum;
1253                         else
1254                                 inum = (dvp->dv_inum < inum) ?
1255                                     dvp->dv_inum : inum;
1256                 }
1257         }
1258         mutex_exit(&apix_mutex);
1259 
1260         return (inum);
1261 }
1262 
1263 int
1264 apix_get_max_dev_inum(dev_info_t *dip, int type)
1265 {
1266         char *name;
1267         major_t major;
1268         apix_dev_vector_t *dvp;
1269         int inum = -1;
1270 
1271         name = ddi_get_name(dip);
1272         major = ddi_name_to_major(name);
1273 
1274         mutex_enter(&apix_mutex);
1275         for (dvp = apix_dev_vector[major]; dvp != NULL;
1276             dvp = dvp->dv_next) {
1277                 if (dvp->dv_dip == dip && dvp->dv_type == type) {
1278                         if (inum == -1)
1279                                 inum = dvp->dv_inum;
1280                         else
1281                                 inum = (dvp->dv_inum > inum) ?
1282                                     dvp->dv_inum : inum;
1283                 }
1284         }
1285         mutex_exit(&apix_mutex);
1286 
1287         return (inum);
1288 }
1289 
1290 /*
1291  * Major to cpu binding, for INTR_ROUND_ROBIN_WITH_AFFINITY cpu
1292  * binding policy
1293  */
1294 
1295 static uint32_t
1296 apix_get_dev_binding(dev_info_t *dip)
1297 {
1298         major_t major;
1299         char *name;
1300         uint32_t cpu = IRQ_UNINIT;
1301 
1302         name = ddi_get_name(dip);
1303         major = ddi_name_to_major(name);
1304         if (major < devcnt) {
1305                 mutex_enter(&apix_mutex);
1306                 cpu = apix_major_to_cpu[major];
1307                 mutex_exit(&apix_mutex);
1308         }
1309 
1310         return (cpu);
1311 }
1312 
1313 static void
1314 apix_set_dev_binding(dev_info_t *dip, uint32_t cpu)
1315 {
1316         major_t major;
1317         char *name;
1318 
1319         /* setup major to cpu mapping */
1320         name = ddi_get_name(dip);
1321         major = ddi_name_to_major(name);
1322         if (apix_major_to_cpu[major] == IRQ_UNINIT) {
1323                 mutex_enter(&apix_mutex);
1324                 apix_major_to_cpu[major] = cpu;
1325                 mutex_exit(&apix_mutex);
1326         }
1327 }
1328 
1329 /*
1330  * return the cpu to which this intr should be bound.
1331  * Check properties or any other mechanism to see if user wants it
1332  * bound to a specific CPU. If so, return the cpu id with high bit set.
1333  * If not, use the policy to choose a cpu and return the id.
1334  */
1335 uint32_t
1336 apix_bind_cpu(dev_info_t *dip)
1337 {
1338         int     instance, instno, prop_len, bind_cpu, count;
1339         uint_t  i, rc;
1340         major_t major;
1341         char    *name, *drv_name, *prop_val, *cptr;
1342         char    prop_name[32];
1343 
1344         lock_set(&apix_lock);
1345 
1346         if (apic_intr_policy == INTR_LOWEST_PRIORITY) {
1347                 cmn_err(CE_WARN, "apix: unsupported interrupt binding policy "
1348                     "LOWEST PRIORITY, use ROUND ROBIN instead");
1349                 apic_intr_policy = INTR_ROUND_ROBIN;
1350         }
1351 
1352         if (apic_nproc == 1) {
1353                 lock_clear(&apix_lock);
1354                 return (0);
1355         }
1356 
1357         drv_name = NULL;
1358         rc = DDI_PROP_NOT_FOUND;
1359         major = (major_t)-1;
1360         if (dip != NULL) {
1361                 name = ddi_get_name(dip);
1362                 major = ddi_name_to_major(name);
1363                 drv_name = ddi_major_to_name(major);
1364                 instance = ddi_get_instance(dip);
1365                 if (apic_intr_policy == INTR_ROUND_ROBIN_WITH_AFFINITY) {
1366                         bind_cpu = apix_get_dev_binding(dip);
1367                         if (bind_cpu != IRQ_UNINIT) {
1368                                 lock_clear(&apix_lock);
1369                                 return (bind_cpu);
1370                         }
1371                 }
1372                 /*
1373                  * search for "drvname"_intpt_bind_cpus property first, the
1374                  * syntax of the property should be "a[,b,c,...]" where
1375                  * instance 0 binds to cpu a, instance 1 binds to cpu b,
1376                  * instance 3 binds to cpu c...
1377                  * ddi_getlongprop() will search /option first, then /
1378                  * if "drvname"_intpt_bind_cpus doesn't exist, then find
1379                  * intpt_bind_cpus property.  The syntax is the same, and
1380                  * it applies to all the devices if its "drvname" specific
1381                  * property doesn't exist
1382                  */
1383                 (void) strcpy(prop_name, drv_name);
1384                 (void) strcat(prop_name, "_intpt_bind_cpus");
1385                 rc = ddi_getlongprop(DDI_DEV_T_ANY, dip, 0, prop_name,
1386                     (caddr_t)&prop_val, &prop_len);
1387                 if (rc != DDI_PROP_SUCCESS) {
1388                         rc = ddi_getlongprop(DDI_DEV_T_ANY, dip, 0,
1389                             "intpt_bind_cpus", (caddr_t)&prop_val, &prop_len);
1390                 }
1391         }
1392         if (rc == DDI_PROP_SUCCESS) {
1393                 for (i = count = 0; i < (prop_len - 1); i++)
1394                         if (prop_val[i] == ',')
1395                                 count++;
1396                 if (prop_val[i-1] != ',')
1397                         count++;
1398                 /*
1399                  * if somehow the binding instances defined in the
1400                  * property are not enough for this instno., then
1401                  * reuse the pattern for the next instance until
1402                  * it reaches the requested instno
1403                  */
1404                 instno = instance % count;
1405                 i = 0;
1406                 cptr = prop_val;
1407                 while (i < instno)
1408                         if (*cptr++ == ',')
1409                                 i++;
1410                 bind_cpu = stoi(&cptr);
1411                 kmem_free(prop_val, prop_len);
1412                 /* if specific cpu is bogus, then default to cpu 0 */
1413                 if (bind_cpu >= apic_nproc) {
1414                         cmn_err(CE_WARN, "apix: %s=%s: CPU %d not present",
1415                             prop_name, prop_val, bind_cpu);
1416                         bind_cpu = 0;
1417                 } else {
1418                         /* indicate that we are bound at user request */
1419                         bind_cpu |= IRQ_USER_BOUND;
1420                 }
1421                 /*
1422                  * no need to check apic_cpus[].aci_status, if specific cpu is
1423                  * not up, then post_cpu_start will handle it.
1424                  */
1425         } else {
1426                 bind_cpu = apic_get_next_bind_cpu();
1427         }
1428 
1429         lock_clear(&apix_lock);
1430 
1431         return ((uint32_t)bind_cpu);
1432 }
1433 
1434 static boolean_t
1435 apix_is_cpu_enabled(processorid_t cpuid)
1436 {
1437         apic_cpus_info_t *cpu_infop;
1438 
1439         cpu_infop = &apic_cpus[cpuid];
1440 
1441         if ((cpu_infop->aci_status & APIC_CPU_INTR_ENABLE) == 0)
1442                 return (B_FALSE);
1443 
1444         return (B_TRUE);
1445 }
1446 
1447 /*
1448  * Must be called with apix_lock held. This function can be
1449  * called from above lock level by apix_intr_redistribute().
1450  *
1451  * Arguments:
1452  *    vecp  : Vector to be rebound
1453  *    newcpu: Target cpu. IRQ_UNINIT means target is vecp->v_cpuid.
1454  *    count : Number of continuous vectors
1455  *
1456  * Return new vector being bound to
      *
      * Returns vecp itself when it already sits on newcpu, and NULL when
      * newcpu is not interrupt-enabled, when no vector could be allocated
      * on newcpu, or when re-programming a fixed interrupt fails.
      *
      * NOTE(review): no special handling of IRQ_UNINIT is visible in this
      * function; confirm that callers resolve it before calling.
1457  */
1458 apix_vector_t *
1459 apix_rebind(apix_vector_t *vecp, processorid_t newcpu, int count)
1460 {
1461         apix_vector_t *newp, *oldp;
1462         processorid_t oldcpu = vecp->v_cpuid;
1463         uchar_t newvec, oldvec = vecp->v_vector;
1464         int i;
1465 
1466         ASSERT(LOCK_HELD(&apix_lock) && count > 0);
1467 
1468         if (!apix_is_cpu_enabled(newcpu))
1469                 return (NULL);
1470 
1471         if (vecp->v_cpuid == newcpu)         /* rebind to the same cpu */
1472                 return (vecp);
1473 
             /*
              * Both per-cpu locks are taken, old cpu first, and released
              * in the reverse order.
              */
1474         APIX_ENTER_CPU_LOCK(oldcpu);
1475         APIX_ENTER_CPU_LOCK(newcpu);
1476 
1477         /* allocate vector */
1478         if (count == 1)
1479                 newp = apix_alloc_vector_oncpu(newcpu, NULL, 0, vecp->v_type);
1480         else {
1481                 ASSERT(vecp->v_type == APIX_TYPE_MSI);
1482                 newp = apix_alloc_nvectors_oncpu(newcpu, NULL, 0, count,
1483                     vecp->v_type);
1484         }
1485         if (newp == NULL) {
1486                 APIX_LEAVE_CPU_LOCK(newcpu);
1487                 APIX_LEAVE_CPU_LOCK(oldcpu);
1488                 return (NULL);
1489         }
1490 
             /* Remember the first new vector; newp is reused in the loop below. */
1491         newvec = newp->v_vector;
1492         apix_dup_vectors(vecp, newp, count);
1493 
1494         APIX_LEAVE_CPU_LOCK(newcpu);
1495         APIX_LEAVE_CPU_LOCK(oldcpu);
1496 
             /* Fixed interrupts: a single vector, re-programmed via apix_intx_rebind(). */
1497         if (!DDI_INTR_IS_MSI_OR_MSIX(vecp->v_type)) {
1498                 ASSERT(count == 1);
1499                 if (apix_intx_rebind(vecp->v_inum, newcpu, newvec) != 0) {
1500                         struct autovec *avp;
1501                         int inum;
1502 
1503                         /* undo duplication */
1504                         APIX_ENTER_CPU_LOCK(oldcpu);
1505                         APIX_ENTER_CPU_LOCK(newcpu);
                             /*
                              * Point each handler's device mapping back at the
                              * old vector and retire it from the new one.
                              */
1506                         for (avp = newp->v_autovect; avp != NULL;
1507                             avp = avp->av_link) {
1508                                 if (avp->av_dip != NULL) {
1509                                         inum = GET_INTR_INUM(avp->av_intr_id);
1510                                         apix_set_dev_map(vecp, avp->av_dip,
1511                                             inum);
1512                                 }
1513                                 apix_remove_av(newp, avp);
1514                         }
1515                         apix_cleanup_vector(newp);
1516                         APIX_LEAVE_CPU_LOCK(newcpu);
1517                         APIX_LEAVE_CPU_LOCK(oldcpu);
1518                         APIC_VERBOSE(REBIND, (CE_CONT, "apix: rebind fixed "
1519                             "interrupt 0x%x to cpu %d failed\n",
1520                             vecp->v_inum, newcpu));
1521                         return (NULL);
1522                 }
1523 
1524                 APIX_ENTER_CPU_LOCK(oldcpu);
1525                 (void) apix_obsolete_vector(vecp);
1526                 APIX_LEAVE_CPU_LOCK(oldcpu);
1527                 APIC_VERBOSE(REBIND, (CE_CONT, "apix: rebind fixed interrupt"
1528                     " 0x%x/0x%x to 0x%x/0x%x\n",
1529                     oldcpu, oldvec, newcpu, newvec));
1530                 return (newp);
1531         }
1532 
             /*
              * MSI/MSI-X: walk the old and new vectors pairwise.  A new
              * vector is enabled only when it has handlers (v_share > 0);
              * the old vector is obsoleted in every case.
              */
1533         for (i = 0; i < count; i++) {
1534                 oldp = xv_vector(oldcpu, oldvec + i);
1535                 newp = xv_vector(newcpu, newvec + i);
1536 
1537                 if (newp->v_share > 0) {
1538                         APIX_SET_REBIND_INFO(oldp, newp);
1539 
1540                         apix_enable_vector(newp);
1541 
1542                         APIX_CLR_REBIND_INFO();
1543                 }
1544 
1545                 APIX_ENTER_CPU_LOCK(oldcpu);
1546                 (void) apix_obsolete_vector(oldp);
1547                 APIX_LEAVE_CPU_LOCK(oldcpu);
1548         }
1549         APIC_VERBOSE(REBIND, (CE_CONT, "apix: rebind vector 0x%x/0x%x "
1550             "to 0x%x/0x%x, count=%d\n",
1551             oldcpu, oldvec, newcpu, newvec, count));
1552 
             /* Hand back the first vector of the new group. */
1553         return (xv_vector(newcpu, newvec));
1554 }
1555 
1556 /*
1557  * Senarios include:
1558  * a. add_avintr() is called before irqp initialized (legacy)
1559  * b. irqp is initialized, vector is not allocated (fixed interrupts)
1560  * c. irqp is initialized, vector is allocated (shared interrupts)
1561  */
1562 apix_vector_t *
1563 apix_alloc_intx(dev_info_t *dip, int inum, int irqno)
1564 {
1565         apic_irq_t *irqp;
1566         apix_vector_t *vecp;
1567 
1568         /*
1569          * Allocate IRQ. Caller is later responsible for the
1570          * initialization
1571          */
1572         mutex_enter(&airq_mutex);
1573         if ((irqp = apic_irq_table[irqno]) == NULL) {
1574                 /* allocate irq */
1575                 irqp = kmem_zalloc(sizeof (apic_irq_t), KM_SLEEP);
1576                 irqp->airq_mps_intr_index = FREE_INDEX;
1577                 apic_irq_table[irqno] = irqp;
1578         }
1579         if (irqp->airq_mps_intr_index == FREE_INDEX) {
1580                 irqp->airq_mps_intr_index = DEFAULT_INDEX;
1581                 irqp->airq_cpu = IRQ_UNINIT;
1582                 irqp->airq_origirq = (uchar_t)irqno;
1583         }
1584 
1585         mutex_exit(&airq_mutex);
1586 
1587         /*
1588          * allocate vector
1589          */
1590         if (irqp->airq_cpu == IRQ_UNINIT) {
1591                 uint32_t bindcpu, cpuid;
1592 
1593                 /* select cpu by system policy */
1594                 bindcpu = apix_bind_cpu(dip);
1595                 cpuid = bindcpu & ~IRQ_USER_BOUND;
1596 
1597                 /* allocate vector */
1598                 APIX_ENTER_CPU_LOCK(cpuid);
1599 
1600                 if ((vecp = apix_alloc_vector_oncpu(bindcpu, dip, inum,
1601                     APIX_TYPE_FIXED)) == NULL) {
1602                         cmn_err(CE_WARN, "No interrupt vector for irq %x",
1603                             irqno);
1604                         APIX_LEAVE_CPU_LOCK(cpuid);
1605                         return (NULL);
1606                 }
1607                 vecp->v_inum = irqno;
1608                 vecp->v_flags |= APIX_VECT_MASKABLE;
1609 
1610                 apix_intx_set_vector(irqno, vecp->v_cpuid, vecp->v_vector);
1611 
1612                 APIX_LEAVE_CPU_LOCK(cpuid);
1613         } else {
1614                 vecp = xv_vector(irqp->airq_cpu, irqp->airq_vector);
1615                 ASSERT(!IS_VECT_FREE(vecp));
1616 
1617                 if (dip != NULL)
1618                         apix_set_dev_map(vecp, dip, inum);
1619         }
1620 
1621         if ((dip != NULL) &&
1622             (apic_intr_policy == INTR_ROUND_ROBIN_WITH_AFFINITY) &&
1623             ((vecp->v_flags & APIX_VECT_USER_BOUND) == 0))
1624                 apix_set_dev_binding(dip, vecp->v_cpuid);
1625 
1626         apix_dprint_vector(vecp, dip, 1);
1627 
1628         return (vecp);
1629 }
1630 
1631 int
1632 apix_alloc_msi(dev_info_t *dip, int inum, int count, int behavior)
1633 {
1634         int i, cap_ptr, rcount = count;
1635         apix_vector_t *vecp;
1636         processorid_t bindcpu, cpuid;
1637         ushort_t msi_ctrl;
1638         ddi_acc_handle_t handle;
1639 
1640         DDI_INTR_IMPLDBG((CE_CONT, "apix_alloc_msi_vectors: dip=0x%p "
1641             "inum=0x%x  count=0x%x behavior=%d\n",
1642             (void *)dip, inum, count, behavior));
1643 
1644         if (count > 1) {
1645                 if (behavior == DDI_INTR_ALLOC_STRICT &&
1646                     apic_multi_msi_enable == 0)
1647                         return (0);
1648                 if (apic_multi_msi_enable == 0)
1649                         count = 1;
1650         }
1651 
1652         /* Check whether it supports per-vector masking */
1653         cap_ptr = i_ddi_get_msi_msix_cap_ptr(dip);
1654         handle = i_ddi_get_pci_config_handle(dip);
1655         msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL);
1656 
1657         /* bind to cpu */
1658         bindcpu = apix_bind_cpu(dip);
1659         cpuid = bindcpu & ~IRQ_USER_BOUND;
1660 
1661         /* if not ISP2, then round it down */
1662         if (!ISP2(rcount))
1663                 rcount = 1 << (highbit(rcount) - 1);
1664 
1665         APIX_ENTER_CPU_LOCK(cpuid);
1666         for (vecp = NULL; rcount > 0; rcount >>= 1) {
1667                 vecp = apix_alloc_nvectors_oncpu(bindcpu, dip, inum, rcount,
1668                     APIX_TYPE_MSI);
1669                 if (vecp != NULL || behavior == DDI_INTR_ALLOC_STRICT)
1670                         break;
1671         }
1672         for (i = 0; vecp && i < rcount; i++)
1673                 xv_vector(vecp->v_cpuid, vecp->v_vector + i)->v_flags |=
1674                     (msi_ctrl & PCI_MSI_PVM_MASK) ? APIX_VECT_MASKABLE : 0;
1675         APIX_LEAVE_CPU_LOCK(cpuid);
1676         if (vecp == NULL) {
1677                 APIC_VERBOSE(INTR, (CE_CONT,
1678                     "apix_alloc_msi: no %d cont vectors found on cpu 0x%x\n",
1679                     count, bindcpu));
1680                 return (0);
1681         }
1682 
1683         /* major to cpu binding */
1684         if ((apic_intr_policy == INTR_ROUND_ROBIN_WITH_AFFINITY) &&
1685             ((vecp->v_flags & APIX_VECT_USER_BOUND) == 0))
1686                 apix_set_dev_binding(dip, vecp->v_cpuid);
1687 
1688         apix_dprint_vector(vecp, dip, rcount);
1689 
1690         return (rcount);
1691 }
1692 
1693 int
1694 apix_alloc_msix(dev_info_t *dip, int inum, int count, int behavior)
1695 {
1696         apix_vector_t *vecp;
1697         processorid_t bindcpu, cpuid;
1698         int i;
1699 
1700         for (i = 0; i < count; i++) {
1701                 /* select cpu by system policy */
1702                 bindcpu = apix_bind_cpu(dip);
1703                 cpuid = bindcpu & ~IRQ_USER_BOUND;
1704 
1705                 /* allocate vector */
1706                 APIX_ENTER_CPU_LOCK(cpuid);
1707                 if ((vecp = apix_alloc_vector_oncpu(bindcpu, dip, inum + i,
1708                     APIX_TYPE_MSIX)) == NULL) {
1709                         APIX_LEAVE_CPU_LOCK(cpuid);
1710                         APIC_VERBOSE(INTR, (CE_CONT, "apix_alloc_msix: "
1711                             "allocate msix for device dip=%p, inum=%d on"
1712                             " cpu %d failed", (void *)dip, inum + i, bindcpu));
1713                         break;
1714                 }
1715                 vecp->v_flags |= APIX_VECT_MASKABLE;
1716                 APIX_LEAVE_CPU_LOCK(cpuid);
1717 
1718                 /* major to cpu mapping */
1719                 if ((i == 0) &&
1720                     (apic_intr_policy == INTR_ROUND_ROBIN_WITH_AFFINITY) &&
1721                     ((vecp->v_flags & APIX_VECT_USER_BOUND) == 0))
1722                         apix_set_dev_binding(dip, vecp->v_cpuid);
1723 
1724                 apix_dprint_vector(vecp, dip, 1);
1725         }
1726 
1727         if (i < count && behavior == DDI_INTR_ALLOC_STRICT) {
1728                 APIC_VERBOSE(INTR, (CE_WARN, "apix_alloc_msix: "
1729                     "strictly allocate %d vectors failed, got %d\n",
1730                     count, i));
1731                 apix_free_vectors(dip, inum, i, APIX_TYPE_MSIX);
1732                 i = 0;
1733         }
1734 
1735         return (i);
1736 }
1737 
1738 /*
1739  * A rollback free for vectors allocated by apix_alloc_xxx().
1740  */
1741 void
1742 apix_free_vectors(dev_info_t *dip, int inum, int count, int type)
1743 {
1744         int i, cpuid;
1745         apix_vector_t *vecp;
1746 
1747         DDI_INTR_IMPLDBG((CE_CONT, "apix_free_vectors: dip: %p inum: %x "
1748             "count: %x type: %x\n",
1749             (void *)dip, inum, count, type));
1750 
1751         lock_set(&apix_lock);
1752 
1753         for (i = 0; i < count; i++, inum++) {
1754                 if ((vecp = apix_get_dev_map(dip, inum, type)) == NULL) {
1755                         lock_clear(&apix_lock);
1756                         DDI_INTR_IMPLDBG((CE_CONT, "apix_free_vectors: "
1757                             "dip=0x%p inum=0x%x type=0x%x apix_find_intr() "
1758                             "failed\n", (void *)dip, inum, type));
1759                         continue;
1760                 }
1761 
1762                 APIX_ENTER_CPU_LOCK(vecp->v_cpuid);
1763                 cpuid = vecp->v_cpuid;
1764 
1765                 DDI_INTR_IMPLDBG((CE_CONT, "apix_free_vectors: "
1766                     "dip=0x%p inum=0x%x type=0x%x vector 0x%x (share %d)\n",
1767                     (void *)dip, inum, type, vecp->v_vector, vecp->v_share));
1768 
1769                 /* tear down device interrupt to vector mapping */
1770                 apix_clear_dev_map(dip, inum, type);
1771 
1772                 if (vecp->v_type == APIX_TYPE_FIXED) {
1773                         if (vecp->v_share > 0) {  /* share IRQ line */
1774                                 APIX_LEAVE_CPU_LOCK(cpuid);
1775                                 continue;
1776                         }
1777 
1778                         /* Free apic_irq_table entry */
1779                         apix_intx_free(vecp->v_inum);
1780                 }
1781 
1782                 /* free vector */
1783                 apix_cleanup_vector(vecp);
1784 
1785                 APIX_LEAVE_CPU_LOCK(cpuid);
1786         }
1787 
1788         lock_clear(&apix_lock);
1789 }
1790 
1791 /*
1792  * Must be called with apix_lock held
1793  */
1794 apix_vector_t *
1795 apix_setup_io_intr(apix_vector_t *vecp)
1796 {
1797         processorid_t bindcpu;
1798         int ret;
1799 
1800         ASSERT(LOCK_HELD(&apix_lock));
1801 
1802         /*
1803          * Interrupts are enabled on the CPU, programme IOAPIC RDT
1804          * entry or MSI/X address/data to enable the interrupt.
1805          */
1806         if (apix_is_cpu_enabled(vecp->v_cpuid)) {
1807                 apix_enable_vector(vecp);
1808                 return (vecp);
1809         }
1810 
1811         /*
1812          * CPU is not up or interrupts are disabled. Fall back to the
1813          * first avialable CPU.
1814          */
1815         bindcpu = apic_find_cpu(APIC_CPU_INTR_ENABLE);
1816 
1817         if (vecp->v_type == APIX_TYPE_MSI)
1818                 return (apix_grp_set_cpu(vecp, bindcpu, &ret));
1819 
1820         return (apix_set_cpu(vecp, bindcpu, &ret));
1821 }
1822 
1823 /*
1824  * For interrupts which call add_avintr() before apic is initialized.
1825  * ioapix_setup_intr() will
1826  *   - allocate vector
1827  *   - copy over ISR
1828  */
1829 static void
1830 ioapix_setup_intr(int irqno, iflag_t *flagp)
1831 {
1832         extern struct av_head autovect[];
1833         apix_vector_t *vecp;
1834         apic_irq_t *irqp;
1835         uchar_t ioapicindex, ipin;
1836         ulong_t iflag;
1837         struct autovec *avp;
1838 
1839         ioapicindex = acpi_find_ioapic(irqno);
1840         ASSERT(ioapicindex != 0xFF);
1841         ipin = irqno - apic_io_vectbase[ioapicindex];
1842 
1843         mutex_enter(&airq_mutex);
1844         irqp = apic_irq_table[irqno];
1845 
1846         /*
1847          * The irq table entry shouldn't exist unless the interrupts are shared.
1848          * In that case, make sure it matches what we would initialize it to.
1849          */
1850         if (irqp != NULL) {
1851                 ASSERT(irqp->airq_mps_intr_index == ACPI_INDEX);
1852                 ASSERT(irqp->airq_intin_no == ipin &&
1853                     irqp->airq_ioapicindex == ioapicindex);
1854                 vecp = xv_vector(irqp->airq_cpu, irqp->airq_vector);
1855                 ASSERT(!IS_VECT_FREE(vecp));
1856                 mutex_exit(&airq_mutex);
1857         } else {
1858                 irqp = kmem_zalloc(sizeof (apic_irq_t), KM_SLEEP);
1859 
1860                 irqp->airq_cpu = IRQ_UNINIT;
1861                 irqp->airq_origirq = (uchar_t)irqno;
1862                 irqp->airq_mps_intr_index = ACPI_INDEX;
1863                 irqp->airq_ioapicindex = ioapicindex;
1864                 irqp->airq_intin_no = ipin;
1865                 irqp->airq_iflag = *flagp;
1866                 irqp->airq_share++;
1867 
1868                 apic_irq_table[irqno] = irqp;
1869                 mutex_exit(&airq_mutex);
1870 
1871                 vecp = apix_alloc_intx(NULL, 0, irqno);
1872         }
1873 
1874         /* copy over autovect */
1875         for (avp = autovect[irqno].avh_link; avp; avp = avp->av_link)
1876                 apix_insert_av(vecp, avp->av_intr_id, avp->av_vector,
1877                     avp->av_intarg1, avp->av_intarg2, avp->av_ticksp,
1878                     avp->av_prilevel, avp->av_dip);
1879 
1880         /* Program I/O APIC */
1881         iflag = intr_clear();
1882         lock_set(&apix_lock);
1883 
1884         (void) apix_setup_io_intr(vecp);
1885 
1886         lock_clear(&apix_lock);
1887         intr_restore(iflag);
1888 
1889         APIC_VERBOSE_IOAPIC((CE_CONT, "apix: setup ioapic, irqno %x "
1890             "(ioapic %x, ipin %x) is bound to cpu %x, vector %x\n",
1891             irqno, ioapicindex, ipin, irqp->airq_cpu, irqp->airq_vector));
1892 }
1893 
1894 void
1895 ioapix_init_intr(int mask_apic)
1896 {
1897         int ioapicindex;
1898         int i, j;
1899 
1900         /* mask interrupt vectors */
1901         for (j = 0; j < apic_io_max && mask_apic; j++) {
1902                 int intin_max;
1903 
1904                 ioapicindex = j;
1905                 /* Bits 23-16 define the maximum redirection entries */
1906                 intin_max = (ioapic_read(ioapicindex, APIC_VERS_CMD) >> 16)
1907                     & 0xff;
1908                 for (i = 0; i <= intin_max; i++)
1909                         ioapic_write(ioapicindex, APIC_RDT_CMD + 2 * i,
1910                             AV_MASK);
1911         }
1912 
1913         /*
1914          * Hack alert: deal with ACPI SCI interrupt chicken/egg here
1915          */
1916         if (apic_sci_vect > 0)
1917                 ioapix_setup_intr(apic_sci_vect, &apic_sci_flags);
1918 
1919         /*
1920          * Hack alert: deal with ACPI HPET interrupt chicken/egg here.
1921          */
1922         if (apic_hpet_vect > 0)
1923                 ioapix_setup_intr(apic_hpet_vect, &apic_hpet_flags);
1924 }
--- EOF ---