/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright 2018 Western Digital Corporation.  All rights reserved.
 * Copyright 2018 Joyent, Inc.
 */

#include <sys/cpuvar.h>
#include <sys/cpu_event.h>
#include <sys/param.h>
#include <sys/cmn_err.h>
#include <sys/t_lock.h>
#include <sys/kmem.h>
#include <sys/machlock.h>
#include <sys/systm.h>
#include <sys/archsystm.h>
#include <sys/atomic.h>
#include <sys/sdt.h>
#include <sys/processor.h>
#include <sys/time.h>
#include <sys/psm.h>
#include <sys/smp_impldefs.h>
#include <sys/cram.h>
#include <sys/apic.h>
#include <sys/pit.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/ddi_impldefs.h>
#include <sys/pci.h>
#include <sys/promif.h>
#include <sys/x86_archext.h>
#include <sys/cpc_impl.h>
#include <sys/uadmin.h>
#include <sys/panic.h>
#include <sys/debug.h>
#include <sys/trap.h>
#include <sys/machsystm.h>
#include <sys/sysmacros.h>
#include <sys/rm_platter.h>
#include <sys/privregs.h>
#include <sys/note.h>
#include <sys/pci_intr_lib.h>
#include <sys/spl.h>
#include <sys/clock.h>
#include <sys/dditypes.h>
#include <sys/x_call.h>
#include <sys/reboot.h>
#include <vm/hat_i86.h>
#include <sys/stack.h>
#include <sys/apix.h>
#include <sys/ht.h>

static void apix_post_hardint(int);

/*
 * Insert a vector at the tail of the interrupt pending list
 */
static __inline__ void
apix_insert_pending_av(apix_impl_t *apixp, struct autovec *avp, int ipl)
{
        struct autovec **head = apixp->x_intr_head;
        struct autovec **tail = apixp->x_intr_tail;

        avp->av_ipl_link = NULL;
        if (tail[ipl] == NULL) {
                head[ipl] = tail[ipl] = avp;
                return;
        }

        tail[ipl]->av_ipl_link = avp;
        tail[ipl] = avp;
}
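
/*
 * Each CPU's apix_impl_t keeps one pending list per IPL: x_intr_head[ipl]
 * and x_intr_tail[ipl] point at a singly-linked list of struct autovec
 * entries chained through av_ipl_link.  x_intr_pending carries one bit
 * per IPL with queued work, so the dispatch code can find the highest
 * pending level with a single bit scan (bsrw_insn()).
 */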

/*
 * Remove and return a vector from the head of the hardware interrupt
 * pending list.
 */
static __inline__ struct autovec *
apix_remove_pending_av(apix_impl_t *apixp, int ipl)
{
        struct cpu *cpu = CPU;
        struct autovec **head = apixp->x_intr_head;
        struct autovec **tail = apixp->x_intr_tail;
        struct autovec *avp = head[ipl];

        if (avp == NULL)
                return (NULL);

        if (avp->av_vector != NULL && avp->av_prilevel < cpu->cpu_base_spl) {
                /*
                 * This vector's priority level is blocked by the CPU's
                 * base SPL.  Leave it marked pending and return NULL to
                 * quit handling the current IPL level.
                 */
                apixp->x_intr_pending |= (1 << avp->av_prilevel);
                return (NULL);
        }

        avp->av_flags &= ~AV_PENTRY_PEND;
        avp->av_flags |= AV_PENTRY_ONPROC;
        head[ipl] = avp->av_ipl_link;
        avp->av_ipl_link = NULL;

        if (head[ipl] == NULL)
                tail[ipl] = NULL;

        return (avp);
}
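
/*
 * An autovec entry moves through two flag states: AV_PENTRY_PEND is set
 * while it sits on a pending list, and AV_PENTRY_ONPROC while its handler
 * is running.  The dispatch code uses these bits to avoid queueing an
 * entry twice and to tell an in-progress handler from an idle one.
 */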

/*
 * apix_add_pending_hardint:
 *
 * Add the hardware interrupt handlers registered on a vector to the
 * per-IPL interrupt pending lists.
 */
static void
apix_add_pending_hardint(int vector)
{
        uint32_t cpuid = psm_get_cpu_id();
        apix_impl_t *apixp = apixs[cpuid];
        apix_vector_t *vecp = apixp->x_vectbl[vector];
        struct autovec *p, *prevp = NULL;
        int ipl;

        /*
         * An MSI interrupt that does not support per-vector masking can
         * fire on a stale vector while it is being rebound, because the
         * rebind operation cannot reprogram the MSI address and data
         * registers atomically.  Add the ISR of such a suspicious
         * interrupt to the pending list.
         */
        APIX_DO_FAKE_INTR(cpuid, vector);
        if (vecp == NULL)
                return;

        for (p = vecp->v_autovect; p != NULL; p = p->av_link) {
                if (p->av_vector == NULL)
                        continue;       /* skip freed entry */

                ipl = p->av_prilevel;
                prevp = p;

                /* set pending at specified priority level */
                apixp->x_intr_pending |= (1 << ipl);

                if (p->av_flags & AV_PENTRY_PEND)
                        continue;       /* already in the pending list */
                p->av_flags |= AV_PENTRY_PEND;

                /* insert into the pending list at its original IPL */
                apix_insert_pending_av(apixp, p, ipl);
        }

        /* last one of the linked list */
        if (prevp && ((prevp->av_flags & AV_PENTRY_LEVEL) != 0))
                prevp->av_flags |= (vector & AV_PENTRY_VECTMASK);
}
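
/*
 * For a level-triggered source, the vector number is stashed above in the
 * last autovec entry's av_flags (under AV_PENTRY_VECTMASK).  When the
 * handler later runs, apix_dispatch_pending_autovect() pulls the vector
 * back out and calls apix_post_hardint() to unmask the level-triggered
 * interrupt once it has been serviced.
 */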

/*
 * Walk pending hardware interrupts at given priority level, invoking
 * each interrupt handler as we go.
 */
extern uint64_t intr_get_time(void);

static void
apix_dispatch_pending_autovect(uint_t ipl)
{
        uint32_t cpuid = psm_get_cpu_id();
        apix_impl_t *apixp = apixs[cpuid];
        struct autovec *av;

        while ((av = apix_remove_pending_av(apixp, ipl)) != NULL) {
                uint_t r;
                uint_t (*intr)() = av->av_vector;
                caddr_t arg1 = av->av_intarg1;
                caddr_t arg2 = av->av_intarg2;
                dev_info_t *dip = av->av_dip;
                uchar_t vector = av->av_flags & AV_PENTRY_VECTMASK;

                if (intr == NULL)
                        continue;

                /* Don't enable interrupts during x-calls */
                if (ipl != XC_HI_PIL)
                        sti();

                DTRACE_PROBE4(interrupt__start, dev_info_t *, dip,
                    void *, intr, caddr_t, arg1, caddr_t, arg2);
                r = (*intr)(arg1, arg2);
                DTRACE_PROBE4(interrupt__complete, dev_info_t *, dip,
                    void *, intr, caddr_t, arg1, uint_t, r);

                if (av->av_ticksp && av->av_prilevel <= LOCK_LEVEL)
                        atomic_add_64(av->av_ticksp, intr_get_time());

                cli();

                if (vector) {
                        if ((av->av_flags & AV_PENTRY_PEND) == 0)
                                av->av_flags &= ~AV_PENTRY_VECTMASK;

                        apix_post_hardint(vector);
                }

                /* mark it as idle */
                av->av_flags &= ~AV_PENTRY_ONPROC;
        }
}

static caddr_t
apix_do_softint_prolog(struct cpu *cpu, uint_t pil, uint_t oldpil,
    caddr_t stackptr)
{
        kthread_t *t, *volatile it;
        struct machcpu *mcpu = &cpu->cpu_m;
        hrtime_t now;

        UNREFERENCED_1PARAMETER(oldpil);
        ASSERT(pil > mcpu->mcpu_pri && pil > cpu->cpu_base_spl);

        atomic_and_32((uint32_t *)&mcpu->mcpu_softinfo.st_pending, ~(1 << pil));

        mcpu->mcpu_pri = pil;

        now = tsc_read();

        /*
         * Get set to run interrupt thread.
         * There should always be an interrupt thread since we
         * allocate one for each level on the CPU.
         */
        it = cpu->cpu_intr_thread;
        ASSERT(it != NULL);
        cpu->cpu_intr_thread = it->t_link;

        /* t_intr_start could be zero due to cpu_intr_swtch_enter. */
        t = cpu->cpu_thread;
        if ((t->t_flag & T_INTR_THREAD) && t->t_intr_start != 0) {
                hrtime_t intrtime = now - t->t_intr_start;
                mcpu->intrstat[pil][0] += intrtime;
                cpu->cpu_intracct[cpu->cpu_mstate] += intrtime;
                t->t_intr_start = 0;
        }

        /*
         * Note that the code in kcpc_overflow_intr -relies- on the
         * ordering of events here - in particular that t->t_lwp of
         * the interrupt thread is set to the pinned thread *before*
         * curthread is changed.
         */
        it->t_lwp = t->t_lwp;
        it->t_state = TS_ONPROC;

        /*
         * Push interrupted thread onto list from new thread.
         * Set the new thread as the current one.
         * Set interrupted thread's T_SP because if it is the idle thread,
         * resume() may use that stack between threads.
         */

        ASSERT(SA((uintptr_t)stackptr) == (uintptr_t)stackptr);
        t->t_sp = (uintptr_t)stackptr;

        it->t_intr = t;
        cpu->cpu_thread = it;
        ht_begin_intr(pil);

        /*
         * Set bit for this pil in CPU's interrupt active bitmask.
         */
        ASSERT((cpu->cpu_intr_actv & (1 << pil)) == 0);
        cpu->cpu_intr_actv |= (1 << pil);

        /*
         * Initialize thread priority level from intr_pri
         */
        it->t_pil = (uchar_t)pil;
        it->t_pri = (pri_t)pil + intr_pri;
        it->t_intr_start = now;

        return (it->t_stk);
}

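/*
 * Undo the state set up by apix_do_softint_prolog(): charge the elapsed
 * time to the soft interrupt's PIL, return the interrupt thread to the
 * CPU's free list, and resume the interrupted (pinned) thread.  If the
 * handler blocked and no pinned thread remains, switch away via swtch().
 */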
static void
apix_do_softint_epilog(struct cpu *cpu, uint_t oldpil)
{
        struct machcpu *mcpu = &cpu->cpu_m;
        kthread_t *t, *it;
        uint_t pil, basespl;
        hrtime_t intrtime;
        hrtime_t now = tsc_read();

        it = cpu->cpu_thread;
        pil = it->t_pil;

        cpu->cpu_stats.sys.intr[pil - 1]++;

        ASSERT(cpu->cpu_intr_actv & (1 << pil));
        cpu->cpu_intr_actv &= ~(1 << pil);

        intrtime = now - it->t_intr_start;
        mcpu->intrstat[pil][0] += intrtime;
        cpu->cpu_intracct[cpu->cpu_mstate] += intrtime;

        /*
         * If there is still an interrupted thread underneath this one
         * then the interrupt was never blocked and the return is
         * fairly simple.  Otherwise it isn't.
         */
        if ((t = it->t_intr) == NULL) {
                /*
                 * Put thread back on the interrupt thread list.
                 * This was an interrupt thread, so set CPU's base SPL.
                 */
                set_base_spl();
                /* mcpu->mcpu_pri = cpu->cpu_base_spl; */

                /*
                 * If there are pending interrupts, send a softint to
                 * re-enter apix_do_interrupt() and get them processed.
                 */
                if (apixs[cpu->cpu_id]->x_intr_pending)
                        siron();

                it->t_state = TS_FREE;
                it->t_link = cpu->cpu_intr_thread;
                cpu->cpu_intr_thread = it;
                (void) splhigh();
                sti();
                swtch();
                /*NOTREACHED*/
                panic("apix_do_softint_epilog: swtch returned");
        }
        it->t_link = cpu->cpu_intr_thread;
        cpu->cpu_intr_thread = it;
        it->t_state = TS_FREE;
        ht_end_intr();
        cpu->cpu_thread = t;

        if (t->t_flag & T_INTR_THREAD)
                t->t_intr_start = now;
        basespl = cpu->cpu_base_spl;
        pil = MAX(oldpil, basespl);
        mcpu->mcpu_pri = pil;
}

/*
 * Dispatch a soft interrupt
 */
static void
apix_dispatch_softint(uint_t oldpil, uint_t arg2)
{
        struct cpu *cpu = CPU;

        UNREFERENCED_1PARAMETER(arg2);

        sti();
        av_dispatch_softvect((int)cpu->cpu_thread->t_pil);
        cli();

        /*
         * Must run softint_epilog() on the interrupt thread stack, since
         * there may not be a return from it if the interrupt thread blocked.
         */
        apix_do_softint_epilog(cpu, oldpil);
}

/*
 * Deliver any softints the current interrupt priority allows.
 * Called with interrupts disabled.
 */
int
apix_do_softint(struct regs *regs)
{
        struct cpu *cpu = CPU;
        int oldipl;
        int newipl;
        volatile uint16_t pending;
        caddr_t newsp;

        while ((pending = cpu->cpu_softinfo.st_pending) != 0) {
                newipl = bsrw_insn(pending);
                oldipl = cpu->cpu_pri;
                if (newipl <= oldipl || newipl <= cpu->cpu_base_spl)
                        return (-1);

                newsp = apix_do_softint_prolog(cpu, newipl, oldipl,
                    (caddr_t)regs);
                ASSERT(newsp != NULL);
                switch_sp_and_call(newsp, apix_dispatch_softint, oldipl, 0);
        }

        return (0);
}

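/*
 * Prepare to run a high-level interrupt (above LOCK_LEVEL) at the given
 * PIL: record profiling state for CBE_HIGH_PIL, charge elapsed time to
 * any interrupted high-level interrupt or low-level interrupt thread,
 * stamp the start time for this PIL, and mark it active in cpu_intr_actv.
 * Returns nonzero if another high-level interrupt was already active, in
 * which case we are already on the interrupt stack.
 */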
static int
apix_hilevel_intr_prolog(struct cpu *cpu, uint_t pil, uint_t oldpil,
    struct regs *rp)
{
        struct machcpu *mcpu = &cpu->cpu_m;
        hrtime_t intrtime;
        hrtime_t now = tsc_read();
        apix_impl_t *apixp = apixs[cpu->cpu_id];
        uint_t mask;

        ASSERT(pil > mcpu->mcpu_pri && pil > cpu->cpu_base_spl);

        if (pil == CBE_HIGH_PIL) {      /* 14 */
                cpu->cpu_profile_pil = oldpil;
                if (USERMODE(rp->r_cs)) {
                        cpu->cpu_profile_pc = 0;
                        cpu->cpu_profile_upc = rp->r_pc;
                        cpu->cpu_cpcprofile_pc = 0;
                        cpu->cpu_cpcprofile_upc = rp->r_pc;
                } else {
                        cpu->cpu_profile_pc = rp->r_pc;
                        cpu->cpu_profile_upc = 0;
                        cpu->cpu_cpcprofile_pc = rp->r_pc;
                        cpu->cpu_cpcprofile_upc = 0;
                }
        }

        mcpu->mcpu_pri = pil;

        mask = cpu->cpu_intr_actv & CPU_INTR_ACTV_HIGH_LEVEL_MASK;
        if (mask != 0) {
                int nestpil;

                /*
                 * We have interrupted another high-level interrupt.
                 * Load starting timestamp, compute interval, update
                 * cumulative counter.
                 */
                nestpil = bsrw_insn((uint16_t)mask);
                intrtime = now -
                    mcpu->pil_high_start[nestpil - (LOCK_LEVEL + 1)];
                mcpu->intrstat[nestpil][0] += intrtime;
                cpu->cpu_intracct[cpu->cpu_mstate] += intrtime;
        } else {
                kthread_t *t = cpu->cpu_thread;

                /*
                 * See if we are interrupting a low-level interrupt thread.
                 * If so, account for its time slice only if its time stamp
                 * is non-zero.
                 */
                if ((t->t_flag & T_INTR_THREAD) != 0 && t->t_intr_start != 0) {
                        intrtime = now - t->t_intr_start;
                        mcpu->intrstat[t->t_pil][0] += intrtime;
                        cpu->cpu_intracct[cpu->cpu_mstate] += intrtime;
                        t->t_intr_start = 0;
                }
        }

        ht_begin_intr(pil);

        /* store the starting timestamp in the CPU structure for this IPL */
        mcpu->pil_high_start[pil - (LOCK_LEVEL + 1)] = now;

        if (pil == 15) {
                /*
                 * To support reentrant level 15 interrupts, we maintain a
                 * recursion count in the top half of cpu_intr_actv.  Only
                 * when this count hits zero do we clear the PIL 15 bit from
                 * the lower half of cpu_intr_actv.
                 */
                uint16_t *refcntp = (uint16_t *)&cpu->cpu_intr_actv + 1;
                (*refcntp)++;
        }

        cpu->cpu_intr_actv |= (1 << pil);
        /* clear pending ipl level bit */
        apixp->x_intr_pending &= ~(1 << pil);

        return (mask);
}

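/*
 * Account for a completed high-level interrupt and restore the previous
 * PIL.  Clears this PIL's bit in cpu_intr_actv (honoring the PIL 15
 * recursion count), charges the elapsed time, and restamps any nested
 * high-level interrupt or interrupted low-level interrupt thread so its
 * time keeps accumulating.  Returns the mask of still-active high-level
 * interrupts.
 */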
static int
apix_hilevel_intr_epilog(struct cpu *cpu, uint_t oldpil)
{
        struct machcpu *mcpu = &cpu->cpu_m;
        uint_t mask, pil;
        hrtime_t intrtime;
        hrtime_t now = tsc_read();

        pil = mcpu->mcpu_pri;
        cpu->cpu_stats.sys.intr[pil - 1]++;

        ASSERT(cpu->cpu_intr_actv & (1 << pil));

        if (pil == 15) {
                /*
                 * To support reentrant level 15 interrupts, we maintain a
                 * recursion count in the top half of cpu_intr_actv.  Only
                 * when this count hits zero do we clear the PIL 15 bit from
                 * the lower half of cpu_intr_actv.
                 */
                uint16_t *refcntp = (uint16_t *)&cpu->cpu_intr_actv + 1;

                ASSERT(*refcntp > 0);

                if (--(*refcntp) == 0)
                        cpu->cpu_intr_actv &= ~(1 << pil);
        } else {
                cpu->cpu_intr_actv &= ~(1 << pil);
        }

        ASSERT(mcpu->pil_high_start[pil - (LOCK_LEVEL + 1)] != 0);

        intrtime = now - mcpu->pil_high_start[pil - (LOCK_LEVEL + 1)];
        mcpu->intrstat[pil][0] += intrtime;
        cpu->cpu_intracct[cpu->cpu_mstate] += intrtime;

        /*
         * Check for lower-pil nested high-level interrupt beneath
         * current one.  If so, place a starting timestamp in its
         * pil_high_start entry.
         */
        mask = cpu->cpu_intr_actv & CPU_INTR_ACTV_HIGH_LEVEL_MASK;
        if (mask != 0) {
                int nestpil;

                /*
                 * find PIL of nested interrupt
                 */
                nestpil = bsrw_insn((uint16_t)mask);
                ASSERT(nestpil < pil);
                mcpu->pil_high_start[nestpil - (LOCK_LEVEL + 1)] = now;
                /*
                 * (Another high-level interrupt is active below this one,
                 * so there is no need to check for an interrupt
                 * thread.  That will be done by the lowest priority
                 * high-level interrupt active.)
                 */
        } else {
                /*
                 * Check to see if there is a low-level interrupt active.
                 * If so, place a starting timestamp in the thread
                 * structure.
                 */
                kthread_t *t = cpu->cpu_thread;

                if (t->t_flag & T_INTR_THREAD)
                        t->t_intr_start = now;
        }

        ht_end_intr();

        mcpu->mcpu_pri = oldpil;
        if (pil < CBE_HIGH_PIL)
                (void) (*setlvlx)(oldpil, 0);

        return (mask);
}

/*
 * Dispatch a hilevel interrupt (one above LOCK_LEVEL)
 */
static void
apix_dispatch_pending_hilevel(uint_t ipl, uint_t arg2)
{
        UNREFERENCED_1PARAMETER(arg2);

        apix_dispatch_pending_autovect(ipl);
}

static __inline__ int
apix_do_pending_hilevel(struct cpu *cpu, struct regs *rp)
{
        volatile uint16_t pending;
        uint_t newipl, oldipl;
        caddr_t newsp;

        while ((pending = HILEVEL_PENDING(cpu)) != 0) {
                newipl = bsrw_insn(pending);
                ASSERT(newipl > LOCK_LEVEL && newipl > cpu->cpu_base_spl);
                oldipl = cpu->cpu_pri;
                if (newipl <= oldipl)
                        return (-1);

                /*
                 * High priority interrupts run on this cpu's interrupt stack.
                 */
                if (apix_hilevel_intr_prolog(cpu, newipl, oldipl, rp) == 0) {
                        newsp = cpu->cpu_intr_stack;
                        switch_sp_and_call(newsp, apix_dispatch_pending_hilevel,
                            newipl, 0);
                } else {        /* already on the interrupt stack */
                        apix_dispatch_pending_hilevel(newipl, 0);
                }
                (void) apix_hilevel_intr_epilog(cpu, oldipl);
        }

        return (0);
}

/*
 * Get an interrupt thread and switch to it.  This is called from the
 * apix_do_interrupt() path.  The IF flag is cleared and thus all maskable
 * interrupts are blocked at the time of calling.
 */
static caddr_t
apix_intr_thread_prolog(struct cpu *cpu, uint_t pil, caddr_t stackptr)
{
        apix_impl_t *apixp = apixs[cpu->cpu_id];
        struct machcpu *mcpu = &cpu->cpu_m;
        hrtime_t now = tsc_read();
        kthread_t *t, *volatile it;

        ASSERT(pil > mcpu->mcpu_pri && pil > cpu->cpu_base_spl);

        apixp->x_intr_pending &= ~(1 << pil);
        ASSERT((cpu->cpu_intr_actv & (1 << pil)) == 0);
        cpu->cpu_intr_actv |= (1 << pil);
        mcpu->mcpu_pri = pil;

        /*
         * Get set to run interrupt thread.
         * There should always be an interrupt thread since we
         * allocate one for each level on the CPU.
         */
        /* t_intr_start could be zero due to cpu_intr_swtch_enter. */
        t = cpu->cpu_thread;
        if ((t->t_flag & T_INTR_THREAD) && t->t_intr_start != 0) {
                hrtime_t intrtime = now - t->t_intr_start;
                mcpu->intrstat[pil][0] += intrtime;
                cpu->cpu_intracct[cpu->cpu_mstate] += intrtime;
                t->t_intr_start = 0;
        }

        /*
         * Push interrupted thread onto list from new thread.
         * Set the new thread as the current one.
         * Set interrupted thread's T_SP because if it is the idle thread,
         * resume() may use that stack between threads.
         */

        ASSERT(SA((uintptr_t)stackptr) == (uintptr_t)stackptr);

        t->t_sp = (uintptr_t)stackptr;  /* mark stack in curthread for resume */

        /*
         * Note that the code in kcpc_overflow_intr -relies- on the
         * ordering of events here - in particular that t->t_lwp of
         * the interrupt thread is set to the pinned thread *before*
         * curthread is changed.
         */
        it = cpu->cpu_intr_thread;
        cpu->cpu_intr_thread = it->t_link;
        it->t_intr = t;
        it->t_lwp = t->t_lwp;

        /*
         * (threads on the interrupt thread free list could have state
         * preset to TS_ONPROC, but it helps in debugging if
         * they're TS_FREE.)
         */
        it->t_state = TS_ONPROC;

        cpu->cpu_thread = it;
        ht_begin_intr(pil);

        /*
         * Initialize thread priority level from intr_pri
         */
        it->t_pil = (uchar_t)pil;
        it->t_pri = (pri_t)pil + intr_pri;
        it->t_intr_start = now;

        return (it->t_stk);
}

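/*
 * Tear down the interrupt thread set up by apix_intr_thread_prolog():
 * charge the elapsed time to this PIL, clear the PIL's bit in
 * cpu_intr_actv, and return the interrupt thread to the CPU's free list.
 * If the handler blocked (no pinned thread remains), switch away via
 * swtch(); otherwise resume the interrupted thread and restore the PIL.
 */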
static void
apix_intr_thread_epilog(struct cpu *cpu, uint_t oldpil)
{
        struct machcpu *mcpu = &cpu->cpu_m;
        kthread_t *t, *it = cpu->cpu_thread;
        uint_t pil, basespl;
        hrtime_t intrtime;
        hrtime_t now = tsc_read();

        pil = it->t_pil;
        cpu->cpu_stats.sys.intr[pil - 1]++;

        ASSERT(cpu->cpu_intr_actv & (1 << pil));
        cpu->cpu_intr_actv &= ~(1 << pil);

        ASSERT(it->t_intr_start != 0);
        intrtime = now - it->t_intr_start;
        mcpu->intrstat[pil][0] += intrtime;
        cpu->cpu_intracct[cpu->cpu_mstate] += intrtime;

        /*
         * If there is still an interrupted thread underneath this one
         * then the interrupt was never blocked and the return is
         * fairly simple.  Otherwise it isn't.
         */
        if ((t = it->t_intr) == NULL) {
                /*
                 * The interrupted thread is no longer pinned underneath
                 * the interrupt thread.  This means the interrupt must
                 * have blocked, and the interrupted thread has been
                 * unpinned, and has probably been running around the
                 * system for a while.
                 *
                 * Since there is no longer a thread under this one, put
                 * this interrupt thread back on the CPU's free list and
                 * resume the idle thread which will dispatch the next
                 * thread to run.
                 */
                cpu->cpu_stats.sys.intrblk++;

                /*
                 * Put thread back on the interrupt thread list.
                 * This was an interrupt thread, so set CPU's base SPL.
                 */
                set_base_spl();
                basespl = cpu->cpu_base_spl;
                mcpu->mcpu_pri = basespl;
                (*setlvlx)(basespl, 0);

                /*
                 * If there are pending interrupts, send a softint to
                 * re-enter apix_do_interrupt() and get them processed.
                 */
                if (apixs[cpu->cpu_id]->x_intr_pending)
                        siron();

                it->t_state = TS_FREE;
                /*
                 * Return interrupt thread to pool
                 */
                it->t_link = cpu->cpu_intr_thread;
                cpu->cpu_intr_thread = it;

                (void) splhigh();
                sti();
                swtch();
                /*NOTREACHED*/
                panic("apix_intr_thread_epilog: swtch returned");
        }

        /*
         * Return interrupt thread to the pool
         */
        it->t_link = cpu->cpu_intr_thread;
        cpu->cpu_intr_thread = it;
        it->t_state = TS_FREE;

        ht_end_intr();
        cpu->cpu_thread = t;

        if (t->t_flag & T_INTR_THREAD)
                t->t_intr_start = now;
        basespl = cpu->cpu_base_spl;
        mcpu->mcpu_pri = MAX(oldpil, basespl);
        (*setlvlx)(mcpu->mcpu_pri, 0);
}

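/*
 * Drain the pending autovec list at the current interrupt thread's PIL,
 * then run the epilog.  This runs on the interrupt thread stack because
 * the handler may block and never return here.
 */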
static void
apix_dispatch_pending_hardint(uint_t oldpil, uint_t arg2)
{
        struct cpu *cpu = CPU;

        UNREFERENCED_1PARAMETER(arg2);

        apix_dispatch_pending_autovect((int)cpu->cpu_thread->t_pil);

        /*
         * Must run intr_thread_epilog() on the interrupt thread stack, since
         * there may not be a return from it if the interrupt thread blocked.
         */
        apix_intr_thread_epilog(cpu, oldpil);
}

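/*
 * Deliver any pending low-level (at or below LOCK_LEVEL) hardware
 * interrupts that the current priority allows, each in its own interrupt
 * thread.  Returns -1 if the highest pending level is masked, 0 once
 * nothing deliverable remains.
 */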
static __inline__ int
apix_do_pending_hardint(struct cpu *cpu, struct regs *rp)
{
        volatile uint16_t pending;
        uint_t newipl, oldipl;
        caddr_t newsp;

        while ((pending = LOWLEVEL_PENDING(cpu)) != 0) {
                newipl = bsrw_insn(pending);
                ASSERT(newipl <= LOCK_LEVEL);
                oldipl = cpu->cpu_pri;
                if (newipl <= oldipl || newipl <= cpu->cpu_base_spl)
                        return (-1);

                /*
                 * Run this interrupt in a separate thread.
                 */
                newsp = apix_intr_thread_prolog(cpu, newipl, (caddr_t)rp);
                ASSERT(newsp != NULL);
                switch_sp_and_call(newsp, apix_dispatch_pending_hardint,
                    oldipl, 0);
        }

        return (0);
}

/*
 * Unmask level triggered interrupts
 */
static void
apix_post_hardint(int vector)
{
        apix_vector_t *vecp = xv_vector(psm_get_cpu_id(), vector);
        int irqno = vecp->v_inum;

        ASSERT(vecp->v_type == APIX_TYPE_FIXED && apic_level_intr[irqno]);

        apix_level_intr_post_dispatch(irqno);
}

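/*
 * Directly invoke the handler registered on a vector, bypassing the
 * pending lists.  Used for IPI, MSI and MSI-X vectors, which are
 * dispatched without queueing; only the first autovec entry is run.
 */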
static void
apix_dispatch_by_vector(uint_t vector)
{
        struct cpu *cpu = CPU;
        apix_vector_t *vecp = xv_vector(cpu->cpu_id, vector);
        struct autovec *avp;
        uint_t r, (*intr)();
        caddr_t arg1, arg2;
        dev_info_t *dip;

        if (vecp == NULL ||
            (avp = vecp->v_autovect) == NULL || avp->av_vector == NULL)
                return;

        avp->av_flags |= AV_PENTRY_ONPROC;
        intr = avp->av_vector;
        arg1 = avp->av_intarg1;
        arg2 = avp->av_intarg2;
        dip = avp->av_dip;

        if (avp->av_prilevel != XC_HI_PIL)
                sti();

        DTRACE_PROBE4(interrupt__start, dev_info_t *, dip,
            void *, intr, caddr_t, arg1, caddr_t, arg2);
        r = (*intr)(arg1, arg2);
        DTRACE_PROBE4(interrupt__complete, dev_info_t *, dip,
            void *, intr, caddr_t, arg1, uint_t, r);

        cli();
        avp->av_flags &= ~AV_PENTRY_ONPROC;
}


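/*
 * Dispatch a directly-vectored interrupt above LOCK_LEVEL; the caller
 * has already switched to the interrupt stack if needed.
 */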
static void
apix_dispatch_hilevel(uint_t vector, uint_t arg2)
{
        UNREFERENCED_1PARAMETER(arg2);

        apix_dispatch_by_vector(vector);
}

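/*
 * Dispatch a directly-vectored interrupt at or below LOCK_LEVEL in an
 * interrupt thread, then run the thread epilog.
 */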
static void
apix_dispatch_lowlevel(uint_t vector, uint_t oldipl)
{
        struct cpu *cpu = CPU;

        apix_dispatch_by_vector(vector);

        /*
         * Must run intr_thread_epilog() on the interrupt thread stack, since
         * there may not be a return from it if the interrupt thread blocked.
         */
        apix_intr_thread_epilog(cpu, oldipl);
}

/*
 * Interrupt service routine, called with interrupts disabled.
 */
void
apix_do_interrupt(struct regs *rp, trap_trace_rec_t *ttp)
{
        struct cpu *cpu = CPU;
        int vector = rp->r_trapno, newipl, oldipl = cpu->cpu_pri, ret;
        apix_vector_t *vecp = NULL;

#ifdef TRAPTRACE
        ttp->ttr_marker = TT_INTERRUPT;
        ttp->ttr_cpuid = cpu->cpu_id;
        ttp->ttr_ipl = 0xff;
        ttp->ttr_pri = (uchar_t)oldipl;
        ttp->ttr_spl = cpu->cpu_base_spl;
        ttp->ttr_vector = 0xff;
#endif  /* TRAPTRACE */

        cpu_idle_exit(CPU_IDLE_CB_FLAG_INTR);

        ++*(uint16_t *)&cpu->cpu_m.mcpu_istamp;

        /*
         * If it's a softint go do it now.
         */
        if (rp->r_trapno == T_SOFTINT) {
                /*
                 * It might be the case that, when an interrupt was
                 * triggered, the SPL was raised to high by splhigh().
                 * Later, when do_splx() is called to restore the SPL,
                 * both the hardware and software interrupt pending flags
                 * are checked and a T_SOFTINT is faked accordingly.
                 */
                (void) apix_do_pending_hilevel(cpu, rp);
                (void) apix_do_pending_hardint(cpu, rp);
                (void) apix_do_softint(rp);
                ASSERT(!interrupts_enabled());
#ifdef TRAPTRACE
                ttp->ttr_vector = T_SOFTINT;
#endif
                /*
                 * We need to check again for pending interrupts that may have
                 * arrived while the softint was running.
                 */
                goto do_pending;
        }

        /*
         * Send EOI to local APIC
         */
        newipl = (*setlvl)(oldipl, (int *)&rp->r_trapno);
#ifdef TRAPTRACE
        ttp->ttr_ipl = (uchar_t)newipl;
#endif  /* TRAPTRACE */

        /*
         * Bail if it is a spurious interrupt
         */
        if (newipl == -1)
                return;

        vector = rp->r_trapno;
        vecp = xv_vector(cpu->cpu_id, vector);
#ifdef TRAPTRACE
        ttp->ttr_vector = (short)vector;
#endif  /* TRAPTRACE */

        /*
         * Direct dispatch for IPI, MSI, MSI-X
         */
        if (vecp && vecp->v_type != APIX_TYPE_FIXED &&
            newipl > MAX(oldipl, cpu->cpu_base_spl)) {
                caddr_t newsp;

                if (INTR_PENDING(apixs[cpu->cpu_id], newipl)) {
                        /*
                         * There are already vectors pending at newipl,
                         * queue this one and fall through to process
                         * all pending.
                         */
                        apix_add_pending_hardint(vector);
                } else if (newipl > LOCK_LEVEL) {
                        if (apix_hilevel_intr_prolog(cpu, newipl, oldipl, rp)
                            == 0) {
                                newsp = cpu->cpu_intr_stack;
                                switch_sp_and_call(newsp, apix_dispatch_hilevel,
                                    vector, 0);
                        } else {
                                apix_dispatch_hilevel(vector, 0);
                        }
                        (void) apix_hilevel_intr_epilog(cpu, oldipl);
                } else {
                        newsp = apix_intr_thread_prolog(cpu, newipl,
                            (caddr_t)rp);
                        switch_sp_and_call(newsp, apix_dispatch_lowlevel,
                            vector, oldipl);
                }
        } else {
                /* Add to per-pil pending queue */
                apix_add_pending_hardint(vector);
                if (newipl <= MAX(oldipl, cpu->cpu_base_spl) ||
                    !apixs[cpu->cpu_id]->x_intr_pending)
                        return;
        }

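        /*
         * Drain everything that is now deliverable: first all pending
         * high-level interrupts, then alternate between low-level
         * hardware interrupts and softints until no deliverable
         * low-level work remains.
         */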
do_pending:
        if (apix_do_pending_hilevel(cpu, rp) < 0)
                return;

        do {
                ret = apix_do_pending_hardint(cpu, rp);

                /*
                 * Deliver any pending soft interrupts.
                 */
                (void) apix_do_softint(rp);
        } while (!ret && LOWLEVEL_PENDING(cpu));
}