/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright 2018 Western Digital Corporation.  All rights reserved.
 */

#include <sys/cpuvar.h>
#include <sys/cpu_event.h>
#include <sys/param.h>
#include <sys/cmn_err.h>
#include <sys/t_lock.h>
#include <sys/kmem.h>
#include <sys/machlock.h>
#include <sys/systm.h>
#include <sys/archsystm.h>
#include <sys/atomic.h>
#include <sys/sdt.h>
#include <sys/processor.h>
#include <sys/time.h>
#include <sys/psm.h>
#include <sys/smp_impldefs.h>
#include <sys/cram.h>
#include <sys/apic.h>
#include <sys/pit.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/ddi_impldefs.h>
#include <sys/pci.h>
#include <sys/promif.h>
#include <sys/x86_archext.h>
#include <sys/cpc_impl.h>
#include <sys/uadmin.h>
#include <sys/panic.h>
#include <sys/debug.h>
#include <sys/trap.h>
#include <sys/machsystm.h>
#include <sys/sysmacros.h>
#include <sys/rm_platter.h>
#include <sys/privregs.h>
#include <sys/note.h>
#include <sys/pci_intr_lib.h>
#include <sys/spl.h>
#include <sys/clock.h>
#include <sys/dditypes.h>
#include <sys/sunddi.h>
#include <sys/x_call.h>
#include <sys/reboot.h>
#include <vm/hat_i86.h>
#include <sys/stack.h>
#include <sys/apix.h>

static void apix_post_hardint(int);

/*
 * Insert a vector at the tail of the interrupt pending list
 */
static __inline__ void
apix_insert_pending_av(apix_impl_t *apixp, struct autovec *avp, int ipl)
{
        struct autovec **head = apixp->x_intr_head;
        struct autovec **tail = apixp->x_intr_tail;

        avp->av_ipl_link = NULL;
        if (tail[ipl] == NULL) {
                head[ipl] = tail[ipl] = avp;
                return;
        }

        tail[ipl]->av_ipl_link = avp;
        tail[ipl] = avp;
}

/*
 * Remove and return a vector from the head of the hardware interrupt
 * pending list.
 */
static __inline__ struct autovec *
apix_remove_pending_av(apix_impl_t *apixp, int ipl)
{
        struct cpu *cpu = CPU;
        struct autovec **head = apixp->x_intr_head;
        struct autovec **tail = apixp->x_intr_tail;
        struct autovec *avp = head[ipl];

        if (avp == NULL)
                return (NULL);

        if (avp->av_vector != NULL && avp->av_prilevel < cpu->cpu_base_spl) {
                /*
                 * If there are blocked higher-level interrupts, return
                 * NULL to stop handling the current IPL level.
                 */
                apixp->x_intr_pending |= (1 << avp->av_prilevel);
                return (NULL);
        }

        avp->av_flags &= ~AV_PENTRY_PEND;
        avp->av_flags |= AV_PENTRY_ONPROC;
        head[ipl] = avp->av_ipl_link;
        avp->av_ipl_link = NULL;

        if (head[ipl] == NULL)
                tail[ipl] = NULL;

        return (avp);
}

/*
 * apix_add_pending_hardint:
 *
 * Add a hardware interrupt to the interrupt pending list.
 */
static void
apix_add_pending_hardint(int vector)
{
        uint32_t cpuid = psm_get_cpu_id();
        apix_impl_t *apixp = apixs[cpuid];
        apix_vector_t *vecp = apixp->x_vectbl[vector];
        struct autovec *p, *prevp = NULL;
        int ipl;

        /*
         * An MSI interrupt that does not support per-vector masking can
         * be triggered on the wrong vector, because a rebind operation
         * cannot reprogram the MSI address and data atomically.  Add
         * the ISR of such a suspicious interrupt to the pending list.
         */
        APIX_DO_FAKE_INTR(cpuid, vector);
        if (vecp == NULL)
                return;

        for (p = vecp->v_autovect; p != NULL; p = p->av_link) {
                if (p->av_vector == NULL)
                        continue;       /* skip freed entry */

                ipl = p->av_prilevel;
                prevp = p;

                /* set pending at specified priority level */
                apixp->x_intr_pending |= (1 << ipl);

                if (p->av_flags & AV_PENTRY_PEND)
                        continue;       /* already in the pending list */
                p->av_flags |= AV_PENTRY_PEND;

                /* insert into the pending list at its original IPL */
                apix_insert_pending_av(apixp, p, ipl);
        }

        /* last one of the linked list */
        if (prevp && ((prevp->av_flags & AV_PENTRY_LEVEL) != 0))
                prevp->av_flags |= (vector & AV_PENTRY_VECTMASK);
}

/*
 * Walk pending hardware interrupts at given priority level, invoking
 * each interrupt handler as we go.
 */
extern uint64_t intr_get_time(void);

static void
apix_dispatch_pending_autovect(uint_t ipl)
{
        uint32_t cpuid = psm_get_cpu_id();
        apix_impl_t *apixp = apixs[cpuid];
        struct autovec *av;

        while ((av = apix_remove_pending_av(apixp, ipl)) != NULL) {
                uint_t r;
                uint_t (*intr)() = av->av_vector;
                caddr_t arg1 = av->av_intarg1;
                caddr_t arg2 = av->av_intarg2;
                dev_info_t *dip = av->av_dip;
                uchar_t vector = av->av_flags & AV_PENTRY_VECTMASK;

                if (intr == NULL)
                        continue;

                /* Don't enable interrupts during x-calls */
                if (ipl != XC_HI_PIL)
                        sti();

                DTRACE_PROBE4(interrupt__start, dev_info_t *, dip,
                    void *, intr, caddr_t, arg1, caddr_t, arg2);
                r = (*intr)(arg1, arg2);
                DTRACE_PROBE4(interrupt__complete, dev_info_t *, dip,
                    void *, intr, caddr_t, arg1, uint_t, r);

                if (av->av_ticksp && av->av_prilevel <= LOCK_LEVEL)
                        atomic_add_64(av->av_ticksp, intr_get_time());

                cli();

                if (vector) {
                        if ((av->av_flags & AV_PENTRY_PEND) == 0)
                                av->av_flags &= ~AV_PENTRY_VECTMASK;

                        apix_post_hardint(vector);
                }

                /* mark it as idle */
                av->av_flags &= ~AV_PENTRY_ONPROC;
        }
}

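/*
 * Prepare to run a pending soft interrupt at the given PIL: account for the
 * interrupted thread's time, take an interrupt thread off this CPU's free
 * list, mark the PIL active and return the interrupt thread's stack pointer
 * for switch_sp_and_call().
 */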
static caddr_t
apix_do_softint_prolog(struct cpu *cpu, uint_t pil, uint_t oldpil,
    caddr_t stackptr)
{
        kthread_t *t, *volatile it;
        struct machcpu *mcpu = &cpu->cpu_m;
        hrtime_t now;

        UNREFERENCED_1PARAMETER(oldpil);
        ASSERT(pil > mcpu->mcpu_pri && pil > cpu->cpu_base_spl);

        atomic_and_32((uint32_t *)&mcpu->mcpu_softinfo.st_pending, ~(1 << pil));

        mcpu->mcpu_pri = pil;

        now = tsc_read();

        /*
         * Get set to run interrupt thread.
         * There should always be an interrupt thread since we
         * allocate one for each level on the CPU.
         */
        it = cpu->cpu_intr_thread;
        ASSERT(it != NULL);
        cpu->cpu_intr_thread = it->t_link;

        /* t_intr_start could be zero due to cpu_intr_swtch_enter. */
        t = cpu->cpu_thread;
        if ((t->t_flag & T_INTR_THREAD) && t->t_intr_start != 0) {
                hrtime_t intrtime = now - t->t_intr_start;
                mcpu->intrstat[pil][0] += intrtime;
                cpu->cpu_intracct[cpu->cpu_mstate] += intrtime;
                t->t_intr_start = 0;
        }

        /*
         * Note that the code in kcpc_overflow_intr -relies- on the
         * ordering of events here - in particular that t->t_lwp of
         * the interrupt thread is set to the pinned thread *before*
         * curthread is changed.
         */
        it->t_lwp = t->t_lwp;
        it->t_state = TS_ONPROC;

        /*
         * Push interrupted thread onto list from new thread.
         * Set the new thread as the current one.
         * Set interrupted thread's T_SP because if it is the idle thread,
         * resume() may use that stack between threads.
         */

        ASSERT(SA((uintptr_t)stackptr) == (uintptr_t)stackptr);
        t->t_sp = (uintptr_t)stackptr;

        it->t_intr = t;
        cpu->cpu_thread = it;

        /*
         * Set bit for this pil in CPU's interrupt active bitmask.
         */
        ASSERT((cpu->cpu_intr_actv & (1 << pil)) == 0);
        cpu->cpu_intr_actv |= (1 << pil);

        /*
         * Initialize thread priority level from intr_pri
         */
        it->t_pil = (uchar_t)pil;
        it->t_pri = (pri_t)pil + intr_pri;
        it->t_intr_start = now;

        return (it->t_stk);
}

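/*
 * Tear down after a soft interrupt completes: account for the interrupt
 * thread's time, return the thread to the CPU's free list and resume the
 * pinned thread, or switch away if the interrupt thread had blocked.
 */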
static void
apix_do_softint_epilog(struct cpu *cpu, uint_t oldpil)
{
        struct machcpu *mcpu = &cpu->cpu_m;
        kthread_t *t, *it;
        uint_t pil, basespl;
        hrtime_t intrtime;
        hrtime_t now = tsc_read();

        it = cpu->cpu_thread;
        pil = it->t_pil;

        cpu->cpu_stats.sys.intr[pil - 1]++;

        ASSERT(cpu->cpu_intr_actv & (1 << pil));
        cpu->cpu_intr_actv &= ~(1 << pil);

        intrtime = now - it->t_intr_start;
        mcpu->intrstat[pil][0] += intrtime;
        cpu->cpu_intracct[cpu->cpu_mstate] += intrtime;

        /*
         * If there is still an interrupted thread underneath this one
         * then the interrupt was never blocked and the return is
         * fairly simple.  Otherwise it isn't.
         */
        if ((t = it->t_intr) == NULL) {
                /*
                 * Put thread back on the interrupt thread list.
                 * This was an interrupt thread, so set CPU's base SPL.
                 */
                set_base_spl();
                /* mcpu->mcpu_pri = cpu->cpu_base_spl; */

                /*
                 * If there are pending interrupts, send a softint to
                 * re-enter apix_do_interrupt() and get them processed.
                 */
                if (apixs[cpu->cpu_id]->x_intr_pending)
                        siron();

                it->t_state = TS_FREE;
                it->t_link = cpu->cpu_intr_thread;
                cpu->cpu_intr_thread = it;
                (void) splhigh();
                sti();
                swtch();
                /*NOTREACHED*/
                panic("dosoftint_epilog: swtch returned");
        }
        it->t_link = cpu->cpu_intr_thread;
        cpu->cpu_intr_thread = it;
        it->t_state = TS_FREE;
        cpu->cpu_thread = t;
        if (t->t_flag & T_INTR_THREAD)
                t->t_intr_start = now;
        basespl = cpu->cpu_base_spl;
        pil = MAX(oldpil, basespl);
        mcpu->mcpu_pri = pil;
}

/*
 * Dispatch a soft interrupt
 */
static void
apix_dispatch_softint(uint_t oldpil, uint_t arg2)
{
        struct cpu *cpu = CPU;

        UNREFERENCED_1PARAMETER(arg2);

        sti();
        av_dispatch_softvect((int)cpu->cpu_thread->t_pil);
        cli();

        /*
         * Must run softint_epilog() on the interrupt thread stack, since
         * there may not be a return from it if the interrupt thread blocked.
         */
        apix_do_softint_epilog(cpu, oldpil);
}

/*
 * Deliver any softints the current interrupt priority allows.
 * Called with interrupts disabled.
 */
int
apix_do_softint(struct regs *regs)
{
        struct cpu *cpu = CPU;
        int oldipl;
        int newipl;
        volatile uint16_t pending;
        caddr_t newsp;

        while ((pending = cpu->cpu_softinfo.st_pending) != 0) {
                newipl = bsrw_insn(pending);
                oldipl = cpu->cpu_pri;
                if (newipl <= oldipl || newipl <= cpu->cpu_base_spl)
                        return (-1);

                newsp = apix_do_softint_prolog(cpu, newipl, oldipl,
                    (caddr_t)regs);
                ASSERT(newsp != NULL);
                switch_sp_and_call(newsp, apix_dispatch_softint, oldipl, 0);
        }

        return (0);
}

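/*
 * Prepare to run a high-level (above LOCK_LEVEL) interrupt at the given PIL:
 * update profiling state for CBE_HIGH_PIL, account for any interrupted
 * high- or low-level interrupt, record the starting timestamp and mark the
 * PIL active.  Returns the mask of high-level PILs that were already active,
 * so the caller knows whether it is already on the interrupt stack.
 */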
static int
apix_hilevel_intr_prolog(struct cpu *cpu, uint_t pil, uint_t oldpil,
    struct regs *rp)
{
        struct machcpu *mcpu = &cpu->cpu_m;
        hrtime_t intrtime;
        hrtime_t now = tsc_read();
        apix_impl_t *apixp = apixs[cpu->cpu_id];
        uint_t mask;

        ASSERT(pil > mcpu->mcpu_pri && pil > cpu->cpu_base_spl);

        if (pil == CBE_HIGH_PIL) {      /* 14 */
                cpu->cpu_profile_pil = oldpil;
                if (USERMODE(rp->r_cs)) {
                        cpu->cpu_profile_pc = 0;
                        cpu->cpu_profile_upc = rp->r_pc;
                        cpu->cpu_cpcprofile_pc = 0;
                        cpu->cpu_cpcprofile_upc = rp->r_pc;
                } else {
                        cpu->cpu_profile_pc = rp->r_pc;
                        cpu->cpu_profile_upc = 0;
                        cpu->cpu_cpcprofile_pc = rp->r_pc;
                        cpu->cpu_cpcprofile_upc = 0;
                }
        }

        mcpu->mcpu_pri = pil;

        mask = cpu->cpu_intr_actv & CPU_INTR_ACTV_HIGH_LEVEL_MASK;
        if (mask != 0) {
                int nestpil;

                /*
                 * We have interrupted another high-level interrupt.
                 * Load starting timestamp, compute interval, update
                 * cumulative counter.
                 */
                nestpil = bsrw_insn((uint16_t)mask);
                intrtime = now -
                    mcpu->pil_high_start[nestpil - (LOCK_LEVEL + 1)];
                mcpu->intrstat[nestpil][0] += intrtime;
                cpu->cpu_intracct[cpu->cpu_mstate] += intrtime;
        } else {
                kthread_t *t = cpu->cpu_thread;

                /*
                 * See if we are interrupting a low-level interrupt thread.
                 * If so, account for its time slice only if its time stamp
                 * is non-zero.
                 */
                if ((t->t_flag & T_INTR_THREAD) != 0 && t->t_intr_start != 0) {
                        intrtime = now - t->t_intr_start;
                        mcpu->intrstat[t->t_pil][0] += intrtime;
                        cpu->cpu_intracct[cpu->cpu_mstate] += intrtime;
                        t->t_intr_start = 0;
                }
        }

        /* store starting timestamp in CPU structure for this IPL */
        mcpu->pil_high_start[pil - (LOCK_LEVEL + 1)] = now;

        if (pil == 15) {
                /*
                 * To support reentrant level 15 interrupts, we maintain a
                 * recursion count in the top half of cpu_intr_actv.  Only
                 * when this count hits zero do we clear the PIL 15 bit from
                 * the lower half of cpu_intr_actv.
                 */
                uint16_t *refcntp = (uint16_t *)&cpu->cpu_intr_actv + 1;
                (*refcntp)++;
        }

        cpu->cpu_intr_actv |= (1 << pil);
        /* clear pending ipl level bit */
        apixp->x_intr_pending &= ~(1 << pil);

        return (mask);
}

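/*
 * Finish a high-level interrupt: charge its time to intrstat, clear its bit
 * (or drop the PIL 15 recursion count) in cpu_intr_actv, restart the timer
 * for whatever interrupt remains active underneath and restore the previous
 * PIL.  Returns the mask of high-level PILs still active.
 */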
static int
apix_hilevel_intr_epilog(struct cpu *cpu, uint_t oldpil)
{
        struct machcpu *mcpu = &cpu->cpu_m;
        uint_t mask, pil;
        hrtime_t intrtime;
        hrtime_t now = tsc_read();

        pil = mcpu->mcpu_pri;
        cpu->cpu_stats.sys.intr[pil - 1]++;

        ASSERT(cpu->cpu_intr_actv & (1 << pil));

        if (pil == 15) {
                /*
                 * To support reentrant level 15 interrupts, we maintain a
                 * recursion count in the top half of cpu_intr_actv.  Only
                 * when this count hits zero do we clear the PIL 15 bit from
                 * the lower half of cpu_intr_actv.
                 */
                uint16_t *refcntp = (uint16_t *)&cpu->cpu_intr_actv + 1;

                ASSERT(*refcntp > 0);

                if (--(*refcntp) == 0)
                        cpu->cpu_intr_actv &= ~(1 << pil);
        } else {
                cpu->cpu_intr_actv &= ~(1 << pil);
        }

        ASSERT(mcpu->pil_high_start[pil - (LOCK_LEVEL + 1)] != 0);

        intrtime = now - mcpu->pil_high_start[pil - (LOCK_LEVEL + 1)];
        mcpu->intrstat[pil][0] += intrtime;
        cpu->cpu_intracct[cpu->cpu_mstate] += intrtime;

        /*
         * Check for lower-pil nested high-level interrupt beneath
         * current one.  If so, place a starting timestamp in its
         * pil_high_start entry.
         */
        mask = cpu->cpu_intr_actv & CPU_INTR_ACTV_HIGH_LEVEL_MASK;
        if (mask != 0) {
                int nestpil;

                /*
                 * find PIL of nested interrupt
                 */
                nestpil = bsrw_insn((uint16_t)mask);
                ASSERT(nestpil < pil);
                mcpu->pil_high_start[nestpil - (LOCK_LEVEL + 1)] = now;
                /*
                 * (Another high-level interrupt is active below this one,
                 * so there is no need to check for an interrupt
                 * thread.  That will be done by the lowest priority
                 * high-level interrupt active.)
                 */
        } else {
                /*
                 * Check to see if there is a low-level interrupt active.
                 * If so, place a starting timestamp in the thread
                 * structure.
                 */
                kthread_t *t = cpu->cpu_thread;

                if (t->t_flag & T_INTR_THREAD)
                        t->t_intr_start = now;
        }

        mcpu->mcpu_pri = oldpil;
        if (pil < CBE_HIGH_PIL)
                (void) (*setlvlx)(oldpil, 0);

        return (mask);
}

/*
 * Dispatch a hilevel interrupt (one above LOCK_LEVEL)
 */
static void
apix_dispatch_pending_hilevel(uint_t ipl, uint_t arg2)
{
        UNREFERENCED_1PARAMETER(arg2);

        apix_dispatch_pending_autovect(ipl);
}

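/*
 * Process all pending high-level interrupts whose PIL exceeds the current
 * priority, switching to the CPU's interrupt stack unless a high-level
 * interrupt is already active.  Returns -1 if the current priority blocks
 * further dispatch, 0 once nothing above LOCK_LEVEL remains pending.
 */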
static __inline__ int
apix_do_pending_hilevel(struct cpu *cpu, struct regs *rp)
{
        volatile uint16_t pending;
        uint_t newipl, oldipl;
        caddr_t newsp;

        while ((pending = HILEVEL_PENDING(cpu)) != 0) {
                newipl = bsrw_insn(pending);
                ASSERT(newipl > LOCK_LEVEL && newipl > cpu->cpu_base_spl);
                oldipl = cpu->cpu_pri;
                if (newipl <= oldipl)
                        return (-1);

                /*
                 * High priority interrupts run on this cpu's interrupt stack.
                 */
                if (apix_hilevel_intr_prolog(cpu, newipl, oldipl, rp) == 0) {
                        newsp = cpu->cpu_intr_stack;
                        switch_sp_and_call(newsp, apix_dispatch_pending_hilevel,
                            newipl, 0);
                } else {        /* already on the interrupt stack */
                        apix_dispatch_pending_hilevel(newipl, 0);
                }
                (void) apix_hilevel_intr_epilog(cpu, oldipl);
        }

        return (0);
}

/*
 * Get an interrupt thread and switch to it.  Called from the
 * apix_do_interrupt() path.  The IF flag is cleared, so all maskable
 * interrupts are blocked at the time of the call.
 */
static caddr_t
apix_intr_thread_prolog(struct cpu *cpu, uint_t pil, caddr_t stackptr)
{
        apix_impl_t *apixp = apixs[cpu->cpu_id];
        struct machcpu *mcpu = &cpu->cpu_m;
        hrtime_t now = tsc_read();
        kthread_t *t, *volatile it;

        ASSERT(pil > mcpu->mcpu_pri && pil > cpu->cpu_base_spl);

        apixp->x_intr_pending &= ~(1 << pil);
        ASSERT((cpu->cpu_intr_actv & (1 << pil)) == 0);
        cpu->cpu_intr_actv |= (1 << pil);
        mcpu->mcpu_pri = pil;

        /*
         * Get set to run interrupt thread.
         * There should always be an interrupt thread since we
         * allocate one for each level on the CPU.
         */
        /* t_intr_start could be zero due to cpu_intr_swtch_enter. */
        t = cpu->cpu_thread;
        if ((t->t_flag & T_INTR_THREAD) && t->t_intr_start != 0) {
                hrtime_t intrtime = now - t->t_intr_start;
                mcpu->intrstat[pil][0] += intrtime;
                cpu->cpu_intracct[cpu->cpu_mstate] += intrtime;
                t->t_intr_start = 0;
        }

        /*
         * Push interrupted thread onto list from new thread.
         * Set the new thread as the current one.
         * Set interrupted thread's T_SP because if it is the idle thread,
         * resume() may use that stack between threads.
         */

        ASSERT(SA((uintptr_t)stackptr) == (uintptr_t)stackptr);

        t->t_sp = (uintptr_t)stackptr;  /* mark stack in curthread for resume */

        /*
         * Note that the code in kcpc_overflow_intr -relies- on the
         * ordering of events here - in particular that t->t_lwp of
         * the interrupt thread is set to the pinned thread *before*
         * curthread is changed.
         */
        it = cpu->cpu_intr_thread;
        cpu->cpu_intr_thread = it->t_link;
        it->t_intr = t;
        it->t_lwp = t->t_lwp;

        /*
         * (threads on the interrupt thread free list could have state
         * preset to TS_ONPROC, but it helps in debugging if
         * they're TS_FREE.)
         */
        it->t_state = TS_ONPROC;

        cpu->cpu_thread = it;

        /*
         * Initialize thread priority level from intr_pri
         */
        it->t_pil = (uchar_t)pil;
        it->t_pri = (pri_t)pil + intr_pri;
        it->t_intr_start = now;

        return (it->t_stk);
}

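/*
 * Finish a low-level hardware interrupt that ran on an interrupt thread:
 * charge its time, return the thread to the CPU's free list and resume the
 * pinned thread, or, if the handler blocked and the pinned thread is gone,
 * switch to the next runnable thread.
 */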
static void
apix_intr_thread_epilog(struct cpu *cpu, uint_t oldpil)
{
        struct machcpu *mcpu = &cpu->cpu_m;
        kthread_t *t, *it = cpu->cpu_thread;
        uint_t pil, basespl;
        hrtime_t intrtime;
        hrtime_t now = tsc_read();

        pil = it->t_pil;
        cpu->cpu_stats.sys.intr[pil - 1]++;

        ASSERT(cpu->cpu_intr_actv & (1 << pil));
        cpu->cpu_intr_actv &= ~(1 << pil);

        ASSERT(it->t_intr_start != 0);
        intrtime = now - it->t_intr_start;
        mcpu->intrstat[pil][0] += intrtime;
        cpu->cpu_intracct[cpu->cpu_mstate] += intrtime;

        /*
         * If there is still an interrupted thread underneath this one
         * then the interrupt was never blocked and the return is
         * fairly simple.  Otherwise it isn't.
         */
        if ((t = it->t_intr) == NULL) {
                /*
                 * The interrupted thread is no longer pinned underneath
                 * the interrupt thread.  This means the interrupt must
                 * have blocked, and the interrupted thread has been
                 * unpinned, and has probably been running around the
                 * system for a while.
                 *
                 * Since there is no longer a thread under this one, put
                 * this interrupt thread back on the CPU's free list and
                 * resume the idle thread which will dispatch the next
                 * thread to run.
                 */
                cpu->cpu_stats.sys.intrblk++;

                /*
                 * Put thread back on the interrupt thread list.
                 * This was an interrupt thread, so set CPU's base SPL.
                 */
                set_base_spl();
                basespl = cpu->cpu_base_spl;
                mcpu->mcpu_pri = basespl;
                (*setlvlx)(basespl, 0);

                /*
                 * If there are pending interrupts, send a softint to
                 * re-enter apix_do_interrupt() and get them processed.
                 */
                if (apixs[cpu->cpu_id]->x_intr_pending)
                        siron();

                it->t_state = TS_FREE;
                /*
                 * Return interrupt thread to pool
                 */
                it->t_link = cpu->cpu_intr_thread;
                cpu->cpu_intr_thread = it;

                (void) splhigh();
                sti();
                swtch();
                /*NOTREACHED*/
                panic("apix_intr_thread_epilog: swtch returned");
        }

        /*
         * Return interrupt thread to the pool
         */
        it->t_link = cpu->cpu_intr_thread;
        cpu->cpu_intr_thread = it;
        it->t_state = TS_FREE;

        cpu->cpu_thread = t;
        if (t->t_flag & T_INTR_THREAD)
                t->t_intr_start = now;
        basespl = cpu->cpu_base_spl;
        mcpu->mcpu_pri = MAX(oldpil, basespl);
        (*setlvlx)(mcpu->mcpu_pri, 0);
}


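/*
 * Dispatch pending low-level interrupts at the current interrupt thread's
 * PIL, then clean up via apix_intr_thread_epilog().  Runs on the interrupt
 * thread stack via switch_sp_and_call().
 */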
static void
apix_dispatch_pending_hardint(uint_t oldpil, uint_t arg2)
{
        struct cpu *cpu = CPU;

        UNREFERENCED_1PARAMETER(arg2);

        apix_dispatch_pending_autovect((int)cpu->cpu_thread->t_pil);

        /*
         * Must run intr_thread_epilog() on the interrupt thread stack, since
         * there may not be a return from it if the interrupt thread blocked.
         */
        apix_intr_thread_epilog(cpu, oldpil);
}

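/*
 * Process all pending low-level (at or below LOCK_LEVEL) hardware interrupts
 * whose PIL exceeds the current priority, running each batch on its own
 * interrupt thread.  Returns -1 if the current priority blocks further
 * dispatch, 0 once nothing is pending.
 */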
static __inline__ int
apix_do_pending_hardint(struct cpu *cpu, struct regs *rp)
{
        volatile uint16_t pending;
        uint_t newipl, oldipl;
        caddr_t newsp;

        while ((pending = LOWLEVEL_PENDING(cpu)) != 0) {
                newipl = bsrw_insn(pending);
                ASSERT(newipl <= LOCK_LEVEL);
                oldipl = cpu->cpu_pri;
                if (newipl <= oldipl || newipl <= cpu->cpu_base_spl)
                        return (-1);

                /*
                 * Run this interrupt in a separate thread.
                 */
                newsp = apix_intr_thread_prolog(cpu, newipl, (caddr_t)rp);
                ASSERT(newsp != NULL);
                switch_sp_and_call(newsp, apix_dispatch_pending_hardint,
                    oldipl, 0);
        }

        return (0);
}

/*
 * Unmask level triggered interrupts
 */
static void
apix_post_hardint(int vector)
{
        apix_vector_t *vecp = xv_vector(psm_get_cpu_id(), vector);
        int irqno = vecp->v_inum;

        ASSERT(vecp->v_type == APIX_TYPE_FIXED && apic_level_intr[irqno]);

        apix_level_intr_post_dispatch(irqno);
}

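/*
 * Invoke the first handler on the given vector's autovect list directly,
 * with interrupts enabled unless this is the x-call PIL.  Used by the
 * direct-dispatch (IPI/MSI/MSI-X) path.
 */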
static void
apix_dispatch_by_vector(uint_t vector)
{
        struct cpu *cpu = CPU;
        apix_vector_t *vecp = xv_vector(cpu->cpu_id, vector);
        struct autovec *avp;
        uint_t r, (*intr)();
        caddr_t arg1, arg2;
        dev_info_t *dip;

        if (vecp == NULL ||
            (avp = vecp->v_autovect) == NULL || avp->av_vector == NULL)
                return;

        avp->av_flags |= AV_PENTRY_ONPROC;
        intr = avp->av_vector;
        arg1 = avp->av_intarg1;
        arg2 = avp->av_intarg2;
        dip = avp->av_dip;

        if (avp->av_prilevel != XC_HI_PIL)
                sti();

        DTRACE_PROBE4(interrupt__start, dev_info_t *, dip,
            void *, intr, caddr_t, arg1, caddr_t, arg2);
        r = (*intr)(arg1, arg2);
        DTRACE_PROBE4(interrupt__complete, dev_info_t *, dip,
            void *, intr, caddr_t, arg1, uint_t, r);

        cli();
        avp->av_flags &= ~AV_PENTRY_ONPROC;
}


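/*
 * Directly dispatch a high-level (above LOCK_LEVEL) interrupt by vector.
 */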
static void
apix_dispatch_hilevel(uint_t vector, uint_t arg2)
{
        UNREFERENCED_1PARAMETER(arg2);

        apix_dispatch_by_vector(vector);
}

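/*
 * Directly dispatch a low-level interrupt by vector on an interrupt thread,
 * then clean up via apix_intr_thread_epilog().
 */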
static void
apix_dispatch_lowlevel(uint_t vector, uint_t oldipl)
{
        struct cpu *cpu = CPU;

        apix_dispatch_by_vector(vector);

        /*
         * Must run intr_thread_epilog() on the interrupt thread stack, since
         * there may not be a return from it if the interrupt thread blocked.
         */
        apix_intr_thread_epilog(cpu, oldipl);
}

/*
 * Interrupt service routine, called with interrupts disabled.
 */
void
apix_do_interrupt(struct regs *rp, trap_trace_rec_t *ttp)
{
        struct cpu *cpu = CPU;
        int vector = rp->r_trapno, newipl, oldipl = cpu->cpu_pri, ret;
        apix_vector_t *vecp = NULL;

#ifdef TRAPTRACE
        ttp->ttr_marker = TT_INTERRUPT;
        ttp->ttr_cpuid = cpu->cpu_id;
        ttp->ttr_ipl = 0xff;
        ttp->ttr_pri = (uchar_t)oldipl;
        ttp->ttr_spl = cpu->cpu_base_spl;
        ttp->ttr_vector = 0xff;
#endif  /* TRAPTRACE */

        cpu_idle_exit(CPU_IDLE_CB_FLAG_INTR);

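        /*
         * Bump the low 16 bits of the per-CPU interrupt stamp to note that
         * an interrupt has been taken on this CPU.
         */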
        ++*(uint16_t *)&cpu->cpu_m.mcpu_istamp;

        /*
         * If it's a softint go do it now.
         */
        if (rp->r_trapno == T_SOFTINT) {
                /*
                 * It might be the case that when an interrupt is triggered,
                 * the SPL was raised to high by splhigh().  Later, when
                 * do_splx() is called to restore the SPL, both the hardware
                 * and software interrupt pending flags are checked and a
                 * T_SOFTINT is faked accordingly.
                 */
                (void) apix_do_pending_hilevel(cpu, rp);
                (void) apix_do_pending_hardint(cpu, rp);
                (void) apix_do_softint(rp);
                ASSERT(!interrupts_enabled());
#ifdef TRAPTRACE
                ttp->ttr_vector = T_SOFTINT;
#endif
                /*
                 * We need to check again for pending interrupts that may have
                 * arrived while the softint was running.
                 */
                goto do_pending;
        }

        /*
         * Send EOI to local APIC
         */
        newipl = (*setlvl)(oldipl, (int *)&rp->r_trapno);
#ifdef TRAPTRACE
        ttp->ttr_ipl = (uchar_t)newipl;
#endif  /* TRAPTRACE */

        /*
         * Bail if it is a spurious interrupt
         */
        if (newipl == -1)
                return;

        vector = rp->r_trapno;
        vecp = xv_vector(cpu->cpu_id, vector);
#ifdef TRAPTRACE
        ttp->ttr_vector = (short)vector;
#endif  /* TRAPTRACE */

        /*
         * Direct dispatch for IPI, MSI, MSI-X
         */
        if (vecp && vecp->v_type != APIX_TYPE_FIXED &&
            newipl > MAX(oldipl, cpu->cpu_base_spl)) {
                caddr_t newsp;

                if (INTR_PENDING(apixs[cpu->cpu_id], newipl)) {
                        /*
                         * There are already vectors pending at newipl,
                         * queue this one and fall through to process
                         * all pending.
                         */
                        apix_add_pending_hardint(vector);
                } else if (newipl > LOCK_LEVEL) {
                        if (apix_hilevel_intr_prolog(cpu, newipl, oldipl, rp)
                            == 0) {
                                newsp = cpu->cpu_intr_stack;
                                switch_sp_and_call(newsp, apix_dispatch_hilevel,
                                    vector, 0);
                        } else {
                                apix_dispatch_hilevel(vector, 0);
                        }
                        (void) apix_hilevel_intr_epilog(cpu, oldipl);
                } else {
                        newsp = apix_intr_thread_prolog(cpu, newipl,
                            (caddr_t)rp);
                        switch_sp_and_call(newsp, apix_dispatch_lowlevel,
                            vector, oldipl);
                }
        } else {
                /* Add to per-pil pending queue */
                apix_add_pending_hardint(vector);
                if (newipl <= MAX(oldipl, cpu->cpu_base_spl) ||
                    !apixs[cpu->cpu_id]->x_intr_pending)
                        return;
        }

do_pending:
        if (apix_do_pending_hilevel(cpu, rp) < 0)
                return;

        do {
                ret = apix_do_pending_hardint(cpu, rp);

                /*
                 * Deliver any pending soft interrupts.
                 */
                (void) apix_do_softint(rp);
        } while (!ret && LOWLEVEL_PENDING(cpu));
}