OS-7125 Need mitigation of L1TF (CVE-2018-3646)
Reviewed by: Robert Mustacchi <rm@joyent.com>
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>

Old:
 449 #include <sys/trap.h>
 450 #include <sys/ftrace.h>
 451 #include <sys/traptrace.h>
 452 #include <sys/clock.h>
 453 #include <sys/panic.h>
 454 #include <sys/disp.h>
 455 #include <vm/seg_kp.h>
 456 #include <sys/stack.h>
 457 #include <sys/sysmacros.h>
 458 #include <sys/cmn_err.h>
 459 #include <sys/kstat.h>
 460 #include <sys/smp_impldefs.h>
 461 #include <sys/pool_pset.h>
 462 #include <sys/zone.h>
 463 #include <sys/bitmap.h>
 464 #include <sys/archsystm.h>
 465 #include <sys/machsystm.h>
 466 #include <sys/ontrap.h>
 467 #include <sys/x86_archext.h>
 468 #include <sys/promif.h>
 469 #include <vm/hat_i86.h>
 470 #if defined(__xpv)
 471 #include <sys/hypervisor.h>
 472 #endif
 473 
 474 #if defined(__amd64) && !defined(__xpv)
 475 /* If this fails, then the padding numbers in machcpuvar.h are wrong. */
 476 CTASSERT((offsetof(cpu_t, cpu_m) + offsetof(struct machcpu, mcpu_pad)) <
 477     MMU_PAGESIZE);
 478 CTASSERT((offsetof(cpu_t, cpu_m) + offsetof(struct machcpu, mcpu_kpti)) >=
 479     MMU_PAGESIZE);
 480 CTASSERT((offsetof(cpu_t, cpu_m) + offsetof(struct machcpu, mcpu_kpti_dbg)) <
 481     2 * MMU_PAGESIZE);
 482 CTASSERT((offsetof(cpu_t, cpu_m) + offsetof(struct machcpu, mcpu_pad2)) <
 483     2 * MMU_PAGESIZE);
 484 CTASSERT(((sizeof (struct kpti_frame)) & 0xF) == 0);
 485 CTASSERT(((offsetof(cpu_t, cpu_m) +
 486     offsetof(struct machcpu, mcpu_kpti_dbg)) & 0xF) == 0);
 487 CTASSERT((offsetof(struct kpti_frame, kf_tr_rsp) & 0xF) == 0);
 488 #endif
 489 
 490 #if defined(__xpv) && defined(DEBUG)
 491 
 492 /*
 493  * This panic message is intended as an aid to interrupt debugging.
 494  *
 495  * The associated assertion tests the condition of enabling
 496  * events when events are already enabled.  The implication is
 497  * that whatever code the programmer thought was protected by
 498  * having events disabled until the second enable happened
 499  * really wasn't protected at all.
 500  */
 501 
 502 int stistipanic = 1;    /* controls the debug panic check */
 503 const char *stistimsg = "stisti";
 504 ulong_t laststi[NCPU];
 505 
 506 /*
 507  * This variable tracks the last place events were disabled on each cpu


 583                  * there is no need to check for an interrupt thread.  That
 584                  * will be done by the lowest priority high-level interrupt
 585                  * active.
 586                  */
 587         } else {
 588                 kthread_t *t = cpu->cpu_thread;
 589 
 590                 /*
 591                  * See if we are interrupting a low-level interrupt thread.
 592                  * If so, account for its time slice only if its time stamp
 593                  * is non-zero.
 594                  */
 595                 if ((t->t_flag & T_INTR_THREAD) != 0 && t->t_intr_start != 0) {
 596                         intrtime = now - t->t_intr_start;
 597                         mcpu->intrstat[t->t_pil][0] += intrtime;
 598                         cpu->cpu_intracct[cpu->cpu_mstate] += intrtime;
 599                         t->t_intr_start = 0;
 600                 }
 601         }
 602 
 603         /*
 604          * Store starting timestamp in CPU structure for this PIL.
 605          */
 606         mcpu->pil_high_start[pil - (LOCK_LEVEL + 1)] = now;
 607 
 608         ASSERT((cpu->cpu_intr_actv & (1 << pil)) == 0);
 609 
 610         if (pil == 15) {
 611                 /*
 612                  * To support reentrant level 15 interrupts, we maintain a
 613                  * recursion count in the top half of cpu_intr_actv.  Only
 614                  * when this count hits zero do we clear the PIL 15 bit from
 615                  * the lower half of cpu_intr_actv.
 616                  */
 617                 uint16_t *refcntp = (uint16_t *)&cpu->cpu_intr_actv + 1;
 618                 (*refcntp)++;
 619         }
 620 
 621         mask = cpu->cpu_intr_actv;
 622 


 687                 ASSERT(nestpil < pil);
 688                 mcpu->pil_high_start[nestpil - (LOCK_LEVEL + 1)] = now;
 689                 /*
 690                  * (Another high-level interrupt is active below this one,
 691                  * so there is no need to check for an interrupt
 692                  * thread.  That will be done by the lowest priority
 693                  * high-level interrupt active.)
 694                  */
 695         } else {
 696                 /*
 697                  * Check to see if there is a low-level interrupt active.
 698                  * If so, place a starting timestamp in the thread
 699                  * structure.
 700                  */
 701                 kthread_t *t = cpu->cpu_thread;
 702 
 703                 if (t->t_flag & T_INTR_THREAD)
 704                         t->t_intr_start = now;
 705         }
 706 
 707         mcpu->mcpu_pri = oldpil;
 708         (void) (*setlvlx)(oldpil, vecnum);
 709 
 710         return (cpu->cpu_intr_actv & CPU_INTR_ACTV_HIGH_LEVEL_MASK);
 711 }
 712 
 713 /*
 714  * Set up the cpu, thread and interrupt thread structures for
 715  * executing an interrupt thread.  The new stack pointer of the
 716  * interrupt thread (which *must* be switched to) is returned.
 717  */
 718 static caddr_t
 719 intr_thread_prolog(struct cpu *cpu, caddr_t stackptr, uint_t pil)
 720 {
 721         struct machcpu *mcpu = &cpu->cpu_m;
 722         kthread_t *t, *volatile it;
 723         hrtime_t now = tsc_read();
 724 
 725         ASSERT(pil > 0);
 726         ASSERT((cpu->cpu_intr_actv & (1 << pil)) == 0);


 749          * unlink the interrupt thread from the cpu
 750          *
 751          * Note that the code in kcpc_overflow_intr -relies- on the
 752          * ordering of events here - in particular that t->t_lwp of
 753          * the interrupt thread is set to the pinned thread *before*
 754          * curthread is changed.
 755          */
 756         it = cpu->cpu_intr_thread;
 757         cpu->cpu_intr_thread = it->t_link;
 758         it->t_intr = t;
 759         it->t_lwp = t->t_lwp;
 760 
 761         /*
 762          * (threads on the interrupt thread free list could have state
 763          * preset to TS_ONPROC, but it helps in debugging if
 764          * they're TS_FREE.)
 765          */
 766         it->t_state = TS_ONPROC;
 767 
 768         cpu->cpu_thread = it;                /* new curthread on this cpu */
 769         it->t_pil = (uchar_t)pil;
 770         it->t_pri = intr_pri + (pri_t)pil;
 771         it->t_intr_start = now;
 772 
 773         return (it->t_stk);
 774 }
 775 
 776 
 777 #ifdef DEBUG
 778 int intr_thread_cnt;
 779 #endif
 780 
 781 /*
 782  * Called with interrupts disabled
 783  */
 784 static void
 785 intr_thread_epilog(struct cpu *cpu, uint_t vec, uint_t oldpil)
 786 {
 787         struct machcpu *mcpu = &cpu->cpu_m;
 788         kthread_t *t;


 839                  */
 840                 it->t_link = cpu->cpu_intr_thread;
 841                 cpu->cpu_intr_thread = it;
 842                 swtch();
 843                 panic("intr_thread_epilog: swtch returned");
 844                 /*NOTREACHED*/
 845         }
 846 
 847         /*
 848          * Return interrupt thread to the pool
 849          */
 850         it->t_link = cpu->cpu_intr_thread;
 851         cpu->cpu_intr_thread = it;
 852         it->t_state = TS_FREE;
 853 
 854         basespl = cpu->cpu_base_spl;
 855         pil = MAX(oldpil, basespl);
 856         mcpu->mcpu_pri = pil;
 857         (*setlvlx)(pil, vec);
 858         t->t_intr_start = now;
 859         cpu->cpu_thread = t;
 860 }
 861 
 862 /*
 863  * intr_get_time() is a resource for interrupt handlers to determine how
 864  * much time has been spent handling the current interrupt. Such a function
 865  * is needed because higher level interrupts can arrive during the
 866  * processing of an interrupt.  intr_get_time() only returns time spent in the
 867  * current interrupt handler.
 868  *
 869  * The caller must be calling from an interrupt handler running at a pil
 870  * below or at lock level. Timings are not provided for high-level
 871  * interrupts.
 872  *
 873  * The first time intr_get_time() is called while handling an interrupt,
 874  * it returns the time since the interrupt handler was invoked. Subsequent
 875  * calls will return the time since the prior call to intr_get_time(). Time
 876  * is returned as ticks. Use scalehrtimef() to convert ticks to nsec.
 877  *
 878  * Theory Of Intrstat[][]:


1026          * Note that the code in kcpc_overflow_intr -relies- on the
1027          * ordering of events here - in particular that t->t_lwp of
1028          * the interrupt thread is set to the pinned thread *before*
1029          * curthread is changed.
1030          */
1031         it->t_lwp = t->t_lwp;
1032         it->t_state = TS_ONPROC;
1033 
1034         /*
1035          * Push interrupted thread onto list from new thread.
1036          * Set the new thread as the current one.
1037          * Set interrupted thread's T_SP because if it is the idle thread,
1038          * resume() may use that stack between threads.
1039          */
1040 
1041         ASSERT(SA((uintptr_t)stackptr) == (uintptr_t)stackptr);
1042         t->t_sp = (uintptr_t)stackptr;
1043 
1044         it->t_intr = t;
1045         cpu->cpu_thread = it;
1046 
1047         /*
1048          * Set bit for this pil in CPU's interrupt active bitmask.
1049          */
1050         ASSERT((cpu->cpu_intr_actv & (1 << pil)) == 0);
1051         cpu->cpu_intr_actv |= (1 << pil);
1052 
1053         /*
1054          * Initialize thread priority level from intr_pri
1055          */
1056         it->t_pil = (uchar_t)pil;
1057         it->t_pri = (pri_t)pil + intr_pri;
1058         it->t_intr_start = now;
1059 
1060         return (it->t_stk);
1061 }
1062 
1063 static void
1064 dosoftint_epilog(struct cpu *cpu, uint_t oldpil)
1065 {


1086          * fairly simple.  Otherwise it isn't.
1087          */
1088         if ((t = it->t_intr) == NULL) {
1089                 /*
1090                  * Put thread back on the interrupt thread list.
1091                  * This was an interrupt thread, so set CPU's base SPL.
1092                  */
1093                 set_base_spl();
1094                 it->t_state = TS_FREE;
1095                 it->t_link = cpu->cpu_intr_thread;
1096                 cpu->cpu_intr_thread = it;
1097                 (void) splhigh();
1098                 sti();
1099                 swtch();
1100                 panic("dosoftint_epilog: swtch returned");
1101                 /*NOTREACHED*/
1102         }
1103         it->t_link = cpu->cpu_intr_thread;
1104         cpu->cpu_intr_thread = it;
1105         it->t_state = TS_FREE;
1106         cpu->cpu_thread = t;
1107         if (t->t_flag & T_INTR_THREAD)
1108                 t->t_intr_start = now;
1109         basespl = cpu->cpu_base_spl;
1110         pil = MAX(oldpil, basespl);
1111         mcpu->mcpu_pri = pil;
1112         (*setspl)(pil);
1113 }
1114 
1115 
1116 /*
1117  * Make the interrupted thread 'to' be runnable.
1118  *
1119  * Since t->t_sp has already been saved, t->t_pc is all
1120  * that needs to be set in this function.
1121  *
1122  * Returns the interrupt level of the interrupt thread.
1123  */
1124 int
1125 intr_passivate(
1126         kthread_t *it,          /* interrupt thread */


New:

 449 #include <sys/trap.h>
 450 #include <sys/ftrace.h>
 451 #include <sys/traptrace.h>
 452 #include <sys/clock.h>
 453 #include <sys/panic.h>
 454 #include <sys/disp.h>
 455 #include <vm/seg_kp.h>
 456 #include <sys/stack.h>
 457 #include <sys/sysmacros.h>
 458 #include <sys/cmn_err.h>
 459 #include <sys/kstat.h>
 460 #include <sys/smp_impldefs.h>
 461 #include <sys/pool_pset.h>
 462 #include <sys/zone.h>
 463 #include <sys/bitmap.h>
 464 #include <sys/archsystm.h>
 465 #include <sys/machsystm.h>
 466 #include <sys/ontrap.h>
 467 #include <sys/x86_archext.h>
 468 #include <sys/promif.h>
 469 #include <sys/ht.h>
 470 #include <vm/hat_i86.h>
 471 #if defined(__xpv)
 472 #include <sys/hypervisor.h>
 473 #endif
 474 
 475 /* If these fail, then the padding numbers in machcpuvar.h are wrong. */
 476 #if !defined(__xpv)
 477 #define MCOFF(member)   \
 478         (offsetof(cpu_t, cpu_m) + offsetof(struct machcpu, member))
 479 CTASSERT(MCOFF(mcpu_pad) == MACHCPU_SIZE);
 480 CTASSERT(MCOFF(mcpu_pad2) == MMU_PAGESIZE);
 481 CTASSERT((MCOFF(mcpu_kpti) & 0xF) == 0);
 482 CTASSERT(((sizeof (struct kpti_frame)) & 0xF) == 0);
 483 CTASSERT((offsetof(struct kpti_frame, kf_tr_rsp) & 0xF) == 0);
 484 CTASSERT(MCOFF(mcpu_pad3) < 2 * MMU_PAGESIZE);
 485 #endif
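
For readers unfamiliar with the mechanism: CTASSERT() turns a constant
expression into a compile-time check, failing the build when the expression
is false; that is what lets these padding checks catch a stale machcpuvar.h
without any runtime cost. A minimal sketch of the technique (MY_CTASSERT is
a hypothetical stand-in; illumos's real macro lives in <sys/debug.h>):

    /* A false condition yields a negative array size: a compile error. */
    #define MY_CTASSERT(x)          MY_CTASSERT_1(x, __LINE__)
    #define MY_CTASSERT_1(x, y)     MY_CTASSERT_2(x, y)
    #define MY_CTASSERT_2(x, y)     typedef char my_ctassert_##y[(x) ? 1 : -1]

    MY_CTASSERT(sizeof (uint64_t) == 8);    /* compiles cleanly */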
 486 
 487 #if defined(__xpv) && defined(DEBUG)
 488 
 489 /*
 490  * This panic message is intended as an aid to interrupt debugging.
 491  *
 492  * The associated assertion tests the condition of enabling
 493  * events when events are already enabled.  The implication is
 494  * that whatever code the programmer thought was protected by
 495  * having events disabled until the second enable happened
 496  * really wasn't protected at all.
 497  */
 498 
 499 int stistipanic = 1;    /* controls the debug panic check */
 500 const char *stistimsg = "stisti";
 501 ulong_t laststi[NCPU];
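
A hedged sketch of the check these variables support (everything here other
than stistipanic, stistimsg, laststi, and panic() is hypothetical; this is
not the actual xen event-enable path):

    static void
    sketch_sti(int cpu_id, int events_already_enabled, ulong_t ret_addr)
    {
            if (events_already_enabled && stistipanic)
                    panic("%s", stistimsg);
            laststi[cpu_id] = ret_addr;    /* record where the sti happened */
    }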
 502 
 503 /*
 504  * This variable tracks the last place events were disabled on each cpu


 580                  * there is no need to check for an interrupt thread.  That
 581                  * will be done by the lowest priority high-level interrupt
 582                  * active.
 583                  */
 584         } else {
 585                 kthread_t *t = cpu->cpu_thread;
 586 
 587                 /*
 588                  * See if we are interrupting a low-level interrupt thread.
 589                  * If so, account for its time slice only if its time stamp
 590                  * is non-zero.
 591                  */
 592                 if ((t->t_flag & T_INTR_THREAD) != 0 && t->t_intr_start != 0) {
 593                         intrtime = now - t->t_intr_start;
 594                         mcpu->intrstat[t->t_pil][0] += intrtime;
 595                         cpu->cpu_intracct[cpu->cpu_mstate] += intrtime;
 596                         t->t_intr_start = 0;
 597                 }
 598         }
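                /*
                 * (Worked example of the accounting above: if t_intr_start
                 * was 5000 ticks and now is 8000, intrtime = 3000 ticks is
                 * charged to intrstat[t_pil][0]; zeroing t_intr_start keeps
                 * the same interval from being charged a second time when
                 * the pinned interrupt thread is resumed.)
                 */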
 599 
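        /*
         * (ht_begin_intr()/ht_end_intr(), threaded through these paths by
         * this change, bracket interrupt handling for the L1TF mitigation:
         * they tell the hyperthread-exclusion logic when this CPU enters
         * and leaves interrupt context at a given PIL, so that the sibling
         * hyperthread can be kept out of a sensitive context in the
         * meantime.  A summary of intent, not of the <sys/ht.h>
         * implementation.)
         */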
 600         ht_begin_intr(pil);
 601 
 602         /*
 603          * Store starting timestamp in CPU structure for this PIL.
 604          */
 605         mcpu->pil_high_start[pil - (LOCK_LEVEL + 1)] = now;
 606 
 607         ASSERT((cpu->cpu_intr_actv & (1 << pil)) == 0);
 608 
 609         if (pil == 15) {
 610                 /*
 611                  * To support reentrant level 15 interrupts, we maintain a
 612                  * recursion count in the top half of cpu_intr_actv.  Only
 613                  * when this count hits zero do we clear the PIL 15 bit from
 614                  * the lower half of cpu_intr_actv.
 615                  */
 616                 uint16_t *refcntp = (uint16_t *)&cpu->cpu_intr_actv + 1;
 617                 (*refcntp)++;
 618         }
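                /*
                 * (For symmetry: the corresponding high-level epilog, not
                 * shown in this hunk, decrements the same upper-half count
                 * and clears the PIL 15 bit only when it reaches zero,
                 * roughly:
                 *
                 *      refcntp = (uint16_t *)&cpu->cpu_intr_actv + 1;
                 *      if (--(*refcntp) == 0)
                 *              cpu->cpu_intr_actv &= ~(1 << 15);
                 *
                 * A sketch of the pairing, not the verbatim code.)
                 */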
 619 
 620         mask = cpu->cpu_intr_actv;
 621 


 686                 ASSERT(nestpil < pil);
 687                 mcpu->pil_high_start[nestpil - (LOCK_LEVEL + 1)] = now;
 688                 /*
 689                  * (Another high-level interrupt is active below this one,
 690                  * so there is no need to check for an interrupt
 691                  * thread.  That will be done by the lowest priority
 692                  * high-level interrupt active.)
 693                  */
 694         } else {
 695                 /*
 696                  * Check to see if there is a low-level interrupt active.
 697                  * If so, place a starting timestamp in the thread
 698                  * structure.
 699                  */
 700                 kthread_t *t = cpu->cpu_thread;
 701 
 702                 if (t->t_flag & T_INTR_THREAD)
 703                         t->t_intr_start = now;
 704         }
 705 
 706         ht_end_intr();
 707 
 708         mcpu->mcpu_pri = oldpil;
 709         (void) (*setlvlx)(oldpil, vecnum);
 710 
 711         return (cpu->cpu_intr_actv & CPU_INTR_ACTV_HIGH_LEVEL_MASK);
 712 }
 713 
 714 /*
 715  * Set up the cpu, thread and interrupt thread structures for
 716  * executing an interrupt thread.  The new stack pointer of the
 717  * interrupt thread (which *must* be switched to) is returned.
 718  */
 719 static caddr_t
 720 intr_thread_prolog(struct cpu *cpu, caddr_t stackptr, uint_t pil)
 721 {
 722         struct machcpu *mcpu = &cpu->cpu_m;
 723         kthread_t *t, *volatile it;
 724         hrtime_t now = tsc_read();
 725 
 726         ASSERT(pil > 0);
 727         ASSERT((cpu->cpu_intr_actv & (1 << pil)) == 0);


 750          * unlink the interrupt thread from the cpu
 751          *
 752          * Note that the code in kcpc_overflow_intr -relies- on the
 753          * ordering of events here - in particular that t->t_lwp of
 754          * the interrupt thread is set to the pinned thread *before*
 755          * curthread is changed.
 756          */
 757         it = cpu->cpu_intr_thread;
 758         cpu->cpu_intr_thread = it->t_link;
 759         it->t_intr = t;
 760         it->t_lwp = t->t_lwp;
 761 
 762         /*
 763          * (threads on the interrupt thread free list could have state
 764          * preset to TS_ONPROC, but it helps in debugging if
 765          * they're TS_FREE.)
 766          */
 767         it->t_state = TS_ONPROC;
 768 
 769         cpu->cpu_thread = it;                /* new curthread on this cpu */
 770         ht_begin_intr(pil);
 771 
 772         it->t_pil = (uchar_t)pil;
 773         it->t_pri = intr_pri + (pri_t)pil;
 774         it->t_intr_start = now;
 775 
 776         return (it->t_stk);
 777 }
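
cpu_intr_thread is the head of a per-CPU LIFO free list of interrupt
threads linked through t_link: the prolog above pops the head, and the
epilog below pushes the thread back.  A self-contained sketch of the
pattern (struct and function names here are hypothetical):

    struct ithread {
            struct ithread *t_link;         /* next free interrupt thread */
    };

    /* Prolog side: take an interrupt thread off the free list. */
    static struct ithread *
    ithread_pop(struct ithread **headp)
    {
            struct ithread *it = *headp;

            *headp = it->t_link;
            return (it);
    }

    /* Epilog side: return the interrupt thread to the free list. */
    static void
    ithread_push(struct ithread **headp, struct ithread *it)
    {
            it->t_link = *headp;
            *headp = it;
    }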
 778 
 779 
 780 #ifdef DEBUG
 781 int intr_thread_cnt;
 782 #endif
 783 
 784 /*
 785  * Called with interrupts disabled
 786  */
 787 static void
 788 intr_thread_epilog(struct cpu *cpu, uint_t vec, uint_t oldpil)
 789 {
 790         struct machcpu *mcpu = &cpu->cpu_m;
 791         kthread_t *t;


 842                  */
 843                 it->t_link = cpu->cpu_intr_thread;
 844                 cpu->cpu_intr_thread = it;
 845                 swtch();
 846                 panic("intr_thread_epilog: swtch returned");
 847                 /*NOTREACHED*/
 848         }
 849 
 850         /*
 851          * Return interrupt thread to the pool
 852          */
 853         it->t_link = cpu->cpu_intr_thread;
 854         cpu->cpu_intr_thread = it;
 855         it->t_state = TS_FREE;
 856 
 857         basespl = cpu->cpu_base_spl;
 858         pil = MAX(oldpil, basespl);
 859         mcpu->mcpu_pri = pil;
 860         (*setlvlx)(pil, vec);
 861         t->t_intr_start = now;
 862         ht_end_intr();
 863         cpu->cpu_thread = t;
 864 }
 865 
 866 /*
 867  * intr_get_time() is a resource for interrupt handlers to determine how
 868  * much time has been spent handling the current interrupt. Such a function
 869  * is needed because higher level interrupts can arrive during the
 870  * processing of an interrupt.  intr_get_time() only returns time spent in the
 871  * current interrupt handler.
 872  *
 873  * The caller must be calling from an interrupt handler running at a pil
 874  * below or at lock level. Timings are not provided for high-level
 875  * interrupts.
 876  *
 877  * The first time intr_get_time() is called while handling an interrupt,
 878  * it returns the time since the interrupt handler was invoked. Subsequent
 879  * calls will return the time since the prior call to intr_get_time(). Time
 880  * is returned as ticks. Use scalehrtimef() to convert ticks to nsec.
 881  *
 882  * Theory Of Intrstat[][]:

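A hedged usage sketch of the interface described above (the driver and
handler names are hypothetical; per the block comment, the return value is
in unscaled ticks):

    /* A handler registered at or below LOCK_LEVEL. */
    static uint_t
    mydrv_intr(caddr_t arg1, caddr_t arg2)
    {
            hrtime_t ticks;

            /* ... service the device ... */

            ticks = intr_get_time();    /* ticks since this handler began */
            /* scalehrtimef(), per the comment above, converts to nsec. */

            return (DDI_INTR_CLAIMED);
    }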

1030          * Note that the code in kcpc_overflow_intr -relies- on the
1031          * ordering of events here - in particular that t->t_lwp of
1032          * the interrupt thread is set to the pinned thread *before*
1033          * curthread is changed.
1034          */
1035         it->t_lwp = t->t_lwp;
1036         it->t_state = TS_ONPROC;
1037 
1038         /*
1039          * Push interrupted thread onto list from new thread.
1040          * Set the new thread as the current one.
1041          * Set interrupted thread's T_SP because if it is the idle thread,
1042          * resume() may use that stack between threads.
1043          */
1044 
1045         ASSERT(SA((uintptr_t)stackptr) == (uintptr_t)stackptr);
1046         t->t_sp = (uintptr_t)stackptr;
1047 
1048         it->t_intr = t;
1049         cpu->cpu_thread = it;
1050         ht_begin_intr(pil);
1051 
1052         /*
1053          * Set bit for this pil in CPU's interrupt active bitmask.
1054          */
1055         ASSERT((cpu->cpu_intr_actv & (1 << pil)) == 0);
1056         cpu->cpu_intr_actv |= (1 << pil);
1057 
1058         /*
1059          * Initialize thread priority level from intr_pri
1060          */
1061         it->t_pil = (uchar_t)pil;
1062         it->t_pri = (pri_t)pil + intr_pri;
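        /*
         * (E.g., with a hypothetical intr_pri of 100, a PIL 10 soft
         * interrupt thread runs at t_pri 110; the intr_pri offset keeps
         * interrupt threads above normal dispatcher priorities.)
         */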
1063         it->t_intr_start = now;
1064 
1065         return (it->t_stk);
1066 }
1067 
1068 static void
1069 dosoftint_epilog(struct cpu *cpu, uint_t oldpil)
1070 {


1091          * fairly simple.  Otherwise it isn't.
1092          */
1093         if ((t = it->t_intr) == NULL) {
1094                 /*
1095                  * Put thread back on the interrupt thread list.
1096                  * This was an interrupt thread, so set CPU's base SPL.
1097                  */
1098                 set_base_spl();
1099                 it->t_state = TS_FREE;
1100                 it->t_link = cpu->cpu_intr_thread;
1101                 cpu->cpu_intr_thread = it;
1102                 (void) splhigh();
1103                 sti();
1104                 swtch();
1105                 panic("dosoftint_epilog: swtch returned");
1106                 /*NOTREACHED*/
1107         }
1108         it->t_link = cpu->cpu_intr_thread;
1109         cpu->cpu_intr_thread = it;
1110         it->t_state = TS_FREE;
1111         ht_end_intr();
1112         cpu->cpu_thread = t;
1113 
1114         if (t->t_flag & T_INTR_THREAD)
1115                 t->t_intr_start = now;
1116         basespl = cpu->cpu_base_spl;
1117         pil = MAX(oldpil, basespl);
1118         mcpu->mcpu_pri = pil;
1119         (*setspl)(pil);
1120 }
1121 
1122 
1123 /*
1124  * Make the interrupted thread 'to' be runnable.
1125  *
1126  * Since t->t_sp has already been saved, t->t_pc is all
1127  * that needs to be set in this function.
1128  *
1129  * Returns the interrupt level of the interrupt thread.
1130  */
1131 int
1132 intr_passivate(
1133         kthread_t *it,          /* interrupt thread */