10924 Need mitigation of L1TF (CVE-2018-3646)
Reviewed by: Robert Mustacchi <rm@joyent.com>
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Peter Tribble <peter.tribble@gmail.com>

*** 21,30 ****
--- 21,34 ----
  /*
   * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
   * Use is subject to license terms.
   */
  
+ /*
+  * Copyright (c) 2018, Joyent, Inc. All rights reserved.
+  */
+ 
  /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
  /*	  All Rights Reserved	*/
  
  #include <sys/types.h>
*** 54,63 ****
--- 58,68 ----
  #include <sys/schedctl.h>
  #include <sys/atomic.h>
  #include <sys/dtrace.h>
  #include <sys/sdt.h>
  #include <sys/archsystm.h>
+ #include <sys/ht.h>
  #include <vm/as.h>
  
  #define	BOUND_CPU	0x1
  #define	BOUND_PARTITION	0x2
*** 1113,1131 ****
  	 * appear at the end of resume(), because we may not
  	 * return here
  	 */
  }
  
- #define	CPU_IDLING(pri)	((pri) == -1)
- 
  static void
  cpu_resched(cpu_t *cp, pri_t tpri)
  {
  	int	call_poke_cpu = 0;
  	pri_t	cpupri = cp->cpu_dispatch_pri;
  
! 	if (!CPU_IDLING(cpupri) && (cpupri < tpri)) {
  		TRACE_2(TR_FAC_DISP, TR_CPU_RESCHED,
  		    "CPU_RESCHED:Tpri %d Cpupri %d", tpri, cpupri);
  		if (tpri >= upreemptpri && cp->cpu_runrun == 0) {
  			cp->cpu_runrun = 1;
  			aston(cp->cpu_dispthread);
--- 1118,1134 ----
  	 * appear at the end of resume(), because we may not
  	 * return here
  	 */
  }
  
  static void
  cpu_resched(cpu_t *cp, pri_t tpri)
  {
  	int	call_poke_cpu = 0;
  	pri_t	cpupri = cp->cpu_dispatch_pri;
  
! 	if (cpupri != CPU_IDLE_PRI && cpupri < tpri) {
  		TRACE_2(TR_FAC_DISP, TR_CPU_RESCHED,
  		    "CPU_RESCHED:Tpri %d Cpupri %d", tpri, cpupri);
  		if (tpri >= upreemptpri && cp->cpu_runrun == 0) {
  			cp->cpu_runrun = 1;
  			aston(cp->cpu_dispthread);
*** 1217,1237 ****
  		}
  
  		/*
  		 * We'll generally let this thread continue to run where
  		 * it last ran...but will consider migration if:
! 		 *	- We thread probably doesn't have much cache warmth.
  		 *	- The CPU where it last ran is the target of an offline
  		 *	  request.
! 		 *	- The thread last ran outside it's home lgroup.
  		 */
  		if ((!THREAD_HAS_CACHE_WARMTH(tp)) ||
! 		    (tp->t_cpu == cpu_inmotion)) {
! 			cp = disp_lowpri_cpu(tp->t_cpu, tp->t_lpl, tpri, NULL);
! 		} else if (!LGRP_CONTAINS_CPU(tp->t_lpl->lpl_lgrp, tp->t_cpu)) {
! 			cp = disp_lowpri_cpu(tp->t_cpu, tp->t_lpl, tpri,
! 			    self ? tp->t_cpu : NULL);
  		} else {
  			cp = tp->t_cpu;
  		}
  
  		if (tp->t_cpupart == cp->cpu_part) {
--- 1220,1240 ----
  		}
  
  		/*
  		 * We'll generally let this thread continue to run where
  		 * it last ran...but will consider migration if:
! 		 *	- The thread probably doesn't have much cache warmth.
! 		 *	- HT exclusion would prefer us to run elsewhere
  		 *	- The CPU where it last ran is the target of an offline
  		 *	  request.
! 		 *	- The thread last ran outside its home lgroup.
  		 */
  		if ((!THREAD_HAS_CACHE_WARMTH(tp)) ||
! 		    !ht_should_run(tp, tp->t_cpu) ||
! 		    (tp->t_cpu == cpu_inmotion) ||
! 		    !LGRP_CONTAINS_CPU(tp->t_lpl->lpl_lgrp, tp->t_cpu)) {
! 			cp = disp_lowpri_cpu(tp->t_cpu, tp, tpri);
  		} else {
  			cp = tp->t_cpu;
  		}
  
  		if (tp->t_cpupart == cp->cpu_part) {
*** 1256,1266 ****
  					newcp = cp->cpu_next_part;
  				} else if ((newcp = cp->cpu_next_lpl) == cp) {
  					newcp = cp->cpu_next_part;
  				}
  
! 				if (RUNQ_LEN(newcp, tpri) < qlen) {
  					DTRACE_PROBE3(runq__balance,
  					    kthread_t *, tp,
  					    cpu_t *, cp, cpu_t *, newcp);
  					cp = newcp;
  				}
--- 1259,1270 ----
  					newcp = cp->cpu_next_part;
  				} else if ((newcp = cp->cpu_next_lpl) == cp) {
  					newcp = cp->cpu_next_part;
  				}
  
! 				if (ht_should_run(tp, newcp) &&
! 				    RUNQ_LEN(newcp, tpri) < qlen) {
  					DTRACE_PROBE3(runq__balance,
  					    kthread_t *, tp,
  					    cpu_t *, cp, cpu_t *, newcp);
  					cp = newcp;
  				}
*** 1267,1278 ****
  			}
  		} else {
  			/*
  			 * Migrate to a cpu in the new partition.
  			 */
! 			cp = disp_lowpri_cpu(tp->t_cpupart->cp_cpulist,
! 			    tp->t_lpl, tp->t_pri, NULL);
  		}
  		ASSERT((cp->cpu_flags & CPU_QUIESCED) == 0);
  	} else {
  		/*
  		 * It is possible that t_weakbound_cpu != t_bound_cpu (for
--- 1271,1282 ----
  			}
  		} else {
  			/*
  			 * Migrate to a cpu in the new partition.
  			 */
! 			cp = disp_lowpri_cpu(tp->t_cpupart->cp_cpulist, tp,
! 			    tp->t_pri);
  		}
  		ASSERT((cp->cpu_flags & CPU_QUIESCED) == 0);
  	} else {
  		/*
  		 * It is possible that t_weakbound_cpu != t_bound_cpu (for
*** 1405,1437 ****
  		cp = tp->t_cpu;
  		if (tp->t_cpupart == cp->cpu_part) {
  			/*
  			 * We'll generally let this thread continue to run
  			 * where it last ran, but will consider migration if:
! 			 *	- The thread last ran outside it's home lgroup.
  			 *	- The CPU where it last ran is the target of an
  			 *	  offline request (a thread_nomigrate() on the in
  			 *	  motion CPU relies on this when forcing a preempt).
  			 *	- The thread isn't the highest priority thread where
  			 *	  it last ran, and it is considered not likely to
  			 *	  have significant cache warmth.
  			 */
! 			if ((!LGRP_CONTAINS_CPU(tp->t_lpl->lpl_lgrp, cp)) ||
! 			    (cp == cpu_inmotion)) {
! 				cp = disp_lowpri_cpu(tp->t_cpu, tp->t_lpl, tpri,
! 				    (tp == curthread) ? cp : NULL);
! 			} else if ((tpri < cp->cpu_disp->disp_maxrunpri) &&
! 			    (!THREAD_HAS_CACHE_WARMTH(tp))) {
! 				cp = disp_lowpri_cpu(tp->t_cpu, tp->t_lpl, tpri,
! 				    NULL);
  			}
  		} else {
  			/*
  			 * Migrate to a cpu in the new partition.
  			 */
  			cp = disp_lowpri_cpu(tp->t_cpupart->cp_cpulist,
! 			    tp->t_lpl, tp->t_pri, NULL);
  		}
  		ASSERT((cp->cpu_flags & CPU_QUIESCED) == 0);
  	} else {
  		/*
  		 * It is possible that t_weakbound_cpu != t_bound_cpu (for
--- 1409,1438 ----
  		cp = tp->t_cpu;
  		if (tp->t_cpupart == cp->cpu_part) {
  			/*
  			 * We'll generally let this thread continue to run
  			 * where it last ran, but will consider migration if:
! 			 *	- The thread last ran outside its home lgroup.
  			 *	- The CPU where it last ran is the target of an
  			 *	  offline request (a thread_nomigrate() on the in
  			 *	  motion CPU relies on this when forcing a preempt).
  			 *	- The thread isn't the highest priority thread where
  			 *	  it last ran, and it is considered not likely to
  			 *	  have significant cache warmth.
  			 */
! 			if (!LGRP_CONTAINS_CPU(tp->t_lpl->lpl_lgrp, cp) ||
! 			    cp == cpu_inmotion ||
! 			    (tpri < cp->cpu_disp->disp_maxrunpri &&
! 			    !THREAD_HAS_CACHE_WARMTH(tp))) {
! 				cp = disp_lowpri_cpu(tp->t_cpu, tp, tpri);
  			}
  		} else {
  			/*
  			 * Migrate to a cpu in the new partition.
  			 */
  			cp = disp_lowpri_cpu(tp->t_cpupart->cp_cpulist,
! 			    tp, tp->t_pri);
  		}
  		ASSERT((cp->cpu_flags & CPU_QUIESCED) == 0);
  	} else {
  		/*
  		 * It is possible that t_weakbound_cpu != t_bound_cpu (for
*** 1578,1588 ****
  	cp = tp->t_cpu;
  	if (tp->t_cpupart != cp->cpu_part) {
  		/* migrate to a cpu in the new partition */
  		cp = tp->t_cpupart->cp_cpulist;
  	}
! 	cp = disp_lowpri_cpu(cp, tp->t_lpl, tp->t_pri, NULL);
  	disp_lock_enter_high(&cp->cpu_disp->disp_lock);
  	ASSERT((cp->cpu_flags & CPU_QUIESCED) == 0);
  
  #ifndef NPROBE
  	/* Kernel probe */
--- 1579,1589 ----
  	cp = tp->t_cpu;
  	if (tp->t_cpupart != cp->cpu_part) {
  		/* migrate to a cpu in the new partition */
  		cp = tp->t_cpupart->cp_cpulist;
  	}
! 	cp = disp_lowpri_cpu(cp, tp, tp->t_pri);
  	disp_lock_enter_high(&cp->cpu_disp->disp_lock);
  	ASSERT((cp->cpu_flags & CPU_QUIESCED) == 0);
  
  #ifndef NPROBE
  	/* Kernel probe */
*** 2551,2634 ****
  	}
  	disp_lock_exit(&dp->disp_lock);
  }
  
  /*
!  * disp_lowpri_cpu - find CPU running the lowest priority thread.
!  * The hint passed in is used as a starting point so we don't favor
!  * CPU 0 or any other CPU.  The caller should pass in the most recently
!  * used CPU for the thread.
   *
!  * The lgroup and priority are used to determine the best CPU to run on
!  * in a NUMA machine.  The lgroup specifies which CPUs are closest while
!  * the thread priority will indicate whether the thread will actually run
!  * there.  To pick the best CPU, the CPUs inside and outside of the given
!  * lgroup which are running the lowest priority threads are found.  The
!  * remote CPU is chosen only if the thread will not run locally on a CPU
!  * within the lgroup, but will run on the remote CPU.  If the thread
!  * cannot immediately run on any CPU, the best local CPU will be chosen.
   *
!  * The lpl specified also identifies the cpu partition from which
!  * disp_lowpri_cpu should select a CPU.
   *
!  * curcpu is used to indicate that disp_lowpri_cpu is being called on
!  * behalf of the current thread. (curthread is looking for a new cpu)
!  * In this case, cpu_dispatch_pri for this thread's cpu should be
!  * ignored.
   *
!  * If a cpu is the target of an offline request then try to avoid it.
   *
!  * This function must be called at either high SPL, or with preemption
!  * disabled, so that the "hint" CPU cannot be removed from the online
!  * CPU list while we are traversing it.
   */
  cpu_t *
! disp_lowpri_cpu(cpu_t *hint, lpl_t *lpl, pri_t tpri, cpu_t *curcpu)
  {
  	cpu_t	*bestcpu;
  	cpu_t	*besthomecpu;
  	cpu_t	*cp, *cpstart;
  
- 	pri_t	bestpri;
- 	pri_t	cpupri;
- 
  	klgrpset_t	done;
- 	klgrpset_t	cur_set;
  
  	lpl_t		*lpl_iter, *lpl_leaf;
- 	int		i;
  
- 	/*
- 	 * Scan for a CPU currently running the lowest priority thread.
- 	 * Cannot get cpu_lock here because it is adaptive.
- 	 * We do not require lock on CPU list.
- 	 */
  	ASSERT(hint != NULL);
! 	ASSERT(lpl != NULL);
! 	ASSERT(lpl->lpl_ncpu > 0);
  
- 	/*
- 	 * First examine local CPUs. Note that it's possible the hint CPU
- 	 * passed in in remote to the specified home lgroup. If our priority
- 	 * isn't sufficient enough such that we can run immediately at home,
- 	 * then examine CPUs remote to our home lgroup.
- 	 * We would like to give preference to CPUs closest to "home".
- 	 * If we can't find a CPU where we'll run at a given level
- 	 * of locality, we expand our search to include the next level.
- 	 */
  	bestcpu = besthomecpu = NULL;
  	klgrpset_clear(done);
  
- 	/* start with lpl we were passed */
! 	lpl_iter = lpl;
  
  	do {
- 		bestpri = SHRT_MAX;
  		klgrpset_clear(cur_set);
  
! 		for (i = 0; i < lpl_iter->lpl_nrset; i++) {
  			lpl_leaf = lpl_iter->lpl_rset[i];
  			if (klgrpset_ismember(done, lpl_leaf->lpl_lgrpid))
  				continue;
  
  			klgrpset_add(cur_set, lpl_leaf->lpl_lgrpid);
--- 2552,2640 ----
  	}
  	disp_lock_exit(&dp->disp_lock);
  }
  
  /*
!  * Return a score rating this CPU for running this thread: lower is better.
   *
!  * If curthread is looking for a new CPU, then we ignore cpu_dispatch_pri for
!  * curcpu (as that's our own priority).
   *
!  * If a cpu is the target of an offline request, then try to avoid it.
   *
!  * Otherwise we'll use double the effective dispatcher priority for the CPU.
   *
!  * We do this so ht_adjust_cpu_score() can increment the score if needed,
!  * without ending up over-riding a dispatcher priority.
!  */
! static pri_t
! cpu_score(cpu_t *cp, kthread_t *tp)
! {
! 	pri_t score;
! 
! 	if (tp == curthread && cp == curthread->t_cpu)
! 		score = 2 * CPU_IDLE_PRI;
! 	else if (cp == cpu_inmotion)
! 		score = SHRT_MAX;
! 	else
! 		score = 2 * cp->cpu_dispatch_pri;
! 
! 	if (2 * cp->cpu_disp->disp_maxrunpri > score)
! 		score = 2 * cp->cpu_disp->disp_maxrunpri;
! 	if (2 * cp->cpu_chosen_level > score)
! 		score = 2 * cp->cpu_chosen_level;
! 
! 	return (ht_adjust_cpu_score(tp, cp, score));
! }
! 
! /*
!  * disp_lowpri_cpu - find a suitable CPU to run the given thread.
   *
!  * We are looking for a CPU with an effective dispatch priority lower than the
!  * thread's, so that the thread will run immediately rather than be enqueued.
!  * For NUMA locality, we prefer "home" CPUs within the thread's ->t_lpl group.
!  * If we don't find an available CPU there, we will expand our search to include
!  * wider locality levels. (Note these groups are already divided by CPU
!  * partition.)
!  *
!  * If the thread cannot immediately run on *any* CPU, we'll enqueue ourselves on
!  * the best home CPU we found.
!  *
!  * The hint passed in is used as a starting point so we don't favor CPU 0 or any
!  * other CPU.  The caller should pass in the most recently used CPU for the
!  * thread; it's of course possible that this CPU isn't in the home lgroup.
!  *
!  * This function must be called at either high SPL, or with preemption disabled,
!  * so that the "hint" CPU cannot be removed from the online CPU list while we
!  * are traversing it.
   */
  cpu_t *
! disp_lowpri_cpu(cpu_t *hint, kthread_t *tp, pri_t tpri)
  {
  	cpu_t	*bestcpu;
  	cpu_t	*besthomecpu;
  	cpu_t	*cp, *cpstart;
  
  	klgrpset_t	done;
  
  	lpl_t		*lpl_iter, *lpl_leaf;
  
  	ASSERT(hint != NULL);
! 	ASSERT(tp->t_lpl->lpl_ncpu > 0);
  
  	bestcpu = besthomecpu = NULL;
  	klgrpset_clear(done);
  
! 	lpl_iter = tp->t_lpl;
  
  	do {
+ 		pri_t best = SHRT_MAX;
+ 		klgrpset_t cur_set;
  		klgrpset_clear(cur_set);
  
! 		for (int i = 0; i < lpl_iter->lpl_nrset; i++) {
  			lpl_leaf = lpl_iter->lpl_rset[i];
  			if (klgrpset_ismember(done, lpl_leaf->lpl_lgrpid))
  				continue;
  
  			klgrpset_add(cur_set, lpl_leaf->lpl_lgrpid);
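The doubled-score scheme in cpu_score() above is worth spelling out: by scoring CPUs in units of twice the dispatcher priority, ht_adjust_cpu_score() can add a small odd penalty to a CPU whose hyperthread sibling is occupied without ever making that CPU look worse than one whose dispatch priority is genuinely higher. The userland sketch below shows only the arithmetic; the +1 penalty and the adjust() stand-in are illustrative assumptions, not the kernel's actual HT-exclusion logic in sys/ht.h.

#include <stdio.h>

/*
 * Illustrative stand-in for ht_adjust_cpu_score(): penalize a CPU whose
 * hyperthread sibling is busy by a single point.  The real penalty logic
 * lives in the kernel's HT-exclusion code; this only shows why the
 * dispatcher priorities are doubled before the adjustment.
 */
static int
adjust(int score, int sibling_busy)
{
	return (sibling_busy ? score + 1 : score);
}

int
main(void)
{
	/* Two CPUs running priority-30 threads; one has a busy sibling. */
	int quiet = adjust(2 * 30, 0);		/* 60 */
	int crowded = adjust(2 * 30, 1);	/* 61 */

	/* A CPU running a priority-31 thread, with an idle sibling. */
	int busier = adjust(2 * 31, 0);		/* 62 */

	/*
	 * Lower is better: the quiet CPU beats the crowded one, but the
	 * crowded CPU still beats the CPU with a truly higher priority.
	 */
	printf("quiet=%d crowded=%d busier=%d\n", quiet, crowded, busier);
	return (0);
}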
*** 2637,2674 ****
  				cp = cpstart = hint;
  			else
  				cp = cpstart = lpl_leaf->lpl_cpus;
  
  			do {
! 				if (cp == curcpu)
! 					cpupri = -1;
! 				else if (cp == cpu_inmotion)
! 					cpupri = SHRT_MAX;
! 				else
! 					cpupri = cp->cpu_dispatch_pri;
! 				if (cp->cpu_disp->disp_maxrunpri > cpupri)
! 					cpupri = cp->cpu_disp->disp_maxrunpri;
! 				if (cp->cpu_chosen_level > cpupri)
! 					cpupri = cp->cpu_chosen_level;
! 				if (cpupri < bestpri) {
! 					if (CPU_IDLING(cpupri)) {
! 						ASSERT((cp->cpu_flags &
! 						    CPU_QUIESCED) == 0);
! 						return (cp);
! 					}
  					bestcpu = cp;
! 					bestpri = cpupri;
  				}
  			} while ((cp = cp->cpu_next_lpl) != cpstart);
  		}
  
! 		if (bestcpu && (tpri > bestpri)) {
! 			ASSERT((bestcpu->cpu_flags & CPU_QUIESCED) == 0);
! 			return (bestcpu);
! 		}
  		if (besthomecpu == NULL)
  			besthomecpu = bestcpu;
  		/*
  		 * Add the lgrps we just considered to the "done" set
  		 */
  		klgrpset_or(done, cur_set);
--- 2643,2671 ----
  				cp = cpstart = hint;
  			else
  				cp = cpstart = lpl_leaf->lpl_cpus;
  
  			do {
! 				pri_t score = cpu_score(cp, tp);
! 
! 				if (score < best) {
! 					best = score;
  					bestcpu = cp;
! 
! 					/* An idle CPU: we're done. */
! 					if (score / 2 == CPU_IDLE_PRI)
! 						goto out;
  				}
  			} while ((cp = cp->cpu_next_lpl) != cpstart);
  		}
  
! 		if (bestcpu != NULL && tpri > (best / 2))
! 			goto out;
! 
  		if (besthomecpu == NULL)
  			besthomecpu = bestcpu;
+ 
  		/*
  		 * Add the lgrps we just considered to the "done" set
  		 */
  		klgrpset_or(done, cur_set);
*** 2676,2687 ****
  
  	/*
  	 * The specified priority isn't high enough to run immediately
  	 * anywhere, so just return the best CPU from the home lgroup.
  	 */
! 	ASSERT((besthomecpu->cpu_flags & CPU_QUIESCED) == 0);
! 	return (besthomecpu);
  }
  
  /*
   * This routine provides the generic idle cpu function for all processors.
   * If a processor has some specific code to execute when idle (say, to stop
--- 2673,2687 ----
  
  	/*
  	 * The specified priority isn't high enough to run immediately
  	 * anywhere, so just return the best CPU from the home lgroup.
  	 */
! 	bestcpu = besthomecpu;
! 
! out:
! 	ASSERT((bestcpu->cpu_flags & CPU_QUIESCED) == 0);
! 	return (bestcpu);
  }
  
  /*
   * This routine provides the generic idle cpu function for all processors.
   * If a processor has some specific code to execute when idle (say, to stop
*** 2696,2701 ****
--- 2696,2717 ----
  
  /*ARGSUSED*/
  static void
  generic_enq_thread(cpu_t *cpu, int bound)
  {
+ }
+ 
+ cpu_t *
+ disp_choose_best_cpu(void)
+ {
+ 	kthread_t *t = curthread;
+ 	cpu_t *curcpu = CPU;
+ 
+ 	ASSERT(t->t_preempt > 0);
+ 	ASSERT(t->t_state == TS_ONPROC);
+ 	ASSERT(t->t_schedflag & TS_VCPU);
+ 
+ 	if (ht_should_run(t, curcpu))
+ 		return (curcpu);
+ 
+ 	return (disp_lowpri_cpu(curcpu, t, t->t_pri));
  }
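Per the ASSERTs above, disp_choose_best_cpu() is meant to be called by a thread that is already on-CPU, has preemption disabled, and is flagged as a VCPU thread (TS_VCPU). The sketch below is a hedged illustration of how such a caller might react when a different CPU is preferred; the maybe_move_vcpu() wrapper and the cpu_surrender() step are assumptions about a possible consumer, not the consumer added elsewhere in this change.

/*
 * Illustrative only: ask whether the current (VCPU) thread should move.
 * Preemption is disabled around the call, satisfying the t_preempt > 0
 * ASSERT; the thread is running, satisfying TS_ONPROC.  The caller is
 * assumed to already carry TS_VCPU.  Reacting by surrendering the CPU so
 * the dispatcher requeues the thread elsewhere is an assumption.
 */
static void
maybe_move_vcpu(void)
{
	cpu_t *target;

	kpreempt_disable();
	target = disp_choose_best_cpu();
	if (target != CPU) {
		/* A preferable (HT-safe) CPU exists; give this one up. */
		cpu_surrender(curthread);
	}
	kpreempt_enable();
}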