Print this page
OS-7125 Need mitigation of L1TF (CVE-2018-3646)
Reviewed by: Robert Mustacchi <rm@joyent.com>
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
*** 21,30 ****
--- 21,34 ----
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
+ /*
+ * Copyright (c) 2018, Joyent, Inc. All rights reserved.
+ */
+
/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */
#include <sys/types.h>
*** 54,63 ****
--- 58,68 ----
#include <sys/schedctl.h>
#include <sys/atomic.h>
#include <sys/dtrace.h>
#include <sys/sdt.h>
#include <sys/archsystm.h>
+ #include <sys/ht.h>
#include <vm/as.h>
#define BOUND_CPU 0x1
#define BOUND_PARTITION 0x2
*** 1113,1131 ****
* appear at the end of resume(), because we may not
* return here
*/
}
- #define CPU_IDLING(pri) ((pri) == -1)
-
static void
cpu_resched(cpu_t *cp, pri_t tpri)
{
int call_poke_cpu = 0;
pri_t cpupri = cp->cpu_dispatch_pri;
! if (!CPU_IDLING(cpupri) && (cpupri < tpri)) {
TRACE_2(TR_FAC_DISP, TR_CPU_RESCHED,
"CPU_RESCHED:Tpri %d Cpupri %d", tpri, cpupri);
if (tpri >= upreemptpri && cp->cpu_runrun == 0) {
cp->cpu_runrun = 1;
aston(cp->cpu_dispthread);
--- 1118,1134 ----
* appear at the end of resume(), because we may not
* return here
*/
}
static void
cpu_resched(cpu_t *cp, pri_t tpri)
{
int call_poke_cpu = 0;
pri_t cpupri = cp->cpu_dispatch_pri;
! if (cpupri != CPU_IDLE_PRI && cpupri < tpri) {
TRACE_2(TR_FAC_DISP, TR_CPU_RESCHED,
"CPU_RESCHED:Tpri %d Cpupri %d", tpri, cpupri);
if (tpri >= upreemptpri && cp->cpu_runrun == 0) {
cp->cpu_runrun = 1;
aston(cp->cpu_dispthread);
*** 1217,1237 ****
}
/*
* We'll generally let this thread continue to run where
* it last ran...but will consider migration if:
! * - We thread probably doesn't have much cache warmth.
* - The CPU where it last ran is the target of an offline
* request.
! * - The thread last ran outside it's home lgroup.
*/
if ((!THREAD_HAS_CACHE_WARMTH(tp)) ||
! (tp->t_cpu == cpu_inmotion)) {
! cp = disp_lowpri_cpu(tp->t_cpu, tp->t_lpl, tpri, NULL);
! } else if (!LGRP_CONTAINS_CPU(tp->t_lpl->lpl_lgrp, tp->t_cpu)) {
! cp = disp_lowpri_cpu(tp->t_cpu, tp->t_lpl, tpri,
! self ? tp->t_cpu : NULL);
} else {
cp = tp->t_cpu;
}
if (tp->t_cpupart == cp->cpu_part) {
--- 1220,1240 ----
}
/*
* We'll generally let this thread continue to run where
* it last ran...but will consider migration if:
! * - The thread probably doesn't have much cache warmth.
! * - HT exclusion would prefer us to run elsewhere.
* - The CPU where it last ran is the target of an offline
* request.
! * - The thread last ran outside its home lgroup.
*/
if ((!THREAD_HAS_CACHE_WARMTH(tp)) ||
! !ht_should_run(tp, tp->t_cpu) ||
! (tp->t_cpu == cpu_inmotion) ||
! !LGRP_CONTAINS_CPU(tp->t_lpl->lpl_lgrp, tp->t_cpu)) {
! cp = disp_lowpri_cpu(tp->t_cpu, tp, tpri);
} else {
cp = tp->t_cpu;
}
if (tp->t_cpupart == cp->cpu_part) {
*** 1256,1266 ****
newcp = cp->cpu_next_part;
} else if ((newcp = cp->cpu_next_lpl) == cp) {
newcp = cp->cpu_next_part;
}
! if (RUNQ_LEN(newcp, tpri) < qlen) {
DTRACE_PROBE3(runq__balance,
kthread_t *, tp,
cpu_t *, cp, cpu_t *, newcp);
cp = newcp;
}
--- 1259,1270 ----
newcp = cp->cpu_next_part;
} else if ((newcp = cp->cpu_next_lpl) == cp) {
newcp = cp->cpu_next_part;
}
! if (ht_should_run(tp, newcp) &&
! RUNQ_LEN(newcp, tpri) < qlen) {
DTRACE_PROBE3(runq__balance,
kthread_t *, tp,
cpu_t *, cp, cpu_t *, newcp);
cp = newcp;
}
*** 1267,1278 ****
}
} else {
/*
* Migrate to a cpu in the new partition.
*/
! cp = disp_lowpri_cpu(tp->t_cpupart->cp_cpulist,
! tp->t_lpl, tp->t_pri, NULL);
}
ASSERT((cp->cpu_flags & CPU_QUIESCED) == 0);
} else {
/*
* It is possible that t_weakbound_cpu != t_bound_cpu (for
--- 1271,1282 ----
}
} else {
/*
* Migrate to a cpu in the new partition.
*/
! cp = disp_lowpri_cpu(tp->t_cpupart->cp_cpulist, tp,
! tp->t_pri);
}
ASSERT((cp->cpu_flags & CPU_QUIESCED) == 0);
} else {
/*
* It is possible that t_weakbound_cpu != t_bound_cpu (for
*** 1405,1437 ****
cp = tp->t_cpu;
if (tp->t_cpupart == cp->cpu_part) {
/*
* We'll generally let this thread continue to run
* where it last ran, but will consider migration if:
! * - The thread last ran outside it's home lgroup.
* - The CPU where it last ran is the target of an
* offline request (a thread_nomigrate() on the in
* motion CPU relies on this when forcing a preempt).
* - The thread isn't the highest priority thread where
* it last ran, and it is considered not likely to
* have significant cache warmth.
*/
! if ((!LGRP_CONTAINS_CPU(tp->t_lpl->lpl_lgrp, cp)) ||
! (cp == cpu_inmotion)) {
! cp = disp_lowpri_cpu(tp->t_cpu, tp->t_lpl, tpri,
! (tp == curthread) ? cp : NULL);
! } else if ((tpri < cp->cpu_disp->disp_maxrunpri) &&
! (!THREAD_HAS_CACHE_WARMTH(tp))) {
! cp = disp_lowpri_cpu(tp->t_cpu, tp->t_lpl, tpri,
! NULL);
}
} else {
/*
* Migrate to a cpu in the new partition.
*/
cp = disp_lowpri_cpu(tp->t_cpupart->cp_cpulist,
! tp->t_lpl, tp->t_pri, NULL);
}
ASSERT((cp->cpu_flags & CPU_QUIESCED) == 0);
} else {
/*
* It is possible that t_weakbound_cpu != t_bound_cpu (for
--- 1409,1438 ----
cp = tp->t_cpu;
if (tp->t_cpupart == cp->cpu_part) {
/*
* We'll generally let this thread continue to run
* where it last ran, but will consider migration if:
! * - The thread last ran outside its home lgroup.
* - The CPU where it last ran is the target of an
* offline request (a thread_nomigrate() on the in
* motion CPU relies on this when forcing a preempt).
* - The thread isn't the highest priority thread where
* it last ran, and it is considered not likely to
* have significant cache warmth.
*/
! if (!LGRP_CONTAINS_CPU(tp->t_lpl->lpl_lgrp, cp) ||
! cp == cpu_inmotion ||
! (tpri < cp->cpu_disp->disp_maxrunpri &&
! !THREAD_HAS_CACHE_WARMTH(tp))) {
! cp = disp_lowpri_cpu(tp->t_cpu, tp, tpri);
}
} else {
/*
* Migrate to a cpu in the new partition.
*/
cp = disp_lowpri_cpu(tp->t_cpupart->cp_cpulist,
! tp, tp->t_pri);
}
ASSERT((cp->cpu_flags & CPU_QUIESCED) == 0);
} else {
/*
* It is possible that t_weakbound_cpu != t_bound_cpu (for
*** 1578,1588 ****
cp = tp->t_cpu;
if (tp->t_cpupart != cp->cpu_part) {
/* migrate to a cpu in the new partition */
cp = tp->t_cpupart->cp_cpulist;
}
! cp = disp_lowpri_cpu(cp, tp->t_lpl, tp->t_pri, NULL);
disp_lock_enter_high(&cp->cpu_disp->disp_lock);
ASSERT((cp->cpu_flags & CPU_QUIESCED) == 0);
#ifndef NPROBE
/* Kernel probe */
--- 1579,1589 ----
cp = tp->t_cpu;
if (tp->t_cpupart != cp->cpu_part) {
/* migrate to a cpu in the new partition */
cp = tp->t_cpupart->cp_cpulist;
}
! cp = disp_lowpri_cpu(cp, tp, tp->t_pri);
disp_lock_enter_high(&cp->cpu_disp->disp_lock);
ASSERT((cp->cpu_flags & CPU_QUIESCED) == 0);
#ifndef NPROBE
/* Kernel probe */
*** 2551,2634 ****
}
disp_lock_exit(&dp->disp_lock);
}
/*
! * disp_lowpri_cpu - find CPU running the lowest priority thread.
! * The hint passed in is used as a starting point so we don't favor
! * CPU 0 or any other CPU. The caller should pass in the most recently
! * used CPU for the thread.
*
! * The lgroup and priority are used to determine the best CPU to run on
! * in a NUMA machine. The lgroup specifies which CPUs are closest while
! * the thread priority will indicate whether the thread will actually run
! * there. To pick the best CPU, the CPUs inside and outside of the given
! * lgroup which are running the lowest priority threads are found. The
! * remote CPU is chosen only if the thread will not run locally on a CPU
! * within the lgroup, but will run on the remote CPU. If the thread
! * cannot immediately run on any CPU, the best local CPU will be chosen.
*
! * The lpl specified also identifies the cpu partition from which
! * disp_lowpri_cpu should select a CPU.
*
! * curcpu is used to indicate that disp_lowpri_cpu is being called on
! * behalf of the current thread. (curthread is looking for a new cpu)
! * In this case, cpu_dispatch_pri for this thread's cpu should be
! * ignored.
*
! * If a cpu is the target of an offline request then try to avoid it.
*
! * This function must be called at either high SPL, or with preemption
! * disabled, so that the "hint" CPU cannot be removed from the online
! * CPU list while we are traversing it.
*/
cpu_t *
! disp_lowpri_cpu(cpu_t *hint, lpl_t *lpl, pri_t tpri, cpu_t *curcpu)
{
cpu_t *bestcpu;
cpu_t *besthomecpu;
cpu_t *cp, *cpstart;
- pri_t bestpri;
- pri_t cpupri;
-
klgrpset_t done;
- klgrpset_t cur_set;
lpl_t *lpl_iter, *lpl_leaf;
- int i;
- /*
- * Scan for a CPU currently running the lowest priority thread.
- * Cannot get cpu_lock here because it is adaptive.
- * We do not require lock on CPU list.
- */
ASSERT(hint != NULL);
! ASSERT(lpl != NULL);
! ASSERT(lpl->lpl_ncpu > 0);
- /*
- * First examine local CPUs. Note that it's possible the hint CPU
- * passed in in remote to the specified home lgroup. If our priority
- * isn't sufficient enough such that we can run immediately at home,
- * then examine CPUs remote to our home lgroup.
- * We would like to give preference to CPUs closest to "home".
- * If we can't find a CPU where we'll run at a given level
- * of locality, we expand our search to include the next level.
- */
bestcpu = besthomecpu = NULL;
klgrpset_clear(done);
- /* start with lpl we were passed */
! lpl_iter = lpl;
do {
- bestpri = SHRT_MAX;
klgrpset_clear(cur_set);
! for (i = 0; i < lpl_iter->lpl_nrset; i++) {
lpl_leaf = lpl_iter->lpl_rset[i];
if (klgrpset_ismember(done, lpl_leaf->lpl_lgrpid))
continue;
klgrpset_add(cur_set, lpl_leaf->lpl_lgrpid);
--- 2552,2640 ----
}
disp_lock_exit(&dp->disp_lock);
}
/*
! * Return a score rating this CPU for running this thread: lower is better.
*
! * If curthread is looking for a new CPU, then we ignore cpu_dispatch_pri for
! * curcpu (as that's our own priority).
*
! * If a cpu is the target of an offline request, then try to avoid it.
*
! * Otherwise we'll use double the effective dispatcher priority for the CPU.
*
! * We do this so ht_adjust_cpu_score() can increment the score if needed,
! * without ending up over-riding a dispatcher priority.
! */
! static pri_t
! cpu_score(cpu_t *cp, kthread_t *tp)	/* score cp as a place to run tp; lower is better */
! {
! pri_t score;	/* doubled priority; disp_lowpri_cpu() halves it before comparing with tpri */
!
! if (tp == curthread && cp == curthread->t_cpu)
! score = 2 * CPU_IDLE_PRI;	/* our own CPU: its dispatch priority is ours, so start from the idle value */
! else if (cp == cpu_inmotion)
! score = SHRT_MAX;	/* offline target: start from the largest value so it is avoided if possible */
! else
! score = 2 * cp->cpu_dispatch_pri;	/* doubled so the HT adjustment below has room to add to it */
!
! if (2 * cp->cpu_disp->disp_maxrunpri > score)	/* take the larger of score and doubled disp_maxrunpri */
! score = 2 * cp->cpu_disp->disp_maxrunpri;
! if (2 * cp->cpu_chosen_level > score)	/* likewise for doubled cpu_chosen_level */
! score = 2 * cp->cpu_chosen_level;
!
! return (ht_adjust_cpu_score(tp, cp, score));	/* HT code may adjust the score; its result is returned as-is */
! }
!
! /*
! * disp_lowpri_cpu - find a suitable CPU to run the given thread.
*
! * We are looking for a CPU with an effective dispatch priority lower than the
! * thread's, so that the thread will run immediately rather than be enqueued.
! * For NUMA locality, we prefer "home" CPUs within the thread's ->t_lpl group.
! * If we don't find an available CPU there, we will expand our search to include
! * wider locality levels. (Note these groups are already divided by CPU
! * partition.)
! *
! * If the thread cannot immediately run on *any* CPU, we'll enqueue ourselves on
! * the best home CPU we found.
! *
! * The hint passed in is used as a starting point so we don't favor CPU 0 or any
! * other CPU. The caller should pass in the most recently used CPU for the
! * thread; it's of course possible that this CPU isn't in the home lgroup.
! *
! * This function must be called at either high SPL, or with preemption disabled,
! * so that the "hint" CPU cannot be removed from the online CPU list while we
! * are traversing it.
*/
cpu_t *
! disp_lowpri_cpu(cpu_t *hint, kthread_t *tp, pri_t tpri)
{
cpu_t *bestcpu;
cpu_t *besthomecpu;
cpu_t *cp, *cpstart;
klgrpset_t done;
lpl_t *lpl_iter, *lpl_leaf;
ASSERT(hint != NULL);
! ASSERT(tp->t_lpl->lpl_ncpu > 0);
bestcpu = besthomecpu = NULL;
klgrpset_clear(done);
! lpl_iter = tp->t_lpl;
do {
+ pri_t best = SHRT_MAX;
+ klgrpset_t cur_set;
klgrpset_clear(cur_set);
! for (int i = 0; i < lpl_iter->lpl_nrset; i++) {
lpl_leaf = lpl_iter->lpl_rset[i];
if (klgrpset_ismember(done, lpl_leaf->lpl_lgrpid))
continue;
klgrpset_add(cur_set, lpl_leaf->lpl_lgrpid);
*** 2637,2674 ****
cp = cpstart = hint;
else
cp = cpstart = lpl_leaf->lpl_cpus;
do {
! if (cp == curcpu)
! cpupri = -1;
! else if (cp == cpu_inmotion)
! cpupri = SHRT_MAX;
! else
! cpupri = cp->cpu_dispatch_pri;
! if (cp->cpu_disp->disp_maxrunpri > cpupri)
! cpupri = cp->cpu_disp->disp_maxrunpri;
! if (cp->cpu_chosen_level > cpupri)
! cpupri = cp->cpu_chosen_level;
! if (cpupri < bestpri) {
! if (CPU_IDLING(cpupri)) {
! ASSERT((cp->cpu_flags &
! CPU_QUIESCED) == 0);
! return (cp);
! }
bestcpu = cp;
! bestpri = cpupri;
}
} while ((cp = cp->cpu_next_lpl) != cpstart);
}
! if (bestcpu && (tpri > bestpri)) {
! ASSERT((bestcpu->cpu_flags & CPU_QUIESCED) == 0);
! return (bestcpu);
! }
if (besthomecpu == NULL)
besthomecpu = bestcpu;
/*
* Add the lgrps we just considered to the "done" set
*/
klgrpset_or(done, cur_set);
--- 2643,2671 ----
cp = cpstart = hint;
else
cp = cpstart = lpl_leaf->lpl_cpus;
do {
! pri_t score = cpu_score(cp, tp);
!
! if (score < best) {
! best = score;
bestcpu = cp;
!
! /* An idle CPU: we're done. */
! if (score / 2 == CPU_IDLE_PRI)
! goto out;
}
} while ((cp = cp->cpu_next_lpl) != cpstart);
}
! if (bestcpu != NULL && tpri > (best / 2))
! goto out;
!
if (besthomecpu == NULL)
besthomecpu = bestcpu;
+
/*
* Add the lgrps we just considered to the "done" set
*/
klgrpset_or(done, cur_set);
*** 2676,2687 ****
/*
* The specified priority isn't high enough to run immediately
* anywhere, so just return the best CPU from the home lgroup.
*/
! ASSERT((besthomecpu->cpu_flags & CPU_QUIESCED) == 0);
! return (besthomecpu);
}
/*
* This routine provides the generic idle cpu function for all processors.
* If a processor has some specific code to execute when idle (say, to stop
--- 2673,2687 ----
/*
* The specified priority isn't high enough to run immediately
* anywhere, so just return the best CPU from the home lgroup.
*/
! bestcpu = besthomecpu;
!
! out:
! ASSERT((bestcpu->cpu_flags & CPU_QUIESCED) == 0);
! return (bestcpu);
}
/*
* This routine provides the generic idle cpu function for all processors.
* If a processor has some specific code to execute when idle (say, to stop
*** 2696,2701 ****
--- 2696,2717 ----
/*ARGSUSED*/
static void
generic_enq_thread(cpu_t *cpu, int bound)
{	/* empty body: both arguments are ignored and nothing is done */
+ }
+
+ cpu_t *
+ disp_choose_best_cpu(void)	/* returns the CPU curthread should run on: the current CPU or one picked by disp_lowpri_cpu() */
+ {
+ kthread_t *t = curthread;	/* the thread being placed is always the caller */
+ cpu_t *curcpu = CPU;	/* the CPU we are executing on right now */
+
+ ASSERT(t->t_preempt > 0);	/* presumably means preemption is disabled, which disp_lowpri_cpu() requires -- confirm */
+ ASSERT(t->t_state == TS_ONPROC);	/* caller's state must be TS_ONPROC */
+ ASSERT(t->t_schedflag & TS_VCPU);	/* only valid for threads flagged TS_VCPU */
+
+ if (ht_should_run(t, curcpu))	/* HT check accepts the current CPU: stay here */
+ return (curcpu);
+
+ return (disp_lowpri_cpu(curcpu, t, t->t_pri));	/* otherwise search, with the current CPU as the starting hint */
}