6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
27 /* All Rights Reserved */
28
29
30 #include <sys/types.h>
31 #include <sys/param.h>
32 #include <sys/sysmacros.h>
33 #include <sys/signal.h>
34 #include <sys/user.h>
35 #include <sys/systm.h>
36 #include <sys/sysinfo.h>
37 #include <sys/var.h>
38 #include <sys/errno.h>
39 #include <sys/cmn_err.h>
40 #include <sys/debug.h>
41 #include <sys/inline.h>
42 #include <sys/disp.h>
43 #include <sys/class.h>
44 #include <sys/bitmap.h>
45 #include <sys/kmem.h>
46 #include <sys/cpuvar.h>
47 #include <sys/vtrace.h>
48 #include <sys/tnf.h>
49 #include <sys/cpupart.h>
50 #include <sys/lgrp.h>
51 #include <sys/pg.h>
52 #include <sys/cmt.h>
53 #include <sys/bitset.h>
54 #include <sys/schedctl.h>
55 #include <sys/atomic.h>
56 #include <sys/dtrace.h>
57 #include <sys/sdt.h>
58 #include <sys/archsystm.h>
59
60 #include <vm/as.h>
61
62 #define BOUND_CPU 0x1
63 #define BOUND_PARTITION 0x2
64 #define BOUND_INTR 0x4
65
66 /* Dispatch queue allocation structure and functions */
67 struct disp_queue_info {
68 disp_t *dp;
69 dispq_t *olddispq;
70 dispq_t *newdispq;
71 ulong_t *olddqactmap;
72 ulong_t *newdqactmap;
73 int oldnglobpris;
74 };
75 static void disp_dq_alloc(struct disp_queue_info *dptr, int numpris,
76 disp_t *dp);
77 static void disp_dq_assign(struct disp_queue_info *dptr, int numpris);
78 static void disp_dq_free(struct disp_queue_info *dptr);
1098 * queue.
1099 */
1100 if ((curthread->t_state == TS_RUN) && (curthread->t_waitrq == 0)) {
1101 curthread->t_waitrq = now;
1102 }
1103
1104 /* restore next thread to previously running microstate */
1105 restore_mstate(next);
1106
1107 if (dtrace_vtime_active)
1108 dtrace_vtime_switch(next);
1109
1110 resume(next);
1111 /*
1112 * The TR_RESUME_END and TR_SWTCH_END trace points
1113 * appear at the end of resume(), because we may not
1114 * return here
1115 */
1116 }
1117
1118 #define CPU_IDLING(pri) ((pri) == -1)
1119
1120 static void
1121 cpu_resched(cpu_t *cp, pri_t tpri)
1122 {
1123 int call_poke_cpu = 0;
1124 pri_t cpupri = cp->cpu_dispatch_pri;
1125
1126 if (!CPU_IDLING(cpupri) && (cpupri < tpri)) {
1127 TRACE_2(TR_FAC_DISP, TR_CPU_RESCHED,
1128 "CPU_RESCHED:Tpri %d Cpupri %d", tpri, cpupri);
1129 if (tpri >= upreemptpri && cp->cpu_runrun == 0) {
1130 cp->cpu_runrun = 1;
1131 aston(cp->cpu_dispthread);
1132 if (tpri < kpreemptpri && cp != CPU)
1133 call_poke_cpu = 1;
1134 }
1135 if (tpri >= kpreemptpri && cp->cpu_kprunrun == 0) {
1136 cp->cpu_kprunrun = 1;
1137 if (cp != CPU)
1138 call_poke_cpu = 1;
1139 }
1140 }
1141
1142 /*
1143 * Propagate cpu_runrun and cpu_kprunrun to global visibility.
1144 */
1145 membar_enter();
1146
1202
1203 self = (tp == curthread);
1204
1205 if (tp->t_bound_cpu || tp->t_weakbound_cpu)
1206 bound = 1;
1207 else
1208 bound = 0;
1209
1210 tpri = DISP_PRIO(tp);
1211 if (ncpus == 1)
1212 cp = tp->t_cpu;
1213 else if (!bound) {
1214 if (tpri >= kpqpri) {
1215 setkpdq(tp, SETKP_BACK);
1216 return;
1217 }
1218
1219 /*
1220 * We'll generally let this thread continue to run where
1221 * it last ran...but will consider migration if:
1222 * - The thread probably doesn't have much cache warmth.
1223 * - The CPU where it last ran is the target of an offline
1224 * request.
1225 * - The thread last ran outside its home lgroup.
1226 */
1227 if ((!THREAD_HAS_CACHE_WARMTH(tp)) ||
1228 (tp->t_cpu == cpu_inmotion)) {
1229 cp = disp_lowpri_cpu(tp->t_cpu, tp->t_lpl, tpri, NULL);
1230 } else if (!LGRP_CONTAINS_CPU(tp->t_lpl->lpl_lgrp, tp->t_cpu)) {
1231 cp = disp_lowpri_cpu(tp->t_cpu, tp->t_lpl, tpri,
1232 self ? tp->t_cpu : NULL);
1233 } else {
1234 cp = tp->t_cpu;
1235 }
1236
1237 if (tp->t_cpupart == cp->cpu_part) {
1238 int qlen;
1239
1240 /*
1241 * Perform any CMT load balancing
1242 */
1243 cp = cmt_balance(tp, cp);
1244
1245 /*
1246 * Balance across the run queues
1247 */
1248 qlen = RUNQ_LEN(cp, tpri);
1249 if (tpri >= RUNQ_MATCH_PRI &&
1250 !(tp->t_schedflag & TS_RUNQMATCH))
1251 qlen -= RUNQ_MAX_DIFF;
1252 if (qlen > 0) {
1253 cpu_t *newcp;
1254
1255 if (tp->t_lpl->lpl_lgrpid == LGRP_ROOTID) {
1256 newcp = cp->cpu_next_part;
1257 } else if ((newcp = cp->cpu_next_lpl) == cp) {
1258 newcp = cp->cpu_next_part;
1259 }
1260
1261 if (RUNQ_LEN(newcp, tpri) < qlen) {
1262 DTRACE_PROBE3(runq__balance,
1263 kthread_t *, tp,
1264 cpu_t *, cp, cpu_t *, newcp);
1265 cp = newcp;
1266 }
1267 }
1268 } else {
1269 /*
1270 * Migrate to a cpu in the new partition.
1271 */
1272 cp = disp_lowpri_cpu(tp->t_cpupart->cp_cpulist,
1273 tp->t_lpl, tp->t_pri, NULL);
1274 }
1275 ASSERT((cp->cpu_flags & CPU_QUIESCED) == 0);
1276 } else {
1277 /*
1278 * a short time, until any weak binding that existed when the
1279 * strong binding was established has dropped), so we must
1280 * strong binding was established has dropped) so we must
1281 * favour weak binding over strong.
1282 */
1283 cp = tp->t_weakbound_cpu ?
1284 tp->t_weakbound_cpu : tp->t_bound_cpu;
1285 }
1286 /*
1287 * A thread that is ONPROC may be temporarily placed on the run queue
1288 * but then chosen to run again by disp. If the thread we're placing on
1289 * the queue is in TS_ONPROC state, don't set its t_waitrq until a
1290 * replacement process is actually scheduled in swtch(). In this
1291 * situation, curthread is the only thread that could be in the ONPROC
1292 * state.
1293 */
1390 }
1391
1392 if (tp->t_bound_cpu || tp->t_weakbound_cpu)
1393 bound = 1;
1394 else
1395 bound = 0;
1396
1397 tpri = DISP_PRIO(tp);
1398 if (ncpus == 1)
1399 cp = tp->t_cpu;
1400 else if (!bound) {
1401 if (tpri >= kpqpri) {
1402 setkpdq(tp, SETKP_FRONT);
1403 return;
1404 }
1405 cp = tp->t_cpu;
1406 if (tp->t_cpupart == cp->cpu_part) {
1407 /*
1408 * We'll generally let this thread continue to run
1409 * where it last ran, but will consider migration if:
1410 * - The thread last ran outside its home lgroup.
1411 * - The CPU where it last ran is the target of an
1412 * offline request (a thread_nomigrate() on the in
1413 * motion CPU relies on this when forcing a preempt).
1414 * - The thread isn't the highest priority thread where
1415 * it last ran, and it is considered not likely to
1416 * have significant cache warmth.
1417 */
1418 if ((!LGRP_CONTAINS_CPU(tp->t_lpl->lpl_lgrp, cp)) ||
1419 (cp == cpu_inmotion)) {
1420 cp = disp_lowpri_cpu(tp->t_cpu, tp->t_lpl, tpri,
1421 (tp == curthread) ? cp : NULL);
1422 } else if ((tpri < cp->cpu_disp->disp_maxrunpri) &&
1423 (!THREAD_HAS_CACHE_WARMTH(tp))) {
1424 cp = disp_lowpri_cpu(tp->t_cpu, tp->t_lpl, tpri,
1425 NULL);
1426 }
1427 } else {
1428 /*
1429 * Migrate to a cpu in the new partition.
1430 */
1431 cp = disp_lowpri_cpu(tp->t_cpupart->cp_cpulist,
1432 tp->t_lpl, tp->t_pri, NULL);
1433 }
1434 ASSERT((cp->cpu_flags & CPU_QUIESCED) == 0);
1435 } else {
1436 /*
1437 * It is possible that t_weakbound_cpu != t_bound_cpu (for
1438 * a short time, until any weak binding that existed when the
1439 * strong binding was established has dropped), so we must
1440 * favour weak binding over strong.
1441 */
1442 cp = tp->t_weakbound_cpu ?
1443 tp->t_weakbound_cpu : tp->t_bound_cpu;
1444 }
1445
1446 /*
1447 * A thread that is ONPROC may be temporarily placed on the run queue
1448 * but then chosen to run again by disp. If the thread we're placing on
1449 * the queue is in TS_ONPROC state, don't set its t_waitrq until a
1450 * replacement process is actually scheduled in swtch(). In this
1451 * situation, curthread is the only thread that could be in the ONPROC
1452 * state.
1563 } else {
1564 ASSERT(dq->dq_last == NULL);
1565 ASSERT(dq->dq_first == NULL);
1566 tp->t_link = NULL;
1567 dq->dq_first = dq->dq_last = tp;
1568 }
1569 BT_SET(dp->disp_qactmap, tpri);
1570 if (tpri > dp->disp_max_unbound_pri)
1571 dp->disp_max_unbound_pri = tpri;
1572 if (tpri > dp->disp_maxrunpri) {
1573 dp->disp_maxrunpri = tpri;
1574 membar_enter();
1575 }
1576 }
1577
1578 cp = tp->t_cpu;
1579 if (tp->t_cpupart != cp->cpu_part) {
1580 /* migrate to a cpu in the new partition */
1581 cp = tp->t_cpupart->cp_cpulist;
1582 }
1583 cp = disp_lowpri_cpu(cp, tp->t_lpl, tp->t_pri, NULL);
1584 disp_lock_enter_high(&cp->cpu_disp->disp_lock);
1585 ASSERT((cp->cpu_flags & CPU_QUIESCED) == 0);
1586
1587 #ifndef NPROBE
1588 /* Kernel probe */
1589 if (tnf_tracing_active)
1590 tnf_thread_queue(tp, cp, tpri);
1591 #endif /* NPROBE */
1592
1593 if (cp->cpu_chosen_level < tpri)
1594 cp->cpu_chosen_level = tpri;
1595 cpu_resched(cp, tpri);
1596 disp_lock_exit_high(&cp->cpu_disp->disp_lock);
1597 (*disp_enq_thread)(cp, 0);
1598 }
1599
1600 /*
1601 * Remove a thread from the dispatcher queue, if it is on one.
1602 * It is not an error if it is not found, but we return whether
1603 * or not it was found in case the caller wants to check.
2536 *
2537 * cp has already been removed from the list of active cpus
2538 * and tp->t_cpu has been changed so there is no risk of
2539 * tp ending up back on cp.
2540 *
2541 * Called from cpupart_move_cpu:
2542 *
2543 * The cpu has moved to a new cpupart. Any threads that
2544 * were on its dispatch queues before the move remain
2545 * in the old partition and can't run in the new partition.
2546 */
2547 ASSERT(tp->t_cpu != cp);
2548 thread_unlock(tp);
2549
2550 disp_lock_enter(&dp->disp_lock);
2551 }
2552 disp_lock_exit(&dp->disp_lock);
2553 }
2554
2555 /*
2556 * disp_lowpri_cpu - find CPU running the lowest priority thread.
2557 * The hint passed in is used as a starting point so we don't favor
2558 * CPU 0 or any other CPU. The caller should pass in the most recently
2559 * used CPU for the thread.
2560 *
2561 * The lgroup and priority are used to determine the best CPU to run on
2562 * in a NUMA machine. The lgroup specifies which CPUs are closest while
2563 * the thread priority will indicate whether the thread will actually run
2564 * there. To pick the best CPU, the CPUs inside and outside of the given
2565 * lgroup which are running the lowest priority threads are found. The
2566 * remote CPU is chosen only if the thread will not run locally on a CPU
2567 * within the lgroup, but will run on the remote CPU. If the thread
2568 * cannot immediately run on any CPU, the best local CPU will be chosen.
2569 *
2570 * The lpl specified also identifies the cpu partition from which
2571 * disp_lowpri_cpu should select a CPU.
2572 *
2573 * curcpu is used to indicate that disp_lowpri_cpu is being called on
2574 * behalf of the current thread. (curthread is looking for a new cpu)
2575 * In this case, cpu_dispatch_pri for this thread's cpu should be
2576 * ignored.
2577 *
2578 * If a cpu is the target of an offline request then try to avoid it.
2579 *
2580 * This function must be called at either high SPL, or with preemption
2581 * disabled, so that the "hint" CPU cannot be removed from the online
2582 * CPU list while we are traversing it.
2583 */
2584 cpu_t *
2585 disp_lowpri_cpu(cpu_t *hint, lpl_t *lpl, pri_t tpri, cpu_t *curcpu)
2586 {
2587 cpu_t *bestcpu;
2588 cpu_t *besthomecpu;
2589 cpu_t *cp, *cpstart;
2590
2591 pri_t bestpri;
2592 pri_t cpupri;
2593
2594 klgrpset_t done;
2595 klgrpset_t cur_set;
2596
2597 lpl_t *lpl_iter, *lpl_leaf;
2598 int i;
2599
2600 /*
2601 * Scan for a CPU currently running the lowest priority thread.
2602 * Cannot get cpu_lock here because it is adaptive.
2603 * We do not require lock on CPU list.
2604 */
2605 ASSERT(hint != NULL);
2606 ASSERT(lpl != NULL);
2607 ASSERT(lpl->lpl_ncpu > 0);
2608
2609 /*
2610 * First examine local CPUs. Note that it's possible the hint CPU
2611 * passed in is remote to the specified home lgroup. If our priority
2612 * isn't high enough for us to run immediately at home,
2613 * then examine CPUs remote to our home lgroup.
2614 * We would like to give preference to CPUs closest to "home".
2615 * If we can't find a CPU where we'll run at a given level
2616 * of locality, we expand our search to include the next level.
2617 */
2618 bestcpu = besthomecpu = NULL;
2619 klgrpset_clear(done);
2620 /* start with lpl we were passed */
2621
2622 lpl_iter = lpl;
2623
2624 do {
2625
2626 bestpri = SHRT_MAX;
2627 klgrpset_clear(cur_set);
2628
2629 for (i = 0; i < lpl_iter->lpl_nrset; i++) {
2630 lpl_leaf = lpl_iter->lpl_rset[i];
2631 if (klgrpset_ismember(done, lpl_leaf->lpl_lgrpid))
2632 continue;
2633
2634 klgrpset_add(cur_set, lpl_leaf->lpl_lgrpid);
2635
2636 if (hint->cpu_lpl == lpl_leaf)
2637 cp = cpstart = hint;
2638 else
2639 cp = cpstart = lpl_leaf->lpl_cpus;
2640
2641 do {
2642 if (cp == curcpu)
2643 cpupri = -1;
2644 else if (cp == cpu_inmotion)
2645 cpupri = SHRT_MAX;
2646 else
2647 cpupri = cp->cpu_dispatch_pri;
2648 if (cp->cpu_disp->disp_maxrunpri > cpupri)
2649 cpupri = cp->cpu_disp->disp_maxrunpri;
2650 if (cp->cpu_chosen_level > cpupri)
2651 cpupri = cp->cpu_chosen_level;
2652 if (cpupri < bestpri) {
2653 if (CPU_IDLING(cpupri)) {
2654 ASSERT((cp->cpu_flags &
2655 CPU_QUIESCED) == 0);
2656 return (cp);
2657 }
2658 bestcpu = cp;
2659 bestpri = cpupri;
2660 }
2661 } while ((cp = cp->cpu_next_lpl) != cpstart);
2662 }
2663
2664 if (bestcpu && (tpri > bestpri)) {
2665 ASSERT((bestcpu->cpu_flags & CPU_QUIESCED) == 0);
2666 return (bestcpu);
2667 }
2668 if (besthomecpu == NULL)
2669 besthomecpu = bestcpu;
2670 /*
2671 * Add the lgrps we just considered to the "done" set
2672 */
2673 klgrpset_or(done, cur_set);
2674
2675 } while ((lpl_iter = lpl_iter->lpl_parent) != NULL);
2676
2677 /*
2678 * The specified priority isn't high enough to run immediately
2679 * anywhere, so just return the best CPU from the home lgroup.
2680 */
2681 ASSERT((besthomecpu->cpu_flags & CPU_QUIESCED) == 0);
2682 return (besthomecpu);
2683 }
2684
2685 /*
2686 * This routine provides the generic idle cpu function for all processors.
2687 * If a processor has some specific code to execute when idle (say, to stop
2688 * the pipeline and save power) then that routine should be defined in the
2689 * processors specific code (module_xx.c) and the global variable idle_cpu
2690 * processor-specific code (module_xx.c) and the global variable idle_cpu
2691 */
2692 static void
2693 generic_idle_cpu(void)
2694 {
2695 }
2696
2697 /*ARGSUSED*/
2698 static void
2699 generic_enq_thread(cpu_t *cpu, int bound)
2700 {
2701 }
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 /*
27 * Copyright (c) 2018, Joyent, Inc. All rights reserved.
28 */
29
30 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
31 /* All Rights Reserved */
32
33
34 #include <sys/types.h>
35 #include <sys/param.h>
36 #include <sys/sysmacros.h>
37 #include <sys/signal.h>
38 #include <sys/user.h>
39 #include <sys/systm.h>
40 #include <sys/sysinfo.h>
41 #include <sys/var.h>
42 #include <sys/errno.h>
43 #include <sys/cmn_err.h>
44 #include <sys/debug.h>
45 #include <sys/inline.h>
46 #include <sys/disp.h>
47 #include <sys/class.h>
48 #include <sys/bitmap.h>
49 #include <sys/kmem.h>
50 #include <sys/cpuvar.h>
51 #include <sys/vtrace.h>
52 #include <sys/tnf.h>
53 #include <sys/cpupart.h>
54 #include <sys/lgrp.h>
55 #include <sys/pg.h>
56 #include <sys/cmt.h>
57 #include <sys/bitset.h>
58 #include <sys/schedctl.h>
59 #include <sys/atomic.h>
60 #include <sys/dtrace.h>
61 #include <sys/sdt.h>
62 #include <sys/archsystm.h>
63 #include <sys/ht.h>
64
65 #include <vm/as.h>
66
67 #define BOUND_CPU 0x1
68 #define BOUND_PARTITION 0x2
69 #define BOUND_INTR 0x4
70
71 /* Dispatch queue allocation structure and functions */
72 struct disp_queue_info {
73 disp_t *dp;
74 dispq_t *olddispq;
75 dispq_t *newdispq;
76 ulong_t *olddqactmap;
77 ulong_t *newdqactmap;
78 int oldnglobpris;
79 };
80 static void disp_dq_alloc(struct disp_queue_info *dptr, int numpris,
81 disp_t *dp);
82 static void disp_dq_assign(struct disp_queue_info *dptr, int numpris);
83 static void disp_dq_free(struct disp_queue_info *dptr);
1103 * queue.
1104 */
1105 if ((curthread->t_state == TS_RUN) && (curthread->t_waitrq == 0)) {
1106 curthread->t_waitrq = now;
1107 }
1108
1109 /* restore next thread to previously running microstate */
1110 restore_mstate(next);
1111
1112 if (dtrace_vtime_active)
1113 dtrace_vtime_switch(next);
1114
1115 resume(next);
1116 /*
1117 * The TR_RESUME_END and TR_SWTCH_END trace points
1118 * appear at the end of resume(), because we may not
1119 * return here
1120 */
1121 }
1122
1123 static void
1124 cpu_resched(cpu_t *cp, pri_t tpri)
1125 {
1126 int call_poke_cpu = 0;
1127 pri_t cpupri = cp->cpu_dispatch_pri;
1128
1129 if (cpupri != CPU_IDLE_PRI && cpupri < tpri) {
1130 TRACE_2(TR_FAC_DISP, TR_CPU_RESCHED,
1131 "CPU_RESCHED:Tpri %d Cpupri %d", tpri, cpupri);
1132 if (tpri >= upreemptpri && cp->cpu_runrun == 0) {
1133 cp->cpu_runrun = 1;
1134 aston(cp->cpu_dispthread);
1135 if (tpri < kpreemptpri && cp != CPU)
1136 call_poke_cpu = 1;
1137 }
1138 if (tpri >= kpreemptpri && cp->cpu_kprunrun == 0) {
1139 cp->cpu_kprunrun = 1;
1140 if (cp != CPU)
1141 call_poke_cpu = 1;
1142 }
1143 }
1144
1145 /*
1146 * Propagate cpu_runrun and cpu_kprunrun to global visibility.
1147 */
1148 membar_enter();
1149
1205
1206 self = (tp == curthread);
1207
1208 if (tp->t_bound_cpu || tp->t_weakbound_cpu)
1209 bound = 1;
1210 else
1211 bound = 0;
1212
1213 tpri = DISP_PRIO(tp);
1214 if (ncpus == 1)
1215 cp = tp->t_cpu;
1216 else if (!bound) {
1217 if (tpri >= kpqpri) {
1218 setkpdq(tp, SETKP_BACK);
1219 return;
1220 }
1221
1222 /*
1223 * We'll generally let this thread continue to run where
1224 * it last ran...but will consider migration if:
1225 * - The thread probably doesn't have much cache warmth.
1226 * - HT exclusion would prefer us to run elsewhere.
1227 * - The CPU where it last ran is the target of an offline
1228 * request.
1229 * - The thread last ran outside its home lgroup.
1230 */
1231 if ((!THREAD_HAS_CACHE_WARMTH(tp)) ||
1232 !ht_should_run(tp, tp->t_cpu) ||
1233 (tp->t_cpu == cpu_inmotion) ||
1234 !LGRP_CONTAINS_CPU(tp->t_lpl->lpl_lgrp, tp->t_cpu)) {
1235 cp = disp_lowpri_cpu(tp->t_cpu, tp, tpri);
1236 } else {
1237 cp = tp->t_cpu;
1238 }
1239
1240 if (tp->t_cpupart == cp->cpu_part) {
1241 int qlen;
1242
1243 /*
1244 * Perform any CMT load balancing
1245 */
1246 cp = cmt_balance(tp, cp);
1247
1248 /*
1249 * Balance across the run queues
1250 */
1251 qlen = RUNQ_LEN(cp, tpri);
1252 if (tpri >= RUNQ_MATCH_PRI &&
1253 !(tp->t_schedflag & TS_RUNQMATCH))
1254 qlen -= RUNQ_MAX_DIFF;
1255 if (qlen > 0) {
1256 cpu_t *newcp;
1257
1258 if (tp->t_lpl->lpl_lgrpid == LGRP_ROOTID) {
1259 newcp = cp->cpu_next_part;
1260 } else if ((newcp = cp->cpu_next_lpl) == cp) {
1261 newcp = cp->cpu_next_part;
1262 }
1263
1264 if (ht_should_run(tp, newcp) &&
1265 RUNQ_LEN(newcp, tpri) < qlen) {
1266 DTRACE_PROBE3(runq__balance,
1267 kthread_t *, tp,
1268 cpu_t *, cp, cpu_t *, newcp);
1269 cp = newcp;
1270 }
1271 }
1272 } else {
1273 /*
1274 * Migrate to a cpu in the new partition.
1275 */
1276 cp = disp_lowpri_cpu(tp->t_cpupart->cp_cpulist, tp,
1277 tp->t_pri);
1278 }
1279 ASSERT((cp->cpu_flags & CPU_QUIESCED) == 0);
1280 } else {
1281 /*
1282 * It is possible that t_weakbound_cpu != t_bound_cpu (for
1283 * a short time, until any weak binding that existed when the
1284 * strong binding was established has dropped), so we must
1285 * favour weak binding over strong.
1286 */
1287 cp = tp->t_weakbound_cpu ?
1288 tp->t_weakbound_cpu : tp->t_bound_cpu;
1289 }
1290 /*
1291 * A thread that is ONPROC may be temporarily placed on the run queue
1292 * but then chosen to run again by disp. If the thread we're placing on
1293 * the queue is in TS_ONPROC state, don't set its t_waitrq until a
1294 * replacement process is actually scheduled in swtch(). In this
1295 * situation, curthread is the only thread that could be in the ONPROC
1296 * state.
1297 */
1394 }
1395
1396 if (tp->t_bound_cpu || tp->t_weakbound_cpu)
1397 bound = 1;
1398 else
1399 bound = 0;
1400
1401 tpri = DISP_PRIO(tp);
1402 if (ncpus == 1)
1403 cp = tp->t_cpu;
1404 else if (!bound) {
1405 if (tpri >= kpqpri) {
1406 setkpdq(tp, SETKP_FRONT);
1407 return;
1408 }
1409 cp = tp->t_cpu;
1410 if (tp->t_cpupart == cp->cpu_part) {
1411 /*
1412 * We'll generally let this thread continue to run
1413 * where it last ran, but will consider migration if:
1414 * - The thread last ran outside its home lgroup.
1415 * - The CPU where it last ran is the target of an
1416 * offline request (a thread_nomigrate() on the in
1417 * motion CPU relies on this when forcing a preempt).
1418 * - The thread isn't the highest priority thread where
1419 * it last ran, and it is considered not likely to
1420 * have significant cache warmth.
1421 */
1422 if (!LGRP_CONTAINS_CPU(tp->t_lpl->lpl_lgrp, cp) ||
1423 cp == cpu_inmotion ||
1424 (tpri < cp->cpu_disp->disp_maxrunpri &&
1425 !THREAD_HAS_CACHE_WARMTH(tp))) {
1426 cp = disp_lowpri_cpu(tp->t_cpu, tp, tpri);
1427 }
1428 } else {
1429 /*
1430 * Migrate to a cpu in the new partition.
1431 */
1432 cp = disp_lowpri_cpu(tp->t_cpupart->cp_cpulist,
1433 tp, tp->t_pri);
1434 }
1435 ASSERT((cp->cpu_flags & CPU_QUIESCED) == 0);
1436 } else {
1437 /*
1438 * It is possible that t_weakbound_cpu != t_bound_cpu (for
1439 * a short time, until any weak binding that existed when the
1440 * strong binding was established has dropped), so we must
1441 * favour weak binding over strong.
1442 */
1443 cp = tp->t_weakbound_cpu ?
1444 tp->t_weakbound_cpu : tp->t_bound_cpu;
1445 }
1446
1447 /*
1448 * A thread that is ONPROC may be temporarily placed on the run queue
1449 * but then chosen to run again by disp. If the thread we're placing on
1450 * the queue is in TS_ONPROC state, don't set its t_waitrq until a
1451 * replacement process is actually scheduled in swtch(). In this
1452 * situation, curthread is the only thread that could be in the ONPROC
1453 * state.
1564 } else {
1565 ASSERT(dq->dq_last == NULL);
1566 ASSERT(dq->dq_first == NULL);
1567 tp->t_link = NULL;
1568 dq->dq_first = dq->dq_last = tp;
1569 }
1570 BT_SET(dp->disp_qactmap, tpri);
1571 if (tpri > dp->disp_max_unbound_pri)
1572 dp->disp_max_unbound_pri = tpri;
1573 if (tpri > dp->disp_maxrunpri) {
1574 dp->disp_maxrunpri = tpri;
1575 membar_enter();
1576 }
1577 }
1578
1579 cp = tp->t_cpu;
1580 if (tp->t_cpupart != cp->cpu_part) {
1581 /* migrate to a cpu in the new partition */
1582 cp = tp->t_cpupart->cp_cpulist;
1583 }
1584 cp = disp_lowpri_cpu(cp, tp, tp->t_pri);
1585 disp_lock_enter_high(&cp->cpu_disp->disp_lock);
1586 ASSERT((cp->cpu_flags & CPU_QUIESCED) == 0);
1587
1588 #ifndef NPROBE
1589 /* Kernel probe */
1590 if (tnf_tracing_active)
1591 tnf_thread_queue(tp, cp, tpri);
1592 #endif /* NPROBE */
1593
1594 if (cp->cpu_chosen_level < tpri)
1595 cp->cpu_chosen_level = tpri;
1596 cpu_resched(cp, tpri);
1597 disp_lock_exit_high(&cp->cpu_disp->disp_lock);
1598 (*disp_enq_thread)(cp, 0);
1599 }
1600
1601 /*
1602 * Remove a thread from the dispatcher queue, if it is on one.
1603 * It is not an error if it is not found, but we return whether
1604 * or not it was found in case the caller wants to check.
2537 *
2538 * cp has already been removed from the list of active cpus
2539 * and tp->t_cpu has been changed so there is no risk of
2540 * tp ending up back on cp.
2541 *
2542 * Called from cpupart_move_cpu:
2543 *
2544 * The cpu has moved to a new cpupart. Any threads that
2545 * were on its dispatch queues before the move remain
2546 * in the old partition and can't run in the new partition.
2547 */
2548 ASSERT(tp->t_cpu != cp);
2549 thread_unlock(tp);
2550
2551 disp_lock_enter(&dp->disp_lock);
2552 }
2553 disp_lock_exit(&dp->disp_lock);
2554 }
2555
2556 /*
2557 * Return a score rating this CPU for running this thread: lower is better.
2558 *
2559 * If curthread is looking for a new CPU, then we ignore cpu_dispatch_pri for
2560 * curcpu (as that's our own priority).
2561 *
2562 * If a cpu is the target of an offline request, then try to avoid it.
2563 *
2564 * Otherwise we'll use double the effective dispatcher priority for the CPU.
2565 *
2566 * We do this so ht_adjust_cpu_score() can increment the score if needed,
2567 * without ending up over-riding a dispatcher priority.
2568 */
2569 static pri_t
2570 cpu_score(cpu_t *cp, kthread_t *tp)
2571 {
2572 pri_t score;
2573
2574 if (tp == curthread && cp == curthread->t_cpu)
2575 score = 2 * CPU_IDLE_PRI;
2576 else if (cp == cpu_inmotion)
2577 score = SHRT_MAX;
2578 else
2579 score = 2 * cp->cpu_dispatch_pri;
2580
2581 if (2 * cp->cpu_disp->disp_maxrunpri > score)
2582 score = 2 * cp->cpu_disp->disp_maxrunpri;
2583 if (2 * cp->cpu_chosen_level > score)
2584 score = 2 * cp->cpu_chosen_level;
2585
2586 return (ht_adjust_cpu_score(tp, cp, score));
2587 }
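
/*
 * Editor's note: the block below is an illustrative, self-contained userland
 * sketch and is not part of the original file.  It demonstrates the headroom
 * cpu_score() gains by doubling priorities: assuming the HT adjustment is a
 * small increment such as +1 (as the comment above implies), it can break a
 * tie between equally loaded CPUs but can never make a CPU with a strictly
 * lower dispatcher priority lose to one running at a higher priority.
 * Compile with:  cc -o score_demo score_demo.c
 */
#include <assert.h>
#include <stdio.h>

int
main(void)
{
	int pri_hi = 30;			/* two CPUs at priority 30 */
	int pri_lo = 29;			/* one CPU at priority 29 */

	int score_hi = 2 * pri_hi;		/* unpenalized */
	int score_hi_pen = 2 * pri_hi + 1;	/* hypothetical +1 HT penalty */
	int score_lo_pen = 2 * pri_lo + 1;	/* hypothetical +1 HT penalty */

	/* The penalty breaks the tie between the two priority-30 CPUs. */
	assert(score_hi < score_hi_pen);

	/*
	 * Since 2 * pri + 1 < 2 * (pri + 1), a penalized CPU at a lower
	 * dispatcher priority still scores better than any higher-priority
	 * CPU, penalized or not.
	 */
	assert(score_lo_pen < score_hi && score_lo_pen < score_hi_pen);

	(void) printf("scores: %d %d %d\n", score_hi, score_hi_pen,
	    score_lo_pen);
	return (0);
}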
2588
2589 /*
2590 * disp_lowpri_cpu - find a suitable CPU to run the given thread.
2591 *
2592 * We are looking for a CPU with an effective dispatch priority lower than the
2593 * thread's, so that the thread will run immediately rather than be enqueued.
2594 * For NUMA locality, we prefer "home" CPUs within the thread's ->t_lpl group.
2595 * If we don't find an available CPU there, we will expand our search to include
2596 * wider locality levels. (Note these groups are already divided by CPU
2597 * partition.)
2598 *
2599 * If the thread cannot immediately run on *any* CPU, we'll enqueue ourselves on
2600 * the best home CPU we found.
2601 *
2602 * The hint passed in is used as a starting point so we don't favor CPU 0 or any
2603 * other CPU. The caller should pass in the most recently used CPU for the
2604 * thread; it's of course possible that this CPU isn't in the home lgroup.
2605 *
2606 * This function must be called at either high SPL, or with preemption disabled,
2607 * so that the "hint" CPU cannot be removed from the online CPU list while we
2608 * are traversing it.
2609 */
2610 cpu_t *
2611 disp_lowpri_cpu(cpu_t *hint, kthread_t *tp, pri_t tpri)
2612 {
2613 cpu_t *bestcpu;
2614 cpu_t *besthomecpu;
2615 cpu_t *cp, *cpstart;
2616
2617 klgrpset_t done;
2618
2619 lpl_t *lpl_iter, *lpl_leaf;
2620
2621 ASSERT(hint != NULL);
2622 ASSERT(tp->t_lpl->lpl_ncpu > 0);
2623
2624 bestcpu = besthomecpu = NULL;
2625 klgrpset_clear(done);
2626
2627 lpl_iter = tp->t_lpl;
2628
2629 do {
2630 pri_t best = SHRT_MAX;
2631 klgrpset_t cur_set;
2632
2633 klgrpset_clear(cur_set);
2634
2635 for (int i = 0; i < lpl_iter->lpl_nrset; i++) {
2636 lpl_leaf = lpl_iter->lpl_rset[i];
2637 if (klgrpset_ismember(done, lpl_leaf->lpl_lgrpid))
2638 continue;
2639
2640 klgrpset_add(cur_set, lpl_leaf->lpl_lgrpid);
2641
2642 if (hint->cpu_lpl == lpl_leaf)
2643 cp = cpstart = hint;
2644 else
2645 cp = cpstart = lpl_leaf->lpl_cpus;
2646
2647 do {
2648 pri_t score = cpu_score(cp, tp);
2649
2650 if (score < best) {
2651 best = score;
2652 bestcpu = cp;
2653
2654 /* An idle CPU: we're done. */
2655 if (score / 2 == CPU_IDLE_PRI)
2656 goto out;
2657 }
2658 } while ((cp = cp->cpu_next_lpl) != cpstart);
2659 }
2660
2661 if (bestcpu != NULL && tpri > (best / 2))
2662 goto out;
2663
2664 if (besthomecpu == NULL)
2665 besthomecpu = bestcpu;
2666
2667 /*
2668 * Add the lgrps we just considered to the "done" set
2669 */
2670 klgrpset_or(done, cur_set);
2671
2672 } while ((lpl_iter = lpl_iter->lpl_parent) != NULL);
2673
2674 /*
2675 * The specified priority isn't high enough to run immediately
2676 * anywhere, so just return the best CPU from the home lgroup.
2677 */
2678 bestcpu = besthomecpu;
2679
2680 out:
2681 ASSERT((bestcpu->cpu_flags & CPU_QUIESCED) == 0);
2682 return (bestcpu);
2683 }
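
/*
 * Editor's note: the block below is an illustrative, self-contained userland
 * model of the expanding search above and is not part of the original file.
 * A 64-bit mask stands in for klgrpset_t, the levels[] array for the lpl
 * parent chain, and leaf_score[] is a hypothetical per-leaf best effective
 * priority (undoubled, for simplicity) in place of cpu_score().  Only the
 * traversal order is modeled: each level's leaves are scanned once and
 * recorded in a "done" set, the search widens to the parent level only if
 * the thread cannot run immediately, and if no level qualifies the best
 * leaf of the home level is returned.
 */
#include <limits.h>
#include <stdio.h>

#define	NLEAVES		4
#define	NLEVELS		2

/* Hypothetical leaf ids per level; level 0 is "home", level 1 its parent. */
static const int levels[NLEVELS][NLEAVES] = {
	{ 0, 1, -1, -1 },	/* home lgroup: leaves 0 and 1 */
	{ 0, 1, 2, 3 }		/* parent: every leaf */
};

/* Hypothetical best runnable priority currently found on each leaf. */
static const int leaf_score[NLEAVES] = { 40, 38, 25, 31 };

static int
search(int tpri)
{
	unsigned long long done = 0;
	int besthome = -1;

	for (int lvl = 0; lvl < NLEVELS; lvl++) {
		int best = SHRT_MAX;
		int bestleaf = -1;

		for (int i = 0; i < NLEAVES; i++) {
			int leaf = levels[lvl][i];

			if (leaf < 0 || (done & (1ULL << leaf)))
				continue;
			done |= 1ULL << leaf;

			if (leaf_score[leaf] < best) {
				best = leaf_score[leaf];
				bestleaf = leaf;
			}
		}

		if (bestleaf != -1 && tpri > best)
			return (bestleaf);	/* would run immediately */
		if (besthome == -1)
			besthome = bestleaf;
	}

	return (besthome);	/* enqueue on the best home-level leaf */
}

int
main(void)
{
	(void) printf("tpri 60 -> leaf %d\n", search(60));	/* runs at home */
	(void) printf("tpri 10 -> leaf %d\n", search(10));	/* queues at home */
	return (0);
}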
2684
2685 /*
2686 * This routine provides the generic idle cpu function for all processors.
2687 * If a processor has some specific code to execute when idle (say, to stop
2688 * the pipeline and save power) then that routine should be defined in the
2689 * processor-specific code (module_xx.c) and the global variable idle_cpu
2690 * set to that function.
2691 */
2692 static void
2693 generic_idle_cpu(void)
2694 {
2695 }
2696
2697 /*ARGSUSED*/
2698 static void
2699 generic_enq_thread(cpu_t *cpu, int bound)
2700 {
2701 }
2702
2703 cpu_t *
2704 disp_choose_best_cpu(void)
2705 {
2706 kthread_t *t = curthread;
2707 cpu_t *curcpu = CPU;
2708
2709 ASSERT(t->t_preempt > 0);
2710 ASSERT(t->t_state == TS_ONPROC);
2711 ASSERT(t->t_schedflag & TS_VCPU);
2712
2713 if (ht_should_run(t, curcpu))
2714 return (curcpu);
2715
2716 return (disp_lowpri_cpu(curcpu, t, t->t_pri));
2717 }