/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*
 * Copyright (c) 2012 by Delphix. All rights reserved.
 */

/*
 * The System Duty Cycle (SDC) scheduling class
 * --------------------------------------------
 *
 * Background
 *
 * Kernel threads in Solaris have traditionally not been large consumers
 * of CPU time.  They typically wake up, perform a small amount of
 * work, then go back to sleep waiting for either a timeout or another
 * signal.  On the assumption that the small amount of work that they do
 * is important for the behavior of the whole system, these threads are
 * treated kindly by the dispatcher and the SYS scheduling class: they run
 * without preemption from anything other than real-time and interrupt
 * threads; when preempted, they are put at the front of the queue, so they
 * generally do not migrate between CPUs; and they are allowed to stay
 * running until they voluntarily give up the CPU.
 *
 * As Solaris has evolved, new workloads have emerged which require the
 * kernel to perform significant amounts of CPU-intensive work.  One
 * example of such a workload is ZFS's transaction group sync processing.
 * Each sync operation generates a large batch of I/Os, and each I/O
 * may need to be compressed and/or checksummed before it is written to
 * storage.  The taskq threads which perform the compression and checksums
 * will run nonstop as long as they have work to do; a large sync operation
 * on a compression-heavy dataset can keep them busy for seconds on end.
 * This causes human-time-scale dispatch latency bubbles for any other
 * threads which have the misfortune to share a CPU with the taskq threads.
 *
 * The SDC scheduling class is a solution to this problem.
 *
 *
 * Overview
 *
 * SDC is centered around the concept of a thread's duty cycle (DC):
 *
 *			      ONPROC time
 *	Duty Cycle =	----------------------
 *			ONPROC + Runnable time
 *
 * This is the ratio of the time that the thread spent running on a CPU
 * divided by the time it spent running or trying to run.  It is unaffected
 * by any time the thread spent sleeping, stopped, etc.
 *
 * A thread joining the SDC class specifies a "target" DC that it wants
 * to run at.  To implement this policy, the routine sysdc_update() scans
 * the list of active SDC threads every few ticks and uses each thread's
 * microstate data to compute the actual duty cycle that that thread
 * has experienced recently.  If the thread is under its target DC, its
 * priority is increased to the maximum available (sysdc_maxpri, which is
 * 99 by default).  If the thread is over its target DC, its priority is
 * reduced to the minimum available (sysdc_minpri, 0 by default).  This
 * is a fairly primitive approach, in that it doesn't use any of the
 * intermediate priorities, but it's not completely inappropriate.  Even
 * though threads in the SDC class might take a while to do their job, they
 * are by some definition important if they're running inside the kernel,
 * so it is reasonable that they should get to run at priority 99.
 *
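 * As a rough illustration (the real logic lives in sysdc_update_times()
 * and sysdc_compute_pri(); duty cycles are scaled by SYSDC_DC_MAX,
 * nominally a percentage), the per-thread decision amounts to:
 *
 *	cur_DC = (ONPROC * SYSDC_DC_MAX) / (ONPROC + Runnable);
 *	new_pri = (cur_DC < target_DC) ? sysdc_maxpri : sysdc_minpri;
 *
 * For example, a thread that was ONPROC for 15ms and Runnable for 5ms
 * of its measured interval has a duty cycle of 75%; with a target DC of
 * 80% it is under target, so it will be run at sysdc_maxpri.
 *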
 * If a thread is running when sysdc_update() calculates its actual duty
 * cycle, and there are other threads of equal or greater priority on its
 * CPU's dispatch queue, sysdc_update() preempts that thread.  The thread
 * acknowledges the preemption by calling sysdc_preempt(), which calls
 * setbackdq(), which gives other threads with the same priority a chance
 * to run.  This creates a de facto time quantum for threads in the SDC
 * scheduling class.
 *
 * An SDC thread which is assigned priority 0 can continue to run if
 * nothing else needs to use the CPU that it's running on.  Similarly, an
 * SDC thread at priority 99 might not get to run as much as it wants to
 * if there are other priority-99 or higher threads on its CPU.  These
 * situations would cause the thread to get ahead of or behind its target
 * DC; the longer the situations lasted, the further ahead or behind the
 * thread would get.  Rather than condemning a thread to a lifetime of
 * paying for its youthful indiscretions, SDC keeps "base" values for
 * ONPROC and Runnable times in each thread's sysdc data, and updates these
 * values periodically.  The duty cycle is then computed using the elapsed
 * amount of ONPROC and Runnable times since those base times.
 *
 * Since sysdc_update() scans SDC threads fairly frequently, it tries to
 * keep the list of "active" threads small by pruning out threads which
 * have been asleep for a brief time.  They are not pruned immediately upon
 * going to sleep, since some threads may bounce back and forth between
 * sleeping and being runnable.
 *
 *
 * Interfaces
 *
 * void sysdc_thread_enter(t, dc, flags)
 *
 *	Moves a kernel thread from the SYS scheduling class to the
 *	SDC class.  t must have an associated LWP (created by calling
 *	lwp_kernel_create()).  The thread will have a target DC of dc.
 *	Flags should be either 0 or SYSDC_THREAD_BATCH.  If
 *	SYSDC_THREAD_BATCH is specified, the thread is expected to be
 *	doing large amounts of processing.
 *
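 *	For example (illustrative only; dc is expressed on the same
 *	scale as SYSDC_DC_MAX, nominally a percentage), a kernel thread
 *	created with an LWP via lwp_kernel_create() could request an
 *	80% duty cycle with:
 *
 *		sysdc_thread_enter(t, 80, 0);
 *
 *	or, if it is a bulk-processing thread:
 *
 *		sysdc_thread_enter(t, 80, SYSDC_THREAD_BATCH);
 *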
 *
 * Complications
 *
 * - Run queue balancing
 *
 *	The Solaris dispatcher is biased towards letting a thread run
 *	on the same CPU which it last ran on, if no more than 3 ticks
 *	(i.e. rechoose_interval) have passed since the thread last ran.
 *	This helps to preserve cache warmth.  On the other hand, it also
 *	tries to keep the per-CPU run queues fairly balanced; if the CPU
 *	chosen for a runnable thread has a run queue which is three or
 *	more threads longer than a neighboring CPU's queue, the runnable
 *	thread is dispatched onto the neighboring CPU instead.
 *
 *	These policies work well for some workloads, but not for many SDC
 *	threads.  The taskq client of SDC, for example, has many discrete
 *	units of work to do.  The work units are largely independent, so
 *	cache warmth is not an important consideration.  It is important
 *	that the threads fan out quickly to different CPUs, since the
 *	amount of work these threads have to do (a few seconds worth at a
 *	time) doesn't leave much time to correct thread placement errors
 *	(i.e. two SDC threads being dispatched to the same CPU).
 *
 *	To fix this, SDC uses the TS_RUNQMATCH flag introduced for FSS.
 *	This tells the dispatcher to keep neighboring run queues' lengths
 *	more evenly matched, which allows SDC threads to migrate more
 *	easily.
 *
 * - LWPs and system processes
 *
 *	SDC can only be used for kernel threads.  Since SDC uses microstate
 *	accounting data to compute each thread's actual duty cycle, all
 *	threads entering the SDC class must have associated LWPs (which
 *	store the microstate data).  This means that the threads have to
 *	be associated with an SSYS process, i.e. one created by newproc().
 *	If the microstate accounting information is ever moved into the
 *	kthread_t, this restriction could be lifted.
 *
 * - Dealing with oversubscription
 *
 *	Since SDC duty cycles are per-thread, it is possible that the
 *	aggregate requested duty cycle of all SDC threads in a processor
 *	set could be greater than the total CPU time available in that set.
 *	The FSS scheduling class has an analogous situation, which it deals
 *	with by reducing each thread's allotted CPU time proportionally.
 *	Since SDC doesn't need to be as precise as FSS, it uses a simpler
 *	solution to the oversubscription problem.
 *
 *	sysdc_update() accumulates the amount of time that max-priority SDC
 *	threads have spent on-CPU in each processor set, and uses that sum
 *	to create an implied duty cycle for that processor set:
 *
 *				   accumulated CPU time
 *	   pset DC =	-----------------------------------
 *			(# CPUs) * time since last update
 *
 *	If this implied duty cycle is above a maximum pset duty cycle (90%
 *	by default), sysdc_update() sets the priority of all SDC threads
 *	in that processor set to sysdc_minpri for a "break" period.  After
 *	the break period, it waits for a "nobreak" period before trying to
 *	enforce the pset duty cycle limit again.
 *
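 *	As a worked example with the default limit of 90%: on a 4-CPU
 *	pset with a 20ms update interval, the denominator above is
 *	4 * 20ms = 80ms of available CPU time.  If the max-priority SDC
 *	threads in that pset accumulated 75ms of ONPROC time over the
 *	interval, the pset DC is 75/80, about 94%, so a "break" begins.
 *	With the default sysdc_break_msec of 80ms, the break lasts 80ms,
 *	followed by a "nobreak" period of about 80ms * 90 / (100 - 90) =
 *	720ms before the limit is enforced again.
 *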
 * - Processor sets
 *
 *	As the above implies, SDC is processor set aware, but it does not
 *	currently allow threads to change processor sets while in the SDC
 *	class.  Instead, those threads must join the desired processor set
 *	before entering SDC. [1]
 *
 * - Batch threads
 *
 *	A thread joining the SDC class can specify the SYSDC_THREAD_BATCH
 *	flag.  This flag currently has no effect, but marks threads which
 *	do bulk processing.
 *
 * - t_kpri_req
 *
 *	The TS and FSS scheduling classes pay attention to t_kpri_req,
 *	which provides a simple form of priority inheritance for
 *	synchronization primitives (such as rwlocks held as READER) which
 *	cannot be traced to a unique thread.  The SDC class does not honor
 *	t_kpri_req, for a few reasons:
 *
 *	1.  t_kpri_req is notoriously inaccurate.  A measure of its
 *	    inaccuracy is that it needs to be cleared every time a thread
 *	    returns to user mode, because it is frequently non-zero at that
 *	    point.  This can happen because "ownership" of synchronization
 *	    primitives that use t_kpri_req can be silently handed off,
 *	    leaving no opportunity to will the t_kpri_req inheritance.
 *
 *	2.  Unlike in TS and FSS, threads in SDC *will* eventually run at
 *	    kernel priority.  This means that even if an SDC thread
 *	    is holding a synchronization primitive and running at low
 *	    priority, its priority will eventually be raised above 60,
 *	    allowing it to drive on and release the resource.
 *
 *	3.  The first consumer of SDC uses the taskq subsystem, which holds
 *	    a reader lock for the duration of the task's execution.  This
 *	    would mean that SDC threads would never drop below kernel
 *	    priority in practice, which defeats one of the purposes of SDC.
 *
 * - Why not FSS?
 *
 *	It might seem that the existing FSS scheduling class could solve
 *	the problems that SDC is attempting to solve.  FSS's more precise
 *	solution to the oversubscription problem would hardly cause
 *	trouble, as long as it performed well.  SDC is implemented as
 *	a separate scheduling class for two main reasons: the initial
 *	consumer of SDC does not map well onto the "project" abstraction
 *	that is central to FSS, and FSS does not expect to run at kernel
 *	priorities.
 *
 *
 * Tunables
 *
 * - sysdc_update_interval_msec:  Number of milliseconds between
 *	consecutive thread priority updates.
 *
 * - sysdc_reset_interval_msec:  Number of milliseconds between
 *	consecutive resets of a thread's base ONPROC and Runnable
 *	times.
 *
 * - sysdc_prune_interval_msec:  Number of milliseconds of sleeping
 *	before a thread is pruned from the active list.
 *
 * - sysdc_max_pset_DC:  Allowable percentage of a processor set's
 *	CPU time which SDC can give to its high-priority threads.
 *
 * - sysdc_break_msec:  Number of milliseconds of "break" taken when
 *	sysdc_max_pset_DC is exceeded.
 *
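 * To illustrate how these interact (assuming a 100Hz clock, so one tick
 * is 10ms), sysdc_initparam() derives its internal constants from the
 * defaults as follows: priorities are updated every 20ms / 10ms = 2
 * ticks; base times are reset every 400ms / 20ms = 20 updates; a
 * sleeping thread is pruned after 100ms / 20ms = 5 updates; and a
 * "break" lasts 80ms / 20ms = 4 updates, followed by a "nobreak" period
 * of 4 * 90 / (100 - 90) = 36 updates (720ms).
 *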
 *
 * Future work (in SDC and related subsystems)
 *
 * - Per-thread rechoose interval (0 for SDC)
 *
 *	Allow each thread to specify its own rechoose interval.  SDC
 *	threads would specify an interval of zero, which would rechoose
 *	the CPU with the lowest priority once per update.
 *
 * - Allow threads to change processor sets after joining the SDC class
 *
 * - Thread groups and per-group DC
 *
 *	It might be nice to be able to specify a duty cycle which applies
 *	to a group of threads in aggregate.
 *
 * - Per-group DC callback to allow dynamic DC tuning
 *
 *	Currently, DCs are assigned when the thread joins SDC.  Some
 *	workloads could benefit from being able to tune their DC using
 *	subsystem-specific knowledge about the workload.
 *
 * - Finer-grained priority updates
 *
 * - More nuanced management of oversubscription
 *
 * - Moving other CPU-intensive threads into SDC
 *
 * - Move msacct data into kthread_t
 *
 *	This would allow kernel threads without LWPs to join SDC.
 *
 *
 * Footnotes
 *
 * [1] The details of doing so are left as an exercise for the reader.
 */

#include <sys/types.h>
#include <sys/sysdc.h>
#include <sys/sysdc_impl.h>

#include <sys/class.h>
#include <sys/cmn_err.h>
#include <sys/cpuvar.h>
#include <sys/cpupart.h>
#include <sys/debug.h>
#include <sys/disp.h>
#include <sys/errno.h>
#include <sys/inline.h>
#include <sys/kmem.h>
#include <sys/modctl.h>
#include <sys/schedctl.h>
#include <sys/sdt.h>
#include <sys/sunddi.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/var.h>

/*
 * Tunables - loaded into the internal state at module load time
 */
uint_t		sysdc_update_interval_msec = 20;
uint_t		sysdc_reset_interval_msec = 400;
uint_t		sysdc_prune_interval_msec = 100;
uint_t		sysdc_max_pset_DC = 90;
uint_t		sysdc_break_msec = 80;

/*
 * Internal state - constants set up by sysdc_initparam()
 */
static clock_t	sysdc_update_ticks;	/* ticks between updates */
static uint_t	sysdc_prune_updates;	/* updates asleep before pruning */
static uint_t	sysdc_reset_updates;	/* # of updates before reset */
static uint_t	sysdc_break_updates;	/* updates to break */
static uint_t	sysdc_nobreak_updates;	/* updates to not check */
static uint_t	sysdc_minDC;		/* minimum allowed DC */
static uint_t	sysdc_maxDC;		/* maximum allowed DC */
static pri_t	sysdc_minpri;		/* minimum allowed priority */
static pri_t	sysdc_maxpri;		/* maximum allowed priority */

/*
 * Internal state
 */
static kmutex_t	sysdc_pset_lock;	/* lock protecting pset data */
static list_t	sysdc_psets;		/* list of psets with SDC threads */
static uint_t	sysdc_param_init;	/* sysdc_initparam() has been called */
static uint_t	sysdc_update_timeout_started; /* update timeout is active */
static hrtime_t	sysdc_last_update;	/* time of last sysdc_update() */
static sysdc_t	sysdc_dummy;		/* used to terminate active lists */

/*
 * Internal state - active hash table
 */
#define	SYSDC_NLISTS	8
#define	SYSDC_HASH(sdc)	(((uintptr_t)(sdc) >> 6) & (SYSDC_NLISTS - 1))
static sysdc_list_t	sysdc_active[SYSDC_NLISTS];
#define	SYSDC_LIST(sdc)		(&sysdc_active[SYSDC_HASH(sdc)])

#ifdef DEBUG
static struct {
	uint64_t	sysdc_update_times_asleep;
	uint64_t	sysdc_update_times_base_ran_backwards;
	uint64_t	sysdc_update_times_already_done;
	uint64_t	sysdc_update_times_cur_ran_backwards;
	uint64_t	sysdc_compute_pri_breaking;
	uint64_t	sysdc_activate_enter;
	uint64_t	sysdc_update_enter;
	uint64_t	sysdc_update_exited;
	uint64_t	sysdc_update_not_sdc;
	uint64_t	sysdc_update_idle;
	uint64_t	sysdc_update_take_break;
	uint64_t	sysdc_update_no_psets;
	uint64_t	sysdc_tick_not_sdc;
	uint64_t	sysdc_tick_quantum_expired;
	uint64_t	sysdc_thread_enter_enter;
} sysdc_stats;

#define	SYSDC_INC_STAT(x)	(sysdc_stats.x++)
#else
#define	SYSDC_INC_STAT(x)	((void)0)
#endif

/* macros are UPPER CASE */
#define	HOWMANY(a, b)	howmany((a), (b))
#define	MSECTOTICKS(a)	HOWMANY((a) * 1000, usec_per_tick)

static void
sysdc_initparam(void)
{
	uint_t sysdc_break_ticks;

	/* update / prune intervals */
	sysdc_update_ticks = MSECTOTICKS(sysdc_update_interval_msec);

	sysdc_prune_updates = HOWMANY(sysdc_prune_interval_msec,
	    sysdc_update_interval_msec);
	sysdc_reset_updates = HOWMANY(sysdc_reset_interval_msec,
	    sysdc_update_interval_msec);

	/* We must get at least a little time on CPU. */
	sysdc_minDC = 1;
	sysdc_maxDC = SYSDC_DC_MAX;
	sysdc_minpri = 0;
	sysdc_maxpri = maxclsyspri;

	/* break parameters */
	if (sysdc_max_pset_DC > SYSDC_DC_MAX) {
		sysdc_max_pset_DC = SYSDC_DC_MAX;
	}
	sysdc_break_ticks = MSECTOTICKS(sysdc_break_msec);
	sysdc_break_updates = HOWMANY(sysdc_break_ticks, sysdc_update_ticks);

	/*
	 * We want:
	 *
	 *	sysdc_max_pset_DC = (nobreak / (break + nobreak))
	 *
	 *	==>	nobreak = sysdc_max_pset_DC * (break + nobreak)
	 *
	 *			  sysdc_max_pset_DC * break
	 *	==>	nobreak = -------------------------
	 *			   1 - sysdc_max_pset_DC
	 */
	sysdc_nobreak_updates =
	    HOWMANY((uint64_t)sysdc_break_updates * sysdc_max_pset_DC,
	    (SYSDC_DC_MAX - sysdc_max_pset_DC));

	sysdc_param_init = 1;
}

#undef HOWMANY
#undef MSECTOTICKS

#define	SDC_UPDATE_INITIAL	0x1	/* for the initial update */
#define	SDC_UPDATE_TIMEOUT	0x2	/* from sysdc_update() */
#define	SDC_UPDATE_TICK		0x4	/* from sysdc_tick(), on expiry */

/*
 * Updates the recorded times in the sdc, and returns the elapsed ONPROC
 * and Runnable times since the last reset.
 *
 * newO is the thread's actual ONPROC time; it's used during sysdc_update()
 * to track processor set usage.
 */
static void
sysdc_update_times(sysdc_t *sdc, uint_t flags,
    hrtime_t *O, hrtime_t *R, hrtime_t *newO)
{
	kthread_t *const t = sdc->sdc_thread;
	const uint_t	initial = (flags & SDC_UPDATE_INITIAL);
	const uint_t	update = (flags & SDC_UPDATE_TIMEOUT);
	const clock_t	now = ddi_get_lbolt();
	uint_t		do_reset;

	ASSERT(THREAD_LOCK_HELD(t));

	*O = *R = 0;

	/* If we've been sleeping, we know we haven't had any ONPROC time. */
	if (sdc->sdc_sleep_updates != 0 &&
	    sdc->sdc_sleep_updates != sdc->sdc_nupdates) {
		*newO = sdc->sdc_last_base_O;
		SYSDC_INC_STAT(sysdc_update_times_asleep);
		return;
	}

	/*
	 * If this is our first update, or we've hit the reset point,
	 * we need to reset our base_{O,R}.  Once we've updated them, we
	 * report O and R for the entire prior interval.
	 */
	do_reset = initial;
	if (update) {
		++sdc->sdc_nupdates;
		if ((sdc->sdc_nupdates % sysdc_reset_updates) == 0)
			do_reset = 1;
	}
	if (do_reset) {
		hrtime_t baseO, baseR;
		if (initial) {
			/*
			 * Start off our cycle count somewhere in the middle,
			 * to keep the resets from all happening at once.
			 *
			 * 4999 is a handy prime much larger than
			 * sysdc_reset_updates, so that we don't run into
			 * trouble if the resolution is a multiple of
			 * sysdc_reset_updates.
			 */
			sdc->sdc_nupdates = (uint_t)((gethrtime() % 4999) %
			    sysdc_reset_updates);
			baseO = baseR = 0;
		} else {
			baseO = sdc->sdc_base_O;
			baseR = sdc->sdc_base_R;
		}

		mstate_systhread_times(t, &sdc->sdc_base_O, &sdc->sdc_base_R);
		*newO = sdc->sdc_base_O;

		sdc->sdc_reset = now;
		sdc->sdc_pri_check = -1;	/* force mismatch below */

		/*
		 * See below for rationale.
		 */
		if (baseO > sdc->sdc_base_O || baseR > sdc->sdc_base_R) {
			SYSDC_INC_STAT(sysdc_update_times_base_ran_backwards);
			baseO = sdc->sdc_base_O;
			baseR = sdc->sdc_base_R;
		}

		/* compute based on the entire interval */
		*O = (sdc->sdc_base_O - baseO);
		*R = (sdc->sdc_base_R - baseR);
		return;
	}

	/*
	 * If we're called from sysdc_update(), we *must* return a value
	 * for newO, so we always call mstate_systhread_times().
	 *
	 * Otherwise, if we've already done a pri check this tick,
	 * we can skip it.
	 */
	if (!update && sdc->sdc_pri_check == now) {
		SYSDC_INC_STAT(sysdc_update_times_already_done);
		return;
	}

	/* Get the current times from the thread */
	sdc->sdc_pri_check = now;
	mstate_systhread_times(t, &sdc->sdc_cur_O, &sdc->sdc_cur_R);
	*newO = sdc->sdc_cur_O;

	/*
	 * The updating of microstate accounting is not done under a
	 * consistent set of locks, particularly the t_waitrq field.  This
	 * can lead to narrow windows in which we account for time in the
	 * wrong bucket, which on the next read will be accounted for
	 * correctly.
	 *
	 * If our sdc_base_* fields were affected by one of these blips, we
	 * throw away the old data, and pretend this tick didn't happen.
	 */
	if (sdc->sdc_cur_O < sdc->sdc_base_O ||
	    sdc->sdc_cur_R < sdc->sdc_base_R) {

		sdc->sdc_base_O = sdc->sdc_cur_O;
		sdc->sdc_base_R = sdc->sdc_cur_R;

		SYSDC_INC_STAT(sysdc_update_times_cur_ran_backwards);
		return;
	}

	*O = sdc->sdc_cur_O - sdc->sdc_base_O;
	*R = sdc->sdc_cur_R - sdc->sdc_base_R;
}

/*
 * sysdc_compute_pri()
 *
 *	Recomputes the priority of the thread, leaving the result in
 *	sdc->sdc_epri.  Returns 1 if a priority update should occur
 *	(which will also trigger a cpu_surrender()), otherwise
 *	returns 0.
 */
static uint_t
sysdc_compute_pri(sysdc_t *sdc, uint_t flags)
{
	kthread_t *const t = sdc->sdc_thread;
	const uint_t	update = (flags & SDC_UPDATE_TIMEOUT);
	const uint_t	tick = (flags & SDC_UPDATE_TICK);

	hrtime_t	O, R;
	hrtime_t	newO = -1;

	ASSERT(THREAD_LOCK_HELD(t));

	sysdc_update_times(sdc, flags, &O, &R, &newO);
	ASSERT(!update || newO != -1);

	/* If we have new data, recompute our priority. */
	if ((O + R) != 0) {
		sdc->sdc_cur_DC = (O * SYSDC_DC_MAX) / (O + R);

		/* Adjust our priority to move our DC closer to the target. */
		if (sdc->sdc_cur_DC < sdc->sdc_target_DC)
			sdc->sdc_pri = sdc->sdc_maxpri;
		else
			sdc->sdc_pri = sdc->sdc_minpri;
	}

	/*
	 * If our per-pset duty cycle goes over the max, we will take a break.
	 * This forces all sysdc threads in the pset to minimum priority, in
	 * order to let everyone else have a chance at the CPU.
	 */
	if (sdc->sdc_pset->sdp_need_break) {
		SYSDC_INC_STAT(sysdc_compute_pri_breaking);
		sdc->sdc_epri = sdc->sdc_minpri;
	} else {
		sdc->sdc_epri = sdc->sdc_pri;
	}

	DTRACE_PROBE4(sysdc__compute__pri,
	    kthread_t *, t, pri_t, sdc->sdc_epri, uint_t, sdc->sdc_cur_DC,
	    uint_t, sdc->sdc_target_DC);

	/*
	 * For sysdc_update(), we compute the ONPROC time for high-priority
	 * threads, which is used to calculate the per-pset duty cycle.  We
	 * will always tell our callers to update the thread's priority,
	 * since we want to force a cpu_surrender().
	 *
	 * We reset sdc_update_ticks so that sysdc_tick() will only update
	 * the thread's priority if our timeout is delayed by a tick or
	 * more.
	 */
	if (update) {
		/* SDC threads are not allowed to change cpupart bindings. */
		ASSERT(t->t_cpupart == sdc->sdc_pset->sdp_cpupart);

		/* If we were at MAXPRI, account for our onproc time. */
		if (t->t_pri == sdc->sdc_maxpri &&
		    sdc->sdc_last_base_O != 0 &&
		    sdc->sdc_last_base_O < newO) {
			sdc->sdc_last_O = newO - sdc->sdc_last_base_O;
			sdc->sdc_pset->sdp_onproc_time +=
			    (uint64_t)sdc->sdc_last_O;
			sdc->sdc_pset->sdp_onproc_threads++;
		} else {
			sdc->sdc_last_O = 0;
		}
		sdc->sdc_last_base_O = newO;

		sdc->sdc_update_ticks = sdc->sdc_ticks + sysdc_update_ticks + 1;
		return (1);
	}

	/*
	 * Like sysdc_update(), sysdc_tick() always wants to update the
	 * thread's priority, so that the CPU is surrendered if necessary.
	 * We reset sdc_update_ticks so that if the timeout continues to be
	 * delayed, we'll update at the regular interval.
	 */
	if (tick) {
		ASSERT(sdc->sdc_ticks == sdc->sdc_update_ticks);
		sdc->sdc_update_ticks = sdc->sdc_ticks + sysdc_update_ticks;
		return (1);
	}

	/*
	 * Otherwise, only tell our callers to update the priority if it has
	 * changed.
	 */
	return (sdc->sdc_epri != t->t_pri);
}

static void
sysdc_update_pri(sysdc_t *sdc, uint_t flags)
{
	kthread_t *t = sdc->sdc_thread;

	ASSERT(THREAD_LOCK_HELD(t));

	if (sysdc_compute_pri(sdc, flags)) {
		if (!thread_change_pri(t, sdc->sdc_epri, 0)) {
			cpu_surrender(t);
		}
	}
}

/*
 * Add a thread onto the active list.  It will only be removed by
 * sysdc_update().
 */
static void
sysdc_activate(sysdc_t *sdc)
{
	sysdc_t	*volatile *headp = &SYSDC_LIST(sdc)->sdl_list;
	sysdc_t	*head;
	kthread_t *t = sdc->sdc_thread;

	SYSDC_INC_STAT(sysdc_activate_enter);

	ASSERT(sdc->sdc_next == NULL);
	ASSERT(THREAD_LOCK_HELD(t));

	do {
		head = *headp;
		sdc->sdc_next = head;
	} while (atomic_cas_ptr(headp, head, sdc) != head);
}

/*
 * sysdc_update() has two jobs:
 *
 *	1. It updates the priorities of all active SDC threads on the system.
 *	2. It measures pset CPU usage and enforces sysdc_max_pset_DC.
 */
static void
sysdc_update(void *arg)
{
	int		idx;
	sysdc_t		*freelist = NULL;
	sysdc_pset_t	*cur;
	hrtime_t	now, diff;
	uint_t		redeploy = 1;

	SYSDC_INC_STAT(sysdc_update_enter);

	ASSERT(sysdc_update_timeout_started);

	/*
	 * If this is our first time through, diff will be gigantic, and
	 * no breaks will be necessary.
	 */
	now = gethrtime();
	diff = now - sysdc_last_update;
	sysdc_last_update = now;

	mutex_enter(&sysdc_pset_lock);
	for (cur = list_head(&sysdc_psets); cur != NULL;
	    cur = list_next(&sysdc_psets, cur)) {
		boolean_t breaking = (cur->sdp_should_break != 0);

		if (cur->sdp_need_break != breaking) {
			DTRACE_PROBE2(sdc__pset__break, sysdc_pset_t *, cur,
			    boolean_t, breaking);
		}
		cur->sdp_onproc_time = 0;
		cur->sdp_onproc_threads = 0;
		cur->sdp_need_break = breaking;
	}
	mutex_exit(&sysdc_pset_lock);

	for (idx = 0; idx < SYSDC_NLISTS; idx++) {
		sysdc_list_t		*sdl = &sysdc_active[idx];
		sysdc_t *volatile	*headp = &sdl->sdl_list;
		sysdc_t			*head, *tail;
		sysdc_t			**prevptr;

		if (*headp == &sysdc_dummy)
			continue;

		/* Prevent any threads from exiting while we're poking them. */
		mutex_enter(&sdl->sdl_lock);

		/*
		 * Each sdl_list contains a singly-linked list of active
		 * threads.  Threads which become active while we are
		 * processing the list will be added to sdl_list.  Since we
		 * don't want that to interfere with our own processing, we
		 * swap in an empty list.  Any newly active threads will
		 * go on to this empty list.  When finished, we'll put any
		 * such threads at the end of the processed list.
		 */
		head = atomic_swap_ptr(headp, &sysdc_dummy);
		prevptr = &head;
		while (*prevptr != &sysdc_dummy) {
			sysdc_t		*const	sdc = *prevptr;
			kthread_t	*const	t = sdc->sdc_thread;

			/*
			 * If the thread has exited, move its sysdc_t onto
			 * freelist, to be freed later.
			 */
			if (t == NULL) {
				*prevptr = sdc->sdc_next;
				SYSDC_INC_STAT(sysdc_update_exited);
				sdc->sdc_next = freelist;
				freelist = sdc;
				continue;
			}

			thread_lock(t);
			if (t->t_cid != sysdccid) {
				thread_unlock(t);
				prevptr = &sdc->sdc_next;
				SYSDC_INC_STAT(sysdc_update_not_sdc);
				continue;
			}
			ASSERT(t->t_cldata == sdc);

			/*
			 * If the thread has been sleeping for longer
			 * than sysdc_prune_interval, make it inactive by
			 * removing it from the list.
			 */
			if (!(t->t_state & (TS_RUN | TS_ONPROC)) &&
			    sdc->sdc_sleep_updates != 0 &&
			    (sdc->sdc_sleep_updates - sdc->sdc_nupdates) >
			    sysdc_prune_updates) {
				*prevptr = sdc->sdc_next;
				SYSDC_INC_STAT(sysdc_update_idle);
				sdc->sdc_next = NULL;
				thread_unlock(t);
				continue;
			}
			sysdc_update_pri(sdc, SDC_UPDATE_TIMEOUT);
			thread_unlock(t);

			prevptr = &sdc->sdc_next;
		}

		/*
		 * Add our list to the bucket, putting any new entries
		 * added while we were working at the tail of the list.
		 */
		do {
			tail = *headp;
			*prevptr = tail;
		} while (atomic_cas_ptr(headp, tail, head) != tail);

		mutex_exit(&sdl->sdl_lock);
	}

	mutex_enter(&sysdc_pset_lock);
	for (cur = list_head(&sysdc_psets); cur != NULL;
	    cur = list_next(&sysdc_psets, cur)) {

		cur->sdp_vtime_last_interval =
		    diff * cur->sdp_cpupart->cp_ncpus;
		cur->sdp_DC_last_interval =
		    (cur->sdp_onproc_time * SYSDC_DC_MAX) /
		    cur->sdp_vtime_last_interval;

		if (cur->sdp_should_break > 0) {
			cur->sdp_should_break--;	/* breaking */
			continue;
		}
		if (cur->sdp_dont_break > 0) {
			cur->sdp_dont_break--;	/* waiting before checking */
			continue;
		}
		if (cur->sdp_DC_last_interval > sysdc_max_pset_DC) {
			cur->sdp_should_break = sysdc_break_updates;
			cur->sdp_dont_break = sysdc_nobreak_updates;
			SYSDC_INC_STAT(sysdc_update_take_break);
		}
	}

	/*
	 * If there are no sysdc_psets, there can be no threads, so
	 * we can stop doing our timeout.  Since we're holding the
	 * sysdc_pset_lock, no new sysdc_psets can come in, which will
	 * prevent anyone from racing with this and dropping our timeout
	 * on the floor.
	 */
	if (list_is_empty(&sysdc_psets)) {
		SYSDC_INC_STAT(sysdc_update_no_psets);
		ASSERT(sysdc_update_timeout_started);
		sysdc_update_timeout_started = 0;

		redeploy = 0;
	}
	mutex_exit(&sysdc_pset_lock);

	while (freelist != NULL) {
		sysdc_t *cur = freelist;
		freelist = cur->sdc_next;
		kmem_free(cur, sizeof (*cur));
	}

	if (redeploy) {
		(void) timeout(sysdc_update, arg, sysdc_update_ticks);
	}
}

static void
sysdc_preempt(kthread_t *t)
{
	ASSERT(t == curthread);
	ASSERT(THREAD_LOCK_HELD(t));

	setbackdq(t);		/* give others a chance to run */
}

static void
sysdc_tick(kthread_t *t)
{
	sysdc_t *sdc;

	thread_lock(t);
	if (t->t_cid != sysdccid) {
		SYSDC_INC_STAT(sysdc_tick_not_sdc);
		thread_unlock(t);
		return;
	}
	sdc = t->t_cldata;
	if (t->t_state == TS_ONPROC &&
	    t->t_pri < t->t_disp_queue->disp_maxrunpri) {
		cpu_surrender(t);
	}

	if (t->t_state == TS_ONPROC || t->t_state == TS_RUN) {
		ASSERT(sdc->sdc_sleep_updates == 0);
	}

	ASSERT(sdc->sdc_ticks != sdc->sdc_update_ticks);
	sdc->sdc_ticks++;
	if (sdc->sdc_ticks == sdc->sdc_update_ticks) {
		SYSDC_INC_STAT(sysdc_tick_quantum_expired);
		sysdc_update_pri(sdc, SDC_UPDATE_TICK);
		ASSERT(sdc->sdc_ticks != sdc->sdc_update_ticks);
	}
	thread_unlock(t);
}

static void
sysdc_setrun(kthread_t *t)
{
	sysdc_t *sdc = t->t_cldata;

	ASSERT(THREAD_LOCK_HELD(t));	/* t should be in transition */

	sdc->sdc_sleep_updates = 0;

	if (sdc->sdc_next == NULL) {
		/*
		 * Since we're in transition, we don't want to use the
		 * full thread_update_pri().
		 */
		if (sysdc_compute_pri(sdc, 0)) {
			THREAD_CHANGE_PRI(t, sdc->sdc_epri);
		}
		sysdc_activate(sdc);

		ASSERT(sdc->sdc_next != NULL);
	}

	setbackdq(t);
}

static void
sysdc_wakeup(kthread_t *t)
{
	sysdc_setrun(t);
}

static void
sysdc_sleep(kthread_t *t)
{
	sysdc_t *sdc = t->t_cldata;

	ASSERT(THREAD_LOCK_HELD(t));	/* t should be in transition */

	sdc->sdc_sleep_updates = sdc->sdc_nupdates;
}

/*ARGSUSED*/
static int
sysdc_enterclass(kthread_t *t, id_t cid, void *parmsp, cred_t *reqpcredp,
    void *bufp)
{
	cpupart_t *const cpupart = t->t_cpupart;
	sysdc_t		*sdc = bufp;
	sysdc_params_t	*sdpp = parmsp;
	sysdc_pset_t	*newpset = sdc->sdc_pset;
	sysdc_pset_t	*pset;
	int		start_timeout;

	if (t->t_cid != syscid)
		return (EPERM);

	ASSERT(ttolwp(t) != NULL);
	ASSERT(sdpp != NULL);
	ASSERT(newpset != NULL);
	ASSERT(sysdc_param_init);

	ASSERT(sdpp->sdp_minpri >= sysdc_minpri);
	ASSERT(sdpp->sdp_maxpri <= sysdc_maxpri);
	ASSERT(sdpp->sdp_DC >= sysdc_minDC);
	ASSERT(sdpp->sdp_DC <= sysdc_maxDC);

	sdc->sdc_thread = t;
	sdc->sdc_pri = sdpp->sdp_maxpri;	/* start off maximally */
	sdc->sdc_minpri = sdpp->sdp_minpri;
	sdc->sdc_maxpri = sdpp->sdp_maxpri;
	sdc->sdc_target_DC = sdpp->sdp_DC;
	sdc->sdc_ticks = 0;
	sdc->sdc_update_ticks = sysdc_update_ticks + 1;

	/* Assign ourselves to the appropriate pset. */
	sdc->sdc_pset = NULL;
	mutex_enter(&sysdc_pset_lock);
	for (pset = list_head(&sysdc_psets); pset != NULL;
	    pset = list_next(&sysdc_psets, pset)) {
		if (pset->sdp_cpupart == cpupart) {
			break;
		}
	}
	if (pset == NULL) {
		pset = newpset;
		newpset = NULL;
		pset->sdp_cpupart = cpupart;
		list_insert_tail(&sysdc_psets, pset);
	}
	pset->sdp_nthreads++;
	ASSERT(pset->sdp_nthreads > 0);

	sdc->sdc_pset = pset;

	start_timeout = (sysdc_update_timeout_started == 0);
	sysdc_update_timeout_started = 1;
	mutex_exit(&sysdc_pset_lock);

	if (newpset != NULL)
		kmem_free(newpset, sizeof (*newpset));

	/* Update t's scheduling class and priority. */
	thread_lock(t);
	t->t_clfuncs = &(sclass[cid].cl_funcs->thread);
	t->t_cid = cid;
	t->t_cldata = sdc;
	t->t_schedflag |= TS_RUNQMATCH;

	sysdc_update_pri(sdc, SDC_UPDATE_INITIAL);
	thread_unlock(t);

	/* Kick off the thread timeout if we're the first one in. */
	if (start_timeout) {
		(void) timeout(sysdc_update, NULL, sysdc_update_ticks);
	}

	return (0);
}

static void
sysdc_leave(sysdc_t *sdc)
{
	sysdc_pset_t	*sdp = sdc->sdc_pset;
	sysdc_list_t	*sdl = SYSDC_LIST(sdc);
	uint_t		freedc;

	mutex_enter(&sdl->sdl_lock);	/* block sysdc_update() */
	sdc->sdc_thread = NULL;
	freedc = (sdc->sdc_next == NULL);
	mutex_exit(&sdl->sdl_lock);

	mutex_enter(&sysdc_pset_lock);
	ASSERT(sdp != NULL);
	ASSERT(sdp->sdp_nthreads > 0);
	--sdp->sdp_nthreads;
	if (sdp->sdp_nthreads == 0) {
		list_remove(&sysdc_psets, sdp);
	} else {
		sdp = NULL;
	}
	mutex_exit(&sysdc_pset_lock);

	if (freedc)
		kmem_free(sdc, sizeof (*sdc));
	if (sdp != NULL)
		kmem_free(sdp, sizeof (*sdp));
}

static void
sysdc_exitclass(void *buf)
{
	sysdc_leave((sysdc_t *)buf);
}

/*ARGSUSED*/
static int
sysdc_canexit(kthread_t *t, cred_t *reqpcredp)
{
	/* Threads cannot exit SDC once joined, except in a body bag. */
	return (EPERM);
}

static void
sysdc_exit(kthread_t *t)
{
	sysdc_t	*sdc;

	/* We're exiting, so we just rejoin the SYS class. */
	thread_lock(t);
	ASSERT(t->t_cid == sysdccid);
	sdc = t->t_cldata;
	t->t_cid = syscid;
	t->t_cldata = NULL;
	t->t_clfuncs = &(sclass[syscid].cl_funcs->thread);
	(void) thread_change_pri(t, maxclsyspri, 0);
	t->t_schedflag &= ~TS_RUNQMATCH;
	thread_unlock_nopreempt(t);

	/* Unlink the sdc from everything. */
	sysdc_leave(sdc);
}

/*ARGSUSED*/
static int
sysdc_fork(kthread_t *t, kthread_t *ct, void *bufp)
{
	/*
	 * Threads cannot be created with SDC as their class; they must
	 * be created as SYS and then added with sysdc_thread_enter().
	 * Because of this restriction, sysdc_fork() should never be called.
	 */
	panic("sysdc cannot be forked");

	return (ENOSYS);
}

/*ARGSUSED*/
static void
sysdc_forkret(kthread_t *t, kthread_t *ct)
{
	/* SDC threads are part of system processes, which never fork. */
	panic("sysdc cannot be forked");
}

static pri_t
sysdc_globpri(kthread_t *t)
{
	return (t->t_epri);
}

/*ARGSUSED*/
static pri_t
sysdc_no_swap(kthread_t *t, int flags)
{
	/* SDC threads cannot be swapped. */
	return (-1);
}

/*
 * Get maximum and minimum priorities enjoyed by SDC threads.
 */
static int
sysdc_getclpri(pcpri_t *pcprip)
{
	pcprip->pc_clpmax = sysdc_maxpri;
	pcprip->pc_clpmin = sysdc_minpri;
	return (0);
}

/*ARGSUSED*/
static int
sysdc_getclinfo(void *arg)
{
	return (0);		/* no class-specific info */
}

/*ARGSUSED*/
static int
sysdc_alloc(void **p, int flag)
{
	sysdc_t *new;

	*p = NULL;
	if ((new = kmem_zalloc(sizeof (*new), flag)) == NULL) {
		return (ENOMEM);
	}
	if ((new->sdc_pset = kmem_zalloc(sizeof (*new->sdc_pset), flag)) ==
	    NULL) {
		kmem_free(new, sizeof (*new));
		return (ENOMEM);
	}
	*p = new;
	return (0);
}

static void
sysdc_free(void *p)
{
	sysdc_t *sdc = p;

	if (sdc != NULL) {
		/*
		 * We must have failed CL_ENTERCLASS(), so our pset should be
		 * there and unused.
		 */
		ASSERT(sdc->sdc_pset != NULL);
		ASSERT(sdc->sdc_pset->sdp_cpupart == NULL);
		kmem_free(sdc->sdc_pset, sizeof (*sdc->sdc_pset));
		kmem_free(sdc, sizeof (*sdc));
	}
}

static int sysdc_enosys();	/* Boy, ANSI-C's K&R compatibility is weird. */
static int sysdc_einval();
static void sysdc_nullsys();

static struct classfuncs sysdc_classfuncs = {
	/* messages to class manager */
	{
		sysdc_enosys,	/* admin */
		sysdc_getclinfo,
		sysdc_enosys,	/* parmsin */
		sysdc_enosys,	/* parmsout */
		sysdc_enosys,	/* vaparmsin */
		sysdc_enosys,	/* vaparmsout */
		sysdc_getclpri,
		sysdc_alloc,
		sysdc_free,
	},
	/* operations on threads */
	{
		sysdc_enterclass,
		sysdc_exitclass,
		sysdc_canexit,
		sysdc_fork,
		sysdc_forkret,
		sysdc_nullsys,	/* parmsget */
		sysdc_enosys,	/* parmsset */
		sysdc_nullsys,	/* stop */
		sysdc_exit,
		sysdc_nullsys,	/* active */
		sysdc_nullsys,	/* inactive */
		sysdc_no_swap,	/* swapin */
		sysdc_no_swap,	/* swapout */
		sysdc_nullsys,	/* trapret */
		sysdc_preempt,
		sysdc_setrun,
		sysdc_sleep,
		sysdc_tick,
		sysdc_wakeup,
		sysdc_einval,	/* donice */
		sysdc_globpri,
		sysdc_nullsys,	/* set_process_group */
		sysdc_nullsys,	/* yield */
		sysdc_einval,	/* doprio */
	}
};

static int
sysdc_enosys()
{
	return (ENOSYS);
}

static int
sysdc_einval()
{
	return (EINVAL);
}

static void
sysdc_nullsys()
{
}

/*ARGSUSED*/
static pri_t
sysdc_init(id_t cid, int clparmsz, classfuncs_t **clfuncspp)
{
	int idx;

	list_create(&sysdc_psets, sizeof (sysdc_pset_t),
	    offsetof(sysdc_pset_t, sdp_node));

	for (idx = 0; idx < SYSDC_NLISTS; idx++) {
		sysdc_active[idx].sdl_list = &sysdc_dummy;
	}

	sysdc_initparam();

	sysdccid = cid;
	*clfuncspp = &sysdc_classfuncs;

	return ((pri_t)v.v_maxsyspri);
}

static struct sclass csw = {
	"SDC",
	sysdc_init,
	0
};

static struct modlsched modlsched = {
	&mod_schedops, "system duty cycle scheduling class", &csw
};

static struct modlinkage modlinkage = {
	MODREV_1, (void *)&modlsched, NULL
};

int
_init()
{
	return (mod_install(&modlinkage));
}

int
_fini()
{
	return (EBUSY);		/* can't unload for now */
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

/* --- consolidation-private interfaces --- */
void
sysdc_thread_enter(kthread_t *t, uint_t dc, uint_t flags)
{
	void		*buf = NULL;
	sysdc_params_t	sdp;

	SYSDC_INC_STAT(sysdc_thread_enter_enter);

	ASSERT(sysdc_param_init);
	ASSERT(sysdccid >= 0);

	ASSERT((flags & ~SYSDC_THREAD_BATCH) == 0);

	sdp.sdp_minpri = sysdc_minpri;
	sdp.sdp_maxpri = sysdc_maxpri;
	sdp.sdp_DC = MAX(MIN(dc, sysdc_maxDC), sysdc_minDC);

	VERIFY0(CL_ALLOC(&buf, sysdccid, KM_SLEEP));

	ASSERT(t->t_lwp != NULL);
	ASSERT(t->t_cid == syscid);
	ASSERT(t->t_cldata == NULL);
	VERIFY0(CL_CANEXIT(t, NULL));
	VERIFY0(CL_ENTERCLASS(t, sysdccid, &sdp, kcred, buf));
	CL_EXITCLASS(syscid, NULL);
}