1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. 23 * Copyright (c) 2012 by Delphix. All rights reserved. 24 */ 25 26 /* 27 * The System Duty Cycle (SDC) scheduling class 28 * -------------------------------------------- 29 * 30 * Background 31 * 32 * Kernel threads in Solaris have traditionally not been large consumers 33 * of CPU time. They typically wake up, perform a small amount of 34 * work, then go back to sleep waiting for either a timeout or another 35 * signal. On the assumption that the small amount of work that they do 36 * is important for the behavior of the whole system, these threads are 37 * treated kindly by the dispatcher and the SYS scheduling class: they run 38 * without preemption from anything other than real-time and interrupt 39 * threads; when preempted, they are put at the front of the queue, so they 40 * generally do not migrate between CPUs; and they are allowed to stay 41 * running until they voluntarily give up the CPU. 
42 * 43 * As Solaris has evolved, new workloads have emerged which require the 44 * kernel to perform significant amounts of CPU-intensive work. One 45 * example of such a workload is ZFS's transaction group sync processing. 46 * Each sync operation generates a large batch of I/Os, and each I/O 47 * may need to be compressed and/or checksummed before it is written to 48 * storage. The taskq threads which perform the compression and checksums 49 * will run nonstop as long as they have work to do; a large sync operation 50 * on a compression-heavy dataset can keep them busy for seconds on end. 51 * This causes human-time-scale dispatch latency bubbles for any other 52 * threads which have the misfortune to share a CPU with the taskq threads. 53 * 54 * The SDC scheduling class is a solution to this problem. 55 * 56 * 57 * Overview 58 * 59 * SDC is centered around the concept of a thread's duty cycle (DC): 60 * 61 * ONPROC time 62 * Duty Cycle = ---------------------- 63 * ONPROC + Runnable time 64 * 65 * This is the ratio of the time that the thread spent running on a CPU 66 * divided by the time it spent running or trying to run. It is unaffected 67 * by any time the thread spent sleeping, stopped, etc. 68 * 69 * A thread joining the SDC class specifies a "target" DC that it wants 70 * to run at. To implement this policy, the routine sysdc_update() scans 71 * the list of active SDC threads every few ticks and uses each thread's 72 * microstate data to compute the actual duty cycle that that thread 73 * has experienced recently. If the thread is under its target DC, its 74 * priority is increased to the maximum available (sysdc_maxpri, which is 75 * 99 by default). If the thread is over its target DC, its priority is 76 * reduced to the minimum available (sysdc_minpri, 0 by default). This 77 * is a fairly primitive approach, in that it doesn't use any of the 78 * intermediate priorities, but it's not completely inappropriate. 
Even 79 * though threads in the SDC class might take a while to do their job, they 80 * are by some definition important if they're running inside the kernel, 81 * so it is reasonable that they should get to run at priority 99. 82 * 83 * If a thread is running when sysdc_update() calculates its actual duty 84 * cycle, and there are other threads of equal or greater priority on its 85 * CPU's dispatch queue, sysdc_update() preempts that thread. The thread 86 * acknowledges the preemption by calling sysdc_preempt(), which calls 87 * setbackdq(), which gives other threads with the same priority a chance 88 * to run. This creates a de facto time quantum for threads in the SDC 89 * scheduling class. 90 * 91 * An SDC thread which is assigned priority 0 can continue to run if 92 * nothing else needs to use the CPU that it's running on. Similarly, an 93 * SDC thread at priority 99 might not get to run as much as it wants to 94 * if there are other priority-99 or higher threads on its CPU. These 95 * situations would cause the thread to get ahead of or behind its target 96 * DC; the longer the situations lasted, the further ahead or behind the 97 * thread would get. Rather than condemning a thread to a lifetime of 98 * paying for its youthful indiscretions, SDC keeps "base" values for 99 * ONPROC and Runnable times in each thread's sysdc data, and updates these 100 * values periodically. The duty cycle is then computed using the elapsed 101 * amount of ONPROC and Runnable times since those base times. 102 * 103 * Since sysdc_update() scans SDC threads fairly frequently, it tries to 104 * keep the list of "active" threads small by pruning out threads which 105 * have been asleep for a brief time. They are not pruned immediately upon 106 * going to sleep, since some threads may bounce back and forth between 107 * sleeping and being runnable. 
108 * 109 * 110 * Interfaces 111 * 112 * void sysdc_thread_enter(t, dc, flags) 113 * 114 * Moves a kernel thread from the SYS scheduling class to the 115 * SDC class. t must have an associated LWP (created by calling 116 * lwp_kernel_create()). The thread will have a target DC of dc. 117 * Flags should be either 0 or SYSDC_THREAD_BATCH. If 118 * SYSDC_THREAD_BATCH is specified, the thread is expected to be 119 * doing large amounts of processing. 120 * 121 * 122 * Complications 123 * 124 * - Run queue balancing 125 * 126 * The Solaris dispatcher is biased towards letting a thread run 127 * on the same CPU which it last ran on, if no more than 3 ticks 128 * (i.e. rechoose_interval) have passed since the thread last ran. 129 * This helps to preserve cache warmth. On the other hand, it also 130 * tries to keep the per-CPU run queues fairly balanced; if the CPU 131 * chosen for a runnable thread has a run queue which is three or 132 * more threads longer than a neighboring CPU's queue, the runnable 133 * thread is dispatched onto the neighboring CPU instead. 134 * 135 * These policies work well for some workloads, but not for many SDC 136 * threads. The taskq client of SDC, for example, has many discrete 137 * units of work to do. The work units are largely independent, so 138 * cache warmth is not an important consideration. It is important 139 * that the threads fan out quickly to different CPUs, since the 140 * amount of work these threads have to do (a few seconds worth at a 141 * time) doesn't leave much time to correct thread placement errors 142 * (i.e. two SDC threads being dispatched to the same CPU). 143 * 144 * To fix this, SDC uses the TS_RUNQMATCH flag introduced for FSS. 145 * This tells the dispatcher to keep neighboring run queues' lengths 146 * more evenly matched, which allows SDC threads to migrate more 147 * easily. 148 * 149 * - LWPs and system processes 150 * 151 * SDC can only be used for kernel threads. 
Since SDC uses microstate 152 * accounting data to compute each thread's actual duty cycle, all 153 * threads entering the SDC class must have associated LWPs (which 154 * store the microstate data). This means that the threads have to 155 * be associated with an SSYS process, i.e. one created by newproc(). 156 * If the microstate accounting information is ever moved into the 157 * kthread_t, this restriction could be lifted. 158 * 159 * - Dealing with oversubscription 160 * 161 * Since SDC duty cycles are per-thread, it is possible that the 162 * aggregate requested duty cycle of all SDC threads in a processor 163 * set could be greater than the total CPU time available in that set. 164 * The FSS scheduling class has an analogous situation, which it deals 165 * with by reducing each thread's allotted CPU time proportionally. 166 * Since SDC doesn't need to be as precise as FSS, it uses a simpler 167 * solution to the oversubscription problem. 168 * 169 * sysdc_update() accumulates the amount of time that max-priority SDC 170 * threads have spent on-CPU in each processor set, and uses that sum 171 * to create an implied duty cycle for that processor set: 172 * 173 * accumulated CPU time 174 * pset DC = ----------------------------------- 175 * (# CPUs) * time since last update 176 * 177 * If this implied duty cycle is above a maximum pset duty cycle (90% 178 * by default), sysdc_update() sets the priority of all SDC threads 179 * in that processor set to sysdc_minpri for a "break" period. After 180 * the break period, it waits for a "nobreak" period before trying to 181 * enforce the pset duty cycle limit again. 182 * 183 * - Processor sets 184 * 185 * As the above implies, SDC is processor set aware, but it does not 186 * currently allow threads to change processor sets while in the SDC 187 * class. Instead, those threads must join the desired processor set 188 * before entering SDC. 
[1] 189 * 190 * - Batch threads 191 * 192 * A thread joining the SDC class can specify the SYSDC_THREAD_BATCH 193 * flag. This flag currently has no effect, but marks threads which 194 * do bulk processing. 195 * 196 * - t_kpri_req 197 * 198 * The TS and FSS scheduling classes pay attention to t_kpri_req, 199 * which provides a simple form of priority inheritance for 200 * synchronization primitives (such as rwlocks held as READER) which 201 * cannot be traced to a unique thread. The SDC class does not honor 202 * t_kpri_req, for a few reasons: 203 * 204 * 1. t_kpri_req is notoriously inaccurate. A measure of its 205 * inaccuracy is that it needs to be cleared every time a thread 206 * returns to user mode, because it is frequently non-zero at that 207 * point. This can happen because "ownership" of synchronization 208 * primitives that use t_kpri_req can be silently handed off, 209 * leaving no opportunity to will the t_kpri_req inheritance. 210 * 211 * 2. Unlike in TS and FSS, threads in SDC *will* eventually run at 212 * kernel priority. This means that even if an SDC thread 213 * is holding a synchronization primitive and running at low 214 * priority, its priority will eventually be raised above 60, 215 * allowing it to drive on and release the resource. 216 * 217 * 3. The first consumer of SDC uses the taskq subsystem, which holds 218 * a reader lock for the duration of the task's execution. This 219 * would mean that SDC threads would never drop below kernel 220 * priority in practice, which defeats one of the purposes of SDC. 221 * 222 * - Why not FSS? 223 * 224 * It might seem that the existing FSS scheduling class could solve 225 * the problems that SDC is attempting to solve. FSS's more precise 226 * solution to the oversubscription problem would hardly cause 227 * trouble, as long as it performed well. 
SDC is implemented as 228 * a separate scheduling class for two main reasons: the initial 229 * consumer of SDC does not map well onto the "project" abstraction 230 * that is central to FSS, and FSS does not expect to run at kernel 231 * priorities. 232 * 233 * 234 * Tunables 235 * 236 * - sysdc_update_interval_msec: Number of milliseconds between 237 * consecutive thread priority updates. 238 * 239 * - sysdc_reset_interval_msec: Number of milliseconds between 240 * consecutive resets of a thread's base ONPROC and Runnable 241 * times. 242 * 243 * - sysdc_prune_interval_msec: Number of milliseconds of sleeping 244 * before a thread is pruned from the active list. 245 * 246 * - sysdc_max_pset_DC: Allowable percentage of a processor set's 247 * CPU time which SDC can give to its high-priority threads. 248 * 249 * - sysdc_break_msec: Number of milliseconds of "break" taken when 250 * sysdc_max_pset_DC is exceeded. 251 * 252 * 253 * Future work (in SDC and related subsystems) 254 * 255 * - Per-thread rechoose interval (0 for SDC) 256 * 257 * Allow each thread to specify its own rechoose interval. SDC 258 * threads would specify an interval of zero, which would rechoose 259 * the CPU with the lowest priority once per update. 260 * 261 * - Allow threads to change processor sets after joining the SDC class 262 * 263 * - Thread groups and per-group DC 264 * 265 * It might be nice to be able to specify a duty cycle which applies 266 * to a group of threads in aggregate. 267 * 268 * - Per-group DC callback to allow dynamic DC tuning 269 * 270 * Currently, DCs are assigned when the thread joins SDC. Some 271 * workloads could benefit from being able to tune their DC using 272 * subsystem-specific knowledge about the workload. 
273 * 274 * - Finer-grained priority updates 275 * 276 * - More nuanced management of oversubscription 277 * 278 * - Moving other CPU-intensive threads into SDC 279 * 280 * - Move msacct data into kthread_t 281 * 282 * This would allow kernel threads without LWPs to join SDC. 283 * 284 * 285 * Footnotes 286 * 287 * [1] The details of doing so are left as an exercise for the reader. 288 */ 289 290 #include <sys/types.h> 291 #include <sys/sysdc.h> 292 #include <sys/sysdc_impl.h> 293 294 #include <sys/class.h> 295 #include <sys/cmn_err.h> 296 #include <sys/cpuvar.h> 297 #include <sys/cpupart.h> 298 #include <sys/debug.h> 299 #include <sys/disp.h> 300 #include <sys/errno.h> 301 #include <sys/inline.h> 302 #include <sys/kmem.h> 303 #include <sys/modctl.h> 304 #include <sys/schedctl.h> 305 #include <sys/sdt.h> 306 #include <sys/sunddi.h> 307 #include <sys/sysmacros.h> 308 #include <sys/systm.h> 309 #include <sys/var.h> 310 311 /* 312 * Tunables - loaded into the internal state at module load time 313 */ 314 uint_t sysdc_update_interval_msec = 20; 315 uint_t sysdc_reset_interval_msec = 400; 316 uint_t sysdc_prune_interval_msec = 100; 317 uint_t sysdc_max_pset_DC = 90; 318 uint_t sysdc_break_msec = 80; 319 320 /* 321 * Internal state - constants set up by sysdc_initparam() 322 */ 323 static clock_t sysdc_update_ticks; /* ticks between updates */ 324 static uint_t sysdc_prune_updates; /* updates asleep before pruning */ 325 static uint_t sysdc_reset_updates; /* # of updates before reset */ 326 static uint_t sysdc_break_updates; /* updates to break */ 327 static uint_t sysdc_nobreak_updates; /* updates to not check */ 328 static uint_t sysdc_minDC; /* minimum allowed DC */ 329 static uint_t sysdc_maxDC; /* maximum allowed DC */ 330 static pri_t sysdc_minpri; /* minimum allowed priority */ 331 static pri_t sysdc_maxpri; /* maximum allowed priority */ 332 333 /* 334 * Internal state 335 */ 336 static kmutex_t sysdc_pset_lock; /* lock protecting pset data */ 337 static list_t 
sysdc_psets; /* list of psets with SDC threads */ 338 static uint_t sysdc_param_init; /* sysdc_initparam() has been called */ 339 static uint_t sysdc_update_timeout_started; /* update timeout is active */ 340 static hrtime_t sysdc_last_update; /* time of last sysdc_update() */ 341 static sysdc_t sysdc_dummy; /* used to terminate active lists */ 342 343 /* 344 * Internal state - active hash table 345 */ 346 #define SYSDC_NLISTS 8 347 #define SYSDC_HASH(sdc) (((uintptr_t)(sdc) >> 6) & (SYSDC_NLISTS - 1)) 348 static sysdc_list_t sysdc_active[SYSDC_NLISTS]; 349 #define SYSDC_LIST(sdc) (&sysdc_active[SYSDC_HASH(sdc)]) 350 351 #ifdef DEBUG 352 static struct { 353 uint64_t sysdc_update_times_asleep; 354 uint64_t sysdc_update_times_base_ran_backwards; 355 uint64_t sysdc_update_times_already_done; 356 uint64_t sysdc_update_times_cur_ran_backwards; 357 uint64_t sysdc_compute_pri_breaking; 358 uint64_t sysdc_activate_enter; 359 uint64_t sysdc_update_enter; 360 uint64_t sysdc_update_exited; 361 uint64_t sysdc_update_not_sdc; 362 uint64_t sysdc_update_idle; 363 uint64_t sysdc_update_take_break; 364 uint64_t sysdc_update_no_psets; 365 uint64_t sysdc_tick_not_sdc; 366 uint64_t sysdc_tick_quantum_expired; 367 uint64_t sysdc_thread_enter_enter; 368 } sysdc_stats; 369 370 #define SYSDC_INC_STAT(x) (sysdc_stats.x++) 371 #else 372 #define SYSDC_INC_STAT(x) ((void)0) 373 #endif 374 375 /* macros are UPPER CASE */ 376 #define HOWMANY(a, b) howmany((a), (b)) 377 #define MSECTOTICKS(a) HOWMANY((a) * 1000, usec_per_tick) 378 379 static void 380 sysdc_initparam(void) 381 { 382 uint_t sysdc_break_ticks; 383 384 /* update / prune intervals */ 385 sysdc_update_ticks = MSECTOTICKS(sysdc_update_interval_msec); 386 387 sysdc_prune_updates = HOWMANY(sysdc_prune_interval_msec, 388 sysdc_update_interval_msec); 389 sysdc_reset_updates = HOWMANY(sysdc_reset_interval_msec, 390 sysdc_update_interval_msec); 391 392 /* We must get at least a little time on CPU. 
*/ 393 sysdc_minDC = 1; 394 sysdc_maxDC = SYSDC_DC_MAX; 395 sysdc_minpri = 0; 396 sysdc_maxpri = maxclsyspri; 397 398 /* break parameters */ 399 if (sysdc_max_pset_DC > SYSDC_DC_MAX) { 400 sysdc_max_pset_DC = SYSDC_DC_MAX; 401 } 402 sysdc_break_ticks = MSECTOTICKS(sysdc_break_msec); 403 sysdc_break_updates = HOWMANY(sysdc_break_ticks, sysdc_update_ticks); 404 405 /* 406 * We want: 407 * 408 * sysdc_max_pset_DC = (nobreak / (break + nobreak)) 409 * 410 * ==> nobreak = sysdc_max_pset_DC * (break + nobreak) 411 * 412 * sysdc_max_pset_DC * break 413 * ==> nobreak = ------------------------- 414 * 1 - sysdc_max_pset_DC 415 */ 416 sysdc_nobreak_updates = 417 HOWMANY((uint64_t)sysdc_break_updates * sysdc_max_pset_DC, 418 (SYSDC_DC_MAX - sysdc_max_pset_DC)); 419 420 sysdc_param_init = 1; 421 } 422 423 #undef HOWMANY 424 #undef MSECTOTICKS 425 426 #define SDC_UPDATE_INITIAL 0x1 /* for the initial update */ 427 #define SDC_UPDATE_TIMEOUT 0x2 /* from sysdc_update() */ 428 #define SDC_UPDATE_TICK 0x4 /* from sysdc_tick(), on expiry */ 429 430 /* 431 * Updates the recorded times in the sdc, and returns the elapsed ONPROC 432 * and Runnable times since the last reset. 433 * 434 * newO is the thread's actual ONPROC time; it's used during sysdc_update() 435 * to track processor set usage. 436 */ 437 static void 438 sysdc_update_times(sysdc_t *sdc, uint_t flags, 439 hrtime_t *O, hrtime_t *R, hrtime_t *newO) 440 { 441 kthread_t *const t = sdc->sdc_thread; 442 const uint_t initial = (flags & SDC_UPDATE_INITIAL); 443 const uint_t update = (flags & SDC_UPDATE_TIMEOUT); 444 const clock_t now = ddi_get_lbolt(); 445 uint_t do_reset; 446 447 ASSERT(THREAD_LOCK_HELD(t)); 448 449 *O = *R = 0; 450 451 /* If we've been sleeping, we know we haven't had any ONPROC time. 
*/ 452 if (sdc->sdc_sleep_updates != 0 && 453 sdc->sdc_sleep_updates != sdc->sdc_nupdates) { 454 *newO = sdc->sdc_last_base_O; 455 SYSDC_INC_STAT(sysdc_update_times_asleep); 456 return; 457 } 458 459 /* 460 * If this is our first update, or we've hit the reset point, 461 * we need to reset our base_{O,R}. Once we've updated them, we 462 * report O and R for the entire prior interval. 463 */ 464 do_reset = initial; 465 if (update) { 466 ++sdc->sdc_nupdates; 467 if ((sdc->sdc_nupdates % sysdc_reset_updates) == 0) 468 do_reset = 1; 469 } 470 if (do_reset) { 471 hrtime_t baseO, baseR; 472 if (initial) { 473 /* 474 * Start off our cycle count somewhere in the middle, 475 * to keep the resets from all happening at once. 476 * 477 * 4999 is a handy prime much larger than 478 * sysdc_reset_updates, so that we don't run into 479 * trouble if the resolution is a multiple of 480 * sysdc_reset_updates. 481 */ 482 sdc->sdc_nupdates = (uint_t)((gethrtime() % 4999) % 483 sysdc_reset_updates); 484 baseO = baseR = 0; 485 } else { 486 baseO = sdc->sdc_base_O; 487 baseR = sdc->sdc_base_R; 488 } 489 490 mstate_systhread_times(t, &sdc->sdc_base_O, &sdc->sdc_base_R); 491 *newO = sdc->sdc_base_O; 492 493 sdc->sdc_reset = now; 494 sdc->sdc_pri_check = -1; /* force mismatch below */ 495 496 /* 497 * See below for rationale. 498 */ 499 if (baseO > sdc->sdc_base_O || baseR > sdc->sdc_base_R) { 500 SYSDC_INC_STAT(sysdc_update_times_base_ran_backwards); 501 baseO = sdc->sdc_base_O; 502 baseR = sdc->sdc_base_R; 503 } 504 505 /* compute based on the entire interval */ 506 *O = (sdc->sdc_base_O - baseO); 507 *R = (sdc->sdc_base_R - baseR); 508 return; 509 } 510 511 /* 512 * If we're called from sysdc_update(), we *must* return a value 513 * for newO, so we always call mstate_systhread_times(). 514 * 515 * Otherwise, if we've already done a pri check this tick, 516 * we can skip it. 
517 */ 518 if (!update && sdc->sdc_pri_check == now) { 519 SYSDC_INC_STAT(sysdc_update_times_already_done); 520 return; 521 } 522 523 /* Get the current times from the thread */ 524 sdc->sdc_pri_check = now; 525 mstate_systhread_times(t, &sdc->sdc_cur_O, &sdc->sdc_cur_R); 526 *newO = sdc->sdc_cur_O; 527 528 /* 529 * The updating of microstate accounting is not done under a 530 * consistent set of locks, particularly the t_waitrq field. This 531 * can lead to narrow windows in which we account for time in the 532 * wrong bucket, which on the next read will be accounted for 533 * correctly. 534 * 535 * If our sdc_base_* fields were affected by one of these blips, we 536 * throw away the old data, and pretend this tick didn't happen. 537 */ 538 if (sdc->sdc_cur_O < sdc->sdc_base_O || 539 sdc->sdc_cur_R < sdc->sdc_base_R) { 540 541 sdc->sdc_base_O = sdc->sdc_cur_O; 542 sdc->sdc_base_R = sdc->sdc_cur_R; 543 544 SYSDC_INC_STAT(sysdc_update_times_cur_ran_backwards); 545 return; 546 } 547 548 *O = sdc->sdc_cur_O - sdc->sdc_base_O; 549 *R = sdc->sdc_cur_R - sdc->sdc_base_R; 550 } 551 552 /* 553 * sysdc_compute_pri() 554 * 555 * Recomputes the priority of the thread, leaving the result in 556 * sdc->sdc_epri. Returns 1 if a priority update should occur 557 * (which will also trigger a cpu_surrender()), otherwise 558 * returns 0. 559 */ 560 static uint_t 561 sysdc_compute_pri(sysdc_t *sdc, uint_t flags) 562 { 563 kthread_t *const t = sdc->sdc_thread; 564 const uint_t update = (flags & SDC_UPDATE_TIMEOUT); 565 const uint_t tick = (flags & SDC_UPDATE_TICK); 566 567 hrtime_t O, R; 568 hrtime_t newO = -1; 569 570 ASSERT(THREAD_LOCK_HELD(t)); 571 572 sysdc_update_times(sdc, flags, &O, &R, &newO); 573 ASSERT(!update || newO != -1); 574 575 /* If we have new data, recompute our priority. */ 576 if ((O + R) != 0) { 577 sdc->sdc_cur_DC = (O * SYSDC_DC_MAX) / (O + R); 578 579 /* Adjust our priority to move our DC closer to the target. 
*/ 580 if (sdc->sdc_cur_DC < sdc->sdc_target_DC) 581 sdc->sdc_pri = sdc->sdc_maxpri; 582 else 583 sdc->sdc_pri = sdc->sdc_minpri; 584 } 585 586 /* 587 * If our per-pset duty cycle goes over the max, we will take a break. 588 * This forces all sysdc threads in the pset to minimum priority, in 589 * order to let everyone else have a chance at the CPU. 590 */ 591 if (sdc->sdc_pset->sdp_need_break) { 592 SYSDC_INC_STAT(sysdc_compute_pri_breaking); 593 sdc->sdc_epri = sdc->sdc_minpri; 594 } else { 595 sdc->sdc_epri = sdc->sdc_pri; 596 } 597 598 DTRACE_PROBE4(sysdc__compute__pri, 599 kthread_t *, t, pri_t, sdc->sdc_epri, uint_t, sdc->sdc_cur_DC, 600 uint_t, sdc->sdc_target_DC); 601 602 /* 603 * For sysdc_update(), we compute the ONPROC time for high-priority 604 * threads, which is used to calculate the per-pset duty cycle. We 605 * will always tell our callers to update the thread's priority, 606 * since we want to force a cpu_surrender(). 607 * 608 * We reset sdc_update_ticks so that sysdc_tick() will only update 609 * the thread's priority if our timeout is delayed by a tick or 610 * more. 611 */ 612 if (update) { 613 /* SDC threads are not allowed to change cpupart bindings. */ 614 ASSERT(t->t_cpupart == sdc->sdc_pset->sdp_cpupart); 615 616 /* If we were at MAXPRI, account for our onproc time. */ 617 if (t->t_pri == sdc->sdc_maxpri && 618 sdc->sdc_last_base_O != 0 && 619 sdc->sdc_last_base_O < newO) { 620 sdc->sdc_last_O = newO - sdc->sdc_last_base_O; 621 sdc->sdc_pset->sdp_onproc_time += 622 (uint64_t)sdc->sdc_last_O; 623 sdc->sdc_pset->sdp_onproc_threads++; 624 } else { 625 sdc->sdc_last_O = 0; 626 } 627 sdc->sdc_last_base_O = newO; 628 629 sdc->sdc_update_ticks = sdc->sdc_ticks + sysdc_update_ticks + 1; 630 return (1); 631 } 632 633 /* 634 * Like sysdc_update(), sysdc_tick() always wants to update the 635 * thread's priority, so that the CPU is surrendered if necessary. 
636 * We reset sdc_update_ticks so that if the timeout continues to be 637 * delayed, we'll update at the regular interval. 638 */ 639 if (tick) { 640 ASSERT(sdc->sdc_ticks == sdc->sdc_update_ticks); 641 sdc->sdc_update_ticks = sdc->sdc_ticks + sysdc_update_ticks; 642 return (1); 643 } 644 645 /* 646 * Otherwise, only tell our callers to update the priority if it has 647 * changed. 648 */ 649 return (sdc->sdc_epri != t->t_pri); 650 } 651 652 static void 653 sysdc_update_pri(sysdc_t *sdc, uint_t flags) 654 { 655 kthread_t *t = sdc->sdc_thread; 656 657 ASSERT(THREAD_LOCK_HELD(t)); 658 659 if (sysdc_compute_pri(sdc, flags)) { 660 if (!thread_change_pri(t, sdc->sdc_epri, 0)) { 661 cpu_surrender(t); 662 } 663 } 664 } 665 666 /* 667 * Add a thread onto the active list. It will only be removed by 668 * sysdc_update(). 669 */ 670 static void 671 sysdc_activate(sysdc_t *sdc) 672 { 673 sysdc_t *volatile *headp = &SYSDC_LIST(sdc)->sdl_list; 674 sysdc_t *head; 675 kthread_t *t = sdc->sdc_thread; 676 677 SYSDC_INC_STAT(sysdc_activate_enter); 678 679 ASSERT(sdc->sdc_next == NULL); 680 ASSERT(THREAD_LOCK_HELD(t)); 681 682 do { 683 head = *headp; 684 sdc->sdc_next = head; 685 } while (atomic_cas_ptr(headp, head, sdc) != head); 686 } 687 688 /* 689 * sysdc_update() has two jobs: 690 * 691 * 1. It updates the priorities of all active SDC threads on the system. 692 * 2. It measures pset CPU usage and enforces sysdc_max_pset_DC. 693 */ 694 static void 695 sysdc_update(void *arg) 696 { 697 int idx; 698 sysdc_t *freelist = NULL; 699 sysdc_pset_t *cur; 700 hrtime_t now, diff; 701 uint_t redeploy = 1; 702 703 SYSDC_INC_STAT(sysdc_update_enter); 704 705 ASSERT(sysdc_update_timeout_started); 706 707 /* 708 * If this is our first time through, diff will be gigantic, and 709 * no breaks will be necessary. 
710 */ 711 now = gethrtime(); 712 diff = now - sysdc_last_update; 713 sysdc_last_update = now; 714 715 mutex_enter(&sysdc_pset_lock); 716 for (cur = list_head(&sysdc_psets); cur != NULL; 717 cur = list_next(&sysdc_psets, cur)) { 718 boolean_t breaking = (cur->sdp_should_break != 0); 719 720 if (cur->sdp_need_break != breaking) { 721 DTRACE_PROBE2(sdc__pset__break, sysdc_pset_t *, cur, 722 boolean_t, breaking); 723 } 724 cur->sdp_onproc_time = 0; 725 cur->sdp_onproc_threads = 0; 726 cur->sdp_need_break = breaking; 727 } 728 mutex_exit(&sysdc_pset_lock); 729 730 for (idx = 0; idx < SYSDC_NLISTS; idx++) { 731 sysdc_list_t *sdl = &sysdc_active[idx]; 732 sysdc_t *volatile *headp = &sdl->sdl_list; 733 sysdc_t *head, *tail; 734 sysdc_t **prevptr; 735 736 if (*headp == &sysdc_dummy) 737 continue; 738 739 /* Prevent any threads from exiting while we're poking them. */ 740 mutex_enter(&sdl->sdl_lock); 741 742 /* 743 * Each sdl_list contains a singly-linked list of active 744 * threads. Threads which become active while we are 745 * processing the list will be added to sdl_list. Since we 746 * don't want that to interfere with our own processing, we 747 * swap in an empty list. Any newly active threads will 748 * go on to this empty list. When finished, we'll put any 749 * such threads at the end of the processed list. 750 */ 751 head = atomic_swap_ptr(headp, &sysdc_dummy); 752 prevptr = &head; 753 while (*prevptr != &sysdc_dummy) { 754 sysdc_t *const sdc = *prevptr; 755 kthread_t *const t = sdc->sdc_thread; 756 757 /* 758 * If the thread has exited, move its sysdc_t onto 759 * freelist, to be freed later. 
760 */ 761 if (t == NULL) { 762 *prevptr = sdc->sdc_next; 763 SYSDC_INC_STAT(sysdc_update_exited); 764 sdc->sdc_next = freelist; 765 freelist = sdc; 766 continue; 767 } 768 769 thread_lock(t); 770 if (t->t_cid != sysdccid) { 771 thread_unlock(t); 772 prevptr = &sdc->sdc_next; 773 SYSDC_INC_STAT(sysdc_update_not_sdc); 774 continue; 775 } 776 ASSERT(t->t_cldata == sdc); 777 778 /* 779 * If the thread has been sleeping for longer 780 * than sysdc_prune_interval, make it inactive by 781 * removing it from the list. 782 */ 783 if (!(t->t_state & (TS_RUN | TS_ONPROC)) && 784 sdc->sdc_sleep_updates != 0 && 785 (sdc->sdc_sleep_updates - sdc->sdc_nupdates) > 786 sysdc_prune_updates) { 787 *prevptr = sdc->sdc_next; 788 SYSDC_INC_STAT(sysdc_update_idle); 789 sdc->sdc_next = NULL; 790 thread_unlock(t); 791 continue; 792 } 793 sysdc_update_pri(sdc, SDC_UPDATE_TIMEOUT); 794 thread_unlock(t); 795 796 prevptr = &sdc->sdc_next; 797 } 798 799 /* 800 * Add our list to the bucket, putting any new entries 801 * added while we were working at the tail of the list. 
802 */ 803 do { 804 tail = *headp; 805 *prevptr = tail; 806 } while (atomic_cas_ptr(headp, tail, head) != tail); 807 808 mutex_exit(&sdl->sdl_lock); 809 } 810 811 mutex_enter(&sysdc_pset_lock); 812 for (cur = list_head(&sysdc_psets); cur != NULL; 813 cur = list_next(&sysdc_psets, cur)) { 814 815 cur->sdp_vtime_last_interval = 816 diff * cur->sdp_cpupart->cp_ncpus; 817 cur->sdp_DC_last_interval = 818 (cur->sdp_onproc_time * SYSDC_DC_MAX) / 819 cur->sdp_vtime_last_interval; 820 821 if (cur->sdp_should_break > 0) { 822 cur->sdp_should_break--; /* breaking */ 823 continue; 824 } 825 if (cur->sdp_dont_break > 0) { 826 cur->sdp_dont_break--; /* waiting before checking */ 827 continue; 828 } 829 if (cur->sdp_DC_last_interval > sysdc_max_pset_DC) { 830 cur->sdp_should_break = sysdc_break_updates; 831 cur->sdp_dont_break = sysdc_nobreak_updates; 832 SYSDC_INC_STAT(sysdc_update_take_break); 833 } 834 } 835 836 /* 837 * If there are no sysdc_psets, there can be no threads, so 838 * we can stop doing our timeout. Since we're holding the 839 * sysdc_pset_lock, no new sysdc_psets can come in, which will 840 * prevent anyone from racing with this and dropping our timeout 841 * on the floor. 
842 */ 843 if (list_is_empty(&sysdc_psets)) { 844 SYSDC_INC_STAT(sysdc_update_no_psets); 845 ASSERT(sysdc_update_timeout_started); 846 sysdc_update_timeout_started = 0; 847 848 redeploy = 0; 849 } 850 mutex_exit(&sysdc_pset_lock); 851 852 while (freelist != NULL) { 853 sysdc_t *cur = freelist; 854 freelist = cur->sdc_next; 855 kmem_free(cur, sizeof (*cur)); 856 } 857 858 if (redeploy) { 859 (void) timeout(sysdc_update, arg, sysdc_update_ticks); 860 } 861 } 862 863 static void 864 sysdc_preempt(kthread_t *t) 865 { 866 ASSERT(t == curthread); 867 ASSERT(THREAD_LOCK_HELD(t)); 868 869 setbackdq(t); /* give others a chance to run */ 870 } 871 872 static void 873 sysdc_tick(kthread_t *t) 874 { 875 sysdc_t *sdc; 876 877 thread_lock(t); 878 if (t->t_cid != sysdccid) { 879 SYSDC_INC_STAT(sysdc_tick_not_sdc); 880 thread_unlock(t); 881 return; 882 } 883 sdc = t->t_cldata; 884 if (t->t_state == TS_ONPROC && 885 t->t_pri < t->t_disp_queue->disp_maxrunpri) { 886 cpu_surrender(t); 887 } 888 889 if (t->t_state == TS_ONPROC || t->t_state == TS_RUN) { 890 ASSERT(sdc->sdc_sleep_updates == 0); 891 } 892 893 ASSERT(sdc->sdc_ticks != sdc->sdc_update_ticks); 894 sdc->sdc_ticks++; 895 if (sdc->sdc_ticks == sdc->sdc_update_ticks) { 896 SYSDC_INC_STAT(sysdc_tick_quantum_expired); 897 sysdc_update_pri(sdc, SDC_UPDATE_TICK); 898 ASSERT(sdc->sdc_ticks != sdc->sdc_update_ticks); 899 } 900 thread_unlock(t); 901 } 902 903 static void 904 sysdc_setrun(kthread_t *t) 905 { 906 sysdc_t *sdc = t->t_cldata; 907 908 ASSERT(THREAD_LOCK_HELD(t)); /* t should be in transition */ 909 910 sdc->sdc_sleep_updates = 0; 911 912 if (sdc->sdc_next == NULL) { 913 /* 914 * Since we're in transition, we don't want to use the 915 * full thread_update_pri(). 
916 */ 917 if (sysdc_compute_pri(sdc, 0)) { 918 THREAD_CHANGE_PRI(t, sdc->sdc_epri); 919 } 920 sysdc_activate(sdc); 921 922 ASSERT(sdc->sdc_next != NULL); 923 } 924 925 setbackdq(t); 926 } 927 928 static void 929 sysdc_wakeup(kthread_t *t) 930 { 931 sysdc_setrun(t); 932 } 933 934 static void 935 sysdc_sleep(kthread_t *t) 936 { 937 sysdc_t *sdc = t->t_cldata; 938 939 ASSERT(THREAD_LOCK_HELD(t)); /* t should be in transition */ 940 941 sdc->sdc_sleep_updates = sdc->sdc_nupdates; 942 } 943 944 /*ARGSUSED*/ 945 static int 946 sysdc_enterclass(kthread_t *t, id_t cid, void *parmsp, cred_t *reqpcredp, 947 void *bufp) 948 { 949 cpupart_t *const cpupart = t->t_cpupart; 950 sysdc_t *sdc = bufp; 951 sysdc_params_t *sdpp = parmsp; 952 sysdc_pset_t *newpset = sdc->sdc_pset; 953 sysdc_pset_t *pset; 954 int start_timeout; 955 956 if (t->t_cid != syscid) 957 return (EPERM); 958 959 ASSERT(ttolwp(t) != NULL); 960 ASSERT(sdpp != NULL); 961 ASSERT(newpset != NULL); 962 ASSERT(sysdc_param_init); 963 964 ASSERT(sdpp->sdp_minpri >= sysdc_minpri); 965 ASSERT(sdpp->sdp_maxpri <= sysdc_maxpri); 966 ASSERT(sdpp->sdp_DC >= sysdc_minDC); 967 ASSERT(sdpp->sdp_DC <= sysdc_maxDC); 968 969 sdc->sdc_thread = t; 970 sdc->sdc_pri = sdpp->sdp_maxpri; /* start off maximally */ 971 sdc->sdc_minpri = sdpp->sdp_minpri; 972 sdc->sdc_maxpri = sdpp->sdp_maxpri; 973 sdc->sdc_target_DC = sdpp->sdp_DC; 974 sdc->sdc_ticks = 0; 975 sdc->sdc_update_ticks = sysdc_update_ticks + 1; 976 977 /* Assign ourselves to the appropriate pset. 
*/ 978 sdc->sdc_pset = NULL; 979 mutex_enter(&sysdc_pset_lock); 980 for (pset = list_head(&sysdc_psets); pset != NULL; 981 pset = list_next(&sysdc_psets, pset)) { 982 if (pset->sdp_cpupart == cpupart) { 983 break; 984 } 985 } 986 if (pset == NULL) { 987 pset = newpset; 988 newpset = NULL; 989 pset->sdp_cpupart = cpupart; 990 list_insert_tail(&sysdc_psets, pset); 991 } 992 pset->sdp_nthreads++; 993 ASSERT(pset->sdp_nthreads > 0); 994 995 sdc->sdc_pset = pset; 996 997 start_timeout = (sysdc_update_timeout_started == 0); 998 sysdc_update_timeout_started = 1; 999 mutex_exit(&sysdc_pset_lock); 1000 1001 if (newpset != NULL) 1002 kmem_free(newpset, sizeof (*newpset)); 1003 1004 /* Update t's scheduling class and priority. */ 1005 thread_lock(t); 1006 t->t_clfuncs = &(sclass[cid].cl_funcs->thread); 1007 t->t_cid = cid; 1008 t->t_cldata = sdc; 1009 t->t_schedflag |= TS_RUNQMATCH; 1010 1011 sysdc_update_pri(sdc, SDC_UPDATE_INITIAL); 1012 thread_unlock(t); 1013 1014 /* Kick off the thread timeout if we're the first one in. 
*/ 1015 if (start_timeout) { 1016 (void) timeout(sysdc_update, NULL, sysdc_update_ticks); 1017 } 1018 1019 return (0); 1020 } 1021 1022 static void 1023 sysdc_leave(sysdc_t *sdc) 1024 { 1025 sysdc_pset_t *sdp = sdc->sdc_pset; 1026 sysdc_list_t *sdl = SYSDC_LIST(sdc); 1027 uint_t freedc; 1028 1029 mutex_enter(&sdl->sdl_lock); /* block sysdc_update() */ 1030 sdc->sdc_thread = NULL; 1031 freedc = (sdc->sdc_next == NULL); 1032 mutex_exit(&sdl->sdl_lock); 1033 1034 mutex_enter(&sysdc_pset_lock); 1035 ASSERT(sdp != NULL); 1036 ASSERT(sdp->sdp_nthreads > 0); 1037 --sdp->sdp_nthreads; 1038 if (sdp->sdp_nthreads == 0) { 1039 list_remove(&sysdc_psets, sdp); 1040 } else { 1041 sdp = NULL; 1042 } 1043 mutex_exit(&sysdc_pset_lock); 1044 1045 if (freedc) 1046 kmem_free(sdc, sizeof (*sdc)); 1047 if (sdp != NULL) 1048 kmem_free(sdp, sizeof (*sdp)); 1049 } 1050 1051 static void 1052 sysdc_exitclass(void *buf) 1053 { 1054 sysdc_leave((sysdc_t *)buf); 1055 } 1056 1057 /*ARGSUSED*/ 1058 static int 1059 sysdc_canexit(kthread_t *t, cred_t *reqpcredp) 1060 { 1061 /* Threads cannot exit SDC once joined, except in a body bag. */ 1062 return (EPERM); 1063 } 1064 1065 static void 1066 sysdc_exit(kthread_t *t) 1067 { 1068 sysdc_t *sdc; 1069 1070 /* We're exiting, so we just rejoin the SYS class. */ 1071 thread_lock(t); 1072 ASSERT(t->t_cid == sysdccid); 1073 sdc = t->t_cldata; 1074 t->t_cid = syscid; 1075 t->t_cldata = NULL; 1076 t->t_clfuncs = &(sclass[syscid].cl_funcs->thread); 1077 (void) thread_change_pri(t, maxclsyspri, 0); 1078 t->t_schedflag &= ~TS_RUNQMATCH; 1079 thread_unlock_nopreempt(t); 1080 1081 /* Unlink the sdc from everything. */ 1082 sysdc_leave(sdc); 1083 } 1084 1085 /*ARGSUSED*/ 1086 static int 1087 sysdc_fork(kthread_t *t, kthread_t *ct, void *bufp) 1088 { 1089 /* 1090 * Threads cannot be created with SDC as their class; they must 1091 * be created as SYS and then added with sysdc_thread_enter(). 1092 * Because of this restriction, sysdc_fork() should never be called. 
1093 */ 1094 panic("sysdc cannot be forked"); 1095 1096 return (ENOSYS); 1097 } 1098 1099 /*ARGSUSED*/ 1100 static void 1101 sysdc_forkret(kthread_t *t, kthread_t *ct) 1102 { 1103 /* SDC threads are part of system processes, which never fork. */ 1104 panic("sysdc cannot be forked"); 1105 } 1106 1107 static pri_t 1108 sysdc_globpri(kthread_t *t) 1109 { 1110 return (t->t_epri); 1111 } 1112 1113 /*ARGSUSED*/ 1114 static pri_t 1115 sysdc_no_swap(kthread_t *t, int flags) 1116 { 1117 /* SDC threads cannot be swapped. */ 1118 return (-1); 1119 } 1120 1121 /* 1122 * Get maximum and minimum priorities enjoyed by SDC threads. 1123 */ 1124 static int 1125 sysdc_getclpri(pcpri_t *pcprip) 1126 { 1127 pcprip->pc_clpmax = sysdc_maxpri; 1128 pcprip->pc_clpmin = sysdc_minpri; 1129 return (0); 1130 } 1131 1132 /*ARGSUSED*/ 1133 static int 1134 sysdc_getclinfo(void *arg) 1135 { 1136 return (0); /* no class-specific info */ 1137 } 1138 1139 /*ARGSUSED*/ 1140 static int 1141 sysdc_alloc(void **p, int flag) 1142 { 1143 sysdc_t *new; 1144 1145 *p = NULL; 1146 if ((new = kmem_zalloc(sizeof (*new), flag)) == NULL) { 1147 return (ENOMEM); 1148 } 1149 if ((new->sdc_pset = kmem_zalloc(sizeof (*new->sdc_pset), flag)) == 1150 NULL) { 1151 kmem_free(new, sizeof (*new)); 1152 return (ENOMEM); 1153 } 1154 *p = new; 1155 return (0); 1156 } 1157 1158 static void 1159 sysdc_free(void *p) 1160 { 1161 sysdc_t *sdc = p; 1162 1163 if (sdc != NULL) { 1164 /* 1165 * We must have failed CL_ENTERCLASS(), so our pset should be 1166 * there and unused. 1167 */ 1168 ASSERT(sdc->sdc_pset != NULL); 1169 ASSERT(sdc->sdc_pset->sdp_cpupart == NULL); 1170 kmem_free(sdc->sdc_pset, sizeof (*sdc->sdc_pset)); 1171 kmem_free(sdc, sizeof (*sdc)); 1172 } 1173 } 1174 1175 static int sysdc_enosys(); /* Boy, ANSI-C's K&R compatibility is weird. 
*/ 1176 static int sysdc_einval(); 1177 static void sysdc_nullsys(); 1178 1179 static struct classfuncs sysdc_classfuncs = { 1180 /* messages to class manager */ 1181 { 1182 sysdc_enosys, /* admin */ 1183 sysdc_getclinfo, 1184 sysdc_enosys, /* parmsin */ 1185 sysdc_enosys, /* parmsout */ 1186 sysdc_enosys, /* vaparmsin */ 1187 sysdc_enosys, /* vaparmsout */ 1188 sysdc_getclpri, 1189 sysdc_alloc, 1190 sysdc_free, 1191 }, 1192 /* operations on threads */ 1193 { 1194 sysdc_enterclass, 1195 sysdc_exitclass, 1196 sysdc_canexit, 1197 sysdc_fork, 1198 sysdc_forkret, 1199 sysdc_nullsys, /* parmsget */ 1200 sysdc_enosys, /* parmsset */ 1201 sysdc_nullsys, /* stop */ 1202 sysdc_exit, 1203 sysdc_nullsys, /* active */ 1204 sysdc_nullsys, /* inactive */ 1205 sysdc_no_swap, /* swapin */ 1206 sysdc_no_swap, /* swapout */ 1207 sysdc_nullsys, /* trapret */ 1208 sysdc_preempt, 1209 sysdc_setrun, 1210 sysdc_sleep, 1211 sysdc_tick, 1212 sysdc_wakeup, 1213 sysdc_einval, /* donice */ 1214 sysdc_globpri, 1215 sysdc_nullsys, /* set_process_group */ 1216 sysdc_nullsys, /* yield */ 1217 sysdc_einval, /* doprio */ 1218 } 1219 }; 1220 1221 static int 1222 sysdc_enosys() 1223 { 1224 return (ENOSYS); 1225 } 1226 1227 static int 1228 sysdc_einval() 1229 { 1230 return (EINVAL); 1231 } 1232 1233 static void 1234 sysdc_nullsys() 1235 { 1236 } 1237 1238 /*ARGSUSED*/ 1239 static pri_t 1240 sysdc_init(id_t cid, int clparmsz, classfuncs_t **clfuncspp) 1241 { 1242 int idx; 1243 1244 list_create(&sysdc_psets, sizeof (sysdc_pset_t), 1245 offsetof(sysdc_pset_t, sdp_node)); 1246 1247 for (idx = 0; idx < SYSDC_NLISTS; idx++) { 1248 sysdc_active[idx].sdl_list = &sysdc_dummy; 1249 } 1250 1251 sysdc_initparam(); 1252 1253 sysdccid = cid; 1254 *clfuncspp = &sysdc_classfuncs; 1255 1256 return ((pri_t)v.v_maxsyspri); 1257 } 1258 1259 static struct sclass csw = { 1260 "SDC", 1261 sysdc_init, 1262 0 1263 }; 1264 1265 static struct modlsched modlsched = { 1266 &mod_schedops, "system duty cycle scheduling class", &csw 
1267 }; 1268 1269 static struct modlinkage modlinkage = { 1270 MODREV_1, { (void *)&modlsched, NULL } 1271 }; 1272 1273 int 1274 _init() 1275 { 1276 return (mod_install(&modlinkage)); 1277 } 1278 1279 int 1280 _fini() 1281 { 1282 return (EBUSY); /* can't unload for now */ 1283 } 1284 1285 int 1286 _info(struct modinfo *modinfop) 1287 { 1288 return (mod_info(&modlinkage, modinfop)); 1289 } 1290 1291 /* --- consolidation-private interfaces --- */ 1292 void 1293 sysdc_thread_enter(kthread_t *t, uint_t dc, uint_t flags) 1294 { 1295 void *buf = NULL; 1296 sysdc_params_t sdp; 1297 1298 SYSDC_INC_STAT(sysdc_thread_enter_enter); 1299 1300 ASSERT(sysdc_param_init); 1301 ASSERT(sysdccid >= 0); 1302 1303 ASSERT((flags & ~SYSDC_THREAD_BATCH) == 0); 1304 1305 sdp.sdp_minpri = sysdc_minpri; 1306 sdp.sdp_maxpri = sysdc_maxpri; 1307 sdp.sdp_DC = MAX(MIN(dc, sysdc_maxDC), sysdc_minDC); 1308 1309 VERIFY0(CL_ALLOC(&buf, sysdccid, KM_SLEEP)); 1310 1311 ASSERT(t->t_lwp != NULL); 1312 ASSERT(t->t_cid == syscid); 1313 ASSERT(t->t_cldata == NULL); 1314 VERIFY0(CL_CANEXIT(t, NULL)); 1315 VERIFY0(CL_ENTERCLASS(t, sysdccid, &sdp, kcred, buf)); 1316 CL_EXITCLASS(syscid, NULL); 1317 }