1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved. 24 * Copyright 2013, Joyent, Inc. All rights reserved. 25 */ 26 27 #include <sys/types.h> 28 #include <sys/param.h> 29 #include <sys/sysmacros.h> 30 #include <sys/cred.h> 31 #include <sys/proc.h> 32 #include <sys/session.h> 33 #include <sys/strsubr.h> 34 #include <sys/user.h> 35 #include <sys/priocntl.h> 36 #include <sys/class.h> 37 #include <sys/disp.h> 38 #include <sys/procset.h> 39 #include <sys/debug.h> 40 #include <sys/kmem.h> 41 #include <sys/errno.h> 42 #include <sys/fx.h> 43 #include <sys/fxpriocntl.h> 44 #include <sys/cpuvar.h> 45 #include <sys/systm.h> 46 #include <sys/vtrace.h> 47 #include <sys/schedctl.h> 48 #include <sys/tnf_probe.h> 49 #include <sys/sunddi.h> 50 #include <sys/spl.h> 51 #include <sys/modctl.h> 52 #include <sys/policy.h> 53 #include <sys/sdt.h> 54 #include <sys/cpupart.h> 55 #include <sys/cpucaps.h> 56 57 static pri_t fx_init(id_t, int, classfuncs_t **); 58 59 static struct sclass csw = { 60 "FX", 61 fx_init, 62 0 63 }; 64 65 static struct modlsched modlsched = { 66 &mod_schedops, "Fixed priority sched class", 
&csw 67 }; 68 69 static struct modlinkage modlinkage = { 70 MODREV_1, (void *)&modlsched, NULL 71 }; 72 73 74 /* 75 * control flags (kparms->fx_cflags). 76 */ 77 #define FX_DOUPRILIM 0x01 /* change user priority limit */ 78 #define FX_DOUPRI 0x02 /* change user priority */ 79 #define FX_DOTQ 0x04 /* change FX time quantum */ 80 81 82 #define FXMAXUPRI 60 /* maximum user priority setting */ 83 84 #define FX_MAX_UNPRIV_PRI 0 /* maximum unpriviledge priority */ 85 86 /* 87 * The fxproc_t structures that have a registered callback vector, 88 * are also kept in an array of circular doubly linked lists. A hash on 89 * the thread id (from ddi_get_kt_did()) is used to determine which list 90 * each of such fxproc structures should be placed. Each list has a dummy 91 * "head" which is never removed, so the list is never empty. 92 */ 93 94 #define FX_CB_LISTS 16 /* number of lists, must be power of 2 */ 95 #define FX_CB_LIST_HASH(ktid) ((uint_t)ktid & (FX_CB_LISTS - 1)) 96 97 /* Insert fxproc into callback list */ 98 #define FX_CB_LIST_INSERT(fxpp) \ 99 { \ 100 int index = FX_CB_LIST_HASH(fxpp->fx_ktid); \ 101 kmutex_t *lockp = &fx_cb_list_lock[index]; \ 102 fxproc_t *headp = &fx_cb_plisthead[index]; \ 103 mutex_enter(lockp); \ 104 fxpp->fx_cb_next = headp->fx_cb_next; \ 105 fxpp->fx_cb_prev = headp; \ 106 headp->fx_cb_next->fx_cb_prev = fxpp; \ 107 headp->fx_cb_next = fxpp; \ 108 mutex_exit(lockp); \ 109 } 110 111 /* 112 * Remove thread from callback list. 
113 */ 114 #define FX_CB_LIST_DELETE(fxpp) \ 115 { \ 116 int index = FX_CB_LIST_HASH(fxpp->fx_ktid); \ 117 kmutex_t *lockp = &fx_cb_list_lock[index]; \ 118 mutex_enter(lockp); \ 119 fxpp->fx_cb_prev->fx_cb_next = fxpp->fx_cb_next; \ 120 fxpp->fx_cb_next->fx_cb_prev = fxpp->fx_cb_prev; \ 121 mutex_exit(lockp); \ 122 } 123 124 #define FX_HAS_CB(fxpp) (fxpp->fx_callback != NULL) 125 126 /* adjust x to be between 0 and fx_maxumdpri */ 127 128 #define FX_ADJUST_PRI(pri) \ 129 { \ 130 if (pri < 0) \ 131 pri = 0; \ 132 else if (pri > fx_maxumdpri) \ 133 pri = fx_maxumdpri; \ 134 } 135 136 #define FX_ADJUST_QUANTUM(q) \ 137 { \ 138 if (q > INT_MAX) \ 139 q = INT_MAX; \ 140 else if (q <= 0) \ 141 q = FX_TQINF; \ 142 } 143 144 #define FX_ISVALID(pri, quantum) \ 145 (((pri >= 0) || (pri == FX_CB_NOCHANGE)) && \ 146 ((quantum >= 0) || (quantum == FX_NOCHANGE) || \ 147 (quantum == FX_TQDEF) || (quantum == FX_TQINF))) 148 149 150 static id_t fx_cid; /* fixed priority class ID */ 151 static fxdpent_t *fx_dptbl; /* fixed priority disp parameter table */ 152 153 static pri_t fx_maxupri = FXMAXUPRI; 154 static pri_t fx_maxumdpri; /* max user mode fixed priority */ 155 156 static pri_t fx_maxglobpri; /* maximum global priority used by fx class */ 157 static kmutex_t fx_dptblock; /* protects fixed priority dispatch table */ 158 159 160 static kmutex_t fx_cb_list_lock[FX_CB_LISTS]; /* protects list of fxprocs */ 161 /* that have callbacks */ 162 static fxproc_t fx_cb_plisthead[FX_CB_LISTS]; /* dummy fxproc at head of */ 163 /* list of fxprocs with */ 164 /* callbacks */ 165 166 static int fx_admin(caddr_t, cred_t *); 167 static int fx_getclinfo(void *); 168 static int fx_parmsin(void *); 169 static int fx_parmsout(void *, pc_vaparms_t *); 170 static int fx_vaparmsin(void *, pc_vaparms_t *); 171 static int fx_vaparmsout(void *, pc_vaparms_t *); 172 static int fx_getclpri(pcpri_t *); 173 static int fx_alloc(void **, int); 174 static void fx_free(void *); 175 static int 
fx_enterclass(kthread_t *, id_t, void *, cred_t *, void *); 176 static void fx_exitclass(void *); 177 static int fx_canexit(kthread_t *, cred_t *); 178 static int fx_fork(kthread_t *, kthread_t *, void *); 179 static void fx_forkret(kthread_t *, kthread_t *); 180 static void fx_parmsget(kthread_t *, void *); 181 static int fx_parmsset(kthread_t *, void *, id_t, cred_t *); 182 static void fx_stop(kthread_t *, int, int); 183 static void fx_exit(kthread_t *); 184 static pri_t fx_swapin(kthread_t *, int); 185 static pri_t fx_swapout(kthread_t *, int); 186 static void fx_trapret(kthread_t *); 187 static void fx_preempt(kthread_t *); 188 static void fx_setrun(kthread_t *); 189 static void fx_sleep(kthread_t *); 190 static void fx_tick(kthread_t *); 191 static void fx_wakeup(kthread_t *); 192 static int fx_donice(kthread_t *, cred_t *, int, int *); 193 static int fx_doprio(kthread_t *, cred_t *, int, int *); 194 static pri_t fx_globpri(kthread_t *); 195 static void fx_yield(kthread_t *); 196 static void fx_nullsys(); 197 198 extern fxdpent_t *fx_getdptbl(void); 199 200 static void fx_change_priority(kthread_t *, fxproc_t *); 201 static fxproc_t *fx_list_lookup(kt_did_t); 202 static void fx_list_release(fxproc_t *); 203 204 205 static struct classfuncs fx_classfuncs = { 206 /* class functions */ 207 fx_admin, 208 fx_getclinfo, 209 fx_parmsin, 210 fx_parmsout, 211 fx_vaparmsin, 212 fx_vaparmsout, 213 fx_getclpri, 214 fx_alloc, 215 fx_free, 216 217 /* thread functions */ 218 fx_enterclass, 219 fx_exitclass, 220 fx_canexit, 221 fx_fork, 222 fx_forkret, 223 fx_parmsget, 224 fx_parmsset, 225 fx_stop, 226 fx_exit, 227 fx_nullsys, /* active */ 228 fx_nullsys, /* inactive */ 229 fx_swapin, 230 fx_swapout, 231 fx_trapret, 232 fx_preempt, 233 fx_setrun, 234 fx_sleep, 235 fx_tick, 236 fx_wakeup, 237 fx_donice, 238 fx_globpri, 239 fx_nullsys, /* set_process_group */ 240 fx_yield, 241 fx_doprio, 242 }; 243 244 245 int 246 _init() 247 { 248 return (mod_install(&modlinkage)); 249 } 250 
251 int 252 _fini() 253 { 254 return (EBUSY); 255 } 256 257 int 258 _info(struct modinfo *modinfop) 259 { 260 return (mod_info(&modlinkage, modinfop)); 261 } 262 263 /* 264 * Fixed priority class initialization. Called by dispinit() at boot time. 265 * We can ignore the clparmsz argument since we know that the smallest 266 * possible parameter buffer is big enough for us. 267 */ 268 /* ARGSUSED */ 269 static pri_t 270 fx_init(id_t cid, int clparmsz, classfuncs_t **clfuncspp) 271 { 272 int i; 273 extern pri_t fx_getmaxumdpri(void); 274 275 fx_dptbl = fx_getdptbl(); 276 fx_maxumdpri = fx_getmaxumdpri(); 277 fx_maxglobpri = fx_dptbl[fx_maxumdpri].fx_globpri; 278 279 fx_cid = cid; /* Record our class ID */ 280 281 /* 282 * Initialize the hash table for fxprocs with callbacks 283 */ 284 for (i = 0; i < FX_CB_LISTS; i++) { 285 fx_cb_plisthead[i].fx_cb_next = fx_cb_plisthead[i].fx_cb_prev = 286 &fx_cb_plisthead[i]; 287 } 288 289 /* 290 * We're required to return a pointer to our classfuncs 291 * structure and the highest global priority value we use. 292 */ 293 *clfuncspp = &fx_classfuncs; 294 return (fx_maxglobpri); 295 } 296 297 /* 298 * Get or reset the fx_dptbl values per the user's request. 
299 */ 300 static int 301 fx_admin(caddr_t uaddr, cred_t *reqpcredp) 302 { 303 fxadmin_t fxadmin; 304 fxdpent_t *tmpdpp; 305 int userdpsz; 306 int i; 307 size_t fxdpsz; 308 309 if (get_udatamodel() == DATAMODEL_NATIVE) { 310 if (copyin(uaddr, &fxadmin, sizeof (fxadmin_t))) 311 return (EFAULT); 312 } 313 #ifdef _SYSCALL32_IMPL 314 else { 315 /* get fxadmin struct from ILP32 caller */ 316 fxadmin32_t fxadmin32; 317 if (copyin(uaddr, &fxadmin32, sizeof (fxadmin32_t))) 318 return (EFAULT); 319 fxadmin.fx_dpents = 320 (struct fxdpent *)(uintptr_t)fxadmin32.fx_dpents; 321 fxadmin.fx_ndpents = fxadmin32.fx_ndpents; 322 fxadmin.fx_cmd = fxadmin32.fx_cmd; 323 } 324 #endif /* _SYSCALL32_IMPL */ 325 326 fxdpsz = (fx_maxumdpri + 1) * sizeof (fxdpent_t); 327 328 switch (fxadmin.fx_cmd) { 329 case FX_GETDPSIZE: 330 fxadmin.fx_ndpents = fx_maxumdpri + 1; 331 332 if (get_udatamodel() == DATAMODEL_NATIVE) { 333 if (copyout(&fxadmin, uaddr, sizeof (fxadmin_t))) 334 return (EFAULT); 335 } 336 #ifdef _SYSCALL32_IMPL 337 else { 338 /* return fxadmin struct to ILP32 caller */ 339 fxadmin32_t fxadmin32; 340 fxadmin32.fx_dpents = 341 (caddr32_t)(uintptr_t)fxadmin.fx_dpents; 342 fxadmin32.fx_ndpents = fxadmin.fx_ndpents; 343 fxadmin32.fx_cmd = fxadmin.fx_cmd; 344 if (copyout(&fxadmin32, uaddr, sizeof (fxadmin32_t))) 345 return (EFAULT); 346 } 347 #endif /* _SYSCALL32_IMPL */ 348 break; 349 350 case FX_GETDPTBL: 351 userdpsz = MIN(fxadmin.fx_ndpents * sizeof (fxdpent_t), 352 fxdpsz); 353 if (copyout(fx_dptbl, fxadmin.fx_dpents, userdpsz)) 354 return (EFAULT); 355 356 fxadmin.fx_ndpents = userdpsz / sizeof (fxdpent_t); 357 358 if (get_udatamodel() == DATAMODEL_NATIVE) { 359 if (copyout(&fxadmin, uaddr, sizeof (fxadmin_t))) 360 return (EFAULT); 361 } 362 #ifdef _SYSCALL32_IMPL 363 else { 364 /* return fxadmin struct to ILP32 callers */ 365 fxadmin32_t fxadmin32; 366 fxadmin32.fx_dpents = 367 (caddr32_t)(uintptr_t)fxadmin.fx_dpents; 368 fxadmin32.fx_ndpents = fxadmin.fx_ndpents; 369 
fxadmin32.fx_cmd = fxadmin.fx_cmd; 370 if (copyout(&fxadmin32, uaddr, sizeof (fxadmin32_t))) 371 return (EFAULT); 372 } 373 #endif /* _SYSCALL32_IMPL */ 374 break; 375 376 case FX_SETDPTBL: 377 /* 378 * We require that the requesting process has sufficient 379 * privileges. We also require that the table supplied by 380 * the user exactly match the current fx_dptbl in size. 381 */ 382 if (secpolicy_dispadm(reqpcredp) != 0) { 383 return (EPERM); 384 } 385 if (fxadmin.fx_ndpents * sizeof (fxdpent_t) != fxdpsz) { 386 return (EINVAL); 387 } 388 389 /* 390 * We read the user supplied table into a temporary buffer 391 * where it is validated before being copied over the 392 * fx_dptbl. 393 */ 394 tmpdpp = kmem_alloc(fxdpsz, KM_SLEEP); 395 if (copyin(fxadmin.fx_dpents, tmpdpp, fxdpsz)) { 396 kmem_free(tmpdpp, fxdpsz); 397 return (EFAULT); 398 } 399 for (i = 0; i < fxadmin.fx_ndpents; i++) { 400 401 /* 402 * Validate the user supplied values. All we are doing 403 * here is verifying that the values are within their 404 * allowable ranges and will not panic the system. We 405 * make no attempt to ensure that the resulting 406 * configuration makes sense or results in reasonable 407 * performance. 408 */ 409 if (tmpdpp[i].fx_quantum <= 0 && 410 tmpdpp[i].fx_quantum != FX_TQINF) { 411 kmem_free(tmpdpp, fxdpsz); 412 return (EINVAL); 413 } 414 } 415 416 /* 417 * Copy the user supplied values over the current fx_dptbl 418 * values. The fx_globpri member is read-only so we don't 419 * overwrite it. 420 */ 421 mutex_enter(&fx_dptblock); 422 for (i = 0; i < fxadmin.fx_ndpents; i++) { 423 fx_dptbl[i].fx_quantum = tmpdpp[i].fx_quantum; 424 } 425 mutex_exit(&fx_dptblock); 426 kmem_free(tmpdpp, fxdpsz); 427 break; 428 429 default: 430 return (EINVAL); 431 } 432 return (0); 433 } 434 435 /* 436 * Allocate a fixed priority class specific thread structure and 437 * initialize it with the parameters supplied. Also move the thread 438 * to specified priority. 
439 */ 440 static int 441 fx_enterclass(kthread_t *t, id_t cid, void *parmsp, cred_t *reqpcredp, 442 void *bufp) 443 { 444 fxkparms_t *fxkparmsp = (fxkparms_t *)parmsp; 445 fxproc_t *fxpp; 446 pri_t reqfxupri; 447 pri_t reqfxuprilim; 448 449 fxpp = (fxproc_t *)bufp; 450 ASSERT(fxpp != NULL); 451 452 /* 453 * Initialize the fxproc structure. 454 */ 455 fxpp->fx_flags = 0; 456 fxpp->fx_callback = NULL; 457 fxpp->fx_cookie = NULL; 458 459 if (fxkparmsp == NULL) { 460 /* 461 * Use default values. 462 */ 463 fxpp->fx_pri = fxpp->fx_uprilim = 0; 464 fxpp->fx_pquantum = fx_dptbl[fxpp->fx_pri].fx_quantum; 465 fxpp->fx_nice = NZERO; 466 } else { 467 /* 468 * Use supplied values. 469 */ 470 471 if ((fxkparmsp->fx_cflags & FX_DOUPRILIM) == 0) { 472 reqfxuprilim = 0; 473 } else { 474 if (fxkparmsp->fx_uprilim > FX_MAX_UNPRIV_PRI && 475 secpolicy_setpriority(reqpcredp) != 0) 476 return (EPERM); 477 reqfxuprilim = fxkparmsp->fx_uprilim; 478 FX_ADJUST_PRI(reqfxuprilim); 479 } 480 481 if ((fxkparmsp->fx_cflags & FX_DOUPRI) == 0) { 482 reqfxupri = reqfxuprilim; 483 } else { 484 if (fxkparmsp->fx_upri > FX_MAX_UNPRIV_PRI && 485 secpolicy_setpriority(reqpcredp) != 0) 486 return (EPERM); 487 /* 488 * Set the user priority to the requested value 489 * or the upri limit, whichever is lower. 
490 */ 491 reqfxupri = fxkparmsp->fx_upri; 492 FX_ADJUST_PRI(reqfxupri); 493 494 if (reqfxupri > reqfxuprilim) 495 reqfxupri = reqfxuprilim; 496 } 497 498 499 fxpp->fx_uprilim = reqfxuprilim; 500 fxpp->fx_pri = reqfxupri; 501 502 fxpp->fx_nice = NZERO - (NZERO * reqfxupri) / fx_maxupri; 503 504 if (((fxkparmsp->fx_cflags & FX_DOTQ) == 0) || 505 (fxkparmsp->fx_tqntm == FX_TQDEF)) { 506 fxpp->fx_pquantum = fx_dptbl[fxpp->fx_pri].fx_quantum; 507 } else { 508 if (secpolicy_setpriority(reqpcredp) != 0) 509 return (EPERM); 510 511 if (fxkparmsp->fx_tqntm == FX_TQINF) 512 fxpp->fx_pquantum = FX_TQINF; 513 else { 514 fxpp->fx_pquantum = fxkparmsp->fx_tqntm; 515 } 516 } 517 518 } 519 520 fxpp->fx_timeleft = fxpp->fx_pquantum; 521 cpucaps_sc_init(&fxpp->fx_caps); 522 fxpp->fx_tp = t; 523 524 thread_lock(t); /* get dispatcher lock on thread */ 525 t->t_clfuncs = &(sclass[cid].cl_funcs->thread); 526 t->t_cid = cid; 527 t->t_cldata = (void *)fxpp; 528 t->t_schedflag &= ~TS_RUNQMATCH; 529 fx_change_priority(t, fxpp); 530 thread_unlock(t); 531 532 return (0); 533 } 534 535 /* 536 * The thread is exiting. 537 */ 538 static void 539 fx_exit(kthread_t *t) 540 { 541 fxproc_t *fxpp; 542 543 thread_lock(t); 544 fxpp = (fxproc_t *)(t->t_cldata); 545 546 /* 547 * A thread could be exiting in between clock ticks, so we need to 548 * calculate how much CPU time it used since it was charged last time. 549 * 550 * CPU caps are not enforced on exiting processes - it is usually 551 * desirable to exit as soon as possible to free resources. 552 */ 553 (void) CPUCAPS_CHARGE(t, &fxpp->fx_caps, CPUCAPS_CHARGE_ONLY); 554 555 if (FX_HAS_CB(fxpp)) { 556 FX_CB_EXIT(FX_CALLB(fxpp), fxpp->fx_cookie); 557 fxpp->fx_callback = NULL; 558 fxpp->fx_cookie = NULL; 559 thread_unlock(t); 560 FX_CB_LIST_DELETE(fxpp); 561 return; 562 } 563 564 thread_unlock(t); 565 } 566 567 /* 568 * Exiting the class. Free fxproc structure of thread. 
569 */ 570 static void 571 fx_exitclass(void *procp) 572 { 573 fxproc_t *fxpp = (fxproc_t *)procp; 574 575 thread_lock(fxpp->fx_tp); 576 if (FX_HAS_CB(fxpp)) { 577 578 FX_CB_EXIT(FX_CALLB(fxpp), fxpp->fx_cookie); 579 580 fxpp->fx_callback = NULL; 581 fxpp->fx_cookie = NULL; 582 thread_unlock(fxpp->fx_tp); 583 FX_CB_LIST_DELETE(fxpp); 584 } else 585 thread_unlock(fxpp->fx_tp); 586 587 kmem_free(fxpp, sizeof (fxproc_t)); 588 } 589 590 /* ARGSUSED */ 591 static int 592 fx_canexit(kthread_t *t, cred_t *cred) 593 { 594 /* 595 * A thread can always leave the FX class 596 */ 597 return (0); 598 } 599 600 /* 601 * Initialize fixed-priority class specific proc structure for a child. 602 * callbacks are not inherited upon fork. 603 */ 604 static int 605 fx_fork(kthread_t *t, kthread_t *ct, void *bufp) 606 { 607 fxproc_t *pfxpp; /* ptr to parent's fxproc structure */ 608 fxproc_t *cfxpp; /* ptr to child's fxproc structure */ 609 610 ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock)); 611 612 cfxpp = (fxproc_t *)bufp; 613 ASSERT(cfxpp != NULL); 614 thread_lock(t); 615 pfxpp = (fxproc_t *)t->t_cldata; 616 /* 617 * Initialize child's fxproc structure. 618 */ 619 cfxpp->fx_timeleft = cfxpp->fx_pquantum = pfxpp->fx_pquantum; 620 cfxpp->fx_pri = pfxpp->fx_pri; 621 cfxpp->fx_uprilim = pfxpp->fx_uprilim; 622 cfxpp->fx_nice = pfxpp->fx_nice; 623 cfxpp->fx_callback = NULL; 624 cfxpp->fx_cookie = NULL; 625 cfxpp->fx_flags = pfxpp->fx_flags & ~(FXBACKQ); 626 cpucaps_sc_init(&cfxpp->fx_caps); 627 628 cfxpp->fx_tp = ct; 629 ct->t_cldata = (void *)cfxpp; 630 thread_unlock(t); 631 632 /* 633 * Link new structure into fxproc list. 634 */ 635 return (0); 636 } 637 638 639 /* 640 * Child is placed at back of dispatcher queue and parent gives 641 * up processor so that the child runs first after the fork. 642 * This allows the child immediately execing to break the multiple 643 * use of copy on write pages with no disk home. The parent will 644 * get to steal them back rather than uselessly copying them. 
 */
static void
fx_forkret(kthread_t *t, kthread_t *ct)
{
	proc_t	*pp = ttoproc(t);	/* parent process */
	proc_t	*cp = ttoproc(ct);	/* child process */
	fxproc_t *fxpp;

	/* Must be called by the parent itself, with pidlock held. */
	ASSERT(t == curthread);
	ASSERT(MUTEX_HELD(&pidlock));

	/*
	 * Grab the child's p_lock before dropping pidlock to ensure
	 * the process does not disappear before we set it running.
	 */
	mutex_enter(&cp->p_lock);
	continuelwps(cp);
	mutex_exit(&cp->p_lock);

	/*
	 * The parent's p_lock is taken before pidlock is released, and is
	 * held until the parent has been put back on a dispatch queue.
	 */
	mutex_enter(&pp->p_lock);
	mutex_exit(&pidlock);
	continuelwps(pp);

	/*
	 * Under the thread lock: reset the parent's priority to the global
	 * priority for its FX priority level and requeue it via fx_setrun().
	 */
	thread_lock(t);
	fxpp = (fxproc_t *)(t->t_cldata);
	t->t_pri = fx_dptbl[fxpp->fx_pri].fx_globpri;
	ASSERT(t->t_pri >= 0 && t->t_pri <= fx_maxglobpri);
	THREAD_TRANSITION(t);
	fx_setrun(t);
	thread_unlock(t);
	/*
	 * Safe to drop p_lock now since it is safe to change
	 * the scheduling class after this point.
	 */
	mutex_exit(&pp->p_lock);

	/* Give up the processor. */
	swtch();
}


/*
 * Get information about the fixed-priority class into the buffer
 * pointed to by infop (treated as an fxinfo_t). The maximum configured
 * user priority is the only information we supply.
 * Always returns 0.
 */
static int
fx_getclinfo(void *infop)
{
	fxinfo_t *fxinfop = (fxinfo_t *)infop;
	fxinfop->fx_maxupri = fx_maxupri;
	return (0);
}



/*
 * Return the user mode scheduling priority range: 0 through fx_maxupri.
 * Always returns 0.
 */
static int
fx_getclpri(pcpri_t *pcprip)
{
	pcprip->pc_clpmax = fx_maxupri;
	pcprip->pc_clpmin = 0;
	return (0);
}


/*
 * Do-nothing placeholder used for class operations that FX does not
 * implement.
 */
static void
fx_nullsys()
{}


/*
 * Get the fixed-priority parameters of thread t into the
 * kernel-format (fxkparms_t) buffer pointed to by parmsp.
720 */ 721 static void 722 fx_parmsget(kthread_t *t, void *parmsp) 723 { 724 fxproc_t *fxpp = (fxproc_t *)t->t_cldata; 725 fxkparms_t *fxkparmsp = (fxkparms_t *)parmsp; 726 727 fxkparmsp->fx_upri = fxpp->fx_pri; 728 fxkparmsp->fx_uprilim = fxpp->fx_uprilim; 729 fxkparmsp->fx_tqntm = fxpp->fx_pquantum; 730 } 731 732 733 734 /* 735 * Check the validity of the fixed-priority parameters in the buffer 736 * pointed to by fxparmsp. 737 */ 738 static int 739 fx_parmsin(void *parmsp) 740 { 741 fxparms_t *fxparmsp = (fxparms_t *)parmsp; 742 uint_t cflags; 743 longlong_t ticks; 744 /* 745 * Check validity of parameters. 746 */ 747 748 if ((fxparmsp->fx_uprilim > fx_maxupri || 749 fxparmsp->fx_uprilim < 0) && 750 fxparmsp->fx_uprilim != FX_NOCHANGE) 751 return (EINVAL); 752 753 if ((fxparmsp->fx_upri > fx_maxupri || 754 fxparmsp->fx_upri < 0) && 755 fxparmsp->fx_upri != FX_NOCHANGE) 756 return (EINVAL); 757 758 if ((fxparmsp->fx_tqsecs == 0 && fxparmsp->fx_tqnsecs == 0) || 759 fxparmsp->fx_tqnsecs >= NANOSEC) 760 return (EINVAL); 761 762 cflags = (fxparmsp->fx_upri != FX_NOCHANGE ? FX_DOUPRI : 0); 763 764 if (fxparmsp->fx_uprilim != FX_NOCHANGE) { 765 cflags |= FX_DOUPRILIM; 766 } 767 768 if (fxparmsp->fx_tqnsecs != FX_NOCHANGE) 769 cflags |= FX_DOTQ; 770 771 /* 772 * convert the buffer to kernel format. 
773 */ 774 775 if (fxparmsp->fx_tqnsecs >= 0) { 776 if ((ticks = SEC_TO_TICK((longlong_t)fxparmsp->fx_tqsecs) + 777 NSEC_TO_TICK_ROUNDUP(fxparmsp->fx_tqnsecs)) > INT_MAX) 778 return (ERANGE); 779 780 ((fxkparms_t *)fxparmsp)->fx_tqntm = (int)ticks; 781 } else { 782 if ((fxparmsp->fx_tqnsecs != FX_NOCHANGE) && 783 (fxparmsp->fx_tqnsecs != FX_TQINF) && 784 (fxparmsp->fx_tqnsecs != FX_TQDEF)) 785 return (EINVAL); 786 ((fxkparms_t *)fxparmsp)->fx_tqntm = fxparmsp->fx_tqnsecs; 787 } 788 789 ((fxkparms_t *)fxparmsp)->fx_cflags = cflags; 790 791 return (0); 792 } 793 794 795 /* 796 * Check the validity of the fixed-priority parameters in the pc_vaparms_t 797 * structure vaparmsp and put them in the buffer pointed to by fxprmsp. 798 * pc_vaparms_t contains (key, value) pairs of parameter. 799 */ 800 static int 801 fx_vaparmsin(void *prmsp, pc_vaparms_t *vaparmsp) 802 { 803 uint_t secs = 0; 804 uint_t cnt; 805 int nsecs = 0; 806 int priflag, secflag, nsecflag, limflag; 807 longlong_t ticks; 808 fxkparms_t *fxprmsp = (fxkparms_t *)prmsp; 809 pc_vaparm_t *vpp = &vaparmsp->pc_parms[0]; 810 811 812 /* 813 * First check the validity of parameters and convert them 814 * from the user supplied format to the internal format. 
815 */ 816 priflag = secflag = nsecflag = limflag = 0; 817 818 fxprmsp->fx_cflags = 0; 819 820 if (vaparmsp->pc_vaparmscnt > PC_VAPARMCNT) 821 return (EINVAL); 822 823 for (cnt = 0; cnt < vaparmsp->pc_vaparmscnt; cnt++, vpp++) { 824 825 switch (vpp->pc_key) { 826 case FX_KY_UPRILIM: 827 if (limflag++) 828 return (EINVAL); 829 fxprmsp->fx_cflags |= FX_DOUPRILIM; 830 fxprmsp->fx_uprilim = (pri_t)vpp->pc_parm; 831 if (fxprmsp->fx_uprilim > fx_maxupri || 832 fxprmsp->fx_uprilim < 0) 833 return (EINVAL); 834 break; 835 836 case FX_KY_UPRI: 837 if (priflag++) 838 return (EINVAL); 839 fxprmsp->fx_cflags |= FX_DOUPRI; 840 fxprmsp->fx_upri = (pri_t)vpp->pc_parm; 841 if (fxprmsp->fx_upri > fx_maxupri || 842 fxprmsp->fx_upri < 0) 843 return (EINVAL); 844 break; 845 846 case FX_KY_TQSECS: 847 if (secflag++) 848 return (EINVAL); 849 fxprmsp->fx_cflags |= FX_DOTQ; 850 secs = (uint_t)vpp->pc_parm; 851 break; 852 853 case FX_KY_TQNSECS: 854 if (nsecflag++) 855 return (EINVAL); 856 fxprmsp->fx_cflags |= FX_DOTQ; 857 nsecs = (int)vpp->pc_parm; 858 break; 859 860 default: 861 return (EINVAL); 862 } 863 } 864 865 if (vaparmsp->pc_vaparmscnt == 0) { 866 /* 867 * Use default parameters. 868 */ 869 fxprmsp->fx_upri = 0; 870 fxprmsp->fx_uprilim = 0; 871 fxprmsp->fx_tqntm = FX_TQDEF; 872 fxprmsp->fx_cflags = FX_DOUPRI | FX_DOUPRILIM | FX_DOTQ; 873 } else if ((fxprmsp->fx_cflags & FX_DOTQ) != 0) { 874 if ((secs == 0 && nsecs == 0) || nsecs >= NANOSEC) 875 return (EINVAL); 876 877 if (nsecs >= 0) { 878 if ((ticks = SEC_TO_TICK((longlong_t)secs) + 879 NSEC_TO_TICK_ROUNDUP(nsecs)) > INT_MAX) 880 return (ERANGE); 881 882 fxprmsp->fx_tqntm = (int)ticks; 883 } else { 884 if (nsecs != FX_TQINF && nsecs != FX_TQDEF) 885 return (EINVAL); 886 fxprmsp->fx_tqntm = nsecs; 887 } 888 } 889 890 return (0); 891 } 892 893 894 /* 895 * Nothing to do here but return success. 
896 */ 897 /* ARGSUSED */ 898 static int 899 fx_parmsout(void *parmsp, pc_vaparms_t *vaparmsp) 900 { 901 register fxkparms_t *fxkprmsp = (fxkparms_t *)parmsp; 902 903 if (vaparmsp != NULL) 904 return (0); 905 906 if (fxkprmsp->fx_tqntm < 0) { 907 /* 908 * Quantum field set to special value (e.g. FX_TQINF) 909 */ 910 ((fxparms_t *)fxkprmsp)->fx_tqnsecs = fxkprmsp->fx_tqntm; 911 ((fxparms_t *)fxkprmsp)->fx_tqsecs = 0; 912 913 } else { 914 /* Convert quantum from ticks to seconds-nanoseconds */ 915 916 timestruc_t ts; 917 TICK_TO_TIMESTRUC(fxkprmsp->fx_tqntm, &ts); 918 ((fxparms_t *)fxkprmsp)->fx_tqsecs = ts.tv_sec; 919 ((fxparms_t *)fxkprmsp)->fx_tqnsecs = ts.tv_nsec; 920 } 921 922 return (0); 923 } 924 925 926 /* 927 * Copy all selected fixed-priority class parameters to the user. 928 * The parameters are specified by a key. 929 */ 930 static int 931 fx_vaparmsout(void *prmsp, pc_vaparms_t *vaparmsp) 932 { 933 fxkparms_t *fxkprmsp = (fxkparms_t *)prmsp; 934 timestruc_t ts; 935 uint_t cnt; 936 uint_t secs; 937 int nsecs; 938 int priflag, secflag, nsecflag, limflag; 939 pc_vaparm_t *vpp = &vaparmsp->pc_parms[0]; 940 941 ASSERT(MUTEX_NOT_HELD(&curproc->p_lock)); 942 943 priflag = secflag = nsecflag = limflag = 0; 944 945 if (vaparmsp->pc_vaparmscnt > PC_VAPARMCNT) 946 return (EINVAL); 947 948 if (fxkprmsp->fx_tqntm < 0) { 949 /* 950 * Quantum field set to special value (e.g. FX_TQINF). 951 */ 952 secs = 0; 953 nsecs = fxkprmsp->fx_tqntm; 954 } else { 955 /* 956 * Convert quantum from ticks to seconds-nanoseconds. 
957 */ 958 TICK_TO_TIMESTRUC(fxkprmsp->fx_tqntm, &ts); 959 secs = ts.tv_sec; 960 nsecs = ts.tv_nsec; 961 } 962 963 964 for (cnt = 0; cnt < vaparmsp->pc_vaparmscnt; cnt++, vpp++) { 965 966 switch (vpp->pc_key) { 967 case FX_KY_UPRILIM: 968 if (limflag++) 969 return (EINVAL); 970 if (copyout(&fxkprmsp->fx_uprilim, 971 (void *)(uintptr_t)vpp->pc_parm, sizeof (pri_t))) 972 return (EFAULT); 973 break; 974 975 case FX_KY_UPRI: 976 if (priflag++) 977 return (EINVAL); 978 if (copyout(&fxkprmsp->fx_upri, 979 (void *)(uintptr_t)vpp->pc_parm, sizeof (pri_t))) 980 return (EFAULT); 981 break; 982 983 case FX_KY_TQSECS: 984 if (secflag++) 985 return (EINVAL); 986 if (copyout(&secs, 987 (void *)(uintptr_t)vpp->pc_parm, sizeof (uint_t))) 988 return (EFAULT); 989 break; 990 991 case FX_KY_TQNSECS: 992 if (nsecflag++) 993 return (EINVAL); 994 if (copyout(&nsecs, 995 (void *)(uintptr_t)vpp->pc_parm, sizeof (int))) 996 return (EFAULT); 997 break; 998 999 default: 1000 return (EINVAL); 1001 } 1002 } 1003 1004 return (0); 1005 } 1006 1007 /* 1008 * Set the scheduling parameters of the thread pointed to by fxprocp 1009 * to those specified in the buffer pointed to by fxparmsp. 1010 */ 1011 /* ARGSUSED */ 1012 static int 1013 fx_parmsset(kthread_t *tx, void *parmsp, id_t reqpcid, cred_t *reqpcredp) 1014 { 1015 char nice; 1016 pri_t reqfxuprilim; 1017 pri_t reqfxupri; 1018 fxkparms_t *fxkparmsp = (fxkparms_t *)parmsp; 1019 fxproc_t *fxpp; 1020 1021 1022 ASSERT(MUTEX_HELD(&(ttoproc(tx))->p_lock)); 1023 1024 thread_lock(tx); 1025 fxpp = (fxproc_t *)tx->t_cldata; 1026 1027 if ((fxkparmsp->fx_cflags & FX_DOUPRILIM) == 0) 1028 reqfxuprilim = fxpp->fx_uprilim; 1029 else 1030 reqfxuprilim = fxkparmsp->fx_uprilim; 1031 1032 /* 1033 * Basic permissions enforced by generic kernel code 1034 * for all classes require that a thread attempting 1035 * to change the scheduling parameters of a target 1036 * thread be privileged or have a real or effective 1037 * UID matching that of the target thread. 
We are not 1038 * called unless these basic permission checks have 1039 * already passed. The fixed priority class requires in 1040 * addition that the calling thread be privileged if it 1041 * is attempting to raise the pri above its current 1042 * value This may have been checked previously but if our 1043 * caller passed us a non-NULL credential pointer we assume 1044 * it hasn't and we check it here. 1045 */ 1046 1047 if ((reqpcredp != NULL) && 1048 (reqfxuprilim > fxpp->fx_uprilim || 1049 ((fxkparmsp->fx_cflags & FX_DOTQ) != 0)) && 1050 secpolicy_raisepriority(reqpcredp) != 0) { 1051 thread_unlock(tx); 1052 return (EPERM); 1053 } 1054 1055 FX_ADJUST_PRI(reqfxuprilim); 1056 1057 if ((fxkparmsp->fx_cflags & FX_DOUPRI) == 0) 1058 reqfxupri = fxpp->fx_pri; 1059 else 1060 reqfxupri = fxkparmsp->fx_upri; 1061 1062 1063 /* 1064 * Make sure the user priority doesn't exceed the upri limit. 1065 */ 1066 if (reqfxupri > reqfxuprilim) 1067 reqfxupri = reqfxuprilim; 1068 1069 /* 1070 * Set fx_nice to the nice value corresponding to the user 1071 * priority we are setting. Note that setting the nice field 1072 * of the parameter struct won't affect upri or nice. 1073 */ 1074 1075 nice = NZERO - (reqfxupri * NZERO) / fx_maxupri; 1076 1077 if (nice > NZERO) 1078 nice = NZERO; 1079 1080 fxpp->fx_uprilim = reqfxuprilim; 1081 fxpp->fx_pri = reqfxupri; 1082 1083 if (fxkparmsp->fx_tqntm == FX_TQINF) 1084 fxpp->fx_pquantum = FX_TQINF; 1085 else if (fxkparmsp->fx_tqntm == FX_TQDEF) 1086 fxpp->fx_pquantum = fx_dptbl[fxpp->fx_pri].fx_quantum; 1087 else if ((fxkparmsp->fx_cflags & FX_DOTQ) != 0) 1088 fxpp->fx_pquantum = fxkparmsp->fx_tqntm; 1089 1090 fxpp->fx_nice = nice; 1091 1092 fx_change_priority(tx, fxpp); 1093 thread_unlock(tx); 1094 return (0); 1095 } 1096 1097 1098 /* 1099 * Return the global scheduling priority that would be assigned 1100 * to a thread entering the fixed-priority class with the fx_upri. 
 */
static pri_t
fx_globpri(kthread_t *t)
{
	fxproc_t *fxpp;

	ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock));

	/* Look up the global priority for the thread's FX priority level. */
	fxpp = (fxproc_t *)t->t_cldata;
	return (fx_dptbl[fxpp->fx_pri].fx_globpri);

}

/*
 * Arrange for thread to be placed in appropriate location
 * on dispatcher queue.
 *
 * This is called with the current thread in TS_ONPROC and locked.
 */
static void
fx_preempt(kthread_t *t)
{
	fxproc_t *fxpp = (fxproc_t *)(t->t_cldata);

	ASSERT(t == curthread);
	ASSERT(THREAD_LOCK_HELD(curthread));

	/* Charge the CPU time used so far, with cap enforcement. */
	(void) CPUCAPS_CHARGE(t, &fxpp->fx_caps, CPUCAPS_CHARGE_ENFORCE);

	/*
	 * Check to see if we're doing "preemption control" here.  If
	 * we are, and if the user has requested that this thread not
	 * be preempted, and if preemptions haven't been put off for
	 * too long, let the preemption happen here but try to make
	 * sure the thread is rescheduled as soon as possible.  We do
	 * this by putting it on the front of the highest priority run
	 * queue in the FX class.  If the preemption has been put off
	 * for too long, clear the "nopreempt" bit and let the thread
	 * be preempted.
	 *
	 * "Too long" here means fx_timeleft has dropped to -SC_MAX_TICKS
	 * or below; a thread with an infinite quantum (FX_TQINF) is never
	 * considered to have exceeded it.
	 */
	if (t->t_schedctl && schedctl_get_nopreempt(t)) {
		if (fxpp->fx_pquantum == FX_TQINF ||
		    fxpp->fx_timeleft > -SC_MAX_TICKS) {
			DTRACE_SCHED1(schedctl__nopreempt, kthread_t *, t);
			schedctl_set_yield(t, 1);
			setfrontdq(t);
			return;
		} else {
			schedctl_set_nopreempt(t, 0);
			DTRACE_SCHED1(schedctl__preempt, kthread_t *, t);
			TNF_PROBE_2(schedctl_preempt, "schedctl FX fx_preempt",
			    /* CSTYLED */, tnf_pid, pid, ttoproc(t)->p_pid,
			    tnf_lwpid, lwpid, t->t_tid);
			/*
			 * Fall through and be preempted below.
			 */
		}
	}

	/*
	 * Give a registered callback the chance to supply a new quantum
	 * and priority.  Both are normalized before use; a changed quantum
	 * also restarts the time left, and the thread's global priority is
	 * updated from the dispatch table.
	 */
	if (FX_HAS_CB(fxpp)) {
		clock_t new_quantum =  (clock_t)fxpp->fx_pquantum;
		pri_t	newpri = fxpp->fx_pri;
		FX_CB_PREEMPT(FX_CALLB(fxpp), fxpp->fx_cookie,
		    &new_quantum, &newpri);
		FX_ADJUST_QUANTUM(new_quantum);
		if ((int)new_quantum != fxpp->fx_pquantum) {
			fxpp->fx_pquantum = (int)new_quantum;
			fxpp->fx_timeleft = fxpp->fx_pquantum;
		}
		FX_ADJUST_PRI(newpri);
		fxpp->fx_pri = newpri;
		THREAD_CHANGE_PRI(t, fx_dptbl[fxpp->fx_pri].fx_globpri);
	}

	/*
	 * This thread may be placed on wait queue by CPU Caps. In this case we
	 * do not need to do anything until it is removed from the wait queue.
	 */
	if (CPUCAPS_ENFORCE(t)) {
		return;
	}

	/*
	 * A thread flagged FXBACKQ gets a fresh quantum and goes to the
	 * back of its queue (the flag is consumed); any other thread goes
	 * to the front.
	 */
	if ((fxpp->fx_flags & (FXBACKQ)) == FXBACKQ) {
		fxpp->fx_timeleft = fxpp->fx_pquantum;
		fxpp->fx_flags &= ~FXBACKQ;
		setbackdq(t);
	} else {
		setfrontdq(t);
	}
}

/*
 * Put a thread that is becoming runnable on a dispatch queue.  FXBACKQ is
 * cleared.  The thread goes to the front of its queue when its
 * t_disp_time equals the current lbolt value, and to the back otherwise.
 */
static void
fx_setrun(kthread_t *t)
{
	fxproc_t *fxpp = (fxproc_t *)(t->t_cldata);

	ASSERT(THREAD_LOCK_HELD(t));	/* t should be in transition */
	fxpp->fx_flags &= ~FXBACKQ;

	if (t->t_disp_time != ddi_get_lbolt())
		setbackdq(t);
	else
		setfrontdq(t);
}


/*
 * Prepare thread for sleep: charge the CPU time used so far, notify a
 * registered callback, and time stamp the thread for the swapper.
 *
 * NOTE(review): an earlier wording of this comment said the thread
 * priority is reset here so that it runs at a kernel priority level on
 * wakeup; the code below does not modify the priority.
 */
static void
fx_sleep(kthread_t *t)
{
	fxproc_t *fxpp = (fxproc_t *)(t->t_cldata);

	ASSERT(t == curthread);
	ASSERT(THREAD_LOCK_HELD(t));

	/*
	 * Account for time spent on CPU before going to sleep.
	 */
	(void) CPUCAPS_CHARGE(t, &fxpp->fx_caps, CPUCAPS_CHARGE_ENFORCE);

	if (FX_HAS_CB(fxpp)) {
		FX_CB_SLEEP(FX_CALLB(fxpp), fxpp->fx_cookie);
	}
	t->t_stime = ddi_get_lbolt();	/* time stamp for the swapper */
}


/*
 * Return Values:
 *
 *	-1 if the thread is loaded or is not eligible to be swapped in.
 *
 * FX and RT threads are designed so that they don't swapout; however,
 * it is possible that while the thread is swapped out and in another class, it
 * can be changed to FX or RT.  Since these threads should be swapped in
 * as soon as they're runnable, rt_swapin returns SHRT_MAX, and fx_swapin
 * returns SHRT_MAX - 1, so that it gives deference to any swapped out
 * RT threads.
 */
/* ARGSUSED */
static pri_t
fx_swapin(kthread_t *t, int flags)
{
	pri_t	tpri = -1;

	ASSERT(THREAD_LOCK_HELD(t));

	/* Only a runnable thread that is not loaded is a candidate. */
	if (t->t_state == TS_RUN && (t->t_schedflag & TS_LOAD) == 0) {
		tpri = (pri_t)SHRT_MAX - 1;
	}

	return (tpri);
}

/*
 * Return Values
 *	-1 if the thread isn't loaded or is not eligible to be swapped out.
 *
 * This implementation returns -1 unconditionally.
 */
/* ARGSUSED */
static pri_t
fx_swapout(kthread_t *t, int flags)
{
	ASSERT(THREAD_LOCK_HELD(t));

	return (-1);

}

/*
 * Class hook for a thread that is being stopped.  The only work done is
 * to invoke the stop hook of a registered callback, if there is one.
 */
/* ARGSUSED */
static void
fx_stop(kthread_t *t, int why, int what)
{
	fxproc_t *fxpp = (fxproc_t *)(t->t_cldata);

	ASSERT(THREAD_LOCK_HELD(t));

	if (FX_HAS_CB(fxpp)) {
		FX_CB_STOP(FX_CALLB(fxpp), fxpp->fx_cookie);
	}
}

/*
 * Check for time slice expiration.  If time slice has expired
 * set runrun to cause preemption.
1288 */ 1289 static void 1290 fx_tick(kthread_t *t) 1291 { 1292 boolean_t call_cpu_surrender = B_FALSE; 1293 fxproc_t *fxpp; 1294 1295 ASSERT(MUTEX_HELD(&(ttoproc(t))->p_lock)); 1296 1297 thread_lock(t); 1298 1299 fxpp = (fxproc_t *)(t->t_cldata); 1300 1301 if (FX_HAS_CB(fxpp)) { 1302 clock_t new_quantum = (clock_t)fxpp->fx_pquantum; 1303 pri_t newpri = fxpp->fx_pri; 1304 FX_CB_TICK(FX_CALLB(fxpp), fxpp->fx_cookie, 1305 &new_quantum, &newpri); 1306 FX_ADJUST_QUANTUM(new_quantum); 1307 if ((int)new_quantum != fxpp->fx_pquantum) { 1308 fxpp->fx_pquantum = (int)new_quantum; 1309 fxpp->fx_timeleft = fxpp->fx_pquantum; 1310 } 1311 FX_ADJUST_PRI(newpri); 1312 if (newpri != fxpp->fx_pri) { 1313 fxpp->fx_pri = newpri; 1314 fx_change_priority(t, fxpp); 1315 } 1316 } 1317 1318 /* 1319 * Keep track of thread's project CPU usage. Note that projects 1320 * get charged even when threads are running in the kernel. 1321 */ 1322 call_cpu_surrender = CPUCAPS_CHARGE(t, &fxpp->fx_caps, 1323 CPUCAPS_CHARGE_ENFORCE); 1324 1325 if ((fxpp->fx_pquantum != FX_TQINF) && 1326 (--fxpp->fx_timeleft <= 0)) { 1327 pri_t new_pri; 1328 1329 /* 1330 * If we're doing preemption control and trying to 1331 * avoid preempting this thread, just note that 1332 * the thread should yield soon and let it keep 1333 * running (unless it's been a while). 1334 */ 1335 if (t->t_schedctl && schedctl_get_nopreempt(t)) { 1336 if (fxpp->fx_timeleft > -SC_MAX_TICKS) { 1337 DTRACE_SCHED1(schedctl__nopreempt, 1338 kthread_t *, t); 1339 schedctl_set_yield(t, 1); 1340 thread_unlock_nopreempt(t); 1341 return; 1342 } 1343 TNF_PROBE_2(schedctl_failsafe, 1344 "schedctl FX fx_tick", /* CSTYLED */, 1345 tnf_pid, pid, ttoproc(t)->p_pid, 1346 tnf_lwpid, lwpid, t->t_tid); 1347 } 1348 new_pri = fx_dptbl[fxpp->fx_pri].fx_globpri; 1349 ASSERT(new_pri >= 0 && new_pri <= fx_maxglobpri); 1350 /* 1351 * When the priority of a thread is changed, 1352 * it may be necessary to adjust its position 1353 * on a sleep queue or dispatch queue. 
Even 1354 * when the priority is not changed, we need 1355 * to preserve round robin on dispatch queue. 1356 * The function thread_change_pri accomplishes 1357 * this. 1358 */ 1359 if (thread_change_pri(t, new_pri, 0)) { 1360 fxpp->fx_timeleft = fxpp->fx_pquantum; 1361 } else { 1362 call_cpu_surrender = B_TRUE; 1363 } 1364 } else if (t->t_state == TS_ONPROC && 1365 t->t_pri < t->t_disp_queue->disp_maxrunpri) { 1366 call_cpu_surrender = B_TRUE; 1367 } 1368 1369 if (call_cpu_surrender) { 1370 fxpp->fx_flags |= FXBACKQ; 1371 cpu_surrender(t); 1372 } 1373 thread_unlock_nopreempt(t); /* clock thread can't be preempted */ 1374 } 1375 1376 1377 static void 1378 fx_trapret(kthread_t *t) 1379 { 1380 cpu_t *cp = CPU; 1381 1382 ASSERT(THREAD_LOCK_HELD(t)); 1383 ASSERT(t == curthread); 1384 ASSERT(cp->cpu_dispthread == t); 1385 ASSERT(t->t_state == TS_ONPROC); 1386 } 1387 1388 1389 /* 1390 * Processes waking up go to the back of their queue. 1391 */ 1392 static void 1393 fx_wakeup(kthread_t *t) 1394 { 1395 fxproc_t *fxpp = (fxproc_t *)(t->t_cldata); 1396 1397 ASSERT(THREAD_LOCK_HELD(t)); 1398 1399 t->t_stime = ddi_get_lbolt(); /* time stamp for the swapper */ 1400 if (FX_HAS_CB(fxpp)) { 1401 clock_t new_quantum = (clock_t)fxpp->fx_pquantum; 1402 pri_t newpri = fxpp->fx_pri; 1403 FX_CB_WAKEUP(FX_CALLB(fxpp), fxpp->fx_cookie, 1404 &new_quantum, &newpri); 1405 FX_ADJUST_QUANTUM(new_quantum); 1406 if ((int)new_quantum != fxpp->fx_pquantum) { 1407 fxpp->fx_pquantum = (int)new_quantum; 1408 fxpp->fx_timeleft = fxpp->fx_pquantum; 1409 } 1410 1411 FX_ADJUST_PRI(newpri); 1412 if (newpri != fxpp->fx_pri) { 1413 fxpp->fx_pri = newpri; 1414 THREAD_CHANGE_PRI(t, fx_dptbl[fxpp->fx_pri].fx_globpri); 1415 } 1416 } 1417 1418 fxpp->fx_flags &= ~FXBACKQ; 1419 1420 if (t->t_disp_time != ddi_get_lbolt()) 1421 setbackdq(t); 1422 else 1423 setfrontdq(t); 1424 } 1425 1426 1427 /* 1428 * When a thread yields, put it on the back of the run queue. 
1429 */ 1430 static void 1431 fx_yield(kthread_t *t) 1432 { 1433 fxproc_t *fxpp = (fxproc_t *)(t->t_cldata); 1434 1435 ASSERT(t == curthread); 1436 ASSERT(THREAD_LOCK_HELD(t)); 1437 1438 /* 1439 * Collect CPU usage spent before yielding CPU. 1440 */ 1441 (void) CPUCAPS_CHARGE(t, &fxpp->fx_caps, CPUCAPS_CHARGE_ENFORCE); 1442 1443 if (FX_HAS_CB(fxpp)) { 1444 clock_t new_quantum = (clock_t)fxpp->fx_pquantum; 1445 pri_t newpri = fxpp->fx_pri; 1446 FX_CB_PREEMPT(FX_CALLB(fxpp), fxpp->fx_cookie, 1447 &new_quantum, &newpri); 1448 FX_ADJUST_QUANTUM(new_quantum); 1449 if ((int)new_quantum != fxpp->fx_pquantum) { 1450 fxpp->fx_pquantum = (int)new_quantum; 1451 fxpp->fx_timeleft = fxpp->fx_pquantum; 1452 } 1453 FX_ADJUST_PRI(newpri); 1454 fxpp->fx_pri = newpri; 1455 THREAD_CHANGE_PRI(t, fx_dptbl[fxpp->fx_pri].fx_globpri); 1456 } 1457 1458 /* 1459 * Clear the preemption control "yield" bit since the user is 1460 * doing a yield. 1461 */ 1462 if (t->t_schedctl) 1463 schedctl_set_yield(t, 0); 1464 1465 if (fxpp->fx_timeleft <= 0) { 1466 /* 1467 * Time slice was artificially extended to avoid 1468 * preemption, so pretend we're preempting it now. 1469 */ 1470 DTRACE_SCHED1(schedctl__yield, int, -fxpp->fx_timeleft); 1471 fxpp->fx_timeleft = fxpp->fx_pquantum; 1472 THREAD_CHANGE_PRI(t, fx_dptbl[fxpp->fx_pri].fx_globpri); 1473 ASSERT(t->t_pri >= 0 && t->t_pri <= fx_maxglobpri); 1474 } 1475 1476 fxpp->fx_flags &= ~FXBACKQ; 1477 setbackdq(t); 1478 } 1479 1480 /* 1481 * Increment the nice value of the specified thread by incr and 1482 * return the new value in *retvalp. 
1483 */ 1484 static int 1485 fx_donice(kthread_t *t, cred_t *cr, int incr, int *retvalp) 1486 { 1487 int newnice; 1488 fxproc_t *fxpp = (fxproc_t *)(t->t_cldata); 1489 fxkparms_t fxkparms; 1490 1491 ASSERT(MUTEX_HELD(&(ttoproc(t))->p_lock)); 1492 1493 /* If there's no change to priority, just return current setting */ 1494 if (incr == 0) { 1495 if (retvalp) { 1496 *retvalp = fxpp->fx_nice - NZERO; 1497 } 1498 return (0); 1499 } 1500 1501 if ((incr < 0 || incr > 2 * NZERO) && 1502 secpolicy_raisepriority(cr) != 0) 1503 return (EPERM); 1504 1505 /* 1506 * Specifying a nice increment greater than the upper limit of 1507 * 2 * NZERO - 1 will result in the thread's nice value being 1508 * set to the upper limit. We check for this before computing 1509 * the new value because otherwise we could get overflow 1510 * if a privileged user specified some ridiculous increment. 1511 */ 1512 if (incr > 2 * NZERO - 1) 1513 incr = 2 * NZERO - 1; 1514 1515 newnice = fxpp->fx_nice + incr; 1516 if (newnice > NZERO) 1517 newnice = NZERO; 1518 else if (newnice < 0) 1519 newnice = 0; 1520 1521 fxkparms.fx_uprilim = fxkparms.fx_upri = 1522 -((newnice - NZERO) * fx_maxupri) / NZERO; 1523 1524 fxkparms.fx_cflags = FX_DOUPRILIM | FX_DOUPRI; 1525 1526 fxkparms.fx_tqntm = FX_TQDEF; 1527 1528 /* 1529 * Reset the uprilim and upri values of the thread. Adjust 1530 * time quantum accordingly. 1531 */ 1532 1533 (void) fx_parmsset(t, (void *)&fxkparms, (id_t)0, (cred_t *)NULL); 1534 1535 /* 1536 * Although fx_parmsset already reset fx_nice it may 1537 * not have been set to precisely the value calculated above 1538 * because fx_parmsset determines the nice value from the 1539 * user priority and we may have truncated during the integer 1540 * conversion from nice value to user priority and back. 1541 * We reset fx_nice to the value we calculated above. 
1542 */ 1543 fxpp->fx_nice = (char)newnice; 1544 1545 if (retvalp) 1546 *retvalp = newnice - NZERO; 1547 1548 return (0); 1549 } 1550 1551 /* 1552 * Increment the priority of the specified thread by incr and 1553 * return the new value in *retvalp. 1554 */ 1555 static int 1556 fx_doprio(kthread_t *t, cred_t *cr, int incr, int *retvalp) 1557 { 1558 int newpri; 1559 fxproc_t *fxpp = (fxproc_t *)(t->t_cldata); 1560 fxkparms_t fxkparms; 1561 1562 ASSERT(MUTEX_HELD(&(ttoproc(t))->p_lock)); 1563 1564 /* If there's no change to priority, just return current setting */ 1565 if (incr == 0) { 1566 *retvalp = fxpp->fx_pri; 1567 return (0); 1568 } 1569 1570 newpri = fxpp->fx_pri + incr; 1571 if (newpri > fx_maxupri || newpri < 0) 1572 return (EINVAL); 1573 1574 *retvalp = newpri; 1575 fxkparms.fx_uprilim = fxkparms.fx_upri = newpri; 1576 fxkparms.fx_tqntm = FX_NOCHANGE; 1577 fxkparms.fx_cflags = FX_DOUPRILIM | FX_DOUPRI; 1578 1579 /* 1580 * Reset the uprilim and upri values of the thread. 1581 */ 1582 return (fx_parmsset(t, (void *)&fxkparms, (id_t)0, cr)); 1583 } 1584 1585 static void 1586 fx_change_priority(kthread_t *t, fxproc_t *fxpp) 1587 { 1588 pri_t new_pri; 1589 1590 ASSERT(THREAD_LOCK_HELD(t)); 1591 new_pri = fx_dptbl[fxpp->fx_pri].fx_globpri; 1592 ASSERT(new_pri >= 0 && new_pri <= fx_maxglobpri); 1593 t->t_cpri = fxpp->fx_pri; 1594 if (t == curthread || t->t_state == TS_ONPROC) { 1595 /* curthread is always onproc */ 1596 cpu_t *cp = t->t_disp_queue->disp_cpu; 1597 THREAD_CHANGE_PRI(t, new_pri); 1598 if (t == cp->cpu_dispthread) 1599 cp->cpu_dispatch_pri = DISP_PRIO(t); 1600 if (DISP_MUST_SURRENDER(t)) { 1601 fxpp->fx_flags |= FXBACKQ; 1602 cpu_surrender(t); 1603 } else { 1604 fxpp->fx_timeleft = fxpp->fx_pquantum; 1605 } 1606 } else { 1607 /* 1608 * When the priority of a thread is changed, 1609 * it may be necessary to adjust its position 1610 * on a sleep queue or dispatch queue. 1611 * The function thread_change_pri accomplishes 1612 * this. 
1613 */ 1614 if (thread_change_pri(t, new_pri, 0)) { 1615 /* 1616 * The thread was on a run queue. Reset 1617 * its CPU timeleft from the quantum 1618 * associated with the new priority. 1619 */ 1620 fxpp->fx_timeleft = fxpp->fx_pquantum; 1621 } else { 1622 fxpp->fx_flags |= FXBACKQ; 1623 } 1624 } 1625 } 1626 1627 static int 1628 fx_alloc(void **p, int flag) 1629 { 1630 void *bufp; 1631 1632 bufp = kmem_alloc(sizeof (fxproc_t), flag); 1633 if (bufp == NULL) { 1634 return (ENOMEM); 1635 } else { 1636 *p = bufp; 1637 return (0); 1638 } 1639 } 1640 1641 static void 1642 fx_free(void *bufp) 1643 { 1644 if (bufp) 1645 kmem_free(bufp, sizeof (fxproc_t)); 1646 } 1647 1648 /* 1649 * Release the callback list mutex after successful lookup 1650 */ 1651 void 1652 fx_list_release(fxproc_t *fxpp) 1653 { 1654 int index = FX_CB_LIST_HASH(fxpp->fx_ktid); 1655 kmutex_t *lockp = &fx_cb_list_lock[index]; 1656 mutex_exit(lockp); 1657 } 1658 1659 fxproc_t * 1660 fx_list_lookup(kt_did_t ktid) 1661 { 1662 int index = FX_CB_LIST_HASH(ktid); 1663 kmutex_t *lockp = &fx_cb_list_lock[index]; 1664 fxproc_t *fxpp; 1665 1666 mutex_enter(lockp); 1667 1668 for (fxpp = fx_cb_plisthead[index].fx_cb_next; 1669 fxpp != &fx_cb_plisthead[index]; fxpp = fxpp->fx_cb_next) { 1670 if (fxpp->fx_tp->t_cid == fx_cid && fxpp->fx_ktid == ktid && 1671 fxpp->fx_callback != NULL) { 1672 /* 1673 * The caller is responsible for calling 1674 * fx_list_release to drop the lock upon 1675 * successful lookup 1676 */ 1677 return (fxpp); 1678 } 1679 } 1680 mutex_exit(lockp); 1681 return ((fxproc_t *)NULL); 1682 } 1683 1684 1685 /* 1686 * register a callback set of routines for current thread 1687 * thread should already be in FX class 1688 */ 1689 int 1690 fx_register_callbacks(fx_callbacks_t *fx_callback, fx_cookie_t cookie, 1691 pri_t pri, clock_t quantum) 1692 { 1693 1694 fxproc_t *fxpp; 1695 1696 if (fx_callback == NULL) 1697 return (EINVAL); 1698 1699 if (secpolicy_dispadm(CRED()) != 0) 1700 return (EPERM); 1701 1702 
if (FX_CB_VERSION(fx_callback) != FX_CALLB_REV) 1703 return (EINVAL); 1704 1705 if (!FX_ISVALID(pri, quantum)) 1706 return (EINVAL); 1707 1708 thread_lock(curthread); /* get dispatcher lock on thread */ 1709 1710 if (curthread->t_cid != fx_cid) { 1711 thread_unlock(curthread); 1712 return (EINVAL); 1713 } 1714 1715 fxpp = (fxproc_t *)(curthread->t_cldata); 1716 ASSERT(fxpp != NULL); 1717 if (FX_HAS_CB(fxpp)) { 1718 thread_unlock(curthread); 1719 return (EINVAL); 1720 } 1721 1722 fxpp->fx_callback = fx_callback; 1723 fxpp->fx_cookie = cookie; 1724 1725 if (pri != FX_CB_NOCHANGE) { 1726 fxpp->fx_pri = pri; 1727 FX_ADJUST_PRI(fxpp->fx_pri); 1728 if (quantum == FX_TQDEF) { 1729 fxpp->fx_pquantum = fx_dptbl[fxpp->fx_pri].fx_quantum; 1730 } else if (quantum == FX_TQINF) { 1731 fxpp->fx_pquantum = FX_TQINF; 1732 } else if (quantum != FX_NOCHANGE) { 1733 FX_ADJUST_QUANTUM(quantum); 1734 fxpp->fx_pquantum = quantum; 1735 } 1736 } else if (quantum != FX_NOCHANGE && quantum != FX_TQDEF) { 1737 if (quantum == FX_TQINF) 1738 fxpp->fx_pquantum = FX_TQINF; 1739 else { 1740 FX_ADJUST_QUANTUM(quantum); 1741 fxpp->fx_pquantum = quantum; 1742 } 1743 } 1744 1745 fxpp->fx_ktid = ddi_get_kt_did(); 1746 1747 fx_change_priority(curthread, fxpp); 1748 1749 thread_unlock(curthread); 1750 1751 /* 1752 * Link new structure into fxproc list. 
1753 */ 1754 FX_CB_LIST_INSERT(fxpp); 1755 return (0); 1756 } 1757 1758 /* unregister a callback set of routines for current thread */ 1759 int 1760 fx_unregister_callbacks() 1761 { 1762 fxproc_t *fxpp; 1763 1764 if ((fxpp = fx_list_lookup(ddi_get_kt_did())) == NULL) { 1765 /* 1766 * did not have a registered callback; 1767 */ 1768 return (EINVAL); 1769 } 1770 1771 thread_lock(fxpp->fx_tp); 1772 fxpp->fx_callback = NULL; 1773 fxpp->fx_cookie = NULL; 1774 thread_unlock(fxpp->fx_tp); 1775 fx_list_release(fxpp); 1776 1777 FX_CB_LIST_DELETE(fxpp); 1778 return (0); 1779 } 1780 1781 /* 1782 * modify priority and/or quantum value of a thread with callback 1783 */ 1784 int 1785 fx_modify_priority(kt_did_t ktid, clock_t quantum, pri_t pri) 1786 { 1787 fxproc_t *fxpp; 1788 1789 if (!FX_ISVALID(pri, quantum)) 1790 return (EINVAL); 1791 1792 if ((fxpp = fx_list_lookup(ktid)) == NULL) { 1793 /* 1794 * either thread had exited or did not have a registered 1795 * callback; 1796 */ 1797 return (ESRCH); 1798 } 1799 1800 thread_lock(fxpp->fx_tp); 1801 1802 if (pri != FX_CB_NOCHANGE) { 1803 fxpp->fx_pri = pri; 1804 FX_ADJUST_PRI(fxpp->fx_pri); 1805 if (quantum == FX_TQDEF) { 1806 fxpp->fx_pquantum = fx_dptbl[fxpp->fx_pri].fx_quantum; 1807 } else if (quantum == FX_TQINF) { 1808 fxpp->fx_pquantum = FX_TQINF; 1809 } else if (quantum != FX_NOCHANGE) { 1810 FX_ADJUST_QUANTUM(quantum); 1811 fxpp->fx_pquantum = quantum; 1812 } 1813 } else if (quantum != FX_NOCHANGE && quantum != FX_TQDEF) { 1814 if (quantum == FX_TQINF) { 1815 fxpp->fx_pquantum = FX_TQINF; 1816 } else { 1817 FX_ADJUST_QUANTUM(quantum); 1818 fxpp->fx_pquantum = quantum; 1819 } 1820 } 1821 1822 fx_change_priority(fxpp->fx_tp, fxpp); 1823 1824 thread_unlock(fxpp->fx_tp); 1825 fx_list_release(fxpp); 1826 return (0); 1827 } 1828 1829 1830 /* 1831 * return an iblock cookie for mutex initialization to be used in callbacks 1832 */ 1833 void * 1834 fx_get_mutex_cookie() 1835 { 1836 return ((void *)(uintptr_t)__ipltospl(DISP_LEVEL)); 
1837 } 1838 1839 /* 1840 * return maximum relative priority 1841 */ 1842 pri_t 1843 fx_get_maxpri() 1844 { 1845 return (fx_maxumdpri); 1846 }