1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
  24  * Copyright 2019 Joyent, Inc.
  25  */
  26 
  27 /*      Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T     */
  28 /*        All Rights Reserved   */
  29 
  30 #include <sys/types.h>
  31 #include <sys/param.h>
  32 #include <sys/sysmacros.h>
  33 #include <sys/cred.h>
  34 #include <sys/proc.h>
  35 #include <sys/session.h>
  36 #include <sys/strsubr.h>
  37 #include <sys/signal.h>
  38 #include <sys/user.h>
  39 #include <sys/priocntl.h>
  40 #include <sys/class.h>
  41 #include <sys/disp.h>
  42 #include <sys/procset.h>
  43 #include <sys/debug.h>
  44 #include <sys/ts.h>
  45 #include <sys/tspriocntl.h>
  46 #include <sys/iapriocntl.h>
  47 #include <sys/kmem.h>
  48 #include <sys/errno.h>
  49 #include <sys/cpuvar.h>
  50 #include <sys/systm.h>            /* for lbolt */
  51 #include <sys/vtrace.h>
  52 #include <sys/vmsystm.h>
  53 #include <sys/schedctl.h>
  54 #include <sys/tnf_probe.h>
  55 #include <sys/atomic.h>
  56 #include <sys/policy.h>
  57 #include <sys/sdt.h>
  58 #include <sys/cpupart.h>
  59 #include <vm/rm.h>
  60 #include <vm/seg_kmem.h>
  61 #include <sys/modctl.h>
  62 #include <sys/cpucaps.h>
  63 
  64 static pri_t ts_init(id_t, int, classfuncs_t **);
  65 
  66 static struct sclass csw = {
  67         "TS",
  68         ts_init,
  69         0
  70 };
  71 
  72 static struct modlsched modlsched = {
  73         &mod_schedops, "time sharing sched class", &csw
  74 };
  75 
  76 static struct modlinkage modlinkage = {
  77         MODREV_1, (void *)&modlsched, NULL
  78 };
  79 
  80 int
  81 _init()
  82 {
  83         return (mod_install(&modlinkage));
  84 }
  85 
  86 int
  87 _fini()
  88 {
  89         return (EBUSY);         /* don't remove TS for now */
  90 }
  91 
  92 int
  93 _info(struct modinfo *modinfop)
  94 {
  95         return (mod_info(&modlinkage, modinfop));
  96 }
  97 
  98 /*
  99  * Class specific code for the time-sharing class
 100  */
 101 
 102 
 103 /*
 104  * Extern declarations for variables defined in the ts master file
 105  */
 106 #define TSMAXUPRI 60
 107 
 108 pri_t   ts_maxupri = TSMAXUPRI; /* max time-sharing user priority */
 109 pri_t   ts_maxumdpri;           /* maximum user mode ts priority */
 110 
 111 pri_t   ia_maxupri = IAMAXUPRI; /* max interactive user priority */
 112 pri_t   ia_boost = IA_BOOST;    /* boost value for interactive */
 113 
 114 tsdpent_t  *ts_dptbl;   /* time-sharing disp parameter table */
 115 pri_t   *ts_kmdpris;    /* array of global pris used by ts procs when */
 116                         /*  sleeping or running in kernel after sleep */
 117 
 118 static id_t ia_cid;
 119 
 120 int ts_sleep_promote = 1;
 121 
 122 #define tsmedumdpri     (ts_maxumdpri >> 1)
 123 
 124 #define TS_NEWUMDPRI(tspp) \
 125 { \
 126         pri_t pri; \
 127         pri = (tspp)->ts_cpupri + (tspp)->ts_upri + (tspp)->ts_boost; \
 128         if (pri > ts_maxumdpri) \
 129                 (tspp)->ts_umdpri = ts_maxumdpri; \
 130         else if (pri < 0) \
 131                 (tspp)->ts_umdpri = 0; \
 132         else \
 133                 (tspp)->ts_umdpri = pri; \
 134         ASSERT((tspp)->ts_umdpri >= 0 && (tspp)->ts_umdpri <= ts_maxumdpri); \
 135 }
 136 
 137 /*
 138  * The tsproc_t structures are kept in an array of circular doubly linked
 139  * lists.  A hash on the thread pointer is used to determine which list
 140  * each thread should be placed.  Each list has a dummy "head" which is
 141  * never removed, so the list is never empty.  ts_update traverses these
 142  * lists to update the priorities of threads that have been waiting on
 143  * the run queue.
 144  */
 145 
 146 #define TS_LISTS 16             /* number of lists, must be power of 2 */
 147 
 148 /* hash function, argument is a thread pointer */
 149 #define TS_LIST_HASH(tp)        (((uintptr_t)(tp) >> 9) & (TS_LISTS - 1))
 150 
 151 /* iterate to the next list */
 152 #define TS_LIST_NEXT(i)         (((i) + 1) & (TS_LISTS - 1))
 153 
 154 /*
 155  * Insert thread into the appropriate tsproc list.
 156  */
 157 #define TS_LIST_INSERT(tspp)                            \
 158 {                                                       \
 159         int index = TS_LIST_HASH(tspp->ts_tp);               \
 160         kmutex_t *lockp = &ts_list_lock[index];             \
 161         tsproc_t *headp = &ts_plisthead[index];             \
 162         mutex_enter(lockp);                             \
 163         tspp->ts_next = headp->ts_next;                   \
 164         tspp->ts_prev = headp;                               \
 165         headp->ts_next->ts_prev = tspp;                   \
 166         headp->ts_next = tspp;                               \
 167         mutex_exit(lockp);                              \
 168 }
 169 
 170 /*
 171  * Remove thread from tsproc list.
 172  */
 173 #define TS_LIST_DELETE(tspp)                            \
 174 {                                                       \
 175         int index = TS_LIST_HASH(tspp->ts_tp);               \
 176         kmutex_t *lockp = &ts_list_lock[index];             \
 177         mutex_enter(lockp);                             \
 178         tspp->ts_prev->ts_next = tspp->ts_next;                \
 179         tspp->ts_next->ts_prev = tspp->ts_prev;                \
 180         mutex_exit(lockp);                              \
 181 }
 182 
 183 
 184 static int      ts_admin(caddr_t, cred_t *);
 185 static int      ts_enterclass(kthread_t *, id_t, void *, cred_t *, void *);
 186 static int      ts_fork(kthread_t *, kthread_t *, void *);
 187 static int      ts_getclinfo(void *);
 188 static int      ts_getclpri(pcpri_t *);
 189 static int      ts_parmsin(void *);
 190 static int      ts_parmsout(void *, pc_vaparms_t *);
 191 static int      ts_vaparmsin(void *, pc_vaparms_t *);
 192 static int      ts_vaparmsout(void *, pc_vaparms_t *);
 193 static int      ts_parmsset(kthread_t *, void *, id_t, cred_t *);
 194 static void     ts_exit(kthread_t *);
 195 static int      ts_donice(kthread_t *, cred_t *, int, int *);
 196 static int      ts_doprio(kthread_t *, cred_t *, int, int *);
 197 static void     ts_exitclass(void *);
 198 static int      ts_canexit(kthread_t *, cred_t *);
 199 static void     ts_forkret(kthread_t *, kthread_t *);
 200 static void     ts_nullsys();
 201 static void     ts_parmsget(kthread_t *, void *);
 202 static void     ts_preempt(kthread_t *);
 203 static void     ts_setrun(kthread_t *);
 204 static void     ts_sleep(kthread_t *);
 205 static pri_t    ts_swapin(kthread_t *, int);
 206 static pri_t    ts_swapout(kthread_t *, int);
 207 static void     ts_tick(kthread_t *);
 208 static void     ts_trapret(kthread_t *);
 209 static void     ts_update(void *);
 210 static int      ts_update_list(int);
 211 static void     ts_wakeup(kthread_t *);
 212 static pri_t    ts_globpri(kthread_t *);
 213 static void     ts_yield(kthread_t *);
 214 extern tsdpent_t *ts_getdptbl(void);
 215 extern pri_t    *ts_getkmdpris(void);
 216 extern pri_t    td_getmaxumdpri(void);
 217 static int      ts_alloc(void **, int);
 218 static void     ts_free(void *);
 219 
 220 pri_t           ia_init(id_t, int, classfuncs_t **);
 221 static int      ia_getclinfo(void *);
 222 static int      ia_getclpri(pcpri_t *);
 223 static int      ia_parmsin(void *);
 224 static int      ia_vaparmsin(void *, pc_vaparms_t *);
 225 static int      ia_vaparmsout(void *, pc_vaparms_t *);
 226 static int      ia_parmsset(kthread_t *, void *, id_t, cred_t *);
 227 static void     ia_parmsget(kthread_t *, void *);
 228 static void     ia_set_process_group(pid_t, pid_t, pid_t);
 229 
 230 static void     ts_change_priority(kthread_t *, tsproc_t *);
 231 
 232 static pri_t    ts_maxglobpri;  /* maximum global priority used by ts class */
 233 static kmutex_t ts_dptblock;    /* protects time sharing dispatch table */
 234 static kmutex_t ts_list_lock[TS_LISTS]; /* protects tsproc lists */
 235 static tsproc_t ts_plisthead[TS_LISTS]; /* dummy tsproc at head of lists */
 236 
 237 static gid_t    IA_gid = 0;
 238 
 239 static struct classfuncs ts_classfuncs = {
 240         /* class functions */
 241         ts_admin,
 242         ts_getclinfo,
 243         ts_parmsin,
 244         ts_parmsout,
 245         ts_vaparmsin,
 246         ts_vaparmsout,
 247         ts_getclpri,
 248         ts_alloc,
 249         ts_free,
 250 
 251         /* thread functions */
 252         ts_enterclass,
 253         ts_exitclass,
 254         ts_canexit,
 255         ts_fork,
 256         ts_forkret,
 257         ts_parmsget,
 258         ts_parmsset,
 259         ts_nullsys,     /* stop */
 260         ts_exit,
 261         ts_nullsys,     /* active */
 262         ts_nullsys,     /* inactive */
 263         ts_swapin,
 264         ts_swapout,
 265         ts_trapret,
 266         ts_preempt,
 267         ts_setrun,
 268         ts_sleep,
 269         ts_tick,
 270         ts_wakeup,
 271         ts_donice,
 272         ts_globpri,
 273         ts_nullsys,     /* set_process_group */
 274         ts_yield,
 275         ts_doprio,
 276 };
 277 
 278 /*
 279  * ia_classfuncs is used for interactive class threads; IA threads are stored
 280  * on the same class list as TS threads, and most of the class functions are
 281  * identical, but a few have different enough functionality to require their
 282  * own functions.
 283  */
 284 static struct classfuncs ia_classfuncs = {
 285         /* class functions */
 286         ts_admin,
 287         ia_getclinfo,
 288         ia_parmsin,
 289         ts_parmsout,
 290         ia_vaparmsin,
 291         ia_vaparmsout,
 292         ia_getclpri,
 293         ts_alloc,
 294         ts_free,
 295 
 296         /* thread functions */
 297         ts_enterclass,
 298         ts_exitclass,
 299         ts_canexit,
 300         ts_fork,
 301         ts_forkret,
 302         ia_parmsget,
 303         ia_parmsset,
 304         ts_nullsys,     /* stop */
 305         ts_exit,
 306         ts_nullsys,     /* active */
 307         ts_nullsys,     /* inactive */
 308         ts_swapin,
 309         ts_swapout,
 310         ts_trapret,
 311         ts_preempt,
 312         ts_setrun,
 313         ts_sleep,
 314         ts_tick,
 315         ts_wakeup,
 316         ts_donice,
 317         ts_globpri,
 318         ia_set_process_group,
 319         ts_yield,
 320         ts_doprio,
 321 };
 322 
 323 
 324 /*
 325  * Time sharing class initialization.  Called by dispinit() at boot time.
 326  * We can ignore the clparmsz argument since we know that the smallest
 327  * possible parameter buffer is big enough for us.
 328  */
 329 /* ARGSUSED */
 330 static pri_t
 331 ts_init(id_t cid, int clparmsz, classfuncs_t **clfuncspp)
 332 {
 333         int i;
 334         extern pri_t ts_getmaxumdpri(void);
 335 
 336         ts_dptbl = ts_getdptbl();
 337         ts_kmdpris = ts_getkmdpris();
 338         ts_maxumdpri = ts_getmaxumdpri();
 339         ts_maxglobpri = MAX(ts_kmdpris[0], ts_dptbl[ts_maxumdpri].ts_globpri);
 340 
 341         /*
 342          * Initialize the tsproc lists.
 343          */
 344         for (i = 0; i < TS_LISTS; i++) {
 345                 ts_plisthead[i].ts_next = ts_plisthead[i].ts_prev =
 346                     &ts_plisthead[i];
 347         }
 348 
 349         /*
 350          * We're required to return a pointer to our classfuncs
 351          * structure and the highest global priority value we use.
 352          */
 353         *clfuncspp = &ts_classfuncs;
 354         return (ts_maxglobpri);
 355 }
 356 
 357 
 358 /*
 359  * Interactive class scheduler initialization
 360  */
 361 /* ARGSUSED */
 362 pri_t
 363 ia_init(id_t cid, int clparmsz, classfuncs_t **clfuncspp)
 364 {
 365         /*
 366          * We're required to return a pointer to our classfuncs
 367          * structure and the highest global priority value we use.
 368          */
 369         ia_cid = cid;
 370         *clfuncspp = &ia_classfuncs;
 371         return (ts_maxglobpri);
 372 }
 373 
 374 
 375 /*
 376  * Get or reset the ts_dptbl values per the user's request.
 377  */
 378 static int
 379 ts_admin(caddr_t uaddr, cred_t *reqpcredp)
 380 {
 381         tsadmin_t       tsadmin;
 382         tsdpent_t       *tmpdpp;
 383         int             userdpsz;
 384         int             i;
 385         size_t          tsdpsz;
 386 
 387         if (get_udatamodel() == DATAMODEL_NATIVE) {
 388                 if (copyin(uaddr, &tsadmin, sizeof (tsadmin_t)))
 389                         return (EFAULT);
 390         }
 391 #ifdef _SYSCALL32_IMPL
 392         else {
 393                 /* get tsadmin struct from ILP32 caller */
 394                 tsadmin32_t tsadmin32;
 395                 if (copyin(uaddr, &tsadmin32, sizeof (tsadmin32_t)))
 396                         return (EFAULT);
 397                 tsadmin.ts_dpents =
 398                     (struct tsdpent *)(uintptr_t)tsadmin32.ts_dpents;
 399                 tsadmin.ts_ndpents = tsadmin32.ts_ndpents;
 400                 tsadmin.ts_cmd = tsadmin32.ts_cmd;
 401         }
 402 #endif /* _SYSCALL32_IMPL */
 403 
 404         tsdpsz = (ts_maxumdpri + 1) * sizeof (tsdpent_t);
 405 
 406         switch (tsadmin.ts_cmd) {
 407         case TS_GETDPSIZE:
 408                 tsadmin.ts_ndpents = ts_maxumdpri + 1;
 409 
 410                 if (get_udatamodel() == DATAMODEL_NATIVE) {
 411                         if (copyout(&tsadmin, uaddr, sizeof (tsadmin_t)))
 412                                 return (EFAULT);
 413                 }
 414 #ifdef _SYSCALL32_IMPL
 415                 else {
 416                         /* return tsadmin struct to ILP32 caller */
 417                         tsadmin32_t tsadmin32;
 418                         tsadmin32.ts_dpents =
 419                             (caddr32_t)(uintptr_t)tsadmin.ts_dpents;
 420                         tsadmin32.ts_ndpents = tsadmin.ts_ndpents;
 421                         tsadmin32.ts_cmd = tsadmin.ts_cmd;
 422                         if (copyout(&tsadmin32, uaddr, sizeof (tsadmin32_t)))
 423                                 return (EFAULT);
 424                 }
 425 #endif /* _SYSCALL32_IMPL */
 426                 break;
 427 
 428         case TS_GETDPTBL:
 429                 userdpsz = MIN(tsadmin.ts_ndpents * sizeof (tsdpent_t),
 430                     tsdpsz);
 431                 if (copyout(ts_dptbl, tsadmin.ts_dpents, userdpsz))
 432                         return (EFAULT);
 433 
 434                 tsadmin.ts_ndpents = userdpsz / sizeof (tsdpent_t);
 435 
 436                 if (get_udatamodel() == DATAMODEL_NATIVE) {
 437                         if (copyout(&tsadmin, uaddr, sizeof (tsadmin_t)))
 438                                 return (EFAULT);
 439                 }
 440 #ifdef _SYSCALL32_IMPL
 441                 else {
 442                         /* return tsadmin struct to ILP32 callers */
 443                         tsadmin32_t tsadmin32;
 444                         tsadmin32.ts_dpents =
 445                             (caddr32_t)(uintptr_t)tsadmin.ts_dpents;
 446                         tsadmin32.ts_ndpents = tsadmin.ts_ndpents;
 447                         tsadmin32.ts_cmd = tsadmin.ts_cmd;
 448                         if (copyout(&tsadmin32, uaddr, sizeof (tsadmin32_t)))
 449                                 return (EFAULT);
 450                 }
 451 #endif /* _SYSCALL32_IMPL */
 452                 break;
 453 
 454         case TS_SETDPTBL:
 455                 /*
 456                  * We require that the requesting process has sufficient
 457                  * priveleges.  We also require that the table supplied by
 458                  * the user exactly match the current ts_dptbl in size.
 459                  */
 460                 if (secpolicy_dispadm(reqpcredp) != 0)
 461                         return (EPERM);
 462 
 463                 if (tsadmin.ts_ndpents * sizeof (tsdpent_t) != tsdpsz) {
 464                         return (EINVAL);
 465                 }
 466 
 467                 /*
 468                  * We read the user supplied table into a temporary buffer
 469                  * where it is validated before being copied over the
 470                  * ts_dptbl.
 471                  */
 472                 tmpdpp = kmem_alloc(tsdpsz, KM_SLEEP);
 473                 if (copyin((caddr_t)tsadmin.ts_dpents, (caddr_t)tmpdpp,
 474                     tsdpsz)) {
 475                         kmem_free(tmpdpp, tsdpsz);
 476                         return (EFAULT);
 477                 }
 478                 for (i = 0; i < tsadmin.ts_ndpents; i++) {
 479 
 480                         /*
 481                          * Validate the user supplied values.  All we are doing
 482                          * here is verifying that the values are within their
 483                          * allowable ranges and will not panic the system.  We
 484                          * make no attempt to ensure that the resulting
 485                          * configuration makes sense or results in reasonable
 486                          * performance.
 487                          */
 488                         if (tmpdpp[i].ts_quantum <= 0) {
 489                                 kmem_free(tmpdpp, tsdpsz);
 490                                 return (EINVAL);
 491                         }
 492                         if (tmpdpp[i].ts_tqexp > ts_maxumdpri ||
 493                             tmpdpp[i].ts_tqexp < 0) {
 494                                 kmem_free(tmpdpp, tsdpsz);
 495                                 return (EINVAL);
 496                         }
 497                         if (tmpdpp[i].ts_slpret > ts_maxumdpri ||
 498                             tmpdpp[i].ts_slpret < 0) {
 499                                 kmem_free(tmpdpp, tsdpsz);
 500                                 return (EINVAL);
 501                         }
 502                         if (tmpdpp[i].ts_maxwait < 0) {
 503                                 kmem_free(tmpdpp, tsdpsz);
 504                                 return (EINVAL);
 505                         }
 506                         if (tmpdpp[i].ts_lwait > ts_maxumdpri ||
 507                             tmpdpp[i].ts_lwait < 0) {
 508                                 kmem_free(tmpdpp, tsdpsz);
 509                                 return (EINVAL);
 510                         }
 511                 }
 512 
 513                 /*
 514                  * Copy the user supplied values over the current ts_dptbl
 515                  * values.  The ts_globpri member is read-only so we don't
 516                  * overwrite it.
 517                  */
 518                 mutex_enter(&ts_dptblock);
 519                 for (i = 0; i < tsadmin.ts_ndpents; i++) {
 520                         ts_dptbl[i].ts_quantum = tmpdpp[i].ts_quantum;
 521                         ts_dptbl[i].ts_tqexp = tmpdpp[i].ts_tqexp;
 522                         ts_dptbl[i].ts_slpret = tmpdpp[i].ts_slpret;
 523                         ts_dptbl[i].ts_maxwait = tmpdpp[i].ts_maxwait;
 524                         ts_dptbl[i].ts_lwait = tmpdpp[i].ts_lwait;
 525                 }
 526                 mutex_exit(&ts_dptblock);
 527                 kmem_free(tmpdpp, tsdpsz);
 528                 break;
 529 
 530         default:
 531                 return (EINVAL);
 532         }
 533         return (0);
 534 }
 535 
 536 
 537 /*
 538  * Allocate a time-sharing class specific thread structure and
 539  * initialize it with the parameters supplied. Also move the thread
 540  * to specified time-sharing priority.
 541  */
 542 static int
 543 ts_enterclass(kthread_t *t, id_t cid, void *parmsp, cred_t *reqpcredp,
 544     void *bufp)
 545 {
 546         tsparms_t       *tsparmsp = (tsparms_t *)parmsp;
 547         tsproc_t        *tspp;
 548         pri_t           reqtsuprilim;
 549         pri_t           reqtsupri;
 550         static uint32_t tspexists = 0;  /* set on first occurrence of */
 551                                         /*   a time-sharing process */
 552 
 553         tspp = (tsproc_t *)bufp;
 554         ASSERT(tspp != NULL);
 555 
 556         /*
 557          * Initialize the tsproc structure.
 558          */
 559         tspp->ts_cpupri = tsmedumdpri;
 560         if (cid == ia_cid) {
 561                 /*
 562                  * Check to make sure caller is either privileged or the
 563                  * window system.  When the window system is converted
 564                  * to using privileges, the second check can go away.
 565                  */
 566                 if (reqpcredp != NULL && !groupmember(IA_gid, reqpcredp) &&
 567                     secpolicy_setpriority(reqpcredp) != 0)
 568                         return (EPERM);
 569                 /*
 570                  * Belongs to IA "class", so set appropriate flags.
 571                  * Mark as 'on' so it will not be a swap victim
 572                  * while forking.
 573                  */
 574                 tspp->ts_flags = TSIA | TSIASET;
 575                 tspp->ts_boost = ia_boost;
 576         } else {
 577                 tspp->ts_flags = 0;
 578                 tspp->ts_boost = 0;
 579         }
 580 
 581         if (tsparmsp == NULL) {
 582                 /*
 583                  * Use default values.
 584                  */
 585                 tspp->ts_uprilim = tspp->ts_upri = 0;
 586                 tspp->ts_nice = NZERO;
 587         } else {
 588                 /*
 589                  * Use supplied values.
 590                  */
 591                 if (tsparmsp->ts_uprilim == TS_NOCHANGE)
 592                         reqtsuprilim = 0;
 593                 else {
 594                         if (tsparmsp->ts_uprilim > 0 &&
 595                             secpolicy_setpriority(reqpcredp) != 0)
 596                                 return (EPERM);
 597                         reqtsuprilim = tsparmsp->ts_uprilim;
 598                 }
 599 
 600                 if (tsparmsp->ts_upri == TS_NOCHANGE) {
 601                         reqtsupri = reqtsuprilim;
 602                 } else {
 603                         if (tsparmsp->ts_upri > 0 &&
 604                             secpolicy_setpriority(reqpcredp) != 0)
 605                                 return (EPERM);
 606                         /*
 607                          * Set the user priority to the requested value
 608                          * or the upri limit, whichever is lower.
 609                          */
 610                         reqtsupri = tsparmsp->ts_upri;
 611                         if (reqtsupri > reqtsuprilim)
 612                                 reqtsupri = reqtsuprilim;
 613                 }
 614 
 615 
 616                 tspp->ts_uprilim = reqtsuprilim;
 617                 tspp->ts_upri = reqtsupri;
 618                 tspp->ts_nice = NZERO - (NZERO * reqtsupri) / ts_maxupri;
 619         }
 620         TS_NEWUMDPRI(tspp);
 621 
 622         tspp->ts_dispwait = 0;
 623         tspp->ts_timeleft = ts_dptbl[tspp->ts_cpupri].ts_quantum;
 624         tspp->ts_tp = t;
 625         cpucaps_sc_init(&tspp->ts_caps);
 626 
 627         /*
 628          * Reset priority. Process goes to a "user mode" priority
 629          * here regardless of whether or not it has slept since
 630          * entering the kernel.
 631          */
 632         thread_lock(t);                 /* get dispatcher lock on thread */
 633         t->t_clfuncs = &(sclass[cid].cl_funcs->thread);
 634         t->t_cid = cid;
 635         t->t_cldata = (void *)tspp;
 636         t->t_schedflag &= ~TS_RUNQMATCH;
 637         ts_change_priority(t, tspp);
 638         thread_unlock(t);
 639 
 640         /*
 641          * Link new structure into tsproc list.
 642          */
 643         TS_LIST_INSERT(tspp);
 644 
 645         /*
 646          * If this is the first time-sharing thread to occur since
 647          * boot we set up the initial call to ts_update() here.
 648          * Use an atomic compare-and-swap since that's easier and
 649          * faster than a mutex (but check with an ordinary load first
 650          * since most of the time this will already be done).
 651          */
 652         if (tspexists == 0 && atomic_cas_32(&tspexists, 0, 1) == 0)
 653                 (void) timeout(ts_update, NULL, hz);
 654 
 655         return (0);
 656 }
 657 
 658 
 659 /*
 660  * Free tsproc structure of thread.
 661  */
 662 static void
 663 ts_exitclass(void *procp)
 664 {
 665         tsproc_t *tspp = (tsproc_t *)procp;
 666 
 667         /* Remove tsproc_t structure from list */
 668         TS_LIST_DELETE(tspp);
 669         kmem_free(tspp, sizeof (tsproc_t));
 670 }
 671 
 672 /* ARGSUSED */
 673 static int
 674 ts_canexit(kthread_t *t, cred_t *cred)
 675 {
 676         /*
 677          * A thread can always leave a TS/IA class
 678          */
 679         return (0);
 680 }
 681 
 682 static int
 683 ts_fork(kthread_t *t, kthread_t *ct, void *bufp)
 684 {
 685         tsproc_t        *ptspp;         /* ptr to parent's tsproc structure */
 686         tsproc_t        *ctspp;         /* ptr to child's tsproc structure */
 687 
 688         ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock));
 689 
 690         ctspp = (tsproc_t *)bufp;
 691         ASSERT(ctspp != NULL);
 692         ptspp = (tsproc_t *)t->t_cldata;
 693         /*
 694          * Initialize child's tsproc structure.
 695          */
 696         thread_lock(t);
 697         ctspp->ts_timeleft = ts_dptbl[ptspp->ts_cpupri].ts_quantum;
 698         ctspp->ts_cpupri = ptspp->ts_cpupri;
 699         ctspp->ts_boost = ptspp->ts_boost;
 700         ctspp->ts_uprilim = ptspp->ts_uprilim;
 701         ctspp->ts_upri = ptspp->ts_upri;
 702         TS_NEWUMDPRI(ctspp);
 703         ctspp->ts_nice = ptspp->ts_nice;
 704         ctspp->ts_dispwait = 0;
 705         ctspp->ts_flags = ptspp->ts_flags & ~(TSBACKQ | TSRESTORE);
 706         ctspp->ts_tp = ct;
 707         cpucaps_sc_init(&ctspp->ts_caps);
 708         thread_unlock(t);
 709 
 710         /*
 711          * Link new structure into tsproc list.
 712          */
 713         ct->t_cldata = (void *)ctspp;
 714         TS_LIST_INSERT(ctspp);
 715         return (0);
 716 }
 717 
 718 
 719 /*
 720  * Child is placed at back of dispatcher queue and parent gives
 721  * up processor so that the child runs first after the fork.
 722  * This allows the child immediately execing to break the multiple
 723  * use of copy on write pages with no disk home. The parent will
 724  * get to steal them back rather than uselessly copying them.
 725  */
 726 static void
 727 ts_forkret(kthread_t *t, kthread_t *ct)
 728 {
 729         proc_t  *pp = ttoproc(t);
 730         proc_t  *cp = ttoproc(ct);
 731         tsproc_t *tspp;
 732 
 733         ASSERT(t == curthread);
 734         ASSERT(MUTEX_HELD(&pidlock));
 735 
 736         /*
 737          * Grab the child's p_lock before dropping pidlock to ensure
 738          * the process does not disappear before we set it running.
 739          */
 740         mutex_enter(&cp->p_lock);
 741         continuelwps(cp);
 742         mutex_exit(&cp->p_lock);
 743 
 744         mutex_enter(&pp->p_lock);
 745         mutex_exit(&pidlock);
 746         continuelwps(pp);
 747 
 748         thread_lock(t);
 749         tspp = (tsproc_t *)(t->t_cldata);
 750         tspp->ts_cpupri = ts_dptbl[tspp->ts_cpupri].ts_tqexp;
 751         TS_NEWUMDPRI(tspp);
 752         tspp->ts_timeleft = ts_dptbl[tspp->ts_cpupri].ts_quantum;
 753         tspp->ts_dispwait = 0;
 754         t->t_pri = ts_dptbl[tspp->ts_umdpri].ts_globpri;
 755         ASSERT(t->t_pri >= 0 && t->t_pri <= ts_maxglobpri);
 756         THREAD_TRANSITION(t);
 757         ts_setrun(t);
 758         thread_unlock(t);
 759         /*
 760          * Safe to drop p_lock now since since it is safe to change
 761          * the scheduling class after this point.
 762          */
 763         mutex_exit(&pp->p_lock);
 764 
 765         swtch();
 766 }
 767 
 768 
 769 /*
 770  * Get information about the time-sharing class into the buffer
 771  * pointed to by tsinfop. The maximum configured user priority
 772  * is the only information we supply.  ts_getclinfo() is called
 773  * for TS threads, and ia_getclinfo() is called for IA threads.
 774  */
 775 static int
 776 ts_getclinfo(void *infop)
 777 {
 778         tsinfo_t *tsinfop = (tsinfo_t *)infop;
 779         tsinfop->ts_maxupri = ts_maxupri;
 780         return (0);
 781 }
 782 
 783 static int
 784 ia_getclinfo(void *infop)
 785 {
 786         iainfo_t *iainfop = (iainfo_t *)infop;
 787         iainfop->ia_maxupri = ia_maxupri;
 788         return (0);
 789 }
 790 
 791 
 792 /*
 793  * Return the user mode scheduling priority range.
 794  */
 795 static int
 796 ts_getclpri(pcpri_t *pcprip)
 797 {
 798         pcprip->pc_clpmax = ts_maxupri;
 799         pcprip->pc_clpmin = -ts_maxupri;
 800         return (0);
 801 }
 802 
 803 
 804 static int
 805 ia_getclpri(pcpri_t *pcprip)
 806 {
 807         pcprip->pc_clpmax = ia_maxupri;
 808         pcprip->pc_clpmin = -ia_maxupri;
 809         return (0);
 810 }
 811 
 812 
 813 static void
 814 ts_nullsys()
 815 {}
 816 
 817 
 818 /*
 819  * Get the time-sharing parameters of the thread pointed to by
 820  * tsprocp into the buffer pointed to by tsparmsp.  ts_parmsget()
 821  * is called for TS threads, and ia_parmsget() is called for IA
 822  * threads.
 823  */
 824 static void
 825 ts_parmsget(kthread_t *t, void *parmsp)
 826 {
 827         tsproc_t *tspp = (tsproc_t *)t->t_cldata;
 828         tsparms_t *tsparmsp = (tsparms_t *)parmsp;
 829 
 830         tsparmsp->ts_uprilim = tspp->ts_uprilim;
 831         tsparmsp->ts_upri = tspp->ts_upri;
 832 }
 833 
 834 static void
 835 ia_parmsget(kthread_t *t, void *parmsp)
 836 {
 837         tsproc_t *tspp = (tsproc_t *)t->t_cldata;
 838         iaparms_t *iaparmsp = (iaparms_t *)parmsp;
 839 
 840         iaparmsp->ia_uprilim = tspp->ts_uprilim;
 841         iaparmsp->ia_upri = tspp->ts_upri;
 842         if (tspp->ts_flags & TSIASET)
 843                 iaparmsp->ia_mode = IA_SET_INTERACTIVE;
 844         else
 845                 iaparmsp->ia_mode = IA_INTERACTIVE_OFF;
 846 }
 847 
 848 
 849 /*
 850  * Check the validity of the time-sharing parameters in the buffer
 851  * pointed to by tsparmsp.
 852  * ts_parmsin() is called for TS threads, and ia_parmsin() is called
 853  * for IA threads.
 854  */
 855 static int
 856 ts_parmsin(void *parmsp)
 857 {
 858         tsparms_t       *tsparmsp = (tsparms_t *)parmsp;
 859         /*
 860          * Check validity of parameters.
 861          */
 862         if ((tsparmsp->ts_uprilim > ts_maxupri ||
 863             tsparmsp->ts_uprilim < -ts_maxupri) &&
 864             tsparmsp->ts_uprilim != TS_NOCHANGE)
 865                 return (EINVAL);
 866 
 867         if ((tsparmsp->ts_upri > ts_maxupri ||
 868             tsparmsp->ts_upri < -ts_maxupri) &&
 869             tsparmsp->ts_upri != TS_NOCHANGE)
 870                 return (EINVAL);
 871 
 872         return (0);
 873 }
 874 
 875 static int
 876 ia_parmsin(void *parmsp)
 877 {
 878         iaparms_t       *iaparmsp = (iaparms_t *)parmsp;
 879 
 880         if ((iaparmsp->ia_uprilim > ia_maxupri ||
 881             iaparmsp->ia_uprilim < -ia_maxupri) &&
 882             iaparmsp->ia_uprilim != IA_NOCHANGE) {
 883                 return (EINVAL);
 884         }
 885 
 886         if ((iaparmsp->ia_upri > ia_maxupri ||
 887             iaparmsp->ia_upri < -ia_maxupri) &&
 888             iaparmsp->ia_upri != IA_NOCHANGE) {
 889                 return (EINVAL);
 890         }
 891 
 892         return (0);
 893 }
 894 
 895 
 896 /*
 897  * Check the validity of the time-sharing parameters in the pc_vaparms_t
 898  * structure vaparmsp and put them in the buffer pointed to by tsparmsp.
 899  * pc_vaparms_t contains (key, value) pairs of parameter.
 900  * ts_vaparmsin() is called for TS threads, and ia_vaparmsin() is called
 901  * for IA threads. ts_vaparmsin() is the variable parameter version of
 902  * ts_parmsin() and ia_vaparmsin() is the variable parameter version of
 903  * ia_parmsin().
 904  */
 905 static int
 906 ts_vaparmsin(void *parmsp, pc_vaparms_t *vaparmsp)
 907 {
 908         tsparms_t       *tsparmsp = (tsparms_t *)parmsp;
 909         int             priflag = 0;
 910         int             limflag = 0;
 911         uint_t          cnt;
 912         pc_vaparm_t     *vpp = &vaparmsp->pc_parms[0];
 913 
 914 
 915         /*
 916          * TS_NOCHANGE (-32768) is outside of the range of values for
 917          * ts_uprilim and ts_upri. If the structure tsparms_t is changed,
 918          * TS_NOCHANGE should be replaced by a flag word (in the same manner
 919          * as in rt.c).
 920          */
 921         tsparmsp->ts_uprilim = TS_NOCHANGE;
 922         tsparmsp->ts_upri = TS_NOCHANGE;
 923 
 924         /*
 925          * Get the varargs parameter and check validity of parameters.
 926          */
 927         if (vaparmsp->pc_vaparmscnt > PC_VAPARMCNT)
 928                 return (EINVAL);
 929 
 930         for (cnt = 0; cnt < vaparmsp->pc_vaparmscnt; cnt++, vpp++) {
 931 
 932                 switch (vpp->pc_key) {
 933                 case TS_KY_UPRILIM:
 934                         if (limflag++)
 935                                 return (EINVAL);
 936                         tsparmsp->ts_uprilim = (pri_t)vpp->pc_parm;
 937                         if (tsparmsp->ts_uprilim > ts_maxupri ||
 938                             tsparmsp->ts_uprilim < -ts_maxupri)
 939                                 return (EINVAL);
 940                         break;
 941 
 942                 case TS_KY_UPRI:
 943                         if (priflag++)
 944                                 return (EINVAL);
 945                         tsparmsp->ts_upri = (pri_t)vpp->pc_parm;
 946                         if (tsparmsp->ts_upri > ts_maxupri ||
 947                             tsparmsp->ts_upri < -ts_maxupri)
 948                                 return (EINVAL);
 949                         break;
 950 
 951                 default:
 952                         return (EINVAL);
 953                 }
 954         }
 955 
 956         if (vaparmsp->pc_vaparmscnt == 0) {
 957                 /*
 958                  * Use default parameters.
 959                  */
 960                 tsparmsp->ts_upri = tsparmsp->ts_uprilim = 0;
 961         }
 962 
 963         return (0);
 964 }
 965 
 966 static int
 967 ia_vaparmsin(void *parmsp, pc_vaparms_t *vaparmsp)
 968 {
 969         iaparms_t       *iaparmsp = (iaparms_t *)parmsp;
 970         int             priflag = 0;
 971         int             limflag = 0;
 972         int             mflag = 0;
 973         uint_t          cnt;
 974         pc_vaparm_t     *vpp = &vaparmsp->pc_parms[0];
 975 
 976         /*
 977          * IA_NOCHANGE (-32768) is outside of the range of values for
 978          * ia_uprilim, ia_upri and ia_mode. If the structure iaparms_t is
 979          * changed, IA_NOCHANGE should be replaced by a flag word (in the
 980          * same manner as in rt.c).
 981          */
 982         iaparmsp->ia_uprilim = IA_NOCHANGE;
 983         iaparmsp->ia_upri = IA_NOCHANGE;
 984         iaparmsp->ia_mode = IA_NOCHANGE;
 985 
 986         /*
 987          * Get the varargs parameter and check validity of parameters.
 988          */
 989         if (vaparmsp->pc_vaparmscnt > PC_VAPARMCNT)
 990                 return (EINVAL);
 991 
 992         for (cnt = 0; cnt < vaparmsp->pc_vaparmscnt; cnt++, vpp++) {
 993 
 994                 switch (vpp->pc_key) {
 995                 case IA_KY_UPRILIM:
 996                         if (limflag++)
 997                                 return (EINVAL);
 998                         iaparmsp->ia_uprilim = (pri_t)vpp->pc_parm;
 999                         if (iaparmsp->ia_uprilim > ia_maxupri ||
1000                             iaparmsp->ia_uprilim < -ia_maxupri)
1001                                 return (EINVAL);
1002                         break;
1003 
1004                 case IA_KY_UPRI:
1005                         if (priflag++)
1006                                 return (EINVAL);
1007                         iaparmsp->ia_upri = (pri_t)vpp->pc_parm;
1008                         if (iaparmsp->ia_upri > ia_maxupri ||
1009                             iaparmsp->ia_upri < -ia_maxupri)
1010                                 return (EINVAL);
1011                         break;
1012 
1013                 case IA_KY_MODE:
1014                         if (mflag++)
1015                                 return (EINVAL);
1016                         iaparmsp->ia_mode = (int)vpp->pc_parm;
1017                         if (iaparmsp->ia_mode != IA_SET_INTERACTIVE &&
1018                             iaparmsp->ia_mode != IA_INTERACTIVE_OFF)
1019                                 return (EINVAL);
1020                         break;
1021 
1022                 default:
1023                         return (EINVAL);
1024                 }
1025         }
1026 
1027         if (vaparmsp->pc_vaparmscnt == 0) {
1028                 /*
1029                  * Use default parameters.
1030                  */
1031                 iaparmsp->ia_upri = iaparmsp->ia_uprilim = 0;
1032                 iaparmsp->ia_mode = IA_SET_INTERACTIVE;
1033         }
1034 
1035         return (0);
1036 }
1037 
1038 /*
1039  * Nothing to do here but return success.
1040  */
1041 /* ARGSUSED */
1042 static int
1043 ts_parmsout(void *parmsp, pc_vaparms_t *vaparmsp)
1044 {
1045         return (0);
1046 }
1047 
1048 
1049 /*
1050  * Copy all selected time-sharing class parameters to the user.
1051  * The parameters are specified by a key.
1052  */
1053 static int
1054 ts_vaparmsout(void *prmsp, pc_vaparms_t *vaparmsp)
1055 {
1056         tsparms_t       *tsprmsp = (tsparms_t *)prmsp;
1057         int             priflag = 0;
1058         int             limflag = 0;
1059         uint_t          cnt;
1060         pc_vaparm_t     *vpp = &vaparmsp->pc_parms[0];
1061 
1062         ASSERT(MUTEX_NOT_HELD(&curproc->p_lock));
1063 
1064         if (vaparmsp->pc_vaparmscnt > PC_VAPARMCNT)
1065                 return (EINVAL);
1066 
1067         for (cnt = 0; cnt < vaparmsp->pc_vaparmscnt; cnt++, vpp++) {
1068 
1069                 switch (vpp->pc_key) {
1070                 case TS_KY_UPRILIM:
1071                         if (limflag++)
1072                                 return (EINVAL);
1073                         if (copyout(&tsprmsp->ts_uprilim,
1074                             (caddr_t)(uintptr_t)vpp->pc_parm, sizeof (pri_t)))
1075                                 return (EFAULT);
1076                         break;
1077 
1078                 case TS_KY_UPRI:
1079                         if (priflag++)
1080                                 return (EINVAL);
1081                         if (copyout(&tsprmsp->ts_upri,
1082                             (caddr_t)(uintptr_t)vpp->pc_parm, sizeof (pri_t)))
1083                                 return (EFAULT);
1084                         break;
1085 
1086                 default:
1087                         return (EINVAL);
1088                 }
1089         }
1090 
1091         return (0);
1092 }
1093 
1094 
1095 /*
1096  * Copy all selected interactive class parameters to the user.
1097  * The parameters are specified by a key.
1098  */
1099 static int
1100 ia_vaparmsout(void *prmsp, pc_vaparms_t *vaparmsp)
1101 {
1102         iaparms_t       *iaprmsp = (iaparms_t *)prmsp;
1103         int             priflag = 0;
1104         int             limflag = 0;
1105         int             mflag = 0;
1106         uint_t          cnt;
1107         pc_vaparm_t     *vpp = &vaparmsp->pc_parms[0];
1108 
1109         ASSERT(MUTEX_NOT_HELD(&curproc->p_lock));
1110 
1111         if (vaparmsp->pc_vaparmscnt > PC_VAPARMCNT)
1112                 return (EINVAL);
1113 
1114         for (cnt = 0; cnt < vaparmsp->pc_vaparmscnt; cnt++, vpp++) {
1115 
1116                 switch (vpp->pc_key) {
1117                 case IA_KY_UPRILIM:
1118                         if (limflag++)
1119                                 return (EINVAL);
1120                         if (copyout(&iaprmsp->ia_uprilim,
1121                             (caddr_t)(uintptr_t)vpp->pc_parm, sizeof (pri_t)))
1122                                 return (EFAULT);
1123                         break;
1124 
1125                 case IA_KY_UPRI:
1126                         if (priflag++)
1127                                 return (EINVAL);
1128                         if (copyout(&iaprmsp->ia_upri,
1129                             (caddr_t)(uintptr_t)vpp->pc_parm, sizeof (pri_t)))
1130                                 return (EFAULT);
1131                         break;
1132 
1133                 case IA_KY_MODE:
1134                         if (mflag++)
1135                                 return (EINVAL);
1136                         if (copyout(&iaprmsp->ia_mode,
1137                             (caddr_t)(uintptr_t)vpp->pc_parm, sizeof (int)))
1138                                 return (EFAULT);
1139                         break;
1140 
1141                 default:
1142                         return (EINVAL);
1143                 }
1144         }
1145         return (0);
1146 }
1147 
1148 
1149 /*
1150  * Set the scheduling parameters of the thread pointed to by tsprocp
1151  * to those specified in the buffer pointed to by tsparmsp.
1152  * ts_parmsset() is called for TS threads, and ia_parmsset() is
1153  * called for IA threads.
1154  */
1155 /* ARGSUSED */
1156 static int
1157 ts_parmsset(kthread_t *tx, void *parmsp, id_t reqpcid, cred_t *reqpcredp)
1158 {
1159         char            nice;
1160         pri_t           reqtsuprilim;
1161         pri_t           reqtsupri;
1162         tsparms_t       *tsparmsp = (tsparms_t *)parmsp;
1163         tsproc_t        *tspp = (tsproc_t *)tx->t_cldata;
1164 
1165         ASSERT(MUTEX_HELD(&(ttoproc(tx))->p_lock));
1166 
1167         if (tsparmsp->ts_uprilim == TS_NOCHANGE)
1168                 reqtsuprilim = tspp->ts_uprilim;
1169         else
1170                 reqtsuprilim = tsparmsp->ts_uprilim;
1171 
1172         if (tsparmsp->ts_upri == TS_NOCHANGE)
1173                 reqtsupri = tspp->ts_upri;
1174         else
1175                 reqtsupri = tsparmsp->ts_upri;
1176 
1177         /*
1178          * Make sure the user priority doesn't exceed the upri limit.
1179          */
1180         if (reqtsupri > reqtsuprilim)
1181                 reqtsupri = reqtsuprilim;
1182 
1183         /*
1184          * Basic permissions enforced by generic kernel code
1185          * for all classes require that a thread attempting
1186          * to change the scheduling parameters of a target
1187          * thread be privileged or have a real or effective
1188          * UID matching that of the target thread. We are not
1189          * called unless these basic permission checks have
1190          * already passed. The time-sharing class requires in
1191          * addition that the calling thread be privileged if it
1192          * is attempting to raise the upri limit above its current
1193          * value This may have been checked previously but if our
1194          * caller passed us a non-NULL credential pointer we assume
1195          * it hasn't and we check it here.
1196          */
1197         if (reqpcredp != NULL &&
1198             reqtsuprilim > tspp->ts_uprilim &&
1199             secpolicy_raisepriority(reqpcredp) != 0)
1200                 return (EPERM);
1201 
1202         /*
1203          * Set ts_nice to the nice value corresponding to the user
1204          * priority we are setting.  Note that setting the nice field
1205          * of the parameter struct won't affect upri or nice.
1206          */
1207         nice = NZERO - (reqtsupri * NZERO) / ts_maxupri;
1208         if (nice >= 2 * NZERO)
1209                 nice = 2 * NZERO - 1;
1210 
1211         thread_lock(tx);
1212 
1213         tspp->ts_uprilim = reqtsuprilim;
1214         tspp->ts_upri = reqtsupri;
1215         TS_NEWUMDPRI(tspp);
1216         tspp->ts_nice = nice;
1217 
1218         tspp->ts_dispwait = 0;
1219         ts_change_priority(tx, tspp);
1220         thread_unlock(tx);
1221         return (0);
1222 }
1223 
1224 
1225 static int
1226 ia_parmsset(kthread_t *tx, void *parmsp, id_t reqpcid, cred_t *reqpcredp)
1227 {
1228         tsproc_t        *tspp = (tsproc_t *)tx->t_cldata;
1229         iaparms_t       *iaparmsp = (iaparms_t *)parmsp;
1230         proc_t          *p;
1231         pid_t           pid, pgid, sid;
1232         pid_t           on, off;
1233         struct stdata   *stp;
1234         int             sess_held;
1235 
1236         /*
1237          * Handle user priority changes
1238          */
1239         if (iaparmsp->ia_mode == IA_NOCHANGE)
1240                 return (ts_parmsset(tx, parmsp, reqpcid, reqpcredp));
1241 
1242         /*
1243          * Check permissions for changing modes.
1244          */
1245 
1246         if (reqpcredp != NULL && !groupmember(IA_gid, reqpcredp) &&
1247             secpolicy_raisepriority(reqpcredp) != 0) {
1248                 /*
1249                  * Silently fail in case this is just a priocntl
1250                  * call with upri and uprilim set to IA_NOCHANGE.
1251                  */
1252                 return (0);
1253         }
1254 
1255         ASSERT(MUTEX_HELD(&pidlock));
1256         if ((p = ttoproc(tx)) == NULL) {
1257                 return (0);
1258         }
1259         ASSERT(MUTEX_HELD(&p->p_lock));
1260         if (p->p_stat == SIDL) {
1261                 return (0);
1262         }
1263         pid = p->p_pid;
1264         sid = p->p_sessp->s_sid;
1265         pgid = p->p_pgrp;
1266         if (iaparmsp->ia_mode == IA_SET_INTERACTIVE) {
1267                 /*
1268                  * session leaders must be turned on now so all processes
1269                  * in the group controlling the tty will be turned on or off.
1270                  * if the ia_mode is off for the session leader,
1271                  * ia_set_process_group will return without setting the
1272                  * processes in the group controlling the tty on.
1273                  */
1274                 thread_lock(tx);
1275                 tspp->ts_flags |= TSIASET;
1276                 thread_unlock(tx);
1277         }
1278         mutex_enter(&p->p_sessp->s_lock);
1279         sess_held = 1;
1280         if ((pid == sid) && (p->p_sessp->s_vp != NULL) &&
1281             ((stp = p->p_sessp->s_vp->v_stream) != NULL)) {
1282                 if ((stp->sd_pgidp != NULL) && (stp->sd_sidp != NULL)) {
1283                         pgid = stp->sd_pgidp->pid_id;
1284                         sess_held = 0;
1285                         mutex_exit(&p->p_sessp->s_lock);
1286                         if (iaparmsp->ia_mode ==
1287                             IA_SET_INTERACTIVE) {
1288                                 off = 0;
1289                                 on = pgid;
1290                         } else {
1291                                 off = pgid;
1292                                 on = 0;
1293                         }
1294                         TRACE_3(TR_FAC_IA, TR_ACTIVE_CHAIN,
1295                             "active chain:pid %d gid %d %p",
1296                             pid, pgid, p);
1297                         ia_set_process_group(sid, off, on);
1298                 }
1299         }
1300         if (sess_held)
1301                 mutex_exit(&p->p_sessp->s_lock);
1302 
1303         thread_lock(tx);
1304 
1305         if (iaparmsp->ia_mode == IA_SET_INTERACTIVE) {
1306                 tspp->ts_flags |= TSIASET;
1307                 tspp->ts_boost = ia_boost;
1308         } else {
1309                 tspp->ts_flags &= ~TSIASET;
1310                 tspp->ts_boost = -ia_boost;
1311         }
1312         thread_unlock(tx);
1313 
1314         return (ts_parmsset(tx, parmsp, reqpcid, reqpcredp));
1315 }
1316 
1317 static void
1318 ts_exit(kthread_t *t)
1319 {
1320         tsproc_t *tspp;
1321 
1322         if (CPUCAPS_ON()) {
1323                 /*
1324                  * A thread could be exiting in between clock ticks,
1325                  * so we need to calculate how much CPU time it used
1326                  * since it was charged last time.
1327                  *
1328                  * CPU caps are not enforced on exiting processes - it is
1329                  * usually desirable to exit as soon as possible to free
1330                  * resources.
1331                  */
1332                 thread_lock(t);
1333                 tspp = (tsproc_t *)t->t_cldata;
1334                 (void) cpucaps_charge(t, &tspp->ts_caps, CPUCAPS_CHARGE_ONLY);
1335                 thread_unlock(t);
1336         }
1337 }
1338 
1339 /*
1340  * Return the global scheduling priority that would be assigned
1341  * to a thread entering the time-sharing class with the ts_upri.
1342  */
1343 static pri_t
1344 ts_globpri(kthread_t *t)
1345 {
1346         tsproc_t *tspp;
1347         pri_t   tspri;
1348 
1349         ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock));
1350         tspp = (tsproc_t *)t->t_cldata;
1351         tspri = tsmedumdpri + tspp->ts_upri;
1352         if (tspri > ts_maxumdpri)
1353                 tspri = ts_maxumdpri;
1354         else if (tspri < 0)
1355                 tspri = 0;
1356         return (ts_dptbl[tspri].ts_globpri);
1357 }
1358 
1359 /*
1360  * Arrange for thread to be placed in appropriate location
1361  * on dispatcher queue.
1362  *
1363  * This is called with the current thread in TS_ONPROC and locked.
1364  */
1365 static void
1366 ts_preempt(kthread_t *t)
1367 {
1368         tsproc_t        *tspp = (tsproc_t *)(t->t_cldata);
1369         klwp_t          *lwp = ttolwp(t);
1370         pri_t           oldpri = t->t_pri;
1371 
1372         ASSERT(t == curthread);
1373         ASSERT(THREAD_LOCK_HELD(curthread));
1374 
1375         /*
1376          * This thread may be placed on wait queue by CPU Caps. In this case we
1377          * do not need to do anything until it is removed from the wait queue.
1378          */
1379         if (CPUCAPS_ON()) {
1380                 (void) cpucaps_charge(t, &tspp->ts_caps,
1381                     CPUCAPS_CHARGE_ENFORCE);
1382                 if (CPUCAPS_ENFORCE(t))
1383                         return;
1384         }
1385 
1386         /*
1387          * If thread got preempted in the user-land then we know
1388          * it isn't holding any locks.  Mark it as swappable.
1389          */
1390         ASSERT(t->t_schedflag & TS_DONT_SWAP);
1391         if (lwp != NULL && lwp->lwp_state == LWP_USER)
1392                 t->t_schedflag &= ~TS_DONT_SWAP;
1393 
1394         /*
1395          * Check to see if we're doing "preemption control" here.  If
1396          * we are, and if the user has requested that this thread not
1397          * be preempted, and if preemptions haven't been put off for
1398          * too long, let the preemption happen here but try to make
1399          * sure the thread is rescheduled as soon as possible.  We do
1400          * this by putting it on the front of the highest priority run
1401          * queue in the TS class.  If the preemption has been put off
1402          * for too long, clear the "nopreempt" bit and let the thread
1403          * be preempted.
1404          */
1405         if (t->t_schedctl && schedctl_get_nopreempt(t)) {
1406                 if (tspp->ts_timeleft > -SC_MAX_TICKS) {
1407                         DTRACE_SCHED1(schedctl__nopreempt, kthread_t *, t);
1408                         /*
1409                          * If not already remembered, remember current
1410                          * priority for restoration in ts_yield().
1411                          */
1412                         if (!(tspp->ts_flags & TSRESTORE)) {
1413                                 tspp->ts_scpri = t->t_pri;
1414                                 tspp->ts_flags |= TSRESTORE;
1415                         }
1416                         THREAD_CHANGE_PRI(t, ts_maxumdpri);
1417                         t->t_schedflag |= TS_DONT_SWAP;
1418                         schedctl_set_yield(t, 1);
1419                         setfrontdq(t);
1420                         goto done;
1421                 } else {
1422                         if (tspp->ts_flags & TSRESTORE) {
1423                                 THREAD_CHANGE_PRI(t, tspp->ts_scpri);
1424                                 tspp->ts_flags &= ~TSRESTORE;
1425                         }
1426                         schedctl_set_nopreempt(t, 0);
1427                         DTRACE_SCHED1(schedctl__preempt, kthread_t *, t);
1428                         TNF_PROBE_2(schedctl_preempt, "schedctl TS ts_preempt",
1429                             /* CSTYLED */, tnf_pid, pid, ttoproc(t)->p_pid,
1430                             tnf_lwpid, lwpid, t->t_tid);
1431                         /*
1432                          * Fall through and be preempted below.
1433                          */
1434                 }
1435         }
1436 
1437         if ((tspp->ts_flags & TSBACKQ) != 0) {
1438                 tspp->ts_timeleft = ts_dptbl[tspp->ts_cpupri].ts_quantum;
1439                 tspp->ts_dispwait = 0;
1440                 tspp->ts_flags &= ~TSBACKQ;
1441                 setbackdq(t);
1442         } else {
1443                 setfrontdq(t);
1444         }
1445 
1446 done:
1447         TRACE_2(TR_FAC_DISP, TR_PREEMPT,
1448             "preempt:tid %p old pri %d", t, oldpri);
1449 }
1450 
1451 static void
1452 ts_setrun(kthread_t *t)
1453 {
1454         tsproc_t *tspp = (tsproc_t *)(t->t_cldata);
1455 
1456         ASSERT(THREAD_LOCK_HELD(t));    /* t should be in transition */
1457 
1458         if (tspp->ts_dispwait > ts_dptbl[tspp->ts_umdpri].ts_maxwait) {
1459                 tspp->ts_cpupri = ts_dptbl[tspp->ts_cpupri].ts_slpret;
1460                 TS_NEWUMDPRI(tspp);
1461                 tspp->ts_timeleft = ts_dptbl[tspp->ts_cpupri].ts_quantum;
1462                 tspp->ts_dispwait = 0;
1463                 THREAD_CHANGE_PRI(t, ts_dptbl[tspp->ts_umdpri].ts_globpri);
1464                 ASSERT(t->t_pri >= 0 && t->t_pri <= ts_maxglobpri);
1465         }
1466 
1467         tspp->ts_flags &= ~TSBACKQ;
1468 
1469         if (tspp->ts_flags & TSIA) {
1470                 if (tspp->ts_flags & TSIASET)
1471                         setfrontdq(t);
1472                 else
1473                         setbackdq(t);
1474         } else {
1475                 if (t->t_disp_time != ddi_get_lbolt())
1476                         setbackdq(t);
1477                 else
1478                         setfrontdq(t);
1479         }
1480 }
1481 
1482 
1483 /*
1484  * Prepare thread for sleep.
1485  */
1486 static void
1487 ts_sleep(kthread_t *t)
1488 {
1489         tsproc_t        *tspp = (tsproc_t *)(t->t_cldata);
1490         pri_t           old_pri = t->t_pri;
1491 
1492         ASSERT(t == curthread);
1493         ASSERT(THREAD_LOCK_HELD(t));
1494 
1495         /*
1496          * Account for time spent on CPU before going to sleep.
1497          */
1498         (void) CPUCAPS_CHARGE(t, &tspp->ts_caps, CPUCAPS_CHARGE_ENFORCE);
1499 
1500         if (tspp->ts_dispwait > ts_dptbl[tspp->ts_umdpri].ts_maxwait) {
1501                 tspp->ts_cpupri = ts_dptbl[tspp->ts_cpupri].ts_slpret;
1502                 TS_NEWUMDPRI(tspp);
1503                 tspp->ts_timeleft = ts_dptbl[tspp->ts_cpupri].ts_quantum;
1504                 tspp->ts_dispwait = 0;
1505 
1506                 THREAD_CHANGE_PRI(curthread,
1507                     ts_dptbl[tspp->ts_umdpri].ts_globpri);
1508                 ASSERT(curthread->t_pri >= 0 &&
1509                     curthread->t_pri <= ts_maxglobpri);
1510 
1511                 if (DISP_MUST_SURRENDER(curthread))
1512                         cpu_surrender(curthread);
1513         }
1514         t->t_stime = ddi_get_lbolt();                /* time stamp for the swapper */
1515         TRACE_2(TR_FAC_DISP, TR_SLEEP,
1516             "sleep:tid %p old pri %d", t, old_pri);
1517 }
1518 
1519 
1520 /*
1521  * Return Values:
1522  *
1523  *      -1 if the thread is loaded or is not eligible to be swapped in.
1524  *
1525  *      effective priority of the specified thread based on swapout time
1526  *              and size of process (epri >= 0 , epri <= SHRT_MAX).
1527  */
1528 /* ARGSUSED */
1529 static pri_t
1530 ts_swapin(kthread_t *t, int flags)
1531 {
1532         tsproc_t        *tspp = (tsproc_t *)(t->t_cldata);
1533         long            epri = -1;
1534         proc_t          *pp = ttoproc(t);
1535 
1536         ASSERT(THREAD_LOCK_HELD(t));
1537 
1538         /*
1539          * We know that pri_t is a short.
1540          * Be sure not to overrun its range.
1541          */
1542         if (t->t_state == TS_RUN && (t->t_schedflag & TS_LOAD) == 0) {
1543                 time_t swapout_time;
1544 
1545                 swapout_time = (ddi_get_lbolt() - t->t_stime) / hz;
1546                 if (INHERITED(t) || (tspp->ts_flags & TSIASET)) {
1547                         epri = (long)DISP_PRIO(t) + swapout_time;
1548                 } else {
1549                         /*
1550                          * Threads which have been out for a long time,
1551                          * have high user mode priority and are associated
1552                          * with a small address space are more deserving
1553                          */
1554                         epri = ts_dptbl[tspp->ts_umdpri].ts_globpri;
1555                         ASSERT(epri >= 0 && epri <= ts_maxumdpri);
1556                         epri += swapout_time - pp->p_swrss / nz(maxpgio)/2;
1557                 }
1558                 /*
1559                  * Scale epri so SHRT_MAX/2 represents zero priority.
1560                  */
1561                 epri += SHRT_MAX/2;
1562                 if (epri < 0)
1563                         epri = 0;
1564                 else if (epri > SHRT_MAX)
1565                         epri = SHRT_MAX;
1566         }
1567         return ((pri_t)epri);
1568 }
1569 
1570 /*
1571  * Return Values
1572  *      -1 if the thread isn't loaded or is not eligible to be swapped out.
1573  *
1574  *      effective priority of the specified thread based on if the swapper
1575  *              is in softswap or hardswap mode.
1576  *
1577  *              Softswap:  Return a low effective priority for threads
1578  *                         sleeping for more than maxslp secs.
1579  *
1580  *              Hardswap:  Return an effective priority such that threads
1581  *                         which have been in memory for a while and are
1582  *                         associated with a small address space are swapped
1583  *                         in before others.
1584  *
1585  *              (epri >= 0 , epri <= SHRT_MAX).
1586  */
1587 time_t  ts_minrun = 2;          /* XXX - t_pri becomes 59 within 2 secs */
1588 time_t  ts_minslp = 2;          /* min time on sleep queue for hardswap */
1589 
1590 static pri_t
1591 ts_swapout(kthread_t *t, int flags)
1592 {
1593         tsproc_t        *tspp = (tsproc_t *)(t->t_cldata);
1594         long            epri = -1;
1595         proc_t          *pp = ttoproc(t);
1596         time_t          swapin_time;
1597 
1598         ASSERT(THREAD_LOCK_HELD(t));
1599 
1600         if (INHERITED(t) || (tspp->ts_flags & TSIASET) ||
1601             (t->t_proc_flag & TP_LWPEXIT) ||
1602             (t->t_state & (TS_ZOMB | TS_FREE | TS_STOPPED |
1603             TS_ONPROC | TS_WAIT)) ||
1604             !(t->t_schedflag & TS_LOAD) || !SWAP_OK(t))
1605                 return (-1);
1606 
1607         ASSERT(t->t_state & (TS_SLEEP | TS_RUN));
1608 
1609         /*
1610          * We know that pri_t is a short.
1611          * Be sure not to overrun its range.
1612          */
1613         swapin_time = (ddi_get_lbolt() - t->t_stime) / hz;
1614         if (flags == SOFTSWAP) {
1615                 if (t->t_state == TS_SLEEP && swapin_time > maxslp) {
1616                         epri = 0;
1617                 } else {
1618                         return ((pri_t)epri);
1619                 }
1620         } else {
1621                 pri_t pri;
1622 
1623                 if ((t->t_state == TS_SLEEP && swapin_time > ts_minslp) ||
1624                     (t->t_state == TS_RUN && swapin_time > ts_minrun)) {
1625                         pri = ts_dptbl[tspp->ts_umdpri].ts_globpri;
1626                         ASSERT(pri >= 0 && pri <= ts_maxumdpri);
1627                         epri = swapin_time -
1628                             (rm_asrss(pp->p_as) / nz(maxpgio)/2) - (long)pri;
1629                 } else {
1630                         return ((pri_t)epri);
1631                 }
1632         }
1633 
1634         /*
1635          * Scale epri so SHRT_MAX/2 represents zero priority.
1636          */
1637         epri += SHRT_MAX/2;
1638         if (epri < 0)
1639                 epri = 0;
1640         else if (epri > SHRT_MAX)
1641                 epri = SHRT_MAX;
1642 
1643         return ((pri_t)epri);
1644 }
1645 
1646 /*
1647  * Check for time slice expiration.  If time slice has expired
1648  * move thread to priority specified in tsdptbl for time slice expiration
1649  * and set runrun to cause preemption.
1650  */
1651 static void
1652 ts_tick(kthread_t *t)
1653 {
1654         tsproc_t *tspp = (tsproc_t *)(t->t_cldata);
1655         klwp_t *lwp;
1656         boolean_t call_cpu_surrender = B_FALSE;
1657         pri_t   oldpri = t->t_pri;
1658 
1659         ASSERT(MUTEX_HELD(&(ttoproc(t))->p_lock));
1660 
1661         thread_lock(t);
1662 
1663         /*
1664          * Keep track of thread's project CPU usage.  Note that projects
1665          * get charged even when threads are running in the kernel.
1666          */
1667         if (CPUCAPS_ON()) {
1668                 call_cpu_surrender = cpucaps_charge(t, &tspp->ts_caps,
1669                     CPUCAPS_CHARGE_ENFORCE);
1670         }
1671 
1672         if (--tspp->ts_timeleft <= 0) {
1673                 pri_t   new_pri;
1674 
1675                 /*
1676                  * If we're doing preemption control and trying to avoid
1677                  * preempting this thread, just note that the thread should
1678                  * yield soon and let it keep running (unless it's been a
1679                  * while).
1680                  */
1681                 if (t->t_schedctl && schedctl_get_nopreempt(t)) {
1682                         if (tspp->ts_timeleft > -SC_MAX_TICKS) {
1683                                 DTRACE_SCHED1(schedctl__nopreempt,
1684                                     kthread_t *, t);
1685                                 schedctl_set_yield(t, 1);
1686                                 thread_unlock_nopreempt(t);
1687                                 return;
1688                         }
1689 
1690                         TNF_PROBE_2(schedctl_failsafe,
1691                             "schedctl TS ts_tick", /* CSTYLED */,
1692                             tnf_pid, pid, ttoproc(t)->p_pid,
1693                             tnf_lwpid, lwpid, t->t_tid);
1694                 }
1695                 tspp->ts_flags &= ~TSRESTORE;
1696                 tspp->ts_cpupri = ts_dptbl[tspp->ts_cpupri].ts_tqexp;
1697                 TS_NEWUMDPRI(tspp);
1698                 tspp->ts_dispwait = 0;
1699                 new_pri = ts_dptbl[tspp->ts_umdpri].ts_globpri;
1700                 ASSERT(new_pri >= 0 && new_pri <= ts_maxglobpri);
1701                 /*
1702                  * When the priority of a thread is changed, it may be
1703                  * necessary to adjust its position on a sleep queue or
1704                  * dispatch queue.  The function thread_change_pri accomplishes
1705                  * this.
1706                  */
1707                 if (thread_change_pri(t, new_pri, 0)) {
1708                         if ((t->t_schedflag & TS_LOAD) &&
1709                             (lwp = t->t_lwp) &&
1710                             lwp->lwp_state == LWP_USER)
1711                                 t->t_schedflag &= ~TS_DONT_SWAP;
1712                         tspp->ts_timeleft =
1713                             ts_dptbl[tspp->ts_cpupri].ts_quantum;
1714                 } else {
1715                         call_cpu_surrender = B_TRUE;
1716                 }
1717                 TRACE_2(TR_FAC_DISP, TR_TICK,
1718                     "tick:tid %p old pri %d", t, oldpri);
1719         } else if (t->t_state == TS_ONPROC &&
1720             t->t_pri < t->t_disp_queue->disp_maxrunpri) {
1721                 call_cpu_surrender = B_TRUE;
1722         }
1723 
1724         if (call_cpu_surrender) {
1725                 tspp->ts_flags |= TSBACKQ;
1726                 cpu_surrender(t);
1727         }
1728 
1729         thread_unlock_nopreempt(t);     /* clock thread can't be preempted */
1730 }
1731 
1732 
1733 /*
1734  * If we are lowering the thread's priority below that of other runnable
1735  * threads we will normally set runrun via cpu_surrender() to cause preemption.
1736  */
1737 static void
1738 ts_trapret(kthread_t *t)
1739 {
1740         tsproc_t        *tspp = (tsproc_t *)t->t_cldata;
1741         cpu_t           *cp = CPU;
1742         pri_t           old_pri = curthread->t_pri;
1743 
1744         ASSERT(THREAD_LOCK_HELD(t));
1745         ASSERT(t == curthread);
1746         ASSERT(cp->cpu_dispthread == t);
1747         ASSERT(t->t_state == TS_ONPROC);
1748 
1749         if (tspp->ts_dispwait > ts_dptbl[tspp->ts_umdpri].ts_maxwait) {
1750                 tspp->ts_cpupri = ts_dptbl[tspp->ts_cpupri].ts_slpret;
1751                 TS_NEWUMDPRI(tspp);
1752                 tspp->ts_timeleft = ts_dptbl[tspp->ts_cpupri].ts_quantum;
1753                 tspp->ts_dispwait = 0;
1754 
1755                 /*
1756                  * If thread has blocked in the kernel (as opposed to
1757                  * being merely preempted), recompute the user mode priority.
1758                  */
1759                 THREAD_CHANGE_PRI(t, ts_dptbl[tspp->ts_umdpri].ts_globpri);
1760                 cp->cpu_dispatch_pri = DISP_PRIO(t);
1761                 ASSERT(t->t_pri >= 0 && t->t_pri <= ts_maxglobpri);
1762 
1763                 if (DISP_MUST_SURRENDER(t))
1764                         cpu_surrender(t);
1765         }
1766 
1767         /*
1768          * Swapout lwp if the swapper is waiting for this thread to reach a
1769          * safe point.
1770          */
1771         if ((t->t_schedflag & TS_SWAPENQ) && !(tspp->ts_flags & TSIASET)) {
1772                 thread_unlock(t);
1773                 swapout_lwp(ttolwp(t));
1774                 thread_lock(t);
1775         }
1776 
1777         TRACE_2(TR_FAC_DISP, TR_TRAPRET,
1778             "trapret:tid %p old pri %d", t, old_pri);
1779 }
1780 
1781 
1782 /*
1783  * Update the ts_dispwait values of all time sharing threads that
1784  * are currently runnable at a user mode priority and bump the priority
1785  * if ts_dispwait exceeds ts_maxwait.  Called once per second via
1786  * timeout which we reset here.
1787  *
1788  * There are several lists of time sharing threads broken up by a hash on
1789  * the thread pointer.  Each list has its own lock.  This avoids blocking
1790  * all ts_enterclass, ts_fork, and ts_exitclass operations while ts_update
1791  * runs.  ts_update traverses each list in turn.
1792  *
1793  * If multiple threads have their priorities updated to the same value,
1794  * the system implicitly favors the one that is updated first (since it
1795  * winds up first on the run queue).  To avoid this unfairness, the
1796  * traversal of threads starts at the list indicated by a marker.  When
1797  * threads in more than one list have their priorities updated, the marker
1798  * is moved.  This changes the order the threads will be placed on the run
1799  * queue the next time ts_update is called and preserves fairness over the
1800  * long run.  The marker doesn't need to be protected by a lock since it's
1801  * only accessed by ts_update, which is inherently single-threaded (only
1802  * one instance can be running at a time).
1803  */
1804 static void
1805 ts_update(void *arg)
1806 {
1807         int             i;
1808         int             new_marker = -1;
1809         static int      ts_update_marker;
1810 
1811         /*
1812          * Start with the ts_update_marker list, then do the rest.
1813          */
1814         i = ts_update_marker;
1815         do {
1816                 /*
1817                  * If this is the first list after the current marker to
1818                  * have threads with priorities updated, advance the marker
1819                  * to this list for the next time ts_update runs.
1820                  */
1821                 if (ts_update_list(i) && new_marker == -1 &&
1822                     i != ts_update_marker) {
1823                         new_marker = i;
1824                 }
1825         } while ((i = TS_LIST_NEXT(i)) != ts_update_marker);
1826 
1827         /* advance marker for next ts_update call */
1828         if (new_marker != -1)
1829                 ts_update_marker = new_marker;
1830 
1831         (void) timeout(ts_update, arg, hz);
1832 }
1833 
1834 /*
1835  * Updates priority for a list of threads.  Returns 1 if the priority of
1836  * one of the threads was actually updated, 0 if none were for various
1837  * reasons (thread is no longer in the TS or IA class, isn't runnable,
1838  * hasn't waited long enough, has the preemption control no-preempt bit
1839  * set, etc.)
1840  */
1841 static int
1842 ts_update_list(int i)
1843 {
1844         tsproc_t *tspp;
1845         kthread_t *tx;
1846         int updated = 0;
1847 
1848         mutex_enter(&ts_list_lock[i]);
1849         for (tspp = ts_plisthead[i].ts_next; tspp != &ts_plisthead[i];
1850             tspp = tspp->ts_next) {
1851                 tx = tspp->ts_tp;
1852                 /*
1853                  * Lock the thread and verify state.
1854                  */
1855                 thread_lock(tx);
1856                 /*
1857                  * Skip the thread if it is no longer in the TS (or IA) class.
1858                  */
1859                 if (tx->t_clfuncs != &ts_classfuncs.thread &&
1860                     tx->t_clfuncs != &ia_classfuncs.thread)
1861                         goto next;
1862                 tspp->ts_dispwait++;
1863                 if (tspp->ts_dispwait <= ts_dptbl[tspp->ts_umdpri].ts_maxwait)
1864                         goto next;
1865                 if (tx->t_schedctl && schedctl_get_nopreempt(tx))
1866                         goto next;
1867                 if (tx->t_state != TS_RUN && tx->t_state != TS_WAIT &&
1868                     (tx->t_state != TS_SLEEP || !ts_sleep_promote)) {
1869                         /* make next syscall/trap do CL_TRAPRET */
1870                         tx->t_trapret = 1;
1871                         aston(tx);
1872                         goto next;
1873                 }
1874                 tspp->ts_cpupri = ts_dptbl[tspp->ts_cpupri].ts_lwait;
1875                 TS_NEWUMDPRI(tspp);
1876                 tspp->ts_dispwait = 0;
1877                 updated = 1;
1878 
1879                 /*
1880                  * Only dequeue it if needs to move; otherwise it should
1881                  * just round-robin here.
1882                  */
1883                 if (tx->t_pri != ts_dptbl[tspp->ts_umdpri].ts_globpri) {
1884                         pri_t oldpri = tx->t_pri;
1885                         ts_change_priority(tx, tspp);
1886                         TRACE_2(TR_FAC_DISP, TR_UPDATE,
1887                             "update:tid %p old pri %d", tx, oldpri);
1888                 }
1889 next:
1890                 thread_unlock(tx);
1891         }
1892         mutex_exit(&ts_list_lock[i]);
1893 
1894         return (updated);
1895 }
1896 
1897 /*
1898  * Processes waking up go to the back of their queue.
1899  */
1900 static void
1901 ts_wakeup(kthread_t *t)
1902 {
1903         tsproc_t        *tspp = (tsproc_t *)(t->t_cldata);
1904 
1905         ASSERT(THREAD_LOCK_HELD(t));
1906 
1907         t->t_stime = ddi_get_lbolt();                /* time stamp for the swapper */
1908 
1909         if (tspp->ts_dispwait > ts_dptbl[tspp->ts_umdpri].ts_maxwait) {
1910                 tspp->ts_cpupri = ts_dptbl[tspp->ts_cpupri].ts_slpret;
1911                 TS_NEWUMDPRI(tspp);
1912                 tspp->ts_timeleft = ts_dptbl[tspp->ts_cpupri].ts_quantum;
1913                 tspp->ts_dispwait = 0;
1914                 THREAD_CHANGE_PRI(t, ts_dptbl[tspp->ts_umdpri].ts_globpri);
1915                 ASSERT(t->t_pri >= 0 && t->t_pri <= ts_maxglobpri);
1916         }
1917 
1918         tspp->ts_flags &= ~TSBACKQ;
1919 
1920         if (tspp->ts_flags & TSIA) {
1921                 if (tspp->ts_flags & TSIASET)
1922                         setfrontdq(t);
1923                 else
1924                         setbackdq(t);
1925         } else {
1926                 if (t->t_disp_time != ddi_get_lbolt())
1927                         setbackdq(t);
1928                 else
1929                         setfrontdq(t);
1930         }
1931 }
1932 
1933 
1934 /*
1935  * When a thread yields, put it on the back of the run queue.
1936  */
1937 static void
1938 ts_yield(kthread_t *t)
1939 {
1940         tsproc_t        *tspp = (tsproc_t *)(t->t_cldata);
1941 
1942         ASSERT(t == curthread);
1943         ASSERT(THREAD_LOCK_HELD(t));
1944 
1945         /*
1946          * Collect CPU usage spent before yielding
1947          */
1948         (void) CPUCAPS_CHARGE(t, &tspp->ts_caps, CPUCAPS_CHARGE_ENFORCE);
1949 
1950         /*
1951          * Clear the preemption control "yield" bit since the user is
1952          * doing a yield.
1953          */
1954         if (t->t_schedctl)
1955                 schedctl_set_yield(t, 0);
1956         /*
1957          * If ts_preempt() artifically increased the thread's priority
1958          * to avoid preemption, restore the original priority now.
1959          */
1960         if (tspp->ts_flags & TSRESTORE) {
1961                 THREAD_CHANGE_PRI(t, tspp->ts_scpri);
1962                 tspp->ts_flags &= ~TSRESTORE;
1963         }
1964         if (tspp->ts_timeleft <= 0) {
1965                 /*
1966                  * Time slice was artificially extended to avoid
1967                  * preemption, so pretend we're preempting it now.
1968                  */
1969                 DTRACE_SCHED1(schedctl__yield, int, -tspp->ts_timeleft);
1970                 tspp->ts_cpupri = ts_dptbl[tspp->ts_cpupri].ts_tqexp;
1971                 TS_NEWUMDPRI(tspp);
1972                 tspp->ts_timeleft = ts_dptbl[tspp->ts_cpupri].ts_quantum;
1973                 tspp->ts_dispwait = 0;
1974                 THREAD_CHANGE_PRI(t, ts_dptbl[tspp->ts_umdpri].ts_globpri);
1975                 ASSERT(t->t_pri >= 0 && t->t_pri <= ts_maxglobpri);
1976         }
1977         tspp->ts_flags &= ~TSBACKQ;
1978         setbackdq(t);
1979 }
1980 
1981 
1982 /*
1983  * Increment the nice value of the specified thread by incr and
1984  * return the new value in *retvalp.
1985  */
1986 static int
1987 ts_donice(kthread_t *t, cred_t *cr, int incr, int *retvalp)
1988 {
1989         int             newnice;
1990         tsproc_t        *tspp = (tsproc_t *)(t->t_cldata);
1991         tsparms_t       tsparms;
1992 
1993         ASSERT(MUTEX_HELD(&(ttoproc(t))->p_lock));
1994 
1995         /* If there's no change to priority, just return current setting */
1996         if (incr == 0) {
1997                 if (retvalp) {
1998                         *retvalp = tspp->ts_nice - NZERO;
1999                 }
2000                 return (0);
2001         }
2002 
2003         if ((incr < 0 || incr > 2 * NZERO) &&
2004             secpolicy_raisepriority(cr) != 0)
2005                 return (EPERM);
2006 
2007         /*
2008          * Specifying a nice increment greater than the upper limit of
2009          * 2 * NZERO - 1 will result in the thread's nice value being
2010          * set to the upper limit.  We check for this before computing
2011          * the new value because otherwise we could get overflow
2012          * if a privileged process specified some ridiculous increment.
2013          */
2014         if (incr > 2 * NZERO - 1)
2015                 incr = 2 * NZERO - 1;
2016 
2017         newnice = tspp->ts_nice + incr;
2018         if (newnice >= 2 * NZERO)
2019                 newnice = 2 * NZERO - 1;
2020         else if (newnice < 0)
2021                 newnice = 0;
2022 
2023         tsparms.ts_uprilim = tsparms.ts_upri =
2024             -((newnice - NZERO) * ts_maxupri) / NZERO;
2025         /*
2026          * Reset the uprilim and upri values of the thread.
2027          * Call ts_parmsset even if thread is interactive since we're
2028          * not changing mode.
2029          */
2030         (void) ts_parmsset(t, (void *)&tsparms, (id_t)0, (cred_t *)NULL);
2031 
2032         /*
2033          * Although ts_parmsset already reset ts_nice it may
2034          * not have been set to precisely the value calculated above
2035          * because ts_parmsset determines the nice value from the
2036          * user priority and we may have truncated during the integer
2037          * conversion from nice value to user priority and back.
2038          * We reset ts_nice to the value we calculated above.
2039          */
2040         tspp->ts_nice = (char)newnice;
2041 
2042         if (retvalp)
2043                 *retvalp = newnice - NZERO;
2044         return (0);
2045 }
2046 
2047 /*
2048  * Increment the priority of the specified thread by incr and
2049  * return the new value in *retvalp.
2050  */
2051 static int
2052 ts_doprio(kthread_t *t, cred_t *cr, int incr, int *retvalp)
2053 {
2054         int             newpri;
2055         tsproc_t        *tspp = (tsproc_t *)(t->t_cldata);
2056         tsparms_t       tsparms;
2057 
2058         ASSERT(MUTEX_HELD(&(ttoproc(t))->p_lock));
2059 
2060         /* If there's no change to the priority, just return current setting */
2061         if (incr == 0) {
2062                 *retvalp = tspp->ts_upri;
2063                 return (0);
2064         }
2065 
2066         newpri = tspp->ts_upri + incr;
2067         if (newpri > ts_maxupri || newpri < -ts_maxupri)
2068                 return (EINVAL);
2069 
2070         *retvalp = newpri;
2071         tsparms.ts_uprilim = tsparms.ts_upri = newpri;
2072         /*
2073          * Reset the uprilim and upri values of the thread.
2074          * Call ts_parmsset even if thread is interactive since we're
2075          * not changing mode.
2076          */
2077         return (ts_parmsset(t, &tsparms, 0, cr));
2078 }
2079 
2080 /*
2081  * ia_set_process_group marks foreground processes as interactive
2082  * and background processes as non-interactive iff the session
2083  * leader is interactive.  This routine is called from two places:
2084  *      strioctl:SPGRP when a new process group gets
2085  *              control of the tty.
2086  *      ia_parmsset-when the process in question is a session leader.
2087  * ia_set_process_group assumes that pidlock is held by the caller,
2088  * either strioctl or priocntlsys.  If the caller is priocntlsys
2089  * (via ia_parmsset) then the p_lock of the session leader is held
2090  * and the code needs to be careful about acquiring other p_locks.
2091  */
2092 static void
2093 ia_set_process_group(pid_t sid, pid_t bg_pgid, pid_t fg_pgid)
2094 {
2095         proc_t          *leader, *fg, *bg;
2096         tsproc_t        *tspp;
2097         kthread_t       *tx;
2098         int             plocked = 0;
2099 
2100         ASSERT(MUTEX_HELD(&pidlock));
2101 
2102         /*
2103          * see if the session leader is interactive AND
2104          * if it is currently "on" AND controlling a tty
2105          * iff it is then make the processes in the foreground
2106          * group interactive and the processes in the background
2107          * group non-interactive.
2108          */
2109         if ((leader = (proc_t *)prfind(sid)) == NULL) {
2110                 return;
2111         }
2112         if (leader->p_stat == SIDL) {
2113                 return;
2114         }
2115         if ((tx = proctot(leader)) == NULL) {
2116                 return;
2117         }
2118         /*
2119          * XXX do all the threads in the leader
2120          */
2121         if (tx->t_cid != ia_cid) {
2122                 return;
2123         }
2124         tspp = tx->t_cldata;
2125         /*
2126          * session leaders that are not interactive need not have
2127          * any processing done for them.  They are typically shells
2128          * that do not have focus and are changing the process group
2129          * attatched to the tty, e.g. a process that is exiting
2130          */
2131         mutex_enter(&leader->p_sessp->s_lock);
2132         if (!(tspp->ts_flags & TSIASET) ||
2133             (leader->p_sessp->s_vp == NULL) ||
2134             (leader->p_sessp->s_vp->v_stream == NULL)) {
2135                 mutex_exit(&leader->p_sessp->s_lock);
2136                 return;
2137         }
2138         mutex_exit(&leader->p_sessp->s_lock);
2139 
2140         /*
2141          * If we're already holding the leader's p_lock, we should use
2142          * mutex_tryenter instead of mutex_enter to avoid deadlocks from
2143          * lock ordering violations.
2144          */
2145         if (mutex_owned(&leader->p_lock))
2146                 plocked = 1;
2147 
2148         if (fg_pgid == 0)
2149                 goto skip;
2150         /*
2151          * now look for all processes in the foreground group and
2152          * make them interactive
2153          */
2154         for (fg = (proc_t *)pgfind(fg_pgid); fg != NULL; fg = fg->p_pglink) {
2155                 /*
2156                  * if the process is SIDL it's begin forked, ignore it
2157                  */
2158                 if (fg->p_stat == SIDL) {
2159                         continue;
2160                 }
2161                 /*
2162                  * sesssion leaders must be turned on/off explicitly
2163                  * not implicitly as happens to other members of
2164                  * the process group.
2165                  */
2166                 if (fg->p_pid  == fg->p_sessp->s_sid) {
2167                         continue;
2168                 }
2169 
2170                 TRACE_1(TR_FAC_IA, TR_GROUP_ON,
2171                     "group on:proc %p", fg);
2172 
2173                 if (plocked) {
2174                         if (mutex_tryenter(&fg->p_lock) == 0)
2175                                 continue;
2176                 } else {
2177                         mutex_enter(&fg->p_lock);
2178                 }
2179 
2180                 if ((tx = proctot(fg)) == NULL) {
2181                         mutex_exit(&fg->p_lock);
2182                         continue;
2183                 }
2184                 do {
2185                         thread_lock(tx);
2186                         /*
2187                          * if this thread is not interactive continue
2188                          */
2189                         if (tx->t_cid != ia_cid) {
2190                                 thread_unlock(tx);
2191                                 continue;
2192                         }
2193                         tspp = tx->t_cldata;
2194                         tspp->ts_flags |= TSIASET;
2195                         tspp->ts_boost = ia_boost;
2196                         TS_NEWUMDPRI(tspp);
2197                         tspp->ts_dispwait = 0;
2198                         ts_change_priority(tx, tspp);
2199                         thread_unlock(tx);
2200                 } while ((tx = tx->t_forw) != fg->p_tlist);
2201                 mutex_exit(&fg->p_lock);
2202         }
2203 skip:
2204         if (bg_pgid == 0)
2205                 return;
2206         for (bg = (proc_t *)pgfind(bg_pgid); bg != NULL; bg = bg->p_pglink) {
2207                 if (bg->p_stat == SIDL) {
2208                         continue;
2209                 }
2210                 /*
2211                  * sesssion leaders must be turned off explicitly
2212                  * not implicitly as happens to other members of
2213                  * the process group.
2214                  */
2215                 if (bg->p_pid == bg->p_sessp->s_sid) {
2216                         continue;
2217                 }
2218 
2219                 TRACE_1(TR_FAC_IA, TR_GROUP_OFF,
2220                     "group off:proc %p", bg);
2221 
2222                 if (plocked) {
2223                         if (mutex_tryenter(&bg->p_lock) == 0)
2224                                 continue;
2225                 } else {
2226                         mutex_enter(&bg->p_lock);
2227                 }
2228 
2229                 if ((tx = proctot(bg)) == NULL) {
2230                         mutex_exit(&bg->p_lock);
2231                         continue;
2232                 }
2233                 do {
2234                         thread_lock(tx);
2235                         /*
2236                          * if this thread is not interactive continue
2237                          */
2238                         if (tx->t_cid != ia_cid) {
2239                                 thread_unlock(tx);
2240                                 continue;
2241                         }
2242                         tspp = tx->t_cldata;
2243                         tspp->ts_flags &= ~TSIASET;
2244                         tspp->ts_boost = -ia_boost;
2245                         TS_NEWUMDPRI(tspp);
2246 
2247                         tspp->ts_dispwait = 0;
2248                         ts_change_priority(tx, tspp);
2249                         thread_unlock(tx);
2250                 } while ((tx = tx->t_forw) != bg->p_tlist);
2251                 mutex_exit(&bg->p_lock);
2252         }
2253 }
2254 
2255 
2256 static void
2257 ts_change_priority(kthread_t *t, tsproc_t *tspp)
2258 {
2259         pri_t   new_pri;
2260 
2261         ASSERT(THREAD_LOCK_HELD(t));
2262         new_pri = ts_dptbl[tspp->ts_umdpri].ts_globpri;
2263         ASSERT(new_pri >= 0 && new_pri <= ts_maxglobpri);
2264         tspp->ts_flags &= ~TSRESTORE;
2265         t->t_cpri = tspp->ts_upri;
2266         if (t == curthread || t->t_state == TS_ONPROC) {
2267                 /* curthread is always onproc */
2268                 cpu_t   *cp = t->t_disp_queue->disp_cpu;
2269                 THREAD_CHANGE_PRI(t, new_pri);
2270                 if (t == cp->cpu_dispthread)
2271                         cp->cpu_dispatch_pri = DISP_PRIO(t);
2272                 if (DISP_MUST_SURRENDER(t)) {
2273                         tspp->ts_flags |= TSBACKQ;
2274                         cpu_surrender(t);
2275                 } else {
2276                         tspp->ts_timeleft =
2277                             ts_dptbl[tspp->ts_cpupri].ts_quantum;
2278                 }
2279         } else {
2280                 int     frontq;
2281 
2282                 frontq = (tspp->ts_flags & TSIASET) != 0;
2283                 /*
2284                  * When the priority of a thread is changed,
2285                  * it may be necessary to adjust its position
2286                  * on a sleep queue or dispatch queue.
2287                  * The function thread_change_pri accomplishes
2288                  * this.
2289                  */
2290                 if (thread_change_pri(t, new_pri, frontq)) {
2291                         /*
2292                          * The thread was on a run queue. Reset
2293                          * its CPU timeleft from the quantum
2294                          * associated with the new priority.
2295                          */
2296                         tspp->ts_timeleft =
2297                             ts_dptbl[tspp->ts_cpupri].ts_quantum;
2298                 } else {
2299                         tspp->ts_flags |= TSBACKQ;
2300                 }
2301         }
2302 }
2303 
2304 static int
2305 ts_alloc(void **p, int flag)
2306 {
2307         void *bufp;
2308         bufp = kmem_alloc(sizeof (tsproc_t), flag);
2309         if (bufp == NULL) {
2310                 return (ENOMEM);
2311         } else {
2312                 *p = bufp;
2313                 return (0);
2314         }
2315 }
2316 
2317 static void
2318 ts_free(void *bufp)
2319 {
2320         if (bufp)
2321                 kmem_free(bufp, sizeof (tsproc_t));
2322 }