1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  * Copyright 2013 Joyent, Inc.  All rights reserved.
  26  */
  27 
  28 /*      Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T     */
  29 /*        All Rights Reserved   */
  30 
  31 #include <sys/types.h>
  32 #include <sys/param.h>
  33 #include <sys/sysmacros.h>
  34 #include <sys/cred.h>
  35 #include <sys/proc.h>
  36 #include <sys/pcb.h>
  37 #include <sys/signal.h>
  38 #include <sys/user.h>
  39 #include <sys/priocntl.h>
  40 #include <sys/class.h>
  41 #include <sys/disp.h>
  42 #include <sys/procset.h>
  43 #include <sys/cmn_err.h>
  44 #include <sys/debug.h>
  45 #include <sys/rt.h>
  46 #include <sys/rtpriocntl.h>
  47 #include <sys/kmem.h>
  48 #include <sys/systm.h>
  49 #include <sys/schedctl.h>
  50 #include <sys/errno.h>
  51 #include <sys/cpuvar.h>
  52 #include <sys/vmsystm.h>
  53 #include <sys/time.h>
  54 #include <sys/policy.h>
  55 #include <sys/sdt.h>
  56 #include <sys/cpupart.h>
  57 #include <sys/modctl.h>
  58 
  59 static pri_t    rt_init(id_t, int, classfuncs_t **);
  60 
  61 static struct sclass csw = {
  62         "RT",
  63         rt_init,
  64         0
  65 };
  66 
  67 static struct modlsched modlsched = {
  68         &mod_schedops, "realtime scheduling class", &csw
  69 };
  70 
  71 static struct modlinkage modlinkage = {
  72         MODREV_1, { (void *)&modlsched, NULL }
  73 };
  74 
  75 int
  76 _init()
  77 {
  78         return (mod_install(&modlinkage));
  79 }
  80 
  81 int
  82 _fini()
  83 {
  84         return (EBUSY);         /* don't remove RT for now */
  85 }
  86 
  87 int
  88 _info(struct modinfo *modinfop)
  89 {
  90         return (mod_info(&modlinkage, modinfop));
  91 }
  92 
  93 
  94 /*
  95  * Class specific code for the real-time class
  96  */
  97 
  98 /*
  99  * Extern declarations for variables defined in the rt master file
 100  */
 101 #define RTMAXPRI 59
 102 
 103 pri_t rt_maxpri = RTMAXPRI;     /* maximum real-time priority */
 104 rtdpent_t *rt_dptbl;      /* real-time dispatcher parameter table */
 105 
 106 /*
 107  * control flags (kparms->rt_cflags).
 108  */
 109 #define RT_DOPRI        0x01    /* change priority */
 110 #define RT_DOTQ         0x02    /* change RT time quantum */
 111 #define RT_DOSIG        0x04    /* change RT time quantum signal */
 112 
 113 static int      rt_admin(caddr_t, cred_t *);
 114 static int      rt_enterclass(kthread_t *, id_t, void *, cred_t *, void *);
 115 static int      rt_fork(kthread_t *, kthread_t *, void *);
 116 static int      rt_getclinfo(void *);
 117 static int      rt_getclpri(pcpri_t *);
 118 static int      rt_parmsin(void *);
 119 static int      rt_parmsout(void *, pc_vaparms_t *);
 120 static int      rt_vaparmsin(void *, pc_vaparms_t *);
 121 static int      rt_vaparmsout(void *, pc_vaparms_t *);
 122 static int      rt_parmsset(kthread_t *, void *, id_t, cred_t *);
 123 static int      rt_donice(kthread_t *, cred_t *, int, int *);
 124 static int      rt_doprio(kthread_t *, cred_t *, int, int *);
 125 static void     rt_exitclass(void *);
 126 static int      rt_canexit(kthread_t *, cred_t *);
 127 static void     rt_forkret(kthread_t *, kthread_t *);
 128 static void     rt_nullsys();
 129 static void     rt_parmsget(kthread_t *, void *);
 130 static void     rt_preempt(kthread_t *);
 131 static void     rt_setrun(kthread_t *);
 132 static void     rt_tick(kthread_t *);
 133 static void     rt_wakeup(kthread_t *);
 134 static pri_t    rt_swapin(kthread_t *, int);
 135 static pri_t    rt_swapout(kthread_t *, int);
 136 static pri_t    rt_globpri(kthread_t *);
 137 static void     rt_yield(kthread_t *);
 138 static int      rt_alloc(void **, int);
 139 static void     rt_free(void *);
 140 
 141 static void     rt_change_priority(kthread_t *, rtproc_t *);
 142 
 143 static id_t     rt_cid;         /* real-time class ID */
 144 static rtproc_t rt_plisthead;   /* dummy rtproc at head of rtproc list */
 145 static kmutex_t rt_dptblock;    /* protects realtime dispatch table */
 146 static kmutex_t rt_list_lock;   /* protects RT thread list */
 147 
 148 extern rtdpent_t *rt_getdptbl(void);
 149 
 150 static struct classfuncs rt_classfuncs = {
 151         /* class ops */
 152         {   rt_admin,
 153             rt_getclinfo,
 154             rt_parmsin,
 155             rt_parmsout,
 156             rt_vaparmsin,
 157             rt_vaparmsout,
 158             rt_getclpri,
 159             rt_alloc,
 160             rt_free },
 161         /* thread ops */
 162         {   rt_enterclass,
 163             rt_exitclass,
 164             rt_canexit,
 165             rt_fork,
 166             rt_forkret,
 167             rt_parmsget,
 168             rt_parmsset,
 169             rt_nullsys, /* stop */
 170             rt_nullsys, /* exit */
 171             rt_nullsys, /* active */
 172             rt_nullsys, /* inactive */
 173             rt_swapin,
 174             rt_swapout,
 175             rt_nullsys, /* trapret */
 176             rt_preempt,
 177             rt_setrun,
 178             rt_nullsys, /* sleep */
 179             rt_tick,
 180             rt_wakeup,
 181             rt_donice,
 182             rt_globpri,
 183             rt_nullsys, /* set_process_group */
 184             rt_yield,
 185             rt_doprio },
 186 };
 187 
 188 /*
 189  * Real-time class initialization. Called by dispinit() at boot time.
 190  * We can ignore the clparmsz argument since we know that the smallest
 191  * possible parameter buffer is big enough for us.
 192  */
 193 /* ARGSUSED */
 194 pri_t
 195 rt_init(id_t cid, int clparmsz, classfuncs_t **clfuncspp)
 196 {
 197         rt_dptbl = rt_getdptbl();
 198         rt_cid = cid;   /* Record our class ID */
 199 
 200         /*
 201          * Initialize the rtproc list.
 202          */
 203         rt_plisthead.rt_next = rt_plisthead.rt_prev = &rt_plisthead;
 204 
 205         /*
 206          * We're required to return a pointer to our classfuncs
 207          * structure and the highest global priority value we use.
 208          */
 209         *clfuncspp = &rt_classfuncs;
 210         mutex_init(&rt_dptblock, NULL, MUTEX_DEFAULT, NULL);
 211         mutex_init(&rt_list_lock, NULL, MUTEX_DEFAULT, NULL);
 212         return (rt_dptbl[rt_maxpri].rt_globpri);
 213 }
 214 
 215 /*
 216  * Get or reset the rt_dptbl values per the user's request.
 217  */
 218 /* ARGSUSED */
 219 static int
 220 rt_admin(caddr_t uaddr, cred_t *reqpcredp)
 221 {
 222         rtadmin_t       rtadmin;
 223         rtdpent_t       *tmpdpp;
 224         size_t          userdpsz;
 225         size_t          rtdpsz;
 226         int             i;
 227 
 228         if (get_udatamodel() == DATAMODEL_NATIVE) {
 229                 if (copyin(uaddr, &rtadmin, sizeof (rtadmin_t)))
 230                         return (EFAULT);
 231         }
 232 #ifdef _SYSCALL32_IMPL
 233         else {
 234                 /* rtadmin struct from ILP32 callers */
 235                 rtadmin32_t rtadmin32;
 236                 if (copyin(uaddr, &rtadmin32, sizeof (rtadmin32_t)))
 237                         return (EFAULT);
 238                 rtadmin.rt_dpents =
 239                     (struct rtdpent *)(uintptr_t)rtadmin32.rt_dpents;
 240                 rtadmin.rt_ndpents = rtadmin32.rt_ndpents;
 241                 rtadmin.rt_cmd = rtadmin32.rt_cmd;
 242         }
 243 #endif /* _SYSCALL32_IMPL */
 244 
 245         rtdpsz = (rt_maxpri + 1) * sizeof (rtdpent_t);
 246 
 247         switch (rtadmin.rt_cmd) {
 248 
 249         case RT_GETDPSIZE:
 250                 rtadmin.rt_ndpents = rt_maxpri + 1;
 251 
 252                 if (get_udatamodel() == DATAMODEL_NATIVE) {
 253                         if (copyout(&rtadmin, uaddr, sizeof (rtadmin_t)))
 254                                 return (EFAULT);
 255                 }
 256 #ifdef _SYSCALL32_IMPL
 257                 else {
 258                         /* return rtadmin struct to ILP32 callers */
 259                         rtadmin32_t rtadmin32;
 260                         rtadmin32.rt_dpents =
 261                             (caddr32_t)(uintptr_t)rtadmin.rt_dpents;
 262                         rtadmin32.rt_ndpents = rtadmin.rt_ndpents;
 263                         rtadmin32.rt_cmd = rtadmin.rt_cmd;
 264                         if (copyout(&rtadmin32, uaddr, sizeof (rtadmin32_t)))
 265                                 return (EFAULT);
 266                 }
 267 #endif /* _SYSCALL32_IMPL */
 268 
 269                 break;
 270 
 271         case RT_GETDPTBL:
 272                 userdpsz = MIN(rtadmin.rt_ndpents * sizeof (rtdpent_t),
 273                     rtdpsz);
 274                 if (copyout(rt_dptbl, rtadmin.rt_dpents, userdpsz))
 275                         return (EFAULT);
 276                 rtadmin.rt_ndpents = userdpsz / sizeof (rtdpent_t);
 277 
 278                 if (get_udatamodel() == DATAMODEL_NATIVE) {
 279                         if (copyout(&rtadmin, uaddr, sizeof (rtadmin_t)))
 280                                 return (EFAULT);
 281                 }
 282 #ifdef _SYSCALL32_IMPL
 283                 else {
 284                         /* return rtadmin struct to ILP32 callers */
 285                         rtadmin32_t rtadmin32;
 286                         rtadmin32.rt_dpents =
 287                             (caddr32_t)(uintptr_t)rtadmin.rt_dpents;
 288                         rtadmin32.rt_ndpents = rtadmin.rt_ndpents;
 289                         rtadmin32.rt_cmd = rtadmin.rt_cmd;
 290                         if (copyout(&rtadmin32, uaddr, sizeof (rtadmin32_t)))
 291                                 return (EFAULT);
 292                 }
 293 #endif /* _SYSCALL32_IMPL */
 294                 break;
 295 
 296         case RT_SETDPTBL:
 297                 /*
 298                  * We require that the requesting process has sufficient
 299                  * priveleges.  We also require that the table supplied by
 300                  * the user exactly match the current rt_dptbl in size.
 301                  */
 302                 if (secpolicy_dispadm(reqpcredp) != 0)
 303                         return (EPERM);
 304                 if (rtadmin.rt_ndpents * sizeof (rtdpent_t) != rtdpsz)
 305                         return (EINVAL);
 306 
 307                 /*
 308                  * We read the user supplied table into a temporary buffer
 309                  * where the time quantum values are validated before
 310                  * being copied to the rt_dptbl.
 311                  */
 312                 tmpdpp = kmem_alloc(rtdpsz, KM_SLEEP);
 313                 if (copyin(rtadmin.rt_dpents, tmpdpp, rtdpsz)) {
 314                         kmem_free(tmpdpp, rtdpsz);
 315                         return (EFAULT);
 316                 }
 317                 for (i = 0; i < rtadmin.rt_ndpents; i++) {
 318 
 319                         /*
 320                          * Validate the user supplied time quantum values.
 321                          */
 322                         if (tmpdpp[i].rt_quantum <= 0 &&
 323                             tmpdpp[i].rt_quantum != RT_TQINF) {
 324                                 kmem_free(tmpdpp, rtdpsz);
 325                                 return (EINVAL);
 326                         }
 327                 }
 328 
 329                 /*
 330                  * Copy the user supplied values over the current rt_dptbl
 331                  * values.  The rt_globpri member is read-only so we don't
 332                  * overwrite it.
 333                  */
 334                 mutex_enter(&rt_dptblock);
 335                 for (i = 0; i < rtadmin.rt_ndpents; i++)
 336                         rt_dptbl[i].rt_quantum = tmpdpp[i].rt_quantum;
 337                 mutex_exit(&rt_dptblock);
 338                 kmem_free(tmpdpp, rtdpsz);
 339                 break;
 340 
 341         default:
 342                 return (EINVAL);
 343         }
 344         return (0);
 345 }
 346 
 347 
 348 /*
 349  * Allocate a real-time class specific proc structure and
 350  * initialize it with the parameters supplied. Also move thread
 351  * to specified real-time priority.
 352  */
 353 /* ARGSUSED */
 354 static int
 355 rt_enterclass(kthread_t *t, id_t cid, void *parmsp, cred_t *reqpcredp,
 356     void *bufp)
 357 {
 358         rtkparms_t *rtkparmsp = (rtkparms_t *)parmsp;
 359         rtproc_t *rtpp;
 360 
 361         /*
 362          * For a thread to enter the real-time class the thread
 363          * which initiates the request must be privileged.
 364          * This may have been checked previously but if our
 365          * caller passed us a credential structure we assume it
 366          * hasn't and we check it here.
 367          */
 368         if (reqpcredp != NULL && secpolicy_setpriority(reqpcredp) != 0)
 369                 return (EPERM);
 370 
 371         rtpp = (rtproc_t *)bufp;
 372         ASSERT(rtpp != NULL);
 373 
 374         /*
 375          * If this thread's lwp is swapped out, it will be brought in
 376          * when it is put onto the runqueue.
 377          *
 378          * Now, Initialize the rtproc structure.
 379          */
 380         if (rtkparmsp == NULL) {
 381                 /*
 382                  * Use default values
 383                  */
 384                 rtpp->rt_pri = 0;
 385                 rtpp->rt_pquantum = rt_dptbl[0].rt_quantum;
 386                 rtpp->rt_tqsignal = 0;
 387         } else {
 388                 /*
 389                  * Use supplied values
 390                  */
 391                 if ((rtkparmsp->rt_cflags & RT_DOPRI) == 0)
 392                         rtpp->rt_pri = 0;
 393                 else
 394                         rtpp->rt_pri = rtkparmsp->rt_pri;
 395 
 396                 if (rtkparmsp->rt_tqntm == RT_TQINF)
 397                         rtpp->rt_pquantum = RT_TQINF;
 398                 else if (rtkparmsp->rt_tqntm == RT_TQDEF ||
 399                     (rtkparmsp->rt_cflags & RT_DOTQ) == 0)
 400                         rtpp->rt_pquantum = rt_dptbl[rtpp->rt_pri].rt_quantum;
 401                 else
 402                         rtpp->rt_pquantum = rtkparmsp->rt_tqntm;
 403 
 404                 if ((rtkparmsp->rt_cflags & RT_DOSIG) == 0)
 405                         rtpp->rt_tqsignal = 0;
 406                 else
 407                         rtpp->rt_tqsignal = rtkparmsp->rt_tqsig;
 408         }
 409         rtpp->rt_flags = 0;
 410         rtpp->rt_tp = t;
 411         /*
 412          * Reset thread priority
 413          */
 414         thread_lock(t);
 415         t->t_clfuncs = &(sclass[cid].cl_funcs->thread);
 416         t->t_cid = cid;
 417         t->t_cldata = (void *)rtpp;
 418         t->t_schedflag &= ~TS_RUNQMATCH;
 419         rt_change_priority(t, rtpp);
 420         thread_unlock(t);
 421         /*
 422          * Link new structure into rtproc list
 423          */
 424         mutex_enter(&rt_list_lock);
 425         rtpp->rt_next = rt_plisthead.rt_next;
 426         rtpp->rt_prev = &rt_plisthead;
 427         rt_plisthead.rt_next->rt_prev = rtpp;
 428         rt_plisthead.rt_next = rtpp;
 429         mutex_exit(&rt_list_lock);
 430         return (0);
 431 }
 432 
 433 
 434 /*
 435  * Free rtproc structure of thread.
 436  */
 437 static void
 438 rt_exitclass(void *procp)
 439 {
 440         rtproc_t *rtprocp = (rtproc_t *)procp;
 441 
 442         mutex_enter(&rt_list_lock);
 443         rtprocp->rt_prev->rt_next = rtprocp->rt_next;
 444         rtprocp->rt_next->rt_prev = rtprocp->rt_prev;
 445         mutex_exit(&rt_list_lock);
 446         kmem_free(rtprocp, sizeof (rtproc_t));
 447 }
 448 
 449 
 450 /*
 451  * Allocate and initialize real-time class specific
 452  * proc structure for child.
 453  */
 454 /* ARGSUSED */
 455 static int
 456 rt_fork(kthread_t *t, kthread_t *ct, void *bufp)
 457 {
 458         rtproc_t *prtpp;
 459         rtproc_t *crtpp;
 460 
 461         ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock));
 462 
 463         /*
 464          * Initialize child's rtproc structure
 465          */
 466         crtpp = (rtproc_t *)bufp;
 467         ASSERT(crtpp != NULL);
 468         prtpp = (rtproc_t *)t->t_cldata;
 469         thread_lock(t);
 470         crtpp->rt_timeleft = crtpp->rt_pquantum = prtpp->rt_pquantum;
 471         crtpp->rt_pri = prtpp->rt_pri;
 472         crtpp->rt_flags = prtpp->rt_flags & ~RTBACKQ;
 473         crtpp->rt_tqsignal = prtpp->rt_tqsignal;
 474 
 475         crtpp->rt_tp = ct;
 476         thread_unlock(t);
 477 
 478         /*
 479          * Link new structure into rtproc list
 480          */
 481         ct->t_cldata = (void *)crtpp;
 482         mutex_enter(&rt_list_lock);
 483         crtpp->rt_next = rt_plisthead.rt_next;
 484         crtpp->rt_prev = &rt_plisthead;
 485         rt_plisthead.rt_next->rt_prev = crtpp;
 486         rt_plisthead.rt_next = crtpp;
 487         mutex_exit(&rt_list_lock);
 488         return (0);
 489 }
 490 
 491 
 492 /*
 493  * The child goes to the back of its dispatcher queue while the
 494  * parent continues to run after a real time thread forks.
 495  */
 496 /* ARGSUSED */
 497 static void
 498 rt_forkret(kthread_t *t, kthread_t *ct)
 499 {
 500         proc_t *pp = ttoproc(t);
 501         proc_t *cp = ttoproc(ct);
 502 
 503         ASSERT(t == curthread);
 504         ASSERT(MUTEX_HELD(&pidlock));
 505 
 506         /*
 507          * Grab the child's p_lock before dropping pidlock to ensure
 508          * the process does not disappear before we set it running.
 509          */
 510         mutex_enter(&cp->p_lock);
 511         mutex_exit(&pidlock);
 512         continuelwps(cp);
 513         mutex_exit(&cp->p_lock);
 514 
 515         mutex_enter(&pp->p_lock);
 516         continuelwps(pp);
 517         mutex_exit(&pp->p_lock);
 518 }
 519 
 520 
 521 /*
 522  * Get information about the real-time class into the buffer
 523  * pointed to by rtinfop.  The maximum configured real-time
 524  * priority is the only information we supply.  We ignore the
 525  * class and credential arguments because anyone can have this
 526  * information.
 527  */
 528 /* ARGSUSED */
 529 static int
 530 rt_getclinfo(void *infop)
 531 {
 532         rtinfo_t *rtinfop = (rtinfo_t *)infop;
 533         rtinfop->rt_maxpri = rt_maxpri;
 534         return (0);
 535 }
 536 
 537 /*
 538  * Return the user mode scheduling priority range.
 539  */
 540 static int
 541 rt_getclpri(pcpri_t *pcprip)
 542 {
 543         pcprip->pc_clpmax = rt_maxpri;
 544         pcprip->pc_clpmin = 0;
 545         return (0);
 546 }
 547 
 548 static void
 549 rt_nullsys()
 550 {
 551 }
 552 
 553 /* ARGSUSED */
 554 static int
 555 rt_canexit(kthread_t *t, cred_t *cred)
 556 {
 557         /*
 558          * Thread can always leave RT class
 559          */
 560         return (0);
 561 }
 562 
 563 /*
 564  * Get the real-time scheduling parameters of the thread pointed to by
 565  * rtprocp into the buffer pointed to by rtkparmsp.
 566  */
 567 static void
 568 rt_parmsget(kthread_t *t, void *parmsp)
 569 {
 570         rtproc_t        *rtprocp = (rtproc_t *)t->t_cldata;
 571         rtkparms_t      *rtkparmsp = (rtkparms_t *)parmsp;
 572 
 573         rtkparmsp->rt_pri = rtprocp->rt_pri;
 574         rtkparmsp->rt_tqntm = rtprocp->rt_pquantum;
 575         rtkparmsp->rt_tqsig = rtprocp->rt_tqsignal;
 576 }
 577 
 578 
 579 
 580 /*
 581  * Check the validity of the real-time parameters in the buffer
 582  * pointed to by rtprmsp.
 583  * We convert the rtparms buffer from the user supplied format to
 584  * our internal format (i.e. time quantum expressed in ticks).
 585  */
 586 static int
 587 rt_parmsin(void *prmsp)
 588 {
 589         rtparms_t *rtprmsp = (rtparms_t *)prmsp;
 590         longlong_t      ticks;
 591         uint_t          cflags;
 592 
 593         /*
 594          * First check the validity of parameters and convert
 595          * the buffer to kernel format.
 596          */
 597         if ((rtprmsp->rt_pri < 0 || rtprmsp->rt_pri > rt_maxpri) &&
 598             rtprmsp->rt_pri != RT_NOCHANGE)
 599                 return (EINVAL);
 600 
 601         cflags = (rtprmsp->rt_pri != RT_NOCHANGE ? RT_DOPRI : 0);
 602 
 603         if ((rtprmsp->rt_tqsecs == 0 && rtprmsp->rt_tqnsecs == 0) ||
 604             rtprmsp->rt_tqnsecs >= NANOSEC)
 605                 return (EINVAL);
 606 
 607         if (rtprmsp->rt_tqnsecs != RT_NOCHANGE)
 608                 cflags |= RT_DOTQ;
 609 
 610         if (rtprmsp->rt_tqnsecs >= 0) {
 611                 if ((ticks = SEC_TO_TICK((longlong_t)rtprmsp->rt_tqsecs) +
 612                     NSEC_TO_TICK_ROUNDUP(rtprmsp->rt_tqnsecs)) > INT_MAX)
 613                         return (ERANGE);
 614 
 615                 ((rtkparms_t *)rtprmsp)->rt_tqntm = (int)ticks;
 616         } else {
 617                 if (rtprmsp->rt_tqnsecs != RT_NOCHANGE &&
 618                     rtprmsp->rt_tqnsecs != RT_TQINF &&
 619                     rtprmsp->rt_tqnsecs != RT_TQDEF)
 620                         return (EINVAL);
 621 
 622                 ((rtkparms_t *)rtprmsp)->rt_tqntm = rtprmsp->rt_tqnsecs;
 623         }
 624         ((rtkparms_t *)rtprmsp)->rt_cflags = cflags;
 625 
 626         return (0);
 627 }
 628 
 629 
 630 /*
 631  * Check the validity of the real-time parameters in the pc_vaparms_t
 632  * structure vaparmsp and put them in the buffer pointed to by rtprmsp.
 633  * pc_vaparms_t contains (key, value) pairs of parameter.
 634  * rt_vaparmsin() is the variable parameter version of rt_parmsin().
 635  */
 636 static int
 637 rt_vaparmsin(void *prmsp, pc_vaparms_t *vaparmsp)
 638 {
 639         uint_t          secs = 0;
 640         uint_t          cnt;
 641         int             nsecs = 0;
 642         int             priflag, secflag, nsecflag, sigflag;
 643         longlong_t      ticks;
 644         rtkparms_t      *rtprmsp = (rtkparms_t *)prmsp;
 645         pc_vaparm_t     *vpp = &vaparmsp->pc_parms[0];
 646 
 647 
 648         /*
 649          * First check the validity of parameters and convert them
 650          * from the user supplied format to the internal format.
 651          */
 652         priflag = secflag = nsecflag = sigflag = 0;
 653         rtprmsp->rt_cflags = 0;
 654 
 655         if (vaparmsp->pc_vaparmscnt > PC_VAPARMCNT)
 656                 return (EINVAL);
 657 
 658         for (cnt = 0; cnt < vaparmsp->pc_vaparmscnt; cnt++, vpp++) {
 659 
 660                 switch (vpp->pc_key) {
 661                 case RT_KY_PRI:
 662                         if (priflag++)
 663                                 return (EINVAL);
 664                         rtprmsp->rt_cflags |= RT_DOPRI;
 665                         rtprmsp->rt_pri = (pri_t)vpp->pc_parm;
 666                         if (rtprmsp->rt_pri < 0 || rtprmsp->rt_pri > rt_maxpri)
 667                                 return (EINVAL);
 668                         break;
 669 
 670                 case RT_KY_TQSECS:
 671                         if (secflag++)
 672                                 return (EINVAL);
 673                         rtprmsp->rt_cflags |= RT_DOTQ;
 674                         secs = (uint_t)vpp->pc_parm;
 675                         break;
 676 
 677                 case RT_KY_TQNSECS:
 678                         if (nsecflag++)
 679                                 return (EINVAL);
 680                         rtprmsp->rt_cflags |= RT_DOTQ;
 681                         nsecs = (int)vpp->pc_parm;
 682                         break;
 683 
 684                 case RT_KY_TQSIG:
 685                         if (sigflag++)
 686                                 return (EINVAL);
 687                         rtprmsp->rt_cflags |= RT_DOSIG;
 688                         rtprmsp->rt_tqsig = (int)vpp->pc_parm;
 689                         if (rtprmsp->rt_tqsig < 0 || rtprmsp->rt_tqsig >= NSIG)
 690                                 return (EINVAL);
 691                         break;
 692 
 693                 default:
 694                         return (EINVAL);
 695                 }
 696         }
 697 
 698         if (vaparmsp->pc_vaparmscnt == 0) {
 699                 /*
 700                  * Use default parameters.
 701                  */
 702                 rtprmsp->rt_pri = 0;
 703                 rtprmsp->rt_tqntm = RT_TQDEF;
 704                 rtprmsp->rt_tqsig = 0;
 705                 rtprmsp->rt_cflags = RT_DOPRI | RT_DOTQ | RT_DOSIG;
 706         } else if ((rtprmsp->rt_cflags & RT_DOTQ) != 0) {
 707                 if ((secs == 0 && nsecs == 0) || nsecs >= NANOSEC)
 708                         return (EINVAL);
 709 
 710                 if (nsecs >= 0) {
 711                         if ((ticks = SEC_TO_TICK((longlong_t)secs) +
 712                             NSEC_TO_TICK_ROUNDUP(nsecs)) > INT_MAX)
 713                                 return (ERANGE);
 714 
 715                         rtprmsp->rt_tqntm = (int)ticks;
 716                 } else {
 717                         if (nsecs != RT_TQINF && nsecs != RT_TQDEF)
 718                                 return (EINVAL);
 719                         rtprmsp->rt_tqntm = nsecs;
 720                 }
 721         }
 722 
 723         return (0);
 724 }
 725 
 726 /*
 727  * Do required processing on the real-time parameter buffer
 728  * before it is copied out to the user.
 729  * All we have to do is convert the buffer from kernel to user format
 730  * (i.e. convert time quantum from ticks to seconds-nanoseconds).
 731  */
 732 /* ARGSUSED */
 733 static int
 734 rt_parmsout(void *prmsp, pc_vaparms_t *vaparmsp)
 735 {
 736         rtkparms_t      *rtkprmsp = (rtkparms_t *)prmsp;
 737 
 738         if (vaparmsp != NULL)
 739                 return (0);
 740 
 741         if (rtkprmsp->rt_tqntm < 0) {
 742                 /*
 743                  * Quantum field set to special value (e.g. RT_TQINF)
 744                  */
 745                 ((rtparms_t *)rtkprmsp)->rt_tqnsecs = rtkprmsp->rt_tqntm;
 746                 ((rtparms_t *)rtkprmsp)->rt_tqsecs = 0;
 747         } else {
 748                 /* Convert quantum from ticks to seconds-nanoseconds */
 749 
 750                 timestruc_t ts;
 751                 TICK_TO_TIMESTRUC(rtkprmsp->rt_tqntm, &ts);
 752                 ((rtparms_t *)rtkprmsp)->rt_tqsecs = ts.tv_sec;
 753                 ((rtparms_t *)rtkprmsp)->rt_tqnsecs = ts.tv_nsec;
 754         }
 755 
 756         return (0);
 757 }
 758 
 759 
 760 /*
 761  * Copy all selected real-time class parameters to the user.
 762  * The parameters are specified by a key.
 763  */
 764 static int
 765 rt_vaparmsout(void *prmsp, pc_vaparms_t *vaparmsp)
 766 {
 767         rtkparms_t      *rtkprmsp = (rtkparms_t *)prmsp;
 768         timestruc_t     ts;
 769         uint_t          cnt;
 770         uint_t          secs;
 771         int             nsecs;
 772         int             priflag, secflag, nsecflag, sigflag;
 773         pc_vaparm_t     *vpp = &vaparmsp->pc_parms[0];
 774 
 775         ASSERT(MUTEX_NOT_HELD(&curproc->p_lock));
 776 
 777         priflag = secflag = nsecflag = sigflag = 0;
 778 
 779         if (vaparmsp->pc_vaparmscnt > PC_VAPARMCNT)
 780                 return (EINVAL);
 781 
 782         if (rtkprmsp->rt_tqntm < 0) {
 783                 /*
 784                  * Quantum field set to special value (e.g. RT_TQINF).
 785                  */
 786                 secs = 0;
 787                 nsecs = rtkprmsp->rt_tqntm;
 788         } else {
 789                 /*
 790                  * Convert quantum from ticks to seconds-nanoseconds.
 791                  */
 792                 TICK_TO_TIMESTRUC(rtkprmsp->rt_tqntm, &ts);
 793                 secs = ts.tv_sec;
 794                 nsecs = ts.tv_nsec;
 795         }
 796 
 797 
 798         for (cnt = 0; cnt < vaparmsp->pc_vaparmscnt; cnt++, vpp++) {
 799 
 800                 switch (vpp->pc_key) {
 801                 case RT_KY_PRI:
 802                         if (priflag++)
 803                                 return (EINVAL);
 804                         if (copyout(&rtkprmsp->rt_pri,
 805                             (caddr_t)(uintptr_t)vpp->pc_parm, sizeof (pri_t)))
 806                                 return (EFAULT);
 807                         break;
 808 
 809                 case RT_KY_TQSECS:
 810                         if (secflag++)
 811                                 return (EINVAL);
 812                         if (copyout(&secs, (caddr_t)(uintptr_t)vpp->pc_parm,
 813                             sizeof (uint_t)))
 814                                 return (EFAULT);
 815                         break;
 816 
 817                 case RT_KY_TQNSECS:
 818                         if (nsecflag++)
 819                                 return (EINVAL);
 820                         if (copyout(&nsecs, (caddr_t)(uintptr_t)vpp->pc_parm,
 821                             sizeof (int)))
 822                                 return (EFAULT);
 823                         break;
 824 
 825                 case RT_KY_TQSIG:
 826                         if (sigflag++)
 827                                 return (EINVAL);
 828                         if (copyout(&rtkprmsp->rt_tqsig,
 829                             (caddr_t)(uintptr_t)vpp->pc_parm, sizeof (int)))
 830                                 return (EFAULT);
 831                         break;
 832 
 833                 default:
 834                         return (EINVAL);
 835                 }
 836         }
 837 
 838         return (0);
 839 }
 840 
 841 
 842 /*
 843  * Set the scheduling parameters of the thread pointed to by rtprocp
 844  * to those specified in the buffer pointed to by rtkprmsp.
 845  * Note that the parameters are expected to be in kernel format
 846  * (i.e. time quantm expressed in ticks).  Real time parameters copied
 847  * in from the user should be processed by rt_parmsin() before they are
 848  * passed to this function.
 849  */
 850 static int
 851 rt_parmsset(kthread_t *tx, void *prmsp, id_t reqpcid, cred_t *reqpcredp)
 852 {
 853         rtkparms_t *rtkprmsp = (rtkparms_t *)prmsp;
 854         rtproc_t *rtpp = (rtproc_t *)tx->t_cldata;
 855 
 856         ASSERT(MUTEX_HELD(&(ttoproc(tx))->p_lock));
 857 
 858         /*
 859          * Basic permissions enforced by generic kernel code
 860          * for all classes require that a thread attempting
 861          * to change the scheduling parameters of a target thread
 862          * be privileged or have a real or effective UID
 863          * matching that of the target thread. We are not
 864          * called unless these basic permission checks have
 865          * already passed. The real-time class requires in addition
 866          * that the requesting thread be real-time unless it is privileged.
 867          * This may also have been checked previously but if our caller
 868          * passes us a credential structure we assume it hasn't and
 869          * we check it here.
 870          */
 871         if (reqpcredp != NULL && reqpcid != rt_cid &&
 872             secpolicy_raisepriority(reqpcredp) != 0)
 873                 return (EPERM);
 874 
 875         thread_lock(tx);
 876         if ((rtkprmsp->rt_cflags & RT_DOPRI) != 0) {
 877                 rtpp->rt_pri = rtkprmsp->rt_pri;
 878                 rt_change_priority(tx, rtpp);
 879         }
 880         if (rtkprmsp->rt_tqntm == RT_TQINF)
 881                 rtpp->rt_pquantum = RT_TQINF;
 882         else if (rtkprmsp->rt_tqntm == RT_TQDEF)
 883                 rtpp->rt_timeleft = rtpp->rt_pquantum =
 884                     rt_dptbl[rtpp->rt_pri].rt_quantum;
 885         else if ((rtkprmsp->rt_cflags & RT_DOTQ) != 0)
 886                 rtpp->rt_timeleft = rtpp->rt_pquantum = rtkprmsp->rt_tqntm;
 887 
 888         if ((rtkprmsp->rt_cflags & RT_DOSIG) != 0)
 889                 rtpp->rt_tqsignal = rtkprmsp->rt_tqsig;
 890 
 891         thread_unlock(tx);
 892         return (0);
 893 }
 894 
 895 
 896 /*
 897  * Arrange for thread to be placed in appropriate location
 898  * on dispatcher queue.  Runs at splhi() since the clock
 899  * interrupt can cause RTBACKQ to be set.
 900  */
 901 static void
 902 rt_preempt(kthread_t *t)
 903 {
 904         rtproc_t *rtpp = (rtproc_t *)(t->t_cldata);
 905         klwp_t *lwp;
 906 
 907         ASSERT(THREAD_LOCK_HELD(t));
 908 
 909         /*
 910          * If the state is user I allow swapping because I know I won't
 911          * be holding any locks.
 912          */
 913         if ((lwp = curthread->t_lwp) != NULL && lwp->lwp_state == LWP_USER)
 914                 t->t_schedflag &= ~TS_DONT_SWAP;
 915         if ((rtpp->rt_flags & RTBACKQ) != 0) {
 916                 rtpp->rt_timeleft = rtpp->rt_pquantum;
 917                 rtpp->rt_flags &= ~RTBACKQ;
 918                 setbackdq(t);
 919         } else
 920                 setfrontdq(t);
 921 
 922 }
 923 
 924 /*
 925  * Return the global priority associated with this rt_pri.
 926  */
 927 static pri_t
 928 rt_globpri(kthread_t *t)
 929 {
 930         rtproc_t *rtprocp = (rtproc_t *)t->t_cldata;
 931         return (rt_dptbl[rtprocp->rt_pri].rt_globpri);
 932 }
 933 
 934 static void
 935 rt_setrun(kthread_t *t)
 936 {
 937         rtproc_t *rtpp = (rtproc_t *)(t->t_cldata);
 938 
 939         ASSERT(THREAD_LOCK_HELD(t));
 940 
 941         rtpp->rt_timeleft = rtpp->rt_pquantum;
 942         rtpp->rt_flags &= ~RTBACKQ;
 943         setbackdq(t);
 944 }
 945 
 946 /*
 947  * Returns the priority of the thread, -1 if the thread is loaded or ineligible
 948  * for swapin.
 949  *
 950  * FX and RT threads are designed so that they don't swapout; however, it
 951  * is possible that while the thread is swapped out and in another class, it
 952  * can be changed to FX or RT.  Since these threads should be swapped in as
 953  * soon as they're runnable, rt_swapin returns SHRT_MAX, and fx_swapin
 954  * returns SHRT_MAX - 1, so that it gives deference to any swapped out RT
 955  * threads.
 956  */
 957 /* ARGSUSED */
 958 static pri_t
 959 rt_swapin(kthread_t *t, int flags)
 960 {
 961         pri_t   tpri = -1;
 962 
 963         ASSERT(THREAD_LOCK_HELD(t));
 964 
 965         if (t->t_state == TS_RUN && (t->t_schedflag & TS_LOAD) == 0) {
 966                 tpri = (pri_t)SHRT_MAX;
 967         }
 968 
 969         return (tpri);
 970 }
 971 
 972 /*
 973  * Return an effective priority for swapout.
 974  */
 975 /* ARGSUSED */
 976 static pri_t
 977 rt_swapout(kthread_t *t, int flags)
 978 {
 979         ASSERT(THREAD_LOCK_HELD(t));
 980 
 981         return (-1);
 982 }
 983 
 984 /*
 985  * Check for time slice expiration (unless thread has infinite time
 986  * slice).  If time slice has expired arrange for thread to be preempted
 987  * and placed on back of queue.
 988  */
 989 static void
 990 rt_tick(kthread_t *t)
 991 {
 992         rtproc_t *rtpp = (rtproc_t *)(t->t_cldata);
 993 
 994         ASSERT(MUTEX_HELD(&(ttoproc(t))->p_lock));
 995 
 996         thread_lock(t);
 997         if ((rtpp->rt_pquantum != RT_TQINF && --rtpp->rt_timeleft == 0) ||
 998             (t->t_state == TS_ONPROC && DISP_MUST_SURRENDER(t))) {
 999                 if (rtpp->rt_timeleft == 0 && rtpp->rt_tqsignal) {
1000                         thread_unlock(t);
1001                         sigtoproc(ttoproc(t), t, rtpp->rt_tqsignal);
1002                         thread_lock(t);
1003                 }
1004                 rtpp->rt_flags |= RTBACKQ;
1005                 cpu_surrender(t);
1006         }
1007         thread_unlock(t);
1008 }
1009 
1010 
1011 /*
1012  * Place the thread waking up on the dispatcher queue.
1013  */
1014 static void
1015 rt_wakeup(kthread_t *t)
1016 {
1017         rtproc_t *rtpp = (rtproc_t *)(t->t_cldata);
1018 
1019         ASSERT(THREAD_LOCK_HELD(t));
1020 
1021         rtpp->rt_timeleft = rtpp->rt_pquantum;
1022         rtpp->rt_flags &= ~RTBACKQ;
1023         setbackdq(t);
1024 }
1025 
1026 static void
1027 rt_yield(kthread_t *t)
1028 {
1029         rtproc_t *rtpp = (rtproc_t *)(t->t_cldata);
1030 
1031         ASSERT(t == curthread);
1032         ASSERT(THREAD_LOCK_HELD(t));
1033 
1034         rtpp->rt_flags &= ~RTBACKQ;
1035         setbackdq(t);
1036 }
1037 
1038 /* ARGSUSED */
1039 static int
1040 rt_donice(kthread_t *t, cred_t *cr, int incr, int *retvalp)
1041 {
1042         return (EINVAL);
1043 }
1044 
1045 /*
1046  * Increment the priority of the specified thread by incr and
1047  * return the new value in *retvalp.
1048  */
1049 static int
1050 rt_doprio(kthread_t *t, cred_t *cr, int incr, int *retvalp)
1051 {
1052         int newpri;
1053         rtproc_t *rtpp = (rtproc_t *)(t->t_cldata);
1054         rtkparms_t rtkparms;
1055 
1056         /* If there's no change to the priority, just return current setting */
1057         if (incr == 0) {
1058                 *retvalp = rtpp->rt_pri;
1059                 return (0);
1060         }
1061 
1062         newpri = rtpp->rt_pri + incr;
1063         if (newpri > rt_maxpri || newpri < 0)
1064                 return (EINVAL);
1065 
1066         *retvalp = newpri;
1067         rtkparms.rt_pri = newpri;
1068         rtkparms.rt_tqntm = RT_NOCHANGE;
1069         rtkparms.rt_tqsig = 0;
1070         rtkparms.rt_cflags = RT_DOPRI;
1071         return (rt_parmsset(t, &rtkparms, rt_cid, cr));
1072 }
1073 
1074 static int
1075 rt_alloc(void **p, int flag)
1076 {
1077         void *bufp;
1078         bufp = kmem_alloc(sizeof (rtproc_t), flag);
1079         if (bufp == NULL) {
1080                 return (ENOMEM);
1081         } else {
1082                 *p = bufp;
1083                 return (0);
1084         }
1085 }
1086 
1087 static void
1088 rt_free(void *bufp)
1089 {
1090         if (bufp)
1091                 kmem_free(bufp, sizeof (rtproc_t));
1092 }
1093 
1094 static void
1095 rt_change_priority(kthread_t *t, rtproc_t *rtpp)
1096 {
1097         pri_t new_pri;
1098 
1099         ASSERT(THREAD_LOCK_HELD(t));
1100 
1101         new_pri = rt_dptbl[rtpp->rt_pri].rt_globpri;
1102 
1103         t->t_cpri = rtpp->rt_pri;
1104         if (t == curthread || t->t_state == TS_ONPROC) {
1105                 cpu_t   *cp = t->t_disp_queue->disp_cpu;
1106                 THREAD_CHANGE_PRI(t, new_pri);
1107                 if (t == cp->cpu_dispthread)
1108                         cp->cpu_dispatch_pri = DISP_PRIO(t);
1109                 if (DISP_MUST_SURRENDER(t)) {
1110                         rtpp->rt_flags |= RTBACKQ;
1111                         cpu_surrender(t);
1112                 } else {
1113                         rtpp->rt_timeleft = rtpp->rt_pquantum;
1114                 }
1115         } else {
1116                 /*
1117                  * When the priority of a thread is changed,
1118                  * it may be necessary to adjust its position
1119                  * on a sleep queue or dispatch queue.  The
1120                  * function thread_change_pri() accomplishes this.
1121                  */
1122                 if (thread_change_pri(t, new_pri, 0)) {
1123                         /*
1124                          * The thread was on a run queue.
1125                          * Reset its CPU timeleft.
1126                          */
1127                         rtpp->rt_timeleft = rtpp->rt_pquantum;
1128                 } else {
1129                         rtpp->rt_flags |= RTBACKQ;
1130                 }
1131         }
1132 }