1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  */
  26 
  27 #pragma ident   "%Z%%M% %I%     %E% SMI"
  28 
  29 #include <sys/types.h>
  30 #include <sys/cred_impl.h>
  31 #include <sys/ucred.h>
  32 #include <ucred.h>
  33 #include <stdlib.h>
  34 #include <signal.h>
  35 #include <errno.h>
  36 #include <sched.h>
  37 #include <strings.h>
  38 #include <pthread.h>
  39 #include <time.h>
  40 #include <thread.h>
  41 #include <alloca.h>
  42 #include <unistd.h>
  43 #include <sys/syscall.h>
  44 #include <sys/lx_syscall.h>
  45 #include <sys/lx_debug.h>
  46 #include <sys/lx_brand.h>
  47 #include <sys/lx_misc.h>
  48 #include <sys/lx_sched.h>
  49 
  50 /* Linux only has three valid policies, SCHED_FIFO, SCHED_RR and SCHED_OTHER */
  51 static int
  52 validate_policy(int policy)
  53 {
  54         switch (policy) {
  55                 case LX_SCHED_FIFO:
  56                         return (SCHED_FIFO);
  57 
  58                 case LX_SCHED_RR:
  59                         return (SCHED_RR);
  60 
  61                 case LX_SCHED_OTHER:
  62                         return (SCHED_OTHER);
  63 
  64                 default:
  65                         lx_debug("validate_policy: illegal policy: %d", policy);
  66                         return (-EINVAL);
  67         }
  68 }
  69 
  70 /*
  71  * Check to see if we have the permissions to set scheduler parameters and
  72  * policy, based on Linux' demand that such commands fail with errno set to
  73  * EPERM if the current euid is not the euid or ruid of the process in
  74  * question.
  75  */
  76 static int
  77 check_schedperms(pid_t pid)
  78 {
  79         size_t sz;
  80         ucred_t *cr;
  81         uid_t euid;
  82 
  83         euid = geteuid();
  84 
  85         if (pid == getpid()) {
  86                 /*
  87                  * If we're the process to be checked, simply check the euid
  88                  * against our ruid.
  89                  */
  90                 if (euid != getuid())
  91                         return (-EPERM);
  92 
  93                 return (0);
  94         }
  95 
  96         /*
  97          * We allocate a ucred_t ourselves rather than call ucred_get(3C)
  98          * because ucred_get() calls malloc(3C), which the brand library cannot
  99          * use.  Because we allocate the space with SAFE_ALLOCA(), there's
 100          * no need to free it when we're done.
 101          */
 102         sz = ucred_size();
 103         cr = (ucred_t *)SAFE_ALLOCA(sz);
 104 
 105         if (cr == NULL)
 106                 return (-ENOMEM);
 107 
 108         /*
 109          * If we can't access the process' credentials, fail with errno EPERM
 110          * as the call would not have succeeded anyway.
 111          */
 112         if (syscall(SYS_ucredsys, UCREDSYS_UCREDGET, pid, cr) != 0)
 113                 return ((errno == EACCES) ? -EPERM : -errno);
 114 
 115         if ((euid != ucred_geteuid(cr)) && (euid != ucred_getruid(cr)))
 116                 return (-EPERM);
 117 
 118         return (0);
 119 }
 120 
 121 static int
 122 ltos_sparam(int policy, struct lx_sched_param *lsp, struct sched_param *sp)
 123 {
 124         struct lx_sched_param ls;
 125         int smin = sched_get_priority_min(policy);
 126         int smax = sched_get_priority_max(policy);
 127 
 128         if (uucopy(lsp, &ls, sizeof (struct lx_sched_param)) != 0)
 129                 return (-errno);
 130 
 131         bzero(sp, sizeof (struct sched_param));
 132 
 133         /*
 134          * Linux has a fixed priority range, 0 - 99, which we need to convert to
 135          * Solaris's dynamic range. Linux considers lower numbers to be
 136          * higher priority, so we'll invert the priority within Solaris's range.
 137          *
 138          * The formula to convert between ranges is:
 139          *
 140          *      L * (smax - smin)
 141          * S =  -----------------  + smin
 142          *        (lmax - lmin)
 143          *
 144          * where S is the Solaris equivalent of the linux priority L.
 145          *
 146          * To invert the priority, we use:
 147          * S' = smax - S + smin
 148          *
 149          * Together, these two formulas become:
 150          *
 151          *              L * (smax - smin)
 152          *   S = smax - -----------------  + 2smin
 153          *                      99
 154          */
 155         sp->sched_priority = smax -
 156             ((ls.lx_sched_prio * (smax - smin)) / LX_PRI_MAX) + 2*smin;
 157 
 158         lx_debug("ltos_sparam: linux prio %d = Solaris prio %d "
 159             "(Solaris range %d,%d)\n", ls.lx_sched_prio, sp->sched_priority,
 160             smin, smax);
 161 
 162         return (0);
 163 }
 164 
 165 static int
 166 stol_sparam(int policy, struct sched_param *sp, struct lx_sched_param *lsp)
 167 {
 168         struct lx_sched_param ls;
 169         int smin = sched_get_priority_min(policy);
 170         int smax = sched_get_priority_max(policy);
 171 
 172         if (policy == SCHED_OTHER) {
 173                 /*
 174                  * In Linux, the only valid SCHED_OTHER scheduler priority is 0
 175                  */
 176                 ls.lx_sched_prio = 0;
 177         } else {
 178                 /*
 179                  * Convert Solaris's dynamic, inverted priority range to the
 180                  * fixed Linux range of 1 - 99.
 181                  *
 182                  * The formula is (see above):
 183                  *
 184                  *      (smax - s + 2smin) * 99
 185                  *  l = -----------------------
 186                  *              smax - smin
 187                  */
 188                 ls.lx_sched_prio = ((smax - sp->sched_priority + 2*smin) *
 189                     LX_PRI_MAX) / (smax - smin);
 190         }
 191 
 192         lx_debug("stol_sparam: Solaris prio %d = linux prio %d "
 193             "(Solaris range %d,%d)\n", sp->sched_priority, ls.lx_sched_prio,
 194             smin, smax);
 195 
 196         return ((uucopy(&ls, lsp, sizeof (struct lx_sched_param)) != 0)
 197             ? -errno : 0);
 198 }
 199 
 200 #define BITINDEX(ind)   (ind / (sizeof (ulong_t) * 8))
 201 #define BITSHIFT(ind)   (1 << (ind % (sizeof (ulong_t) * 8)))
 202 
 203 /* ARGSUSED */
 204 int
 205 lx_sched_getaffinity(uintptr_t pid, uintptr_t len, uintptr_t maskp)
 206 {
 207         int     sz;
 208         ulong_t *lmask, *zmask;
 209         int     i;
 210 
 211         sz = syscall(SYS_brand, B_GET_AFFINITY_MASK, pid, len, maskp);
 212         if (sz == -1)
 213                 return (-errno);
 214 
 215         /*
 216          * If the target LWP hasn't ever had an affinity mask set, the kernel
 217          * will return a mask of all 0's. If that is the case we must build a
 218          * default mask that has all valid bits turned on.
 219          */
 220         lmask = SAFE_ALLOCA(sz);
 221         zmask = SAFE_ALLOCA(sz);
 222         if (lmask == NULL || zmask == NULL)
 223                 return (-ENOMEM);
 224 
 225         bzero(zmask, sz);
 226 
 227         if (uucopy((void *)maskp, lmask, sz) != 0)
 228                 return (-EFAULT);
 229 
 230         if (bcmp(lmask, zmask, sz) != 0)
 231                 return (sz);
 232 
 233         for (i = 0; i < sz * 8; i++) {
 234                 if (p_online(i, P_STATUS) != -1) {
 235                         lmask[BITINDEX(i)] |= BITSHIFT(i);
 236                 }
 237         }
 238 
 239         if (uucopy(lmask, (void *)maskp, sz) != 0)
 240                 return (-EFAULT);
 241 
 242         return (sz);
 243 }
 244 
 245 /* ARGSUSED */
 246 int
 247 lx_sched_setaffinity(uintptr_t pid, uintptr_t len, uintptr_t maskp)
 248 {
 249         int             ret;
 250         int             sz;
 251         int             i;
 252         int             found;
 253         ulong_t         *lmask;
 254         pid_t           s_pid;
 255         lwpid_t         s_tid;
 256         processorid_t   cpuid = NULL;
 257 
 258         if ((pid_t)pid < 0)
 259                 return (-EINVAL);
 260 
 261         if (lx_lpid_to_spair(pid, &s_pid, &s_tid) < 0)
 262                 return (-ESRCH);
 263 
 264         /*
 265          * We only support setting affinity masks for threads in
 266          * the calling process.
 267          */
 268         if (s_pid != getpid())
 269                 return (-EPERM);
 270 
 271         /*
 272          * First, get the minimum bitmask size from the kernel.
 273          */
 274         sz = syscall(SYS_brand, B_GET_AFFINITY_MASK, 0, 0, 0);
 275         if (sz == -1)
 276                 return (-errno);
 277 
 278         lmask = SAFE_ALLOCA(sz);
 279         if (lmask == NULL)
 280                 return (-ENOMEM);
 281 
 282         if (uucopy((void *)maskp, lmask, sz) != 0)
 283                 return (-EFAULT);
 284 
 285         /*
 286          * Make sure the mask contains at least one processor that is
 287          * physically on the system. Reduce the user's mask to the set of
 288          * physically present CPUs. Keep track of how many valid
 289          * bits are set in the user's mask.
 290          */
 291 
 292         for (found = 0, i = 0; i < sz * 8; i++) {
 293                 if (p_online(i, P_STATUS) == -1) {
 294                         /*
 295                          * This CPU doesn't exist, so clear this bit from
 296                          * the user's mask.
 297                          */
 298                         lmask[BITINDEX(i)] &= ~BITSHIFT(i);
 299                         continue;
 300                 }
 301 
 302                 if ((lmask[BITINDEX(i)] & BITSHIFT(i)) == BITSHIFT(i)) {
 303                         found++;
 304                         cpuid = i;
 305                 }
 306         }
 307 
 308         if (found == 0) {
 309                 lx_debug("\tlx_sched_setaffinity: mask has no present CPUs\n");
 310                 return (-EINVAL);
 311         }
 312 
 313         /*
 314          * If only one bit is set, bind the thread to that procesor;
 315          * otherwise, clear the binding.
 316          */
 317         if (found == 1) {
 318                 lx_debug("\tlx_sched_setaffinity: binding thread %d to cpu%d\n",
 319                     s_tid, cpuid);
 320                 if (processor_bind(P_LWPID, s_tid, cpuid, NULL) != 0)
 321                         /*
 322                          * It could be that the requested processor is offline,
 323                          * so we'll just abandon our good-natured attempt to
 324                          * bind to it.
 325                          */
 326                         lx_debug("couldn't bind LWP %d to cpu %d: %s\n", s_tid,
 327                             cpuid, strerror(errno));
 328         } else {
 329                 lx_debug("\tlx_sched_setaffinity: clearing thr %d binding\n",
 330                     s_tid);
 331                 if (processor_bind(P_LWPID, s_tid, PBIND_NONE, NULL) != 0) {
 332                         lx_debug("couldn't clear CPU binding for LWP %d: %s\n",
 333                             s_tid, strerror(errno));
 334                 }
 335         }
 336 
 337         /*
 338          * Finally, ask the kernel to make a note of our current (though fairly
 339          * meaningless) affinity mask.
 340          */
 341         ret = syscall(SYS_brand, B_SET_AFFINITY_MASK, pid, sz, lmask);
 342 
 343         return ((ret == 0) ? 0 : -errno);
 344 }
 345 
 346 int
 347 lx_sched_getparam(uintptr_t pid, uintptr_t param)
 348 {
 349         int     policy, ret;
 350         pid_t   s_pid;
 351         lwpid_t s_tid;
 352 
 353         struct sched_param sp;
 354 
 355         if (((pid_t)pid < 0) || (param == NULL))
 356                 return (-EINVAL);
 357 
 358         if (lx_lpid_to_spair((pid_t)pid, &s_pid, &s_tid) < 0)
 359                 return (-ESRCH);
 360 
 361         /*
 362          * If we're attempting to get information on our own process, we can
 363          * get data on a per-thread basis; if not, punt and use the specified
 364          * pid.
 365          */
 366         if (s_pid == getpid()) {
 367                 if ((ret = pthread_getschedparam(s_tid, &policy, &sp)) != 0)
 368                     return (-ret);
 369         } else {
 370                 if (sched_getparam(s_pid, &sp) == -1)
 371                         return (-errno);
 372 
 373                 if ((policy = sched_getscheduler(s_pid)) < 0)
 374                         return (-errno);
 375         }
 376 
 377         return (stol_sparam(policy, &sp, (struct lx_sched_param *)param));
 378 }
 379 
 380 int
 381 lx_sched_setparam(uintptr_t pid, uintptr_t param)
 382 {
 383         int     err, policy;
 384         pid_t   s_pid;
 385         lwpid_t s_tid;
 386         struct lx_sched_param lp;
 387         struct sched_param sp;
 388 
 389         if (((pid_t)pid < 0) || (param == NULL))
 390                 return (-EINVAL);
 391 
 392         if (lx_lpid_to_spair((pid_t)pid, &s_pid, &s_tid) < 0)
 393                 return (-ESRCH);
 394 
 395         if (s_pid == getpid()) {
 396                 struct sched_param dummy;
 397 
 398                 if ((err = pthread_getschedparam(s_tid, &policy, &dummy)) != 0)
 399                         return (-err);
 400         } else
 401                 if ((policy = sched_getscheduler(s_pid)) < 0)
 402                         return (-errno);
 403 
 404         lx_debug("sched_setparam(): current policy %d", policy);
 405 
 406         if (uucopy((void *)param, &lp, sizeof (lp)) != 0)
 407                 return (-errno);
 408 
 409         /*
 410          * In Linux, the only valid SCHED_OTHER scheduler priority is 0
 411          */
 412         if ((policy == SCHED_OTHER) && (lp.lx_sched_prio != 0))
 413                 return (-EINVAL);
 414 
 415         if ((err = ltos_sparam(policy, (struct lx_sched_param *)&lp,
 416             &sp)) != 0)
 417                 return (err);
 418 
 419         /*
 420          * Check if we're allowed to change the scheduler for the process.
 421          *
 422          * If we're operating on a thread, we can't just call
 423          * pthread_setschedparam() because as all threads reside within a
 424          * single Solaris process, Solaris will allow the modification
 425          *
 426          * If we're operating on a process, we can't just call sched_setparam()
 427          * because Solaris will allow the call to succeed if the scheduler
 428          * parameters do not differ from those being installed, but Linux wants
 429          * the call to fail.
 430          */
 431         if ((err = check_schedperms(s_pid)) != 0)
 432                 return (err);
 433 
 434         if (s_pid == getpid())
 435                 return (((err = pthread_setschedparam(s_tid, policy, &sp)) != 0)
 436                     ? -err : 0);
 437 
 438         return ((sched_setparam(s_pid, &sp) == -1) ? -errno : 0);
 439 }
 440 
 441 int
 442 lx_sched_rr_get_interval(uintptr_t pid, uintptr_t timespec)
 443 {
 444         struct timespec ts;
 445         pid_t   s_pid;
 446 
 447         if ((pid_t)pid < 0)
 448                 return (-EINVAL);
 449 
 450         if (lx_lpid_to_spid((pid_t)pid, &s_pid) < 0)
 451                 return (-ESRCH);
 452 
 453         if (uucopy((struct timespec *)timespec, &ts,
 454             sizeof (struct timespec)) != 0)
 455                 return (-errno);
 456 
 457         return ((sched_rr_get_interval(s_pid, &ts) == -1) ? -errno : 0);
 458 }
 459 
 460 int
 461 lx_sched_getscheduler(uintptr_t pid)
 462 {
 463         int     policy, rv;
 464         pid_t   s_pid;
 465         lwpid_t s_tid;
 466 
 467         if ((pid_t)pid < 0)
 468                 return (-EINVAL);
 469 
 470         if (lx_lpid_to_spair((pid_t)pid, &s_pid, &s_tid) < 0)
 471                 return (-ESRCH);
 472 
 473         if (s_pid == getpid()) {
 474                 struct sched_param dummy;
 475 
 476                 if ((rv = pthread_getschedparam(s_tid, &policy, &dummy)) != 0)
 477                         return (-rv);
 478         } else
 479                 if ((policy = sched_getscheduler(s_pid)) < 0)
 480                         return (-errno);
 481 
 482         /*
 483          * Linux only supports certain policies; avoid confusing apps with
 484          * alien policies.
 485          */
 486         switch (policy) {
 487         case SCHED_FIFO:
 488                 return (LX_SCHED_FIFO);
 489         case SCHED_OTHER:
 490                 return (LX_SCHED_OTHER);
 491         case SCHED_RR:
 492                 return (LX_SCHED_RR);
 493         default:
 494                 break;
 495         }
 496 
 497         return (LX_SCHED_OTHER);
 498 }
 499 
 500 int
 501 lx_sched_setscheduler(uintptr_t pid, uintptr_t policy, uintptr_t param)
 502 {
 503         int     rt_pol;
 504         int     rv;
 505         pid_t   s_pid;
 506         lwpid_t s_tid;
 507         struct lx_sched_param lp;
 508 
 509         struct sched_param sp;
 510 
 511         if (((pid_t)pid < 0) || (param == NULL))
 512                 return (-EINVAL);
 513 
 514         if ((rt_pol = validate_policy((int)policy)) < 0)
 515                 return (rt_pol);
 516 
 517         if ((rv = ltos_sparam(policy, (struct lx_sched_param *)param,
 518             &sp)) != 0)
 519                 return (rv);
 520 
 521         if (uucopy((void *)param, &lp, sizeof (lp)) != 0)
 522                 return (-errno);
 523 
 524         /*
 525          * In Linux, the only valid SCHED_OTHER scheduler priority is 0
 526          */
 527         if ((rt_pol == LX_SCHED_OTHER) && (lp.lx_sched_prio != 0))
 528                 return (-EINVAL);
 529 
 530         if (lx_lpid_to_spair((pid_t)pid, &s_pid, &s_tid) < 0)
 531                 return (-ESRCH);
 532 
 533         /*
 534          * Check if we're allowed to change the scheduler for the process.
 535          *
 536          * If we're operating on a thread, we can't just call
 537          * pthread_setschedparam() because as all threads reside within a
 538          * single Solaris process, Solaris will allow the modification.
 539          *
 540          * If we're operating on a process, we can't just call
 541          * sched_setscheduler() because Solaris will allow the call to succeed
 542          * if the scheduler and scheduler parameters do not differ from those
 543          * being installed, but Linux wants the call to fail.
 544          */
 545         if ((rv = check_schedperms(s_pid)) != 0)
 546                 return (rv);
 547 
 548         if (s_pid == getpid()) {
 549                 struct sched_param param;
 550                 int pol;
 551 
 552                 if ((pol = sched_getscheduler(s_pid)) != 0)
 553                         return (-errno);
 554 
 555                 /*
 556                  * sched_setscheduler() returns the previous scheduling policy
 557                  * on success, so call pthread_getschedparam() to get the
 558                  * current thread's scheduling policy and return that if the
 559                  * call to pthread_setschedparam() succeeds.
 560                  */
 561                 if ((rv = pthread_getschedparam(s_tid, &pol, &param)) != 0)
 562                         return (-rv);
 563 
 564                 return (((rv = pthread_setschedparam(s_tid, rt_pol, &sp)) != 0)
 565                     ? -rv : pol);
 566         }
 567 
 568         return (((rv = sched_setscheduler(s_pid, rt_pol, &sp)) == -1)
 569             ? -errno : rv);
 570 }
 571 
 572 int
 573 lx_sched_get_priority_min(uintptr_t policy)
 574 {
 575         /*
 576          * In Linux, the only valid SCHED_OTHER scheduler priority is 0.
 577          * Linux scheduling priorities are not alterable, so there is no
 578          * Solaris translation necessary.
 579          */
 580         switch (policy) {
 581         case LX_SCHED_FIFO:
 582         case LX_SCHED_RR:
 583                 return (LX_SCHED_PRIORITY_MIN_RRFIFO);
 584         case LX_SCHED_OTHER:
 585                 return (LX_SCHED_PRIORITY_MIN_OTHER);
 586         default:
 587                 break;
 588         }
 589         return (-EINVAL);
 590 }
 591 
 592 int
 593 lx_sched_get_priority_max(uintptr_t policy)
 594 {
 595         /*
 596          * In Linux, the only valid SCHED_OTHER scheduler priority is 0
 597          * Linux scheduling priorities are not alterable, so there is no
 598          * Solaris translation necessary.
 599          */
 600         switch (policy) {
 601         case LX_SCHED_FIFO:
 602         case LX_SCHED_RR:
 603                 return (LX_SCHED_PRIORITY_MAX_RRFIFO);
 604         case LX_SCHED_OTHER:
 605                 return (LX_SCHED_PRIORITY_MAX_OTHER);
 606         default:
 607                 break;
 608         }
 609         return (-EINVAL);
 610 }