1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/types.h> 30 #include <sys/cred_impl.h> 31 #include <sys/ucred.h> 32 #include <ucred.h> 33 #include <stdlib.h> 34 #include <signal.h> 35 #include <errno.h> 36 #include <sched.h> 37 #include <strings.h> 38 #include <pthread.h> 39 #include <time.h> 40 #include <thread.h> 41 #include <alloca.h> 42 #include <unistd.h> 43 #include <sys/syscall.h> 44 #include <sys/lx_syscall.h> 45 #include <sys/lx_debug.h> 46 #include <sys/lx_brand.h> 47 #include <sys/lx_misc.h> 48 #include <sys/lx_sched.h> 49 50 /* Linux only has three valid policies, SCHED_FIFO, SCHED_RR and SCHED_OTHER */ 51 static int 52 validate_policy(int policy) 53 { 54 switch (policy) { 55 case LX_SCHED_FIFO: 56 return (SCHED_FIFO); 57 58 case LX_SCHED_RR: 59 return (SCHED_RR); 60 61 case LX_SCHED_OTHER: 62 return (SCHED_OTHER); 63 64 default: 65 lx_debug("validate_policy: illegal policy: %d", policy); 66 return (-EINVAL); 67 } 68 } 69 70 /* 71 * Check to see if we have the permissions to set scheduler parameters and 72 * policy, based on Linux' demand that such commands fail with errno set to 73 * EPERM if the current euid is not the euid or ruid of the process in 74 * question. 75 */ 76 static int 77 check_schedperms(pid_t pid) 78 { 79 size_t sz; 80 ucred_t *cr; 81 uid_t euid; 82 83 euid = geteuid(); 84 85 if (pid == getpid()) { 86 /* 87 * If we're the process to be checked, simply check the euid 88 * against our ruid. 89 */ 90 if (euid != getuid()) 91 return (-EPERM); 92 93 return (0); 94 } 95 96 /* 97 * We allocate a ucred_t ourselves rather than call ucred_get(3C) 98 * because ucred_get() calls malloc(3C), which the brand library cannot 99 * use. Because we allocate the space with SAFE_ALLOCA(), there's 100 * no need to free it when we're done. 101 */ 102 sz = ucred_size(); 103 cr = (ucred_t *)SAFE_ALLOCA(sz); 104 105 if (cr == NULL) 106 return (-ENOMEM); 107 108 /* 109 * If we can't access the process' credentials, fail with errno EPERM 110 * as the call would not have succeeded anyway. 111 */ 112 if (syscall(SYS_ucredsys, UCREDSYS_UCREDGET, pid, cr) != 0) 113 return ((errno == EACCES) ? -EPERM : -errno); 114 115 if ((euid != ucred_geteuid(cr)) && (euid != ucred_getruid(cr))) 116 return (-EPERM); 117 118 return (0); 119 } 120 121 static int 122 ltos_sparam(int policy, struct lx_sched_param *lsp, struct sched_param *sp) 123 { 124 struct lx_sched_param ls; 125 int smin = sched_get_priority_min(policy); 126 int smax = sched_get_priority_max(policy); 127 128 if (uucopy(lsp, &ls, sizeof (struct lx_sched_param)) != 0) 129 return (-errno); 130 131 bzero(sp, sizeof (struct sched_param)); 132 133 /* 134 * Linux has a fixed priority range, 0 - 99, which we need to convert to 135 * Solaris's dynamic range. Linux considers lower numbers to be 136 * higher priority, so we'll invert the priority within Solaris's range. 137 * 138 * The formula to convert between ranges is: 139 * 140 * L * (smax - smin) 141 * S = ----------------- + smin 142 * (lmax - lmin) 143 * 144 * where S is the Solaris equivalent of the linux priority L. 145 * 146 * To invert the priority, we use: 147 * S' = smax - S + smin 148 * 149 * Together, these two formulas become: 150 * 151 * L * (smax - smin) 152 * S = smax - ----------------- + 2smin 153 * 99 154 */ 155 sp->sched_priority = smax - 156 ((ls.lx_sched_prio * (smax - smin)) / LX_PRI_MAX) + 2*smin; 157 158 lx_debug("ltos_sparam: linux prio %d = Solaris prio %d " 159 "(Solaris range %d,%d)\n", ls.lx_sched_prio, sp->sched_priority, 160 smin, smax); 161 162 return (0); 163 } 164 165 static int 166 stol_sparam(int policy, struct sched_param *sp, struct lx_sched_param *lsp) 167 { 168 struct lx_sched_param ls; 169 int smin = sched_get_priority_min(policy); 170 int smax = sched_get_priority_max(policy); 171 172 if (policy == SCHED_OTHER) { 173 /* 174 * In Linux, the only valid SCHED_OTHER scheduler priority is 0 175 */ 176 ls.lx_sched_prio = 0; 177 } else { 178 /* 179 * Convert Solaris's dynamic, inverted priority range to the 180 * fixed Linux range of 1 - 99. 181 * 182 * The formula is (see above): 183 * 184 * (smax - s + 2smin) * 99 185 * l = ----------------------- 186 * smax - smin 187 */ 188 ls.lx_sched_prio = ((smax - sp->sched_priority + 2*smin) * 189 LX_PRI_MAX) / (smax - smin); 190 } 191 192 lx_debug("stol_sparam: Solaris prio %d = linux prio %d " 193 "(Solaris range %d,%d)\n", sp->sched_priority, ls.lx_sched_prio, 194 smin, smax); 195 196 return ((uucopy(&ls, lsp, sizeof (struct lx_sched_param)) != 0) 197 ? -errno : 0); 198 } 199 200 #define BITINDEX(ind) (ind / (sizeof (ulong_t) * 8)) 201 #define BITSHIFT(ind) (1 << (ind % (sizeof (ulong_t) * 8))) 202 203 /* ARGSUSED */ 204 int 205 lx_sched_getaffinity(uintptr_t pid, uintptr_t len, uintptr_t maskp) 206 { 207 int sz; 208 ulong_t *lmask, *zmask; 209 int i; 210 211 sz = syscall(SYS_brand, B_GET_AFFINITY_MASK, pid, len, maskp); 212 if (sz == -1) 213 return (-errno); 214 215 /* 216 * If the target LWP hasn't ever had an affinity mask set, the kernel 217 * will return a mask of all 0's. If that is the case we must build a 218 * default mask that has all valid bits turned on. 219 */ 220 lmask = SAFE_ALLOCA(sz); 221 zmask = SAFE_ALLOCA(sz); 222 if (lmask == NULL || zmask == NULL) 223 return (-ENOMEM); 224 225 bzero(zmask, sz); 226 227 if (uucopy((void *)maskp, lmask, sz) != 0) 228 return (-EFAULT); 229 230 if (bcmp(lmask, zmask, sz) != 0) 231 return (sz); 232 233 for (i = 0; i < sz * 8; i++) { 234 if (p_online(i, P_STATUS) != -1) { 235 lmask[BITINDEX(i)] |= BITSHIFT(i); 236 } 237 } 238 239 if (uucopy(lmask, (void *)maskp, sz) != 0) 240 return (-EFAULT); 241 242 return (sz); 243 } 244 245 /* ARGSUSED */ 246 int 247 lx_sched_setaffinity(uintptr_t pid, uintptr_t len, uintptr_t maskp) 248 { 249 int ret; 250 int sz; 251 int i; 252 int found; 253 ulong_t *lmask; 254 pid_t s_pid; 255 lwpid_t s_tid; 256 processorid_t cpuid = NULL; 257 258 if ((pid_t)pid < 0) 259 return (-EINVAL); 260 261 if (lx_lpid_to_spair(pid, &s_pid, &s_tid) < 0) 262 return (-ESRCH); 263 264 /* 265 * We only support setting affinity masks for threads in 266 * the calling process. 267 */ 268 if (s_pid != getpid()) 269 return (-EPERM); 270 271 /* 272 * First, get the minimum bitmask size from the kernel. 273 */ 274 sz = syscall(SYS_brand, B_GET_AFFINITY_MASK, 0, 0, 0); 275 if (sz == -1) 276 return (-errno); 277 278 lmask = SAFE_ALLOCA(sz); 279 if (lmask == NULL) 280 return (-ENOMEM); 281 282 if (uucopy((void *)maskp, lmask, sz) != 0) 283 return (-EFAULT); 284 285 /* 286 * Make sure the mask contains at least one processor that is 287 * physically on the system. Reduce the user's mask to the set of 288 * physically present CPUs. Keep track of how many valid 289 * bits are set in the user's mask. 290 */ 291 292 for (found = 0, i = 0; i < sz * 8; i++) { 293 if (p_online(i, P_STATUS) == -1) { 294 /* 295 * This CPU doesn't exist, so clear this bit from 296 * the user's mask. 297 */ 298 lmask[BITINDEX(i)] &= ~BITSHIFT(i); 299 continue; 300 } 301 302 if ((lmask[BITINDEX(i)] & BITSHIFT(i)) == BITSHIFT(i)) { 303 found++; 304 cpuid = i; 305 } 306 } 307 308 if (found == 0) { 309 lx_debug("\tlx_sched_setaffinity: mask has no present CPUs\n"); 310 return (-EINVAL); 311 } 312 313 /* 314 * If only one bit is set, bind the thread to that procesor; 315 * otherwise, clear the binding. 316 */ 317 if (found == 1) { 318 lx_debug("\tlx_sched_setaffinity: binding thread %d to cpu%d\n", 319 s_tid, cpuid); 320 if (processor_bind(P_LWPID, s_tid, cpuid, NULL) != 0) 321 /* 322 * It could be that the requested processor is offline, 323 * so we'll just abandon our good-natured attempt to 324 * bind to it. 325 */ 326 lx_debug("couldn't bind LWP %d to cpu %d: %s\n", s_tid, 327 cpuid, strerror(errno)); 328 } else { 329 lx_debug("\tlx_sched_setaffinity: clearing thr %d binding\n", 330 s_tid); 331 if (processor_bind(P_LWPID, s_tid, PBIND_NONE, NULL) != 0) { 332 lx_debug("couldn't clear CPU binding for LWP %d: %s\n", 333 s_tid, strerror(errno)); 334 } 335 } 336 337 /* 338 * Finally, ask the kernel to make a note of our current (though fairly 339 * meaningless) affinity mask. 340 */ 341 ret = syscall(SYS_brand, B_SET_AFFINITY_MASK, pid, sz, lmask); 342 343 return ((ret == 0) ? 0 : -errno); 344 } 345 346 int 347 lx_sched_getparam(uintptr_t pid, uintptr_t param) 348 { 349 int policy, ret; 350 pid_t s_pid; 351 lwpid_t s_tid; 352 353 struct sched_param sp; 354 355 if (((pid_t)pid < 0) || (param == NULL)) 356 return (-EINVAL); 357 358 if (lx_lpid_to_spair((pid_t)pid, &s_pid, &s_tid) < 0) 359 return (-ESRCH); 360 361 /* 362 * If we're attempting to get information on our own process, we can 363 * get data on a per-thread basis; if not, punt and use the specified 364 * pid. 365 */ 366 if (s_pid == getpid()) { 367 if ((ret = pthread_getschedparam(s_tid, &policy, &sp)) != 0) 368 return (-ret); 369 } else { 370 if (sched_getparam(s_pid, &sp) == -1) 371 return (-errno); 372 373 if ((policy = sched_getscheduler(s_pid)) < 0) 374 return (-errno); 375 } 376 377 return (stol_sparam(policy, &sp, (struct lx_sched_param *)param)); 378 } 379 380 int 381 lx_sched_setparam(uintptr_t pid, uintptr_t param) 382 { 383 int err, policy; 384 pid_t s_pid; 385 lwpid_t s_tid; 386 struct lx_sched_param lp; 387 struct sched_param sp; 388 389 if (((pid_t)pid < 0) || (param == NULL)) 390 return (-EINVAL); 391 392 if (lx_lpid_to_spair((pid_t)pid, &s_pid, &s_tid) < 0) 393 return (-ESRCH); 394 395 if (s_pid == getpid()) { 396 struct sched_param dummy; 397 398 if ((err = pthread_getschedparam(s_tid, &policy, &dummy)) != 0) 399 return (-err); 400 } else 401 if ((policy = sched_getscheduler(s_pid)) < 0) 402 return (-errno); 403 404 lx_debug("sched_setparam(): current policy %d", policy); 405 406 if (uucopy((void *)param, &lp, sizeof (lp)) != 0) 407 return (-errno); 408 409 /* 410 * In Linux, the only valid SCHED_OTHER scheduler priority is 0 411 */ 412 if ((policy == SCHED_OTHER) && (lp.lx_sched_prio != 0)) 413 return (-EINVAL); 414 415 if ((err = ltos_sparam(policy, (struct lx_sched_param *)&lp, 416 &sp)) != 0) 417 return (err); 418 419 /* 420 * Check if we're allowed to change the scheduler for the process. 421 * 422 * If we're operating on a thread, we can't just call 423 * pthread_setschedparam() because as all threads reside within a 424 * single Solaris process, Solaris will allow the modification 425 * 426 * If we're operating on a process, we can't just call sched_setparam() 427 * because Solaris will allow the call to succeed if the scheduler 428 * parameters do not differ from those being installed, but Linux wants 429 * the call to fail. 430 */ 431 if ((err = check_schedperms(s_pid)) != 0) 432 return (err); 433 434 if (s_pid == getpid()) 435 return (((err = pthread_setschedparam(s_tid, policy, &sp)) != 0) 436 ? -err : 0); 437 438 return ((sched_setparam(s_pid, &sp) == -1) ? -errno : 0); 439 } 440 441 int 442 lx_sched_rr_get_interval(uintptr_t pid, uintptr_t timespec) 443 { 444 struct timespec ts; 445 pid_t s_pid; 446 447 if ((pid_t)pid < 0) 448 return (-EINVAL); 449 450 if (lx_lpid_to_spid((pid_t)pid, &s_pid) < 0) 451 return (-ESRCH); 452 453 if (uucopy((struct timespec *)timespec, &ts, 454 sizeof (struct timespec)) != 0) 455 return (-errno); 456 457 return ((sched_rr_get_interval(s_pid, &ts) == -1) ? -errno : 0); 458 } 459 460 int 461 lx_sched_getscheduler(uintptr_t pid) 462 { 463 int policy, rv; 464 pid_t s_pid; 465 lwpid_t s_tid; 466 467 if ((pid_t)pid < 0) 468 return (-EINVAL); 469 470 if (lx_lpid_to_spair((pid_t)pid, &s_pid, &s_tid) < 0) 471 return (-ESRCH); 472 473 if (s_pid == getpid()) { 474 struct sched_param dummy; 475 476 if ((rv = pthread_getschedparam(s_tid, &policy, &dummy)) != 0) 477 return (-rv); 478 } else 479 if ((policy = sched_getscheduler(s_pid)) < 0) 480 return (-errno); 481 482 /* 483 * Linux only supports certain policies; avoid confusing apps with 484 * alien policies. 485 */ 486 switch (policy) { 487 case SCHED_FIFO: 488 return (LX_SCHED_FIFO); 489 case SCHED_OTHER: 490 return (LX_SCHED_OTHER); 491 case SCHED_RR: 492 return (LX_SCHED_RR); 493 default: 494 break; 495 } 496 497 return (LX_SCHED_OTHER); 498 } 499 500 int 501 lx_sched_setscheduler(uintptr_t pid, uintptr_t policy, uintptr_t param) 502 { 503 int rt_pol; 504 int rv; 505 pid_t s_pid; 506 lwpid_t s_tid; 507 struct lx_sched_param lp; 508 509 struct sched_param sp; 510 511 if (((pid_t)pid < 0) || (param == NULL)) 512 return (-EINVAL); 513 514 if ((rt_pol = validate_policy((int)policy)) < 0) 515 return (rt_pol); 516 517 if ((rv = ltos_sparam(policy, (struct lx_sched_param *)param, 518 &sp)) != 0) 519 return (rv); 520 521 if (uucopy((void *)param, &lp, sizeof (lp)) != 0) 522 return (-errno); 523 524 /* 525 * In Linux, the only valid SCHED_OTHER scheduler priority is 0 526 */ 527 if ((rt_pol == LX_SCHED_OTHER) && (lp.lx_sched_prio != 0)) 528 return (-EINVAL); 529 530 if (lx_lpid_to_spair((pid_t)pid, &s_pid, &s_tid) < 0) 531 return (-ESRCH); 532 533 /* 534 * Check if we're allowed to change the scheduler for the process. 535 * 536 * If we're operating on a thread, we can't just call 537 * pthread_setschedparam() because as all threads reside within a 538 * single Solaris process, Solaris will allow the modification. 539 * 540 * If we're operating on a process, we can't just call 541 * sched_setscheduler() because Solaris will allow the call to succeed 542 * if the scheduler and scheduler parameters do not differ from those 543 * being installed, but Linux wants the call to fail. 544 */ 545 if ((rv = check_schedperms(s_pid)) != 0) 546 return (rv); 547 548 if (s_pid == getpid()) { 549 struct sched_param param; 550 int pol; 551 552 if ((pol = sched_getscheduler(s_pid)) != 0) 553 return (-errno); 554 555 /* 556 * sched_setscheduler() returns the previous scheduling policy 557 * on success, so call pthread_getschedparam() to get the 558 * current thread's scheduling policy and return that if the 559 * call to pthread_setschedparam() succeeds. 560 */ 561 if ((rv = pthread_getschedparam(s_tid, &pol, ¶m)) != 0) 562 return (-rv); 563 564 return (((rv = pthread_setschedparam(s_tid, rt_pol, &sp)) != 0) 565 ? -rv : pol); 566 } 567 568 return (((rv = sched_setscheduler(s_pid, rt_pol, &sp)) == -1) 569 ? -errno : rv); 570 } 571 572 int 573 lx_sched_get_priority_min(uintptr_t policy) 574 { 575 /* 576 * In Linux, the only valid SCHED_OTHER scheduler priority is 0. 577 * Linux scheduling priorities are not alterable, so there is no 578 * Solaris translation necessary. 579 */ 580 switch (policy) { 581 case LX_SCHED_FIFO: 582 case LX_SCHED_RR: 583 return (LX_SCHED_PRIORITY_MIN_RRFIFO); 584 case LX_SCHED_OTHER: 585 return (LX_SCHED_PRIORITY_MIN_OTHER); 586 default: 587 break; 588 } 589 return (-EINVAL); 590 } 591 592 int 593 lx_sched_get_priority_max(uintptr_t policy) 594 { 595 /* 596 * In Linux, the only valid SCHED_OTHER scheduler priority is 0 597 * Linux scheduling priorities are not alterable, so there is no 598 * Solaris translation necessary. 599 */ 600 switch (policy) { 601 case LX_SCHED_FIFO: 602 case LX_SCHED_RR: 603 return (LX_SCHED_PRIORITY_MAX_RRFIFO); 604 case LX_SCHED_OTHER: 605 return (LX_SCHED_PRIORITY_MAX_OTHER); 606 default: 607 break; 608 } 609 return (-EINVAL); 610 }