1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 #pragma ident "%Z%%M% %I% %E% SMI"
28
29 #include <sys/types.h>
30 #include <sys/cred_impl.h>
31 #include <sys/ucred.h>
32 #include <ucred.h>
33 #include <stdlib.h>
34 #include <signal.h>
35 #include <errno.h>
36 #include <sched.h>
37 #include <strings.h>
38 #include <pthread.h>
39 #include <time.h>
40 #include <thread.h>
41 #include <alloca.h>
42 #include <unistd.h>
43 #include <sys/syscall.h>
44 #include <sys/lx_syscall.h>
45 #include <sys/lx_debug.h>
46 #include <sys/lx_brand.h>
47 #include <sys/lx_misc.h>
48 #include <sys/lx_sched.h>
49
50 /* Linux only has three valid policies, SCHED_FIFO, SCHED_RR and SCHED_OTHER */
51 static int
52 validate_policy(int policy)
53 {
54 switch (policy) {
55 case LX_SCHED_FIFO:
56 return (SCHED_FIFO);
57
58 case LX_SCHED_RR:
59 return (SCHED_RR);
60
61 case LX_SCHED_OTHER:
62 return (SCHED_OTHER);
63
64 default:
65 lx_debug("validate_policy: illegal policy: %d", policy);
66 return (-EINVAL);
67 }
68 }
69
70 /*
71 * Check to see if we have the permissions to set scheduler parameters and
72 * policy, based on Linux' demand that such commands fail with errno set to
73 * EPERM if the current euid is not the euid or ruid of the process in
74 * question.
75 */
76 static int
77 check_schedperms(pid_t pid)
78 {
79 size_t sz;
80 ucred_t *cr;
81 uid_t euid;
82
83 euid = geteuid();
84
85 if (pid == getpid()) {
86 /*
87 * If we're the process to be checked, simply check the euid
88 * against our ruid.
89 */
90 if (euid != getuid())
91 return (-EPERM);
92
93 return (0);
94 }
95
96 /*
97 * We allocate a ucred_t ourselves rather than call ucred_get(3C)
98 * because ucred_get() calls malloc(3C), which the brand library cannot
99 * use. Because we allocate the space with SAFE_ALLOCA(), there's
100 * no need to free it when we're done.
101 */
102 sz = ucred_size();
103 cr = (ucred_t *)SAFE_ALLOCA(sz);
104
105 if (cr == NULL)
106 return (-ENOMEM);
107
108 /*
109 * If we can't access the process' credentials, fail with errno EPERM
110 * as the call would not have succeeded anyway.
111 */
112 if (syscall(SYS_ucredsys, UCREDSYS_UCREDGET, pid, cr) != 0)
113 return ((errno == EACCES) ? -EPERM : -errno);
114
115 if ((euid != ucred_geteuid(cr)) && (euid != ucred_getruid(cr)))
116 return (-EPERM);
117
118 return (0);
119 }
120
121 static int
122 ltos_sparam(int policy, struct lx_sched_param *lsp, struct sched_param *sp)
123 {
124 struct lx_sched_param ls;
125 int smin = sched_get_priority_min(policy);
126 int smax = sched_get_priority_max(policy);
127
128 if (uucopy(lsp, &ls, sizeof (struct lx_sched_param)) != 0)
129 return (-errno);
130
131 bzero(sp, sizeof (struct sched_param));
132
133 /*
134 * Linux has a fixed priority range, 0 - 99, which we need to convert to
135 * Solaris's dynamic range. Linux considers lower numbers to be
136 * higher priority, so we'll invert the priority within Solaris's range.
137 *
138 * The formula to convert between ranges is:
139 *
140 * L * (smax - smin)
141 * S = ----------------- + smin
142 * (lmax - lmin)
143 *
144 * where S is the Solaris equivalent of the linux priority L.
145 *
146 * To invert the priority, we use:
147 * S' = smax - S + smin
148 *
149 * Together, these two formulas become:
150 *
151 * L * (smax - smin)
152 * S = smax - ----------------- + 2smin
153 * 99
154 */
155 sp->sched_priority = smax -
156 ((ls.lx_sched_prio * (smax - smin)) / LX_PRI_MAX) + 2*smin;
157
158 lx_debug("ltos_sparam: linux prio %d = Solaris prio %d "
159 "(Solaris range %d,%d)\n", ls.lx_sched_prio, sp->sched_priority,
160 smin, smax);
161
162 return (0);
163 }
164
165 static int
166 stol_sparam(int policy, struct sched_param *sp, struct lx_sched_param *lsp)
167 {
168 struct lx_sched_param ls;
169 int smin = sched_get_priority_min(policy);
170 int smax = sched_get_priority_max(policy);
171
172 if (policy == SCHED_OTHER) {
173 /*
174 * In Linux, the only valid SCHED_OTHER scheduler priority is 0
175 */
176 ls.lx_sched_prio = 0;
177 } else {
178 /*
179 * Convert Solaris's dynamic, inverted priority range to the
180 * fixed Linux range of 1 - 99.
181 *
182 * The formula is (see above):
183 *
184 * (smax - s + 2smin) * 99
185 * l = -----------------------
186 * smax - smin
187 */
188 ls.lx_sched_prio = ((smax - sp->sched_priority + 2*smin) *
189 LX_PRI_MAX) / (smax - smin);
190 }
191
192 lx_debug("stol_sparam: Solaris prio %d = linux prio %d "
193 "(Solaris range %d,%d)\n", sp->sched_priority, ls.lx_sched_prio,
194 smin, smax);
195
196 return ((uucopy(&ls, lsp, sizeof (struct lx_sched_param)) != 0)
197 ? -errno : 0);
198 }
199
/*
 * Helpers for addressing bit `ind' of a CPU mask stored as an array of
 * ulong_t words.  BITINDEX() gives the index of the word holding the bit;
 * BITSHIFT() gives a ulong_t with only that bit's position within the word
 * set.  The shift is performed on a ulong_t (not an int) so that every bit
 * of the word, including the top one, can be produced, and the argument is
 * parenthesized so that expressions such as `a + b' expand correctly.
 */
#define	BITINDEX(ind)	((ind) / (sizeof (ulong_t) * 8))
#define	BITSHIFT(ind)	((ulong_t)1 << ((ind) % (sizeof (ulong_t) * 8)))
202
203 /* ARGSUSED */
204 int
205 lx_sched_getaffinity(uintptr_t pid, uintptr_t len, uintptr_t maskp)
206 {
207 int sz;
208 ulong_t *lmask, *zmask;
209 int i;
210
211 sz = syscall(SYS_brand, B_GET_AFFINITY_MASK, pid, len, maskp);
212 if (sz == -1)
213 return (-errno);
214
215 /*
216 * If the target LWP hasn't ever had an affinity mask set, the kernel
217 * will return a mask of all 0's. If that is the case we must build a
218 * default mask that has all valid bits turned on.
219 */
220 lmask = SAFE_ALLOCA(sz);
221 zmask = SAFE_ALLOCA(sz);
222 if (lmask == NULL || zmask == NULL)
223 return (-ENOMEM);
224
225 bzero(zmask, sz);
226
227 if (uucopy((void *)maskp, lmask, sz) != 0)
228 return (-EFAULT);
229
230 if (bcmp(lmask, zmask, sz) != 0)
231 return (sz);
232
233 for (i = 0; i < sz * 8; i++) {
234 if (p_online(i, P_STATUS) != -1) {
235 lmask[BITINDEX(i)] |= BITSHIFT(i);
236 }
237 }
238
239 if (uucopy(lmask, (void *)maskp, sz) != 0)
240 return (-EFAULT);
241
242 return (sz);
243 }
244
245 /* ARGSUSED */
246 int
247 lx_sched_setaffinity(uintptr_t pid, uintptr_t len, uintptr_t maskp)
248 {
249 int ret;
250 int sz;
251 int i;
252 int found;
253 ulong_t *lmask;
254 pid_t s_pid;
255 lwpid_t s_tid;
256 processorid_t cpuid = NULL;
257
258 if ((pid_t)pid < 0)
259 return (-EINVAL);
260
261 if (lx_lpid_to_spair(pid, &s_pid, &s_tid) < 0)
262 return (-ESRCH);
263
264 /*
265 * We only support setting affinity masks for threads in
266 * the calling process.
267 */
268 if (s_pid != getpid())
269 return (-EPERM);
270
271 /*
272 * First, get the minimum bitmask size from the kernel.
273 */
274 sz = syscall(SYS_brand, B_GET_AFFINITY_MASK, 0, 0, 0);
275 if (sz == -1)
276 return (-errno);
277
278 lmask = SAFE_ALLOCA(sz);
279 if (lmask == NULL)
280 return (-ENOMEM);
281
282 if (uucopy((void *)maskp, lmask, sz) != 0)
283 return (-EFAULT);
284
285 /*
286 * Make sure the mask contains at least one processor that is
287 * physically on the system. Reduce the user's mask to the set of
288 * physically present CPUs. Keep track of how many valid
289 * bits are set in the user's mask.
290 */
291
292 for (found = 0, i = 0; i < sz * 8; i++) {
293 if (p_online(i, P_STATUS) == -1) {
294 /*
295 * This CPU doesn't exist, so clear this bit from
296 * the user's mask.
297 */
298 lmask[BITINDEX(i)] &= ~BITSHIFT(i);
299 continue;
300 }
301
302 if ((lmask[BITINDEX(i)] & BITSHIFT(i)) == BITSHIFT(i)) {
303 found++;
304 cpuid = i;
305 }
306 }
307
308 if (found == 0) {
309 lx_debug("\tlx_sched_setaffinity: mask has no present CPUs\n");
310 return (-EINVAL);
311 }
312
313 /*
314 * If only one bit is set, bind the thread to that procesor;
315 * otherwise, clear the binding.
316 */
317 if (found == 1) {
318 lx_debug("\tlx_sched_setaffinity: binding thread %d to cpu%d\n",
319 s_tid, cpuid);
320 if (processor_bind(P_LWPID, s_tid, cpuid, NULL) != 0)
321 /*
322 * It could be that the requested processor is offline,
323 * so we'll just abandon our good-natured attempt to
324 * bind to it.
325 */
326 lx_debug("couldn't bind LWP %d to cpu %d: %s\n", s_tid,
327 cpuid, strerror(errno));
328 } else {
329 lx_debug("\tlx_sched_setaffinity: clearing thr %d binding\n",
330 s_tid);
331 if (processor_bind(P_LWPID, s_tid, PBIND_NONE, NULL) != 0) {
332 lx_debug("couldn't clear CPU binding for LWP %d: %s\n",
333 s_tid, strerror(errno));
334 }
335 }
336
337 /*
338 * Finally, ask the kernel to make a note of our current (though fairly
339 * meaningless) affinity mask.
340 */
341 ret = syscall(SYS_brand, B_SET_AFFINITY_MASK, pid, sz, lmask);
342
343 return ((ret == 0) ? 0 : -errno);
344 }
345
346 int
347 lx_sched_getparam(uintptr_t pid, uintptr_t param)
348 {
349 int policy, ret;
350 pid_t s_pid;
351 lwpid_t s_tid;
352
353 struct sched_param sp;
354
355 if (((pid_t)pid < 0) || (param == NULL))
356 return (-EINVAL);
357
358 if (lx_lpid_to_spair((pid_t)pid, &s_pid, &s_tid) < 0)
359 return (-ESRCH);
360
361 /*
362 * If we're attempting to get information on our own process, we can
363 * get data on a per-thread basis; if not, punt and use the specified
364 * pid.
365 */
366 if (s_pid == getpid()) {
367 if ((ret = pthread_getschedparam(s_tid, &policy, &sp)) != 0)
368 return (-ret);
369 } else {
370 if (sched_getparam(s_pid, &sp) == -1)
371 return (-errno);
372
373 if ((policy = sched_getscheduler(s_pid)) < 0)
374 return (-errno);
375 }
376
377 return (stol_sparam(policy, &sp, (struct lx_sched_param *)param));
378 }
379
380 int
381 lx_sched_setparam(uintptr_t pid, uintptr_t param)
382 {
383 int err, policy;
384 pid_t s_pid;
385 lwpid_t s_tid;
386 struct lx_sched_param lp;
387 struct sched_param sp;
388
389 if (((pid_t)pid < 0) || (param == NULL))
390 return (-EINVAL);
391
392 if (lx_lpid_to_spair((pid_t)pid, &s_pid, &s_tid) < 0)
393 return (-ESRCH);
394
395 if (s_pid == getpid()) {
396 struct sched_param dummy;
397
398 if ((err = pthread_getschedparam(s_tid, &policy, &dummy)) != 0)
399 return (-err);
400 } else
401 if ((policy = sched_getscheduler(s_pid)) < 0)
402 return (-errno);
403
404 lx_debug("sched_setparam(): current policy %d", policy);
405
406 if (uucopy((void *)param, &lp, sizeof (lp)) != 0)
407 return (-errno);
408
409 /*
410 * In Linux, the only valid SCHED_OTHER scheduler priority is 0
411 */
412 if ((policy == SCHED_OTHER) && (lp.lx_sched_prio != 0))
413 return (-EINVAL);
414
415 if ((err = ltos_sparam(policy, (struct lx_sched_param *)&lp,
416 &sp)) != 0)
417 return (err);
418
419 /*
420 * Check if we're allowed to change the scheduler for the process.
421 *
422 * If we're operating on a thread, we can't just call
423 * pthread_setschedparam() because as all threads reside within a
424 * single Solaris process, Solaris will allow the modification
425 *
426 * If we're operating on a process, we can't just call sched_setparam()
427 * because Solaris will allow the call to succeed if the scheduler
428 * parameters do not differ from those being installed, but Linux wants
429 * the call to fail.
430 */
431 if ((err = check_schedperms(s_pid)) != 0)
432 return (err);
433
434 if (s_pid == getpid())
435 return (((err = pthread_setschedparam(s_tid, policy, &sp)) != 0)
436 ? -err : 0);
437
438 return ((sched_setparam(s_pid, &sp) == -1) ? -errno : 0);
439 }
440
441 int
442 lx_sched_rr_get_interval(uintptr_t pid, uintptr_t timespec)
443 {
444 struct timespec ts;
445 pid_t s_pid;
446
447 if ((pid_t)pid < 0)
448 return (-EINVAL);
449
450 if (lx_lpid_to_spid((pid_t)pid, &s_pid) < 0)
451 return (-ESRCH);
452
453 if (uucopy((struct timespec *)timespec, &ts,
454 sizeof (struct timespec)) != 0)
455 return (-errno);
456
457 return ((sched_rr_get_interval(s_pid, &ts) == -1) ? -errno : 0);
458 }
459
460 int
461 lx_sched_getscheduler(uintptr_t pid)
462 {
463 int policy, rv;
464 pid_t s_pid;
465 lwpid_t s_tid;
466
467 if ((pid_t)pid < 0)
468 return (-EINVAL);
469
470 if (lx_lpid_to_spair((pid_t)pid, &s_pid, &s_tid) < 0)
471 return (-ESRCH);
472
473 if (s_pid == getpid()) {
474 struct sched_param dummy;
475
476 if ((rv = pthread_getschedparam(s_tid, &policy, &dummy)) != 0)
477 return (-rv);
478 } else
479 if ((policy = sched_getscheduler(s_pid)) < 0)
480 return (-errno);
481
482 /*
483 * Linux only supports certain policies; avoid confusing apps with
484 * alien policies.
485 */
486 switch (policy) {
487 case SCHED_FIFO:
488 return (LX_SCHED_FIFO);
489 case SCHED_OTHER:
490 return (LX_SCHED_OTHER);
491 case SCHED_RR:
492 return (LX_SCHED_RR);
493 default:
494 break;
495 }
496
497 return (LX_SCHED_OTHER);
498 }
499
500 int
501 lx_sched_setscheduler(uintptr_t pid, uintptr_t policy, uintptr_t param)
502 {
503 int rt_pol;
504 int rv;
505 pid_t s_pid;
506 lwpid_t s_tid;
507 struct lx_sched_param lp;
508
509 struct sched_param sp;
510
511 if (((pid_t)pid < 0) || (param == NULL))
512 return (-EINVAL);
513
514 if ((rt_pol = validate_policy((int)policy)) < 0)
515 return (rt_pol);
516
517 if ((rv = ltos_sparam(policy, (struct lx_sched_param *)param,
518 &sp)) != 0)
519 return (rv);
520
521 if (uucopy((void *)param, &lp, sizeof (lp)) != 0)
522 return (-errno);
523
524 /*
525 * In Linux, the only valid SCHED_OTHER scheduler priority is 0
526 */
527 if ((rt_pol == LX_SCHED_OTHER) && (lp.lx_sched_prio != 0))
528 return (-EINVAL);
529
530 if (lx_lpid_to_spair((pid_t)pid, &s_pid, &s_tid) < 0)
531 return (-ESRCH);
532
533 /*
534 * Check if we're allowed to change the scheduler for the process.
535 *
536 * If we're operating on a thread, we can't just call
537 * pthread_setschedparam() because as all threads reside within a
538 * single Solaris process, Solaris will allow the modification.
539 *
540 * If we're operating on a process, we can't just call
541 * sched_setscheduler() because Solaris will allow the call to succeed
542 * if the scheduler and scheduler parameters do not differ from those
543 * being installed, but Linux wants the call to fail.
544 */
545 if ((rv = check_schedperms(s_pid)) != 0)
546 return (rv);
547
548 if (s_pid == getpid()) {
549 struct sched_param param;
550 int pol;
551
552 if ((pol = sched_getscheduler(s_pid)) != 0)
553 return (-errno);
554
555 /*
556 * sched_setscheduler() returns the previous scheduling policy
557 * on success, so call pthread_getschedparam() to get the
558 * current thread's scheduling policy and return that if the
559 * call to pthread_setschedparam() succeeds.
560 */
561 if ((rv = pthread_getschedparam(s_tid, &pol, ¶m)) != 0)
562 return (-rv);
563
564 return (((rv = pthread_setschedparam(s_tid, rt_pol, &sp)) != 0)
565 ? -rv : pol);
566 }
567
568 return (((rv = sched_setscheduler(s_pid, rt_pol, &sp)) == -1)
569 ? -errno : rv);
570 }
571
572 int
573 lx_sched_get_priority_min(uintptr_t policy)
574 {
575 /*
576 * In Linux, the only valid SCHED_OTHER scheduler priority is 0.
577 * Linux scheduling priorities are not alterable, so there is no
578 * Solaris translation necessary.
579 */
580 switch (policy) {
581 case LX_SCHED_FIFO:
582 case LX_SCHED_RR:
583 return (LX_SCHED_PRIORITY_MIN_RRFIFO);
584 case LX_SCHED_OTHER:
585 return (LX_SCHED_PRIORITY_MIN_OTHER);
586 default:
587 break;
588 }
589 return (-EINVAL);
590 }
591
592 int
593 lx_sched_get_priority_max(uintptr_t policy)
594 {
595 /*
596 * In Linux, the only valid SCHED_OTHER scheduler priority is 0
597 * Linux scheduling priorities are not alterable, so there is no
598 * Solaris translation necessary.
599 */
600 switch (policy) {
601 case LX_SCHED_FIFO:
602 case LX_SCHED_RR:
603 return (LX_SCHED_PRIORITY_MAX_RRFIFO);
604 case LX_SCHED_OTHER:
605 return (LX_SCHED_PRIORITY_MAX_OTHER);
606 default:
607 break;
608 }
609 return (-EINVAL);
610 }