1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * Copyright 2015 Joyent, Inc. All rights reserved. 
29 */ 30 31 #include <sys/types.h> 32 #include <sys/param.h> 33 #include <sys/segments.h> 34 #include <sys/lx_types.h> 35 #include <sys/lx_brand.h> 36 #include <sys/lx_misc.h> 37 #include <sys/lx_debug.h> 38 #include <sys/lx_poll.h> 39 #include <sys/lx_signal.h> 40 #include <sys/lx_sigstack.h> 41 #include <sys/lx_syscall.h> 42 #include <sys/lx_thread.h> 43 #include <sys/syscall.h> 44 #include <lx_provider_impl.h> 45 #include <sys/stack.h> 46 #include <assert.h> 47 #include <errno.h> 48 #include <poll.h> 49 #include <rctl.h> 50 #include <signal.h> 51 #include <stdlib.h> 52 #include <string.h> 53 #include <strings.h> 54 #include <thread.h> 55 #include <ucontext.h> 56 #include <unistd.h> 57 #include <stdio.h> 58 #include <libintl.h> 59 #include <ieeefp.h> 60 #include <sys/signalfd.h> 61 62 #if defined(_ILP32) 63 extern int pselect_large_fdset(int nfds, fd_set *in0, fd_set *out0, fd_set *ex0, 64 const timespec_t *tsp, const sigset_t *sp); 65 #endif 66 67 #define MIN(a, b) ((a) < (b) ? (a) : (b)) 68 69 /* 70 * Delivering signals to a Linux process is complicated by differences in 71 * signal numbering, stack structure and contents, and the action taken when a 72 * signal handler exits. In addition, many signal-related structures, such as 73 * sigset_ts, vary between Illumos and Linux. 74 * 75 * To support user-level signal handlers, the brand uses a double layer of 76 * indirection to process and deliver signals to branded threads. 77 * 78 * When a Linux process sends a signal using the kill(2) system call, we must 79 * translate the signal into the Illumos equivalent before handing control off 80 * to the standard signalling mechanism. When a signal is delivered to a Linux 81 * process, we translate the signal number from Illumos to back to Linux. 82 * Translating signals both at generation and delivery time ensures both that 83 * Illumos signals are sent properly to Linux applications and that signals' 84 * default behavior works as expected. 
85 * 86 * In a normal Illumos process, signal delivery is interposed on for any thread 87 * registering a signal handler by libc. Libc needs to do various bits of magic 88 * to provide thread-safe critical regions, so it registers its own handler, 89 * named sigacthandler(), using the sigaction(2) system call. When a signal is 90 * received, sigacthandler() is called, and after some processing, libc turns 91 * around and calls the user's signal handler via a routine named 92 * call_user_handler(). 93 * 94 * Adding a Linux branded thread to the mix complicates things somewhat. 95 * 96 * First, when a thread receives a signal, it may either be running in an 97 * emulated Linux context or a native illumos context. In either case, the 98 * in-kernel brand module is responsible for preserving the register state 99 * from the interrupted context, regardless of whether emulated or native 100 * software was running at the time. The kernel is also responsible for 101 * ensuring that the illumos native sigacthandler() is called with register 102 * values appropriate for native code. Of particular note is the %gs segment 103 * selector for 32-bit code, and the %fsbase segment base register for 64-bit 104 * code; these are used by libc to locate per-thread data structures. 105 * 106 * Second, the signal number translation referenced above must take place. 107 * Finally, when we hand control to the Linux signal handler we must do so 108 * on the brand stack, and with registers configured appropriately for the 109 * Linux application. 
 *
 * This need to translate signal numbers (and manipulate the signal handling
 * context) means that with standard Illumos libc, following a signal from
 * generation to delivery looks something like:
 *
 *     kernel ->
 *         sigacthandler() ->
 *             call_user_handler() ->
 *                 user signal handler
 *
 * but for the brand's Linux threads, this would look like:
 *
 *     kernel ->
 *         sigacthandler() ->
 *             call_user_handler() ->
 *                 lx_call_user_handler() ->
 *                     lx_sigdeliver() ->
 *                         syscall(B_JUMP_TO_LINUX, ...) ->
 *                             Linux user signal handler
 *
 * The new additions are:
 *
 *     lx_call_user_handler
 *     ====================
 *     This routine is responsible for translating Illumos signal numbers to
 *     their Linux equivalents, building a Linux signal stack based on the
 *     information Illumos has provided, and passing the stack to the
 *     registered Linux signal handler.  It is, in effect, the Linux thread
 *     equivalent to libc's call_user_handler().
 *
 *     lx_sigdeliver
 *     =============
 *
 * Note that none of this interposition is necessary unless a Linux thread
 * registers a user signal handler, as the default action for all signals is the
 * same between Illumos and Linux save for one signal, SIGPWR.  For this reason,
 * the brand ALWAYS installs its own internal signal handler for SIGPWR that
 * translates the action to the Linux default, to terminate the process.
 * (Illumos' default action is to ignore SIGPWR.)
 *
 * It is also important to note that when signals are not translated, the brand
 * relies upon code interposing upon the wait(2) system call to translate
 * signals to their proper values for any Linux threads retrieving the status
 * of others.
 * So while the Illumos signal number for a particular signal is set
 * in a process' data structures (and would be returned as the result of say,
 * WTERMSIG()), the brand's interposition upon wait(2) is responsible for
 * translating the value WTERMSIG() would return from an Illumos signal
 * number to the appropriate Linux value.
 *
 * lx_call_user_handler() calls lx_sigdeliver() with a helper function
 * (typically lx_build_signal_frame) which builds a stack frame for the 32-bit
 * Linux signal handler, or populates a local (on the stack) structure for the
 * 64-bit Linux signal handler.  The stack at that time looks like this:
 *
 *      =========================================================
 * |    | lx_sigdeliver_frame_t -- includes LX_SIGRT_MAGIC and  |
 * |    | a return context for the eventual sigreturn(2) call   |
 * |    =========================================================
 * |    | Linux signal frame (32-bit) or local data             |
 * V    | (64-bit) built by stack_builder()                     |
 *      =========================================================
 *
 * The process of returning to an interrupted thread of execution from a user
 * signal handler is entirely different between Illumos and Linux.  While
 * Illumos generally expects to set the context to the interrupted one on a
 * normal return from a signal handler, in the normal case Linux instead calls
 * code that calls a specific Linux system call, rt_sigreturn(2) (or it also
 * can call sigreturn(2) in 32-bit code).  Thus when a Linux signal handler
 * completes execution, instead of returning through what would in libc be a
 * call to setcontext(2), the rt_sigreturn(2) Linux system call is responsible
 * for accomplishing much the same thing.  It's for this reason that the stack
 * frame we build has the lx_(rt_)sigreturn_tramp code on the top of the
 * stack.
The code looks like this: 183 * 184 * 32-bit 64-bit 185 * -------------------------------- ----------------------------- 186 * mov LX_SYS_rt_sigreturn, %eax movq LX_SYS_rt_sigreturn, %rax 187 * int $0x80 syscall 188 * 189 * We also use these same functions (lx_rt_sigreturn_tramp or 190 * lx_sigreturn_tramp) to actually return from the signal handler. 191 * 192 * (Note that this trampoline code actually lives in a proper executable segment 193 * and not on the stack, but gdb checks for the exact code sequence of the 194 * trampoline code on the stack to determine whether it is in a signal stack 195 * frame or not. Really.) 196 * 197 * When the 32-bit Linux user signal handler is eventually called, the brand 198 * stack frame looks like this (in the case of a "modern" signal stack; see 199 * the lx_sigstack structure definition): 200 * 201 * ========================================================= 202 * | | lx_sigdeliver_frame_t | 203 * | ========================================================= 204 * | | Trampoline code (marker for gdb, not really executed) | 205 * | ========================================================= 206 * | | Linux struct _fpstate | 207 * | ========================================================= 208 * V | Linux ucontext_t | <--+ 209 * ========================================================= | 210 * | Linux siginfo_t | <--|-----+ 211 * ========================================================= | | 212 * | Pointer to Linux ucontext_t (or NULL) (sigaction arg2)| ---+ | 213 * ========================================================= | 214 * | Pointer to Linux siginfo_t (or NULL) (sigaction arg1)| ---------+ 215 * ========================================================= 216 * | Linux signal number (sigaction arg0)| 217 * ========================================================= 218 * | Pointer to signal return code (trampoline code) | 219 * ========================================================= 220 * 221 * The 64-bit stack-local data looks 
like this: 222 * 223 * ========================================================= 224 * | | lx_sigdeliver_frame_t | 225 * | ========================================================= 226 * | | Trampoline code (marker for gdb, not really executed) | 227 * | ========================================================= 228 * | | Linux struct _fpstate | 229 * | ========================================================= 230 * V | Linux ucontext_t | %rdx arg2 231 * ========================================================= 232 * | Linux siginfo_t | %rsi arg1 233 * ========================================================= 234 * | Pointer to signal return code (trampoline code) | 235 * ========================================================= 236 * 237 * As usual in 64-bit code, %rdi is arg0 which is the signal number. 238 * 239 * The *sigreturn(2) family of emulated system call handlers locates the 240 * "lx_sigdeliver_frame_t" struct on the Linux stack as part of processing 241 * the system call. This object contains a guard value (LX_SIGRT_MAGIC) to 242 * detect stack smashing or an incorrect stack pointer. It also contains a 243 * "return" context, which we use to get back to the "lx_sigdeliver()" frame 244 * on the native stack that originally dispatched to the Linux signal 245 * handler. The lx_sigdeliver() function is then able to return to the 246 * native libc signal handler in the usual way. This results in a further 247 * setcontext() back to whatever was running when we took the signal. 248 * 249 * There are some edge cases where the "return" context cannot be located 250 * by inspection of the Linux stack; e.g. if the guard value has been 251 * corrupted, or the emulated program has relocated parts of the signal 252 * delivery stack frame. If this case is detected, a fallback mechanism is 253 * used to attempt to find the return context. 
A chain of "lx_sigbackup_t" 254 * objects is maintained in signal interposer call frames, with the current 255 * head stored in the thread-specific "lx_tsd_t". This mechanism is 256 * similar in principle to the "lwp_oldcontext" member of the "klwp_t" used 257 * by the native signal handling infrastructure. This backup chain is used 258 * by the sigreturn(2) family of emulated system calls in the event that 259 * the Linux stack did not correctly reference a return context. 260 */ 261 262 typedef struct lx_sigdeliver_frame { 263 uintptr_t lxsdf_magic; 264 ucontext_t *lxsdf_retucp; 265 ucontext_t *lxsdf_sigucp; 266 lx_sigbackup_t *lxsdf_sigbackup; 267 } lx_sigdeliver_frame_t; 268 269 struct lx_oldsigstack { 270 void (*retaddr)(); /* address of real lx_sigreturn code */ 271 int sig; /* signal number */ 272 lx_sigcontext_t sigc; /* saved user context */ 273 lx_fpstate_t fpstate; /* saved FP state */ 274 int sig_extra; /* signal mask for signals [32 .. NSIG - 1] */ 275 char trampoline[8]; /* code for trampoline to lx_sigreturn() */ 276 }; 277 278 /* 279 * The lx_sighandlers structure needs to be a global due to the semantics of 280 * clone(). 281 * 282 * If CLONE_SIGHAND is set, the calling process and child share signal 283 * handlers, and if either calls sigaction(2) it should change the behavior 284 * in the other thread. Each thread does, however, have its own signal mask 285 * and set of pending signals. 286 * 287 * If CLONE_SIGHAND is not set, the child process should inherit a copy of 288 * the signal handlers at the time of the clone() but later calls to 289 * sigaction(2) should only affect the individual thread calling it. 290 * 291 * This maps perfectly to a thr_create(3C) thread semantic in the first 292 * case and a fork(2)-type semantic in the second case. By making 293 * lx_sighandlers global, we automatically get the correct behavior. 
294 */ 295 static lx_sighandlers_t lx_sighandlers; 296 297 /* 298 * Setting LX_NO_ABORT_HANDLER in the environment will prevent the emulated 299 * Linux program from modifying the signal handling disposition for SIGSEGV or 300 * SIGABRT. Useful for debugging programs which fall over themselves to 301 * prevent useful core files being generated. 302 */ 303 static int lx_no_abort_handler = 0; 304 305 static void lx_sigdeliver(int, siginfo_t *, ucontext_t *, size_t, void (*)(), 306 void (*)(), struct lx_sigaction *); 307 308 /* 309 * Cache result of process.max-file-descriptor to avoid calling getrctl() 310 * for each lx_ppoll(). 311 */ 312 static rlim_t maxfd = 0; 313 314 /* 315 * stol_stack() and ltos_stack() convert between Illumos and Linux stack_t 316 * structures. 317 * 318 * These routines are needed because although the two structures have the same 319 * contents, their contents are declared in a different order, so the content 320 * of the structures cannot be copied with a simple bcopy(). 
321 */ 322 static void 323 stol_stack(stack_t *fr, lx_stack_t *to) 324 { 325 to->ss_sp = fr->ss_sp; 326 to->ss_flags = fr->ss_flags; 327 to->ss_size = fr->ss_size; 328 } 329 330 static void 331 ltos_stack(lx_stack_t *fr, stack_t *to) 332 { 333 to->ss_sp = fr->ss_sp; 334 to->ss_flags = fr->ss_flags; 335 to->ss_size = fr->ss_size; 336 } 337 338 static int 339 ltos_sigset(lx_sigset_t *lx_sigsetp, sigset_t *s_sigsetp) 340 { 341 lx_sigset_t l; 342 int lx_sig, sig; 343 344 if (uucopy(lx_sigsetp, &l, sizeof (lx_sigset_t)) != 0) 345 return (-errno); 346 347 (void) sigemptyset(s_sigsetp); 348 349 for (lx_sig = 1; lx_sig <= LX_NSIG; lx_sig++) { 350 if (lx_sigismember(&l, lx_sig) && 351 ((sig = ltos_signo[lx_sig]) > 0)) 352 (void) sigaddset(s_sigsetp, sig); 353 } 354 355 return (0); 356 } 357 358 static int 359 stol_sigset(sigset_t *s_sigsetp, lx_sigset_t *lx_sigsetp) 360 { 361 lx_sigset_t l; 362 int sig, lx_sig; 363 364 bzero(&l, sizeof (lx_sigset_t)); 365 366 for (sig = 1; sig < NSIG; sig++) { 367 if (sigismember(s_sigsetp, sig) && 368 ((lx_sig = stol_signo[sig]) > 0)) 369 lx_sigaddset(&l, lx_sig); 370 } 371 372 return ((uucopy(&l, lx_sigsetp, sizeof (lx_sigset_t)) != 0) 373 ? -errno : 0); 374 } 375 376 #if defined(_ILP32) 377 static int 378 ltos_osigset(lx_osigset_t *lx_osigsetp, sigset_t *s_sigsetp) 379 { 380 lx_osigset_t lo; 381 int lx_sig, sig; 382 383 if (uucopy(lx_osigsetp, &lo, sizeof (lx_osigset_t)) != 0) 384 return (-errno); 385 386 (void) sigemptyset(s_sigsetp); 387 388 for (lx_sig = 1; lx_sig <= OSIGSET_NBITS; lx_sig++) 389 if ((lo & OSIGSET_BITSET(lx_sig)) && 390 ((sig = ltos_signo[lx_sig]) > 0)) 391 (void) sigaddset(s_sigsetp, sig); 392 393 return (0); 394 } 395 396 static int 397 stol_osigset(sigset_t *s_sigsetp, lx_osigset_t *lx_osigsetp) 398 { 399 lx_osigset_t lo = 0; 400 int lx_sig, sig; 401 402 /* 403 * Note that an lx_osigset_t can only represent the signals from 404 * [1 .. 
OSIGSET_NBITS], so even though a signal may be present in the 405 * Illumos sigset_t, it may not be representable as a bit in the 406 * lx_osigset_t. 407 */ 408 for (sig = 1; sig < NSIG; sig++) 409 if (sigismember(s_sigsetp, sig) && 410 ((lx_sig = stol_signo[sig]) > 0) && 411 (lx_sig <= OSIGSET_NBITS)) 412 lo |= OSIGSET_BITSET(lx_sig); 413 414 return ((uucopy(&lo, lx_osigsetp, sizeof (lx_osigset_t)) != 0) 415 ? -errno : 0); 416 } 417 #endif 418 419 static int 420 ltos_sigcode(int si_code) 421 { 422 switch (si_code) { 423 case LX_SI_USER: 424 return (SI_USER); 425 case LX_SI_TKILL: 426 return (SI_LWP); 427 case LX_SI_QUEUE: 428 return (SI_QUEUE); 429 case LX_SI_TIMER: 430 return (SI_TIMER); 431 case LX_SI_ASYNCIO: 432 return (SI_ASYNCIO); 433 case LX_SI_MESGQ: 434 return (SI_MESGQ); 435 default: 436 return (LX_SI_CODE_NOT_EXIST); 437 } 438 } 439 440 int 441 stol_siginfo(siginfo_t *siginfop, lx_siginfo_t *lx_siginfop) 442 { 443 int ret = 0; 444 lx_siginfo_t lx_siginfo; 445 446 bzero(&lx_siginfo, sizeof (*lx_siginfop)); 447 448 if ((lx_siginfo.lsi_signo = stol_signo[siginfop->si_signo]) <= 0) { 449 /* 450 * Depending on the caller we may still need to get a usable 451 * converted siginfo struct. 452 */ 453 lx_siginfo.lsi_signo = LX_SIGKILL; 454 errno = EINVAL; 455 ret = -1; 456 } 457 458 lx_siginfo.lsi_code = lx_stol_sigcode(siginfop->si_code); 459 lx_siginfo.lsi_errno = siginfop->si_errno; 460 461 switch (lx_siginfo.lsi_signo) { 462 /* 463 * Semantics ARE defined for SIGKILL, but since 464 * we can't catch it, we can't translate it. 
:-( 465 */ 466 case LX_SIGPOLL: 467 lx_siginfo.lsi_band = siginfop->si_band; 468 lx_siginfo.lsi_fd = siginfop->si_fd; 469 break; 470 471 case LX_SIGCHLD: 472 lx_siginfo.lsi_pid = siginfop->si_pid; 473 if (siginfop->si_code <= 0 || siginfop->si_code == 474 CLD_EXITED) { 475 lx_siginfo.lsi_status = siginfop->si_status; 476 } else { 477 lx_siginfo.lsi_status = lx_stol_status( 478 siginfop->si_status, -1); 479 } 480 lx_siginfo.lsi_utime = siginfop->si_utime; 481 lx_siginfo.lsi_stime = siginfop->si_stime; 482 break; 483 484 case LX_SIGILL: 485 case LX_SIGBUS: 486 case LX_SIGFPE: 487 case LX_SIGSEGV: 488 lx_siginfo.lsi_addr = siginfop->si_addr; 489 break; 490 491 default: 492 lx_siginfo.lsi_pid = siginfop->si_pid; 493 lx_siginfo.lsi_uid = 494 LX_UID32_TO_UID16(siginfop->si_uid); 495 lx_siginfo.lsi_value = siginfop->si_value; 496 break; 497 } 498 499 if (uucopy(&lx_siginfo, lx_siginfop, sizeof (lx_siginfo_t)) != 0) 500 return (-errno); 501 return ((ret != 0) ? -errno : 0); 502 } 503 504 static void 505 stol_fpstate(fpregset_t *fpr, lx_fpstate_t *lfpr) 506 { 507 size_t copy_len; 508 509 #if defined(_LP64) 510 /* 511 * The 64-bit Illumos struct fpregset_t and lx_fpstate_t are identical 512 * so just bcopy() those entries (see usr/src/uts/intel/sys/regset.h 513 * for __amd64's struct fpu). 514 */ 515 copy_len = sizeof (fpr->fp_reg_set.fpchip_state); 516 bcopy(fpr, lfpr, copy_len); 517 518 #else /* is _ILP32 */ 519 struct _fpstate *fpsp = (struct _fpstate *)fpr; 520 521 /* 522 * The Illumos struct _fpstate and lx_fpstate_t are identical from the 523 * beginning of the structure to the lx_fpstate_t "magic" field, so 524 * just bcopy() those entries. 525 */ 526 copy_len = (size_t)&(((lx_fpstate_t *)0)->magic); 527 bcopy(fpsp, lfpr, copy_len); 528 529 /* 530 * These fields are all only significant for the first 16 bits. 
531 */ 532 lfpr->cw &= 0xffff; /* x87 control word */ 533 lfpr->tag &= 0xffff; /* x87 tag word */ 534 lfpr->cssel &= 0xffff; /* cs selector */ 535 lfpr->datasel &= 0xffff; /* ds selector */ 536 537 /* 538 * Linux wants the x87 status word field to contain the value of the 539 * x87 saved exception status word. 540 */ 541 lfpr->sw = lfpr->status & 0xffff; /* x87 status word */ 542 543 lfpr->mxcsr = fpsp->mxcsr; 544 545 if (fpsp->mxcsr != 0) { 546 /* 547 * Linux uses the "magic" field to denote whether the XMM 548 * registers contain legal data or not. Since we can't get to 549 * %cr4 from userland to check the status of the OSFXSR bit, 550 * check the mxcsr field to see if it's 0, which it should 551 * never be on a system with the OXFXSR bit enabled. 552 */ 553 lfpr->magic = LX_X86_FXSR_MAGIC; 554 bcopy(fpsp->xmm, lfpr->_xmm, sizeof (lfpr->_xmm)); 555 } else { 556 lfpr->magic = LX_X86_FXSR_NONE; 557 } 558 #endif 559 } 560 561 static void 562 ltos_fpstate(lx_fpstate_t *lfpr, fpregset_t *fpr) 563 { 564 size_t copy_len; 565 566 #if defined(_LP64) 567 /* 568 * The 64-bit Illumos struct fpregset_t and lx_fpstate_t are identical 569 * so just bcopy() those entries (see usr/src/uts/intel/sys/regset.h 570 * for __amd64's struct fpu). 571 */ 572 copy_len = sizeof (fpr->fp_reg_set.fpchip_state); 573 bcopy(lfpr, fpr, copy_len); 574 575 #else /* is _ILP32 */ 576 struct _fpstate *fpsp = (struct _fpstate *)fpr; 577 578 /* 579 * The lx_fpstate_t and Illumos struct _fpstate are identical from the 580 * beginning of the structure to the struct _fpstate "mxcsr" field, so 581 * just bcopy() those entries. 582 * 583 * Note that we do NOT have to propogate changes the user may have made 584 * to the "status" word back to the "sw" word, unlike the way we have 585 * to deal with processing the ESP and UESP register values on return 586 * from a signal handler. 
587 */ 588 copy_len = (size_t)&(((struct _fpstate *)0)->mxcsr); 589 bcopy(lfpr, fpsp, copy_len); 590 591 /* 592 * These fields are all only significant for the first 16 bits. 593 */ 594 fpsp->cw &= 0xffff; /* x87 control word */ 595 fpsp->sw &= 0xffff; /* x87 status word */ 596 fpsp->tag &= 0xffff; /* x87 tag word */ 597 fpsp->cssel &= 0xffff; /* cs selector */ 598 fpsp->datasel &= 0xffff; /* ds selector */ 599 fpsp->status &= 0xffff; /* saved status */ 600 601 fpsp->mxcsr = lfpr->mxcsr; 602 603 if (lfpr->magic == LX_X86_FXSR_MAGIC) 604 bcopy(lfpr->_xmm, fpsp->xmm, sizeof (fpsp->xmm)); 605 #endif 606 } 607 608 /* 609 * We do not use the system sigaltstack() infrastructure as that would conflict 610 * with our handling of both system call emulation and native signals on the 611 * native stack. Instead, we track the Linux stack structure in our 612 * thread-specific data. This function is modeled on the behaviour of the 613 * native sigaltstack system call handler. 614 */ 615 long 616 lx_sigaltstack(uintptr_t ssp, uintptr_t oss) 617 { 618 lx_tsd_t *lxtsd = lx_get_tsd(); 619 lx_stack_t ss; 620 621 if (ssp != NULL) { 622 if (lxtsd->lxtsd_sigaltstack.ss_flags & LX_SS_ONSTACK) { 623 /* 624 * If we are currently using the installed alternate 625 * stack for signal handling, the user may not modify 626 * the stack for this thread. 627 */ 628 return (-EPERM); 629 } 630 631 if (uucopy((void *)ssp, &ss, sizeof (ss)) != 0) { 632 return (-EFAULT); 633 } 634 635 if (ss.ss_flags & ~LX_SS_DISABLE) { 636 /* 637 * The user may not specify a value for flags other 638 * than 0 or SS_DISABLE. 639 */ 640 return (-EINVAL); 641 } 642 643 if (!(ss.ss_flags & LX_SS_DISABLE) && ss.ss_size < 644 LX_MINSIGSTKSZ) { 645 return (-ENOMEM); 646 } 647 } 648 649 if (oss != NULL) { 650 /* 651 * User provided old and new stack_t pointers may point to 652 * the same location. Copy out before we modify. 
653 */ 654 if (uucopy(&lxtsd->lxtsd_sigaltstack, (void *)oss, 655 sizeof (lxtsd->lxtsd_sigaltstack)) != 0) { 656 return (-EFAULT); 657 } 658 } 659 660 if (ssp != NULL) { 661 lxtsd->lxtsd_sigaltstack = ss; 662 } 663 664 return (0); 665 } 666 667 #if defined(_ILP32) 668 /* 669 * The following routines are needed because sigset_ts and siginfo_ts are 670 * different in format between Linux and Illumos. 671 * 672 * Note that there are two different lx_sigset structures, lx_sigset_ts and 673 * lx_osigset_ts: 674 * 675 * + An lx_sigset_t is the equivalent of a Illumos sigset_t and supports 676 * more than 32 signals. 677 * 678 * + An lx_osigset_t is simply a uint32_t, so it by definition only supports 679 * 32 signals. 680 * 681 * When there are two versions of a routine, one prefixed with lx_rt_ and 682 * one prefixed with lx_ alone, in GENERAL the lx_rt_ routines deal with 683 * lx_sigset_ts while the lx_ routines deal with lx_osigset_ts. Unfortunately, 684 * this is not always the case (e.g. lx_sigreturn() vs. lx_rt_sigreturn()) 685 */ 686 long 687 lx_sigpending(uintptr_t sigpend) 688 { 689 sigset_t sigpendset; 690 691 if (sigpending(&sigpendset) != 0) 692 return (-errno); 693 694 return (stol_osigset(&sigpendset, (lx_osigset_t *)sigpend)); 695 } 696 #endif 697 698 long 699 lx_rt_sigpending(uintptr_t sigpend, uintptr_t setsize) 700 { 701 sigset_t sigpendset; 702 703 if ((size_t)setsize != sizeof (lx_sigset_t)) 704 return (-EINVAL); 705 706 if (sigpending(&sigpendset) != 0) 707 return (-errno); 708 709 return (stol_sigset(&sigpendset, (lx_sigset_t *)sigpend)); 710 } 711 712 /* 713 * Create a common routine to encapsulate all of the sigprocmask code, 714 * as the only difference between lx_sigprocmask() and lx_rt_sigprocmask() 715 * is the usage of lx_osigset_ts vs. lx_sigset_ts, as toggled in the code by 716 * the setting of the "sigset_type" flag. 
717 */ 718 static int 719 lx_sigprocmask_common(uintptr_t how, uintptr_t l_setp, uintptr_t l_osetp, 720 uintptr_t sigset_type) 721 { 722 int err = 0; 723 sigset_t set, oset; 724 sigset_t *s_setp = NULL; 725 sigset_t *s_osetp; 726 727 if (l_setp) { 728 switch (how) { 729 case LX_SIG_BLOCK: 730 how = SIG_BLOCK; 731 break; 732 733 case LX_SIG_UNBLOCK: 734 how = SIG_UNBLOCK; 735 break; 736 737 case LX_SIG_SETMASK: 738 how = SIG_SETMASK; 739 break; 740 741 default: 742 return (-EINVAL); 743 } 744 745 s_setp = &set; 746 747 /* Only 32-bit code passes other than USE_SIGSET */ 748 if (sigset_type == USE_SIGSET) 749 err = ltos_sigset((lx_sigset_t *)l_setp, s_setp); 750 #if defined(_ILP32) 751 else 752 err = ltos_osigset((lx_osigset_t *)l_setp, s_setp); 753 #endif 754 755 if (err != 0) 756 return (err); 757 758 } 759 760 s_osetp = (l_osetp ? &oset : NULL); 761 762 /* 763 * In a multithreaded environment, a call to sigprocmask(2) should 764 * only affect the current thread's signal mask so we don't need to 765 * explicitly call thr_sigsetmask(3C) here. 766 */ 767 if (sigprocmask(how, s_setp, s_osetp) != 0) 768 return (-errno); 769 770 if (l_osetp) { 771 if (sigset_type == USE_SIGSET) 772 err = stol_sigset(s_osetp, (lx_sigset_t *)l_osetp); 773 #if defined(_ILP32) 774 else 775 err = stol_osigset(s_osetp, (lx_osigset_t *)l_osetp); 776 #endif 777 778 if (err != 0) { 779 /* 780 * Encountered a fault while writing to the old signal 781 * mask buffer, so unwind the signal mask change made 782 * above. 
783 */ 784 (void) sigprocmask(how, s_osetp, (sigset_t *)NULL); 785 return (err); 786 } 787 } 788 789 return (0); 790 } 791 792 #if defined(_ILP32) 793 long 794 lx_sigprocmask(uintptr_t how, uintptr_t setp, uintptr_t osetp) 795 { 796 return (lx_sigprocmask_common(how, setp, osetp, USE_OSIGSET)); 797 } 798 #endif 799 800 long 801 lx_rt_sigprocmask(uintptr_t how, uintptr_t setp, uintptr_t osetp, 802 uintptr_t setsize) 803 { 804 if ((size_t)setsize != sizeof (lx_sigset_t)) 805 return (-EINVAL); 806 807 return (lx_sigprocmask_common(how, setp, osetp, USE_SIGSET)); 808 } 809 810 #if defined(_ILP32) 811 long 812 lx_sigsuspend(uintptr_t set) 813 { 814 sigset_t s_set; 815 816 if (ltos_osigset((lx_osigset_t *)set, &s_set) != 0) 817 return (-errno); 818 819 return ((sigsuspend(&s_set) == -1) ? -errno : 0); 820 } 821 #endif 822 823 long 824 lx_rt_sigsuspend(uintptr_t set, uintptr_t setsize) 825 { 826 sigset_t s_set; 827 828 if ((size_t)setsize != sizeof (lx_sigset_t)) 829 return (-EINVAL); 830 831 if (ltos_sigset((lx_sigset_t *)set, &s_set) != 0) 832 return (-errno); 833 834 return ((sigsuspend(&s_set) == -1) ? -errno : 0); 835 } 836 837 long 838 lx_rt_sigwaitinfo(uintptr_t set, uintptr_t sinfo, uintptr_t setsize) 839 { 840 sigset_t s_set; 841 siginfo_t s_sinfo, *s_sinfop; 842 int rc; 843 844 lx_sigset_t *setp = (lx_sigset_t *)set; 845 lx_siginfo_t *sinfop = (lx_siginfo_t *)sinfo; 846 847 if ((size_t)setsize != sizeof (lx_sigset_t)) 848 return (-EINVAL); 849 850 if (ltos_sigset(setp, &s_set) != 0) 851 return (-errno); 852 853 s_sinfop = (sinfop == NULL) ? NULL : &s_sinfo; 854 855 if ((rc = sigwaitinfo(&s_set, s_sinfop)) == -1) 856 return (-errno); 857 858 if (s_sinfop == NULL) 859 return (stol_signo[rc]); 860 861 return ((stol_siginfo(s_sinfop, sinfop) != 0) 862 ? 
-errno : stol_signo[rc]); 863 } 864 865 long 866 lx_rt_sigtimedwait(uintptr_t set, uintptr_t sinfo, uintptr_t toutp, 867 uintptr_t setsize) 868 { 869 sigset_t s_set; 870 siginfo_t s_sinfo, *s_sinfop; 871 int rc; 872 873 lx_sigset_t *setp = (lx_sigset_t *)set; 874 lx_siginfo_t *sinfop = (lx_siginfo_t *)sinfo; 875 876 if ((size_t)setsize != sizeof (lx_sigset_t)) 877 return (-EINVAL); 878 879 if (ltos_sigset(setp, &s_set) != 0) 880 return (-errno); 881 882 s_sinfop = (sinfop == NULL) ? NULL : &s_sinfo; 883 884 /* 885 * "If timeout is the NULL pointer, the behavior is unspecified." 886 * Match what LTP expects. 887 */ 888 if ((rc = sigtimedwait(&s_set, s_sinfop, 889 (struct timespec *)toutp)) == -1) 890 return (toutp == NULL ? -EINTR : -errno); 891 892 if (s_sinfop == NULL) 893 return (stol_signo[rc]); 894 895 return ((stol_siginfo(s_sinfop, sinfop) != 0) 896 ? -errno : stol_signo[rc]); 897 } 898 899 static void 900 lx_sigreturn_find_native_context(const char *caller, ucontext_t **sigucp, 901 ucontext_t **retucp, uintptr_t sp) 902 { 903 lx_tsd_t *lxtsd = lx_get_tsd(); 904 lx_sigdeliver_frame_t *lxsdfp = (lx_sigdeliver_frame_t *)sp; 905 lx_sigdeliver_frame_t lxsdf; 906 boolean_t copy_ok; 907 908 lx_debug("%s: reading lx_sigdeliver_frame_t @ %p\n", caller, lxsdfp); 909 if (uucopy(lxsdfp, &lxsdf, sizeof (lxsdf)) != 0) { 910 lx_debug("%s: failed to read lx_sigdeliver_frame_t @ %p\n", 911 lxsdfp); 912 913 copy_ok = B_FALSE; 914 } else { 915 lx_debug("%s: lxsdf: magic %p retucp %p sigucp %p\n", caller, 916 lxsdf.lxsdf_magic, lxsdf.lxsdf_retucp, lxsdf.lxsdf_sigucp); 917 918 copy_ok = B_TRUE; 919 } 920 921 /* 922 * lx_sigdeliver() pushes a lx_sigdeliver_frame_t onto the stack 923 * before it creates the struct lx_oldsigstack. 
924 */ 925 if (copy_ok && lxsdf.lxsdf_magic == LX_SIGRT_MAGIC) { 926 LX_SIGNAL_DELIVERY_FRAME_FOUND(lxsdfp); 927 928 /* 929 * The guard value is intact; use the context pointers stored 930 * in the signal delivery frame: 931 */ 932 *sigucp = lxsdf.lxsdf_sigucp; 933 *retucp = lxsdf.lxsdf_retucp; 934 935 /* 936 * Ensure that the backup signal delivery chain is in sync with 937 * the frame we are returning via: 938 */ 939 lxtsd->lxtsd_sigbackup = lxsdf.lxsdf_sigbackup; 940 } else { 941 /* 942 * The guard value was not intact. Either the program smashed 943 * the stack unintentionally, or worse: intentionally moved 944 * some parts of the signal delivery frame we constructed to 945 * another location before calling rt_sigreturn(2). 946 */ 947 LX_SIGNAL_DELIVERY_FRAME_CORRUPT(lxsdfp); 948 949 if (lxtsd->lxtsd_sigbackup == NULL) { 950 /* 951 * There was no backup context to use, so we must 952 * kill the process. 953 */ 954 if (copy_ok) { 955 lx_err_fatal("%s: sp 0x%p, expected 0x%x, " 956 "found 0x%x!", caller, sp, LX_SIGRT_MAGIC, 957 lxsdf.lxsdf_magic); 958 } else { 959 lx_err_fatal("%s: sp 0x%p, could not read " 960 "magic", caller, sp); 961 } 962 } 963 964 /* 965 * Attempt to recover by using the backup signal delivery 966 * chain: 967 */ 968 lx_debug("%s: SIGRT_MAGIC not found @ sp %p; using backup " 969 "@ %p\n", caller, (void *)sp, lxtsd->lxtsd_sigbackup); 970 *sigucp = lxtsd->lxtsd_sigbackup->lxsb_sigucp; 971 *retucp = lxtsd->lxtsd_sigbackup->lxsb_retucp; 972 } 973 } 974 975 #if defined(_ILP32) 976 /* 977 * Intercept the Linux sigreturn() syscall to turn it into the return through 978 * the libc call stack that Illumos expects. 979 * 980 * When control returns to libc's call_user_handler() routine, a setcontext(2) 981 * will be done that returns thread execution to the point originally 982 * interrupted by receipt of the signal. 983 * 984 * This is only used by 32-bit code. 
985 */ 986 long 987 lx_sigreturn(void) 988 { 989 struct lx_oldsigstack *lx_ossp; 990 lx_sigset_t lx_sigset; 991 ucontext_t *ucp; 992 ucontext_t *sigucp; 993 ucontext_t *retucp; 994 uintptr_t sp; 995 996 ucp = lx_syscall_regs(); 997 998 /* 999 * NOTE: The sp saved in the context is eight bytes off of where we 1000 * need it to be (either due to trampoline or the copying of 1001 * sp = uesp, not clear which). 1002 */ 1003 sp = LX_REG(ucp, REG_SP) - 8; 1004 1005 /* 1006 * At this point, the stack pointer should point to the struct 1007 * lx_oldsigstack that lx_build_old_signal_frame() constructed and 1008 * placed on the stack. We need to reference it a bit later, so 1009 * save a pointer to it before incrementing our copy of the sp. 1010 */ 1011 lx_ossp = (struct lx_oldsigstack *)sp; 1012 sp += SA(sizeof (struct lx_oldsigstack)); 1013 1014 lx_sigreturn_find_native_context(__func__, &sigucp, &retucp, sp); 1015 1016 /* 1017 * We need to copy machine registers the Linux signal handler may have 1018 * modified back to the Illumos ucontext_t. 1019 * 1020 * General registers copy across as-is, except Linux expects that 1021 * changes made to uc_mcontext.gregs[ESP] will be reflected when the 1022 * interrupted thread resumes execution after the signal handler. To 1023 * emulate this behavior, we must modify uc_mcontext.gregs[UESP] to 1024 * match uc_mcontext.gregs[ESP] as Illumos will restore the UESP 1025 * value to ESP. 
1026 */ 1027 lx_ossp->sigc.sc_esp_at_signal = lx_ossp->sigc.sc_esp; 1028 bcopy(&lx_ossp->sigc, &sigucp->uc_mcontext, sizeof (gregset_t)); 1029 1030 LX_SIGRETURN(NULL, sigucp, sp); 1031 1032 /* copy back FP regs if present */ 1033 if (lx_ossp->sigc.sc_fpstate != NULL) 1034 ltos_fpstate(&lx_ossp->fpstate, &sigucp->uc_mcontext.fpregs); 1035 1036 /* convert Linux signal mask back to its Illumos equivalent */ 1037 bzero(&lx_sigset, sizeof (lx_sigset_t)); 1038 lx_sigset.__bits[0] = lx_ossp->sigc.sc_mask; 1039 lx_sigset.__bits[1] = lx_ossp->sig_extra; 1040 (void) ltos_sigset(&lx_sigset, &sigucp->uc_sigmask); 1041 1042 /* 1043 * For signal mask handling to be done properly, this call needs to 1044 * return to the libc routine that originally called the signal handler 1045 * rather than directly set the context back to the place the signal 1046 * interrupted execution as the original Linux code would do. 1047 */ 1048 lx_debug("lx_sigreturn: calling setcontext; retucp %p flags %lx " 1049 "link %p\n", retucp, retucp->uc_flags, retucp->uc_link); 1050 setcontext(retucp); 1051 assert(0); 1052 1053 /*NOTREACHED*/ 1054 return (0); 1055 } 1056 #endif 1057 1058 /* 1059 * This signal return syscall is used by both 32-bit and 64-bit code. 
1060 */ 1061 long 1062 lx_rt_sigreturn(void) 1063 { 1064 struct lx_sigstack *lx_ssp; 1065 lx_ucontext_t *lx_ucp; 1066 ucontext_t *ucp; 1067 ucontext_t *sigucp; 1068 ucontext_t *retucp; 1069 uintptr_t sp; 1070 1071 /* Get the registers at the emulated Linux rt_sigreturn syscall */ 1072 ucp = lx_syscall_regs(); 1073 1074 #if defined(_ILP32) 1075 lx_debug("lx_rt_sigreturn: ESP %p UESP %p\n", LX_REG(ucp, ESP), 1076 LX_REG(ucp, UESP)); 1077 /* 1078 * For 32-bit 1079 * 1080 * NOTE: Because of the silly compatibility measures done in the 1081 * signal trampoline code to make sure the stack holds the 1082 * _exact same_ instruction sequence Linux does, we have to 1083 * manually "pop" some extra instructions off the stack here 1084 * before passing the stack address to the syscall because the 1085 * trampoline code isn't allowed to do it due to the gdb 1086 * compatability issues. 1087 * 1088 * No, I'm not kidding. 1089 * 1090 * The sp saved in the context is eight bytes off of where we 1091 * need it to be (either due to trampoline or the copying of 1092 * sp = uesp, not clear which but looks like the uesp case), so 1093 * the need to pop the extra four byte instruction means we need 1094 * to subtract a net four bytes from the sp before "popping" the 1095 * struct lx_sigstack off the stack. 1096 * 1097 * This will yield the value the stack pointer had before 1098 * lx_sigdeliver() created the stack frame for the Linux signal 1099 * handler. 1100 */ 1101 sp = (uintptr_t)LX_REG(ucp, REG_SP) - 4; 1102 #else 1103 /* 1104 * We need to make an adjustment for 64-bit code as well. Since 64-bit 1105 * does not use the trampoline, it's probably for the same reason as 1106 * alluded to above. 1107 */ 1108 sp = (uintptr_t)LX_REG(ucp, REG_SP) - 8; 1109 #endif 1110 1111 /* 1112 * At this point, the stack pointer should point to the struct 1113 * lx_sigstack that lx_build_signal_frame() constructed and 1114 * placed on the stack. 
We need to reference it a bit later, so 1115 * save a pointer to it before incrementing our copy of the sp. 1116 */ 1117 lx_ssp = (struct lx_sigstack *)sp; 1118 sp += SA(sizeof (struct lx_sigstack)); 1119 1120 #if defined(_LP64) 1121 /* 1122 * The 64-bit lx_sigdeliver() inserts 8 bytes of padding between 1123 * the lx_sigstack_t and the delivery frame to maintain ABI stack 1124 * alignment. 1125 */ 1126 sp += 8; 1127 #endif 1128 1129 lx_sigreturn_find_native_context(__func__, &sigucp, &retucp, sp); 1130 1131 /* 1132 * We need to copy machine registers the Linux signal handler may have 1133 * modified back to the Illumos version. 1134 */ 1135 #if defined(_LP64) 1136 lx_ucp = &lx_ssp->uc; 1137 1138 /* 1139 * General register layout is completely different. 1140 */ 1141 LX_REG(sigucp, REG_R15) = lx_ucp->uc_sigcontext.sc_r15; 1142 LX_REG(sigucp, REG_R14) = lx_ucp->uc_sigcontext.sc_r14; 1143 LX_REG(sigucp, REG_R13) = lx_ucp->uc_sigcontext.sc_r13; 1144 LX_REG(sigucp, REG_R12) = lx_ucp->uc_sigcontext.sc_r12; 1145 LX_REG(sigucp, REG_R11) = lx_ucp->uc_sigcontext.sc_r11; 1146 LX_REG(sigucp, REG_R10) = lx_ucp->uc_sigcontext.sc_r10; 1147 LX_REG(sigucp, REG_R9) = lx_ucp->uc_sigcontext.sc_r9; 1148 LX_REG(sigucp, REG_R8) = lx_ucp->uc_sigcontext.sc_r8; 1149 LX_REG(sigucp, REG_RDI) = lx_ucp->uc_sigcontext.sc_rdi; 1150 LX_REG(sigucp, REG_RSI) = lx_ucp->uc_sigcontext.sc_rsi; 1151 LX_REG(sigucp, REG_RBP) = lx_ucp->uc_sigcontext.sc_rbp; 1152 LX_REG(sigucp, REG_RBX) = lx_ucp->uc_sigcontext.sc_rbx; 1153 LX_REG(sigucp, REG_RDX) = lx_ucp->uc_sigcontext.sc_rdx; 1154 LX_REG(sigucp, REG_RCX) = lx_ucp->uc_sigcontext.sc_rcx; 1155 LX_REG(sigucp, REG_RAX) = lx_ucp->uc_sigcontext.sc_rax; 1156 LX_REG(sigucp, REG_TRAPNO) = lx_ucp->uc_sigcontext.sc_trapno; 1157 LX_REG(sigucp, REG_ERR) = lx_ucp->uc_sigcontext.sc_err; 1158 LX_REG(sigucp, REG_RIP) = lx_ucp->uc_sigcontext.sc_rip; 1159 LX_REG(sigucp, REG_CS) = lx_ucp->uc_sigcontext.sc_cs; 1160 LX_REG(sigucp, REG_RFL) = lx_ucp->uc_sigcontext.sc_eflags; 
1161 LX_REG(sigucp, REG_RSP) = lx_ucp->uc_sigcontext.sc_rsp; 1162 LX_REG(sigucp, REG_SS) = lx_ucp->uc_sigcontext.sc_pad0; 1163 LX_REG(sigucp, REG_FS) = lx_ucp->uc_sigcontext.sc_fs; 1164 LX_REG(sigucp, REG_GS) = lx_ucp->uc_sigcontext.sc_gs; 1165 1166 #else /* is _ILP32 */ 1167 lx_ucp = &lx_ssp->uc; 1168 1169 /* 1170 * Illumos and Linux both follow the SysV i386 ABI layout for the 1171 * mcontext. 1172 * 1173 * General registers copy across as-is, except Linux expects that 1174 * changes made to uc_mcontext.gregs[ESP] will be reflected when the 1175 * interrupted thread resumes execution after the signal handler. To 1176 * emulate this behavior, we must modify uc_mcontext.gregs[UESP] to 1177 * match uc_mcontext.gregs[ESP] as Illumos will restore the UESP value 1178 * to ESP. 1179 */ 1180 lx_ucp->uc_sigcontext.sc_esp_at_signal = lx_ucp->uc_sigcontext.sc_esp; 1181 1182 bcopy(&lx_ucp->uc_sigcontext, &sigucp->uc_mcontext.gregs, 1183 sizeof (gregset_t)); 1184 #endif 1185 1186 LX_SIGRETURN(lx_ucp, sigucp, sp); 1187 1188 if (lx_ucp->uc_sigcontext.sc_fpstate != NULL) { 1189 ltos_fpstate(lx_ucp->uc_sigcontext.sc_fpstate, 1190 &sigucp->uc_mcontext.fpregs); 1191 } 1192 1193 /* 1194 * Convert the Linux signal mask and stack back to their 1195 * Illumos equivalents. 1196 */ 1197 (void) ltos_sigset(&lx_ucp->uc_sigmask, &sigucp->uc_sigmask); 1198 ltos_stack(&lx_ucp->uc_stack, &sigucp->uc_stack); 1199 1200 /* 1201 * For signal mask handling to be done properly, this call needs to 1202 * return to the libc routine that originally called the signal handler 1203 * rather than directly set the context back to the place the signal 1204 * interrupted execution as the original Linux code would do. 
1205 */ 1206 lx_debug("lx_rt_sigreturn: calling setcontext; retucp %p\n", retucp); 1207 setcontext(retucp); 1208 assert(0); 1209 1210 /*NOTREACHED*/ 1211 return (0); 1212 } 1213 1214 1215 #if defined(_ILP32) 1216 /* 1217 * Build signal frame for processing for "old" (legacy) Linux signals 1218 * This stack-builder function is only used by 32-bit code. 1219 */ 1220 static void 1221 lx_build_old_signal_frame(int lx_sig, siginfo_t *sip, void *p, void *sp, 1222 uintptr_t *hargs) 1223 { 1224 extern void lx_sigreturn_tramp(); 1225 1226 lx_sigset_t lx_sigset; 1227 ucontext_t *ucp = (ucontext_t *)p; 1228 struct lx_sigaction *lxsap; 1229 struct lx_oldsigstack *lx_ossp = sp; 1230 1231 lx_debug("building old signal frame for lx sig %d at 0x%p", lx_sig, sp); 1232 1233 lx_ossp->sig = lx_sig; 1234 lxsap = &lx_sighandlers.lx_sa[lx_sig]; 1235 lx_debug("lxsap @ 0x%p", lxsap); 1236 1237 if (lxsap && (lxsap->lxsa_flags & LX_SA_RESTORER) && 1238 lxsap->lxsa_restorer) { 1239 lx_ossp->retaddr = lxsap->lxsa_restorer; 1240 lx_debug("lxsa_restorer exists @ 0x%p", lx_ossp->retaddr); 1241 } else { 1242 lx_ossp->retaddr = lx_sigreturn_tramp; 1243 lx_debug("lx_ossp->retaddr set to 0x%p", lx_sigreturn_tramp); 1244 } 1245 1246 lx_debug("osf retaddr = 0x%p", lx_ossp->retaddr); 1247 1248 /* convert Illumos signal mask and stack to their Linux equivalents */ 1249 (void) stol_sigset(&ucp->uc_sigmask, &lx_sigset); 1250 lx_ossp->sigc.sc_mask = lx_sigset.__bits[0]; 1251 lx_ossp->sig_extra = lx_sigset.__bits[1]; 1252 1253 /* 1254 * General registers copy across as-is, except Linux expects that 1255 * uc_mcontext.gregs[ESP] == uc_mcontext.gregs[UESP] on receipt of a 1256 * signal. 1257 */ 1258 bcopy(&ucp->uc_mcontext, &lx_ossp->sigc, sizeof (gregset_t)); 1259 lx_ossp->sigc.sc_esp = lx_ossp->sigc.sc_esp_at_signal; 1260 1261 /* 1262 * cr2 contains the faulting address, and Linux only sets cr2 for a 1263 * a segmentation fault. 1264 */ 1265 lx_ossp->sigc.sc_cr2 = (((lx_sig == LX_SIGSEGV) && (sip)) ? 
1266 (uintptr_t)sip->si_addr : 0); 1267 1268 /* convert FP regs if present */ 1269 if (ucp->uc_flags & UC_FPU) { 1270 stol_fpstate(&ucp->uc_mcontext.fpregs, &lx_ossp->fpstate); 1271 lx_ossp->sigc.sc_fpstate = &lx_ossp->fpstate; 1272 } else { 1273 lx_ossp->sigc.sc_fpstate = NULL; 1274 } 1275 1276 /* 1277 * Believe it or not, gdb wants to SEE the trampoline code on the 1278 * bottom of the stack to determine whether the stack frame belongs to 1279 * a signal handler, even though this code is no longer actually 1280 * called. 1281 * 1282 * You can't make this stuff up. 1283 */ 1284 bcopy((void *)lx_sigreturn_tramp, lx_ossp->trampoline, 1285 sizeof (lx_ossp->trampoline)); 1286 } 1287 #endif 1288 1289 /* 1290 * Build stack frame (32-bit) or stack local data (64-bit) for processing for 1291 * modern Linux signals. This is the only stack-builder function for 64-bit 1292 * code (32-bit code also calls this when using "modern" signals). 1293 */ 1294 static void 1295 lx_build_signal_frame(int lx_sig, siginfo_t *sip, void *p, void *sp, 1296 uintptr_t *hargs) 1297 { 1298 extern void lx_rt_sigreturn_tramp(); 1299 1300 lx_ucontext_t *lx_ucp; 1301 ucontext_t *ucp = (ucontext_t *)p; 1302 struct lx_sigstack *lx_ssp = sp; 1303 struct lx_sigaction *lxsap; 1304 1305 lx_debug("building signal frame for lx sig %d at 0x%p", lx_sig, sp); 1306 1307 lx_ucp = &lx_ssp->uc; 1308 #if defined(_ILP32) 1309 /* 1310 * Arguments are passed to the 32-bit signal handler on the stack. 1311 */ 1312 lx_ssp->ucp = lx_ucp; 1313 lx_ssp->sip = sip != NULL ? &lx_ssp->si : NULL; 1314 lx_ssp->sig = lx_sig; 1315 #else 1316 /* 1317 * Arguments to the 64-bit signal handler are passed in registers: 1318 * hdlr(int sig, siginfo_t *sip, void *ucp); 1319 */ 1320 hargs[0] = lx_sig; 1321 hargs[1] = sip != NULL ? 
(uintptr_t)&lx_ssp->si : NULL; 1322 hargs[2] = (uintptr_t)lx_ucp; 1323 #endif 1324 1325 lxsap = &lx_sighandlers.lx_sa[lx_sig]; 1326 lx_debug("lxsap @ 0x%p", lxsap); 1327 1328 if (lxsap && (lxsap->lxsa_flags & LX_SA_RESTORER) && 1329 lxsap->lxsa_restorer) { 1330 /* 1331 * lxsa_restorer is explicitly set by sigaction in 32-bit code 1332 * but it can also be implicitly set for both 32 and 64 bit 1333 * code via lx_sigaction_common when we bcopy the user-supplied 1334 * lx_sigaction element into the proper slot in the sighandler 1335 * array. 1336 */ 1337 lx_ssp->retaddr = lxsap->lxsa_restorer; 1338 lx_debug("lxsa_restorer exists @ 0x%p", lx_ssp->retaddr); 1339 } else { 1340 lx_ssp->retaddr = lx_rt_sigreturn_tramp; 1341 lx_debug("lx_ssp->retaddr set to 0x%p", lx_rt_sigreturn_tramp); 1342 } 1343 1344 /* Linux has these fields but always clears them to 0 */ 1345 lx_ucp->uc_flags = 0; 1346 lx_ucp->uc_link = NULL; 1347 1348 /* convert Illumos signal mask and stack to their Linux equivalents */ 1349 (void) stol_sigset(&ucp->uc_sigmask, &lx_ucp->uc_sigmask); 1350 stol_stack(&ucp->uc_stack, &lx_ucp->uc_stack); 1351 1352 #if defined(_LP64) 1353 /* 1354 * General register layout is completely different. 
1355 */ 1356 lx_ucp->uc_sigcontext.sc_r8 = LX_REG(ucp, REG_R8); 1357 lx_ucp->uc_sigcontext.sc_r9 = LX_REG(ucp, REG_R9); 1358 lx_ucp->uc_sigcontext.sc_r10 = LX_REG(ucp, REG_R10); 1359 lx_ucp->uc_sigcontext.sc_r11 = LX_REG(ucp, REG_R11); 1360 lx_ucp->uc_sigcontext.sc_r12 = LX_REG(ucp, REG_R12); 1361 lx_ucp->uc_sigcontext.sc_r13 = LX_REG(ucp, REG_R13); 1362 lx_ucp->uc_sigcontext.sc_r14 = LX_REG(ucp, REG_R14); 1363 lx_ucp->uc_sigcontext.sc_r15 = LX_REG(ucp, REG_R15); 1364 lx_ucp->uc_sigcontext.sc_rdi = LX_REG(ucp, REG_RDI); 1365 lx_ucp->uc_sigcontext.sc_rsi = LX_REG(ucp, REG_RSI); 1366 lx_ucp->uc_sigcontext.sc_rbp = LX_REG(ucp, REG_RBP); 1367 lx_ucp->uc_sigcontext.sc_rbx = LX_REG(ucp, REG_RBX); 1368 lx_ucp->uc_sigcontext.sc_rdx = LX_REG(ucp, REG_RDX); 1369 lx_ucp->uc_sigcontext.sc_rax = LX_REG(ucp, REG_RAX); 1370 lx_ucp->uc_sigcontext.sc_rcx = LX_REG(ucp, REG_RCX); 1371 lx_ucp->uc_sigcontext.sc_rsp = LX_REG(ucp, REG_RSP); 1372 lx_ucp->uc_sigcontext.sc_rip = LX_REG(ucp, REG_RIP); 1373 lx_ucp->uc_sigcontext.sc_eflags = LX_REG(ucp, REG_RFL); 1374 lx_ucp->uc_sigcontext.sc_cs = LX_REG(ucp, REG_CS); 1375 lx_ucp->uc_sigcontext.sc_gs = LX_REG(ucp, REG_GS); 1376 lx_ucp->uc_sigcontext.sc_fs = LX_REG(ucp, REG_FS); 1377 lx_ucp->uc_sigcontext.sc_pad0 = LX_REG(ucp, REG_SS); 1378 lx_ucp->uc_sigcontext.sc_err = LX_REG(ucp, REG_ERR); 1379 lx_ucp->uc_sigcontext.sc_trapno = LX_REG(ucp, REG_TRAPNO); 1380 1381 #else /* is _ILP32 */ 1382 /* 1383 * General registers copy across as-is, except Linux expects that 1384 * uc_mcontext.gregs[ESP] == uc_mcontext.gregs[UESP] on receipt of a 1385 * signal. 1386 */ 1387 bcopy(&ucp->uc_mcontext, &lx_ucp->uc_sigcontext, sizeof (gregset_t)); 1388 lx_ucp->uc_sigcontext.sc_esp = lx_ucp->uc_sigcontext.sc_esp_at_signal; 1389 #endif 1390 1391 /* 1392 * cr2 contains the faulting address, which Linux only sets for a 1393 * a segmentation fault. 1394 */ 1395 lx_ucp->uc_sigcontext.sc_cr2 = ((lx_sig == LX_SIGSEGV) && (sip)) ? 
1396 (uintptr_t)sip->si_addr : 0; 1397 1398 /* 1399 * This should only return an error if the signum is invalid but that 1400 * also gets converted into a LX_SIGKILL by this function. 1401 */ 1402 if (sip != NULL) 1403 (void) stol_siginfo(sip, &lx_ssp->si); 1404 else 1405 bzero(&lx_ssp->si, sizeof (lx_siginfo_t)); 1406 1407 /* convert FP regs if present */ 1408 if (ucp->uc_flags & UC_FPU) { 1409 /* 1410 * Copy FP regs to the appropriate place in the the lx_sigstack 1411 * structure. 1412 */ 1413 stol_fpstate(&ucp->uc_mcontext.fpregs, &lx_ssp->fpstate); 1414 lx_ucp->uc_sigcontext.sc_fpstate = &lx_ssp->fpstate; 1415 } else { 1416 lx_ucp->uc_sigcontext.sc_fpstate = NULL; 1417 } 1418 1419 #if defined(_ILP32) 1420 /* 1421 * Believe it or not, gdb wants to SEE the sigreturn code on the 1422 * top of the stack to determine whether the stack frame belongs to 1423 * a signal handler, even though this code is not actually called. 1424 * 1425 * You can't make this stuff up. 1426 */ 1427 bcopy((void *)lx_rt_sigreturn_tramp, lx_ssp->trampoline, 1428 sizeof (lx_ssp->trampoline)); 1429 #endif 1430 } 1431 1432 /* 1433 * This is the interposition handler for Linux signals. 1434 */ 1435 static void 1436 lx_call_user_handler(int sig, siginfo_t *sip, void *p) 1437 { 1438 void (*user_handler)(); 1439 void (*stk_builder)(); 1440 struct lx_sigaction *lxsap; 1441 ucontext_t *ucp = (ucontext_t *)p; 1442 size_t stksize; 1443 int lx_sig; 1444 1445 /* 1446 * If Illumos signal has no Linux equivalent, effectively ignore it. 1447 */ 1448 if ((lx_sig = stol_signo[sig]) == -1) { 1449 lx_unsupported("caught Illumos signal %d, no Linux equivalent", 1450 sig); 1451 return; 1452 } 1453 1454 lx_debug("interpose caught Illumos signal %d, translating to Linux " 1455 "signal %d", sig, lx_sig); 1456 1457 lxsap = &lx_sighandlers.lx_sa[lx_sig]; 1458 lx_debug("lxsap @ 0x%p", lxsap); 1459 1460 if ((sig == SIGPWR) && (lxsap->lxsa_handler == SIG_DFL)) { 1461 /* 1462 * Linux SIG_DFL for SIGPWR is to terminate. 
The lx wait 1463 * emulation will translate SIGPWR to LX_SIGPWR. 1464 */ 1465 (void) syscall(SYS_brand, B_EXIT_AS_SIG, SIGPWR); 1466 /* This should never return */ 1467 assert(0); 1468 } 1469 1470 if (lxsap->lxsa_handler == SIG_DFL || lxsap->lxsa_handler == SIG_IGN) 1471 lx_err_fatal("lxsa_handler set to %s? How?!?!?", 1472 (lxsap->lxsa_handler == SIG_DFL) ? "SIG_DFL" : "SIG_IGN"); 1473 1474 #if defined(_LP64) 1475 stksize = sizeof (struct lx_sigstack); 1476 stk_builder = lx_build_signal_frame; 1477 #else 1478 if (lxsap->lxsa_flags & LX_SA_SIGINFO) { 1479 stksize = sizeof (struct lx_sigstack); 1480 stk_builder = lx_build_signal_frame; 1481 } else { 1482 stksize = sizeof (struct lx_oldsigstack); 1483 stk_builder = lx_build_old_signal_frame; 1484 } 1485 #endif 1486 1487 user_handler = lxsap->lxsa_handler; 1488 1489 lx_debug("delivering %d (lx %d) to handler at 0x%p", sig, lx_sig, 1490 lxsap->lxsa_handler); 1491 1492 if (lxsap->lxsa_flags & LX_SA_RESETHAND) 1493 lxsap->lxsa_handler = SIG_DFL; 1494 1495 lx_sigdeliver(lx_sig, sip, ucp, stksize, stk_builder, user_handler, 1496 lxsap); 1497 1498 /* 1499 * We need to handle restarting system calls if requested by the 1500 * program for this signal type: 1501 */ 1502 if (lxsap->lxsa_flags & LX_SA_RESTART) { 1503 uintptr_t flags = (uintptr_t)ucp->uc_brand_data[0]; 1504 long ret = (long)LX_REG(ucp, REG_R0); 1505 boolean_t interrupted = (ret == -lx_errno(EINTR, -1)); 1506 1507 /* 1508 * If the system call returned EINTR, and the system 1509 * call handler set "br_syscall_restart" when returning, 1510 * we modify the context to try the system call again 1511 * when we return from this signal handler. 1512 */ 1513 if ((flags & LX_UC_RESTART_SYSCALL) && interrupted) { 1514 int syscall_num = (int)(uintptr_t)ucp->uc_brand_data[2]; 1515 1516 lx_debug("restarting interrupted system call %d", 1517 syscall_num); 1518 1519 /* 1520 * Both the "int 0x80" and the "syscall" instruction 1521 * are two bytes long. 
Wind the program counter back 1522 * to the start of this instruction. 1523 * 1524 * The system call we interrupted is preserved in the 1525 * brand-specific data in the ucontext_t when the 1526 * LX_UC_RESTART_SYSCALL flag is set. This is 1527 * analogous to the "orig_[er]ax" field in the Linux 1528 * "user_regs_struct". 1529 */ 1530 LX_REG(ucp, REG_PC) -= 2; 1531 LX_REG(ucp, REG_R0) = syscall_num; 1532 } 1533 } 1534 } 1535 1536 /* 1537 * The "lx_sigdeliver()" function is responsible for constructing the emulated 1538 * signal delivery frame on the brand stack for this LWP. A context is saved 1539 * on the stack which will be used by the "sigreturn(2)" family of emulated 1540 * system calls to get us back here after the Linux signal handler returns. 1541 * This function is modelled on the in-kernel "sendsig()" signal delivery 1542 * mechanism. 1543 */ 1544 void 1545 lx_sigdeliver(int lx_sig, siginfo_t *sip, ucontext_t *ucp, size_t stacksz, 1546 void (*stack_builder)(), void (*user_handler)(), 1547 struct lx_sigaction *lxsap) 1548 { 1549 lx_sigbackup_t sigbackup; 1550 ucontext_t uc; 1551 lx_tsd_t *lxtsd = lx_get_tsd(); 1552 int totsz = 0; 1553 uintptr_t flags; 1554 uintptr_t hargs[3]; 1555 /* 1556 * These variables must be "volatile", as they are modified after the 1557 * getcontext() stores the register state: 1558 */ 1559 volatile boolean_t signal_delivered = B_FALSE; 1560 volatile uintptr_t lxfp; 1561 volatile uintptr_t old_tsd_sp; 1562 volatile int newstack; 1563 1564 /* 1565 * This function involves modifying the Linux process stack for this 1566 * thread. To do so without corruption requires us to exclude other 1567 * signal handlers (or emulated system calls called from within those 1568 * handlers) from running while we reserve space on that stack. We 1569 * defer the execution of further instances of lx_call_user_handler() 1570 * until we have completed this operation. 1571 */ 1572 _sigoff(); 1573 1574 /* 1575 * Clear register arguments vector. 
1576 */ 1577 bzero(hargs, sizeof (hargs)); 1578 1579 /* 1580 * We save a context here so that we can be returned later to complete 1581 * handling the signal. 1582 */ 1583 lx_debug("lx_sigdeliver: STORING RETURN CONTEXT @ %p\n", &uc); 1584 assert(getcontext(&uc) == 0); 1585 lx_debug("lx_sigdeliver: RETURN CONTEXT %p LINK %p FLAGS %lx\n", 1586 &uc, uc.uc_link, uc.uc_flags); 1587 if (signal_delivered) { 1588 /* 1589 * If the "signal_delivered" flag is set, we are returned here 1590 * via setcontext() as called by the emulated Linux signal 1591 * return system call. 1592 */ 1593 lx_debug("lx_sigdeliver: WE ARE BACK, VIA UC @ %p!\n", &uc); 1594 goto after_signal_handler; 1595 } 1596 signal_delivered = B_TRUE; 1597 1598 /* 1599 * Preserve the current tsd value of the Linux process stack pointer, 1600 * even if it is zero. We will restore it when we are returned here 1601 * via setcontext() after the Linux process has completed execution of 1602 * its signal handler. 1603 */ 1604 old_tsd_sp = lxtsd->lxtsd_lx_sp; 1605 1606 /* 1607 * Figure out whether we will be handling this signal on an alternate 1608 * stack specified by the user. 1609 */ 1610 newstack = (lxsap->lxsa_flags & LX_SA_ONSTACK) && 1611 !(lxtsd->lxtsd_sigaltstack.ss_flags & (LX_SS_ONSTACK | 1612 LX_SS_DISABLE)); 1613 1614 /* 1615 * Find the first unused region of the Linux process stack, where 1616 * we will assemble our signal delivery frame. 1617 */ 1618 flags = (uintptr_t)ucp->uc_brand_data[0]; 1619 if (newstack) { 1620 /* 1621 * We are moving to the user-provided alternate signal 1622 * stack. 1623 */ 1624 lxfp = SA((uintptr_t)lxtsd->lxtsd_sigaltstack.ss_sp) + 1625 SA(lxtsd->lxtsd_sigaltstack.ss_size) - STACK_ALIGN; 1626 lx_debug("lx_sigdeliver: moving to ALTSTACK sp %p\n", lxfp); 1627 LX_SIGNAL_ALTSTACK_ENABLE(lxfp); 1628 } else if (flags & LX_UC_STACK_BRAND) { 1629 /* 1630 * We interrupted the Linux process to take this signal. The 1631 * stack pointer is the one saved in this context. 
1632 */ 1633 lxfp = LX_REG(ucp, REG_SP); 1634 } else { 1635 /* 1636 * We interrupted a native (emulation) routine, so we must get 1637 * the current stack pointer from either the tsd (if one is 1638 * stored there) or via the context chain. 1639 * 1640 */ 1641 lxfp = lx_find_brand_sp(); 1642 if (lxtsd->lxtsd_lx_sp != 0) { 1643 /* 1644 * We must also make room for the possibility of nested 1645 * signal delivery -- we may be pre-empting the 1646 * in-progress handling of another signal. 1647 * 1648 * Note that if we were already on the alternate stack, 1649 * any emulated Linux system calls would be betwixt 1650 * that original signal frame and this new one on the 1651 * one contiguous stack, so this logic holds either 1652 * way: 1653 */ 1654 lxfp = MIN(lxtsd->lxtsd_lx_sp, lxfp); 1655 } 1656 } 1657 1658 /* 1659 * Account for a reserved stack region (for amd64, this is 128 bytes), 1660 * and align the stack: 1661 */ 1662 lxfp -= STACK_RESERVE; 1663 lxfp &= ~(STACK_ALIGN - 1); 1664 1665 /* 1666 * Allocate space on the Linux process stack for our delivery frame, 1667 * including: 1668 * 1669 * ----------------------------------------------------- old %sp 1670 * - lx_sigdeliver_frame_t 1671 * - (ucontext_t pointers and stack magic) 1672 * ----------------------------------------------------- 1673 * - (amd64-only 8-byte alignment gap) 1674 * ----------------------------------------------------- 1675 * - frame of size "stacksz" from the stack builder 1676 * ----------------------------------------------------- new %sp 1677 */ 1678 #if defined(_LP64) 1679 /* 1680 * The AMD64 ABI requires us to align the stack such that when the 1681 * called function pushes the base pointer, the stack is 16 byte 1682 * aligned. The stack must, therefore, be 8- but _not_ 16-byte 1683 * aligned. 
1684 */ 1685 #if (STACK_ALIGN != 16) || (STACK_ENTRY_ALIGN != 8) 1686 #error "lx_sigdeliver() did not find expected stack alignment" 1687 #endif 1688 totsz = SA(sizeof (lx_sigdeliver_frame_t)) + SA(stacksz) + 8; 1689 assert((totsz & (STACK_ENTRY_ALIGN - 1)) == 0); 1690 assert((totsz & (STACK_ALIGN - 1)) == 8); 1691 #else 1692 totsz = SA(sizeof (lx_sigdeliver_frame_t)) + SA(stacksz); 1693 assert((totsz & (STACK_ALIGN - 1)) == 0); 1694 #endif 1695 1696 /* 1697 * Copy our return frame into place: 1698 */ 1699 lxfp -= SA(sizeof (lx_sigdeliver_frame_t)); 1700 lx_debug("lx_sigdeliver: lx_sigdeliver_frame_t @ %p\n", lxfp); 1701 { 1702 lx_sigdeliver_frame_t frm; 1703 1704 frm.lxsdf_magic = LX_SIGRT_MAGIC; 1705 frm.lxsdf_retucp = &uc; 1706 frm.lxsdf_sigucp = ucp; 1707 frm.lxsdf_sigbackup = &sigbackup; 1708 1709 lx_debug("lx_sigdeliver: retucp %p sigucp %p\n", 1710 frm.lxsdf_retucp, frm.lxsdf_sigucp); 1711 1712 if (uucopy(&frm, (void *)lxfp, sizeof (frm)) != 0) { 1713 /* 1714 * We could not modify the stack of the emulated Linux 1715 * program. Act like the kernel and terminate the 1716 * program with a segmentation violation. 1717 */ 1718 (void) syscall(SYS_brand, B_EXIT_AS_SIG, SIGSEGV); 1719 } 1720 1721 LX_SIGNAL_DELIVERY_FRAME_CREATE((void *)lxfp); 1722 1723 /* 1724 * Populate a backup copy of signal linkage to use in case 1725 * the Linux program completely destroys (or relocates) the 1726 * delivery frame. 1727 * 1728 * This is necessary for programs that have flown so far off 1729 * the architectural rails that they believe it is 1730 * acceptable to make assumptions about the precise size and 1731 * layout of the signal handling frame assembled by the 1732 * kernel. 
1733 */ 1734 sigbackup.lxsb_retucp = frm.lxsdf_retucp; 1735 sigbackup.lxsb_sigucp = frm.lxsdf_sigucp; 1736 sigbackup.lxsb_sigdeliver_frame = lxfp; 1737 sigbackup.lxsb_previous = lxtsd->lxtsd_sigbackup; 1738 lxtsd->lxtsd_sigbackup = &sigbackup; 1739 1740 lx_debug("lx_sigdeliver: installed sigbackup %p; prev %p\n", 1741 &sigbackup, sigbackup.lxsb_previous); 1742 } 1743 1744 /* 1745 * Build the Linux signal handling frame: 1746 */ 1747 #if defined(_LP64) 1748 lxfp -= SA(stacksz) + 8; 1749 #else 1750 lxfp -= SA(stacksz); 1751 #endif 1752 lx_debug("lx_sigdeliver: Linux sig frame @ %p\n", lxfp); 1753 stack_builder(lx_sig, sip, ucp, lxfp, hargs); 1754 1755 /* 1756 * Record our reservation so that any nested signal handlers 1757 * can see it. 1758 */ 1759 lx_debug("lx_sigdeliver: Linux tsd sp %p -> %p\n", lxtsd->lxtsd_lx_sp, 1760 lxfp); 1761 lxtsd->lxtsd_lx_sp = lxfp; 1762 1763 if (newstack) { 1764 lxtsd->lxtsd_sigaltstack.ss_flags |= LX_SS_ONSTACK; 1765 } 1766 1767 LX_SIGDELIVER(lx_sig, lxsap, (void *)lxfp); 1768 1769 /* 1770 * Re-enable signal delivery. If a signal was queued while we were 1771 * in the critical section, it will be delivered immediately. 1772 */ 1773 _sigon(); 1774 1775 /* 1776 * Pass control to the Linux signal handler: 1777 */ 1778 lx_debug("lx_sigdeliver: JUMPING TO LINUX (sig %d sp %p eip %p)\n", 1779 lx_sig, lxfp, user_handler); 1780 { 1781 ucontext_t jump_uc; 1782 1783 bcopy(lx_find_brand_uc(), &jump_uc, sizeof (jump_uc)); 1784 1785 /* 1786 * We want to load the general registers from this context, and 1787 * switch to the BRAND stack. We do _not_ want to restore the 1788 * uc_link value from this synthetic context, as that would 1789 * break the signal handling context chain. 
1790 */ 1791 jump_uc.uc_flags = UC_CPU; 1792 jump_uc.uc_brand_data[0] = (void *)(LX_UC_STACK_BRAND | 1793 LX_UC_IGNORE_LINK); 1794 1795 LX_REG(&jump_uc, REG_FP) = 0; 1796 LX_REG(&jump_uc, REG_SP) = lxfp; 1797 LX_REG(&jump_uc, REG_PC) = (uintptr_t)user_handler; 1798 1799 #if defined(_LP64) 1800 /* 1801 * Pass signal handler arguments by registers on AMD64. 1802 */ 1803 LX_REG(&jump_uc, REG_RDI) = hargs[0]; 1804 LX_REG(&jump_uc, REG_RSI) = hargs[1]; 1805 LX_REG(&jump_uc, REG_RDX) = hargs[2]; 1806 #endif 1807 1808 if (syscall(SYS_brand, B_JUMP_TO_LINUX, &jump_uc) == -1) { 1809 lx_err_fatal("B_JUMP_TO_LINUX failed: %s", 1810 strerror(errno)); 1811 } 1812 } 1813 1814 assert(0); 1815 1816 after_signal_handler: 1817 /* 1818 * Ensure all nested signal handlers have completed correctly 1819 * and then remove our stack reservation. 1820 */ 1821 _sigoff(); 1822 LX_SIGNAL_POST_HANDLER(lxfp, old_tsd_sp); 1823 assert(lxtsd->lxtsd_lx_sp == lxfp); 1824 lx_debug("lx_sigdeliver: after; Linux tsd sp %p -> %p\n", lxfp, 1825 old_tsd_sp); 1826 lxtsd->lxtsd_lx_sp = old_tsd_sp; 1827 if (newstack) { 1828 LX_SIGNAL_ALTSTACK_DISABLE(); 1829 lx_debug("lx_sigdeliver: disabling ALTSTACK sp %p\n", lxfp); 1830 lxtsd->lxtsd_sigaltstack.ss_flags &= ~LX_SS_ONSTACK; 1831 } 1832 /* 1833 * Restore backup signal tracking chain pointer to previous value: 1834 */ 1835 if (lxtsd->lxtsd_sigbackup != NULL) { 1836 lx_sigbackup_t *bprev = lxtsd->lxtsd_sigbackup->lxsb_previous; 1837 1838 lx_debug("lx_sigdeliver: restoring sigbackup %p to %p\n", 1839 lxtsd->lxtsd_sigbackup, bprev); 1840 1841 lxtsd->lxtsd_sigbackup = bprev; 1842 } 1843 _sigon(); 1844 1845 /* 1846 * Here we return to libc so that it may clean up and restore the 1847 * context originally interrupted by this signal. 1848 */ 1849 } 1850 1851 /* 1852 * Common routine to modify sigaction characteristics of a thread. 
1853 * 1854 * We shouldn't need any special locking code here as we actually use our copy 1855 * of libc's sigaction() to do all the real work, so its thread locking should 1856 * take care of any issues for us. 1857 */ 1858 static int 1859 lx_sigaction_common(int lx_sig, struct lx_sigaction *lxsp, 1860 struct lx_sigaction *olxsp) 1861 { 1862 struct lx_sigaction *lxsap; 1863 struct sigaction sa; 1864 1865 if (lx_sig <= 0 || lx_sig > LX_NSIG) 1866 return (-EINVAL); 1867 1868 lxsap = &lx_sighandlers.lx_sa[lx_sig]; 1869 lx_debug("&lx_sighandlers.lx_sa[%d] = 0x%p", lx_sig, lxsap); 1870 1871 if ((olxsp != NULL) && 1872 ((uucopy(lxsap, olxsp, sizeof (struct lx_sigaction))) != 0)) 1873 return (-errno); 1874 1875 if (lxsp != NULL) { 1876 int err, sig; 1877 struct lx_sigaction lxsa; 1878 sigset_t new_set, oset; 1879 1880 if (uucopy(lxsp, &lxsa, sizeof (struct lx_sigaction)) != 0) 1881 return (-errno); 1882 1883 if ((sig = ltos_signo[lx_sig]) != -1) { 1884 if (lx_no_abort_handler != 0) { 1885 /* 1886 * If LX_NO_ABORT_HANDLER has been set, we will 1887 * not allow the emulated program to do 1888 * anything hamfisted with SIGSEGV or SIGABRT 1889 * signals. 1890 */ 1891 if (sig == SIGSEGV || sig == SIGABRT) { 1892 return (0); 1893 } 1894 } 1895 1896 /* 1897 * Block this signal while messing with its dispostion 1898 */ 1899 (void) sigemptyset(&new_set); 1900 (void) sigaddset(&new_set, sig); 1901 1902 if (sigprocmask(SIG_BLOCK, &new_set, &oset) < 0) { 1903 err = errno; 1904 lx_debug("unable to block signal %d: %s", sig, 1905 strerror(err)); 1906 return (-err); 1907 } 1908 1909 /* 1910 * We don't really need the old signal disposition at 1911 * this point, but this weeds out signals that would 1912 * cause sigaction() to return an error before we change 1913 * anything other than the current signal mask. 
1914 */ 1915 if (sigaction(sig, NULL, &sa) < 0) { 1916 err = errno; 1917 lx_debug("sigaction() to get old " 1918 "disposition for signal %d failed: " 1919 "%s", sig, strerror(err)); 1920 (void) sigprocmask(SIG_SETMASK, &oset, NULL); 1921 return (-err); 1922 } 1923 1924 if ((lxsa.lxsa_handler != SIG_DFL) && 1925 (lxsa.lxsa_handler != SIG_IGN)) { 1926 sa.sa_handler = lx_call_user_handler; 1927 1928 /* 1929 * The interposition signal handler needs the 1930 * information provided via the SA_SIGINFO flag. 1931 */ 1932 sa.sa_flags = SA_SIGINFO; 1933 1934 /* 1935 * When translating from Linux to illumos 1936 * sigaction(2) flags, we explicitly do not 1937 * pass SA_ONSTACK to the kernel. The 1938 * alternate stack for Linux signal handling is 1939 * handled entirely by the emulation code. 1940 */ 1941 if (lxsa.lxsa_flags & LX_SA_NOCLDSTOP) 1942 sa.sa_flags |= SA_NOCLDSTOP; 1943 if (lxsa.lxsa_flags & LX_SA_NOCLDWAIT) 1944 sa.sa_flags |= SA_NOCLDWAIT; 1945 if (lxsa.lxsa_flags & LX_SA_RESTART) 1946 sa.sa_flags |= SA_RESTART; 1947 if (lxsa.lxsa_flags & LX_SA_NODEFER) 1948 sa.sa_flags |= SA_NODEFER; 1949 1950 /* 1951 * RESETHAND cannot be used be passed through 1952 * for SIGPWR due to different default actions 1953 * between Linux and Illumos. 
1954 */ 1955 if ((sig != SIGPWR) && 1956 (lxsa.lxsa_flags & LX_SA_RESETHAND)) 1957 sa.sa_flags |= SA_RESETHAND; 1958 1959 if (ltos_sigset(&lxsa.lxsa_mask, 1960 &sa.sa_mask) != 0) { 1961 err = errno; 1962 (void) sigprocmask(SIG_SETMASK, &oset, 1963 NULL); 1964 return (-err); 1965 } 1966 1967 lx_debug("interposing handler @ 0x%p for " 1968 "signal %d (lx %d), flags 0x%x", 1969 lxsa.lxsa_handler, sig, lx_sig, 1970 lxsa.lxsa_flags); 1971 1972 if (sigaction(sig, &sa, NULL) < 0) { 1973 err = errno; 1974 lx_debug("sigaction() to set new " 1975 "disposition for signal %d failed: " 1976 "%s", sig, strerror(err)); 1977 (void) sigprocmask(SIG_SETMASK, &oset, 1978 NULL); 1979 return (-err); 1980 } 1981 } else if ((sig != SIGPWR) || 1982 ((sig == SIGPWR) && 1983 (lxsa.lxsa_handler == SIG_IGN))) { 1984 /* 1985 * There's no need to interpose for SIG_DFL or 1986 * SIG_IGN so just call our copy of libc's 1987 * sigaction(), but don't allow SIG_DFL for 1988 * SIGPWR due to differing default actions 1989 * between Linux and Illumos. 1990 * 1991 * Get the previous disposition first so things 1992 * like sa_mask and sa_flags are preserved over 1993 * a transition to SIG_DFL or SIG_IGN, which is 1994 * what Linux expects. 1995 */ 1996 1997 sa.sa_handler = lxsa.lxsa_handler; 1998 1999 if (sigaction(sig, &sa, NULL) < 0) { 2000 err = errno; 2001 lx_debug("sigaction(%d, %s) failed: %s", 2002 sig, ((sa.sa_handler == SIG_DFL) ? 2003 "SIG_DFL" : "SIG_IGN"), 2004 strerror(err)); 2005 (void) sigprocmask(SIG_SETMASK, &oset, 2006 NULL); 2007 return (-err); 2008 } 2009 } 2010 } else { 2011 lx_debug("Linux signal with no kill support " 2012 "specified: %d", lx_sig); 2013 } 2014 2015 /* 2016 * Save the new disposition for the signal in the global 2017 * lx_sighandlers structure. 2018 */ 2019 bcopy(&lxsa, lxsap, sizeof (struct lx_sigaction)); 2020 2021 /* 2022 * Reset the signal mask to what we came in with if 2023 * we were modifying a kill-supported signal. 
2024 */ 2025 if (sig != -1) 2026 (void) sigprocmask(SIG_SETMASK, &oset, NULL); 2027 } 2028 2029 return (0); 2030 } 2031 2032 #if defined(_ILP32) 2033 /* 2034 * sigaction is only used in 32-bit code. 2035 */ 2036 long 2037 lx_sigaction(uintptr_t lx_sig, uintptr_t actp, uintptr_t oactp) 2038 { 2039 int val; 2040 struct lx_sigaction sa, osa; 2041 struct lx_sigaction *sap, *osap; 2042 struct lx_osigaction *osp; 2043 2044 sap = (actp ? &sa : NULL); 2045 osap = (oactp ? &osa : NULL); 2046 2047 /* 2048 * If we have a source pointer, convert source lxsa_mask from 2049 * lx_osigset_t to lx_sigset_t format. 2050 */ 2051 if (sap) { 2052 osp = (struct lx_osigaction *)actp; 2053 sap->lxsa_handler = osp->lxsa_handler; 2054 2055 bzero(&sap->lxsa_mask, sizeof (lx_sigset_t)); 2056 2057 for (val = 1; val <= OSIGSET_NBITS; val++) 2058 if (osp->lxsa_mask & OSIGSET_BITSET(val)) 2059 (void) lx_sigaddset(&sap->lxsa_mask, val); 2060 2061 sap->lxsa_flags = osp->lxsa_flags; 2062 sap->lxsa_restorer = osp->lxsa_restorer; 2063 } 2064 2065 if ((val = lx_sigaction_common(lx_sig, sap, osap))) 2066 return (val); 2067 2068 /* 2069 * If we have a save pointer, convert the old lxsa_mask from 2070 * lx_sigset_t to lx_osigset_t format. 2071 */ 2072 if (osap) { 2073 osp = (struct lx_osigaction *)oactp; 2074 2075 osp->lxsa_handler = osap->lxsa_handler; 2076 2077 bzero(&osp->lxsa_mask, sizeof (osp->lxsa_mask)); 2078 for (val = 1; val <= OSIGSET_NBITS; val++) 2079 if (lx_sigismember(&osap->lxsa_mask, val)) 2080 osp->lxsa_mask |= OSIGSET_BITSET(val); 2081 2082 osp->lxsa_flags = osap->lxsa_flags; 2083 osp->lxsa_restorer = osap->lxsa_restorer; 2084 } 2085 2086 return (0); 2087 } 2088 #endif 2089 2090 long 2091 lx_rt_sigaction(uintptr_t lx_sig, uintptr_t actp, uintptr_t oactp, 2092 uintptr_t setsize) 2093 { 2094 /* 2095 * The "new" rt_sigaction call checks the setsize 2096 * parameter. 
2097 */ 2098 if ((size_t)setsize != sizeof (lx_sigset_t)) 2099 return (-EINVAL); 2100 2101 return (lx_sigaction_common(lx_sig, (struct lx_sigaction *)actp, 2102 (struct lx_sigaction *)oactp)); 2103 } 2104 2105 #if defined(_ILP32) 2106 /* 2107 * Convert signal syscall to a call to the lx_sigaction() syscall 2108 * Only used in 32-bit code. 2109 */ 2110 long 2111 lx_signal(uintptr_t lx_sig, uintptr_t handler) 2112 { 2113 struct sigaction act; 2114 struct sigaction oact; 2115 int rc; 2116 2117 /* 2118 * Use sigaction to mimic SYSV signal() behavior; glibc will 2119 * actually call sigaction(2) itself, so we're really reaching 2120 * back for signal(2) semantics here. 2121 */ 2122 bzero(&act, sizeof (act)); 2123 act.sa_handler = (void (*)())handler; 2124 act.sa_flags = SA_RESETHAND | SA_NODEFER; 2125 2126 rc = lx_sigaction(lx_sig, (uintptr_t)&act, (uintptr_t)&oact); 2127 return ((rc == 0) ? ((ssize_t)oact.sa_handler) : rc); 2128 } 2129 #endif 2130 2131 void 2132 lx_sighandlers_save(lx_sighandlers_t *saved) 2133 { 2134 bcopy(&lx_sighandlers, saved, sizeof (lx_sighandlers_t)); 2135 } 2136 2137 void 2138 lx_sighandlers_restore(lx_sighandlers_t *saved) 2139 { 2140 bcopy(saved, &lx_sighandlers, sizeof (lx_sighandlers_t)); 2141 } 2142 2143 int 2144 lx_siginit(void) 2145 { 2146 extern void set_setcontext_enforcement(int); 2147 extern void set_escaped_context_cleanup(int); 2148 2149 struct sigaction sa; 2150 sigset_t new_set, oset; 2151 int lx_sig, sig; 2152 2153 if (getenv("LX_NO_ABORT_HANDLER") != NULL) { 2154 lx_no_abort_handler = 1; 2155 } 2156 2157 /* 2158 * Block all signals possible while setting up the signal imposition 2159 * mechanism. 
2160 */ 2161 (void) sigfillset(&new_set); 2162 2163 if (sigprocmask(SIG_BLOCK, &new_set, &oset) < 0) 2164 lx_err_fatal("unable to block signals while setting up " 2165 "imposition mechanism: %s", strerror(errno)); 2166 2167 /* 2168 * Ignore any signals that have no Linux analog so that those 2169 * signals cannot be sent to Linux processes from the global zone 2170 */ 2171 for (sig = 1; sig < NSIG; sig++) 2172 if (stol_signo[sig] < 0) 2173 (void) sigignore(sig); 2174 2175 /* 2176 * Mark any signals that are ignored as ignored in our interposition 2177 * handler array 2178 */ 2179 for (lx_sig = 1; lx_sig <= LX_NSIG; lx_sig++) { 2180 if (((sig = ltos_signo[lx_sig]) != -1) && 2181 (sigaction(sig, NULL, &sa) < 0)) 2182 lx_err_fatal("unable to determine previous disposition " 2183 "for signal %d: %s", sig, strerror(errno)); 2184 2185 if (sa.sa_handler == SIG_IGN) { 2186 lx_debug("marking signal %d (lx %d) as SIG_IGN", 2187 sig, lx_sig); 2188 lx_sighandlers.lx_sa[lx_sig].lxsa_handler = SIG_IGN; 2189 } 2190 } 2191 2192 /* 2193 * Have our interposition handler handle SIGPWR to start with, 2194 * as it has a default action of terminating the process in Linux 2195 * but its default is to be ignored in Illumos. 2196 */ 2197 (void) sigemptyset(&sa.sa_mask); 2198 sa.sa_sigaction = lx_call_user_handler; 2199 sa.sa_flags = SA_SIGINFO; 2200 2201 if (sigaction(SIGPWR, &sa, NULL) < 0) 2202 lx_err_fatal("sigaction(SIGPWR) failed: %s", strerror(errno)); 2203 2204 /* 2205 * Illumos' libc forces certain register values in the ucontext_t 2206 * used to restore a post-signal user context to be those Illumos 2207 * expects; however that is not what we want to happen if the signal 2208 * was taken while branded code was executing, so we must disable 2209 * that behavior. 
2210 */ 2211 set_setcontext_enforcement(0); 2212 2213 /* 2214 * The illumos libc attempts to clean up dangling uc_link pointers in 2215 * signal handling contexts when libc believes us to have escaped a 2216 * signal handler incorrectly in the past. We want to disable this 2217 * behaviour, so that the system call emulation context saved by the 2218 * kernel brand module for lx_emulate() may be part of the context 2219 * chain without itself being used for signal handling. 2220 */ 2221 set_escaped_context_cleanup(0); 2222 2223 /* 2224 * Reset the signal mask to what we came in with. 2225 */ 2226 (void) sigprocmask(SIG_SETMASK, &oset, NULL); 2227 2228 lx_debug("interposition handler setup for SIGPWR"); 2229 return (0); 2230 } 2231 2232 /* 2233 * This code strongly resembles lx_poll(), but is here to be able to take 2234 * advantage of the Linux signal helper routines. 2235 */ 2236 long 2237 lx_ppoll(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4, uintptr_t p5) 2238 { 2239 struct pollfd *lfds, *sfds; 2240 nfds_t nfds = (nfds_t)p2; 2241 timespec_t ts, *tsp = NULL; 2242 int fds_size, i, rval, revents; 2243 lx_sigset_t lxsig, *lxsigp = NULL; 2244 sigset_t sigset, *sp = NULL; 2245 rctlblk_t *rblk; 2246 2247 lx_debug("\tppoll(0x%p, %d, 0x%p, 0x%p, %d)", p1, p2, p3, p4, p5); 2248 2249 if (p3 != NULL) { 2250 if (uucopy((void *)p3, &ts, sizeof (ts)) != 0) 2251 return (-errno); 2252 2253 tsp = &ts; 2254 } 2255 2256 if (p4 != NULL) { 2257 if (uucopy((void *)p4, &lxsig, sizeof (lxsig)) != 0) 2258 return (-errno); 2259 2260 lxsigp = &lxsig; 2261 if ((size_t)p5 != sizeof (lx_sigset_t)) 2262 return (-EINVAL); 2263 2264 if (lxsigp) { 2265 if ((rval = ltos_sigset(lxsigp, &sigset)) != 0) 2266 return (rval); 2267 2268 sp = &sigset; 2269 } 2270 } 2271 2272 /* 2273 * Deal with the NULL fds[] case. 
2274 */ 2275 if (nfds == 0 || p1 == NULL) { 2276 if ((rval = ppoll(NULL, 0, tsp, sp)) < 0) 2277 return (-errno); 2278 2279 return (rval); 2280 } 2281 2282 if (maxfd == 0) { 2283 if ((rblk = (rctlblk_t *)SAFE_ALLOCA(rctlblk_size())) == NULL) 2284 return (-ENOMEM); 2285 2286 if (getrctl("process.max-file-descriptor", NULL, rblk, 2287 RCTL_FIRST) == -1) 2288 return (-EINVAL); 2289 2290 maxfd = rctlblk_get_value(rblk); 2291 } 2292 2293 if (nfds > maxfd) 2294 return (-EINVAL); 2295 2296 /* 2297 * Note: we are assuming that the Linux and Illumos pollfd 2298 * structures are identical. Copy in the Linux poll structure. 2299 */ 2300 fds_size = sizeof (struct pollfd) * nfds; 2301 lfds = (struct pollfd *)SAFE_ALLOCA(fds_size); 2302 if (lfds == NULL) 2303 return (-ENOMEM); 2304 if (uucopy((void *)p1, lfds, fds_size) != 0) 2305 return (-errno); 2306 2307 /* 2308 * The poll system call modifies the poll structures passed in 2309 * so we'll need to make an extra copy of them. 2310 */ 2311 sfds = (struct pollfd *)SAFE_ALLOCA(fds_size); 2312 if (sfds == NULL) 2313 return (-ENOMEM); 2314 2315 /* Convert the Linux events bitmask into the Illumos equivalent. */ 2316 for (i = 0; i < nfds; i++) { 2317 /* 2318 * If the caller is polling for an unsupported event, we 2319 * have to bail out. 
2320 */ 2321 if (lfds[i].events & ~LX_POLL_SUPPORTED_EVENTS) { 2322 lx_unsupported("unsupported poll events requested: " 2323 "events=0x%x", lfds[i].events); 2324 return (-ENOTSUP); 2325 } 2326 2327 sfds[i].fd = lfds[i].fd; 2328 sfds[i].events = lfds[i].events & LX_POLL_COMMON_EVENTS; 2329 if (lfds[i].events & LX_POLLWRNORM) 2330 sfds[i].events |= POLLWRNORM; 2331 if (lfds[i].events & LX_POLLWRBAND) 2332 sfds[i].events |= POLLWRBAND; 2333 if (lfds[i].events & LX_POLLRDHUP) 2334 sfds[i].events |= POLLRDHUP; 2335 sfds[i].revents = 0; 2336 } 2337 2338 if ((rval = ppoll(sfds, nfds, tsp, sp)) < 0) 2339 return (-errno); 2340 2341 /* Convert the Illumos revents bitmask into the Linux equivalent */ 2342 for (i = 0; i < nfds; i++) { 2343 revents = sfds[i].revents & LX_POLL_COMMON_EVENTS; 2344 if (sfds[i].revents & POLLWRBAND) 2345 revents |= LX_POLLWRBAND; 2346 if (sfds[i].revents & POLLRDHUP) 2347 revents |= LX_POLLRDHUP; 2348 2349 /* 2350 * Be careful because on Illumos POLLOUT and POLLWRNORM 2351 * are defined to the same values but on Linux they 2352 * are not. 2353 */ 2354 if (sfds[i].revents & POLLOUT) { 2355 if ((lfds[i].events & LX_POLLOUT) == 0) 2356 revents &= ~LX_POLLOUT; 2357 if (lfds[i].events & LX_POLLWRNORM) 2358 revents |= LX_POLLWRNORM; 2359 } 2360 2361 lfds[i].revents = revents; 2362 } 2363 2364 /* Copy out the results */ 2365 if (uucopy(lfds, (void *)p1, fds_size) != 0) 2366 return (-errno); 2367 2368 return (rval); 2369 } 2370 2371 /* 2372 * This code stongly resemebles lx_select(), but is here to be able to take 2373 * advantage of the Linux signal helper routines. 
2374 */ 2375 long 2376 lx_pselect6(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4, 2377 uintptr_t p5, uintptr_t p6) 2378 { 2379 int nfds = (int)p1; 2380 fd_set *rfdsp = NULL; 2381 fd_set *wfdsp = NULL; 2382 fd_set *efdsp = NULL; 2383 timespec_t ts, *tsp = NULL; 2384 int fd_set_len = howmany(nfds, 8); 2385 int r; 2386 sigset_t sigset, *sp = NULL; 2387 2388 lx_debug("\tpselect6(%d, 0x%p, 0x%p, 0x%p, 0x%p, 0x%p)", 2389 p1, p2, p3, p4, p4, p6); 2390 2391 if (nfds > 0) { 2392 if (p2 != NULL) { 2393 rfdsp = SAFE_ALLOCA(fd_set_len); 2394 if (rfdsp == NULL) 2395 return (-ENOMEM); 2396 if (uucopy((void *)p2, rfdsp, fd_set_len) != 0) 2397 return (-errno); 2398 } 2399 if (p3 != NULL) { 2400 wfdsp = SAFE_ALLOCA(fd_set_len); 2401 if (wfdsp == NULL) 2402 return (-ENOMEM); 2403 if (uucopy((void *)p3, wfdsp, fd_set_len) != 0) 2404 return (-errno); 2405 } 2406 if (p4 != NULL) { 2407 efdsp = SAFE_ALLOCA(fd_set_len); 2408 if (efdsp == NULL) 2409 return (-ENOMEM); 2410 if (uucopy((void *)p4, efdsp, fd_set_len) != 0) 2411 return (-errno); 2412 } 2413 } 2414 2415 if (p5 != NULL) { 2416 if (uucopy((void *)p5, &ts, sizeof (ts)) != 0) 2417 return (-errno); 2418 2419 tsp = &ts; 2420 } 2421 2422 if (p6 != NULL) { 2423 /* 2424 * To force the number of arguments to be no more than six, 2425 * Linux bundles both the sigset and the size into a structure 2426 * that becomes the sixth argument. 2427 */ 2428 struct { 2429 lx_sigset_t *addr; 2430 size_t size; 2431 } lx_sigset; 2432 2433 if (uucopy((void *)p6, &lx_sigset, sizeof (lx_sigset)) != 0) 2434 return (-errno); 2435 2436 /* 2437 * Yes, that's right: Linux forces a size to be passed only 2438 * so it can check that it's the size of a sigset_t. 2439 */ 2440 if (lx_sigset.size != sizeof (lx_sigset_t)) 2441 return (-EINVAL); 2442 2443 /* 2444 * This is where we check if the sigset is *really* NULL. 
2445 */ 2446 if (lx_sigset.addr) { 2447 if ((r = ltos_sigset(lx_sigset.addr, &sigset)) != 0) 2448 return (r); 2449 2450 sp = &sigset; 2451 } 2452 } 2453 2454 #if defined(_LP64) 2455 r = pselect(nfds, rfdsp, wfdsp, efdsp, tsp, sp); 2456 #else 2457 if (nfds >= FD_SETSIZE) 2458 r = pselect_large_fdset(nfds, rfdsp, wfdsp, efdsp, tsp, sp); 2459 else 2460 r = pselect(nfds, rfdsp, wfdsp, efdsp, tsp, sp); 2461 #endif 2462 2463 if (r < 0) 2464 return (-errno); 2465 2466 /* 2467 * For pselect6(), we don't honor the strange Linux select() semantics 2468 * with respect to the timestruc parameter because glibc ignores it 2469 * anyway -- just copy out the fd pointers and return. 2470 */ 2471 if ((rfdsp != NULL) && (uucopy(rfdsp, (void *)p2, fd_set_len) != 0)) 2472 return (-errno); 2473 if ((wfdsp != NULL) && (uucopy(wfdsp, (void *)p3, fd_set_len) != 0)) 2474 return (-errno); 2475 if ((efdsp != NULL) && (uucopy(efdsp, (void *)p4, fd_set_len) != 0)) 2476 return (-errno); 2477 2478 return (r); 2479 } 2480 2481 /* 2482 * The first argument is the pid (Linux tgid) to send the signal to, second 2483 * argument is the signal to send (an lx signal), and third is the siginfo_t 2484 * with extra information. We translate the code and signal only from the 2485 * siginfo_t, and leave everything else the same as it gets passed through the 2486 * signalling system. This is enough to get sigqueue working. See Linux man 2487 * page rt_sigqueueinfo(2). 
2488 */ 2489 long 2490 lx_rt_sigqueueinfo(uintptr_t p1, uintptr_t p2, uintptr_t p3) 2491 { 2492 pid_t tgid = (pid_t)p1; 2493 int lx_sig = (int)p2; 2494 int sig; 2495 lx_siginfo_t lx_siginfo; 2496 siginfo_t siginfo; 2497 int s_code; 2498 pid_t s_pid; 2499 2500 if (uucopy((void *)p3, &lx_siginfo, sizeof (lx_siginfo_t)) != 0) 2501 return (-EFAULT); 2502 s_code = ltos_sigcode(lx_siginfo.lsi_code); 2503 if (s_code == LX_SI_CODE_NOT_EXIST) 2504 return (-EINVAL); 2505 if (lx_sig < 0 || lx_sig > LX_NSIG || (sig = ltos_signo[lx_sig]) < 0) { 2506 return (-EINVAL); 2507 } 2508 /* 2509 * This case (when trying to kill pid 0) just has a different errno 2510 * returned in illumos than in Linux. 2511 */ 2512 if (tgid == 0) 2513 return (-ESRCH); 2514 if (lx_lpid_to_spid(tgid, &s_pid) != 0) 2515 return (-ESRCH); 2516 if (SI_CANQUEUE(s_code)) { 2517 return ((syscall(SYS_sigqueue, s_pid, sig, 2518 lx_siginfo.lsi_value, s_code, 0) == -1) ? 2519 (-errno): 0); 2520 } else { 2521 /* 2522 * This case is unlikely, as the main entry point is through 2523 * sigqueue, which always has a queuable si_code. 2524 */ 2525 siginfo.si_signo = sig; 2526 siginfo.si_code = s_code; 2527 siginfo.si_pid = lx_siginfo.lsi_pid; 2528 siginfo.si_value = lx_siginfo.lsi_value; 2529 siginfo.si_uid = lx_siginfo.lsi_uid; 2530 return ((syscall(SYS_brand, B_HELPER_SIGQUEUE, 2531 tgid, sig, &siginfo)) ? (-errno) : 0); 2532 } 2533 } 2534 2535 /* 2536 * Adds an additional argument for which thread within a thread group to send 2537 * the signal to (added as the second argument). 
2538 */ 2539 long 2540 lx_rt_tgsigqueueinfo(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4) 2541 { 2542 pid_t tgid = (pid_t)p1; 2543 pid_t tid = (pid_t)p2; 2544 int lx_sig = (int)p3; 2545 int sig; 2546 lx_siginfo_t lx_siginfo; 2547 siginfo_t siginfo; 2548 int si_code; 2549 2550 if (uucopy((void *)p4, &lx_siginfo, sizeof (lx_siginfo_t)) != 0) 2551 return (-EFAULT); 2552 if (lx_sig < 0 || lx_sig > LX_NSIG || (sig = ltos_signo[lx_sig]) < 0) { 2553 return (-EINVAL); 2554 } 2555 si_code = ltos_sigcode(lx_siginfo.lsi_code); 2556 if (si_code == LX_SI_CODE_NOT_EXIST) 2557 return (-EINVAL); 2558 /* 2559 * Check for invalid tgid and tids. That appears to be only negatives 2560 * and 0 values. Everything else that doesn't exist is instead ESRCH. 2561 */ 2562 if (tgid <= 0 || tid <= 0) 2563 return (-EINVAL); 2564 siginfo.si_signo = sig; 2565 siginfo.si_code = si_code; 2566 siginfo.si_pid = lx_siginfo.lsi_pid; 2567 siginfo.si_value = lx_siginfo.lsi_value; 2568 siginfo.si_uid = lx_siginfo.lsi_uid; 2569 2570 return ((syscall(SYS_brand, B_HELPER_TGSIGQUEUE, tgid, tid, sig, 2571 &siginfo)) ? (-errno) : 0); 2572 } 2573 2574 long 2575 lx_signalfd(int fd, uintptr_t mask, size_t msize) 2576 { 2577 return (lx_signalfd4(fd, mask, msize, 0)); 2578 } 2579 2580 long 2581 lx_signalfd4(int fd, uintptr_t mask, size_t msize, int flags) 2582 { 2583 sigset_t s_set; 2584 int r; 2585 2586 if (msize != sizeof (int64_t)) 2587 return (-EINVAL); 2588 2589 if (ltos_sigset((lx_sigset_t *)mask, &s_set) != 0) 2590 return (-errno); 2591 2592 r = signalfd(fd, &s_set, flags); 2593 2594 /* 2595 * signalfd(3C) may fail with ENOENT if /dev/signalfd is not available. 2596 * It is less jarring to Linux programs to tell them that internal 2597 * allocation failed than to report an error number they are not 2598 * expecting. 2599 */ 2600 if (r == -1 && errno == ENOENT) 2601 return (-ENODEV); 2602 2603 return (r == -1 ? -errno : r); 2604 }