1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * Copyright 2015 Joyent, Inc. All rights reserved. 
29 */ 30 31 #include <sys/types.h> 32 #include <sys/param.h> 33 #include <sys/segments.h> 34 #include <sys/lx_types.h> 35 #include <sys/lx_brand.h> 36 #include <sys/lx_misc.h> 37 #include <sys/lx_debug.h> 38 #include <sys/lx_poll.h> 39 #include <sys/lx_signal.h> 40 #include <sys/lx_sigstack.h> 41 #include <sys/lx_syscall.h> 42 #include <sys/lx_thread.h> 43 #include <sys/syscall.h> 44 #include <lx_provider_impl.h> 45 #include <sys/stack.h> 46 #include <assert.h> 47 #include <errno.h> 48 #include <poll.h> 49 #include <rctl.h> 50 #include <signal.h> 51 #include <stdlib.h> 52 #include <string.h> 53 #include <strings.h> 54 #include <thread.h> 55 #include <ucontext.h> 56 #include <unistd.h> 57 #include <stdio.h> 58 #include <libintl.h> 59 #include <ieeefp.h> 60 #include <sys/signalfd.h> 61 62 #if defined(_ILP32) 63 extern int pselect_large_fdset(int nfds, fd_set *in0, fd_set *out0, fd_set *ex0, 64 const timespec_t *tsp, const sigset_t *sp); 65 #endif 66 67 #define MIN(a, b) ((a) < (b) ? (a) : (b)) 68 69 /* 70 * Delivering signals to a Linux process is complicated by differences in 71 * signal numbering, stack structure and contents, and the action taken when a 72 * signal handler exits. In addition, many signal-related structures, such as 73 * sigset_ts, vary between Illumos and Linux. 74 * 75 * To support user-level signal handlers, the brand uses a double layer of 76 * indirection to process and deliver signals to branded threads. 77 * 78 * When a Linux process sends a signal using the kill(2) system call, we must 79 * translate the signal into the Illumos equivalent before handing control off 80 * to the standard signalling mechanism. When a signal is delivered to a Linux 81 * process, we translate the signal number from Illumos to back to Linux. 82 * Translating signals both at generation and delivery time ensures both that 83 * Illumos signals are sent properly to Linux applications and that signals' 84 * default behavior works as expected. 
85 * 86 * In a normal Illumos process, signal delivery is interposed on for any thread 87 * registering a signal handler by libc. Libc needs to do various bits of magic 88 * to provide thread-safe critical regions, so it registers its own handler, 89 * named sigacthandler(), using the sigaction(2) system call. When a signal is 90 * received, sigacthandler() is called, and after some processing, libc turns 91 * around and calls the user's signal handler via a routine named 92 * call_user_handler(). 93 * 94 * Adding a Linux branded thread to the mix complicates things somewhat. 95 * 96 * First, when a thread receives a signal, it may either be running in an 97 * emulated Linux context or a native illumos context. In either case, the 98 * in-kernel brand module is responsible for preserving the register state 99 * from the interrupted context, regardless of whether emulated or native 100 * software was running at the time. The kernel is also responsible for 101 * ensuring that the illumos native sigacthandler() is called with register 102 * values appropriate for native code. Of particular note is the %gs segment 103 * selector for 32-bit code, and the %fsbase segment base register for 64-bit 104 * code; these are used by libc to locate per-thread data structures. 105 * 106 * Second, the signal number translation referenced above must take place. 107 * Finally, when we hand control to the Linux signal handler we must do so 108 * on the brand stack, and with registers configured appropriately for the 109 * Linux application. 
110 * 111 * This need to translate signal numbers (and manipulate the signal handling 112 * context) means that with standard Illumos libc, following a signal from 113 * generation to delivery looks something like: 114 * 115 * kernel -> 116 * sigacthandler() -> 117 * call_user_handler() -> 118 * user signal handler 119 * 120 * but for the brand's Linux threads, this would look like: 121 * 122 * kernel -> 123 * sigacthandler() -> 124 * call_user_handler() -> 125 * lx_call_user_handler() -> 126 * lx_sigdeliver() -> 127 * syscall(B_JUMP_TO_LINUX, ...) -> 128 * Linux user signal handler 129 * 130 * The new additions are: 131 * 132 * lx_call_user_handler 133 * ==================== 134 * This routine is responsible for translating Illumos signal numbers to 135 * their Linux equivalents, building a Linux signal stack based on the 136 * information Illumos has provided, and passing the stack to the 137 * registered Linux signal handler. It is, in effect, the Linux thread 138 * equivalent to libc's call_user_handler(). 139 * 140 * lx_sigdeliver 141 * ============= 142 * 143 * Note that none of this interposition is necessary unless a Linux thread 144 * registers a user signal handler, as the default action for all signals is the 145 * same between Illumos and Linux save for one signal, SIGPWR. For this reason, 146 * the brand ALWAYS installs its own internal signal handler for SIGPWR that 147 * translates the action to the Linux default, to terminate the process. 148 * (Illumos' default action is to ignore SIGPWR.) 149 * 150 * It is also important to note that when signals are not translated, the brand 151 * relies upon code interposing upon the wait(2) system call to translate 152 * signals to their proper values for any Linux threads retrieving the status 153 * of others. 
So while the Illumos signal number for a particular signal is set 154 * in a process' data structures (and would be returned as the result of say, 155 * WTERMSIG()), the brand's interposition upon wait(2) is responsible for 156 * translating the value WTERMSIG() would return from an Illumos signal number 157 * to the appropriate Linux value. 158 * 159 * lx_call_user_handler() calls lx_sigdeliver() with a helper function 160 * (typically lx_build_signal_frame) which builds a stack frame for the 32-bit 161 * Linux signal handler, or populates a local (on the stack) structure for the 162 * 64-bit Linux signal handler. The stack at that time looks like this: 163 * 164 * ========================================================= 165 * | | lx_sigdeliver_frame_t -- includes LX_SIGRT_MAGIC and | 166 * | | a return context for the eventual sigreturn(2) call | 167 * | ========================================================= 168 * | | Linux signal frame (32-bit) or local data | 169 * V | (64-bit) built by stack_builder() | 170 * ========================================================= 171 * 172 * The process of returning to an interrupted thread of execution from a user 173 * signal handler is entirely different between Illumos and Linux. While 174 * Illumos generally expects to set the context to the interrupted one on a 175 * normal return from a signal handler, in the normal case Linux instead calls 176 * code that calls a specific Linux system call, rt_sigreturn(2) (or it also 177 * can call sigreturn(2) in 32-bit code). Thus when a Linux signal handler 178 * completes execution, instead of returning through what would in libc be a 179 * call to setcontext(2), the rt_sigreturn(2) Linux system call is responsible 180 * for accomplishing much the same thing. It's for this reason that the stack 181 * frame we build has the lx_(rt_)sigreturn_tramp code on the top of the 182 * stack. 
The code looks like this: 183 * 184 * 32-bit 64-bit 185 * -------------------------------- ----------------------------- 186 * mov LX_SYS_rt_sigreturn, %eax movq LX_SYS_rt_sigreturn, %rax 187 * int $0x80 syscall 188 * 189 * We also use these same functions (lx_rt_sigreturn_tramp or 190 * lx_sigreturn_tramp) to actually return from the signal handler. 191 * 192 * (Note that this trampoline code actually lives in a proper executable segment 193 * and not on the stack, but gdb checks for the exact code sequence of the 194 * trampoline code on the stack to determine whether it is in a signal stack 195 * frame or not. Really.) 196 * 197 * When the 32-bit Linux user signal handler is eventually called, the brand 198 * stack frame looks like this (in the case of a "modern" signal stack; see 199 * the lx_sigstack structure definition): 200 * 201 * ========================================================= 202 * | | lx_sigdeliver_frame_t | 203 * | ========================================================= 204 * | | Trampoline code (marker for gdb, not really executed) | 205 * | ========================================================= 206 * | | Linux struct _fpstate | 207 * | ========================================================= 208 * V | Linux ucontext_t | <--+ 209 * ========================================================= | 210 * | Linux siginfo_t | <--|-----+ 211 * ========================================================= | | 212 * | Pointer to Linux ucontext_t (or NULL) (sigaction arg2)| ---+ | 213 * ========================================================= | 214 * | Pointer to Linux siginfo_t (or NULL) (sigaction arg1)| ---------+ 215 * ========================================================= 216 * | Linux signal number (sigaction arg0)| 217 * ========================================================= 218 * | Pointer to signal return code (trampoline code) | 219 * ========================================================= 220 * 221 * The 64-bit stack-local data looks 
like this: 222 * 223 * ========================================================= 224 * | | lx_sigdeliver_frame_t | 225 * | ========================================================= 226 * | | Trampoline code (marker for gdb, not really executed) | 227 * | ========================================================= 228 * | | Linux struct _fpstate | 229 * | ========================================================= 230 * V | Linux ucontext_t | %rdx arg2 231 * ========================================================= 232 * | Linux siginfo_t | %rsi arg1 233 * ========================================================= 234 * | Pointer to signal return code (trampoline code) | 235 * ========================================================= 236 * 237 * As usual in 64-bit code, %rdi is arg0 which is the signal number. 238 * 239 * The *sigreturn(2) family of emulated system call handlers locates the 240 * "lx_sigdeliver_frame_t" struct on the Linux stack as part of processing 241 * the system call. This object contains a guard value (LX_SIGRT_MAGIC) to 242 * detect stack smashing or an incorrect stack pointer. It also contains a 243 * "return" context, which we use to get back to the "lx_sigdeliver()" frame 244 * on the native stack that originally dispatched to the Linux signal 245 * handler. The lx_sigdeliver() function is then able to return to the 246 * native libc signal handler in the usual way. This results in a further 247 * setcontext() back to whatever was running when we took the signal. 248 * 249 * There are some edge cases where the "return" context cannot be located 250 * by inspection of the Linux stack; e.g. if the guard value has been 251 * corrupted, or the emulated program has relocated parts of the signal 252 * delivery stack frame. If this case is detected, a fallback mechanism is 253 * used to attempt to find the return context. 
A chain of "lx_sigbackup_t" 254 * objects is maintained in signal interposer call frames, with the current 255 * head stored in the thread-specific "lx_tsd_t". This mechanism is 256 * similar in principle to the "lwp_oldcontext" member of the "klwp_t" used 257 * by the native signal handling infrastructure. This backup chain is used 258 * by the sigreturn(2) family of emulated system calls in the event that 259 * the Linux stack did not correctly reference a return context. 260 */ 261 262 typedef struct lx_sigdeliver_frame { 263 uintptr_t lxsdf_magic; 264 ucontext_t *lxsdf_retucp; 265 ucontext_t *lxsdf_sigucp; 266 lx_sigbackup_t *lxsdf_sigbackup; 267 } lx_sigdeliver_frame_t; 268 269 struct lx_oldsigstack { 270 void (*retaddr)(); /* address of real lx_sigreturn code */ 271 int sig; /* signal number */ 272 lx_sigcontext_t sigc; /* saved user context */ 273 lx_fpstate_t fpstate; /* saved FP state */ 274 int sig_extra; /* signal mask for signals [32 .. NSIG - 1] */ 275 char trampoline[8]; /* code for trampoline to lx_sigreturn() */ 276 }; 277 278 /* 279 * The lx_sighandlers structure needs to be a global due to the semantics of 280 * clone(). 281 * 282 * If CLONE_SIGHAND is set, the calling process and child share signal 283 * handlers, and if either calls sigaction(2) it should change the behavior 284 * in the other thread. Each thread does, however, have its own signal mask 285 * and set of pending signals. 286 * 287 * If CLONE_SIGHAND is not set, the child process should inherit a copy of 288 * the signal handlers at the time of the clone() but later calls to 289 * sigaction(2) should only affect the individual thread calling it. 290 * 291 * This maps perfectly to a thr_create(3C) thread semantic in the first 292 * case and a fork(2)-type semantic in the second case. By making 293 * lx_sighandlers global, we automatically get the correct behavior. 
294 */ 295 static lx_sighandlers_t lx_sighandlers; 296 297 /* 298 * Setting LX_NO_ABORT_HANDLER in the environment will prevent the emulated 299 * Linux program from modifying the signal handling disposition for SIGSEGV or 300 * SIGABRT. Useful for debugging programs which fall over themselves to 301 * prevent useful core files being generated. 302 */ 303 static int lx_no_abort_handler = 0; 304 305 static void lx_sigdeliver(int, siginfo_t *, ucontext_t *, size_t, void (*)(), 306 void (*)(), struct lx_sigaction *); 307 308 /* 309 * Cache result of process.max-file-descriptor to avoid calling getrctl() 310 * for each lx_ppoll(). 311 */ 312 static rlim_t maxfd = 0; 313 314 /* 315 * stol_stack() and ltos_stack() convert between Illumos and Linux stack_t 316 * structures. 317 * 318 * These routines are needed because although the two structures have the same 319 * contents, their contents are declared in a different order, so the content 320 * of the structures cannot be copied with a simple bcopy(). 
321 */ 322 static void 323 stol_stack(stack_t *fr, lx_stack_t *to) 324 { 325 to->ss_sp = fr->ss_sp; 326 to->ss_flags = fr->ss_flags; 327 to->ss_size = fr->ss_size; 328 } 329 330 static void 331 ltos_stack(lx_stack_t *fr, stack_t *to) 332 { 333 to->ss_sp = fr->ss_sp; 334 to->ss_flags = fr->ss_flags; 335 to->ss_size = fr->ss_size; 336 } 337 338 static int 339 ltos_sigset(lx_sigset_t *lx_sigsetp, sigset_t *s_sigsetp) 340 { 341 lx_sigset_t l; 342 int lx_sig, sig; 343 344 if (uucopy(lx_sigsetp, &l, sizeof (lx_sigset_t)) != 0) 345 return (-errno); 346 347 (void) sigemptyset(s_sigsetp); 348 349 for (lx_sig = 1; lx_sig <= LX_NSIG; lx_sig++) { 350 if (lx_sigismember(&l, lx_sig) && 351 ((sig = ltos_signo[lx_sig]) > 0)) 352 (void) sigaddset(s_sigsetp, sig); 353 } 354 355 return (0); 356 } 357 358 static int 359 stol_sigset(sigset_t *s_sigsetp, lx_sigset_t *lx_sigsetp) 360 { 361 lx_sigset_t l; 362 int sig, lx_sig; 363 364 bzero(&l, sizeof (lx_sigset_t)); 365 366 for (sig = 1; sig < NSIG; sig++) { 367 if (sigismember(s_sigsetp, sig) && 368 ((lx_sig = stol_signo[sig]) > 0)) 369 lx_sigaddset(&l, lx_sig); 370 } 371 372 return ((uucopy(&l, lx_sigsetp, sizeof (lx_sigset_t)) != 0) 373 ? -errno : 0); 374 } 375 376 #if defined(_ILP32) 377 static int 378 ltos_osigset(lx_osigset_t *lx_osigsetp, sigset_t *s_sigsetp) 379 { 380 lx_osigset_t lo; 381 int lx_sig, sig; 382 383 if (uucopy(lx_osigsetp, &lo, sizeof (lx_osigset_t)) != 0) 384 return (-errno); 385 386 (void) sigemptyset(s_sigsetp); 387 388 for (lx_sig = 1; lx_sig <= OSIGSET_NBITS; lx_sig++) 389 if ((lo & OSIGSET_BITSET(lx_sig)) && 390 ((sig = ltos_signo[lx_sig]) > 0)) 391 (void) sigaddset(s_sigsetp, sig); 392 393 return (0); 394 } 395 396 static int 397 stol_osigset(sigset_t *s_sigsetp, lx_osigset_t *lx_osigsetp) 398 { 399 lx_osigset_t lo = 0; 400 int lx_sig, sig; 401 402 /* 403 * Note that an lx_osigset_t can only represent the signals from 404 * [1 .. 
OSIGSET_NBITS], so even though a signal may be present in the 405 * Illumos sigset_t, it may not be representable as a bit in the 406 * lx_osigset_t. 407 */ 408 for (sig = 1; sig < NSIG; sig++) 409 if (sigismember(s_sigsetp, sig) && 410 ((lx_sig = stol_signo[sig]) > 0) && 411 (lx_sig <= OSIGSET_NBITS)) 412 lo |= OSIGSET_BITSET(lx_sig); 413 414 return ((uucopy(&lo, lx_osigsetp, sizeof (lx_osigset_t)) != 0) 415 ? -errno : 0); 416 } 417 #endif 418 419 static int 420 ltos_sigcode(int si_code) 421 { 422 switch (si_code) { 423 case LX_SI_USER: 424 return (SI_USER); 425 case LX_SI_TKILL: 426 return (SI_LWP); 427 case LX_SI_QUEUE: 428 return (SI_QUEUE); 429 case LX_SI_TIMER: 430 return (SI_TIMER); 431 case LX_SI_ASYNCIO: 432 return (SI_ASYNCIO); 433 case LX_SI_MESGQ: 434 return (SI_MESGQ); 435 default: 436 return (LX_SI_CODE_NOT_EXIST); 437 } 438 } 439 440 int 441 stol_siginfo(siginfo_t *siginfop, lx_siginfo_t *lx_siginfop) 442 { 443 int ret = 0; 444 lx_siginfo_t lx_siginfo; 445 446 bzero(&lx_siginfo, sizeof (*lx_siginfop)); 447 448 if ((lx_siginfo.lsi_signo = stol_signo[siginfop->si_signo]) <= 0) { 449 /* 450 * Depending on the caller we may still need to get a usable 451 * converted siginfo struct. 452 */ 453 lx_siginfo.lsi_signo = LX_SIGKILL; 454 errno = EINVAL; 455 ret = -1; 456 } 457 458 lx_siginfo.lsi_code = lx_stol_sigcode(siginfop->si_code); 459 lx_siginfo.lsi_errno = siginfop->si_errno; 460 461 switch (lx_siginfo.lsi_signo) { 462 /* 463 * Semantics ARE defined for SIGKILL, but since 464 * we can't catch it, we can't translate it. 
:-( 465 */ 466 case LX_SIGPOLL: 467 lx_siginfo.lsi_band = siginfop->si_band; 468 lx_siginfo.lsi_fd = siginfop->si_fd; 469 break; 470 471 case LX_SIGCHLD: 472 lx_siginfo.lsi_pid = siginfop->si_pid; 473 if (siginfop->si_code <= 0 || siginfop->si_code == 474 CLD_EXITED) { 475 lx_siginfo.lsi_status = siginfop->si_status; 476 } else { 477 lx_siginfo.lsi_status = lx_stol_status( 478 siginfop->si_status, -1); 479 } 480 lx_siginfo.lsi_utime = siginfop->si_utime; 481 lx_siginfo.lsi_stime = siginfop->si_stime; 482 break; 483 484 case LX_SIGILL: 485 case LX_SIGBUS: 486 case LX_SIGFPE: 487 case LX_SIGSEGV: 488 lx_siginfo.lsi_addr = siginfop->si_addr; 489 break; 490 491 default: 492 lx_siginfo.lsi_pid = siginfop->si_pid; 493 lx_siginfo.lsi_uid = 494 LX_UID32_TO_UID16(siginfop->si_uid); 495 lx_siginfo.lsi_value = siginfop->si_value; 496 break; 497 } 498 499 if (uucopy(&lx_siginfo, lx_siginfop, sizeof (lx_siginfo_t)) != 0) 500 return (-errno); 501 return ((ret != 0) ? -errno : 0); 502 } 503 504 static void 505 stol_fpstate(fpregset_t *fpr, lx_fpstate_t *lfpr) 506 { 507 size_t copy_len; 508 509 #if defined(_LP64) 510 /* 511 * The 64-bit Illumos struct fpregset_t and lx_fpstate_t are identical 512 * so just bcopy() those entries (see usr/src/uts/intel/sys/regset.h 513 * for __amd64's struct fpu). 514 */ 515 copy_len = sizeof (fpr->fp_reg_set.fpchip_state); 516 bcopy(fpr, lfpr, copy_len); 517 518 #else /* is _ILP32 */ 519 struct _fpstate *fpsp = (struct _fpstate *)fpr; 520 521 /* 522 * The Illumos struct _fpstate and lx_fpstate_t are identical from the 523 * beginning of the structure to the lx_fpstate_t "magic" field, so 524 * just bcopy() those entries. 525 */ 526 copy_len = (size_t)&(((lx_fpstate_t *)0)->magic); 527 bcopy(fpsp, lfpr, copy_len); 528 529 /* 530 * These fields are all only significant for the first 16 bits. 
531 */ 532 lfpr->cw &= 0xffff; /* x87 control word */ 533 lfpr->tag &= 0xffff; /* x87 tag word */ 534 lfpr->cssel &= 0xffff; /* cs selector */ 535 lfpr->datasel &= 0xffff; /* ds selector */ 536 537 /* 538 * Linux wants the x87 status word field to contain the value of the 539 * x87 saved exception status word. 540 */ 541 lfpr->sw = lfpr->status & 0xffff; /* x87 status word */ 542 543 lfpr->mxcsr = fpsp->mxcsr; 544 545 if (fpsp->mxcsr != 0) { 546 /* 547 * Linux uses the "magic" field to denote whether the XMM 548 * registers contain legal data or not. Since we can't get to 549 * %cr4 from userland to check the status of the OSFXSR bit, 550 * check the mxcsr field to see if it's 0, which it should 551 * never be on a system with the OXFXSR bit enabled. 552 */ 553 lfpr->magic = LX_X86_FXSR_MAGIC; 554 bcopy(fpsp->xmm, lfpr->_xmm, sizeof (lfpr->_xmm)); 555 } else { 556 lfpr->magic = LX_X86_FXSR_NONE; 557 } 558 #endif 559 } 560 561 static void 562 ltos_fpstate(lx_fpstate_t *lfpr, fpregset_t *fpr) 563 { 564 size_t copy_len; 565 566 #if defined(_LP64) 567 /* 568 * The 64-bit Illumos struct fpregset_t and lx_fpstate_t are identical 569 * so just bcopy() those entries (see usr/src/uts/intel/sys/regset.h 570 * for __amd64's struct fpu). 571 */ 572 copy_len = sizeof (fpr->fp_reg_set.fpchip_state); 573 bcopy(lfpr, fpr, copy_len); 574 575 #else /* is _ILP32 */ 576 struct _fpstate *fpsp = (struct _fpstate *)fpr; 577 578 /* 579 * The lx_fpstate_t and Illumos struct _fpstate are identical from the 580 * beginning of the structure to the struct _fpstate "mxcsr" field, so 581 * just bcopy() those entries. 582 * 583 * Note that we do NOT have to propogate changes the user may have made 584 * to the "status" word back to the "sw" word, unlike the way we have 585 * to deal with processing the ESP and UESP register values on return 586 * from a signal handler. 
587 */ 588 copy_len = (size_t)&(((struct _fpstate *)0)->mxcsr); 589 bcopy(lfpr, fpsp, copy_len); 590 591 /* 592 * These fields are all only significant for the first 16 bits. 593 */ 594 fpsp->cw &= 0xffff; /* x87 control word */ 595 fpsp->sw &= 0xffff; /* x87 status word */ 596 fpsp->tag &= 0xffff; /* x87 tag word */ 597 fpsp->cssel &= 0xffff; /* cs selector */ 598 fpsp->datasel &= 0xffff; /* ds selector */ 599 fpsp->status &= 0xffff; /* saved status */ 600 601 fpsp->mxcsr = lfpr->mxcsr; 602 603 if (lfpr->magic == LX_X86_FXSR_MAGIC) 604 bcopy(lfpr->_xmm, fpsp->xmm, sizeof (fpsp->xmm)); 605 #endif 606 } 607 608 /* 609 * We do not use the system sigaltstack() infrastructure as that would conflict 610 * with our handling of both system call emulation and native signals on the 611 * native stack. Instead, we track the Linux stack structure in our 612 * thread-specific data. This function is modeled on the behaviour of the 613 * native sigaltstack system call handler. 614 */ 615 long 616 lx_sigaltstack(uintptr_t ssp, uintptr_t oss) 617 { 618 lx_tsd_t *lxtsd = lx_get_tsd(); 619 lx_stack_t ss; 620 621 if (ssp != NULL) { 622 if (lxtsd->lxtsd_sigaltstack.ss_flags & LX_SS_ONSTACK) { 623 /* 624 * If we are currently using the installed alternate 625 * stack for signal handling, the user may not modify 626 * the stack for this thread. 627 */ 628 return (-EPERM); 629 } 630 631 if (uucopy((void *)ssp, &ss, sizeof (ss)) != 0) { 632 return (-EFAULT); 633 } 634 635 if (ss.ss_flags & ~LX_SS_DISABLE) { 636 /* 637 * The user may not specify a value for flags other 638 * than 0 or SS_DISABLE. 639 */ 640 return (-EINVAL); 641 } 642 643 if (!(ss.ss_flags & LX_SS_DISABLE) && ss.ss_size < 644 LX_MINSIGSTKSZ) { 645 return (-ENOMEM); 646 } 647 } 648 649 if (oss != NULL) { 650 /* 651 * User provided old and new stack_t pointers may point to 652 * the same location. Copy out before we modify. 
653 */ 654 if (uucopy(&lxtsd->lxtsd_sigaltstack, (void *)oss, 655 sizeof (lxtsd->lxtsd_sigaltstack)) != 0) { 656 return (-EFAULT); 657 } 658 } 659 660 if (ssp != NULL) { 661 lxtsd->lxtsd_sigaltstack = ss; 662 } 663 664 return (0); 665 } 666 667 #if defined(_ILP32) 668 /* 669 * The following routines are needed because sigset_ts and siginfo_ts are 670 * different in format between Linux and Illumos. 671 * 672 * Note that there are two different lx_sigset structures, lx_sigset_ts and 673 * lx_osigset_ts: 674 * 675 * + An lx_sigset_t is the equivalent of a Illumos sigset_t and supports 676 * more than 32 signals. 677 * 678 * + An lx_osigset_t is simply a uint32_t, so it by definition only supports 679 * 32 signals. 680 * 681 * When there are two versions of a routine, one prefixed with lx_rt_ and 682 * one prefixed with lx_ alone, in GENERAL the lx_rt_ routines deal with 683 * lx_sigset_ts while the lx_ routines deal with lx_osigset_ts. Unfortunately, 684 * this is not always the case (e.g. lx_sigreturn() vs. lx_rt_sigreturn()) 685 */ 686 long 687 lx_sigpending(uintptr_t sigpend) 688 { 689 sigset_t sigpendset; 690 691 if (sigpending(&sigpendset) != 0) 692 return (-errno); 693 694 return (stol_osigset(&sigpendset, (lx_osigset_t *)sigpend)); 695 } 696 #endif 697 698 long 699 lx_rt_sigpending(uintptr_t sigpend, uintptr_t setsize) 700 { 701 sigset_t sigpendset; 702 703 if ((size_t)setsize != sizeof (lx_sigset_t)) 704 return (-EINVAL); 705 706 if (sigpending(&sigpendset) != 0) 707 return (-errno); 708 709 return (stol_sigset(&sigpendset, (lx_sigset_t *)sigpend)); 710 } 711 712 /* 713 * Create a common routine to encapsulate all of the sigprocmask code, 714 * as the only difference between lx_sigprocmask() and lx_rt_sigprocmask() 715 * is the usage of lx_osigset_ts vs. lx_sigset_ts, as toggled in the code by 716 * the setting of the "sigset_type" flag. 
717 */ 718 static int 719 lx_sigprocmask_common(uintptr_t how, uintptr_t l_setp, uintptr_t l_osetp, 720 uintptr_t sigset_type) 721 { 722 int err = 0; 723 sigset_t set, oset; 724 sigset_t *s_setp = NULL; 725 sigset_t *s_osetp; 726 727 if (l_setp) { 728 switch (how) { 729 case LX_SIG_BLOCK: 730 how = SIG_BLOCK; 731 break; 732 733 case LX_SIG_UNBLOCK: 734 how = SIG_UNBLOCK; 735 break; 736 737 case LX_SIG_SETMASK: 738 how = SIG_SETMASK; 739 break; 740 741 default: 742 return (-EINVAL); 743 } 744 745 s_setp = &set; 746 747 /* Only 32-bit code passes other than USE_SIGSET */ 748 if (sigset_type == USE_SIGSET) 749 err = ltos_sigset((lx_sigset_t *)l_setp, s_setp); 750 #if defined(_ILP32) 751 else 752 err = ltos_osigset((lx_osigset_t *)l_setp, s_setp); 753 #endif 754 755 if (err != 0) 756 return (err); 757 758 } 759 760 s_osetp = (l_osetp ? &oset : NULL); 761 762 /* 763 * In a multithreaded environment, a call to sigprocmask(2) should 764 * only affect the current thread's signal mask so we don't need to 765 * explicitly call thr_sigsetmask(3C) here. 766 */ 767 if (sigprocmask(how, s_setp, s_osetp) != 0) 768 return (-errno); 769 770 if (l_osetp) { 771 if (sigset_type == USE_SIGSET) 772 err = stol_sigset(s_osetp, (lx_sigset_t *)l_osetp); 773 #if defined(_ILP32) 774 else 775 err = stol_osigset(s_osetp, (lx_osigset_t *)l_osetp); 776 #endif 777 778 if (err != 0) { 779 /* 780 * Encountered a fault while writing to the old signal 781 * mask buffer, so unwind the signal mask change made 782 * above. 
783 */ 784 (void) sigprocmask(how, s_osetp, (sigset_t *)NULL); 785 return (err); 786 } 787 } 788 789 return (0); 790 } 791 792 #if defined(_ILP32) 793 long 794 lx_sigprocmask(uintptr_t how, uintptr_t setp, uintptr_t osetp) 795 { 796 return (lx_sigprocmask_common(how, setp, osetp, USE_OSIGSET)); 797 } 798 #endif 799 800 long 801 lx_rt_sigprocmask(uintptr_t how, uintptr_t setp, uintptr_t osetp, 802 uintptr_t setsize) 803 { 804 if ((size_t)setsize != sizeof (lx_sigset_t)) 805 return (-EINVAL); 806 807 return (lx_sigprocmask_common(how, setp, osetp, USE_SIGSET)); 808 } 809 810 #if defined(_ILP32) 811 long 812 lx_sigsuspend(uintptr_t set) 813 { 814 sigset_t s_set; 815 816 if (ltos_osigset((lx_osigset_t *)set, &s_set) != 0) 817 return (-errno); 818 819 return ((sigsuspend(&s_set) == -1) ? -errno : 0); 820 } 821 #endif 822 823 long 824 lx_rt_sigsuspend(uintptr_t set, uintptr_t setsize) 825 { 826 sigset_t s_set; 827 828 if ((size_t)setsize != sizeof (lx_sigset_t)) 829 return (-EINVAL); 830 831 if (ltos_sigset((lx_sigset_t *)set, &s_set) != 0) 832 return (-errno); 833 834 return ((sigsuspend(&s_set) == -1) ? -errno : 0); 835 } 836 837 long 838 lx_rt_sigwaitinfo(uintptr_t set, uintptr_t sinfo, uintptr_t setsize) 839 { 840 sigset_t s_set; 841 siginfo_t s_sinfo, *s_sinfop; 842 int rc; 843 844 lx_sigset_t *setp = (lx_sigset_t *)set; 845 lx_siginfo_t *sinfop = (lx_siginfo_t *)sinfo; 846 847 if ((size_t)setsize != sizeof (lx_sigset_t)) 848 return (-EINVAL); 849 850 if (ltos_sigset(setp, &s_set) != 0) 851 return (-errno); 852 853 s_sinfop = (sinfop == NULL) ? NULL : &s_sinfo; 854 855 if ((rc = sigwaitinfo(&s_set, s_sinfop)) == -1) 856 return (-errno); 857 858 if (s_sinfop == NULL) 859 return (stol_signo[rc]); 860 861 return ((stol_siginfo(s_sinfop, sinfop) != 0) 862 ? 
-errno : stol_signo[rc]); 863 } 864 865 long 866 lx_rt_sigtimedwait(uintptr_t set, uintptr_t sinfo, uintptr_t toutp, 867 uintptr_t setsize) 868 { 869 sigset_t s_set; 870 siginfo_t s_sinfo, *s_sinfop; 871 int rc; 872 873 lx_sigset_t *setp = (lx_sigset_t *)set; 874 lx_siginfo_t *sinfop = (lx_siginfo_t *)sinfo; 875 876 if ((size_t)setsize != sizeof (lx_sigset_t)) 877 return (-EINVAL); 878 879 if (ltos_sigset(setp, &s_set) != 0) 880 return (-errno); 881 882 s_sinfop = (sinfop == NULL) ? NULL : &s_sinfo; 883 884 /* 885 * "If timeout is the NULL pointer, the behavior is unspecified." 886 * Match what LTP expects. 887 */ 888 if ((rc = sigtimedwait(&s_set, s_sinfop, 889 (struct timespec *)toutp)) == -1) 890 return (toutp == NULL ? -EINTR : -errno); 891 892 if (s_sinfop == NULL) 893 return (stol_signo[rc]); 894 895 return ((stol_siginfo(s_sinfop, sinfop) != 0) 896 ? -errno : stol_signo[rc]); 897 } 898 899 static void 900 lx_sigreturn_find_native_context(const char *caller, ucontext_t **sigucp, 901 ucontext_t **retucp, uintptr_t sp) 902 { 903 lx_tsd_t *lxtsd = lx_get_tsd(); 904 lx_sigdeliver_frame_t *lxsdfp = (lx_sigdeliver_frame_t *)sp; 905 lx_sigdeliver_frame_t lxsdf; 906 boolean_t copy_ok; 907 908 lx_debug("%s: reading lx_sigdeliver_frame_t @ %p\n", caller, lxsdfp); 909 if (uucopy(lxsdfp, &lxsdf, sizeof (lxsdf)) != 0) { 910 lx_debug("%s: failed to read lx_sigdeliver_frame_t @ %p\n", 911 lxsdfp); 912 913 copy_ok = B_FALSE; 914 } else { 915 lx_debug("%s: lxsdf: magic %p retucp %p sigucp %p\n", caller, 916 lxsdf.lxsdf_magic, lxsdf.lxsdf_retucp, lxsdf.lxsdf_sigucp); 917 918 copy_ok = B_TRUE; 919 } 920 921 /* 922 * lx_sigdeliver() pushes a lx_sigdeliver_frame_t onto the stack 923 * before it creates the struct lx_oldsigstack. 
924 */ 925 if (copy_ok && lxsdf.lxsdf_magic == LX_SIGRT_MAGIC) { 926 LX_SIGNAL_DELIVERY_FRAME_FOUND(lxsdfp); 927 928 /* 929 * The guard value is intact; use the context pointers stored 930 * in the signal delivery frame: 931 */ 932 *sigucp = lxsdf.lxsdf_sigucp; 933 *retucp = lxsdf.lxsdf_retucp; 934 935 /* 936 * Ensure that the backup signal delivery chain is in sync with 937 * the frame we are returning via: 938 */ 939 lxtsd->lxtsd_sigbackup = lxsdf.lxsdf_sigbackup; 940 } else { 941 /* 942 * The guard value was not intact. Either the program smashed 943 * the stack unintentionally, or worse: intentionally moved 944 * some parts of the signal delivery frame we constructed to 945 * another location before calling rt_sigreturn(2). 946 */ 947 LX_SIGNAL_DELIVERY_FRAME_CORRUPT(lxsdfp); 948 949 if (lxtsd->lxtsd_sigbackup == NULL) { 950 /* 951 * There was no backup context to use, so we must 952 * kill the process. 953 */ 954 if (copy_ok) { 955 lx_err_fatal("%s: sp 0x%p, expected 0x%x, " 956 "found 0x%x!", caller, sp, LX_SIGRT_MAGIC, 957 lxsdf.lxsdf_magic); 958 } else { 959 lx_err_fatal("%s: sp 0x%p, could not read " 960 "magic", caller, sp); 961 } 962 } 963 964 /* 965 * Attempt to recover by using the backup signal delivery 966 * chain: 967 */ 968 lx_debug("%s: SIGRT_MAGIC not found @ sp %p; using backup " 969 "@ %p\n", caller, (void *)sp, lxtsd->lxtsd_sigbackup); 970 *sigucp = lxtsd->lxtsd_sigbackup->lxsb_sigucp; 971 *retucp = lxtsd->lxtsd_sigbackup->lxsb_retucp; 972 } 973 } 974 975 #if defined(_ILP32) 976 /* 977 * Intercept the Linux sigreturn() syscall to turn it into the return through 978 * the libc call stack that Illumos expects. 979 * 980 * When control returns to libc's call_user_handler() routine, a setcontext(2) 981 * will be done that returns thread execution to the point originally 982 * interrupted by receipt of the signal. 983 * 984 * This is only used by 32-bit code. 
#if defined(_ILP32)
/*
 * Intercept the Linux sigreturn() syscall to turn it into the return through
 * the libc call stack that Illumos expects.
 *
 * When control returns to libc's call_user_handler() routine, a setcontext(2)
 * will be done that returns thread execution to the point originally
 * interrupted by receipt of the signal.
 *
 * The function takes no arguments: everything it needs is recovered from the
 * register state returned by lx_syscall_regs() and from the struct
 * lx_oldsigstack found at the (adjusted) stack pointer.  It does not return
 * to its caller on success; control leaves via setcontext().
 *
 * This is only used by 32-bit code.
 */
long
lx_sigreturn(void)
{
	struct lx_oldsigstack *lx_ossp;
	lx_sigset_t lx_sigset;
	ucontext_t *ucp;
	ucontext_t *sigucp;
	ucontext_t *retucp;
	uintptr_t sp;

	ucp = lx_syscall_regs();

	/*
	 * NOTE: The sp saved in the context is eight bytes off of where we
	 *	 need it to be (either due to trampoline or the copying of
	 *	 sp = uesp, not clear which).
	 */
	sp = LX_REG(ucp, REG_SP) - 8;

	/*
	 * At this point, the stack pointer should point to the struct
	 * lx_oldsigstack that lx_build_old_signal_frame() constructed and
	 * placed on the stack.  We need to reference it a bit later, so
	 * save a pointer to it before incrementing our copy of the sp.
	 */
	lx_ossp = (struct lx_oldsigstack *)sp;
	sp += SA(sizeof (struct lx_oldsigstack));

	/*
	 * "sp" now addresses the location where the lx_sigdeliver_frame_t is
	 * expected; look up the interrupted and return contexts from it.
	 */
	lx_sigreturn_find_native_context(__func__, &sigucp, &retucp, sp);

	/*
	 * We need to copy machine registers the Linux signal handler may have
	 * modified back to the Illumos ucontext_t.
	 *
	 * General registers copy across as-is, except Linux expects that
	 * changes made to uc_mcontext.gregs[ESP] will be reflected when the
	 * interrupted thread resumes execution after the signal handler. To
	 * emulate this behavior, we must modify uc_mcontext.gregs[UESP] to
	 * match uc_mcontext.gregs[ESP] as Illumos will restore the UESP
	 * value to ESP.
	 */
	lx_ossp->sigc.sc_esp_at_signal = lx_ossp->sigc.sc_esp;
	bcopy(&lx_ossp->sigc, &sigucp->uc_mcontext, sizeof (gregset_t));

	LX_SIGRETURN(NULL, sigucp, sp);

	/* copy back FP regs if present */
	if (lx_ossp->sigc.sc_fpstate != NULL)
		ltos_fpstate(&lx_ossp->fpstate, &sigucp->uc_mcontext.fpregs);

	/*
	 * convert Linux signal mask back to its Illumos equivalent; the old
	 * frame stores the mask split across sc_mask and sig_extra
	 */
	bzero(&lx_sigset, sizeof (lx_sigset_t));
	lx_sigset.__bits[0] = lx_ossp->sigc.sc_mask;
	lx_sigset.__bits[1] = lx_ossp->sig_extra;
	(void) ltos_sigset(&lx_sigset, &sigucp->uc_sigmask);

	/*
	 * For signal mask handling to be done properly, this call needs to
	 * return to the libc routine that originally called the signal handler
	 * rather than directly set the context back to the place the signal
	 * interrupted execution as the original Linux code would do.
	 */
	lx_debug("lx_sigreturn: calling setcontext; retucp %p flags %lx "
	    "link %p\n", retucp, retucp->uc_flags, retucp->uc_link);
	setcontext(retucp);
	assert(0);

	/*NOTREACHED*/
	return (0);
}
#endif
/*
 * This signal return syscall is used by both 32-bit and 64-bit code.
 *
 * The function takes no arguments.  It recovers the struct lx_sigstack that
 * lx_build_signal_frame() placed on the stack, copies the (possibly
 * modified) Linux register, FP, signal mask and stack state back into the
 * native context that was interrupted by the signal, and then leaves via
 * setcontext() on the return context.  It does not return to its caller on
 * success.
 */
long
lx_rt_sigreturn(void)
{
	struct lx_sigstack *lx_ssp;
	lx_ucontext_t *lx_ucp;
	ucontext_t *ucp;
	ucontext_t *sigucp;
	ucontext_t *retucp;
	uintptr_t sp;

	/* Get the registers at the emulated Linux rt_sigreturn syscall */
	ucp = lx_syscall_regs();

#if defined(_ILP32)
	lx_debug("lx_rt_sigreturn: ESP %p UESP %p\n", LX_REG(ucp, ESP),
	    LX_REG(ucp, UESP));
	/*
	 * For 32-bit
	 *
	 * NOTE: Because of the silly compatibility measures done in the
	 *	 signal trampoline code to make sure the stack holds the
	 *	 _exact same_ instruction sequence Linux does, we have to
	 *	 manually "pop" some extra instructions off the stack here
	 *	 before passing the stack address to the syscall because the
	 *	 trampoline code isn't allowed to do it due to the gdb
	 *	 compatibility issues.
	 *
	 *	 No, I'm not kidding.
	 *
	 *	 The sp saved in the context is eight bytes off of where we
	 *	 need it to be (either due to trampoline or the copying of
	 *	 sp = uesp, not clear which but looks like the uesp case), so
	 *	 the need to pop the extra four byte instruction means we need
	 *	 to subtract a net four bytes from the sp before "popping" the
	 *	 struct lx_sigstack off the stack.
	 *
	 *	 This will yield the value the stack pointer had before
	 *	 lx_sigdeliver() created the stack frame for the Linux signal
	 *	 handler.
	 */
	sp = (uintptr_t)LX_REG(ucp, REG_SP) - 4;
#else
	/*
	 * We need to make an adjustment for 64-bit code as well.  Since 64-bit
	 * does not use the trampoline, it's probably for the same reason as
	 * alluded to above.
	 */
	sp = (uintptr_t)LX_REG(ucp, REG_SP) - 8;
#endif

	/*
	 * At this point, the stack pointer should point to the struct
	 * lx_sigstack that lx_build_signal_frame() constructed and
	 * placed on the stack.  We need to reference it a bit later, so
	 * save a pointer to it before incrementing our copy of the sp.
	 */
	lx_ssp = (struct lx_sigstack *)sp;
	sp += SA(sizeof (struct lx_sigstack));

#if defined(_LP64)
	/*
	 * The 64-bit lx_sigdeliver() inserts 8 bytes of padding between
	 * the lx_sigstack_t and the delivery frame to maintain ABI stack
	 * alignment.
	 */
	sp += 8;
#endif

	/*
	 * "sp" now addresses the location where the lx_sigdeliver_frame_t is
	 * expected; look up the interrupted and return contexts from it.
	 */
	lx_sigreturn_find_native_context(__func__, &sigucp, &retucp, sp);

	/*
	 * We need to copy machine registers the Linux signal handler may have
	 * modified back to the Illumos version.
	 */
#if defined(_LP64)
	lx_ucp = &lx_ssp->uc;

	/*
	 * General register layout is completely different, so each register
	 * is copied individually.  Note that the Linux sc_pad0 slot carries
	 * the value restored to REG_SS.
	 */
	LX_REG(sigucp, REG_R15) = lx_ucp->uc_sigcontext.sc_r15;
	LX_REG(sigucp, REG_R14) = lx_ucp->uc_sigcontext.sc_r14;
	LX_REG(sigucp, REG_R13) = lx_ucp->uc_sigcontext.sc_r13;
	LX_REG(sigucp, REG_R12) = lx_ucp->uc_sigcontext.sc_r12;
	LX_REG(sigucp, REG_R11) = lx_ucp->uc_sigcontext.sc_r11;
	LX_REG(sigucp, REG_R10) = lx_ucp->uc_sigcontext.sc_r10;
	LX_REG(sigucp, REG_R9) = lx_ucp->uc_sigcontext.sc_r9;
	LX_REG(sigucp, REG_R8) = lx_ucp->uc_sigcontext.sc_r8;
	LX_REG(sigucp, REG_RDI) = lx_ucp->uc_sigcontext.sc_rdi;
	LX_REG(sigucp, REG_RSI) = lx_ucp->uc_sigcontext.sc_rsi;
	LX_REG(sigucp, REG_RBP) = lx_ucp->uc_sigcontext.sc_rbp;
	LX_REG(sigucp, REG_RBX) = lx_ucp->uc_sigcontext.sc_rbx;
	LX_REG(sigucp, REG_RDX) = lx_ucp->uc_sigcontext.sc_rdx;
	LX_REG(sigucp, REG_RCX) = lx_ucp->uc_sigcontext.sc_rcx;
	LX_REG(sigucp, REG_RAX) = lx_ucp->uc_sigcontext.sc_rax;
	LX_REG(sigucp, REG_TRAPNO) = lx_ucp->uc_sigcontext.sc_trapno;
	LX_REG(sigucp, REG_ERR) = lx_ucp->uc_sigcontext.sc_err;
	LX_REG(sigucp, REG_RIP) = lx_ucp->uc_sigcontext.sc_rip;
	LX_REG(sigucp, REG_CS) = lx_ucp->uc_sigcontext.sc_cs;
	LX_REG(sigucp, REG_RFL) = lx_ucp->uc_sigcontext.sc_eflags;
	LX_REG(sigucp, REG_RSP) = lx_ucp->uc_sigcontext.sc_rsp;
	LX_REG(sigucp, REG_SS) = lx_ucp->uc_sigcontext.sc_pad0;
	LX_REG(sigucp, REG_FS) = lx_ucp->uc_sigcontext.sc_fs;
	LX_REG(sigucp, REG_GS) = lx_ucp->uc_sigcontext.sc_gs;

#else /* is _ILP32 */
	lx_ucp = &lx_ssp->uc;

	/*
	 * Illumos and Linux both follow the SysV i386 ABI layout for the
	 * mcontext.
	 *
	 * General registers copy across as-is, except Linux expects that
	 * changes made to uc_mcontext.gregs[ESP] will be reflected when the
	 * interrupted thread resumes execution after the signal handler. To
	 * emulate this behavior, we must modify uc_mcontext.gregs[UESP] to
	 * match uc_mcontext.gregs[ESP] as Illumos will restore the UESP value
	 * to ESP.
	 */
	lx_ucp->uc_sigcontext.sc_esp_at_signal = lx_ucp->uc_sigcontext.sc_esp;

	bcopy(&lx_ucp->uc_sigcontext, &sigucp->uc_mcontext.gregs,
	    sizeof (gregset_t));
#endif

	LX_SIGRETURN(lx_ucp, sigucp, sp);

	/* copy back FP regs if the Linux context carries a pointer to them */
	if (lx_ucp->uc_sigcontext.sc_fpstate != NULL) {
		ltos_fpstate(lx_ucp->uc_sigcontext.sc_fpstate,
		    &sigucp->uc_mcontext.fpregs);
	}

	/*
	 * Convert the Linux signal mask and stack back to their
	 * Illumos equivalents.
	 */
	(void) ltos_sigset(&lx_ucp->uc_sigmask, &sigucp->uc_sigmask);
	ltos_stack(&lx_ucp->uc_stack, &sigucp->uc_stack);

	/*
	 * For signal mask handling to be done properly, this call needs to
	 * return to the libc routine that originally called the signal handler
	 * rather than directly set the context back to the place the signal
	 * interrupted execution as the original Linux code would do.
	 */
	lx_debug("lx_rt_sigreturn: calling setcontext; retucp %p\n", retucp);
	setcontext(retucp);
	assert(0);

	/*NOTREACHED*/
	return (0);
}
#if defined(_ILP32)
/*
 * Stack builder for "old" (legacy, non-SA_SIGINFO) Linux signal delivery.
 * Only 32-bit code uses this builder.
 *
 *	lx_sig	- Linux signal number being delivered
 *	sip	- native siginfo for the signal; may be NULL
 *	p	- native ucontext_t describing the interrupted context
 *	sp	- address at which the struct lx_oldsigstack is assembled
 *	hargs	- register argument vector; not used by this builder
 */
static void
lx_build_old_signal_frame(int lx_sig, siginfo_t *sip, void *p, void *sp,
    uintptr_t *hargs)
{
	extern void lx_sigreturn_tramp();

	struct lx_oldsigstack *frame = sp;
	ucontext_t *native_uc = (ucontext_t *)p;
	struct lx_sigaction *act;
	lx_sigset_t lx_mask;

	lx_debug("building old signal frame for lx sig %d at 0x%p", lx_sig, sp);

	frame->sig = lx_sig;
	act = &lx_sighandlers.lx_sa[lx_sig];
	lx_debug("lxsap @ 0x%p", act);

	/*
	 * Return through the program's own restorer when it registered one,
	 * and through our trampoline otherwise.
	 */
	if ((act->lxsa_flags & LX_SA_RESTORER) && act->lxsa_restorer) {
		frame->retaddr = act->lxsa_restorer;
		lx_debug("lxsa_restorer exists @ 0x%p", frame->retaddr);
	} else {
		frame->retaddr = lx_sigreturn_tramp;
		lx_debug("lx_ossp->retaddr set to 0x%p", lx_sigreturn_tramp);
	}

	lx_debug("osf retaddr = 0x%p", frame->retaddr);

	/*
	 * Translate the Illumos signal mask; the old frame keeps the two
	 * words of the Linux mask in separate fields.
	 */
	(void) stol_sigset(&native_uc->uc_sigmask, &lx_mask);
	frame->sigc.sc_mask = lx_mask.__bits[0];
	frame->sig_extra = lx_mask.__bits[1];

	/*
	 * General registers copy across as-is, except Linux expects that
	 * uc_mcontext.gregs[ESP] == uc_mcontext.gregs[UESP] on receipt of a
	 * signal.
	 */
	bcopy(&native_uc->uc_mcontext, &frame->sigc, sizeof (gregset_t));
	frame->sigc.sc_esp = frame->sigc.sc_esp_at_signal;

	/*
	 * cr2 contains the faulting address, and Linux only sets cr2 for a
	 * segmentation fault.
	 */
	if ((lx_sig == LX_SIGSEGV) && (sip)) {
		frame->sigc.sc_cr2 = (uintptr_t)sip->si_addr;
	} else {
		frame->sigc.sc_cr2 = 0;
	}

	/* Convert the FP registers when the native context carries them. */
	if (native_uc->uc_flags & UC_FPU) {
		stol_fpstate(&native_uc->uc_mcontext.fpregs, &frame->fpstate);
		frame->sigc.sc_fpstate = &frame->fpstate;
	} else {
		frame->sigc.sc_fpstate = NULL;
	}

	/*
	 * gdb inspects the stack for the trampoline instructions in order to
	 * recognise a signal handler frame, so a copy is placed in the frame
	 * even though that copy is never executed.
	 */
	bcopy((void *)lx_sigreturn_tramp, frame->trampoline,
	    sizeof (frame->trampoline));
}
#endif
1266 (uintptr_t)sip->si_addr : 0); 1267 1268 /* convert FP regs if present */ 1269 if (ucp->uc_flags & UC_FPU) { 1270 stol_fpstate(&ucp->uc_mcontext.fpregs, &lx_ossp->fpstate); 1271 lx_ossp->sigc.sc_fpstate = &lx_ossp->fpstate; 1272 } else { 1273 lx_ossp->sigc.sc_fpstate = NULL; 1274 } 1275 1276 /* 1277 * Believe it or not, gdb wants to SEE the trampoline code on the 1278 * bottom of the stack to determine whether the stack frame belongs to 1279 * a signal handler, even though this code is no longer actually 1280 * called. 1281 * 1282 * You can't make this stuff up. 1283 */ 1284 bcopy((void *)lx_sigreturn_tramp, lx_ossp->trampoline, 1285 sizeof (lx_ossp->trampoline)); 1286 } 1287 #endif 1288 1289 /* 1290 * Build stack frame (32-bit) or stack local data (64-bit) for processing for 1291 * modern Linux signals. This is the only stack-builder function for 64-bit 1292 * code (32-bit code also calls this when using "modern" signals). 1293 */ 1294 static void 1295 lx_build_signal_frame(int lx_sig, siginfo_t *sip, void *p, void *sp, 1296 uintptr_t *hargs) 1297 { 1298 extern void lx_rt_sigreturn_tramp(); 1299 1300 lx_ucontext_t *lx_ucp; 1301 ucontext_t *ucp = (ucontext_t *)p; 1302 struct lx_sigstack *lx_ssp = sp; 1303 struct lx_sigaction *lxsap; 1304 1305 lx_debug("building signal frame for lx sig %d at 0x%p", lx_sig, sp); 1306 1307 lx_ucp = &lx_ssp->uc; 1308 #if defined(_ILP32) 1309 /* 1310 * Arguments are passed to the 32-bit signal handler on the stack. 1311 */ 1312 lx_ssp->ucp = lx_ucp; 1313 lx_ssp->sip = sip != NULL ? &lx_ssp->si : NULL; 1314 lx_ssp->sig = lx_sig; 1315 #else 1316 /* 1317 * Arguments to the 64-bit signal handler are passed in registers: 1318 * hdlr(int sig, siginfo_t *sip, void *ucp); 1319 */ 1320 hargs[0] = lx_sig; 1321 hargs[1] = sip != NULL ? 
(uintptr_t)&lx_ssp->si : NULL; 1322 hargs[2] = (uintptr_t)lx_ucp; 1323 #endif 1324 1325 lxsap = &lx_sighandlers.lx_sa[lx_sig]; 1326 lx_debug("lxsap @ 0x%p", lxsap); 1327 1328 if (lxsap && (lxsap->lxsa_flags & LX_SA_RESTORER) && 1329 lxsap->lxsa_restorer) { 1330 /* 1331 * lxsa_restorer is explicitly set by sigaction in 32-bit code 1332 * but it can also be implicitly set for both 32 and 64 bit 1333 * code via lx_sigaction_common when we bcopy the user-supplied 1334 * lx_sigaction element into the proper slot in the sighandler 1335 * array. 1336 */ 1337 lx_ssp->retaddr = lxsap->lxsa_restorer; 1338 lx_debug("lxsa_restorer exists @ 0x%p", lx_ssp->retaddr); 1339 } else { 1340 lx_ssp->retaddr = lx_rt_sigreturn_tramp; 1341 lx_debug("lx_ssp->retaddr set to 0x%p", lx_rt_sigreturn_tramp); 1342 } 1343 1344 /* Linux has these fields but always clears them to 0 */ 1345 lx_ucp->uc_flags = 0; 1346 lx_ucp->uc_link = NULL; 1347 1348 /* convert Illumos signal mask and stack to their Linux equivalents */ 1349 (void) stol_sigset(&ucp->uc_sigmask, &lx_ucp->uc_sigmask); 1350 stol_stack(&ucp->uc_stack, &lx_ucp->uc_stack); 1351 1352 #if defined(_LP64) 1353 /* 1354 * General register layout is completely different. 
1355 */ 1356 lx_ucp->uc_sigcontext.sc_r8 = LX_REG(ucp, REG_R8); 1357 lx_ucp->uc_sigcontext.sc_r9 = LX_REG(ucp, REG_R9); 1358 lx_ucp->uc_sigcontext.sc_r10 = LX_REG(ucp, REG_R10); 1359 lx_ucp->uc_sigcontext.sc_r11 = LX_REG(ucp, REG_R11); 1360 lx_ucp->uc_sigcontext.sc_r12 = LX_REG(ucp, REG_R12); 1361 lx_ucp->uc_sigcontext.sc_r13 = LX_REG(ucp, REG_R13); 1362 lx_ucp->uc_sigcontext.sc_r14 = LX_REG(ucp, REG_R14); 1363 lx_ucp->uc_sigcontext.sc_r15 = LX_REG(ucp, REG_R15); 1364 lx_ucp->uc_sigcontext.sc_rdi = LX_REG(ucp, REG_RDI); 1365 lx_ucp->uc_sigcontext.sc_rsi = LX_REG(ucp, REG_RSI); 1366 lx_ucp->uc_sigcontext.sc_rbp = LX_REG(ucp, REG_RBP); 1367 lx_ucp->uc_sigcontext.sc_rbx = LX_REG(ucp, REG_RBX); 1368 lx_ucp->uc_sigcontext.sc_rdx = LX_REG(ucp, REG_RDX); 1369 lx_ucp->uc_sigcontext.sc_rax = LX_REG(ucp, REG_RAX); 1370 lx_ucp->uc_sigcontext.sc_rcx = LX_REG(ucp, REG_RCX); 1371 lx_ucp->uc_sigcontext.sc_rsp = LX_REG(ucp, REG_RSP); 1372 lx_ucp->uc_sigcontext.sc_rip = LX_REG(ucp, REG_RIP); 1373 lx_ucp->uc_sigcontext.sc_eflags = LX_REG(ucp, REG_RFL); 1374 lx_ucp->uc_sigcontext.sc_cs = LX_REG(ucp, REG_CS); 1375 lx_ucp->uc_sigcontext.sc_gs = LX_REG(ucp, REG_GS); 1376 lx_ucp->uc_sigcontext.sc_fs = LX_REG(ucp, REG_FS); 1377 lx_ucp->uc_sigcontext.sc_pad0 = LX_REG(ucp, REG_SS); 1378 lx_ucp->uc_sigcontext.sc_err = LX_REG(ucp, REG_ERR); 1379 lx_ucp->uc_sigcontext.sc_trapno = LX_REG(ucp, REG_TRAPNO); 1380 1381 #else /* is _ILP32 */ 1382 /* 1383 * General registers copy across as-is, except Linux expects that 1384 * uc_mcontext.gregs[ESP] == uc_mcontext.gregs[UESP] on receipt of a 1385 * signal. 1386 */ 1387 bcopy(&ucp->uc_mcontext, &lx_ucp->uc_sigcontext, sizeof (gregset_t)); 1388 lx_ucp->uc_sigcontext.sc_esp = lx_ucp->uc_sigcontext.sc_esp_at_signal; 1389 #endif 1390 1391 /* 1392 * cr2 contains the faulting address, which Linux only sets for a 1393 * a segmentation fault. 1394 */ 1395 lx_ucp->uc_sigcontext.sc_cr2 = ((lx_sig == LX_SIGSEGV) && (sip)) ? 
1396 (uintptr_t)sip->si_addr : 0; 1397 1398 /* 1399 * This should only return an error if the signum is invalid but that 1400 * also gets converted into a LX_SIGKILL by this function. 1401 */ 1402 if (sip != NULL) 1403 (void) stol_siginfo(sip, &lx_ssp->si); 1404 else 1405 bzero(&lx_ssp->si, sizeof (lx_siginfo_t)); 1406 1407 /* convert FP regs if present */ 1408 if (ucp->uc_flags & UC_FPU) { 1409 /* 1410 * Copy FP regs to the appropriate place in the the lx_sigstack 1411 * structure. 1412 */ 1413 stol_fpstate(&ucp->uc_mcontext.fpregs, &lx_ssp->fpstate); 1414 lx_ucp->uc_sigcontext.sc_fpstate = &lx_ssp->fpstate; 1415 } else { 1416 lx_ucp->uc_sigcontext.sc_fpstate = NULL; 1417 } 1418 1419 #if defined(_ILP32) 1420 /* 1421 * Believe it or not, gdb wants to SEE the sigreturn code on the 1422 * top of the stack to determine whether the stack frame belongs to 1423 * a signal handler, even though this code is not actually called. 1424 * 1425 * You can't make this stuff up. 1426 */ 1427 bcopy((void *)lx_rt_sigreturn_tramp, lx_ssp->trampoline, 1428 sizeof (lx_ssp->trampoline)); 1429 #endif 1430 } 1431 1432 /* 1433 * This is the interposition handler for Linux signals. 1434 */ 1435 static void 1436 lx_call_user_handler(int sig, siginfo_t *sip, void *p) 1437 { 1438 void (*user_handler)(); 1439 void (*stk_builder)(); 1440 struct lx_sigaction *lxsap; 1441 ucontext_t *ucp = (ucontext_t *)p; 1442 size_t stksize; 1443 int lx_sig; 1444 1445 /* 1446 * If Illumos signal has no Linux equivalent, effectively ignore it. 1447 */ 1448 if ((lx_sig = stol_signo[sig]) == -1) { 1449 lx_unsupported("caught Illumos signal %d, no Linux equivalent", 1450 sig); 1451 return; 1452 } 1453 1454 lx_debug("interpose caught Illumos signal %d, translating to Linux " 1455 "signal %d", sig, lx_sig); 1456 1457 lxsap = &lx_sighandlers.lx_sa[lx_sig]; 1458 lx_debug("lxsap @ 0x%p", lxsap); 1459 1460 if ((sig == SIGPWR) && (lxsap->lxsa_handler == SIG_DFL)) { 1461 /* 1462 * Linux SIG_DFL for SIGPWR is to terminate. 
The lx wait 1463 * emulation will translate SIGPWR to LX_SIGPWR. 1464 */ 1465 (void) syscall(SYS_brand, B_EXIT_AS_SIG, SIGPWR); 1466 /* This should never return */ 1467 assert(0); 1468 } 1469 1470 if (lxsap->lxsa_handler == SIG_DFL || lxsap->lxsa_handler == SIG_IGN) 1471 lx_err_fatal("lxsa_handler set to %s? How?!?!?", 1472 (lxsap->lxsa_handler == SIG_DFL) ? "SIG_DFL" : "SIG_IGN"); 1473 1474 #if defined(_LP64) 1475 stksize = sizeof (struct lx_sigstack); 1476 stk_builder = lx_build_signal_frame; 1477 #else 1478 if (lxsap->lxsa_flags & LX_SA_SIGINFO) { 1479 stksize = sizeof (struct lx_sigstack); 1480 stk_builder = lx_build_signal_frame; 1481 } else { 1482 stksize = sizeof (struct lx_oldsigstack); 1483 stk_builder = lx_build_old_signal_frame; 1484 } 1485 #endif 1486 1487 user_handler = lxsap->lxsa_handler; 1488 1489 lx_debug("delivering %d (lx %d) to handler at 0x%p", sig, lx_sig, 1490 lxsap->lxsa_handler); 1491 1492 if (lxsap->lxsa_flags & LX_SA_RESETHAND) 1493 lxsap->lxsa_handler = SIG_DFL; 1494 1495 lx_sigdeliver(lx_sig, sip, ucp, stksize, stk_builder, user_handler, 1496 lxsap); 1497 1498 /* 1499 * We need to handle restarting system calls if requested by the 1500 * program for this signal type: 1501 */ 1502 if (lxsap->lxsa_flags & LX_SA_RESTART) { 1503 uintptr_t flags = (uintptr_t)ucp->uc_brand_data[0]; 1504 long ret = (long)LX_REG(ucp, REG_R0); 1505 boolean_t interrupted = (ret == -lx_errno(EINTR, -1)); 1506 1507 /* 1508 * If the system call returned EINTR, and the system 1509 * call handler set "br_syscall_restart" when returning, 1510 * we modify the context to try the system call again 1511 * when we return from this signal handler. 1512 */ 1513 if ((flags & LX_UC_RESTART_SYSCALL) && interrupted) { 1514 int syscall_num = (int)(uintptr_t)ucp->uc_brand_data[2]; 1515 1516 lx_debug("restarting interrupted system call %d", 1517 syscall_num); 1518 1519 /* 1520 * Both the "int 0x80" and the "syscall" instruction 1521 * are two bytes long. 
Wind the program counter back 1522 * to the start of this instruction. 1523 * 1524 * The system call we interrupted is preserved in the 1525 * brand-specific data in the ucontext_t when the 1526 * LX_UC_RESTART_SYSCALL flag is set. This is 1527 * analogous to the "orig_[er]ax" field in the Linux 1528 * "user_regs_struct". 1529 */ 1530 LX_REG(ucp, REG_PC) -= 2; 1531 LX_REG(ucp, REG_R0) = syscall_num; 1532 } 1533 } 1534 } 1535 1536 /* 1537 * The "lx_sigdeliver()" function is responsible for constructing the emulated 1538 * signal delivery frame on the brand stack for this LWP. A context is saved 1539 * on the stack which will be used by the "sigreturn(2)" family of emulated 1540 * system calls to get us back here after the Linux signal handler returns. 1541 * This function is modelled on the in-kernel "sendsig()" signal delivery 1542 * mechanism. 1543 */ 1544 void 1545 lx_sigdeliver(int lx_sig, siginfo_t *sip, ucontext_t *ucp, size_t stacksz, 1546 void (*stack_builder)(), void (*user_handler)(), 1547 struct lx_sigaction *lxsap) 1548 { 1549 lx_sigbackup_t sigbackup; 1550 ucontext_t uc; 1551 lx_tsd_t *lxtsd = lx_get_tsd(); 1552 int totsz = 0; 1553 uintptr_t flags; 1554 uintptr_t hargs[3]; 1555 /* 1556 * These variables must be "volatile", as they are modified after the 1557 * getcontext() stores the register state: 1558 */ 1559 volatile boolean_t signal_delivered = B_FALSE; 1560 volatile uintptr_t lxfp = 0; 1561 volatile uintptr_t old_tsd_sp = 0; 1562 volatile int newstack = 0; 1563 1564 /* 1565 * This function involves modifying the Linux process stack for this 1566 * thread. To do so without corruption requires us to exclude other 1567 * signal handlers (or emulated system calls called from within those 1568 * handlers) from running while we reserve space on that stack. We 1569 * defer the execution of further instances of lx_call_user_handler() 1570 * until we have completed this operation. 1571 */ 1572 _sigoff(); 1573 1574 /* 1575 * Clear register arguments vector. 
1576 */ 1577 bzero(hargs, sizeof (hargs)); 1578 1579 /* 1580 * We save a context here so that we can be returned later to complete 1581 * handling the signal. 1582 */ 1583 lx_debug("lx_sigdeliver: STORING RETURN CONTEXT @ %p\n", &uc); 1584 assert(getcontext(&uc) == 0); 1585 lx_debug("lx_sigdeliver: RETURN CONTEXT %p LINK %p FLAGS %lx\n", 1586 &uc, uc.uc_link, uc.uc_flags); 1587 if (signal_delivered) { 1588 /* 1589 * If the "signal_delivered" flag is set, we are returned here 1590 * via setcontext() as called by the emulated Linux signal 1591 * return system call. 1592 */ 1593 lx_debug("lx_sigdeliver: WE ARE BACK, VIA UC @ %p!\n", &uc); 1594 goto after_signal_handler; 1595 } 1596 signal_delivered = B_TRUE; 1597 1598 /* 1599 * Preserve the current tsd value of the Linux process stack pointer, 1600 * even if it is zero. We will restore it when we are returned here 1601 * via setcontext() after the Linux process has completed execution of 1602 * its signal handler. 1603 */ 1604 old_tsd_sp = lxtsd->lxtsd_lx_sp; 1605 1606 /* 1607 * Figure out whether we will be handling this signal on an alternate 1608 * stack specified by the user. 1609 */ 1610 newstack = (lxsap->lxsa_flags & LX_SA_ONSTACK) && 1611 !(lxtsd->lxtsd_sigaltstack.ss_flags & (LX_SS_ONSTACK | 1612 LX_SS_DISABLE)); 1613 1614 /* 1615 * Find the first unused region of the Linux process stack, where 1616 * we will assemble our signal delivery frame. 1617 */ 1618 flags = (uintptr_t)ucp->uc_brand_data[0]; 1619 if (newstack) { 1620 /* 1621 * We are moving to the user-provided alternate signal 1622 * stack. 1623 */ 1624 lxfp = SA((uintptr_t)lxtsd->lxtsd_sigaltstack.ss_sp) + 1625 SA(lxtsd->lxtsd_sigaltstack.ss_size) - STACK_ALIGN; 1626 lx_debug("lx_sigdeliver: moving to ALTSTACK sp %p\n", lxfp); 1627 LX_SIGNAL_ALTSTACK_ENABLE(lxfp); 1628 } else if (flags & LX_UC_STACK_BRAND) { 1629 /* 1630 * We interrupted the Linux process to take this signal. The 1631 * stack pointer is the one saved in this context. 
1632 */ 1633 lxfp = LX_REG(ucp, REG_SP); 1634 } else { 1635 /* 1636 * We interrupted a native (emulation) routine, so we must get 1637 * the current stack pointer from either the tsd (if one is 1638 * stored there) or via the context chain. 1639 * 1640 */ 1641 lxfp = lx_find_brand_sp(); 1642 if (lxtsd->lxtsd_lx_sp != 0) { 1643 /* 1644 * We must also make room for the possibility of nested 1645 * signal delivery -- we may be pre-empting the 1646 * in-progress handling of another signal. 1647 * 1648 * Note that if we were already on the alternate stack, 1649 * any emulated Linux system calls would be betwixt 1650 * that original signal frame and this new one on the 1651 * one contiguous stack, so this logic holds either 1652 * way: 1653 */ 1654 lxfp = MIN(lxtsd->lxtsd_lx_sp, lxfp); 1655 } 1656 } 1657 1658 /* 1659 * Account for a reserved stack region (for amd64, this is 128 bytes), 1660 * and align the stack: 1661 */ 1662 lxfp -= STACK_RESERVE; 1663 lxfp &= ~(STACK_ALIGN - 1); 1664 1665 /* 1666 * Allocate space on the Linux process stack for our delivery frame, 1667 * including: 1668 * 1669 * ----------------------------------------------------- old %sp 1670 * - lx_sigdeliver_frame_t 1671 * - (ucontext_t pointers and stack magic) 1672 * ----------------------------------------------------- 1673 * - (amd64-only 8-byte alignment gap) 1674 * ----------------------------------------------------- 1675 * - frame of size "stacksz" from the stack builder 1676 * ----------------------------------------------------- new %sp 1677 */ 1678 #if defined(_LP64) 1679 /* 1680 * The AMD64 ABI requires us to align the stack such that when the 1681 * called function pushes the base pointer, the stack is 16 byte 1682 * aligned. The stack must, therefore, be 8- but _not_ 16-byte 1683 * aligned. 
1684 */ 1685 #if (STACK_ALIGN != 16) || (STACK_ENTRY_ALIGN != 8) 1686 #error "lx_sigdeliver() did not find expected stack alignment" 1687 #endif 1688 totsz = SA(sizeof (lx_sigdeliver_frame_t)) + SA(stacksz) + 8; 1689 assert((totsz & (STACK_ENTRY_ALIGN - 1)) == 0); 1690 assert((totsz & (STACK_ALIGN - 1)) == 8); 1691 #else 1692 totsz = SA(sizeof (lx_sigdeliver_frame_t)) + SA(stacksz); 1693 assert((totsz & (STACK_ALIGN - 1)) == 0); 1694 #endif 1695 1696 /* 1697 * Copy our return frame into place: 1698 */ 1699 lxfp -= SA(sizeof (lx_sigdeliver_frame_t)); 1700 lx_debug("lx_sigdeliver: lx_sigdeliver_frame_t @ %p\n", lxfp); 1701 { 1702 lx_sigdeliver_frame_t frm; 1703 1704 frm.lxsdf_magic = LX_SIGRT_MAGIC; 1705 frm.lxsdf_retucp = &uc; 1706 frm.lxsdf_sigucp = ucp; 1707 frm.lxsdf_sigbackup = &sigbackup; 1708 1709 lx_debug("lx_sigdeliver: retucp %p sigucp %p\n", 1710 frm.lxsdf_retucp, frm.lxsdf_sigucp); 1711 1712 if (uucopy(&frm, (void *)lxfp, sizeof (frm)) != 0) { 1713 /* 1714 * We could not modify the stack of the emulated Linux 1715 * program. Act like the kernel and terminate the 1716 * program with a segmentation violation. 1717 */ 1718 (void) syscall(SYS_brand, B_EXIT_AS_SIG, SIGSEGV); 1719 } 1720 1721 LX_SIGNAL_DELIVERY_FRAME_CREATE((void *)lxfp); 1722 1723 /* 1724 * Populate a backup copy of signal linkage to use in case 1725 * the Linux program completely destroys (or relocates) the 1726 * delivery frame. 1727 * 1728 * This is necessary for programs that have flown so far off 1729 * the architectural rails that they believe it is 1730 * acceptable to make assumptions about the precise size and 1731 * layout of the signal handling frame assembled by the 1732 * kernel. 
1733 */ 1734 sigbackup.lxsb_retucp = frm.lxsdf_retucp; 1735 sigbackup.lxsb_sigucp = frm.lxsdf_sigucp; 1736 sigbackup.lxsb_sigdeliver_frame = lxfp; 1737 sigbackup.lxsb_previous = lxtsd->lxtsd_sigbackup; 1738 lxtsd->lxtsd_sigbackup = &sigbackup; 1739 1740 lx_debug("lx_sigdeliver: installed sigbackup %p; prev %p\n", 1741 &sigbackup, sigbackup.lxsb_previous); 1742 } 1743 1744 /* 1745 * Build the Linux signal handling frame: 1746 */ 1747 #if defined(_LP64) 1748 lxfp -= SA(stacksz) + 8; 1749 #else 1750 lxfp -= SA(stacksz); 1751 #endif 1752 lx_debug("lx_sigdeliver: Linux sig frame @ %p\n", lxfp); 1753 stack_builder(lx_sig, sip, ucp, lxfp, hargs); 1754 1755 /* 1756 * Record our reservation so that any nested signal handlers 1757 * can see it. 1758 */ 1759 lx_debug("lx_sigdeliver: Linux tsd sp %p -> %p\n", lxtsd->lxtsd_lx_sp, 1760 lxfp); 1761 lxtsd->lxtsd_lx_sp = lxfp; 1762 1763 if (newstack) { 1764 lxtsd->lxtsd_sigaltstack.ss_flags |= LX_SS_ONSTACK; 1765 } 1766 1767 LX_SIGDELIVER(lx_sig, lxsap, (void *)lxfp); 1768 1769 /* 1770 * Re-enable signal delivery. If a signal was queued while we were 1771 * in the critical section, it will be delivered immediately. 1772 */ 1773 _sigon(); 1774 1775 /* 1776 * Pass control to the Linux signal handler: 1777 */ 1778 lx_debug("lx_sigdeliver: JUMPING TO LINUX (sig %d sp %p eip %p)\n", 1779 lx_sig, lxfp, user_handler); 1780 { 1781 ucontext_t jump_uc; 1782 1783 bcopy(lx_find_brand_uc(), &jump_uc, sizeof (jump_uc)); 1784 1785 /* 1786 * We want to load the general registers from this context, and 1787 * switch to the BRAND stack. We do _not_ want to restore the 1788 * uc_link value from this synthetic context, as that would 1789 * break the signal handling context chain. 
1790 */ 1791 jump_uc.uc_flags = UC_CPU; 1792 jump_uc.uc_brand_data[0] = (void *)(LX_UC_STACK_BRAND | 1793 LX_UC_IGNORE_LINK); 1794 1795 LX_REG(&jump_uc, REG_FP) = 0; 1796 LX_REG(&jump_uc, REG_SP) = lxfp; 1797 LX_REG(&jump_uc, REG_PC) = (uintptr_t)user_handler; 1798 1799 #if defined(_LP64) 1800 /* 1801 * Pass signal handler arguments by registers on AMD64. 1802 */ 1803 LX_REG(&jump_uc, REG_RDI) = hargs[0]; 1804 LX_REG(&jump_uc, REG_RSI) = hargs[1]; 1805 LX_REG(&jump_uc, REG_RDX) = hargs[2]; 1806 #endif 1807 1808 lx_jump_to_linux(&jump_uc); 1809 } 1810 1811 assert(0); 1812 abort(); 1813 1814 after_signal_handler: 1815 /* 1816 * Ensure all nested signal handlers have completed correctly 1817 * and then remove our stack reservation. 1818 */ 1819 _sigoff(); 1820 LX_SIGNAL_POST_HANDLER(lxfp, old_tsd_sp); 1821 assert(lxtsd->lxtsd_lx_sp == lxfp); 1822 lx_debug("lx_sigdeliver: after; Linux tsd sp %p -> %p\n", lxfp, 1823 old_tsd_sp); 1824 lxtsd->lxtsd_lx_sp = old_tsd_sp; 1825 if (newstack) { 1826 LX_SIGNAL_ALTSTACK_DISABLE(); 1827 lx_debug("lx_sigdeliver: disabling ALTSTACK sp %p\n", lxfp); 1828 lxtsd->lxtsd_sigaltstack.ss_flags &= ~LX_SS_ONSTACK; 1829 } 1830 /* 1831 * Restore backup signal tracking chain pointer to previous value: 1832 */ 1833 if (lxtsd->lxtsd_sigbackup != NULL) { 1834 lx_sigbackup_t *bprev = lxtsd->lxtsd_sigbackup->lxsb_previous; 1835 1836 lx_debug("lx_sigdeliver: restoring sigbackup %p to %p\n", 1837 lxtsd->lxtsd_sigbackup, bprev); 1838 1839 lxtsd->lxtsd_sigbackup = bprev; 1840 } 1841 _sigon(); 1842 1843 /* 1844 * Here we return to libc so that it may clean up and restore the 1845 * context originally interrupted by this signal. 1846 */ 1847 } 1848 1849 /* 1850 * Common routine to modify sigaction characteristics of a thread. 1851 * 1852 * We shouldn't need any special locking code here as we actually use our copy 1853 * of libc's sigaction() to do all the real work, so its thread locking should 1854 * take care of any issues for us. 
1855 */ 1856 static int 1857 lx_sigaction_common(int lx_sig, struct lx_sigaction *lxsp, 1858 struct lx_sigaction *olxsp) 1859 { 1860 struct lx_sigaction *lxsap; 1861 struct sigaction sa; 1862 1863 if (lx_sig <= 0 || lx_sig > LX_NSIG) 1864 return (-EINVAL); 1865 1866 lxsap = &lx_sighandlers.lx_sa[lx_sig]; 1867 lx_debug("&lx_sighandlers.lx_sa[%d] = 0x%p", lx_sig, lxsap); 1868 1869 if ((olxsp != NULL) && 1870 ((uucopy(lxsap, olxsp, sizeof (struct lx_sigaction))) != 0)) 1871 return (-errno); 1872 1873 if (lxsp != NULL) { 1874 int err, sig; 1875 struct lx_sigaction lxsa; 1876 sigset_t new_set, oset; 1877 1878 if (uucopy(lxsp, &lxsa, sizeof (struct lx_sigaction)) != 0) 1879 return (-errno); 1880 1881 if ((sig = ltos_signo[lx_sig]) != -1) { 1882 if (lx_no_abort_handler != 0) { 1883 /* 1884 * If LX_NO_ABORT_HANDLER has been set, we will 1885 * not allow the emulated program to do 1886 * anything hamfisted with SIGSEGV or SIGABRT 1887 * signals. 1888 */ 1889 if (sig == SIGSEGV || sig == SIGABRT) { 1890 return (0); 1891 } 1892 } 1893 1894 /* 1895 * Block this signal while messing with its dispostion 1896 */ 1897 (void) sigemptyset(&new_set); 1898 (void) sigaddset(&new_set, sig); 1899 1900 if (sigprocmask(SIG_BLOCK, &new_set, &oset) < 0) { 1901 err = errno; 1902 lx_debug("unable to block signal %d: %s", sig, 1903 strerror(err)); 1904 return (-err); 1905 } 1906 1907 /* 1908 * We don't really need the old signal disposition at 1909 * this point, but this weeds out signals that would 1910 * cause sigaction() to return an error before we change 1911 * anything other than the current signal mask. 
1912 */ 1913 if (sigaction(sig, NULL, &sa) < 0) { 1914 err = errno; 1915 lx_debug("sigaction() to get old " 1916 "disposition for signal %d failed: " 1917 "%s", sig, strerror(err)); 1918 (void) sigprocmask(SIG_SETMASK, &oset, NULL); 1919 return (-err); 1920 } 1921 1922 if ((lxsa.lxsa_handler != SIG_DFL) && 1923 (lxsa.lxsa_handler != SIG_IGN)) { 1924 sa.sa_handler = lx_call_user_handler; 1925 1926 /* 1927 * The interposition signal handler needs the 1928 * information provided via the SA_SIGINFO flag. 1929 */ 1930 sa.sa_flags = SA_SIGINFO; 1931 1932 /* 1933 * When translating from Linux to illumos 1934 * sigaction(2) flags, we explicitly do not 1935 * pass SA_ONSTACK to the kernel. The 1936 * alternate stack for Linux signal handling is 1937 * handled entirely by the emulation code. 1938 */ 1939 if (lxsa.lxsa_flags & LX_SA_NOCLDSTOP) 1940 sa.sa_flags |= SA_NOCLDSTOP; 1941 if (lxsa.lxsa_flags & LX_SA_NOCLDWAIT) 1942 sa.sa_flags |= SA_NOCLDWAIT; 1943 if (lxsa.lxsa_flags & LX_SA_RESTART) 1944 sa.sa_flags |= SA_RESTART; 1945 if (lxsa.lxsa_flags & LX_SA_NODEFER) 1946 sa.sa_flags |= SA_NODEFER; 1947 1948 /* 1949 * RESETHAND cannot be used be passed through 1950 * for SIGPWR due to different default actions 1951 * between Linux and Illumos. 
1952 */ 1953 if ((sig != SIGPWR) && 1954 (lxsa.lxsa_flags & LX_SA_RESETHAND)) 1955 sa.sa_flags |= SA_RESETHAND; 1956 1957 if (ltos_sigset(&lxsa.lxsa_mask, 1958 &sa.sa_mask) != 0) { 1959 err = errno; 1960 (void) sigprocmask(SIG_SETMASK, &oset, 1961 NULL); 1962 return (-err); 1963 } 1964 1965 lx_debug("interposing handler @ 0x%p for " 1966 "signal %d (lx %d), flags 0x%x", 1967 lxsa.lxsa_handler, sig, lx_sig, 1968 lxsa.lxsa_flags); 1969 1970 if (sigaction(sig, &sa, NULL) < 0) { 1971 err = errno; 1972 lx_debug("sigaction() to set new " 1973 "disposition for signal %d failed: " 1974 "%s", sig, strerror(err)); 1975 (void) sigprocmask(SIG_SETMASK, &oset, 1976 NULL); 1977 return (-err); 1978 } 1979 } else if ((sig != SIGPWR) || 1980 ((sig == SIGPWR) && 1981 (lxsa.lxsa_handler == SIG_IGN))) { 1982 /* 1983 * There's no need to interpose for SIG_DFL or 1984 * SIG_IGN so just call our copy of libc's 1985 * sigaction(), but don't allow SIG_DFL for 1986 * SIGPWR due to differing default actions 1987 * between Linux and Illumos. 1988 * 1989 * Get the previous disposition first so things 1990 * like sa_mask and sa_flags are preserved over 1991 * a transition to SIG_DFL or SIG_IGN, which is 1992 * what Linux expects. 1993 */ 1994 1995 sa.sa_handler = lxsa.lxsa_handler; 1996 1997 if (sigaction(sig, &sa, NULL) < 0) { 1998 err = errno; 1999 lx_debug("sigaction(%d, %s) failed: %s", 2000 sig, ((sa.sa_handler == SIG_DFL) ? 2001 "SIG_DFL" : "SIG_IGN"), 2002 strerror(err)); 2003 (void) sigprocmask(SIG_SETMASK, &oset, 2004 NULL); 2005 return (-err); 2006 } 2007 } 2008 } else { 2009 lx_debug("Linux signal with no kill support " 2010 "specified: %d", lx_sig); 2011 } 2012 2013 /* 2014 * Save the new disposition for the signal in the global 2015 * lx_sighandlers structure. 2016 */ 2017 bcopy(&lxsa, lxsap, sizeof (struct lx_sigaction)); 2018 2019 /* 2020 * Reset the signal mask to what we came in with if 2021 * we were modifying a kill-supported signal. 
2022 */ 2023 if (sig != -1) 2024 (void) sigprocmask(SIG_SETMASK, &oset, NULL); 2025 } 2026 2027 return (0); 2028 } 2029 2030 #if defined(_ILP32) 2031 /* 2032 * sigaction is only used in 32-bit code. 2033 */ 2034 long 2035 lx_sigaction(uintptr_t lx_sig, uintptr_t actp, uintptr_t oactp) 2036 { 2037 int val; 2038 struct lx_sigaction sa, osa; 2039 struct lx_sigaction *sap, *osap; 2040 struct lx_osigaction *osp; 2041 2042 sap = (actp ? &sa : NULL); 2043 osap = (oactp ? &osa : NULL); 2044 2045 /* 2046 * If we have a source pointer, convert source lxsa_mask from 2047 * lx_osigset_t to lx_sigset_t format. 2048 */ 2049 if (sap) { 2050 osp = (struct lx_osigaction *)actp; 2051 sap->lxsa_handler = osp->lxsa_handler; 2052 2053 bzero(&sap->lxsa_mask, sizeof (lx_sigset_t)); 2054 2055 for (val = 1; val <= OSIGSET_NBITS; val++) 2056 if (osp->lxsa_mask & OSIGSET_BITSET(val)) 2057 (void) lx_sigaddset(&sap->lxsa_mask, val); 2058 2059 sap->lxsa_flags = osp->lxsa_flags; 2060 sap->lxsa_restorer = osp->lxsa_restorer; 2061 } 2062 2063 if ((val = lx_sigaction_common(lx_sig, sap, osap))) 2064 return (val); 2065 2066 /* 2067 * If we have a save pointer, convert the old lxsa_mask from 2068 * lx_sigset_t to lx_osigset_t format. 2069 */ 2070 if (osap) { 2071 osp = (struct lx_osigaction *)oactp; 2072 2073 osp->lxsa_handler = osap->lxsa_handler; 2074 2075 bzero(&osp->lxsa_mask, sizeof (osp->lxsa_mask)); 2076 for (val = 1; val <= OSIGSET_NBITS; val++) 2077 if (lx_sigismember(&osap->lxsa_mask, val)) 2078 osp->lxsa_mask |= OSIGSET_BITSET(val); 2079 2080 osp->lxsa_flags = osap->lxsa_flags; 2081 osp->lxsa_restorer = osap->lxsa_restorer; 2082 } 2083 2084 return (0); 2085 } 2086 #endif 2087 2088 long 2089 lx_rt_sigaction(uintptr_t lx_sig, uintptr_t actp, uintptr_t oactp, 2090 uintptr_t setsize) 2091 { 2092 /* 2093 * The "new" rt_sigaction call checks the setsize 2094 * parameter. 
2095 */ 2096 if ((size_t)setsize != sizeof (lx_sigset_t)) 2097 return (-EINVAL); 2098 2099 return (lx_sigaction_common(lx_sig, (struct lx_sigaction *)actp, 2100 (struct lx_sigaction *)oactp)); 2101 } 2102 2103 #if defined(_ILP32) 2104 /* 2105 * Convert signal syscall to a call to the lx_sigaction() syscall 2106 * Only used in 32-bit code. 2107 */ 2108 long 2109 lx_signal(uintptr_t lx_sig, uintptr_t handler) 2110 { 2111 struct sigaction act; 2112 struct sigaction oact; 2113 int rc; 2114 2115 /* 2116 * Use sigaction to mimic SYSV signal() behavior; glibc will 2117 * actually call sigaction(2) itself, so we're really reaching 2118 * back for signal(2) semantics here. 2119 */ 2120 bzero(&act, sizeof (act)); 2121 act.sa_handler = (void (*)())handler; 2122 act.sa_flags = SA_RESETHAND | SA_NODEFER; 2123 2124 rc = lx_sigaction(lx_sig, (uintptr_t)&act, (uintptr_t)&oact); 2125 return ((rc == 0) ? ((ssize_t)oact.sa_handler) : rc); 2126 } 2127 #endif 2128 2129 void 2130 lx_sighandlers_save(lx_sighandlers_t *saved) 2131 { 2132 bcopy(&lx_sighandlers, saved, sizeof (lx_sighandlers_t)); 2133 } 2134 2135 void 2136 lx_sighandlers_restore(lx_sighandlers_t *saved) 2137 { 2138 bcopy(saved, &lx_sighandlers, sizeof (lx_sighandlers_t)); 2139 } 2140 2141 int 2142 lx_siginit(void) 2143 { 2144 extern void set_setcontext_enforcement(int); 2145 extern void set_escaped_context_cleanup(int); 2146 2147 struct sigaction sa; 2148 sigset_t new_set, oset; 2149 int lx_sig, sig; 2150 2151 if (getenv("LX_NO_ABORT_HANDLER") != NULL) { 2152 lx_no_abort_handler = 1; 2153 } 2154 2155 /* 2156 * Block all signals possible while setting up the signal imposition 2157 * mechanism. 
2158 */ 2159 (void) sigfillset(&new_set); 2160 2161 if (sigprocmask(SIG_BLOCK, &new_set, &oset) < 0) 2162 lx_err_fatal("unable to block signals while setting up " 2163 "imposition mechanism: %s", strerror(errno)); 2164 2165 /* 2166 * Ignore any signals that have no Linux analog so that those 2167 * signals cannot be sent to Linux processes from the global zone 2168 */ 2169 for (sig = 1; sig < NSIG; sig++) 2170 if (stol_signo[sig] < 0) 2171 (void) sigignore(sig); 2172 2173 /* 2174 * Mark any signals that are ignored as ignored in our interposition 2175 * handler array 2176 */ 2177 for (lx_sig = 1; lx_sig <= LX_NSIG; lx_sig++) { 2178 if (((sig = ltos_signo[lx_sig]) != -1) && 2179 (sigaction(sig, NULL, &sa) < 0)) 2180 lx_err_fatal("unable to determine previous disposition " 2181 "for signal %d: %s", sig, strerror(errno)); 2182 2183 if (sa.sa_handler == SIG_IGN) { 2184 lx_debug("marking signal %d (lx %d) as SIG_IGN", 2185 sig, lx_sig); 2186 lx_sighandlers.lx_sa[lx_sig].lxsa_handler = SIG_IGN; 2187 } 2188 } 2189 2190 /* 2191 * Have our interposition handler handle SIGPWR to start with, 2192 * as it has a default action of terminating the process in Linux 2193 * but its default is to be ignored in Illumos. 2194 */ 2195 (void) sigemptyset(&sa.sa_mask); 2196 sa.sa_sigaction = lx_call_user_handler; 2197 sa.sa_flags = SA_SIGINFO; 2198 2199 if (sigaction(SIGPWR, &sa, NULL) < 0) 2200 lx_err_fatal("sigaction(SIGPWR) failed: %s", strerror(errno)); 2201 2202 /* 2203 * Illumos' libc forces certain register values in the ucontext_t 2204 * used to restore a post-signal user context to be those Illumos 2205 * expects; however that is not what we want to happen if the signal 2206 * was taken while branded code was executing, so we must disable 2207 * that behavior. 
2208 */ 2209 set_setcontext_enforcement(0); 2210 2211 /* 2212 * The illumos libc attempts to clean up dangling uc_link pointers in 2213 * signal handling contexts when libc believes us to have escaped a 2214 * signal handler incorrectly in the past. We want to disable this 2215 * behaviour, so that the system call emulation context saved by the 2216 * kernel brand module for lx_emulate() may be part of the context 2217 * chain without itself being used for signal handling. 2218 */ 2219 set_escaped_context_cleanup(0); 2220 2221 /* 2222 * Reset the signal mask to what we came in with. 2223 */ 2224 (void) sigprocmask(SIG_SETMASK, &oset, NULL); 2225 2226 lx_debug("interposition handler setup for SIGPWR"); 2227 return (0); 2228 } 2229 2230 /* 2231 * This code strongly resembles lx_poll(), but is here to be able to take 2232 * advantage of the Linux signal helper routines. 2233 */ 2234 long 2235 lx_ppoll(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4, uintptr_t p5) 2236 { 2237 struct pollfd *lfds, *sfds; 2238 nfds_t nfds = (nfds_t)p2; 2239 timespec_t ts, *tsp = NULL; 2240 int fds_size, i, rval, revents; 2241 lx_sigset_t lxsig, *lxsigp = NULL; 2242 sigset_t sigset, *sp = NULL; 2243 rctlblk_t *rblk; 2244 2245 lx_debug("\tppoll(0x%p, %d, 0x%p, 0x%p, %d)", p1, p2, p3, p4, p5); 2246 2247 if (p3 != NULL) { 2248 if (uucopy((void *)p3, &ts, sizeof (ts)) != 0) 2249 return (-errno); 2250 2251 tsp = &ts; 2252 } 2253 2254 if (p4 != NULL) { 2255 if (uucopy((void *)p4, &lxsig, sizeof (lxsig)) != 0) 2256 return (-errno); 2257 2258 lxsigp = &lxsig; 2259 if ((size_t)p5 != sizeof (lx_sigset_t)) 2260 return (-EINVAL); 2261 2262 if (lxsigp) { 2263 if ((rval = ltos_sigset(lxsigp, &sigset)) != 0) 2264 return (rval); 2265 2266 sp = &sigset; 2267 } 2268 } 2269 2270 /* 2271 * Deal with the NULL fds[] case. 
2272 */ 2273 if (nfds == 0 || p1 == NULL) { 2274 if ((rval = ppoll(NULL, 0, tsp, sp)) < 0) 2275 return (-errno); 2276 2277 return (rval); 2278 } 2279 2280 if (maxfd == 0) { 2281 if ((rblk = (rctlblk_t *)SAFE_ALLOCA(rctlblk_size())) == NULL) 2282 return (-ENOMEM); 2283 2284 if (getrctl("process.max-file-descriptor", NULL, rblk, 2285 RCTL_FIRST) == -1) 2286 return (-EINVAL); 2287 2288 maxfd = rctlblk_get_value(rblk); 2289 } 2290 2291 if (nfds > maxfd) 2292 return (-EINVAL); 2293 2294 /* 2295 * Note: we are assuming that the Linux and Illumos pollfd 2296 * structures are identical. Copy in the Linux poll structure. 2297 */ 2298 fds_size = sizeof (struct pollfd) * nfds; 2299 lfds = (struct pollfd *)SAFE_ALLOCA(fds_size); 2300 if (lfds == NULL) 2301 return (-ENOMEM); 2302 if (uucopy((void *)p1, lfds, fds_size) != 0) 2303 return (-errno); 2304 2305 /* 2306 * The poll system call modifies the poll structures passed in 2307 * so we'll need to make an extra copy of them. 2308 */ 2309 sfds = (struct pollfd *)SAFE_ALLOCA(fds_size); 2310 if (sfds == NULL) 2311 return (-ENOMEM); 2312 2313 /* Convert the Linux events bitmask into the Illumos equivalent. */ 2314 for (i = 0; i < nfds; i++) { 2315 /* 2316 * If the caller is polling for an unsupported event, we 2317 * have to bail out. 
2318 */ 2319 if (lfds[i].events & ~LX_POLL_SUPPORTED_EVENTS) { 2320 lx_unsupported("unsupported poll events requested: " 2321 "events=0x%x", lfds[i].events); 2322 return (-ENOTSUP); 2323 } 2324 2325 sfds[i].fd = lfds[i].fd; 2326 sfds[i].events = lfds[i].events & LX_POLL_COMMON_EVENTS; 2327 if (lfds[i].events & LX_POLLWRNORM) 2328 sfds[i].events |= POLLWRNORM; 2329 if (lfds[i].events & LX_POLLWRBAND) 2330 sfds[i].events |= POLLWRBAND; 2331 if (lfds[i].events & LX_POLLRDHUP) 2332 sfds[i].events |= POLLRDHUP; 2333 sfds[i].revents = 0; 2334 } 2335 2336 if ((rval = ppoll(sfds, nfds, tsp, sp)) < 0) 2337 return (-errno); 2338 2339 /* Convert the Illumos revents bitmask into the Linux equivalent */ 2340 for (i = 0; i < nfds; i++) { 2341 revents = sfds[i].revents & LX_POLL_COMMON_EVENTS; 2342 if (sfds[i].revents & POLLWRBAND) 2343 revents |= LX_POLLWRBAND; 2344 if (sfds[i].revents & POLLRDHUP) 2345 revents |= LX_POLLRDHUP; 2346 2347 /* 2348 * Be careful because on Illumos POLLOUT and POLLWRNORM 2349 * are defined to the same values but on Linux they 2350 * are not. 2351 */ 2352 if (sfds[i].revents & POLLOUT) { 2353 if ((lfds[i].events & LX_POLLOUT) == 0) 2354 revents &= ~LX_POLLOUT; 2355 if (lfds[i].events & LX_POLLWRNORM) 2356 revents |= LX_POLLWRNORM; 2357 } 2358 2359 lfds[i].revents = revents; 2360 } 2361 2362 /* Copy out the results */ 2363 if (uucopy(lfds, (void *)p1, fds_size) != 0) 2364 return (-errno); 2365 2366 return (rval); 2367 } 2368 2369 /* 2370 * This code stongly resemebles lx_select(), but is here to be able to take 2371 * advantage of the Linux signal helper routines. 
2372 */ 2373 long 2374 lx_pselect6(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4, 2375 uintptr_t p5, uintptr_t p6) 2376 { 2377 int nfds = (int)p1; 2378 fd_set *rfdsp = NULL; 2379 fd_set *wfdsp = NULL; 2380 fd_set *efdsp = NULL; 2381 timespec_t ts, *tsp = NULL; 2382 int fd_set_len = howmany(nfds, 8); 2383 int r; 2384 sigset_t sigset, *sp = NULL; 2385 2386 lx_debug("\tpselect6(%d, 0x%p, 0x%p, 0x%p, 0x%p, 0x%p)", 2387 p1, p2, p3, p4, p4, p6); 2388 2389 if (nfds > 0) { 2390 if (p2 != NULL) { 2391 rfdsp = SAFE_ALLOCA(fd_set_len); 2392 if (rfdsp == NULL) 2393 return (-ENOMEM); 2394 if (uucopy((void *)p2, rfdsp, fd_set_len) != 0) 2395 return (-errno); 2396 } 2397 if (p3 != NULL) { 2398 wfdsp = SAFE_ALLOCA(fd_set_len); 2399 if (wfdsp == NULL) 2400 return (-ENOMEM); 2401 if (uucopy((void *)p3, wfdsp, fd_set_len) != 0) 2402 return (-errno); 2403 } 2404 if (p4 != NULL) { 2405 efdsp = SAFE_ALLOCA(fd_set_len); 2406 if (efdsp == NULL) 2407 return (-ENOMEM); 2408 if (uucopy((void *)p4, efdsp, fd_set_len) != 0) 2409 return (-errno); 2410 } 2411 } 2412 2413 if (p5 != NULL) { 2414 if (uucopy((void *)p5, &ts, sizeof (ts)) != 0) 2415 return (-errno); 2416 2417 tsp = &ts; 2418 } 2419 2420 if (p6 != NULL) { 2421 /* 2422 * To force the number of arguments to be no more than six, 2423 * Linux bundles both the sigset and the size into a structure 2424 * that becomes the sixth argument. 2425 */ 2426 struct { 2427 lx_sigset_t *addr; 2428 size_t size; 2429 } lx_sigset; 2430 2431 if (uucopy((void *)p6, &lx_sigset, sizeof (lx_sigset)) != 0) 2432 return (-errno); 2433 2434 /* 2435 * Yes, that's right: Linux forces a size to be passed only 2436 * so it can check that it's the size of a sigset_t. 2437 */ 2438 if (lx_sigset.size != sizeof (lx_sigset_t)) 2439 return (-EINVAL); 2440 2441 /* 2442 * This is where we check if the sigset is *really* NULL. 
2443 */ 2444 if (lx_sigset.addr) { 2445 if ((r = ltos_sigset(lx_sigset.addr, &sigset)) != 0) 2446 return (r); 2447 2448 sp = &sigset; 2449 } 2450 } 2451 2452 #if defined(_LP64) 2453 r = pselect(nfds, rfdsp, wfdsp, efdsp, tsp, sp); 2454 #else 2455 if (nfds >= FD_SETSIZE) 2456 r = pselect_large_fdset(nfds, rfdsp, wfdsp, efdsp, tsp, sp); 2457 else 2458 r = pselect(nfds, rfdsp, wfdsp, efdsp, tsp, sp); 2459 #endif 2460 2461 if (r < 0) 2462 return (-errno); 2463 2464 /* 2465 * For pselect6(), we don't honor the strange Linux select() semantics 2466 * with respect to the timestruc parameter because glibc ignores it 2467 * anyway -- just copy out the fd pointers and return. 2468 */ 2469 if ((rfdsp != NULL) && (uucopy(rfdsp, (void *)p2, fd_set_len) != 0)) 2470 return (-errno); 2471 if ((wfdsp != NULL) && (uucopy(wfdsp, (void *)p3, fd_set_len) != 0)) 2472 return (-errno); 2473 if ((efdsp != NULL) && (uucopy(efdsp, (void *)p4, fd_set_len) != 0)) 2474 return (-errno); 2475 2476 return (r); 2477 } 2478 2479 /* 2480 * The first argument is the pid (Linux tgid) to send the signal to, second 2481 * argument is the signal to send (an lx signal), and third is the siginfo_t 2482 * with extra information. We translate the code and signal only from the 2483 * siginfo_t, and leave everything else the same as it gets passed through the 2484 * signalling system. This is enough to get sigqueue working. See Linux man 2485 * page rt_sigqueueinfo(2). 
2486 */ 2487 long 2488 lx_rt_sigqueueinfo(uintptr_t p1, uintptr_t p2, uintptr_t p3) 2489 { 2490 pid_t tgid = (pid_t)p1; 2491 int lx_sig = (int)p2; 2492 int sig; 2493 lx_siginfo_t lx_siginfo; 2494 siginfo_t siginfo; 2495 int s_code; 2496 pid_t s_pid; 2497 2498 if (uucopy((void *)p3, &lx_siginfo, sizeof (lx_siginfo_t)) != 0) 2499 return (-EFAULT); 2500 s_code = ltos_sigcode(lx_siginfo.lsi_code); 2501 if (s_code == LX_SI_CODE_NOT_EXIST) 2502 return (-EINVAL); 2503 if (lx_sig < 0 || lx_sig > LX_NSIG || (sig = ltos_signo[lx_sig]) < 0) { 2504 return (-EINVAL); 2505 } 2506 /* 2507 * This case (when trying to kill pid 0) just has a different errno 2508 * returned in illumos than in Linux. 2509 */ 2510 if (tgid == 0) 2511 return (-ESRCH); 2512 if (lx_lpid_to_spid(tgid, &s_pid) != 0) 2513 return (-ESRCH); 2514 if (SI_CANQUEUE(s_code)) { 2515 return ((syscall(SYS_sigqueue, s_pid, sig, 2516 lx_siginfo.lsi_value, s_code, 0) == -1) ? 2517 (-errno): 0); 2518 } else { 2519 /* 2520 * This case is unlikely, as the main entry point is through 2521 * sigqueue, which always has a queuable si_code. 2522 */ 2523 siginfo.si_signo = sig; 2524 siginfo.si_code = s_code; 2525 siginfo.si_pid = lx_siginfo.lsi_pid; 2526 siginfo.si_value = lx_siginfo.lsi_value; 2527 siginfo.si_uid = lx_siginfo.lsi_uid; 2528 return ((syscall(SYS_brand, B_HELPER_SIGQUEUE, 2529 tgid, sig, &siginfo)) ? (-errno) : 0); 2530 } 2531 } 2532 2533 /* 2534 * Adds an additional argument for which thread within a thread group to send 2535 * the signal to (added as the second argument). 
2536 */ 2537 long 2538 lx_rt_tgsigqueueinfo(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4) 2539 { 2540 pid_t tgid = (pid_t)p1; 2541 pid_t tid = (pid_t)p2; 2542 int lx_sig = (int)p3; 2543 int sig; 2544 lx_siginfo_t lx_siginfo; 2545 siginfo_t siginfo; 2546 int si_code; 2547 2548 if (uucopy((void *)p4, &lx_siginfo, sizeof (lx_siginfo_t)) != 0) 2549 return (-EFAULT); 2550 if (lx_sig < 0 || lx_sig > LX_NSIG || (sig = ltos_signo[lx_sig]) < 0) { 2551 return (-EINVAL); 2552 } 2553 si_code = ltos_sigcode(lx_siginfo.lsi_code); 2554 if (si_code == LX_SI_CODE_NOT_EXIST) 2555 return (-EINVAL); 2556 /* 2557 * Check for invalid tgid and tids. That appears to be only negatives 2558 * and 0 values. Everything else that doesn't exist is instead ESRCH. 2559 */ 2560 if (tgid <= 0 || tid <= 0) 2561 return (-EINVAL); 2562 siginfo.si_signo = sig; 2563 siginfo.si_code = si_code; 2564 siginfo.si_pid = lx_siginfo.lsi_pid; 2565 siginfo.si_value = lx_siginfo.lsi_value; 2566 siginfo.si_uid = lx_siginfo.lsi_uid; 2567 2568 return ((syscall(SYS_brand, B_HELPER_TGSIGQUEUE, tgid, tid, sig, 2569 &siginfo)) ? (-errno) : 0); 2570 } 2571 2572 long 2573 lx_signalfd(int fd, uintptr_t mask, size_t msize) 2574 { 2575 return (lx_signalfd4(fd, mask, msize, 0)); 2576 } 2577 2578 long 2579 lx_signalfd4(int fd, uintptr_t mask, size_t msize, int flags) 2580 { 2581 sigset_t s_set; 2582 int r; 2583 2584 if (msize != sizeof (int64_t)) 2585 return (-EINVAL); 2586 2587 if (ltos_sigset((lx_sigset_t *)mask, &s_set) != 0) 2588 return (-errno); 2589 2590 r = signalfd(fd, &s_set, flags); 2591 2592 /* 2593 * signalfd(3C) may fail with ENOENT if /dev/signalfd is not available. 2594 * It is less jarring to Linux programs to tell them that internal 2595 * allocation failed than to report an error number they are not 2596 * expecting. 2597 */ 2598 if (r == -1 && errno == ENOENT) 2599 return (-ENODEV); 2600 2601 return (r == -1 ? -errno : r); 2602 }