1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  */
  26 
  27 /*
  28  * Copyright 2015 Joyent, Inc. All rights reserved.
  29  */
  30 
#include <sys/types.h>
#include <sys/param.h>
#include <sys/segments.h>
#include <sys/lx_types.h>
#include <sys/lx_brand.h>
#include <sys/lx_misc.h>
#include <sys/lx_debug.h>
#include <sys/lx_poll.h>
#include <sys/lx_signal.h>
#include <sys/lx_sigstack.h>
#include <sys/lx_syscall.h>
#include <sys/lx_thread.h>
#include <sys/syscall.h>
#include <lx_provider_impl.h>
#include <sys/stack.h>
#include <assert.h>
#include <errno.h>
#include <poll.h>
#include <rctl.h>
#include <signal.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <thread.h>
#include <ucontext.h>
#include <unistd.h>
#include <stdio.h>
#include <libintl.h>
#include <ieeefp.h>
#include <sys/signalfd.h>
  61 
  62 #if defined(_ILP32)
  63 extern int pselect_large_fdset(int nfds, fd_set *in0, fd_set *out0, fd_set *ex0,
  64         const timespec_t *tsp, const sigset_t *sp);
  65 #endif
  66 
  67 #define MIN(a, b)       ((a) < (b) ? (a) : (b))
  68 
  69 /*
  70  * Delivering signals to a Linux process is complicated by differences in
  71  * signal numbering, stack structure and contents, and the action taken when a
  72  * signal handler exits.  In addition, many signal-related structures, such as
  73  * sigset_ts, vary between Illumos and Linux.
  74  *
  75  * To support user-level signal handlers, the brand uses a double layer of
  76  * indirection to process and deliver signals to branded threads.
  77  *
  78  * When a Linux process sends a signal using the kill(2) system call, we must
  79  * translate the signal into the Illumos equivalent before handing control off
  80  * to the standard signalling mechanism.  When a signal is delivered to a Linux
  81  * process, we translate the signal number from Illumos back to Linux.
  82  * Translating signals both at generation and delivery time ensures both that
  83  * Illumos signals are sent properly to Linux applications and that signals'
  84  * default behavior works as expected.
  85  *
  86  * In a normal Illumos process, signal delivery is interposed on for any thread
  87  * registering a signal handler by libc. Libc needs to do various bits of magic
  88  * to provide thread-safe critical regions, so it registers its own handler,
  89  * named sigacthandler(), using the sigaction(2) system call. When a signal is
  90  * received, sigacthandler() is called, and after some processing, libc turns
  91  * around and calls the user's signal handler via a routine named
  92  * call_user_handler().
  93  *
  94  * Adding a Linux branded thread to the mix complicates things somewhat.
  95  *
  96  * First, when a thread receives a signal, it may either be running in an
  97  * emulated Linux context or a native illumos context.  In either case, the
  98  * in-kernel brand module is responsible for preserving the register state
  99  * from the interrupted context, regardless of whether emulated or native
 100  * software was running at the time.  The kernel is also responsible for
 101  * ensuring that the illumos native sigacthandler() is called with register
 102  * values appropriate for native code.  Of particular note is the %gs segment
 103  * selector for 32-bit code, and the %fsbase segment base register for 64-bit
 104  * code; these are used by libc to locate per-thread data structures.
 105  *
 106  * Second, the signal number translation referenced above must take place.
 107  * Finally, when we hand control to the Linux signal handler we must do so
 108  * on the brand stack, and with registers configured appropriately for the
 109  * Linux application.
 110  *
 111  * This need to translate signal numbers (and manipulate the signal handling
 112  * context) means that with standard Illumos libc, following a signal from
 113  * generation to delivery looks something like:
 114  *
 115  *      kernel ->
 116  *          sigacthandler() ->
 117  *              call_user_handler() ->
 118  *                  user signal handler
 119  *
 120  * but for the brand's Linux threads, this would look like:
 121  *
 122  *      kernel ->
 123  *          sigacthandler() ->
 124  *              call_user_handler() ->
 125  *                  lx_call_user_handler() ->
 126  *                      lx_sigdeliver() ->
 127  *                          syscall(B_JUMP_TO_LINUX, ...) ->
 128  *                              Linux user signal handler
 129  *
 130  * The new additions are:
 131  *
 132  *      lx_call_user_handler
 133  *      ====================
 134  *      This routine is responsible for translating Illumos signal numbers to
 135  *      their Linux equivalents, building a Linux signal stack based on the
 136  *      information Illumos has provided, and passing the stack to the
 137  *      registered Linux signal handler. It is, in effect, the Linux thread
 138  *      equivalent to libc's call_user_handler().
 139  *
 140  *      lx_sigdeliver
 141  *      =============
 142  *
 143  * Note that none of this interposition is necessary unless a Linux thread
 144  * registers a user signal handler, as the default action for all signals is the
 145  * same between Illumos and Linux save for one signal, SIGPWR.  For this reason,
 146  * the brand ALWAYS installs its own internal signal handler for SIGPWR that
 147  * translates the action to the Linux default, to terminate the process.
 148  * (Illumos' default action is to ignore SIGPWR.)
 149  *
 150  * It is also important to note that when signals are not translated, the brand
 151  * relies upon code interposing upon the wait(2) system call to translate
 152  * signals to their proper values for any Linux threads retrieving the status
 153  * of others.  So while the Illumos signal number for a particular signal is set
 154  * in a process' data structures (and would be returned as the result of say,
 155  * WTERMSIG()), the brand's interposition upon wait(2) is responsible for
 156  * translating the value WTERMSIG() would return from an Illumos signal number
 157  * to the appropriate Linux value.
 158  *
 159  * lx_call_user_handler() calls lx_sigdeliver() with a helper function
 160  * (typically lx_build_signal_frame) which builds a stack frame for the 32-bit
 161  * Linux signal handler, or populates a local (on the stack) structure for the
 162  * 64-bit Linux signal handler. The stack at that time looks like this:
 163  *
 164  *      =========================================================
 165  * |    | lx_sigdeliver_frame_t -- includes LX_SIGRT_MAGIC and  |
 166  * |    | a return context for the eventual sigreturn(2) call   |
 167  * |    =========================================================
 168  * |    | Linux signal frame (32-bit) or local data             |
 169  * V    | (64-bit) built by stack_builder()                     |
 170  *      =========================================================
 171  *
 172  * The process of returning to an interrupted thread of execution from a user
 173  * signal handler is entirely different between Illumos and Linux.  While
 174  * Illumos generally expects to set the context to the interrupted one on a
 175  * normal return from a signal handler, in the normal case Linux instead calls
 176  * code that calls a specific Linux system call, rt_sigreturn(2) (or it also
 177  * can call sigreturn(2) in 32-bit code).  Thus when a Linux signal handler
 178  * completes execution, instead of returning through what would in libc be a
 179  * call to setcontext(2), the rt_sigreturn(2) Linux system call is responsible
 180  * for accomplishing much the same thing. It's for this reason that the stack
 181  * frame we build has the lx_(rt_)sigreturn_tramp code on the top of the
 182  * stack.  The code looks like this:
 183  *
 184  *      32-bit                                  64-bit
 185  *      --------------------------------        -----------------------------
 186  *      mov LX_SYS_rt_sigreturn, %eax           movq LX_SYS_rt_sigreturn, %rax
 187  *      int $0x80                               syscall
 188  *
 189  * We also use these same functions (lx_rt_sigreturn_tramp or
 190  * lx_sigreturn_tramp) to actually return from the signal handler.
 191  *
 192  * (Note that this trampoline code actually lives in a proper executable segment
 193  * and not on the stack, but gdb checks for the exact code sequence of the
 194  * trampoline code on the stack to determine whether it is in a signal stack
 195  * frame or not.  Really.)
 196  *
 197  * When the 32-bit Linux user signal handler is eventually called, the brand
 198  * stack frame looks like this (in the case of a "modern" signal stack; see
 199  * the lx_sigstack structure definition):
 200  *
 201  *      =========================================================
 202  * |    | lx_sigdeliver_frame_t                                 |
 203  * |    =========================================================
 204  * |    | Trampoline code (marker for gdb, not really executed) |
 205  * |    =========================================================
 206  * |    | Linux struct _fpstate                                 |
 207  * |    =========================================================
 208  * V    | Linux ucontext_t                                      | <--+
 209  *      =========================================================    |
 210  *      | Linux siginfo_t                                       | <--|-----+
 211  *      =========================================================    |     |
 212  *      | Pointer to Linux ucontext_t (or NULL) (sigaction arg2)| ---+     |
 213  *      =========================================================          |
 214  *      | Pointer to Linux siginfo_t (or NULL)  (sigaction arg1)| ---------+
 215  *      =========================================================
 216  *      | Linux signal number                   (sigaction arg0)|
 217  *      =========================================================
 218  *      | Pointer to signal return code (trampoline code)       |
 219  *      =========================================================
 220  *
 221  * The 64-bit stack-local data looks like this:
 222  *
 223  *      =========================================================
 224  * |    | lx_sigdeliver_frame_t                                 |
 225  * |    =========================================================
 226  * |    | Trampoline code (marker for gdb, not really executed) |
 227  * |    =========================================================
 228  * |    | Linux struct _fpstate                                 |
 229  * |    =========================================================
 230  * V    | Linux ucontext_t                                      | %rdx arg2
 231  *      =========================================================
 232  *      | Linux siginfo_t                                       | %rsi arg1
 233  *      =========================================================
 234  *      | Pointer to signal return code (trampoline code)       |
 235  *      =========================================================
 236  *
 237  * As usual in 64-bit code, %rdi is arg0 which is the signal number.
 238  *
 239  * The *sigreturn(2) family of emulated system call handlers locates the
 240  * "lx_sigdeliver_frame_t" struct on the Linux stack as part of processing
 241  * the system call.  This object contains a guard value (LX_SIGRT_MAGIC) to
 242  * detect stack smashing or an incorrect stack pointer.  It also contains a
 243  * "return" context, which we use to get back to the "lx_sigdeliver()" frame
 244  * on the native stack that originally dispatched to the Linux signal
 245  * handler.  The lx_sigdeliver() function is then able to return to the
 246  * native libc signal handler in the usual way.  This results in a further
 247  * setcontext() back to whatever was running when we took the signal.
 248  *
 249  * There are some edge cases where the "return" context cannot be located
 250  * by inspection of the Linux stack; e.g. if the guard value has been
 251  * corrupted, or the emulated program has relocated parts of the signal
 252  * delivery stack frame.  If this case is detected, a fallback mechanism is
 253  * used to attempt to find the return context.  A chain of "lx_sigbackup_t"
 254  * objects is maintained in signal interposer call frames, with the current
 255  * head stored in the thread-specific "lx_tsd_t".  This mechanism is
 256  * similar in principle to the "lwp_oldcontext" member of the "klwp_t" used
 257  * by the native signal handling infrastructure.  This backup chain is used
 258  * by the sigreturn(2) family of emulated system calls in the event that
 259  * the Linux stack did not correctly reference a return context.
 260  */
 261 
 262 typedef struct lx_sigdeliver_frame {
 263         uintptr_t lxsdf_magic;
 264         ucontext_t *lxsdf_retucp;
 265         ucontext_t *lxsdf_sigucp;
 266         lx_sigbackup_t *lxsdf_sigbackup;
 267 } lx_sigdeliver_frame_t;
 268 
 269 struct lx_oldsigstack {
 270         void (*retaddr)();      /* address of real lx_sigreturn code */
 271         int sig;                /* signal number */
 272         lx_sigcontext_t sigc;   /* saved user context */
 273         lx_fpstate_t fpstate;   /* saved FP state */
 274         int sig_extra;          /* signal mask for signals [32 .. NSIG - 1] */
 275         char trampoline[8];     /* code for trampoline to lx_sigreturn() */
 276 };
 277 
 278 /*
 279  * The lx_sighandlers structure needs to be a global due to the semantics of
 280  * clone().
 281  *
 282  * If CLONE_SIGHAND is set, the calling process and child share signal
 283  * handlers, and if either calls sigaction(2) it should change the behavior
 284  * in the other thread.  Each thread does, however, have its own signal mask
 285  * and set of pending signals.
 286  *
 287  * If CLONE_SIGHAND is not set, the child process should inherit a copy of
 288  * the signal handlers at the time of the clone() but later calls to
 289  * sigaction(2) should only affect the individual thread calling it.
 290  *
 291  * This maps perfectly to a thr_create(3C) thread semantic in the first
 292  * case and a fork(2)-type semantic in the second case.  By making
 293  * lx_sighandlers global, we automatically get the correct behavior.
 294  */
 295 static lx_sighandlers_t lx_sighandlers;
 296 
 297 /*
 298  * Setting LX_NO_ABORT_HANDLER in the environment will prevent the emulated
 299  * Linux program from modifying the signal handling disposition for SIGSEGV or
 300  * SIGABRT.  Useful for debugging programs which fall over themselves to
 301  * prevent useful core files being generated.
 302  */
 303 static int lx_no_abort_handler = 0;
 304 
 305 static void lx_sigdeliver(int, siginfo_t *, ucontext_t *, size_t, void (*)(),
 306     void (*)(), struct lx_sigaction *);
 307 
 308 /*
 309  * Cache result of process.max-file-descriptor to avoid calling getrctl()
 310  * for each lx_ppoll().
 311  */
 312 static rlim_t maxfd = 0;
 313 
 314 /*
 315  * stol_stack() and ltos_stack() convert between Illumos and Linux stack_t
 316  * structures.
 317  *
 318  * These routines are needed because although the two structures have the same
 319  * contents, their contents are declared in a different order, so the content
 320  * of the structures cannot be copied with a simple bcopy().
 321  */
 322 static void
 323 stol_stack(stack_t *fr, lx_stack_t *to)
 324 {
 325         to->ss_sp = fr->ss_sp;
 326         to->ss_flags = fr->ss_flags;
 327         to->ss_size = fr->ss_size;
 328 }
 329 
 330 static void
 331 ltos_stack(lx_stack_t *fr, stack_t *to)
 332 {
 333         to->ss_sp = fr->ss_sp;
 334         to->ss_flags = fr->ss_flags;
 335         to->ss_size = fr->ss_size;
 336 }
 337 
 338 static int
 339 ltos_sigset(lx_sigset_t *lx_sigsetp, sigset_t *s_sigsetp)
 340 {
 341         lx_sigset_t l;
 342         int lx_sig, sig;
 343 
 344         if (uucopy(lx_sigsetp, &l, sizeof (lx_sigset_t)) != 0)
 345                 return (-errno);
 346 
 347         (void) sigemptyset(s_sigsetp);
 348 
 349         for (lx_sig = 1; lx_sig <= LX_NSIG; lx_sig++) {
 350                 if (lx_sigismember(&l, lx_sig) &&
 351                     ((sig = ltos_signo[lx_sig]) > 0))
 352                         (void) sigaddset(s_sigsetp, sig);
 353         }
 354 
 355         return (0);
 356 }
 357 
 358 static int
 359 stol_sigset(sigset_t *s_sigsetp, lx_sigset_t *lx_sigsetp)
 360 {
 361         lx_sigset_t l;
 362         int sig, lx_sig;
 363 
 364         bzero(&l, sizeof (lx_sigset_t));
 365 
 366         for (sig = 1; sig < NSIG; sig++) {
 367                 if (sigismember(s_sigsetp, sig) &&
 368                     ((lx_sig = stol_signo[sig]) > 0))
 369                         lx_sigaddset(&l, lx_sig);
 370         }
 371 
 372         return ((uucopy(&l, lx_sigsetp, sizeof (lx_sigset_t)) != 0)
 373             ? -errno : 0);
 374 }
 375 
 376 #if defined(_ILP32)
 377 static int
 378 ltos_osigset(lx_osigset_t *lx_osigsetp, sigset_t *s_sigsetp)
 379 {
 380         lx_osigset_t lo;
 381         int lx_sig, sig;
 382 
 383         if (uucopy(lx_osigsetp, &lo, sizeof (lx_osigset_t)) != 0)
 384                 return (-errno);
 385 
 386         (void) sigemptyset(s_sigsetp);
 387 
 388         for (lx_sig = 1; lx_sig <= OSIGSET_NBITS; lx_sig++)
 389                 if ((lo & OSIGSET_BITSET(lx_sig)) &&
 390                     ((sig = ltos_signo[lx_sig]) > 0))
 391                         (void) sigaddset(s_sigsetp, sig);
 392 
 393         return (0);
 394 }
 395 
 396 static int
 397 stol_osigset(sigset_t *s_sigsetp, lx_osigset_t *lx_osigsetp)
 398 {
 399         lx_osigset_t lo = 0;
 400         int lx_sig, sig;
 401 
 402         /*
 403          * Note that an lx_osigset_t can only represent the signals from
 404          * [1 .. OSIGSET_NBITS], so even though a signal may be present in the
 405          * Illumos sigset_t, it may not be representable as a bit in the
 406          * lx_osigset_t.
 407          */
 408         for (sig = 1; sig < NSIG; sig++)
 409                 if (sigismember(s_sigsetp, sig) &&
 410                     ((lx_sig = stol_signo[sig]) > 0) &&
 411                     (lx_sig <= OSIGSET_NBITS))
 412                         lo |= OSIGSET_BITSET(lx_sig);
 413 
 414         return ((uucopy(&lo, lx_osigsetp, sizeof (lx_osigset_t)) != 0)
 415             ? -errno : 0);
 416 }
 417 #endif
 418 
 419 static int
 420 ltos_sigcode(int si_code)
 421 {
 422         switch (si_code) {
 423                 case LX_SI_USER:
 424                         return (SI_USER);
 425                 case LX_SI_TKILL:
 426                         return (SI_LWP);
 427                 case LX_SI_QUEUE:
 428                         return (SI_QUEUE);
 429                 case LX_SI_TIMER:
 430                         return (SI_TIMER);
 431                 case LX_SI_ASYNCIO:
 432                         return (SI_ASYNCIO);
 433                 case LX_SI_MESGQ:
 434                         return (SI_MESGQ);
 435                 default:
 436                         return (LX_SI_CODE_NOT_EXIST);
 437         }
 438 }
 439 
 440 int
 441 stol_siginfo(siginfo_t *siginfop, lx_siginfo_t *lx_siginfop)
 442 {
 443         int ret = 0;
 444         lx_siginfo_t lx_siginfo;
 445 
 446         bzero(&lx_siginfo, sizeof (*lx_siginfop));
 447 
 448         if ((lx_siginfo.lsi_signo = stol_signo[siginfop->si_signo]) <= 0) {
 449                 /*
 450                  * Depending on the caller we may still need to get a usable
 451                  * converted siginfo struct.
 452                  */
 453                 lx_siginfo.lsi_signo = LX_SIGKILL;
 454                 errno = EINVAL;
 455                 ret = -1;
 456         }
 457 
 458         lx_siginfo.lsi_code = lx_stol_sigcode(siginfop->si_code);
 459         lx_siginfo.lsi_errno = siginfop->si_errno;
 460 
 461         switch (lx_siginfo.lsi_signo) {
 462                 /*
 463                  * Semantics ARE defined for SIGKILL, but since
 464                  * we can't catch it, we can't translate it. :-(
 465                  */
 466                 case LX_SIGPOLL:
 467                         lx_siginfo.lsi_band = siginfop->si_band;
 468                         lx_siginfo.lsi_fd = siginfop->si_fd;
 469                         break;
 470 
 471                 case LX_SIGCHLD:
 472                         lx_siginfo.lsi_pid = siginfop->si_pid;
 473                         if (siginfop->si_code <= 0 || siginfop->si_code ==
 474                             CLD_EXITED) {
 475                                 lx_siginfo.lsi_status = siginfop->si_status;
 476                         } else {
 477                                 lx_siginfo.lsi_status = lx_stol_status(
 478                                     siginfop->si_status, -1);
 479                         }
 480                         lx_siginfo.lsi_utime = siginfop->si_utime;
 481                         lx_siginfo.lsi_stime = siginfop->si_stime;
 482                         break;
 483 
 484                 case LX_SIGILL:
 485                 case LX_SIGBUS:
 486                 case LX_SIGFPE:
 487                 case LX_SIGSEGV:
 488                         lx_siginfo.lsi_addr = siginfop->si_addr;
 489                         break;
 490 
 491                 default:
 492                         lx_siginfo.lsi_pid = siginfop->si_pid;
 493                         lx_siginfo.lsi_uid =
 494                             LX_UID32_TO_UID16(siginfop->si_uid);
 495                         lx_siginfo.lsi_value = siginfop->si_value;
 496                         break;
 497         }
 498 
 499         if (uucopy(&lx_siginfo, lx_siginfop, sizeof (lx_siginfo_t)) != 0)
 500                 return (-errno);
 501         return ((ret != 0) ? -errno : 0);
 502 }
 503 
 504 static void
 505 stol_fpstate(fpregset_t *fpr, lx_fpstate_t *lfpr)
 506 {
 507         size_t copy_len;
 508 
 509 #if defined(_LP64)
 510         /*
 511          * The 64-bit Illumos struct fpregset_t and lx_fpstate_t are identical
 512          * so just bcopy() those entries (see usr/src/uts/intel/sys/regset.h
 513          * for __amd64's struct fpu).
 514          */
 515         copy_len = sizeof (fpr->fp_reg_set.fpchip_state);
 516         bcopy(fpr, lfpr, copy_len);
 517 
 518 #else /* is _ILP32 */
 519         struct _fpstate *fpsp = (struct _fpstate *)fpr;
 520 
 521         /*
 522          * The Illumos struct _fpstate and lx_fpstate_t are identical from the
 523          * beginning of the structure to the lx_fpstate_t "magic" field, so
 524          * just bcopy() those entries.
 525          */
 526         copy_len = (size_t)&(((lx_fpstate_t *)0)->magic);
 527         bcopy(fpsp, lfpr, copy_len);
 528 
 529         /*
 530          * These fields are all only significant for the first 16 bits.
 531          */
 532         lfpr->cw &= 0xffff;              /* x87 control word */
 533         lfpr->tag &= 0xffff;             /* x87 tag word */
 534         lfpr->cssel &= 0xffff;           /* cs selector */
 535         lfpr->datasel &= 0xffff; /* ds selector */
 536 
 537         /*
 538          * Linux wants the x87 status word field to contain the value of the
 539          * x87 saved exception status word.
 540          */
 541         lfpr->sw = lfpr->status & 0xffff;     /* x87 status word */
 542 
 543         lfpr->mxcsr = fpsp->mxcsr;
 544 
 545         if (fpsp->mxcsr != 0) {
 546                 /*
 547                  * Linux uses the "magic" field to denote whether the XMM
 548                  * registers contain legal data or not.  Since we can't get to
 549                  * %cr4 from userland to check the status of the OSFXSR bit,
 550                  * check the mxcsr field to see if it's 0, which it should
 551                  * never be on a system with the OXFXSR bit enabled.
 552                  */
 553                 lfpr->magic = LX_X86_FXSR_MAGIC;
 554                 bcopy(fpsp->xmm, lfpr->_xmm, sizeof (lfpr->_xmm));
 555         } else {
 556                 lfpr->magic = LX_X86_FXSR_NONE;
 557         }
 558 #endif
 559 }
 560 
 561 static void
 562 ltos_fpstate(lx_fpstate_t *lfpr, fpregset_t *fpr)
 563 {
 564         size_t copy_len;
 565 
 566 #if defined(_LP64)
 567         /*
 568          * The 64-bit Illumos struct fpregset_t and lx_fpstate_t are identical
 569          * so just bcopy() those entries (see usr/src/uts/intel/sys/regset.h
 570          * for __amd64's struct fpu).
 571          */
 572         copy_len = sizeof (fpr->fp_reg_set.fpchip_state);
 573         bcopy(lfpr, fpr, copy_len);
 574 
 575 #else /* is _ILP32 */
 576         struct _fpstate *fpsp = (struct _fpstate *)fpr;
 577 
 578         /*
 579          * The lx_fpstate_t and Illumos struct _fpstate are identical from the
 580          * beginning of the structure to the struct _fpstate "mxcsr" field, so
 581          * just bcopy() those entries.
 582          *
 583          * Note that we do NOT have to propogate changes the user may have made
 584          * to the "status" word back to the "sw" word, unlike the way we have
 585          * to deal with processing the ESP and UESP register values on return
 586          * from a signal handler.
 587          */
 588         copy_len = (size_t)&(((struct _fpstate *)0)->mxcsr);
 589         bcopy(lfpr, fpsp, copy_len);
 590 
 591         /*
 592          * These fields are all only significant for the first 16 bits.
 593          */
 594         fpsp->cw &= 0xffff;              /* x87 control word */
 595         fpsp->sw &= 0xffff;              /* x87 status word */
 596         fpsp->tag &= 0xffff;             /* x87 tag word */
 597         fpsp->cssel &= 0xffff;           /* cs selector */
 598         fpsp->datasel &= 0xffff; /* ds selector */
 599         fpsp->status &= 0xffff;          /* saved status */
 600 
 601         fpsp->mxcsr = lfpr->mxcsr;
 602 
 603         if (lfpr->magic == LX_X86_FXSR_MAGIC)
 604                 bcopy(lfpr->_xmm, fpsp->xmm, sizeof (fpsp->xmm));
 605 #endif
 606 }
 607 
 608 /*
 609  * We do not use the system sigaltstack() infrastructure as that would conflict
 610  * with our handling of both system call emulation and native signals on the
 611  * native stack.  Instead, we track the Linux stack structure in our
 612  * thread-specific data.  This function is modeled on the behaviour of the
 613  * native sigaltstack system call handler.
 614  */
 615 long
 616 lx_sigaltstack(uintptr_t ssp, uintptr_t oss)
 617 {
 618         lx_tsd_t *lxtsd = lx_get_tsd();
 619         lx_stack_t ss;
 620 
 621         if (ssp != NULL) {
 622                 if (lxtsd->lxtsd_sigaltstack.ss_flags & LX_SS_ONSTACK) {
 623                         /*
 624                          * If we are currently using the installed alternate
 625                          * stack for signal handling, the user may not modify
 626                          * the stack for this thread.
 627                          */
 628                         return (-EPERM);
 629                 }
 630 
 631                 if (uucopy((void *)ssp, &ss, sizeof (ss)) != 0) {
 632                         return (-EFAULT);
 633                 }
 634 
 635                 if (ss.ss_flags & ~LX_SS_DISABLE) {
 636                         /*
 637                          * The user may not specify a value for flags other
 638                          * than 0 or SS_DISABLE.
 639                          */
 640                         return (-EINVAL);
 641                 }
 642 
 643                 if (!(ss.ss_flags & LX_SS_DISABLE) && ss.ss_size <
 644                     LX_MINSIGSTKSZ) {
 645                         return (-ENOMEM);
 646                 }
 647         }
 648 
 649         if (oss != NULL) {
 650                 /*
 651                  * User provided old and new stack_t pointers may point to
 652                  * the same location.  Copy out before we modify.
 653                  */
 654                 if (uucopy(&lxtsd->lxtsd_sigaltstack, (void *)oss,
 655                     sizeof (lxtsd->lxtsd_sigaltstack)) != 0) {
 656                         return (-EFAULT);
 657                 }
 658         }
 659 
 660         if (ssp != NULL) {
 661                 lxtsd->lxtsd_sigaltstack = ss;
 662         }
 663 
 664         return (0);
 665 }
 666 
 667 #if defined(_ILP32)
 668 /*
 669  * The following routines are needed because sigset_ts and siginfo_ts are
 670  * different in format between Linux and Illumos.
 671  *
 672  * Note that there are two different lx_sigset structures, lx_sigset_ts and
 673  * lx_osigset_ts:
 674  *
 675  *    + An lx_sigset_t is the equivalent of an Illumos sigset_t and supports
 676  *      more than 32 signals.
 677  *
 678  *    + An lx_osigset_t is simply a uint32_t, so it by definition only supports
 679  *      32 signals.
 680  *
 681  * When there are two versions of a routine, one prefixed with lx_rt_ and
 682  * one prefixed with lx_ alone, in GENERAL the lx_rt_ routines deal with
 683  * lx_sigset_ts while the lx_ routines deal with lx_osigset_ts.  Unfortunately,
 684  * this is not always the case (e.g. lx_sigreturn() vs. lx_rt_sigreturn())
 685  */
 686 long
 687 lx_sigpending(uintptr_t sigpend)
 688 {
 689         sigset_t sigpendset;
 690 
 691         if (sigpending(&sigpendset) != 0)
 692                 return (-errno);
 693 
 694         return (stol_osigset(&sigpendset, (lx_osigset_t *)sigpend));
 695 }
 696 #endif
 697 
 698 long
 699 lx_rt_sigpending(uintptr_t sigpend, uintptr_t setsize)
 700 {
 701         sigset_t sigpendset;
 702 
 703         if ((size_t)setsize != sizeof (lx_sigset_t))
 704                 return (-EINVAL);
 705 
 706         if (sigpending(&sigpendset) != 0)
 707                 return (-errno);
 708 
 709         return (stol_sigset(&sigpendset, (lx_sigset_t *)sigpend));
 710 }
 711 
 712 /*
 713  * Create a common routine to encapsulate all of the sigprocmask code,
 714  * as the only difference between lx_sigprocmask() and lx_rt_sigprocmask()
 715  * is the usage of lx_osigset_ts vs. lx_sigset_ts, as toggled in the code by
 716  * the setting of the "sigset_type" flag.
 717  */
 718 static int
 719 lx_sigprocmask_common(uintptr_t how, uintptr_t l_setp, uintptr_t l_osetp,
 720     uintptr_t sigset_type)
 721 {
 722         int err = 0;
 723         sigset_t set, oset;
 724         sigset_t *s_setp = NULL;
 725         sigset_t *s_osetp;
 726 
 727         if (l_setp) {
 728                 switch (how) {
 729                         case LX_SIG_BLOCK:
 730                                 how = SIG_BLOCK;
 731                                 break;
 732 
 733                         case LX_SIG_UNBLOCK:
 734                                 how = SIG_UNBLOCK;
 735                                 break;
 736 
 737                         case LX_SIG_SETMASK:
 738                                 how = SIG_SETMASK;
 739                                 break;
 740 
 741                         default:
 742                                 return (-EINVAL);
 743                 }
 744 
 745                 s_setp = &set;
 746 
 747                 /* Only 32-bit code passes other than USE_SIGSET */
 748                 if (sigset_type == USE_SIGSET)
 749                         err = ltos_sigset((lx_sigset_t *)l_setp, s_setp);
 750 #if defined(_ILP32)
 751                 else
 752                         err = ltos_osigset((lx_osigset_t *)l_setp, s_setp);
 753 #endif
 754 
 755                 if (err != 0)
 756                         return (err);
 757 
 758         }
 759 
 760         s_osetp = (l_osetp ? &oset : NULL);
 761 
 762         /*
 763          * In a multithreaded environment, a call to sigprocmask(2) should
 764          * only affect the current thread's signal mask so we don't need to
 765          * explicitly call thr_sigsetmask(3C) here.
 766          */
 767         if (sigprocmask(how, s_setp, s_osetp) != 0)
 768                 return (-errno);
 769 
 770         if (l_osetp) {
 771                 if (sigset_type == USE_SIGSET)
 772                         err = stol_sigset(s_osetp, (lx_sigset_t *)l_osetp);
 773 #if defined(_ILP32)
 774                 else
 775                         err = stol_osigset(s_osetp, (lx_osigset_t *)l_osetp);
 776 #endif
 777 
 778                 if (err != 0) {
 779                         /*
 780                          * Encountered a fault while writing to the old signal
 781                          * mask buffer, so unwind the signal mask change made
 782                          * above.
 783                          */
 784                         (void) sigprocmask(how, s_osetp, (sigset_t *)NULL);
 785                         return (err);
 786                 }
 787         }
 788 
 789         return (0);
 790 }
 791 
 792 #if defined(_ILP32)
 793 long
 794 lx_sigprocmask(uintptr_t how, uintptr_t setp, uintptr_t osetp)
 795 {
 796         return (lx_sigprocmask_common(how, setp, osetp, USE_OSIGSET));
 797 }
 798 #endif
 799 
 800 long
 801 lx_rt_sigprocmask(uintptr_t how, uintptr_t setp, uintptr_t osetp,
 802     uintptr_t setsize)
 803 {
 804         if ((size_t)setsize != sizeof (lx_sigset_t))
 805                 return (-EINVAL);
 806 
 807         return (lx_sigprocmask_common(how, setp, osetp, USE_SIGSET));
 808 }
 809 
 810 #if defined(_ILP32)
 811 long
 812 lx_sigsuspend(uintptr_t set)
 813 {
 814         sigset_t s_set;
 815 
 816         if (ltos_osigset((lx_osigset_t *)set, &s_set) != 0)
 817                 return (-errno);
 818 
 819         return ((sigsuspend(&s_set) == -1) ? -errno : 0);
 820 }
 821 #endif
 822 
 823 long
 824 lx_rt_sigsuspend(uintptr_t set, uintptr_t setsize)
 825 {
 826         sigset_t s_set;
 827 
 828         if ((size_t)setsize != sizeof (lx_sigset_t))
 829                 return (-EINVAL);
 830 
 831         if (ltos_sigset((lx_sigset_t *)set, &s_set) != 0)
 832                 return (-errno);
 833 
 834         return ((sigsuspend(&s_set) == -1) ? -errno : 0);
 835 }
 836 
 837 long
 838 lx_rt_sigwaitinfo(uintptr_t set, uintptr_t sinfo, uintptr_t setsize)
 839 {
 840         sigset_t s_set;
 841         siginfo_t s_sinfo, *s_sinfop;
 842         int rc;
 843 
 844         lx_sigset_t *setp = (lx_sigset_t *)set;
 845         lx_siginfo_t *sinfop = (lx_siginfo_t *)sinfo;
 846 
 847         if ((size_t)setsize != sizeof (lx_sigset_t))
 848                 return (-EINVAL);
 849 
 850         if (ltos_sigset(setp, &s_set) != 0)
 851                 return (-errno);
 852 
 853         s_sinfop = (sinfop == NULL) ? NULL : &s_sinfo;
 854 
 855         if ((rc = sigwaitinfo(&s_set, s_sinfop)) == -1)
 856                 return (-errno);
 857 
 858         if (s_sinfop == NULL)
 859                 return (stol_signo[rc]);
 860 
 861         return ((stol_siginfo(s_sinfop, sinfop) != 0)
 862             ? -errno : stol_signo[rc]);
 863 }
 864 
 865 long
 866 lx_rt_sigtimedwait(uintptr_t set, uintptr_t sinfo, uintptr_t toutp,
 867     uintptr_t setsize)
 868 {
 869         sigset_t s_set;
 870         siginfo_t s_sinfo, *s_sinfop;
 871         int rc;
 872 
 873         lx_sigset_t *setp = (lx_sigset_t *)set;
 874         lx_siginfo_t *sinfop = (lx_siginfo_t *)sinfo;
 875 
 876         if ((size_t)setsize != sizeof (lx_sigset_t))
 877                 return (-EINVAL);
 878 
 879         if (ltos_sigset(setp, &s_set) != 0)
 880                 return (-errno);
 881 
 882         s_sinfop = (sinfop == NULL) ? NULL : &s_sinfo;
 883 
 884         /*
 885          * "If timeout is the NULL pointer, the behavior is unspecified."
 886          * Match what LTP expects.
 887          */
 888         if ((rc = sigtimedwait(&s_set, s_sinfop,
 889             (struct timespec *)toutp)) == -1)
 890                 return (toutp == NULL ? -EINTR : -errno);
 891 
 892         if (s_sinfop == NULL)
 893                 return (stol_signo[rc]);
 894 
 895         return ((stol_siginfo(s_sinfop, sinfop) != 0)
 896             ? -errno : stol_signo[rc]);
 897 }
 898 
 899 static void
 900 lx_sigreturn_find_native_context(const char *caller, ucontext_t **sigucp,
 901     ucontext_t **retucp, uintptr_t sp)
 902 {
 903         lx_tsd_t *lxtsd = lx_get_tsd();
 904         lx_sigdeliver_frame_t *lxsdfp = (lx_sigdeliver_frame_t *)sp;
 905         lx_sigdeliver_frame_t lxsdf;
 906         boolean_t copy_ok;
 907 
 908         lx_debug("%s: reading lx_sigdeliver_frame_t @ %p\n", caller, lxsdfp);
 909         if (uucopy(lxsdfp, &lxsdf, sizeof (lxsdf)) != 0) {
 910                 lx_debug("%s: failed to read lx_sigdeliver_frame_t @ %p\n",
 911                     lxsdfp);
 912 
 913                 copy_ok = B_FALSE;
 914         } else {
 915                 lx_debug("%s: lxsdf: magic %p retucp %p sigucp %p\n", caller,
 916                     lxsdf.lxsdf_magic, lxsdf.lxsdf_retucp, lxsdf.lxsdf_sigucp);
 917 
 918                 copy_ok = B_TRUE;
 919         }
 920 
 921         /*
 922          * lx_sigdeliver() pushes a lx_sigdeliver_frame_t onto the stack
 923          * before it creates the struct lx_oldsigstack.
 924          */
 925         if (copy_ok && lxsdf.lxsdf_magic == LX_SIGRT_MAGIC) {
 926                 LX_SIGNAL_DELIVERY_FRAME_FOUND(lxsdfp);
 927 
 928                 /*
 929                  * The guard value is intact; use the context pointers stored
 930                  * in the signal delivery frame:
 931                  */
 932                 *sigucp = lxsdf.lxsdf_sigucp;
 933                 *retucp = lxsdf.lxsdf_retucp;
 934 
 935                 /*
 936                  * Ensure that the backup signal delivery chain is in sync with
 937                  * the frame we are returning via:
 938                  */
 939                 lxtsd->lxtsd_sigbackup = lxsdf.lxsdf_sigbackup;
 940         } else {
 941                 /*
 942                  * The guard value was not intact.  Either the program smashed
 943                  * the stack unintentionally, or worse: intentionally moved
 944                  * some parts of the signal delivery frame we constructed to
 945                  * another location before calling rt_sigreturn(2).
 946                  */
 947                 LX_SIGNAL_DELIVERY_FRAME_CORRUPT(lxsdfp);
 948 
 949                 if (lxtsd->lxtsd_sigbackup == NULL) {
 950                         /*
 951                          * There was no backup context to use, so we must
 952                          * kill the process.
 953                          */
 954                         if (copy_ok) {
 955                                 lx_err_fatal("%s: sp 0x%p, expected 0x%x, "
 956                                     "found 0x%x!", caller, sp, LX_SIGRT_MAGIC,
 957                                     lxsdf.lxsdf_magic);
 958                         } else {
 959                                 lx_err_fatal("%s: sp 0x%p, could not read "
 960                                     "magic", caller, sp);
 961                         }
 962                 }
 963 
 964                 /*
 965                  * Attempt to recover by using the backup signal delivery
 966                  * chain:
 967                  */
 968                 lx_debug("%s: SIGRT_MAGIC not found @ sp %p; using backup "
 969                     "@ %p\n", caller, (void *)sp, lxtsd->lxtsd_sigbackup);
 970                 *sigucp = lxtsd->lxtsd_sigbackup->lxsb_sigucp;
 971                 *retucp = lxtsd->lxtsd_sigbackup->lxsb_retucp;
 972         }
 973 }
 974 
 975 #if defined(_ILP32)
/*
 * Intercept the Linux sigreturn() syscall to turn it into the return through
 * the libc call stack that Illumos expects.
 *
 * When control returns to libc's call_user_handler() routine, a setcontext(2)
 * will be done that returns thread execution to the point originally
 * interrupted by receipt of the signal.
 *
 * This is only used by 32-bit code.
 *
 * The steps taken are:
 *   1. derive the address of the struct lx_oldsigstack from the stack
 *      pointer saved at the emulated system call;
 *   2. find the native contexts via lx_sigreturn_find_native_context();
 *   3. copy the general registers, FP state and signal mask from the Linux
 *      frame into the native signal context (sigucp);
 *   4. setcontext() to retucp.
 *
 * The trailing "return (0)" is never expected to execute.
 */
long
lx_sigreturn(void)
{
        struct lx_oldsigstack *lx_ossp;
        lx_sigset_t lx_sigset;
        ucontext_t *ucp;
        ucontext_t *sigucp;
        ucontext_t *retucp;
        uintptr_t sp;

        /* Get the registers at the emulated Linux sigreturn syscall */
        ucp = lx_syscall_regs();

        /*
         * NOTE:  The sp saved in the context is eight bytes off of where we
         *        need it to be (either due to trampoline or the copying of
         *        sp = uesp, not clear which).
         */
        sp = LX_REG(ucp, REG_SP) - 8;

        /*
         * At this point, the stack pointer should point to the struct
         * lx_oldsigstack that lx_build_old_signal_frame() constructed and
         * placed on the stack.  We need to reference it a bit later, so
         * save a pointer to it before incrementing our copy of the sp.
         */
        lx_ossp = (struct lx_oldsigstack *)sp;
        sp += SA(sizeof (struct lx_oldsigstack));

        /*
         * sp has now been advanced past the struct lx_oldsigstack; it is the
         * address at which the signal delivery frame is looked up.
         */
        lx_sigreturn_find_native_context(__func__, &sigucp, &retucp, sp);

        /*
         * We need to copy machine registers the Linux signal handler may have
         * modified back to the Illumos ucontext_t.
         *
         * General registers copy across as-is, except Linux expects that
         * changes made to uc_mcontext.gregs[ESP] will be reflected when the
         * interrupted thread resumes execution after the signal handler. To
         * emulate this behavior, we must modify uc_mcontext.gregs[UESP] to
         * match uc_mcontext.gregs[ESP] as Illumos will restore the UESP
         * value to ESP.
         */
        lx_ossp->sigc.sc_esp_at_signal = lx_ossp->sigc.sc_esp;
        bcopy(&lx_ossp->sigc, &sigucp->uc_mcontext, sizeof (gregset_t));

        /* The old-style frame has no lx_ucontext_t, hence the NULL here. */
        LX_SIGRETURN(NULL, sigucp, sp);

        /* copy back FP regs if present */
        if (lx_ossp->sigc.sc_fpstate != NULL)
                ltos_fpstate(&lx_ossp->fpstate, &sigucp->uc_mcontext.fpregs);

        /*
         * convert Linux signal mask back to its Illumos equivalent; the old
         * frame stores the mask as two separate words (sc_mask and
         * sig_extra), which are reassembled into an lx_sigset_t first.
         */
        bzero(&lx_sigset, sizeof (lx_sigset_t));
        lx_sigset.__bits[0] = lx_ossp->sigc.sc_mask;
        lx_sigset.__bits[1] = lx_ossp->sig_extra;
        (void) ltos_sigset(&lx_sigset, &sigucp->uc_sigmask);

        /*
         * For signal mask handling to be done properly, this call needs to
         * return to the libc routine that originally called the signal handler
         * rather than directly set the context back to the place the signal
         * interrupted execution as the original Linux code would do.
         */
        lx_debug("lx_sigreturn: calling setcontext; retucp %p flags %lx "
            "link %p\n", retucp, retucp->uc_flags, retucp->uc_link);
        setcontext(retucp);
        /* setcontext() is not expected to return. */
        assert(0);

        /*NOTREACHED*/
        return (0);
}
1056 #endif
1057 
/*
 * This signal return syscall is used by both 32-bit and 64-bit code.
 *
 * The steps taken are:
 *   1. derive the address of the struct lx_sigstack from the stack pointer
 *      saved at the emulated system call;
 *   2. find the native contexts via lx_sigreturn_find_native_context();
 *   3. copy the general registers, FP state, signal mask and alternate stack
 *      information from the Linux ucontext into the native signal context
 *      (sigucp);
 *   4. setcontext() to retucp.
 *
 * The trailing "return (0)" is never expected to execute.
 */
long
lx_rt_sigreturn(void)
{
        struct lx_sigstack *lx_ssp;
        lx_ucontext_t *lx_ucp;
        ucontext_t *ucp;
        ucontext_t *sigucp;
        ucontext_t *retucp;
        uintptr_t sp;

        /* Get the registers at the emulated Linux rt_sigreturn syscall */
        ucp = lx_syscall_regs();

#if defined(_ILP32)
        lx_debug("lx_rt_sigreturn: ESP %p UESP %p\n", LX_REG(ucp, ESP),
            LX_REG(ucp, UESP));
        /*
         * For 32-bit
         *
         * NOTE:  Because of the silly compatibility measures done in the
         *        signal trampoline code to make sure the stack holds the
         *         _exact same_  instruction sequence Linux does, we have to
         *        manually "pop" some extra instructions off the stack here
         *        before passing the stack address to the syscall because the
         *        trampoline code isn't allowed to do it due to the gdb
         *        compatibility issues.
         *
         *        No, I'm not kidding.
         *
         *        The sp saved in the context is eight bytes off of where we
         *        need it to be (either due to trampoline or the copying of
         *        sp = uesp, not clear which but looks like the uesp case), so
         *        the need to pop the extra four byte instruction means we need
         *        to subtract a net four bytes from the sp before "popping" the
         *        struct lx_sigstack off the stack.
         *
         *        This will yield the value the stack pointer had before
         *        lx_sigdeliver() created the stack frame for the Linux signal
         *        handler.
         */
        sp = (uintptr_t)LX_REG(ucp, REG_SP) - 4;
#else
        /*
         * We need to make an adjustment for 64-bit code as well. Since 64-bit
         * does not use the trampoline, it's probably for the same reason as
         * alluded to above.
         */
        sp = (uintptr_t)LX_REG(ucp, REG_SP) - 8;
#endif

        /*
         * At this point, the stack pointer should point to the struct
         * lx_sigstack that lx_build_signal_frame() constructed and
         * placed on the stack.  We need to reference it a bit later, so
         * save a pointer to it before incrementing our copy of the sp.
         */
        lx_ssp = (struct lx_sigstack *)sp;
        sp += SA(sizeof (struct lx_sigstack));

#if defined(_LP64)
        /*
         * The 64-bit lx_sigdeliver() inserts 8 bytes of padding between
         * the lx_sigstack_t and the delivery frame to maintain ABI stack
         * alignment.
         */
        sp += 8;
#endif

        /*
         * sp is now the address at which the signal delivery frame is looked
         * up.
         */
        lx_sigreturn_find_native_context(__func__, &sigucp, &retucp, sp);

        /*
         * We need to copy machine registers the Linux signal handler may have
         * modified back to the Illumos version.
         */
#if defined(_LP64)
        lx_ucp = &lx_ssp->uc;

        /*
         * General register layout is completely different, so each register
         * is copied individually.  Note that the Linux sigcontext carries
         * the SS value in its sc_pad0 member.
         */
        LX_REG(sigucp, REG_R15) = lx_ucp->uc_sigcontext.sc_r15;
        LX_REG(sigucp, REG_R14) = lx_ucp->uc_sigcontext.sc_r14;
        LX_REG(sigucp, REG_R13) = lx_ucp->uc_sigcontext.sc_r13;
        LX_REG(sigucp, REG_R12) = lx_ucp->uc_sigcontext.sc_r12;
        LX_REG(sigucp, REG_R11) = lx_ucp->uc_sigcontext.sc_r11;
        LX_REG(sigucp, REG_R10) = lx_ucp->uc_sigcontext.sc_r10;
        LX_REG(sigucp, REG_R9) = lx_ucp->uc_sigcontext.sc_r9;
        LX_REG(sigucp, REG_R8) = lx_ucp->uc_sigcontext.sc_r8;
        LX_REG(sigucp, REG_RDI) = lx_ucp->uc_sigcontext.sc_rdi;
        LX_REG(sigucp, REG_RSI) = lx_ucp->uc_sigcontext.sc_rsi;
        LX_REG(sigucp, REG_RBP) = lx_ucp->uc_sigcontext.sc_rbp;
        LX_REG(sigucp, REG_RBX) = lx_ucp->uc_sigcontext.sc_rbx;
        LX_REG(sigucp, REG_RDX) = lx_ucp->uc_sigcontext.sc_rdx;
        LX_REG(sigucp, REG_RCX) = lx_ucp->uc_sigcontext.sc_rcx;
        LX_REG(sigucp, REG_RAX) = lx_ucp->uc_sigcontext.sc_rax;
        LX_REG(sigucp, REG_TRAPNO) = lx_ucp->uc_sigcontext.sc_trapno;
        LX_REG(sigucp, REG_ERR) = lx_ucp->uc_sigcontext.sc_err;
        LX_REG(sigucp, REG_RIP) = lx_ucp->uc_sigcontext.sc_rip;
        LX_REG(sigucp, REG_CS) = lx_ucp->uc_sigcontext.sc_cs;
        LX_REG(sigucp, REG_RFL) = lx_ucp->uc_sigcontext.sc_eflags;
        LX_REG(sigucp, REG_RSP) = lx_ucp->uc_sigcontext.sc_rsp;
        LX_REG(sigucp, REG_SS) = lx_ucp->uc_sigcontext.sc_pad0;
        LX_REG(sigucp, REG_FS) = lx_ucp->uc_sigcontext.sc_fs;
        LX_REG(sigucp, REG_GS) = lx_ucp->uc_sigcontext.sc_gs;

#else /* is _ILP32 */
        lx_ucp = &lx_ssp->uc;

        /*
         * Illumos and Linux both follow the SysV i386 ABI layout for the
         * mcontext.
         *
         * General registers copy across as-is, except Linux expects that
         * changes made to uc_mcontext.gregs[ESP] will be reflected when the
         * interrupted thread resumes execution after the signal handler. To
         * emulate this behavior, we must modify uc_mcontext.gregs[UESP] to
         * match uc_mcontext.gregs[ESP] as Illumos will restore the UESP value
         * to ESP.
         */
        lx_ucp->uc_sigcontext.sc_esp_at_signal = lx_ucp->uc_sigcontext.sc_esp;

        bcopy(&lx_ucp->uc_sigcontext, &sigucp->uc_mcontext.gregs,
            sizeof (gregset_t));
#endif

        LX_SIGRETURN(lx_ucp, sigucp, sp);

        /* Copy back FP regs if the Linux context references any. */
        if (lx_ucp->uc_sigcontext.sc_fpstate != NULL) {
                ltos_fpstate(lx_ucp->uc_sigcontext.sc_fpstate,
                    &sigucp->uc_mcontext.fpregs);
        }

        /*
         * Convert the Linux signal mask and stack back to their
         * Illumos equivalents.
         */
        (void) ltos_sigset(&lx_ucp->uc_sigmask, &sigucp->uc_sigmask);
        ltos_stack(&lx_ucp->uc_stack, &sigucp->uc_stack);

        /*
         * For signal mask handling to be done properly, this call needs to
         * return to the libc routine that originally called the signal handler
         * rather than directly set the context back to the place the signal
         * interrupted execution as the original Linux code would do.
         */
        lx_debug("lx_rt_sigreturn: calling setcontext; retucp %p\n", retucp);
        setcontext(retucp);
        /* setcontext() is not expected to return. */
        assert(0);

        /*NOTREACHED*/
        return (0);
}
1213 
1214 
1215 #if defined(_ILP32)
1216 /*
1217  * Build signal frame for processing for "old" (legacy) Linux signals
1218  * This stack-builder function is only used by 32-bit code.
1219  */
1220 static void
1221 lx_build_old_signal_frame(int lx_sig, siginfo_t *sip, void *p, void *sp,
1222     uintptr_t *hargs)
1223 {
1224         extern void lx_sigreturn_tramp();
1225 
1226         lx_sigset_t lx_sigset;
1227         ucontext_t *ucp = (ucontext_t *)p;
1228         struct lx_sigaction *lxsap;
1229         struct lx_oldsigstack *lx_ossp = sp;
1230 
1231         lx_debug("building old signal frame for lx sig %d at 0x%p", lx_sig, sp);
1232 
1233         lx_ossp->sig = lx_sig;
1234         lxsap = &lx_sighandlers.lx_sa[lx_sig];
1235         lx_debug("lxsap @ 0x%p", lxsap);
1236 
1237         if (lxsap && (lxsap->lxsa_flags & LX_SA_RESTORER) &&
1238             lxsap->lxsa_restorer) {
1239                 lx_ossp->retaddr = lxsap->lxsa_restorer;
1240                 lx_debug("lxsa_restorer exists @ 0x%p", lx_ossp->retaddr);
1241         } else {
1242                 lx_ossp->retaddr = lx_sigreturn_tramp;
1243                 lx_debug("lx_ossp->retaddr set to 0x%p", lx_sigreturn_tramp);
1244         }
1245 
1246         lx_debug("osf retaddr = 0x%p", lx_ossp->retaddr);
1247 
1248         /* convert Illumos signal mask and stack to their Linux equivalents */
1249         (void) stol_sigset(&ucp->uc_sigmask, &lx_sigset);
1250         lx_ossp->sigc.sc_mask = lx_sigset.__bits[0];
1251         lx_ossp->sig_extra = lx_sigset.__bits[1];
1252 
1253         /*
1254          * General registers copy across as-is, except Linux expects that
1255          * uc_mcontext.gregs[ESP] == uc_mcontext.gregs[UESP] on receipt of a
1256          * signal.
1257          */
1258         bcopy(&ucp->uc_mcontext, &lx_ossp->sigc, sizeof (gregset_t));
1259         lx_ossp->sigc.sc_esp = lx_ossp->sigc.sc_esp_at_signal;
1260 
1261         /*
1262          * cr2 contains the faulting address, and Linux only sets cr2 for a
1263          * a segmentation fault.
1264          */
1265         lx_ossp->sigc.sc_cr2 = (((lx_sig == LX_SIGSEGV) && (sip)) ?
1266             (uintptr_t)sip->si_addr : 0);
1267 
1268         /* convert FP regs if present */
1269         if (ucp->uc_flags & UC_FPU) {
1270                 stol_fpstate(&ucp->uc_mcontext.fpregs, &lx_ossp->fpstate);
1271                 lx_ossp->sigc.sc_fpstate = &lx_ossp->fpstate;
1272         } else {
1273                 lx_ossp->sigc.sc_fpstate = NULL;
1274         }
1275 
1276         /*
1277          * Believe it or not, gdb wants to SEE the trampoline code on the
1278          * bottom of the stack to determine whether the stack frame belongs to
1279          * a signal handler, even though this code is no longer actually
1280          * called.
1281          *
1282          * You can't make this stuff up.
1283          */
1284         bcopy((void *)lx_sigreturn_tramp, lx_ossp->trampoline,
1285             sizeof (lx_ossp->trampoline));
1286 }
1287 #endif
1288 
1289 /*
1290  * Build stack frame (32-bit) or stack local data (64-bit) for processing for
1291  * modern Linux signals. This is the only stack-builder function for 64-bit
1292  * code (32-bit code also calls this when using "modern" signals).
1293  */
1294 static void
1295 lx_build_signal_frame(int lx_sig, siginfo_t *sip, void *p, void *sp,
1296     uintptr_t *hargs)
1297 {
1298         extern void lx_rt_sigreturn_tramp();
1299 
1300         lx_ucontext_t *lx_ucp;
1301         ucontext_t *ucp = (ucontext_t *)p;
1302         struct lx_sigstack *lx_ssp = sp;
1303         struct lx_sigaction *lxsap;
1304 
1305         lx_debug("building signal frame for lx sig %d at 0x%p", lx_sig, sp);
1306 
1307         lx_ucp = &lx_ssp->uc;
1308 #if defined(_ILP32)
1309         /*
1310          * Arguments are passed to the 32-bit signal handler on the stack.
1311          */
1312         lx_ssp->ucp = lx_ucp;
1313         lx_ssp->sip = sip != NULL ? &lx_ssp->si : NULL;
1314         lx_ssp->sig = lx_sig;
1315 #else
1316         /*
1317          * Arguments to the 64-bit signal handler are passed in registers:
1318          *   hdlr(int sig, siginfo_t *sip, void *ucp);
1319          */
1320         hargs[0] = lx_sig;
1321         hargs[1] = sip != NULL ? (uintptr_t)&lx_ssp->si : NULL;
1322         hargs[2] = (uintptr_t)lx_ucp;
1323 #endif
1324 
1325         lxsap = &lx_sighandlers.lx_sa[lx_sig];
1326         lx_debug("lxsap @ 0x%p", lxsap);
1327 
1328         if (lxsap && (lxsap->lxsa_flags & LX_SA_RESTORER) &&
1329             lxsap->lxsa_restorer) {
1330                 /*
1331                  * lxsa_restorer is explicitly set by sigaction in 32-bit code
1332                  * but it can also be implicitly set for both 32 and 64 bit
1333                  * code via lx_sigaction_common when we bcopy the user-supplied
1334                  * lx_sigaction element into the proper slot in the sighandler
1335                  * array.
1336                  */
1337                 lx_ssp->retaddr = lxsap->lxsa_restorer;
1338                 lx_debug("lxsa_restorer exists @ 0x%p", lx_ssp->retaddr);
1339         } else {
1340                 lx_ssp->retaddr = lx_rt_sigreturn_tramp;
1341                 lx_debug("lx_ssp->retaddr set to 0x%p", lx_rt_sigreturn_tramp);
1342         }
1343 
1344         /* Linux has these fields but always clears them to 0 */
1345         lx_ucp->uc_flags = 0;
1346         lx_ucp->uc_link = NULL;
1347 
1348         /* convert Illumos signal mask and stack to their Linux equivalents */
1349         (void) stol_sigset(&ucp->uc_sigmask, &lx_ucp->uc_sigmask);
1350         stol_stack(&ucp->uc_stack, &lx_ucp->uc_stack);
1351 
1352 #if defined(_LP64)
1353         /*
1354          * General register layout is completely different.
1355          */
1356         lx_ucp->uc_sigcontext.sc_r8 = LX_REG(ucp, REG_R8);
1357         lx_ucp->uc_sigcontext.sc_r9 = LX_REG(ucp, REG_R9);
1358         lx_ucp->uc_sigcontext.sc_r10 = LX_REG(ucp, REG_R10);
1359         lx_ucp->uc_sigcontext.sc_r11 = LX_REG(ucp, REG_R11);
1360         lx_ucp->uc_sigcontext.sc_r12 = LX_REG(ucp, REG_R12);
1361         lx_ucp->uc_sigcontext.sc_r13 = LX_REG(ucp, REG_R13);
1362         lx_ucp->uc_sigcontext.sc_r14 = LX_REG(ucp, REG_R14);
1363         lx_ucp->uc_sigcontext.sc_r15 = LX_REG(ucp, REG_R15);
1364         lx_ucp->uc_sigcontext.sc_rdi = LX_REG(ucp, REG_RDI);
1365         lx_ucp->uc_sigcontext.sc_rsi = LX_REG(ucp, REG_RSI);
1366         lx_ucp->uc_sigcontext.sc_rbp = LX_REG(ucp, REG_RBP);
1367         lx_ucp->uc_sigcontext.sc_rbx = LX_REG(ucp, REG_RBX);
1368         lx_ucp->uc_sigcontext.sc_rdx = LX_REG(ucp, REG_RDX);
1369         lx_ucp->uc_sigcontext.sc_rax = LX_REG(ucp, REG_RAX);
1370         lx_ucp->uc_sigcontext.sc_rcx = LX_REG(ucp, REG_RCX);
1371         lx_ucp->uc_sigcontext.sc_rsp = LX_REG(ucp, REG_RSP);
1372         lx_ucp->uc_sigcontext.sc_rip = LX_REG(ucp, REG_RIP);
1373         lx_ucp->uc_sigcontext.sc_eflags = LX_REG(ucp, REG_RFL);
1374         lx_ucp->uc_sigcontext.sc_cs = LX_REG(ucp, REG_CS);
1375         lx_ucp->uc_sigcontext.sc_gs = LX_REG(ucp, REG_GS);
1376         lx_ucp->uc_sigcontext.sc_fs = LX_REG(ucp, REG_FS);
1377         lx_ucp->uc_sigcontext.sc_pad0 = LX_REG(ucp, REG_SS);
1378         lx_ucp->uc_sigcontext.sc_err = LX_REG(ucp, REG_ERR);
1379         lx_ucp->uc_sigcontext.sc_trapno = LX_REG(ucp, REG_TRAPNO);
1380 
1381 #else /* is _ILP32 */
1382         /*
1383          * General registers copy across as-is, except Linux expects that
1384          * uc_mcontext.gregs[ESP] == uc_mcontext.gregs[UESP] on receipt of a
1385          * signal.
1386          */
1387         bcopy(&ucp->uc_mcontext, &lx_ucp->uc_sigcontext, sizeof (gregset_t));
1388         lx_ucp->uc_sigcontext.sc_esp = lx_ucp->uc_sigcontext.sc_esp_at_signal;
1389 #endif
1390 
1391         /*
1392          * cr2 contains the faulting address, which Linux only sets for a
1393          * a segmentation fault.
1394          */
1395         lx_ucp->uc_sigcontext.sc_cr2 = ((lx_sig == LX_SIGSEGV) && (sip)) ?
1396             (uintptr_t)sip->si_addr : 0;
1397 
1398         /*
1399          * This should only return an error if the signum is invalid but that
1400          * also gets converted into a LX_SIGKILL by this function.
1401          */
1402         if (sip != NULL)
1403                 (void) stol_siginfo(sip, &lx_ssp->si);
1404         else
1405                 bzero(&lx_ssp->si, sizeof (lx_siginfo_t));
1406 
1407         /* convert FP regs if present */
1408         if (ucp->uc_flags & UC_FPU) {
1409                 /*
1410                  * Copy FP regs to the appropriate place in the the lx_sigstack
1411                  * structure.
1412                  */
1413                 stol_fpstate(&ucp->uc_mcontext.fpregs, &lx_ssp->fpstate);
1414                 lx_ucp->uc_sigcontext.sc_fpstate = &lx_ssp->fpstate;
1415         } else {
1416                 lx_ucp->uc_sigcontext.sc_fpstate = NULL;
1417         }
1418 
1419 #if defined(_ILP32)
1420         /*
1421          * Believe it or not, gdb wants to SEE the sigreturn code on the
1422          * top of the stack to determine whether the stack frame belongs to
1423          * a signal handler, even though this code is not actually called.
1424          *
1425          * You can't make this stuff up.
1426          */
1427         bcopy((void *)lx_rt_sigreturn_tramp, lx_ssp->trampoline,
1428             sizeof (lx_ssp->trampoline));
1429 #endif
1430 }
1431 
1432 /*
1433  * This is the interposition handler for Linux signals.
1434  */
1435 static void
1436 lx_call_user_handler(int sig, siginfo_t *sip, void *p)
1437 {
1438         void (*user_handler)();
1439         void (*stk_builder)();
1440         struct lx_sigaction *lxsap;
1441         ucontext_t *ucp = (ucontext_t *)p;
1442         size_t stksize;
1443         int lx_sig;
1444 
1445         /*
1446          * If Illumos signal has no Linux equivalent, effectively ignore it.
1447          */
1448         if ((lx_sig = stol_signo[sig]) == -1) {
1449                 lx_unsupported("caught Illumos signal %d, no Linux equivalent",
1450                     sig);
1451                 return;
1452         }
1453 
1454         lx_debug("interpose caught Illumos signal %d, translating to Linux "
1455             "signal %d", sig, lx_sig);
1456 
1457         lxsap = &lx_sighandlers.lx_sa[lx_sig];
1458         lx_debug("lxsap @ 0x%p", lxsap);
1459 
1460         if ((sig == SIGPWR) && (lxsap->lxsa_handler == SIG_DFL)) {
1461                 /*
1462                  * Linux SIG_DFL for SIGPWR is to terminate. The lx wait
1463                  * emulation will translate SIGPWR to LX_SIGPWR.
1464                  */
1465                 (void) syscall(SYS_brand, B_EXIT_AS_SIG, SIGPWR);
1466                 /* This should never return */
1467                 assert(0);
1468         }
1469 
1470         if (lxsap->lxsa_handler == SIG_DFL || lxsap->lxsa_handler == SIG_IGN)
1471                 lx_err_fatal("lxsa_handler set to %s?  How?!?!?",
1472                     (lxsap->lxsa_handler == SIG_DFL) ? "SIG_DFL" : "SIG_IGN");
1473 
1474 #if defined(_LP64)
1475         stksize = sizeof (struct lx_sigstack);
1476         stk_builder = lx_build_signal_frame;
1477 #else
1478         if (lxsap->lxsa_flags & LX_SA_SIGINFO) {
1479                 stksize = sizeof (struct lx_sigstack);
1480                 stk_builder = lx_build_signal_frame;
1481         } else  {
1482                 stksize = sizeof (struct lx_oldsigstack);
1483                 stk_builder = lx_build_old_signal_frame;
1484         }
1485 #endif
1486 
1487         user_handler = lxsap->lxsa_handler;
1488 
1489         lx_debug("delivering %d (lx %d) to handler at 0x%p", sig, lx_sig,
1490             lxsap->lxsa_handler);
1491 
1492         if (lxsap->lxsa_flags & LX_SA_RESETHAND)
1493                 lxsap->lxsa_handler = SIG_DFL;
1494 
1495         lx_sigdeliver(lx_sig, sip, ucp, stksize, stk_builder, user_handler,
1496             lxsap);
1497 
1498         /*
1499          * We need to handle restarting system calls if requested by the
1500          * program for this signal type:
1501          */
1502         if (lxsap->lxsa_flags & LX_SA_RESTART) {
1503                 uintptr_t flags = (uintptr_t)ucp->uc_brand_data[0];
1504                 long ret = (long)LX_REG(ucp, REG_R0);
1505                 boolean_t interrupted = (ret == -lx_errno(EINTR, -1));
1506 
1507                 /*
1508                  * If the system call returned EINTR, and the system
1509                  * call handler set "br_syscall_restart" when returning,
1510                  * we modify the context to try the system call again
1511                  * when we return from this signal handler.
1512                  */
1513                 if ((flags & LX_UC_RESTART_SYSCALL) && interrupted) {
1514                         int syscall_num = (int)(uintptr_t)ucp->uc_brand_data[2];
1515 
1516                         lx_debug("restarting interrupted system call %d",
1517                             syscall_num);
1518 
1519                         /*
1520                          * Both the "int 0x80" and the "syscall" instruction
1521                          * are two bytes long.  Wind the program counter back
1522                          * to the start of this instruction.
1523                          *
1524                          * The system call we interrupted is preserved in the
1525                          * brand-specific data in the ucontext_t when the
1526                          * LX_UC_RESTART_SYSCALL flag is set.  This is
1527                          * analogous to the "orig_[er]ax" field in the Linux
1528                          * "user_regs_struct".
1529                          */
1530                         LX_REG(ucp, REG_PC) -= 2;
1531                         LX_REG(ucp, REG_R0) = syscall_num;
1532                 }
1533         }
1534 }
1535 
1536 /*
1537  * The "lx_sigdeliver()" function is responsible for constructing the emulated
1538  * signal delivery frame on the brand stack for this LWP.  A context is saved
1539  * on the stack which will be used by the "sigreturn(2)" family of emulated
1540  * system calls to get us back here after the Linux signal handler returns.
1541  * This function is modelled on the in-kernel "sendsig()" signal delivery
1542  * mechanism.
1543  */
1544 void
1545 lx_sigdeliver(int lx_sig, siginfo_t *sip, ucontext_t *ucp, size_t stacksz,
1546     void (*stack_builder)(), void (*user_handler)(),
1547     struct lx_sigaction *lxsap)
1548 {
1549         lx_sigbackup_t sigbackup;
1550         ucontext_t uc;
1551         lx_tsd_t *lxtsd = lx_get_tsd();
1552         int totsz = 0;
1553         uintptr_t flags;
1554         uintptr_t hargs[3];
1555         /*
1556          * These variables must be "volatile", as they are modified after the
1557          * getcontext() stores the register state:
1558          */
1559         volatile boolean_t signal_delivered = B_FALSE;
1560         volatile uintptr_t lxfp = 0;
1561         volatile uintptr_t old_tsd_sp = 0;
1562         volatile int newstack = 0;
1563 
1564         /*
1565          * This function involves modifying the Linux process stack for this
1566          * thread.  To do so without corruption requires us to exclude other
1567          * signal handlers (or emulated system calls called from within those
1568          * handlers) from running while we reserve space on that stack.  We
1569          * defer the execution of further instances of lx_call_user_handler()
1570          * until we have completed this operation.
1571          */
1572         _sigoff();
1573 
1574         /*
1575          * Clear register arguments vector.
1576          */
1577         bzero(hargs, sizeof (hargs));
1578 
1579         /*
1580          * We save a context here so that we can be returned later to complete
1581          * handling the signal.
1582          */
1583         lx_debug("lx_sigdeliver: STORING RETURN CONTEXT @ %p\n", &uc);
1584         assert(getcontext(&uc) == 0);
1585         lx_debug("lx_sigdeliver: RETURN CONTEXT %p LINK %p FLAGS %lx\n",
1586             &uc, uc.uc_link, uc.uc_flags);
1587         if (signal_delivered) {
1588                 /*
1589                  * If the "signal_delivered" flag is set, we are returned here
1590                  * via setcontext() as called by the emulated Linux signal
1591                  * return system call.
1592                  */
1593                 lx_debug("lx_sigdeliver: WE ARE BACK, VIA UC @ %p!\n", &uc);
1594                 goto after_signal_handler;
1595         }
1596         signal_delivered = B_TRUE;
1597 
1598         /*
1599          * Preserve the current tsd value of the Linux process stack pointer,
1600          * even if it is zero.  We will restore it when we are returned here
1601          * via setcontext() after the Linux process has completed execution of
1602          * its signal handler.
1603          */
1604         old_tsd_sp = lxtsd->lxtsd_lx_sp;
1605 
1606         /*
1607          * Figure out whether we will be handling this signal on an alternate
1608          * stack specified by the user.
1609          */
1610         newstack = (lxsap->lxsa_flags & LX_SA_ONSTACK) &&
1611             !(lxtsd->lxtsd_sigaltstack.ss_flags & (LX_SS_ONSTACK |
1612             LX_SS_DISABLE));
1613 
1614         /*
1615          * Find the first unused region of the Linux process stack, where
1616          * we will assemble our signal delivery frame.
1617          */
1618         flags = (uintptr_t)ucp->uc_brand_data[0];
1619         if (newstack) {
1620                 /*
1621                  * We are moving to the user-provided alternate signal
1622                  * stack.
1623                  */
1624                 lxfp = SA((uintptr_t)lxtsd->lxtsd_sigaltstack.ss_sp) +
1625                     SA(lxtsd->lxtsd_sigaltstack.ss_size) - STACK_ALIGN;
1626                 lx_debug("lx_sigdeliver: moving to ALTSTACK sp %p\n", lxfp);
1627                 LX_SIGNAL_ALTSTACK_ENABLE(lxfp);
1628         } else if (flags & LX_UC_STACK_BRAND) {
1629                 /*
1630                  * We interrupted the Linux process to take this signal.  The
1631                  * stack pointer is the one saved in this context.
1632                  */
1633                 lxfp = LX_REG(ucp, REG_SP);
1634         } else {
1635                 /*
1636                  * We interrupted a native (emulation) routine, so we must get
1637                  * the current stack pointer from either the tsd (if one is
1638                  * stored there) or via the context chain.
1639                  *
1640                  */
1641                 lxfp = lx_find_brand_sp();
1642                 if (lxtsd->lxtsd_lx_sp != 0) {
1643                         /*
1644                          * We must also make room for the possibility of nested
1645                          * signal delivery -- we may be pre-empting the
1646                          * in-progress handling of another signal.
1647                          *
1648                          * Note that if we were already on the alternate stack,
1649                          * any emulated Linux system calls would be betwixt
1650                          * that original signal frame and this new one on the
1651                          * one contiguous stack, so this logic holds either
1652                          * way:
1653                          */
1654                         lxfp = MIN(lxtsd->lxtsd_lx_sp, lxfp);
1655                 }
1656         }
1657 
1658         /*
1659          * Account for a reserved stack region (for amd64, this is 128 bytes),
1660          * and align the stack:
1661          */
1662         lxfp -= STACK_RESERVE;
1663         lxfp &= ~(STACK_ALIGN - 1);
1664 
1665         /*
1666          * Allocate space on the Linux process stack for our delivery frame,
1667          * including:
1668          *
1669          *   ----------------------------------------------------- old %sp
1670          *   - lx_sigdeliver_frame_t
1671          *   - (ucontext_t pointers and stack magic)
1672          *   -----------------------------------------------------
1673          *   - (amd64-only 8-byte alignment gap)
1674          *   -----------------------------------------------------
1675          *   - frame of size "stacksz" from the stack builder
1676          *   ----------------------------------------------------- new %sp
1677          */
1678 #if defined(_LP64)
1679         /*
1680          * The AMD64 ABI requires us to align the stack such that when the
1681          * called function pushes the base pointer, the stack is 16 byte
1682          * aligned.  The stack must, therefore, be 8- but _not_ 16-byte
1683          * aligned.
1684          */
1685 #if (STACK_ALIGN != 16) || (STACK_ENTRY_ALIGN != 8)
1686 #error "lx_sigdeliver() did not find expected stack alignment"
1687 #endif
1688         totsz = SA(sizeof (lx_sigdeliver_frame_t)) + SA(stacksz) + 8;
1689         assert((totsz & (STACK_ENTRY_ALIGN - 1)) == 0);
1690         assert((totsz & (STACK_ALIGN - 1)) == 8);
1691 #else
1692         totsz = SA(sizeof (lx_sigdeliver_frame_t)) + SA(stacksz);
1693         assert((totsz & (STACK_ALIGN - 1)) == 0);
1694 #endif
1695 
1696         /*
1697          * Copy our return frame into place:
1698          */
1699         lxfp -= SA(sizeof (lx_sigdeliver_frame_t));
1700         lx_debug("lx_sigdeliver: lx_sigdeliver_frame_t @ %p\n", lxfp);
1701         {
1702                 lx_sigdeliver_frame_t frm;
1703 
1704                 frm.lxsdf_magic = LX_SIGRT_MAGIC;
1705                 frm.lxsdf_retucp = &uc;
1706                 frm.lxsdf_sigucp = ucp;
1707                 frm.lxsdf_sigbackup = &sigbackup;
1708 
1709                 lx_debug("lx_sigdeliver: retucp %p sigucp %p\n",
1710                     frm.lxsdf_retucp, frm.lxsdf_sigucp);
1711 
1712                 if (uucopy(&frm, (void *)lxfp, sizeof (frm)) != 0) {
1713                         /*
1714                          * We could not modify the stack of the emulated Linux
1715                          * program.  Act like the kernel and terminate the
1716                          * program with a segmentation violation.
1717                          */
1718                         (void) syscall(SYS_brand, B_EXIT_AS_SIG, SIGSEGV);
1719                 }
1720 
1721                 LX_SIGNAL_DELIVERY_FRAME_CREATE((void *)lxfp);
1722 
1723                 /*
1724                  * Populate a backup copy of signal linkage to use in case
1725                  * the Linux program completely destroys (or relocates) the
1726                  * delivery frame.
1727                  *
1728                  * This is necessary for programs that have flown so far off
1729                  * the architectural rails that they believe it is
1730                  * acceptable to make assumptions about the precise size and
1731                  * layout of the signal handling frame assembled by the
1732                  * kernel.
1733                  */
1734                 sigbackup.lxsb_retucp = frm.lxsdf_retucp;
1735                 sigbackup.lxsb_sigucp = frm.lxsdf_sigucp;
1736                 sigbackup.lxsb_sigdeliver_frame = lxfp;
1737                 sigbackup.lxsb_previous = lxtsd->lxtsd_sigbackup;
1738                 lxtsd->lxtsd_sigbackup = &sigbackup;
1739 
1740                 lx_debug("lx_sigdeliver: installed sigbackup %p; prev %p\n",
1741                     &sigbackup, sigbackup.lxsb_previous);
1742         }
1743 
1744         /*
1745          * Build the Linux signal handling frame:
1746          */
1747 #if defined(_LP64)
1748         lxfp -= SA(stacksz) + 8;
1749 #else
1750         lxfp -= SA(stacksz);
1751 #endif
1752         lx_debug("lx_sigdeliver: Linux sig frame @ %p\n", lxfp);
1753         stack_builder(lx_sig, sip, ucp, lxfp, hargs);
1754 
1755         /*
1756          * Record our reservation so that any nested signal handlers
1757          * can see it.
1758          */
1759         lx_debug("lx_sigdeliver: Linux tsd sp %p -> %p\n", lxtsd->lxtsd_lx_sp,
1760             lxfp);
1761         lxtsd->lxtsd_lx_sp = lxfp;
1762 
1763         if (newstack) {
1764                 lxtsd->lxtsd_sigaltstack.ss_flags |= LX_SS_ONSTACK;
1765         }
1766 
1767         LX_SIGDELIVER(lx_sig, lxsap, (void *)lxfp);
1768 
1769         /*
1770          * Re-enable signal delivery.  If a signal was queued while we were
1771          * in the critical section, it will be delivered immediately.
1772          */
1773         _sigon();
1774 
1775         /*
1776          * Pass control to the Linux signal handler:
1777          */
1778         lx_debug("lx_sigdeliver: JUMPING TO LINUX (sig %d sp %p eip %p)\n",
1779             lx_sig, lxfp, user_handler);
1780         {
1781                 ucontext_t jump_uc;
1782 
1783                 bcopy(lx_find_brand_uc(), &jump_uc, sizeof (jump_uc));
1784 
1785                 /*
1786                  * We want to load the general registers from this context, and
1787                  * switch to the BRAND stack.  We do _not_ want to restore the
1788                  * uc_link value from this synthetic context, as that would
1789                  * break the signal handling context chain.
1790                  */
1791                 jump_uc.uc_flags = UC_CPU;
1792                 jump_uc.uc_brand_data[0] = (void *)(LX_UC_STACK_BRAND |
1793                     LX_UC_IGNORE_LINK);
1794 
1795                 LX_REG(&jump_uc, REG_FP) = 0;
1796                 LX_REG(&jump_uc, REG_SP) = lxfp;
1797                 LX_REG(&jump_uc, REG_PC) = (uintptr_t)user_handler;
1798 
1799 #if defined(_LP64)
1800                 /*
1801                  * Pass signal handler arguments by registers on AMD64.
1802                  */
1803                 LX_REG(&jump_uc, REG_RDI) = hargs[0];
1804                 LX_REG(&jump_uc, REG_RSI) = hargs[1];
1805                 LX_REG(&jump_uc, REG_RDX) = hargs[2];
1806 #endif
1807 
1808                 lx_jump_to_linux(&jump_uc);
1809         }
1810 
1811         assert(0);
1812         abort();
1813 
1814 after_signal_handler:
1815         /*
1816          * Ensure all nested signal handlers have completed correctly
1817          * and then remove our stack reservation.
1818          */
1819         _sigoff();
1820         LX_SIGNAL_POST_HANDLER(lxfp, old_tsd_sp);
1821         assert(lxtsd->lxtsd_lx_sp == lxfp);
1822         lx_debug("lx_sigdeliver: after; Linux tsd sp %p -> %p\n", lxfp,
1823             old_tsd_sp);
1824         lxtsd->lxtsd_lx_sp = old_tsd_sp;
1825         if (newstack) {
1826                 LX_SIGNAL_ALTSTACK_DISABLE();
1827                 lx_debug("lx_sigdeliver: disabling ALTSTACK sp %p\n", lxfp);
1828                 lxtsd->lxtsd_sigaltstack.ss_flags &= ~LX_SS_ONSTACK;
1829         }
1830         /*
1831          * Restore backup signal tracking chain pointer to previous value:
1832          */
1833         if (lxtsd->lxtsd_sigbackup != NULL) {
1834                 lx_sigbackup_t *bprev = lxtsd->lxtsd_sigbackup->lxsb_previous;
1835 
1836                 lx_debug("lx_sigdeliver: restoring sigbackup %p to %p\n",
1837                     lxtsd->lxtsd_sigbackup, bprev);
1838 
1839                 lxtsd->lxtsd_sigbackup = bprev;
1840         }
1841         _sigon();
1842 
1843         /*
1844          * Here we return to libc so that it may clean up and restore the
1845          * context originally interrupted by this signal.
1846          */
1847 }
1848 
1849 /*
1850  * Common routine to modify sigaction characteristics of a thread.
1851  *
1852  * We shouldn't need any special locking code here as we actually use our copy
1853  * of libc's sigaction() to do all the real work, so its thread locking should
1854  * take care of any issues for us.
1855  */
1856 static int
1857 lx_sigaction_common(int lx_sig, struct lx_sigaction *lxsp,
1858     struct lx_sigaction *olxsp)
1859 {
1860         struct lx_sigaction *lxsap;
1861         struct sigaction sa;
1862 
1863         if (lx_sig <= 0 || lx_sig > LX_NSIG)
1864                 return (-EINVAL);
1865 
1866         lxsap = &lx_sighandlers.lx_sa[lx_sig];
1867         lx_debug("&lx_sighandlers.lx_sa[%d] = 0x%p", lx_sig, lxsap);
1868 
1869         if ((olxsp != NULL) &&
1870             ((uucopy(lxsap, olxsp, sizeof (struct lx_sigaction))) != 0))
1871                 return (-errno);
1872 
1873         if (lxsp != NULL) {
1874                 int err, sig;
1875                 struct lx_sigaction lxsa;
1876                 sigset_t new_set, oset;
1877 
1878                 if (uucopy(lxsp, &lxsa, sizeof (struct lx_sigaction)) != 0)
1879                         return (-errno);
1880 
1881                 if ((sig = ltos_signo[lx_sig]) != -1) {
1882                         if (lx_no_abort_handler != 0) {
1883                                 /*
1884                                  * If LX_NO_ABORT_HANDLER has been set, we will
1885                                  * not allow the emulated program to do
1886                                  * anything hamfisted with SIGSEGV or SIGABRT
1887                                  * signals.
1888                                  */
1889                                 if (sig == SIGSEGV || sig == SIGABRT) {
1890                                         return (0);
1891                                 }
1892                         }
1893 
1894                         /*
1895                          * Block this signal while messing with its dispostion
1896                          */
1897                         (void) sigemptyset(&new_set);
1898                         (void) sigaddset(&new_set, sig);
1899 
1900                         if (sigprocmask(SIG_BLOCK, &new_set, &oset) < 0) {
1901                                 err = errno;
1902                                 lx_debug("unable to block signal %d: %s", sig,
1903                                     strerror(err));
1904                                 return (-err);
1905                         }
1906 
1907                         /*
1908                          * We don't really need the old signal disposition at
1909                          * this point, but this weeds out signals that would
1910                          * cause sigaction() to return an error before we change
1911                          * anything other than the current signal mask.
1912                          */
1913                         if (sigaction(sig, NULL, &sa) < 0) {
1914                                 err = errno;
1915                                 lx_debug("sigaction() to get old "
1916                                     "disposition for signal %d failed: "
1917                                     "%s", sig, strerror(err));
1918                                 (void) sigprocmask(SIG_SETMASK, &oset, NULL);
1919                                 return (-err);
1920                         }
1921 
1922                         if ((lxsa.lxsa_handler != SIG_DFL) &&
1923                             (lxsa.lxsa_handler != SIG_IGN)) {
1924                                 sa.sa_handler = lx_call_user_handler;
1925 
1926                                 /*
1927                                  * The interposition signal handler needs the
1928                                  * information provided via the SA_SIGINFO flag.
1929                                  */
1930                                 sa.sa_flags = SA_SIGINFO;
1931 
1932                                 /*
1933                                  * When translating from Linux to illumos
1934                                  * sigaction(2) flags, we explicitly do not
1935                                  * pass SA_ONSTACK to the kernel.  The
1936                                  * alternate stack for Linux signal handling is
1937                                  * handled entirely by the emulation code.
1938                                  */
1939                                 if (lxsa.lxsa_flags & LX_SA_NOCLDSTOP)
1940                                         sa.sa_flags |= SA_NOCLDSTOP;
1941                                 if (lxsa.lxsa_flags & LX_SA_NOCLDWAIT)
1942                                         sa.sa_flags |= SA_NOCLDWAIT;
1943                                 if (lxsa.lxsa_flags & LX_SA_RESTART)
1944                                         sa.sa_flags |= SA_RESTART;
1945                                 if (lxsa.lxsa_flags & LX_SA_NODEFER)
1946                                         sa.sa_flags |= SA_NODEFER;
1947 
1948                                 /*
1949                                  * RESETHAND cannot be used be passed through
1950                                  * for SIGPWR due to different default actions
1951                                  * between Linux and Illumos.
1952                                  */
1953                                 if ((sig != SIGPWR) &&
1954                                     (lxsa.lxsa_flags & LX_SA_RESETHAND))
1955                                         sa.sa_flags |= SA_RESETHAND;
1956 
1957                                 if (ltos_sigset(&lxsa.lxsa_mask,
1958                                     &sa.sa_mask) != 0) {
1959                                         err = errno;
1960                                         (void) sigprocmask(SIG_SETMASK, &oset,
1961                                             NULL);
1962                                         return (-err);
1963                                 }
1964 
1965                                 lx_debug("interposing handler @ 0x%p for "
1966                                     "signal %d (lx %d), flags 0x%x",
1967                                     lxsa.lxsa_handler, sig, lx_sig,
1968                                     lxsa.lxsa_flags);
1969 
1970                                 if (sigaction(sig, &sa, NULL) < 0) {
1971                                         err = errno;
1972                                         lx_debug("sigaction() to set new "
1973                                             "disposition for signal %d failed: "
1974                                             "%s", sig, strerror(err));
1975                                         (void) sigprocmask(SIG_SETMASK, &oset,
1976                                             NULL);
1977                                         return (-err);
1978                                 }
1979                         } else if ((sig != SIGPWR) ||
1980                             ((sig == SIGPWR) &&
1981                             (lxsa.lxsa_handler == SIG_IGN))) {
1982                                 /*
1983                                  * There's no need to interpose for SIG_DFL or
1984                                  * SIG_IGN so just call our copy of libc's
1985                                  * sigaction(), but don't allow SIG_DFL for
1986                                  * SIGPWR due to differing default actions
1987                                  * between Linux and Illumos.
1988                                  *
1989                                  * Get the previous disposition first so things
1990                                  * like sa_mask and sa_flags are preserved over
1991                                  * a transition to SIG_DFL or SIG_IGN, which is
1992                                  * what Linux expects.
1993                                  */
1994 
1995                                 sa.sa_handler = lxsa.lxsa_handler;
1996 
1997                                 if (sigaction(sig, &sa, NULL) < 0) {
1998                                         err = errno;
1999                                         lx_debug("sigaction(%d, %s) failed: %s",
2000                                             sig, ((sa.sa_handler == SIG_DFL) ?
2001                                             "SIG_DFL" : "SIG_IGN"),
2002                                             strerror(err));
2003                                         (void) sigprocmask(SIG_SETMASK, &oset,
2004                                             NULL);
2005                                         return (-err);
2006                                 }
2007                         }
2008                 } else {
2009                         lx_debug("Linux signal with no kill support "
2010                             "specified: %d", lx_sig);
2011                 }
2012 
2013                 /*
2014                  * Save the new disposition for the signal in the global
2015                  * lx_sighandlers structure.
2016                  */
2017                 bcopy(&lxsa, lxsap, sizeof (struct lx_sigaction));
2018 
2019                 /*
2020                  * Reset the signal mask to what we came in with if
2021                  * we were modifying a kill-supported signal.
2022                  */
2023                 if (sig != -1)
2024                         (void) sigprocmask(SIG_SETMASK, &oset, NULL);
2025         }
2026 
2027         return (0);
2028 }
2029 
#if defined(_ILP32)
/*
 * sigaction is only used in 32-bit code.
 *
 * Emulates the old-style sigaction system call, whose user-visible structure
 * (struct lx_osigaction) carries an lx_osigset_t mask.  The structure is
 * converted to and from struct lx_sigaction around a call to
 * lx_sigaction_common().
 *
 * "actp" and "oactp" are addresses of struct lx_osigaction objects (either may
 * be zero).  They are accessed with uucopy() rather than dereferenced
 * directly, so a bad address yields an error return instead of a fault.
 *
 * Returns 0 on success or a negated errno value on failure.
 */
long
lx_sigaction(uintptr_t lx_sig, uintptr_t actp, uintptr_t oactp)
{
        int val;
        struct lx_sigaction sa, osa;
        struct lx_sigaction *sap, *osap;
        struct lx_osigaction osigact;

        sap = (actp ? &sa : NULL);
        osap = (oactp ? &osa : NULL);

        /*
         * If we have a source pointer, convert source lxsa_mask from
         * lx_osigset_t to lx_sigset_t format.
         */
        if (sap != NULL) {
                if (uucopy((void *)actp, &osigact, sizeof (osigact)) != 0)
                        return (-errno);

                sap->lxsa_handler = osigact.lxsa_handler;

                bzero(&sap->lxsa_mask, sizeof (lx_sigset_t));

                for (val = 1; val <= OSIGSET_NBITS; val++)
                        if (osigact.lxsa_mask & OSIGSET_BITSET(val))
                                (void) lx_sigaddset(&sap->lxsa_mask, val);

                sap->lxsa_flags = osigact.lxsa_flags;
                sap->lxsa_restorer = osigact.lxsa_restorer;
        }

        if ((val = lx_sigaction_common(lx_sig, sap, osap)))
                return (val);

        /*
         * If we have a save pointer, convert the old lxsa_mask from
         * lx_sigset_t to lx_osigset_t format.
         */
        if (osap != NULL) {
                bzero(&osigact, sizeof (osigact));

                osigact.lxsa_handler = osap->lxsa_handler;

                for (val = 1; val <= OSIGSET_NBITS; val++)
                        if (lx_sigismember(&osap->lxsa_mask, val))
                                osigact.lxsa_mask |= OSIGSET_BITSET(val);

                osigact.lxsa_flags = osap->lxsa_flags;
                osigact.lxsa_restorer = osap->lxsa_restorer;

                if (uucopy(&osigact, (void *)oactp, sizeof (osigact)) != 0)
                        return (-errno);
        }

        return (0);
}
#endif
2087 
2088 long
2089 lx_rt_sigaction(uintptr_t lx_sig, uintptr_t actp, uintptr_t oactp,
2090     uintptr_t setsize)
2091 {
2092         /*
2093          * The "new" rt_sigaction call checks the setsize
2094          * parameter.
2095          */
2096         if ((size_t)setsize != sizeof (lx_sigset_t))
2097                 return (-EINVAL);
2098 
2099         return (lx_sigaction_common(lx_sig, (struct lx_sigaction *)actp,
2100             (struct lx_sigaction *)oactp));
2101 }
2102 
#if defined(_ILP32)
/*
 * Convert signal syscall to a call to the lx_sigaction() syscall
 * Only used in 32-bit code.
 *
 * lx_sigaction() interprets both of its pointer arguments as
 * struct lx_osigaction and its flags as LX_SA_* values, so the request must
 * be built from those Linux-side definitions rather than from the native
 * struct sigaction and SA_* flags.
 *
 * Returns the previously recorded handler on success, or the negated errno
 * value from lx_sigaction() on failure.
 */
long
lx_signal(uintptr_t lx_sig, uintptr_t handler)
{
        struct lx_osigaction act;
        struct lx_osigaction oact;
        int rc;

        /*
         * Use sigaction to mimic SYSV signal() behavior; glibc will
         * actually call sigaction(2) itself, so we're really reaching
         * back for signal(2) semantics here.
         */
        bzero(&act, sizeof (act));
        act.lxsa_handler = (void (*)())handler;
        act.lxsa_flags = LX_SA_RESETHAND | LX_SA_NODEFER;

        rc = lx_sigaction(lx_sig, (uintptr_t)&act, (uintptr_t)&oact);
        return ((rc == 0) ? ((long)(uintptr_t)oact.lxsa_handler) : rc);
}
#endif
2128 
2129 void
2130 lx_sighandlers_save(lx_sighandlers_t *saved)
2131 {
2132         bcopy(&lx_sighandlers, saved, sizeof (lx_sighandlers_t));
2133 }
2134 
2135 void
2136 lx_sighandlers_restore(lx_sighandlers_t *saved)
2137 {
2138         bcopy(saved, &lx_sighandlers, sizeof (lx_sighandlers_t));
2139 }
2140 
2141 int
2142 lx_siginit(void)
2143 {
2144         extern void set_setcontext_enforcement(int);
2145         extern void set_escaped_context_cleanup(int);
2146 
2147         struct sigaction sa;
2148         sigset_t new_set, oset;
2149         int lx_sig, sig;
2150 
2151         if (getenv("LX_NO_ABORT_HANDLER") != NULL) {
2152                 lx_no_abort_handler = 1;
2153         }
2154 
2155         /*
2156          * Block all signals possible while setting up the signal imposition
2157          * mechanism.
2158          */
2159         (void) sigfillset(&new_set);
2160 
2161         if (sigprocmask(SIG_BLOCK, &new_set, &oset) < 0)
2162                 lx_err_fatal("unable to block signals while setting up "
2163                     "imposition mechanism: %s", strerror(errno));
2164 
2165         /*
2166          * Ignore any signals that have no Linux analog so that those
2167          * signals cannot be sent to Linux processes from the global zone
2168          */
2169         for (sig = 1; sig < NSIG; sig++)
2170                 if (stol_signo[sig] < 0)
2171                         (void) sigignore(sig);
2172 
2173         /*
2174          * Mark any signals that are ignored as ignored in our interposition
2175          * handler array
2176          */
2177         for (lx_sig = 1; lx_sig <= LX_NSIG; lx_sig++) {
2178                 if (((sig = ltos_signo[lx_sig]) != -1) &&
2179                     (sigaction(sig, NULL, &sa) < 0))
2180                         lx_err_fatal("unable to determine previous disposition "
2181                             "for signal %d: %s", sig, strerror(errno));
2182 
2183                 if (sa.sa_handler == SIG_IGN) {
2184                         lx_debug("marking signal %d (lx %d) as SIG_IGN",
2185                             sig, lx_sig);
2186                         lx_sighandlers.lx_sa[lx_sig].lxsa_handler = SIG_IGN;
2187                 }
2188         }
2189 
2190         /*
2191          * Have our interposition handler handle SIGPWR to start with,
2192          * as it has a default action of terminating the process in Linux
2193          * but its default is to be ignored in Illumos.
2194          */
2195         (void) sigemptyset(&sa.sa_mask);
2196         sa.sa_sigaction = lx_call_user_handler;
2197         sa.sa_flags = SA_SIGINFO;
2198 
2199         if (sigaction(SIGPWR, &sa, NULL) < 0)
2200                 lx_err_fatal("sigaction(SIGPWR) failed: %s", strerror(errno));
2201 
2202         /*
2203          * Illumos' libc forces certain register values in the ucontext_t
2204          * used to restore a post-signal user context to be those Illumos
2205          * expects; however that is not what we want to happen if the signal
2206          * was taken while branded code was executing, so we must disable
2207          * that behavior.
2208          */
2209         set_setcontext_enforcement(0);
2210 
2211         /*
2212          * The illumos libc attempts to clean up dangling uc_link pointers in
2213          * signal handling contexts when libc believes us to have escaped a
2214          * signal handler incorrectly in the past.  We want to disable this
2215          * behaviour, so that the system call emulation context saved by the
2216          * kernel brand module for lx_emulate() may be part of the context
2217          * chain without itself being used for signal handling.
2218          */
2219         set_escaped_context_cleanup(0);
2220 
2221         /*
2222          * Reset the signal mask to what we came in with.
2223          */
2224         (void) sigprocmask(SIG_SETMASK, &oset, NULL);
2225 
2226         lx_debug("interposition handler setup for SIGPWR");
2227         return (0);
2228 }
2229 
2230 /*
2231  * This code strongly resembles lx_poll(), but is here to be able to take
2232  * advantage of the Linux signal helper routines.
2233  */
2234 long
2235 lx_ppoll(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4, uintptr_t p5)
2236 {
2237         struct pollfd   *lfds, *sfds;
2238         nfds_t          nfds = (nfds_t)p2;
2239         timespec_t      ts, *tsp = NULL;
2240         int             fds_size, i, rval, revents;
2241         lx_sigset_t     lxsig, *lxsigp = NULL;
2242         sigset_t        sigset, *sp = NULL;
2243         rctlblk_t       *rblk;
2244 
2245         lx_debug("\tppoll(0x%p, %d, 0x%p, 0x%p, %d)", p1, p2, p3, p4, p5);
2246 
2247         if (p3 != NULL) {
2248                 if (uucopy((void *)p3, &ts, sizeof (ts)) != 0)
2249                         return (-errno);
2250 
2251                 tsp = &ts;
2252         }
2253 
2254         if (p4 != NULL) {
2255                 if (uucopy((void *)p4, &lxsig, sizeof (lxsig)) != 0)
2256                         return (-errno);
2257 
2258                 lxsigp = &lxsig;
2259                 if ((size_t)p5 != sizeof (lx_sigset_t))
2260                         return (-EINVAL);
2261 
2262                 if (lxsigp) {
2263                         if ((rval = ltos_sigset(lxsigp, &sigset)) != 0)
2264                                 return (rval);
2265 
2266                         sp = &sigset;
2267                 }
2268         }
2269 
2270         /*
2271          * Deal with the NULL fds[] case.
2272          */
2273         if (nfds == 0 || p1 == NULL) {
2274                 if ((rval = ppoll(NULL, 0, tsp, sp)) < 0)
2275                         return (-errno);
2276 
2277                 return (rval);
2278         }
2279 
2280         if (maxfd == 0) {
2281                 if ((rblk = (rctlblk_t *)SAFE_ALLOCA(rctlblk_size())) == NULL)
2282                         return (-ENOMEM);
2283 
2284                 if (getrctl("process.max-file-descriptor", NULL, rblk,
2285                     RCTL_FIRST) == -1)
2286                         return (-EINVAL);
2287 
2288                 maxfd = rctlblk_get_value(rblk);
2289         }
2290 
2291         if (nfds > maxfd)
2292                 return (-EINVAL);
2293 
2294         /*
2295          * Note: we are assuming that the Linux and Illumos pollfd
2296          * structures are identical.  Copy in the Linux poll structure.
2297          */
2298         fds_size = sizeof (struct pollfd) * nfds;
2299         lfds = (struct pollfd *)SAFE_ALLOCA(fds_size);
2300         if (lfds == NULL)
2301                 return (-ENOMEM);
2302         if (uucopy((void *)p1, lfds, fds_size) != 0)
2303                 return (-errno);
2304 
2305         /*
2306          * The poll system call modifies the poll structures passed in
2307          * so we'll need to make an extra copy of them.
2308          */
2309         sfds = (struct pollfd *)SAFE_ALLOCA(fds_size);
2310         if (sfds == NULL)
2311                 return (-ENOMEM);
2312 
2313         /* Convert the Linux events bitmask into the Illumos equivalent. */
2314         for (i = 0; i < nfds; i++) {
2315                 /*
2316                  * If the caller is polling for an unsupported event, we
2317                  * have to bail out.
2318                  */
2319                 if (lfds[i].events & ~LX_POLL_SUPPORTED_EVENTS) {
2320                         lx_unsupported("unsupported poll events requested: "
2321                             "events=0x%x", lfds[i].events);
2322                         return (-ENOTSUP);
2323                 }
2324 
2325                 sfds[i].fd = lfds[i].fd;
2326                 sfds[i].events = lfds[i].events & LX_POLL_COMMON_EVENTS;
2327                 if (lfds[i].events & LX_POLLWRNORM)
2328                         sfds[i].events |= POLLWRNORM;
2329                 if (lfds[i].events & LX_POLLWRBAND)
2330                         sfds[i].events |= POLLWRBAND;
2331                 if (lfds[i].events & LX_POLLRDHUP)
2332                         sfds[i].events |= POLLRDHUP;
2333                 sfds[i].revents = 0;
2334         }
2335 
2336         if ((rval = ppoll(sfds, nfds, tsp, sp)) < 0)
2337                 return (-errno);
2338 
2339         /* Convert the Illumos revents bitmask into the Linux equivalent */
2340         for (i = 0; i < nfds; i++) {
2341                 revents = sfds[i].revents & LX_POLL_COMMON_EVENTS;
2342                 if (sfds[i].revents & POLLWRBAND)
2343                         revents |= LX_POLLWRBAND;
2344                 if (sfds[i].revents & POLLRDHUP)
2345                         revents |= LX_POLLRDHUP;
2346 
2347                 /*
2348                  * Be careful because on Illumos POLLOUT and POLLWRNORM
2349                  * are defined to the same values but on Linux they
2350                  * are not.
2351                  */
2352                 if (sfds[i].revents & POLLOUT) {
2353                         if ((lfds[i].events & LX_POLLOUT) == 0)
2354                                 revents &= ~LX_POLLOUT;
2355                         if (lfds[i].events & LX_POLLWRNORM)
2356                                 revents |= LX_POLLWRNORM;
2357                 }
2358 
2359                 lfds[i].revents = revents;
2360         }
2361 
2362         /* Copy out the results */
2363         if (uucopy(lfds, (void *)p1, fds_size) != 0)
2364                 return (-errno);
2365 
2366         return (rval);
2367 }
2368 
2369 /*
2370  * This code stongly resemebles lx_select(), but is here to be able to take
2371  * advantage of the Linux signal helper routines.
2372  */
2373 long
2374 lx_pselect6(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4,
2375         uintptr_t p5, uintptr_t p6)
2376 {
2377         int nfds = (int)p1;
2378         fd_set *rfdsp = NULL;
2379         fd_set *wfdsp = NULL;
2380         fd_set *efdsp = NULL;
2381         timespec_t ts, *tsp = NULL;
2382         int fd_set_len = howmany(nfds, 8);
2383         int r;
2384         sigset_t sigset, *sp = NULL;
2385 
2386         lx_debug("\tpselect6(%d, 0x%p, 0x%p, 0x%p, 0x%p, 0x%p)",
2387             p1, p2, p3, p4, p4, p6);
2388 
2389         if (nfds > 0) {
2390                 if (p2 != NULL) {
2391                         rfdsp = SAFE_ALLOCA(fd_set_len);
2392                         if (rfdsp == NULL)
2393                                 return (-ENOMEM);
2394                         if (uucopy((void *)p2, rfdsp, fd_set_len) != 0)
2395                                 return (-errno);
2396                 }
2397                 if (p3 != NULL) {
2398                         wfdsp = SAFE_ALLOCA(fd_set_len);
2399                         if (wfdsp == NULL)
2400                                 return (-ENOMEM);
2401                         if (uucopy((void *)p3, wfdsp, fd_set_len) != 0)
2402                                 return (-errno);
2403                 }
2404                 if (p4 != NULL) {
2405                         efdsp = SAFE_ALLOCA(fd_set_len);
2406                         if (efdsp == NULL)
2407                                 return (-ENOMEM);
2408                         if (uucopy((void *)p4, efdsp, fd_set_len) != 0)
2409                                 return (-errno);
2410                 }
2411         }
2412 
2413         if (p5 != NULL) {
2414                 if (uucopy((void *)p5, &ts, sizeof (ts)) != 0)
2415                         return (-errno);
2416 
2417                 tsp = &ts;
2418         }
2419 
2420         if (p6 != NULL) {
2421                 /*
2422                  * To force the number of arguments to be no more than six,
2423                  * Linux bundles both the sigset and the size into a structure
2424                  * that becomes the sixth argument.
2425                  */
2426                 struct {
2427                         lx_sigset_t *addr;
2428                         size_t size;
2429                 } lx_sigset;
2430 
2431                 if (uucopy((void *)p6, &lx_sigset, sizeof (lx_sigset)) != 0)
2432                         return (-errno);
2433 
2434                 /*
2435                  * Yes, that's right:  Linux forces a size to be passed only
2436                  * so it can check that it's the size of a sigset_t.
2437                  */
2438                 if (lx_sigset.size != sizeof (lx_sigset_t))
2439                         return (-EINVAL);
2440 
2441                 /*
2442                  * This is where we check if the sigset is *really* NULL.
2443                  */
2444                 if (lx_sigset.addr) {
2445                         if ((r = ltos_sigset(lx_sigset.addr, &sigset)) != 0)
2446                                 return (r);
2447 
2448                         sp = &sigset;
2449                 }
2450         }
2451 
2452 #if defined(_LP64)
2453         r = pselect(nfds, rfdsp, wfdsp, efdsp, tsp, sp);
2454 #else
2455         if (nfds >= FD_SETSIZE)
2456                 r = pselect_large_fdset(nfds, rfdsp, wfdsp, efdsp, tsp, sp);
2457         else
2458                 r = pselect(nfds, rfdsp, wfdsp, efdsp, tsp, sp);
2459 #endif
2460 
2461         if (r < 0)
2462                 return (-errno);
2463 
2464         /*
2465          * For pselect6(), we don't honor the strange Linux select() semantics
2466          * with respect to the timestruc parameter because glibc ignores it
2467          * anyway -- just copy out the fd pointers and return.
2468          */
2469         if ((rfdsp != NULL) && (uucopy(rfdsp, (void *)p2, fd_set_len) != 0))
2470                 return (-errno);
2471         if ((wfdsp != NULL) && (uucopy(wfdsp, (void *)p3, fd_set_len) != 0))
2472                 return (-errno);
2473         if ((efdsp != NULL) && (uucopy(efdsp, (void *)p4, fd_set_len) != 0))
2474                 return (-errno);
2475 
2476         return (r);
2477 }
2478 
2479 /*
2480  * The first argument is the pid (Linux tgid) to send the signal to, second
2481  * argument is the signal to send (an lx signal), and third is the siginfo_t
2482  * with extra information. We translate the code and signal only from the
2483  * siginfo_t, and leave everything else the same as it gets passed through the
2484  * signalling system. This is enough to get sigqueue working. See Linux man
2485  * page rt_sigqueueinfo(2).
2486  */
2487 long
2488 lx_rt_sigqueueinfo(uintptr_t p1, uintptr_t p2, uintptr_t p3)
2489 {
2490         pid_t tgid = (pid_t)p1;
2491         int lx_sig = (int)p2;
2492         int sig;
2493         lx_siginfo_t lx_siginfo;
2494         siginfo_t siginfo;
2495         int s_code;
2496         pid_t s_pid;
2497 
2498         if (uucopy((void *)p3, &lx_siginfo, sizeof (lx_siginfo_t)) != 0)
2499                 return (-EFAULT);
2500         s_code = ltos_sigcode(lx_siginfo.lsi_code);
2501         if (s_code == LX_SI_CODE_NOT_EXIST)
2502                 return (-EINVAL);
2503         if (lx_sig < 0 || lx_sig > LX_NSIG || (sig = ltos_signo[lx_sig]) < 0) {
2504                 return (-EINVAL);
2505         }
2506         /*
2507          * This case (when trying to kill pid 0) just has a different errno
2508          * returned in illumos than in Linux.
2509          */
2510         if (tgid == 0)
2511                 return (-ESRCH);
2512         if (lx_lpid_to_spid(tgid, &s_pid) != 0)
2513                 return (-ESRCH);
2514         if (SI_CANQUEUE(s_code)) {
2515                 return ((syscall(SYS_sigqueue, s_pid, sig,
2516                     lx_siginfo.lsi_value, s_code, 0) == -1) ?
2517                     (-errno): 0);
2518         } else {
2519                 /*
2520                  * This case is unlikely, as the main entry point is through
2521                  * sigqueue, which always has a queuable si_code.
2522                  */
2523                 siginfo.si_signo = sig;
2524                 siginfo.si_code = s_code;
2525                 siginfo.si_pid = lx_siginfo.lsi_pid;
2526                 siginfo.si_value = lx_siginfo.lsi_value;
2527                 siginfo.si_uid = lx_siginfo.lsi_uid;
2528                 return ((syscall(SYS_brand, B_HELPER_SIGQUEUE,
2529                     tgid, sig, &siginfo)) ? (-errno) : 0);
2530         }
2531 }
2532 
2533 /*
2534  * Adds an additional argument for which thread within a thread group to send
2535  * the signal to (added as the second argument).
2536  */
2537 long
2538 lx_rt_tgsigqueueinfo(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4)
2539 {
2540         pid_t tgid = (pid_t)p1;
2541         pid_t tid = (pid_t)p2;
2542         int lx_sig = (int)p3;
2543         int sig;
2544         lx_siginfo_t lx_siginfo;
2545         siginfo_t siginfo;
2546         int si_code;
2547 
2548         if (uucopy((void *)p4, &lx_siginfo, sizeof (lx_siginfo_t)) != 0)
2549                 return (-EFAULT);
2550         if (lx_sig < 0 || lx_sig > LX_NSIG || (sig = ltos_signo[lx_sig]) < 0) {
2551                 return (-EINVAL);
2552         }
2553         si_code = ltos_sigcode(lx_siginfo.lsi_code);
2554         if (si_code == LX_SI_CODE_NOT_EXIST)
2555                 return (-EINVAL);
2556         /*
2557          * Check for invalid tgid and tids. That appears to be only negatives
2558          * and 0 values. Everything else that doesn't exist is instead ESRCH.
2559          */
2560         if (tgid <= 0 || tid <= 0)
2561                 return (-EINVAL);
2562         siginfo.si_signo = sig;
2563         siginfo.si_code = si_code;
2564         siginfo.si_pid = lx_siginfo.lsi_pid;
2565         siginfo.si_value = lx_siginfo.lsi_value;
2566         siginfo.si_uid = lx_siginfo.lsi_uid;
2567 
2568         return ((syscall(SYS_brand, B_HELPER_TGSIGQUEUE, tgid, tid, sig,
2569             &siginfo)) ? (-errno) : 0);
2570 }
2571 
/*
 * Flag-less form of lx_signalfd4(): forwards its arguments unchanged and
 * supplies a flags value of zero.
 */
long
lx_signalfd(int fd, uintptr_t mask, size_t msize)
{
        const int no_flags = 0;

        return (lx_signalfd4(fd, mask, msize, no_flags));
}
2577 
2578 long
2579 lx_signalfd4(int fd, uintptr_t mask, size_t msize, int flags)
2580 {
2581         sigset_t s_set;
2582         int r;
2583 
2584         if (msize != sizeof (int64_t))
2585                 return (-EINVAL);
2586 
2587         if (ltos_sigset((lx_sigset_t *)mask, &s_set) != 0)
2588                 return (-errno);
2589 
2590         r = signalfd(fd, &s_set, flags);
2591 
2592         /*
2593          * signalfd(3C) may fail with ENOENT if /dev/signalfd is not available.
2594          * It is less jarring to Linux programs to tell them that internal
2595          * allocation failed than to report an error number they are not
2596          * expecting.
2597          */
2598         if (r == -1 && errno == ENOENT)
2599                 return (-ENODEV);
2600 
2601         return (r == -1 ? -errno : r);
2602 }