1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  */
  26 
  27 /*
  28  * Copyright 2015 Joyent, Inc. All rights reserved.
  29  */
  30 
  31 #include <sys/types.h>
  32 #include <sys/param.h>
  33 #include <sys/segments.h>
  34 #include <sys/lx_types.h>
  35 #include <sys/lx_brand.h>
  36 #include <sys/lx_misc.h>
  37 #include <sys/lx_debug.h>
  38 #include <sys/lx_poll.h>
  39 #include <sys/lx_signal.h>
  40 #include <sys/lx_sigstack.h>
  41 #include <sys/lx_syscall.h>
  42 #include <sys/lx_thread.h>
  43 #include <sys/syscall.h>
  44 #include <lx_provider_impl.h>
  45 #include <sys/stack.h>
  46 #include <assert.h>
  47 #include <errno.h>
  48 #include <poll.h>
  49 #include <rctl.h>
  50 #include <signal.h>
  51 #include <stdlib.h>
  52 #include <string.h>
  53 #include <strings.h>
  54 #include <thread.h>
  55 #include <ucontext.h>
  56 #include <unistd.h>
  57 #include <stdio.h>
  58 #include <libintl.h>
  59 #include <ieeefp.h>
  60 #include <sys/signalfd.h>
  61 
/*
 * Local prototype for pselect_large_fdset(); it is only declared when
 * building 32-bit (_ILP32) code.
 * NOTE(review): presumably the large-fd_set variant of pselect() exported by
 * libc -- confirm where it is defined.
 */
#if defined(_ILP32)
extern int pselect_large_fdset(int nfds, fd_set *in0, fd_set *out0, fd_set *ex0,
        const timespec_t *tsp, const sigset_t *sp);
#endif

/*
 * Yields the smaller of the two arguments.  Each argument can be evaluated
 * twice, so do not pass expressions with side effects.
 */
#define MIN(a, b)       ((a) < (b) ? (a) : (b))
  68 
  69 /*
  70  * Delivering signals to a Linux process is complicated by differences in
  71  * signal numbering, stack structure and contents, and the action taken when a
  72  * signal handler exits.  In addition, many signal-related structures, such as
  73  * sigset_ts, vary between Illumos and Linux.
  74  *
  75  * To support user-level signal handlers, the brand uses a double layer of
  76  * indirection to process and deliver signals to branded threads.
  77  *
  78  * When a Linux process sends a signal using the kill(2) system call, we must
  79  * translate the signal into the Illumos equivalent before handing control off
 * to the standard signalling mechanism.  When a signal is delivered to a Linux
 * process, we translate the signal number from Illumos back to Linux.
  82  * Translating signals both at generation and delivery time ensures both that
  83  * Illumos signals are sent properly to Linux applications and that signals'
  84  * default behavior works as expected.
  85  *
  86  * In a normal Illumos process, signal delivery is interposed on for any thread
  87  * registering a signal handler by libc. Libc needs to do various bits of magic
  88  * to provide thread-safe critical regions, so it registers its own handler,
  89  * named sigacthandler(), using the sigaction(2) system call. When a signal is
  90  * received, sigacthandler() is called, and after some processing, libc turns
  91  * around and calls the user's signal handler via a routine named
  92  * call_user_handler().
  93  *
  94  * Adding a Linux branded thread to the mix complicates things somewhat.
  95  *
  96  * First, when a thread receives a signal, it may either be running in an
  97  * emulated Linux context or a native illumos context.  In either case, the
  98  * in-kernel brand module is responsible for preserving the register state
  99  * from the interrupted context, regardless of whether emulated or native
 100  * software was running at the time.  The kernel is also responsible for
 101  * ensuring that the illumos native sigacthandler() is called with register
 102  * values appropriate for native code.  Of particular note is the %gs segment
 103  * selector for 32-bit code, and the %fsbase segment base register for 64-bit
 104  * code; these are used by libc to locate per-thread data structures.
 105  *
 106  * Second, the signal number translation referenced above must take place.
 107  * Finally, when we hand control to the Linux signal handler we must do so
 108  * on the brand stack, and with registers configured appropriately for the
 109  * Linux application.
 110  *
 111  * This need to translate signal numbers (and manipulate the signal handling
 112  * context) means that with standard Illumos libc, following a signal from
 113  * generation to delivery looks something like:
 114  *
 115  *      kernel ->
 116  *          sigacthandler() ->
 117  *              call_user_handler() ->
 118  *                  user signal handler
 119  *
 120  * but for the brand's Linux threads, this would look like:
 121  *
 122  *      kernel ->
 123  *          sigacthandler() ->
 124  *              call_user_handler() ->
 125  *                  lx_call_user_handler() ->
 126  *                      lx_sigdeliver() ->
 127  *                          syscall(B_JUMP_TO_LINUX, ...) ->
 128  *                              Linux user signal handler
 129  *
 * The new additions are:
 131  *
 132  *      lx_call_user_handler
 133  *      ====================
 134  *      This routine is responsible for translating Illumos signal numbers to
 135  *      their Linux equivalents, building a Linux signal stack based on the
 136  *      information Illumos has provided, and passing the stack to the
 137  *      registered Linux signal handler. It is, in effect, the Linux thread
 138  *      equivalent to libc's call_user_handler().
 139  *
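 *      lx_sigdeliver
 *      =============
 *      This routine is called by lx_call_user_handler() with a helper
 *      function that builds the Linux signal frame.  It dispatches to the
 *      Linux signal handler and, once the emulated sigreturn(2) family of
 *      system calls brings control back to its frame on the native stack,
 *      returns to the native libc signal handler in the usual way.
 *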
 143  * Note that none of this interposition is necessary unless a Linux thread
 144  * registers a user signal handler, as the default action for all signals is the
 145  * same between Illumos and Linux save for one signal, SIGPWR.  For this reason,
 146  * the brand ALWAYS installs its own internal signal handler for SIGPWR that
 147  * translates the action to the Linux default, to terminate the process.
 148  * (Illumos' default action is to ignore SIGPWR.)
 149  *
 150  * It is also important to note that when signals are not translated, the brand
 151  * relies upon code interposing upon the wait(2) system call to translate
 152  * signals to their proper values for any Linux threads retrieving the status
 153  * of others.  So while the Illumos signal number for a particular signal is set
 154  * in a process' data structures (and would be returned as the result of say,
 * WTERMSIG()), the brand's interposition upon wait(2) is responsible for
 * translating the value WTERMSIG() would return from an Illumos signal number
 * to the appropriate Linux value.
 158  *
 159  * lx_call_user_handler() calls lx_sigdeliver() with a helper function
 160  * (typically lx_build_signal_frame) which builds a stack frame for the 32-bit
 161  * Linux signal handler, or populates a local (on the stack) structure for the
 162  * 64-bit Linux signal handler. The stack at that time looks like this:
 163  *
 164  *      =========================================================
 165  * |    | lx_sigdeliver_frame_t -- includes LX_SIGRT_MAGIC and  |
 166  * |    | a return context for the eventual sigreturn(2) call   |
 167  * |    =========================================================
 168  * |    | Linux signal frame (32-bit) or local data             |
 169  * V    | (64-bit) built by stack_builder()                     |
 170  *      =========================================================
 171  *
 172  * The process of returning to an interrupted thread of execution from a user
 173  * signal handler is entirely different between Illumos and Linux.  While
 174  * Illumos generally expects to set the context to the interrupted one on a
 175  * normal return from a signal handler, in the normal case Linux instead calls
 176  * code that calls a specific Linux system call, rt_sigreturn(2) (or it also
 177  * can call sigreturn(2) in 32-bit code).  Thus when a Linux signal handler
 178  * completes execution, instead of returning through what would in libc be a
 179  * call to setcontext(2), the rt_sigreturn(2) Linux system call is responsible
 180  * for accomplishing much the same thing. It's for this reason that the stack
 181  * frame we build has the lx_(rt_)sigreturn_tramp code on the top of the
 182  * stack.  The code looks like this:
 183  *
 184  *      32-bit                                  64-bit
 185  *      --------------------------------        -----------------------------
 186  *      mov LX_SYS_rt_sigreturn, %eax           movq LX_SYS_rt_sigreturn, %rax
 187  *      int $0x80                               syscall
 188  *
 189  * We also use these same functions (lx_rt_sigreturn_tramp or
 190  * lx_sigreturn_tramp) to actually return from the signal handler.
 191  *
 192  * (Note that this trampoline code actually lives in a proper executable segment
 193  * and not on the stack, but gdb checks for the exact code sequence of the
 194  * trampoline code on the stack to determine whether it is in a signal stack
 195  * frame or not.  Really.)
 196  *
 197  * When the 32-bit Linux user signal handler is eventually called, the brand
 198  * stack frame looks like this (in the case of a "modern" signal stack; see
 199  * the lx_sigstack structure definition):
 200  *
 201  *      =========================================================
 202  * |    | lx_sigdeliver_frame_t                                 |
 203  * |    =========================================================
 204  * |    | Trampoline code (marker for gdb, not really executed) |
 205  * |    =========================================================
 206  * |    | Linux struct _fpstate                                 |
 207  * |    =========================================================
 208  * V    | Linux ucontext_t                                      | <--+
 209  *      =========================================================    |
 210  *      | Linux siginfo_t                                       | <--|-----+
 211  *      =========================================================    |     |
 212  *      | Pointer to Linux ucontext_t (or NULL) (sigaction arg2)| ---+     |
 213  *      =========================================================          |
 214  *      | Pointer to Linux siginfo_t (or NULL)  (sigaction arg1)| ---------+
 215  *      =========================================================
 216  *      | Linux signal number                   (sigaction arg0)|
 217  *      =========================================================
 218  *      | Pointer to signal return code (trampoline code)       |
 219  *      =========================================================
 220  *
 221  * The 64-bit stack-local data looks like this:
 222  *
 223  *      =========================================================
 224  * |    | lx_sigdeliver_frame_t                                 |
 225  * |    =========================================================
 226  * |    | Trampoline code (marker for gdb, not really executed) |
 227  * |    =========================================================
 228  * |    | Linux struct _fpstate                                 |
 229  * |    =========================================================
 230  * V    | Linux ucontext_t                                      | %rdx arg2
 231  *      =========================================================
 232  *      | Linux siginfo_t                                       | %rsi arg1
 233  *      =========================================================
 234  *      | Pointer to signal return code (trampoline code)       |
 235  *      =========================================================
 236  *
 237  * As usual in 64-bit code, %rdi is arg0 which is the signal number.
 238  *
 239  * The *sigreturn(2) family of emulated system call handlers locates the
 240  * "lx_sigdeliver_frame_t" struct on the Linux stack as part of processing
 241  * the system call.  This object contains a guard value (LX_SIGRT_MAGIC) to
 242  * detect stack smashing or an incorrect stack pointer.  It also contains a
 243  * "return" context, which we use to get back to the "lx_sigdeliver()" frame
 244  * on the native stack that originally dispatched to the Linux signal
 245  * handler.  The lx_sigdeliver() function is then able to return to the
 246  * native libc signal handler in the usual way.  This results in a further
 247  * setcontext() back to whatever was running when we took the signal.
 248  *
 249  * There are some edge cases where the "return" context cannot be located
 250  * by inspection of the Linux stack; e.g. if the guard value has been
 251  * corrupted, or the emulated program has relocated parts of the signal
 252  * delivery stack frame.  If this case is detected, a fallback mechanism is
 253  * used to attempt to find the return context.  A chain of "lx_sigbackup_t"
 254  * objects is maintained in signal interposer call frames, with the current
 255  * head stored in the thread-specific "lx_tsd_t".  This mechanism is
 256  * similar in principle to the "lwp_oldcontext" member of the "klwp_t" used
 257  * by the native signal handling infrastructure.  This backup chain is used
 258  * by the sigreturn(2) family of emulated system calls in the event that
 259  * the Linux stack did not correctly reference a return context.
 260  */
 261 
/*
 * The frame that the *sigreturn(2) family of emulated system call handlers
 * looks for on the Linux stack (see the signal delivery overview above).
 */
typedef struct lx_sigdeliver_frame {
        /* Guard value; the overview above names LX_SIGRT_MAGIC for this. */
        uintptr_t lxsdf_magic;
        /*
         * NOTE(review): presumably the "return" context used to get back
         * to the lx_sigdeliver() frame on the native stack -- confirm.
         */
        ucontext_t *lxsdf_retucp;
        /*
         * NOTE(review): presumably the context supplied with the signal
         * being delivered -- confirm at the point of use.
         */
        ucontext_t *lxsdf_sigucp;
        /*
         * NOTE(review): presumably this delivery's entry in the
         * lx_sigbackup_t fallback chain -- confirm.
         */
        lx_sigbackup_t *lxsdf_sigbackup;
} lx_sigdeliver_frame_t;
 268 
/*
 * Signal stack frame layout that starts with the handler's return address
 * and signal number, followed by the saved context and FP state.
 * NOTE(review): presumably the older (non-"modern") 32-bit frame, as opposed
 * to the lx_sigstack structure mentioned in the overview above -- confirm.
 * Member order defines the in-memory layout, so do not rearrange it.
 */
struct lx_oldsigstack {
        void (*retaddr)();      /* address of real lx_sigreturn code */
        int sig;                /* signal number */
        lx_sigcontext_t sigc;   /* saved user context */
        lx_fpstate_t fpstate;   /* saved FP state */
        int sig_extra;          /* signal mask for signals [32 .. NSIG - 1] */
        char trampoline[8];     /* code for trampoline to lx_sigreturn() */
};
 277 
 278 /*
 279  * The lx_sighandlers structure needs to be a global due to the semantics of
 280  * clone().
 281  *
 282  * If CLONE_SIGHAND is set, the calling process and child share signal
 283  * handlers, and if either calls sigaction(2) it should change the behavior
 284  * in the other thread.  Each thread does, however, have its own signal mask
 285  * and set of pending signals.
 286  *
 287  * If CLONE_SIGHAND is not set, the child process should inherit a copy of
 288  * the signal handlers at the time of the clone() but later calls to
 289  * sigaction(2) should only affect the individual thread calling it.
 290  *
 291  * This maps perfectly to a thr_create(3C) thread semantic in the first
 292  * case and a fork(2)-type semantic in the second case.  By making
 293  * lx_sighandlers global, we automatically get the correct behavior.
 294  */
static lx_sighandlers_t lx_sighandlers;

/*
 * Setting LX_NO_ABORT_HANDLER in the environment will prevent the emulated
 * Linux program from modifying the signal handling disposition for SIGSEGV or
 * SIGABRT.  Useful for debugging programs which fall over themselves to
 * prevent useful core files being generated.
 *
 * The flag starts out as 0 (off).
 */
static int lx_no_abort_handler = 0;

/*
 * Forward declaration of lx_sigdeliver(), the dispatch routine named in the
 * signal delivery overview near the top of this file.
 * NOTE(review): the parameters are unnamed here; see the definition for
 * their roles.
 */
static void lx_sigdeliver(int, siginfo_t *, ucontext_t *, size_t, void (*)(),
    void (*)(), struct lx_sigaction *);

/*
 * Cache result of process.max-file-descriptor to avoid calling getrctl()
 * for each lx_ppoll().
 * NOTE(review): the initial value of 0 presumably means "not looked up
 * yet" -- confirm at the point of use.
 */
static rlim_t maxfd = 0;
 313 
/*
 * stol_stack() and ltos_stack() convert between Illumos and Linux stack_t
 * structures.
 *
 * These routines are needed because although the two structures have the same
 * contents, their contents are declared in a different order, so the content
 * of the structures cannot be copied with a simple bcopy().
 *
 * stol_stack() fills the Linux structure "to" from the Illumos structure
 * "fr", one member at a time.
 */
static void
stol_stack(stack_t *fr, lx_stack_t *to)
{
        to->ss_sp = fr->ss_sp;
        to->ss_flags = fr->ss_flags;
        to->ss_size = fr->ss_size;
}
 329 
/*
 * Fill the Illumos stack_t "to" from the Linux lx_stack_t "fr", copying the
 * stack pointer, flags and size one member at a time.
 */
static void
ltos_stack(lx_stack_t *fr, stack_t *to)
{
        to->ss_sp = fr->ss_sp;
        to->ss_flags = fr->ss_flags;
        to->ss_size = fr->ss_size;
}
 337 
 338 static int
 339 ltos_sigset(lx_sigset_t *lx_sigsetp, sigset_t *s_sigsetp)
 340 {
 341         lx_sigset_t l;
 342         int lx_sig, sig;
 343 
 344         if (uucopy(lx_sigsetp, &l, sizeof (lx_sigset_t)) != 0)
 345                 return (-errno);
 346 
 347         (void) sigemptyset(s_sigsetp);
 348 
 349         for (lx_sig = 1; lx_sig <= LX_NSIG; lx_sig++) {
 350                 if (lx_sigismember(&l, lx_sig) &&
 351                     ((sig = ltos_signo[lx_sig]) > 0))
 352                         (void) sigaddset(s_sigsetp, sig);
 353         }
 354 
 355         return (0);
 356 }
 357 
 358 static int
 359 stol_sigset(sigset_t *s_sigsetp, lx_sigset_t *lx_sigsetp)
 360 {
 361         lx_sigset_t l;
 362         int sig, lx_sig;
 363 
 364         bzero(&l, sizeof (lx_sigset_t));
 365 
 366         for (sig = 1; sig < NSIG; sig++) {
 367                 if (sigismember(s_sigsetp, sig) &&
 368                     ((lx_sig = stol_signo[sig]) > 0))
 369                         lx_sigaddset(&l, lx_sig);
 370         }
 371 
 372         return ((uucopy(&l, lx_sigsetp, sizeof (lx_sigset_t)) != 0)
 373             ? -errno : 0);
 374 }
 375 
 376 #if defined(_ILP32)
 377 static int
 378 ltos_osigset(lx_osigset_t *lx_osigsetp, sigset_t *s_sigsetp)
 379 {
 380         lx_osigset_t lo;
 381         int lx_sig, sig;
 382 
 383         if (uucopy(lx_osigsetp, &lo, sizeof (lx_osigset_t)) != 0)
 384                 return (-errno);
 385 
 386         (void) sigemptyset(s_sigsetp);
 387 
 388         for (lx_sig = 1; lx_sig <= OSIGSET_NBITS; lx_sig++)
 389                 if ((lo & OSIGSET_BITSET(lx_sig)) &&
 390                     ((sig = ltos_signo[lx_sig]) > 0))
 391                         (void) sigaddset(s_sigsetp, sig);
 392 
 393         return (0);
 394 }
 395 
 396 static int
 397 stol_osigset(sigset_t *s_sigsetp, lx_osigset_t *lx_osigsetp)
 398 {
 399         lx_osigset_t lo = 0;
 400         int lx_sig, sig;
 401 
 402         /*
 403          * Note that an lx_osigset_t can only represent the signals from
 404          * [1 .. OSIGSET_NBITS], so even though a signal may be present in the
 405          * Illumos sigset_t, it may not be representable as a bit in the
 406          * lx_osigset_t.
 407          */
 408         for (sig = 1; sig < NSIG; sig++)
 409                 if (sigismember(s_sigsetp, sig) &&
 410                     ((lx_sig = stol_signo[sig]) > 0) &&
 411                     (lx_sig <= OSIGSET_NBITS))
 412                         lo |= OSIGSET_BITSET(lx_sig);
 413 
 414         return ((uucopy(&lo, lx_osigsetp, sizeof (lx_osigset_t)) != 0)
 415             ? -errno : 0);
 416 }
 417 #endif
 418 
 419 static int
 420 ltos_sigcode(int si_code)
 421 {
 422         switch (si_code) {
 423                 case LX_SI_USER:
 424                         return (SI_USER);
 425                 case LX_SI_TKILL:
 426                         return (SI_LWP);
 427                 case LX_SI_QUEUE:
 428                         return (SI_QUEUE);
 429                 case LX_SI_TIMER:
 430                         return (SI_TIMER);
 431                 case LX_SI_ASYNCIO:
 432                         return (SI_ASYNCIO);
 433                 case LX_SI_MESGQ:
 434                         return (SI_MESGQ);
 435                 default:
 436                         return (LX_SI_CODE_NOT_EXIST);
 437         }
 438 }
 439 
 440 int
 441 stol_siginfo(siginfo_t *siginfop, lx_siginfo_t *lx_siginfop)
 442 {
 443         int ret = 0;
 444         lx_siginfo_t lx_siginfo;
 445 
 446         bzero(&lx_siginfo, sizeof (*lx_siginfop));
 447 
 448         if ((lx_siginfo.lsi_signo = stol_signo[siginfop->si_signo]) <= 0) {
 449                 /*
 450                  * Depending on the caller we may still need to get a usable
 451                  * converted siginfo struct.
 452                  */
 453                 lx_siginfo.lsi_signo = LX_SIGKILL;
 454                 errno = EINVAL;
 455                 ret = -1;
 456         }
 457 
 458         lx_siginfo.lsi_code = lx_stol_sigcode(siginfop->si_code);
 459         lx_siginfo.lsi_errno = siginfop->si_errno;
 460 
 461         switch (lx_siginfo.lsi_signo) {
 462                 /*
 463                  * Semantics ARE defined for SIGKILL, but since
 464                  * we can't catch it, we can't translate it. :-(
 465                  */
 466                 case LX_SIGPOLL:
 467                         lx_siginfo.lsi_band = siginfop->si_band;
 468                         lx_siginfo.lsi_fd = siginfop->si_fd;
 469                         break;
 470 
 471                 case LX_SIGCHLD:
 472                         lx_siginfo.lsi_pid = siginfop->si_pid;
 473                         if (siginfop->si_code <= 0 || siginfop->si_code ==
 474                             CLD_EXITED) {
 475                                 lx_siginfo.lsi_status = siginfop->si_status;
 476                         } else {
 477                                 lx_siginfo.lsi_status = lx_stol_status(
 478                                     siginfop->si_status, -1);
 479                         }
 480                         lx_siginfo.lsi_utime = siginfop->si_utime;
 481                         lx_siginfo.lsi_stime = siginfop->si_stime;
 482                         break;
 483 
 484                 case LX_SIGILL:
 485                 case LX_SIGBUS:
 486                 case LX_SIGFPE:
 487                 case LX_SIGSEGV:
 488                         lx_siginfo.lsi_addr = siginfop->si_addr;
 489                         break;
 490 
 491                 default:
 492                         lx_siginfo.lsi_pid = siginfop->si_pid;
 493                         lx_siginfo.lsi_uid =
 494                             LX_UID32_TO_UID16(siginfop->si_uid);
 495                         lx_siginfo.lsi_value = siginfop->si_value;
 496                         break;
 497         }
 498 
 499         if (uucopy(&lx_siginfo, lx_siginfop, sizeof (lx_siginfo_t)) != 0)
 500                 return (-errno);
 501         return ((ret != 0) ? -errno : 0);
 502 }
 503 
 504 static void
 505 stol_fpstate(fpregset_t *fpr, lx_fpstate_t *lfpr)
 506 {
 507         size_t copy_len;
 508 
 509 #if defined(_LP64)
 510         /*
 511          * The 64-bit Illumos struct fpregset_t and lx_fpstate_t are identical
 512          * so just bcopy() those entries (see usr/src/uts/intel/sys/regset.h
 513          * for __amd64's struct fpu).
 514          */
 515         copy_len = sizeof (fpr->fp_reg_set.fpchip_state);
 516         bcopy(fpr, lfpr, copy_len);
 517 
 518 #else /* is _ILP32 */
 519         struct _fpstate *fpsp = (struct _fpstate *)fpr;
 520 
 521         /*
 522          * The Illumos struct _fpstate and lx_fpstate_t are identical from the
 523          * beginning of the structure to the lx_fpstate_t "magic" field, so
 524          * just bcopy() those entries.
 525          */
 526         copy_len = (size_t)&(((lx_fpstate_t *)0)->magic);
 527         bcopy(fpsp, lfpr, copy_len);
 528 
 529         /*
 530          * These fields are all only significant for the first 16 bits.
 531          */
 532         lfpr->cw &= 0xffff;              /* x87 control word */
 533         lfpr->tag &= 0xffff;             /* x87 tag word */
 534         lfpr->cssel &= 0xffff;           /* cs selector */
 535         lfpr->datasel &= 0xffff; /* ds selector */
 536 
 537         /*
 538          * Linux wants the x87 status word field to contain the value of the
 539          * x87 saved exception status word.
 540          */
 541         lfpr->sw = lfpr->status & 0xffff;     /* x87 status word */
 542 
 543         lfpr->mxcsr = fpsp->mxcsr;
 544 
 545         if (fpsp->mxcsr != 0) {
 546                 /*
 547                  * Linux uses the "magic" field to denote whether the XMM
 548                  * registers contain legal data or not.  Since we can't get to
 549                  * %cr4 from userland to check the status of the OSFXSR bit,
 550                  * check the mxcsr field to see if it's 0, which it should
 551                  * never be on a system with the OXFXSR bit enabled.
 552                  */
 553                 lfpr->magic = LX_X86_FXSR_MAGIC;
 554                 bcopy(fpsp->xmm, lfpr->_xmm, sizeof (lfpr->_xmm));
 555         } else {
 556                 lfpr->magic = LX_X86_FXSR_NONE;
 557         }
 558 #endif
 559 }
 560 
 561 static void
 562 ltos_fpstate(lx_fpstate_t *lfpr, fpregset_t *fpr)
 563 {
 564         size_t copy_len;
 565 
 566 #if defined(_LP64)
 567         /*
 568          * The 64-bit Illumos struct fpregset_t and lx_fpstate_t are identical
 569          * so just bcopy() those entries (see usr/src/uts/intel/sys/regset.h
 570          * for __amd64's struct fpu).
 571          */
 572         copy_len = sizeof (fpr->fp_reg_set.fpchip_state);
 573         bcopy(lfpr, fpr, copy_len);
 574 
 575 #else /* is _ILP32 */
 576         struct _fpstate *fpsp = (struct _fpstate *)fpr;
 577 
 578         /*
 579          * The lx_fpstate_t and Illumos struct _fpstate are identical from the
 580          * beginning of the structure to the struct _fpstate "mxcsr" field, so
 581          * just bcopy() those entries.
 582          *
 583          * Note that we do NOT have to propogate changes the user may have made
 584          * to the "status" word back to the "sw" word, unlike the way we have
 585          * to deal with processing the ESP and UESP register values on return
 586          * from a signal handler.
 587          */
 588         copy_len = (size_t)&(((struct _fpstate *)0)->mxcsr);
 589         bcopy(lfpr, fpsp, copy_len);
 590 
 591         /*
 592          * These fields are all only significant for the first 16 bits.
 593          */
 594         fpsp->cw &= 0xffff;              /* x87 control word */
 595         fpsp->sw &= 0xffff;              /* x87 status word */
 596         fpsp->tag &= 0xffff;             /* x87 tag word */
 597         fpsp->cssel &= 0xffff;           /* cs selector */
 598         fpsp->datasel &= 0xffff; /* ds selector */
 599         fpsp->status &= 0xffff;          /* saved status */
 600 
 601         fpsp->mxcsr = lfpr->mxcsr;
 602 
 603         if (lfpr->magic == LX_X86_FXSR_MAGIC)
 604                 bcopy(lfpr->_xmm, fpsp->xmm, sizeof (fpsp->xmm));
 605 #endif
 606 }
 607 
 608 /*
 609  * We do not use the system sigaltstack() infrastructure as that would conflict
 610  * with our handling of both system call emulation and native signals on the
 611  * native stack.  Instead, we track the Linux stack structure in our
 612  * thread-specific data.  This function is modeled on the behaviour of the
 613  * native sigaltstack system call handler.
 614  */
 615 long
 616 lx_sigaltstack(uintptr_t ssp, uintptr_t oss)
 617 {
 618         lx_tsd_t *lxtsd = lx_get_tsd();
 619         lx_stack_t ss;
 620 
 621         if (ssp != NULL) {
 622                 if (lxtsd->lxtsd_sigaltstack.ss_flags & LX_SS_ONSTACK) {
 623                         /*
 624                          * If we are currently using the installed alternate
 625                          * stack for signal handling, the user may not modify
 626                          * the stack for this thread.
 627                          */
 628                         return (-EPERM);
 629                 }
 630 
 631                 if (uucopy((void *)ssp, &ss, sizeof (ss)) != 0) {
 632                         return (-EFAULT);
 633                 }
 634 
 635                 if (ss.ss_flags & ~LX_SS_DISABLE) {
 636                         /*
 637                          * The user may not specify a value for flags other
 638                          * than 0 or SS_DISABLE.
 639                          */
 640                         return (-EINVAL);
 641                 }
 642 
 643                 if (!(ss.ss_flags & LX_SS_DISABLE) && ss.ss_size <
 644                     LX_MINSIGSTKSZ) {
 645                         return (-ENOMEM);
 646                 }
 647         }
 648 
 649         if (oss != NULL) {
 650                 /*
 651                  * User provided old and new stack_t pointers may point to
 652                  * the same location.  Copy out before we modify.
 653                  */
 654                 if (uucopy(&lxtsd->lxtsd_sigaltstack, (void *)oss,
 655                     sizeof (lxtsd->lxtsd_sigaltstack)) != 0) {
 656                         return (-EFAULT);
 657                 }
 658         }
 659 
 660         if (ssp != NULL) {
 661                 lxtsd->lxtsd_sigaltstack = ss;
 662         }
 663 
 664         return (0);
 665 }
 666 
 667 #if defined(_ILP32)
 668 /*
 669  * The following routines are needed because sigset_ts and siginfo_ts are
 670  * different in format between Linux and Illumos.
 671  *
 672  * Note that there are two different lx_sigset structures, lx_sigset_ts and
 673  * lx_osigset_ts:
 674  *
 675  *    + An lx_sigset_t is the equivalent of a Illumos sigset_t and supports
 676  *      more than 32 signals.
 677  *
 678  *    + An lx_osigset_t is simply a uint32_t, so it by definition only supports
 679  *      32 signals.
 680  *
 681  * When there are two versions of a routine, one prefixed with lx_rt_ and
 682  * one prefixed with lx_ alone, in GENERAL the lx_rt_ routines deal with
 683  * lx_sigset_ts while the lx_ routines deal with lx_osigset_ts.  Unfortunately,
 684  * this is not always the case (e.g. lx_sigreturn() vs. lx_rt_sigreturn())
 685  */
 686 long
 687 lx_sigpending(uintptr_t sigpend)
 688 {
 689         sigset_t sigpendset;
 690 
 691         if (sigpending(&sigpendset) != 0)
 692                 return (-errno);
 693 
 694         return (stol_osigset(&sigpendset, (lx_osigset_t *)sigpend));
 695 }
 696 #endif
 697 
 698 long
 699 lx_rt_sigpending(uintptr_t sigpend, uintptr_t setsize)
 700 {
 701         sigset_t sigpendset;
 702 
 703         if ((size_t)setsize != sizeof (lx_sigset_t))
 704                 return (-EINVAL);
 705 
 706         if (sigpending(&sigpendset) != 0)
 707                 return (-errno);
 708 
 709         return (stol_sigset(&sigpendset, (lx_sigset_t *)sigpend));
 710 }
 711 
 712 /*
 713  * Create a common routine to encapsulate all of the sigprocmask code,
 714  * as the only difference between lx_sigprocmask() and lx_rt_sigprocmask()
 715  * is the usage of lx_osigset_ts vs. lx_sigset_ts, as toggled in the code by
 716  * the setting of the "sigset_type" flag.
 717  */
 718 static int
 719 lx_sigprocmask_common(uintptr_t how, uintptr_t l_setp, uintptr_t l_osetp,
 720     uintptr_t sigset_type)
 721 {
 722         int err = 0;
 723         sigset_t set, oset;
 724         sigset_t *s_setp = NULL;
 725         sigset_t *s_osetp;
 726 
 727         if (l_setp) {
 728                 switch (how) {
 729                         case LX_SIG_BLOCK:
 730                                 how = SIG_BLOCK;
 731                                 break;
 732 
 733                         case LX_SIG_UNBLOCK:
 734                                 how = SIG_UNBLOCK;
 735                                 break;
 736 
 737                         case LX_SIG_SETMASK:
 738                                 how = SIG_SETMASK;
 739                                 break;
 740 
 741                         default:
 742                                 return (-EINVAL);
 743                 }
 744 
 745                 s_setp = &set;
 746 
 747                 /* Only 32-bit code passes other than USE_SIGSET */
 748                 if (sigset_type == USE_SIGSET)
 749                         err = ltos_sigset((lx_sigset_t *)l_setp, s_setp);
 750 #if defined(_ILP32)
 751                 else
 752                         err = ltos_osigset((lx_osigset_t *)l_setp, s_setp);
 753 #endif
 754 
 755                 if (err != 0)
 756                         return (err);
 757 
 758         }
 759 
 760         s_osetp = (l_osetp ? &oset : NULL);
 761 
 762         /*
 763          * In a multithreaded environment, a call to sigprocmask(2) should
 764          * only affect the current thread's signal mask so we don't need to
 765          * explicitly call thr_sigsetmask(3C) here.
 766          */
 767         if (sigprocmask(how, s_setp, s_osetp) != 0)
 768                 return (-errno);
 769 
 770         if (l_osetp) {
 771                 if (sigset_type == USE_SIGSET)
 772                         err = stol_sigset(s_osetp, (lx_sigset_t *)l_osetp);
 773 #if defined(_ILP32)
 774                 else
 775                         err = stol_osigset(s_osetp, (lx_osigset_t *)l_osetp);
 776 #endif
 777 
 778                 if (err != 0) {
 779                         /*
 780                          * Encountered a fault while writing to the old signal
 781                          * mask buffer, so unwind the signal mask change made
 782                          * above.
 783                          */
 784                         (void) sigprocmask(how, s_osetp, (sigset_t *)NULL);
 785                         return (err);
 786                 }
 787         }
 788 
 789         return (0);
 790 }
 791 
 792 #if defined(_ILP32)
 793 long
 794 lx_sigprocmask(uintptr_t how, uintptr_t setp, uintptr_t osetp)
 795 {
 796         return (lx_sigprocmask_common(how, setp, osetp, USE_OSIGSET));
 797 }
 798 #endif
 799 
 800 long
 801 lx_rt_sigprocmask(uintptr_t how, uintptr_t setp, uintptr_t osetp,
 802     uintptr_t setsize)
 803 {
 804         if ((size_t)setsize != sizeof (lx_sigset_t))
 805                 return (-EINVAL);
 806 
 807         return (lx_sigprocmask_common(how, setp, osetp, USE_SIGSET));
 808 }
 809 
 810 #if defined(_ILP32)
 811 long
 812 lx_sigsuspend(uintptr_t set)
 813 {
 814         sigset_t s_set;
 815 
 816         if (ltos_osigset((lx_osigset_t *)set, &s_set) != 0)
 817                 return (-errno);
 818 
 819         return ((sigsuspend(&s_set) == -1) ? -errno : 0);
 820 }
 821 #endif
 822 
 823 long
 824 lx_rt_sigsuspend(uintptr_t set, uintptr_t setsize)
 825 {
 826         sigset_t s_set;
 827 
 828         if ((size_t)setsize != sizeof (lx_sigset_t))
 829                 return (-EINVAL);
 830 
 831         if (ltos_sigset((lx_sigset_t *)set, &s_set) != 0)
 832                 return (-errno);
 833 
 834         return ((sigsuspend(&s_set) == -1) ? -errno : 0);
 835 }
 836 
 837 long
 838 lx_rt_sigwaitinfo(uintptr_t set, uintptr_t sinfo, uintptr_t setsize)
 839 {
 840         sigset_t s_set;
 841         siginfo_t s_sinfo, *s_sinfop;
 842         int rc;
 843 
 844         lx_sigset_t *setp = (lx_sigset_t *)set;
 845         lx_siginfo_t *sinfop = (lx_siginfo_t *)sinfo;
 846 
 847         if ((size_t)setsize != sizeof (lx_sigset_t))
 848                 return (-EINVAL);
 849 
 850         if (ltos_sigset(setp, &s_set) != 0)
 851                 return (-errno);
 852 
 853         s_sinfop = (sinfop == NULL) ? NULL : &s_sinfo;
 854 
 855         if ((rc = sigwaitinfo(&s_set, s_sinfop)) == -1)
 856                 return (-errno);
 857 
 858         if (s_sinfop == NULL)
 859                 return (stol_signo[rc]);
 860 
 861         return ((stol_siginfo(s_sinfop, sinfop) != 0)
 862             ? -errno : stol_signo[rc]);
 863 }
 864 
 865 long
 866 lx_rt_sigtimedwait(uintptr_t set, uintptr_t sinfo, uintptr_t toutp,
 867     uintptr_t setsize)
 868 {
 869         sigset_t s_set;
 870         siginfo_t s_sinfo, *s_sinfop;
 871         int rc;
 872 
 873         lx_sigset_t *setp = (lx_sigset_t *)set;
 874         lx_siginfo_t *sinfop = (lx_siginfo_t *)sinfo;
 875 
 876         if ((size_t)setsize != sizeof (lx_sigset_t))
 877                 return (-EINVAL);
 878 
 879         if (ltos_sigset(setp, &s_set) != 0)
 880                 return (-errno);
 881 
 882         s_sinfop = (sinfop == NULL) ? NULL : &s_sinfo;
 883 
 884         /*
 885          * "If timeout is the NULL pointer, the behavior is unspecified."
 886          * Match what LTP expects.
 887          */
 888         if ((rc = sigtimedwait(&s_set, s_sinfop,
 889             (struct timespec *)toutp)) == -1)
 890                 return (toutp == NULL ? -EINTR : -errno);
 891 
 892         if (s_sinfop == NULL)
 893                 return (stol_signo[rc]);
 894 
 895         return ((stol_siginfo(s_sinfop, sinfop) != 0)
 896             ? -errno : stol_signo[rc]);
 897 }
 898 
 899 static void
 900 lx_sigreturn_find_native_context(const char *caller, ucontext_t **sigucp,
 901     ucontext_t **retucp, uintptr_t sp)
 902 {
 903         lx_tsd_t *lxtsd = lx_get_tsd();
 904         lx_sigdeliver_frame_t *lxsdfp = (lx_sigdeliver_frame_t *)sp;
 905         lx_sigdeliver_frame_t lxsdf;
 906         boolean_t copy_ok;
 907 
 908         lx_debug("%s: reading lx_sigdeliver_frame_t @ %p\n", caller, lxsdfp);
 909         if (uucopy(lxsdfp, &lxsdf, sizeof (lxsdf)) != 0) {
 910                 lx_debug("%s: failed to read lx_sigdeliver_frame_t @ %p\n",
 911                     lxsdfp);
 912 
 913                 copy_ok = B_FALSE;
 914         } else {
 915                 lx_debug("%s: lxsdf: magic %p retucp %p sigucp %p\n", caller,
 916                     lxsdf.lxsdf_magic, lxsdf.lxsdf_retucp, lxsdf.lxsdf_sigucp);
 917 
 918                 copy_ok = B_TRUE;
 919         }
 920 
 921         /*
 922          * lx_sigdeliver() pushes a lx_sigdeliver_frame_t onto the stack
 923          * before it creates the struct lx_oldsigstack.
 924          */
 925         if (copy_ok && lxsdf.lxsdf_magic == LX_SIGRT_MAGIC) {
 926                 LX_SIGNAL_DELIVERY_FRAME_FOUND(lxsdfp);
 927 
 928                 /*
 929                  * The guard value is intact; use the context pointers stored
 930                  * in the signal delivery frame:
 931                  */
 932                 *sigucp = lxsdf.lxsdf_sigucp;
 933                 *retucp = lxsdf.lxsdf_retucp;
 934 
 935                 /*
 936                  * Ensure that the backup signal delivery chain is in sync with
 937                  * the frame we are returning via:
 938                  */
 939                 lxtsd->lxtsd_sigbackup = lxsdf.lxsdf_sigbackup;
 940         } else {
 941                 /*
 942                  * The guard value was not intact.  Either the program smashed
 943                  * the stack unintentionally, or worse: intentionally moved
 944                  * some parts of the signal delivery frame we constructed to
 945                  * another location before calling rt_sigreturn(2).
 946                  */
 947                 LX_SIGNAL_DELIVERY_FRAME_CORRUPT(lxsdfp);
 948 
 949                 if (lxtsd->lxtsd_sigbackup == NULL) {
 950                         /*
 951                          * There was no backup context to use, so we must
 952                          * kill the process.
 953                          */
 954                         if (copy_ok) {
 955                                 lx_err_fatal("%s: sp 0x%p, expected 0x%x, "
 956                                     "found 0x%x!", caller, sp, LX_SIGRT_MAGIC,
 957                                     lxsdf.lxsdf_magic);
 958                         } else {
 959                                 lx_err_fatal("%s: sp 0x%p, could not read "
 960                                     "magic", caller, sp);
 961                         }
 962                 }
 963 
 964                 /*
 965                  * Attempt to recover by using the backup signal delivery
 966                  * chain:
 967                  */
 968                 lx_debug("%s: SIGRT_MAGIC not found @ sp %p; using backup "
 969                     "@ %p\n", caller, (void *)sp, lxtsd->lxtsd_sigbackup);
 970                 *sigucp = lxtsd->lxtsd_sigbackup->lxsb_sigucp;
 971                 *retucp = lxtsd->lxtsd_sigbackup->lxsb_retucp;
 972         }
 973 }
 974 
 975 #if defined(_ILP32)
 976 /*
 977  * Intercept the Linux sigreturn() syscall to turn it into the return through
 978  * the libc call stack that Illumos expects.
 979  *
 980  * When control returns to libc's call_user_handler() routine, a setcontext(2)
 981  * will be done that returns thread execution to the point originally
 982  * interrupted by receipt of the signal.
 983  *
 984  * This is only used by 32-bit code.
 985  */
 986 long
 987 lx_sigreturn(void)
 988 {
 989         struct lx_oldsigstack *lx_ossp;
 990         lx_sigset_t lx_sigset;
 991         ucontext_t *ucp;
 992         ucontext_t *sigucp;
 993         ucontext_t *retucp;
 994         uintptr_t sp;
 995 
 996         ucp = lx_syscall_regs();
 997 
 998         /*
 999          * NOTE:  The sp saved in the context is eight bytes off of where we
1000          *        need it to be (either due to trampoline or the copying of
1001          *        sp = uesp, not clear which).
1002          */
1003         sp = LX_REG(ucp, REG_SP) - 8;
1004 
1005         /*
1006          * At this point, the stack pointer should point to the struct
1007          * lx_oldsigstack that lx_build_old_signal_frame() constructed and
1008          * placed on the stack.  We need to reference it a bit later, so
1009          * save a pointer to it before incrementing our copy of the sp.
1010          */
1011         lx_ossp = (struct lx_oldsigstack *)sp;
1012         sp += SA(sizeof (struct lx_oldsigstack));
1013 
1014         lx_sigreturn_find_native_context(__func__, &sigucp, &retucp, sp);
1015 
1016         /*
1017          * We need to copy machine registers the Linux signal handler may have
1018          * modified back to the Illumos ucontext_t.
1019          *
1020          * General registers copy across as-is, except Linux expects that
1021          * changes made to uc_mcontext.gregs[ESP] will be reflected when the
1022          * interrupted thread resumes execution after the signal handler. To
1023          * emulate this behavior, we must modify uc_mcontext.gregs[UESP] to
1024          * match uc_mcontext.gregs[ESP] as Illumos will restore the UESP
1025          * value to ESP.
1026          */
1027         lx_ossp->sigc.sc_esp_at_signal = lx_ossp->sigc.sc_esp;
1028         bcopy(&lx_ossp->sigc, &sigucp->uc_mcontext, sizeof (gregset_t));
1029 
1030         LX_SIGRETURN(NULL, sigucp, sp);
1031 
1032         /* copy back FP regs if present */
1033         if (lx_ossp->sigc.sc_fpstate != NULL)
1034                 ltos_fpstate(&lx_ossp->fpstate, &sigucp->uc_mcontext.fpregs);
1035 
1036         /* convert Linux signal mask back to its Illumos equivalent */
1037         bzero(&lx_sigset, sizeof (lx_sigset_t));
1038         lx_sigset.__bits[0] = lx_ossp->sigc.sc_mask;
1039         lx_sigset.__bits[1] = lx_ossp->sig_extra;
1040         (void) ltos_sigset(&lx_sigset, &sigucp->uc_sigmask);
1041 
1042         /*
1043          * For signal mask handling to be done properly, this call needs to
1044          * return to the libc routine that originally called the signal handler
1045          * rather than directly set the context back to the place the signal
1046          * interrupted execution as the original Linux code would do.
1047          */
1048         lx_debug("lx_sigreturn: calling setcontext; retucp %p flags %lx "
1049             "link %p\n", retucp, retucp->uc_flags, retucp->uc_link);
1050         setcontext(retucp);
1051         assert(0);
1052 
1053         /*NOTREACHED*/
1054         return (0);
1055 }
1056 #endif
1057 
1058 /*
1059  * This signal return syscall is used by both 32-bit and 64-bit code.
1060  */
1061 long
1062 lx_rt_sigreturn(void)
1063 {
1064         struct lx_sigstack *lx_ssp;
1065         lx_ucontext_t *lx_ucp;
1066         ucontext_t *ucp;
1067         ucontext_t *sigucp;
1068         ucontext_t *retucp;
1069         uintptr_t sp;
1070 
1071         /* Get the registers at the emulated Linux rt_sigreturn syscall */
1072         ucp = lx_syscall_regs();
1073 
1074 #if defined(_ILP32)
1075         lx_debug("lx_rt_sigreturn: ESP %p UESP %p\n", LX_REG(ucp, ESP),
1076             LX_REG(ucp, UESP));
1077         /*
1078          * For 32-bit
1079          *
1080          * NOTE:  Because of the silly compatibility measures done in the
1081          *        signal trampoline code to make sure the stack holds the
1082          *         _exact same_  instruction sequence Linux does, we have to
1083          *        manually "pop" some extra instructions off the stack here
1084          *        before passing the stack address to the syscall because the
1085          *        trampoline code isn't allowed to do it due to the gdb
1086          *        compatability issues.
1087          *
1088          *        No, I'm not kidding.
1089          *
1090          *        The sp saved in the context is eight bytes off of where we
1091          *        need it to be (either due to trampoline or the copying of
1092          *        sp = uesp, not clear which but looks like the uesp case), so
1093          *        the need to pop the extra four byte instruction means we need
1094          *        to subtract  a net four bytes from the sp before "popping" the
1095          *        struct lx_sigstack off the stack.
1096          *
1097          *        This will yield the value the stack pointer had before
1098          *        lx_sigdeliver() created the stack frame for the Linux signal
1099          *        handler.
1100          */
1101         sp = (uintptr_t)LX_REG(ucp, REG_SP) - 4;
1102 #else
1103         /*
1104          * We need to make an adjustment for 64-bit code as well. Since 64-bit
1105          * does not use the trampoline, it's probably for the same reason as
1106          * alluded to above.
1107          */
1108         sp = (uintptr_t)LX_REG(ucp, REG_SP) - 8;
1109 #endif
1110 
1111         /*
1112          * At this point, the stack pointer should point to the struct
1113          * lx_sigstack that lx_build_signal_frame() constructed and
1114          * placed on the stack.  We need to reference it a bit later, so
1115          * save a pointer to it before incrementing our copy of the sp.
1116          */
1117         lx_ssp = (struct lx_sigstack *)sp;
1118         sp += SA(sizeof (struct lx_sigstack));
1119 
1120 #if defined(_LP64)
1121         /*
1122          * The 64-bit lx_sigdeliver() inserts 8 bytes of padding between
1123          * the lx_sigstack_t and the delivery frame to maintain ABI stack
1124          * alignment.
1125          */
1126         sp += 8;
1127 #endif
1128 
1129         lx_sigreturn_find_native_context(__func__, &sigucp, &retucp, sp);
1130 
1131         /*
1132          * We need to copy machine registers the Linux signal handler may have
1133          * modified back to the Illumos version.
1134          */
1135 #if defined(_LP64)
1136         lx_ucp = &lx_ssp->uc;
1137 
1138         /*
1139          * General register layout is completely different.
1140          */
1141         LX_REG(sigucp, REG_R15) = lx_ucp->uc_sigcontext.sc_r15;
1142         LX_REG(sigucp, REG_R14) = lx_ucp->uc_sigcontext.sc_r14;
1143         LX_REG(sigucp, REG_R13) = lx_ucp->uc_sigcontext.sc_r13;
1144         LX_REG(sigucp, REG_R12) = lx_ucp->uc_sigcontext.sc_r12;
1145         LX_REG(sigucp, REG_R11) = lx_ucp->uc_sigcontext.sc_r11;
1146         LX_REG(sigucp, REG_R10) = lx_ucp->uc_sigcontext.sc_r10;
1147         LX_REG(sigucp, REG_R9) = lx_ucp->uc_sigcontext.sc_r9;
1148         LX_REG(sigucp, REG_R8) = lx_ucp->uc_sigcontext.sc_r8;
1149         LX_REG(sigucp, REG_RDI) = lx_ucp->uc_sigcontext.sc_rdi;
1150         LX_REG(sigucp, REG_RSI) = lx_ucp->uc_sigcontext.sc_rsi;
1151         LX_REG(sigucp, REG_RBP) = lx_ucp->uc_sigcontext.sc_rbp;
1152         LX_REG(sigucp, REG_RBX) = lx_ucp->uc_sigcontext.sc_rbx;
1153         LX_REG(sigucp, REG_RDX) = lx_ucp->uc_sigcontext.sc_rdx;
1154         LX_REG(sigucp, REG_RCX) = lx_ucp->uc_sigcontext.sc_rcx;
1155         LX_REG(sigucp, REG_RAX) = lx_ucp->uc_sigcontext.sc_rax;
1156         LX_REG(sigucp, REG_TRAPNO) = lx_ucp->uc_sigcontext.sc_trapno;
1157         LX_REG(sigucp, REG_ERR) = lx_ucp->uc_sigcontext.sc_err;
1158         LX_REG(sigucp, REG_RIP) = lx_ucp->uc_sigcontext.sc_rip;
1159         LX_REG(sigucp, REG_CS) = lx_ucp->uc_sigcontext.sc_cs;
1160         LX_REG(sigucp, REG_RFL) = lx_ucp->uc_sigcontext.sc_eflags;
1161         LX_REG(sigucp, REG_RSP) = lx_ucp->uc_sigcontext.sc_rsp;
1162         LX_REG(sigucp, REG_SS) = lx_ucp->uc_sigcontext.sc_pad0;
1163         LX_REG(sigucp, REG_FS) = lx_ucp->uc_sigcontext.sc_fs;
1164         LX_REG(sigucp, REG_GS) = lx_ucp->uc_sigcontext.sc_gs;
1165 
1166 #else /* is _ILP32 */
1167         lx_ucp = &lx_ssp->uc;
1168 
1169         /*
1170          * Illumos and Linux both follow the SysV i386 ABI layout for the
1171          * mcontext.
1172          *
1173          * General registers copy across as-is, except Linux expects that
1174          * changes made to uc_mcontext.gregs[ESP] will be reflected when the
1175          * interrupted thread resumes execution after the signal handler. To
1176          * emulate this behavior, we must modify uc_mcontext.gregs[UESP] to
1177          * match uc_mcontext.gregs[ESP] as Illumos will restore the UESP value
1178          * to ESP.
1179          */
1180         lx_ucp->uc_sigcontext.sc_esp_at_signal = lx_ucp->uc_sigcontext.sc_esp;
1181 
1182         bcopy(&lx_ucp->uc_sigcontext, &sigucp->uc_mcontext.gregs,
1183             sizeof (gregset_t));
1184 #endif
1185 
1186         LX_SIGRETURN(lx_ucp, sigucp, sp);
1187 
1188         if (lx_ucp->uc_sigcontext.sc_fpstate != NULL) {
1189                 ltos_fpstate(lx_ucp->uc_sigcontext.sc_fpstate,
1190                     &sigucp->uc_mcontext.fpregs);
1191         }
1192 
1193         /*
1194          * Convert the Linux signal mask and stack back to their
1195          * Illumos equivalents.
1196          */
1197         (void) ltos_sigset(&lx_ucp->uc_sigmask, &sigucp->uc_sigmask);
1198         ltos_stack(&lx_ucp->uc_stack, &sigucp->uc_stack);
1199 
1200         /*
1201          * For signal mask handling to be done properly, this call needs to
1202          * return to the libc routine that originally called the signal handler
1203          * rather than directly set the context back to the place the signal
1204          * interrupted execution as the original Linux code would do.
1205          */
1206         lx_debug("lx_rt_sigreturn: calling setcontext; retucp %p\n", retucp);
1207         setcontext(retucp);
1208         assert(0);
1209 
1210         /*NOTREACHED*/
1211         return (0);
1212 }
1213 
1214 
1215 #if defined(_ILP32)
1216 /*
1217  * Build signal frame for processing for "old" (legacy) Linux signals
1218  * This stack-builder function is only used by 32-bit code.
1219  */
1220 static void
1221 lx_build_old_signal_frame(int lx_sig, siginfo_t *sip, void *p, void *sp,
1222     uintptr_t *hargs)
1223 {
1224         extern void lx_sigreturn_tramp();
1225 
1226         lx_sigset_t lx_sigset;
1227         ucontext_t *ucp = (ucontext_t *)p;
1228         struct lx_sigaction *lxsap;
1229         struct lx_oldsigstack *lx_ossp = sp;
1230 
1231         lx_debug("building old signal frame for lx sig %d at 0x%p", lx_sig, sp);
1232 
1233         lx_ossp->sig = lx_sig;
1234         lxsap = &lx_sighandlers.lx_sa[lx_sig];
1235         lx_debug("lxsap @ 0x%p", lxsap);
1236 
1237         if (lxsap && (lxsap->lxsa_flags & LX_SA_RESTORER) &&
1238             lxsap->lxsa_restorer) {
1239                 lx_ossp->retaddr = lxsap->lxsa_restorer;
1240                 lx_debug("lxsa_restorer exists @ 0x%p", lx_ossp->retaddr);
1241         } else {
1242                 lx_ossp->retaddr = lx_sigreturn_tramp;
1243                 lx_debug("lx_ossp->retaddr set to 0x%p", lx_sigreturn_tramp);
1244         }
1245 
1246         lx_debug("osf retaddr = 0x%p", lx_ossp->retaddr);
1247 
1248         /* convert Illumos signal mask and stack to their Linux equivalents */
1249         (void) stol_sigset(&ucp->uc_sigmask, &lx_sigset);
1250         lx_ossp->sigc.sc_mask = lx_sigset.__bits[0];
1251         lx_ossp->sig_extra = lx_sigset.__bits[1];
1252 
1253         /*
1254          * General registers copy across as-is, except Linux expects that
1255          * uc_mcontext.gregs[ESP] == uc_mcontext.gregs[UESP] on receipt of a
1256          * signal.
1257          */
1258         bcopy(&ucp->uc_mcontext, &lx_ossp->sigc, sizeof (gregset_t));
1259         lx_ossp->sigc.sc_esp = lx_ossp->sigc.sc_esp_at_signal;
1260 
1261         /*
1262          * cr2 contains the faulting address, and Linux only sets cr2 for a
1263          * a segmentation fault.
1264          */
1265         lx_ossp->sigc.sc_cr2 = (((lx_sig == LX_SIGSEGV) && (sip)) ?
1266             (uintptr_t)sip->si_addr : 0);
1267 
1268         /* convert FP regs if present */
1269         if (ucp->uc_flags & UC_FPU) {
1270                 stol_fpstate(&ucp->uc_mcontext.fpregs, &lx_ossp->fpstate);
1271                 lx_ossp->sigc.sc_fpstate = &lx_ossp->fpstate;
1272         } else {
1273                 lx_ossp->sigc.sc_fpstate = NULL;
1274         }
1275 
1276         /*
1277          * Believe it or not, gdb wants to SEE the trampoline code on the
1278          * bottom of the stack to determine whether the stack frame belongs to
1279          * a signal handler, even though this code is no longer actually
1280          * called.
1281          *
1282          * You can't make this stuff up.
1283          */
1284         bcopy((void *)lx_sigreturn_tramp, lx_ossp->trampoline,
1285             sizeof (lx_ossp->trampoline));
1286 }
1287 #endif
1288 
1289 /*
1290  * Build stack frame (32-bit) or stack local data (64-bit) for processing for
1291  * modern Linux signals. This is the only stack-builder function for 64-bit
1292  * code (32-bit code also calls this when using "modern" signals).
1293  */
1294 static void
1295 lx_build_signal_frame(int lx_sig, siginfo_t *sip, void *p, void *sp,
1296     uintptr_t *hargs)
1297 {
1298         extern void lx_rt_sigreturn_tramp();
1299 
1300         lx_ucontext_t *lx_ucp;
1301         ucontext_t *ucp = (ucontext_t *)p;
1302         struct lx_sigstack *lx_ssp = sp;
1303         struct lx_sigaction *lxsap;
1304 
1305         lx_debug("building signal frame for lx sig %d at 0x%p", lx_sig, sp);
1306 
1307         lx_ucp = &lx_ssp->uc;
1308 #if defined(_ILP32)
1309         /*
1310          * Arguments are passed to the 32-bit signal handler on the stack.
1311          */
1312         lx_ssp->ucp = lx_ucp;
1313         lx_ssp->sip = sip != NULL ? &lx_ssp->si : NULL;
1314         lx_ssp->sig = lx_sig;
1315 #else
1316         /*
1317          * Arguments to the 64-bit signal handler are passed in registers:
1318          *   hdlr(int sig, siginfo_t *sip, void *ucp);
1319          */
1320         hargs[0] = lx_sig;
1321         hargs[1] = sip != NULL ? (uintptr_t)&lx_ssp->si : NULL;
1322         hargs[2] = (uintptr_t)lx_ucp;
1323 #endif
1324 
1325         lxsap = &lx_sighandlers.lx_sa[lx_sig];
1326         lx_debug("lxsap @ 0x%p", lxsap);
1327 
1328         if (lxsap && (lxsap->lxsa_flags & LX_SA_RESTORER) &&
1329             lxsap->lxsa_restorer) {
1330                 /*
1331                  * lxsa_restorer is explicitly set by sigaction in 32-bit code
1332                  * but it can also be implicitly set for both 32 and 64 bit
1333                  * code via lx_sigaction_common when we bcopy the user-supplied
1334                  * lx_sigaction element into the proper slot in the sighandler
1335                  * array.
1336                  */
1337                 lx_ssp->retaddr = lxsap->lxsa_restorer;
1338                 lx_debug("lxsa_restorer exists @ 0x%p", lx_ssp->retaddr);
1339         } else {
1340                 lx_ssp->retaddr = lx_rt_sigreturn_tramp;
1341                 lx_debug("lx_ssp->retaddr set to 0x%p", lx_rt_sigreturn_tramp);
1342         }
1343 
1344         /* Linux has these fields but always clears them to 0 */
1345         lx_ucp->uc_flags = 0;
1346         lx_ucp->uc_link = NULL;
1347 
1348         /* convert Illumos signal mask and stack to their Linux equivalents */
1349         (void) stol_sigset(&ucp->uc_sigmask, &lx_ucp->uc_sigmask);
1350         stol_stack(&ucp->uc_stack, &lx_ucp->uc_stack);
1351 
1352 #if defined(_LP64)
1353         /*
1354          * General register layout is completely different.
1355          */
1356         lx_ucp->uc_sigcontext.sc_r8 = LX_REG(ucp, REG_R8);
1357         lx_ucp->uc_sigcontext.sc_r9 = LX_REG(ucp, REG_R9);
1358         lx_ucp->uc_sigcontext.sc_r10 = LX_REG(ucp, REG_R10);
1359         lx_ucp->uc_sigcontext.sc_r11 = LX_REG(ucp, REG_R11);
1360         lx_ucp->uc_sigcontext.sc_r12 = LX_REG(ucp, REG_R12);
1361         lx_ucp->uc_sigcontext.sc_r13 = LX_REG(ucp, REG_R13);
1362         lx_ucp->uc_sigcontext.sc_r14 = LX_REG(ucp, REG_R14);
1363         lx_ucp->uc_sigcontext.sc_r15 = LX_REG(ucp, REG_R15);
1364         lx_ucp->uc_sigcontext.sc_rdi = LX_REG(ucp, REG_RDI);
1365         lx_ucp->uc_sigcontext.sc_rsi = LX_REG(ucp, REG_RSI);
1366         lx_ucp->uc_sigcontext.sc_rbp = LX_REG(ucp, REG_RBP);
1367         lx_ucp->uc_sigcontext.sc_rbx = LX_REG(ucp, REG_RBX);
1368         lx_ucp->uc_sigcontext.sc_rdx = LX_REG(ucp, REG_RDX);
1369         lx_ucp->uc_sigcontext.sc_rax = LX_REG(ucp, REG_RAX);
1370         lx_ucp->uc_sigcontext.sc_rcx = LX_REG(ucp, REG_RCX);
1371         lx_ucp->uc_sigcontext.sc_rsp = LX_REG(ucp, REG_RSP);
1372         lx_ucp->uc_sigcontext.sc_rip = LX_REG(ucp, REG_RIP);
1373         lx_ucp->uc_sigcontext.sc_eflags = LX_REG(ucp, REG_RFL);
1374         lx_ucp->uc_sigcontext.sc_cs = LX_REG(ucp, REG_CS);
1375         lx_ucp->uc_sigcontext.sc_gs = LX_REG(ucp, REG_GS);
1376         lx_ucp->uc_sigcontext.sc_fs = LX_REG(ucp, REG_FS);
1377         lx_ucp->uc_sigcontext.sc_pad0 = LX_REG(ucp, REG_SS);
1378         lx_ucp->uc_sigcontext.sc_err = LX_REG(ucp, REG_ERR);
1379         lx_ucp->uc_sigcontext.sc_trapno = LX_REG(ucp, REG_TRAPNO);
1380 
1381 #else /* is _ILP32 */
1382         /*
1383          * General registers copy across as-is, except Linux expects that
1384          * uc_mcontext.gregs[ESP] == uc_mcontext.gregs[UESP] on receipt of a
1385          * signal.
1386          */
1387         bcopy(&ucp->uc_mcontext, &lx_ucp->uc_sigcontext, sizeof (gregset_t));
1388         lx_ucp->uc_sigcontext.sc_esp = lx_ucp->uc_sigcontext.sc_esp_at_signal;
1389 #endif
1390 
1391         /*
1392          * cr2 contains the faulting address, which Linux only sets for a
1393          * a segmentation fault.
1394          */
1395         lx_ucp->uc_sigcontext.sc_cr2 = ((lx_sig == LX_SIGSEGV) && (sip)) ?
1396             (uintptr_t)sip->si_addr : 0;
1397 
1398         /*
1399          * This should only return an error if the signum is invalid but that
1400          * also gets converted into a LX_SIGKILL by this function.
1401          */
1402         if (sip != NULL)
1403                 (void) stol_siginfo(sip, &lx_ssp->si);
1404         else
1405                 bzero(&lx_ssp->si, sizeof (lx_siginfo_t));
1406 
1407         /* convert FP regs if present */
1408         if (ucp->uc_flags & UC_FPU) {
1409                 /*
1410                  * Copy FP regs to the appropriate place in the the lx_sigstack
1411                  * structure.
1412                  */
1413                 stol_fpstate(&ucp->uc_mcontext.fpregs, &lx_ssp->fpstate);
1414                 lx_ucp->uc_sigcontext.sc_fpstate = &lx_ssp->fpstate;
1415         } else {
1416                 lx_ucp->uc_sigcontext.sc_fpstate = NULL;
1417         }
1418 
1419 #if defined(_ILP32)
1420         /*
1421          * Believe it or not, gdb wants to SEE the sigreturn code on the
1422          * top of the stack to determine whether the stack frame belongs to
1423          * a signal handler, even though this code is not actually called.
1424          *
1425          * You can't make this stuff up.
1426          */
1427         bcopy((void *)lx_rt_sigreturn_tramp, lx_ssp->trampoline,
1428             sizeof (lx_ssp->trampoline));
1429 #endif
1430 }
1431 
1432 /*
1433  * This is the interposition handler for Linux signals.
1434  */
1435 static void
1436 lx_call_user_handler(int sig, siginfo_t *sip, void *p)
1437 {
1438         void (*user_handler)();
1439         void (*stk_builder)();
1440         struct lx_sigaction *lxsap;
1441         ucontext_t *ucp = (ucontext_t *)p;
1442         size_t stksize;
1443         int lx_sig;
1444 
1445         /*
1446          * If Illumos signal has no Linux equivalent, effectively ignore it.
1447          */
1448         if ((lx_sig = stol_signo[sig]) == -1) {
1449                 lx_unsupported("caught Illumos signal %d, no Linux equivalent",
1450                     sig);
1451                 return;
1452         }
1453 
1454         lx_debug("interpose caught Illumos signal %d, translating to Linux "
1455             "signal %d", sig, lx_sig);
1456 
1457         lxsap = &lx_sighandlers.lx_sa[lx_sig];
1458         lx_debug("lxsap @ 0x%p", lxsap);
1459 
1460         if ((sig == SIGPWR) && (lxsap->lxsa_handler == SIG_DFL)) {
1461                 /*
1462                  * Linux SIG_DFL for SIGPWR is to terminate. The lx wait
1463                  * emulation will translate SIGPWR to LX_SIGPWR.
1464                  */
1465                 (void) syscall(SYS_brand, B_EXIT_AS_SIG, SIGPWR);
1466                 /* This should never return */
1467                 assert(0);
1468         }
1469 
1470         if (lxsap->lxsa_handler == SIG_DFL || lxsap->lxsa_handler == SIG_IGN)
1471                 lx_err_fatal("lxsa_handler set to %s?  How?!?!?",
1472                     (lxsap->lxsa_handler == SIG_DFL) ? "SIG_DFL" : "SIG_IGN");
1473 
1474 #if defined(_LP64)
1475         stksize = sizeof (struct lx_sigstack);
1476         stk_builder = lx_build_signal_frame;
1477 #else
1478         if (lxsap->lxsa_flags & LX_SA_SIGINFO) {
1479                 stksize = sizeof (struct lx_sigstack);
1480                 stk_builder = lx_build_signal_frame;
1481         } else  {
1482                 stksize = sizeof (struct lx_oldsigstack);
1483                 stk_builder = lx_build_old_signal_frame;
1484         }
1485 #endif
1486 
1487         user_handler = lxsap->lxsa_handler;
1488 
1489         lx_debug("delivering %d (lx %d) to handler at 0x%p", sig, lx_sig,
1490             lxsap->lxsa_handler);
1491 
1492         if (lxsap->lxsa_flags & LX_SA_RESETHAND)
1493                 lxsap->lxsa_handler = SIG_DFL;
1494 
1495         lx_sigdeliver(lx_sig, sip, ucp, stksize, stk_builder, user_handler,
1496             lxsap);
1497 
1498         /*
1499          * We need to handle restarting system calls if requested by the
1500          * program for this signal type:
1501          */
1502         if (lxsap->lxsa_flags & LX_SA_RESTART) {
1503                 uintptr_t flags = (uintptr_t)ucp->uc_brand_data[0];
1504                 long ret = (long)LX_REG(ucp, REG_R0);
1505                 boolean_t interrupted = (ret == -lx_errno(EINTR, -1));
1506 
1507                 /*
1508                  * If the system call returned EINTR, and the system
1509                  * call handler set "br_syscall_restart" when returning,
1510                  * we modify the context to try the system call again
1511                  * when we return from this signal handler.
1512                  */
1513                 if ((flags & LX_UC_RESTART_SYSCALL) && interrupted) {
1514                         int syscall_num = (int)(uintptr_t)ucp->uc_brand_data[2];
1515 
1516                         lx_debug("restarting interrupted system call %d",
1517                             syscall_num);
1518 
1519                         /*
1520                          * Both the "int 0x80" and the "syscall" instruction
1521                          * are two bytes long.  Wind the program counter back
1522                          * to the start of this instruction.
1523                          *
1524                          * The system call we interrupted is preserved in the
1525                          * brand-specific data in the ucontext_t when the
1526                          * LX_UC_RESTART_SYSCALL flag is set.  This is
1527                          * analogous to the "orig_[er]ax" field in the Linux
1528                          * "user_regs_struct".
1529                          */
1530                         LX_REG(ucp, REG_PC) -= 2;
1531                         LX_REG(ucp, REG_R0) = syscall_num;
1532                 }
1533         }
1534 }
1535 
1536 /*
1537  * The "lx_sigdeliver()" function is responsible for constructing the emulated
1538  * signal delivery frame on the brand stack for this LWP.  A context is saved
1539  * on the stack which will be used by the "sigreturn(2)" family of emulated
1540  * system calls to get us back here after the Linux signal handler returns.
1541  * This function is modelled on the in-kernel "sendsig()" signal delivery
1542  * mechanism.
1543  */
1544 void
1545 lx_sigdeliver(int lx_sig, siginfo_t *sip, ucontext_t *ucp, size_t stacksz,
1546     void (*stack_builder)(), void (*user_handler)(),
1547     struct lx_sigaction *lxsap)
1548 {
1549         lx_sigbackup_t sigbackup;
1550         ucontext_t uc;
1551         lx_tsd_t *lxtsd = lx_get_tsd();
1552         int totsz = 0;
1553         uintptr_t flags;
1554         uintptr_t hargs[3];
1555         /*
1556          * These variables must be "volatile", as they are modified after the
1557          * getcontext() stores the register state:
1558          */
1559         volatile boolean_t signal_delivered = B_FALSE;
1560         volatile uintptr_t lxfp;
1561         volatile uintptr_t old_tsd_sp;
1562         volatile int newstack;
1563 
1564         /*
1565          * This function involves modifying the Linux process stack for this
1566          * thread.  To do so without corruption requires us to exclude other
1567          * signal handlers (or emulated system calls called from within those
1568          * handlers) from running while we reserve space on that stack.  We
1569          * defer the execution of further instances of lx_call_user_handler()
1570          * until we have completed this operation.
1571          */
1572         _sigoff();
1573 
1574         /*
1575          * Clear register arguments vector.
1576          */
1577         bzero(hargs, sizeof (hargs));
1578 
1579         /*
1580          * We save a context here so that we can be returned later to complete
1581          * handling the signal.
1582          */
1583         lx_debug("lx_sigdeliver: STORING RETURN CONTEXT @ %p\n", &uc);
1584         assert(getcontext(&uc) == 0);
1585         lx_debug("lx_sigdeliver: RETURN CONTEXT %p LINK %p FLAGS %lx\n",
1586             &uc, uc.uc_link, uc.uc_flags);
1587         if (signal_delivered) {
1588                 /*
1589                  * If the "signal_delivered" flag is set, we are returned here
1590                  * via setcontext() as called by the emulated Linux signal
1591                  * return system call.
1592                  */
1593                 lx_debug("lx_sigdeliver: WE ARE BACK, VIA UC @ %p!\n", &uc);
1594                 goto after_signal_handler;
1595         }
1596         signal_delivered = B_TRUE;
1597 
1598         /*
1599          * Preserve the current tsd value of the Linux process stack pointer,
1600          * even if it is zero.  We will restore it when we are returned here
1601          * via setcontext() after the Linux process has completed execution of
1602          * its signal handler.
1603          */
1604         old_tsd_sp = lxtsd->lxtsd_lx_sp;
1605 
1606         /*
1607          * Figure out whether we will be handling this signal on an alternate
1608          * stack specified by the user.
1609          */
1610         newstack = (lxsap->lxsa_flags & LX_SA_ONSTACK) &&
1611             !(lxtsd->lxtsd_sigaltstack.ss_flags & (LX_SS_ONSTACK |
1612             LX_SS_DISABLE));
1613 
1614         /*
1615          * Find the first unused region of the Linux process stack, where
1616          * we will assemble our signal delivery frame.
1617          */
1618         flags = (uintptr_t)ucp->uc_brand_data[0];
1619         if (newstack) {
1620                 /*
1621                  * We are moving to the user-provided alternate signal
1622                  * stack.
1623                  */
1624                 lxfp = SA((uintptr_t)lxtsd->lxtsd_sigaltstack.ss_sp) +
1625                     SA(lxtsd->lxtsd_sigaltstack.ss_size) - STACK_ALIGN;
1626                 lx_debug("lx_sigdeliver: moving to ALTSTACK sp %p\n", lxfp);
1627                 LX_SIGNAL_ALTSTACK_ENABLE(lxfp);
1628         } else if (flags & LX_UC_STACK_BRAND) {
1629                 /*
1630                  * We interrupted the Linux process to take this signal.  The
1631                  * stack pointer is the one saved in this context.
1632                  */
1633                 lxfp = LX_REG(ucp, REG_SP);
1634         } else {
1635                 /*
1636                  * We interrupted a native (emulation) routine, so we must get
1637                  * the current stack pointer from either the tsd (if one is
1638                  * stored there) or via the context chain.
1639                  *
1640                  */
1641                 lxfp = lx_find_brand_sp();
1642                 if (lxtsd->lxtsd_lx_sp != 0) {
1643                         /*
1644                          * We must also make room for the possibility of nested
1645                          * signal delivery -- we may be pre-empting the
1646                          * in-progress handling of another signal.
1647                          *
1648                          * Note that if we were already on the alternate stack,
1649                          * any emulated Linux system calls would be betwixt
1650                          * that original signal frame and this new one on the
1651                          * one contiguous stack, so this logic holds either
1652                          * way:
1653                          */
1654                         lxfp = MIN(lxtsd->lxtsd_lx_sp, lxfp);
1655                 }
1656         }
1657 
1658         /*
1659          * Account for a reserved stack region (for amd64, this is 128 bytes),
1660          * and align the stack:
1661          */
1662         lxfp -= STACK_RESERVE;
1663         lxfp &= ~(STACK_ALIGN - 1);
1664 
1665         /*
1666          * Allocate space on the Linux process stack for our delivery frame,
1667          * including:
1668          *
1669          *   ----------------------------------------------------- old %sp
1670          *   - lx_sigdeliver_frame_t
1671          *   - (ucontext_t pointers and stack magic)
1672          *   -----------------------------------------------------
1673          *   - (amd64-only 8-byte alignment gap)
1674          *   -----------------------------------------------------
1675          *   - frame of size "stacksz" from the stack builder
1676          *   ----------------------------------------------------- new %sp
1677          */
1678 #if defined(_LP64)
1679         /*
1680          * The AMD64 ABI requires us to align the stack such that when the
1681          * called function pushes the base pointer, the stack is 16 byte
1682          * aligned.  The stack must, therefore, be 8- but _not_ 16-byte
1683          * aligned.
1684          */
1685 #if (STACK_ALIGN != 16) || (STACK_ENTRY_ALIGN != 8)
1686 #error "lx_sigdeliver() did not find expected stack alignment"
1687 #endif
1688         totsz = SA(sizeof (lx_sigdeliver_frame_t)) + SA(stacksz) + 8;
1689         assert((totsz & (STACK_ENTRY_ALIGN - 1)) == 0);
1690         assert((totsz & (STACK_ALIGN - 1)) == 8);
1691 #else
1692         totsz = SA(sizeof (lx_sigdeliver_frame_t)) + SA(stacksz);
1693         assert((totsz & (STACK_ALIGN - 1)) == 0);
1694 #endif
1695 
1696         /*
1697          * Copy our return frame into place:
1698          */
1699         lxfp -= SA(sizeof (lx_sigdeliver_frame_t));
1700         lx_debug("lx_sigdeliver: lx_sigdeliver_frame_t @ %p\n", lxfp);
1701         {
1702                 lx_sigdeliver_frame_t frm;
1703 
1704                 frm.lxsdf_magic = LX_SIGRT_MAGIC;
1705                 frm.lxsdf_retucp = &uc;
1706                 frm.lxsdf_sigucp = ucp;
1707                 frm.lxsdf_sigbackup = &sigbackup;
1708 
1709                 lx_debug("lx_sigdeliver: retucp %p sigucp %p\n",
1710                     frm.lxsdf_retucp, frm.lxsdf_sigucp);
1711 
1712                 if (uucopy(&frm, (void *)lxfp, sizeof (frm)) != 0) {
1713                         /*
1714                          * We could not modify the stack of the emulated Linux
1715                          * program.  Act like the kernel and terminate the
1716                          * program with a segmentation violation.
1717                          */
1718                         (void) syscall(SYS_brand, B_EXIT_AS_SIG, SIGSEGV);
1719                 }
1720 
1721                 LX_SIGNAL_DELIVERY_FRAME_CREATE((void *)lxfp);
1722 
1723                 /*
1724                  * Populate a backup copy of signal linkage to use in case
1725                  * the Linux program completely destroys (or relocates) the
1726                  * delivery frame.
1727                  *
1728                  * This is necessary for programs that have flown so far off
1729                  * the architectural rails that they believe it is
1730                  * acceptable to make assumptions about the precise size and
1731                  * layout of the signal handling frame assembled by the
1732                  * kernel.
1733                  */
1734                 sigbackup.lxsb_retucp = frm.lxsdf_retucp;
1735                 sigbackup.lxsb_sigucp = frm.lxsdf_sigucp;
1736                 sigbackup.lxsb_sigdeliver_frame = lxfp;
1737                 sigbackup.lxsb_previous = lxtsd->lxtsd_sigbackup;
1738                 lxtsd->lxtsd_sigbackup = &sigbackup;
1739 
1740                 lx_debug("lx_sigdeliver: installed sigbackup %p; prev %p\n",
1741                     &sigbackup, sigbackup.lxsb_previous);
1742         }
1743 
1744         /*
1745          * Build the Linux signal handling frame:
1746          */
1747 #if defined(_LP64)
1748         lxfp -= SA(stacksz) + 8;
1749 #else
1750         lxfp -= SA(stacksz);
1751 #endif
1752         lx_debug("lx_sigdeliver: Linux sig frame @ %p\n", lxfp);
1753         stack_builder(lx_sig, sip, ucp, lxfp, hargs);
1754 
1755         /*
1756          * Record our reservation so that any nested signal handlers
1757          * can see it.
1758          */
1759         lx_debug("lx_sigdeliver: Linux tsd sp %p -> %p\n", lxtsd->lxtsd_lx_sp,
1760             lxfp);
1761         lxtsd->lxtsd_lx_sp = lxfp;
1762 
1763         if (newstack) {
1764                 lxtsd->lxtsd_sigaltstack.ss_flags |= LX_SS_ONSTACK;
1765         }
1766 
1767         LX_SIGDELIVER(lx_sig, lxsap, (void *)lxfp);
1768 
1769         /*
1770          * Re-enable signal delivery.  If a signal was queued while we were
1771          * in the critical section, it will be delivered immediately.
1772          */
1773         _sigon();
1774 
1775         /*
1776          * Pass control to the Linux signal handler:
1777          */
1778         lx_debug("lx_sigdeliver: JUMPING TO LINUX (sig %d sp %p eip %p)\n",
1779             lx_sig, lxfp, user_handler);
1780         {
1781                 ucontext_t jump_uc;
1782 
1783                 bcopy(lx_find_brand_uc(), &jump_uc, sizeof (jump_uc));
1784 
1785                 /*
1786                  * We want to load the general registers from this context, and
1787                  * switch to the BRAND stack.  We do _not_ want to restore the
1788                  * uc_link value from this synthetic context, as that would
1789                  * break the signal handling context chain.
1790                  */
1791                 jump_uc.uc_flags = UC_CPU;
1792                 jump_uc.uc_brand_data[0] = (void *)(LX_UC_STACK_BRAND |
1793                     LX_UC_IGNORE_LINK);
1794 
1795                 LX_REG(&jump_uc, REG_FP) = 0;
1796                 LX_REG(&jump_uc, REG_SP) = lxfp;
1797                 LX_REG(&jump_uc, REG_PC) = (uintptr_t)user_handler;
1798 
1799 #if defined(_LP64)
1800                 /*
1801                  * Pass signal handler arguments by registers on AMD64.
1802                  */
1803                 LX_REG(&jump_uc, REG_RDI) = hargs[0];
1804                 LX_REG(&jump_uc, REG_RSI) = hargs[1];
1805                 LX_REG(&jump_uc, REG_RDX) = hargs[2];
1806 #endif
1807 
1808                 if (syscall(SYS_brand, B_JUMP_TO_LINUX, &jump_uc) == -1) {
1809                         lx_err_fatal("B_JUMP_TO_LINUX failed: %s",
1810                             strerror(errno));
1811                 }
1812         }
1813 
1814         assert(0);
1815 
1816 after_signal_handler:
1817         /*
1818          * Ensure all nested signal handlers have completed correctly
1819          * and then remove our stack reservation.
1820          */
1821         _sigoff();
1822         LX_SIGNAL_POST_HANDLER(lxfp, old_tsd_sp);
1823         assert(lxtsd->lxtsd_lx_sp == lxfp);
1824         lx_debug("lx_sigdeliver: after; Linux tsd sp %p -> %p\n", lxfp,
1825             old_tsd_sp);
1826         lxtsd->lxtsd_lx_sp = old_tsd_sp;
1827         if (newstack) {
1828                 LX_SIGNAL_ALTSTACK_DISABLE();
1829                 lx_debug("lx_sigdeliver: disabling ALTSTACK sp %p\n", lxfp);
1830                 lxtsd->lxtsd_sigaltstack.ss_flags &= ~LX_SS_ONSTACK;
1831         }
1832         /*
1833          * Restore backup signal tracking chain pointer to previous value:
1834          */
1835         if (lxtsd->lxtsd_sigbackup != NULL) {
1836                 lx_sigbackup_t *bprev = lxtsd->lxtsd_sigbackup->lxsb_previous;
1837 
1838                 lx_debug("lx_sigdeliver: restoring sigbackup %p to %p\n",
1839                     lxtsd->lxtsd_sigbackup, bprev);
1840 
1841                 lxtsd->lxtsd_sigbackup = bprev;
1842         }
1843         _sigon();
1844 
1845         /*
1846          * Here we return to libc so that it may clean up and restore the
1847          * context originally interrupted by this signal.
1848          */
1849 }
1850 
1851 /*
1852  * Common routine to modify sigaction characteristics of a thread.
1853  *
1854  * We shouldn't need any special locking code here as we actually use our copy
1855  * of libc's sigaction() to do all the real work, so its thread locking should
1856  * take care of any issues for us.
1857  */
1858 static int
1859 lx_sigaction_common(int lx_sig, struct lx_sigaction *lxsp,
1860     struct lx_sigaction *olxsp)
1861 {
1862         struct lx_sigaction *lxsap;
1863         struct sigaction sa;
1864 
1865         if (lx_sig <= 0 || lx_sig > LX_NSIG)
1866                 return (-EINVAL);
1867 
1868         lxsap = &lx_sighandlers.lx_sa[lx_sig];
1869         lx_debug("&lx_sighandlers.lx_sa[%d] = 0x%p", lx_sig, lxsap);
1870 
1871         if ((olxsp != NULL) &&
1872             ((uucopy(lxsap, olxsp, sizeof (struct lx_sigaction))) != 0))
1873                 return (-errno);
1874 
1875         if (lxsp != NULL) {
1876                 int err, sig;
1877                 struct lx_sigaction lxsa;
1878                 sigset_t new_set, oset;
1879 
1880                 if (uucopy(lxsp, &lxsa, sizeof (struct lx_sigaction)) != 0)
1881                         return (-errno);
1882 
1883                 if ((sig = ltos_signo[lx_sig]) != -1) {
1884                         if (lx_no_abort_handler != 0) {
1885                                 /*
1886                                  * If LX_NO_ABORT_HANDLER has been set, we will
1887                                  * not allow the emulated program to do
1888                                  * anything hamfisted with SIGSEGV or SIGABRT
1889                                  * signals.
1890                                  */
1891                                 if (sig == SIGSEGV || sig == SIGABRT) {
1892                                         return (0);
1893                                 }
1894                         }
1895 
1896                         /*
1897                          * Block this signal while messing with its dispostion
1898                          */
1899                         (void) sigemptyset(&new_set);
1900                         (void) sigaddset(&new_set, sig);
1901 
1902                         if (sigprocmask(SIG_BLOCK, &new_set, &oset) < 0) {
1903                                 err = errno;
1904                                 lx_debug("unable to block signal %d: %s", sig,
1905                                     strerror(err));
1906                                 return (-err);
1907                         }
1908 
1909                         /*
1910                          * We don't really need the old signal disposition at
1911                          * this point, but this weeds out signals that would
1912                          * cause sigaction() to return an error before we change
1913                          * anything other than the current signal mask.
1914                          */
1915                         if (sigaction(sig, NULL, &sa) < 0) {
1916                                 err = errno;
1917                                 lx_debug("sigaction() to get old "
1918                                     "disposition for signal %d failed: "
1919                                     "%s", sig, strerror(err));
1920                                 (void) sigprocmask(SIG_SETMASK, &oset, NULL);
1921                                 return (-err);
1922                         }
1923 
1924                         if ((lxsa.lxsa_handler != SIG_DFL) &&
1925                             (lxsa.lxsa_handler != SIG_IGN)) {
1926                                 sa.sa_handler = lx_call_user_handler;
1927 
1928                                 /*
1929                                  * The interposition signal handler needs the
1930                                  * information provided via the SA_SIGINFO flag.
1931                                  */
1932                                 sa.sa_flags = SA_SIGINFO;
1933 
1934                                 /*
1935                                  * When translating from Linux to illumos
1936                                  * sigaction(2) flags, we explicitly do not
1937                                  * pass SA_ONSTACK to the kernel.  The
1938                                  * alternate stack for Linux signal handling is
1939                                  * handled entirely by the emulation code.
1940                                  */
1941                                 if (lxsa.lxsa_flags & LX_SA_NOCLDSTOP)
1942                                         sa.sa_flags |= SA_NOCLDSTOP;
1943                                 if (lxsa.lxsa_flags & LX_SA_NOCLDWAIT)
1944                                         sa.sa_flags |= SA_NOCLDWAIT;
1945                                 if (lxsa.lxsa_flags & LX_SA_RESTART)
1946                                         sa.sa_flags |= SA_RESTART;
1947                                 if (lxsa.lxsa_flags & LX_SA_NODEFER)
1948                                         sa.sa_flags |= SA_NODEFER;
1949 
1950                                 /*
1951                                  * RESETHAND cannot be used be passed through
1952                                  * for SIGPWR due to different default actions
1953                                  * between Linux and Illumos.
1954                                  */
1955                                 if ((sig != SIGPWR) &&
1956                                     (lxsa.lxsa_flags & LX_SA_RESETHAND))
1957                                         sa.sa_flags |= SA_RESETHAND;
1958 
1959                                 if (ltos_sigset(&lxsa.lxsa_mask,
1960                                     &sa.sa_mask) != 0) {
1961                                         err = errno;
1962                                         (void) sigprocmask(SIG_SETMASK, &oset,
1963                                             NULL);
1964                                         return (-err);
1965                                 }
1966 
1967                                 lx_debug("interposing handler @ 0x%p for "
1968                                     "signal %d (lx %d), flags 0x%x",
1969                                     lxsa.lxsa_handler, sig, lx_sig,
1970                                     lxsa.lxsa_flags);
1971 
1972                                 if (sigaction(sig, &sa, NULL) < 0) {
1973                                         err = errno;
1974                                         lx_debug("sigaction() to set new "
1975                                             "disposition for signal %d failed: "
1976                                             "%s", sig, strerror(err));
1977                                         (void) sigprocmask(SIG_SETMASK, &oset,
1978                                             NULL);
1979                                         return (-err);
1980                                 }
1981                         } else if ((sig != SIGPWR) ||
1982                             ((sig == SIGPWR) &&
1983                             (lxsa.lxsa_handler == SIG_IGN))) {
1984                                 /*
1985                                  * There's no need to interpose for SIG_DFL or
1986                                  * SIG_IGN so just call our copy of libc's
1987                                  * sigaction(), but don't allow SIG_DFL for
1988                                  * SIGPWR due to differing default actions
1989                                  * between Linux and Illumos.
1990                                  *
1991                                  * Get the previous disposition first so things
1992                                  * like sa_mask and sa_flags are preserved over
1993                                  * a transition to SIG_DFL or SIG_IGN, which is
1994                                  * what Linux expects.
1995                                  */
1996 
1997                                 sa.sa_handler = lxsa.lxsa_handler;
1998 
1999                                 if (sigaction(sig, &sa, NULL) < 0) {
2000                                         err = errno;
2001                                         lx_debug("sigaction(%d, %s) failed: %s",
2002                                             sig, ((sa.sa_handler == SIG_DFL) ?
2003                                             "SIG_DFL" : "SIG_IGN"),
2004                                             strerror(err));
2005                                         (void) sigprocmask(SIG_SETMASK, &oset,
2006                                             NULL);
2007                                         return (-err);
2008                                 }
2009                         }
2010                 } else {
2011                         lx_debug("Linux signal with no kill support "
2012                             "specified: %d", lx_sig);
2013                 }
2014 
2015                 /*
2016                  * Save the new disposition for the signal in the global
2017                  * lx_sighandlers structure.
2018                  */
2019                 bcopy(&lxsa, lxsap, sizeof (struct lx_sigaction));
2020 
2021                 /*
2022                  * Reset the signal mask to what we came in with if
2023                  * we were modifying a kill-supported signal.
2024                  */
2025                 if (sig != -1)
2026                         (void) sigprocmask(SIG_SETMASK, &oset, NULL);
2027         }
2028 
2029         return (0);
2030 }
2031 
#if defined(_ILP32)
/*
 * Emulate the legacy Linux sigaction(2) system call, which is only used in
 * 32-bit code.
 *
 * "actp" and "oactp" are addresses of "struct lx_osigaction" buffers (either
 * may be zero).  The legacy structure carries its mask as an lx_osigset_t
 * bitmask, so this routine converts to and from the "struct lx_sigaction"
 * form understood by lx_sigaction_common(), which does the real work.
 *
 * Both buffers are accessed with uucopy() rather than dereferenced directly,
 * so that an invalid address results in an error return instead of a fault
 * inside the emulation.
 *
 * Returns 0 on success or a negated errno value on failure.
 */
long
lx_sigaction(uintptr_t lx_sig, uintptr_t actp, uintptr_t oactp)
{
	struct lx_sigaction sa, osa;
	struct lx_sigaction *sap = NULL;
	struct lx_sigaction *osap = NULL;
	struct lx_osigaction oldfmt;
	int val;

	/*
	 * If we have a source pointer, fetch it and convert the source
	 * lxsa_mask from lx_osigset_t to lx_sigset_t format.
	 */
	if (actp != 0) {
		if (uucopy((void *)actp, &oldfmt, sizeof (oldfmt)) != 0)
			return (-errno);

		sa.lxsa_handler = oldfmt.lxsa_handler;

		bzero(&sa.lxsa_mask, sizeof (lx_sigset_t));
		for (val = 1; val <= OSIGSET_NBITS; val++) {
			if (oldfmt.lxsa_mask & OSIGSET_BITSET(val))
				(void) lx_sigaddset(&sa.lxsa_mask, val);
		}

		sa.lxsa_flags = oldfmt.lxsa_flags;
		sa.lxsa_restorer = oldfmt.lxsa_restorer;

		sap = &sa;
	}

	if (oactp != 0)
		osap = &osa;

	if ((val = lx_sigaction_common(lx_sig, sap, osap)) != 0)
		return (val);

	/*
	 * If we have a save pointer, convert the old lxsa_mask from
	 * lx_sigset_t to lx_osigset_t format and copy the result out.
	 */
	if (osap != NULL) {
		bzero(&oldfmt, sizeof (oldfmt));

		oldfmt.lxsa_handler = osa.lxsa_handler;

		for (val = 1; val <= OSIGSET_NBITS; val++) {
			if (lx_sigismember(&osa.lxsa_mask, val))
				oldfmt.lxsa_mask |= OSIGSET_BITSET(val);
		}

		oldfmt.lxsa_flags = osa.lxsa_flags;
		oldfmt.lxsa_restorer = osa.lxsa_restorer;

		if (uucopy(&oldfmt, (void *)oactp, sizeof (oldfmt)) != 0)
			return (-errno);
	}

	return (0);
}
#endif
2089 
2090 long
2091 lx_rt_sigaction(uintptr_t lx_sig, uintptr_t actp, uintptr_t oactp,
2092     uintptr_t setsize)
2093 {
2094         /*
2095          * The "new" rt_sigaction call checks the setsize
2096          * parameter.
2097          */
2098         if ((size_t)setsize != sizeof (lx_sigset_t))
2099                 return (-EINVAL);
2100 
2101         return (lx_sigaction_common(lx_sig, (struct lx_sigaction *)actp,
2102             (struct lx_sigaction *)oactp));
2103 }
2104 
#if defined(_ILP32)
/*
 * Convert the Linux signal(2) system call into a call to lx_sigaction().
 * Only used in 32-bit code.
 *
 * On success the previously installed Linux handler is returned; on failure
 * the negated errno value from lx_sigaction() is returned.
 */
long
lx_signal(uintptr_t lx_sig, uintptr_t handler)
{
	/*
	 * lx_sigaction() interprets both of its buffers as Linux
	 * "struct lx_osigaction" objects and expects Linux (LX_SA_*) flag
	 * values, so we must build the request in that form rather than
	 * with the native "struct sigaction" and SA_* flags.
	 */
	struct lx_osigaction act;
	struct lx_osigaction oact;
	int rc;

	/*
	 * Use sigaction to mimic SYSV signal() behavior; glibc will
	 * actually call sigaction(2) itself, so we're really reaching
	 * back for signal(2) semantics here.
	 */
	bzero(&act, sizeof (act));
	bzero(&oact, sizeof (oact));
	act.lxsa_handler = (void (*)())handler;
	act.lxsa_flags = LX_SA_RESETHAND | LX_SA_NODEFER;

	rc = lx_sigaction(lx_sig, (uintptr_t)&act, (uintptr_t)&oact);
	return ((rc == 0) ? ((ssize_t)oact.lxsa_handler) : rc);
}
#endif
2130 
2131 void
2132 lx_sighandlers_save(lx_sighandlers_t *saved)
2133 {
2134         bcopy(&lx_sighandlers, saved, sizeof (lx_sighandlers_t));
2135 }
2136 
2137 void
2138 lx_sighandlers_restore(lx_sighandlers_t *saved)
2139 {
2140         bcopy(saved, &lx_sighandlers, sizeof (lx_sighandlers_t));
2141 }
2142 
2143 int
2144 lx_siginit(void)
2145 {
2146         extern void set_setcontext_enforcement(int);
2147         extern void set_escaped_context_cleanup(int);
2148 
2149         struct sigaction sa;
2150         sigset_t new_set, oset;
2151         int lx_sig, sig;
2152 
2153         if (getenv("LX_NO_ABORT_HANDLER") != NULL) {
2154                 lx_no_abort_handler = 1;
2155         }
2156 
2157         /*
2158          * Block all signals possible while setting up the signal imposition
2159          * mechanism.
2160          */
2161         (void) sigfillset(&new_set);
2162 
2163         if (sigprocmask(SIG_BLOCK, &new_set, &oset) < 0)
2164                 lx_err_fatal("unable to block signals while setting up "
2165                     "imposition mechanism: %s", strerror(errno));
2166 
2167         /*
2168          * Ignore any signals that have no Linux analog so that those
2169          * signals cannot be sent to Linux processes from the global zone
2170          */
2171         for (sig = 1; sig < NSIG; sig++)
2172                 if (stol_signo[sig] < 0)
2173                         (void) sigignore(sig);
2174 
2175         /*
2176          * Mark any signals that are ignored as ignored in our interposition
2177          * handler array
2178          */
2179         for (lx_sig = 1; lx_sig <= LX_NSIG; lx_sig++) {
2180                 if (((sig = ltos_signo[lx_sig]) != -1) &&
2181                     (sigaction(sig, NULL, &sa) < 0))
2182                         lx_err_fatal("unable to determine previous disposition "
2183                             "for signal %d: %s", sig, strerror(errno));
2184 
2185                 if (sa.sa_handler == SIG_IGN) {
2186                         lx_debug("marking signal %d (lx %d) as SIG_IGN",
2187                             sig, lx_sig);
2188                         lx_sighandlers.lx_sa[lx_sig].lxsa_handler = SIG_IGN;
2189                 }
2190         }
2191 
2192         /*
2193          * Have our interposition handler handle SIGPWR to start with,
2194          * as it has a default action of terminating the process in Linux
2195          * but its default is to be ignored in Illumos.
2196          */
2197         (void) sigemptyset(&sa.sa_mask);
2198         sa.sa_sigaction = lx_call_user_handler;
2199         sa.sa_flags = SA_SIGINFO;
2200 
2201         if (sigaction(SIGPWR, &sa, NULL) < 0)
2202                 lx_err_fatal("sigaction(SIGPWR) failed: %s", strerror(errno));
2203 
2204         /*
2205          * Illumos' libc forces certain register values in the ucontext_t
2206          * used to restore a post-signal user context to be those Illumos
2207          * expects; however that is not what we want to happen if the signal
2208          * was taken while branded code was executing, so we must disable
2209          * that behavior.
2210          */
2211         set_setcontext_enforcement(0);
2212 
2213         /*
2214          * The illumos libc attempts to clean up dangling uc_link pointers in
2215          * signal handling contexts when libc believes us to have escaped a
2216          * signal handler incorrectly in the past.  We want to disable this
2217          * behaviour, so that the system call emulation context saved by the
2218          * kernel brand module for lx_emulate() may be part of the context
2219          * chain without itself being used for signal handling.
2220          */
2221         set_escaped_context_cleanup(0);
2222 
2223         /*
2224          * Reset the signal mask to what we came in with.
2225          */
2226         (void) sigprocmask(SIG_SETMASK, &oset, NULL);
2227 
2228         lx_debug("interposition handler setup for SIGPWR");
2229         return (0);
2230 }
2231 
2232 /*
2233  * This code strongly resembles lx_poll(), but is here to be able to take
2234  * advantage of the Linux signal helper routines.
2235  */
2236 long
2237 lx_ppoll(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4, uintptr_t p5)
2238 {
2239         struct pollfd   *lfds, *sfds;
2240         nfds_t          nfds = (nfds_t)p2;
2241         timespec_t      ts, *tsp = NULL;
2242         int             fds_size, i, rval, revents;
2243         lx_sigset_t     lxsig, *lxsigp = NULL;
2244         sigset_t        sigset, *sp = NULL;
2245         rctlblk_t       *rblk;
2246 
2247         lx_debug("\tppoll(0x%p, %d, 0x%p, 0x%p, %d)", p1, p2, p3, p4, p5);
2248 
2249         if (p3 != NULL) {
2250                 if (uucopy((void *)p3, &ts, sizeof (ts)) != 0)
2251                         return (-errno);
2252 
2253                 tsp = &ts;
2254         }
2255 
2256         if (p4 != NULL) {
2257                 if (uucopy((void *)p4, &lxsig, sizeof (lxsig)) != 0)
2258                         return (-errno);
2259 
2260                 lxsigp = &lxsig;
2261                 if ((size_t)p5 != sizeof (lx_sigset_t))
2262                         return (-EINVAL);
2263 
2264                 if (lxsigp) {
2265                         if ((rval = ltos_sigset(lxsigp, &sigset)) != 0)
2266                                 return (rval);
2267 
2268                         sp = &sigset;
2269                 }
2270         }
2271 
2272         /*
2273          * Deal with the NULL fds[] case.
2274          */
2275         if (nfds == 0 || p1 == NULL) {
2276                 if ((rval = ppoll(NULL, 0, tsp, sp)) < 0)
2277                         return (-errno);
2278 
2279                 return (rval);
2280         }
2281 
2282         if (maxfd == 0) {
2283                 if ((rblk = (rctlblk_t *)SAFE_ALLOCA(rctlblk_size())) == NULL)
2284                         return (-ENOMEM);
2285 
2286                 if (getrctl("process.max-file-descriptor", NULL, rblk,
2287                     RCTL_FIRST) == -1)
2288                         return (-EINVAL);
2289 
2290                 maxfd = rctlblk_get_value(rblk);
2291         }
2292 
2293         if (nfds > maxfd)
2294                 return (-EINVAL);
2295 
2296         /*
2297          * Note: we are assuming that the Linux and Illumos pollfd
2298          * structures are identical.  Copy in the Linux poll structure.
2299          */
2300         fds_size = sizeof (struct pollfd) * nfds;
2301         lfds = (struct pollfd *)SAFE_ALLOCA(fds_size);
2302         if (lfds == NULL)
2303                 return (-ENOMEM);
2304         if (uucopy((void *)p1, lfds, fds_size) != 0)
2305                 return (-errno);
2306 
2307         /*
2308          * The poll system call modifies the poll structures passed in
2309          * so we'll need to make an extra copy of them.
2310          */
2311         sfds = (struct pollfd *)SAFE_ALLOCA(fds_size);
2312         if (sfds == NULL)
2313                 return (-ENOMEM);
2314 
2315         /* Convert the Linux events bitmask into the Illumos equivalent. */
2316         for (i = 0; i < nfds; i++) {
2317                 /*
2318                  * If the caller is polling for an unsupported event, we
2319                  * have to bail out.
2320                  */
2321                 if (lfds[i].events & ~LX_POLL_SUPPORTED_EVENTS) {
2322                         lx_unsupported("unsupported poll events requested: "
2323                             "events=0x%x", lfds[i].events);
2324                         return (-ENOTSUP);
2325                 }
2326 
2327                 sfds[i].fd = lfds[i].fd;
2328                 sfds[i].events = lfds[i].events & LX_POLL_COMMON_EVENTS;
2329                 if (lfds[i].events & LX_POLLWRNORM)
2330                         sfds[i].events |= POLLWRNORM;
2331                 if (lfds[i].events & LX_POLLWRBAND)
2332                         sfds[i].events |= POLLWRBAND;
2333                 if (lfds[i].events & LX_POLLRDHUP)
2334                         sfds[i].events |= POLLRDHUP;
2335                 sfds[i].revents = 0;
2336         }
2337 
2338         if ((rval = ppoll(sfds, nfds, tsp, sp)) < 0)
2339                 return (-errno);
2340 
2341         /* Convert the Illumos revents bitmask into the Linux equivalent */
2342         for (i = 0; i < nfds; i++) {
2343                 revents = sfds[i].revents & LX_POLL_COMMON_EVENTS;
2344                 if (sfds[i].revents & POLLWRBAND)
2345                         revents |= LX_POLLWRBAND;
2346                 if (sfds[i].revents & POLLRDHUP)
2347                         revents |= LX_POLLRDHUP;
2348 
2349                 /*
2350                  * Be careful because on Illumos POLLOUT and POLLWRNORM
2351                  * are defined to the same values but on Linux they
2352                  * are not.
2353                  */
2354                 if (sfds[i].revents & POLLOUT) {
2355                         if ((lfds[i].events & LX_POLLOUT) == 0)
2356                                 revents &= ~LX_POLLOUT;
2357                         if (lfds[i].events & LX_POLLWRNORM)
2358                                 revents |= LX_POLLWRNORM;
2359                 }
2360 
2361                 lfds[i].revents = revents;
2362         }
2363 
2364         /* Copy out the results */
2365         if (uucopy(lfds, (void *)p1, fds_size) != 0)
2366                 return (-errno);
2367 
2368         return (rval);
2369 }
2370 
2371 /*
2372  * This code stongly resemebles lx_select(), but is here to be able to take
2373  * advantage of the Linux signal helper routines.
2374  */
2375 long
2376 lx_pselect6(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4,
2377         uintptr_t p5, uintptr_t p6)
2378 {
2379         int nfds = (int)p1;
2380         fd_set *rfdsp = NULL;
2381         fd_set *wfdsp = NULL;
2382         fd_set *efdsp = NULL;
2383         timespec_t ts, *tsp = NULL;
2384         int fd_set_len = howmany(nfds, 8);
2385         int r;
2386         sigset_t sigset, *sp = NULL;
2387 
2388         lx_debug("\tpselect6(%d, 0x%p, 0x%p, 0x%p, 0x%p, 0x%p)",
2389             p1, p2, p3, p4, p4, p6);
2390 
2391         if (nfds > 0) {
2392                 if (p2 != NULL) {
2393                         rfdsp = SAFE_ALLOCA(fd_set_len);
2394                         if (rfdsp == NULL)
2395                                 return (-ENOMEM);
2396                         if (uucopy((void *)p2, rfdsp, fd_set_len) != 0)
2397                                 return (-errno);
2398                 }
2399                 if (p3 != NULL) {
2400                         wfdsp = SAFE_ALLOCA(fd_set_len);
2401                         if (wfdsp == NULL)
2402                                 return (-ENOMEM);
2403                         if (uucopy((void *)p3, wfdsp, fd_set_len) != 0)
2404                                 return (-errno);
2405                 }
2406                 if (p4 != NULL) {
2407                         efdsp = SAFE_ALLOCA(fd_set_len);
2408                         if (efdsp == NULL)
2409                                 return (-ENOMEM);
2410                         if (uucopy((void *)p4, efdsp, fd_set_len) != 0)
2411                                 return (-errno);
2412                 }
2413         }
2414 
2415         if (p5 != NULL) {
2416                 if (uucopy((void *)p5, &ts, sizeof (ts)) != 0)
2417                         return (-errno);
2418 
2419                 tsp = &ts;
2420         }
2421 
2422         if (p6 != NULL) {
2423                 /*
2424                  * To force the number of arguments to be no more than six,
2425                  * Linux bundles both the sigset and the size into a structure
2426                  * that becomes the sixth argument.
2427                  */
2428                 struct {
2429                         lx_sigset_t *addr;
2430                         size_t size;
2431                 } lx_sigset;
2432 
2433                 if (uucopy((void *)p6, &lx_sigset, sizeof (lx_sigset)) != 0)
2434                         return (-errno);
2435 
2436                 /*
2437                  * Yes, that's right:  Linux forces a size to be passed only
2438                  * so it can check that it's the size of a sigset_t.
2439                  */
2440                 if (lx_sigset.size != sizeof (lx_sigset_t))
2441                         return (-EINVAL);
2442 
2443                 /*
2444                  * This is where we check if the sigset is *really* NULL.
2445                  */
2446                 if (lx_sigset.addr) {
2447                         if ((r = ltos_sigset(lx_sigset.addr, &sigset)) != 0)
2448                                 return (r);
2449 
2450                         sp = &sigset;
2451                 }
2452         }
2453 
2454 #if defined(_LP64)
2455         r = pselect(nfds, rfdsp, wfdsp, efdsp, tsp, sp);
2456 #else
2457         if (nfds >= FD_SETSIZE)
2458                 r = pselect_large_fdset(nfds, rfdsp, wfdsp, efdsp, tsp, sp);
2459         else
2460                 r = pselect(nfds, rfdsp, wfdsp, efdsp, tsp, sp);
2461 #endif
2462 
2463         if (r < 0)
2464                 return (-errno);
2465 
2466         /*
2467          * For pselect6(), we don't honor the strange Linux select() semantics
2468          * with respect to the timestruc parameter because glibc ignores it
2469          * anyway -- just copy out the fd pointers and return.
2470          */
2471         if ((rfdsp != NULL) && (uucopy(rfdsp, (void *)p2, fd_set_len) != 0))
2472                 return (-errno);
2473         if ((wfdsp != NULL) && (uucopy(wfdsp, (void *)p3, fd_set_len) != 0))
2474                 return (-errno);
2475         if ((efdsp != NULL) && (uucopy(efdsp, (void *)p4, fd_set_len) != 0))
2476                 return (-errno);
2477 
2478         return (r);
2479 }
2480 
2481 /*
2482  * The first argument is the pid (Linux tgid) to send the signal to, second
2483  * argument is the signal to send (an lx signal), and third is the siginfo_t
2484  * with extra information. We translate the code and signal only from the
2485  * siginfo_t, and leave everything else the same as it gets passed through the
2486  * signalling system. This is enough to get sigqueue working. See Linux man
2487  * page rt_sigqueueinfo(2).
2488  */
2489 long
2490 lx_rt_sigqueueinfo(uintptr_t p1, uintptr_t p2, uintptr_t p3)
2491 {
2492         pid_t tgid = (pid_t)p1;
2493         int lx_sig = (int)p2;
2494         int sig;
2495         lx_siginfo_t lx_siginfo;
2496         siginfo_t siginfo;
2497         int s_code;
2498         pid_t s_pid;
2499 
2500         if (uucopy((void *)p3, &lx_siginfo, sizeof (lx_siginfo_t)) != 0)
2501                 return (-EFAULT);
2502         s_code = ltos_sigcode(lx_siginfo.lsi_code);
2503         if (s_code == LX_SI_CODE_NOT_EXIST)
2504                 return (-EINVAL);
2505         if (lx_sig < 0 || lx_sig > LX_NSIG || (sig = ltos_signo[lx_sig]) < 0) {
2506                 return (-EINVAL);
2507         }
2508         /*
2509          * This case (when trying to kill pid 0) just has a different errno
2510          * returned in illumos than in Linux.
2511          */
2512         if (tgid == 0)
2513                 return (-ESRCH);
2514         if (lx_lpid_to_spid(tgid, &s_pid) != 0)
2515                 return (-ESRCH);
2516         if (SI_CANQUEUE(s_code)) {
2517                 return ((syscall(SYS_sigqueue, s_pid, sig,
2518                     lx_siginfo.lsi_value, s_code, 0) == -1) ?
2519                     (-errno): 0);
2520         } else {
2521                 /*
2522                  * This case is unlikely, as the main entry point is through
2523                  * sigqueue, which always has a queuable si_code.
2524                  */
2525                 siginfo.si_signo = sig;
2526                 siginfo.si_code = s_code;
2527                 siginfo.si_pid = lx_siginfo.lsi_pid;
2528                 siginfo.si_value = lx_siginfo.lsi_value;
2529                 siginfo.si_uid = lx_siginfo.lsi_uid;
2530                 return ((syscall(SYS_brand, B_HELPER_SIGQUEUE,
2531                     tgid, sig, &siginfo)) ? (-errno) : 0);
2532         }
2533 }
2534 
2535 /*
2536  * Adds an additional argument for which thread within a thread group to send
2537  * the signal to (added as the second argument).
2538  */
2539 long
2540 lx_rt_tgsigqueueinfo(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4)
2541 {
2542         pid_t tgid = (pid_t)p1;
2543         pid_t tid = (pid_t)p2;
2544         int lx_sig = (int)p3;
2545         int sig;
2546         lx_siginfo_t lx_siginfo;
2547         siginfo_t siginfo;
2548         int si_code;
2549 
2550         if (uucopy((void *)p4, &lx_siginfo, sizeof (lx_siginfo_t)) != 0)
2551                 return (-EFAULT);
2552         if (lx_sig < 0 || lx_sig > LX_NSIG || (sig = ltos_signo[lx_sig]) < 0) {
2553                 return (-EINVAL);
2554         }
2555         si_code = ltos_sigcode(lx_siginfo.lsi_code);
2556         if (si_code == LX_SI_CODE_NOT_EXIST)
2557                 return (-EINVAL);
2558         /*
2559          * Check for invalid tgid and tids. That appears to be only negatives
2560          * and 0 values. Everything else that doesn't exist is instead ESRCH.
2561          */
2562         if (tgid <= 0 || tid <= 0)
2563                 return (-EINVAL);
2564         siginfo.si_signo = sig;
2565         siginfo.si_code = si_code;
2566         siginfo.si_pid = lx_siginfo.lsi_pid;
2567         siginfo.si_value = lx_siginfo.lsi_value;
2568         siginfo.si_uid = lx_siginfo.lsi_uid;
2569 
2570         return ((syscall(SYS_brand, B_HELPER_TGSIGQUEUE, tgid, tid, sig,
2571             &siginfo)) ? (-errno) : 0);
2572 }
2573 
/*
 * signalfd entry point without a flags argument: hands the request to
 * lx_signalfd4() with the flags set to zero and returns its result.
 */
long
lx_signalfd(int fd, uintptr_t mask, size_t msize)
{
        long result;

        result = lx_signalfd4(fd, mask, msize, 0);

        return (result);
}
2579 
2580 long
2581 lx_signalfd4(int fd, uintptr_t mask, size_t msize, int flags)
2582 {
2583         sigset_t s_set;
2584         int r;
2585 
2586         if (msize != sizeof (int64_t))
2587                 return (-EINVAL);
2588 
2589         if (ltos_sigset((lx_sigset_t *)mask, &s_set) != 0)
2590                 return (-errno);
2591 
2592         r = signalfd(fd, &s_set, flags);
2593 
2594         /*
2595          * signalfd(3C) may fail with ENOENT if /dev/signalfd is not available.
2596          * It is less jarring to Linux programs to tell them that internal
2597          * allocation failed than to report an error number they are not
2598          * expecting.
2599          */
2600         if (r == -1 && errno == ENOENT)
2601                 return (-ENODEV);
2602 
2603         return (r == -1 ? -errno : r);
2604 }