1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  */
  26 
  27 #pragma ident   "%Z%%M% %I%     %E% SMI"
  28 
  29 #include <sys/types.h>
  30 #include <sys/param.h>
  31 #include <sys/segments.h>
  32 #include <sys/lx_types.h>
  33 #include <sys/lx_brand.h>
  34 #include <sys/lx_misc.h>
  35 #include <sys/lx_debug.h>
  36 #include <sys/lx_signal.h>
  37 #include <sys/lx_syscall.h>
  38 #include <sys/lx_thread.h>
  39 #include <assert.h>
  40 #include <errno.h>
  41 #include <signal.h>
  42 #include <stdlib.h>
  43 #include <string.h>
  44 #include <strings.h>
  45 #include <thread.h>
  46 #include <ucontext.h>
  47 #include <unistd.h>
  48 #include <stdio.h>
  49 #include <libintl.h>
  50 #include <ieeefp.h>
  51 
  52 /*
  53  * Delivering signals to a Linux process is complicated by differences in
  54  * signal numbering, stack structure and contents, and the action taken when a
  55  * signal handler exits.  In addition, many signal-related structures, such as
  56  * sigset_ts, vary between Solaris and Linux.
  57  *
  58  * To support user-level signal handlers, the brand uses a double layer of
  59  * indirection to process and deliver signals to branded threads.
  60  *
  61  * When a Linux process sends a signal using the kill(2) system call, we must
  62  * translate the signal into the Solaris equivalent before handing control off
  63  * to the standard signalling mechanism.  When a signal is delivered to a Linux
 * process, we translate the signal number from Solaris back to Linux.
  65  * Translating signals both at generation and delivery time ensures both that
  66  * Solaris signals are sent properly to Linux applications and that signals'
  67  * default behavior works as expected.
  68  *
  69  * In a normal Solaris process, signal delivery is interposed on for any thread
  70  * registering a signal handler by libc. Libc needs to do various bits of magic
  71  * to provide thread-safe critical regions, so it registers its own handler,
  72  * named sigacthandler(), using the sigaction(2) system call. When a signal is
  73  * received, sigacthandler() is called, and after some processing, libc turns
  74  * around and calls the user's signal handler via a routine named
  75  * call_user_handler().
  76  *
  77  * Adding a Linux branded thread to the mix complicates things somewhat.
  78  *
  79  * First, when a thread receives a signal, it may be running with a Linux value
  80  * in the x86 %gs segment register as opposed to the value Solaris threads
  81  * expect; if control were passed directly to Solaris code, such as libc's
  82  * sigacthandler(), that code would experience a segmentation fault the first
  83  * time it tried to dereference a memory location using %gs.
  84  *
  85  * Second, the signal number translation referenced above must take place.
  86  * Further, as was the case with Solaris libc, before the Linux signal handler
  87  * is called, the value of the %gs segment register MUST be restored to the
  88  * value Linux code expects.
  89  *
  90  * This need to translate signal numbers and manipulate the %gs register means
  91  * that while with standard Solaris libc, following a signal from generation to
  92  * delivery looks something like:
  93  *
  94  *      kernel ->
  95  *          sigacthandler() ->
  96  *              call_user_handler() ->
  97  *                  user signal handler
  98  *
  99  * while for the brand's Linux threads, this would look like:
 100  *
 101  *      kernel ->
 102  *          lx_sigacthandler() ->
 103  *              sigacthandler() ->
 104  *                  call_user_handler() ->
 105  *                      lx_call_user_handler() ->
 106  *                          Linux user signal handler
 107  *
 * The new additions are:
 109  *
 110  *      lx_sigacthandler
 111  *      ================
 112  *      This routine is responsible for setting the %gs segment register to the
 113  *      value Solaris code expects, and jumping to Solaris' libc signal
 114  *      interposition handler, sigacthandler().
 115  *
 116  *      lx_call_user_handler
 117  *      ====================
 118  *      This routine is responsible for translating Solaris signal numbers to
 119  *      their Linux equivalents, building a Linux signal stack based on the
 120  *      information Solaris has provided, and passing the stack to the
 121  *      registered Linux signal handler. It is, in effect, the Linux thread
 122  *      equivalent to libc's call_user_handler().
 123  *
 124  * Installing lx_sigacthandler() is a bit tricky, as normally libc's
 125  * sigacthandler() routine is hidden from user programs. To facilitate this, a
 * new private function was added to libc, setsigacthandler():
 127  *
 128  *      void setsigacthandler(void (*new_handler)(int, siginfo_t *, void *),
 129  *          void (**old_handler)(int, siginfo_t *, void *))
 130  *
 * The routine works by replacing the address of libc's own interposition
 * handler, which libc already keeps in a per-thread data structure, with
 * the address passed in; the old handler's address is set in the pointer
 134  * pointed to by the second argument, if it is non-NULL, mimicking the behavior
 135  * of sigaction() itself.  Once setsigacthandler() has been executed, all
 136  * future branded threads this thread may create will automatically have the
 137  * proper interposition handler installed as the result of a normal
 138  * sigaction() call.
 139  *
 140  * Note that none of this interposition is necessary unless a Linux thread
 141  * registers a user signal handler, as the default action for all signals is the
 142  * same between Solaris and Linux save for one signal, SIGPWR.  For this reason,
 143  * the brand ALWAYS installs its own internal signal handler for SIGPWR that
 144  * translates the action to the Linux default, to terminate the process.
 145  * (Solaris' default action is to ignore SIGPWR.)
 146  *
 147  * It is also important to note that when signals are not translated, the brand
 148  * relies upon code interposing upon the wait(2) system call to translate
 149  * signals to their proper values for any Linux threads retrieving the status
 150  * of others.  So while the Solaris signal number for a particular signal is set
 151  * in a process' data structures (and would be returned as the result of say,
 * WTERMSIG()), the brand's interposition upon wait(2) is responsible for
 153  * translating the value WTERMSIG() would return from a Solaris signal number
 154  * to the appropriate Linux value.
 155  *
 156  * The process of returning to an interrupted thread of execution from a user
 157  * signal handler is entirely different between Solaris and Linux.  While
 158  * Solaris generally expects to set the context to the interrupted one on a
 159  * normal return from a signal handler, in the normal case Linux instead calls
 160  * code that calls a specific Linux system call, sigreturn(2).  Thus when a
 161  * Linux signal handler completes execution, instead of returning through what
 162  * would in libc be a call to setcontext(2), the sigreturn(2) Linux system call
 163  * is responsible for accomplishing much the same thing.
 164  *
 165  * This trampoline code looks something like this:
 166  *
 167  *      pop     %eax
 168  *      mov     LX_SYS_rt_sigreturn, %eax
 169  *      int     $0x80
 170  *
 171  * so when the Linux user signal handler is eventually called, the stack looks
 * like this (in the case of an "lx_sigstack" stack):
 173  *
 174  *      =========================================================
 175  *      | Pointer to actual trampoline code (in code segment)   |
 176  *      =========================================================
 177  *      | Linux signal number                                   |
 178  *      =========================================================
 179  *      | Pointer to Linux siginfo_t (or NULL)                  |
 180  *      =========================================================
 181  *      | Pointer to Linux ucontext_t (or NULL)                 |
 182  *      =========================================================
 183  *      | Linux siginfo_t                                       |
 184  *      =========================================================
 185  *      | Linux ucontext_t                                      |
 186  *      =========================================================
 187  *      | Linux struct _fpstate                                 |
 188  *      =========================================================
 189  *      | Trampoline code (marker for gdb, not really executed) |
 190  *      =========================================================
 191  *
 192  * The brand takes the approach of intercepting the Linux sigreturn(2) system
 193  * call in order to turn it into the return through the libc call stack that
 194  * Solaris expects. This is done by the lx_sigreturn() and lx_rt_sigreturn()
 195  * routines, which remove the Linux signal frame from the stack and pass the
 196  * resulting stack pointer to another routine, lx_sigreturn_tolibc(), which
 197  * makes libc believe the user signal handler it had called returned.
 198  *
 199  * (Note that the trampoline code actually lives in a proper executable segment
 200  * and not on the stack, but gdb checks for the exact code sequence of the
 201  * trampoline code on the stack to determine whether it is in a signal stack
 202  * frame or not.  Really.)
 203  *
 204  * When control then returns to libc's call_user_handler() routine, a
 205  * setcontext(2) will be done that (in most cases) returns the thread executing
 206  * the code back to the location originally interrupted by receipt of the
 207  * signal.
 208  */
 209 
 210 /*
 211  * Two flavors of Linux signal stacks:
 212  *
 213  * lx_sigstack - used for "modern" signal handlers, in practice those
 214  *               that have the sigaction(2) flag SA_SIGINFO set
 215  *
 216  * lx_oldsigstack - used for legacy signal handlers, those that do not have
 217  *                  the sigaction(2) flag SA_SIGINFO set or that were setup via
 218  *                  the signal(2) call.
 219  *
 220  * NOTE: Since these structures will be placed on the stack and stack math will
 221  *       be done with their sizes, they must be word aligned in size (32 bits)
 222  *       so the stack remains word aligned per the i386 ABI.
 223  */
 224 struct lx_sigstack {
 225         void (*retaddr)();      /* address of real lx_rt_sigreturn code */
 226         int sig;                /* signal number */
 227         lx_siginfo_t *sip;      /* points to "si" if valid, NULL if not */
 228         lx_ucontext_t *ucp;     /* points to "uc" if valid, NULL if not */
 229         lx_siginfo_t si;        /* saved signal information */
 230         lx_ucontext_t uc;       /* saved user context */
 231         lx_fpstate_t fpstate;   /* saved FP state */
 232         char trampoline[8];     /* code for trampoline to lx_rt_sigreturn() */
 233 };
 234 
 235 struct lx_oldsigstack {
 236         void (*retaddr)();      /* address of real lx_sigreturn code */
 237         int sig;                /* signal number */
 238         lx_sigcontext_t sigc;   /* saved user context */
 239         lx_fpstate_t fpstate;   /* saved FP state */
 240         int sig_extra;          /* signal mask for signals [32 .. NSIG - 1] */
 241         char trampoline[8];     /* code for trampoline to lx_sigreturn() */
 242 };
 243 
 244 /*
 245  * libc_sigacthandler is set to the address of the libc signal interposition
 246  * routine, sigacthandler().
 247  */
 248 void (*libc_sigacthandler)(int, siginfo_t *, void*);
 249 
 250 /*
 251  * The lx_sighandlers structure needs to be a global due to the semantics of
 252  * clone().
 253  *
 254  * If CLONE_SIGHAND is set, the calling process and child share signal
 255  * handlers, and if either calls sigaction(2) it should change the behavior
 256  * in the other thread.  Each thread does, however, have its own signal mask
 257  * and set of pending signals.
 258  *
 259  * If CLONE_SIGHAND is not set, the child process should inherit a copy of
 260  * the signal handlers at the time of the clone() but later calls to
 261  * sigaction(2) should only affect the individual thread calling it.
 262  *
 263  * This maps perfectly to a thr_create(3C) thread semantic in the first
 264  * case and a fork(2)-type semantic in the second case.  By making
 265  * lx_sighandlers global, we automatically get the correct behavior.
 266  */
 267 static lx_sighandlers_t lx_sighandlers;
 268 
 269 /*
 270  * stol_stack() and ltos_stack() convert between Solaris and Linux stack_t
 271  * structures.
 272  *
 273  * These routines are needed because although the two structures have the same
 274  * contents, their contents are declared in a different order, so the content
 275  * of the structures cannot be copied with a simple bcopy().
 276  */
 277 static void
 278 stol_stack(stack_t *fr, lx_stack_t *to)
 279 {
 280         to->ss_sp = fr->ss_sp;
 281         to->ss_flags = fr->ss_flags;
 282         to->ss_size = fr->ss_size;
 283 }
 284 
 285 static void
 286 ltos_stack(lx_stack_t *fr, stack_t *to)
 287 {
 288         to->ss_sp = fr->ss_sp;
 289         to->ss_flags = fr->ss_flags;
 290         to->ss_size = fr->ss_size;
 291 }
 292 
 293 static int
 294 ltos_sigset(lx_sigset_t *lx_sigsetp, sigset_t *s_sigsetp)
 295 {
 296         lx_sigset_t l;
 297         int lx_sig, sig;
 298 
 299         if (uucopy(lx_sigsetp, &l, sizeof (lx_sigset_t)) != 0)
 300                 return (-errno);
 301 
 302         (void) sigemptyset(s_sigsetp);
 303 
 304         for (lx_sig = 1; lx_sig < LX_NSIG; lx_sig++) {
 305                 if (lx_sigismember(&l, lx_sig) &&
 306                     ((sig = ltos_signo[lx_sig]) > 0))
 307                         (void) sigaddset(s_sigsetp, sig);
 308         }
 309 
 310         return (0);
 311 }
 312 
 313 static int
 314 stol_sigset(sigset_t *s_sigsetp, lx_sigset_t *lx_sigsetp)
 315 {
 316         lx_sigset_t l;
 317         int sig, lx_sig;
 318 
 319         bzero(&l, sizeof (lx_sigset_t));
 320 
 321         for (sig = 1; sig < NSIG; sig++) {
 322                 if (sigismember(s_sigsetp, sig) &&
 323                     ((lx_sig = stol_signo[sig]) > 0))
 324                         lx_sigaddset(&l, lx_sig);
 325         }
 326 
 327         return ((uucopy(&l, lx_sigsetp, sizeof (lx_sigset_t)) != 0)
 328             ? -errno : 0);
 329 }
 330 
 331 static int
 332 ltos_osigset(lx_osigset_t *lx_osigsetp, sigset_t *s_sigsetp)
 333 {
 334         lx_osigset_t lo;
 335         int lx_sig, sig;
 336 
 337         if (uucopy(lx_osigsetp, &lo, sizeof (lx_osigset_t)) != 0)
 338                 return (-errno);
 339 
 340         (void) sigemptyset(s_sigsetp);
 341 
 342         for (lx_sig = 1; lx_sig <= OSIGSET_NBITS; lx_sig++)
 343                 if ((lo & OSIGSET_BITSET(lx_sig)) &&
 344                     ((sig = ltos_signo[lx_sig]) > 0))
 345                         (void) sigaddset(s_sigsetp, sig);
 346 
 347         return (0);
 348 }
 349 
 350 static int
 351 stol_osigset(sigset_t *s_sigsetp, lx_osigset_t *lx_osigsetp)
 352 {
 353         lx_osigset_t lo = 0;
 354         int lx_sig, sig;
 355 
 356         /*
 357          * Note that an lx_osigset_t can only represent the signals from
 358          * [1 .. OSIGSET_NBITS], so even though a signal may be present in the
 359          * Solaris sigset_t, it may not be representable as a bit in the
 360          * lx_osigset_t.
 361          */
 362         for (sig = 1; sig < NSIG; sig++)
 363                 if (sigismember(s_sigsetp, sig) &&
 364                     ((lx_sig = stol_signo[sig]) > 0) &&
 365                     (lx_sig <= OSIGSET_NBITS))
 366                         lo |= OSIGSET_BITSET(lx_sig);
 367 
 368         return ((uucopy(&lo, lx_osigsetp, sizeof (lx_osigset_t)) != 0)
 369             ? -errno : 0);
 370 }
 371 
 372 static int
 373 stol_sigcode(int si_code)
 374 {
 375         switch (si_code) {
 376                 case SI_USER:
 377                         return (LX_SI_USER);
 378                 case SI_LWP:
 379                         return (LX_SI_TKILL);
 380                 case SI_QUEUE:
 381                         return (LX_SI_QUEUE);
 382                 case SI_TIMER:
 383                         return (LX_SI_TIMER);
 384                 case SI_ASYNCIO:
 385                         return (LX_SI_ASYNCIO);
 386                 case SI_MESGQ:
 387                         return (LX_SI_MESGQ);
 388                 default:
 389                         return (si_code);
 390         }
 391 }
 392 
 393 int
 394 stol_siginfo(siginfo_t *siginfop, lx_siginfo_t *lx_siginfop)
 395 {
 396         lx_siginfo_t lx_siginfo;
 397 
 398         bzero(&lx_siginfo, sizeof (*lx_siginfop));
 399 
 400         if ((lx_siginfo.lsi_signo = stol_signo[siginfop->si_signo]) <= 0) {
 401                 errno = EINVAL;
 402                 return (-1);
 403         }
 404 
 405         lx_siginfo.lsi_code = stol_sigcode(siginfop->si_code);
 406         lx_siginfo.lsi_errno = siginfop->si_errno;
 407 
 408         switch (lx_siginfo.lsi_signo) {
 409                 /*
 410                  * Semantics ARE defined for SIGKILL, but since
 411                  * we can't catch it, we can't translate it. :-(
 412                  */
 413                 case LX_SIGPOLL:
 414                         lx_siginfo.lsi_band = siginfop->si_band;
 415                         lx_siginfo.lsi_fd = siginfop->si_fd;
 416                         break;
 417 
 418                 case LX_SIGCHLD:
 419                         lx_siginfo.lsi_pid = siginfop->si_pid;
 420                         lx_siginfo.lsi_status = siginfop->si_status;
 421                         lx_siginfo.lsi_utime = siginfop->si_utime;
 422                         lx_siginfo.lsi_stime = siginfop->si_stime;
 423 
 424                         break;
 425 
 426                 case LX_SIGILL:
 427                 case LX_SIGBUS:
 428                 case LX_SIGFPE:
 429                         lx_siginfo.lsi_addr = siginfop->si_addr;
 430                         break;
 431 
 432                 default:
 433                         lx_siginfo.lsi_pid = siginfop->si_pid;
 434                         lx_siginfo.lsi_uid =
 435                             LX_UID32_TO_UID16(siginfop->si_uid);
 436                         break;
 437         }
 438 
 439         return ((uucopy(&lx_siginfo, lx_siginfop, sizeof (lx_siginfo_t)) != 0)
 440             ? -errno : 0);
 441 }
 442 
 443 static void
 444 stol_fpstate(fpregset_t *fpr, lx_fpstate_t *lfpr)
 445 {
 446         struct _fpstate *fpsp = (struct _fpstate *)fpr;
 447         size_t copy_len;
 448 
 449         /*
 450          * The Solaris struct _fpstate and lx_fpstate_t are identical from the
 451          * beginning of the structure to the lx_fpstate_t "magic" field, so
 452          * just bcopy() those entries.
 453          */
 454         copy_len = (size_t)&(((lx_fpstate_t *)0)->magic);
 455         bcopy(fpsp, lfpr, copy_len);
 456 
 457         /*
 458          * These fields are all only significant for the first 16 bits.
 459          */
 460         lfpr->cw &= 0xffff;              /* x87 control word */
 461         lfpr->tag &= 0xffff;             /* x87 tag word */
 462         lfpr->cssel &= 0xffff;           /* cs selector */
 463         lfpr->datasel &= 0xffff; /* ds selector */
 464 
 465         /*
 466          * Linux wants the x87 status word field to contain the value of the
 467          * x87 saved exception status word.
 468          */
 469         lfpr->sw = lfpr->status & 0xffff;     /* x87 status word */
 470 
 471         lfpr->mxcsr = fpsp->mxcsr;
 472 
 473         if (fpsp->mxcsr != 0) {
 474                 /*
 475                  * Linux uses the "magic" field to denote whether the XMM
 476                  * registers contain legal data or not.  Since we can't get to
 477                  * %cr4 from userland to check the status of the OSFXSR bit,
 478                  * check the mxcsr field to see if it's 0, which it should
 479                  * never be on a system with the OXFXSR bit enabled.
 480                  */
 481                 lfpr->magic = LX_X86_FXSR_MAGIC;
 482                 bcopy(fpsp->xmm, lfpr->_xmm, sizeof (lfpr->_xmm));
 483         } else {
 484                 lfpr->magic = LX_X86_FXSR_NONE;
 485         }
 486 }
 487 
 488 static void
 489 ltos_fpstate(lx_fpstate_t *lfpr, fpregset_t *fpr)
 490 {
 491         struct _fpstate *fpsp = (struct _fpstate *)fpr;
 492         size_t copy_len;
 493 
 494         /*
 495          * The lx_fpstate_t and Solaris struct _fpstate are identical from the
 496          * beginning of the structure to the struct _fpstate "mxcsr" field, so
 497          * just bcopy() those entries.
 498          *
 499          * Note that we do NOT have to propogate changes the user may have made
 500          * to the "status" word back to the "sw" word, unlike the way we have
 501          * to deal with processing the ESP and UESP register values on return
 502          * from a signal handler.
 503          */
 504         copy_len = (size_t)&(((struct _fpstate *)0)->mxcsr);
 505         bcopy(lfpr, fpsp, copy_len);
 506 
 507         /*
 508          * These fields are all only significant for the first 16 bits.
 509          */
 510         fpsp->cw &= 0xffff;              /* x87 control word */
 511         fpsp->sw &= 0xffff;              /* x87 status word */
 512         fpsp->tag &= 0xffff;             /* x87 tag word */
 513         fpsp->cssel &= 0xffff;           /* cs selector */
 514         fpsp->datasel &= 0xffff; /* ds selector */
 515         fpsp->status &= 0xffff;          /* saved status */
 516 
 517         fpsp->mxcsr = lfpr->mxcsr;
 518 
 519         if (lfpr->magic == LX_X86_FXSR_MAGIC)
 520                 bcopy(lfpr->_xmm, fpsp->xmm, sizeof (fpsp->xmm));
 521 }
 522 
 523 /*
 524  * The brand needs a lx version of this because the format of the lx stack_t
 525  * differs from the Solaris stack_t not really in content but in ORDER,
 526  * so we can't simply pass pointers and expect things to work (sigh...)
 527  */
 528 int
 529 lx_sigaltstack(uintptr_t nsp, uintptr_t osp)
 530 {
 531         lx_stack_t ls;
 532         stack_t newsstack, oldsstack;
 533         stack_t *nssp = (nsp ? &newsstack : NULL);
 534         stack_t *ossp = (osp ? &oldsstack : NULL);
 535 
 536         if (nsp) {
 537                 if (uucopy((void *)nsp, &ls, sizeof (lx_stack_t)) != 0)
 538                         return (-errno);
 539 
 540                 if ((ls.ss_flags & LX_SS_DISABLE) == 0 &&
 541                     ls.ss_size < LX_MINSIGSTKSZ)
 542                         return (-ENOMEM);
 543 
 544                 newsstack.ss_sp = (int *)ls.ss_sp;
 545                 newsstack.ss_size = (long)ls.ss_size;
 546                 newsstack.ss_flags = ls.ss_flags;
 547         }
 548 
 549         if (sigaltstack(nssp, ossp) != 0)
 550                 return (-errno);
 551 
 552         if (osp) {
 553                 ls.ss_sp = (void *)oldsstack.ss_sp;
 554                 ls.ss_size = (size_t)oldsstack.ss_size;
 555                 ls.ss_flags = oldsstack.ss_flags;
 556 
 557                 if (uucopy(&ls, (void *)osp, sizeof (lx_stack_t)) != 0)
 558                         return (-errno);
 559         }
 560 
 561         return (0);
 562 }
 563 
 564 /*
 565  * The following routines are needed because sigset_ts and siginfo_ts are
 566  * different in format between Linux and Solaris.
 567  *
 568  * Note that there are two different lx_sigset structures, lx_sigset_ts and
 569  * lx_osigset_ts:
 570  *
 571  *    + An lx_sigset_t is the equivalent of a Solaris sigset_t and supports
 572  *      more than 32 signals.
 573  *
 574  *    + An lx_osigset_t is simply a uint32_t, so it by definition only supports
 575  *      32 signals.
 576  *
 577  * When there are two versions of a routine, one prefixed with lx_rt_ and
 578  * one prefixed with lx_ alone, in GENERAL the lx_rt_ routines deal with
 579  * lx_sigset_ts while the lx_ routines deal with lx_osigset_ts.  Unfortunately,
 580  * this is not always the case (e.g. lx_sigreturn() vs. lx_rt_sigreturn())
 581  */
 582 int
 583 lx_sigpending(uintptr_t sigpend)
 584 {
 585         sigset_t sigpendset;
 586 
 587         if (sigpending(&sigpendset) != 0)
 588                 return (-errno);
 589 
 590         return (stol_osigset(&sigpendset, (lx_osigset_t *)sigpend));
 591 }
 592 
 593 int
 594 lx_rt_sigpending(uintptr_t sigpend, uintptr_t setsize)
 595 {
 596         sigset_t sigpendset;
 597 
 598         if ((size_t)setsize != sizeof (lx_sigset_t))
 599                 return (-EINVAL);
 600 
 601         if (sigpending(&sigpendset) != 0)
 602                 return (-errno);
 603 
 604         return (stol_sigset(&sigpendset, (lx_sigset_t *)sigpend));
 605 }
 606 
 607 /*
 608  * Create a common routine to encapsulate all of the sigprocmask code,
 609  * as the only difference between lx_sigprocmask() and lx_rt_sigprocmask()
 610  * is the usage of lx_osigset_ts vs. lx_sigset_ts, as toggled in the code by
 611  * the setting of the "sigset_type" flag.
 612  */
 613 static int
 614 lx_sigprocmask_common(uintptr_t how, uintptr_t l_setp, uintptr_t l_osetp,
 615     uintptr_t sigset_type)
 616 {
 617         int err;
 618         sigset_t set, oset;
 619         sigset_t *s_setp = NULL;
 620         sigset_t *s_osetp;
 621 
 622         if (l_setp) {
 623                 switch (how) {
 624                         case LX_SIG_BLOCK:
 625                                 how = SIG_BLOCK;
 626                                 break;
 627 
 628                         case LX_SIG_UNBLOCK:
 629                                 how = SIG_UNBLOCK;
 630                                 break;
 631 
 632                         case LX_SIG_SETMASK:
 633                                 how = SIG_SETMASK;
 634                                 break;
 635 
 636                         default:
 637                                 return (-EINVAL);
 638                 }
 639 
 640                 s_setp = &set;
 641 
 642                 if (sigset_type == USE_SIGSET)
 643                         err = ltos_sigset((lx_sigset_t *)l_setp, s_setp);
 644                 else
 645                         err = ltos_osigset((lx_osigset_t *)l_setp, s_setp);
 646 
 647                 if (err != 0)
 648                         return (err);
 649         }
 650 
 651         s_osetp = (l_osetp ? &oset : NULL);
 652 
 653         /*
 654          * In a multithreaded environment, a call to sigprocmask(2) should
 655          * only affect the current thread's signal mask so we don't need to
 656          * explicitly call thr_sigsetmask(3C) here.
 657          */
 658         if (sigprocmask(how, s_setp, s_osetp) != 0)
 659                 return (-errno);
 660 
 661         if (l_osetp) {
 662                 if (sigset_type == USE_SIGSET)
 663                         err = stol_sigset(s_osetp, (lx_sigset_t *)l_osetp);
 664                 else
 665                         err = stol_osigset(s_osetp, (lx_osigset_t *)l_osetp);
 666 
 667                 if (err != 0) {
 668                         /*
 669                          * Encountered a fault while writing to the old signal
 670                          * mask buffer, so unwind the signal mask change made
 671                          * above.
 672                          */
 673                         (void) sigprocmask(how, s_osetp, (sigset_t *)NULL);
 674                         return (err);
 675                 }
 676         }
 677 
 678         return (0);
 679 }
 680 
 681 int
 682 lx_sigprocmask(uintptr_t how, uintptr_t setp, uintptr_t osetp)
 683 {
 684         return (lx_sigprocmask_common(how, setp, osetp, USE_OSIGSET));
 685 }
 686 
 687 int
 688 lx_sgetmask(void)
 689 {
 690         lx_osigset_t oldmask;
 691 
 692         return ((lx_sigprocmask_common(SIG_SETMASK, NULL, (uintptr_t)&oldmask,
 693             USE_OSIGSET) != 0) ? -errno : (int)oldmask);
 694 }
 695 
 696 int
 697 lx_ssetmask(uintptr_t sigmask)
 698 {
 699         lx_osigset_t newmask, oldmask;
 700 
 701         newmask = (lx_osigset_t)sigmask;
 702 
 703         return ((lx_sigprocmask_common(SIG_SETMASK, (uintptr_t)&newmask,
 704             (uintptr_t)&oldmask, USE_OSIGSET) != 0) ? -errno : (int)oldmask);
 705 }
 706 
 707 int
 708 lx_rt_sigprocmask(uintptr_t how, uintptr_t setp, uintptr_t osetp,
 709     uintptr_t setsize)
 710 {
 711         if ((size_t)setsize != sizeof (lx_sigset_t))
 712                 return (-EINVAL);
 713 
 714         return (lx_sigprocmask_common(how, setp, osetp, USE_SIGSET));
 715 }
 716 
 717 int
 718 lx_sigsuspend(uintptr_t set)
 719 {
 720         sigset_t s_set;
 721 
 722         if (ltos_osigset((lx_osigset_t *)set, &s_set) != 0)
 723                 return (-errno);
 724 
 725         return ((sigsuspend(&s_set) == -1) ? -errno : 0);
 726 }
 727 
 728 int
 729 lx_rt_sigsuspend(uintptr_t set, uintptr_t setsize)
 730 {
 731         sigset_t s_set;
 732 
 733         if ((size_t)setsize != sizeof (lx_sigset_t))
 734                 return (-EINVAL);
 735 
 736         if (ltos_sigset((lx_sigset_t *)set, &s_set) != 0)
 737                 return (-errno);
 738 
 739         return ((sigsuspend(&s_set) == -1) ? -errno : 0);
 740 }
 741 
 742 int
 743 lx_sigwaitinfo(uintptr_t set, uintptr_t sinfo)
 744 {
 745         lx_osigset_t *setp = (lx_osigset_t *)set;
 746         lx_siginfo_t *sinfop = (lx_siginfo_t *)sinfo;
 747 
 748         sigset_t s_set;
 749         siginfo_t s_sinfo, *s_sinfop;
 750         int rc;
 751 
 752         if (ltos_osigset(setp, &s_set) != 0)
 753                 return (-errno);
 754 
 755         s_sinfop = (sinfop == NULL) ? NULL : &s_sinfo;
 756 
 757         if ((rc = sigwaitinfo(&s_set, s_sinfop)) == -1)
 758                 return (-errno);
 759 
 760         if (s_sinfop == NULL)
 761                 return (rc);
 762 
 763         return ((stol_siginfo(s_sinfop, sinfop) != 0) ? -errno : rc);
 764 }
 765 
 766 int
 767 lx_rt_sigwaitinfo(uintptr_t set, uintptr_t sinfo, uintptr_t setsize)
 768 {
 769         sigset_t s_set;
 770         siginfo_t s_sinfo, *s_sinfop;
 771         int rc;
 772 
 773         lx_sigset_t *setp = (lx_sigset_t *)set;
 774         lx_siginfo_t *sinfop = (lx_siginfo_t *)sinfo;
 775 
 776         if ((size_t)setsize != sizeof (lx_sigset_t))
 777                 return (-EINVAL);
 778 
 779         if (ltos_sigset(setp, &s_set) != 0)
 780                 return (-errno);
 781 
 782         s_sinfop = (sinfop == NULL) ? NULL : &s_sinfo;
 783 
 784         if ((rc = sigwaitinfo(&s_set, s_sinfop)) == -1)
 785                 return (-errno);
 786 
 787         if (s_sinfop == NULL)
 788                 return (rc);
 789 
 790         return ((stol_siginfo(s_sinfop, sinfop) != 0) ? -errno : rc);
 791 }
 792 
 793 int
 794 lx_sigtimedwait(uintptr_t set, uintptr_t sinfo, uintptr_t toutp)
 795 {
 796         sigset_t s_set;
 797         siginfo_t s_sinfo, *s_sinfop;
 798         int rc;
 799 
 800         lx_osigset_t *setp = (lx_osigset_t *)set;
 801         lx_siginfo_t *sinfop = (lx_siginfo_t *)sinfo;
 802 
 803         if (ltos_osigset(setp, &s_set) != 0)
 804                 return (-errno);
 805 
 806         s_sinfop = (sinfop == NULL) ? NULL : &s_sinfo;
 807 
 808         if ((rc = sigtimedwait(&s_set, s_sinfop,
 809             (struct timespec *)toutp)) == -1)
 810                 return (-errno);
 811 
 812         if (s_sinfop == NULL)
 813                 return (rc);
 814 
 815         return ((stol_siginfo(s_sinfop, sinfop) != 0) ? -errno : rc);
 816 }
 817 
 818 int
 819 lx_rt_sigtimedwait(uintptr_t set, uintptr_t sinfo, uintptr_t toutp,
 820     uintptr_t setsize)
 821 {
 822         sigset_t s_set;
 823         siginfo_t s_sinfo, *s_sinfop;
 824         int rc;
 825 
 826         lx_sigset_t *setp = (lx_sigset_t *)set;
 827         lx_siginfo_t *sinfop = (lx_siginfo_t *)sinfo;
 828 
 829         if ((size_t)setsize != sizeof (lx_sigset_t))
 830                 return (-EINVAL);
 831 
 832         if (ltos_sigset(setp, &s_set) != 0)
 833                 return (-errno);
 834 
 835         s_sinfop = (sinfop == NULL) ? NULL : &s_sinfo;
 836 
 837         if ((rc = sigtimedwait(&s_set, s_sinfop,
 838             (struct timespec *)toutp)) == -1)
 839                 return (-errno);
 840 
 841         if (s_sinfop == NULL)
 842                 return (rc);
 843 
 844         return ((stol_siginfo(s_sinfop, sinfop) != 0) ? -errno : rc);
 845 }
 846 
 847 /*
 848  * Intercept the Linux sigreturn() syscall to turn it into the return through
 849  * the libc call stack that Solaris expects.
 850  *
 851  * When control returns to libc's call_user_handler() routine, a setcontext(2)
 852  * will be done that returns thread execution to the point originally
 853  * interrupted by receipt of the signal.
 854  */
 855 int
 856 lx_sigreturn(void)
 857 {
 858         struct lx_oldsigstack *lx_ossp;
 859         lx_sigset_t lx_sigset;
 860         lx_regs_t *rp;
 861         ucontext_t *ucp;
 862         uintptr_t sp;
 863 
 864         rp = lx_syscall_regs();
 865 
 866         /*
 867          * NOTE:  The sp saved in the context is eight bytes off of where we
 868          *        need it to be.
 869          */
 870         sp = (uintptr_t)rp->lxr_esp - 8;
 871 
 872         /*
 873          * At this point, the stack pointer should point to the struct
 874          * lx_oldsigstack that lx_build_old_signal_frame() constructed and
 875          * placed on the stack.  We need to reference it a bit later, so
 876          * save a pointer to it before incrementing our copy of the sp.
 877          */
 878         lx_ossp = (struct lx_oldsigstack *)sp;
 879         sp += sizeof (struct lx_oldsigstack);
 880 
 881         /*
 882          * lx_sigdeliver() pushes LX_SIGRT_MAGIC on the stack before it
 883          * creates the struct lx_oldsigstack.
 884          *
 885          * If we don't find it here, the stack's been corrupted and we need to
 886          * kill ourselves.
 887          */
 888         if (*(uint32_t *)sp != LX_SIGRT_MAGIC)
 889                 lx_err_fatal(gettext(
 890                     "sp @ 0x%p, expected 0x%x, found 0x%x!"),
 891                     sp, LX_SIGRT_MAGIC, *(uint32_t *)sp);
 892 
 893         sp += sizeof (uint32_t);
 894 
 895         /*
 896          * For signal mask handling to be done properly, this call needs to
 897          * return to the libc routine that originally called the signal handler
 898          * rather than directly set the context back to the place the signal
 899          * interrupted execution as the original Linux code would do.
 900          *
 901          * Here *sp points to the Solaris ucontext_t, so we need to copy
 902          * machine registers the Linux signal handler may have modified
 903          * back to the Solaris version.
 904          */
 905         ucp = (ucontext_t *)(*(uint32_t *)sp);
 906 
 907         /*
 908          * General registers copy across as-is, except Linux expects that
 909          * changes made to uc_mcontext.gregs[ESP] will be reflected when the
 910          * interrupted thread resumes execution after the signal handler. To
 911          * emulate this behavior, we must modify uc_mcontext.gregs[UESP] to
 912          * match uc_mcontext.gregs[ESP] as Solaris will restore the UESP
 913          * value to ESP.
 914          */
 915         lx_ossp->sigc.sc_esp_at_signal = lx_ossp->sigc.sc_esp;
 916         bcopy(&lx_ossp->sigc, &ucp->uc_mcontext, sizeof (gregset_t));
 917 
 918         /* copy back FP regs if present */
 919         if (lx_ossp->sigc.sc_fpstate != NULL)
 920                 ltos_fpstate(&lx_ossp->fpstate, &ucp->uc_mcontext.fpregs);
 921 
 922         /* convert Linux signal mask back to its Solaris equivalent */
 923         bzero(&lx_sigset, sizeof (lx_sigset_t));
 924         lx_sigset.__bits[0] = lx_ossp->sigc.sc_mask;
 925         lx_sigset.__bits[1] = lx_ossp->sig_extra;
 926         (void) ltos_sigset(&lx_sigset, &ucp->uc_sigmask);
 927 
 928         /*
 929          * At this point sp contains the value of the stack pointer when
 930          * lx_call_user_handler() was called.
 931          *
 932          * Pop one more value off the stack and pass the new sp to
 933          * lx_sigreturn_tolibc(), which will in turn manipulate the x86
 934          * registers to make it appear to libc's call_user_handler() as if the
 935          * handler it had called returned.
 936          */
 937         sp += sizeof (uint32_t);
 938         lx_debug("calling lx_sigreturn_tolibc(0x%p)", sp);
 939         lx_sigreturn_tolibc(sp);
 940 
 941         /*NOTREACHED*/
 942         return (0);
 943 }
 944 
 945 int
 946 lx_rt_sigreturn(void)
 947 {
 948         struct lx_sigstack *lx_ssp;
 949         lx_regs_t *rp;
 950         lx_ucontext_t *lx_ucp;
 951         ucontext_t *ucp;
 952         uintptr_t sp;
 953 
 954         rp = lx_syscall_regs();
 955 
 956         /*
 957          * NOTE:  Because of some silly compatibility measures done in the
 958          *        signal trampoline code to make sure it uses the _exact same_
 959          *        instruction sequence Linux does, we have to manually "pop"
 960          *        one extra four byte instruction off the stack here before
 961          *        passing the stack address to the syscall because the
 962          *        trampoline code isn't allowed to do it.
 963          *
 964          *        No, I'm not kidding.
 965          *
 966          *        The sp saved in the context is eight bytes off of where we
 967          *        need it to be, so the need to pop the extra four byte
 968          *        instruction means we need to subtract a net four bytes from
 969          *        the sp before "popping" the struct lx_sigstack off the stack.
 970          *        This will yield the value the stack pointer had before
 971          *        lx_sigdeliver() created the stack frame for the Linux signal
 972          *        handler.
 973          */
 974         sp = (uintptr_t)rp->lxr_esp - 4;
 975 
 976         /*
 977          * At this point, the stack pointer should point to the struct
 978          * lx_sigstack that lx_build_signal_frame() constructed and
 979          * placed on the stack.  We need to reference it a bit later, so
 980          * save a pointer to it before incrementing our copy of the sp.
 981          */
 982         lx_ssp = (struct lx_sigstack *)sp;
 983         sp += sizeof (struct lx_sigstack);
 984 
 985         /*
 986          * lx_sigdeliver() pushes LX_SIGRT_MAGIC on the stack before it
 987          * creates the struct lx_sigstack (and possibly struct lx_fpstate_t).
 988          *
 989          * If we don't find it here, the stack's been corrupted and we need to
 990          * kill ourselves.
 991          */
 992         if (*(uint32_t *)sp != LX_SIGRT_MAGIC)
 993                 lx_err_fatal(gettext("sp @ 0x%p, expected 0x%x, found 0x%x!"),
 994                     sp, LX_SIGRT_MAGIC, *(uint32_t *)sp);
 995 
 996         sp += sizeof (uint32_t);
 997 
 998         /*
 999          * For signal mask handling to be done properly, this call needs to
1000          * return to the libc routine that originally called the signal handler
1001          * rather than directly set the context back to the place the signal
1002          * interrupted execution as the original Linux code would do.
1003          *
1004          * Here *sp points to the Solaris ucontext_t, so we need to copy
1005          * machine registers the Linux signal handler may have modified
1006          * back to the Solaris version.
1007          */
1008         ucp = (ucontext_t *)(*(uint32_t *)sp);
1009 
1010         lx_ucp = lx_ssp->ucp;
1011 
1012         if (lx_ucp != NULL) {
1013                 /*
1014                  * General registers copy across as-is, except Linux expects
1015                  * that changes made to uc_mcontext.gregs[ESP] will be reflected
1016                  * when the interrupted thread resumes execution after the
1017                  * signal handler. To emulate this behavior, we must modify
1018                  * uc_mcontext.gregs[UESP] to match uc_mcontext.gregs[ESP] as
1019                  * Solaris will restore the UESP value to ESP.
1020                  */
1021                 lx_ucp->uc_sigcontext.sc_esp_at_signal =
1022                     lx_ucp->uc_sigcontext.sc_esp;
1023                 bcopy(&lx_ucp->uc_sigcontext, &ucp->uc_mcontext.gregs,
1024                     sizeof (gregset_t));
1025 
1026                 if (lx_ucp->uc_sigcontext.sc_fpstate != NULL)
1027                         ltos_fpstate(lx_ucp->uc_sigcontext.sc_fpstate,
1028                             &ucp->uc_mcontext.fpregs);
1029 
1030                 /*
1031                  * Convert the Linux signal mask and stack back to their
1032                  * Solaris equivalents.
1033                  */
1034                 (void) ltos_sigset(&lx_ucp->uc_sigmask, &ucp->uc_sigmask);
1035                 ltos_stack(&lx_ucp->uc_stack, &ucp->uc_stack);
1036         }
1037 
1038         /*
1039          * At this point sp contains the value of the stack pointer when
1040          * lx_call_user_handler() was called.
1041          *
1042          * Pop one more value off the stack and pass the new sp to
1043          * lx_sigreturn_tolibc(), which will in turn manipulate the x86
1044          * registers to make it appear to libc's call_user_handler() as if the
1045          * handler it had called returned.
1046          */
1047         sp += sizeof (uint32_t);
1048         lx_debug("calling lx_sigreturn_tolibc(0x%p)", sp);
1049         lx_sigreturn_tolibc(sp);
1050 
1051         /*NOTREACHED*/
1052         return (0);
1053 }
1054 
1055 /*
1056  * Build signal frame for processing for "old" (legacy) Linux signals
1057  */
1058 static void
1059 lx_build_old_signal_frame(int lx_sig, siginfo_t *sip, void *p, void *sp)
1060 {
1061         extern void lx_sigreturn_tramp();
1062 
1063         lx_sigset_t lx_sigset;
1064         ucontext_t *ucp = (ucontext_t *)p;
1065         struct lx_sigaction *lxsap;
1066         struct lx_oldsigstack *lx_ossp = sp;
1067 
1068         lx_debug("building old signal frame for lx sig %d at 0x%p", lx_sig, sp);
1069 
1070         lx_ossp->sig = lx_sig;
1071         lxsap = &lx_sighandlers.lx_sa[lx_sig];
1072         lx_debug("lxsap @ 0x%p", lxsap);
1073 
1074         if (lxsap && (lxsap->lxsa_flags & LX_SA_RESTORER) &&
1075             lxsap->lxsa_restorer) {
1076                 lx_ossp->retaddr = lxsap->lxsa_restorer;
1077                 lx_debug("lxsa_restorer exists @ 0x%p", lx_ossp->retaddr);
1078         } else {
1079                 lx_ossp->retaddr = lx_sigreturn_tramp;
1080                 lx_debug("lx_ossp->retaddr set to 0x%p", lx_sigreturn_tramp);
1081         }
1082 
1083         lx_debug("osf retaddr = 0x%p", lx_ossp->retaddr);
1084 
1085         /* convert Solaris signal mask and stack to their Linux equivalents */
1086         (void) stol_sigset(&ucp->uc_sigmask, &lx_sigset);
1087         lx_ossp->sigc.sc_mask = lx_sigset.__bits[0];
1088         lx_ossp->sig_extra = lx_sigset.__bits[1];
1089 
1090         /*
1091          * General registers copy across as-is, except Linux expects that
1092          * uc_mcontext.gregs[ESP] == uc_mcontext.gregs[UESP] on receipt of a
1093          * signal.
1094          */
1095         bcopy(&ucp->uc_mcontext, &lx_ossp->sigc, sizeof (gregset_t));
1096         lx_ossp->sigc.sc_esp = lx_ossp->sigc.sc_esp_at_signal;
1097 
1098         /*
1099          * cr2 contains the faulting address, and Linux only sets cr2 for a
1100          * a segmentation fault.
1101          */
1102         lx_ossp->sigc.sc_cr2 = (((lx_sig == LX_SIGSEGV) && (sip)) ?
1103             (uintptr_t)sip->si_addr : 0);
1104 
1105         /* convert FP regs if present */
1106         if (ucp->uc_flags & UC_FPU) {
1107                 stol_fpstate(&ucp->uc_mcontext.fpregs, &lx_ossp->fpstate);
1108                 lx_ossp->sigc.sc_fpstate = &lx_ossp->fpstate;
1109         } else {
1110                 lx_ossp->sigc.sc_fpstate = NULL;
1111         }
1112 
1113         /*
1114          * Believe it or not, gdb wants to SEE the trampoline code on the
1115          * bottom of the stack to determine whether the stack frame belongs to
1116          * a signal handler, even though this code is no longer actually
1117          * called.
1118          *
1119          * You can't make this stuff up.
1120          */
1121         bcopy((void *)lx_sigreturn_tramp, lx_ossp->trampoline,
1122             sizeof (lx_ossp->trampoline));
1123 }
1124 
1125 /*
1126  * Build signal frame for processing for modern Linux signals
1127  */
1128 static void
1129 lx_build_signal_frame(int lx_sig, siginfo_t *sip, void *p, void *sp)
1130 {
1131         extern void lx_rt_sigreturn_tramp();
1132 
1133         lx_ucontext_t *lx_ucp;
1134         ucontext_t *ucp = (ucontext_t *)p;
1135         struct lx_sigstack *lx_ssp = sp;
1136         struct lx_sigaction *lxsap;
1137 
1138         lx_debug("building signal frame for lx sig %d at 0x%p", lx_sig, sp);
1139 
1140         lx_ucp = &lx_ssp->uc;
1141         lx_ssp->ucp = lx_ucp;
1142         lx_ssp->sig = lx_sig;
1143 
1144         lxsap = &lx_sighandlers.lx_sa[lx_sig];
1145         lx_debug("lxsap @ 0x%p", lxsap);
1146 
1147         if (lxsap && (lxsap->lxsa_flags & LX_SA_RESTORER) &&
1148             lxsap->lxsa_restorer) {
1149                 lx_ssp->retaddr = lxsap->lxsa_restorer;
1150                 lx_debug("lxsa_restorer exists @ 0x%p", lx_ssp->retaddr);
1151         } else {
1152                 lx_ssp->retaddr = lx_rt_sigreturn_tramp;
1153                 lx_debug("lx_ssp->retaddr set to 0x%p", lx_rt_sigreturn_tramp);
1154         }
1155 
1156         /* Linux has these fields but always clears them to 0 */
1157         lx_ucp->uc_flags = 0;
1158         lx_ucp->uc_link = NULL;
1159 
1160         /* convert Solaris signal mask and stack to their Linux equivalents */
1161         (void) stol_sigset(&ucp->uc_sigmask, &lx_ucp->uc_sigmask);
1162         stol_stack(&ucp->uc_stack, &lx_ucp->uc_stack);
1163 
1164         /*
1165          * General registers copy across as-is, except Linux expects that
1166          * uc_mcontext.gregs[ESP] == uc_mcontext.gregs[UESP] on receipt of a
1167          * signal.
1168          */
1169         bcopy(&ucp->uc_mcontext, &lx_ucp->uc_sigcontext, sizeof (gregset_t));
1170         lx_ucp->uc_sigcontext.sc_esp = lx_ucp->uc_sigcontext.sc_esp_at_signal;
1171 
1172         /*
1173          * cr2 contains the faulting address, which Linux only sets for a
1174          * a segmentation fault.
1175          */
1176         lx_ucp->uc_sigcontext.sc_cr2 = ((lx_sig == LX_SIGSEGV) && (sip)) ?
1177             (uintptr_t)sip->si_addr : 0;
1178 
1179         /*
1180          * Point the lx_siginfo_t pointer to the signal stack's lx_siginfo_t
1181          * if there was a Solaris siginfo_t to convert, otherwise set it to
1182          * NULL.
1183          */
1184         if ((sip) && (stol_siginfo(sip, &lx_ssp->si) == 0))
1185                 lx_ssp->sip = &lx_ssp->si;
1186         else
1187                 lx_ssp->sip = NULL;
1188 
1189         /* convert FP regs if present */
1190         if (ucp->uc_flags & UC_FPU) {
1191                 /*
1192                  * Copy FP regs to the appropriate place in the the lx_sigstack
1193                  * structure.
1194                  */
1195                 stol_fpstate(&ucp->uc_mcontext.fpregs, &lx_ssp->fpstate);
1196                 lx_ucp->uc_sigcontext.sc_fpstate = &lx_ssp->fpstate;
1197         } else
1198                 lx_ucp->uc_sigcontext.sc_fpstate = NULL;
1199 
1200         /*
1201          * Believe it or not, gdb wants to SEE the trampoline code on the
1202          * bottom of the stack to determine whether the stack frame belongs to
1203          * a signal handler, even though this code is no longer actually
1204          * called.
1205          *
1206          * You can't make this stuff up.
1207          */
1208         bcopy((void *)lx_rt_sigreturn_tramp, lx_ssp->trampoline,
1209             sizeof (lx_ssp->trampoline));
1210 }
1211 
1212 /*
1213  * This is the second level interposition handler for Linux signals.
1214  */
1215 static void
1216 lx_call_user_handler(int sig, siginfo_t *sip, void *p)
1217 {
1218         void (*user_handler)();
1219         void (*stk_builder)();
1220 
1221         lx_tsd_t *lx_tsd;
1222         struct lx_sigaction *lxsap;
1223         ucontext_t *ucp = (ucontext_t *)p;
1224         uintptr_t gs;
1225         size_t stksize;
1226         int err, lx_sig;
1227 
1228         /*
1229          * If Solaris signal has no Linux equivalent, effectively
1230          * ignore it.
1231          */
1232         if ((lx_sig = stol_signo[sig]) == -1) {
1233                 lx_debug("caught solaris signal %d, no Linux equivalent", sig);
1234                 return;
1235         }
1236 
1237         lx_debug("interpose caught solaris signal %d, translating to Linux "
1238             "signal %d", sig, lx_sig);
1239 
1240         lxsap = &lx_sighandlers.lx_sa[lx_sig];
1241         lx_debug("lxsap @ 0x%p", lxsap);
1242 
1243         if ((sig == SIGPWR) && (lxsap->lxsa_handler == SIG_DFL)) {
1244                 /* Linux SIG_DFL for SIGPWR is to terminate */
1245                 exit(LX_SIGPWR | 0x80);
1246         }
1247 
1248         if ((lxsap->lxsa_handler == SIG_DFL) ||
1249             (lxsap->lxsa_handler == SIG_IGN))
1250                 lx_err_fatal(gettext("%s set to %s?  How?!?!?"),
1251                     "lxsa_handler",
1252                     ((lxsap->lxsa_handler == SIG_DFL) ? "SIG_DFL" : "SIG_IGN"),
1253                     lxsap->lxsa_handler);
1254 
1255         if ((err = thr_getspecific(lx_tsd_key, (void **)&lx_tsd)) != 0)
1256                 lx_err_fatal(gettext(
1257                     "%s: unable to read thread-specific data: %s"),
1258                     "lx_call_user_handler", strerror(err));
1259 
1260         assert(lx_tsd != 0);
1261 
1262         gs = lx_tsd->lxtsd_gs & 0xffff;          /* gs is only 16 bits */
1263 
1264         /*
1265          * Any zero %gs value should be caught when a save is attempted in
1266          * lx_emulate(), but this extra check will catch any zero values due to
1267          * bugs in the library.
1268          */
1269         assert(gs != 0);
1270 
1271         if (lxsap->lxsa_flags & LX_SA_SIGINFO) {
1272                 stksize = sizeof (struct lx_sigstack);
1273                 stk_builder = lx_build_signal_frame;
1274         } else  {
1275                 stksize = sizeof (struct lx_oldsigstack);
1276                 stk_builder = lx_build_old_signal_frame;
1277         }
1278 
1279         user_handler = lxsap->lxsa_handler;
1280 
1281         lx_debug("delivering %d (lx %d) to handler at 0x%p with gs 0x%x", sig,
1282             lx_sig, lxsap->lxsa_handler, gs);
1283 
1284         if (lxsap->lxsa_flags & LX_SA_RESETHAND)
1285                 lxsap->lxsa_handler = SIG_DFL;
1286 
1287         /*
1288          * lx_sigdeliver() doesn't return, so it relies on the Linux
1289          * signal handlers to clean up the stack, reset the current
1290          * signal mask and return to the code interrupted by the signal.
1291          */
1292         lx_sigdeliver(lx_sig, sip, ucp, stksize, stk_builder, user_handler, gs);
1293 }
1294 
1295 /*
1296  * Common routine to modify sigaction characteristics of a thread.
1297  *
1298  * We shouldn't need any special locking code here as we actually use
1299  * libc's sigaction() to do all the real work, so its thread locking should
1300  * take care of any issues for us.
1301  */
1302 static int
1303 lx_sigaction_common(int lx_sig, struct lx_sigaction *lxsp,
1304     struct lx_sigaction *olxsp)
1305 {
1306         struct lx_sigaction *lxsap;
1307         struct sigaction sa;
1308 
1309         if (lx_sig <= 0 || lx_sig >= LX_NSIG)
1310                 return (-EINVAL);
1311 
1312         lxsap = &lx_sighandlers.lx_sa[lx_sig];
1313         lx_debug("&lx_sighandlers.lx_sa[%d] = 0x%p", lx_sig, lxsap);
1314 
1315         if ((olxsp != NULL) &&
1316             ((uucopy(lxsap, olxsp, sizeof (struct lx_sigaction))) != 0))
1317                 return (-errno);
1318 
1319         if (lxsp != NULL) {
1320                 int err, sig;
1321                 struct lx_sigaction lxsa;
1322                 sigset_t new_set, oset;
1323 
1324                 if (uucopy(lxsp, &lxsa, sizeof (struct lx_sigaction)) != 0)
1325                         return (-errno);
1326 
1327                 if ((sig = ltos_signo[lx_sig]) != -1) {
1328                         /*
1329                          * Block this signal while messing with its dispostion
1330                          */
1331                         (void) sigemptyset(&new_set);
1332                         (void) sigaddset(&new_set, sig);
1333 
1334                         if (sigprocmask(SIG_BLOCK, &new_set, &oset) < 0) {
1335                                 err = errno;
1336                                 lx_debug("unable to block signal %d: %s", sig,
1337                                     strerror(err));
1338                                 return (-err);
1339                         }
1340 
1341                         /*
1342                          * We don't really need the old signal disposition at
1343                          * this point, but this weeds out signals that would
1344                          * cause sigaction() to return an error before we change
1345                          * anything other than the current signal mask.
1346                          */
1347                         if (sigaction(sig, NULL, &sa) < 0) {
1348                                 err = errno;
1349                                 lx_debug("sigaction() to get old "
1350                                     "disposition for signal %d failed: "
1351                                     "%s", sig, strerror(err));
1352                                 (void) sigprocmask(SIG_SETMASK, &oset, NULL);
1353                                 return (-err);
1354                         }
1355 
1356                         if ((lxsa.lxsa_handler != SIG_DFL) &&
1357                             (lxsa.lxsa_handler != SIG_IGN)) {
1358                                 sa.sa_handler = lx_call_user_handler;
1359 
1360                                 /*
1361                                  * The interposition signal handler needs the
1362                                  * information provided via the SA_SIGINFO flag.
1363                                  */
1364                                 sa.sa_flags = SA_SIGINFO;
1365 
1366                                 if (lxsa.lxsa_flags & LX_SA_NOCLDSTOP)
1367                                         sa.sa_flags |= SA_NOCLDSTOP;
1368                                 if (lxsa.lxsa_flags & LX_SA_NOCLDWAIT)
1369                                         sa.sa_flags |= SA_NOCLDWAIT;
1370                                 if (lxsa.lxsa_flags & LX_SA_ONSTACK)
1371                                         sa.sa_flags |= SA_ONSTACK;
1372                                 if (lxsa.lxsa_flags & LX_SA_RESTART)
1373                                         sa.sa_flags |= SA_RESTART;
1374                                 if (lxsa.lxsa_flags & LX_SA_NODEFER)
1375                                         sa.sa_flags |= SA_NODEFER;
1376 
1377                                 /*
1378                                  * Can't use RESETHAND with SIGPWR due to
1379                                  * different default actions between Linux
1380                                  * and Solaris.
1381                                  */
1382                                 if ((sig != SIGPWR) &&
1383                                     (lxsa.lxsa_flags & LX_SA_RESETHAND))
1384                                         sa.sa_flags |= SA_RESETHAND;
1385 
1386                                 if (ltos_sigset(&lxsa.lxsa_mask,
1387                                     &sa.sa_mask) != 0) {
1388                                         err = errno;
1389                                         (void) sigprocmask(SIG_SETMASK, &oset,
1390                                             NULL);
1391                                         return (-err);
1392                                 }
1393 
1394                                 lx_debug("interposing handler @ 0x%p for "
1395                                     "signal %d (lx %d), flags 0x%x",
1396                                     lxsa.lxsa_handler, sig, lx_sig,
1397                                     lxsa.lxsa_flags);
1398 
1399                                 if (sigaction(sig, &sa, NULL) < 0) {
1400                                         err = errno;
1401                                         lx_debug("sigaction() to set new "
1402                                             "disposition for signal %d failed: "
1403                                             "%s", sig, strerror(err));
1404                                         (void) sigprocmask(SIG_SETMASK, &oset,
1405                                             NULL);
1406                                         return (-err);
1407                                 }
1408                         } else if ((sig != SIGPWR) ||
1409                             ((sig == SIGPWR) &&
1410                             (lxsa.lxsa_handler == SIG_IGN))) {
1411                                 /*
1412                                  * There's no need to interpose for SIG_DFL or
1413                                  * SIG_IGN so just call libc's sigaction(), but
1414                                  * don't allow SIG_DFL for SIGPWR due to
1415                                  * differing default actions between Linux and
1416                                  * Solaris.
1417                                  *
1418                                  * Get the previous disposition first so things
1419                                  * like sa_mask and sa_flags are preserved over
1420                                  * a transition to SIG_DFL or SIG_IGN, which is
1421                                  * what Linux expects.
1422                                  */
1423 
1424                                 sa.sa_handler = lxsa.lxsa_handler;
1425 
1426                                 if (sigaction(sig, &sa, NULL) < 0) {
1427                                         err = errno;
1428                                         lx_debug("sigaction(%d, %s) failed: %s",
1429                                             sig, ((sa.sa_handler == SIG_DFL) ?
1430                                             "SIG_DFL" : "SIG_IGN"),
1431                                             strerror(err));
1432                                         (void) sigprocmask(SIG_SETMASK, &oset,
1433                                             NULL);
1434                                         return (-err);
1435                                 }
1436                         }
1437                 } else {
1438                         lx_debug("Linux signal with no kill support "
1439                             "specified: %d", lx_sig);
1440                 }
1441 
1442                 /*
1443                  * Save the new disposition for the signal in the global
1444                  * lx_sighandlers structure.
1445                  */
1446                 bcopy(&lxsa, lxsap, sizeof (struct lx_sigaction));
1447 
1448                 /*
1449                  * Reset the signal mask to what we came in with if
1450                  * we were modifying a kill-supported signal.
1451                  */
1452                 if (sig != -1)
1453                         (void) sigprocmask(SIG_SETMASK, &oset, NULL);
1454         }
1455 
1456         return (0);
1457 }
1458 
1459 int
1460 lx_sigaction(uintptr_t lx_sig, uintptr_t actp, uintptr_t oactp)
1461 {
1462         int val;
1463         struct lx_sigaction sa, osa;
1464         struct lx_sigaction *sap, *osap;
1465         struct lx_osigaction *osp;
1466 
1467         sap = (actp ? &sa : NULL);
1468         osap = (oactp ? &osa : NULL);
1469 
1470         /*
1471          * If we have a source pointer, convert source lxsa_mask from
1472          * lx_osigset_t to lx_sigset_t format.
1473          */
1474         if (sap) {
1475                 osp = (struct lx_osigaction *)actp;
1476                 sap->lxsa_handler = osp->lxsa_handler;
1477 
1478                 bzero(&sap->lxsa_mask, sizeof (lx_sigset_t));
1479 
1480                 for (val = 1; val <= OSIGSET_NBITS; val++)
1481                         if (osp->lxsa_mask & OSIGSET_BITSET(val))
1482                                 (void) lx_sigaddset(&sap->lxsa_mask, val);
1483 
1484                 sap->lxsa_flags = osp->lxsa_flags;
1485                 sap->lxsa_restorer = osp->lxsa_restorer;
1486         }
1487 
1488         if ((val = lx_sigaction_common(lx_sig, sap, osap)))
1489                 return (val);
1490 
1491         /*
1492          * If we have a save pointer, convert the old lxsa_mask from
1493          * lx_sigset_t to lx_osigset_t format.
1494          */
1495         if (osap) {
1496                 osp = (struct lx_osigaction *)oactp;
1497 
1498                 osp->lxsa_handler = osap->lxsa_handler;
1499 
1500                 bzero(&osp->lxsa_mask, sizeof (osp->lxsa_mask));
1501                 for (val = 1; val <= OSIGSET_NBITS; val++)
1502                         if (lx_sigismember(&osap->lxsa_mask, val))
1503                                 osp->lxsa_mask |= OSIGSET_BITSET(val);
1504 
1505                 osp->lxsa_flags = osap->lxsa_flags;
1506                 osp->lxsa_restorer = osap->lxsa_restorer;
1507         }
1508 
1509         return (0);
1510 }
1511 
1512 int
1513 lx_rt_sigaction(uintptr_t lx_sig, uintptr_t actp, uintptr_t oactp,
1514     uintptr_t setsize)
1515 {
1516         /*
1517          * The "new" rt_sigaction call checks the setsize
1518          * parameter.
1519          */
1520         if ((size_t)setsize != sizeof (lx_sigset_t))
1521                 return (-EINVAL);
1522 
1523         return (lx_sigaction_common(lx_sig, (struct lx_sigaction *)actp,
1524             (struct lx_sigaction *)oactp));
1525 }
1526 
1527 /*
1528  * Convert signal syscall to a call to the lx_sigaction() syscall
1529  */
1530 int
1531 lx_signal(uintptr_t lx_sig, uintptr_t handler)
1532 {
1533         struct sigaction act;
1534         struct sigaction oact;
1535         int rc;
1536 
1537         /*
1538          * Use sigaction to mimic SYSV signal() behavior; glibc will
1539          * actually call sigaction(2) itself, so we're really reaching
1540          * back for signal(2) semantics here.
1541          */
1542         bzero(&act, sizeof (act));
1543         act.sa_handler = (void (*)())handler;
1544         act.sa_flags = SA_RESETHAND | SA_NODEFER;
1545 
1546         rc = lx_sigaction(lx_sig, (uintptr_t)&act, (uintptr_t)&oact);
1547         return ((rc == 0) ? ((int)oact.sa_handler) : rc);
1548 }
1549 
/*
 * Emulate the Linux tgkill(2) system call.
 *
 * Only the case where the thread group id equals the target id is handled;
 * that case is forwarded to lx_tkill().
 *
 * Returns -EINVAL if either id is not positive when viewed as a pid_t,
 * -ENOTSUP if the two ids differ, and otherwise the result of lx_tkill().
 */
int
lx_tgkill(uintptr_t tgid, uintptr_t pid, uintptr_t sig)
{
        pid_t group_id = (pid_t)tgid;
        pid_t target_id = (pid_t)pid;

        if ((group_id <= 0) || (target_id <= 0))
                return (-EINVAL);

        if (tgid == pid) {
                /*
                 * lx_tkill() carries the prototype that IN_KERNEL_SYSCALL
                 * in lx_brand.c generates for it, so the unused trailing
                 * arguments are filled with NULLs.
                 */
                return (lx_tkill(pid, sig, NULL, NULL, NULL, NULL));
        }

        lx_unsupported(gettext(
            "BrandZ tgkill(2) does not support gid != pid\n"));
        return (-ENOTSUP);
}
1568 
1569 /*
1570  * This C routine to save the passed %gs value into the thread-specific save
1571  * area is called by the assembly routine lx_sigacthandler.
1572  */
1573 void
1574 lx_sigsavegs(uintptr_t signalled_gs)
1575 {
1576         lx_tsd_t *lx_tsd;
1577         int err;
1578 
1579         signalled_gs &= 0xffff;             /* gs is only 16 bits */
1580 
1581         /*
1582          * While a %gs of 0 is technically legal (as long as the application
1583          * never dereferences memory using %gs), Solaris has its own ideas as
1584          * to how a zero %gs should be handled in _update_sregs(), such that
1585          * any 32-bit user process with a %gs of zero running on a system with
1586          * a 64-bit kernel will have its %gs hidden base register stomped on on
1587          * return from a system call, leaving an incorrect base address in
1588          * place until the next time %gs is actually reloaded (forcing a reload
1589          * of the base address from the appropriate descriptor table.)
1590          *
1591          * Of course the kernel will once again stomp on THAT base address when
1592          * returning from a system call, resulting in an application
1593          * segmentation fault.
1594          *
1595          * To avoid this situation, disallow a save of a zero %gs here in order
1596          * to try and capture any Linux process that takes a signal with a zero
1597          * %gs installed.
1598          */
1599         assert(signalled_gs != 0);
1600 
1601         if (signalled_gs != LWPGS_SEL) {
1602                 if ((err = thr_getspecific(lx_tsd_key,
1603                     (void **)&lx_tsd)) != 0)
1604                         lx_err_fatal(gettext(
1605                             "%s: unable to read thread-specific data: %s"),
1606                             "sigsavegs", strerror(err));
1607 
1608                 assert(lx_tsd != 0);
1609 
1610                 lx_tsd->lxtsd_gs = signalled_gs;
1611 
1612                 lx_debug("lx_sigsavegs(): gsp 0x%p, saved gs: 0x%x\n",
1613                     lx_tsd, signalled_gs);
1614         }
1615 }
1616 
1617 int
1618 lx_siginit(void)
1619 {
1620         extern void set_setcontext_enforcement(int);
1621         extern void lx_sigacthandler(int, siginfo_t *, void *);
1622 
1623         struct sigaction sa;
1624         sigset_t new_set, oset;
1625         int lx_sig, sig;
1626 
1627         /*
1628          * Block all signals possible while setting up the signal imposition
1629          * mechanism.
1630          */
1631         (void) sigfillset(&new_set);
1632 
1633         if (sigprocmask(SIG_BLOCK, &new_set, &oset) < 0)
1634                 lx_err_fatal(gettext("unable to block signals while setting up "
1635                     "imposition mechanism: %s"), strerror(errno));
1636 
1637         /*
1638          * Ignore any signals that have no Linux analog so that those
1639          * signals cannot be sent to Linux processes from the global zone
1640          */
1641         for (sig = 1; sig < NSIG; sig++)
1642                 if (stol_signo[sig] < 0)
1643                         (void) sigignore(sig);
1644 
1645         /*
1646          * As mentioned previously, when a user signal handler is installed
1647          * via sigaction(), libc interposes on the mechanism by actually
1648          * installing an internal routine sigacthandler() as the signal
1649          * handler.  On receipt of the signal, libc does some thread-related
1650          * processing via sigacthandler(), then calls the registered user
1651          * signal handler on behalf of the user.
1652          *
1653          * We need to interpose on that mechanism to make sure the correct
1654          * %gs segment register value is installed before the libc routine
1655          * is called, otherwise the libc code will die with a segmentation
1656          * fault.
1657          *
1658          * The private libc routine setsigacthandler() will set our
1659          * interposition routine, lx_sigacthandler(), as the default
1660          * "sigacthandler" routine for all new signal handlers for this
1661          * thread.
1662          */
1663         setsigacthandler(lx_sigacthandler, &libc_sigacthandler);
1664         lx_debug("lx_sigacthandler installed, libc_sigacthandler = 0x%p",
1665             libc_sigacthandler);
1666 
1667         /*
1668          * Mark any signals that are ignored as ignored in our interposition
1669          * handler array
1670          */
1671         for (lx_sig = 1; lx_sig < LX_NSIG; lx_sig++) {
1672                 if (((sig = ltos_signo[lx_sig]) != -1) &&
1673                     (sigaction(sig, NULL, &sa) < 0))
1674                         lx_err_fatal(gettext("unable to determine previous "
1675                             "disposition for signal %d: %s"),
1676                             sig, strerror(errno));
1677 
1678                 if (sa.sa_handler == SIG_IGN) {
1679                         lx_debug("marking signal %d (lx %d) as SIG_IGN",
1680                             sig, lx_sig);
1681                         lx_sighandlers.lx_sa[lx_sig].lxsa_handler = SIG_IGN;
1682                 }
1683         }
1684 
1685         /*
1686          * Have our interposition handler handle SIGPWR to start with,
1687          * as it has a default action of terminating the process in Linux
1688          * but its default is to be ignored in Solaris.
1689          */
1690         (void) sigemptyset(&sa.sa_mask);
1691         sa.sa_sigaction = lx_call_user_handler;
1692         sa.sa_flags = SA_SIGINFO;
1693 
1694         if (sigaction(SIGPWR, &sa, NULL) < 0)
1695                 lx_err_fatal(gettext("%s failed: %s"), "sigaction(SIGPWR)",
1696                     strerror(errno));
1697 
1698         /*
1699          * Solaris' libc forces certain register values in the ucontext_t
1700          * used to restore a post-signal user context to be those Solaris
1701          * expects; however that is not what we want to happen if the signal
1702          * was taken while branded code was executing, so we must disable
1703          * that behavior.
1704          */
1705         set_setcontext_enforcement(0);
1706 
1707         /*
1708          * Reset the signal mask to what we came in with
1709          */
1710         (void) sigprocmask(SIG_SETMASK, &oset, NULL);
1711 
1712         lx_debug("interposition handler setup for SIGPWR");
1713         return (0);
1714 }