1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 /*
28 * Copyright 2015 Joyent, Inc. All rights reserved.
29 */
30
#include <sys/types.h>
#include <sys/param.h>
#include <sys/segments.h>
#include <sys/lx_types.h>
#include <sys/lx_brand.h>
#include <sys/lx_misc.h>
#include <sys/lx_debug.h>
#include <sys/lx_poll.h>
#include <sys/lx_signal.h>
#include <sys/lx_sigstack.h>
#include <sys/lx_syscall.h>
#include <sys/lx_thread.h>
#include <sys/syscall.h>
#include <lx_provider_impl.h>
#include <sys/stack.h>
#include <assert.h>
#include <errno.h>
#include <poll.h>
#include <rctl.h>
#include <signal.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <thread.h>
#include <ucontext.h>
#include <unistd.h>
#include <stdio.h>
#include <libintl.h>
#include <ieeefp.h>
#include <sys/signalfd.h>
61
#if defined(_ILP32)
/*
 * Defined outside this file; the declaration is only needed (and only
 * visible) in 32-bit (_ILP32) builds.
 */
extern int pselect_large_fdset(int nfds, fd_set *in0, fd_set *out0, fd_set *ex0,
    const timespec_t *tsp, const sigset_t *sp);
#endif

/*
 * Yields the smaller of its two arguments.  Each argument is expanded more
 * than once, so do not pass expressions that have side effects.
 */
#define	MIN(a, b)	((a) < (b) ? (a) : (b))
68
69 /*
70 * Delivering signals to a Linux process is complicated by differences in
71 * signal numbering, stack structure and contents, and the action taken when a
72 * signal handler exits. In addition, many signal-related structures, such as
73 * sigset_ts, vary between Illumos and Linux.
74 *
75 * To support user-level signal handlers, the brand uses a double layer of
76 * indirection to process and deliver signals to branded threads.
77 *
78 * When a Linux process sends a signal using the kill(2) system call, we must
79 * translate the signal into the Illumos equivalent before handing control off
80 * to the standard signalling mechanism. When a signal is delivered to a Linux
 * process, we translate the signal number from Illumos back to Linux.
82 * Translating signals both at generation and delivery time ensures both that
83 * Illumos signals are sent properly to Linux applications and that signals'
84 * default behavior works as expected.
85 *
86 * In a normal Illumos process, signal delivery is interposed on for any thread
87 * registering a signal handler by libc. Libc needs to do various bits of magic
88 * to provide thread-safe critical regions, so it registers its own handler,
89 * named sigacthandler(), using the sigaction(2) system call. When a signal is
90 * received, sigacthandler() is called, and after some processing, libc turns
91 * around and calls the user's signal handler via a routine named
92 * call_user_handler().
93 *
94 * Adding a Linux branded thread to the mix complicates things somewhat.
95 *
96 * First, when a thread receives a signal, it may either be running in an
97 * emulated Linux context or a native illumos context. In either case, the
98 * in-kernel brand module is responsible for preserving the register state
99 * from the interrupted context, regardless of whether emulated or native
100 * software was running at the time. The kernel is also responsible for
101 * ensuring that the illumos native sigacthandler() is called with register
102 * values appropriate for native code. Of particular note is the %gs segment
103 * selector for 32-bit code, and the %fsbase segment base register for 64-bit
104 * code; these are used by libc to locate per-thread data structures.
105 *
106 * Second, the signal number translation referenced above must take place.
107 * Finally, when we hand control to the Linux signal handler we must do so
108 * on the brand stack, and with registers configured appropriately for the
109 * Linux application.
110 *
111 * This need to translate signal numbers (and manipulate the signal handling
112 * context) means that with standard Illumos libc, following a signal from
113 * generation to delivery looks something like:
114 *
115 * kernel ->
116 * sigacthandler() ->
117 * call_user_handler() ->
118 * user signal handler
119 *
120 * but for the brand's Linux threads, this would look like:
121 *
122 * kernel ->
123 * sigacthandler() ->
124 * call_user_handler() ->
125 * lx_call_user_handler() ->
126 * lx_sigdeliver() ->
127 * syscall(B_JUMP_TO_LINUX, ...) ->
128 * Linux user signal handler
129 *
 * The new additions are:
131 *
132 * lx_call_user_handler
133 * ====================
134 * This routine is responsible for translating Illumos signal numbers to
135 * their Linux equivalents, building a Linux signal stack based on the
136 * information Illumos has provided, and passing the stack to the
137 * registered Linux signal handler. It is, in effect, the Linux thread
138 * equivalent to libc's call_user_handler().
139 *
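 *    lx_sigdeliver
 *    =============
 *	This routine is called by lx_call_user_handler() with a stack-building
 *	helper.  It pushes a lx_sigdeliver_frame_t onto the Linux stack, has
 *	the helper build the Linux signal frame, and transfers control to the
 *	Linux signal handler via B_JUMP_TO_LINUX.  When the emulated
 *	*sigreturn(2) call later restores its "return" context, it returns to
 *	the native libc signal handler in the usual way.
 *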
143 * Note that none of this interposition is necessary unless a Linux thread
144 * registers a user signal handler, as the default action for all signals is the
145 * same between Illumos and Linux save for one signal, SIGPWR. For this reason,
146 * the brand ALWAYS installs its own internal signal handler for SIGPWR that
147 * translates the action to the Linux default, to terminate the process.
148 * (Illumos' default action is to ignore SIGPWR.)
149 *
150 * It is also important to note that when signals are not translated, the brand
151 * relies upon code interposing upon the wait(2) system call to translate
152 * signals to their proper values for any Linux threads retrieving the status
153 * of others. So while the Illumos signal number for a particular signal is set
154 * in a process' data structures (and would be returned as the result of say,
 * WTERMSIG()), the brand's interposition upon wait(2) is responsible for
 * translating the value WTERMSIG() would return from an Illumos signal number
157 * to the appropriate Linux value.
158 *
159 * lx_call_user_handler() calls lx_sigdeliver() with a helper function
160 * (typically lx_build_signal_frame) which builds a stack frame for the 32-bit
161 * Linux signal handler, or populates a local (on the stack) structure for the
162 * 64-bit Linux signal handler. The stack at that time looks like this:
163 *
164 * =========================================================
165 * | | lx_sigdeliver_frame_t -- includes LX_SIGRT_MAGIC and |
166 * | | a return context for the eventual sigreturn(2) call |
167 * | =========================================================
168 * | | Linux signal frame (32-bit) or local data |
169 * V | (64-bit) built by stack_builder() |
170 * =========================================================
171 *
172 * The process of returning to an interrupted thread of execution from a user
173 * signal handler is entirely different between Illumos and Linux. While
174 * Illumos generally expects to set the context to the interrupted one on a
175 * normal return from a signal handler, in the normal case Linux instead calls
176 * code that calls a specific Linux system call, rt_sigreturn(2) (or it also
177 * can call sigreturn(2) in 32-bit code). Thus when a Linux signal handler
178 * completes execution, instead of returning through what would in libc be a
179 * call to setcontext(2), the rt_sigreturn(2) Linux system call is responsible
180 * for accomplishing much the same thing. It's for this reason that the stack
181 * frame we build has the lx_(rt_)sigreturn_tramp code on the top of the
182 * stack. The code looks like this:
183 *
184 * 32-bit 64-bit
185 * -------------------------------- -----------------------------
186 * mov LX_SYS_rt_sigreturn, %eax movq LX_SYS_rt_sigreturn, %rax
187 * int $0x80 syscall
188 *
189 * We also use these same functions (lx_rt_sigreturn_tramp or
190 * lx_sigreturn_tramp) to actually return from the signal handler.
191 *
192 * (Note that this trampoline code actually lives in a proper executable segment
193 * and not on the stack, but gdb checks for the exact code sequence of the
194 * trampoline code on the stack to determine whether it is in a signal stack
195 * frame or not. Really.)
196 *
197 * When the 32-bit Linux user signal handler is eventually called, the brand
198 * stack frame looks like this (in the case of a "modern" signal stack; see
199 * the lx_sigstack structure definition):
200 *
201 * =========================================================
202 * | | lx_sigdeliver_frame_t |
203 * | =========================================================
204 * | | Trampoline code (marker for gdb, not really executed) |
205 * | =========================================================
206 * | | Linux struct _fpstate |
207 * | =========================================================
208 * V | Linux ucontext_t | <--+
209 * ========================================================= |
210 * | Linux siginfo_t | <--|-----+
211 * ========================================================= | |
212 * | Pointer to Linux ucontext_t (or NULL) (sigaction arg2)| ---+ |
213 * ========================================================= |
214 * | Pointer to Linux siginfo_t (or NULL) (sigaction arg1)| ---------+
215 * =========================================================
216 * | Linux signal number (sigaction arg0)|
217 * =========================================================
218 * | Pointer to signal return code (trampoline code) |
219 * =========================================================
220 *
221 * The 64-bit stack-local data looks like this:
222 *
223 * =========================================================
224 * | | lx_sigdeliver_frame_t |
225 * | =========================================================
226 * | | Trampoline code (marker for gdb, not really executed) |
227 * | =========================================================
228 * | | Linux struct _fpstate |
229 * | =========================================================
230 * V | Linux ucontext_t | %rdx arg2
231 * =========================================================
232 * | Linux siginfo_t | %rsi arg1
233 * =========================================================
234 * | Pointer to signal return code (trampoline code) |
235 * =========================================================
236 *
237 * As usual in 64-bit code, %rdi is arg0 which is the signal number.
238 *
239 * The *sigreturn(2) family of emulated system call handlers locates the
240 * "lx_sigdeliver_frame_t" struct on the Linux stack as part of processing
241 * the system call. This object contains a guard value (LX_SIGRT_MAGIC) to
242 * detect stack smashing or an incorrect stack pointer. It also contains a
243 * "return" context, which we use to get back to the "lx_sigdeliver()" frame
244 * on the native stack that originally dispatched to the Linux signal
245 * handler. The lx_sigdeliver() function is then able to return to the
246 * native libc signal handler in the usual way. This results in a further
247 * setcontext() back to whatever was running when we took the signal.
248 *
249 * There are some edge cases where the "return" context cannot be located
250 * by inspection of the Linux stack; e.g. if the guard value has been
251 * corrupted, or the emulated program has relocated parts of the signal
252 * delivery stack frame. If this case is detected, a fallback mechanism is
253 * used to attempt to find the return context. A chain of "lx_sigbackup_t"
254 * objects is maintained in signal interposer call frames, with the current
255 * head stored in the thread-specific "lx_tsd_t". This mechanism is
256 * similar in principle to the "lwp_oldcontext" member of the "klwp_t" used
257 * by the native signal handling infrastructure. This backup chain is used
258 * by the sigreturn(2) family of emulated system calls in the event that
259 * the Linux stack did not correctly reference a return context.
260 */
261
/*
 * Record read back from the Linux stack by
 * lx_sigreturn_find_native_context() when an emulated *sigreturn(2) call is
 * processed.
 */
typedef struct lx_sigdeliver_frame {
	/* Guard value; equals LX_SIGRT_MAGIC while the frame is intact. */
	uintptr_t lxsdf_magic;
	/* "Return" context handed back to the sigreturn caller as *retucp. */
	ucontext_t *lxsdf_retucp;
	/* Signal context handed back to the sigreturn caller as *sigucp. */
	ucontext_t *lxsdf_sigucp;
	/* Value reinstated as the thread's lxtsd_sigbackup chain head. */
	lx_sigbackup_t *lxsdf_sigbackup;
} lx_sigdeliver_frame_t;
268
/*
 * Layout of the old-style signal frame that lx_sigreturn() expects to find
 * at the Linux stack pointer.
 */
struct lx_oldsigstack {
	void (*retaddr)();	/* address of real lx_sigreturn code */
	int sig;		/* signal number */
	lx_sigcontext_t sigc;	/* saved user context */
	lx_fpstate_t fpstate;	/* saved FP state */
	int sig_extra;		/* signal mask for signals [32 .. NSIG - 1] */
	char trampoline[8];	/* code for trampoline to lx_sigreturn() */
};
277
278 /*
279 * The lx_sighandlers structure needs to be a global due to the semantics of
280 * clone().
281 *
282 * If CLONE_SIGHAND is set, the calling process and child share signal
283 * handlers, and if either calls sigaction(2) it should change the behavior
284 * in the other thread. Each thread does, however, have its own signal mask
285 * and set of pending signals.
286 *
287 * If CLONE_SIGHAND is not set, the child process should inherit a copy of
288 * the signal handlers at the time of the clone() but later calls to
289 * sigaction(2) should only affect the individual thread calling it.
290 *
291 * This maps perfectly to a thr_create(3C) thread semantic in the first
292 * case and a fork(2)-type semantic in the second case. By making
293 * lx_sighandlers global, we automatically get the correct behavior.
294 */
static lx_sighandlers_t lx_sighandlers;

/*
 * Setting LX_NO_ABORT_HANDLER in the environment will prevent the emulated
 * Linux program from modifying the signal handling disposition for SIGSEGV or
 * SIGABRT.  This is useful when debugging programs that go out of their way
 * to prevent useful core files from being generated.
 */
static int lx_no_abort_handler = 0;

/*
 * Forward declaration; the function is static, so its definition lives
 * elsewhere in this file.
 */
static void lx_sigdeliver(int, siginfo_t *, ucontext_t *, size_t, void (*)(),
    void (*)(), struct lx_sigaction *);

/*
 * Cached value of the process.max-file-descriptor resource control, kept so
 * that getrctl() need not be called for each lx_ppoll().
 */
static rlim_t maxfd = 0;
313
314 /*
315 * stol_stack() and ltos_stack() convert between Illumos and Linux stack_t
316 * structures.
317 *
318 * These routines are needed because although the two structures have the same
319 * contents, their contents are declared in a different order, so the content
320 * of the structures cannot be copied with a simple bcopy().
321 */
322 static void
323 stol_stack(stack_t *fr, lx_stack_t *to)
324 {
325 to->ss_sp = fr->ss_sp;
326 to->ss_flags = fr->ss_flags;
327 to->ss_size = fr->ss_size;
328 }
329
330 static void
331 ltos_stack(lx_stack_t *fr, stack_t *to)
332 {
333 to->ss_sp = fr->ss_sp;
334 to->ss_flags = fr->ss_flags;
335 to->ss_size = fr->ss_size;
336 }
337
338 static int
339 ltos_sigset(lx_sigset_t *lx_sigsetp, sigset_t *s_sigsetp)
340 {
341 lx_sigset_t l;
342 int lx_sig, sig;
343
344 if (uucopy(lx_sigsetp, &l, sizeof (lx_sigset_t)) != 0)
345 return (-errno);
346
347 (void) sigemptyset(s_sigsetp);
348
349 for (lx_sig = 1; lx_sig <= LX_NSIG; lx_sig++) {
350 if (lx_sigismember(&l, lx_sig) &&
351 ((sig = ltos_signo[lx_sig]) > 0))
352 (void) sigaddset(s_sigsetp, sig);
353 }
354
355 return (0);
356 }
357
358 static int
359 stol_sigset(sigset_t *s_sigsetp, lx_sigset_t *lx_sigsetp)
360 {
361 lx_sigset_t l;
362 int sig, lx_sig;
363
364 bzero(&l, sizeof (lx_sigset_t));
365
366 for (sig = 1; sig < NSIG; sig++) {
367 if (sigismember(s_sigsetp, sig) &&
368 ((lx_sig = stol_signo[sig]) > 0))
369 lx_sigaddset(&l, lx_sig);
370 }
371
372 return ((uucopy(&l, lx_sigsetp, sizeof (lx_sigset_t)) != 0)
373 ? -errno : 0);
374 }
375
376 #if defined(_ILP32)
377 static int
378 ltos_osigset(lx_osigset_t *lx_osigsetp, sigset_t *s_sigsetp)
379 {
380 lx_osigset_t lo;
381 int lx_sig, sig;
382
383 if (uucopy(lx_osigsetp, &lo, sizeof (lx_osigset_t)) != 0)
384 return (-errno);
385
386 (void) sigemptyset(s_sigsetp);
387
388 for (lx_sig = 1; lx_sig <= OSIGSET_NBITS; lx_sig++)
389 if ((lo & OSIGSET_BITSET(lx_sig)) &&
390 ((sig = ltos_signo[lx_sig]) > 0))
391 (void) sigaddset(s_sigsetp, sig);
392
393 return (0);
394 }
395
396 static int
397 stol_osigset(sigset_t *s_sigsetp, lx_osigset_t *lx_osigsetp)
398 {
399 lx_osigset_t lo = 0;
400 int lx_sig, sig;
401
402 /*
403 * Note that an lx_osigset_t can only represent the signals from
404 * [1 .. OSIGSET_NBITS], so even though a signal may be present in the
405 * Illumos sigset_t, it may not be representable as a bit in the
406 * lx_osigset_t.
407 */
408 for (sig = 1; sig < NSIG; sig++)
409 if (sigismember(s_sigsetp, sig) &&
410 ((lx_sig = stol_signo[sig]) > 0) &&
411 (lx_sig <= OSIGSET_NBITS))
412 lo |= OSIGSET_BITSET(lx_sig);
413
414 return ((uucopy(&lo, lx_osigsetp, sizeof (lx_osigset_t)) != 0)
415 ? -errno : 0);
416 }
417 #endif
418
419 static int
420 ltos_sigcode(int si_code)
421 {
422 switch (si_code) {
423 case LX_SI_USER:
424 return (SI_USER);
425 case LX_SI_TKILL:
426 return (SI_LWP);
427 case LX_SI_QUEUE:
428 return (SI_QUEUE);
429 case LX_SI_TIMER:
430 return (SI_TIMER);
431 case LX_SI_ASYNCIO:
432 return (SI_ASYNCIO);
433 case LX_SI_MESGQ:
434 return (SI_MESGQ);
435 default:
436 return (LX_SI_CODE_NOT_EXIST);
437 }
438 }
439
440 int
441 stol_siginfo(siginfo_t *siginfop, lx_siginfo_t *lx_siginfop)
442 {
443 int ret = 0;
444 lx_siginfo_t lx_siginfo;
445
446 bzero(&lx_siginfo, sizeof (*lx_siginfop));
447
448 if ((lx_siginfo.lsi_signo = stol_signo[siginfop->si_signo]) <= 0) {
449 /*
450 * Depending on the caller we may still need to get a usable
451 * converted siginfo struct.
452 */
453 lx_siginfo.lsi_signo = LX_SIGKILL;
454 errno = EINVAL;
455 ret = -1;
456 }
457
458 lx_siginfo.lsi_code = lx_stol_sigcode(siginfop->si_code);
459 lx_siginfo.lsi_errno = siginfop->si_errno;
460
461 switch (lx_siginfo.lsi_signo) {
462 /*
463 * Semantics ARE defined for SIGKILL, but since
464 * we can't catch it, we can't translate it. :-(
465 */
466 case LX_SIGPOLL:
467 lx_siginfo.lsi_band = siginfop->si_band;
468 lx_siginfo.lsi_fd = siginfop->si_fd;
469 break;
470
471 case LX_SIGCHLD:
472 lx_siginfo.lsi_pid = siginfop->si_pid;
473 if (siginfop->si_code <= 0 || siginfop->si_code ==
474 CLD_EXITED) {
475 lx_siginfo.lsi_status = siginfop->si_status;
476 } else {
477 lx_siginfo.lsi_status = lx_stol_status(
478 siginfop->si_status, -1);
479 }
480 lx_siginfo.lsi_utime = siginfop->si_utime;
481 lx_siginfo.lsi_stime = siginfop->si_stime;
482 break;
483
484 case LX_SIGILL:
485 case LX_SIGBUS:
486 case LX_SIGFPE:
487 case LX_SIGSEGV:
488 lx_siginfo.lsi_addr = siginfop->si_addr;
489 break;
490
491 default:
492 lx_siginfo.lsi_pid = siginfop->si_pid;
493 lx_siginfo.lsi_uid =
494 LX_UID32_TO_UID16(siginfop->si_uid);
495 lx_siginfo.lsi_value = siginfop->si_value;
496 break;
497 }
498
499 if (uucopy(&lx_siginfo, lx_siginfop, sizeof (lx_siginfo_t)) != 0)
500 return (-errno);
501 return ((ret != 0) ? -errno : 0);
502 }
503
504 static void
505 stol_fpstate(fpregset_t *fpr, lx_fpstate_t *lfpr)
506 {
507 size_t copy_len;
508
509 #if defined(_LP64)
510 /*
511 * The 64-bit Illumos struct fpregset_t and lx_fpstate_t are identical
512 * so just bcopy() those entries (see usr/src/uts/intel/sys/regset.h
513 * for __amd64's struct fpu).
514 */
515 copy_len = sizeof (fpr->fp_reg_set.fpchip_state);
516 bcopy(fpr, lfpr, copy_len);
517
518 #else /* is _ILP32 */
519 struct _fpstate *fpsp = (struct _fpstate *)fpr;
520
521 /*
522 * The Illumos struct _fpstate and lx_fpstate_t are identical from the
523 * beginning of the structure to the lx_fpstate_t "magic" field, so
524 * just bcopy() those entries.
525 */
526 copy_len = (size_t)&(((lx_fpstate_t *)0)->magic);
527 bcopy(fpsp, lfpr, copy_len);
528
529 /*
530 * These fields are all only significant for the first 16 bits.
531 */
532 lfpr->cw &= 0xffff; /* x87 control word */
533 lfpr->tag &= 0xffff; /* x87 tag word */
534 lfpr->cssel &= 0xffff; /* cs selector */
535 lfpr->datasel &= 0xffff; /* ds selector */
536
537 /*
538 * Linux wants the x87 status word field to contain the value of the
539 * x87 saved exception status word.
540 */
541 lfpr->sw = lfpr->status & 0xffff; /* x87 status word */
542
543 lfpr->mxcsr = fpsp->mxcsr;
544
545 if (fpsp->mxcsr != 0) {
546 /*
547 * Linux uses the "magic" field to denote whether the XMM
548 * registers contain legal data or not. Since we can't get to
549 * %cr4 from userland to check the status of the OSFXSR bit,
550 * check the mxcsr field to see if it's 0, which it should
551 * never be on a system with the OXFXSR bit enabled.
552 */
553 lfpr->magic = LX_X86_FXSR_MAGIC;
554 bcopy(fpsp->xmm, lfpr->_xmm, sizeof (lfpr->_xmm));
555 } else {
556 lfpr->magic = LX_X86_FXSR_NONE;
557 }
558 #endif
559 }
560
561 static void
562 ltos_fpstate(lx_fpstate_t *lfpr, fpregset_t *fpr)
563 {
564 size_t copy_len;
565
566 #if defined(_LP64)
567 /*
568 * The 64-bit Illumos struct fpregset_t and lx_fpstate_t are identical
569 * so just bcopy() those entries (see usr/src/uts/intel/sys/regset.h
570 * for __amd64's struct fpu).
571 */
572 copy_len = sizeof (fpr->fp_reg_set.fpchip_state);
573 bcopy(lfpr, fpr, copy_len);
574
575 #else /* is _ILP32 */
576 struct _fpstate *fpsp = (struct _fpstate *)fpr;
577
578 /*
579 * The lx_fpstate_t and Illumos struct _fpstate are identical from the
580 * beginning of the structure to the struct _fpstate "mxcsr" field, so
581 * just bcopy() those entries.
582 *
583 * Note that we do NOT have to propogate changes the user may have made
584 * to the "status" word back to the "sw" word, unlike the way we have
585 * to deal with processing the ESP and UESP register values on return
586 * from a signal handler.
587 */
588 copy_len = (size_t)&(((struct _fpstate *)0)->mxcsr);
589 bcopy(lfpr, fpsp, copy_len);
590
591 /*
592 * These fields are all only significant for the first 16 bits.
593 */
594 fpsp->cw &= 0xffff; /* x87 control word */
595 fpsp->sw &= 0xffff; /* x87 status word */
596 fpsp->tag &= 0xffff; /* x87 tag word */
597 fpsp->cssel &= 0xffff; /* cs selector */
598 fpsp->datasel &= 0xffff; /* ds selector */
599 fpsp->status &= 0xffff; /* saved status */
600
601 fpsp->mxcsr = lfpr->mxcsr;
602
603 if (lfpr->magic == LX_X86_FXSR_MAGIC)
604 bcopy(lfpr->_xmm, fpsp->xmm, sizeof (fpsp->xmm));
605 #endif
606 }
607
608 /*
609 * We do not use the system sigaltstack() infrastructure as that would conflict
610 * with our handling of both system call emulation and native signals on the
611 * native stack. Instead, we track the Linux stack structure in our
612 * thread-specific data. This function is modeled on the behaviour of the
613 * native sigaltstack system call handler.
614 */
615 long
616 lx_sigaltstack(uintptr_t ssp, uintptr_t oss)
617 {
618 lx_tsd_t *lxtsd = lx_get_tsd();
619 lx_stack_t ss;
620
621 if (ssp != NULL) {
622 if (lxtsd->lxtsd_sigaltstack.ss_flags & LX_SS_ONSTACK) {
623 /*
624 * If we are currently using the installed alternate
625 * stack for signal handling, the user may not modify
626 * the stack for this thread.
627 */
628 return (-EPERM);
629 }
630
631 if (uucopy((void *)ssp, &ss, sizeof (ss)) != 0) {
632 return (-EFAULT);
633 }
634
635 if (ss.ss_flags & ~LX_SS_DISABLE) {
636 /*
637 * The user may not specify a value for flags other
638 * than 0 or SS_DISABLE.
639 */
640 return (-EINVAL);
641 }
642
643 if (!(ss.ss_flags & LX_SS_DISABLE) && ss.ss_size <
644 LX_MINSIGSTKSZ) {
645 return (-ENOMEM);
646 }
647 }
648
649 if (oss != NULL) {
650 /*
651 * User provided old and new stack_t pointers may point to
652 * the same location. Copy out before we modify.
653 */
654 if (uucopy(&lxtsd->lxtsd_sigaltstack, (void *)oss,
655 sizeof (lxtsd->lxtsd_sigaltstack)) != 0) {
656 return (-EFAULT);
657 }
658 }
659
660 if (ssp != NULL) {
661 lxtsd->lxtsd_sigaltstack = ss;
662 }
663
664 return (0);
665 }
666
667 #if defined(_ILP32)
668 /*
669 * The following routines are needed because sigset_ts and siginfo_ts are
670 * different in format between Linux and Illumos.
671 *
672 * Note that there are two different lx_sigset structures, lx_sigset_ts and
673 * lx_osigset_ts:
674 *
 * + An lx_sigset_t is the equivalent of an Illumos sigset_t and supports
676 * more than 32 signals.
677 *
678 * + An lx_osigset_t is simply a uint32_t, so it by definition only supports
679 * 32 signals.
680 *
681 * When there are two versions of a routine, one prefixed with lx_rt_ and
682 * one prefixed with lx_ alone, in GENERAL the lx_rt_ routines deal with
683 * lx_sigset_ts while the lx_ routines deal with lx_osigset_ts. Unfortunately,
684 * this is not always the case (e.g. lx_sigreturn() vs. lx_rt_sigreturn())
685 */
686 long
687 lx_sigpending(uintptr_t sigpend)
688 {
689 sigset_t sigpendset;
690
691 if (sigpending(&sigpendset) != 0)
692 return (-errno);
693
694 return (stol_osigset(&sigpendset, (lx_osigset_t *)sigpend));
695 }
696 #endif
697
698 long
699 lx_rt_sigpending(uintptr_t sigpend, uintptr_t setsize)
700 {
701 sigset_t sigpendset;
702
703 if ((size_t)setsize != sizeof (lx_sigset_t))
704 return (-EINVAL);
705
706 if (sigpending(&sigpendset) != 0)
707 return (-errno);
708
709 return (stol_sigset(&sigpendset, (lx_sigset_t *)sigpend));
710 }
711
712 /*
713 * Create a common routine to encapsulate all of the sigprocmask code,
714 * as the only difference between lx_sigprocmask() and lx_rt_sigprocmask()
715 * is the usage of lx_osigset_ts vs. lx_sigset_ts, as toggled in the code by
716 * the setting of the "sigset_type" flag.
717 */
718 static int
719 lx_sigprocmask_common(uintptr_t how, uintptr_t l_setp, uintptr_t l_osetp,
720 uintptr_t sigset_type)
721 {
722 int err = 0;
723 sigset_t set, oset;
724 sigset_t *s_setp = NULL;
725 sigset_t *s_osetp;
726
727 if (l_setp) {
728 switch (how) {
729 case LX_SIG_BLOCK:
730 how = SIG_BLOCK;
731 break;
732
733 case LX_SIG_UNBLOCK:
734 how = SIG_UNBLOCK;
735 break;
736
737 case LX_SIG_SETMASK:
738 how = SIG_SETMASK;
739 break;
740
741 default:
742 return (-EINVAL);
743 }
744
745 s_setp = &set;
746
747 /* Only 32-bit code passes other than USE_SIGSET */
748 if (sigset_type == USE_SIGSET)
749 err = ltos_sigset((lx_sigset_t *)l_setp, s_setp);
750 #if defined(_ILP32)
751 else
752 err = ltos_osigset((lx_osigset_t *)l_setp, s_setp);
753 #endif
754
755 if (err != 0)
756 return (err);
757
758 }
759
760 s_osetp = (l_osetp ? &oset : NULL);
761
762 /*
763 * In a multithreaded environment, a call to sigprocmask(2) should
764 * only affect the current thread's signal mask so we don't need to
765 * explicitly call thr_sigsetmask(3C) here.
766 */
767 if (sigprocmask(how, s_setp, s_osetp) != 0)
768 return (-errno);
769
770 if (l_osetp) {
771 if (sigset_type == USE_SIGSET)
772 err = stol_sigset(s_osetp, (lx_sigset_t *)l_osetp);
773 #if defined(_ILP32)
774 else
775 err = stol_osigset(s_osetp, (lx_osigset_t *)l_osetp);
776 #endif
777
778 if (err != 0) {
779 /*
780 * Encountered a fault while writing to the old signal
781 * mask buffer, so unwind the signal mask change made
782 * above.
783 */
784 (void) sigprocmask(how, s_osetp, (sigset_t *)NULL);
785 return (err);
786 }
787 }
788
789 return (0);
790 }
791
792 #if defined(_ILP32)
793 long
794 lx_sigprocmask(uintptr_t how, uintptr_t setp, uintptr_t osetp)
795 {
796 return (lx_sigprocmask_common(how, setp, osetp, USE_OSIGSET));
797 }
798 #endif
799
800 long
801 lx_rt_sigprocmask(uintptr_t how, uintptr_t setp, uintptr_t osetp,
802 uintptr_t setsize)
803 {
804 if ((size_t)setsize != sizeof (lx_sigset_t))
805 return (-EINVAL);
806
807 return (lx_sigprocmask_common(how, setp, osetp, USE_SIGSET));
808 }
809
810 #if defined(_ILP32)
811 long
812 lx_sigsuspend(uintptr_t set)
813 {
814 sigset_t s_set;
815
816 if (ltos_osigset((lx_osigset_t *)set, &s_set) != 0)
817 return (-errno);
818
819 return ((sigsuspend(&s_set) == -1) ? -errno : 0);
820 }
821 #endif
822
823 long
824 lx_rt_sigsuspend(uintptr_t set, uintptr_t setsize)
825 {
826 sigset_t s_set;
827
828 if ((size_t)setsize != sizeof (lx_sigset_t))
829 return (-EINVAL);
830
831 if (ltos_sigset((lx_sigset_t *)set, &s_set) != 0)
832 return (-errno);
833
834 return ((sigsuspend(&s_set) == -1) ? -errno : 0);
835 }
836
837 long
838 lx_rt_sigwaitinfo(uintptr_t set, uintptr_t sinfo, uintptr_t setsize)
839 {
840 sigset_t s_set;
841 siginfo_t s_sinfo, *s_sinfop;
842 int rc;
843
844 lx_sigset_t *setp = (lx_sigset_t *)set;
845 lx_siginfo_t *sinfop = (lx_siginfo_t *)sinfo;
846
847 if ((size_t)setsize != sizeof (lx_sigset_t))
848 return (-EINVAL);
849
850 if (ltos_sigset(setp, &s_set) != 0)
851 return (-errno);
852
853 s_sinfop = (sinfop == NULL) ? NULL : &s_sinfo;
854
855 if ((rc = sigwaitinfo(&s_set, s_sinfop)) == -1)
856 return (-errno);
857
858 if (s_sinfop == NULL)
859 return (stol_signo[rc]);
860
861 return ((stol_siginfo(s_sinfop, sinfop) != 0)
862 ? -errno : stol_signo[rc]);
863 }
864
865 long
866 lx_rt_sigtimedwait(uintptr_t set, uintptr_t sinfo, uintptr_t toutp,
867 uintptr_t setsize)
868 {
869 sigset_t s_set;
870 siginfo_t s_sinfo, *s_sinfop;
871 int rc;
872
873 lx_sigset_t *setp = (lx_sigset_t *)set;
874 lx_siginfo_t *sinfop = (lx_siginfo_t *)sinfo;
875
876 if ((size_t)setsize != sizeof (lx_sigset_t))
877 return (-EINVAL);
878
879 if (ltos_sigset(setp, &s_set) != 0)
880 return (-errno);
881
882 s_sinfop = (sinfop == NULL) ? NULL : &s_sinfo;
883
884 /*
885 * "If timeout is the NULL pointer, the behavior is unspecified."
886 * Match what LTP expects.
887 */
888 if ((rc = sigtimedwait(&s_set, s_sinfop,
889 (struct timespec *)toutp)) == -1)
890 return (toutp == NULL ? -EINTR : -errno);
891
892 if (s_sinfop == NULL)
893 return (stol_signo[rc]);
894
895 return ((stol_siginfo(s_sinfop, sinfop) != 0)
896 ? -errno : stol_signo[rc]);
897 }
898
899 static void
900 lx_sigreturn_find_native_context(const char *caller, ucontext_t **sigucp,
901 ucontext_t **retucp, uintptr_t sp)
902 {
903 lx_tsd_t *lxtsd = lx_get_tsd();
904 lx_sigdeliver_frame_t *lxsdfp = (lx_sigdeliver_frame_t *)sp;
905 lx_sigdeliver_frame_t lxsdf;
906 boolean_t copy_ok;
907
908 lx_debug("%s: reading lx_sigdeliver_frame_t @ %p\n", caller, lxsdfp);
909 if (uucopy(lxsdfp, &lxsdf, sizeof (lxsdf)) != 0) {
910 lx_debug("%s: failed to read lx_sigdeliver_frame_t @ %p\n",
911 lxsdfp);
912
913 copy_ok = B_FALSE;
914 } else {
915 lx_debug("%s: lxsdf: magic %p retucp %p sigucp %p\n", caller,
916 lxsdf.lxsdf_magic, lxsdf.lxsdf_retucp, lxsdf.lxsdf_sigucp);
917
918 copy_ok = B_TRUE;
919 }
920
921 /*
922 * lx_sigdeliver() pushes a lx_sigdeliver_frame_t onto the stack
923 * before it creates the struct lx_oldsigstack.
924 */
925 if (copy_ok && lxsdf.lxsdf_magic == LX_SIGRT_MAGIC) {
926 LX_SIGNAL_DELIVERY_FRAME_FOUND(lxsdfp);
927
928 /*
929 * The guard value is intact; use the context pointers stored
930 * in the signal delivery frame:
931 */
932 *sigucp = lxsdf.lxsdf_sigucp;
933 *retucp = lxsdf.lxsdf_retucp;
934
935 /*
936 * Ensure that the backup signal delivery chain is in sync with
937 * the frame we are returning via:
938 */
939 lxtsd->lxtsd_sigbackup = lxsdf.lxsdf_sigbackup;
940 } else {
941 /*
942 * The guard value was not intact. Either the program smashed
943 * the stack unintentionally, or worse: intentionally moved
944 * some parts of the signal delivery frame we constructed to
945 * another location before calling rt_sigreturn(2).
946 */
947 LX_SIGNAL_DELIVERY_FRAME_CORRUPT(lxsdfp);
948
949 if (lxtsd->lxtsd_sigbackup == NULL) {
950 /*
951 * There was no backup context to use, so we must
952 * kill the process.
953 */
954 if (copy_ok) {
955 lx_err_fatal("%s: sp 0x%p, expected 0x%x, "
956 "found 0x%x!", caller, sp, LX_SIGRT_MAGIC,
957 lxsdf.lxsdf_magic);
958 } else {
959 lx_err_fatal("%s: sp 0x%p, could not read "
960 "magic", caller, sp);
961 }
962 }
963
964 /*
965 * Attempt to recover by using the backup signal delivery
966 * chain:
967 */
968 lx_debug("%s: SIGRT_MAGIC not found @ sp %p; using backup "
969 "@ %p\n", caller, (void *)sp, lxtsd->lxtsd_sigbackup);
970 *sigucp = lxtsd->lxtsd_sigbackup->lxsb_sigucp;
971 *retucp = lxtsd->lxtsd_sigbackup->lxsb_retucp;
972 }
973 }
974
#if defined(_ILP32)
/*
 * Emulation of the legacy Linux sigreturn() system call; 32-bit code only.
 *
 * Rather than resuming the interrupted code directly, the (possibly
 * modified) Linux register, floating point and signal mask state is folded
 * back into the native signal context, and control is then handed to the
 * saved return context with setcontext(2).  That unwinds through the libc
 * call stack as Illumos expects: when control reaches libc's
 * call_user_handler() routine, a setcontext(2) there returns the thread to
 * the point originally interrupted by the signal.
 *
 * This function does not return on success.
 */
long
lx_sigreturn(void)
{
	ucontext_t *ucp = lx_syscall_regs();
	ucontext_t *sigucp;
	ucontext_t *retucp;
	struct lx_oldsigstack *lx_ossp;
	lx_sigset_t lx_sigset;
	uintptr_t frame_addr;
	uintptr_t sp;

	/*
	 * NOTE: The sp saved in the context is eight bytes off of where we
	 * need it to be (either due to trampoline or the copying of
	 * sp = uesp, not clear which).
	 */
	frame_addr = LX_REG(ucp, REG_SP) - 8;

	/*
	 * The adjusted address should be that of the struct lx_oldsigstack
	 * that lx_build_old_signal_frame() placed on the stack.  Keep a
	 * pointer to it, and compute the address just beyond it, which is
	 * where the delivery frame is looked for.
	 */
	lx_ossp = (struct lx_oldsigstack *)frame_addr;
	sp = frame_addr + SA(sizeof (struct lx_oldsigstack));

	lx_sigreturn_find_native_context(__func__, &sigucp, &retucp, sp);

	/*
	 * Machine registers the Linux signal handler may have modified are
	 * copied back to the Illumos ucontext_t.
	 *
	 * General registers copy across as-is, except Linux expects that
	 * changes made to uc_mcontext.gregs[ESP] will be reflected when the
	 * interrupted thread resumes execution after the signal handler.
	 * Illumos restores the UESP value to ESP, so UESP is first made to
	 * match ESP.
	 */
	lx_ossp->sigc.sc_esp_at_signal = lx_ossp->sigc.sc_esp;
	bcopy(&lx_ossp->sigc, &sigucp->uc_mcontext, sizeof (gregset_t));

	LX_SIGRETURN(NULL, sigucp, sp);

	/* Floating point state is copied back only if the frame has any. */
	if (lx_ossp->sigc.sc_fpstate != NULL) {
		ltos_fpstate(&lx_ossp->fpstate, &sigucp->uc_mcontext.fpregs);
	}

	/*
	 * The old-style frame keeps the two words of the Linux signal mask
	 * in separate fields; reassemble them and convert to the Illumos
	 * representation.
	 */
	bzero(&lx_sigset, sizeof (lx_sigset));
	lx_sigset.__bits[0] = lx_ossp->sigc.sc_mask;
	lx_sigset.__bits[1] = lx_ossp->sig_extra;
	(void) ltos_sigset(&lx_sigset, &sigucp->uc_sigmask);

	/*
	 * For signal mask handling to be done properly, we must return to
	 * the libc routine that originally called the signal handler rather
	 * than jump straight back to the interrupted code as Linux would.
	 */
	lx_debug("lx_sigreturn: calling setcontext; retucp %p flags %lx "
	    "link %p\n", retucp, retucp->uc_flags, retucp->uc_link);
	setcontext(retucp);
	assert(0);

	/*NOTREACHED*/
	return (0);
}
#endif
1057
1058 /*
1059 * This signal return syscall is used by both 32-bit and 64-bit code.
1060 */
1061 long
1062 lx_rt_sigreturn(void)
1063 {
1064 struct lx_sigstack *lx_ssp;
1065 lx_ucontext_t *lx_ucp;
1066 ucontext_t *ucp;
1067 ucontext_t *sigucp;
1068 ucontext_t *retucp;
1069 uintptr_t sp;
1070
1071 /* Get the registers at the emulated Linux rt_sigreturn syscall */
1072 ucp = lx_syscall_regs();
1073
1074 #if defined(_ILP32)
1075 lx_debug("lx_rt_sigreturn: ESP %p UESP %p\n", LX_REG(ucp, ESP),
1076 LX_REG(ucp, UESP));
1077 /*
1078 * For 32-bit
1079 *
1080 * NOTE: Because of the silly compatibility measures done in the
1081 * signal trampoline code to make sure the stack holds the
1082 * _exact same_ instruction sequence Linux does, we have to
1083 * manually "pop" some extra instructions off the stack here
1084 * before passing the stack address to the syscall because the
1085 * trampoline code isn't allowed to do it due to the gdb
1086 * compatability issues.
1087 *
1088 * No, I'm not kidding.
1089 *
1090 * The sp saved in the context is eight bytes off of where we
1091 * need it to be (either due to trampoline or the copying of
1092 * sp = uesp, not clear which but looks like the uesp case), so
1093 * the need to pop the extra four byte instruction means we need
1094 * to subtract a net four bytes from the sp before "popping" the
1095 * struct lx_sigstack off the stack.
1096 *
1097 * This will yield the value the stack pointer had before
1098 * lx_sigdeliver() created the stack frame for the Linux signal
1099 * handler.
1100 */
1101 sp = (uintptr_t)LX_REG(ucp, REG_SP) - 4;
1102 #else
1103 /*
1104 * We need to make an adjustment for 64-bit code as well. Since 64-bit
1105 * does not use the trampoline, it's probably for the same reason as
1106 * alluded to above.
1107 */
1108 sp = (uintptr_t)LX_REG(ucp, REG_SP) - 8;
1109 #endif
1110
1111 /*
1112 * At this point, the stack pointer should point to the struct
1113 * lx_sigstack that lx_build_signal_frame() constructed and
1114 * placed on the stack. We need to reference it a bit later, so
1115 * save a pointer to it before incrementing our copy of the sp.
1116 */
1117 lx_ssp = (struct lx_sigstack *)sp;
1118 sp += SA(sizeof (struct lx_sigstack));
1119
1120 #if defined(_LP64)
1121 /*
1122 * The 64-bit lx_sigdeliver() inserts 8 bytes of padding between
1123 * the lx_sigstack_t and the delivery frame to maintain ABI stack
1124 * alignment.
1125 */
1126 sp += 8;
1127 #endif
1128
1129 lx_sigreturn_find_native_context(__func__, &sigucp, &retucp, sp);
1130
1131 /*
1132 * We need to copy machine registers the Linux signal handler may have
1133 * modified back to the Illumos version.
1134 */
1135 #if defined(_LP64)
1136 lx_ucp = &lx_ssp->uc;
1137
1138 /*
1139 * General register layout is completely different.
1140 */
1141 LX_REG(sigucp, REG_R15) = lx_ucp->uc_sigcontext.sc_r15;
1142 LX_REG(sigucp, REG_R14) = lx_ucp->uc_sigcontext.sc_r14;
1143 LX_REG(sigucp, REG_R13) = lx_ucp->uc_sigcontext.sc_r13;
1144 LX_REG(sigucp, REG_R12) = lx_ucp->uc_sigcontext.sc_r12;
1145 LX_REG(sigucp, REG_R11) = lx_ucp->uc_sigcontext.sc_r11;
1146 LX_REG(sigucp, REG_R10) = lx_ucp->uc_sigcontext.sc_r10;
1147 LX_REG(sigucp, REG_R9) = lx_ucp->uc_sigcontext.sc_r9;
1148 LX_REG(sigucp, REG_R8) = lx_ucp->uc_sigcontext.sc_r8;
1149 LX_REG(sigucp, REG_RDI) = lx_ucp->uc_sigcontext.sc_rdi;
1150 LX_REG(sigucp, REG_RSI) = lx_ucp->uc_sigcontext.sc_rsi;
1151 LX_REG(sigucp, REG_RBP) = lx_ucp->uc_sigcontext.sc_rbp;
1152 LX_REG(sigucp, REG_RBX) = lx_ucp->uc_sigcontext.sc_rbx;
1153 LX_REG(sigucp, REG_RDX) = lx_ucp->uc_sigcontext.sc_rdx;
1154 LX_REG(sigucp, REG_RCX) = lx_ucp->uc_sigcontext.sc_rcx;
1155 LX_REG(sigucp, REG_RAX) = lx_ucp->uc_sigcontext.sc_rax;
1156 LX_REG(sigucp, REG_TRAPNO) = lx_ucp->uc_sigcontext.sc_trapno;
1157 LX_REG(sigucp, REG_ERR) = lx_ucp->uc_sigcontext.sc_err;
1158 LX_REG(sigucp, REG_RIP) = lx_ucp->uc_sigcontext.sc_rip;
1159 LX_REG(sigucp, REG_CS) = lx_ucp->uc_sigcontext.sc_cs;
1160 LX_REG(sigucp, REG_RFL) = lx_ucp->uc_sigcontext.sc_eflags;
1161 LX_REG(sigucp, REG_RSP) = lx_ucp->uc_sigcontext.sc_rsp;
1162 LX_REG(sigucp, REG_SS) = lx_ucp->uc_sigcontext.sc_pad0;
1163 LX_REG(sigucp, REG_FS) = lx_ucp->uc_sigcontext.sc_fs;
1164 LX_REG(sigucp, REG_GS) = lx_ucp->uc_sigcontext.sc_gs;
1165
1166 #else /* is _ILP32 */
1167 lx_ucp = &lx_ssp->uc;
1168
1169 /*
1170 * Illumos and Linux both follow the SysV i386 ABI layout for the
1171 * mcontext.
1172 *
1173 * General registers copy across as-is, except Linux expects that
1174 * changes made to uc_mcontext.gregs[ESP] will be reflected when the
1175 * interrupted thread resumes execution after the signal handler. To
1176 * emulate this behavior, we must modify uc_mcontext.gregs[UESP] to
1177 * match uc_mcontext.gregs[ESP] as Illumos will restore the UESP value
1178 * to ESP.
1179 */
1180 lx_ucp->uc_sigcontext.sc_esp_at_signal = lx_ucp->uc_sigcontext.sc_esp;
1181
1182 bcopy(&lx_ucp->uc_sigcontext, &sigucp->uc_mcontext.gregs,
1183 sizeof (gregset_t));
1184 #endif
1185
1186 LX_SIGRETURN(lx_ucp, sigucp, sp);
1187
1188 if (lx_ucp->uc_sigcontext.sc_fpstate != NULL) {
1189 ltos_fpstate(lx_ucp->uc_sigcontext.sc_fpstate,
1190 &sigucp->uc_mcontext.fpregs);
1191 }
1192
1193 /*
1194 * Convert the Linux signal mask and stack back to their
1195 * Illumos equivalents.
1196 */
1197 (void) ltos_sigset(&lx_ucp->uc_sigmask, &sigucp->uc_sigmask);
1198 ltos_stack(&lx_ucp->uc_stack, &sigucp->uc_stack);
1199
1200 /*
1201 * For signal mask handling to be done properly, this call needs to
1202 * return to the libc routine that originally called the signal handler
1203 * rather than directly set the context back to the place the signal
1204 * interrupted execution as the original Linux code would do.
1205 */
1206 lx_debug("lx_rt_sigreturn: calling setcontext; retucp %p\n", retucp);
1207 setcontext(retucp);
1208 assert(0);
1209
1210 /*NOTREACHED*/
1211 return (0);
1212 }
1213
1214
#if defined(_ILP32)
/*
 * Stack builder for "old" (legacy) Linux signal frames; 32-bit code only.
 *
 *	lx_sig	Linux signal number being delivered
 *	sip	native siginfo for the signal, or NULL
 *	p	native ucontext_t describing the interrupted state
 *	sp	address at which the struct lx_oldsigstack is assembled
 *	hargs	register argument vector; not used by this builder
 */
static void
lx_build_old_signal_frame(int lx_sig, siginfo_t *sip, void *p, void *sp,
    uintptr_t *hargs)
{
	extern void lx_sigreturn_tramp();

	struct lx_oldsigstack *lx_ossp = sp;
	ucontext_t *ucp = (ucontext_t *)p;
	struct lx_sigaction *lxsap;
	lx_sigset_t lx_sigset;

	lx_debug("building old signal frame for lx sig %d at 0x%p", lx_sig, sp);

	lx_ossp->sig = lx_sig;
	lxsap = &lx_sighandlers.lx_sa[lx_sig];
	lx_debug("lxsap @ 0x%p", lxsap);

	/*
	 * The handler returns through our own trampoline unless the program
	 * registered a restorer for this signal.
	 */
	if (!lxsap || !(lxsap->lxsa_flags & LX_SA_RESTORER) ||
	    !lxsap->lxsa_restorer) {
		lx_ossp->retaddr = lx_sigreturn_tramp;
		lx_debug("lx_ossp->retaddr set to 0x%p", lx_sigreturn_tramp);
	} else {
		lx_ossp->retaddr = lxsap->lxsa_restorer;
		lx_debug("lxsa_restorer exists @ 0x%p", lx_ossp->retaddr);
	}

	lx_debug("osf retaddr = 0x%p", lx_ossp->retaddr);

	/*
	 * Convert the Illumos signal mask to its Linux equivalent; the old
	 * frame stores the two mask words in separate members.
	 */
	(void) stol_sigset(&ucp->uc_sigmask, &lx_sigset);
	lx_ossp->sigc.sc_mask = lx_sigset.__bits[0];
	lx_ossp->sig_extra = lx_sigset.__bits[1];

	/*
	 * General registers copy across as-is, except Linux expects that
	 * uc_mcontext.gregs[ESP] == uc_mcontext.gregs[UESP] on receipt of a
	 * signal.
	 */
	bcopy(&ucp->uc_mcontext, &lx_ossp->sigc, sizeof (gregset_t));
	lx_ossp->sigc.sc_esp = lx_ossp->sigc.sc_esp_at_signal;

	/*
	 * cr2 contains the faulting address, and Linux only sets cr2 for a
	 * segmentation fault.
	 */
	if (lx_sig == LX_SIGSEGV && sip) {
		lx_ossp->sigc.sc_cr2 = (uintptr_t)sip->si_addr;
	} else {
		lx_ossp->sigc.sc_cr2 = 0;
	}

	/* Floating point registers are converted only if present. */
	if (!(ucp->uc_flags & UC_FPU)) {
		lx_ossp->sigc.sc_fpstate = NULL;
	} else {
		stol_fpstate(&ucp->uc_mcontext.fpregs, &lx_ossp->fpstate);
		lx_ossp->sigc.sc_fpstate = &lx_ossp->fpstate;
	}

	/*
	 * Believe it or not, gdb wants to SEE the trampoline code on the
	 * bottom of the stack to determine whether the stack frame belongs to
	 * a signal handler, even though this code is no longer actually
	 * called.
	 *
	 * You can't make this stuff up.
	 */
	bcopy((void *)lx_sigreturn_tramp, lx_ossp->trampoline,
	    sizeof (lx_ossp->trampoline));
}
#endif
1288
1289 /*
1290 * Build stack frame (32-bit) or stack local data (64-bit) for processing for
1291 * modern Linux signals. This is the only stack-builder function for 64-bit
1292 * code (32-bit code also calls this when using "modern" signals).
1293 */
1294 static void
1295 lx_build_signal_frame(int lx_sig, siginfo_t *sip, void *p, void *sp,
1296 uintptr_t *hargs)
1297 {
1298 extern void lx_rt_sigreturn_tramp();
1299
1300 lx_ucontext_t *lx_ucp;
1301 ucontext_t *ucp = (ucontext_t *)p;
1302 struct lx_sigstack *lx_ssp = sp;
1303 struct lx_sigaction *lxsap;
1304
1305 lx_debug("building signal frame for lx sig %d at 0x%p", lx_sig, sp);
1306
1307 lx_ucp = &lx_ssp->uc;
1308 #if defined(_ILP32)
1309 /*
1310 * Arguments are passed to the 32-bit signal handler on the stack.
1311 */
1312 lx_ssp->ucp = lx_ucp;
1313 lx_ssp->sip = sip != NULL ? &lx_ssp->si : NULL;
1314 lx_ssp->sig = lx_sig;
1315 #else
1316 /*
1317 * Arguments to the 64-bit signal handler are passed in registers:
1318 * hdlr(int sig, siginfo_t *sip, void *ucp);
1319 */
1320 hargs[0] = lx_sig;
1321 hargs[1] = sip != NULL ? (uintptr_t)&lx_ssp->si : NULL;
1322 hargs[2] = (uintptr_t)lx_ucp;
1323 #endif
1324
1325 lxsap = &lx_sighandlers.lx_sa[lx_sig];
1326 lx_debug("lxsap @ 0x%p", lxsap);
1327
1328 if (lxsap && (lxsap->lxsa_flags & LX_SA_RESTORER) &&
1329 lxsap->lxsa_restorer) {
1330 /*
1331 * lxsa_restorer is explicitly set by sigaction in 32-bit code
1332 * but it can also be implicitly set for both 32 and 64 bit
1333 * code via lx_sigaction_common when we bcopy the user-supplied
1334 * lx_sigaction element into the proper slot in the sighandler
1335 * array.
1336 */
1337 lx_ssp->retaddr = lxsap->lxsa_restorer;
1338 lx_debug("lxsa_restorer exists @ 0x%p", lx_ssp->retaddr);
1339 } else {
1340 lx_ssp->retaddr = lx_rt_sigreturn_tramp;
1341 lx_debug("lx_ssp->retaddr set to 0x%p", lx_rt_sigreturn_tramp);
1342 }
1343
1344 /* Linux has these fields but always clears them to 0 */
1345 lx_ucp->uc_flags = 0;
1346 lx_ucp->uc_link = NULL;
1347
1348 /* convert Illumos signal mask and stack to their Linux equivalents */
1349 (void) stol_sigset(&ucp->uc_sigmask, &lx_ucp->uc_sigmask);
1350 stol_stack(&ucp->uc_stack, &lx_ucp->uc_stack);
1351
1352 #if defined(_LP64)
1353 /*
1354 * General register layout is completely different.
1355 */
1356 lx_ucp->uc_sigcontext.sc_r8 = LX_REG(ucp, REG_R8);
1357 lx_ucp->uc_sigcontext.sc_r9 = LX_REG(ucp, REG_R9);
1358 lx_ucp->uc_sigcontext.sc_r10 = LX_REG(ucp, REG_R10);
1359 lx_ucp->uc_sigcontext.sc_r11 = LX_REG(ucp, REG_R11);
1360 lx_ucp->uc_sigcontext.sc_r12 = LX_REG(ucp, REG_R12);
1361 lx_ucp->uc_sigcontext.sc_r13 = LX_REG(ucp, REG_R13);
1362 lx_ucp->uc_sigcontext.sc_r14 = LX_REG(ucp, REG_R14);
1363 lx_ucp->uc_sigcontext.sc_r15 = LX_REG(ucp, REG_R15);
1364 lx_ucp->uc_sigcontext.sc_rdi = LX_REG(ucp, REG_RDI);
1365 lx_ucp->uc_sigcontext.sc_rsi = LX_REG(ucp, REG_RSI);
1366 lx_ucp->uc_sigcontext.sc_rbp = LX_REG(ucp, REG_RBP);
1367 lx_ucp->uc_sigcontext.sc_rbx = LX_REG(ucp, REG_RBX);
1368 lx_ucp->uc_sigcontext.sc_rdx = LX_REG(ucp, REG_RDX);
1369 lx_ucp->uc_sigcontext.sc_rax = LX_REG(ucp, REG_RAX);
1370 lx_ucp->uc_sigcontext.sc_rcx = LX_REG(ucp, REG_RCX);
1371 lx_ucp->uc_sigcontext.sc_rsp = LX_REG(ucp, REG_RSP);
1372 lx_ucp->uc_sigcontext.sc_rip = LX_REG(ucp, REG_RIP);
1373 lx_ucp->uc_sigcontext.sc_eflags = LX_REG(ucp, REG_RFL);
1374 lx_ucp->uc_sigcontext.sc_cs = LX_REG(ucp, REG_CS);
1375 lx_ucp->uc_sigcontext.sc_gs = LX_REG(ucp, REG_GS);
1376 lx_ucp->uc_sigcontext.sc_fs = LX_REG(ucp, REG_FS);
1377 lx_ucp->uc_sigcontext.sc_pad0 = LX_REG(ucp, REG_SS);
1378 lx_ucp->uc_sigcontext.sc_err = LX_REG(ucp, REG_ERR);
1379 lx_ucp->uc_sigcontext.sc_trapno = LX_REG(ucp, REG_TRAPNO);
1380
1381 #else /* is _ILP32 */
1382 /*
1383 * General registers copy across as-is, except Linux expects that
1384 * uc_mcontext.gregs[ESP] == uc_mcontext.gregs[UESP] on receipt of a
1385 * signal.
1386 */
1387 bcopy(&ucp->uc_mcontext, &lx_ucp->uc_sigcontext, sizeof (gregset_t));
1388 lx_ucp->uc_sigcontext.sc_esp = lx_ucp->uc_sigcontext.sc_esp_at_signal;
1389 #endif
1390
1391 /*
1392 * cr2 contains the faulting address, which Linux only sets for a
1393 * a segmentation fault.
1394 */
1395 lx_ucp->uc_sigcontext.sc_cr2 = ((lx_sig == LX_SIGSEGV) && (sip)) ?
1396 (uintptr_t)sip->si_addr : 0;
1397
1398 /*
1399 * This should only return an error if the signum is invalid but that
1400 * also gets converted into a LX_SIGKILL by this function.
1401 */
1402 if (sip != NULL)
1403 (void) stol_siginfo(sip, &lx_ssp->si);
1404 else
1405 bzero(&lx_ssp->si, sizeof (lx_siginfo_t));
1406
1407 /* convert FP regs if present */
1408 if (ucp->uc_flags & UC_FPU) {
1409 /*
1410 * Copy FP regs to the appropriate place in the the lx_sigstack
1411 * structure.
1412 */
1413 stol_fpstate(&ucp->uc_mcontext.fpregs, &lx_ssp->fpstate);
1414 lx_ucp->uc_sigcontext.sc_fpstate = &lx_ssp->fpstate;
1415 } else {
1416 lx_ucp->uc_sigcontext.sc_fpstate = NULL;
1417 }
1418
1419 #if defined(_ILP32)
1420 /*
1421 * Believe it or not, gdb wants to SEE the sigreturn code on the
1422 * top of the stack to determine whether the stack frame belongs to
1423 * a signal handler, even though this code is not actually called.
1424 *
1425 * You can't make this stuff up.
1426 */
1427 bcopy((void *)lx_rt_sigreturn_tramp, lx_ssp->trampoline,
1428 sizeof (lx_ssp->trampoline));
1429 #endif
1430 }
1431
1432 /*
1433 * This is the interposition handler for Linux signals.
1434 */
1435 static void
1436 lx_call_user_handler(int sig, siginfo_t *sip, void *p)
1437 {
1438 void (*user_handler)();
1439 void (*stk_builder)();
1440 struct lx_sigaction *lxsap;
1441 ucontext_t *ucp = (ucontext_t *)p;
1442 size_t stksize;
1443 int lx_sig;
1444
1445 /*
1446 * If Illumos signal has no Linux equivalent, effectively ignore it.
1447 */
1448 if ((lx_sig = stol_signo[sig]) == -1) {
1449 lx_unsupported("caught Illumos signal %d, no Linux equivalent",
1450 sig);
1451 return;
1452 }
1453
1454 lx_debug("interpose caught Illumos signal %d, translating to Linux "
1455 "signal %d", sig, lx_sig);
1456
1457 lxsap = &lx_sighandlers.lx_sa[lx_sig];
1458 lx_debug("lxsap @ 0x%p", lxsap);
1459
1460 if ((sig == SIGPWR) && (lxsap->lxsa_handler == SIG_DFL)) {
1461 /*
1462 * Linux SIG_DFL for SIGPWR is to terminate. The lx wait
1463 * emulation will translate SIGPWR to LX_SIGPWR.
1464 */
1465 (void) syscall(SYS_brand, B_EXIT_AS_SIG, SIGPWR);
1466 /* This should never return */
1467 assert(0);
1468 }
1469
1470 if (lxsap->lxsa_handler == SIG_DFL || lxsap->lxsa_handler == SIG_IGN)
1471 lx_err_fatal("lxsa_handler set to %s? How?!?!?",
1472 (lxsap->lxsa_handler == SIG_DFL) ? "SIG_DFL" : "SIG_IGN");
1473
1474 #if defined(_LP64)
1475 stksize = sizeof (struct lx_sigstack);
1476 stk_builder = lx_build_signal_frame;
1477 #else
1478 if (lxsap->lxsa_flags & LX_SA_SIGINFO) {
1479 stksize = sizeof (struct lx_sigstack);
1480 stk_builder = lx_build_signal_frame;
1481 } else {
1482 stksize = sizeof (struct lx_oldsigstack);
1483 stk_builder = lx_build_old_signal_frame;
1484 }
1485 #endif
1486
1487 user_handler = lxsap->lxsa_handler;
1488
1489 lx_debug("delivering %d (lx %d) to handler at 0x%p", sig, lx_sig,
1490 lxsap->lxsa_handler);
1491
1492 if (lxsap->lxsa_flags & LX_SA_RESETHAND)
1493 lxsap->lxsa_handler = SIG_DFL;
1494
1495 lx_sigdeliver(lx_sig, sip, ucp, stksize, stk_builder, user_handler,
1496 lxsap);
1497
1498 /*
1499 * We need to handle restarting system calls if requested by the
1500 * program for this signal type:
1501 */
1502 if (lxsap->lxsa_flags & LX_SA_RESTART) {
1503 uintptr_t flags = (uintptr_t)ucp->uc_brand_data[0];
1504 long ret = (long)LX_REG(ucp, REG_R0);
1505 boolean_t interrupted = (ret == -lx_errno(EINTR, -1));
1506
1507 /*
1508 * If the system call returned EINTR, and the system
1509 * call handler set "br_syscall_restart" when returning,
1510 * we modify the context to try the system call again
1511 * when we return from this signal handler.
1512 */
1513 if ((flags & LX_UC_RESTART_SYSCALL) && interrupted) {
1514 int syscall_num = (int)(uintptr_t)ucp->uc_brand_data[2];
1515
1516 lx_debug("restarting interrupted system call %d",
1517 syscall_num);
1518
1519 /*
1520 * Both the "int 0x80" and the "syscall" instruction
1521 * are two bytes long. Wind the program counter back
1522 * to the start of this instruction.
1523 *
1524 * The system call we interrupted is preserved in the
1525 * brand-specific data in the ucontext_t when the
1526 * LX_UC_RESTART_SYSCALL flag is set. This is
1527 * analogous to the "orig_[er]ax" field in the Linux
1528 * "user_regs_struct".
1529 */
1530 LX_REG(ucp, REG_PC) -= 2;
1531 LX_REG(ucp, REG_R0) = syscall_num;
1532 }
1533 }
1534 }
1535
1536 /*
1537 * The "lx_sigdeliver()" function is responsible for constructing the emulated
1538 * signal delivery frame on the brand stack for this LWP. A context is saved
1539 * on the stack which will be used by the "sigreturn(2)" family of emulated
1540 * system calls to get us back here after the Linux signal handler returns.
1541 * This function is modelled on the in-kernel "sendsig()" signal delivery
1542 * mechanism.
1543 */
1544 void
1545 lx_sigdeliver(int lx_sig, siginfo_t *sip, ucontext_t *ucp, size_t stacksz,
1546 void (*stack_builder)(), void (*user_handler)(),
1547 struct lx_sigaction *lxsap)
1548 {
1549 lx_sigbackup_t sigbackup;
1550 ucontext_t uc;
1551 lx_tsd_t *lxtsd = lx_get_tsd();
1552 int totsz = 0;
1553 uintptr_t flags;
1554 uintptr_t hargs[3];
1555 /*
1556 * These variables must be "volatile", as they are modified after the
1557 * getcontext() stores the register state:
1558 */
1559 volatile boolean_t signal_delivered = B_FALSE;
1560 volatile uintptr_t lxfp = 0;
1561 volatile uintptr_t old_tsd_sp = 0;
1562 volatile int newstack = 0;
1563
1564 /*
1565 * This function involves modifying the Linux process stack for this
1566 * thread. To do so without corruption requires us to exclude other
1567 * signal handlers (or emulated system calls called from within those
1568 * handlers) from running while we reserve space on that stack. We
1569 * defer the execution of further instances of lx_call_user_handler()
1570 * until we have completed this operation.
1571 */
1572 _sigoff();
1573
1574 /*
1575 * Clear register arguments vector.
1576 */
1577 bzero(hargs, sizeof (hargs));
1578
1579 /*
1580 * We save a context here so that we can be returned later to complete
1581 * handling the signal.
1582 */
1583 lx_debug("lx_sigdeliver: STORING RETURN CONTEXT @ %p\n", &uc);
1584 assert(getcontext(&uc) == 0);
1585 lx_debug("lx_sigdeliver: RETURN CONTEXT %p LINK %p FLAGS %lx\n",
1586 &uc, uc.uc_link, uc.uc_flags);
1587 if (signal_delivered) {
1588 /*
1589 * If the "signal_delivered" flag is set, we are returned here
1590 * via setcontext() as called by the emulated Linux signal
1591 * return system call.
1592 */
1593 lx_debug("lx_sigdeliver: WE ARE BACK, VIA UC @ %p!\n", &uc);
1594 goto after_signal_handler;
1595 }
1596 signal_delivered = B_TRUE;
1597
1598 /*
1599 * Preserve the current tsd value of the Linux process stack pointer,
1600 * even if it is zero. We will restore it when we are returned here
1601 * via setcontext() after the Linux process has completed execution of
1602 * its signal handler.
1603 */
1604 old_tsd_sp = lxtsd->lxtsd_lx_sp;
1605
1606 /*
1607 * Figure out whether we will be handling this signal on an alternate
1608 * stack specified by the user.
1609 */
1610 newstack = (lxsap->lxsa_flags & LX_SA_ONSTACK) &&
1611 !(lxtsd->lxtsd_sigaltstack.ss_flags & (LX_SS_ONSTACK |
1612 LX_SS_DISABLE));
1613
1614 /*
1615 * Find the first unused region of the Linux process stack, where
1616 * we will assemble our signal delivery frame.
1617 */
1618 flags = (uintptr_t)ucp->uc_brand_data[0];
1619 if (newstack) {
1620 /*
1621 * We are moving to the user-provided alternate signal
1622 * stack.
1623 */
1624 lxfp = SA((uintptr_t)lxtsd->lxtsd_sigaltstack.ss_sp) +
1625 SA(lxtsd->lxtsd_sigaltstack.ss_size) - STACK_ALIGN;
1626 lx_debug("lx_sigdeliver: moving to ALTSTACK sp %p\n", lxfp);
1627 LX_SIGNAL_ALTSTACK_ENABLE(lxfp);
1628 } else if (flags & LX_UC_STACK_BRAND) {
1629 /*
1630 * We interrupted the Linux process to take this signal. The
1631 * stack pointer is the one saved in this context.
1632 */
1633 lxfp = LX_REG(ucp, REG_SP);
1634 } else {
1635 /*
1636 * We interrupted a native (emulation) routine, so we must get
1637 * the current stack pointer from either the tsd (if one is
1638 * stored there) or via the context chain.
1639 *
1640 */
1641 lxfp = lx_find_brand_sp();
1642 if (lxtsd->lxtsd_lx_sp != 0) {
1643 /*
1644 * We must also make room for the possibility of nested
1645 * signal delivery -- we may be pre-empting the
1646 * in-progress handling of another signal.
1647 *
1648 * Note that if we were already on the alternate stack,
1649 * any emulated Linux system calls would be betwixt
1650 * that original signal frame and this new one on the
1651 * one contiguous stack, so this logic holds either
1652 * way:
1653 */
1654 lxfp = MIN(lxtsd->lxtsd_lx_sp, lxfp);
1655 }
1656 }
1657
1658 /*
1659 * Account for a reserved stack region (for amd64, this is 128 bytes),
1660 * and align the stack:
1661 */
1662 lxfp -= STACK_RESERVE;
1663 lxfp &= ~(STACK_ALIGN - 1);
1664
1665 /*
1666 * Allocate space on the Linux process stack for our delivery frame,
1667 * including:
1668 *
1669 * ----------------------------------------------------- old %sp
1670 * - lx_sigdeliver_frame_t
1671 * - (ucontext_t pointers and stack magic)
1672 * -----------------------------------------------------
1673 * - (amd64-only 8-byte alignment gap)
1674 * -----------------------------------------------------
1675 * - frame of size "stacksz" from the stack builder
1676 * ----------------------------------------------------- new %sp
1677 */
1678 #if defined(_LP64)
1679 /*
1680 * The AMD64 ABI requires us to align the stack such that when the
1681 * called function pushes the base pointer, the stack is 16 byte
1682 * aligned. The stack must, therefore, be 8- but _not_ 16-byte
1683 * aligned.
1684 */
1685 #if (STACK_ALIGN != 16) || (STACK_ENTRY_ALIGN != 8)
1686 #error "lx_sigdeliver() did not find expected stack alignment"
1687 #endif
1688 totsz = SA(sizeof (lx_sigdeliver_frame_t)) + SA(stacksz) + 8;
1689 assert((totsz & (STACK_ENTRY_ALIGN - 1)) == 0);
1690 assert((totsz & (STACK_ALIGN - 1)) == 8);
1691 #else
1692 totsz = SA(sizeof (lx_sigdeliver_frame_t)) + SA(stacksz);
1693 assert((totsz & (STACK_ALIGN - 1)) == 0);
1694 #endif
1695
1696 /*
1697 * Copy our return frame into place:
1698 */
1699 lxfp -= SA(sizeof (lx_sigdeliver_frame_t));
1700 lx_debug("lx_sigdeliver: lx_sigdeliver_frame_t @ %p\n", lxfp);
1701 {
1702 lx_sigdeliver_frame_t frm;
1703
1704 frm.lxsdf_magic = LX_SIGRT_MAGIC;
1705 frm.lxsdf_retucp = &uc;
1706 frm.lxsdf_sigucp = ucp;
1707 frm.lxsdf_sigbackup = &sigbackup;
1708
1709 lx_debug("lx_sigdeliver: retucp %p sigucp %p\n",
1710 frm.lxsdf_retucp, frm.lxsdf_sigucp);
1711
1712 if (uucopy(&frm, (void *)lxfp, sizeof (frm)) != 0) {
1713 /*
1714 * We could not modify the stack of the emulated Linux
1715 * program. Act like the kernel and terminate the
1716 * program with a segmentation violation.
1717 */
1718 (void) syscall(SYS_brand, B_EXIT_AS_SIG, SIGSEGV);
1719 }
1720
1721 LX_SIGNAL_DELIVERY_FRAME_CREATE((void *)lxfp);
1722
1723 /*
1724 * Populate a backup copy of signal linkage to use in case
1725 * the Linux program completely destroys (or relocates) the
1726 * delivery frame.
1727 *
1728 * This is necessary for programs that have flown so far off
1729 * the architectural rails that they believe it is
1730 * acceptable to make assumptions about the precise size and
1731 * layout of the signal handling frame assembled by the
1732 * kernel.
1733 */
1734 sigbackup.lxsb_retucp = frm.lxsdf_retucp;
1735 sigbackup.lxsb_sigucp = frm.lxsdf_sigucp;
1736 sigbackup.lxsb_sigdeliver_frame = lxfp;
1737 sigbackup.lxsb_previous = lxtsd->lxtsd_sigbackup;
1738 lxtsd->lxtsd_sigbackup = &sigbackup;
1739
1740 lx_debug("lx_sigdeliver: installed sigbackup %p; prev %p\n",
1741 &sigbackup, sigbackup.lxsb_previous);
1742 }
1743
1744 /*
1745 * Build the Linux signal handling frame:
1746 */
1747 #if defined(_LP64)
1748 lxfp -= SA(stacksz) + 8;
1749 #else
1750 lxfp -= SA(stacksz);
1751 #endif
1752 lx_debug("lx_sigdeliver: Linux sig frame @ %p\n", lxfp);
1753 stack_builder(lx_sig, sip, ucp, lxfp, hargs);
1754
1755 /*
1756 * Record our reservation so that any nested signal handlers
1757 * can see it.
1758 */
1759 lx_debug("lx_sigdeliver: Linux tsd sp %p -> %p\n", lxtsd->lxtsd_lx_sp,
1760 lxfp);
1761 lxtsd->lxtsd_lx_sp = lxfp;
1762
1763 if (newstack) {
1764 lxtsd->lxtsd_sigaltstack.ss_flags |= LX_SS_ONSTACK;
1765 }
1766
1767 LX_SIGDELIVER(lx_sig, lxsap, (void *)lxfp);
1768
1769 /*
1770 * Re-enable signal delivery. If a signal was queued while we were
1771 * in the critical section, it will be delivered immediately.
1772 */
1773 _sigon();
1774
1775 /*
1776 * Pass control to the Linux signal handler:
1777 */
1778 lx_debug("lx_sigdeliver: JUMPING TO LINUX (sig %d sp %p eip %p)\n",
1779 lx_sig, lxfp, user_handler);
1780 {
1781 ucontext_t jump_uc;
1782
1783 bcopy(lx_find_brand_uc(), &jump_uc, sizeof (jump_uc));
1784
1785 /*
1786 * We want to load the general registers from this context, and
1787 * switch to the BRAND stack. We do _not_ want to restore the
1788 * uc_link value from this synthetic context, as that would
1789 * break the signal handling context chain.
1790 */
1791 jump_uc.uc_flags = UC_CPU;
1792 jump_uc.uc_brand_data[0] = (void *)(LX_UC_STACK_BRAND |
1793 LX_UC_IGNORE_LINK);
1794
1795 LX_REG(&jump_uc, REG_FP) = 0;
1796 LX_REG(&jump_uc, REG_SP) = lxfp;
1797 LX_REG(&jump_uc, REG_PC) = (uintptr_t)user_handler;
1798
1799 #if defined(_LP64)
1800 /*
1801 * Pass signal handler arguments by registers on AMD64.
1802 */
1803 LX_REG(&jump_uc, REG_RDI) = hargs[0];
1804 LX_REG(&jump_uc, REG_RSI) = hargs[1];
1805 LX_REG(&jump_uc, REG_RDX) = hargs[2];
1806 #endif
1807
1808 lx_jump_to_linux(&jump_uc);
1809 }
1810
1811 assert(0);
1812 abort();
1813
1814 after_signal_handler:
1815 /*
1816 * Ensure all nested signal handlers have completed correctly
1817 * and then remove our stack reservation.
1818 */
1819 _sigoff();
1820 LX_SIGNAL_POST_HANDLER(lxfp, old_tsd_sp);
1821 assert(lxtsd->lxtsd_lx_sp == lxfp);
1822 lx_debug("lx_sigdeliver: after; Linux tsd sp %p -> %p\n", lxfp,
1823 old_tsd_sp);
1824 lxtsd->lxtsd_lx_sp = old_tsd_sp;
1825 if (newstack) {
1826 LX_SIGNAL_ALTSTACK_DISABLE();
1827 lx_debug("lx_sigdeliver: disabling ALTSTACK sp %p\n", lxfp);
1828 lxtsd->lxtsd_sigaltstack.ss_flags &= ~LX_SS_ONSTACK;
1829 }
1830 /*
1831 * Restore backup signal tracking chain pointer to previous value:
1832 */
1833 if (lxtsd->lxtsd_sigbackup != NULL) {
1834 lx_sigbackup_t *bprev = lxtsd->lxtsd_sigbackup->lxsb_previous;
1835
1836 lx_debug("lx_sigdeliver: restoring sigbackup %p to %p\n",
1837 lxtsd->lxtsd_sigbackup, bprev);
1838
1839 lxtsd->lxtsd_sigbackup = bprev;
1840 }
1841 _sigon();
1842
1843 /*
1844 * Here we return to libc so that it may clean up and restore the
1845 * context originally interrupted by this signal.
1846 */
1847 }
1848
1849 /*
1850 * Common routine to modify sigaction characteristics of a thread.
1851 *
1852 * We shouldn't need any special locking code here as we actually use our copy
1853 * of libc's sigaction() to do all the real work, so its thread locking should
1854 * take care of any issues for us.
1855 */
1856 static int
1857 lx_sigaction_common(int lx_sig, struct lx_sigaction *lxsp,
1858 struct lx_sigaction *olxsp)
1859 {
1860 struct lx_sigaction *lxsap;
1861 struct sigaction sa;
1862
1863 if (lx_sig <= 0 || lx_sig > LX_NSIG)
1864 return (-EINVAL);
1865
1866 lxsap = &lx_sighandlers.lx_sa[lx_sig];
1867 lx_debug("&lx_sighandlers.lx_sa[%d] = 0x%p", lx_sig, lxsap);
1868
1869 if ((olxsp != NULL) &&
1870 ((uucopy(lxsap, olxsp, sizeof (struct lx_sigaction))) != 0))
1871 return (-errno);
1872
1873 if (lxsp != NULL) {
1874 int err, sig;
1875 struct lx_sigaction lxsa;
1876 sigset_t new_set, oset;
1877
1878 if (uucopy(lxsp, &lxsa, sizeof (struct lx_sigaction)) != 0)
1879 return (-errno);
1880
1881 if ((sig = ltos_signo[lx_sig]) != -1) {
1882 if (lx_no_abort_handler != 0) {
1883 /*
1884 * If LX_NO_ABORT_HANDLER has been set, we will
1885 * not allow the emulated program to do
1886 * anything hamfisted with SIGSEGV or SIGABRT
1887 * signals.
1888 */
1889 if (sig == SIGSEGV || sig == SIGABRT) {
1890 return (0);
1891 }
1892 }
1893
1894 /*
1895 * Block this signal while messing with its dispostion
1896 */
1897 (void) sigemptyset(&new_set);
1898 (void) sigaddset(&new_set, sig);
1899
1900 if (sigprocmask(SIG_BLOCK, &new_set, &oset) < 0) {
1901 err = errno;
1902 lx_debug("unable to block signal %d: %s", sig,
1903 strerror(err));
1904 return (-err);
1905 }
1906
1907 /*
1908 * We don't really need the old signal disposition at
1909 * this point, but this weeds out signals that would
1910 * cause sigaction() to return an error before we change
1911 * anything other than the current signal mask.
1912 */
1913 if (sigaction(sig, NULL, &sa) < 0) {
1914 err = errno;
1915 lx_debug("sigaction() to get old "
1916 "disposition for signal %d failed: "
1917 "%s", sig, strerror(err));
1918 (void) sigprocmask(SIG_SETMASK, &oset, NULL);
1919 return (-err);
1920 }
1921
1922 if ((lxsa.lxsa_handler != SIG_DFL) &&
1923 (lxsa.lxsa_handler != SIG_IGN)) {
1924 sa.sa_handler = lx_call_user_handler;
1925
1926 /*
1927 * The interposition signal handler needs the
1928 * information provided via the SA_SIGINFO flag.
1929 */
1930 sa.sa_flags = SA_SIGINFO;
1931
1932 /*
1933 * When translating from Linux to illumos
1934 * sigaction(2) flags, we explicitly do not
1935 * pass SA_ONSTACK to the kernel. The
1936 * alternate stack for Linux signal handling is
1937 * handled entirely by the emulation code.
1938 */
1939 if (lxsa.lxsa_flags & LX_SA_NOCLDSTOP)
1940 sa.sa_flags |= SA_NOCLDSTOP;
1941 if (lxsa.lxsa_flags & LX_SA_NOCLDWAIT)
1942 sa.sa_flags |= SA_NOCLDWAIT;
1943 if (lxsa.lxsa_flags & LX_SA_RESTART)
1944 sa.sa_flags |= SA_RESTART;
1945 if (lxsa.lxsa_flags & LX_SA_NODEFER)
1946 sa.sa_flags |= SA_NODEFER;
1947
1948 /*
1949 * RESETHAND cannot be used be passed through
1950 * for SIGPWR due to different default actions
1951 * between Linux and Illumos.
1952 */
1953 if ((sig != SIGPWR) &&
1954 (lxsa.lxsa_flags & LX_SA_RESETHAND))
1955 sa.sa_flags |= SA_RESETHAND;
1956
1957 if (ltos_sigset(&lxsa.lxsa_mask,
1958 &sa.sa_mask) != 0) {
1959 err = errno;
1960 (void) sigprocmask(SIG_SETMASK, &oset,
1961 NULL);
1962 return (-err);
1963 }
1964
1965 lx_debug("interposing handler @ 0x%p for "
1966 "signal %d (lx %d), flags 0x%x",
1967 lxsa.lxsa_handler, sig, lx_sig,
1968 lxsa.lxsa_flags);
1969
1970 if (sigaction(sig, &sa, NULL) < 0) {
1971 err = errno;
1972 lx_debug("sigaction() to set new "
1973 "disposition for signal %d failed: "
1974 "%s", sig, strerror(err));
1975 (void) sigprocmask(SIG_SETMASK, &oset,
1976 NULL);
1977 return (-err);
1978 }
1979 } else if ((sig != SIGPWR) ||
1980 ((sig == SIGPWR) &&
1981 (lxsa.lxsa_handler == SIG_IGN))) {
1982 /*
1983 * There's no need to interpose for SIG_DFL or
1984 * SIG_IGN so just call our copy of libc's
1985 * sigaction(), but don't allow SIG_DFL for
1986 * SIGPWR due to differing default actions
1987 * between Linux and Illumos.
1988 *
1989 * Get the previous disposition first so things
1990 * like sa_mask and sa_flags are preserved over
1991 * a transition to SIG_DFL or SIG_IGN, which is
1992 * what Linux expects.
1993 */
1994
1995 sa.sa_handler = lxsa.lxsa_handler;
1996
1997 if (sigaction(sig, &sa, NULL) < 0) {
1998 err = errno;
1999 lx_debug("sigaction(%d, %s) failed: %s",
2000 sig, ((sa.sa_handler == SIG_DFL) ?
2001 "SIG_DFL" : "SIG_IGN"),
2002 strerror(err));
2003 (void) sigprocmask(SIG_SETMASK, &oset,
2004 NULL);
2005 return (-err);
2006 }
2007 }
2008 } else {
2009 lx_debug("Linux signal with no kill support "
2010 "specified: %d", lx_sig);
2011 }
2012
2013 /*
2014 * Save the new disposition for the signal in the global
2015 * lx_sighandlers structure.
2016 */
2017 bcopy(&lxsa, lxsap, sizeof (struct lx_sigaction));
2018
2019 /*
2020 * Reset the signal mask to what we came in with if
2021 * we were modifying a kill-supported signal.
2022 */
2023 if (sig != -1)
2024 (void) sigprocmask(SIG_SETMASK, &oset, NULL);
2025 }
2026
2027 return (0);
2028 }
2029
#if defined(_ILP32)
/*
 * sigaction is only used in 32-bit code.
 *
 * This entry point takes the old-style struct lx_osigaction, whose signal
 * mask is a single lx_osigset_t word, and converts it to and from the
 * struct lx_sigaction form used by lx_sigaction_common().  actp and oactp
 * are addresses of lx_osigaction structures (either may be 0).  They are
 * accessed with uucopy(), so a failed copy is reported as -errno.
 *
 * Returns 0 on success or a negated errno value.
 */
long
lx_sigaction(uintptr_t lx_sig, uintptr_t actp, uintptr_t oactp)
{
	int val;
	struct lx_sigaction sa, osa;
	struct lx_sigaction *sap, *osap;
	struct lx_osigaction osigact;

	sap = (actp ? &sa : NULL);
	osap = (oactp ? &osa : NULL);

	/*
	 * If we have a source pointer, convert source lxsa_mask from
	 * lx_osigset_t to lx_sigset_t format.
	 */
	if (sap != NULL) {
		if (uucopy((void *)actp, &osigact, sizeof (osigact)) != 0)
			return (-errno);

		sap->lxsa_handler = osigact.lxsa_handler;

		bzero(&sap->lxsa_mask, sizeof (lx_sigset_t));

		for (val = 1; val <= OSIGSET_NBITS; val++)
			if (osigact.lxsa_mask & OSIGSET_BITSET(val))
				(void) lx_sigaddset(&sap->lxsa_mask, val);

		sap->lxsa_flags = osigact.lxsa_flags;
		sap->lxsa_restorer = osigact.lxsa_restorer;
	}

	if ((val = lx_sigaction_common(lx_sig, sap, osap)))
		return (val);

	/*
	 * If we have a save pointer, convert the old lxsa_mask from
	 * lx_sigset_t to lx_osigset_t format.  The result is assembled
	 * locally and handed back with a single copy.
	 */
	if (osap != NULL) {
		bzero(&osigact, sizeof (osigact));

		osigact.lxsa_handler = osap->lxsa_handler;

		for (val = 1; val <= OSIGSET_NBITS; val++)
			if (lx_sigismember(&osap->lxsa_mask, val))
				osigact.lxsa_mask |= OSIGSET_BITSET(val);

		osigact.lxsa_flags = osap->lxsa_flags;
		osigact.lxsa_restorer = osap->lxsa_restorer;

		if (uucopy(&osigact, (void *)oactp, sizeof (osigact)) != 0)
			return (-errno);
	}

	return (0);
}
#endif
2087
2088 long
2089 lx_rt_sigaction(uintptr_t lx_sig, uintptr_t actp, uintptr_t oactp,
2090 uintptr_t setsize)
2091 {
2092 /*
2093 * The "new" rt_sigaction call checks the setsize
2094 * parameter.
2095 */
2096 if ((size_t)setsize != sizeof (lx_sigset_t))
2097 return (-EINVAL);
2098
2099 return (lx_sigaction_common(lx_sig, (struct lx_sigaction *)actp,
2100 (struct lx_sigaction *)oactp));
2101 }
2102
#if defined(_ILP32)
/*
 * Convert signal syscall to a call to the lx_sigaction() syscall
 * Only used in 32-bit code.
 *
 * lx_sigaction() interprets both of its pointer arguments as
 * struct lx_osigaction and its flags as LX_SA_* values, so the request is
 * built in that form rather than as a native struct sigaction.
 *
 * Returns the previously recorded handler on success, or the negated errno
 * value from lx_sigaction() on failure.
 */
long
lx_signal(uintptr_t lx_sig, uintptr_t handler)
{
	struct lx_osigaction act;
	struct lx_osigaction oact;
	int rc;

	/*
	 * Use sigaction to mimic SYSV signal() behavior; glibc will
	 * actually call sigaction(2) itself, so we're really reaching
	 * back for signal(2) semantics here.
	 */
	bzero(&act, sizeof (act));
	act.lxsa_handler = (void (*)())handler;
	act.lxsa_flags = LX_SA_RESETHAND | LX_SA_NODEFER;

	rc = lx_sigaction(lx_sig, (uintptr_t)&act, (uintptr_t)&oact);
	return ((rc == 0) ? ((ssize_t)oact.lxsa_handler) : rc);
}
#endif
2128
2129 void
2130 lx_sighandlers_save(lx_sighandlers_t *saved)
2131 {
2132 bcopy(&lx_sighandlers, saved, sizeof (lx_sighandlers_t));
2133 }
2134
2135 void
2136 lx_sighandlers_restore(lx_sighandlers_t *saved)
2137 {
2138 bcopy(saved, &lx_sighandlers, sizeof (lx_sighandlers_t));
2139 }
2140
2141 int
2142 lx_siginit(void)
2143 {
2144 extern void set_setcontext_enforcement(int);
2145 extern void set_escaped_context_cleanup(int);
2146
2147 struct sigaction sa;
2148 sigset_t new_set, oset;
2149 int lx_sig, sig;
2150
2151 if (getenv("LX_NO_ABORT_HANDLER") != NULL) {
2152 lx_no_abort_handler = 1;
2153 }
2154
2155 /*
2156 * Block all signals possible while setting up the signal imposition
2157 * mechanism.
2158 */
2159 (void) sigfillset(&new_set);
2160
2161 if (sigprocmask(SIG_BLOCK, &new_set, &oset) < 0)
2162 lx_err_fatal("unable to block signals while setting up "
2163 "imposition mechanism: %s", strerror(errno));
2164
2165 /*
2166 * Ignore any signals that have no Linux analog so that those
2167 * signals cannot be sent to Linux processes from the global zone
2168 */
2169 for (sig = 1; sig < NSIG; sig++)
2170 if (stol_signo[sig] < 0)
2171 (void) sigignore(sig);
2172
2173 /*
2174 * Mark any signals that are ignored as ignored in our interposition
2175 * handler array
2176 */
2177 for (lx_sig = 1; lx_sig <= LX_NSIG; lx_sig++) {
2178 if (((sig = ltos_signo[lx_sig]) != -1) &&
2179 (sigaction(sig, NULL, &sa) < 0))
2180 lx_err_fatal("unable to determine previous disposition "
2181 "for signal %d: %s", sig, strerror(errno));
2182
2183 if (sa.sa_handler == SIG_IGN) {
2184 lx_debug("marking signal %d (lx %d) as SIG_IGN",
2185 sig, lx_sig);
2186 lx_sighandlers.lx_sa[lx_sig].lxsa_handler = SIG_IGN;
2187 }
2188 }
2189
2190 /*
2191 * Have our interposition handler handle SIGPWR to start with,
2192 * as it has a default action of terminating the process in Linux
2193 * but its default is to be ignored in Illumos.
2194 */
2195 (void) sigemptyset(&sa.sa_mask);
2196 sa.sa_sigaction = lx_call_user_handler;
2197 sa.sa_flags = SA_SIGINFO;
2198
2199 if (sigaction(SIGPWR, &sa, NULL) < 0)
2200 lx_err_fatal("sigaction(SIGPWR) failed: %s", strerror(errno));
2201
2202 /*
2203 * Illumos' libc forces certain register values in the ucontext_t
2204 * used to restore a post-signal user context to be those Illumos
2205 * expects; however that is not what we want to happen if the signal
2206 * was taken while branded code was executing, so we must disable
2207 * that behavior.
2208 */
2209 set_setcontext_enforcement(0);
2210
2211 /*
2212 * The illumos libc attempts to clean up dangling uc_link pointers in
2213 * signal handling contexts when libc believes us to have escaped a
2214 * signal handler incorrectly in the past. We want to disable this
2215 * behaviour, so that the system call emulation context saved by the
2216 * kernel brand module for lx_emulate() may be part of the context
2217 * chain without itself being used for signal handling.
2218 */
2219 set_escaped_context_cleanup(0);
2220
2221 /*
2222 * Reset the signal mask to what we came in with.
2223 */
2224 (void) sigprocmask(SIG_SETMASK, &oset, NULL);
2225
2226 lx_debug("interposition handler setup for SIGPWR");
2227 return (0);
2228 }
2229
2230 /*
2231 * This code strongly resembles lx_poll(), but is here to be able to take
2232 * advantage of the Linux signal helper routines.
2233 */
2234 long
2235 lx_ppoll(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4, uintptr_t p5)
2236 {
2237 struct pollfd *lfds, *sfds;
2238 nfds_t nfds = (nfds_t)p2;
2239 timespec_t ts, *tsp = NULL;
2240 int fds_size, i, rval, revents;
2241 lx_sigset_t lxsig, *lxsigp = NULL;
2242 sigset_t sigset, *sp = NULL;
2243 rctlblk_t *rblk;
2244
2245 lx_debug("\tppoll(0x%p, %d, 0x%p, 0x%p, %d)", p1, p2, p3, p4, p5);
2246
2247 if (p3 != NULL) {
2248 if (uucopy((void *)p3, &ts, sizeof (ts)) != 0)
2249 return (-errno);
2250
2251 tsp = &ts;
2252 }
2253
2254 if (p4 != NULL) {
2255 if (uucopy((void *)p4, &lxsig, sizeof (lxsig)) != 0)
2256 return (-errno);
2257
2258 lxsigp = &lxsig;
2259 if ((size_t)p5 != sizeof (lx_sigset_t))
2260 return (-EINVAL);
2261
2262 if (lxsigp) {
2263 if ((rval = ltos_sigset(lxsigp, &sigset)) != 0)
2264 return (rval);
2265
2266 sp = &sigset;
2267 }
2268 }
2269
2270 /*
2271 * Deal with the NULL fds[] case.
2272 */
2273 if (nfds == 0 || p1 == NULL) {
2274 if ((rval = ppoll(NULL, 0, tsp, sp)) < 0)
2275 return (-errno);
2276
2277 return (rval);
2278 }
2279
2280 if (maxfd == 0) {
2281 if ((rblk = (rctlblk_t *)SAFE_ALLOCA(rctlblk_size())) == NULL)
2282 return (-ENOMEM);
2283
2284 if (getrctl("process.max-file-descriptor", NULL, rblk,
2285 RCTL_FIRST) == -1)
2286 return (-EINVAL);
2287
2288 maxfd = rctlblk_get_value(rblk);
2289 }
2290
2291 if (nfds > maxfd)
2292 return (-EINVAL);
2293
2294 /*
2295 * Note: we are assuming that the Linux and Illumos pollfd
2296 * structures are identical. Copy in the Linux poll structure.
2297 */
2298 fds_size = sizeof (struct pollfd) * nfds;
2299 lfds = (struct pollfd *)SAFE_ALLOCA(fds_size);
2300 if (lfds == NULL)
2301 return (-ENOMEM);
2302 if (uucopy((void *)p1, lfds, fds_size) != 0)
2303 return (-errno);
2304
2305 /*
2306 * The poll system call modifies the poll structures passed in
2307 * so we'll need to make an extra copy of them.
2308 */
2309 sfds = (struct pollfd *)SAFE_ALLOCA(fds_size);
2310 if (sfds == NULL)
2311 return (-ENOMEM);
2312
2313 /* Convert the Linux events bitmask into the Illumos equivalent. */
2314 for (i = 0; i < nfds; i++) {
2315 /*
2316 * If the caller is polling for an unsupported event, we
2317 * have to bail out.
2318 */
2319 if (lfds[i].events & ~LX_POLL_SUPPORTED_EVENTS) {
2320 lx_unsupported("unsupported poll events requested: "
2321 "events=0x%x", lfds[i].events);
2322 return (-ENOTSUP);
2323 }
2324
2325 sfds[i].fd = lfds[i].fd;
2326 sfds[i].events = lfds[i].events & LX_POLL_COMMON_EVENTS;
2327 if (lfds[i].events & LX_POLLWRNORM)
2328 sfds[i].events |= POLLWRNORM;
2329 if (lfds[i].events & LX_POLLWRBAND)
2330 sfds[i].events |= POLLWRBAND;
2331 if (lfds[i].events & LX_POLLRDHUP)
2332 sfds[i].events |= POLLRDHUP;
2333 sfds[i].revents = 0;
2334 }
2335
2336 if ((rval = ppoll(sfds, nfds, tsp, sp)) < 0)
2337 return (-errno);
2338
2339 /* Convert the Illumos revents bitmask into the Linux equivalent */
2340 for (i = 0; i < nfds; i++) {
2341 revents = sfds[i].revents & LX_POLL_COMMON_EVENTS;
2342 if (sfds[i].revents & POLLWRBAND)
2343 revents |= LX_POLLWRBAND;
2344 if (sfds[i].revents & POLLRDHUP)
2345 revents |= LX_POLLRDHUP;
2346
2347 /*
2348 * Be careful because on Illumos POLLOUT and POLLWRNORM
2349 * are defined to the same values but on Linux they
2350 * are not.
2351 */
2352 if (sfds[i].revents & POLLOUT) {
2353 if ((lfds[i].events & LX_POLLOUT) == 0)
2354 revents &= ~LX_POLLOUT;
2355 if (lfds[i].events & LX_POLLWRNORM)
2356 revents |= LX_POLLWRNORM;
2357 }
2358
2359 lfds[i].revents = revents;
2360 }
2361
2362 /* Copy out the results */
2363 if (uucopy(lfds, (void *)p1, fds_size) != 0)
2364 return (-errno);
2365
2366 return (rval);
2367 }
2368
2369 /*
2370 * This code stongly resemebles lx_select(), but is here to be able to take
2371 * advantage of the Linux signal helper routines.
2372 */
2373 long
2374 lx_pselect6(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4,
2375 uintptr_t p5, uintptr_t p6)
2376 {
2377 int nfds = (int)p1;
2378 fd_set *rfdsp = NULL;
2379 fd_set *wfdsp = NULL;
2380 fd_set *efdsp = NULL;
2381 timespec_t ts, *tsp = NULL;
2382 int fd_set_len = howmany(nfds, 8);
2383 int r;
2384 sigset_t sigset, *sp = NULL;
2385
2386 lx_debug("\tpselect6(%d, 0x%p, 0x%p, 0x%p, 0x%p, 0x%p)",
2387 p1, p2, p3, p4, p4, p6);
2388
2389 if (nfds > 0) {
2390 if (p2 != NULL) {
2391 rfdsp = SAFE_ALLOCA(fd_set_len);
2392 if (rfdsp == NULL)
2393 return (-ENOMEM);
2394 if (uucopy((void *)p2, rfdsp, fd_set_len) != 0)
2395 return (-errno);
2396 }
2397 if (p3 != NULL) {
2398 wfdsp = SAFE_ALLOCA(fd_set_len);
2399 if (wfdsp == NULL)
2400 return (-ENOMEM);
2401 if (uucopy((void *)p3, wfdsp, fd_set_len) != 0)
2402 return (-errno);
2403 }
2404 if (p4 != NULL) {
2405 efdsp = SAFE_ALLOCA(fd_set_len);
2406 if (efdsp == NULL)
2407 return (-ENOMEM);
2408 if (uucopy((void *)p4, efdsp, fd_set_len) != 0)
2409 return (-errno);
2410 }
2411 }
2412
2413 if (p5 != NULL) {
2414 if (uucopy((void *)p5, &ts, sizeof (ts)) != 0)
2415 return (-errno);
2416
2417 tsp = &ts;
2418 }
2419
2420 if (p6 != NULL) {
2421 /*
2422 * To force the number of arguments to be no more than six,
2423 * Linux bundles both the sigset and the size into a structure
2424 * that becomes the sixth argument.
2425 */
2426 struct {
2427 lx_sigset_t *addr;
2428 size_t size;
2429 } lx_sigset;
2430
2431 if (uucopy((void *)p6, &lx_sigset, sizeof (lx_sigset)) != 0)
2432 return (-errno);
2433
2434 /*
2435 * Yes, that's right: Linux forces a size to be passed only
2436 * so it can check that it's the size of a sigset_t.
2437 */
2438 if (lx_sigset.size != sizeof (lx_sigset_t))
2439 return (-EINVAL);
2440
2441 /*
2442 * This is where we check if the sigset is *really* NULL.
2443 */
2444 if (lx_sigset.addr) {
2445 if ((r = ltos_sigset(lx_sigset.addr, &sigset)) != 0)
2446 return (r);
2447
2448 sp = &sigset;
2449 }
2450 }
2451
2452 #if defined(_LP64)
2453 r = pselect(nfds, rfdsp, wfdsp, efdsp, tsp, sp);
2454 #else
2455 if (nfds >= FD_SETSIZE)
2456 r = pselect_large_fdset(nfds, rfdsp, wfdsp, efdsp, tsp, sp);
2457 else
2458 r = pselect(nfds, rfdsp, wfdsp, efdsp, tsp, sp);
2459 #endif
2460
2461 if (r < 0)
2462 return (-errno);
2463
2464 /*
2465 * For pselect6(), we don't honor the strange Linux select() semantics
2466 * with respect to the timestruc parameter because glibc ignores it
2467 * anyway -- just copy out the fd pointers and return.
2468 */
2469 if ((rfdsp != NULL) && (uucopy(rfdsp, (void *)p2, fd_set_len) != 0))
2470 return (-errno);
2471 if ((wfdsp != NULL) && (uucopy(wfdsp, (void *)p3, fd_set_len) != 0))
2472 return (-errno);
2473 if ((efdsp != NULL) && (uucopy(efdsp, (void *)p4, fd_set_len) != 0))
2474 return (-errno);
2475
2476 return (r);
2477 }
2478
2479 /*
2480 * The first argument is the pid (Linux tgid) to send the signal to, second
2481 * argument is the signal to send (an lx signal), and third is the siginfo_t
2482 * with extra information. We translate the code and signal only from the
2483 * siginfo_t, and leave everything else the same as it gets passed through the
2484 * signalling system. This is enough to get sigqueue working. See Linux man
2485 * page rt_sigqueueinfo(2).
2486 */
2487 long
2488 lx_rt_sigqueueinfo(uintptr_t p1, uintptr_t p2, uintptr_t p3)
2489 {
2490 pid_t tgid = (pid_t)p1;
2491 int lx_sig = (int)p2;
2492 int sig;
2493 lx_siginfo_t lx_siginfo;
2494 siginfo_t siginfo;
2495 int s_code;
2496 pid_t s_pid;
2497
2498 if (uucopy((void *)p3, &lx_siginfo, sizeof (lx_siginfo_t)) != 0)
2499 return (-EFAULT);
2500 s_code = ltos_sigcode(lx_siginfo.lsi_code);
2501 if (s_code == LX_SI_CODE_NOT_EXIST)
2502 return (-EINVAL);
2503 if (lx_sig < 0 || lx_sig > LX_NSIG || (sig = ltos_signo[lx_sig]) < 0) {
2504 return (-EINVAL);
2505 }
2506 /*
2507 * This case (when trying to kill pid 0) just has a different errno
2508 * returned in illumos than in Linux.
2509 */
2510 if (tgid == 0)
2511 return (-ESRCH);
2512 if (lx_lpid_to_spid(tgid, &s_pid) != 0)
2513 return (-ESRCH);
2514 if (SI_CANQUEUE(s_code)) {
2515 return ((syscall(SYS_sigqueue, s_pid, sig,
2516 lx_siginfo.lsi_value, s_code, 0) == -1) ?
2517 (-errno): 0);
2518 } else {
2519 /*
2520 * This case is unlikely, as the main entry point is through
2521 * sigqueue, which always has a queuable si_code.
2522 */
2523 siginfo.si_signo = sig;
2524 siginfo.si_code = s_code;
2525 siginfo.si_pid = lx_siginfo.lsi_pid;
2526 siginfo.si_value = lx_siginfo.lsi_value;
2527 siginfo.si_uid = lx_siginfo.lsi_uid;
2528 return ((syscall(SYS_brand, B_HELPER_SIGQUEUE,
2529 tgid, sig, &siginfo)) ? (-errno) : 0);
2530 }
2531 }
2532
2533 /*
2534 * Adds an additional argument for which thread within a thread group to send
2535 * the signal to (added as the second argument).
2536 */
2537 long
2538 lx_rt_tgsigqueueinfo(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4)
2539 {
2540 pid_t tgid = (pid_t)p1;
2541 pid_t tid = (pid_t)p2;
2542 int lx_sig = (int)p3;
2543 int sig;
2544 lx_siginfo_t lx_siginfo;
2545 siginfo_t siginfo;
2546 int si_code;
2547
2548 if (uucopy((void *)p4, &lx_siginfo, sizeof (lx_siginfo_t)) != 0)
2549 return (-EFAULT);
2550 if (lx_sig < 0 || lx_sig > LX_NSIG || (sig = ltos_signo[lx_sig]) < 0) {
2551 return (-EINVAL);
2552 }
2553 si_code = ltos_sigcode(lx_siginfo.lsi_code);
2554 if (si_code == LX_SI_CODE_NOT_EXIST)
2555 return (-EINVAL);
2556 /*
2557 * Check for invalid tgid and tids. That appears to be only negatives
2558 * and 0 values. Everything else that doesn't exist is instead ESRCH.
2559 */
2560 if (tgid <= 0 || tid <= 0)
2561 return (-EINVAL);
2562 siginfo.si_signo = sig;
2563 siginfo.si_code = si_code;
2564 siginfo.si_pid = lx_siginfo.lsi_pid;
2565 siginfo.si_value = lx_siginfo.lsi_value;
2566 siginfo.si_uid = lx_siginfo.lsi_uid;
2567
2568 return ((syscall(SYS_brand, B_HELPER_TGSIGQUEUE, tgid, tid, sig,
2569 &siginfo)) ? (-errno) : 0);
2570 }
2571
2572 long
2573 lx_signalfd(int fd, uintptr_t mask, size_t msize)
2574 {
2575 return (lx_signalfd4(fd, mask, msize, 0));
2576 }
2577
2578 long
2579 lx_signalfd4(int fd, uintptr_t mask, size_t msize, int flags)
2580 {
2581 sigset_t s_set;
2582 int r;
2583
2584 if (msize != sizeof (int64_t))
2585 return (-EINVAL);
2586
2587 if (ltos_sigset((lx_sigset_t *)mask, &s_set) != 0)
2588 return (-errno);
2589
2590 r = signalfd(fd, &s_set, flags);
2591
2592 /*
2593 * signalfd(3C) may fail with ENOENT if /dev/signalfd is not available.
2594 * It is less jarring to Linux programs to tell them that internal
2595 * allocation failed than to report an error number they are not
2596 * expecting.
2597 */
2598 if (r == -1 && errno == ENOENT)
2599 return (-ENODEV);
2600
2601 return (r == -1 ? -errno : r);
2602 }