Print this page
OS-4470 lxbrand unblocking signals in new threads must be atomic
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/lib/brand/lx/lx_brand/common/signal.c
+++ new/usr/src/lib/brand/lx/lx_brand/common/signal.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
24 24 * Use is subject to license terms.
25 25 */
26 26
27 27 /*
28 28 * Copyright 2015 Joyent, Inc. All rights reserved.
29 29 */
30 30
31 31 #include <sys/types.h>
32 32 #include <sys/param.h>
33 33 #include <sys/segments.h>
34 34 #include <sys/lx_types.h>
35 35 #include <sys/lx_brand.h>
36 36 #include <sys/lx_misc.h>
37 37 #include <sys/lx_debug.h>
38 38 #include <sys/lx_poll.h>
39 39 #include <sys/lx_signal.h>
40 40 #include <sys/lx_sigstack.h>
41 41 #include <sys/lx_syscall.h>
42 42 #include <sys/lx_thread.h>
43 43 #include <sys/syscall.h>
44 44 #include <lx_provider_impl.h>
45 45 #include <sys/stack.h>
46 46 #include <assert.h>
47 47 #include <errno.h>
48 48 #include <poll.h>
49 49 #include <rctl.h>
50 50 #include <signal.h>
51 51 #include <stdlib.h>
52 52 #include <string.h>
53 53 #include <strings.h>
54 54 #include <thread.h>
55 55 #include <ucontext.h>
56 56 #include <unistd.h>
57 57 #include <stdio.h>
58 58 #include <libintl.h>
59 59 #include <ieeefp.h>
60 60 #include <sys/signalfd.h>
61 61
62 62 #if defined(_ILP32)
63 63 extern int pselect_large_fdset(int nfds, fd_set *in0, fd_set *out0, fd_set *ex0,
64 64 const timespec_t *tsp, const sigset_t *sp);
65 65 #endif
66 66
67 67 #define MIN(a, b) ((a) < (b) ? (a) : (b))
68 68
69 69 /*
70 70 * Delivering signals to a Linux process is complicated by differences in
71 71 * signal numbering, stack structure and contents, and the action taken when a
72 72 * signal handler exits. In addition, many signal-related structures, such as
73 73 * sigset_ts, vary between Illumos and Linux.
74 74 *
75 75 * To support user-level signal handlers, the brand uses a double layer of
76 76 * indirection to process and deliver signals to branded threads.
77 77 *
78 78 * When a Linux process sends a signal using the kill(2) system call, we must
79 79 * translate the signal into the Illumos equivalent before handing control off
80 80 * to the standard signalling mechanism. When a signal is delivered to a Linux
81 81 * process, we translate the signal number from Illumos back to Linux.
82 82 * Translating signals both at generation and delivery time ensures both that
83 83 * Illumos signals are sent properly to Linux applications and that signals'
84 84 * default behavior works as expected.
85 85 *
86 86 * In a normal Illumos process, signal delivery is interposed on for any thread
87 87 * registering a signal handler by libc. Libc needs to do various bits of magic
88 88 * to provide thread-safe critical regions, so it registers its own handler,
89 89 * named sigacthandler(), using the sigaction(2) system call. When a signal is
90 90 * received, sigacthandler() is called, and after some processing, libc turns
91 91 * around and calls the user's signal handler via a routine named
92 92 * call_user_handler().
93 93 *
94 94 * Adding a Linux branded thread to the mix complicates things somewhat.
95 95 *
96 96 * First, when a thread receives a signal, it may either be running in an
97 97 * emulated Linux context or a native illumos context. In either case, the
98 98 * in-kernel brand module is responsible for preserving the register state
99 99 * from the interrupted context, regardless of whether emulated or native
100 100 * software was running at the time. The kernel is also responsible for
101 101 * ensuring that the illumos native sigacthandler() is called with register
102 102 * values appropriate for native code. Of particular note is the %gs segment
103 103 * selector for 32-bit code, and the %fsbase segment base register for 64-bit
104 104 * code; these are used by libc to locate per-thread data structures.
105 105 *
106 106 * Second, the signal number translation referenced above must take place.
107 107 * Finally, when we hand control to the Linux signal handler we must do so
108 108 * on the brand stack, and with registers configured appropriately for the
109 109 * Linux application.
110 110 *
111 111 * This need to translate signal numbers (and manipulate the signal handling
112 112 * context) means that with standard Illumos libc, following a signal from
113 113 * generation to delivery looks something like:
114 114 *
115 115 * kernel ->
116 116 * sigacthandler() ->
117 117 * call_user_handler() ->
118 118 * user signal handler
119 119 *
120 120 * but for the brand's Linux threads, this would look like:
121 121 *
122 122 * kernel ->
123 123 * sigacthandler() ->
124 124 * call_user_handler() ->
125 125 * lx_call_user_handler() ->
126 126 * lx_sigdeliver() ->
127 127 * syscall(B_JUMP_TO_LINUX, ...) ->
128 128 * Linux user signal handler
129 129 *
130 130 * The new additions are:
131 131 *
132 132 * lx_call_user_handler
133 133 * ====================
134 134 * This routine is responsible for translating Illumos signal numbers to
135 135 * their Linux equivalents, building a Linux signal stack based on the
136 136 * information Illumos has provided, and passing the stack to the
137 137 * registered Linux signal handler. It is, in effect, the Linux thread
138 138 * equivalent to libc's call_user_handler().
139 139 *
140 140 * lx_sigdeliver
141 141 * =============
142 142 *
143 143 * Note that none of this interposition is necessary unless a Linux thread
144 144 * registers a user signal handler, as the default action for all signals is the
145 145 * same between Illumos and Linux save for one signal, SIGPWR. For this reason,
146 146 * the brand ALWAYS installs its own internal signal handler for SIGPWR that
147 147 * translates the action to the Linux default, to terminate the process.
148 148 * (Illumos' default action is to ignore SIGPWR.)
149 149 *
150 150 * It is also important to note that when signals are not translated, the brand
151 151 * relies upon code interposing upon the wait(2) system call to translate
152 152 * signals to their proper values for any Linux threads retrieving the status
153 153 * of others. So while the Illumos signal number for a particular signal is set
154 154 * in a process' data structures (and would be returned as the result of say,
155 155 * WTERMSIG()), the brand's interposition upon wait(2) is responsible for
156 156 * translating the value WTERMSIG() would return from a Illumos signal number
157 157 * to the appropriate Linux value.
158 158 *
159 159 * lx_call_user_handler() calls lx_sigdeliver() with a helper function
160 160 * (typically lx_build_signal_frame) which builds a stack frame for the 32-bit
161 161 * Linux signal handler, or populates a local (on the stack) structure for the
162 162 * 64-bit Linux signal handler. The stack at that time looks like this:
163 163 *
164 164 * =========================================================
165 165 * | | lx_sigdeliver_frame_t -- includes LX_SIGRT_MAGIC and |
166 166 * | | a return context for the eventual sigreturn(2) call |
167 167 * | =========================================================
168 168 * | | Linux signal frame (32-bit) or local data |
169 169 * V | (64-bit) built by stack_builder() |
170 170 * =========================================================
171 171 *
172 172 * The process of returning to an interrupted thread of execution from a user
173 173 * signal handler is entirely different between Illumos and Linux. While
174 174 * Illumos generally expects to set the context to the interrupted one on a
175 175 * normal return from a signal handler, in the normal case Linux instead calls
176 176 * code that calls a specific Linux system call, rt_sigreturn(2) (or it also
177 177 * can call sigreturn(2) in 32-bit code). Thus when a Linux signal handler
178 178 * completes execution, instead of returning through what would in libc be a
179 179 * call to setcontext(2), the rt_sigreturn(2) Linux system call is responsible
180 180 * for accomplishing much the same thing. It's for this reason that the stack
181 181 * frame we build has the lx_(rt_)sigreturn_tramp code on the top of the
182 182 * stack. The code looks like this:
183 183 *
184 184 * 32-bit 64-bit
185 185 * -------------------------------- -----------------------------
186 186 * mov LX_SYS_rt_sigreturn, %eax movq LX_SYS_rt_sigreturn, %rax
187 187 * int $0x80 syscall
188 188 *
189 189 * We also use these same functions (lx_rt_sigreturn_tramp or
190 190 * lx_sigreturn_tramp) to actually return from the signal handler.
191 191 *
192 192 * (Note that this trampoline code actually lives in a proper executable segment
193 193 * and not on the stack, but gdb checks for the exact code sequence of the
194 194 * trampoline code on the stack to determine whether it is in a signal stack
195 195 * frame or not. Really.)
196 196 *
197 197 * When the 32-bit Linux user signal handler is eventually called, the brand
198 198 * stack frame looks like this (in the case of a "modern" signal stack; see
199 199 * the lx_sigstack structure definition):
200 200 *
201 201 * =========================================================
202 202 * | | lx_sigdeliver_frame_t |
203 203 * | =========================================================
204 204 * | | Trampoline code (marker for gdb, not really executed) |
205 205 * | =========================================================
206 206 * | | Linux struct _fpstate |
207 207 * | =========================================================
208 208 * V | Linux ucontext_t | <--+
209 209 * ========================================================= |
210 210 * | Linux siginfo_t | <--|-----+
211 211 * ========================================================= | |
212 212 * | Pointer to Linux ucontext_t (or NULL) (sigaction arg2)| ---+ |
213 213 * ========================================================= |
214 214 * | Pointer to Linux siginfo_t (or NULL) (sigaction arg1)| ---------+
215 215 * =========================================================
216 216 * | Linux signal number (sigaction arg0)|
217 217 * =========================================================
218 218 * | Pointer to signal return code (trampoline code) |
219 219 * =========================================================
220 220 *
221 221 * The 64-bit stack-local data looks like this:
222 222 *
223 223 * =========================================================
224 224 * | | lx_sigdeliver_frame_t |
225 225 * | =========================================================
226 226 * | | Trampoline code (marker for gdb, not really executed) |
227 227 * | =========================================================
228 228 * | | Linux struct _fpstate |
229 229 * | =========================================================
230 230 * V | Linux ucontext_t | %rdx arg2
231 231 * =========================================================
232 232 * | Linux siginfo_t | %rsi arg1
233 233 * =========================================================
234 234 * | Pointer to signal return code (trampoline code) |
235 235 * =========================================================
236 236 *
237 237 * As usual in 64-bit code, %rdi is arg0 which is the signal number.
238 238 *
239 239 * The *sigreturn(2) family of emulated system call handlers locates the
240 240 * "lx_sigdeliver_frame_t" struct on the Linux stack as part of processing
241 241 * the system call. This object contains a guard value (LX_SIGRT_MAGIC) to
242 242 * detect stack smashing or an incorrect stack pointer. It also contains a
243 243 * "return" context, which we use to get back to the "lx_sigdeliver()" frame
244 244 * on the native stack that originally dispatched to the Linux signal
245 245 * handler. The lx_sigdeliver() function is then able to return to the
246 246 * native libc signal handler in the usual way. This results in a further
247 247 * setcontext() back to whatever was running when we took the signal.
248 248 *
249 249 * There are some edge cases where the "return" context cannot be located
250 250 * by inspection of the Linux stack; e.g. if the guard value has been
251 251 * corrupted, or the emulated program has relocated parts of the signal
252 252 * delivery stack frame. If this case is detected, a fallback mechanism is
253 253 * used to attempt to find the return context. A chain of "lx_sigbackup_t"
254 254 * objects is maintained in signal interposer call frames, with the current
255 255 * head stored in the thread-specific "lx_tsd_t". This mechanism is
256 256 * similar in principle to the "lwp_oldcontext" member of the "klwp_t" used
257 257 * by the native signal handling infrastructure. This backup chain is used
258 258 * by the sigreturn(2) family of emulated system calls in the event that
259 259 * the Linux stack did not correctly reference a return context.
260 260 */
261 261
262 262 typedef struct lx_sigdeliver_frame {
263 263 uintptr_t lxsdf_magic;
264 264 ucontext_t *lxsdf_retucp;
265 265 ucontext_t *lxsdf_sigucp;
266 266 lx_sigbackup_t *lxsdf_sigbackup;
267 267 } lx_sigdeliver_frame_t;
268 268
269 269 struct lx_oldsigstack {
270 270 void (*retaddr)(); /* address of real lx_sigreturn code */
271 271 int sig; /* signal number */
272 272 lx_sigcontext_t sigc; /* saved user context */
273 273 lx_fpstate_t fpstate; /* saved FP state */
274 274 int sig_extra; /* signal mask for signals [32 .. NSIG - 1] */
275 275 char trampoline[8]; /* code for trampoline to lx_sigreturn() */
276 276 };
277 277
278 278 /*
279 279 * The lx_sighandlers structure needs to be a global due to the semantics of
280 280 * clone().
281 281 *
282 282 * If CLONE_SIGHAND is set, the calling process and child share signal
283 283 * handlers, and if either calls sigaction(2) it should change the behavior
284 284 * in the other thread. Each thread does, however, have its own signal mask
285 285 * and set of pending signals.
286 286 *
287 287 * If CLONE_SIGHAND is not set, the child process should inherit a copy of
288 288 * the signal handlers at the time of the clone() but later calls to
289 289 * sigaction(2) should only affect the individual thread calling it.
290 290 *
291 291 * This maps perfectly to a thr_create(3C) thread semantic in the first
292 292 * case and a fork(2)-type semantic in the second case. By making
293 293 * lx_sighandlers global, we automatically get the correct behavior.
294 294 */
295 295 static lx_sighandlers_t lx_sighandlers;
296 296
297 297 /*
298 298 * Setting LX_NO_ABORT_HANDLER in the environment will prevent the emulated
299 299 * Linux program from modifying the signal handling disposition for SIGSEGV or
300 300 * SIGABRT. Useful for debugging programs which fall over themselves to
301 301 * prevent useful core files being generated.
302 302 */
303 303 static int lx_no_abort_handler = 0;
304 304
305 305 static void lx_sigdeliver(int, siginfo_t *, ucontext_t *, size_t, void (*)(),
306 306 void (*)(), struct lx_sigaction *);
307 307
308 308 /*
309 309 * Cache result of process.max-file-descriptor to avoid calling getrctl()
310 310 * for each lx_ppoll().
311 311 */
312 312 static rlim_t maxfd = 0;
313 313
314 314 /*
315 315 * stol_stack() and ltos_stack() convert between Illumos and Linux stack_t
316 316 * structures.
317 317 *
318 318 * These routines are needed because although the two structures have the same
319 319 * contents, their contents are declared in a different order, so the content
320 320 * of the structures cannot be copied with a simple bcopy().
321 321 */
322 322 static void
323 323 stol_stack(stack_t *fr, lx_stack_t *to)
324 324 {
325 325 to->ss_sp = fr->ss_sp;
326 326 to->ss_flags = fr->ss_flags;
327 327 to->ss_size = fr->ss_size;
328 328 }
329 329
330 330 static void
331 331 ltos_stack(lx_stack_t *fr, stack_t *to)
332 332 {
333 333 to->ss_sp = fr->ss_sp;
334 334 to->ss_flags = fr->ss_flags;
335 335 to->ss_size = fr->ss_size;
336 336 }
337 337
338 338 static int
339 339 ltos_sigset(lx_sigset_t *lx_sigsetp, sigset_t *s_sigsetp)
340 340 {
341 341 lx_sigset_t l;
342 342 int lx_sig, sig;
343 343
344 344 if (uucopy(lx_sigsetp, &l, sizeof (lx_sigset_t)) != 0)
345 345 return (-errno);
346 346
347 347 (void) sigemptyset(s_sigsetp);
348 348
349 349 for (lx_sig = 1; lx_sig <= LX_NSIG; lx_sig++) {
350 350 if (lx_sigismember(&l, lx_sig) &&
351 351 ((sig = ltos_signo[lx_sig]) > 0))
352 352 (void) sigaddset(s_sigsetp, sig);
353 353 }
354 354
355 355 return (0);
356 356 }
357 357
358 358 static int
359 359 stol_sigset(sigset_t *s_sigsetp, lx_sigset_t *lx_sigsetp)
360 360 {
361 361 lx_sigset_t l;
362 362 int sig, lx_sig;
363 363
364 364 bzero(&l, sizeof (lx_sigset_t));
365 365
366 366 for (sig = 1; sig < NSIG; sig++) {
367 367 if (sigismember(s_sigsetp, sig) &&
368 368 ((lx_sig = stol_signo[sig]) > 0))
369 369 lx_sigaddset(&l, lx_sig);
370 370 }
371 371
372 372 return ((uucopy(&l, lx_sigsetp, sizeof (lx_sigset_t)) != 0)
373 373 ? -errno : 0);
374 374 }
375 375
376 376 #if defined(_ILP32)
377 377 static int
378 378 ltos_osigset(lx_osigset_t *lx_osigsetp, sigset_t *s_sigsetp)
379 379 {
380 380 lx_osigset_t lo;
381 381 int lx_sig, sig;
382 382
383 383 if (uucopy(lx_osigsetp, &lo, sizeof (lx_osigset_t)) != 0)
384 384 return (-errno);
385 385
386 386 (void) sigemptyset(s_sigsetp);
387 387
388 388 for (lx_sig = 1; lx_sig <= OSIGSET_NBITS; lx_sig++)
389 389 if ((lo & OSIGSET_BITSET(lx_sig)) &&
390 390 ((sig = ltos_signo[lx_sig]) > 0))
391 391 (void) sigaddset(s_sigsetp, sig);
392 392
393 393 return (0);
394 394 }
395 395
396 396 static int
397 397 stol_osigset(sigset_t *s_sigsetp, lx_osigset_t *lx_osigsetp)
398 398 {
399 399 lx_osigset_t lo = 0;
400 400 int lx_sig, sig;
401 401
402 402 /*
403 403 * Note that an lx_osigset_t can only represent the signals from
404 404 * [1 .. OSIGSET_NBITS], so even though a signal may be present in the
405 405 * Illumos sigset_t, it may not be representable as a bit in the
406 406 * lx_osigset_t.
407 407 */
408 408 for (sig = 1; sig < NSIG; sig++)
409 409 if (sigismember(s_sigsetp, sig) &&
410 410 ((lx_sig = stol_signo[sig]) > 0) &&
411 411 (lx_sig <= OSIGSET_NBITS))
412 412 lo |= OSIGSET_BITSET(lx_sig);
413 413
414 414 return ((uucopy(&lo, lx_osigsetp, sizeof (lx_osigset_t)) != 0)
415 415 ? -errno : 0);
416 416 }
417 417 #endif
418 418
419 419 static int
420 420 ltos_sigcode(int si_code)
421 421 {
422 422 switch (si_code) {
423 423 case LX_SI_USER:
424 424 return (SI_USER);
425 425 case LX_SI_TKILL:
426 426 return (SI_LWP);
427 427 case LX_SI_QUEUE:
428 428 return (SI_QUEUE);
429 429 case LX_SI_TIMER:
430 430 return (SI_TIMER);
431 431 case LX_SI_ASYNCIO:
432 432 return (SI_ASYNCIO);
433 433 case LX_SI_MESGQ:
434 434 return (SI_MESGQ);
435 435 default:
436 436 return (LX_SI_CODE_NOT_EXIST);
437 437 }
438 438 }
439 439
440 440 int
441 441 stol_siginfo(siginfo_t *siginfop, lx_siginfo_t *lx_siginfop)
442 442 {
443 443 int ret = 0;
444 444 lx_siginfo_t lx_siginfo;
445 445
446 446 bzero(&lx_siginfo, sizeof (*lx_siginfop));
447 447
448 448 if ((lx_siginfo.lsi_signo = stol_signo[siginfop->si_signo]) <= 0) {
449 449 /*
450 450 * Depending on the caller we may still need to get a usable
451 451 * converted siginfo struct.
452 452 */
453 453 lx_siginfo.lsi_signo = LX_SIGKILL;
454 454 errno = EINVAL;
455 455 ret = -1;
456 456 }
457 457
458 458 lx_siginfo.lsi_code = lx_stol_sigcode(siginfop->si_code);
459 459 lx_siginfo.lsi_errno = siginfop->si_errno;
460 460
461 461 switch (lx_siginfo.lsi_signo) {
462 462 /*
463 463 * Semantics ARE defined for SIGKILL, but since
464 464 * we can't catch it, we can't translate it. :-(
465 465 */
466 466 case LX_SIGPOLL:
467 467 lx_siginfo.lsi_band = siginfop->si_band;
468 468 lx_siginfo.lsi_fd = siginfop->si_fd;
469 469 break;
470 470
471 471 case LX_SIGCHLD:
472 472 lx_siginfo.lsi_pid = siginfop->si_pid;
473 473 if (siginfop->si_code <= 0 || siginfop->si_code ==
474 474 CLD_EXITED) {
475 475 lx_siginfo.lsi_status = siginfop->si_status;
476 476 } else {
477 477 lx_siginfo.lsi_status = lx_stol_status(
478 478 siginfop->si_status, -1);
479 479 }
480 480 lx_siginfo.lsi_utime = siginfop->si_utime;
481 481 lx_siginfo.lsi_stime = siginfop->si_stime;
482 482 break;
483 483
484 484 case LX_SIGILL:
485 485 case LX_SIGBUS:
486 486 case LX_SIGFPE:
487 487 case LX_SIGSEGV:
488 488 lx_siginfo.lsi_addr = siginfop->si_addr;
489 489 break;
490 490
491 491 default:
492 492 lx_siginfo.lsi_pid = siginfop->si_pid;
493 493 lx_siginfo.lsi_uid =
494 494 LX_UID32_TO_UID16(siginfop->si_uid);
495 495 lx_siginfo.lsi_value = siginfop->si_value;
496 496 break;
497 497 }
498 498
499 499 if (uucopy(&lx_siginfo, lx_siginfop, sizeof (lx_siginfo_t)) != 0)
500 500 return (-errno);
501 501 return ((ret != 0) ? -errno : 0);
502 502 }
503 503
504 504 static void
505 505 stol_fpstate(fpregset_t *fpr, lx_fpstate_t *lfpr)
506 506 {
507 507 size_t copy_len;
508 508
509 509 #if defined(_LP64)
510 510 /*
511 511 * The 64-bit Illumos struct fpregset_t and lx_fpstate_t are identical
512 512 * so just bcopy() those entries (see usr/src/uts/intel/sys/regset.h
513 513 * for __amd64's struct fpu).
514 514 */
515 515 copy_len = sizeof (fpr->fp_reg_set.fpchip_state);
516 516 bcopy(fpr, lfpr, copy_len);
517 517
518 518 #else /* is _ILP32 */
519 519 struct _fpstate *fpsp = (struct _fpstate *)fpr;
520 520
521 521 /*
522 522 * The Illumos struct _fpstate and lx_fpstate_t are identical from the
523 523 * beginning of the structure to the lx_fpstate_t "magic" field, so
524 524 * just bcopy() those entries.
525 525 */
526 526 copy_len = (size_t)&(((lx_fpstate_t *)0)->magic);
527 527 bcopy(fpsp, lfpr, copy_len);
528 528
529 529 /*
530 530 * These fields are all only significant for the first 16 bits.
531 531 */
532 532 lfpr->cw &= 0xffff; /* x87 control word */
533 533 lfpr->tag &= 0xffff; /* x87 tag word */
534 534 lfpr->cssel &= 0xffff; /* cs selector */
535 535 lfpr->datasel &= 0xffff; /* ds selector */
536 536
537 537 /*
538 538 * Linux wants the x87 status word field to contain the value of the
539 539 * x87 saved exception status word.
540 540 */
541 541 lfpr->sw = lfpr->status & 0xffff; /* x87 status word */
542 542
543 543 lfpr->mxcsr = fpsp->mxcsr;
544 544
545 545 if (fpsp->mxcsr != 0) {
546 546 /*
547 547 * Linux uses the "magic" field to denote whether the XMM
548 548 * registers contain legal data or not. Since we can't get to
549 549 * %cr4 from userland to check the status of the OSFXSR bit,
550 550 * check the mxcsr field to see if it's 0, which it should
551 551 * never be on a system with the OXFXSR bit enabled.
552 552 */
553 553 lfpr->magic = LX_X86_FXSR_MAGIC;
554 554 bcopy(fpsp->xmm, lfpr->_xmm, sizeof (lfpr->_xmm));
555 555 } else {
556 556 lfpr->magic = LX_X86_FXSR_NONE;
557 557 }
558 558 #endif
559 559 }
560 560
561 561 static void
562 562 ltos_fpstate(lx_fpstate_t *lfpr, fpregset_t *fpr)
563 563 {
564 564 size_t copy_len;
565 565
566 566 #if defined(_LP64)
567 567 /*
568 568 * The 64-bit Illumos struct fpregset_t and lx_fpstate_t are identical
569 569 * so just bcopy() those entries (see usr/src/uts/intel/sys/regset.h
570 570 * for __amd64's struct fpu).
571 571 */
572 572 copy_len = sizeof (fpr->fp_reg_set.fpchip_state);
573 573 bcopy(lfpr, fpr, copy_len);
574 574
575 575 #else /* is _ILP32 */
576 576 struct _fpstate *fpsp = (struct _fpstate *)fpr;
577 577
578 578 /*
579 579 * The lx_fpstate_t and Illumos struct _fpstate are identical from the
580 580 * beginning of the structure to the struct _fpstate "mxcsr" field, so
581 581 * just bcopy() those entries.
582 582 *
583 583 * Note that we do NOT have to propogate changes the user may have made
584 584 * to the "status" word back to the "sw" word, unlike the way we have
585 585 * to deal with processing the ESP and UESP register values on return
586 586 * from a signal handler.
587 587 */
588 588 copy_len = (size_t)&(((struct _fpstate *)0)->mxcsr);
589 589 bcopy(lfpr, fpsp, copy_len);
590 590
591 591 /*
592 592 * These fields are all only significant for the first 16 bits.
593 593 */
594 594 fpsp->cw &= 0xffff; /* x87 control word */
595 595 fpsp->sw &= 0xffff; /* x87 status word */
596 596 fpsp->tag &= 0xffff; /* x87 tag word */
597 597 fpsp->cssel &= 0xffff; /* cs selector */
598 598 fpsp->datasel &= 0xffff; /* ds selector */
599 599 fpsp->status &= 0xffff; /* saved status */
600 600
601 601 fpsp->mxcsr = lfpr->mxcsr;
602 602
603 603 if (lfpr->magic == LX_X86_FXSR_MAGIC)
604 604 bcopy(lfpr->_xmm, fpsp->xmm, sizeof (fpsp->xmm));
605 605 #endif
606 606 }
607 607
608 608 /*
609 609 * We do not use the system sigaltstack() infrastructure as that would conflict
610 610 * with our handling of both system call emulation and native signals on the
611 611 * native stack. Instead, we track the Linux stack structure in our
612 612 * thread-specific data. This function is modeled on the behaviour of the
613 613 * native sigaltstack system call handler.
614 614 */
615 615 long
616 616 lx_sigaltstack(uintptr_t ssp, uintptr_t oss)
617 617 {
618 618 lx_tsd_t *lxtsd = lx_get_tsd();
619 619 lx_stack_t ss;
620 620
621 621 if (ssp != NULL) {
622 622 if (lxtsd->lxtsd_sigaltstack.ss_flags & LX_SS_ONSTACK) {
623 623 /*
624 624 * If we are currently using the installed alternate
625 625 * stack for signal handling, the user may not modify
626 626 * the stack for this thread.
627 627 */
628 628 return (-EPERM);
629 629 }
630 630
631 631 if (uucopy((void *)ssp, &ss, sizeof (ss)) != 0) {
632 632 return (-EFAULT);
633 633 }
634 634
635 635 if (ss.ss_flags & ~LX_SS_DISABLE) {
636 636 /*
637 637 * The user may not specify a value for flags other
638 638 * than 0 or SS_DISABLE.
639 639 */
640 640 return (-EINVAL);
641 641 }
642 642
643 643 if (!(ss.ss_flags & LX_SS_DISABLE) && ss.ss_size <
644 644 LX_MINSIGSTKSZ) {
645 645 return (-ENOMEM);
646 646 }
647 647 }
648 648
649 649 if (oss != NULL) {
650 650 /*
651 651 * User provided old and new stack_t pointers may point to
652 652 * the same location. Copy out before we modify.
653 653 */
654 654 if (uucopy(&lxtsd->lxtsd_sigaltstack, (void *)oss,
655 655 sizeof (lxtsd->lxtsd_sigaltstack)) != 0) {
656 656 return (-EFAULT);
657 657 }
658 658 }
659 659
660 660 if (ssp != NULL) {
661 661 lxtsd->lxtsd_sigaltstack = ss;
662 662 }
663 663
664 664 return (0);
665 665 }
666 666
667 667 #if defined(_ILP32)
668 668 /*
669 669 * The following routines are needed because sigset_ts and siginfo_ts are
670 670 * different in format between Linux and Illumos.
671 671 *
672 672 * Note that there are two different lx_sigset structures, lx_sigset_ts and
673 673 * lx_osigset_ts:
674 674 *
675 675 * + An lx_sigset_t is the equivalent of a Illumos sigset_t and supports
676 676 * more than 32 signals.
677 677 *
678 678 * + An lx_osigset_t is simply a uint32_t, so it by definition only supports
679 679 * 32 signals.
680 680 *
681 681 * When there are two versions of a routine, one prefixed with lx_rt_ and
682 682 * one prefixed with lx_ alone, in GENERAL the lx_rt_ routines deal with
683 683 * lx_sigset_ts while the lx_ routines deal with lx_osigset_ts. Unfortunately,
684 684 * this is not always the case (e.g. lx_sigreturn() vs. lx_rt_sigreturn())
685 685 */
686 686 long
687 687 lx_sigpending(uintptr_t sigpend)
688 688 {
689 689 sigset_t sigpendset;
690 690
691 691 if (sigpending(&sigpendset) != 0)
692 692 return (-errno);
693 693
694 694 return (stol_osigset(&sigpendset, (lx_osigset_t *)sigpend));
695 695 }
696 696 #endif
697 697
698 698 long
699 699 lx_rt_sigpending(uintptr_t sigpend, uintptr_t setsize)
700 700 {
701 701 sigset_t sigpendset;
702 702
703 703 if ((size_t)setsize != sizeof (lx_sigset_t))
704 704 return (-EINVAL);
705 705
706 706 if (sigpending(&sigpendset) != 0)
707 707 return (-errno);
708 708
709 709 return (stol_sigset(&sigpendset, (lx_sigset_t *)sigpend));
710 710 }
711 711
712 712 /*
713 713 * Create a common routine to encapsulate all of the sigprocmask code,
714 714 * as the only difference between lx_sigprocmask() and lx_rt_sigprocmask()
715 715 * is the usage of lx_osigset_ts vs. lx_sigset_ts, as toggled in the code by
716 716 * the setting of the "sigset_type" flag.
717 717 */
718 718 static int
719 719 lx_sigprocmask_common(uintptr_t how, uintptr_t l_setp, uintptr_t l_osetp,
720 720 uintptr_t sigset_type)
721 721 {
722 722 int err = 0;
723 723 sigset_t set, oset;
724 724 sigset_t *s_setp = NULL;
725 725 sigset_t *s_osetp;
726 726
727 727 if (l_setp) {
728 728 switch (how) {
729 729 case LX_SIG_BLOCK:
730 730 how = SIG_BLOCK;
731 731 break;
732 732
733 733 case LX_SIG_UNBLOCK:
734 734 how = SIG_UNBLOCK;
735 735 break;
736 736
737 737 case LX_SIG_SETMASK:
738 738 how = SIG_SETMASK;
739 739 break;
740 740
741 741 default:
742 742 return (-EINVAL);
743 743 }
744 744
745 745 s_setp = &set;
746 746
747 747 /* Only 32-bit code passes other than USE_SIGSET */
748 748 if (sigset_type == USE_SIGSET)
749 749 err = ltos_sigset((lx_sigset_t *)l_setp, s_setp);
750 750 #if defined(_ILP32)
751 751 else
752 752 err = ltos_osigset((lx_osigset_t *)l_setp, s_setp);
753 753 #endif
754 754
755 755 if (err != 0)
756 756 return (err);
757 757
758 758 }
759 759
760 760 s_osetp = (l_osetp ? &oset : NULL);
761 761
762 762 /*
763 763 * In a multithreaded environment, a call to sigprocmask(2) should
764 764 * only affect the current thread's signal mask so we don't need to
765 765 * explicitly call thr_sigsetmask(3C) here.
766 766 */
767 767 if (sigprocmask(how, s_setp, s_osetp) != 0)
768 768 return (-errno);
769 769
770 770 if (l_osetp) {
771 771 if (sigset_type == USE_SIGSET)
772 772 err = stol_sigset(s_osetp, (lx_sigset_t *)l_osetp);
773 773 #if defined(_ILP32)
774 774 else
775 775 err = stol_osigset(s_osetp, (lx_osigset_t *)l_osetp);
776 776 #endif
777 777
778 778 if (err != 0) {
779 779 /*
780 780 * Encountered a fault while writing to the old signal
781 781 * mask buffer, so unwind the signal mask change made
782 782 * above.
783 783 */
784 784 (void) sigprocmask(how, s_osetp, (sigset_t *)NULL);
785 785 return (err);
786 786 }
787 787 }
788 788
789 789 return (0);
790 790 }
791 791
792 792 #if defined(_ILP32)
793 793 long
794 794 lx_sigprocmask(uintptr_t how, uintptr_t setp, uintptr_t osetp)
795 795 {
796 796 return (lx_sigprocmask_common(how, setp, osetp, USE_OSIGSET));
797 797 }
798 798 #endif
799 799
800 800 long
801 801 lx_rt_sigprocmask(uintptr_t how, uintptr_t setp, uintptr_t osetp,
802 802 uintptr_t setsize)
803 803 {
804 804 if ((size_t)setsize != sizeof (lx_sigset_t))
805 805 return (-EINVAL);
806 806
807 807 return (lx_sigprocmask_common(how, setp, osetp, USE_SIGSET));
808 808 }
809 809
810 810 #if defined(_ILP32)
811 811 long
812 812 lx_sigsuspend(uintptr_t set)
813 813 {
814 814 sigset_t s_set;
815 815
816 816 if (ltos_osigset((lx_osigset_t *)set, &s_set) != 0)
817 817 return (-errno);
818 818
819 819 return ((sigsuspend(&s_set) == -1) ? -errno : 0);
820 820 }
821 821 #endif
822 822
823 823 long
824 824 lx_rt_sigsuspend(uintptr_t set, uintptr_t setsize)
825 825 {
826 826 sigset_t s_set;
827 827
828 828 if ((size_t)setsize != sizeof (lx_sigset_t))
829 829 return (-EINVAL);
830 830
831 831 if (ltos_sigset((lx_sigset_t *)set, &s_set) != 0)
832 832 return (-errno);
833 833
834 834 return ((sigsuspend(&s_set) == -1) ? -errno : 0);
835 835 }
836 836
837 837 long
838 838 lx_rt_sigwaitinfo(uintptr_t set, uintptr_t sinfo, uintptr_t setsize)
839 839 {
840 840 sigset_t s_set;
841 841 siginfo_t s_sinfo, *s_sinfop;
842 842 int rc;
843 843
844 844 lx_sigset_t *setp = (lx_sigset_t *)set;
845 845 lx_siginfo_t *sinfop = (lx_siginfo_t *)sinfo;
846 846
847 847 if ((size_t)setsize != sizeof (lx_sigset_t))
848 848 return (-EINVAL);
849 849
850 850 if (ltos_sigset(setp, &s_set) != 0)
851 851 return (-errno);
852 852
853 853 s_sinfop = (sinfop == NULL) ? NULL : &s_sinfo;
854 854
855 855 if ((rc = sigwaitinfo(&s_set, s_sinfop)) == -1)
856 856 return (-errno);
857 857
858 858 if (s_sinfop == NULL)
859 859 return (stol_signo[rc]);
860 860
861 861 return ((stol_siginfo(s_sinfop, sinfop) != 0)
862 862 ? -errno : stol_signo[rc]);
863 863 }
864 864
865 865 long
866 866 lx_rt_sigtimedwait(uintptr_t set, uintptr_t sinfo, uintptr_t toutp,
867 867 uintptr_t setsize)
868 868 {
869 869 sigset_t s_set;
870 870 siginfo_t s_sinfo, *s_sinfop;
871 871 int rc;
872 872
873 873 lx_sigset_t *setp = (lx_sigset_t *)set;
874 874 lx_siginfo_t *sinfop = (lx_siginfo_t *)sinfo;
875 875
876 876 if ((size_t)setsize != sizeof (lx_sigset_t))
877 877 return (-EINVAL);
878 878
879 879 if (ltos_sigset(setp, &s_set) != 0)
880 880 return (-errno);
881 881
882 882 s_sinfop = (sinfop == NULL) ? NULL : &s_sinfo;
883 883
884 884 /*
885 885 * "If timeout is the NULL pointer, the behavior is unspecified."
886 886 * Match what LTP expects.
887 887 */
888 888 if ((rc = sigtimedwait(&s_set, s_sinfop,
889 889 (struct timespec *)toutp)) == -1)
890 890 return (toutp == NULL ? -EINTR : -errno);
891 891
892 892 if (s_sinfop == NULL)
893 893 return (stol_signo[rc]);
894 894
895 895 return ((stol_siginfo(s_sinfop, sinfop) != 0)
896 896 ? -errno : stol_signo[rc]);
897 897 }
898 898
899 899 static void
900 900 lx_sigreturn_find_native_context(const char *caller, ucontext_t **sigucp,
901 901 ucontext_t **retucp, uintptr_t sp)
902 902 {
903 903 lx_tsd_t *lxtsd = lx_get_tsd();
904 904 lx_sigdeliver_frame_t *lxsdfp = (lx_sigdeliver_frame_t *)sp;
905 905 lx_sigdeliver_frame_t lxsdf;
906 906 boolean_t copy_ok;
907 907
908 908 lx_debug("%s: reading lx_sigdeliver_frame_t @ %p\n", caller, lxsdfp);
909 909 if (uucopy(lxsdfp, &lxsdf, sizeof (lxsdf)) != 0) {
910 910 lx_debug("%s: failed to read lx_sigdeliver_frame_t @ %p\n",
911 911 lxsdfp);
912 912
913 913 copy_ok = B_FALSE;
914 914 } else {
915 915 lx_debug("%s: lxsdf: magic %p retucp %p sigucp %p\n", caller,
916 916 lxsdf.lxsdf_magic, lxsdf.lxsdf_retucp, lxsdf.lxsdf_sigucp);
917 917
918 918 copy_ok = B_TRUE;
919 919 }
920 920
921 921 /*
922 922 * lx_sigdeliver() pushes a lx_sigdeliver_frame_t onto the stack
923 923 * before it creates the struct lx_oldsigstack.
924 924 */
925 925 if (copy_ok && lxsdf.lxsdf_magic == LX_SIGRT_MAGIC) {
926 926 LX_SIGNAL_DELIVERY_FRAME_FOUND(lxsdfp);
927 927
928 928 /*
929 929 * The guard value is intact; use the context pointers stored
930 930 * in the signal delivery frame:
931 931 */
932 932 *sigucp = lxsdf.lxsdf_sigucp;
933 933 *retucp = lxsdf.lxsdf_retucp;
934 934
935 935 /*
936 936 * Ensure that the backup signal delivery chain is in sync with
937 937 * the frame we are returning via:
938 938 */
939 939 lxtsd->lxtsd_sigbackup = lxsdf.lxsdf_sigbackup;
940 940 } else {
941 941 /*
942 942 * The guard value was not intact. Either the program smashed
943 943 * the stack unintentionally, or worse: intentionally moved
944 944 * some parts of the signal delivery frame we constructed to
945 945 * another location before calling rt_sigreturn(2).
946 946 */
947 947 LX_SIGNAL_DELIVERY_FRAME_CORRUPT(lxsdfp);
948 948
949 949 if (lxtsd->lxtsd_sigbackup == NULL) {
950 950 /*
951 951 * There was no backup context to use, so we must
952 952 * kill the process.
953 953 */
954 954 if (copy_ok) {
955 955 lx_err_fatal("%s: sp 0x%p, expected 0x%x, "
956 956 "found 0x%x!", caller, sp, LX_SIGRT_MAGIC,
957 957 lxsdf.lxsdf_magic);
958 958 } else {
959 959 lx_err_fatal("%s: sp 0x%p, could not read "
960 960 "magic", caller, sp);
961 961 }
962 962 }
963 963
964 964 /*
965 965 * Attempt to recover by using the backup signal delivery
966 966 * chain:
967 967 */
968 968 lx_debug("%s: SIGRT_MAGIC not found @ sp %p; using backup "
969 969 "@ %p\n", caller, (void *)sp, lxtsd->lxtsd_sigbackup);
970 970 *sigucp = lxtsd->lxtsd_sigbackup->lxsb_sigucp;
971 971 *retucp = lxtsd->lxtsd_sigbackup->lxsb_retucp;
972 972 }
973 973 }
974 974
975 975 #if defined(_ILP32)
976 976 /*
977 977 * Intercept the Linux sigreturn() syscall to turn it into the return through
978 978 * the libc call stack that Illumos expects.
979 979 *
980 980 * When control returns to libc's call_user_handler() routine, a setcontext(2)
981 981 * will be done that returns thread execution to the point originally
982 982 * interrupted by receipt of the signal.
983 983 *
984 984 * This is only used by 32-bit code.
985 985 */
986 986 long
987 987 lx_sigreturn(void)
988 988 {
989 989 struct lx_oldsigstack *lx_ossp;
990 990 lx_sigset_t lx_sigset;
991 991 ucontext_t *ucp;
992 992 ucontext_t *sigucp;
993 993 ucontext_t *retucp;
994 994 uintptr_t sp;
995 995
996 996 ucp = lx_syscall_regs();
997 997
998 998 /*
999 999 * NOTE: The sp saved in the context is eight bytes off of where we
1000 1000 * need it to be (either due to trampoline or the copying of
1001 1001 * sp = uesp, not clear which).
1002 1002 */
1003 1003 sp = LX_REG(ucp, REG_SP) - 8;
1004 1004
1005 1005 /*
1006 1006 * At this point, the stack pointer should point to the struct
1007 1007 * lx_oldsigstack that lx_build_old_signal_frame() constructed and
1008 1008 * placed on the stack. We need to reference it a bit later, so
1009 1009 * save a pointer to it before incrementing our copy of the sp.
1010 1010 */
1011 1011 lx_ossp = (struct lx_oldsigstack *)sp;
1012 1012 sp += SA(sizeof (struct lx_oldsigstack));
1013 1013
1014 1014 lx_sigreturn_find_native_context(__func__, &sigucp, &retucp, sp);
1015 1015
1016 1016 /*
1017 1017 * We need to copy machine registers the Linux signal handler may have
1018 1018 * modified back to the Illumos ucontext_t.
1019 1019 *
1020 1020 * General registers copy across as-is, except Linux expects that
1021 1021 * changes made to uc_mcontext.gregs[ESP] will be reflected when the
1022 1022 * interrupted thread resumes execution after the signal handler. To
1023 1023 * emulate this behavior, we must modify uc_mcontext.gregs[UESP] to
1024 1024 * match uc_mcontext.gregs[ESP] as Illumos will restore the UESP
1025 1025 * value to ESP.
1026 1026 */
1027 1027 lx_ossp->sigc.sc_esp_at_signal = lx_ossp->sigc.sc_esp;
1028 1028 bcopy(&lx_ossp->sigc, &sigucp->uc_mcontext, sizeof (gregset_t));
1029 1029
1030 1030 LX_SIGRETURN(NULL, sigucp, sp);
1031 1031
1032 1032 /* copy back FP regs if present */
1033 1033 if (lx_ossp->sigc.sc_fpstate != NULL)
1034 1034 ltos_fpstate(&lx_ossp->fpstate, &sigucp->uc_mcontext.fpregs);
1035 1035
1036 1036 /* convert Linux signal mask back to its Illumos equivalent */
1037 1037 bzero(&lx_sigset, sizeof (lx_sigset_t));
1038 1038 lx_sigset.__bits[0] = lx_ossp->sigc.sc_mask;
1039 1039 lx_sigset.__bits[1] = lx_ossp->sig_extra;
1040 1040 (void) ltos_sigset(&lx_sigset, &sigucp->uc_sigmask);
1041 1041
1042 1042 /*
1043 1043 * For signal mask handling to be done properly, this call needs to
1044 1044 * return to the libc routine that originally called the signal handler
1045 1045 * rather than directly set the context back to the place the signal
1046 1046 * interrupted execution as the original Linux code would do.
1047 1047 */
1048 1048 lx_debug("lx_sigreturn: calling setcontext; retucp %p flags %lx "
1049 1049 "link %p\n", retucp, retucp->uc_flags, retucp->uc_link);
1050 1050 setcontext(retucp);
1051 1051 assert(0);
1052 1052
1053 1053 /*NOTREACHED*/
1054 1054 return (0);
1055 1055 }
1056 1056 #endif
1057 1057
1058 1058 /*
1059 1059 * This signal return syscall is used by both 32-bit and 64-bit code.
1060 1060 */
1061 1061 long
1062 1062 lx_rt_sigreturn(void)
1063 1063 {
1064 1064 struct lx_sigstack *lx_ssp;
1065 1065 lx_ucontext_t *lx_ucp;
1066 1066 ucontext_t *ucp;
1067 1067 ucontext_t *sigucp;
1068 1068 ucontext_t *retucp;
1069 1069 uintptr_t sp;
1070 1070
1071 1071 /* Get the registers at the emulated Linux rt_sigreturn syscall */
1072 1072 ucp = lx_syscall_regs();
1073 1073
1074 1074 #if defined(_ILP32)
1075 1075 lx_debug("lx_rt_sigreturn: ESP %p UESP %p\n", LX_REG(ucp, ESP),
1076 1076 LX_REG(ucp, UESP));
1077 1077 /*
1078 1078 * For 32-bit
1079 1079 *
1080 1080 * NOTE: Because of the silly compatibility measures done in the
1081 1081 * signal trampoline code to make sure the stack holds the
1082 1082 * _exact same_ instruction sequence Linux does, we have to
1083 1083 * manually "pop" some extra instructions off the stack here
1084 1084 * before passing the stack address to the syscall because the
1085 1085 * trampoline code isn't allowed to do it due to the gdb
1086 1086 * compatability issues.
1087 1087 *
1088 1088 * No, I'm not kidding.
1089 1089 *
1090 1090 * The sp saved in the context is eight bytes off of where we
1091 1091 * need it to be (either due to trampoline or the copying of
1092 1092 * sp = uesp, not clear which but looks like the uesp case), so
1093 1093 * the need to pop the extra four byte instruction means we need
1094 1094 * to subtract a net four bytes from the sp before "popping" the
1095 1095 * struct lx_sigstack off the stack.
1096 1096 *
1097 1097 * This will yield the value the stack pointer had before
1098 1098 * lx_sigdeliver() created the stack frame for the Linux signal
1099 1099 * handler.
1100 1100 */
1101 1101 sp = (uintptr_t)LX_REG(ucp, REG_SP) - 4;
1102 1102 #else
1103 1103 /*
1104 1104 * We need to make an adjustment for 64-bit code as well. Since 64-bit
1105 1105 * does not use the trampoline, it's probably for the same reason as
1106 1106 * alluded to above.
1107 1107 */
1108 1108 sp = (uintptr_t)LX_REG(ucp, REG_SP) - 8;
1109 1109 #endif
1110 1110
1111 1111 /*
1112 1112 * At this point, the stack pointer should point to the struct
1113 1113 * lx_sigstack that lx_build_signal_frame() constructed and
1114 1114 * placed on the stack. We need to reference it a bit later, so
1115 1115 * save a pointer to it before incrementing our copy of the sp.
1116 1116 */
1117 1117 lx_ssp = (struct lx_sigstack *)sp;
1118 1118 sp += SA(sizeof (struct lx_sigstack));
1119 1119
1120 1120 #if defined(_LP64)
1121 1121 /*
1122 1122 * The 64-bit lx_sigdeliver() inserts 8 bytes of padding between
1123 1123 * the lx_sigstack_t and the delivery frame to maintain ABI stack
1124 1124 * alignment.
1125 1125 */
1126 1126 sp += 8;
1127 1127 #endif
1128 1128
1129 1129 lx_sigreturn_find_native_context(__func__, &sigucp, &retucp, sp);
1130 1130
1131 1131 /*
1132 1132 * We need to copy machine registers the Linux signal handler may have
1133 1133 * modified back to the Illumos version.
1134 1134 */
1135 1135 #if defined(_LP64)
1136 1136 lx_ucp = &lx_ssp->uc;
1137 1137
1138 1138 /*
1139 1139 * General register layout is completely different.
1140 1140 */
1141 1141 LX_REG(sigucp, REG_R15) = lx_ucp->uc_sigcontext.sc_r15;
1142 1142 LX_REG(sigucp, REG_R14) = lx_ucp->uc_sigcontext.sc_r14;
1143 1143 LX_REG(sigucp, REG_R13) = lx_ucp->uc_sigcontext.sc_r13;
1144 1144 LX_REG(sigucp, REG_R12) = lx_ucp->uc_sigcontext.sc_r12;
1145 1145 LX_REG(sigucp, REG_R11) = lx_ucp->uc_sigcontext.sc_r11;
1146 1146 LX_REG(sigucp, REG_R10) = lx_ucp->uc_sigcontext.sc_r10;
1147 1147 LX_REG(sigucp, REG_R9) = lx_ucp->uc_sigcontext.sc_r9;
1148 1148 LX_REG(sigucp, REG_R8) = lx_ucp->uc_sigcontext.sc_r8;
1149 1149 LX_REG(sigucp, REG_RDI) = lx_ucp->uc_sigcontext.sc_rdi;
1150 1150 LX_REG(sigucp, REG_RSI) = lx_ucp->uc_sigcontext.sc_rsi;
1151 1151 LX_REG(sigucp, REG_RBP) = lx_ucp->uc_sigcontext.sc_rbp;
1152 1152 LX_REG(sigucp, REG_RBX) = lx_ucp->uc_sigcontext.sc_rbx;
1153 1153 LX_REG(sigucp, REG_RDX) = lx_ucp->uc_sigcontext.sc_rdx;
1154 1154 LX_REG(sigucp, REG_RCX) = lx_ucp->uc_sigcontext.sc_rcx;
1155 1155 LX_REG(sigucp, REG_RAX) = lx_ucp->uc_sigcontext.sc_rax;
1156 1156 LX_REG(sigucp, REG_TRAPNO) = lx_ucp->uc_sigcontext.sc_trapno;
1157 1157 LX_REG(sigucp, REG_ERR) = lx_ucp->uc_sigcontext.sc_err;
1158 1158 LX_REG(sigucp, REG_RIP) = lx_ucp->uc_sigcontext.sc_rip;
1159 1159 LX_REG(sigucp, REG_CS) = lx_ucp->uc_sigcontext.sc_cs;
1160 1160 LX_REG(sigucp, REG_RFL) = lx_ucp->uc_sigcontext.sc_eflags;
1161 1161 LX_REG(sigucp, REG_RSP) = lx_ucp->uc_sigcontext.sc_rsp;
1162 1162 LX_REG(sigucp, REG_SS) = lx_ucp->uc_sigcontext.sc_pad0;
1163 1163 LX_REG(sigucp, REG_FS) = lx_ucp->uc_sigcontext.sc_fs;
1164 1164 LX_REG(sigucp, REG_GS) = lx_ucp->uc_sigcontext.sc_gs;
1165 1165
1166 1166 #else /* is _ILP32 */
1167 1167 lx_ucp = &lx_ssp->uc;
1168 1168
1169 1169 /*
1170 1170 * Illumos and Linux both follow the SysV i386 ABI layout for the
1171 1171 * mcontext.
1172 1172 *
1173 1173 * General registers copy across as-is, except Linux expects that
1174 1174 * changes made to uc_mcontext.gregs[ESP] will be reflected when the
1175 1175 * interrupted thread resumes execution after the signal handler. To
1176 1176 * emulate this behavior, we must modify uc_mcontext.gregs[UESP] to
1177 1177 * match uc_mcontext.gregs[ESP] as Illumos will restore the UESP value
1178 1178 * to ESP.
1179 1179 */
1180 1180 lx_ucp->uc_sigcontext.sc_esp_at_signal = lx_ucp->uc_sigcontext.sc_esp;
1181 1181
1182 1182 bcopy(&lx_ucp->uc_sigcontext, &sigucp->uc_mcontext.gregs,
1183 1183 sizeof (gregset_t));
1184 1184 #endif
1185 1185
1186 1186 LX_SIGRETURN(lx_ucp, sigucp, sp);
1187 1187
1188 1188 if (lx_ucp->uc_sigcontext.sc_fpstate != NULL) {
1189 1189 ltos_fpstate(lx_ucp->uc_sigcontext.sc_fpstate,
1190 1190 &sigucp->uc_mcontext.fpregs);
1191 1191 }
1192 1192
1193 1193 /*
1194 1194 * Convert the Linux signal mask and stack back to their
1195 1195 * Illumos equivalents.
1196 1196 */
1197 1197 (void) ltos_sigset(&lx_ucp->uc_sigmask, &sigucp->uc_sigmask);
1198 1198 ltos_stack(&lx_ucp->uc_stack, &sigucp->uc_stack);
1199 1199
1200 1200 /*
1201 1201 * For signal mask handling to be done properly, this call needs to
1202 1202 * return to the libc routine that originally called the signal handler
1203 1203 * rather than directly set the context back to the place the signal
1204 1204 * interrupted execution as the original Linux code would do.
1205 1205 */
1206 1206 lx_debug("lx_rt_sigreturn: calling setcontext; retucp %p\n", retucp);
1207 1207 setcontext(retucp);
1208 1208 assert(0);
1209 1209
1210 1210 /*NOTREACHED*/
1211 1211 return (0);
1212 1212 }
1213 1213
1214 1214
1215 1215 #if defined(_ILP32)
1216 1216 /*
1217 1217 * Build signal frame for processing for "old" (legacy) Linux signals
1218 1218 * This stack-builder function is only used by 32-bit code.
1219 1219 */
1220 1220 static void
1221 1221 lx_build_old_signal_frame(int lx_sig, siginfo_t *sip, void *p, void *sp,
1222 1222 uintptr_t *hargs)
1223 1223 {
1224 1224 extern void lx_sigreturn_tramp();
1225 1225
1226 1226 lx_sigset_t lx_sigset;
1227 1227 ucontext_t *ucp = (ucontext_t *)p;
1228 1228 struct lx_sigaction *lxsap;
1229 1229 struct lx_oldsigstack *lx_ossp = sp;
1230 1230
1231 1231 lx_debug("building old signal frame for lx sig %d at 0x%p", lx_sig, sp);
1232 1232
1233 1233 lx_ossp->sig = lx_sig;
1234 1234 lxsap = &lx_sighandlers.lx_sa[lx_sig];
1235 1235 lx_debug("lxsap @ 0x%p", lxsap);
1236 1236
1237 1237 if (lxsap && (lxsap->lxsa_flags & LX_SA_RESTORER) &&
1238 1238 lxsap->lxsa_restorer) {
1239 1239 lx_ossp->retaddr = lxsap->lxsa_restorer;
1240 1240 lx_debug("lxsa_restorer exists @ 0x%p", lx_ossp->retaddr);
1241 1241 } else {
1242 1242 lx_ossp->retaddr = lx_sigreturn_tramp;
1243 1243 lx_debug("lx_ossp->retaddr set to 0x%p", lx_sigreturn_tramp);
1244 1244 }
1245 1245
1246 1246 lx_debug("osf retaddr = 0x%p", lx_ossp->retaddr);
1247 1247
1248 1248 /* convert Illumos signal mask and stack to their Linux equivalents */
1249 1249 (void) stol_sigset(&ucp->uc_sigmask, &lx_sigset);
1250 1250 lx_ossp->sigc.sc_mask = lx_sigset.__bits[0];
1251 1251 lx_ossp->sig_extra = lx_sigset.__bits[1];
1252 1252
1253 1253 /*
1254 1254 * General registers copy across as-is, except Linux expects that
1255 1255 * uc_mcontext.gregs[ESP] == uc_mcontext.gregs[UESP] on receipt of a
1256 1256 * signal.
1257 1257 */
1258 1258 bcopy(&ucp->uc_mcontext, &lx_ossp->sigc, sizeof (gregset_t));
1259 1259 lx_ossp->sigc.sc_esp = lx_ossp->sigc.sc_esp_at_signal;
1260 1260
1261 1261 /*
1262 1262 * cr2 contains the faulting address, and Linux only sets cr2 for a
1263 1263 * a segmentation fault.
1264 1264 */
1265 1265 lx_ossp->sigc.sc_cr2 = (((lx_sig == LX_SIGSEGV) && (sip)) ?
1266 1266 (uintptr_t)sip->si_addr : 0);
1267 1267
1268 1268 /* convert FP regs if present */
1269 1269 if (ucp->uc_flags & UC_FPU) {
1270 1270 stol_fpstate(&ucp->uc_mcontext.fpregs, &lx_ossp->fpstate);
1271 1271 lx_ossp->sigc.sc_fpstate = &lx_ossp->fpstate;
1272 1272 } else {
1273 1273 lx_ossp->sigc.sc_fpstate = NULL;
1274 1274 }
1275 1275
1276 1276 /*
1277 1277 * Believe it or not, gdb wants to SEE the trampoline code on the
1278 1278 * bottom of the stack to determine whether the stack frame belongs to
1279 1279 * a signal handler, even though this code is no longer actually
1280 1280 * called.
1281 1281 *
1282 1282 * You can't make this stuff up.
1283 1283 */
1284 1284 bcopy((void *)lx_sigreturn_tramp, lx_ossp->trampoline,
1285 1285 sizeof (lx_ossp->trampoline));
1286 1286 }
1287 1287 #endif
1288 1288
1289 1289 /*
1290 1290 * Build stack frame (32-bit) or stack local data (64-bit) for processing for
1291 1291 * modern Linux signals. This is the only stack-builder function for 64-bit
1292 1292 * code (32-bit code also calls this when using "modern" signals).
1293 1293 */
1294 1294 static void
1295 1295 lx_build_signal_frame(int lx_sig, siginfo_t *sip, void *p, void *sp,
1296 1296 uintptr_t *hargs)
1297 1297 {
1298 1298 extern void lx_rt_sigreturn_tramp();
1299 1299
1300 1300 lx_ucontext_t *lx_ucp;
1301 1301 ucontext_t *ucp = (ucontext_t *)p;
1302 1302 struct lx_sigstack *lx_ssp = sp;
1303 1303 struct lx_sigaction *lxsap;
1304 1304
1305 1305 lx_debug("building signal frame for lx sig %d at 0x%p", lx_sig, sp);
1306 1306
1307 1307 lx_ucp = &lx_ssp->uc;
1308 1308 #if defined(_ILP32)
1309 1309 /*
1310 1310 * Arguments are passed to the 32-bit signal handler on the stack.
1311 1311 */
1312 1312 lx_ssp->ucp = lx_ucp;
1313 1313 lx_ssp->sip = sip != NULL ? &lx_ssp->si : NULL;
1314 1314 lx_ssp->sig = lx_sig;
1315 1315 #else
1316 1316 /*
1317 1317 * Arguments to the 64-bit signal handler are passed in registers:
1318 1318 * hdlr(int sig, siginfo_t *sip, void *ucp);
1319 1319 */
1320 1320 hargs[0] = lx_sig;
1321 1321 hargs[1] = sip != NULL ? (uintptr_t)&lx_ssp->si : NULL;
1322 1322 hargs[2] = (uintptr_t)lx_ucp;
1323 1323 #endif
1324 1324
1325 1325 lxsap = &lx_sighandlers.lx_sa[lx_sig];
1326 1326 lx_debug("lxsap @ 0x%p", lxsap);
1327 1327
1328 1328 if (lxsap && (lxsap->lxsa_flags & LX_SA_RESTORER) &&
1329 1329 lxsap->lxsa_restorer) {
1330 1330 /*
1331 1331 * lxsa_restorer is explicitly set by sigaction in 32-bit code
1332 1332 * but it can also be implicitly set for both 32 and 64 bit
1333 1333 * code via lx_sigaction_common when we bcopy the user-supplied
1334 1334 * lx_sigaction element into the proper slot in the sighandler
1335 1335 * array.
1336 1336 */
1337 1337 lx_ssp->retaddr = lxsap->lxsa_restorer;
1338 1338 lx_debug("lxsa_restorer exists @ 0x%p", lx_ssp->retaddr);
1339 1339 } else {
1340 1340 lx_ssp->retaddr = lx_rt_sigreturn_tramp;
1341 1341 lx_debug("lx_ssp->retaddr set to 0x%p", lx_rt_sigreturn_tramp);
1342 1342 }
1343 1343
1344 1344 /* Linux has these fields but always clears them to 0 */
1345 1345 lx_ucp->uc_flags = 0;
1346 1346 lx_ucp->uc_link = NULL;
1347 1347
1348 1348 /* convert Illumos signal mask and stack to their Linux equivalents */
1349 1349 (void) stol_sigset(&ucp->uc_sigmask, &lx_ucp->uc_sigmask);
1350 1350 stol_stack(&ucp->uc_stack, &lx_ucp->uc_stack);
1351 1351
1352 1352 #if defined(_LP64)
1353 1353 /*
1354 1354 * General register layout is completely different.
1355 1355 */
1356 1356 lx_ucp->uc_sigcontext.sc_r8 = LX_REG(ucp, REG_R8);
1357 1357 lx_ucp->uc_sigcontext.sc_r9 = LX_REG(ucp, REG_R9);
1358 1358 lx_ucp->uc_sigcontext.sc_r10 = LX_REG(ucp, REG_R10);
1359 1359 lx_ucp->uc_sigcontext.sc_r11 = LX_REG(ucp, REG_R11);
1360 1360 lx_ucp->uc_sigcontext.sc_r12 = LX_REG(ucp, REG_R12);
1361 1361 lx_ucp->uc_sigcontext.sc_r13 = LX_REG(ucp, REG_R13);
1362 1362 lx_ucp->uc_sigcontext.sc_r14 = LX_REG(ucp, REG_R14);
1363 1363 lx_ucp->uc_sigcontext.sc_r15 = LX_REG(ucp, REG_R15);
1364 1364 lx_ucp->uc_sigcontext.sc_rdi = LX_REG(ucp, REG_RDI);
1365 1365 lx_ucp->uc_sigcontext.sc_rsi = LX_REG(ucp, REG_RSI);
1366 1366 lx_ucp->uc_sigcontext.sc_rbp = LX_REG(ucp, REG_RBP);
1367 1367 lx_ucp->uc_sigcontext.sc_rbx = LX_REG(ucp, REG_RBX);
1368 1368 lx_ucp->uc_sigcontext.sc_rdx = LX_REG(ucp, REG_RDX);
1369 1369 lx_ucp->uc_sigcontext.sc_rax = LX_REG(ucp, REG_RAX);
1370 1370 lx_ucp->uc_sigcontext.sc_rcx = LX_REG(ucp, REG_RCX);
1371 1371 lx_ucp->uc_sigcontext.sc_rsp = LX_REG(ucp, REG_RSP);
1372 1372 lx_ucp->uc_sigcontext.sc_rip = LX_REG(ucp, REG_RIP);
1373 1373 lx_ucp->uc_sigcontext.sc_eflags = LX_REG(ucp, REG_RFL);
1374 1374 lx_ucp->uc_sigcontext.sc_cs = LX_REG(ucp, REG_CS);
1375 1375 lx_ucp->uc_sigcontext.sc_gs = LX_REG(ucp, REG_GS);
1376 1376 lx_ucp->uc_sigcontext.sc_fs = LX_REG(ucp, REG_FS);
1377 1377 lx_ucp->uc_sigcontext.sc_pad0 = LX_REG(ucp, REG_SS);
1378 1378 lx_ucp->uc_sigcontext.sc_err = LX_REG(ucp, REG_ERR);
1379 1379 lx_ucp->uc_sigcontext.sc_trapno = LX_REG(ucp, REG_TRAPNO);
1380 1380
1381 1381 #else /* is _ILP32 */
1382 1382 /*
1383 1383 * General registers copy across as-is, except Linux expects that
1384 1384 * uc_mcontext.gregs[ESP] == uc_mcontext.gregs[UESP] on receipt of a
1385 1385 * signal.
1386 1386 */
1387 1387 bcopy(&ucp->uc_mcontext, &lx_ucp->uc_sigcontext, sizeof (gregset_t));
1388 1388 lx_ucp->uc_sigcontext.sc_esp = lx_ucp->uc_sigcontext.sc_esp_at_signal;
1389 1389 #endif
1390 1390
1391 1391 /*
1392 1392 * cr2 contains the faulting address, which Linux only sets for a
1393 1393 * a segmentation fault.
1394 1394 */
1395 1395 lx_ucp->uc_sigcontext.sc_cr2 = ((lx_sig == LX_SIGSEGV) && (sip)) ?
1396 1396 (uintptr_t)sip->si_addr : 0;
1397 1397
1398 1398 /*
1399 1399 * This should only return an error if the signum is invalid but that
1400 1400 * also gets converted into a LX_SIGKILL by this function.
1401 1401 */
1402 1402 if (sip != NULL)
1403 1403 (void) stol_siginfo(sip, &lx_ssp->si);
1404 1404 else
1405 1405 bzero(&lx_ssp->si, sizeof (lx_siginfo_t));
1406 1406
1407 1407 /* convert FP regs if present */
1408 1408 if (ucp->uc_flags & UC_FPU) {
1409 1409 /*
1410 1410 * Copy FP regs to the appropriate place in the the lx_sigstack
1411 1411 * structure.
1412 1412 */
1413 1413 stol_fpstate(&ucp->uc_mcontext.fpregs, &lx_ssp->fpstate);
1414 1414 lx_ucp->uc_sigcontext.sc_fpstate = &lx_ssp->fpstate;
1415 1415 } else {
1416 1416 lx_ucp->uc_sigcontext.sc_fpstate = NULL;
1417 1417 }
1418 1418
1419 1419 #if defined(_ILP32)
1420 1420 /*
1421 1421 * Believe it or not, gdb wants to SEE the sigreturn code on the
1422 1422 * top of the stack to determine whether the stack frame belongs to
1423 1423 * a signal handler, even though this code is not actually called.
1424 1424 *
1425 1425 * You can't make this stuff up.
1426 1426 */
1427 1427 bcopy((void *)lx_rt_sigreturn_tramp, lx_ssp->trampoline,
1428 1428 sizeof (lx_ssp->trampoline));
1429 1429 #endif
1430 1430 }
1431 1431
1432 1432 /*
1433 1433 * This is the interposition handler for Linux signals.
1434 1434 */
1435 1435 static void
1436 1436 lx_call_user_handler(int sig, siginfo_t *sip, void *p)
1437 1437 {
1438 1438 void (*user_handler)();
1439 1439 void (*stk_builder)();
1440 1440 struct lx_sigaction *lxsap;
1441 1441 ucontext_t *ucp = (ucontext_t *)p;
1442 1442 size_t stksize;
1443 1443 int lx_sig;
1444 1444
1445 1445 /*
1446 1446 * If Illumos signal has no Linux equivalent, effectively ignore it.
1447 1447 */
1448 1448 if ((lx_sig = stol_signo[sig]) == -1) {
1449 1449 lx_unsupported("caught Illumos signal %d, no Linux equivalent",
1450 1450 sig);
1451 1451 return;
1452 1452 }
1453 1453
1454 1454 lx_debug("interpose caught Illumos signal %d, translating to Linux "
1455 1455 "signal %d", sig, lx_sig);
1456 1456
1457 1457 lxsap = &lx_sighandlers.lx_sa[lx_sig];
1458 1458 lx_debug("lxsap @ 0x%p", lxsap);
1459 1459
1460 1460 if ((sig == SIGPWR) && (lxsap->lxsa_handler == SIG_DFL)) {
1461 1461 /*
1462 1462 * Linux SIG_DFL for SIGPWR is to terminate. The lx wait
1463 1463 * emulation will translate SIGPWR to LX_SIGPWR.
1464 1464 */
1465 1465 (void) syscall(SYS_brand, B_EXIT_AS_SIG, SIGPWR);
1466 1466 /* This should never return */
1467 1467 assert(0);
1468 1468 }
1469 1469
1470 1470 if (lxsap->lxsa_handler == SIG_DFL || lxsap->lxsa_handler == SIG_IGN)
1471 1471 lx_err_fatal("lxsa_handler set to %s? How?!?!?",
1472 1472 (lxsap->lxsa_handler == SIG_DFL) ? "SIG_DFL" : "SIG_IGN");
1473 1473
1474 1474 #if defined(_LP64)
1475 1475 stksize = sizeof (struct lx_sigstack);
1476 1476 stk_builder = lx_build_signal_frame;
1477 1477 #else
1478 1478 if (lxsap->lxsa_flags & LX_SA_SIGINFO) {
1479 1479 stksize = sizeof (struct lx_sigstack);
1480 1480 stk_builder = lx_build_signal_frame;
1481 1481 } else {
1482 1482 stksize = sizeof (struct lx_oldsigstack);
1483 1483 stk_builder = lx_build_old_signal_frame;
1484 1484 }
1485 1485 #endif
1486 1486
1487 1487 user_handler = lxsap->lxsa_handler;
1488 1488
1489 1489 lx_debug("delivering %d (lx %d) to handler at 0x%p", sig, lx_sig,
1490 1490 lxsap->lxsa_handler);
1491 1491
1492 1492 if (lxsap->lxsa_flags & LX_SA_RESETHAND)
1493 1493 lxsap->lxsa_handler = SIG_DFL;
1494 1494
1495 1495 lx_sigdeliver(lx_sig, sip, ucp, stksize, stk_builder, user_handler,
1496 1496 lxsap);
1497 1497
1498 1498 /*
1499 1499 * We need to handle restarting system calls if requested by the
1500 1500 * program for this signal type:
1501 1501 */
1502 1502 if (lxsap->lxsa_flags & LX_SA_RESTART) {
1503 1503 uintptr_t flags = (uintptr_t)ucp->uc_brand_data[0];
1504 1504 long ret = (long)LX_REG(ucp, REG_R0);
1505 1505 boolean_t interrupted = (ret == -lx_errno(EINTR, -1));
1506 1506
1507 1507 /*
1508 1508 * If the system call returned EINTR, and the system
1509 1509 * call handler set "br_syscall_restart" when returning,
1510 1510 * we modify the context to try the system call again
1511 1511 * when we return from this signal handler.
1512 1512 */
1513 1513 if ((flags & LX_UC_RESTART_SYSCALL) && interrupted) {
1514 1514 int syscall_num = (int)(uintptr_t)ucp->uc_brand_data[2];
1515 1515
1516 1516 lx_debug("restarting interrupted system call %d",
1517 1517 syscall_num);
1518 1518
1519 1519 /*
1520 1520 * Both the "int 0x80" and the "syscall" instruction
1521 1521 * are two bytes long. Wind the program counter back
1522 1522 * to the start of this instruction.
1523 1523 *
1524 1524 * The system call we interrupted is preserved in the
1525 1525 * brand-specific data in the ucontext_t when the
1526 1526 * LX_UC_RESTART_SYSCALL flag is set. This is
1527 1527 * analogous to the "orig_[er]ax" field in the Linux
1528 1528 * "user_regs_struct".
1529 1529 */
1530 1530 LX_REG(ucp, REG_PC) -= 2;
1531 1531 LX_REG(ucp, REG_R0) = syscall_num;
1532 1532 }
1533 1533 }
1534 1534 }
1535 1535
1536 1536 /*
1537 1537 * The "lx_sigdeliver()" function is responsible for constructing the emulated
1538 1538 * signal delivery frame on the brand stack for this LWP. A context is saved
1539 1539 * on the stack which will be used by the "sigreturn(2)" family of emulated
1540 1540 * system calls to get us back here after the Linux signal handler returns.
1541 1541 * This function is modelled on the in-kernel "sendsig()" signal delivery
1542 1542 * mechanism.
1543 1543 */
1544 1544 void
1545 1545 lx_sigdeliver(int lx_sig, siginfo_t *sip, ucontext_t *ucp, size_t stacksz,
1546 1546 void (*stack_builder)(), void (*user_handler)(),
1547 1547 struct lx_sigaction *lxsap)
1548 1548 {
1549 1549 lx_sigbackup_t sigbackup;
↓ open down ↓ |
1549 lines elided |
↑ open up ↑ |
1550 1550 ucontext_t uc;
1551 1551 lx_tsd_t *lxtsd = lx_get_tsd();
1552 1552 int totsz = 0;
1553 1553 uintptr_t flags;
1554 1554 uintptr_t hargs[3];
1555 1555 /*
1556 1556 * These variables must be "volatile", as they are modified after the
1557 1557 * getcontext() stores the register state:
1558 1558 */
1559 1559 volatile boolean_t signal_delivered = B_FALSE;
1560 - volatile uintptr_t lxfp;
1561 - volatile uintptr_t old_tsd_sp;
1562 - volatile int newstack;
1560 + volatile uintptr_t lxfp = 0;
1561 + volatile uintptr_t old_tsd_sp = 0;
1562 + volatile int newstack = 0;
1563 1563
1564 1564 /*
1565 1565 * This function involves modifying the Linux process stack for this
1566 1566 * thread. To do so without corruption requires us to exclude other
1567 1567 * signal handlers (or emulated system calls called from within those
1568 1568 * handlers) from running while we reserve space on that stack. We
1569 1569 * defer the execution of further instances of lx_call_user_handler()
1570 1570 * until we have completed this operation.
1571 1571 */
1572 1572 _sigoff();
1573 1573
1574 1574 /*
1575 1575 * Clear register arguments vector.
1576 1576 */
1577 1577 bzero(hargs, sizeof (hargs));
1578 1578
1579 1579 /*
1580 1580 * We save a context here so that we can be returned later to complete
1581 1581 * handling the signal.
1582 1582 */
1583 1583 lx_debug("lx_sigdeliver: STORING RETURN CONTEXT @ %p\n", &uc);
1584 1584 assert(getcontext(&uc) == 0);
1585 1585 lx_debug("lx_sigdeliver: RETURN CONTEXT %p LINK %p FLAGS %lx\n",
1586 1586 &uc, uc.uc_link, uc.uc_flags);
1587 1587 if (signal_delivered) {
1588 1588 /*
1589 1589 * If the "signal_delivered" flag is set, we are returned here
1590 1590 * via setcontext() as called by the emulated Linux signal
1591 1591 * return system call.
1592 1592 */
1593 1593 lx_debug("lx_sigdeliver: WE ARE BACK, VIA UC @ %p!\n", &uc);
1594 1594 goto after_signal_handler;
1595 1595 }
1596 1596 signal_delivered = B_TRUE;
1597 1597
1598 1598 /*
1599 1599 * Preserve the current tsd value of the Linux process stack pointer,
1600 1600 * even if it is zero. We will restore it when we are returned here
1601 1601 * via setcontext() after the Linux process has completed execution of
1602 1602 * its signal handler.
1603 1603 */
1604 1604 old_tsd_sp = lxtsd->lxtsd_lx_sp;
1605 1605
1606 1606 /*
1607 1607 * Figure out whether we will be handling this signal on an alternate
1608 1608 * stack specified by the user.
1609 1609 */
1610 1610 newstack = (lxsap->lxsa_flags & LX_SA_ONSTACK) &&
1611 1611 !(lxtsd->lxtsd_sigaltstack.ss_flags & (LX_SS_ONSTACK |
1612 1612 LX_SS_DISABLE));
1613 1613
1614 1614 /*
1615 1615 * Find the first unused region of the Linux process stack, where
1616 1616 * we will assemble our signal delivery frame.
1617 1617 */
1618 1618 flags = (uintptr_t)ucp->uc_brand_data[0];
1619 1619 if (newstack) {
1620 1620 /*
1621 1621 * We are moving to the user-provided alternate signal
1622 1622 * stack.
1623 1623 */
1624 1624 lxfp = SA((uintptr_t)lxtsd->lxtsd_sigaltstack.ss_sp) +
1625 1625 SA(lxtsd->lxtsd_sigaltstack.ss_size) - STACK_ALIGN;
1626 1626 lx_debug("lx_sigdeliver: moving to ALTSTACK sp %p\n", lxfp);
1627 1627 LX_SIGNAL_ALTSTACK_ENABLE(lxfp);
1628 1628 } else if (flags & LX_UC_STACK_BRAND) {
1629 1629 /*
1630 1630 * We interrupted the Linux process to take this signal. The
1631 1631 * stack pointer is the one saved in this context.
1632 1632 */
1633 1633 lxfp = LX_REG(ucp, REG_SP);
1634 1634 } else {
1635 1635 /*
1636 1636 * We interrupted a native (emulation) routine, so we must get
1637 1637 * the current stack pointer from either the tsd (if one is
1638 1638 * stored there) or via the context chain.
1639 1639 *
1640 1640 */
1641 1641 lxfp = lx_find_brand_sp();
1642 1642 if (lxtsd->lxtsd_lx_sp != 0) {
1643 1643 /*
1644 1644 * We must also make room for the possibility of nested
1645 1645 * signal delivery -- we may be pre-empting the
1646 1646 * in-progress handling of another signal.
1647 1647 *
1648 1648 * Note that if we were already on the alternate stack,
1649 1649 * any emulated Linux system calls would be betwixt
1650 1650 * that original signal frame and this new one on the
1651 1651 * one contiguous stack, so this logic holds either
1652 1652 * way:
1653 1653 */
1654 1654 lxfp = MIN(lxtsd->lxtsd_lx_sp, lxfp);
1655 1655 }
1656 1656 }
1657 1657
1658 1658 /*
1659 1659 * Account for a reserved stack region (for amd64, this is 128 bytes),
1660 1660 * and align the stack:
1661 1661 */
1662 1662 lxfp -= STACK_RESERVE;
1663 1663 lxfp &= ~(STACK_ALIGN - 1);
1664 1664
1665 1665 /*
1666 1666 * Allocate space on the Linux process stack for our delivery frame,
1667 1667 * including:
1668 1668 *
1669 1669 * ----------------------------------------------------- old %sp
1670 1670 * - lx_sigdeliver_frame_t
1671 1671 * - (ucontext_t pointers and stack magic)
1672 1672 * -----------------------------------------------------
1673 1673 * - (amd64-only 8-byte alignment gap)
1674 1674 * -----------------------------------------------------
1675 1675 * - frame of size "stacksz" from the stack builder
1676 1676 * ----------------------------------------------------- new %sp
1677 1677 */
1678 1678 #if defined(_LP64)
1679 1679 /*
1680 1680 * The AMD64 ABI requires us to align the stack such that when the
1681 1681 * called function pushes the base pointer, the stack is 16 byte
1682 1682 * aligned. The stack must, therefore, be 8- but _not_ 16-byte
1683 1683 * aligned.
1684 1684 */
1685 1685 #if (STACK_ALIGN != 16) || (STACK_ENTRY_ALIGN != 8)
1686 1686 #error "lx_sigdeliver() did not find expected stack alignment"
1687 1687 #endif
1688 1688 totsz = SA(sizeof (lx_sigdeliver_frame_t)) + SA(stacksz) + 8;
1689 1689 assert((totsz & (STACK_ENTRY_ALIGN - 1)) == 0);
1690 1690 assert((totsz & (STACK_ALIGN - 1)) == 8);
1691 1691 #else
1692 1692 totsz = SA(sizeof (lx_sigdeliver_frame_t)) + SA(stacksz);
1693 1693 assert((totsz & (STACK_ALIGN - 1)) == 0);
1694 1694 #endif
1695 1695
1696 1696 /*
1697 1697 * Copy our return frame into place:
1698 1698 */
1699 1699 lxfp -= SA(sizeof (lx_sigdeliver_frame_t));
1700 1700 lx_debug("lx_sigdeliver: lx_sigdeliver_frame_t @ %p\n", lxfp);
1701 1701 {
1702 1702 lx_sigdeliver_frame_t frm;
1703 1703
1704 1704 frm.lxsdf_magic = LX_SIGRT_MAGIC;
1705 1705 frm.lxsdf_retucp = &uc;
1706 1706 frm.lxsdf_sigucp = ucp;
1707 1707 frm.lxsdf_sigbackup = &sigbackup;
1708 1708
1709 1709 lx_debug("lx_sigdeliver: retucp %p sigucp %p\n",
1710 1710 frm.lxsdf_retucp, frm.lxsdf_sigucp);
1711 1711
1712 1712 if (uucopy(&frm, (void *)lxfp, sizeof (frm)) != 0) {
1713 1713 /*
1714 1714 * We could not modify the stack of the emulated Linux
1715 1715 * program. Act like the kernel and terminate the
1716 1716 * program with a segmentation violation.
1717 1717 */
1718 1718 (void) syscall(SYS_brand, B_EXIT_AS_SIG, SIGSEGV);
1719 1719 }
1720 1720
1721 1721 LX_SIGNAL_DELIVERY_FRAME_CREATE((void *)lxfp);
1722 1722
1723 1723 /*
1724 1724 * Populate a backup copy of signal linkage to use in case
1725 1725 * the Linux program completely destroys (or relocates) the
1726 1726 * delivery frame.
1727 1727 *
1728 1728 * This is necessary for programs that have flown so far off
1729 1729 * the architectural rails that they believe it is
1730 1730 * acceptable to make assumptions about the precise size and
1731 1731 * layout of the signal handling frame assembled by the
1732 1732 * kernel.
1733 1733 */
1734 1734 sigbackup.lxsb_retucp = frm.lxsdf_retucp;
1735 1735 sigbackup.lxsb_sigucp = frm.lxsdf_sigucp;
1736 1736 sigbackup.lxsb_sigdeliver_frame = lxfp;
1737 1737 sigbackup.lxsb_previous = lxtsd->lxtsd_sigbackup;
1738 1738 lxtsd->lxtsd_sigbackup = &sigbackup;
1739 1739
1740 1740 lx_debug("lx_sigdeliver: installed sigbackup %p; prev %p\n",
1741 1741 &sigbackup, sigbackup.lxsb_previous);
1742 1742 }
1743 1743
1744 1744 /*
1745 1745 * Build the Linux signal handling frame:
1746 1746 */
1747 1747 #if defined(_LP64)
1748 1748 lxfp -= SA(stacksz) + 8;
1749 1749 #else
1750 1750 lxfp -= SA(stacksz);
1751 1751 #endif
1752 1752 lx_debug("lx_sigdeliver: Linux sig frame @ %p\n", lxfp);
1753 1753 stack_builder(lx_sig, sip, ucp, lxfp, hargs);
1754 1754
1755 1755 /*
1756 1756 * Record our reservation so that any nested signal handlers
1757 1757 * can see it.
1758 1758 */
1759 1759 lx_debug("lx_sigdeliver: Linux tsd sp %p -> %p\n", lxtsd->lxtsd_lx_sp,
1760 1760 lxfp);
1761 1761 lxtsd->lxtsd_lx_sp = lxfp;
1762 1762
1763 1763 if (newstack) {
1764 1764 lxtsd->lxtsd_sigaltstack.ss_flags |= LX_SS_ONSTACK;
1765 1765 }
1766 1766
1767 1767 LX_SIGDELIVER(lx_sig, lxsap, (void *)lxfp);
1768 1768
1769 1769 /*
1770 1770 * Re-enable signal delivery. If a signal was queued while we were
1771 1771 * in the critical section, it will be delivered immediately.
1772 1772 */
1773 1773 _sigon();
1774 1774
1775 1775 /*
1776 1776 * Pass control to the Linux signal handler:
1777 1777 */
1778 1778 lx_debug("lx_sigdeliver: JUMPING TO LINUX (sig %d sp %p eip %p)\n",
1779 1779 lx_sig, lxfp, user_handler);
1780 1780 {
1781 1781 ucontext_t jump_uc;
1782 1782
1783 1783 bcopy(lx_find_brand_uc(), &jump_uc, sizeof (jump_uc));
1784 1784
1785 1785 /*
1786 1786 * We want to load the general registers from this context, and
1787 1787 * switch to the BRAND stack. We do _not_ want to restore the
1788 1788 * uc_link value from this synthetic context, as that would
1789 1789 * break the signal handling context chain.
1790 1790 */
1791 1791 jump_uc.uc_flags = UC_CPU;
1792 1792 jump_uc.uc_brand_data[0] = (void *)(LX_UC_STACK_BRAND |
1793 1793 LX_UC_IGNORE_LINK);
1794 1794
1795 1795 LX_REG(&jump_uc, REG_FP) = 0;
1796 1796 LX_REG(&jump_uc, REG_SP) = lxfp;
1797 1797 LX_REG(&jump_uc, REG_PC) = (uintptr_t)user_handler;
↓ open down ↓ |
225 lines elided |
↑ open up ↑ |
1798 1798
1799 1799 #if defined(_LP64)
1800 1800 /*
1801 1801 * Pass signal handler arguments by registers on AMD64.
1802 1802 */
1803 1803 LX_REG(&jump_uc, REG_RDI) = hargs[0];
1804 1804 LX_REG(&jump_uc, REG_RSI) = hargs[1];
1805 1805 LX_REG(&jump_uc, REG_RDX) = hargs[2];
1806 1806 #endif
1807 1807
1808 - if (syscall(SYS_brand, B_JUMP_TO_LINUX, &jump_uc) == -1) {
1809 - lx_err_fatal("B_JUMP_TO_LINUX failed: %s",
1810 - strerror(errno));
1811 - }
1808 + lx_jump_to_linux(&jump_uc);
1812 1809 }
1813 1810
1814 1811 assert(0);
1812 + abort();
1815 1813
1816 1814 after_signal_handler:
1817 1815 /*
1818 1816 * Ensure all nested signal handlers have completed correctly
1819 1817 * and then remove our stack reservation.
1820 1818 */
1821 1819 _sigoff();
1822 1820 LX_SIGNAL_POST_HANDLER(lxfp, old_tsd_sp);
1823 1821 assert(lxtsd->lxtsd_lx_sp == lxfp);
1824 1822 lx_debug("lx_sigdeliver: after; Linux tsd sp %p -> %p\n", lxfp,
1825 1823 old_tsd_sp);
1826 1824 lxtsd->lxtsd_lx_sp = old_tsd_sp;
1827 1825 if (newstack) {
1828 1826 LX_SIGNAL_ALTSTACK_DISABLE();
1829 1827 lx_debug("lx_sigdeliver: disabling ALTSTACK sp %p\n", lxfp);
1830 1828 lxtsd->lxtsd_sigaltstack.ss_flags &= ~LX_SS_ONSTACK;
1831 1829 }
1832 1830 /*
1833 1831 * Restore backup signal tracking chain pointer to previous value:
1834 1832 */
1835 1833 if (lxtsd->lxtsd_sigbackup != NULL) {
1836 1834 lx_sigbackup_t *bprev = lxtsd->lxtsd_sigbackup->lxsb_previous;
1837 1835
1838 1836 lx_debug("lx_sigdeliver: restoring sigbackup %p to %p\n",
1839 1837 lxtsd->lxtsd_sigbackup, bprev);
1840 1838
1841 1839 lxtsd->lxtsd_sigbackup = bprev;
1842 1840 }
1843 1841 _sigon();
1844 1842
1845 1843 /*
1846 1844 * Here we return to libc so that it may clean up and restore the
1847 1845 * context originally interrupted by this signal.
1848 1846 */
1849 1847 }
1850 1848
1851 1849 /*
1852 1850 * Common routine to modify sigaction characteristics of a thread.
1853 1851 *
1854 1852 * We shouldn't need any special locking code here as we actually use our copy
1855 1853 * of libc's sigaction() to do all the real work, so its thread locking should
1856 1854 * take care of any issues for us.
1857 1855 */
1858 1856 static int
1859 1857 lx_sigaction_common(int lx_sig, struct lx_sigaction *lxsp,
1860 1858 struct lx_sigaction *olxsp)
1861 1859 {
1862 1860 struct lx_sigaction *lxsap;
1863 1861 struct sigaction sa;
1864 1862
1865 1863 if (lx_sig <= 0 || lx_sig > LX_NSIG)
1866 1864 return (-EINVAL);
1867 1865
1868 1866 lxsap = &lx_sighandlers.lx_sa[lx_sig];
1869 1867 lx_debug("&lx_sighandlers.lx_sa[%d] = 0x%p", lx_sig, lxsap);
1870 1868
1871 1869 if ((olxsp != NULL) &&
1872 1870 ((uucopy(lxsap, olxsp, sizeof (struct lx_sigaction))) != 0))
1873 1871 return (-errno);
1874 1872
1875 1873 if (lxsp != NULL) {
1876 1874 int err, sig;
1877 1875 struct lx_sigaction lxsa;
1878 1876 sigset_t new_set, oset;
1879 1877
1880 1878 if (uucopy(lxsp, &lxsa, sizeof (struct lx_sigaction)) != 0)
1881 1879 return (-errno);
1882 1880
1883 1881 if ((sig = ltos_signo[lx_sig]) != -1) {
1884 1882 if (lx_no_abort_handler != 0) {
1885 1883 /*
1886 1884 * If LX_NO_ABORT_HANDLER has been set, we will
1887 1885 * not allow the emulated program to do
1888 1886 * anything hamfisted with SIGSEGV or SIGABRT
1889 1887 * signals.
1890 1888 */
1891 1889 if (sig == SIGSEGV || sig == SIGABRT) {
1892 1890 return (0);
1893 1891 }
1894 1892 }
1895 1893
1896 1894 /*
1897 1895 * Block this signal while messing with its dispostion
1898 1896 */
1899 1897 (void) sigemptyset(&new_set);
1900 1898 (void) sigaddset(&new_set, sig);
1901 1899
1902 1900 if (sigprocmask(SIG_BLOCK, &new_set, &oset) < 0) {
1903 1901 err = errno;
1904 1902 lx_debug("unable to block signal %d: %s", sig,
1905 1903 strerror(err));
1906 1904 return (-err);
1907 1905 }
1908 1906
1909 1907 /*
1910 1908 * We don't really need the old signal disposition at
1911 1909 * this point, but this weeds out signals that would
1912 1910 * cause sigaction() to return an error before we change
1913 1911 * anything other than the current signal mask.
1914 1912 */
1915 1913 if (sigaction(sig, NULL, &sa) < 0) {
1916 1914 err = errno;
1917 1915 lx_debug("sigaction() to get old "
1918 1916 "disposition for signal %d failed: "
1919 1917 "%s", sig, strerror(err));
1920 1918 (void) sigprocmask(SIG_SETMASK, &oset, NULL);
1921 1919 return (-err);
1922 1920 }
1923 1921
1924 1922 if ((lxsa.lxsa_handler != SIG_DFL) &&
1925 1923 (lxsa.lxsa_handler != SIG_IGN)) {
1926 1924 sa.sa_handler = lx_call_user_handler;
1927 1925
1928 1926 /*
1929 1927 * The interposition signal handler needs the
1930 1928 * information provided via the SA_SIGINFO flag.
1931 1929 */
1932 1930 sa.sa_flags = SA_SIGINFO;
1933 1931
1934 1932 /*
1935 1933 * When translating from Linux to illumos
1936 1934 * sigaction(2) flags, we explicitly do not
1937 1935 * pass SA_ONSTACK to the kernel. The
1938 1936 * alternate stack for Linux signal handling is
1939 1937 * handled entirely by the emulation code.
1940 1938 */
1941 1939 if (lxsa.lxsa_flags & LX_SA_NOCLDSTOP)
1942 1940 sa.sa_flags |= SA_NOCLDSTOP;
1943 1941 if (lxsa.lxsa_flags & LX_SA_NOCLDWAIT)
1944 1942 sa.sa_flags |= SA_NOCLDWAIT;
1945 1943 if (lxsa.lxsa_flags & LX_SA_RESTART)
1946 1944 sa.sa_flags |= SA_RESTART;
1947 1945 if (lxsa.lxsa_flags & LX_SA_NODEFER)
1948 1946 sa.sa_flags |= SA_NODEFER;
1949 1947
1950 1948 /*
1951 1949 * RESETHAND cannot be used be passed through
1952 1950 * for SIGPWR due to different default actions
1953 1951 * between Linux and Illumos.
1954 1952 */
1955 1953 if ((sig != SIGPWR) &&
1956 1954 (lxsa.lxsa_flags & LX_SA_RESETHAND))
1957 1955 sa.sa_flags |= SA_RESETHAND;
1958 1956
1959 1957 if (ltos_sigset(&lxsa.lxsa_mask,
1960 1958 &sa.sa_mask) != 0) {
1961 1959 err = errno;
1962 1960 (void) sigprocmask(SIG_SETMASK, &oset,
1963 1961 NULL);
1964 1962 return (-err);
1965 1963 }
1966 1964
1967 1965 lx_debug("interposing handler @ 0x%p for "
1968 1966 "signal %d (lx %d), flags 0x%x",
1969 1967 lxsa.lxsa_handler, sig, lx_sig,
1970 1968 lxsa.lxsa_flags);
1971 1969
1972 1970 if (sigaction(sig, &sa, NULL) < 0) {
1973 1971 err = errno;
1974 1972 lx_debug("sigaction() to set new "
1975 1973 "disposition for signal %d failed: "
1976 1974 "%s", sig, strerror(err));
1977 1975 (void) sigprocmask(SIG_SETMASK, &oset,
1978 1976 NULL);
1979 1977 return (-err);
1980 1978 }
1981 1979 } else if ((sig != SIGPWR) ||
1982 1980 ((sig == SIGPWR) &&
1983 1981 (lxsa.lxsa_handler == SIG_IGN))) {
1984 1982 /*
1985 1983 * There's no need to interpose for SIG_DFL or
1986 1984 * SIG_IGN so just call our copy of libc's
1987 1985 * sigaction(), but don't allow SIG_DFL for
1988 1986 * SIGPWR due to differing default actions
1989 1987 * between Linux and Illumos.
1990 1988 *
1991 1989 * Get the previous disposition first so things
1992 1990 * like sa_mask and sa_flags are preserved over
1993 1991 * a transition to SIG_DFL or SIG_IGN, which is
1994 1992 * what Linux expects.
1995 1993 */
1996 1994
1997 1995 sa.sa_handler = lxsa.lxsa_handler;
1998 1996
1999 1997 if (sigaction(sig, &sa, NULL) < 0) {
2000 1998 err = errno;
2001 1999 lx_debug("sigaction(%d, %s) failed: %s",
2002 2000 sig, ((sa.sa_handler == SIG_DFL) ?
2003 2001 "SIG_DFL" : "SIG_IGN"),
2004 2002 strerror(err));
2005 2003 (void) sigprocmask(SIG_SETMASK, &oset,
2006 2004 NULL);
2007 2005 return (-err);
2008 2006 }
2009 2007 }
2010 2008 } else {
2011 2009 lx_debug("Linux signal with no kill support "
2012 2010 "specified: %d", lx_sig);
2013 2011 }
2014 2012
2015 2013 /*
2016 2014 * Save the new disposition for the signal in the global
2017 2015 * lx_sighandlers structure.
2018 2016 */
2019 2017 bcopy(&lxsa, lxsap, sizeof (struct lx_sigaction));
2020 2018
2021 2019 /*
2022 2020 * Reset the signal mask to what we came in with if
2023 2021 * we were modifying a kill-supported signal.
2024 2022 */
2025 2023 if (sig != -1)
2026 2024 (void) sigprocmask(SIG_SETMASK, &oset, NULL);
2027 2025 }
2028 2026
2029 2027 return (0);
2030 2028 }
2031 2029
2032 2030 #if defined(_ILP32)
2033 2031 /*
2034 2032 * sigaction is only used in 32-bit code.
2035 2033 */
2036 2034 long
2037 2035 lx_sigaction(uintptr_t lx_sig, uintptr_t actp, uintptr_t oactp)
2038 2036 {
2039 2037 int val;
2040 2038 struct lx_sigaction sa, osa;
2041 2039 struct lx_sigaction *sap, *osap;
2042 2040 struct lx_osigaction *osp;
2043 2041
2044 2042 sap = (actp ? &sa : NULL);
2045 2043 osap = (oactp ? &osa : NULL);
2046 2044
2047 2045 /*
2048 2046 * If we have a source pointer, convert source lxsa_mask from
2049 2047 * lx_osigset_t to lx_sigset_t format.
2050 2048 */
2051 2049 if (sap) {
2052 2050 osp = (struct lx_osigaction *)actp;
2053 2051 sap->lxsa_handler = osp->lxsa_handler;
2054 2052
2055 2053 bzero(&sap->lxsa_mask, sizeof (lx_sigset_t));
2056 2054
2057 2055 for (val = 1; val <= OSIGSET_NBITS; val++)
2058 2056 if (osp->lxsa_mask & OSIGSET_BITSET(val))
2059 2057 (void) lx_sigaddset(&sap->lxsa_mask, val);
2060 2058
2061 2059 sap->lxsa_flags = osp->lxsa_flags;
2062 2060 sap->lxsa_restorer = osp->lxsa_restorer;
2063 2061 }
2064 2062
2065 2063 if ((val = lx_sigaction_common(lx_sig, sap, osap)))
2066 2064 return (val);
2067 2065
2068 2066 /*
2069 2067 * If we have a save pointer, convert the old lxsa_mask from
2070 2068 * lx_sigset_t to lx_osigset_t format.
2071 2069 */
2072 2070 if (osap) {
2073 2071 osp = (struct lx_osigaction *)oactp;
2074 2072
2075 2073 osp->lxsa_handler = osap->lxsa_handler;
2076 2074
2077 2075 bzero(&osp->lxsa_mask, sizeof (osp->lxsa_mask));
2078 2076 for (val = 1; val <= OSIGSET_NBITS; val++)
2079 2077 if (lx_sigismember(&osap->lxsa_mask, val))
2080 2078 osp->lxsa_mask |= OSIGSET_BITSET(val);
2081 2079
2082 2080 osp->lxsa_flags = osap->lxsa_flags;
2083 2081 osp->lxsa_restorer = osap->lxsa_restorer;
2084 2082 }
2085 2083
2086 2084 return (0);
2087 2085 }
2088 2086 #endif
2089 2087
2090 2088 long
2091 2089 lx_rt_sigaction(uintptr_t lx_sig, uintptr_t actp, uintptr_t oactp,
2092 2090 uintptr_t setsize)
2093 2091 {
2094 2092 /*
2095 2093 * The "new" rt_sigaction call checks the setsize
2096 2094 * parameter.
2097 2095 */
2098 2096 if ((size_t)setsize != sizeof (lx_sigset_t))
2099 2097 return (-EINVAL);
2100 2098
2101 2099 return (lx_sigaction_common(lx_sig, (struct lx_sigaction *)actp,
2102 2100 (struct lx_sigaction *)oactp));
2103 2101 }
2104 2102
2105 2103 #if defined(_ILP32)
2106 2104 /*
2107 2105 * Convert signal syscall to a call to the lx_sigaction() syscall
2108 2106 * Only used in 32-bit code.
2109 2107 */
2110 2108 long
2111 2109 lx_signal(uintptr_t lx_sig, uintptr_t handler)
2112 2110 {
2113 2111 struct sigaction act;
2114 2112 struct sigaction oact;
2115 2113 int rc;
2116 2114
2117 2115 /*
2118 2116 * Use sigaction to mimic SYSV signal() behavior; glibc will
2119 2117 * actually call sigaction(2) itself, so we're really reaching
2120 2118 * back for signal(2) semantics here.
2121 2119 */
2122 2120 bzero(&act, sizeof (act));
2123 2121 act.sa_handler = (void (*)())handler;
2124 2122 act.sa_flags = SA_RESETHAND | SA_NODEFER;
2125 2123
2126 2124 rc = lx_sigaction(lx_sig, (uintptr_t)&act, (uintptr_t)&oact);
2127 2125 return ((rc == 0) ? ((ssize_t)oact.sa_handler) : rc);
2128 2126 }
2129 2127 #endif
2130 2128
2131 2129 void
2132 2130 lx_sighandlers_save(lx_sighandlers_t *saved)
2133 2131 {
2134 2132 bcopy(&lx_sighandlers, saved, sizeof (lx_sighandlers_t));
2135 2133 }
2136 2134
2137 2135 void
2138 2136 lx_sighandlers_restore(lx_sighandlers_t *saved)
2139 2137 {
2140 2138 bcopy(saved, &lx_sighandlers, sizeof (lx_sighandlers_t));
2141 2139 }
2142 2140
2143 2141 int
2144 2142 lx_siginit(void)
2145 2143 {
2146 2144 extern void set_setcontext_enforcement(int);
2147 2145 extern void set_escaped_context_cleanup(int);
2148 2146
2149 2147 struct sigaction sa;
2150 2148 sigset_t new_set, oset;
2151 2149 int lx_sig, sig;
2152 2150
2153 2151 if (getenv("LX_NO_ABORT_HANDLER") != NULL) {
2154 2152 lx_no_abort_handler = 1;
2155 2153 }
2156 2154
2157 2155 /*
2158 2156 * Block all signals possible while setting up the signal imposition
2159 2157 * mechanism.
2160 2158 */
2161 2159 (void) sigfillset(&new_set);
2162 2160
2163 2161 if (sigprocmask(SIG_BLOCK, &new_set, &oset) < 0)
2164 2162 lx_err_fatal("unable to block signals while setting up "
2165 2163 "imposition mechanism: %s", strerror(errno));
2166 2164
2167 2165 /*
2168 2166 * Ignore any signals that have no Linux analog so that those
2169 2167 * signals cannot be sent to Linux processes from the global zone
2170 2168 */
2171 2169 for (sig = 1; sig < NSIG; sig++)
2172 2170 if (stol_signo[sig] < 0)
2173 2171 (void) sigignore(sig);
2174 2172
2175 2173 /*
2176 2174 * Mark any signals that are ignored as ignored in our interposition
2177 2175 * handler array
2178 2176 */
2179 2177 for (lx_sig = 1; lx_sig <= LX_NSIG; lx_sig++) {
2180 2178 if (((sig = ltos_signo[lx_sig]) != -1) &&
2181 2179 (sigaction(sig, NULL, &sa) < 0))
2182 2180 lx_err_fatal("unable to determine previous disposition "
2183 2181 "for signal %d: %s", sig, strerror(errno));
2184 2182
2185 2183 if (sa.sa_handler == SIG_IGN) {
2186 2184 lx_debug("marking signal %d (lx %d) as SIG_IGN",
2187 2185 sig, lx_sig);
2188 2186 lx_sighandlers.lx_sa[lx_sig].lxsa_handler = SIG_IGN;
2189 2187 }
2190 2188 }
2191 2189
2192 2190 /*
2193 2191 * Have our interposition handler handle SIGPWR to start with,
2194 2192 * as it has a default action of terminating the process in Linux
2195 2193 * but its default is to be ignored in Illumos.
2196 2194 */
2197 2195 (void) sigemptyset(&sa.sa_mask);
2198 2196 sa.sa_sigaction = lx_call_user_handler;
2199 2197 sa.sa_flags = SA_SIGINFO;
2200 2198
2201 2199 if (sigaction(SIGPWR, &sa, NULL) < 0)
2202 2200 lx_err_fatal("sigaction(SIGPWR) failed: %s", strerror(errno));
2203 2201
2204 2202 /*
2205 2203 * Illumos' libc forces certain register values in the ucontext_t
2206 2204 * used to restore a post-signal user context to be those Illumos
2207 2205 * expects; however that is not what we want to happen if the signal
2208 2206 * was taken while branded code was executing, so we must disable
2209 2207 * that behavior.
2210 2208 */
2211 2209 set_setcontext_enforcement(0);
2212 2210
2213 2211 /*
2214 2212 * The illumos libc attempts to clean up dangling uc_link pointers in
2215 2213 * signal handling contexts when libc believes us to have escaped a
2216 2214 * signal handler incorrectly in the past. We want to disable this
2217 2215 * behaviour, so that the system call emulation context saved by the
2218 2216 * kernel brand module for lx_emulate() may be part of the context
2219 2217 * chain without itself being used for signal handling.
2220 2218 */
2221 2219 set_escaped_context_cleanup(0);
2222 2220
2223 2221 /*
2224 2222 * Reset the signal mask to what we came in with.
2225 2223 */
2226 2224 (void) sigprocmask(SIG_SETMASK, &oset, NULL);
2227 2225
2228 2226 lx_debug("interposition handler setup for SIGPWR");
2229 2227 return (0);
2230 2228 }
2231 2229
2232 2230 /*
2233 2231 * This code strongly resembles lx_poll(), but is here to be able to take
2234 2232 * advantage of the Linux signal helper routines.
2235 2233 */
2236 2234 long
2237 2235 lx_ppoll(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4, uintptr_t p5)
2238 2236 {
2239 2237 struct pollfd *lfds, *sfds;
2240 2238 nfds_t nfds = (nfds_t)p2;
2241 2239 timespec_t ts, *tsp = NULL;
2242 2240 int fds_size, i, rval, revents;
2243 2241 lx_sigset_t lxsig, *lxsigp = NULL;
2244 2242 sigset_t sigset, *sp = NULL;
2245 2243 rctlblk_t *rblk;
2246 2244
2247 2245 lx_debug("\tppoll(0x%p, %d, 0x%p, 0x%p, %d)", p1, p2, p3, p4, p5);
2248 2246
2249 2247 if (p3 != NULL) {
2250 2248 if (uucopy((void *)p3, &ts, sizeof (ts)) != 0)
2251 2249 return (-errno);
2252 2250
2253 2251 tsp = &ts;
2254 2252 }
2255 2253
2256 2254 if (p4 != NULL) {
2257 2255 if (uucopy((void *)p4, &lxsig, sizeof (lxsig)) != 0)
2258 2256 return (-errno);
2259 2257
2260 2258 lxsigp = &lxsig;
2261 2259 if ((size_t)p5 != sizeof (lx_sigset_t))
2262 2260 return (-EINVAL);
2263 2261
2264 2262 if (lxsigp) {
2265 2263 if ((rval = ltos_sigset(lxsigp, &sigset)) != 0)
2266 2264 return (rval);
2267 2265
2268 2266 sp = &sigset;
2269 2267 }
2270 2268 }
2271 2269
2272 2270 /*
2273 2271 * Deal with the NULL fds[] case.
2274 2272 */
2275 2273 if (nfds == 0 || p1 == NULL) {
2276 2274 if ((rval = ppoll(NULL, 0, tsp, sp)) < 0)
2277 2275 return (-errno);
2278 2276
2279 2277 return (rval);
2280 2278 }
2281 2279
2282 2280 if (maxfd == 0) {
2283 2281 if ((rblk = (rctlblk_t *)SAFE_ALLOCA(rctlblk_size())) == NULL)
2284 2282 return (-ENOMEM);
2285 2283
2286 2284 if (getrctl("process.max-file-descriptor", NULL, rblk,
2287 2285 RCTL_FIRST) == -1)
2288 2286 return (-EINVAL);
2289 2287
2290 2288 maxfd = rctlblk_get_value(rblk);
2291 2289 }
2292 2290
2293 2291 if (nfds > maxfd)
2294 2292 return (-EINVAL);
2295 2293
2296 2294 /*
2297 2295 * Note: we are assuming that the Linux and Illumos pollfd
2298 2296 * structures are identical. Copy in the Linux poll structure.
2299 2297 */
2300 2298 fds_size = sizeof (struct pollfd) * nfds;
2301 2299 lfds = (struct pollfd *)SAFE_ALLOCA(fds_size);
2302 2300 if (lfds == NULL)
2303 2301 return (-ENOMEM);
2304 2302 if (uucopy((void *)p1, lfds, fds_size) != 0)
2305 2303 return (-errno);
2306 2304
2307 2305 /*
2308 2306 * The poll system call modifies the poll structures passed in
2309 2307 * so we'll need to make an extra copy of them.
2310 2308 */
2311 2309 sfds = (struct pollfd *)SAFE_ALLOCA(fds_size);
2312 2310 if (sfds == NULL)
2313 2311 return (-ENOMEM);
2314 2312
2315 2313 /* Convert the Linux events bitmask into the Illumos equivalent. */
2316 2314 for (i = 0; i < nfds; i++) {
2317 2315 /*
2318 2316 * If the caller is polling for an unsupported event, we
2319 2317 * have to bail out.
2320 2318 */
2321 2319 if (lfds[i].events & ~LX_POLL_SUPPORTED_EVENTS) {
2322 2320 lx_unsupported("unsupported poll events requested: "
2323 2321 "events=0x%x", lfds[i].events);
2324 2322 return (-ENOTSUP);
2325 2323 }
2326 2324
2327 2325 sfds[i].fd = lfds[i].fd;
2328 2326 sfds[i].events = lfds[i].events & LX_POLL_COMMON_EVENTS;
2329 2327 if (lfds[i].events & LX_POLLWRNORM)
2330 2328 sfds[i].events |= POLLWRNORM;
2331 2329 if (lfds[i].events & LX_POLLWRBAND)
2332 2330 sfds[i].events |= POLLWRBAND;
2333 2331 if (lfds[i].events & LX_POLLRDHUP)
2334 2332 sfds[i].events |= POLLRDHUP;
2335 2333 sfds[i].revents = 0;
2336 2334 }
2337 2335
2338 2336 if ((rval = ppoll(sfds, nfds, tsp, sp)) < 0)
2339 2337 return (-errno);
2340 2338
2341 2339 /* Convert the Illumos revents bitmask into the Linux equivalent */
2342 2340 for (i = 0; i < nfds; i++) {
2343 2341 revents = sfds[i].revents & LX_POLL_COMMON_EVENTS;
2344 2342 if (sfds[i].revents & POLLWRBAND)
2345 2343 revents |= LX_POLLWRBAND;
2346 2344 if (sfds[i].revents & POLLRDHUP)
2347 2345 revents |= LX_POLLRDHUP;
2348 2346
2349 2347 /*
2350 2348 * Be careful because on Illumos POLLOUT and POLLWRNORM
2351 2349 * are defined to the same values but on Linux they
2352 2350 * are not.
2353 2351 */
2354 2352 if (sfds[i].revents & POLLOUT) {
2355 2353 if ((lfds[i].events & LX_POLLOUT) == 0)
2356 2354 revents &= ~LX_POLLOUT;
2357 2355 if (lfds[i].events & LX_POLLWRNORM)
2358 2356 revents |= LX_POLLWRNORM;
2359 2357 }
2360 2358
2361 2359 lfds[i].revents = revents;
2362 2360 }
2363 2361
2364 2362 /* Copy out the results */
2365 2363 if (uucopy(lfds, (void *)p1, fds_size) != 0)
2366 2364 return (-errno);
2367 2365
2368 2366 return (rval);
2369 2367 }
2370 2368
2371 2369 /*
2372 2370 * This code stongly resemebles lx_select(), but is here to be able to take
2373 2371 * advantage of the Linux signal helper routines.
2374 2372 */
2375 2373 long
2376 2374 lx_pselect6(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4,
2377 2375 uintptr_t p5, uintptr_t p6)
2378 2376 {
2379 2377 int nfds = (int)p1;
2380 2378 fd_set *rfdsp = NULL;
2381 2379 fd_set *wfdsp = NULL;
2382 2380 fd_set *efdsp = NULL;
2383 2381 timespec_t ts, *tsp = NULL;
2384 2382 int fd_set_len = howmany(nfds, 8);
2385 2383 int r;
2386 2384 sigset_t sigset, *sp = NULL;
2387 2385
2388 2386 lx_debug("\tpselect6(%d, 0x%p, 0x%p, 0x%p, 0x%p, 0x%p)",
2389 2387 p1, p2, p3, p4, p4, p6);
2390 2388
2391 2389 if (nfds > 0) {
2392 2390 if (p2 != NULL) {
2393 2391 rfdsp = SAFE_ALLOCA(fd_set_len);
2394 2392 if (rfdsp == NULL)
2395 2393 return (-ENOMEM);
2396 2394 if (uucopy((void *)p2, rfdsp, fd_set_len) != 0)
2397 2395 return (-errno);
2398 2396 }
2399 2397 if (p3 != NULL) {
2400 2398 wfdsp = SAFE_ALLOCA(fd_set_len);
2401 2399 if (wfdsp == NULL)
2402 2400 return (-ENOMEM);
2403 2401 if (uucopy((void *)p3, wfdsp, fd_set_len) != 0)
2404 2402 return (-errno);
2405 2403 }
2406 2404 if (p4 != NULL) {
2407 2405 efdsp = SAFE_ALLOCA(fd_set_len);
2408 2406 if (efdsp == NULL)
2409 2407 return (-ENOMEM);
2410 2408 if (uucopy((void *)p4, efdsp, fd_set_len) != 0)
2411 2409 return (-errno);
2412 2410 }
2413 2411 }
2414 2412
2415 2413 if (p5 != NULL) {
2416 2414 if (uucopy((void *)p5, &ts, sizeof (ts)) != 0)
2417 2415 return (-errno);
2418 2416
2419 2417 tsp = &ts;
2420 2418 }
2421 2419
2422 2420 if (p6 != NULL) {
2423 2421 /*
2424 2422 * To force the number of arguments to be no more than six,
2425 2423 * Linux bundles both the sigset and the size into a structure
2426 2424 * that becomes the sixth argument.
2427 2425 */
2428 2426 struct {
2429 2427 lx_sigset_t *addr;
2430 2428 size_t size;
2431 2429 } lx_sigset;
2432 2430
2433 2431 if (uucopy((void *)p6, &lx_sigset, sizeof (lx_sigset)) != 0)
2434 2432 return (-errno);
2435 2433
2436 2434 /*
2437 2435 * Yes, that's right: Linux forces a size to be passed only
2438 2436 * so it can check that it's the size of a sigset_t.
2439 2437 */
2440 2438 if (lx_sigset.size != sizeof (lx_sigset_t))
2441 2439 return (-EINVAL);
2442 2440
2443 2441 /*
2444 2442 * This is where we check if the sigset is *really* NULL.
2445 2443 */
2446 2444 if (lx_sigset.addr) {
2447 2445 if ((r = ltos_sigset(lx_sigset.addr, &sigset)) != 0)
2448 2446 return (r);
2449 2447
2450 2448 sp = &sigset;
2451 2449 }
2452 2450 }
2453 2451
2454 2452 #if defined(_LP64)
2455 2453 r = pselect(nfds, rfdsp, wfdsp, efdsp, tsp, sp);
2456 2454 #else
2457 2455 if (nfds >= FD_SETSIZE)
2458 2456 r = pselect_large_fdset(nfds, rfdsp, wfdsp, efdsp, tsp, sp);
2459 2457 else
2460 2458 r = pselect(nfds, rfdsp, wfdsp, efdsp, tsp, sp);
2461 2459 #endif
2462 2460
2463 2461 if (r < 0)
2464 2462 return (-errno);
2465 2463
2466 2464 /*
2467 2465 * For pselect6(), we don't honor the strange Linux select() semantics
2468 2466 * with respect to the timestruc parameter because glibc ignores it
2469 2467 * anyway -- just copy out the fd pointers and return.
2470 2468 */
2471 2469 if ((rfdsp != NULL) && (uucopy(rfdsp, (void *)p2, fd_set_len) != 0))
2472 2470 return (-errno);
2473 2471 if ((wfdsp != NULL) && (uucopy(wfdsp, (void *)p3, fd_set_len) != 0))
2474 2472 return (-errno);
2475 2473 if ((efdsp != NULL) && (uucopy(efdsp, (void *)p4, fd_set_len) != 0))
2476 2474 return (-errno);
2477 2475
2478 2476 return (r);
2479 2477 }
2480 2478
2481 2479 /*
2482 2480 * The first argument is the pid (Linux tgid) to send the signal to, second
2483 2481 * argument is the signal to send (an lx signal), and third is the siginfo_t
2484 2482 * with extra information. We translate the code and signal only from the
2485 2483 * siginfo_t, and leave everything else the same as it gets passed through the
2486 2484 * signalling system. This is enough to get sigqueue working. See Linux man
2487 2485 * page rt_sigqueueinfo(2).
2488 2486 */
2489 2487 long
2490 2488 lx_rt_sigqueueinfo(uintptr_t p1, uintptr_t p2, uintptr_t p3)
2491 2489 {
2492 2490 pid_t tgid = (pid_t)p1;
2493 2491 int lx_sig = (int)p2;
2494 2492 int sig;
2495 2493 lx_siginfo_t lx_siginfo;
2496 2494 siginfo_t siginfo;
2497 2495 int s_code;
2498 2496 pid_t s_pid;
2499 2497
2500 2498 if (uucopy((void *)p3, &lx_siginfo, sizeof (lx_siginfo_t)) != 0)
2501 2499 return (-EFAULT);
2502 2500 s_code = ltos_sigcode(lx_siginfo.lsi_code);
2503 2501 if (s_code == LX_SI_CODE_NOT_EXIST)
2504 2502 return (-EINVAL);
2505 2503 if (lx_sig < 0 || lx_sig > LX_NSIG || (sig = ltos_signo[lx_sig]) < 0) {
2506 2504 return (-EINVAL);
2507 2505 }
2508 2506 /*
2509 2507 * This case (when trying to kill pid 0) just has a different errno
2510 2508 * returned in illumos than in Linux.
2511 2509 */
2512 2510 if (tgid == 0)
2513 2511 return (-ESRCH);
2514 2512 if (lx_lpid_to_spid(tgid, &s_pid) != 0)
2515 2513 return (-ESRCH);
2516 2514 if (SI_CANQUEUE(s_code)) {
2517 2515 return ((syscall(SYS_sigqueue, s_pid, sig,
2518 2516 lx_siginfo.lsi_value, s_code, 0) == -1) ?
2519 2517 (-errno): 0);
2520 2518 } else {
2521 2519 /*
2522 2520 * This case is unlikely, as the main entry point is through
2523 2521 * sigqueue, which always has a queuable si_code.
2524 2522 */
2525 2523 siginfo.si_signo = sig;
2526 2524 siginfo.si_code = s_code;
2527 2525 siginfo.si_pid = lx_siginfo.lsi_pid;
2528 2526 siginfo.si_value = lx_siginfo.lsi_value;
2529 2527 siginfo.si_uid = lx_siginfo.lsi_uid;
2530 2528 return ((syscall(SYS_brand, B_HELPER_SIGQUEUE,
2531 2529 tgid, sig, &siginfo)) ? (-errno) : 0);
2532 2530 }
2533 2531 }
2534 2532
2535 2533 /*
2536 2534 * Adds an additional argument for which thread within a thread group to send
2537 2535 * the signal to (added as the second argument).
2538 2536 */
2539 2537 long
2540 2538 lx_rt_tgsigqueueinfo(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4)
2541 2539 {
2542 2540 pid_t tgid = (pid_t)p1;
2543 2541 pid_t tid = (pid_t)p2;
2544 2542 int lx_sig = (int)p3;
2545 2543 int sig;
2546 2544 lx_siginfo_t lx_siginfo;
2547 2545 siginfo_t siginfo;
2548 2546 int si_code;
2549 2547
2550 2548 if (uucopy((void *)p4, &lx_siginfo, sizeof (lx_siginfo_t)) != 0)
2551 2549 return (-EFAULT);
2552 2550 if (lx_sig < 0 || lx_sig > LX_NSIG || (sig = ltos_signo[lx_sig]) < 0) {
2553 2551 return (-EINVAL);
2554 2552 }
2555 2553 si_code = ltos_sigcode(lx_siginfo.lsi_code);
2556 2554 if (si_code == LX_SI_CODE_NOT_EXIST)
2557 2555 return (-EINVAL);
2558 2556 /*
2559 2557 * Check for invalid tgid and tids. That appears to be only negatives
2560 2558 * and 0 values. Everything else that doesn't exist is instead ESRCH.
2561 2559 */
2562 2560 if (tgid <= 0 || tid <= 0)
2563 2561 return (-EINVAL);
2564 2562 siginfo.si_signo = sig;
2565 2563 siginfo.si_code = si_code;
2566 2564 siginfo.si_pid = lx_siginfo.lsi_pid;
2567 2565 siginfo.si_value = lx_siginfo.lsi_value;
2568 2566 siginfo.si_uid = lx_siginfo.lsi_uid;
2569 2567
2570 2568 return ((syscall(SYS_brand, B_HELPER_TGSIGQUEUE, tgid, tid, sig,
2571 2569 &siginfo)) ? (-errno) : 0);
2572 2570 }
2573 2571
/*
 * Emulate Linux signalfd(2): identical to lx_signalfd4() with no flags set.
 */
long
lx_signalfd(int fd, uintptr_t mask, size_t msize)
{
	const int no_flags = 0;

	return (lx_signalfd4(fd, mask, msize, no_flags));
}
2579 2577
2580 2578 long
2581 2579 lx_signalfd4(int fd, uintptr_t mask, size_t msize, int flags)
2582 2580 {
2583 2581 sigset_t s_set;
2584 2582 int r;
2585 2583
2586 2584 if (msize != sizeof (int64_t))
2587 2585 return (-EINVAL);
2588 2586
2589 2587 if (ltos_sigset((lx_sigset_t *)mask, &s_set) != 0)
2590 2588 return (-errno);
2591 2589
2592 2590 r = signalfd(fd, &s_set, flags);
2593 2591
2594 2592 /*
2595 2593 * signalfd(3C) may fail with ENOENT if /dev/signalfd is not available.
2596 2594 * It is less jarring to Linux programs to tell them that internal
2597 2595 * allocation failed than to report an error number they are not
2598 2596 * expecting.
2599 2597 */
2600 2598 if (r == -1 && errno == ENOENT)
2601 2599 return (-ENODEV);
2602 2600
2603 2601 return (r == -1 ? -errno : r);
2604 2602 }
↓ open down ↓ |
780 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX