Print this page
OS-4470 lxbrand unblocking signals in new threads must be atomic
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/lib/brand/lx/lx_brand/common/clone.c
+++ new/usr/src/lib/brand/lx/lx_brand/common/clone.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
24 24 * Use is subject to license terms.
25 25 * Copyright 2015 Joyent, Inc. All rights reserved.
26 26 */
27 27
28 28 #include <assert.h>
29 29 #include <errno.h>
30 30 #include <stdlib.h>
31 31 #include <signal.h>
32 32 #include <unistd.h>
33 33 #include <ucontext.h>
34 34 #include <thread.h>
35 35 #include <strings.h>
36 36 #include <libintl.h>
37 37 #include <sys/regset.h>
38 38 #include <sys/syscall.h>
39 39 #include <sys/inttypes.h>
40 40 #include <sys/param.h>
41 41 #include <sys/types.h>
42 42 #include <sys/segments.h>
43 43 #include <signal.h>
44 44 #include <sys/lx_misc.h>
45 45 #include <sys/lx_types.h>
46 46 #include <sys/lx_signal.h>
47 47 #include <sys/lx_syscall.h>
48 48 #include <sys/lx_brand.h>
49 49 #include <sys/lx_debug.h>
50 50 #include <sys/lx_thread.h>
51 51 #include <sys/fork.h>
52 52 #include <sys/mman.h>
53 53 #include <lx_syscall.h>
54 54
55 55
56 56 #define SHARED_AS \
57 57 (LX_CLONE_VM | LX_CLONE_FS | LX_CLONE_FILES | LX_CLONE_SIGHAND \
58 58 | LX_CLONE_THREAD)
59 59 #define CLONE_VFORK (LX_CLONE_VM | LX_CLONE_VFORK)
60 60 #define CLONE_TD (LX_CLONE_THREAD|LX_CLONE_DETACH)
61 61
62 62 #define IS_FORK(f) (((f) & SHARED_AS) == 0)
63 63 #define IS_VFORK(f) (((f) & CLONE_VFORK) == CLONE_VFORK)
64 64
65 65 /*
66 66 * This is dicey. This seems to be an internal glibc structure, and not
67 67 * part of any external interface. Thus, it is subject to change without
68 68 * notice. FWIW, clone(2) itself seems to be an internal (or at least
69 69 * unstable) interface, since strace(1) shows it differently than the man
70 70 * page.
71 71 */
72 72 struct lx_desc
73 73 {
74 74 uint32_t entry_number;
75 75 uint32_t base_addr;
76 76 uint32_t limit;
77 77 uint32_t seg_32bit:1;
78 78 uint32_t contents:2;
79 79 uint32_t read_exec_only:1;
80 80 uint32_t limit_in_pages:1;
81 81 uint32_t seg_not_present:1;
82 82 uint32_t useable:1;
83 83 uint32_t empty:25;
↓ open down ↓ |
83 lines elided |
↑ open up ↑ |
84 84 };
85 85
86 86 struct clone_state {
87 87 void *c_retaddr; /* instr after clone()'s int80 */
88 88 int c_flags; /* flags to clone(2) */
89 89 int c_sig; /* signal to send on thread exit */
90 90 void *c_stk; /* %esp of new thread */
91 91 void *c_ptidp;
92 92 struct lx_desc *c_ldtinfo; /* thread-specific segment */
93 93 void *c_ctidp;
94 - ucontext_t c_uc; /* original register state */
95 - sigset_t c_sigmask; /* signal mask */
94 + ucontext_t c_uc; /* original register state/sigmask */
96 95 lx_affmask_t c_affmask; /* CPU affinity mask */
97 96 volatile int *c_clone_res; /* pid/error returned to cloner */
98 97 int c_ptrace_event; /* ptrace(2) event for child stop */
99 98 void *c_ntv_stk; /* native stack for this thread */
100 99 size_t c_ntv_stk_sz; /* native stack size */
101 100 lx_tsd_t *c_lx_tsd; /* tsd area for thread */
102 101 };
103 102
104 103 /*
105 104 * Counter incremented when we vfork(2) ourselves, and decremented when the
106 105 * vfork(2)ed child exit(2)s or exec(2)s.
107 106 */
108 107 static int is_vforked = 0;
109 108
110 109 long
111 110 lx_exit(uintptr_t p1)
112 111 {
113 112 int status = (int)p1;
114 113 lx_tsd_t *lx_tsd;
115 114
116 115 /*
117 116 * If we are a vfork(2)ed child, we need to exit as quickly and
118 117 * cleanly as possible to avoid corrupting our parent.
119 118 */
120 119 if (is_vforked != 0) {
121 120 is_vforked--;
122 121 _exit(status);
123 122 }
124 123
125 124 lx_tsd = lx_get_tsd();
126 125
127 126 lx_tsd->lxtsd_exit = LX_ET_EXIT;
128 127 lx_tsd->lxtsd_exit_status = status;
129 128
130 129 lx_ptrace_stop_if_option(LX_PTRACE_O_TRACEEXIT, B_FALSE,
131 130 (ulong_t)status, NULL);
132 131
133 132 /*
134 133 * This thread is exiting. Restore the state of the thread to
135 134 * what it was before we started running linux code.
136 135 */
137 136 (void) setcontext(&lx_tsd->lxtsd_exit_context);
138 137
139 138 /*
140 139 * If we returned from the setcontext(2), something is very wrong.
141 140 */
142 141 lx_err_fatal("exit: unable to set exit context: %s", strerror(errno));
143 142
144 143 /*NOTREACHED*/
145 144 return (0);
146 145 }
147 146
148 147 long
149 148 lx_group_exit(uintptr_t p1)
150 149 {
151 150 int status = (int)p1;
152 151 lx_tsd_t *lx_tsd;
153 152
154 153 /*
155 154 * If we are a vfork(2)ed child, we need to exit as quickly and
156 155 * cleanly as possible to avoid corrupting our parent.
157 156 */
158 157 if (is_vforked != 0) {
159 158 is_vforked--;
160 159 _exit(status);
161 160 }
162 161
163 162 lx_tsd = lx_get_tsd();
164 163
165 164 lx_tsd->lxtsd_exit = LX_ET_EXIT_GROUP;
166 165 lx_tsd->lxtsd_exit_status = status;
167 166
168 167 /*
169 168 * This thread is exiting. Restore the state of the thread to
170 169 * what it was before we started running linux code.
171 170 */
172 171 (void) setcontext(&lx_tsd->lxtsd_exit_context);
173 172
174 173 /*
175 174 * If we returned from the setcontext(2), something is very wrong.
176 175 */
177 176 lx_err_fatal("group_exit: unable to set exit context: %s",
178 177 strerror(errno));
179 178
180 179 /*NOTREACHED*/
181 180 return (0);
182 181 }
183 182
184 183 static void *
185 184 clone_start(void *arg)
186 185 {
187 186 int rval;
188 187 struct clone_state *cs = (struct clone_state *)arg;
189 188 lx_tsd_t *lxtsd;
190 189
191 190 /*
192 191 * Let the kernel finish setting up all the needed state for this
193 192 * new thread.
194 193 *
195 194 * We already created the thread using the thr_create(3C) library
196 195 * call, so most of the work required to emulate lx_clone(2) has
197 196 * been done by the time we get to this point.
198 197 */
199 198 lx_debug("\tre-vectoring to lx kernel module to complete lx_clone()");
200 199 lx_debug("\tB_HELPER_CLONE(0x%x, 0x%p, 0x%p, 0x%p)",
201 200 cs->c_flags, cs->c_ptidp, cs->c_ldtinfo, cs->c_ctidp);
202 201
203 202 rval = syscall(SYS_brand, B_HELPER_CLONE, cs->c_flags, cs->c_ptidp,
204 203 cs->c_ldtinfo, cs->c_ctidp);
205 204
206 205 /*
207 206 * At this point the parent is waiting for cs->c_clone_res to go
208 207 * non-zero to indicate the thread has been cloned. The value set
209 208 * in cs->c_clone_res will be used for the return value from
210 209 * clone().
211 210 */
212 211 if (rval < 0) {
213 212 *(cs->c_clone_res) = -errno;
214 213 lx_debug("\tkernel clone failed, errno %d\n", errno);
215 214 free(cs->c_lx_tsd);
216 215 free(cs);
217 216 return (NULL);
218 217 }
219 218
220 219 if (lx_sched_setaffinity(0, sizeof (cs->c_affmask),
221 220 (uintptr_t)&cs->c_affmask) != 0) {
222 221 *(cs->c_clone_res) = -errno;
223 222
224 223 lx_err_fatal("Unable to set affinity mask in child thread: %s",
225 224 strerror(errno));
226 225 }
227 226
228 227 /*
229 228 * Initialize the thread specific data for this thread.
230 229 */
231 230 lxtsd = cs->c_lx_tsd;
↓ open down ↓ |
126 lines elided |
↑ open up ↑ |
232 231 lx_init_tsd(lxtsd);
233 232 lxtsd->lxtsd_clone_state = cs;
234 233
235 234 /*
236 235 * Install the emulation stack for this thread. Register the
237 236 * thread-specific data structure with the stack list so that it may be
238 237 * freed at thread exit or fork(2).
239 238 */
240 239 lx_install_stack(cs->c_ntv_stk, cs->c_ntv_stk_sz, lxtsd);
241 240
242 - if (sigprocmask(SIG_SETMASK, &cs->c_sigmask, NULL) < 0) {
243 - *(cs->c_clone_res) = -errno;
244 -
245 - lx_err_fatal("Unable to release held signals for child "
246 - "thread: %s", strerror(errno));
247 - }
248 -
249 241 /*
250 242 * Let the parent know that the clone has (effectively) been
251 243 * completed.
252 244 */
253 245 *(cs->c_clone_res) = rval;
254 246
255 247 /*
256 - * We want to load the general registers from this context, and
257 - * switch to the BRAND stack.
248 + * We want to load the general registers from this context, restore the
249 + * original signal mask, and switch to the BRAND stack. The original
250 + * signal mask was saved to the context by lx_clone().
258 251 */
259 - cs->c_uc.uc_flags = UC_CPU;
252 + cs->c_uc.uc_flags = UC_CPU | UC_SIGMASK;
260 253 cs->c_uc.uc_brand_data[0] = (void *)LX_UC_STACK_BRAND;
261 254
262 255 /*
263 256 * New threads will not link into the existing context chain.
264 257 */
265 258 cs->c_uc.uc_link = NULL;
266 259
267 260 /*
268 261 * Set stack pointer and entry point for new thread:
269 262 */
270 263 LX_REG(&cs->c_uc, REG_SP) = (uintptr_t)cs->c_stk;
271 264 LX_REG(&cs->c_uc, REG_PC) = (uintptr_t)cs->c_retaddr;
272 265
273 266 /*
↓ open down ↓ |
4 lines elided |
↑ open up ↑ |
274 267 * Return 0 to the child:
275 268 */
276 269 LX_REG(&cs->c_uc, REG_R0) = (uintptr_t)0;
277 270
278 271 /*
279 272 * Fire the ptrace(2) event stop in the new thread:
280 273 */
281 274 lx_ptrace_stop_if_option(cs->c_ptrace_event, B_TRUE, 0, &cs->c_uc);
282 275
283 276 /*
284 - * Jump to the Linux process. The system call must not return.
277 + * Jump to the Linux process. This call cannot return.
285 278 */
286 - if (syscall(SYS_brand, B_JUMP_TO_LINUX, &cs->c_uc) == -1) {
287 - lx_err_fatal("B_JUMP_TO_LINUX failed: %s",
288 - strerror(errno));
289 - }
290 - abort();
291 -
292 - /*NOTREACHED*/
293 - return (NULL);
279 + lx_jump_to_linux(&cs->c_uc);
294 280 }
295 281
296 282 /*
297 283 * The way Linux handles stopping for FORK vs. CLONE does not map exactly to
298 284 * which syscall was used. Instead, it has to do with which signal is set in
299 285 * the low byte of the clone flag. The only time the CLONE event is emitted is
300 286 * if the clone signal (the low byte of the flags argument) is set to something
301 287 * other than SIGCHLD (see the Linux src in kernel/fork.c do_fork() for the
302 288 * actual code).
303 289 */
304 290 static int
305 291 ptrace_clone_event(int flags)
306 292 {
307 293 if (flags & LX_CLONE_VFORK)
308 294 return (LX_PTRACE_O_TRACEVFORK);
309 295
310 296 if ((flags & LX_CSIGNAL) != LX_SIGCHLD)
311 297 return (LX_PTRACE_O_TRACECLONE);
312 298
313 299 return (LX_PTRACE_O_TRACEFORK);
314 300 }
315 301
316 302 /*
317 303 * See glibc sysdeps/unix/sysv/linux/x86_64/clone.S code for x64 argument order
318 304 * and the Linux kernel/fork.c code for the various ways arguments can be passed
319 305 * to the clone syscall (CONFIG_CLONE_BACKWARDS, et al).
320 306 */
321 307 long
322 308 lx_clone(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4,
323 309 uintptr_t p5)
324 310 {
325 311 struct clone_state *cs;
326 312 int flags = (int)p1;
327 313 void *cldstk = (void *)p2;
328 314 void *ptidp = (void *)p3;
329 315 #if defined(_LP64)
330 316 void *ctidp = (void *)p4;
331 317 struct lx_desc *ldtinfo = (void *)p5;
332 318 #else /* is 32bit */
333 319 struct lx_desc *ldtinfo = (void *)p4;
334 320 void *ctidp = (void *)p5;
335 321 #endif
336 322 thread_t tid;
337 323 volatile int clone_res;
338 324 int sig;
339 325 int rval;
340 326 int pid;
341 327 ucontext_t *ucp;
342 328 sigset_t sigmask, osigmask;
343 329 int fork_flags = 0;
344 330 int ptrace_event;
345 331 int error = 0;
346 332
347 333 if (flags & LX_CLONE_SETTLS) {
348 334 lx_debug("lx_clone(flags=0x%x stk=0x%p ptidp=0x%p ldt=0x%p "
349 335 "ctidp=0x%p", flags, cldstk, ptidp, ldtinfo, ctidp);
350 336 } else {
351 337 lx_debug("lx_clone(flags=0x%x stk=0x%p ptidp=0x%p)",
352 338 flags, cldstk, ptidp);
353 339 }
354 340
355 341 /*
356 342 * Only supported for pid 0 on Linux after version 2.3.21, and
357 343 * apparently not at all since 2.5.16.
358 344 */
359 345 if (flags & LX_CLONE_PID)
360 346 return (-EINVAL);
361 347
362 348 /*
363 349 * CLONE_THREAD requires CLONE_SIGHAND.
364 350 *
365 351 * CLONE_THREAD and CLONE_DETACHED must both be either set or cleared
366 352 * in kernel 2.4 and prior.
367 353 * In kernel 2.6 (and later) CLONE_DETACHED was dropped completely, so
368 354 * we no longer have this requirement.
369 355 */
370 356
371 357 if (flags & CLONE_TD) {
372 358 if (!(flags & LX_CLONE_SIGHAND))
373 359 return (-EINVAL);
374 360 if (strncmp(lx_release, "2.4", 3) == 0 &&
375 361 (flags & CLONE_TD) != CLONE_TD)
376 362 return (-EINVAL);
377 363 }
378 364
379 365 ucp = lx_syscall_regs();
380 366
 381 367 	/* test whether the pointers passed by the user are writable */
382 368 if (flags & LX_CLONE_PARENT_SETTID) {
383 369 if (uucopy(ptidp, &pid, sizeof (int)) != 0)
384 370 return (-EFAULT);
385 371 if (uucopy(&pid, ptidp, sizeof (int)) != 0)
386 372 return (-EFAULT);
387 373 }
388 374 if (flags & LX_CLONE_CHILD_SETTID) {
389 375 if (uucopy(ctidp, &pid, sizeof (int)) != 0)
390 376 return (-EFAULT);
391 377 if (uucopy(&pid, ctidp, sizeof (int)) != 0)
392 378 return (-EFAULT);
393 379 }
394 380
395 381 ptrace_event = ptrace_clone_event(flags);
396 382
397 383 /*
398 384 * Inform the in-kernel ptrace(2) subsystem that we are about to
399 385 * emulate a fork(2), vfork(2) or clone(2) system call.
400 386 */
401 387 lx_ptrace_clone_begin(ptrace_event, !!(flags & LX_CLONE_PTRACE));
402 388
403 389 /*
404 390 * Handle a fork(2) operation here. If this is not a fork, a new
405 391 * thread will be created after this block.
406 392 */
407 393 if (IS_FORK(flags) || IS_VFORK(flags)) {
408 394 if (flags & LX_CLONE_PARENT) {
409 395 lx_unsupported("clone(2) only supports CLONE_PARENT "
410 396 "for threads.\n");
411 397 return (-ENOTSUP);
412 398 }
413 399
414 400 if ((flags & LX_CSIGNAL) == 0)
415 401 fork_flags |= FORK_NOSIGCHLD;
416 402
417 403 /*
418 404 * Suspend signal delivery, run the stack management prefork
419 405 * handler and perform the actual fork(2) operation.
420 406 */
421 407 _sigoff();
422 408 lx_stack_prefork();
423 409 if (flags & LX_CLONE_VFORK) {
424 410 lx_sighandlers_t saved;
425 411
426 412 /*
427 413 * Because we keep our signal disposition at user-land
428 414 * (and in memory), we must prevent it from being
429 415 * clobbered should our vforked child change the
430 416 * disposition (e.g., via sigaction()) before releasing
431 417 * the address space. We preserve our disposition by
432 418 * taking a snapshot of it before the vfork and
433 419 * restoring it afterwards -- which we can get away
434 420 * with because we know that we aren't executing
435 421 * concurrently with our child.
436 422 */
437 423 lx_sighandlers_save(&saved);
438 424 is_vforked++;
439 425 rval = vforkx(fork_flags);
440 426 if (rval != 0) {
441 427 is_vforked--;
442 428 lx_sighandlers_restore(&saved);
443 429 }
444 430 } else {
445 431 rval = forkx(fork_flags);
446 432 }
447 433
448 434 /*
449 435 * The parent process returns through the regular system call
450 436 * path here.
451 437 */
452 438 if (rval != 0) {
453 439 if (!IS_VFORK(flags) || rval < 0) {
454 440 /*
455 441 * Run the stack management postfork handler in
456 442 * the parent. If this was a vfork(2), we only
457 443 * run it in the parent if the fork operation
458 444 * failed; the vfork(2) child has already run
459 445 * it for our address space.
460 446 */
461 447 lx_stack_postfork();
462 448 }
463 449
464 450 /*
465 451 * Since we've already forked, we can't do much if
466 452 * uucopy fails, so we just ignore failure. Failure is
467 453 * unlikely since we've tested the memory before we did
468 454 * the fork.
469 455 */
470 456 if (rval > 0 && (flags & LX_CLONE_PARENT_SETTID)) {
471 457 (void) uucopy(&rval, ptidp, sizeof (int));
472 458 }
473 459
474 460 if (rval > 0) {
475 461 lx_ptrace_stop_if_option(ptrace_event, B_FALSE,
476 462 (ulong_t)rval, NULL);
477 463 }
478 464
479 465 /*
480 466 * Re-enable signal delivery in the parent process.
481 467 */
482 468 _sigon();
483 469
484 470 return ((rval < 0) ? -errno : rval);
485 471 }
486 472
487 473 /*
488 474 * The rest of this block runs only within the new child
489 475 * process.
490 476 */
491 477
492 478 /*
493 479 * Run the stack management postfork handler in the child.
494 480 */
495 481 lx_stack_postfork();
496 482
497 483 if (!IS_VFORK(flags)) {
498 484 /*
499 485 * We must free the stacks and thread-specific data
500 486 * objects for every thread except the one duplicated
501 487 * from the parent by forkx().
502 488 */
503 489 lx_free_other_stacks();
504 490 }
505 491
506 492 if (rval == 0 && (flags & LX_CLONE_CHILD_SETTID)) {
507 493 /*
508 494 * lx_getpid should not fail, and if it does, there's
509 495 * not much we can do about it since we've already
510 496 * forked, so on failure, we just don't copy the
511 497 * memory.
512 498 */
513 499 pid = syscall(SYS_brand, B_GETPID);
514 500 if (pid >= 0)
515 501 (void) uucopy(&pid, ctidp, sizeof (int));
516 502 }
517 503
518 504 /*
519 505 * Set up additional data in the lx_proc_data structure as
520 506 * necessary.
521 507 */
522 508 if ((rval = syscall(SYS_brand, B_HELPER_CLONE, flags, ptidp,
523 509 ldtinfo, ctidp)) < 0) {
524 510 return (rval);
525 511 }
526 512
527 513 if (IS_VFORK(flags)) {
528 514 ucontext_t vforkuc;
529 515
530 516 /*
531 517 * The vfork(2) interface is somewhat less than ideal.
532 518 * The unfortunate notion of borrowing the address
533 519 * space of the parent process requires us to jump
534 520 * through several hoops to prevent corrupting parent
535 521 * emulation state.
536 522 *
537 523 * When returning in the child, we make a copy of the
538 524 * system call return context and discard three pages
539 525 * of the native stack. Returning normally would
540 526 * clobber the native stack frame in which the brand
541 527 * library in the parent process is presently waiting.
542 528 *
543 529 * The calling program is expected to correctly use
544 530 * this dusty, underspecified relic. Neglecting to
545 531 * immediately call execve(2) or exit(2) is not
546 532 * cricket; this stack space will be permanently lost,
547 533 * not to mention myriad other undefined behaviour.
548 534 */
549 535 bcopy(ucp, &vforkuc, sizeof (vforkuc));
550 536 vforkuc.uc_brand_data[1] -= LX_NATIVE_STACK_VFORK_GAP;
551 537 vforkuc.uc_link = NULL;
552 538
553 539 lx_debug("\tvfork native stack sp %p",
554 540 vforkuc.uc_brand_data[1]);
555 541
556 542 /*
557 543 * If provided, the child needs its new stack set up.
558 544 */
559 545 if (cldstk != 0) {
560 546 lx_debug("\tvfork cldstk %p", cldstk);
561 547 LX_REG(&vforkuc, REG_SP) = (uintptr_t)cldstk;
562 548 }
563 549
564 550 /*
565 551 * Stop for ptrace if required.
566 552 */
567 553 lx_ptrace_stop_if_option(ptrace_event, B_TRUE, 0, NULL);
568 554
569 555 /*
570 556 * Return to the child via the specially constructed
571 557 * vfork(2) context.
572 558 */
573 559 LX_EMULATE_RETURN(&vforkuc, LX_SYS_clone, 0, 0);
574 560 (void) syscall(SYS_brand, B_EMULATION_DONE, &vforkuc,
575 561 LX_SYS_clone, 0, 0);
576 562
577 563 assert(0);
578 564 }
579 565
580 566 /*
581 567 * If provided, the child needs its new stack set up.
582 568 */
583 569 if (cldstk != 0) {
584 570 lx_debug("\tcldstk %p", cldstk);
585 571 LX_REG(ucp, REG_SP) = (uintptr_t)cldstk;
586 572 }
587 573
588 574 /*
589 575 * Stop for ptrace if required.
590 576 */
591 577 lx_ptrace_stop_if_option(ptrace_event, B_TRUE, 0, NULL);
592 578
593 579 /*
594 580 * Re-enable signal delivery in the child process.
595 581 */
596 582 _sigon();
597 583
598 584 /*
599 585 * The child process returns via the regular emulated system
600 586 * call path:
601 587 */
602 588 return (0);
603 589 }
604 590
605 591 /*
606 592 * We have very restricted support.... only exactly these flags are
607 593 * supported
608 594 */
609 595 if (((flags & SHARED_AS) != SHARED_AS)) {
610 596 lx_unsupported("clone(2) requires that all or none of "
611 597 "CLONE_VM/FS/FILES/THREAD/SIGHAND be set. (flags:0x%08X)\n",
612 598 flags);
613 599 return (-ENOTSUP);
614 600 }
615 601
616 602 if (cldstk == NULL) {
617 603 lx_unsupported("clone(2) requires the caller to allocate the "
618 604 "child's stack.\n");
619 605 return (-ENOTSUP);
620 606 }
621 607
622 608 /*
623 609 * If we want a signal-on-exit, ensure that the signal is valid.
624 610 */
625 611 if ((sig = ltos_signo[flags & LX_CSIGNAL]) == -1) {
626 612 lx_unsupported("clone(2) passed unsupported signal: %d", sig);
627 613 return (-ENOTSUP);
628 614 }
629 615
630 616 /*
631 617 * Initialise the state structure we pass as an argument to the new
632 618 * thread:
633 619 */
634 620 if ((cs = malloc(sizeof (*cs))) == NULL) {
635 621 lx_debug("could not allocate clone_state: %s", strerror(errno));
636 622 return (-ENOMEM);
637 623 }
638 624 cs->c_flags = flags;
639 625 cs->c_sig = sig;
640 626 cs->c_stk = cldstk;
641 627 cs->c_ptidp = ptidp;
642 628 cs->c_ldtinfo = ldtinfo;
643 629 cs->c_ctidp = ctidp;
644 630 cs->c_clone_res = &clone_res;
645 631 cs->c_ptrace_event = ptrace_event;
646 632 /*
647 633 * We want the new thread to return directly to the call site for
648 634 * the system call.
649 635 */
650 636 cs->c_retaddr = (void *)LX_REG(ucp, REG_PC);
651 637 /*
652 638 * Copy the saved context for the clone(2) system call so that the
653 639 * new thread may use it to initialise registers.
654 640 */
655 641 bcopy(ucp, &cs->c_uc, sizeof (cs->c_uc));
656 642 if ((cs->c_lx_tsd = malloc(sizeof (*cs->c_lx_tsd))) == NULL) {
657 643 free(cs);
658 644 return (-ENOMEM);
↓ open down ↓ |
355 lines elided |
↑ open up ↑ |
659 645 }
660 646
661 647 if (lx_sched_getaffinity(0, sizeof (cs->c_affmask),
662 648 (uintptr_t)&cs->c_affmask) == -1) {
663 649 lx_err_fatal("Unable to get affinity mask for parent "
664 650 "thread: %s", strerror(errno));
665 651 }
666 652
667 653 clone_res = 0;
668 654
669 - (void) sigfillset(&sigmask);
670 -
671 655 /*
672 656 * Block all signals because the thread we create won't be able to
673 657 * properly handle them until it's fully set up.
674 658 */
659 + (void) sigfillset(&sigmask);
675 660 if (sigprocmask(SIG_BLOCK, &sigmask, &osigmask) < 0) {
676 661 lx_debug("lx_clone sigprocmask() failed: %s", strerror(errno));
677 662 free(cs->c_lx_tsd);
678 663 free(cs);
679 664 return (-errno);
680 665 }
681 - cs->c_sigmask = osigmask;
666 + cs->c_uc.uc_sigmask = osigmask;
682 667
683 668 /*
684 669 * Allocate the native stack for this new thread now, so that we
685 670 * can return failure gracefully as ENOMEM.
686 671 */
687 672 if (lx_alloc_stack(&cs->c_ntv_stk, &cs->c_ntv_stk_sz) != 0) {
688 673 free(cs->c_lx_tsd);
689 674 free(cs);
690 675 return (-ENOMEM);
691 676 }
692 677
693 678 rval = thr_create(NULL, NULL, clone_start, cs, THR_DETACHED, &tid);
694 679
695 680 /*
696 681 * If the thread did not start, free the resources we allocated:
697 682 */
698 683 if (rval == -1) {
699 684 error = errno;
700 685 (void) munmap(cs->c_ntv_stk, cs->c_ntv_stk_sz);
701 686 free(cs->c_lx_tsd);
702 687 free(cs);
703 688 }
704 689
705 690 /*
706 691 * Release any pending signals
707 692 */
708 693 (void) sigprocmask(SIG_SETMASK, &osigmask, NULL);
709 694
710 695 /*
711 696 * Wait for the child to be created and have its tid assigned.
712 697 */
713 698 if (rval == 0) {
714 699 while (clone_res == 0)
715 700 ;
716 701
717 702 rval = clone_res;
718 703 lx_ptrace_stop_if_option(ptrace_event, B_FALSE, (ulong_t)rval,
719 704 NULL);
720 705
721 706 return (rval);
722 707 } else {
723 708 /*
724 709 * Return the error from thr_create(3C).
725 710 */
726 711 return (-error);
727 712 }
728 713 }
↓ open down ↓ |
37 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX