Print this page
OS-4470 lxbrand unblocking signals in new threads must be atomic


  74         uint32_t entry_number;
  75         uint32_t base_addr;
  76         uint32_t limit;
  77         uint32_t seg_32bit:1;
  78         uint32_t contents:2;
  79         uint32_t read_exec_only:1;
  80         uint32_t limit_in_pages:1;
  81         uint32_t seg_not_present:1;
  82         uint32_t useable:1;
  83         uint32_t empty:25;
  84 };
  85 
  86 struct clone_state {
  87         void            *c_retaddr;     /* instr after clone()'s int80 */
  88         int             c_flags;        /* flags to clone(2) */
  89         int             c_sig;          /* signal to send on thread exit */
  90         void            *c_stk;         /* %esp of new thread */
  91         void            *c_ptidp;       /* presumably CLONE_PARENT_SETTID tid addr — confirm vs. clone(2) */
  92         struct lx_desc  *c_ldtinfo;     /* thread-specific segment */
  93         void            *c_ctidp;       /* presumably CLONE_CHILD_*TID tid addr — confirm vs. clone(2) */
  94         ucontext_t      c_uc;           /* original register state */
  95         sigset_t        c_sigmask;      /* parent's signal mask, restored in child */
  96         lx_affmask_t    c_affmask;      /* CPU affinity mask */
  97         volatile int    *c_clone_res;   /* pid/error returned to cloner */
  98         int             c_ptrace_event; /* ptrace(2) event for child stop */
  99         void            *c_ntv_stk;     /* native stack for this thread */
 100         size_t          c_ntv_stk_sz;   /* native stack size */
 101         lx_tsd_t        *c_lx_tsd;      /* tsd area for thread */
 102 };
 103 
 104 /*
 105  * Counter incremented when we vfork(2) ourselves, and decremented when the
 106  * vfork(2)ed child exit(2)s or exec(2)s.
 107  */
 108 static int is_vforked = 0;
 109 
 110 long
 111 lx_exit(uintptr_t p1)
 112 {
 113         int             status = (int)p1;
 114         lx_tsd_t        *lx_tsd;
 115 


 222                 *(cs->c_clone_res) = -errno;
 223 
 224                 lx_err_fatal("Unable to set affinity mask in child thread: %s",
 225                     strerror(errno));
 226         }
 227 
 228         /*
 229          * Initialize the thread specific data for this thread.
 230          */
 231         lxtsd = cs->c_lx_tsd;
 232         lx_init_tsd(lxtsd);
 233         lxtsd->lxtsd_clone_state = cs;
 234 
 235         /*
 236          * Install the emulation stack for this thread.  Register the
 237          * thread-specific data structure with the stack list so that it may be
 238          * freed at thread exit or fork(2).
 239          */
 240         lx_install_stack(cs->c_ntv_stk, cs->c_ntv_stk_sz, lxtsd);
 241 
 242         if (sigprocmask(SIG_SETMASK, &cs->c_sigmask, NULL) < 0) {
 243                 *(cs->c_clone_res) = -errno;
 244 
 245                 lx_err_fatal("Unable to release held signals for child "
 246                     "thread: %s", strerror(errno));
 247         }
 248 
 249         /*
 250          * Let the parent know that the clone has (effectively) been
 251          * completed.
 252          */
 253         *(cs->c_clone_res) = rval;
 254 
 255         /*
 256          * We want to load the general registers from this context, and
 257          * switch to the BRAND stack.

 258          */
 259         cs->c_uc.uc_flags = UC_CPU;
 260         cs->c_uc.uc_brand_data[0] = (void *)LX_UC_STACK_BRAND;
 261 
 262         /*
 263          * New threads will not link into the existing context chain.
 264          */
 265         cs->c_uc.uc_link = NULL;
 266 
 267         /*
 268          * Set stack pointer and entry point for new thread:
 269          */
 270         LX_REG(&cs->c_uc, REG_SP) = (uintptr_t)cs->c_stk;
 271         LX_REG(&cs->c_uc, REG_PC) = (uintptr_t)cs->c_retaddr;
 272 
 273         /*
 274          * Return 0 to the child:
 275          */
 276         LX_REG(&cs->c_uc, REG_R0) = (uintptr_t)0;
 277 
 278         /*
 279          * Fire the ptrace(2) event stop in the new thread:
 280          */
 281         lx_ptrace_stop_if_option(cs->c_ptrace_event, B_TRUE, 0, &cs->c_uc);
 282 
 283         /*
 284          * Jump to the Linux process.  The system call must not return.
 285          */
 286         if (syscall(SYS_brand, B_JUMP_TO_LINUX, &cs->c_uc) == -1) {
 287                 lx_err_fatal("B_JUMP_TO_LINUX failed: %s",
 288                     strerror(errno));
 289         }
 290         abort();
 291 
 292         /*NOTREACHED*/
 293         return (NULL);
 294 }
 295 
 296 /*
 297  * The way Linux handles stopping for FORK vs. CLONE does not map exactly to
 298  * which syscall was used. Instead, it has to do with which signal is set in
 299  * the low byte of the clone flag. The only time the CLONE event is emitted is
 300  * if the clone signal (the low byte of the flags argument) is set to something
 301  * other than SIGCHLD (see the Linux src in kernel/fork.c do_fork() for the
 302  * actual code).
 303  */
 304 static int
 305 ptrace_clone_event(int flags)
 306 {
 307         if (flags & LX_CLONE_VFORK)
 308                 return (LX_PTRACE_O_TRACEVFORK);
 309 
 310         if ((flags & LX_CSIGNAL) != LX_SIGCHLD)
 311                 return (LX_PTRACE_O_TRACECLONE);
 312 
 313         return (LX_PTRACE_O_TRACEFORK);


 649          */
 650         cs->c_retaddr = (void *)LX_REG(ucp, REG_PC);
 651         /*
 652          * Copy the saved context for the clone(2) system call so that the
 653          * new thread may use it to initialise registers.
 654          */
 655         bcopy(ucp, &cs->c_uc, sizeof (cs->c_uc));
 656         if ((cs->c_lx_tsd = malloc(sizeof (*cs->c_lx_tsd))) == NULL) {
 657                 free(cs);
 658                 return (-ENOMEM);
 659         }
 660 
 661         if (lx_sched_getaffinity(0, sizeof (cs->c_affmask),
 662             (uintptr_t)&cs->c_affmask) == -1) {
 663                 lx_err_fatal("Unable to get affinity mask for parent "
 664                     "thread: %s", strerror(errno));
 665         }
 666 
 667         clone_res = 0;
 668 
 669         (void) sigfillset(&sigmask);
 670 
 671         /*
 672          * Block all signals because the thread we create won't be able to
 673          * properly handle them until it's fully set up.
 674          */

 675         if (sigprocmask(SIG_BLOCK, &sigmask, &osigmask) < 0) {
 676                 lx_debug("lx_clone sigprocmask() failed: %s", strerror(errno));
 677                 free(cs->c_lx_tsd);
 678                 free(cs);
 679                 return (-errno);
 680         }
 681         cs->c_sigmask = osigmask;
 682 
 683         /*
 684          * Allocate the native stack for this new thread now, so that we
 685          * can return failure gracefully as ENOMEM.
 686          */
 687         if (lx_alloc_stack(&cs->c_ntv_stk, &cs->c_ntv_stk_sz) != 0) {
 688                 free(cs->c_lx_tsd);
 689                 free(cs);
 690                 return (-ENOMEM);
 691         }
 692 
 693         rval = thr_create(NULL, NULL, clone_start, cs, THR_DETACHED, &tid);
 694 
 695         /*
 696          * If the thread did not start, free the resources we allocated:
 697          */
 698         if (rval == -1) {
 699                 error = errno;
 700                 (void) munmap(cs->c_ntv_stk, cs->c_ntv_stk_sz);
 701                 free(cs->c_lx_tsd);




  74         uint32_t entry_number;
  75         uint32_t base_addr;
  76         uint32_t limit;
  77         uint32_t seg_32bit:1;
  78         uint32_t contents:2;
  79         uint32_t read_exec_only:1;
  80         uint32_t limit_in_pages:1;
  81         uint32_t seg_not_present:1;
  82         uint32_t useable:1;
  83         uint32_t empty:25;
  84 };
  85 
  86 struct clone_state {
  87         void            *c_retaddr;     /* instr after clone()'s int80 */
  88         int             c_flags;        /* flags to clone(2) */
  89         int             c_sig;          /* signal to send on thread exit */
  90         void            *c_stk;         /* %esp of new thread */
  91         void            *c_ptidp;       /* presumably CLONE_PARENT_SETTID tid addr — confirm vs. clone(2) */
  92         struct lx_desc  *c_ldtinfo;     /* thread-specific segment */
  93         void            *c_ctidp;       /* presumably CLONE_CHILD_*TID tid addr — confirm vs. clone(2) */
  94         ucontext_t      c_uc;           /* original register state; uc_sigmask holds parent's mask */

  95         lx_affmask_t    c_affmask;      /* CPU affinity mask */
  96         volatile int    *c_clone_res;   /* pid/error returned to cloner */
  97         int             c_ptrace_event; /* ptrace(2) event for child stop */
  98         void            *c_ntv_stk;     /* native stack for this thread */
  99         size_t          c_ntv_stk_sz;   /* native stack size */
 100         lx_tsd_t        *c_lx_tsd;      /* tsd area for thread */
 101 };
 102 
 103 /*
 104  * Counter incremented when we vfork(2) ourselves, and decremented when the
 105  * vfork(2)ed child exit(2)s or exec(2)s.
 106  */
 107 static int is_vforked = 0;
 108 
 109 long
 110 lx_exit(uintptr_t p1)
 111 {
 112         int             status = (int)p1;
 113         lx_tsd_t        *lx_tsd;
 114 


 221                 *(cs->c_clone_res) = -errno;
 222 
 223                 lx_err_fatal("Unable to set affinity mask in child thread: %s",
 224                     strerror(errno));
 225         }
 226 
 227         /*
 228          * Initialize the thread specific data for this thread.
 229          */
 230         lxtsd = cs->c_lx_tsd;
 231         lx_init_tsd(lxtsd);
 232         lxtsd->lxtsd_clone_state = cs;
 233 
 234         /*
 235          * Install the emulation stack for this thread.  Register the
 236          * thread-specific data structure with the stack list so that it may be
 237          * freed at thread exit or fork(2).
 238          */
 239         lx_install_stack(cs->c_ntv_stk, cs->c_ntv_stk_sz, lxtsd);
 240 







 241         /*
 242          * Let the parent know that the clone has (effectively) been
 243          * completed.
 244          */
 245         *(cs->c_clone_res) = rval;
 246 
 247         /*
 248          * We want to load the general registers from this context, restore the
 249          * original signal mask, and switch to the BRAND stack.  The original
 250          * signal mask was saved to the context by lx_clone().
 251          */
 252         cs->c_uc.uc_flags = UC_CPU | UC_SIGMASK;
 253         cs->c_uc.uc_brand_data[0] = (void *)LX_UC_STACK_BRAND;
 254 
 255         /*
 256          * New threads will not link into the existing context chain.
 257          */
 258         cs->c_uc.uc_link = NULL;
 259 
 260         /*
 261          * Set stack pointer and entry point for new thread:
 262          */
 263         LX_REG(&cs->c_uc, REG_SP) = (uintptr_t)cs->c_stk;
 264         LX_REG(&cs->c_uc, REG_PC) = (uintptr_t)cs->c_retaddr;
 265 
 266         /*
 267          * Return 0 to the child:
 268          */
 269         LX_REG(&cs->c_uc, REG_R0) = (uintptr_t)0;
 270 
 271         /*
 272          * Fire the ptrace(2) event stop in the new thread:
 273          */
 274         lx_ptrace_stop_if_option(cs->c_ptrace_event, B_TRUE, 0, &cs->c_uc);
 275 
 276         /*
 277          * Jump to the Linux process.  This call cannot return.
 278          */
 279         lx_jump_to_linux(&cs->c_uc);







 280 }
 281 
 282 /*
 283  * The way Linux handles stopping for FORK vs. CLONE does not map exactly to
 284  * which syscall was used. Instead, it has to do with which signal is set in
 285  * the low byte of the clone flag. The only time the CLONE event is emitted is
 286  * if the clone signal (the low byte of the flags argument) is set to something
 287  * other than SIGCHLD (see the Linux src in kernel/fork.c do_fork() for the
 288  * actual code).
 289  */
 290 static int
 291 ptrace_clone_event(int flags)
 292 {
 293         if (flags & LX_CLONE_VFORK)
 294                 return (LX_PTRACE_O_TRACEVFORK);
 295 
 296         if ((flags & LX_CSIGNAL) != LX_SIGCHLD)
 297                 return (LX_PTRACE_O_TRACECLONE);
 298 
 299         return (LX_PTRACE_O_TRACEFORK);


 635          */
 636         cs->c_retaddr = (void *)LX_REG(ucp, REG_PC);
 637         /*
 638          * Copy the saved context for the clone(2) system call so that the
 639          * new thread may use it to initialise registers.
 640          */
 641         bcopy(ucp, &cs->c_uc, sizeof (cs->c_uc));
 642         if ((cs->c_lx_tsd = malloc(sizeof (*cs->c_lx_tsd))) == NULL) {
 643                 free(cs);
 644                 return (-ENOMEM);
 645         }
 646 
 647         if (lx_sched_getaffinity(0, sizeof (cs->c_affmask),
 648             (uintptr_t)&cs->c_affmask) == -1) {
 649                 lx_err_fatal("Unable to get affinity mask for parent "
 650                     "thread: %s", strerror(errno));
 651         }
 652 
 653         clone_res = 0;
 654 


 655         /*
 656          * Block all signals because the thread we create won't be able to
 657          * properly handle them until it's fully set up.
 658          */
 659         (void) sigfillset(&sigmask);
 660         if (sigprocmask(SIG_BLOCK, &sigmask, &osigmask) < 0) {
 661                 lx_debug("lx_clone sigprocmask() failed: %s", strerror(errno));
 662                 free(cs->c_lx_tsd);
 663                 free(cs);
 664                 return (-errno);
 665         }
 666         cs->c_uc.uc_sigmask = osigmask;
 667 
 668         /*
 669          * Allocate the native stack for this new thread now, so that we
 670          * can return failure gracefully as ENOMEM.
 671          */
 672         if (lx_alloc_stack(&cs->c_ntv_stk, &cs->c_ntv_stk_sz) != 0) {
 673                 free(cs->c_lx_tsd);
 674                 free(cs);
 675                 return (-ENOMEM);
 676         }
 677 
 678         rval = thr_create(NULL, NULL, clone_start, cs, THR_DETACHED, &tid);
 679 
 680         /*
 681          * If the thread did not start, free the resources we allocated:
 682          */
 683         if (rval == -1) {
 684                 error = errno;
 685                 (void) munmap(cs->c_ntv_stk, cs->c_ntv_stk_sz);
 686                 free(cs->c_lx_tsd);