1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  */
  26 
  27 #include <errno.h>
  28 #include <sys/types.h>
  29 #include <sys/param.h>
  30 #include <sys/lx_misc.h>
  31 #include <sys/lx_debug.h>
  32 #include <sys/lx_syscall.h>
  33 #include <sys/lx_signal.h>
  34 #include <sys/lx_thread.h>
  35 #include <sys/lwp.h>
  36 #include <unistd.h>
  37 #include <fcntl.h>
  38 #include <procfs.h>
  39 #include <sys/frame.h>
  40 #include <strings.h>
  41 #include <signal.h>
  42 #include <stddef.h>
  43 #include <stdlib.h>
  44 #include <sys/wait.h>
  45 #include <sys/auxv.h>
  46 #include <thread.h>
  47 #include <pthread.h>
  48 #include <synch.h>
  49 #include <elf.h>
  50 #include <ieeefp.h>
  51 #include <assert.h>
  52 #include <libintl.h>
  53 
  54 /*
  55  * Linux ptrace compatibility.
  56  *
  57  * The brand support for ptrace(2) is built on top of the Solaris /proc
  58  * interfaces, mounted at /native/proc in the zone.  This gets quite
  59  * complicated due to the way ptrace works and the Solaris realization of the
  60  * Linux threading model.
  61  *
  62  * ptrace can only interact with a process if we are tracing it, and it is
  63  * currently stopped. There are two ways a process can begin tracing another
  64  * process:
  65  *
  66  *   PTRACE_TRACEME
  67  *
  68  *   A child process can use PTRACE_TRACEME to indicate that it wants to be
  69  *   traced by the parent. This sets the ptrace compatibility flag in /proc
   70  *   which causes the ptrace consumer to be notified through the wait(2)
  71  *   system call of events of interest. PTRACE_TRACEME is typically used by
  72  *   the debugger by forking a process, using PTRACE_TRACEME, and finally
  73  *   doing an exec of the specified program.
  74  *
  75  *
  76  *   PTRACE_ATTACH
  77  *
  78  *   We can attach to a process using PTRACE_ATTACH. This is considerably
  79  *   more complicated than the previous case. On Linux, the traced process is
  80  *   effectively reparented to the ptrace consumer so that event notification
  81  *   can go through the normal wait(2) system call. Solaris has no such
  82  *   ability to reparent a process (nor should it) so some trickery was
  83  *   required.
  84  *
  85  *   When the ptrace consumer uses PTRACE_ATTACH it forks a monitor child
  86  *   process. The monitor enables the /proc ptrace flag for itself and uses
  87  *   the native /proc mechanisms to observe the traced process and wait for
  88  *   events of interest. When the traced process stops, the monitor process
  89  *   sends itself a SIGTRAP thus rousting its parent process (the ptrace
  90  *   consumer) out of wait(2). We then translate the process id and status
  91  *   code from wait(2) to those of the traced process.
  92  *
  93  *   To detach from the process we just have to clean up tracing flags and
  94  *   clean up the monitor.
  95  *
  96  * ptrace can only interact with a process if we have traced it, and it is
  97  * currently stopped (see is_traced()). For threads, there's no way to
  98  * distinguish whether ptrace() has been called for all threads or some
  99  * subset. Since most clients will be tracing all threads, and erroneously
 100  * allowing ptrace to access a non-traced thread is non-fatal (or at least
 101  * would be fatal on linux), we ignore this aspect of the problem.
 102  */
 103 
/*
 * Request codes for the Linux ptrace(2) operations referred to in this
 * file (LX_PTRACE_*).  The numbering is not contiguous: 10, 11 and 20-23
 * are not defined here.
 */
#define LX_PTRACE_TRACEME       0
#define LX_PTRACE_PEEKTEXT      1
#define LX_PTRACE_PEEKDATA      2
#define LX_PTRACE_PEEKUSER      3
#define LX_PTRACE_POKETEXT      4
#define LX_PTRACE_POKEDATA      5
#define LX_PTRACE_POKEUSER      6
#define LX_PTRACE_CONT          7
#define LX_PTRACE_KILL          8
#define LX_PTRACE_SINGLESTEP    9
#define LX_PTRACE_GETREGS       12
#define LX_PTRACE_SETREGS       13
#define LX_PTRACE_GETFPREGS     14
#define LX_PTRACE_SETFPREGS     15
#define LX_PTRACE_ATTACH        16
#define LX_PTRACE_DETACH        17
#define LX_PTRACE_GETFPXREGS    18
#define LX_PTRACE_SETFPXREGS    19
#define LX_PTRACE_SYSCALL       24
 123 
/*
 * This corresponds to the user_i387_struct Linux structure.
 *
 * It is the floating-point register image exchanged by getfpregs() and
 * setfpregs().  lxuf_st_space holds the eight x87 registers packed at
 * 10 bytes each (8 * 10 == 20 * sizeof (long) for a 4-byte long).
 */
typedef struct lx_user_fpregs {
        long lxuf_cwd;          /* control word (native fp->cw) */
        long lxuf_swd;          /* status word (native fp->sw) */
        long lxuf_twd;          /* tag word (native fp->tag) */
        long lxuf_fip;          /* instruction pointer offset (fp->ipoff) */
        long lxuf_fcs;          /* instruction pointer selector (fp->cssel) */
        long lxuf_foo;          /* operand offset (fp->dataoff) */
        long lxuf_fos;          /* operand selector (fp->datasel) */
        long lxuf_st_space[20]; /* packed 10-byte x87 registers */
} lx_user_fpregs_t;
 137 
/*
 * This corresponds to the user_fxsr_struct Linux structure.
 *
 * It is the extended floating-point register image exchanged by
 * getfpxregs() and setfpxregs().  Unlike lx_user_fpregs_t, each x87
 * register occupies four longs of lxux_st_space (the accessors index it
 * with i * 4), and the XMM registers are carried in lxux_xmm_space.
 */
typedef struct lx_user_fpxregs {
        uint16_t lxux_cwd;              /* control word */
        uint16_t lxux_swd;              /* status word */
        uint16_t lxux_twd;              /* tag word */
        uint16_t lxux_fop;              /* kept in upper half of fp->cssel */
        long lxux_fip;                  /* instruction pointer offset */
        long lxux_fcs;                  /* instruction pointer selector */
        long lxux_foo;                  /* operand offset */
        long lxux_fos;                  /* operand selector */
        long lxux_mxcsr;                /* read from native mxcsr */
        long lxux_reserved;             /* not touched by the accessors */
        long lxux_st_space[32];         /* x87 registers, 4 longs apiece */
        long lxux_xmm_space[32];        /* XMM register contents */
        long lxux_padding[56];          /* not touched by the accessors */
} lx_user_fpxregs_t;
 156 
/*
 * This corresponds to the user_regs_struct Linux structure.
 *
 * It is the general-purpose register image exchanged by getregs() and
 * setregs(); the field order is the Linux layout and must not be changed.
 */
typedef struct lx_user_regs {
        long lxur_ebx;
        long lxur_ecx;
        long lxur_edx;
        long lxur_esi;
        long lxur_edi;
        long lxur_ebp;
        long lxur_eax;
        long lxur_xds;          /* segment registers: %ds, %es, %fs, %gs */
        long lxur_xes;
        long lxur_xfs;
        long lxur_xgs;
        long lxur_orig_eax;     /* Linux syscall number; 0 if not in one */
        long lxur_eip;
        long lxur_xcs;
        long lxur_eflags;
        long lxur_esp;
        long lxur_xss;
} lx_user_regs_t;
 179 
/*
 * Aggregate of the general-purpose and x87 register images together with
 * assorted per-process fields and a set of eight debug registers.
 *
 * NOTE(review): this looks like the Linux "struct user" area addressed by
 * the PEEKUSER/POKEUSER requests -- confirm against the code that uses
 * this type further down the file.
 */
typedef struct lx_user {
        lx_user_regs_t lxu_regs;        /* general-purpose registers */
        int lxu_fpvalid;
        lx_user_fpregs_t lxu_i387;      /* x87 floating-point registers */
        ulong_t lxu_tsize;
        ulong_t lxu_dsize;
        ulong_t lxu_ssize;
        ulong_t lxu_start_code;
        ulong_t lxu_start_stack;
        long lxu_signal;
        int lxu_reserved;
        lx_user_regs_t *lxu_ar0;
        lx_user_fpregs_t *lxu_fpstate;
        ulong_t lxu_magic;
        char lxu_comm[32];
        int lxu_debugreg[8];            /* debug registers */
} lx_user_t;
 197 
/*
 * One record in the singly-linked ptrace_monitor_map list.  is_traced()
 * treats a pid as traced when it matches the pmm_target of some record.
 * The list is walked with ptrace_map_mtx held.
 */
typedef struct ptrace_monitor_map {
        struct ptrace_monitor_map *pmm_next;    /* next pointer */
        pid_t pmm_monitor;                      /* monitor child process */
        pid_t pmm_target;                       /* traced Linux pid */
        pid_t pmm_pid;                          /* Solaris pid */
        lwpid_t pmm_lwpid;                      /* Solaris lwpid */
        uint_t pmm_exiting;                     /* detached */
} ptrace_monitor_map_t;
 206 
/*
 * One record in the singly-linked ptrace_state_map list, holding the
 * pseudo debug registers kept for a process.  Records are created on
 * demand by debug_registers() and released by free_debug_registers().
 */
typedef struct ptrace_state_map {
        struct ptrace_state_map *psm_next;      /* next pointer */
        pid_t           psm_pid;                /* Solaris pid */
        uintptr_t       psm_debugreg[8];        /* debug registers */
} ptrace_state_map_t;
 212 
/*
 * Heads of the monitor and debug-register lists.  Lookups and insertions
 * in this file are performed with ptrace_map_mtx held.
 */
static ptrace_monitor_map_t *ptrace_monitor_map = NULL;
static ptrace_state_map_t *ptrace_state_map = NULL;
static mutex_t ptrace_map_mtx = DEFAULTMUTEX;

/*
 * Used by syscall_regs() as the reference point from which the offset of
 * lx_emulate_done within the brand library is measured.
 */
extern void *_START_;

/* Every signal except SIGKILL and SIGSTOP; filled in by ptrace_init(). */
static sigset_t blockable_sigs;
 220 
 221 #pragma init(ptrace_init)
 222 void
 223 ptrace_init(void)
 224 {
 225         (void) sigfillset(&blockable_sigs);
 226         (void) sigdelset(&blockable_sigs, SIGKILL);
 227         (void) sigdelset(&blockable_sigs, SIGSTOP);
 228 }
 229 
 230 /*
 231  * Given a pid, open the named file under /native/proc/<pid>/name using the
 232  * given mode.
 233  */
 234 static int
 235 open_procfile(pid_t pid, int mode, const char *name)
 236 {
 237         char path[MAXPATHLEN];
 238 
 239         (void) snprintf(path, sizeof (path), "/native/proc/%d/%s", pid, name);
 240 
 241         return (open(path, mode));
 242 }
 243 
 244 /*
 245  * Given a pid and lwpid, open the named file under
 246  * /native/proc/<pid>/<lwpid>/name using the given mode.
 247  */
 248 static int
 249 open_lwpfile(pid_t pid, lwpid_t lwpid, int mode, const char *name)
 250 {
 251         char path[MAXPATHLEN];
 252 
 253         (void) snprintf(path, sizeof (path), "/native/proc/%d/lwp/%d/%s",
 254             pid, lwpid, name);
 255 
 256         return (open(path, mode));
 257 }
 258 
 259 static int
 260 get_status(pid_t pid, pstatus_t *psp)
 261 {
 262         int fd;
 263 
 264         if ((fd = open_procfile(pid, O_RDONLY, "status")) < 0)
 265                 return (-ESRCH);
 266 
 267         if (read(fd, psp, sizeof (pstatus_t)) != sizeof (pstatus_t)) {
 268                 (void) close(fd);
 269                 return (-EIO);
 270         }
 271 
 272         (void) close(fd);
 273 
 274         return (0);
 275 }
 276 
 277 static int
 278 get_lwpstatus(pid_t pid, lwpid_t lwpid, lwpstatus_t *lsp)
 279 {
 280         int fd;
 281 
 282         if ((fd = open_lwpfile(pid, lwpid, O_RDONLY, "lwpstatus")) < 0)
 283                 return (-ESRCH);
 284 
 285         if (read(fd, lsp, sizeof (lwpstatus_t)) != sizeof (lwpstatus_t)) {
 286                 (void) close(fd);
 287                 return (-EIO);
 288         }
 289 
 290         (void) close(fd);
 291 
 292         return (0);
 293 }
 294 
 295 static uintptr_t
 296 syscall_regs(int fd, uintptr_t fp, pid_t pid)
 297 {
 298         uintptr_t addr, done;
 299         struct frame fr;
 300         auxv_t auxv;
 301         int afd;
 302         Elf32_Phdr phdr;
 303 
 304         /*
 305          * Try to walk the stack looking for a return address that corresponds
 306          * to the traced process's lx_emulate_done symbol. This relies on the
 307          * fact that the brand library in the traced process is the same as the
 308          * brand library in this process (indeed, this is true of all processes
 309          * in a given branded zone).
 310          */
 311 
 312         /*
 313          * Find the base address for the brand library in the traced process
 314          * by grabbing the AT_PHDR auxv entry, reading in the program header
 315          * at that location and subtracting off the p_vaddr member. We use
 316          * this to compute the location of lx_emulate done in the traced
 317          * process.
 318          */
 319         if ((afd = open_procfile(pid, O_RDONLY, "auxv")) < 0)
 320                 return (0);
 321 
 322         do {
 323                 if (read(afd, &auxv, sizeof (auxv)) != sizeof (auxv)) {
 324                         (void) close(afd);
 325                         return (0);
 326                 }
 327         } while (auxv.a_type != AT_PHDR);
 328 
 329         (void) close(afd);
 330 
 331         if (pread(fd, &phdr, sizeof (phdr), auxv.a_un.a_val) != sizeof (phdr)) {
 332                 lx_debug("failed to read brand library's phdr");
 333                 return (0);
 334         }
 335 
 336         addr = auxv.a_un.a_val - phdr.p_vaddr;
 337         done = (uintptr_t)&lx_emulate_done - (uintptr_t)&_START_ + addr;
 338 
 339         fr.fr_savfp = fp;
 340 
 341         do {
 342                 addr = fr.fr_savfp;
 343                 if (pread(fd, &fr, sizeof (fr), addr) != sizeof (fr)) {
 344                         lx_debug("ptrace read failed for stack walk");
 345                         return (0);
 346                 }
 347 
 348                 if (addr >= fr.fr_savfp) {
 349                         lx_debug("ptrace stack not monotonically increasing "
 350                             "%p %p (%p)", addr, fr.fr_savfp, done);
 351                         return (0);
 352                 }
 353         } while (fr.fr_savpc != done);
 354 
 355         /*
 356          * The first argument to lx_emulate is known to be an lx_regs_t
 357          * structure and the ABI specifies that it will be placed on the stack
 358          * immediately preceeding the return address.
 359          */
 360         addr += sizeof (fr);
 361         if (pread(fd, &addr, sizeof (addr), addr) != sizeof (addr)) {
 362                 lx_debug("ptrace stack failed to read register set address");
 363                 return (0);
 364         }
 365 
 366         return (addr);
 367 }
 368 
 369 static int
 370 getregs(pid_t pid, lwpid_t lwpid, lx_user_regs_t *rp)
 371 {
 372         lwpstatus_t status;
 373         uintptr_t addr;
 374         int fd, ret;
 375 
 376         if ((ret = get_lwpstatus(pid, lwpid, &status)) != 0)
 377                 return (ret);
 378 
 379         if ((fd = open_procfile(pid, O_RDONLY, "as")) < 0)
 380                 return (-ESRCH);
 381 
 382         /*
 383          * If we find the syscall regs (and are therefore in an emulated
 384          * syscall, use the register set at given address. Otherwise, use the
 385          * registers as reported by /proc.
 386          */
 387         if ((addr = syscall_regs(fd, status.pr_reg[EBP], pid)) != 0) {
 388                 lx_regs_t regs;
 389 
 390                 if (pread(fd, &regs, sizeof (regs), addr) != sizeof (regs)) {
 391                         (void) close(fd);
 392                         lx_debug("ptrace failed to read register set");
 393                         return (-EIO);
 394                 }
 395 
 396                 (void) close(fd);
 397 
 398                 rp->lxur_ebx = regs.lxr_ebx;
 399                 rp->lxur_ecx = regs.lxr_ecx;
 400                 rp->lxur_edx = regs.lxr_edx;
 401                 rp->lxur_esi = regs.lxr_esi;
 402                 rp->lxur_edi = regs.lxr_edi;
 403                 rp->lxur_ebp = regs.lxr_ebp;
 404                 rp->lxur_eax = regs.lxr_eax;
 405                 rp->lxur_xds = status.pr_reg[DS];
 406                 rp->lxur_xes = status.pr_reg[ES];
 407                 rp->lxur_xfs = status.pr_reg[FS];
 408                 rp->lxur_xgs = regs.lxr_gs;
 409                 rp->lxur_orig_eax = regs.lxr_orig_eax;
 410                 rp->lxur_eip = regs.lxr_eip;
 411                 rp->lxur_xcs = status.pr_reg[CS];
 412                 rp->lxur_eflags = status.pr_reg[EFL];
 413                 rp->lxur_esp = regs.lxr_esp;
 414                 rp->lxur_xss = status.pr_reg[SS];
 415 
 416         } else {
 417                 (void) close(fd);
 418 
 419                 rp->lxur_ebx = status.pr_reg[EBX];
 420                 rp->lxur_ecx = status.pr_reg[ECX];
 421                 rp->lxur_edx = status.pr_reg[EDX];
 422                 rp->lxur_esi = status.pr_reg[ESI];
 423                 rp->lxur_edi = status.pr_reg[EDI];
 424                 rp->lxur_ebp = status.pr_reg[EBP];
 425                 rp->lxur_eax = status.pr_reg[EAX];
 426                 rp->lxur_xds = status.pr_reg[DS];
 427                 rp->lxur_xes = status.pr_reg[ES];
 428                 rp->lxur_xfs = status.pr_reg[FS];
 429                 rp->lxur_xgs = status.pr_reg[GS];
 430                 rp->lxur_orig_eax = 0;
 431                 rp->lxur_eip = status.pr_reg[EIP];
 432                 rp->lxur_xcs = status.pr_reg[CS];
 433                 rp->lxur_eflags = status.pr_reg[EFL];
 434                 rp->lxur_esp = status.pr_reg[UESP];
 435                 rp->lxur_xss = status.pr_reg[SS];
 436 
 437                 /*
 438                  * If the target process has just returned from exec, it's not
 439                  * going to be sitting in the emulation function. In that case
 440                  * we need to manually fake up the values for %eax and orig_eax
 441                  * to indicate a successful return and that the traced process
 442                  * had called execve (respectively).
 443                  */
 444                 if (status.pr_why == PR_SYSEXIT &&
 445                     status.pr_what == SYS_execve) {
 446                         rp->lxur_eax = 0;
 447                         rp->lxur_orig_eax = LX_SYS_execve;
 448                 }
 449         }
 450 
 451         return (0);
 452 }
 453 
 454 static int
 455 setregs(pid_t pid, lwpid_t lwpid, const lx_user_regs_t *rp)
 456 {
 457         long ctl[1 + sizeof (prgregset_t) / sizeof (long)];
 458         lwpstatus_t status;
 459         uintptr_t addr;
 460         int fd, ret;
 461 
 462         if ((ret = get_lwpstatus(pid, lwpid, &status)) != 0)
 463                 return (ret);
 464 
 465         if ((fd = open_procfile(pid, O_RDWR, "as")) < 0)
 466                 return (-ESRCH);
 467 
 468         /*
 469          * If we find the syscall regs (and are therefore in an emulated
 470          * syscall, modify the register set at given address and set the
 471          * remaining registers through the /proc interface. Otherwise just use
 472          * the /proc interface to set register values;
 473          */
 474         if ((addr = syscall_regs(fd, status.pr_reg[EBP], pid)) != 0) {
 475                 lx_regs_t regs;
 476 
 477                 regs.lxr_ebx = rp->lxur_ebx;
 478                 regs.lxr_ecx = rp->lxur_ecx;
 479                 regs.lxr_edx = rp->lxur_edx;
 480                 regs.lxr_esi = rp->lxur_esi;
 481                 regs.lxr_edi = rp->lxur_edi;
 482                 regs.lxr_ebp = rp->lxur_ebp;
 483                 regs.lxr_eax = rp->lxur_eax;
 484                 regs.lxr_gs = rp->lxur_xgs;
 485                 regs.lxr_orig_eax = rp->lxur_orig_eax;
 486                 regs.lxr_eip = rp->lxur_eip;
 487                 regs.lxr_esp = rp->lxur_esp;
 488 
 489                 if (pwrite(fd, &regs, sizeof (regs), addr) != sizeof (regs)) {
 490                         (void) close(fd);
 491                         lx_debug("ptrace failed to write register set");
 492                         return (-EIO);
 493                 }
 494 
 495                 (void) close(fd);
 496 
 497                 status.pr_reg[DS] = rp->lxur_xds;
 498                 status.pr_reg[ES] = rp->lxur_xes;
 499                 status.pr_reg[FS] = rp->lxur_xfs;
 500                 status.pr_reg[CS] = rp->lxur_xcs;
 501                 status.pr_reg[EFL] = rp->lxur_eflags;
 502                 status.pr_reg[SS] = rp->lxur_xss;
 503 
 504         } else {
 505                 (void) close(fd);
 506 
 507                 status.pr_reg[EBX] = rp->lxur_ebx;
 508                 status.pr_reg[ECX] = rp->lxur_ecx;
 509                 status.pr_reg[EDX] = rp->lxur_edx;
 510                 status.pr_reg[ESI] = rp->lxur_esi;
 511                 status.pr_reg[EDI] = rp->lxur_edi;
 512                 status.pr_reg[EBP] = rp->lxur_ebp;
 513                 status.pr_reg[EAX] = rp->lxur_eax;
 514                 status.pr_reg[DS] = rp->lxur_xds;
 515                 status.pr_reg[ES] = rp->lxur_xes;
 516                 status.pr_reg[FS] = rp->lxur_xfs;
 517                 status.pr_reg[GS] = rp->lxur_xgs;
 518                 status.pr_reg[EIP] = rp->lxur_eip;
 519                 status.pr_reg[CS] = rp->lxur_xcs;
 520                 status.pr_reg[EFL] = rp->lxur_eflags;
 521                 status.pr_reg[UESP] = rp->lxur_esp;
 522                 status.pr_reg[SS] = rp->lxur_xss;
 523                 status.pr_reg[SS] = rp->lxur_xss;
 524         }
 525 
 526         if ((fd = open_lwpfile(pid, lwpid, O_WRONLY, "lwpctl")) < 0)
 527                 return (-ESRCH);
 528 
 529         ctl[0] = PCSREG;
 530         bcopy(status.pr_reg, &ctl[1], sizeof (prgregset_t));
 531 
 532         if (write(fd, &ctl, sizeof (ctl)) != sizeof (ctl)) {
 533                 (void) close(fd);
 534                 return (-EIO);
 535         }
 536 
 537         (void) close(fd);
 538 
 539         return (0);
 540 }
 541 
 542 static int
 543 getfpregs(pid_t pid, lwpid_t lwpid, lx_user_fpregs_t *rp)
 544 {
 545         lwpstatus_t status;
 546         struct _fpstate *fp;
 547         char *data;
 548         int ret, i;
 549 
 550         if ((ret = get_lwpstatus(pid, lwpid, &status)) != 0)
 551                 return (ret);
 552 
 553         fp = (struct _fpstate *)&status.pr_fpreg.fp_reg_set.fpchip_state;
 554 
 555         rp->lxuf_cwd = fp->cw;
 556         rp->lxuf_swd = fp->sw;
 557         rp->lxuf_twd = fp->tag;
 558         rp->lxuf_fip = fp->ipoff;
 559         rp->lxuf_fcs = fp->cssel;
 560         rp->lxuf_foo = fp->dataoff;
 561         rp->lxuf_fos = fp->datasel;
 562 
 563         /*
 564          * The Linux structure uses 10 bytes per floating-point register.
 565          */
 566         data = (char *)&rp->lxuf_st_space[0];
 567         for (i = 0; i < 8; i++) {
 568                 bcopy(&fp->_st[i], data, 10);
 569                 data += 10;
 570         }
 571 
 572         return (0);
 573 }
 574 
 575 static int
 576 setfpregs(pid_t pid, lwpid_t lwpid, const lx_user_fpregs_t *rp)
 577 {
 578         lwpstatus_t status;
 579         struct {
 580                 long cmd;
 581                 prfpregset_t regs;
 582         } ctl;
 583         struct _fpstate *fp = (struct _fpstate *)&ctl.regs;
 584         char *data;
 585         int ret, i, fd;
 586 
 587         if ((ret = get_lwpstatus(pid, lwpid, &status)) != 0)
 588                 return (ret);
 589 
 590         bcopy(&status.pr_fpreg, &ctl.regs, sizeof (ctl.regs));
 591 
 592         fp->cw = rp->lxuf_cwd;
 593         fp->sw = rp->lxuf_swd;
 594         fp->tag = rp->lxuf_twd;
 595         fp->ipoff = rp->lxuf_fip;
 596         fp->cssel = rp->lxuf_fcs;
 597         fp->dataoff = rp->lxuf_foo;
 598         fp->datasel = rp->lxuf_fos;
 599 
 600         /*
 601          * The Linux structure uses 10 bytes per floating-point register.
 602          */
 603         data = (char *)&rp->lxuf_st_space[0];
 604         for (i = 0; i < 8; i++) {
 605                 bcopy(data, &fp->_st[i], 10);
 606                 data += 10;
 607         }
 608 
 609         if ((fd = open_lwpfile(pid, lwpid, O_WRONLY, "lwpctl")) < 0)
 610                 return (-ESRCH);
 611 
 612         ctl.cmd = PCSFPREG;
 613         if (write(fd, &ctl, sizeof (ctl)) != sizeof (ctl)) {
 614                 (void) close(fd);
 615                 return (-EIO);
 616         }
 617 
 618         (void) close(fd);
 619 
 620         return (0);
 621 }
 622 
 623 
 624 static int
 625 getfpxregs(pid_t pid, lwpid_t lwpid, lx_user_fpxregs_t *rp)
 626 {
 627         lwpstatus_t status;
 628         struct _fpstate *fp;
 629         int ret, i;
 630 
 631         if ((ret = get_lwpstatus(pid, lwpid, &status)) != 0)
 632                 return (ret);
 633 
 634         fp = (struct _fpstate *)&status.pr_fpreg.fp_reg_set.fpchip_state;
 635 
 636         rp->lxux_cwd = (uint16_t)fp->cw;
 637         rp->lxux_swd = (uint16_t)fp->sw;
 638         rp->lxux_twd = (uint16_t)fp->tag;
 639         rp->lxux_fop = (uint16_t)(fp->cssel >> 16);
 640         rp->lxux_fip = fp->ipoff;
 641         rp->lxux_fcs = (uint16_t)fp->cssel;
 642         rp->lxux_foo = fp->dataoff;
 643         rp->lxux_fos = fp->datasel;
 644         rp->lxux_mxcsr = status.pr_fpreg.fp_reg_set.fpchip_state.mxcsr;
 645 
 646         bcopy(fp->xmm, rp->lxux_xmm_space, sizeof (rp->lxux_xmm_space));
 647         bzero(rp->lxux_st_space, sizeof (rp->lxux_st_space));
 648         for (i = 0; i < 8; i++) {
 649                 bcopy(&fp->_st[i], &rp->lxux_st_space[i * 4],
 650                     sizeof (fp->_st[i]));
 651         }
 652 
 653         return (0);
 654 }
 655 
 656 static int
 657 setfpxregs(pid_t pid, lwpid_t lwpid, const lx_user_fpxregs_t *rp)
 658 {
 659         lwpstatus_t status;
 660         struct {
 661                 long cmd;
 662                 prfpregset_t regs;
 663         } ctl;
 664         struct _fpstate *fp = (struct _fpstate *)&ctl.regs;
 665         int ret, i, fd;
 666 
 667         if ((ret = get_lwpstatus(pid, lwpid, &status)) != 0)
 668                 return (ret);
 669 
 670         bcopy(&status.pr_fpreg, &ctl.regs, sizeof (ctl.regs));
 671 
 672         fp->cw = rp->lxux_cwd;
 673         fp->sw = rp->lxux_swd;
 674         fp->tag = rp->lxux_twd;
 675         fp->ipoff = rp->lxux_fip;
 676         fp->cssel = rp->lxux_fcs | (rp->lxux_fop << 16);
 677         fp->dataoff = rp->lxux_foo;
 678         fp->datasel = rp->lxux_fos;
 679 
 680         bcopy(rp->lxux_xmm_space, fp->xmm, sizeof (rp->lxux_xmm_space));
 681         for (i = 0; i < 8; i++) {
 682                 bcopy(&rp->lxux_st_space[i * 4], &fp->_st[i],
 683                     sizeof (fp->_st[i]));
 684         }
 685 
 686         if ((fd = open_lwpfile(pid, lwpid, O_WRONLY, "lwpctl")) < 0)
 687                 return (-ESRCH);
 688 
 689         ctl.cmd = PCSFPREG;
 690         if (write(fd, &ctl, sizeof (ctl)) != sizeof (ctl)) {
 691                 (void) close(fd);
 692                 return (-EIO);
 693         }
 694 
 695         (void) close(fd);
 696 
 697         return (0);
 698 }
 699 
 700 /*
 701  * Solaris does not allow a process to manipulate its own or some
 702  * other process's debug registers.  Linux ptrace(2) allows this
 703  * and gdb manipulates them for its watchpoint implementation.
 704  *
 705  * We keep a pseudo set of debug registers for each traced process
 706  * and map their contents into the appropriate PCWATCH /proc
 707  * operations when they are activated by gdb.
 708  *
 709  * To understand how the debug registers work on x86 machines,
 710  * see section 13.1 of the AMD x86-64 Architecture Programmer's
 711  * Manual, Volume 2, System Programming.
 712  */
 713 static uintptr_t *
 714 debug_registers(pid_t pid)
 715 {
 716         ptrace_state_map_t *p;
 717 
 718         (void) mutex_lock(&ptrace_map_mtx);
 719         for (p = ptrace_state_map; p != NULL; p = p->psm_next) {
 720                 if (p->psm_pid == pid)
 721                         break;
 722         }
 723         if (p == NULL && (p = malloc(sizeof (*p))) != NULL) {
 724                 bzero(p, sizeof (*p));
 725                 p->psm_pid = pid;
 726                 p->psm_next = ptrace_state_map;
 727                 p->psm_debugreg[6] = 0xffff0ff0;     /* read as ones */
 728                 ptrace_state_map = p;
 729         }
 730         (void) mutex_unlock(&ptrace_map_mtx);
 731         return (p != NULL? p->psm_debugreg : NULL);
 732 }
 733 
 734 static void
 735 free_debug_registers(pid_t pid)
 736 {
 737         ptrace_state_map_t **pp;
 738         ptrace_state_map_t *p;
 739 
 740         /* ASSERT(MUTEX_HELD(&ptrace_map_mtx) */
 741         for (pp = &ptrace_state_map; (p = *pp) != NULL; pp = &p->psm_next) {
 742                 if (p->psm_pid == pid) {
 743                         *pp = p->psm_next;
 744                         free(p);
 745                         break;
 746                 }
 747         }
 748 }
 749 
 750 static int
 751 setup_watchpoints(pid_t pid, uintptr_t *debugreg)
 752 {
 753         int dr7 = debugreg[7];
 754         int lrw;
 755         int fd;
 756         size_t size = NULL;
 757         prwatch_t prwatch[4];
 758         int nwatch;
 759         int i;
 760         int wflags = NULL;
 761         int error;
 762         struct {
 763                 long req;
 764                 prwatch_t prwatch;
 765         } ctl;
 766 
 767         /* find all watched areas */
 768         if ((fd = open_procfile(pid, O_RDONLY, "watch")) < 0)
 769                 return (-ESRCH);
 770         nwatch = read(fd, prwatch, sizeof (prwatch)) / sizeof (prwatch_t);
 771         (void) close(fd);
 772         if ((fd = open_procfile(pid, O_WRONLY, "ctl")) < 0)
 773                 return (-ESRCH);
 774         /* clear all watched areas */
 775         for (i = 0; i < nwatch; i++) {
 776                 ctl.req = PCWATCH;
 777                 ctl.prwatch = prwatch[i];
 778                 ctl.prwatch.pr_wflags = 0;
 779                 if (write(fd, &ctl, sizeof (ctl)) != sizeof (ctl)) {
 780                         error = -errno;
 781                         (void) close(fd);
 782                         return (error);
 783                 }
 784         }
 785         /* establish all new watched areas */
 786         for (i = 0; i < 4; i++) {
 787                 if ((dr7 & (1 << (2 * i))) == 0)      /* enabled? */
 788                         continue;
 789                 lrw = (dr7 >> (16 + (4 * i))) & 0xf;
 790                 switch (lrw >> 2) {       /* length */
 791                 case 0: size = 1; break;
 792                 case 1: size = 2; break;
 793                 case 2: size = 8; break;
 794                 case 3: size = 4; break;
 795                 }
 796                 switch (lrw & 0x3) {        /* mode */
 797                 case 0: wflags = WA_EXEC; break;
 798                 case 1: wflags = WA_WRITE; break;
 799                 case 2: continue;
 800                 case 3: wflags = WA_READ | WA_WRITE; break;
 801                 }
 802                 ctl.req = PCWATCH;
 803                 ctl.prwatch.pr_vaddr = debugreg[i];
 804                 ctl.prwatch.pr_size = size;
 805                 ctl.prwatch.pr_wflags = wflags | WA_TRAPAFTER;
 806                 if (write(fd, &ctl, sizeof (ctl)) != sizeof (ctl)) {
 807                         error = -errno;
 808                         (void) close(fd);
 809                         return (error);
 810                 }
 811         }
 812         (void) close(fd);
 813         return (0);
 814 }
 815 
 816 /*
 817  * Returns TRUE if the process is traced, FALSE otherwise.  This is only true
 818  * if the process is currently stopped, and has been traced using PTRACE_TRACEME
 819  * or PTRACE_ATTACH.
 820  */
 821 static int
 822 is_traced(pid_t pid)
 823 {
 824         ptrace_monitor_map_t *p;
 825         pstatus_t status;
 826 
 827         if (get_status(pid, &status) != 0)
 828                 return (0);
 829 
 830         if ((status.pr_flags & PR_PTRACE) &&
 831             (status.pr_ppid == getpid()) &&
 832             (status.pr_lwp.pr_flags & PR_ISTOP))
 833                 return (1);
 834 
 835         (void) mutex_lock(&ptrace_map_mtx);
 836         for (p = ptrace_monitor_map; p != NULL; p = p->pmm_next) {
 837                 if (p->pmm_target == pid) {
 838                         (void) mutex_unlock(&ptrace_map_mtx);
 839                         return (1);
 840                 }
 841         }
 842         (void) mutex_unlock(&ptrace_map_mtx);
 843 
 844         return (0);
 845 }
 846 
 847 static int
 848 ptrace_trace_common(int fd)
 849 {
 850         struct {
 851                 long cmd;
 852                 union {
 853                         long flags;
 854                         sigset_t signals;
 855                         fltset_t faults;
 856                 } arg;
 857         } ctl;
 858         size_t size;
 859 
 860         ctl.cmd = PCSTRACE;
 861         prfillset(&ctl.arg.signals);
 862         size = sizeof (long) + sizeof (sigset_t);
 863         if (write(fd, &ctl, size) != size)
 864                 return (-1);
 865 
 866         ctl.cmd = PCSFAULT;
 867         premptyset(&ctl.arg.faults);
 868         size = sizeof (long) + sizeof (fltset_t);
 869         if (write(fd, &ctl, size) != size)
 870                 return (-1);
 871 
 872         ctl.cmd = PCUNSET;
 873         ctl.arg.flags = PR_FORK;
 874         size = sizeof (long) + sizeof (long);
 875         if (write(fd, &ctl, size) != size)
 876                 return (-1);
 877 
 878         return (0);
 879 }
 880 
 881 /*
 882  * Notify that parent that we wish to be traced.  This is the equivalent of:
 883  *
 884  *      1. Stop on all signals, and nothing else
 885  *      2. Turn off inherit-on-fork flag
 886  *      3. Set ptrace compatible flag
 887  *
 888  * If we are not the main thread, then the client is trying to request behavior
 889  * by which one of its own thread is to be traced.  We don't support this mode
 890  * of operation.
 891  */
 892 static int
 893 ptrace_traceme(void)
 894 {
 895         int fd, ret;
 896         int error;
 897         long ctl[2];
 898         pstatus_t status;
 899         pid_t pid = getpid();
 900 
 901         if (_lwp_self() != 1) {
 902                 lx_unsupported(gettext(
 903                     "thread %d calling PTRACE_TRACEME is unsupported"),
 904                     _lwp_self());
 905                 return (-ENOTSUP);
 906         }
 907 
 908         if ((ret = get_status(pid, &status)) != 0)
 909                 return (ret);
 910 
 911         /*
 912          * Why would a process try to do this twice? I'm not sure, but there's
 913          * a conformance test which wants this to fail just so.
 914          */
 915         if (status.pr_flags & PR_PTRACE)
 916                 return (-EPERM);
 917 
 918         if ((fd = open_procfile(pid, O_WRONLY, "ctl")) < 0)
 919                 return (-errno);
 920 
 921         ctl[0] = PCSET;
 922         ctl[1] = PR_PTRACE;
 923         error = 0;
 924         if (write(fd, ctl, sizeof (ctl)) != sizeof (ctl) ||
 925             ptrace_trace_common(fd) != 0)
 926                 error = -errno;
 927 
 928         (void) close(fd);
 929         return (error);
 930 }
 931 
 932 /*
 933  * Read a word of data from the given address.  Because this is a process-wide
 934  * action, we don't need the lwpid.
 935  */
 936 static int
 937 ptrace_peek(pid_t pid, uintptr_t addr, int *ret)
 938 {
 939         int fd, data;
 940 
 941         if (!is_traced(pid))
 942                 return (-ESRCH);
 943 
 944         if ((fd = open_procfile(pid, O_RDONLY, "as")) < 0)
 945                 return (-ESRCH);
 946 
 947         if (pread(fd, &data, sizeof (data), addr) != sizeof (data)) {
 948                 (void) close(fd);
 949                 return (-EIO);
 950         }
 951 
 952         (void) close(fd);
 953 
 954         if (uucopy(&data, ret, sizeof (data)) != 0)
 955                 return (-errno);
 956 
 957         return (0);
 958 }
 959 
 960 #define LX_USER_BOUND(m)        \
 961 (offsetof(lx_user_t, m) + sizeof (((lx_user_t *)NULL)->m))
 962 
 963 static int
 964 ptrace_peek_user(pid_t pid, lwpid_t lwpid, uintptr_t off, int *ret)
 965 {
 966         int err, data;
 967         uintptr_t *debugreg;
 968         int dreg;
 969 
 970         if (!is_traced(pid))
 971                 return (-ESRCH);
 972 
 973         /*
 974          * The offset specified by the user is an offset into the Linux
 975          * user structure (seriously). Rather than constructing a full
 976          * user structure, we figure out which part of the user structure
 977          * the offset is in, and fill in just that component.
 978          */
 979         if (off < LX_USER_BOUND(lxu_regs)) {
 980                 lx_user_regs_t regs;
 981 
 982                 if ((err = getregs(pid, lwpid, &regs)) != 0)
 983                         return (err);
 984 
 985                 data = *(int *)((uintptr_t)&regs + off -
 986                     offsetof(lx_user_t, lxu_regs));
 987 
 988         } else if (off < LX_USER_BOUND(lxu_fpvalid)) {
 989                 lx_err(gettext("offset = %lu\n"), off);
 990                 assert(0);
 991         } else if (off < LX_USER_BOUND(lxu_i387)) {
 992                 lx_user_fpregs_t regs;
 993 
 994                 if ((err = getfpregs(pid, lwpid, &regs)) != 0)
 995                         return (err);
 996 
 997                 data = *(int *)((uintptr_t)&regs + off -
 998                     offsetof(lx_user_t, lxu_i387));
 999 
1000         } else if (off < LX_USER_BOUND(lxu_tsize)) {
1001                 lx_err(gettext("offset = %lu\n"), off);
1002                 assert(0);
1003         } else if (off < LX_USER_BOUND(lxu_dsize)) {
1004                 lx_err(gettext("offset = %lu\n"), off);
1005                 assert(0);
1006         } else if (off < LX_USER_BOUND(lxu_ssize)) {
1007                 lx_err(gettext("offset = %lu\n"), off);
1008                 assert(0);
1009         } else if (off < LX_USER_BOUND(lxu_start_code)) {
1010                 lx_err(gettext("offset = %lu\n"), off);
1011                 assert(0);
1012         } else if (off < LX_USER_BOUND(lxu_start_stack)) {
1013                 lx_err(gettext("offset = %lu\n"), off);
1014                 assert(0);
1015         } else if (off < LX_USER_BOUND(lxu_signal)) {
1016                 lx_err(gettext("offset = %lu\n"), off);
1017                 assert(0);
1018         } else if (off < LX_USER_BOUND(lxu_reserved)) {
1019                 lx_err(gettext("offset = %lu\n"), off);
1020                 assert(0);
1021         } else if (off < LX_USER_BOUND(lxu_ar0)) {
1022                 lx_err(gettext("offset = %lu\n"), off);
1023                 assert(0);
1024         } else if (off < LX_USER_BOUND(lxu_fpstate)) {
1025                 lx_err(gettext("offset = %lu\n"), off);
1026                 assert(0);
1027         } else if (off < LX_USER_BOUND(lxu_magic)) {
1028                 lx_err(gettext("offset = %lu\n"), off);
1029                 assert(0);
1030         } else if (off < LX_USER_BOUND(lxu_comm)) {
1031                 lx_err(gettext("offset = %lu\n"), off);
1032                 assert(0);
1033         } else if (off < LX_USER_BOUND(lxu_debugreg)) {
1034                 dreg = (off - offsetof(lx_user_t, lxu_debugreg)) / sizeof (int);
1035                 if (dreg == 4)          /* aliased */
1036                         dreg = 6;
1037                 else if (dreg == 5)     /* aliased */
1038                         dreg = 7;
1039                 if ((debugreg = debug_registers(pid)) != NULL)
1040                         data = debugreg[dreg];
1041                 else
1042                         data = 0;
1043         } else {
1044                 lx_unsupported(gettext(
1045                     "unsupported ptrace %s user offset: 0x%x\n"), "peek", off);
1046                 assert(0);
1047                 return (-ENOTSUP);
1048         }
1049 
1050         if (uucopy(&data, ret, sizeof (data)) != 0)
1051                 return (-errno);
1052 
1053         return (0);
1054 }
1055 
/*
 * Store one int-sized word at address addr in the traced process.  The
 * address space is shared by the whole process, so no lwpid is required.
 *
 * Returns 0 on success, -ESRCH if the process is not traced or its "as"
 * file cannot be opened, -EINVAL if addr is not word-aligned, and -EIO if
 * the word cannot be written in full.
 */
static int
ptrace_poke(pid_t pid, uintptr_t addr, int data)
{
        int asfd;
        ssize_t nwritten;

        if (!is_traced(pid))
                return (-ESRCH);

        if ((addr & 0x3) != 0)
                return (-EINVAL);

        asfd = open_procfile(pid, O_WRONLY, "as");
        if (asfd < 0)
                return (-ESRCH);

        nwritten = pwrite(asfd, &data, sizeof (data), addr);
        (void) close(asfd);

        if (nwritten != sizeof (data))
                return (-EIO);

        return (0);
}
1083 
1084 static int
1085 ptrace_poke_user(pid_t pid, lwpid_t lwpid, uintptr_t off, int data)
1086 {
1087         lx_user_regs_t regs;
1088         int err = 0;
1089         uintptr_t *debugreg;
1090         int dreg;
1091 
1092         if (!is_traced(pid))
1093                 return (-ESRCH);
1094 
1095         if (off & 0x3)
1096                 return (-EINVAL);
1097 
1098         if (off < offsetof(lx_user_t, lxu_regs) + sizeof (lx_user_regs_t)) {
1099                 if ((err = getregs(pid, lwpid, &regs)) != 0)
1100                         return (err);
1101                 *(int *)((uintptr_t)&regs + off -
1102                     offsetof(lx_user_t, lxu_regs)) = data;
1103                 return (setregs(pid, lwpid, &regs));
1104         }
1105 
1106         if (off >= offsetof(lx_user_t, lxu_debugreg) &&
1107             off < offsetof(lx_user_t, lxu_debugreg) + 8 * sizeof (int)) {
1108                 dreg = (off - offsetof(lx_user_t, lxu_debugreg)) / sizeof (int);
1109                 if (dreg == 4)          /* aliased */
1110                         dreg = 6;
1111                 else if (dreg == 5)     /* aliased */
1112                         dreg = 7;
1113                 if ((debugreg = debug_registers(pid)) != NULL) {
1114                         debugreg[dreg] = data;
1115                         if (dreg == 7)
1116                                 err = setup_watchpoints(pid, debugreg);
1117                 }
1118                 return (err);
1119         }
1120 
1121         lx_unsupported(gettext("unsupported ptrace %s user offset: 0x%x\n"),
1122             "poke", off);
1123         assert(0);
1124         return (-ENOTSUP);
1125 }
1126 
1127 static int
1128 ptrace_cont_common(int fd, int sig, int run, int step)
1129 {
1130         long ctl[1 + 1 + sizeof (siginfo_t) / sizeof (long) + 2];
1131         long *ctlp = ctl;
1132         size_t size;
1133 
1134         assert(0 <= sig && sig < LX_NSIG);
1135         assert(!step || run);
1136 
1137         /*
1138          * Clear the current signal.
1139          */
1140         *ctlp++ = PCCSIG;
1141 
1142         /*
1143          * Send a signal if one was specified.
1144          */
1145         if (sig != 0 && sig != LX_SIGSTOP) {
1146                 siginfo_t *infop;
1147 
1148                 *ctlp++ = PCSSIG;
1149                 infop = (siginfo_t *)ctlp;
1150                 bzero(infop, sizeof (siginfo_t));
1151                 infop->si_signo = ltos_signo[sig];
1152 
1153                 ctlp += sizeof (siginfo_t) / sizeof (long);
1154         }
1155 
1156         /*
1157          * If run is true, set the lwp running.
1158          */
1159         if (run) {
1160                 *ctlp++ = PCRUN;
1161                 *ctlp++ = step ? PRSTEP : 0;
1162         }
1163 
1164         size = (char *)ctlp - (char *)&ctl[0];
1165         assert(size <= sizeof (ctl));
1166 
1167         if (write(fd, ctl, size) != size) {
1168                 lx_debug("failed to continue %s", strerror(errno));
1169                 return (-EIO);
1170         }
1171 
1172         return (0);
1173 }
1174 
1175 static int
1176 ptrace_cont_monitor(ptrace_monitor_map_t *p)
1177 {
1178         long ctl[2];
1179         int fd;
1180 
1181         fd = open_procfile(p->pmm_monitor, O_WRONLY, "ctl");
1182         if (fd < 0) {
1183                 lx_debug("failed to open monitor ctl %d",
1184                     errno);
1185                 return (-EIO);
1186         }
1187 
1188         ctl[0] = PCRUN;
1189         ctl[1] = PRCSIG;
1190         if (write(fd, ctl, sizeof (ctl)) != sizeof (ctl)) {
1191                 (void) close(fd);
1192                 return (-EIO);
1193         }
1194 
1195         (void) close(fd);
1196 
1197         return (0);
1198 }
1199 
1200 static int
1201 ptrace_cont(pid_t lxpid, pid_t pid, lwpid_t lwpid, int sig, int step)
1202 {
1203         ptrace_monitor_map_t *p;
1204         uintptr_t *debugreg;
1205         int fd, ret;
1206 
1207         if (!is_traced(pid))
1208                 return (-ESRCH);
1209 
1210         if (sig < 0 || sig >= LX_NSIG)
1211                 return (-EINVAL);
1212 
1213         if ((fd = open_lwpfile(pid, lwpid, O_WRONLY, "lwpctl")) < 0)
1214                 return (-ESRCH);
1215 
1216         if ((ret = ptrace_cont_common(fd, sig, 1, step)) != 0) {
1217                 (void) close(fd);
1218                 return (ret);
1219         }
1220 
1221         (void) close(fd);
1222 
1223         /* kludge: use debugreg[4] to remember the single-step flag */
1224         if ((debugreg = debug_registers(pid)) != NULL)
1225                 debugreg[4] = step;
1226 
1227         /*
1228          * Check for a monitor and get it moving if we find it. If any of the
1229          * /proc operations fail, we're kind of sunk so just return an error.
1230          */
1231         (void) mutex_lock(&ptrace_map_mtx);
1232         for (p = ptrace_monitor_map; p != NULL; p = p->pmm_next) {
1233                 if (p->pmm_target == lxpid) {
1234                         if ((ret = ptrace_cont_monitor(p)) != 0)
1235                                 return (ret);
1236                         break;
1237                 }
1238         }
1239         (void) mutex_unlock(&ptrace_map_mtx);
1240 
1241         return (0);
1242 }
1243 
1244 /*
1245  * If a monitor exists for this traced process, dispose of it.
1246  * First turn off its ptrace flag so we won't be notified of its
1247  * impending demise.  We ignore errors for this step since they
1248  * indicate only that the monitor has been damaged due to pilot
1249  * error.  Then kill the monitor, and wait for it.  If the wait
1250  * succeeds we can dispose of the corpse, otherwise another thread's
1251  * wait call has collected it and we need to set a flag in the
1252  * structure so that if can be picked up in wait.
1253  */
1254 static void
1255 monitor_kill(pid_t lxpid, pid_t pid)
1256 {
1257         ptrace_monitor_map_t *p, **pp;
1258         pid_t mpid;
1259         int fd;
1260         long ctl[2];
1261 
1262         (void) mutex_lock(&ptrace_map_mtx);
1263         free_debug_registers(pid);
1264         for (pp = &ptrace_monitor_map; (p = *pp) != NULL; pp = &p->pmm_next) {
1265                 if (p->pmm_target == lxpid) {
1266                         mpid = p->pmm_monitor;
1267                         if ((fd = open_procfile(mpid, O_WRONLY, "ctl")) >= 0) {
1268                                 ctl[0] = PCUNSET;
1269                                 ctl[1] = PR_PTRACE;
1270                                 (void) write(fd, ctl, sizeof (ctl));
1271                                 (void) close(fd);
1272                         }
1273 
1274                         (void) kill(mpid, SIGKILL);
1275 
1276                         if (waitpid(mpid, NULL, 0) == mpid) {
1277                                 *pp = p->pmm_next;
1278                                 free(p);
1279                         } else {
1280                                 p->pmm_exiting = 1;
1281                         }
1282 
1283                         break;
1284                 }
1285         }
1286         (void) mutex_unlock(&ptrace_map_mtx);
1287 }
1288 
/*
 * Kill a traced process with SIGKILL and tear down its monitor, if any.
 *
 * Returns -ESRCH if the process is not traced; otherwise returns the
 * result of kill(2) unchanged.  The monitor is torn down whether or not
 * the kill succeeded.
 */
static int
ptrace_kill(pid_t lxpid, pid_t pid)
{
        int killed;

        if (!is_traced(pid))
                return (-ESRCH);

        killed = kill(pid, SIGKILL);
        monitor_kill(lxpid, pid);

        return (killed);
}
1304 
1305 static int
1306 ptrace_step(pid_t lxpid, pid_t pid, lwpid_t lwpid, int sig)
1307 {
1308         return (ptrace_cont(lxpid, pid, lwpid, sig, 1));
1309 }
1310 
1311 static int
1312 ptrace_getregs(pid_t pid, lwpid_t lwpid, uintptr_t addr)
1313 {
1314         lx_user_regs_t regs;
1315         int ret;
1316 
1317         if (!is_traced(pid))
1318                 return (-ESRCH);
1319 
1320         if ((ret = getregs(pid, lwpid, &regs)) != 0)
1321                 return (ret);
1322 
1323         if (uucopy(&regs, (void *)addr, sizeof (regs)) != 0)
1324                 return (-errno);
1325 
1326         return (0);
1327 }
1328 
1329 static int
1330 ptrace_setregs(pid_t pid, lwpid_t lwpid, uintptr_t addr)
1331 {
1332         lx_user_regs_t regs;
1333 
1334         if (!is_traced(pid))
1335                 return (-ESRCH);
1336 
1337         if (uucopy((void *)addr, &regs, sizeof (regs)) != 0)
1338                 return (-errno);
1339 
1340         return (setregs(pid, lwpid, &regs));
1341 }
1342 
1343 static int
1344 ptrace_getfpregs(pid_t pid, lwpid_t lwpid, uintptr_t addr)
1345 {
1346         lx_user_fpregs_t regs;
1347         int ret;
1348 
1349         if (!is_traced(pid))
1350                 return (-ESRCH);
1351 
1352         if ((ret = getfpregs(pid, lwpid, &regs)) != 0)
1353                 return (ret);
1354 
1355         if (uucopy(&regs, (void *)addr, sizeof (regs)) != 0)
1356                 return (-errno);
1357 
1358         return (0);
1359 }
1360 
1361 static int
1362 ptrace_setfpregs(pid_t pid, lwpid_t lwpid, uintptr_t addr)
1363 {
1364         lx_user_fpregs_t regs;
1365 
1366         if (!is_traced(pid))
1367                 return (-ESRCH);
1368 
1369         if (uucopy((void *)addr, &regs, sizeof (regs)) != 0)
1370                 return (-errno);
1371 
1372         return (setfpregs(pid, lwpid, &regs));
1373 }
1374 
1375 static int
1376 ptrace_getfpxregs(pid_t pid, lwpid_t lwpid, uintptr_t addr)
1377 {
1378         lx_user_fpxregs_t regs;
1379         int ret;
1380 
1381         if (!is_traced(pid))
1382                 return (-ESRCH);
1383 
1384         if ((ret = getfpxregs(pid, lwpid, &regs)) != 0)
1385                 return (ret);
1386 
1387         if (uucopy(&regs, (void *)addr, sizeof (regs)) != 0)
1388                 return (-errno);
1389 
1390         return (0);
1391 }
1392 
1393 static int
1394 ptrace_setfpxregs(pid_t pid, lwpid_t lwpid, uintptr_t addr)
1395 {
1396         lx_user_fpxregs_t regs;
1397 
1398         if (!is_traced(pid))
1399                 return (-ESRCH);
1400 
1401         if (uucopy((void *)addr, &regs, sizeof (regs)) != 0)
1402                 return (-errno);
1403 
1404         return (setfpxregs(pid, lwpid, &regs));
1405 }
1406 
1407 static void __NORETURN
1408 ptrace_monitor(int fd)
1409 {
1410         struct {
1411                 long cmd;
1412                 union {
1413                         long flags;
1414                         sigset_t signals;
1415                         fltset_t faults;
1416                 } arg;
1417         } ctl;
1418         size_t size;
1419         int monfd;
1420         int rv;
1421 
1422         monfd = open_procfile(getpid(), O_WRONLY, "ctl");
1423 
1424         ctl.cmd = PCSTRACE;     /* trace only SIGTRAP */
1425         premptyset(&ctl.arg.signals);
1426         praddset(&ctl.arg.signals, SIGTRAP);
1427         size = sizeof (long) + sizeof (sigset_t);
1428         (void) write(monfd, &ctl, size);    /* can't fail */
1429 
1430         ctl.cmd = PCSFAULT;
1431         premptyset(&ctl.arg.faults);
1432         size = sizeof (long) + sizeof (fltset_t);
1433         (void) write(monfd, &ctl, size);    /* can't fail */
1434 
1435         ctl.cmd = PCUNSET;
1436         ctl.arg.flags = PR_FORK;
1437         size = sizeof (long) + sizeof (long);
1438         (void) write(monfd, &ctl, size);    /* can't fail */
1439 
1440         ctl.cmd = PCSET;        /* wait()able by the parent */
1441         ctl.arg.flags = PR_PTRACE;
1442         size = sizeof (long) + sizeof (long);
1443         (void) write(monfd, &ctl, size);    /* can't fail */
1444 
1445         (void) close(monfd);
1446 
1447         ctl.cmd = PCWSTOP;
1448         size = sizeof (long);
1449 
1450         for (;;) {
1451                 /*
1452                  * Wait for the traced process to stop.
1453                  */
1454                 if (write(fd, &ctl, size) != size) {
1455                         rv = (errno == ENOENT)? 0 : 1;
1456                         lx_debug("monitor failed to wait for LWP to stop: %s",
1457                             strerror(errno));
1458                         _exit(rv);
1459                 }
1460 
1461                 lx_debug("monitor caught traced LWP");
1462 
1463                 /*
1464                  * Pull the ptrace trigger by sending ourself a SIGTRAP. This
1465                  * will cause this, the monitor process, to stop which will
1466                  * cause the parent's waitid(2) call to return this process
1467                  * id. In lx_wait(), we remap the monitor process's pid and
1468                  * status to those of the traced LWP. When the parent process
1469                  * uses ptrace to resume the traced LWP, it will additionally
1470                  * restart this process.
1471                  */
1472                 (void) _lwp_kill(_lwp_self(), SIGTRAP);
1473 
1474                 lx_debug("monitor was resumed");
1475         }
1476 }
1477 
1478 static int
1479 ptrace_attach_common(int fd, pid_t lxpid, pid_t pid, lwpid_t lwpid, int run)
1480 {
1481         pid_t child;
1482         ptrace_monitor_map_t *p;
1483         sigset_t unblock;
1484         pstatus_t status;
1485         long ctl[1 + sizeof (sysset_t) / sizeof (long) + 2];
1486         long *ctlp = ctl;
1487         size_t size;
1488         sysset_t *sysp;
1489         int ret;
1490 
1491         /*
1492          * We're going to need this structure so better to fail now before its
1493          * too late to turn back.
1494          */
1495         if ((p = malloc(sizeof (ptrace_monitor_map_t))) == NULL)
1496                 return (-EIO);
1497 
1498         if ((ret = get_status(pid, &status)) != 0) {
1499                 free(p);
1500                 return (ret);
1501         }
1502 
1503         /*
1504          * If this process is already traced, bail.
1505          */
1506         if (status.pr_flags & PR_PTRACE) {
1507                 free(p);
1508                 return (-EPERM);
1509         }
1510 
1511         /*
1512          * Turn on the appropriate tracing flags. It's exceedingly unlikely
1513          * that this operation will fail; any failure would probably be due
1514          * to another /proc consumer mucking around.
1515          */
1516         if (ptrace_trace_common(fd) != 0) {
1517                 free(p);
1518                 return (-EIO);
1519         }
1520 
1521         /*
1522          * Native ptrace automatically catches processes when they exec so we
1523          * have to do that explicitly here.
1524          */
1525         *ctlp++ = PCSEXIT;
1526         sysp = (sysset_t *)ctlp;
1527         ctlp += sizeof (sysset_t) / sizeof (long);
1528         premptyset(sysp);
1529         praddset(sysp, SYS_execve);
1530         if (run) {
1531                 *ctlp++ = PCRUN;
1532                 *ctlp++ = 0;
1533         }
1534 
1535         size = (char *)ctlp - (char *)&ctl[0];
1536 
1537         if (write(fd, ctl, size) != size) {
1538                 free(p);
1539                 return (-EIO);
1540         }
1541 
1542         /*
1543          * Spawn the monitor proceses to notify this process of events of
1544          * interest in the traced process. We block signals here both so
1545          * we're not interrupted during this operation and so that the
1546          * monitor process doesn't accept signals.
1547          */
1548         (void) sigprocmask(SIG_BLOCK, &blockable_sigs, &unblock);
1549         if ((child = fork1()) == 0)
1550                 ptrace_monitor(fd);
1551         (void) sigprocmask(SIG_SETMASK, &unblock, NULL);
1552 
1553         if (child == -1) {
1554                 lx_debug("failed to fork monitor process\n");
1555                 free(p);
1556                 return (-EIO);
1557         }
1558 
1559         p->pmm_monitor = child;
1560         p->pmm_target = lxpid;
1561         p->pmm_pid = pid;
1562         p->pmm_lwpid = lwpid;
1563         p->pmm_exiting = 0;
1564 
1565         (void) mutex_lock(&ptrace_map_mtx);
1566         p->pmm_next = ptrace_monitor_map;
1567         ptrace_monitor_map = p;
1568         (void) mutex_unlock(&ptrace_map_mtx);
1569 
1570         return (0);
1571 }
1572 
1573 static int
1574 ptrace_attach(pid_t lxpid, pid_t pid, lwpid_t lwpid)
1575 {
1576         int fd, ret;
1577         long ctl;
1578 
1579         /*
1580          * Linux doesn't let you trace process 1 -- go figure.
1581          */
1582         if (lxpid == 1)
1583                 return (-EPERM);
1584 
1585         if ((fd = open_lwpfile(pid, lwpid, O_WRONLY | O_EXCL, "lwpctl")) < 0)
1586                 return (errno == EBUSY ? -EPERM : -ESRCH);
1587 
1588         ctl = PCSTOP;
1589         if (write(fd, &ctl, sizeof (ctl)) != sizeof (ctl)) {
1590                 lx_err(gettext("failed to stop %d/%d\n"), (int)pid, (int)lwpid);
1591                 assert(0);
1592         }
1593 
1594         ret = ptrace_attach_common(fd, lxpid, pid, lwpid, 0);
1595 
1596         (void) close(fd);
1597 
1598         return (ret);
1599 }
1600 
1601 static int
1602 ptrace_detach(pid_t lxpid, pid_t pid, lwpid_t lwpid, int sig)
1603 {
1604         long ctl[2];
1605         int fd, ret;
1606 
1607         if (!is_traced(pid))
1608                 return (-ESRCH);
1609 
1610         if (sig < 0 || sig >= LX_NSIG)
1611                 return (-EINVAL);
1612 
1613         if ((fd = open_lwpfile(pid, lwpid, O_WRONLY, "lwpctl")) < 0)
1614                 return (-ESRCH);
1615 
1616         /*
1617          * The /proc ptrace flag may not be set, but we clear it
1618          * unconditionally since doing so doesn't hurt anything.
1619          */
1620         ctl[0] = PCUNSET;
1621         ctl[1] = PR_PTRACE;
1622         if (write(fd, ctl, sizeof (ctl)) != sizeof (ctl)) {
1623                 (void) close(fd);
1624                 return (-EIO);
1625         }
1626 
1627         /*
1628          * Clear the brand-specific system call tracing flag to ensure that
1629          * the target doesn't stop unexpectedly some time in the future.
1630          */
1631         if ((ret = syscall(SYS_brand, B_PTRACE_SYSCALL, pid, lwpid, 0)) != 0) {
1632                 (void) close(fd);
1633                 return (-ret);
1634         }
1635 
1636         /* kill off the monitor process, if any */
1637         monitor_kill(lxpid, pid);
1638 
1639         /*
1640          * Turn on the run-on-last-close flag so that all tracing flags will be
1641          * cleared when we close the control file descriptor.
1642          */
1643         ctl[0] = PCSET;
1644         ctl[1] = PR_RLC;
1645         if (write(fd, ctl, sizeof (ctl)) != sizeof (ctl)) {
1646                 (void) close(fd);
1647                 return (-EIO);
1648         }
1649 
1650         /*
1651          * Clear the current signal (if any) and possibly send the traced
1652          * process a new signal.
1653          */
1654         ret = ptrace_cont_common(fd, sig, 0, 0);
1655 
1656         (void) close(fd);
1657 
1658         return (ret);
1659 }
1660 
1661 static int
1662 ptrace_syscall(pid_t lxpid, pid_t pid, lwpid_t lwpid, int sig)
1663 {
1664         int ret;
1665 
1666         if (!is_traced(pid))
1667                 return (-ESRCH);
1668 
1669         if ((ret = syscall(SYS_brand, B_PTRACE_SYSCALL, pid, lwpid, 1)) != 0)
1670                 return (-ret);
1671 
1672         return (ptrace_cont(lxpid, pid, lwpid, sig, 0));
1673 }
1674 
1675 int
1676 lx_ptrace(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4)
1677 {
1678         pid_t pid, lxpid = (pid_t)p2;
1679         lwpid_t lwpid;
1680 
1681         if ((p1 != LX_PTRACE_TRACEME) &&
1682             (lx_lpid_to_spair(lxpid, &pid, &lwpid) < 0))
1683                 return (-ESRCH);
1684 
1685         switch (p1) {
1686         case LX_PTRACE_TRACEME:
1687                 return (ptrace_traceme());
1688 
1689         case LX_PTRACE_PEEKTEXT:
1690         case LX_PTRACE_PEEKDATA:
1691                 return (ptrace_peek(pid, p3, (int *)p4));
1692 
1693         case LX_PTRACE_PEEKUSER:
1694                 return (ptrace_peek_user(pid, lwpid, p3, (int *)p4));
1695 
1696         case LX_PTRACE_POKETEXT:
1697         case LX_PTRACE_POKEDATA:
1698                 return (ptrace_poke(pid, p3, (int)p4));
1699 
1700         case LX_PTRACE_POKEUSER:
1701                 return (ptrace_poke_user(pid, lwpid, p3, (int)p4));
1702 
1703         case LX_PTRACE_CONT:
1704                 return (ptrace_cont(lxpid, pid, lwpid, (int)p4, 0));
1705 
1706         case LX_PTRACE_KILL:
1707                 return (ptrace_kill(lxpid, pid));
1708 
1709         case LX_PTRACE_SINGLESTEP:
1710                 return (ptrace_step(lxpid, pid, lwpid, (int)p4));
1711 
1712         case LX_PTRACE_GETREGS:
1713                 return (ptrace_getregs(pid, lwpid, p4));
1714 
1715         case LX_PTRACE_SETREGS:
1716                 return (ptrace_setregs(pid, lwpid, p4));
1717 
1718         case LX_PTRACE_GETFPREGS:
1719                 return (ptrace_getfpregs(pid, lwpid, p4));
1720 
1721         case LX_PTRACE_SETFPREGS:
1722                 return (ptrace_setfpregs(pid, lwpid, p4));
1723 
1724         case LX_PTRACE_ATTACH:
1725                 return (ptrace_attach(lxpid, pid, lwpid));
1726 
1727         case LX_PTRACE_DETACH:
1728                 return (ptrace_detach(lxpid, pid, lwpid, (int)p4));
1729 
1730         case LX_PTRACE_GETFPXREGS:
1731                 return (ptrace_getfpxregs(pid, lwpid, p4));
1732 
1733         case LX_PTRACE_SETFPXREGS:
1734                 return (ptrace_setfpxregs(pid, lwpid, p4));
1735 
1736         case LX_PTRACE_SYSCALL:
1737                 return (ptrace_syscall(lxpid, pid, lwpid, (int)p4));
1738 
1739         default:
1740                 return (-EINVAL);
1741         }
1742 }
1743 
1744 void
1745 lx_ptrace_fork(void)
1746 {
1747         /*
1748          * Send a special signal (that has no Linux equivalent) to indicate
1749          * that we're in this particularly special case. The signal will be
1750          * ignored by this process, but noticed by /proc consumers tracing
1751          * this process.
1752          */
1753         (void) _lwp_kill(_lwp_self(), SIGWAITING);
1754 }
1755 
1756 static void
1757 ptrace_catch_fork(pid_t pid, int monitor)
1758 {
1759         long ctl[14 + 2 * sizeof (sysset_t) / sizeof (long)];
1760         long *ctlp;
1761         sysset_t *sysp;
1762         size_t size;
1763         pstatus_t ps;
1764         pid_t child;
1765         int fd, err;
1766 
1767         /*
1768          * If any of this fails, we're really sunk since the child
1769          * will be stuck in the middle of lx_ptrace_fork().
1770          * Fortunately it's practically assured to succeed unless
1771          * something is seriously wrong on the system.
1772          */
1773         if ((fd = open_procfile(pid, O_WRONLY, "ctl")) < 0) {
1774                 lx_debug("lx_catch_fork: failed to control %d",
1775                     (int)pid);
1776                 return;
1777         }
1778 
1779         /*
1780          * Turn off the /proc PR_PTRACE flag so the parent doesn't get
1781          * spurious wake ups while we're working our dark magic. Arrange to
1782          * catch the process when it exits from fork, and turn on the /proc
         * inherit-on-fork flag so we catch the child as well. We then run
1784          * the process, wait for it to stop on the fork1(2) call and reset
1785          * the tracing flags to their original state.
1786          */
1787         ctlp = ctl;
1788         *ctlp++ = PCCSIG;
1789         if (!monitor) {
1790                 *ctlp++ = PCUNSET;
1791                 *ctlp++ = PR_PTRACE;
1792         }
1793         *ctlp++ = PCSET;
1794         *ctlp++ = PR_FORK;
1795         *ctlp++ = PCSEXIT;
1796         sysp = (sysset_t *)ctlp;
1797         ctlp += sizeof (sysset_t) / sizeof (long);
1798         premptyset(sysp);
1799         praddset(sysp, SYS_forksys);    /* fork1() is forksys(0, 0) */
1800         *ctlp++ = PCRUN;
1801         *ctlp++ = 0;
1802         *ctlp++ = PCWSTOP;
1803         if (!monitor) {
1804                 *ctlp++ = PCSET;
1805                 *ctlp++ = PR_PTRACE;
1806         }
1807         *ctlp++ = PCUNSET;
1808         *ctlp++ = PR_FORK;
1809         *ctlp++ = PCSEXIT;
1810         sysp = (sysset_t *)ctlp;
1811         ctlp += sizeof (sysset_t) / sizeof (long);
1812         premptyset(sysp);
1813         if (monitor)
1814                 praddset(sysp, SYS_execve);
1815 
1816         size = (char *)ctlp - (char *)&ctl[0];
1817         assert(size <= sizeof (ctl));
1818 
1819         if (write(fd, ctl, size) != size) {
1820                 (void) close(fd);
1821                 lx_debug("lx_catch_fork: failed to set %d running",
1822                     (int)pid);
1823                 return;
1824         }
1825 
1826         /*
1827          * Get the status so we can find the value returned from fork1() --
1828          * the child process's pid.
1829          */
1830         if (get_status(pid, &ps) != 0) {
1831                 (void) close(fd);
1832                 lx_debug("lx_catch_fork: failed to get status for %d",
1833                     (int)pid);
1834                 return;
1835         }
1836 
1837         child = (pid_t)ps.pr_lwp.pr_reg[R_R0];
1838 
1839         /*
1840          * We're done with the parent -- off you go.
1841          */
1842         ctl[0] = PCRUN;
1843         ctl[1] = 0;
1844         size = 2 * sizeof (long);
1845 
1846         if (write(fd, ctl, size) != size) {
1847                 (void) close(fd);
1848                 lx_debug("lx_catch_fork: failed to set %d running",
1849                     (int)pid);
1850                 return;
1851         }
1852 
1853         (void) close(fd);
1854 
1855         /*
1856          * If fork1(2) failed, we're done.
1857          */
1858         if (child < 0) {
1859                 lx_debug("lx_catch_fork: fork1 failed");
1860                 return;
1861         }
1862 
1863         /*
1864          * Now we need to screw with the child process.
1865          */
1866         if ((fd = open_lwpfile(child, 1, O_WRONLY, "lwpctl")) < 0) {
1867                 lx_debug("lx_catch_fork: failed to control %d",
1868                     (int)child);
1869                 return;
1870         }
1871 
1872         ctlp = ctl;
1873         *ctlp++ = PCUNSET;
1874         *ctlp++ = PR_FORK;
1875         *ctlp++ = PCSEXIT;
1876         sysp = (sysset_t *)ctlp;
1877         ctlp += sizeof (sysset_t) / sizeof (long);
1878         premptyset(sysp);
1879         size = (char *)ctlp - (char *)&ctl[0];
1880 
1881         if (write(fd, ctl, size) != size) {
1882                 (void) close(fd);
1883                 lx_debug("lx_catch_fork: failed to clear trace flags for  %d",
1884                     (int)child);
1885                 return;
1886         }
1887 
1888         /*
1889          * Now treat the child as though we had attached to it explicitly.
1890          */
1891         err = ptrace_attach_common(fd, child, child, 1, 1);
1892         assert(err == 0);
1893 
1894         (void) close(fd);
1895 }
1896 
1897 static void
1898 set_dr6(pid_t pid, siginfo_t *infop)
1899 {
1900         uintptr_t *debugreg;
1901         uintptr_t addr;
1902         uintptr_t base;
1903         size_t size = NULL;
1904         int dr7;
1905         int lrw;
1906         int i;
1907 
1908         if ((debugreg = debug_registers(pid)) == NULL)
1909                 return;
1910 
1911         debugreg[6] = 0xffff0ff0;       /* read as ones */
1912         switch (infop->si_code) {
1913         case TRAP_TRACE:
1914                 debugreg[6] |= 0x4000;  /* single-step */
1915                 break;
1916         case TRAP_RWATCH:
1917         case TRAP_WWATCH:
1918         case TRAP_XWATCH:
1919                 dr7 = debugreg[7];
1920                 addr = (uintptr_t)infop->si_addr;
1921                 for (i = 0; i < 4; i++) {
1922                         if ((dr7 & (1 << (2 * i))) == 0)      /* enabled? */
1923                                 continue;
1924                         lrw = (dr7 >> (16 + (4 * i))) & 0xf;
1925                         switch (lrw >> 2) {       /* length */
1926                         case 0: size = 1; break;
1927                         case 1: size = 2; break;
1928                         case 2: size = 8; break;
1929                         case 3: size = 4; break;
1930                         }
1931                         base = debugreg[i];
1932                         if (addr >= base && addr < base + size)
1933                                 debugreg[6] |= (1 << i);
1934                 }
1935                 /*
1936                  * Were we also attempting a single-step?
1937                  * (kludge: we use debugreg[4] for this flag.)
1938                  */
1939                 if (debugreg[4])
1940                         debugreg[6] |= 0x4000;
1941                 break;
1942         default:
1943                 break;
1944         }
1945 }
1946 
1947 /*
1948  * This is called from the emulation of the wait4 and waitpid system call to
1949  * take into account the monitor processes which we spawn to observe other
1950  * processes from ptrace_attach().
1951  */
1952 int
1953 lx_ptrace_wait(siginfo_t *infop)
1954 {
1955         ptrace_monitor_map_t *p, **pp;
1956         pid_t lxpid, pid = infop->si_pid;
1957         lwpid_t lwpid;
1958         int fd;
1959         pstatus_t status;
1960 
1961         /*
1962          * If the process observed by waitid(2) corresponds to the monitor
1963          * process for a traced thread, we need to rewhack the siginfo_t to
1964          * look like it came from the traced thread with the flags set
1965          * according to the current state.
1966          */
1967         (void) mutex_lock(&ptrace_map_mtx);
1968         for (pp = &ptrace_monitor_map; (p = *pp) != NULL; pp = &p->pmm_next) {
1969                 if (p->pmm_monitor == pid) {
1970                         assert(infop->si_code == CLD_EXITED ||
1971                             infop->si_code == CLD_KILLED ||
1972                             infop->si_code == CLD_DUMPED ||
1973                             infop->si_code == CLD_TRAPPED);
1974                         goto found;
1975                 }
1976         }
1977         (void) mutex_unlock(&ptrace_map_mtx);
1978 
1979         /*
1980          * If the traced process got a SIGWAITING, we must be in the middle
1981          * of a clone(2) with CLONE_PTRACE set.
1982          */
1983         if (infop->si_code == CLD_TRAPPED && infop->si_status == SIGWAITING) {
1984                 ptrace_catch_fork(pid, 0);
1985                 return (-1);
1986         }
1987 
1988         if (get_status(pid, &status) == 0 &&
1989             (status.pr_lwp.pr_flags & PR_STOPPED) &&
1990             status.pr_lwp.pr_why == PR_SIGNALLED &&
1991             status.pr_lwp.pr_info.si_signo == SIGTRAP)
1992                 set_dr6(pid, &status.pr_lwp.pr_info);
1993 
1994         return (0);
1995 
1996 found:
1997         /*
1998          * If the monitor is in the exiting state, ignore the event and free
1999          * the monitor structure if the monitor has exited. By returning -1 we
2000          * indicate to the caller that this was a spurious return from
2001          * waitid(2) and that it should ignore the result and try again.
2002          */
2003         if (p->pmm_exiting) {
2004                 if (infop->si_code == CLD_EXITED ||
2005                     infop->si_code == CLD_KILLED ||
2006                     infop->si_code == CLD_DUMPED) {
2007                         *pp = p->pmm_next;
2008                         (void) mutex_unlock(&ptrace_map_mtx);
2009                         free(p);
2010                 }
2011                 return (-1);
2012         }
2013 
2014         lxpid = p->pmm_target;
2015         pid = p->pmm_pid;
2016         lwpid = p->pmm_lwpid;
2017         (void) mutex_unlock(&ptrace_map_mtx);
2018 
2019         /*
2020          * If we can't find the traced process, kill off its monitor.
2021          */
2022         if ((fd = open_lwpfile(pid, lwpid, O_RDONLY, "lwpstatus")) < 0) {
2023                 assert(errno == ENOENT);
2024                 monitor_kill(lxpid, pid);
2025                 infop->si_code = CLD_EXITED;
2026                 infop->si_status = 0;
2027                 infop->si_pid = lxpid;
2028                 return (0);
2029         }
2030 
2031         if (read(fd, &status.pr_lwp, sizeof (status.pr_lwp)) !=
2032             sizeof (status.pr_lwp)) {
2033                 lx_err(gettext("read lwpstatus failed %d %s"),
2034                     fd, strerror(errno));
2035                 assert(0);
2036         }
2037 
2038         (void) close(fd);
2039 
2040         /*
2041          * If the traced process isn't stopped, this is a truly spurious
2042          * event probably caused by another /proc consumer tracing the
2043          * monitor.
2044          */
2045         if (!(status.pr_lwp.pr_flags & PR_STOPPED)) {
2046                 (void) ptrace_cont_monitor(p);
2047                 return (-1);
2048         }
2049 
2050         switch (status.pr_lwp.pr_why) {
2051         case PR_SIGNALLED:
2052                 /*
2053                  * If the traced process got a SIGWAITING, we must be in the
2054                  * middle of a clone(2) with CLONE_PTRACE set.
2055                  */
2056                 if (status.pr_lwp.pr_what == SIGWAITING) {
2057                         ptrace_catch_fork(lxpid, 1);
2058                         (void) ptrace_cont_monitor(p);
2059                         return (-1);
2060                 }
2061                 infop->si_code = CLD_TRAPPED;
2062                 infop->si_status = status.pr_lwp.pr_what;
2063                 if (status.pr_lwp.pr_info.si_signo == SIGTRAP)
2064                         set_dr6(pid, &status.pr_lwp.pr_info);
2065                 break;
2066 
2067         case PR_REQUESTED:
2068                 /*
2069                  * Make it look like the traced process stopped on an
2070                  * event of interest.
2071                  */
2072                 infop->si_code = CLD_TRAPPED;
2073                 infop->si_status = SIGTRAP;
2074                 break;
2075 
2076         case PR_JOBCONTROL:
2077                 /*
2078                  * Ignore this as it was probably caused by another /proc
2079                  * consumer tracing the monitor.
2080                  */
2081                 (void) ptrace_cont_monitor(p);
2082                 return (-1);
2083 
2084         case PR_SYSEXIT:
2085                 /*
2086                  * Processes traced via a monitor (rather than using the
2087                  * native Solaris ptrace support) explicitly trace returns
2088                  * from exec system calls since it's an implicit ptrace
2089                  * trace point. Accordingly we need to present a process
2090                  * in that state as though it had reached the ptrace trace
2091                  * point.
2092                  */
2093                 if (status.pr_lwp.pr_what == SYS_execve) {
2094                         infop->si_code = CLD_TRAPPED;
2095                         infop->si_status = SIGTRAP;
2096                         break;
2097                 }
2098 
2099                 /*FALLTHROUGH*/
2100 
2101         case PR_SYSENTRY:
2102         case PR_FAULTED:
2103         case PR_SUSPENDED:
2104         default:
2105                 lx_err(gettext("didn't expect %d (%d %d)"),
2106                     status.pr_lwp.pr_why,
2107                     status.pr_lwp.pr_what, status.pr_lwp.pr_flags);
2108                 assert(0);
2109         }
2110 
2111         infop->si_pid = lxpid;
2112 
2113         return (0);
2114 }