1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  */
  26 
  27 #include <sys/types.h>
  28 #include <sys/uio.h>
  29 #include <sys/param.h>
  30 #include <sys/cmn_err.h>
  31 #include <sys/cred.h>
  32 #include <sys/policy.h>
  33 #include <sys/debug.h>
  34 #include <sys/errno.h>
  35 #include <sys/file.h>
  36 #include <sys/inline.h>
  37 #include <sys/kmem.h>
  38 #include <sys/proc.h>
  39 #include <sys/brand.h>
  40 #include <sys/regset.h>
  41 #include <sys/sysmacros.h>
  42 #include <sys/systm.h>
  43 #include <sys/vfs.h>
  44 #include <sys/vnode.h>
  45 #include <sys/signal.h>
  46 #include <sys/auxv.h>
  47 #include <sys/user.h>
  48 #include <sys/class.h>
  49 #include <sys/fault.h>
  50 #include <sys/syscall.h>
  51 #include <sys/procfs.h>
  52 #include <sys/zone.h>
  53 #include <sys/copyops.h>
  54 #include <sys/schedctl.h>
  55 #include <vm/as.h>
  56 #include <vm/seg.h>
  57 #include <fs/proc/prdata.h>
  58 #include <sys/contract/process_impl.h>
  59 
  60 static  void    pr_settrace(proc_t *, sigset_t *);
  61 static  int     pr_setfpregs(prnode_t *, prfpregset_t *);
  62 #if defined(__sparc)
  63 static  int     pr_setxregs(prnode_t *, prxregset_t *);
  64 static  int     pr_setasrs(prnode_t *, asrset_t);
  65 #endif
  66 static  int     pr_setvaddr(prnode_t *, caddr_t);
  67 static  int     pr_clearsig(prnode_t *);
  68 static  int     pr_clearflt(prnode_t *);
  69 static  int     pr_watch(prnode_t *, prwatch_t *, int *);
  70 static  int     pr_agent(prnode_t *, prgregset_t, int *);
  71 static  int     pr_rdwr(proc_t *, enum uio_rw, priovec_t *);
  72 static  int     pr_scred(proc_t *, prcred_t *, cred_t *, boolean_t);
  73 static  int     pr_spriv(proc_t *, prpriv_t *, cred_t *);
  74 static  int     pr_szoneid(proc_t *, zoneid_t, cred_t *);
  75 static  void    pauselwps(proc_t *);
  76 static  void    unpauselwps(proc_t *);
  77 
  78 typedef union {
  79         long            sig;            /* PCKILL, PCUNKILL */
  80         long            nice;           /* PCNICE */
  81         long            timeo;          /* PCTWSTOP */
  82         ulong_t         flags;          /* PCRUN, PCSET, PCUNSET */
  83         caddr_t         vaddr;          /* PCSVADDR */
  84         siginfo_t       siginfo;        /* PCSSIG */
  85         sigset_t        sigset;         /* PCSTRACE, PCSHOLD */
  86         fltset_t        fltset;         /* PCSFAULT */
  87         sysset_t        sysset;         /* PCSENTRY, PCSEXIT */
  88         prgregset_t     prgregset;      /* PCSREG, PCAGENT */
  89         prfpregset_t    prfpregset;     /* PCSFPREG */
  90 #if defined(__sparc)
  91         prxregset_t     prxregset;      /* PCSXREG */
  92         asrset_t        asrset;         /* PCSASRS */
  93 #endif
  94         prwatch_t       prwatch;        /* PCWATCH */
  95         priovec_t       priovec;        /* PCREAD, PCWRITE */
  96         prcred_t        prcred;         /* PCSCRED */
  97         prpriv_t        prpriv;         /* PCSPRIV */
  98         long            przoneid;       /* PCSZONE */
  99 } arg_t;
 100 
 101 static  int     pr_control(long, arg_t *, prnode_t *, cred_t *);
 102 
 103 static size_t
 104 ctlsize(long cmd, size_t resid, arg_t *argp)
 105 {
 106         size_t size = sizeof (long);
 107         size_t rnd;
 108         int ngrp;
 109 
 110         switch (cmd) {
 111         case PCNULL:
 112         case PCSTOP:
 113         case PCDSTOP:
 114         case PCWSTOP:
 115         case PCCSIG:
 116         case PCCFAULT:
 117                 break;
 118         case PCSSIG:
 119                 size += sizeof (siginfo_t);
 120                 break;
 121         case PCTWSTOP:
 122                 size += sizeof (long);
 123                 break;
 124         case PCKILL:
 125         case PCUNKILL:
 126         case PCNICE:
 127                 size += sizeof (long);
 128                 break;
 129         case PCRUN:
 130         case PCSET:
 131         case PCUNSET:
 132                 size += sizeof (ulong_t);
 133                 break;
 134         case PCSVADDR:
 135                 size += sizeof (caddr_t);
 136                 break;
 137         case PCSTRACE:
 138         case PCSHOLD:
 139                 size += sizeof (sigset_t);
 140                 break;
 141         case PCSFAULT:
 142                 size += sizeof (fltset_t);
 143                 break;
 144         case PCSENTRY:
 145         case PCSEXIT:
 146                 size += sizeof (sysset_t);
 147                 break;
 148         case PCSREG:
 149         case PCAGENT:
 150                 size += sizeof (prgregset_t);
 151                 break;
 152         case PCSFPREG:
 153                 size += sizeof (prfpregset_t);
 154                 break;
 155 #if defined(__sparc)
 156         case PCSXREG:
 157                 size += sizeof (prxregset_t);
 158                 break;
 159         case PCSASRS:
 160                 size += sizeof (asrset_t);
 161                 break;
 162 #endif
 163         case PCWATCH:
 164                 size += sizeof (prwatch_t);
 165                 break;
 166         case PCREAD:
 167         case PCWRITE:
 168                 size += sizeof (priovec_t);
 169                 break;
 170         case PCSCRED:
 171                 size += sizeof (prcred_t);
 172                 break;
 173         case PCSCREDX:
 174                 /*
 175                  * We cannot derefence the pr_ngroups fields if it
 176                  * we don't have enough data.
 177                  */
 178                 if (resid < size + sizeof (prcred_t) - sizeof (gid_t))
 179                         return (0);
 180                 ngrp = argp->prcred.pr_ngroups;
 181                 if (ngrp < 0 || ngrp > ngroups_max)
 182                         return (0);
 183 
 184                 /* The result can be smaller than sizeof (prcred_t) */
 185                 size += sizeof (prcred_t) - sizeof (gid_t);
 186                 size += ngrp * sizeof (gid_t);
 187                 break;
 188         case PCSPRIV:
 189                 if (resid >= size + sizeof (prpriv_t))
 190                         size += priv_prgetprivsize(&argp->prpriv);
 191                 else
 192                         return (0);
 193                 break;
 194         case PCSZONE:
 195                 size += sizeof (long);
 196                 break;
 197         default:
 198                 return (0);
 199         }
 200 
 201         /* Round up to a multiple of long, unless exact amount written */
 202         if (size < resid) {
 203                 rnd = size & (sizeof (long) - 1);
 204 
 205                 if (rnd != 0)
 206                         size += sizeof (long) - rnd;
 207         }
 208 
 209         if (size > resid)
 210                 return (0);
 211         return (size);
 212 }
 213 
 214 /*
 215  * Control operations (lots).
 216  */
 217 int
 218 prwritectl(vnode_t *vp, uio_t *uiop, cred_t *cr)
 219 {
 220 #define MY_BUFFER_SIZE \
 221                 100 > 1 + sizeof (arg_t) / sizeof (long) ? \
 222                 100 : 1 + sizeof (arg_t) / sizeof (long)
 223         long buf[MY_BUFFER_SIZE];
 224         long *bufp;
 225         size_t resid = 0;
 226         size_t size;
 227         prnode_t *pnp = VTOP(vp);
 228         int error;
 229         int locked = 0;
 230 
 231         while (uiop->uio_resid) {
 232                 /*
 233                  * Read several commands in one gulp.
 234                  */
 235                 bufp = buf;
 236                 if (resid) {    /* move incomplete command to front of buffer */
 237                         long *tail;
 238 
 239                         if (resid >= sizeof (buf))
 240                                 break;
 241                         tail = (long *)((char *)buf + sizeof (buf) - resid);
 242                         do {
 243                                 *bufp++ = *tail++;
 244                         } while ((resid -= sizeof (long)) != 0);
 245                 }
 246                 resid = sizeof (buf) - ((char *)bufp - (char *)buf);
 247                 if (resid > uiop->uio_resid)
 248                         resid = uiop->uio_resid;
 249                 if (error = uiomove((caddr_t)bufp, resid, UIO_WRITE, uiop))
 250                         return (error);
 251                 resid += (char *)bufp - (char *)buf;
 252                 bufp = buf;
 253 
 254                 do {            /* loop over commands in buffer */
 255                         long cmd = bufp[0];
 256                         arg_t *argp = (arg_t *)&bufp[1];
 257 
 258                         size = ctlsize(cmd, resid, argp);
 259                         if (size == 0)  /* incomplete or invalid command */
 260                                 break;
 261                         /*
 262                          * Perform the specified control operation.
 263                          */
 264                         if (!locked) {
 265                                 if ((error = prlock(pnp, ZNO)) != 0)
 266                                         return (error);
 267                                 locked = 1;
 268                         }
 269                         if (error = pr_control(cmd, argp, pnp, cr)) {
 270                                 if (error == -1)        /* -1 is timeout */
 271                                         locked = 0;
 272                                 else
 273                                         return (error);
 274                         }
 275                         bufp = (long *)((char *)bufp + size);
 276                 } while ((resid -= size) != 0);
 277 
 278                 if (locked) {
 279                         prunlock(pnp);
 280                         locked = 0;
 281                 }
 282         }
 283         return (resid? EINVAL : 0);
 284 }
 285 
 286 static int
 287 pr_control(long cmd, arg_t *argp, prnode_t *pnp, cred_t *cr)
 288 {
 289         prcommon_t *pcp;
 290         proc_t *p;
 291         int unlocked;
 292         int error = 0;
 293 
 294         if (cmd == PCNULL)
 295                 return (0);
 296 
 297         pcp = pnp->pr_common;
 298         p = pcp->prc_proc;
 299         ASSERT(p != NULL);
 300 
 301         /* System processes defy control. */
 302         if (p->p_flag & SSYS) {
 303                 prunlock(pnp);
 304                 return (EBUSY);
 305         }
 306 
 307         switch (cmd) {
 308 
 309         default:
 310                 error = EINVAL;
 311                 break;
 312 
 313         case PCSTOP:    /* direct process or lwp to stop and wait for stop */
 314         case PCDSTOP:   /* direct process or lwp to stop, don't wait */
 315         case PCWSTOP:   /* wait for process or lwp to stop */
 316         case PCTWSTOP:  /* wait for process or lwp to stop, with timeout */
 317                 {
 318                         time_t timeo;
 319 
 320                         /*
 321                          * Can't apply to a system process.
 322                          */
 323                         if (p->p_as == &kas) {
 324                                 error = EBUSY;
 325                                 break;
 326                         }
 327 
 328                         if (cmd == PCSTOP || cmd == PCDSTOP)
 329                                 pr_stop(pnp);
 330 
 331                         if (cmd == PCDSTOP)
 332                                 break;
 333 
 334                         /*
 335                          * If an lwp is waiting for itself or its process,
 336                          * don't wait. The stopped lwp would never see the
 337                          * fact that it is stopped.
 338                          */
 339                         if ((pcp->prc_flags & PRC_LWP)?
 340                             (pcp->prc_thread == curthread) : (p == curproc)) {
 341                                 if (cmd == PCWSTOP || cmd == PCTWSTOP)
 342                                         error = EBUSY;
 343                                 break;
 344                         }
 345 
 346                         timeo = (cmd == PCTWSTOP)? (time_t)argp->timeo : 0;
 347                         if ((error = pr_wait_stop(pnp, timeo)) != 0)
 348                                 return (error);
 349 
 350                         break;
 351                 }
 352 
 353         case PCRUN:     /* make lwp or process runnable */
 354                 error = pr_setrun(pnp, argp->flags);
 355                 break;
 356 
 357         case PCSTRACE:  /* set signal trace mask */
 358                 pr_settrace(p,  &argp->sigset);
 359                 break;
 360 
 361         case PCSSIG:    /* set current signal */
 362                 error = pr_setsig(pnp, &argp->siginfo);
 363                 if (argp->siginfo.si_signo == SIGKILL && error == 0) {
 364                         prunlock(pnp);
 365                         pr_wait_die(pnp);
 366                         return (-1);
 367                 }
 368                 break;
 369 
 370         case PCKILL:    /* send signal */
 371                 error = pr_kill(pnp, (int)argp->sig, cr);
 372                 if (error == 0 && argp->sig == SIGKILL) {
 373                         prunlock(pnp);
 374                         pr_wait_die(pnp);
 375                         return (-1);
 376                 }
 377                 break;
 378 
 379         case PCUNKILL:  /* delete a pending signal */
 380                 error = pr_unkill(pnp, (int)argp->sig);
 381                 break;
 382 
 383         case PCNICE:    /* set nice priority */
 384                 error = pr_nice(p, (int)argp->nice, cr);
 385                 break;
 386 
 387         case PCSENTRY:  /* set syscall entry bit mask */
 388         case PCSEXIT:   /* set syscall exit bit mask */
 389                 pr_setentryexit(p, &argp->sysset, cmd == PCSENTRY);
 390                 break;
 391 
 392         case PCSET:     /* set process flags */
 393                 error = pr_set(p, argp->flags);
 394                 break;
 395 
 396         case PCUNSET:   /* unset process flags */
 397                 error = pr_unset(p, argp->flags);
 398                 break;
 399 
 400         case PCSREG:    /* set general registers */
 401                 {
 402                         kthread_t *t = pr_thread(pnp);
 403 
 404                         if (!ISTOPPED(t) && !VSTOPPED(t) && !DSTOPPED(t)) {
 405                                 thread_unlock(t);
 406                                 error = EBUSY;
 407                         } else {
 408                                 thread_unlock(t);
 409                                 mutex_exit(&p->p_lock);
 410                                 prsetprregs(ttolwp(t), argp->prgregset, 0);
 411                                 mutex_enter(&p->p_lock);
 412                         }
 413                         break;
 414                 }
 415 
 416         case PCSFPREG:  /* set floating-point registers */
 417                 error = pr_setfpregs(pnp, &argp->prfpregset);
 418                 break;
 419 
 420         case PCSXREG:   /* set extra registers */
 421 #if defined(__sparc)
 422                 error = pr_setxregs(pnp, &argp->prxregset);
 423 #else
 424                 error = EINVAL;
 425 #endif
 426                 break;
 427 
 428 #if defined(__sparc)
 429         case PCSASRS:   /* set ancillary state registers */
 430                 error = pr_setasrs(pnp, argp->asrset);
 431                 break;
 432 #endif
 433 
 434         case PCSVADDR:  /* set virtual address at which to resume */
 435                 error = pr_setvaddr(pnp, argp->vaddr);
 436                 break;
 437 
 438         case PCSHOLD:   /* set signal-hold mask */
 439                 pr_sethold(pnp, &argp->sigset);
 440                 break;
 441 
 442         case PCSFAULT:  /* set mask of traced faults */
 443                 pr_setfault(p, &argp->fltset);
 444                 break;
 445 
 446         case PCCSIG:    /* clear current signal */
 447                 error = pr_clearsig(pnp);
 448                 break;
 449 
 450         case PCCFAULT:  /* clear current fault */
 451                 error = pr_clearflt(pnp);
 452                 break;
 453 
 454         case PCWATCH:   /* set or clear watched areas */
 455                 error = pr_watch(pnp, &argp->prwatch, &unlocked);
 456                 if (error && unlocked)
 457                         return (error);
 458                 break;
 459 
 460         case PCAGENT:   /* create the /proc agent lwp in the target process */
 461                 error = pr_agent(pnp, argp->prgregset, &unlocked);
 462                 if (error && unlocked)
 463                         return (error);
 464                 break;
 465 
 466         case PCREAD:    /* read from the address space */
 467                 error = pr_rdwr(p, UIO_READ, &argp->priovec);
 468                 break;
 469 
 470         case PCWRITE:   /* write to the address space */
 471                 error = pr_rdwr(p, UIO_WRITE, &argp->priovec);
 472                 break;
 473 
 474         case PCSCRED:   /* set the process credentials */
 475         case PCSCREDX:
 476                 error = pr_scred(p, &argp->prcred, cr, cmd == PCSCREDX);
 477                 break;
 478 
 479         case PCSPRIV:   /* set the process privileges */
 480                 error = pr_spriv(p, &argp->prpriv, cr);
 481                 break;
 482         case PCSZONE:   /* set the process's zoneid credentials */
 483                 error = pr_szoneid(p, (zoneid_t)argp->przoneid, cr);
 484                 break;
 485         }
 486 
 487         if (error)
 488                 prunlock(pnp);
 489         return (error);
 490 }
 491 
 492 #ifdef _SYSCALL32_IMPL
 493 
 494 typedef union {
 495         int32_t         sig;            /* PCKILL, PCUNKILL */
 496         int32_t         nice;           /* PCNICE */
 497         int32_t         timeo;          /* PCTWSTOP */
 498         uint32_t        flags;          /* PCRUN, PCSET, PCUNSET */
 499         caddr32_t       vaddr;          /* PCSVADDR */
 500         siginfo32_t     siginfo;        /* PCSSIG */
 501         sigset_t        sigset;         /* PCSTRACE, PCSHOLD */
 502         fltset_t        fltset;         /* PCSFAULT */
 503         sysset_t        sysset;         /* PCSENTRY, PCSEXIT */
 504         prgregset32_t   prgregset;      /* PCSREG, PCAGENT */
 505         prfpregset32_t  prfpregset;     /* PCSFPREG */
 506 #if defined(__sparc)
 507         prxregset_t     prxregset;      /* PCSXREG */
 508 #endif
 509         prwatch32_t     prwatch;        /* PCWATCH */
 510         priovec32_t     priovec;        /* PCREAD, PCWRITE */
 511         prcred32_t      prcred;         /* PCSCRED */
 512         prpriv_t        prpriv;         /* PCSPRIV */
 513         int32_t         przoneid;       /* PCSZONE */
 514 } arg32_t;
 515 
 516 static  int     pr_control32(int32_t, arg32_t *, prnode_t *, cred_t *);
 517 static  int     pr_setfpregs32(prnode_t *, prfpregset32_t *);
 518 
 519 /*
 520  * Note that while ctlsize32() can use argp, it must do so only in a way
 521  * that assumes 32-bit rather than 64-bit alignment as argp is a pointer
 522  * to an array of 32-bit values and only 32-bit alignment is ensured.
 523  */
 524 static size_t
 525 ctlsize32(int32_t cmd, size_t resid, arg32_t *argp)
 526 {
 527         size_t size = sizeof (int32_t);
 528         size_t rnd;
 529         int ngrp;
 530 
 531         switch (cmd) {
 532         case PCNULL:
 533         case PCSTOP:
 534         case PCDSTOP:
 535         case PCWSTOP:
 536         case PCCSIG:
 537         case PCCFAULT:
 538                 break;
 539         case PCSSIG:
 540                 size += sizeof (siginfo32_t);
 541                 break;
 542         case PCTWSTOP:
 543                 size += sizeof (int32_t);
 544                 break;
 545         case PCKILL:
 546         case PCUNKILL:
 547         case PCNICE:
 548                 size += sizeof (int32_t);
 549                 break;
 550         case PCRUN:
 551         case PCSET:
 552         case PCUNSET:
 553                 size += sizeof (uint32_t);
 554                 break;
 555         case PCSVADDR:
 556                 size += sizeof (caddr32_t);
 557                 break;
 558         case PCSTRACE:
 559         case PCSHOLD:
 560                 size += sizeof (sigset_t);
 561                 break;
 562         case PCSFAULT:
 563                 size += sizeof (fltset_t);
 564                 break;
 565         case PCSENTRY:
 566         case PCSEXIT:
 567                 size += sizeof (sysset_t);
 568                 break;
 569         case PCSREG:
 570         case PCAGENT:
 571                 size += sizeof (prgregset32_t);
 572                 break;
 573         case PCSFPREG:
 574                 size += sizeof (prfpregset32_t);
 575                 break;
 576 #if defined(__sparc)
 577         case PCSXREG:
 578                 size += sizeof (prxregset_t);
 579                 break;
 580 #endif
 581         case PCWATCH:
 582                 size += sizeof (prwatch32_t);
 583                 break;
 584         case PCREAD:
 585         case PCWRITE:
 586                 size += sizeof (priovec32_t);
 587                 break;
 588         case PCSCRED:
 589                 size += sizeof (prcred32_t);
 590                 break;
 591         case PCSCREDX:
 592                 /*
 593                  * We cannot derefence the pr_ngroups fields if it
 594                  * we don't have enough data.
 595                  */
 596                 if (resid < size + sizeof (prcred32_t) - sizeof (gid32_t))
 597                         return (0);
 598                 ngrp = argp->prcred.pr_ngroups;
 599                 if (ngrp < 0 || ngrp > ngroups_max)
 600                         return (0);
 601 
 602                 /* The result can be smaller than sizeof (prcred32_t) */
 603                 size += sizeof (prcred32_t) - sizeof (gid32_t);
 604                 size += ngrp * sizeof (gid32_t);
 605                 break;
 606         case PCSPRIV:
 607                 if (resid >= size + sizeof (prpriv_t))
 608                         size += priv_prgetprivsize(&argp->prpriv);
 609                 else
 610                         return (0);
 611                 break;
 612         case PCSZONE:
 613                 size += sizeof (int32_t);
 614                 break;
 615         default:
 616                 return (0);
 617         }
 618 
 619         /* Round up to a multiple of int32_t */
 620         rnd = size & (sizeof (int32_t) - 1);
 621 
 622         if (rnd != 0)
 623                 size += sizeof (int32_t) - rnd;
 624 
 625         if (size > resid)
 626                 return (0);
 627         return (size);
 628 }
 629 
 630 /*
 631  * Control operations (lots).
 632  */
 633 int
 634 prwritectl32(struct vnode *vp, struct uio *uiop, cred_t *cr)
 635 {
 636 #define MY_BUFFER_SIZE32 \
 637                 100 > 1 + sizeof (arg32_t) / sizeof (int32_t) ? \
 638                 100 : 1 + sizeof (arg32_t) / sizeof (int32_t)
 639         int32_t buf[MY_BUFFER_SIZE32];
 640         int32_t *bufp;
 641         arg32_t arg;
 642         size_t resid = 0;
 643         size_t size;
 644         prnode_t *pnp = VTOP(vp);
 645         int error;
 646         int locked = 0;
 647 
 648         while (uiop->uio_resid) {
 649                 /*
 650                  * Read several commands in one gulp.
 651                  */
 652                 bufp = buf;
 653                 if (resid) {    /* move incomplete command to front of buffer */
 654                         int32_t *tail;
 655 
 656                         if (resid >= sizeof (buf))
 657                                 break;
 658                         tail = (int32_t *)((char *)buf + sizeof (buf) - resid);
 659                         do {
 660                                 *bufp++ = *tail++;
 661                         } while ((resid -= sizeof (int32_t)) != 0);
 662                 }
 663                 resid = sizeof (buf) - ((char *)bufp - (char *)buf);
 664                 if (resid > uiop->uio_resid)
 665                         resid = uiop->uio_resid;
 666                 if (error = uiomove((caddr_t)bufp, resid, UIO_WRITE, uiop))
 667                         return (error);
 668                 resid += (char *)bufp - (char *)buf;
 669                 bufp = buf;
 670 
 671                 do {            /* loop over commands in buffer */
 672                         int32_t cmd = bufp[0];
 673                         arg32_t *argp = (arg32_t *)&bufp[1];
 674 
 675                         size = ctlsize32(cmd, resid, argp);
 676                         if (size == 0)  /* incomplete or invalid command */
 677                                 break;
 678                         /*
 679                          * Perform the specified control operation.
 680                          */
 681                         if (!locked) {
 682                                 if ((error = prlock(pnp, ZNO)) != 0)
 683                                         return (error);
 684                                 locked = 1;
 685                         }
 686 
 687                         /*
 688                          * Since some members of the arg32_t union contain
 689                          * 64-bit values (which must be 64-bit aligned), we
 690                          * can't simply pass a pointer to the structure as
 691                          * it may be unaligned. Note that we do pass the
 692                          * potentially unaligned structure to ctlsize32()
 693                          * above, but that uses it a way that makes no
 694                          * assumptions about alignment.
 695                          */
 696                         ASSERT(size - sizeof (cmd) <= sizeof (arg));
 697                         bcopy(argp, &arg, size - sizeof (cmd));
 698 
 699                         if (error = pr_control32(cmd, &arg, pnp, cr)) {
 700                                 if (error == -1)        /* -1 is timeout */
 701                                         locked = 0;
 702                                 else
 703                                         return (error);
 704                         }
 705                         bufp = (int32_t *)((char *)bufp + size);
 706                 } while ((resid -= size) != 0);
 707 
 708                 if (locked) {
 709                         prunlock(pnp);
 710                         locked = 0;
 711                 }
 712         }
 713         return (resid? EINVAL : 0);
 714 }
 715 
 716 static int
 717 pr_control32(int32_t cmd, arg32_t *argp, prnode_t *pnp, cred_t *cr)
 718 {
 719         prcommon_t *pcp;
 720         proc_t *p;
 721         int unlocked;
 722         int error = 0;
 723 
 724         if (cmd == PCNULL)
 725                 return (0);
 726 
 727         pcp = pnp->pr_common;
 728         p = pcp->prc_proc;
 729         ASSERT(p != NULL);
 730 
 731         if (p->p_flag & SSYS) {
 732                 prunlock(pnp);
 733                 return (EBUSY);
 734         }
 735 
 736         switch (cmd) {
 737 
 738         default:
 739                 error = EINVAL;
 740                 break;
 741 
 742         case PCSTOP:    /* direct process or lwp to stop and wait for stop */
 743         case PCDSTOP:   /* direct process or lwp to stop, don't wait */
 744         case PCWSTOP:   /* wait for process or lwp to stop */
 745         case PCTWSTOP:  /* wait for process or lwp to stop, with timeout */
 746                 {
 747                         time_t timeo;
 748 
 749                         /*
 750                          * Can't apply to a system process.
 751                          */
 752                         if (p->p_as == &kas) {
 753                                 error = EBUSY;
 754                                 break;
 755                         }
 756 
 757                         if (cmd == PCSTOP || cmd == PCDSTOP)
 758                                 pr_stop(pnp);
 759 
 760                         if (cmd == PCDSTOP)
 761                                 break;
 762 
 763                         /*
 764                          * If an lwp is waiting for itself or its process,
 765                          * don't wait. The lwp will never see the fact that
 766                          * itself is stopped.
 767                          */
 768                         if ((pcp->prc_flags & PRC_LWP)?
 769                             (pcp->prc_thread == curthread) : (p == curproc)) {
 770                                 if (cmd == PCWSTOP || cmd == PCTWSTOP)
 771                                         error = EBUSY;
 772                                 break;
 773                         }
 774 
 775                         timeo = (cmd == PCTWSTOP)? (time_t)argp->timeo : 0;
 776                         if ((error = pr_wait_stop(pnp, timeo)) != 0)
 777                                 return (error);
 778 
 779                         break;
 780                 }
 781 
 782         case PCRUN:     /* make lwp or process runnable */
 783                 error = pr_setrun(pnp, (ulong_t)argp->flags);
 784                 break;
 785 
 786         case PCSTRACE:  /* set signal trace mask */
 787                 pr_settrace(p,  &argp->sigset);
 788                 break;
 789 
 790         case PCSSIG:    /* set current signal */
 791                 if (PROCESS_NOT_32BIT(p))
 792                         error = EOVERFLOW;
 793                 else {
 794                         int sig = (int)argp->siginfo.si_signo;
 795                         siginfo_t siginfo;
 796 
 797                         bzero(&siginfo, sizeof (siginfo));
 798                         siginfo_32tok(&argp->siginfo, (k_siginfo_t *)&siginfo);
 799                         error = pr_setsig(pnp, &siginfo);
 800                         if (sig == SIGKILL && error == 0) {
 801                                 prunlock(pnp);
 802                                 pr_wait_die(pnp);
 803                                 return (-1);
 804                         }
 805                 }
 806                 break;
 807 
 808         case PCKILL:    /* send signal */
 809                 error = pr_kill(pnp, (int)argp->sig, cr);
 810                 if (error == 0 && argp->sig == SIGKILL) {
 811                         prunlock(pnp);
 812                         pr_wait_die(pnp);
 813                         return (-1);
 814                 }
 815                 break;
 816 
 817         case PCUNKILL:  /* delete a pending signal */
 818                 error = pr_unkill(pnp, (int)argp->sig);
 819                 break;
 820 
 821         case PCNICE:    /* set nice priority */
 822                 error = pr_nice(p, (int)argp->nice, cr);
 823                 break;
 824 
 825         case PCSENTRY:  /* set syscall entry bit mask */
 826         case PCSEXIT:   /* set syscall exit bit mask */
 827                 pr_setentryexit(p, &argp->sysset, cmd == PCSENTRY);
 828                 break;
 829 
 830         case PCSET:     /* set process flags */
 831                 error = pr_set(p, (long)argp->flags);
 832                 break;
 833 
 834         case PCUNSET:   /* unset process flags */
 835                 error = pr_unset(p, (long)argp->flags);
 836                 break;
 837 
 838         case PCSREG:    /* set general registers */
 839                 if (PROCESS_NOT_32BIT(p))
 840                         error = EOVERFLOW;
 841                 else {
 842                         kthread_t *t = pr_thread(pnp);
 843 
 844                         if (!ISTOPPED(t) && !VSTOPPED(t) && !DSTOPPED(t)) {
 845                                 thread_unlock(t);
 846                                 error = EBUSY;
 847                         } else {
 848                                 prgregset_t prgregset;
 849                                 klwp_t *lwp = ttolwp(t);
 850 
 851                                 thread_unlock(t);
 852                                 mutex_exit(&p->p_lock);
 853                                 prgregset_32ton(lwp, argp->prgregset,
 854                                     prgregset);
 855                                 prsetprregs(lwp, prgregset, 0);
 856                                 mutex_enter(&p->p_lock);
 857                         }
 858                 }
 859                 break;
 860 
 861         case PCSFPREG:  /* set floating-point registers */
 862                 if (PROCESS_NOT_32BIT(p))
 863                         error = EOVERFLOW;
 864                 else
 865                         error = pr_setfpregs32(pnp, &argp->prfpregset);
 866                 break;
 867 
 868         case PCSXREG:   /* set extra registers */
 869 #if defined(__sparc)
 870                 if (PROCESS_NOT_32BIT(p))
 871                         error = EOVERFLOW;
 872                 else
 873                         error = pr_setxregs(pnp, &argp->prxregset);
 874 #else
 875                 error = EINVAL;
 876 #endif
 877                 break;
 878 
 879         case PCSVADDR:  /* set virtual address at which to resume */
 880                 if (PROCESS_NOT_32BIT(p))
 881                         error = EOVERFLOW;
 882                 else
 883                         error = pr_setvaddr(pnp,
 884                             (caddr_t)(uintptr_t)argp->vaddr);
 885                 break;
 886 
 887         case PCSHOLD:   /* set signal-hold mask */
 888                 pr_sethold(pnp, &argp->sigset);
 889                 break;
 890 
 891         case PCSFAULT:  /* set mask of traced faults */
 892                 pr_setfault(p, &argp->fltset);
 893                 break;
 894 
 895         case PCCSIG:    /* clear current signal */
 896                 error = pr_clearsig(pnp);
 897                 break;
 898 
 899         case PCCFAULT:  /* clear current fault */
 900                 error = pr_clearflt(pnp);
 901                 break;
 902 
 903         case PCWATCH:   /* set or clear watched areas */
 904                 if (PROCESS_NOT_32BIT(p))
 905                         error = EOVERFLOW;
 906                 else {
 907                         prwatch_t prwatch;
 908 
 909                         prwatch.pr_vaddr = argp->prwatch.pr_vaddr;
 910                         prwatch.pr_size = argp->prwatch.pr_size;
 911                         prwatch.pr_wflags = argp->prwatch.pr_wflags;
 912                         prwatch.pr_pad = argp->prwatch.pr_pad;
 913                         error = pr_watch(pnp, &prwatch, &unlocked);
 914                         if (error && unlocked)
 915                                 return (error);
 916                 }
 917                 break;
 918 
 919         case PCAGENT:   /* create the /proc agent lwp in the target process */
 920                 if (PROCESS_NOT_32BIT(p))
 921                         error = EOVERFLOW;
 922                 else {
 923                         prgregset_t prgregset;
 924                         kthread_t *t = pr_thread(pnp);
 925                         klwp_t *lwp = ttolwp(t);
 926                         thread_unlock(t);
 927                         mutex_exit(&p->p_lock);
 928                         prgregset_32ton(lwp, argp->prgregset, prgregset);
 929                         mutex_enter(&p->p_lock);
 930                         error = pr_agent(pnp, prgregset, &unlocked);
 931                         if (error && unlocked)
 932                                 return (error);
 933                 }
 934                 break;
 935 
 936         case PCREAD:    /* read from the address space */
 937         case PCWRITE:   /* write to the address space */
 938                 if (PROCESS_NOT_32BIT(p))
 939                         error = EOVERFLOW;
 940                 else {
 941                         enum uio_rw rw = (cmd == PCREAD)? UIO_READ : UIO_WRITE;
 942                         priovec_t priovec;
 943 
 944                         priovec.pio_base =
 945                             (void *)(uintptr_t)argp->priovec.pio_base;
 946                         priovec.pio_len = (size_t)argp->priovec.pio_len;
 947                         priovec.pio_offset = (off_t)
 948                             (uint32_t)argp->priovec.pio_offset;
 949                         error = pr_rdwr(p, rw, &priovec);
 950                 }
 951                 break;
 952 
 953         case PCSCRED:   /* set the process credentials */
 954         case PCSCREDX:
 955                 {
 956                         /*
 957                          * All the fields in these structures are exactly the
 958                          * same and so the structures are compatible.  In case
 959                          * this ever changes, we catch this with the ASSERT
 960                          * below.
 961                          */
 962                         prcred_t *prcred = (prcred_t *)&argp->prcred;
 963 
 964 #ifndef __lint
 965                         ASSERT(sizeof (prcred_t) == sizeof (prcred32_t));
 966 #endif
 967 
 968                         error = pr_scred(p, prcred, cr, cmd == PCSCREDX);
 969                         break;
 970                 }
 971 
 972         case PCSPRIV:   /* set the process privileges */
 973                 error = pr_spriv(p, &argp->prpriv, cr);
 974                 break;
 975 
 976         case PCSZONE:   /* set the process's zoneid */
 977                 error = pr_szoneid(p, (zoneid_t)argp->przoneid, cr);
 978                 break;
 979         }
 980 
 981         if (error)
 982                 prunlock(pnp);
 983         return (error);
 984 }
 985 
 986 #endif  /* _SYSCALL32_IMPL */
 987 
 988 /*
 989  * Return the specific or chosen thread/lwp for a control operation.
 990  * Returns with the thread locked via thread_lock(t).
 991  */
 992 kthread_t *
 993 pr_thread(prnode_t *pnp)
 994 {
 995         prcommon_t *pcp = pnp->pr_common;
 996         kthread_t *t;
 997 
 998         if (pcp->prc_flags & PRC_LWP) {
 999                 t = pcp->prc_thread;
1000                 ASSERT(t != NULL);
1001                 thread_lock(t);
1002         } else {
1003                 proc_t *p = pcp->prc_proc;
1004                 t = prchoose(p);        /* returns locked thread */
1005                 ASSERT(t != NULL);
1006         }
1007 
1008         return (t);
1009 }
1010 
1011 /*
1012  * Direct the process or lwp to stop.
1013  */
1014 void
1015 pr_stop(prnode_t *pnp)
1016 {
1017         prcommon_t *pcp = pnp->pr_common;
1018         proc_t *p = pcp->prc_proc;
1019         kthread_t *t;
1020         vnode_t *vp;
1021 
1022         /*
1023          * If already stopped, do nothing; otherwise flag
1024          * it to be stopped the next time it tries to run.
1025          * If sleeping at interruptible priority, set it
1026          * running so it will stop within cv_wait_sig().
1027          *
1028          * Take care to cooperate with jobcontrol: if an lwp
1029          * is stopped due to the default action of a jobcontrol
1030          * stop signal, flag it to be stopped the next time it
1031          * starts due to a SIGCONT signal.
1032          */
1033         if (pcp->prc_flags & PRC_LWP)
1034                 t = pcp->prc_thread;
1035         else
1036                 t = p->p_tlist;
1037         ASSERT(t != NULL);
1038 
1039         do {
1040                 int notify;
1041 
1042                 notify = 0;
1043                 thread_lock(t);
1044                 if (!ISTOPPED(t)) {
1045                         t->t_proc_flag |= TP_PRSTOP;
1046                         t->t_sig_check = 1;  /* do ISSIG */
1047                 }
1048 
1049                 /* Move the thread from wait queue to run queue */
1050                 if (ISWAITING(t))
1051                         setrun_locked(t);
1052 
1053                 if (ISWAKEABLE(t)) {
1054                         if (t->t_wchan0 == NULL)
1055                                 setrun_locked(t);
1056                         else if (!VSTOPPED(t)) {
1057                                 /*
1058                                  * Mark it virtually stopped.
1059                                  */
1060                                 t->t_proc_flag |= TP_PRVSTOP;
1061                                 notify = 1;
1062                         }
1063                 }
1064                 /*
1065                  * force the thread into the kernel
1066                  * if it is not already there.
1067                  */
1068                 prpokethread(t);
1069                 thread_unlock(t);
1070                 if (notify &&
1071                     (vp = p->p_lwpdir[t->t_dslot].ld_entry->le_trace) != NULL)
1072                         prnotify(vp);
1073                 if (pcp->prc_flags & PRC_LWP)
1074                         break;
1075         } while ((t = t->t_forw) != p->p_tlist);
1076 
1077         /*
1078          * We do this just in case the thread we asked
1079          * to stop is in holdlwps() (called from cfork()).
1080          */
1081         cv_broadcast(&p->p_holdlwps);
1082 }
1083 
1084 /*
1085  * Sleep until the lwp stops, but cooperate with
1086  * jobcontrol:  Don't wake up if the lwp is stopped
1087  * due to the default action of a jobcontrol stop signal.
1088  * If this is the process file descriptor, sleep
1089  * until all of the process's lwps stop.
1090  */
1091 int
1092 pr_wait_stop(prnode_t *pnp, time_t timeo)
1093 {
1094         prcommon_t *pcp = pnp->pr_common;
1095         proc_t *p = pcp->prc_proc;
1096         timestruc_t rqtime;
1097         timestruc_t *rqtp = NULL;
1098         int timecheck = 0;
1099         kthread_t *t;
1100         int error;
1101 
1102         if (timeo > 0) {     /* millisecond timeout */
1103                 /*
1104                  * Determine the precise future time of the requested timeout.
1105                  */
1106                 timestruc_t now;
1107 
1108                 timecheck = timechanged;
1109                 gethrestime(&now);
1110                 rqtp = &rqtime;
1111                 rqtp->tv_sec = timeo / MILLISEC;
1112                 rqtp->tv_nsec = (timeo % MILLISEC) * MICROSEC;
1113                 timespecadd(rqtp, &now);
1114         }
1115 
1116         if (pcp->prc_flags & PRC_LWP) {  /* lwp file descriptor */
1117                 t = pcp->prc_thread;
1118                 ASSERT(t != NULL);
1119                 thread_lock(t);
1120                 while (!ISTOPPED(t) && !VSTOPPED(t)) {
1121                         thread_unlock(t);
1122                         mutex_enter(&pcp->prc_mutex);
1123                         prunlock(pnp);
1124                         error = pr_wait(pcp, rqtp, timecheck);
1125                         if (error)      /* -1 is timeout */
1126                                 return (error);
1127                         if ((error = prlock(pnp, ZNO)) != 0)
1128                                 return (error);
1129                         ASSERT(p == pcp->prc_proc);
1130                         ASSERT(t == pcp->prc_thread);
1131                         thread_lock(t);
1132                 }
1133                 thread_unlock(t);
1134         } else {                        /* process file descriptor */
1135                 t = prchoose(p);        /* returns locked thread */
1136                 ASSERT(t != NULL);
1137                 ASSERT(MUTEX_HELD(&p->p_lock));
1138                 while ((!ISTOPPED(t) && !VSTOPPED(t) && !SUSPENDED(t)) ||
1139                     (p->p_flag & SEXITLWPS)) {
1140                         thread_unlock(t);
1141                         mutex_enter(&pcp->prc_mutex);
1142                         prunlock(pnp);
1143                         error = pr_wait(pcp, rqtp, timecheck);
1144                         if (error)      /* -1 is timeout */
1145                                 return (error);
1146                         if ((error = prlock(pnp, ZNO)) != 0)
1147                                 return (error);
1148                         ASSERT(p == pcp->prc_proc);
1149                         t = prchoose(p);        /* returns locked t */
1150                         ASSERT(t != NULL);
1151                 }
1152                 thread_unlock(t);
1153         }
1154 
1155         ASSERT(!(pcp->prc_flags & PRC_DESTROY) && p->p_stat != SZOMB &&
1156             t != NULL && t->t_state != TS_ZOMB);
1157 
1158         return (0);
1159 }
1160 
1161 int
1162 pr_setrun(prnode_t *pnp, ulong_t flags)
1163 {
1164         prcommon_t *pcp = pnp->pr_common;
1165         proc_t *p = pcp->prc_proc;
1166         kthread_t *t;
1167         klwp_t *lwp;
1168 
1169         /*
1170          * Cannot set an lwp running if it is not stopped.
1171          * Also, no lwp other than the /proc agent lwp can
1172          * be set running so long as the /proc agent lwp exists.
1173          */
1174         t = pr_thread(pnp);     /* returns locked thread */
1175         if ((!ISTOPPED(t) && !VSTOPPED(t) &&
1176             !(t->t_proc_flag & TP_PRSTOP)) ||
1177             (p->p_agenttp != NULL &&
1178             (t != p->p_agenttp || !(pcp->prc_flags & PRC_LWP)))) {
1179                 thread_unlock(t);
1180                 return (EBUSY);
1181         }
1182         thread_unlock(t);
1183         if (flags & ~(PRCSIG|PRCFAULT|PRSTEP|PRSTOP|PRSABORT))
1184                 return (EINVAL);
1185         lwp = ttolwp(t);
1186         if ((flags & PRCSIG) && lwp->lwp_cursig != SIGKILL) {
1187                 /*
1188                  * Discard current siginfo_t, if any.
1189                  */
1190                 lwp->lwp_cursig = 0;
1191                 lwp->lwp_extsig = 0;
1192                 if (lwp->lwp_curinfo) {
1193                         siginfofree(lwp->lwp_curinfo);
1194                         lwp->lwp_curinfo = NULL;
1195                 }
1196         }
1197         if (flags & PRCFAULT)
1198                 lwp->lwp_curflt = 0;
1199         /*
1200          * We can't hold p->p_lock when we touch the lwp's registers.
1201          * It may be swapped out and we will get a page fault.
1202          */
1203         if (flags & PRSTEP) {
1204                 mutex_exit(&p->p_lock);
1205                 prstep(lwp, 0);
1206                 mutex_enter(&p->p_lock);
1207         }
1208         if (flags & PRSTOP) {
1209                 t->t_proc_flag |= TP_PRSTOP;
1210                 t->t_sig_check = 1;  /* do ISSIG */
1211         }
1212         if (flags & PRSABORT)
1213                 lwp->lwp_sysabort = 1;
1214         thread_lock(t);
1215         if ((pcp->prc_flags & PRC_LWP) || (flags & (PRSTEP|PRSTOP))) {
1216                 /*
1217                  * Here, we are dealing with a single lwp.
1218                  */
1219                 if (ISTOPPED(t)) {
1220                         t->t_schedflag |= TS_PSTART;
1221                         t->t_dtrace_stop = 0;
1222                         setrun_locked(t);
1223                 } else if (flags & PRSABORT) {
1224                         t->t_proc_flag &=
1225                             ~(TP_PRSTOP|TP_PRVSTOP|TP_STOPPING);
1226                         setrun_locked(t);
1227                 } else if (!(flags & PRSTOP)) {
1228                         t->t_proc_flag &=
1229                             ~(TP_PRSTOP|TP_PRVSTOP|TP_STOPPING);
1230                 }
1231                 thread_unlock(t);
1232         } else {
1233                 /*
1234                  * Here, we are dealing with the whole process.
1235                  */
1236                 if (ISTOPPED(t)) {
1237                         /*
1238                          * The representative lwp is stopped on an event
1239                          * of interest.  We demote it to PR_REQUESTED and
1240                          * choose another representative lwp.  If the new
1241                          * representative lwp is not stopped on an event of
1242                          * interest (other than PR_REQUESTED), we set the
1243                          * whole process running, else we leave the process
1244                          * stopped showing the next event of interest.
1245                          */
1246                         kthread_t *tx = NULL;
1247 
1248                         if (!(flags & PRSABORT) &&
1249                             t->t_whystop == PR_SYSENTRY &&
1250                             t->t_whatstop == SYS_lwp_exit)
1251                                 tx = t;         /* remember the exiting lwp */
1252                         t->t_whystop = PR_REQUESTED;
1253                         t->t_whatstop = 0;
1254                         thread_unlock(t);
1255                         t = prchoose(p);        /* returns locked t */
1256                         ASSERT(ISTOPPED(t) || VSTOPPED(t));
1257                         if (VSTOPPED(t) ||
1258                             t->t_whystop == PR_REQUESTED) {
1259                                 thread_unlock(t);
1260                                 allsetrun(p);
1261                         } else {
1262                                 thread_unlock(t);
1263                                 /*
1264                                  * As a special case, if the old representative
1265                                  * lwp was stopped on entry to _lwp_exit()
1266                                  * (and we are not aborting the system call),
1267                                  * we set the old representative lwp running.
1268                                  * We do this so that the next process stop
1269                                  * will find the exiting lwp gone.
1270                                  */
1271                                 if (tx != NULL) {
1272                                         thread_lock(tx);
1273                                         tx->t_schedflag |= TS_PSTART;
1274                                         t->t_dtrace_stop = 0;
1275                                         setrun_locked(tx);
1276                                         thread_unlock(tx);
1277                                 }
1278                         }
1279                 } else {
1280                         /*
1281                          * No event of interest; set all of the lwps running.
1282                          */
1283                         if (flags & PRSABORT) {
1284                                 t->t_proc_flag &=
1285                                     ~(TP_PRSTOP|TP_PRVSTOP|TP_STOPPING);
1286                                 setrun_locked(t);
1287                         }
1288                         thread_unlock(t);
1289                         allsetrun(p);
1290                 }
1291         }
1292         return (0);
1293 }
1294 
1295 /*
1296  * Wait until process/lwp stops or until timer expires.
1297  * Return EINTR for an interruption, -1 for timeout, else 0.
1298  */
1299 int
1300 pr_wait(prcommon_t *pcp,        /* prcommon referring to process/lwp */
1301         timestruc_t *ts,        /* absolute time of timeout, if any */
1302         int timecheck)
1303 {
1304         int rval;
1305 
1306         ASSERT(MUTEX_HELD(&pcp->prc_mutex));
1307         rval = cv_waituntil_sig(&pcp->prc_wait, &pcp->prc_mutex, ts, timecheck);
1308         mutex_exit(&pcp->prc_mutex);
1309         switch (rval) {
1310         case 0:
1311                 return (EINTR);
1312         case -1:
1313                 return (-1);
1314         default:
1315                 return (0);
1316         }
1317 }
1318 
1319 /*
1320  * Make all threads in the process runnable.
1321  */
1322 void
1323 allsetrun(proc_t *p)
1324 {
1325         kthread_t *t;
1326 
1327         ASSERT(MUTEX_HELD(&p->p_lock));
1328 
1329         if ((t = p->p_tlist) != NULL) {
1330                 do {
1331                         thread_lock(t);
1332                         ASSERT(!(t->t_proc_flag & TP_LWPEXIT));
1333                         t->t_proc_flag &= ~(TP_PRSTOP|TP_PRVSTOP|TP_STOPPING);
1334                         if (ISTOPPED(t)) {
1335                                 t->t_schedflag |= TS_PSTART;
1336                                 t->t_dtrace_stop = 0;
1337                                 setrun_locked(t);
1338                         }
1339                         thread_unlock(t);
1340                 } while ((t = t->t_forw) != p->p_tlist);
1341         }
1342 }
1343 
1344 /*
1345  * Wait for the process to die.
1346  * We do this after sending SIGKILL because we know it will
1347  * die soon and we want subsequent operations to return ENOENT.
1348  */
1349 void
1350 pr_wait_die(prnode_t *pnp)
1351 {
1352         proc_t *p;
1353 
1354         mutex_enter(&pidlock);
1355         while ((p = pnp->pr_common->prc_proc) != NULL && p->p_stat != SZOMB) {
1356                 if (!cv_wait_sig(&p->p_srwchan_cv, &pidlock))
1357                         break;
1358         }
1359         mutex_exit(&pidlock);
1360 }
1361 
1362 static void
1363 pr_settrace(proc_t *p, sigset_t *sp)
1364 {
1365         prdelset(sp, SIGKILL);
1366         prassignset(&p->p_sigmask, sp);
1367         if (!sigisempty(&p->p_sigmask))
1368                 p->p_proc_flag |= P_PR_TRACE;
1369         else if (prisempty(&p->p_fltmask)) {
1370                 user_t *up = PTOU(p);
1371                 if (up->u_systrap == 0)
1372                         p->p_proc_flag &= ~P_PR_TRACE;
1373         }
1374 }
1375 
1376 int
1377 pr_setsig(prnode_t *pnp, siginfo_t *sip)
1378 {
1379         int nsig = PROC_IS_BRANDED(curproc)? BROP(curproc)->b_nsig : NSIG;
1380         int sig = sip->si_signo;
1381         prcommon_t *pcp = pnp->pr_common;
1382         proc_t *p = pcp->prc_proc;
1383         kthread_t *t;
1384         klwp_t *lwp;
1385         int error = 0;
1386 
1387         t = pr_thread(pnp);     /* returns locked thread */
1388         thread_unlock(t);
1389         lwp = ttolwp(t);
1390         if (sig < 0 || sig >= nsig)
1391                 /* Zero allowed here */
1392                 error = EINVAL;
1393         else if (lwp->lwp_cursig == SIGKILL)
1394                 /* "can't happen", but just in case */
1395                 error = EBUSY;
1396         else if ((lwp->lwp_cursig = (uchar_t)sig) == 0) {
1397                 lwp->lwp_extsig = 0;
1398                 /*
1399                  * Discard current siginfo_t, if any.
1400                  */
1401                 if (lwp->lwp_curinfo) {
1402                         siginfofree(lwp->lwp_curinfo);
1403                         lwp->lwp_curinfo = NULL;
1404                 }
1405         } else {
1406                 kthread_t *tx;
1407                 sigqueue_t *sqp;
1408 
1409                 /* drop p_lock to do kmem_alloc(KM_SLEEP) */
1410                 mutex_exit(&p->p_lock);
1411                 sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP);
1412                 mutex_enter(&p->p_lock);
1413 
1414                 if (lwp->lwp_curinfo == NULL)
1415                         lwp->lwp_curinfo = sqp;
1416                 else
1417                         kmem_free(sqp, sizeof (sigqueue_t));
1418                 /*
1419                  * Copy contents of info to current siginfo_t.
1420                  */
1421                 bcopy(sip, &lwp->lwp_curinfo->sq_info,
1422                     sizeof (lwp->lwp_curinfo->sq_info));
1423                 /*
1424                  * Prevent contents published by si_zoneid-unaware /proc
1425                  * consumers from being incorrectly filtered.  Because
1426                  * an uninitialized si_zoneid is the same as
1427                  * GLOBAL_ZONEID, this means that you can't pr_setsig a
1428                  * process in a non-global zone with a siginfo which
1429                  * appears to come from the global zone.
1430                  */
1431                 if (SI_FROMUSER(sip) && sip->si_zoneid == 0)
1432                         lwp->lwp_curinfo->sq_info.si_zoneid =
1433                             p->p_zone->zone_id;
1434                 /*
1435                  * Side-effects for SIGKILL and jobcontrol signals.
1436                  */
1437                 if (sig == SIGKILL) {
1438                         p->p_flag |= SKILLED;
1439                         p->p_flag &= ~SEXTKILLED;
1440                 } else if (sig == SIGCONT) {
1441                         p->p_flag |= SSCONT;
1442                         sigdelq(p, NULL, SIGSTOP);
1443                         sigdelq(p, NULL, SIGTSTP);
1444                         sigdelq(p, NULL, SIGTTOU);
1445                         sigdelq(p, NULL, SIGTTIN);
1446                         sigdiffset(&p->p_sig, &stopdefault);
1447                         sigdiffset(&p->p_extsig, &stopdefault);
1448                         if ((tx = p->p_tlist) != NULL) {
1449                                 do {
1450                                         sigdelq(p, tx, SIGSTOP);
1451                                         sigdelq(p, tx, SIGTSTP);
1452                                         sigdelq(p, tx, SIGTTOU);
1453                                         sigdelq(p, tx, SIGTTIN);
1454                                         sigdiffset(&tx->t_sig, &stopdefault);
1455                                         sigdiffset(&tx->t_extsig, &stopdefault);
1456                                 } while ((tx = tx->t_forw) != p->p_tlist);
1457                         }
1458                 } else if (sigismember(&stopdefault, sig)) {
1459                         if (PTOU(p)->u_signal[sig-1] == SIG_DFL &&
1460                             (sig == SIGSTOP || !p->p_pgidp->pid_pgorphaned))
1461                                 p->p_flag &= ~SSCONT;
1462                         sigdelq(p, NULL, SIGCONT);
1463                         sigdelset(&p->p_sig, SIGCONT);
1464                         sigdelset(&p->p_extsig, SIGCONT);
1465                         if ((tx = p->p_tlist) != NULL) {
1466                                 do {
1467                                         sigdelq(p, tx, SIGCONT);
1468                                         sigdelset(&tx->t_sig, SIGCONT);
1469                                         sigdelset(&tx->t_extsig, SIGCONT);
1470                                 } while ((tx = tx->t_forw) != p->p_tlist);
1471                         }
1472                 }
1473                 thread_lock(t);
1474                 if (ISWAKEABLE(t) || ISWAITING(t)) {
1475                         /* Set signaled sleeping/waiting lwp running */
1476                         setrun_locked(t);
1477                 } else if (t->t_state == TS_STOPPED && sig == SIGKILL) {
1478                         /* If SIGKILL, set stopped lwp running */
1479                         p->p_stopsig = 0;
1480                         t->t_schedflag |= TS_XSTART | TS_PSTART;
1481                         t->t_dtrace_stop = 0;
1482                         setrun_locked(t);
1483                 }
1484                 t->t_sig_check = 1;  /* so ISSIG will be done */
1485                 thread_unlock(t);
1486                 /*
1487                  * More jobcontrol side-effects.
1488                  */
1489                 if (sig == SIGCONT && (tx = p->p_tlist) != NULL) {
1490                         p->p_stopsig = 0;
1491                         do {
1492                                 thread_lock(tx);
1493                                 if (tx->t_state == TS_STOPPED &&
1494                                     tx->t_whystop == PR_JOBCONTROL) {
1495                                         tx->t_schedflag |= TS_XSTART;
1496                                         setrun_locked(tx);
1497                                 }
1498                                 thread_unlock(tx);
1499                         } while ((tx = tx->t_forw) != p->p_tlist);
1500                 }
1501         }
1502         return (error);
1503 }
1504 
1505 int
1506 pr_kill(prnode_t *pnp, int sig, cred_t *cr)
1507 {
1508         int nsig = PROC_IS_BRANDED(curproc)? BROP(curproc)->b_nsig : NSIG;
1509         prcommon_t *pcp = pnp->pr_common;
1510         proc_t *p = pcp->prc_proc;
1511         k_siginfo_t info;
1512 
1513         if (sig <= 0 || sig >= nsig)
1514                 return (EINVAL);
1515 
1516         bzero(&info, sizeof (info));
1517         info.si_signo = sig;
1518         info.si_code = SI_USER;
1519         info.si_pid = curproc->p_pid;
1520         info.si_ctid = PRCTID(curproc);
1521         info.si_zoneid = getzoneid();
1522         info.si_uid = crgetruid(cr);
1523         sigaddq(p, (pcp->prc_flags & PRC_LWP)?
1524             pcp->prc_thread : NULL, &info, KM_NOSLEEP);
1525 
1526         return (0);
1527 }
1528 
1529 int
1530 pr_unkill(prnode_t *pnp, int sig)
1531 {
1532         int nsig = PROC_IS_BRANDED(curproc)? BROP(curproc)->b_nsig : NSIG;
1533         prcommon_t *pcp = pnp->pr_common;
1534         proc_t *p = pcp->prc_proc;
1535         sigqueue_t *infop = NULL;
1536 
1537         if (sig <= 0 || sig >= nsig || sig == SIGKILL)
1538                 return (EINVAL);
1539 
1540         if (pcp->prc_flags & PRC_LWP)
1541                 sigdeq(p, pcp->prc_thread, sig, &infop);
1542         else
1543                 sigdeq(p, NULL, sig, &infop);
1544 
1545         if (infop)
1546                 siginfofree(infop);
1547 
1548         return (0);
1549 }
1550 
1551 int
1552 pr_nice(proc_t *p, int nice, cred_t *cr)
1553 {
1554         kthread_t *t;
1555         int err;
1556         int error = 0;
1557 
1558         t = p->p_tlist;
1559         do {
1560                 ASSERT(!(t->t_proc_flag & TP_LWPEXIT));
1561                 err = CL_DONICE(t, cr, nice, (int *)NULL);
1562                 schedctl_set_cidpri(t);
1563                 if (error == 0)
1564                         error = err;
1565         } while ((t = t->t_forw) != p->p_tlist);
1566 
1567         return (error);
1568 }
1569 
1570 void
1571 pr_setentryexit(proc_t *p, sysset_t *sysset, int entry)
1572 {
1573         user_t *up = PTOU(p);
1574 
1575         if (entry) {
1576                 prassignset(&up->u_entrymask, sysset);
1577         } else {
1578                 prassignset(&up->u_exitmask, sysset);
1579         }
1580         if (!prisempty(&up->u_entrymask) ||
1581             !prisempty(&up->u_exitmask)) {
1582                 up->u_systrap = 1;
1583                 p->p_proc_flag |= P_PR_TRACE;
1584                 set_proc_sys(p);        /* set pre and post-sys flags */
1585         } else {
1586                 up->u_systrap = 0;
1587                 if (sigisempty(&p->p_sigmask) &&
1588                     prisempty(&p->p_fltmask))
1589                         p->p_proc_flag &= ~P_PR_TRACE;
1590         }
1591 }
1592 
/*
 * Every mode flag that pr_set() and pr_unset() accept; a request carrying
 * any bit outside this mask is rejected with EINVAL.
 */
#define ALLFLAGS        \
        (PR_FORK | PR_RLC | PR_KLC | PR_ASYNC | \
        PR_BPTADJ | PR_MSACCT | PR_MSFORK | PR_PTRACE)
1595 
1596 int
1597 pr_set(proc_t *p, long flags)
1598 {
1599         if ((p->p_flag & SSYS) || p->p_as == &kas)
1600                 return (EBUSY);
1601 
1602         if (flags & ~ALLFLAGS)
1603                 return (EINVAL);
1604 
1605         if (flags & PR_FORK)
1606                 p->p_proc_flag |= P_PR_FORK;
1607         if (flags & PR_RLC)
1608                 p->p_proc_flag |= P_PR_RUNLCL;
1609         if (flags & PR_KLC)
1610                 p->p_proc_flag |= P_PR_KILLCL;
1611         if (flags & PR_ASYNC)
1612                 p->p_proc_flag |= P_PR_ASYNC;
1613         if (flags & PR_BPTADJ)
1614                 p->p_proc_flag |= P_PR_BPTADJ;
1615         if (flags & PR_MSACCT)
1616                 if ((p->p_flag & SMSACCT) == 0)
1617                         estimate_msacct(p->p_tlist, gethrtime());
1618         if (flags & PR_MSFORK)
1619                 p->p_flag |= SMSFORK;
1620         if (flags & PR_PTRACE) {
1621                 p->p_proc_flag |= P_PR_PTRACE;
1622                 /* ptraced process must die if parent dead */
1623                 if (p->p_ppid == 1)
1624                         sigtoproc(p, NULL, SIGKILL);
1625         }
1626 
1627         return (0);
1628 }
1629 
1630 int
1631 pr_unset(proc_t *p, long flags)
1632 {
1633         if ((p->p_flag & SSYS) || p->p_as == &kas)
1634                 return (EBUSY);
1635 
1636         if (flags & ~ALLFLAGS)
1637                 return (EINVAL);
1638 
1639         if (flags & PR_FORK)
1640                 p->p_proc_flag &= ~P_PR_FORK;
1641         if (flags & PR_RLC)
1642                 p->p_proc_flag &= ~P_PR_RUNLCL;
1643         if (flags & PR_KLC)
1644                 p->p_proc_flag &= ~P_PR_KILLCL;
1645         if (flags & PR_ASYNC)
1646                 p->p_proc_flag &= ~P_PR_ASYNC;
1647         if (flags & PR_BPTADJ)
1648                 p->p_proc_flag &= ~P_PR_BPTADJ;
1649         if (flags & PR_MSACCT)
1650                 disable_msacct(p);
1651         if (flags & PR_MSFORK)
1652                 p->p_flag &= ~SMSFORK;
1653         if (flags & PR_PTRACE)
1654                 p->p_proc_flag &= ~P_PR_PTRACE;
1655 
1656         return (0);
1657 }
1658 
1659 static int
1660 pr_setfpregs(prnode_t *pnp, prfpregset_t *prfpregset)
1661 {
1662         proc_t *p = pnp->pr_common->prc_proc;
1663         kthread_t *t = pr_thread(pnp);  /* returns locked thread */
1664 
1665         if (!ISTOPPED(t) && !VSTOPPED(t) && !DSTOPPED(t)) {
1666                 thread_unlock(t);
1667                 return (EBUSY);
1668         }
1669         if (!prhasfp()) {
1670                 thread_unlock(t);
1671                 return (EINVAL);        /* No FP support */
1672         }
1673 
1674         /* drop p_lock while touching the lwp's stack */
1675         thread_unlock(t);
1676         mutex_exit(&p->p_lock);
1677         prsetprfpregs(ttolwp(t), prfpregset);
1678         mutex_enter(&p->p_lock);
1679 
1680         return (0);
1681 }
1682 
1683 #ifdef  _SYSCALL32_IMPL
1684 static int
1685 pr_setfpregs32(prnode_t *pnp, prfpregset32_t *prfpregset)
1686 {
1687         proc_t *p = pnp->pr_common->prc_proc;
1688         kthread_t *t = pr_thread(pnp);  /* returns locked thread */
1689 
1690         if (!ISTOPPED(t) && !VSTOPPED(t) && !DSTOPPED(t)) {
1691                 thread_unlock(t);
1692                 return (EBUSY);
1693         }
1694         if (!prhasfp()) {
1695                 thread_unlock(t);
1696                 return (EINVAL);        /* No FP support */
1697         }
1698 
1699         /* drop p_lock while touching the lwp's stack */
1700         thread_unlock(t);
1701         mutex_exit(&p->p_lock);
1702         prsetprfpregs32(ttolwp(t), prfpregset);
1703         mutex_enter(&p->p_lock);
1704 
1705         return (0);
1706 }
1707 #endif  /* _SYSCALL32_IMPL */
1708 
1709 #if defined(__sparc)
1710 /* ARGSUSED */
1711 static int
1712 pr_setxregs(prnode_t *pnp, prxregset_t *prxregset)
1713 {
1714         proc_t *p = pnp->pr_common->prc_proc;
1715         kthread_t *t = pr_thread(pnp);  /* returns locked thread */
1716 
1717         if (!ISTOPPED(t) && !VSTOPPED(t) && !DSTOPPED(t)) {
1718                 thread_unlock(t);
1719                 return (EBUSY);
1720         }
1721         thread_unlock(t);
1722 
1723         if (!prhasx(p))
1724                 return (EINVAL);        /* No extra register support */
1725 
1726         /* drop p_lock while touching the lwp's stack */
1727         mutex_exit(&p->p_lock);
1728         prsetprxregs(ttolwp(t), (caddr_t)prxregset);
1729         mutex_enter(&p->p_lock);
1730 
1731         return (0);
1732 }
1733 
1734 static int
1735 pr_setasrs(prnode_t *pnp, asrset_t asrset)
1736 {
1737         proc_t *p = pnp->pr_common->prc_proc;
1738         kthread_t *t = pr_thread(pnp);  /* returns locked thread */
1739 
1740         if (!ISTOPPED(t) && !VSTOPPED(t) && !DSTOPPED(t)) {
1741                 thread_unlock(t);
1742                 return (EBUSY);
1743         }
1744         thread_unlock(t);
1745 
1746         /* drop p_lock while touching the lwp's stack */
1747         mutex_exit(&p->p_lock);
1748         prsetasregs(ttolwp(t), asrset);
1749         mutex_enter(&p->p_lock);
1750 
1751         return (0);
1752 }
1753 #endif
1754 
1755 static int
1756 pr_setvaddr(prnode_t *pnp, caddr_t vaddr)
1757 {
1758         proc_t *p = pnp->pr_common->prc_proc;
1759         kthread_t *t = pr_thread(pnp);  /* returns locked thread */
1760 
1761         if (!ISTOPPED(t) && !VSTOPPED(t) && !DSTOPPED(t)) {
1762                 thread_unlock(t);
1763                 return (EBUSY);
1764         }
1765 
1766         /* drop p_lock while touching the lwp's stack */
1767         thread_unlock(t);
1768         mutex_exit(&p->p_lock);
1769         prsvaddr(ttolwp(t), vaddr);
1770         mutex_enter(&p->p_lock);
1771 
1772         return (0);
1773 }
1774 
1775 void
1776 pr_sethold(prnode_t *pnp, sigset_t *sp)
1777 {
1778         proc_t *p = pnp->pr_common->prc_proc;
1779         kthread_t *t = pr_thread(pnp);  /* returns locked thread */
1780 
1781         schedctl_finish_sigblock(t);
1782         sigutok(sp, &t->t_hold);
1783         if (ISWAKEABLE(t) &&
1784             (fsig(&p->p_sig, t) || fsig(&t->t_sig, t)))
1785                 setrun_locked(t);
1786         t->t_sig_check = 1;  /* so thread will see new holdmask */
1787         thread_unlock(t);
1788 }
1789 
1790 void
1791 pr_setfault(proc_t *p, fltset_t *fltp)
1792 {
1793         prassignset(&p->p_fltmask, fltp);
1794         if (!prisempty(&p->p_fltmask))
1795                 p->p_proc_flag |= P_PR_TRACE;
1796         else if (sigisempty(&p->p_sigmask)) {
1797                 user_t *up = PTOU(p);
1798                 if (up->u_systrap == 0)
1799                         p->p_proc_flag &= ~P_PR_TRACE;
1800         }
1801 }
1802 
1803 static int
1804 pr_clearsig(prnode_t *pnp)
1805 {
1806         kthread_t *t = pr_thread(pnp);  /* returns locked thread */
1807         klwp_t *lwp = ttolwp(t);
1808 
1809         thread_unlock(t);
1810         if (lwp->lwp_cursig == SIGKILL)
1811                 return (EBUSY);
1812 
1813         /*
1814          * Discard current siginfo_t, if any.
1815          */
1816         lwp->lwp_cursig = 0;
1817         lwp->lwp_extsig = 0;
1818         if (lwp->lwp_curinfo) {
1819                 siginfofree(lwp->lwp_curinfo);
1820                 lwp->lwp_curinfo = NULL;
1821         }
1822 
1823         return (0);
1824 }
1825 
1826 static int
1827 pr_clearflt(prnode_t *pnp)
1828 {
1829         kthread_t *t = pr_thread(pnp);  /* returns locked thread */
1830 
1831         thread_unlock(t);
1832         ttolwp(t)->lwp_curflt = 0;
1833 
1834         return (0);
1835 }
1836 
/*
 * Set or clear a watched area in the target process, as described by *pwp.
 *
 * A request whose pr_wflags has no bits other than WA_TRAPAFTER is handed
 * to clear_watched_area(); any other request goes to set_watched_area().
 *
 * *unlocked is set to 0 on entry.  It is set to 1 only on the paths where
 * re-acquiring the process with prlock() failed.
 *
 * Returns:
 *      EBUSY   the target is a system process
 *      EINVAL  the range wraps, unknown flags were given, or access flags
 *              were given with a zero size
 *      E2BIG   the request spans more than 2 * prnwatch pages
 *      0       the start address is at or above as->a_userlimit (nothing
 *              is done), or the operation succeeded
 *      other   the error from prlock(), clear_watched_area() or
 *              set_watched_area()
 */
static int
pr_watch(prnode_t *pnp, prwatch_t *pwp, int *unlocked)
{
        proc_t *p = pnp->pr_common->prc_proc;
        struct as *as = p->p_as;
        uintptr_t vaddr = pwp->pr_vaddr;
        size_t size = pwp->pr_size;
        int wflags = pwp->pr_wflags;
        ulong_t newpage = 0;
        struct watched_area *pwa;
        int error;

        *unlocked = 0;

        /*
         * Can't apply to a system process.
         */
        if ((p->p_flag & SSYS) || p->p_as == &kas)
                return (EBUSY);

        /*
         * Verify that the address range does not wrap
         * and that only the proper flags were specified.
         * A request with no access flags ignores the supplied size.
         */
        if ((wflags & ~WA_TRAPAFTER) == 0)
                size = 0;
        if (vaddr + size < vaddr ||
            (wflags & ~(WA_READ|WA_WRITE|WA_EXEC|WA_TRAPAFTER)) != 0 ||
            ((wflags & ~WA_TRAPAFTER) != 0 && size == 0))
                return (EINVAL);

        /*
         * Don't let the address range go above as->a_userlimit.
         * There is no error here, just a limitation.
         */
        if (vaddr >= (uintptr_t)as->a_userlimit)
                return (0);
        if (vaddr + size > (uintptr_t)as->a_userlimit)
                size = (uintptr_t)as->a_userlimit - vaddr;

        /*
         * Compute maximum number of pages this will add.
         * The span is measured from the start of the page containing vaddr.
         */
        if ((wflags & ~WA_TRAPAFTER) != 0) {
                ulong_t pagespan = (vaddr + size) - (vaddr & PAGEMASK);
                newpage = btopr(pagespan);
                if (newpage > 2 * prnwatch)
                        return (E2BIG);
        }

        /*
         * Force the process to be fully stopped.
         */
        if (p == curproc) {
                /*
                 * Operating on ourselves: release the /proc lock, loop
                 * until holdwatch() returns 0, then take the lock again.
                 */
                prunlock(pnp);
                while (holdwatch() != 0)
                        continue;
                if ((error = prlock(pnp, ZNO)) != 0) {
                        continuelwps(p);
                        *unlocked = 1;
                        return (error);
                }
        } else {
                pauselwps(p);
                while (pr_allstopped(p, 0) > 0) {
                        /*
                         * This cv/mutex pair is persistent even
                         * if the process disappears after we
                         * unmark it and drop p->p_lock.
                         */
                        kcondvar_t *cv = &pr_pid_cv[p->p_slot];
                        kmutex_t *mp = &p->p_lock;

                        prunmark(p);
                        (void) cv_wait(cv, mp);
                        mutex_exit(mp);
                        if ((error = prlock(pnp, ZNO)) != 0) {
                                /*
                                 * Unpause the process if it exists.
                                 */
                                p = pr_p_lock(pnp);
                                mutex_exit(&pr_pidlock);
                                if (p != NULL) {
                                        unpauselwps(p);
                                        prunlock(pnp);
                                }
                                *unlocked = 1;
                                return (error);
                        }
                }
        }

        /*
         * Drop p->p_lock in order to perform the rest of this.
         * The process is still locked with the P_PR_LOCK flag.
         */
        mutex_exit(&p->p_lock);

        /*
         * NOTE(review): pwa is never freed in this function; presumably
         * clear_watched_area()/set_watched_area() take ownership of it --
         * confirm against their definitions.
         */
        pwa = kmem_alloc(sizeof (struct watched_area), KM_SLEEP);
        pwa->wa_vaddr = (caddr_t)vaddr;
        pwa->wa_eaddr = (caddr_t)vaddr + size;
        pwa->wa_flags = (ulong_t)wflags;

        error = ((pwa->wa_flags & ~WA_TRAPAFTER) == 0)?
            clear_watched_area(p, pwa) : set_watched_area(p, pwa);

        /*
         * Re-take p->p_lock and let the lwps run again, mirroring the
         * way they were stopped above.
         */
        if (p == curproc) {
                setallwatch();
                mutex_enter(&p->p_lock);
                continuelwps(p);
        } else {
                mutex_enter(&p->p_lock);
                unpauselwps(p);
        }

        return (error);
}
1954 
/*
 * True when the thread is stopped for job control (PR_JOBCONTROL) while a
 * /proc directed stop (TP_PRSTOP) is also in effect for it.
 */
#define JDSTOPPED(t)    \
        ((t)->t_state == TS_STOPPED && \
        (t)->t_whystop == PR_JOBCONTROL && \
        ((t)->t_proc_flag & TP_PRSTOP) != 0)
1960 
/*
 * pr_agent() creates the agent lwp. If the process is exiting while
 * we are creating an agent lwp, then exitlwps() waits until the
 * agent has been created using prbarrier().
 *
 * The new lwp is created in the TS_STOPPED state with the registers given
 * in prgregset, is recorded in p->p_agenttp, and is flagged TP_PRSTOP.
 * The function then loops until it sees the agent stopped (ISTOPPED) or
 * an error or interrupt ends the wait.
 *
 * *unlocked is set to 0 on entry and is 1 on return whenever the function
 * returns after prunlock() without a subsequent successful prlock().
 *
 * Returns EBUSY or ENOMEM if the agent could not be created, the error
 * from prlock(), or ENOENT if the agent vanished while we waited.  When
 * no error occurred (including an interrupted wait) the return value
 * is -1.
 * NOTE(review): -1 looks like a sentinel interpreted by the caller --
 * confirm at the call site.
 */
static int
pr_agent(prnode_t *pnp, prgregset_t prgregset, int *unlocked)
{
        proc_t *p = pnp->pr_common->prc_proc;
        prcommon_t *pcp;
        kthread_t *t;
        kthread_t *ct;
        klwp_t *clwp;
        k_sigset_t smask;
        int cid;
        void *bufp = NULL;
        int error;

        *unlocked = 0;

        /*
         * Cannot create the /proc agent lwp if :-
         * - the process is not fully stopped or directed to stop.
         * - there is an agent lwp already.
         * - the process has been killed.
         * - the process is exiting.
         * - it's a vfork(2) parent.
         */
        t = prchoose(p);        /* returns locked thread */
        ASSERT(t != NULL);

        if ((!ISTOPPED(t) && !VSTOPPED(t) && !SUSPENDED(t) && !JDSTOPPED(t)) ||
            p->p_agenttp != NULL ||
            (p->p_flag & (SKILLED | SEXITING | SVFWAIT))) {
                thread_unlock(t);
                return (EBUSY);
        }

        thread_unlock(t);
        mutex_exit(&p->p_lock);

        /* The agent starts with every signal held except those in cantmask. */
        sigfillset(&smask);
        sigdiffset(&smask, &cantmask);
        clwp = lwp_create(lwp_rtt, NULL, 0, p, TS_STOPPED,
            t->t_pri, &smask, NOCLASS, 0);
        if (clwp == NULL) {
                /* re-take p_lock, which was held on entry, before failing */
                mutex_enter(&p->p_lock);
                return (ENOMEM);
        }
        prsetprregs(clwp, prgregset, 1);
retry:
        /*
         * The class buffer is allocated with KM_SLEEP while p_lock is not
         * held; t_cid is re-checked once the lock has been taken.
         */
        cid = t->t_cid;
        (void) CL_ALLOC(&bufp, cid, KM_SLEEP);
        mutex_enter(&p->p_lock);
        if (cid != t->t_cid) {
                /*
                 * Someone just changed this thread's scheduling class,
                 * so try pre-allocating the buffer again.  Hopefully we
                 * don't hit this often.
                 */
                mutex_exit(&p->p_lock);
                CL_FREE(cid, bufp);
                goto retry;
        }

        /* Give the agent the same scheduling class as the chosen thread. */
        clwp->lwp_ap = clwp->lwp_arg;
        clwp->lwp_eosys = NORMALRETURN;
        ct = lwptot(clwp);
        ct->t_clfuncs = t->t_clfuncs;
        CL_FORK(t, ct, bufp);
        ct->t_cid = t->t_cid;
        ct->t_proc_flag |= TP_PRSTOP;
        /*
         * Setting t_sysnum to zero causes post_syscall()
         * to bypass all syscall checks and go directly to
         *      if (issig()) psig();
         * so that the agent lwp will stop in issig_forreal()
         * showing PR_REQUESTED.
         */
        ct->t_sysnum = 0;
        ct->t_post_sys = 1;
        ct->t_sig_check = 1;
        p->p_agenttp = ct;
        ct->t_proc_flag &= ~TP_HOLDLWP;

        /*
         * prc_mutex is taken before lwp_create_done() and is held on entry
         * to each pr_wait() call below.
         */
        pcp = pnp->pr_pcommon;
        mutex_enter(&pcp->prc_mutex);

        lwp_create_done(ct);

        /*
         * Don't return until the agent is stopped on PR_REQUESTED.
         */

        for (;;) {
                prunlock(pnp);
                *unlocked = 1;

                /*
                 * Wait for the agent to stop and notify us.
                 * If we've been interrupted, return that information.
                 * (EINTR is turned into 0 here, so the caller sees -1
                 * with *unlocked set to 1.)
                 */
                error = pr_wait(pcp, NULL, 0);
                if (error == EINTR) {
                        error = 0;
                        break;
                }

                /*
                 * Confirm that the agent LWP has stopped.
                 */

                if ((error = prlock(pnp, ZNO)) != 0)
                        break;
                *unlocked = 0;

                /*
                 * Since we dropped the lock on the process, the agent
                 * may have disappeared or changed. Grab the current
                 * agent and fail if it has disappeared.
                 */
                if ((ct = p->p_agenttp) == NULL) {
                        error = ENOENT;
                        break;
                }

                mutex_enter(&pcp->prc_mutex);
                thread_lock(ct);

                if (ISTOPPED(ct)) {
                        thread_unlock(ct);
                        mutex_exit(&pcp->prc_mutex);
                        break;
                }

                /* not stopped yet: go around again with prc_mutex held */
                thread_unlock(ct);
        }

        return (error ? error : -1);
}
2101 
2102 static int
2103 pr_rdwr(proc_t *p, enum uio_rw rw, priovec_t *pio)
2104 {
2105         caddr_t base = (caddr_t)pio->pio_base;
2106         size_t cnt = pio->pio_len;
2107         uintptr_t offset = (uintptr_t)pio->pio_offset;
2108         struct uio auio;
2109         struct iovec aiov;
2110         int error = 0;
2111 
2112         if ((p->p_flag & SSYS) || p->p_as == &kas)
2113                 error = EIO;
2114         else if ((base + cnt) < base || (offset + cnt) < offset)
2115                 error = EINVAL;
2116         else if (cnt != 0) {
2117                 aiov.iov_base = base;
2118                 aiov.iov_len = cnt;
2119 
2120                 auio.uio_loffset = offset;
2121                 auio.uio_iov = &aiov;
2122                 auio.uio_iovcnt = 1;
2123                 auio.uio_resid = cnt;
2124                 auio.uio_segflg = UIO_USERSPACE;
2125                 auio.uio_llimit = (longlong_t)MAXOFFSET_T;
2126                 auio.uio_fmode = FREAD|FWRITE;
2127                 auio.uio_extflg = UIO_COPY_DEFAULT;
2128 
2129                 mutex_exit(&p->p_lock);
2130                 error = prusrio(p, rw, &auio, 0);
2131                 mutex_enter(&p->p_lock);
2132 
2133                 /*
2134                  * We have no way to return the i/o count,
2135                  * like read() or write() would do, so we
2136                  * return an error if the i/o was truncated.
2137                  */
2138                 if (auio.uio_resid != 0 && error == 0)
2139                         error = EIO;
2140         }
2141 
2142         return (error);
2143 }
2144 
2145 static int
2146 pr_scred(proc_t *p, prcred_t *prcred, cred_t *cr, boolean_t dogrps)
2147 {
2148         kthread_t *t;
2149         cred_t *oldcred;
2150         cred_t *newcred;
2151         uid_t oldruid;
2152         int error;
2153         zone_t *zone = crgetzone(cr);
2154 
2155         if (!VALID_UID(prcred->pr_euid, zone) ||
2156             !VALID_UID(prcred->pr_ruid, zone) ||
2157             !VALID_UID(prcred->pr_suid, zone) ||
2158             !VALID_GID(prcred->pr_egid, zone) ||
2159             !VALID_GID(prcred->pr_rgid, zone) ||
2160             !VALID_GID(prcred->pr_sgid, zone))
2161                 return (EINVAL);
2162 
2163         if (dogrps) {
2164                 int ngrp = prcred->pr_ngroups;
2165                 int i;
2166 
2167                 if (ngrp < 0 || ngrp > ngroups_max)
2168                         return (EINVAL);
2169 
2170                 for (i = 0; i < ngrp; i++) {
2171                         if (!VALID_GID(prcred->pr_groups[i], zone))
2172                                 return (EINVAL);
2173                 }
2174         }
2175 
2176         error = secpolicy_allow_setid(cr, prcred->pr_euid, B_FALSE);
2177 
2178         if (error == 0 && prcred->pr_ruid != prcred->pr_euid)
2179                 error = secpolicy_allow_setid(cr, prcred->pr_ruid, B_FALSE);
2180 
2181         if (error == 0 && prcred->pr_suid != prcred->pr_euid &&
2182             prcred->pr_suid != prcred->pr_ruid)
2183                 error = secpolicy_allow_setid(cr, prcred->pr_suid, B_FALSE);
2184 
2185         if (error)
2186                 return (error);
2187 
2188         mutex_exit(&p->p_lock);
2189 
2190         /* hold old cred so it doesn't disappear while we dup it */
2191         mutex_enter(&p->p_crlock);
2192         crhold(oldcred = p->p_cred);
2193         mutex_exit(&p->p_crlock);
2194         newcred = crdup(oldcred);
2195         oldruid = crgetruid(oldcred);
2196         crfree(oldcred);
2197 
2198         /* Error checking done above */
2199         (void) crsetresuid(newcred, prcred->pr_ruid, prcred->pr_euid,
2200             prcred->pr_suid);
2201         (void) crsetresgid(newcred, prcred->pr_rgid, prcred->pr_egid,
2202             prcred->pr_sgid);
2203 
2204         if (dogrps) {
2205                 (void) crsetgroups(newcred, prcred->pr_ngroups,
2206                     prcred->pr_groups);
2207 
2208         }
2209 
2210         mutex_enter(&p->p_crlock);
2211         oldcred = p->p_cred;
2212         p->p_cred = newcred;
2213         mutex_exit(&p->p_crlock);
2214         crfree(oldcred);
2215 
2216         /*
2217          * Keep count of processes per uid consistent.
2218          */
2219         if (oldruid != prcred->pr_ruid) {
2220                 zoneid_t zoneid = crgetzoneid(newcred);
2221 
2222                 mutex_enter(&pidlock);
2223                 upcount_dec(oldruid, zoneid);
2224                 upcount_inc(prcred->pr_ruid, zoneid);
2225                 mutex_exit(&pidlock);
2226         }
2227 
2228         /*
2229          * Broadcast the cred change to the threads.
2230          */
2231         mutex_enter(&p->p_lock);
2232         t = p->p_tlist;
2233         do {
2234                 t->t_pre_sys = 1; /* so syscall will get new cred */
2235         } while ((t = t->t_forw) != p->p_tlist);
2236 
2237         return (0);
2238 }
2239 
2240 /*
2241  * Change process credentials to specified zone.  Used to temporarily
2242  * set a process to run in the global zone; only transitions between
2243  * the process's actual zone and the global zone are allowed.
2244  */
2245 static int
2246 pr_szoneid(proc_t *p, zoneid_t zoneid, cred_t *cr)
2247 {
2248         kthread_t *t;
2249         cred_t *oldcred;
2250         cred_t *newcred;
2251         zone_t *zptr;
2252         zoneid_t oldzoneid;
2253 
2254         if (secpolicy_zone_config(cr) != 0)
2255                 return (EPERM);
2256         if (zoneid != GLOBAL_ZONEID && zoneid != p->p_zone->zone_id)
2257                 return (EINVAL);
2258         if ((zptr = zone_find_by_id(zoneid)) == NULL)
2259                 return (EINVAL);
2260         mutex_exit(&p->p_lock);
2261         mutex_enter(&p->p_crlock);
2262         oldcred = p->p_cred;
2263         crhold(oldcred);
2264         mutex_exit(&p->p_crlock);
2265         newcred = crdup(oldcred);
2266         oldzoneid = crgetzoneid(oldcred);
2267         crfree(oldcred);
2268 
2269         crsetzone(newcred, zptr);
2270         zone_rele(zptr);
2271 
2272         mutex_enter(&p->p_crlock);
2273         oldcred = p->p_cred;
2274         p->p_cred = newcred;
2275         mutex_exit(&p->p_crlock);
2276         crfree(oldcred);
2277 
2278         /*
2279          * The target process is changing zones (according to its cred), so
2280          * update the per-zone upcounts, which are based on process creds.
2281          */
2282         if (oldzoneid != zoneid) {
2283                 uid_t ruid = crgetruid(newcred);
2284 
2285                 mutex_enter(&pidlock);
2286                 upcount_dec(ruid, oldzoneid);
2287                 upcount_inc(ruid, zoneid);
2288                 mutex_exit(&pidlock);
2289         }
2290         /*
2291          * Broadcast the cred change to the threads.
2292          */
2293         mutex_enter(&p->p_lock);
2294         t = p->p_tlist;
2295         do {
2296                 t->t_pre_sys = 1;    /* so syscall will get new cred */
2297         } while ((t = t->t_forw) != p->p_tlist);
2298 
2299         return (0);
2300 }
2301 
2302 static int
2303 pr_spriv(proc_t *p, prpriv_t *prpriv, cred_t *cr)
2304 {
2305         kthread_t *t;
2306         int err;
2307 
2308         ASSERT(MUTEX_HELD(&p->p_lock));
2309 
2310         if ((err = priv_pr_spriv(p, prpriv, cr)) == 0) {
2311                 /*
2312                  * Broadcast the cred change to the threads.
2313                  */
2314                 t = p->p_tlist;
2315                 do {
2316                         t->t_pre_sys = 1; /* so syscall will get new cred */
2317                 } while ((t = t->t_forw) != p->p_tlist);
2318         }
2319 
2320         return (err);
2321 }
2322 
2323 /*
2324  * Return -1 if the process is the parent of a vfork(1) whose child has yet to
2325  * terminate or perform an exec(2).
2326  *
2327  * Returns 0 if the process is fully stopped except for the current thread (if
2328  * we are operating on our own process), 1 otherwise.
2329  *
2330  * If the watchstop flag is set, then we ignore threads with TP_WATCHSTOP set.
2331  * See holdwatch() for details.
2332  */
2333 int
2334 pr_allstopped(proc_t *p, int watchstop)
2335 {
2336         kthread_t *t;
2337         int rv = 0;
2338 
2339         ASSERT(MUTEX_HELD(&p->p_lock));
2340 
2341         if (p->p_flag & SVFWAIT) /* waiting for vfork'd child to exec */
2342                 return (-1);
2343 
2344         if ((t = p->p_tlist) != NULL) {
2345                 do {
2346                         if (t == curthread || VSTOPPED(t) ||
2347                             (watchstop && (t->t_proc_flag & TP_WATCHSTOP)))
2348                                 continue;
2349                         thread_lock(t);
2350                         switch (t->t_state) {
2351                         case TS_ZOMB:
2352                         case TS_STOPPED:
2353                                 break;
2354                         case TS_SLEEP:
2355                                 if (!(t->t_flag & T_WAKEABLE) ||
2356                                     t->t_wchan0 == NULL)
2357                                         rv = 1;
2358                                 break;
2359                         default:
2360                                 rv = 1;
2361                                 break;
2362                         }
2363                         thread_unlock(t);
2364                 } while (rv == 0 && (t = t->t_forw) != p->p_tlist);
2365         }
2366 
2367         return (rv);
2368 }
2369 
2370 /*
2371  * Cause all lwps in the process to pause (for watchpoint operations).
2372  */
2373 static void
2374 pauselwps(proc_t *p)
2375 {
2376         kthread_t *t;
2377 
2378         ASSERT(MUTEX_HELD(&p->p_lock));
2379         ASSERT(p != curproc);
2380 
2381         if ((t = p->p_tlist) != NULL) {
2382                 do {
2383                         thread_lock(t);
2384                         t->t_proc_flag |= TP_PAUSE;
2385                         aston(t);
2386                         if ((ISWAKEABLE(t) && (t->t_wchan0 == NULL)) ||
2387                             ISWAITING(t)) {
2388                                 setrun_locked(t);
2389                         }
2390                         prpokethread(t);
2391                         thread_unlock(t);
2392                 } while ((t = t->t_forw) != p->p_tlist);
2393         }
2394 }
2395 
2396 /*
2397  * undo the effects of pauselwps()
2398  */
2399 static void
2400 unpauselwps(proc_t *p)
2401 {
2402         kthread_t *t;
2403 
2404         ASSERT(MUTEX_HELD(&p->p_lock));
2405         ASSERT(p != curproc);
2406 
2407         if ((t = p->p_tlist) != NULL) {
2408                 do {
2409                         thread_lock(t);
2410                         t->t_proc_flag &= ~TP_PAUSE;
2411                         if (t->t_state == TS_STOPPED) {
2412                                 t->t_schedflag |= TS_UNPAUSE;
2413                                 t->t_dtrace_stop = 0;
2414                                 setrun_locked(t);
2415                         }
2416                         thread_unlock(t);
2417                 } while ((t = t->t_forw) != p->p_tlist);
2418         }
2419 }
2420 
/*
 * Cancel all watched areas.  Called from prclose().
 *
 * Must be entered with p->p_lock held and the process marked P_PR_LOCK.
 * If no watchpoints are active the process is returned at once.
 * Otherwise the process is paused, its watchpoints are freed, every
 * watched page is queued to have its original protections restored, and
 * the process is resumed.
 *
 * Returns the process pointer, or NULL if the process disappeared while
 * we were waiting for it to stop.
 */
proc_t *
pr_cancel_watch(prnode_t *pnp)
{
        proc_t *p = pnp->pr_pcommon->prc_proc;
        struct as *as;
        kthread_t *t;

        ASSERT(MUTEX_HELD(&p->p_lock) && (p->p_proc_flag & P_PR_LOCK));

        if (!pr_watch_active(p))
                return (p);

        /*
         * Pause the process before dealing with the watchpoints.
         */
        if (p == curproc) {
                /*
                 * Operating on ourselves: release the /proc lock, loop
                 * until holdwatch() returns 0, then lock the process again.
                 */
                prunlock(pnp);
                while (holdwatch() != 0)
                        continue;
                p = pr_p_lock(pnp);
                mutex_exit(&pr_pidlock);
                ASSERT(p == curproc);
        } else {
                pauselwps(p);
                while (p != NULL && pr_allstopped(p, 0) > 0) {
                        /*
                         * This cv/mutex pair is persistent even
                         * if the process disappears after we
                         * unmark it and drop p->p_lock.
                         */
                        kcondvar_t *cv = &pr_pid_cv[p->p_slot];
                        kmutex_t *mp = &p->p_lock;

                        prunmark(p);
                        (void) cv_wait(cv, mp);
                        mutex_exit(mp);
                        p = pr_p_lock(pnp);  /* NULL if process disappeared */
                        mutex_exit(&pr_pidlock);
                }
        }

        if (p == NULL)          /* the process disappeared */
                return (NULL);

        ASSERT(p == pnp->pr_pcommon->prc_proc);
        ASSERT(MUTEX_HELD(&p->p_lock) && (p->p_proc_flag & P_PR_LOCK));

        /*
         * Re-check: the locks were dropped above, so the watchpoints may
         * already be gone.
         */
        if (pr_watch_active(p)) {
                pr_free_watchpoints(p);
                if ((t = p->p_tlist) != NULL) {
                        do {
                                watch_disable(t);

                        } while ((t = t->t_forw) != p->p_tlist);
                }
        }

        if ((as = p->p_as) != NULL) {
                avl_tree_t *tree;
                struct watched_page *pwp;

                /*
                 * If this is the parent of a vfork, the watched page
                 * list has been moved temporarily to p->p_wpage.
                 */
                if (avl_numnodes(&p->p_wpage) != 0)
                        tree = &p->p_wpage;
                else
                        tree = &as->a_wpage;

                /* p_lock is released before the address space lock is taken */
                mutex_exit(&p->p_lock);
                AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);

                /*
                 * Zero the watch counts on every page and, unless the page
                 * is already flagged WP_SETPROT, push it onto the p_wprot
                 * list with its original protections as the target.
                 */
                for (pwp = avl_first(tree); pwp != NULL;
                    pwp = AVL_NEXT(tree, pwp)) {
                        pwp->wp_read = 0;
                        pwp->wp_write = 0;
                        pwp->wp_exec = 0;
                        if ((pwp->wp_flags & WP_SETPROT) == 0) {
                                pwp->wp_flags |= WP_SETPROT;
                                pwp->wp_prot = pwp->wp_oprot;
                                pwp->wp_list = p->p_wprot;
                                p->p_wprot = pwp;
                        }
                }

                AS_LOCK_EXIT(as, &as->a_lock);
                mutex_enter(&p->p_lock);
        }

        /*
         * Unpause the process now.
         */
        if (p == curproc)
                continuelwps(p);
        else
                unpauselwps(p);

        return (p);
}