1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  */
  26 
  27 #include <sys/types.h>
  28 #include <sys/uio.h>
  29 #include <sys/param.h>
  30 #include <sys/cmn_err.h>
  31 #include <sys/cred.h>
  32 #include <sys/policy.h>
  33 #include <sys/debug.h>
  34 #include <sys/errno.h>
  35 #include <sys/file.h>
  36 #include <sys/inline.h>
  37 #include <sys/kmem.h>
  38 #include <sys/proc.h>
  39 #include <sys/brand.h>
  40 #include <sys/regset.h>
  41 #include <sys/sysmacros.h>
  42 #include <sys/systm.h>
  43 #include <sys/vfs.h>
  44 #include <sys/vnode.h>
  45 #include <sys/signal.h>
  46 #include <sys/auxv.h>
  47 #include <sys/user.h>
  48 #include <sys/class.h>
  49 #include <sys/fault.h>
  50 #include <sys/syscall.h>
  51 #include <sys/procfs.h>
  52 #include <sys/zone.h>
  53 #include <sys/copyops.h>
  54 #include <sys/schedctl.h>
  55 #include <vm/as.h>
  56 #include <vm/seg.h>
  57 #include <fs/proc/prdata.h>
  58 #include <sys/contract/process_impl.h>
  59 
/*
 * Forward declarations for the file-local helpers that implement the
 * individual control operations dispatched by pr_control().
 */
static  void    pr_settrace(proc_t *, sigset_t *);
static  int     pr_setfpregs(prnode_t *, prfpregset_t *);
static  int     pr_setxregs(prnode_t *, prxregset_t *);
#if defined(__sparc)
static  int     pr_setasrs(prnode_t *, asrset_t);
#endif
static  int     pr_setvaddr(prnode_t *, caddr_t);
static  int     pr_clearsig(prnode_t *);
static  int     pr_clearflt(prnode_t *);
/* The trailing int * of pr_watch()/pr_agent() receives an "unlocked" flag. */
static  int     pr_watch(prnode_t *, prwatch_t *, int *);
static  int     pr_agent(prnode_t *, prgregset_t, int *);
static  int     pr_rdwr(proc_t *, enum uio_rw, priovec_t *);
static  int     pr_scred(proc_t *, prcred_t *, cred_t *, boolean_t);
static  int     pr_spriv(proc_t *, prpriv_t *, cred_t *);
static  int     pr_szoneid(proc_t *, zoneid_t, cred_t *);
static  void    pauselwps(proc_t *);
static  void    unpauselwps(proc_t *);
  77 
/*
 * Operand of a control message.  A message is a command word (a long)
 * immediately followed by the operand; prwritectl() overlays this union on
 * the words that follow the command word.  Which member is meaningful is
 * selected by the command, as noted next to each member.  The size of the
 * union also determines the minimum size of prwritectl()'s staging buffer.
 */
typedef union {
        long            sig;            /* PCKILL, PCUNKILL */
        long            nice;           /* PCNICE */
        long            timeo;          /* PCTWSTOP */
        ulong_t         flags;          /* PCRUN, PCSET, PCUNSET */
        caddr_t         vaddr;          /* PCSVADDR */
        siginfo_t       siginfo;        /* PCSSIG */
        sigset_t        sigset;         /* PCSTRACE, PCSHOLD */
        fltset_t        fltset;         /* PCSFAULT */
        sysset_t        sysset;         /* PCSENTRY, PCSEXIT */
        prgregset_t     prgregset;      /* PCSREG, PCAGENT */
        prfpregset_t    prfpregset;     /* PCSFPREG */
        prxregset_t     prxregset;      /* PCSXREG */
#if defined(__sparc)
        asrset_t        asrset;         /* PCSASRS */
#endif
        prwatch_t       prwatch;        /* PCWATCH */
        priovec_t       priovec;        /* PCREAD, PCWRITE */
        prcred_t        prcred;         /* PCSCRED */
        prpriv_t        prpriv;         /* PCSPRIV */
        long            przoneid;       /* PCSZONE */
} arg_t;

/* Executes one already-sized control message; defined below. */
static  int     pr_control(long, arg_t *, prnode_t *, cred_t *);
 102 
 103 static size_t
 104 ctlsize(long cmd, size_t resid, arg_t *argp)
 105 {
 106         size_t size = sizeof (long);
 107         size_t rnd;
 108         int ngrp;
 109 
 110         switch (cmd) {
 111         case PCNULL:
 112         case PCSTOP:
 113         case PCDSTOP:
 114         case PCWSTOP:
 115         case PCCSIG:
 116         case PCCFAULT:
 117                 break;
 118         case PCSSIG:
 119                 size += sizeof (siginfo_t);
 120                 break;
 121         case PCTWSTOP:
 122                 size += sizeof (long);
 123                 break;
 124         case PCKILL:
 125         case PCUNKILL:
 126         case PCNICE:
 127                 size += sizeof (long);
 128                 break;
 129         case PCRUN:
 130         case PCSET:
 131         case PCUNSET:
 132                 size += sizeof (ulong_t);
 133                 break;
 134         case PCSVADDR:
 135                 size += sizeof (caddr_t);
 136                 break;
 137         case PCSTRACE:
 138         case PCSHOLD:
 139                 size += sizeof (sigset_t);
 140                 break;
 141         case PCSFAULT:
 142                 size += sizeof (fltset_t);
 143                 break;
 144         case PCSENTRY:
 145         case PCSEXIT:
 146                 size += sizeof (sysset_t);
 147                 break;
 148         case PCSREG:
 149         case PCAGENT:
 150                 size += sizeof (prgregset_t);
 151                 break;
 152         case PCSFPREG:
 153                 size += sizeof (prfpregset_t);
 154                 break;
 155         case PCSXREG:
 156                 size += sizeof (prxregset_t);
 157                 break;
 158 #if defined(__sparc)
 159         case PCSASRS:
 160                 size += sizeof (asrset_t);
 161                 break;
 162 #endif
 163         case PCWATCH:
 164                 size += sizeof (prwatch_t);
 165                 break;
 166         case PCREAD:
 167         case PCWRITE:
 168                 size += sizeof (priovec_t);
 169                 break;
 170         case PCSCRED:
 171                 size += sizeof (prcred_t);
 172                 break;
 173         case PCSCREDX:
 174                 /*
 175                  * We cannot derefence the pr_ngroups fields if it
 176                  * we don't have enough data.
 177                  */
 178                 if (resid < size + sizeof (prcred_t) - sizeof (gid_t))
 179                         return (0);
 180                 ngrp = argp->prcred.pr_ngroups;
 181                 if (ngrp < 0 || ngrp > ngroups_max)
 182                         return (0);
 183 
 184                 /* The result can be smaller than sizeof (prcred_t) */
 185                 size += sizeof (prcred_t) - sizeof (gid_t);
 186                 size += ngrp * sizeof (gid_t);
 187                 break;
 188         case PCSPRIV:
 189                 if (resid >= size + sizeof (prpriv_t))
 190                         size += priv_prgetprivsize(&argp->prpriv);
 191                 else
 192                         return (0);
 193                 break;
 194         case PCSZONE:
 195                 size += sizeof (long);
 196                 break;
 197         default:
 198                 return (0);
 199         }
 200 
 201         /* Round up to a multiple of long, unless exact amount written */
 202         if (size < resid) {
 203                 rnd = size & (sizeof (long) - 1);
 204 
 205                 if (rnd != 0)
 206                         size += sizeof (long) - rnd;
 207         }
 208 
 209         if (size > resid)
 210                 return (0);
 211         return (size);
 212 }
 213 
 214 /*
 215  * Control operations (lots).
 216  */
 217 int
 218 prwritectl(vnode_t *vp, uio_t *uiop, cred_t *cr)
 219 {
 220 #define MY_BUFFER_SIZE \
 221                 100 > 1 + sizeof (arg_t) / sizeof (long) ? \
 222                 100 : 1 + sizeof (arg_t) / sizeof (long)
 223         long buf[MY_BUFFER_SIZE];
 224         long *bufp;
 225         size_t resid = 0;
 226         size_t size;
 227         prnode_t *pnp = VTOP(vp);
 228         int error;
 229         int locked = 0;
 230 
 231         while (uiop->uio_resid) {
 232                 /*
 233                  * Read several commands in one gulp.
 234                  */
 235                 bufp = buf;
 236                 if (resid) {    /* move incomplete command to front of buffer */
 237                         long *tail;
 238 
 239                         if (resid >= sizeof (buf))
 240                                 break;
 241                         tail = (long *)((char *)buf + sizeof (buf) - resid);
 242                         do {
 243                                 *bufp++ = *tail++;
 244                         } while ((resid -= sizeof (long)) != 0);
 245                 }
 246                 resid = sizeof (buf) - ((char *)bufp - (char *)buf);
 247                 if (resid > uiop->uio_resid)
 248                         resid = uiop->uio_resid;
 249                 if (error = uiomove((caddr_t)bufp, resid, UIO_WRITE, uiop))
 250                         return (error);
 251                 resid += (char *)bufp - (char *)buf;
 252                 bufp = buf;
 253 
 254                 do {            /* loop over commands in buffer */
 255                         long cmd = bufp[0];
 256                         arg_t *argp = (arg_t *)&bufp[1];
 257 
 258                         size = ctlsize(cmd, resid, argp);
 259                         if (size == 0)  /* incomplete or invalid command */
 260                                 break;
 261                         /*
 262                          * Perform the specified control operation.
 263                          */
 264                         if (!locked) {
 265                                 if ((error = prlock(pnp, ZNO)) != 0)
 266                                         return (error);
 267                                 locked = 1;
 268                         }
 269                         if (error = pr_control(cmd, argp, pnp, cr)) {
 270                                 if (error == -1)        /* -1 is timeout */
 271                                         locked = 0;
 272                                 else
 273                                         return (error);
 274                         }
 275                         bufp = (long *)((char *)bufp + size);
 276                 } while ((resid -= size) != 0);
 277 
 278                 if (locked) {
 279                         prunlock(pnp);
 280                         locked = 0;
 281                 }
 282         }
 283         return (resid? EINVAL : 0);
 284 }
 285 
 286 static int
 287 pr_control(long cmd, arg_t *argp, prnode_t *pnp, cred_t *cr)
 288 {
 289         prcommon_t *pcp;
 290         proc_t *p;
 291         int unlocked;
 292         int error = 0;
 293 
 294         if (cmd == PCNULL)
 295                 return (0);
 296 
 297         pcp = pnp->pr_common;
 298         p = pcp->prc_proc;
 299         ASSERT(p != NULL);
 300 
 301         /* System processes defy control. */
 302         if (p->p_flag & SSYS) {
 303                 prunlock(pnp);
 304                 return (EBUSY);
 305         }
 306 
 307         switch (cmd) {
 308 
 309         default:
 310                 error = EINVAL;
 311                 break;
 312 
 313         case PCSTOP:    /* direct process or lwp to stop and wait for stop */
 314         case PCDSTOP:   /* direct process or lwp to stop, don't wait */
 315         case PCWSTOP:   /* wait for process or lwp to stop */
 316         case PCTWSTOP:  /* wait for process or lwp to stop, with timeout */
 317                 {
 318                         time_t timeo;
 319 
 320                         /*
 321                          * Can't apply to a system process.
 322                          */
 323                         if (p->p_as == &kas) {
 324                                 error = EBUSY;
 325                                 break;
 326                         }
 327 
 328                         if (cmd == PCSTOP || cmd == PCDSTOP)
 329                                 pr_stop(pnp);
 330 
 331                         if (cmd == PCDSTOP)
 332                                 break;
 333 
 334                         /*
 335                          * If an lwp is waiting for itself or its process,
 336                          * don't wait. The stopped lwp would never see the
 337                          * fact that it is stopped.
 338                          */
 339                         if ((pcp->prc_flags & PRC_LWP)?
 340                             (pcp->prc_thread == curthread) : (p == curproc)) {
 341                                 if (cmd == PCWSTOP || cmd == PCTWSTOP)
 342                                         error = EBUSY;
 343                                 break;
 344                         }
 345 
 346                         timeo = (cmd == PCTWSTOP)? (time_t)argp->timeo : 0;
 347                         if ((error = pr_wait_stop(pnp, timeo)) != 0)
 348                                 return (error);
 349 
 350                         break;
 351                 }
 352 
 353         case PCRUN:     /* make lwp or process runnable */
 354                 error = pr_setrun(pnp, argp->flags);
 355                 break;
 356 
 357         case PCSTRACE:  /* set signal trace mask */
 358                 pr_settrace(p,  &argp->sigset);
 359                 break;
 360 
 361         case PCSSIG:    /* set current signal */
 362                 error = pr_setsig(pnp, &argp->siginfo);
 363                 if (argp->siginfo.si_signo == SIGKILL && error == 0) {
 364                         prunlock(pnp);
 365                         pr_wait_die(pnp);
 366                         return (-1);
 367                 }
 368                 break;
 369 
 370         case PCKILL:    /* send signal */
 371                 error = pr_kill(pnp, (int)argp->sig, cr);
 372                 if (error == 0 && argp->sig == SIGKILL) {
 373                         prunlock(pnp);
 374                         pr_wait_die(pnp);
 375                         return (-1);
 376                 }
 377                 break;
 378 
 379         case PCUNKILL:  /* delete a pending signal */
 380                 error = pr_unkill(pnp, (int)argp->sig);
 381                 break;
 382 
 383         case PCNICE:    /* set nice priority */
 384                 error = pr_nice(p, (int)argp->nice, cr);
 385                 break;
 386 
 387         case PCSENTRY:  /* set syscall entry bit mask */
 388         case PCSEXIT:   /* set syscall exit bit mask */
 389                 pr_setentryexit(p, &argp->sysset, cmd == PCSENTRY);
 390                 break;
 391 
 392         case PCSET:     /* set process flags */
 393                 error = pr_set(p, argp->flags);
 394                 break;
 395 
 396         case PCUNSET:   /* unset process flags */
 397                 error = pr_unset(p, argp->flags);
 398                 break;
 399 
 400         case PCSREG:    /* set general registers */
 401                 {
 402                         kthread_t *t = pr_thread(pnp);
 403 
 404                         if (!ISTOPPED(t) && !VSTOPPED(t) && !DSTOPPED(t)) {
 405                                 thread_unlock(t);
 406                                 error = EBUSY;
 407                         } else {
 408                                 thread_unlock(t);
 409                                 mutex_exit(&p->p_lock);
 410                                 prsetprregs(ttolwp(t), argp->prgregset, 0);
 411                                 mutex_enter(&p->p_lock);
 412                         }
 413                         break;
 414                 }
 415 
 416         case PCSFPREG:  /* set floating-point registers */
 417                 error = pr_setfpregs(pnp, &argp->prfpregset);
 418                 break;
 419 
 420         case PCSXREG:   /* set extra registers */
 421                 error = pr_setxregs(pnp, &argp->prxregset);
 422                 break;
 423 
 424 #if defined(__sparc)
 425         case PCSASRS:   /* set ancillary state registers */
 426                 error = pr_setasrs(pnp, argp->asrset);
 427                 break;
 428 #endif
 429 
 430         case PCSVADDR:  /* set virtual address at which to resume */
 431                 error = pr_setvaddr(pnp, argp->vaddr);
 432                 break;
 433 
 434         case PCSHOLD:   /* set signal-hold mask */
 435                 pr_sethold(pnp, &argp->sigset);
 436                 break;
 437 
 438         case PCSFAULT:  /* set mask of traced faults */
 439                 pr_setfault(p, &argp->fltset);
 440                 break;
 441 
 442         case PCCSIG:    /* clear current signal */
 443                 error = pr_clearsig(pnp);
 444                 break;
 445 
 446         case PCCFAULT:  /* clear current fault */
 447                 error = pr_clearflt(pnp);
 448                 break;
 449 
 450         case PCWATCH:   /* set or clear watched areas */
 451                 error = pr_watch(pnp, &argp->prwatch, &unlocked);
 452                 if (error && unlocked)
 453                         return (error);
 454                 break;
 455 
 456         case PCAGENT:   /* create the /proc agent lwp in the target process */
 457                 error = pr_agent(pnp, argp->prgregset, &unlocked);
 458                 if (error && unlocked)
 459                         return (error);
 460                 break;
 461 
 462         case PCREAD:    /* read from the address space */
 463                 error = pr_rdwr(p, UIO_READ, &argp->priovec);
 464                 break;
 465 
 466         case PCWRITE:   /* write to the address space */
 467                 error = pr_rdwr(p, UIO_WRITE, &argp->priovec);
 468                 break;
 469 
 470         case PCSCRED:   /* set the process credentials */
 471         case PCSCREDX:
 472                 error = pr_scred(p, &argp->prcred, cr, cmd == PCSCREDX);
 473                 break;
 474 
 475         case PCSPRIV:   /* set the process privileges */
 476                 error = pr_spriv(p, &argp->prpriv, cr);
 477                 break;
 478         case PCSZONE:   /* set the process's zoneid credentials */
 479                 error = pr_szoneid(p, (zoneid_t)argp->przoneid, cr);
 480                 break;
 481         }
 482 
 483         if (error)
 484                 prunlock(pnp);
 485         return (error);
 486 }
 487 
 488 #ifdef _SYSCALL32_IMPL
 489 
/*
 * Counterpart of arg_t for control messages whose command word is an
 * int32_t: the operand layout parsed by prwritectl32() and handed to
 * pr_control32().  Scalars, addresses and the siginfo, register, watch,
 * iovec and credential operands use their 32-bit types; the remaining
 * members are the same types as in arg_t.  There is no PCSASRS operand in
 * this union.
 */
typedef union {
        int32_t         sig;            /* PCKILL, PCUNKILL */
        int32_t         nice;           /* PCNICE */
        int32_t         timeo;          /* PCTWSTOP */
        uint32_t        flags;          /* PCRUN, PCSET, PCUNSET */
        caddr32_t       vaddr;          /* PCSVADDR */
        siginfo32_t     siginfo;        /* PCSSIG */
        sigset_t        sigset;         /* PCSTRACE, PCSHOLD */
        fltset_t        fltset;         /* PCSFAULT */
        sysset_t        sysset;         /* PCSENTRY, PCSEXIT */
        prgregset32_t   prgregset;      /* PCSREG, PCAGENT */
        prfpregset32_t  prfpregset;     /* PCSFPREG */
        prxregset_t     prxregset;      /* PCSXREG */
        prwatch32_t     prwatch;        /* PCWATCH */
        priovec32_t     priovec;        /* PCREAD, PCWRITE */
        prcred32_t      prcred;         /* PCSCRED */
        prpriv_t        prpriv;         /* PCSPRIV */
        int32_t         przoneid;       /* PCSZONE */
} arg32_t;

/* 32-bit variants of the dispatcher and of the FP register setter. */
static  int     pr_control32(int32_t, arg32_t *, prnode_t *, cred_t *);
static  int     pr_setfpregs32(prnode_t *, prfpregset32_t *);
 512 
 513 /*
 514  * Note that while ctlsize32() can use argp, it must do so only in a way
 515  * that assumes 32-bit rather than 64-bit alignment as argp is a pointer
 516  * to an array of 32-bit values and only 32-bit alignment is ensured.
 517  */
 518 static size_t
 519 ctlsize32(int32_t cmd, size_t resid, arg32_t *argp)
 520 {
 521         size_t size = sizeof (int32_t);
 522         size_t rnd;
 523         int ngrp;
 524 
 525         switch (cmd) {
 526         case PCNULL:
 527         case PCSTOP:
 528         case PCDSTOP:
 529         case PCWSTOP:
 530         case PCCSIG:
 531         case PCCFAULT:
 532                 break;
 533         case PCSSIG:
 534                 size += sizeof (siginfo32_t);
 535                 break;
 536         case PCTWSTOP:
 537                 size += sizeof (int32_t);
 538                 break;
 539         case PCKILL:
 540         case PCUNKILL:
 541         case PCNICE:
 542                 size += sizeof (int32_t);
 543                 break;
 544         case PCRUN:
 545         case PCSET:
 546         case PCUNSET:
 547                 size += sizeof (uint32_t);
 548                 break;
 549         case PCSVADDR:
 550                 size += sizeof (caddr32_t);
 551                 break;
 552         case PCSTRACE:
 553         case PCSHOLD:
 554                 size += sizeof (sigset_t);
 555                 break;
 556         case PCSFAULT:
 557                 size += sizeof (fltset_t);
 558                 break;
 559         case PCSENTRY:
 560         case PCSEXIT:
 561                 size += sizeof (sysset_t);
 562                 break;
 563         case PCSREG:
 564         case PCAGENT:
 565                 size += sizeof (prgregset32_t);
 566                 break;
 567         case PCSFPREG:
 568                 size += sizeof (prfpregset32_t);
 569                 break;
 570         case PCSXREG:
 571                 size += sizeof (prxregset_t);
 572                 break;
 573         case PCWATCH:
 574                 size += sizeof (prwatch32_t);
 575                 break;
 576         case PCREAD:
 577         case PCWRITE:
 578                 size += sizeof (priovec32_t);
 579                 break;
 580         case PCSCRED:
 581                 size += sizeof (prcred32_t);
 582                 break;
 583         case PCSCREDX:
 584                 /*
 585                  * We cannot derefence the pr_ngroups fields if it
 586                  * we don't have enough data.
 587                  */
 588                 if (resid < size + sizeof (prcred32_t) - sizeof (gid32_t))
 589                         return (0);
 590                 ngrp = argp->prcred.pr_ngroups;
 591                 if (ngrp < 0 || ngrp > ngroups_max)
 592                         return (0);
 593 
 594                 /* The result can be smaller than sizeof (prcred32_t) */
 595                 size += sizeof (prcred32_t) - sizeof (gid32_t);
 596                 size += ngrp * sizeof (gid32_t);
 597                 break;
 598         case PCSPRIV:
 599                 if (resid >= size + sizeof (prpriv_t))
 600                         size += priv_prgetprivsize(&argp->prpriv);
 601                 else
 602                         return (0);
 603                 break;
 604         case PCSZONE:
 605                 size += sizeof (int32_t);
 606                 break;
 607         default:
 608                 return (0);
 609         }
 610 
 611         /* Round up to a multiple of int32_t */
 612         rnd = size & (sizeof (int32_t) - 1);
 613 
 614         if (rnd != 0)
 615                 size += sizeof (int32_t) - rnd;
 616 
 617         if (size > resid)
 618                 return (0);
 619         return (size);
 620 }
 621 
 622 /*
 623  * Control operations (lots).
 624  */
 625 int
 626 prwritectl32(struct vnode *vp, struct uio *uiop, cred_t *cr)
 627 {
 628 #define MY_BUFFER_SIZE32 \
 629                 100 > 1 + sizeof (arg32_t) / sizeof (int32_t) ? \
 630                 100 : 1 + sizeof (arg32_t) / sizeof (int32_t)
 631         int32_t buf[MY_BUFFER_SIZE32];
 632         int32_t *bufp;
 633         arg32_t arg;
 634         size_t resid = 0;
 635         size_t size;
 636         prnode_t *pnp = VTOP(vp);
 637         int error;
 638         int locked = 0;
 639 
 640         while (uiop->uio_resid) {
 641                 /*
 642                  * Read several commands in one gulp.
 643                  */
 644                 bufp = buf;
 645                 if (resid) {    /* move incomplete command to front of buffer */
 646                         int32_t *tail;
 647 
 648                         if (resid >= sizeof (buf))
 649                                 break;
 650                         tail = (int32_t *)((char *)buf + sizeof (buf) - resid);
 651                         do {
 652                                 *bufp++ = *tail++;
 653                         } while ((resid -= sizeof (int32_t)) != 0);
 654                 }
 655                 resid = sizeof (buf) - ((char *)bufp - (char *)buf);
 656                 if (resid > uiop->uio_resid)
 657                         resid = uiop->uio_resid;
 658                 if (error = uiomove((caddr_t)bufp, resid, UIO_WRITE, uiop))
 659                         return (error);
 660                 resid += (char *)bufp - (char *)buf;
 661                 bufp = buf;
 662 
 663                 do {            /* loop over commands in buffer */
 664                         int32_t cmd = bufp[0];
 665                         arg32_t *argp = (arg32_t *)&bufp[1];
 666 
 667                         size = ctlsize32(cmd, resid, argp);
 668                         if (size == 0)  /* incomplete or invalid command */
 669                                 break;
 670                         /*
 671                          * Perform the specified control operation.
 672                          */
 673                         if (!locked) {
 674                                 if ((error = prlock(pnp, ZNO)) != 0)
 675                                         return (error);
 676                                 locked = 1;
 677                         }
 678 
 679                         /*
 680                          * Since some members of the arg32_t union contain
 681                          * 64-bit values (which must be 64-bit aligned), we
 682                          * can't simply pass a pointer to the structure as
 683                          * it may be unaligned. Note that we do pass the
 684                          * potentially unaligned structure to ctlsize32()
 685                          * above, but that uses it a way that makes no
 686                          * assumptions about alignment.
 687                          */
 688                         ASSERT(size - sizeof (cmd) <= sizeof (arg));
 689                         bcopy(argp, &arg, size - sizeof (cmd));
 690 
 691                         if (error = pr_control32(cmd, &arg, pnp, cr)) {
 692                                 if (error == -1)        /* -1 is timeout */
 693                                         locked = 0;
 694                                 else
 695                                         return (error);
 696                         }
 697                         bufp = (int32_t *)((char *)bufp + size);
 698                 } while ((resid -= size) != 0);
 699 
 700                 if (locked) {
 701                         prunlock(pnp);
 702                         locked = 0;
 703                 }
 704         }
 705         return (resid? EINVAL : 0);
 706 }
 707 
 708 static int
 709 pr_control32(int32_t cmd, arg32_t *argp, prnode_t *pnp, cred_t *cr)
 710 {
 711         prcommon_t *pcp;
 712         proc_t *p;
 713         int unlocked;
 714         int error = 0;
 715 
 716         if (cmd == PCNULL)
 717                 return (0);
 718 
 719         pcp = pnp->pr_common;
 720         p = pcp->prc_proc;
 721         ASSERT(p != NULL);
 722 
 723         if (p->p_flag & SSYS) {
 724                 prunlock(pnp);
 725                 return (EBUSY);
 726         }
 727 
 728         switch (cmd) {
 729 
 730         default:
 731                 error = EINVAL;
 732                 break;
 733 
 734         case PCSTOP:    /* direct process or lwp to stop and wait for stop */
 735         case PCDSTOP:   /* direct process or lwp to stop, don't wait */
 736         case PCWSTOP:   /* wait for process or lwp to stop */
 737         case PCTWSTOP:  /* wait for process or lwp to stop, with timeout */
 738                 {
 739                         time_t timeo;
 740 
 741                         /*
 742                          * Can't apply to a system process.
 743                          */
 744                         if (p->p_as == &kas) {
 745                                 error = EBUSY;
 746                                 break;
 747                         }
 748 
 749                         if (cmd == PCSTOP || cmd == PCDSTOP)
 750                                 pr_stop(pnp);
 751 
 752                         if (cmd == PCDSTOP)
 753                                 break;
 754 
 755                         /*
 756                          * If an lwp is waiting for itself or its process,
 757                          * don't wait. The lwp will never see the fact that
 758                          * itself is stopped.
 759                          */
 760                         if ((pcp->prc_flags & PRC_LWP)?
 761                             (pcp->prc_thread == curthread) : (p == curproc)) {
 762                                 if (cmd == PCWSTOP || cmd == PCTWSTOP)
 763                                         error = EBUSY;
 764                                 break;
 765                         }
 766 
 767                         timeo = (cmd == PCTWSTOP)? (time_t)argp->timeo : 0;
 768                         if ((error = pr_wait_stop(pnp, timeo)) != 0)
 769                                 return (error);
 770 
 771                         break;
 772                 }
 773 
 774         case PCRUN:     /* make lwp or process runnable */
 775                 error = pr_setrun(pnp, (ulong_t)argp->flags);
 776                 break;
 777 
 778         case PCSTRACE:  /* set signal trace mask */
 779                 pr_settrace(p,  &argp->sigset);
 780                 break;
 781 
 782         case PCSSIG:    /* set current signal */
 783                 if (PROCESS_NOT_32BIT(p))
 784                         error = EOVERFLOW;
 785                 else {
 786                         int sig = (int)argp->siginfo.si_signo;
 787                         siginfo_t siginfo;
 788 
 789                         bzero(&siginfo, sizeof (siginfo));
 790                         siginfo_32tok(&argp->siginfo, (k_siginfo_t *)&siginfo);
 791                         error = pr_setsig(pnp, &siginfo);
 792                         if (sig == SIGKILL && error == 0) {
 793                                 prunlock(pnp);
 794                                 pr_wait_die(pnp);
 795                                 return (-1);
 796                         }
 797                 }
 798                 break;
 799 
 800         case PCKILL:    /* send signal */
 801                 error = pr_kill(pnp, (int)argp->sig, cr);
 802                 if (error == 0 && argp->sig == SIGKILL) {
 803                         prunlock(pnp);
 804                         pr_wait_die(pnp);
 805                         return (-1);
 806                 }
 807                 break;
 808 
 809         case PCUNKILL:  /* delete a pending signal */
 810                 error = pr_unkill(pnp, (int)argp->sig);
 811                 break;
 812 
 813         case PCNICE:    /* set nice priority */
 814                 error = pr_nice(p, (int)argp->nice, cr);
 815                 break;
 816 
 817         case PCSENTRY:  /* set syscall entry bit mask */
 818         case PCSEXIT:   /* set syscall exit bit mask */
 819                 pr_setentryexit(p, &argp->sysset, cmd == PCSENTRY);
 820                 break;
 821 
 822         case PCSET:     /* set process flags */
 823                 error = pr_set(p, (long)argp->flags);
 824                 break;
 825 
 826         case PCUNSET:   /* unset process flags */
 827                 error = pr_unset(p, (long)argp->flags);
 828                 break;
 829 
 830         case PCSREG:    /* set general registers */
 831                 if (PROCESS_NOT_32BIT(p))
 832                         error = EOVERFLOW;
 833                 else {
 834                         kthread_t *t = pr_thread(pnp);
 835 
 836                         if (!ISTOPPED(t) && !VSTOPPED(t) && !DSTOPPED(t)) {
 837                                 thread_unlock(t);
 838                                 error = EBUSY;
 839                         } else {
 840                                 prgregset_t prgregset;
 841                                 klwp_t *lwp = ttolwp(t);
 842 
 843                                 thread_unlock(t);
 844                                 mutex_exit(&p->p_lock);
 845                                 prgregset_32ton(lwp, argp->prgregset,
 846                                     prgregset);
 847                                 prsetprregs(lwp, prgregset, 0);
 848                                 mutex_enter(&p->p_lock);
 849                         }
 850                 }
 851                 break;
 852 
 853         case PCSFPREG:  /* set floating-point registers */
 854                 if (PROCESS_NOT_32BIT(p))
 855                         error = EOVERFLOW;
 856                 else
 857                         error = pr_setfpregs32(pnp, &argp->prfpregset);
 858                 break;
 859 
 860         case PCSXREG:   /* set extra registers */
 861                 if (PROCESS_NOT_32BIT(p))
 862                         error = EOVERFLOW;
 863                 else
 864                         error = pr_setxregs(pnp, &argp->prxregset);
 865                 break;
 866 
 867         case PCSVADDR:  /* set virtual address at which to resume */
 868                 if (PROCESS_NOT_32BIT(p))
 869                         error = EOVERFLOW;
 870                 else
 871                         error = pr_setvaddr(pnp,
 872                             (caddr_t)(uintptr_t)argp->vaddr);
 873                 break;
 874 
 875         case PCSHOLD:   /* set signal-hold mask */
 876                 pr_sethold(pnp, &argp->sigset);
 877                 break;
 878 
 879         case PCSFAULT:  /* set mask of traced faults */
 880                 pr_setfault(p, &argp->fltset);
 881                 break;
 882 
 883         case PCCSIG:    /* clear current signal */
 884                 error = pr_clearsig(pnp);
 885                 break;
 886 
 887         case PCCFAULT:  /* clear current fault */
 888                 error = pr_clearflt(pnp);
 889                 break;
 890 
 891         case PCWATCH:   /* set or clear watched areas */
 892                 if (PROCESS_NOT_32BIT(p))
 893                         error = EOVERFLOW;
 894                 else {
 895                         prwatch_t prwatch;
 896 
 897                         prwatch.pr_vaddr = argp->prwatch.pr_vaddr;
 898                         prwatch.pr_size = argp->prwatch.pr_size;
 899                         prwatch.pr_wflags = argp->prwatch.pr_wflags;
 900                         prwatch.pr_pad = argp->prwatch.pr_pad;
 901                         error = pr_watch(pnp, &prwatch, &unlocked);
 902                         if (error && unlocked)
 903                                 return (error);
 904                 }
 905                 break;
 906 
 907         case PCAGENT:   /* create the /proc agent lwp in the target process */
 908                 if (PROCESS_NOT_32BIT(p))
 909                         error = EOVERFLOW;
 910                 else {
 911                         prgregset_t prgregset;
 912                         kthread_t *t = pr_thread(pnp);
 913                         klwp_t *lwp = ttolwp(t);
 914                         thread_unlock(t);
 915                         mutex_exit(&p->p_lock);
 916                         prgregset_32ton(lwp, argp->prgregset, prgregset);
 917                         mutex_enter(&p->p_lock);
 918                         error = pr_agent(pnp, prgregset, &unlocked);
 919                         if (error && unlocked)
 920                                 return (error);
 921                 }
 922                 break;
 923 
 924         case PCREAD:    /* read from the address space */
 925         case PCWRITE:   /* write to the address space */
 926                 if (PROCESS_NOT_32BIT(p))
 927                         error = EOVERFLOW;
 928                 else {
 929                         enum uio_rw rw = (cmd == PCREAD)? UIO_READ : UIO_WRITE;
 930                         priovec_t priovec;
 931 
 932                         priovec.pio_base =
 933                             (void *)(uintptr_t)argp->priovec.pio_base;
 934                         priovec.pio_len = (size_t)argp->priovec.pio_len;
 935                         priovec.pio_offset = (off_t)
 936                             (uint32_t)argp->priovec.pio_offset;
 937                         error = pr_rdwr(p, rw, &priovec);
 938                 }
 939                 break;
 940 
 941         case PCSCRED:   /* set the process credentials */
 942         case PCSCREDX:
 943                 {
 944                         /*
 945                          * All the fields in these structures are exactly the
 946                          * same and so the structures are compatible.  In case
 947                          * this ever changes, we catch this with the ASSERT
 948                          * below.
 949                          */
 950                         prcred_t *prcred = (prcred_t *)&argp->prcred;
 951 
 952 #ifndef __lint
 953                         ASSERT(sizeof (prcred_t) == sizeof (prcred32_t));
 954 #endif
 955 
 956                         error = pr_scred(p, prcred, cr, cmd == PCSCREDX);
 957                         break;
 958                 }
 959 
 960         case PCSPRIV:   /* set the process privileges */
 961                 error = pr_spriv(p, &argp->prpriv, cr);
 962                 break;
 963 
 964         case PCSZONE:   /* set the process's zoneid */
 965                 error = pr_szoneid(p, (zoneid_t)argp->przoneid, cr);
 966                 break;
 967         }
 968 
 969         if (error)
 970                 prunlock(pnp);
 971         return (error);
 972 }
 973 
 974 #endif  /* _SYSCALL32_IMPL */
 975 
 976 /*
 977  * Pick the thread that a control operation acts on and return it locked
 978  * (thread_lock()): the node's own lwp, or the one prchoose() selects.
 979  */
 980 kthread_t *
 981 pr_thread(prnode_t *pnp)
 982 {
 983         prcommon_t *common = pnp->pr_common;
 984         kthread_t *chosen;
 985
 986         if (common->prc_flags & PRC_LWP) {
 987                 /* lwp node: use the node's own thread and lock it here */
 988                 chosen = common->prc_thread;
 989                 ASSERT(chosen != NULL);
 990                 thread_lock(chosen);
 991                 return (chosen);
 992         }
 993         /* process node: prchoose() hands back an already-locked thread */
 994         chosen = prchoose(common->prc_proc);
 995         ASSERT(chosen != NULL);
 996         return (chosen);
 997 }
 998 
 999 /*
1000  * Direct the process or lwp to stop.
      *
      * For an lwp node (PRC_LWP set in prc_flags) only pcp->prc_thread is
      * visited; for a process node every thread on p->p_tlist is visited.
      * This routine only requests the stop and prods each thread; it does
      * not wait for the stop to happen.
      *
      * NOTE(review): nothing here asserts it, but callers presumably hold
      * p->p_lock -- confirm.
1001  */
1002 void
1003 pr_stop(prnode_t *pnp)
1004 {
1005         prcommon_t *pcp = pnp->pr_common;
1006         proc_t *p = pcp->prc_proc;
1007         kthread_t *t;
1008         vnode_t *vp;
1009
1010         /*
1011          * If already stopped, do nothing; otherwise flag
1012          * it to be stopped the next time it tries to run.
1013          * If sleeping at interruptible priority, set it
1014          * running so it will stop within cv_wait_sig().
1015          *
1016          * Take care to cooperate with jobcontrol: if an lwp
1017          * is stopped due to the default action of a jobcontrol
1018          * stop signal, flag it to be stopped the next time it
1019          * starts due to a SIGCONT signal.
1020          */
1021         if (pcp->prc_flags & PRC_LWP)
1022                 t = pcp->prc_thread;
1023         else
1024                 t = p->p_tlist;
1025         ASSERT(t != NULL);
1026
1027         do {
1028                 int notify;
1029
1030                 notify = 0;
1031                 thread_lock(t);
1032                 if (!ISTOPPED(t)) {
1033                         t->t_proc_flag |= TP_PRSTOP;
1034                         t->t_sig_check = 1;  /* do ISSIG */
1035                 }
1036
1037                 /* Move the thread from wait queue to run queue */
1038                 if (ISWAITING(t))
1039                         setrun_locked(t);
1040
1041                 if (ISWAKEABLE(t)) {
1042                         if (t->t_wchan0 == NULL)
1043                                 setrun_locked(t);
1044                         else if (!VSTOPPED(t)) {
1045                                 /*
1046                                  * Mark it virtually stopped.
1047                                  */
1048                                 t->t_proc_flag |= TP_PRVSTOP;
1049                                 notify = 1;
1050                         }
1051                 }
1052                 /*
1053                  * force the thread into the kernel
1054                  * if it is not already there.
1055                  */
1056                 prpokethread(t);
1057                 thread_unlock(t);
                     /*
                      * The lwp was newly marked virtually stopped above:
                      * prnotify() the le_trace vnode recorded in its lwp
                      * directory entry, if there is one.  This is done
                      * after the thread lock has been released.
                      */
1058                 if (notify &&
1059                     (vp = p->p_lwpdir[t->t_dslot].ld_entry->le_trace) != NULL)
1060                         prnotify(vp);
                     /* an lwp node affects only that one lwp */
1061                 if (pcp->prc_flags & PRC_LWP)
1062                         break;
1063         } while ((t = t->t_forw) != p->p_tlist);
1064
1065         /*
1066          * We do this just in case the thread we asked
1067          * to stop is in holdlwps() (called from cfork()).
1068          */
1069         cv_broadcast(&p->p_holdlwps);
1070 }
1071 
1072 /*
1073  * Sleep until the lwp stops, but cooperate with
1074  * jobcontrol:  Don't wake up if the lwp is stopped
1075  * due to the default action of a jobcontrol stop signal.
1076  * If this is the process file descriptor, sleep
1077  * until all of the process's lwps stop.
      *
      * timeo is a timeout in milliseconds; when it is <= 0 a NULL timeout
      * pointer is handed to pr_wait().
      *
      * Returns 0 once the wait condition is satisfied, with the prnode
      * locked.  Otherwise returns the nonzero value from pr_wait()
      * (EINTR, or -1 for a timeout) or from prlock(); on those paths
      * prunlock(pnp) has already been called.
1078  */
1079 int
1080 pr_wait_stop(prnode_t *pnp, time_t timeo)
1081 {
1082         prcommon_t *pcp = pnp->pr_common;
1083         proc_t *p = pcp->prc_proc;
1084         timestruc_t rqtime;
1085         timestruc_t *rqtp = NULL;
1086         int timecheck = 0;
1087         kthread_t *t;
1088         int error;
1089
1090         if (timeo > 0) {     /* millisecond timeout */
1091                 /*
1092                  * Determine the precise future time of the requested timeout.
1093                  */
1094                 timestruc_t now;
1095
1096                 timecheck = timechanged;
1097                 gethrestime(&now);
1098                 rqtp = &rqtime;
1099                 rqtp->tv_sec = timeo / MILLISEC;
                     /* remaining milliseconds, scaled by MICROSEC into tv_nsec */
1100                 rqtp->tv_nsec = (timeo % MILLISEC) * MICROSEC;
1101                 timespecadd(rqtp, &now);
1102         }
1103
1104         if (pcp->prc_flags & PRC_LWP) {  /* lwp file descriptor */
1105                 t = pcp->prc_thread;
1106                 ASSERT(t != NULL);
1107                 thread_lock(t);
1108                 while (!ISTOPPED(t) && !VSTOPPED(t)) {
1109                         thread_unlock(t);
                             /*
                              * Take prc_mutex before giving up the prnode
                              * lock: pr_wait() asserts that it is held and
                              * releases it before returning.
                              */
1110                         mutex_enter(&pcp->prc_mutex);
1111                         prunlock(pnp);
1112                         error = pr_wait(pcp, rqtp, timecheck);
1113                         if (error)      /* -1 is timeout */
1114                                 return (error);
1115                         if ((error = prlock(pnp, ZNO)) != 0)
1116                                 return (error);
1117                         ASSERT(p == pcp->prc_proc);
1118                         ASSERT(t == pcp->prc_thread);
1119                         thread_lock(t);
1120                 }
1121                 thread_unlock(t);
1122         } else {                        /* process file descriptor */
1123                 t = prchoose(p);        /* returns locked thread */
1124                 ASSERT(t != NULL);
1125                 ASSERT(MUTEX_HELD(&p->p_lock));
                     /*
                      * Keep waiting while the chosen lwp is neither stopped,
                      * virtually stopped nor suspended, or while SEXITLWPS
                      * is set on the process.
                      */
1126                 while ((!ISTOPPED(t) && !VSTOPPED(t) && !SUSPENDED(t)) ||
1127                     (p->p_flag & SEXITLWPS)) {
1128                         thread_unlock(t);
                             /* same prc_mutex / prunlock() hand-off as above */
1129                         mutex_enter(&pcp->prc_mutex);
1130                         prunlock(pnp);
1131                         error = pr_wait(pcp, rqtp, timecheck);
1132                         if (error)      /* -1 is timeout */
1133                                 return (error);
1134                         if ((error = prlock(pnp, ZNO)) != 0)
1135                                 return (error);
1136                         ASSERT(p == pcp->prc_proc);
1137                         t = prchoose(p);        /* returns locked t */
1138                         ASSERT(t != NULL);
1139                 }
1140                 thread_unlock(t);
1141         }
1142
1143         ASSERT(!(pcp->prc_flags & PRC_DESTROY) && p->p_stat != SZOMB &&
1144             t != NULL && t->t_state != TS_ZOMB);
1145
1146         return (0);
1147 }
1148
     /*
      * Set the lwp, or the whole process, designated by pnp running.
      *
      * flags may contain:
      *   PRCSIG    discard the current signal and its siginfo, unless the
      *             current signal is SIGKILL
      *   PRCFAULT  clear the current fault (lwp_curflt)
      *   PRSTEP    call prstep() on the lwp
      *   PRSTOP    set TP_PRSTOP and t_sig_check on the lwp again
      *   PRSABORT  set lwp_sysabort
      * Any other bit yields EINVAL.
      *
      * Returns EBUSY if the chosen lwp is not stopped, not virtually stopped
      * and not flagged TP_PRSTOP, or if a /proc agent lwp exists and the
      * target is not that agent lwp addressed through an lwp node.  The
      * EBUSY test is made before the flags are validated.  Returns 0 on
      * success.
      *
      * p->p_lock is dropped and re-acquired around prstep().
      */
1149 int
1150 pr_setrun(prnode_t *pnp, ulong_t flags)
1151 {
1152         prcommon_t *pcp = pnp->pr_common;
1153         proc_t *p = pcp->prc_proc;
1154         kthread_t *t;
1155         klwp_t *lwp;
1156
1157         /*
1158          * Cannot set an lwp running if it is not stopped.
1159          * Also, no lwp other than the /proc agent lwp can
1160          * be set running so long as the /proc agent lwp exists.
1161          */
1162         t = pr_thread(pnp);     /* returns locked thread */
1163         if ((!ISTOPPED(t) && !VSTOPPED(t) &&
1164             !(t->t_proc_flag & TP_PRSTOP)) ||
1165             (p->p_agenttp != NULL &&
1166             (t != p->p_agenttp || !(pcp->prc_flags & PRC_LWP)))) {
1167                 thread_unlock(t);
1168                 return (EBUSY);
1169         }
1170         thread_unlock(t);
1171         if (flags & ~(PRCSIG|PRCFAULT|PRSTEP|PRSTOP|PRSABORT))
1172                 return (EINVAL);
1173         lwp = ttolwp(t);
1174         if ((flags & PRCSIG) && lwp->lwp_cursig != SIGKILL) {
1175                 /*
1176                  * Discard current siginfo_t, if any.
1177                  */
1178                 lwp->lwp_cursig = 0;
1179                 lwp->lwp_extsig = 0;
1180                 if (lwp->lwp_curinfo) {
1181                         siginfofree(lwp->lwp_curinfo);
1182                         lwp->lwp_curinfo = NULL;
1183                 }
1184         }
1185         if (flags & PRCFAULT)
1186                 lwp->lwp_curflt = 0;
1187         /*
1188          * We can't hold p->p_lock when we touch the lwp's registers.
1189          * It may be swapped out and we will get a page fault.
1190          */
1191         if (flags & PRSTEP) {
1192                 mutex_exit(&p->p_lock);
1193                 prstep(lwp, 0);
1194                 mutex_enter(&p->p_lock);
1195         }
1196         if (flags & PRSTOP) {
1197                 t->t_proc_flag |= TP_PRSTOP;
1198                 t->t_sig_check = 1;  /* do ISSIG */
1199         }
1200         if (flags & PRSABORT)
1201                 lwp->lwp_sysabort = 1;
1202         thread_lock(t);
1203         if ((pcp->prc_flags & PRC_LWP) || (flags & (PRSTEP|PRSTOP))) {
1204                 /*
1205                  * Here, we are dealing with a single lwp.
1206                  */
1207                 if (ISTOPPED(t)) {
1208                         t->t_schedflag |= TS_PSTART;
1209                         t->t_dtrace_stop = 0;
1210                         setrun_locked(t);
1211                 } else if (flags & PRSABORT) {
1212                         t->t_proc_flag &=
1213                             ~(TP_PRSTOP|TP_PRVSTOP|TP_STOPPING);
1214                         setrun_locked(t);
1215                 } else if (!(flags & PRSTOP)) {
1216                         t->t_proc_flag &=
1217                             ~(TP_PRSTOP|TP_PRVSTOP|TP_STOPPING);
1218                 }
1219                 thread_unlock(t);
1220         } else {
1221                 /*
1222                  * Here, we are dealing with the whole process.
1223                  */
1224                 if (ISTOPPED(t)) {
1225                         /*
1226                          * The representative lwp is stopped on an event
1227                          * of interest.  We demote it to PR_REQUESTED and
1228                          * choose another representative lwp.  If the new
1229                          * representative lwp is not stopped on an event of
1230                          * interest (other than PR_REQUESTED), we set the
1231                          * whole process running, else we leave the process
1232                          * stopped showing the next event of interest.
1233                          */
1234                         kthread_t *tx = NULL;
1235
1236                         if (!(flags & PRSABORT) &&
1237                             t->t_whystop == PR_SYSENTRY &&
1238                             t->t_whatstop == SYS_lwp_exit)
1239                                 tx = t;         /* remember the exiting lwp */
1240                         t->t_whystop = PR_REQUESTED;
1241                         t->t_whatstop = 0;
1242                         thread_unlock(t);
1243                         t = prchoose(p);        /* returns locked t */
1244                         ASSERT(ISTOPPED(t) || VSTOPPED(t));
1245                         if (VSTOPPED(t) ||
1246                             t->t_whystop == PR_REQUESTED) {
1247                                 thread_unlock(t);
1248                                 allsetrun(p);
1249                         } else {
1250                                 thread_unlock(t);
1251                                 /*
1252                                  * As a special case, if the old representative
1253                                  * lwp was stopped on entry to _lwp_exit()
1254                                  * (and we are not aborting the system call),
1255                                  * we set the old representative lwp running.
1256                                  * We do this so that the next process stop
1257                                  * will find the exiting lwp gone.
1258                                  */
1259                                 if (tx != NULL) {
1260                                         thread_lock(tx);
1261                                         tx->t_schedflag |= TS_PSTART;
                                             /*
                                              * Clear the flag on tx, the lwp
                                              * being restarted; t is the new
                                              * representative and is no
                                              * longer locked here.
                                              */
1262                                         tx->t_dtrace_stop = 0;
1263                                         setrun_locked(tx);
1264                                         thread_unlock(tx);
1265                                 }
1266                         }
1267                 } else {
1268                         /*
1269                          * No event of interest; set all of the lwps running.
1270                          */
1271                         if (flags & PRSABORT) {
1272                                 t->t_proc_flag &=
1273                                     ~(TP_PRSTOP|TP_PRVSTOP|TP_STOPPING);
1274                                 setrun_locked(t);
1275                         }
1276                         thread_unlock(t);
1277                         allsetrun(p);
1278                 }
1279         }
1280         return (0);
1281 }
1282 
1283 /*
1284  * Sleep on pcp->prc_wait until woken, interrupted, or timed out.
1285  *   pcp        prcommon referring to the process/lwp
1286  *   ts         absolute time of the timeout, if any
1287  *   timecheck  handed unchanged to cv_waituntil_sig()
1288  * The caller must hold pcp->prc_mutex; it is released before returning.
1289  * Returns EINTR for an interruption, -1 for a timeout, else 0.
1290  */
1291 int
1292 pr_wait(prcommon_t *pcp, timestruc_t *ts, int timecheck)
1293 {
1294         int woke;
1295
1296         ASSERT(MUTEX_HELD(&pcp->prc_mutex));
1297         woke = cv_waituntil_sig(&pcp->prc_wait, &pcp->prc_mutex, ts, timecheck);
1298         mutex_exit(&pcp->prc_mutex);
1299
1300         if (woke == 0)
1301                 return (EINTR);         /* interrupted */
1302         if (woke == -1)
1303                 return (-1);            /* timed out */
1304         return (0);
1305 }
1306 
1307 /*
1308  * Make all threads in the process runnable: clear each lwp's /proc stop
1309  * flags and restart it if it is ISTOPPED().  Caller holds p->p_lock.
1310  */
1311 void
1312 allsetrun(proc_t *p)
1313 {
1314         kthread_t *cur;
1315
1316         ASSERT(MUTEX_HELD(&p->p_lock));
1317         if ((cur = p->p_tlist) == NULL)
1318                 return;
1319         do {
1320                 thread_lock(cur);
1321                 ASSERT(!(cur->t_proc_flag & TP_LWPEXIT));
1322                 cur->t_proc_flag &= ~(TP_PRSTOP|TP_PRVSTOP|TP_STOPPING);
1323                 if (ISTOPPED(cur)) {
1324                         cur->t_schedflag |= TS_PSTART;
1325                         cur->t_dtrace_stop = 0;
1326                         setrun_locked(cur);
1327                 }
1328                 thread_unlock(cur);
1329         } while ((cur = cur->t_forw) != p->p_tlist);
1330 }
1331 
1332 /*
1333  * Wait for the process to die.  This is done after sending SIGKILL:
1334  * the target is expected to die soon, and subsequent operations
1335  * on it should then return ENOENT.
      *
      * Under pidlock, sleep on the process's p_srwchan_cv until the
      * prcommon no longer points at a process, the process is a zombie
      * (SZOMB), or cv_wait_sig() returns zero.
1336  */
1337 void
1338 pr_wait_die(prnode_t *pnp)
1339 {
1340         proc_t *victim;

1341         mutex_enter(&pidlock);
1342         while ((victim = pnp->pr_common->prc_proc) != NULL) {
1343                 if (victim->p_stat == SZOMB ||
1344                     cv_wait_sig(&victim->p_srwchan_cv, &pidlock) == 0)
1345                         break;
1346         }
1347         mutex_exit(&pidlock);
1348 }
1349
     /*
      * Install *sp as the traced-signal mask of p, after removing SIGKILL
      * from *sp.  P_PR_TRACE is set when the resulting mask is non-empty;
      * it is cleared only when the signal mask and p_fltmask are both empty
      * and u_systrap is zero.
      */
1350 static void
1351 pr_settrace(proc_t *p, sigset_t *sp)
1352 {
1353         prdelset(sp, SIGKILL);
1354         prassignset(&p->p_sigmask, sp);
1355         if (!sigisempty(&p->p_sigmask)) {
1356                 p->p_proc_flag |= P_PR_TRACE;
1357                 return;
1358         }

1359         /* no traced signals: keep the flag only if faults/syscalls need it */
1360         if (prisempty(&p->p_fltmask) && PTOU(p)->u_systrap == 0)
1361                 p->p_proc_flag &= ~P_PR_TRACE;
1362 }
1363
     /*
      * Set the current signal of the lwp chosen by pr_thread(pnp) to
      * sip->si_signo, with *sip as its siginfo.
      *
      * A signal number of 0 clears the current signal and frees any attached
      * siginfo.  For a nonzero signal, *sip is copied into lwp_curinfo, the
      * SIGKILL / SIGCONT / stop-signal side effects below are applied, and
      * the lwp is set running if it is sleeping or waiting (or, for SIGKILL,
      * if it is TS_STOPPED).
      *
      * Returns EINVAL if the signal number is negative or >= nsig (b_nsig of
      * the caller's brand if curproc is branded, else NSIG), EBUSY if the
      * lwp's current signal is already SIGKILL, and 0 otherwise.
      *
      * p->p_lock is expected to be held on entry: it is dropped and re-taken
      * around the KM_SLEEP allocation.
      */
1364 int
1365 pr_setsig(prnode_t *pnp, siginfo_t *sip)
1366 {
1367         int nsig = PROC_IS_BRANDED(curproc)? BROP(curproc)->b_nsig : NSIG;
1368         int sig = sip->si_signo;
1369         prcommon_t *pcp = pnp->pr_common;
1370         proc_t *p = pcp->prc_proc;
1371         kthread_t *t;
1372         klwp_t *lwp;
1373         int error = 0;
1374
1375         t = pr_thread(pnp);     /* returns locked thread */
1376         thread_unlock(t);
1377         lwp = ttolwp(t);
1378         if (sig < 0 || sig >= nsig)
1379                 /* Zero allowed here */
1380                 error = EINVAL;
1381         else if (lwp->lwp_cursig == SIGKILL)
1382                 /* "can't happen", but just in case */
1383                 error = EBUSY;
             /* the assignment below installs sig; the branch handles sig == 0 */
1384         else if ((lwp->lwp_cursig = (uchar_t)sig) == 0) {
1385                 lwp->lwp_extsig = 0;
1386                 /*
1387                  * Discard current siginfo_t, if any.
1388                  */
1389                 if (lwp->lwp_curinfo) {
1390                         siginfofree(lwp->lwp_curinfo);
1391                         lwp->lwp_curinfo = NULL;
1392                 }
1393         } else {
1394                 kthread_t *tx;
1395                 sigqueue_t *sqp;
1396
1397                 /* drop p_lock to do kmem_alloc(KM_SLEEP) */
1398                 mutex_exit(&p->p_lock);
1399                 sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP);
1400                 mutex_enter(&p->p_lock);
1401
                     /*
                      * Install the fresh sigqueue_t only if the lwp has none;
                      * otherwise free it and overwrite the existing one.
                      */
1402                 if (lwp->lwp_curinfo == NULL)
1403                         lwp->lwp_curinfo = sqp;
1404                 else
1405                         kmem_free(sqp, sizeof (sigqueue_t));
1406                 /*
1407                  * Copy contents of info to current siginfo_t.
1408                  */
1409                 bcopy(sip, &lwp->lwp_curinfo->sq_info,
1410                     sizeof (lwp->lwp_curinfo->sq_info));
1411                 /*
1412                  * Prevent contents published by si_zoneid-unaware /proc
1413                  * consumers from being incorrectly filtered.  Because
1414                  * an uninitialized si_zoneid is the same as
1415                  * GLOBAL_ZONEID, this means that you can't pr_setsig a
1416                  * process in a non-global zone with a siginfo which
1417                  * appears to come from the global zone.
1418                  */
1419                 if (SI_FROMUSER(sip) && sip->si_zoneid == 0)
1420                         lwp->lwp_curinfo->sq_info.si_zoneid =
1421                             p->p_zone->zone_id;
1422                 /*
1423                  * Side-effects for SIGKILL and jobcontrol signals.
1424                  */
1425                 if (sig == SIGKILL) {
1426                         p->p_flag |= SKILLED;
1427                         p->p_flag &= ~SEXTKILLED;
1428                 } else if (sig == SIGCONT) {
                             /*
                              * SIGCONT: discard every pending stop signal,
                              * both process-wide and on each thread.
                              */
1429                         p->p_flag |= SSCONT;
1430                         sigdelq(p, NULL, SIGSTOP);
1431                         sigdelq(p, NULL, SIGTSTP);
1432                         sigdelq(p, NULL, SIGTTOU);
1433                         sigdelq(p, NULL, SIGTTIN);
1434                         sigdiffset(&p->p_sig, &stopdefault);
1435                         sigdiffset(&p->p_extsig, &stopdefault);
1436                         if ((tx = p->p_tlist) != NULL) {
1437                                 do {
1438                                         sigdelq(p, tx, SIGSTOP);
1439                                         sigdelq(p, tx, SIGTSTP);
1440                                         sigdelq(p, tx, SIGTTOU);
1441                                         sigdelq(p, tx, SIGTTIN);
1442                                         sigdiffset(&tx->t_sig, &stopdefault);
1443                                         sigdiffset(&tx->t_extsig, &stopdefault);
1444                                 } while ((tx = tx->t_forw) != p->p_tlist);
1445                         }
1446                 } else if (sigismember(&stopdefault, sig)) {
                             /*
                              * Stop signal: discard any pending SIGCONT,
                              * both process-wide and on each thread.
                              */
1447                         if (PTOU(p)->u_signal[sig-1] == SIG_DFL &&
1448                             (sig == SIGSTOP || !p->p_pgidp->pid_pgorphaned))
1449                                 p->p_flag &= ~SSCONT;
1450                         sigdelq(p, NULL, SIGCONT);
1451                         sigdelset(&p->p_sig, SIGCONT);
1452                         sigdelset(&p->p_extsig, SIGCONT);
1453                         if ((tx = p->p_tlist) != NULL) {
1454                                 do {
1455                                         sigdelq(p, tx, SIGCONT);
1456                                         sigdelset(&tx->t_sig, SIGCONT);
1457                                         sigdelset(&tx->t_extsig, SIGCONT);
1458                                 } while ((tx = tx->t_forw) != p->p_tlist);
1459                         }
1460                 }
1461                 thread_lock(t);
1462                 if (ISWAKEABLE(t) || ISWAITING(t)) {
1463                         /* Set signaled sleeping/waiting lwp running */
1464                         setrun_locked(t);
1465                 } else if (t->t_state == TS_STOPPED && sig == SIGKILL) {
1466                         /* If SIGKILL, set stopped lwp running */
1467                         p->p_stopsig = 0;
1468                         t->t_schedflag |= TS_XSTART | TS_PSTART;
1469                         t->t_dtrace_stop = 0;
1470                         setrun_locked(t);
1471                 }
1472                 t->t_sig_check = 1;  /* so ISSIG will be done */
1473                 thread_unlock(t);
1474                 /*
1475                  * More jobcontrol side-effects.
1476                  */
1477                 if (sig == SIGCONT && (tx = p->p_tlist) != NULL) {
1478                         p->p_stopsig = 0;
1479                         do {
1480                                 thread_lock(tx);
1481                                 if (tx->t_state == TS_STOPPED &&
1482                                     tx->t_whystop == PR_JOBCONTROL) {
1483                                         tx->t_schedflag |= TS_XSTART;
1484                                         setrun_locked(tx);
1485                                 }
1486                                 thread_unlock(tx);
1487                         } while ((tx = tx->t_forw) != p->p_tlist);
1488                 }
1489         }
1490         return (error);
1491 }
1492
     /*
      * Post signal `sig` with sigaddq() to the process behind pnp, or to its
      * thread when pnp is an lwp node.  The siginfo handed to sigaddq() has
      * si_code SI_USER and is filled from curproc->p_pid, PRCTID(curproc),
      * getzoneid() and crgetruid(cr).
      *
      * Returns EINVAL unless 0 < sig < nsig (b_nsig of the caller's brand if
      * curproc is branded, otherwise NSIG); returns 0 otherwise.
      */
1493 int
1494 pr_kill(prnode_t *pnp, int sig, cred_t *cr)
1495 {
1496         prcommon_t *common = pnp->pr_common;
1497         kthread_t *lwpt = NULL;
1498         k_siginfo_t sinfo;
1499         int nsig = NSIG;

1500         if (PROC_IS_BRANDED(curproc))
1501                 nsig = BROP(curproc)->b_nsig;
1502         if (sig <= 0 || sig >= nsig)
1503                 return (EINVAL);
1504         if (common->prc_flags & PRC_LWP)
1505                 lwpt = common->prc_thread;      /* lwp node */

1506         bzero(&sinfo, sizeof (sinfo));
1507         sinfo.si_signo = sig;
1508         sinfo.si_code = SI_USER;
1509         sinfo.si_pid = curproc->p_pid;
1510         sinfo.si_ctid = PRCTID(curproc);
1511         sinfo.si_zoneid = getzoneid();
1512         sinfo.si_uid = crgetruid(cr);
1513         sigaddq(common->prc_proc, lwpt, &sinfo, KM_NOSLEEP);
1514         return (0);
1515 }
1516
     /*
      * Delete pending signal `sig` with sigdeq(), from the process or, for
      * an lwp node, from that node's thread, and free the sigqueue_t that
      * sigdeq() hands back, if any.
      *
      * Returns EINVAL if sig is SIGKILL or is not in the range
      * 0 < sig < nsig (b_nsig of the caller's brand if curproc is branded,
      * otherwise NSIG); returns 0 otherwise.
      */
1517 int
1518 pr_unkill(prnode_t *pnp, int sig)
1519 {
1520         prcommon_t *common = pnp->pr_common;
1521         kthread_t *lwpt = NULL;
1522         sigqueue_t *dequeued = NULL;
1523         int nsig = NSIG;
1524
1525         if (PROC_IS_BRANDED(curproc))
1526                 nsig = BROP(curproc)->b_nsig;
1527         if (sig <= 0 || sig >= nsig || sig == SIGKILL)
1528                 return (EINVAL);
1529
1530         /* an lwp node names its own thread; a process node passes NULL */
1531         if (common->prc_flags & PRC_LWP)
1532                 lwpt = common->prc_thread;
1533         sigdeq(common->prc_proc, lwpt, sig, &dequeued);

1534         if (dequeued != NULL)
1535                 siginfofree(dequeued);
1536         return (0);
1537 }
1538
     /*
      * Hand `nice` and `cr` to CL_DONICE() for every thread on p->p_tlist,
      * calling schedctl_set_cidpri() on each thread afterwards.
      *
      * All threads are visited even if one fails; the return value is the
      * first nonzero CL_DONICE() result, or 0 if there was none.
      * p->p_tlist is dereferenced without a NULL check.
      */
1539 int
1540 pr_nice(proc_t *p, int nice, cred_t *cr)
1541 {
1542         kthread_t *cur = p->p_tlist;
1543         int first_err = 0;

1544         do {
1545                 int rc;
1546
1547                 ASSERT(!(cur->t_proc_flag & TP_LWPEXIT));
1548                 rc = CL_DONICE(cur, cr, nice, (int *)NULL);
1549                 schedctl_set_cidpri(cur);
1550                 if (first_err == 0)
1551                         first_err = rc;
1552                 cur = cur->t_forw;
1553         } while (cur != p->p_tlist);
1554
1555         return (first_err);
1556 }
1557
     /*
      * Replace the syscall entry mask (entry != 0) or exit mask (entry == 0)
      * in p's user area with *sysset.
      *
      * If either mask is then non-empty, u_systrap is set to 1, P_PR_TRACE
      * is set and set_proc_sys(p) is called.  Otherwise u_systrap is set to
      * 0, and P_PR_TRACE is cleared as well provided p_sigmask and p_fltmask
      * are both empty.
      */
1558 void
1559 pr_setentryexit(proc_t *p, sysset_t *sysset, int entry)
1560 {
1561         user_t *uarea = PTOU(p);
1562
1563         if (!entry) {
1564                 prassignset(&uarea->u_exitmask, sysset);
1565         } else {
1566                 prassignset(&uarea->u_entrymask, sysset);
1567         }

1568         if (prisempty(&uarea->u_entrymask) && prisempty(&uarea->u_exitmask)) {
1569                 /* nothing traced on entry or exit any more */
1570                 uarea->u_systrap = 0;
1571                 if (sigisempty(&p->p_sigmask) && prisempty(&p->p_fltmask))
1572                         p->p_proc_flag &= ~P_PR_TRACE;
1573                 return;
1574         }
1575
1576         uarea->u_systrap = 1;
1577         p->p_proc_flag |= P_PR_TRACE;
1578         set_proc_sys(p);        /* set pre and post-sys flags */
1579 }
1580
     /*
      * The PR_* mode flags that pr_set() and pr_unset() accept; both return
      * EINVAL when given any bit that this mask does not cover.
      */
1581 #define ALLFLAGS        \
1582         (PR_FORK|PR_RLC|PR_KLC|PR_ASYNC|PR_BPTADJ|PR_MSACCT|PR_MSFORK|PR_PTRACE)
1583
     /*
      * Turn on the /proc mode flags named in `flags` for process p.
      *
      * Returns EBUSY if SSYS is set in p->p_flag or p's address space is
      * &kas, EINVAL if `flags` has a bit that ALLFLAGS does not cover, and
      * 0 otherwise.  PR_MSACCT calls estimate_msacct() unless SMSACCT is
      * already set; PR_PTRACE additionally sends SIGKILL to p when
      * p->p_ppid is 1.
      */
1584 int
1585 pr_set(proc_t *p, long flags)
1586 {
1587         if ((p->p_flag & SSYS) || p->p_as == &kas)
1588                 return (EBUSY);
1589         if ((flags & ~ALLFLAGS) != 0)
1590                 return (EINVAL);
1591
1592         /* modes recorded directly in p->p_proc_flag */
1593         if (flags & PR_FORK)
1594                 p->p_proc_flag |= P_PR_FORK;
1595         if (flags & PR_RLC)
1596                 p->p_proc_flag |= P_PR_RUNLCL;
1597         if (flags & PR_KLC)
1598                 p->p_proc_flag |= P_PR_KILLCL;
1599         if (flags & PR_ASYNC)
1600                 p->p_proc_flag |= P_PR_ASYNC;
1601         if (flags & PR_BPTADJ)
1602                 p->p_proc_flag |= P_PR_BPTADJ;
1603
1604         /* skip estimate_msacct() when SMSACCT is already set */
1605         if ((flags & PR_MSACCT) && (p->p_flag & SMSACCT) == 0)
1606                 estimate_msacct(p->p_tlist, gethrtime());
1607         if (flags & PR_MSFORK)
1608                 p->p_flag |= SMSFORK;

1609         if (flags & PR_PTRACE) {
1610                 p->p_proc_flag |= P_PR_PTRACE;
1611                 /* ptraced process must die if parent dead */
1612                 if (p->p_ppid == 1)
1613                         sigtoproc(p, NULL, SIGKILL);
1614         }
1615         return (0);
1616 }
1617 
1618 int
1619 pr_unset(proc_t *p, long flags)
1620 {
1621         if ((p->p_flag & SSYS) || p->p_as == &kas)
1622                 return (EBUSY);
1623 
1624         if (flags & ~ALLFLAGS)
1625                 return (EINVAL);
1626 
1627         if (flags & PR_FORK)
1628                 p->p_proc_flag &= ~P_PR_FORK;
1629         if (flags & PR_RLC)
1630                 p->p_proc_flag &= ~P_PR_RUNLCL;
1631         if (flags & PR_KLC)
1632                 p->p_proc_flag &= ~P_PR_KILLCL;
1633         if (flags & PR_ASYNC)
1634                 p->p_proc_flag &= ~P_PR_ASYNC;
1635         if (flags & PR_BPTADJ)
1636                 p->p_proc_flag &= ~P_PR_BPTADJ;
1637         if (flags & PR_MSACCT)
1638                 disable_msacct(p);
1639         if (flags & PR_MSFORK)
1640                 p->p_flag &= ~SMSFORK;
1641         if (flags & PR_PTRACE)
1642                 p->p_proc_flag &= ~P_PR_PTRACE;
1643 
1644         return (0);
1645 }
1646 
1647 static int
1648 pr_setfpregs(prnode_t *pnp, prfpregset_t *prfpregset)
1649 {
1650         proc_t *p = pnp->pr_common->prc_proc;
1651         kthread_t *t = pr_thread(pnp);  /* returns locked thread */
1652 
1653         if (!ISTOPPED(t) && !VSTOPPED(t) && !DSTOPPED(t)) {
1654                 thread_unlock(t);
1655                 return (EBUSY);
1656         }
1657         if (!prhasfp()) {
1658                 thread_unlock(t);
1659                 return (EINVAL);        /* No FP support */
1660         }
1661 
1662         /* drop p_lock while touching the lwp's stack */
1663         thread_unlock(t);
1664         mutex_exit(&p->p_lock);
1665         prsetprfpregs(ttolwp(t), prfpregset);
1666         mutex_enter(&p->p_lock);
1667 
1668         return (0);
1669 }
1670 
#ifdef  _SYSCALL32_IMPL
/*
 * 32-bit counterpart of pr_setfpregs(): pass *prfpregset to
 * prsetprfpregs32() for the lwp of the thread that pr_thread()
 * returns for this node.
 *
 * Returns EBUSY unless the thread satisfies ISTOPPED(), VSTOPPED() or
 * DSTOPPED(), and EINVAL if prhasfp() is false.  On the success path
 * p->p_lock is released around the prsetprfpregs32() call and
 * re-acquired before returning 0.
 */
static int
pr_setfpregs32(prnode_t *pnp, prfpregset32_t *prfpregset)
{
        proc_t *p = pnp->pr_common->prc_proc;
        kthread_t *t = pr_thread(pnp);  /* returns locked thread */
        int error = 0;

        if (!(ISTOPPED(t) || VSTOPPED(t) || DSTOPPED(t)))
                error = EBUSY;
        else if (!prhasfp())
                error = EINVAL;         /* No FP support */

        /* the thread lock is released exactly once on every path */
        thread_unlock(t);
        if (error != 0)
                return (error);

        /* drop p_lock while touching the lwp's stack */
        mutex_exit(&p->p_lock);
        prsetprfpregs32(ttolwp(t), prfpregset);
        mutex_enter(&p->p_lock);

        return (0);
}
#endif  /* _SYSCALL32_IMPL */
1696 
1697 /* ARGSUSED */
1698 static int
1699 pr_setxregs(prnode_t *pnp, prxregset_t *prxregset)
1700 {
1701         proc_t *p = pnp->pr_common->prc_proc;
1702         kthread_t *t = pr_thread(pnp);  /* returns locked thread */
1703 
1704         if (!ISTOPPED(t) && !VSTOPPED(t) && !DSTOPPED(t)) {
1705                 thread_unlock(t);
1706                 return (EBUSY);
1707         }
1708         thread_unlock(t);
1709 
1710         if (!prhasx(p))
1711                 return (EINVAL);        /* No extra register support */
1712 
1713         /* drop p_lock while touching the lwp's stack */
1714         mutex_exit(&p->p_lock);
1715         prsetprxregs(ttolwp(t), (caddr_t)prxregset);
1716         mutex_enter(&p->p_lock);
1717 
1718         return (0);
1719 }
1720 
#if defined(__sparc)
/*
 * Pass asrset to prsetasregs() for the lwp of the thread that
 * pr_thread() returns for this node.
 *
 * Returns EBUSY unless the thread satisfies ISTOPPED(), VSTOPPED() or
 * DSTOPPED().  On the success path p->p_lock is released around the
 * prsetasregs() call and re-acquired before returning 0.
 */
static int
pr_setasrs(prnode_t *pnp, asrset_t asrset)
{
        proc_t *p = pnp->pr_common->prc_proc;
        kthread_t *t = pr_thread(pnp);  /* returns locked thread */
        int stopped;

        stopped = (ISTOPPED(t) || VSTOPPED(t) || DSTOPPED(t));
        thread_unlock(t);
        if (!stopped)
                return (EBUSY);

        /* drop p_lock while touching the lwp's stack */
        mutex_exit(&p->p_lock);
        prsetasregs(ttolwp(t), asrset);
        mutex_enter(&p->p_lock);

        return (0);
}
#endif
1742 
1743 static int
1744 pr_setvaddr(prnode_t *pnp, caddr_t vaddr)
1745 {
1746         proc_t *p = pnp->pr_common->prc_proc;
1747         kthread_t *t = pr_thread(pnp);  /* returns locked thread */
1748 
1749         if (!ISTOPPED(t) && !VSTOPPED(t) && !DSTOPPED(t)) {
1750                 thread_unlock(t);
1751                 return (EBUSY);
1752         }
1753 
1754         /* drop p_lock while touching the lwp's stack */
1755         thread_unlock(t);
1756         mutex_exit(&p->p_lock);
1757         prsvaddr(ttolwp(t), vaddr);
1758         mutex_enter(&p->p_lock);
1759 
1760         return (0);
1761 }
1762 
1763 void
1764 pr_sethold(prnode_t *pnp, sigset_t *sp)
1765 {
1766         proc_t *p = pnp->pr_common->prc_proc;
1767         kthread_t *t = pr_thread(pnp);  /* returns locked thread */
1768 
1769         schedctl_finish_sigblock(t);
1770         sigutok(sp, &t->t_hold);
1771         if (ISWAKEABLE(t) &&
1772             (fsig(&p->p_sig, t) || fsig(&t->t_sig, t)))
1773                 setrun_locked(t);
1774         t->t_sig_check = 1;  /* so thread will see new holdmask */
1775         thread_unlock(t);
1776 }
1777 
1778 void
1779 pr_setfault(proc_t *p, fltset_t *fltp)
1780 {
1781         prassignset(&p->p_fltmask, fltp);
1782         if (!prisempty(&p->p_fltmask))
1783                 p->p_proc_flag |= P_PR_TRACE;
1784         else if (sigisempty(&p->p_sigmask)) {
1785                 user_t *up = PTOU(p);
1786                 if (up->u_systrap == 0)
1787                         p->p_proc_flag &= ~P_PR_TRACE;
1788         }
1789 }
1790 
1791 static int
1792 pr_clearsig(prnode_t *pnp)
1793 {
1794         kthread_t *t = pr_thread(pnp);  /* returns locked thread */
1795         klwp_t *lwp = ttolwp(t);
1796 
1797         thread_unlock(t);
1798         if (lwp->lwp_cursig == SIGKILL)
1799                 return (EBUSY);
1800 
1801         /*
1802          * Discard current siginfo_t, if any.
1803          */
1804         lwp->lwp_cursig = 0;
1805         lwp->lwp_extsig = 0;
1806         if (lwp->lwp_curinfo) {
1807                 siginfofree(lwp->lwp_curinfo);
1808                 lwp->lwp_curinfo = NULL;
1809         }
1810 
1811         return (0);
1812 }
1813 
1814 static int
1815 pr_clearflt(prnode_t *pnp)
1816 {
1817         kthread_t *t = pr_thread(pnp);  /* returns locked thread */
1818 
1819         thread_unlock(t);
1820         ttolwp(t)->lwp_curflt = 0;
1821 
1822         return (0);
1823 }
1824 
1825 static int
1826 pr_watch(prnode_t *pnp, prwatch_t *pwp, int *unlocked)
1827 {
1828         proc_t *p = pnp->pr_common->prc_proc;
1829         struct as *as = p->p_as;
1830         uintptr_t vaddr = pwp->pr_vaddr;
1831         size_t size = pwp->pr_size;
1832         int wflags = pwp->pr_wflags;
1833         ulong_t newpage = 0;
1834         struct watched_area *pwa;
1835         int error;
1836 
1837         *unlocked = 0;
1838 
1839         /*
1840          * Can't apply to a system process.
1841          */
1842         if ((p->p_flag & SSYS) || p->p_as == &kas)
1843                 return (EBUSY);
1844 
1845         /*
1846          * Verify that the address range does not wrap
1847          * and that only the proper flags were specified.
1848          */
1849         if ((wflags & ~WA_TRAPAFTER) == 0)
1850                 size = 0;
1851         if (vaddr + size < vaddr ||
1852             (wflags & ~(WA_READ|WA_WRITE|WA_EXEC|WA_TRAPAFTER)) != 0 ||
1853             ((wflags & ~WA_TRAPAFTER) != 0 && size == 0))
1854                 return (EINVAL);
1855 
1856         /*
1857          * Don't let the address range go above as->a_userlimit.
1858          * There is no error here, just a limitation.
1859          */
1860         if (vaddr >= (uintptr_t)as->a_userlimit)
1861                 return (0);
1862         if (vaddr + size > (uintptr_t)as->a_userlimit)
1863                 size = (uintptr_t)as->a_userlimit - vaddr;
1864 
1865         /*
1866          * Compute maximum number of pages this will add.
1867          */
1868         if ((wflags & ~WA_TRAPAFTER) != 0) {
1869                 ulong_t pagespan = (vaddr + size) - (vaddr & PAGEMASK);
1870                 newpage = btopr(pagespan);
1871                 if (newpage > 2 * prnwatch)
1872                         return (E2BIG);
1873         }
1874 
1875         /*
1876          * Force the process to be fully stopped.
1877          */
1878         if (p == curproc) {
1879                 prunlock(pnp);
1880                 while (holdwatch() != 0)
1881                         continue;
1882                 if ((error = prlock(pnp, ZNO)) != 0) {
1883                         continuelwps(p);
1884                         *unlocked = 1;
1885                         return (error);
1886                 }
1887         } else {
1888                 pauselwps(p);
1889                 while (pr_allstopped(p, 0) > 0) {
1890                         /*
1891                          * This cv/mutex pair is persistent even
1892                          * if the process disappears after we
1893                          * unmark it and drop p->p_lock.
1894                          */
1895                         kcondvar_t *cv = &pr_pid_cv[p->p_slot];
1896                         kmutex_t *mp = &p->p_lock;
1897 
1898                         prunmark(p);
1899                         (void) cv_wait(cv, mp);
1900                         mutex_exit(mp);
1901                         if ((error = prlock(pnp, ZNO)) != 0) {
1902                                 /*
1903                                  * Unpause the process if it exists.
1904                                  */
1905                                 p = pr_p_lock(pnp);
1906                                 mutex_exit(&pr_pidlock);
1907                                 if (p != NULL) {
1908                                         unpauselwps(p);
1909                                         prunlock(pnp);
1910                                 }
1911                                 *unlocked = 1;
1912                                 return (error);
1913                         }
1914                 }
1915         }
1916 
1917         /*
1918          * Drop p->p_lock in order to perform the rest of this.
1919          * The process is still locked with the P_PR_LOCK flag.
1920          */
1921         mutex_exit(&p->p_lock);
1922 
1923         pwa = kmem_alloc(sizeof (struct watched_area), KM_SLEEP);
1924         pwa->wa_vaddr = (caddr_t)vaddr;
1925         pwa->wa_eaddr = (caddr_t)vaddr + size;
1926         pwa->wa_flags = (ulong_t)wflags;
1927 
1928         error = ((pwa->wa_flags & ~WA_TRAPAFTER) == 0)?
1929             clear_watched_area(p, pwa) : set_watched_area(p, pwa);
1930 
1931         if (p == curproc) {
1932                 setallwatch();
1933                 mutex_enter(&p->p_lock);
1934                 continuelwps(p);
1935         } else {
1936                 mutex_enter(&p->p_lock);
1937                 unpauselwps(p);
1938         }
1939 
1940         return (error);
1941 }
1942 
1943 /* jobcontrol stopped, but with a /proc directed stop in effect */
1944 #define JDSTOPPED(t)    \
1945         ((t)->t_state == TS_STOPPED && \
1946         (t)->t_whystop == PR_JOBCONTROL && \
1947         ((t)->t_proc_flag & TP_PRSTOP))
1948 
1949 /*
1950  * pr_agent() creates the agent lwp. If the process is exiting while
1951  * we are creating an agent lwp, then exitlwps() waits until the
1952  * agent has been created using prbarrier().
1953  */
1954 static int
1955 pr_agent(prnode_t *pnp, prgregset_t prgregset, int *unlocked)
1956 {
1957         proc_t *p = pnp->pr_common->prc_proc;
1958         prcommon_t *pcp;
1959         kthread_t *t;
1960         kthread_t *ct;
1961         klwp_t *clwp;
1962         k_sigset_t smask;
1963         int cid;
1964         void *bufp = NULL;
1965         int error;
1966 
1967         *unlocked = 0;
1968 
1969         /*
1970          * Cannot create the /proc agent lwp if :-
1971          * - the process is not fully stopped or directed to stop.
1972          * - there is an agent lwp already.
1973          * - the process has been killed.
1974          * - the process is exiting.
1975          * - it's a vfork(2) parent.
1976          */
1977         t = prchoose(p);        /* returns locked thread */
1978         ASSERT(t != NULL);
1979 
1980         if ((!ISTOPPED(t) && !VSTOPPED(t) && !SUSPENDED(t) && !JDSTOPPED(t)) ||
1981             p->p_agenttp != NULL ||
1982             (p->p_flag & (SKILLED | SEXITING | SVFWAIT))) {
1983                 thread_unlock(t);
1984                 return (EBUSY);
1985         }
1986 
1987         thread_unlock(t);
1988         mutex_exit(&p->p_lock);
1989 
1990         sigfillset(&smask);
1991         sigdiffset(&smask, &cantmask);
1992         clwp = lwp_create(lwp_rtt, NULL, 0, p, TS_STOPPED,
1993             t->t_pri, &smask, NOCLASS, 0);
1994         if (clwp == NULL) {
1995                 mutex_enter(&p->p_lock);
1996                 return (ENOMEM);
1997         }
1998         prsetprregs(clwp, prgregset, 1);
1999 retry:
2000         cid = t->t_cid;
2001         (void) CL_ALLOC(&bufp, cid, KM_SLEEP);
2002         mutex_enter(&p->p_lock);
2003         if (cid != t->t_cid) {
2004                 /*
2005                  * Someone just changed this thread's scheduling class,
2006                  * so try pre-allocating the buffer again.  Hopefully we
2007                  * don't hit this often.
2008                  */
2009                 mutex_exit(&p->p_lock);
2010                 CL_FREE(cid, bufp);
2011                 goto retry;
2012         }
2013 
2014         clwp->lwp_ap = clwp->lwp_arg;
2015         clwp->lwp_eosys = NORMALRETURN;
2016         ct = lwptot(clwp);
2017         ct->t_clfuncs = t->t_clfuncs;
2018         CL_FORK(t, ct, bufp);
2019         ct->t_cid = t->t_cid;
2020         ct->t_proc_flag |= TP_PRSTOP;
2021         /*
2022          * Setting t_sysnum to zero causes post_syscall()
2023          * to bypass all syscall checks and go directly to
2024          *      if (issig()) psig();
2025          * so that the agent lwp will stop in issig_forreal()
2026          * showing PR_REQUESTED.
2027          */
2028         ct->t_sysnum = 0;
2029         ct->t_post_sys = 1;
2030         ct->t_sig_check = 1;
2031         p->p_agenttp = ct;
2032         ct->t_proc_flag &= ~TP_HOLDLWP;
2033 
2034         pcp = pnp->pr_pcommon;
2035         mutex_enter(&pcp->prc_mutex);
2036 
2037         lwp_create_done(ct);
2038 
2039         /*
2040          * Don't return until the agent is stopped on PR_REQUESTED.
2041          */
2042 
2043         for (;;) {
2044                 prunlock(pnp);
2045                 *unlocked = 1;
2046 
2047                 /*
2048                  * Wait for the agent to stop and notify us.
2049                  * If we've been interrupted, return that information.
2050                  */
2051                 error = pr_wait(pcp, NULL, 0);
2052                 if (error == EINTR) {
2053                         error = 0;
2054                         break;
2055                 }
2056 
2057                 /*
2058                  * Confirm that the agent LWP has stopped.
2059                  */
2060 
2061                 if ((error = prlock(pnp, ZNO)) != 0)
2062                         break;
2063                 *unlocked = 0;
2064 
2065                 /*
2066                  * Since we dropped the lock on the process, the agent
2067                  * may have disappeared or changed. Grab the current
2068                  * agent and check fail if it has disappeared.
2069                  */
2070                 if ((ct = p->p_agenttp) == NULL) {
2071                         error = ENOENT;
2072                         break;
2073                 }
2074 
2075                 mutex_enter(&pcp->prc_mutex);
2076                 thread_lock(ct);
2077 
2078                 if (ISTOPPED(ct)) {
2079                         thread_unlock(ct);
2080                         mutex_exit(&pcp->prc_mutex);
2081                         break;
2082                 }
2083 
2084                 thread_unlock(ct);
2085         }
2086 
2087         return (error ? error : -1);
2088 }
2089 
2090 static int
2091 pr_rdwr(proc_t *p, enum uio_rw rw, priovec_t *pio)
2092 {
2093         caddr_t base = (caddr_t)pio->pio_base;
2094         size_t cnt = pio->pio_len;
2095         uintptr_t offset = (uintptr_t)pio->pio_offset;
2096         struct uio auio;
2097         struct iovec aiov;
2098         int error = 0;
2099 
2100         if ((p->p_flag & SSYS) || p->p_as == &kas)
2101                 error = EIO;
2102         else if ((base + cnt) < base || (offset + cnt) < offset)
2103                 error = EINVAL;
2104         else if (cnt != 0) {
2105                 aiov.iov_base = base;
2106                 aiov.iov_len = cnt;
2107 
2108                 auio.uio_loffset = offset;
2109                 auio.uio_iov = &aiov;
2110                 auio.uio_iovcnt = 1;
2111                 auio.uio_resid = cnt;
2112                 auio.uio_segflg = UIO_USERSPACE;
2113                 auio.uio_llimit = (longlong_t)MAXOFFSET_T;
2114                 auio.uio_fmode = FREAD|FWRITE;
2115                 auio.uio_extflg = UIO_COPY_DEFAULT;
2116 
2117                 mutex_exit(&p->p_lock);
2118                 error = prusrio(p, rw, &auio, 0);
2119                 mutex_enter(&p->p_lock);
2120 
2121                 /*
2122                  * We have no way to return the i/o count,
2123                  * like read() or write() would do, so we
2124                  * return an error if the i/o was truncated.
2125                  */
2126                 if (auio.uio_resid != 0 && error == 0)
2127                         error = EIO;
2128         }
2129 
2130         return (error);
2131 }
2132 
2133 static int
2134 pr_scred(proc_t *p, prcred_t *prcred, cred_t *cr, boolean_t dogrps)
2135 {
2136         kthread_t *t;
2137         cred_t *oldcred;
2138         cred_t *newcred;
2139         uid_t oldruid;
2140         int error;
2141         zone_t *zone = crgetzone(cr);
2142 
2143         if (!VALID_UID(prcred->pr_euid, zone) ||
2144             !VALID_UID(prcred->pr_ruid, zone) ||
2145             !VALID_UID(prcred->pr_suid, zone) ||
2146             !VALID_GID(prcred->pr_egid, zone) ||
2147             !VALID_GID(prcred->pr_rgid, zone) ||
2148             !VALID_GID(prcred->pr_sgid, zone))
2149                 return (EINVAL);
2150 
2151         if (dogrps) {
2152                 int ngrp = prcred->pr_ngroups;
2153                 int i;
2154 
2155                 if (ngrp < 0 || ngrp > ngroups_max)
2156                         return (EINVAL);
2157 
2158                 for (i = 0; i < ngrp; i++) {
2159                         if (!VALID_GID(prcred->pr_groups[i], zone))
2160                                 return (EINVAL);
2161                 }
2162         }
2163 
2164         error = secpolicy_allow_setid(cr, prcred->pr_euid, B_FALSE);
2165 
2166         if (error == 0 && prcred->pr_ruid != prcred->pr_euid)
2167                 error = secpolicy_allow_setid(cr, prcred->pr_ruid, B_FALSE);
2168 
2169         if (error == 0 && prcred->pr_suid != prcred->pr_euid &&
2170             prcred->pr_suid != prcred->pr_ruid)
2171                 error = secpolicy_allow_setid(cr, prcred->pr_suid, B_FALSE);
2172 
2173         if (error)
2174                 return (error);
2175 
2176         mutex_exit(&p->p_lock);
2177 
2178         /* hold old cred so it doesn't disappear while we dup it */
2179         mutex_enter(&p->p_crlock);
2180         crhold(oldcred = p->p_cred);
2181         mutex_exit(&p->p_crlock);
2182         newcred = crdup(oldcred);
2183         oldruid = crgetruid(oldcred);
2184         crfree(oldcred);
2185 
2186         /* Error checking done above */
2187         (void) crsetresuid(newcred, prcred->pr_ruid, prcred->pr_euid,
2188             prcred->pr_suid);
2189         (void) crsetresgid(newcred, prcred->pr_rgid, prcred->pr_egid,
2190             prcred->pr_sgid);
2191 
2192         if (dogrps) {
2193                 (void) crsetgroups(newcred, prcred->pr_ngroups,
2194                     prcred->pr_groups);
2195 
2196         }
2197 
2198         mutex_enter(&p->p_crlock);
2199         oldcred = p->p_cred;
2200         p->p_cred = newcred;
2201         mutex_exit(&p->p_crlock);
2202         crfree(oldcred);
2203 
2204         /*
2205          * Keep count of processes per uid consistent.
2206          */
2207         if (oldruid != prcred->pr_ruid) {
2208                 zoneid_t zoneid = crgetzoneid(newcred);
2209 
2210                 mutex_enter(&pidlock);
2211                 upcount_dec(oldruid, zoneid);
2212                 upcount_inc(prcred->pr_ruid, zoneid);
2213                 mutex_exit(&pidlock);
2214         }
2215 
2216         /*
2217          * Broadcast the cred change to the threads.
2218          */
2219         mutex_enter(&p->p_lock);
2220         t = p->p_tlist;
2221         do {
2222                 t->t_pre_sys = 1; /* so syscall will get new cred */
2223         } while ((t = t->t_forw) != p->p_tlist);
2224 
2225         return (0);
2226 }
2227 
2228 /*
2229  * Change process credentials to specified zone.  Used to temporarily
2230  * set a process to run in the global zone; only transitions between
2231  * the process's actual zone and the global zone are allowed.
2232  */
2233 static int
2234 pr_szoneid(proc_t *p, zoneid_t zoneid, cred_t *cr)
2235 {
2236         kthread_t *t;
2237         cred_t *oldcred;
2238         cred_t *newcred;
2239         zone_t *zptr;
2240         zoneid_t oldzoneid;
2241 
2242         if (secpolicy_zone_config(cr) != 0)
2243                 return (EPERM);
2244         if (zoneid != GLOBAL_ZONEID && zoneid != p->p_zone->zone_id)
2245                 return (EINVAL);
2246         if ((zptr = zone_find_by_id(zoneid)) == NULL)
2247                 return (EINVAL);
2248         mutex_exit(&p->p_lock);
2249         mutex_enter(&p->p_crlock);
2250         oldcred = p->p_cred;
2251         crhold(oldcred);
2252         mutex_exit(&p->p_crlock);
2253         newcred = crdup(oldcred);
2254         oldzoneid = crgetzoneid(oldcred);
2255         crfree(oldcred);
2256 
2257         crsetzone(newcred, zptr);
2258         zone_rele(zptr);
2259 
2260         mutex_enter(&p->p_crlock);
2261         oldcred = p->p_cred;
2262         p->p_cred = newcred;
2263         mutex_exit(&p->p_crlock);
2264         crfree(oldcred);
2265 
2266         /*
2267          * The target process is changing zones (according to its cred), so
2268          * update the per-zone upcounts, which are based on process creds.
2269          */
2270         if (oldzoneid != zoneid) {
2271                 uid_t ruid = crgetruid(newcred);
2272 
2273                 mutex_enter(&pidlock);
2274                 upcount_dec(ruid, oldzoneid);
2275                 upcount_inc(ruid, zoneid);
2276                 mutex_exit(&pidlock);
2277         }
2278         /*
2279          * Broadcast the cred change to the threads.
2280          */
2281         mutex_enter(&p->p_lock);
2282         t = p->p_tlist;
2283         do {
2284                 t->t_pre_sys = 1;    /* so syscall will get new cred */
2285         } while ((t = t->t_forw) != p->p_tlist);
2286 
2287         return (0);
2288 }
2289 
2290 static int
2291 pr_spriv(proc_t *p, prpriv_t *prpriv, cred_t *cr)
2292 {
2293         kthread_t *t;
2294         int err;
2295 
2296         ASSERT(MUTEX_HELD(&p->p_lock));
2297 
2298         if ((err = priv_pr_spriv(p, prpriv, cr)) == 0) {
2299                 /*
2300                  * Broadcast the cred change to the threads.
2301                  */
2302                 t = p->p_tlist;
2303                 do {
2304                         t->t_pre_sys = 1; /* so syscall will get new cred */
2305                 } while ((t = t->t_forw) != p->p_tlist);
2306         }
2307 
2308         return (err);
2309 }
2310 
2311 /*
2312  * Return -1 if the process is the parent of a vfork(1) whose child has yet to
2313  * terminate or perform an exec(2).
2314  *
2315  * Returns 0 if the process is fully stopped except for the current thread (if
2316  * we are operating on our own process), 1 otherwise.
2317  *
2318  * If the watchstop flag is set, then we ignore threads with TP_WATCHSTOP set.
2319  * See holdwatch() for details.
2320  */
2321 int
2322 pr_allstopped(proc_t *p, int watchstop)
2323 {
2324         kthread_t *t;
2325         int rv = 0;
2326 
2327         ASSERT(MUTEX_HELD(&p->p_lock));
2328 
2329         if (p->p_flag & SVFWAIT) /* waiting for vfork'd child to exec */
2330                 return (-1);
2331 
2332         if ((t = p->p_tlist) != NULL) {
2333                 do {
2334                         if (t == curthread || VSTOPPED(t) ||
2335                             (watchstop && (t->t_proc_flag & TP_WATCHSTOP)))
2336                                 continue;
2337                         thread_lock(t);
2338                         switch (t->t_state) {
2339                         case TS_ZOMB:
2340                         case TS_STOPPED:
2341                                 break;
2342                         case TS_SLEEP:
2343                                 if (!(t->t_flag & T_WAKEABLE) ||
2344                                     t->t_wchan0 == NULL)
2345                                         rv = 1;
2346                                 break;
2347                         default:
2348                                 rv = 1;
2349                                 break;
2350                         }
2351                         thread_unlock(t);
2352                 } while (rv == 0 && (t = t->t_forw) != p->p_tlist);
2353         }
2354 
2355         return (rv);
2356 }
2357 
2358 /*
2359  * Cause all lwps in the process to pause (for watchpoint operations).
2360  */
2361 static void
2362 pauselwps(proc_t *p)
2363 {
2364         kthread_t *t;
2365 
2366         ASSERT(MUTEX_HELD(&p->p_lock));
2367         ASSERT(p != curproc);
2368 
2369         if ((t = p->p_tlist) != NULL) {
2370                 do {
2371                         thread_lock(t);
2372                         t->t_proc_flag |= TP_PAUSE;
2373                         aston(t);
2374                         if ((ISWAKEABLE(t) && (t->t_wchan0 == NULL)) ||
2375                             ISWAITING(t)) {
2376                                 setrun_locked(t);
2377                         }
2378                         prpokethread(t);
2379                         thread_unlock(t);
2380                 } while ((t = t->t_forw) != p->p_tlist);
2381         }
2382 }
2383 
2384 /*
2385  * undo the effects of pauselwps()
2386  */
2387 static void
2388 unpauselwps(proc_t *p)
2389 {
2390         kthread_t *t;
2391 
2392         ASSERT(MUTEX_HELD(&p->p_lock));
2393         ASSERT(p != curproc);
2394 
2395         if ((t = p->p_tlist) != NULL) {
2396                 do {
2397                         thread_lock(t);
2398                         t->t_proc_flag &= ~TP_PAUSE;
2399                         if (t->t_state == TS_STOPPED) {
2400                                 t->t_schedflag |= TS_UNPAUSE;
2401                                 t->t_dtrace_stop = 0;
2402                                 setrun_locked(t);
2403                         }
2404                         thread_unlock(t);
2405                 } while ((t = t->t_forw) != p->p_tlist);
2406         }
2407 }
2408 
2409 /*
2410  * Cancel all watched areas.  Called from prclose().
2411  */
2412 proc_t *
2413 pr_cancel_watch(prnode_t *pnp)
2414 {
2415         proc_t *p = pnp->pr_pcommon->prc_proc;
2416         struct as *as;
2417         kthread_t *t;
2418 
2419         ASSERT(MUTEX_HELD(&p->p_lock) && (p->p_proc_flag & P_PR_LOCK));
2420 
2421         if (!pr_watch_active(p))
2422                 return (p);
2423 
2424         /*
2425          * Pause the process before dealing with the watchpoints.
2426          */
2427         if (p == curproc) {
2428                 prunlock(pnp);
2429                 while (holdwatch() != 0)
2430                         continue;
2431                 p = pr_p_lock(pnp);
2432                 mutex_exit(&pr_pidlock);
2433                 ASSERT(p == curproc);
2434         } else {
2435                 pauselwps(p);
2436                 while (p != NULL && pr_allstopped(p, 0) > 0) {
2437                         /*
2438                          * This cv/mutex pair is persistent even
2439                          * if the process disappears after we
2440                          * unmark it and drop p->p_lock.
2441                          */
2442                         kcondvar_t *cv = &pr_pid_cv[p->p_slot];
2443                         kmutex_t *mp = &p->p_lock;
2444 
2445                         prunmark(p);
2446                         (void) cv_wait(cv, mp);
2447                         mutex_exit(mp);
2448                         p = pr_p_lock(pnp);  /* NULL if process disappeared */
2449                         mutex_exit(&pr_pidlock);
2450                 }
2451         }
2452 
2453         if (p == NULL)          /* the process disappeared */
2454                 return (NULL);
2455 
2456         ASSERT(p == pnp->pr_pcommon->prc_proc);
2457         ASSERT(MUTEX_HELD(&p->p_lock) && (p->p_proc_flag & P_PR_LOCK));
2458 
2459         if (pr_watch_active(p)) {
2460                 pr_free_watchpoints(p);
2461                 if ((t = p->p_tlist) != NULL) {
2462                         do {
2463                                 watch_disable(t);
2464 
2465                         } while ((t = t->t_forw) != p->p_tlist);
2466                 }
2467         }
2468 
2469         if ((as = p->p_as) != NULL) {
2470                 avl_tree_t *tree;
2471                 struct watched_page *pwp;
2472 
2473                 /*
2474                  * If this is the parent of a vfork, the watched page
2475                  * list has been moved temporarily to p->p_wpage.
2476                  */
2477                 if (avl_numnodes(&p->p_wpage) != 0)
2478                         tree = &p->p_wpage;
2479                 else
2480                         tree = &as->a_wpage;
2481 
2482                 mutex_exit(&p->p_lock);
2483                 AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
2484 
2485                 for (pwp = avl_first(tree); pwp != NULL;
2486                     pwp = AVL_NEXT(tree, pwp)) {
2487                         pwp->wp_read = 0;
2488                         pwp->wp_write = 0;
2489                         pwp->wp_exec = 0;
2490                         if ((pwp->wp_flags & WP_SETPROT) == 0) {
2491                                 pwp->wp_flags |= WP_SETPROT;
2492                                 pwp->wp_prot = pwp->wp_oprot;
2493                                 pwp->wp_list = p->p_wprot;
2494                                 p->p_wprot = pwp;
2495                         }
2496                 }
2497 
2498                 AS_LOCK_EXIT(as, &as->a_lock);
2499                 mutex_enter(&p->p_lock);
2500         }
2501 
2502         /*
2503          * Unpause the process now.
2504          */
2505         if (p == curproc)
2506                 continuelwps(p);
2507         else
2508                 unpauselwps(p);
2509 
2510         return (p);
2511 }