1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*      Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T     */
  22 /*        All Rights Reserved   */
  23 
  24 
  25 /*
  26  * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
  27  * Copyright 2017 Joyent, Inc.
  28  * Copyright 2020 OmniOS Community Edition (OmniOSce) Association.
  29  */
  30 
  31 #include <sys/types.h>
  32 #include <sys/sysmacros.h>
  33 #include <sys/param.h>
  34 #include <sys/errno.h>
  35 #include <sys/signal.h>
  36 #include <sys/stat.h>
  37 #include <sys/proc.h>
  38 #include <sys/cred.h>
  39 #include <sys/user.h>
  40 #include <sys/vnode.h>
  41 #include <sys/file.h>
  42 #include <sys/stream.h>
  43 #include <sys/strsubr.h>
  44 #include <sys/stropts.h>
  45 #include <sys/tihdr.h>
  46 #include <sys/var.h>
  47 #include <sys/poll.h>
  48 #include <sys/termio.h>
  49 #include <sys/ttold.h>
  50 #include <sys/systm.h>
  51 #include <sys/uio.h>
  52 #include <sys/cmn_err.h>
  53 #include <sys/sad.h>
  54 #include <sys/netstack.h>
  55 #include <sys/priocntl.h>
  56 #include <sys/jioctl.h>
  57 #include <sys/procset.h>
  58 #include <sys/session.h>
  59 #include <sys/kmem.h>
  60 #include <sys/filio.h>
  61 #include <sys/vtrace.h>
  62 #include <sys/debug.h>
  63 #include <sys/strredir.h>
  64 #include <sys/fs/fifonode.h>
  65 #include <sys/fs/snode.h>
  66 #include <sys/strlog.h>
  67 #include <sys/strsun.h>
  68 #include <sys/project.h>
  69 #include <sys/kbio.h>
  70 #include <sys/msio.h>
  71 #include <sys/tty.h>
  72 #include <sys/ptyvar.h>
  73 #include <sys/vuid_event.h>
  74 #include <sys/modctl.h>
  75 #include <sys/sunddi.h>
  76 #include <sys/sunldi_impl.h>
  77 #include <sys/autoconf.h>
  78 #include <sys/policy.h>
  79 #include <sys/dld.h>
  80 #include <sys/zone.h>
  81 #include <sys/ptms.h>
  82 #include <c2/audit.h>
  83 
   84 /*
   85  * This define helps improve the readability of streams code while
   86  * still maintaining a very old streams performance enhancement.  The
   87  * performance enhancement basically involved having all callers
   88  * of straccess() perform the first check that straccess() will do
   89  * locally before actually calling straccess().  (Thereby reducing
   90  * the number of unnecessary calls to straccess().)
       *
       * The macro refers to a variable named `stp' that is not one of its
       * arguments, so it can only be used where a stream head pointer of
       * that name is in scope.  It evaluates to 0 without calling
       * straccess() when stp->sd_sidp is NULL or when the stream's vnode is
       * a VFIFO; otherwise it evaluates to the result of straccess(x, y).
   91  */
   92 #define i_straccess(x, y)       ((stp->sd_sidp == NULL) ? 0 : \
   93                                     (stp->sd_vnode->v_type == VFIFO) ? 0 : \
   94                                     straccess((x), (y)))
  95 
   96 /*
   97  * what is mblk_pull_len?
   98  *
   99  * If a streams message consists of many short messages,
  100  * a performance degradation occurs from copyout overhead.
  101  * To decrease the per mblk overhead, messages that are
  102  * likely to consist of many small mblks are pulled up into
  103  * one continuous chunk of memory.
  104  *
  105  * To avoid the processing overhead of examining every
  106  * mblk, a quick heuristic is used. If the first mblk in
  107  * the message is shorter than mblk_pull_len, it is likely
  108  * that the rest of the mblks will be short.
  109  *
  110  * This heuristic was decided upon after performance tests
  111  * indicated that anything more complex slowed down the main
  112  * code path.
       *
       * MBLK_PULL_LEN is only the compiled-in initial value; the threshold
       * itself lives in the (non-static) variable mblk_pull_len.  The code
       * that consults it is outside this part of the file.
  113  */
  114 #define MBLK_PULL_LEN 64
  115 uint32_t mblk_pull_len = MBLK_PULL_LEN;
 116 
  117 /*
  118  * The sgttyb_handling flag controls the handling of the old BSD
  119  * TIOCGETP, TIOCSETP, and TIOCSETN ioctls as follows:
  120  *
  121  * 0 - Emit no warnings at all and retain old, broken behavior.
  122  * 1 - Emit no warnings and silently handle new semantics.
  123  * 2 - Send cmn_err(CE_NOTE) when either TIOCSETP or TIOCSETN is used
  124  *     (once per system invocation).  Handle with new semantics.
  125  * 3 - Send SIGSYS when any TIOCGETP, TIOCSETP, or TIOCSETN call is
  126  *     made (so that offenders drop core and are easy to debug).
  127  *
  128  * The "new semantics" are that TIOCGETP returns B38400 for
  129  * sg_[io]speed if the corresponding value is over B38400, and that
  130  * TIOCSET[PN] accept B38400 in these cases to mean "retain current
  131  * bit rate."
       *
       * The compiled-in default is 1.
  132  */
  133 int sgttyb_handling = 1;
      /*
       * NOTE(review): presumably latches the "once per system invocation"
       * CE_NOTE described for mode 2 above.  It is not referenced in this
       * part of the file; confirm against its users.
       */
  134 static boolean_t sgttyb_complaint;
  135 
  136 /* don't push drcompat module by default on Style-2 streams */
      /*
       * stropen() tests this flag before pushing DRMODNAME on a clone open
       * of a network driver whose bottom read queue lacks _QASSOCIATED.
       */
  137 static int push_drcompat = 0;
  138 
  139 /*
  140  * id value used to distinguish between different ioctl messages
  141  */
  142 static uint32_t ioc_id;
  143 
      /* Forward declarations of file-local (static) helpers. */
  144 static void putback(struct stdata *, queue_t *, mblk_t *, int);
  145 static void strcleanall(struct vnode *);
  146 static int strwsrv(queue_t *);
  147 static int strdocmd(struct stdata *, struct strcmd *, cred_t *);
  148 
  149 /*
  150  * qinit and module_info structures for stream head read and write queues
       *
       * stropen() installs the fifo_* pair (via setq()) when the vnode is a
       * VFIFO and the strdata/stwdata pair for every other stream.  The two
       * pairs name the same routines (strrput on the read side, strwsrv on
       * the write side) and differ only in their module_info: the FIFO read
       * side uses PIPE_BUF, FIFOHIWAT and FIFOLOWAT where the ordinary read
       * side uses INFPSZ, STRHIGH and STRLOW.
       *
       * NOTE(review): strrput sits in the first qinit slot and strwsrv in
       * the second; in the conventional qinit layout those are the put and
       * service procedures respectively - confirm against <sys/stream.h>.
  151  */
  152 struct module_info strm_info = { 0, "strrhead", 0, INFPSZ, STRHIGH, STRLOW };
  153 struct module_info stwm_info = { 0, "strwhead", 0, 0, 0, 0 };
  154 struct qinit strdata = { strrput, NULL, NULL, NULL, NULL, &strm_info };
  155 struct qinit stwdata = { NULL, strwsrv, NULL, NULL, NULL, &stwm_info };
  156 struct module_info fiform_info = { 0, "fifostrrhead", 0, PIPE_BUF, FIFOHIWAT,
  157     FIFOLOWAT };
  158 struct module_info fifowm_info = { 0, "fifostrwhead", 0, 0, 0, 0 };
  159 struct qinit fifo_strdata = { strrput, NULL, NULL, NULL, NULL, &fiform_info };
  160 struct qinit fifo_stwdata = { NULL, strwsrv, NULL, NULL, NULL, &fifowm_info };
  161 
  162 extern kmutex_t strresources;   /* protects global resources */
  163 extern kmutex_t muxifier;       /* single-threads multiplexor creation */
  164 
      /* msgnodata(bp) is simply the logical negation of msghasdata(bp). */
  165 static boolean_t msghasdata(mblk_t *bp);
  166 #define msgnodata(bp) (!msghasdata(bp))
 167 
 168 /*
 169  * Stream head locking notes:
 170  *      There are four monitors associated with the stream head:
 171  *      1. v_stream monitor: in stropen() and strclose() v_lock
 172  *              is held while the association of vnode and stream
 173  *              head is established or tested for.
 174  *      2. open/close/push/pop monitor: sd_lock is held while each
 175  *              thread bids for exclusive access to this monitor
 176  *              for opening or closing a stream.  In addition, this
 177  *              monitor is entered during pushes and pops.  This
 178  *              guarantees that during plumbing operations there
 179  *              is only one thread trying to change the plumbing.
 180  *              Any other threads present in the stream are only
 181  *              using the plumbing.
 182  *      3. read/write monitor: in the case of read, a thread holds
 183  *              sd_lock while trying to get data from the stream
 184  *              head queue.  if there is none to fulfill a read
 185  *              request, it sets RSLEEP and calls cv_wait_sig() down
 186  *              in strwaitq() to await the arrival of new data.
 187  *              when new data arrives in strrput(), sd_lock is acquired
 188  *              before testing for RSLEEP and calling cv_broadcast().
 189  *              the behavior of strwrite(), strwsrv(), and WSLEEP
 190  *              mirror this.
 191  *      4. ioctl monitor: sd_lock is gotten to ensure that only one
 192  *              thread is doing an ioctl at a time.
 193  */
  194 
      /*
       * Push the STREAMS module called `name' onto the stream headed by `stp'
       * and, if requested, record a STREAMS anchor at the resulting position.
       *
       *      qp              queue handed to qattach() as the attach point
       *      devp            device number passed through to qattach()
       *      stp             stream head being modified
       *      name            name of the module to look up with fmodsw_find()
       *      anchor          push count at which an anchor is wanted
       *      crp             credentials passed through to qattach()
       *      anchor_zoneid   value stored in sd_anchorzone if the anchor is set
       *
       * Returns 0 on success, otherwise an errno value:
       *      ENXIO   STRHUP is set on the stream head (checked first, so it
       *              wins even if an error flag is also set)
       *      EIO     STRDERR or STWRERR is set (and STRHUP is not)
       *      EINVAL  sd_pushcnt has already reached nstrpush, or
       *              fmodsw_find() could not find/load the module; in the
       *              latter case STREOPENFAIL is also set on the stream head
       *      other   whatever qattach() returned
       *
       * NOTE(review): sd_flag is tested, and STREOPENFAIL is set, without
       * taking sd_lock here, whereas stropen() sets the same flag under
       * sd_lock.  Both callers visible in this file (stropen() and
       * xpg4_fixup()) run before stropen() clears STWOPEN; confirm that this
       * is what makes the unlocked access acceptable.
       */
  195 static int
  196 push_mod(queue_t *qp, dev_t *devp, struct stdata *stp, const char *name,
  197     int anchor, cred_t *crp, uint_t anchor_zoneid)
  198 {
  199         int error;
  200         fmodsw_impl_t *fp;
  201 
              /* Refuse to push onto a hung-up or errored stream. */
  202         if (stp->sd_flag & (STRHUP|STRDERR|STWRERR)) {
  203                 error = (stp->sd_flag & STRHUP) ? ENXIO : EIO;
  204                 return (error);
  205         }
              /* nstrpush is the limit on the number of pushed modules. */
  206         if (stp->sd_pushcnt >= nstrpush) {
  207                 return (EINVAL);
  208         }
  209 
              /*
               * Look the module up, asking for it to be loaded if necessary
               * (FMODSW_LOAD) and for a hold on the entry (FMODSW_HOLD).
               */
  210         if ((fp = fmodsw_find(name, FMODSW_HOLD | FMODSW_LOAD)) == NULL) {
  211                 stp->sd_flag |= STREOPENFAIL;
  212                 return (EINVAL);
  213         }
  214 
  215         /*
  216          * push new module and call its open routine via qattach
               *
               * NOTE(review): the held fp is not released on the failure
               * return below; presumably qattach() takes over the hold and
               * drops it itself when it fails - confirm against qattach().
  217          */
  218         if ((error = qattach(qp, devp, 0, crp, fp, B_FALSE)) != 0)
  219                 return (error);
  220 
  221         /*
  222          * Check to see if caller wants a STREAMS anchor
  223          * put at this place in the stream, and add if so.
               *
               * The comparison is made against sd_pushcnt as it stands after
               * qattach().  NOTE(review): presumably qattach() has already
               * counted the new module, which would make the anchor value 0
               * (used by several callers in this file) mean "no anchor" -
               * confirm.
  224          */
  225         mutex_enter(&stp->sd_lock);
  226         if (anchor == stp->sd_pushcnt) {
  227                 stp->sd_anchor = stp->sd_pushcnt;
  228                 stp->sd_anchorzone = anchor_zoneid;
  229         }
  230         mutex_exit(&stp->sd_lock);
  231 
  232         return (0);
  233 }
  234 
      /*
       * Give a freshly opened stream terminal semantics: push "ptem",
       * "ldterm" and "ttcompat" (in that order) with push_mod(), then send a
       * PTSSTTY ioctl down the stream.  stropen() calls this on a first open
       * when the stream head has both STRISTTY and STRXPG4TTY set.
       *
       *      qp      queue handed to push_mod() as the attach point
       *      devp    device number of the open; only read here
       *      stp     stream head
       *      crp     credentials of the opener; also the source of the zone
       *              id passed to push_mod()
       *
       * Returns the first non-zero error from push_mod(), in which case the
       * remaining modules are not pushed and no ioctl is sent.  Otherwise
       * returns 0; the result of the PTSSTTY ioctl is deliberately discarded,
       * so a failing ioctl does not fail the open.
       */
  235 static int
  236 xpg4_fixup(queue_t *qp, dev_t *devp, struct stdata *stp, cred_t *crp)
  237 {
  238         static const char *ptsmods[] = {
  239             "ptem", "ldterm", "ttcompat"
  240         };
              /*
               * The modules are handed the address of this private copy, so
               * the caller's dev_t is never written through from here.
               */
  241         dev_t dummydev = *devp;
  242         struct strioctl strioc;
  243         zoneid_t zoneid;
  244         int32_t rval;
  245         uint_t i;
  246 
  247         /*
  248          * Push modules required for the slave PTY to have terminal
  249          * semantics out of the box; this is required by XPG4v2.
  250          * These three modules are flagged as single-instance so that
  251          * the system will never end up with duplicate copies pushed
  252          * onto a stream.
  253          */
  254 
  255         zoneid = crgetzoneid(crp);
  256         for (i = 0; i < ARRAY_SIZE(ptsmods); i++) {
  257                 int error;
  258 
                      /* An anchor argument of 0 is passed for each push. */
  259                 error = push_mod(qp, &dummydev, stp, ptsmods[i], 0,
  260                     crp, zoneid);
  261                 if (error != 0)
  262                         return (error);
  263         }
  264 
  265         /*
  266          * Send PTSSTTY down the stream
               *
               * The request carries no data buffer (ic_len 0, ic_dp NULL) and
               * is issued as a kernel-to-kernel (K_TO_K), native (FNATIVE)
               * ioctl.  rval only exists to satisfy strdoioctl()'s interface;
               * it is never examined.
  267          */
  268 
  269         strioc.ic_cmd = PTSSTTY;
  270         strioc.ic_timout = 0;
  271         strioc.ic_len = 0;
  272         strioc.ic_dp = NULL;
  273 
  274         (void) strdoioctl(stp, &strioc, FNATIVE, K_TO_K, crp, &rval);
  275 
  276         return (0);
  277 }
 278 
  279 /*
  280  * Open a stream device.
       *
       *      vp      vnode being opened; vp->v_stream says whether a stream
       *              already exists
       *      devp    in/out device number, passed to qattach()/qreopen()
       *      flag    open flags; FNDELAY/FNONBLOCK turn waits into EAGAIN
       *      crp     credentials of the opener
       *
       * Returns 0 on success or an errno value.
       *
       * There are two distinct paths:
       *
       * 1. vp already has a stream (re-open).  Wait until no open, close or
       *    plumbing operation (STWOPEN|STRCLOSE|STRPLUMB) is in progress on
       *    the stream and, if it is mated, on its mate.  With FNDELAY or
       *    FNONBLOCK the wait is replaced by EAGAIN; a wait that
       *    cv_wait_sig() reports as interrupted yields EINTR.  After a
       *    completed wait the whole lookup is retried from the top.  A stream
       *    with STRDERR or STWRERR set fails with EIO.  Otherwise STWOPEN is
       *    set and qreopen() is called on each queue below the stream head.
       *
       * 2. vp has no stream (first open).  Allocate and initialize a stream
       *    head and hook it to the vnode.  For a VFIFO that is all.  For
       *    anything else, attach the driver with qattach(), then push modules
       *    from the autopush configuration, then (if the stream head is
       *    flagged STRISTTY|STRXPG4TTY) the terminal modules via
       *    xpg4_fixup().  If anything after the driver attach fails, the
       *    stream is left in place with STREOPENFAIL set and the error is
       *    returned.
  281  */
  282 int
  283 stropen(vnode_t *vp, dev_t *devp, int flag, cred_t *crp)
  284 {
  285         struct stdata *stp;
  286         queue_t *qp;
  287         int s;
  288         dev_t dummydev, savedev;
  289         struct autopush *ap;
  290         struct dlautopush dlap;
  291         int error = 0;
  292         ssize_t rmin, rmax;
  293         int cloneopen;
  294         queue_t *brq;
  295         major_t major;
  296         str_stack_t *ss;
  297         zoneid_t zoneid;
  298         uint_t anchor;
  299 
  300         /*
  301          * If the stream already exists, wait for any open in progress
  302          * to complete, then call the open function of each module and
  303          * driver in the stream.  Otherwise create the stream.
  304          */
  305         TRACE_1(TR_FAC_STREAMS_FR, TR_STROPEN, "stropen:%p", vp);
  306 retry:
  307         mutex_enter(&vp->v_lock);
  308         if ((stp = vp->v_stream) != NULL) {
  309 
  310                 /*
  311                  * Waiting for stream to be created to device
  312                  * due to another open.
  313                  */
  314                 mutex_exit(&vp->v_lock);
  315 
  316                 if (STRMATED(stp)) {
  317                         struct stdata *strmatep = stp->sd_mate;
  318 
                              /*
                               * Both sd_locks are taken here.  The ckreturn
                               * label releases only stp->sd_lock, so every
                               * jump to it below must arrive holding exactly
                               * that lock: the mate's lock is dropped first
                               * (and, after waiting on the mate, stp's lock is
                               * re-taken).
                               */
  319                         STRLOCKMATES(stp);
  320                         if (strmatep->sd_flag & (STWOPEN|STRCLOSE|STRPLUMB)) {
  321                                 if (flag & (FNDELAY|FNONBLOCK)) {
  322                                         error = EAGAIN;
  323                                         mutex_exit(&strmatep->sd_lock);
  324                                         goto ckreturn;
  325                                 }
                                      /* Wait on the mate, holding only its lock. */
  326                                 mutex_exit(&stp->sd_lock);
  327                                 if (!cv_wait_sig(&strmatep->sd_monitor,
  328                                     &strmatep->sd_lock)) {
  329                                         error = EINTR;
  330                                         mutex_exit(&strmatep->sd_lock);
  331                                         mutex_enter(&stp->sd_lock);
  332                                         goto ckreturn;
  333                                 }
  334                                 mutex_exit(&strmatep->sd_lock);
  335                                 goto retry;
  336                         }
  337                         if (stp->sd_flag & (STWOPEN|STRCLOSE|STRPLUMB)) {
  338                                 if (flag & (FNDELAY|FNONBLOCK)) {
  339                                         error = EAGAIN;
  340                                         mutex_exit(&strmatep->sd_lock);
  341                                         goto ckreturn;
  342                                 }
                                      /* Wait on this stream, holding only its lock. */
  343                                 mutex_exit(&strmatep->sd_lock);
  344                                 if (!cv_wait_sig(&stp->sd_monitor,
  345                                     &stp->sd_lock)) {
  346                                         error = EINTR;
  347                                         goto ckreturn;
  348                                 }
  349                                 mutex_exit(&stp->sd_lock);
  350                                 goto retry;
  351                         }
  352 
  353                         if (stp->sd_flag & (STRDERR|STWRERR)) {
  354                                 error = EIO;
  355                                 mutex_exit(&strmatep->sd_lock);
  356                                 goto ckreturn;
  357                         }
  358 
  359                         stp->sd_flag |= STWOPEN;
  360                         STRUNLOCKMATES(stp);
  361                 } else {
  362                         mutex_enter(&stp->sd_lock);
  363                         if (stp->sd_flag & (STWOPEN|STRCLOSE|STRPLUMB)) {
  364                                 if (flag & (FNDELAY|FNONBLOCK)) {
  365                                         error = EAGAIN;
  366                                         goto ckreturn;
  367                                 }
  368                                 if (!cv_wait_sig(&stp->sd_monitor,
  369                                     &stp->sd_lock)) {
  370                                         error = EINTR;
  371                                         goto ckreturn;
  372                                 }
                                      /*
                                       * Start over from the vnode: v_stream is
                                       * re-read at the retry label rather than
                                       * trusting the stp waited on.
                                       */
  373                                 mutex_exit(&stp->sd_lock);
  374                                 goto retry;  /* could be clone! */
  375                         }
  376 
  377                         if (stp->sd_flag & (STRDERR|STWRERR)) {
  378                                 error = EIO;
  379                                 goto ckreturn;
  380                         }
  381 
  382                         stp->sd_flag |= STWOPEN;
  383                         mutex_exit(&stp->sd_lock);
  384                 }
  385 
  386                 /*
  387                  * Open all modules and devices down stream to notify
  388                  * that another user is streaming.  For modules, set the
  389                  * last argument to MODOPEN and do not pass any open flags.
  390                  * Ignore dummydev since this is not the first open.
                       *
                       * The walk stops at the first qreopen() failure, leaving
                       * that error in `error'.
  391                  */
  392                 claimstr(stp->sd_wrq);
  393                 qp = stp->sd_wrq;
  394                 while (_SAMESTR(qp)) {
  395                         qp = qp->q_next;
  396                         if ((error = qreopen(_RD(qp), devp, flag, crp)) != 0)
  397                                 break;
  398                 }
  399                 releasestr(stp->sd_wrq);
                      /*
                       * This cleanup runs whether or not a qreopen() failed:
                       * the hangup/error flags and saved errors are cleared
                       * and STWOPEN is dropped in both cases.
                       */
  400                 mutex_enter(&stp->sd_lock);
  401                 stp->sd_flag &= ~(STRHUP|STWOPEN|STRDERR|STWRERR);
  402                 stp->sd_rerror = 0;
  403                 stp->sd_werror = 0;
                      /*
                       * Common exit for the re-open path; entered with
                       * stp->sd_lock held.  Waiters on sd_monitor are woken
                       * on every exit, including the error exits above.
                       */
  404 ckreturn:
  405                 cv_broadcast(&stp->sd_monitor);
  406                 mutex_exit(&stp->sd_lock);
  407                 return (error);
  408         }
  409 
  410         /*
  411          * This vnode isn't streaming.  SPECFS already
  412          * checked for multiple vnodes pointing to the
  413          * same stream, so create a stream to the driver.
               *
               * vp->v_lock is still held from the retry label and stays held
               * until v_stream has been set below.
  414          */
  415         qp = allocq();
  416         stp = shalloc(qp);
  417 
  418         /*
  419          * Initialize stream head.  shalloc() has given us
  420          * exclusive access, and we have the vnode locked;
  421          * we can do whatever we want with stp.
  422          */
  423         stp->sd_flag = STWOPEN;
  424         stp->sd_siglist = NULL;
  425         stp->sd_pollist.ph_list = NULL;
  426         stp->sd_sigflags = 0;
  427         stp->sd_mark = NULL;
  428         stp->sd_closetime = STRTIMOUT;
  429         stp->sd_sidp = NULL;
  430         stp->sd_pgidp = NULL;
  431         stp->sd_vnode = vp;
  432         stp->sd_pvnode = NULL;
  433         stp->sd_rerror = 0;
  434         stp->sd_werror = 0;
  435         stp->sd_wroff = 0;
  436         stp->sd_tail = 0;
  437         stp->sd_iocblk = NULL;
  438         stp->sd_cmdblk = NULL;
  439         stp->sd_pushcnt = 0;
  440         stp->sd_qn_minpsz = 0;
  441         stp->sd_qn_maxpsz = INFPSZ - 1;      /* used to check for initialization */
  442         stp->sd_maxblk = INFPSZ;
  443         qp->q_ptr = _WR(qp)->q_ptr = stp;
  444         STREAM(qp) = STREAM(_WR(qp)) = stp;
              /*
               * Publishing the stream head with STWOPEN already set makes
               * concurrent opens of this vnode take the re-open path above
               * and wait there.
               */
  445         vp->v_stream = stp;
  446         mutex_exit(&vp->v_lock);
  447         if (vp->v_type == VFIFO) {
  448                 stp->sd_flag |= OLDNDELAY;
  449                 /*
  450                  * This means, both for pipes and fifos
  451                  * strwrite will send SIGPIPE if the other
  452                  * end is closed. For putmsg it depends
  453                  * on whether it is a XPG4_2 application
  454                  * or not
  455                  */
  456                 stp->sd_wput_opt = SW_SIGPIPE;
  457 
  458                 /* setq might sleep in kmem_alloc - avoid holding locks. */
  459                 setq(qp, &fifo_strdata, &fifo_stwdata, NULL, QMTSAFE,
  460                     SQ_CI|SQ_CO, B_FALSE);
  461 
  462                 set_qend(qp);
  463                 stp->sd_strtab = fifo_getinfo();
  464                 _WR(qp)->q_nfsrv = _WR(qp);
  465                 qp->q_nfsrv = qp;
  466                 /*
  467                  * Wake up others that are waiting for stream to be created.
  468                  */
  469                 mutex_enter(&stp->sd_lock);
  470                 /*
  471                  * nothing is pushed on stream yet, so
  472                  * optimized stream head packetsizes are just that
  473                  * of the read queue
  474                  */
  475                 stp->sd_qn_minpsz = qp->q_minpsz;
  476                 stp->sd_qn_maxpsz = qp->q_maxpsz;
  477                 stp->sd_flag &= ~STWOPEN;
                      /* sd_lock stays held; fifo_opendone releases it. */
  478                 goto fifo_opendone;
  479         }
  480         /* setq might sleep in kmem_alloc - avoid holding locks. */
  481         setq(qp, &strdata, &stwdata, NULL, QMTSAFE, SQ_CI|SQ_CO, B_FALSE);
  482 
  483         set_qend(qp);
  484 
  485         /*
  486          * Open driver and create stream to it (via qattach).
               *
               * The requested device and the clone-open test are both
               * captured before the call, from the caller's original *devp.
               * On failure the stream head is unhooked from the vnode, any
               * opens waiting on sd_monitor are woken, and the queue pair and
               * stream head are freed.
  487          */
  488         savedev = *devp;
  489         cloneopen = (getmajor(*devp) == clone_major);
  490         if ((error = qattach(qp, devp, flag, crp, NULL, B_FALSE)) != 0) {
  491                 mutex_enter(&vp->v_lock);
  492                 vp->v_stream = NULL;
  493                 mutex_exit(&vp->v_lock);
  494                 mutex_enter(&stp->sd_lock);
  495                 cv_broadcast(&stp->sd_monitor);
  496                 mutex_exit(&stp->sd_lock);
  497                 freeq(_RD(qp));
  498                 shfree(stp);
  499                 return (error);
  500         }
  501         /*
  502          * Set sd_strtab after open in order to handle clonable drivers
  503          */
  504         stp->sd_strtab = STREAMSTAB(getmajor(*devp));
  505 
  506         /*
  507          * Historical note: dummydev used to be set prior to the initial
  508          * open (via qattach above), which made the value seen
  509          * inconsistent between an I_PUSH and an autopush of a module.
  510          */
  511         dummydev = *devp;
  512 
  513         /*
  514          * For clone open of old style (Q not associated) network driver,
  515          * push DRMODNAME module to handle DL_ATTACH/DL_DETACH
               *
               * A failure to push here is only logged; it is not recorded in
               * `error' and does not fail the open.
  516          */
  517         brq = _RD(_WR(qp)->q_next);
  518         major = getmajor(*devp);
  519         if (push_drcompat && cloneopen && NETWORK_DRV(major) &&
  520             ((brq->q_flag & _QASSOCIATED) == 0)) {
  521                 if (push_mod(qp, &dummydev, stp, DRMODNAME, 0, crp, 0) != 0)
  522                         cmn_err(CE_WARN, "cannot push " DRMODNAME
  523                             " streams module");
  524         }
  525 
              /*
               * Decide which device number ("savedev") keys the per-driver
               * autopush lookup below.  Non-network drivers use the device as
               * it stands after the open.
               */
  526         if (!NETWORK_DRV(major)) {
  527                 savedev = *devp;
  528         } else {
  529                 /*
  530                  * For network devices, process differently based on the
  531                  * return value from dld_autopush():
  532                  *
  533                  *   0: the passed-in device points to a GLDv3 datalink with
  534                  *   per-link autopush configuration; use that configuration
  535                  *   and ignore any per-driver autopush configuration.
  536                  *
  537                  *   1: the passed-in device points to a physical GLDv3
  538                  *   datalink without per-link autopush configuration.  The
  539                  *   passed in device was changed to refer to the actual
  540                  *   physical device (if it's not already); we use that new
  541                  *   device to look up any per-driver autopush configuration.
  542                  *
  543                  *   -1: neither of the above cases applied; use the initial
  544                  *   device to look up any per-driver autopush configuration.
                       *
                       * Note that dld_autopush() is given savedev, which at
                       * this point still holds the device as it was before
                       * qattach().
  545                  */
  546                 switch (dld_autopush(&savedev, &dlap)) {
  547                 case 0:
  548                         zoneid = crgetzoneid(crp);
  549                         for (s = 0; s < dlap.dap_npush; s++) {
  550                                 error = push_mod(qp, &dummydev, stp,
  551                                     dlap.dap_aplist[s], dlap.dap_anchor, crp,
  552                                     zoneid);
  553                                 if (error != 0)
  554                                         break;
  555                         }
  556                         goto opendone;
  557                 case 1:
  558                         break;
  559                 case -1:
  560                         savedev = *devp;
  561                         break;
  562                 }
  563         }
  564         /*
  565          * Find the autopush configuration based on "savedev". Start with the
  566          * global zone. If not found check in the local zone.
               *
               * Each netstack_find_by_stackid() is paired with a
               * netstack_rele() on every path out of the lookup.
               *
               * NOTE(review): the result of netstack_find_by_stackid() is
               * dereferenced without a NULL check; presumably the stack for
               * the global zone and for the caller's own zone always exists
               * at this point - confirm.
  567          */
  568         zoneid = GLOBAL_ZONEID;
  569 retryap:
  570         ss = netstack_find_by_stackid(zoneid_to_netstackid(zoneid))->
  571             netstack_str;
  572         if ((ap = sad_ap_find_by_dev(savedev, ss)) == NULL) {
  573                 netstack_rele(ss->ss_netstack);
  574                 if (zoneid == GLOBAL_ZONEID) {
  575                         /*
  576                          * None found. Also look in the zone's autopush table.
  577                          */
  578                         zoneid = crgetzoneid(crp);
  579                         if (zoneid != GLOBAL_ZONEID)
  580                                 goto retryap;
  581                 }
  582                 goto opendone;
  583         }
              /*
               * Whichever table supplied the entry, any anchor is recorded
               * against the opener's zone, not the zone of the table.
               */
  584         anchor = ap->ap_anchor;
  585         zoneid = crgetzoneid(crp);
  586         for (s = 0; s < ap->ap_npush; s++) {
  587                 error = push_mod(qp, &dummydev, stp, ap->ap_list[s],
  588                     anchor, crp, zoneid);
  589                 if (error != 0)
  590                         break;
  591         }
  592         sad_ap_rele(ap, ss);
  593         netstack_rele(ss->ss_netstack);
  594 
  595 opendone:
  596 
              /*
               * Terminal fix-up is attempted only if every push so far
               * succeeded and the stream head carries both tty flags.
               */
  597         if (error == 0 &&
  598             (stp->sd_flag & (STRISTTY|STRXPG4TTY)) == (STRISTTY|STRXPG4TTY)) {
  599                 error = xpg4_fixup(qp, devp, stp, crp);
  600         }
  601 
  602         /*
  603          * let specfs know that open failed part way through
               *
               * The stream itself is not torn down here; it stays attached to
               * the vnode and the error is returned at the end.
  604          */
  605         if (error != 0) {
  606                 mutex_enter(&stp->sd_lock);
  607                 stp->sd_flag |= STREOPENFAIL;
  608                 mutex_exit(&stp->sd_lock);
  609         }
  610 
  611         /*
  612          * Wake up others that are waiting for stream to be created.
  613          */
  614         mutex_enter(&stp->sd_lock);
  615         stp->sd_flag &= ~STWOPEN;
  616 
  617         /*
  618          * As a performance concern we are caching the values of
  619          * q_minpsz and q_maxpsz of the module below the stream
  620          * head in the stream head.
               *
               * The queue locks are taken one at a time, each nested inside
               * sd_lock: first that of the queue below the stream head to
               * read the values, then that of the stream head write queue to
               * store them.
  621          */
  622         mutex_enter(QLOCK(stp->sd_wrq->q_next));
  623         rmin = stp->sd_wrq->q_next->q_minpsz;
  624         rmax = stp->sd_wrq->q_next->q_maxpsz;
  625         mutex_exit(QLOCK(stp->sd_wrq->q_next));
  626 
  627         /* do this processing here as a performance concern */
              /*
               * A non-zero strmsgsz caps the cached maximum: it replaces an
               * INFPSZ maximum outright and otherwise the smaller of the two
               * is kept.  A zero strmsgsz leaves rmax untouched.
               */
  628         if (strmsgsz != 0) {
  629                 if (rmax == INFPSZ)
  630                         rmax = strmsgsz;
  631                 else
  632                         rmax = MIN(strmsgsz, rmax);
  633         }
  634 
  635         mutex_enter(QLOCK(stp->sd_wrq));
  636         stp->sd_qn_minpsz = rmin;
  637         stp->sd_qn_maxpsz = rmax;
  638         mutex_exit(QLOCK(stp->sd_wrq));
  639 
              /* Reached with sd_lock held, from above or from the VFIFO path. */
  640 fifo_opendone:
  641         cv_broadcast(&stp->sd_monitor);
  642         mutex_exit(&stp->sd_lock);
  643         return (error);
  644 }
  645 
      /*
       * A second qinit pair for the stream head.  The read side names
       * strsink() in its first slot and the write side names no routines at
       * all; both reuse the module_info of the ordinary stream head queues
       * (strm_info and stwm_info).
       *
       * NOTE(review): the code that installs these is outside this part of
       * the file.  Judging by the names they are presumably put in place
       * when a stream head is being torn down, so that late messages are
       * discarded - confirm against strclose() and strsink().
       */
  646 static int strsink(queue_t *, mblk_t *);
  647 static struct qinit deadrend = {
  648         strsink, NULL, NULL, NULL, NULL, &strm_info, NULL
  649 };
  650 static struct qinit deadwend = {
  651         NULL, NULL, NULL, NULL, NULL, &stwm_info, NULL
  652 };
 653 
 654 /*
 655  * Close a stream.
 656  * This is called from closef() on the last close of an open stream.
 657  * Strclean() will already have removed the siglist and pollist
 658  * information, so all that remains is to remove all multiplexor links
 659  * for the stream, pop all the modules (and the driver), and free the
 660  * stream structure.
 661  */
 662 
 663 int
 664 strclose(struct vnode *vp, int flag, cred_t *crp)
 665 {
 666         struct stdata *stp;
 667         queue_t *qp;
 668         int rval;
 669         int freestp = 1;
 670         queue_t *rmq;
 671 
 672         TRACE_1(TR_FAC_STREAMS_FR,
 673             TR_STRCLOSE, "strclose:%p", vp);
 674         ASSERT(vp->v_stream);
 675 
 676         stp = vp->v_stream;
 677         ASSERT(!(stp->sd_flag & STPLEX));
 678         qp = stp->sd_wrq;
 679 
 680         /*
 681          * Needed so that strpoll will return non-zero for this fd.
 682          * Note that with POLLNOERR STRHUP does still cause POLLHUP.
 683          */
 684         mutex_enter(&stp->sd_lock);
 685         stp->sd_flag |= STRHUP;
 686         mutex_exit(&stp->sd_lock);
 687 
 688         /*
 689          * If the registered process or process group did not have an
 690          * open instance of this stream then strclean would not be
 691          * called. Thus at the time of closing all remaining siglist entries
 692          * are removed.
 693          */
 694         if (stp->sd_siglist != NULL)
 695                 strcleanall(vp);
 696 
 697         ASSERT(stp->sd_siglist == NULL);
 698         ASSERT(stp->sd_sigflags == 0);
 699 
 700         if (STRMATED(stp)) {
 701                 struct stdata *strmatep = stp->sd_mate;
 702                 int waited = 1;
 703 
 704                 STRLOCKMATES(stp);
 705                 while (waited) {
 706                         waited = 0;
 707                         while (stp->sd_flag & (STWOPEN|STRCLOSE|STRPLUMB)) {
 708                                 mutex_exit(&strmatep->sd_lock);
 709                                 cv_wait(&stp->sd_monitor, &stp->sd_lock);
 710                                 mutex_exit(&stp->sd_lock);
 711                                 STRLOCKMATES(stp);
 712                                 waited = 1;
 713                         }
 714                         while (strmatep->sd_flag &
 715                             (STWOPEN|STRCLOSE|STRPLUMB)) {
 716                                 mutex_exit(&stp->sd_lock);
 717                                 cv_wait(&strmatep->sd_monitor,
 718                                     &strmatep->sd_lock);
 719                                 mutex_exit(&strmatep->sd_lock);
 720                                 STRLOCKMATES(stp);
 721                                 waited = 1;
 722                         }
 723                 }
 724                 stp->sd_flag |= STRCLOSE;
 725                 STRUNLOCKMATES(stp);
 726         } else {
 727                 mutex_enter(&stp->sd_lock);
 728                 stp->sd_flag |= STRCLOSE;
 729                 mutex_exit(&stp->sd_lock);
 730         }
 731 
 732         ASSERT(qp->q_first == NULL); /* No more delayed write */
 733 
 734         /* Check if an I_LINK was ever done on this stream */
 735         if (stp->sd_flag & STRHASLINKS) {
 736                 netstack_t *ns;
 737                 str_stack_t *ss;
 738 
 739                 ns = netstack_find_by_cred(crp);
 740                 ASSERT(ns != NULL);
 741                 ss = ns->netstack_str;
 742                 ASSERT(ss != NULL);
 743 
 744                 (void) munlinkall(stp, LINKCLOSE|LINKNORMAL, crp, &rval, ss);
 745                 netstack_rele(ss->ss_netstack);
 746         }
 747 
 748         while (_SAMESTR(qp)) {
 749                 /*
 750                  * Holding sd_lock prevents q_next from changing in
 751                  * this stream.
 752                  */
 753                 mutex_enter(&stp->sd_lock);
 754                 if (!(flag & (FNDELAY|FNONBLOCK)) && (stp->sd_closetime > 0)) {
 755 
 756                         /*
 757                          * sleep until awakened by strwsrv() or timeout
 758                          */
 759                         for (;;) {
 760                                 mutex_enter(QLOCK(qp->q_next));
 761                                 if (!(qp->q_next->q_mblkcnt)) {
 762                                         mutex_exit(QLOCK(qp->q_next));
 763                                         break;
 764                                 }
 765                                 stp->sd_flag |= WSLEEP;
 766 
 767                                 /* ensure strwsrv gets enabled */
 768                                 qp->q_next->q_flag |= QWANTW;
 769                                 mutex_exit(QLOCK(qp->q_next));
 770                                 /* get out if we timed out or recv'd a signal */
 771                                 if (str_cv_wait(&qp->q_wait, &stp->sd_lock,
 772                                     stp->sd_closetime, 0) <= 0) {
 773                                         break;
 774                                 }
 775                         }
 776                         stp->sd_flag &= ~WSLEEP;
 777                 }
 778                 mutex_exit(&stp->sd_lock);
 779 
 780                 rmq = qp->q_next;
 781                 if (rmq->q_flag & QISDRV) {
 782                         ASSERT(!_SAMESTR(rmq));
 783                         wait_sq_svc(_RD(qp)->q_syncq);
 784                 }
 785 
 786                 qdetach(_RD(rmq), 1, flag, crp, B_FALSE);
 787         }
 788 
 789         /*
 790          * Since we call pollwakeup in close() now, the poll list should
 791          * be empty in most cases. The only exception is the layered devices
 792          * (e.g. the console drivers with redirection modules pushed on top
 793          * of it).  We have to do this after calling qdetach() because
 794          * the redirection module won't have torn down the console
 795          * redirection until after qdetach() has been invoked.
 796          */
 797         if (stp->sd_pollist.ph_list != NULL) {
 798                 pollwakeup(&stp->sd_pollist, POLLERR);
 799                 pollhead_clean(&stp->sd_pollist);
 800         }
 801         ASSERT(stp->sd_pollist.ph_list == NULL);
 802         ASSERT(stp->sd_sidp == NULL);
 803         ASSERT(stp->sd_pgidp == NULL);
 804 
 805         /* Prevent qenable from re-enabling the stream head queue */
 806         disable_svc(_RD(qp));
 807 
 808         /*
 809          * Wait until service procedure of each queue is
 810          * run, if QINSERVICE is set.
 811          */
 812         wait_svc(_RD(qp));
 813 
 814         /*
 815          * Now, flush both queues.
 816          */
 817         flushq(_RD(qp), FLUSHALL);
 818         flushq(qp, FLUSHALL);
 819 
 820         /*
 821          * If the write queue of the stream head is pointing to a
 822          * read queue, we have a twisted stream.  If the read queue
 823          * is alive, convert the stream head queues into a dead end.
 824          * If the read queue is dead, free the dead pair.
 825          */
 826         if (qp->q_next && !_SAMESTR(qp)) {
 827                 if (qp->q_next->q_qinfo == &deadrend) {       /* half-closed pipe */
 828                         flushq(qp->q_next, FLUSHALL); /* ensure no message */
 829                         shfree(qp->q_next->q_stream);
 830                         freeq(qp->q_next);
 831                         freeq(_RD(qp));
 832                 } else if (qp->q_next == _RD(qp)) {  /* fifo */
 833                         freeq(_RD(qp));
 834                 } else {                                /* pipe */
 835                         freestp = 0;
 836                         /*
 837                          * The q_info pointers are never accessed when
 838                          * SQLOCK is held.
 839                          */
 840                         ASSERT(qp->q_syncq == _RD(qp)->q_syncq);
 841                         mutex_enter(SQLOCK(qp->q_syncq));
 842                         qp->q_qinfo = &deadwend;
 843                         _RD(qp)->q_qinfo = &deadrend;
 844                         mutex_exit(SQLOCK(qp->q_syncq));
 845                 }
 846         } else {
 847                 freeq(_RD(qp)); /* free stream head queue pair */
 848         }
 849 
 850         mutex_enter(&vp->v_lock);
 851         if (stp->sd_iocblk) {
 852                 if (stp->sd_iocblk != (mblk_t *)-1) {
 853                         freemsg(stp->sd_iocblk);
 854                 }
 855                 stp->sd_iocblk = NULL;
 856         }
 857         stp->sd_vnode = stp->sd_pvnode = NULL;
 858         vp->v_stream = NULL;
 859         mutex_exit(&vp->v_lock);
 860         mutex_enter(&stp->sd_lock);
 861         freemsg(stp->sd_cmdblk);
 862         stp->sd_cmdblk = NULL;
 863         stp->sd_flag &= ~STRCLOSE;
 864         cv_broadcast(&stp->sd_monitor);
 865         mutex_exit(&stp->sd_lock);
 866 
 867         if (freestp)
 868                 shfree(stp);
 869         return (0);
 870 }
 871 
 872 static int
 873 strsink(queue_t *q, mblk_t *bp)
 874 {
 875         struct copyresp *resp;
 876 
 877         switch (bp->b_datap->db_type) {
 878         case M_FLUSH:
 879                 if ((*bp->b_rptr & FLUSHW) && !(bp->b_flag & MSGNOLOOP)) {
 880                         *bp->b_rptr &= ~FLUSHR;
 881                         bp->b_flag |= MSGNOLOOP;
 882                         /*
 883                          * Protect against the driver passing up
 884                          * messages after it has done a qprocsoff.
 885                          */
 886                         if (_OTHERQ(q)->q_next == NULL)
 887                                 freemsg(bp);
 888                         else
 889                                 qreply(q, bp);
 890                 } else {
 891                         freemsg(bp);
 892                 }
 893                 break;
 894 
 895         case M_COPYIN:
 896         case M_COPYOUT:
 897                 if (bp->b_cont) {
 898                         freemsg(bp->b_cont);
 899                         bp->b_cont = NULL;
 900                 }
 901                 bp->b_datap->db_type = M_IOCDATA;
 902                 bp->b_wptr = bp->b_rptr + sizeof (struct copyresp);
 903                 resp = (struct copyresp *)bp->b_rptr;
 904                 resp->cp_rval = (caddr_t)1;  /* failure */
 905                 /*
 906                  * Protect against the driver passing up
 907                  * messages after it has done a qprocsoff.
 908                  */
 909                 if (_OTHERQ(q)->q_next == NULL)
 910                         freemsg(bp);
 911                 else
 912                         qreply(q, bp);
 913                 break;
 914 
 915         case M_IOCTL:
 916                 if (bp->b_cont) {
 917                         freemsg(bp->b_cont);
 918                         bp->b_cont = NULL;
 919                 }
 920                 bp->b_datap->db_type = M_IOCNAK;
 921                 /*
 922                  * Protect against the driver passing up
 923                  * messages after it has done a qprocsoff.
 924                  */
 925                 if (_OTHERQ(q)->q_next == NULL)
 926                         freemsg(bp);
 927                 else
 928                         qreply(q, bp);
 929                 break;
 930 
 931         default:
 932                 freemsg(bp);
 933                 break;
 934         }
 935 
 936         return (0);
 937 }
 938 
 939 /*
 940  * Clean up after a process when it closes a stream.  This is called
 941  * from closef for all closes, whereas strclose is called only for the
 942  * last close on a stream.  The siglist is scanned for entries for the
 943  * current process, and these are removed.
 944  */
 945 void
 946 strclean(struct vnode *vp)
 947 {
 948         strsig_t *ssp, *pssp, *tssp;
 949         stdata_t *stp;
 950         int update = 0;
 951 
 952         TRACE_1(TR_FAC_STREAMS_FR,
 953             TR_STRCLEAN, "strclean:%p", vp);
 954         stp = vp->v_stream;
 955         pssp = NULL;
 956         mutex_enter(&stp->sd_lock);
 957         ssp = stp->sd_siglist;
 958         while (ssp) {
 959                 if (ssp->ss_pidp == curproc->p_pidp) {
 960                         tssp = ssp->ss_next;
 961                         if (pssp)
 962                                 pssp->ss_next = tssp;
 963                         else
 964                                 stp->sd_siglist = tssp;
 965                         mutex_enter(&pidlock);
 966                         PID_RELE(ssp->ss_pidp);
 967                         mutex_exit(&pidlock);
 968                         kmem_free(ssp, sizeof (strsig_t));
 969                         update = 1;
 970                         ssp = tssp;
 971                 } else {
 972                         pssp = ssp;
 973                         ssp = ssp->ss_next;
 974                 }
 975         }
 976         if (update) {
 977                 stp->sd_sigflags = 0;
 978                 for (ssp = stp->sd_siglist; ssp; ssp = ssp->ss_next)
 979                         stp->sd_sigflags |= ssp->ss_events;
 980         }
 981         mutex_exit(&stp->sd_lock);
 982 }
 983 
 984 /*
 985  * Used on the last close to remove any remaining items on the siglist.
 986  * These could be present on the siglist due to I_ESETSIG calls that
 987  * use process groups or processed that do not have an open file descriptor
 988  * for this stream (Such entries would not be removed by strclean).
 989  */
 990 static void
 991 strcleanall(struct vnode *vp)
 992 {
 993         strsig_t *ssp, *nssp;
 994         stdata_t *stp;
 995 
 996         stp = vp->v_stream;
 997         mutex_enter(&stp->sd_lock);
 998         ssp = stp->sd_siglist;
 999         stp->sd_siglist = NULL;
1000         while (ssp) {
1001                 nssp = ssp->ss_next;
1002                 mutex_enter(&pidlock);
1003                 PID_RELE(ssp->ss_pidp);
1004                 mutex_exit(&pidlock);
1005                 kmem_free(ssp, sizeof (strsig_t));
1006                 ssp = nssp;
1007         }
1008         stp->sd_sigflags = 0;
1009         mutex_exit(&stp->sd_lock);
1010 }
1011 
1012 /*
1013  * Retrieve the next message from the logical stream head read queue
1014  * using either rwnext (if sync stream) or getq_noenab.
1015  * It is the callers responsibility to call qbackenable after
1016  * it is finished with the message. The caller should not call
1017  * qbackenable until after any putback calls to avoid spurious backenabling.
1018  */
1019 mblk_t *
1020 strget(struct stdata *stp, queue_t *q, struct uio *uiop, int first,
1021     int *errorp)
1022 {
1023         mblk_t *bp;
1024         int error;
1025         ssize_t rbytes = 0;
1026 
1027         /* Holding sd_lock prevents the read queue from changing  */
1028         ASSERT(MUTEX_HELD(&stp->sd_lock));
1029 
1030         if (uiop != NULL && stp->sd_struiordq != NULL &&
1031             q->q_first == NULL &&
1032             (!first || (stp->sd_wakeq & RSLEEP))) {
1033                 /*
1034                  * Stream supports rwnext() for the read side.
1035                  * If this is the first time we're called by e.g. strread
1036                  * only do the downcall if there is a deferred wakeup
1037                  * (registered in sd_wakeq).
1038                  */
1039                 struiod_t uiod;
1040 
1041                 if (first)
1042                         stp->sd_wakeq &= ~RSLEEP;
1043 
1044                 (void) uiodup(uiop, &uiod.d_uio, uiod.d_iov,
1045                     sizeof (uiod.d_iov) / sizeof (*uiod.d_iov));
1046                 uiod.d_mp = 0;
1047                 /*
1048                  * Mark that a thread is in rwnext on the read side
1049                  * to prevent strrput from nacking ioctls immediately.
1050                  * When the last concurrent rwnext returns
1051                  * the ioctls are nack'ed.
1052                  */
1053                 ASSERT(MUTEX_HELD(&stp->sd_lock));
1054                 stp->sd_struiodnak++;
1055                 /*
1056                  * Note: rwnext will drop sd_lock.
1057                  */
1058                 error = rwnext(q, &uiod);
1059                 ASSERT(MUTEX_NOT_HELD(&stp->sd_lock));
1060                 mutex_enter(&stp->sd_lock);
1061                 stp->sd_struiodnak--;
1062                 while (stp->sd_struiodnak == 0 &&
1063                     ((bp = stp->sd_struionak) != NULL)) {
1064                         stp->sd_struionak = bp->b_next;
1065                         bp->b_next = NULL;
1066                         bp->b_datap->db_type = M_IOCNAK;
1067                         /*
1068                          * Protect against the driver passing up
1069                          * messages after it has done a qprocsoff.
1070                          */
1071                         if (_OTHERQ(q)->q_next == NULL)
1072                                 freemsg(bp);
1073                         else {
1074                                 mutex_exit(&stp->sd_lock);
1075                                 qreply(q, bp);
1076                                 mutex_enter(&stp->sd_lock);
1077                         }
1078                 }
1079                 ASSERT(MUTEX_HELD(&stp->sd_lock));
1080                 if (error == 0 || error == EWOULDBLOCK) {
1081                         if ((bp = uiod.d_mp) != NULL) {
1082                                 *errorp = 0;
1083                                 ASSERT(MUTEX_HELD(&stp->sd_lock));
1084                                 return (bp);
1085                         }
1086                         error = 0;
1087                 } else if (error == EINVAL) {
1088                         /*
1089                          * The stream plumbing must have
1090                          * changed while we were away, so
1091                          * just turn off rwnext()s.
1092                          */
1093                         error = 0;
1094                 } else if (error == EBUSY) {
1095                         /*
1096                          * The module might have data in transit using putnext
1097                          * Fall back on waiting + getq.
1098                          */
1099                         error = 0;
1100                 } else {
1101                         *errorp = error;
1102                         ASSERT(MUTEX_HELD(&stp->sd_lock));
1103                         return (NULL);
1104                 }
1105                 /*
1106                  * Try a getq in case a rwnext() generated mblk
1107                  * has bubbled up via strrput().
1108                  */
1109         }
1110         *errorp = 0;
1111         ASSERT(MUTEX_HELD(&stp->sd_lock));
1112 
1113         /*
1114          * If we have a valid uio, try and use this as a guide for how
1115          * many bytes to retrieve from the queue via getq_noenab().
1116          * Doing this can avoid unneccesary counting of overlong
1117          * messages in putback(). We currently only do this for sockets
1118          * and only if there is no sd_rputdatafunc hook.
1119          *
1120          * The sd_rputdatafunc hook transforms the entire message
1121          * before any bytes in it can be given to a client. So, rbytes
1122          * must be 0 if there is a hook.
1123          */
1124         if ((uiop != NULL) && (stp->sd_vnode->v_type == VSOCK) &&
1125             (stp->sd_rputdatafunc == NULL))
1126                 rbytes = uiop->uio_resid;
1127 
1128         return (getq_noenab(q, rbytes));
1129 }
1130 
1131 /*
1132  * Copy out the message pointed to by `bp' into the uio pointed to by `uiop'.
1133  * If the message does not fit in the uio the remainder of it is returned;
1134  * otherwise NULL is returned.  Any embedded zero-length mblk_t's are
1135  * consumed, even if uio_resid reaches zero.  On error, `*errorp' is set to
1136  * the error code, the message is consumed, and NULL is returned.
1137  */
1138 static mblk_t *
1139 struiocopyout(mblk_t *bp, struct uio *uiop, int *errorp)
1140 {
1141         int error;
1142         ptrdiff_t n;
1143         mblk_t *nbp;
1144 
1145         ASSERT(bp->b_wptr >= bp->b_rptr);
1146 
1147         do {
1148                 if ((n = MIN(uiop->uio_resid, MBLKL(bp))) != 0) {
1149                         ASSERT(n > 0);
1150 
1151                         error = uiomove(bp->b_rptr, n, UIO_READ, uiop);
1152                         if (error != 0) {
1153                                 freemsg(bp);
1154                                 *errorp = error;
1155                                 return (NULL);
1156                         }
1157                 }
1158 
1159                 bp->b_rptr += n;
1160                 while (bp != NULL && (bp->b_rptr >= bp->b_wptr)) {
1161                         nbp = bp;
1162                         bp = bp->b_cont;
1163                         freeb(nbp);
1164                 }
1165         } while (bp != NULL && uiop->uio_resid > 0);
1166 
1167         *errorp = 0;
1168         return (bp);
1169 }
1170 
1171 /*
1172  * Read a stream according to the mode flags in sd_flag:
1173  *
1174  * (default mode)               - Byte stream, msg boundaries are ignored
1175  * RD_MSGDIS (msg discard)      - Read on msg boundaries and throw away
1176  *                              any data remaining in msg
1177  * RD_MSGNODIS (msg non-discard) - Read on msg boundaries and put back
1178  *                              any remaining data on head of read queue
1179  *
1180  * Consume readable messages on the front of the queue until
1181  * ttolwp(curthread)->lwp_count
1182  * is satisfied, the readable messages are exhausted, or a message
1183  * boundary is reached in a message mode.  If no data was read and
1184  * the stream was not opened with the NDELAY flag, block until data arrives.
1185  * Otherwise return the data read and update the count.
1186  *
1187  * In default mode a 0 length message signifies end-of-file and terminates
1188  * a read in progress.  The 0 length message is removed from the queue
1189  * only if it is the only message read (no data is read).
1190  *
1191  * An attempt to read an M_PROTO or M_PCPROTO message results in an
1192  * EBADMSG error return, unless either RD_PROTDAT or RD_PROTDIS are set.
1193  * If RD_PROTDAT is set, M_PROTO and M_PCPROTO messages are read as data.
1194  * If RD_PROTDIS is set, the M_PROTO and M_PCPROTO parts of the message
1195  * are unlinked from any M_DATA blocks in the message, the protos are
1196  * thrown away, and the data is read.
1197  */
1198 /* ARGSUSED */
1199 int
1200 strread(struct vnode *vp, struct uio *uiop, cred_t *crp)
1201 {
1202         struct stdata *stp;
1203         mblk_t *bp, *nbp;
1204         queue_t *q;
1205         int error = 0;
1206         uint_t old_sd_flag;
1207         int first;
1208         char rflg;
1209         uint_t mark;            /* Contains MSG*MARK and _LASTMARK */
1210 #define _LASTMARK       0x8000  /* Distinct from MSG*MARK */
1211         short delim;
1212         unsigned char pri = 0;
1213         char waitflag;
1214         unsigned char type;
1215 
1216         TRACE_1(TR_FAC_STREAMS_FR,
1217             TR_STRREAD_ENTER, "strread:%p", vp);
1218         ASSERT(vp->v_stream);
1219         stp = vp->v_stream;
1220 
1221         mutex_enter(&stp->sd_lock);
1222 
1223         if ((error = i_straccess(stp, JCREAD)) != 0) {
1224                 mutex_exit(&stp->sd_lock);
1225                 return (error);
1226         }
1227 
1228         if (stp->sd_flag & (STRDERR|STPLEX)) {
1229                 error = strgeterr(stp, STRDERR|STPLEX, 0);
1230                 if (error != 0) {
1231                         mutex_exit(&stp->sd_lock);
1232                         return (error);
1233                 }
1234         }
1235 
1236         /*
1237          * Loop terminates when uiop->uio_resid == 0.
1238          */
1239         rflg = 0;
1240         waitflag = READWAIT;
1241         q = _RD(stp->sd_wrq);
1242         for (;;) {
1243                 ASSERT(MUTEX_HELD(&stp->sd_lock));
1244                 old_sd_flag = stp->sd_flag;
1245                 mark = 0;
1246                 delim = 0;
1247                 first = 1;
1248                 while ((bp = strget(stp, q, uiop, first, &error)) == NULL) {
1249                         int done = 0;
1250 
1251                         ASSERT(MUTEX_HELD(&stp->sd_lock));
1252 
1253                         if (error != 0)
1254                                 goto oops;
1255 
1256                         if (stp->sd_flag & (STRHUP|STREOF)) {
1257                                 goto oops;
1258                         }
1259                         if (rflg && !(stp->sd_flag & STRDELIM)) {
1260                                 goto oops;
1261                         }
1262                         /*
1263                          * If a read(fd,buf,0) has been done, there is no
1264                          * need to sleep. We always have zero bytes to
1265                          * return.
1266                          */
1267                         if (uiop->uio_resid == 0) {
1268                                 goto oops;
1269                         }
1270 
1271                         qbackenable(q, 0);
1272 
1273                         TRACE_3(TR_FAC_STREAMS_FR, TR_STRREAD_WAIT,
1274                             "strread calls strwaitq:%p, %p, %p",
1275                             vp, uiop, crp);
1276                         if ((error = strwaitq(stp, waitflag, uiop->uio_resid,
1277                             uiop->uio_fmode, -1, &done)) != 0 || done) {
1278                                 TRACE_3(TR_FAC_STREAMS_FR, TR_STRREAD_DONE,
1279                                     "strread error or done:%p, %p, %p",
1280                                     vp, uiop, crp);
1281                                 if ((uiop->uio_fmode & FNDELAY) &&
1282                                     (stp->sd_flag & OLDNDELAY) &&
1283                                     (error == EAGAIN))
1284                                         error = 0;
1285                                 goto oops;
1286                         }
1287                         TRACE_3(TR_FAC_STREAMS_FR, TR_STRREAD_AWAKE,
1288                             "strread awakes:%p, %p, %p", vp, uiop, crp);
1289                         if ((error = i_straccess(stp, JCREAD)) != 0) {
1290                                 goto oops;
1291                         }
1292                         first = 0;
1293                 }
1294 
1295                 ASSERT(MUTEX_HELD(&stp->sd_lock));
1296                 ASSERT(bp);
1297                 pri = bp->b_band;
1298                 /*
1299                  * Extract any mark information. If the message is not
1300                  * completely consumed this information will be put in the mblk
1301                  * that is putback.
1302                  * If MSGMARKNEXT is set and the message is completely consumed
1303                  * the STRATMARK flag will be set below. Likewise, if
1304                  * MSGNOTMARKNEXT is set and the message is
1305                  * completely consumed STRNOTATMARK will be set.
1306                  *
1307                  * For some unknown reason strread only breaks the read at the
1308                  * last mark.
1309                  */
1310                 mark = bp->b_flag & (MSGMARK | MSGMARKNEXT | MSGNOTMARKNEXT);
1311                 ASSERT((mark & (MSGMARKNEXT|MSGNOTMARKNEXT)) !=
1312                     (MSGMARKNEXT|MSGNOTMARKNEXT));
1313                 if (mark != 0 && bp == stp->sd_mark) {
1314                         if (rflg) {
1315                                 putback(stp, q, bp, pri);
1316                                 goto oops;
1317                         }
1318                         mark |= _LASTMARK;
1319                         stp->sd_mark = NULL;
1320                 }
1321                 if ((stp->sd_flag & STRDELIM) && (bp->b_flag & MSGDELIM))
1322                         delim = 1;
1323                 mutex_exit(&stp->sd_lock);
1324 
1325                 if (STREAM_NEEDSERVICE(stp))
1326                         stream_runservice(stp);
1327 
1328                 type = bp->b_datap->db_type;
1329 
1330                 switch (type) {
1331 
1332                 case M_DATA:
1333 ismdata:
1334                         if (msgnodata(bp)) {
1335                                 if (mark || delim) {
1336                                         freemsg(bp);
1337                                 } else if (rflg) {
1338 
1339                                         /*
1340                                          * If already read data put zero
1341                                          * length message back on queue else
1342                                          * free msg and return 0.
1343                                          */
1344                                         bp->b_band = pri;
1345                                         mutex_enter(&stp->sd_lock);
1346                                         putback(stp, q, bp, pri);
1347                                         mutex_exit(&stp->sd_lock);
1348                                 } else {
1349                                         freemsg(bp);
1350                                 }
1351                                 error =  0;
1352                                 goto oops1;
1353                         }
1354 
1355                         rflg = 1;
1356                         waitflag |= NOINTR;
1357                         bp = struiocopyout(bp, uiop, &error);
1358                         if (error != 0)
1359                                 goto oops1;
1360 
1361                         mutex_enter(&stp->sd_lock);
1362                         if (bp) {
1363                                 /*
1364                                  * Have remaining data in message.
1365                                  * Free msg if in discard mode.
1366                                  */
1367                                 if (stp->sd_read_opt & RD_MSGDIS) {
1368                                         freemsg(bp);
1369                                 } else {
1370                                         bp->b_band = pri;
1371                                         if ((mark & _LASTMARK) &&
1372                                             (stp->sd_mark == NULL))
1373                                                 stp->sd_mark = bp;
1374                                         bp->b_flag |= mark & ~_LASTMARK;
1375                                         if (delim)
1376                                                 bp->b_flag |= MSGDELIM;
1377                                         if (msgnodata(bp))
1378                                                 freemsg(bp);
1379                                         else
1380                                                 putback(stp, q, bp, pri);
1381                                 }
1382                         } else {
1383                                 /*
1384                                  * Consumed the complete message.
1385                                  * Move the MSG*MARKNEXT information
1386                                  * to the stream head just in case
1387                                  * the read queue becomes empty.
1388                                  *
1389                                  * If the stream head was at the mark
1390                                  * (STRATMARK) before we dropped sd_lock above
1391                                  * and some data was consumed then we have
1392                                  * moved past the mark thus STRATMARK is
1393                                  * cleared. However, if a message arrived in
1394                                  * strrput during the copyout above causing
1395                                  * STRATMARK to be set we can not clear that
1396                                  * flag.
1397                                  */
1398                                 if (mark &
1399                                     (MSGMARKNEXT|MSGNOTMARKNEXT|MSGMARK)) {
1400                                         if (mark & MSGMARKNEXT) {
1401                                                 stp->sd_flag &= ~STRNOTATMARK;
1402                                                 stp->sd_flag |= STRATMARK;
1403                                         } else if (mark & MSGNOTMARKNEXT) {
1404                                                 stp->sd_flag &= ~STRATMARK;
1405                                                 stp->sd_flag |= STRNOTATMARK;
1406                                         } else {
1407                                                 stp->sd_flag &=
1408                                                     ~(STRATMARK|STRNOTATMARK);
1409                                         }
1410                                 } else if (rflg && (old_sd_flag & STRATMARK)) {
1411                                         stp->sd_flag &= ~STRATMARK;
1412                                 }
1413                         }
1414 
1415                         /*
1416                          * Check for signal messages at the front of the read
1417                          * queue and generate the signal(s) if appropriate.
1418                          * The only signal that can be on queue is M_SIG at
1419                          * this point.
1420                          */
1421                         while ((((bp = q->q_first)) != NULL) &&
1422                             (bp->b_datap->db_type == M_SIG)) {
1423                                 bp = getq_noenab(q, 0);
1424                                 /*
1425                                  * sd_lock is held so the content of the
1426                                  * read queue can not change.
1427                                  */
1428                                 ASSERT(bp != NULL && DB_TYPE(bp) == M_SIG);
1429                                 strsignal_nolock(stp, *bp->b_rptr, bp->b_band);
1430                                 mutex_exit(&stp->sd_lock);
1431                                 freemsg(bp);
1432                                 if (STREAM_NEEDSERVICE(stp))
1433                                         stream_runservice(stp);
1434                                 mutex_enter(&stp->sd_lock);
1435                         }
1436 
1437                         if ((uiop->uio_resid == 0) || (mark & _LASTMARK) ||
1438                             delim ||
1439                             (stp->sd_read_opt & (RD_MSGDIS|RD_MSGNODIS))) {
1440                                 goto oops;
1441                         }
1442                         continue;
1443 
1444                 case M_SIG:
1445                         strsignal(stp, *bp->b_rptr, (int32_t)bp->b_band);
1446                         freemsg(bp);
1447                         mutex_enter(&stp->sd_lock);
1448                         continue;
1449 
1450                 case M_PROTO:
1451                 case M_PCPROTO:
1452                         /*
1453                          * Only data messages are readable.
1454                          * Any others generate an error, unless
1455                          * RD_PROTDIS or RD_PROTDAT is set.
1456                          */
1457                         if (stp->sd_read_opt & RD_PROTDAT) {
1458                                 for (nbp = bp; nbp; nbp = nbp->b_next) {
1459                                         if ((nbp->b_datap->db_type ==
1460                                             M_PROTO) ||
1461                                             (nbp->b_datap->db_type ==
1462                                             M_PCPROTO)) {
1463                                                 nbp->b_datap->db_type = M_DATA;
1464                                         } else {
1465                                                 break;
1466                                         }
1467                                 }
1468                                 /*
1469                                  * clear stream head hi pri flag based on
1470                                  * first message
1471                                  */
1472                                 if (type == M_PCPROTO) {
1473                                         mutex_enter(&stp->sd_lock);
1474                                         stp->sd_flag &= ~STRPRI;
1475                                         mutex_exit(&stp->sd_lock);
1476                                 }
1477                                 goto ismdata;
1478                         } else if (stp->sd_read_opt & RD_PROTDIS) {
1479                                 /*
1480                                  * discard non-data messages
1481                                  */
1482                                 while (bp &&
1483                                     ((bp->b_datap->db_type == M_PROTO) ||
1484                                     (bp->b_datap->db_type == M_PCPROTO))) {
1485                                         nbp = unlinkb(bp);
1486                                         freeb(bp);
1487                                         bp = nbp;
1488                                 }
1489                                 /*
1490                                  * clear stream head hi pri flag based on
1491                                  * first message
1492                                  */
1493                                 if (type == M_PCPROTO) {
1494                                         mutex_enter(&stp->sd_lock);
1495                                         stp->sd_flag &= ~STRPRI;
1496                                         mutex_exit(&stp->sd_lock);
1497                                 }
1498                                 if (bp) {
1499                                         bp->b_band = pri;
1500                                         goto ismdata;
1501                                 } else {
1502                                         break;
1503                                 }
1504                         }
1505                         /* FALLTHRU */
1506                 case M_PASSFP:
1507                         if ((bp->b_datap->db_type == M_PASSFP) &&
1508                             (stp->sd_read_opt & RD_PROTDIS)) {
1509                                 freemsg(bp);
1510                                 break;
1511                         }
1512                         mutex_enter(&stp->sd_lock);
1513                         putback(stp, q, bp, pri);
1514                         mutex_exit(&stp->sd_lock);
1515                         if (rflg == 0)
1516                                 error = EBADMSG;
1517                         goto oops1;
1518 
1519                 default:
1520                         /*
1521                          * Garbage on stream head read queue.
1522                          */
1523                         cmn_err(CE_WARN, "bad %x found at stream head\n",
1524                             bp->b_datap->db_type);
1525                         freemsg(bp);
1526                         goto oops1;
1527                 }
1528                 mutex_enter(&stp->sd_lock);
1529         }
1530 oops:
1531         mutex_exit(&stp->sd_lock);
1532 oops1:
1533         qbackenable(q, pri);
1534         return (error);
1535 #undef  _LASTMARK
1536 }
1537 
1538 /*
1539  * Default processing of M_PROTO/M_PCPROTO messages.
1540  * Determine which wakeups and signals are needed.
1541  * This can be replaced by a user-specified procedure for kernel users
1542  * of STREAMS.
1543  */
1544 /* ARGSUSED */
1545 mblk_t *
1546 strrput_proto(vnode_t *vp, mblk_t *mp,
1547     strwakeup_t *wakeups, strsigset_t *firstmsgsigs,
1548     strsigset_t *allmsgsigs, strpollset_t *pollwakeups)
1549 {
1550         *wakeups = RSLEEP;
1551         *allmsgsigs = 0;
1552 
1553         switch (mp->b_datap->db_type) {
1554         case M_PROTO:
1555                 if (mp->b_band == 0) {
1556                         *firstmsgsigs = S_INPUT | S_RDNORM;
1557                         *pollwakeups = POLLIN | POLLRDNORM;
1558                 } else {
1559                         *firstmsgsigs = S_INPUT | S_RDBAND;
1560                         *pollwakeups = POLLIN | POLLRDBAND;
1561                 }
1562                 break;
1563         case M_PCPROTO:
1564                 *firstmsgsigs = S_HIPRI;
1565                 *pollwakeups = POLLPRI;
1566                 break;
1567         }
1568         return (mp);
1569 }
1570 
1571 /*
1572  * Default processing of everything but M_DATA, M_PROTO, M_PCPROTO and
1573  * M_PASSFP messages.
1574  * Determine which wakeups and signals are needed.
1575  * This can be replaced by a user-specified procedure for kernel users
1576  * of STREAMS.
1577  */
1578 /* ARGSUSED */
1579 mblk_t *
1580 strrput_misc(vnode_t *vp, mblk_t *mp,
1581     strwakeup_t *wakeups, strsigset_t *firstmsgsigs,
1582     strsigset_t *allmsgsigs, strpollset_t *pollwakeups)
1583 {
1584         *wakeups = 0;
1585         *firstmsgsigs = 0;
1586         *allmsgsigs = 0;
1587         *pollwakeups = 0;
1588         return (mp);
1589 }
1590 
1591 /*
1592  * Stream read put procedure.  Called from downstream driver/module
1593  * with messages for the stream head.  Data, protocol, and in-stream
1594  * signal messages are placed on the queue, others are handled directly.
1595  */
1596 int
1597 strrput(queue_t *q, mblk_t *bp)
1598 {
1599         struct stdata   *stp;
1600         ulong_t         rput_opt;
1601         strwakeup_t     wakeups;
1602         strsigset_t     firstmsgsigs;   /* Signals if first message on queue */
1603         strsigset_t     allmsgsigs;     /* Signals for all messages */
1604         strsigset_t     signals;        /* Signals events to generate */
1605         strpollset_t    pollwakeups;
1606         mblk_t          *nextbp;
1607         uchar_t         band = 0;
1608         int             hipri_sig;
1609 
1610         stp = (struct stdata *)q->q_ptr;
1611         /*
1612          * Use rput_opt for optimized access to the SR_ flags except
1613          * SR_POLLIN. That flag has to be checked under sd_lock since it
1614          * is modified by strpoll().
1615          */
1616         rput_opt = stp->sd_rput_opt;
1617 
1618         ASSERT(qclaimed(q));
1619         TRACE_2(TR_FAC_STREAMS_FR, TR_STRRPUT_ENTER,
1620             "strrput called with message type:q %p bp %p", q, bp);
1621 
1622         /*
1623          * Perform initial processing and pass to the parameterized functions.
1624          */
1625         ASSERT(bp->b_next == NULL);
1626 
1627         switch (bp->b_datap->db_type) {
1628         case M_DATA:
1629                 /*
1630                  * sockfs is the only consumer of STREOF and when it is set,
1631                  * it implies that the receiver is not interested in receiving
1632                  * any more data, hence the mblk is freed to prevent unnecessary
1633                  * message queueing at the stream head.
1634                  */
1635                 if (stp->sd_flag == STREOF) {
1636                         freemsg(bp);
1637                         return (0);
1638                 }
1639                 if ((rput_opt & SR_IGN_ZEROLEN) &&
1640                     bp->b_rptr == bp->b_wptr && msgnodata(bp)) {
1641                         /*
1642                          * Ignore zero-length M_DATA messages. These might be
1643                          * generated by some transports.
1644                          * The zero-length M_DATA messages, even if they
1645                          * are ignored, should effect the atmark tracking and
1646                          * should wake up a thread sleeping in strwaitmark.
1647                          */
1648                         mutex_enter(&stp->sd_lock);
1649                         if (bp->b_flag & MSGMARKNEXT) {
1650                                 /*
1651                                  * Record the position of the mark either
1652                                  * in q_last or in STRATMARK.
1653                                  */
1654                                 if (q->q_last != NULL) {
1655                                         q->q_last->b_flag &= ~MSGNOTMARKNEXT;
1656                                         q->q_last->b_flag |= MSGMARKNEXT;
1657                                 } else {
1658                                         stp->sd_flag &= ~STRNOTATMARK;
1659                                         stp->sd_flag |= STRATMARK;
1660                                 }
1661                         } else if (bp->b_flag & MSGNOTMARKNEXT) {
1662                                 /*
1663                                  * Record that this is not the position of
1664                                  * the mark either in q_last or in
1665                                  * STRNOTATMARK.
1666                                  */
1667                                 if (q->q_last != NULL) {
1668                                         q->q_last->b_flag &= ~MSGMARKNEXT;
1669                                         q->q_last->b_flag |= MSGNOTMARKNEXT;
1670                                 } else {
1671                                         stp->sd_flag &= ~STRATMARK;
1672                                         stp->sd_flag |= STRNOTATMARK;
1673                                 }
1674                         }
1675                         if (stp->sd_flag & RSLEEP) {
1676                                 stp->sd_flag &= ~RSLEEP;
1677                                 cv_broadcast(&q->q_wait);
1678                         }
1679                         mutex_exit(&stp->sd_lock);
1680                         freemsg(bp);
1681                         return (0);
1682                 }
1683                 wakeups = RSLEEP;
1684                 if (bp->b_band == 0) {
1685                         firstmsgsigs = S_INPUT | S_RDNORM;
1686                         pollwakeups = POLLIN | POLLRDNORM;
1687                 } else {
1688                         firstmsgsigs = S_INPUT | S_RDBAND;
1689                         pollwakeups = POLLIN | POLLRDBAND;
1690                 }
1691                 if (rput_opt & SR_SIGALLDATA)
1692                         allmsgsigs = firstmsgsigs;
1693                 else
1694                         allmsgsigs = 0;
1695 
1696                 mutex_enter(&stp->sd_lock);
1697                 if ((rput_opt & SR_CONSOL_DATA) &&
1698                     (q->q_last != NULL) &&
1699                     (bp->b_flag & (MSGMARK|MSGDELIM)) == 0) {
1700                         /*
1701                          * Consolidate an M_DATA message onto an M_DATA,
1702                          * M_PROTO, or M_PCPROTO by merging it with q_last.
1703                          * The consolidation does not take place if
1704                          * the old message is marked with either of the
1705                          * marks or the delim flag or if the new
1706                          * message is marked with MSGMARK. The MSGMARK
1707                          * check is needed to handle the odd semantics of
1708                          * MSGMARK where essentially the whole message
1709                          * is to be treated as marked.
1710                          * Carry any MSGMARKNEXT  and MSGNOTMARKNEXT from the
1711                          * new message to the front of the b_cont chain.
1712                          */
1713                         mblk_t *lbp = q->q_last;
1714                         unsigned char db_type = lbp->b_datap->db_type;
1715 
1716                         if ((db_type == M_DATA || db_type == M_PROTO ||
1717                             db_type == M_PCPROTO) &&
1718                             !(lbp->b_flag & (MSGDELIM|MSGMARK|MSGMARKNEXT))) {
1719                                 rmvq_noenab(q, lbp);
1720                                 /*
1721                                  * The first message in the b_cont list
1722                                  * tracks MSGMARKNEXT and MSGNOTMARKNEXT.
1723                                  * We need to handle the case where we
1724                                  * are appending:
1725                                  *
1726                                  * 1) a MSGMARKNEXT to a MSGNOTMARKNEXT.
1727                                  * 2) a MSGMARKNEXT to a plain message.
1728                                  * 3) a MSGNOTMARKNEXT to a plain message
1729                                  * 4) a MSGNOTMARKNEXT to a MSGNOTMARKNEXT
1730                                  *    message.
1731                                  *
1732                                  * Thus we never append a MSGMARKNEXT or
1733                                  * MSGNOTMARKNEXT to a MSGMARKNEXT message.
1734                                  */
1735                                 if (bp->b_flag & MSGMARKNEXT) {
1736                                         lbp->b_flag |= MSGMARKNEXT;
1737                                         lbp->b_flag &= ~MSGNOTMARKNEXT;
1738                                         bp->b_flag &= ~MSGMARKNEXT;
1739                                 } else if (bp->b_flag & MSGNOTMARKNEXT) {
1740                                         lbp->b_flag |= MSGNOTMARKNEXT;
1741                                         bp->b_flag &= ~MSGNOTMARKNEXT;
1742                                 }
1743 
1744                                 linkb(lbp, bp);
1745                                 bp = lbp;
1746                                 /*
1747                                  * The new message logically isn't the first
1748                                  * even though the q_first check below thinks
1749                                  * it is. Clear the firstmsgsigs to make it
1750                                  * not appear to be first.
1751                                  */
1752                                 firstmsgsigs = 0;
1753                         }
1754                 }
1755                 break;
1756 
1757         case M_PASSFP:
1758                 wakeups = RSLEEP;
1759                 allmsgsigs = 0;
1760                 if (bp->b_band == 0) {
1761                         firstmsgsigs = S_INPUT | S_RDNORM;
1762                         pollwakeups = POLLIN | POLLRDNORM;
1763                 } else {
1764                         firstmsgsigs = S_INPUT | S_RDBAND;
1765                         pollwakeups = POLLIN | POLLRDBAND;
1766                 }
1767                 mutex_enter(&stp->sd_lock);
1768                 break;
1769 
1770         case M_PROTO:
1771         case M_PCPROTO:
1772                 ASSERT(stp->sd_rprotofunc != NULL);
1773                 bp = (stp->sd_rprotofunc)(stp->sd_vnode, bp,
1774                     &wakeups, &firstmsgsigs, &allmsgsigs, &pollwakeups);
1775 #define ALLSIG  (S_INPUT|S_HIPRI|S_OUTPUT|S_MSG|S_ERROR|S_HANGUP|S_RDNORM|\
1776                 S_WRNORM|S_RDBAND|S_WRBAND|S_BANDURG)
1777 #define ALLPOLL (POLLIN|POLLPRI|POLLOUT|POLLRDNORM|POLLWRNORM|POLLRDBAND|\
1778                 POLLWRBAND)
1779 
1780                 ASSERT((wakeups & ~(RSLEEP|WSLEEP)) == 0);
1781                 ASSERT((firstmsgsigs & ~ALLSIG) == 0);
1782                 ASSERT((allmsgsigs & ~ALLSIG) == 0);
1783                 ASSERT((pollwakeups & ~ALLPOLL) == 0);
1784 
1785                 mutex_enter(&stp->sd_lock);
1786                 break;
1787 
1788         default:
1789                 ASSERT(stp->sd_rmiscfunc != NULL);
1790                 bp = (stp->sd_rmiscfunc)(stp->sd_vnode, bp,
1791                     &wakeups, &firstmsgsigs, &allmsgsigs, &pollwakeups);
1792                 ASSERT((wakeups & ~(RSLEEP|WSLEEP)) == 0);
1793                 ASSERT((firstmsgsigs & ~ALLSIG) == 0);
1794                 ASSERT((allmsgsigs & ~ALLSIG) == 0);
1795                 ASSERT((pollwakeups & ~ALLPOLL) == 0);
1796 #undef  ALLSIG
1797 #undef  ALLPOLL
1798                 mutex_enter(&stp->sd_lock);
1799                 break;
1800         }
1801         ASSERT(MUTEX_HELD(&stp->sd_lock));
1802 
1803         /* By default generate superset of signals */
1804         signals = (firstmsgsigs | allmsgsigs);
1805 
1806         /*
1807          * The  proto and misc functions can return multiple messages
1808          * as a b_next chain. Such messages are processed separately.
1809          */
1810 one_more:
1811         hipri_sig = 0;
1812         if (bp == NULL) {
1813                 nextbp = NULL;
1814         } else {
1815                 nextbp = bp->b_next;
1816                 bp->b_next = NULL;
1817 
1818                 switch (bp->b_datap->db_type) {
1819                 case M_PCPROTO:
1820                         /*
1821                          * Only one priority protocol message is allowed at the
1822                          * stream head at a time.
1823                          */
1824                         if (stp->sd_flag & STRPRI) {
1825                                 TRACE_0(TR_FAC_STREAMS_FR, TR_STRRPUT_PROTERR,
1826                                     "M_PCPROTO already at head");
1827                                 freemsg(bp);
1828                                 mutex_exit(&stp->sd_lock);
1829                                 goto done;
1830                         }
1831                         stp->sd_flag |= STRPRI;
1832                         hipri_sig = 1;
1833                         /* FALLTHRU */
1834                 case M_DATA:
1835                 case M_PROTO:
1836                 case M_PASSFP:
1837                         band = bp->b_band;
1838                         /*
1839                          * Marking doesn't work well when messages
1840                          * are marked in more than one band.  We only
1841                          * remember the last message received, even if
1842                          * it is placed on the queue ahead of other
1843                          * marked messages.
1844                          */
1845                         if (bp->b_flag & MSGMARK)
1846                                 stp->sd_mark = bp;
1847                         (void) putq(q, bp);
1848 
1849                         /*
1850                          * If message is a PCPROTO message, always use
1851                          * firstmsgsigs to determine if a signal should be
1852                          * sent as strrput is the only place to send
1853                          * signals for PCPROTO. Other messages are based on
1854                          * the STRGETINPROG flag. The flag determines if
1855                          * strrput or (k)strgetmsg will be responsible for
1856                          * sending the signals, in the firstmsgsigs case.
1857                          */
1858                         if ((hipri_sig == 1) ||
1859                             (((stp->sd_flag & STRGETINPROG) == 0) &&
1860                             (q->q_first == bp)))
1861                                 signals = (firstmsgsigs | allmsgsigs);
1862                         else
1863                                 signals = allmsgsigs;
1864                         break;
1865 
1866                 default:
1867                         mutex_exit(&stp->sd_lock);
1868                         (void) strrput_nondata(q, bp);
1869                         mutex_enter(&stp->sd_lock);
1870                         break;
1871                 }
1872         }
1873         ASSERT(MUTEX_HELD(&stp->sd_lock));
1874         /*
1875          * Wake sleeping read/getmsg and cancel deferred wakeup
1876          */
1877         if (wakeups & RSLEEP)
1878                 stp->sd_wakeq &= ~RSLEEP;
1879 
1880         wakeups &= stp->sd_flag;
1881         if (wakeups & RSLEEP) {
1882                 stp->sd_flag &= ~RSLEEP;
1883                 cv_broadcast(&q->q_wait);
1884         }
1885         if (wakeups & WSLEEP) {
1886                 stp->sd_flag &= ~WSLEEP;
1887                 cv_broadcast(&_WR(q)->q_wait);
1888         }
1889 
1890         if (pollwakeups != 0) {
1891                 if (pollwakeups == (POLLIN | POLLRDNORM)) {
1892                         /*
1893                          * Can't use rput_opt since it was not
1894                          * read when sd_lock was held and SR_POLLIN is changed
1895                          * by strpoll() under sd_lock.
1896                          */
1897                         if (!(stp->sd_rput_opt & SR_POLLIN))
1898                                 goto no_pollwake;
1899                         stp->sd_rput_opt &= ~SR_POLLIN;
1900                 }
1901                 mutex_exit(&stp->sd_lock);
1902                 pollwakeup(&stp->sd_pollist, pollwakeups);
1903                 mutex_enter(&stp->sd_lock);
1904         }
1905 no_pollwake:
1906 
1907         /*
1908          * strsendsig can handle multiple signals with a
1909          * single call.
1910          */
1911         if (stp->sd_sigflags & signals)
1912                 strsendsig(stp->sd_siglist, signals, band, 0);
1913         mutex_exit(&stp->sd_lock);
1914 
1915 
1916 done:
1917         if (nextbp == NULL)
1918                 return (0);
1919 
1920         /*
1921          * Any signals were handled the first time.
1922          * Wakeups and pollwakeups are redone to avoid any race
1923          * conditions - all the messages are not queued until the
1924          * last message has been processed by strrput.
1925          */
1926         bp = nextbp;
1927         signals = firstmsgsigs = allmsgsigs = 0;
1928         mutex_enter(&stp->sd_lock);
1929         goto one_more;
1930 }
1931 
1932 static void
1933 log_dupioc(queue_t *rq, mblk_t *bp)
1934 {
1935         queue_t *wq, *qp;
1936         char *modnames, *mnp, *dname;
1937         size_t maxmodstr;
1938         boolean_t islast;
1939 
1940         /*
1941          * Allocate a buffer large enough to hold the names of nstrpush modules
1942          * and one driver, with spaces between and NUL terminator.  If we can't
1943          * get memory, then we'll just log the driver name.
1944          */
1945         maxmodstr = nstrpush * (FMNAMESZ + 1);
1946         mnp = modnames = kmem_alloc(maxmodstr, KM_NOSLEEP);
1947 
1948         /* march down write side to print log message down to the driver */
1949         wq = WR(rq);
1950 
1951         /* make sure q_next doesn't shift around while we're grabbing data */
1952         claimstr(wq);
1953         qp = wq->q_next;
1954         do {
1955                 dname = Q2NAME(qp);
1956                 islast = !SAMESTR(qp) || qp->q_next == NULL;
1957                 if (modnames == NULL) {
1958                         /*
1959                          * If we don't have memory, then get the driver name in
1960                          * the log where we can see it.  Note that memory
1961                          * pressure is a possible cause of these sorts of bugs.
1962                          */
1963                         if (islast) {
1964                                 modnames = dname;
1965                                 maxmodstr = 0;
1966                         }
1967                 } else {
1968                         mnp += snprintf(mnp, FMNAMESZ + 1, "%s", dname);
1969                         if (!islast)
1970                                 *mnp++ = ' ';
1971                 }
1972                 qp = qp->q_next;
1973         } while (!islast);
1974         releasestr(wq);
1975         /* Cannot happen unless stream head is corrupt. */
1976         ASSERT(modnames != NULL);
1977         (void) strlog(rq->q_qinfo->qi_minfo->mi_idnum, 0, 1,
1978             SL_CONSOLE|SL_TRACE|SL_ERROR,
1979             "Warning: stream %p received duplicate %X M_IOC%s; module list: %s",
1980             rq->q_ptr, ((struct iocblk *)bp->b_rptr)->ioc_cmd,
1981             (DB_TYPE(bp) == M_IOCACK ? "ACK" : "NAK"), modnames);
1982         if (maxmodstr != 0)
1983                 kmem_free(modnames, maxmodstr);
1984 }
1985 
1986 int
1987 strrput_nondata(queue_t *q, mblk_t *bp)
1988 {
1989         struct stdata *stp;
1990         struct iocblk *iocbp;
1991         struct stroptions *sop;
1992         struct copyreq *reqp;
1993         struct copyresp *resp;
1994         unsigned char bpri;
1995         unsigned char  flushed_already = 0;
1996 
1997         stp = (struct stdata *)q->q_ptr;
1998 
1999         ASSERT(!(stp->sd_flag & STPLEX));
2000         ASSERT(qclaimed(q));
2001 
2002         switch (bp->b_datap->db_type) {
2003         case M_ERROR:
2004                 /*
2005                  * An error has occurred downstream, the errno is in the first
2006                  * bytes of the message.
2007                  */
2008                 if ((bp->b_wptr - bp->b_rptr) == 2) {     /* New flavor */
2009                         unsigned char rw = 0;
2010 
2011                         mutex_enter(&stp->sd_lock);
2012                         if (*bp->b_rptr != NOERROR) {        /* read error */
2013                                 if (*bp->b_rptr != 0) {
2014                                         if (stp->sd_flag & STRDERR)
2015                                                 flushed_already |= FLUSHR;
2016                                         stp->sd_flag |= STRDERR;
2017                                         rw |= FLUSHR;
2018                                 } else {
2019                                         stp->sd_flag &= ~STRDERR;
2020                                 }
2021                                 stp->sd_rerror = *bp->b_rptr;
2022                         }
2023                         bp->b_rptr++;
2024                         if (*bp->b_rptr != NOERROR) {        /* write error */
2025                                 if (*bp->b_rptr != 0) {
2026                                         if (stp->sd_flag & STWRERR)
2027                                                 flushed_already |= FLUSHW;
2028                                         stp->sd_flag |= STWRERR;
2029                                         rw |= FLUSHW;
2030                                 } else {
2031                                         stp->sd_flag &= ~STWRERR;
2032                                 }
2033                                 stp->sd_werror = *bp->b_rptr;
2034                         }
2035                         if (rw) {
2036                                 TRACE_2(TR_FAC_STREAMS_FR, TR_STRRPUT_WAKE,
2037                                     "strrput cv_broadcast:q %p, bp %p",
2038                                     q, bp);
2039                                 cv_broadcast(&q->q_wait); /* readers */
2040                                 cv_broadcast(&_WR(q)->q_wait); /* writers */
2041                                 cv_broadcast(&stp->sd_monitor); /* ioctllers */
2042 
2043                                 mutex_exit(&stp->sd_lock);
2044                                 pollwakeup(&stp->sd_pollist, POLLERR);
2045                                 mutex_enter(&stp->sd_lock);
2046 
2047                                 if (stp->sd_sigflags & S_ERROR)
2048                                         strsendsig(stp->sd_siglist, S_ERROR, 0,
2049                                             ((rw & FLUSHR) ? stp->sd_rerror :
2050                                             stp->sd_werror));
2051                                 mutex_exit(&stp->sd_lock);
2052                                 /*
2053                                  * Send the M_FLUSH only
2054                                  * for the first M_ERROR
2055                                  * message on the stream
2056                                  */
2057                                 if (flushed_already == rw) {
2058                                         freemsg(bp);
2059                                         return (0);
2060                                 }
2061 
2062                                 bp->b_datap->db_type = M_FLUSH;
2063                                 *bp->b_rptr = rw;
2064                                 bp->b_wptr = bp->b_rptr + 1;
2065                                 /*
2066                                  * Protect against the driver
2067                                  * passing up messages after
2068                                  * it has done a qprocsoff
2069                                  */
2070                                 if (_OTHERQ(q)->q_next == NULL)
2071                                         freemsg(bp);
2072                                 else
2073                                         qreply(q, bp);
2074                                 return (0);
2075                         } else
2076                                 mutex_exit(&stp->sd_lock);
2077                 } else if (*bp->b_rptr != 0) {               /* Old flavor */
2078                                 if (stp->sd_flag & (STRDERR|STWRERR))
2079                                         flushed_already = FLUSHRW;
2080                                 mutex_enter(&stp->sd_lock);
2081                                 stp->sd_flag |= (STRDERR|STWRERR);
2082                                 stp->sd_rerror = *bp->b_rptr;
2083                                 stp->sd_werror = *bp->b_rptr;
2084                                 TRACE_2(TR_FAC_STREAMS_FR,
2085                                     TR_STRRPUT_WAKE2,
2086                                     "strrput wakeup #2:q %p, bp %p", q, bp);
2087                                 cv_broadcast(&q->q_wait); /* the readers */
2088                                 cv_broadcast(&_WR(q)->q_wait); /* the writers */
2089                                 cv_broadcast(&stp->sd_monitor); /* ioctllers */
2090 
2091                                 mutex_exit(&stp->sd_lock);
2092                                 pollwakeup(&stp->sd_pollist, POLLERR);
2093                                 mutex_enter(&stp->sd_lock);
2094 
2095                                 if (stp->sd_sigflags & S_ERROR)
2096                                         strsendsig(stp->sd_siglist, S_ERROR, 0,
2097                                             (stp->sd_werror ? stp->sd_werror :
2098                                             stp->sd_rerror));
2099                                 mutex_exit(&stp->sd_lock);
2100 
2101                                 /*
2102                                  * Send the M_FLUSH only
2103                                  * for the first M_ERROR
2104                                  * message on the stream
2105                                  */
2106                                 if (flushed_already != FLUSHRW) {
2107                                         bp->b_datap->db_type = M_FLUSH;
2108                                         *bp->b_rptr = FLUSHRW;
2109                                         /*
2110                                          * Protect against the driver passing up
2111                                          * messages after it has done a
2112                                          * qprocsoff.
2113                                          */
2114                                         if (_OTHERQ(q)->q_next == NULL)
2115                                                 freemsg(bp);
2116                                         else
2117                                                 qreply(q, bp);
2118                                         return (0);
2119                                 }
2120                 }
2121                 freemsg(bp);
2122                 return (0);
2123 
2124         case M_HANGUP:
2125 
2126                 freemsg(bp);
2127                 mutex_enter(&stp->sd_lock);
2128                 stp->sd_werror = ENXIO;
2129                 stp->sd_flag |= STRHUP;
2130                 stp->sd_flag &= ~(WSLEEP|RSLEEP);
2131 
2132                 /*
2133                  * send signal if controlling tty
2134                  */
2135 
2136                 if (stp->sd_sidp) {
2137                         prsignal(stp->sd_sidp, SIGHUP);
2138                         if (stp->sd_sidp != stp->sd_pgidp)
2139                                 pgsignal(stp->sd_pgidp, SIGTSTP);
2140                 }
2141 
2142                 /*
2143                  * wake up read, write, and exception pollers and
2144                  * reset wakeup mechanism.
2145                  */
2146                 cv_broadcast(&q->q_wait);        /* the readers */
2147                 cv_broadcast(&_WR(q)->q_wait);   /* the writers */
2148                 cv_broadcast(&stp->sd_monitor);  /* the ioctllers */
2149                 strhup(stp);
2150                 mutex_exit(&stp->sd_lock);
2151                 return (0);
2152 
2153         case M_UNHANGUP:
2154                 freemsg(bp);
2155                 mutex_enter(&stp->sd_lock);
2156                 stp->sd_werror = 0;
2157                 stp->sd_flag &= ~STRHUP;
2158                 mutex_exit(&stp->sd_lock);
2159                 return (0);
2160 
2161         case M_SIG:
2162                 /*
2163                  * Someone downstream wants to post a signal.  The
2164                  * signal to post is contained in the first byte of the
2165                  * message.  If the message would go on the front of
2166                  * the queue, send a signal to the process group
2167                  * (if not SIGPOLL) or to the siglist processes
2168                  * (SIGPOLL).  If something is already on the queue,
2169                  * OR if we are delivering a delayed suspend (*sigh*
2170                  * another "tty" hack) and there's no one sleeping already,
2171                  * just enqueue the message.
2172                  */
2173                 mutex_enter(&stp->sd_lock);
2174                 if (q->q_first || (*bp->b_rptr == SIGTSTP &&
2175                     !(stp->sd_flag & RSLEEP))) {
2176                         (void) putq(q, bp);
2177                         mutex_exit(&stp->sd_lock);
2178                         return (0);
2179                 }
2180                 mutex_exit(&stp->sd_lock);
2181                 /* FALLTHRU */
2182 
2183         case M_PCSIG:
2184                 /*
2185                  * Don't enqueue, just post the signal.
2186                  */
2187                 strsignal(stp, *bp->b_rptr, 0L);
2188                 freemsg(bp);
2189                 return (0);
2190 
2191         case M_CMD:
2192                 if (MBLKL(bp) != sizeof (cmdblk_t)) {
2193                         freemsg(bp);
2194                         return (0);
2195                 }
2196 
2197                 mutex_enter(&stp->sd_lock);
2198                 if (stp->sd_flag & STRCMDWAIT) {
2199                         ASSERT(stp->sd_cmdblk == NULL);
2200                         stp->sd_cmdblk = bp;
2201                         cv_broadcast(&stp->sd_monitor);
2202                         mutex_exit(&stp->sd_lock);
2203                 } else {
2204                         mutex_exit(&stp->sd_lock);
2205                         freemsg(bp);
2206                 }
2207                 return (0);
2208 
2209         case M_FLUSH:
2210                 /*
2211                  * Flush queues.  The indication of which queues to flush
2212                  * is in the first byte of the message.  If the read queue
2213                  * is specified, then flush it.  If FLUSHBAND is set, just
2214                  * flush the band specified by the second byte of the message.
2215                  *
2216                  * If a module has issued a M_SETOPT to not flush hi
2217                  * priority messages off of the stream head, then pass this
2218                  * flag into the flushq code to preserve such messages.
2219                  */
2220 
2221                 if (*bp->b_rptr & FLUSHR) {
2222                         mutex_enter(&stp->sd_lock);
2223                         if (*bp->b_rptr & FLUSHBAND) {
2224                                 ASSERT((bp->b_wptr - bp->b_rptr) >= 2);
2225                                 flushband(q, *(bp->b_rptr + 1), FLUSHALL);
2226                         } else
2227                                 flushq_common(q, FLUSHALL,
2228                                     stp->sd_read_opt & RFLUSHPCPROT);
2229                         if ((q->q_first == NULL) ||
2230                             (q->q_first->b_datap->db_type < QPCTL))
2231                                 stp->sd_flag &= ~STRPRI;
2232                         else {
2233                                 ASSERT(stp->sd_flag & STRPRI);
2234                         }
2235                         mutex_exit(&stp->sd_lock);
2236                 }
2237                 if ((*bp->b_rptr & FLUSHW) && !(bp->b_flag & MSGNOLOOP)) {
2238                         *bp->b_rptr &= ~FLUSHR;
2239                         bp->b_flag |= MSGNOLOOP;
2240                         /*
2241                          * Protect against the driver passing up
2242                          * messages after it has done a qprocsoff.
2243                          */
2244                         if (_OTHERQ(q)->q_next == NULL)
2245                                 freemsg(bp);
2246                         else
2247                                 qreply(q, bp);
2248                         return (0);
2249                 }
2250                 freemsg(bp);
2251                 return (0);
2252 
2253         case M_IOCACK:
2254         case M_IOCNAK:
2255                 iocbp = (struct iocblk *)bp->b_rptr;
2256                 /*
2257                  * If not waiting for ACK or NAK then just free msg.
2258                  * If incorrect id sequence number then just free msg.
2259                  * If already have ACK or NAK for user then this is a
2260                  *    duplicate, display a warning and free the msg.
2261                  */
2262                 mutex_enter(&stp->sd_lock);
2263                 if ((stp->sd_flag & IOCWAIT) == 0 || stp->sd_iocblk ||
2264                     (stp->sd_iocid != iocbp->ioc_id)) {
2265                         /*
2266                          * If the ACK/NAK is a dup, display a message
2267                          * Dup is when sd_iocid == ioc_id, and
2268                          * sd_iocblk == <valid ptr> or -1 (the former
2269                          * is when an ioctl has been put on the stream
2270                          * head, but has not yet been consumed, the
                         * latter is when it has been consumed).
2272                          */
2273                         if ((stp->sd_iocid == iocbp->ioc_id) &&
2274                             (stp->sd_iocblk != NULL)) {
2275                                 log_dupioc(q, bp);
2276                         }
2277                         freemsg(bp);
2278                         mutex_exit(&stp->sd_lock);
2279                         return (0);
2280                 }
2281 
2282                 /*
2283                  * Assign ACK or NAK to user and wake up.
2284                  */
2285                 stp->sd_iocblk = bp;
2286                 cv_broadcast(&stp->sd_monitor);
2287                 mutex_exit(&stp->sd_lock);
2288                 return (0);
2289 
2290         case M_COPYIN:
2291         case M_COPYOUT:
2292                 reqp = (struct copyreq *)bp->b_rptr;
2293 
2294                 /*
2295                  * If not waiting for ACK or NAK then just fail request.
2296                  * If already have ACK, NAK, or copy request, then just
2297                  * fail request.
2298                  * If incorrect id sequence number then just fail request.
2299                  */
2300                 mutex_enter(&stp->sd_lock);
2301                 if ((stp->sd_flag & IOCWAIT) == 0 || stp->sd_iocblk ||
2302                     (stp->sd_iocid != reqp->cq_id)) {
2303                         if (bp->b_cont) {
2304                                 freemsg(bp->b_cont);
2305                                 bp->b_cont = NULL;
2306                         }
2307                         bp->b_datap->db_type = M_IOCDATA;
2308                         bp->b_wptr = bp->b_rptr + sizeof (struct copyresp);
2309                         resp = (struct copyresp *)bp->b_rptr;
2310                         resp->cp_rval = (caddr_t)1;  /* failure */
2311                         mutex_exit(&stp->sd_lock);
2312                         putnext(stp->sd_wrq, bp);
2313                         return (0);
2314                 }
2315 
2316                 /*
2317                  * Assign copy request to user and wake up.
2318                  */
2319                 stp->sd_iocblk = bp;
2320                 cv_broadcast(&stp->sd_monitor);
2321                 mutex_exit(&stp->sd_lock);
2322                 return (0);
2323 
2324         case M_SETOPTS:
2325                 /*
2326                  * Set stream head options (read option, write offset,
2327                  * min/max packet size, and/or high/low water marks for
2328                  * the read side only).
2329                  */
2330 
2331                 bpri = 0;
2332                 sop = (struct stroptions *)bp->b_rptr;
2333                 mutex_enter(&stp->sd_lock);
2334                 if (sop->so_flags & SO_READOPT) {
2335                         switch (sop->so_readopt & RMODEMASK) {
2336                         case RNORM:
2337                                 stp->sd_read_opt &= ~(RD_MSGDIS | RD_MSGNODIS);
2338                                 break;
2339 
2340                         case RMSGD:
2341                                 stp->sd_read_opt =
2342                                     ((stp->sd_read_opt & ~RD_MSGNODIS) |
2343                                     RD_MSGDIS);
2344                                 break;
2345 
2346                         case RMSGN:
2347                                 stp->sd_read_opt =
2348                                     ((stp->sd_read_opt & ~RD_MSGDIS) |
2349                                     RD_MSGNODIS);
2350                                 break;
2351                         }
2352                         switch (sop->so_readopt & RPROTMASK) {
2353                         case RPROTNORM:
2354                                 stp->sd_read_opt &= ~(RD_PROTDAT | RD_PROTDIS);
2355                                 break;
2356 
2357                         case RPROTDAT:
2358                                 stp->sd_read_opt =
2359                                     ((stp->sd_read_opt & ~RD_PROTDIS) |
2360                                     RD_PROTDAT);
2361                                 break;
2362 
2363                         case RPROTDIS:
2364                                 stp->sd_read_opt =
2365                                     ((stp->sd_read_opt & ~RD_PROTDAT) |
2366                                     RD_PROTDIS);
2367                                 break;
2368                         }
2369                         switch (sop->so_readopt & RFLUSHMASK) {
2370                         case RFLUSHPCPROT:
2371                                 /*
2372                                  * This sets the stream head to NOT flush
2373                                  * M_PCPROTO messages.
2374                                  */
2375                                 stp->sd_read_opt |= RFLUSHPCPROT;
2376                                 break;
2377                         }
2378                 }
2379                 if (sop->so_flags & SO_ERROPT) {
2380                         switch (sop->so_erropt & RERRMASK) {
2381                         case RERRNORM:
2382                                 stp->sd_flag &= ~STRDERRNONPERSIST;
2383                                 break;
2384                         case RERRNONPERSIST:
2385                                 stp->sd_flag |= STRDERRNONPERSIST;
2386                                 break;
2387                         }
2388                         switch (sop->so_erropt & WERRMASK) {
2389                         case WERRNORM:
2390                                 stp->sd_flag &= ~STWRERRNONPERSIST;
2391                                 break;
2392                         case WERRNONPERSIST:
2393                                 stp->sd_flag |= STWRERRNONPERSIST;
2394                                 break;
2395                         }
2396                 }
2397                 if (sop->so_flags & SO_COPYOPT) {
2398                         if (sop->so_copyopt & ZCVMSAFE) {
2399                                 stp->sd_copyflag |= STZCVMSAFE;
2400                                 stp->sd_copyflag &= ~STZCVMUNSAFE;
2401                         } else if (sop->so_copyopt & ZCVMUNSAFE) {
2402                                 stp->sd_copyflag |= STZCVMUNSAFE;
2403                                 stp->sd_copyflag &= ~STZCVMSAFE;
2404                         }
2405 
2406                         if (sop->so_copyopt & COPYCACHED) {
2407                                 stp->sd_copyflag |= STRCOPYCACHED;
2408                         }
2409                 }
2410                 if (sop->so_flags & SO_WROFF)
2411                         stp->sd_wroff = sop->so_wroff;
2412                 if (sop->so_flags & SO_TAIL)
2413                         stp->sd_tail = sop->so_tail;
2414                 if (sop->so_flags & SO_MINPSZ)
2415                         q->q_minpsz = sop->so_minpsz;
2416                 if (sop->so_flags & SO_MAXPSZ)
2417                         q->q_maxpsz = sop->so_maxpsz;
2418                 if (sop->so_flags & SO_MAXBLK)
2419                         stp->sd_maxblk = sop->so_maxblk;
2420                 if (sop->so_flags & SO_HIWAT) {
2421                         if (sop->so_flags & SO_BAND) {
2422                                 if (strqset(q, QHIWAT,
2423                                     sop->so_band, sop->so_hiwat)) {
2424                                         cmn_err(CE_WARN, "strrput: could not "
2425                                             "allocate qband\n");
2426                                 } else {
2427                                         bpri = sop->so_band;
2428                                 }
2429                         } else {
2430                                 q->q_hiwat = sop->so_hiwat;
2431                         }
2432                 }
2433                 if (sop->so_flags & SO_LOWAT) {
2434                         if (sop->so_flags & SO_BAND) {
2435                                 if (strqset(q, QLOWAT,
2436                                     sop->so_band, sop->so_lowat)) {
2437                                         cmn_err(CE_WARN, "strrput: could not "
2438                                             "allocate qband\n");
2439                                 } else {
2440                                         bpri = sop->so_band;
2441                                 }
2442                         } else {
2443                                 q->q_lowat = sop->so_lowat;
2444                         }
2445                 }
2446                 if (sop->so_flags & SO_MREADON)
2447                         stp->sd_flag |= SNDMREAD;
2448                 if (sop->so_flags & SO_MREADOFF)
2449                         stp->sd_flag &= ~SNDMREAD;
2450                 if (sop->so_flags & SO_NDELON)
2451                         stp->sd_flag |= OLDNDELAY;
2452                 if (sop->so_flags & SO_NDELOFF)
2453                         stp->sd_flag &= ~OLDNDELAY;
2454                 if (sop->so_flags & SO_ISTTY)
2455                         stp->sd_flag |= STRISTTY;
2456                 if (sop->so_flags & SO_ISNTTY)
2457                         stp->sd_flag &= ~STRISTTY;
2458                 if (sop->so_flags & SO_TOSTOP)
2459                         stp->sd_flag |= STRTOSTOP;
2460                 if (sop->so_flags & SO_TONSTOP)
2461                         stp->sd_flag &= ~STRTOSTOP;
2462                 if (sop->so_flags & SO_DELIM)
2463                         stp->sd_flag |= STRDELIM;
2464                 if (sop->so_flags & SO_NODELIM)
2465                         stp->sd_flag &= ~STRDELIM;
2466 
2467                 mutex_exit(&stp->sd_lock);
2468                 freemsg(bp);
2469 
2470                 /* Check backenable in case the water marks changed */
2471                 qbackenable(q, bpri);
2472                 return (0);
2473 
2474         /*
2475          * The following set of cases deal with situations where two stream
2476          * heads are connected to each other (twisted streams).  These messages
2477          * have no meaning at the stream head.
2478          */
2479         case M_BREAK:
2480         case M_CTL:
2481         case M_DELAY:
2482         case M_START:
2483         case M_STOP:
2484         case M_IOCDATA:
2485         case M_STARTI:
2486         case M_STOPI:
2487                 freemsg(bp);
2488                 return (0);
2489 
2490         case M_IOCTL:
2491                 /*
2492                  * Always NAK this condition
2493                  * (makes no sense)
2494                  * If there is one or more threads in the read side
2495                  * rwnext we have to defer the nacking until that thread
2496                  * returns (in strget).
2497                  */
2498                 mutex_enter(&stp->sd_lock);
2499                 if (stp->sd_struiodnak != 0) {
2500                         /*
                         * Defer NAK to the streamhead. Queue at the end
                         * of the list.
2503                          */
2504                         mblk_t *mp = stp->sd_struionak;
2505 
2506                         while (mp && mp->b_next)
2507                                 mp = mp->b_next;
2508                         if (mp)
2509                                 mp->b_next = bp;
2510                         else
2511                                 stp->sd_struionak = bp;
2512                         bp->b_next = NULL;
2513                         mutex_exit(&stp->sd_lock);
2514                         return (0);
2515                 }
2516                 mutex_exit(&stp->sd_lock);
2517 
2518                 bp->b_datap->db_type = M_IOCNAK;
2519                 /*
2520                  * Protect against the driver passing up
2521                  * messages after it has done a qprocsoff.
2522                  */
2523                 if (_OTHERQ(q)->q_next == NULL)
2524                         freemsg(bp);
2525                 else
2526                         qreply(q, bp);
2527                 return (0);
2528 
2529         default:
2530 #ifdef DEBUG
2531                 cmn_err(CE_WARN,
2532                     "bad message type %x received at stream head\n",
2533                     bp->b_datap->db_type);
2534 #endif
2535                 freemsg(bp);
2536                 return (0);
2537         }
2538 
2539         /* NOTREACHED */
2540 }
2541 
2542 /*
2543  * Check if the stream pointed to by `stp' can be written to, and return an
2544  * error code if not.  If `eiohup' is set, then return EIO if STRHUP is set.
2545  * If `sigpipeok' is set and the SW_SIGPIPE option is enabled on the stream,
2546  * then always return EPIPE and send a SIGPIPE to the invoking thread.
2547  */
2548 static int
2549 strwriteable(struct stdata *stp, boolean_t eiohup, boolean_t sigpipeok)
2550 {
2551         int error;
2552 
2553         ASSERT(MUTEX_HELD(&stp->sd_lock));
2554 
2555         /*
2556          * For modem support, POSIX states that on writes, EIO should
2557          * be returned if the stream has been hung up.
2558          */
2559         if (eiohup && (stp->sd_flag & (STPLEX|STRHUP)) == STRHUP)
2560                 error = EIO;
2561         else
2562                 error = strgeterr(stp, STRHUP|STPLEX|STWRERR, 0);
2563 
2564         if (error != 0) {
2565                 if (!(stp->sd_flag & STPLEX) &&
2566                     (stp->sd_wput_opt & SW_SIGPIPE) && sigpipeok) {
2567                         tsignal(curthread, SIGPIPE);
2568                         error = EPIPE;
2569                 }
2570         }
2571 
2572         return (error);
2573 }
2574 
2575 /*
2576  * Copyin and send data down a stream.
2577  * The caller will allocate and copyin any control part that precedes the
2578  * message and pass that in as mctl.
2579  *
2580  * Caller should *not* hold sd_lock.
2581  * When EWOULDBLOCK is returned the caller has to redo the canputnext
2582  * under sd_lock in order to avoid missing a backenabling wakeup.
2583  *
2584  * Use iosize = -1 to not send any M_DATA. iosize = 0 sends zero-length M_DATA.
2585  *
2586  * Set MSG_IGNFLOW in flags to ignore flow control for hipri messages.
2587  * For sync streams we can only ignore flow control by reverting to using
2588  * putnext.
2589  *
2590  * If sd_maxblk is less than *iosize this routine might return without
2591  * transferring all of *iosize. In all cases, on return *iosize will contain
2592  * the amount of data that was transferred.
2593  */
2594 static int
2595 strput(struct stdata *stp, mblk_t *mctl, struct uio *uiop, ssize_t *iosize,
2596     int b_flag, int pri, int flags)
2597 {
2598         struiod_t uiod;
2599         mblk_t *mp;
2600         queue_t *wqp = stp->sd_wrq;
2601         int error = 0;
2602         ssize_t count = *iosize;
2603 
2604         ASSERT(MUTEX_NOT_HELD(&stp->sd_lock));
2605 
2606         if (uiop != NULL && count >= 0)
2607                 flags |= stp->sd_struiowrq ? STRUIO_POSTPONE : 0;
2608 
2609         if (!(flags & STRUIO_POSTPONE)) {
2610                 /*
2611                  * Use regular canputnext, strmakedata, putnext sequence.
2612                  */
2613                 if (pri == 0) {
2614                         if (!canputnext(wqp) && !(flags & MSG_IGNFLOW)) {
2615                                 freemsg(mctl);
2616                                 return (EWOULDBLOCK);
2617                         }
2618                 } else {
2619                         if (!(flags & MSG_IGNFLOW) && !bcanputnext(wqp, pri)) {
2620                                 freemsg(mctl);
2621                                 return (EWOULDBLOCK);
2622                         }
2623                 }
2624 
2625                 if ((error = strmakedata(iosize, uiop, stp, flags,
2626                     &mp)) != 0) {
2627                         freemsg(mctl);
2628                         /*
2629                          * need to change return code to ENOMEM
2630                          * so that this is not confused with
2631                          * flow control, EAGAIN.
2632                          */
2633 
2634                         if (error == EAGAIN)
2635                                 return (ENOMEM);
2636                         else
2637                                 return (error);
2638                 }
2639                 if (mctl != NULL) {
2640                         if (mctl->b_cont == NULL)
2641                                 mctl->b_cont = mp;
2642                         else if (mp != NULL)
2643                                 linkb(mctl, mp);
2644                         mp = mctl;
2645                 } else if (mp == NULL)
2646                         return (0);
2647 
2648                 mp->b_flag |= b_flag;
2649                 mp->b_band = (uchar_t)pri;
2650 
2651                 if (flags & MSG_IGNFLOW) {
2652                         /*
2653                          * XXX Hack: Don't get stuck running service
2654                          * procedures. This is needed for sockfs when
2655                          * sending the unbind message out of the rput
2656                          * procedure - we don't want a put procedure
2657                          * to run service procedures.
2658                          */
2659                         putnext(wqp, mp);
2660                 } else {
2661                         stream_willservice(stp);
2662                         putnext(wqp, mp);
2663                         stream_runservice(stp);
2664                 }
2665                 return (0);
2666         }
2667         /*
2668          * Stream supports rwnext() for the write side.
2669          */
2670         if ((error = strmakedata(iosize, uiop, stp, flags, &mp)) != 0) {
2671                 freemsg(mctl);
2672                 /*
2673                  * map EAGAIN to ENOMEM since EAGAIN means "flow controlled".
2674                  */
2675                 return (error == EAGAIN ? ENOMEM : error);
2676         }
2677         if (mctl != NULL) {
2678                 if (mctl->b_cont == NULL)
2679                         mctl->b_cont = mp;
2680                 else if (mp != NULL)
2681                         linkb(mctl, mp);
2682                 mp = mctl;
2683         } else if (mp == NULL) {
2684                 return (0);
2685         }
2686 
2687         mp->b_flag |= b_flag;
2688         mp->b_band = (uchar_t)pri;
2689 
2690         (void) uiodup(uiop, &uiod.d_uio, uiod.d_iov,
2691             sizeof (uiod.d_iov) / sizeof (*uiod.d_iov));
2692         uiod.d_uio.uio_offset = 0;
2693         uiod.d_mp = mp;
2694         error = rwnext(wqp, &uiod);
2695         if (! uiod.d_mp) {
2696                 uioskip(uiop, *iosize);
2697                 return (error);
2698         }
2699         ASSERT(mp == uiod.d_mp);
2700         if (error == EINVAL) {
2701                 /*
2702                  * The stream plumbing must have changed while
2703                  * we were away, so just turn off rwnext()s.
2704                  */
2705                 error = 0;
2706         } else if (error == EBUSY || error == EWOULDBLOCK) {
2707                 /*
2708                  * Couldn't enter a perimeter or took a page fault,
2709                  * so fall-back to putnext().
2710                  */
2711                 error = 0;
2712         } else {
2713                 freemsg(mp);
2714                 return (error);
2715         }
2716         /* Have to check canput before consuming data from the uio */
2717         if (pri == 0) {
2718                 if (!canputnext(wqp) && !(flags & MSG_IGNFLOW)) {
2719                         freemsg(mp);
2720                         return (EWOULDBLOCK);
2721                 }
2722         } else {
2723                 if (!bcanputnext(wqp, pri) && !(flags & MSG_IGNFLOW)) {
2724                         freemsg(mp);
2725                         return (EWOULDBLOCK);
2726                 }
2727         }
2728         ASSERT(mp == uiod.d_mp);
2729         /* Copyin data from the uio */
2730         if ((error = struioget(wqp, mp, &uiod, 0)) != 0) {
2731                 freemsg(mp);
2732                 return (error);
2733         }
2734         uioskip(uiop, *iosize);
2735         if (flags & MSG_IGNFLOW) {
2736                 /*
2737                  * XXX Hack: Don't get stuck running service procedures.
2738                  * This is needed for sockfs when sending the unbind message
2739                  * out of the rput procedure - we don't want a put procedure
2740                  * to run service procedures.
2741                  */
2742                 putnext(wqp, mp);
2743         } else {
2744                 stream_willservice(stp);
2745                 putnext(wqp, mp);
2746                 stream_runservice(stp);
2747         }
2748         return (0);
2749 }
2750 
2751 /*
2752  * Write attempts to break the write request into messages conforming
2753  * with the minimum and maximum packet sizes set downstream.
2754  *
2755  * Write will not block if downstream queue is full and
2756  * O_NDELAY is set, otherwise it will block waiting for the queue to get room.
2757  *
2758  * A write of zero bytes gets packaged into a zero length message and sent
2759  * downstream like any other message.
2760  *
2761  * If buffers of the requested sizes are not available, the write will
2762  * sleep until the buffers become available.
2763  *
2764  * Write (if specified) will supply a write offset in a message if it
2765  * makes sense. This can be specified by downstream modules as part of
2766  * a M_SETOPTS message.  Write will not supply the write offset if it
2767  * cannot supply any data in a buffer.  In other words, write will never
2768  * send down an empty packet due to a write offset.
2769  */
2770 /* ARGSUSED2 */
2771 int
2772 strwrite(struct vnode *vp, struct uio *uiop, cred_t *crp)
2773 {
2774         return (strwrite_common(vp, uiop, crp, 0));
2775 }
2776 
2777 /* ARGSUSED2 */
2778 int
2779 strwrite_common(struct vnode *vp, struct uio *uiop, cred_t *crp, int wflag)
2780 {
2781         struct stdata *stp;
2782         struct queue *wqp;
2783         ssize_t rmin, rmax;
2784         ssize_t iosize;
2785         int waitflag;
2786         int tempmode;
2787         int error = 0;
2788         int b_flag;
2789 
2790         ASSERT(vp->v_stream);
2791         stp = vp->v_stream;
2792 
2793         mutex_enter(&stp->sd_lock);
2794 
2795         if ((error = i_straccess(stp, JCWRITE)) != 0) {
2796                 mutex_exit(&stp->sd_lock);
2797                 return (error);
2798         }
2799 
2800         if (stp->sd_flag & (STWRERR|STRHUP|STPLEX)) {
2801                 error = strwriteable(stp, B_TRUE, B_TRUE);
2802                 if (error != 0) {
2803                         mutex_exit(&stp->sd_lock);
2804                         return (error);
2805                 }
2806         }
2807 
2808         mutex_exit(&stp->sd_lock);
2809 
2810         wqp = stp->sd_wrq;
2811 
        /* get these values from the copies cached in the stream head */
2813         rmin = stp->sd_qn_minpsz;
2814         rmax = stp->sd_qn_maxpsz;
2815 
2816         /*
2817          * Check the min/max packet size constraints.  If min packet size
2818          * is non-zero, the write cannot be split into multiple messages
2819          * and still guarantee the size constraints.
2820          */
2821         TRACE_1(TR_FAC_STREAMS_FR, TR_STRWRITE_IN, "strwrite in:q %p", wqp);
2822 
2823         ASSERT((rmax >= 0) || (rmax == INFPSZ));
2824         if (rmax == 0) {
2825                 return (0);
2826         }
2827         if (rmin > 0) {
2828                 if (uiop->uio_resid < rmin) {
2829                         TRACE_3(TR_FAC_STREAMS_FR, TR_STRWRITE_OUT,
2830                             "strwrite out:q %p out %d error %d",
2831                             wqp, 0, ERANGE);
2832                         return (ERANGE);
2833                 }
2834                 if ((rmax != INFPSZ) && (uiop->uio_resid > rmax)) {
2835                         TRACE_3(TR_FAC_STREAMS_FR, TR_STRWRITE_OUT,
2836                             "strwrite out:q %p out %d error %d",
2837                             wqp, 1, ERANGE);
2838                         return (ERANGE);
2839                 }
2840         }
2841 
2842         /*
2843          * Do until count satisfied or error.
2844          */
2845         waitflag = WRITEWAIT | wflag;
2846         if (stp->sd_flag & OLDNDELAY)
2847                 tempmode = uiop->uio_fmode & ~FNDELAY;
2848         else
2849                 tempmode = uiop->uio_fmode;
2850 
2851         if (rmax == INFPSZ)
2852                 rmax = uiop->uio_resid;
2853 
2854         /*
2855          * Note that tempmode does not get used in strput/strmakedata
2856          * but only in strwaitq. The other routines use uio_fmode
2857          * unmodified.
2858          */
2859 
2860         /* LINTED: constant in conditional context */
2861         while (1) {     /* breaks when uio_resid reaches zero */
2862                 /*
2863                  * Determine the size of the next message to be
2864                  * packaged.  May have to break write into several
2865                  * messages based on max packet size.
2866                  */
2867                 iosize = MIN(uiop->uio_resid, rmax);
2868 
2869                 /*
2870                  * Put block downstream when flow control allows it.
2871                  */
2872                 if ((stp->sd_flag & STRDELIM) && (uiop->uio_resid == iosize))
2873                         b_flag = MSGDELIM;
2874                 else
2875                         b_flag = 0;
2876 
2877                 for (;;) {
2878                         int done = 0;
2879 
2880                         error = strput(stp, NULL, uiop, &iosize, b_flag, 0, 0);
2881                         if (error == 0)
2882                                 break;
2883                         if (error != EWOULDBLOCK)
2884                                 goto out;
2885 
2886                         mutex_enter(&stp->sd_lock);
2887                         /*
2888                          * Check for a missed wakeup.
2889                          * Needed since strput did not hold sd_lock across
2890                          * the canputnext.
2891                          */
2892                         if (canputnext(wqp)) {
2893                                 /* Try again */
2894                                 mutex_exit(&stp->sd_lock);
2895                                 continue;
2896                         }
2897                         TRACE_1(TR_FAC_STREAMS_FR, TR_STRWRITE_WAIT,
2898                             "strwrite wait:q %p wait", wqp);
2899                         if ((error = strwaitq(stp, waitflag, (ssize_t)0,
2900                             tempmode, -1, &done)) != 0 || done) {
2901                                 mutex_exit(&stp->sd_lock);
2902                                 if ((vp->v_type == VFIFO) &&
2903                                     (uiop->uio_fmode & FNDELAY) &&
2904                                     (error == EAGAIN))
2905                                         error = 0;
2906                                 goto out;
2907                         }
2908                         TRACE_1(TR_FAC_STREAMS_FR, TR_STRWRITE_WAKE,
2909                             "strwrite wake:q %p awakes", wqp);
2910                         if ((error = i_straccess(stp, JCWRITE)) != 0) {
2911                                 mutex_exit(&stp->sd_lock);
2912                                 goto out;
2913                         }
2914                         mutex_exit(&stp->sd_lock);
2915                 }
2916                 waitflag |= NOINTR;
2917                 TRACE_2(TR_FAC_STREAMS_FR, TR_STRWRITE_RESID,
2918                     "strwrite resid:q %p uiop %p", wqp, uiop);
2919                 if (uiop->uio_resid) {
2920                         /* Recheck for errors - needed for sockets */
2921                         if ((stp->sd_wput_opt & SW_RECHECK_ERR) &&
2922                             (stp->sd_flag & (STWRERR|STRHUP|STPLEX))) {
2923                                 mutex_enter(&stp->sd_lock);
2924                                 error = strwriteable(stp, B_FALSE, B_TRUE);
2925                                 mutex_exit(&stp->sd_lock);
2926                                 if (error != 0)
2927                                         return (error);
2928                         }
2929                         continue;
2930                 }
2931                 break;
2932         }
2933 out:
2934         /*
2935          * For historical reasons, applications expect EAGAIN when a data
2936          * mblk_t cannot be allocated, so change ENOMEM back to EAGAIN.
2937          */
2938         if (error == ENOMEM)
2939                 error = EAGAIN;
2940         TRACE_3(TR_FAC_STREAMS_FR, TR_STRWRITE_OUT,
2941             "strwrite out:q %p out %d error %d", wqp, 2, error);
2942         return (error);
2943 }
2944 
2945 /*
2946  * Stream head write service routine.
2947  * Its job is to wake up any sleeping writers when a queue
2948  * downstream needs data (part of the flow control in putq and getq).
2949  * It also must wake anyone sleeping on a poll().
2950  * For stream head right below mux module, it must also invoke put procedure
2951  * of next downstream module.
2952  */
2953 int
2954 strwsrv(queue_t *q)
2955 {
2956         struct stdata *stp;
2957         queue_t *tq;
2958         qband_t *qbp;
2959         int i;
2960         qband_t *myqbp;
2961         int isevent;
2962         unsigned char   qbf[NBAND];     /* band flushing backenable flags */
2963 
2964         TRACE_1(TR_FAC_STREAMS_FR,
2965             TR_STRWSRV, "strwsrv:q %p", q);
2966         stp = (struct stdata *)q->q_ptr;
2967         ASSERT(qclaimed(q));
2968         mutex_enter(&stp->sd_lock);
2969         ASSERT(!(stp->sd_flag & STPLEX));
2970 
2971         if (stp->sd_flag & WSLEEP) {
2972                 stp->sd_flag &= ~WSLEEP;
2973                 cv_broadcast(&q->q_wait);
2974         }
2975         mutex_exit(&stp->sd_lock);
2976 
2977         /* The other end of a stream pipe went away. */
2978         if ((tq = q->q_next) == NULL) {
2979                 return (0);
2980         }
2981 
2982         /* Find the next module forward that has a service procedure */
2983         claimstr(q);
2984         tq = q->q_nfsrv;
2985         ASSERT(tq != NULL);
2986 
2987         if ((q->q_flag & QBACK)) {
2988                 if ((tq->q_flag & QFULL)) {
2989                         mutex_enter(QLOCK(tq));
2990                         if (!(tq->q_flag & QFULL)) {
2991                                 mutex_exit(QLOCK(tq));
2992                                 goto wakeup;
2993                         }
2994                         /*
2995                          * The queue must have become full again. Set QWANTW
2996                          * again so strwsrv will be back enabled when
2997                          * the queue becomes non-full next time.
2998                          */
2999                         tq->q_flag |= QWANTW;
3000                         mutex_exit(QLOCK(tq));
3001                 } else {
3002                 wakeup:
3003                         pollwakeup(&stp->sd_pollist, POLLWRNORM);
3004                         mutex_enter(&stp->sd_lock);
3005                         if (stp->sd_sigflags & S_WRNORM)
3006                                 strsendsig(stp->sd_siglist, S_WRNORM, 0, 0);
3007                         mutex_exit(&stp->sd_lock);
3008                 }
3009         }
3010 
3011         isevent = 0;
3012         i = 1;
3013         bzero((caddr_t)qbf, NBAND);
3014         mutex_enter(QLOCK(tq));
3015         if ((myqbp = q->q_bandp) != NULL)
3016                 for (qbp = tq->q_bandp; qbp && myqbp; qbp = qbp->qb_next) {
3017                         ASSERT(myqbp);
3018                         if ((myqbp->qb_flag & QB_BACK)) {
3019                                 if (qbp->qb_flag & QB_FULL) {
3020                                         /*
3021                                          * The band must have become full again.
3022                                          * Set QB_WANTW again so strwsrv will
3023                                          * be back enabled when the band becomes
3024                                          * non-full next time.
3025                                          */
3026                                         qbp->qb_flag |= QB_WANTW;
3027                                 } else {
3028                                         isevent = 1;
3029                                         qbf[i] = 1;
3030                                 }
3031                         }
3032                         myqbp = myqbp->qb_next;
3033                         i++;
3034                 }
3035         mutex_exit(QLOCK(tq));
3036 
3037         if (isevent) {
3038                 for (i = tq->q_nband; i; i--) {
3039                         if (qbf[i]) {
3040                                 pollwakeup(&stp->sd_pollist, POLLWRBAND);
3041                                 mutex_enter(&stp->sd_lock);
3042                                 if (stp->sd_sigflags & S_WRBAND)
3043                                         strsendsig(stp->sd_siglist, S_WRBAND,
3044                                             (uchar_t)i, 0);
3045                                 mutex_exit(&stp->sd_lock);
3046                         }
3047                 }
3048         }
3049 
3050         releasestr(q);
3051         return (0);
3052 }
3053 
3054 /*
3055  * Special case of strcopyin/strcopyout for copying
3056  * struct strioctl that can deal with both data
3057  * models.
3058  */
3059 
3060 #ifdef  _LP64
3061 
3062 static int
3063 strcopyin_strioctl(void *from, void *to, int flag, int copyflag)
3064 {
3065         struct  strioctl32 strioc32;
3066         struct  strioctl *striocp;
3067 
3068         if (copyflag & U_TO_K) {
3069                 ASSERT((copyflag & K_TO_K) == 0);
3070 
3071                 if ((flag & FMODELS) == DATAMODEL_ILP32) {
3072                         if (copyin(from, &strioc32, sizeof (strioc32)))
3073                                 return (EFAULT);
3074 
3075                         striocp = (struct strioctl *)to;
3076                         striocp->ic_cmd      = strioc32.ic_cmd;
3077                         striocp->ic_timout = strioc32.ic_timout;
3078                         striocp->ic_len      = strioc32.ic_len;
3079                         striocp->ic_dp       = (char *)(uintptr_t)strioc32.ic_dp;
3080 
3081                 } else { /* NATIVE data model */
3082                         if (copyin(from, to, sizeof (struct strioctl))) {
3083                                 return (EFAULT);
3084                         } else {
3085                                 return (0);
3086                         }
3087                 }
3088         } else {
3089                 ASSERT(copyflag & K_TO_K);
3090                 bcopy(from, to, sizeof (struct strioctl));
3091         }
3092         return (0);
3093 }
3094 
3095 static int
3096 strcopyout_strioctl(void *from, void *to, int flag, int copyflag)
3097 {
3098         struct  strioctl32 strioc32;
3099         struct  strioctl *striocp;
3100 
3101         if (copyflag & U_TO_K) {
3102                 ASSERT((copyflag & K_TO_K) == 0);
3103 
3104                 if ((flag & FMODELS) == DATAMODEL_ILP32) {
3105                         striocp = (struct strioctl *)from;
3106                         strioc32.ic_cmd = striocp->ic_cmd;
3107                         strioc32.ic_timout = striocp->ic_timout;
3108                         strioc32.ic_len = striocp->ic_len;
3109                         strioc32.ic_dp  = (caddr32_t)(uintptr_t)striocp->ic_dp;
3110                         ASSERT((char *)(uintptr_t)strioc32.ic_dp ==
3111                             striocp->ic_dp);
3112 
3113                         if (copyout(&strioc32, to, sizeof (strioc32)))
3114                                 return (EFAULT);
3115 
3116                 } else { /* NATIVE data model */
3117                         if (copyout(from, to, sizeof (struct strioctl))) {
3118                                 return (EFAULT);
3119                         } else {
3120                                 return (0);
3121                         }
3122                 }
3123         } else {
3124                 ASSERT(copyflag & K_TO_K);
3125                 bcopy(from, to, sizeof (struct strioctl));
3126         }
3127         return (0);
3128 }
3129 
3130 #else   /* ! _LP64 */
3131 
3132 /* ARGSUSED2 */
3133 static int
3134 strcopyin_strioctl(void *from, void *to, int flag, int copyflag)
3135 {
3136         return (strcopyin(from, to, sizeof (struct strioctl), copyflag));
3137 }
3138 
3139 /* ARGSUSED2 */
3140 static int
3141 strcopyout_strioctl(void *from, void *to, int flag, int copyflag)
3142 {
3143         return (strcopyout(from, to, sizeof (struct strioctl), copyflag));
3144 }
3145 
3146 #endif  /* _LP64 */
3147 
3148 /*
3149  * Determine type of job control semantics expected by user.  The
3150  * possibilities are:
3151  *      JCREAD  - Behaves like read() on fd; send SIGTTIN
3152  *      JCWRITE - Behaves like write() on fd; send SIGTTOU if TOSTOP set
3153  *      JCSETP  - Sets a value in the stream; send SIGTTOU, ignore TOSTOP
3154  *      JCGETP  - Gets a value in the stream; no signals.
3155  * See straccess in strsubr.c for usage of these values.
3156  *
3157  * This routine also returns -1 for I_STR as a special case; the
3158  * caller must call again with the real ioctl number for
3159  * classification.
3160  */
3161 static int
3162 job_control_type(int cmd)
3163 {
3164         switch (cmd) {
3165         case I_STR:
3166                 return (-1);
3167 
3168         case I_RECVFD:
3169         case I_E_RECVFD:
3170                 return (JCREAD);
3171 
3172         case I_FDINSERT:
3173         case I_SENDFD:
3174                 return (JCWRITE);
3175 
3176         case TCSETA:
3177         case TCSETAW:
3178         case TCSETAF:
3179         case TCSBRK:
3180         case TCXONC:
3181         case TCFLSH:
3182         case TCDSET:    /* Obsolete */
3183         case TIOCSWINSZ:
3184         case TCSETS:
3185         case TCSETSW:
3186         case TCSETSF:
3187         case TIOCSETD:
3188         case TIOCHPCL:
3189         case TIOCSETP:
3190         case TIOCSETN:
3191         case TIOCEXCL:
3192         case TIOCNXCL:
3193         case TIOCFLUSH:
3194         case TIOCSETC:
3195         case TIOCLBIS:
3196         case TIOCLBIC:
3197         case TIOCLSET:
3198         case TIOCSBRK:
3199         case TIOCCBRK:
3200         case TIOCSDTR:
3201         case TIOCCDTR:
3202         case TIOCSLTC:
3203         case TIOCSTOP:
3204         case TIOCSTART:
3205         case TIOCSTI:
3206         case TIOCSPGRP:
3207         case TIOCMSET:
3208         case TIOCMBIS:
3209         case TIOCMBIC:
3210         case TIOCREMOTE:
3211         case TIOCSIGNAL:
3212         case LDSETT:
3213         case LDSMAP:    /* Obsolete */
3214         case DIOCSETP:
3215         case I_FLUSH:
3216         case I_SRDOPT:
3217         case I_SETSIG:
3218         case I_SWROPT:
3219         case I_FLUSHBAND:
3220         case I_SETCLTIME:
3221         case I_SERROPT:
3222         case I_ESETSIG:
3223         case FIONBIO:
3224         case FIOASYNC:
3225         case FIOSETOWN:
3226         case JBOOT:     /* Obsolete */
3227         case JTERM:     /* Obsolete */
3228         case JTIMOM:    /* Obsolete */
3229         case JZOMBOOT:  /* Obsolete */
3230         case JAGENT:    /* Obsolete */
3231         case JTRUN:     /* Obsolete */
3232         case JXTPROTO:  /* Obsolete */
3233                 return (JCSETP);
3234         }
3235 
3236         return (JCGETP);
3237 }
3238 
3239 /*
3240  * ioctl for streams
3241  */
3242 int
3243 strioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, int copyflag,
3244     cred_t *crp, int *rvalp)
3245 {
3246         struct stdata *stp;
3247         struct strcmd *scp;
3248         struct strioctl strioc;
3249         struct uio uio;
3250         struct iovec iov;
3251         int access;
3252         mblk_t *mp;
3253         int error = 0;
3254         int done = 0;
3255         ssize_t rmin, rmax;
3256         queue_t *wrq;
3257         queue_t *rdq;
3258         boolean_t kioctl = B_FALSE;
3259         uint32_t auditing = AU_AUDITING();
3260 
3261         if (flag & FKIOCTL) {
3262                 copyflag = K_TO_K;
3263                 kioctl = B_TRUE;
3264         }
3265         ASSERT(vp->v_stream);
3266         ASSERT(copyflag == U_TO_K || copyflag == K_TO_K);
3267         stp = vp->v_stream;
3268 
3269         TRACE_3(TR_FAC_STREAMS_FR, TR_IOCTL_ENTER,
3270             "strioctl:stp %p cmd %X arg %lX", stp, cmd, arg);
3271 
3272         /*
3273          * If the copy is kernel to kernel, make sure that the FNATIVE
3274          * flag is set.  After this it would be a serious error to have
3275          * no model flag.
3276          */
3277         if (copyflag == K_TO_K)
3278                 flag = (flag & ~FMODELS) | FNATIVE;
3279 
3280         ASSERT((flag & FMODELS) != 0);
3281 
3282         wrq = stp->sd_wrq;
3283         rdq = _RD(wrq);
3284 
3285         access = job_control_type(cmd);
3286 
3287         /* We should never see these here, should be handled by iwscn */
3288         if (cmd == SRIOCSREDIR || cmd == SRIOCISREDIR)
3289                 return (EINVAL);
3290 
3291         mutex_enter(&stp->sd_lock);
3292         if ((access != -1) && ((error = i_straccess(stp, access)) != 0)) {
3293                 mutex_exit(&stp->sd_lock);
3294                 return (error);
3295         }
3296         mutex_exit(&stp->sd_lock);
3297 
3298         /*
3299          * Check for sgttyb-related ioctls first, and complain as
3300          * necessary.
3301          */
3302         switch (cmd) {
3303         case TIOCGETP:
3304         case TIOCSETP:
3305         case TIOCSETN:
3306                 if (sgttyb_handling >= 2 && !sgttyb_complaint) {
3307                         sgttyb_complaint = B_TRUE;
3308                         cmn_err(CE_NOTE,
3309                             "application used obsolete TIOC[GS]ET");
3310                 }
3311                 if (sgttyb_handling >= 3) {
3312                         tsignal(curthread, SIGSYS);
3313                         return (EIO);
3314                 }
3315                 break;
3316         }
3317 
3318         mutex_enter(&stp->sd_lock);
3319 
3320         switch (cmd) {
3321         case I_RECVFD:
3322         case I_E_RECVFD:
3323         case I_PEEK:
3324         case I_NREAD:
3325         case FIONREAD:
3326         case FIORDCHK:
3327         case I_ATMARK:
3328         case FIONBIO:
3329         case FIOASYNC:
3330                 if (stp->sd_flag & (STRDERR|STPLEX)) {
3331                         error = strgeterr(stp, STRDERR|STPLEX, 0);
3332                         if (error != 0) {
3333                                 mutex_exit(&stp->sd_lock);
3334                                 return (error);
3335                         }
3336                 }
3337                 break;
3338 
3339         default:
3340                 if (stp->sd_flag & (STRDERR|STWRERR|STPLEX)) {
3341                         error = strgeterr(stp, STRDERR|STWRERR|STPLEX, 0);
3342                         if (error != 0) {
3343                                 mutex_exit(&stp->sd_lock);
3344                                 return (error);
3345                         }
3346                 }
3347         }
3348 
3349         mutex_exit(&stp->sd_lock);
3350 
3351         switch (cmd) {
3352         default:
3353                 /*
3354                  * The stream head has hardcoded knowledge of a
3355                  * miscellaneous collection of terminal-, keyboard- and
3356                  * mouse-related ioctls, enumerated below.  This hardcoded
3357                  * knowledge allows the stream head to automatically
3358                  * convert transparent ioctl requests made by userland
3359                  * programs into I_STR ioctls which many old STREAMS
3360                  * modules and drivers require.
3361                  *
3362                  * No new ioctls should ever be added to this list.
3363                  * Instead, the STREAMS module or driver should be written
3364                  * to either handle transparent ioctls or require any
3365                  * userland programs to use I_STR ioctls (by returning
3366                  * EINVAL to any transparent ioctl requests).
3367                  *
3368                  * More importantly, removing ioctls from this list should
3369                  * be done with the utmost care, since our STREAMS modules
3370                  * and drivers *count* on the stream head performing this
3371                  * conversion, and thus may panic while processing
3372                  * transparent ioctl request for one of these ioctls (keep
3373                  * in mind that third party modules and drivers may have
3374                  * similar problems).
3375                  */
3376                 if (((cmd & IOCTYPE) == LDIOC) ||
3377                     ((cmd & IOCTYPE) == tIOC) ||
3378                     ((cmd & IOCTYPE) == TIOC) ||
3379                     ((cmd & IOCTYPE) == KIOC) ||
3380                     ((cmd & IOCTYPE) == MSIOC) ||
3381                     ((cmd & IOCTYPE) == VUIOC)) {
3382                         /*
3383                          * The ioctl is a tty ioctl - set up strioc buffer
3384                          * and call strdoioctl() to do the work.
3385                          */
3386                         if (stp->sd_flag & STRHUP)
3387                                 return (ENXIO);
3388                         strioc.ic_cmd = cmd;
3389                         strioc.ic_timout = INFTIM;
3390 
3391                         switch (cmd) {
3392 
3393                         case TCXONC:
3394                         case TCSBRK:
3395                         case TCFLSH:
3396                         case TCDSET:
3397                                 {
3398                                 int native_arg = (int)arg;
3399                                 strioc.ic_len = sizeof (int);
3400                                 strioc.ic_dp = (char *)&native_arg;
3401                                 return (strdoioctl(stp, &strioc, flag,
3402                                     K_TO_K, crp, rvalp));
3403                                 }
3404 
3405                         case TCSETA:
3406                         case TCSETAW:
3407                         case TCSETAF:
3408                                 strioc.ic_len = sizeof (struct termio);
3409                                 strioc.ic_dp = (char *)arg;
3410                                 return (strdoioctl(stp, &strioc, flag,
3411                                     copyflag, crp, rvalp));
3412 
3413                         case TCSETS:
3414                         case TCSETSW:
3415                         case TCSETSF:
3416                                 strioc.ic_len = sizeof (struct termios);
3417                                 strioc.ic_dp = (char *)arg;
3418                                 return (strdoioctl(stp, &strioc, flag,
3419                                     copyflag, crp, rvalp));
3420 
3421                         case LDSETT:
3422                                 strioc.ic_len = sizeof (struct termcb);
3423                                 strioc.ic_dp = (char *)arg;
3424                                 return (strdoioctl(stp, &strioc, flag,
3425                                     copyflag, crp, rvalp));
3426 
3427                         case TIOCSETP:
3428                                 strioc.ic_len = sizeof (struct sgttyb);
3429                                 strioc.ic_dp = (char *)arg;
3430                                 return (strdoioctl(stp, &strioc, flag,
3431                                     copyflag, crp, rvalp));
3432 
3433                         case TIOCSTI:
3434                                 if ((flag & FREAD) == 0 &&
3435                                     secpolicy_sti(crp) != 0) {
3436                                         return (EPERM);
3437                                 }
3438                                 mutex_enter(&stp->sd_lock);
3439                                 mutex_enter(&curproc->p_splock);
3440                                 if (stp->sd_sidp != curproc->p_sessp->s_sidp &&
3441                                     secpolicy_sti(crp) != 0) {
3442                                         mutex_exit(&curproc->p_splock);
3443                                         mutex_exit(&stp->sd_lock);
3444                                         return (EACCES);
3445                                 }
3446                                 mutex_exit(&curproc->p_splock);
3447                                 mutex_exit(&stp->sd_lock);
3448 
3449                                 strioc.ic_len = sizeof (char);
3450                                 strioc.ic_dp = (char *)arg;
3451                                 return (strdoioctl(stp, &strioc, flag,
3452                                     copyflag, crp, rvalp));
3453 
3454                         case TIOCSWINSZ:
3455                                 strioc.ic_len = sizeof (struct winsize);
3456                                 strioc.ic_dp = (char *)arg;
3457                                 return (strdoioctl(stp, &strioc, flag,
3458                                     copyflag, crp, rvalp));
3459 
3460                         case TIOCSSIZE:
3461                                 strioc.ic_len = sizeof (struct ttysize);
3462                                 strioc.ic_dp = (char *)arg;
3463                                 return (strdoioctl(stp, &strioc, flag,
3464                                     copyflag, crp, rvalp));
3465 
3466                         case TIOCSSOFTCAR:
3467                         case KIOCTRANS:
3468                         case KIOCTRANSABLE:
3469                         case KIOCCMD:
3470                         case KIOCSDIRECT:
3471                         case KIOCSCOMPAT:
3472                         case KIOCSKABORTEN:
3473                         case KIOCSRPTCOUNT:
3474                         case KIOCSRPTDELAY:
3475                         case KIOCSRPTRATE:
3476                         case VUIDSFORMAT:
3477                         case TIOCSPPS:
3478                                 strioc.ic_len = sizeof (int);
3479                                 strioc.ic_dp = (char *)arg;
3480                                 return (strdoioctl(stp, &strioc, flag,
3481                                     copyflag, crp, rvalp));
3482 
3483                         case KIOCSETKEY:
3484                         case KIOCGETKEY:
3485                                 strioc.ic_len = sizeof (struct kiockey);
3486                                 strioc.ic_dp = (char *)arg;
3487                                 return (strdoioctl(stp, &strioc, flag,
3488                                     copyflag, crp, rvalp));
3489 
3490                         case KIOCSKEY:
3491                         case KIOCGKEY:
3492                                 strioc.ic_len = sizeof (struct kiockeymap);
3493                                 strioc.ic_dp = (char *)arg;
3494                                 return (strdoioctl(stp, &strioc, flag,
3495                                     copyflag, crp, rvalp));
3496 
3497                         case KIOCSLED:
3498                                 /* arg is a pointer to char */
3499                                 strioc.ic_len = sizeof (char);
3500                                 strioc.ic_dp = (char *)arg;
3501                                 return (strdoioctl(stp, &strioc, flag,
3502                                     copyflag, crp, rvalp));
3503 
3504                         case MSIOSETPARMS:
3505                                 strioc.ic_len = sizeof (Ms_parms);
3506                                 strioc.ic_dp = (char *)arg;
3507                                 return (strdoioctl(stp, &strioc, flag,
3508                                     copyflag, crp, rvalp));
3509 
3510                         case VUIDSADDR:
3511                         case VUIDGADDR:
3512                                 strioc.ic_len = sizeof (struct vuid_addr_probe);
3513                                 strioc.ic_dp = (char *)arg;
3514                                 return (strdoioctl(stp, &strioc, flag,
3515                                     copyflag, crp, rvalp));
3516 
3517                         /*
3518                          * These M_IOCTL's don't require any data to be sent
3519                          * downstream, and the driver will allocate and link
3520                          * on its own mblk_t upon M_IOCACK -- thus we set
3521                          * ic_len to zero and set ic_dp to arg so we know
3522                          * where to copyout to later.
3523                          */
3524                         case TIOCGSOFTCAR:
3525                         case TIOCGWINSZ:
3526                         case TIOCGSIZE:
3527                         case KIOCGTRANS:
3528                         case KIOCGTRANSABLE:
3529                         case KIOCTYPE:
3530                         case KIOCGDIRECT:
3531                         case KIOCGCOMPAT:
3532                         case KIOCLAYOUT:
3533                         case KIOCGLED:
3534                         case MSIOGETPARMS:
3535                         case MSIOBUTTONS:
3536                         case VUIDGFORMAT:
3537                         case TIOCGPPS:
3538                         case TIOCGPPSEV:
3539                         case TCGETA:
3540                         case TCGETS:
3541                         case LDGETT:
3542                         case TIOCGETP:
3543                         case KIOCGRPTCOUNT:
3544                         case KIOCGRPTDELAY:
3545                         case KIOCGRPTRATE:
3546                                 strioc.ic_len = 0;
3547                                 strioc.ic_dp = (char *)arg;
3548                                 return (strdoioctl(stp, &strioc, flag,
3549                                     copyflag, crp, rvalp));
3550                         }
3551                 }
3552 
3553                 /*
3554                  * Unknown cmd - send it down as a transparent ioctl.
3555                  */
3556                 strioc.ic_cmd = cmd;
3557                 strioc.ic_timout = INFTIM;
3558                 strioc.ic_len = TRANSPARENT;
3559                 strioc.ic_dp = (char *)&arg;
3560 
3561                 return (strdoioctl(stp, &strioc, flag, copyflag, crp, rvalp));
3562 
3563         case I_STR:
3564                 /*
3565                  * Stream ioctl.  Read in an strioctl buffer from the user
3566                  * along with any data specified and send it downstream.
3567                  * strdoioctl() allows only one ioctl message at
3568                  * a time, and waits for the acknowledgement.
3569                  */
3570 
3571                 if (stp->sd_flag & STRHUP)
3572                         return (ENXIO);
3573 
3574                 error = strcopyin_strioctl((void *)arg, &strioc, flag,
3575                     copyflag);
3576                 if (error != 0)
3577                         return (error);
3578 
3579                 if ((strioc.ic_len < 0) || (strioc.ic_timout < -1))
3580                         return (EINVAL);
3581 
3582                 access = job_control_type(strioc.ic_cmd);
3583                 mutex_enter(&stp->sd_lock);
3584                 if ((access != -1) &&
3585                     ((error = i_straccess(stp, access)) != 0)) {
3586                         mutex_exit(&stp->sd_lock);
3587                         return (error);
3588                 }
3589                 mutex_exit(&stp->sd_lock);
3590 
3591                 /*
3592                  * The I_STR facility provides a trap door for malicious
3593                  * code to send down bogus streamio(7I) ioctl commands to
3594                  * unsuspecting STREAMS modules and drivers which expect to
3595                  * only get these messages from the stream head.
3596                  * Explicitly prohibit any streamio ioctls which can be
3597                  * passed downstream by the stream head.  Note that we do
3598                  * not block all streamio ioctls because the ioctl
3599                  * numberspace is not well managed and thus it's possible
3600                  * that a module or driver's ioctl numbers may accidentally
3601                  * collide with them.
3602                  */
3603                 switch (strioc.ic_cmd) {
3604                 case I_LINK:
3605                 case I_PLINK:
3606                 case I_UNLINK:
3607                 case I_PUNLINK:
3608                 case _I_GETPEERCRED:
3609                 case _I_PLINK_LH:
3610                         return (EINVAL);
3611                 }
3612 
3613                 error = strdoioctl(stp, &strioc, flag, copyflag, crp, rvalp);
3614                 if (error == 0) {
3615                         error = strcopyout_strioctl(&strioc, (void *)arg,
3616                             flag, copyflag);
3617                 }
3618                 return (error);
3619 
3620         case _I_CMD:
3621                 /*
3622                  * Like I_STR, but without using M_IOC* messages and without
3623                  * copyins/copyouts beyond the passed-in argument.
3624                  */
3625                 if (stp->sd_flag & STRHUP)
3626                         return (ENXIO);
3627 
3628                 if (copyflag == U_TO_K) {
3629                         if ((scp = kmem_alloc(sizeof (strcmd_t),
3630                             KM_NOSLEEP)) == NULL) {
3631                                 return (ENOMEM);
3632                         }
3633 
3634                         if (copyin((void *)arg, scp, sizeof (strcmd_t))) {
3635                                 kmem_free(scp, sizeof (strcmd_t));
3636                                 return (EFAULT);
3637                         }
3638                 } else {
3639                         scp = (strcmd_t *)arg;
3640                 }
3641 
3642                 access = job_control_type(scp->sc_cmd);
3643                 mutex_enter(&stp->sd_lock);
3644                 if (access != -1 && (error = i_straccess(stp, access)) != 0) {
3645                         mutex_exit(&stp->sd_lock);
3646                         if (copyflag == U_TO_K)
3647                                 kmem_free(scp, sizeof (strcmd_t));
3648                         return (error);
3649                 }
3650                 mutex_exit(&stp->sd_lock);
3651 
3652                 *rvalp = 0;
3653                 if ((error = strdocmd(stp, scp, crp)) == 0) {
3654                         if (copyflag == U_TO_K &&
3655                             copyout(scp, (void *)arg, sizeof (strcmd_t))) {
3656                                 error = EFAULT;
3657                         }
3658                 }
3659                 if (copyflag == U_TO_K)
3660                         kmem_free(scp, sizeof (strcmd_t));
3661                 return (error);
3662 
3663         case I_NREAD:
3664                 /*
3665                  * Return number of bytes of data in first message
3666                  * in queue in "arg" and return the number of messages
3667                  * in queue in return value.
3668                  */
3669         {
3670                 size_t  size;
3671                 int     retval;
3672                 int     count = 0;
3673 
3674                 mutex_enter(QLOCK(rdq));
3675 
3676                 size = msgdsize(rdq->q_first);
3677                 for (mp = rdq->q_first; mp != NULL; mp = mp->b_next)
3678                         count++;
3679 
3680                 mutex_exit(QLOCK(rdq));
3681                 if (stp->sd_struiordq) {
3682                         infod_t infod;
3683 
3684                         infod.d_cmd = INFOD_COUNT;
3685                         infod.d_count = 0;
3686                         if (count == 0) {
3687                                 infod.d_cmd |= INFOD_FIRSTBYTES;
3688                                 infod.d_bytes = 0;
3689                         }
3690                         infod.d_res = 0;
3691                         (void) infonext(rdq, &infod);
3692                         count += infod.d_count;
3693                         if (infod.d_res & INFOD_FIRSTBYTES)
3694                                 size = infod.d_bytes;
3695                 }
3696 
3697                 /*
3698                  * Drop down from size_t to the "int" required by the
3699                  * interface.  Cap at INT_MAX.
3700                  */
3701                 retval = MIN(size, INT_MAX);
3702                 error = strcopyout(&retval, (void *)arg, sizeof (retval),
3703                     copyflag);
3704                 if (!error)
3705                         *rvalp = count;
3706                 return (error);
3707         }
3708 
3709         case FIONREAD:
3710                 /*
3711                  * Return number of bytes of data in all data messages
3712                  * in queue in "arg".
3713                  */
3714         {
3715                 size_t  size = 0;
3716                 int     retval;
3717 
3718                 mutex_enter(QLOCK(rdq));
3719                 for (mp = rdq->q_first; mp != NULL; mp = mp->b_next)
3720                         size += msgdsize(mp);
3721                 mutex_exit(QLOCK(rdq));
3722 
3723                 if (stp->sd_struiordq) {
3724                         infod_t infod;
3725 
3726                         infod.d_cmd = INFOD_BYTES;
3727                         infod.d_res = 0;
3728                         infod.d_bytes = 0;
3729                         (void) infonext(rdq, &infod);
3730                         size += infod.d_bytes;
3731                 }
3732 
3733                 /*
3734                  * Drop down from size_t to the "int" required by the
3735                  * interface.  Cap at INT_MAX.
3736                  */
3737                 retval = MIN(size, INT_MAX);
3738                 error = strcopyout(&retval, (void *)arg, sizeof (retval),
3739                     copyflag);
3740 
3741                 *rvalp = 0;
3742                 return (error);
3743         }
3744         case FIORDCHK:
3745                 /*
3746                  * Unlike FIONREAD, FIORDCHK does not use the arg value;
3747                  * instead a count is returned. I_NREAD value may
3748                  * not be accurate but safe. The real thing to do is
3749                  * to add the msgdsizes of all data messages until
3750                  * a non-data message.
3751                  */
3752         {
3753                 size_t size = 0;
3754 
3755                 mutex_enter(QLOCK(rdq));
3756                 for (mp = rdq->q_first; mp != NULL; mp = mp->b_next)
3757                         size += msgdsize(mp);
3758                 mutex_exit(QLOCK(rdq));
3759 
3760                 if (stp->sd_struiordq) {
3761                         infod_t infod;
3762 
3763                         infod.d_cmd = INFOD_BYTES;
3764                         infod.d_res = 0;
3765                         infod.d_bytes = 0;
3766                         (void) infonext(rdq, &infod);
3767                         size += infod.d_bytes;
3768                 }
3769 
3770                 /*
3771                  * Since ioctl returns an int, and memory sizes under
3772                  * LP64 may not fit, we return INT_MAX if the count was
3773                  * actually greater.
3774                  */
3775                 *rvalp = MIN(size, INT_MAX);
3776                 return (0);
3777         }
3778 
3779         case I_FIND:
3780                 /*
3781                  * Get module name.
3782                  */
3783         {
3784                 char mname[FMNAMESZ + 1];
3785                 queue_t *q;
3786 
3787                 error = (copyflag & U_TO_K ? copyinstr : copystr)((void *)arg,
3788                     mname, FMNAMESZ + 1, NULL);
3789                 if (error)
3790                         return ((error == ENAMETOOLONG) ? EINVAL : EFAULT);
3791 
3792                 /*
3793                  * Return EINVAL if we're handed a bogus module name.
3794                  */
3795                 if (fmodsw_find(mname, FMODSW_LOAD) == NULL) {
3796                         TRACE_0(TR_FAC_STREAMS_FR,
3797                             TR_I_CANT_FIND, "couldn't I_FIND");
3798                         return (EINVAL);
3799                 }
3800 
3801                 *rvalp = 0;
3802 
3803                 /* Look downstream to see if module is there. */
3804                 claimstr(stp->sd_wrq);
3805                 for (q = stp->sd_wrq->q_next; q; q = q->q_next) {
3806                         if (q->q_flag & QREADR) {
3807                                 q = NULL;
3808                                 break;
3809                         }
3810                         if (strcmp(mname, Q2NAME(q)) == 0)
3811                                 break;
3812                 }
3813                 releasestr(stp->sd_wrq);
3814 
3815                 *rvalp = (q ? 1 : 0);
3816                 return (error);
3817         }
3818 
3819         case I_PUSH:
3820         case __I_PUSH_NOCTTY:
3821                 /*
3822                  * Push a module.
3823                  * For the case __I_PUSH_NOCTTY push a module but
3824                  * do not allocate controlling tty. See bugid 4025044
3825                  */
3826 
3827         {
3828                 char mname[FMNAMESZ + 1];
3829                 fmodsw_impl_t *fp;
3830                 dev_t dummydev;
3831 
3832                 if (stp->sd_flag & STRHUP)
3833                         return (ENXIO);
3834 
3835                 /*
3836                  * Get module name and look up in fmodsw.
3837                  */
3838                 error = (copyflag & U_TO_K ? copyinstr : copystr)((void *)arg,
3839                     mname, FMNAMESZ + 1, NULL);
3840                 if (error)
3841                         return ((error == ENAMETOOLONG) ? EINVAL : EFAULT);
3842 
3843                 if ((fp = fmodsw_find(mname, FMODSW_HOLD | FMODSW_LOAD)) ==
3844                     NULL)
3845                         return (EINVAL);
3846 
3847                 TRACE_2(TR_FAC_STREAMS_FR, TR_I_PUSH,
3848                     "I_PUSH:fp %p stp %p", fp, stp);
3849 
3850                 /*
3851                  * If the module is flagged as single-instance, then check
3852                  * to see if the module is already pushed. If it is, return
3853                  * as if the push was successful.
3854                  */
3855                 if (fp->f_qflag & _QSINGLE_INSTANCE) {
3856                         queue_t *q;
3857 
3858                         claimstr(stp->sd_wrq);
3859                         for (q = stp->sd_wrq->q_next; q; q = q->q_next) {
3860                                 if (q->q_flag & QREADR) {
3861                                         q = NULL;
3862                                         break;
3863                                 }
3864                                 if (strcmp(mname, Q2NAME(q)) == 0)
3865                                         break;
3866                         }
3867                         releasestr(stp->sd_wrq);
3868                         if (q != NULL) {
3869                                 fmodsw_rele(fp);
3870                                 return (0);
3871                         }
3872                 }
3873 
3874                 if (error = strstartplumb(stp, flag, cmd)) {
3875                         fmodsw_rele(fp);
3876                         return (error);
3877                 }
3878 
3879                 /*
3880                  * See if any more modules can be pushed on this stream.
3881                  * Note that this check must be done after strstartplumb()
3882                  * since otherwise multiple threads issuing I_PUSHes on
3883                  * the same stream will be able to exceed nstrpush.
3884                  */
3885                 mutex_enter(&stp->sd_lock);
3886                 if (stp->sd_pushcnt >= nstrpush) {
3887                         fmodsw_rele(fp);
3888                         strendplumb(stp);
3889                         mutex_exit(&stp->sd_lock);
3890                         return (EINVAL);
3891                 }
3892                 mutex_exit(&stp->sd_lock);
3893 
3894                 /*
3895                  * Push new module and call its open routine
3896                  * via qattach().  Modules don't change device
3897                  * numbers, so just ignore dummydev here.
3898                  */
3899                 dummydev = vp->v_rdev;
3900                 if ((error = qattach(rdq, &dummydev, 0, crp, fp,
3901                     B_FALSE)) == 0) {
3902                         if (vp->v_type == VCHR && /* sorry, no pipes allowed */
3903                             (cmd == I_PUSH) && (stp->sd_flag & STRISTTY)) {
3904                                 /*
3905                                  * try to allocate it as a controlling terminal
3906                                  */
3907                                 (void) strctty(stp);
3908                         }
3909                 }
3910 
3911                 mutex_enter(&stp->sd_lock);
3912 
3913                 /*
3914                  * As a performance concern we are caching the values of
3915                  * q_minpsz and q_maxpsz of the module below the stream
3916                  * head in the stream head.
3917                  */
3918                 mutex_enter(QLOCK(stp->sd_wrq->q_next));
3919                 rmin = stp->sd_wrq->q_next->q_minpsz;
3920                 rmax = stp->sd_wrq->q_next->q_maxpsz;
3921                 mutex_exit(QLOCK(stp->sd_wrq->q_next));
3922 
3923                 /* Do this processing here as a performance concern */
3924                 if (strmsgsz != 0) {
3925                         if (rmax == INFPSZ)
3926                                 rmax = strmsgsz;
3927                         else  {
3928                                 if (vp->v_type == VFIFO)
3929                                         rmax = MIN(PIPE_BUF, rmax);
3930                                 else    rmax = MIN(strmsgsz, rmax);
3931                         }
3932                 }
3933 
3934                 mutex_enter(QLOCK(wrq));
3935                 stp->sd_qn_minpsz = rmin;
3936                 stp->sd_qn_maxpsz = rmax;
3937                 mutex_exit(QLOCK(wrq));
3938 
3939                 strendplumb(stp);
3940                 mutex_exit(&stp->sd_lock);
3941                 return (error);
3942         }
3943 
3944         case I_POP:
3945         {
3946                 queue_t *q;
3947 
3948                 if (stp->sd_flag & STRHUP)
3949                         return (ENXIO);
3950                 if (!wrq->q_next)    /* for broken pipes */
3951                         return (EINVAL);
3952 
3953                 if (error = strstartplumb(stp, flag, cmd))
3954                         return (error);
3955 
3956                 /*
3957                  * If there is an anchor on this stream and popping
3958                  * the current module would attempt to pop through the
3959                  * anchor, then disallow the pop unless we have sufficient
3960                  * privileges; take the cheapest (non-locking) check
3961                  * first.
3962                  */
3963                 if (secpolicy_ip_config(crp, B_TRUE) != 0 ||
3964                     (stp->sd_anchorzone != crgetzoneid(crp))) {
3965                         mutex_enter(&stp->sd_lock);
3966                         /*
3967                          * Anchors only apply if there's at least one
3968                          * module on the stream (sd_pushcnt > 0).
3969                          */
3970                         if (stp->sd_pushcnt > 0 &&
3971                             stp->sd_pushcnt == stp->sd_anchor &&
3972                             stp->sd_vnode->v_type != VFIFO) {
3973                                 strendplumb(stp);
3974                                 mutex_exit(&stp->sd_lock);
3975                                 if (stp->sd_anchorzone != crgetzoneid(crp))
3976                                         return (EINVAL);
3977                                 /* Audit and report error */
3978                                 return (secpolicy_ip_config(crp, B_FALSE));
3979                         }
3980                         mutex_exit(&stp->sd_lock);
3981                 }
3982 
3983                 q = wrq->q_next;
3984                 TRACE_2(TR_FAC_STREAMS_FR, TR_I_POP,
3985                     "I_POP:%p from %p", q, stp);
3986                 if (q->q_next == NULL || (q->q_flag & (QREADR|QISDRV))) {
3987                         error = EINVAL;
3988                 } else {
3989                         qdetach(_RD(q), 1, flag, crp, B_FALSE);
3990                         error = 0;
3991                 }
3992                 mutex_enter(&stp->sd_lock);
3993 
3994                 /*
3995                  * As a performance concern we are caching the values of
3996                  * q_minpsz and q_maxpsz of the module below the stream
3997                  * head in the stream head.
3998                  */
3999                 mutex_enter(QLOCK(wrq->q_next));
4000                 rmin = wrq->q_next->q_minpsz;
4001                 rmax = wrq->q_next->q_maxpsz;
4002                 mutex_exit(QLOCK(wrq->q_next));
4003 
4004                 /* Do this processing here as a performance concern */
4005                 if (strmsgsz != 0) {
4006                         if (rmax == INFPSZ)
4007                                 rmax = strmsgsz;
4008                         else  {
4009                                 if (vp->v_type == VFIFO)
4010                                         rmax = MIN(PIPE_BUF, rmax);
4011                                 else    rmax = MIN(strmsgsz, rmax);
4012                         }
4013                 }
4014 
4015                 mutex_enter(QLOCK(wrq));
4016                 stp->sd_qn_minpsz = rmin;
4017                 stp->sd_qn_maxpsz = rmax;
4018                 mutex_exit(QLOCK(wrq));
4019 
4020                 /* If we popped through the anchor, then reset the anchor. */
4021                 if (stp->sd_pushcnt < stp->sd_anchor) {
4022                         stp->sd_anchor = 0;
4023                         stp->sd_anchorzone = 0;
4024                 }
4025                 strendplumb(stp);
4026                 mutex_exit(&stp->sd_lock);
4027                 return (error);
4028         }
4029 
4030         case _I_MUXID2FD:
4031         {
4032                 /*
4033                  * Create an fd for an I_PLINK'ed lower stream with a given
4034                  * muxid.  With the fd, application can send down ioctls,
4035                  * like I_LIST, to the previously I_PLINK'ed stream.  Note
4036                  * that after getting the fd, the application has to do an
4037                  * I_PUNLINK on the muxid before it can do any operation
4038                  * on the lower stream.  This is required by spec1170.
4039                  *
4040                  * The fd used to do this ioctl should point to the same
4041                  * controlling device used to do the I_PLINK.  If it uses
4042                  * a different stream or an invalid muxid, _I_MUXID2FD will
4043                  * fail.  The error code is set to EINVAL.
4044                  *
4045                  * The intended use of this interface is the following.
4046                  * An application I_PLINK'ed a stream and exits.  The fd
4047                  * to the lower stream is gone.  If another application
4048                  * wants to get an fd to the lower stream, it uses _I_MUXID2FD.
4049                  */
4050                 int muxid = (int)arg;
4051                 int fd;
4052                 linkinfo_t *linkp;
4053                 struct file *fp;
4054                 netstack_t *ns;
4055                 str_stack_t *ss;
4056 
4057                 /*
4058                  * Do not allow the wildcard muxid.  This ioctl is not
4059                  * intended to find an arbitrary link.
4060                  */
4061                 if (muxid == 0) {
4062                         return (EINVAL);
4063                 }
4064 
4065                 ns = netstack_find_by_cred(crp);
4066                 ASSERT(ns != NULL);
4067                 ss = ns->netstack_str;
4068                 ASSERT(ss != NULL);
4069 
4070                 mutex_enter(&muxifier);
4071                 linkp = findlinks(vp->v_stream, muxid, LINKPERSIST, ss);
4072                 if (linkp == NULL) {
4073                         mutex_exit(&muxifier);
4074                         netstack_rele(ss->ss_netstack);
4075                         return (EINVAL);
4076                 }
4077 
4078                 if ((fd = ufalloc(0)) == -1) {
4079                         mutex_exit(&muxifier);
4080                         netstack_rele(ss->ss_netstack);
4081                         return (EMFILE);
4082                 }
4083                 fp = linkp->li_fpdown;
4084                 mutex_enter(&fp->f_tlock);
4085                 fp->f_count++;
4086                 mutex_exit(&fp->f_tlock);
4087                 mutex_exit(&muxifier);
4088                 setf(fd, fp);
4089                 *rvalp = fd;
4090                 netstack_rele(ss->ss_netstack);
4091                 return (0);
4092         }
4093 
4094         case _I_INSERT:
4095         {
4096                 /*
4097                  * To insert a module to a given position in a stream.
4098                  * In the first release, only allow privileged user
4099                  * to use this ioctl. Furthermore, the insert is only allowed
4100                  * below an anchor if the zoneid is the same as the zoneid
4101                  * which created the anchor.
4102                  *
4103                  * Note that we do not plan to support this ioctl
4104                  * on pipes in the first release.  We want to learn more
4105                  * about the implications of these ioctls before extending
4106                  * their support.  And we do not think these features are
4107                  * valuable for pipes.
4108                  */
4109                 STRUCT_DECL(strmodconf, strmodinsert);
4110                 char mod_name[FMNAMESZ + 1];
4111                 fmodsw_impl_t *fp;
4112                 dev_t dummydev;
4113                 queue_t *tmp_wrq;
4114                 int pos;
4115                 boolean_t is_insert;
4116 
4117                 STRUCT_INIT(strmodinsert, flag);
4118                 if (stp->sd_flag & STRHUP)
4119                         return (ENXIO);
4120                 if (STRMATED(stp))
4121                         return (EINVAL);
4122                 if ((error = secpolicy_net_config(crp, B_FALSE)) != 0)
4123                         return (error);
4124                 if (stp->sd_anchor != 0 &&
4125                     stp->sd_anchorzone != crgetzoneid(crp))
4126                         return (EINVAL);
4127 
4128                 error = strcopyin((void *)arg, STRUCT_BUF(strmodinsert),
4129                     STRUCT_SIZE(strmodinsert), copyflag);
4130                 if (error)
4131                         return (error);
4132 
4133                 /*
4134                  * Get module name and look up in fmodsw.
4135                  */
4136                 error = (copyflag & U_TO_K ? copyinstr :
4137                     copystr)(STRUCT_FGETP(strmodinsert, mod_name),
4138                     mod_name, FMNAMESZ + 1, NULL);
4139                 if (error)
4140                         return ((error == ENAMETOOLONG) ? EINVAL : EFAULT);
4141 
4142                 if ((fp = fmodsw_find(mod_name, FMODSW_HOLD | FMODSW_LOAD)) ==
4143                     NULL)
4144                         return (EINVAL);
4145 
4146                 if (error = strstartplumb(stp, flag, cmd)) {
4147                         fmodsw_rele(fp);
4148                         return (error);
4149                 }
4150 
4151                 /*
4152                  * Is this _I_INSERT just like an I_PUSH?  We need to know
4153                  * this because we do some optimizations if this is a
4154                  * module being pushed.
4155                  */
4156                 pos = STRUCT_FGET(strmodinsert, pos);
4157                 is_insert = (pos != 0);
4158 
4159                 /*
4160                  * Make sure pos is valid.  Even though it is not an I_PUSH,
4161                  * we impose the same limit on the number of modules in a
4162                  * stream.
4163                  */
4164                 mutex_enter(&stp->sd_lock);
4165                 if (stp->sd_pushcnt >= nstrpush || pos < 0 ||
4166                     pos > stp->sd_pushcnt) {
4167                         fmodsw_rele(fp);
4168                         strendplumb(stp);
4169                         mutex_exit(&stp->sd_lock);
4170                         return (EINVAL);
4171                 }
4172                 if (stp->sd_anchor != 0) {
4173                         /*
4174                          * Is this insert below the anchor?
4175                          * Pushcnt hasn't been increased yet hence
4176                          * we test for greater than here, and greater or
4177                          * equal after qattach.
4178                          */
4179                         if (pos > (stp->sd_pushcnt - stp->sd_anchor) &&
4180                             stp->sd_anchorzone != crgetzoneid(crp)) {
4181                                 fmodsw_rele(fp);
4182                                 strendplumb(stp);
4183                                 mutex_exit(&stp->sd_lock);
4184                                 return (EPERM);
4185                         }
4186                 }
4187 
4188                 mutex_exit(&stp->sd_lock);
4189 
4190                 /*
4191                  * First find the position at which this module is to
4192                  * be inserted.  We don't need to call claimstr()
4193                  * as the stream should not be changing at this point.
4194                  *
4195                  * Insert new module and call its open routine
4196                  * via qattach().  Modules don't change device
4197                  * numbers, so just ignore dummydev here.
4198                  */
4199                 for (tmp_wrq = stp->sd_wrq; pos > 0;
4200                     tmp_wrq = tmp_wrq->q_next, pos--) {
4201                         ASSERT(SAMESTR(tmp_wrq));
4202                 }
4203                 dummydev = vp->v_rdev;
4204                 if ((error = qattach(_RD(tmp_wrq), &dummydev, 0, crp,
4205                     fp, is_insert)) != 0) {
4206                         mutex_enter(&stp->sd_lock);
4207                         strendplumb(stp);
4208                         mutex_exit(&stp->sd_lock);
4209                         return (error);
4210                 }
4211 
4212                 mutex_enter(&stp->sd_lock);
4213 
4214                 /*
4215                  * As a performance concern we are caching the values of
4216                  * q_minpsz and q_maxpsz of the module below the stream
4217                  * head in the stream head.
4218                  */
4219                 if (!is_insert) {
4220                         mutex_enter(QLOCK(stp->sd_wrq->q_next));
4221                         rmin = stp->sd_wrq->q_next->q_minpsz;
4222                         rmax = stp->sd_wrq->q_next->q_maxpsz;
4223                         mutex_exit(QLOCK(stp->sd_wrq->q_next));
4224 
4225                         /* Do this processing here as a performance concern */
4226                         if (strmsgsz != 0) {
4227                                 if (rmax == INFPSZ) {
4228                                         rmax = strmsgsz;
4229                                 } else  {
4230                                         rmax = MIN(strmsgsz, rmax);
4231                                 }
4232                         }
4233 
4234                         mutex_enter(QLOCK(wrq));
4235                         stp->sd_qn_minpsz = rmin;
4236                         stp->sd_qn_maxpsz = rmax;
4237                         mutex_exit(QLOCK(wrq));
4238                 }
4239 
4240                 /*
4241                  * Need to update the anchor value if this module is
4242                  * inserted below the anchor point.
4243                  */
4244                 if (stp->sd_anchor != 0) {
4245                         pos = STRUCT_FGET(strmodinsert, pos);
4246                         if (pos >= (stp->sd_pushcnt - stp->sd_anchor))
4247                                 stp->sd_anchor++;
4248                 }
4249 
4250                 strendplumb(stp);
4251                 mutex_exit(&stp->sd_lock);
4252                 return (0);
4253         }
4254 
4255         case _I_REMOVE:
4256         {
4257                 /*
4258                  * To remove a module with a given name in a stream.  The
4259                  * caller of this ioctl needs to provide both the name and
4260                  * the position of the module to be removed.  This eliminates
4261                  * the ambiguity of removal if a module is inserted/pushed
4262                  * multiple times in a stream.  In the first release, only
4263                  * allow privileged user to use this ioctl.
4264                  * Furthermore, the remove is only allowed
4265                  * below an anchor if the zoneid is the same as the zoneid
4266                  * which created the anchor.
4267                  *
4268                  * Note that we do not plan to support this ioctl
4269                  * on pipes in the first release.  We want to learn more
4270                  * about the implications of these ioctls before extending
4271                  * their support.  And we do not think these features are
4272                  * valuable for pipes.
4273                  *
4274                  * Also note that _I_REMOVE cannot be used to remove a
4275                  * driver or the stream head.
4276                  */
4277                 STRUCT_DECL(strmodconf, strmodremove);
4278                 queue_t *q;
4279                 int pos;
4280                 char mod_name[FMNAMESZ + 1];
4281                 boolean_t is_remove;
4282 
4283                 STRUCT_INIT(strmodremove, flag);
4284                 if (stp->sd_flag & STRHUP)
4285                         return (ENXIO);
4286                 if (STRMATED(stp))
4287                         return (EINVAL);
4288                 if ((error = secpolicy_net_config(crp, B_FALSE)) != 0)
4289                         return (error);
4290                 if (stp->sd_anchor != 0 &&
4291                     stp->sd_anchorzone != crgetzoneid(crp))
4292                         return (EINVAL);
4293 
4294                 error = strcopyin((void *)arg, STRUCT_BUF(strmodremove),
4295                     STRUCT_SIZE(strmodremove), copyflag);
4296                 if (error)
4297                         return (error);
4298 
4299                 error = (copyflag & U_TO_K ? copyinstr :
4300                     copystr)(STRUCT_FGETP(strmodremove, mod_name),
4301                     mod_name, FMNAMESZ + 1, NULL);
4302                 if (error)
4303                         return ((error == ENAMETOOLONG) ? EINVAL : EFAULT);
4304 
4305                 if ((error = strstartplumb(stp, flag, cmd)) != 0)
4306                         return (error);
4307 
4308                 /*
4309                  * Match the name of given module to the name of module at
4310                  * the given position.
4311                  */
4312                 pos = STRUCT_FGET(strmodremove, pos);
4313 
4314                 is_remove = (pos != 0);
4315                 for (q = stp->sd_wrq->q_next; SAMESTR(q) && pos > 0;
4316                     q = q->q_next, pos--)
4317                         ;
4318                 if (pos > 0 || !SAMESTR(q) ||
4319                     strcmp(Q2NAME(q), mod_name) != 0) {
4320                         mutex_enter(&stp->sd_lock);
4321                         strendplumb(stp);
4322                         mutex_exit(&stp->sd_lock);
4323                         return (EINVAL);
4324                 }
4325 
4326                 /*
4327                  * If the position is at or below an anchor, then the zoneid
4328                  * must match the zoneid that created the anchor.
4329                  */
4330                 if (stp->sd_anchor != 0) {
4331                         pos = STRUCT_FGET(strmodremove, pos);
4332                         if (pos >= (stp->sd_pushcnt - stp->sd_anchor) &&
4333                             stp->sd_anchorzone != crgetzoneid(crp)) {
4334                                 mutex_enter(&stp->sd_lock);
4335                                 strendplumb(stp);
4336                                 mutex_exit(&stp->sd_lock);
4337                                 return (EPERM);
4338                         }
4339                 }
4340 
4341 
4342                 ASSERT(!(q->q_flag & QREADR));
4343                 qdetach(_RD(q), 1, flag, crp, is_remove);
4344 
4345                 mutex_enter(&stp->sd_lock);
4346 
4347                 /*
4348                  * As a performance concern we are caching the values of
4349                  * q_minpsz and q_maxpsz of the module below the stream
4350                  * head in the stream head.
4351                  */
4352                 if (!is_remove) {
4353                         mutex_enter(QLOCK(wrq->q_next));
4354                         rmin = wrq->q_next->q_minpsz;
4355                         rmax = wrq->q_next->q_maxpsz;
4356                         mutex_exit(QLOCK(wrq->q_next));
4357 
4358                         /* Do this processing here as a performance concern */
4359                         if (strmsgsz != 0) {
4360                                 if (rmax == INFPSZ)
4361                                         rmax = strmsgsz;
4362                                 else  {
4363                                         if (vp->v_type == VFIFO)
4364                                                 rmax = MIN(PIPE_BUF, rmax);
4365                                         else    rmax = MIN(strmsgsz, rmax);
4366                                 }
4367                         }
4368 
4369                         mutex_enter(QLOCK(wrq));
4370                         stp->sd_qn_minpsz = rmin;
4371                         stp->sd_qn_maxpsz = rmax;
4372                         mutex_exit(QLOCK(wrq));
4373                 }
4374 
4375                 /*
4376                  * Need to update the anchor value if this module is removed
4377                  * at or below the anchor point.  If the removed module is at
4378                  * the anchor point, remove the anchor for this stream if
4379                  * there is no module above the anchor point.  Otherwise, if
4380                  * the removed module is below the anchor point, decrement the
4381                  * anchor point by 1.
4382                  */
4383                 if (stp->sd_anchor != 0) {
4384                         pos = STRUCT_FGET(strmodremove, pos);
4385                         if (pos == stp->sd_pushcnt - stp->sd_anchor + 1)
4386                                 stp->sd_anchor = 0;
4387                         else if (pos > (stp->sd_pushcnt - stp->sd_anchor + 1))
4388                                 stp->sd_anchor--;
4389                 }
4390 
4391                 strendplumb(stp);
4392                 mutex_exit(&stp->sd_lock);
4393                 return (0);
4394         }
4395 
4396         case I_ANCHOR:
4397                 /*
4398                  * Set the anchor position on the stream to reside at
4399                  * the top module (in other words, the top module
4400                  * cannot be popped).  Anchors with a FIFO make no
4401                  * obvious sense, so they're not allowed.
4402                  */
4403                 mutex_enter(&stp->sd_lock);
4404 
4405                 if (stp->sd_vnode->v_type == VFIFO) {
4406                         mutex_exit(&stp->sd_lock);
4407                         return (EINVAL);
4408                 }
4409                 /* Only allow the same zoneid to update the anchor */
4410                 if (stp->sd_anchor != 0 &&
4411                     stp->sd_anchorzone != crgetzoneid(crp)) {
4412                         mutex_exit(&stp->sd_lock);
4413                         return (EINVAL);
4414                 }
4415                 stp->sd_anchor = stp->sd_pushcnt;
4416                 stp->sd_anchorzone = crgetzoneid(crp);
4417                 mutex_exit(&stp->sd_lock);
4418                 return (0);
4419 
4420         case I_LOOK:
4421                 /*
4422                  * Get name of first module downstream.
4423                  * If no module, return an error.
4424                  */
4425                 claimstr(wrq);
4426                 if (_SAMESTR(wrq) && wrq->q_next->q_next != NULL) {
4427                         char *name = Q2NAME(wrq->q_next);
4428 
4429                         error = strcopyout(name, (void *)arg, strlen(name) + 1,
4430                             copyflag);
4431                         releasestr(wrq);
4432                         return (error);
4433                 }
4434                 releasestr(wrq);
4435                 return (EINVAL);
4436 
4437         case I_LINK:
4438         case I_PLINK:
4439                 /*
4440                  * Link a multiplexor.
4441                  */
4442                 return (mlink(vp, cmd, (int)arg, crp, rvalp, 0));
4443 
4444         case _I_PLINK_LH:
4445                 /*
4446                  * Link a multiplexor: Call must originate from kernel.
4447                  */
4448                 if (kioctl)
4449                         return (ldi_mlink_lh(vp, cmd, arg, crp, rvalp));
4450 
4451                 return (EINVAL);
4452         case I_UNLINK:
4453         case I_PUNLINK:
4454                 /*
4455                  * Unlink a multiplexor.
4456                  * If arg is -1, unlink all links for which this is the
4457                  * controlling stream.  Otherwise, arg is an index number
4458                  * for a link to be removed.
4459                  */
4460         {
4461                 struct linkinfo *linkp;
4462                 int native_arg = (int)arg;
4463                 int type;
4464                 netstack_t *ns;
4465                 str_stack_t *ss;
4466 
4467                 TRACE_1(TR_FAC_STREAMS_FR,
4468                     TR_I_UNLINK, "I_UNLINK/I_PUNLINK:%p", stp);
4469                 if (vp->v_type == VFIFO) {
4470                         return (EINVAL);
4471                 }
4472                 if (cmd == I_UNLINK)
4473                         type = LINKNORMAL;
4474                 else    /* I_PUNLINK */
4475                         type = LINKPERSIST;
4476                 if (native_arg == 0) {
4477                         return (EINVAL);
4478                 }
4479                 ns = netstack_find_by_cred(crp);
4480                 ASSERT(ns != NULL);
4481                 ss = ns->netstack_str;
4482                 ASSERT(ss != NULL);
4483 
4484                 if (native_arg == MUXID_ALL)
4485                         error = munlinkall(stp, type, crp, rvalp, ss);
4486                 else {
4487                         mutex_enter(&muxifier);
4488                         if (!(linkp = findlinks(stp, (int)arg, type, ss))) {
4489                                 /* invalid user supplied index number */
4490                                 mutex_exit(&muxifier);
4491                                 netstack_rele(ss->ss_netstack);
4492                                 return (EINVAL);
4493                         }
4494                         /* munlink drops the muxifier lock */
4495                         error = munlink(stp, linkp, type, crp, rvalp, ss);
4496                 }
4497                 netstack_rele(ss->ss_netstack);
4498                 return (error);
4499         }
4500 
4501         case I_FLUSH:
4502                 /*
4503                  * send a flush message downstream
4504                  * flush message can indicate
4505                  * FLUSHR - flush read queue
4506                  * FLUSHW - flush write queue
4507                  * FLUSHRW - flush read/write queue
4508                  */
4509                 if (stp->sd_flag & STRHUP)
4510                         return (ENXIO);
4511                 if (arg & ~FLUSHRW)
4512                         return (EINVAL);
4513 
4514                 for (;;) {
4515                         if (putnextctl1(stp->sd_wrq, M_FLUSH, (int)arg)) {
4516                                 break;
4517                         }
4518                         if (error = strwaitbuf(1, BPRI_HI)) {
4519                                 return (error);
4520                         }
4521                 }
4522 
4523                 /*
4524                  * Send down an unsupported ioctl and wait for the nack
4525                  * in order to allow the M_FLUSH to propagate back
4526                  * up to the stream head.
4527                  * Replaces if (qready()) runqueues();
4528                  */
4529                 strioc.ic_cmd = -1;     /* The unsupported ioctl */
4530                 strioc.ic_timout = 0;
4531                 strioc.ic_len = 0;
4532                 strioc.ic_dp = NULL;
4533                 (void) strdoioctl(stp, &strioc, flag, K_TO_K, crp, rvalp);
4534                 *rvalp = 0;
4535                 return (0);
4536 
4537         case I_FLUSHBAND:
4538         {
4539                 struct bandinfo binfo;
4540 
4541                 error = strcopyin((void *)arg, &binfo, sizeof (binfo),
4542                     copyflag);
4543                 if (error)
4544                         return (error);
4545                 if (stp->sd_flag & STRHUP)
4546                         return (ENXIO);
4547                 if (binfo.bi_flag & ~FLUSHRW)
4548                         return (EINVAL);
4549                 while (!(mp = allocb(2, BPRI_HI))) {
4550                         if (error = strwaitbuf(2, BPRI_HI))
4551                                 return (error);
4552                 }
4553                 mp->b_datap->db_type = M_FLUSH;
4554                 *mp->b_wptr++ = binfo.bi_flag | FLUSHBAND;
4555                 *mp->b_wptr++ = binfo.bi_pri;
4556                 putnext(stp->sd_wrq, mp);
4557                 /*
4558                  * Send down an unsupported ioctl and wait for the nack
4559                  * in order to allow the M_FLUSH to propagate back
4560                  * up to the stream head.
4561                  * Replaces if (qready()) runqueues();
4562                  */
4563                 strioc.ic_cmd = -1;     /* The unsupported ioctl */
4564                 strioc.ic_timout = 0;
4565                 strioc.ic_len = 0;
4566                 strioc.ic_dp = NULL;
4567                 (void) strdoioctl(stp, &strioc, flag, K_TO_K, crp, rvalp);
4568                 *rvalp = 0;
4569                 return (0);
4570         }
4571 
4572         case I_SRDOPT:
4573                 /*
4574                  * Set read options
4575                  *
4576                  * RNORM - default stream mode
4577                  * RMSGN - message no discard
4578                  * RMSGD - message discard
4579                  * RPROTNORM - fail read with EBADMSG for M_[PC]PROTOs
4580                  * RPROTDAT - convert M_[PC]PROTOs to M_DATAs
4581                  * RPROTDIS - discard M_[PC]PROTOs and retain M_DATAs
4582                  */
4583                 if (arg & ~(RMODEMASK | RPROTMASK))
4584                         return (EINVAL);
4585 
4586                 if ((arg & (RMSGD|RMSGN)) == (RMSGD|RMSGN))
4587                         return (EINVAL);
4588 
4589                 mutex_enter(&stp->sd_lock);
4590                 switch (arg & RMODEMASK) {
4591                 case RNORM:
4592                         stp->sd_read_opt &= ~(RD_MSGDIS | RD_MSGNODIS);
4593                         break;
4594                 case RMSGD:
4595                         stp->sd_read_opt = (stp->sd_read_opt & ~RD_MSGNODIS) |
4596                             RD_MSGDIS;
4597                         break;
4598                 case RMSGN:
4599                         stp->sd_read_opt = (stp->sd_read_opt & ~RD_MSGDIS) |
4600                             RD_MSGNODIS;
4601                         break;
4602                 }
4603 
4604                 switch (arg & RPROTMASK) {
4605                 case RPROTNORM:
4606                         stp->sd_read_opt &= ~(RD_PROTDAT | RD_PROTDIS);
4607                         break;
4608 
4609                 case RPROTDAT:
4610                         stp->sd_read_opt = ((stp->sd_read_opt & ~RD_PROTDIS) |
4611                             RD_PROTDAT);
4612                         break;
4613 
4614                 case RPROTDIS:
4615                         stp->sd_read_opt = ((stp->sd_read_opt & ~RD_PROTDAT) |
4616                             RD_PROTDIS);
4617                         break;
4618                 }
4619                 mutex_exit(&stp->sd_lock);
4620                 return (0);
4621 
4622         case I_GRDOPT:
4623                 /*
4624                  * Get read option and return the value
4625                  * to spot pointed to by arg
4626                  */
4627         {
4628                 int rdopt;
4629 
4630                 rdopt = ((stp->sd_read_opt & RD_MSGDIS) ? RMSGD :
4631                     ((stp->sd_read_opt & RD_MSGNODIS) ? RMSGN : RNORM));
4632                 rdopt |= ((stp->sd_read_opt & RD_PROTDAT) ? RPROTDAT :
4633                     ((stp->sd_read_opt & RD_PROTDIS) ? RPROTDIS : RPROTNORM));
4634 
4635                 return (strcopyout(&rdopt, (void *)arg, sizeof (int),
4636                     copyflag));
4637         }
4638 
4639         case I_SERROPT:
4640                 /*
4641                  * Set error options
4642                  *
4643                  * RERRNORM - persistent read errors
4644                  * RERRNONPERSIST - non-persistent read errors
4645                  * WERRNORM - persistent write errors
4646                  * WERRNONPERSIST - non-persistent write errors
4647                  */
4648                 if (arg & ~(RERRMASK | WERRMASK))
4649                         return (EINVAL);
4650 
4651                 mutex_enter(&stp->sd_lock);
4652                 switch (arg & RERRMASK) {
4653                 case RERRNORM:
4654                         stp->sd_flag &= ~STRDERRNONPERSIST;
4655                         break;
4656                 case RERRNONPERSIST:
4657                         stp->sd_flag |= STRDERRNONPERSIST;
4658                         break;
4659                 }
4660                 switch (arg & WERRMASK) {
4661                 case WERRNORM:
4662                         stp->sd_flag &= ~STWRERRNONPERSIST;
4663                         break;
4664                 case WERRNONPERSIST:
4665                         stp->sd_flag |= STWRERRNONPERSIST;
4666                         break;
4667                 }
4668                 mutex_exit(&stp->sd_lock);
4669                 return (0);
4670 
4671         case I_GERROPT:
4672                 /*
4673                  * Get error option and return the value
4674                  * to spot pointed to by arg
4675                  */
4676         {
4677                 int erropt = 0;
4678 
4679                 erropt |= (stp->sd_flag & STRDERRNONPERSIST) ? RERRNONPERSIST :
4680                     RERRNORM;
4681                 erropt |= (stp->sd_flag & STWRERRNONPERSIST) ? WERRNONPERSIST :
4682                     WERRNORM;
4683                 return (strcopyout(&erropt, (void *)arg, sizeof (int),
4684                     copyflag));
4685         }
4686 
4687         case I_SETSIG:
4688                 /*
4689                  * Register the calling proc to receive the SIGPOLL
4690                  * signal based on the events given in arg.  If
4691                  * arg is zero, remove the proc from register list.
4692                  */
4693         {
4694                 strsig_t *ssp, *pssp;
4695                 struct pid *pidp;
4696 
4697                 pssp = NULL;
4698                 pidp = curproc->p_pidp;
4699                 /*
4700                  * Hold sd_lock to prevent traversal of sd_siglist while
4701                  * it is modified.
4702                  */
4703                 mutex_enter(&stp->sd_lock);
4704                 for (ssp = stp->sd_siglist; ssp && (ssp->ss_pidp != pidp);
4705                     pssp = ssp, ssp = ssp->ss_next)
4706                         ;
4707 
4708                 if (arg) {
4709                         if (arg & ~(S_INPUT|S_HIPRI|S_MSG|S_HANGUP|S_ERROR|
4710                             S_RDNORM|S_WRNORM|S_RDBAND|S_WRBAND|S_BANDURG)) {
4711                                 mutex_exit(&stp->sd_lock);
4712                                 return (EINVAL);
4713                         }
4714                         if ((arg & S_BANDURG) && !(arg & S_RDBAND)) {
4715                                 mutex_exit(&stp->sd_lock);
4716                                 return (EINVAL);
4717                         }
4718 
4719                         /*
4720                          * If proc not already registered, add it
4721                          * to list.
4722                          */
4723                         if (!ssp) {
4724                                 ssp = kmem_alloc(sizeof (strsig_t), KM_SLEEP);
4725                                 ssp->ss_pidp = pidp;
4726                                 ssp->ss_pid = pidp->pid_id;
4727                                 ssp->ss_next = NULL;
4728                                 if (pssp)
4729                                         pssp->ss_next = ssp;
4730                                 else
4731                                         stp->sd_siglist = ssp;
4732                                 mutex_enter(&pidlock);
4733                                 PID_HOLD(pidp);
4734                                 mutex_exit(&pidlock);
4735                         }
4736 
4737                         /*
4738                          * Set events.
4739                          */
4740                         ssp->ss_events = (int)arg;
4741                 } else {
4742                         /*
4743                          * Remove proc from register list.
4744                          */
4745                         if (ssp) {
4746                                 mutex_enter(&pidlock);
4747                                 PID_RELE(pidp);
4748                                 mutex_exit(&pidlock);
4749                                 if (pssp)
4750                                         pssp->ss_next = ssp->ss_next;
4751                                 else
4752                                         stp->sd_siglist = ssp->ss_next;
4753                                 kmem_free(ssp, sizeof (strsig_t));
4754                         } else {
4755                                 mutex_exit(&stp->sd_lock);
4756                                 return (EINVAL);
4757                         }
4758                 }
4759 
4760                 /*
4761                  * Recalculate OR of sig events.
4762                  */
4763                 stp->sd_sigflags = 0;
4764                 for (ssp = stp->sd_siglist; ssp; ssp = ssp->ss_next)
4765                         stp->sd_sigflags |= ssp->ss_events;
4766                 mutex_exit(&stp->sd_lock);
4767                 return (0);
4768         }
4769 
4770         case I_GETSIG:
4771                 /*
4772                  * Return (in arg) the current registration of events
4773                  * for which the calling proc is to be signaled.
4774                  */
4775         {
4776                 struct strsig *ssp;
4777                 struct pid  *pidp;
4778 
4779                 pidp = curproc->p_pidp;
4780                 mutex_enter(&stp->sd_lock);
4781                 for (ssp = stp->sd_siglist; ssp; ssp = ssp->ss_next)
4782                         if (ssp->ss_pidp == pidp) {
4783                                 error = strcopyout(&ssp->ss_events, (void *)arg,
4784                                     sizeof (int), copyflag);
4785                                 mutex_exit(&stp->sd_lock);
4786                                 return (error);
4787                         }
4788                 mutex_exit(&stp->sd_lock);
4789                 return (EINVAL);
4790         }
4791 
4792         case I_ESETSIG:
4793                 /*
4794                  * Register the ss_pid to receive the SIGPOLL
4795                  * signal based on the events is ss_events arg.  If
4796                  * ss_events is zero, remove the proc from register list.
4797                  */
4798         {
4799                 struct strsig *ssp, *pssp;
4800                 struct proc *proc;
4801                 struct pid  *pidp;
4802                 pid_t pid;
4803                 struct strsigset ss;
4804 
4805                 error = strcopyin((void *)arg, &ss, sizeof (ss), copyflag);
4806                 if (error)
4807                         return (error);
4808 
4809                 pid = ss.ss_pid;
4810 
4811                 if (ss.ss_events != 0) {
4812                         /*
4813                          * Permissions check by sending signal 0.
4814                          * Note that when kill fails it does a set_errno
4815                          * causing the system call to fail.
4816                          */
4817                         error = kill(pid, 0);
4818                         if (error) {
4819                                 return (error);
4820                         }
4821                 }
4822                 mutex_enter(&pidlock);
4823                 if (pid == 0)
4824                         proc = curproc;
4825                 else if (pid < 0)
4826                         proc = pgfind(-pid);
4827                 else
4828                         proc = prfind(pid);
4829                 if (proc == NULL) {
4830                         mutex_exit(&pidlock);
4831                         return (ESRCH);
4832                 }
4833                 if (pid < 0)
4834                         pidp = proc->p_pgidp;
4835                 else
4836                         pidp = proc->p_pidp;
4837                 ASSERT(pidp);
4838                 /*
4839                  * Get a hold on the pid structure while referencing it.
4840                  * There is a separate PID_HOLD should it be inserted
4841                  * in the list below.
4842                  */
4843                 PID_HOLD(pidp);
4844                 mutex_exit(&pidlock);
4845 
4846                 pssp = NULL;
4847                 /*
4848                  * Hold sd_lock to prevent traversal of sd_siglist while
4849                  * it is modified.
4850                  */
4851                 mutex_enter(&stp->sd_lock);
4852                 for (ssp = stp->sd_siglist; ssp && (ssp->ss_pid != pid);
4853                     pssp = ssp, ssp = ssp->ss_next)
4854                         ;
4855 
4856                 if (ss.ss_events) {
4857                         if (ss.ss_events &
4858                             ~(S_INPUT|S_HIPRI|S_MSG|S_HANGUP|S_ERROR|
4859                             S_RDNORM|S_WRNORM|S_RDBAND|S_WRBAND|S_BANDURG)) {
4860                                 mutex_exit(&stp->sd_lock);
4861                                 mutex_enter(&pidlock);
4862                                 PID_RELE(pidp);
4863                                 mutex_exit(&pidlock);
4864                                 return (EINVAL);
4865                         }
4866                         if ((ss.ss_events & S_BANDURG) &&
4867                             !(ss.ss_events & S_RDBAND)) {
4868                                 mutex_exit(&stp->sd_lock);
4869                                 mutex_enter(&pidlock);
4870                                 PID_RELE(pidp);
4871                                 mutex_exit(&pidlock);
4872                                 return (EINVAL);
4873                         }
4874 
4875                         /*
4876                          * If proc not already registered, add it
4877                          * to list.
4878                          */
4879                         if (!ssp) {
4880                                 ssp = kmem_alloc(sizeof (strsig_t), KM_SLEEP);
4881                                 ssp->ss_pidp = pidp;
4882                                 ssp->ss_pid = pid;
4883                                 ssp->ss_next = NULL;
4884                                 if (pssp)
4885                                         pssp->ss_next = ssp;
4886                                 else
4887                                         stp->sd_siglist = ssp;
4888                                 mutex_enter(&pidlock);
4889                                 PID_HOLD(pidp);
4890                                 mutex_exit(&pidlock);
4891                         }
4892 
4893                         /*
4894                          * Set events.
4895                          */
4896                         ssp->ss_events = ss.ss_events;
4897                 } else {
4898                         /*
4899                          * Remove proc from register list.
4900                          */
4901                         if (ssp) {
4902                                 mutex_enter(&pidlock);
4903                                 PID_RELE(pidp);
4904                                 mutex_exit(&pidlock);
4905                                 if (pssp)
4906                                         pssp->ss_next = ssp->ss_next;
4907                                 else
4908                                         stp->sd_siglist = ssp->ss_next;
4909                                 kmem_free(ssp, sizeof (strsig_t));
4910                         } else {
4911                                 mutex_exit(&stp->sd_lock);
4912                                 mutex_enter(&pidlock);
4913                                 PID_RELE(pidp);
4914                                 mutex_exit(&pidlock);
4915                                 return (EINVAL);
4916                         }
4917                 }
4918 
4919                 /*
4920                  * Recalculate OR of sig events.
4921                  */
4922                 stp->sd_sigflags = 0;
4923                 for (ssp = stp->sd_siglist; ssp; ssp = ssp->ss_next)
4924                         stp->sd_sigflags |= ssp->ss_events;
4925                 mutex_exit(&stp->sd_lock);
4926                 mutex_enter(&pidlock);
4927                 PID_RELE(pidp);
4928                 mutex_exit(&pidlock);
4929                 return (0);
4930         }
4931 
4932         case I_EGETSIG:
4933                 /*
4934                  * Return (in arg) the current registration of events
4935                  * for which the calling proc is to be signaled.
4936                  */
4937         {
4938                 struct strsig *ssp;
4939                 struct proc *proc;
4940                 pid_t pid;
4941                 struct pid  *pidp;
4942                 struct strsigset ss;
4943 
4944                 error = strcopyin((void *)arg, &ss, sizeof (ss), copyflag);
4945                 if (error)
4946                         return (error);
4947 
4948                 pid = ss.ss_pid;
4949                 mutex_enter(&pidlock);
4950                 if (pid == 0)
4951                         proc = curproc;
4952                 else if (pid < 0)
4953                         proc = pgfind(-pid);
4954                 else
4955                         proc = prfind(pid);
4956                 if (proc == NULL) {
4957                         mutex_exit(&pidlock);
4958                         return (ESRCH);
4959                 }
4960                 if (pid < 0)
4961                         pidp = proc->p_pgidp;
4962                 else
4963                         pidp = proc->p_pidp;
4964 
4965                 /* Prevent the pidp from being reassigned */
4966                 PID_HOLD(pidp);
4967                 mutex_exit(&pidlock);
4968 
4969                 mutex_enter(&stp->sd_lock);
4970                 for (ssp = stp->sd_siglist; ssp; ssp = ssp->ss_next)
4971                         if (ssp->ss_pid == pid) {
4972                                 ss.ss_pid = ssp->ss_pid;
4973                                 ss.ss_events = ssp->ss_events;
4974                                 error = strcopyout(&ss, (void *)arg,
4975                                     sizeof (struct strsigset), copyflag);
4976                                 mutex_exit(&stp->sd_lock);
4977                                 mutex_enter(&pidlock);
4978                                 PID_RELE(pidp);
4979                                 mutex_exit(&pidlock);
4980                                 return (error);
4981                         }
4982                 mutex_exit(&stp->sd_lock);
4983                 mutex_enter(&pidlock);
4984                 PID_RELE(pidp);
4985                 mutex_exit(&pidlock);
4986                 return (EINVAL);
4987         }
4988 
4989         case I_PEEK:
4990         {
4991                 STRUCT_DECL(strpeek, strpeek);
4992                 size_t n;
4993                 mblk_t *fmp, *tmp_mp = NULL;
4994 
4995                 STRUCT_INIT(strpeek, flag);
4996 
4997                 error = strcopyin((void *)arg, STRUCT_BUF(strpeek),
4998                     STRUCT_SIZE(strpeek), copyflag);
4999                 if (error)
5000                         return (error);
5001 
5002                 mutex_enter(QLOCK(rdq));
5003                 /*
5004                  * Skip the invalid messages
5005                  */
5006                 for (mp = rdq->q_first; mp != NULL; mp = mp->b_next)
5007                         if (mp->b_datap->db_type != M_SIG)
5008                                 break;
5009 
5010                 /*
5011                  * If user has requested to peek at a high priority message
5012                  * and first message is not, return 0
5013                  */
5014                 if (mp != NULL) {
5015                         if ((STRUCT_FGET(strpeek, flags) & RS_HIPRI) &&
5016                             queclass(mp) == QNORM) {
5017                                 *rvalp = 0;
5018                                 mutex_exit(QLOCK(rdq));
5019                                 return (0);
5020                         }
5021                 } else if (stp->sd_struiordq == NULL ||
5022                     (STRUCT_FGET(strpeek, flags) & RS_HIPRI)) {
5023                         /*
5024                          * No mblks to look at at the streamhead and
5025                          * 1). This isn't a synch stream or
5026                          * 2). This is a synch stream but caller wants high
5027                          *      priority messages which is not supported by
5028                          *      the synch stream. (it only supports QNORM)
5029                          */
5030                         *rvalp = 0;
5031                         mutex_exit(QLOCK(rdq));
5032                         return (0);
5033                 }
5034 
5035                 fmp = mp;
5036 
5037                 if (mp && mp->b_datap->db_type == M_PASSFP) {
5038                         mutex_exit(QLOCK(rdq));
5039                         return (EBADMSG);
5040                 }
5041 
5042                 ASSERT(mp == NULL || mp->b_datap->db_type == M_PCPROTO ||
5043                     mp->b_datap->db_type == M_PROTO ||
5044                     mp->b_datap->db_type == M_DATA);
5045 
5046                 if (mp && mp->b_datap->db_type == M_PCPROTO) {
5047                         STRUCT_FSET(strpeek, flags, RS_HIPRI);
5048                 } else {
5049                         STRUCT_FSET(strpeek, flags, 0);
5050                 }
5051 
5052 
5053                 if (mp && ((tmp_mp = dupmsg(mp)) == NULL)) {
5054                         mutex_exit(QLOCK(rdq));
5055                         return (ENOSR);
5056                 }
5057                 mutex_exit(QLOCK(rdq));
5058 
5059                 /*
5060                  * set mp = tmp_mp, so that I_PEEK processing can continue.
5061                  * tmp_mp is used to free the dup'd message.
5062                  */
5063                 mp = tmp_mp;
5064 
5065                 uio.uio_fmode = 0;
5066                 uio.uio_extflg = UIO_COPY_CACHED;
5067                 uio.uio_segflg = (copyflag == U_TO_K) ? UIO_USERSPACE :
5068                     UIO_SYSSPACE;
5069                 uio.uio_limit = 0;
5070                 /*
5071                  * First process PROTO blocks, if any.
5072                  * If user doesn't want to get ctl info by setting maxlen <= 0,
5073                  * then set len to -1/0 and skip control blocks part.
5074                  */
5075                 if (STRUCT_FGET(strpeek, ctlbuf.maxlen) < 0)
5076                         STRUCT_FSET(strpeek, ctlbuf.len, -1);
5077                 else if (STRUCT_FGET(strpeek, ctlbuf.maxlen) == 0)
5078                         STRUCT_FSET(strpeek, ctlbuf.len, 0);
5079                 else {
5080                         int     ctl_part = 0;
5081 
5082                         iov.iov_base = STRUCT_FGETP(strpeek, ctlbuf.buf);
5083                         iov.iov_len = STRUCT_FGET(strpeek, ctlbuf.maxlen);
5084                         uio.uio_iov = &iov;
5085                         uio.uio_resid = iov.iov_len;
5086                         uio.uio_loffset = 0;
5087                         uio.uio_iovcnt = 1;
5088                         while (mp && mp->b_datap->db_type != M_DATA &&
5089                             uio.uio_resid >= 0) {
5090                                 ASSERT(STRUCT_FGET(strpeek, flags) == 0 ?
5091                                     mp->b_datap->db_type == M_PROTO :
5092                                     mp->b_datap->db_type == M_PCPROTO);
5093 
5094                                 if ((n = MIN(uio.uio_resid,
5095                                     mp->b_wptr - mp->b_rptr)) != 0 &&
5096                                     (error = uiomove((char *)mp->b_rptr, n,
5097                                     UIO_READ, &uio)) != 0) {
5098                                         freemsg(tmp_mp);
5099                                         return (error);
5100                                 }
5101                                 ctl_part = 1;
5102                                 mp = mp->b_cont;
5103                         }
5104                         /* No ctl message */
5105                         if (ctl_part == 0)
5106                                 STRUCT_FSET(strpeek, ctlbuf.len, -1);
5107                         else
5108                                 STRUCT_FSET(strpeek, ctlbuf.len,
5109                                     STRUCT_FGET(strpeek, ctlbuf.maxlen) -
5110                                     uio.uio_resid);
5111                 }
5112 
5113                 /*
5114                  * Now process DATA blocks, if any.
5115                  * If user doesn't want to get data info by setting maxlen <= 0,
5116                  * then set len to -1/0 and skip data blocks part.
5117                  */
5118                 if (STRUCT_FGET(strpeek, databuf.maxlen) < 0)
5119                         STRUCT_FSET(strpeek, databuf.len, -1);
5120                 else if (STRUCT_FGET(strpeek, databuf.maxlen) == 0)
5121                         STRUCT_FSET(strpeek, databuf.len, 0);
5122                 else {
5123                         int     data_part = 0;
5124 
5125                         iov.iov_base = STRUCT_FGETP(strpeek, databuf.buf);
5126                         iov.iov_len = STRUCT_FGET(strpeek, databuf.maxlen);
5127                         uio.uio_iov = &iov;
5128                         uio.uio_resid = iov.iov_len;
5129                         uio.uio_loffset = 0;
5130                         uio.uio_iovcnt = 1;
5131                         while (mp && uio.uio_resid) {
5132                                 if (mp->b_datap->db_type == M_DATA) {
5133                                         if ((n = MIN(uio.uio_resid,
5134                                             mp->b_wptr - mp->b_rptr)) != 0 &&
5135                                             (error = uiomove((char *)mp->b_rptr,
5136                                             n, UIO_READ, &uio)) != 0) {
5137                                                 freemsg(tmp_mp);
5138                                                 return (error);
5139                                         }
5140                                         data_part = 1;
5141                                 }
5142                                 ASSERT(data_part == 0 ||
5143                                     mp->b_datap->db_type == M_DATA);
5144                                 mp = mp->b_cont;
5145                         }
5146                         /* No data message */
5147                         if (data_part == 0)
5148                                 STRUCT_FSET(strpeek, databuf.len, -1);
5149                         else
5150                                 STRUCT_FSET(strpeek, databuf.len,
5151                                     STRUCT_FGET(strpeek, databuf.maxlen) -
5152                                     uio.uio_resid);
5153                 }
5154                 freemsg(tmp_mp);
5155 
5156                 /*
5157                  * It is a synch stream and user wants to get
5158                  * data (maxlen > 0).
5159                  * uio setup is done by the codes that process DATA
5160                  * blocks above.
5161                  */
5162                 if ((fmp == NULL) && STRUCT_FGET(strpeek, databuf.maxlen) > 0) {
5163                         infod_t infod;
5164 
5165                         infod.d_cmd = INFOD_COPYOUT;
5166                         infod.d_res = 0;
5167                         infod.d_uiop = &uio;
5168                         error = infonext(rdq, &infod);
5169                         if (error == EINVAL || error == EBUSY)
5170                                 error = 0;
5171                         if (error)
5172                                 return (error);
5173                         STRUCT_FSET(strpeek, databuf.len, STRUCT_FGET(strpeek,
5174                             databuf.maxlen) - uio.uio_resid);
5175                         if (STRUCT_FGET(strpeek, databuf.len) == 0) {
5176                                 /*
5177                                  * No data found by the infonext().
5178                                  */
5179                                 STRUCT_FSET(strpeek, databuf.len, -1);
5180                         }
5181                 }
5182                 error = strcopyout(STRUCT_BUF(strpeek), (void *)arg,
5183                     STRUCT_SIZE(strpeek), copyflag);
5184                 if (error) {
5185                         return (error);
5186                 }
5187                 /*
5188                  * If there is no message retrieved, set return code to 0
5189                  * otherwise, set it to 1.
5190                  */
5191                 if (STRUCT_FGET(strpeek, ctlbuf.len) == -1 &&
5192                     STRUCT_FGET(strpeek, databuf.len) == -1)
5193                         *rvalp = 0;
5194                 else
5195                         *rvalp = 1;
5196                 return (0);
5197         }
5198 
5199         case I_FDINSERT:
5200         {
5201                 STRUCT_DECL(strfdinsert, strfdinsert);
5202                 struct file *resftp;
5203                 struct stdata *resstp;
5204                 t_uscalar_t     ival;
5205                 ssize_t msgsize;
5206                 struct strbuf mctl;
5207 
5208                 STRUCT_INIT(strfdinsert, flag);
5209                 if (stp->sd_flag & STRHUP)
5210                         return (ENXIO);
5211                 /*
5212                  * STRDERR, STWRERR and STPLEX tested above.
5213                  */
5214                 error = strcopyin((void *)arg, STRUCT_BUF(strfdinsert),
5215                     STRUCT_SIZE(strfdinsert), copyflag);
5216                 if (error)
5217                         return (error);
5218 
5219                 if (STRUCT_FGET(strfdinsert, offset) < 0 ||
5220                     (STRUCT_FGET(strfdinsert, offset) %
5221                     sizeof (t_uscalar_t)) != 0)
5222                         return (EINVAL);
5223                 if ((resftp = getf(STRUCT_FGET(strfdinsert, fildes))) != NULL) {
5224                         if ((resstp = resftp->f_vnode->v_stream) == NULL) {
5225                                 releasef(STRUCT_FGET(strfdinsert, fildes));
5226                                 return (EINVAL);
5227                         }
5228                 } else
5229                         return (EINVAL);
5230 
5231                 mutex_enter(&resstp->sd_lock);
5232                 if (resstp->sd_flag & (STRDERR|STWRERR|STRHUP|STPLEX)) {
5233                         error = strgeterr(resstp,
5234                             STRDERR|STWRERR|STRHUP|STPLEX, 0);
5235                         if (error != 0) {
5236                                 mutex_exit(&resstp->sd_lock);
5237                                 releasef(STRUCT_FGET(strfdinsert, fildes));
5238                                 return (error);
5239                         }
5240                 }
5241                 mutex_exit(&resstp->sd_lock);
5242 
5243 #ifdef  _ILP32
5244                 {
5245                         queue_t *q;
5246                         queue_t *mate = NULL;
5247 
5248                         /* get read queue of stream terminus */
5249                         claimstr(resstp->sd_wrq);
5250                         for (q = resstp->sd_wrq->q_next; q->q_next != NULL;
5251                             q = q->q_next)
5252                                 if (!STRMATED(resstp) && STREAM(q) != resstp &&
5253                                     mate == NULL) {
5254                                         ASSERT(q->q_qinfo->qi_srvp);
5255                                         ASSERT(_OTHERQ(q)->q_qinfo->qi_srvp);
5256                                         claimstr(q);
5257                                         mate = q;
5258                                 }
5259                         q = _RD(q);
5260                         if (mate)
5261                                 releasestr(mate);
5262                         releasestr(resstp->sd_wrq);
5263                         ival = (t_uscalar_t)q;
5264                 }
5265 #else
5266                 ival = (t_uscalar_t)getminor(resftp->f_vnode->v_rdev);
5267 #endif  /* _ILP32 */
5268 
5269                 if (STRUCT_FGET(strfdinsert, ctlbuf.len) <
5270                     STRUCT_FGET(strfdinsert, offset) + sizeof (t_uscalar_t)) {
5271                         releasef(STRUCT_FGET(strfdinsert, fildes));
5272                         return (EINVAL);
5273                 }
5274 
5275                 /*
5276                  * Check for legal flag value.
5277                  */
5278                 if (STRUCT_FGET(strfdinsert, flags) & ~RS_HIPRI) {
5279                         releasef(STRUCT_FGET(strfdinsert, fildes));
5280                         return (EINVAL);
5281                 }
5282 
5283                 /* get these values from those cached in the stream head */
5284                 mutex_enter(QLOCK(stp->sd_wrq));
5285                 rmin = stp->sd_qn_minpsz;
5286                 rmax = stp->sd_qn_maxpsz;
5287                 mutex_exit(QLOCK(stp->sd_wrq));
5288 
5289                 /*
5290                  * Make sure ctl and data sizes together fall within
5291                  * the limits of the max and min receive packet sizes
5292                  * and do not exceed system limit.  A negative data
5293                  * length means that no data part is to be sent.
5294                  */
5295                 ASSERT((rmax >= 0) || (rmax == INFPSZ));
5296                 if (rmax == 0) {
5297                         releasef(STRUCT_FGET(strfdinsert, fildes));
5298                         return (ERANGE);
5299                 }
5300                 if ((msgsize = STRUCT_FGET(strfdinsert, databuf.len)) < 0)
5301                         msgsize = 0;
5302                 if ((msgsize < rmin) ||
5303                     ((msgsize > rmax) && (rmax != INFPSZ)) ||
5304                     (STRUCT_FGET(strfdinsert, ctlbuf.len) > strctlsz)) {
5305                         releasef(STRUCT_FGET(strfdinsert, fildes));
5306                         return (ERANGE);
5307                 }
5308 
5309                 mutex_enter(&stp->sd_lock);
5310                 while (!(STRUCT_FGET(strfdinsert, flags) & RS_HIPRI) &&
5311                     !canputnext(stp->sd_wrq)) {
5312                         if ((error = strwaitq(stp, WRITEWAIT, (ssize_t)0,
5313                             flag, -1, &done)) != 0 || done) {
5314                                 mutex_exit(&stp->sd_lock);
5315                                 releasef(STRUCT_FGET(strfdinsert, fildes));
5316                                 return (error);
5317                         }
5318                         if ((error = i_straccess(stp, access)) != 0) {
5319                                 mutex_exit(&stp->sd_lock);
5320                                 releasef(
5321                                     STRUCT_FGET(strfdinsert, fildes));
5322                                 return (error);
5323                         }
5324                 }
5325                 mutex_exit(&stp->sd_lock);
5326 
5327                 /*
5328                  * Copy strfdinsert.ctlbuf into native form of
5329                  * ctlbuf to pass down into strmakemsg().
5330                  */
5331                 mctl.maxlen = STRUCT_FGET(strfdinsert, ctlbuf.maxlen);
5332                 mctl.len = STRUCT_FGET(strfdinsert, ctlbuf.len);
5333                 mctl.buf = STRUCT_FGETP(strfdinsert, ctlbuf.buf);
5334 
5335                 iov.iov_base = STRUCT_FGETP(strfdinsert, databuf.buf);
5336                 iov.iov_len = STRUCT_FGET(strfdinsert, databuf.len);
5337                 uio.uio_iov = &iov;
5338                 uio.uio_iovcnt = 1;
5339                 uio.uio_loffset = 0;
5340                 uio.uio_segflg = (copyflag == U_TO_K) ? UIO_USERSPACE :
5341                     UIO_SYSSPACE;
5342                 uio.uio_fmode = 0;
5343                 uio.uio_extflg = UIO_COPY_CACHED;
5344                 uio.uio_resid = iov.iov_len;
5345                 if ((error = strmakemsg(&mctl,
5346                     &msgsize, &uio, stp,
5347                     STRUCT_FGET(strfdinsert, flags), &mp)) != 0 || !mp) {
5348                         STRUCT_FSET(strfdinsert, databuf.len, msgsize);
5349                         releasef(STRUCT_FGET(strfdinsert, fildes));
5350                         return (error);
5351                 }
5352 
5353                 STRUCT_FSET(strfdinsert, databuf.len, msgsize);
5354 
5355                 /*
5356                  * Place the possibly reencoded queue pointer 'offset' bytes
5357                  * from the start of the control portion of the message.
5358                  */
5359                 *((t_uscalar_t *)(mp->b_rptr +
5360                     STRUCT_FGET(strfdinsert, offset))) = ival;
5361 
5362                 /*
5363                  * Put message downstream.
5364                  */
5365                 stream_willservice(stp);
5366                 putnext(stp->sd_wrq, mp);
5367                 stream_runservice(stp);
5368                 releasef(STRUCT_FGET(strfdinsert, fildes));
5369                 return (error);
5370         }
5371 
5372         case I_SENDFD:
5373         {
5374                 struct file *fp;
5375 
5376                 if ((fp = getf((int)arg)) == NULL)
5377                         return (EBADF);
5378                 error = do_sendfp(stp, fp, crp);
5379                 if (auditing) {
5380                         audit_fdsend((int)arg, fp, error);
5381                 }
5382                 releasef((int)arg);
5383                 return (error);
5384         }
5385 
5386         case I_RECVFD:
5387         case I_E_RECVFD:
5388         {
5389                 struct k_strrecvfd *srf;
5390                 int i, fd;
5391 
5392                 mutex_enter(&stp->sd_lock);
5393                 while (!(mp = getq(rdq))) {
5394                         if (stp->sd_flag & (STRHUP|STREOF)) {
5395                                 mutex_exit(&stp->sd_lock);
5396                                 return (ENXIO);
5397                         }
5398                         if ((error = strwaitq(stp, GETWAIT, (ssize_t)0,
5399                             flag, -1, &done)) != 0 || done) {
5400                                 mutex_exit(&stp->sd_lock);
5401                                 return (error);
5402                         }
5403                         if ((error = i_straccess(stp, access)) != 0) {
5404                                 mutex_exit(&stp->sd_lock);
5405                                 return (error);
5406                         }
5407                 }
5408                 if (mp->b_datap->db_type != M_PASSFP) {
5409                         putback(stp, rdq, mp, mp->b_band);
5410                         mutex_exit(&stp->sd_lock);
5411                         return (EBADMSG);
5412                 }
5413                 mutex_exit(&stp->sd_lock);
5414 
5415                 srf = (struct k_strrecvfd *)mp->b_rptr;
5416                 if ((fd = ufalloc(0)) == -1) {
5417                         mutex_enter(&stp->sd_lock);
5418                         putback(stp, rdq, mp, mp->b_band);
5419                         mutex_exit(&stp->sd_lock);
5420                         return (EMFILE);
5421                 }
5422                 if (cmd == I_RECVFD) {
5423                         struct o_strrecvfd      ostrfd;
5424 
5425                         /* check to see if uid/gid values are too large. */
5426 
5427                         if (srf->uid > (o_uid_t)USHRT_MAX ||
5428                             srf->gid > (o_gid_t)USHRT_MAX) {
5429                                 mutex_enter(&stp->sd_lock);
5430                                 putback(stp, rdq, mp, mp->b_band);
5431                                 mutex_exit(&stp->sd_lock);
5432                                 setf(fd, NULL); /* release fd entry */
5433                                 return (EOVERFLOW);
5434                         }
5435 
5436                         ostrfd.fd = fd;
5437                         ostrfd.uid = (o_uid_t)srf->uid;
5438                         ostrfd.gid = (o_gid_t)srf->gid;
5439 
5440                         /* Null the filler bits */
5441                         for (i = 0; i < 8; i++)
5442                                 ostrfd.fill[i] = 0;
5443 
5444                         error = strcopyout(&ostrfd, (void *)arg,
5445                             sizeof (struct o_strrecvfd), copyflag);
5446                 } else {                /* I_E_RECVFD */
5447                         struct strrecvfd        strfd;
5448 
5449                         strfd.fd = fd;
5450                         strfd.uid = srf->uid;
5451                         strfd.gid = srf->gid;
5452 
5453                         /* null the filler bits */
5454                         for (i = 0; i < 8; i++)
5455                                 strfd.fill[i] = 0;
5456 
5457                         error = strcopyout(&strfd, (void *)arg,
5458                             sizeof (struct strrecvfd), copyflag);
5459                 }
5460 
5461                 if (error) {
5462                         setf(fd, NULL); /* release fd entry */
5463                         mutex_enter(&stp->sd_lock);
5464                         putback(stp, rdq, mp, mp->b_band);
5465                         mutex_exit(&stp->sd_lock);
5466                         return (error);
5467                 }
5468                 if (auditing) {
5469                         audit_fdrecv(fd, srf->fp);
5470                 }
5471 
5472                 /*
5473                  * Always increment f_count since the freemsg() below will
5474                  * always call free_passfp() which performs a closef().
5475                  */
5476                 mutex_enter(&srf->fp->f_tlock);
5477                 srf->fp->f_count++;
5478                 mutex_exit(&srf->fp->f_tlock);
5479                 setf(fd, srf->fp);
5480                 freemsg(mp);
5481                 return (0);
5482         }
5483 
5484         case I_SWROPT:
5485                 /*
5486                  * Set/clear the write options. arg is a bit
5487                  * mask with any of the following bits set...
5488                  *      SNDZERO - send zero length message
5489                  *      SNDPIPE - send sigpipe to process if
5490                  *              sd_werror is set and process is
5491                  *              doing a write or putmsg.
5492                  * The new stream head write options should reflect
5493                  * what is in arg.
5494                  */
5495                 if (arg & ~(SNDZERO|SNDPIPE))
5496                         return (EINVAL);
5497 
5498                 mutex_enter(&stp->sd_lock);
5499                 stp->sd_wput_opt &= ~(SW_SIGPIPE|SW_SNDZERO);
5500                 if (arg & SNDZERO)
5501                         stp->sd_wput_opt |= SW_SNDZERO;
5502                 if (arg & SNDPIPE)
5503                         stp->sd_wput_opt |= SW_SIGPIPE;
5504                 mutex_exit(&stp->sd_lock);
5505                 return (0);
5506 
5507         case I_GWROPT:
5508         {
5509                 int wropt = 0;
5510 
5511                 if (stp->sd_wput_opt & SW_SNDZERO)
5512                         wropt |= SNDZERO;
5513                 if (stp->sd_wput_opt & SW_SIGPIPE)
5514                         wropt |= SNDPIPE;
5515                 return (strcopyout(&wropt, (void *)arg, sizeof (wropt),
5516                     copyflag));
5517         }
5518 
5519         case I_LIST:
5520                 /*
5521                  * Returns all the modules found on this stream,
5522                  * upto the driver. If argument is NULL, return the
5523                  * number of modules (including driver). If argument
5524                  * is not NULL, copy the names into the structure
5525                  * provided.
5526                  */
5527 
5528         {
5529                 queue_t *q;
5530                 char *qname;
5531                 int i, nmods;
5532                 struct str_mlist *mlist;
5533                 STRUCT_DECL(str_list, strlist);
5534 
5535                 if (arg == 0) { /* Return number of modules plus driver */
5536                         if (stp->sd_vnode->v_type == VFIFO)
5537                                 *rvalp = stp->sd_pushcnt;
5538                         else
5539                                 *rvalp = stp->sd_pushcnt + 1;
5540                         return (0);
5541                 }
5542 
5543                 STRUCT_INIT(strlist, flag);
5544 
5545                 error = strcopyin((void *)arg, STRUCT_BUF(strlist),
5546                     STRUCT_SIZE(strlist), copyflag);
5547                 if (error != 0)
5548                         return (error);
5549 
5550                 mlist = STRUCT_FGETP(strlist, sl_modlist);
5551                 nmods = STRUCT_FGET(strlist, sl_nmods);
5552                 if (nmods <= 0)
5553                         return (EINVAL);
5554 
5555                 claimstr(stp->sd_wrq);
5556                 q = stp->sd_wrq;
5557                 for (i = 0; i < nmods && _SAMESTR(q); i++, q = q->q_next) {
5558                         qname = Q2NAME(q->q_next);
5559                         error = strcopyout(qname, &mlist[i], strlen(qname) + 1,
5560                             copyflag);
5561                         if (error != 0) {
5562                                 releasestr(stp->sd_wrq);
5563                                 return (error);
5564                         }
5565                 }
5566                 releasestr(stp->sd_wrq);
5567                 return (strcopyout(&i, (void *)arg, sizeof (int), copyflag));
5568         }
5569 
5570         case I_CKBAND:
5571         {
5572                 queue_t *q;
5573                 qband_t *qbp;
5574 
5575                 if ((arg < 0) || (arg >= NBAND))
5576                         return (EINVAL);
5577                 q = _RD(stp->sd_wrq);
5578                 mutex_enter(QLOCK(q));
5579                 if (arg > (int)q->q_nband) {
5580                         *rvalp = 0;
5581                 } else {
5582                         if (arg == 0) {
5583                                 if (q->q_first)
5584                                         *rvalp = 1;
5585                                 else
5586                                         *rvalp = 0;
5587                         } else {
5588                                 qbp = q->q_bandp;
5589                                 while (--arg > 0)
5590                                         qbp = qbp->qb_next;
5591                                 if (qbp->qb_first)
5592                                         *rvalp = 1;
5593                                 else
5594                                         *rvalp = 0;
5595                         }
5596                 }
5597                 mutex_exit(QLOCK(q));
5598                 return (0);
5599         }
5600 
5601         case I_GETBAND:
5602         {
5603                 int intpri;
5604                 queue_t *q;
5605 
5606                 q = _RD(stp->sd_wrq);
5607                 mutex_enter(QLOCK(q));
5608                 mp = q->q_first;
5609                 if (!mp) {
5610                         mutex_exit(QLOCK(q));
5611                         return (ENODATA);
5612                 }
5613                 intpri = (int)mp->b_band;
5614                 error = strcopyout(&intpri, (void *)arg, sizeof (int),
5615                     copyflag);
5616                 mutex_exit(QLOCK(q));
5617                 return (error);
5618         }
5619 
5620         case I_ATMARK:
5621         {
5622                 queue_t *q;
5623 
5624                 if (arg & ~(ANYMARK|LASTMARK))
5625                         return (EINVAL);
5626                 q = _RD(stp->sd_wrq);
5627                 mutex_enter(&stp->sd_lock);
5628                 if ((stp->sd_flag & STRATMARK) && (arg == ANYMARK)) {
5629                         *rvalp = 1;
5630                 } else {
5631                         mutex_enter(QLOCK(q));
5632                         mp = q->q_first;
5633 
5634                         if (mp == NULL)
5635                                 *rvalp = 0;
5636                         else if ((arg == ANYMARK) && (mp->b_flag & MSGMARK))
5637                                 *rvalp = 1;
5638                         else if ((arg == LASTMARK) && (mp == stp->sd_mark))
5639                                 *rvalp = 1;
5640                         else
5641                                 *rvalp = 0;
5642                         mutex_exit(QLOCK(q));
5643                 }
5644                 mutex_exit(&stp->sd_lock);
5645                 return (0);
5646         }
5647 
5648         case I_CANPUT:
5649         {
5650                 char band;
5651 
5652                 if ((arg < 0) || (arg >= NBAND))
5653                         return (EINVAL);
5654                 band = (char)arg;
5655                 *rvalp = bcanputnext(stp->sd_wrq, band);
5656                 return (0);
5657         }
5658 
5659         case I_SETCLTIME:
5660         {
5661                 int closetime;
5662 
5663                 error = strcopyin((void *)arg, &closetime, sizeof (int),
5664                     copyflag);
5665                 if (error)
5666                         return (error);
5667                 if (closetime < 0)
5668                         return (EINVAL);
5669 
5670                 stp->sd_closetime = closetime;
5671                 return (0);
5672         }
5673 
5674         case I_GETCLTIME:
5675         {
5676                 int closetime;
5677 
5678                 closetime = stp->sd_closetime;
5679                 return (strcopyout(&closetime, (void *)arg, sizeof (int),
5680                     copyflag));
5681         }
5682 
5683         case TIOCGSID:
5684         {
5685                 pid_t sid;
5686 
5687                 mutex_enter(&stp->sd_lock);
5688                 if (stp->sd_sidp == NULL) {
5689                         mutex_exit(&stp->sd_lock);
5690                         return (ENOTTY);
5691                 }
5692                 sid = stp->sd_sidp->pid_id;
5693                 mutex_exit(&stp->sd_lock);
5694                 return (strcopyout(&sid, (void *)arg, sizeof (pid_t),
5695                     copyflag));
5696         }
5697 
5698         case TIOCSPGRP:
5699         {
5700                 pid_t pgrp;
5701                 proc_t *q;
5702                 pid_t   sid, fg_pgid, bg_pgid;
5703 
5704                 if (error = strcopyin((void *)arg, &pgrp, sizeof (pid_t),
5705                     copyflag))
5706                         return (error);
5707                 mutex_enter(&stp->sd_lock);
5708                 mutex_enter(&pidlock);
5709                 if (stp->sd_sidp != ttoproc(curthread)->p_sessp->s_sidp) {
5710                         mutex_exit(&pidlock);
5711                         mutex_exit(&stp->sd_lock);
5712                         return (ENOTTY);
5713                 }
5714                 if (pgrp == stp->sd_pgidp->pid_id) {
5715                         mutex_exit(&pidlock);
5716                         mutex_exit(&stp->sd_lock);
5717                         return (0);
5718                 }
5719                 if (pgrp <= 0 || pgrp >= maxpid) {
5720                         mutex_exit(&pidlock);
5721                         mutex_exit(&stp->sd_lock);
5722                         return (EINVAL);
5723                 }
5724                 if ((q = pgfind(pgrp)) == NULL ||
5725                     q->p_sessp != ttoproc(curthread)->p_sessp) {
5726                         mutex_exit(&pidlock);
5727                         mutex_exit(&stp->sd_lock);
5728                         return (EPERM);
5729                 }
5730                 sid = stp->sd_sidp->pid_id;
5731                 fg_pgid = q->p_pgrp;
5732                 bg_pgid = stp->sd_pgidp->pid_id;
5733                 CL_SET_PROCESS_GROUP(curthread, sid, bg_pgid, fg_pgid);
5734                 PID_RELE(stp->sd_pgidp);
5735                 ctty_clear_sighuped();
5736                 stp->sd_pgidp = q->p_pgidp;
5737                 PID_HOLD(stp->sd_pgidp);
5738                 mutex_exit(&pidlock);
5739                 mutex_exit(&stp->sd_lock);
5740                 return (0);
5741         }
5742 
5743         case TIOCGPGRP:
5744         {
5745                 pid_t pgrp;
5746 
5747                 mutex_enter(&stp->sd_lock);
5748                 if (stp->sd_sidp == NULL) {
5749                         mutex_exit(&stp->sd_lock);
5750                         return (ENOTTY);
5751                 }
5752                 pgrp = stp->sd_pgidp->pid_id;
5753                 mutex_exit(&stp->sd_lock);
5754                 return (strcopyout(&pgrp, (void *)arg, sizeof (pid_t),
5755                     copyflag));
5756         }
5757 
5758         case TIOCSCTTY:
5759         {
5760                 return (strctty(stp));
5761         }
5762 
5763         case TIOCNOTTY:
5764         {
5765                 /* freectty() always assumes curproc. */
5766                 if (freectty(B_FALSE) != 0)
5767                         return (0);
5768                 return (ENOTTY);
5769         }
5770 
5771         case FIONBIO:
5772         case FIOASYNC:
5773                 return (0);     /* handled by the upper layer */
5774         }
5775 }
5776 
5777 /* M_PASSFP message free routine: closef() the file, then free srf. */
5778 static void
5779 free_passfp(struct k_strrecvfd *srf)
5780 {
5781         struct file *passed_fp = srf->fp;       /* read while srf is valid */
5782 
5783         (void) closef(passed_fp);       /* closef() result is ignored */
5784         kmem_free(srf, sizeof (*srf) + sizeof (frtn_t));
5785 }
5786 
5787 /* ARGSUSED */
5788 int
5789 do_sendfp(struct stdata *stp, struct file *fp, struct cred *cr)
5790 {
5791         queue_t *qp, *nextqp;
5792         struct k_strrecvfd *srf;
5793         mblk_t *mp;
5794         frtn_t *frtnp;
5795         size_t bufsize;
5796         queue_t *mate = NULL;
5797         syncq_t *sq = NULL;
5798         int retval = 0;
5799 
5800         if (stp->sd_flag & STRHUP)
5801                 return (ENXIO);
5802 
5803         claimstr(stp->sd_wrq);
5804 
5805         /* Fastpath, we have a pipe, and we are already mated, use it. */
5806         if (STRMATED(stp)) {
5807                 qp = _RD(stp->sd_mate->sd_wrq);
5808                 claimstr(qp);
5809                 mate = qp;
5810         } else { /* Not already mated. */
5811 
5812                 /*
5813                  * Walk the stream to the end of this one.
5814                  * assumes that the claimstr() will prevent
5815                  * plumbing between the stream head and the
5816                  * driver from changing
5817                  */
5818                 qp = stp->sd_wrq;
5819 
5820                 /*
5821                  * Loop until we reach the end of this stream.
5822                  * On completion, qp points to the write queue
5823                  * at the end of the stream, or the read queue
5824                  * at the stream head if this is a fifo.
5825                  */
5826                 while (((qp = qp->q_next) != NULL) && _SAMESTR(qp))
5827                         ;
5828 
5829                 /*
5830                  * Just in case we get a q_next which is NULL, but
5831                  * not at the end of the stream.  This is actually
5832                  * broken, so we set an assert to catch it in
5833                  * debug, and set an error and return if not debug.
5834                  */
5835                 ASSERT(qp);
5836                 if (qp == NULL) {
5837                         releasestr(stp->sd_wrq);
5838                         return (EINVAL);
5839                 }
5840 
5841                 /*
5842                  * Enter the syncq for the driver, so (hopefully)
5843                  * the queue values will not change on us.
5844                  * XXXX - This will only prevent the race IFF only
5845                  *   the write side modifies the q_next member, and
5846                  *   the put procedure is protected by at least
5847                  *   MT_PERQ.
5848                  */
5849                 if ((sq = qp->q_syncq) != NULL)
5850                         entersq(sq, SQ_PUT);
5851 
5852                 /* Now get the q_next value from this qp. */
5853                 nextqp = qp->q_next;
5854 
5855                 /*
5856                  * If nextqp exists and the other stream is different
5857                  * from this one claim the stream, set the mate, and
5858                  * get the read queue at the stream head of the other
5859                  * stream.  Assumes that nextqp was at least valid when
5860                  * we got it.  Hopefully the entersq of the driver
5861                  * will prevent it from changing on us.
5862                  */
5863                 if ((nextqp != NULL) && (STREAM(nextqp) != stp)) {
5864                         ASSERT(qp->q_qinfo->qi_srvp);
5865                         ASSERT(_OTHERQ(qp)->q_qinfo->qi_srvp);
5866                         ASSERT(_OTHERQ(qp->q_next)->q_qinfo->qi_srvp);
5867                         claimstr(nextqp);
5868 
5869                         /* Make sure we still have a q_next */
5870                         if (nextqp != qp->q_next) {
5871                                 releasestr(stp->sd_wrq);
5872                                 releasestr(nextqp);
5873                                 return (EINVAL);
5874                         }
5875 
5876                         qp = _RD(STREAM(nextqp)->sd_wrq);
5877                         mate = qp;
5878                 }
5879                 /* If we entered the synq above, leave it. */
5880                 if (sq != NULL)
5881                         leavesq(sq, SQ_PUT);
5882         } /*  STRMATED(STP)  */
5883 
5884         /* XXX prevents substitution of the ops vector */
5885         if (qp->q_qinfo != &strdata && qp->q_qinfo != &fifo_strdata) {
5886                 retval = EINVAL;
5887                 goto out;
5888         }
5889 
5890         if (qp->q_flag & QFULL) {
5891                 retval = EAGAIN;
5892                 goto out;
5893         }
5894 
5895         /*
5896          * Since M_PASSFP messages include a file descriptor, we use
5897          * esballoc() and specify a custom free routine (free_passfp()) that
5898          * will close the descriptor as part of freeing the message.  For
5899          * convenience, we stash the frtn_t right after the data block.
5900          */
5901         bufsize = sizeof (struct k_strrecvfd) + sizeof (frtn_t);
5902         srf = kmem_alloc(bufsize, KM_NOSLEEP);
5903         if (srf == NULL) {
5904                 retval = EAGAIN;
5905                 goto out;
5906         }
5907 
5908         frtnp = (frtn_t *)(srf + 1);
5909         frtnp->free_arg = (caddr_t)srf;
5910         frtnp->free_func = free_passfp;
5911 
5912         mp = esballoc((uchar_t *)srf, bufsize, BPRI_MED, frtnp);
5913         if (mp == NULL) {
5914                 kmem_free(srf, bufsize);
5915                 retval = EAGAIN;
5916                 goto out;
5917         }
5918         mp->b_wptr += sizeof (struct k_strrecvfd);
5919         mp->b_datap->db_type = M_PASSFP;
5920 
5921         srf->fp = fp;
5922         srf->uid = crgetuid(curthread->t_cred);
5923         srf->gid = crgetgid(curthread->t_cred);
5924         mutex_enter(&fp->f_tlock);
5925         fp->f_count++;
5926         mutex_exit(&fp->f_tlock);
5927 
5928         put(qp, mp);
5929 out:
5930         releasestr(stp->sd_wrq);
5931         if (mate)
5932                 releasestr(mate);
5933         return (retval);
5934 }
5935 
5936 /*
5937  * Send an ioctl message downstream and wait for acknowledgement.
5938  * flags may be set to either U_TO_K or K_TO_K and a combination
5939  * of STR_NOERROR or STR_NOSIG
5940  * STR_NOSIG: Signals are essentially ignored or held and have
5941  *      no effect for the duration of the call.
5942  * STR_NOERROR: Ignores stream head read, write and hup errors.
5943  *      Additionally, if an existing ioctl times out, it is assumed
5944  *      lost and and this ioctl will continue as if the previous ioctl had
5945  *      finished.  ETIME may be returned if this ioctl times out (i.e.
5946  *      ic_timout is not INFTIM).  Non-stream head errors may be returned if
5947  *      the ioc_error indicates that the driver/module had problems,
5948  *      an EFAULT was found when accessing user data, a lack of
5949  *      resources, etc.
5950  */
5951 int
5952 strdoioctl(
5953         struct stdata *stp,
5954         struct strioctl *strioc,
5955         int fflags,             /* file flags with model info */
5956         int flag,
5957         cred_t *crp,
5958         int *rvalp)
5959 {
5960         mblk_t *bp;
5961         struct iocblk *iocbp;
5962         struct copyreq *reqp;
5963         struct copyresp *resp;
5964         int id;
5965         int transparent = 0;
5966         int error = 0;
5967         int len = 0;
5968         caddr_t taddr;
5969         int copyflag = (flag & (U_TO_K | K_TO_K));
5970         int sigflag = (flag & STR_NOSIG);
5971         int errs;
5972         uint_t waitflags;
5973         boolean_t set_iocwaitne = B_FALSE;
5974 
5975         ASSERT(copyflag == U_TO_K || copyflag == K_TO_K);
5976         ASSERT((fflags & FMODELS) != 0);
5977 
5978         TRACE_2(TR_FAC_STREAMS_FR,
5979             TR_STRDOIOCTL,
5980             "strdoioctl:stp %p strioc %p", stp, strioc);
5981         if (strioc->ic_len == TRANSPARENT) { /* send arg in M_DATA block */
5982                 transparent = 1;
5983                 strioc->ic_len = sizeof (intptr_t);
5984         }
5985 
5986         if (strioc->ic_len < 0 || (strmsgsz > 0 && strioc->ic_len > strmsgsz))
5987                 return (EINVAL);
5988 
5989         if ((bp = allocb_cred_wait(sizeof (union ioctypes), sigflag, &error,
5990             crp, curproc->p_pid)) == NULL)
5991                         return (error);
5992 
5993         bzero(bp->b_wptr, sizeof (union ioctypes));
5994 
5995         iocbp = (struct iocblk *)bp->b_wptr;
5996         iocbp->ioc_count = strioc->ic_len;
5997         iocbp->ioc_cmd = strioc->ic_cmd;
5998         iocbp->ioc_flag = (fflags & FMODELS);
5999 
6000         crhold(crp);
6001         iocbp->ioc_cr = crp;
6002         DB_TYPE(bp) = M_IOCTL;
6003         bp->b_wptr += sizeof (struct iocblk);
6004 
6005         if (flag & STR_NOERROR)
6006                 errs = STPLEX;
6007         else
6008                 errs = STRHUP|STRDERR|STWRERR|STPLEX;
6009 
6010         /*
6011          * If there is data to copy into ioctl block, do so.
6012          */
6013         if (iocbp->ioc_count > 0) {
6014                 if (transparent)
6015                         /*
6016                          * Note: STR_NOERROR does not have an effect
6017                          * in putiocd()
6018                          */
6019                         id = K_TO_K | sigflag;
6020                 else
6021                         id = flag;
6022                 if ((error = putiocd(bp, strioc->ic_dp, id, crp)) != 0) {
6023                         freemsg(bp);
6024                         crfree(crp);
6025                         return (error);
6026                 }
6027 
6028                 /*
6029                  * We could have slept copying in user pages.
6030                  * Recheck the stream head state (the other end
6031                  * of a pipe could have gone away).
6032                  */
6033                 if (stp->sd_flag & errs) {
6034                         mutex_enter(&stp->sd_lock);
6035                         error = strgeterr(stp, errs, 0);
6036                         mutex_exit(&stp->sd_lock);
6037                         if (error != 0) {
6038                                 freemsg(bp);
6039                                 crfree(crp);
6040                                 return (error);
6041                         }
6042                 }
6043         }
6044         if (transparent)
6045                 iocbp->ioc_count = TRANSPARENT;
6046 
6047         /*
6048          * Block for up to STRTIMOUT milliseconds if there is an outstanding
6049          * ioctl for this stream already running.  All processes
6050          * sleeping here will be awakened as a result of an ACK
6051          * or NAK being received for the outstanding ioctl, or
6052          * as a result of the timer expiring on the outstanding
6053          * ioctl (a failure), or as a result of any waiting
6054          * process's timer expiring (also a failure).
6055          */
6056 
6057         error = 0;
6058         mutex_enter(&stp->sd_lock);
6059         while ((stp->sd_flag & IOCWAIT) ||
6060             (!set_iocwaitne && (stp->sd_flag & IOCWAITNE))) {
6061                 clock_t cv_rval;
6062 
6063                 TRACE_0(TR_FAC_STREAMS_FR,
6064                     TR_STRDOIOCTL_WAIT,
6065                     "strdoioctl sleeps - IOCWAIT");
6066                 cv_rval = str_cv_wait(&stp->sd_iocmonitor, &stp->sd_lock,
6067                     STRTIMOUT, sigflag);
6068                 if (cv_rval <= 0) {
6069                         if (cv_rval == 0) {
6070                                 error = EINTR;
6071                         } else {
6072                                 if (flag & STR_NOERROR) {
6073                                         /*
6074                                          * Terminating current ioctl in
6075                                          * progress -- assume it got lost and
6076                                          * wake up the other thread so that the
6077                                          * operation completes.
6078                                          */
6079                                         if (!(stp->sd_flag & IOCWAITNE)) {
6080                                                 set_iocwaitne = B_TRUE;
6081                                                 stp->sd_flag |= IOCWAITNE;
6082                                                 cv_broadcast(&stp->sd_monitor);
6083                                         }
6084                                         /*
6085                                          * Otherwise, there's a running
6086                                          * STR_NOERROR -- we have no choice
6087                                          * here but to wait forever (or until
6088                                          * interrupted).
6089                                          */
6090                                 } else {
6091                                         /*
6092                                          * pending ioctl has caused
6093                                          * us to time out
6094                                          */
6095                                         error = ETIME;
6096                                 }
6097                         }
6098                 } else if ((stp->sd_flag & errs)) {
6099                         error = strgeterr(stp, errs, 0);
6100                 }
6101                 if (error) {
6102                         mutex_exit(&stp->sd_lock);
6103                         freemsg(bp);
6104                         crfree(crp);
6105                         return (error);
6106                 }
6107         }
6108 
6109         /*
6110          * Have control of ioctl mechanism.
6111          * Send down ioctl packet and wait for response.
6112          */
6113         if (stp->sd_iocblk != (mblk_t *)-1) {
6114                 freemsg(stp->sd_iocblk);
6115         }
6116         stp->sd_iocblk = NULL;
6117 
6118         /*
6119          * If this is marked with 'noerror' (internal; mostly
6120          * I_{P,}{UN,}LINK), then make sure nobody else is able to get
6121          * in here by setting IOCWAITNE.
6122          */
6123         waitflags = IOCWAIT;
6124         if (flag & STR_NOERROR)
6125                 waitflags |= IOCWAITNE;
6126 
6127         stp->sd_flag |= waitflags;
6128 
6129         /*
6130          * Assign sequence number.
6131          */
6132         iocbp->ioc_id = stp->sd_iocid = getiocseqno();
6133 
6134         mutex_exit(&stp->sd_lock);
6135 
6136         TRACE_1(TR_FAC_STREAMS_FR,
6137             TR_STRDOIOCTL_PUT, "strdoioctl put: stp %p", stp);
6138         stream_willservice(stp);
6139         putnext(stp->sd_wrq, bp);
6140         stream_runservice(stp);
6141 
6142         /*
6143          * Timed wait for acknowledgment.  The wait time is limited by the
6144          * timeout value, which must be a positive integer (number of
6145          * milliseconds) to wait, or 0 (use default value of STRTIMOUT
6146          * milliseconds), or -1 (wait forever).  This will be awakened
6147          * either by an ACK/NAK message arriving, the timer expiring, or
6148          * the timer expiring on another ioctl waiting for control of the
6149          * mechanism.
6150          */
6151 waitioc:
6152         mutex_enter(&stp->sd_lock);
6153 
6154 
6155         /*
6156          * If the reply has already arrived, don't sleep.  If awakened from
6157          * the sleep, fail only if the reply has not arrived by then.
6158          * Otherwise, process the reply.
6159          */
6160         while (!stp->sd_iocblk) {
6161                 clock_t cv_rval;
6162 
6163                 if (stp->sd_flag & errs) {
6164                         error = strgeterr(stp, errs, 0);
6165                         if (error != 0) {
6166                                 stp->sd_flag &= ~waitflags;
6167                                 cv_broadcast(&stp->sd_iocmonitor);
6168                                 mutex_exit(&stp->sd_lock);
6169                                 crfree(crp);
6170                                 return (error);
6171                         }
6172                 }
6173 
6174                 TRACE_0(TR_FAC_STREAMS_FR,
6175                     TR_STRDOIOCTL_WAIT2,
6176                     "strdoioctl sleeps awaiting reply");
6177                 ASSERT(error == 0);
6178 
6179                 cv_rval = str_cv_wait(&stp->sd_monitor, &stp->sd_lock,
6180                     (strioc->ic_timout ?
6181                     strioc->ic_timout * 1000 : STRTIMOUT), sigflag);
6182 
6183                 /*
6184                  * There are four possible cases here: interrupt, timeout,
6185                  * wakeup by IOCWAITNE (above), or wakeup by strrput_nondata (a
6186                  * valid M_IOCTL reply).
6187                  *
6188                  * If we've been awakened by a STR_NOERROR ioctl on some other
6189                  * thread, then sd_iocblk will still be NULL, and IOCWAITNE
6190                  * will be set.  Pretend as if we just timed out.  Note that
6191                  * this other thread waited at least STRTIMOUT before trying to
6192                  * awaken our thread, so this is indistinguishable (even for
6193                  * INFTIM) from the case where we failed with ETIME waiting on
6194                  * IOCWAIT in the prior loop.
6195                  */
6196                 if (cv_rval > 0 && !(flag & STR_NOERROR) &&
6197                     stp->sd_iocblk == NULL && (stp->sd_flag & IOCWAITNE)) {
6198                         cv_rval = -1;
6199                 }
6200 
6201                 /*
6202                  * note: STR_NOERROR does not protect
6203                  * us here.. use ic_timout < 0
6204                  */
6205                 if (cv_rval <= 0) {
6206                         if (cv_rval == 0) {
6207                                 error = EINTR;
6208                         } else {
6209                                 error =  ETIME;
6210                         }
6211                         /*
6212                          * A message could have come in after we were scheduled
6213                          * but before we were actually run.
6214                          */
6215                         bp = stp->sd_iocblk;
6216                         stp->sd_iocblk = NULL;
6217                         if (bp != NULL) {
6218                                 if ((bp->b_datap->db_type == M_COPYIN) ||
6219                                     (bp->b_datap->db_type == M_COPYOUT)) {
6220                                         mutex_exit(&stp->sd_lock);
6221                                         if (bp->b_cont) {
6222                                                 freemsg(bp->b_cont);
6223                                                 bp->b_cont = NULL;
6224                                         }
6225                                         bp->b_datap->db_type = M_IOCDATA;
6226                                         bp->b_wptr = bp->b_rptr +
6227                                             sizeof (struct copyresp);
6228                                         resp = (struct copyresp *)bp->b_rptr;
6229                                         resp->cp_rval =
6230                                             (caddr_t)1; /* failure */
6231                                         stream_willservice(stp);
6232                                         putnext(stp->sd_wrq, bp);
6233                                         stream_runservice(stp);
6234                                         mutex_enter(&stp->sd_lock);
6235                                 } else {
6236                                         freemsg(bp);
6237                                 }
6238                         }
6239                         stp->sd_flag &= ~waitflags;
6240                         cv_broadcast(&stp->sd_iocmonitor);
6241                         mutex_exit(&stp->sd_lock);
6242                         crfree(crp);
6243                         return (error);
6244                 }
6245         }
6246         bp = stp->sd_iocblk;
6247         /*
6248          * Note: it is strictly impossible to get here with sd_iocblk set to
6249          * -1.  This is because the initial loop above doesn't allow any new
6250          * ioctls into the fray until all others have passed this point.
6251          */
6252         ASSERT(bp != NULL && bp != (mblk_t *)-1);
6253         TRACE_1(TR_FAC_STREAMS_FR,
6254             TR_STRDOIOCTL_ACK, "strdoioctl got reply: bp %p", bp);
6255         if ((bp->b_datap->db_type == M_IOCACK) ||
6256             (bp->b_datap->db_type == M_IOCNAK)) {
6257                 /* for detection of duplicate ioctl replies */
6258                 stp->sd_iocblk = (mblk_t *)-1;
6259                 stp->sd_flag &= ~waitflags;
6260                 cv_broadcast(&stp->sd_iocmonitor);
6261                 mutex_exit(&stp->sd_lock);
6262         } else {
6263                 /*
6264                  * flags not cleared here because we're still doing
6265                  * copy in/out for ioctl.
6266                  */
6267                 stp->sd_iocblk = NULL;
6268                 mutex_exit(&stp->sd_lock);
6269         }
6270 
6271 
6272         /*
6273          * Have received acknowledgment.
6274          */
6275 
6276         switch (bp->b_datap->db_type) {
6277         case M_IOCACK:
6278                 /*
6279                  * Positive ack.
6280                  */
6281                 iocbp = (struct iocblk *)bp->b_rptr;
6282 
6283                 /*
6284                  * Set error if indicated.
6285                  */
6286                 if (iocbp->ioc_error) {
6287                         error = iocbp->ioc_error;
6288                         break;
6289                 }
6290 
6291                 /*
6292                  * Set return value.
6293                  */
6294                 *rvalp = iocbp->ioc_rval;
6295 
6296                 /*
6297                  * Data may have been returned in ACK message (ioc_count > 0).
6298                  * If so, copy it out to the user's buffer.
6299                  */
6300                 if (iocbp->ioc_count && !transparent) {
6301                         if (error = getiocd(bp, strioc->ic_dp, copyflag))
6302                                 break;
6303                 }
6304                 if (!transparent) {
6305                         if (len)        /* an M_COPYOUT was used with I_STR */
6306                                 strioc->ic_len = len;
6307                         else
6308                                 strioc->ic_len = (int)iocbp->ioc_count;
6309                 }
6310                 break;
6311 
6312         case M_IOCNAK:
6313                 /*
6314                  * Negative ack.
6315                  *
6316                  * The only thing to do is set error as specified
6317                  * in neg ack packet.
6318                  */
6319                 iocbp = (struct iocblk *)bp->b_rptr;
6320 
6321                 error = (iocbp->ioc_error ? iocbp->ioc_error : EINVAL);
6322                 break;
6323 
6324         case M_COPYIN:
6325                 /*
6326                  * Driver or module has requested user ioctl data.
6327                  */
6328                 reqp = (struct copyreq *)bp->b_rptr;
6329 
6330                 /*
6331                  * M_COPYIN should *never* have a message attached, though
6332                  * it's harmless if it does -- thus, panic on a DEBUG
6333                  * kernel and just free it on a non-DEBUG build.
6334                  */
6335                 ASSERT(bp->b_cont == NULL);
6336                 if (bp->b_cont != NULL) {
6337                         freemsg(bp->b_cont);
6338                         bp->b_cont = NULL;
6339                 }
6340 
6341                 error = putiocd(bp, reqp->cq_addr, flag, crp);
6342                 if (error && bp->b_cont) {
6343                         freemsg(bp->b_cont);
6344                         bp->b_cont = NULL;
6345                 }
6346 
6347                 bp->b_wptr = bp->b_rptr + sizeof (struct copyresp);
6348                 bp->b_datap->db_type = M_IOCDATA;
6349 
6350                 mblk_setcred(bp, crp, curproc->p_pid);
6351                 resp = (struct copyresp *)bp->b_rptr;
6352                 resp->cp_rval = (caddr_t)(uintptr_t)error;
6353                 resp->cp_flag = (fflags & FMODELS);
6354 
6355                 stream_willservice(stp);
6356                 putnext(stp->sd_wrq, bp);
6357                 stream_runservice(stp);
6358 
6359                 if (error) {
6360                         mutex_enter(&stp->sd_lock);
6361                         stp->sd_flag &= ~waitflags;
6362                         cv_broadcast(&stp->sd_iocmonitor);
6363                         mutex_exit(&stp->sd_lock);
6364                         crfree(crp);
6365                         return (error);
6366                 }
6367 
6368                 goto waitioc;
6369 
6370         case M_COPYOUT:
6371                 /*
6372                  * Driver or module has ioctl data for a user.
6373                  */
6374                 reqp = (struct copyreq *)bp->b_rptr;
6375                 ASSERT(bp->b_cont != NULL);
6376 
6377                 /*
6378                  * Always (transparent or non-transparent )
6379                  * use the address specified in the request
6380                  */
6381                 taddr = reqp->cq_addr;
6382                 if (!transparent)
6383                         len = (int)reqp->cq_size;
6384 
6385                 /* copyout data to the provided address */
6386                 error = getiocd(bp, taddr, copyflag);
6387 
6388                 freemsg(bp->b_cont);
6389                 bp->b_cont = NULL;
6390 
6391                 bp->b_wptr = bp->b_rptr + sizeof (struct copyresp);
6392                 bp->b_datap->db_type = M_IOCDATA;
6393 
6394                 mblk_setcred(bp, crp, curproc->p_pid);
6395                 resp = (struct copyresp *)bp->b_rptr;
6396                 resp->cp_rval = (caddr_t)(uintptr_t)error;
6397                 resp->cp_flag = (fflags & FMODELS);
6398 
6399                 stream_willservice(stp);
6400                 putnext(stp->sd_wrq, bp);
6401                 stream_runservice(stp);
6402 
6403                 if (error) {
6404                         mutex_enter(&stp->sd_lock);
6405                         stp->sd_flag &= ~waitflags;
6406                         cv_broadcast(&stp->sd_iocmonitor);
6407                         mutex_exit(&stp->sd_lock);
6408                         crfree(crp);
6409                         return (error);
6410                 }
6411                 goto waitioc;
6412 
6413         default:
6414                 ASSERT(0);
6415                 mutex_enter(&stp->sd_lock);
6416                 stp->sd_flag &= ~waitflags;
6417                 cv_broadcast(&stp->sd_iocmonitor);
6418                 mutex_exit(&stp->sd_lock);
6419                 break;
6420         }
6421 
6422         freemsg(bp);
6423         crfree(crp);
6424         return (error);
6425 }
6426 
6427 /*
6428  * Send an M_CMD message downstream and wait for a reply.  This is a ptools
6429  * special used to retrieve information from modules/drivers a stream without
6430  * being subjected to flow control or interfering with pending messages on the
6431  * stream (e.g. an ioctl in flight).
6432  */
6433 int
6434 strdocmd(struct stdata *stp, struct strcmd *scp, cred_t *crp)
6435 {
6436         mblk_t *mp;
6437         struct cmdblk *cmdp;
6438         int error = 0;
6439         int errs = STRHUP|STRDERR|STWRERR|STPLEX;
6440         clock_t rval, timeout = STRTIMOUT;
6441 
6442         if (scp->sc_len < 0 || scp->sc_len > sizeof (scp->sc_buf) ||
6443             scp->sc_timeout < -1)
6444                 return (EINVAL);
6445 
6446         if (scp->sc_timeout > 0)
6447                 timeout = scp->sc_timeout * MILLISEC;
6448 
6449         if ((mp = allocb_cred(sizeof (struct cmdblk), crp,
6450             curproc->p_pid)) == NULL)
6451                 return (ENOMEM);
6452 
6453         crhold(crp);
6454 
6455         cmdp = (struct cmdblk *)mp->b_wptr;
6456         cmdp->cb_cr = crp;
6457         cmdp->cb_cmd = scp->sc_cmd;
6458         cmdp->cb_len = scp->sc_len;
6459         cmdp->cb_error = 0;
6460         mp->b_wptr += sizeof (struct cmdblk);
6461 
6462         DB_TYPE(mp) = M_CMD;
6463         DB_CPID(mp) = curproc->p_pid;
6464 
6465         /*
6466          * Copy in the payload.
6467          */
6468         if (cmdp->cb_len > 0) {
6469                 mp->b_cont = allocb_cred(sizeof (scp->sc_buf), crp,
6470                     curproc->p_pid);
6471                 if (mp->b_cont == NULL) {
6472                         error = ENOMEM;
6473                         goto out;
6474                 }
6475 
6476                 /* cb_len comes from sc_len, which has already been checked */
6477                 ASSERT(cmdp->cb_len <= sizeof (scp->sc_buf));
6478                 (void) bcopy(scp->sc_buf, mp->b_cont->b_wptr, cmdp->cb_len);
6479                 mp->b_cont->b_wptr += cmdp->cb_len;
6480                 DB_CPID(mp->b_cont) = curproc->p_pid;
6481         }
6482 
6483         /*
6484          * Since this mechanism is strictly for ptools, and since only one
6485          * process can be grabbed at a time, we simply fail if there's
6486          * currently an operation pending.
6487          */
6488         mutex_enter(&stp->sd_lock);
6489         if (stp->sd_flag & STRCMDWAIT) {
6490                 mutex_exit(&stp->sd_lock);
6491                 error = EBUSY;
6492                 goto out;
6493         }
6494         stp->sd_flag |= STRCMDWAIT;
6495         ASSERT(stp->sd_cmdblk == NULL);
6496         mutex_exit(&stp->sd_lock);
6497 
6498         putnext(stp->sd_wrq, mp);
6499         mp = NULL;
6500 
6501         /*
6502          * Timed wait for acknowledgment.  If the reply has already arrived,
6503          * don't sleep.  If awakened from the sleep, fail only if the reply
6504          * has not arrived by then.  Otherwise, process the reply.
6505          */
6506         mutex_enter(&stp->sd_lock);
6507         while (stp->sd_cmdblk == NULL) {
6508                 if (stp->sd_flag & errs) {
6509                         if ((error = strgeterr(stp, errs, 0)) != 0)
6510                                 goto waitout;
6511                 }
6512 
6513                 rval = str_cv_wait(&stp->sd_monitor, &stp->sd_lock, timeout, 0);
6514                 if (stp->sd_cmdblk != NULL)
6515                         break;
6516 
6517                 if (rval <= 0) {
6518                         error = (rval == 0) ? EINTR : ETIME;
6519                         goto waitout;
6520                 }
6521         }
6522 
6523         /*
6524          * We received a reply.
6525          */
6526         mp = stp->sd_cmdblk;
6527         stp->sd_cmdblk = NULL;
6528         ASSERT(mp != NULL && DB_TYPE(mp) == M_CMD);
6529         ASSERT(stp->sd_flag & STRCMDWAIT);
6530         stp->sd_flag &= ~STRCMDWAIT;
6531         mutex_exit(&stp->sd_lock);
6532 
6533         cmdp = (struct cmdblk *)mp->b_rptr;
6534         if ((error = cmdp->cb_error) != 0)
6535                 goto out;
6536 
6537         /*
6538          * Data may have been returned in the reply (cb_len > 0).
6539          * If so, copy it out to the user's buffer.
6540          */
6541         if (cmdp->cb_len > 0) {
6542                 if (mp->b_cont == NULL || MBLKL(mp->b_cont) < cmdp->cb_len) {
6543                         error = EPROTO;
6544                         goto out;
6545                 }
6546 
6547                 cmdp->cb_len = MIN(cmdp->cb_len, sizeof (scp->sc_buf));
6548                 (void) bcopy(mp->b_cont->b_rptr, scp->sc_buf, cmdp->cb_len);
6549         }
6550         scp->sc_len = cmdp->cb_len;
6551 out:
6552         freemsg(mp);
6553         crfree(crp);
6554         return (error);
6555 waitout:
6556         ASSERT(stp->sd_cmdblk == NULL);
6557         stp->sd_flag &= ~STRCMDWAIT;
6558         mutex_exit(&stp->sd_lock);
6559         crfree(crp);
6560         return (error);
6561 }
6562 
6563 /*
6564  * Return the next "ioctl" sequence number.  For the SunOS keyboard
6565  * driver; exported so that streams modules can send "ioctl" messages
6566  * downstream from their open routine.
6567  */
6568 int
6569 getiocseqno(void)
6570 {
6571         int     seqno;
6572 
6573         mutex_enter(&strresources);
6574         ioc_id++;
6575         seqno = ioc_id;
6576         mutex_exit(&strresources);
6577         return (seqno);
6578 }
6579 
6580 /*
6581  * Get the next message from the read queue.  If the message is
6582  * priority, STRPRI will have been set by strrput().  This flag
6583  * should be reset only when the entire message at the front of the
6584  * queue has been consumed.
      *
      * Arguments:
      *   vp      vnode of the stream; vp->v_stream must be set.
      *   mctl    describes the user buffer for the control (non-M_DATA)
      *           part.  On return mctl->len is the number of bytes copied
      *           out, or -1 if mctl->maxlen is negative or the message
      *           starts with M_DATA.
      *   mdata   describes the user buffer for the data part.  On return
      *           mdata->len is the number of bytes copied out, or -1 if
      *           mdata->maxlen is negative or no data blocks were
      *           processed.
      *   prip    on entry, must be 0 with MSG_HIPRI; with MSG_BAND it is
      *           the lowest band the caller will accept.  On return it is
      *           the band of the message that was retrieved.
      *   flagsp  on entry, exactly one of MSG_HIPRI, MSG_ANY or MSG_BAND.
      *           On return, MSG_HIPRI if the retrieved message type is
      *           >= QPCTL, else MSG_BAND; 0 if nothing was returned
      *           because of STRHUP/STREOF or a zero-length request.
      *   fmode   passed through to strwaitq() when we have to wait.
      *   rvp     rvp->r_val1 is set to MORECTL and/or MOREDATA if part of
      *           the message was put back on the queue, otherwise 0.
      *
      * Returns 0 on success, otherwise an errno value: EINVAL for a bad
      * flag/priority combination, EBADMSG if the message at the head of
      * the queue is an M_PASSFP, EFAULT if the control part could not be
      * copied out, or whatever error i_straccess(), strgeterr(), strget(),
      * strwaitq() or struiocopyout() reported.
6585  *
6586  * NOTE: strgetmsg and kstrgetmsg have much of the logic in common.
6587  */
6588 int
6589 strgetmsg(
6590         struct vnode *vp,
6591         struct strbuf *mctl,
6592         struct strbuf *mdata,
6593         unsigned char *prip,
6594         int *flagsp,
6595         int fmode,
6596         rval_t *rvp)
6597 {
6598         struct stdata *stp;
6599         mblk_t *bp, *nbp;
6600         mblk_t *savemp = NULL;
6601         mblk_t *savemptail = NULL;
6602         uint_t old_sd_flag;
6603         int flg = MSG_BAND;
6604         int more = 0;
6605         int error = 0;
6606         char first = 1;
6607         uint_t mark;            /* Contains MSG*MARK and _LASTMARK */
6608 #define _LASTMARK       0x8000  /* Distinct from MSG*MARK */
6609         unsigned char pri = 0;
6610         queue_t *q;
6611         int     pr = 0;                 /* Partial read successful */
6612         struct uio uios;
6613         struct uio *uiop = &uios;
6614         struct iovec iovs;
6615         unsigned char type;
6616 
6617         TRACE_1(TR_FAC_STREAMS_FR, TR_STRGETMSG_ENTER,
6618             "strgetmsg:%p", vp);
6619 
6620         ASSERT(vp->v_stream);
6621         stp = vp->v_stream;
6622         rvp->r_val1 = 0;
6623 
6624         mutex_enter(&stp->sd_lock);
6625 
6626         if ((error = i_straccess(stp, JCREAD)) != 0) {
6627                 mutex_exit(&stp->sd_lock);
6628                 return (error);
6629         }
6630 
6631         if (stp->sd_flag & (STRDERR|STPLEX)) {
6632                 error = strgeterr(stp, STRDERR|STPLEX, 0);
6633                 if (error != 0) {
6634                         mutex_exit(&stp->sd_lock);
6635                         return (error);
6636                 }
6637         }
6638         mutex_exit(&stp->sd_lock);
6639 
             /*
              * Validate the caller's flags.  sd_lock is not held here, so
              * the failure cases can return directly.
              */
6640         switch (*flagsp) {
6641         case MSG_HIPRI:
6642                 if (*prip != 0)
6643                         return (EINVAL);
6644                 break;
6645 
6646         case MSG_ANY:
6647         case MSG_BAND:
6648                 break;
6649 
6650         default:
6651                 return (EINVAL);
6652         }
6653         /*
6654          * Setup uio and iov for data part
6655          */
6656         iovs.iov_base = mdata->buf;
6657         iovs.iov_len = mdata->maxlen;
6658         uios.uio_iov = &iovs;
6659         uios.uio_iovcnt = 1;
6660         uios.uio_loffset = 0;
6661         uios.uio_segflg = UIO_USERSPACE;
6662         uios.uio_fmode = 0;
6663         uios.uio_extflg = UIO_COPY_CACHED;
6664         uios.uio_resid = mdata->maxlen;
6665         uios.uio_offset = 0;
6666 
6667         q = _RD(stp->sd_wrq);
6668         mutex_enter(&stp->sd_lock);
             /*
              * Snapshot sd_flag before sd_lock is dropped for the copyout;
              * it is consulted at the end to decide whether STRATMARK may
              * be cleared.
              */
6669         old_sd_flag = stp->sd_flag;
6670         mark = 0;
6671         for (;;) {
6672                 int done = 0;
6673                 mblk_t *q_first = q->q_first;
6674 
6675                 /*
6676                  * Get the next message of appropriate priority
6677                  * from the stream head.  If the caller is interested
6678                  * in band or hipri messages, then they should already
6679                  * be enqueued at the stream head.  On the other hand
6680                  * if the caller wants normal (band 0) messages, they
6681                  * might be deferred in a synchronous stream and they
6682                  * will need to be pulled up.
6683                  *
6684                  * After we have dequeued a message, we might find that
6685                  * it was a deferred M_SIG that was enqueued at the
6686                  * stream head.  It must now be posted as part of the
6687                  * read by calling strsignal_nolock().
6688                  *
6689                  * Also note that strrput does not enqueue an M_PCSIG,
6690                  * and there cannot be more than one hipri message,
6691                  * so there was no need to have the M_PCSIG case.
6692                  *
6693                  * At some time it might be nice to try and wrap the
6694                  * functionality of kstrgetmsg() and strgetmsg() into
6695                  * a common routine so to reduce the amount of replicated
6696                  * code (since they are extremely similar).
6697                  */
6698                 if (!(*flagsp & (MSG_HIPRI|MSG_BAND))) {
6699                         /* Asking for normal, band0 data */
6700                         bp = strget(stp, q, uiop, first, &error);
6701                         ASSERT(MUTEX_HELD(&stp->sd_lock));
6702                         if (bp != NULL) {
6703                                 if (DB_TYPE(bp) == M_SIG) {
6704                                         strsignal_nolock(stp, *bp->b_rptr,
6705                                             bp->b_band);
6706                                         freemsg(bp);
6707                                         continue;
6708                                 } else {
6709                                         break;
6710                                 }
6711                         }
6712                         if (error != 0)
6713                                 goto getmout;
6714 
6715                 /*
6716                  * We can't depend on the value of STRPRI here because
6717                  * the stream head may be in transit. Therefore, we
6718                  * must look at the type of the first message to
6719                  * determine if a high priority message is waiting
6720                  */
6721                 } else if ((*flagsp & MSG_HIPRI) && q_first != NULL &&
6722                     DB_TYPE(q_first) >= QPCTL &&
6723                     (bp = getq_noenab(q, 0)) != NULL) {
6724                         /* Asked for HIPRI and got one */
6725                         ASSERT(DB_TYPE(bp) >= QPCTL);
6726                         break;
6727                 } else if ((*flagsp & MSG_BAND) && q_first != NULL &&
6728                     ((q_first->b_band >= *prip) || DB_TYPE(q_first) >= QPCTL) &&
6729                     (bp = getq_noenab(q, 0)) != NULL) {
6730                         /*
6731                          * Asked for at least band "prip" and got either at
6732                          * least that band or a hipri message.
6733                          */
6734                         ASSERT(bp->b_band >= *prip || DB_TYPE(bp) >= QPCTL);
6735                         if (DB_TYPE(bp) == M_SIG) {
6736                                 strsignal_nolock(stp, *bp->b_rptr, bp->b_band);
6737                                 freemsg(bp);
6738                                 continue;
6739                         } else {
6740                                 break;
6741                         }
6742                 }
6743 
6744                 /* No data. Time to sleep? */
6745                 qbackenable(q, 0);
6746 
6747                 /*
6748                  * If STRHUP or STREOF, return 0 length control and data.
6749                  * Likewise if both maxlen values are 0: the caller asked
6750                  * for zero bytes, so do not sleep to satisfy this request
6751                  * because by default we have zero bytes to return.
6752                  */
6753                 if ((stp->sd_flag & (STRHUP|STREOF)) || (mctl->maxlen == 0 &&
6754                     mdata->maxlen == 0)) {
6755                         mctl->len = mdata->len = 0;
6756                         *flagsp = 0;
6757                         mutex_exit(&stp->sd_lock);
6758                         return (0);
6759                 }
6760                 TRACE_2(TR_FAC_STREAMS_FR, TR_STRGETMSG_WAIT,
6761                     "strgetmsg calls strwaitq:%p, %p",
6762                     vp, uiop);
6763                 if (((error = strwaitq(stp, GETWAIT, (ssize_t)0, fmode, -1,
6764                     &done)) != 0) || done) {
6765                         TRACE_2(TR_FAC_STREAMS_FR, TR_STRGETMSG_DONE,
6766                             "strgetmsg error or done:%p, %p",
6767                             vp, uiop);
6768                         mutex_exit(&stp->sd_lock);
6769                         return (error);
6770                 }
6771                 TRACE_2(TR_FAC_STREAMS_FR, TR_STRGETMSG_AWAKE,
6772                     "strgetmsg awakes:%p, %p", vp, uiop);
6773                 if ((error = i_straccess(stp, JCREAD)) != 0) {
6774                         mutex_exit(&stp->sd_lock);
6775                         return (error);
6776                 }
6777                 first = 0;
6778         }
6779         ASSERT(bp != NULL);
6780         /*
6781          * Extract any mark information. If the message is not completely
6782          * consumed this information will be put in the mblk
6783          * that is putback.
6784          * If MSGMARKNEXT is set and the message is completely consumed
6785          * the STRATMARK flag will be set below. Likewise, if
6786          * MSGNOTMARKNEXT is set and the message is
6787          * completely consumed STRNOTATMARK will be set.
6788          */
6789         mark = bp->b_flag & (MSGMARK | MSGMARKNEXT | MSGNOTMARKNEXT);
6790         ASSERT((mark & (MSGMARKNEXT|MSGNOTMARKNEXT)) !=
6791             (MSGMARKNEXT|MSGNOTMARKNEXT));
6792         if (mark != 0 && bp == stp->sd_mark) {
6793                 mark |= _LASTMARK;
6794                 stp->sd_mark = NULL;
6795         }
6796         /*
6797          * keep track of the original message type and priority
6798          */
6799         pri = bp->b_band;
6800         type = bp->b_datap->db_type;
             /*
              * An M_PASSFP message is not returned by this routine: restore
              * its mark state, put it back on the queue and fail with
              * EBADMSG.
              */
6801         if (type == M_PASSFP) {
6802                 if ((mark & _LASTMARK) && (stp->sd_mark == NULL))
6803                         stp->sd_mark = bp;
6804                 bp->b_flag |= mark & ~_LASTMARK;
6805                 putback(stp, q, bp, pri);
6806                 qbackenable(q, pri);
6807                 mutex_exit(&stp->sd_lock);
6808                 return (EBADMSG);
6809         }
6810         ASSERT(type != M_SIG);
6811 
6812         /*
6813          * Set this flag so strrput will not generate signals. Need to
6814          * make sure this flag is cleared before leaving this routine
6815          * else signals will stop being sent.
6816          */
6817         stp->sd_flag |= STRGETINPROG;
6818         mutex_exit(&stp->sd_lock);
6819 
6820         if (STREAM_NEEDSERVICE(stp))
6821                 stream_runservice(stp);
6822 
6823         /*
6824          * Set HIPRI flag if message is priority.
6825          */
6826         if (type >= QPCTL)
6827                 flg = MSG_HIPRI;
6828         else
6829                 flg = MSG_BAND;
6830 
6831         /*
6832          * First process PROTO or PCPROTO blocks, if any.
6833          */
6834         if (mctl->maxlen >= 0 && type != M_DATA) {
6835                 size_t  n, bcnt;
6836                 char    *ubuf;
6837 
6838                 bcnt = mctl->maxlen;
6839                 ubuf = mctl->buf;
6840                 while (bp != NULL && bp->b_datap->db_type != M_DATA) {
6841                         if ((n = MIN(bcnt, bp->b_wptr - bp->b_rptr)) != 0 &&
6842                             copyout(bp->b_rptr, ubuf, n)) {
6843                                 error = EFAULT;
6844                                 mutex_enter(&stp->sd_lock);
6845                                 /*
6846                                  * clear stream head pri flag based on
6847                                  * first message type
6848                                  */
6849                                 if (type >= QPCTL) {
6850                                         ASSERT(type == M_PCPROTO);
6851                                         stp->sd_flag &= ~STRPRI;
6852                                 }
6853                                 more = 0;
6854                                 freemsg(bp);
6855                                 goto getmout;
6856                         }
6857                         ubuf += n;
6858                         bp->b_rptr += n;
                             /* Free each block once it is fully consumed. */
6859                         if (bp->b_rptr >= bp->b_wptr) {
6860                                 nbp = bp;
6861                                 bp = bp->b_cont;
6862                                 freeb(nbp);
6863                         }
6864                         ASSERT(n <= bcnt);
6865                         bcnt -= n;
6866                         if (bcnt == 0)
6867                                 break;
6868                 }
6869                 mctl->len = mctl->maxlen - bcnt;
6870         } else
6871                 mctl->len = -1;
6872 
6873         if (bp && bp->b_datap->db_type != M_DATA) {
6874                 /*
6875                  * More PROTO blocks in msg.
6876                  */
6877                 more |= MORECTL;
6878                 savemp = bp;
6879                 while (bp && bp->b_datap->db_type != M_DATA) {
6880                         savemptail = bp;
6881                         bp = bp->b_cont;
6882                 }
                     /*
                      * Sever the unread control blocks from whatever
                      * follows; the remainder is re-linked below if any
                      * data is left over.
                      */
6883                 savemptail->b_cont = NULL;
6884         }
6885 
6886         /*
6887          * Now process DATA blocks, if any.
6888          */
6889         if (mdata->maxlen >= 0 && bp) {
6890                 /*
6891                  * struiocopyout will consume a potential zero-length
6892                  * M_DATA even if uio_resid is zero.
6893                  */
6894                 size_t oldresid = uiop->uio_resid;
6895 
6896                 bp = struiocopyout(bp, uiop, &error);
6897                 if (error != 0) {
6898                         mutex_enter(&stp->sd_lock);
6899                         /*
6900                          * clear stream head hi pri flag based on
6901                          * first message
6902                          */
6903                         if (type >= QPCTL) {
6904                                 ASSERT(type == M_PCPROTO);
6905                                 stp->sd_flag &= ~STRPRI;
6906                         }
6907                         more = 0;
                             /*
                              * Only the saved control part is freed here.
                              * NOTE(review): presumably struiocopyout()
                              * has already disposed of the data blocks on
                              * error - confirm.
                              */
6908                         freemsg(savemp);
6909                         goto getmout;
6910                 }
6911                 /*
6912                  * (pr == 1) indicates a partial read.
6913                  */
6914                 if (oldresid > uiop->uio_resid)
6915                         pr = 1;
6916                 mdata->len = mdata->maxlen - uiop->uio_resid;
6917         } else
6918                 mdata->len = -1;
6919 
6920         if (bp) {                       /* more data blocks in msg */
6921                 more |= MOREDATA;
6922                 if (savemp)
6923                         savemptail->b_cont = bp;
6924                 else
6925                         savemp = bp;
6926         }
6927 
6928         mutex_enter(&stp->sd_lock);
6929         if (savemp) {
6930                 if (pr && (savemp->b_datap->db_type == M_DATA) &&
6931                     msgnodata(savemp)) {
6932                         /*
6933                          * Avoid queuing a zero-length tail part of
6934                          * a message. pr=1 indicates that we read some of
6935                          * the message.
6936                          */
6937                         freemsg(savemp);
6938                         more &= ~MOREDATA;
6939                         /*
6940                          * clear stream head hi pri flag based on
6941                          * first message
6942                          */
6943                         if (type >= QPCTL) {
6944                                 ASSERT(type == M_PCPROTO);
6945                                 stp->sd_flag &= ~STRPRI;
6946                         }
6947                 } else {
6948                         savemp->b_band = pri;
6949                         /*
6950                          * If the first message was HIPRI and the one we're
6951                          * putting back isn't, then clear STRPRI, otherwise
6952                          * set STRPRI again.  Note that we must set STRPRI
6953                          * again since the flush logic in strrput_nondata()
6954                          * may have cleared it while we had sd_lock dropped.
6955                          */
6956                         if (type >= QPCTL) {
6957                                 ASSERT(type == M_PCPROTO);
6958                                 if (queclass(savemp) < QPCTL)
6959                                         stp->sd_flag &= ~STRPRI;
6960                                 else
6961                                         stp->sd_flag |= STRPRI;
6962                         } else if (queclass(savemp) >= QPCTL) {
6963                                 /*
6964                                  * The first message was not a HIPRI message,
6965                                  * but the one we are about to putback is.
6966                                  * For simplicity, we do not allow for HIPRI
6967                                  * messages to be embedded in the message
6968                                  * body, so just force it to same type as
6969                                  * first message.
6970                                  */
6971                                 ASSERT(type == M_DATA || type == M_PROTO);
6972                                 ASSERT(savemp->b_datap->db_type == M_PCPROTO);
6973                                 savemp->b_datap->db_type = type;
6974                         }
6975                         if (mark != 0) {
6976                                 savemp->b_flag |= mark & ~_LASTMARK;
6977                                 if ((mark & _LASTMARK) &&
6978                                     (stp->sd_mark == NULL)) {
6979                                         /*
6980                                          * If another marked message arrived
6981                                          * while sd_lock was not held sd_mark
6982                                          * would be non-NULL.
6983                                          */
6984                                         stp->sd_mark = savemp;
6985                                 }
6986                         }
6987                         putback(stp, q, savemp, pri);
6988                 }
6989         } else {
6990                 /*
6991                  * The complete message was consumed.
6992                  *
6993                  * If another M_PCPROTO arrived while sd_lock was not held
6994                  * it would have been discarded since STRPRI was still set.
6995                  *
6996                  * Move the MSG*MARKNEXT information
6997                  * to the stream head just in case
6998                  * the read queue becomes empty.
6999                  * clear stream head hi pri flag based on
7000                  * first message
7001                  *
7002                  * If the stream head was at the mark
7003                  * (STRATMARK) before we dropped sd_lock above
7004                  * and some data was consumed then we have
7005                  * moved past the mark thus STRATMARK is
7006                  * cleared. However, if a message arrived in
7007                  * strrput during the copyout above causing
7008                  * STRATMARK to be set we can not clear that
7009                  * flag.
7010                  */
7011                 if (type >= QPCTL) {
7012                         ASSERT(type == M_PCPROTO);
7013                         stp->sd_flag &= ~STRPRI;
7014                 }
7015                 if (mark & (MSGMARKNEXT|MSGNOTMARKNEXT|MSGMARK)) {
7016                         if (mark & MSGMARKNEXT) {
7017                                 stp->sd_flag &= ~STRNOTATMARK;
7018                                 stp->sd_flag |= STRATMARK;
7019                         } else if (mark & MSGNOTMARKNEXT) {
7020                                 stp->sd_flag &= ~STRATMARK;
7021                                 stp->sd_flag |= STRNOTATMARK;
7022                         } else {
7023                                 stp->sd_flag &= ~(STRATMARK|STRNOTATMARK);
7024                         }
7025                 } else if (pr && (old_sd_flag & STRATMARK)) {
7026                         stp->sd_flag &= ~STRATMARK;
7027                 }
7028         }
7029 
             /* Report the class and band of the message that was retrieved. */
7030         *flagsp = flg;
7031         *prip = pri;
7032 
7033         /*
7034          * Getmsg cleanup processing - if the state of the queue has changed
7035          * some signals may need to be sent and/or poll awakened.
7036          */
7037 getmout:
7038         qbackenable(q, pri);
7039 
7040         /*
7041          * We dropped the stream head lock above. Send all M_SIG messages
7042          * before processing stream head for SIGPOLL messages.
7043          */
7044         ASSERT(MUTEX_HELD(&stp->sd_lock));
7045         while ((bp = q->q_first) != NULL &&
7046             (bp->b_datap->db_type == M_SIG)) {
7047                 /*
7048                  * sd_lock is held so the content of the read queue can not
7049                  * change.
7050                  */
7051                 bp = getq(q);
7052                 ASSERT(bp != NULL && bp->b_datap->db_type == M_SIG);
7053 
7054                 strsignal_nolock(stp, *bp->b_rptr, bp->b_band);
7055                 mutex_exit(&stp->sd_lock);
7056                 freemsg(bp);
7057                 if (STREAM_NEEDSERVICE(stp))
7058                         stream_runservice(stp);
7059                 mutex_enter(&stp->sd_lock);
7060         }
7061 
7062         /*
7063          * stream head cannot change while we make the determination
7064          * whether or not to send a signal. Drop the flag to allow strrput
7065          * to send firstmsgsigs again.
7066          */
7067         stp->sd_flag &= ~STRGETINPROG;
7068 
7069         /*
7070          * If the type of message at the front of the queue changed
7071          * due to the receive the appropriate signals and pollwakeup events
7072          * are generated. The type of changes are:
7073          *      Processed a hipri message, q_first is not hipri.
7074          *      Processed a band X message, and q_first is band Y.
7075          * The generated signals and pollwakeups are identical to what
7076          * strrput() generates should the message that is now on q_first
7077          * arrive to an empty read queue.
7078          *
7079          * Note: only strrput will send a signal for a hipri message.
7080          */
7081         if ((bp = q->q_first) != NULL && !(stp->sd_flag & STRPRI)) {
7082                 strsigset_t signals = 0;
7083                 strpollset_t pollwakeups = 0;
7084 
7085                 if (flg & MSG_HIPRI) {
7086                         /*
7087                          * Removed a hipri message. Regular data at
7088                          * the front of the queue.
7089                          */
7090                         if (bp->b_band == 0) {
7091                                 signals = S_INPUT | S_RDNORM;
7092                                 pollwakeups = POLLIN | POLLRDNORM;
7093                         } else {
7094                                 signals = S_INPUT | S_RDBAND;
7095                                 pollwakeups = POLLIN | POLLRDBAND;
7096                         }
7097                 } else if (pri != bp->b_band) {
7098                         /*
7099                          * The band is different for the new q_first.
7100                          */
7101                         if (bp->b_band == 0) {
7102                                 signals = S_RDNORM;
7103                                 pollwakeups = POLLIN | POLLRDNORM;
7104                         } else {
7105                                 signals = S_RDBAND;
7106                                 pollwakeups = POLLIN | POLLRDBAND;
7107                         }
7108                 }
7109 
7110                 if (pollwakeups != 0) {
                             /*
                              * A normal-data wakeup is only issued when
                              * SR_POLLIN is set in sd_rput_opt, and the
                              * bit is cleared once it has been used.
                              */
7111                         if (pollwakeups == (POLLIN | POLLRDNORM)) {
7112                                 if (!(stp->sd_rput_opt & SR_POLLIN))
7113                                         goto no_pollwake;
7114                                 stp->sd_rput_opt &= ~SR_POLLIN;
7115                         }
7116                         mutex_exit(&stp->sd_lock);
7117                         pollwakeup(&stp->sd_pollist, pollwakeups);
7118                         mutex_enter(&stp->sd_lock);
7119                 }
7120 no_pollwake:
7121 
7122                 if (stp->sd_sigflags & signals)
7123                         strsendsig(stp->sd_siglist, signals, bp->b_band, 0);
7124         }
7125         mutex_exit(&stp->sd_lock);
7126 
7127         rvp->r_val1 = more;
7128         return (error);
7129 #undef  _LASTMARK
7130 }
7131 
7132 /*
7133  * Get the next message from the read queue.  If the message is
7134  * priority, STRPRI will have been set by strrput().  This flag
7135  * should be reset only when the entire message at the front of the
7136  * queue has been consumed.
7137  *
7138  * If uiop is NULL all data is returned in mctlp.
7139  * Note that a NULL uiop implies that FNDELAY and FNONBLOCK are assumed
7140  * not enabled.
7141  * The timeout parameter is in milliseconds; -1 for infinity.
7142  * This routine handles the consolidation private flags:
7143  *      MSG_IGNERROR    Ignore any stream head error except STPLEX.
7144  *      MSG_DELAYERROR  Defer the error check until the queue is empty.
7145  *      MSG_HOLDSIG     Hold signals while waiting for data.
7146  *      MSG_IPEEK       Only peek at messages.
7147  *      MSG_DISCARDTAIL Discard the tail M_DATA part of the message
7148  *                      that doesn't fit.
7149  *      MSG_NOMARK      If the message is marked leave it on the queue.
7150  *
7151  * NOTE: strgetmsg and kstrgetmsg have much of the logic in common.
7152  */
7153 int
7154 kstrgetmsg(
7155         struct vnode *vp,
7156         mblk_t **mctlp,
7157         struct uio *uiop,
7158         unsigned char *prip,
7159         int *flagsp,
7160         clock_t timout,
7161         rval_t *rvp)
7162 {
7163         struct stdata *stp;
7164         mblk_t *bp, *nbp;
7165         mblk_t *savemp = NULL;
7166         mblk_t *savemptail = NULL;
7167         int flags;
7168         uint_t old_sd_flag;
7169         int flg = MSG_BAND;
7170         int more = 0;
7171         int error = 0;
7172         char first = 1;
7173         uint_t mark;            /* Contains MSG*MARK and _LASTMARK */
7174 #define _LASTMARK       0x8000  /* Distinct from MSG*MARK */
7175         unsigned char pri = 0;
7176         queue_t *q;
7177         int     pr = 0;                 /* Partial read successful */
7178         unsigned char type;
7179 
7180         TRACE_1(TR_FAC_STREAMS_FR, TR_KSTRGETMSG_ENTER,
7181             "kstrgetmsg:%p", vp);
7182 
7183         ASSERT(vp->v_stream);
7184         stp = vp->v_stream;
7185         rvp->r_val1 = 0;
7186 
7187         mutex_enter(&stp->sd_lock);
7188 
7189         if ((error = i_straccess(stp, JCREAD)) != 0) {
7190                 mutex_exit(&stp->sd_lock);
7191                 return (error);
7192         }
7193 
7194         flags = *flagsp;
7195         if (stp->sd_flag & (STRDERR|STPLEX)) {
7196                 if ((stp->sd_flag & STPLEX) ||
7197                     (flags & (MSG_IGNERROR|MSG_DELAYERROR)) == 0) {
7198                         error = strgeterr(stp, STRDERR|STPLEX,
7199                             (flags & MSG_IPEEK));
7200                         if (error != 0) {
7201                                 mutex_exit(&stp->sd_lock);
7202                                 return (error);
7203                         }
7204                 }
7205         }
7206         mutex_exit(&stp->sd_lock);
7207 
7208         switch (flags & (MSG_HIPRI|MSG_ANY|MSG_BAND)) {
7209         case MSG_HIPRI:
7210                 if (*prip != 0)
7211                         return (EINVAL);
7212                 break;
7213 
7214         case MSG_ANY:
7215         case MSG_BAND:
7216                 break;
7217 
7218         default:
7219                 return (EINVAL);
7220         }
7221 
7222 retry:
7223         q = _RD(stp->sd_wrq);
7224         mutex_enter(&stp->sd_lock);
7225         old_sd_flag = stp->sd_flag;
7226         mark = 0;
7227         for (;;) {
7228                 int done = 0;
7229                 int waitflag;
7230                 int fmode;
7231                 mblk_t *q_first = q->q_first;
7232 
7233                 /*
7234                  * This section of the code operates just like the code
7235                  * in strgetmsg().  There is a comment there about what
7236                  * is going on here.
7237                  */
7238                 if (!(flags & (MSG_HIPRI|MSG_BAND))) {
7239                         /* Asking for normal, band0 data */
7240                         bp = strget(stp, q, uiop, first, &error);
7241                         ASSERT(MUTEX_HELD(&stp->sd_lock));
7242                         if (bp != NULL) {
7243                                 if (DB_TYPE(bp) == M_SIG) {
7244                                         strsignal_nolock(stp, *bp->b_rptr,
7245                                             bp->b_band);
7246                                         freemsg(bp);
7247                                         continue;
7248                                 } else {
7249                                         break;
7250                                 }
7251                         }
7252                         if (error != 0) {
7253                                 goto getmout;
7254                         }
7255                 /*
7256                  * We can't depend on the value of STRPRI here because
7257                  * the stream head may be in transit. Therefore, we
7258                  * must look at the type of the first message to
7259                  * determine if a high priority messages is waiting
7260                  */
7261                 } else if ((flags & MSG_HIPRI) && q_first != NULL &&
7262                     DB_TYPE(q_first) >= QPCTL &&
7263                     (bp = getq_noenab(q, 0)) != NULL) {
7264                         ASSERT(DB_TYPE(bp) >= QPCTL);
7265                         break;
7266                 } else if ((flags & MSG_BAND) && q_first != NULL &&
7267                     ((q_first->b_band >= *prip) || DB_TYPE(q_first) >= QPCTL) &&
7268                     (bp = getq_noenab(q, 0)) != NULL) {
7269                         /*
7270                          * Asked for at least band "prip" and got either at
7271                          * least that band or a hipri message.
7272                          */
7273                         ASSERT(bp->b_band >= *prip || DB_TYPE(bp) >= QPCTL);
7274                         if (DB_TYPE(bp) == M_SIG) {
7275                                 strsignal_nolock(stp, *bp->b_rptr, bp->b_band);
7276                                 freemsg(bp);
7277                                 continue;
7278                         } else {
7279                                 break;
7280                         }
7281                 }
7282 
7283                 /* No data. Time to sleep? */
7284                 qbackenable(q, 0);
7285 
7286                 /*
7287                  * Delayed error notification?
7288                  */
7289                 if ((stp->sd_flag & (STRDERR|STPLEX)) &&
7290                     (flags & (MSG_IGNERROR|MSG_DELAYERROR)) == MSG_DELAYERROR) {
7291                         error = strgeterr(stp, STRDERR|STPLEX,
7292                             (flags & MSG_IPEEK));
7293                         if (error != 0) {
7294                                 mutex_exit(&stp->sd_lock);
7295                                 return (error);
7296                         }
7297                 }
7298 
7299                 /*
7300                  * If STRHUP or STREOF, return 0 length control and data.
7301                  * If a read(fd,buf,0) has been done, do not sleep, just
7302                  * return.
7303                  *
7304                  * If mctlp == NULL and uiop == NULL, then the code will
7305                  * do the strwaitq. This is an understood way of saying
7306                  * sleep "polling" until a message is received.
7307                  */
7308                 if ((stp->sd_flag & (STRHUP|STREOF)) ||
7309                     (uiop != NULL && uiop->uio_resid == 0)) {
7310                         if (mctlp != NULL)
7311                                 *mctlp = NULL;
7312                         *flagsp = 0;
7313                         mutex_exit(&stp->sd_lock);
7314                         return (0);
7315                 }
7316 
7317                 waitflag = GETWAIT;
7318                 if (flags &
7319                     (MSG_HOLDSIG|MSG_IGNERROR|MSG_IPEEK|MSG_DELAYERROR)) {
7320                         if (flags & MSG_HOLDSIG)
7321                                 waitflag |= STR_NOSIG;
7322                         if (flags & MSG_IGNERROR)
7323                                 waitflag |= STR_NOERROR;
7324                         if (flags & MSG_IPEEK)
7325                                 waitflag |= STR_PEEK;
7326                         if (flags & MSG_DELAYERROR)
7327                                 waitflag |= STR_DELAYERR;
7328                 }
7329                 if (uiop != NULL)
7330                         fmode = uiop->uio_fmode;
7331                 else
7332                         fmode = 0;
7333 
7334                 TRACE_2(TR_FAC_STREAMS_FR, TR_KSTRGETMSG_WAIT,
7335                     "kstrgetmsg calls strwaitq:%p, %p",
7336                     vp, uiop);
7337                 if (((error = strwaitq(stp, waitflag, (ssize_t)0,
7338                     fmode, timout, &done))) != 0 || done) {
7339                         TRACE_2(TR_FAC_STREAMS_FR, TR_KSTRGETMSG_DONE,
7340                             "kstrgetmsg error or done:%p, %p",
7341                             vp, uiop);
7342                         mutex_exit(&stp->sd_lock);
7343                         return (error);
7344                 }
7345                 TRACE_2(TR_FAC_STREAMS_FR, TR_KSTRGETMSG_AWAKE,
7346                     "kstrgetmsg awakes:%p, %p", vp, uiop);
7347                 if ((error = i_straccess(stp, JCREAD)) != 0) {
7348                         mutex_exit(&stp->sd_lock);
7349                         return (error);
7350                 }
7351                 first = 0;
7352         }
7353         ASSERT(bp != NULL);
7354         /*
7355          * Extract any mark information. If the message is not completely
7356          * consumed this information will be put in the mblk
7357          * that is putback.
7358          * If MSGMARKNEXT is set and the message is completely consumed
7359          * the STRATMARK flag will be set below. Likewise, if
7360          * MSGNOTMARKNEXT is set and the message is
7361          * completely consumed STRNOTATMARK will be set.
7362          */
7363         mark = bp->b_flag & (MSGMARK | MSGMARKNEXT | MSGNOTMARKNEXT);
7364         ASSERT((mark & (MSGMARKNEXT|MSGNOTMARKNEXT)) !=
7365             (MSGMARKNEXT|MSGNOTMARKNEXT));
7366         pri = bp->b_band;
7367         if (mark != 0) {
7368                 /*
7369                  * If the caller doesn't want the mark return.
7370                  * Used to implement MSG_WAITALL in sockets.
7371                  */
7372                 if (flags & MSG_NOMARK) {
7373                         putback(stp, q, bp, pri);
7374                         qbackenable(q, pri);
7375                         mutex_exit(&stp->sd_lock);
7376                         return (EWOULDBLOCK);
7377                 }
7378                 if (bp == stp->sd_mark) {
7379                         mark |= _LASTMARK;
7380                         stp->sd_mark = NULL;
7381                 }
7382         }
7383 
7384         /*
7385          * keep track of the first message type
7386          */
7387         type = bp->b_datap->db_type;
7388 
7389         if (bp->b_datap->db_type == M_PASSFP) {
7390                 if ((mark & _LASTMARK) && (stp->sd_mark == NULL))
7391                         stp->sd_mark = bp;
7392                 bp->b_flag |= mark & ~_LASTMARK;
7393                 putback(stp, q, bp, pri);
7394                 qbackenable(q, pri);
7395                 mutex_exit(&stp->sd_lock);
7396                 return (EBADMSG);
7397         }
7398         ASSERT(type != M_SIG);
7399 
7400         if (flags & MSG_IPEEK) {
7401                 /*
7402                  * Clear any struioflag - we do the uiomove over again
7403                  * when peeking since it simplifies the code.
7404                  *
7405                  * Dup the message and put the original back on the queue.
7406                  * If dupmsg() fails, try again with copymsg() to see if
7407                  * there is indeed a shortage of memory.  dupmsg() may fail
7408                  * if db_ref in any of the messages reaches its limit.
7409                  */
7410 
7411                 if ((nbp = dupmsg(bp)) == NULL && (nbp = copymsg(bp)) == NULL) {
7412                         /*
7413                          * Restore the state of the stream head since we
7414                          * need to drop sd_lock (strwaitbuf is sleeping).
7415                          */
7416                         size_t size = msgdsize(bp);
7417 
7418                         if ((mark & _LASTMARK) && (stp->sd_mark == NULL))
7419                                 stp->sd_mark = bp;
7420                         bp->b_flag |= mark & ~_LASTMARK;
7421                         putback(stp, q, bp, pri);
7422                         mutex_exit(&stp->sd_lock);
7423                         error = strwaitbuf(size, BPRI_HI);
7424                         if (error) {
7425                                 /*
7426                                  * There is no net change to the queue thus
7427                                  * no need to qbackenable.
7428                                  */
7429                                 return (error);
7430                         }
7431                         goto retry;
7432                 }
7433 
7434                 if ((mark & _LASTMARK) && (stp->sd_mark == NULL))
7435                         stp->sd_mark = bp;
7436                 bp->b_flag |= mark & ~_LASTMARK;
7437                 putback(stp, q, bp, pri);
7438                 bp = nbp;
7439         }
7440 
7441         /*
7442          * Set this flag so strrput will not generate signals. Need to
7443          * make sure this flag is cleared before leaving this routine
7444          * else signals will stop being sent.
7445          */
7446         stp->sd_flag |= STRGETINPROG;
7447         mutex_exit(&stp->sd_lock);
7448 
7449         if ((stp->sd_rputdatafunc != NULL) && (DB_TYPE(bp) == M_DATA)) {
7450                 mblk_t *tmp, *prevmp;
7451 
7452                 /*
7453                  * Put first non-data mblk back to stream head and
7454                  * cut the mblk chain so sd_rputdatafunc only sees
7455                  * M_DATA mblks. We can skip the first mblk since it
7456                  * is M_DATA according to the condition above.
7457                  */
7458                 for (prevmp = bp, tmp = bp->b_cont; tmp != NULL;
7459                     prevmp = tmp, tmp = tmp->b_cont) {
7460                         if (DB_TYPE(tmp) != M_DATA) {
7461                                 prevmp->b_cont = NULL;
7462                                 mutex_enter(&stp->sd_lock);
7463                                 putback(stp, q, tmp, tmp->b_band);
7464                                 mutex_exit(&stp->sd_lock);
7465                                 break;
7466                         }
7467                 }
7468 
7469                 bp = (stp->sd_rputdatafunc)(stp->sd_vnode, bp,
7470                     NULL, NULL, NULL, NULL);
7471 
7472                 if (bp == NULL)
7473                         goto retry;
7474         }
7475 
7476         if (STREAM_NEEDSERVICE(stp))
7477                 stream_runservice(stp);
7478 
7479         /*
7480          * Set HIPRI flag if message is priority.
7481          */
7482         if (type >= QPCTL)
7483                 flg = MSG_HIPRI;
7484         else
7485                 flg = MSG_BAND;
7486 
7487         /*
7488          * First process PROTO or PCPROTO blocks, if any.
7489          */
7490         if (mctlp != NULL && type != M_DATA) {
7491                 mblk_t *nbp;
7492 
7493                 *mctlp = bp;
7494                 while (bp->b_cont && bp->b_cont->b_datap->db_type != M_DATA)
7495                         bp = bp->b_cont;
7496                 nbp = bp->b_cont;
7497                 bp->b_cont = NULL;
7498                 bp = nbp;
7499         }
7500 
7501         if (bp && bp->b_datap->db_type != M_DATA) {
7502                 /*
7503                  * More PROTO blocks in msg. Will only happen if mctlp is NULL.
7504                  */
7505                 more |= MORECTL;
7506                 savemp = bp;
7507                 while (bp && bp->b_datap->db_type != M_DATA) {
7508                         savemptail = bp;
7509                         bp = bp->b_cont;
7510                 }
7511                 savemptail->b_cont = NULL;
7512         }
7513 
7514         /*
7515          * Now process DATA blocks, if any.
7516          */
7517         if (uiop == NULL) {
7518                 /* Append data to tail of mctlp */
7519 
7520                 if (mctlp != NULL) {
7521                         mblk_t **mpp = mctlp;
7522 
7523                         while (*mpp != NULL)
7524                                 mpp = &((*mpp)->b_cont);
7525                         *mpp = bp;
7526                         bp = NULL;
7527                 }
7528         } else if (uiop->uio_resid >= 0 && bp) {
7529                 size_t oldresid = uiop->uio_resid;
7530 
7531                 /*
7532                  * If a streams message is likely to consist
7533                  * of many small mblks, it is pulled up into
7534                  * one continuous chunk of memory.
7535                  * The size of the first mblk may be bogus because
7536                  * successive read() calls on the socket reduce
7537                  * the size of this mblk until it is exhausted
7538                  * and then the code walks on to the next. Thus
7539                  * the size of the mblk may not be the original size
7540                  * that was passed up, it's simply a remainder
7541                  * and hence can be very small without any
7542                  * implication that the packet is badly fragmented.
7543                  * So the size of the possible second mblk is
7544                  * used to spot a badly fragmented packet.
7545                  * see longer comment at top of page
7546                  * by mblk_pull_len declaration.
7547                  */
7548 
7549                 if (bp->b_cont != NULL && MBLKL(bp->b_cont) < mblk_pull_len) {
7550                         (void) pullupmsg(bp, -1);
7551                 }
7552 
7553                 bp = struiocopyout(bp, uiop, &error);
7554                 if (error != 0) {
7555                         if (mctlp != NULL) {
7556                                 freemsg(*mctlp);
7557                                 *mctlp = NULL;
7558                         } else
7559                                 freemsg(savemp);
7560                         mutex_enter(&stp->sd_lock);
7561                         /*
7562                          * clear stream head hi pri flag based on
7563                          * first message
7564                          */
7565                         if (!(flags & MSG_IPEEK) && (type >= QPCTL)) {
7566                                 ASSERT(type == M_PCPROTO);
7567                                 stp->sd_flag &= ~STRPRI;
7568                         }
7569                         more = 0;
7570                         goto getmout;
7571                 }
7572                 /*
7573                  * (pr == 1) indicates a partial read.
7574                  */
7575                 if (oldresid > uiop->uio_resid)
7576                         pr = 1;
7577         }
7578 
7579         if (bp) {                       /* more data blocks in msg */
7580                 more |= MOREDATA;
7581                 if (savemp)
7582                         savemptail->b_cont = bp;
7583                 else
7584                         savemp = bp;
7585         }
7586 
7587         mutex_enter(&stp->sd_lock);
7588         if (savemp) {
7589                 if (flags & (MSG_IPEEK|MSG_DISCARDTAIL)) {
7590                         /*
7591                          * When MSG_DISCARDTAIL is set or
7592                          * when peeking discard any tail. When peeking this
7593                          * is the tail of the dup that was copied out - the
7594                          * message has already been putback on the queue.
7595                          * Return MOREDATA to the caller even though the data
7596                          * is discarded. This is used by sockets (to
7597                          * set MSG_TRUNC).
7598                          */
7599                         freemsg(savemp);
7600                         if (!(flags & MSG_IPEEK) && (type >= QPCTL)) {
7601                                 ASSERT(type == M_PCPROTO);
7602                                 stp->sd_flag &= ~STRPRI;
7603                         }
7604                 } else if (pr && (savemp->b_datap->db_type == M_DATA) &&
7605                     msgnodata(savemp)) {
7606                         /*
7607                          * Avoid queuing a zero-length tail part of
7608                          * a message. pr=1 indicates that we read some of
7609                          * the message.
7610                          */
7611                         freemsg(savemp);
7612                         more &= ~MOREDATA;
7613                         if (type >= QPCTL) {
7614                                 ASSERT(type == M_PCPROTO);
7615                                 stp->sd_flag &= ~STRPRI;
7616                         }
7617                 } else {
7618                         savemp->b_band = pri;
7619                         /*
7620                          * If the first message was HIPRI and the one we're
7621                          * putting back isn't, then clear STRPRI, otherwise
7622                          * set STRPRI again.  Note that we must set STRPRI
7623                          * again since the flush logic in strrput_nondata()
7624                          * may have cleared it while we had sd_lock dropped.
7625                          */
7626 
7627                         if (type >= QPCTL) {
7628                                 ASSERT(type == M_PCPROTO);
7629                                 if (queclass(savemp) < QPCTL)
7630                                         stp->sd_flag &= ~STRPRI;
7631                                 else
7632                                         stp->sd_flag |= STRPRI;
7633                         } else if (queclass(savemp) >= QPCTL) {
7634                                 /*
7635                                  * The first message was not a HIPRI message,
7636                                  * but the one we are about to putback is.
7637                                  * For simplicity, we do not allow for HIPRI
7638                                  * messages to be embedded in the message
7639                                  * body, so just force it to same type as
7640                                  * first message.
7641                                  */
7642                                 ASSERT(type == M_DATA || type == M_PROTO);
7643                                 ASSERT(savemp->b_datap->db_type == M_PCPROTO);
7644                                 savemp->b_datap->db_type = type;
7645                         }
7646                         if (mark != 0) {
7647                                 if ((mark & _LASTMARK) &&
7648                                     (stp->sd_mark == NULL)) {
7649                                         /*
7650                                          * If another marked message arrived
7651                                          * while sd_lock was not held sd_mark
7652                                          * would be non-NULL.
7653                                          */
7654                                         stp->sd_mark = savemp;
7655                                 }
7656                                 savemp->b_flag |= mark & ~_LASTMARK;
7657                         }
7658                         putback(stp, q, savemp, pri);
7659                 }
7660         } else if (!(flags & MSG_IPEEK)) {
7661                 /*
7662                  * The complete message was consumed.
7663                  *
7664                  * If another M_PCPROTO arrived while sd_lock was not held
7665                  * it would have been discarded since STRPRI was still set.
7666                  *
7667                  * Move the MSG*MARKNEXT information
7668                  * to the stream head just in case
7669                  * the read queue becomes empty.
7670                  * clear stream head hi pri flag based on
7671                  * first message
7672                  *
7673                  * If the stream head was at the mark
7674                  * (STRATMARK) before we dropped sd_lock above
7675                  * and some data was consumed then we have
7676                  * moved past the mark thus STRATMARK is
7677                  * cleared. However, if a message arrived in
7678                  * strrput during the copyout above causing
7679                  * STRATMARK to be set we can not clear that
7680                  * flag.
7681                  * XXX A "perimeter" would help by single-threading strrput,
7682                  * strread, strgetmsg and kstrgetmsg.
7683                  */
7684                 if (type >= QPCTL) {
7685                         ASSERT(type == M_PCPROTO);
7686                         stp->sd_flag &= ~STRPRI;
7687                 }
7688                 if (mark & (MSGMARKNEXT|MSGNOTMARKNEXT|MSGMARK)) {
7689                         if (mark & MSGMARKNEXT) {
7690                                 stp->sd_flag &= ~STRNOTATMARK;
7691                                 stp->sd_flag |= STRATMARK;
7692                         } else if (mark & MSGNOTMARKNEXT) {
7693                                 stp->sd_flag &= ~STRATMARK;
7694                                 stp->sd_flag |= STRNOTATMARK;
7695                         } else {
7696                                 stp->sd_flag &= ~(STRATMARK|STRNOTATMARK);
7697                         }
7698                 } else if (pr && (old_sd_flag & STRATMARK)) {
7699                         stp->sd_flag &= ~STRATMARK;
7700                 }
7701         }
7702 
7703         *flagsp = flg;
7704         *prip = pri;
7705 
7706         /*
7707          * Getmsg cleanup processing - if the state of the queue has changed
7708          * some signals may need to be sent and/or poll awakened.
7709          */
7710 getmout:
7711         qbackenable(q, pri);
7712 
7713         /*
7714          * We dropped the stream head lock above. Send all M_SIG messages
7715          * before processing stream head for SIGPOLL messages.
7716          */
7717         ASSERT(MUTEX_HELD(&stp->sd_lock));
7718         while ((bp = q->q_first) != NULL &&
7719             (bp->b_datap->db_type == M_SIG)) {
7720                 /*
7721                  * sd_lock is held so the content of the read queue can not
7722                  * change.
7723                  */
7724                 bp = getq(q);
7725                 ASSERT(bp != NULL && bp->b_datap->db_type == M_SIG);
7726 
7727                 strsignal_nolock(stp, *bp->b_rptr, bp->b_band);
7728                 mutex_exit(&stp->sd_lock);
7729                 freemsg(bp);
7730                 if (STREAM_NEEDSERVICE(stp))
7731                         stream_runservice(stp);
7732                 mutex_enter(&stp->sd_lock);
7733         }
7734 
7735         /*
7736          * stream head cannot change while we make the determination
7737          * whether or not to send a signal. Drop the flag to allow strrput
7738          * to send firstmsgsigs again.
7739          */
7740         stp->sd_flag &= ~STRGETINPROG;
7741 
7742         /*
7743          * If the type of message at the front of the queue changed
7744          * due to the receive the appropriate signals and pollwakeup events
7745          * are generated. The type of changes are:
7746          *      Processed a hipri message, q_first is not hipri.
7747          *      Processed a band X message, and q_first is band Y.
7748          * The generated signals and pollwakeups are identical to what
7749          * strrput() generates should the message that is now on q_first
7750          * arrive to an empty read queue.
7751          *
7752          * Note: only strrput will send a signal for a hipri message.
7753          */
7754         if ((bp = q->q_first) != NULL && !(stp->sd_flag & STRPRI)) {
7755                 strsigset_t signals = 0;
7756                 strpollset_t pollwakeups = 0;
7757 
7758                 if (flg & MSG_HIPRI) {
7759                         /*
7760                          * Removed a hipri message. Regular data at
7761                          * the front of  the queue.
7762                          */
7763                         if (bp->b_band == 0) {
7764                                 signals = S_INPUT | S_RDNORM;
7765                                 pollwakeups = POLLIN | POLLRDNORM;
7766                         } else {
7767                                 signals = S_INPUT | S_RDBAND;
7768                                 pollwakeups = POLLIN | POLLRDBAND;
7769                         }
7770                 } else if (pri != bp->b_band) {
7771                         /*
7772                          * The band is different for the new q_first.
7773                          */
7774                         if (bp->b_band == 0) {
7775                                 signals = S_RDNORM;
7776                                 pollwakeups = POLLIN | POLLRDNORM;
7777                         } else {
7778                                 signals = S_RDBAND;
7779                                 pollwakeups = POLLIN | POLLRDBAND;
7780                         }
7781                 }
7782 
7783                 if (pollwakeups != 0) {
7784                         if (pollwakeups == (POLLIN | POLLRDNORM)) {
7785                                 if (!(stp->sd_rput_opt & SR_POLLIN))
7786                                         goto no_pollwake;
7787                                 stp->sd_rput_opt &= ~SR_POLLIN;
7788                         }
7789                         mutex_exit(&stp->sd_lock);
7790                         pollwakeup(&stp->sd_pollist, pollwakeups);
7791                         mutex_enter(&stp->sd_lock);
7792                 }
7793 no_pollwake:
7794 
7795                 if (stp->sd_sigflags & signals)
7796                         strsendsig(stp->sd_siglist, signals, bp->b_band, 0);
7797         }
7798         mutex_exit(&stp->sd_lock);
7799 
7800         rvp->r_val1 = more;
7801         return (error);
7802 #undef  _LASTMARK
7803 }
7804 
7805 /*
7806  * Put a message downstream.
7807  *
7808  * NOTE: strputmsg and kstrputmsg have much of the logic in common.
7809  */
7810 int
7811 strputmsg(
7812         struct vnode *vp,
7813         struct strbuf *mctl,
7814         struct strbuf *mdata,
7815         unsigned char pri,
7816         int flag,
7817         int fmode)
7818 {
7819         struct stdata *stp;
7820         queue_t *wqp;
7821         mblk_t *mp;
7822         ssize_t msgsize;
7823         ssize_t rmin, rmax;
7824         int error;
7825         struct uio uios;
7826         struct uio *uiop = &uios;
7827         struct iovec iovs;
7828         int xpg4 = 0;
7829 
7830         ASSERT(vp->v_stream);
7831         stp = vp->v_stream;
7832         wqp = stp->sd_wrq;
7833 
7834         /*
7835          * If it is an XPG4 application, we need to send
7836          * SIGPIPE below
7837          */
7838 
7839         xpg4 = (flag & MSG_XPG4) ? 1 : 0;
7840         flag &= ~MSG_XPG4;
7841 
7842         if (AU_AUDITING())
7843                 audit_strputmsg(vp, mctl, mdata, pri, flag, fmode);
7844 
7845         mutex_enter(&stp->sd_lock);
7846 
7847         if ((error = i_straccess(stp, JCWRITE)) != 0) {
7848                 mutex_exit(&stp->sd_lock);
7849                 return (error);
7850         }
7851 
7852         if (stp->sd_flag & (STWRERR|STRHUP|STPLEX)) {
7853                 error = strwriteable(stp, B_FALSE, xpg4);
7854                 if (error != 0) {
7855                         mutex_exit(&stp->sd_lock);
7856                         return (error);
7857                 }
7858         }
7859 
7860         mutex_exit(&stp->sd_lock);
7861 
7862         /*
7863          * Check for legal flag value.
7864          */
7865         switch (flag) {
7866         case MSG_HIPRI:
7867                 if ((mctl->len < 0) || (pri != 0))
7868                         return (EINVAL);
7869                 break;
7870         case MSG_BAND:
7871                 break;
7872 
7873         default:
7874                 return (EINVAL);
7875         }
7876 
7877         TRACE_1(TR_FAC_STREAMS_FR, TR_STRPUTMSG_IN,
7878             "strputmsg in:stp %p", stp);
7879 
7880         /* get these values from those cached in the stream head */
7881         rmin = stp->sd_qn_minpsz;
7882         rmax = stp->sd_qn_maxpsz;
7883 
7884         /*
7885          * Make sure ctl and data sizes together fall within the
7886          * limits of the max and min receive packet sizes and do
7887          * not exceed system limit.
7888          */
7889         ASSERT((rmax >= 0) || (rmax == INFPSZ));
7890         if (rmax == 0) {
7891                 return (ERANGE);
7892         }
7893         /*
7894          * Use the MAXIMUM of sd_maxblk and q_maxpsz.
7895          * Needed to prevent partial failures in the strmakedata loop.
7896          */
7897         if (stp->sd_maxblk != INFPSZ && rmax != INFPSZ && rmax < stp->sd_maxblk)
7898                 rmax = stp->sd_maxblk;
7899 
7900         if ((msgsize = mdata->len) < 0) {
7901                 msgsize = 0;
7902                 rmin = 0;       /* no range check for NULL data part */
7903         }
7904         if ((msgsize < rmin) ||
7905             ((msgsize > rmax) && (rmax != INFPSZ)) ||
7906             (mctl->len > strctlsz)) {
7907                 return (ERANGE);
7908         }
7909 
7910         /*
7911          * Setup uio and iov for data part
7912          */
7913         iovs.iov_base = mdata->buf;
7914         iovs.iov_len = msgsize;
7915         uios.uio_iov = &iovs;
7916         uios.uio_iovcnt = 1;
7917         uios.uio_loffset = 0;
7918         uios.uio_segflg = UIO_USERSPACE;
7919         uios.uio_fmode = fmode;
7920         uios.uio_extflg = UIO_COPY_DEFAULT;
7921         uios.uio_resid = msgsize;
7922         uios.uio_offset = 0;
7923 
7924         /* Ignore flow control in strput for HIPRI */
7925         if (flag & MSG_HIPRI)
7926                 flag |= MSG_IGNFLOW;
7927 
7928         for (;;) {
7929                 int done = 0;
7930 
7931                 /*
7932                  * strput will always free the ctl mblk - even when strput
7933                  * fails.
7934                  */
7935                 if ((error = strmakectl(mctl, flag, fmode, &mp)) != 0) {
7936                         TRACE_3(TR_FAC_STREAMS_FR, TR_STRPUTMSG_OUT,
7937                             "strputmsg out:stp %p out %d error %d",
7938                             stp, 1, error);
7939                         return (error);
7940                 }
7941                 /*
7942                  * Verify that the whole message can be transferred by
7943                  * strput.
7944                  */
7945                 ASSERT(stp->sd_maxblk == INFPSZ ||
7946                     stp->sd_maxblk >= mdata->len);
7947 
7948                 msgsize = mdata->len;
7949                 error = strput(stp, mp, uiop, &msgsize, 0, pri, flag);
7950                 mdata->len = msgsize;
7951 
7952                 if (error == 0)
7953                         break;
7954 
7955                 if (error != EWOULDBLOCK)
7956                         goto out;
7957 
7958                 mutex_enter(&stp->sd_lock);
7959                 /*
7960                  * Check for a missed wakeup.
7961                  * Needed since strput did not hold sd_lock across
7962                  * the canputnext.
7963                  */
7964                 if (bcanputnext(wqp, pri)) {
7965                         /* Try again */
7966                         mutex_exit(&stp->sd_lock);
7967                         continue;
7968                 }
7969                 TRACE_2(TR_FAC_STREAMS_FR, TR_STRPUTMSG_WAIT,
7970                     "strputmsg wait:stp %p waits pri %d", stp, pri);
7971                 if (((error = strwaitq(stp, WRITEWAIT, (ssize_t)0, fmode, -1,
7972                     &done)) != 0) || done) {
7973                         mutex_exit(&stp->sd_lock);
7974                         TRACE_3(TR_FAC_STREAMS_FR, TR_STRPUTMSG_OUT,
7975                             "strputmsg out:q %p out %d error %d",
7976                             stp, 0, error);
7977                         return (error);
7978                 }
7979                 TRACE_1(TR_FAC_STREAMS_FR, TR_STRPUTMSG_WAKE,
7980                     "strputmsg wake:stp %p wakes", stp);
7981                 if ((error = i_straccess(stp, JCWRITE)) != 0) {
7982                         mutex_exit(&stp->sd_lock);
7983                         return (error);
7984                 }
7985                 mutex_exit(&stp->sd_lock);
7986         }
7987 out:
7988         /*
7989          * For historic reasons, applications expect EAGAIN
7990          * when data mblk could not be allocated. so change
7991          * ENOMEM back to EAGAIN
7992          */
7993         if (error == ENOMEM)
7994                 error = EAGAIN;
7995         TRACE_3(TR_FAC_STREAMS_FR, TR_STRPUTMSG_OUT,
7996             "strputmsg out:stp %p out %d error %d", stp, 2, error);
7997         return (error);
7998 }
7999 
8000 /*
8001  * Put a message downstream.
8002  * Can send only an M_PROTO/M_PCPROTO by passing in a NULL uiop.
8003  * The fmode flag (NDELAY, NONBLOCK) is the or of the flags in the uio
8004  * and the fmode parameter.
8005  *
8006  * This routine handles the consolidation private flags:
8007  *      MSG_IGNERROR    Ignore any stream head error except STPLEX.
8008  *      MSG_HOLDSIG     Hold signals while waiting for data.
8009  *      MSG_IGNFLOW     Don't check streams flow control.
8010  *
      * Arguments:
      *      vp      vnode of the stream; vp->v_stream must be set.
      *      mctl    control part of the message; must not be NULL.
      *      uiop    describes the data part, or NULL for no data part.
      *      msgsize size of the data part; overridden when uiop is NULL.
      *      pri     priority band; must be 0 when MSG_HIPRI is given.
      *      flag    must select exactly one of MSG_HIPRI and MSG_BAND
      *              (MSG_ANY is rejected), optionally combined with the
      *              private flags listed above.
      *      fmode   file mode flags; uiop->uio_fmode is or'ed in when uiop
      *              is not NULL.
      *
      * mctl is consumed on every path past the mctl == NULL check: it is
      * either freed here or handed to strput().
      *
      * Returns 0 on success.  EINVAL is returned for a NULL mctl or an
      * illegal flag/pri combination, and ERANGE when the data size falls
      * outside the cached packet size limits.  Other errors come from
      * i_straccess(), strwriteable(), strwaitbuf(), strput() and strwaitq().
      * An ENOMEM that reaches the common exit path is reported as EAGAIN;
      * the early returns hand their error back unchanged.
      *
8011  * NOTE: strputmsg and kstrputmsg have much of the logic in common.
8012  */
8013 int
8014 kstrputmsg(
8015         struct vnode *vp,
8016         mblk_t *mctl,
8017         struct uio *uiop,
8018         ssize_t msgsize,
8019         unsigned char pri,
8020         int flag,
8021         int fmode)
8022 {
8023         struct stdata *stp;
8024         queue_t *wqp;
8025         ssize_t rmin, rmax;
8026         int error;
8027 
8028         ASSERT(vp->v_stream);
8029         stp = vp->v_stream;
8030         wqp = stp->sd_wrq;
8031         if (AU_AUDITING())
8032                 audit_strputmsg(vp, NULL, NULL, pri, flag, fmode);
8033         if (mctl == NULL)
8034                 return (EINVAL);
8035 
             /*
              * The access check and the stream head error check are both
              * made with sd_lock held.
              */
8036         mutex_enter(&stp->sd_lock);
8037 
8038         if ((error = i_straccess(stp, JCWRITE)) != 0) {
8039                 mutex_exit(&stp->sd_lock);
8040                 freemsg(mctl);
8041                 return (error);
8042         }
8043 
             /*
              * MSG_IGNERROR skips the stream head error check, except that
              * STPLEX is always checked.
              */
8044         if ((stp->sd_flag & STPLEX) || !(flag & MSG_IGNERROR)) {
8045                 if (stp->sd_flag & (STWRERR|STRHUP|STPLEX)) {
8046                         error = strwriteable(stp, B_FALSE, B_TRUE);
8047                         if (error != 0) {
8048                                 mutex_exit(&stp->sd_lock);
8049                                 freemsg(mctl);
8050                                 return (error);
8051                         }
8052                 }
8053         }
8054 
8055         mutex_exit(&stp->sd_lock);
8056 
8057         /*
8058          * Check for legal flag value.
8059          */
8060         switch (flag & (MSG_HIPRI|MSG_BAND|MSG_ANY)) {
8061         case MSG_HIPRI:
8062                 if (pri != 0) {
8063                         freemsg(mctl);
8064                         return (EINVAL);
8065                 }
8066                 break;
8067         case MSG_BAND:
8068                 break;
8069         default:
8070                 freemsg(mctl);
8071                 return (EINVAL);
8072         }
8073 
8074         TRACE_1(TR_FAC_STREAMS_FR, TR_KSTRPUTMSG_IN,
8075             "kstrputmsg in:stp %p", stp);
8076 
8077         /* get these values from those cached in the stream head */
8078         rmin = stp->sd_qn_minpsz;
8079         rmax = stp->sd_qn_maxpsz;
8080 
8081         /*
8082          * Make sure ctl and data sizes together fall within the
8083          * limits of the max and min receive packet sizes and do
8084          * not exceed system limit.
8085          */
8086         ASSERT((rmax >= 0) || (rmax == INFPSZ));
8087         if (rmax == 0) {
8088                 freemsg(mctl);
8089                 return (ERANGE);
8090         }
8091         /*
8092          * Use the MAXIMUM of sd_maxblk and q_maxpsz.
8093          * Needed to prevent partial failures in the strmakedata loop.
8094          */
8095         if (stp->sd_maxblk != INFPSZ && rmax != INFPSZ && rmax < stp->sd_maxblk)
8096                 rmax = stp->sd_maxblk;
8097 
8098         if (uiop == NULL) {
8099                 msgsize = -1;
8100                 rmin = -1;      /* no range check for NULL data part */
8101         } else {
8102                 /* Use uio flags as well as the fmode parameter flags */
8103                 fmode |= uiop->uio_fmode;
8104 
8105                 if ((msgsize < rmin) ||
8106                     ((msgsize > rmax) && (rmax != INFPSZ))) {
8107                         freemsg(mctl);
8108                         return (ERANGE);
8109                 }
8110         }
8111 
8112         /* Ignore flow control in strput for HIPRI */
8113         if (flag & MSG_HIPRI)
8114                 flag |= MSG_IGNFLOW;
8115 
8116         for (;;) {
8117                 int done = 0;
8118                 int waitflag;
8119                 mblk_t *mp;
8120 
8121                 /*
8122                  * strput will always free the ctl mblk - even when strput
8123                  * fails. If MSG_IGNFLOW is set then any error returned
8124                  * will cause us to break the loop, so we don't need a copy
8125                  * of the message. If MSG_IGNFLOW is not set, then we can
8126                  * get hit by flow control and be forced to try again. In
8127                  * this case we need to have a copy of the message. We
8128                  * do this using copymsg since the message may get modified
8129                  * by something below us.
8130                  *
8131                  * We've observed that many TPI providers do not check db_ref
8132                  * on the control messages but blindly reuse them for the
8133                  * T_OK_ACK/T_ERROR_ACK. Thus using copymsg is more
8134                  * friendly to such providers than using dupmsg. Also, note
8135                  * that sockfs uses MSG_IGNFLOW for all TPI control messages.
8136                  * Only data messages are subject to flow control, hence
8137                  * subject to this copymsg.
8138                  */
8139                 if (flag & MSG_IGNFLOW) {
                             /* Hand the original to strput; nothing to keep. */
8140                         mp = mctl;
8141                         mctl = NULL;
8142                 } else {
8143                         do {
8144                                 /*
8145                                  * If a message has a free pointer, the message
8146                                  * must be dupmsg to maintain this pointer.
8147                                  * Code using this facility must be sure
8148                                  * that modules below will not change the
8149                                  * contents of the dblk without checking db_ref
8150                                  * first. If db_ref is > 1, then the module
8151                                  * needs to do a copymsg first. Otherwise,
8152                                  * the contents of the dblk may become
8153                                  * inconsistent because the freemsg/freeb below
8154                                  * may end up calling atomic_add_32_nv.
8155                                  * The atomic_add_32_nv in freeb (accessing
8156                                  * all of db_ref, db_type, db_flags, and
8157                                  * db_struioflag) does not prevent other threads
8158                                  * from concurrently trying to modify e.g.
8159                                  * db_type.
8160                                  */
8161                                 if (mctl->b_datap->db_frtnp != NULL)
8162                                         mp = dupmsg(mctl);
8163                                 else
8164                                         mp = copymsg(mctl);
8165 
8166                                 if (mp != NULL)
8167                                         break;
8168 
                                     /*
                                      * No copy could be made: wait for buffers
                                      * and retry, or give up if the wait fails.
                                      */
8169                                 error = strwaitbuf(msgdsize(mctl), BPRI_MED);
8170                                 if (error) {
8171                                         freemsg(mctl);
8172                                         return (error);
8173                                 }
8174                         } while (mp == NULL);
8175                 }
8176                 /*
8177                  * Verify that all of msgsize can be transferred by
8178                  * strput.
8179                  */
8180                 ASSERT(stp->sd_maxblk == INFPSZ || stp->sd_maxblk >= msgsize);
8181                 error = strput(stp, mp, uiop, &msgsize, 0, pri, flag);
8182                 if (error == 0)
8183                         break;
8184 
                     /* Only EWOULDBLOCK is retried; anything else is final. */
8185                 if (error != EWOULDBLOCK)
8186                         goto out;
8187 
8188                 /*
8189                  * If MSG_IGNFLOW is set we should have broken out of loop
8190                  * above.
8191                  */
8192                 ASSERT(!(flag & MSG_IGNFLOW));
8193                 mutex_enter(&stp->sd_lock);
8194                 /*
8195                  * Check for a missed wakeup.
8196                  * Needed since strput did not hold sd_lock across
8197                  * the canputnext.
8198                  */
8199                 if (bcanputnext(wqp, pri)) {
8200                         /* Try again */
8201                         mutex_exit(&stp->sd_lock);
8202                         continue;
8203                 }
8204                 TRACE_2(TR_FAC_STREAMS_FR, TR_KSTRPUTMSG_WAIT,
8205                     "kstrputmsg wait:stp %p waits pri %d", stp, pri);
8206 
                     /* Translate the private flags into strwaitq() flags. */
8207                 waitflag = WRITEWAIT;
8208                 if (flag & (MSG_HOLDSIG|MSG_IGNERROR)) {
8209                         if (flag & MSG_HOLDSIG)
8210                                 waitflag |= STR_NOSIG;
8211                         if (flag & MSG_IGNERROR)
8212                                 waitflag |= STR_NOERROR;
8213                 }
                     /*
                      * Give up if the wait failed or strwaitq() set done; in
                      * the latter case the error returned is 0.
                      */
8214                 if (((error = strwaitq(stp, waitflag,
8215                     (ssize_t)0, fmode, -1, &done)) != 0) || done) {
8216                         mutex_exit(&stp->sd_lock);
8217                         TRACE_3(TR_FAC_STREAMS_FR, TR_KSTRPUTMSG_OUT,
8218                             "kstrputmsg out:stp %p out %d error %d",
8219                             stp, 0, error);
8220                         freemsg(mctl);
8221                         return (error);
8222                 }
8223                 TRACE_1(TR_FAC_STREAMS_FR, TR_KSTRPUTMSG_WAKE,
8224                     "kstrputmsg wake:stp %p wakes", stp);
                     /* Repeat the access check after the wait. */
8225                 if ((error = i_straccess(stp, JCWRITE)) != 0) {
8226                         mutex_exit(&stp->sd_lock);
8227                         freemsg(mctl);
8228                         return (error);
8229                 }
8230                 mutex_exit(&stp->sd_lock);
8231         }
8232 out:
             /*
              * Release the control message we held on to for retries.  mctl is
              * NULL here when the original was handed to strput (MSG_IGNFLOW).
              */
8233         freemsg(mctl);
8234         /*
8235          * For historic reasons, applications expect EAGAIN
8236          * when data mblk could not be allocated. so change
8237          * ENOMEM back to EAGAIN
8238          */
8239         if (error == ENOMEM)
8240                 error = EAGAIN;
8241         TRACE_3(TR_FAC_STREAMS_FR, TR_KSTRPUTMSG_OUT,
8242             "kstrputmsg out:stp %p out %d error %d", stp, 2, error);
8243         return (error);
8244 }
8245 
8246 /*
8247  * Determines whether the necessary conditions are set on a stream
8248  * for it to be readable, writeable, or have exceptions.
8249  *
8250  * strpoll handles the consolidation private events:
8251  *      POLLNOERR       Do not return POLLERR even if there are stream
8252  *                      head errors.
8253  *                      Used by sockfs.
8254  *      POLLRDDATA      Do not return POLLIN unless at least one message on
8255  *                      the queue contains one or more M_DATA mblks. Thus
8256  *                      when this flag is set a queue with only
8257  *                      M_PROTO/M_PCPROTO mblks does not return POLLIN.
8258  *                      Used by sockfs to ignore T_EXDATA_IND messages.
8259  *
8260  * Note: POLLRDDATA assumes that synch streams only return messages with
8261  * an M_DATA attached (i.e. not messages consisting of only
8262  * an M_PROTO/M_PCPROTO part).
      *
      * Arguments:
      *      stp             stream head being polled.
      *      events_arg      events the caller is interested in.
      *      anyyet          nonzero suppresses passing back a pollhead when
      *                      no events are pending (unless POLLET is set).
      *      reventsp        out: events found, or POLLNVAL/POLLERR.
      *      phpp            out: set to the stream's pollhead when no events
      *                      are pending and anyyet is zero, or when POLLET
      *                      is requested.
      *
      * Returns EINVAL (with *reventsp = POLLNVAL) if STPLEX is set on the
      * stream head; otherwise 0, including the cases where polllock() fails
      * and POLLNVAL is reported through reventsp.
8263  */
8264 int
8265 strpoll(struct stdata *stp, short events_arg, int anyyet, short *reventsp,
8266     struct pollhead **phpp)
8267 {
8268         int events = (ushort_t)events_arg;
8269         int retevents = 0;
8270         mblk_t *mp;
8271         qband_t *qbp;
             /* One snapshot of sd_flag is used for all the flag tests below. */
8272         long sd_flags = stp->sd_flag;
             /* Set once this function has taken sd_lock via polllock(). */
8273         int headlocked = 0;
8274 
8275         /*
8276          * For performance, a single 'if' tests for most possible edge
8277          * conditions in one shot
8278          */
8279         if (sd_flags & (STPLEX | STRDERR | STWRERR)) {
8280                 if (sd_flags & STPLEX) {
8281                         *reventsp = POLLNVAL;
8282                         return (EINVAL);
8283                 }
                     /*
                      * A read (write) error only matters if read (write)
                      * events were asked for, and POLLNOERR hides it.
                      */
8284                 if (((events & (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) &&
8285                     (sd_flags & STRDERR)) ||
8286                     ((events & (POLLOUT | POLLWRNORM | POLLWRBAND)) &&
8287                     (sd_flags & STWRERR))) {
8288                         if (!(events & POLLNOERR)) {
8289                                 *reventsp = POLLERR;
8290                                 return (0);
8291                         }
8292                 }
8293         }
             /* A hangup takes the place of the write-side checks. */
8294         if (sd_flags & STRHUP) {
8295                 retevents |= POLLHUP;
8296         } else if (events & (POLLWRNORM | POLLWRBAND)) {
8297                 queue_t *tq;
8298                 queue_t *qp = stp->sd_wrq;
8299 
8300                 claimstr(qp);
8301                 /* Find next module forward that has a service procedure */
8302                 tq = qp->q_next->q_nfsrv;
8303                 ASSERT(tq != NULL);
8304 
8305                 if (polllock(&stp->sd_pollist, QLOCK(tq)) != 0) {
8306                         releasestr(qp);
8307                         *reventsp = POLLNVAL;
8308                         return (0);
8309                 }
8310                 if (events & POLLWRNORM) {
8311                         queue_t *sqp;
8312 
8313                         if (tq->q_flag & QFULL)
8314                                 /* ensure backq svc procedure runs */
8315                                 tq->q_flag |= QWANTW;
8316                         else if ((sqp = stp->sd_struiowrq) != NULL) {
8317                                 /* Check sync stream barrier write q */
                                     /* Only one queue lock is held at a time. */
8318                                 mutex_exit(QLOCK(tq));
8319                                 if (polllock(&stp->sd_pollist,
8320                                     QLOCK(sqp)) != 0) {
8321                                         releasestr(qp);
8322                                         *reventsp = POLLNVAL;
8323                                         return (0);
8324                                 }
8325                                 if (sqp->q_flag & QFULL)
8326                                         /* ensure pollwakeup() is done */
8327                                         sqp->q_flag |= QWANTWSYNC;
8328                                 else
8329                                         retevents |= POLLOUT;
8330                                 /* More write events to process ??? */
8331                                 if (! (events & POLLWRBAND)) {
8332                                         mutex_exit(QLOCK(sqp));
8333                                         releasestr(qp);
8334                                         goto chkrd;
8335                                 }
                                     /* Retake tq's lock for the band checks. */
8336                                 mutex_exit(QLOCK(sqp));
8337                                 if (polllock(&stp->sd_pollist,
8338                                     QLOCK(tq)) != 0) {
8339                                         releasestr(qp);
8340                                         *reventsp = POLLNVAL;
8341                                         return (0);
8342                                 }
8343                         } else
8344                                 retevents |= POLLOUT;
8345                 }
8346                 if (events & POLLWRBAND) {
                             /*
                              * Any band that is not full makes the stream
                              * band-writable; full bands are marked so that
                              * a wakeup is wanted.  No bands at all also
                              * counts as writable.
                              */
8347                         qbp = tq->q_bandp;
8348                         if (qbp) {
8349                                 while (qbp) {
8350                                         if (qbp->qb_flag & QB_FULL)
8351                                                 qbp->qb_flag |= QB_WANTW;
8352                                         else
8353                                                 retevents |= POLLWRBAND;
8354                                         qbp = qbp->qb_next;
8355                                 }
8356                         } else {
8357                                 retevents |= POLLWRBAND;
8358                         }
8359                 }
8360                 mutex_exit(QLOCK(tq));
8361                 releasestr(qp);
8362         }
8363 chkrd:
             /* With STRPRI set the read queue is not scanned at all. */
8364         if (sd_flags & STRPRI) {
8365                 retevents |= (events & POLLPRI);
8366         } else if (events & (POLLRDNORM | POLLRDBAND | POLLIN)) {
8367                 queue_t *qp = _RD(stp->sd_wrq);
8368                 int normevents = (events & (POLLIN | POLLRDNORM));
8369 
8370                 /*
8371                  * Note: Need to do polllock() here since ps_lock may be
8372                  * held. See bug 4191544.
8373                  */
8374                 if (polllock(&stp->sd_pollist, &stp->sd_lock) != 0) {
8375                         *reventsp = POLLNVAL;
8376                         return (0);
8377                 }
8378                 headlocked = 1;
                     /*
                      * Report read events for the first message on the queue
                      * that qualifies; the loop stops at that message.
                      */
8379                 mp = qp->q_first;
8380                 while (mp) {
8381                         /*
8382                          * For POLLRDDATA we scan b_cont and b_next until we
8383                          * find an M_DATA.
8384                          */
8385                         if ((events & POLLRDDATA) &&
8386                             mp->b_datap->db_type != M_DATA) {
8387                                 mblk_t *nmp = mp->b_cont;
8388 
8389                                 while (nmp != NULL &&
8390                                     nmp->b_datap->db_type != M_DATA)
8391                                         nmp = nmp->b_cont;
8392                                 if (nmp == NULL) {
8393                                         mp = mp->b_next;
8394                                         continue;
8395                                 }
8396                         }
8397                         if (mp->b_band == 0)
8398                                 retevents |= normevents;
8399                         else
8400                                 retevents |= (events & (POLLIN | POLLRDBAND));
8401                         break;
8402                 }
8403                 if (!(retevents & normevents) && (stp->sd_wakeq & RSLEEP)) {
8404                         /*
8405                          * Sync stream barrier read queue has data.
8406                          */
8407                         retevents |= normevents;
8408                 }
8409                 /* Treat eof as normal data */
8410                 if (sd_flags & STREOF)
8411                         retevents |= normevents;
8412         }
8413 
8414         /*
8415          * Pass back a pollhead if no events are pending or if edge-triggering
8416          * has been configured on this resource.
8417          */
8418         if ((retevents == 0 && !anyyet) || (events & POLLET)) {
8419                 *phpp = &stp->sd_pollist;
8420                 if (headlocked == 0) {
8421                         if (polllock(&stp->sd_pollist, &stp->sd_lock) != 0) {
8422                                 *reventsp = POLLNVAL;
8423                                 return (0);
8424                         }
8425                         headlocked = 1;
8426                 }
                     /* SR_POLLIN is updated with sd_lock held. */
8427                 stp->sd_rput_opt |= SR_POLLIN;
8428         }
8429 
8430         *reventsp = (short)retevents;
8431         if (headlocked)
8432                 mutex_exit(&stp->sd_lock);
8433         return (0);
8434 }
8435 
8436 /*
8437  * The purpose of putback() is to assure sleeping polls/reads
8438  * are awakened when there are no new messages arriving at the
8439  * stream head, and a message is placed back on the read queue.
8440  *
8441  * sd_lock must be held when messages are placed back on stream
8442  * head.  (getq() holds sd_lock when it removes messages from
8443  * the queue)
      *
      * Arguments:
      *      stp     stream head; the caller must hold sd_lock.
      *      q       queue the message is put back on.
      *      bp      message being put back.
      *      band    selects the poll events for the wakeup: 0 gives
      *              POLLIN | POLLRDNORM, anything else POLLIN | POLLRDBAND.
      *
      * sd_lock is held on return, but it is dropped and re-acquired around
      * pollwakeup() when a wakeup is issued.
8444  */
8445 
8446 static void
8447 putback(struct stdata *stp, queue_t *q, mblk_t *bp, int band)
8448 {
8449         mblk_t  *qfirst;
8450         ASSERT(MUTEX_HELD(&stp->sd_lock));
8451 
8452         /*
8453          * As a result of lock-step ordering around q_lock and sd_lock,
8454          * it's possible for function calls like putnext() and
8455          * canputnext() to get an inaccurate picture of how much
8456          * data is really being processed at the stream head.
8457          * We only consolidate with existing messages on the queue
8458          * if the length of the message we want to put back is smaller
8459          * than the queue hiwater mark.
8460          */
8461         if ((stp->sd_rput_opt & SR_CONSOL_DATA) &&
8462             (DB_TYPE(bp) == M_DATA) && ((qfirst = q->q_first) != NULL) &&
8463             (DB_TYPE(qfirst) == M_DATA) &&
8464             ((qfirst->b_flag & (MSGMARK|MSGDELIM)) == 0) &&
8465             ((bp->b_flag & (MSGMARK|MSGDELIM|MSGMARKNEXT)) == 0) &&
8466             (mp_cont_len(bp, NULL) < q->q_hiwat)) {
8467                 /*
8468                  * We use the same logic as defined in strrput()
8469                  * but in reverse as we are putting back onto the
8470                  * queue and want to retain byte ordering.
8471                  * Consolidate M_DATA messages with M_DATA ONLY.
8472                  * strrput() allows the consolidation of M_DATA onto
8473                  * M_PROTO | M_PCPROTO but not the other way round.
8474                  *
8475                  * The consolidation does not take place if the message
8476                  * we are returning to the queue is marked with either
8477                  * of the marks or the delim flag or if q_first
8478                  * is marked with MSGMARK. The MSGMARK check is needed to
8479                  * handle the odd semantics of MSGMARK where essentially
8480                  * the whole message is to be treated as marked.
8481                  * Carry any MSGMARKNEXT and MSGNOTMARKNEXT from q_first
8482                  * to the front of the b_cont chain.
8483                  */
8484                 rmvq_noenab(q, qfirst);
8485 
8486                 /*
8487                  * The first message in the b_cont list
8488                  * tracks MSGMARKNEXT and MSGNOTMARKNEXT.
8489                  * We need to handle the case where we
8490                  * are appending:
8491                  *
8492                  * 1) a MSGMARKNEXT to a MSGNOTMARKNEXT.
8493                  * 2) a MSGMARKNEXT to a plain message.
8494                  * 3) a MSGNOTMARKNEXT to a plain message
8495                  * 4) a MSGNOTMARKNEXT to a MSGNOTMARKNEXT
8496                  *    message.
8497                  *
8498                  * Thus we never append a MSGMARKNEXT or
8499                  * MSGNOTMARKNEXT to a MSGMARKNEXT message.
8500                  */
8501                 if (qfirst->b_flag & MSGMARKNEXT) {
8502                         bp->b_flag |= MSGMARKNEXT;
8503                         bp->b_flag &= ~MSGNOTMARKNEXT;
8504                         qfirst->b_flag &= ~MSGMARKNEXT;
8505                 } else if (qfirst->b_flag & MSGNOTMARKNEXT) {
8506                         bp->b_flag |= MSGNOTMARKNEXT;
8507                         qfirst->b_flag &= ~MSGNOTMARKNEXT;
8508                 }
8509 
                     /* Chain the old first message behind the returned one. */
8510                 linkb(bp, qfirst);
8511         }
8512         (void) putbq(q, bp);
8513 
8514         /*
8515          * A message may have come in when the sd_lock was dropped in the
8516          * calling routine. If this is the case and STR*ATMARK info was
8517          * received, need to move that from the stream head to the q_last
8518          * so that SIOCATMARK can return the proper value.
8519          */
8520         if (stp->sd_flag & (STRATMARK | STRNOTATMARK)) {
8521                 unsigned short *flagp = &q->q_last->b_flag;
8522                 uint_t b_flag = (uint_t)*flagp;
8523 
8524                 if (stp->sd_flag & STRATMARK) {
8525                         b_flag &= ~MSGNOTMARKNEXT;
8526                         b_flag |= MSGMARKNEXT;
8527                         stp->sd_flag &= ~STRATMARK;
8528                 } else {
8529                         b_flag &= ~MSGMARKNEXT;
8530                         b_flag |= MSGNOTMARKNEXT;
8531                         stp->sd_flag &= ~STRNOTATMARK;
8532                 }
8533                 *flagp = (unsigned short) b_flag;
8534         }
8535 
8536 #ifdef  DEBUG
8537         /*
8538          * Make sure that the flags are not messed up.
8539          */
8540         {
8541                 mblk_t *mp;
8542                 mp = q->q_last;
8543                 while (mp != NULL) {
8544                         ASSERT((mp->b_flag & (MSGMARKNEXT|MSGNOTMARKNEXT)) !=
8545                             (MSGMARKNEXT|MSGNOTMARKNEXT));
8546                         mp = mp->b_cont;
8547                 }
8548         }
8549 #endif
             /* Wakeups are only needed if bp ended up first on the queue. */
8550         if (q->q_first == bp) {
8551                 short pollevents;
8552 
8553                 if (stp->sd_flag & RSLEEP) {
8554                         stp->sd_flag &= ~RSLEEP;
8555                         cv_broadcast(&q->q_wait);
8556                 }
8557                 if (stp->sd_flag & STRPRI) {
8558                         pollevents = POLLPRI;
8559                 } else {
8560                         if (band == 0) {
                                     /*
                                      * SR_POLLIN clear: no poll wakeup is
                                      * issued.  sd_lock was never dropped
                                      * on this path.
                                      */
8561                                 if (!(stp->sd_rput_opt & SR_POLLIN))
8562                                         return;
8563                                 stp->sd_rput_opt &= ~SR_POLLIN;
8564                                 pollevents = POLLIN | POLLRDNORM;
8565                         } else {
8566                                 pollevents = POLLIN | POLLRDBAND;
8567                         }
8568                 }
                     /* pollwakeup() is called with sd_lock dropped. */
8569                 mutex_exit(&stp->sd_lock);
8570                 pollwakeup(&stp->sd_pollist, pollevents);
8571                 mutex_enter(&stp->sd_lock);
8572         }
8573 }
8574 
8575 /*
8576  * Look up the vnode behind the stream head that owns queue qp, take
8577  * a hold on it with VN_HOLD() and hand it back to the caller.
8578  * Keeping the queue from going away during the call (e.g. guarding
8579  * against a pop) is the caller's job.
8580  */
8581 vnode_t *
8582 strq2vp(queue_t *qp)
8583 {
8584         vnode_t *held_vp = STREAM(qp)->sd_vnode;
8585 
8586         ASSERT(held_vp != NULL);
8587         VN_HOLD(held_vp);
8588         return (held_vp);
8589 }
8590 
8591 /*
8592  * Return vp's stream head write queue; caller keeps stream and vnode open.
8593  */
8594 queue_t *
8595 strvp2wq(vnode_t *vp)
8596 {
8597         struct stdata *head = vp->v_stream;
8598 
8599         ASSERT(head != NULL);
8600         return (head->sd_wrq);
8601 }
8602 
8603 /*
8604  * pollwakeup() vp's stream head; caller keeps stream and vnode from closing.
8605  */
8606 void
8607 strpollwakeup(vnode_t *vp, short event)
8608 {
8609         struct stdata *head = vp->v_stream;
8610 
8611         ASSERT(head);
8612         pollwakeup(&head->sd_pollist, event);
8613 }
8614 
8615 /*
8616  * Join the stream heads of two vnodes.  If both resolve to the same write
8617  * queue it is looped back onto its own read side; otherwise the heads are
8618  * cross-linked and flagged STRMATE.  Only valid for bare stream heads.
8619  */
8620 void
8621 strmate(vnode_t *vp1, vnode_t *vp2)
8622 {
8623         queue_t *wq_a = strvp2wq(vp1);
8624         queue_t *wq_b = strvp2wq(vp2);
8625         struct stdata *head_a, *head_b;
8626 
8627         /*
8628          * Neither stream may have a module pushed yet, and each stream
8629          * head must have a service procedure so q_nfsrv can stay as is.
8630          */
8631         ASSERT(wq_a->q_next == NULL && wq_b->q_next == NULL);
8632         ASSERT(wq_a->q_qinfo->qi_srvp != NULL);
8633         ASSERT(wq_b->q_qinfo->qi_srvp != NULL);
8634 
8635         if (wq_a != wq_b) {
8636                 /* Distinct queues: cross-link both heads (full mate). */
8637                 head_a = STREAM(wq_a);
8638                 head_b = STREAM(wq_b);
8639                 wq_a->q_next = _RD(wq_b);
8640                 wq_b->q_next = _RD(wq_a);
8641                 head_a->sd_mate = head_b;
8642                 head_a->sd_flag |= STRMATE;
8643                 head_b->sd_mate = head_a;
8644                 head_b->sd_flag |= STRMATE;
8645         } else {
8646                 /* Same queue: just twist write side onto read side. */
8647                 wq_a->q_next = _RD(wq_a);
8648         }
8649 }
8650 
8651 /*
8652  * XXX will go away when console is correctly fixed.
8653  * Drop the I_SETSIG registrations left on the console stream by
8654  * processes that have gone away.  Called only for cnopen, which
8655  * never calls strclean().
8656  */
8657 void
8658 str_cn_clean(struct vnode *vp)
8659 {
8660         struct stdata *stp;
8661         strsig_t **linkp;       /* link pointing at the current entry */
8662         strsig_t *sigp;
8663         struct pid *pidp;
8664         boolean_t pruned = B_FALSE;
8665 
8666         ASSERT(vp->v_stream);
8667         stp = vp->v_stream;
8668         mutex_enter(&stp->sd_lock);
8669         linkp = &stp->sd_siglist;
8670         while ((sigp = *linkp) != NULL) {
8671                 mutex_enter(&pidlock);
8672                 pidp = sigp->ss_pidp;
8673                 if (!pidp->pid_prinactive) {
8674                         /* Proc still exists: keep the entry, move on. */
8675                         mutex_exit(&pidlock);
8676                         linkp = &sigp->ss_next;
8677                         continue;
8678                 }
8679 
8680                 /*
8681                  * The proc is gone: unlink the entry, release its PID
8682                  * while pidlock is still held, then free the entry.
8683                  */
8684                 *linkp = sigp->ss_next;
8685                 ASSERT(pidp->pid_ref <= 1);
8686                 PID_RELE(pidp);
8687                 mutex_exit(&pidlock);
8688                 kmem_free(sigp, sizeof (strsig_t));
8689                 pruned = B_TRUE;
8690         }
8691 
8692         if (pruned) {
8693                 /* Recompute the event mask from the surviving entries. */
8694                 stp->sd_sigflags = 0;
8695                 for (sigp = stp->sd_siglist; sigp; sigp = sigp->ss_next)
8696                         stp->sd_sigflags |= sigp->ss_events;
8697         }
8698         mutex_exit(&stp->sd_lock);
8699 }
8700 
8701 /* Return B_TRUE iff some M_DATA block on the b_cont chain is non-empty. */
8702 static boolean_t
8703 msghasdata(mblk_t *bp)
8704 {
8705         for (; bp != NULL; bp = bp->b_cont) {
8706                 /* Blocks of any other type never count as data. */
8707                 if (DB_TYPE(bp) != M_DATA)
8708                         continue;
8709                 ASSERT(bp->b_wptr >= bp->b_rptr);
8710                 if (bp->b_wptr > bp->b_rptr)
8711                         return (B_TRUE);
8712         }
8713         return (B_FALSE);
8714 }