1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*      Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T     */
  22 /*        All Rights Reserved   */
  23 
  24 
  25 /*
  26  * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
  27  * Copyright 2017 Joyent, Inc.
  28  * Copyright 2019 OmniOS Community Edition (OmniOSce) Association.
  29  */
  30 
  31 #include <sys/types.h>
  32 #include <sys/sysmacros.h>
  33 #include <sys/param.h>
  34 #include <sys/errno.h>
  35 #include <sys/signal.h>
  36 #include <sys/stat.h>
  37 #include <sys/proc.h>
  38 #include <sys/cred.h>
  39 #include <sys/user.h>
  40 #include <sys/vnode.h>
  41 #include <sys/file.h>
  42 #include <sys/stream.h>
  43 #include <sys/strsubr.h>
  44 #include <sys/stropts.h>
  45 #include <sys/tihdr.h>
  46 #include <sys/var.h>
  47 #include <sys/poll.h>
  48 #include <sys/termio.h>
  49 #include <sys/ttold.h>
  50 #include <sys/systm.h>
  51 #include <sys/uio.h>
  52 #include <sys/cmn_err.h>
  53 #include <sys/sad.h>
  54 #include <sys/netstack.h>
  55 #include <sys/priocntl.h>
  56 #include <sys/jioctl.h>
  57 #include <sys/procset.h>
  58 #include <sys/session.h>
  59 #include <sys/kmem.h>
  60 #include <sys/filio.h>
  61 #include <sys/vtrace.h>
  62 #include <sys/debug.h>
  63 #include <sys/strredir.h>
  64 #include <sys/fs/fifonode.h>
  65 #include <sys/fs/snode.h>
  66 #include <sys/strlog.h>
  67 #include <sys/strsun.h>
  68 #include <sys/project.h>
  69 #include <sys/kbio.h>
  70 #include <sys/msio.h>
  71 #include <sys/tty.h>
  72 #include <sys/ptyvar.h>
  73 #include <sys/vuid_event.h>
  74 #include <sys/modctl.h>
  75 #include <sys/sunddi.h>
  76 #include <sys/sunldi_impl.h>
  77 #include <sys/autoconf.h>
  78 #include <sys/policy.h>
  79 #include <sys/dld.h>
  80 #include <sys/zone.h>
  81 #include <sys/limits.h>
  82 #include <c2/audit.h>
  83 
  84 /*
  85  * This define helps improve the readability of streams code while
  86  * still maintaining a very old streams performance enhancement.  The
  87  * performance enhancement basically involved having all callers
  88  * of straccess() perform the first check that straccess() will do
  89  * locally before actually calling straccess().  (There by reducing
  90  * the number of unnecessary calls to straccess().)
  91  */
  92 #define i_straccess(x, y)       ((stp->sd_sidp == NULL) ? 0 : \
  93                                     (stp->sd_vnode->v_type == VFIFO) ? 0 : \
  94                                     straccess((x), (y)))
  95 
/*
 * What is mblk_pull_len?
 *
 * If a streams message consists of many short mblks,
 * a performance degradation occurs from copyout overhead.
 * To decrease the per-mblk overhead, messages that are
 * likely to consist of many small mblks are pulled up into
 * one contiguous chunk of memory.
 *
 * To avoid the processing overhead of examining every
 * mblk, a quick heuristic is used: if the first mblk in
 * the message is shorter than mblk_pull_len, it is likely
 * that the rest of the mblks will be short as well.
 *
 * This heuristic was decided upon after performance tests
 * indicated that anything more complex slowed down the main
 * code path.
 */
 114 #define MBLK_PULL_LEN 64
 115 uint32_t mblk_pull_len = MBLK_PULL_LEN;
 116 
 117 /*
 118  * The sgttyb_handling flag controls the handling of the old BSD
 119  * TIOCGETP, TIOCSETP, and TIOCSETN ioctls as follows:
 120  *
 121  * 0 - Emit no warnings at all and retain old, broken behavior.
 122  * 1 - Emit no warnings and silently handle new semantics.
 123  * 2 - Send cmn_err(CE_NOTE) when either TIOCSETP or TIOCSETN is used
 124  *     (once per system invocation).  Handle with new semantics.
 125  * 3 - Send SIGSYS when any TIOCGETP, TIOCSETP, or TIOCSETN call is
 126  *     made (so that offenders drop core and are easy to debug).
 127  *
 128  * The "new semantics" are that TIOCGETP returns B38400 for
 129  * sg_[io]speed if the corresponding value is over B38400, and that
 130  * TIOCSET[PN] accept B38400 in these cases to mean "retain current
 131  * bit rate."
 132  */
 133 int sgttyb_handling = 1;
 134 static boolean_t sgttyb_complaint;
 135 
 136 /* don't push drcompat module by default on Style-2 streams */
 137 static int push_drcompat = 0;
 138 
 139 /*
 140  * id value used to distinguish between different ioctl messages
 141  */
 142 static uint32_t ioc_id;
 143 
 144 static void putback(struct stdata *, queue_t *, mblk_t *, int);
 145 static void strcleanall(struct vnode *);
 146 static int strwsrv(queue_t *);
 147 static int strdocmd(struct stdata *, struct strcmd *, cred_t *);
 148 
 149 /*
 150  * qinit and module_info structures for stream head read and write queues
 151  */
 152 struct module_info strm_info = { 0, "strrhead", 0, INFPSZ, STRHIGH, STRLOW };
 153 struct module_info stwm_info = { 0, "strwhead", 0, 0, 0, 0 };
 154 struct qinit strdata = { strrput, NULL, NULL, NULL, NULL, &strm_info };
 155 struct qinit stwdata = { NULL, strwsrv, NULL, NULL, NULL, &stwm_info };
 156 struct module_info fiform_info = { 0, "fifostrrhead", 0, PIPE_BUF, FIFOHIWAT,
 157     FIFOLOWAT };
 158 struct module_info fifowm_info = { 0, "fifostrwhead", 0, 0, 0, 0 };
 159 struct qinit fifo_strdata = { strrput, NULL, NULL, NULL, NULL, &fiform_info };
 160 struct qinit fifo_stwdata = { NULL, strwsrv, NULL, NULL, NULL, &fifowm_info };
 161 
 162 extern kmutex_t strresources;   /* protects global resources */
 163 extern kmutex_t muxifier;       /* single-threads multiplexor creation */
 164 
 165 static boolean_t msghasdata(mblk_t *bp);
 166 #define msgnodata(bp) (!msghasdata(bp))
 167 
 168 /*
 169  * Stream head locking notes:
 170  *      There are four monitors associated with the stream head:
 171  *      1. v_stream monitor: in stropen() and strclose() v_lock
 172  *              is held while the association of vnode and stream
 173  *              head is established or tested for.
 174  *      2. open/close/push/pop monitor: sd_lock is held while each
 175  *              thread bids for exclusive access to this monitor
 176  *              for opening or closing a stream.  In addition, this
 177  *              monitor is entered during pushes and pops.  This
 178  *              guarantees that during plumbing operations there
 179  *              is only one thread trying to change the plumbing.
 180  *              Any other threads present in the stream are only
 181  *              using the plumbing.
 182  *      3. read/write monitor: in the case of read, a thread holds
 183  *              sd_lock while trying to get data from the stream
 184  *              head queue.  if there is none to fulfill a read
 185  *              request, it sets RSLEEP and calls cv_wait_sig() down
 186  *              in strwaitq() to await the arrival of new data.
 187  *              when new data arrives in strrput(), sd_lock is acquired
 188  *              before testing for RSLEEP and calling cv_broadcast().
 189  *              the behavior of strwrite(), strwsrv(), and WSLEEP
 190  *              mirror this.
 191  *      4. ioctl monitor: sd_lock is gotten to ensure that only one
 192  *              thread is doing an ioctl at a time.
 193  */
 194 
 195 static int
 196 push_mod(queue_t *qp, dev_t *devp, struct stdata *stp, const char *name,
 197     int anchor, cred_t *crp, uint_t anchor_zoneid)
 198 {
 199         int error;
 200         fmodsw_impl_t *fp;
 201 
 202         if (stp->sd_flag & (STRHUP|STRDERR|STWRERR)) {
 203                 error = (stp->sd_flag & STRHUP) ? ENXIO : EIO;
 204                 return (error);
 205         }
 206         if (stp->sd_pushcnt >= nstrpush) {
 207                 return (EINVAL);
 208         }
 209 
 210         if ((fp = fmodsw_find(name, FMODSW_HOLD | FMODSW_LOAD)) == NULL) {
 211                 stp->sd_flag |= STREOPENFAIL;
 212                 return (EINVAL);
 213         }
 214 
 215         /*
 216          * push new module and call its open routine via qattach
 217          */
 218         if ((error = qattach(qp, devp, 0, crp, fp, B_FALSE)) != 0)
 219                 return (error);
 220 
 221         /*
 222          * Check to see if caller wants a STREAMS anchor
 223          * put at this place in the stream, and add if so.
 224          */
 225         mutex_enter(&stp->sd_lock);
 226         if (anchor == stp->sd_pushcnt) {
 227                 stp->sd_anchor = stp->sd_pushcnt;
 228                 stp->sd_anchorzone = anchor_zoneid;
 229         }
 230         mutex_exit(&stp->sd_lock);
 231 
 232         return (0);
 233 }
 234 
 235 /*
 236  * Open a stream device.
 237  */
 238 int
 239 stropen(vnode_t *vp, dev_t *devp, int flag, cred_t *crp)
 240 {
 241         struct stdata *stp;
 242         queue_t *qp;
 243         int s;
 244         dev_t dummydev, savedev;
 245         struct autopush *ap;
 246         struct dlautopush dlap;
 247         int error = 0;
 248         ssize_t rmin, rmax;
 249         int cloneopen;
 250         queue_t *brq;
 251         major_t major;
 252         str_stack_t *ss;
 253         zoneid_t zoneid;
 254         uint_t anchor;
 255 
 256         /*
 257          * If the stream already exists, wait for any open in progress
 258          * to complete, then call the open function of each module and
 259          * driver in the stream.  Otherwise create the stream.
 260          */
 261         TRACE_1(TR_FAC_STREAMS_FR, TR_STROPEN, "stropen:%p", vp);
 262 retry:
 263         mutex_enter(&vp->v_lock);
 264         if ((stp = vp->v_stream) != NULL) {
 265 
 266                 /*
 267                  * Waiting for stream to be created to device
 268                  * due to another open.
 269                  */
 270                 mutex_exit(&vp->v_lock);
 271 
 272                 if (STRMATED(stp)) {
 273                         struct stdata *strmatep = stp->sd_mate;
 274 
 275                         STRLOCKMATES(stp);
 276                         if (strmatep->sd_flag & (STWOPEN|STRCLOSE|STRPLUMB)) {
 277                                 if (flag & (FNDELAY|FNONBLOCK)) {
 278                                         error = EAGAIN;
 279                                         mutex_exit(&strmatep->sd_lock);
 280                                         goto ckreturn;
 281                                 }
 282                                 mutex_exit(&stp->sd_lock);
 283                                 if (!cv_wait_sig(&strmatep->sd_monitor,
 284                                     &strmatep->sd_lock)) {
 285                                         error = EINTR;
 286                                         mutex_exit(&strmatep->sd_lock);
 287                                         mutex_enter(&stp->sd_lock);
 288                                         goto ckreturn;
 289                                 }
 290                                 mutex_exit(&strmatep->sd_lock);
 291                                 goto retry;
 292                         }
 293                         if (stp->sd_flag & (STWOPEN|STRCLOSE|STRPLUMB)) {
 294                                 if (flag & (FNDELAY|FNONBLOCK)) {
 295                                         error = EAGAIN;
 296                                         mutex_exit(&strmatep->sd_lock);
 297                                         goto ckreturn;
 298                                 }
 299                                 mutex_exit(&strmatep->sd_lock);
 300                                 if (!cv_wait_sig(&stp->sd_monitor,
 301                                     &stp->sd_lock)) {
 302                                         error = EINTR;
 303                                         goto ckreturn;
 304                                 }
 305                                 mutex_exit(&stp->sd_lock);
 306                                 goto retry;
 307                         }
 308 
 309                         if (stp->sd_flag & (STRDERR|STWRERR)) {
 310                                 error = EIO;
 311                                 mutex_exit(&strmatep->sd_lock);
 312                                 goto ckreturn;
 313                         }
 314 
 315                         stp->sd_flag |= STWOPEN;
 316                         STRUNLOCKMATES(stp);
 317                 } else {
 318                         mutex_enter(&stp->sd_lock);
 319                         if (stp->sd_flag & (STWOPEN|STRCLOSE|STRPLUMB)) {
 320                                 if (flag & (FNDELAY|FNONBLOCK)) {
 321                                         error = EAGAIN;
 322                                         goto ckreturn;
 323                                 }
 324                                 if (!cv_wait_sig(&stp->sd_monitor,
 325                                     &stp->sd_lock)) {
 326                                         error = EINTR;
 327                                         goto ckreturn;
 328                                 }
 329                                 mutex_exit(&stp->sd_lock);
 330                                 goto retry;  /* could be clone! */
 331                         }
 332 
 333                         if (stp->sd_flag & (STRDERR|STWRERR)) {
 334                                 error = EIO;
 335                                 goto ckreturn;
 336                         }
 337 
 338                         stp->sd_flag |= STWOPEN;
 339                         mutex_exit(&stp->sd_lock);
 340                 }
 341 
 342                 /*
 343                  * Open all modules and devices down stream to notify
 344                  * that another user is streaming.  For modules, set the
 345                  * last argument to MODOPEN and do not pass any open flags.
 346                  * Ignore dummydev since this is not the first open.
 347                  */
 348                 claimstr(stp->sd_wrq);
 349                 qp = stp->sd_wrq;
 350                 while (_SAMESTR(qp)) {
 351                         qp = qp->q_next;
 352                         if ((error = qreopen(_RD(qp), devp, flag, crp)) != 0)
 353                                 break;
 354                 }
 355                 releasestr(stp->sd_wrq);
 356                 mutex_enter(&stp->sd_lock);
 357                 stp->sd_flag &= ~(STRHUP|STWOPEN|STRDERR|STWRERR);
 358                 stp->sd_rerror = 0;
 359                 stp->sd_werror = 0;
 360 ckreturn:
 361                 cv_broadcast(&stp->sd_monitor);
 362                 mutex_exit(&stp->sd_lock);
 363                 return (error);
 364         }
 365 
 366         /*
 367          * This vnode isn't streaming.  SPECFS already
 368          * checked for multiple vnodes pointing to the
 369          * same stream, so create a stream to the driver.
 370          */
 371         qp = allocq();
 372         stp = shalloc(qp);
 373 
 374         /*
 375          * Initialize stream head.  shalloc() has given us
 376          * exclusive access, and we have the vnode locked;
 377          * we can do whatever we want with stp.
 378          */
 379         stp->sd_flag = STWOPEN;
 380         stp->sd_siglist = NULL;
 381         stp->sd_pollist.ph_list = NULL;
 382         stp->sd_sigflags = 0;
 383         stp->sd_mark = NULL;
 384         stp->sd_closetime = STRTIMOUT;
 385         stp->sd_sidp = NULL;
 386         stp->sd_pgidp = NULL;
 387         stp->sd_vnode = vp;
 388         stp->sd_pvnode = NULL;
 389         stp->sd_rerror = 0;
 390         stp->sd_werror = 0;
 391         stp->sd_wroff = 0;
 392         stp->sd_tail = 0;
 393         stp->sd_iocblk = NULL;
 394         stp->sd_cmdblk = NULL;
 395         stp->sd_pushcnt = 0;
 396         stp->sd_qn_minpsz = 0;
 397         stp->sd_qn_maxpsz = INFPSZ - 1;      /* used to check for initialization */
 398         stp->sd_maxblk = INFPSZ;
 399         qp->q_ptr = _WR(qp)->q_ptr = stp;
 400         STREAM(qp) = STREAM(_WR(qp)) = stp;
 401         vp->v_stream = stp;
 402         mutex_exit(&vp->v_lock);
 403         if (vp->v_type == VFIFO) {
 404                 stp->sd_flag |= OLDNDELAY;
 405                 /*
 406                  * This means, both for pipes and fifos
 407                  * strwrite will send SIGPIPE if the other
 408                  * end is closed. For putmsg it depends
 409                  * on whether it is a XPG4_2 application
 410                  * or not
 411                  */
 412                 stp->sd_wput_opt = SW_SIGPIPE;
 413 
 414                 /* setq might sleep in kmem_alloc - avoid holding locks. */
 415                 setq(qp, &fifo_strdata, &fifo_stwdata, NULL, QMTSAFE,
 416                     SQ_CI|SQ_CO, B_FALSE);
 417 
 418                 set_qend(qp);
 419                 stp->sd_strtab = fifo_getinfo();
 420                 _WR(qp)->q_nfsrv = _WR(qp);
 421                 qp->q_nfsrv = qp;
 422                 /*
 423                  * Wake up others that are waiting for stream to be created.
 424                  */
 425                 mutex_enter(&stp->sd_lock);
 426                 /*
 427                  * nothing is be pushed on stream yet, so
 428                  * optimized stream head packetsizes are just that
 429                  * of the read queue
 430                  */
 431                 stp->sd_qn_minpsz = qp->q_minpsz;
 432                 stp->sd_qn_maxpsz = qp->q_maxpsz;
 433                 stp->sd_flag &= ~STWOPEN;
 434                 goto fifo_opendone;
 435         }
 436         /* setq might sleep in kmem_alloc - avoid holding locks. */
 437         setq(qp, &strdata, &stwdata, NULL, QMTSAFE, SQ_CI|SQ_CO, B_FALSE);
 438 
 439         set_qend(qp);
 440 
 441         /*
 442          * Open driver and create stream to it (via qattach).
 443          */
 444         savedev = *devp;
 445         cloneopen = (getmajor(*devp) == clone_major);
 446         if ((error = qattach(qp, devp, flag, crp, NULL, B_FALSE)) != 0) {
 447                 mutex_enter(&vp->v_lock);
 448                 vp->v_stream = NULL;
 449                 mutex_exit(&vp->v_lock);
 450                 mutex_enter(&stp->sd_lock);
 451                 cv_broadcast(&stp->sd_monitor);
 452                 mutex_exit(&stp->sd_lock);
 453                 freeq(_RD(qp));
 454                 shfree(stp);
 455                 return (error);
 456         }
 457         /*
 458          * Set sd_strtab after open in order to handle clonable drivers
 459          */
 460         stp->sd_strtab = STREAMSTAB(getmajor(*devp));
 461 
 462         /*
 463          * Historical note: dummydev used to be be prior to the initial
 464          * open (via qattach above), which made the value seen
 465          * inconsistent between an I_PUSH and an autopush of a module.
 466          */
 467         dummydev = *devp;
 468 
 469         /*
 470          * For clone open of old style (Q not associated) network driver,
 471          * push DRMODNAME module to handle DL_ATTACH/DL_DETACH
 472          */
 473         brq = _RD(_WR(qp)->q_next);
 474         major = getmajor(*devp);
 475         if (push_drcompat && cloneopen && NETWORK_DRV(major) &&
 476             ((brq->q_flag & _QASSOCIATED) == 0)) {
 477                 if (push_mod(qp, &dummydev, stp, DRMODNAME, 0, crp, 0) != 0)
 478                         cmn_err(CE_WARN, "cannot push " DRMODNAME
 479                             " streams module");
 480         }
 481 
 482         if (!NETWORK_DRV(major)) {
 483                 savedev = *devp;
 484         } else {
 485                 /*
 486                  * For network devices, process differently based on the
 487                  * return value from dld_autopush():
 488                  *
 489                  *   0: the passed-in device points to a GLDv3 datalink with
 490                  *   per-link autopush configuration; use that configuration
 491                  *   and ignore any per-driver autopush configuration.
 492                  *
 493                  *   1: the passed-in device points to a physical GLDv3
 494                  *   datalink without per-link autopush configuration.  The
 495                  *   passed in device was changed to refer to the actual
 496                  *   physical device (if it's not already); we use that new
 497                  *   device to look up any per-driver autopush configuration.
 498                  *
 499                  *   -1: neither of the above cases applied; use the initial
 500                  *   device to look up any per-driver autopush configuration.
 501                  */
 502                 switch (dld_autopush(&savedev, &dlap)) {
 503                 case 0:
 504                         zoneid = crgetzoneid(crp);
 505                         for (s = 0; s < dlap.dap_npush; s++) {
 506                                 error = push_mod(qp, &dummydev, stp,
 507                                     dlap.dap_aplist[s], dlap.dap_anchor, crp,
 508                                     zoneid);
 509                                 if (error != 0)
 510                                         break;
 511                         }
 512                         goto opendone;
 513                 case 1:
 514                         break;
 515                 case -1:
 516                         savedev = *devp;
 517                         break;
 518                 }
 519         }
 520         /*
 521          * Find the autopush configuration based on "savedev". Start with the
 522          * global zone. If not found check in the local zone.
 523          */
 524         zoneid = GLOBAL_ZONEID;
 525 retryap:
 526         ss = netstack_find_by_stackid(zoneid_to_netstackid(zoneid))->
 527             netstack_str;
 528         if ((ap = sad_ap_find_by_dev(savedev, ss)) == NULL) {
 529                 netstack_rele(ss->ss_netstack);
 530                 if (zoneid == GLOBAL_ZONEID) {
 531                         /*
 532                          * None found. Also look in the zone's autopush table.
 533                          */
 534                         zoneid = crgetzoneid(crp);
 535                         if (zoneid != GLOBAL_ZONEID)
 536                                 goto retryap;
 537                 }
 538                 goto opendone;
 539         }
 540         anchor = ap->ap_anchor;
 541         zoneid = crgetzoneid(crp);
 542         for (s = 0; s < ap->ap_npush; s++) {
 543                 error = push_mod(qp, &dummydev, stp, ap->ap_list[s],
 544                     anchor, crp, zoneid);
 545                 if (error != 0)
 546                         break;
 547         }
 548         sad_ap_rele(ap, ss);
 549         netstack_rele(ss->ss_netstack);
 550 
 551 opendone:
 552 
 553         /*
 554          * let specfs know that open failed part way through
 555          */
 556         if (error) {
 557                 mutex_enter(&stp->sd_lock);
 558                 stp->sd_flag |= STREOPENFAIL;
 559                 mutex_exit(&stp->sd_lock);
 560         }
 561 
 562         /*
 563          * Wake up others that are waiting for stream to be created.
 564          */
 565         mutex_enter(&stp->sd_lock);
 566         stp->sd_flag &= ~STWOPEN;
 567 
 568         /*
 569          * As a performance concern we are caching the values of
 570          * q_minpsz and q_maxpsz of the module below the stream
 571          * head in the stream head.
 572          */
 573         mutex_enter(QLOCK(stp->sd_wrq->q_next));
 574         rmin = stp->sd_wrq->q_next->q_minpsz;
 575         rmax = stp->sd_wrq->q_next->q_maxpsz;
 576         mutex_exit(QLOCK(stp->sd_wrq->q_next));
 577 
 578         /* do this processing here as a performance concern */
 579         if (strmsgsz != 0) {
 580                 if (rmax == INFPSZ)
 581                         rmax = strmsgsz;
 582                 else
 583                         rmax = MIN(strmsgsz, rmax);
 584         }
 585 
 586         mutex_enter(QLOCK(stp->sd_wrq));
 587         stp->sd_qn_minpsz = rmin;
 588         stp->sd_qn_maxpsz = rmax;
 589         mutex_exit(QLOCK(stp->sd_wrq));
 590 
 591 fifo_opendone:
 592         cv_broadcast(&stp->sd_monitor);
 593         mutex_exit(&stp->sd_lock);
 594         return (error);
 595 }
 596 
 597 static int strsink(queue_t *, mblk_t *);
 598 static struct qinit deadrend = {
 599         strsink, NULL, NULL, NULL, NULL, &strm_info, NULL
 600 };
 601 static struct qinit deadwend = {
 602         NULL, NULL, NULL, NULL, NULL, &stwm_info, NULL
 603 };
 604 
 605 /*
 606  * Close a stream.
 607  * This is called from closef() on the last close of an open stream.
 608  * Strclean() will already have removed the siglist and pollist
 609  * information, so all that remains is to remove all multiplexor links
 610  * for the stream, pop all the modules (and the driver), and free the
 611  * stream structure.
 612  */
 613 
 614 int
 615 strclose(struct vnode *vp, int flag, cred_t *crp)
 616 {
 617         struct stdata *stp;
 618         queue_t *qp;
 619         int rval;
 620         int freestp = 1;
 621         queue_t *rmq;
 622 
 623         TRACE_1(TR_FAC_STREAMS_FR,
 624             TR_STRCLOSE, "strclose:%p", vp);
 625         ASSERT(vp->v_stream);
 626 
 627         stp = vp->v_stream;
 628         ASSERT(!(stp->sd_flag & STPLEX));
 629         qp = stp->sd_wrq;
 630 
 631         /*
 632          * Needed so that strpoll will return non-zero for this fd.
 633          * Note that with POLLNOERR STRHUP does still cause POLLHUP.
 634          */
 635         mutex_enter(&stp->sd_lock);
 636         stp->sd_flag |= STRHUP;
 637         mutex_exit(&stp->sd_lock);
 638 
 639         /*
 640          * If the registered process or process group did not have an
 641          * open instance of this stream then strclean would not be
 642          * called. Thus at the time of closing all remaining siglist entries
 643          * are removed.
 644          */
 645         if (stp->sd_siglist != NULL)
 646                 strcleanall(vp);
 647 
 648         ASSERT(stp->sd_siglist == NULL);
 649         ASSERT(stp->sd_sigflags == 0);
 650 
 651         if (STRMATED(stp)) {
 652                 struct stdata *strmatep = stp->sd_mate;
 653                 int waited = 1;
 654 
 655                 STRLOCKMATES(stp);
 656                 while (waited) {
 657                         waited = 0;
 658                         while (stp->sd_flag & (STWOPEN|STRCLOSE|STRPLUMB)) {
 659                                 mutex_exit(&strmatep->sd_lock);
 660                                 cv_wait(&stp->sd_monitor, &stp->sd_lock);
 661                                 mutex_exit(&stp->sd_lock);
 662                                 STRLOCKMATES(stp);
 663                                 waited = 1;
 664                         }
 665                         while (strmatep->sd_flag &
 666                             (STWOPEN|STRCLOSE|STRPLUMB)) {
 667                                 mutex_exit(&stp->sd_lock);
 668                                 cv_wait(&strmatep->sd_monitor,
 669                                     &strmatep->sd_lock);
 670                                 mutex_exit(&strmatep->sd_lock);
 671                                 STRLOCKMATES(stp);
 672                                 waited = 1;
 673                         }
 674                 }
 675                 stp->sd_flag |= STRCLOSE;
 676                 STRUNLOCKMATES(stp);
 677         } else {
 678                 mutex_enter(&stp->sd_lock);
 679                 stp->sd_flag |= STRCLOSE;
 680                 mutex_exit(&stp->sd_lock);
 681         }
 682 
 683         ASSERT(qp->q_first == NULL); /* No more delayed write */
 684 
 685         /* Check if an I_LINK was ever done on this stream */
 686         if (stp->sd_flag & STRHASLINKS) {
 687                 netstack_t *ns;
 688                 str_stack_t *ss;
 689 
 690                 ns = netstack_find_by_cred(crp);
 691                 ASSERT(ns != NULL);
 692                 ss = ns->netstack_str;
 693                 ASSERT(ss != NULL);
 694 
 695                 (void) munlinkall(stp, LINKCLOSE|LINKNORMAL, crp, &rval, ss);
 696                 netstack_rele(ss->ss_netstack);
 697         }
 698 
 699         while (_SAMESTR(qp)) {
 700                 /*
 701                  * Holding sd_lock prevents q_next from changing in
 702                  * this stream.
 703                  */
 704                 mutex_enter(&stp->sd_lock);
 705                 if (!(flag & (FNDELAY|FNONBLOCK)) && (stp->sd_closetime > 0)) {
 706 
 707                         /*
 708                          * sleep until awakened by strwsrv() or timeout
 709                          */
 710                         for (;;) {
 711                                 mutex_enter(QLOCK(qp->q_next));
 712                                 if (!(qp->q_next->q_mblkcnt)) {
 713                                         mutex_exit(QLOCK(qp->q_next));
 714                                         break;
 715                                 }
 716                                 stp->sd_flag |= WSLEEP;
 717 
 718                                 /* ensure strwsrv gets enabled */
 719                                 qp->q_next->q_flag |= QWANTW;
 720                                 mutex_exit(QLOCK(qp->q_next));
 721                                 /* get out if we timed out or recv'd a signal */
 722                                 if (str_cv_wait(&qp->q_wait, &stp->sd_lock,
 723                                     stp->sd_closetime, 0) <= 0) {
 724                                         break;
 725                                 }
 726                         }
 727                         stp->sd_flag &= ~WSLEEP;
 728                 }
 729                 mutex_exit(&stp->sd_lock);
 730 
 731                 rmq = qp->q_next;
 732                 if (rmq->q_flag & QISDRV) {
 733                         ASSERT(!_SAMESTR(rmq));
 734                         wait_sq_svc(_RD(qp)->q_syncq);
 735                 }
 736 
 737                 qdetach(_RD(rmq), 1, flag, crp, B_FALSE);
 738         }
 739 
 740         /*
 741          * Since we call pollwakeup in close() now, the poll list should
 742          * be empty in most cases. The only exception is the layered devices
 743          * (e.g. the console drivers with redirection modules pushed on top
 744          * of it).  We have to do this after calling qdetach() because
 745          * the redirection module won't have torn down the console
 746          * redirection until after qdetach() has been invoked.
 747          */
 748         if (stp->sd_pollist.ph_list != NULL) {
 749                 pollwakeup(&stp->sd_pollist, POLLERR);
 750                 pollhead_clean(&stp->sd_pollist);
 751         }
 752         ASSERT(stp->sd_pollist.ph_list == NULL);
 753         ASSERT(stp->sd_sidp == NULL);
 754         ASSERT(stp->sd_pgidp == NULL);
 755 
 756         /* Prevent qenable from re-enabling the stream head queue */
 757         disable_svc(_RD(qp));
 758 
 759         /*
 760          * Wait until service procedure of each queue is
 761          * run, if QINSERVICE is set.
 762          */
 763         wait_svc(_RD(qp));
 764 
 765         /*
 766          * Now, flush both queues.
 767          */
 768         flushq(_RD(qp), FLUSHALL);
 769         flushq(qp, FLUSHALL);
 770 
 771         /*
 772          * If the write queue of the stream head is pointing to a
 773          * read queue, we have a twisted stream.  If the read queue
 774          * is alive, convert the stream head queues into a dead end.
 775          * If the read queue is dead, free the dead pair.
 776          */
 777         if (qp->q_next && !_SAMESTR(qp)) {
 778                 if (qp->q_next->q_qinfo == &deadrend) {       /* half-closed pipe */
 779                         flushq(qp->q_next, FLUSHALL); /* ensure no message */
 780                         shfree(qp->q_next->q_stream);
 781                         freeq(qp->q_next);
 782                         freeq(_RD(qp));
 783                 } else if (qp->q_next == _RD(qp)) {  /* fifo */
 784                         freeq(_RD(qp));
 785                 } else {                                /* pipe */
 786                         freestp = 0;
 787                         /*
 788                          * The q_info pointers are never accessed when
 789                          * SQLOCK is held.
 790                          */
 791                         ASSERT(qp->q_syncq == _RD(qp)->q_syncq);
 792                         mutex_enter(SQLOCK(qp->q_syncq));
 793                         qp->q_qinfo = &deadwend;
 794                         _RD(qp)->q_qinfo = &deadrend;
 795                         mutex_exit(SQLOCK(qp->q_syncq));
 796                 }
 797         } else {
 798                 freeq(_RD(qp)); /* free stream head queue pair */
 799         }
 800 
 801         mutex_enter(&vp->v_lock);
 802         if (stp->sd_iocblk) {
 803                 if (stp->sd_iocblk != (mblk_t *)-1) {
 804                         freemsg(stp->sd_iocblk);
 805                 }
 806                 stp->sd_iocblk = NULL;
 807         }
 808         stp->sd_vnode = stp->sd_pvnode = NULL;
 809         vp->v_stream = NULL;
 810         mutex_exit(&vp->v_lock);
 811         mutex_enter(&stp->sd_lock);
 812         freemsg(stp->sd_cmdblk);
 813         stp->sd_cmdblk = NULL;
 814         stp->sd_flag &= ~STRCLOSE;
 815         cv_broadcast(&stp->sd_monitor);
 816         mutex_exit(&stp->sd_lock);
 817 
 818         if (freestp)
 819                 shfree(stp);
 820         return (0);
 821 }
 822 
 823 static int
 824 strsink(queue_t *q, mblk_t *bp)
 825 {
 826         struct copyresp *resp;
 827 
 828         switch (bp->b_datap->db_type) {
 829         case M_FLUSH:
 830                 if ((*bp->b_rptr & FLUSHW) && !(bp->b_flag & MSGNOLOOP)) {
 831                         *bp->b_rptr &= ~FLUSHR;
 832                         bp->b_flag |= MSGNOLOOP;
 833                         /*
 834                          * Protect against the driver passing up
 835                          * messages after it has done a qprocsoff.
 836                          */
 837                         if (_OTHERQ(q)->q_next == NULL)
 838                                 freemsg(bp);
 839                         else
 840                                 qreply(q, bp);
 841                 } else {
 842                         freemsg(bp);
 843                 }
 844                 break;
 845 
 846         case M_COPYIN:
 847         case M_COPYOUT:
 848                 if (bp->b_cont) {
 849                         freemsg(bp->b_cont);
 850                         bp->b_cont = NULL;
 851                 }
 852                 bp->b_datap->db_type = M_IOCDATA;
 853                 bp->b_wptr = bp->b_rptr + sizeof (struct copyresp);
 854                 resp = (struct copyresp *)bp->b_rptr;
 855                 resp->cp_rval = (caddr_t)1;  /* failure */
 856                 /*
 857                  * Protect against the driver passing up
 858                  * messages after it has done a qprocsoff.
 859                  */
 860                 if (_OTHERQ(q)->q_next == NULL)
 861                         freemsg(bp);
 862                 else
 863                         qreply(q, bp);
 864                 break;
 865 
 866         case M_IOCTL:
 867                 if (bp->b_cont) {
 868                         freemsg(bp->b_cont);
 869                         bp->b_cont = NULL;
 870                 }
 871                 bp->b_datap->db_type = M_IOCNAK;
 872                 /*
 873                  * Protect against the driver passing up
 874                  * messages after it has done a qprocsoff.
 875                  */
 876                 if (_OTHERQ(q)->q_next == NULL)
 877                         freemsg(bp);
 878                 else
 879                         qreply(q, bp);
 880                 break;
 881 
 882         default:
 883                 freemsg(bp);
 884                 break;
 885         }
 886 
 887         return (0);
 888 }
 889 
 890 /*
 891  * Clean up after a process when it closes a stream.  This is called
 892  * from closef for all closes, whereas strclose is called only for the
 893  * last close on a stream.  The siglist is scanned for entries for the
 894  * current process, and these are removed.
 895  */
 896 void
 897 strclean(struct vnode *vp)
 898 {
 899         strsig_t *ssp, *pssp, *tssp;
 900         stdata_t *stp;
 901         int update = 0;
 902 
 903         TRACE_1(TR_FAC_STREAMS_FR,
 904             TR_STRCLEAN, "strclean:%p", vp);
 905         stp = vp->v_stream;
 906         pssp = NULL;
 907         mutex_enter(&stp->sd_lock);
 908         ssp = stp->sd_siglist;
 909         while (ssp) {
 910                 if (ssp->ss_pidp == curproc->p_pidp) {
 911                         tssp = ssp->ss_next;
 912                         if (pssp)
 913                                 pssp->ss_next = tssp;
 914                         else
 915                                 stp->sd_siglist = tssp;
 916                         mutex_enter(&pidlock);
 917                         PID_RELE(ssp->ss_pidp);
 918                         mutex_exit(&pidlock);
 919                         kmem_free(ssp, sizeof (strsig_t));
 920                         update = 1;
 921                         ssp = tssp;
 922                 } else {
 923                         pssp = ssp;
 924                         ssp = ssp->ss_next;
 925                 }
 926         }
 927         if (update) {
 928                 stp->sd_sigflags = 0;
 929                 for (ssp = stp->sd_siglist; ssp; ssp = ssp->ss_next)
 930                         stp->sd_sigflags |= ssp->ss_events;
 931         }
 932         mutex_exit(&stp->sd_lock);
 933 }
 934 
 935 /*
 936  * Used on the last close to remove any remaining items on the siglist.
 937  * These could be present on the siglist due to I_ESETSIG calls that
 938  * use process groups or processed that do not have an open file descriptor
 939  * for this stream (Such entries would not be removed by strclean).
 940  */
 941 static void
 942 strcleanall(struct vnode *vp)
 943 {
 944         strsig_t *ssp, *nssp;
 945         stdata_t *stp;
 946 
 947         stp = vp->v_stream;
 948         mutex_enter(&stp->sd_lock);
 949         ssp = stp->sd_siglist;
 950         stp->sd_siglist = NULL;
 951         while (ssp) {
 952                 nssp = ssp->ss_next;
 953                 mutex_enter(&pidlock);
 954                 PID_RELE(ssp->ss_pidp);
 955                 mutex_exit(&pidlock);
 956                 kmem_free(ssp, sizeof (strsig_t));
 957                 ssp = nssp;
 958         }
 959         stp->sd_sigflags = 0;
 960         mutex_exit(&stp->sd_lock);
 961 }
 962 
 963 /*
 964  * Retrieve the next message from the logical stream head read queue
 965  * using either rwnext (if sync stream) or getq_noenab.
 966  * It is the callers responsibility to call qbackenable after
 967  * it is finished with the message. The caller should not call
 968  * qbackenable until after any putback calls to avoid spurious backenabling.
 969  */
 970 mblk_t *
 971 strget(struct stdata *stp, queue_t *q, struct uio *uiop, int first,
 972     int *errorp)
 973 {
 974         mblk_t *bp;
 975         int error;
 976         ssize_t rbytes = 0;
 977 
 978         /* Holding sd_lock prevents the read queue from changing  */
 979         ASSERT(MUTEX_HELD(&stp->sd_lock));
 980 
 981         if (uiop != NULL && stp->sd_struiordq != NULL &&
 982             q->q_first == NULL &&
 983             (!first || (stp->sd_wakeq & RSLEEP))) {
 984                 /*
 985                  * Stream supports rwnext() for the read side.
 986                  * If this is the first time we're called by e.g. strread
 987                  * only do the downcall if there is a deferred wakeup
 988                  * (registered in sd_wakeq).
 989                  */
 990                 struiod_t uiod;
 991                 struct iovec buf[IOV_MAX_STACK];
 992                 int iovlen = 0;
 993 
 994                 if (first)
 995                         stp->sd_wakeq &= ~RSLEEP;
 996 
 997                 if (uiop->uio_iovcnt > IOV_MAX_STACK) {
 998                         iovlen = uiop->uio_iovcnt * sizeof (iovec_t);
 999                         uiod.d_iov = kmem_alloc(iovlen, KM_SLEEP);
1000                 } else {
1001                         uiod.d_iov = buf;
1002                 }
1003 
1004                 (void) uiodup(uiop, &uiod.d_uio, uiod.d_iov, uiop->uio_iovcnt);
1005                 uiod.d_mp = 0;
1006                 /*
1007                  * Mark that a thread is in rwnext on the read side
1008                  * to prevent strrput from nacking ioctls immediately.
1009                  * When the last concurrent rwnext returns
1010                  * the ioctls are nack'ed.
1011                  */
1012                 ASSERT(MUTEX_HELD(&stp->sd_lock));
1013                 stp->sd_struiodnak++;
1014                 /*
1015                  * Note: rwnext will drop sd_lock.
1016                  */
1017                 error = rwnext(q, &uiod);
1018                 ASSERT(MUTEX_NOT_HELD(&stp->sd_lock));
1019                 mutex_enter(&stp->sd_lock);
1020                 stp->sd_struiodnak--;
1021                 while (stp->sd_struiodnak == 0 &&
1022                     ((bp = stp->sd_struionak) != NULL)) {
1023                         stp->sd_struionak = bp->b_next;
1024                         bp->b_next = NULL;
1025                         bp->b_datap->db_type = M_IOCNAK;
1026                         /*
1027                          * Protect against the driver passing up
1028                          * messages after it has done a qprocsoff.
1029                          */
1030                         if (_OTHERQ(q)->q_next == NULL)
1031                                 freemsg(bp);
1032                         else {
1033                                 mutex_exit(&stp->sd_lock);
1034                                 qreply(q, bp);
1035                                 mutex_enter(&stp->sd_lock);
1036                         }
1037                 }
1038                 ASSERT(MUTEX_HELD(&stp->sd_lock));
1039                 if (error == 0 || error == EWOULDBLOCK) {
1040                         if ((bp = uiod.d_mp) != NULL) {
1041                                 *errorp = 0;
1042                                 ASSERT(MUTEX_HELD(&stp->sd_lock));
1043                                 if (iovlen != 0)
1044                                         kmem_free(uiod.d_iov, iovlen);
1045                                 return (bp);
1046                         }
1047                         error = 0;
1048                 } else if (error == EINVAL) {
1049                         /*
1050                          * The stream plumbing must have
1051                          * changed while we were away, so
1052                          * just turn off rwnext()s.
1053                          */
1054                         error = 0;
1055                 } else if (error == EBUSY) {
1056                         /*
1057                          * The module might have data in transit using putnext
1058                          * Fall back on waiting + getq.
1059                          */
1060                         error = 0;
1061                 } else {
1062                         *errorp = error;
1063                         ASSERT(MUTEX_HELD(&stp->sd_lock));
1064                         if (iovlen != 0)
1065                                 kmem_free(uiod.d_iov, iovlen);
1066                         return (NULL);
1067                 }
1068 
1069                 if (iovlen != 0)
1070                         kmem_free(uiod.d_iov, iovlen);
1071 
1072                 /*
1073                  * Try a getq in case a rwnext() generated mblk
1074                  * has bubbled up via strrput().
1075                  */
1076         }
1077         *errorp = 0;
1078         ASSERT(MUTEX_HELD(&stp->sd_lock));
1079 
1080         /*
1081          * If we have a valid uio, try and use this as a guide for how
1082          * many bytes to retrieve from the queue via getq_noenab().
1083          * Doing this can avoid unneccesary counting of overlong
1084          * messages in putback(). We currently only do this for sockets
1085          * and only if there is no sd_rputdatafunc hook.
1086          *
1087          * The sd_rputdatafunc hook transforms the entire message
1088          * before any bytes in it can be given to a client. So, rbytes
1089          * must be 0 if there is a hook.
1090          */
1091         if ((uiop != NULL) && (stp->sd_vnode->v_type == VSOCK) &&
1092             (stp->sd_rputdatafunc == NULL))
1093                 rbytes = uiop->uio_resid;
1094 
1095         return (getq_noenab(q, rbytes));
1096 }
1097 
1098 /*
1099  * Copy out the message pointed to by `bp' into the uio pointed to by `uiop'.
1100  * If the message does not fit in the uio the remainder of it is returned;
1101  * otherwise NULL is returned.  Any embedded zero-length mblk_t's are
1102  * consumed, even if uio_resid reaches zero.  On error, `*errorp' is set to
1103  * the error code, the message is consumed, and NULL is returned.
1104  */
1105 static mblk_t *
1106 struiocopyout(mblk_t *bp, struct uio *uiop, int *errorp)
1107 {
1108         int error;
1109         ptrdiff_t n;
1110         mblk_t *nbp;
1111 
1112         ASSERT(bp->b_wptr >= bp->b_rptr);
1113 
1114         do {
1115                 if ((n = MIN(uiop->uio_resid, MBLKL(bp))) != 0) {
1116                         ASSERT(n > 0);
1117 
1118                         error = uiomove(bp->b_rptr, n, UIO_READ, uiop);
1119                         if (error != 0) {
1120                                 freemsg(bp);
1121                                 *errorp = error;
1122                                 return (NULL);
1123                         }
1124                 }
1125 
1126                 bp->b_rptr += n;
1127                 while (bp != NULL && (bp->b_rptr >= bp->b_wptr)) {
1128                         nbp = bp;
1129                         bp = bp->b_cont;
1130                         freeb(nbp);
1131                 }
1132         } while (bp != NULL && uiop->uio_resid > 0);
1133 
1134         *errorp = 0;
1135         return (bp);
1136 }
1137 
1138 /*
1139  * Read a stream according to the mode flags in sd_flag:
1140  *
1141  * (default mode)               - Byte stream, msg boundaries are ignored
1142  * RD_MSGDIS (msg discard)      - Read on msg boundaries and throw away
1143  *                              any data remaining in msg
1144  * RD_MSGNODIS (msg non-discard) - Read on msg boundaries and put back
1145  *                              any remaining data on head of read queue
1146  *
1147  * Consume readable messages on the front of the queue until
1148  * ttolwp(curthread)->lwp_count
1149  * is satisfied, the readable messages are exhausted, or a message
1150  * boundary is reached in a message mode.  If no data was read and
1151  * the stream was not opened with the NDELAY flag, block until data arrives.
1152  * Otherwise return the data read and update the count.
1153  *
1154  * In default mode a 0 length message signifies end-of-file and terminates
1155  * a read in progress.  The 0 length message is removed from the queue
1156  * only if it is the only message read (no data is read).
1157  *
1158  * An attempt to read an M_PROTO or M_PCPROTO message results in an
1159  * EBADMSG error return, unless either RD_PROTDAT or RD_PROTDIS are set.
1160  * If RD_PROTDAT is set, M_PROTO and M_PCPROTO messages are read as data.
1161  * If RD_PROTDIS is set, the M_PROTO and M_PCPROTO parts of the message
1162  * are unlinked from any M_DATA blocks in the message, the protos are
1163  * thrown away, and the data is read.
      *
      * Returns 0, or an error code.  Errors come from i_straccess(),
      * strgeterr(), strget(), strwaitq() and struiocopyout(); EBADMSG is
      * returned when an unreadable M_PROTO, M_PCPROTO or M_PASSFP message is
      * found before any data has been read.  An EAGAIN from strwaitq() is
      * turned into 0 when FNDELAY is set in uio_fmode and OLDNDELAY is set in
      * sd_flag.
      *
      * sd_lock is taken and released inside this function.  `crp' is only
      * passed to the trace macros.
1164  */
1165 /* ARGSUSED */
1166 int
1167 strread(struct vnode *vp, struct uio *uiop, cred_t *crp)
1168 {
1169         struct stdata *stp;
1170         mblk_t *bp, *nbp;
1171         queue_t *q;
1172         int error = 0;
1173         uint_t old_sd_flag;
1174         int first;
1175         char rflg;
1176         uint_t mark;            /* Contains MSG*MARK and _LASTMARK */
1177 #define _LASTMARK       0x8000  /* Distinct from MSG*MARK */
1178         short delim;
1179         unsigned char pri = 0;
1180         char waitflag;
1181         unsigned char type;
1182 
1183         TRACE_1(TR_FAC_STREAMS_FR,
1184             TR_STRREAD_ENTER, "strread:%p", vp);
1185         ASSERT(vp->v_stream);
1186         stp = vp->v_stream;
1187 
1188         mutex_enter(&stp->sd_lock);
1189 
1190         if ((error = i_straccess(stp, JCREAD)) != 0) {
1191                 mutex_exit(&stp->sd_lock);
1192                 return (error);
1193         }
1194 
1195         if (stp->sd_flag & (STRDERR|STPLEX)) {
1196                 error = strgeterr(stp, STRDERR|STPLEX, 0);
1197                 if (error != 0) {
1198                         mutex_exit(&stp->sd_lock);
1199                         return (error);
1200                 }
1201         }
1202 
1203         /*
1204          * Loop terminates when uiop->uio_resid == 0.
             *
             * rflg is set once a data message is about to be copied out; at
             * the same point NOINTR is added to waitflag.  Every iteration
             * starts with sd_lock held.
1205          */
1206         rflg = 0;
1207         waitflag = READWAIT;
1208         q = _RD(stp->sd_wrq);
1209         for (;;) {
1210                 ASSERT(MUTEX_HELD(&stp->sd_lock));
1211                 old_sd_flag = stp->sd_flag;
1212                 mark = 0;
1213                 delim = 0;
1214                 first = 1;
                     /*
                      * Nothing to read yet.  Give up (goto oops, with sd_lock
                      * held) on a strget() error, on hangup/EOF, when data has
                      * already been read and STRDELIM is not set, or for a zero
                      * length read; otherwise wait in strwaitq() and retry.
                      */
1215                 while ((bp = strget(stp, q, uiop, first, &error)) == NULL) {
1216                         int done = 0;
1217 
1218                         ASSERT(MUTEX_HELD(&stp->sd_lock));
1219 
1220                         if (error != 0)
1221                                 goto oops;
1222 
1223                         if (stp->sd_flag & (STRHUP|STREOF)) {
1224                                 goto oops;
1225                         }
1226                         if (rflg && !(stp->sd_flag & STRDELIM)) {
1227                                 goto oops;
1228                         }
1229                         /*
1230                          * If a read(fd,buf,0) has been done, there is no
1231                          * need to sleep. We always have zero bytes to
1232                          * return.
1233                          */
1234                         if (uiop->uio_resid == 0) {
1235                                 goto oops;
1236                         }
1237 
1238                         qbackenable(q, 0);
1239 
1240                         TRACE_3(TR_FAC_STREAMS_FR, TR_STRREAD_WAIT,
1241                             "strread calls strwaitq:%p, %p, %p",
1242                             vp, uiop, crp);
1243                         if ((error = strwaitq(stp, waitflag, uiop->uio_resid,
1244                             uiop->uio_fmode, -1, &done)) != 0 || done) {
1245                                 TRACE_3(TR_FAC_STREAMS_FR, TR_STRREAD_DONE,
1246                                     "strread error or done:%p, %p, %p",
1247                                     vp, uiop, crp);
1248                                 if ((uiop->uio_fmode & FNDELAY) &&
1249                                     (stp->sd_flag & OLDNDELAY) &&
1250                                     (error == EAGAIN))
1251                                         error = 0;
1252                                 goto oops;
1253                         }
1254                         TRACE_3(TR_FAC_STREAMS_FR, TR_STRREAD_AWAKE,
1255                             "strread awakes:%p, %p, %p", vp, uiop, crp);
1256                         if ((error = i_straccess(stp, JCREAD)) != 0) {
1257                                 goto oops;
1258                         }
1259                         first = 0;
1260                 }
1261 
1262                 ASSERT(MUTEX_HELD(&stp->sd_lock));
1263                 ASSERT(bp);
                     /*
                      * Remember the band of this message; it is used for any
                      * putback below and for the final qbackenable().
                      */
1264                 pri = bp->b_band;
1265                 /*
1266                  * Extract any mark information. If the message is not
1267                  * completely consumed this information will be put in the mblk
1268                  * that is putback.
1269                  * If MSGMARKNEXT is set and the message is completely consumed
1270                  * the STRATMARK flag will be set below. Likewise, if
1271                  * MSGNOTMARKNEXT is set and the message is
1272                  * completely consumed STRNOTATMARK will be set.
1273                  *
1274                  * For some unknown reason strread only breaks the read at the
1275                  * last mark.
1276                  */
1277                 mark = bp->b_flag & (MSGMARK | MSGMARKNEXT | MSGNOTMARKNEXT);
1278                 ASSERT((mark & (MSGMARKNEXT|MSGNOTMARKNEXT)) !=
1279                     (MSGMARKNEXT|MSGNOTMARKNEXT));
1280                 if (mark != 0 && bp == stp->sd_mark) {
1281                         if (rflg) {
1282                                 putback(stp, q, bp, pri);
1283                                 goto oops;
1284                         }
1285                         mark |= _LASTMARK;
1286                         stp->sd_mark = NULL;
1287                 }
1288                 if ((stp->sd_flag & STRDELIM) && (bp->b_flag & MSGDELIM))
1289                         delim = 1;
                     /*
                      * sd_lock is dropped while the message is processed.  Each
                      * path below either retakes it before `continue', `goto oops'
                      * or the mutex_enter() that follows the switch, or leaves
                      * through oops1 without it.
                      */
1290                 mutex_exit(&stp->sd_lock);
1291 
1292                 if (STREAM_NEEDSERVICE(stp))
1293                         stream_runservice(stp);
1294 
1295                 type = bp->b_datap->db_type;
1296 
1297                 switch (type) {
1298 
1299                 case M_DATA:
1300 ismdata:
1301                         if (msgnodata(bp)) {
1302                                 if (mark || delim) {
1303                                         freemsg(bp);
1304                                 } else if (rflg) {
1305 
1306                                         /*
1307                                          * If already read data put zero
1308                                          * length message back on queue else
1309                                          * free msg and return 0.
1310                                          */
1311                                         bp->b_band = pri;
1312                                         mutex_enter(&stp->sd_lock);
1313                                         putback(stp, q, bp, pri);
1314                                         mutex_exit(&stp->sd_lock);
1315                                 } else {
1316                                         freemsg(bp);
1317                                 }
1318                                 error =  0;
1319                                 goto oops1;
1320                         }
1321 
1322                         rflg = 1;
1323                         waitflag |= NOINTR;
                             /*
                              * struiocopyout() returns whatever part of the
                              * message did not fit in the uio, or NULL.
                              */
1324                         bp = struiocopyout(bp, uiop, &error);
1325                         if (error != 0)
1326                                 goto oops1;
1327 
1328                         mutex_enter(&stp->sd_lock);
1329                         if (bp) {
1330                                 /*
1331                                  * Have remaining data in message.
1332                                  * Free msg if in discard mode.
1333                                  */
1334                                 if (stp->sd_read_opt & RD_MSGDIS) {
1335                                         freemsg(bp);
1336                                 } else {
1337                                         bp->b_band = pri;
1338                                         if ((mark & _LASTMARK) &&
1339                                             (stp->sd_mark == NULL))
1340                                                 stp->sd_mark = bp;
1341                                         bp->b_flag |= mark & ~_LASTMARK;
1342                                         if (delim)
1343                                                 bp->b_flag |= MSGDELIM;
1344                                         if (msgnodata(bp))
1345                                                 freemsg(bp);
1346                                         else
1347                                                 putback(stp, q, bp, pri);
1348                                 }
1349                         } else {
1350                                 /*
1351                                  * Consumed the complete message.
1352                                  * Move the MSG*MARKNEXT information
1353                                  * to the stream head just in case
1354                                  * the read queue becomes empty.
1355                                  *
1356                                  * If the stream head was at the mark
1357                                  * (STRATMARK) before we dropped sd_lock above
1358                                  * and some data was consumed then we have
1359                                  * moved past the mark thus STRATMARK is
1360                                  * cleared. However, if a message arrived in
1361                                  * strrput during the copyout above causing
1362                                  * STRATMARK to be set we can not clear that
1363                                  * flag.
1364                                  */
1365                                 if (mark &
1366                                     (MSGMARKNEXT|MSGNOTMARKNEXT|MSGMARK)) {
1367                                         if (mark & MSGMARKNEXT) {
1368                                                 stp->sd_flag &= ~STRNOTATMARK;
1369                                                 stp->sd_flag |= STRATMARK;
1370                                         } else if (mark & MSGNOTMARKNEXT) {
1371                                                 stp->sd_flag &= ~STRATMARK;
1372                                                 stp->sd_flag |= STRNOTATMARK;
1373                                         } else {
1374                                                 stp->sd_flag &=
1375                                                     ~(STRATMARK|STRNOTATMARK);
1376                                         }
1377                                 } else if (rflg && (old_sd_flag & STRATMARK)) {
1378                                         stp->sd_flag &= ~STRATMARK;
1379                                 }
1380                         }
1381 
1382                         /*
1383                          * Check for signal messages at the front of the read
1384                          * queue and generate the signal(s) if appropriate.
1385                          * The only signal that can be on queue is M_SIG at
1386                          * this point.
1387                          */
1388                         while ((((bp = q->q_first)) != NULL) &&
1389                             (bp->b_datap->db_type == M_SIG)) {
1390                                 bp = getq_noenab(q, 0);
1391                                 /*
1392                                  * sd_lock is held so the content of the
1393                                  * read queue can not change.
1394                                  */
1395                                 ASSERT(bp != NULL && DB_TYPE(bp) == M_SIG);
1396                                 strsignal_nolock(stp, *bp->b_rptr, bp->b_band);
1397                                 mutex_exit(&stp->sd_lock);
1398                                 freemsg(bp);
1399                                 if (STREAM_NEEDSERVICE(stp))
1400                                         stream_runservice(stp);
1401                                 mutex_enter(&stp->sd_lock);
1402                         }
1403 
                             /*
                              * The read is finished when the uio is full, the
                              * last mark or a delimiter was reached, or a
                              * message read mode is in effect; otherwise go
                              * round again for more data (sd_lock is held).
                              */
1404                         if ((uiop->uio_resid == 0) || (mark & _LASTMARK) ||
1405                             delim ||
1406                             (stp->sd_read_opt & (RD_MSGDIS|RD_MSGNODIS))) {
1407                                 goto oops;
1408                         }
1409                         continue;
1410 
1411                 case M_SIG:
1412                         strsignal(stp, *bp->b_rptr, (int32_t)bp->b_band);
1413                         freemsg(bp);
1414                         mutex_enter(&stp->sd_lock);
1415                         continue;
1416 
1417                 case M_PROTO:
1418                 case M_PCPROTO:
1419                         /*
1420                          * Only data messages are readable.
1421                          * Any others generate an error, unless
1422                          * RD_PROTDIS or RD_PROTDAT is set.
1423                          */
1424                         if (stp->sd_read_opt & RD_PROTDAT) {
                                     /*
                                      * NOTE(review): this walk follows b_next,
                                      * not b_cont.  Confirm whether retyping
                                      * only the blocks reachable that way is
                                      * what is intended.
                                      */
1425                                 for (nbp = bp; nbp; nbp = nbp->b_next) {
1426                                         if ((nbp->b_datap->db_type ==
1427                                             M_PROTO) ||
1428                                             (nbp->b_datap->db_type ==
1429                                             M_PCPROTO)) {
1430                                                 nbp->b_datap->db_type = M_DATA;
1431                                         } else {
1432                                                 break;
1433                                         }
1434                                 }
1435                                 /*
1436                                  * clear stream head hi pri flag based on
1437                                  * first message
1438                                  */
1439                                 if (type == M_PCPROTO) {
1440                                         mutex_enter(&stp->sd_lock);
1441                                         stp->sd_flag &= ~STRPRI;
1442                                         mutex_exit(&stp->sd_lock);
1443                                 }
1444                                 goto ismdata;
1445                         } else if (stp->sd_read_opt & RD_PROTDIS) {
1446                                 /*
1447                                  * discard non-data messages
1448                                  */
1449                                 while (bp &&
1450                                     ((bp->b_datap->db_type == M_PROTO) ||
1451                                     (bp->b_datap->db_type == M_PCPROTO))) {
1452                                         nbp = unlinkb(bp);
1453                                         freeb(bp);
1454                                         bp = nbp;
1455                                 }
1456                                 /*
1457                                  * clear stream head hi pri flag based on
1458                                  * first message
1459                                  */
1460                                 if (type == M_PCPROTO) {
1461                                         mutex_enter(&stp->sd_lock);
1462                                         stp->sd_flag &= ~STRPRI;
1463                                         mutex_exit(&stp->sd_lock);
1464                                 }
1465                                 if (bp) {
1466                                         bp->b_band = pri;
1467                                         goto ismdata;
1468                                 } else {
                                             /*
                                              * Nothing but protos: leave the
                                              * switch, retake sd_lock and look
                                              * at the next message.
                                              */
1469                                         break;
1470                                 }
1471                         }
1472                         /* FALLTHRU */
1473                 case M_PASSFP:
1474                         if ((bp->b_datap->db_type == M_PASSFP) &&
1475                             (stp->sd_read_opt & RD_PROTDIS)) {
1476                                 freemsg(bp);
1477                                 break;
1478                         }
                             /*
                              * Unreadable message: put it back untouched.  It is
                              * an error only if no data has been read so far.
                              */
1479                         mutex_enter(&stp->sd_lock);
1480                         putback(stp, q, bp, pri);
1481                         mutex_exit(&stp->sd_lock);
1482                         if (rflg == 0)
1483                                 error = EBADMSG;
1484                         goto oops1;
1485 
1486                 default:
1487                         /*
1488                          * Garbage on stream head read queue.
                              * The message is dropped; `error' is left as it
                              * was.
1489                          */
1490                         cmn_err(CE_WARN, "bad %x found at stream head\n",
1491                             bp->b_datap->db_type);
1492                         freemsg(bp);
1493                         goto oops1;
1494                 }
                     /* Reached by `break' from the switch: relock and loop. */
1495                 mutex_enter(&stp->sd_lock);
1496         }
     /* oops: entered with sd_lock held. */
1497 oops:
1498         mutex_exit(&stp->sd_lock);
     /* oops1: entered with sd_lock already released. */
1499 oops1:
1500         qbackenable(q, pri);
1501         return (error);
1502 #undef  _LASTMARK
1503 }
1504 
1505 /*
1506  * Default processing of M_PROTO/M_PCPROTO messages.
1507  * Determine which wakeups and signals are needed.
1508  * This can be replaced by a user-specified procedure for kernel users
1509  * of STREAMS.
1510  */
1511 /* ARGSUSED */
1512 mblk_t *
1513 strrput_proto(vnode_t *vp, mblk_t *mp,
1514     strwakeup_t *wakeups, strsigset_t *firstmsgsigs,
1515     strsigset_t *allmsgsigs, strpollset_t *pollwakeups)
1516 {
1517         *wakeups = RSLEEP;
1518         *allmsgsigs = 0;
1519 
1520         switch (mp->b_datap->db_type) {
1521         case M_PROTO:
1522                 if (mp->b_band == 0) {
1523                         *firstmsgsigs = S_INPUT | S_RDNORM;
1524                         *pollwakeups = POLLIN | POLLRDNORM;
1525                 } else {
1526                         *firstmsgsigs = S_INPUT | S_RDBAND;
1527                         *pollwakeups = POLLIN | POLLRDBAND;
1528                 }
1529                 break;
1530         case M_PCPROTO:
1531                 *firstmsgsigs = S_HIPRI;
1532                 *pollwakeups = POLLPRI;
1533                 break;
1534         }
1535         return (mp);
1536 }
1537 
1538 /*
1539  * Default processing of everything but M_DATA, M_PROTO, M_PCPROTO and
1540  * M_PASSFP messages.
1541  * Determine which wakeups and signals are needed.
1542  * This can be replaced by a user-specified procedure for kernel users
1543  * of STREAMS.
1544  */
1545 /* ARGSUSED */
1546 mblk_t *
1547 strrput_misc(vnode_t *vp, mblk_t *mp,
1548     strwakeup_t *wakeups, strsigset_t *firstmsgsigs,
1549     strsigset_t *allmsgsigs, strpollset_t *pollwakeups)
1550 {
1551         *wakeups = 0;
1552         *firstmsgsigs = 0;
1553         *allmsgsigs = 0;
1554         *pollwakeups = 0;
1555         return (mp);
1556 }
1557 
1558 /*
1559  * Stream read put procedure.  Called from downstream driver/module
1560  * with messages for the stream head.  Data, protocol, and in-stream
1561  * signal messages are placed on the queue, others are handled directly.
      *
      * q is the stream head read queue (q->q_ptr is its struct stdata) and
      * bp is a single message; bp->b_next must be NULL on entry.
      *
      * Processing happens in two stages:
      *
      * 1. The message is classified by type to work out which sleepers to
      *    wake (wakeups), which signal events to post (firstmsgsigs and
      *    allmsgsigs) and which poll events to report (pollwakeups).
      *    M_PROTO/M_PCPROTO go through sd_rprotofunc and all remaining
      *    non-data types through sd_rmiscfunc; either function may return
      *    NULL or a b_next chain of messages.  Every arm of the switch that
      *    does not return leaves with sd_lock held.
      *
      * 2. Starting at one_more, each message of the chain is queued (or
      *    passed to strrput_nondata()), then the wakeups, poll wakeups and
      *    signals are delivered.  sd_lock is dropped around the calls to
      *    strrput_nondata() and pollwakeup().
      *
      * Every return path returns 0.
1562  */
1563 int
1564 strrput(queue_t *q, mblk_t *bp)
1565 {
1566         struct stdata   *stp;
1567         ulong_t         rput_opt;
1568         strwakeup_t     wakeups;
1569         strsigset_t     firstmsgsigs;   /* Signals if first message on queue */
1570         strsigset_t     allmsgsigs;     /* Signals for all messages */
1571         strsigset_t     signals;        /* Signals events to generate */
1572         strpollset_t    pollwakeups;
1573         mblk_t          *nextbp;
1574         uchar_t         band = 0;
1575         int             hipri_sig;
1576 
1577         stp = (struct stdata *)q->q_ptr;
1578         /*
1579          * Use rput_opt for optimized access to the SR_ flags except
1580          * SR_POLLIN. That flag has to be checked under sd_lock since it
1581          * is modified by strpoll().
1582          */
1583         rput_opt = stp->sd_rput_opt;
1584 
1585         ASSERT(qclaimed(q));
1586         TRACE_2(TR_FAC_STREAMS_FR, TR_STRRPUT_ENTER,
1587             "strrput called with message type:q %p bp %p", q, bp);
1588 
1589         /*
1590          * Perform initial processing and pass to the parameterized functions.
1591          */
1592         ASSERT(bp->b_next == NULL);
1593 
1594         switch (bp->b_datap->db_type) {
1595         case M_DATA:
1596                 /*
1597                  * sockfs is the only consumer of STREOF and when it is set,
1598                  * it implies that the receiver is not interested in receiving
1599                  * any more data, hence the mblk is freed to prevent unnecessary
1600                  * message queueing at the stream head.
1601                  */
                     /*
                      * NOTE(review): this compares the whole of sd_flag with
                      * STREOF, so the message is dropped only when STREOF is
                      * the sole flag set, and sd_flag is read here without
                      * sd_lock.  The comment above reads as if a bit test was
                      * intended - confirm before relying on this path.
                      */
1602                 if (stp->sd_flag == STREOF) {
1603                         freemsg(bp);
1604                         return (0);
1605                 }
1606                 if ((rput_opt & SR_IGN_ZEROLEN) &&
1607                     bp->b_rptr == bp->b_wptr && msgnodata(bp)) {
1608                         /*
1609                          * Ignore zero-length M_DATA messages. These might be
1610                          * generated by some transports.
1611                          * The zero-length M_DATA messages, even if they
1612                          * are ignored, should affect the atmark tracking and
1613                          * should wake up a thread sleeping in strwaitmark.
1614                          */
1615                         mutex_enter(&stp->sd_lock);
1616                         if (bp->b_flag & MSGMARKNEXT) {
1617                                 /*
1618                                  * Record the position of the mark either
1619                                  * in q_last or in STRATMARK.
1620                                  */
1621                                 if (q->q_last != NULL) {
1622                                         q->q_last->b_flag &= ~MSGNOTMARKNEXT;
1623                                         q->q_last->b_flag |= MSGMARKNEXT;
1624                                 } else {
1625                                         stp->sd_flag &= ~STRNOTATMARK;
1626                                         stp->sd_flag |= STRATMARK;
1627                                 }
1628                         } else if (bp->b_flag & MSGNOTMARKNEXT) {
1629                                 /*
1630                                  * Record that this is not the position of
1631                                  * the mark either in q_last or in
1632                                  * STRNOTATMARK.
1633                                  */
1634                                 if (q->q_last != NULL) {
1635                                         q->q_last->b_flag &= ~MSGMARKNEXT;
1636                                         q->q_last->b_flag |= MSGNOTMARKNEXT;
1637                                 } else {
1638                                         stp->sd_flag &= ~STRATMARK;
1639                                         stp->sd_flag |= STRNOTATMARK;
1640                                 }
1641                         }
1642                         if (stp->sd_flag & RSLEEP) {
1643                                 stp->sd_flag &= ~RSLEEP;
1644                                 cv_broadcast(&q->q_wait);
1645                         }
1646                         mutex_exit(&stp->sd_lock);
                             /* The empty message is never queued. */
1647                         freemsg(bp);
1648                         return (0);
1649                 }
1650                 wakeups = RSLEEP;
1651                 if (bp->b_band == 0) {
1652                         firstmsgsigs = S_INPUT | S_RDNORM;
1653                         pollwakeups = POLLIN | POLLRDNORM;
1654                 } else {
1655                         firstmsgsigs = S_INPUT | S_RDBAND;
1656                         pollwakeups = POLLIN | POLLRDBAND;
1657                 }
1658                 if (rput_opt & SR_SIGALLDATA)
1659                         allmsgsigs = firstmsgsigs;
1660                 else
1661                         allmsgsigs = 0;
1662 
                     /* Taken here and still held when the switch is left. */
1663                 mutex_enter(&stp->sd_lock);
1664                 if ((rput_opt & SR_CONSOL_DATA) &&
1665                     (q->q_last != NULL) &&
1666                     (bp->b_flag & (MSGMARK|MSGDELIM)) == 0) {
1667                         /*
1668                          * Consolidate an M_DATA message onto an M_DATA,
1669                          * M_PROTO, or M_PCPROTO by merging it with q_last.
1670                          * The consolidation does not take place if
1671                          * the old message is marked with either of the
1672                          * marks or the delim flag or if the new
1673                          * message is marked with MSGMARK. The MSGMARK
1674                          * check is needed to handle the odd semantics of
1675                          * MSGMARK where essentially the whole message
1676                          * is to be treated as marked.
1677                          * Carry any MSGMARKNEXT and MSGNOTMARKNEXT from the
1678                          * new message to the front of the b_cont chain.
1679                          */
1680                         mblk_t *lbp = q->q_last;
1681                         unsigned char db_type = lbp->b_datap->db_type;
1682 
1683                         if ((db_type == M_DATA || db_type == M_PROTO ||
1684                             db_type == M_PCPROTO) &&
1685                             !(lbp->b_flag & (MSGDELIM|MSGMARK|MSGMARKNEXT))) {
                                     /*
                                      * Take the old tail off the queue; the
                                      * merged message is put back by the
                                      * putq() in the second stage.
                                      */
1686                                 rmvq_noenab(q, lbp);
1687                                 /*
1688                                  * The first message in the b_cont list
1689                                  * tracks MSGMARKNEXT and MSGNOTMARKNEXT.
1690                                  * We need to handle the case where we
1691                                  * are appending:
1692                                  *
1693                                  * 1) a MSGMARKNEXT to a MSGNOTMARKNEXT.
1694                                  * 2) a MSGMARKNEXT to a plain message.
1695                                  * 3) a MSGNOTMARKNEXT to a plain message
1696                                  * 4) a MSGNOTMARKNEXT to a MSGNOTMARKNEXT
1697                                  *    message.
1698                                  *
1699                                  * Thus we never append a MSGMARKNEXT or
1700                                  * MSGNOTMARKNEXT to a MSGMARKNEXT message.
1701                                  */
1702                                 if (bp->b_flag & MSGMARKNEXT) {
1703                                         lbp->b_flag |= MSGMARKNEXT;
1704                                         lbp->b_flag &= ~MSGNOTMARKNEXT;
1705                                         bp->b_flag &= ~MSGMARKNEXT;
1706                                 } else if (bp->b_flag & MSGNOTMARKNEXT) {
1707                                         lbp->b_flag |= MSGNOTMARKNEXT;
1708                                         bp->b_flag &= ~MSGNOTMARKNEXT;
1709                                 }
1710 
1711                                 linkb(lbp, bp);
1712                                 bp = lbp;
1713                                 /*
1714                                  * The new message logically isn't the first
1715                                  * even though the q_first check below thinks
1716                                  * it is. Clear the firstmsgsigs to make it
1717                                  * not appear to be first.
1718                                  */
1719                                 firstmsgsigs = 0;
1720                         }
1721                 }
1722                 break;
1723 
1724         case M_PASSFP:
1725                 wakeups = RSLEEP;
1726                 allmsgsigs = 0;
1727                 if (bp->b_band == 0) {
1728                         firstmsgsigs = S_INPUT | S_RDNORM;
1729                         pollwakeups = POLLIN | POLLRDNORM;
1730                 } else {
1731                         firstmsgsigs = S_INPUT | S_RDBAND;
1732                         pollwakeups = POLLIN | POLLRDBAND;
1733                 }
1734                 mutex_enter(&stp->sd_lock);
1735                 break;
1736 
1737         case M_PROTO:
1738         case M_PCPROTO:
1739                 ASSERT(stp->sd_rprotofunc != NULL);
1740                 bp = (stp->sd_rprotofunc)(stp->sd_vnode, bp,
1741                     &wakeups, &firstmsgsigs, &allmsgsigs, &pollwakeups);
                     /*
                      * ALLSIG and ALLPOLL are the complete sets of signal and
                      * poll events a proto/misc function may hand back.  They
                      * are defined here, reused by the default arm below, and
                      * undefined there.
                      */
1742 #define ALLSIG  (S_INPUT|S_HIPRI|S_OUTPUT|S_MSG|S_ERROR|S_HANGUP|S_RDNORM|\
1743                 S_WRNORM|S_RDBAND|S_WRBAND|S_BANDURG)
1744 #define ALLPOLL (POLLIN|POLLPRI|POLLOUT|POLLRDNORM|POLLWRNORM|POLLRDBAND|\
1745                 POLLWRBAND)
1746 
1747                 ASSERT((wakeups & ~(RSLEEP|WSLEEP)) == 0);
1748                 ASSERT((firstmsgsigs & ~ALLSIG) == 0);
1749                 ASSERT((allmsgsigs & ~ALLSIG) == 0);
1750                 ASSERT((pollwakeups & ~ALLPOLL) == 0);
1751 
1752                 mutex_enter(&stp->sd_lock);
1753                 break;
1754 
1755         default:
1756                 ASSERT(stp->sd_rmiscfunc != NULL);
1757                 bp = (stp->sd_rmiscfunc)(stp->sd_vnode, bp,
1758                     &wakeups, &firstmsgsigs, &allmsgsigs, &pollwakeups);
1759                 ASSERT((wakeups & ~(RSLEEP|WSLEEP)) == 0);
1760                 ASSERT((firstmsgsigs & ~ALLSIG) == 0);
1761                 ASSERT((allmsgsigs & ~ALLSIG) == 0);
1762                 ASSERT((pollwakeups & ~ALLPOLL) == 0);
1763 #undef  ALLSIG
1764 #undef  ALLPOLL
1765                 mutex_enter(&stp->sd_lock);
1766                 break;
1767         }
1768         ASSERT(MUTEX_HELD(&stp->sd_lock));
1769 
1770         /* By default generate superset of signals */
1771         signals = (firstmsgsigs | allmsgsigs);
1772 
1773         /*
1774          * The proto and misc functions can return multiple messages
1775          * as a b_next chain. Such messages are processed separately.
1776          */
1777 one_more:
1778         hipri_sig = 0;
1779         if (bp == NULL) {
                     /*
                      * No message left to queue (the proto/misc function
                      * returned NULL); the wakeups, poll events and signals
                      * computed above are still delivered below.
                      */
1780                 nextbp = NULL;
1781         } else {
                     /* Detach this message from the rest of the chain. */
1782                 nextbp = bp->b_next;
1783                 bp->b_next = NULL;
1784 
1785                 switch (bp->b_datap->db_type) {
1786                 case M_PCPROTO:
1787                         /*
1788                          * Only one priority protocol message is allowed at the
1789                          * stream head at a time.
1790                          */
1791                         if (stp->sd_flag & STRPRI) {
1792                                 TRACE_0(TR_FAC_STREAMS_FR, TR_STRRPUT_PROTERR,
1793                                     "M_PCPROTO already at head");
                                     /*
                                      * The extra message is discarded and no
                                      * wakeups or signals are generated for
                                      * it; "done" is entered with sd_lock
                                      * already released.
                                      */
1794                                 freemsg(bp);
1795                                 mutex_exit(&stp->sd_lock);
1796                                 goto done;
1797                         }
1798                         stp->sd_flag |= STRPRI;
1799                         hipri_sig = 1;
1800                         /* FALLTHRU */
1801                 case M_DATA:
1802                 case M_PROTO:
1803                 case M_PASSFP:
                             /* Remembered for the strsendsig() call below. */
1804                         band = bp->b_band;
1805                         /*
1806                          * Marking doesn't work well when messages
1807                          * are marked in more than one band.  We only
1808                          * remember the last message received, even if
1809                          * it is placed on the queue ahead of other
1810                          * marked messages.
1811                          */
1812                         if (bp->b_flag & MSGMARK)
1813                                 stp->sd_mark = bp;
1814                         (void) putq(q, bp);
1815 
1816                         /*
1817                          * If message is a PCPROTO message, always use
1818                          * firstmsgsigs to determine if a signal should be
1819                          * sent as strrput is the only place to send
1820                          * signals for PCPROTO. Other messages are based on
1821                          * the STRGETINPROG flag. The flag determines if
1822                          * strrput or (k)strgetmsg will be responsible for
1823                          * sending the signals, in the firstmsgsigs case.
1824                          */
1825                         if ((hipri_sig == 1) ||
1826                             (((stp->sd_flag & STRGETINPROG) == 0) &&
1827                             (q->q_first == bp)))
1828                                 signals = (firstmsgsigs | allmsgsigs);
1829                         else
1830                                 signals = allmsgsigs;
1831                         break;
1832 
1833                 default:
                             /*
                              * strrput_nondata() acquires sd_lock on its own,
                              * so it is called with the lock dropped.
                              */
1834                         mutex_exit(&stp->sd_lock);
1835                         (void) strrput_nondata(q, bp);
1836                         mutex_enter(&stp->sd_lock);
1837                         break;
1838                 }
1839         }
1840         ASSERT(MUTEX_HELD(&stp->sd_lock));
1841         /*
1842          * Wake sleeping read/getmsg and cancel deferred wakeup
1843          */
1844         if (wakeups & RSLEEP)
1845                 stp->sd_wakeq &= ~RSLEEP;
1846 
             /* Only broadcast for sleep states currently flagged in sd_flag. */
1847         wakeups &= stp->sd_flag;
1848         if (wakeups & RSLEEP) {
1849                 stp->sd_flag &= ~RSLEEP;
1850                 cv_broadcast(&q->q_wait);
1851         }
1852         if (wakeups & WSLEEP) {
1853                 stp->sd_flag &= ~WSLEEP;
1854                 cv_broadcast(&_WR(q)->q_wait);
1855         }
1856 
1857         if (pollwakeups != 0) {
                     /*
                      * Only the plain normal-data event set is gated on
                      * SR_POLLIN (which is cleared once the wakeup is
                      * posted); any other event set always reaches
                      * pollwakeup().
                      */
1858                 if (pollwakeups == (POLLIN | POLLRDNORM)) {
1859                         /*
1860                          * Can't use rput_opt since it was not
1861                          * read when sd_lock was held and SR_POLLIN is changed
1862                          * by strpoll() under sd_lock.
1863                          */
1864                         if (!(stp->sd_rput_opt & SR_POLLIN))
1865                                 goto no_pollwake;
1866                         stp->sd_rput_opt &= ~SR_POLLIN;
1867                 }
                     /* pollwakeup() is called without sd_lock held. */
1868                 mutex_exit(&stp->sd_lock);
1869                 pollwakeup(&stp->sd_pollist, pollwakeups);
1870                 mutex_enter(&stp->sd_lock);
1871         }
1872 no_pollwake:
1873 
1874         /*
1875          * strsendsig can handle multiple signals with a
1876          * single call.
1877          */
1878         if (stp->sd_sigflags & signals)
1879                 strsendsig(stp->sd_siglist, signals, band, 0);
1880         mutex_exit(&stp->sd_lock);
1881 
1882 
1883 done:
             /* sd_lock is not held here. */
1884         if (nextbp == NULL)
1885                 return (0);
1886 
1887         /*
1888          * Any signals were handled the first time.
1889          * Wakeups and pollwakeups are redone to avoid any race
1890          * conditions - all the messages are not queued until the
1891          * last message has been processed by strrput.
1892          */
1893         bp = nextbp;
1894         signals = firstmsgsigs = allmsgsigs = 0;
1895         mutex_enter(&stp->sd_lock);
1896         goto one_more;
1897 }
1898 
1899 static void
1900 log_dupioc(queue_t *rq, mblk_t *bp)
1901 {
1902         queue_t *wq, *qp;
1903         char *modnames, *mnp, *dname;
1904         size_t maxmodstr;
1905         boolean_t islast;
1906 
1907         /*
1908          * Allocate a buffer large enough to hold the names of nstrpush modules
1909          * and one driver, with spaces between and NUL terminator.  If we can't
1910          * get memory, then we'll just log the driver name.
1911          */
1912         maxmodstr = nstrpush * (FMNAMESZ + 1);
1913         mnp = modnames = kmem_alloc(maxmodstr, KM_NOSLEEP);
1914 
1915         /* march down write side to print log message down to the driver */
1916         wq = WR(rq);
1917 
1918         /* make sure q_next doesn't shift around while we're grabbing data */
1919         claimstr(wq);
1920         qp = wq->q_next;
1921         do {
1922                 dname = Q2NAME(qp);
1923                 islast = !SAMESTR(qp) || qp->q_next == NULL;
1924                 if (modnames == NULL) {
1925                         /*
1926                          * If we don't have memory, then get the driver name in
1927                          * the log where we can see it.  Note that memory
1928                          * pressure is a possible cause of these sorts of bugs.
1929                          */
1930                         if (islast) {
1931                                 modnames = dname;
1932                                 maxmodstr = 0;
1933                         }
1934                 } else {
1935                         mnp += snprintf(mnp, FMNAMESZ + 1, "%s", dname);
1936                         if (!islast)
1937                                 *mnp++ = ' ';
1938                 }
1939                 qp = qp->q_next;
1940         } while (!islast);
1941         releasestr(wq);
1942         /* Cannot happen unless stream head is corrupt. */
1943         ASSERT(modnames != NULL);
1944         (void) strlog(rq->q_qinfo->qi_minfo->mi_idnum, 0, 1,
1945             SL_CONSOLE|SL_TRACE|SL_ERROR,
1946             "Warning: stream %p received duplicate %X M_IOC%s; module list: %s",
1947             rq->q_ptr, ((struct iocblk *)bp->b_rptr)->ioc_cmd,
1948             (DB_TYPE(bp) == M_IOCACK ? "ACK" : "NAK"), modnames);
1949         if (maxmodstr != 0)
1950                 kmem_free(modnames, maxmodstr);
1951 }
1952 
1953 int
1954 strrput_nondata(queue_t *q, mblk_t *bp)
1955 {
1956         struct stdata *stp;
1957         struct iocblk *iocbp;
1958         struct stroptions *sop;
1959         struct copyreq *reqp;
1960         struct copyresp *resp;
1961         unsigned char bpri;
1962         unsigned char  flushed_already = 0;
1963 
1964         stp = (struct stdata *)q->q_ptr;
1965 
1966         ASSERT(!(stp->sd_flag & STPLEX));
1967         ASSERT(qclaimed(q));
1968 
1969         switch (bp->b_datap->db_type) {
1970         case M_ERROR:
1971                 /*
1972                  * An error has occurred downstream, the errno is in the first
1973                  * bytes of the message.
1974                  */
1975                 if ((bp->b_wptr - bp->b_rptr) == 2) {     /* New flavor */
1976                         unsigned char rw = 0;
1977 
1978                         mutex_enter(&stp->sd_lock);
1979                         if (*bp->b_rptr != NOERROR) {        /* read error */
1980                                 if (*bp->b_rptr != 0) {
1981                                         if (stp->sd_flag & STRDERR)
1982                                                 flushed_already |= FLUSHR;
1983                                         stp->sd_flag |= STRDERR;
1984                                         rw |= FLUSHR;
1985                                 } else {
1986                                         stp->sd_flag &= ~STRDERR;
1987                                 }
1988                                 stp->sd_rerror = *bp->b_rptr;
1989                         }
1990                         bp->b_rptr++;
1991                         if (*bp->b_rptr != NOERROR) {        /* write error */
1992                                 if (*bp->b_rptr != 0) {
1993                                         if (stp->sd_flag & STWRERR)
1994                                                 flushed_already |= FLUSHW;
1995                                         stp->sd_flag |= STWRERR;
1996                                         rw |= FLUSHW;
1997                                 } else {
1998                                         stp->sd_flag &= ~STWRERR;
1999                                 }
2000                                 stp->sd_werror = *bp->b_rptr;
2001                         }
2002                         if (rw) {
2003                                 TRACE_2(TR_FAC_STREAMS_FR, TR_STRRPUT_WAKE,
2004                                     "strrput cv_broadcast:q %p, bp %p",
2005                                     q, bp);
2006                                 cv_broadcast(&q->q_wait); /* readers */
2007                                 cv_broadcast(&_WR(q)->q_wait); /* writers */
2008                                 cv_broadcast(&stp->sd_monitor); /* ioctllers */
2009 
2010                                 mutex_exit(&stp->sd_lock);
2011                                 pollwakeup(&stp->sd_pollist, POLLERR);
2012                                 mutex_enter(&stp->sd_lock);
2013 
2014                                 if (stp->sd_sigflags & S_ERROR)
2015                                         strsendsig(stp->sd_siglist, S_ERROR, 0,
2016                                             ((rw & FLUSHR) ? stp->sd_rerror :
2017                                             stp->sd_werror));
2018                                 mutex_exit(&stp->sd_lock);
2019                                 /*
2020                                  * Send the M_FLUSH only
2021                                  * for the first M_ERROR
2022                                  * message on the stream
2023                                  */
2024                                 if (flushed_already == rw) {
2025                                         freemsg(bp);
2026                                         return (0);
2027                                 }
2028 
2029                                 bp->b_datap->db_type = M_FLUSH;
2030                                 *bp->b_rptr = rw;
2031                                 bp->b_wptr = bp->b_rptr + 1;
2032                                 /*
2033                                  * Protect against the driver
2034                                  * passing up messages after
2035                                  * it has done a qprocsoff
2036                                  */
2037                                 if (_OTHERQ(q)->q_next == NULL)
2038                                         freemsg(bp);
2039                                 else
2040                                         qreply(q, bp);
2041                                 return (0);
2042                         } else
2043                                 mutex_exit(&stp->sd_lock);
2044                 } else if (*bp->b_rptr != 0) {               /* Old flavor */
2045                                 if (stp->sd_flag & (STRDERR|STWRERR))
2046                                         flushed_already = FLUSHRW;
2047                                 mutex_enter(&stp->sd_lock);
2048                                 stp->sd_flag |= (STRDERR|STWRERR);
2049                                 stp->sd_rerror = *bp->b_rptr;
2050                                 stp->sd_werror = *bp->b_rptr;
2051                                 TRACE_2(TR_FAC_STREAMS_FR,
2052                                     TR_STRRPUT_WAKE2,
2053                                     "strrput wakeup #2:q %p, bp %p", q, bp);
2054                                 cv_broadcast(&q->q_wait); /* the readers */
2055                                 cv_broadcast(&_WR(q)->q_wait); /* the writers */
2056                                 cv_broadcast(&stp->sd_monitor); /* ioctllers */
2057 
2058                                 mutex_exit(&stp->sd_lock);
2059                                 pollwakeup(&stp->sd_pollist, POLLERR);
2060                                 mutex_enter(&stp->sd_lock);
2061 
2062                                 if (stp->sd_sigflags & S_ERROR)
2063                                         strsendsig(stp->sd_siglist, S_ERROR, 0,
2064                                             (stp->sd_werror ? stp->sd_werror :
2065                                             stp->sd_rerror));
2066                                 mutex_exit(&stp->sd_lock);
2067 
2068                                 /*
2069                                  * Send the M_FLUSH only
2070                                  * for the first M_ERROR
2071                                  * message on the stream
2072                                  */
2073                                 if (flushed_already != FLUSHRW) {
2074                                         bp->b_datap->db_type = M_FLUSH;
2075                                         *bp->b_rptr = FLUSHRW;
2076                                         /*
2077                                          * Protect against the driver passing up
2078                                          * messages after it has done a
2079                                          * qprocsoff.
2080                                          */
2081                                         if (_OTHERQ(q)->q_next == NULL)
2082                                                 freemsg(bp);
2083                                         else
2084                                                 qreply(q, bp);
2085                                         return (0);
2086                                 }
2087                 }
2088                 freemsg(bp);
2089                 return (0);
2090 
2091         case M_HANGUP:
2092 
2093                 freemsg(bp);
2094                 mutex_enter(&stp->sd_lock);
2095                 stp->sd_werror = ENXIO;
2096                 stp->sd_flag |= STRHUP;
2097                 stp->sd_flag &= ~(WSLEEP|RSLEEP);
2098 
2099                 /*
2100                  * send signal if controlling tty
2101                  */
2102 
2103                 if (stp->sd_sidp) {
2104                         prsignal(stp->sd_sidp, SIGHUP);
2105                         if (stp->sd_sidp != stp->sd_pgidp)
2106                                 pgsignal(stp->sd_pgidp, SIGTSTP);
2107                 }
2108 
2109                 /*
2110                  * wake up read, write, and exception pollers and
2111                  * reset wakeup mechanism.
2112                  */
2113                 cv_broadcast(&q->q_wait);        /* the readers */
2114                 cv_broadcast(&_WR(q)->q_wait);   /* the writers */
2115                 cv_broadcast(&stp->sd_monitor);  /* the ioctllers */
2116                 strhup(stp);
2117                 mutex_exit(&stp->sd_lock);
2118                 return (0);
2119 
2120         case M_UNHANGUP:
2121                 freemsg(bp);
2122                 mutex_enter(&stp->sd_lock);
2123                 stp->sd_werror = 0;
2124                 stp->sd_flag &= ~STRHUP;
2125                 mutex_exit(&stp->sd_lock);
2126                 return (0);
2127 
2128         case M_SIG:
2129                 /*
2130                  * Someone downstream wants to post a signal.  The
2131                  * signal to post is contained in the first byte of the
2132                  * message.  If the message would go on the front of
2133                  * the queue, send a signal to the process group
2134                  * (if not SIGPOLL) or to the siglist processes
2135                  * (SIGPOLL).  If something is already on the queue,
2136                  * OR if we are delivering a delayed suspend (*sigh*
2137                  * another "tty" hack) and there's no one sleeping already,
2138                  * just enqueue the message.
2139                  */
2140                 mutex_enter(&stp->sd_lock);
2141                 if (q->q_first || (*bp->b_rptr == SIGTSTP &&
2142                     !(stp->sd_flag & RSLEEP))) {
2143                         (void) putq(q, bp);
2144                         mutex_exit(&stp->sd_lock);
2145                         return (0);
2146                 }
2147                 mutex_exit(&stp->sd_lock);
2148                 /* FALLTHRU */
2149 
2150         case M_PCSIG:
2151                 /*
2152                  * Don't enqueue, just post the signal.
2153                  */
2154                 strsignal(stp, *bp->b_rptr, 0L);
2155                 freemsg(bp);
2156                 return (0);
2157 
2158         case M_CMD:
2159                 if (MBLKL(bp) != sizeof (cmdblk_t)) {
2160                         freemsg(bp);
2161                         return (0);
2162                 }
2163 
2164                 mutex_enter(&stp->sd_lock);
2165                 if (stp->sd_flag & STRCMDWAIT) {
2166                         ASSERT(stp->sd_cmdblk == NULL);
2167                         stp->sd_cmdblk = bp;
2168                         cv_broadcast(&stp->sd_monitor);
2169                         mutex_exit(&stp->sd_lock);
2170                 } else {
2171                         mutex_exit(&stp->sd_lock);
2172                         freemsg(bp);
2173                 }
2174                 return (0);
2175 
2176         case M_FLUSH:
2177                 /*
2178                  * Flush queues.  The indication of which queues to flush
2179                  * is in the first byte of the message.  If the read queue
2180                  * is specified, then flush it.  If FLUSHBAND is set, just
2181                  * flush the band specified by the second byte of the message.
2182                  *
2183                  * If a module has issued a M_SETOPT to not flush hi
2184                  * priority messages off of the stream head, then pass this
2185                  * flag into the flushq code to preserve such messages.
2186                  */
2187 
2188                 if (*bp->b_rptr & FLUSHR) {
2189                         mutex_enter(&stp->sd_lock);
2190                         if (*bp->b_rptr & FLUSHBAND) {
2191                                 ASSERT((bp->b_wptr - bp->b_rptr) >= 2);
2192                                 flushband(q, *(bp->b_rptr + 1), FLUSHALL);
2193                         } else
2194                                 flushq_common(q, FLUSHALL,
2195                                     stp->sd_read_opt & RFLUSHPCPROT);
2196                         if ((q->q_first == NULL) ||
2197                             (q->q_first->b_datap->db_type < QPCTL))
2198                                 stp->sd_flag &= ~STRPRI;
2199                         else {
2200                                 ASSERT(stp->sd_flag & STRPRI);
2201                         }
2202                         mutex_exit(&stp->sd_lock);
2203                 }
2204                 if ((*bp->b_rptr & FLUSHW) && !(bp->b_flag & MSGNOLOOP)) {
2205                         *bp->b_rptr &= ~FLUSHR;
2206                         bp->b_flag |= MSGNOLOOP;
2207                         /*
2208                          * Protect against the driver passing up
2209                          * messages after it has done a qprocsoff.
2210                          */
2211                         if (_OTHERQ(q)->q_next == NULL)
2212                                 freemsg(bp);
2213                         else
2214                                 qreply(q, bp);
2215                         return (0);
2216                 }
2217                 freemsg(bp);
2218                 return (0);
2219 
2220         case M_IOCACK:
2221         case M_IOCNAK:
2222                 iocbp = (struct iocblk *)bp->b_rptr;
2223                 /*
2224                  * If not waiting for ACK or NAK then just free msg.
2225                  * If incorrect id sequence number then just free msg.
2226                  * If already have ACK or NAK for user then this is a
2227                  *    duplicate, display a warning and free the msg.
2228                  */
2229                 mutex_enter(&stp->sd_lock);
2230                 if ((stp->sd_flag & IOCWAIT) == 0 || stp->sd_iocblk ||
2231                     (stp->sd_iocid != iocbp->ioc_id)) {
2232                         /*
2233                          * If the ACK/NAK is a dup, display a message
2234                          * Dup is when sd_iocid == ioc_id, and
2235                          * sd_iocblk == <valid ptr> or -1 (the former
2236                          * is when an ioctl has been put on the stream
2237                          * head, but has not yet been consumed, the
2238                          * later is when it has been consumed).
2239                          */
2240                         if ((stp->sd_iocid == iocbp->ioc_id) &&
2241                             (stp->sd_iocblk != NULL)) {
2242                                 log_dupioc(q, bp);
2243                         }
2244                         freemsg(bp);
2245                         mutex_exit(&stp->sd_lock);
2246                         return (0);
2247                 }
2248 
2249                 /*
2250                  * Assign ACK or NAK to user and wake up.
2251                  */
2252                 stp->sd_iocblk = bp;
2253                 cv_broadcast(&stp->sd_monitor);
2254                 mutex_exit(&stp->sd_lock);
2255                 return (0);
2256 
2257         case M_COPYIN:
2258         case M_COPYOUT:
2259                 reqp = (struct copyreq *)bp->b_rptr;
2260 
2261                 /*
2262                  * If not waiting for ACK or NAK then just fail request.
2263                  * If already have ACK, NAK, or copy request, then just
2264                  * fail request.
2265                  * If incorrect id sequence number then just fail request.
2266                  */
2267                 mutex_enter(&stp->sd_lock);
2268                 if ((stp->sd_flag & IOCWAIT) == 0 || stp->sd_iocblk ||
2269                     (stp->sd_iocid != reqp->cq_id)) {
2270                         if (bp->b_cont) {
2271                                 freemsg(bp->b_cont);
2272                                 bp->b_cont = NULL;
2273                         }
2274                         bp->b_datap->db_type = M_IOCDATA;
2275                         bp->b_wptr = bp->b_rptr + sizeof (struct copyresp);
2276                         resp = (struct copyresp *)bp->b_rptr;
2277                         resp->cp_rval = (caddr_t)1;  /* failure */
2278                         mutex_exit(&stp->sd_lock);
2279                         putnext(stp->sd_wrq, bp);
2280                         return (0);
2281                 }
2282 
2283                 /*
2284                  * Assign copy request to user and wake up.
2285                  */
2286                 stp->sd_iocblk = bp;
2287                 cv_broadcast(&stp->sd_monitor);
2288                 mutex_exit(&stp->sd_lock);
2289                 return (0);
2290 
2291         case M_SETOPTS:
2292                 /*
2293                  * Set stream head options (read option, write offset,
2294                  * min/max packet size, and/or high/low water marks for
2295                  * the read side only).
2296                  */
2297 
2298                 bpri = 0;
2299                 sop = (struct stroptions *)bp->b_rptr;
2300                 mutex_enter(&stp->sd_lock);
2301                 if (sop->so_flags & SO_READOPT) {
2302                         switch (sop->so_readopt & RMODEMASK) {
2303                         case RNORM:
2304                                 stp->sd_read_opt &= ~(RD_MSGDIS | RD_MSGNODIS);
2305                                 break;
2306 
2307                         case RMSGD:
2308                                 stp->sd_read_opt =
2309                                     ((stp->sd_read_opt & ~RD_MSGNODIS) |
2310                                     RD_MSGDIS);
2311                                 break;
2312 
2313                         case RMSGN:
2314                                 stp->sd_read_opt =
2315                                     ((stp->sd_read_opt & ~RD_MSGDIS) |
2316                                     RD_MSGNODIS);
2317                                 break;
2318                         }
2319                         switch (sop->so_readopt & RPROTMASK) {
2320                         case RPROTNORM:
2321                                 stp->sd_read_opt &= ~(RD_PROTDAT | RD_PROTDIS);
2322                                 break;
2323 
2324                         case RPROTDAT:
2325                                 stp->sd_read_opt =
2326                                     ((stp->sd_read_opt & ~RD_PROTDIS) |
2327                                     RD_PROTDAT);
2328                                 break;
2329 
2330                         case RPROTDIS:
2331                                 stp->sd_read_opt =
2332                                     ((stp->sd_read_opt & ~RD_PROTDAT) |
2333                                     RD_PROTDIS);
2334                                 break;
2335                         }
2336                         switch (sop->so_readopt & RFLUSHMASK) {
2337                         case RFLUSHPCPROT:
2338                                 /*
2339                                  * This sets the stream head to NOT flush
2340                                  * M_PCPROTO messages.
2341                                  */
2342                                 stp->sd_read_opt |= RFLUSHPCPROT;
2343                                 break;
2344                         }
2345                 }
2346                 if (sop->so_flags & SO_ERROPT) {
2347                         switch (sop->so_erropt & RERRMASK) {
2348                         case RERRNORM:
2349                                 stp->sd_flag &= ~STRDERRNONPERSIST;
2350                                 break;
2351                         case RERRNONPERSIST:
2352                                 stp->sd_flag |= STRDERRNONPERSIST;
2353                                 break;
2354                         }
2355                         switch (sop->so_erropt & WERRMASK) {
2356                         case WERRNORM:
2357                                 stp->sd_flag &= ~STWRERRNONPERSIST;
2358                                 break;
2359                         case WERRNONPERSIST:
2360                                 stp->sd_flag |= STWRERRNONPERSIST;
2361                                 break;
2362                         }
2363                 }
2364                 if (sop->so_flags & SO_COPYOPT) {
2365                         if (sop->so_copyopt & ZCVMSAFE) {
2366                                 stp->sd_copyflag |= STZCVMSAFE;
2367                                 stp->sd_copyflag &= ~STZCVMUNSAFE;
2368                         } else if (sop->so_copyopt & ZCVMUNSAFE) {
2369                                 stp->sd_copyflag |= STZCVMUNSAFE;
2370                                 stp->sd_copyflag &= ~STZCVMSAFE;
2371                         }
2372 
2373                         if (sop->so_copyopt & COPYCACHED) {
2374                                 stp->sd_copyflag |= STRCOPYCACHED;
2375                         }
2376                 }
2377                 if (sop->so_flags & SO_WROFF)
2378                         stp->sd_wroff = sop->so_wroff;
2379                 if (sop->so_flags & SO_TAIL)
2380                         stp->sd_tail = sop->so_tail;
2381                 if (sop->so_flags & SO_MINPSZ)
2382                         q->q_minpsz = sop->so_minpsz;
2383                 if (sop->so_flags & SO_MAXPSZ)
2384                         q->q_maxpsz = sop->so_maxpsz;
2385                 if (sop->so_flags & SO_MAXBLK)
2386                         stp->sd_maxblk = sop->so_maxblk;
2387                 if (sop->so_flags & SO_HIWAT) {
2388                         if (sop->so_flags & SO_BAND) {
2389                                 if (strqset(q, QHIWAT,
2390                                     sop->so_band, sop->so_hiwat)) {
2391                                         cmn_err(CE_WARN, "strrput: could not "
2392                                             "allocate qband\n");
2393                                 } else {
2394                                         bpri = sop->so_band;
2395                                 }
2396                         } else {
2397                                 q->q_hiwat = sop->so_hiwat;
2398                         }
2399                 }
2400                 if (sop->so_flags & SO_LOWAT) {
2401                         if (sop->so_flags & SO_BAND) {
2402                                 if (strqset(q, QLOWAT,
2403                                     sop->so_band, sop->so_lowat)) {
2404                                         cmn_err(CE_WARN, "strrput: could not "
2405                                             "allocate qband\n");
2406                                 } else {
2407                                         bpri = sop->so_band;
2408                                 }
2409                         } else {
2410                                 q->q_lowat = sop->so_lowat;
2411                         }
2412                 }
2413                 if (sop->so_flags & SO_MREADON)
2414                         stp->sd_flag |= SNDMREAD;
2415                 if (sop->so_flags & SO_MREADOFF)
2416                         stp->sd_flag &= ~SNDMREAD;
2417                 if (sop->so_flags & SO_NDELON)
2418                         stp->sd_flag |= OLDNDELAY;
2419                 if (sop->so_flags & SO_NDELOFF)
2420                         stp->sd_flag &= ~OLDNDELAY;
2421                 if (sop->so_flags & SO_ISTTY)
2422                         stp->sd_flag |= STRISTTY;
2423                 if (sop->so_flags & SO_ISNTTY)
2424                         stp->sd_flag &= ~STRISTTY;
2425                 if (sop->so_flags & SO_TOSTOP)
2426                         stp->sd_flag |= STRTOSTOP;
2427                 if (sop->so_flags & SO_TONSTOP)
2428                         stp->sd_flag &= ~STRTOSTOP;
2429                 if (sop->so_flags & SO_DELIM)
2430                         stp->sd_flag |= STRDELIM;
2431                 if (sop->so_flags & SO_NODELIM)
2432                         stp->sd_flag &= ~STRDELIM;
2433 
2434                 mutex_exit(&stp->sd_lock);
2435                 freemsg(bp);
2436 
2437                 /* Check backenable in case the water marks changed */
2438                 qbackenable(q, bpri);
2439                 return (0);
2440 
2441         /*
2442          * The following set of cases deal with situations where two stream
2443          * heads are connected to each other (twisted streams).  These messages
2444          * have no meaning at the stream head.
2445          */
2446         case M_BREAK:
2447         case M_CTL:
2448         case M_DELAY:
2449         case M_START:
2450         case M_STOP:
2451         case M_IOCDATA:
2452         case M_STARTI:
2453         case M_STOPI:
2454                 freemsg(bp);
2455                 return (0);
2456 
2457         case M_IOCTL:
2458                 /*
2459                  * Always NAK this condition
2460                  * (makes no sense)
2461                  * If there is one or more threads in the read side
2462                  * rwnext we have to defer the nacking until that thread
2463                  * returns (in strget).
2464                  */
2465                 mutex_enter(&stp->sd_lock);
2466                 if (stp->sd_struiodnak != 0) {
2467                         /*
2468                          * Defer NAK to the streamhead. Queue at the end
2469                          * the list.
2470                          */
2471                         mblk_t *mp = stp->sd_struionak;
2472 
2473                         while (mp && mp->b_next)
2474                                 mp = mp->b_next;
2475                         if (mp)
2476                                 mp->b_next = bp;
2477                         else
2478                                 stp->sd_struionak = bp;
2479                         bp->b_next = NULL;
2480                         mutex_exit(&stp->sd_lock);
2481                         return (0);
2482                 }
2483                 mutex_exit(&stp->sd_lock);
2484 
2485                 bp->b_datap->db_type = M_IOCNAK;
2486                 /*
2487                  * Protect against the driver passing up
2488                  * messages after it has done a qprocsoff.
2489                  */
2490                 if (_OTHERQ(q)->q_next == NULL)
2491                         freemsg(bp);
2492                 else
2493                         qreply(q, bp);
2494                 return (0);
2495 
2496         default:
2497 #ifdef DEBUG
2498                 cmn_err(CE_WARN,
2499                     "bad message type %x received at stream head\n",
2500                     bp->b_datap->db_type);
2501 #endif
2502                 freemsg(bp);
2503                 return (0);
2504         }
2505 
2506         /* NOTREACHED */
2507 }
2508 
2509 /*
2510  * Return 0 if stream `stp' may be written to, else an errno value.  With
2511  * `eiohup' set, a hung-up (STRHUP) stream that is not STPLEX yields EIO.
2512  * With `sigpipeok' set and SW_SIGPIPE enabled on a non-STPLEX stream, any
2513  * error becomes EPIPE and SIGPIPE is posted to curthread.  Needs sd_lock.
2514  */
2515 static int
2516 strwriteable(struct stdata *stp, boolean_t eiohup, boolean_t sigpipeok)
2517 {
2518         int err;
2519 
2520         ASSERT(MUTEX_HELD(&stp->sd_lock));
2521 
2522         /*
2523          * Per POSIX modem semantics a write to a hung-up stream gets EIO.
2524          */
2525         if (!eiohup || (stp->sd_flag & (STPLEX|STRHUP)) != STRHUP)
2526                 err = strgeterr(stp, STRHUP|STPLEX|STWRERR, 0);
2527         else
2528                 err = EIO;
2529 
2530         if (err == 0)
2531                 return (0);
2532 
2533         /* Unless SIGPIPE delivery applies, pass the error back unchanged. */
2534         if ((stp->sd_flag & STPLEX) || !(stp->sd_wput_opt & SW_SIGPIPE) ||
2535             !sigpipeok)
2536                 return (err);
2537 
2538         tsignal(curthread, SIGPIPE);
2539         return (EPIPE);
2540 }
2541 
2542 /*
2543  * Copyin and send data down a stream.
2544  * The caller will allocate and copyin any control part that precedes the
2545  * message and pass that in as mctl.
      * The caller must not touch mctl afterwards: it is either freed here on
      * failure or handed downstream as the head of the message.
2546  *
2547  * Caller should *not* hold sd_lock.
2548  * When EWOULDBLOCK is returned the caller has to redo the canputnext
2549  * under sd_lock in order to avoid missing a backenabling wakeup.
2550  *
2551  * Use iosize = -1 to not send any M_DATA. iosize = 0 sends zero-length M_DATA.
2552  *
      * b_flag is or'ed into the b_flag of the first mblk of the message and
      * pri is stored as its b_band.  pri also selects the flow control check:
      * canputnext for pri == 0, bcanputnext otherwise.
      *
2553  * Set MSG_IGNFLOW in flags to ignore flow control for hipri messages.
2554  * For sync streams we can only ignore flow control by reverting to using
2555  * putnext.
2556  *
2557  * If sd_maxblk is less than *iosize this routine might return without
2558  * transferring all of *iosize. In all cases, on return *iosize will contain
2559  * the amount of data that was transferred.
      *
      * Returns 0 on success (including when there is nothing to send),
      * EWOULDBLOCK when flow controlled, ENOMEM when strmakedata reports
      * EAGAIN, or any other error from strmakedata, rwnext or struioget.
2560  */
2561 static int
2562 strput(struct stdata *stp, mblk_t *mctl, struct uio *uiop, ssize_t *iosize,
2563     int b_flag, int pri, int flags)
2564 {
2565         struiod_t uiod;
2566         struct iovec buf[IOV_MAX_STACK];
2567         int iovlen = 0;         /* != 0 iff d_iov was kmem_alloc'ed */
2568         mblk_t *mp;
2569         queue_t *wqp = stp->sd_wrq;
2570         int error = 0;
2571         ssize_t count = *iosize;
2572 
2573         ASSERT(MUTEX_NOT_HELD(&stp->sd_lock));
2574 
             /*
              * When there is a uio with data to send and sd_struiowrq is set,
              * request STRUIO_POSTPONE so that the rwnext() path below is taken.
              */
2575         if (uiop != NULL && count >= 0)
2576                 flags |= stp->sd_struiowrq ? STRUIO_POSTPONE : 0;
2577 
2578         if (!(flags & STRUIO_POSTPONE)) {
2579                 /*
2580                  * Use regular canputnext, strmakedata, putnext sequence.
                      *
                      * Note that for pri == 0 canputnext is called even when
                      * MSG_IGNFLOW is set, whereas bcanputnext is not called
                      * at all in that case.
2581                  */
2582                 if (pri == 0) {
2583                         if (!canputnext(wqp) && !(flags & MSG_IGNFLOW)) {
2584                                 freemsg(mctl);
2585                                 return (EWOULDBLOCK);
2586                         }
2587                 } else {
2588                         if (!(flags & MSG_IGNFLOW) && !bcanputnext(wqp, pri)) {
2589                                 freemsg(mctl);
2590                                 return (EWOULDBLOCK);
2591                         }
2592                 }
2593 
2594                 if ((error = strmakedata(iosize, uiop, stp, flags,
2595                     &mp)) != 0) {
2596                         freemsg(mctl);
2597                         /*
2598                          * need to change return code to ENOMEM
2599                          * so that this is not confused with
2600                          * flow control, EAGAIN.
2601                          */
2602 
2603                         if (error == EAGAIN)
2604                                 return (ENOMEM);
2605                         else
2606                                 return (error);
2607                 }
                     /*
                      * Put the control part in front: the data becomes mctl's
                      * b_cont, or is appended with linkb if mctl already has a
                      * continuation.  With neither control nor data there is
                      * nothing to send.
                      */
2608                 if (mctl != NULL) {
2609                         if (mctl->b_cont == NULL)
2610                                 mctl->b_cont = mp;
2611                         else if (mp != NULL)
2612                                 linkb(mctl, mp);
2613                         mp = mctl;
2614                 } else if (mp == NULL)
2615                         return (0);
2616 
2617                 mp->b_flag |= b_flag;
2618                 mp->b_band = (uchar_t)pri;
2619 
2620                 if (flags & MSG_IGNFLOW) {
2621                         /*
2622                          * XXX Hack: Don't get stuck running service
2623                          * procedures. This is needed for sockfs when
2624                          * sending the unbind message out of the rput
2625                          * procedure - we don't want a put procedure
2626                          * to run service procedures.
2627                          */
2628                         putnext(wqp, mp);
2629                 } else {
2630                         stream_willservice(stp);
2631                         putnext(wqp, mp);
2632                         stream_runservice(stp);
2633                 }
2634                 return (0);
2635         }
2636         /*
2637          * Stream supports rwnext() for the write side.
2638          */
2639         if ((error = strmakedata(iosize, uiop, stp, flags, &mp)) != 0) {
2640                 freemsg(mctl);
2641                 /*
2642                  * map EAGAIN to ENOMEM since EAGAIN means "flow controlled".
2643                  */
2644                 return (error == EAGAIN ? ENOMEM : error);
2645         }
             /* Same control/data linking as in the putnext path above. */
2646         if (mctl != NULL) {
2647                 if (mctl->b_cont == NULL)
2648                         mctl->b_cont = mp;
2649                 else if (mp != NULL)
2650                         linkb(mctl, mp);
2651                 mp = mctl;
2652         } else if (mp == NULL) {
2653                 return (0);
2654         }
2655 
2656         mp->b_flag |= b_flag;
2657         mp->b_band = (uchar_t)pri;
2658 
             /*
              * rwnext() is given a duplicate of the caller's uio.  The iovecs
              * of the duplicate live in the on-stack array unless there are more
              * than IOV_MAX_STACK of them, in which case they are kmem_alloc'ed
              * and must be freed on every return path below.
              */
2659         if (uiop->uio_iovcnt > IOV_MAX_STACK) {
2660                 iovlen = uiop->uio_iovcnt * sizeof (iovec_t);
2661                 uiod.d_iov = (struct iovec *)kmem_alloc(iovlen, KM_SLEEP);
2662         } else {
2663                 uiod.d_iov = buf;
2664         }
2665 
2666         (void) uiodup(uiop, &uiod.d_uio, uiod.d_iov, uiop->uio_iovcnt);
2667         uiod.d_uio.uio_offset = 0;
2668         uiod.d_mp = mp;
2669         error = rwnext(wqp, &uiod);
             /*
              * d_mp was cleared, so the message is no longer ours to send or
              * free (presumably it was taken by rwnext()).  Account for the
              * data in the caller's uio and report rwnext()'s result.
              */
2670         if (! uiod.d_mp) {
2671                 uioskip(uiop, *iosize);
2672                 if (iovlen != 0)
2673                         kmem_free(uiod.d_iov, iovlen);
2674                 return (error);
2675         }
2676         ASSERT(mp == uiod.d_mp);
2677         if (error == EINVAL) {
2678                 /*
2679                  * The stream plumbing must have changed while
2680                  * we were away, so just turn off rwnext()s.
2681                  */
2682                 error = 0;
2683         } else if (error == EBUSY || error == EWOULDBLOCK) {
2684                 /*
2685                  * Couldn't enter a perimeter or took a page fault,
2686                  * so fall-back to putnext().
2687                  */
2688                 error = 0;
2689         } else {
                     /* Any other rwnext() result is final; we still own mp. */
2690                 freemsg(mp);
2691                 if (iovlen != 0)
2692                         kmem_free(uiod.d_iov, iovlen);
2693                 return (error);
2694         }
2695         /* Have to check canput before consuming data from the uio */
2696         if (pri == 0) {
2697                 if (!canputnext(wqp) && !(flags & MSG_IGNFLOW)) {
2698                         freemsg(mp);
2699                         if (iovlen != 0)
2700                                 kmem_free(uiod.d_iov, iovlen);
2701                         return (EWOULDBLOCK);
2702                 }
2703         } else {
2704                 if (!bcanputnext(wqp, pri) && !(flags & MSG_IGNFLOW)) {
2705                         freemsg(mp);
2706                         if (iovlen != 0)
2707                                 kmem_free(uiod.d_iov, iovlen);
2708                         return (EWOULDBLOCK);
2709                 }
2710         }
2711         ASSERT(mp == uiod.d_mp);
2712         /* Copyin data from the uio */
2713         if ((error = struioget(wqp, mp, &uiod, 0)) != 0) {
2714                 freemsg(mp);
2715                 if (iovlen != 0)
2716                         kmem_free(uiod.d_iov, iovlen);
2717                 return (error);
2718         }
             /* struioget() worked on the duplicate uio; advance the caller's. */
2719         uioskip(uiop, *iosize);
2720         if (flags & MSG_IGNFLOW) {
2721                 /*
2722                  * XXX Hack: Don't get stuck running service procedures.
2723                  * This is needed for sockfs when sending the unbind message
2724                  * out of the rput procedure - we don't want a put procedure
2725                  * to run service procedures.
2726                  */
2727                 putnext(wqp, mp);
2728         } else {
2729                 stream_willservice(stp);
2730                 putnext(wqp, mp);
2731                 stream_runservice(stp);
2732         }
2733         if (iovlen != 0)
2734                 kmem_free(uiod.d_iov, iovlen);
2735         return (0);
2736 }
2737 
2738 /*
2739  * Write entry point for a stream: hands the request to strwrite_common()
2740  * with no extra wait flags and returns its result.
2741  *
2742  * The request is split into messages that conform to the minimum and
2743  * maximum packet sizes set downstream.  A zero-byte write is packaged as
2744  * a zero-length message and sent downstream like any other message.
2745  *
2746  * When the downstream queue is full the write blocks until there is room,
2747  * unless O_NDELAY is set.  When buffers of the requested sizes are not
2748  * available the write sleeps until they are.
2749  *
2750  * A write offset specified by a downstream module via M_SETOPTS is
2751  * supplied in a message when that makes sense, but never when no data
2752  * can be placed in the buffer: an empty packet is never sent down merely
2753  * because of a write offset.
2754  */
2755 /* ARGSUSED2 */
2756 int
2757 strwrite(struct vnode *vp, struct uio *uiop, cred_t *crp)
2758 {
2759         int wflag = 0;          /* nothing to add to the wait flags */
2760 
2761         return (strwrite_common(vp, uiop, crp, wflag));
2762 }
2763 
2764 /* ARGSUSED2 */
2765 int
2766 strwrite_common(struct vnode *vp, struct uio *uiop, cred_t *crp, int wflag)
2767 {
2768         struct stdata *stp;
2769         struct queue *wqp;
2770         ssize_t rmin, rmax;
2771         ssize_t iosize;
2772         int waitflag;
2773         int tempmode;
2774         int error = 0;
2775         int b_flag;
2776 
2777         ASSERT(vp->v_stream);
2778         stp = vp->v_stream;
2779 
2780         mutex_enter(&stp->sd_lock);
2781 
2782         if ((error = i_straccess(stp, JCWRITE)) != 0) {
2783                 mutex_exit(&stp->sd_lock);
2784                 return (error);
2785         }
2786 
2787         if (stp->sd_flag & (STWRERR|STRHUP|STPLEX)) {
2788                 error = strwriteable(stp, B_TRUE, B_TRUE);
2789                 if (error != 0) {
2790                         mutex_exit(&stp->sd_lock);
2791                         return (error);
2792                 }
2793         }
2794 
2795         mutex_exit(&stp->sd_lock);
2796 
2797         wqp = stp->sd_wrq;
2798 
2799         /* get these values from them cached in the stream head */
2800         rmin = stp->sd_qn_minpsz;
2801         rmax = stp->sd_qn_maxpsz;
2802 
2803         /*
2804          * Check the min/max packet size constraints.  If min packet size
2805          * is non-zero, the write cannot be split into multiple messages
2806          * and still guarantee the size constraints.
2807          */
2808         TRACE_1(TR_FAC_STREAMS_FR, TR_STRWRITE_IN, "strwrite in:q %p", wqp);
2809 
2810         ASSERT((rmax >= 0) || (rmax == INFPSZ));
2811         if (rmax == 0) {
2812                 return (0);
2813         }
2814         if (rmin > 0) {
2815                 if (uiop->uio_resid < rmin) {
2816                         TRACE_3(TR_FAC_STREAMS_FR, TR_STRWRITE_OUT,
2817                             "strwrite out:q %p out %d error %d",
2818                             wqp, 0, ERANGE);
2819                         return (ERANGE);
2820                 }
2821                 if ((rmax != INFPSZ) && (uiop->uio_resid > rmax)) {
2822                         TRACE_3(TR_FAC_STREAMS_FR, TR_STRWRITE_OUT,
2823                             "strwrite out:q %p out %d error %d",
2824                             wqp, 1, ERANGE);
2825                         return (ERANGE);
2826                 }
2827         }
2828 
2829         /*
2830          * Do until count satisfied or error.
2831          */
2832         waitflag = WRITEWAIT | wflag;
2833         if (stp->sd_flag & OLDNDELAY)
2834                 tempmode = uiop->uio_fmode & ~FNDELAY;
2835         else
2836                 tempmode = uiop->uio_fmode;
2837 
2838         if (rmax == INFPSZ)
2839                 rmax = uiop->uio_resid;
2840 
2841         /*
2842          * Note that tempmode does not get used in strput/strmakedata
2843          * but only in strwaitq. The other routines use uio_fmode
2844          * unmodified.
2845          */
2846 
2847         /* LINTED: constant in conditional context */
2848         while (1) {     /* breaks when uio_resid reaches zero */
2849                 /*
2850                  * Determine the size of the next message to be
2851                  * packaged.  May have to break write into several
2852                  * messages based on max packet size.
2853                  */
2854                 iosize = MIN(uiop->uio_resid, rmax);
2855 
2856                 /*
2857                  * Put block downstream when flow control allows it.
2858                  */
2859                 if ((stp->sd_flag & STRDELIM) && (uiop->uio_resid == iosize))
2860                         b_flag = MSGDELIM;
2861                 else
2862                         b_flag = 0;
2863 
2864                 for (;;) {
2865                         int done = 0;
2866 
2867                         error = strput(stp, NULL, uiop, &iosize, b_flag, 0, 0);
2868                         if (error == 0)
2869                                 break;
2870                         if (error != EWOULDBLOCK)
2871                                 goto out;
2872 
2873                         mutex_enter(&stp->sd_lock);
2874                         /*
2875                          * Check for a missed wakeup.
2876                          * Needed since strput did not hold sd_lock across
2877                          * the canputnext.
2878                          */
2879                         if (canputnext(wqp)) {
2880                                 /* Try again */
2881                                 mutex_exit(&stp->sd_lock);
2882                                 continue;
2883                         }
2884                         TRACE_1(TR_FAC_STREAMS_FR, TR_STRWRITE_WAIT,
2885                             "strwrite wait:q %p wait", wqp);
2886                         if ((error = strwaitq(stp, waitflag, (ssize_t)0,
2887                             tempmode, -1, &done)) != 0 || done) {
2888                                 mutex_exit(&stp->sd_lock);
2889                                 if ((vp->v_type == VFIFO) &&
2890                                     (uiop->uio_fmode & FNDELAY) &&
2891                                     (error == EAGAIN))
2892                                         error = 0;
2893                                 goto out;
2894                         }
2895                         TRACE_1(TR_FAC_STREAMS_FR, TR_STRWRITE_WAKE,
2896                             "strwrite wake:q %p awakes", wqp);
2897                         if ((error = i_straccess(stp, JCWRITE)) != 0) {
2898                                 mutex_exit(&stp->sd_lock);
2899                                 goto out;
2900                         }
2901                         mutex_exit(&stp->sd_lock);
2902                 }
2903                 waitflag |= NOINTR;
2904                 TRACE_2(TR_FAC_STREAMS_FR, TR_STRWRITE_RESID,
2905                     "strwrite resid:q %p uiop %p", wqp, uiop);
2906                 if (uiop->uio_resid) {
2907                         /* Recheck for errors - needed for sockets */
2908                         if ((stp->sd_wput_opt & SW_RECHECK_ERR) &&
2909                             (stp->sd_flag & (STWRERR|STRHUP|STPLEX))) {
2910                                 mutex_enter(&stp->sd_lock);
2911                                 error = strwriteable(stp, B_FALSE, B_TRUE);
2912                                 mutex_exit(&stp->sd_lock);
2913                                 if (error != 0)
2914                                         return (error);
2915                         }
2916                         continue;
2917                 }
2918                 break;
2919         }
2920 out:
2921         /*
2922          * For historical reasons, applications expect EAGAIN when a data
2923          * mblk_t cannot be allocated, so change ENOMEM back to EAGAIN.
2924          */
2925         if (error == ENOMEM)
2926                 error = EAGAIN;
2927         TRACE_3(TR_FAC_STREAMS_FR, TR_STRWRITE_OUT,
2928             "strwrite out:q %p out %d error %d", wqp, 2, error);
2929         return (error);
2930 }
2931 
2932 /*
2933  * Stream head write service routine.
2934  * Its job is to wake up any sleeping writers when a queue
2935  * downstream needs data (part of the flow control in putq and getq).
2936  * It also must wake anyone sleeping on a poll().
2937  * For stream head right below mux module, it must also invoke put procedure
2938  * of next downstream module.
2939  */
2940 int
2941 strwsrv(queue_t *q)
2942 {
2943         struct stdata *stp;
2944         queue_t *tq;
2945         qband_t *qbp;
2946         int i;
2947         qband_t *myqbp;
2948         int isevent;
2949         unsigned char   qbf[NBAND];     /* band flushing backenable flags */
2950 
2951         TRACE_1(TR_FAC_STREAMS_FR,
2952             TR_STRWSRV, "strwsrv:q %p", q);
2953         stp = (struct stdata *)q->q_ptr;
2954         ASSERT(qclaimed(q));
2955         mutex_enter(&stp->sd_lock);
2956         ASSERT(!(stp->sd_flag & STPLEX));
2957 
2958         if (stp->sd_flag & WSLEEP) {
2959                 stp->sd_flag &= ~WSLEEP;
2960                 cv_broadcast(&q->q_wait);
2961         }
2962         mutex_exit(&stp->sd_lock);
2963 
2964         /* The other end of a stream pipe went away. */
2965         if ((tq = q->q_next) == NULL) {
2966                 return (0);
2967         }
2968 
2969         /* Find the next module forward that has a service procedure */
2970         claimstr(q);
2971         tq = q->q_nfsrv;
2972         ASSERT(tq != NULL);
2973 
2974         if ((q->q_flag & QBACK)) {
2975                 if ((tq->q_flag & QFULL)) {
2976                         mutex_enter(QLOCK(tq));
2977                         if (!(tq->q_flag & QFULL)) {
2978                                 mutex_exit(QLOCK(tq));
2979                                 goto wakeup;
2980                         }
2981                         /*
2982                          * The queue must have become full again. Set QWANTW
2983                          * again so strwsrv will be back enabled when
2984                          * the queue becomes non-full next time.
2985                          */
2986                         tq->q_flag |= QWANTW;
2987                         mutex_exit(QLOCK(tq));
2988                 } else {
2989                 wakeup:
2990                         pollwakeup(&stp->sd_pollist, POLLWRNORM);
2991                         mutex_enter(&stp->sd_lock);
2992                         if (stp->sd_sigflags & S_WRNORM)
2993                                 strsendsig(stp->sd_siglist, S_WRNORM, 0, 0);
2994                         mutex_exit(&stp->sd_lock);
2995                 }
2996         }
2997 
2998         isevent = 0;
2999         i = 1;
3000         bzero((caddr_t)qbf, NBAND);
3001         mutex_enter(QLOCK(tq));
3002         if ((myqbp = q->q_bandp) != NULL)
3003                 for (qbp = tq->q_bandp; qbp && myqbp; qbp = qbp->qb_next) {
3004                         ASSERT(myqbp);
3005                         if ((myqbp->qb_flag & QB_BACK)) {
3006                                 if (qbp->qb_flag & QB_FULL) {
3007                                         /*
3008                                          * The band must have become full again.
3009                                          * Set QB_WANTW again so strwsrv will
3010                                          * be back enabled when the band becomes
3011                                          * non-full next time.
3012                                          */
3013                                         qbp->qb_flag |= QB_WANTW;
3014                                 } else {
3015                                         isevent = 1;
3016                                         qbf[i] = 1;
3017                                 }
3018                         }
3019                         myqbp = myqbp->qb_next;
3020                         i++;
3021                 }
3022         mutex_exit(QLOCK(tq));
3023 
3024         if (isevent) {
3025                 for (i = tq->q_nband; i; i--) {
3026                         if (qbf[i]) {
3027                                 pollwakeup(&stp->sd_pollist, POLLWRBAND);
3028                                 mutex_enter(&stp->sd_lock);
3029                                 if (stp->sd_sigflags & S_WRBAND)
3030                                         strsendsig(stp->sd_siglist, S_WRBAND,
3031                                             (uchar_t)i, 0);
3032                                 mutex_exit(&stp->sd_lock);
3033                         }
3034                 }
3035         }
3036 
3037         releasestr(q);
3038         return (0);
3039 }
3040 
3041 /*
3042  * Special case of strcopyin/strcopyout for copying
3043  * struct strioctl that can deal with both data
3044  * models.
3045  */
3046 
3047 #ifdef  _LP64
3048 
3049 static int
3050 strcopyin_strioctl(void *from, void *to, int flag, int copyflag)
3051 {
3052         struct  strioctl32 strioc32;
3053         struct  strioctl *striocp;
3054 
3055         if (copyflag & U_TO_K) {
3056                 ASSERT((copyflag & K_TO_K) == 0);
3057 
3058                 if ((flag & FMODELS) == DATAMODEL_ILP32) {
3059                         if (copyin(from, &strioc32, sizeof (strioc32)))
3060                                 return (EFAULT);
3061 
3062                         striocp = (struct strioctl *)to;
3063                         striocp->ic_cmd      = strioc32.ic_cmd;
3064                         striocp->ic_timout = strioc32.ic_timout;
3065                         striocp->ic_len      = strioc32.ic_len;
3066                         striocp->ic_dp       = (char *)(uintptr_t)strioc32.ic_dp;
3067 
3068                 } else { /* NATIVE data model */
3069                         if (copyin(from, to, sizeof (struct strioctl))) {
3070                                 return (EFAULT);
3071                         } else {
3072                                 return (0);
3073                         }
3074                 }
3075         } else {
3076                 ASSERT(copyflag & K_TO_K);
3077                 bcopy(from, to, sizeof (struct strioctl));
3078         }
3079         return (0);
3080 }
3081 
3082 static int
3083 strcopyout_strioctl(void *from, void *to, int flag, int copyflag)
3084 {
3085         struct  strioctl32 strioc32;
3086         struct  strioctl *striocp;
3087 
3088         if (copyflag & U_TO_K) {
3089                 ASSERT((copyflag & K_TO_K) == 0);
3090 
3091                 if ((flag & FMODELS) == DATAMODEL_ILP32) {
3092                         striocp = (struct strioctl *)from;
3093                         strioc32.ic_cmd = striocp->ic_cmd;
3094                         strioc32.ic_timout = striocp->ic_timout;
3095                         strioc32.ic_len = striocp->ic_len;
3096                         strioc32.ic_dp  = (caddr32_t)(uintptr_t)striocp->ic_dp;
3097                         ASSERT((char *)(uintptr_t)strioc32.ic_dp ==
3098                             striocp->ic_dp);
3099 
3100                         if (copyout(&strioc32, to, sizeof (strioc32)))
3101                                 return (EFAULT);
3102 
3103                 } else { /* NATIVE data model */
3104                         if (copyout(from, to, sizeof (struct strioctl))) {
3105                                 return (EFAULT);
3106                         } else {
3107                                 return (0);
3108                         }
3109                 }
3110         } else {
3111                 ASSERT(copyflag & K_TO_K);
3112                 bcopy(from, to, sizeof (struct strioctl));
3113         }
3114         return (0);
3115 }
3116 
3117 #else   /* ! _LP64 */
3118 
3119 /* ARGSUSED2 */
3120 static int
3121 strcopyin_strioctl(void *from, void *to, int flag, int copyflag)
3122 {
3123         return (strcopyin(from, to, sizeof (struct strioctl), copyflag));
3124 }
3125 
3126 /* ARGSUSED2 */
3127 static int
3128 strcopyout_strioctl(void *from, void *to, int flag, int copyflag)
3129 {
3130         return (strcopyout(from, to, sizeof (struct strioctl), copyflag));
3131 }
3132 
3133 #endif  /* _LP64 */
3134 
3135 /*
3136  * Determine type of job control semantics expected by user.  The
3137  * possibilities are:
3138  *      JCREAD  - Behaves like read() on fd; send SIGTTIN
3139  *      JCWRITE - Behaves like write() on fd; send SIGTTOU if TOSTOP set
3140  *      JCSETP  - Sets a value in the stream; send SIGTTOU, ignore TOSTOP
3141  *      JCGETP  - Gets a value in the stream; no signals.
3142  * See straccess in strsubr.c for usage of these values.
3143  *
3144  * This routine also returns -1 for I_STR as a special case; the
3145  * caller must call again with the real ioctl number for
3146  * classification.
3147  */
3148 static int
3149 job_control_type(int cmd)
3150 {
3151         switch (cmd) {
3152         case I_STR:
3153                 return (-1);
3154 
3155         case I_RECVFD:
3156         case I_E_RECVFD:
3157                 return (JCREAD);
3158 
3159         case I_FDINSERT:
3160         case I_SENDFD:
3161                 return (JCWRITE);
3162 
3163         case TCSETA:
3164         case TCSETAW:
3165         case TCSETAF:
3166         case TCSBRK:
3167         case TCXONC:
3168         case TCFLSH:
3169         case TCDSET:    /* Obsolete */
3170         case TIOCSWINSZ:
3171         case TCSETS:
3172         case TCSETSW:
3173         case TCSETSF:
3174         case TIOCSETD:
3175         case TIOCHPCL:
3176         case TIOCSETP:
3177         case TIOCSETN:
3178         case TIOCEXCL:
3179         case TIOCNXCL:
3180         case TIOCFLUSH:
3181         case TIOCSETC:
3182         case TIOCLBIS:
3183         case TIOCLBIC:
3184         case TIOCLSET:
3185         case TIOCSBRK:
3186         case TIOCCBRK:
3187         case TIOCSDTR:
3188         case TIOCCDTR:
3189         case TIOCSLTC:
3190         case TIOCSTOP:
3191         case TIOCSTART:
3192         case TIOCSTI:
3193         case TIOCSPGRP:
3194         case TIOCMSET:
3195         case TIOCMBIS:
3196         case TIOCMBIC:
3197         case TIOCREMOTE:
3198         case TIOCSIGNAL:
3199         case LDSETT:
3200         case LDSMAP:    /* Obsolete */
3201         case DIOCSETP:
3202         case I_FLUSH:
3203         case I_SRDOPT:
3204         case I_SETSIG:
3205         case I_SWROPT:
3206         case I_FLUSHBAND:
3207         case I_SETCLTIME:
3208         case I_SERROPT:
3209         case I_ESETSIG:
3210         case FIONBIO:
3211         case FIOASYNC:
3212         case FIOSETOWN:
3213         case JBOOT:     /* Obsolete */
3214         case JTERM:     /* Obsolete */
3215         case JTIMOM:    /* Obsolete */
3216         case JZOMBOOT:  /* Obsolete */
3217         case JAGENT:    /* Obsolete */
3218         case JTRUN:     /* Obsolete */
3219         case JXTPROTO:  /* Obsolete */
3220                 return (JCSETP);
3221         }
3222 
3223         return (JCGETP);
3224 }
3225 
3226 /*
3227  * ioctl for streams
3228  */
3229 int
3230 strioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, int copyflag,
3231     cred_t *crp, int *rvalp)
3232 {
3233         struct stdata *stp;
3234         struct strcmd *scp;
3235         struct strioctl strioc;
3236         struct uio uio;
3237         struct iovec iov;
3238         int access;
3239         mblk_t *mp;
3240         int error = 0;
3241         int done = 0;
3242         ssize_t rmin, rmax;
3243         queue_t *wrq;
3244         queue_t *rdq;
3245         boolean_t kioctl = B_FALSE;
3246         uint32_t auditing = AU_AUDITING();
3247 
3248         if (flag & FKIOCTL) {
3249                 copyflag = K_TO_K;
3250                 kioctl = B_TRUE;
3251         }
3252         ASSERT(vp->v_stream);
3253         ASSERT(copyflag == U_TO_K || copyflag == K_TO_K);
3254         stp = vp->v_stream;
3255 
3256         TRACE_3(TR_FAC_STREAMS_FR, TR_IOCTL_ENTER,
3257             "strioctl:stp %p cmd %X arg %lX", stp, cmd, arg);
3258 
3259         /*
3260          * If the copy is kernel to kernel, make sure that the FNATIVE
3261          * flag is set.  After this it would be a serious error to have
3262          * no model flag.
3263          */
3264         if (copyflag == K_TO_K)
3265                 flag = (flag & ~FMODELS) | FNATIVE;
3266 
3267         ASSERT((flag & FMODELS) != 0);
3268 
3269         wrq = stp->sd_wrq;
3270         rdq = _RD(wrq);
3271 
3272         access = job_control_type(cmd);
3273 
3274         /* We should never see these here, should be handled by iwscn */
3275         if (cmd == SRIOCSREDIR || cmd == SRIOCISREDIR)
3276                 return (EINVAL);
3277 
3278         mutex_enter(&stp->sd_lock);
3279         if ((access != -1) && ((error = i_straccess(stp, access)) != 0)) {
3280                 mutex_exit(&stp->sd_lock);
3281                 return (error);
3282         }
3283         mutex_exit(&stp->sd_lock);
3284 
3285         /*
3286          * Check for sgttyb-related ioctls first, and complain as
3287          * necessary.
3288          */
3289         switch (cmd) {
3290         case TIOCGETP:
3291         case TIOCSETP:
3292         case TIOCSETN:
3293                 if (sgttyb_handling >= 2 && !sgttyb_complaint) {
3294                         sgttyb_complaint = B_TRUE;
3295                         cmn_err(CE_NOTE,
3296                             "application used obsolete TIOC[GS]ET");
3297                 }
3298                 if (sgttyb_handling >= 3) {
3299                         tsignal(curthread, SIGSYS);
3300                         return (EIO);
3301                 }
3302                 break;
3303         }
3304 
3305         mutex_enter(&stp->sd_lock);
3306 
3307         switch (cmd) {
3308         case I_RECVFD:
3309         case I_E_RECVFD:
3310         case I_PEEK:
3311         case I_NREAD:
3312         case FIONREAD:
3313         case FIORDCHK:
3314         case I_ATMARK:
3315         case FIONBIO:
3316         case FIOASYNC:
3317                 if (stp->sd_flag & (STRDERR|STPLEX)) {
3318                         error = strgeterr(stp, STRDERR|STPLEX, 0);
3319                         if (error != 0) {
3320                                 mutex_exit(&stp->sd_lock);
3321                                 return (error);
3322                         }
3323                 }
3324                 break;
3325 
3326         default:
3327                 if (stp->sd_flag & (STRDERR|STWRERR|STPLEX)) {
3328                         error = strgeterr(stp, STRDERR|STWRERR|STPLEX, 0);
3329                         if (error != 0) {
3330                                 mutex_exit(&stp->sd_lock);
3331                                 return (error);
3332                         }
3333                 }
3334         }
3335 
3336         mutex_exit(&stp->sd_lock);
3337 
3338         switch (cmd) {
3339         default:
3340                 /*
3341                  * The stream head has hardcoded knowledge of a
3342                  * miscellaneous collection of terminal-, keyboard- and
3343                  * mouse-related ioctls, enumerated below.  This hardcoded
3344                  * knowledge allows the stream head to automatically
3345                  * convert transparent ioctl requests made by userland
3346                  * programs into I_STR ioctls which many old STREAMS
3347                  * modules and drivers require.
3348                  *
3349                  * No new ioctls should ever be added to this list.
3350                  * Instead, the STREAMS module or driver should be written
3351                  * to either handle transparent ioctls or require any
3352                  * userland programs to use I_STR ioctls (by returning
3353                  * EINVAL to any transparent ioctl requests).
3354                  *
3355                  * More importantly, removing ioctls from this list should
3356                  * be done with the utmost care, since our STREAMS modules
3357                  * and drivers *count* on the stream head performing this
3358                  * conversion, and thus may panic while processing
3359                  * transparent ioctl request for one of these ioctls (keep
3360                  * in mind that third party modules and drivers may have
3361                  * similar problems).
3362                  */
3363                 if (((cmd & IOCTYPE) == LDIOC) ||
3364                     ((cmd & IOCTYPE) == tIOC) ||
3365                     ((cmd & IOCTYPE) == TIOC) ||
3366                     ((cmd & IOCTYPE) == KIOC) ||
3367                     ((cmd & IOCTYPE) == MSIOC) ||
3368                     ((cmd & IOCTYPE) == VUIOC)) {
3369                         /*
3370                          * The ioctl is a tty ioctl - set up strioc buffer
3371                          * and call strdoioctl() to do the work.
3372                          */
3373                         if (stp->sd_flag & STRHUP)
3374                                 return (ENXIO);
3375                         strioc.ic_cmd = cmd;
3376                         strioc.ic_timout = INFTIM;
3377 
3378                         switch (cmd) {
3379 
3380                         case TCXONC:
3381                         case TCSBRK:
3382                         case TCFLSH:
3383                         case TCDSET:
3384                                 {
3385                                 int native_arg = (int)arg;
3386                                 strioc.ic_len = sizeof (int);
3387                                 strioc.ic_dp = (char *)&native_arg;
3388                                 return (strdoioctl(stp, &strioc, flag,
3389                                     K_TO_K, crp, rvalp));
3390                                 }
3391 
3392                         case TCSETA:
3393                         case TCSETAW:
3394                         case TCSETAF:
3395                                 strioc.ic_len = sizeof (struct termio);
3396                                 strioc.ic_dp = (char *)arg;
3397                                 return (strdoioctl(stp, &strioc, flag,
3398                                     copyflag, crp, rvalp));
3399 
3400                         case TCSETS:
3401                         case TCSETSW:
3402                         case TCSETSF:
3403                                 strioc.ic_len = sizeof (struct termios);
3404                                 strioc.ic_dp = (char *)arg;
3405                                 return (strdoioctl(stp, &strioc, flag,
3406                                     copyflag, crp, rvalp));
3407 
3408                         case LDSETT:
3409                                 strioc.ic_len = sizeof (struct termcb);
3410                                 strioc.ic_dp = (char *)arg;
3411                                 return (strdoioctl(stp, &strioc, flag,
3412                                     copyflag, crp, rvalp));
3413 
3414                         case TIOCSETP:
3415                                 strioc.ic_len = sizeof (struct sgttyb);
3416                                 strioc.ic_dp = (char *)arg;
3417                                 return (strdoioctl(stp, &strioc, flag,
3418                                     copyflag, crp, rvalp));
3419 
3420                         case TIOCSTI:
3421                                 if ((flag & FREAD) == 0 &&
3422                                     secpolicy_sti(crp) != 0) {
3423                                         return (EPERM);
3424                                 }
3425                                 mutex_enter(&stp->sd_lock);
3426                                 mutex_enter(&curproc->p_splock);
3427                                 if (stp->sd_sidp != curproc->p_sessp->s_sidp &&
3428                                     secpolicy_sti(crp) != 0) {
3429                                         mutex_exit(&curproc->p_splock);
3430                                         mutex_exit(&stp->sd_lock);
3431                                         return (EACCES);
3432                                 }
3433                                 mutex_exit(&curproc->p_splock);
3434                                 mutex_exit(&stp->sd_lock);
3435 
3436                                 strioc.ic_len = sizeof (char);
3437                                 strioc.ic_dp = (char *)arg;
3438                                 return (strdoioctl(stp, &strioc, flag,
3439                                     copyflag, crp, rvalp));
3440 
3441                         case TIOCSWINSZ:
3442                                 strioc.ic_len = sizeof (struct winsize);
3443                                 strioc.ic_dp = (char *)arg;
3444                                 return (strdoioctl(stp, &strioc, flag,
3445                                     copyflag, crp, rvalp));
3446 
3447                         case TIOCSSIZE:
3448                                 strioc.ic_len = sizeof (struct ttysize);
3449                                 strioc.ic_dp = (char *)arg;
3450                                 return (strdoioctl(stp, &strioc, flag,
3451                                     copyflag, crp, rvalp));
3452 
3453                         case TIOCSSOFTCAR:
3454                         case KIOCTRANS:
3455                         case KIOCTRANSABLE:
3456                         case KIOCCMD:
3457                         case KIOCSDIRECT:
3458                         case KIOCSCOMPAT:
3459                         case KIOCSKABORTEN:
3460                         case KIOCSRPTCOUNT:
3461                         case KIOCSRPTDELAY:
3462                         case KIOCSRPTRATE:
3463                         case VUIDSFORMAT:
3464                         case TIOCSPPS:
3465                                 strioc.ic_len = sizeof (int);
3466                                 strioc.ic_dp = (char *)arg;
3467                                 return (strdoioctl(stp, &strioc, flag,
3468                                     copyflag, crp, rvalp));
3469 
3470                         case KIOCSETKEY:
3471                         case KIOCGETKEY:
3472                                 strioc.ic_len = sizeof (struct kiockey);
3473                                 strioc.ic_dp = (char *)arg;
3474                                 return (strdoioctl(stp, &strioc, flag,
3475                                     copyflag, crp, rvalp));
3476 
3477                         case KIOCSKEY:
3478                         case KIOCGKEY:
3479                                 strioc.ic_len = sizeof (struct kiockeymap);
3480                                 strioc.ic_dp = (char *)arg;
3481                                 return (strdoioctl(stp, &strioc, flag,
3482                                     copyflag, crp, rvalp));
3483 
3484                         case KIOCSLED:
3485                                 /* arg is a pointer to char */
3486                                 strioc.ic_len = sizeof (char);
3487                                 strioc.ic_dp = (char *)arg;
3488                                 return (strdoioctl(stp, &strioc, flag,
3489                                     copyflag, crp, rvalp));
3490 
3491                         case MSIOSETPARMS:
3492                                 strioc.ic_len = sizeof (Ms_parms);
3493                                 strioc.ic_dp = (char *)arg;
3494                                 return (strdoioctl(stp, &strioc, flag,
3495                                     copyflag, crp, rvalp));
3496 
3497                         case VUIDSADDR:
3498                         case VUIDGADDR:
3499                                 strioc.ic_len = sizeof (struct vuid_addr_probe);
3500                                 strioc.ic_dp = (char *)arg;
3501                                 return (strdoioctl(stp, &strioc, flag,
3502                                     copyflag, crp, rvalp));
3503 
3504                         /*
3505                          * These M_IOCTL's don't require any data to be sent
3506                          * downstream, and the driver will allocate and link
3507                          * on its own mblk_t upon M_IOCACK -- thus we set
3508                          * ic_len to zero and set ic_dp to arg so we know
3509                          * where to copyout to later.
3510                          */
3511                         case TIOCGSOFTCAR:
3512                         case TIOCGWINSZ:
3513                         case TIOCGSIZE:
3514                         case KIOCGTRANS:
3515                         case KIOCGTRANSABLE:
3516                         case KIOCTYPE:
3517                         case KIOCGDIRECT:
3518                         case KIOCGCOMPAT:
3519                         case KIOCLAYOUT:
3520                         case KIOCGLED:
3521                         case MSIOGETPARMS:
3522                         case MSIOBUTTONS:
3523                         case VUIDGFORMAT:
3524                         case TIOCGPPS:
3525                         case TIOCGPPSEV:
3526                         case TCGETA:
3527                         case TCGETS:
3528                         case LDGETT:
3529                         case TIOCGETP:
3530                         case KIOCGRPTCOUNT:
3531                         case KIOCGRPTDELAY:
3532                         case KIOCGRPTRATE:
3533                                 strioc.ic_len = 0;
3534                                 strioc.ic_dp = (char *)arg;
3535                                 return (strdoioctl(stp, &strioc, flag,
3536                                     copyflag, crp, rvalp));
3537                         }
3538                 }
3539 
3540                 /*
3541                  * Unknown cmd - send it down as a transparent ioctl.
3542                  */
3543                 strioc.ic_cmd = cmd;
3544                 strioc.ic_timout = INFTIM;
3545                 strioc.ic_len = TRANSPARENT;
3546                 strioc.ic_dp = (char *)&arg;
3547 
3548                 return (strdoioctl(stp, &strioc, flag, copyflag, crp, rvalp));
3549 
3550         case I_STR:
3551                 /*
3552                  * Stream ioctl.  Read in an strioctl buffer from the user
3553                  * along with any data specified and send it downstream.
                 * Strdoioctl will allow only one ioctl message at
                 * a time, and waits for the acknowledgement.
3556                  */
3557 
3558                 if (stp->sd_flag & STRHUP)
3559                         return (ENXIO);
3560 
3561                 error = strcopyin_strioctl((void *)arg, &strioc, flag,
3562                     copyflag);
3563                 if (error != 0)
3564                         return (error);
3565 
3566                 if ((strioc.ic_len < 0) || (strioc.ic_timout < -1))
3567                         return (EINVAL);
3568 
3569                 access = job_control_type(strioc.ic_cmd);
3570                 mutex_enter(&stp->sd_lock);
3571                 if ((access != -1) &&
3572                     ((error = i_straccess(stp, access)) != 0)) {
3573                         mutex_exit(&stp->sd_lock);
3574                         return (error);
3575                 }
3576                 mutex_exit(&stp->sd_lock);
3577 
3578                 /*
3579                  * The I_STR facility provides a trap door for malicious
3580                  * code to send down bogus streamio(7I) ioctl commands to
3581                  * unsuspecting STREAMS modules and drivers which expect to
3582                  * only get these messages from the stream head.
3583                  * Explicitly prohibit any streamio ioctls which can be
3584                  * passed downstream by the stream head.  Note that we do
3585                  * not block all streamio ioctls because the ioctl
3586                  * numberspace is not well managed and thus it's possible
3587                  * that a module or driver's ioctl numbers may accidentally
3588                  * collide with them.
3589                  */
3590                 switch (strioc.ic_cmd) {
3591                 case I_LINK:
3592                 case I_PLINK:
3593                 case I_UNLINK:
3594                 case I_PUNLINK:
3595                 case _I_GETPEERCRED:
3596                 case _I_PLINK_LH:
3597                         return (EINVAL);
3598                 }
3599 
3600                 error = strdoioctl(stp, &strioc, flag, copyflag, crp, rvalp);
3601                 if (error == 0) {
3602                         error = strcopyout_strioctl(&strioc, (void *)arg,
3603                             flag, copyflag);
3604                 }
3605                 return (error);
3606 
3607         case _I_CMD:
3608                 /*
3609                  * Like I_STR, but without using M_IOC* messages and without
3610                  * copyins/copyouts beyond the passed-in argument.
3611                  */
3612                 if (stp->sd_flag & STRHUP)
3613                         return (ENXIO);
3614 
3615                 if (copyflag == U_TO_K) {
3616                         if ((scp = kmem_alloc(sizeof (strcmd_t),
3617                             KM_NOSLEEP)) == NULL) {
3618                                 return (ENOMEM);
3619                         }
3620 
3621                         if (copyin((void *)arg, scp, sizeof (strcmd_t))) {
3622                                 kmem_free(scp, sizeof (strcmd_t));
3623                                 return (EFAULT);
3624                         }
3625                 } else {
3626                         scp = (strcmd_t *)arg;
3627                 }
3628 
3629                 access = job_control_type(scp->sc_cmd);
3630                 mutex_enter(&stp->sd_lock);
3631                 if (access != -1 && (error = i_straccess(stp, access)) != 0) {
3632                         mutex_exit(&stp->sd_lock);
3633                         if (copyflag == U_TO_K)
3634                                 kmem_free(scp, sizeof (strcmd_t));
3635                         return (error);
3636                 }
3637                 mutex_exit(&stp->sd_lock);
3638 
3639                 *rvalp = 0;
3640                 if ((error = strdocmd(stp, scp, crp)) == 0) {
3641                         if (copyflag == U_TO_K &&
3642                             copyout(scp, (void *)arg, sizeof (strcmd_t))) {
3643                                 error = EFAULT;
3644                         }
3645                 }
3646                 if (copyflag == U_TO_K)
3647                         kmem_free(scp, sizeof (strcmd_t));
3648                 return (error);
3649 
3650         case I_NREAD:
3651                 /*
3652                  * Return number of bytes of data in first message
3653                  * in queue in "arg" and return the number of messages
3654                  * in queue in return value.
3655                  */
3656         {
3657                 size_t  size;
3658                 int     retval;
3659                 int     count = 0;
3660 
3661                 mutex_enter(QLOCK(rdq));
3662 
3663                 size = msgdsize(rdq->q_first);
3664                 for (mp = rdq->q_first; mp != NULL; mp = mp->b_next)
3665                         count++;
3666 
3667                 mutex_exit(QLOCK(rdq));
3668                 if (stp->sd_struiordq) {
3669                         infod_t infod;
3670 
3671                         infod.d_cmd = INFOD_COUNT;
3672                         infod.d_count = 0;
3673                         if (count == 0) {
3674                                 infod.d_cmd |= INFOD_FIRSTBYTES;
3675                                 infod.d_bytes = 0;
3676                         }
3677                         infod.d_res = 0;
3678                         (void) infonext(rdq, &infod);
3679                         count += infod.d_count;
3680                         if (infod.d_res & INFOD_FIRSTBYTES)
3681                                 size = infod.d_bytes;
3682                 }
3683 
3684                 /*
3685                  * Drop down from size_t to the "int" required by the
3686                  * interface.  Cap at INT_MAX.
3687                  */
3688                 retval = MIN(size, INT_MAX);
3689                 error = strcopyout(&retval, (void *)arg, sizeof (retval),
3690                     copyflag);
3691                 if (!error)
3692                         *rvalp = count;
3693                 return (error);
3694         }
3695 
3696         case FIONREAD:
3697                 /*
3698                  * Return number of bytes of data in all data messages
3699                  * in queue in "arg".
3700                  */
3701         {
3702                 size_t  size = 0;
3703                 int     retval;
3704 
3705                 mutex_enter(QLOCK(rdq));
3706                 for (mp = rdq->q_first; mp != NULL; mp = mp->b_next)
3707                         size += msgdsize(mp);
3708                 mutex_exit(QLOCK(rdq));
3709 
3710                 if (stp->sd_struiordq) {
3711                         infod_t infod;
3712 
3713                         infod.d_cmd = INFOD_BYTES;
3714                         infod.d_res = 0;
3715                         infod.d_bytes = 0;
3716                         (void) infonext(rdq, &infod);
3717                         size += infod.d_bytes;
3718                 }
3719 
3720                 /*
3721                  * Drop down from size_t to the "int" required by the
3722                  * interface.  Cap at INT_MAX.
3723                  */
3724                 retval = MIN(size, INT_MAX);
3725                 error = strcopyout(&retval, (void *)arg, sizeof (retval),
3726                     copyflag);
3727 
3728                 *rvalp = 0;
3729                 return (error);
3730         }
3731         case FIORDCHK:
3732                 /*
3733                  * Unlike FIONREAD, FIORDCHK does not use the arg value;
3734                  * instead the count is returned as the ioctl result. The
3735                  * I_NREAD value may not be accurate but is safe. The real
3736                  * thing to do is to add the msgdsizes of all data messages
3737                  * until a non-data message.
3738                  */
3739         {
3740                 size_t size = 0;
3741 
3742                 mutex_enter(QLOCK(rdq));
3743                 for (mp = rdq->q_first; mp != NULL; mp = mp->b_next)
3744                         size += msgdsize(mp);
3745                 mutex_exit(QLOCK(rdq));
3746 
3747                 if (stp->sd_struiordq) {
3748                         infod_t infod;
3749 
3750                         infod.d_cmd = INFOD_BYTES;
3751                         infod.d_res = 0;
3752                         infod.d_bytes = 0;
3753                         (void) infonext(rdq, &infod);
3754                         size += infod.d_bytes;
3755                 }
3756 
3757                 /*
3758                  * Since ioctl returns an int, and memory sizes under
3759                  * LP64 may not fit, we return INT_MAX if the count was
3760                  * actually greater.
3761                  */
3762                 *rvalp = MIN(size, INT_MAX);
3763                 return (0);
3764         }
3765 
3766         case I_FIND:
3767                 /*
3768                  * Get module name and report whether it is on the stream.
3769                  */
3770         {
3771                 char mname[FMNAMESZ + 1];
3772                 queue_t *q;
3773 
3774                 error = (copyflag & U_TO_K ? copyinstr : copystr)((void *)arg,
3775                     mname, FMNAMESZ + 1, NULL);
3776                 if (error)
3777                         return ((error == ENAMETOOLONG) ? EINVAL : EFAULT);
3778 
3779                 /*
3780                  * Return EINVAL if we're handed a bogus module name.
3781                  */
3782                 if (fmodsw_find(mname, FMODSW_LOAD) == NULL) {
3783                         TRACE_0(TR_FAC_STREAMS_FR,
3784                             TR_I_CANT_FIND, "couldn't I_FIND");
3785                         return (EINVAL);
3786                 }
3787 
3788                 *rvalp = 0;
3789 
3790                 /* Look downstream to see if module is there. */
3791                 claimstr(stp->sd_wrq);
3792                 for (q = stp->sd_wrq->q_next; q; q = q->q_next) {
3793                         if (q->q_flag & QREADR) {
3794                                 q = NULL;
3795                                 break;
3796                         }
3797                         if (strcmp(mname, Q2NAME(q)) == 0)
3798                                 break;
3799                 }
3800                 releasestr(stp->sd_wrq);
3801 
3802                 *rvalp = (q ? 1 : 0);
3803                 return (error);
3804         }
3805 
3806         case I_PUSH:
3807         case __I_PUSH_NOCTTY:
3808                 /*
3809                  * Push a module.
3810                  * For the case __I_PUSH_NOCTTY push a module but
3811                  * do not allocate controlling tty. See bugid 4025044
3812                  */
3813 
3814         {
3815                 char mname[FMNAMESZ + 1];
3816                 fmodsw_impl_t *fp;
3817                 dev_t dummydev;
3818 
3819                 if (stp->sd_flag & STRHUP)
3820                         return (ENXIO);
3821 
3822                 /*
3823                  * Get module name and look up in fmodsw.
3824                  */
3825                 error = (copyflag & U_TO_K ? copyinstr : copystr)((void *)arg,
3826                     mname, FMNAMESZ + 1, NULL);
3827                 if (error)
3828                         return ((error == ENAMETOOLONG) ? EINVAL : EFAULT);
3829 
3830                 if ((fp = fmodsw_find(mname, FMODSW_HOLD | FMODSW_LOAD)) ==
3831                     NULL)
3832                         return (EINVAL);
3833 
3834                 TRACE_2(TR_FAC_STREAMS_FR, TR_I_PUSH,
3835                     "I_PUSH:fp %p stp %p", fp, stp);
3836 
3837                 /*
3838                  * If the module is flagged as single-instance, then check
3839                  * to see if the module is already pushed. If it is, return
3840                  * as if the push was successful.
3841                  */
3842                 if (fp->f_qflag & _QSINGLE_INSTANCE) {
3843                         queue_t *q;
3844 
3845                         claimstr(stp->sd_wrq);
3846                         for (q = stp->sd_wrq->q_next; q; q = q->q_next) {
3847                                 if (q->q_flag & QREADR) {
3848                                         q = NULL;
3849                                         break;
3850                                 }
3851                                 if (strcmp(mname, Q2NAME(q)) == 0)
3852                                         break;
3853                         }
3854                         releasestr(stp->sd_wrq);
3855                         if (q != NULL) {
3856                                 fmodsw_rele(fp);
3857                                 return (0);
3858                         }
3859                 }
3860 
3861                 if (error = strstartplumb(stp, flag, cmd)) {
3862                         fmodsw_rele(fp);
3863                         return (error);
3864                 }
3865 
3866                 /*
3867                  * See if any more modules can be pushed on this stream.
3868                  * Note that this check must be done after strstartplumb()
3869                  * since otherwise multiple threads issuing I_PUSHes on
3870                  * the same stream will be able to exceed nstrpush.
3871                  */
3872                 mutex_enter(&stp->sd_lock);
3873                 if (stp->sd_pushcnt >= nstrpush) {
3874                         fmodsw_rele(fp);
3875                         strendplumb(stp);
3876                         mutex_exit(&stp->sd_lock);
3877                         return (EINVAL);
3878                 }
3879                 mutex_exit(&stp->sd_lock);
3880 
3881                 /*
3882                  * Push new module and call its open routine
3883                  * via qattach().  Modules don't change device
3884                  * numbers, so just ignore dummydev here.
3885                  */
3886                 dummydev = vp->v_rdev;
3887                 if ((error = qattach(rdq, &dummydev, 0, crp, fp,
3888                     B_FALSE)) == 0) {
3889                         if (vp->v_type == VCHR && /* sorry, no pipes allowed */
3890                             (cmd == I_PUSH) && (stp->sd_flag & STRISTTY)) {
3891                                 /*
3892                                  * try to allocate it as a controlling terminal
3893                                  */
3894                                 (void) strctty(stp);
3895                         }
3896                 }
3897 
3898                 mutex_enter(&stp->sd_lock);
3899 
3900                 /*
3901                  * As a performance concern we are caching the values of
3902                  * q_minpsz and q_maxpsz of the module below the stream
3903                  * head in the stream head.
3904                  */
3905                 mutex_enter(QLOCK(stp->sd_wrq->q_next));
3906                 rmin = stp->sd_wrq->q_next->q_minpsz;
3907                 rmax = stp->sd_wrq->q_next->q_maxpsz;
3908                 mutex_exit(QLOCK(stp->sd_wrq->q_next));
3909 
3910                 /* Do this processing here as a performance concern */
3911                 if (strmsgsz != 0) {
3912                         if (rmax == INFPSZ)
3913                                 rmax = strmsgsz;
3914                         else  {
3915                                 if (vp->v_type == VFIFO)
3916                                         rmax = MIN(PIPE_BUF, rmax);
3917                                 else    rmax = MIN(strmsgsz, rmax);
3918                         }
3919                 }
3920 
3921                 mutex_enter(QLOCK(wrq));
3922                 stp->sd_qn_minpsz = rmin;
3923                 stp->sd_qn_maxpsz = rmax;
3924                 mutex_exit(QLOCK(wrq));
3925 
3926                 strendplumb(stp);
3927                 mutex_exit(&stp->sd_lock);
3928                 return (error);
3929         }
3930 
3931         case I_POP:
3932         {
3933                 queue_t *q;
3934 
3935                 if (stp->sd_flag & STRHUP)
3936                         return (ENXIO);
3937                 if (!wrq->q_next)    /* for broken pipes */
3938                         return (EINVAL);
3939 
3940                 if (error = strstartplumb(stp, flag, cmd))
3941                         return (error);
3942 
3943                 /*
3944                  * If there is an anchor on this stream and popping
3945                  * the current module would attempt to pop through the
3946                  * anchor, then disallow the pop unless we have sufficient
3947                  * privileges; take the cheapest (non-locking) check
3948                  * first.
3949                  */
3950                 if (secpolicy_ip_config(crp, B_TRUE) != 0 ||
3951                     (stp->sd_anchorzone != crgetzoneid(crp))) {
3952                         mutex_enter(&stp->sd_lock);
3953                         /*
3954                          * Anchors only apply if there's at least one
3955                          * module on the stream (sd_pushcnt > 0).
3956                          */
3957                         if (stp->sd_pushcnt > 0 &&
3958                             stp->sd_pushcnt == stp->sd_anchor &&
3959                             stp->sd_vnode->v_type != VFIFO) {
3960                                 strendplumb(stp);
3961                                 mutex_exit(&stp->sd_lock);
3962                                 if (stp->sd_anchorzone != crgetzoneid(crp))
3963                                         return (EINVAL);
3964                                 /* Audit and report error */
3965                                 return (secpolicy_ip_config(crp, B_FALSE));
3966                         }
3967                         mutex_exit(&stp->sd_lock);
3968                 }
3969 
3970                 q = wrq->q_next;
3971                 TRACE_2(TR_FAC_STREAMS_FR, TR_I_POP,
3972                     "I_POP:%p from %p", q, stp);
3973                 if (q->q_next == NULL || (q->q_flag & (QREADR|QISDRV))) {
3974                         error = EINVAL;
3975                 } else {
3976                         qdetach(_RD(q), 1, flag, crp, B_FALSE);
3977                         error = 0;
3978                 }
3979                 mutex_enter(&stp->sd_lock);
3980 
3981                 /*
3982                  * As a performance concern we are caching the values of
3983                  * q_minpsz and q_maxpsz of the module below the stream
3984                  * head in the stream head.
3985                  */
3986                 mutex_enter(QLOCK(wrq->q_next));
3987                 rmin = wrq->q_next->q_minpsz;
3988                 rmax = wrq->q_next->q_maxpsz;
3989                 mutex_exit(QLOCK(wrq->q_next));
3990 
3991                 /* Do this processing here as a performance concern */
3992                 if (strmsgsz != 0) {
3993                         if (rmax == INFPSZ)
3994                                 rmax = strmsgsz;
3995                         else  {
3996                                 if (vp->v_type == VFIFO)
3997                                         rmax = MIN(PIPE_BUF, rmax);
3998                                 else    rmax = MIN(strmsgsz, rmax);
3999                         }
4000                 }
4001 
4002                 mutex_enter(QLOCK(wrq));
4003                 stp->sd_qn_minpsz = rmin;
4004                 stp->sd_qn_maxpsz = rmax;
4005                 mutex_exit(QLOCK(wrq));
4006 
4007                 /* If we popped through the anchor, then reset the anchor. */
4008                 if (stp->sd_pushcnt < stp->sd_anchor) {
4009                         stp->sd_anchor = 0;
4010                         stp->sd_anchorzone = 0;
4011                 }
4012                 strendplumb(stp);
4013                 mutex_exit(&stp->sd_lock);
4014                 return (error);
4015         }
4016 
4017         case _I_MUXID2FD:
4018         {
4019                 /*
4020                  * Create an fd for an I_PLINK'ed lower stream with a given
4021                  * muxid.  With the fd, an application can send down ioctls,
4022                  * like I_LIST, to the previously I_PLINK'ed stream.  Note
4023                  * that after getting the fd, the application has to do an
4024                  * I_PUNLINK on the muxid before it can do any operation
4025                  * on the lower stream.  This is required by spec1170.
4026                  *
4027                  * The fd used to do this ioctl should point to the same
4028                  * controlling device used to do the I_PLINK.  If it uses
4029                  * a different stream or an invalid muxid, I_MUXID2FD will
4030                  * fail.  The error code is set to EINVAL.
4031                  *
4032                  * The intended use of this interface is the following.
4033                  * An application I_PLINKs a stream and exits.  The fd
4034                  * to the lower stream is gone.  When another application
4035                  * wants to get an fd to the lower stream, it uses I_MUXID2FD.
4036                  */
4037                 int muxid = (int)arg;
4038                 int fd;
4039                 linkinfo_t *linkp;
4040                 struct file *fp;
4041                 netstack_t *ns;
4042                 str_stack_t *ss;
4043 
4044                 /*
4045                  * Do not allow the wildcard muxid.  This ioctl is not
4046                  * intended to find an arbitrary link.
4047                  */
4048                 if (muxid == 0) {
4049                         return (EINVAL);
4050                 }
4051 
4052                 ns = netstack_find_by_cred(crp);
4053                 ASSERT(ns != NULL);
4054                 ss = ns->netstack_str;
4055                 ASSERT(ss != NULL);
4056 
4057                 mutex_enter(&muxifier);
4058                 linkp = findlinks(vp->v_stream, muxid, LINKPERSIST, ss);
4059                 if (linkp == NULL) {
4060                         mutex_exit(&muxifier);
4061                         netstack_rele(ss->ss_netstack);
4062                         return (EINVAL);
4063                 }
4064 
4065                 if ((fd = ufalloc(0)) == -1) {
4066                         mutex_exit(&muxifier);
4067                         netstack_rele(ss->ss_netstack);
4068                         return (EMFILE);
4069                 }
4070                 fp = linkp->li_fpdown;
4071                 mutex_enter(&fp->f_tlock);
4072                 fp->f_count++;
4073                 mutex_exit(&fp->f_tlock);
4074                 mutex_exit(&muxifier);
4075                 setf(fd, fp);
4076                 *rvalp = fd;
4077                 netstack_rele(ss->ss_netstack);
4078                 return (0);
4079         }
4080 
4081         case _I_INSERT:
4082         {
4083                 /*
4084                  * Insert a module at a given position in a stream.
4085                  * In the first release, only allow a privileged user
4086                  * to use this ioctl. Furthermore, the insert is only allowed
4087                  * below an anchor if the zoneid is the same as the zoneid
4088                  * which created the anchor.
4089                  *
4090                  * Note that we do not plan to support this ioctl
4091                  * on pipes in the first release.  We want to learn more
4092                  * about the implications of these ioctls before extending
4093                  * their support.  And we do not think these features are
4094                  * valuable for pipes.
4095                  */
4096                 STRUCT_DECL(strmodconf, strmodinsert);
4097                 char mod_name[FMNAMESZ + 1];
4098                 fmodsw_impl_t *fp;
4099                 dev_t dummydev;
4100                 queue_t *tmp_wrq;
4101                 int pos;
4102                 boolean_t is_insert;
4103 
4104                 STRUCT_INIT(strmodinsert, flag);
4105                 if (stp->sd_flag & STRHUP)
4106                         return (ENXIO);
4107                 if (STRMATED(stp))
4108                         return (EINVAL);
4109                 if ((error = secpolicy_net_config(crp, B_FALSE)) != 0)
4110                         return (error);
4111                 if (stp->sd_anchor != 0 &&
4112                     stp->sd_anchorzone != crgetzoneid(crp))
4113                         return (EINVAL);
4114 
4115                 error = strcopyin((void *)arg, STRUCT_BUF(strmodinsert),
4116                     STRUCT_SIZE(strmodinsert), copyflag);
4117                 if (error)
4118                         return (error);
4119 
4120                 /*
4121                  * Get module name and look up in fmodsw.
4122                  */
4123                 error = (copyflag & U_TO_K ? copyinstr :
4124                     copystr)(STRUCT_FGETP(strmodinsert, mod_name),
4125                     mod_name, FMNAMESZ + 1, NULL);
4126                 if (error)
4127                         return ((error == ENAMETOOLONG) ? EINVAL : EFAULT);
4128 
4129                 if ((fp = fmodsw_find(mod_name, FMODSW_HOLD | FMODSW_LOAD)) ==
4130                     NULL)
4131                         return (EINVAL);
4132 
4133                 if (error = strstartplumb(stp, flag, cmd)) {
4134                         fmodsw_rele(fp);
4135                         return (error);
4136                 }
4137 
4138                 /*
4139                  * Is this _I_INSERT just like an I_PUSH?  We need to know
4140                  * this because we do some optimizations if this is a
4141                  * module being pushed.
4142                  */
4143                 pos = STRUCT_FGET(strmodinsert, pos);
4144                 is_insert = (pos != 0);
4145 
4146                 /*
4147                  * Make sure pos is valid.  Even though it is not an I_PUSH,
4148                  * we impose the same limit on the number of modules in a
4149                  * stream.
4150                  */
4151                 mutex_enter(&stp->sd_lock);
4152                 if (stp->sd_pushcnt >= nstrpush || pos < 0 ||
4153                     pos > stp->sd_pushcnt) {
4154                         fmodsw_rele(fp);
4155                         strendplumb(stp);
4156                         mutex_exit(&stp->sd_lock);
4157                         return (EINVAL);
4158                 }
4159                 if (stp->sd_anchor != 0) {
4160                         /*
4161                          * Is this insert below the anchor?
4162                          * Pushcnt hasn't been increased yet hence
4163                          * we test for greater than here, and greater or
4164                          * equal after qattach.
4165                          */
4166                         if (pos > (stp->sd_pushcnt - stp->sd_anchor) &&
4167                             stp->sd_anchorzone != crgetzoneid(crp)) {
4168                                 fmodsw_rele(fp);
4169                                 strendplumb(stp);
4170                                 mutex_exit(&stp->sd_lock);
4171                                 return (EPERM);
4172                         }
4173                 }
4174 
4175                 mutex_exit(&stp->sd_lock);
4176 
4177                 /*
4178                  * First find the position at which this module is to
4179                  * be inserted.  We don't need to call claimstr()
4180                  * as the stream should not be changing at this point.
4181                  *
4182                  * Insert new module and call its open routine
4183                  * via qattach().  Modules don't change device
4184                  * numbers, so just ignore dummydev here.
4185                  */
4186                 for (tmp_wrq = stp->sd_wrq; pos > 0;
4187                     tmp_wrq = tmp_wrq->q_next, pos--) {
4188                         ASSERT(SAMESTR(tmp_wrq));
4189                 }
4190                 dummydev = vp->v_rdev;
4191                 if ((error = qattach(_RD(tmp_wrq), &dummydev, 0, crp,
4192                     fp, is_insert)) != 0) {
4193                         mutex_enter(&stp->sd_lock);
4194                         strendplumb(stp);
4195                         mutex_exit(&stp->sd_lock);
4196                         return (error);
4197                 }
4198 
4199                 mutex_enter(&stp->sd_lock);
4200 
4201                 /*
4202                  * As a performance concern we are caching the values of
4203                  * q_minpsz and q_maxpsz of the module below the stream
4204                  * head in the stream head.
4205                  */
4206                 if (!is_insert) {
4207                         mutex_enter(QLOCK(stp->sd_wrq->q_next));
4208                         rmin = stp->sd_wrq->q_next->q_minpsz;
4209                         rmax = stp->sd_wrq->q_next->q_maxpsz;
4210                         mutex_exit(QLOCK(stp->sd_wrq->q_next));
4211 
4212                         /* Do this processing here as a performance concern */
4213                         if (strmsgsz != 0) {
4214                                 if (rmax == INFPSZ) {
4215                                         rmax = strmsgsz;
4216                                 } else  {
4217                                         rmax = MIN(strmsgsz, rmax);
4218                                 }
4219                         }
4220 
4221                         mutex_enter(QLOCK(wrq));
4222                         stp->sd_qn_minpsz = rmin;
4223                         stp->sd_qn_maxpsz = rmax;
4224                         mutex_exit(QLOCK(wrq));
4225                 }
4226 
4227                 /*
4228                  * Need to update the anchor value if this module is
4229                  * inserted below the anchor point.
4230                  */
4231                 if (stp->sd_anchor != 0) {
4232                         pos = STRUCT_FGET(strmodinsert, pos);
4233                         if (pos >= (stp->sd_pushcnt - stp->sd_anchor))
4234                                 stp->sd_anchor++;
4235                 }
4236 
4237                 strendplumb(stp);
4238                 mutex_exit(&stp->sd_lock);
4239                 return (0);
4240         }
4241 
4242         case _I_REMOVE:
4243         {
4244                 /*
4245                  * To remove a module with a given name in a stream.  The
4246                  * caller of this ioctl needs to provide both the name and
4247                  * the position of the module to be removed.  This eliminates
4248                  * the ambiguity of removal if a module is inserted/pushed
4249                  * multiple times in a stream.  In the first release, only
4250                  * allow privileged user to use this ioctl.
4251                  * Furthermore, the remove is only allowed
4252                  * below an anchor if the zoneid is the same as the zoneid
4253                  * which created the anchor.
4254                  *
4255                  * Note that we do not plan to support this ioctl
4256                  * on pipes in the first release.  We want to learn more
4257                  * about the implications of these ioctls before extending
4258                  * their support.  And we do not think these features are
4259                  * valuable for pipes.
4260                  *
4261                  * Also note that _I_REMOVE cannot be used to remove a
4262                  * driver or the stream head.
4263                  */
4264                 STRUCT_DECL(strmodconf, strmodremove);
4265                 queue_t *q;
4266                 int pos;
4267                 char mod_name[FMNAMESZ + 1];
4268                 boolean_t is_remove;
4269 
4270                 STRUCT_INIT(strmodremove, flag);
4271                 if (stp->sd_flag & STRHUP)
4272                         return (ENXIO);
4273                 if (STRMATED(stp))
4274                         return (EINVAL);
4275                 if ((error = secpolicy_net_config(crp, B_FALSE)) != 0)
4276                         return (error);
4277                 if (stp->sd_anchor != 0 &&
4278                     stp->sd_anchorzone != crgetzoneid(crp))
4279                         return (EINVAL);
4280 
4281                 error = strcopyin((void *)arg, STRUCT_BUF(strmodremove),
4282                     STRUCT_SIZE(strmodremove), copyflag);
4283                 if (error)
4284                         return (error);
4285 
4286                 error = (copyflag & U_TO_K ? copyinstr :
4287                     copystr)(STRUCT_FGETP(strmodremove, mod_name),
4288                     mod_name, FMNAMESZ + 1, NULL);
4289                 if (error)
4290                         return ((error == ENAMETOOLONG) ? EINVAL : EFAULT);
4291 
4292                 if ((error = strstartplumb(stp, flag, cmd)) != 0)
4293                         return (error);
4294 
4295                 /*
4296                  * Match the name of given module to the name of module at
4297                  * the given position.
4298                  */
4299                 pos = STRUCT_FGET(strmodremove, pos);
4300 
4301                 is_remove = (pos != 0);
4302                 for (q = stp->sd_wrq->q_next; SAMESTR(q) && pos > 0;
4303                     q = q->q_next, pos--)
4304                         ;
4305                 if (pos > 0 || !SAMESTR(q) ||
4306                     strcmp(Q2NAME(q), mod_name) != 0) {
4307                         mutex_enter(&stp->sd_lock);
4308                         strendplumb(stp);
4309                         mutex_exit(&stp->sd_lock);
4310                         return (EINVAL);
4311                 }
4312 
4313                 /*
4314                  * If the position is at or below an anchor, then the zoneid
4315                  * must match the zoneid that created the anchor.
4316                  */
4317                 if (stp->sd_anchor != 0) {
4318                         pos = STRUCT_FGET(strmodremove, pos);
4319                         if (pos >= (stp->sd_pushcnt - stp->sd_anchor) &&
4320                             stp->sd_anchorzone != crgetzoneid(crp)) {
4321                                 mutex_enter(&stp->sd_lock);
4322                                 strendplumb(stp);
4323                                 mutex_exit(&stp->sd_lock);
4324                                 return (EPERM);
4325                         }
4326                 }
4327 
4328 
4329                 ASSERT(!(q->q_flag & QREADR));
4330                 qdetach(_RD(q), 1, flag, crp, is_remove);
4331 
4332                 mutex_enter(&stp->sd_lock);
4333 
4334                 /*
4335                  * As a performance concern we are caching the values of
4336                  * q_minpsz and q_maxpsz of the module below the stream
4337                  * head in the stream head.
4338                  */
4339                 if (!is_remove) {
4340                         mutex_enter(QLOCK(wrq->q_next));
4341                         rmin = wrq->q_next->q_minpsz;
4342                         rmax = wrq->q_next->q_maxpsz;
4343                         mutex_exit(QLOCK(wrq->q_next));
4344 
4345                         /* Do this processing here as a performance concern */
4346                         if (strmsgsz != 0) {
4347                                 if (rmax == INFPSZ)
4348                                         rmax = strmsgsz;
4349                                 else  {
4350                                         if (vp->v_type == VFIFO)
4351                                                 rmax = MIN(PIPE_BUF, rmax);
4352                                         else    rmax = MIN(strmsgsz, rmax);
4353                                 }
4354                         }
4355 
4356                         mutex_enter(QLOCK(wrq));
4357                         stp->sd_qn_minpsz = rmin;
4358                         stp->sd_qn_maxpsz = rmax;
4359                         mutex_exit(QLOCK(wrq));
4360                 }
4361 
4362                 /*
4363                  * Need to update the anchor value if this module is removed
4364                  * at or below the anchor point.  If the removed module is at
4365                  * the anchor point, remove the anchor for this stream if
4366                  * there is no module above the anchor point.  Otherwise, if
4367                  * the removed module is below the anchor point, decrement the
4368                  * anchor point by 1.
4369                  */
4370                 if (stp->sd_anchor != 0) {
4371                         pos = STRUCT_FGET(strmodremove, pos);
4372                         if (pos == stp->sd_pushcnt - stp->sd_anchor + 1)
4373                                 stp->sd_anchor = 0;
4374                         else if (pos > (stp->sd_pushcnt - stp->sd_anchor + 1))
4375                                 stp->sd_anchor--;
4376                 }
4377 
4378                 strendplumb(stp);
4379                 mutex_exit(&stp->sd_lock);
4380                 return (0);
4381         }
4382 
4383         case I_ANCHOR:
4384                 /*
4385                  * Set the anchor position on the stream to reside at
4386                  * the top module (in other words, the top module
4387                  * cannot be popped).  Anchors with a FIFO make no
4388                  * obvious sense, so they're not allowed.
4389                  */
4390                 mutex_enter(&stp->sd_lock);
4391 
4392                 if (stp->sd_vnode->v_type == VFIFO) {
4393                         mutex_exit(&stp->sd_lock);
4394                         return (EINVAL);
4395                 }
4396                 /* Only allow the same zoneid to update the anchor */
4397                 if (stp->sd_anchor != 0 &&
4398                     stp->sd_anchorzone != crgetzoneid(crp)) {
4399                         mutex_exit(&stp->sd_lock);
4400                         return (EINVAL);
4401                 }
4402                 stp->sd_anchor = stp->sd_pushcnt;
4403                 stp->sd_anchorzone = crgetzoneid(crp);
4404                 mutex_exit(&stp->sd_lock);
4405                 return (0);
4406 
4407         case I_LOOK:
4408                 /*
4409                  * Get name of first module downstream.
4410                  * If no module, return an error.
4411                  */
4412                 claimstr(wrq);
4413                 if (_SAMESTR(wrq) && wrq->q_next->q_next != NULL) {
4414                         char *name = Q2NAME(wrq->q_next);
4415 
4416                         error = strcopyout(name, (void *)arg, strlen(name) + 1,
4417                             copyflag);
4418                         releasestr(wrq);
4419                         return (error);
4420                 }
4421                 releasestr(wrq);
4422                 return (EINVAL);
4423 
4424         case I_LINK:
4425         case I_PLINK:
4426                 /*
4427                  * Link a multiplexor.
4428                  */
4429                 return (mlink(vp, cmd, (int)arg, crp, rvalp, 0));
4430 
4431         case _I_PLINK_LH:
4432                 /*
4433                  * Link a multiplexor: Call must originate from kernel.
4434                  */
4435                 if (kioctl)
4436                         return (ldi_mlink_lh(vp, cmd, arg, crp, rvalp));
4437 
4438                 return (EINVAL);
4439         case I_UNLINK:
4440         case I_PUNLINK:
4441                 /*
4442                  * Unlink a multiplexor.
4443                  * If arg is -1, unlink all links for which this is the
4444                  * controlling stream.  Otherwise, arg is an index number
4445                  * for a link to be removed.
4446                  */
4447         {
4448                 struct linkinfo *linkp;
4449                 int native_arg = (int)arg;
4450                 int type;
4451                 netstack_t *ns;
4452                 str_stack_t *ss;
4453 
4454                 TRACE_1(TR_FAC_STREAMS_FR,
4455                     TR_I_UNLINK, "I_UNLINK/I_PUNLINK:%p", stp);
4456                 if (vp->v_type == VFIFO) {
4457                         return (EINVAL);
4458                 }
4459                 if (cmd == I_UNLINK)
4460                         type = LINKNORMAL;
4461                 else    /* I_PUNLINK */
4462                         type = LINKPERSIST;
4463                 if (native_arg == 0) {
4464                         return (EINVAL);
4465                 }
4466                 ns = netstack_find_by_cred(crp);
4467                 ASSERT(ns != NULL);
4468                 ss = ns->netstack_str;
4469                 ASSERT(ss != NULL);
4470 
4471                 if (native_arg == MUXID_ALL)
4472                         error = munlinkall(stp, type, crp, rvalp, ss);
4473                 else {
4474                         mutex_enter(&muxifier);
4475                         if (!(linkp = findlinks(stp, (int)arg, type, ss))) {
4476                                 /* invalid user supplied index number */
4477                                 mutex_exit(&muxifier);
4478                                 netstack_rele(ss->ss_netstack);
4479                                 return (EINVAL);
4480                         }
4481                         /* munlink drops the muxifier lock */
4482                         error = munlink(stp, linkp, type, crp, rvalp, ss);
4483                 }
4484                 netstack_rele(ss->ss_netstack);
4485                 return (error);
4486         }
4487 
4488         case I_FLUSH:
4489                 /*
4490                  * send a flush message downstream
4491                  * flush message can indicate
4492                  * FLUSHR - flush read queue
4493                  * FLUSHW - flush write queue
4494                  * FLUSHRW - flush read/write queue
4495                  */
4496                 if (stp->sd_flag & STRHUP)
4497                         return (ENXIO);
4498                 if (arg & ~FLUSHRW)
4499                         return (EINVAL);
4500 
4501                 for (;;) {
4502                         if (putnextctl1(stp->sd_wrq, M_FLUSH, (int)arg)) {
4503                                 break;
4504                         }
4505                         if (error = strwaitbuf(1, BPRI_HI)) {
4506                                 return (error);
4507                         }
4508                 }
4509 
4510                 /*
4511                  * Send down an unsupported ioctl and wait for the nack
4512                  * in order to allow the M_FLUSH to propagate back
4513                  * up to the stream head.
4514                  * Replaces if (qready()) runqueues();
4515                  */
4516                 strioc.ic_cmd = -1;     /* The unsupported ioctl */
4517                 strioc.ic_timout = 0;
4518                 strioc.ic_len = 0;
4519                 strioc.ic_dp = NULL;
4520                 (void) strdoioctl(stp, &strioc, flag, K_TO_K, crp, rvalp);
4521                 *rvalp = 0;
4522                 return (0);
4523 
4524         case I_FLUSHBAND:
4525         {
4526                 struct bandinfo binfo;
4527 
4528                 error = strcopyin((void *)arg, &binfo, sizeof (binfo),
4529                     copyflag);
4530                 if (error)
4531                         return (error);
4532                 if (stp->sd_flag & STRHUP)
4533                         return (ENXIO);
4534                 if (binfo.bi_flag & ~FLUSHRW)
4535                         return (EINVAL);
4536                 while (!(mp = allocb(2, BPRI_HI))) {
4537                         if (error = strwaitbuf(2, BPRI_HI))
4538                                 return (error);
4539                 }
4540                 mp->b_datap->db_type = M_FLUSH;
4541                 *mp->b_wptr++ = binfo.bi_flag | FLUSHBAND;
4542                 *mp->b_wptr++ = binfo.bi_pri;
4543                 putnext(stp->sd_wrq, mp);
4544                 /*
4545                  * Send down an unsupported ioctl and wait for the nack
4546                  * in order to allow the M_FLUSH to propagate back
4547                  * up to the stream head.
4548                  * Replaces if (qready()) runqueues();
4549                  */
4550                 strioc.ic_cmd = -1;     /* The unsupported ioctl */
4551                 strioc.ic_timout = 0;
4552                 strioc.ic_len = 0;
4553                 strioc.ic_dp = NULL;
4554                 (void) strdoioctl(stp, &strioc, flag, K_TO_K, crp, rvalp);
4555                 *rvalp = 0;
4556                 return (0);
4557         }
4558 
4559         case I_SRDOPT:
4560                 /*
4561                  * Set read options
4562                  *
4563                  * RNORM - default stream mode
4564                  * RMSGN - message no discard
4565                  * RMSGD - message discard
4566                  * RPROTNORM - fail read with EBADMSG for M_[PC]PROTOs
4567                  * RPROTDAT - convert M_[PC]PROTOs to M_DATAs
4568                  * RPROTDIS - discard M_[PC]PROTOs and retain M_DATAs
4569                  */
4570                 if (arg & ~(RMODEMASK | RPROTMASK))
4571                         return (EINVAL);
4572 
4573                 if ((arg & (RMSGD|RMSGN)) == (RMSGD|RMSGN))
4574                         return (EINVAL);
4575 
4576                 mutex_enter(&stp->sd_lock);
4577                 switch (arg & RMODEMASK) {
4578                 case RNORM:
4579                         stp->sd_read_opt &= ~(RD_MSGDIS | RD_MSGNODIS);
4580                         break;
4581                 case RMSGD:
4582                         stp->sd_read_opt = (stp->sd_read_opt & ~RD_MSGNODIS) |
4583                             RD_MSGDIS;
4584                         break;
4585                 case RMSGN:
4586                         stp->sd_read_opt = (stp->sd_read_opt & ~RD_MSGDIS) |
4587                             RD_MSGNODIS;
4588                         break;
4589                 }
4590 
4591                 switch (arg & RPROTMASK) {
4592                 case RPROTNORM:
4593                         stp->sd_read_opt &= ~(RD_PROTDAT | RD_PROTDIS);
4594                         break;
4595 
4596                 case RPROTDAT:
4597                         stp->sd_read_opt = ((stp->sd_read_opt & ~RD_PROTDIS) |
4598                             RD_PROTDAT);
4599                         break;
4600 
4601                 case RPROTDIS:
4602                         stp->sd_read_opt = ((stp->sd_read_opt & ~RD_PROTDAT) |
4603                             RD_PROTDIS);
4604                         break;
4605                 }
4606                 mutex_exit(&stp->sd_lock);
4607                 return (0);
4608 
4609         case I_GRDOPT:
4610                 /*
4611                  * Get read option and return the value
4612                  * to spot pointed to by arg
4613                  */
4614         {
4615                 int rdopt;
4616 
4617                 rdopt = ((stp->sd_read_opt & RD_MSGDIS) ? RMSGD :
4618                     ((stp->sd_read_opt & RD_MSGNODIS) ? RMSGN : RNORM));
4619                 rdopt |= ((stp->sd_read_opt & RD_PROTDAT) ? RPROTDAT :
4620                     ((stp->sd_read_opt & RD_PROTDIS) ? RPROTDIS : RPROTNORM));
4621 
4622                 return (strcopyout(&rdopt, (void *)arg, sizeof (int),
4623                     copyflag));
4624         }
4625 
4626         case I_SERROPT:
4627                 /*
4628                  * Set error options
4629                  *
4630                  * RERRNORM - persistent read errors
4631                  * RERRNONPERSIST - non-persistent read errors
4632                  * WERRNORM - persistent write errors
4633                  * WERRNONPERSIST - non-persistent write errors
4634                  */
4635                 if (arg & ~(RERRMASK | WERRMASK))
4636                         return (EINVAL);
4637 
4638                 mutex_enter(&stp->sd_lock);
4639                 switch (arg & RERRMASK) {
4640                 case RERRNORM:
4641                         stp->sd_flag &= ~STRDERRNONPERSIST;
4642                         break;
4643                 case RERRNONPERSIST:
4644                         stp->sd_flag |= STRDERRNONPERSIST;
4645                         break;
4646                 }
4647                 switch (arg & WERRMASK) {
4648                 case WERRNORM:
4649                         stp->sd_flag &= ~STWRERRNONPERSIST;
4650                         break;
4651                 case WERRNONPERSIST:
4652                         stp->sd_flag |= STWRERRNONPERSIST;
4653                         break;
4654                 }
4655                 mutex_exit(&stp->sd_lock);
4656                 return (0);
4657 
4658         case I_GERROPT:
4659                 /*
4660                  * Get error option and return the value
4661                  * to spot pointed to by arg
4662                  */
4663         {
4664                 int erropt = 0;
4665 
4666                 erropt |= (stp->sd_flag & STRDERRNONPERSIST) ? RERRNONPERSIST :
4667                     RERRNORM;
4668                 erropt |= (stp->sd_flag & STWRERRNONPERSIST) ? WERRNONPERSIST :
4669                     WERRNORM;
4670                 return (strcopyout(&erropt, (void *)arg, sizeof (int),
4671                     copyflag));
4672         }
4673 
4674         case I_SETSIG:
4675                 /*
4676                  * Register the calling proc to receive the SIGPOLL
4677                  * signal based on the events given in arg.  If
4678                  * arg is zero, remove the proc from register list.
4679                  */
4680         {
4681                 strsig_t *ssp, *pssp;
4682                 struct pid *pidp;
4683 
4684                 pssp = NULL;
4685                 pidp = curproc->p_pidp;
4686                 /*
4687                  * Hold sd_lock to prevent traversal of sd_siglist while
4688                  * it is modified.
4689                  */
4690                 mutex_enter(&stp->sd_lock);
4691                 for (ssp = stp->sd_siglist; ssp && (ssp->ss_pidp != pidp);
4692                     pssp = ssp, ssp = ssp->ss_next)
4693                         ;
4694 
4695                 if (arg) {
4696                         if (arg & ~(S_INPUT|S_HIPRI|S_MSG|S_HANGUP|S_ERROR|
4697                             S_RDNORM|S_WRNORM|S_RDBAND|S_WRBAND|S_BANDURG)) {
4698                                 mutex_exit(&stp->sd_lock);
4699                                 return (EINVAL);
4700                         }
4701                         if ((arg & S_BANDURG) && !(arg & S_RDBAND)) {
4702                                 mutex_exit(&stp->sd_lock);
4703                                 return (EINVAL);
4704                         }
4705 
4706                         /*
4707                          * If proc not already registered, add it
4708                          * to list.
4709                          */
4710                         if (!ssp) {
4711                                 ssp = kmem_alloc(sizeof (strsig_t), KM_SLEEP);
4712                                 ssp->ss_pidp = pidp;
4713                                 ssp->ss_pid = pidp->pid_id;
4714                                 ssp->ss_next = NULL;
4715                                 if (pssp)
4716                                         pssp->ss_next = ssp;
4717                                 else
4718                                         stp->sd_siglist = ssp;
4719                                 mutex_enter(&pidlock);
4720                                 PID_HOLD(pidp);
4721                                 mutex_exit(&pidlock);
4722                         }
4723 
4724                         /*
4725                          * Set events.
4726                          */
4727                         ssp->ss_events = (int)arg;
4728                 } else {
4729                         /*
4730                          * Remove proc from register list.
4731                          */
4732                         if (ssp) {
4733                                 mutex_enter(&pidlock);
4734                                 PID_RELE(pidp);
4735                                 mutex_exit(&pidlock);
4736                                 if (pssp)
4737                                         pssp->ss_next = ssp->ss_next;
4738                                 else
4739                                         stp->sd_siglist = ssp->ss_next;
4740                                 kmem_free(ssp, sizeof (strsig_t));
4741                         } else {
4742                                 mutex_exit(&stp->sd_lock);
4743                                 return (EINVAL);
4744                         }
4745                 }
4746 
4747                 /*
4748                  * Recalculate OR of sig events.
4749                  */
4750                 stp->sd_sigflags = 0;
4751                 for (ssp = stp->sd_siglist; ssp; ssp = ssp->ss_next)
4752                         stp->sd_sigflags |= ssp->ss_events;
4753                 mutex_exit(&stp->sd_lock);
4754                 return (0);
4755         }
4756 
4757         case I_GETSIG:
4758                 /*
4759                  * Return (in arg) the current registration of events
4760                  * for which the calling proc is to be signaled.
4761                  */
4762         {
4763                 struct strsig *ssp;
4764                 struct pid  *pidp;
4765 
4766                 pidp = curproc->p_pidp;
4767                 mutex_enter(&stp->sd_lock);
4768                 for (ssp = stp->sd_siglist; ssp; ssp = ssp->ss_next)
4769                         if (ssp->ss_pidp == pidp) {
4770                                 error = strcopyout(&ssp->ss_events, (void *)arg,
4771                                     sizeof (int), copyflag);
4772                                 mutex_exit(&stp->sd_lock);
4773                                 return (error);
4774                         }
4775                 mutex_exit(&stp->sd_lock);
4776                 return (EINVAL);
4777         }
4778 
4779         case I_ESETSIG:
4780                 /*
4781                  * Register the ss_pid to receive the SIGPOLL
4782                  * signal based on the events in ss_events arg.  If
4783                  * ss_events is zero, remove the proc from register list.
4784                  */
4785         {
4786                 struct strsig *ssp, *pssp;
4787                 struct proc *proc;
4788                 struct pid  *pidp;
4789                 pid_t pid;
4790                 struct strsigset ss;
4791 
4792                 error = strcopyin((void *)arg, &ss, sizeof (ss), copyflag);
4793                 if (error)
4794                         return (error);
4795 
4796                 pid = ss.ss_pid;
4797 
4798                 if (ss.ss_events != 0) {
4799                         /*
4800                          * Permissions check by sending signal 0.
4801                          * Note that when kill fails it does a set_errno
4802                          * causing the system call to fail.
4803                          */
4804                         error = kill(pid, 0);
4805                         if (error) {
4806                                 return (error);
4807                         }
4808                 }
4809                 mutex_enter(&pidlock);
4810                 if (pid == 0)
4811                         proc = curproc;
4812                 else if (pid < 0)
4813                         proc = pgfind(-pid);
4814                 else
4815                         proc = prfind(pid);
4816                 if (proc == NULL) {
4817                         mutex_exit(&pidlock);
4818                         return (ESRCH);
4819                 }
4820                 if (pid < 0)
4821                         pidp = proc->p_pgidp;
4822                 else
4823                         pidp = proc->p_pidp;
4824                 ASSERT(pidp);
4825                 /*
4826                  * Get a hold on the pid structure while referencing it.
4827                  * There is a separate PID_HOLD should it be inserted
4828                  * in the list below.
4829                  */
4830                 PID_HOLD(pidp);
4831                 mutex_exit(&pidlock);
4832 
4833                 pssp = NULL;
4834                 /*
4835                  * Hold sd_lock to prevent traversal of sd_siglist while
4836                  * it is modified.
4837                  */
4838                 mutex_enter(&stp->sd_lock);
4839                 for (ssp = stp->sd_siglist; ssp && (ssp->ss_pid != pid);
4840                     pssp = ssp, ssp = ssp->ss_next)
4841                         ;
4842 
4843                 if (ss.ss_events) {
4844                         if (ss.ss_events &
4845                             ~(S_INPUT|S_HIPRI|S_MSG|S_HANGUP|S_ERROR|
4846                             S_RDNORM|S_WRNORM|S_RDBAND|S_WRBAND|S_BANDURG)) {
4847                                 mutex_exit(&stp->sd_lock);
4848                                 mutex_enter(&pidlock);
4849                                 PID_RELE(pidp);
4850                                 mutex_exit(&pidlock);
4851                                 return (EINVAL);
4852                         }
4853                         if ((ss.ss_events & S_BANDURG) &&
4854                             !(ss.ss_events & S_RDBAND)) {
4855                                 mutex_exit(&stp->sd_lock);
4856                                 mutex_enter(&pidlock);
4857                                 PID_RELE(pidp);
4858                                 mutex_exit(&pidlock);
4859                                 return (EINVAL);
4860                         }
4861 
4862                         /*
4863                          * If proc not already registered, add it
4864                          * to list.
4865                          */
4866                         if (!ssp) {
4867                                 ssp = kmem_alloc(sizeof (strsig_t), KM_SLEEP);
4868                                 ssp->ss_pidp = pidp;
4869                                 ssp->ss_pid = pid;
4870                                 ssp->ss_next = NULL;
4871                                 if (pssp)
4872                                         pssp->ss_next = ssp;
4873                                 else
4874                                         stp->sd_siglist = ssp;
4875                                 mutex_enter(&pidlock);
4876                                 PID_HOLD(pidp);
4877                                 mutex_exit(&pidlock);
4878                         }
4879 
4880                         /*
4881                          * Set events.
4882                          */
4883                         ssp->ss_events = ss.ss_events;
4884                 } else {
4885                         /*
4886                          * Remove proc from register list.
4887                          */
4888                         if (ssp) {
4889                                 mutex_enter(&pidlock);
4890                                 PID_RELE(pidp);
4891                                 mutex_exit(&pidlock);
4892                                 if (pssp)
4893                                         pssp->ss_next = ssp->ss_next;
4894                                 else
4895                                         stp->sd_siglist = ssp->ss_next;
4896                                 kmem_free(ssp, sizeof (strsig_t));
4897                         } else {
4898                                 mutex_exit(&stp->sd_lock);
4899                                 mutex_enter(&pidlock);
4900                                 PID_RELE(pidp);
4901                                 mutex_exit(&pidlock);
4902                                 return (EINVAL);
4903                         }
4904                 }
4905 
4906                 /*
4907                  * Recalculate OR of sig events.
4908                  */
4909                 stp->sd_sigflags = 0;
4910                 for (ssp = stp->sd_siglist; ssp; ssp = ssp->ss_next)
4911                         stp->sd_sigflags |= ssp->ss_events;
4912                 mutex_exit(&stp->sd_lock);
4913                 mutex_enter(&pidlock);
4914                 PID_RELE(pidp);
4915                 mutex_exit(&pidlock);
4916                 return (0);
4917         }
4918 
4919         case I_EGETSIG:
4920                 /*
4921                  * Return (in arg) the current registration of events
4922                  * for which the process or group in ss_pid is to be signaled.
4923                  */
4924         {
4925                 struct strsig *ssp;
4926                 struct proc *proc;
4927                 pid_t pid;
4928                 struct pid  *pidp;
4929                 struct strsigset ss;
4930 
4931                 error = strcopyin((void *)arg, &ss, sizeof (ss), copyflag);
4932                 if (error)
4933                         return (error);
4934 
4935                 pid = ss.ss_pid;
4936                 mutex_enter(&pidlock);
4937                 if (pid == 0)
4938                         proc = curproc;
4939                 else if (pid < 0)
4940                         proc = pgfind(-pid);
4941                 else
4942                         proc = prfind(pid);
4943                 if (proc == NULL) {
4944                         mutex_exit(&pidlock);
4945                         return (ESRCH);
4946                 }
4947                 if (pid < 0)
4948                         pidp = proc->p_pgidp;
4949                 else
4950                         pidp = proc->p_pidp;
4951 
4952                 /* Prevent the pidp from being reassigned */
4953                 PID_HOLD(pidp);
4954                 mutex_exit(&pidlock);
4955 
4956                 mutex_enter(&stp->sd_lock);
4957                 for (ssp = stp->sd_siglist; ssp; ssp = ssp->ss_next)
4958                         if (ssp->ss_pid == pid) {
4959                                 ss.ss_pid = ssp->ss_pid;
4960                                 ss.ss_events = ssp->ss_events;
4961                                 error = strcopyout(&ss, (void *)arg,
4962                                     sizeof (struct strsigset), copyflag);
4963                                 mutex_exit(&stp->sd_lock);
4964                                 mutex_enter(&pidlock);
4965                                 PID_RELE(pidp);
4966                                 mutex_exit(&pidlock);
4967                                 return (error);
4968                         }
4969                 mutex_exit(&stp->sd_lock);
4970                 mutex_enter(&pidlock);
4971                 PID_RELE(pidp);
4972                 mutex_exit(&pidlock);
4973                 return (EINVAL);
4974         }
4975 
4976         case I_PEEK:
4977         {
4978                 STRUCT_DECL(strpeek, strpeek);
4979                 size_t n;
4980                 mblk_t *fmp, *tmp_mp = NULL;
4981 
4982                 STRUCT_INIT(strpeek, flag);
4983 
4984                 error = strcopyin((void *)arg, STRUCT_BUF(strpeek),
4985                     STRUCT_SIZE(strpeek), copyflag);
4986                 if (error)
4987                         return (error);
4988 
4989                 mutex_enter(QLOCK(rdq));
4990                 /*
4991                  * Skip the invalid messages
4992                  */
4993                 for (mp = rdq->q_first; mp != NULL; mp = mp->b_next)
4994                         if (mp->b_datap->db_type != M_SIG)
4995                                 break;
4996 
4997                 /*
4998                  * If user has requested to peek at a high priority message
4999                  * and first message is not, return 0
5000                  */
5001                 if (mp != NULL) {
5002                         if ((STRUCT_FGET(strpeek, flags) & RS_HIPRI) &&
5003                             queclass(mp) == QNORM) {
5004                                 *rvalp = 0;
5005                                 mutex_exit(QLOCK(rdq));
5006                                 return (0);
5007                         }
5008                 } else if (stp->sd_struiordq == NULL ||
5009                     (STRUCT_FGET(strpeek, flags) & RS_HIPRI)) {
5010                         /*
5011                          * No mblks to look at at the streamhead and
5012                          * 1). This isn't a synch stream or
5013                          * 2). This is a synch stream but caller wants high
5014                          *      priority messages which is not supported by
5015                          *      the synch stream. (it only supports QNORM)
5016                          */
5017                         *rvalp = 0;
5018                         mutex_exit(QLOCK(rdq));
5019                         return (0);
5020                 }
5021 
5022                 fmp = mp;
5023 
5024                 if (mp && mp->b_datap->db_type == M_PASSFP) {
5025                         mutex_exit(QLOCK(rdq));
5026                         return (EBADMSG);
5027                 }
5028 
5029                 ASSERT(mp == NULL || mp->b_datap->db_type == M_PCPROTO ||
5030                     mp->b_datap->db_type == M_PROTO ||
5031                     mp->b_datap->db_type == M_DATA);
5032 
5033                 if (mp && mp->b_datap->db_type == M_PCPROTO) {
5034                         STRUCT_FSET(strpeek, flags, RS_HIPRI);
5035                 } else {
5036                         STRUCT_FSET(strpeek, flags, 0);
5037                 }
5038 
5039 
5040                 if (mp && ((tmp_mp = dupmsg(mp)) == NULL)) {
5041                         mutex_exit(QLOCK(rdq));
5042                         return (ENOSR);
5043                 }
5044                 mutex_exit(QLOCK(rdq));
5045 
5046                 /*
5047                  * set mp = tmp_mp, so that I_PEEK processing can continue.
5048                  * tmp_mp is used to free the dup'd message.
5049                  */
5050                 mp = tmp_mp;
5051 
5052                 uio.uio_fmode = 0;
5053                 uio.uio_extflg = UIO_COPY_CACHED;
5054                 uio.uio_segflg = (copyflag == U_TO_K) ? UIO_USERSPACE :
5055                     UIO_SYSSPACE;
5056                 uio.uio_limit = 0;
5057                 /*
5058                  * First process PROTO blocks, if any.
5059                  * If user doesn't want to get ctl info by setting maxlen <= 0,
5060                  * then set len to -1/0 and skip control blocks part.
5061                  */
5062                 if (STRUCT_FGET(strpeek, ctlbuf.maxlen) < 0)
5063                         STRUCT_FSET(strpeek, ctlbuf.len, -1);
5064                 else if (STRUCT_FGET(strpeek, ctlbuf.maxlen) == 0)
5065                         STRUCT_FSET(strpeek, ctlbuf.len, 0);
5066                 else {
5067                         int     ctl_part = 0;
5068 
5069                         iov.iov_base = STRUCT_FGETP(strpeek, ctlbuf.buf);
5070                         iov.iov_len = STRUCT_FGET(strpeek, ctlbuf.maxlen);
5071                         uio.uio_iov = &iov;
5072                         uio.uio_resid = iov.iov_len;
5073                         uio.uio_loffset = 0;
5074                         uio.uio_iovcnt = 1;
5075                         while (mp && mp->b_datap->db_type != M_DATA &&
5076                             uio.uio_resid >= 0) {
5077                                 ASSERT(STRUCT_FGET(strpeek, flags) == 0 ?
5078                                     mp->b_datap->db_type == M_PROTO :
5079                                     mp->b_datap->db_type == M_PCPROTO);
5080 
5081                                 if ((n = MIN(uio.uio_resid,
5082                                     mp->b_wptr - mp->b_rptr)) != 0 &&
5083                                     (error = uiomove((char *)mp->b_rptr, n,
5084                                     UIO_READ, &uio)) != 0) {
5085                                         freemsg(tmp_mp);
5086                                         return (error);
5087                                 }
5088                                 ctl_part = 1;
5089                                 mp = mp->b_cont;
5090                         }
5091                         /* No ctl message */
5092                         if (ctl_part == 0)
5093                                 STRUCT_FSET(strpeek, ctlbuf.len, -1);
5094                         else
5095                                 STRUCT_FSET(strpeek, ctlbuf.len,
5096                                     STRUCT_FGET(strpeek, ctlbuf.maxlen) -
5097                                     uio.uio_resid);
5098                 }
5099 
5100                 /*
5101                  * Now process DATA blocks, if any.
5102                  * If user doesn't want to get data info by setting maxlen <= 0,
5103                  * then set len to -1/0 and skip data blocks part.
5104                  */
5105                 if (STRUCT_FGET(strpeek, databuf.maxlen) < 0)
5106                         STRUCT_FSET(strpeek, databuf.len, -1);
5107                 else if (STRUCT_FGET(strpeek, databuf.maxlen) == 0)
5108                         STRUCT_FSET(strpeek, databuf.len, 0);
5109                 else {
5110                         int     data_part = 0;
5111 
5112                         iov.iov_base = STRUCT_FGETP(strpeek, databuf.buf);
5113                         iov.iov_len = STRUCT_FGET(strpeek, databuf.maxlen);
5114                         uio.uio_iov = &iov;
5115                         uio.uio_resid = iov.iov_len;
5116                         uio.uio_loffset = 0;
5117                         uio.uio_iovcnt = 1;
5118                         while (mp && uio.uio_resid) {
5119                                 if (mp->b_datap->db_type == M_DATA) {
5120                                         if ((n = MIN(uio.uio_resid,
5121                                             mp->b_wptr - mp->b_rptr)) != 0 &&
5122                                             (error = uiomove((char *)mp->b_rptr,
5123                                             n, UIO_READ, &uio)) != 0) {
5124                                                 freemsg(tmp_mp);
5125                                                 return (error);
5126                                         }
5127                                         data_part = 1;
5128                                 }
5129                                 ASSERT(data_part == 0 ||
5130                                     mp->b_datap->db_type == M_DATA);
5131                                 mp = mp->b_cont;
5132                         }
5133                         /* No data message */
5134                         if (data_part == 0)
5135                                 STRUCT_FSET(strpeek, databuf.len, -1);
5136                         else
5137                                 STRUCT_FSET(strpeek, databuf.len,
5138                                     STRUCT_FGET(strpeek, databuf.maxlen) -
5139                                     uio.uio_resid);
5140                 }
5141                 freemsg(tmp_mp);
5142 
5143                 /*
5144                  * It is a synch stream and user wants to get
5145                  * data (maxlen > 0).
5146                  * uio setup is done by the codes that process DATA
5147                  * blocks above.
5148                  */
5149                 if ((fmp == NULL) && STRUCT_FGET(strpeek, databuf.maxlen) > 0) {
5150                         infod_t infod;
5151 
5152                         infod.d_cmd = INFOD_COPYOUT;
5153                         infod.d_res = 0;
5154                         infod.d_uiop = &uio;
5155                         error = infonext(rdq, &infod);
5156                         if (error == EINVAL || error == EBUSY)
5157                                 error = 0;
5158                         if (error)
5159                                 return (error);
5160                         STRUCT_FSET(strpeek, databuf.len, STRUCT_FGET(strpeek,
5161                             databuf.maxlen) - uio.uio_resid);
5162                         if (STRUCT_FGET(strpeek, databuf.len) == 0) {
5163                                 /*
5164                                  * No data found by the infonext().
5165                                  */
5166                                 STRUCT_FSET(strpeek, databuf.len, -1);
5167                         }
5168                 }
5169                 error = strcopyout(STRUCT_BUF(strpeek), (void *)arg,
5170                     STRUCT_SIZE(strpeek), copyflag);
5171                 if (error) {
5172                         return (error);
5173                 }
5174                 /*
5175                  * If there is no message retrieved, set return code to 0
5176                  * otherwise, set it to 1.
5177                  */
5178                 if (STRUCT_FGET(strpeek, ctlbuf.len) == -1 &&
5179                     STRUCT_FGET(strpeek, databuf.len) == -1)
5180                         *rvalp = 0;
5181                 else
5182                         *rvalp = 1;
5183                 return (0);
5184         }
5185 
5186         case I_FDINSERT:
5187         {
5188                 STRUCT_DECL(strfdinsert, strfdinsert);
5189                 struct file *resftp;
5190                 struct stdata *resstp;
5191                 t_uscalar_t     ival;
5192                 ssize_t msgsize;
5193                 struct strbuf mctl;
5194 
5195                 STRUCT_INIT(strfdinsert, flag);
5196                 if (stp->sd_flag & STRHUP)
5197                         return (ENXIO);
5198                 /*
5199                  * STRDERR, STWRERR and STPLEX tested above.
5200                  */
5201                 error = strcopyin((void *)arg, STRUCT_BUF(strfdinsert),
5202                     STRUCT_SIZE(strfdinsert), copyflag);
5203                 if (error)
5204                         return (error);
5205 
5206                 if (STRUCT_FGET(strfdinsert, offset) < 0 ||
5207                     (STRUCT_FGET(strfdinsert, offset) %
5208                     sizeof (t_uscalar_t)) != 0)
5209                         return (EINVAL);
5210                 if ((resftp = getf(STRUCT_FGET(strfdinsert, fildes))) != NULL) {
5211                         if ((resstp = resftp->f_vnode->v_stream) == NULL) {
5212                                 releasef(STRUCT_FGET(strfdinsert, fildes));
5213                                 return (EINVAL);
5214                         }
5215                 } else
5216                         return (EINVAL);
5217 
5218                 mutex_enter(&resstp->sd_lock);
5219                 if (resstp->sd_flag & (STRDERR|STWRERR|STRHUP|STPLEX)) {
5220                         error = strgeterr(resstp,
5221                             STRDERR|STWRERR|STRHUP|STPLEX, 0);
5222                         if (error != 0) {
5223                                 mutex_exit(&resstp->sd_lock);
5224                                 releasef(STRUCT_FGET(strfdinsert, fildes));
5225                                 return (error);
5226                         }
5227                 }
5228                 mutex_exit(&resstp->sd_lock);
5229 
5230 #ifdef  _ILP32
5231                 {
5232                         queue_t *q;
5233                         queue_t *mate = NULL;
5234 
5235                         /* get read queue of stream terminus */
5236                         claimstr(resstp->sd_wrq);
5237                         for (q = resstp->sd_wrq->q_next; q->q_next != NULL;
5238                             q = q->q_next)
5239                                 if (!STRMATED(resstp) && STREAM(q) != resstp &&
5240                                     mate == NULL) {
5241                                         ASSERT(q->q_qinfo->qi_srvp);
5242                                         ASSERT(_OTHERQ(q)->q_qinfo->qi_srvp);
5243                                         claimstr(q);
5244                                         mate = q;
5245                                 }
5246                         q = _RD(q);
5247                         if (mate)
5248                                 releasestr(mate);
5249                         releasestr(resstp->sd_wrq);
5250                         ival = (t_uscalar_t)q;
5251                 }
5252 #else
5253                 ival = (t_uscalar_t)getminor(resftp->f_vnode->v_rdev);
5254 #endif  /* _ILP32 */
5255 
5256                 if (STRUCT_FGET(strfdinsert, ctlbuf.len) <
5257                     STRUCT_FGET(strfdinsert, offset) + sizeof (t_uscalar_t)) {
5258                         releasef(STRUCT_FGET(strfdinsert, fildes));
5259                         return (EINVAL);
5260                 }
5261 
5262                 /*
5263                  * Check for legal flag value.
5264                  */
5265                 if (STRUCT_FGET(strfdinsert, flags) & ~RS_HIPRI) {
5266                         releasef(STRUCT_FGET(strfdinsert, fildes));
5267                         return (EINVAL);
5268                 }
5269 
5270                 /* get these values from those cached in the stream head */
5271                 mutex_enter(QLOCK(stp->sd_wrq));
5272                 rmin = stp->sd_qn_minpsz;
5273                 rmax = stp->sd_qn_maxpsz;
5274                 mutex_exit(QLOCK(stp->sd_wrq));
5275 
5276                 /*
5277                  * Make sure ctl and data sizes together fall within
5278                  * the limits of the max and min receive packet sizes
5279                  * and do not exceed system limit.  A negative data
5280                  * length means that no data part is to be sent.
5281                  */
5282                 ASSERT((rmax >= 0) || (rmax == INFPSZ));
5283                 if (rmax == 0) {
5284                         releasef(STRUCT_FGET(strfdinsert, fildes));
5285                         return (ERANGE);
5286                 }
5287                 if ((msgsize = STRUCT_FGET(strfdinsert, databuf.len)) < 0)
5288                         msgsize = 0;
5289                 if ((msgsize < rmin) ||
5290                     ((msgsize > rmax) && (rmax != INFPSZ)) ||
5291                     (STRUCT_FGET(strfdinsert, ctlbuf.len) > strctlsz)) {
5292                         releasef(STRUCT_FGET(strfdinsert, fildes));
5293                         return (ERANGE);
5294                 }
5295 
5296                 mutex_enter(&stp->sd_lock);
5297                 while (!(STRUCT_FGET(strfdinsert, flags) & RS_HIPRI) &&
5298                     !canputnext(stp->sd_wrq)) {
5299                         if ((error = strwaitq(stp, WRITEWAIT, (ssize_t)0,
5300                             flag, -1, &done)) != 0 || done) {
5301                                 mutex_exit(&stp->sd_lock);
5302                                 releasef(STRUCT_FGET(strfdinsert, fildes));
5303                                 return (error);
5304                         }
5305                         if ((error = i_straccess(stp, access)) != 0) {
5306                                 mutex_exit(&stp->sd_lock);
5307                                 releasef(
5308                                     STRUCT_FGET(strfdinsert, fildes));
5309                                 return (error);
5310                         }
5311                 }
5312                 mutex_exit(&stp->sd_lock);
5313 
5314                 /*
5315                  * Copy strfdinsert.ctlbuf into native form of
5316                  * ctlbuf to pass down into strmakemsg().
5317                  */
5318                 mctl.maxlen = STRUCT_FGET(strfdinsert, ctlbuf.maxlen);
5319                 mctl.len = STRUCT_FGET(strfdinsert, ctlbuf.len);
5320                 mctl.buf = STRUCT_FGETP(strfdinsert, ctlbuf.buf);
5321 
5322                 iov.iov_base = STRUCT_FGETP(strfdinsert, databuf.buf);
5323                 iov.iov_len = STRUCT_FGET(strfdinsert, databuf.len);
5324                 uio.uio_iov = &iov;
5325                 uio.uio_iovcnt = 1;
5326                 uio.uio_loffset = 0;
5327                 uio.uio_segflg = (copyflag == U_TO_K) ? UIO_USERSPACE :
5328                     UIO_SYSSPACE;
5329                 uio.uio_fmode = 0;
5330                 uio.uio_extflg = UIO_COPY_CACHED;
5331                 uio.uio_resid = iov.iov_len;
5332                 if ((error = strmakemsg(&mctl,
5333                     &msgsize, &uio, stp,
5334                     STRUCT_FGET(strfdinsert, flags), &mp)) != 0 || !mp) {
5335                         STRUCT_FSET(strfdinsert, databuf.len, msgsize);
5336                         releasef(STRUCT_FGET(strfdinsert, fildes));
5337                         return (error);
5338                 }
5339 
5340                 STRUCT_FSET(strfdinsert, databuf.len, msgsize);
5341 
5342                 /*
5343                  * Place the possibly reencoded queue pointer 'offset' bytes
5344                  * from the start of the control portion of the message.
5345                  */
5346                 *((t_uscalar_t *)(mp->b_rptr +
5347                     STRUCT_FGET(strfdinsert, offset))) = ival;
5348 
5349                 /*
5350                  * Put message downstream.
5351                  */
5352                 stream_willservice(stp);
5353                 putnext(stp->sd_wrq, mp);
5354                 stream_runservice(stp);
5355                 releasef(STRUCT_FGET(strfdinsert, fildes));
5356                 return (error);
5357         }
5358 
5359         case I_SENDFD:
5360         {
5361                 struct file *fp;
5362 
5363                 if ((fp = getf((int)arg)) == NULL)
5364                         return (EBADF);
5365                 error = do_sendfp(stp, fp, crp);
5366                 if (auditing) {
5367                         audit_fdsend((int)arg, fp, error);
5368                 }
5369                 releasef((int)arg);
5370                 return (error);
5371         }
5372 
5373         case I_RECVFD:
5374         case I_E_RECVFD:
5375         {
5376                 struct k_strrecvfd *srf;
5377                 int i, fd;
5378 
5379                 mutex_enter(&stp->sd_lock);
5380                 while (!(mp = getq(rdq))) {
5381                         if (stp->sd_flag & (STRHUP|STREOF)) {
5382                                 mutex_exit(&stp->sd_lock);
5383                                 return (ENXIO);
5384                         }
5385                         if ((error = strwaitq(stp, GETWAIT, (ssize_t)0,
5386                             flag, -1, &done)) != 0 || done) {
5387                                 mutex_exit(&stp->sd_lock);
5388                                 return (error);
5389                         }
5390                         if ((error = i_straccess(stp, access)) != 0) {
5391                                 mutex_exit(&stp->sd_lock);
5392                                 return (error);
5393                         }
5394                 }
5395                 if (mp->b_datap->db_type != M_PASSFP) {
5396                         putback(stp, rdq, mp, mp->b_band);
5397                         mutex_exit(&stp->sd_lock);
5398                         return (EBADMSG);
5399                 }
5400                 mutex_exit(&stp->sd_lock);
5401 
5402                 srf = (struct k_strrecvfd *)mp->b_rptr;
5403                 if ((fd = ufalloc(0)) == -1) {
5404                         mutex_enter(&stp->sd_lock);
5405                         putback(stp, rdq, mp, mp->b_band);
5406                         mutex_exit(&stp->sd_lock);
5407                         return (EMFILE);
5408                 }
5409                 if (cmd == I_RECVFD) {
5410                         struct o_strrecvfd      ostrfd;
5411 
5412                         /* check to see if uid/gid values are too large. */
5413 
5414                         if (srf->uid > (o_uid_t)USHRT_MAX ||
5415                             srf->gid > (o_gid_t)USHRT_MAX) {
5416                                 mutex_enter(&stp->sd_lock);
5417                                 putback(stp, rdq, mp, mp->b_band);
5418                                 mutex_exit(&stp->sd_lock);
5419                                 setf(fd, NULL); /* release fd entry */
5420                                 return (EOVERFLOW);
5421                         }
5422 
5423                         ostrfd.fd = fd;
5424                         ostrfd.uid = (o_uid_t)srf->uid;
5425                         ostrfd.gid = (o_gid_t)srf->gid;
5426 
5427                         /* Null the filler bits */
5428                         for (i = 0; i < 8; i++)
5429                                 ostrfd.fill[i] = 0;
5430 
5431                         error = strcopyout(&ostrfd, (void *)arg,
5432                             sizeof (struct o_strrecvfd), copyflag);
5433                 } else {                /* I_E_RECVFD */
5434                         struct strrecvfd        strfd;
5435 
5436                         strfd.fd = fd;
5437                         strfd.uid = srf->uid;
5438                         strfd.gid = srf->gid;
5439 
5440                         /* null the filler bits */
5441                         for (i = 0; i < 8; i++)
5442                                 strfd.fill[i] = 0;
5443 
5444                         error = strcopyout(&strfd, (void *)arg,
5445                             sizeof (struct strrecvfd), copyflag);
5446                 }
5447 
5448                 if (error) {
5449                         setf(fd, NULL); /* release fd entry */
5450                         mutex_enter(&stp->sd_lock);
5451                         putback(stp, rdq, mp, mp->b_band);
5452                         mutex_exit(&stp->sd_lock);
5453                         return (error);
5454                 }
5455                 if (auditing) {
5456                         audit_fdrecv(fd, srf->fp);
5457                 }
5458 
5459                 /*
5460                  * Always increment f_count since the freemsg() below will
5461                  * always call free_passfp() which performs a closef().
5462                  */
5463                 mutex_enter(&srf->fp->f_tlock);
5464                 srf->fp->f_count++;
5465                 mutex_exit(&srf->fp->f_tlock);
5466                 setf(fd, srf->fp);
5467                 freemsg(mp);
5468                 return (0);
5469         }
5470 
5471         case I_SWROPT:
5472                 /*
5473                  * Set/clear the write options. arg is a bit
5474                  * mask with any of the following bits set...
5475                  *      SNDZERO - send zero length message
5476                  *      SNDPIPE - send sigpipe to process if
5477                  *              sd_werror is set and process is
5478                  *              doing a write or putmsg.
5479                  * The new stream head write options should reflect
5480                  * what is in arg.
5481                  */
5482                 if (arg & ~(SNDZERO|SNDPIPE))
5483                         return (EINVAL);
5484 
5485                 mutex_enter(&stp->sd_lock);
5486                 stp->sd_wput_opt &= ~(SW_SIGPIPE|SW_SNDZERO);
5487                 if (arg & SNDZERO)
5488                         stp->sd_wput_opt |= SW_SNDZERO;
5489                 if (arg & SNDPIPE)
5490                         stp->sd_wput_opt |= SW_SIGPIPE;
5491                 mutex_exit(&stp->sd_lock);
5492                 return (0);
5493 
5494         case I_GWROPT:
5495         {
5496                 int wropt = 0;
5497 
5498                 if (stp->sd_wput_opt & SW_SNDZERO)
5499                         wropt |= SNDZERO;
5500                 if (stp->sd_wput_opt & SW_SIGPIPE)
5501                         wropt |= SNDPIPE;
5502                 return (strcopyout(&wropt, (void *)arg, sizeof (wropt),
5503                     copyflag));
5504         }
5505 
5506         case I_LIST:
5507                 /*
5508                  * Returns all the modules found on this stream,
5509                  * upto the driver. If argument is NULL, return the
5510                  * number of modules (including driver). If argument
5511                  * is not NULL, copy the names into the structure
5512                  * provided.
5513                  */
5514 
5515         {
5516                 queue_t *q;
5517                 char *qname;
5518                 int i, nmods;
5519                 struct str_mlist *mlist;
5520                 STRUCT_DECL(str_list, strlist);
5521 
5522                 if (arg == 0) { /* Return number of modules plus driver */
5523                         if (stp->sd_vnode->v_type == VFIFO)
5524                                 *rvalp = stp->sd_pushcnt;
5525                         else
5526                                 *rvalp = stp->sd_pushcnt + 1;
5527                         return (0);
5528                 }
5529 
5530                 STRUCT_INIT(strlist, flag);
5531 
5532                 error = strcopyin((void *)arg, STRUCT_BUF(strlist),
5533                     STRUCT_SIZE(strlist), copyflag);
5534                 if (error != 0)
5535                         return (error);
5536 
5537                 mlist = STRUCT_FGETP(strlist, sl_modlist);
5538                 nmods = STRUCT_FGET(strlist, sl_nmods);
5539                 if (nmods <= 0)
5540                         return (EINVAL);
5541 
5542                 claimstr(stp->sd_wrq);
5543                 q = stp->sd_wrq;
5544                 for (i = 0; i < nmods && _SAMESTR(q); i++, q = q->q_next) {
5545                         qname = Q2NAME(q->q_next);
5546                         error = strcopyout(qname, &mlist[i], strlen(qname) + 1,
5547                             copyflag);
5548                         if (error != 0) {
5549                                 releasestr(stp->sd_wrq);
5550                                 return (error);
5551                         }
5552                 }
5553                 releasestr(stp->sd_wrq);
5554                 return (strcopyout(&i, (void *)arg, sizeof (int), copyflag));
5555         }
5556 
5557         case I_CKBAND:
5558         {
5559                 queue_t *q;
5560                 qband_t *qbp;
5561 
5562                 if ((arg < 0) || (arg >= NBAND))
5563                         return (EINVAL);
5564                 q = _RD(stp->sd_wrq);
5565                 mutex_enter(QLOCK(q));
5566                 if (arg > (int)q->q_nband) {
5567                         *rvalp = 0;
5568                 } else {
5569                         if (arg == 0) {
5570                                 if (q->q_first)
5571                                         *rvalp = 1;
5572                                 else
5573                                         *rvalp = 0;
5574                         } else {
5575                                 qbp = q->q_bandp;
5576                                 while (--arg > 0)
5577                                         qbp = qbp->qb_next;
5578                                 if (qbp->qb_first)
5579                                         *rvalp = 1;
5580                                 else
5581                                         *rvalp = 0;
5582                         }
5583                 }
5584                 mutex_exit(QLOCK(q));
5585                 return (0);
5586         }
5587 
5588         case I_GETBAND:
5589         {
5590                 int intpri;
5591                 queue_t *q;
5592 
5593                 q = _RD(stp->sd_wrq);
5594                 mutex_enter(QLOCK(q));
5595                 mp = q->q_first;
5596                 if (!mp) {
5597                         mutex_exit(QLOCK(q));
5598                         return (ENODATA);
5599                 }
5600                 intpri = (int)mp->b_band;
5601                 error = strcopyout(&intpri, (void *)arg, sizeof (int),
5602                     copyflag);
5603                 mutex_exit(QLOCK(q));
5604                 return (error);
5605         }
5606 
5607         case I_ATMARK:
5608         {
5609                 queue_t *q;
5610 
5611                 if (arg & ~(ANYMARK|LASTMARK))
5612                         return (EINVAL);
5613                 q = _RD(stp->sd_wrq);
5614                 mutex_enter(&stp->sd_lock);
5615                 if ((stp->sd_flag & STRATMARK) && (arg == ANYMARK)) {
5616                         *rvalp = 1;
5617                 } else {
5618                         mutex_enter(QLOCK(q));
5619                         mp = q->q_first;
5620 
5621                         if (mp == NULL)
5622                                 *rvalp = 0;
5623                         else if ((arg == ANYMARK) && (mp->b_flag & MSGMARK))
5624                                 *rvalp = 1;
5625                         else if ((arg == LASTMARK) && (mp == stp->sd_mark))
5626                                 *rvalp = 1;
5627                         else
5628                                 *rvalp = 0;
5629                         mutex_exit(QLOCK(q));
5630                 }
5631                 mutex_exit(&stp->sd_lock);
5632                 return (0);
5633         }
5634 
5635         case I_CANPUT:
5636         {
5637                 char band;
5638 
5639                 if ((arg < 0) || (arg >= NBAND))
5640                         return (EINVAL);
5641                 band = (char)arg;
5642                 *rvalp = bcanputnext(stp->sd_wrq, band);
5643                 return (0);
5644         }
5645 
5646         case I_SETCLTIME:
5647         {
5648                 int closetime;
5649 
5650                 error = strcopyin((void *)arg, &closetime, sizeof (int),
5651                     copyflag);
5652                 if (error)
5653                         return (error);
5654                 if (closetime < 0)
5655                         return (EINVAL);
5656 
5657                 stp->sd_closetime = closetime;
5658                 return (0);
5659         }
5660 
5661         case I_GETCLTIME:
5662         {
5663                 int closetime;
5664 
5665                 closetime = stp->sd_closetime;
5666                 return (strcopyout(&closetime, (void *)arg, sizeof (int),
5667                     copyflag));
5668         }
5669 
5670         case TIOCGSID:
5671         {
5672                 pid_t sid;
5673 
5674                 mutex_enter(&stp->sd_lock);
5675                 if (stp->sd_sidp == NULL) {
5676                         mutex_exit(&stp->sd_lock);
5677                         return (ENOTTY);
5678                 }
5679                 sid = stp->sd_sidp->pid_id;
5680                 mutex_exit(&stp->sd_lock);
5681                 return (strcopyout(&sid, (void *)arg, sizeof (pid_t),
5682                     copyflag));
5683         }
5684 
5685         case TIOCSPGRP:
5686         {
5687                 pid_t pgrp;
5688                 proc_t *q;
5689                 pid_t   sid, fg_pgid, bg_pgid;
5690 
5691                 if (error = strcopyin((void *)arg, &pgrp, sizeof (pid_t),
5692                     copyflag))
5693                         return (error);
5694                 mutex_enter(&stp->sd_lock);
5695                 mutex_enter(&pidlock);
5696                 if (stp->sd_sidp != ttoproc(curthread)->p_sessp->s_sidp) {
5697                         mutex_exit(&pidlock);
5698                         mutex_exit(&stp->sd_lock);
5699                         return (ENOTTY);
5700                 }
5701                 if (pgrp == stp->sd_pgidp->pid_id) {
5702                         mutex_exit(&pidlock);
5703                         mutex_exit(&stp->sd_lock);
5704                         return (0);
5705                 }
5706                 if (pgrp <= 0 || pgrp >= maxpid) {
5707                         mutex_exit(&pidlock);
5708                         mutex_exit(&stp->sd_lock);
5709                         return (EINVAL);
5710                 }
5711                 if ((q = pgfind(pgrp)) == NULL ||
5712                     q->p_sessp != ttoproc(curthread)->p_sessp) {
5713                         mutex_exit(&pidlock);
5714                         mutex_exit(&stp->sd_lock);
5715                         return (EPERM);
5716                 }
5717                 sid = stp->sd_sidp->pid_id;
5718                 fg_pgid = q->p_pgrp;
5719                 bg_pgid = stp->sd_pgidp->pid_id;
5720                 CL_SET_PROCESS_GROUP(curthread, sid, bg_pgid, fg_pgid);
5721                 PID_RELE(stp->sd_pgidp);
5722                 ctty_clear_sighuped();
5723                 stp->sd_pgidp = q->p_pgidp;
5724                 PID_HOLD(stp->sd_pgidp);
5725                 mutex_exit(&pidlock);
5726                 mutex_exit(&stp->sd_lock);
5727                 return (0);
5728         }
5729 
5730         case TIOCGPGRP:
5731         {
5732                 pid_t pgrp;
5733 
5734                 mutex_enter(&stp->sd_lock);
5735                 if (stp->sd_sidp == NULL) {
5736                         mutex_exit(&stp->sd_lock);
5737                         return (ENOTTY);
5738                 }
5739                 pgrp = stp->sd_pgidp->pid_id;
5740                 mutex_exit(&stp->sd_lock);
5741                 return (strcopyout(&pgrp, (void *)arg, sizeof (pid_t),
5742                     copyflag));
5743         }
5744 
5745         case TIOCSCTTY:
5746         {
5747                 return (strctty(stp));
5748         }
5749 
5750         case TIOCNOTTY:
5751         {
5752                 /* freectty() always assumes curproc. */
5753                 if (freectty(B_FALSE) != 0)
5754                         return (0);
5755                 return (ENOTTY);
5756         }
5757 
5758         case FIONBIO:
5759         case FIOASYNC:
5760                 return (0);     /* handled by the upper layer */
5761         }
5762 }
5763 
5764 /*
5765  * Custom free routine used for M_PASSFP messages.
5766  */
5767 static void
5768 free_passfp(struct k_strrecvfd *srf)
5769 {
5770         (void) closef(srf->fp);
5771         kmem_free(srf, sizeof (struct k_strrecvfd) + sizeof (frtn_t));
5772 }
5773 
5774 /* ARGSUSED */
5775 int
5776 do_sendfp(struct stdata *stp, struct file *fp, struct cred *cr)
5777 {
5778         queue_t *qp, *nextqp;
5779         struct k_strrecvfd *srf;
5780         mblk_t *mp;
5781         frtn_t *frtnp;
5782         size_t bufsize;
5783         queue_t *mate = NULL;
5784         syncq_t *sq = NULL;
5785         int retval = 0;
5786 
5787         if (stp->sd_flag & STRHUP)
5788                 return (ENXIO);
5789 
5790         claimstr(stp->sd_wrq);
5791 
5792         /* Fastpath, we have a pipe, and we are already mated, use it. */
5793         if (STRMATED(stp)) {
5794                 qp = _RD(stp->sd_mate->sd_wrq);
5795                 claimstr(qp);
5796                 mate = qp;
5797         } else { /* Not already mated. */
5798 
5799                 /*
5800                  * Walk the stream to the end of this one.
5801                  * assumes that the claimstr() will prevent
5802                  * plumbing between the stream head and the
5803                  * driver from changing
5804                  */
5805                 qp = stp->sd_wrq;
5806 
5807                 /*
5808                  * Loop until we reach the end of this stream.
5809                  * On completion, qp points to the write queue
5810                  * at the end of the stream, or the read queue
5811                  * at the stream head if this is a fifo.
5812                  */
5813                 while (((qp = qp->q_next) != NULL) && _SAMESTR(qp))
5814                         ;
5815 
5816                 /*
5817                  * Just in case we get a q_next which is NULL, but
5818                  * not at the end of the stream.  This is actually
5819                  * broken, so we set an assert to catch it in
5820                  * debug, and set an error and return if not debug.
5821                  */
5822                 ASSERT(qp);
5823                 if (qp == NULL) {
5824                         releasestr(stp->sd_wrq);
5825                         return (EINVAL);
5826                 }
5827 
5828                 /*
5829                  * Enter the syncq for the driver, so (hopefully)
5830                  * the queue values will not change on us.
5831                  * XXXX - This will only prevent the race IFF only
5832                  *   the write side modifies the q_next member, and
5833                  *   the put procedure is protected by at least
5834                  *   MT_PERQ.
5835                  */
5836                 if ((sq = qp->q_syncq) != NULL)
5837                         entersq(sq, SQ_PUT);
5838 
5839                 /* Now get the q_next value from this qp. */
5840                 nextqp = qp->q_next;
5841 
5842                 /*
5843                  * If nextqp exists and the other stream is different
5844                  * from this one claim the stream, set the mate, and
5845                  * get the read queue at the stream head of the other
5846                  * stream.  Assumes that nextqp was at least valid when
5847                  * we got it.  Hopefully the entersq of the driver
5848                  * will prevent it from changing on us.
5849                  */
5850                 if ((nextqp != NULL) && (STREAM(nextqp) != stp)) {
5851                         ASSERT(qp->q_qinfo->qi_srvp);
5852                         ASSERT(_OTHERQ(qp)->q_qinfo->qi_srvp);
5853                         ASSERT(_OTHERQ(qp->q_next)->q_qinfo->qi_srvp);
5854                         claimstr(nextqp);
5855 
5856                         /* Make sure we still have a q_next */
5857                         if (nextqp != qp->q_next) {
5858                                 releasestr(stp->sd_wrq);
5859                                 releasestr(nextqp);
5860                                 return (EINVAL);
5861                         }
5862 
5863                         qp = _RD(STREAM(nextqp)->sd_wrq);
5864                         mate = qp;
5865                 }
5866                 /* If we entered the synq above, leave it. */
5867                 if (sq != NULL)
5868                         leavesq(sq, SQ_PUT);
5869         } /*  STRMATED(STP)  */
5870 
5871         /* XXX prevents substitution of the ops vector */
5872         if (qp->q_qinfo != &strdata && qp->q_qinfo != &fifo_strdata) {
5873                 retval = EINVAL;
5874                 goto out;
5875         }
5876 
5877         if (qp->q_flag & QFULL) {
5878                 retval = EAGAIN;
5879                 goto out;
5880         }
5881 
5882         /*
5883          * Since M_PASSFP messages include a file descriptor, we use
5884          * esballoc() and specify a custom free routine (free_passfp()) that
5885          * will close the descriptor as part of freeing the message.  For
5886          * convenience, we stash the frtn_t right after the data block.
5887          */
5888         bufsize = sizeof (struct k_strrecvfd) + sizeof (frtn_t);
5889         srf = kmem_alloc(bufsize, KM_NOSLEEP);
5890         if (srf == NULL) {
5891                 retval = EAGAIN;
5892                 goto out;
5893         }
5894 
5895         frtnp = (frtn_t *)(srf + 1);
5896         frtnp->free_arg = (caddr_t)srf;
5897         frtnp->free_func = free_passfp;
5898 
5899         mp = esballoc((uchar_t *)srf, bufsize, BPRI_MED, frtnp);
5900         if (mp == NULL) {
5901                 kmem_free(srf, bufsize);
5902                 retval = EAGAIN;
5903                 goto out;
5904         }
5905         mp->b_wptr += sizeof (struct k_strrecvfd);
5906         mp->b_datap->db_type = M_PASSFP;
5907 
5908         srf->fp = fp;
5909         srf->uid = crgetuid(curthread->t_cred);
5910         srf->gid = crgetgid(curthread->t_cred);
5911         mutex_enter(&fp->f_tlock);
5912         fp->f_count++;
5913         mutex_exit(&fp->f_tlock);
5914 
5915         put(qp, mp);
5916 out:
5917         releasestr(stp->sd_wrq);
5918         if (mate)
5919                 releasestr(mate);
5920         return (retval);
5921 }
5922 
5923 /*
5924  * Send an ioctl message downstream and wait for acknowledgement.
5925  * flags may be set to either U_TO_K or K_TO_K and a combination
5926  * of STR_NOERROR or STR_NOSIG
5927  * STR_NOSIG: Signals are essentially ignored or held and have
5928  *      no effect for the duration of the call.
5929  * STR_NOERROR: Ignores stream head read, write and hup errors.
5930  *      Additionally, if an existing ioctl times out, it is assumed
5931  *      lost and and this ioctl will continue as if the previous ioctl had
5932  *      finished.  ETIME may be returned if this ioctl times out (i.e.
5933  *      ic_timout is not INFTIM).  Non-stream head errors may be returned if
5934  *      the ioc_error indicates that the driver/module had problems,
5935  *      an EFAULT was found when accessing user data, a lack of
5936  *      resources, etc.
5937  */
5938 int
5939 strdoioctl(
5940         struct stdata *stp,
5941         struct strioctl *strioc,
5942         int fflags,             /* file flags with model info */
5943         int flag,
5944         cred_t *crp,
5945         int *rvalp)
5946 {
5947         mblk_t *bp;
5948         struct iocblk *iocbp;
5949         struct copyreq *reqp;
5950         struct copyresp *resp;
5951         int id;
5952         int transparent = 0;
5953         int error = 0;
5954         int len = 0;
5955         caddr_t taddr;
5956         int copyflag = (flag & (U_TO_K | K_TO_K));
5957         int sigflag = (flag & STR_NOSIG);
5958         int errs;
5959         uint_t waitflags;
5960         boolean_t set_iocwaitne = B_FALSE;
5961 
5962         ASSERT(copyflag == U_TO_K || copyflag == K_TO_K);
5963         ASSERT((fflags & FMODELS) != 0);
5964 
5965         TRACE_2(TR_FAC_STREAMS_FR,
5966             TR_STRDOIOCTL,
5967             "strdoioctl:stp %p strioc %p", stp, strioc);
5968         if (strioc->ic_len == TRANSPARENT) { /* send arg in M_DATA block */
5969                 transparent = 1;
5970                 strioc->ic_len = sizeof (intptr_t);
5971         }
5972 
5973         if (strioc->ic_len < 0 || (strmsgsz > 0 && strioc->ic_len > strmsgsz))
5974                 return (EINVAL);
5975 
5976         if ((bp = allocb_cred_wait(sizeof (union ioctypes), sigflag, &error,
5977             crp, curproc->p_pid)) == NULL)
5978                         return (error);
5979 
5980         bzero(bp->b_wptr, sizeof (union ioctypes));
5981 
5982         iocbp = (struct iocblk *)bp->b_wptr;
5983         iocbp->ioc_count = strioc->ic_len;
5984         iocbp->ioc_cmd = strioc->ic_cmd;
5985         iocbp->ioc_flag = (fflags & FMODELS);
5986 
5987         crhold(crp);
5988         iocbp->ioc_cr = crp;
5989         DB_TYPE(bp) = M_IOCTL;
5990         bp->b_wptr += sizeof (struct iocblk);
5991 
5992         if (flag & STR_NOERROR)
5993                 errs = STPLEX;
5994         else
5995                 errs = STRHUP|STRDERR|STWRERR|STPLEX;
5996 
5997         /*
5998          * If there is data to copy into ioctl block, do so.
5999          */
6000         if (iocbp->ioc_count > 0) {
6001                 if (transparent)
6002                         /*
6003                          * Note: STR_NOERROR does not have an effect
6004                          * in putiocd()
6005                          */
6006                         id = K_TO_K | sigflag;
6007                 else
6008                         id = flag;
6009                 if ((error = putiocd(bp, strioc->ic_dp, id, crp)) != 0) {
6010                         freemsg(bp);
6011                         crfree(crp);
6012                         return (error);
6013                 }
6014 
6015                 /*
6016                  * We could have slept copying in user pages.
6017                  * Recheck the stream head state (the other end
6018                  * of a pipe could have gone away).
6019                  */
6020                 if (stp->sd_flag & errs) {
6021                         mutex_enter(&stp->sd_lock);
6022                         error = strgeterr(stp, errs, 0);
6023                         mutex_exit(&stp->sd_lock);
6024                         if (error != 0) {
6025                                 freemsg(bp);
6026                                 crfree(crp);
6027                                 return (error);
6028                         }
6029                 }
6030         }
6031         if (transparent)
6032                 iocbp->ioc_count = TRANSPARENT;
6033 
6034         /*
6035          * Block for up to STRTIMOUT milliseconds if there is an outstanding
6036          * ioctl for this stream already running.  All processes
6037          * sleeping here will be awakened as a result of an ACK
6038          * or NAK being received for the outstanding ioctl, or
6039          * as a result of the timer expiring on the outstanding
6040          * ioctl (a failure), or as a result of any waiting
6041          * process's timer expiring (also a failure).
6042          */
6043 
6044         error = 0;
6045         mutex_enter(&stp->sd_lock);
6046         while ((stp->sd_flag & IOCWAIT) ||
6047             (!set_iocwaitne && (stp->sd_flag & IOCWAITNE))) {
6048                 clock_t cv_rval;
6049 
6050                 TRACE_0(TR_FAC_STREAMS_FR,
6051                     TR_STRDOIOCTL_WAIT,
6052                     "strdoioctl sleeps - IOCWAIT");
6053                 cv_rval = str_cv_wait(&stp->sd_iocmonitor, &stp->sd_lock,
6054                     STRTIMOUT, sigflag);
6055                 if (cv_rval <= 0) {
6056                         if (cv_rval == 0) {
6057                                 error = EINTR;
6058                         } else {
6059                                 if (flag & STR_NOERROR) {
6060                                         /*
6061                                          * Terminating current ioctl in
6062                                          * progress -- assume it got lost and
6063                                          * wake up the other thread so that the
6064                                          * operation completes.
6065                                          */
6066                                         if (!(stp->sd_flag & IOCWAITNE)) {
6067                                                 set_iocwaitne = B_TRUE;
6068                                                 stp->sd_flag |= IOCWAITNE;
6069                                                 cv_broadcast(&stp->sd_monitor);
6070                                         }
6071                                         /*
6072                                          * Otherwise, there's a running
6073                                          * STR_NOERROR -- we have no choice
6074                                          * here but to wait forever (or until
6075                                          * interrupted).
6076                                          */
6077                                 } else {
6078                                         /*
6079                                          * pending ioctl has caused
6080                                          * us to time out
6081                                          */
6082                                         error = ETIME;
6083                                 }
6084                         }
6085                 } else if ((stp->sd_flag & errs)) {
6086                         error = strgeterr(stp, errs, 0);
6087                 }
6088                 if (error) {
6089                         mutex_exit(&stp->sd_lock);
6090                         freemsg(bp);
6091                         crfree(crp);
6092                         return (error);
6093                 }
6094         }
6095 
6096         /*
6097          * Have control of ioctl mechanism.
6098          * Send down ioctl packet and wait for response.
6099          */
6100         if (stp->sd_iocblk != (mblk_t *)-1) {
6101                 freemsg(stp->sd_iocblk);
6102         }
6103         stp->sd_iocblk = NULL;
6104 
6105         /*
6106          * If this is marked with 'noerror' (internal; mostly
6107          * I_{P,}{UN,}LINK), then make sure nobody else is able to get
6108          * in here by setting IOCWAITNE.
6109          */
6110         waitflags = IOCWAIT;
6111         if (flag & STR_NOERROR)
6112                 waitflags |= IOCWAITNE;
6113 
6114         stp->sd_flag |= waitflags;
6115 
6116         /*
6117          * Assign sequence number.
6118          */
6119         iocbp->ioc_id = stp->sd_iocid = getiocseqno();
6120 
6121         mutex_exit(&stp->sd_lock);
6122 
6123         TRACE_1(TR_FAC_STREAMS_FR,
6124             TR_STRDOIOCTL_PUT, "strdoioctl put: stp %p", stp);
6125         stream_willservice(stp);
6126         putnext(stp->sd_wrq, bp);
6127         stream_runservice(stp);
6128 
6129         /*
6130          * Timed wait for acknowledgment.  The wait time is limited by the
6131          * timeout value, which must be a positive integer (number of
6132          * milliseconds) to wait, or 0 (use default value of STRTIMOUT
6133          * milliseconds), or -1 (wait forever).  This will be awakened
6134          * either by an ACK/NAK message arriving, the timer expiring, or
6135          * the timer expiring on another ioctl waiting for control of the
6136          * mechanism.
6137          */
6138 waitioc:
6139         mutex_enter(&stp->sd_lock);
6140 
6141 
6142         /*
6143          * If the reply has already arrived, don't sleep.  If awakened from
6144          * the sleep, fail only if the reply has not arrived by then.
6145          * Otherwise, process the reply.
6146          */
6147         while (!stp->sd_iocblk) {
6148                 clock_t cv_rval;
6149 
6150                 if (stp->sd_flag & errs) {
6151                         error = strgeterr(stp, errs, 0);
6152                         if (error != 0) {
6153                                 stp->sd_flag &= ~waitflags;
6154                                 cv_broadcast(&stp->sd_iocmonitor);
6155                                 mutex_exit(&stp->sd_lock);
6156                                 crfree(crp);
6157                                 return (error);
6158                         }
6159                 }
6160 
6161                 TRACE_0(TR_FAC_STREAMS_FR,
6162                     TR_STRDOIOCTL_WAIT2,
6163                     "strdoioctl sleeps awaiting reply");
6164                 ASSERT(error == 0);
6165 
6166                 cv_rval = str_cv_wait(&stp->sd_monitor, &stp->sd_lock,
6167                     (strioc->ic_timout ?
6168                     strioc->ic_timout * 1000 : STRTIMOUT), sigflag);
6169 
6170                 /*
6171                  * There are four possible cases here: interrupt, timeout,
6172                  * wakeup by IOCWAITNE (above), or wakeup by strrput_nondata (a
6173                  * valid M_IOCTL reply).
6174                  *
6175                  * If we've been awakened by a STR_NOERROR ioctl on some other
6176                  * thread, then sd_iocblk will still be NULL, and IOCWAITNE
6177                  * will be set.  Pretend as if we just timed out.  Note that
6178                  * this other thread waited at least STRTIMOUT before trying to
6179                  * awaken our thread, so this is indistinguishable (even for
6180                  * INFTIM) from the case where we failed with ETIME waiting on
6181                  * IOCWAIT in the prior loop.
6182                  */
6183                 if (cv_rval > 0 && !(flag & STR_NOERROR) &&
6184                     stp->sd_iocblk == NULL && (stp->sd_flag & IOCWAITNE)) {
6185                         cv_rval = -1;
6186                 }
6187 
6188                 /*
6189                  * note: STR_NOERROR does not protect
6190                  * us here.. use ic_timout < 0
6191                  */
6192                 if (cv_rval <= 0) {
6193                         if (cv_rval == 0) {
6194                                 error = EINTR;
6195                         } else {
6196                                 error =  ETIME;
6197                         }
6198                         /*
6199                          * A message could have come in after we were scheduled
6200                          * but before we were actually run.
6201                          */
6202                         bp = stp->sd_iocblk;
6203                         stp->sd_iocblk = NULL;
6204                         if (bp != NULL) {
6205                                 if ((bp->b_datap->db_type == M_COPYIN) ||
6206                                     (bp->b_datap->db_type == M_COPYOUT)) {
6207                                         mutex_exit(&stp->sd_lock);
6208                                         if (bp->b_cont) {
6209                                                 freemsg(bp->b_cont);
6210                                                 bp->b_cont = NULL;
6211                                         }
6212                                         bp->b_datap->db_type = M_IOCDATA;
6213                                         bp->b_wptr = bp->b_rptr +
6214                                             sizeof (struct copyresp);
6215                                         resp = (struct copyresp *)bp->b_rptr;
6216                                         resp->cp_rval =
6217                                             (caddr_t)1; /* failure */
6218                                         stream_willservice(stp);
6219                                         putnext(stp->sd_wrq, bp);
6220                                         stream_runservice(stp);
6221                                         mutex_enter(&stp->sd_lock);
6222                                 } else {
6223                                         freemsg(bp);
6224                                 }
6225                         }
6226                         stp->sd_flag &= ~waitflags;
6227                         cv_broadcast(&stp->sd_iocmonitor);
6228                         mutex_exit(&stp->sd_lock);
6229                         crfree(crp);
6230                         return (error);
6231                 }
6232         }
6233         bp = stp->sd_iocblk;
6234         /*
6235          * Note: it is strictly impossible to get here with sd_iocblk set to
6236          * -1.  This is because the initial loop above doesn't allow any new
6237          * ioctls into the fray until all others have passed this point.
6238          */
6239         ASSERT(bp != NULL && bp != (mblk_t *)-1);
6240         TRACE_1(TR_FAC_STREAMS_FR,
6241             TR_STRDOIOCTL_ACK, "strdoioctl got reply: bp %p", bp);
6242         if ((bp->b_datap->db_type == M_IOCACK) ||
6243             (bp->b_datap->db_type == M_IOCNAK)) {
6244                 /* for detection of duplicate ioctl replies */
6245                 stp->sd_iocblk = (mblk_t *)-1;
6246                 stp->sd_flag &= ~waitflags;
6247                 cv_broadcast(&stp->sd_iocmonitor);
6248                 mutex_exit(&stp->sd_lock);
6249         } else {
6250                 /*
6251                  * flags not cleared here because we're still doing
6252                  * copy in/out for ioctl.
6253                  */
6254                 stp->sd_iocblk = NULL;
6255                 mutex_exit(&stp->sd_lock);
6256         }
6257 
6258 
6259         /*
6260          * Have received acknowledgment.
6261          */
6262 
6263         switch (bp->b_datap->db_type) {
6264         case M_IOCACK:
6265                 /*
6266                  * Positive ack.
6267                  */
6268                 iocbp = (struct iocblk *)bp->b_rptr;
6269 
6270                 /*
6271                  * Set error if indicated.
6272                  */
6273                 if (iocbp->ioc_error) {
6274                         error = iocbp->ioc_error;
6275                         break;
6276                 }
6277 
6278                 /*
6279                  * Set return value.
6280                  */
6281                 *rvalp = iocbp->ioc_rval;
6282 
6283                 /*
6284                  * Data may have been returned in ACK message (ioc_count > 0).
6285                  * If so, copy it out to the user's buffer.
6286                  */
6287                 if (iocbp->ioc_count && !transparent) {
6288                         if (error = getiocd(bp, strioc->ic_dp, copyflag))
6289                                 break;
6290                 }
6291                 if (!transparent) {
6292                         if (len)        /* an M_COPYOUT was used with I_STR */
6293                                 strioc->ic_len = len;
6294                         else
6295                                 strioc->ic_len = (int)iocbp->ioc_count;
6296                 }
6297                 break;
6298 
6299         case M_IOCNAK:
6300                 /*
6301                  * Negative ack.
6302                  *
6303                  * The only thing to do is set error as specified
6304                  * in neg ack packet.
6305                  */
6306                 iocbp = (struct iocblk *)bp->b_rptr;
6307 
6308                 error = (iocbp->ioc_error ? iocbp->ioc_error : EINVAL);
6309                 break;
6310 
6311         case M_COPYIN:
6312                 /*
6313                  * Driver or module has requested user ioctl data.
6314                  */
6315                 reqp = (struct copyreq *)bp->b_rptr;
6316 
6317                 /*
6318                  * M_COPYIN should *never* have a message attached, though
6319                  * it's harmless if it does -- thus, panic on a DEBUG
6320                  * kernel and just free it on a non-DEBUG build.
6321                  */
6322                 ASSERT(bp->b_cont == NULL);
6323                 if (bp->b_cont != NULL) {
6324                         freemsg(bp->b_cont);
6325                         bp->b_cont = NULL;
6326                 }
6327 
6328                 error = putiocd(bp, reqp->cq_addr, flag, crp);
6329                 if (error && bp->b_cont) {
6330                         freemsg(bp->b_cont);
6331                         bp->b_cont = NULL;
6332                 }
6333 
6334                 bp->b_wptr = bp->b_rptr + sizeof (struct copyresp);
6335                 bp->b_datap->db_type = M_IOCDATA;
6336 
6337                 mblk_setcred(bp, crp, curproc->p_pid);
6338                 resp = (struct copyresp *)bp->b_rptr;
6339                 resp->cp_rval = (caddr_t)(uintptr_t)error;
6340                 resp->cp_flag = (fflags & FMODELS);
6341 
6342                 stream_willservice(stp);
6343                 putnext(stp->sd_wrq, bp);
6344                 stream_runservice(stp);
6345 
6346                 if (error) {
6347                         mutex_enter(&stp->sd_lock);
6348                         stp->sd_flag &= ~waitflags;
6349                         cv_broadcast(&stp->sd_iocmonitor);
6350                         mutex_exit(&stp->sd_lock);
6351                         crfree(crp);
6352                         return (error);
6353                 }
6354 
6355                 goto waitioc;
6356 
6357         case M_COPYOUT:
6358                 /*
6359                  * Driver or module has ioctl data for a user.
6360                  */
6361                 reqp = (struct copyreq *)bp->b_rptr;
6362                 ASSERT(bp->b_cont != NULL);
6363 
6364                 /*
6365                  * Always (transparent or non-transparent )
6366                  * use the address specified in the request
6367                  */
6368                 taddr = reqp->cq_addr;
6369                 if (!transparent)
6370                         len = (int)reqp->cq_size;
6371 
6372                 /* copyout data to the provided address */
6373                 error = getiocd(bp, taddr, copyflag);
6374 
6375                 freemsg(bp->b_cont);
6376                 bp->b_cont = NULL;
6377 
6378                 bp->b_wptr = bp->b_rptr + sizeof (struct copyresp);
6379                 bp->b_datap->db_type = M_IOCDATA;
6380 
6381                 mblk_setcred(bp, crp, curproc->p_pid);
6382                 resp = (struct copyresp *)bp->b_rptr;
6383                 resp->cp_rval = (caddr_t)(uintptr_t)error;
6384                 resp->cp_flag = (fflags & FMODELS);
6385 
6386                 stream_willservice(stp);
6387                 putnext(stp->sd_wrq, bp);
6388                 stream_runservice(stp);
6389 
6390                 if (error) {
6391                         mutex_enter(&stp->sd_lock);
6392                         stp->sd_flag &= ~waitflags;
6393                         cv_broadcast(&stp->sd_iocmonitor);
6394                         mutex_exit(&stp->sd_lock);
6395                         crfree(crp);
6396                         return (error);
6397                 }
6398                 goto waitioc;
6399 
6400         default:
6401                 ASSERT(0);
6402                 mutex_enter(&stp->sd_lock);
6403                 stp->sd_flag &= ~waitflags;
6404                 cv_broadcast(&stp->sd_iocmonitor);
6405                 mutex_exit(&stp->sd_lock);
6406                 break;
6407         }
6408 
6409         freemsg(bp);
6410         crfree(crp);
6411         return (error);
6412 }
6413 
6414 /*
6415  * Send an M_CMD message downstream and wait for a reply.  This is a ptools
6416  * special used to retrieve information from modules/drivers a stream without
6417  * being subjected to flow control or interfering with pending messages on the
6418  * stream (e.g. an ioctl in flight).
6419  */
6420 int
6421 strdocmd(struct stdata *stp, struct strcmd *scp, cred_t *crp)
6422 {
6423         mblk_t *mp;
6424         struct cmdblk *cmdp;
6425         int error = 0;
6426         int errs = STRHUP|STRDERR|STWRERR|STPLEX;
6427         clock_t rval, timeout = STRTIMOUT;
6428 
6429         if (scp->sc_len < 0 || scp->sc_len > sizeof (scp->sc_buf) ||
6430             scp->sc_timeout < -1)
6431                 return (EINVAL);
6432 
6433         if (scp->sc_timeout > 0)
6434                 timeout = scp->sc_timeout * MILLISEC;
6435 
6436         if ((mp = allocb_cred(sizeof (struct cmdblk), crp,
6437             curproc->p_pid)) == NULL)
6438                 return (ENOMEM);
6439 
6440         crhold(crp);
6441 
6442         cmdp = (struct cmdblk *)mp->b_wptr;
6443         cmdp->cb_cr = crp;
6444         cmdp->cb_cmd = scp->sc_cmd;
6445         cmdp->cb_len = scp->sc_len;
6446         cmdp->cb_error = 0;
6447         mp->b_wptr += sizeof (struct cmdblk);
6448 
6449         DB_TYPE(mp) = M_CMD;
6450         DB_CPID(mp) = curproc->p_pid;
6451 
6452         /*
6453          * Copy in the payload.
6454          */
6455         if (cmdp->cb_len > 0) {
6456                 mp->b_cont = allocb_cred(sizeof (scp->sc_buf), crp,
6457                     curproc->p_pid);
6458                 if (mp->b_cont == NULL) {
6459                         error = ENOMEM;
6460                         goto out;
6461                 }
6462 
6463                 /* cb_len comes from sc_len, which has already been checked */
6464                 ASSERT(cmdp->cb_len <= sizeof (scp->sc_buf));
6465                 (void) bcopy(scp->sc_buf, mp->b_cont->b_wptr, cmdp->cb_len);
6466                 mp->b_cont->b_wptr += cmdp->cb_len;
6467                 DB_CPID(mp->b_cont) = curproc->p_pid;
6468         }
6469 
6470         /*
6471          * Since this mechanism is strictly for ptools, and since only one
6472          * process can be grabbed at a time, we simply fail if there's
6473          * currently an operation pending.
6474          */
6475         mutex_enter(&stp->sd_lock);
6476         if (stp->sd_flag & STRCMDWAIT) {
6477                 mutex_exit(&stp->sd_lock);
6478                 error = EBUSY;
6479                 goto out;
6480         }
6481         stp->sd_flag |= STRCMDWAIT;
6482         ASSERT(stp->sd_cmdblk == NULL);
6483         mutex_exit(&stp->sd_lock);
6484 
6485         putnext(stp->sd_wrq, mp);
6486         mp = NULL;
6487 
6488         /*
6489          * Timed wait for acknowledgment.  If the reply has already arrived,
6490          * don't sleep.  If awakened from the sleep, fail only if the reply
6491          * has not arrived by then.  Otherwise, process the reply.
6492          */
6493         mutex_enter(&stp->sd_lock);
6494         while (stp->sd_cmdblk == NULL) {
6495                 if (stp->sd_flag & errs) {
6496                         if ((error = strgeterr(stp, errs, 0)) != 0)
6497                                 goto waitout;
6498                 }
6499 
6500                 rval = str_cv_wait(&stp->sd_monitor, &stp->sd_lock, timeout, 0);
6501                 if (stp->sd_cmdblk != NULL)
6502                         break;
6503 
6504                 if (rval <= 0) {
6505                         error = (rval == 0) ? EINTR : ETIME;
6506                         goto waitout;
6507                 }
6508         }
6509 
6510         /*
6511          * We received a reply.
6512          */
6513         mp = stp->sd_cmdblk;
6514         stp->sd_cmdblk = NULL;
6515         ASSERT(mp != NULL && DB_TYPE(mp) == M_CMD);
6516         ASSERT(stp->sd_flag & STRCMDWAIT);
6517         stp->sd_flag &= ~STRCMDWAIT;
6518         mutex_exit(&stp->sd_lock);
6519 
6520         cmdp = (struct cmdblk *)mp->b_rptr;
6521         if ((error = cmdp->cb_error) != 0)
6522                 goto out;
6523 
6524         /*
6525          * Data may have been returned in the reply (cb_len > 0).
6526          * If so, copy it out to the user's buffer.
6527          */
6528         if (cmdp->cb_len > 0) {
6529                 if (mp->b_cont == NULL || MBLKL(mp->b_cont) < cmdp->cb_len) {
6530                         error = EPROTO;
6531                         goto out;
6532                 }
6533 
6534                 cmdp->cb_len = MIN(cmdp->cb_len, sizeof (scp->sc_buf));
6535                 (void) bcopy(mp->b_cont->b_rptr, scp->sc_buf, cmdp->cb_len);
6536         }
6537         scp->sc_len = cmdp->cb_len;
6538 out:
6539         freemsg(mp);
6540         crfree(crp);
6541         return (error);
6542 waitout:
6543         ASSERT(stp->sd_cmdblk == NULL);
6544         stp->sd_flag &= ~STRCMDWAIT;
6545         mutex_exit(&stp->sd_lock);
6546         crfree(crp);
6547         return (error);
6548 }
6549 
6550 /*
6551  * Hand out the next "ioctl" sequence number.  The global ioc_id
6552  * counter is advanced and sampled while strresources is held, and
6553  * the freshly incremented value is what the caller gets back.
6554  * Originally for the SunOS keyboard driver; exported so that streams
6555  * modules can send "ioctl" messages downstream from their open routine.
6556  */
6556 int
6557 getiocseqno(void)
6558 {
6559         int     seqno;
6560 
6561         mutex_enter(&strresources);
6562         seqno = ++ioc_id;
6563         mutex_exit(&strresources);
6564         return (seqno);
6565 }
6566 
6567 /*
6568  * Get the next message from the read queue.  If the message is
6569  * priority, STRPRI will have been set by strrput().  This flag
6570  * should be reset only when the entire message at the front of the
6571  * queue has been consumed.
6572  *
      * Arguments:
      *      vp      vnode of the stream; vp->v_stream is the stream head.
      *      mctl    control part.  Up to mctl->maxlen bytes of the leading
      *              non-M_DATA blocks are copied out to mctl->buf and
      *              mctl->len is set to the count copied.  mctl->len is
      *              set to -1 if mctl->maxlen is negative or the message
      *              starts with M_DATA.
      *      mdata   data part.  The remaining blocks are copied out to
      *              mdata->buf through a UIO_USERSPACE uio sized by
      *              mdata->maxlen, and mdata->len is set to the count
      *              copied.  mdata->len is set to -1 if mdata->maxlen is
      *              negative or nothing is left after the control part.
      *      prip    in: for MSG_BAND the minimum band wanted; must be 0
      *              for MSG_HIPRI.  out: band of the retrieved message.
      *      flagsp  in: exactly one of MSG_HIPRI, MSG_ANY or MSG_BAND.
      *              out: MSG_HIPRI if the message type is >= QPCTL, else
      *              MSG_BAND; 0 when returning empty because of
      *              STRHUP/STREOF or because both maxlen values are 0.
      *      fmode   passed through to strwaitq().
      *      rvp     rvp->r_val1 is zeroed on entry and, on every path that
      *              reaches getmout, set to the MORECTL/MOREDATA bits
      *              describing what was put back on the queue.
      *
      * Returns 0 or an errno value: EINVAL for an unacceptable *flagsp or
      * *prip, EBADMSG if the message at the head is an M_PASSFP, EFAULT if
      * the control copyout fails, or whatever i_straccess(), strgeterr(),
      * strget(), strwaitq() or struiocopyout() reported.  On the error
      * paths *flagsp and *prip are left untouched.
      *
6573  * NOTE: strgetmsg and kstrgetmsg have much of the logic in common.
6574  */
6575 int
6576 strgetmsg(
6577         struct vnode *vp,
6578         struct strbuf *mctl,
6579         struct strbuf *mdata,
6580         unsigned char *prip,
6581         int *flagsp,
6582         int fmode,
6583         rval_t *rvp)
6584 {
6585         struct stdata *stp;
6586         mblk_t *bp, *nbp;
6587         mblk_t *savemp = NULL;          /* Unconsumed part to put back */
6588         mblk_t *savemptail = NULL;      /* Last leftover control block */
6589         uint_t old_sd_flag;
6590         int flg = MSG_BAND;
6591         int more = 0;                   /* MORECTL/MOREDATA for r_val1 */
6592         int error = 0;
6593         char first = 1;
6594         uint_t mark;            /* Contains MSG*MARK and _LASTMARK */
6595 #define _LASTMARK       0x8000  /* Distinct from MSG*MARK */
6596         unsigned char pri = 0;
6597         queue_t *q;
6598         int     pr = 0;                 /* Partial read successful */
6599         struct uio uios;
6600         struct uio *uiop = &uios;
6601         struct iovec iovs;
6602         unsigned char type;
6603 
6604         TRACE_1(TR_FAC_STREAMS_FR, TR_STRGETMSG_ENTER,
6605             "strgetmsg:%p", vp);
6606 
6607         ASSERT(vp->v_stream);
6608         stp = vp->v_stream;
6609         rvp->r_val1 = 0;
6610 
             /*
              * Fail early if i_straccess() refuses JCREAD access or if
              * strgeterr() reports a pending STRDERR/STPLEX error.
              */
6611         mutex_enter(&stp->sd_lock);
6612 
6613         if ((error = i_straccess(stp, JCREAD)) != 0) {
6614                 mutex_exit(&stp->sd_lock);
6615                 return (error);
6616         }
6617 
6618         if (stp->sd_flag & (STRDERR|STPLEX)) {
6619                 error = strgeterr(stp, STRDERR|STPLEX, 0);
6620                 if (error != 0) {
6621                         mutex_exit(&stp->sd_lock);
6622                         return (error);
6623                 }
6624         }
6625         mutex_exit(&stp->sd_lock);
6626 
             /*
              * Validate the requested message class.  Only MSG_HIPRI,
              * MSG_ANY and MSG_BAND are accepted, and MSG_HIPRI requires
              * a band of 0.
              */
6627         switch (*flagsp) {
6628         case MSG_HIPRI:
6629                 if (*prip != 0)
6630                         return (EINVAL);
6631                 break;
6632 
6633         case MSG_ANY:
6634         case MSG_BAND:
6635                 break;
6636 
6637         default:
6638                 return (EINVAL);
6639         }
6640         /*
6641          * Setup uio and iov for data part
6642          */
6643         iovs.iov_base = mdata->buf;
6644         iovs.iov_len = mdata->maxlen;
6645         uios.uio_iov = &iovs;
6646         uios.uio_iovcnt = 1;
6647         uios.uio_loffset = 0;
6648         uios.uio_segflg = UIO_USERSPACE;
6649         uios.uio_fmode = 0;
6650         uios.uio_extflg = UIO_COPY_CACHED;
6651         uios.uio_resid = mdata->maxlen;
6652         uios.uio_offset = 0;
6653 
6654         q = _RD(stp->sd_wrq);
6655         mutex_enter(&stp->sd_lock);
             /*
              * Remember sd_flag as it is now; after the copyout (done with
              * sd_lock dropped) it is used to decide whether STRATMARK may
              * be cleared.
              */
6656         old_sd_flag = stp->sd_flag;
6657         mark = 0;
6658         for (;;) {
6659                 int done = 0;
                     /* Sampled under sd_lock at the top of every pass. */
6660                 mblk_t *q_first = q->q_first;
6661 
6662                 /*
6663                  * Get the next message of appropriate priority
6664                  * from the stream head.  If the caller is interested
6665                  * in band or hipri messages, then they should already
6666                  * be enqueued at the stream head.  On the other hand
6667                  * if the caller wants normal (band 0) messages, they
6668                  * might be deferred in a synchronous stream and they
6669                  * will need to be pulled up.
6670                  *
6671                  * After we have dequeued a message, we might find that
6672                  * it was a deferred M_SIG that was enqueued at the
6673                  * stream head.  It must now be posted as part of the
6674                  * read by calling strsignal_nolock().
6675                  *
6676                  * Also note that strrput does not enqueue an M_PCSIG,
6677                  * and there cannot be more than one hipri message,
6678                  * so there was no need to have the M_PCSIG case.
6679                  *
6680                  * At some time it might be nice to try and wrap the
6681                  * functionality of kstrgetmsg() and strgetmsg() into
6682                  * a common routine so as to reduce the amount of
6683                  * replicated code (since they are extremely similar).
6684                  */
6685                 if (!(*flagsp & (MSG_HIPRI|MSG_BAND))) {
6686                         /* Asking for normal, band0 data */
6687                         bp = strget(stp, q, uiop, first, &error);
6688                         ASSERT(MUTEX_HELD(&stp->sd_lock));
6689                         if (bp != NULL) {
6690                                 if (DB_TYPE(bp) == M_SIG) {
6691                                         strsignal_nolock(stp, *bp->b_rptr,
6692                                             bp->b_band);
6693                                         freemsg(bp);
6694                                         continue;
6695                                 } else {
6696                                         break;
6697                                 }
6698                         }
6699                         if (error != 0)
6700                                 goto getmout;
6701 
6702                 /*
6703                  * We can't depend on the value of STRPRI here because
6704                  * the stream head may be in transit. Therefore, we
6705                  * must look at the type of the first message to
6706                  * determine if a high priority message is waiting
6707                  */
6708                 } else if ((*flagsp & MSG_HIPRI) && q_first != NULL &&
6709                     DB_TYPE(q_first) >= QPCTL &&
6710                     (bp = getq_noenab(q, 0)) != NULL) {
6711                         /* Asked for HIPRI and got one */
6712                         ASSERT(DB_TYPE(bp) >= QPCTL);
6713                         break;
6714                 } else if ((*flagsp & MSG_BAND) && q_first != NULL &&
6715                     ((q_first->b_band >= *prip) || DB_TYPE(q_first) >= QPCTL) &&
6716                     (bp = getq_noenab(q, 0)) != NULL) {
6717                         /*
6718                          * Asked for at least band "prip" and got either at
6719                          * least that band or a hipri message.
6720                          */
6721                         ASSERT(bp->b_band >= *prip || DB_TYPE(bp) >= QPCTL);
6722                         if (DB_TYPE(bp) == M_SIG) {
6723                                 strsignal_nolock(stp, *bp->b_rptr, bp->b_band);
6724                                 freemsg(bp);
6725                                 continue;
6726                         } else {
6727                                 break;
6728                         }
6729                 }
6730 
6731                 /* No data. Time to sleep? */
6732                 qbackenable(q, 0);
6733 
6734                 /*
6735                  * If STRHUP or STREOF, return 0 length control and data.
6736                  * If both mctl->maxlen and mdata->maxlen are 0 (the
6737                  * equivalent of a read(fd,buf,0)), do not sleep to satisfy
6738                  * this request because by default we have zero bytes to
                      * return.
6739                  */
6740                 if ((stp->sd_flag & (STRHUP|STREOF)) || (mctl->maxlen == 0 &&
6741                     mdata->maxlen == 0)) {
6742                         mctl->len = mdata->len = 0;
6743                         *flagsp = 0;
6744                         mutex_exit(&stp->sd_lock);
6745                         return (0);
6746                 }
6747                 TRACE_2(TR_FAC_STREAMS_FR, TR_STRGETMSG_WAIT,
6748                     "strgetmsg calls strwaitq:%p, %p",
6749                     vp, uiop);
                     /*
                      * Return straight away if strwaitq() fails or sets
                      * "done"; in the latter case the value returned is
                      * whatever strwaitq() left in error.
                      */
6750                 if (((error = strwaitq(stp, GETWAIT, (ssize_t)0, fmode, -1,
6751                     &done)) != 0) || done) {
6752                         TRACE_2(TR_FAC_STREAMS_FR, TR_STRGETMSG_DONE,
6753                             "strgetmsg error or done:%p, %p",
6754                             vp, uiop);
6755                         mutex_exit(&stp->sd_lock);
6756                         return (error);
6757                 }
6758                 TRACE_2(TR_FAC_STREAMS_FR, TR_STRGETMSG_AWAKE,
6759                     "strgetmsg awakes:%p, %p", vp, uiop);
                     /* Repeat the access check after having slept. */
6760                 if ((error = i_straccess(stp, JCREAD)) != 0) {
6761                         mutex_exit(&stp->sd_lock);
6762                         return (error);
6763                 }
                     /* Later strget() calls are no longer the first pass. */
6764                 first = 0;
6765         }
6766         ASSERT(bp != NULL);
6767         /*
6768          * Extract any mark information. If the message is not completely
6769          * consumed this information will be put in the mblk
6770          * that is putback.
6771          * If MSGMARKNEXT is set and the message is completely consumed
6772          * the STRATMARK flag will be set below. Likewise, if
6773          * MSGNOTMARKNEXT is set and the message is
6774          * completely consumed STRNOTATMARK will be set.
6775          */
6776         mark = bp->b_flag & (MSGMARK | MSGMARKNEXT | MSGNOTMARKNEXT);
6777         ASSERT((mark & (MSGMARKNEXT|MSGNOTMARKNEXT)) !=
6778             (MSGMARKNEXT|MSGNOTMARKNEXT));
6779         if (mark != 0 && bp == stp->sd_mark) {
6780                 mark |= _LASTMARK;
6781                 stp->sd_mark = NULL;
6782         }
6783         /*
6784          * keep track of the original message type and priority
6785          */
6786         pri = bp->b_band;
6787         type = bp->b_datap->db_type;
             /*
              * An M_PASSFP is not returned here.  Restore the mark state
              * taken from it above, put it back on the queue and fail with
              * EBADMSG.
              */
6788         if (type == M_PASSFP) {
6789                 if ((mark & _LASTMARK) && (stp->sd_mark == NULL))
6790                         stp->sd_mark = bp;
6791                 bp->b_flag |= mark & ~_LASTMARK;
6792                 putback(stp, q, bp, pri);
6793                 qbackenable(q, pri);
6794                 mutex_exit(&stp->sd_lock);
6795                 return (EBADMSG);
6796         }
6797         ASSERT(type != M_SIG);
6798 
6799         /*
6800          * Set this flag so strrput will not generate signals. Need to
6801          * make sure this flag is cleared before leaving this routine
6802          * else signals will stop being sent.
6803          */
6804         stp->sd_flag |= STRGETINPROG;
             /* sd_lock stays dropped for the copyouts that follow. */
6805         mutex_exit(&stp->sd_lock);
6806 
6807         if (STREAM_NEEDSERVICE(stp))
6808                 stream_runservice(stp);
6809 
6810         /*
6811          * Set HIPRI flag if message is priority.
6812          */
6813         if (type >= QPCTL)
6814                 flg = MSG_HIPRI;
6815         else
6816                 flg = MSG_BAND;
6817 
6818         /*
6819          * First process PROTO or PCPROTO blocks, if any.
6820          */
6821         if (mctl->maxlen >= 0 && type != M_DATA) {
6822                 size_t  n, bcnt;
6823                 char    *ubuf;
6824 
                     /* bcnt is the room left in the user's control buffer. */
6825                 bcnt = mctl->maxlen;
6826                 ubuf = mctl->buf;
6827                 while (bp != NULL && bp->b_datap->db_type != M_DATA) {
6828                         if ((n = MIN(bcnt, bp->b_wptr - bp->b_rptr)) != 0 &&
6829                             copyout(bp->b_rptr, ubuf, n)) {
6830                                 error = EFAULT;
6831                                 mutex_enter(&stp->sd_lock);
6832                                 /*
6833                                  * clear stream head pri flag based on
6834                                  * first message type
6835                                  */
6836                                 if (type >= QPCTL) {
6837                                         ASSERT(type == M_PCPROTO);
6838                                         stp->sd_flag &= ~STRPRI;
6839                                 }
6840                                 more = 0;
6841                                 freemsg(bp);
6842                                 goto getmout;
6843                         }
6844                         ubuf += n;
6845                         bp->b_rptr += n;
                             /* Block fully consumed: free it and move on. */
6846                         if (bp->b_rptr >= bp->b_wptr) {
6847                                 nbp = bp;
6848                                 bp = bp->b_cont;
6849                                 freeb(nbp);
6850                         }
6851                         ASSERT(n <= bcnt);
6852                         bcnt -= n;
                             /* The user's control buffer is full. */
6853                         if (bcnt == 0)
6854                                 break;
6855                 }
6856                 mctl->len = mctl->maxlen - bcnt;
6857         } else
6858                 mctl->len = -1;
6859 
6860         if (bp && bp->b_datap->db_type != M_DATA) {
6861                 /*
6862                  * More PROTO blocks in msg.
6863                  */
6864                 more |= MORECTL;
6865                 savemp = bp;
6866                 while (bp && bp->b_datap->db_type != M_DATA) {
6867                         savemptail = bp;
6868                         bp = bp->b_cont;
6869                 }
                     /*
                      * Cut the chain after the last leftover control block
                      * so that only the data blocks are handed to
                      * struiocopyout(); any data left over is linked back
                      * on through savemptail below.
                      */
6870                 savemptail->b_cont = NULL;
6871         }
6872 
6873         /*
6874          * Now process DATA blocks, if any.
6875          */
6876         if (mdata->maxlen >= 0 && bp) {
6877                 /*
6878                  * struiocopyout will consume a potential zero-length
6879                  * M_DATA even if uio_resid is zero.
6880                  */
6881                 size_t oldresid = uiop->uio_resid;
6882 
6883                 bp = struiocopyout(bp, uiop, &error);
6884                 if (error != 0) {
6885                         mutex_enter(&stp->sd_lock);
6886                         /*
6887                          * clear stream head hi pri flag based on
6888                          * first message
6889                          */
6890                         if (type >= QPCTL) {
6891                                 ASSERT(type == M_PCPROTO);
6892                                 stp->sd_flag &= ~STRPRI;
6893                         }
6894                         more = 0;
6895                         freemsg(savemp);
6896                         goto getmout;
6897                 }
6898                 /*
6899                  * (pr == 1) indicates a partial read.
6900                  */
6901                 if (oldresid > uiop->uio_resid)
6902                         pr = 1;
6903                 mdata->len = mdata->maxlen - uiop->uio_resid;
6904         } else
6905                 mdata->len = -1;
6906 
6907         if (bp) {                       /* more data blocks in msg */
6908                 more |= MOREDATA;
6909                 if (savemp)
6910                         savemptail->b_cont = bp;
6911                 else
6912                         savemp = bp;
6913         }
6914 
6915         mutex_enter(&stp->sd_lock);
6916         if (savemp) {
6917                 if (pr && (savemp->b_datap->db_type == M_DATA) &&
6918                     msgnodata(savemp)) {
6919                         /*
6920                          * Avoid queuing a zero-length tail part of
6921                          * a message. pr=1 indicates that we read some of
6922                          * the message.
6923                          */
6924                         freemsg(savemp);
6925                         more &= ~MOREDATA;
6926                         /*
6927                          * clear stream head hi pri flag based on
6928                          * first message
6929                          */
6930                         if (type >= QPCTL) {
6931                                 ASSERT(type == M_PCPROTO);
6932                                 stp->sd_flag &= ~STRPRI;
6933                         }
6934                 } else {
6935                         savemp->b_band = pri;
6936                         /*
6937                          * If the first message was HIPRI and the one we're
6938                          * putting back isn't, then clear STRPRI, otherwise
6939                          * set STRPRI again.  Note that we must set STRPRI
6940                          * again since the flush logic in strrput_nondata()
6941                          * may have cleared it while we had sd_lock dropped.
6942                          */
6943                         if (type >= QPCTL) {
6944                                 ASSERT(type == M_PCPROTO);
6945                                 if (queclass(savemp) < QPCTL)
6946                                         stp->sd_flag &= ~STRPRI;
6947                                 else
6948                                         stp->sd_flag |= STRPRI;
6949                         } else if (queclass(savemp) >= QPCTL) {
6950                                 /*
6951                                  * The first message was not a HIPRI message,
6952                                  * but the one we are about to putback is.
6953                                  * For simplicity, we do not allow for HIPRI
6954                                  * messages to be embedded in the message
6955                                  * body, so just force it to same type as
6956                                  * first message.
6957                                  */
6958                                 ASSERT(type == M_DATA || type == M_PROTO);
6959                                 ASSERT(savemp->b_datap->db_type == M_PCPROTO);
6960                                 savemp->b_datap->db_type = type;
6961                         }
6962                         if (mark != 0) {
6963                                 savemp->b_flag |= mark & ~_LASTMARK;
6964                                 if ((mark & _LASTMARK) &&
6965                                     (stp->sd_mark == NULL)) {
6966                                         /*
6967                                          * If another marked message arrived
6968                                          * while sd_lock was not held sd_mark
6969                                          * would be non-NULL.
6970                                          */
6971                                         stp->sd_mark = savemp;
6972                                 }
6973                         }
6974                         putback(stp, q, savemp, pri);
6975                 }
6976         } else {
6977                 /*
6978                  * The complete message was consumed.
6979                  *
6980                  * If another M_PCPROTO arrived while sd_lock was not held
6981                  * it would have been discarded since STRPRI was still set.
6982                  *
6983                  * Move the MSG*MARKNEXT information
6984                  * to the stream head just in case
6985                  * the read queue becomes empty.
6986                  * clear stream head hi pri flag based on
6987                  * first message
6988                  *
6989                  * If the stream head was at the mark
6990                  * (STRATMARK) before we dropped sd_lock above
6991                  * and some data was consumed then we have
6992                  * moved past the mark thus STRATMARK is
6993                  * cleared. However, if a message arrived in
6994                  * strrput during the copyout above causing
6995                  * STRATMARK to be set we can not clear that
6996                  * flag.
6997                  */
6998                 if (type >= QPCTL) {
6999                         ASSERT(type == M_PCPROTO);
7000                         stp->sd_flag &= ~STRPRI;
7001                 }
7002                 if (mark & (MSGMARKNEXT|MSGNOTMARKNEXT|MSGMARK)) {
7003                         if (mark & MSGMARKNEXT) {
7004                                 stp->sd_flag &= ~STRNOTATMARK;
7005                                 stp->sd_flag |= STRATMARK;
7006                         } else if (mark & MSGNOTMARKNEXT) {
7007                                 stp->sd_flag &= ~STRATMARK;
7008                                 stp->sd_flag |= STRNOTATMARK;
7009                         } else {
7010                                 stp->sd_flag &= ~(STRATMARK|STRNOTATMARK);
7011                         }
7012                 } else if (pr && (old_sd_flag & STRATMARK)) {
7013                         stp->sd_flag &= ~STRATMARK;
7014                 }
7015         }
7016 
             /* Only a successful retrieval reports its class and band. */
7017         *flagsp = flg;
7018         *prip = pri;
7019 
7020         /*
7021          * Getmsg cleanup processing - if the state of the queue has changed
7022          * some signals may need to be sent and/or poll awakened.
              * Every path that arrives here, including the error paths that
              * jump to getmout, does so with sd_lock held.
7023          */
7024 getmout:
7025         qbackenable(q, pri);
7026 
7027         /*
7028          * We dropped the stream head lock above. Send all M_SIG messages
7029          * before processing stream head for SIGPOLL messages.
7030          */
7031         ASSERT(MUTEX_HELD(&stp->sd_lock));
7032         while ((bp = q->q_first) != NULL &&
7033             (bp->b_datap->db_type == M_SIG)) {
7034                 /*
7035                  * sd_lock is held so the content of the read queue can not
7036                  * change.
7037                  */
7038                 bp = getq(q);
7039                 ASSERT(bp != NULL && bp->b_datap->db_type == M_SIG);
7040 
7041                 strsignal_nolock(stp, *bp->b_rptr, bp->b_band);
                     /*
                      * sd_lock is dropped while the M_SIG is freed and any
                      * pending service work is run, then retaken before
                      * q_first is examined again.
                      */
7042                 mutex_exit(&stp->sd_lock);
7043                 freemsg(bp);
7044                 if (STREAM_NEEDSERVICE(stp))
7045                         stream_runservice(stp);
7046                 mutex_enter(&stp->sd_lock);
7047         }
7048 
7049         /*
7050          * stream head cannot change while we make the determination
7051          * whether or not to send a signal. Drop the flag to allow strrput
7052          * to send firstmsgsigs again.
7053          */
7054         stp->sd_flag &= ~STRGETINPROG;
7055 
7056         /*
7057          * If the type of message at the front of the queue changed
7058          * due to the receive the appropriate signals and pollwakeup events
7059          * are generated. The type of changes are:
7060          *      Processed a hipri message, q_first is not hipri.
7061          *      Processed a band X message, and q_first is band Y.
7062          * The generated signals and pollwakeups are identical to what
7063          * strrput() generates should the message that is now on q_first
7064          * arrive to an empty read queue.
7065          *
7066          * Note: only strrput will send a signal for a hipri message.
7067          */
7068         if ((bp = q->q_first) != NULL && !(stp->sd_flag & STRPRI)) {
7069                 strsigset_t signals = 0;
7070                 strpollset_t pollwakeups = 0;
7071 
7072                 if (flg & MSG_HIPRI) {
7073                         /*
7074                          * Removed a hipri message. Regular data at
7075                          * the front of the queue.
7076                          */
7077                         if (bp->b_band == 0) {
7078                                 signals = S_INPUT | S_RDNORM;
7079                                 pollwakeups = POLLIN | POLLRDNORM;
7080                         } else {
7081                                 signals = S_INPUT | S_RDBAND;
7082                                 pollwakeups = POLLIN | POLLRDBAND;
7083                         }
7084                 } else if (pri != bp->b_band) {
7085                         /*
7086                          * The band is different for the new q_first.
7087                          */
7088                         if (bp->b_band == 0) {
7089                                 signals = S_RDNORM;
7090                                 pollwakeups = POLLIN | POLLRDNORM;
7091                         } else {
7092                                 signals = S_RDBAND;
7093                                 pollwakeups = POLLIN | POLLRDBAND;
7094                         }
7095                 }
7096 
7097                 if (pollwakeups != 0) {
                             /*
                              * A POLLIN|POLLRDNORM wakeup is issued only if
                              * SR_POLLIN is set in sd_rput_opt, and issuing
                              * it clears that bit.
                              */
7098                         if (pollwakeups == (POLLIN | POLLRDNORM)) {
7099                                 if (!(stp->sd_rput_opt & SR_POLLIN))
7100                                         goto no_pollwake;
7101                                 stp->sd_rput_opt &= ~SR_POLLIN;
7102                         }
                             /* pollwakeup() is called with sd_lock dropped. */
7103                         mutex_exit(&stp->sd_lock);
7104                         pollwakeup(&stp->sd_pollist, pollwakeups);
7105                         mutex_enter(&stp->sd_lock);
7106                 }
7107 no_pollwake:
7108 
7109                 if (stp->sd_sigflags & signals)
7110                         strsendsig(stp->sd_siglist, signals, bp->b_band, 0);
7111         }
7112         mutex_exit(&stp->sd_lock);
7113 
7114         rvp->r_val1 = more;
7115         return (error);
7116 #undef  _LASTMARK
7117 }
7118 
7119 /*
7120  * Get the next message from the read queue.  If the message is
7121  * priority, STRPRI will have been set by strrput().  This flag
7122  * should be reset only when the entire message at the front of the
7123  * queue has been consumed.
7124  *
7125  * If uiop is NULL all data is returned in mctlp.
7126  * Note that a NULL uiop implies that FNDELAY and FNONBLOCK are assumed
7127  * not enabled.
7128  * The timeout parameter is in milliseconds; -1 for infinity.
7129  * This routine handles the consolidation private flags:
7130  *      MSG_IGNERROR    Ignore any stream head error except STPLEX.
7131  *      MSG_DELAYERROR  Defer the error check until the queue is empty.
7132  *      MSG_HOLDSIG     Hold signals while waiting for data.
7133  *      MSG_IPEEK       Only peek at messages.
7134  *      MSG_DISCARDTAIL Discard the tail M_DATA part of the message
7135  *                      that doesn't fit.
7136  *      MSG_NOMARK      If the message is marked leave it on the queue.
7137  *
7138  * NOTE: strgetmsg and kstrgetmsg have much of the logic in common.
7139  */
7140 int
7141 kstrgetmsg(
7142         struct vnode *vp,
7143         mblk_t **mctlp,
7144         struct uio *uiop,
7145         unsigned char *prip,
7146         int *flagsp,
7147         clock_t timout,
7148         rval_t *rvp)
7149 {
7150         struct stdata *stp;
7151         mblk_t *bp, *nbp;
7152         mblk_t *savemp = NULL;
7153         mblk_t *savemptail = NULL;
7154         int flags;
7155         uint_t old_sd_flag;
7156         int flg = MSG_BAND;
7157         int more = 0;
7158         int error = 0;
7159         char first = 1;
7160         uint_t mark;            /* Contains MSG*MARK and _LASTMARK */
7161 #define _LASTMARK       0x8000  /* Distinct from MSG*MARK */
7162         unsigned char pri = 0;
7163         queue_t *q;
7164         int     pr = 0;                 /* Partial read successful */
7165         unsigned char type;
7166 
7167         TRACE_1(TR_FAC_STREAMS_FR, TR_KSTRGETMSG_ENTER,
7168             "kstrgetmsg:%p", vp);
7169 
7170         ASSERT(vp->v_stream);
7171         stp = vp->v_stream;
7172         rvp->r_val1 = 0;
7173 
7174         mutex_enter(&stp->sd_lock);
7175 
7176         if ((error = i_straccess(stp, JCREAD)) != 0) {
7177                 mutex_exit(&stp->sd_lock);
7178                 return (error);
7179         }
7180 
7181         flags = *flagsp;
7182         if (stp->sd_flag & (STRDERR|STPLEX)) {
7183                 if ((stp->sd_flag & STPLEX) ||
7184                     (flags & (MSG_IGNERROR|MSG_DELAYERROR)) == 0) {
7185                         error = strgeterr(stp, STRDERR|STPLEX,
7186                             (flags & MSG_IPEEK));
7187                         if (error != 0) {
7188                                 mutex_exit(&stp->sd_lock);
7189                                 return (error);
7190                         }
7191                 }
7192         }
7193         mutex_exit(&stp->sd_lock);
7194 
7195         switch (flags & (MSG_HIPRI|MSG_ANY|MSG_BAND)) {
7196         case MSG_HIPRI:
7197                 if (*prip != 0)
7198                         return (EINVAL);
7199                 break;
7200 
7201         case MSG_ANY:
7202         case MSG_BAND:
7203                 break;
7204 
7205         default:
7206                 return (EINVAL);
7207         }
7208 
7209 retry:
7210         q = _RD(stp->sd_wrq);
7211         mutex_enter(&stp->sd_lock);
7212         old_sd_flag = stp->sd_flag;
7213         mark = 0;
7214         for (;;) {
7215                 int done = 0;
7216                 int waitflag;
7217                 int fmode;
7218                 mblk_t *q_first = q->q_first;
7219 
7220                 /*
7221                  * This section of the code operates just like the code
7222                  * in strgetmsg().  There is a comment there about what
7223                  * is going on here.
7224                  */
7225                 if (!(flags & (MSG_HIPRI|MSG_BAND))) {
7226                         /* Asking for normal, band0 data */
7227                         bp = strget(stp, q, uiop, first, &error);
7228                         ASSERT(MUTEX_HELD(&stp->sd_lock));
7229                         if (bp != NULL) {
7230                                 if (DB_TYPE(bp) == M_SIG) {
7231                                         strsignal_nolock(stp, *bp->b_rptr,
7232                                             bp->b_band);
7233                                         freemsg(bp);
7234                                         continue;
7235                                 } else {
7236                                         break;
7237                                 }
7238                         }
7239                         if (error != 0) {
7240                                 goto getmout;
7241                         }
7242                 /*
7243                  * We can't depend on the value of STRPRI here because
7244                  * the stream head may be in transit. Therefore, we
7245                  * must look at the type of the first message to
7246                  * determine if a high priority messages is waiting
7247                  */
7248                 } else if ((flags & MSG_HIPRI) && q_first != NULL &&
7249                     DB_TYPE(q_first) >= QPCTL &&
7250                     (bp = getq_noenab(q, 0)) != NULL) {
7251                         ASSERT(DB_TYPE(bp) >= QPCTL);
7252                         break;
7253                 } else if ((flags & MSG_BAND) && q_first != NULL &&
7254                     ((q_first->b_band >= *prip) || DB_TYPE(q_first) >= QPCTL) &&
7255                     (bp = getq_noenab(q, 0)) != NULL) {
7256                         /*
7257                          * Asked for at least band "prip" and got either at
7258                          * least that band or a hipri message.
7259                          */
7260                         ASSERT(bp->b_band >= *prip || DB_TYPE(bp) >= QPCTL);
7261                         if (DB_TYPE(bp) == M_SIG) {
7262                                 strsignal_nolock(stp, *bp->b_rptr, bp->b_band);
7263                                 freemsg(bp);
7264                                 continue;
7265                         } else {
7266                                 break;
7267                         }
7268                 }
7269 
7270                 /* No data. Time to sleep? */
7271                 qbackenable(q, 0);
7272 
7273                 /*
7274                  * Delayed error notification?
7275                  */
7276                 if ((stp->sd_flag & (STRDERR|STPLEX)) &&
7277                     (flags & (MSG_IGNERROR|MSG_DELAYERROR)) == MSG_DELAYERROR) {
7278                         error = strgeterr(stp, STRDERR|STPLEX,
7279                             (flags & MSG_IPEEK));
7280                         if (error != 0) {
7281                                 mutex_exit(&stp->sd_lock);
7282                                 return (error);
7283                         }
7284                 }
7285 
7286                 /*
7287                  * If STRHUP or STREOF, return 0 length control and data.
7288                  * If a read(fd,buf,0) has been done, do not sleep, just
7289                  * return.
7290                  *
7291                  * If mctlp == NULL and uiop == NULL, then the code will
7292                  * do the strwaitq. This is an understood way of saying
7293                  * sleep "polling" until a message is received.
7294                  */
7295                 if ((stp->sd_flag & (STRHUP|STREOF)) ||
7296                     (uiop != NULL && uiop->uio_resid == 0)) {
7297                         if (mctlp != NULL)
7298                                 *mctlp = NULL;
7299                         *flagsp = 0;
7300                         mutex_exit(&stp->sd_lock);
7301                         return (0);
7302                 }
7303 
7304                 waitflag = GETWAIT;
7305                 if (flags &
7306                     (MSG_HOLDSIG|MSG_IGNERROR|MSG_IPEEK|MSG_DELAYERROR)) {
7307                         if (flags & MSG_HOLDSIG)
7308                                 waitflag |= STR_NOSIG;
7309                         if (flags & MSG_IGNERROR)
7310                                 waitflag |= STR_NOERROR;
7311                         if (flags & MSG_IPEEK)
7312                                 waitflag |= STR_PEEK;
7313                         if (flags & MSG_DELAYERROR)
7314                                 waitflag |= STR_DELAYERR;
7315                 }
7316                 if (uiop != NULL)
7317                         fmode = uiop->uio_fmode;
7318                 else
7319                         fmode = 0;
7320 
7321                 TRACE_2(TR_FAC_STREAMS_FR, TR_KSTRGETMSG_WAIT,
7322                     "kstrgetmsg calls strwaitq:%p, %p",
7323                     vp, uiop);
7324                 if (((error = strwaitq(stp, waitflag, (ssize_t)0,
7325                     fmode, timout, &done))) != 0 || done) {
7326                         TRACE_2(TR_FAC_STREAMS_FR, TR_KSTRGETMSG_DONE,
7327                             "kstrgetmsg error or done:%p, %p",
7328                             vp, uiop);
7329                         mutex_exit(&stp->sd_lock);
7330                         return (error);
7331                 }
7332                 TRACE_2(TR_FAC_STREAMS_FR, TR_KSTRGETMSG_AWAKE,
7333                     "kstrgetmsg awakes:%p, %p", vp, uiop);
7334                 if ((error = i_straccess(stp, JCREAD)) != 0) {
7335                         mutex_exit(&stp->sd_lock);
7336                         return (error);
7337                 }
7338                 first = 0;
7339         }
7340         ASSERT(bp != NULL);
7341         /*
7342          * Extract any mark information. If the message is not completely
7343          * consumed this information will be put in the mblk
7344          * that is putback.
7345          * If MSGMARKNEXT is set and the message is completely consumed
7346          * the STRATMARK flag will be set below. Likewise, if
7347          * MSGNOTMARKNEXT is set and the message is
7348          * completely consumed STRNOTATMARK will be set.
7349          */
7350         mark = bp->b_flag & (MSGMARK | MSGMARKNEXT | MSGNOTMARKNEXT);
7351         ASSERT((mark & (MSGMARKNEXT|MSGNOTMARKNEXT)) !=
7352             (MSGMARKNEXT|MSGNOTMARKNEXT));
7353         pri = bp->b_band;
7354         if (mark != 0) {
7355                 /*
7356                  * If the caller doesn't want the mark return.
7357                  * Used to implement MSG_WAITALL in sockets.
7358                  */
7359                 if (flags & MSG_NOMARK) {
7360                         putback(stp, q, bp, pri);
7361                         qbackenable(q, pri);
7362                         mutex_exit(&stp->sd_lock);
7363                         return (EWOULDBLOCK);
7364                 }
7365                 if (bp == stp->sd_mark) {
7366                         mark |= _LASTMARK;
7367                         stp->sd_mark = NULL;
7368                 }
7369         }
7370 
7371         /*
7372          * keep track of the first message type
7373          */
7374         type = bp->b_datap->db_type;
7375 
7376         if (bp->b_datap->db_type == M_PASSFP) {
7377                 if ((mark & _LASTMARK) && (stp->sd_mark == NULL))
7378                         stp->sd_mark = bp;
7379                 bp->b_flag |= mark & ~_LASTMARK;
7380                 putback(stp, q, bp, pri);
7381                 qbackenable(q, pri);
7382                 mutex_exit(&stp->sd_lock);
7383                 return (EBADMSG);
7384         }
7385         ASSERT(type != M_SIG);
7386 
7387         if (flags & MSG_IPEEK) {
7388                 /*
7389                  * Clear any struioflag - we do the uiomove over again
7390                  * when peeking since it simplifies the code.
7391                  *
7392                  * Dup the message and put the original back on the queue.
7393                  * If dupmsg() fails, try again with copymsg() to see if
7394                  * there is indeed a shortage of memory.  dupmsg() may fail
7395                  * if db_ref in any of the messages reaches its limit.
7396                  */
7397 
7398                 if ((nbp = dupmsg(bp)) == NULL && (nbp = copymsg(bp)) == NULL) {
7399                         /*
7400                          * Restore the state of the stream head since we
7401                          * need to drop sd_lock (strwaitbuf is sleeping).
7402                          */
7403                         size_t size = msgdsize(bp);
7404 
7405                         if ((mark & _LASTMARK) && (stp->sd_mark == NULL))
7406                                 stp->sd_mark = bp;
7407                         bp->b_flag |= mark & ~_LASTMARK;
7408                         putback(stp, q, bp, pri);
7409                         mutex_exit(&stp->sd_lock);
7410                         error = strwaitbuf(size, BPRI_HI);
7411                         if (error) {
7412                                 /*
7413                                  * There is no net change to the queue thus
7414                                  * no need to qbackenable.
7415                                  */
7416                                 return (error);
7417                         }
7418                         goto retry;
7419                 }
7420 
7421                 if ((mark & _LASTMARK) && (stp->sd_mark == NULL))
7422                         stp->sd_mark = bp;
7423                 bp->b_flag |= mark & ~_LASTMARK;
7424                 putback(stp, q, bp, pri);
7425                 bp = nbp;
7426         }
7427 
7428         /*
7429          * Set this flag so strrput will not generate signals. Need to
7430          * make sure this flag is cleared before leaving this routine
7431          * else signals will stop being sent.
7432          */
7433         stp->sd_flag |= STRGETINPROG;
7434         mutex_exit(&stp->sd_lock);
7435 
7436         if ((stp->sd_rputdatafunc != NULL) && (DB_TYPE(bp) == M_DATA)) {
7437                 mblk_t *tmp, *prevmp;
7438 
7439                 /*
7440                  * Put first non-data mblk back to stream head and
7441                  * cut the mblk chain so sd_rputdatafunc only sees
7442                  * M_DATA mblks. We can skip the first mblk since it
7443                  * is M_DATA according to the condition above.
7444                  */
7445                 for (prevmp = bp, tmp = bp->b_cont; tmp != NULL;
7446                     prevmp = tmp, tmp = tmp->b_cont) {
7447                         if (DB_TYPE(tmp) != M_DATA) {
7448                                 prevmp->b_cont = NULL;
7449                                 mutex_enter(&stp->sd_lock);
7450                                 putback(stp, q, tmp, tmp->b_band);
7451                                 mutex_exit(&stp->sd_lock);
7452                                 break;
7453                         }
7454                 }
7455 
7456                 bp = (stp->sd_rputdatafunc)(stp->sd_vnode, bp,
7457                     NULL, NULL, NULL, NULL);
7458 
7459                 if (bp == NULL)
7460                         goto retry;
7461         }
7462 
7463         if (STREAM_NEEDSERVICE(stp))
7464                 stream_runservice(stp);
7465 
7466         /*
7467          * Set HIPRI flag if message is priority.
7468          */
7469         if (type >= QPCTL)
7470                 flg = MSG_HIPRI;
7471         else
7472                 flg = MSG_BAND;
7473 
7474         /*
7475          * First process PROTO or PCPROTO blocks, if any.
7476          */
7477         if (mctlp != NULL && type != M_DATA) {
7478                 mblk_t *nbp;
7479 
7480                 *mctlp = bp;
7481                 while (bp->b_cont && bp->b_cont->b_datap->db_type != M_DATA)
7482                         bp = bp->b_cont;
7483                 nbp = bp->b_cont;
7484                 bp->b_cont = NULL;
7485                 bp = nbp;
7486         }
7487 
7488         if (bp && bp->b_datap->db_type != M_DATA) {
7489                 /*
7490                  * More PROTO blocks in msg. Will only happen if mctlp is NULL.
7491                  */
7492                 more |= MORECTL;
7493                 savemp = bp;
7494                 while (bp && bp->b_datap->db_type != M_DATA) {
7495                         savemptail = bp;
7496                         bp = bp->b_cont;
7497                 }
7498                 savemptail->b_cont = NULL;
7499         }
7500 
7501         /*
7502          * Now process DATA blocks, if any.
7503          */
7504         if (uiop == NULL) {
7505                 /* Append data to tail of mctlp */
7506 
7507                 if (mctlp != NULL) {
7508                         mblk_t **mpp = mctlp;
7509 
7510                         while (*mpp != NULL)
7511                                 mpp = &((*mpp)->b_cont);
7512                         *mpp = bp;
7513                         bp = NULL;
7514                 }
7515         } else if (uiop->uio_resid >= 0 && bp) {
7516                 size_t oldresid = uiop->uio_resid;
7517 
7518                 /*
7519                  * If a streams message is likely to consist
7520                  * of many small mblks, it is pulled up into
7521                  * one continuous chunk of memory.
7522                  * The size of the first mblk may be bogus because
7523                  * successive read() calls on the socket reduce
7524                  * the size of this mblk until it is exhausted
7525                  * and then the code walks on to the next. Thus
7526                  * the size of the mblk may not be the original size
7527                  * that was passed up, it's simply a remainder
7528                  * and hence can be very small without any
7529                  * implication that the packet is badly fragmented.
7530                  * So the size of the possible second mblk is
7531                  * used to spot a badly fragmented packet.
7532                  * see longer comment at top of page
7533                  * by mblk_pull_len declaration.
7534                  */
7535 
7536                 if (bp->b_cont != NULL && MBLKL(bp->b_cont) < mblk_pull_len) {
7537                         (void) pullupmsg(bp, -1);
7538                 }
7539 
7540                 bp = struiocopyout(bp, uiop, &error);
7541                 if (error != 0) {
7542                         if (mctlp != NULL) {
7543                                 freemsg(*mctlp);
7544                                 *mctlp = NULL;
7545                         } else
7546                                 freemsg(savemp);
7547                         mutex_enter(&stp->sd_lock);
7548                         /*
7549                          * clear stream head hi pri flag based on
7550                          * first message
7551                          */
7552                         if (!(flags & MSG_IPEEK) && (type >= QPCTL)) {
7553                                 ASSERT(type == M_PCPROTO);
7554                                 stp->sd_flag &= ~STRPRI;
7555                         }
7556                         more = 0;
7557                         goto getmout;
7558                 }
7559                 /*
7560                  * (pr == 1) indicates a partial read.
7561                  */
7562                 if (oldresid > uiop->uio_resid)
7563                         pr = 1;
7564         }
7565 
7566         if (bp) {                       /* more data blocks in msg */
7567                 more |= MOREDATA;
7568                 if (savemp)
7569                         savemptail->b_cont = bp;
7570                 else
7571                         savemp = bp;
7572         }
7573 
7574         mutex_enter(&stp->sd_lock);
7575         if (savemp) {
7576                 if (flags & (MSG_IPEEK|MSG_DISCARDTAIL)) {
7577                         /*
7578                          * When MSG_DISCARDTAIL is set or
7579                          * when peeking discard any tail. When peeking this
7580                          * is the tail of the dup that was copied out - the
7581                          * message has already been putback on the queue.
7582                          * Return MOREDATA to the caller even though the data
7583                          * is discarded. This is used by sockets (to
7584                          * set MSG_TRUNC).
7585                          */
7586                         freemsg(savemp);
7587                         if (!(flags & MSG_IPEEK) && (type >= QPCTL)) {
7588                                 ASSERT(type == M_PCPROTO);
7589                                 stp->sd_flag &= ~STRPRI;
7590                         }
7591                 } else if (pr && (savemp->b_datap->db_type == M_DATA) &&
7592                     msgnodata(savemp)) {
7593                         /*
7594                          * Avoid queuing a zero-length tail part of
7595                          * a message. pr=1 indicates that we read some of
7596                          * the message.
7597                          */
7598                         freemsg(savemp);
7599                         more &= ~MOREDATA;
7600                         if (type >= QPCTL) {
7601                                 ASSERT(type == M_PCPROTO);
7602                                 stp->sd_flag &= ~STRPRI;
7603                         }
7604                 } else {
7605                         savemp->b_band = pri;
7606                         /*
7607                          * If the first message was HIPRI and the one we're
7608                          * putting back isn't, then clear STRPRI, otherwise
7609                          * set STRPRI again.  Note that we must set STRPRI
7610                          * again since the flush logic in strrput_nondata()
7611                          * may have cleared it while we had sd_lock dropped.
7612                          */
7613 
7614                         if (type >= QPCTL) {
7615                                 ASSERT(type == M_PCPROTO);
7616                                 if (queclass(savemp) < QPCTL)
7617                                         stp->sd_flag &= ~STRPRI;
7618                                 else
7619                                         stp->sd_flag |= STRPRI;
7620                         } else if (queclass(savemp) >= QPCTL) {
7621                                 /*
7622                                  * The first message was not a HIPRI message,
7623                                  * but the one we are about to putback is.
7624                                  * For simplicity, we do not allow for HIPRI
7625                                  * messages to be embedded in the message
7626                                  * body, so just force it to same type as
7627                                  * first message.
7628                                  */
7629                                 ASSERT(type == M_DATA || type == M_PROTO);
7630                                 ASSERT(savemp->b_datap->db_type == M_PCPROTO);
7631                                 savemp->b_datap->db_type = type;
7632                         }
7633                         if (mark != 0) {
7634                                 if ((mark & _LASTMARK) &&
7635                                     (stp->sd_mark == NULL)) {
7636                                         /*
7637                                          * If another marked message arrived
7638                                          * while sd_lock was not held sd_mark
7639                                          * would be non-NULL.
7640                                          */
7641                                         stp->sd_mark = savemp;
7642                                 }
7643                                 savemp->b_flag |= mark & ~_LASTMARK;
7644                         }
7645                         putback(stp, q, savemp, pri);
7646                 }
7647         } else if (!(flags & MSG_IPEEK)) {
7648                 /*
7649                  * The complete message was consumed.
7650                  *
7651                  * If another M_PCPROTO arrived while sd_lock was not held
7652                  * it would have been discarded since STRPRI was still set.
7653                  *
7654                  * Move the MSG*MARKNEXT information
7655                  * to the stream head just in case
7656                  * the read queue becomes empty.
7657                  * clear stream head hi pri flag based on
7658                  * first message
7659                  *
7660                  * If the stream head was at the mark
7661                  * (STRATMARK) before we dropped sd_lock above
7662                  * and some data was consumed then we have
7663                  * moved past the mark thus STRATMARK is
7664                  * cleared. However, if a message arrived in
7665                  * strrput during the copyout above causing
7666                  * STRATMARK to be set we can not clear that
7667                  * flag.
7668                  * XXX A "perimeter" would help by single-threading strrput,
7669                  * strread, strgetmsg and kstrgetmsg.
7670                  */
7671                 if (type >= QPCTL) {
7672                         ASSERT(type == M_PCPROTO);
7673                         stp->sd_flag &= ~STRPRI;
7674                 }
7675                 if (mark & (MSGMARKNEXT|MSGNOTMARKNEXT|MSGMARK)) {
7676                         if (mark & MSGMARKNEXT) {
7677                                 stp->sd_flag &= ~STRNOTATMARK;
7678                                 stp->sd_flag |= STRATMARK;
7679                         } else if (mark & MSGNOTMARKNEXT) {
7680                                 stp->sd_flag &= ~STRATMARK;
7681                                 stp->sd_flag |= STRNOTATMARK;
7682                         } else {
7683                                 stp->sd_flag &= ~(STRATMARK|STRNOTATMARK);
7684                         }
7685                 } else if (pr && (old_sd_flag & STRATMARK)) {
7686                         stp->sd_flag &= ~STRATMARK;
7687                 }
7688         }
7689 
7690         *flagsp = flg;
7691         *prip = pri;
7692 
7693         /*
7694          * Getmsg cleanup processing - if the state of the queue has changed
7695          * some signals may need to be sent and/or poll awakened.
7696          */
7697 getmout:
7698         qbackenable(q, pri);
7699 
7700         /*
7701          * We dropped the stream head lock above. Send all M_SIG messages
7702          * before processing stream head for SIGPOLL messages.
7703          */
7704         ASSERT(MUTEX_HELD(&stp->sd_lock));
7705         while ((bp = q->q_first) != NULL &&
7706             (bp->b_datap->db_type == M_SIG)) {
7707                 /*
7708                  * sd_lock is held so the content of the read queue can not
7709                  * change.
7710                  */
7711                 bp = getq(q);
7712                 ASSERT(bp != NULL && bp->b_datap->db_type == M_SIG);
7713 
7714                 strsignal_nolock(stp, *bp->b_rptr, bp->b_band);
7715                 mutex_exit(&stp->sd_lock);
7716                 freemsg(bp);
7717                 if (STREAM_NEEDSERVICE(stp))
7718                         stream_runservice(stp);
7719                 mutex_enter(&stp->sd_lock);
7720         }
7721 
7722         /*
7723          * stream head cannot change while we make the determination
7724          * whether or not to send a signal. Drop the flag to allow strrput
7725          * to send firstmsgsigs again.
7726          */
7727         stp->sd_flag &= ~STRGETINPROG;
7728 
7729         /*
7730          * If the type of message at the front of the queue changed
7731          * due to the receive the appropriate signals and pollwakeup events
7732          * are generated. The type of changes are:
7733          *      Processed a hipri message, q_first is not hipri.
7734          *      Processed a band X message, and q_first is band Y.
7735          * The generated signals and pollwakeups are identical to what
7736          * strrput() generates should the message that is now on q_first
7737          * arrive to an empty read queue.
7738          *
7739          * Note: only strrput will send a signal for a hipri message.
7740          */
7741         if ((bp = q->q_first) != NULL && !(stp->sd_flag & STRPRI)) {
7742                 strsigset_t signals = 0;
7743                 strpollset_t pollwakeups = 0;
7744 
7745                 if (flg & MSG_HIPRI) {
7746                         /*
7747                          * Removed a hipri message. Regular data at
7748                          * the front of  the queue.
7749                          */
7750                         if (bp->b_band == 0) {
7751                                 signals = S_INPUT | S_RDNORM;
7752                                 pollwakeups = POLLIN | POLLRDNORM;
7753                         } else {
7754                                 signals = S_INPUT | S_RDBAND;
7755                                 pollwakeups = POLLIN | POLLRDBAND;
7756                         }
7757                 } else if (pri != bp->b_band) {
7758                         /*
7759                          * The band is different for the new q_first.
7760                          */
7761                         if (bp->b_band == 0) {
7762                                 signals = S_RDNORM;
7763                                 pollwakeups = POLLIN | POLLRDNORM;
7764                         } else {
7765                                 signals = S_RDBAND;
7766                                 pollwakeups = POLLIN | POLLRDBAND;
7767                         }
7768                 }
7769 
7770                 if (pollwakeups != 0) {
7771                         if (pollwakeups == (POLLIN | POLLRDNORM)) {
7772                                 if (!(stp->sd_rput_opt & SR_POLLIN))
7773                                         goto no_pollwake;
7774                                 stp->sd_rput_opt &= ~SR_POLLIN;
7775                         }
7776                         mutex_exit(&stp->sd_lock);
7777                         pollwakeup(&stp->sd_pollist, pollwakeups);
7778                         mutex_enter(&stp->sd_lock);
7779                 }
7780 no_pollwake:
7781 
7782                 if (stp->sd_sigflags & signals)
7783                         strsendsig(stp->sd_siglist, signals, bp->b_band, 0);
7784         }
7785         mutex_exit(&stp->sd_lock);
7786 
7787         rvp->r_val1 = more;
7788         return (error);
7789 #undef  _LASTMARK
7790 }
7791 
7792 /*
7793  * Put a message downstream.
7794  *
7795  * NOTE: strputmsg and kstrputmsg have much of the logic in common.
7796  */
7797 int
7798 strputmsg(
7799         struct vnode *vp,
7800         struct strbuf *mctl,
7801         struct strbuf *mdata,
7802         unsigned char pri,
7803         int flag,
7804         int fmode)
7805 {
7806         struct stdata *stp;
7807         queue_t *wqp;
7808         mblk_t *mp;
7809         ssize_t msgsize;
7810         ssize_t rmin, rmax;
7811         int error;
7812         struct uio uios;
7813         struct uio *uiop = &uios;
7814         struct iovec iovs;
7815         int xpg4 = 0;
7816 
7817         ASSERT(vp->v_stream);
7818         stp = vp->v_stream;
7819         wqp = stp->sd_wrq;
7820 
7821         /*
7822          * If it is an XPG4 application, we need to send
7823          * SIGPIPE below
7824          */
7825 
7826         xpg4 = (flag & MSG_XPG4) ? 1 : 0;
7827         flag &= ~MSG_XPG4;
7828 
7829         if (AU_AUDITING())
7830                 audit_strputmsg(vp, mctl, mdata, pri, flag, fmode);
7831 
7832         mutex_enter(&stp->sd_lock);
7833 
7834         if ((error = i_straccess(stp, JCWRITE)) != 0) {
7835                 mutex_exit(&stp->sd_lock);
7836                 return (error);
7837         }
7838 
7839         if (stp->sd_flag & (STWRERR|STRHUP|STPLEX)) {
7840                 error = strwriteable(stp, B_FALSE, xpg4);
7841                 if (error != 0) {
7842                         mutex_exit(&stp->sd_lock);
7843                         return (error);
7844                 }
7845         }
7846 
7847         mutex_exit(&stp->sd_lock);
7848 
7849         /*
7850          * Check for legal flag value.
7851          */
7852         switch (flag) {
7853         case MSG_HIPRI:
7854                 if ((mctl->len < 0) || (pri != 0))
7855                         return (EINVAL);
7856                 break;
7857         case MSG_BAND:
7858                 break;
7859 
7860         default:
7861                 return (EINVAL);
7862         }
7863 
7864         TRACE_1(TR_FAC_STREAMS_FR, TR_STRPUTMSG_IN,
7865             "strputmsg in:stp %p", stp);
7866 
7867         /* get these values from those cached in the stream head */
7868         rmin = stp->sd_qn_minpsz;
7869         rmax = stp->sd_qn_maxpsz;
7870 
7871         /*
7872          * Make sure ctl and data sizes together fall within the
7873          * limits of the max and min receive packet sizes and do
7874          * not exceed system limit.
7875          */
7876         ASSERT((rmax >= 0) || (rmax == INFPSZ));
7877         if (rmax == 0) {
7878                 return (ERANGE);
7879         }
7880         /*
7881          * Use the MAXIMUM of sd_maxblk and q_maxpsz.
7882          * Needed to prevent partial failures in the strmakedata loop.
7883          */
7884         if (stp->sd_maxblk != INFPSZ && rmax != INFPSZ && rmax < stp->sd_maxblk)
7885                 rmax = stp->sd_maxblk;
7886 
7887         if ((msgsize = mdata->len) < 0) {
7888                 msgsize = 0;
7889                 rmin = 0;       /* no range check for NULL data part */
7890         }
7891         if ((msgsize < rmin) ||
7892             ((msgsize > rmax) && (rmax != INFPSZ)) ||
7893             (mctl->len > strctlsz)) {
7894                 return (ERANGE);
7895         }
7896 
7897         /*
7898          * Setup uio and iov for data part
7899          */
7900         iovs.iov_base = mdata->buf;
7901         iovs.iov_len = msgsize;
7902         uios.uio_iov = &iovs;
7903         uios.uio_iovcnt = 1;
7904         uios.uio_loffset = 0;
7905         uios.uio_segflg = UIO_USERSPACE;
7906         uios.uio_fmode = fmode;
7907         uios.uio_extflg = UIO_COPY_DEFAULT;
7908         uios.uio_resid = msgsize;
7909         uios.uio_offset = 0;
7910 
7911         /* Ignore flow control in strput for HIPRI */
7912         if (flag & MSG_HIPRI)
7913                 flag |= MSG_IGNFLOW;
7914 
7915         for (;;) {
7916                 int done = 0;
7917 
7918                 /*
7919                  * strput will always free the ctl mblk - even when strput
7920                  * fails.
7921                  */
7922                 if ((error = strmakectl(mctl, flag, fmode, &mp)) != 0) {
7923                         TRACE_3(TR_FAC_STREAMS_FR, TR_STRPUTMSG_OUT,
7924                             "strputmsg out:stp %p out %d error %d",
7925                             stp, 1, error);
7926                         return (error);
7927                 }
7928                 /*
7929                  * Verify that the whole message can be transferred by
7930                  * strput.
7931                  */
7932                 ASSERT(stp->sd_maxblk == INFPSZ ||
7933                     stp->sd_maxblk >= mdata->len);
7934 
7935                 msgsize = mdata->len;
7936                 error = strput(stp, mp, uiop, &msgsize, 0, pri, flag);
7937                 mdata->len = msgsize;
7938 
7939                 if (error == 0)
7940                         break;
7941 
7942                 if (error != EWOULDBLOCK)
7943                         goto out;
7944 
7945                 mutex_enter(&stp->sd_lock);
7946                 /*
7947                  * Check for a missed wakeup.
7948                  * Needed since strput did not hold sd_lock across
7949                  * the canputnext.
7950                  */
7951                 if (bcanputnext(wqp, pri)) {
7952                         /* Try again */
7953                         mutex_exit(&stp->sd_lock);
7954                         continue;
7955                 }
7956                 TRACE_2(TR_FAC_STREAMS_FR, TR_STRPUTMSG_WAIT,
7957                     "strputmsg wait:stp %p waits pri %d", stp, pri);
7958                 if (((error = strwaitq(stp, WRITEWAIT, (ssize_t)0, fmode, -1,
7959                     &done)) != 0) || done) {
7960                         mutex_exit(&stp->sd_lock);
7961                         TRACE_3(TR_FAC_STREAMS_FR, TR_STRPUTMSG_OUT,
7962                             "strputmsg out:q %p out %d error %d",
7963                             stp, 0, error);
7964                         return (error);
7965                 }
7966                 TRACE_1(TR_FAC_STREAMS_FR, TR_STRPUTMSG_WAKE,
7967                     "strputmsg wake:stp %p wakes", stp);
7968                 if ((error = i_straccess(stp, JCWRITE)) != 0) {
7969                         mutex_exit(&stp->sd_lock);
7970                         return (error);
7971                 }
7972                 mutex_exit(&stp->sd_lock);
7973         }
7974 out:
7975         /*
7976          * For historic reasons, applications expect EAGAIN
7977          * when data mblk could not be allocated. so change
7978          * ENOMEM back to EAGAIN
7979          */
7980         if (error == ENOMEM)
7981                 error = EAGAIN;
7982         TRACE_3(TR_FAC_STREAMS_FR, TR_STRPUTMSG_OUT,
7983             "strputmsg out:stp %p out %d error %d", stp, 2, error);
7984         return (error);
7985 }
7986 
7987 /*
7988  * Put a message downstream.
7989  * Can send only an M_PROTO/M_PCPROTO by passing in a NULL uiop.
7990  * The effective fmode (NDELAY, NONBLOCK) is the bitwise OR of the flags in
7991  * the uio and the fmode parameter.
7992  *
7993  * This routine handles the consolidation private flags:
7994  *      MSG_IGNERROR    Ignore any stream head error except STPLEX.
7995  *      MSG_HOLDSIG     Hold signals while waiting for data.
7996  *      MSG_IGNFLOW     Don't check streams flow control.
7997  *
7998  * NOTE: strputmsg and kstrputmsg have much of the logic in common.
7999  */
8000 int
8001 kstrputmsg(
8002         struct vnode *vp,
8003         mblk_t *mctl,
8004         struct uio *uiop,
8005         ssize_t msgsize,
8006         unsigned char pri,
8007         int flag,
8008         int fmode)
8009 {
8010         struct stdata *stp;
8011         queue_t *wqp;
8012         ssize_t rmin, rmax;
8013         int error;
8014 
8015         ASSERT(vp->v_stream);
8016         stp = vp->v_stream;
8017         wqp = stp->sd_wrq;
8018         if (AU_AUDITING())
8019                 audit_strputmsg(vp, NULL, NULL, pri, flag, fmode);
8020         if (mctl == NULL)
8021                 return (EINVAL);
8022 
8023         mutex_enter(&stp->sd_lock);
8024 
8025         if ((error = i_straccess(stp, JCWRITE)) != 0) {
8026                 mutex_exit(&stp->sd_lock);
8027                 freemsg(mctl);
8028                 return (error);
8029         }
8030 
8031         if ((stp->sd_flag & STPLEX) || !(flag & MSG_IGNERROR)) {
8032                 if (stp->sd_flag & (STWRERR|STRHUP|STPLEX)) {
8033                         error = strwriteable(stp, B_FALSE, B_TRUE);
8034                         if (error != 0) {
8035                                 mutex_exit(&stp->sd_lock);
8036                                 freemsg(mctl);
8037                                 return (error);
8038                         }
8039                 }
8040         }
8041 
8042         mutex_exit(&stp->sd_lock);
8043 
8044         /*
8045          * Check for legal flag value.
8046          */
8047         switch (flag & (MSG_HIPRI|MSG_BAND|MSG_ANY)) {
8048         case MSG_HIPRI:
8049                 if (pri != 0) {
8050                         freemsg(mctl);
8051                         return (EINVAL);
8052                 }
8053                 break;
8054         case MSG_BAND:
8055                 break;
8056         default:
8057                 freemsg(mctl);
8058                 return (EINVAL);
8059         }
8060 
8061         TRACE_1(TR_FAC_STREAMS_FR, TR_KSTRPUTMSG_IN,
8062             "kstrputmsg in:stp %p", stp);
8063 
8064         /* get these values from those cached in the stream head */
8065         rmin = stp->sd_qn_minpsz;
8066         rmax = stp->sd_qn_maxpsz;
8067 
8068         /*
8069          * Make sure ctl and data sizes together fall within the
8070          * limits of the max and min receive packet sizes and do
8071          * not exceed system limit.
8072          */
8073         ASSERT((rmax >= 0) || (rmax == INFPSZ));
8074         if (rmax == 0) {
8075                 freemsg(mctl);
8076                 return (ERANGE);
8077         }
8078         /*
8079          * Use the MAXIMUM of sd_maxblk and q_maxpsz.
8080          * Needed to prevent partial failures in the strmakedata loop.
8081          */
8082         if (stp->sd_maxblk != INFPSZ && rmax != INFPSZ && rmax < stp->sd_maxblk)
8083                 rmax = stp->sd_maxblk;
8084 
8085         if (uiop == NULL) {
8086                 msgsize = -1;
8087                 rmin = -1;      /* no range check for NULL data part */
8088         } else {
8089                 /* Use uio flags as well as the fmode parameter flags */
8090                 fmode |= uiop->uio_fmode;
8091 
8092                 if ((msgsize < rmin) ||
8093                     ((msgsize > rmax) && (rmax != INFPSZ))) {
8094                         freemsg(mctl);
8095                         return (ERANGE);
8096                 }
8097         }
8098 
8099         /* Ignore flow control in strput for HIPRI */
8100         if (flag & MSG_HIPRI)
8101                 flag |= MSG_IGNFLOW;
8102 
8103         for (;;) {
8104                 int done = 0;
8105                 int waitflag;
8106                 mblk_t *mp;
8107 
8108                 /*
8109                  * strput will always free the ctl mblk - even when strput
8110                  * fails. If MSG_IGNFLOW is set then any error returned
8111                  * will cause us to break the loop, so we don't need a copy
8112                  * of the message. If MSG_IGNFLOW is not set, then we can
8113                  * get hit by flow control and be forced to try again. In
8114                  * this case we need to have a copy of the message. We
8115                  * do this using copymsg since the message may get modified
8116                  * by something below us.
8117                  *
8118                  * We've observed that many TPI providers do not check db_ref
8119                  * on the control messages but blindly reuse them for the
8120                  * T_OK_ACK/T_ERROR_ACK. Thus using copymsg is more
8121                  * friendly to such providers than using dupmsg. Also, note
8122                  * that sockfs uses MSG_IGNFLOW for all TPI control messages.
8123                  * Only data messages are subject to flow control, hence
8124                  * subject to this copymsg.
8125                  */
8126                 if (flag & MSG_IGNFLOW) {
8127                         mp = mctl;
8128                         mctl = NULL;
8129                 } else {
8130                         do {
8131                                 /*
8132                                  * If a message has a free pointer, the message
8133                                  * must be dupmsg to maintain this pointer.
8134                                  * Code using this facility must be sure
8135                                  * that modules below will not change the
8136                                  * contents of the dblk without checking db_ref
8137                                  * first. If db_ref is > 1, then the module
8138                                  * needs to do a copymsg first. Otherwise,
8139                                  * the contents of the dblk may become
8140                                  * inconsistent because the freesmg/freeb below
8141                                  * may end up calling atomic_add_32_nv.
8142                                  * The atomic_add_32_nv in freeb (accessing
8143                                  * all of db_ref, db_type, db_flags, and
8144                                  * db_struioflag) does not prevent other threads
8145                                  * from concurrently trying to modify e.g.
8146                                  * db_type.
8147                                  */
8148                                 if (mctl->b_datap->db_frtnp != NULL)
8149                                         mp = dupmsg(mctl);
8150                                 else
8151                                         mp = copymsg(mctl);
8152 
8153                                 if (mp != NULL)
8154                                         break;
8155 
8156                                 error = strwaitbuf(msgdsize(mctl), BPRI_MED);
8157                                 if (error) {
8158                                         freemsg(mctl);
8159                                         return (error);
8160                                 }
8161                         } while (mp == NULL);
8162                 }
8163                 /*
8164                  * Verify that all of msgsize can be transferred by
8165                  * strput.
8166                  */
8167                 ASSERT(stp->sd_maxblk == INFPSZ || stp->sd_maxblk >= msgsize);
8168                 error = strput(stp, mp, uiop, &msgsize, 0, pri, flag);
8169                 if (error == 0)
8170                         break;
8171 
8172                 if (error != EWOULDBLOCK)
8173                         goto out;
8174 
8175                 /*
8176                  * IF MSG_IGNFLOW is set we should have broken out of loop
8177                  * above.
8178                  */
8179                 ASSERT(!(flag & MSG_IGNFLOW));
8180                 mutex_enter(&stp->sd_lock);
8181                 /*
8182                  * Check for a missed wakeup.
8183                  * Needed since strput did not hold sd_lock across
8184                  * the canputnext.
8185                  */
8186                 if (bcanputnext(wqp, pri)) {
8187                         /* Try again */
8188                         mutex_exit(&stp->sd_lock);
8189                         continue;
8190                 }
8191                 TRACE_2(TR_FAC_STREAMS_FR, TR_KSTRPUTMSG_WAIT,
8192                     "kstrputmsg wait:stp %p waits pri %d", stp, pri);
8193 
8194                 waitflag = WRITEWAIT;
8195                 if (flag & (MSG_HOLDSIG|MSG_IGNERROR)) {
8196                         if (flag & MSG_HOLDSIG)
8197                                 waitflag |= STR_NOSIG;
8198                         if (flag & MSG_IGNERROR)
8199                                 waitflag |= STR_NOERROR;
8200                 }
8201                 if (((error = strwaitq(stp, waitflag,
8202                     (ssize_t)0, fmode, -1, &done)) != 0) || done) {
8203                         mutex_exit(&stp->sd_lock);
8204                         TRACE_3(TR_FAC_STREAMS_FR, TR_KSTRPUTMSG_OUT,
8205                             "kstrputmsg out:stp %p out %d error %d",
8206                             stp, 0, error);
8207                         freemsg(mctl);
8208                         return (error);
8209                 }
8210                 TRACE_1(TR_FAC_STREAMS_FR, TR_KSTRPUTMSG_WAKE,
8211                     "kstrputmsg wake:stp %p wakes", stp);
8212                 if ((error = i_straccess(stp, JCWRITE)) != 0) {
8213                         mutex_exit(&stp->sd_lock);
8214                         freemsg(mctl);
8215                         return (error);
8216                 }
8217                 mutex_exit(&stp->sd_lock);
8218         }
8219 out:
8220         freemsg(mctl);
8221         /*
8222          * For historic reasons, applications expect EAGAIN
8223          * when data mblk could not be allocated. so change
8224          * ENOMEM back to EAGAIN
8225          */
8226         if (error == ENOMEM)
8227                 error = EAGAIN;
8228         TRACE_3(TR_FAC_STREAMS_FR, TR_KSTRPUTMSG_OUT,
8229             "kstrputmsg out:stp %p out %d error %d", stp, 2, error);
8230         return (error);
8231 }
8232 
8233 /*
8234  * Determines whether the necessary conditions are set on a stream
8235  * for it to be readable, writeable, or have exceptions.
8236  *
8237  * strpoll handles the consolidation private events:
8238  *      POLLNOERR       Do not return POLLERR even if there are stream
8239  *                      head errors.
8240  *                      Used by sockfs.
8241  *      POLLRDDATA      Do not return POLLIN unless at least one message on
8242  *                      the queue contains one or more M_DATA mblks. Thus
8243  *                      when this flag is set a queue with only
8244  *                      M_PROTO/M_PCPROTO mblks does not return POLLIN.
8245  *                      Used by sockfs to ignore T_EXDATA_IND messages.
8246  *
8247  * Note: POLLRDDATA assumes that synch streams only return messages with
8248  * an M_DATA attached (i.e. not messages consisting of only
8249  * an M_PROTO/M_PCPROTO part).
8250  */
8251 int
8252 strpoll(struct stdata *stp, short events_arg, int anyyet, short *reventsp,
8253     struct pollhead **phpp)
8254 {
8255         int events = (ushort_t)events_arg;
8256         int retevents = 0;
8257         mblk_t *mp;
8258         qband_t *qbp;
8259         long sd_flags = stp->sd_flag;
8260         int headlocked = 0;
8261 
8262         /*
8263          * For performance, a single 'if' tests for most possible edge
8264          * conditions in one shot
8265          */
8266         if (sd_flags & (STPLEX | STRDERR | STWRERR)) {
8267                 if (sd_flags & STPLEX) {
8268                         *reventsp = POLLNVAL;
8269                         return (EINVAL);
8270                 }
8271                 if (((events & (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) &&
8272                     (sd_flags & STRDERR)) ||
8273                     ((events & (POLLOUT | POLLWRNORM | POLLWRBAND)) &&
8274                     (sd_flags & STWRERR))) {
8275                         if (!(events & POLLNOERR)) {
8276                                 *reventsp = POLLERR;
8277                                 return (0);
8278                         }
8279                 }
8280         }
8281         if (sd_flags & STRHUP) {
8282                 retevents |= POLLHUP;
8283         } else if (events & (POLLWRNORM | POLLWRBAND)) {
8284                 queue_t *tq;
8285                 queue_t *qp = stp->sd_wrq;
8286 
8287                 claimstr(qp);
8288                 /* Find next module forward that has a service procedure */
8289                 tq = qp->q_next->q_nfsrv;
8290                 ASSERT(tq != NULL);
8291 
8292                 if (polllock(&stp->sd_pollist, QLOCK(tq)) != 0) {
8293                         releasestr(qp);
8294                         *reventsp = POLLNVAL;
8295                         return (0);
8296                 }
8297                 if (events & POLLWRNORM) {
8298                         queue_t *sqp;
8299 
8300                         if (tq->q_flag & QFULL)
8301                                 /* ensure backq svc procedure runs */
8302                                 tq->q_flag |= QWANTW;
8303                         else if ((sqp = stp->sd_struiowrq) != NULL) {
8304                                 /* Check sync stream barrier write q */
8305                                 mutex_exit(QLOCK(tq));
8306                                 if (polllock(&stp->sd_pollist,
8307                                     QLOCK(sqp)) != 0) {
8308                                         releasestr(qp);
8309                                         *reventsp = POLLNVAL;
8310                                         return (0);
8311                                 }
8312                                 if (sqp->q_flag & QFULL)
8313                                         /* ensure pollwakeup() is done */
8314                                         sqp->q_flag |= QWANTWSYNC;
8315                                 else
8316                                         retevents |= POLLOUT;
8317                                 /* More write events to process ??? */
8318                                 if (! (events & POLLWRBAND)) {
8319                                         mutex_exit(QLOCK(sqp));
8320                                         releasestr(qp);
8321                                         goto chkrd;
8322                                 }
8323                                 mutex_exit(QLOCK(sqp));
8324                                 if (polllock(&stp->sd_pollist,
8325                                     QLOCK(tq)) != 0) {
8326                                         releasestr(qp);
8327                                         *reventsp = POLLNVAL;
8328                                         return (0);
8329                                 }
8330                         } else
8331                                 retevents |= POLLOUT;
8332                 }
8333                 if (events & POLLWRBAND) {
8334                         qbp = tq->q_bandp;
8335                         if (qbp) {
8336                                 while (qbp) {
8337                                         if (qbp->qb_flag & QB_FULL)
8338                                                 qbp->qb_flag |= QB_WANTW;
8339                                         else
8340                                                 retevents |= POLLWRBAND;
8341                                         qbp = qbp->qb_next;
8342                                 }
8343                         } else {
8344                                 retevents |= POLLWRBAND;
8345                         }
8346                 }
8347                 mutex_exit(QLOCK(tq));
8348                 releasestr(qp);
8349         }
8350 chkrd:
8351         if (sd_flags & STRPRI) {
8352                 retevents |= (events & POLLPRI);
8353         } else if (events & (POLLRDNORM | POLLRDBAND | POLLIN)) {
8354                 queue_t *qp = _RD(stp->sd_wrq);
8355                 int normevents = (events & (POLLIN | POLLRDNORM));
8356 
8357                 /*
8358                  * Note: Need to do polllock() here since ps_lock may be
8359                  * held. See bug 4191544.
8360                  */
8361                 if (polllock(&stp->sd_pollist, &stp->sd_lock) != 0) {
8362                         *reventsp = POLLNVAL;
8363                         return (0);
8364                 }
8365                 headlocked = 1;
8366                 mp = qp->q_first;
8367                 while (mp) {
8368                         /*
8369                          * For POLLRDDATA we scan b_cont and b_next until we
8370                          * find an M_DATA.
8371                          */
8372                         if ((events & POLLRDDATA) &&
8373                             mp->b_datap->db_type != M_DATA) {
8374                                 mblk_t *nmp = mp->b_cont;
8375 
8376                                 while (nmp != NULL &&
8377                                     nmp->b_datap->db_type != M_DATA)
8378                                         nmp = nmp->b_cont;
8379                                 if (nmp == NULL) {
8380                                         mp = mp->b_next;
8381                                         continue;
8382                                 }
8383                         }
8384                         if (mp->b_band == 0)
8385                                 retevents |= normevents;
8386                         else
8387                                 retevents |= (events & (POLLIN | POLLRDBAND));
8388                         break;
8389                 }
8390                 if (!(retevents & normevents) && (stp->sd_wakeq & RSLEEP)) {
8391                         /*
8392                          * Sync stream barrier read queue has data.
8393                          */
8394                         retevents |= normevents;
8395                 }
8396                 /* Treat eof as normal data */
8397                 if (sd_flags & STREOF)
8398                         retevents |= normevents;
8399         }
8400 
8401         /*
8402          * Pass back a pollhead if no events are pending or if edge-triggering
8403          * has been configured on this resource.
8404          */
8405         if ((retevents == 0 && !anyyet) || (events & POLLET)) {
8406                 *phpp = &stp->sd_pollist;
8407                 if (headlocked == 0) {
8408                         if (polllock(&stp->sd_pollist, &stp->sd_lock) != 0) {
8409                                 *reventsp = POLLNVAL;
8410                                 return (0);
8411                         }
8412                         headlocked = 1;
8413                 }
8414                 stp->sd_rput_opt |= SR_POLLIN;
8415         }
8416 
8417         *reventsp = (short)retevents;
8418         if (headlocked)
8419                 mutex_exit(&stp->sd_lock);
8420         return (0);
8421 }
8422 
8423 /*
8424  * The purpose of putback() is to assure sleeping polls/reads
8425  * are awakened when there are no new messages arriving at the,
8426  * stream head, and a message is placed back on the read queue.
8427  *
8428  * sd_lock must be held when messages are placed back on stream
8429  * head.  (getq() holds sd_lock when it removes messages from
8430  * the queue)
8431  */
8432 
8433 static void
8434 putback(struct stdata *stp, queue_t *q, mblk_t *bp, int band)
8435 {
8436         mblk_t  *qfirst;
8437         ASSERT(MUTEX_HELD(&stp->sd_lock));
8438 
8439         /*
8440          * As a result of lock-step ordering around q_lock and sd_lock,
8441          * it's possible for function calls like putnext() and
8442          * canputnext() to get an inaccurate picture of how much
8443          * data is really being processed at the stream head.
8444          * We only consolidate with existing messages on the queue
8445          * if the length of the message we want to put back is smaller
8446          * than the queue hiwater mark.
8447          */
8448         if ((stp->sd_rput_opt & SR_CONSOL_DATA) &&
8449             (DB_TYPE(bp) == M_DATA) && ((qfirst = q->q_first) != NULL) &&
8450             (DB_TYPE(qfirst) == M_DATA) &&
8451             ((qfirst->b_flag & (MSGMARK|MSGDELIM)) == 0) &&
8452             ((bp->b_flag & (MSGMARK|MSGDELIM|MSGMARKNEXT)) == 0) &&
8453             (mp_cont_len(bp, NULL) < q->q_hiwat)) {
8454                 /*
8455                  * We use the same logic as defined in strrput()
8456                  * but in reverse as we are putting back onto the
8457                  * queue and want to retain byte ordering.
8458                  * Consolidate M_DATA messages with M_DATA ONLY.
8459                  * strrput() allows the consolidation of M_DATA onto
8460                  * M_PROTO | M_PCPROTO but not the other way round.
8461                  *
8462                  * The consolidation does not take place if the message
8463                  * we are returning to the queue is marked with either
8464                  * of the marks or the delim flag or if q_first
8465                  * is marked with MSGMARK. The MSGMARK check is needed to
8466                  * handle the odd semantics of MSGMARK where essentially
8467                  * the whole message is to be treated as marked.
8468                  * Carry any MSGMARKNEXT and MSGNOTMARKNEXT from q_first
8469                  * to the front of the b_cont chain.
8470                  */
8471                 rmvq_noenab(q, qfirst);
8472 
8473                 /*
8474                  * The first message in the b_cont list
8475                  * tracks MSGMARKNEXT and MSGNOTMARKNEXT.
8476                  * We need to handle the case where we
8477                  * are appending:
8478                  *
8479                  * 1) a MSGMARKNEXT to a MSGNOTMARKNEXT.
8480                  * 2) a MSGMARKNEXT to a plain message.
8481                  * 3) a MSGNOTMARKNEXT to a plain message
8482                  * 4) a MSGNOTMARKNEXT to a MSGNOTMARKNEXT
8483                  *    message.
8484                  *
8485                  * Thus we never append a MSGMARKNEXT or
8486                  * MSGNOTMARKNEXT to a MSGMARKNEXT message.
8487                  */
8488                 if (qfirst->b_flag & MSGMARKNEXT) {
8489                         bp->b_flag |= MSGMARKNEXT;
8490                         bp->b_flag &= ~MSGNOTMARKNEXT;
8491                         qfirst->b_flag &= ~MSGMARKNEXT;
8492                 } else if (qfirst->b_flag & MSGNOTMARKNEXT) {
8493                         bp->b_flag |= MSGNOTMARKNEXT;
8494                         qfirst->b_flag &= ~MSGNOTMARKNEXT;
8495                 }
8496 
8497                 linkb(bp, qfirst);
8498         }
8499         (void) putbq(q, bp);
8500 
8501         /*
8502          * A message may have come in when the sd_lock was dropped in the
8503          * calling routine. If this is the case and STR*ATMARK info was
8504          * received, need to move that from the stream head to the q_last
8505          * so that SIOCATMARK can return the proper value.
8506          */
8507         if (stp->sd_flag & (STRATMARK | STRNOTATMARK)) {
8508                 unsigned short *flagp = &q->q_last->b_flag;
8509                 uint_t b_flag = (uint_t)*flagp;
8510 
8511                 if (stp->sd_flag & STRATMARK) {
8512                         b_flag &= ~MSGNOTMARKNEXT;
8513                         b_flag |= MSGMARKNEXT;
8514                         stp->sd_flag &= ~STRATMARK;
8515                 } else {
8516                         b_flag &= ~MSGMARKNEXT;
8517                         b_flag |= MSGNOTMARKNEXT;
8518                         stp->sd_flag &= ~STRNOTATMARK;
8519                 }
8520                 *flagp = (unsigned short) b_flag;
8521         }
8522 
8523 #ifdef  DEBUG
8524         /*
8525          * Make sure that the flags are not messed up.
8526          */
8527         {
8528                 mblk_t *mp;
8529                 mp = q->q_last;
8530                 while (mp != NULL) {
8531                         ASSERT((mp->b_flag & (MSGMARKNEXT|MSGNOTMARKNEXT)) !=
8532                             (MSGMARKNEXT|MSGNOTMARKNEXT));
8533                         mp = mp->b_cont;
8534                 }
8535         }
8536 #endif
8537         if (q->q_first == bp) {
8538                 short pollevents;
8539 
8540                 if (stp->sd_flag & RSLEEP) {
8541                         stp->sd_flag &= ~RSLEEP;
8542                         cv_broadcast(&q->q_wait);
8543                 }
8544                 if (stp->sd_flag & STRPRI) {
8545                         pollevents = POLLPRI;
8546                 } else {
8547                         if (band == 0) {
8548                                 if (!(stp->sd_rput_opt & SR_POLLIN))
8549                                         return;
8550                                 stp->sd_rput_opt &= ~SR_POLLIN;
8551                                 pollevents = POLLIN | POLLRDNORM;
8552                         } else {
8553                                 pollevents = POLLIN | POLLRDBAND;
8554                         }
8555                 }
8556                 mutex_exit(&stp->sd_lock);
8557                 pollwakeup(&stp->sd_pollist, pollevents);
8558                 mutex_enter(&stp->sd_lock);
8559         }
8560 }
8561 
8562 /*
8563  * Return the held vnode attached to the stream head of a
8564  * given queue
8565  * It is the responsibility of the calling routine to ensure
8566  * that the queue does not go away (e.g. pop).
8567  */
8568 vnode_t *
8569 strq2vp(queue_t *qp)
8570 {
8571         vnode_t *vp;
8572         vp = STREAM(qp)->sd_vnode;
8573         ASSERT(vp != NULL);
8574         VN_HOLD(vp);
8575         return (vp);
8576 }
8577 
8578 /*
8579  * return the stream head write queue for the given vp
8580  * It is the responsibility of the calling routine to ensure
8581  * that the stream or vnode do not close.
8582  */
8583 queue_t *
8584 strvp2wq(vnode_t *vp)
8585 {
8586         ASSERT(vp->v_stream != NULL);
8587         return (vp->v_stream->sd_wrq);
8588 }
8589 
8590 /*
8591  * pollwakeup stream head
8592  * It is the responsibility of the calling routine to ensure
8593  * that the stream or vnode do not close.
8594  */
8595 void
8596 strpollwakeup(vnode_t *vp, short event)
8597 {
8598         ASSERT(vp->v_stream);
8599         pollwakeup(&vp->v_stream->sd_pollist, event);
8600 }
8601 
8602 /*
8603  * Mate the stream heads of two vnodes together. If the two vnodes are the
8604  * same, we just make the write-side point at the read-side -- otherwise,
8605  * we do a full mate.  Only works on vnodes associated with streams that are
8606  * still being built and thus have only a stream head.
8607  */
8608 void
8609 strmate(vnode_t *vp1, vnode_t *vp2)
8610 {
8611         queue_t *wrq1 = strvp2wq(vp1);
8612         queue_t *wrq2 = strvp2wq(vp2);
8613 
8614         /*
8615          * Verify that there are no modules on the stream yet.  We also
8616          * rely on the stream head always having a service procedure to
8617          * avoid tweaking q_nfsrv.
8618          */
8619         ASSERT(wrq1->q_next == NULL && wrq2->q_next == NULL);
8620         ASSERT(wrq1->q_qinfo->qi_srvp != NULL);
8621         ASSERT(wrq2->q_qinfo->qi_srvp != NULL);
8622 
8623         /*
8624          * If the queues are the same, just twist; otherwise do a full mate.
8625          */
8626         if (wrq1 == wrq2) {
8627                 wrq1->q_next = _RD(wrq1);
8628         } else {
8629                 wrq1->q_next = _RD(wrq2);
8630                 wrq2->q_next = _RD(wrq1);
8631                 STREAM(wrq1)->sd_mate = STREAM(wrq2);
8632                 STREAM(wrq1)->sd_flag |= STRMATE;
8633                 STREAM(wrq2)->sd_mate = STREAM(wrq1);
8634                 STREAM(wrq2)->sd_flag |= STRMATE;
8635         }
8636 }
8637 
8638 /*
8639  * XXX will go away when console is correctly fixed.
8640  * Clean up the console PIDS, from previous I_SETSIG,
8641  * called only for cnopen which never calls strclean().
8642  */
8643 void
8644 str_cn_clean(struct vnode *vp)
8645 {
8646         strsig_t *ssp, *pssp, *tssp;
8647         struct stdata *stp;
8648         struct pid  *pidp;
8649         int update = 0;
8650 
8651         ASSERT(vp->v_stream);
8652         stp = vp->v_stream;
8653         pssp = NULL;
8654         mutex_enter(&stp->sd_lock);
8655         ssp = stp->sd_siglist;
8656         while (ssp) {
8657                 mutex_enter(&pidlock);
8658                 pidp = ssp->ss_pidp;
8659                 /*
8660                  * Get rid of PID if the proc is gone.
8661                  */
8662                 if (pidp->pid_prinactive) {
8663                         tssp = ssp->ss_next;
8664                         if (pssp)
8665                                 pssp->ss_next = tssp;
8666                         else
8667                                 stp->sd_siglist = tssp;
8668                         ASSERT(pidp->pid_ref <= 1);
8669                         PID_RELE(ssp->ss_pidp);
8670                         mutex_exit(&pidlock);
8671                         kmem_free(ssp, sizeof (strsig_t));
8672                         update = 1;
8673                         ssp = tssp;
8674                         continue;
8675                 } else
8676                         mutex_exit(&pidlock);
8677                 pssp = ssp;
8678                 ssp = ssp->ss_next;
8679         }
8680         if (update) {
8681                 stp->sd_sigflags = 0;
8682                 for (ssp = stp->sd_siglist; ssp; ssp = ssp->ss_next)
8683                         stp->sd_sigflags |= ssp->ss_events;
8684         }
8685         mutex_exit(&stp->sd_lock);
8686 }
8687 
8688 /*
8689  * Return B_TRUE if there is data in the message, B_FALSE otherwise.
8690  */
8691 static boolean_t
8692 msghasdata(mblk_t *bp)
8693 {
8694         for (; bp; bp = bp->b_cont)
8695                 if (bp->b_datap->db_type == M_DATA) {
8696                         ASSERT(bp->b_wptr >= bp->b_rptr);
8697                         if (bp->b_wptr > bp->b_rptr)
8698                                 return (B_TRUE);
8699                 }
8700         return (B_FALSE);
8701 }