1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
22 /* All Rights Reserved */
23
24
25 /*
26 * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
27 * Copyright 2017 Joyent, Inc.
28 * Copyright 2019 OmniOS Community Edition (OmniOSce) Association.
29 */
30
31 #include <sys/types.h>
32 #include <sys/sysmacros.h>
33 #include <sys/param.h>
34 #include <sys/errno.h>
35 #include <sys/signal.h>
36 #include <sys/stat.h>
37 #include <sys/proc.h>
38 #include <sys/cred.h>
39 #include <sys/user.h>
40 #include <sys/vnode.h>
41 #include <sys/file.h>
42 #include <sys/stream.h>
43 #include <sys/strsubr.h>
44 #include <sys/stropts.h>
45 #include <sys/tihdr.h>
46 #include <sys/var.h>
47 #include <sys/poll.h>
48 #include <sys/termio.h>
49 #include <sys/ttold.h>
50 #include <sys/systm.h>
51 #include <sys/uio.h>
52 #include <sys/cmn_err.h>
53 #include <sys/sad.h>
54 #include <sys/netstack.h>
55 #include <sys/priocntl.h>
56 #include <sys/jioctl.h>
57 #include <sys/procset.h>
58 #include <sys/session.h>
59 #include <sys/kmem.h>
60 #include <sys/filio.h>
61 #include <sys/vtrace.h>
62 #include <sys/debug.h>
63 #include <sys/strredir.h>
64 #include <sys/fs/fifonode.h>
65 #include <sys/fs/snode.h>
66 #include <sys/strlog.h>
67 #include <sys/strsun.h>
68 #include <sys/project.h>
69 #include <sys/kbio.h>
70 #include <sys/msio.h>
71 #include <sys/tty.h>
72 #include <sys/ptyvar.h>
73 #include <sys/vuid_event.h>
74 #include <sys/modctl.h>
75 #include <sys/sunddi.h>
76 #include <sys/sunldi_impl.h>
77 #include <sys/autoconf.h>
78 #include <sys/policy.h>
79 #include <sys/dld.h>
80 #include <sys/zone.h>
81 #include <sys/limits.h>
82 #include <c2/audit.h>
83
84 /*
85 * This define helps improve the readability of streams code while
86 * still maintaining a very old streams performance enhancement. The
87 * performance enhancement basically involved having all callers
88 * of straccess() perform the first check that straccess() will do
89 * locally before actually calling straccess(). (Thereby reducing
90 * the number of unnecessary calls to straccess().)
 *
 * Note that the inline checks are made against a variable named `stp'
 * taken from the scope in which the macro is expanded, not against the
 * macro's first argument, so i_straccess() can only be used where such a
 * variable exists. The macro evaluates to 0 without calling straccess()
 * when stp->sd_sidp is NULL or when the stream's vnode is a VFIFO;
 * otherwise it evaluates to the result of straccess(x, y).
 * (Presumably callers pass that same `stp' as x -- confirm at call sites.)
91 */
92 #define i_straccess(x, y) ((stp->sd_sidp == NULL) ? 0 : \
93 (stp->sd_vnode->v_type == VFIFO) ? 0 : \
94 straccess((x), (y)))
95
96 /*
97 * what is mblk_pull_len?
98 *
99 * If a streams message consists of many short mblks,
100 * a performance degradation occurs from copyout overhead.
101 * To decrease the per mblk overhead, messages that are
102 * likely to consist of many small mblks are pulled up into
103 * one continuous chunk of memory.
104 *
105 * To avoid the processing overhead of examining every
106 * mblk, a quick heuristic is used. If the first mblk in
107 * the message is shorter than mblk_pull_len, it is likely
108 * that the rest of the mblks in the message will be short too.
109 *
110 * This heuristic was decided upon after performance tests
111 * indicated that anything more complex slowed down the main
112 * code path.
 *
 * MBLK_PULL_LEN is only the initial value; the threshold itself lives in
 * the non-static variable mblk_pull_len.
113 */
114 #define MBLK_PULL_LEN 64
115 uint32_t mblk_pull_len = MBLK_PULL_LEN;
116
117 /*
118 * The sgttyb_handling flag controls the handling of the old BSD
119 * TIOCGETP, TIOCSETP, and TIOCSETN ioctls as follows:
120 *
121 * 0 - Emit no warnings at all and retain old, broken behavior.
122 * 1 - Emit no warnings and silently handle new semantics.
123 * 2 - Send cmn_err(CE_NOTE) when either TIOCSETP or TIOCSETN is used
124 * (once per system invocation). Handle with new semantics.
125 * 3 - Send SIGSYS when any TIOCGETP, TIOCSETP, or TIOCSETN call is
126 * made (so that offenders drop core and are easy to debug).
127 *
128 * The "new semantics" are that TIOCGETP returns B38400 for
129 * sg_[io]speed if the corresponding value is over B38400, and that
130 * TIOCSET[PN] accept B38400 in these cases to mean "retain current
131 * bit rate."
 *
 * The value compiled in below selects mode 1.
132 */
133 int sgttyb_handling = 1;
/*
 * NOTE(review): presumably records that the once-per-boot CE_NOTE of
 * mode 2 has already been issued; its users are not visible here -- confirm.
 */
134 static boolean_t sgttyb_complaint;
135
136 /*
 * don't push drcompat module by default on Style-2 streams
 *
 * stropen() consults this flag: only when it is non-zero does a clone open
 * of a network driver whose queue lacks _QASSOCIATED get DRMODNAME pushed.
 */
137 static int push_drcompat = 0;
138
139 /*
140 * id value used to distinguish between different ioctl messages
141 */
142 static uint32_t ioc_id;
143
/*
 * Forward declarations of file-local helpers.
 */
144 static void putback(struct stdata *, queue_t *, mblk_t *, int);
145 static void strcleanall(struct vnode *);
146 static int strwsrv(queue_t *);
147 static int strdocmd(struct stdata *, struct strcmd *, cred_t *);
148
149 /*
150 * qinit and module_info structures for stream head read and write queues
 *
 * stropen() installs strdata/stwdata on the head queue pair of an ordinary
 * stream and fifo_strdata/fifo_stwdata on that of a VFIFO vnode. In both
 * cases the read side names strrput() as its first entry and the write
 * side names strwsrv() as its second; per struct qinit in <sys/stream.h>
 * these are taken to be the put and service procedures respectively.
 * The FIFO read side differs from the ordinary one only in its
 * module_info limits (PIPE_BUF, FIFOHIWAT, FIFOLOWAT instead of INFPSZ,
 * STRHIGH, STRLOW).
151 */
152 struct module_info strm_info = { 0, "strrhead", 0, INFPSZ, STRHIGH, STRLOW };
153 struct module_info stwm_info = { 0, "strwhead", 0, 0, 0, 0 };
154 struct qinit strdata = { strrput, NULL, NULL, NULL, NULL, &strm_info };
155 struct qinit stwdata = { NULL, strwsrv, NULL, NULL, NULL, &stwm_info };
156 struct module_info fiform_info = { 0, "fifostrrhead", 0, PIPE_BUF, FIFOHIWAT,
157 FIFOLOWAT };
158 struct module_info fifowm_info = { 0, "fifostrwhead", 0, 0, 0, 0 };
159 struct qinit fifo_strdata = { strrput, NULL, NULL, NULL, NULL, &fiform_info };
160 struct qinit fifo_stwdata = { NULL, strwsrv, NULL, NULL, NULL, &fifowm_info };
161
162 extern kmutex_t strresources; /* protects global resources */
163 extern kmutex_t muxifier; /* single-threads multiplexor creation */
164
/* msgnodata() is simply the logical negation of msghasdata(). */
165 static boolean_t msghasdata(mblk_t *bp);
166 #define msgnodata(bp) (!msghasdata(bp))
167
168 /*
169 * Stream head locking notes:
170 * There are four monitors associated with the stream head:
171 * 1. v_stream monitor: in stropen() and strclose() v_lock
172 * is held while the association of vnode and stream
173 * head is established or tested for.
174 * 2. open/close/push/pop monitor: sd_lock is held while each
175 * thread bids for exclusive access to this monitor
176 * for opening or closing a stream. In addition, this
177 * monitor is entered during pushes and pops. This
178 * guarantees that during plumbing operations there
179 * is only one thread trying to change the plumbing.
180 * Any other threads present in the stream are only
181 * using the plumbing.
182 * 3. read/write monitor: in the case of read, a thread holds
183 * sd_lock while trying to get data from the stream
184 * head queue. if there is none to fulfill a read
185 * request, it sets RSLEEP and calls cv_wait_sig() down
186 * in strwaitq() to await the arrival of new data.
187 * when new data arrives in strrput(), sd_lock is acquired
188 * before testing for RSLEEP and calling cv_broadcast().
189 * the behavior of strwrite(), strwsrv(), and WSLEEP
190 * mirror this.
191 * 4. ioctl monitor: sd_lock is gotten to ensure that only one
192 * thread is doing an ioctl at a time.
193 */
194
195 static int
196 push_mod(queue_t *qp, dev_t *devp, struct stdata *stp, const char *name,
197 int anchor, cred_t *crp, uint_t anchor_zoneid)
198 {
199 int error;
200 fmodsw_impl_t *fp;
201
202 if (stp->sd_flag & (STRHUP|STRDERR|STWRERR)) {
203 error = (stp->sd_flag & STRHUP) ? ENXIO : EIO;
204 return (error);
205 }
206 if (stp->sd_pushcnt >= nstrpush) {
207 return (EINVAL);
208 }
209
210 if ((fp = fmodsw_find(name, FMODSW_HOLD | FMODSW_LOAD)) == NULL) {
211 stp->sd_flag |= STREOPENFAIL;
212 return (EINVAL);
213 }
214
215 /*
216 * push new module and call its open routine via qattach
217 */
218 if ((error = qattach(qp, devp, 0, crp, fp, B_FALSE)) != 0)
219 return (error);
220
221 /*
222 * Check to see if caller wants a STREAMS anchor
223 * put at this place in the stream, and add if so.
224 */
225 mutex_enter(&stp->sd_lock);
226 if (anchor == stp->sd_pushcnt) {
227 stp->sd_anchor = stp->sd_pushcnt;
228 stp->sd_anchorzone = anchor_zoneid;
229 }
230 mutex_exit(&stp->sd_lock);
231
232 return (0);
233 }
234
235 /*
236 * Open a stream device.
 *
 * vp	- vnode being opened; vp->v_stream is examined and set under v_lock.
 * devp	- in/out device number; passed to qattach()/qreopen() and read
 *	  again after the driver open.
 * flag	- open flags; with FNDELAY or FNONBLOCK a stream that is busy
 *	  opening, closing or being plumbed yields EAGAIN instead of a wait.
 * crp	- credentials of the opener.
 *
 * Returns 0 on success or an errno value: EAGAIN and EINTR from the wait
 * for a busy stream, EIO when an existing stream has STRDERR or STWRERR
 * set, or the error from qreopen(), qattach() or push_mod().
237 */
238 int
239 stropen(vnode_t *vp, dev_t *devp, int flag, cred_t *crp)
240 {
241 struct stdata *stp;
242 queue_t *qp;
243 int s;
244 dev_t dummydev, savedev;
245 struct autopush *ap;
246 struct dlautopush dlap;
247 int error = 0;
248 ssize_t rmin, rmax;
249 int cloneopen;
250 queue_t *brq;
251 major_t major;
252 str_stack_t *ss;
253 zoneid_t zoneid;
254 uint_t anchor;
255
256 /*
257 * If the stream already exists, wait for any open in progress
258 * to complete, then call the open function of each module and
259 * driver in the stream. Otherwise create the stream.
260 */
261 TRACE_1(TR_FAC_STREAMS_FR, TR_STROPEN, "stropen:%p", vp);
262 retry:
263 mutex_enter(&vp->v_lock);
264 if ((stp = vp->v_stream) != NULL) {
265
266 /*
267 * Waiting for stream to be created to device
268 * due to another open.
269 */
270 mutex_exit(&vp->v_lock);
271
272 if (STRMATED(stp)) {
273 struct stdata *strmatep = stp->sd_mate;
274
/*
 * STRLOCKMATES is taken to acquire the sd_lock of both mates, as the
 * paired mutex_exit() calls below imply. Each error path drops the
 * mate's lock and arrives at ckreturn holding only stp->sd_lock.
 */
275 STRLOCKMATES(stp);
276 if (strmatep->sd_flag & (STWOPEN|STRCLOSE|STRPLUMB)) {
277 if (flag & (FNDELAY|FNONBLOCK)) {
278 error = EAGAIN;
279 mutex_exit(&strmatep->sd_lock);
280 goto ckreturn;
281 }
/* Sleep on the mate's monitor holding only the mate's lock. */
282 mutex_exit(&stp->sd_lock);
283 if (!cv_wait_sig(&strmatep->sd_monitor,
284 &strmatep->sd_lock)) {
285 error = EINTR;
/* Swap locks: ckreturn expects stp->sd_lock to be held. */
286 mutex_exit(&strmatep->sd_lock);
287 mutex_enter(&stp->sd_lock);
288 goto ckreturn;
289 }
290 mutex_exit(&strmatep->sd_lock);
291 goto retry;
292 }
293 if (stp->sd_flag & (STWOPEN|STRCLOSE|STRPLUMB)) {
294 if (flag & (FNDELAY|FNONBLOCK)) {
295 error = EAGAIN;
296 mutex_exit(&strmatep->sd_lock);
297 goto ckreturn;
298 }
299 mutex_exit(&strmatep->sd_lock);
300 if (!cv_wait_sig(&stp->sd_monitor,
301 &stp->sd_lock)) {
302 error = EINTR;
303 goto ckreturn;
304 }
305 mutex_exit(&stp->sd_lock);
306 goto retry;
307 }
308
309 if (stp->sd_flag & (STRDERR|STWRERR)) {
310 error = EIO;
311 mutex_exit(&strmatep->sd_lock);
312 goto ckreturn;
313 }
314
315 stp->sd_flag |= STWOPEN;
316 STRUNLOCKMATES(stp);
317 } else {
318 mutex_enter(&stp->sd_lock);
319 if (stp->sd_flag & (STWOPEN|STRCLOSE|STRPLUMB)) {
320 if (flag & (FNDELAY|FNONBLOCK)) {
321 error = EAGAIN;
322 goto ckreturn;
323 }
324 if (!cv_wait_sig(&stp->sd_monitor,
325 &stp->sd_lock)) {
326 error = EINTR;
327 goto ckreturn;
328 }
329 mutex_exit(&stp->sd_lock);
330 goto retry; /* could be clone! */
331 }
332
333 if (stp->sd_flag & (STRDERR|STWRERR)) {
334 error = EIO;
335 goto ckreturn;
336 }
337
338 stp->sd_flag |= STWOPEN;
339 mutex_exit(&stp->sd_lock);
340 }
341
342 /*
343 * Open all modules and devices down stream to notify
344 * that another user is streaming. For modules, set the
345 * last argument to MODOPEN and do not pass any open flags.
346 * Ignore dummydev since this is not the first open.
 *
 * The walk stops at the first qreopen() failure. Note that the
 * flag and error-code reset that follows is performed whether or
 * not a qreopen() failed.
347 */
348 claimstr(stp->sd_wrq);
349 qp = stp->sd_wrq;
350 while (_SAMESTR(qp)) {
351 qp = qp->q_next;
352 if ((error = qreopen(_RD(qp), devp, flag, crp)) != 0)
353 break;
354 }
355 releasestr(stp->sd_wrq);
356 mutex_enter(&stp->sd_lock);
357 stp->sd_flag &= ~(STRHUP|STWOPEN|STRDERR|STWRERR);
358 stp->sd_rerror = 0;
359 stp->sd_werror = 0;
/*
 * Common exit for the existing-stream case; entered with stp->sd_lock
 * held. Wakes any thread waiting on the monitor and returns `error'.
 */
360 ckreturn:
361 cv_broadcast(&stp->sd_monitor);
362 mutex_exit(&stp->sd_lock);
363 return (error);
364 }
365
366 /*
367 * This vnode isn't streaming. SPECFS already
368 * checked for multiple vnodes pointing to the
369 * same stream, so create a stream to the driver.
370 */
371 qp = allocq();
372 stp = shalloc(qp);
373
374 /*
375 * Initialize stream head. shalloc() has given us
376 * exclusive access, and we have the vnode locked;
377 * we can do whatever we want with stp.
378 */
379 stp->sd_flag = STWOPEN;
380 stp->sd_siglist = NULL;
381 stp->sd_pollist.ph_list = NULL;
382 stp->sd_sigflags = 0;
383 stp->sd_mark = NULL;
384 stp->sd_closetime = STRTIMOUT;
385 stp->sd_sidp = NULL;
386 stp->sd_pgidp = NULL;
387 stp->sd_vnode = vp;
388 stp->sd_pvnode = NULL;
389 stp->sd_rerror = 0;
390 stp->sd_werror = 0;
391 stp->sd_wroff = 0;
392 stp->sd_tail = 0;
393 stp->sd_iocblk = NULL;
394 stp->sd_cmdblk = NULL;
395 stp->sd_pushcnt = 0;
396 stp->sd_qn_minpsz = 0;
397 stp->sd_qn_maxpsz = INFPSZ - 1; /* used to check for initialization */
398 stp->sd_maxblk = INFPSZ;
399 qp->q_ptr = _WR(qp)->q_ptr = stp;
400 STREAM(qp) = STREAM(_WR(qp)) = stp;
/*
 * Publish the new head before dropping v_lock. Because STWOPEN is
 * already set, a concurrent opener that finds it takes the
 * existing-stream path above and waits (or gets EAGAIN).
 */
401 vp->v_stream = stp;
402 mutex_exit(&vp->v_lock);
403 if (vp->v_type == VFIFO) {
404 stp->sd_flag |= OLDNDELAY;
405 /*
406 * This means, both for pipes and fifos
407 * strwrite will send SIGPIPE if the other
408 * end is closed. For putmsg it depends
409 * on whether it is a XPG4_2 application
410 * or not
411 */
412 stp->sd_wput_opt = SW_SIGPIPE;
413
414 /* setq might sleep in kmem_alloc - avoid holding locks. */
415 setq(qp, &fifo_strdata, &fifo_stwdata, NULL, QMTSAFE,
416 SQ_CI|SQ_CO, B_FALSE);
417
418 set_qend(qp);
419 stp->sd_strtab = fifo_getinfo();
420 _WR(qp)->q_nfsrv = _WR(qp);
421 qp->q_nfsrv = qp;
422 /*
423 * Wake up others that are waiting for stream to be created.
424 */
425 mutex_enter(&stp->sd_lock);
426 /*
427 * nothing has been pushed on the stream yet, so the
428 * optimized stream head packet sizes are just those
429 * of the read queue
430 */
431 stp->sd_qn_minpsz = qp->q_minpsz;
432 stp->sd_qn_maxpsz = qp->q_maxpsz;
433 stp->sd_flag &= ~STWOPEN;
/* No driver open or autopush for a FIFO; sd_lock stays held. */
434 goto fifo_opendone;
435 }
436 /* setq might sleep in kmem_alloc - avoid holding locks. */
437 setq(qp, &strdata, &stwdata, NULL, QMTSAFE, SQ_CI|SQ_CO, B_FALSE);
438
439 set_qend(qp);
440
441 /*
442 * Open driver and create stream to it (via qattach).
 * On failure the vnode/stream association is undone, any waiters
 * are woken, and the queue pair and stream head are freed.
443 */
444 savedev = *devp;
445 cloneopen = (getmajor(*devp) == clone_major);
446 if ((error = qattach(qp, devp, flag, crp, NULL, B_FALSE)) != 0) {
447 mutex_enter(&vp->v_lock);
448 vp->v_stream = NULL;
449 mutex_exit(&vp->v_lock);
450 mutex_enter(&stp->sd_lock);
451 cv_broadcast(&stp->sd_monitor);
452 mutex_exit(&stp->sd_lock);
453 freeq(_RD(qp));
454 shfree(stp);
455 return (error);
456 }
457 /*
458 * Set sd_strtab after open in order to handle clonable drivers
459 */
460 stp->sd_strtab = STREAMSTAB(getmajor(*devp));
461
462 /*
463 * Historical note: dummydev used to be set prior to the initial
464 * open (via qattach above), which made the value seen
465 * inconsistent between an I_PUSH and an autopush of a module.
466 */
467 dummydev = *devp;
468
469 /*
470 * For clone open of old style (Q not associated) network driver,
471 * push DRMODNAME module to handle DL_ATTACH/DL_DETACH
 *
 * A failure of this push is only logged; it does not fail the open
 * because its return value is not stored in `error'.
472 */
473 brq = _RD(_WR(qp)->q_next);
474 major = getmajor(*devp);
475 if (push_drcompat && cloneopen && NETWORK_DRV(major) &&
476 ((brq->q_flag & _QASSOCIATED) == 0)) {
477 if (push_mod(qp, &dummydev, stp, DRMODNAME, 0, crp, 0) != 0)
478 cmn_err(CE_WARN, "cannot push " DRMODNAME
479 " streams module");
480 }
481
482 if (!NETWORK_DRV(major)) {
483 savedev = *devp;
484 } else {
485 /*
486 * For network devices, process differently based on the
487 * return value from dld_autopush():
488 *
489 * 0: the passed-in device points to a GLDv3 datalink with
490 * per-link autopush configuration; use that configuration
491 * and ignore any per-driver autopush configuration.
492 *
493 * 1: the passed-in device points to a physical GLDv3
494 * datalink without per-link autopush configuration. The
495 * passed in device was changed to refer to the actual
496 * physical device (if it's not already); we use that new
497 * device to look up any per-driver autopush configuration.
498 *
499 * -1: neither of the above cases applied; use the initial
500 * device to look up any per-driver autopush configuration.
 *
 * Here `savedev' still holds the device number captured before
 * qattach(); it is what dld_autopush() is given and may rewrite.
501 */
502 switch (dld_autopush(&savedev, &dlap)) {
503 case 0:
504 zoneid = crgetzoneid(crp);
505 for (s = 0; s < dlap.dap_npush; s++) {
506 error = push_mod(qp, &dummydev, stp,
507 dlap.dap_aplist[s], dlap.dap_anchor, crp,
508 zoneid);
509 if (error != 0)
510 break;
511 }
512 goto opendone;
513 case 1:
514 break;
515 case -1:
516 savedev = *devp;
517 break;
518 }
519 }
520 /*
521 * Find the autopush configuration based on "savedev". Start with the
522 * global zone. If not found check in the local zone.
 *
 * NOTE(review): the result of netstack_find_by_stackid() is
 * dereferenced without a NULL check -- confirm it cannot fail here.
523 */
524 zoneid = GLOBAL_ZONEID;
525 retryap:
526 ss = netstack_find_by_stackid(zoneid_to_netstackid(zoneid))->
527 netstack_str;
528 if ((ap = sad_ap_find_by_dev(savedev, ss)) == NULL) {
529 netstack_rele(ss->ss_netstack);
530 if (zoneid == GLOBAL_ZONEID) {
531 /*
532 * None found. Also look in the zone's autopush table.
533 */
534 zoneid = crgetzoneid(crp);
535 if (zoneid != GLOBAL_ZONEID)
536 goto retryap;
537 }
538 goto opendone;
539 }
/*
 * Push each configured module in order, stopping at the first failure.
 * Anchors are recorded against the opener's zone, whichever table the
 * configuration was found in.
 */
540 anchor = ap->ap_anchor;
541 zoneid = crgetzoneid(crp);
542 for (s = 0; s < ap->ap_npush; s++) {
543 error = push_mod(qp, &dummydev, stp, ap->ap_list[s],
544 anchor, crp, zoneid);
545 if (error != 0)
546 break;
547 }
548 sad_ap_rele(ap, ss);
549 netstack_rele(ss->ss_netstack);
550
551 opendone:
552
553 /*
554 * let specfs know that open failed part way through
555 */
556 if (error) {
557 mutex_enter(&stp->sd_lock);
558 stp->sd_flag |= STREOPENFAIL;
559 mutex_exit(&stp->sd_lock);
560 }
561
562 /*
563 * Wake up others that are waiting for stream to be created.
564 */
565 mutex_enter(&stp->sd_lock);
566 stp->sd_flag &= ~STWOPEN;
567
568 /*
569 * As a performance concern we are caching the values of
570 * q_minpsz and q_maxpsz of the module below the stream
571 * head in the stream head.
572 */
573 mutex_enter(QLOCK(stp->sd_wrq->q_next));
574 rmin = stp->sd_wrq->q_next->q_minpsz;
575 rmax = stp->sd_wrq->q_next->q_maxpsz;
576 mutex_exit(QLOCK(stp->sd_wrq->q_next));
577
578 /* do this processing here as a performance concern */
/* A non-zero strmsgsz caps the cached maximum packet size. */
579 if (strmsgsz != 0) {
580 if (rmax == INFPSZ)
581 rmax = strmsgsz;
582 else
583 rmax = MIN(strmsgsz, rmax);
584 }
585
586 mutex_enter(QLOCK(stp->sd_wrq));
587 stp->sd_qn_minpsz = rmin;
588 stp->sd_qn_maxpsz = rmax;
589 mutex_exit(QLOCK(stp->sd_wrq));
590
/* Reached with stp->sd_lock held, from above or from the FIFO path. */
591 fifo_opendone:
592 cv_broadcast(&stp->sd_monitor);
593 mutex_exit(&stp->sd_lock);
594 return (error);
595 }
596
/*
 * qinit structures for a "dead end" stream head. strclose() installs
 * them on the head's queue pair when it closes one end of a pipe whose
 * other end is still alive: the read side disposes of arriving messages
 * through strsink(), and the write side has no procedures at all.
 * strclose() also recognizes a previously closed mate by comparing its
 * q_qinfo against &deadrend.
 */
597 static int strsink(queue_t *, mblk_t *);
598 static struct qinit deadrend = {
599 strsink, NULL, NULL, NULL, NULL, &strm_info, NULL
600 };
601 static struct qinit deadwend = {
602 NULL, NULL, NULL, NULL, NULL, &stwm_info, NULL
603 };
604
605 /*
606 * Close a stream.
607 * This is called from closef() on the last close of an open stream.
608 * Strclean() will already have removed the siglist and pollist
609 * information, so all that remains is to remove all multiplexor links
610 * for the stream, pop all the modules (and the driver), and free the
611 * stream structure.
 *
 * vp	- vnode whose stream is being closed; v_stream must be non-NULL.
 * flag	- file flags; with FNDELAY or FNONBLOCK the wait for queued
 *	  write-side data to drain is skipped.
 * crp	- credentials of the closer.
 *
 * Always returns 0.
612 */
613
614 int
615 strclose(struct vnode *vp, int flag, cred_t *crp)
616 {
617 struct stdata *stp;
618 queue_t *qp;
619 int rval;
620 int freestp = 1;
621 queue_t *rmq;
622
623 TRACE_1(TR_FAC_STREAMS_FR,
624 TR_STRCLOSE, "strclose:%p", vp);
625 ASSERT(vp->v_stream);
626
627 stp = vp->v_stream;
628 ASSERT(!(stp->sd_flag & STPLEX));
629 qp = stp->sd_wrq;
630
631 /*
632 * Needed so that strpoll will return non-zero for this fd.
633 * Note that with POLLNOERR STRHUP does still cause POLLHUP.
634 */
635 mutex_enter(&stp->sd_lock);
636 stp->sd_flag |= STRHUP;
637 mutex_exit(&stp->sd_lock);
638
639 /*
640 * If the registered process or process group did not have an
641 * open instance of this stream then strclean would not be
642 * called. Thus at the time of closing all remaining siglist entries
643 * are removed.
644 */
645 if (stp->sd_siglist != NULL)
646 strcleanall(vp);
647
648 ASSERT(stp->sd_siglist == NULL);
649 ASSERT(stp->sd_sigflags == 0);
650
651 if (STRMATED(stp)) {
652 struct stdata *strmatep = stp->sd_mate;
653 int waited = 1;
654
/*
 * Wait until neither this head nor its mate is being opened, closed or
 * plumbed. cv_wait() releases only the lock it is handed, so the other
 * mate's lock is dropped first and both are re-taken with STRLOCKMATES
 * after each wakeup. The outer loop repeats until one full pass over
 * both heads completes without sleeping.
 */
655 STRLOCKMATES(stp);
656 while (waited) {
657 waited = 0;
658 while (stp->sd_flag & (STWOPEN|STRCLOSE|STRPLUMB)) {
659 mutex_exit(&strmatep->sd_lock);
660 cv_wait(&stp->sd_monitor, &stp->sd_lock);
661 mutex_exit(&stp->sd_lock);
662 STRLOCKMATES(stp);
663 waited = 1;
664 }
665 while (strmatep->sd_flag &
666 (STWOPEN|STRCLOSE|STRPLUMB)) {
667 mutex_exit(&stp->sd_lock);
668 cv_wait(&strmatep->sd_monitor,
669 &strmatep->sd_lock);
670 mutex_exit(&strmatep->sd_lock);
671 STRLOCKMATES(stp);
672 waited = 1;
673 }
674 }
675 stp->sd_flag |= STRCLOSE;
676 STRUNLOCKMATES(stp);
677 } else {
678 mutex_enter(&stp->sd_lock);
679 stp->sd_flag |= STRCLOSE;
680 mutex_exit(&stp->sd_lock);
681 }
682
683 ASSERT(qp->q_first == NULL); /* No more delayed write */
684
685 /* Check if an I_LINK was ever done on this stream */
686 if (stp->sd_flag & STRHASLINKS) {
687 netstack_t *ns;
688 str_stack_t *ss;
689
690 ns = netstack_find_by_cred(crp);
691 ASSERT(ns != NULL);
692 ss = ns->netstack_str;
693 ASSERT(ss != NULL);
694
695 (void) munlinkall(stp, LINKCLOSE|LINKNORMAL, crp, &rval, ss);
696 netstack_rele(ss->ss_netstack);
697 }
698
/*
 * Detach whatever sits directly below the stream head, one queue per
 * iteration, for as long as the head's write queue has a successor
 * in the same stream.
 */
699 while (_SAMESTR(qp)) {
700 /*
701 * Holding sd_lock prevents q_next from changing in
702 * this stream.
703 */
704 mutex_enter(&stp->sd_lock);
705 if (!(flag & (FNDELAY|FNONBLOCK)) && (stp->sd_closetime > 0)) {
706
707 /*
708 * sleep until awakened by strwsrv() or timeout
709 */
710 for (;;) {
711 mutex_enter(QLOCK(qp->q_next));
/* Nothing queued below: no need to wait. */
712 if (!(qp->q_next->q_mblkcnt)) {
713 mutex_exit(QLOCK(qp->q_next));
714 break;
715 }
716 stp->sd_flag |= WSLEEP;
717
718 /* ensure strwsrv gets enabled */
719 qp->q_next->q_flag |= QWANTW;
720 mutex_exit(QLOCK(qp->q_next));
721 /* get out if we timed out or recv'd a signal */
722 if (str_cv_wait(&qp->q_wait, &stp->sd_lock,
723 stp->sd_closetime, 0) <= 0) {
724 break;
725 }
726 }
727 stp->sd_flag &= ~WSLEEP;
728 }
729 mutex_exit(&stp->sd_lock);
730
731 rmq = qp->q_next;
732 if (rmq->q_flag & QISDRV) {
733 ASSERT(!_SAMESTR(rmq));
734 wait_sq_svc(_RD(qp)->q_syncq);
735 }
736
737 qdetach(_RD(rmq), 1, flag, crp, B_FALSE);
738 }
739
740 /*
741 * Since we call pollwakeup in close() now, the poll list should
742 * be empty in most cases. The only exception is the layered devices
743 * (e.g. the console drivers with redirection modules pushed on top
744 * of it). We have to do this after calling qdetach() because
745 * the redirection module won't have torn down the console
746 * redirection until after qdetach() has been invoked.
747 */
748 if (stp->sd_pollist.ph_list != NULL) {
749 pollwakeup(&stp->sd_pollist, POLLERR);
750 pollhead_clean(&stp->sd_pollist);
751 }
752 ASSERT(stp->sd_pollist.ph_list == NULL);
753 ASSERT(stp->sd_sidp == NULL);
754 ASSERT(stp->sd_pgidp == NULL);
755
756 /* Prevent qenable from re-enabling the stream head queue */
757 disable_svc(_RD(qp));
758
759 /*
760 * Wait until service procedure of each queue is
761 * run, if QINSERVICE is set.
762 */
763 wait_svc(_RD(qp));
764
765 /*
766 * Now, flush both queues.
767 */
768 flushq(_RD(qp), FLUSHALL);
769 flushq(qp, FLUSHALL);
770
771 /*
772 * If the write queue of the stream head is pointing to a
773 * read queue, we have a twisted stream. If the read queue
774 * is alive, convert the stream head queues into a dead end.
775 * If the read queue is dead, free the dead pair.
776 */
777 if (qp->q_next && !_SAMESTR(qp)) {
778 if (qp->q_next->q_qinfo == &deadrend) { /* half-closed pipe */
/*
 * The other end closed first and was left behind as a dead end;
 * release its stream head and queue pair along with our queue pair.
 */
779 flushq(qp->q_next, FLUSHALL); /* ensure no message */
780 shfree(qp->q_next->q_stream);
781 freeq(qp->q_next);
782 freeq(_RD(qp));
783 } else if (qp->q_next == _RD(qp)) { /* fifo */
784 freeq(_RD(qp));
785 } else { /* pipe */
/*
 * The other end is still open: keep this stream head and its
 * queues (freestp = 0) and turn them into a dead end. They are
 * released by the branch above when the other end is closed.
 */
786 freestp = 0;
787 /*
788 * The q_info pointers are never accessed when
789 * SQLOCK is held.
790 */
791 ASSERT(qp->q_syncq == _RD(qp)->q_syncq);
792 mutex_enter(SQLOCK(qp->q_syncq));
793 qp->q_qinfo = &deadwend;
794 _RD(qp)->q_qinfo = &deadrend;
795 mutex_exit(SQLOCK(qp->q_syncq));
796 }
797 } else {
798 freeq(_RD(qp)); /* free stream head queue pair */
799 }
800
/*
 * Break the vnode/stream association under v_lock. An sd_iocblk
 * value of (mblk_t *)-1 is a marker rather than a real message,
 * so it is cleared but not freed.
 */
801 mutex_enter(&vp->v_lock);
802 if (stp->sd_iocblk) {
803 if (stp->sd_iocblk != (mblk_t *)-1) {
804 freemsg(stp->sd_iocblk);
805 }
806 stp->sd_iocblk = NULL;
807 }
808 stp->sd_vnode = stp->sd_pvnode = NULL;
809 vp->v_stream = NULL;
810 mutex_exit(&vp->v_lock);
/* Close is complete: clear STRCLOSE and wake anyone on the monitor. */
811 mutex_enter(&stp->sd_lock);
812 freemsg(stp->sd_cmdblk);
813 stp->sd_cmdblk = NULL;
814 stp->sd_flag &= ~STRCLOSE;
815 cv_broadcast(&stp->sd_monitor);
816 mutex_exit(&stp->sd_lock);
817
818 if (freestp)
819 shfree(stp);
820 return (0);
821 }
822
823 static int
824 strsink(queue_t *q, mblk_t *bp)
825 {
826 struct copyresp *resp;
827
828 switch (bp->b_datap->db_type) {
829 case M_FLUSH:
830 if ((*bp->b_rptr & FLUSHW) && !(bp->b_flag & MSGNOLOOP)) {
831 *bp->b_rptr &= ~FLUSHR;
832 bp->b_flag |= MSGNOLOOP;
833 /*
834 * Protect against the driver passing up
835 * messages after it has done a qprocsoff.
836 */
837 if (_OTHERQ(q)->q_next == NULL)
838 freemsg(bp);
839 else
840 qreply(q, bp);
841 } else {
842 freemsg(bp);
843 }
844 break;
845
846 case M_COPYIN:
847 case M_COPYOUT:
848 if (bp->b_cont) {
849 freemsg(bp->b_cont);
850 bp->b_cont = NULL;
851 }
852 bp->b_datap->db_type = M_IOCDATA;
853 bp->b_wptr = bp->b_rptr + sizeof (struct copyresp);
854 resp = (struct copyresp *)bp->b_rptr;
855 resp->cp_rval = (caddr_t)1; /* failure */
856 /*
857 * Protect against the driver passing up
858 * messages after it has done a qprocsoff.
859 */
860 if (_OTHERQ(q)->q_next == NULL)
861 freemsg(bp);
862 else
863 qreply(q, bp);
864 break;
865
866 case M_IOCTL:
867 if (bp->b_cont) {
868 freemsg(bp->b_cont);
869 bp->b_cont = NULL;
870 }
871 bp->b_datap->db_type = M_IOCNAK;
872 /*
873 * Protect against the driver passing up
874 * messages after it has done a qprocsoff.
875 */
876 if (_OTHERQ(q)->q_next == NULL)
877 freemsg(bp);
878 else
879 qreply(q, bp);
880 break;
881
882 default:
883 freemsg(bp);
884 break;
885 }
886
887 return (0);
888 }
889
890 /*
891 * Clean up after a process when it closes a stream. This is called
892 * from closef for all closes, whereas strclose is called only for the
893 * last close on a stream. The siglist is scanned for entries for the
894 * current process, and these are removed.
895 */
896 void
897 strclean(struct vnode *vp)
898 {
899 strsig_t *ssp, *pssp, *tssp;
900 stdata_t *stp;
901 int update = 0;
902
903 TRACE_1(TR_FAC_STREAMS_FR,
904 TR_STRCLEAN, "strclean:%p", vp);
905 stp = vp->v_stream;
906 pssp = NULL;
907 mutex_enter(&stp->sd_lock);
908 ssp = stp->sd_siglist;
909 while (ssp) {
910 if (ssp->ss_pidp == curproc->p_pidp) {
911 tssp = ssp->ss_next;
912 if (pssp)
913 pssp->ss_next = tssp;
914 else
915 stp->sd_siglist = tssp;
916 mutex_enter(&pidlock);
917 PID_RELE(ssp->ss_pidp);
918 mutex_exit(&pidlock);
919 kmem_free(ssp, sizeof (strsig_t));
920 update = 1;
921 ssp = tssp;
922 } else {
923 pssp = ssp;
924 ssp = ssp->ss_next;
925 }
926 }
927 if (update) {
928 stp->sd_sigflags = 0;
929 for (ssp = stp->sd_siglist; ssp; ssp = ssp->ss_next)
930 stp->sd_sigflags |= ssp->ss_events;
931 }
932 mutex_exit(&stp->sd_lock);
933 }
934
935 /*
936 * Used on the last close to remove any remaining items on the siglist.
937 * These could be present on the siglist due to I_ESETSIG calls that
938 * use process groups or processed that do not have an open file descriptor
939 * for this stream (Such entries would not be removed by strclean).
940 */
941 static void
942 strcleanall(struct vnode *vp)
943 {
944 strsig_t *ssp, *nssp;
945 stdata_t *stp;
946
947 stp = vp->v_stream;
948 mutex_enter(&stp->sd_lock);
949 ssp = stp->sd_siglist;
950 stp->sd_siglist = NULL;
951 while (ssp) {
952 nssp = ssp->ss_next;
953 mutex_enter(&pidlock);
954 PID_RELE(ssp->ss_pidp);
955 mutex_exit(&pidlock);
956 kmem_free(ssp, sizeof (strsig_t));
957 ssp = nssp;
958 }
959 stp->sd_sigflags = 0;
960 mutex_exit(&stp->sd_lock);
961 }
962
963 /*
964 * Retrieve the next message from the logical stream head read queue
965 * using either rwnext (if sync stream) or getq_noenab.
966 * It is the callers responsibility to call qbackenable after
967 * it is finished with the message. The caller should not call
968 * qbackenable until after any putback calls to avoid spurious backenabling.
 *
 * stp	  - stream head; sd_lock must be held on entry and is held again
 *	    on return, although it is dropped around rwnext()/qreply().
 * q	  - stream head read queue.
 * uiop	  - caller's uio, or NULL; required for the rwnext() path and used
 *	    as a size hint for sockets.
 * first  - non-zero on the caller's first attempt; rwnext() is then tried
 *	    only if RSLEEP is set in sd_wakeq.
 * errorp - set to 0, or to the rwnext() error when NULL is returned
 *	    because of it.
 *
 * Returns the message obtained, or NULL if there is none.
969 */
970 mblk_t *
971 strget(struct stdata *stp, queue_t *q, struct uio *uiop, int first,
972 int *errorp)
973 {
974 mblk_t *bp;
975 int error;
976 ssize_t rbytes = 0;
977
978 /* Holding sd_lock prevents the read queue from changing */
979 ASSERT(MUTEX_HELD(&stp->sd_lock));
980
981 if (uiop != NULL && stp->sd_struiordq != NULL &&
982 q->q_first == NULL &&
983 (!first || (stp->sd_wakeq & RSLEEP))) {
984 /*
985 * Stream supports rwnext() for the read side.
986 * If this is the first time we're called by e.g. strread
987 * only do the downcall if there is a deferred wakeup
988 * (registered in sd_wakeq).
989 */
990 struiod_t uiod;
991 struct iovec buf[IOV_MAX_STACK];
992 int iovlen = 0;
993
994 if (first)
995 stp->sd_wakeq &= ~RSLEEP;
996
/*
 * rwnext() works on a private copy of the caller's uio. The iovec
 * array comes from the stack when it fits; otherwise it is allocated,
 * and a non-zero iovlen records that it must be freed on every exit
 * from this block.
 * NOTE(review): the KM_SLEEP allocation is made with sd_lock held.
 */
997 if (uiop->uio_iovcnt > IOV_MAX_STACK) {
998 iovlen = uiop->uio_iovcnt * sizeof (iovec_t);
999 uiod.d_iov = kmem_alloc(iovlen, KM_SLEEP);
1000 } else {
1001 uiod.d_iov = buf;
1002 }
1003
1004 (void) uiodup(uiop, &uiod.d_uio, uiod.d_iov, uiop->uio_iovcnt);
1005 uiod.d_mp = 0;
1006 /*
1007 * Mark that a thread is in rwnext on the read side
1008 * to prevent strrput from nacking ioctls immediately.
1009 * When the last concurrent rwnext returns
1010 * the ioctls are nack'ed.
1011 */
1012 ASSERT(MUTEX_HELD(&stp->sd_lock));
1013 stp->sd_struiodnak++;
1014 /*
1015 * Note: rwnext will drop sd_lock.
1016 */
1017 error = rwnext(q, &uiod);
1018 ASSERT(MUTEX_NOT_HELD(&stp->sd_lock));
1019 mutex_enter(&stp->sd_lock);
1020 stp->sd_struiodnak--;
/*
 * Once no thread is left in rwnext(), nak the ioctls that were parked
 * on sd_struionak. sd_lock is dropped around each qreply(), so the
 * counter is tested again on every iteration.
 */
1021 while (stp->sd_struiodnak == 0 &&
1022 ((bp = stp->sd_struionak) != NULL)) {
1023 stp->sd_struionak = bp->b_next;
1024 bp->b_next = NULL;
1025 bp->b_datap->db_type = M_IOCNAK;
1026 /*
1027 * Protect against the driver passing up
1028 * messages after it has done a qprocsoff.
1029 */
1030 if (_OTHERQ(q)->q_next == NULL)
1031 freemsg(bp);
1032 else {
1033 mutex_exit(&stp->sd_lock);
1034 qreply(q, bp);
1035 mutex_enter(&stp->sd_lock);
1036 }
1037 }
1038 ASSERT(MUTEX_HELD(&stp->sd_lock));
1039 if (error == 0 || error == EWOULDBLOCK) {
/* rwnext() handed back a message: return it directly. */
1040 if ((bp = uiod.d_mp) != NULL) {
1041 *errorp = 0;
1042 ASSERT(MUTEX_HELD(&stp->sd_lock));
1043 if (iovlen != 0)
1044 kmem_free(uiod.d_iov, iovlen);
1045 return (bp);
1046 }
1047 error = 0;
1048 } else if (error == EINVAL) {
1049 /*
1050 * The stream plumbing must have
1051 * changed while we were away, so
1052 * just turn off rwnext()s.
1053 */
1054 error = 0;
1055 } else if (error == EBUSY) {
1056 /*
1057 * The module might have data in transit using putnext
1058 * Fall back on waiting + getq.
1059 */
1060 error = 0;
1061 } else {
/* Any other rwnext() failure is reported to the caller. */
1062 *errorp = error;
1063 ASSERT(MUTEX_HELD(&stp->sd_lock));
1064 if (iovlen != 0)
1065 kmem_free(uiod.d_iov, iovlen);
1066 return (NULL);
1067 }
1068
1069 if (iovlen != 0)
1070 kmem_free(uiod.d_iov, iovlen);
1071
1072 /*
1073 * Try a getq in case a rwnext() generated mblk
1074 * has bubbled up via strrput().
1075 */
1076 }
1077 *errorp = 0;
1078 ASSERT(MUTEX_HELD(&stp->sd_lock));
1079
1080 /*
1081 * If we have a valid uio, try and use this as a guide for how
1082 * many bytes to retrieve from the queue via getq_noenab().
1083 * Doing this can avoid unnecessary counting of overlong
1084 * messages in putback(). We currently only do this for sockets
1085 * and only if there is no sd_rputdatafunc hook.
1086 *
1087 * The sd_rputdatafunc hook transforms the entire message
1088 * before any bytes in it can be given to a client. So, rbytes
1089 * must be 0 if there is a hook.
1090 */
1091 if ((uiop != NULL) && (stp->sd_vnode->v_type == VSOCK) &&
1092 (stp->sd_rputdatafunc == NULL))
1093 rbytes = uiop->uio_resid;
1094
1095 return (getq_noenab(q, rbytes));
1096 }
1097
1098 /*
1099 * Copy out the message pointed to by `bp' into the uio pointed to by `uiop'.
1100 * If the message does not fit in the uio the remainder of it is returned;
1101 * otherwise NULL is returned. Any embedded zero-length mblk_t's are
1102 * consumed, even if uio_resid reaches zero. On error, `*errorp' is set to
1103 * the error code, the message is consumed, and NULL is returned.
1104 */
1105 static mblk_t *
1106 struiocopyout(mblk_t *bp, struct uio *uiop, int *errorp)
1107 {
1108 int error;
1109 ptrdiff_t n;
1110 mblk_t *nbp;
1111
1112 ASSERT(bp->b_wptr >= bp->b_rptr);
1113
1114 do {
1115 if ((n = MIN(uiop->uio_resid, MBLKL(bp))) != 0) {
1116 ASSERT(n > 0);
1117
1118 error = uiomove(bp->b_rptr, n, UIO_READ, uiop);
1119 if (error != 0) {
1120 freemsg(bp);
1121 *errorp = error;
1122 return (NULL);
1123 }
1124 }
1125
1126 bp->b_rptr += n;
1127 while (bp != NULL && (bp->b_rptr >= bp->b_wptr)) {
1128 nbp = bp;
1129 bp = bp->b_cont;
1130 freeb(nbp);
1131 }
1132 } while (bp != NULL && uiop->uio_resid > 0);
1133
1134 *errorp = 0;
1135 return (bp);
1136 }
1137
/*
 * Read a stream according to the mode flags in sd_read_opt:
 *
 * (default mode)		- Byte stream, msg boundaries are ignored
 * RD_MSGDIS (msg discard)	- Read on msg boundaries and throw away
 *				any data remaining in msg
 * RD_MSGNODIS (msg non-discard) - Read on msg boundaries and put back
 *				any remaining data on head of read queue
 *
 * Consume readable messages on the front of the queue until
 * uiop->uio_resid is satisfied, the readable messages are exhausted, or a
 * message boundary is reached in a message mode.  If no data was read and
 * the stream was not opened with the NDELAY flag, block until data arrives.
 * Otherwise return the data read and update the count.
 *
 * In default mode a 0 length message signifies end-of-file and terminates
 * a read in progress.  The 0 length message is removed from the queue
 * only if it is the only message read (no data is read).
 *
 * An attempt to read an M_PROTO or M_PCPROTO message results in an
 * EBADMSG error return, unless either RD_PROTDAT or RD_PROTDIS are set.
 * If RD_PROTDAT is set, M_PROTO and M_PCPROTO messages are read as data.
 * If RD_PROTDIS is set, the M_PROTO and M_PCPROTO parts of the message
 * are unlinked from any M_DATA blocks in the message, the protos are
 * thrown away, and the data is read.
 *
 * Arguments:
 *	vp	- vnode of the stream; vp->v_stream must be set.
 *	uiop	- destination of the data; uio_resid bounds the read and
 *		uio_fmode is passed to strwaitq().
 *	crp	- only referenced by the trace points here.
 *
 * Returns 0 on success (including a zero-byte read) or an errno value.
 * sd_lock is taken and released internally; it is not held on return.
 */
/* ARGSUSED */
int
strread(struct vnode *vp, struct uio *uiop, cred_t *crp)
{
	struct stdata *stp;
	mblk_t *bp, *nbp;
	queue_t *q;
	int error = 0;
	uint_t old_sd_flag;
	int first;
	char rflg;
	uint_t mark;		/* Contains MSG*MARK and _LASTMARK */
#define	_LASTMARK	0x8000	/* Distinct from MSG*MARK */
	short delim;
	unsigned char pri = 0;
	char waitflag;
	unsigned char type;

	TRACE_1(TR_FAC_STREAMS_FR,
	    TR_STRREAD_ENTER, "strread:%p", vp);
	ASSERT(vp->v_stream);
	stp = vp->v_stream;

	mutex_enter(&stp->sd_lock);

	if ((error = i_straccess(stp, JCREAD)) != 0) {
		mutex_exit(&stp->sd_lock);
		return (error);
	}

	if (stp->sd_flag & (STRDERR|STPLEX)) {
		error = strgeterr(stp, STRDERR|STPLEX, 0);
		if (error != 0) {
			mutex_exit(&stp->sd_lock);
			return (error);
		}
	}

	/*
	 * Loop terminates when uiop->uio_resid == 0.
	 *
	 * rflg is set to 1 once any data has been copied out to the caller;
	 * from then on NOINTR is added to the flags passed to strwaitq().
	 */
	rflg = 0;
	waitflag = READWAIT;
	q = _RD(stp->sd_wrq);
	for (;;) {
		/* Every iteration starts with sd_lock held. */
		ASSERT(MUTEX_HELD(&stp->sd_lock));
		old_sd_flag = stp->sd_flag;
		mark = 0;
		delim = 0;
		first = 1;
		while ((bp = strget(stp, q, uiop, first, &error)) == NULL) {
			int done = 0;

			ASSERT(MUTEX_HELD(&stp->sd_lock));

			/* strget() failed: give up with its error. */
			if (error != 0)
				goto oops;

			/* Hangup or EOF is flagged and nothing is queued. */
			if (stp->sd_flag & (STRHUP|STREOF)) {
				goto oops;
			}
			/*
			 * Some data has already been read and the stream is
			 * not in delimited mode: return what we have rather
			 * than sleeping for more.
			 */
			if (rflg && !(stp->sd_flag & STRDELIM)) {
				goto oops;
			}
			/*
			 * If a read(fd,buf,0) has been done, there is no
			 * need to sleep. We always have zero bytes to
			 * return.
			 */
			if (uiop->uio_resid == 0) {
				goto oops;
			}

			qbackenable(q, 0);

			TRACE_3(TR_FAC_STREAMS_FR, TR_STRREAD_WAIT,
			    "strread calls strwaitq:%p, %p, %p",
			    vp, uiop, crp);
			if ((error = strwaitq(stp, waitflag, uiop->uio_resid,
			    uiop->uio_fmode, -1, &done)) != 0 || done) {
				TRACE_3(TR_FAC_STREAMS_FR, TR_STRREAD_DONE,
				    "strread error or done:%p, %p, %p",
				    vp, uiop, crp);
				/*
				 * With FNDELAY on the file and OLDNDELAY on
				 * the stream, EAGAIN from strwaitq() is
				 * reported to the caller as success.
				 */
				if ((uiop->uio_fmode & FNDELAY) &&
				    (stp->sd_flag & OLDNDELAY) &&
				    (error == EAGAIN))
					error = 0;
				goto oops;
			}
			TRACE_3(TR_FAC_STREAMS_FR, TR_STRREAD_AWAKE,
			    "strread awakes:%p, %p, %p", vp, uiop, crp);
			/* Re-check access after having slept. */
			if ((error = i_straccess(stp, JCREAD)) != 0) {
				goto oops;
			}
			first = 0;
		}

		ASSERT(MUTEX_HELD(&stp->sd_lock));
		ASSERT(bp);
		/* Remember the band; it is restored on any putback below. */
		pri = bp->b_band;
		/*
		 * Extract any mark information. If the message is not
		 * completely consumed this information will be put in the mblk
		 * that is putback.
		 * If MSGMARKNEXT is set and the message is completely consumed
		 * the STRATMARK flag will be set below. Likewise, if
		 * MSGNOTMARKNEXT is set and the message is
		 * completely consumed STRNOTATMARK will be set.
		 *
		 * For some unknown reason strread only breaks the read at the
		 * last mark.
		 */
		mark = bp->b_flag & (MSGMARK | MSGMARKNEXT | MSGNOTMARKNEXT);
		ASSERT((mark & (MSGMARKNEXT|MSGNOTMARKNEXT)) !=
		    (MSGMARKNEXT|MSGNOTMARKNEXT));
		if (mark != 0 && bp == stp->sd_mark) {
			/*
			 * This is the message recorded in sd_mark.  If data
			 * has already been read, put it back untouched so
			 * that this read ends before it.
			 */
			if (rflg) {
				putback(stp, q, bp, pri);
				goto oops;
			}
			mark |= _LASTMARK;
			stp->sd_mark = NULL;
		}
		if ((stp->sd_flag & STRDELIM) && (bp->b_flag & MSGDELIM))
			delim = 1;
		/* sd_lock is dropped while the message is processed. */
		mutex_exit(&stp->sd_lock);

		if (STREAM_NEEDSERVICE(stp))
			stream_runservice(stp);

		type = bp->b_datap->db_type;

		switch (type) {

		case M_DATA:
ismdata:
			if (msgnodata(bp)) {
				if (mark || delim) {
					freemsg(bp);
				} else if (rflg) {

					/*
					 * If already read data put zero
					 * length message back on queue else
					 * free msg and return 0.
					 */
					bp->b_band = pri;
					mutex_enter(&stp->sd_lock);
					putback(stp, q, bp, pri);
					mutex_exit(&stp->sd_lock);
				} else {
					freemsg(bp);
				}
				error = 0;
				goto oops1;
			}

			rflg = 1;
			waitflag |= NOINTR;
			/*
			 * On return bp is the unread remainder of the
			 * message, or NULL if it was consumed (or freed
			 * because of an error).
			 */
			bp = struiocopyout(bp, uiop, &error);
			if (error != 0)
				goto oops1;

			mutex_enter(&stp->sd_lock);
			if (bp) {
				/*
				 * Have remaining data in message.
				 * Free msg if in discard mode.
				 */
				if (stp->sd_read_opt & RD_MSGDIS) {
					freemsg(bp);
				} else {
					/*
					 * Carry the band, mark and delimiter
					 * state over to the remainder before
					 * putting it back.
					 */
					bp->b_band = pri;
					if ((mark & _LASTMARK) &&
					    (stp->sd_mark == NULL))
						stp->sd_mark = bp;
					bp->b_flag |= mark & ~_LASTMARK;
					if (delim)
						bp->b_flag |= MSGDELIM;
					if (msgnodata(bp))
						freemsg(bp);
					else
						putback(stp, q, bp, pri);
				}
			} else {
				/*
				 * Consumed the complete message.
				 * Move the MSG*MARKNEXT information
				 * to the stream head just in case
				 * the read queue becomes empty.
				 *
				 * If the stream head was at the mark
				 * (STRATMARK) before we dropped sd_lock above
				 * and some data was consumed then we have
				 * moved past the mark thus STRATMARK is
				 * cleared. However, if a message arrived in
				 * strrput during the copyout above causing
				 * STRATMARK to be set we can not clear that
				 * flag.
				 */
				if (mark &
				    (MSGMARKNEXT|MSGNOTMARKNEXT|MSGMARK)) {
					if (mark & MSGMARKNEXT) {
						stp->sd_flag &= ~STRNOTATMARK;
						stp->sd_flag |= STRATMARK;
					} else if (mark & MSGNOTMARKNEXT) {
						stp->sd_flag &= ~STRATMARK;
						stp->sd_flag |= STRNOTATMARK;
					} else {
						stp->sd_flag &=
						    ~(STRATMARK|STRNOTATMARK);
					}
				} else if (rflg && (old_sd_flag & STRATMARK)) {
					stp->sd_flag &= ~STRATMARK;
				}
			}

			/*
			 * Check for signal messages at the front of the read
			 * queue and generate the signal(s) if appropriate.
			 * The only signal that can be on queue is M_SIG at
			 * this point.
			 */
			while ((((bp = q->q_first)) != NULL) &&
			    (bp->b_datap->db_type == M_SIG)) {
				bp = getq_noenab(q, 0);
				/*
				 * sd_lock is held so the content of the
				 * read queue can not change.
				 */
				ASSERT(bp != NULL && DB_TYPE(bp) == M_SIG);
				strsignal_nolock(stp, *bp->b_rptr, bp->b_band);
				mutex_exit(&stp->sd_lock);
				freemsg(bp);
				if (STREAM_NEEDSERVICE(stp))
					stream_runservice(stp);
				mutex_enter(&stp->sd_lock);
			}

			/*
			 * The read is complete when the request is satisfied,
			 * the last mark or a delimiter was reached, or one of
			 * the message read modes is in effect.
			 */
			if ((uiop->uio_resid == 0) || (mark & _LASTMARK) ||
			    delim ||
			    (stp->sd_read_opt & (RD_MSGDIS|RD_MSGNODIS))) {
				goto oops;
			}
			continue;

		case M_SIG:
			/*
			 * Post the signal carried in the first byte, then
			 * retake sd_lock and look for the next message.
			 */
			strsignal(stp, *bp->b_rptr, (int32_t)bp->b_band);
			freemsg(bp);
			mutex_enter(&stp->sd_lock);
			continue;

		case M_PROTO:
		case M_PCPROTO:
			/*
			 * Only data messages are readable.
			 * Any others generate an error, unless
			 * RD_PROTDIS or RD_PROTDAT is set.
			 */
			if (stp->sd_read_opt & RD_PROTDAT) {
				/*
				 * Retype leading proto blocks as M_DATA.
				 * NOTE(review): this walks b_next, whereas
				 * the RD_PROTDIS branch below walks the
				 * message's own blocks via unlinkb(); confirm
				 * whether b_cont was intended here.
				 */
				for (nbp = bp; nbp; nbp = nbp->b_next) {
					if ((nbp->b_datap->db_type ==
					    M_PROTO) ||
					    (nbp->b_datap->db_type ==
					    M_PCPROTO)) {
						nbp->b_datap->db_type = M_DATA;
					} else {
						break;
					}
				}
				/*
				 * clear stream head hi pri flag based on
				 * first message
				 */
				if (type == M_PCPROTO) {
					mutex_enter(&stp->sd_lock);
					stp->sd_flag &= ~STRPRI;
					mutex_exit(&stp->sd_lock);
				}
				goto ismdata;
			} else if (stp->sd_read_opt & RD_PROTDIS) {
				/*
				 * discard non-data messages
				 */
				while (bp &&
				    ((bp->b_datap->db_type == M_PROTO) ||
				    (bp->b_datap->db_type == M_PCPROTO))) {
					nbp = unlinkb(bp);
					freeb(bp);
					bp = nbp;
				}
				/*
				 * clear stream head hi pri flag based on
				 * first message
				 */
				if (type == M_PCPROTO) {
					mutex_enter(&stp->sd_lock);
					stp->sd_flag &= ~STRPRI;
					mutex_exit(&stp->sd_lock);
				}
				if (bp) {
					/* Data follows the protos: read it. */
					bp->b_band = pri;
					goto ismdata;
				} else {
					/* Nothing left; go to next message. */
					break;
				}
			}
			/* FALLTHRU */
		case M_PASSFP:
			if ((bp->b_datap->db_type == M_PASSFP) &&
			    (stp->sd_read_opt & RD_PROTDIS)) {
				freemsg(bp);
				break;
			}
			/*
			 * Not readable: leave the message on the queue and
			 * fail with EBADMSG unless data was already read.
			 */
			mutex_enter(&stp->sd_lock);
			putback(stp, q, bp, pri);
			mutex_exit(&stp->sd_lock);
			if (rflg == 0)
				error = EBADMSG;
			goto oops1;

		default:
			/*
			 * Garbage on stream head read queue.
			 */
			cmn_err(CE_WARN, "bad %x found at stream head\n",
			    bp->b_datap->db_type);
			freemsg(bp);
			goto oops1;
		}
		/*
		 * Reached by the `break's above, where the message was
		 * discarded with sd_lock dropped; retake it for the next
		 * iteration.
		 */
		mutex_enter(&stp->sd_lock);
	}
	/* oops: exit with sd_lock held.  oops1: exit with it dropped. */
oops:
	mutex_exit(&stp->sd_lock);
oops1:
	qbackenable(q, pri);
	return (error);
#undef	_LASTMARK
}
1504
1505 /*
1506 * Default processing of M_PROTO/M_PCPROTO messages.
1507 * Determine which wakeups and signals are needed.
1508 * This can be replaced by a user-specified procedure for kernel users
1509 * of STREAMS.
1510 */
1511 /* ARGSUSED */
1512 mblk_t *
1513 strrput_proto(vnode_t *vp, mblk_t *mp,
1514 strwakeup_t *wakeups, strsigset_t *firstmsgsigs,
1515 strsigset_t *allmsgsigs, strpollset_t *pollwakeups)
1516 {
1517 *wakeups = RSLEEP;
1518 *allmsgsigs = 0;
1519
1520 switch (mp->b_datap->db_type) {
1521 case M_PROTO:
1522 if (mp->b_band == 0) {
1523 *firstmsgsigs = S_INPUT | S_RDNORM;
1524 *pollwakeups = POLLIN | POLLRDNORM;
1525 } else {
1526 *firstmsgsigs = S_INPUT | S_RDBAND;
1527 *pollwakeups = POLLIN | POLLRDBAND;
1528 }
1529 break;
1530 case M_PCPROTO:
1531 *firstmsgsigs = S_HIPRI;
1532 *pollwakeups = POLLPRI;
1533 break;
1534 }
1535 return (mp);
1536 }
1537
1538 /*
1539 * Default processing of everything but M_DATA, M_PROTO, M_PCPROTO and
1540 * M_PASSFP messages.
1541 * Determine which wakeups and signals are needed.
1542 * This can be replaced by a user-specified procedure for kernel users
1543 * of STREAMS.
1544 */
1545 /* ARGSUSED */
1546 mblk_t *
1547 strrput_misc(vnode_t *vp, mblk_t *mp,
1548 strwakeup_t *wakeups, strsigset_t *firstmsgsigs,
1549 strsigset_t *allmsgsigs, strpollset_t *pollwakeups)
1550 {
1551 *wakeups = 0;
1552 *firstmsgsigs = 0;
1553 *allmsgsigs = 0;
1554 *pollwakeups = 0;
1555 return (mp);
1556 }
1557
/*
 * Stream read put procedure.  Called from downstream driver/module
 * with messages for the stream head.  Data, protocol, and in-stream
 * signal messages are placed on the queue, others are handled directly.
 *
 * Arguments:
 *	q	- stream head read queue; q->q_ptr is the struct stdata.
 *	bp	- the arriving message; bp->b_next must be NULL.
 *
 * The message is always consumed (queued, freed or handed on).  Every
 * return path in this function returns 0.  sd_lock is taken and released
 * internally; it is not held on return.
 */
int
strrput(queue_t *q, mblk_t *bp)
{
	struct stdata *stp;
	ulong_t rput_opt;
	strwakeup_t wakeups;
	strsigset_t firstmsgsigs;	/* Signals if first message on queue */
	strsigset_t allmsgsigs;		/* Signals for all messages */
	strsigset_t signals;		/* Signals events to generate */
	strpollset_t pollwakeups;
	mblk_t *nextbp;
	uchar_t band = 0;
	int hipri_sig;

	stp = (struct stdata *)q->q_ptr;
	/*
	 * Use rput_opt for optimized access to the SR_ flags except
	 * SR_POLLIN. That flag has to be checked under sd_lock since it
	 * is modified by strpoll().
	 */
	rput_opt = stp->sd_rput_opt;

	ASSERT(qclaimed(q));
	TRACE_2(TR_FAC_STREAMS_FR, TR_STRRPUT_ENTER,
	    "strrput called with message type:q %p bp %p", q, bp);

	/*
	 * Perform initial processing and pass to the parameterized functions.
	 * Every case that falls out of this switch does so with sd_lock held.
	 */
	ASSERT(bp->b_next == NULL);

	switch (bp->b_datap->db_type) {
	case M_DATA:
		/*
		 * sockfs is the only consumer of STREOF and when it is set,
		 * it implies that the receiver is not interested in receiving
		 * any more data, hence the mblk is freed to prevent unnecessary
		 * message queueing at the stream head.
		 *
		 * NOTE(review): this is an equality test, so it only matches
		 * when STREOF is the sole bit set in sd_flag; the wording
		 * above suggests a bit test (&) may have been intended.
		 * Confirm before changing.
		 */
		if (stp->sd_flag == STREOF) {
			freemsg(bp);
			return (0);
		}
		if ((rput_opt & SR_IGN_ZEROLEN) &&
		    bp->b_rptr == bp->b_wptr && msgnodata(bp)) {
			/*
			 * Ignore zero-length M_DATA messages. These might be
			 * generated by some transports.
			 * The zero-length M_DATA messages, even if they
			 * are ignored, should affect the atmark tracking and
			 * should wake up a thread sleeping in strwaitmark.
			 */
			mutex_enter(&stp->sd_lock);
			if (bp->b_flag & MSGMARKNEXT) {
				/*
				 * Record the position of the mark either
				 * in q_last or in STRATMARK.
				 */
				if (q->q_last != NULL) {
					q->q_last->b_flag &= ~MSGNOTMARKNEXT;
					q->q_last->b_flag |= MSGMARKNEXT;
				} else {
					stp->sd_flag &= ~STRNOTATMARK;
					stp->sd_flag |= STRATMARK;
				}
			} else if (bp->b_flag & MSGNOTMARKNEXT) {
				/*
				 * Record that this is not the position of
				 * the mark either in q_last or in
				 * STRNOTATMARK.
				 */
				if (q->q_last != NULL) {
					q->q_last->b_flag &= ~MSGMARKNEXT;
					q->q_last->b_flag |= MSGNOTMARKNEXT;
				} else {
					stp->sd_flag &= ~STRATMARK;
					stp->sd_flag |= STRNOTATMARK;
				}
			}
			if (stp->sd_flag & RSLEEP) {
				stp->sd_flag &= ~RSLEEP;
				cv_broadcast(&q->q_wait);
			}
			mutex_exit(&stp->sd_lock);
			freemsg(bp);
			return (0);
		}
		/* Band 0 is reported as normal input, others as banded. */
		wakeups = RSLEEP;
		if (bp->b_band == 0) {
			firstmsgsigs = S_INPUT | S_RDNORM;
			pollwakeups = POLLIN | POLLRDNORM;
		} else {
			firstmsgsigs = S_INPUT | S_RDBAND;
			pollwakeups = POLLIN | POLLRDBAND;
		}
		if (rput_opt & SR_SIGALLDATA)
			allmsgsigs = firstmsgsigs;
		else
			allmsgsigs = 0;

		mutex_enter(&stp->sd_lock);
		if ((rput_opt & SR_CONSOL_DATA) &&
		    (q->q_last != NULL) &&
		    (bp->b_flag & (MSGMARK|MSGDELIM)) == 0) {
			/*
			 * Consolidate an M_DATA message onto an M_DATA,
			 * M_PROTO, or M_PCPROTO by merging it with q_last.
			 * The consolidation does not take place if
			 * the old message is marked with either of the
			 * marks or the delim flag or if the new
			 * message is marked with MSGMARK. The MSGMARK
			 * check is needed to handle the odd semantics of
			 * MSGMARK where essentially the whole message
			 * is to be treated as marked.
			 * Carry any MSGMARKNEXT and MSGNOTMARKNEXT from the
			 * new message to the front of the b_cont chain.
			 */
			mblk_t *lbp = q->q_last;
			unsigned char db_type = lbp->b_datap->db_type;

			if ((db_type == M_DATA || db_type == M_PROTO ||
			    db_type == M_PCPROTO) &&
			    !(lbp->b_flag & (MSGDELIM|MSGMARK|MSGMARKNEXT))) {
				/*
				 * Take q_last off the queue; the merged
				 * message is queued again below.
				 */
				rmvq_noenab(q, lbp);
				/*
				 * The first message in the b_cont list
				 * tracks MSGMARKNEXT and MSGNOTMARKNEXT.
				 * We need to handle the case where we
				 * are appending:
				 *
				 * 1) a MSGMARKNEXT to a MSGNOTMARKNEXT.
				 * 2) a MSGMARKNEXT to a plain message.
				 * 3) a MSGNOTMARKNEXT to a plain message
				 * 4) a MSGNOTMARKNEXT to a MSGNOTMARKNEXT
				 *    message.
				 *
				 * Thus we never append a MSGMARKNEXT or
				 * MSGNOTMARKNEXT to a MSGMARKNEXT message.
				 */
				if (bp->b_flag & MSGMARKNEXT) {
					lbp->b_flag |= MSGMARKNEXT;
					lbp->b_flag &= ~MSGNOTMARKNEXT;
					bp->b_flag &= ~MSGMARKNEXT;
				} else if (bp->b_flag & MSGNOTMARKNEXT) {
					lbp->b_flag |= MSGNOTMARKNEXT;
					bp->b_flag &= ~MSGNOTMARKNEXT;
				}

				linkb(lbp, bp);
				bp = lbp;
				/*
				 * The new message logically isn't the first
				 * even though the q_first check below thinks
				 * it is. Clear the firstmsgsigs to make it
				 * not appear to be first.
				 */
				firstmsgsigs = 0;
			}
		}
		break;

	case M_PASSFP:
		wakeups = RSLEEP;
		allmsgsigs = 0;
		if (bp->b_band == 0) {
			firstmsgsigs = S_INPUT | S_RDNORM;
			pollwakeups = POLLIN | POLLRDNORM;
		} else {
			firstmsgsigs = S_INPUT | S_RDBAND;
			pollwakeups = POLLIN | POLLRDBAND;
		}
		mutex_enter(&stp->sd_lock);
		break;

	case M_PROTO:
	case M_PCPROTO:
		/*
		 * Let the stream's proto handler decide the wakeups and
		 * signals; it is called without sd_lock held and may return
		 * a different message (or a b_next chain, see below).
		 */
		ASSERT(stp->sd_rprotofunc != NULL);
		bp = (stp->sd_rprotofunc)(stp->sd_vnode, bp,
		    &wakeups, &firstmsgsigs, &allmsgsigs, &pollwakeups);
#define	ALLSIG	(S_INPUT|S_HIPRI|S_OUTPUT|S_MSG|S_ERROR|S_HANGUP|S_RDNORM|\
		S_WRNORM|S_RDBAND|S_WRBAND|S_BANDURG)
#define	ALLPOLL	(POLLIN|POLLPRI|POLLOUT|POLLRDNORM|POLLWRNORM|POLLRDBAND|\
		POLLWRBAND)

		/* The handler may only hand back known bits. */
		ASSERT((wakeups & ~(RSLEEP|WSLEEP)) == 0);
		ASSERT((firstmsgsigs & ~ALLSIG) == 0);
		ASSERT((allmsgsigs & ~ALLSIG) == 0);
		ASSERT((pollwakeups & ~ALLPOLL) == 0);

		mutex_enter(&stp->sd_lock);
		break;

	default:
		/* As above, but through the stream's misc handler. */
		ASSERT(stp->sd_rmiscfunc != NULL);
		bp = (stp->sd_rmiscfunc)(stp->sd_vnode, bp,
		    &wakeups, &firstmsgsigs, &allmsgsigs, &pollwakeups);
		ASSERT((wakeups & ~(RSLEEP|WSLEEP)) == 0);
		ASSERT((firstmsgsigs & ~ALLSIG) == 0);
		ASSERT((allmsgsigs & ~ALLSIG) == 0);
		ASSERT((pollwakeups & ~ALLPOLL) == 0);
#undef	ALLSIG
#undef	ALLPOLL
		mutex_enter(&stp->sd_lock);
		break;
	}
	ASSERT(MUTEX_HELD(&stp->sd_lock));

	/* By default generate superset of signals */
	signals = (firstmsgsigs | allmsgsigs);

	/*
	 * The  proto and misc functions can return multiple messages
	 * as a b_next chain. Such messages are processed separately.
	 * Each pass through one_more is entered with sd_lock held.
	 */
one_more:
	hipri_sig = 0;
	if (bp == NULL) {
		nextbp = NULL;
	} else {
		/* Detach this message from the rest of the chain. */
		nextbp = bp->b_next;
		bp->b_next = NULL;

		switch (bp->b_datap->db_type) {
		case M_PCPROTO:
			/*
			 * Only one priority protocol message is allowed at the
			 * stream head at a time.
			 */
			if (stp->sd_flag & STRPRI) {
				TRACE_0(TR_FAC_STREAMS_FR, TR_STRRPUT_PROTERR,
				    "M_PCPROTO already at head");
				freemsg(bp);
				mutex_exit(&stp->sd_lock);
				goto done;
			}
			stp->sd_flag |= STRPRI;
			hipri_sig = 1;
			/* FALLTHRU */
		case M_DATA:
		case M_PROTO:
		case M_PASSFP:
			band = bp->b_band;
			/*
			 * Marking doesn't work well when messages
			 * are marked in more than one band.  We only
			 * remember the last message received, even if
			 * it is placed on the queue ahead of other
			 * marked messages.
			 */
			if (bp->b_flag & MSGMARK)
				stp->sd_mark = bp;
			(void) putq(q, bp);

			/*
			 * If message is a PCPROTO message, always use
			 * firstmsgsigs to determine if a signal should be
			 * sent as strrput is the only place to send
			 * signals for PCPROTO. Other messages are based on
			 * the STRGETINPROG flag. The flag determines if
			 * strrput or (k)strgetmsg will be responsible for
			 * sending the signals, in the firstmsgsigs case.
			 */
			if ((hipri_sig == 1) ||
			    (((stp->sd_flag & STRGETINPROG) == 0) &&
			    (q->q_first == bp)))
				signals = (firstmsgsigs | allmsgsigs);
			else
				signals = allmsgsigs;
			break;

		default:
			/*
			 * Not queueable at the stream head: hand it to
			 * strrput_nondata() with sd_lock dropped.
			 */
			mutex_exit(&stp->sd_lock);
			(void) strrput_nondata(q, bp);
			mutex_enter(&stp->sd_lock);
			break;
		}
	}
	ASSERT(MUTEX_HELD(&stp->sd_lock));
	/*
	 * Wake sleeping read/getmsg and cancel deferred wakeup
	 */
	if (wakeups & RSLEEP)
		stp->sd_wakeq &= ~RSLEEP;

	/* Only wake the sides that are actually flagged as sleeping. */
	wakeups &= stp->sd_flag;
	if (wakeups & RSLEEP) {
		stp->sd_flag &= ~RSLEEP;
		cv_broadcast(&q->q_wait);
	}
	if (wakeups & WSLEEP) {
		stp->sd_flag &= ~WSLEEP;
		cv_broadcast(&_WR(q)->q_wait);
	}

	if (pollwakeups != 0) {
		if (pollwakeups == (POLLIN | POLLRDNORM)) {
			/*
			 * Can't use rput_opt since it was not
			 * read when sd_lock was held and SR_POLLIN is changed
			 * by strpoll() under sd_lock.
			 */
			if (!(stp->sd_rput_opt & SR_POLLIN))
				goto no_pollwake;
			stp->sd_rput_opt &= ~SR_POLLIN;
		}
		/* pollwakeup() is called with sd_lock dropped. */
		mutex_exit(&stp->sd_lock);
		pollwakeup(&stp->sd_pollist, pollwakeups);
		mutex_enter(&stp->sd_lock);
	}
no_pollwake:

	/*
	 * strsendsig can handle multiple signals with a
	 * single call.
	 */
	if (stp->sd_sigflags & signals)
		strsendsig(stp->sd_siglist, signals, band, 0);
	mutex_exit(&stp->sd_lock);


	/* done: reached with sd_lock dropped. */
done:
	if (nextbp == NULL)
		return (0);

	/*
	 * Any signals were handled the first time.
	 * Wakeups and pollwakeups are redone to avoid any race
	 * conditions - all the messages are not queued until the
	 * last message has been processed by strrput.
	 */
	bp = nextbp;
	signals = firstmsgsigs = allmsgsigs = 0;
	mutex_enter(&stp->sd_lock);
	goto one_more;
}
1898
1899 static void
1900 log_dupioc(queue_t *rq, mblk_t *bp)
1901 {
1902 queue_t *wq, *qp;
1903 char *modnames, *mnp, *dname;
1904 size_t maxmodstr;
1905 boolean_t islast;
1906
1907 /*
1908 * Allocate a buffer large enough to hold the names of nstrpush modules
1909 * and one driver, with spaces between and NUL terminator. If we can't
1910 * get memory, then we'll just log the driver name.
1911 */
1912 maxmodstr = nstrpush * (FMNAMESZ + 1);
1913 mnp = modnames = kmem_alloc(maxmodstr, KM_NOSLEEP);
1914
1915 /* march down write side to print log message down to the driver */
1916 wq = WR(rq);
1917
1918 /* make sure q_next doesn't shift around while we're grabbing data */
1919 claimstr(wq);
1920 qp = wq->q_next;
1921 do {
1922 dname = Q2NAME(qp);
1923 islast = !SAMESTR(qp) || qp->q_next == NULL;
1924 if (modnames == NULL) {
1925 /*
1926 * If we don't have memory, then get the driver name in
1927 * the log where we can see it. Note that memory
1928 * pressure is a possible cause of these sorts of bugs.
1929 */
1930 if (islast) {
1931 modnames = dname;
1932 maxmodstr = 0;
1933 }
1934 } else {
1935 mnp += snprintf(mnp, FMNAMESZ + 1, "%s", dname);
1936 if (!islast)
1937 *mnp++ = ' ';
1938 }
1939 qp = qp->q_next;
1940 } while (!islast);
1941 releasestr(wq);
1942 /* Cannot happen unless stream head is corrupt. */
1943 ASSERT(modnames != NULL);
1944 (void) strlog(rq->q_qinfo->qi_minfo->mi_idnum, 0, 1,
1945 SL_CONSOLE|SL_TRACE|SL_ERROR,
1946 "Warning: stream %p received duplicate %X M_IOC%s; module list: %s",
1947 rq->q_ptr, ((struct iocblk *)bp->b_rptr)->ioc_cmd,
1948 (DB_TYPE(bp) == M_IOCACK ? "ACK" : "NAK"), modnames);
1949 if (maxmodstr != 0)
1950 kmem_free(modnames, maxmodstr);
1951 }
1952
1953 int
1954 strrput_nondata(queue_t *q, mblk_t *bp)
1955 {
1956 struct stdata *stp;
1957 struct iocblk *iocbp;
1958 struct stroptions *sop;
1959 struct copyreq *reqp;
1960 struct copyresp *resp;
1961 unsigned char bpri;
1962 unsigned char flushed_already = 0;
1963
1964 stp = (struct stdata *)q->q_ptr;
1965
1966 ASSERT(!(stp->sd_flag & STPLEX));
1967 ASSERT(qclaimed(q));
1968
1969 switch (bp->b_datap->db_type) {
1970 case M_ERROR:
1971 /*
1972 * An error has occurred downstream, the errno is in the first
1973 * bytes of the message.
1974 */
1975 if ((bp->b_wptr - bp->b_rptr) == 2) { /* New flavor */
1976 unsigned char rw = 0;
1977
1978 mutex_enter(&stp->sd_lock);
1979 if (*bp->b_rptr != NOERROR) { /* read error */
1980 if (*bp->b_rptr != 0) {
1981 if (stp->sd_flag & STRDERR)
1982 flushed_already |= FLUSHR;
1983 stp->sd_flag |= STRDERR;
1984 rw |= FLUSHR;
1985 } else {
1986 stp->sd_flag &= ~STRDERR;
1987 }
1988 stp->sd_rerror = *bp->b_rptr;
1989 }
1990 bp->b_rptr++;
1991 if (*bp->b_rptr != NOERROR) { /* write error */
1992 if (*bp->b_rptr != 0) {
1993 if (stp->sd_flag & STWRERR)
1994 flushed_already |= FLUSHW;
1995 stp->sd_flag |= STWRERR;
1996 rw |= FLUSHW;
1997 } else {
1998 stp->sd_flag &= ~STWRERR;
1999 }
2000 stp->sd_werror = *bp->b_rptr;
2001 }
2002 if (rw) {
2003 TRACE_2(TR_FAC_STREAMS_FR, TR_STRRPUT_WAKE,
2004 "strrput cv_broadcast:q %p, bp %p",
2005 q, bp);
2006 cv_broadcast(&q->q_wait); /* readers */
2007 cv_broadcast(&_WR(q)->q_wait); /* writers */
2008 cv_broadcast(&stp->sd_monitor); /* ioctllers */
2009
2010 mutex_exit(&stp->sd_lock);
2011 pollwakeup(&stp->sd_pollist, POLLERR);
2012 mutex_enter(&stp->sd_lock);
2013
2014 if (stp->sd_sigflags & S_ERROR)
2015 strsendsig(stp->sd_siglist, S_ERROR, 0,
2016 ((rw & FLUSHR) ? stp->sd_rerror :
2017 stp->sd_werror));
2018 mutex_exit(&stp->sd_lock);
2019 /*
2020 * Send the M_FLUSH only
2021 * for the first M_ERROR
2022 * message on the stream
2023 */
2024 if (flushed_already == rw) {
2025 freemsg(bp);
2026 return (0);
2027 }
2028
2029 bp->b_datap->db_type = M_FLUSH;
2030 *bp->b_rptr = rw;
2031 bp->b_wptr = bp->b_rptr + 1;
2032 /*
2033 * Protect against the driver
2034 * passing up messages after
2035 * it has done a qprocsoff
2036 */
2037 if (_OTHERQ(q)->q_next == NULL)
2038 freemsg(bp);
2039 else
2040 qreply(q, bp);
2041 return (0);
2042 } else
2043 mutex_exit(&stp->sd_lock);
2044 } else if (*bp->b_rptr != 0) { /* Old flavor */
2045 if (stp->sd_flag & (STRDERR|STWRERR))
2046 flushed_already = FLUSHRW;
2047 mutex_enter(&stp->sd_lock);
2048 stp->sd_flag |= (STRDERR|STWRERR);
2049 stp->sd_rerror = *bp->b_rptr;
2050 stp->sd_werror = *bp->b_rptr;
2051 TRACE_2(TR_FAC_STREAMS_FR,
2052 TR_STRRPUT_WAKE2,
2053 "strrput wakeup #2:q %p, bp %p", q, bp);
2054 cv_broadcast(&q->q_wait); /* the readers */
2055 cv_broadcast(&_WR(q)->q_wait); /* the writers */
2056 cv_broadcast(&stp->sd_monitor); /* ioctllers */
2057
2058 mutex_exit(&stp->sd_lock);
2059 pollwakeup(&stp->sd_pollist, POLLERR);
2060 mutex_enter(&stp->sd_lock);
2061
2062 if (stp->sd_sigflags & S_ERROR)
2063 strsendsig(stp->sd_siglist, S_ERROR, 0,
2064 (stp->sd_werror ? stp->sd_werror :
2065 stp->sd_rerror));
2066 mutex_exit(&stp->sd_lock);
2067
2068 /*
2069 * Send the M_FLUSH only
2070 * for the first M_ERROR
2071 * message on the stream
2072 */
2073 if (flushed_already != FLUSHRW) {
2074 bp->b_datap->db_type = M_FLUSH;
2075 *bp->b_rptr = FLUSHRW;
2076 /*
2077 * Protect against the driver passing up
2078 * messages after it has done a
2079 * qprocsoff.
2080 */
2081 if (_OTHERQ(q)->q_next == NULL)
2082 freemsg(bp);
2083 else
2084 qreply(q, bp);
2085 return (0);
2086 }
2087 }
2088 freemsg(bp);
2089 return (0);
2090
2091 case M_HANGUP:
2092
2093 freemsg(bp);
2094 mutex_enter(&stp->sd_lock);
2095 stp->sd_werror = ENXIO;
2096 stp->sd_flag |= STRHUP;
2097 stp->sd_flag &= ~(WSLEEP|RSLEEP);
2098
2099 /*
2100 * send signal if controlling tty
2101 */
2102
2103 if (stp->sd_sidp) {
2104 prsignal(stp->sd_sidp, SIGHUP);
2105 if (stp->sd_sidp != stp->sd_pgidp)
2106 pgsignal(stp->sd_pgidp, SIGTSTP);
2107 }
2108
2109 /*
2110 * wake up read, write, and exception pollers and
2111 * reset wakeup mechanism.
2112 */
2113 cv_broadcast(&q->q_wait); /* the readers */
2114 cv_broadcast(&_WR(q)->q_wait); /* the writers */
2115 cv_broadcast(&stp->sd_monitor); /* the ioctllers */
2116 strhup(stp);
2117 mutex_exit(&stp->sd_lock);
2118 return (0);
2119
2120 case M_UNHANGUP:
2121 freemsg(bp);
2122 mutex_enter(&stp->sd_lock);
2123 stp->sd_werror = 0;
2124 stp->sd_flag &= ~STRHUP;
2125 mutex_exit(&stp->sd_lock);
2126 return (0);
2127
2128 case M_SIG:
2129 /*
2130 * Someone downstream wants to post a signal. The
2131 * signal to post is contained in the first byte of the
2132 * message. If the message would go on the front of
2133 * the queue, send a signal to the process group
2134 * (if not SIGPOLL) or to the siglist processes
2135 * (SIGPOLL). If something is already on the queue,
2136 * OR if we are delivering a delayed suspend (*sigh*
2137 * another "tty" hack) and there's no one sleeping already,
2138 * just enqueue the message.
2139 */
2140 mutex_enter(&stp->sd_lock);
2141 if (q->q_first || (*bp->b_rptr == SIGTSTP &&
2142 !(stp->sd_flag & RSLEEP))) {
2143 (void) putq(q, bp);
2144 mutex_exit(&stp->sd_lock);
2145 return (0);
2146 }
2147 mutex_exit(&stp->sd_lock);
2148 /* FALLTHRU */
2149
2150 case M_PCSIG:
2151 /*
2152 * Don't enqueue, just post the signal.
2153 */
2154 strsignal(stp, *bp->b_rptr, 0L);
2155 freemsg(bp);
2156 return (0);
2157
2158 case M_CMD:
2159 if (MBLKL(bp) != sizeof (cmdblk_t)) {
2160 freemsg(bp);
2161 return (0);
2162 }
2163
2164 mutex_enter(&stp->sd_lock);
2165 if (stp->sd_flag & STRCMDWAIT) {
2166 ASSERT(stp->sd_cmdblk == NULL);
2167 stp->sd_cmdblk = bp;
2168 cv_broadcast(&stp->sd_monitor);
2169 mutex_exit(&stp->sd_lock);
2170 } else {
2171 mutex_exit(&stp->sd_lock);
2172 freemsg(bp);
2173 }
2174 return (0);
2175
2176 case M_FLUSH:
2177 /*
2178 * Flush queues. The indication of which queues to flush
2179 * is in the first byte of the message. If the read queue
2180 * is specified, then flush it. If FLUSHBAND is set, just
2181 * flush the band specified by the second byte of the message.
2182 *
2183 * If a module has issued a M_SETOPT to not flush hi
2184 * priority messages off of the stream head, then pass this
2185 * flag into the flushq code to preserve such messages.
2186 */
2187
2188 if (*bp->b_rptr & FLUSHR) {
2189 mutex_enter(&stp->sd_lock);
2190 if (*bp->b_rptr & FLUSHBAND) {
2191 ASSERT((bp->b_wptr - bp->b_rptr) >= 2);
2192 flushband(q, *(bp->b_rptr + 1), FLUSHALL);
2193 } else
2194 flushq_common(q, FLUSHALL,
2195 stp->sd_read_opt & RFLUSHPCPROT);
2196 if ((q->q_first == NULL) ||
2197 (q->q_first->b_datap->db_type < QPCTL))
2198 stp->sd_flag &= ~STRPRI;
2199 else {
2200 ASSERT(stp->sd_flag & STRPRI);
2201 }
2202 mutex_exit(&stp->sd_lock);
2203 }
2204 if ((*bp->b_rptr & FLUSHW) && !(bp->b_flag & MSGNOLOOP)) {
2205 *bp->b_rptr &= ~FLUSHR;
2206 bp->b_flag |= MSGNOLOOP;
2207 /*
2208 * Protect against the driver passing up
2209 * messages after it has done a qprocsoff.
2210 */
2211 if (_OTHERQ(q)->q_next == NULL)
2212 freemsg(bp);
2213 else
2214 qreply(q, bp);
2215 return (0);
2216 }
2217 freemsg(bp);
2218 return (0);
2219
2220 case M_IOCACK:
2221 case M_IOCNAK:
2222 iocbp = (struct iocblk *)bp->b_rptr;
2223 /*
2224 * If not waiting for ACK or NAK then just free msg.
2225 * If incorrect id sequence number then just free msg.
2226 * If already have ACK or NAK for user then this is a
2227 * duplicate, display a warning and free the msg.
2228 */
2229 mutex_enter(&stp->sd_lock);
2230 if ((stp->sd_flag & IOCWAIT) == 0 || stp->sd_iocblk ||
2231 (stp->sd_iocid != iocbp->ioc_id)) {
2232 /*
2233 * If the ACK/NAK is a dup, display a message
2234 * Dup is when sd_iocid == ioc_id, and
2235 * sd_iocblk == <valid ptr> or -1 (the former
2236 * is when an ioctl has been put on the stream
2237 * head, but has not yet been consumed, the
2238 * later is when it has been consumed).
2239 */
2240 if ((stp->sd_iocid == iocbp->ioc_id) &&
2241 (stp->sd_iocblk != NULL)) {
2242 log_dupioc(q, bp);
2243 }
2244 freemsg(bp);
2245 mutex_exit(&stp->sd_lock);
2246 return (0);
2247 }
2248
2249 /*
2250 * Assign ACK or NAK to user and wake up.
2251 */
2252 stp->sd_iocblk = bp;
2253 cv_broadcast(&stp->sd_monitor);
2254 mutex_exit(&stp->sd_lock);
2255 return (0);
2256
2257 case M_COPYIN:
2258 case M_COPYOUT:
2259 reqp = (struct copyreq *)bp->b_rptr;
2260
2261 /*
2262 * If not waiting for ACK or NAK then just fail request.
2263 * If already have ACK, NAK, or copy request, then just
2264 * fail request.
2265 * If incorrect id sequence number then just fail request.
2266 */
2267 mutex_enter(&stp->sd_lock);
2268 if ((stp->sd_flag & IOCWAIT) == 0 || stp->sd_iocblk ||
2269 (stp->sd_iocid != reqp->cq_id)) {
2270 if (bp->b_cont) {
2271 freemsg(bp->b_cont);
2272 bp->b_cont = NULL;
2273 }
2274 bp->b_datap->db_type = M_IOCDATA;
2275 bp->b_wptr = bp->b_rptr + sizeof (struct copyresp);
2276 resp = (struct copyresp *)bp->b_rptr;
2277 resp->cp_rval = (caddr_t)1; /* failure */
2278 mutex_exit(&stp->sd_lock);
2279 putnext(stp->sd_wrq, bp);
2280 return (0);
2281 }
2282
2283 /*
2284 * Assign copy request to user and wake up.
2285 */
2286 stp->sd_iocblk = bp;
2287 cv_broadcast(&stp->sd_monitor);
2288 mutex_exit(&stp->sd_lock);
2289 return (0);
2290
2291 case M_SETOPTS:
2292 /*
2293 * Set stream head options (read option, write offset,
2294 * min/max packet size, and/or high/low water marks for
2295 * the read side only).
2296 */
2297
2298 bpri = 0;
2299 sop = (struct stroptions *)bp->b_rptr;
2300 mutex_enter(&stp->sd_lock);
2301 if (sop->so_flags & SO_READOPT) {
2302 switch (sop->so_readopt & RMODEMASK) {
2303 case RNORM:
2304 stp->sd_read_opt &= ~(RD_MSGDIS | RD_MSGNODIS);
2305 break;
2306
2307 case RMSGD:
2308 stp->sd_read_opt =
2309 ((stp->sd_read_opt & ~RD_MSGNODIS) |
2310 RD_MSGDIS);
2311 break;
2312
2313 case RMSGN:
2314 stp->sd_read_opt =
2315 ((stp->sd_read_opt & ~RD_MSGDIS) |
2316 RD_MSGNODIS);
2317 break;
2318 }
2319 switch (sop->so_readopt & RPROTMASK) {
2320 case RPROTNORM:
2321 stp->sd_read_opt &= ~(RD_PROTDAT | RD_PROTDIS);
2322 break;
2323
2324 case RPROTDAT:
2325 stp->sd_read_opt =
2326 ((stp->sd_read_opt & ~RD_PROTDIS) |
2327 RD_PROTDAT);
2328 break;
2329
2330 case RPROTDIS:
2331 stp->sd_read_opt =
2332 ((stp->sd_read_opt & ~RD_PROTDAT) |
2333 RD_PROTDIS);
2334 break;
2335 }
2336 switch (sop->so_readopt & RFLUSHMASK) {
2337 case RFLUSHPCPROT:
2338 /*
2339 * This sets the stream head to NOT flush
2340 * M_PCPROTO messages.
2341 */
2342 stp->sd_read_opt |= RFLUSHPCPROT;
2343 break;
2344 }
2345 }
2346 if (sop->so_flags & SO_ERROPT) {
2347 switch (sop->so_erropt & RERRMASK) {
2348 case RERRNORM:
2349 stp->sd_flag &= ~STRDERRNONPERSIST;
2350 break;
2351 case RERRNONPERSIST:
2352 stp->sd_flag |= STRDERRNONPERSIST;
2353 break;
2354 }
2355 switch (sop->so_erropt & WERRMASK) {
2356 case WERRNORM:
2357 stp->sd_flag &= ~STWRERRNONPERSIST;
2358 break;
2359 case WERRNONPERSIST:
2360 stp->sd_flag |= STWRERRNONPERSIST;
2361 break;
2362 }
2363 }
2364 if (sop->so_flags & SO_COPYOPT) {
2365 if (sop->so_copyopt & ZCVMSAFE) {
2366 stp->sd_copyflag |= STZCVMSAFE;
2367 stp->sd_copyflag &= ~STZCVMUNSAFE;
2368 } else if (sop->so_copyopt & ZCVMUNSAFE) {
2369 stp->sd_copyflag |= STZCVMUNSAFE;
2370 stp->sd_copyflag &= ~STZCVMSAFE;
2371 }
2372
2373 if (sop->so_copyopt & COPYCACHED) {
2374 stp->sd_copyflag |= STRCOPYCACHED;
2375 }
2376 }
2377 if (sop->so_flags & SO_WROFF)
2378 stp->sd_wroff = sop->so_wroff;
2379 if (sop->so_flags & SO_TAIL)
2380 stp->sd_tail = sop->so_tail;
2381 if (sop->so_flags & SO_MINPSZ)
2382 q->q_minpsz = sop->so_minpsz;
2383 if (sop->so_flags & SO_MAXPSZ)
2384 q->q_maxpsz = sop->so_maxpsz;
2385 if (sop->so_flags & SO_MAXBLK)
2386 stp->sd_maxblk = sop->so_maxblk;
2387 if (sop->so_flags & SO_HIWAT) {
2388 if (sop->so_flags & SO_BAND) {
2389 if (strqset(q, QHIWAT,
2390 sop->so_band, sop->so_hiwat)) {
2391 cmn_err(CE_WARN, "strrput: could not "
2392 "allocate qband\n");
2393 } else {
2394 bpri = sop->so_band;
2395 }
2396 } else {
2397 q->q_hiwat = sop->so_hiwat;
2398 }
2399 }
2400 if (sop->so_flags & SO_LOWAT) {
2401 if (sop->so_flags & SO_BAND) {
2402 if (strqset(q, QLOWAT,
2403 sop->so_band, sop->so_lowat)) {
2404 cmn_err(CE_WARN, "strrput: could not "
2405 "allocate qband\n");
2406 } else {
2407 bpri = sop->so_band;
2408 }
2409 } else {
2410 q->q_lowat = sop->so_lowat;
2411 }
2412 }
2413 if (sop->so_flags & SO_MREADON)
2414 stp->sd_flag |= SNDMREAD;
2415 if (sop->so_flags & SO_MREADOFF)
2416 stp->sd_flag &= ~SNDMREAD;
2417 if (sop->so_flags & SO_NDELON)
2418 stp->sd_flag |= OLDNDELAY;
2419 if (sop->so_flags & SO_NDELOFF)
2420 stp->sd_flag &= ~OLDNDELAY;
2421 if (sop->so_flags & SO_ISTTY)
2422 stp->sd_flag |= STRISTTY;
2423 if (sop->so_flags & SO_ISNTTY)
2424 stp->sd_flag &= ~STRISTTY;
2425 if (sop->so_flags & SO_TOSTOP)
2426 stp->sd_flag |= STRTOSTOP;
2427 if (sop->so_flags & SO_TONSTOP)
2428 stp->sd_flag &= ~STRTOSTOP;
2429 if (sop->so_flags & SO_DELIM)
2430 stp->sd_flag |= STRDELIM;
2431 if (sop->so_flags & SO_NODELIM)
2432 stp->sd_flag &= ~STRDELIM;
2433
2434 mutex_exit(&stp->sd_lock);
2435 freemsg(bp);
2436
2437 /* Check backenable in case the water marks changed */
2438 qbackenable(q, bpri);
2439 return (0);
2440
2441 /*
2442 * The following set of cases deal with situations where two stream
2443 * heads are connected to each other (twisted streams). These messages
2444 * have no meaning at the stream head.
2445 */
2446 case M_BREAK:
2447 case M_CTL:
2448 case M_DELAY:
2449 case M_START:
2450 case M_STOP:
2451 case M_IOCDATA:
2452 case M_STARTI:
2453 case M_STOPI:
2454 freemsg(bp);
2455 return (0);
2456
2457 case M_IOCTL:
2458 /*
2459 * Always NAK this condition
2460 * (makes no sense)
2461 * If there is one or more threads in the read side
2462 * rwnext we have to defer the nacking until that thread
2463 * returns (in strget).
2464 */
2465 mutex_enter(&stp->sd_lock);
2466 if (stp->sd_struiodnak != 0) {
2467 /*
2468 * Defer NAK to the streamhead. Queue at the end
2469 * the list.
2470 */
2471 mblk_t *mp = stp->sd_struionak;
2472
2473 while (mp && mp->b_next)
2474 mp = mp->b_next;
2475 if (mp)
2476 mp->b_next = bp;
2477 else
2478 stp->sd_struionak = bp;
2479 bp->b_next = NULL;
2480 mutex_exit(&stp->sd_lock);
2481 return (0);
2482 }
2483 mutex_exit(&stp->sd_lock);
2484
2485 bp->b_datap->db_type = M_IOCNAK;
2486 /*
2487 * Protect against the driver passing up
2488 * messages after it has done a qprocsoff.
2489 */
2490 if (_OTHERQ(q)->q_next == NULL)
2491 freemsg(bp);
2492 else
2493 qreply(q, bp);
2494 return (0);
2495
2496 default:
2497 #ifdef DEBUG
2498 cmn_err(CE_WARN,
2499 "bad message type %x received at stream head\n",
2500 bp->b_datap->db_type);
2501 #endif
2502 freemsg(bp);
2503 return (0);
2504 }
2505
2506 /* NOTREACHED */
2507 }
2508
2509 /*
2510 * Check if the stream pointed to by `stp' can be written to, and return an
2511 * error code if not. If `eiohup' is set, then return EIO if STRHUP is set.
2512 * If `sigpipeok' is set and the SW_SIGPIPE option is enabled on the stream,
2513 * then always return EPIPE and send a SIGPIPE to the invoking thread.
2514 */
2515 static int
2516 strwriteable(struct stdata *stp, boolean_t eiohup, boolean_t sigpipeok)
2517 {
2518 int error;
2519
2520 ASSERT(MUTEX_HELD(&stp->sd_lock));
2521
2522 /*
2523 * For modem support, POSIX states that on writes, EIO should
2524 * be returned if the stream has been hung up.
2525 */
2526 if (eiohup && (stp->sd_flag & (STPLEX|STRHUP)) == STRHUP)
2527 error = EIO;
2528 else
2529 error = strgeterr(stp, STRHUP|STPLEX|STWRERR, 0);
2530
2531 if (error != 0) {
2532 if (!(stp->sd_flag & STPLEX) &&
2533 (stp->sd_wput_opt & SW_SIGPIPE) && sigpipeok) {
2534 tsignal(curthread, SIGPIPE);
2535 error = EPIPE;
2536 }
2537 }
2538
2539 return (error);
2540 }
2541
2542 /*
2543 * Copyin and send data down a stream.
2544 * The caller will allocate and copyin any control part that precedes the
2545 * message and pass that in as mctl.
2546 *
2547 * Caller should *not* hold sd_lock.
2548 * When EWOULDBLOCK is returned the caller has to redo the canputnext
2549 * under sd_lock in order to avoid missing a backenabling wakeup.
2550 *
2551 * Use iosize = -1 to not send any M_DATA. iosize = 0 sends zero-length M_DATA.
2552 *
2553 * Set MSG_IGNFLOW in flags to ignore flow control for hipri messages.
2554 * For sync streams we can only ignore flow control by reverting to using
2555 * putnext.
2556 *
2557 * If sd_maxblk is less than *iosize this routine might return without
2558 * transferring all of *iosize. In all cases, on return *iosize will contain
2559 * the amount of data that was transferred.
2560 */
2561 static int
2562 strput(struct stdata *stp, mblk_t *mctl, struct uio *uiop, ssize_t *iosize,
2563 int b_flag, int pri, int flags)
2564 {
2565 struiod_t uiod;
2566 struct iovec buf[IOV_MAX_STACK];
2567 int iovlen = 0;
2568 mblk_t *mp;
2569 queue_t *wqp = stp->sd_wrq;
2570 int error = 0;
2571 ssize_t count = *iosize;
2572
2573 ASSERT(MUTEX_NOT_HELD(&stp->sd_lock));
2574
2575 if (uiop != NULL && count >= 0)
2576 flags |= stp->sd_struiowrq ? STRUIO_POSTPONE : 0;
2577
2578 if (!(flags & STRUIO_POSTPONE)) {
2579 /*
2580 * Use regular canputnext, strmakedata, putnext sequence.
2581 */
2582 if (pri == 0) {
2583 if (!canputnext(wqp) && !(flags & MSG_IGNFLOW)) {
2584 freemsg(mctl);
2585 return (EWOULDBLOCK);
2586 }
2587 } else {
2588 if (!(flags & MSG_IGNFLOW) && !bcanputnext(wqp, pri)) {
2589 freemsg(mctl);
2590 return (EWOULDBLOCK);
2591 }
2592 }
2593
2594 if ((error = strmakedata(iosize, uiop, stp, flags,
2595 &mp)) != 0) {
2596 freemsg(mctl);
2597 /*
2598 * need to change return code to ENOMEM
2599 * so that this is not confused with
2600 * flow control, EAGAIN.
2601 */
2602
2603 if (error == EAGAIN)
2604 return (ENOMEM);
2605 else
2606 return (error);
2607 }
2608 if (mctl != NULL) {
2609 if (mctl->b_cont == NULL)
2610 mctl->b_cont = mp;
2611 else if (mp != NULL)
2612 linkb(mctl, mp);
2613 mp = mctl;
2614 } else if (mp == NULL)
2615 return (0);
2616
2617 mp->b_flag |= b_flag;
2618 mp->b_band = (uchar_t)pri;
2619
2620 if (flags & MSG_IGNFLOW) {
2621 /*
2622 * XXX Hack: Don't get stuck running service
2623 * procedures. This is needed for sockfs when
2624 * sending the unbind message out of the rput
2625 * procedure - we don't want a put procedure
2626 * to run service procedures.
2627 */
2628 putnext(wqp, mp);
2629 } else {
2630 stream_willservice(stp);
2631 putnext(wqp, mp);
2632 stream_runservice(stp);
2633 }
2634 return (0);
2635 }
2636 /*
2637 * Stream supports rwnext() for the write side.
2638 */
2639 if ((error = strmakedata(iosize, uiop, stp, flags, &mp)) != 0) {
2640 freemsg(mctl);
2641 /*
2642 * map EAGAIN to ENOMEM since EAGAIN means "flow controlled".
2643 */
2644 return (error == EAGAIN ? ENOMEM : error);
2645 }
2646 if (mctl != NULL) {
2647 if (mctl->b_cont == NULL)
2648 mctl->b_cont = mp;
2649 else if (mp != NULL)
2650 linkb(mctl, mp);
2651 mp = mctl;
2652 } else if (mp == NULL) {
2653 return (0);
2654 }
2655
2656 mp->b_flag |= b_flag;
2657 mp->b_band = (uchar_t)pri;
2658
2659 if (uiop->uio_iovcnt > IOV_MAX_STACK) {
2660 iovlen = uiop->uio_iovcnt * sizeof (iovec_t);
2661 uiod.d_iov = (struct iovec *)kmem_alloc(iovlen, KM_SLEEP);
2662 } else {
2663 uiod.d_iov = buf;
2664 }
2665
2666 (void) uiodup(uiop, &uiod.d_uio, uiod.d_iov, uiop->uio_iovcnt);
2667 uiod.d_uio.uio_offset = 0;
2668 uiod.d_mp = mp;
2669 error = rwnext(wqp, &uiod);
2670 if (! uiod.d_mp) {
2671 uioskip(uiop, *iosize);
2672 if (iovlen != 0)
2673 kmem_free(uiod.d_iov, iovlen);
2674 return (error);
2675 }
2676 ASSERT(mp == uiod.d_mp);
2677 if (error == EINVAL) {
2678 /*
2679 * The stream plumbing must have changed while
2680 * we were away, so just turn off rwnext()s.
2681 */
2682 error = 0;
2683 } else if (error == EBUSY || error == EWOULDBLOCK) {
2684 /*
2685 * Couldn't enter a perimeter or took a page fault,
2686 * so fall-back to putnext().
2687 */
2688 error = 0;
2689 } else {
2690 freemsg(mp);
2691 if (iovlen != 0)
2692 kmem_free(uiod.d_iov, iovlen);
2693 return (error);
2694 }
2695 /* Have to check canput before consuming data from the uio */
2696 if (pri == 0) {
2697 if (!canputnext(wqp) && !(flags & MSG_IGNFLOW)) {
2698 freemsg(mp);
2699 if (iovlen != 0)
2700 kmem_free(uiod.d_iov, iovlen);
2701 return (EWOULDBLOCK);
2702 }
2703 } else {
2704 if (!bcanputnext(wqp, pri) && !(flags & MSG_IGNFLOW)) {
2705 freemsg(mp);
2706 if (iovlen != 0)
2707 kmem_free(uiod.d_iov, iovlen);
2708 return (EWOULDBLOCK);
2709 }
2710 }
2711 ASSERT(mp == uiod.d_mp);
2712 /* Copyin data from the uio */
2713 if ((error = struioget(wqp, mp, &uiod, 0)) != 0) {
2714 freemsg(mp);
2715 if (iovlen != 0)
2716 kmem_free(uiod.d_iov, iovlen);
2717 return (error);
2718 }
2719 uioskip(uiop, *iosize);
2720 if (flags & MSG_IGNFLOW) {
2721 /*
2722 * XXX Hack: Don't get stuck running service procedures.
2723 * This is needed for sockfs when sending the unbind message
2724 * out of the rput procedure - we don't want a put procedure
2725 * to run service procedures.
2726 */
2727 putnext(wqp, mp);
2728 } else {
2729 stream_willservice(stp);
2730 putnext(wqp, mp);
2731 stream_runservice(stp);
2732 }
2733 if (iovlen != 0)
2734 kmem_free(uiod.d_iov, iovlen);
2735 return (0);
2736 }
2737
2738 /*
2739 * Write attempts to break the write request into messages conforming
2740 * with the minimum and maximum packet sizes set downstream.
2741 *
2742 * Write will not block if downstream queue is full and
2743 * O_NDELAY is set, otherwise it will block waiting for the queue to get room.
2744 *
2745 * A write of zero bytes gets packaged into a zero length message and sent
2746 * downstream like any other message.
2747 *
2748 * If buffers of the requested sizes are not available, the write will
2749 * sleep until the buffers become available.
2750 *
2751 * Write (if specified) will supply a write offset in a message if it
2752 * makes sense. This can be specified by downstream modules as part of
2753 * a M_SETOPTS message. Write will not supply the write offset if it
2754 * cannot supply any data in a buffer. In other words, write will never
2755 * send down an empty packet due to a write offset.
2756 */
2757 /* ARGSUSED2 */
2758 int
2759 strwrite(struct vnode *vp, struct uio *uiop, cred_t *crp)
2760 {
2761 return (strwrite_common(vp, uiop, crp, 0));
2762 }
2763
2764 /* ARGSUSED2 */
2765 int
2766 strwrite_common(struct vnode *vp, struct uio *uiop, cred_t *crp, int wflag)
2767 {
2768 struct stdata *stp;
2769 struct queue *wqp;
2770 ssize_t rmin, rmax;
2771 ssize_t iosize;
2772 int waitflag;
2773 int tempmode;
2774 int error = 0;
2775 int b_flag;
2776
2777 ASSERT(vp->v_stream);
2778 stp = vp->v_stream;
2779
2780 mutex_enter(&stp->sd_lock);
2781
2782 if ((error = i_straccess(stp, JCWRITE)) != 0) {
2783 mutex_exit(&stp->sd_lock);
2784 return (error);
2785 }
2786
2787 if (stp->sd_flag & (STWRERR|STRHUP|STPLEX)) {
2788 error = strwriteable(stp, B_TRUE, B_TRUE);
2789 if (error != 0) {
2790 mutex_exit(&stp->sd_lock);
2791 return (error);
2792 }
2793 }
2794
2795 mutex_exit(&stp->sd_lock);
2796
2797 wqp = stp->sd_wrq;
2798
2799 /* get these values from them cached in the stream head */
2800 rmin = stp->sd_qn_minpsz;
2801 rmax = stp->sd_qn_maxpsz;
2802
2803 /*
2804 * Check the min/max packet size constraints. If min packet size
2805 * is non-zero, the write cannot be split into multiple messages
2806 * and still guarantee the size constraints.
2807 */
2808 TRACE_1(TR_FAC_STREAMS_FR, TR_STRWRITE_IN, "strwrite in:q %p", wqp);
2809
2810 ASSERT((rmax >= 0) || (rmax == INFPSZ));
2811 if (rmax == 0) {
2812 return (0);
2813 }
2814 if (rmin > 0) {
2815 if (uiop->uio_resid < rmin) {
2816 TRACE_3(TR_FAC_STREAMS_FR, TR_STRWRITE_OUT,
2817 "strwrite out:q %p out %d error %d",
2818 wqp, 0, ERANGE);
2819 return (ERANGE);
2820 }
2821 if ((rmax != INFPSZ) && (uiop->uio_resid > rmax)) {
2822 TRACE_3(TR_FAC_STREAMS_FR, TR_STRWRITE_OUT,
2823 "strwrite out:q %p out %d error %d",
2824 wqp, 1, ERANGE);
2825 return (ERANGE);
2826 }
2827 }
2828
2829 /*
2830 * Do until count satisfied or error.
2831 */
2832 waitflag = WRITEWAIT | wflag;
2833 if (stp->sd_flag & OLDNDELAY)
2834 tempmode = uiop->uio_fmode & ~FNDELAY;
2835 else
2836 tempmode = uiop->uio_fmode;
2837
2838 if (rmax == INFPSZ)
2839 rmax = uiop->uio_resid;
2840
2841 /*
2842 * Note that tempmode does not get used in strput/strmakedata
2843 * but only in strwaitq. The other routines use uio_fmode
2844 * unmodified.
2845 */
2846
2847 /* LINTED: constant in conditional context */
2848 while (1) { /* breaks when uio_resid reaches zero */
2849 /*
2850 * Determine the size of the next message to be
2851 * packaged. May have to break write into several
2852 * messages based on max packet size.
2853 */
2854 iosize = MIN(uiop->uio_resid, rmax);
2855
2856 /*
2857 * Put block downstream when flow control allows it.
2858 */
2859 if ((stp->sd_flag & STRDELIM) && (uiop->uio_resid == iosize))
2860 b_flag = MSGDELIM;
2861 else
2862 b_flag = 0;
2863
2864 for (;;) {
2865 int done = 0;
2866
2867 error = strput(stp, NULL, uiop, &iosize, b_flag, 0, 0);
2868 if (error == 0)
2869 break;
2870 if (error != EWOULDBLOCK)
2871 goto out;
2872
2873 mutex_enter(&stp->sd_lock);
2874 /*
2875 * Check for a missed wakeup.
2876 * Needed since strput did not hold sd_lock across
2877 * the canputnext.
2878 */
2879 if (canputnext(wqp)) {
2880 /* Try again */
2881 mutex_exit(&stp->sd_lock);
2882 continue;
2883 }
2884 TRACE_1(TR_FAC_STREAMS_FR, TR_STRWRITE_WAIT,
2885 "strwrite wait:q %p wait", wqp);
2886 if ((error = strwaitq(stp, waitflag, (ssize_t)0,
2887 tempmode, -1, &done)) != 0 || done) {
2888 mutex_exit(&stp->sd_lock);
2889 if ((vp->v_type == VFIFO) &&
2890 (uiop->uio_fmode & FNDELAY) &&
2891 (error == EAGAIN))
2892 error = 0;
2893 goto out;
2894 }
2895 TRACE_1(TR_FAC_STREAMS_FR, TR_STRWRITE_WAKE,
2896 "strwrite wake:q %p awakes", wqp);
2897 if ((error = i_straccess(stp, JCWRITE)) != 0) {
2898 mutex_exit(&stp->sd_lock);
2899 goto out;
2900 }
2901 mutex_exit(&stp->sd_lock);
2902 }
2903 waitflag |= NOINTR;
2904 TRACE_2(TR_FAC_STREAMS_FR, TR_STRWRITE_RESID,
2905 "strwrite resid:q %p uiop %p", wqp, uiop);
2906 if (uiop->uio_resid) {
2907 /* Recheck for errors - needed for sockets */
2908 if ((stp->sd_wput_opt & SW_RECHECK_ERR) &&
2909 (stp->sd_flag & (STWRERR|STRHUP|STPLEX))) {
2910 mutex_enter(&stp->sd_lock);
2911 error = strwriteable(stp, B_FALSE, B_TRUE);
2912 mutex_exit(&stp->sd_lock);
2913 if (error != 0)
2914 return (error);
2915 }
2916 continue;
2917 }
2918 break;
2919 }
2920 out:
2921 /*
2922 * For historical reasons, applications expect EAGAIN when a data
2923 * mblk_t cannot be allocated, so change ENOMEM back to EAGAIN.
2924 */
2925 if (error == ENOMEM)
2926 error = EAGAIN;
2927 TRACE_3(TR_FAC_STREAMS_FR, TR_STRWRITE_OUT,
2928 "strwrite out:q %p out %d error %d", wqp, 2, error);
2929 return (error);
2930 }
2931
2932 /*
2933 * Stream head write service routine.
2934 * Its job is to wake up any sleeping writers when a queue
2935 * downstream needs data (part of the flow control in putq and getq).
2936 * It also must wake anyone sleeping on a poll().
2937 * For stream head right below mux module, it must also invoke put procedure
2938 * of next downstream module.
2939 */
2940 int
2941 strwsrv(queue_t *q)
2942 {
2943 struct stdata *stp;
2944 queue_t *tq;
2945 qband_t *qbp;
2946 int i;
2947 qband_t *myqbp;
2948 int isevent;
2949 unsigned char qbf[NBAND]; /* band flushing backenable flags */
2950
2951 TRACE_1(TR_FAC_STREAMS_FR,
2952 TR_STRWSRV, "strwsrv:q %p", q);
2953 stp = (struct stdata *)q->q_ptr;
2954 ASSERT(qclaimed(q));
2955 mutex_enter(&stp->sd_lock);
2956 ASSERT(!(stp->sd_flag & STPLEX));
2957
2958 if (stp->sd_flag & WSLEEP) {
2959 stp->sd_flag &= ~WSLEEP;
2960 cv_broadcast(&q->q_wait);
2961 }
2962 mutex_exit(&stp->sd_lock);
2963
2964 /* The other end of a stream pipe went away. */
2965 if ((tq = q->q_next) == NULL) {
2966 return (0);
2967 }
2968
2969 /* Find the next module forward that has a service procedure */
2970 claimstr(q);
2971 tq = q->q_nfsrv;
2972 ASSERT(tq != NULL);
2973
2974 if ((q->q_flag & QBACK)) {
2975 if ((tq->q_flag & QFULL)) {
2976 mutex_enter(QLOCK(tq));
2977 if (!(tq->q_flag & QFULL)) {
2978 mutex_exit(QLOCK(tq));
2979 goto wakeup;
2980 }
2981 /*
2982 * The queue must have become full again. Set QWANTW
2983 * again so strwsrv will be back enabled when
2984 * the queue becomes non-full next time.
2985 */
2986 tq->q_flag |= QWANTW;
2987 mutex_exit(QLOCK(tq));
2988 } else {
2989 wakeup:
2990 pollwakeup(&stp->sd_pollist, POLLWRNORM);
2991 mutex_enter(&stp->sd_lock);
2992 if (stp->sd_sigflags & S_WRNORM)
2993 strsendsig(stp->sd_siglist, S_WRNORM, 0, 0);
2994 mutex_exit(&stp->sd_lock);
2995 }
2996 }
2997
2998 isevent = 0;
2999 i = 1;
3000 bzero((caddr_t)qbf, NBAND);
3001 mutex_enter(QLOCK(tq));
3002 if ((myqbp = q->q_bandp) != NULL)
3003 for (qbp = tq->q_bandp; qbp && myqbp; qbp = qbp->qb_next) {
3004 ASSERT(myqbp);
3005 if ((myqbp->qb_flag & QB_BACK)) {
3006 if (qbp->qb_flag & QB_FULL) {
3007 /*
3008 * The band must have become full again.
3009 * Set QB_WANTW again so strwsrv will
3010 * be back enabled when the band becomes
3011 * non-full next time.
3012 */
3013 qbp->qb_flag |= QB_WANTW;
3014 } else {
3015 isevent = 1;
3016 qbf[i] = 1;
3017 }
3018 }
3019 myqbp = myqbp->qb_next;
3020 i++;
3021 }
3022 mutex_exit(QLOCK(tq));
3023
3024 if (isevent) {
3025 for (i = tq->q_nband; i; i--) {
3026 if (qbf[i]) {
3027 pollwakeup(&stp->sd_pollist, POLLWRBAND);
3028 mutex_enter(&stp->sd_lock);
3029 if (stp->sd_sigflags & S_WRBAND)
3030 strsendsig(stp->sd_siglist, S_WRBAND,
3031 (uchar_t)i, 0);
3032 mutex_exit(&stp->sd_lock);
3033 }
3034 }
3035 }
3036
3037 releasestr(q);
3038 return (0);
3039 }
3040
3041 /*
3042 * Special case of strcopyin/strcopyout for copying
3043 * struct strioctl that can deal with both data
3044 * models.
3045 */
3046
3047 #ifdef _LP64
3048
3049 static int
3050 strcopyin_strioctl(void *from, void *to, int flag, int copyflag)
3051 {
3052 struct strioctl32 strioc32;
3053 struct strioctl *striocp;
3054
3055 if (copyflag & U_TO_K) {
3056 ASSERT((copyflag & K_TO_K) == 0);
3057
3058 if ((flag & FMODELS) == DATAMODEL_ILP32) {
3059 if (copyin(from, &strioc32, sizeof (strioc32)))
3060 return (EFAULT);
3061
3062 striocp = (struct strioctl *)to;
3063 striocp->ic_cmd = strioc32.ic_cmd;
3064 striocp->ic_timout = strioc32.ic_timout;
3065 striocp->ic_len = strioc32.ic_len;
3066 striocp->ic_dp = (char *)(uintptr_t)strioc32.ic_dp;
3067
3068 } else { /* NATIVE data model */
3069 if (copyin(from, to, sizeof (struct strioctl))) {
3070 return (EFAULT);
3071 } else {
3072 return (0);
3073 }
3074 }
3075 } else {
3076 ASSERT(copyflag & K_TO_K);
3077 bcopy(from, to, sizeof (struct strioctl));
3078 }
3079 return (0);
3080 }
3081
3082 static int
3083 strcopyout_strioctl(void *from, void *to, int flag, int copyflag)
3084 {
3085 struct strioctl32 strioc32;
3086 struct strioctl *striocp;
3087
3088 if (copyflag & U_TO_K) {
3089 ASSERT((copyflag & K_TO_K) == 0);
3090
3091 if ((flag & FMODELS) == DATAMODEL_ILP32) {
3092 striocp = (struct strioctl *)from;
3093 strioc32.ic_cmd = striocp->ic_cmd;
3094 strioc32.ic_timout = striocp->ic_timout;
3095 strioc32.ic_len = striocp->ic_len;
3096 strioc32.ic_dp = (caddr32_t)(uintptr_t)striocp->ic_dp;
3097 ASSERT((char *)(uintptr_t)strioc32.ic_dp ==
3098 striocp->ic_dp);
3099
3100 if (copyout(&strioc32, to, sizeof (strioc32)))
3101 return (EFAULT);
3102
3103 } else { /* NATIVE data model */
3104 if (copyout(from, to, sizeof (struct strioctl))) {
3105 return (EFAULT);
3106 } else {
3107 return (0);
3108 }
3109 }
3110 } else {
3111 ASSERT(copyflag & K_TO_K);
3112 bcopy(from, to, sizeof (struct strioctl));
3113 }
3114 return (0);
3115 }
3116
3117 #else /* ! _LP64 */
3118
3119 /* ARGSUSED2 */
3120 static int
3121 strcopyin_strioctl(void *from, void *to, int flag, int copyflag)
3122 {
3123 return (strcopyin(from, to, sizeof (struct strioctl), copyflag));
3124 }
3125
3126 /* ARGSUSED2 */
3127 static int
3128 strcopyout_strioctl(void *from, void *to, int flag, int copyflag)
3129 {
3130 return (strcopyout(from, to, sizeof (struct strioctl), copyflag));
3131 }
3132
3133 #endif /* _LP64 */
3134
3135 /*
3136 * Determine type of job control semantics expected by user. The
3137 * possibilities are:
3138 * JCREAD - Behaves like read() on fd; send SIGTTIN
3139 * JCWRITE - Behaves like write() on fd; send SIGTTOU if TOSTOP set
3140 * JCSETP - Sets a value in the stream; send SIGTTOU, ignore TOSTOP
3141 * JCGETP - Gets a value in the stream; no signals.
3142 * See straccess in strsubr.c for usage of these values.
3143 *
3144 * This routine also returns -1 for I_STR as a special case; the
3145 * caller must call again with the real ioctl number for
3146 * classification.
3147 */
3148 static int
3149 job_control_type(int cmd)
3150 {
3151 switch (cmd) {
3152 case I_STR:
3153 return (-1);
3154
3155 case I_RECVFD:
3156 case I_E_RECVFD:
3157 return (JCREAD);
3158
3159 case I_FDINSERT:
3160 case I_SENDFD:
3161 return (JCWRITE);
3162
3163 case TCSETA:
3164 case TCSETAW:
3165 case TCSETAF:
3166 case TCSBRK:
3167 case TCXONC:
3168 case TCFLSH:
3169 case TCDSET: /* Obsolete */
3170 case TIOCSWINSZ:
3171 case TCSETS:
3172 case TCSETSW:
3173 case TCSETSF:
3174 case TIOCSETD:
3175 case TIOCHPCL:
3176 case TIOCSETP:
3177 case TIOCSETN:
3178 case TIOCEXCL:
3179 case TIOCNXCL:
3180 case TIOCFLUSH:
3181 case TIOCSETC:
3182 case TIOCLBIS:
3183 case TIOCLBIC:
3184 case TIOCLSET:
3185 case TIOCSBRK:
3186 case TIOCCBRK:
3187 case TIOCSDTR:
3188 case TIOCCDTR:
3189 case TIOCSLTC:
3190 case TIOCSTOP:
3191 case TIOCSTART:
3192 case TIOCSTI:
3193 case TIOCSPGRP:
3194 case TIOCMSET:
3195 case TIOCMBIS:
3196 case TIOCMBIC:
3197 case TIOCREMOTE:
3198 case TIOCSIGNAL:
3199 case LDSETT:
3200 case LDSMAP: /* Obsolete */
3201 case DIOCSETP:
3202 case I_FLUSH:
3203 case I_SRDOPT:
3204 case I_SETSIG:
3205 case I_SWROPT:
3206 case I_FLUSHBAND:
3207 case I_SETCLTIME:
3208 case I_SERROPT:
3209 case I_ESETSIG:
3210 case FIONBIO:
3211 case FIOASYNC:
3212 case FIOSETOWN:
3213 case JBOOT: /* Obsolete */
3214 case JTERM: /* Obsolete */
3215 case JTIMOM: /* Obsolete */
3216 case JZOMBOOT: /* Obsolete */
3217 case JAGENT: /* Obsolete */
3218 case JTRUN: /* Obsolete */
3219 case JXTPROTO: /* Obsolete */
3220 return (JCSETP);
3221 }
3222
3223 return (JCGETP);
3224 }
3225
3226 /*
3227 * ioctl for streams
3228 */
3229 int
3230 strioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, int copyflag,
3231 cred_t *crp, int *rvalp)
3232 {
3233 struct stdata *stp;
3234 struct strcmd *scp;
3235 struct strioctl strioc;
3236 struct uio uio;
3237 struct iovec iov;
3238 int access;
3239 mblk_t *mp;
3240 int error = 0;
3241 int done = 0;
3242 ssize_t rmin, rmax;
3243 queue_t *wrq;
3244 queue_t *rdq;
3245 boolean_t kioctl = B_FALSE;
3246 uint32_t auditing = AU_AUDITING();
3247
3248 if (flag & FKIOCTL) {
3249 copyflag = K_TO_K;
3250 kioctl = B_TRUE;
3251 }
3252 ASSERT(vp->v_stream);
3253 ASSERT(copyflag == U_TO_K || copyflag == K_TO_K);
3254 stp = vp->v_stream;
3255
3256 TRACE_3(TR_FAC_STREAMS_FR, TR_IOCTL_ENTER,
3257 "strioctl:stp %p cmd %X arg %lX", stp, cmd, arg);
3258
3259 /*
3260 * If the copy is kernel to kernel, make sure that the FNATIVE
3261 * flag is set. After this it would be a serious error to have
3262 * no model flag.
3263 */
3264 if (copyflag == K_TO_K)
3265 flag = (flag & ~FMODELS) | FNATIVE;
3266
3267 ASSERT((flag & FMODELS) != 0);
3268
3269 wrq = stp->sd_wrq;
3270 rdq = _RD(wrq);
3271
3272 access = job_control_type(cmd);
3273
3274 /* We should never see these here, should be handled by iwscn */
3275 if (cmd == SRIOCSREDIR || cmd == SRIOCISREDIR)
3276 return (EINVAL);
3277
3278 mutex_enter(&stp->sd_lock);
3279 if ((access != -1) && ((error = i_straccess(stp, access)) != 0)) {
3280 mutex_exit(&stp->sd_lock);
3281 return (error);
3282 }
3283 mutex_exit(&stp->sd_lock);
3284
3285 /*
3286 * Check for sgttyb-related ioctls first, and complain as
3287 * necessary.
3288 */
3289 switch (cmd) {
3290 case TIOCGETP:
3291 case TIOCSETP:
3292 case TIOCSETN:
3293 if (sgttyb_handling >= 2 && !sgttyb_complaint) {
3294 sgttyb_complaint = B_TRUE;
3295 cmn_err(CE_NOTE,
3296 "application used obsolete TIOC[GS]ET");
3297 }
3298 if (sgttyb_handling >= 3) {
3299 tsignal(curthread, SIGSYS);
3300 return (EIO);
3301 }
3302 break;
3303 }
3304
3305 mutex_enter(&stp->sd_lock);
3306
3307 switch (cmd) {
3308 case I_RECVFD:
3309 case I_E_RECVFD:
3310 case I_PEEK:
3311 case I_NREAD:
3312 case FIONREAD:
3313 case FIORDCHK:
3314 case I_ATMARK:
3315 case FIONBIO:
3316 case FIOASYNC:
3317 if (stp->sd_flag & (STRDERR|STPLEX)) {
3318 error = strgeterr(stp, STRDERR|STPLEX, 0);
3319 if (error != 0) {
3320 mutex_exit(&stp->sd_lock);
3321 return (error);
3322 }
3323 }
3324 break;
3325
3326 default:
3327 if (stp->sd_flag & (STRDERR|STWRERR|STPLEX)) {
3328 error = strgeterr(stp, STRDERR|STWRERR|STPLEX, 0);
3329 if (error != 0) {
3330 mutex_exit(&stp->sd_lock);
3331 return (error);
3332 }
3333 }
3334 }
3335
3336 mutex_exit(&stp->sd_lock);
3337
3338 switch (cmd) {
3339 default:
3340 /*
3341 * The stream head has hardcoded knowledge of a
3342 * miscellaneous collection of terminal-, keyboard- and
3343 * mouse-related ioctls, enumerated below. This hardcoded
3344 * knowledge allows the stream head to automatically
3345 * convert transparent ioctl requests made by userland
3346 * programs into I_STR ioctls which many old STREAMS
3347 * modules and drivers require.
3348 *
3349 * No new ioctls should ever be added to this list.
3350 * Instead, the STREAMS module or driver should be written
3351 * to either handle transparent ioctls or require any
3352 * userland programs to use I_STR ioctls (by returning
3353 * EINVAL to any transparent ioctl requests).
3354 *
3355 * More importantly, removing ioctls from this list should
3356 * be done with the utmost care, since our STREAMS modules
3357 * and drivers *count* on the stream head performing this
3358 * conversion, and thus may panic while processing
3359 * transparent ioctl request for one of these ioctls (keep
3360 * in mind that third party modules and drivers may have
3361 * similar problems).
3362 */
3363 if (((cmd & IOCTYPE) == LDIOC) ||
3364 ((cmd & IOCTYPE) == tIOC) ||
3365 ((cmd & IOCTYPE) == TIOC) ||
3366 ((cmd & IOCTYPE) == KIOC) ||
3367 ((cmd & IOCTYPE) == MSIOC) ||
3368 ((cmd & IOCTYPE) == VUIOC)) {
3369 /*
3370 * The ioctl is a tty ioctl - set up strioc buffer
3371 * and call strdoioctl() to do the work.
3372 */
3373 if (stp->sd_flag & STRHUP)
3374 return (ENXIO);
3375 strioc.ic_cmd = cmd;
3376 strioc.ic_timout = INFTIM;
3377
3378 switch (cmd) {
3379
3380 case TCXONC:
3381 case TCSBRK:
3382 case TCFLSH:
3383 case TCDSET:
3384 {
3385 int native_arg = (int)arg;
3386 strioc.ic_len = sizeof (int);
3387 strioc.ic_dp = (char *)&native_arg;
3388 return (strdoioctl(stp, &strioc, flag,
3389 K_TO_K, crp, rvalp));
3390 }
3391
3392 case TCSETA:
3393 case TCSETAW:
3394 case TCSETAF:
3395 strioc.ic_len = sizeof (struct termio);
3396 strioc.ic_dp = (char *)arg;
3397 return (strdoioctl(stp, &strioc, flag,
3398 copyflag, crp, rvalp));
3399
3400 case TCSETS:
3401 case TCSETSW:
3402 case TCSETSF:
3403 strioc.ic_len = sizeof (struct termios);
3404 strioc.ic_dp = (char *)arg;
3405 return (strdoioctl(stp, &strioc, flag,
3406 copyflag, crp, rvalp));
3407
3408 case LDSETT:
3409 strioc.ic_len = sizeof (struct termcb);
3410 strioc.ic_dp = (char *)arg;
3411 return (strdoioctl(stp, &strioc, flag,
3412 copyflag, crp, rvalp));
3413
3414 case TIOCSETP:
3415 strioc.ic_len = sizeof (struct sgttyb);
3416 strioc.ic_dp = (char *)arg;
3417 return (strdoioctl(stp, &strioc, flag,
3418 copyflag, crp, rvalp));
3419
3420 case TIOCSTI:
3421 if ((flag & FREAD) == 0 &&
3422 secpolicy_sti(crp) != 0) {
3423 return (EPERM);
3424 }
3425 mutex_enter(&stp->sd_lock);
3426 mutex_enter(&curproc->p_splock);
3427 if (stp->sd_sidp != curproc->p_sessp->s_sidp &&
3428 secpolicy_sti(crp) != 0) {
3429 mutex_exit(&curproc->p_splock);
3430 mutex_exit(&stp->sd_lock);
3431 return (EACCES);
3432 }
3433 mutex_exit(&curproc->p_splock);
3434 mutex_exit(&stp->sd_lock);
3435
3436 strioc.ic_len = sizeof (char);
3437 strioc.ic_dp = (char *)arg;
3438 return (strdoioctl(stp, &strioc, flag,
3439 copyflag, crp, rvalp));
3440
3441 case TIOCSWINSZ:
3442 strioc.ic_len = sizeof (struct winsize);
3443 strioc.ic_dp = (char *)arg;
3444 return (strdoioctl(stp, &strioc, flag,
3445 copyflag, crp, rvalp));
3446
3447 case TIOCSSIZE:
3448 strioc.ic_len = sizeof (struct ttysize);
3449 strioc.ic_dp = (char *)arg;
3450 return (strdoioctl(stp, &strioc, flag,
3451 copyflag, crp, rvalp));
3452
3453 case TIOCSSOFTCAR:
3454 case KIOCTRANS:
3455 case KIOCTRANSABLE:
3456 case KIOCCMD:
3457 case KIOCSDIRECT:
3458 case KIOCSCOMPAT:
3459 case KIOCSKABORTEN:
3460 case KIOCSRPTCOUNT:
3461 case KIOCSRPTDELAY:
3462 case KIOCSRPTRATE:
3463 case VUIDSFORMAT:
3464 case TIOCSPPS:
3465 strioc.ic_len = sizeof (int);
3466 strioc.ic_dp = (char *)arg;
3467 return (strdoioctl(stp, &strioc, flag,
3468 copyflag, crp, rvalp));
3469
3470 case KIOCSETKEY:
3471 case KIOCGETKEY:
3472 strioc.ic_len = sizeof (struct kiockey);
3473 strioc.ic_dp = (char *)arg;
3474 return (strdoioctl(stp, &strioc, flag,
3475 copyflag, crp, rvalp));
3476
3477 case KIOCSKEY:
3478 case KIOCGKEY:
3479 strioc.ic_len = sizeof (struct kiockeymap);
3480 strioc.ic_dp = (char *)arg;
3481 return (strdoioctl(stp, &strioc, flag,
3482 copyflag, crp, rvalp));
3483
3484 case KIOCSLED:
3485 /* arg is a pointer to char */
3486 strioc.ic_len = sizeof (char);
3487 strioc.ic_dp = (char *)arg;
3488 return (strdoioctl(stp, &strioc, flag,
3489 copyflag, crp, rvalp));
3490
3491 case MSIOSETPARMS:
3492 strioc.ic_len = sizeof (Ms_parms);
3493 strioc.ic_dp = (char *)arg;
3494 return (strdoioctl(stp, &strioc, flag,
3495 copyflag, crp, rvalp));
3496
3497 case VUIDSADDR:
3498 case VUIDGADDR:
3499 strioc.ic_len = sizeof (struct vuid_addr_probe);
3500 strioc.ic_dp = (char *)arg;
3501 return (strdoioctl(stp, &strioc, flag,
3502 copyflag, crp, rvalp));
3503
3504 /*
3505 * These M_IOCTL's don't require any data to be sent
3506 * downstream, and the driver will allocate and link
3507 * on its own mblk_t upon M_IOCACK -- thus we set
3508 * ic_len to zero and set ic_dp to arg so we know
3509 * where to copyout to later.
3510 */
3511 case TIOCGSOFTCAR:
3512 case TIOCGWINSZ:
3513 case TIOCGSIZE:
3514 case KIOCGTRANS:
3515 case KIOCGTRANSABLE:
3516 case KIOCTYPE:
3517 case KIOCGDIRECT:
3518 case KIOCGCOMPAT:
3519 case KIOCLAYOUT:
3520 case KIOCGLED:
3521 case MSIOGETPARMS:
3522 case MSIOBUTTONS:
3523 case VUIDGFORMAT:
3524 case TIOCGPPS:
3525 case TIOCGPPSEV:
3526 case TCGETA:
3527 case TCGETS:
3528 case LDGETT:
3529 case TIOCGETP:
3530 case KIOCGRPTCOUNT:
3531 case KIOCGRPTDELAY:
3532 case KIOCGRPTRATE:
3533 strioc.ic_len = 0;
3534 strioc.ic_dp = (char *)arg;
3535 return (strdoioctl(stp, &strioc, flag,
3536 copyflag, crp, rvalp));
3537 }
3538 }
3539
3540 /*
3541 * Unknown cmd - send it down as a transparent ioctl.
3542 */
3543 strioc.ic_cmd = cmd;
3544 strioc.ic_timout = INFTIM;
3545 strioc.ic_len = TRANSPARENT;
3546 strioc.ic_dp = (char *)&arg;
3547
3548 return (strdoioctl(stp, &strioc, flag, copyflag, crp, rvalp));
3549
3550 case I_STR:
3551 /*
3552 * Stream ioctl. Read in an strioctl buffer from the user
3553 * along with any data specified and send it downstream.
3554 * strdoioctl() allows only one ioctl message at a time,
3555 * and waits for the acknowledgement.
3556 */
3557
3558 if (stp->sd_flag & STRHUP)
3559 return (ENXIO);
3560
3561 error = strcopyin_strioctl((void *)arg, &strioc, flag,
3562 copyflag);
3563 if (error != 0)
3564 return (error);
3565
3566 if ((strioc.ic_len < 0) || (strioc.ic_timout < -1))
3567 return (EINVAL);
3568
3569 access = job_control_type(strioc.ic_cmd);
3570 mutex_enter(&stp->sd_lock);
3571 if ((access != -1) &&
3572 ((error = i_straccess(stp, access)) != 0)) {
3573 mutex_exit(&stp->sd_lock);
3574 return (error);
3575 }
3576 mutex_exit(&stp->sd_lock);
3577
3578 /*
3579 * The I_STR facility provides a trap door for malicious
3580 * code to send down bogus streamio(7I) ioctl commands to
3581 * unsuspecting STREAMS modules and drivers which expect to
3582 * only get these messages from the stream head.
3583 * Explicitly prohibit any streamio ioctls which can be
3584 * passed downstream by the stream head. Note that we do
3585 * not block all streamio ioctls because the ioctl
3586 * numberspace is not well managed and thus it's possible
3587 * that a module or driver's ioctl numbers may accidentally
3588 * collide with them.
3589 */
3590 switch (strioc.ic_cmd) {
3591 case I_LINK:
3592 case I_PLINK:
3593 case I_UNLINK:
3594 case I_PUNLINK:
3595 case _I_GETPEERCRED:
3596 case _I_PLINK_LH:
3597 return (EINVAL);
3598 }
3599
3600 error = strdoioctl(stp, &strioc, flag, copyflag, crp, rvalp);
3601 if (error == 0) {
3602 error = strcopyout_strioctl(&strioc, (void *)arg,
3603 flag, copyflag);
3604 }
3605 return (error);
3606
3607 case _I_CMD:
3608 /*
3609 * Like I_STR, but without using M_IOC* messages and without
3610 * copyins/copyouts beyond the passed-in argument.
3611 */
3612 if (stp->sd_flag & STRHUP)
3613 return (ENXIO);
3614
3615 if (copyflag == U_TO_K) {
3616 if ((scp = kmem_alloc(sizeof (strcmd_t),
3617 KM_NOSLEEP)) == NULL) {
3618 return (ENOMEM);
3619 }
3620
3621 if (copyin((void *)arg, scp, sizeof (strcmd_t))) {
3622 kmem_free(scp, sizeof (strcmd_t));
3623 return (EFAULT);
3624 }
3625 } else {
3626 scp = (strcmd_t *)arg;
3627 }
3628
3629 access = job_control_type(scp->sc_cmd);
3630 mutex_enter(&stp->sd_lock);
3631 if (access != -1 && (error = i_straccess(stp, access)) != 0) {
3632 mutex_exit(&stp->sd_lock);
3633 if (copyflag == U_TO_K)
3634 kmem_free(scp, sizeof (strcmd_t));
3635 return (error);
3636 }
3637 mutex_exit(&stp->sd_lock);
3638
3639 *rvalp = 0;
3640 if ((error = strdocmd(stp, scp, crp)) == 0) {
3641 if (copyflag == U_TO_K &&
3642 copyout(scp, (void *)arg, sizeof (strcmd_t))) {
3643 error = EFAULT;
3644 }
3645 }
3646 if (copyflag == U_TO_K)
3647 kmem_free(scp, sizeof (strcmd_t));
3648 return (error);
3649
3650 case I_NREAD:
3651 /*
3652 * Return number of bytes of data in first message
3653 * in queue in "arg" and return the number of messages
3654 * in queue in return value.
3655 */
3656 {
3657 size_t size;
3658 int retval;
3659 int count = 0;
3660
3661 mutex_enter(QLOCK(rdq));
3662
3663 size = msgdsize(rdq->q_first);
3664 for (mp = rdq->q_first; mp != NULL; mp = mp->b_next)
3665 count++;
3666
3667 mutex_exit(QLOCK(rdq));
3668 if (stp->sd_struiordq) {
3669 infod_t infod;
3670
3671 infod.d_cmd = INFOD_COUNT;
3672 infod.d_count = 0;
3673 if (count == 0) {
3674 infod.d_cmd |= INFOD_FIRSTBYTES;
3675 infod.d_bytes = 0;
3676 }
3677 infod.d_res = 0;
3678 (void) infonext(rdq, &infod);
3679 count += infod.d_count;
3680 if (infod.d_res & INFOD_FIRSTBYTES)
3681 size = infod.d_bytes;
3682 }
3683
3684 /*
3685 * Drop down from size_t to the "int" required by the
3686 * interface. Cap at INT_MAX.
3687 */
3688 retval = MIN(size, INT_MAX);
3689 error = strcopyout(&retval, (void *)arg, sizeof (retval),
3690 copyflag);
3691 if (!error)
3692 *rvalp = count;
3693 return (error);
3694 }
3695
3696 case FIONREAD:
3697 /*
3698 * Return number of bytes of data in all data messages
3699 * in queue in "arg".
3700 */
3701 {
3702 size_t size = 0;
3703 int retval;
3704
3705 mutex_enter(QLOCK(rdq));
3706 for (mp = rdq->q_first; mp != NULL; mp = mp->b_next)
3707 size += msgdsize(mp);
3708 mutex_exit(QLOCK(rdq));
3709
3710 if (stp->sd_struiordq) {
3711 infod_t infod;
3712
3713 infod.d_cmd = INFOD_BYTES;
3714 infod.d_res = 0;
3715 infod.d_bytes = 0;
3716 (void) infonext(rdq, &infod);
3717 size += infod.d_bytes;
3718 }
3719
3720 /*
3721 * Drop down from size_t to the "int" required by the
3722 * interface. Cap at INT_MAX.
3723 */
3724 retval = MIN(size, INT_MAX);
3725 error = strcopyout(&retval, (void *)arg, sizeof (retval),
3726 copyflag);
3727
3728 *rvalp = 0;
3729 return (error);
3730 }
3731 case FIORDCHK:
3732 /*
3733 * FIORDCHK does not use the arg value (unlike FIONREAD);
3734 * instead a count is returned. I_NREAD value may
3735 * not be accurate but safe. The real thing to do is
3736 * to add the msgdsizes of all data messages until
3737 * a non-data message.
3738 */
3739 {
3740 size_t size = 0;
3741
3742 mutex_enter(QLOCK(rdq));
3743 for (mp = rdq->q_first; mp != NULL; mp = mp->b_next)
3744 size += msgdsize(mp);
3745 mutex_exit(QLOCK(rdq));
3746
3747 if (stp->sd_struiordq) {
3748 infod_t infod;
3749
3750 infod.d_cmd = INFOD_BYTES;
3751 infod.d_res = 0;
3752 infod.d_bytes = 0;
3753 (void) infonext(rdq, &infod);
3754 size += infod.d_bytes;
3755 }
3756
3757 /*
3758 * Since ioctl returns an int, and memory sizes under
3759 * LP64 may not fit, we return INT_MAX if the count was
3760 * actually greater.
3761 */
3762 *rvalp = MIN(size, INT_MAX);
3763 return (0);
3764 }
3765
3766 case I_FIND:
3767 /*
3768 * Get module name.
3769 */
3770 {
3771 char mname[FMNAMESZ + 1];
3772 queue_t *q;
3773
3774 error = (copyflag & U_TO_K ? copyinstr : copystr)((void *)arg,
3775 mname, FMNAMESZ + 1, NULL);
3776 if (error)
3777 return ((error == ENAMETOOLONG) ? EINVAL : EFAULT);
3778
3779 /*
3780 * Return EINVAL if we're handed a bogus module name.
3781 */
3782 if (fmodsw_find(mname, FMODSW_LOAD) == NULL) {
3783 TRACE_0(TR_FAC_STREAMS_FR,
3784 TR_I_CANT_FIND, "couldn't I_FIND");
3785 return (EINVAL);
3786 }
3787
3788 *rvalp = 0;
3789
3790 /* Look downstream to see if module is there. */
3791 claimstr(stp->sd_wrq);
3792 for (q = stp->sd_wrq->q_next; q; q = q->q_next) {
3793 if (q->q_flag & QREADR) {
3794 q = NULL;
3795 break;
3796 }
3797 if (strcmp(mname, Q2NAME(q)) == 0)
3798 break;
3799 }
3800 releasestr(stp->sd_wrq);
3801
3802 *rvalp = (q ? 1 : 0);
3803 return (error);
3804 }
3805
3806 case I_PUSH:
3807 case __I_PUSH_NOCTTY:
3808 /*
3809 * Push a module.
3810 * For the case __I_PUSH_NOCTTY push a module but
3811 * do not allocate controlling tty. See bugid 4025044
3812 */
3813
3814 {
3815 char mname[FMNAMESZ + 1];
3816 fmodsw_impl_t *fp;
3817 dev_t dummydev;
3818
3819 if (stp->sd_flag & STRHUP)
3820 return (ENXIO);
3821
3822 /*
3823 * Get module name and look up in fmodsw.
3824 */
3825 error = (copyflag & U_TO_K ? copyinstr : copystr)((void *)arg,
3826 mname, FMNAMESZ + 1, NULL);
3827 if (error)
3828 return ((error == ENAMETOOLONG) ? EINVAL : EFAULT);
3829
3830 if ((fp = fmodsw_find(mname, FMODSW_HOLD | FMODSW_LOAD)) ==
3831 NULL)
3832 return (EINVAL);
3833
3834 TRACE_2(TR_FAC_STREAMS_FR, TR_I_PUSH,
3835 "I_PUSH:fp %p stp %p", fp, stp);
3836
3837 /*
3838 * If the module is flagged as single-instance, then check
3839 * to see if the module is already pushed. If it is, return
3840 * as if the push was successful.
3841 */
3842 if (fp->f_qflag & _QSINGLE_INSTANCE) {
3843 queue_t *q;
3844
3845 claimstr(stp->sd_wrq);
3846 for (q = stp->sd_wrq->q_next; q; q = q->q_next) {
3847 if (q->q_flag & QREADR) {
3848 q = NULL;
3849 break;
3850 }
3851 if (strcmp(mname, Q2NAME(q)) == 0)
3852 break;
3853 }
3854 releasestr(stp->sd_wrq);
3855 if (q != NULL) {
3856 fmodsw_rele(fp);
3857 return (0);
3858 }
3859 }
3860
3861 if (error = strstartplumb(stp, flag, cmd)) {
3862 fmodsw_rele(fp);
3863 return (error);
3864 }
3865
3866 /*
3867 * See if any more modules can be pushed on this stream.
3868 * Note that this check must be done after strstartplumb()
3869 * since otherwise multiple threads issuing I_PUSHes on
3870 * the same stream will be able to exceed nstrpush.
3871 */
3872 mutex_enter(&stp->sd_lock);
3873 if (stp->sd_pushcnt >= nstrpush) {
3874 fmodsw_rele(fp);
3875 strendplumb(stp);
3876 mutex_exit(&stp->sd_lock);
3877 return (EINVAL);
3878 }
3879 mutex_exit(&stp->sd_lock);
3880
3881 /*
3882 * Push new module and call its open routine
3883 * via qattach(). Modules don't change device
3884 * numbers, so just ignore dummydev here.
3885 */
3886 dummydev = vp->v_rdev;
3887 if ((error = qattach(rdq, &dummydev, 0, crp, fp,
3888 B_FALSE)) == 0) {
3889 if (vp->v_type == VCHR && /* sorry, no pipes allowed */
3890 (cmd == I_PUSH) && (stp->sd_flag & STRISTTY)) {
3891 /*
3892 * try to allocate it as a controlling terminal
3893 */
3894 (void) strctty(stp);
3895 }
3896 }
3897
3898 mutex_enter(&stp->sd_lock);
3899
3900 /*
3901 * As a performance concern we are caching the values of
3902 * q_minpsz and q_maxpsz of the module below the stream
3903 * head in the stream head.
3904 */
3905 mutex_enter(QLOCK(stp->sd_wrq->q_next));
3906 rmin = stp->sd_wrq->q_next->q_minpsz;
3907 rmax = stp->sd_wrq->q_next->q_maxpsz;
3908 mutex_exit(QLOCK(stp->sd_wrq->q_next));
3909
3910 /* Do this processing here as a performance concern */
3911 if (strmsgsz != 0) {
3912 if (rmax == INFPSZ)
3913 rmax = strmsgsz;
3914 else {
3915 if (vp->v_type == VFIFO)
3916 rmax = MIN(PIPE_BUF, rmax);
3917 else rmax = MIN(strmsgsz, rmax);
3918 }
3919 }
3920
3921 mutex_enter(QLOCK(wrq));
3922 stp->sd_qn_minpsz = rmin;
3923 stp->sd_qn_maxpsz = rmax;
3924 mutex_exit(QLOCK(wrq));
3925
3926 strendplumb(stp);
3927 mutex_exit(&stp->sd_lock);
3928 return (error);
3929 }
3930
3931 case I_POP:
3932 {
3933 queue_t *q;
3934
3935 if (stp->sd_flag & STRHUP)
3936 return (ENXIO);
3937 if (!wrq->q_next) /* for broken pipes */
3938 return (EINVAL);
3939
3940 if (error = strstartplumb(stp, flag, cmd))
3941 return (error);
3942
3943 /*
3944 * If there is an anchor on this stream and popping
3945 * the current module would attempt to pop through the
3946 * anchor, then disallow the pop unless we have sufficient
3947 * privileges; take the cheapest (non-locking) check
3948 * first.
3949 */
3950 if (secpolicy_ip_config(crp, B_TRUE) != 0 ||
3951 (stp->sd_anchorzone != crgetzoneid(crp))) {
3952 mutex_enter(&stp->sd_lock);
3953 /*
3954 * Anchors only apply if there's at least one
3955 * module on the stream (sd_pushcnt > 0).
3956 */
3957 if (stp->sd_pushcnt > 0 &&
3958 stp->sd_pushcnt == stp->sd_anchor &&
3959 stp->sd_vnode->v_type != VFIFO) {
3960 strendplumb(stp);
3961 mutex_exit(&stp->sd_lock);
3962 if (stp->sd_anchorzone != crgetzoneid(crp))
3963 return (EINVAL);
3964 /* Audit and report error */
3965 return (secpolicy_ip_config(crp, B_FALSE));
3966 }
3967 mutex_exit(&stp->sd_lock);
3968 }
3969
3970 q = wrq->q_next;
3971 TRACE_2(TR_FAC_STREAMS_FR, TR_I_POP,
3972 "I_POP:%p from %p", q, stp);
3973 if (q->q_next == NULL || (q->q_flag & (QREADR|QISDRV))) {
3974 error = EINVAL;
3975 } else {
3976 qdetach(_RD(q), 1, flag, crp, B_FALSE);
3977 error = 0;
3978 }
3979 mutex_enter(&stp->sd_lock);
3980
3981 /*
3982 * As a performance concern we are caching the values of
3983 * q_minpsz and q_maxpsz of the module below the stream
3984 * head in the stream head.
3985 */
3986 mutex_enter(QLOCK(wrq->q_next));
3987 rmin = wrq->q_next->q_minpsz;
3988 rmax = wrq->q_next->q_maxpsz;
3989 mutex_exit(QLOCK(wrq->q_next));
3990
3991 /* Do this processing here as a performance concern */
3992 if (strmsgsz != 0) {
3993 if (rmax == INFPSZ)
3994 rmax = strmsgsz;
3995 else {
3996 if (vp->v_type == VFIFO)
3997 rmax = MIN(PIPE_BUF, rmax);
3998 else rmax = MIN(strmsgsz, rmax);
3999 }
4000 }
4001
4002 mutex_enter(QLOCK(wrq));
4003 stp->sd_qn_minpsz = rmin;
4004 stp->sd_qn_maxpsz = rmax;
4005 mutex_exit(QLOCK(wrq));
4006
4007 /* If we popped through the anchor, then reset the anchor. */
4008 if (stp->sd_pushcnt < stp->sd_anchor) {
4009 stp->sd_anchor = 0;
4010 stp->sd_anchorzone = 0;
4011 }
4012 strendplumb(stp);
4013 mutex_exit(&stp->sd_lock);
4014 return (error);
4015 }
4016
4017 case _I_MUXID2FD:
4018 {
4019 /*
4020 * Create an fd for an I_PLINK'ed lower stream with a given
4021 * muxid. With the fd, application can send down ioctls,
4022 * like I_LIST, to the previously I_PLINK'ed stream. Note
4023 * that after getting the fd, the application has to do an
4024 * I_PUNLINK on the muxid before it can do any operation
4025 * on the lower stream. This is required by spec1170.
4026 *
4027 * The fd used to do this ioctl should point to the same
4028 * controlling device used to do the I_PLINK. If it uses
4029 * a different stream or an invalid muxid, I_MUXID2FD will
4030 * fail. The error code is set to EINVAL.
4031 *
4032 * The intended use of this interface is the following.
4033 * An application I_PLINKs a stream and exits, so the fd
4034 * to the lower stream is gone. When another application
4035 * wants to get an fd to the lower stream, it uses I_MUXID2FD.
4036 */
4037 int muxid = (int)arg;
4038 int fd;
4039 linkinfo_t *linkp;
4040 struct file *fp;
4041 netstack_t *ns;
4042 str_stack_t *ss;
4043
4044 /*
4045 * Do not allow the wildcard muxid. This ioctl is not
4046 * intended to find an arbitrary link.
4047 */
4048 if (muxid == 0) {
4049 return (EINVAL);
4050 }
4051
4052 ns = netstack_find_by_cred(crp);
4053 ASSERT(ns != NULL);
4054 ss = ns->netstack_str;
4055 ASSERT(ss != NULL);
4056
4057 mutex_enter(&muxifier);
4058 linkp = findlinks(vp->v_stream, muxid, LINKPERSIST, ss);
4059 if (linkp == NULL) {
4060 mutex_exit(&muxifier);
4061 netstack_rele(ss->ss_netstack);
4062 return (EINVAL);
4063 }
4064
4065 if ((fd = ufalloc(0)) == -1) {
4066 mutex_exit(&muxifier);
4067 netstack_rele(ss->ss_netstack);
4068 return (EMFILE);
4069 }
4070 fp = linkp->li_fpdown;
4071 mutex_enter(&fp->f_tlock);
4072 fp->f_count++;
4073 mutex_exit(&fp->f_tlock);
4074 mutex_exit(&muxifier);
4075 setf(fd, fp);
4076 *rvalp = fd;
4077 netstack_rele(ss->ss_netstack);
4078 return (0);
4079 }
4080
4081 case _I_INSERT:
4082 {
4083 /*
4084 * To insert a module to a given position in a stream.
4085 * In the first release, only allow privileged user
4086 * to use this ioctl. Furthermore, the insert is only allowed
4087 * below an anchor if the zoneid is the same as the zoneid
4088 * which created the anchor.
4089 *
4090 * Note that we do not plan to support this ioctl
4091 * on pipes in the first release. We want to learn more
4092 * about the implications of these ioctls before extending
4093 * their support. And we do not think these features are
4094 * valuable for pipes.
4095 */
4096 STRUCT_DECL(strmodconf, strmodinsert);
4097 char mod_name[FMNAMESZ + 1];
4098 fmodsw_impl_t *fp;
4099 dev_t dummydev;
4100 queue_t *tmp_wrq;
4101 int pos;
4102 boolean_t is_insert;
4103
4104 STRUCT_INIT(strmodinsert, flag);
4105 if (stp->sd_flag & STRHUP)
4106 return (ENXIO);
4107 if (STRMATED(stp))
4108 return (EINVAL);
4109 if ((error = secpolicy_net_config(crp, B_FALSE)) != 0)
4110 return (error);
4111 if (stp->sd_anchor != 0 &&
4112 stp->sd_anchorzone != crgetzoneid(crp))
4113 return (EINVAL);
4114
4115 error = strcopyin((void *)arg, STRUCT_BUF(strmodinsert),
4116 STRUCT_SIZE(strmodinsert), copyflag);
4117 if (error)
4118 return (error);
4119
4120 /*
4121 * Get module name and look up in fmodsw.
4122 */
4123 error = (copyflag & U_TO_K ? copyinstr :
4124 copystr)(STRUCT_FGETP(strmodinsert, mod_name),
4125 mod_name, FMNAMESZ + 1, NULL);
4126 if (error)
4127 return ((error == ENAMETOOLONG) ? EINVAL : EFAULT);
4128
4129 if ((fp = fmodsw_find(mod_name, FMODSW_HOLD | FMODSW_LOAD)) ==
4130 NULL)
4131 return (EINVAL);
4132
4133 if (error = strstartplumb(stp, flag, cmd)) {
4134 fmodsw_rele(fp);
4135 return (error);
4136 }
4137
4138 /*
4139 * Is this _I_INSERT just like an I_PUSH? We need to know
4140 * this because we do some optimizations if this is a
4141 * module being pushed.
4142 */
4143 pos = STRUCT_FGET(strmodinsert, pos);
4144 is_insert = (pos != 0);
4145
4146 /*
4147 * Make sure pos is valid. Even though it is not an I_PUSH,
4148 * we impose the same limit on the number of modules in a
4149 * stream.
4150 */
4151 mutex_enter(&stp->sd_lock);
4152 if (stp->sd_pushcnt >= nstrpush || pos < 0 ||
4153 pos > stp->sd_pushcnt) {
4154 fmodsw_rele(fp);
4155 strendplumb(stp);
4156 mutex_exit(&stp->sd_lock);
4157 return (EINVAL);
4158 }
4159 if (stp->sd_anchor != 0) {
4160 /*
4161 * Is this insert below the anchor?
4162 * Pushcnt hasn't been increased yet hence
4163 * we test for greater than here, and greater or
4164 * equal after qattach.
4165 */
4166 if (pos > (stp->sd_pushcnt - stp->sd_anchor) &&
4167 stp->sd_anchorzone != crgetzoneid(crp)) {
4168 fmodsw_rele(fp);
4169 strendplumb(stp);
4170 mutex_exit(&stp->sd_lock);
4171 return (EPERM);
4172 }
4173 }
4174
4175 mutex_exit(&stp->sd_lock);
4176
4177 /*
4178 * First find the correct position at which this module is to
4179 * be inserted. We don't need to call claimstr()
4180 * as the stream should not be changing at this point.
4181 *
4182 * Insert new module and call its open routine
4183 * via qattach(). Modules don't change device
4184 * numbers, so just ignore dummydev here.
4185 */
4186 for (tmp_wrq = stp->sd_wrq; pos > 0;
4187 tmp_wrq = tmp_wrq->q_next, pos--) {
4188 ASSERT(SAMESTR(tmp_wrq));
4189 }
4190 dummydev = vp->v_rdev;
4191 if ((error = qattach(_RD(tmp_wrq), &dummydev, 0, crp,
4192 fp, is_insert)) != 0) {
4193 mutex_enter(&stp->sd_lock);
4194 strendplumb(stp);
4195 mutex_exit(&stp->sd_lock);
4196 return (error);
4197 }
4198
4199 mutex_enter(&stp->sd_lock);
4200
4201 /*
4202 * As a performance concern we are caching the values of
4203 * q_minpsz and q_maxpsz of the module below the stream
4204 * head in the stream head.
4205 */
4206 if (!is_insert) {
4207 mutex_enter(QLOCK(stp->sd_wrq->q_next));
4208 rmin = stp->sd_wrq->q_next->q_minpsz;
4209 rmax = stp->sd_wrq->q_next->q_maxpsz;
4210 mutex_exit(QLOCK(stp->sd_wrq->q_next));
4211
4212 /* Do this processing here as a performance concern */
4213 if (strmsgsz != 0) {
4214 if (rmax == INFPSZ) {
4215 rmax = strmsgsz;
4216 } else {
4217 rmax = MIN(strmsgsz, rmax);
4218 }
4219 }
4220
4221 mutex_enter(QLOCK(wrq));
4222 stp->sd_qn_minpsz = rmin;
4223 stp->sd_qn_maxpsz = rmax;
4224 mutex_exit(QLOCK(wrq));
4225 }
4226
4227 /*
4228 * Need to update the anchor value if this module is
4229 * inserted below the anchor point.
4230 */
4231 if (stp->sd_anchor != 0) {
4232 pos = STRUCT_FGET(strmodinsert, pos);
4233 if (pos >= (stp->sd_pushcnt - stp->sd_anchor))
4234 stp->sd_anchor++;
4235 }
4236
4237 strendplumb(stp);
4238 mutex_exit(&stp->sd_lock);
4239 return (0);
4240 }
4241
4242 case _I_REMOVE:
4243 {
4244 /*
4245 * To remove a module with a given name in a stream. The
4246 * caller of this ioctl needs to provide both the name and
4247 * the position of the module to be removed. This eliminates
4248 * the ambiguity of removal if a module is inserted/pushed
4249 * multiple times in a stream. In the first release, only
4250 * allow privileged user to use this ioctl.
4251 * Furthermore, the remove is only allowed
4252 * below an anchor if the zoneid is the same as the zoneid
4253 * which created the anchor.
4254 *
4255 * Note that we do not plan to support this ioctl
4256 * on pipes in the first release. We want to learn more
4257 * about the implications of these ioctls before extending
4258 * their support. And we do not think these features are
4259 * valuable for pipes.
4260 *
4261 * Also note that _I_REMOVE cannot be used to remove a
4262 * driver or the stream head.
4263 */
4264 STRUCT_DECL(strmodconf, strmodremove);
4265 queue_t *q;
4266 int pos;
4267 char mod_name[FMNAMESZ + 1];
4268 boolean_t is_remove;
4269
4270 STRUCT_INIT(strmodremove, flag);
4271 if (stp->sd_flag & STRHUP)
4272 return (ENXIO);
4273 if (STRMATED(stp))
4274 return (EINVAL);
4275 if ((error = secpolicy_net_config(crp, B_FALSE)) != 0)
4276 return (error);
4277 if (stp->sd_anchor != 0 &&
4278 stp->sd_anchorzone != crgetzoneid(crp))
4279 return (EINVAL);
4280
4281 error = strcopyin((void *)arg, STRUCT_BUF(strmodremove),
4282 STRUCT_SIZE(strmodremove), copyflag);
4283 if (error)
4284 return (error);
4285
4286 error = (copyflag & U_TO_K ? copyinstr :
4287 copystr)(STRUCT_FGETP(strmodremove, mod_name),
4288 mod_name, FMNAMESZ + 1, NULL);
4289 if (error)
4290 return ((error == ENAMETOOLONG) ? EINVAL : EFAULT);
4291
4292 if ((error = strstartplumb(stp, flag, cmd)) != 0)
4293 return (error);
4294
4295 /*
4296 * Match the name of given module to the name of module at
4297 * the given position.
4298 */
4299 pos = STRUCT_FGET(strmodremove, pos);
4300
4301 is_remove = (pos != 0);
4302 for (q = stp->sd_wrq->q_next; SAMESTR(q) && pos > 0;
4303 q = q->q_next, pos--)
4304 ;
4305 if (pos > 0 || !SAMESTR(q) ||
4306 strcmp(Q2NAME(q), mod_name) != 0) {
4307 mutex_enter(&stp->sd_lock);
4308 strendplumb(stp);
4309 mutex_exit(&stp->sd_lock);
4310 return (EINVAL);
4311 }
4312
4313 /*
4314 * If the position is at or below an anchor, then the zoneid
4315 * must match the zoneid that created the anchor.
4316 */
4317 if (stp->sd_anchor != 0) {
4318 pos = STRUCT_FGET(strmodremove, pos);
4319 if (pos >= (stp->sd_pushcnt - stp->sd_anchor) &&
4320 stp->sd_anchorzone != crgetzoneid(crp)) {
4321 mutex_enter(&stp->sd_lock);
4322 strendplumb(stp);
4323 mutex_exit(&stp->sd_lock);
4324 return (EPERM);
4325 }
4326 }
4327
4328
4329 ASSERT(!(q->q_flag & QREADR));
4330 qdetach(_RD(q), 1, flag, crp, is_remove);
4331
4332 mutex_enter(&stp->sd_lock);
4333
4334 /*
4335 * As a performance concern we are caching the values of
4336 * q_minpsz and q_maxpsz of the module below the stream
4337 * head in the stream head.
4338 */
4339 if (!is_remove) {
4340 mutex_enter(QLOCK(wrq->q_next));
4341 rmin = wrq->q_next->q_minpsz;
4342 rmax = wrq->q_next->q_maxpsz;
4343 mutex_exit(QLOCK(wrq->q_next));
4344
4345 /* Do this processing here as a performance concern */
4346 if (strmsgsz != 0) {
4347 if (rmax == INFPSZ)
4348 rmax = strmsgsz;
4349 else {
4350 if (vp->v_type == VFIFO)
4351 rmax = MIN(PIPE_BUF, rmax);
4352 else rmax = MIN(strmsgsz, rmax);
4353 }
4354 }
4355
4356 mutex_enter(QLOCK(wrq));
4357 stp->sd_qn_minpsz = rmin;
4358 stp->sd_qn_maxpsz = rmax;
4359 mutex_exit(QLOCK(wrq));
4360 }
4361
4362 /*
4363 * Need to update the anchor value if this module is removed
4364 * at or below the anchor point. If the removed module is at
4365 * the anchor point, remove the anchor for this stream if
4366 * there is no module above the anchor point. Otherwise, if
4367 * the removed module is below the anchor point, decrement the
4368 * anchor point by 1.
4369 */
4370 if (stp->sd_anchor != 0) {
4371 pos = STRUCT_FGET(strmodremove, pos);
4372 if (pos == stp->sd_pushcnt - stp->sd_anchor + 1)
4373 stp->sd_anchor = 0;
4374 else if (pos > (stp->sd_pushcnt - stp->sd_anchor + 1))
4375 stp->sd_anchor--;
4376 }
4377
4378 strendplumb(stp);
4379 mutex_exit(&stp->sd_lock);
4380 return (0);
4381 }
4382
4383 case I_ANCHOR:
4384 /*
4385 * Set the anchor position on the stream to reside at
4386 * the top module (in other words, the top module
4387 * cannot be popped). Anchors with a FIFO make no
4388 * obvious sense, so they're not allowed.
4389 */
4390 mutex_enter(&stp->sd_lock);
4391
4392 if (stp->sd_vnode->v_type == VFIFO) {
4393 mutex_exit(&stp->sd_lock);
4394 return (EINVAL);
4395 }
4396 /* Only allow the same zoneid to update the anchor */
4397 if (stp->sd_anchor != 0 &&
4398 stp->sd_anchorzone != crgetzoneid(crp)) {
4399 mutex_exit(&stp->sd_lock);
4400 return (EINVAL);
4401 }
4402 stp->sd_anchor = stp->sd_pushcnt;
4403 stp->sd_anchorzone = crgetzoneid(crp);
4404 mutex_exit(&stp->sd_lock);
4405 return (0);
4406
4407 case I_LOOK:
4408 /*
4409 * Get name of first module downstream.
4410 * If no module, return an error.
4411 */
4412 claimstr(wrq);
4413 if (_SAMESTR(wrq) && wrq->q_next->q_next != NULL) {
4414 char *name = Q2NAME(wrq->q_next);
4415
4416 error = strcopyout(name, (void *)arg, strlen(name) + 1,
4417 copyflag);
4418 releasestr(wrq);
4419 return (error);
4420 }
4421 releasestr(wrq);
4422 return (EINVAL);
4423
4424 case I_LINK:
4425 case I_PLINK:
4426 /*
4427 * Link a multiplexor.
4428 */
4429 return (mlink(vp, cmd, (int)arg, crp, rvalp, 0));
4430
4431 case _I_PLINK_LH:
4432 /*
4433 * Link a multiplexor: Call must originate from kernel.
4434 */
4435 if (kioctl)
4436 return (ldi_mlink_lh(vp, cmd, arg, crp, rvalp));
4437
4438 return (EINVAL);
4439 case I_UNLINK:
4440 case I_PUNLINK:
4441 /*
4442 * Unlink a multiplexor.
4443 * If arg is -1, unlink all links for which this is the
4444 * controlling stream. Otherwise, arg is an index number
4445 * for a link to be removed.
4446 */
4447 {
4448 struct linkinfo *linkp;
4449 int native_arg = (int)arg;
4450 int type;
4451 netstack_t *ns;
4452 str_stack_t *ss;
4453
4454 TRACE_1(TR_FAC_STREAMS_FR,
4455 TR_I_UNLINK, "I_UNLINK/I_PUNLINK:%p", stp);
4456 if (vp->v_type == VFIFO) {
4457 return (EINVAL);
4458 }
4459 if (cmd == I_UNLINK)
4460 type = LINKNORMAL;
4461 else /* I_PUNLINK */
4462 type = LINKPERSIST;
4463 if (native_arg == 0) {
4464 return (EINVAL);
4465 }
4466 ns = netstack_find_by_cred(crp);
4467 ASSERT(ns != NULL);
4468 ss = ns->netstack_str;
4469 ASSERT(ss != NULL);
4470
4471 if (native_arg == MUXID_ALL)
4472 error = munlinkall(stp, type, crp, rvalp, ss);
4473 else {
4474 mutex_enter(&muxifier);
4475 if (!(linkp = findlinks(stp, (int)arg, type, ss))) {
4476 /* invalid user supplied index number */
4477 mutex_exit(&muxifier);
4478 netstack_rele(ss->ss_netstack);
4479 return (EINVAL);
4480 }
4481 /* munlink drops the muxifier lock */
4482 error = munlink(stp, linkp, type, crp, rvalp, ss);
4483 }
4484 netstack_rele(ss->ss_netstack);
4485 return (error);
4486 }
4487
4488 case I_FLUSH:
4489 /*
4490 * send a flush message downstream
4491 * flush message can indicate
4492 * FLUSHR - flush read queue
4493 * FLUSHW - flush write queue
4494 * FLUSHRW - flush read/write queue
4495 */
4496 if (stp->sd_flag & STRHUP)
4497 return (ENXIO);
4498 if (arg & ~FLUSHRW)
4499 return (EINVAL);
4500
4501 for (;;) {
4502 if (putnextctl1(stp->sd_wrq, M_FLUSH, (int)arg)) {
4503 break;
4504 }
4505 if (error = strwaitbuf(1, BPRI_HI)) {
4506 return (error);
4507 }
4508 }
4509
4510 /*
4511 * Send down an unsupported ioctl and wait for the nack
4512 * in order to allow the M_FLUSH to propagate back
4513 * up to the stream head.
4514 * Replaces if (qready()) runqueues();
4515 */
4516 strioc.ic_cmd = -1; /* The unsupported ioctl */
4517 strioc.ic_timout = 0;
4518 strioc.ic_len = 0;
4519 strioc.ic_dp = NULL;
4520 (void) strdoioctl(stp, &strioc, flag, K_TO_K, crp, rvalp);
4521 *rvalp = 0;
4522 return (0);
4523
4524 case I_FLUSHBAND:
4525 {
4526 struct bandinfo binfo;
4527
4528 error = strcopyin((void *)arg, &binfo, sizeof (binfo),
4529 copyflag);
4530 if (error)
4531 return (error);
4532 if (stp->sd_flag & STRHUP)
4533 return (ENXIO);
4534 if (binfo.bi_flag & ~FLUSHRW)
4535 return (EINVAL);
4536 while (!(mp = allocb(2, BPRI_HI))) {
4537 if (error = strwaitbuf(2, BPRI_HI))
4538 return (error);
4539 }
4540 mp->b_datap->db_type = M_FLUSH;
4541 *mp->b_wptr++ = binfo.bi_flag | FLUSHBAND;
4542 *mp->b_wptr++ = binfo.bi_pri;
4543 putnext(stp->sd_wrq, mp);
4544 /*
4545 * Send down an unsupported ioctl and wait for the nack
4546 * in order to allow the M_FLUSH to propagate back
4547 * up to the stream head.
4548 * Replaces if (qready()) runqueues();
4549 */
4550 strioc.ic_cmd = -1; /* The unsupported ioctl */
4551 strioc.ic_timout = 0;
4552 strioc.ic_len = 0;
4553 strioc.ic_dp = NULL;
4554 (void) strdoioctl(stp, &strioc, flag, K_TO_K, crp, rvalp);
4555 *rvalp = 0;
4556 return (0);
4557 }
4558
4559 case I_SRDOPT:
4560 /*
4561 * Set read options
4562 *
4563 * RNORM - default stream mode
4564 * RMSGN - message no discard
4565 * RMSGD - message discard
4566 * RPROTNORM - fail read with EBADMSG for M_[PC]PROTOs
4567 * RPROTDAT - convert M_[PC]PROTOs to M_DATAs
4568 * RPROTDIS - discard M_[PC]PROTOs and retain M_DATAs
4569 */
4570 if (arg & ~(RMODEMASK | RPROTMASK))
4571 return (EINVAL);
4572
4573 if ((arg & (RMSGD|RMSGN)) == (RMSGD|RMSGN))
4574 return (EINVAL);
4575
4576 mutex_enter(&stp->sd_lock);
4577 switch (arg & RMODEMASK) {
4578 case RNORM:
4579 stp->sd_read_opt &= ~(RD_MSGDIS | RD_MSGNODIS);
4580 break;
4581 case RMSGD:
4582 stp->sd_read_opt = (stp->sd_read_opt & ~RD_MSGNODIS) |
4583 RD_MSGDIS;
4584 break;
4585 case RMSGN:
4586 stp->sd_read_opt = (stp->sd_read_opt & ~RD_MSGDIS) |
4587 RD_MSGNODIS;
4588 break;
4589 }
4590
4591 switch (arg & RPROTMASK) {
4592 case RPROTNORM:
4593 stp->sd_read_opt &= ~(RD_PROTDAT | RD_PROTDIS);
4594 break;
4595
4596 case RPROTDAT:
4597 stp->sd_read_opt = ((stp->sd_read_opt & ~RD_PROTDIS) |
4598 RD_PROTDAT);
4599 break;
4600
4601 case RPROTDIS:
4602 stp->sd_read_opt = ((stp->sd_read_opt & ~RD_PROTDAT) |
4603 RD_PROTDIS);
4604 break;
4605 }
4606 mutex_exit(&stp->sd_lock);
4607 return (0);
4608
4609 case I_GRDOPT:
4610 /*
4611 * Get read option and return the value
4612 * to spot pointed to by arg
4613 */
4614 {
4615 int rdopt;
4616
4617 rdopt = ((stp->sd_read_opt & RD_MSGDIS) ? RMSGD :
4618 ((stp->sd_read_opt & RD_MSGNODIS) ? RMSGN : RNORM));
4619 rdopt |= ((stp->sd_read_opt & RD_PROTDAT) ? RPROTDAT :
4620 ((stp->sd_read_opt & RD_PROTDIS) ? RPROTDIS : RPROTNORM));
4621
4622 return (strcopyout(&rdopt, (void *)arg, sizeof (int),
4623 copyflag));
4624 }
4625
4626 case I_SERROPT:
4627 /*
4628 * Set error options
4629 *
4630 * RERRNORM - persistent read errors
4631 * RERRNONPERSIST - non-persistent read errors
4632 * WERRNORM - persistent write errors
4633 * WERRNONPERSIST - non-persistent write errors
4634 */
4635 if (arg & ~(RERRMASK | WERRMASK))
4636 return (EINVAL);
4637
4638 mutex_enter(&stp->sd_lock);
4639 switch (arg & RERRMASK) {
4640 case RERRNORM:
4641 stp->sd_flag &= ~STRDERRNONPERSIST;
4642 break;
4643 case RERRNONPERSIST:
4644 stp->sd_flag |= STRDERRNONPERSIST;
4645 break;
4646 }
4647 switch (arg & WERRMASK) {
4648 case WERRNORM:
4649 stp->sd_flag &= ~STWRERRNONPERSIST;
4650 break;
4651 case WERRNONPERSIST:
4652 stp->sd_flag |= STWRERRNONPERSIST;
4653 break;
4654 }
4655 mutex_exit(&stp->sd_lock);
4656 return (0);
4657
4658 case I_GERROPT:
4659 /*
4660 * Get error option and return the value
4661 * to spot pointed to by arg
4662 */
4663 {
4664 int erropt = 0;
4665
4666 erropt |= (stp->sd_flag & STRDERRNONPERSIST) ? RERRNONPERSIST :
4667 RERRNORM;
4668 erropt |= (stp->sd_flag & STWRERRNONPERSIST) ? WERRNONPERSIST :
4669 WERRNORM;
4670 return (strcopyout(&erropt, (void *)arg, sizeof (int),
4671 copyflag));
4672 }
4673
4674 case I_SETSIG:
4675 /*
4676 * Register the calling proc to receive the SIGPOLL
4677 * signal based on the events given in arg. If
4678 * arg is zero, remove the proc from register list.
4679 */
4680 {
4681 strsig_t *ssp, *pssp;
4682 struct pid *pidp;
4683
4684 pssp = NULL;
4685 pidp = curproc->p_pidp;
4686 /*
4687 * Hold sd_lock to prevent traversal of sd_siglist while
4688 * it is modified.
4689 */
4690 mutex_enter(&stp->sd_lock);
4691 for (ssp = stp->sd_siglist; ssp && (ssp->ss_pidp != pidp);
4692 pssp = ssp, ssp = ssp->ss_next)
4693 ;
4694
4695 if (arg) {
4696 if (arg & ~(S_INPUT|S_HIPRI|S_MSG|S_HANGUP|S_ERROR|
4697 S_RDNORM|S_WRNORM|S_RDBAND|S_WRBAND|S_BANDURG)) {
4698 mutex_exit(&stp->sd_lock);
4699 return (EINVAL);
4700 }
4701 if ((arg & S_BANDURG) && !(arg & S_RDBAND)) {
4702 mutex_exit(&stp->sd_lock);
4703 return (EINVAL);
4704 }
4705
4706 /*
4707 * If proc not already registered, add it
4708 * to list.
4709 */
4710 if (!ssp) {
4711 ssp = kmem_alloc(sizeof (strsig_t), KM_SLEEP);
4712 ssp->ss_pidp = pidp;
4713 ssp->ss_pid = pidp->pid_id;
4714 ssp->ss_next = NULL;
4715 if (pssp)
4716 pssp->ss_next = ssp;
4717 else
4718 stp->sd_siglist = ssp;
4719 mutex_enter(&pidlock);
4720 PID_HOLD(pidp);
4721 mutex_exit(&pidlock);
4722 }
4723
4724 /*
4725 * Set events.
4726 */
4727 ssp->ss_events = (int)arg;
4728 } else {
4729 /*
4730 * Remove proc from register list.
4731 */
4732 if (ssp) {
4733 mutex_enter(&pidlock);
4734 PID_RELE(pidp);
4735 mutex_exit(&pidlock);
4736 if (pssp)
4737 pssp->ss_next = ssp->ss_next;
4738 else
4739 stp->sd_siglist = ssp->ss_next;
4740 kmem_free(ssp, sizeof (strsig_t));
4741 } else {
4742 mutex_exit(&stp->sd_lock);
4743 return (EINVAL);
4744 }
4745 }
4746
4747 /*
4748 * Recalculate OR of sig events.
4749 */
4750 stp->sd_sigflags = 0;
4751 for (ssp = stp->sd_siglist; ssp; ssp = ssp->ss_next)
4752 stp->sd_sigflags |= ssp->ss_events;
4753 mutex_exit(&stp->sd_lock);
4754 return (0);
4755 }
4756
4757 case I_GETSIG:
4758 /*
4759 * Return (in arg) the current registration of events
4760 * for which the calling proc is to be signaled.
4761 */
4762 {
4763 struct strsig *ssp;
4764 struct pid *pidp;
4765
4766 pidp = curproc->p_pidp;
4767 mutex_enter(&stp->sd_lock);
4768 for (ssp = stp->sd_siglist; ssp; ssp = ssp->ss_next)
4769 if (ssp->ss_pidp == pidp) {
4770 error = strcopyout(&ssp->ss_events, (void *)arg,
4771 sizeof (int), copyflag);
4772 mutex_exit(&stp->sd_lock);
4773 return (error);
4774 }
4775 mutex_exit(&stp->sd_lock);
4776 return (EINVAL);
4777 }
4778
4779 case I_ESETSIG:
4780 /*
4781 * Register the ss_pid to receive the SIGPOLL
		 * signal based on the events in the ss_events arg. If
4783 * ss_events is zero, remove the proc from register list.
4784 */
4785 {
4786 struct strsig *ssp, *pssp;
4787 struct proc *proc;
4788 struct pid *pidp;
4789 pid_t pid;
4790 struct strsigset ss;
4791
4792 error = strcopyin((void *)arg, &ss, sizeof (ss), copyflag);
4793 if (error)
4794 return (error);
4795
4796 pid = ss.ss_pid;
4797
4798 if (ss.ss_events != 0) {
4799 /*
4800 * Permissions check by sending signal 0.
4801 * Note that when kill fails it does a set_errno
4802 * causing the system call to fail.
4803 */
4804 error = kill(pid, 0);
4805 if (error) {
4806 return (error);
4807 }
4808 }
4809 mutex_enter(&pidlock);
4810 if (pid == 0)
4811 proc = curproc;
4812 else if (pid < 0)
4813 proc = pgfind(-pid);
4814 else
4815 proc = prfind(pid);
4816 if (proc == NULL) {
4817 mutex_exit(&pidlock);
4818 return (ESRCH);
4819 }
4820 if (pid < 0)
4821 pidp = proc->p_pgidp;
4822 else
4823 pidp = proc->p_pidp;
4824 ASSERT(pidp);
4825 /*
4826 * Get a hold on the pid structure while referencing it.
4827 * There is a separate PID_HOLD should it be inserted
4828 * in the list below.
4829 */
4830 PID_HOLD(pidp);
4831 mutex_exit(&pidlock);
4832
4833 pssp = NULL;
4834 /*
4835 * Hold sd_lock to prevent traversal of sd_siglist while
4836 * it is modified.
4837 */
4838 mutex_enter(&stp->sd_lock);
4839 for (ssp = stp->sd_siglist; ssp && (ssp->ss_pid != pid);
4840 pssp = ssp, ssp = ssp->ss_next)
4841 ;
4842
4843 if (ss.ss_events) {
4844 if (ss.ss_events &
4845 ~(S_INPUT|S_HIPRI|S_MSG|S_HANGUP|S_ERROR|
4846 S_RDNORM|S_WRNORM|S_RDBAND|S_WRBAND|S_BANDURG)) {
4847 mutex_exit(&stp->sd_lock);
4848 mutex_enter(&pidlock);
4849 PID_RELE(pidp);
4850 mutex_exit(&pidlock);
4851 return (EINVAL);
4852 }
4853 if ((ss.ss_events & S_BANDURG) &&
4854 !(ss.ss_events & S_RDBAND)) {
4855 mutex_exit(&stp->sd_lock);
4856 mutex_enter(&pidlock);
4857 PID_RELE(pidp);
4858 mutex_exit(&pidlock);
4859 return (EINVAL);
4860 }
4861
4862 /*
4863 * If proc not already registered, add it
4864 * to list.
4865 */
4866 if (!ssp) {
4867 ssp = kmem_alloc(sizeof (strsig_t), KM_SLEEP);
4868 ssp->ss_pidp = pidp;
4869 ssp->ss_pid = pid;
4870 ssp->ss_next = NULL;
4871 if (pssp)
4872 pssp->ss_next = ssp;
4873 else
4874 stp->sd_siglist = ssp;
4875 mutex_enter(&pidlock);
4876 PID_HOLD(pidp);
4877 mutex_exit(&pidlock);
4878 }
4879
4880 /*
4881 * Set events.
4882 */
4883 ssp->ss_events = ss.ss_events;
4884 } else {
4885 /*
4886 * Remove proc from register list.
4887 */
4888 if (ssp) {
4889 mutex_enter(&pidlock);
4890 PID_RELE(pidp);
4891 mutex_exit(&pidlock);
4892 if (pssp)
4893 pssp->ss_next = ssp->ss_next;
4894 else
4895 stp->sd_siglist = ssp->ss_next;
4896 kmem_free(ssp, sizeof (strsig_t));
4897 } else {
4898 mutex_exit(&stp->sd_lock);
4899 mutex_enter(&pidlock);
4900 PID_RELE(pidp);
4901 mutex_exit(&pidlock);
4902 return (EINVAL);
4903 }
4904 }
4905
4906 /*
4907 * Recalculate OR of sig events.
4908 */
4909 stp->sd_sigflags = 0;
4910 for (ssp = stp->sd_siglist; ssp; ssp = ssp->ss_next)
4911 stp->sd_sigflags |= ssp->ss_events;
4912 mutex_exit(&stp->sd_lock);
4913 mutex_enter(&pidlock);
4914 PID_RELE(pidp);
4915 mutex_exit(&pidlock);
4916 return (0);
4917 }
4918
4919 case I_EGETSIG:
4920 /*
		 * Return (in arg) the current registration of events
		 * for which the process or process group named by
		 * ss_pid is to be signaled.
4923 */
4924 {
4925 struct strsig *ssp;
4926 struct proc *proc;
4927 pid_t pid;
4928 struct pid *pidp;
4929 struct strsigset ss;
4930
4931 error = strcopyin((void *)arg, &ss, sizeof (ss), copyflag);
4932 if (error)
4933 return (error);
4934
4935 pid = ss.ss_pid;
4936 mutex_enter(&pidlock);
4937 if (pid == 0)
4938 proc = curproc;
4939 else if (pid < 0)
4940 proc = pgfind(-pid);
4941 else
4942 proc = prfind(pid);
4943 if (proc == NULL) {
4944 mutex_exit(&pidlock);
4945 return (ESRCH);
4946 }
4947 if (pid < 0)
4948 pidp = proc->p_pgidp;
4949 else
4950 pidp = proc->p_pidp;
4951
4952 /* Prevent the pidp from being reassigned */
4953 PID_HOLD(pidp);
4954 mutex_exit(&pidlock);
4955
4956 mutex_enter(&stp->sd_lock);
4957 for (ssp = stp->sd_siglist; ssp; ssp = ssp->ss_next)
4958 if (ssp->ss_pid == pid) {
4959 ss.ss_pid = ssp->ss_pid;
4960 ss.ss_events = ssp->ss_events;
4961 error = strcopyout(&ss, (void *)arg,
4962 sizeof (struct strsigset), copyflag);
4963 mutex_exit(&stp->sd_lock);
4964 mutex_enter(&pidlock);
4965 PID_RELE(pidp);
4966 mutex_exit(&pidlock);
4967 return (error);
4968 }
4969 mutex_exit(&stp->sd_lock);
4970 mutex_enter(&pidlock);
4971 PID_RELE(pidp);
4972 mutex_exit(&pidlock);
4973 return (EINVAL);
4974 }
4975
4976 case I_PEEK:
4977 {
4978 STRUCT_DECL(strpeek, strpeek);
4979 size_t n;
4980 mblk_t *fmp, *tmp_mp = NULL;
4981
4982 STRUCT_INIT(strpeek, flag);
4983
4984 error = strcopyin((void *)arg, STRUCT_BUF(strpeek),
4985 STRUCT_SIZE(strpeek), copyflag);
4986 if (error)
4987 return (error);
4988
4989 mutex_enter(QLOCK(rdq));
4990 /*
4991 * Skip the invalid messages
4992 */
4993 for (mp = rdq->q_first; mp != NULL; mp = mp->b_next)
4994 if (mp->b_datap->db_type != M_SIG)
4995 break;
4996
4997 /*
4998 * If user has requested to peek at a high priority message
4999 * and first message is not, return 0
5000 */
5001 if (mp != NULL) {
5002 if ((STRUCT_FGET(strpeek, flags) & RS_HIPRI) &&
5003 queclass(mp) == QNORM) {
5004 *rvalp = 0;
5005 mutex_exit(QLOCK(rdq));
5006 return (0);
5007 }
5008 } else if (stp->sd_struiordq == NULL ||
5009 (STRUCT_FGET(strpeek, flags) & RS_HIPRI)) {
5010 /*
5011 * No mblks to look at at the streamhead and
5012 * 1). This isn't a synch stream or
5013 * 2). This is a synch stream but caller wants high
5014 * priority messages which is not supported by
5015 * the synch stream. (it only supports QNORM)
5016 */
5017 *rvalp = 0;
5018 mutex_exit(QLOCK(rdq));
5019 return (0);
5020 }
5021
5022 fmp = mp;
5023
5024 if (mp && mp->b_datap->db_type == M_PASSFP) {
5025 mutex_exit(QLOCK(rdq));
5026 return (EBADMSG);
5027 }
5028
5029 ASSERT(mp == NULL || mp->b_datap->db_type == M_PCPROTO ||
5030 mp->b_datap->db_type == M_PROTO ||
5031 mp->b_datap->db_type == M_DATA);
5032
5033 if (mp && mp->b_datap->db_type == M_PCPROTO) {
5034 STRUCT_FSET(strpeek, flags, RS_HIPRI);
5035 } else {
5036 STRUCT_FSET(strpeek, flags, 0);
5037 }
5038
5039
5040 if (mp && ((tmp_mp = dupmsg(mp)) == NULL)) {
5041 mutex_exit(QLOCK(rdq));
5042 return (ENOSR);
5043 }
5044 mutex_exit(QLOCK(rdq));
5045
5046 /*
5047 * set mp = tmp_mp, so that I_PEEK processing can continue.
5048 * tmp_mp is used to free the dup'd message.
5049 */
5050 mp = tmp_mp;
5051
5052 uio.uio_fmode = 0;
5053 uio.uio_extflg = UIO_COPY_CACHED;
5054 uio.uio_segflg = (copyflag == U_TO_K) ? UIO_USERSPACE :
5055 UIO_SYSSPACE;
5056 uio.uio_limit = 0;
5057 /*
5058 * First process PROTO blocks, if any.
5059 * If user doesn't want to get ctl info by setting maxlen <= 0,
5060 * then set len to -1/0 and skip control blocks part.
5061 */
5062 if (STRUCT_FGET(strpeek, ctlbuf.maxlen) < 0)
5063 STRUCT_FSET(strpeek, ctlbuf.len, -1);
5064 else if (STRUCT_FGET(strpeek, ctlbuf.maxlen) == 0)
5065 STRUCT_FSET(strpeek, ctlbuf.len, 0);
5066 else {
5067 int ctl_part = 0;
5068
5069 iov.iov_base = STRUCT_FGETP(strpeek, ctlbuf.buf);
5070 iov.iov_len = STRUCT_FGET(strpeek, ctlbuf.maxlen);
5071 uio.uio_iov = &iov;
5072 uio.uio_resid = iov.iov_len;
5073 uio.uio_loffset = 0;
5074 uio.uio_iovcnt = 1;
5075 while (mp && mp->b_datap->db_type != M_DATA &&
5076 uio.uio_resid >= 0) {
5077 ASSERT(STRUCT_FGET(strpeek, flags) == 0 ?
5078 mp->b_datap->db_type == M_PROTO :
5079 mp->b_datap->db_type == M_PCPROTO);
5080
5081 if ((n = MIN(uio.uio_resid,
5082 mp->b_wptr - mp->b_rptr)) != 0 &&
5083 (error = uiomove((char *)mp->b_rptr, n,
5084 UIO_READ, &uio)) != 0) {
5085 freemsg(tmp_mp);
5086 return (error);
5087 }
5088 ctl_part = 1;
5089 mp = mp->b_cont;
5090 }
5091 /* No ctl message */
5092 if (ctl_part == 0)
5093 STRUCT_FSET(strpeek, ctlbuf.len, -1);
5094 else
5095 STRUCT_FSET(strpeek, ctlbuf.len,
5096 STRUCT_FGET(strpeek, ctlbuf.maxlen) -
5097 uio.uio_resid);
5098 }
5099
5100 /*
5101 * Now process DATA blocks, if any.
5102 * If user doesn't want to get data info by setting maxlen <= 0,
5103 * then set len to -1/0 and skip data blocks part.
5104 */
5105 if (STRUCT_FGET(strpeek, databuf.maxlen) < 0)
5106 STRUCT_FSET(strpeek, databuf.len, -1);
5107 else if (STRUCT_FGET(strpeek, databuf.maxlen) == 0)
5108 STRUCT_FSET(strpeek, databuf.len, 0);
5109 else {
5110 int data_part = 0;
5111
5112 iov.iov_base = STRUCT_FGETP(strpeek, databuf.buf);
5113 iov.iov_len = STRUCT_FGET(strpeek, databuf.maxlen);
5114 uio.uio_iov = &iov;
5115 uio.uio_resid = iov.iov_len;
5116 uio.uio_loffset = 0;
5117 uio.uio_iovcnt = 1;
5118 while (mp && uio.uio_resid) {
5119 if (mp->b_datap->db_type == M_DATA) {
5120 if ((n = MIN(uio.uio_resid,
5121 mp->b_wptr - mp->b_rptr)) != 0 &&
5122 (error = uiomove((char *)mp->b_rptr,
5123 n, UIO_READ, &uio)) != 0) {
5124 freemsg(tmp_mp);
5125 return (error);
5126 }
5127 data_part = 1;
5128 }
5129 ASSERT(data_part == 0 ||
5130 mp->b_datap->db_type == M_DATA);
5131 mp = mp->b_cont;
5132 }
5133 /* No data message */
5134 if (data_part == 0)
5135 STRUCT_FSET(strpeek, databuf.len, -1);
5136 else
5137 STRUCT_FSET(strpeek, databuf.len,
5138 STRUCT_FGET(strpeek, databuf.maxlen) -
5139 uio.uio_resid);
5140 }
5141 freemsg(tmp_mp);
5142
5143 /*
5144 * It is a synch stream and user wants to get
5145 * data (maxlen > 0).
		 * uio setup is done by the code that processes DATA
		 * blocks above.
5148 */
5149 if ((fmp == NULL) && STRUCT_FGET(strpeek, databuf.maxlen) > 0) {
5150 infod_t infod;
5151
5152 infod.d_cmd = INFOD_COPYOUT;
5153 infod.d_res = 0;
5154 infod.d_uiop = &uio;
5155 error = infonext(rdq, &infod);
5156 if (error == EINVAL || error == EBUSY)
5157 error = 0;
5158 if (error)
5159 return (error);
5160 STRUCT_FSET(strpeek, databuf.len, STRUCT_FGET(strpeek,
5161 databuf.maxlen) - uio.uio_resid);
5162 if (STRUCT_FGET(strpeek, databuf.len) == 0) {
5163 /*
5164 * No data found by the infonext().
5165 */
5166 STRUCT_FSET(strpeek, databuf.len, -1);
5167 }
5168 }
5169 error = strcopyout(STRUCT_BUF(strpeek), (void *)arg,
5170 STRUCT_SIZE(strpeek), copyflag);
5171 if (error) {
5172 return (error);
5173 }
5174 /*
5175 * If there is no message retrieved, set return code to 0
5176 * otherwise, set it to 1.
5177 */
5178 if (STRUCT_FGET(strpeek, ctlbuf.len) == -1 &&
5179 STRUCT_FGET(strpeek, databuf.len) == -1)
5180 *rvalp = 0;
5181 else
5182 *rvalp = 1;
5183 return (0);
5184 }
5185
5186 case I_FDINSERT:
5187 {
5188 STRUCT_DECL(strfdinsert, strfdinsert);
5189 struct file *resftp;
5190 struct stdata *resstp;
5191 t_uscalar_t ival;
5192 ssize_t msgsize;
5193 struct strbuf mctl;
5194
5195 STRUCT_INIT(strfdinsert, flag);
5196 if (stp->sd_flag & STRHUP)
5197 return (ENXIO);
5198 /*
5199 * STRDERR, STWRERR and STPLEX tested above.
5200 */
5201 error = strcopyin((void *)arg, STRUCT_BUF(strfdinsert),
5202 STRUCT_SIZE(strfdinsert), copyflag);
5203 if (error)
5204 return (error);
5205
5206 if (STRUCT_FGET(strfdinsert, offset) < 0 ||
5207 (STRUCT_FGET(strfdinsert, offset) %
5208 sizeof (t_uscalar_t)) != 0)
5209 return (EINVAL);
5210 if ((resftp = getf(STRUCT_FGET(strfdinsert, fildes))) != NULL) {
5211 if ((resstp = resftp->f_vnode->v_stream) == NULL) {
5212 releasef(STRUCT_FGET(strfdinsert, fildes));
5213 return (EINVAL);
5214 }
5215 } else
5216 return (EINVAL);
5217
5218 mutex_enter(&resstp->sd_lock);
5219 if (resstp->sd_flag & (STRDERR|STWRERR|STRHUP|STPLEX)) {
5220 error = strgeterr(resstp,
5221 STRDERR|STWRERR|STRHUP|STPLEX, 0);
5222 if (error != 0) {
5223 mutex_exit(&resstp->sd_lock);
5224 releasef(STRUCT_FGET(strfdinsert, fildes));
5225 return (error);
5226 }
5227 }
5228 mutex_exit(&resstp->sd_lock);
5229
5230 #ifdef _ILP32
5231 {
5232 queue_t *q;
5233 queue_t *mate = NULL;
5234
5235 /* get read queue of stream terminus */
5236 claimstr(resstp->sd_wrq);
5237 for (q = resstp->sd_wrq->q_next; q->q_next != NULL;
5238 q = q->q_next)
5239 if (!STRMATED(resstp) && STREAM(q) != resstp &&
5240 mate == NULL) {
5241 ASSERT(q->q_qinfo->qi_srvp);
5242 ASSERT(_OTHERQ(q)->q_qinfo->qi_srvp);
5243 claimstr(q);
5244 mate = q;
5245 }
5246 q = _RD(q);
5247 if (mate)
5248 releasestr(mate);
5249 releasestr(resstp->sd_wrq);
5250 ival = (t_uscalar_t)q;
5251 }
5252 #else
5253 ival = (t_uscalar_t)getminor(resftp->f_vnode->v_rdev);
5254 #endif /* _ILP32 */
5255
5256 if (STRUCT_FGET(strfdinsert, ctlbuf.len) <
5257 STRUCT_FGET(strfdinsert, offset) + sizeof (t_uscalar_t)) {
5258 releasef(STRUCT_FGET(strfdinsert, fildes));
5259 return (EINVAL);
5260 }
5261
5262 /*
5263 * Check for legal flag value.
5264 */
5265 if (STRUCT_FGET(strfdinsert, flags) & ~RS_HIPRI) {
5266 releasef(STRUCT_FGET(strfdinsert, fildes));
5267 return (EINVAL);
5268 }
5269
5270 /* get these values from those cached in the stream head */
5271 mutex_enter(QLOCK(stp->sd_wrq));
5272 rmin = stp->sd_qn_minpsz;
5273 rmax = stp->sd_qn_maxpsz;
5274 mutex_exit(QLOCK(stp->sd_wrq));
5275
5276 /*
5277 * Make sure ctl and data sizes together fall within
5278 * the limits of the max and min receive packet sizes
5279 * and do not exceed system limit. A negative data
5280 * length means that no data part is to be sent.
5281 */
5282 ASSERT((rmax >= 0) || (rmax == INFPSZ));
5283 if (rmax == 0) {
5284 releasef(STRUCT_FGET(strfdinsert, fildes));
5285 return (ERANGE);
5286 }
5287 if ((msgsize = STRUCT_FGET(strfdinsert, databuf.len)) < 0)
5288 msgsize = 0;
5289 if ((msgsize < rmin) ||
5290 ((msgsize > rmax) && (rmax != INFPSZ)) ||
5291 (STRUCT_FGET(strfdinsert, ctlbuf.len) > strctlsz)) {
5292 releasef(STRUCT_FGET(strfdinsert, fildes));
5293 return (ERANGE);
5294 }
5295
5296 mutex_enter(&stp->sd_lock);
5297 while (!(STRUCT_FGET(strfdinsert, flags) & RS_HIPRI) &&
5298 !canputnext(stp->sd_wrq)) {
5299 if ((error = strwaitq(stp, WRITEWAIT, (ssize_t)0,
5300 flag, -1, &done)) != 0 || done) {
5301 mutex_exit(&stp->sd_lock);
5302 releasef(STRUCT_FGET(strfdinsert, fildes));
5303 return (error);
5304 }
5305 if ((error = i_straccess(stp, access)) != 0) {
5306 mutex_exit(&stp->sd_lock);
5307 releasef(
5308 STRUCT_FGET(strfdinsert, fildes));
5309 return (error);
5310 }
5311 }
5312 mutex_exit(&stp->sd_lock);
5313
5314 /*
5315 * Copy strfdinsert.ctlbuf into native form of
5316 * ctlbuf to pass down into strmakemsg().
5317 */
5318 mctl.maxlen = STRUCT_FGET(strfdinsert, ctlbuf.maxlen);
5319 mctl.len = STRUCT_FGET(strfdinsert, ctlbuf.len);
5320 mctl.buf = STRUCT_FGETP(strfdinsert, ctlbuf.buf);
5321
5322 iov.iov_base = STRUCT_FGETP(strfdinsert, databuf.buf);
5323 iov.iov_len = STRUCT_FGET(strfdinsert, databuf.len);
5324 uio.uio_iov = &iov;
5325 uio.uio_iovcnt = 1;
5326 uio.uio_loffset = 0;
5327 uio.uio_segflg = (copyflag == U_TO_K) ? UIO_USERSPACE :
5328 UIO_SYSSPACE;
5329 uio.uio_fmode = 0;
5330 uio.uio_extflg = UIO_COPY_CACHED;
5331 uio.uio_resid = iov.iov_len;
5332 if ((error = strmakemsg(&mctl,
5333 &msgsize, &uio, stp,
5334 STRUCT_FGET(strfdinsert, flags), &mp)) != 0 || !mp) {
5335 STRUCT_FSET(strfdinsert, databuf.len, msgsize);
5336 releasef(STRUCT_FGET(strfdinsert, fildes));
5337 return (error);
5338 }
5339
5340 STRUCT_FSET(strfdinsert, databuf.len, msgsize);
5341
5342 /*
5343 * Place the possibly reencoded queue pointer 'offset' bytes
5344 * from the start of the control portion of the message.
5345 */
5346 *((t_uscalar_t *)(mp->b_rptr +
5347 STRUCT_FGET(strfdinsert, offset))) = ival;
5348
5349 /*
5350 * Put message downstream.
5351 */
5352 stream_willservice(stp);
5353 putnext(stp->sd_wrq, mp);
5354 stream_runservice(stp);
5355 releasef(STRUCT_FGET(strfdinsert, fildes));
5356 return (error);
5357 }
5358
5359 case I_SENDFD:
5360 {
5361 struct file *fp;
5362
5363 if ((fp = getf((int)arg)) == NULL)
5364 return (EBADF);
5365 error = do_sendfp(stp, fp, crp);
5366 if (auditing) {
5367 audit_fdsend((int)arg, fp, error);
5368 }
5369 releasef((int)arg);
5370 return (error);
5371 }
5372
5373 case I_RECVFD:
5374 case I_E_RECVFD:
5375 {
5376 struct k_strrecvfd *srf;
5377 int i, fd;
5378
5379 mutex_enter(&stp->sd_lock);
5380 while (!(mp = getq(rdq))) {
5381 if (stp->sd_flag & (STRHUP|STREOF)) {
5382 mutex_exit(&stp->sd_lock);
5383 return (ENXIO);
5384 }
5385 if ((error = strwaitq(stp, GETWAIT, (ssize_t)0,
5386 flag, -1, &done)) != 0 || done) {
5387 mutex_exit(&stp->sd_lock);
5388 return (error);
5389 }
5390 if ((error = i_straccess(stp, access)) != 0) {
5391 mutex_exit(&stp->sd_lock);
5392 return (error);
5393 }
5394 }
5395 if (mp->b_datap->db_type != M_PASSFP) {
5396 putback(stp, rdq, mp, mp->b_band);
5397 mutex_exit(&stp->sd_lock);
5398 return (EBADMSG);
5399 }
5400 mutex_exit(&stp->sd_lock);
5401
5402 srf = (struct k_strrecvfd *)mp->b_rptr;
5403 if ((fd = ufalloc(0)) == -1) {
5404 mutex_enter(&stp->sd_lock);
5405 putback(stp, rdq, mp, mp->b_band);
5406 mutex_exit(&stp->sd_lock);
5407 return (EMFILE);
5408 }
5409 if (cmd == I_RECVFD) {
5410 struct o_strrecvfd ostrfd;
5411
5412 /* check to see if uid/gid values are too large. */
5413
5414 if (srf->uid > (o_uid_t)USHRT_MAX ||
5415 srf->gid > (o_gid_t)USHRT_MAX) {
5416 mutex_enter(&stp->sd_lock);
5417 putback(stp, rdq, mp, mp->b_band);
5418 mutex_exit(&stp->sd_lock);
5419 setf(fd, NULL); /* release fd entry */
5420 return (EOVERFLOW);
5421 }
5422
5423 ostrfd.fd = fd;
5424 ostrfd.uid = (o_uid_t)srf->uid;
5425 ostrfd.gid = (o_gid_t)srf->gid;
5426
5427 /* Null the filler bits */
5428 for (i = 0; i < 8; i++)
5429 ostrfd.fill[i] = 0;
5430
5431 error = strcopyout(&ostrfd, (void *)arg,
5432 sizeof (struct o_strrecvfd), copyflag);
5433 } else { /* I_E_RECVFD */
5434 struct strrecvfd strfd;
5435
5436 strfd.fd = fd;
5437 strfd.uid = srf->uid;
5438 strfd.gid = srf->gid;
5439
5440 /* null the filler bits */
5441 for (i = 0; i < 8; i++)
5442 strfd.fill[i] = 0;
5443
5444 error = strcopyout(&strfd, (void *)arg,
5445 sizeof (struct strrecvfd), copyflag);
5446 }
5447
5448 if (error) {
5449 setf(fd, NULL); /* release fd entry */
5450 mutex_enter(&stp->sd_lock);
5451 putback(stp, rdq, mp, mp->b_band);
5452 mutex_exit(&stp->sd_lock);
5453 return (error);
5454 }
5455 if (auditing) {
5456 audit_fdrecv(fd, srf->fp);
5457 }
5458
5459 /*
5460 * Always increment f_count since the freemsg() below will
5461 * always call free_passfp() which performs a closef().
5462 */
5463 mutex_enter(&srf->fp->f_tlock);
5464 srf->fp->f_count++;
5465 mutex_exit(&srf->fp->f_tlock);
5466 setf(fd, srf->fp);
5467 freemsg(mp);
5468 return (0);
5469 }
5470
5471 case I_SWROPT:
5472 /*
5473 * Set/clear the write options. arg is a bit
5474 * mask with any of the following bits set...
5475 * SNDZERO - send zero length message
5476 * SNDPIPE - send sigpipe to process if
5477 * sd_werror is set and process is
5478 * doing a write or putmsg.
5479 * The new stream head write options should reflect
5480 * what is in arg.
5481 */
5482 if (arg & ~(SNDZERO|SNDPIPE))
5483 return (EINVAL);
5484
5485 mutex_enter(&stp->sd_lock);
5486 stp->sd_wput_opt &= ~(SW_SIGPIPE|SW_SNDZERO);
5487 if (arg & SNDZERO)
5488 stp->sd_wput_opt |= SW_SNDZERO;
5489 if (arg & SNDPIPE)
5490 stp->sd_wput_opt |= SW_SIGPIPE;
5491 mutex_exit(&stp->sd_lock);
5492 return (0);
5493
5494 case I_GWROPT:
5495 {
5496 int wropt = 0;
5497
5498 if (stp->sd_wput_opt & SW_SNDZERO)
5499 wropt |= SNDZERO;
5500 if (stp->sd_wput_opt & SW_SIGPIPE)
5501 wropt |= SNDPIPE;
5502 return (strcopyout(&wropt, (void *)arg, sizeof (wropt),
5503 copyflag));
5504 }
5505
5506 case I_LIST:
5507 /*
5508 * Returns all the modules found on this stream,
		 * up to the driver. If argument is NULL, return the
5510 * number of modules (including driver). If argument
5511 * is not NULL, copy the names into the structure
5512 * provided.
5513 */
5514
5515 {
5516 queue_t *q;
5517 char *qname;
5518 int i, nmods;
5519 struct str_mlist *mlist;
5520 STRUCT_DECL(str_list, strlist);
5521
5522 if (arg == 0) { /* Return number of modules plus driver */
5523 if (stp->sd_vnode->v_type == VFIFO)
5524 *rvalp = stp->sd_pushcnt;
5525 else
5526 *rvalp = stp->sd_pushcnt + 1;
5527 return (0);
5528 }
5529
5530 STRUCT_INIT(strlist, flag);
5531
5532 error = strcopyin((void *)arg, STRUCT_BUF(strlist),
5533 STRUCT_SIZE(strlist), copyflag);
5534 if (error != 0)
5535 return (error);
5536
5537 mlist = STRUCT_FGETP(strlist, sl_modlist);
5538 nmods = STRUCT_FGET(strlist, sl_nmods);
5539 if (nmods <= 0)
5540 return (EINVAL);
5541
5542 claimstr(stp->sd_wrq);
5543 q = stp->sd_wrq;
5544 for (i = 0; i < nmods && _SAMESTR(q); i++, q = q->q_next) {
5545 qname = Q2NAME(q->q_next);
5546 error = strcopyout(qname, &mlist[i], strlen(qname) + 1,
5547 copyflag);
5548 if (error != 0) {
5549 releasestr(stp->sd_wrq);
5550 return (error);
5551 }
5552 }
5553 releasestr(stp->sd_wrq);
5554 return (strcopyout(&i, (void *)arg, sizeof (int), copyflag));
5555 }
5556
5557 case I_CKBAND:
5558 {
5559 queue_t *q;
5560 qband_t *qbp;
5561
5562 if ((arg < 0) || (arg >= NBAND))
5563 return (EINVAL);
5564 q = _RD(stp->sd_wrq);
5565 mutex_enter(QLOCK(q));
5566 if (arg > (int)q->q_nband) {
5567 *rvalp = 0;
5568 } else {
5569 if (arg == 0) {
5570 if (q->q_first)
5571 *rvalp = 1;
5572 else
5573 *rvalp = 0;
5574 } else {
5575 qbp = q->q_bandp;
5576 while (--arg > 0)
5577 qbp = qbp->qb_next;
5578 if (qbp->qb_first)
5579 *rvalp = 1;
5580 else
5581 *rvalp = 0;
5582 }
5583 }
5584 mutex_exit(QLOCK(q));
5585 return (0);
5586 }
5587
5588 case I_GETBAND:
5589 {
5590 int intpri;
5591 queue_t *q;
5592
5593 q = _RD(stp->sd_wrq);
5594 mutex_enter(QLOCK(q));
5595 mp = q->q_first;
5596 if (!mp) {
5597 mutex_exit(QLOCK(q));
5598 return (ENODATA);
5599 }
5600 intpri = (int)mp->b_band;
5601 error = strcopyout(&intpri, (void *)arg, sizeof (int),
5602 copyflag);
5603 mutex_exit(QLOCK(q));
5604 return (error);
5605 }
5606
5607 case I_ATMARK:
5608 {
5609 queue_t *q;
5610
5611 if (arg & ~(ANYMARK|LASTMARK))
5612 return (EINVAL);
5613 q = _RD(stp->sd_wrq);
5614 mutex_enter(&stp->sd_lock);
5615 if ((stp->sd_flag & STRATMARK) && (arg == ANYMARK)) {
5616 *rvalp = 1;
5617 } else {
5618 mutex_enter(QLOCK(q));
5619 mp = q->q_first;
5620
5621 if (mp == NULL)
5622 *rvalp = 0;
5623 else if ((arg == ANYMARK) && (mp->b_flag & MSGMARK))
5624 *rvalp = 1;
5625 else if ((arg == LASTMARK) && (mp == stp->sd_mark))
5626 *rvalp = 1;
5627 else
5628 *rvalp = 0;
5629 mutex_exit(QLOCK(q));
5630 }
5631 mutex_exit(&stp->sd_lock);
5632 return (0);
5633 }
5634
5635 case I_CANPUT:
5636 {
5637 char band;
5638
5639 if ((arg < 0) || (arg >= NBAND))
5640 return (EINVAL);
5641 band = (char)arg;
5642 *rvalp = bcanputnext(stp->sd_wrq, band);
5643 return (0);
5644 }
5645
5646 case I_SETCLTIME:
5647 {
5648 int closetime;
5649
5650 error = strcopyin((void *)arg, &closetime, sizeof (int),
5651 copyflag);
5652 if (error)
5653 return (error);
5654 if (closetime < 0)
5655 return (EINVAL);
5656
5657 stp->sd_closetime = closetime;
5658 return (0);
5659 }
5660
5661 case I_GETCLTIME:
5662 {
5663 int closetime;
5664
5665 closetime = stp->sd_closetime;
5666 return (strcopyout(&closetime, (void *)arg, sizeof (int),
5667 copyflag));
5668 }
5669
5670 case TIOCGSID:
5671 {
5672 pid_t sid;
5673
5674 mutex_enter(&stp->sd_lock);
5675 if (stp->sd_sidp == NULL) {
5676 mutex_exit(&stp->sd_lock);
5677 return (ENOTTY);
5678 }
5679 sid = stp->sd_sidp->pid_id;
5680 mutex_exit(&stp->sd_lock);
5681 return (strcopyout(&sid, (void *)arg, sizeof (pid_t),
5682 copyflag));
5683 }
5684
5685 case TIOCSPGRP:
5686 {
5687 pid_t pgrp;
5688 proc_t *q;
5689 pid_t sid, fg_pgid, bg_pgid;
5690
5691 if (error = strcopyin((void *)arg, &pgrp, sizeof (pid_t),
5692 copyflag))
5693 return (error);
5694 mutex_enter(&stp->sd_lock);
5695 mutex_enter(&pidlock);
5696 if (stp->sd_sidp != ttoproc(curthread)->p_sessp->s_sidp) {
5697 mutex_exit(&pidlock);
5698 mutex_exit(&stp->sd_lock);
5699 return (ENOTTY);
5700 }
5701 if (pgrp == stp->sd_pgidp->pid_id) {
5702 mutex_exit(&pidlock);
5703 mutex_exit(&stp->sd_lock);
5704 return (0);
5705 }
5706 if (pgrp <= 0 || pgrp >= maxpid) {
5707 mutex_exit(&pidlock);
5708 mutex_exit(&stp->sd_lock);
5709 return (EINVAL);
5710 }
5711 if ((q = pgfind(pgrp)) == NULL ||
5712 q->p_sessp != ttoproc(curthread)->p_sessp) {
5713 mutex_exit(&pidlock);
5714 mutex_exit(&stp->sd_lock);
5715 return (EPERM);
5716 }
5717 sid = stp->sd_sidp->pid_id;
5718 fg_pgid = q->p_pgrp;
5719 bg_pgid = stp->sd_pgidp->pid_id;
5720 CL_SET_PROCESS_GROUP(curthread, sid, bg_pgid, fg_pgid);
5721 PID_RELE(stp->sd_pgidp);
5722 ctty_clear_sighuped();
5723 stp->sd_pgidp = q->p_pgidp;
5724 PID_HOLD(stp->sd_pgidp);
5725 mutex_exit(&pidlock);
5726 mutex_exit(&stp->sd_lock);
5727 return (0);
5728 }
5729
5730 case TIOCGPGRP:
5731 {
5732 pid_t pgrp;
5733
5734 mutex_enter(&stp->sd_lock);
5735 if (stp->sd_sidp == NULL) {
5736 mutex_exit(&stp->sd_lock);
5737 return (ENOTTY);
5738 }
5739 pgrp = stp->sd_pgidp->pid_id;
5740 mutex_exit(&stp->sd_lock);
5741 return (strcopyout(&pgrp, (void *)arg, sizeof (pid_t),
5742 copyflag));
5743 }
5744
5745 case TIOCSCTTY:
5746 {
5747 return (strctty(stp));
5748 }
5749
5750 case TIOCNOTTY:
5751 {
5752 /* freectty() always assumes curproc. */
5753 if (freectty(B_FALSE) != 0)
5754 return (0);
5755 return (ENOTTY);
5756 }
5757
5758 case FIONBIO:
5759 case FIOASYNC:
5760 return (0); /* handled by the upper layer */
5761 }
5762 }
5763
5764 /*
5765 * Custom free routine used for M_PASSFP messages.
5766 */
5767 static void
5768 free_passfp(struct k_strrecvfd *srf)
5769 {
5770 (void) closef(srf->fp);
5771 kmem_free(srf, sizeof (struct k_strrecvfd) + sizeof (frtn_t));
5772 }
5773
5774 /* ARGSUSED */
5775 int
5776 do_sendfp(struct stdata *stp, struct file *fp, struct cred *cr)
5777 {
5778 queue_t *qp, *nextqp;
5779 struct k_strrecvfd *srf;
5780 mblk_t *mp;
5781 frtn_t *frtnp;
5782 size_t bufsize;
5783 queue_t *mate = NULL;
5784 syncq_t *sq = NULL;
5785 int retval = 0;
5786
5787 if (stp->sd_flag & STRHUP)
5788 return (ENXIO);
5789
5790 claimstr(stp->sd_wrq);
5791
5792 /* Fastpath, we have a pipe, and we are already mated, use it. */
5793 if (STRMATED(stp)) {
5794 qp = _RD(stp->sd_mate->sd_wrq);
5795 claimstr(qp);
5796 mate = qp;
5797 } else { /* Not already mated. */
5798
5799 /*
5800 * Walk the stream to the end of this one.
5801 * assumes that the claimstr() will prevent
5802 * plumbing between the stream head and the
5803 * driver from changing
5804 */
5805 qp = stp->sd_wrq;
5806
5807 /*
5808 * Loop until we reach the end of this stream.
5809 * On completion, qp points to the write queue
5810 * at the end of the stream, or the read queue
5811 * at the stream head if this is a fifo.
5812 */
5813 while (((qp = qp->q_next) != NULL) && _SAMESTR(qp))
5814 ;
5815
5816 /*
5817 * Just in case we get a q_next which is NULL, but
5818 * not at the end of the stream. This is actually
5819 * broken, so we set an assert to catch it in
5820 * debug, and set an error and return if not debug.
5821 */
5822 ASSERT(qp);
5823 if (qp == NULL) {
5824 releasestr(stp->sd_wrq);
5825 return (EINVAL);
5826 }
5827
5828 /*
5829 * Enter the syncq for the driver, so (hopefully)
5830 * the queue values will not change on us.
5831 * XXXX - This will only prevent the race IFF only
5832 * the write side modifies the q_next member, and
5833 * the put procedure is protected by at least
5834 * MT_PERQ.
5835 */
5836 if ((sq = qp->q_syncq) != NULL)
5837 entersq(sq, SQ_PUT);
5838
5839 /* Now get the q_next value from this qp. */
5840 nextqp = qp->q_next;
5841
5842 /*
5843 * If nextqp exists and the other stream is different
5844 * from this one claim the stream, set the mate, and
5845 * get the read queue at the stream head of the other
5846 * stream. Assumes that nextqp was at least valid when
5847 * we got it. Hopefully the entersq of the driver
5848 * will prevent it from changing on us.
5849 */
5850 if ((nextqp != NULL) && (STREAM(nextqp) != stp)) {
5851 ASSERT(qp->q_qinfo->qi_srvp);
5852 ASSERT(_OTHERQ(qp)->q_qinfo->qi_srvp);
5853 ASSERT(_OTHERQ(qp->q_next)->q_qinfo->qi_srvp);
5854 claimstr(nextqp);
5855
5856 /* Make sure we still have a q_next */
5857 if (nextqp != qp->q_next) {
5858 releasestr(stp->sd_wrq);
5859 releasestr(nextqp);
5860 return (EINVAL);
5861 }
5862
5863 qp = _RD(STREAM(nextqp)->sd_wrq);
5864 mate = qp;
5865 }
5866 /* If we entered the synq above, leave it. */
5867 if (sq != NULL)
5868 leavesq(sq, SQ_PUT);
5869 } /* STRMATED(STP) */
5870
5871 /* XXX prevents substitution of the ops vector */
5872 if (qp->q_qinfo != &strdata && qp->q_qinfo != &fifo_strdata) {
5873 retval = EINVAL;
5874 goto out;
5875 }
5876
5877 if (qp->q_flag & QFULL) {
5878 retval = EAGAIN;
5879 goto out;
5880 }
5881
5882 /*
5883 * Since M_PASSFP messages include a file descriptor, we use
5884 * esballoc() and specify a custom free routine (free_passfp()) that
5885 * will close the descriptor as part of freeing the message. For
5886 * convenience, we stash the frtn_t right after the data block.
5887 */
5888 bufsize = sizeof (struct k_strrecvfd) + sizeof (frtn_t);
5889 srf = kmem_alloc(bufsize, KM_NOSLEEP);
5890 if (srf == NULL) {
5891 retval = EAGAIN;
5892 goto out;
5893 }
5894
5895 frtnp = (frtn_t *)(srf + 1);
5896 frtnp->free_arg = (caddr_t)srf;
5897 frtnp->free_func = free_passfp;
5898
5899 mp = esballoc((uchar_t *)srf, bufsize, BPRI_MED, frtnp);
5900 if (mp == NULL) {
5901 kmem_free(srf, bufsize);
5902 retval = EAGAIN;
5903 goto out;
5904 }
5905 mp->b_wptr += sizeof (struct k_strrecvfd);
5906 mp->b_datap->db_type = M_PASSFP;
5907
5908 srf->fp = fp;
5909 srf->uid = crgetuid(curthread->t_cred);
5910 srf->gid = crgetgid(curthread->t_cred);
5911 mutex_enter(&fp->f_tlock);
5912 fp->f_count++;
5913 mutex_exit(&fp->f_tlock);
5914
5915 put(qp, mp);
5916 out:
5917 releasestr(stp->sd_wrq);
5918 if (mate)
5919 releasestr(mate);
5920 return (retval);
5921 }
5922
5923 /*
5924 * Send an ioctl message downstream and wait for acknowledgement.
5925 * flags may be set to either U_TO_K or K_TO_K and a combination
5926 * of STR_NOERROR or STR_NOSIG
5927 * STR_NOSIG: Signals are essentially ignored or held and have
5928 * no effect for the duration of the call.
5929 * STR_NOERROR: Ignores stream head read, write and hup errors.
5930 * Additionally, if an existing ioctl times out, it is assumed
5931 * lost and and this ioctl will continue as if the previous ioctl had
5932 * finished. ETIME may be returned if this ioctl times out (i.e.
5933 * ic_timout is not INFTIM). Non-stream head errors may be returned if
5934 * the ioc_error indicates that the driver/module had problems,
5935 * an EFAULT was found when accessing user data, a lack of
5936 * resources, etc.
5937 */
5938 int
5939 strdoioctl(
5940 struct stdata *stp,
5941 struct strioctl *strioc,
5942 int fflags, /* file flags with model info */
5943 int flag,
5944 cred_t *crp,
5945 int *rvalp)
5946 {
5947 mblk_t *bp;
5948 struct iocblk *iocbp;
5949 struct copyreq *reqp;
5950 struct copyresp *resp;
5951 int id;
5952 int transparent = 0;
5953 int error = 0;
5954 int len = 0;
5955 caddr_t taddr;
5956 int copyflag = (flag & (U_TO_K | K_TO_K));
5957 int sigflag = (flag & STR_NOSIG);
5958 int errs;
5959 uint_t waitflags;
5960 boolean_t set_iocwaitne = B_FALSE;
5961
5962 ASSERT(copyflag == U_TO_K || copyflag == K_TO_K);
5963 ASSERT((fflags & FMODELS) != 0);
5964
5965 TRACE_2(TR_FAC_STREAMS_FR,
5966 TR_STRDOIOCTL,
5967 "strdoioctl:stp %p strioc %p", stp, strioc);
5968 if (strioc->ic_len == TRANSPARENT) { /* send arg in M_DATA block */
5969 transparent = 1;
5970 strioc->ic_len = sizeof (intptr_t);
5971 }
5972
5973 if (strioc->ic_len < 0 || (strmsgsz > 0 && strioc->ic_len > strmsgsz))
5974 return (EINVAL);
5975
5976 if ((bp = allocb_cred_wait(sizeof (union ioctypes), sigflag, &error,
5977 crp, curproc->p_pid)) == NULL)
5978 return (error);
5979
5980 bzero(bp->b_wptr, sizeof (union ioctypes));
5981
5982 iocbp = (struct iocblk *)bp->b_wptr;
5983 iocbp->ioc_count = strioc->ic_len;
5984 iocbp->ioc_cmd = strioc->ic_cmd;
5985 iocbp->ioc_flag = (fflags & FMODELS);
5986
5987 crhold(crp);
5988 iocbp->ioc_cr = crp;
5989 DB_TYPE(bp) = M_IOCTL;
5990 bp->b_wptr += sizeof (struct iocblk);
5991
5992 if (flag & STR_NOERROR)
5993 errs = STPLEX;
5994 else
5995 errs = STRHUP|STRDERR|STWRERR|STPLEX;
5996
5997 /*
5998 * If there is data to copy into ioctl block, do so.
5999 */
6000 if (iocbp->ioc_count > 0) {
6001 if (transparent)
6002 /*
6003 * Note: STR_NOERROR does not have an effect
6004 * in putiocd()
6005 */
6006 id = K_TO_K | sigflag;
6007 else
6008 id = flag;
6009 if ((error = putiocd(bp, strioc->ic_dp, id, crp)) != 0) {
6010 freemsg(bp);
6011 crfree(crp);
6012 return (error);
6013 }
6014
6015 /*
6016 * We could have slept copying in user pages.
6017 * Recheck the stream head state (the other end
6018 * of a pipe could have gone away).
6019 */
6020 if (stp->sd_flag & errs) {
6021 mutex_enter(&stp->sd_lock);
6022 error = strgeterr(stp, errs, 0);
6023 mutex_exit(&stp->sd_lock);
6024 if (error != 0) {
6025 freemsg(bp);
6026 crfree(crp);
6027 return (error);
6028 }
6029 }
6030 }
6031 if (transparent)
6032 iocbp->ioc_count = TRANSPARENT;
6033
6034 /*
6035 * Block for up to STRTIMOUT milliseconds if there is an outstanding
6036 * ioctl for this stream already running. All processes
6037 * sleeping here will be awakened as a result of an ACK
6038 * or NAK being received for the outstanding ioctl, or
6039 * as a result of the timer expiring on the outstanding
6040 * ioctl (a failure), or as a result of any waiting
6041 * process's timer expiring (also a failure).
6042 */
6043
6044 error = 0;
6045 mutex_enter(&stp->sd_lock);
6046 while ((stp->sd_flag & IOCWAIT) ||
6047 (!set_iocwaitne && (stp->sd_flag & IOCWAITNE))) {
6048 clock_t cv_rval;
6049
6050 TRACE_0(TR_FAC_STREAMS_FR,
6051 TR_STRDOIOCTL_WAIT,
6052 "strdoioctl sleeps - IOCWAIT");
6053 cv_rval = str_cv_wait(&stp->sd_iocmonitor, &stp->sd_lock,
6054 STRTIMOUT, sigflag);
6055 if (cv_rval <= 0) {
6056 if (cv_rval == 0) {
6057 error = EINTR;
6058 } else {
6059 if (flag & STR_NOERROR) {
6060 /*
6061 * Terminating current ioctl in
6062 * progress -- assume it got lost and
6063 * wake up the other thread so that the
6064 * operation completes.
6065 */
6066 if (!(stp->sd_flag & IOCWAITNE)) {
6067 set_iocwaitne = B_TRUE;
6068 stp->sd_flag |= IOCWAITNE;
6069 cv_broadcast(&stp->sd_monitor);
6070 }
6071 /*
6072 * Otherwise, there's a running
6073 * STR_NOERROR -- we have no choice
6074 * here but to wait forever (or until
6075 * interrupted).
6076 */
6077 } else {
6078 /*
6079 * pending ioctl has caused
6080 * us to time out
6081 */
6082 error = ETIME;
6083 }
6084 }
6085 } else if ((stp->sd_flag & errs)) {
6086 error = strgeterr(stp, errs, 0);
6087 }
6088 if (error) {
6089 mutex_exit(&stp->sd_lock);
6090 freemsg(bp);
6091 crfree(crp);
6092 return (error);
6093 }
6094 }
6095
6096 /*
6097 * Have control of ioctl mechanism.
6098 * Send down ioctl packet and wait for response.
6099 */
6100 if (stp->sd_iocblk != (mblk_t *)-1) {
6101 freemsg(stp->sd_iocblk);
6102 }
6103 stp->sd_iocblk = NULL;
6104
6105 /*
6106 * If this is marked with 'noerror' (internal; mostly
6107 * I_{P,}{UN,}LINK), then make sure nobody else is able to get
6108 * in here by setting IOCWAITNE.
6109 */
6110 waitflags = IOCWAIT;
6111 if (flag & STR_NOERROR)
6112 waitflags |= IOCWAITNE;
6113
6114 stp->sd_flag |= waitflags;
6115
6116 /*
6117 * Assign sequence number.
6118 */
6119 iocbp->ioc_id = stp->sd_iocid = getiocseqno();
6120
6121 mutex_exit(&stp->sd_lock);
6122
6123 TRACE_1(TR_FAC_STREAMS_FR,
6124 TR_STRDOIOCTL_PUT, "strdoioctl put: stp %p", stp);
6125 stream_willservice(stp);
6126 putnext(stp->sd_wrq, bp);
6127 stream_runservice(stp);
6128
6129 /*
6130 * Timed wait for acknowledgment. The wait time is limited by the
6131 * timeout value, which must be a positive integer (number of
6132 * milliseconds) to wait, or 0 (use default value of STRTIMOUT
6133 * milliseconds), or -1 (wait forever). This will be awakened
6134 * either by an ACK/NAK message arriving, the timer expiring, or
6135 * the timer expiring on another ioctl waiting for control of the
6136 * mechanism.
6137 */
6138 waitioc:
6139 mutex_enter(&stp->sd_lock);
6140
6141
6142 /*
6143 * If the reply has already arrived, don't sleep. If awakened from
6144 * the sleep, fail only if the reply has not arrived by then.
6145 * Otherwise, process the reply.
6146 */
6147 while (!stp->sd_iocblk) {
6148 clock_t cv_rval;
6149
6150 if (stp->sd_flag & errs) {
6151 error = strgeterr(stp, errs, 0);
6152 if (error != 0) {
6153 stp->sd_flag &= ~waitflags;
6154 cv_broadcast(&stp->sd_iocmonitor);
6155 mutex_exit(&stp->sd_lock);
6156 crfree(crp);
6157 return (error);
6158 }
6159 }
6160
6161 TRACE_0(TR_FAC_STREAMS_FR,
6162 TR_STRDOIOCTL_WAIT2,
6163 "strdoioctl sleeps awaiting reply");
6164 ASSERT(error == 0);
6165
6166 cv_rval = str_cv_wait(&stp->sd_monitor, &stp->sd_lock,
6167 (strioc->ic_timout ?
6168 strioc->ic_timout * 1000 : STRTIMOUT), sigflag);
6169
6170 /*
6171 * There are four possible cases here: interrupt, timeout,
6172 * wakeup by IOCWAITNE (above), or wakeup by strrput_nondata (a
6173 * valid M_IOCTL reply).
6174 *
6175 * If we've been awakened by a STR_NOERROR ioctl on some other
6176 * thread, then sd_iocblk will still be NULL, and IOCWAITNE
6177 * will be set. Pretend as if we just timed out. Note that
6178 * this other thread waited at least STRTIMOUT before trying to
6179 * awaken our thread, so this is indistinguishable (even for
6180 * INFTIM) from the case where we failed with ETIME waiting on
6181 * IOCWAIT in the prior loop.
6182 */
6183 if (cv_rval > 0 && !(flag & STR_NOERROR) &&
6184 stp->sd_iocblk == NULL && (stp->sd_flag & IOCWAITNE)) {
6185 cv_rval = -1;
6186 }
6187
6188 /*
6189 * note: STR_NOERROR does not protect
6190 * us here.. use ic_timout < 0
6191 */
6192 if (cv_rval <= 0) {
6193 if (cv_rval == 0) {
6194 error = EINTR;
6195 } else {
6196 error = ETIME;
6197 }
6198 /*
6199 * A message could have come in after we were scheduled
6200 * but before we were actually run.
6201 */
6202 bp = stp->sd_iocblk;
6203 stp->sd_iocblk = NULL;
6204 if (bp != NULL) {
6205 if ((bp->b_datap->db_type == M_COPYIN) ||
6206 (bp->b_datap->db_type == M_COPYOUT)) {
6207 mutex_exit(&stp->sd_lock);
6208 if (bp->b_cont) {
6209 freemsg(bp->b_cont);
6210 bp->b_cont = NULL;
6211 }
6212 bp->b_datap->db_type = M_IOCDATA;
6213 bp->b_wptr = bp->b_rptr +
6214 sizeof (struct copyresp);
6215 resp = (struct copyresp *)bp->b_rptr;
6216 resp->cp_rval =
6217 (caddr_t)1; /* failure */
6218 stream_willservice(stp);
6219 putnext(stp->sd_wrq, bp);
6220 stream_runservice(stp);
6221 mutex_enter(&stp->sd_lock);
6222 } else {
6223 freemsg(bp);
6224 }
6225 }
6226 stp->sd_flag &= ~waitflags;
6227 cv_broadcast(&stp->sd_iocmonitor);
6228 mutex_exit(&stp->sd_lock);
6229 crfree(crp);
6230 return (error);
6231 }
6232 }
6233 bp = stp->sd_iocblk;
6234 /*
6235 * Note: it is strictly impossible to get here with sd_iocblk set to
6236 * -1. This is because the initial loop above doesn't allow any new
6237 * ioctls into the fray until all others have passed this point.
6238 */
6239 ASSERT(bp != NULL && bp != (mblk_t *)-1);
6240 TRACE_1(TR_FAC_STREAMS_FR,
6241 TR_STRDOIOCTL_ACK, "strdoioctl got reply: bp %p", bp);
6242 if ((bp->b_datap->db_type == M_IOCACK) ||
6243 (bp->b_datap->db_type == M_IOCNAK)) {
6244 /* for detection of duplicate ioctl replies */
6245 stp->sd_iocblk = (mblk_t *)-1;
6246 stp->sd_flag &= ~waitflags;
6247 cv_broadcast(&stp->sd_iocmonitor);
6248 mutex_exit(&stp->sd_lock);
6249 } else {
6250 /*
6251 * flags not cleared here because we're still doing
6252 * copy in/out for ioctl.
6253 */
6254 stp->sd_iocblk = NULL;
6255 mutex_exit(&stp->sd_lock);
6256 }
6257
6258
6259 /*
6260 * Have received acknowledgment.
6261 */
6262
6263 switch (bp->b_datap->db_type) {
6264 case M_IOCACK:
6265 /*
6266 * Positive ack.
6267 */
6268 iocbp = (struct iocblk *)bp->b_rptr;
6269
6270 /*
6271 * Set error if indicated.
6272 */
6273 if (iocbp->ioc_error) {
6274 error = iocbp->ioc_error;
6275 break;
6276 }
6277
6278 /*
6279 * Set return value.
6280 */
6281 *rvalp = iocbp->ioc_rval;
6282
6283 /*
6284 * Data may have been returned in ACK message (ioc_count > 0).
6285 * If so, copy it out to the user's buffer.
6286 */
6287 if (iocbp->ioc_count && !transparent) {
6288 if (error = getiocd(bp, strioc->ic_dp, copyflag))
6289 break;
6290 }
6291 if (!transparent) {
6292 if (len) /* an M_COPYOUT was used with I_STR */
6293 strioc->ic_len = len;
6294 else
6295 strioc->ic_len = (int)iocbp->ioc_count;
6296 }
6297 break;
6298
6299 case M_IOCNAK:
6300 /*
6301 * Negative ack.
6302 *
6303 * The only thing to do is set error as specified
6304 * in neg ack packet.
6305 */
6306 iocbp = (struct iocblk *)bp->b_rptr;
6307
6308 error = (iocbp->ioc_error ? iocbp->ioc_error : EINVAL);
6309 break;
6310
6311 case M_COPYIN:
6312 /*
6313 * Driver or module has requested user ioctl data.
6314 */
6315 reqp = (struct copyreq *)bp->b_rptr;
6316
6317 /*
6318 * M_COPYIN should *never* have a message attached, though
6319 * it's harmless if it does -- thus, panic on a DEBUG
6320 * kernel and just free it on a non-DEBUG build.
6321 */
6322 ASSERT(bp->b_cont == NULL);
6323 if (bp->b_cont != NULL) {
6324 freemsg(bp->b_cont);
6325 bp->b_cont = NULL;
6326 }
6327
6328 error = putiocd(bp, reqp->cq_addr, flag, crp);
6329 if (error && bp->b_cont) {
6330 freemsg(bp->b_cont);
6331 bp->b_cont = NULL;
6332 }
6333
6334 bp->b_wptr = bp->b_rptr + sizeof (struct copyresp);
6335 bp->b_datap->db_type = M_IOCDATA;
6336
6337 mblk_setcred(bp, crp, curproc->p_pid);
6338 resp = (struct copyresp *)bp->b_rptr;
6339 resp->cp_rval = (caddr_t)(uintptr_t)error;
6340 resp->cp_flag = (fflags & FMODELS);
6341
6342 stream_willservice(stp);
6343 putnext(stp->sd_wrq, bp);
6344 stream_runservice(stp);
6345
6346 if (error) {
6347 mutex_enter(&stp->sd_lock);
6348 stp->sd_flag &= ~waitflags;
6349 cv_broadcast(&stp->sd_iocmonitor);
6350 mutex_exit(&stp->sd_lock);
6351 crfree(crp);
6352 return (error);
6353 }
6354
6355 goto waitioc;
6356
6357 case M_COPYOUT:
6358 /*
6359 * Driver or module has ioctl data for a user.
6360 */
6361 reqp = (struct copyreq *)bp->b_rptr;
6362 ASSERT(bp->b_cont != NULL);
6363
6364 /*
6365 * Always (transparent or non-transparent )
6366 * use the address specified in the request
6367 */
6368 taddr = reqp->cq_addr;
6369 if (!transparent)
6370 len = (int)reqp->cq_size;
6371
6372 /* copyout data to the provided address */
6373 error = getiocd(bp, taddr, copyflag);
6374
6375 freemsg(bp->b_cont);
6376 bp->b_cont = NULL;
6377
6378 bp->b_wptr = bp->b_rptr + sizeof (struct copyresp);
6379 bp->b_datap->db_type = M_IOCDATA;
6380
6381 mblk_setcred(bp, crp, curproc->p_pid);
6382 resp = (struct copyresp *)bp->b_rptr;
6383 resp->cp_rval = (caddr_t)(uintptr_t)error;
6384 resp->cp_flag = (fflags & FMODELS);
6385
6386 stream_willservice(stp);
6387 putnext(stp->sd_wrq, bp);
6388 stream_runservice(stp);
6389
6390 if (error) {
6391 mutex_enter(&stp->sd_lock);
6392 stp->sd_flag &= ~waitflags;
6393 cv_broadcast(&stp->sd_iocmonitor);
6394 mutex_exit(&stp->sd_lock);
6395 crfree(crp);
6396 return (error);
6397 }
6398 goto waitioc;
6399
6400 default:
6401 ASSERT(0);
6402 mutex_enter(&stp->sd_lock);
6403 stp->sd_flag &= ~waitflags;
6404 cv_broadcast(&stp->sd_iocmonitor);
6405 mutex_exit(&stp->sd_lock);
6406 break;
6407 }
6408
6409 freemsg(bp);
6410 crfree(crp);
6411 return (error);
6412 }
6413
6414 /*
6415 * Send an M_CMD message downstream and wait for a reply. This is a ptools
6416 * special used to retrieve information from modules/drivers a stream without
6417 * being subjected to flow control or interfering with pending messages on the
6418 * stream (e.g. an ioctl in flight).
6419 */
6420 int
6421 strdocmd(struct stdata *stp, struct strcmd *scp, cred_t *crp)
6422 {
6423 mblk_t *mp;
6424 struct cmdblk *cmdp;
6425 int error = 0;
6426 int errs = STRHUP|STRDERR|STWRERR|STPLEX;
6427 clock_t rval, timeout = STRTIMOUT;
6428
6429 if (scp->sc_len < 0 || scp->sc_len > sizeof (scp->sc_buf) ||
6430 scp->sc_timeout < -1)
6431 return (EINVAL);
6432
6433 if (scp->sc_timeout > 0)
6434 timeout = scp->sc_timeout * MILLISEC;
6435
6436 if ((mp = allocb_cred(sizeof (struct cmdblk), crp,
6437 curproc->p_pid)) == NULL)
6438 return (ENOMEM);
6439
6440 crhold(crp);
6441
6442 cmdp = (struct cmdblk *)mp->b_wptr;
6443 cmdp->cb_cr = crp;
6444 cmdp->cb_cmd = scp->sc_cmd;
6445 cmdp->cb_len = scp->sc_len;
6446 cmdp->cb_error = 0;
6447 mp->b_wptr += sizeof (struct cmdblk);
6448
6449 DB_TYPE(mp) = M_CMD;
6450 DB_CPID(mp) = curproc->p_pid;
6451
6452 /*
6453 * Copy in the payload.
6454 */
6455 if (cmdp->cb_len > 0) {
6456 mp->b_cont = allocb_cred(sizeof (scp->sc_buf), crp,
6457 curproc->p_pid);
6458 if (mp->b_cont == NULL) {
6459 error = ENOMEM;
6460 goto out;
6461 }
6462
6463 /* cb_len comes from sc_len, which has already been checked */
6464 ASSERT(cmdp->cb_len <= sizeof (scp->sc_buf));
6465 (void) bcopy(scp->sc_buf, mp->b_cont->b_wptr, cmdp->cb_len);
6466 mp->b_cont->b_wptr += cmdp->cb_len;
6467 DB_CPID(mp->b_cont) = curproc->p_pid;
6468 }
6469
6470 /*
6471 * Since this mechanism is strictly for ptools, and since only one
6472 * process can be grabbed at a time, we simply fail if there's
6473 * currently an operation pending.
6474 */
6475 mutex_enter(&stp->sd_lock);
6476 if (stp->sd_flag & STRCMDWAIT) {
6477 mutex_exit(&stp->sd_lock);
6478 error = EBUSY;
6479 goto out;
6480 }
6481 stp->sd_flag |= STRCMDWAIT;
6482 ASSERT(stp->sd_cmdblk == NULL);
6483 mutex_exit(&stp->sd_lock);
6484
6485 putnext(stp->sd_wrq, mp);
6486 mp = NULL;
6487
6488 /*
6489 * Timed wait for acknowledgment. If the reply has already arrived,
6490 * don't sleep. If awakened from the sleep, fail only if the reply
6491 * has not arrived by then. Otherwise, process the reply.
6492 */
6493 mutex_enter(&stp->sd_lock);
6494 while (stp->sd_cmdblk == NULL) {
6495 if (stp->sd_flag & errs) {
6496 if ((error = strgeterr(stp, errs, 0)) != 0)
6497 goto waitout;
6498 }
6499
6500 rval = str_cv_wait(&stp->sd_monitor, &stp->sd_lock, timeout, 0);
6501 if (stp->sd_cmdblk != NULL)
6502 break;
6503
6504 if (rval <= 0) {
6505 error = (rval == 0) ? EINTR : ETIME;
6506 goto waitout;
6507 }
6508 }
6509
6510 /*
6511 * We received a reply.
6512 */
6513 mp = stp->sd_cmdblk;
6514 stp->sd_cmdblk = NULL;
6515 ASSERT(mp != NULL && DB_TYPE(mp) == M_CMD);
6516 ASSERT(stp->sd_flag & STRCMDWAIT);
6517 stp->sd_flag &= ~STRCMDWAIT;
6518 mutex_exit(&stp->sd_lock);
6519
6520 cmdp = (struct cmdblk *)mp->b_rptr;
6521 if ((error = cmdp->cb_error) != 0)
6522 goto out;
6523
6524 /*
6525 * Data may have been returned in the reply (cb_len > 0).
6526 * If so, copy it out to the user's buffer.
6527 */
6528 if (cmdp->cb_len > 0) {
6529 if (mp->b_cont == NULL || MBLKL(mp->b_cont) < cmdp->cb_len) {
6530 error = EPROTO;
6531 goto out;
6532 }
6533
6534 cmdp->cb_len = MIN(cmdp->cb_len, sizeof (scp->sc_buf));
6535 (void) bcopy(mp->b_cont->b_rptr, scp->sc_buf, cmdp->cb_len);
6536 }
6537 scp->sc_len = cmdp->cb_len;
6538 out:
6539 freemsg(mp);
6540 crfree(crp);
6541 return (error);
6542 waitout:
6543 ASSERT(stp->sd_cmdblk == NULL);
6544 stp->sd_flag &= ~STRCMDWAIT;
6545 mutex_exit(&stp->sd_lock);
6546 crfree(crp);
6547 return (error);
6548 }
6549
6550 /*
6551 * For the SunOS keyboard driver.
6552 * Return the next available "ioctl" sequence number.
6553 * Exported, so that streams modules can send "ioctl" messages
6554 * downstream from their open routine.
6555 */
6556 int
6557 getiocseqno(void)
6558 {
6559 int i;
6560
6561 mutex_enter(&strresources);
6562 i = ++ioc_id;
6563 mutex_exit(&strresources);
6564 return (i);
6565 }
6566
6567 /*
6568 * Get the next message from the read queue. If the message is
6569 * priority, STRPRI will have been set by strrput(). This flag
6570 * should be reset only when the entire message at the front of the
 * queue has been consumed.
6572 *
6573 * NOTE: strgetmsg and kstrgetmsg have much of the logic in common.
6574 */
6575 int
6576 strgetmsg(
6577 struct vnode *vp,
6578 struct strbuf *mctl,
6579 struct strbuf *mdata,
6580 unsigned char *prip,
6581 int *flagsp,
6582 int fmode,
6583 rval_t *rvp)
6584 {
6585 struct stdata *stp;
6586 mblk_t *bp, *nbp;
6587 mblk_t *savemp = NULL;
6588 mblk_t *savemptail = NULL;
6589 uint_t old_sd_flag;
6590 int flg = MSG_BAND;
6591 int more = 0;
6592 int error = 0;
6593 char first = 1;
6594 uint_t mark; /* Contains MSG*MARK and _LASTMARK */
6595 #define _LASTMARK 0x8000 /* Distinct from MSG*MARK */
6596 unsigned char pri = 0;
6597 queue_t *q;
6598 int pr = 0; /* Partial read successful */
6599 struct uio uios;
6600 struct uio *uiop = &uios;
6601 struct iovec iovs;
6602 unsigned char type;
6603
6604 TRACE_1(TR_FAC_STREAMS_FR, TR_STRGETMSG_ENTER,
6605 "strgetmsg:%p", vp);
6606
6607 ASSERT(vp->v_stream);
6608 stp = vp->v_stream;
6609 rvp->r_val1 = 0;
6610
6611 mutex_enter(&stp->sd_lock);
6612
6613 if ((error = i_straccess(stp, JCREAD)) != 0) {
6614 mutex_exit(&stp->sd_lock);
6615 return (error);
6616 }
6617
6618 if (stp->sd_flag & (STRDERR|STPLEX)) {
6619 error = strgeterr(stp, STRDERR|STPLEX, 0);
6620 if (error != 0) {
6621 mutex_exit(&stp->sd_lock);
6622 return (error);
6623 }
6624 }
6625 mutex_exit(&stp->sd_lock);
6626
6627 switch (*flagsp) {
6628 case MSG_HIPRI:
6629 if (*prip != 0)
6630 return (EINVAL);
6631 break;
6632
6633 case MSG_ANY:
6634 case MSG_BAND:
6635 break;
6636
6637 default:
6638 return (EINVAL);
6639 }
6640 /*
6641 * Setup uio and iov for data part
6642 */
6643 iovs.iov_base = mdata->buf;
6644 iovs.iov_len = mdata->maxlen;
6645 uios.uio_iov = &iovs;
6646 uios.uio_iovcnt = 1;
6647 uios.uio_loffset = 0;
6648 uios.uio_segflg = UIO_USERSPACE;
6649 uios.uio_fmode = 0;
6650 uios.uio_extflg = UIO_COPY_CACHED;
6651 uios.uio_resid = mdata->maxlen;
6652 uios.uio_offset = 0;
6653
6654 q = _RD(stp->sd_wrq);
6655 mutex_enter(&stp->sd_lock);
6656 old_sd_flag = stp->sd_flag;
6657 mark = 0;
6658 for (;;) {
6659 int done = 0;
6660 mblk_t *q_first = q->q_first;
6661
6662 /*
6663 * Get the next message of appropriate priority
6664 * from the stream head. If the caller is interested
6665 * in band or hipri messages, then they should already
6666 * be enqueued at the stream head. On the other hand
6667 * if the caller wants normal (band 0) messages, they
6668 * might be deferred in a synchronous stream and they
6669 * will need to be pulled up.
6670 *
6671 * After we have dequeued a message, we might find that
6672 * it was a deferred M_SIG that was enqueued at the
6673 * stream head. It must now be posted as part of the
6674 * read by calling strsignal_nolock().
6675 *
6676 * Also note that strrput does not enqueue an M_PCSIG,
6677 * and there cannot be more than one hipri message,
6678 * so there was no need to have the M_PCSIG case.
6679 *
6680 * At some time it might be nice to try and wrap the
6681 * functionality of kstrgetmsg() and strgetmsg() into
6682 * a common routine so to reduce the amount of replicated
6683 * code (since they are extremely similar).
6684 */
6685 if (!(*flagsp & (MSG_HIPRI|MSG_BAND))) {
6686 /* Asking for normal, band0 data */
6687 bp = strget(stp, q, uiop, first, &error);
6688 ASSERT(MUTEX_HELD(&stp->sd_lock));
6689 if (bp != NULL) {
6690 if (DB_TYPE(bp) == M_SIG) {
6691 strsignal_nolock(stp, *bp->b_rptr,
6692 bp->b_band);
6693 freemsg(bp);
6694 continue;
6695 } else {
6696 break;
6697 }
6698 }
6699 if (error != 0)
6700 goto getmout;
6701
6702 /*
6703 * We can't depend on the value of STRPRI here because
6704 * the stream head may be in transit. Therefore, we
6705 * must look at the type of the first message to
6706 * determine if a high priority messages is waiting
6707 */
6708 } else if ((*flagsp & MSG_HIPRI) && q_first != NULL &&
6709 DB_TYPE(q_first) >= QPCTL &&
6710 (bp = getq_noenab(q, 0)) != NULL) {
6711 /* Asked for HIPRI and got one */
6712 ASSERT(DB_TYPE(bp) >= QPCTL);
6713 break;
6714 } else if ((*flagsp & MSG_BAND) && q_first != NULL &&
6715 ((q_first->b_band >= *prip) || DB_TYPE(q_first) >= QPCTL) &&
6716 (bp = getq_noenab(q, 0)) != NULL) {
6717 /*
6718 * Asked for at least band "prip" and got either at
6719 * least that band or a hipri message.
6720 */
6721 ASSERT(bp->b_band >= *prip || DB_TYPE(bp) >= QPCTL);
6722 if (DB_TYPE(bp) == M_SIG) {
6723 strsignal_nolock(stp, *bp->b_rptr, bp->b_band);
6724 freemsg(bp);
6725 continue;
6726 } else {
6727 break;
6728 }
6729 }
6730
6731 /* No data. Time to sleep? */
6732 qbackenable(q, 0);
6733
6734 /*
6735 * If STRHUP or STREOF, return 0 length control and data.
6736 * If resid is 0, then a read(fd,buf,0) was done. Do not
6737 * sleep to satisfy this request because by default we have
6738 * zero bytes to return.
6739 */
6740 if ((stp->sd_flag & (STRHUP|STREOF)) || (mctl->maxlen == 0 &&
6741 mdata->maxlen == 0)) {
6742 mctl->len = mdata->len = 0;
6743 *flagsp = 0;
6744 mutex_exit(&stp->sd_lock);
6745 return (0);
6746 }
6747 TRACE_2(TR_FAC_STREAMS_FR, TR_STRGETMSG_WAIT,
6748 "strgetmsg calls strwaitq:%p, %p",
6749 vp, uiop);
6750 if (((error = strwaitq(stp, GETWAIT, (ssize_t)0, fmode, -1,
6751 &done)) != 0) || done) {
6752 TRACE_2(TR_FAC_STREAMS_FR, TR_STRGETMSG_DONE,
6753 "strgetmsg error or done:%p, %p",
6754 vp, uiop);
6755 mutex_exit(&stp->sd_lock);
6756 return (error);
6757 }
6758 TRACE_2(TR_FAC_STREAMS_FR, TR_STRGETMSG_AWAKE,
6759 "strgetmsg awakes:%p, %p", vp, uiop);
6760 if ((error = i_straccess(stp, JCREAD)) != 0) {
6761 mutex_exit(&stp->sd_lock);
6762 return (error);
6763 }
6764 first = 0;
6765 }
6766 ASSERT(bp != NULL);
6767 /*
6768 * Extract any mark information. If the message is not completely
6769 * consumed this information will be put in the mblk
6770 * that is putback.
6771 * If MSGMARKNEXT is set and the message is completely consumed
6772 * the STRATMARK flag will be set below. Likewise, if
6773 * MSGNOTMARKNEXT is set and the message is
6774 * completely consumed STRNOTATMARK will be set.
6775 */
6776 mark = bp->b_flag & (MSGMARK | MSGMARKNEXT | MSGNOTMARKNEXT);
6777 ASSERT((mark & (MSGMARKNEXT|MSGNOTMARKNEXT)) !=
6778 (MSGMARKNEXT|MSGNOTMARKNEXT));
6779 if (mark != 0 && bp == stp->sd_mark) {
6780 mark |= _LASTMARK;
6781 stp->sd_mark = NULL;
6782 }
6783 /*
6784 * keep track of the original message type and priority
6785 */
6786 pri = bp->b_band;
6787 type = bp->b_datap->db_type;
6788 if (type == M_PASSFP) {
6789 if ((mark & _LASTMARK) && (stp->sd_mark == NULL))
6790 stp->sd_mark = bp;
6791 bp->b_flag |= mark & ~_LASTMARK;
6792 putback(stp, q, bp, pri);
6793 qbackenable(q, pri);
6794 mutex_exit(&stp->sd_lock);
6795 return (EBADMSG);
6796 }
6797 ASSERT(type != M_SIG);
6798
6799 /*
6800 * Set this flag so strrput will not generate signals. Need to
6801 * make sure this flag is cleared before leaving this routine
6802 * else signals will stop being sent.
6803 */
6804 stp->sd_flag |= STRGETINPROG;
6805 mutex_exit(&stp->sd_lock);
6806
6807 if (STREAM_NEEDSERVICE(stp))
6808 stream_runservice(stp);
6809
6810 /*
6811 * Set HIPRI flag if message is priority.
6812 */
6813 if (type >= QPCTL)
6814 flg = MSG_HIPRI;
6815 else
6816 flg = MSG_BAND;
6817
6818 /*
6819 * First process PROTO or PCPROTO blocks, if any.
6820 */
6821 if (mctl->maxlen >= 0 && type != M_DATA) {
6822 size_t n, bcnt;
6823 char *ubuf;
6824
6825 bcnt = mctl->maxlen;
6826 ubuf = mctl->buf;
6827 while (bp != NULL && bp->b_datap->db_type != M_DATA) {
6828 if ((n = MIN(bcnt, bp->b_wptr - bp->b_rptr)) != 0 &&
6829 copyout(bp->b_rptr, ubuf, n)) {
6830 error = EFAULT;
6831 mutex_enter(&stp->sd_lock);
6832 /*
6833 * clear stream head pri flag based on
6834 * first message type
6835 */
6836 if (type >= QPCTL) {
6837 ASSERT(type == M_PCPROTO);
6838 stp->sd_flag &= ~STRPRI;
6839 }
6840 more = 0;
6841 freemsg(bp);
6842 goto getmout;
6843 }
6844 ubuf += n;
6845 bp->b_rptr += n;
6846 if (bp->b_rptr >= bp->b_wptr) {
6847 nbp = bp;
6848 bp = bp->b_cont;
6849 freeb(nbp);
6850 }
6851 ASSERT(n <= bcnt);
6852 bcnt -= n;
6853 if (bcnt == 0)
6854 break;
6855 }
6856 mctl->len = mctl->maxlen - bcnt;
6857 } else
6858 mctl->len = -1;
6859
6860 if (bp && bp->b_datap->db_type != M_DATA) {
6861 /*
6862 * More PROTO blocks in msg.
6863 */
6864 more |= MORECTL;
6865 savemp = bp;
6866 while (bp && bp->b_datap->db_type != M_DATA) {
6867 savemptail = bp;
6868 bp = bp->b_cont;
6869 }
6870 savemptail->b_cont = NULL;
6871 }
6872
6873 /*
6874 * Now process DATA blocks, if any.
6875 */
6876 if (mdata->maxlen >= 0 && bp) {
6877 /*
6878 * struiocopyout will consume a potential zero-length
6879 * M_DATA even if uio_resid is zero.
6880 */
6881 size_t oldresid = uiop->uio_resid;
6882
6883 bp = struiocopyout(bp, uiop, &error);
6884 if (error != 0) {
6885 mutex_enter(&stp->sd_lock);
6886 /*
6887 * clear stream head hi pri flag based on
6888 * first message
6889 */
6890 if (type >= QPCTL) {
6891 ASSERT(type == M_PCPROTO);
6892 stp->sd_flag &= ~STRPRI;
6893 }
6894 more = 0;
6895 freemsg(savemp);
6896 goto getmout;
6897 }
6898 /*
6899 * (pr == 1) indicates a partial read.
6900 */
6901 if (oldresid > uiop->uio_resid)
6902 pr = 1;
6903 mdata->len = mdata->maxlen - uiop->uio_resid;
6904 } else
6905 mdata->len = -1;
6906
6907 if (bp) { /* more data blocks in msg */
6908 more |= MOREDATA;
6909 if (savemp)
6910 savemptail->b_cont = bp;
6911 else
6912 savemp = bp;
6913 }
6914
6915 mutex_enter(&stp->sd_lock);
6916 if (savemp) {
6917 if (pr && (savemp->b_datap->db_type == M_DATA) &&
6918 msgnodata(savemp)) {
6919 /*
6920 * Avoid queuing a zero-length tail part of
6921 * a message. pr=1 indicates that we read some of
6922 * the message.
6923 */
6924 freemsg(savemp);
6925 more &= ~MOREDATA;
6926 /*
6927 * clear stream head hi pri flag based on
6928 * first message
6929 */
6930 if (type >= QPCTL) {
6931 ASSERT(type == M_PCPROTO);
6932 stp->sd_flag &= ~STRPRI;
6933 }
6934 } else {
6935 savemp->b_band = pri;
6936 /*
6937 * If the first message was HIPRI and the one we're
6938 * putting back isn't, then clear STRPRI, otherwise
6939 * set STRPRI again. Note that we must set STRPRI
6940 * again since the flush logic in strrput_nondata()
6941 * may have cleared it while we had sd_lock dropped.
6942 */
6943 if (type >= QPCTL) {
6944 ASSERT(type == M_PCPROTO);
6945 if (queclass(savemp) < QPCTL)
6946 stp->sd_flag &= ~STRPRI;
6947 else
6948 stp->sd_flag |= STRPRI;
6949 } else if (queclass(savemp) >= QPCTL) {
6950 /*
6951 * The first message was not a HIPRI message,
6952 * but the one we are about to putback is.
6953 * For simplicitly, we do not allow for HIPRI
6954 * messages to be embedded in the message
6955 * body, so just force it to same type as
6956 * first message.
6957 */
6958 ASSERT(type == M_DATA || type == M_PROTO);
6959 ASSERT(savemp->b_datap->db_type == M_PCPROTO);
6960 savemp->b_datap->db_type = type;
6961 }
6962 if (mark != 0) {
6963 savemp->b_flag |= mark & ~_LASTMARK;
6964 if ((mark & _LASTMARK) &&
6965 (stp->sd_mark == NULL)) {
6966 /*
6967 * If another marked message arrived
6968 * while sd_lock was not held sd_mark
6969 * would be non-NULL.
6970 */
6971 stp->sd_mark = savemp;
6972 }
6973 }
6974 putback(stp, q, savemp, pri);
6975 }
6976 } else {
6977 /*
6978 * The complete message was consumed.
6979 *
6980 * If another M_PCPROTO arrived while sd_lock was not held
6981 * it would have been discarded since STRPRI was still set.
6982 *
6983 * Move the MSG*MARKNEXT information
6984 * to the stream head just in case
6985 * the read queue becomes empty.
6986 * clear stream head hi pri flag based on
6987 * first message
6988 *
6989 * If the stream head was at the mark
6990 * (STRATMARK) before we dropped sd_lock above
6991 * and some data was consumed then we have
6992 * moved past the mark thus STRATMARK is
6993 * cleared. However, if a message arrived in
6994 * strrput during the copyout above causing
6995 * STRATMARK to be set we can not clear that
6996 * flag.
6997 */
6998 if (type >= QPCTL) {
6999 ASSERT(type == M_PCPROTO);
7000 stp->sd_flag &= ~STRPRI;
7001 }
7002 if (mark & (MSGMARKNEXT|MSGNOTMARKNEXT|MSGMARK)) {
7003 if (mark & MSGMARKNEXT) {
7004 stp->sd_flag &= ~STRNOTATMARK;
7005 stp->sd_flag |= STRATMARK;
7006 } else if (mark & MSGNOTMARKNEXT) {
7007 stp->sd_flag &= ~STRATMARK;
7008 stp->sd_flag |= STRNOTATMARK;
7009 } else {
7010 stp->sd_flag &= ~(STRATMARK|STRNOTATMARK);
7011 }
7012 } else if (pr && (old_sd_flag & STRATMARK)) {
7013 stp->sd_flag &= ~STRATMARK;
7014 }
7015 }
7016
7017 *flagsp = flg;
7018 *prip = pri;
7019
7020 /*
7021 * Getmsg cleanup processing - if the state of the queue has changed
7022 * some signals may need to be sent and/or poll awakened.
7023 */
7024 getmout:
7025 qbackenable(q, pri);
7026
7027 /*
7028 * We dropped the stream head lock above. Send all M_SIG messages
7029 * before processing stream head for SIGPOLL messages.
7030 */
7031 ASSERT(MUTEX_HELD(&stp->sd_lock));
7032 while ((bp = q->q_first) != NULL &&
7033 (bp->b_datap->db_type == M_SIG)) {
7034 /*
7035 * sd_lock is held so the content of the read queue can not
7036 * change.
7037 */
7038 bp = getq(q);
7039 ASSERT(bp != NULL && bp->b_datap->db_type == M_SIG);
7040
7041 strsignal_nolock(stp, *bp->b_rptr, bp->b_band);
7042 mutex_exit(&stp->sd_lock);
7043 freemsg(bp);
7044 if (STREAM_NEEDSERVICE(stp))
7045 stream_runservice(stp);
7046 mutex_enter(&stp->sd_lock);
7047 }
7048
7049 /*
7050 * stream head cannot change while we make the determination
7051 * whether or not to send a signal. Drop the flag to allow strrput
7052 * to send firstmsgsigs again.
7053 */
7054 stp->sd_flag &= ~STRGETINPROG;
7055
7056 /*
7057 * If the type of message at the front of the queue changed
7058 * due to the receive the appropriate signals and pollwakeup events
7059 * are generated. The type of changes are:
7060 * Processed a hipri message, q_first is not hipri.
7061 * Processed a band X message, and q_first is band Y.
7062 * The generated signals and pollwakeups are identical to what
7063 * strrput() generates should the message that is now on q_first
7064 * arrive to an empty read queue.
7065 *
7066 * Note: only strrput will send a signal for a hipri message.
7067 */
7068 if ((bp = q->q_first) != NULL && !(stp->sd_flag & STRPRI)) {
7069 strsigset_t signals = 0;
7070 strpollset_t pollwakeups = 0;
7071
7072 if (flg & MSG_HIPRI) {
7073 /*
7074 * Removed a hipri message. Regular data at
7075 * the front of the queue.
7076 */
7077 if (bp->b_band == 0) {
7078 signals = S_INPUT | S_RDNORM;
7079 pollwakeups = POLLIN | POLLRDNORM;
7080 } else {
7081 signals = S_INPUT | S_RDBAND;
7082 pollwakeups = POLLIN | POLLRDBAND;
7083 }
7084 } else if (pri != bp->b_band) {
7085 /*
7086 * The band is different for the new q_first.
7087 */
7088 if (bp->b_band == 0) {
7089 signals = S_RDNORM;
7090 pollwakeups = POLLIN | POLLRDNORM;
7091 } else {
7092 signals = S_RDBAND;
7093 pollwakeups = POLLIN | POLLRDBAND;
7094 }
7095 }
7096
7097 if (pollwakeups != 0) {
7098 if (pollwakeups == (POLLIN | POLLRDNORM)) {
7099 if (!(stp->sd_rput_opt & SR_POLLIN))
7100 goto no_pollwake;
7101 stp->sd_rput_opt &= ~SR_POLLIN;
7102 }
7103 mutex_exit(&stp->sd_lock);
7104 pollwakeup(&stp->sd_pollist, pollwakeups);
7105 mutex_enter(&stp->sd_lock);
7106 }
7107 no_pollwake:
7108
7109 if (stp->sd_sigflags & signals)
7110 strsendsig(stp->sd_siglist, signals, bp->b_band, 0);
7111 }
7112 mutex_exit(&stp->sd_lock);
7113
7114 rvp->r_val1 = more;
7115 return (error);
7116 #undef _LASTMARK
7117 }
7118
7119 /*
7120 * Get the next message from the read queue. If the message is
7121 * priority, STRPRI will have been set by strrput(). This flag
7122 * should be reset only when the entire message at the front of the
7123 * queue as been consumed.
7124 *
7125 * If uiop is NULL all data is returned in mctlp.
7126 * Note that a NULL uiop implies that FNDELAY and FNONBLOCK are assumed
7127 * not enabled.
7128 * The timeout parameter is in milliseconds; -1 for infinity.
7129 * This routine handles the consolidation private flags:
7130 * MSG_IGNERROR Ignore any stream head error except STPLEX.
7131 * MSG_DELAYERROR Defer the error check until the queue is empty.
7132 * MSG_HOLDSIG Hold signals while waiting for data.
7133 * MSG_IPEEK Only peek at messages.
7134 * MSG_DISCARDTAIL Discard the tail M_DATA part of the message
7135 * that doesn't fit.
7136 * MSG_NOMARK If the message is marked leave it on the queue.
7137 *
7138 * NOTE: strgetmsg and kstrgetmsg have much of the logic in common.
7139 */
7140 int
7141 kstrgetmsg(
7142 struct vnode *vp,
7143 mblk_t **mctlp,
7144 struct uio *uiop,
7145 unsigned char *prip,
7146 int *flagsp,
7147 clock_t timout,
7148 rval_t *rvp)
7149 {
7150 struct stdata *stp;
7151 mblk_t *bp, *nbp;
7152 mblk_t *savemp = NULL;
7153 mblk_t *savemptail = NULL;
7154 int flags;
7155 uint_t old_sd_flag;
7156 int flg = MSG_BAND;
7157 int more = 0;
7158 int error = 0;
7159 char first = 1;
7160 uint_t mark; /* Contains MSG*MARK and _LASTMARK */
7161 #define _LASTMARK 0x8000 /* Distinct from MSG*MARK */
7162 unsigned char pri = 0;
7163 queue_t *q;
7164 int pr = 0; /* Partial read successful */
7165 unsigned char type;
7166
7167 TRACE_1(TR_FAC_STREAMS_FR, TR_KSTRGETMSG_ENTER,
7168 "kstrgetmsg:%p", vp);
7169
7170 ASSERT(vp->v_stream);
7171 stp = vp->v_stream;
7172 rvp->r_val1 = 0;
7173
7174 mutex_enter(&stp->sd_lock);
7175
7176 if ((error = i_straccess(stp, JCREAD)) != 0) {
7177 mutex_exit(&stp->sd_lock);
7178 return (error);
7179 }
7180
7181 flags = *flagsp;
7182 if (stp->sd_flag & (STRDERR|STPLEX)) {
7183 if ((stp->sd_flag & STPLEX) ||
7184 (flags & (MSG_IGNERROR|MSG_DELAYERROR)) == 0) {
7185 error = strgeterr(stp, STRDERR|STPLEX,
7186 (flags & MSG_IPEEK));
7187 if (error != 0) {
7188 mutex_exit(&stp->sd_lock);
7189 return (error);
7190 }
7191 }
7192 }
7193 mutex_exit(&stp->sd_lock);
7194
7195 switch (flags & (MSG_HIPRI|MSG_ANY|MSG_BAND)) {
7196 case MSG_HIPRI:
7197 if (*prip != 0)
7198 return (EINVAL);
7199 break;
7200
7201 case MSG_ANY:
7202 case MSG_BAND:
7203 break;
7204
7205 default:
7206 return (EINVAL);
7207 }
7208
7209 retry:
7210 q = _RD(stp->sd_wrq);
7211 mutex_enter(&stp->sd_lock);
7212 old_sd_flag = stp->sd_flag;
7213 mark = 0;
7214 for (;;) {
7215 int done = 0;
7216 int waitflag;
7217 int fmode;
7218 mblk_t *q_first = q->q_first;
7219
7220 /*
7221 * This section of the code operates just like the code
7222 * in strgetmsg(). There is a comment there about what
7223 * is going on here.
7224 */
7225 if (!(flags & (MSG_HIPRI|MSG_BAND))) {
7226 /* Asking for normal, band0 data */
7227 bp = strget(stp, q, uiop, first, &error);
7228 ASSERT(MUTEX_HELD(&stp->sd_lock));
7229 if (bp != NULL) {
7230 if (DB_TYPE(bp) == M_SIG) {
7231 strsignal_nolock(stp, *bp->b_rptr,
7232 bp->b_band);
7233 freemsg(bp);
7234 continue;
7235 } else {
7236 break;
7237 }
7238 }
7239 if (error != 0) {
7240 goto getmout;
7241 }
7242 /*
7243 * We can't depend on the value of STRPRI here because
7244 * the stream head may be in transit. Therefore, we
7245 * must look at the type of the first message to
7246 * determine if a high priority messages is waiting
7247 */
7248 } else if ((flags & MSG_HIPRI) && q_first != NULL &&
7249 DB_TYPE(q_first) >= QPCTL &&
7250 (bp = getq_noenab(q, 0)) != NULL) {
7251 ASSERT(DB_TYPE(bp) >= QPCTL);
7252 break;
7253 } else if ((flags & MSG_BAND) && q_first != NULL &&
7254 ((q_first->b_band >= *prip) || DB_TYPE(q_first) >= QPCTL) &&
7255 (bp = getq_noenab(q, 0)) != NULL) {
7256 /*
7257 * Asked for at least band "prip" and got either at
7258 * least that band or a hipri message.
7259 */
7260 ASSERT(bp->b_band >= *prip || DB_TYPE(bp) >= QPCTL);
7261 if (DB_TYPE(bp) == M_SIG) {
7262 strsignal_nolock(stp, *bp->b_rptr, bp->b_band);
7263 freemsg(bp);
7264 continue;
7265 } else {
7266 break;
7267 }
7268 }
7269
7270 /* No data. Time to sleep? */
7271 qbackenable(q, 0);
7272
7273 /*
7274 * Delayed error notification?
7275 */
7276 if ((stp->sd_flag & (STRDERR|STPLEX)) &&
7277 (flags & (MSG_IGNERROR|MSG_DELAYERROR)) == MSG_DELAYERROR) {
7278 error = strgeterr(stp, STRDERR|STPLEX,
7279 (flags & MSG_IPEEK));
7280 if (error != 0) {
7281 mutex_exit(&stp->sd_lock);
7282 return (error);
7283 }
7284 }
7285
7286 /*
7287 * If STRHUP or STREOF, return 0 length control and data.
7288 * If a read(fd,buf,0) has been done, do not sleep, just
7289 * return.
7290 *
7291 * If mctlp == NULL and uiop == NULL, then the code will
7292 * do the strwaitq. This is an understood way of saying
7293 * sleep "polling" until a message is received.
7294 */
7295 if ((stp->sd_flag & (STRHUP|STREOF)) ||
7296 (uiop != NULL && uiop->uio_resid == 0)) {
7297 if (mctlp != NULL)
7298 *mctlp = NULL;
7299 *flagsp = 0;
7300 mutex_exit(&stp->sd_lock);
7301 return (0);
7302 }
7303
7304 waitflag = GETWAIT;
7305 if (flags &
7306 (MSG_HOLDSIG|MSG_IGNERROR|MSG_IPEEK|MSG_DELAYERROR)) {
7307 if (flags & MSG_HOLDSIG)
7308 waitflag |= STR_NOSIG;
7309 if (flags & MSG_IGNERROR)
7310 waitflag |= STR_NOERROR;
7311 if (flags & MSG_IPEEK)
7312 waitflag |= STR_PEEK;
7313 if (flags & MSG_DELAYERROR)
7314 waitflag |= STR_DELAYERR;
7315 }
7316 if (uiop != NULL)
7317 fmode = uiop->uio_fmode;
7318 else
7319 fmode = 0;
7320
7321 TRACE_2(TR_FAC_STREAMS_FR, TR_KSTRGETMSG_WAIT,
7322 "kstrgetmsg calls strwaitq:%p, %p",
7323 vp, uiop);
7324 if (((error = strwaitq(stp, waitflag, (ssize_t)0,
7325 fmode, timout, &done))) != 0 || done) {
7326 TRACE_2(TR_FAC_STREAMS_FR, TR_KSTRGETMSG_DONE,
7327 "kstrgetmsg error or done:%p, %p",
7328 vp, uiop);
7329 mutex_exit(&stp->sd_lock);
7330 return (error);
7331 }
7332 TRACE_2(TR_FAC_STREAMS_FR, TR_KSTRGETMSG_AWAKE,
7333 "kstrgetmsg awakes:%p, %p", vp, uiop);
7334 if ((error = i_straccess(stp, JCREAD)) != 0) {
7335 mutex_exit(&stp->sd_lock);
7336 return (error);
7337 }
7338 first = 0;
7339 }
7340 ASSERT(bp != NULL);
7341 /*
7342 * Extract any mark information. If the message is not completely
7343 * consumed this information will be put in the mblk
7344 * that is putback.
7345 * If MSGMARKNEXT is set and the message is completely consumed
7346 * the STRATMARK flag will be set below. Likewise, if
7347 * MSGNOTMARKNEXT is set and the message is
7348 * completely consumed STRNOTATMARK will be set.
7349 */
7350 mark = bp->b_flag & (MSGMARK | MSGMARKNEXT | MSGNOTMARKNEXT);
7351 ASSERT((mark & (MSGMARKNEXT|MSGNOTMARKNEXT)) !=
7352 (MSGMARKNEXT|MSGNOTMARKNEXT));
7353 pri = bp->b_band;
7354 if (mark != 0) {
7355 /*
7356 * If the caller doesn't want the mark return.
7357 * Used to implement MSG_WAITALL in sockets.
7358 */
7359 if (flags & MSG_NOMARK) {
7360 putback(stp, q, bp, pri);
7361 qbackenable(q, pri);
7362 mutex_exit(&stp->sd_lock);
7363 return (EWOULDBLOCK);
7364 }
7365 if (bp == stp->sd_mark) {
7366 mark |= _LASTMARK;
7367 stp->sd_mark = NULL;
7368 }
7369 }
7370
7371 /*
7372 * keep track of the first message type
7373 */
7374 type = bp->b_datap->db_type;
7375
7376 if (bp->b_datap->db_type == M_PASSFP) {
7377 if ((mark & _LASTMARK) && (stp->sd_mark == NULL))
7378 stp->sd_mark = bp;
7379 bp->b_flag |= mark & ~_LASTMARK;
7380 putback(stp, q, bp, pri);
7381 qbackenable(q, pri);
7382 mutex_exit(&stp->sd_lock);
7383 return (EBADMSG);
7384 }
7385 ASSERT(type != M_SIG);
7386
7387 if (flags & MSG_IPEEK) {
7388 /*
7389 * Clear any struioflag - we do the uiomove over again
7390 * when peeking since it simplifies the code.
7391 *
7392 * Dup the message and put the original back on the queue.
7393 * If dupmsg() fails, try again with copymsg() to see if
7394 * there is indeed a shortage of memory. dupmsg() may fail
7395 * if db_ref in any of the messages reaches its limit.
7396 */
7397
7398 if ((nbp = dupmsg(bp)) == NULL && (nbp = copymsg(bp)) == NULL) {
7399 /*
7400 * Restore the state of the stream head since we
7401 * need to drop sd_lock (strwaitbuf is sleeping).
7402 */
7403 size_t size = msgdsize(bp);
7404
7405 if ((mark & _LASTMARK) && (stp->sd_mark == NULL))
7406 stp->sd_mark = bp;
7407 bp->b_flag |= mark & ~_LASTMARK;
7408 putback(stp, q, bp, pri);
7409 mutex_exit(&stp->sd_lock);
7410 error = strwaitbuf(size, BPRI_HI);
7411 if (error) {
7412 /*
7413 * There is no net change to the queue thus
7414 * no need to qbackenable.
7415 */
7416 return (error);
7417 }
7418 goto retry;
7419 }
7420
7421 if ((mark & _LASTMARK) && (stp->sd_mark == NULL))
7422 stp->sd_mark = bp;
7423 bp->b_flag |= mark & ~_LASTMARK;
7424 putback(stp, q, bp, pri);
7425 bp = nbp;
7426 }
7427
7428 /*
7429 * Set this flag so strrput will not generate signals. Need to
7430 * make sure this flag is cleared before leaving this routine
7431 * else signals will stop being sent.
7432 */
7433 stp->sd_flag |= STRGETINPROG;
7434 mutex_exit(&stp->sd_lock);
7435
7436 if ((stp->sd_rputdatafunc != NULL) && (DB_TYPE(bp) == M_DATA)) {
7437 mblk_t *tmp, *prevmp;
7438
7439 /*
7440 * Put first non-data mblk back to stream head and
7441 * cut the mblk chain so sd_rputdatafunc only sees
7442 * M_DATA mblks. We can skip the first mblk since it
7443 * is M_DATA according to the condition above.
7444 */
7445 for (prevmp = bp, tmp = bp->b_cont; tmp != NULL;
7446 prevmp = tmp, tmp = tmp->b_cont) {
7447 if (DB_TYPE(tmp) != M_DATA) {
7448 prevmp->b_cont = NULL;
7449 mutex_enter(&stp->sd_lock);
7450 putback(stp, q, tmp, tmp->b_band);
7451 mutex_exit(&stp->sd_lock);
7452 break;
7453 }
7454 }
7455
7456 bp = (stp->sd_rputdatafunc)(stp->sd_vnode, bp,
7457 NULL, NULL, NULL, NULL);
7458
7459 if (bp == NULL)
7460 goto retry;
7461 }
7462
7463 if (STREAM_NEEDSERVICE(stp))
7464 stream_runservice(stp);
7465
7466 /*
7467 * Set HIPRI flag if message is priority.
7468 */
7469 if (type >= QPCTL)
7470 flg = MSG_HIPRI;
7471 else
7472 flg = MSG_BAND;
7473
7474 /*
7475 * First process PROTO or PCPROTO blocks, if any.
7476 */
7477 if (mctlp != NULL && type != M_DATA) {
7478 mblk_t *nbp;
7479
7480 *mctlp = bp;
7481 while (bp->b_cont && bp->b_cont->b_datap->db_type != M_DATA)
7482 bp = bp->b_cont;
7483 nbp = bp->b_cont;
7484 bp->b_cont = NULL;
7485 bp = nbp;
7486 }
7487
7488 if (bp && bp->b_datap->db_type != M_DATA) {
7489 /*
7490 * More PROTO blocks in msg. Will only happen if mctlp is NULL.
7491 */
7492 more |= MORECTL;
7493 savemp = bp;
7494 while (bp && bp->b_datap->db_type != M_DATA) {
7495 savemptail = bp;
7496 bp = bp->b_cont;
7497 }
7498 savemptail->b_cont = NULL;
7499 }
7500
7501 /*
7502 * Now process DATA blocks, if any.
7503 */
7504 if (uiop == NULL) {
7505 /* Append data to tail of mctlp */
7506
7507 if (mctlp != NULL) {
7508 mblk_t **mpp = mctlp;
7509
7510 while (*mpp != NULL)
7511 mpp = &((*mpp)->b_cont);
7512 *mpp = bp;
7513 bp = NULL;
7514 }
7515 } else if (uiop->uio_resid >= 0 && bp) {
7516 size_t oldresid = uiop->uio_resid;
7517
7518 /*
7519 * If a streams message is likely to consist
7520 * of many small mblks, it is pulled up into
7521 * one continuous chunk of memory.
7522 * The size of the first mblk may be bogus because
7523 * successive read() calls on the socket reduce
7524 * the size of this mblk until it is exhausted
7525 * and then the code walks on to the next. Thus
7526 * the size of the mblk may not be the original size
7527 * that was passed up, it's simply a remainder
7528 * and hence can be very small without any
7529 * implication that the packet is badly fragmented.
7530 * So the size of the possible second mblk is
7531 * used to spot a badly fragmented packet.
7532 * see longer comment at top of page
7533 * by mblk_pull_len declaration.
7534 */
7535
7536 if (bp->b_cont != NULL && MBLKL(bp->b_cont) < mblk_pull_len) {
7537 (void) pullupmsg(bp, -1);
7538 }
7539
7540 bp = struiocopyout(bp, uiop, &error);
7541 if (error != 0) {
7542 if (mctlp != NULL) {
7543 freemsg(*mctlp);
7544 *mctlp = NULL;
7545 } else
7546 freemsg(savemp);
7547 mutex_enter(&stp->sd_lock);
7548 /*
7549 * clear stream head hi pri flag based on
7550 * first message
7551 */
7552 if (!(flags & MSG_IPEEK) && (type >= QPCTL)) {
7553 ASSERT(type == M_PCPROTO);
7554 stp->sd_flag &= ~STRPRI;
7555 }
7556 more = 0;
7557 goto getmout;
7558 }
7559 /*
7560 * (pr == 1) indicates a partial read.
7561 */
7562 if (oldresid > uiop->uio_resid)
7563 pr = 1;
7564 }
7565
7566 if (bp) { /* more data blocks in msg */
7567 more |= MOREDATA;
7568 if (savemp)
7569 savemptail->b_cont = bp;
7570 else
7571 savemp = bp;
7572 }
7573
7574 mutex_enter(&stp->sd_lock);
7575 if (savemp) {
7576 if (flags & (MSG_IPEEK|MSG_DISCARDTAIL)) {
7577 /*
7578 * When MSG_DISCARDTAIL is set or
7579 * when peeking discard any tail. When peeking this
7580 * is the tail of the dup that was copied out - the
7581 * message has already been putback on the queue.
7582 * Return MOREDATA to the caller even though the data
7583 * is discarded. This is used by sockets (to
7584 * set MSG_TRUNC).
7585 */
7586 freemsg(savemp);
7587 if (!(flags & MSG_IPEEK) && (type >= QPCTL)) {
7588 ASSERT(type == M_PCPROTO);
7589 stp->sd_flag &= ~STRPRI;
7590 }
7591 } else if (pr && (savemp->b_datap->db_type == M_DATA) &&
7592 msgnodata(savemp)) {
7593 /*
7594 * Avoid queuing a zero-length tail part of
7595 * a message. pr=1 indicates that we read some of
7596 * the message.
7597 */
7598 freemsg(savemp);
7599 more &= ~MOREDATA;
7600 if (type >= QPCTL) {
7601 ASSERT(type == M_PCPROTO);
7602 stp->sd_flag &= ~STRPRI;
7603 }
7604 } else {
7605 savemp->b_band = pri;
7606 /*
7607 * If the first message was HIPRI and the one we're
7608 * putting back isn't, then clear STRPRI, otherwise
7609 * set STRPRI again. Note that we must set STRPRI
7610 * again since the flush logic in strrput_nondata()
7611 * may have cleared it while we had sd_lock dropped.
7612 */
7613
7614 if (type >= QPCTL) {
7615 ASSERT(type == M_PCPROTO);
7616 if (queclass(savemp) < QPCTL)
7617 stp->sd_flag &= ~STRPRI;
7618 else
7619 stp->sd_flag |= STRPRI;
7620 } else if (queclass(savemp) >= QPCTL) {
7621 /*
7622 * The first message was not a HIPRI message,
7623 * but the one we are about to putback is.
7624 * For simplicitly, we do not allow for HIPRI
7625 * messages to be embedded in the message
7626 * body, so just force it to same type as
7627 * first message.
7628 */
7629 ASSERT(type == M_DATA || type == M_PROTO);
7630 ASSERT(savemp->b_datap->db_type == M_PCPROTO);
7631 savemp->b_datap->db_type = type;
7632 }
7633 if (mark != 0) {
7634 if ((mark & _LASTMARK) &&
7635 (stp->sd_mark == NULL)) {
7636 /*
7637 * If another marked message arrived
7638 * while sd_lock was not held sd_mark
7639 * would be non-NULL.
7640 */
7641 stp->sd_mark = savemp;
7642 }
7643 savemp->b_flag |= mark & ~_LASTMARK;
7644 }
7645 putback(stp, q, savemp, pri);
7646 }
7647 } else if (!(flags & MSG_IPEEK)) {
7648 /*
7649 * The complete message was consumed.
7650 *
7651 * If another M_PCPROTO arrived while sd_lock was not held
7652 * it would have been discarded since STRPRI was still set.
7653 *
7654 * Move the MSG*MARKNEXT information
7655 * to the stream head just in case
7656 * the read queue becomes empty.
7657 * clear stream head hi pri flag based on
7658 * first message
7659 *
7660 * If the stream head was at the mark
7661 * (STRATMARK) before we dropped sd_lock above
7662 * and some data was consumed then we have
7663 * moved past the mark thus STRATMARK is
7664 * cleared. However, if a message arrived in
7665 * strrput during the copyout above causing
7666 * STRATMARK to be set we can not clear that
7667 * flag.
7668 * XXX A "perimeter" would help by single-threading strrput,
7669 * strread, strgetmsg and kstrgetmsg.
7670 */
7671 if (type >= QPCTL) {
7672 ASSERT(type == M_PCPROTO);
7673 stp->sd_flag &= ~STRPRI;
7674 }
7675 if (mark & (MSGMARKNEXT|MSGNOTMARKNEXT|MSGMARK)) {
7676 if (mark & MSGMARKNEXT) {
7677 stp->sd_flag &= ~STRNOTATMARK;
7678 stp->sd_flag |= STRATMARK;
7679 } else if (mark & MSGNOTMARKNEXT) {
7680 stp->sd_flag &= ~STRATMARK;
7681 stp->sd_flag |= STRNOTATMARK;
7682 } else {
7683 stp->sd_flag &= ~(STRATMARK|STRNOTATMARK);
7684 }
7685 } else if (pr && (old_sd_flag & STRATMARK)) {
7686 stp->sd_flag &= ~STRATMARK;
7687 }
7688 }
7689
7690 *flagsp = flg;
7691 *prip = pri;
7692
7693 /*
7694 * Getmsg cleanup processing - if the state of the queue has changed
7695 * some signals may need to be sent and/or poll awakened.
7696 */
7697 getmout:
7698 qbackenable(q, pri);
7699
7700 /*
7701 * We dropped the stream head lock above. Send all M_SIG messages
7702 * before processing stream head for SIGPOLL messages.
7703 */
7704 ASSERT(MUTEX_HELD(&stp->sd_lock));
7705 while ((bp = q->q_first) != NULL &&
7706 (bp->b_datap->db_type == M_SIG)) {
7707 /*
7708 * sd_lock is held so the content of the read queue can not
7709 * change.
7710 */
7711 bp = getq(q);
7712 ASSERT(bp != NULL && bp->b_datap->db_type == M_SIG);
7713
7714 strsignal_nolock(stp, *bp->b_rptr, bp->b_band);
7715 mutex_exit(&stp->sd_lock);
7716 freemsg(bp);
7717 if (STREAM_NEEDSERVICE(stp))
7718 stream_runservice(stp);
7719 mutex_enter(&stp->sd_lock);
7720 }
7721
7722 /*
7723 * stream head cannot change while we make the determination
7724 * whether or not to send a signal. Drop the flag to allow strrput
7725 * to send firstmsgsigs again.
7726 */
7727 stp->sd_flag &= ~STRGETINPROG;
7728
7729 /*
7730 * If the type of message at the front of the queue changed
7731 * due to the receive the appropriate signals and pollwakeup events
7732 * are generated. The type of changes are:
7733 * Processed a hipri message, q_first is not hipri.
7734 * Processed a band X message, and q_first is band Y.
7735 * The generated signals and pollwakeups are identical to what
7736 * strrput() generates should the message that is now on q_first
7737 * arrive to an empty read queue.
7738 *
7739 * Note: only strrput will send a signal for a hipri message.
7740 */
7741 if ((bp = q->q_first) != NULL && !(stp->sd_flag & STRPRI)) {
7742 strsigset_t signals = 0;
7743 strpollset_t pollwakeups = 0;
7744
7745 if (flg & MSG_HIPRI) {
7746 /*
7747 * Removed a hipri message. Regular data at
7748 * the front of the queue.
7749 */
7750 if (bp->b_band == 0) {
7751 signals = S_INPUT | S_RDNORM;
7752 pollwakeups = POLLIN | POLLRDNORM;
7753 } else {
7754 signals = S_INPUT | S_RDBAND;
7755 pollwakeups = POLLIN | POLLRDBAND;
7756 }
7757 } else if (pri != bp->b_band) {
7758 /*
7759 * The band is different for the new q_first.
7760 */
7761 if (bp->b_band == 0) {
7762 signals = S_RDNORM;
7763 pollwakeups = POLLIN | POLLRDNORM;
7764 } else {
7765 signals = S_RDBAND;
7766 pollwakeups = POLLIN | POLLRDBAND;
7767 }
7768 }
7769
7770 if (pollwakeups != 0) {
7771 if (pollwakeups == (POLLIN | POLLRDNORM)) {
7772 if (!(stp->sd_rput_opt & SR_POLLIN))
7773 goto no_pollwake;
7774 stp->sd_rput_opt &= ~SR_POLLIN;
7775 }
7776 mutex_exit(&stp->sd_lock);
7777 pollwakeup(&stp->sd_pollist, pollwakeups);
7778 mutex_enter(&stp->sd_lock);
7779 }
7780 no_pollwake:
7781
7782 if (stp->sd_sigflags & signals)
7783 strsendsig(stp->sd_siglist, signals, bp->b_band, 0);
7784 }
7785 mutex_exit(&stp->sd_lock);
7786
7787 rvp->r_val1 = more;
7788 return (error);
7789 #undef _LASTMARK
7790 }
7791
7792 /*
7793 * Put a message downstream.
7794 *
7795 * NOTE: strputmsg and kstrputmsg have much of the logic in common.
7796 */
7797 int
7798 strputmsg(
7799 struct vnode *vp,
7800 struct strbuf *mctl,
7801 struct strbuf *mdata,
7802 unsigned char pri,
7803 int flag,
7804 int fmode)
7805 {
7806 struct stdata *stp;
7807 queue_t *wqp;
7808 mblk_t *mp;
7809 ssize_t msgsize;
7810 ssize_t rmin, rmax;
7811 int error;
7812 struct uio uios;
7813 struct uio *uiop = &uios;
7814 struct iovec iovs;
7815 int xpg4 = 0;
7816
7817 ASSERT(vp->v_stream);
7818 stp = vp->v_stream;
7819 wqp = stp->sd_wrq;
7820
7821 /*
7822 * If it is an XPG4 application, we need to send
7823 * SIGPIPE below
7824 */
7825
7826 xpg4 = (flag & MSG_XPG4) ? 1 : 0;
7827 flag &= ~MSG_XPG4;
7828
7829 if (AU_AUDITING())
7830 audit_strputmsg(vp, mctl, mdata, pri, flag, fmode);
7831
7832 mutex_enter(&stp->sd_lock);
7833
7834 if ((error = i_straccess(stp, JCWRITE)) != 0) {
7835 mutex_exit(&stp->sd_lock);
7836 return (error);
7837 }
7838
7839 if (stp->sd_flag & (STWRERR|STRHUP|STPLEX)) {
7840 error = strwriteable(stp, B_FALSE, xpg4);
7841 if (error != 0) {
7842 mutex_exit(&stp->sd_lock);
7843 return (error);
7844 }
7845 }
7846
7847 mutex_exit(&stp->sd_lock);
7848
7849 /*
7850 * Check for legal flag value.
7851 */
7852 switch (flag) {
7853 case MSG_HIPRI:
7854 if ((mctl->len < 0) || (pri != 0))
7855 return (EINVAL);
7856 break;
7857 case MSG_BAND:
7858 break;
7859
7860 default:
7861 return (EINVAL);
7862 }
7863
7864 TRACE_1(TR_FAC_STREAMS_FR, TR_STRPUTMSG_IN,
7865 "strputmsg in:stp %p", stp);
7866
7867 /* get these values from those cached in the stream head */
7868 rmin = stp->sd_qn_minpsz;
7869 rmax = stp->sd_qn_maxpsz;
7870
7871 /*
7872 * Make sure ctl and data sizes together fall within the
7873 * limits of the max and min receive packet sizes and do
7874 * not exceed system limit.
7875 */
7876 ASSERT((rmax >= 0) || (rmax == INFPSZ));
7877 if (rmax == 0) {
7878 return (ERANGE);
7879 }
7880 /*
7881 * Use the MAXIMUM of sd_maxblk and q_maxpsz.
7882 * Needed to prevent partial failures in the strmakedata loop.
7883 */
7884 if (stp->sd_maxblk != INFPSZ && rmax != INFPSZ && rmax < stp->sd_maxblk)
7885 rmax = stp->sd_maxblk;
7886
7887 if ((msgsize = mdata->len) < 0) {
7888 msgsize = 0;
7889 rmin = 0; /* no range check for NULL data part */
7890 }
7891 if ((msgsize < rmin) ||
7892 ((msgsize > rmax) && (rmax != INFPSZ)) ||
7893 (mctl->len > strctlsz)) {
7894 return (ERANGE);
7895 }
7896
7897 /*
7898 * Setup uio and iov for data part
7899 */
7900 iovs.iov_base = mdata->buf;
7901 iovs.iov_len = msgsize;
7902 uios.uio_iov = &iovs;
7903 uios.uio_iovcnt = 1;
7904 uios.uio_loffset = 0;
7905 uios.uio_segflg = UIO_USERSPACE;
7906 uios.uio_fmode = fmode;
7907 uios.uio_extflg = UIO_COPY_DEFAULT;
7908 uios.uio_resid = msgsize;
7909 uios.uio_offset = 0;
7910
7911 /* Ignore flow control in strput for HIPRI */
7912 if (flag & MSG_HIPRI)
7913 flag |= MSG_IGNFLOW;
7914
7915 for (;;) {
7916 int done = 0;
7917
7918 /*
7919 * strput will always free the ctl mblk - even when strput
7920 * fails.
7921 */
7922 if ((error = strmakectl(mctl, flag, fmode, &mp)) != 0) {
7923 TRACE_3(TR_FAC_STREAMS_FR, TR_STRPUTMSG_OUT,
7924 "strputmsg out:stp %p out %d error %d",
7925 stp, 1, error);
7926 return (error);
7927 }
7928 /*
7929 * Verify that the whole message can be transferred by
7930 * strput.
7931 */
7932 ASSERT(stp->sd_maxblk == INFPSZ ||
7933 stp->sd_maxblk >= mdata->len);
7934
7935 msgsize = mdata->len;
7936 error = strput(stp, mp, uiop, &msgsize, 0, pri, flag);
7937 mdata->len = msgsize;
7938
7939 if (error == 0)
7940 break;
7941
7942 if (error != EWOULDBLOCK)
7943 goto out;
7944
7945 mutex_enter(&stp->sd_lock);
7946 /*
7947 * Check for a missed wakeup.
7948 * Needed since strput did not hold sd_lock across
7949 * the canputnext.
7950 */
7951 if (bcanputnext(wqp, pri)) {
7952 /* Try again */
7953 mutex_exit(&stp->sd_lock);
7954 continue;
7955 }
7956 TRACE_2(TR_FAC_STREAMS_FR, TR_STRPUTMSG_WAIT,
7957 "strputmsg wait:stp %p waits pri %d", stp, pri);
7958 if (((error = strwaitq(stp, WRITEWAIT, (ssize_t)0, fmode, -1,
7959 &done)) != 0) || done) {
7960 mutex_exit(&stp->sd_lock);
7961 TRACE_3(TR_FAC_STREAMS_FR, TR_STRPUTMSG_OUT,
7962 "strputmsg out:q %p out %d error %d",
7963 stp, 0, error);
7964 return (error);
7965 }
7966 TRACE_1(TR_FAC_STREAMS_FR, TR_STRPUTMSG_WAKE,
7967 "strputmsg wake:stp %p wakes", stp);
7968 if ((error = i_straccess(stp, JCWRITE)) != 0) {
7969 mutex_exit(&stp->sd_lock);
7970 return (error);
7971 }
7972 mutex_exit(&stp->sd_lock);
7973 }
7974 out:
7975 /*
7976 * For historic reasons, applications expect EAGAIN
7977 * when data mblk could not be allocated. so change
7978 * ENOMEM back to EAGAIN
7979 */
7980 if (error == ENOMEM)
7981 error = EAGAIN;
7982 TRACE_3(TR_FAC_STREAMS_FR, TR_STRPUTMSG_OUT,
7983 "strputmsg out:stp %p out %d error %d", stp, 2, error);
7984 return (error);
7985 }
7986
7987 /*
7988 * Put a message downstream.
7989 * Can send only an M_PROTO/M_PCPROTO by passing in a NULL uiop.
7990 * The fmode flag (NDELAY, NONBLOCK) is the or of the flags in the uio
7991 * and the fmode parameter.
7992 *
7993 * This routine handles the consolidation private flags:
7994 * MSG_IGNERROR Ignore any stream head error except STPLEX.
7995 * MSG_HOLDSIG Hold signals while waiting for data.
7996 * MSG_IGNFLOW Don't check streams flow control.
7997 *
7998 * NOTE: strputmsg and kstrputmsg have much of the logic in common.
7999 */
8000 int
8001 kstrputmsg(
8002 struct vnode *vp,
8003 mblk_t *mctl,
8004 struct uio *uiop,
8005 ssize_t msgsize,
8006 unsigned char pri,
8007 int flag,
8008 int fmode)
8009 {
8010 struct stdata *stp;
8011 queue_t *wqp;
8012 ssize_t rmin, rmax;
8013 int error;
8014
8015 ASSERT(vp->v_stream);
8016 stp = vp->v_stream;
8017 wqp = stp->sd_wrq;
8018 if (AU_AUDITING())
8019 audit_strputmsg(vp, NULL, NULL, pri, flag, fmode);
8020 if (mctl == NULL)
8021 return (EINVAL);
8022
8023 mutex_enter(&stp->sd_lock);
8024
8025 if ((error = i_straccess(stp, JCWRITE)) != 0) {
8026 mutex_exit(&stp->sd_lock);
8027 freemsg(mctl);
8028 return (error);
8029 }
8030
8031 if ((stp->sd_flag & STPLEX) || !(flag & MSG_IGNERROR)) {
8032 if (stp->sd_flag & (STWRERR|STRHUP|STPLEX)) {
8033 error = strwriteable(stp, B_FALSE, B_TRUE);
8034 if (error != 0) {
8035 mutex_exit(&stp->sd_lock);
8036 freemsg(mctl);
8037 return (error);
8038 }
8039 }
8040 }
8041
8042 mutex_exit(&stp->sd_lock);
8043
8044 /*
8045 * Check for legal flag value.
8046 */
8047 switch (flag & (MSG_HIPRI|MSG_BAND|MSG_ANY)) {
8048 case MSG_HIPRI:
8049 if (pri != 0) {
8050 freemsg(mctl);
8051 return (EINVAL);
8052 }
8053 break;
8054 case MSG_BAND:
8055 break;
8056 default:
8057 freemsg(mctl);
8058 return (EINVAL);
8059 }
8060
8061 TRACE_1(TR_FAC_STREAMS_FR, TR_KSTRPUTMSG_IN,
8062 "kstrputmsg in:stp %p", stp);
8063
8064 /* get these values from those cached in the stream head */
8065 rmin = stp->sd_qn_minpsz;
8066 rmax = stp->sd_qn_maxpsz;
8067
8068 /*
8069 * Make sure ctl and data sizes together fall within the
8070 * limits of the max and min receive packet sizes and do
8071 * not exceed system limit.
8072 */
8073 ASSERT((rmax >= 0) || (rmax == INFPSZ));
8074 if (rmax == 0) {
8075 freemsg(mctl);
8076 return (ERANGE);
8077 }
8078 /*
8079 * Use the MAXIMUM of sd_maxblk and q_maxpsz.
8080 * Needed to prevent partial failures in the strmakedata loop.
8081 */
8082 if (stp->sd_maxblk != INFPSZ && rmax != INFPSZ && rmax < stp->sd_maxblk)
8083 rmax = stp->sd_maxblk;
8084
8085 if (uiop == NULL) {
8086 msgsize = -1;
8087 rmin = -1; /* no range check for NULL data part */
8088 } else {
8089 /* Use uio flags as well as the fmode parameter flags */
8090 fmode |= uiop->uio_fmode;
8091
8092 if ((msgsize < rmin) ||
8093 ((msgsize > rmax) && (rmax != INFPSZ))) {
8094 freemsg(mctl);
8095 return (ERANGE);
8096 }
8097 }
8098
8099 /* Ignore flow control in strput for HIPRI */
8100 if (flag & MSG_HIPRI)
8101 flag |= MSG_IGNFLOW;
8102
8103 for (;;) {
8104 int done = 0;
8105 int waitflag;
8106 mblk_t *mp;
8107
8108 /*
8109 * strput will always free the ctl mblk - even when strput
8110 * fails. If MSG_IGNFLOW is set then any error returned
8111 * will cause us to break the loop, so we don't need a copy
8112 * of the message. If MSG_IGNFLOW is not set, then we can
8113 * get hit by flow control and be forced to try again. In
8114 * this case we need to have a copy of the message. We
8115 * do this using copymsg since the message may get modified
8116 * by something below us.
8117 *
8118 * We've observed that many TPI providers do not check db_ref
8119 * on the control messages but blindly reuse them for the
8120 * T_OK_ACK/T_ERROR_ACK. Thus using copymsg is more
8121 * friendly to such providers than using dupmsg. Also, note
8122 * that sockfs uses MSG_IGNFLOW for all TPI control messages.
8123 * Only data messages are subject to flow control, hence
8124 * subject to this copymsg.
8125 */
8126 if (flag & MSG_IGNFLOW) {
8127 mp = mctl;
8128 mctl = NULL;
8129 } else {
8130 do {
8131 /*
8132 * If a message has a free pointer, the message
8133 * must be dupmsg to maintain this pointer.
8134 * Code using this facility must be sure
8135 * that modules below will not change the
8136 * contents of the dblk without checking db_ref
8137 * first. If db_ref is > 1, then the module
8138 * needs to do a copymsg first. Otherwise,
8139 * the contents of the dblk may become
8140 * inconsistent because the freemsg/freeb below
8141 * may end up calling atomic_add_32_nv.
8142 * The atomic_add_32_nv in freeb (accessing
8143 * all of db_ref, db_type, db_flags, and
8144 * db_struioflag) does not prevent other threads
8145 * from concurrently trying to modify e.g.
8146 * db_type.
8147 */
8148 if (mctl->b_datap->db_frtnp != NULL)
8149 mp = dupmsg(mctl);
8150 else
8151 mp = copymsg(mctl);
8152
8153 if (mp != NULL)
8154 break;
8155
8156 error = strwaitbuf(msgdsize(mctl), BPRI_MED);
8157 if (error) {
8158 freemsg(mctl);
8159 return (error);
8160 }
8161 } while (mp == NULL);
8162 }
8163 /*
8164 * Verify that all of msgsize can be transferred by
8165 * strput.
8166 */
8167 ASSERT(stp->sd_maxblk == INFPSZ || stp->sd_maxblk >= msgsize);
8168 error = strput(stp, mp, uiop, &msgsize, 0, pri, flag);
8169 if (error == 0)
8170 break;
8171
8172 if (error != EWOULDBLOCK)
8173 goto out;
8174
8175 /*
8176 * IF MSG_IGNFLOW is set we should have broken out of loop
8177 * above.
8178 */
8179 ASSERT(!(flag & MSG_IGNFLOW));
8180 mutex_enter(&stp->sd_lock);
8181 /*
8182 * Check for a missed wakeup.
8183 * Needed since strput did not hold sd_lock across
8184 * the canputnext.
8185 */
8186 if (bcanputnext(wqp, pri)) {
8187 /* Try again */
8188 mutex_exit(&stp->sd_lock);
8189 continue;
8190 }
8191 TRACE_2(TR_FAC_STREAMS_FR, TR_KSTRPUTMSG_WAIT,
8192 "kstrputmsg wait:stp %p waits pri %d", stp, pri);
8193
8194 waitflag = WRITEWAIT;
8195 if (flag & (MSG_HOLDSIG|MSG_IGNERROR)) {
8196 if (flag & MSG_HOLDSIG)
8197 waitflag |= STR_NOSIG;
8198 if (flag & MSG_IGNERROR)
8199 waitflag |= STR_NOERROR;
8200 }
8201 if (((error = strwaitq(stp, waitflag,
8202 (ssize_t)0, fmode, -1, &done)) != 0) || done) {
8203 mutex_exit(&stp->sd_lock);
8204 TRACE_3(TR_FAC_STREAMS_FR, TR_KSTRPUTMSG_OUT,
8205 "kstrputmsg out:stp %p out %d error %d",
8206 stp, 0, error);
8207 freemsg(mctl);
8208 return (error);
8209 }
8210 TRACE_1(TR_FAC_STREAMS_FR, TR_KSTRPUTMSG_WAKE,
8211 "kstrputmsg wake:stp %p wakes", stp);
8212 if ((error = i_straccess(stp, JCWRITE)) != 0) {
8213 mutex_exit(&stp->sd_lock);
8214 freemsg(mctl);
8215 return (error);
8216 }
8217 mutex_exit(&stp->sd_lock);
8218 }
8219 out:
8220 freemsg(mctl);
8221 /*
8222 * For historic reasons, applications expect EAGAIN
8223 * when data mblk could not be allocated, so change
8224 * ENOMEM back to EAGAIN
8225 */
8226 if (error == ENOMEM)
8227 error = EAGAIN;
8228 TRACE_3(TR_FAC_STREAMS_FR, TR_KSTRPUTMSG_OUT,
8229 "kstrputmsg out:stp %p out %d error %d", stp, 2, error);
8230 return (error);
8231 }
8232
/*
 * Determines whether the necessary conditions are set on a stream
 * for it to be readable, writeable, or have exceptions.
 *
 * strpoll handles the consolidation private events:
 *	POLLNOERR	Do not return POLLERR even if there are stream
 *			head errors.
 *			Used by sockfs.
 *	POLLRDDATA	Do not return POLLIN unless at least one message on
 *			the queue contains one or more M_DATA mblks. Thus
 *			when this flag is set a queue with only
 *			M_PROTO/M_PCPROTO mblks does not return POLLIN.
 *			Used by sockfs to ignore T_EXDATA_IND messages.
 *
 * Note: POLLRDDATA assumes that synch streams only return messages with
 * an M_DATA attached (i.e. not messages consisting of only
 * an M_PROTO/M_PCPROTO part).
 *
 * Arguments:
 *	stp		stream head being polled.
 *	events_arg	requested events; widened through ushort_t so that
 *			the value is not sign-extended.
 *	anyyet		when non-zero, no pollhead is handed back for the
 *			"no events pending" case (POLLET still forces one).
 *	reventsp	out: events found, or POLLNVAL / POLLERR on the
 *			early exits.
 *	phpp		out: set to the stream head's pollhead when no
 *			events are pending (and !anyyet) or POLLET was
 *			requested; otherwise left untouched.
 *
 * Returns EINVAL (with *reventsp = POLLNVAL) when STPLEX is set in
 * sd_flag, and 0 in every other case, including the POLLERR exit and
 * the POLLNVAL exits taken when polllock() fails.
 */
int
strpoll(struct stdata *stp, short events_arg, int anyyet, short *reventsp,
    struct pollhead **phpp)
{
	int events = (ushort_t)events_arg;
	int retevents = 0;
	mblk_t *mp;
	qband_t *qbp;
	/*
	 * Snapshot of sd_flag taken once at entry; every flag test below
	 * uses this copy rather than re-reading stp->sd_flag.
	 */
	long sd_flags = stp->sd_flag;
	/* Non-zero once sd_lock has been acquired and must be dropped. */
	int headlocked = 0;

	/*
	 * For performance, a single 'if' tests for most possible edge
	 * conditions in one shot
	 */
	if (sd_flags & (STPLEX | STRDERR | STWRERR)) {
		if (sd_flags & STPLEX) {
			*reventsp = POLLNVAL;
			return (EINVAL);
		}
		/*
		 * A read-type request against STRDERR, or a write-type
		 * request against STWRERR, reports POLLERR alone unless
		 * the caller asked for POLLNOERR.
		 */
		if (((events & (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) &&
		    (sd_flags & STRDERR)) ||
		    ((events & (POLLOUT | POLLWRNORM | POLLWRBAND)) &&
		    (sd_flags & STWRERR))) {
			if (!(events & POLLNOERR)) {
				*reventsp = POLLERR;
				return (0);
			}
		}
	}
	/*
	 * With STRHUP set only POLLHUP is reported for the write side;
	 * the write queues are not examined at all.
	 */
	if (sd_flags & STRHUP) {
		retevents |= POLLHUP;
	} else if (events & (POLLWRNORM | POLLWRBAND)) {
		queue_t *tq;
		queue_t	*qp = stp->sd_wrq;

		/* claimstr()/releasestr() bracket the use of qp->q_next. */
		claimstr(qp);
		/* Find next module forward that has a service procedure */
		tq = qp->q_next->q_nfsrv;
		ASSERT(tq != NULL);

		/*
		 * A non-zero return from polllock() is reported as
		 * POLLNVAL; on success QLOCK(tq) is released by the
		 * mutex_exit() calls below.
		 */
		if (polllock(&stp->sd_pollist, QLOCK(tq)) != 0) {
			releasestr(qp);
			*reventsp = POLLNVAL;
			return (0);
		}
		if (events & POLLWRNORM) {
			queue_t *sqp;

			/*
			 * NOTE(review): POLLOUT is reported for a
			 * POLLWRNORM request -- presumably the two share
			 * a value in sys/poll.h; confirm.
			 */
			if (tq->q_flag & QFULL)
				/* ensure backq svc procedure runs */
				tq->q_flag |= QWANTW;
			else if ((sqp = stp->sd_struiowrq) != NULL) {
				/* Check sync stream barrier write q */
				/*
				 * Only one queue lock is held at a time:
				 * QLOCK(tq) is dropped before QLOCK(sqp)
				 * is taken.
				 */
				mutex_exit(QLOCK(tq));
				if (polllock(&stp->sd_pollist,
				    QLOCK(sqp)) != 0) {
					releasestr(qp);
					*reventsp = POLLNVAL;
					return (0);
				}
				if (sqp->q_flag & QFULL)
					/* ensure pollwakeup() is done */
					sqp->q_flag |= QWANTWSYNC;
				else
					retevents |= POLLOUT;
				/* More write events to process ??? */
				if (! (events & POLLWRBAND)) {
					/*
					 * Nothing else to check on the
					 * write side; skip the common
					 * unlock/release at the bottom of
					 * this branch.
					 */
					mutex_exit(QLOCK(sqp));
					releasestr(qp);
					goto chkrd;
				}
				/*
				 * The band check below runs under QLOCK(tq),
				 * so swap locks back.
				 */
				mutex_exit(QLOCK(sqp));
				if (polllock(&stp->sd_pollist,
				    QLOCK(tq)) != 0) {
					releasestr(qp);
					*reventsp = POLLNVAL;
					return (0);
				}
			} else
				retevents |= POLLOUT;
		}
		if (events & POLLWRBAND) {
			/*
			 * POLLWRBAND is reported if any band on tq is not
			 * full, or if tq has no band structures at all.
			 * Every full band is marked QB_WANTW.
			 */
			qbp = tq->q_bandp;
			if (qbp) {
				while (qbp) {
					if (qbp->qb_flag & QB_FULL)
						qbp->qb_flag |= QB_WANTW;
					else
						retevents |= POLLWRBAND;
					qbp = qbp->qb_next;
				}
			} else {
				retevents |= POLLWRBAND;
			}
		}
		mutex_exit(QLOCK(tq));
		releasestr(qp);
	}
chkrd:
	/*
	 * With STRPRI set, POLLPRI is reported if it was requested and the
	 * read queue is not scanned.
	 */
	if (sd_flags & STRPRI) {
		retevents |= (events & POLLPRI);
	} else if (events & (POLLRDNORM | POLLRDBAND | POLLIN)) {
		queue_t	*qp = _RD(stp->sd_wrq);
		/* The subset of "normal data" events the caller asked for. */
		int normevents = (events & (POLLIN | POLLRDNORM));

		/*
		 * Note: Need to do polllock() here since ps_lock may be
		 * held. See bug 4191544.
		 */
		if (polllock(&stp->sd_pollist, &stp->sd_lock) != 0) {
			*reventsp = POLLNVAL;
			return (0);
		}
		headlocked = 1;
		/*
		 * Only the first qualifying message on the read queue
		 * decides which read events are reported; the loop breaks
		 * as soon as one is found.
		 */
		mp = qp->q_first;
		while (mp) {
			/*
			 * For POLLRDDATA we scan b_cont and b_next until we
			 * find an M_DATA.
			 */
			if ((events & POLLRDDATA) &&
			    mp->b_datap->db_type != M_DATA) {
				mblk_t *nmp = mp->b_cont;

				while (nmp != NULL &&
				    nmp->b_datap->db_type != M_DATA)
					nmp = nmp->b_cont;
				if (nmp == NULL) {
					/* No M_DATA here; try next message. */
					mp = mp->b_next;
					continue;
				}
			}
			if (mp->b_band == 0)
				retevents |= normevents;
			else
				retevents |= (events & (POLLIN | POLLRDBAND));
			break;
		}
		if (!(retevents & normevents) && (stp->sd_wakeq & RSLEEP)) {
			/*
			 * Sync stream barrier read queue has data.
			 */
			retevents |= normevents;
		}
		/* Treat eof as normal data */
		if (sd_flags & STREOF)
			retevents |= normevents;
	}

	/*
	 * Pass back a pollhead if no events are pending or if edge-triggering
	 * has been configured on this resource.
	 */
	if ((retevents == 0 && !anyyet) || (events & POLLET)) {
		*phpp = &stp->sd_pollist;
		/* SR_POLLIN is set under sd_lock; take it if not yet held. */
		if (headlocked == 0) {
			if (polllock(&stp->sd_pollist, &stp->sd_lock) != 0) {
				*reventsp = POLLNVAL;
				return (0);
			}
			headlocked = 1;
		}
		/*
		 * SR_POLLIN is the flag putback() tests before issuing a
		 * band-0 pollwakeup().
		 */
		stp->sd_rput_opt |= SR_POLLIN;
	}

	*reventsp = (short)retevents;
	if (headlocked)
		mutex_exit(&stp->sd_lock);
	return (0);
}
8422
8423 /*
8424 * The purpose of putback() is to assure sleeping polls/reads
8425 * are awakened when there are no new messages arriving at the,
8426 * stream head, and a message is placed back on the read queue.
8427 *
8428 * sd_lock must be held when messages are placed back on stream
8429 * head. (getq() holds sd_lock when it removes messages from
8430 * the queue)
8431 */
8432
8433 static void
8434 putback(struct stdata *stp, queue_t *q, mblk_t *bp, int band)
8435 {
8436 mblk_t *qfirst;
8437 ASSERT(MUTEX_HELD(&stp->sd_lock));
8438
8439 /*
8440 * As a result of lock-step ordering around q_lock and sd_lock,
8441 * it's possible for function calls like putnext() and
8442 * canputnext() to get an inaccurate picture of how much
8443 * data is really being processed at the stream head.
8444 * We only consolidate with existing messages on the queue
8445 * if the length of the message we want to put back is smaller
8446 * than the queue hiwater mark.
8447 */
8448 if ((stp->sd_rput_opt & SR_CONSOL_DATA) &&
8449 (DB_TYPE(bp) == M_DATA) && ((qfirst = q->q_first) != NULL) &&
8450 (DB_TYPE(qfirst) == M_DATA) &&
8451 ((qfirst->b_flag & (MSGMARK|MSGDELIM)) == 0) &&
8452 ((bp->b_flag & (MSGMARK|MSGDELIM|MSGMARKNEXT)) == 0) &&
8453 (mp_cont_len(bp, NULL) < q->q_hiwat)) {
8454 /*
8455 * We use the same logic as defined in strrput()
8456 * but in reverse as we are putting back onto the
8457 * queue and want to retain byte ordering.
8458 * Consolidate M_DATA messages with M_DATA ONLY.
8459 * strrput() allows the consolidation of M_DATA onto
8460 * M_PROTO | M_PCPROTO but not the other way round.
8461 *
8462 * The consolidation does not take place if the message
8463 * we are returning to the queue is marked with either
8464 * of the marks or the delim flag or if q_first
8465 * is marked with MSGMARK. The MSGMARK check is needed to
8466 * handle the odd semantics of MSGMARK where essentially
8467 * the whole message is to be treated as marked.
8468 * Carry any MSGMARKNEXT and MSGNOTMARKNEXT from q_first
8469 * to the front of the b_cont chain.
8470 */
8471 rmvq_noenab(q, qfirst);
8472
8473 /*
8474 * The first message in the b_cont list
8475 * tracks MSGMARKNEXT and MSGNOTMARKNEXT.
8476 * We need to handle the case where we
8477 * are appending:
8478 *
8479 * 1) a MSGMARKNEXT to a MSGNOTMARKNEXT.
8480 * 2) a MSGMARKNEXT to a plain message.
8481 * 3) a MSGNOTMARKNEXT to a plain message
8482 * 4) a MSGNOTMARKNEXT to a MSGNOTMARKNEXT
8483 * message.
8484 *
8485 * Thus we never append a MSGMARKNEXT or
8486 * MSGNOTMARKNEXT to a MSGMARKNEXT message.
8487 */
8488 if (qfirst->b_flag & MSGMARKNEXT) {
8489 bp->b_flag |= MSGMARKNEXT;
8490 bp->b_flag &= ~MSGNOTMARKNEXT;
8491 qfirst->b_flag &= ~MSGMARKNEXT;
8492 } else if (qfirst->b_flag & MSGNOTMARKNEXT) {
8493 bp->b_flag |= MSGNOTMARKNEXT;
8494 qfirst->b_flag &= ~MSGNOTMARKNEXT;
8495 }
8496
8497 linkb(bp, qfirst);
8498 }
8499 (void) putbq(q, bp);
8500
8501 /*
8502 * A message may have come in when the sd_lock was dropped in the
8503 * calling routine. If this is the case and STR*ATMARK info was
8504 * received, need to move that from the stream head to the q_last
8505 * so that SIOCATMARK can return the proper value.
8506 */
8507 if (stp->sd_flag & (STRATMARK | STRNOTATMARK)) {
8508 unsigned short *flagp = &q->q_last->b_flag;
8509 uint_t b_flag = (uint_t)*flagp;
8510
8511 if (stp->sd_flag & STRATMARK) {
8512 b_flag &= ~MSGNOTMARKNEXT;
8513 b_flag |= MSGMARKNEXT;
8514 stp->sd_flag &= ~STRATMARK;
8515 } else {
8516 b_flag &= ~MSGMARKNEXT;
8517 b_flag |= MSGNOTMARKNEXT;
8518 stp->sd_flag &= ~STRNOTATMARK;
8519 }
8520 *flagp = (unsigned short) b_flag;
8521 }
8522
8523 #ifdef DEBUG
8524 /*
8525 * Make sure that the flags are not messed up.
8526 */
8527 {
8528 mblk_t *mp;
8529 mp = q->q_last;
8530 while (mp != NULL) {
8531 ASSERT((mp->b_flag & (MSGMARKNEXT|MSGNOTMARKNEXT)) !=
8532 (MSGMARKNEXT|MSGNOTMARKNEXT));
8533 mp = mp->b_cont;
8534 }
8535 }
8536 #endif
8537 if (q->q_first == bp) {
8538 short pollevents;
8539
8540 if (stp->sd_flag & RSLEEP) {
8541 stp->sd_flag &= ~RSLEEP;
8542 cv_broadcast(&q->q_wait);
8543 }
8544 if (stp->sd_flag & STRPRI) {
8545 pollevents = POLLPRI;
8546 } else {
8547 if (band == 0) {
8548 if (!(stp->sd_rput_opt & SR_POLLIN))
8549 return;
8550 stp->sd_rput_opt &= ~SR_POLLIN;
8551 pollevents = POLLIN | POLLRDNORM;
8552 } else {
8553 pollevents = POLLIN | POLLRDBAND;
8554 }
8555 }
8556 mutex_exit(&stp->sd_lock);
8557 pollwakeup(&stp->sd_pollist, pollevents);
8558 mutex_enter(&stp->sd_lock);
8559 }
8560 }
8561
8562 /*
8563 * Return the held vnode attached to the stream head of a
8564 * given queue
8565 * It is the responsibility of the calling routine to ensure
8566 * that the queue does not go away (e.g. pop).
8567 */
8568 vnode_t *
8569 strq2vp(queue_t *qp)
8570 {
8571 vnode_t *vp;
8572 vp = STREAM(qp)->sd_vnode;
8573 ASSERT(vp != NULL);
8574 VN_HOLD(vp);
8575 return (vp);
8576 }
8577
8578 /*
8579 * return the stream head write queue for the given vp
8580 * It is the responsibility of the calling routine to ensure
8581 * that the stream or vnode do not close.
8582 */
8583 queue_t *
8584 strvp2wq(vnode_t *vp)
8585 {
8586 ASSERT(vp->v_stream != NULL);
8587 return (vp->v_stream->sd_wrq);
8588 }
8589
8590 /*
8591 * pollwakeup stream head
8592 * It is the responsibility of the calling routine to ensure
8593 * that the stream or vnode do not close.
8594 */
8595 void
8596 strpollwakeup(vnode_t *vp, short event)
8597 {
8598 ASSERT(vp->v_stream);
8599 pollwakeup(&vp->v_stream->sd_pollist, event);
8600 }
8601
8602 /*
8603 * Mate the stream heads of two vnodes together. If the two vnodes are the
8604 * same, we just make the write-side point at the read-side -- otherwise,
8605 * we do a full mate. Only works on vnodes associated with streams that are
8606 * still being built and thus have only a stream head.
8607 */
8608 void
8609 strmate(vnode_t *vp1, vnode_t *vp2)
8610 {
8611 queue_t *wrq1 = strvp2wq(vp1);
8612 queue_t *wrq2 = strvp2wq(vp2);
8613
8614 /*
8615 * Verify that there are no modules on the stream yet. We also
8616 * rely on the stream head always having a service procedure to
8617 * avoid tweaking q_nfsrv.
8618 */
8619 ASSERT(wrq1->q_next == NULL && wrq2->q_next == NULL);
8620 ASSERT(wrq1->q_qinfo->qi_srvp != NULL);
8621 ASSERT(wrq2->q_qinfo->qi_srvp != NULL);
8622
8623 /*
8624 * If the queues are the same, just twist; otherwise do a full mate.
8625 */
8626 if (wrq1 == wrq2) {
8627 wrq1->q_next = _RD(wrq1);
8628 } else {
8629 wrq1->q_next = _RD(wrq2);
8630 wrq2->q_next = _RD(wrq1);
8631 STREAM(wrq1)->sd_mate = STREAM(wrq2);
8632 STREAM(wrq1)->sd_flag |= STRMATE;
8633 STREAM(wrq2)->sd_mate = STREAM(wrq1);
8634 STREAM(wrq2)->sd_flag |= STRMATE;
8635 }
8636 }
8637
8638 /*
8639 * XXX will go away when console is correctly fixed.
8640 * Clean up the console PIDS, from previous I_SETSIG,
8641 * called only for cnopen which never calls strclean().
8642 */
8643 void
8644 str_cn_clean(struct vnode *vp)
8645 {
8646 strsig_t *ssp, *pssp, *tssp;
8647 struct stdata *stp;
8648 struct pid *pidp;
8649 int update = 0;
8650
8651 ASSERT(vp->v_stream);
8652 stp = vp->v_stream;
8653 pssp = NULL;
8654 mutex_enter(&stp->sd_lock);
8655 ssp = stp->sd_siglist;
8656 while (ssp) {
8657 mutex_enter(&pidlock);
8658 pidp = ssp->ss_pidp;
8659 /*
8660 * Get rid of PID if the proc is gone.
8661 */
8662 if (pidp->pid_prinactive) {
8663 tssp = ssp->ss_next;
8664 if (pssp)
8665 pssp->ss_next = tssp;
8666 else
8667 stp->sd_siglist = tssp;
8668 ASSERT(pidp->pid_ref <= 1);
8669 PID_RELE(ssp->ss_pidp);
8670 mutex_exit(&pidlock);
8671 kmem_free(ssp, sizeof (strsig_t));
8672 update = 1;
8673 ssp = tssp;
8674 continue;
8675 } else
8676 mutex_exit(&pidlock);
8677 pssp = ssp;
8678 ssp = ssp->ss_next;
8679 }
8680 if (update) {
8681 stp->sd_sigflags = 0;
8682 for (ssp = stp->sd_siglist; ssp; ssp = ssp->ss_next)
8683 stp->sd_sigflags |= ssp->ss_events;
8684 }
8685 mutex_exit(&stp->sd_lock);
8686 }
8687
8688 /*
8689 * Return B_TRUE if there is data in the message, B_FALSE otherwise.
8690 */
8691 static boolean_t
8692 msghasdata(mblk_t *bp)
8693 {
8694 for (; bp; bp = bp->b_cont)
8695 if (bp->b_datap->db_type == M_DATA) {
8696 ASSERT(bp->b_wptr >= bp->b_rptr);
8697 if (bp->b_wptr > bp->b_rptr)
8698 return (B_TRUE);
8699 }
8700 return (B_FALSE);
8701 }