1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
22 /* All Rights Reserved */
23
24
25 /*
26 * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
27 * Copyright (c) 2014, Joyent, Inc. All rights reserved.
28 */
29
30 #include <sys/types.h>
31 #include <sys/sysmacros.h>
32 #include <sys/param.h>
33 #include <sys/errno.h>
34 #include <sys/signal.h>
35 #include <sys/stat.h>
36 #include <sys/proc.h>
37 #include <sys/cred.h>
38 #include <sys/user.h>
39 #include <sys/vnode.h>
40 #include <sys/file.h>
41 #include <sys/stream.h>
42 #include <sys/strsubr.h>
43 #include <sys/stropts.h>
44 #include <sys/tihdr.h>
45 #include <sys/var.h>
46 #include <sys/poll.h>
47 #include <sys/termio.h>
48 #include <sys/ttold.h>
49 #include <sys/systm.h>
50 #include <sys/uio.h>
51 #include <sys/cmn_err.h>
52 #include <sys/sad.h>
53 #include <sys/netstack.h>
54 #include <sys/priocntl.h>
55 #include <sys/jioctl.h>
56 #include <sys/procset.h>
57 #include <sys/session.h>
58 #include <sys/kmem.h>
59 #include <sys/filio.h>
60 #include <sys/vtrace.h>
61 #include <sys/debug.h>
62 #include <sys/strredir.h>
63 #include <sys/fs/fifonode.h>
64 #include <sys/fs/snode.h>
65 #include <sys/strlog.h>
66 #include <sys/strsun.h>
67 #include <sys/project.h>
68 #include <sys/kbio.h>
69 #include <sys/msio.h>
70 #include <sys/tty.h>
71 #include <sys/ptyvar.h>
72 #include <sys/vuid_event.h>
73 #include <sys/modctl.h>
74 #include <sys/sunddi.h>
75 #include <sys/sunldi_impl.h>
76 #include <sys/autoconf.h>
77 #include <sys/policy.h>
78 #include <sys/dld.h>
79 #include <sys/zone.h>
80 #include <c2/audit.h>
81 #include <sys/fcntl.h>
82
/*
 * i_straccess() keeps the streams code readable while preserving a very
 * old performance trick: every caller of straccess() performs, inline,
 * the first check that straccess() itself would make, so that the call
 * is skipped whenever that check already settles the answer.
 *
 * The expansion is 0, with no call made, when stp->sd_sidp is NULL or
 * when the stream's vnode is a FIFO; otherwise it is the value returned
 * by straccess(x, y).
 *
 * Note that the macro refers to a variable named `stp', which must be in
 * scope at every place the macro is used.
 */
#define	i_straccess(x, y)						\
	(((stp->sd_sidp == NULL) || (stp->sd_vnode->v_type == VFIFO)) ?	\
	0 : straccess((x), (y)))
94
95 /*
96 * what is mblk_pull_len?
97 *
98 * If a streams message consists of many short messages,
99 * a performance degradation occurs from copyout overhead.
100 * To decrease the per mblk overhead, messages that are
101 * likely to consist of many small mblks are pulled up into
102 * one continuous chunk of memory.
103 *
104 * To avoid the processing overhead of examining every
105 * mblk, a quick heuristic is used. If the first mblk in
106 * the message is shorter than mblk_pull_len, it is likely
107 * that the rest of the mblk will be short.
108 *
109 * This heuristic was decided upon after performance tests
110 * indicated that anything more complex slowed down the main
111 * code path.
112 */
113 #define MBLK_PULL_LEN 64
114 uint32_t mblk_pull_len = MBLK_PULL_LEN;
115
116 /*
117 * The sgttyb_handling flag controls the handling of the old BSD
118 * TIOCGETP, TIOCSETP, and TIOCSETN ioctls as follows:
119 *
120 * 0 - Emit no warnings at all and retain old, broken behavior.
121 * 1 - Emit no warnings and silently handle new semantics.
122 * 2 - Send cmn_err(CE_NOTE) when either TIOCSETP or TIOCSETN is used
123 * (once per system invocation). Handle with new semantics.
124 * 3 - Send SIGSYS when any TIOCGETP, TIOCSETP, or TIOCSETN call is
125 * made (so that offenders drop core and are easy to debug).
126 *
127 * The "new semantics" are that TIOCGETP returns B38400 for
128 * sg_[io]speed if the corresponding value is over B38400, and that
129 * TIOCSET[PN] accept B38400 in these cases to mean "retain current
130 * bit rate."
131 */
132 int sgttyb_handling = 1;
133 static boolean_t sgttyb_complaint;
134
135 /* don't push drcompat module by default on Style-2 streams */
136 static int push_drcompat = 0;
137
138 /*
139 * id value used to distinguish between different ioctl messages
140 */
141 static uint32_t ioc_id;
142
143 static void putback(struct stdata *, queue_t *, mblk_t *, int);
144 static void strcleanall(struct vnode *);
145 static int strwsrv(queue_t *);
146 static int strdocmd(struct stdata *, struct strcmd *, cred_t *);
147 static boolean_t is_xti_str(const struct stdata *);
148
149 /*
150 * qinit and module_info structures for stream head read and write queues
151 */
152 struct module_info strm_info = { 0, "strrhead", 0, INFPSZ, STRHIGH, STRLOW };
153 struct module_info stwm_info = { 0, "strwhead", 0, 0, 0, 0 };
154 struct qinit strdata = { strrput, NULL, NULL, NULL, NULL, &strm_info };
155 struct qinit stwdata = { NULL, strwsrv, NULL, NULL, NULL, &stwm_info };
156 struct module_info fiform_info = { 0, "fifostrrhead", 0, PIPE_BUF, FIFOHIWAT,
157 FIFOLOWAT };
158 struct module_info fifowm_info = { 0, "fifostrwhead", 0, 0, 0, 0 };
159 struct qinit fifo_strdata = { strrput, NULL, NULL, NULL, NULL, &fiform_info };
160 struct qinit fifo_stwdata = { NULL, strwsrv, NULL, NULL, NULL, &fifowm_info };
161
162 extern kmutex_t strresources; /* protects global resources */
163 extern kmutex_t muxifier; /* single-threads multiplexor creation */
164
165 static boolean_t msghasdata(mblk_t *bp);
166 #define msgnodata(bp) (!msghasdata(bp))
167
/*
 * Stream head locking notes:
 *	There are four monitors associated with the stream head:
 *	1. v_stream monitor: in stropen() and strclose() v_lock
 *		is held while the association of vnode and stream
 *		head is established or tested for.
 *	2. open/close/push/pop monitor: sd_lock is held while each
 *		thread bids for exclusive access to this monitor
 *		for opening or closing a stream.  In addition, this
 *		monitor is entered during pushes and pops.  This
 *		guarantees that during plumbing operations there
 *		is only one thread trying to change the plumbing.
 *		Any other threads present in the stream are only
 *		using the plumbing.
 *	3. read/write monitor: in the case of read, a thread holds
 *		sd_lock while trying to get data from the stream
 *		head queue.  If there is none to fulfill a read
 *		request, it sets RSLEEP and calls cv_wait_sig() down
 *		in strwaitq() to await the arrival of new data.
 *		When new data arrives in strrput(), sd_lock is acquired
 *		before testing for RSLEEP and calling cv_broadcast().
 *		The behavior of strwrite(), strwsrv(), and WSLEEP
 *		mirrors this.
 *	4. ioctl monitor: sd_lock is acquired to ensure that only one
 *		thread is doing an ioctl at a time.
 */
194
195 static int
196 push_mod(queue_t *qp, dev_t *devp, struct stdata *stp, const char *name,
197 int anchor, cred_t *crp, uint_t anchor_zoneid)
198 {
199 int error;
200 fmodsw_impl_t *fp;
201
202 if (stp->sd_flag & (STRHUP|STRDERR|STWRERR)) {
203 error = (stp->sd_flag & STRHUP) ? ENXIO : EIO;
204 return (error);
205 }
206 if (stp->sd_pushcnt >= nstrpush) {
207 return (EINVAL);
208 }
209
210 if ((fp = fmodsw_find(name, FMODSW_HOLD | FMODSW_LOAD)) == NULL) {
211 stp->sd_flag |= STREOPENFAIL;
212 return (EINVAL);
213 }
214
215 /*
216 * push new module and call its open routine via qattach
217 */
218 if ((error = qattach(qp, devp, 0, crp, fp, B_FALSE)) != 0)
219 return (error);
220
221 /*
222 * Check to see if caller wants a STREAMS anchor
223 * put at this place in the stream, and add if so.
224 */
225 mutex_enter(&stp->sd_lock);
226 if (anchor == stp->sd_pushcnt) {
227 stp->sd_anchor = stp->sd_pushcnt;
228 stp->sd_anchorzone = anchor_zoneid;
229 }
230 mutex_exit(&stp->sd_lock);
231
232 return (0);
233 }
234
235 /*
236 * Open a stream device.
237 */
238 int
239 stropen(vnode_t *vp, dev_t *devp, int flag, cred_t *crp)
240 {
241 struct stdata *stp;
242 queue_t *qp;
243 int s;
244 dev_t dummydev, savedev;
245 struct autopush *ap;
246 struct dlautopush dlap;
247 int error = 0;
248 ssize_t rmin, rmax;
249 int cloneopen;
250 queue_t *brq;
251 major_t major;
252 str_stack_t *ss;
253 zoneid_t zoneid;
254 uint_t anchor;
255
256 /*
257 * If the stream already exists, wait for any open in progress
258 * to complete, then call the open function of each module and
259 * driver in the stream. Otherwise create the stream.
260 */
261 TRACE_1(TR_FAC_STREAMS_FR, TR_STROPEN, "stropen:%p", vp);
262 retry:
263 mutex_enter(&vp->v_lock);
264 if ((stp = vp->v_stream) != NULL) {
265
266 /*
267 * Waiting for stream to be created to device
268 * due to another open.
269 */
270 mutex_exit(&vp->v_lock);
271
272 if (STRMATED(stp)) {
273 struct stdata *strmatep = stp->sd_mate;
274
275 STRLOCKMATES(stp);
276 if (strmatep->sd_flag & (STWOPEN|STRCLOSE|STRPLUMB)) {
277 if (flag & (FNDELAY|FNONBLOCK)) {
278 error = EAGAIN;
279 mutex_exit(&strmatep->sd_lock);
280 goto ckreturn;
281 }
282 mutex_exit(&stp->sd_lock);
283 if (!cv_wait_sig(&strmatep->sd_monitor,
284 &strmatep->sd_lock)) {
285 error = EINTR;
286 mutex_exit(&strmatep->sd_lock);
287 mutex_enter(&stp->sd_lock);
288 goto ckreturn;
289 }
290 mutex_exit(&strmatep->sd_lock);
291 goto retry;
292 }
293 if (stp->sd_flag & (STWOPEN|STRCLOSE|STRPLUMB)) {
294 if (flag & (FNDELAY|FNONBLOCK)) {
295 error = EAGAIN;
296 mutex_exit(&strmatep->sd_lock);
297 goto ckreturn;
298 }
299 mutex_exit(&strmatep->sd_lock);
300 if (!cv_wait_sig(&stp->sd_monitor,
301 &stp->sd_lock)) {
302 error = EINTR;
303 goto ckreturn;
304 }
305 mutex_exit(&stp->sd_lock);
306 goto retry;
307 }
308
309 if (stp->sd_flag & (STRDERR|STWRERR)) {
310 error = EIO;
311 mutex_exit(&strmatep->sd_lock);
312 goto ckreturn;
313 }
314
315 stp->sd_flag |= STWOPEN;
316 STRUNLOCKMATES(stp);
317 } else {
318 mutex_enter(&stp->sd_lock);
319 if (stp->sd_flag & (STWOPEN|STRCLOSE|STRPLUMB)) {
320 if (flag & (FNDELAY|FNONBLOCK)) {
321 error = EAGAIN;
322 goto ckreturn;
323 }
324 if (!cv_wait_sig(&stp->sd_monitor,
325 &stp->sd_lock)) {
326 error = EINTR;
327 goto ckreturn;
328 }
329 mutex_exit(&stp->sd_lock);
330 goto retry; /* could be clone! */
331 }
332
333 if (stp->sd_flag & (STRDERR|STWRERR)) {
334 error = EIO;
335 goto ckreturn;
336 }
337
338 stp->sd_flag |= STWOPEN;
339 mutex_exit(&stp->sd_lock);
340 }
341
342 /*
343 * Open all modules and devices down stream to notify
344 * that another user is streaming. For modules, set the
345 * last argument to MODOPEN and do not pass any open flags.
346 * Ignore dummydev since this is not the first open.
347 */
348 claimstr(stp->sd_wrq);
349 qp = stp->sd_wrq;
350 while (_SAMESTR(qp)) {
351 qp = qp->q_next;
352 if ((error = qreopen(_RD(qp), devp, flag, crp)) != 0)
353 break;
354 }
355 releasestr(stp->sd_wrq);
356 mutex_enter(&stp->sd_lock);
357 stp->sd_flag &= ~(STRHUP|STWOPEN|STRDERR|STWRERR);
358 stp->sd_rerror = 0;
359 stp->sd_werror = 0;
360 ckreturn:
361 cv_broadcast(&stp->sd_monitor);
362 mutex_exit(&stp->sd_lock);
363 return (error);
364 }
365
366 /*
367 * This vnode isn't streaming. SPECFS already
368 * checked for multiple vnodes pointing to the
369 * same stream, so create a stream to the driver.
370 */
371 qp = allocq();
372 stp = shalloc(qp);
373
374 /*
375 * Initialize stream head. shalloc() has given us
376 * exclusive access, and we have the vnode locked;
377 * we can do whatever we want with stp.
378 */
379 stp->sd_flag = STWOPEN;
380 stp->sd_siglist = NULL;
381 stp->sd_pollist.ph_list = NULL;
382 stp->sd_sigflags = 0;
383 stp->sd_mark = NULL;
384 stp->sd_closetime = STRTIMOUT;
385 stp->sd_sidp = NULL;
386 stp->sd_pgidp = NULL;
387 stp->sd_vnode = vp;
388 stp->sd_rerror = 0;
389 stp->sd_werror = 0;
390 stp->sd_wroff = 0;
391 stp->sd_tail = 0;
392 stp->sd_iocblk = NULL;
393 stp->sd_cmdblk = NULL;
394 stp->sd_pushcnt = 0;
395 stp->sd_qn_minpsz = 0;
396 stp->sd_qn_maxpsz = INFPSZ - 1; /* used to check for initialization */
397 stp->sd_maxblk = INFPSZ;
398 qp->q_ptr = _WR(qp)->q_ptr = stp;
399 STREAM(qp) = STREAM(_WR(qp)) = stp;
400 vp->v_stream = stp;
401 mutex_exit(&vp->v_lock);
402
403 /*
404 * If this is not a system process, then add it to
405 * the list associated with the stream head.
406 */
407 if (!(curproc->p_flag & SSYS) && is_xti_str(stp))
408 sh_insert_pid(stp, curproc->p_pidp->pid_id);
409
410 if (vp->v_type == VFIFO) {
411 stp->sd_flag |= OLDNDELAY;
412 /*
413 * This means, both for pipes and fifos
414 * strwrite will send SIGPIPE if the other
415 * end is closed. For putmsg it depends
416 * on whether it is a XPG4_2 application
417 * or not
418 */
419 stp->sd_wput_opt = SW_SIGPIPE;
420
421 /* setq might sleep in kmem_alloc - avoid holding locks. */
422 setq(qp, &fifo_strdata, &fifo_stwdata, NULL, QMTSAFE,
423 SQ_CI|SQ_CO, B_FALSE);
424
425 set_qend(qp);
426 stp->sd_strtab = fifo_getinfo();
427 _WR(qp)->q_nfsrv = _WR(qp);
428 qp->q_nfsrv = qp;
429 /*
430 * Wake up others that are waiting for stream to be created.
431 */
432 mutex_enter(&stp->sd_lock);
433 /*
434 * nothing is be pushed on stream yet, so
435 * optimized stream head packetsizes are just that
436 * of the read queue
437 */
438 stp->sd_qn_minpsz = qp->q_minpsz;
439 stp->sd_qn_maxpsz = qp->q_maxpsz;
440 stp->sd_flag &= ~STWOPEN;
441 goto fifo_opendone;
442 }
443 /* setq might sleep in kmem_alloc - avoid holding locks. */
444 setq(qp, &strdata, &stwdata, NULL, QMTSAFE, SQ_CI|SQ_CO, B_FALSE);
445
446 set_qend(qp);
447
448 /*
449 * Open driver and create stream to it (via qattach).
450 */
451 savedev = *devp;
452 cloneopen = (getmajor(*devp) == clone_major);
453 if ((error = qattach(qp, devp, flag, crp, NULL, B_FALSE)) != 0) {
454 mutex_enter(&vp->v_lock);
455 vp->v_stream = NULL;
456 mutex_exit(&vp->v_lock);
457 mutex_enter(&stp->sd_lock);
458 cv_broadcast(&stp->sd_monitor);
459 mutex_exit(&stp->sd_lock);
460 freeq(_RD(qp));
461 shfree(stp);
462 return (error);
463 }
464 /*
465 * Set sd_strtab after open in order to handle clonable drivers
466 */
467 stp->sd_strtab = STREAMSTAB(getmajor(*devp));
468
469 /*
470 * Historical note: dummydev used to be be prior to the initial
471 * open (via qattach above), which made the value seen
472 * inconsistent between an I_PUSH and an autopush of a module.
473 */
474 dummydev = *devp;
475
476 /*
477 * For clone open of old style (Q not associated) network driver,
478 * push DRMODNAME module to handle DL_ATTACH/DL_DETACH
479 */
480 brq = _RD(_WR(qp)->q_next);
481 major = getmajor(*devp);
482 if (push_drcompat && cloneopen && NETWORK_DRV(major) &&
483 ((brq->q_flag & _QASSOCIATED) == 0)) {
484 if (push_mod(qp, &dummydev, stp, DRMODNAME, 0, crp, 0) != 0)
485 cmn_err(CE_WARN, "cannot push " DRMODNAME
486 " streams module");
487 }
488
489 if (!NETWORK_DRV(major)) {
490 savedev = *devp;
491 } else {
492 /*
493 * For network devices, process differently based on the
494 * return value from dld_autopush():
495 *
496 * 0: the passed-in device points to a GLDv3 datalink with
497 * per-link autopush configuration; use that configuration
498 * and ignore any per-driver autopush configuration.
499 *
500 * 1: the passed-in device points to a physical GLDv3
501 * datalink without per-link autopush configuration. The
502 * passed in device was changed to refer to the actual
503 * physical device (if it's not already); we use that new
504 * device to look up any per-driver autopush configuration.
505 *
506 * -1: neither of the above cases applied; use the initial
507 * device to look up any per-driver autopush configuration.
508 */
509 switch (dld_autopush(&savedev, &dlap)) {
510 case 0:
511 zoneid = crgetzoneid(crp);
512 for (s = 0; s < dlap.dap_npush; s++) {
513 error = push_mod(qp, &dummydev, stp,
514 dlap.dap_aplist[s], dlap.dap_anchor, crp,
515 zoneid);
516 if (error != 0)
517 break;
518 }
519 goto opendone;
520 case 1:
521 break;
522 case -1:
523 savedev = *devp;
524 break;
525 }
526 }
527 /*
528 * Find the autopush configuration based on "savedev". Start with the
529 * global zone. If not found check in the local zone.
530 */
531 zoneid = GLOBAL_ZONEID;
532 retryap:
533 ss = netstack_find_by_stackid(zoneid_to_netstackid(zoneid))->
534 netstack_str;
535 if ((ap = sad_ap_find_by_dev(savedev, ss)) == NULL) {
536 netstack_rele(ss->ss_netstack);
537 if (zoneid == GLOBAL_ZONEID) {
538 /*
539 * None found. Also look in the zone's autopush table.
540 */
541 zoneid = crgetzoneid(crp);
542 if (zoneid != GLOBAL_ZONEID)
543 goto retryap;
544 }
545 goto opendone;
546 }
547 anchor = ap->ap_anchor;
548 zoneid = crgetzoneid(crp);
549 for (s = 0; s < ap->ap_npush; s++) {
550 error = push_mod(qp, &dummydev, stp, ap->ap_list[s],
551 anchor, crp, zoneid);
552 if (error != 0)
553 break;
554 }
555 sad_ap_rele(ap, ss);
556 netstack_rele(ss->ss_netstack);
557
558 opendone:
559
560 /*
561 * let specfs know that open failed part way through
562 */
563 if (error) {
564 mutex_enter(&stp->sd_lock);
565 stp->sd_flag |= STREOPENFAIL;
566 mutex_exit(&stp->sd_lock);
567 }
568
569 /*
570 * Wake up others that are waiting for stream to be created.
571 */
572 mutex_enter(&stp->sd_lock);
573 stp->sd_flag &= ~STWOPEN;
574
575 /*
576 * As a performance concern we are caching the values of
577 * q_minpsz and q_maxpsz of the module below the stream
578 * head in the stream head.
579 */
580 mutex_enter(QLOCK(stp->sd_wrq->q_next));
581 rmin = stp->sd_wrq->q_next->q_minpsz;
582 rmax = stp->sd_wrq->q_next->q_maxpsz;
583 mutex_exit(QLOCK(stp->sd_wrq->q_next));
584
585 /* do this processing here as a performance concern */
586 if (strmsgsz != 0) {
587 if (rmax == INFPSZ)
588 rmax = strmsgsz;
589 else
590 rmax = MIN(strmsgsz, rmax);
591 }
592
593 mutex_enter(QLOCK(stp->sd_wrq));
594 stp->sd_qn_minpsz = rmin;
595 stp->sd_qn_maxpsz = rmax;
596 mutex_exit(QLOCK(stp->sd_wrq));
597
598 fifo_opendone:
599 cv_broadcast(&stp->sd_monitor);
600 mutex_exit(&stp->sd_lock);
601 return (error);
602 }
603
604 static int strsink(queue_t *, mblk_t *);
605 static struct qinit deadrend = {
606 strsink, NULL, NULL, NULL, NULL, &strm_info, NULL
607 };
608 static struct qinit deadwend = {
609 NULL, NULL, NULL, NULL, NULL, &stwm_info, NULL
610 };
611
612 /*
613 * Close a stream.
614 * This is called from closef() on the last close of an open stream.
615 * Strclean() will already have removed the siglist and pollist
616 * information, so all that remains is to remove all multiplexor links
617 * for the stream, pop all the modules (and the driver), and free the
618 * stream structure.
619 */
620
621 int
622 strclose(struct vnode *vp, int flag, cred_t *crp)
623 {
624 struct stdata *stp;
625 queue_t *qp;
626 int rval;
627 int freestp = 1;
628 queue_t *rmq;
629
630 TRACE_1(TR_FAC_STREAMS_FR,
631 TR_STRCLOSE, "strclose:%p", vp);
632 ASSERT(vp->v_stream);
633
634 stp = vp->v_stream;
635 ASSERT(!(stp->sd_flag & STPLEX));
636 qp = stp->sd_wrq;
637
638 /*
639 * Needed so that strpoll will return non-zero for this fd.
640 * Note that with POLLNOERR STRHUP does still cause POLLHUP.
641 */
642 mutex_enter(&stp->sd_lock);
643 stp->sd_flag |= STRHUP;
644 mutex_exit(&stp->sd_lock);
645
646 /*
647 * If the registered process or process group did not have an
648 * open instance of this stream then strclean would not be
649 * called. Thus at the time of closing all remaining siglist entries
650 * are removed.
651 */
652 if (stp->sd_siglist != NULL)
653 strcleanall(vp);
654
655 ASSERT(stp->sd_siglist == NULL);
656 ASSERT(stp->sd_sigflags == 0);
657
658 if (STRMATED(stp)) {
659 struct stdata *strmatep = stp->sd_mate;
660 int waited = 1;
661
662 STRLOCKMATES(stp);
663 while (waited) {
664 waited = 0;
665 while (stp->sd_flag & (STWOPEN|STRCLOSE|STRPLUMB)) {
666 mutex_exit(&strmatep->sd_lock);
667 cv_wait(&stp->sd_monitor, &stp->sd_lock);
668 mutex_exit(&stp->sd_lock);
669 STRLOCKMATES(stp);
670 waited = 1;
671 }
672 while (strmatep->sd_flag &
673 (STWOPEN|STRCLOSE|STRPLUMB)) {
674 mutex_exit(&stp->sd_lock);
675 cv_wait(&strmatep->sd_monitor,
676 &strmatep->sd_lock);
677 mutex_exit(&strmatep->sd_lock);
678 STRLOCKMATES(stp);
679 waited = 1;
680 }
681 }
682 stp->sd_flag |= STRCLOSE;
683 STRUNLOCKMATES(stp);
684 } else {
685 mutex_enter(&stp->sd_lock);
686 stp->sd_flag |= STRCLOSE;
687 mutex_exit(&stp->sd_lock);
688 }
689
690 ASSERT(qp->q_first == NULL); /* No more delayed write */
691
692 /* Check if an I_LINK was ever done on this stream */
693 if (stp->sd_flag & STRHASLINKS) {
694 netstack_t *ns;
695 str_stack_t *ss;
696
697 ns = netstack_find_by_cred(crp);
698 ASSERT(ns != NULL);
699 ss = ns->netstack_str;
700 ASSERT(ss != NULL);
701
702 (void) munlinkall(stp, LINKCLOSE|LINKNORMAL, crp, &rval, ss);
703 netstack_rele(ss->ss_netstack);
704 }
705
706 while (_SAMESTR(qp)) {
707 /*
708 * Holding sd_lock prevents q_next from changing in
709 * this stream.
710 */
711 mutex_enter(&stp->sd_lock);
712 if (!(flag & (FNDELAY|FNONBLOCK)) && (stp->sd_closetime > 0)) {
713
714 /*
715 * sleep until awakened by strwsrv() or timeout
716 */
717 for (;;) {
718 mutex_enter(QLOCK(qp->q_next));
719 if (!(qp->q_next->q_mblkcnt)) {
720 mutex_exit(QLOCK(qp->q_next));
721 break;
722 }
723 stp->sd_flag |= WSLEEP;
724
725 /* ensure strwsrv gets enabled */
726 qp->q_next->q_flag |= QWANTW;
727 mutex_exit(QLOCK(qp->q_next));
728 /* get out if we timed out or recv'd a signal */
729 if (str_cv_wait(&qp->q_wait, &stp->sd_lock,
730 stp->sd_closetime, 0) <= 0) {
731 break;
732 }
733 }
734 stp->sd_flag &= ~WSLEEP;
735 }
736 mutex_exit(&stp->sd_lock);
737
738 rmq = qp->q_next;
739 if (rmq->q_flag & QISDRV) {
740 ASSERT(!_SAMESTR(rmq));
741 wait_sq_svc(_RD(qp)->q_syncq);
742 }
743
744 qdetach(_RD(rmq), 1, flag, crp, B_FALSE);
745 }
746
747 /*
748 * Since we call pollwakeup in close() now, the poll list should
749 * be empty in most cases. The only exception is the layered devices
750 * (e.g. the console drivers with redirection modules pushed on top
751 * of it). We have to do this after calling qdetach() because
752 * the redirection module won't have torn down the console
753 * redirection until after qdetach() has been invoked.
754 */
755 if (stp->sd_pollist.ph_list != NULL) {
756 pollwakeup(&stp->sd_pollist, POLLERR);
757 pollhead_clean(&stp->sd_pollist);
758 }
759 ASSERT(stp->sd_pollist.ph_list == NULL);
760 ASSERT(stp->sd_sidp == NULL);
761 ASSERT(stp->sd_pgidp == NULL);
762
763 /* Prevent qenable from re-enabling the stream head queue */
764 disable_svc(_RD(qp));
765
766 /*
767 * Wait until service procedure of each queue is
768 * run, if QINSERVICE is set.
769 */
770 wait_svc(_RD(qp));
771
772 /*
773 * Now, flush both queues.
774 */
775 flushq(_RD(qp), FLUSHALL);
776 flushq(qp, FLUSHALL);
777
778 /*
779 * If the write queue of the stream head is pointing to a
780 * read queue, we have a twisted stream. If the read queue
781 * is alive, convert the stream head queues into a dead end.
782 * If the read queue is dead, free the dead pair.
783 */
784 if (qp->q_next && !_SAMESTR(qp)) {
785 if (qp->q_next->q_qinfo == &deadrend) { /* half-closed pipe */
786 flushq(qp->q_next, FLUSHALL); /* ensure no message */
787 shfree(qp->q_next->q_stream);
788 freeq(qp->q_next);
789 freeq(_RD(qp));
790 } else if (qp->q_next == _RD(qp)) { /* fifo */
791 freeq(_RD(qp));
792 } else { /* pipe */
793 freestp = 0;
794 /*
795 * The q_info pointers are never accessed when
796 * SQLOCK is held.
797 */
798 ASSERT(qp->q_syncq == _RD(qp)->q_syncq);
799 mutex_enter(SQLOCK(qp->q_syncq));
800 qp->q_qinfo = &deadwend;
801 _RD(qp)->q_qinfo = &deadrend;
802 mutex_exit(SQLOCK(qp->q_syncq));
803 }
804 } else {
805 freeq(_RD(qp)); /* free stream head queue pair */
806 }
807
808 mutex_enter(&vp->v_lock);
809 if (stp->sd_iocblk) {
810 if (stp->sd_iocblk != (mblk_t *)-1) {
811 freemsg(stp->sd_iocblk);
812 }
813 stp->sd_iocblk = NULL;
814 }
815 stp->sd_vnode = NULL;
816 vp->v_stream = NULL;
817 mutex_exit(&vp->v_lock);
818 mutex_enter(&stp->sd_lock);
819 freemsg(stp->sd_cmdblk);
820 stp->sd_cmdblk = NULL;
821 stp->sd_flag &= ~STRCLOSE;
822 cv_broadcast(&stp->sd_monitor);
823 mutex_exit(&stp->sd_lock);
824
825 if (freestp)
826 shfree(stp);
827 return (0);
828 }
829
830 static int
831 strsink(queue_t *q, mblk_t *bp)
832 {
833 struct copyresp *resp;
834
835 switch (bp->b_datap->db_type) {
836 case M_FLUSH:
837 if ((*bp->b_rptr & FLUSHW) && !(bp->b_flag & MSGNOLOOP)) {
838 *bp->b_rptr &= ~FLUSHR;
839 bp->b_flag |= MSGNOLOOP;
840 /*
841 * Protect against the driver passing up
842 * messages after it has done a qprocsoff.
843 */
844 if (_OTHERQ(q)->q_next == NULL)
845 freemsg(bp);
846 else
847 qreply(q, bp);
848 } else {
849 freemsg(bp);
850 }
851 break;
852
853 case M_COPYIN:
854 case M_COPYOUT:
855 if (bp->b_cont) {
856 freemsg(bp->b_cont);
857 bp->b_cont = NULL;
858 }
859 bp->b_datap->db_type = M_IOCDATA;
860 bp->b_wptr = bp->b_rptr + sizeof (struct copyresp);
861 resp = (struct copyresp *)bp->b_rptr;
862 resp->cp_rval = (caddr_t)1; /* failure */
863 /*
864 * Protect against the driver passing up
865 * messages after it has done a qprocsoff.
866 */
867 if (_OTHERQ(q)->q_next == NULL)
868 freemsg(bp);
869 else
870 qreply(q, bp);
871 break;
872
873 case M_IOCTL:
874 if (bp->b_cont) {
875 freemsg(bp->b_cont);
876 bp->b_cont = NULL;
877 }
878 bp->b_datap->db_type = M_IOCNAK;
879 /*
880 * Protect against the driver passing up
881 * messages after it has done a qprocsoff.
882 */
883 if (_OTHERQ(q)->q_next == NULL)
884 freemsg(bp);
885 else
886 qreply(q, bp);
887 break;
888
889 default:
890 freemsg(bp);
891 break;
892 }
893
894 return (0);
895 }
896
897 /*
898 * Clean up after a process when it closes a stream. This is called
899 * from closef for all closes, whereas strclose is called only for the
900 * last close on a stream. The siglist is scanned for entries for the
901 * current process, and these are removed.
902 */
903 void
904 strclean(struct vnode *vp)
905 {
906 strsig_t *ssp, *pssp, *tssp;
907 stdata_t *stp;
908 int update = 0;
909
910 TRACE_1(TR_FAC_STREAMS_FR,
911 TR_STRCLEAN, "strclean:%p", vp);
912 stp = vp->v_stream;
913 pssp = NULL;
914 mutex_enter(&stp->sd_lock);
915 ssp = stp->sd_siglist;
916 while (ssp) {
917 if (ssp->ss_pidp == curproc->p_pidp) {
918 tssp = ssp->ss_next;
919 if (pssp)
920 pssp->ss_next = tssp;
921 else
922 stp->sd_siglist = tssp;
923 mutex_enter(&pidlock);
924 PID_RELE(ssp->ss_pidp);
925 mutex_exit(&pidlock);
926 kmem_free(ssp, sizeof (strsig_t));
927 update = 1;
928 ssp = tssp;
929 } else {
930 pssp = ssp;
931 ssp = ssp->ss_next;
932 }
933 }
934 if (update) {
935 stp->sd_sigflags = 0;
936 for (ssp = stp->sd_siglist; ssp; ssp = ssp->ss_next)
937 stp->sd_sigflags |= ssp->ss_events;
938 }
939 mutex_exit(&stp->sd_lock);
940 }
941
942 /*
943 * Used on the last close to remove any remaining items on the siglist.
944 * These could be present on the siglist due to I_ESETSIG calls that
945 * use process groups or processed that do not have an open file descriptor
946 * for this stream (Such entries would not be removed by strclean).
947 */
948 static void
949 strcleanall(struct vnode *vp)
950 {
951 strsig_t *ssp, *nssp;
952 stdata_t *stp;
953
954 stp = vp->v_stream;
955 mutex_enter(&stp->sd_lock);
956 ssp = stp->sd_siglist;
957 stp->sd_siglist = NULL;
958 while (ssp) {
959 nssp = ssp->ss_next;
960 mutex_enter(&pidlock);
961 PID_RELE(ssp->ss_pidp);
962 mutex_exit(&pidlock);
963 kmem_free(ssp, sizeof (strsig_t));
964 ssp = nssp;
965 }
966 stp->sd_sigflags = 0;
967 mutex_exit(&stp->sd_lock);
968 }
969
970 /*
971 * Retrieve the next message from the logical stream head read queue
972 * using either rwnext (if sync stream) or getq_noenab.
973 * It is the callers responsibility to call qbackenable after
974 * it is finished with the message. The caller should not call
975 * qbackenable until after any putback calls to avoid spurious backenabling.
976 */
977 mblk_t *
978 strget(struct stdata *stp, queue_t *q, struct uio *uiop, int first,
979 int *errorp)
980 {
981 mblk_t *bp;
982 int error;
983 ssize_t rbytes = 0;
984
985 /* Holding sd_lock prevents the read queue from changing */
986 ASSERT(MUTEX_HELD(&stp->sd_lock));
987
988 if (uiop != NULL && stp->sd_struiordq != NULL &&
989 q->q_first == NULL &&
990 (!first || (stp->sd_wakeq & RSLEEP))) {
991 /*
992 * Stream supports rwnext() for the read side.
993 * If this is the first time we're called by e.g. strread
994 * only do the downcall if there is a deferred wakeup
995 * (registered in sd_wakeq).
996 */
997 struiod_t uiod;
998
999 if (first)
1000 stp->sd_wakeq &= ~RSLEEP;
1001
1002 (void) uiodup(uiop, &uiod.d_uio, uiod.d_iov,
1003 sizeof (uiod.d_iov) / sizeof (*uiod.d_iov));
1004 uiod.d_mp = 0;
1005 /*
1006 * Mark that a thread is in rwnext on the read side
1007 * to prevent strrput from nacking ioctls immediately.
1008 * When the last concurrent rwnext returns
1009 * the ioctls are nack'ed.
1010 */
1011 ASSERT(MUTEX_HELD(&stp->sd_lock));
1012 stp->sd_struiodnak++;
1013 /*
1014 * Note: rwnext will drop sd_lock.
1015 */
1016 error = rwnext(q, &uiod);
1017 ASSERT(MUTEX_NOT_HELD(&stp->sd_lock));
1018 mutex_enter(&stp->sd_lock);
1019 stp->sd_struiodnak--;
1020 while (stp->sd_struiodnak == 0 &&
1021 ((bp = stp->sd_struionak) != NULL)) {
1022 stp->sd_struionak = bp->b_next;
1023 bp->b_next = NULL;
1024 bp->b_datap->db_type = M_IOCNAK;
1025 /*
1026 * Protect against the driver passing up
1027 * messages after it has done a qprocsoff.
1028 */
1029 if (_OTHERQ(q)->q_next == NULL)
1030 freemsg(bp);
1031 else {
1032 mutex_exit(&stp->sd_lock);
1033 qreply(q, bp);
1034 mutex_enter(&stp->sd_lock);
1035 }
1036 }
1037 ASSERT(MUTEX_HELD(&stp->sd_lock));
1038 if (error == 0 || error == EWOULDBLOCK) {
1039 if ((bp = uiod.d_mp) != NULL) {
1040 *errorp = 0;
1041 ASSERT(MUTEX_HELD(&stp->sd_lock));
1042 return (bp);
1043 }
1044 error = 0;
1045 } else if (error == EINVAL) {
1046 /*
1047 * The stream plumbing must have
1048 * changed while we were away, so
1049 * just turn off rwnext()s.
1050 */
1051 error = 0;
1052 } else if (error == EBUSY) {
1053 /*
1054 * The module might have data in transit using putnext
1055 * Fall back on waiting + getq.
1056 */
1057 error = 0;
1058 } else {
1059 *errorp = error;
1060 ASSERT(MUTEX_HELD(&stp->sd_lock));
1061 return (NULL);
1062 }
1063 /*
1064 * Try a getq in case a rwnext() generated mblk
1065 * has bubbled up via strrput().
1066 */
1067 }
1068 *errorp = 0;
1069 ASSERT(MUTEX_HELD(&stp->sd_lock));
1070
1071 /*
1072 * If we have a valid uio, try and use this as a guide for how
1073 * many bytes to retrieve from the queue via getq_noenab().
1074 * Doing this can avoid unneccesary counting of overlong
1075 * messages in putback(). We currently only do this for sockets
1076 * and only if there is no sd_rputdatafunc hook.
1077 *
1078 * The sd_rputdatafunc hook transforms the entire message
1079 * before any bytes in it can be given to a client. So, rbytes
1080 * must be 0 if there is a hook.
1081 */
1082 if ((uiop != NULL) && (stp->sd_vnode->v_type == VSOCK) &&
1083 (stp->sd_rputdatafunc == NULL))
1084 rbytes = uiop->uio_resid;
1085
1086 return (getq_noenab(q, rbytes));
1087 }
1088
1089 /*
1090 * Copy out the message pointed to by `bp' into the uio pointed to by `uiop'.
1091 * If the message does not fit in the uio the remainder of it is returned;
1092 * otherwise NULL is returned. Any embedded zero-length mblk_t's are
1093 * consumed, even if uio_resid reaches zero. On error, `*errorp' is set to
1094 * the error code, the message is consumed, and NULL is returned.
1095 */
1096 static mblk_t *
1097 struiocopyout(mblk_t *bp, struct uio *uiop, int *errorp)
1098 {
1099 int error;
1100 ptrdiff_t n;
1101 mblk_t *nbp;
1102
1103 ASSERT(bp->b_wptr >= bp->b_rptr);
1104
1105 do {
1106 if ((n = MIN(uiop->uio_resid, MBLKL(bp))) != 0) {
1107 ASSERT(n > 0);
1108
1109 error = uiomove(bp->b_rptr, n, UIO_READ, uiop);
1110 if (error != 0) {
1111 freemsg(bp);
1112 *errorp = error;
1113 return (NULL);
1114 }
1115 }
1116
1117 bp->b_rptr += n;
1118 while (bp != NULL && (bp->b_rptr >= bp->b_wptr)) {
1119 nbp = bp;
1120 bp = bp->b_cont;
1121 freeb(nbp);
1122 }
1123 } while (bp != NULL && uiop->uio_resid > 0);
1124
1125 *errorp = 0;
1126 return (bp);
1127 }
1128
1129 /*
1130 * Read a stream according to the mode flags in sd_flag:
1131 *
1132 * (default mode) - Byte stream, msg boundaries are ignored
1133 * RD_MSGDIS (msg discard) - Read on msg boundaries and throw away
1134 * any data remaining in msg
1135 * RD_MSGNODIS (msg non-discard) - Read on msg boundaries and put back
1136 * any remaining data on head of read queue
1137 *
1138 * Consume readable messages on the front of the queue until
1139 * ttolwp(curthread)->lwp_count
1140 * is satisfied, the readable messages are exhausted, or a message
1141 * boundary is reached in a message mode. If no data was read and
1142 * the stream was not opened with the NDELAY flag, block until data arrives.
1143 * Otherwise return the data read and update the count.
1144 *
1145 * In default mode a 0 length message signifies end-of-file and terminates
1146 * a read in progress. The 0 length message is removed from the queue
1147 * only if it is the only message read (no data is read).
1148 *
1149 * An attempt to read an M_PROTO or M_PCPROTO message results in an
1150 * EBADMSG error return, unless either RD_PROTDAT or RD_PROTDIS are set.
1151 * If RD_PROTDAT is set, M_PROTO and M_PCPROTO messages are read as data.
1152 * If RD_PROTDIS is set, the M_PROTO and M_PCPROTO parts of the message
1153 * are unlinked from and M_DATA blocks in the message, the protos are
1154 * thrown away, and the data is read.
1155 */
1156 /* ARGSUSED */
1157 int
1158 strread(struct vnode *vp, struct uio *uiop, cred_t *crp)
1159 {
1160 struct stdata *stp;
1161 mblk_t *bp, *nbp;
1162 queue_t *q;
1163 int error = 0;
1164 uint_t old_sd_flag;
1165 int first;
1166 char rflg;
1167 uint_t mark; /* Contains MSG*MARK and _LASTMARK */
1168 #define _LASTMARK 0x8000 /* Distinct from MSG*MARK */
1169 short delim;
1170 unsigned char pri = 0;
1171 char waitflag;
1172 unsigned char type;
1173
1174 TRACE_1(TR_FAC_STREAMS_FR,
1175 TR_STRREAD_ENTER, "strread:%p", vp);
1176 ASSERT(vp->v_stream);
1177 stp = vp->v_stream;
1178
1179 mutex_enter(&stp->sd_lock);
1180
1181 if ((error = i_straccess(stp, JCREAD)) != 0) {
1182 mutex_exit(&stp->sd_lock);
1183 return (error);
1184 }
1185
1186 if (stp->sd_flag & (STRDERR|STPLEX)) {
1187 error = strgeterr(stp, STRDERR|STPLEX, 0);
1188 if (error != 0) {
1189 mutex_exit(&stp->sd_lock);
1190 return (error);
1191 }
1192 }
1193
1194 /*
1195 * Loop terminates when uiop->uio_resid == 0.
1196 */
1197 rflg = 0;
1198 waitflag = READWAIT;
1199 q = _RD(stp->sd_wrq);
1200 for (;;) {
1201 ASSERT(MUTEX_HELD(&stp->sd_lock));
1202 old_sd_flag = stp->sd_flag;
1203 mark = 0;
1204 delim = 0;
1205 first = 1;
1206 while ((bp = strget(stp, q, uiop, first, &error)) == NULL) {
1207 int done = 0;
1208
1209 ASSERT(MUTEX_HELD(&stp->sd_lock));
1210
1211 if (error != 0)
1212 goto oops;
1213
1214 if (stp->sd_flag & (STRHUP|STREOF)) {
1215 goto oops;
1216 }
1217 if (rflg && !(stp->sd_flag & STRDELIM)) {
1218 goto oops;
1219 }
1220 /*
1221 * If a read(fd,buf,0) has been done, there is no
1222 * need to sleep. We always have zero bytes to
1223 * return.
1224 */
1225 if (uiop->uio_resid == 0) {
1226 goto oops;
1227 }
1228
1229 qbackenable(q, 0);
1230
1231 TRACE_3(TR_FAC_STREAMS_FR, TR_STRREAD_WAIT,
1232 "strread calls strwaitq:%p, %p, %p",
1233 vp, uiop, crp);
1234 if ((error = strwaitq(stp, waitflag, uiop->uio_resid,
1235 uiop->uio_fmode, -1, &done)) != 0 || done) {
1236 TRACE_3(TR_FAC_STREAMS_FR, TR_STRREAD_DONE,
1237 "strread error or done:%p, %p, %p",
1238 vp, uiop, crp);
1239 if ((uiop->uio_fmode & FNDELAY) &&
1240 (stp->sd_flag & OLDNDELAY) &&
1241 (error == EAGAIN))
1242 error = 0;
1243 goto oops;
1244 }
1245 TRACE_3(TR_FAC_STREAMS_FR, TR_STRREAD_AWAKE,
1246 "strread awakes:%p, %p, %p", vp, uiop, crp);
1247 if ((error = i_straccess(stp, JCREAD)) != 0) {
1248 goto oops;
1249 }
1250 first = 0;
1251 }
1252
1253 ASSERT(MUTEX_HELD(&stp->sd_lock));
1254 ASSERT(bp);
1255 pri = bp->b_band;
1256 /*
1257 * Extract any mark information. If the message is not
1258 * completely consumed this information will be put in the mblk
1259 * that is putback.
1260 * If MSGMARKNEXT is set and the message is completely consumed
1261 * the STRATMARK flag will be set below. Likewise, if
1262 * MSGNOTMARKNEXT is set and the message is
1263 * completely consumed STRNOTATMARK will be set.
1264 *
1265 * For some unknown reason strread only breaks the read at the
1266 * last mark.
1267 */
1268 mark = bp->b_flag & (MSGMARK | MSGMARKNEXT | MSGNOTMARKNEXT);
1269 ASSERT((mark & (MSGMARKNEXT|MSGNOTMARKNEXT)) !=
1270 (MSGMARKNEXT|MSGNOTMARKNEXT));
1271 if (mark != 0 && bp == stp->sd_mark) {
1272 if (rflg) {
1273 putback(stp, q, bp, pri);
1274 goto oops;
1275 }
1276 mark |= _LASTMARK;
1277 stp->sd_mark = NULL;
1278 }
1279 if ((stp->sd_flag & STRDELIM) && (bp->b_flag & MSGDELIM))
1280 delim = 1;
1281 mutex_exit(&stp->sd_lock);
1282
1283 if (STREAM_NEEDSERVICE(stp))
1284 stream_runservice(stp);
1285
1286 type = bp->b_datap->db_type;
1287
1288 switch (type) {
1289
1290 case M_DATA:
1291 ismdata:
1292 if (msgnodata(bp)) {
1293 if (mark || delim) {
1294 freemsg(bp);
1295 } else if (rflg) {
1296
1297 /*
1298 * If already read data put zero
1299 * length message back on queue else
1300 * free msg and return 0.
1301 */
1302 bp->b_band = pri;
1303 mutex_enter(&stp->sd_lock);
1304 putback(stp, q, bp, pri);
1305 mutex_exit(&stp->sd_lock);
1306 } else {
1307 freemsg(bp);
1308 }
1309 error = 0;
1310 goto oops1;
1311 }
1312
1313 rflg = 1;
1314 waitflag |= NOINTR;
1315 bp = struiocopyout(bp, uiop, &error);
1316 if (error != 0)
1317 goto oops1;
1318
1319 mutex_enter(&stp->sd_lock);
1320 if (bp) {
1321 /*
1322 * Have remaining data in message.
1323 * Free msg if in discard mode.
1324 */
1325 if (stp->sd_read_opt & RD_MSGDIS) {
1326 freemsg(bp);
1327 } else {
1328 bp->b_band = pri;
1329 if ((mark & _LASTMARK) &&
1330 (stp->sd_mark == NULL))
1331 stp->sd_mark = bp;
1332 bp->b_flag |= mark & ~_LASTMARK;
1333 if (delim)
1334 bp->b_flag |= MSGDELIM;
1335 if (msgnodata(bp))
1336 freemsg(bp);
1337 else
1338 putback(stp, q, bp, pri);
1339 }
1340 } else {
1341 /*
1342 * Consumed the complete message.
1343 * Move the MSG*MARKNEXT information
1344 * to the stream head just in case
1345 * the read queue becomes empty.
1346 *
1347 * If the stream head was at the mark
1348 * (STRATMARK) before we dropped sd_lock above
1349 * and some data was consumed then we have
1350 * moved past the mark thus STRATMARK is
1351 * cleared. However, if a message arrived in
1352 * strrput during the copyout above causing
1353 * STRATMARK to be set we can not clear that
1354 * flag.
1355 */
1356 if (mark &
1357 (MSGMARKNEXT|MSGNOTMARKNEXT|MSGMARK)) {
1358 if (mark & MSGMARKNEXT) {
1359 stp->sd_flag &= ~STRNOTATMARK;
1360 stp->sd_flag |= STRATMARK;
1361 } else if (mark & MSGNOTMARKNEXT) {
1362 stp->sd_flag &= ~STRATMARK;
1363 stp->sd_flag |= STRNOTATMARK;
1364 } else {
1365 stp->sd_flag &=
1366 ~(STRATMARK|STRNOTATMARK);
1367 }
1368 } else if (rflg && (old_sd_flag & STRATMARK)) {
1369 stp->sd_flag &= ~STRATMARK;
1370 }
1371 }
1372
1373 /*
1374 * Check for signal messages at the front of the read
1375 * queue and generate the signal(s) if appropriate.
1376 * The only signal that can be on queue is M_SIG at
1377 * this point.
1378 */
1379 while ((((bp = q->q_first)) != NULL) &&
1380 (bp->b_datap->db_type == M_SIG)) {
1381 bp = getq_noenab(q, 0);
1382 /*
1383 * sd_lock is held so the content of the
1384 * read queue can not change.
1385 */
1386 ASSERT(bp != NULL && DB_TYPE(bp) == M_SIG);
1387 strsignal_nolock(stp, *bp->b_rptr, bp->b_band);
1388 mutex_exit(&stp->sd_lock);
1389 freemsg(bp);
1390 if (STREAM_NEEDSERVICE(stp))
1391 stream_runservice(stp);
1392 mutex_enter(&stp->sd_lock);
1393 }
1394
1395 if ((uiop->uio_resid == 0) || (mark & _LASTMARK) ||
1396 delim ||
1397 (stp->sd_read_opt & (RD_MSGDIS|RD_MSGNODIS))) {
1398 goto oops;
1399 }
1400 continue;
1401
1402 case M_SIG:
1403 strsignal(stp, *bp->b_rptr, (int32_t)bp->b_band);
1404 freemsg(bp);
1405 mutex_enter(&stp->sd_lock);
1406 continue;
1407
1408 case M_PROTO:
1409 case M_PCPROTO:
1410 /*
1411 * Only data messages are readable.
1412 * Any others generate an error, unless
1413 * RD_PROTDIS or RD_PROTDAT is set.
1414 */
1415 if (stp->sd_read_opt & RD_PROTDAT) {
1416 for (nbp = bp; nbp; nbp = nbp->b_next) {
1417 if ((nbp->b_datap->db_type ==
1418 M_PROTO) ||
1419 (nbp->b_datap->db_type ==
1420 M_PCPROTO)) {
1421 nbp->b_datap->db_type = M_DATA;
1422 } else {
1423 break;
1424 }
1425 }
1426 /*
1427 * clear stream head hi pri flag based on
1428 * first message
1429 */
1430 if (type == M_PCPROTO) {
1431 mutex_enter(&stp->sd_lock);
1432 stp->sd_flag &= ~STRPRI;
1433 mutex_exit(&stp->sd_lock);
1434 }
1435 goto ismdata;
1436 } else if (stp->sd_read_opt & RD_PROTDIS) {
1437 /*
1438 * discard non-data messages
1439 */
1440 while (bp &&
1441 ((bp->b_datap->db_type == M_PROTO) ||
1442 (bp->b_datap->db_type == M_PCPROTO))) {
1443 nbp = unlinkb(bp);
1444 freeb(bp);
1445 bp = nbp;
1446 }
1447 /*
1448 * clear stream head hi pri flag based on
1449 * first message
1450 */
1451 if (type == M_PCPROTO) {
1452 mutex_enter(&stp->sd_lock);
1453 stp->sd_flag &= ~STRPRI;
1454 mutex_exit(&stp->sd_lock);
1455 }
1456 if (bp) {
1457 bp->b_band = pri;
1458 goto ismdata;
1459 } else {
1460 break;
1461 }
1462 }
1463 /* FALLTHRU */
1464 case M_PASSFP:
1465 if ((bp->b_datap->db_type == M_PASSFP) &&
1466 (stp->sd_read_opt & RD_PROTDIS)) {
1467 freemsg(bp);
1468 break;
1469 }
1470 mutex_enter(&stp->sd_lock);
1471 putback(stp, q, bp, pri);
1472 mutex_exit(&stp->sd_lock);
1473 if (rflg == 0)
1474 error = EBADMSG;
1475 goto oops1;
1476
1477 default:
1478 /*
1479 * Garbage on stream head read queue.
1480 */
1481 cmn_err(CE_WARN, "bad %x found at stream head\n",
1482 bp->b_datap->db_type);
1483 freemsg(bp);
1484 goto oops1;
1485 }
1486 mutex_enter(&stp->sd_lock);
1487 }
1488 oops:
1489 mutex_exit(&stp->sd_lock);
1490 oops1:
1491 qbackenable(q, pri);
1492 return (error);
1493 #undef _LASTMARK
1494 }
1495
1496 /*
1497 * Default processing of M_PROTO/M_PCPROTO messages.
1498 * Determine which wakeups and signals are needed.
1499 * This can be replaced by a user-specified procedure for kernel users
1500 * of STREAMS.
1501 */
1502 /* ARGSUSED */
1503 mblk_t *
1504 strrput_proto(vnode_t *vp, mblk_t *mp,
1505 strwakeup_t *wakeups, strsigset_t *firstmsgsigs,
1506 strsigset_t *allmsgsigs, strpollset_t *pollwakeups)
1507 {
1508 *wakeups = RSLEEP;
1509 *allmsgsigs = 0;
1510
1511 switch (mp->b_datap->db_type) {
1512 case M_PROTO:
1513 if (mp->b_band == 0) {
1514 *firstmsgsigs = S_INPUT | S_RDNORM;
1515 *pollwakeups = POLLIN | POLLRDNORM;
1516 } else {
1517 *firstmsgsigs = S_INPUT | S_RDBAND;
1518 *pollwakeups = POLLIN | POLLRDBAND;
1519 }
1520 break;
1521 case M_PCPROTO:
1522 *firstmsgsigs = S_HIPRI;
1523 *pollwakeups = POLLPRI;
1524 break;
1525 }
1526 return (mp);
1527 }
1528
1529 /*
1530 * Default processing of everything but M_DATA, M_PROTO, M_PCPROTO and
1531 * M_PASSFP messages.
1532 * Determine which wakeups and signals are needed.
1533 * This can be replaced by a user-specified procedure for kernel users
1534 * of STREAMS.
1535 */
1536 /* ARGSUSED */
1537 mblk_t *
1538 strrput_misc(vnode_t *vp, mblk_t *mp,
1539 strwakeup_t *wakeups, strsigset_t *firstmsgsigs,
1540 strsigset_t *allmsgsigs, strpollset_t *pollwakeups)
1541 {
1542 *wakeups = 0;
1543 *firstmsgsigs = 0;
1544 *allmsgsigs = 0;
1545 *pollwakeups = 0;
1546 return (mp);
1547 }
1548
1549 /*
1550 * Stream read put procedure. Called from downstream driver/module
1551 * with messages for the stream head. Data, protocol, and in-stream
1552 * signal messages are placed on the queue, others are handled directly.
1553 */
1554 int
1555 strrput(queue_t *q, mblk_t *bp)
1556 {
1557 struct stdata *stp;
1558 ulong_t rput_opt;
1559 strwakeup_t wakeups;
1560 strsigset_t firstmsgsigs; /* Signals if first message on queue */
1561 strsigset_t allmsgsigs; /* Signals for all messages */
1562 strsigset_t signals; /* Signals events to generate */
1563 strpollset_t pollwakeups;
1564 mblk_t *nextbp;
1565 uchar_t band = 0;
1566 int hipri_sig;
1567
1568 stp = (struct stdata *)q->q_ptr;
1569 /*
1570 * Use rput_opt for optimized access to the SR_ flags except
1571 * SR_POLLIN. That flag has to be checked under sd_lock since it
1572 * is modified by strpoll().
1573 */
1574 rput_opt = stp->sd_rput_opt;
1575
1576 ASSERT(qclaimed(q));
1577 TRACE_2(TR_FAC_STREAMS_FR, TR_STRRPUT_ENTER,
1578 "strrput called with message type:q %p bp %p", q, bp);
1579
1580 /*
1581 * Perform initial processing and pass to the parameterized functions.
1582 */
1583 ASSERT(bp->b_next == NULL);
1584
1585 switch (bp->b_datap->db_type) {
1586 case M_DATA:
1587 /*
1588 * sockfs is the only consumer of STREOF and when it is set,
1589 * it implies that the receiver is not interested in receiving
1590 * any more data, hence the mblk is freed to prevent unnecessary
1591 * message queueing at the stream head.
1592 */
1593 if (stp->sd_flag == STREOF) {
1594 freemsg(bp);
1595 return (0);
1596 }
1597 if ((rput_opt & SR_IGN_ZEROLEN) &&
1598 bp->b_rptr == bp->b_wptr && msgnodata(bp)) {
1599 /*
1600 * Ignore zero-length M_DATA messages. These might be
1601 * generated by some transports.
1602 * The zero-length M_DATA messages, even if they
1603 * are ignored, should effect the atmark tracking and
1604 * should wake up a thread sleeping in strwaitmark.
1605 */
1606 mutex_enter(&stp->sd_lock);
1607 if (bp->b_flag & MSGMARKNEXT) {
1608 /*
1609 * Record the position of the mark either
1610 * in q_last or in STRATMARK.
1611 */
1612 if (q->q_last != NULL) {
1613 q->q_last->b_flag &= ~MSGNOTMARKNEXT;
1614 q->q_last->b_flag |= MSGMARKNEXT;
1615 } else {
1616 stp->sd_flag &= ~STRNOTATMARK;
1617 stp->sd_flag |= STRATMARK;
1618 }
1619 } else if (bp->b_flag & MSGNOTMARKNEXT) {
1620 /*
1621 * Record that this is not the position of
1622 * the mark either in q_last or in
1623 * STRNOTATMARK.
1624 */
1625 if (q->q_last != NULL) {
1626 q->q_last->b_flag &= ~MSGMARKNEXT;
1627 q->q_last->b_flag |= MSGNOTMARKNEXT;
1628 } else {
1629 stp->sd_flag &= ~STRATMARK;
1630 stp->sd_flag |= STRNOTATMARK;
1631 }
1632 }
1633 if (stp->sd_flag & RSLEEP) {
1634 stp->sd_flag &= ~RSLEEP;
1635 cv_broadcast(&q->q_wait);
1636 }
1637 mutex_exit(&stp->sd_lock);
1638 freemsg(bp);
1639 return (0);
1640 }
1641 wakeups = RSLEEP;
1642 if (bp->b_band == 0) {
1643 firstmsgsigs = S_INPUT | S_RDNORM;
1644 pollwakeups = POLLIN | POLLRDNORM;
1645 } else {
1646 firstmsgsigs = S_INPUT | S_RDBAND;
1647 pollwakeups = POLLIN | POLLRDBAND;
1648 }
1649 if (rput_opt & SR_SIGALLDATA)
1650 allmsgsigs = firstmsgsigs;
1651 else
1652 allmsgsigs = 0;
1653
1654 mutex_enter(&stp->sd_lock);
1655 if ((rput_opt & SR_CONSOL_DATA) &&
1656 (q->q_last != NULL) &&
1657 (bp->b_flag & (MSGMARK|MSGDELIM)) == 0) {
1658 /*
1659 * Consolidate an M_DATA message onto an M_DATA,
1660 * M_PROTO, or M_PCPROTO by merging it with q_last.
1661 * The consolidation does not take place if
1662 * the old message is marked with either of the
1663 * marks or the delim flag or if the new
1664 * message is marked with MSGMARK. The MSGMARK
1665 * check is needed to handle the odd semantics of
1666 * MSGMARK where essentially the whole message
1667 * is to be treated as marked.
1668 * Carry any MSGMARKNEXT and MSGNOTMARKNEXT from the
1669 * new message to the front of the b_cont chain.
1670 */
1671 mblk_t *lbp = q->q_last;
1672 unsigned char db_type = lbp->b_datap->db_type;
1673
1674 if ((db_type == M_DATA || db_type == M_PROTO ||
1675 db_type == M_PCPROTO) &&
1676 !(lbp->b_flag & (MSGDELIM|MSGMARK|MSGMARKNEXT))) {
1677 rmvq_noenab(q, lbp);
1678 /*
1679 * The first message in the b_cont list
1680 * tracks MSGMARKNEXT and MSGNOTMARKNEXT.
1681 * We need to handle the case where we
1682 * are appending:
1683 *
1684 * 1) a MSGMARKNEXT to a MSGNOTMARKNEXT.
1685 * 2) a MSGMARKNEXT to a plain message.
1686 * 3) a MSGNOTMARKNEXT to a plain message
1687 * 4) a MSGNOTMARKNEXT to a MSGNOTMARKNEXT
1688 * message.
1689 *
1690 * Thus we never append a MSGMARKNEXT or
1691 * MSGNOTMARKNEXT to a MSGMARKNEXT message.
1692 */
1693 if (bp->b_flag & MSGMARKNEXT) {
1694 lbp->b_flag |= MSGMARKNEXT;
1695 lbp->b_flag &= ~MSGNOTMARKNEXT;
1696 bp->b_flag &= ~MSGMARKNEXT;
1697 } else if (bp->b_flag & MSGNOTMARKNEXT) {
1698 lbp->b_flag |= MSGNOTMARKNEXT;
1699 bp->b_flag &= ~MSGNOTMARKNEXT;
1700 }
1701
1702 linkb(lbp, bp);
1703 bp = lbp;
1704 /*
1705 * The new message logically isn't the first
1706 * even though the q_first check below thinks
1707 * it is. Clear the firstmsgsigs to make it
1708 * not appear to be first.
1709 */
1710 firstmsgsigs = 0;
1711 }
1712 }
1713 break;
1714
1715 case M_PASSFP:
1716 wakeups = RSLEEP;
1717 allmsgsigs = 0;
1718 if (bp->b_band == 0) {
1719 firstmsgsigs = S_INPUT | S_RDNORM;
1720 pollwakeups = POLLIN | POLLRDNORM;
1721 } else {
1722 firstmsgsigs = S_INPUT | S_RDBAND;
1723 pollwakeups = POLLIN | POLLRDBAND;
1724 }
1725 mutex_enter(&stp->sd_lock);
1726 break;
1727
1728 case M_PROTO:
1729 case M_PCPROTO:
1730 ASSERT(stp->sd_rprotofunc != NULL);
1731 bp = (stp->sd_rprotofunc)(stp->sd_vnode, bp,
1732 &wakeups, &firstmsgsigs, &allmsgsigs, &pollwakeups);
1733 #define ALLSIG (S_INPUT|S_HIPRI|S_OUTPUT|S_MSG|S_ERROR|S_HANGUP|S_RDNORM|\
1734 S_WRNORM|S_RDBAND|S_WRBAND|S_BANDURG)
1735 #define ALLPOLL (POLLIN|POLLPRI|POLLOUT|POLLRDNORM|POLLWRNORM|POLLRDBAND|\
1736 POLLWRBAND)
1737
1738 ASSERT((wakeups & ~(RSLEEP|WSLEEP)) == 0);
1739 ASSERT((firstmsgsigs & ~ALLSIG) == 0);
1740 ASSERT((allmsgsigs & ~ALLSIG) == 0);
1741 ASSERT((pollwakeups & ~ALLPOLL) == 0);
1742
1743 mutex_enter(&stp->sd_lock);
1744 break;
1745
1746 default:
1747 ASSERT(stp->sd_rmiscfunc != NULL);
1748 bp = (stp->sd_rmiscfunc)(stp->sd_vnode, bp,
1749 &wakeups, &firstmsgsigs, &allmsgsigs, &pollwakeups);
1750 ASSERT((wakeups & ~(RSLEEP|WSLEEP)) == 0);
1751 ASSERT((firstmsgsigs & ~ALLSIG) == 0);
1752 ASSERT((allmsgsigs & ~ALLSIG) == 0);
1753 ASSERT((pollwakeups & ~ALLPOLL) == 0);
1754 #undef ALLSIG
1755 #undef ALLPOLL
1756 mutex_enter(&stp->sd_lock);
1757 break;
1758 }
1759 ASSERT(MUTEX_HELD(&stp->sd_lock));
1760
1761 /* By default generate superset of signals */
1762 signals = (firstmsgsigs | allmsgsigs);
1763
1764 /*
1765 * The proto and misc functions can return multiple messages
1766 * as a b_next chain. Such messages are processed separately.
1767 */
1768 one_more:
1769 hipri_sig = 0;
1770 if (bp == NULL) {
1771 nextbp = NULL;
1772 } else {
1773 nextbp = bp->b_next;
1774 bp->b_next = NULL;
1775
1776 switch (bp->b_datap->db_type) {
1777 case M_PCPROTO:
1778 /*
1779 * Only one priority protocol message is allowed at the
1780 * stream head at a time.
1781 */
1782 if (stp->sd_flag & STRPRI) {
1783 TRACE_0(TR_FAC_STREAMS_FR, TR_STRRPUT_PROTERR,
1784 "M_PCPROTO already at head");
1785 freemsg(bp);
1786 mutex_exit(&stp->sd_lock);
1787 goto done;
1788 }
1789 stp->sd_flag |= STRPRI;
1790 hipri_sig = 1;
1791 /* FALLTHRU */
1792 case M_DATA:
1793 case M_PROTO:
1794 case M_PASSFP:
1795 band = bp->b_band;
1796 /*
1797 * Marking doesn't work well when messages
1798 * are marked in more than one band. We only
1799 * remember the last message received, even if
1800 * it is placed on the queue ahead of other
1801 * marked messages.
1802 */
1803 if (bp->b_flag & MSGMARK)
1804 stp->sd_mark = bp;
1805 (void) putq(q, bp);
1806
1807 /*
1808 * If message is a PCPROTO message, always use
1809 * firstmsgsigs to determine if a signal should be
1810 * sent as strrput is the only place to send
1811 * signals for PCPROTO. Other messages are based on
1812 * the STRGETINPROG flag. The flag determines if
1813 * strrput or (k)strgetmsg will be responsible for
1814 * sending the signals, in the firstmsgsigs case.
1815 */
1816 if ((hipri_sig == 1) ||
1817 (((stp->sd_flag & STRGETINPROG) == 0) &&
1818 (q->q_first == bp)))
1819 signals = (firstmsgsigs | allmsgsigs);
1820 else
1821 signals = allmsgsigs;
1822 break;
1823
1824 default:
1825 mutex_exit(&stp->sd_lock);
1826 (void) strrput_nondata(q, bp);
1827 mutex_enter(&stp->sd_lock);
1828 break;
1829 }
1830 }
1831 ASSERT(MUTEX_HELD(&stp->sd_lock));
1832 /*
1833 * Wake sleeping read/getmsg and cancel deferred wakeup
1834 */
1835 if (wakeups & RSLEEP)
1836 stp->sd_wakeq &= ~RSLEEP;
1837
1838 wakeups &= stp->sd_flag;
1839 if (wakeups & RSLEEP) {
1840 stp->sd_flag &= ~RSLEEP;
1841 cv_broadcast(&q->q_wait);
1842 }
1843 if (wakeups & WSLEEP) {
1844 stp->sd_flag &= ~WSLEEP;
1845 cv_broadcast(&_WR(q)->q_wait);
1846 }
1847
1848 if (pollwakeups != 0) {
1849 if (pollwakeups == (POLLIN | POLLRDNORM)) {
1850 /*
1851 * Can't use rput_opt since it was not
1852 * read when sd_lock was held and SR_POLLIN is changed
1853 * by strpoll() under sd_lock.
1854 */
1855 if (!(stp->sd_rput_opt & SR_POLLIN))
1856 goto no_pollwake;
1857 stp->sd_rput_opt &= ~SR_POLLIN;
1858 }
1859 mutex_exit(&stp->sd_lock);
1860 pollwakeup(&stp->sd_pollist, pollwakeups);
1861 mutex_enter(&stp->sd_lock);
1862 }
1863 no_pollwake:
1864
1865 /*
1866 * strsendsig can handle multiple signals with a
1867 * single call.
1868 */
1869 if (stp->sd_sigflags & signals)
1870 strsendsig(stp->sd_siglist, signals, band, 0);
1871 mutex_exit(&stp->sd_lock);
1872
1873
1874 done:
1875 if (nextbp == NULL)
1876 return (0);
1877
1878 /*
1879 * Any signals were handled the first time.
1880 * Wakeups and pollwakeups are redone to avoid any race
1881 * conditions - all the messages are not queued until the
1882 * last message has been processed by strrput.
1883 */
1884 bp = nextbp;
1885 signals = firstmsgsigs = allmsgsigs = 0;
1886 mutex_enter(&stp->sd_lock);
1887 goto one_more;
1888 }
1889
1890 static void
1891 log_dupioc(queue_t *rq, mblk_t *bp)
1892 {
1893 queue_t *wq, *qp;
1894 char *modnames, *mnp, *dname;
1895 size_t maxmodstr;
1896 boolean_t islast;
1897
1898 /*
1899 * Allocate a buffer large enough to hold the names of nstrpush modules
1900 * and one driver, with spaces between and NUL terminator. If we can't
1901 * get memory, then we'll just log the driver name.
1902 */
1903 maxmodstr = nstrpush * (FMNAMESZ + 1);
1904 mnp = modnames = kmem_alloc(maxmodstr, KM_NOSLEEP);
1905
1906 /* march down write side to print log message down to the driver */
1907 wq = WR(rq);
1908
1909 /* make sure q_next doesn't shift around while we're grabbing data */
1910 claimstr(wq);
1911 qp = wq->q_next;
1912 do {
1913 dname = Q2NAME(qp);
1914 islast = !SAMESTR(qp) || qp->q_next == NULL;
1915 if (modnames == NULL) {
1916 /*
1917 * If we don't have memory, then get the driver name in
1918 * the log where we can see it. Note that memory
1919 * pressure is a possible cause of these sorts of bugs.
1920 */
1921 if (islast) {
1922 modnames = dname;
1923 maxmodstr = 0;
1924 }
1925 } else {
1926 mnp += snprintf(mnp, FMNAMESZ + 1, "%s", dname);
1927 if (!islast)
1928 *mnp++ = ' ';
1929 }
1930 qp = qp->q_next;
1931 } while (!islast);
1932 releasestr(wq);
1933 /* Cannot happen unless stream head is corrupt. */
1934 ASSERT(modnames != NULL);
1935 (void) strlog(rq->q_qinfo->qi_minfo->mi_idnum, 0, 1,
1936 SL_CONSOLE|SL_TRACE|SL_ERROR,
1937 "Warning: stream %p received duplicate %X M_IOC%s; module list: %s",
1938 rq->q_ptr, ((struct iocblk *)bp->b_rptr)->ioc_cmd,
1939 (DB_TYPE(bp) == M_IOCACK ? "ACK" : "NAK"), modnames);
1940 if (maxmodstr != 0)
1941 kmem_free(modnames, maxmodstr);
1942 }
1943
1944 int
1945 strrput_nondata(queue_t *q, mblk_t *bp)
1946 {
1947 struct stdata *stp;
1948 struct iocblk *iocbp;
1949 struct stroptions *sop;
1950 struct copyreq *reqp;
1951 struct copyresp *resp;
1952 unsigned char bpri;
1953 unsigned char flushed_already = 0;
1954
1955 stp = (struct stdata *)q->q_ptr;
1956
1957 ASSERT(!(stp->sd_flag & STPLEX));
1958 ASSERT(qclaimed(q));
1959
1960 switch (bp->b_datap->db_type) {
1961 case M_ERROR:
1962 /*
1963 * An error has occurred downstream, the errno is in the first
1964 * bytes of the message.
1965 */
1966 if ((bp->b_wptr - bp->b_rptr) == 2) { /* New flavor */
1967 unsigned char rw = 0;
1968
1969 mutex_enter(&stp->sd_lock);
1970 if (*bp->b_rptr != NOERROR) { /* read error */
1971 if (*bp->b_rptr != 0) {
1972 if (stp->sd_flag & STRDERR)
1973 flushed_already |= FLUSHR;
1974 stp->sd_flag |= STRDERR;
1975 rw |= FLUSHR;
1976 } else {
1977 stp->sd_flag &= ~STRDERR;
1978 }
1979 stp->sd_rerror = *bp->b_rptr;
1980 }
1981 bp->b_rptr++;
1982 if (*bp->b_rptr != NOERROR) { /* write error */
1983 if (*bp->b_rptr != 0) {
1984 if (stp->sd_flag & STWRERR)
1985 flushed_already |= FLUSHW;
1986 stp->sd_flag |= STWRERR;
1987 rw |= FLUSHW;
1988 } else {
1989 stp->sd_flag &= ~STWRERR;
1990 }
1991 stp->sd_werror = *bp->b_rptr;
1992 }
1993 if (rw) {
1994 TRACE_2(TR_FAC_STREAMS_FR, TR_STRRPUT_WAKE,
1995 "strrput cv_broadcast:q %p, bp %p",
1996 q, bp);
1997 cv_broadcast(&q->q_wait); /* readers */
1998 cv_broadcast(&_WR(q)->q_wait); /* writers */
1999 cv_broadcast(&stp->sd_monitor); /* ioctllers */
2000
2001 mutex_exit(&stp->sd_lock);
2002 pollwakeup(&stp->sd_pollist, POLLERR);
2003 mutex_enter(&stp->sd_lock);
2004
2005 if (stp->sd_sigflags & S_ERROR)
2006 strsendsig(stp->sd_siglist, S_ERROR, 0,
2007 ((rw & FLUSHR) ? stp->sd_rerror :
2008 stp->sd_werror));
2009 mutex_exit(&stp->sd_lock);
2010 /*
2011 * Send the M_FLUSH only
2012 * for the first M_ERROR
2013 * message on the stream
2014 */
2015 if (flushed_already == rw) {
2016 freemsg(bp);
2017 return (0);
2018 }
2019
2020 bp->b_datap->db_type = M_FLUSH;
2021 *bp->b_rptr = rw;
2022 bp->b_wptr = bp->b_rptr + 1;
2023 /*
2024 * Protect against the driver
2025 * passing up messages after
2026 * it has done a qprocsoff
2027 */
2028 if (_OTHERQ(q)->q_next == NULL)
2029 freemsg(bp);
2030 else
2031 qreply(q, bp);
2032 return (0);
2033 } else
2034 mutex_exit(&stp->sd_lock);
2035 } else if (*bp->b_rptr != 0) { /* Old flavor */
2036 if (stp->sd_flag & (STRDERR|STWRERR))
2037 flushed_already = FLUSHRW;
2038 mutex_enter(&stp->sd_lock);
2039 stp->sd_flag |= (STRDERR|STWRERR);
2040 stp->sd_rerror = *bp->b_rptr;
2041 stp->sd_werror = *bp->b_rptr;
2042 TRACE_2(TR_FAC_STREAMS_FR,
2043 TR_STRRPUT_WAKE2,
2044 "strrput wakeup #2:q %p, bp %p", q, bp);
2045 cv_broadcast(&q->q_wait); /* the readers */
2046 cv_broadcast(&_WR(q)->q_wait); /* the writers */
2047 cv_broadcast(&stp->sd_monitor); /* ioctllers */
2048
2049 mutex_exit(&stp->sd_lock);
2050 pollwakeup(&stp->sd_pollist, POLLERR);
2051 mutex_enter(&stp->sd_lock);
2052
2053 if (stp->sd_sigflags & S_ERROR)
2054 strsendsig(stp->sd_siglist, S_ERROR, 0,
2055 (stp->sd_werror ? stp->sd_werror :
2056 stp->sd_rerror));
2057 mutex_exit(&stp->sd_lock);
2058
2059 /*
2060 * Send the M_FLUSH only
2061 * for the first M_ERROR
2062 * message on the stream
2063 */
2064 if (flushed_already != FLUSHRW) {
2065 bp->b_datap->db_type = M_FLUSH;
2066 *bp->b_rptr = FLUSHRW;
2067 /*
2068 * Protect against the driver passing up
2069 * messages after it has done a
2070 * qprocsoff.
2071 */
2072 if (_OTHERQ(q)->q_next == NULL)
2073 freemsg(bp);
2074 else
2075 qreply(q, bp);
2076 return (0);
2077 }
2078 }
2079 freemsg(bp);
2080 return (0);
2081
2082 case M_HANGUP:
2083
2084 freemsg(bp);
2085 mutex_enter(&stp->sd_lock);
2086 stp->sd_werror = ENXIO;
2087 stp->sd_flag |= STRHUP;
2088 stp->sd_flag &= ~(WSLEEP|RSLEEP);
2089
2090 /*
2091 * send signal if controlling tty
2092 */
2093
2094 if (stp->sd_sidp) {
2095 prsignal(stp->sd_sidp, SIGHUP);
2096 if (stp->sd_sidp != stp->sd_pgidp)
2097 pgsignal(stp->sd_pgidp, SIGTSTP);
2098 }
2099
2100 /*
2101 * wake up read, write, and exception pollers and
2102 * reset wakeup mechanism.
2103 */
2104 cv_broadcast(&q->q_wait); /* the readers */
2105 cv_broadcast(&_WR(q)->q_wait); /* the writers */
2106 cv_broadcast(&stp->sd_monitor); /* the ioctllers */
2107 strhup(stp);
2108 mutex_exit(&stp->sd_lock);
2109 return (0);
2110
2111 case M_UNHANGUP:
2112 freemsg(bp);
2113 mutex_enter(&stp->sd_lock);
2114 stp->sd_werror = 0;
2115 stp->sd_flag &= ~STRHUP;
2116 mutex_exit(&stp->sd_lock);
2117 return (0);
2118
2119 case M_SIG:
2120 /*
2121 * Someone downstream wants to post a signal. The
2122 * signal to post is contained in the first byte of the
2123 * message. If the message would go on the front of
2124 * the queue, send a signal to the process group
2125 * (if not SIGPOLL) or to the siglist processes
2126 * (SIGPOLL). If something is already on the queue,
2127 * OR if we are delivering a delayed suspend (*sigh*
2128 * another "tty" hack) and there's no one sleeping already,
2129 * just enqueue the message.
2130 */
2131 mutex_enter(&stp->sd_lock);
2132 if (q->q_first || (*bp->b_rptr == SIGTSTP &&
2133 !(stp->sd_flag & RSLEEP))) {
2134 (void) putq(q, bp);
2135 mutex_exit(&stp->sd_lock);
2136 return (0);
2137 }
2138 mutex_exit(&stp->sd_lock);
2139 /* FALLTHRU */
2140
2141 case M_PCSIG:
2142 /*
2143 * Don't enqueue, just post the signal.
2144 */
2145 strsignal(stp, *bp->b_rptr, 0L);
2146 freemsg(bp);
2147 return (0);
2148
2149 case M_CMD:
2150 if (MBLKL(bp) != sizeof (cmdblk_t)) {
2151 freemsg(bp);
2152 return (0);
2153 }
2154
2155 mutex_enter(&stp->sd_lock);
2156 if (stp->sd_flag & STRCMDWAIT) {
2157 ASSERT(stp->sd_cmdblk == NULL);
2158 stp->sd_cmdblk = bp;
2159 cv_broadcast(&stp->sd_monitor);
2160 mutex_exit(&stp->sd_lock);
2161 } else {
2162 mutex_exit(&stp->sd_lock);
2163 freemsg(bp);
2164 }
2165 return (0);
2166
2167 case M_FLUSH:
2168 /*
2169 * Flush queues. The indication of which queues to flush
2170 * is in the first byte of the message. If the read queue
2171 * is specified, then flush it. If FLUSHBAND is set, just
2172 * flush the band specified by the second byte of the message.
2173 *
2174 * If a module has issued a M_SETOPT to not flush hi
2175 * priority messages off of the stream head, then pass this
2176 * flag into the flushq code to preserve such messages.
2177 */
2178
2179 if (*bp->b_rptr & FLUSHR) {
2180 mutex_enter(&stp->sd_lock);
2181 if (*bp->b_rptr & FLUSHBAND) {
2182 ASSERT((bp->b_wptr - bp->b_rptr) >= 2);
2183 flushband(q, *(bp->b_rptr + 1), FLUSHALL);
2184 } else
2185 flushq_common(q, FLUSHALL,
2186 stp->sd_read_opt & RFLUSHPCPROT);
2187 if ((q->q_first == NULL) ||
2188 (q->q_first->b_datap->db_type < QPCTL))
2189 stp->sd_flag &= ~STRPRI;
2190 else {
2191 ASSERT(stp->sd_flag & STRPRI);
2192 }
2193 mutex_exit(&stp->sd_lock);
2194 }
2195 if ((*bp->b_rptr & FLUSHW) && !(bp->b_flag & MSGNOLOOP)) {
2196 *bp->b_rptr &= ~FLUSHR;
2197 bp->b_flag |= MSGNOLOOP;
2198 /*
2199 * Protect against the driver passing up
2200 * messages after it has done a qprocsoff.
2201 */
2202 if (_OTHERQ(q)->q_next == NULL)
2203 freemsg(bp);
2204 else
2205 qreply(q, bp);
2206 return (0);
2207 }
2208 freemsg(bp);
2209 return (0);
2210
2211 case M_IOCACK:
2212 case M_IOCNAK:
2213 iocbp = (struct iocblk *)bp->b_rptr;
2214 /*
2215 * If not waiting for ACK or NAK then just free msg.
2216 * If incorrect id sequence number then just free msg.
2217 * If already have ACK or NAK for user then this is a
2218 * duplicate, display a warning and free the msg.
2219 */
2220 mutex_enter(&stp->sd_lock);
2221 if ((stp->sd_flag & IOCWAIT) == 0 || stp->sd_iocblk ||
2222 (stp->sd_iocid != iocbp->ioc_id)) {
2223 /*
2224 * If the ACK/NAK is a dup, display a message
2225 * Dup is when sd_iocid == ioc_id, and
2226 * sd_iocblk == <valid ptr> or -1 (the former
2227 * is when an ioctl has been put on the stream
2228 * head, but has not yet been consumed, the
2229 * latter is when it has been consumed).
2230 */
2231 if ((stp->sd_iocid == iocbp->ioc_id) &&
2232 (stp->sd_iocblk != NULL)) {
2233 log_dupioc(q, bp);
2234 }
2235 freemsg(bp);
2236 mutex_exit(&stp->sd_lock);
2237 return (0);
2238 }
2239
2240 /*
2241 * Assign ACK or NAK to user and wake up.
2242 */
2243 stp->sd_iocblk = bp;
2244 cv_broadcast(&stp->sd_monitor);
2245 mutex_exit(&stp->sd_lock);
2246 return (0);
2247
2248 case M_COPYIN:
2249 case M_COPYOUT:
2250 reqp = (struct copyreq *)bp->b_rptr;
2251
2252 /*
2253 * If not waiting for ACK or NAK then just fail request.
2254 * If already have ACK, NAK, or copy request, then just
2255 * fail request.
2256 * If incorrect id sequence number then just fail request.
2257 */
2258 mutex_enter(&stp->sd_lock);
2259 if ((stp->sd_flag & IOCWAIT) == 0 || stp->sd_iocblk ||
2260 (stp->sd_iocid != reqp->cq_id)) {
2261 if (bp->b_cont) {
2262 freemsg(bp->b_cont);
2263 bp->b_cont = NULL;
2264 }
2265 bp->b_datap->db_type = M_IOCDATA;
2266 bp->b_wptr = bp->b_rptr + sizeof (struct copyresp);
2267 resp = (struct copyresp *)bp->b_rptr;
2268 resp->cp_rval = (caddr_t)1; /* failure */
2269 mutex_exit(&stp->sd_lock);
2270 putnext(stp->sd_wrq, bp);
2271 return (0);
2272 }
2273
2274 /*
2275 * Assign copy request to user and wake up.
2276 */
2277 stp->sd_iocblk = bp;
2278 cv_broadcast(&stp->sd_monitor);
2279 mutex_exit(&stp->sd_lock);
2280 return (0);
2281
2282 case M_SETOPTS:
2283 /*
2284 * Set stream head options (read option, write offset,
2285 * min/max packet size, and/or high/low water marks for
2286 * the read side only).
2287 */
2288
2289 bpri = 0;
2290 sop = (struct stroptions *)bp->b_rptr;
2291 mutex_enter(&stp->sd_lock);
2292 if (sop->so_flags & SO_READOPT) {
2293 switch (sop->so_readopt & RMODEMASK) {
2294 case RNORM:
2295 stp->sd_read_opt &= ~(RD_MSGDIS | RD_MSGNODIS);
2296 break;
2297
2298 case RMSGD:
2299 stp->sd_read_opt =
2300 ((stp->sd_read_opt & ~RD_MSGNODIS) |
2301 RD_MSGDIS);
2302 break;
2303
2304 case RMSGN:
2305 stp->sd_read_opt =
2306 ((stp->sd_read_opt & ~RD_MSGDIS) |
2307 RD_MSGNODIS);
2308 break;
2309 }
2310 switch (sop->so_readopt & RPROTMASK) {
2311 case RPROTNORM:
2312 stp->sd_read_opt &= ~(RD_PROTDAT | RD_PROTDIS);
2313 break;
2314
2315 case RPROTDAT:
2316 stp->sd_read_opt =
2317 ((stp->sd_read_opt & ~RD_PROTDIS) |
2318 RD_PROTDAT);
2319 break;
2320
2321 case RPROTDIS:
2322 stp->sd_read_opt =
2323 ((stp->sd_read_opt & ~RD_PROTDAT) |
2324 RD_PROTDIS);
2325 break;
2326 }
2327 switch (sop->so_readopt & RFLUSHMASK) {
2328 case RFLUSHPCPROT:
2329 /*
2330 * This sets the stream head to NOT flush
2331 * M_PCPROTO messages.
2332 */
2333 stp->sd_read_opt |= RFLUSHPCPROT;
2334 break;
2335 }
2336 }
2337 if (sop->so_flags & SO_ERROPT) {
2338 switch (sop->so_erropt & RERRMASK) {
2339 case RERRNORM:
2340 stp->sd_flag &= ~STRDERRNONPERSIST;
2341 break;
2342 case RERRNONPERSIST:
2343 stp->sd_flag |= STRDERRNONPERSIST;
2344 break;
2345 }
2346 switch (sop->so_erropt & WERRMASK) {
2347 case WERRNORM:
2348 stp->sd_flag &= ~STWRERRNONPERSIST;
2349 break;
2350 case WERRNONPERSIST:
2351 stp->sd_flag |= STWRERRNONPERSIST;
2352 break;
2353 }
2354 }
2355 if (sop->so_flags & SO_COPYOPT) {
2356 if (sop->so_copyopt & ZCVMSAFE) {
2357 stp->sd_copyflag |= STZCVMSAFE;
2358 stp->sd_copyflag &= ~STZCVMUNSAFE;
2359 } else if (sop->so_copyopt & ZCVMUNSAFE) {
2360 stp->sd_copyflag |= STZCVMUNSAFE;
2361 stp->sd_copyflag &= ~STZCVMSAFE;
2362 }
2363
2364 if (sop->so_copyopt & COPYCACHED) {
2365 stp->sd_copyflag |= STRCOPYCACHED;
2366 }
2367 }
2368 if (sop->so_flags & SO_WROFF)
2369 stp->sd_wroff = sop->so_wroff;
2370 if (sop->so_flags & SO_TAIL)
2371 stp->sd_tail = sop->so_tail;
2372 if (sop->so_flags & SO_MINPSZ)
2373 q->q_minpsz = sop->so_minpsz;
2374 if (sop->so_flags & SO_MAXPSZ)
2375 q->q_maxpsz = sop->so_maxpsz;
2376 if (sop->so_flags & SO_MAXBLK)
2377 stp->sd_maxblk = sop->so_maxblk;
2378 if (sop->so_flags & SO_HIWAT) {
2379 if (sop->so_flags & SO_BAND) {
2380 if (strqset(q, QHIWAT,
2381 sop->so_band, sop->so_hiwat)) {
2382 cmn_err(CE_WARN, "strrput: could not "
2383 "allocate qband\n");
2384 } else {
2385 bpri = sop->so_band;
2386 }
2387 } else {
2388 q->q_hiwat = sop->so_hiwat;
2389 }
2390 }
2391 if (sop->so_flags & SO_LOWAT) {
2392 if (sop->so_flags & SO_BAND) {
2393 if (strqset(q, QLOWAT,
2394 sop->so_band, sop->so_lowat)) {
2395 cmn_err(CE_WARN, "strrput: could not "
2396 "allocate qband\n");
2397 } else {
2398 bpri = sop->so_band;
2399 }
2400 } else {
2401 q->q_lowat = sop->so_lowat;
2402 }
2403 }
2404 if (sop->so_flags & SO_MREADON)
2405 stp->sd_flag |= SNDMREAD;
2406 if (sop->so_flags & SO_MREADOFF)
2407 stp->sd_flag &= ~SNDMREAD;
2408 if (sop->so_flags & SO_NDELON)
2409 stp->sd_flag |= OLDNDELAY;
2410 if (sop->so_flags & SO_NDELOFF)
2411 stp->sd_flag &= ~OLDNDELAY;
2412 if (sop->so_flags & SO_ISTTY)
2413 stp->sd_flag |= STRISTTY;
2414 if (sop->so_flags & SO_ISNTTY)
2415 stp->sd_flag &= ~STRISTTY;
2416 if (sop->so_flags & SO_TOSTOP)
2417 stp->sd_flag |= STRTOSTOP;
2418 if (sop->so_flags & SO_TONSTOP)
2419 stp->sd_flag &= ~STRTOSTOP;
2420 if (sop->so_flags & SO_DELIM)
2421 stp->sd_flag |= STRDELIM;
2422 if (sop->so_flags & SO_NODELIM)
2423 stp->sd_flag &= ~STRDELIM;
2424
2425 mutex_exit(&stp->sd_lock);
2426 freemsg(bp);
2427
2428 /* Check backenable in case the water marks changed */
2429 qbackenable(q, bpri);
2430 return (0);
2431
2432 /*
2433 * The following set of cases deal with situations where two stream
2434 * heads are connected to each other (twisted streams). These messages
2435 * have no meaning at the stream head.
2436 */
2437 case M_BREAK:
2438 case M_CTL:
2439 case M_DELAY:
2440 case M_START:
2441 case M_STOP:
2442 case M_IOCDATA:
2443 case M_STARTI:
2444 case M_STOPI:
2445 freemsg(bp);
2446 return (0);
2447
2448 case M_IOCTL:
2449 /*
2450 * Always NAK this condition
2451 * (makes no sense)
2452 * If there is one or more threads in the read side
2453 * rwnext we have to defer the nacking until that thread
2454 * returns (in strget).
2455 */
2456 mutex_enter(&stp->sd_lock);
2457 if (stp->sd_struiodnak != 0) {
2458 /*
2459 * Defer NAK to the streamhead. Queue at the end
2460 * the list.
2461 */
2462 mblk_t *mp = stp->sd_struionak;
2463
2464 while (mp && mp->b_next)
2465 mp = mp->b_next;
2466 if (mp)
2467 mp->b_next = bp;
2468 else
2469 stp->sd_struionak = bp;
2470 bp->b_next = NULL;
2471 mutex_exit(&stp->sd_lock);
2472 return (0);
2473 }
2474 mutex_exit(&stp->sd_lock);
2475
2476 bp->b_datap->db_type = M_IOCNAK;
2477 /*
2478 * Protect against the driver passing up
2479 * messages after it has done a qprocsoff.
2480 */
2481 if (_OTHERQ(q)->q_next == NULL)
2482 freemsg(bp);
2483 else
2484 qreply(q, bp);
2485 return (0);
2486
2487 default:
2488 #ifdef DEBUG
2489 cmn_err(CE_WARN,
2490 "bad message type %x received at stream head\n",
2491 bp->b_datap->db_type);
2492 #endif
2493 freemsg(bp);
2494 return (0);
2495 }
2496
2497 /* NOTREACHED */
2498 }
2499
2500 /*
2501 * Check if the stream pointed to by `stp' can be written to, and return an
2502 * error code if not. If `eiohup' is set, then return EIO if STRHUP is set.
2503 * If `sigpipeok' is set and the SW_SIGPIPE option is enabled on the stream,
2504 * then always return EPIPE and send a SIGPIPE to the invoking thread.
2505 */
2506 static int
2507 strwriteable(struct stdata *stp, boolean_t eiohup, boolean_t sigpipeok)
2508 {
2509 int error;
2510
2511 ASSERT(MUTEX_HELD(&stp->sd_lock));
2512
2513 /*
2514 * For modem support, POSIX states that on writes, EIO should
2515 * be returned if the stream has been hung up.
2516 */
2517 if (eiohup && (stp->sd_flag & (STPLEX|STRHUP)) == STRHUP)
2518 error = EIO;
2519 else
2520 error = strgeterr(stp, STRHUP|STPLEX|STWRERR, 0);
2521
2522 if (error != 0) {
2523 if (!(stp->sd_flag & STPLEX) &&
2524 (stp->sd_wput_opt & SW_SIGPIPE) && sigpipeok) {
2525 tsignal(curthread, SIGPIPE);
2526 error = EPIPE;
2527 }
2528 }
2529
2530 return (error);
2531 }
2532
2533 /*
2534 * Copyin and send data down a stream.
2535 * The caller will allocate and copyin any control part that precedes the
2536 * message and pass that in as mctl.
2537 *
2538 * Caller should *not* hold sd_lock.
2539 * When EWOULDBLOCK is returned the caller has to redo the canputnext
2540 * under sd_lock in order to avoid missing a backenabling wakeup.
2541 *
2542 * Use iosize = -1 to not send any M_DATA. iosize = 0 sends zero-length M_DATA.
2543 *
2544 * Set MSG_IGNFLOW in flags to ignore flow control for hipri messages.
2545 * For sync streams we can only ignore flow control by reverting to using
2546 * putnext.
2547 *
2548 * If sd_maxblk is less than *iosize this routine might return without
2549 * transferring all of *iosize. In all cases, on return *iosize will contain
2550 * the amount of data that was transferred.
2551 */
2552 static int
2553 strput(struct stdata *stp, mblk_t *mctl, struct uio *uiop, ssize_t *iosize,
2554 int b_flag, int pri, int flags)
2555 {
2556 struiod_t uiod;
2557 mblk_t *mp;
2558 queue_t *wqp = stp->sd_wrq;
2559 int error = 0;
2560 ssize_t count = *iosize;
2561
2562 ASSERT(MUTEX_NOT_HELD(&stp->sd_lock));
2563
2564 if (uiop != NULL && count >= 0)
2565 flags |= stp->sd_struiowrq ? STRUIO_POSTPONE : 0;
2566
2567 if (!(flags & STRUIO_POSTPONE)) {
2568 /*
2569 * Use regular canputnext, strmakedata, putnext sequence.
2570 */
2571 if (pri == 0) {
2572 if (!canputnext(wqp) && !(flags & MSG_IGNFLOW)) {
2573 freemsg(mctl);
2574 return (EWOULDBLOCK);
2575 }
2576 } else {
2577 if (!(flags & MSG_IGNFLOW) && !bcanputnext(wqp, pri)) {
2578 freemsg(mctl);
2579 return (EWOULDBLOCK);
2580 }
2581 }
2582
2583 if ((error = strmakedata(iosize, uiop, stp, flags,
2584 &mp)) != 0) {
2585 freemsg(mctl);
2586 /*
2587 * need to change return code to ENOMEM
2588 * so that this is not confused with
2589 * flow control, EAGAIN.
2590 */
2591
2592 if (error == EAGAIN)
2593 return (ENOMEM);
2594 else
2595 return (error);
2596 }
2597 if (mctl != NULL) {
2598 if (mctl->b_cont == NULL)
2599 mctl->b_cont = mp;
2600 else if (mp != NULL)
2601 linkb(mctl, mp);
2602 mp = mctl;
2603 } else if (mp == NULL)
2604 return (0);
2605
2606 mp->b_flag |= b_flag;
2607 mp->b_band = (uchar_t)pri;
2608
2609 if (flags & MSG_IGNFLOW) {
2610 /*
2611 * XXX Hack: Don't get stuck running service
2612 * procedures. This is needed for sockfs when
2613 * sending the unbind message out of the rput
2614 * procedure - we don't want a put procedure
2615 * to run service procedures.
2616 */
2617 putnext(wqp, mp);
2618 } else {
2619 stream_willservice(stp);
2620 putnext(wqp, mp);
2621 stream_runservice(stp);
2622 }
2623 return (0);
2624 }
2625 /*
2626 * Stream supports rwnext() for the write side.
2627 */
2628 if ((error = strmakedata(iosize, uiop, stp, flags, &mp)) != 0) {
2629 freemsg(mctl);
2630 /*
2631 * map EAGAIN to ENOMEM since EAGAIN means "flow controlled".
2632 */
2633 return (error == EAGAIN ? ENOMEM : error);
2634 }
2635 if (mctl != NULL) {
2636 if (mctl->b_cont == NULL)
2637 mctl->b_cont = mp;
2638 else if (mp != NULL)
2639 linkb(mctl, mp);
2640 mp = mctl;
2641 } else if (mp == NULL) {
2642 return (0);
2643 }
2644
2645 mp->b_flag |= b_flag;
2646 mp->b_band = (uchar_t)pri;
2647
2648 (void) uiodup(uiop, &uiod.d_uio, uiod.d_iov,
2649 sizeof (uiod.d_iov) / sizeof (*uiod.d_iov));
2650 uiod.d_uio.uio_offset = 0;
2651 uiod.d_mp = mp;
2652 error = rwnext(wqp, &uiod);
2653 if (! uiod.d_mp) {
2654 uioskip(uiop, *iosize);
2655 return (error);
2656 }
2657 ASSERT(mp == uiod.d_mp);
2658 if (error == EINVAL) {
2659 /*
2660 * The stream plumbing must have changed while
2661 * we were away, so just turn off rwnext()s.
2662 */
2663 error = 0;
2664 } else if (error == EBUSY || error == EWOULDBLOCK) {
2665 /*
2666 * Couldn't enter a perimeter or took a page fault,
2667 * so fall-back to putnext().
2668 */
2669 error = 0;
2670 } else {
2671 freemsg(mp);
2672 return (error);
2673 }
2674 /* Have to check canput before consuming data from the uio */
2675 if (pri == 0) {
2676 if (!canputnext(wqp) && !(flags & MSG_IGNFLOW)) {
2677 freemsg(mp);
2678 return (EWOULDBLOCK);
2679 }
2680 } else {
2681 if (!bcanputnext(wqp, pri) && !(flags & MSG_IGNFLOW)) {
2682 freemsg(mp);
2683 return (EWOULDBLOCK);
2684 }
2685 }
2686 ASSERT(mp == uiod.d_mp);
2687 /* Copyin data from the uio */
2688 if ((error = struioget(wqp, mp, &uiod, 0)) != 0) {
2689 freemsg(mp);
2690 return (error);
2691 }
2692 uioskip(uiop, *iosize);
2693 if (flags & MSG_IGNFLOW) {
2694 /*
2695 * XXX Hack: Don't get stuck running service procedures.
2696 * This is needed for sockfs when sending the unbind message
2697 * out of the rput procedure - we don't want a put procedure
2698 * to run service procedures.
2699 */
2700 putnext(wqp, mp);
2701 } else {
2702 stream_willservice(stp);
2703 putnext(wqp, mp);
2704 stream_runservice(stp);
2705 }
2706 return (0);
2707 }
2708
2709 /*
2710 * Write attempts to break the write request into messages conforming
2711 * with the minimum and maximum packet sizes set downstream.
2712 *
2713 * Write will not block if downstream queue is full and
2714 * O_NDELAY is set, otherwise it will block waiting for the queue to get room.
2715 *
2716 * A write of zero bytes gets packaged into a zero length message and sent
2717 * downstream like any other message.
2718 *
2719 * If buffers of the requested sizes are not available, the write will
2720 * sleep until the buffers become available.
2721 *
2722 * Write (if specified) will supply a write offset in a message if it
2723 * makes sense. This can be specified by downstream modules as part of
2724 * a M_SETOPTS message. Write will not supply the write offset if it
2725 * cannot supply any data in a buffer. In other words, write will never
2726 * send down an empty packet due to a write offset.
2727 */
2728 /* ARGSUSED2 */
2729 int
2730 strwrite(struct vnode *vp, struct uio *uiop, cred_t *crp)
2731 {
2732 return (strwrite_common(vp, uiop, crp, 0));
2733 }
2734
2735 /* ARGSUSED2 */
2736 int
2737 strwrite_common(struct vnode *vp, struct uio *uiop, cred_t *crp, int wflag)
2738 {
2739 struct stdata *stp;
2740 struct queue *wqp;
2741 ssize_t rmin, rmax;
2742 ssize_t iosize;
2743 int waitflag;
2744 int tempmode;
2745 int error = 0;
2746 int b_flag;
2747
2748 ASSERT(vp->v_stream);
2749 stp = vp->v_stream;
2750
2751 mutex_enter(&stp->sd_lock);
2752
2753 if ((error = i_straccess(stp, JCWRITE)) != 0) {
2754 mutex_exit(&stp->sd_lock);
2755 return (error);
2756 }
2757
2758 if (stp->sd_flag & (STWRERR|STRHUP|STPLEX)) {
2759 error = strwriteable(stp, B_TRUE, B_TRUE);
2760 if (error != 0) {
2761 mutex_exit(&stp->sd_lock);
2762 return (error);
2763 }
2764 }
2765
2766 mutex_exit(&stp->sd_lock);
2767
2768 wqp = stp->sd_wrq;
2769
2770 /* get these values from them cached in the stream head */
2771 rmin = stp->sd_qn_minpsz;
2772 rmax = stp->sd_qn_maxpsz;
2773
2774 /*
2775 * Check the min/max packet size constraints. If min packet size
2776 * is non-zero, the write cannot be split into multiple messages
2777 * and still guarantee the size constraints.
2778 */
2779 TRACE_1(TR_FAC_STREAMS_FR, TR_STRWRITE_IN, "strwrite in:q %p", wqp);
2780
2781 ASSERT((rmax >= 0) || (rmax == INFPSZ));
2782 if (rmax == 0) {
2783 return (0);
2784 }
2785 if (rmin > 0) {
2786 if (uiop->uio_resid < rmin) {
2787 TRACE_3(TR_FAC_STREAMS_FR, TR_STRWRITE_OUT,
2788 "strwrite out:q %p out %d error %d",
2789 wqp, 0, ERANGE);
2790 return (ERANGE);
2791 }
2792 if ((rmax != INFPSZ) && (uiop->uio_resid > rmax)) {
2793 TRACE_3(TR_FAC_STREAMS_FR, TR_STRWRITE_OUT,
2794 "strwrite out:q %p out %d error %d",
2795 wqp, 1, ERANGE);
2796 return (ERANGE);
2797 }
2798 }
2799
2800 /*
2801 * Do until count satisfied or error.
2802 */
2803 waitflag = WRITEWAIT | wflag;
2804 if (stp->sd_flag & OLDNDELAY)
2805 tempmode = uiop->uio_fmode & ~FNDELAY;
2806 else
2807 tempmode = uiop->uio_fmode;
2808
2809 if (rmax == INFPSZ)
2810 rmax = uiop->uio_resid;
2811
2812 /*
2813 * Note that tempmode does not get used in strput/strmakedata
2814 * but only in strwaitq. The other routines use uio_fmode
2815 * unmodified.
2816 */
2817
2818 /* LINTED: constant in conditional context */
2819 while (1) { /* breaks when uio_resid reaches zero */
2820 /*
2821 * Determine the size of the next message to be
2822 * packaged. May have to break write into several
2823 * messages based on max packet size.
2824 */
2825 iosize = MIN(uiop->uio_resid, rmax);
2826
2827 /*
2828 * Put block downstream when flow control allows it.
2829 */
2830 if ((stp->sd_flag & STRDELIM) && (uiop->uio_resid == iosize))
2831 b_flag = MSGDELIM;
2832 else
2833 b_flag = 0;
2834
2835 for (;;) {
2836 int done = 0;
2837
2838 error = strput(stp, NULL, uiop, &iosize, b_flag, 0, 0);
2839 if (error == 0)
2840 break;
2841 if (error != EWOULDBLOCK)
2842 goto out;
2843
2844 mutex_enter(&stp->sd_lock);
2845 /*
2846 * Check for a missed wakeup.
2847 * Needed since strput did not hold sd_lock across
2848 * the canputnext.
2849 */
2850 if (canputnext(wqp)) {
2851 /* Try again */
2852 mutex_exit(&stp->sd_lock);
2853 continue;
2854 }
2855 TRACE_1(TR_FAC_STREAMS_FR, TR_STRWRITE_WAIT,
2856 "strwrite wait:q %p wait", wqp);
2857 if ((error = strwaitq(stp, waitflag, (ssize_t)0,
2858 tempmode, -1, &done)) != 0 || done) {
2859 mutex_exit(&stp->sd_lock);
2860 if ((vp->v_type == VFIFO) &&
2861 (uiop->uio_fmode & FNDELAY) &&
2862 (error == EAGAIN))
2863 error = 0;
2864 goto out;
2865 }
2866 TRACE_1(TR_FAC_STREAMS_FR, TR_STRWRITE_WAKE,
2867 "strwrite wake:q %p awakes", wqp);
2868 if ((error = i_straccess(stp, JCWRITE)) != 0) {
2869 mutex_exit(&stp->sd_lock);
2870 goto out;
2871 }
2872 mutex_exit(&stp->sd_lock);
2873 }
2874 waitflag |= NOINTR;
2875 TRACE_2(TR_FAC_STREAMS_FR, TR_STRWRITE_RESID,
2876 "strwrite resid:q %p uiop %p", wqp, uiop);
2877 if (uiop->uio_resid) {
2878 /* Recheck for errors - needed for sockets */
2879 if ((stp->sd_wput_opt & SW_RECHECK_ERR) &&
2880 (stp->sd_flag & (STWRERR|STRHUP|STPLEX))) {
2881 mutex_enter(&stp->sd_lock);
2882 error = strwriteable(stp, B_FALSE, B_TRUE);
2883 mutex_exit(&stp->sd_lock);
2884 if (error != 0)
2885 return (error);
2886 }
2887 continue;
2888 }
2889 break;
2890 }
2891 out:
2892 /*
2893 * For historical reasons, applications expect EAGAIN when a data
2894 * mblk_t cannot be allocated, so change ENOMEM back to EAGAIN.
2895 */
2896 if (error == ENOMEM)
2897 error = EAGAIN;
2898 TRACE_3(TR_FAC_STREAMS_FR, TR_STRWRITE_OUT,
2899 "strwrite out:q %p out %d error %d", wqp, 2, error);
2900 return (error);
2901 }
2902
2903 /*
2904 * Stream head write service routine.
2905 * Its job is to wake up any sleeping writers when a queue
2906 * downstream needs data (part of the flow control in putq and getq).
2907 * It also must wake anyone sleeping on a poll().
2908 * For stream head right below mux module, it must also invoke put procedure
2909 * of next downstream module.
2910 */
2911 int
2912 strwsrv(queue_t *q)
2913 {
2914 struct stdata *stp;
2915 queue_t *tq;
2916 qband_t *qbp;
2917 int i;
2918 qband_t *myqbp;
2919 int isevent;
2920 unsigned char qbf[NBAND]; /* band flushing backenable flags */
2921
2922 TRACE_1(TR_FAC_STREAMS_FR,
2923 TR_STRWSRV, "strwsrv:q %p", q);
2924 stp = (struct stdata *)q->q_ptr;
2925 ASSERT(qclaimed(q));
2926 mutex_enter(&stp->sd_lock);
2927 ASSERT(!(stp->sd_flag & STPLEX));
2928
2929 if (stp->sd_flag & WSLEEP) {
2930 stp->sd_flag &= ~WSLEEP;
2931 cv_broadcast(&q->q_wait);
2932 }
2933 mutex_exit(&stp->sd_lock);
2934
2935 /* The other end of a stream pipe went away. */
2936 if ((tq = q->q_next) == NULL) {
2937 return (0);
2938 }
2939
2940 /* Find the next module forward that has a service procedure */
2941 claimstr(q);
2942 tq = q->q_nfsrv;
2943 ASSERT(tq != NULL);
2944
2945 if ((q->q_flag & QBACK)) {
2946 if ((tq->q_flag & QFULL)) {
2947 mutex_enter(QLOCK(tq));
2948 if (!(tq->q_flag & QFULL)) {
2949 mutex_exit(QLOCK(tq));
2950 goto wakeup;
2951 }
2952 /*
2953 * The queue must have become full again. Set QWANTW
2954 * again so strwsrv will be back enabled when
2955 * the queue becomes non-full next time.
2956 */
2957 tq->q_flag |= QWANTW;
2958 mutex_exit(QLOCK(tq));
2959 } else {
2960 wakeup:
2961 pollwakeup(&stp->sd_pollist, POLLWRNORM);
2962 mutex_enter(&stp->sd_lock);
2963 if (stp->sd_sigflags & S_WRNORM)
2964 strsendsig(stp->sd_siglist, S_WRNORM, 0, 0);
2965 mutex_exit(&stp->sd_lock);
2966 }
2967 }
2968
2969 isevent = 0;
2970 i = 1;
2971 bzero((caddr_t)qbf, NBAND);
2972 mutex_enter(QLOCK(tq));
2973 if ((myqbp = q->q_bandp) != NULL)
2974 for (qbp = tq->q_bandp; qbp && myqbp; qbp = qbp->qb_next) {
2975 ASSERT(myqbp);
2976 if ((myqbp->qb_flag & QB_BACK)) {
2977 if (qbp->qb_flag & QB_FULL) {
2978 /*
2979 * The band must have become full again.
2980 * Set QB_WANTW again so strwsrv will
2981 * be back enabled when the band becomes
2982 * non-full next time.
2983 */
2984 qbp->qb_flag |= QB_WANTW;
2985 } else {
2986 isevent = 1;
2987 qbf[i] = 1;
2988 }
2989 }
2990 myqbp = myqbp->qb_next;
2991 i++;
2992 }
2993 mutex_exit(QLOCK(tq));
2994
2995 if (isevent) {
2996 for (i = tq->q_nband; i; i--) {
2997 if (qbf[i]) {
2998 pollwakeup(&stp->sd_pollist, POLLWRBAND);
2999 mutex_enter(&stp->sd_lock);
3000 if (stp->sd_sigflags & S_WRBAND)
3001 strsendsig(stp->sd_siglist, S_WRBAND,
3002 (uchar_t)i, 0);
3003 mutex_exit(&stp->sd_lock);
3004 }
3005 }
3006 }
3007
3008 releasestr(q);
3009 return (0);
3010 }
3011
3012 /*
3013 * Special case of strcopyin/strcopyout for copying
3014 * struct strioctl that can deal with both data
3015 * models.
3016 */
3017
3018 #ifdef _LP64
3019
3020 static int
3021 strcopyin_strioctl(void *from, void *to, int flag, int copyflag)
3022 {
3023 struct strioctl32 strioc32;
3024 struct strioctl *striocp;
3025
3026 if (copyflag & U_TO_K) {
3027 ASSERT((copyflag & K_TO_K) == 0);
3028
3029 if ((flag & FMODELS) == DATAMODEL_ILP32) {
3030 if (copyin(from, &strioc32, sizeof (strioc32)))
3031 return (EFAULT);
3032
3033 striocp = (struct strioctl *)to;
3034 striocp->ic_cmd = strioc32.ic_cmd;
3035 striocp->ic_timout = strioc32.ic_timout;
3036 striocp->ic_len = strioc32.ic_len;
3037 striocp->ic_dp = (char *)(uintptr_t)strioc32.ic_dp;
3038
3039 } else { /* NATIVE data model */
3040 if (copyin(from, to, sizeof (struct strioctl))) {
3041 return (EFAULT);
3042 } else {
3043 return (0);
3044 }
3045 }
3046 } else {
3047 ASSERT(copyflag & K_TO_K);
3048 bcopy(from, to, sizeof (struct strioctl));
3049 }
3050 return (0);
3051 }
3052
3053 static int
3054 strcopyout_strioctl(void *from, void *to, int flag, int copyflag)
3055 {
3056 struct strioctl32 strioc32;
3057 struct strioctl *striocp;
3058
3059 if (copyflag & U_TO_K) {
3060 ASSERT((copyflag & K_TO_K) == 0);
3061
3062 if ((flag & FMODELS) == DATAMODEL_ILP32) {
3063 striocp = (struct strioctl *)from;
3064 strioc32.ic_cmd = striocp->ic_cmd;
3065 strioc32.ic_timout = striocp->ic_timout;
3066 strioc32.ic_len = striocp->ic_len;
3067 strioc32.ic_dp = (caddr32_t)(uintptr_t)striocp->ic_dp;
3068 ASSERT((char *)(uintptr_t)strioc32.ic_dp ==
3069 striocp->ic_dp);
3070
3071 if (copyout(&strioc32, to, sizeof (strioc32)))
3072 return (EFAULT);
3073
3074 } else { /* NATIVE data model */
3075 if (copyout(from, to, sizeof (struct strioctl))) {
3076 return (EFAULT);
3077 } else {
3078 return (0);
3079 }
3080 }
3081 } else {
3082 ASSERT(copyflag & K_TO_K);
3083 bcopy(from, to, sizeof (struct strioctl));
3084 }
3085 return (0);
3086 }
3087
3088 #else /* ! _LP64 */
3089
3090 /* ARGSUSED2 */
3091 static int
3092 strcopyin_strioctl(void *from, void *to, int flag, int copyflag)
3093 {
3094 return (strcopyin(from, to, sizeof (struct strioctl), copyflag));
3095 }
3096
3097 /* ARGSUSED2 */
3098 static int
3099 strcopyout_strioctl(void *from, void *to, int flag, int copyflag)
3100 {
3101 return (strcopyout(from, to, sizeof (struct strioctl), copyflag));
3102 }
3103
3104 #endif /* _LP64 */
3105
3106 /*
3107 * Determine type of job control semantics expected by user. The
3108 * possibilities are:
3109 * JCREAD - Behaves like read() on fd; send SIGTTIN
3110 * JCWRITE - Behaves like write() on fd; send SIGTTOU if TOSTOP set
3111 * JCSETP - Sets a value in the stream; send SIGTTOU, ignore TOSTOP
3112 * JCGETP - Gets a value in the stream; no signals.
3113 * See straccess in strsubr.c for usage of these values.
3114 *
3115 * This routine also returns -1 for I_STR as a special case; the
3116 * caller must call again with the real ioctl number for
3117 * classification.
3118 */
3119 static int
3120 job_control_type(int cmd)
3121 {
3122 switch (cmd) {
3123 case I_STR:
3124 return (-1);
3125
3126 case I_RECVFD:
3127 case I_E_RECVFD:
3128 return (JCREAD);
3129
3130 case I_FDINSERT:
3131 case I_SENDFD:
3132 return (JCWRITE);
3133
3134 case TCSETA:
3135 case TCSETAW:
3136 case TCSETAF:
3137 case TCSBRK:
3138 case TCXONC:
3139 case TCFLSH:
3140 case TCDSET: /* Obsolete */
3141 case TIOCSWINSZ:
3142 case TCSETS:
3143 case TCSETSW:
3144 case TCSETSF:
3145 case TIOCSETD:
3146 case TIOCHPCL:
3147 case TIOCSETP:
3148 case TIOCSETN:
3149 case TIOCEXCL:
3150 case TIOCNXCL:
3151 case TIOCFLUSH:
3152 case TIOCSETC:
3153 case TIOCLBIS:
3154 case TIOCLBIC:
3155 case TIOCLSET:
3156 case TIOCSBRK:
3157 case TIOCCBRK:
3158 case TIOCSDTR:
3159 case TIOCCDTR:
3160 case TIOCSLTC:
3161 case TIOCSTOP:
3162 case TIOCSTART:
3163 case TIOCSTI:
3164 case TIOCSPGRP:
3165 case TIOCMSET:
3166 case TIOCMBIS:
3167 case TIOCMBIC:
3168 case TIOCREMOTE:
3169 case TIOCSIGNAL:
3170 case LDSETT:
3171 case LDSMAP: /* Obsolete */
3172 case DIOCSETP:
3173 case I_FLUSH:
3174 case I_SRDOPT:
3175 case I_SETSIG:
3176 case I_SWROPT:
3177 case I_FLUSHBAND:
3178 case I_SETCLTIME:
3179 case I_SERROPT:
3180 case I_ESETSIG:
3181 case FIONBIO:
3182 case FIOASYNC:
3183 case FIOSETOWN:
3184 case JBOOT: /* Obsolete */
3185 case JTERM: /* Obsolete */
3186 case JTIMOM: /* Obsolete */
3187 case JZOMBOOT: /* Obsolete */
3188 case JAGENT: /* Obsolete */
3189 case JTRUN: /* Obsolete */
3190 case JXTPROTO: /* Obsolete */
3191 return (JCSETP);
3192 }
3193
3194 return (JCGETP);
3195 }
3196
3197 /*
3198 * ioctl for streams
3199 */
3200 int
3201 strioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, int copyflag,
3202 cred_t *crp, int *rvalp)
3203 {
3204 struct stdata *stp;
3205 struct strcmd *scp;
3206 struct strioctl strioc;
3207 struct uio uio;
3208 struct iovec iov;
3209 int access;
3210 mblk_t *mp;
3211 int error = 0;
3212 int done = 0;
3213 ssize_t rmin, rmax;
3214 queue_t *wrq;
3215 queue_t *rdq;
3216 boolean_t kioctl = B_FALSE;
3217 uint32_t auditing = AU_AUDITING();
3218
3219 if (flag & FKIOCTL) {
3220 copyflag = K_TO_K;
3221 kioctl = B_TRUE;
3222 }
3223 ASSERT(vp->v_stream);
3224 ASSERT(copyflag == U_TO_K || copyflag == K_TO_K);
3225 stp = vp->v_stream;
3226
3227 TRACE_3(TR_FAC_STREAMS_FR, TR_IOCTL_ENTER,
3228 "strioctl:stp %p cmd %X arg %lX", stp, cmd, arg);
3229
3230 /*
3231 * If the copy is kernel to kernel, make sure that the FNATIVE
3232 * flag is set. After this it would be a serious error to have
3233 * no model flag.
3234 */
3235 if (copyflag == K_TO_K)
3236 flag = (flag & ~FMODELS) | FNATIVE;
3237
3238 ASSERT((flag & FMODELS) != 0);
3239
3240 wrq = stp->sd_wrq;
3241 rdq = _RD(wrq);
3242
3243 access = job_control_type(cmd);
3244
3245 /* We should never see these here, should be handled by iwscn */
3246 if (cmd == SRIOCSREDIR || cmd == SRIOCISREDIR)
3247 return (EINVAL);
3248
3249 mutex_enter(&stp->sd_lock);
3250 if ((access != -1) && ((error = i_straccess(stp, access)) != 0)) {
3251 mutex_exit(&stp->sd_lock);
3252 return (error);
3253 }
3254 mutex_exit(&stp->sd_lock);
3255
3256 /*
3257 * Check for sgttyb-related ioctls first, and complain as
3258 * necessary.
3259 */
3260 switch (cmd) {
3261 case TIOCGETP:
3262 case TIOCSETP:
3263 case TIOCSETN:
3264 if (sgttyb_handling >= 2 && !sgttyb_complaint) {
3265 sgttyb_complaint = B_TRUE;
3266 cmn_err(CE_NOTE,
3267 "application used obsolete TIOC[GS]ET");
3268 }
3269 if (sgttyb_handling >= 3) {
3270 tsignal(curthread, SIGSYS);
3271 return (EIO);
3272 }
3273 break;
3274 }
3275
3276 mutex_enter(&stp->sd_lock);
3277
3278 switch (cmd) {
3279 case I_RECVFD:
3280 case I_E_RECVFD:
3281 case I_PEEK:
3282 case I_NREAD:
3283 case FIONREAD:
3284 case FIORDCHK:
3285 case I_ATMARK:
3286 case FIONBIO:
3287 case FIOASYNC:
3288 if (stp->sd_flag & (STRDERR|STPLEX)) {
3289 error = strgeterr(stp, STRDERR|STPLEX, 0);
3290 if (error != 0) {
3291 mutex_exit(&stp->sd_lock);
3292 return (error);
3293 }
3294 }
3295 break;
3296
3297 default:
3298 if (stp->sd_flag & (STRDERR|STWRERR|STPLEX)) {
3299 error = strgeterr(stp, STRDERR|STWRERR|STPLEX, 0);
3300 if (error != 0) {
3301 mutex_exit(&stp->sd_lock);
3302 return (error);
3303 }
3304 }
3305 }
3306
3307 mutex_exit(&stp->sd_lock);
3308
3309 switch (cmd) {
3310 default:
3311 /*
3312 * The stream head has hardcoded knowledge of a
3313 * miscellaneous collection of terminal-, keyboard- and
3314 * mouse-related ioctls, enumerated below. This hardcoded
3315 * knowledge allows the stream head to automatically
3316 * convert transparent ioctl requests made by userland
3317 * programs into I_STR ioctls which many old STREAMS
3318 * modules and drivers require.
3319 *
3320 * No new ioctls should ever be added to this list.
3321 * Instead, the STREAMS module or driver should be written
3322 * to either handle transparent ioctls or require any
3323 * userland programs to use I_STR ioctls (by returning
3324 * EINVAL to any transparent ioctl requests).
3325 *
3326 * More importantly, removing ioctls from this list should
3327 * be done with the utmost care, since our STREAMS modules
3328 * and drivers *count* on the stream head performing this
3329 * conversion, and thus may panic while processing
3330 * transparent ioctl request for one of these ioctls (keep
3331 * in mind that third party modules and drivers may have
3332 * similar problems).
3333 */
3334 if (((cmd & IOCTYPE) == LDIOC) ||
3335 ((cmd & IOCTYPE) == tIOC) ||
3336 ((cmd & IOCTYPE) == TIOC) ||
3337 ((cmd & IOCTYPE) == KIOC) ||
3338 ((cmd & IOCTYPE) == MSIOC) ||
3339 ((cmd & IOCTYPE) == VUIOC)) {
3340 /*
3341 * The ioctl is a tty ioctl - set up strioc buffer
3342 * and call strdoioctl() to do the work.
3343 */
3344 if (stp->sd_flag & STRHUP)
3345 return (ENXIO);
3346 strioc.ic_cmd = cmd;
3347 strioc.ic_timout = INFTIM;
3348
3349 switch (cmd) {
3350
3351 case TCXONC:
3352 case TCSBRK:
3353 case TCFLSH:
3354 case TCDSET:
3355 {
3356 int native_arg = (int)arg;
3357 strioc.ic_len = sizeof (int);
3358 strioc.ic_dp = (char *)&native_arg;
3359 return (strdoioctl(stp, &strioc, flag,
3360 K_TO_K, crp, rvalp));
3361 }
3362
3363 case TCSETA:
3364 case TCSETAW:
3365 case TCSETAF:
3366 strioc.ic_len = sizeof (struct termio);
3367 strioc.ic_dp = (char *)arg;
3368 return (strdoioctl(stp, &strioc, flag,
3369 copyflag, crp, rvalp));
3370
3371 case TCSETS:
3372 case TCSETSW:
3373 case TCSETSF:
3374 strioc.ic_len = sizeof (struct termios);
3375 strioc.ic_dp = (char *)arg;
3376 return (strdoioctl(stp, &strioc, flag,
3377 copyflag, crp, rvalp));
3378
3379 case LDSETT:
3380 strioc.ic_len = sizeof (struct termcb);
3381 strioc.ic_dp = (char *)arg;
3382 return (strdoioctl(stp, &strioc, flag,
3383 copyflag, crp, rvalp));
3384
3385 case TIOCSETP:
3386 strioc.ic_len = sizeof (struct sgttyb);
3387 strioc.ic_dp = (char *)arg;
3388 return (strdoioctl(stp, &strioc, flag,
3389 copyflag, crp, rvalp));
3390
3391 case TIOCSTI:
3392 if ((flag & FREAD) == 0 &&
3393 secpolicy_sti(crp) != 0) {
3394 return (EPERM);
3395 }
3396 mutex_enter(&stp->sd_lock);
3397 mutex_enter(&curproc->p_splock);
3398 if (stp->sd_sidp != curproc->p_sessp->s_sidp &&
3399 secpolicy_sti(crp) != 0) {
3400 mutex_exit(&curproc->p_splock);
3401 mutex_exit(&stp->sd_lock);
3402 return (EACCES);
3403 }
3404 mutex_exit(&curproc->p_splock);
3405 mutex_exit(&stp->sd_lock);
3406
3407 strioc.ic_len = sizeof (char);
3408 strioc.ic_dp = (char *)arg;
3409 return (strdoioctl(stp, &strioc, flag,
3410 copyflag, crp, rvalp));
3411
3412 case TIOCSWINSZ:
3413 strioc.ic_len = sizeof (struct winsize);
3414 strioc.ic_dp = (char *)arg;
3415 return (strdoioctl(stp, &strioc, flag,
3416 copyflag, crp, rvalp));
3417
3418 case TIOCSSIZE:
3419 strioc.ic_len = sizeof (struct ttysize);
3420 strioc.ic_dp = (char *)arg;
3421 return (strdoioctl(stp, &strioc, flag,
3422 copyflag, crp, rvalp));
3423
3424 case TIOCSSOFTCAR:
3425 case KIOCTRANS:
3426 case KIOCTRANSABLE:
3427 case KIOCCMD:
3428 case KIOCSDIRECT:
3429 case KIOCSCOMPAT:
3430 case KIOCSKABORTEN:
3431 case KIOCSRPTDELAY:
3432 case KIOCSRPTRATE:
3433 case VUIDSFORMAT:
3434 case TIOCSPPS:
3435 strioc.ic_len = sizeof (int);
3436 strioc.ic_dp = (char *)arg;
3437 return (strdoioctl(stp, &strioc, flag,
3438 copyflag, crp, rvalp));
3439
3440 case KIOCSETKEY:
3441 case KIOCGETKEY:
3442 strioc.ic_len = sizeof (struct kiockey);
3443 strioc.ic_dp = (char *)arg;
3444 return (strdoioctl(stp, &strioc, flag,
3445 copyflag, crp, rvalp));
3446
3447 case KIOCSKEY:
3448 case KIOCGKEY:
3449 strioc.ic_len = sizeof (struct kiockeymap);
3450 strioc.ic_dp = (char *)arg;
3451 return (strdoioctl(stp, &strioc, flag,
3452 copyflag, crp, rvalp));
3453
3454 case KIOCSLED:
3455 /* arg is a pointer to char */
3456 strioc.ic_len = sizeof (char);
3457 strioc.ic_dp = (char *)arg;
3458 return (strdoioctl(stp, &strioc, flag,
3459 copyflag, crp, rvalp));
3460
3461 case MSIOSETPARMS:
3462 strioc.ic_len = sizeof (Ms_parms);
3463 strioc.ic_dp = (char *)arg;
3464 return (strdoioctl(stp, &strioc, flag,
3465 copyflag, crp, rvalp));
3466
3467 case VUIDSADDR:
3468 case VUIDGADDR:
3469 strioc.ic_len = sizeof (struct vuid_addr_probe);
3470 strioc.ic_dp = (char *)arg;
3471 return (strdoioctl(stp, &strioc, flag,
3472 copyflag, crp, rvalp));
3473
3474 /*
3475 * These M_IOCTL's don't require any data to be sent
3476 * downstream, and the driver will allocate and link
3477 * on its own mblk_t upon M_IOCACK -- thus we set
3478 * ic_len to zero and set ic_dp to arg so we know
3479 * where to copyout to later.
3480 */
3481 case TIOCGSOFTCAR:
3482 case TIOCGWINSZ:
3483 case TIOCGSIZE:
3484 case KIOCGTRANS:
3485 case KIOCGTRANSABLE:
3486 case KIOCTYPE:
3487 case KIOCGDIRECT:
3488 case KIOCGCOMPAT:
3489 case KIOCLAYOUT:
3490 case KIOCGLED:
3491 case MSIOGETPARMS:
3492 case MSIOBUTTONS:
3493 case VUIDGFORMAT:
3494 case TIOCGPPS:
3495 case TIOCGPPSEV:
3496 case TCGETA:
3497 case TCGETS:
3498 case LDGETT:
3499 case TIOCGETP:
3500 case KIOCGRPTDELAY:
3501 case KIOCGRPTRATE:
3502 strioc.ic_len = 0;
3503 strioc.ic_dp = (char *)arg;
3504 return (strdoioctl(stp, &strioc, flag,
3505 copyflag, crp, rvalp));
3506 }
3507 }
3508
3509 /*
3510 * Unknown cmd - send it down as a transparent ioctl.
3511 */
3512 strioc.ic_cmd = cmd;
3513 strioc.ic_timout = INFTIM;
3514 strioc.ic_len = TRANSPARENT;
3515 strioc.ic_dp = (char *)&arg;
3516
3517 return (strdoioctl(stp, &strioc, flag, copyflag, crp, rvalp));
3518
3519 case I_STR:
3520 /*
3521 * Stream ioctl. Read in an strioctl buffer from the user
3522 * along with any data specified and send it downstream.
3523 * Strdoioctl allows only one ioctl message at
3524 * a time, and waits for the acknowledgement.
3525 */
3526
3527 if (stp->sd_flag & STRHUP)
3528 return (ENXIO);
3529
3530 error = strcopyin_strioctl((void *)arg, &strioc, flag,
3531 copyflag);
3532 if (error != 0)
3533 return (error);
3534
3535 if ((strioc.ic_len < 0) || (strioc.ic_timout < -1))
3536 return (EINVAL);
3537
3538 access = job_control_type(strioc.ic_cmd);
3539 mutex_enter(&stp->sd_lock);
3540 if ((access != -1) &&
3541 ((error = i_straccess(stp, access)) != 0)) {
3542 mutex_exit(&stp->sd_lock);
3543 return (error);
3544 }
3545 mutex_exit(&stp->sd_lock);
3546
3547 /*
3548 * The I_STR facility provides a trap door for malicious
3549 * code to send down bogus streamio(7I) ioctl commands to
3550 * unsuspecting STREAMS modules and drivers which expect to
3551 * only get these messages from the stream head.
3552 * Explicitly prohibit any streamio ioctls which can be
3553 * passed downstream by the stream head. Note that we do
3554 * not block all streamio ioctls because the ioctl
3555 * numberspace is not well managed and thus it's possible
3556 * that a module or driver's ioctl numbers may accidentally
3557 * collide with them.
3558 */
3559 switch (strioc.ic_cmd) {
3560 case I_LINK:
3561 case I_PLINK:
3562 case I_UNLINK:
3563 case I_PUNLINK:
3564 case _I_GETPEERCRED:
3565 case _I_PLINK_LH:
3566 return (EINVAL);
3567 }
3568
3569 error = strdoioctl(stp, &strioc, flag, copyflag, crp, rvalp);
3570 if (error == 0) {
3571 error = strcopyout_strioctl(&strioc, (void *)arg,
3572 flag, copyflag);
3573 }
3574 return (error);
3575
3576 case _I_CMD:
3577 /*
3578 * Like I_STR, but without using M_IOC* messages and without
3579 * copyins/copyouts beyond the passed-in argument.
3580 */
3581 if (stp->sd_flag & STRHUP)
3582 return (ENXIO);
3583
3584 if ((scp = kmem_alloc(sizeof (strcmd_t), KM_NOSLEEP)) == NULL)
3585 return (ENOMEM);
3586
3587 if (copyin((void *)arg, scp, sizeof (strcmd_t))) {
3588 kmem_free(scp, sizeof (strcmd_t));
3589 return (EFAULT);
3590 }
3591
3592 access = job_control_type(scp->sc_cmd);
3593 mutex_enter(&stp->sd_lock);
3594 if (access != -1 && (error = i_straccess(stp, access)) != 0) {
3595 mutex_exit(&stp->sd_lock);
3596 kmem_free(scp, sizeof (strcmd_t));
3597 return (error);
3598 }
3599 mutex_exit(&stp->sd_lock);
3600
3601 *rvalp = 0;
3602 if ((error = strdocmd(stp, scp, crp)) == 0) {
3603 if (copyout(scp, (void *)arg, sizeof (strcmd_t)))
3604 error = EFAULT;
3605 }
3606 kmem_free(scp, sizeof (strcmd_t));
3607 return (error);
3608
3609 case I_NREAD:
3610 /*
3611 * Return number of bytes of data in first message
3612 * in queue in "arg" and return the number of messages
3613 * in queue in return value.
3614 */
3615 {
3616 size_t size;
3617 int retval;
3618 int count = 0;
3619
3620 mutex_enter(QLOCK(rdq));
3621
3622 size = msgdsize(rdq->q_first);
3623 for (mp = rdq->q_first; mp != NULL; mp = mp->b_next)
3624 count++;
3625
3626 mutex_exit(QLOCK(rdq));
3627 if (stp->sd_struiordq) {
3628 infod_t infod;
3629
3630 infod.d_cmd = INFOD_COUNT;
3631 infod.d_count = 0;
3632 if (count == 0) {
3633 infod.d_cmd |= INFOD_FIRSTBYTES;
3634 infod.d_bytes = 0;
3635 }
3636 infod.d_res = 0;
3637 (void) infonext(rdq, &infod);
3638 count += infod.d_count;
3639 if (infod.d_res & INFOD_FIRSTBYTES)
3640 size = infod.d_bytes;
3641 }
3642
3643 /*
3644 * Drop down from size_t to the "int" required by the
3645 * interface. Cap at INT_MAX.
3646 */
3647 retval = MIN(size, INT_MAX);
3648 error = strcopyout(&retval, (void *)arg, sizeof (retval),
3649 copyflag);
3650 if (!error)
3651 *rvalp = count;
3652 return (error);
3653 }
3654
3655 case FIONREAD:
3656 /*
3657 * Return number of bytes of data in all data messages
3658 * in queue in "arg".
3659 */
3660 {
3661 size_t size = 0;
3662 int retval;
3663
3664 mutex_enter(QLOCK(rdq));
3665 for (mp = rdq->q_first; mp != NULL; mp = mp->b_next)
3666 size += msgdsize(mp);
3667 mutex_exit(QLOCK(rdq));
3668
3669 if (stp->sd_struiordq) {
3670 infod_t infod;
3671
3672 infod.d_cmd = INFOD_BYTES;
3673 infod.d_res = 0;
3674 infod.d_bytes = 0;
3675 (void) infonext(rdq, &infod);
3676 size += infod.d_bytes;
3677 }
3678
3679 /*
3680 * Drop down from size_t to the "int" required by the
3681 * interface. Cap at INT_MAX.
3682 */
3683 retval = MIN(size, INT_MAX);
3684 error = strcopyout(&retval, (void *)arg, sizeof (retval),
3685 copyflag);
3686
3687 *rvalp = 0;
3688 return (error);
3689 }
3690 case FIORDCHK:
3691 /*
3692 * FIORDCHK does not use arg value (like FIONREAD),
3693 * instead a count is returned. I_NREAD value may
3694 * not be accurate but safe. The real thing to do is
3695 * to add the msgdsizes of all data messages until
3696 * a non-data message.
3697 */
3698 {
3699 size_t size = 0;
3700
3701 mutex_enter(QLOCK(rdq));
3702 for (mp = rdq->q_first; mp != NULL; mp = mp->b_next)
3703 size += msgdsize(mp);
3704 mutex_exit(QLOCK(rdq));
3705
3706 if (stp->sd_struiordq) {
3707 infod_t infod;
3708
3709 infod.d_cmd = INFOD_BYTES;
3710 infod.d_res = 0;
3711 infod.d_bytes = 0;
3712 (void) infonext(rdq, &infod);
3713 size += infod.d_bytes;
3714 }
3715
3716 /*
3717 * Since ioctl returns an int, and memory sizes under
3718 * LP64 may not fit, we return INT_MAX if the count was
3719 * actually greater.
3720 */
3721 *rvalp = MIN(size, INT_MAX);
3722 return (0);
3723 }
3724
3725 case I_FIND:
3726 /*
3727 * Get module name.
3728 */
3729 {
3730 char mname[FMNAMESZ + 1];
3731 queue_t *q;
3732
3733 error = (copyflag & U_TO_K ? copyinstr : copystr)((void *)arg,
3734 mname, FMNAMESZ + 1, NULL);
3735 if (error)
3736 return ((error == ENAMETOOLONG) ? EINVAL : EFAULT);
3737
3738 /*
3739 * Return EINVAL if we're handed a bogus module name.
3740 */
3741 if (fmodsw_find(mname, FMODSW_LOAD) == NULL) {
3742 TRACE_0(TR_FAC_STREAMS_FR,
3743 TR_I_CANT_FIND, "couldn't I_FIND");
3744 return (EINVAL);
3745 }
3746
3747 *rvalp = 0;
3748
3749 /* Look downstream to see if module is there. */
3750 claimstr(stp->sd_wrq);
3751 for (q = stp->sd_wrq->q_next; q; q = q->q_next) {
3752 if (q->q_flag & QREADR) {
3753 q = NULL;
3754 break;
3755 }
3756 if (strcmp(mname, Q2NAME(q)) == 0)
3757 break;
3758 }
3759 releasestr(stp->sd_wrq);
3760
3761 *rvalp = (q ? 1 : 0);
3762 return (error);
3763 }
3764
3765 case I_PUSH:
3766 case __I_PUSH_NOCTTY:
3767 /*
3768 * Push a module.
3769 * For the case __I_PUSH_NOCTTY push a module but
3770 * do not allocate controlling tty. See bugid 4025044
3771 */
3772
3773 {
3774 char mname[FMNAMESZ + 1];
3775 fmodsw_impl_t *fp;
3776 dev_t dummydev;
3777
3778 if (stp->sd_flag & STRHUP)
3779 return (ENXIO);
3780
3781 /*
3782 * Get module name and look up in fmodsw.
3783 */
3784 error = (copyflag & U_TO_K ? copyinstr : copystr)((void *)arg,
3785 mname, FMNAMESZ + 1, NULL);
3786 if (error)
3787 return ((error == ENAMETOOLONG) ? EINVAL : EFAULT);
3788
3789 if ((fp = fmodsw_find(mname, FMODSW_HOLD | FMODSW_LOAD)) ==
3790 NULL)
3791 return (EINVAL);
3792
3793 TRACE_2(TR_FAC_STREAMS_FR, TR_I_PUSH,
3794 "I_PUSH:fp %p stp %p", fp, stp);
3795
3796 if (error = strstartplumb(stp, flag, cmd)) {
3797 fmodsw_rele(fp);
3798 return (error);
3799 }
3800
3801 /*
3802 * See if any more modules can be pushed on this stream.
3803 * Note that this check must be done after strstartplumb()
3804 * since otherwise multiple threads issuing I_PUSHes on
3805 * the same stream will be able to exceed nstrpush.
3806 */
3807 mutex_enter(&stp->sd_lock);
3808 if (stp->sd_pushcnt >= nstrpush) {
3809 fmodsw_rele(fp);
3810 strendplumb(stp);
3811 mutex_exit(&stp->sd_lock);
3812 return (EINVAL);
3813 }
3814 mutex_exit(&stp->sd_lock);
3815
3816 /*
3817 * Push new module and call its open routine
3818 * via qattach(). Modules don't change device
3819 * numbers, so just ignore dummydev here.
3820 */
3821 dummydev = vp->v_rdev;
3822 if ((error = qattach(rdq, &dummydev, 0, crp, fp,
3823 B_FALSE)) == 0) {
3824 if (vp->v_type == VCHR && /* sorry, no pipes allowed */
3825 (cmd == I_PUSH) && (stp->sd_flag & STRISTTY)) {
3826 /*
3827 * try to allocate it as a controlling terminal
3828 */
3829 (void) strctty(stp);
3830 }
3831 }
3832
3833 mutex_enter(&stp->sd_lock);
3834
3835 /*
3836 * As a performance concern we are caching the values of
3837 * q_minpsz and q_maxpsz of the module below the stream
3838 * head in the stream head.
3839 */
3840 mutex_enter(QLOCK(stp->sd_wrq->q_next));
3841 rmin = stp->sd_wrq->q_next->q_minpsz;
3842 rmax = stp->sd_wrq->q_next->q_maxpsz;
3843 mutex_exit(QLOCK(stp->sd_wrq->q_next));
3844
3845 /* Do this processing here as a performance concern */
3846 if (strmsgsz != 0) {
3847 if (rmax == INFPSZ)
3848 rmax = strmsgsz;
3849 else {
3850 if (vp->v_type == VFIFO)
3851 rmax = MIN(PIPE_BUF, rmax);
3852 else rmax = MIN(strmsgsz, rmax);
3853 }
3854 }
3855
3856 mutex_enter(QLOCK(wrq));
3857 stp->sd_qn_minpsz = rmin;
3858 stp->sd_qn_maxpsz = rmax;
3859 mutex_exit(QLOCK(wrq));
3860
3861 strendplumb(stp);
3862 mutex_exit(&stp->sd_lock);
3863 return (error);
3864 }
3865
3866 case I_POP:
3867 {
3868 queue_t *q;
3869
3870 if (stp->sd_flag & STRHUP)
3871 return (ENXIO);
3872 if (!wrq->q_next) /* for broken pipes */
3873 return (EINVAL);
3874
3875 if (error = strstartplumb(stp, flag, cmd))
3876 return (error);
3877
3878 /*
3879 * If there is an anchor on this stream and popping
3880 * the current module would attempt to pop through the
3881 * anchor, then disallow the pop unless we have sufficient
3882 * privileges; take the cheapest (non-locking) check
3883 * first.
3884 */
3885 if (secpolicy_ip_config(crp, B_TRUE) != 0 ||
3886 (stp->sd_anchorzone != crgetzoneid(crp))) {
3887 mutex_enter(&stp->sd_lock);
3888 /*
3889 * Anchors only apply if there's at least one
3890 * module on the stream (sd_pushcnt > 0).
3891 */
3892 if (stp->sd_pushcnt > 0 &&
3893 stp->sd_pushcnt == stp->sd_anchor &&
3894 stp->sd_vnode->v_type != VFIFO) {
3895 strendplumb(stp);
3896 mutex_exit(&stp->sd_lock);
3897 if (stp->sd_anchorzone != crgetzoneid(crp))
3898 return (EINVAL);
3899 /* Audit and report error */
3900 return (secpolicy_ip_config(crp, B_FALSE));
3901 }
3902 mutex_exit(&stp->sd_lock);
3903 }
3904
3905 q = wrq->q_next;
3906 TRACE_2(TR_FAC_STREAMS_FR, TR_I_POP,
3907 "I_POP:%p from %p", q, stp);
3908 if (q->q_next == NULL || (q->q_flag & (QREADR|QISDRV))) {
3909 error = EINVAL;
3910 } else {
3911 qdetach(_RD(q), 1, flag, crp, B_FALSE);
3912 error = 0;
3913 }
3914 mutex_enter(&stp->sd_lock);
3915
3916 /*
3917 * As a performance concern we are caching the values of
3918 * q_minpsz and q_maxpsz of the module below the stream
3919 * head in the stream head.
3920 */
3921 mutex_enter(QLOCK(wrq->q_next));
3922 rmin = wrq->q_next->q_minpsz;
3923 rmax = wrq->q_next->q_maxpsz;
3924 mutex_exit(QLOCK(wrq->q_next));
3925
3926 /* Do this processing here as a performance concern */
3927 if (strmsgsz != 0) {
3928 if (rmax == INFPSZ)
3929 rmax = strmsgsz;
3930 else {
3931 if (vp->v_type == VFIFO)
3932 rmax = MIN(PIPE_BUF, rmax);
3933 else rmax = MIN(strmsgsz, rmax);
3934 }
3935 }
3936
3937 mutex_enter(QLOCK(wrq));
3938 stp->sd_qn_minpsz = rmin;
3939 stp->sd_qn_maxpsz = rmax;
3940 mutex_exit(QLOCK(wrq));
3941
3942 /* If we popped through the anchor, then reset the anchor. */
3943 if (stp->sd_pushcnt < stp->sd_anchor) {
3944 stp->sd_anchor = 0;
3945 stp->sd_anchorzone = 0;
3946 }
3947 strendplumb(stp);
3948 mutex_exit(&stp->sd_lock);
3949 return (error);
3950 }
3951
3952 case _I_MUXID2FD:
3953 {
3954 /*
3955 * Create a fd for a I_PLINK'ed lower stream with a given
3956 * muxid. With the fd, application can send down ioctls,
3957 * like I_LIST, to the previously I_PLINK'ed stream. Note
3958 * that after getting the fd, the application has to do an
3959 * I_PUNLINK on the muxid before it can do any operation
3960 * on the lower stream. This is required by spec1170.
3961 *
3962 * The fd used to do this ioctl should point to the same
3963 * controlling device used to do the I_PLINK. If it uses
3964 * a different stream or an invalid muxid, I_MUXID2FD will
3965 * fail. The error code is set to EINVAL.
3966 *
3967 * The intended use of this interface is the following.
3968 * An application I_PLINK'ed a stream and exits. The fd
3969 * to the lower stream is gone. Another application
3970 * wants to get a fd to the lower stream, it uses I_MUXID2FD.
3971 */
3972 int muxid = (int)arg;
3973 int fd;
3974 linkinfo_t *linkp;
3975 struct file *fp;
3976 netstack_t *ns;
3977 str_stack_t *ss;
3978
3979 /*
3980 * Do not allow the wildcard muxid. This ioctl is not
3981 * intended to find an arbitrary link.
3982 */
3983 if (muxid == 0) {
3984 return (EINVAL);
3985 }
3986
3987 ns = netstack_find_by_cred(crp);
3988 ASSERT(ns != NULL);
3989 ss = ns->netstack_str;
3990 ASSERT(ss != NULL);
3991
3992 mutex_enter(&muxifier);
3993 linkp = findlinks(vp->v_stream, muxid, LINKPERSIST, ss);
3994 if (linkp == NULL) {
3995 mutex_exit(&muxifier);
3996 netstack_rele(ss->ss_netstack);
3997 return (EINVAL);
3998 }
3999
4000 if ((fd = ufalloc(0)) == -1) {
4001 mutex_exit(&muxifier);
4002 netstack_rele(ss->ss_netstack);
4003 return (EMFILE);
4004 }
4005 fp = linkp->li_fpdown;
4006 mutex_enter(&fp->f_tlock);
4007 fp->f_count++;
4008 mutex_exit(&fp->f_tlock);
4009 mutex_exit(&muxifier);
4010 setf(fd, fp);
4011 *rvalp = fd;
4012 netstack_rele(ss->ss_netstack);
4013 return (0);
4014 }
4015
4016 case _I_INSERT:
4017 {
4018 /*
4019 * To insert a module to a given position in a stream.
4020 * In the first release, only allow privileged user
4021 * to use this ioctl. Furthermore, the insert is only allowed
4022 * below an anchor if the zoneid is the same as the zoneid
4023 * which created the anchor.
4024 *
4025 * Note that we do not plan to support this ioctl
4026 * on pipes in the first release. We want to learn more
4027 * about the implications of these ioctls before extending
4028 * their support. And we do not think these features are
4029 * valuable for pipes.
4030 */
4031 STRUCT_DECL(strmodconf, strmodinsert);
4032 char mod_name[FMNAMESZ + 1];
4033 fmodsw_impl_t *fp;
4034 dev_t dummydev;
4035 queue_t *tmp_wrq;
4036 int pos;
4037 boolean_t is_insert;
4038
4039 STRUCT_INIT(strmodinsert, flag);
4040 if (stp->sd_flag & STRHUP)
4041 return (ENXIO);
4042 if (STRMATED(stp))
4043 return (EINVAL);
4044 if ((error = secpolicy_net_config(crp, B_FALSE)) != 0)
4045 return (error);
4046 if (stp->sd_anchor != 0 &&
4047 stp->sd_anchorzone != crgetzoneid(crp))
4048 return (EINVAL);
4049
4050 error = strcopyin((void *)arg, STRUCT_BUF(strmodinsert),
4051 STRUCT_SIZE(strmodinsert), copyflag);
4052 if (error)
4053 return (error);
4054
4055 /*
4056 * Get module name and look up in fmodsw.
4057 */
4058 error = (copyflag & U_TO_K ? copyinstr :
4059 copystr)(STRUCT_FGETP(strmodinsert, mod_name),
4060 mod_name, FMNAMESZ + 1, NULL);
4061 if (error)
4062 return ((error == ENAMETOOLONG) ? EINVAL : EFAULT);
4063
4064 if ((fp = fmodsw_find(mod_name, FMODSW_HOLD | FMODSW_LOAD)) ==
4065 NULL)
4066 return (EINVAL);
4067
4068 if (error = strstartplumb(stp, flag, cmd)) {
4069 fmodsw_rele(fp);
4070 return (error);
4071 }
4072
4073 /*
4074 * Is this _I_INSERT just like an I_PUSH? We need to know
4075 * this because we do some optimizations if this is a
4076 * module being pushed.
4077 */
4078 pos = STRUCT_FGET(strmodinsert, pos);
4079 is_insert = (pos != 0);
4080
4081 /*
4082 * Make sure pos is valid. Even though it is not an I_PUSH,
4083 * we impose the same limit on the number of modules in a
4084 * stream.
4085 */
4086 mutex_enter(&stp->sd_lock);
4087 if (stp->sd_pushcnt >= nstrpush || pos < 0 ||
4088 pos > stp->sd_pushcnt) {
4089 fmodsw_rele(fp);
4090 strendplumb(stp);
4091 mutex_exit(&stp->sd_lock);
4092 return (EINVAL);
4093 }
4094 if (stp->sd_anchor != 0) {
4095 /*
4096 * Is this insert below the anchor?
4097 * Pushcnt hasn't been increased yet hence
4098 * we test for greater than here, and greater or
4099 * equal after qattach.
4100 */
4101 if (pos > (stp->sd_pushcnt - stp->sd_anchor) &&
4102 stp->sd_anchorzone != crgetzoneid(crp)) {
4103 fmodsw_rele(fp);
4104 strendplumb(stp);
4105 mutex_exit(&stp->sd_lock);
4106 return (EPERM);
4107 }
4108 }
4109
4110 mutex_exit(&stp->sd_lock);
4111
4112 /*
4113 * First find the correct position at which this module is
4114 * to be inserted. We don't need to call claimstr()
4115 * as the stream should not be changing at this point.
4116 *
4117 * Insert new module and call its open routine
4118 * via qattach(). Modules don't change device
4119 * numbers, so just ignore dummydev here.
4120 */
4121 for (tmp_wrq = stp->sd_wrq; pos > 0;
4122 tmp_wrq = tmp_wrq->q_next, pos--) {
4123 ASSERT(SAMESTR(tmp_wrq));
4124 }
4125 dummydev = vp->v_rdev;
4126 if ((error = qattach(_RD(tmp_wrq), &dummydev, 0, crp,
4127 fp, is_insert)) != 0) {
4128 mutex_enter(&stp->sd_lock);
4129 strendplumb(stp);
4130 mutex_exit(&stp->sd_lock);
4131 return (error);
4132 }
4133
4134 mutex_enter(&stp->sd_lock);
4135
4136 /*
4137 * As a performance concern we are caching the values of
4138 * q_minpsz and q_maxpsz of the module below the stream
4139 * head in the stream head.
4140 */
4141 if (!is_insert) {
4142 mutex_enter(QLOCK(stp->sd_wrq->q_next));
4143 rmin = stp->sd_wrq->q_next->q_minpsz;
4144 rmax = stp->sd_wrq->q_next->q_maxpsz;
4145 mutex_exit(QLOCK(stp->sd_wrq->q_next));
4146
4147 /* Do this processing here as a performance concern */
4148 if (strmsgsz != 0) {
4149 if (rmax == INFPSZ) {
4150 rmax = strmsgsz;
4151 } else {
4152 rmax = MIN(strmsgsz, rmax);
4153 }
4154 }
4155
4156 mutex_enter(QLOCK(wrq));
4157 stp->sd_qn_minpsz = rmin;
4158 stp->sd_qn_maxpsz = rmax;
4159 mutex_exit(QLOCK(wrq));
4160 }
4161
4162 /*
4163 * Need to update the anchor value if this module is
4164 * inserted below the anchor point.
4165 */
4166 if (stp->sd_anchor != 0) {
4167 pos = STRUCT_FGET(strmodinsert, pos);
4168 if (pos >= (stp->sd_pushcnt - stp->sd_anchor))
4169 stp->sd_anchor++;
4170 }
4171
4172 strendplumb(stp);
4173 mutex_exit(&stp->sd_lock);
4174 return (0);
4175 }
4176
4177 case _I_REMOVE:
4178 {
4179 /*
4180 * To remove a module with a given name in a stream. The
4181 * caller of this ioctl needs to provide both the name and
4182 * the position of the module to be removed. This eliminates
4183 * the ambiguity of removal if a module is inserted/pushed
4184 * multiple times in a stream. In the first release, only
4185 * allow privileged user to use this ioctl.
4186 * Furthermore, the remove is only allowed
4187 * below an anchor if the zoneid is the same as the zoneid
4188 * which created the anchor.
4189 *
4190 * Note that we do not plan to support this ioctl
4191 * on pipes in the first release. We want to learn more
4192 * about the implications of these ioctls before extending
4193 * their support. And we do not think these features are
4194 * valuable for pipes.
4195 *
4196 * Also note that _I_REMOVE cannot be used to remove a
4197 * driver or the stream head.
4198 */
4199 STRUCT_DECL(strmodconf, strmodremove);
4200 queue_t *q;
4201 int pos;
4202 char mod_name[FMNAMESZ + 1];
4203 boolean_t is_remove;
4204
4205 STRUCT_INIT(strmodremove, flag);
4206 if (stp->sd_flag & STRHUP)
4207 return (ENXIO);
4208 if (STRMATED(stp))
4209 return (EINVAL);
4210 if ((error = secpolicy_net_config(crp, B_FALSE)) != 0)
4211 return (error);
4212 if (stp->sd_anchor != 0 &&
4213 stp->sd_anchorzone != crgetzoneid(crp))
4214 return (EINVAL);
4215
4216 error = strcopyin((void *)arg, STRUCT_BUF(strmodremove),
4217 STRUCT_SIZE(strmodremove), copyflag);
4218 if (error)
4219 return (error);
4220
4221 error = (copyflag & U_TO_K ? copyinstr :
4222 copystr)(STRUCT_FGETP(strmodremove, mod_name),
4223 mod_name, FMNAMESZ + 1, NULL);
4224 if (error)
4225 return ((error == ENAMETOOLONG) ? EINVAL : EFAULT);
4226
4227 if ((error = strstartplumb(stp, flag, cmd)) != 0)
4228 return (error);
4229
4230 /*
4231 * Match the name of given module to the name of module at
4232 * the given position.
4233 */
4234 pos = STRUCT_FGET(strmodremove, pos);
4235
4236 is_remove = (pos != 0);
4237 for (q = stp->sd_wrq->q_next; SAMESTR(q) && pos > 0;
4238 q = q->q_next, pos--)
4239 ;
4240 if (pos > 0 || !SAMESTR(q) ||
4241 strcmp(Q2NAME(q), mod_name) != 0) {
4242 mutex_enter(&stp->sd_lock);
4243 strendplumb(stp);
4244 mutex_exit(&stp->sd_lock);
4245 return (EINVAL);
4246 }
4247
4248 /*
4249 * If the position is at or below an anchor, then the zoneid
4250 * must match the zoneid that created the anchor.
4251 */
4252 if (stp->sd_anchor != 0) {
4253 pos = STRUCT_FGET(strmodremove, pos);
4254 if (pos >= (stp->sd_pushcnt - stp->sd_anchor) &&
4255 stp->sd_anchorzone != crgetzoneid(crp)) {
4256 mutex_enter(&stp->sd_lock);
4257 strendplumb(stp);
4258 mutex_exit(&stp->sd_lock);
4259 return (EPERM);
4260 }
4261 }
4262
4263
4264 ASSERT(!(q->q_flag & QREADR));
4265 qdetach(_RD(q), 1, flag, crp, is_remove);
4266
4267 mutex_enter(&stp->sd_lock);
4268
4269 /*
4270 * As a performance concern we are caching the values of
4271 * q_minpsz and q_maxpsz of the module below the stream
4272 * head in the stream head.
4273 */
4274 if (!is_remove) {
4275 mutex_enter(QLOCK(wrq->q_next));
4276 rmin = wrq->q_next->q_minpsz;
4277 rmax = wrq->q_next->q_maxpsz;
4278 mutex_exit(QLOCK(wrq->q_next));
4279
4280 /* Do this processing here as a performance concern */
4281 if (strmsgsz != 0) {
4282 if (rmax == INFPSZ)
4283 rmax = strmsgsz;
4284 else {
4285 if (vp->v_type == VFIFO)
4286 rmax = MIN(PIPE_BUF, rmax);
4287 else rmax = MIN(strmsgsz, rmax);
4288 }
4289 }
4290
4291 mutex_enter(QLOCK(wrq));
4292 stp->sd_qn_minpsz = rmin;
4293 stp->sd_qn_maxpsz = rmax;
4294 mutex_exit(QLOCK(wrq));
4295 }
4296
4297 /*
4298 * Need to update the anchor value if this module is removed
4299 * at or below the anchor point. If the removed module is at
4300 * the anchor point, remove the anchor for this stream if
4301 * there is no module above the anchor point. Otherwise, if
4302 * the removed module is below the anchor point, decrement the
4303 * anchor point by 1.
4304 */
4305 if (stp->sd_anchor != 0) {
4306 pos = STRUCT_FGET(strmodremove, pos);
4307 if (pos == stp->sd_pushcnt - stp->sd_anchor + 1)
4308 stp->sd_anchor = 0;
4309 else if (pos > (stp->sd_pushcnt - stp->sd_anchor + 1))
4310 stp->sd_anchor--;
4311 }
4312
4313 strendplumb(stp);
4314 mutex_exit(&stp->sd_lock);
4315 return (0);
4316 }
4317
4318 case I_ANCHOR:
4319 /*
4320 * Set the anchor position on the stream to reside at
4321 * the top module (in other words, the top module
4322 * cannot be popped). Anchors with a FIFO make no
4323 * obvious sense, so they're not allowed.
4324 */
4325 mutex_enter(&stp->sd_lock);
4326
4327 if (stp->sd_vnode->v_type == VFIFO) {
4328 mutex_exit(&stp->sd_lock);
4329 return (EINVAL);
4330 }
4331 /* Only allow the same zoneid to update the anchor */
4332 if (stp->sd_anchor != 0 &&
4333 stp->sd_anchorzone != crgetzoneid(crp)) {
4334 mutex_exit(&stp->sd_lock);
4335 return (EINVAL);
4336 }
4337 stp->sd_anchor = stp->sd_pushcnt;
4338 stp->sd_anchorzone = crgetzoneid(crp);
4339 mutex_exit(&stp->sd_lock);
4340 return (0);
4341
4342 case I_LOOK:
4343 /*
4344 * Get name of first module downstream.
4345 * If no module, return an error.
4346 */
4347 claimstr(wrq);
4348 if (_SAMESTR(wrq) && wrq->q_next->q_next != NULL) {
4349 char *name = Q2NAME(wrq->q_next);
4350
4351 error = strcopyout(name, (void *)arg, strlen(name) + 1,
4352 copyflag);
4353 releasestr(wrq);
4354 return (error);
4355 }
4356 releasestr(wrq);
4357 return (EINVAL);
4358
4359 case I_LINK:
4360 case I_PLINK:
4361 /*
4362 * Link a multiplexor.
4363 */
4364 return (mlink(vp, cmd, (int)arg, crp, rvalp, 0));
4365
4366 case _I_PLINK_LH:
4367 /*
4368 * Link a multiplexor: Call must originate from kernel.
4369 */
4370 if (kioctl)
4371 return (ldi_mlink_lh(vp, cmd, arg, crp, rvalp));
4372
4373 return (EINVAL);
4374 case I_UNLINK:
4375 case I_PUNLINK:
4376 /*
4377 * Unlink a multiplexor.
4378 * If arg is -1, unlink all links for which this is the
4379 * controlling stream. Otherwise, arg is an index number
4380 * for a link to be removed.
4381 */
4382 {
4383 struct linkinfo *linkp;
4384 int native_arg = (int)arg;
4385 int type;
4386 netstack_t *ns;
4387 str_stack_t *ss;
4388
4389 TRACE_1(TR_FAC_STREAMS_FR,
4390 TR_I_UNLINK, "I_UNLINK/I_PUNLINK:%p", stp);
4391 if (vp->v_type == VFIFO) {
4392 return (EINVAL);
4393 }
4394 if (cmd == I_UNLINK)
4395 type = LINKNORMAL;
4396 else /* I_PUNLINK */
4397 type = LINKPERSIST;
4398 if (native_arg == 0) {
4399 return (EINVAL);
4400 }
4401 ns = netstack_find_by_cred(crp);
4402 ASSERT(ns != NULL);
4403 ss = ns->netstack_str;
4404 ASSERT(ss != NULL);
4405
4406 if (native_arg == MUXID_ALL)
4407 error = munlinkall(stp, type, crp, rvalp, ss);
4408 else {
4409 mutex_enter(&muxifier);
4410 if (!(linkp = findlinks(stp, (int)arg, type, ss))) {
4411 /* invalid user supplied index number */
4412 mutex_exit(&muxifier);
4413 netstack_rele(ss->ss_netstack);
4414 return (EINVAL);
4415 }
4416 /* munlink drops the muxifier lock */
4417 error = munlink(stp, linkp, type, crp, rvalp, ss);
4418 }
4419 netstack_rele(ss->ss_netstack);
4420 return (error);
4421 }
4422
4423 case I_FLUSH:
4424 /*
4425 * send a flush message downstream
4426 * flush message can indicate
4427 * FLUSHR - flush read queue
4428 * FLUSHW - flush write queue
4429 * FLUSHRW - flush read/write queue
4430 */
4431 if (stp->sd_flag & STRHUP)
4432 return (ENXIO);
4433 if (arg & ~FLUSHRW)
4434 return (EINVAL);
4435
4436 for (;;) {
4437 if (putnextctl1(stp->sd_wrq, M_FLUSH, (int)arg)) {
4438 break;
4439 }
4440 if (error = strwaitbuf(1, BPRI_HI)) {
4441 return (error);
4442 }
4443 }
4444
4445 /*
4446 * Send down an unsupported ioctl and wait for the nack
4447 * in order to allow the M_FLUSH to propagate back
4448 * up to the stream head.
4449 * Replaces if (qready()) runqueues();
4450 */
4451 strioc.ic_cmd = -1; /* The unsupported ioctl */
4452 strioc.ic_timout = 0;
4453 strioc.ic_len = 0;
4454 strioc.ic_dp = NULL;
4455 (void) strdoioctl(stp, &strioc, flag, K_TO_K, crp, rvalp);
4456 *rvalp = 0;
4457 return (0);
4458
4459 case I_FLUSHBAND:
4460 {
4461 struct bandinfo binfo;
4462
4463 error = strcopyin((void *)arg, &binfo, sizeof (binfo),
4464 copyflag);
4465 if (error)
4466 return (error);
4467 if (stp->sd_flag & STRHUP)
4468 return (ENXIO);
4469 if (binfo.bi_flag & ~FLUSHRW)
4470 return (EINVAL);
4471 while (!(mp = allocb(2, BPRI_HI))) {
4472 if (error = strwaitbuf(2, BPRI_HI))
4473 return (error);
4474 }
4475 mp->b_datap->db_type = M_FLUSH;
4476 *mp->b_wptr++ = binfo.bi_flag | FLUSHBAND;
4477 *mp->b_wptr++ = binfo.bi_pri;
4478 putnext(stp->sd_wrq, mp);
4479 /*
4480 * Send down an unsupported ioctl and wait for the nack
4481 * in order to allow the M_FLUSH to propagate back
4482 * up to the stream head.
4483 * Replaces if (qready()) runqueues();
4484 */
4485 strioc.ic_cmd = -1; /* The unsupported ioctl */
4486 strioc.ic_timout = 0;
4487 strioc.ic_len = 0;
4488 strioc.ic_dp = NULL;
4489 (void) strdoioctl(stp, &strioc, flag, K_TO_K, crp, rvalp);
4490 *rvalp = 0;
4491 return (0);
4492 }
4493
4494 case I_SRDOPT:
4495 /*
4496 * Set read options
4497 *
4498 * RNORM - default stream mode
4499 * RMSGN - message no discard
4500 * RMSGD - message discard
4501 * RPROTNORM - fail read with EBADMSG for M_[PC]PROTOs
4502 * RPROTDAT - convert M_[PC]PROTOs to M_DATAs
4503 * RPROTDIS - discard M_[PC]PROTOs and retain M_DATAs
4504 */
4505 if (arg & ~(RMODEMASK | RPROTMASK))
4506 return (EINVAL);
4507
4508 if ((arg & (RMSGD|RMSGN)) == (RMSGD|RMSGN))
4509 return (EINVAL);
4510
4511 mutex_enter(&stp->sd_lock);
4512 switch (arg & RMODEMASK) {
4513 case RNORM:
4514 stp->sd_read_opt &= ~(RD_MSGDIS | RD_MSGNODIS);
4515 break;
4516 case RMSGD:
4517 stp->sd_read_opt = (stp->sd_read_opt & ~RD_MSGNODIS) |
4518 RD_MSGDIS;
4519 break;
4520 case RMSGN:
4521 stp->sd_read_opt = (stp->sd_read_opt & ~RD_MSGDIS) |
4522 RD_MSGNODIS;
4523 break;
4524 }
4525
4526 switch (arg & RPROTMASK) {
4527 case RPROTNORM:
4528 stp->sd_read_opt &= ~(RD_PROTDAT | RD_PROTDIS);
4529 break;
4530
4531 case RPROTDAT:
4532 stp->sd_read_opt = ((stp->sd_read_opt & ~RD_PROTDIS) |
4533 RD_PROTDAT);
4534 break;
4535
4536 case RPROTDIS:
4537 stp->sd_read_opt = ((stp->sd_read_opt & ~RD_PROTDAT) |
4538 RD_PROTDIS);
4539 break;
4540 }
4541 mutex_exit(&stp->sd_lock);
4542 return (0);
4543
4544 case I_GRDOPT:
4545 /*
4546 * Get read option and return the value
4547 * to the spot pointed to by arg
4548 */
4549 {
4550 int rdopt;
4551
4552 rdopt = ((stp->sd_read_opt & RD_MSGDIS) ? RMSGD :
4553 ((stp->sd_read_opt & RD_MSGNODIS) ? RMSGN : RNORM));
4554 rdopt |= ((stp->sd_read_opt & RD_PROTDAT) ? RPROTDAT :
4555 ((stp->sd_read_opt & RD_PROTDIS) ? RPROTDIS : RPROTNORM));
4556
4557 return (strcopyout(&rdopt, (void *)arg, sizeof (int),
4558 copyflag));
4559 }
4560
4561 case I_SERROPT:
4562 /*
4563 * Set error options
4564 *
4565 * RERRNORM - persistent read errors
4566 * RERRNONPERSIST - non-persistent read errors
4567 * WERRNORM - persistent write errors
4568 * WERRNONPERSIST - non-persistent write errors
4569 */
4570 if (arg & ~(RERRMASK | WERRMASK))
4571 return (EINVAL);
4572
4573 mutex_enter(&stp->sd_lock);
4574 switch (arg & RERRMASK) {
4575 case RERRNORM:
4576 stp->sd_flag &= ~STRDERRNONPERSIST;
4577 break;
4578 case RERRNONPERSIST:
4579 stp->sd_flag |= STRDERRNONPERSIST;
4580 break;
4581 }
4582 switch (arg & WERRMASK) {
4583 case WERRNORM:
4584 stp->sd_flag &= ~STWRERRNONPERSIST;
4585 break;
4586 case WERRNONPERSIST:
4587 stp->sd_flag |= STWRERRNONPERSIST;
4588 break;
4589 }
4590 mutex_exit(&stp->sd_lock);
4591 return (0);
4592
4593 case I_GERROPT:
4594 /*
4595 * Get error option and return the value
4596 * to spot pointed to by arg
4597 */
4598 {
4599 int erropt = 0;
4600
4601 erropt |= (stp->sd_flag & STRDERRNONPERSIST) ? RERRNONPERSIST :
4602 RERRNORM;
4603 erropt |= (stp->sd_flag & STWRERRNONPERSIST) ? WERRNONPERSIST :
4604 WERRNORM;
4605 return (strcopyout(&erropt, (void *)arg, sizeof (int),
4606 copyflag));
4607 }
4608
4609 case I_SETSIG:
4610 /*
4611 * Register the calling proc to receive the SIGPOLL
4612 * signal based on the events given in arg. If
4613 * arg is zero, remove the proc from register list.
4614 */
4615 {
4616 strsig_t *ssp, *pssp;
4617 struct pid *pidp;
4618
4619 pssp = NULL;
4620 pidp = curproc->p_pidp;
4621 /*
4622 * Hold sd_lock to prevent traversal of sd_siglist while
4623 * it is modified.
4624 */
4625 mutex_enter(&stp->sd_lock);
4626 for (ssp = stp->sd_siglist; ssp && (ssp->ss_pidp != pidp);
4627 pssp = ssp, ssp = ssp->ss_next)
4628 ;
4629
4630 if (arg) {
4631 if (arg & ~(S_INPUT|S_HIPRI|S_MSG|S_HANGUP|S_ERROR|
4632 S_RDNORM|S_WRNORM|S_RDBAND|S_WRBAND|S_BANDURG)) {
4633 mutex_exit(&stp->sd_lock);
4634 return (EINVAL);
4635 }
4636 if ((arg & S_BANDURG) && !(arg & S_RDBAND)) {
4637 mutex_exit(&stp->sd_lock);
4638 return (EINVAL);
4639 }
4640
4641 /*
4642 * If proc not already registered, add it
4643 * to list.
4644 */
4645 if (!ssp) {
4646 ssp = kmem_alloc(sizeof (strsig_t), KM_SLEEP);
4647 ssp->ss_pidp = pidp;
4648 ssp->ss_pid = pidp->pid_id;
4649 ssp->ss_next = NULL;
4650 if (pssp)
4651 pssp->ss_next = ssp;
4652 else
4653 stp->sd_siglist = ssp;
4654 mutex_enter(&pidlock);
4655 PID_HOLD(pidp);
4656 mutex_exit(&pidlock);
4657 }
4658
4659 /*
4660 * Set events.
4661 */
4662 ssp->ss_events = (int)arg;
4663 } else {
4664 /*
4665 * Remove proc from register list.
4666 */
4667 if (ssp) {
4668 mutex_enter(&pidlock);
4669 PID_RELE(pidp);
4670 mutex_exit(&pidlock);
4671 if (pssp)
4672 pssp->ss_next = ssp->ss_next;
4673 else
4674 stp->sd_siglist = ssp->ss_next;
4675 kmem_free(ssp, sizeof (strsig_t));
4676 } else {
4677 mutex_exit(&stp->sd_lock);
4678 return (EINVAL);
4679 }
4680 }
4681
4682 /*
4683 * Recalculate OR of sig events.
4684 */
4685 stp->sd_sigflags = 0;
4686 for (ssp = stp->sd_siglist; ssp; ssp = ssp->ss_next)
4687 stp->sd_sigflags |= ssp->ss_events;
4688 mutex_exit(&stp->sd_lock);
4689 return (0);
4690 }
4691
4692 case I_GETSIG:
4693 /*
4694 * Return (in arg) the current registration of events
4695 * for which the calling proc is to be signaled.
4696 */
4697 {
4698 struct strsig *ssp;
4699 struct pid *pidp;
4700
4701 pidp = curproc->p_pidp;
4702 mutex_enter(&stp->sd_lock);
4703 for (ssp = stp->sd_siglist; ssp; ssp = ssp->ss_next)
4704 if (ssp->ss_pidp == pidp) {
4705 error = strcopyout(&ssp->ss_events, (void *)arg,
4706 sizeof (int), copyflag);
4707 mutex_exit(&stp->sd_lock);
4708 return (error);
4709 }
4710 mutex_exit(&stp->sd_lock);
4711 return (EINVAL);
4712 }
4713
4714 case I_ESETSIG:
4715 /*
4716 * Register the ss_pid to receive the SIGPOLL
4717 * signal based on the events is ss_events arg. If
4718 * ss_events is zero, remove the proc from register list.
4719 */
4720 {
4721 struct strsig *ssp, *pssp;
4722 struct proc *proc;
4723 struct pid *pidp;
4724 pid_t pid;
4725 struct strsigset ss;
4726
4727 error = strcopyin((void *)arg, &ss, sizeof (ss), copyflag);
4728 if (error)
4729 return (error);
4730
4731 pid = ss.ss_pid;
4732
4733 if (ss.ss_events != 0) {
4734 /*
4735 * Permissions check by sending signal 0.
4736 * Note that when kill fails it does a set_errno
4737 * causing the system call to fail.
4738 */
4739 error = kill(pid, 0);
4740 if (error) {
4741 return (error);
4742 }
4743 }
4744 mutex_enter(&pidlock);
4745 if (pid == 0)
4746 proc = curproc;
4747 else if (pid < 0)
4748 proc = pgfind(-pid);
4749 else
4750 proc = prfind(pid);
4751 if (proc == NULL) {
4752 mutex_exit(&pidlock);
4753 return (ESRCH);
4754 }
4755 if (pid < 0)
4756 pidp = proc->p_pgidp;
4757 else
4758 pidp = proc->p_pidp;
4759 ASSERT(pidp);
4760 /*
4761 * Get a hold on the pid structure while referencing it.
4762 * There is a separate PID_HOLD should it be inserted
4763 * in the list below.
4764 */
4765 PID_HOLD(pidp);
4766 mutex_exit(&pidlock);
4767
4768 pssp = NULL;
4769 /*
4770 * Hold sd_lock to prevent traversal of sd_siglist while
4771 * it is modified.
4772 */
4773 mutex_enter(&stp->sd_lock);
4774 for (ssp = stp->sd_siglist; ssp && (ssp->ss_pid != pid);
4775 pssp = ssp, ssp = ssp->ss_next)
4776 ;
4777
4778 if (ss.ss_events) {
4779 if (ss.ss_events &
4780 ~(S_INPUT|S_HIPRI|S_MSG|S_HANGUP|S_ERROR|
4781 S_RDNORM|S_WRNORM|S_RDBAND|S_WRBAND|S_BANDURG)) {
4782 mutex_exit(&stp->sd_lock);
4783 mutex_enter(&pidlock);
4784 PID_RELE(pidp);
4785 mutex_exit(&pidlock);
4786 return (EINVAL);
4787 }
4788 if ((ss.ss_events & S_BANDURG) &&
4789 !(ss.ss_events & S_RDBAND)) {
4790 mutex_exit(&stp->sd_lock);
4791 mutex_enter(&pidlock);
4792 PID_RELE(pidp);
4793 mutex_exit(&pidlock);
4794 return (EINVAL);
4795 }
4796
4797 /*
4798 * If proc not already registered, add it
4799 * to list.
4800 */
4801 if (!ssp) {
4802 ssp = kmem_alloc(sizeof (strsig_t), KM_SLEEP);
4803 ssp->ss_pidp = pidp;
4804 ssp->ss_pid = pid;
4805 ssp->ss_next = NULL;
4806 if (pssp)
4807 pssp->ss_next = ssp;
4808 else
4809 stp->sd_siglist = ssp;
4810 mutex_enter(&pidlock);
4811 PID_HOLD(pidp);
4812 mutex_exit(&pidlock);
4813 }
4814
4815 /*
4816 * Set events.
4817 */
4818 ssp->ss_events = ss.ss_events;
4819 } else {
4820 /*
4821 * Remove proc from register list.
4822 */
4823 if (ssp) {
4824 mutex_enter(&pidlock);
4825 PID_RELE(pidp);
4826 mutex_exit(&pidlock);
4827 if (pssp)
4828 pssp->ss_next = ssp->ss_next;
4829 else
4830 stp->sd_siglist = ssp->ss_next;
4831 kmem_free(ssp, sizeof (strsig_t));
4832 } else {
4833 mutex_exit(&stp->sd_lock);
4834 mutex_enter(&pidlock);
4835 PID_RELE(pidp);
4836 mutex_exit(&pidlock);
4837 return (EINVAL);
4838 }
4839 }
4840
4841 /*
4842 * Recalculate OR of sig events.
4843 */
4844 stp->sd_sigflags = 0;
4845 for (ssp = stp->sd_siglist; ssp; ssp = ssp->ss_next)
4846 stp->sd_sigflags |= ssp->ss_events;
4847 mutex_exit(&stp->sd_lock);
4848 mutex_enter(&pidlock);
4849 PID_RELE(pidp);
4850 mutex_exit(&pidlock);
4851 return (0);
4852 }
4853
4854 case I_EGETSIG:
4855 /*
4856 * Return (in arg) the current registration of events
4857 * for which the calling proc is to be signaled.
4858 */
4859 {
4860 struct strsig *ssp;
4861 struct proc *proc;
4862 pid_t pid;
4863 struct pid *pidp;
4864 struct strsigset ss;
4865
4866 error = strcopyin((void *)arg, &ss, sizeof (ss), copyflag);
4867 if (error)
4868 return (error);
4869
4870 pid = ss.ss_pid;
4871 mutex_enter(&pidlock);
4872 if (pid == 0)
4873 proc = curproc;
4874 else if (pid < 0)
4875 proc = pgfind(-pid);
4876 else
4877 proc = prfind(pid);
4878 if (proc == NULL) {
4879 mutex_exit(&pidlock);
4880 return (ESRCH);
4881 }
4882 if (pid < 0)
4883 pidp = proc->p_pgidp;
4884 else
4885 pidp = proc->p_pidp;
4886
4887 /* Prevent the pidp from being reassigned */
4888 PID_HOLD(pidp);
4889 mutex_exit(&pidlock);
4890
4891 mutex_enter(&stp->sd_lock);
4892 for (ssp = stp->sd_siglist; ssp; ssp = ssp->ss_next)
4893 if (ssp->ss_pid == pid) {
4894 ss.ss_pid = ssp->ss_pid;
4895 ss.ss_events = ssp->ss_events;
4896 error = strcopyout(&ss, (void *)arg,
4897 sizeof (struct strsigset), copyflag);
4898 mutex_exit(&stp->sd_lock);
4899 mutex_enter(&pidlock);
4900 PID_RELE(pidp);
4901 mutex_exit(&pidlock);
4902 return (error);
4903 }
4904 mutex_exit(&stp->sd_lock);
4905 mutex_enter(&pidlock);
4906 PID_RELE(pidp);
4907 mutex_exit(&pidlock);
4908 return (EINVAL);
4909 }
4910
4911 case I_PEEK:
4912 {
4913 STRUCT_DECL(strpeek, strpeek);
4914 size_t n;
4915 mblk_t *fmp, *tmp_mp = NULL;
4916
4917 STRUCT_INIT(strpeek, flag);
4918
4919 error = strcopyin((void *)arg, STRUCT_BUF(strpeek),
4920 STRUCT_SIZE(strpeek), copyflag);
4921 if (error)
4922 return (error);
4923
4924 mutex_enter(QLOCK(rdq));
4925 /*
4926 * Skip the invalid messages
4927 */
4928 for (mp = rdq->q_first; mp != NULL; mp = mp->b_next)
4929 if (mp->b_datap->db_type != M_SIG)
4930 break;
4931
4932 /*
4933 * If user has requested to peek at a high priority message
4934 * and first message is not, return 0
4935 */
4936 if (mp != NULL) {
4937 if ((STRUCT_FGET(strpeek, flags) & RS_HIPRI) &&
4938 queclass(mp) == QNORM) {
4939 *rvalp = 0;
4940 mutex_exit(QLOCK(rdq));
4941 return (0);
4942 }
4943 } else if (stp->sd_struiordq == NULL ||
4944 (STRUCT_FGET(strpeek, flags) & RS_HIPRI)) {
4945 /*
4946 * No mblks to look at at the streamhead and
4947 * 1). This isn't a synch stream or
4948 * 2). This is a synch stream but caller wants high
4949 * priority messages which is not supported by
4950 * the synch stream. (it only supports QNORM)
4951 */
4952 *rvalp = 0;
4953 mutex_exit(QLOCK(rdq));
4954 return (0);
4955 }
4956
4957 fmp = mp;
4958
4959 if (mp && mp->b_datap->db_type == M_PASSFP) {
4960 mutex_exit(QLOCK(rdq));
4961 return (EBADMSG);
4962 }
4963
4964 ASSERT(mp == NULL || mp->b_datap->db_type == M_PCPROTO ||
4965 mp->b_datap->db_type == M_PROTO ||
4966 mp->b_datap->db_type == M_DATA);
4967
4968 if (mp && mp->b_datap->db_type == M_PCPROTO) {
4969 STRUCT_FSET(strpeek, flags, RS_HIPRI);
4970 } else {
4971 STRUCT_FSET(strpeek, flags, 0);
4972 }
4973
4974
4975 if (mp && ((tmp_mp = dupmsg(mp)) == NULL)) {
4976 mutex_exit(QLOCK(rdq));
4977 return (ENOSR);
4978 }
4979 mutex_exit(QLOCK(rdq));
4980
4981 /*
4982 * set mp = tmp_mp, so that I_PEEK processing can continue.
4983 * tmp_mp is used to free the dup'd message.
4984 */
4985 mp = tmp_mp;
4986
4987 uio.uio_fmode = 0;
4988 uio.uio_extflg = UIO_COPY_CACHED;
4989 uio.uio_segflg = (copyflag == U_TO_K) ? UIO_USERSPACE :
4990 UIO_SYSSPACE;
4991 uio.uio_limit = 0;
4992 /*
4993 * First process PROTO blocks, if any.
4994 * If user doesn't want to get ctl info by setting maxlen <= 0,
4995 * then set len to -1/0 and skip control blocks part.
4996 */
4997 if (STRUCT_FGET(strpeek, ctlbuf.maxlen) < 0)
4998 STRUCT_FSET(strpeek, ctlbuf.len, -1);
4999 else if (STRUCT_FGET(strpeek, ctlbuf.maxlen) == 0)
5000 STRUCT_FSET(strpeek, ctlbuf.len, 0);
5001 else {
5002 int ctl_part = 0;
5003
5004 iov.iov_base = STRUCT_FGETP(strpeek, ctlbuf.buf);
5005 iov.iov_len = STRUCT_FGET(strpeek, ctlbuf.maxlen);
5006 uio.uio_iov = &iov;
5007 uio.uio_resid = iov.iov_len;
5008 uio.uio_loffset = 0;
5009 uio.uio_iovcnt = 1;
5010 while (mp && mp->b_datap->db_type != M_DATA &&
5011 uio.uio_resid >= 0) {
5012 ASSERT(STRUCT_FGET(strpeek, flags) == 0 ?
5013 mp->b_datap->db_type == M_PROTO :
5014 mp->b_datap->db_type == M_PCPROTO);
5015
5016 if ((n = MIN(uio.uio_resid,
5017 mp->b_wptr - mp->b_rptr)) != 0 &&
5018 (error = uiomove((char *)mp->b_rptr, n,
5019 UIO_READ, &uio)) != 0) {
5020 freemsg(tmp_mp);
5021 return (error);
5022 }
5023 ctl_part = 1;
5024 mp = mp->b_cont;
5025 }
5026 /* No ctl message */
5027 if (ctl_part == 0)
5028 STRUCT_FSET(strpeek, ctlbuf.len, -1);
5029 else
5030 STRUCT_FSET(strpeek, ctlbuf.len,
5031 STRUCT_FGET(strpeek, ctlbuf.maxlen) -
5032 uio.uio_resid);
5033 }
5034
5035 /*
5036 * Now process DATA blocks, if any.
5037 * If user doesn't want to get data info by setting maxlen <= 0,
5038 * then set len to -1/0 and skip data blocks part.
5039 */
5040 if (STRUCT_FGET(strpeek, databuf.maxlen) < 0)
5041 STRUCT_FSET(strpeek, databuf.len, -1);
5042 else if (STRUCT_FGET(strpeek, databuf.maxlen) == 0)
5043 STRUCT_FSET(strpeek, databuf.len, 0);
5044 else {
5045 int data_part = 0;
5046
5047 iov.iov_base = STRUCT_FGETP(strpeek, databuf.buf);
5048 iov.iov_len = STRUCT_FGET(strpeek, databuf.maxlen);
5049 uio.uio_iov = &iov;
5050 uio.uio_resid = iov.iov_len;
5051 uio.uio_loffset = 0;
5052 uio.uio_iovcnt = 1;
5053 while (mp && uio.uio_resid) {
5054 if (mp->b_datap->db_type == M_DATA) {
5055 if ((n = MIN(uio.uio_resid,
5056 mp->b_wptr - mp->b_rptr)) != 0 &&
5057 (error = uiomove((char *)mp->b_rptr,
5058 n, UIO_READ, &uio)) != 0) {
5059 freemsg(tmp_mp);
5060 return (error);
5061 }
5062 data_part = 1;
5063 }
5064 ASSERT(data_part == 0 ||
5065 mp->b_datap->db_type == M_DATA);
5066 mp = mp->b_cont;
5067 }
5068 /* No data message */
5069 if (data_part == 0)
5070 STRUCT_FSET(strpeek, databuf.len, -1);
5071 else
5072 STRUCT_FSET(strpeek, databuf.len,
5073 STRUCT_FGET(strpeek, databuf.maxlen) -
5074 uio.uio_resid);
5075 }
5076 freemsg(tmp_mp);
5077
5078 /*
5079 * It is a synch stream and user wants to get
5080 * data (maxlen > 0).
5081 * uio setup is done by the codes that process DATA
5082 * blocks above.
5083 */
5084 if ((fmp == NULL) && STRUCT_FGET(strpeek, databuf.maxlen) > 0) {
5085 infod_t infod;
5086
5087 infod.d_cmd = INFOD_COPYOUT;
5088 infod.d_res = 0;
5089 infod.d_uiop = &uio;
5090 error = infonext(rdq, &infod);
5091 if (error == EINVAL || error == EBUSY)
5092 error = 0;
5093 if (error)
5094 return (error);
5095 STRUCT_FSET(strpeek, databuf.len, STRUCT_FGET(strpeek,
5096 databuf.maxlen) - uio.uio_resid);
5097 if (STRUCT_FGET(strpeek, databuf.len) == 0) {
5098 /*
5099 * No data found by the infonext().
5100 */
5101 STRUCT_FSET(strpeek, databuf.len, -1);
5102 }
5103 }
5104 error = strcopyout(STRUCT_BUF(strpeek), (void *)arg,
5105 STRUCT_SIZE(strpeek), copyflag);
5106 if (error) {
5107 return (error);
5108 }
5109 /*
5110 * If there is no message retrieved, set return code to 0
5111 * otherwise, set it to 1.
5112 */
5113 if (STRUCT_FGET(strpeek, ctlbuf.len) == -1 &&
5114 STRUCT_FGET(strpeek, databuf.len) == -1)
5115 *rvalp = 0;
5116 else
5117 *rvalp = 1;
5118 return (0);
5119 }
5120
5121 case I_FDINSERT:
5122 {
5123 STRUCT_DECL(strfdinsert, strfdinsert);
5124 struct file *resftp;
5125 struct stdata *resstp;
5126 t_uscalar_t ival;
5127 ssize_t msgsize;
5128 struct strbuf mctl;
5129
5130 STRUCT_INIT(strfdinsert, flag);
5131 if (stp->sd_flag & STRHUP)
5132 return (ENXIO);
5133 /*
5134 * STRDERR, STWRERR and STPLEX tested above.
5135 */
5136 error = strcopyin((void *)arg, STRUCT_BUF(strfdinsert),
5137 STRUCT_SIZE(strfdinsert), copyflag);
5138 if (error)
5139 return (error);
5140
5141 if (STRUCT_FGET(strfdinsert, offset) < 0 ||
5142 (STRUCT_FGET(strfdinsert, offset) %
5143 sizeof (t_uscalar_t)) != 0)
5144 return (EINVAL);
5145 if ((resftp = getf(STRUCT_FGET(strfdinsert, fildes))) != NULL) {
5146 if ((resstp = resftp->f_vnode->v_stream) == NULL) {
5147 releasef(STRUCT_FGET(strfdinsert, fildes));
5148 return (EINVAL);
5149 }
5150 } else
5151 return (EINVAL);
5152
5153 mutex_enter(&resstp->sd_lock);
5154 if (resstp->sd_flag & (STRDERR|STWRERR|STRHUP|STPLEX)) {
5155 error = strgeterr(resstp,
5156 STRDERR|STWRERR|STRHUP|STPLEX, 0);
5157 if (error != 0) {
5158 mutex_exit(&resstp->sd_lock);
5159 releasef(STRUCT_FGET(strfdinsert, fildes));
5160 return (error);
5161 }
5162 }
5163 mutex_exit(&resstp->sd_lock);
5164
5165 #ifdef _ILP32
5166 {
5167 queue_t *q;
5168 queue_t *mate = NULL;
5169
5170 /* get read queue of stream terminus */
5171 claimstr(resstp->sd_wrq);
5172 for (q = resstp->sd_wrq->q_next; q->q_next != NULL;
5173 q = q->q_next)
5174 if (!STRMATED(resstp) && STREAM(q) != resstp &&
5175 mate == NULL) {
5176 ASSERT(q->q_qinfo->qi_srvp);
5177 ASSERT(_OTHERQ(q)->q_qinfo->qi_srvp);
5178 claimstr(q);
5179 mate = q;
5180 }
5181 q = _RD(q);
5182 if (mate)
5183 releasestr(mate);
5184 releasestr(resstp->sd_wrq);
5185 ival = (t_uscalar_t)q;
5186 }
5187 #else
5188 ival = (t_uscalar_t)getminor(resftp->f_vnode->v_rdev);
5189 #endif /* _ILP32 */
5190
5191 if (STRUCT_FGET(strfdinsert, ctlbuf.len) <
5192 STRUCT_FGET(strfdinsert, offset) + sizeof (t_uscalar_t)) {
5193 releasef(STRUCT_FGET(strfdinsert, fildes));
5194 return (EINVAL);
5195 }
5196
5197 /*
5198 * Check for legal flag value.
5199 */
5200 if (STRUCT_FGET(strfdinsert, flags) & ~RS_HIPRI) {
5201 releasef(STRUCT_FGET(strfdinsert, fildes));
5202 return (EINVAL);
5203 }
5204
5205 /* get these values from those cached in the stream head */
5206 mutex_enter(QLOCK(stp->sd_wrq));
5207 rmin = stp->sd_qn_minpsz;
5208 rmax = stp->sd_qn_maxpsz;
5209 mutex_exit(QLOCK(stp->sd_wrq));
5210
5211 /*
5212 * Make sure ctl and data sizes together fall within
5213 * the limits of the max and min receive packet sizes
5214 * and do not exceed system limit. A negative data
5215 * length means that no data part is to be sent.
5216 */
5217 ASSERT((rmax >= 0) || (rmax == INFPSZ));
5218 if (rmax == 0) {
5219 releasef(STRUCT_FGET(strfdinsert, fildes));
5220 return (ERANGE);
5221 }
5222 if ((msgsize = STRUCT_FGET(strfdinsert, databuf.len)) < 0)
5223 msgsize = 0;
5224 if ((msgsize < rmin) ||
5225 ((msgsize > rmax) && (rmax != INFPSZ)) ||
5226 (STRUCT_FGET(strfdinsert, ctlbuf.len) > strctlsz)) {
5227 releasef(STRUCT_FGET(strfdinsert, fildes));
5228 return (ERANGE);
5229 }
5230
5231 mutex_enter(&stp->sd_lock);
5232 while (!(STRUCT_FGET(strfdinsert, flags) & RS_HIPRI) &&
5233 !canputnext(stp->sd_wrq)) {
5234 if ((error = strwaitq(stp, WRITEWAIT, (ssize_t)0,
5235 flag, -1, &done)) != 0 || done) {
5236 mutex_exit(&stp->sd_lock);
5237 releasef(STRUCT_FGET(strfdinsert, fildes));
5238 return (error);
5239 }
5240 if ((error = i_straccess(stp, access)) != 0) {
5241 mutex_exit(&stp->sd_lock);
5242 releasef(
5243 STRUCT_FGET(strfdinsert, fildes));
5244 return (error);
5245 }
5246 }
5247 mutex_exit(&stp->sd_lock);
5248
5249 /*
5250 * Copy strfdinsert.ctlbuf into native form of
5251 * ctlbuf to pass down into strmakemsg().
5252 */
5253 mctl.maxlen = STRUCT_FGET(strfdinsert, ctlbuf.maxlen);
5254 mctl.len = STRUCT_FGET(strfdinsert, ctlbuf.len);
5255 mctl.buf = STRUCT_FGETP(strfdinsert, ctlbuf.buf);
5256
5257 iov.iov_base = STRUCT_FGETP(strfdinsert, databuf.buf);
5258 iov.iov_len = STRUCT_FGET(strfdinsert, databuf.len);
5259 uio.uio_iov = &iov;
5260 uio.uio_iovcnt = 1;
5261 uio.uio_loffset = 0;
5262 uio.uio_segflg = (copyflag == U_TO_K) ? UIO_USERSPACE :
5263 UIO_SYSSPACE;
5264 uio.uio_fmode = 0;
5265 uio.uio_extflg = UIO_COPY_CACHED;
5266 uio.uio_resid = iov.iov_len;
5267 if ((error = strmakemsg(&mctl,
5268 &msgsize, &uio, stp,
5269 STRUCT_FGET(strfdinsert, flags), &mp)) != 0 || !mp) {
5270 STRUCT_FSET(strfdinsert, databuf.len, msgsize);
5271 releasef(STRUCT_FGET(strfdinsert, fildes));
5272 return (error);
5273 }
5274
5275 STRUCT_FSET(strfdinsert, databuf.len, msgsize);
5276
5277 /*
5278 * Place the possibly reencoded queue pointer 'offset' bytes
5279 * from the start of the control portion of the message.
5280 */
5281 *((t_uscalar_t *)(mp->b_rptr +
5282 STRUCT_FGET(strfdinsert, offset))) = ival;
5283
5284 /*
5285 * Put message downstream.
5286 */
5287 stream_willservice(stp);
5288 putnext(stp->sd_wrq, mp);
5289 stream_runservice(stp);
5290 releasef(STRUCT_FGET(strfdinsert, fildes));
5291 return (error);
5292 }
5293
5294 case I_SENDFD:
5295 {
5296 struct file *fp;
5297
5298 if ((fp = getf((int)arg)) == NULL)
5299 return (EBADF);
5300 error = do_sendfp(stp, fp, crp);
5301 if (auditing) {
5302 audit_fdsend((int)arg, fp, error);
5303 }
5304 releasef((int)arg);
5305 return (error);
5306 }
5307
5308 case I_RECVFD:
5309 case I_E_RECVFD:
5310 {
5311 struct k_strrecvfd *srf;
5312 int i, fd;
5313
5314 mutex_enter(&stp->sd_lock);
5315 while (!(mp = getq(rdq))) {
5316 if (stp->sd_flag & (STRHUP|STREOF)) {
5317 mutex_exit(&stp->sd_lock);
5318 return (ENXIO);
5319 }
5320 if ((error = strwaitq(stp, GETWAIT, (ssize_t)0,
5321 flag, -1, &done)) != 0 || done) {
5322 mutex_exit(&stp->sd_lock);
5323 return (error);
5324 }
5325 if ((error = i_straccess(stp, access)) != 0) {
5326 mutex_exit(&stp->sd_lock);
5327 return (error);
5328 }
5329 }
5330 if (mp->b_datap->db_type != M_PASSFP) {
5331 putback(stp, rdq, mp, mp->b_band);
5332 mutex_exit(&stp->sd_lock);
5333 return (EBADMSG);
5334 }
5335 mutex_exit(&stp->sd_lock);
5336
5337 srf = (struct k_strrecvfd *)mp->b_rptr;
5338 if ((fd = ufalloc(0)) == -1) {
5339 mutex_enter(&stp->sd_lock);
5340 putback(stp, rdq, mp, mp->b_band);
5341 mutex_exit(&stp->sd_lock);
5342 return (EMFILE);
5343 }
5344 if (cmd == I_RECVFD) {
5345 struct o_strrecvfd ostrfd;
5346
5347 /* check to see if uid/gid values are too large. */
5348
5349 if (srf->uid > (o_uid_t)USHRT_MAX ||
5350 srf->gid > (o_gid_t)USHRT_MAX) {
5351 mutex_enter(&stp->sd_lock);
5352 putback(stp, rdq, mp, mp->b_band);
5353 mutex_exit(&stp->sd_lock);
5354 setf(fd, NULL); /* release fd entry */
5355 return (EOVERFLOW);
5356 }
5357
5358 ostrfd.fd = fd;
5359 ostrfd.uid = (o_uid_t)srf->uid;
5360 ostrfd.gid = (o_gid_t)srf->gid;
5361
5362 /* Null the filler bits */
5363 for (i = 0; i < 8; i++)
5364 ostrfd.fill[i] = 0;
5365
5366 error = strcopyout(&ostrfd, (void *)arg,
5367 sizeof (struct o_strrecvfd), copyflag);
5368 } else { /* I_E_RECVFD */
5369 struct strrecvfd strfd;
5370
5371 strfd.fd = fd;
5372 strfd.uid = srf->uid;
5373 strfd.gid = srf->gid;
5374
5375 /* null the filler bits */
5376 for (i = 0; i < 8; i++)
5377 strfd.fill[i] = 0;
5378
5379 error = strcopyout(&strfd, (void *)arg,
5380 sizeof (struct strrecvfd), copyflag);
5381 }
5382
5383 if (error) {
5384 setf(fd, NULL); /* release fd entry */
5385 mutex_enter(&stp->sd_lock);
5386 putback(stp, rdq, mp, mp->b_band);
5387 mutex_exit(&stp->sd_lock);
5388 return (error);
5389 }
5390 if (auditing) {
5391 audit_fdrecv(fd, srf->fp);
5392 }
5393
5394 /*
5395 * Always increment f_count since the freemsg() below will
5396 * always call free_passfp() which performs a closef().
5397 */
5398 mutex_enter(&srf->fp->f_tlock);
5399 srf->fp->f_count++;
5400 mutex_exit(&srf->fp->f_tlock);
5401 setf(fd, srf->fp);
5402 freemsg(mp);
5403 return (0);
5404 }
5405
5406 case I_SWROPT:
5407 /*
5408 * Set/clear the write options. arg is a bit
5409 * mask with any of the following bits set...
5410 * SNDZERO - send zero length message
5411 * SNDPIPE - send sigpipe to process if
5412 * sd_werror is set and process is
5413 * doing a write or putmsg.
5414 * The new stream head write options should reflect
5415 * what is in arg.
5416 */
5417 if (arg & ~(SNDZERO|SNDPIPE))
5418 return (EINVAL);
5419
5420 mutex_enter(&stp->sd_lock);
5421 stp->sd_wput_opt &= ~(SW_SIGPIPE|SW_SNDZERO);
5422 if (arg & SNDZERO)
5423 stp->sd_wput_opt |= SW_SNDZERO;
5424 if (arg & SNDPIPE)
5425 stp->sd_wput_opt |= SW_SIGPIPE;
5426 mutex_exit(&stp->sd_lock);
5427 return (0);
5428
5429 case I_GWROPT:
5430 {
5431 int wropt = 0;
5432
5433 if (stp->sd_wput_opt & SW_SNDZERO)
5434 wropt |= SNDZERO;
5435 if (stp->sd_wput_opt & SW_SIGPIPE)
5436 wropt |= SNDPIPE;
5437 return (strcopyout(&wropt, (void *)arg, sizeof (wropt),
5438 copyflag));
5439 }
5440
5441 case I_LIST:
5442 /*
5443 * Returns all the modules found on this stream,
5444 * upto the driver. If argument is NULL, return the
5445 * number of modules (including driver). If argument
5446 * is not NULL, copy the names into the structure
5447 * provided.
5448 */
5449
5450 {
5451 queue_t *q;
5452 char *qname;
5453 int i, nmods;
5454 struct str_mlist *mlist;
5455 STRUCT_DECL(str_list, strlist);
5456
5457 if (arg == NULL) { /* Return number of modules plus driver */
5458 if (stp->sd_vnode->v_type == VFIFO)
5459 *rvalp = stp->sd_pushcnt;
5460 else
5461 *rvalp = stp->sd_pushcnt + 1;
5462 return (0);
5463 }
5464
5465 STRUCT_INIT(strlist, flag);
5466
5467 error = strcopyin((void *)arg, STRUCT_BUF(strlist),
5468 STRUCT_SIZE(strlist), copyflag);
5469 if (error != 0)
5470 return (error);
5471
5472 mlist = STRUCT_FGETP(strlist, sl_modlist);
5473 nmods = STRUCT_FGET(strlist, sl_nmods);
5474 if (nmods <= 0)
5475 return (EINVAL);
5476
5477 claimstr(stp->sd_wrq);
5478 q = stp->sd_wrq;
5479 for (i = 0; i < nmods && _SAMESTR(q); i++, q = q->q_next) {
5480 qname = Q2NAME(q->q_next);
5481 error = strcopyout(qname, &mlist[i], strlen(qname) + 1,
5482 copyflag);
5483 if (error != 0) {
5484 releasestr(stp->sd_wrq);
5485 return (error);
5486 }
5487 }
5488 releasestr(stp->sd_wrq);
5489 return (strcopyout(&i, (void *)arg, sizeof (int), copyflag));
5490 }
5491
5492 case I_CKBAND:
5493 {
5494 queue_t *q;
5495 qband_t *qbp;
5496
5497 if ((arg < 0) || (arg >= NBAND))
5498 return (EINVAL);
5499 q = _RD(stp->sd_wrq);
5500 mutex_enter(QLOCK(q));
5501 if (arg > (int)q->q_nband) {
5502 *rvalp = 0;
5503 } else {
5504 if (arg == 0) {
5505 if (q->q_first)
5506 *rvalp = 1;
5507 else
5508 *rvalp = 0;
5509 } else {
5510 qbp = q->q_bandp;
5511 while (--arg > 0)
5512 qbp = qbp->qb_next;
5513 if (qbp->qb_first)
5514 *rvalp = 1;
5515 else
5516 *rvalp = 0;
5517 }
5518 }
5519 mutex_exit(QLOCK(q));
5520 return (0);
5521 }
5522
5523 case I_GETBAND:
5524 {
5525 int intpri;
5526 queue_t *q;
5527
5528 q = _RD(stp->sd_wrq);
5529 mutex_enter(QLOCK(q));
5530 mp = q->q_first;
5531 if (!mp) {
5532 mutex_exit(QLOCK(q));
5533 return (ENODATA);
5534 }
5535 intpri = (int)mp->b_band;
5536 error = strcopyout(&intpri, (void *)arg, sizeof (int),
5537 copyflag);
5538 mutex_exit(QLOCK(q));
5539 return (error);
5540 }
5541
5542 case I_ATMARK:
5543 {
5544 queue_t *q;
5545
5546 if (arg & ~(ANYMARK|LASTMARK))
5547 return (EINVAL);
5548 q = _RD(stp->sd_wrq);
5549 mutex_enter(&stp->sd_lock);
5550 if ((stp->sd_flag & STRATMARK) && (arg == ANYMARK)) {
5551 *rvalp = 1;
5552 } else {
5553 mutex_enter(QLOCK(q));
5554 mp = q->q_first;
5555
5556 if (mp == NULL)
5557 *rvalp = 0;
5558 else if ((arg == ANYMARK) && (mp->b_flag & MSGMARK))
5559 *rvalp = 1;
5560 else if ((arg == LASTMARK) && (mp == stp->sd_mark))
5561 *rvalp = 1;
5562 else
5563 *rvalp = 0;
5564 mutex_exit(QLOCK(q));
5565 }
5566 mutex_exit(&stp->sd_lock);
5567 return (0);
5568 }
5569
5570 case I_CANPUT:
5571 {
5572 char band;
5573
5574 if ((arg < 0) || (arg >= NBAND))
5575 return (EINVAL);
5576 band = (char)arg;
5577 *rvalp = bcanputnext(stp->sd_wrq, band);
5578 return (0);
5579 }
5580
5581 case I_SETCLTIME:
5582 {
5583 int closetime;
5584
5585 error = strcopyin((void *)arg, &closetime, sizeof (int),
5586 copyflag);
5587 if (error)
5588 return (error);
5589 if (closetime < 0)
5590 return (EINVAL);
5591
5592 stp->sd_closetime = closetime;
5593 return (0);
5594 }
5595
5596 case I_GETCLTIME:
5597 {
5598 int closetime;
5599
5600 closetime = stp->sd_closetime;
5601 return (strcopyout(&closetime, (void *)arg, sizeof (int),
5602 copyflag));
5603 }
5604
5605 case TIOCGSID:
5606 {
5607 pid_t sid;
5608
5609 mutex_enter(&stp->sd_lock);
5610 if (stp->sd_sidp == NULL) {
5611 mutex_exit(&stp->sd_lock);
5612 return (ENOTTY);
5613 }
5614 sid = stp->sd_sidp->pid_id;
5615 mutex_exit(&stp->sd_lock);
5616 return (strcopyout(&sid, (void *)arg, sizeof (pid_t),
5617 copyflag));
5618 }
5619
5620 case TIOCSPGRP:
5621 {
5622 pid_t pgrp;
5623 proc_t *q;
5624 pid_t sid, fg_pgid, bg_pgid;
5625
5626 if (error = strcopyin((void *)arg, &pgrp, sizeof (pid_t),
5627 copyflag))
5628 return (error);
5629 mutex_enter(&stp->sd_lock);
5630 mutex_enter(&pidlock);
5631 if (stp->sd_sidp != ttoproc(curthread)->p_sessp->s_sidp) {
5632 mutex_exit(&pidlock);
5633 mutex_exit(&stp->sd_lock);
5634 return (ENOTTY);
5635 }
5636 if (pgrp == stp->sd_pgidp->pid_id) {
5637 mutex_exit(&pidlock);
5638 mutex_exit(&stp->sd_lock);
5639 return (0);
5640 }
5641 if (pgrp <= 0 || pgrp >= maxpid) {
5642 mutex_exit(&pidlock);
5643 mutex_exit(&stp->sd_lock);
5644 return (EINVAL);
5645 }
5646 if ((q = pgfind(pgrp)) == NULL ||
5647 q->p_sessp != ttoproc(curthread)->p_sessp) {
5648 mutex_exit(&pidlock);
5649 mutex_exit(&stp->sd_lock);
5650 return (EPERM);
5651 }
5652 sid = stp->sd_sidp->pid_id;
5653 fg_pgid = q->p_pgrp;
5654 bg_pgid = stp->sd_pgidp->pid_id;
5655 CL_SET_PROCESS_GROUP(curthread, sid, bg_pgid, fg_pgid);
5656 PID_RELE(stp->sd_pgidp);
5657 ctty_clear_sighuped();
5658 stp->sd_pgidp = q->p_pgidp;
5659 PID_HOLD(stp->sd_pgidp);
5660 mutex_exit(&pidlock);
5661 mutex_exit(&stp->sd_lock);
5662 return (0);
5663 }
5664
5665 case TIOCGPGRP:
5666 {
5667 pid_t pgrp;
5668
5669 mutex_enter(&stp->sd_lock);
5670 if (stp->sd_sidp == NULL) {
5671 mutex_exit(&stp->sd_lock);
5672 return (ENOTTY);
5673 }
5674 pgrp = stp->sd_pgidp->pid_id;
5675 mutex_exit(&stp->sd_lock);
5676 return (strcopyout(&pgrp, (void *)arg, sizeof (pid_t),
5677 copyflag));
5678 }
5679
5680 case TIOCSCTTY:
5681 {
5682 return (strctty(stp));
5683 }
5684
5685 case TIOCNOTTY:
5686 {
5687 /* freectty() always assumes curproc. */
5688 if (freectty(B_FALSE) != 0)
5689 return (0);
5690 return (ENOTTY);
5691 }
5692
5693 case FIONBIO:
5694 case FIOASYNC:
5695 return (0); /* handled by the upper layer */
5696 case F_ASSOCI_PID:
5697 {
5698 if (crp != kcred)
5699 return (EPERM);
5700 if (is_xti_str(stp))
5701 sh_insert_pid(stp, (pid_t)arg);
5702 return (0);
5703 }
5704 case F_DASSOC_PID:
5705 {
5706 if (crp != kcred)
5707 return (EPERM);
5708 if (is_xti_str(stp))
5709 sh_remove_pid(stp, (pid_t)arg);
5710 return (0);
5711 }
5712 }
5713 }
5714
5715 /*
5716 * Custom free routine used for M_PASSFP messages.
5717 */
5718 static void
5719 free_passfp(struct k_strrecvfd *srf)
5720 {
5721 (void) closef(srf->fp);
5722 kmem_free(srf, sizeof (struct k_strrecvfd) + sizeof (frtn_t));
5723 }
5724
5725 /* ARGSUSED */
5726 int
5727 do_sendfp(struct stdata *stp, struct file *fp, struct cred *cr)
5728 {
5729 queue_t *qp, *nextqp;
5730 struct k_strrecvfd *srf;
5731 mblk_t *mp;
5732 frtn_t *frtnp;
5733 size_t bufsize;
5734 queue_t *mate = NULL;
5735 syncq_t *sq = NULL;
5736 int retval = 0;
5737
5738 if (stp->sd_flag & STRHUP)
5739 return (ENXIO);
5740
5741 claimstr(stp->sd_wrq);
5742
5743 /* Fastpath, we have a pipe, and we are already mated, use it. */
5744 if (STRMATED(stp)) {
5745 qp = _RD(stp->sd_mate->sd_wrq);
5746 claimstr(qp);
5747 mate = qp;
5748 } else { /* Not already mated. */
5749
5750 /*
5751 * Walk the stream to the end of this one.
5752 * assumes that the claimstr() will prevent
5753 * plumbing between the stream head and the
5754 * driver from changing
5755 */
5756 qp = stp->sd_wrq;
5757
5758 /*
5759 * Loop until we reach the end of this stream.
5760 * On completion, qp points to the write queue
5761 * at the end of the stream, or the read queue
5762 * at the stream head if this is a fifo.
5763 */
5764 while (((qp = qp->q_next) != NULL) && _SAMESTR(qp))
5765 ;
5766
5767 /*
5768 * Just in case we get a q_next which is NULL, but
5769 * not at the end of the stream. This is actually
5770 * broken, so we set an assert to catch it in
5771 * debug, and set an error and return if not debug.
5772 */
5773 ASSERT(qp);
5774 if (qp == NULL) {
5775 releasestr(stp->sd_wrq);
5776 return (EINVAL);
5777 }
5778
5779 /*
5780 * Enter the syncq for the driver, so (hopefully)
5781 * the queue values will not change on us.
5782 * XXXX - This will only prevent the race IFF only
5783 * the write side modifies the q_next member, and
5784 * the put procedure is protected by at least
5785 * MT_PERQ.
5786 */
5787 if ((sq = qp->q_syncq) != NULL)
5788 entersq(sq, SQ_PUT);
5789
5790 /* Now get the q_next value from this qp. */
5791 nextqp = qp->q_next;
5792
5793 /*
5794 * If nextqp exists and the other stream is different
5795 * from this one claim the stream, set the mate, and
5796 * get the read queue at the stream head of the other
5797 * stream. Assumes that nextqp was at least valid when
5798 * we got it. Hopefully the entersq of the driver
5799 * will prevent it from changing on us.
5800 */
5801 if ((nextqp != NULL) && (STREAM(nextqp) != stp)) {
5802 ASSERT(qp->q_qinfo->qi_srvp);
5803 ASSERT(_OTHERQ(qp)->q_qinfo->qi_srvp);
5804 ASSERT(_OTHERQ(qp->q_next)->q_qinfo->qi_srvp);
5805 claimstr(nextqp);
5806
5807 /* Make sure we still have a q_next */
5808 if (nextqp != qp->q_next) {
5809 releasestr(stp->sd_wrq);
5810 releasestr(nextqp);
5811 return (EINVAL);
5812 }
5813
5814 qp = _RD(STREAM(nextqp)->sd_wrq);
5815 mate = qp;
5816 }
5817 /* If we entered the synq above, leave it. */
5818 if (sq != NULL)
5819 leavesq(sq, SQ_PUT);
5820 } /* STRMATED(STP) */
5821
5822 /* XXX prevents substitution of the ops vector */
5823 if (qp->q_qinfo != &strdata && qp->q_qinfo != &fifo_strdata) {
5824 retval = EINVAL;
5825 goto out;
5826 }
5827
5828 if (qp->q_flag & QFULL) {
5829 retval = EAGAIN;
5830 goto out;
5831 }
5832
5833 /*
5834 * Since M_PASSFP messages include a file descriptor, we use
5835 * esballoc() and specify a custom free routine (free_passfp()) that
5836 * will close the descriptor as part of freeing the message. For
5837 * convenience, we stash the frtn_t right after the data block.
5838 */
5839 bufsize = sizeof (struct k_strrecvfd) + sizeof (frtn_t);
5840 srf = kmem_alloc(bufsize, KM_NOSLEEP);
5841 if (srf == NULL) {
5842 retval = EAGAIN;
5843 goto out;
5844 }
5845
5846 frtnp = (frtn_t *)(srf + 1);
5847 frtnp->free_arg = (caddr_t)srf;
5848 frtnp->free_func = free_passfp;
5849
5850 mp = esballoc((uchar_t *)srf, bufsize, BPRI_MED, frtnp);
5851 if (mp == NULL) {
5852 kmem_free(srf, bufsize);
5853 retval = EAGAIN;
5854 goto out;
5855 }
5856 mp->b_wptr += sizeof (struct k_strrecvfd);
5857 mp->b_datap->db_type = M_PASSFP;
5858
5859 srf->fp = fp;
5860 srf->uid = crgetuid(curthread->t_cred);
5861 srf->gid = crgetgid(curthread->t_cred);
5862 mutex_enter(&fp->f_tlock);
5863 fp->f_count++;
5864 mutex_exit(&fp->f_tlock);
5865
5866 put(qp, mp);
5867 out:
5868 releasestr(stp->sd_wrq);
5869 if (mate)
5870 releasestr(mate);
5871 return (retval);
5872 }
5873
5874 /*
5875 * Send an ioctl message downstream and wait for acknowledgement.
5876 * flags may be set to either U_TO_K or K_TO_K and a combination
5877 * of STR_NOERROR or STR_NOSIG
5878 * STR_NOSIG: Signals are essentially ignored or held and have
5879 * no effect for the duration of the call.
5880 * STR_NOERROR: Ignores stream head read, write and hup errors.
5881 * Additionally, if an existing ioctl times out, it is assumed
5882 * lost and and this ioctl will continue as if the previous ioctl had
5883 * finished. ETIME may be returned if this ioctl times out (i.e.
5884 * ic_timout is not INFTIM). Non-stream head errors may be returned if
5885 * the ioc_error indicates that the driver/module had problems,
5886 * an EFAULT was found when accessing user data, a lack of
5887 * resources, etc.
5888 */
5889 int
5890 strdoioctl(
5891 struct stdata *stp,
5892 struct strioctl *strioc,
5893 int fflags, /* file flags with model info */
5894 int flag,
5895 cred_t *crp,
5896 int *rvalp)
5897 {
5898 mblk_t *bp;
5899 struct iocblk *iocbp;
5900 struct copyreq *reqp;
5901 struct copyresp *resp;
5902 int id;
5903 int transparent = 0;
5904 int error = 0;
5905 int len = 0;
5906 caddr_t taddr;
5907 int copyflag = (flag & (U_TO_K | K_TO_K));
5908 int sigflag = (flag & STR_NOSIG);
5909 int errs;
5910 uint_t waitflags;
5911 boolean_t set_iocwaitne = B_FALSE;
5912
5913 ASSERT(copyflag == U_TO_K || copyflag == K_TO_K);
5914 ASSERT((fflags & FMODELS) != 0);
5915
5916 TRACE_2(TR_FAC_STREAMS_FR,
5917 TR_STRDOIOCTL,
5918 "strdoioctl:stp %p strioc %p", stp, strioc);
5919 if (strioc->ic_len == TRANSPARENT) { /* send arg in M_DATA block */
5920 transparent = 1;
5921 strioc->ic_len = sizeof (intptr_t);
5922 }
5923
5924 if (strioc->ic_len < 0 || (strmsgsz > 0 && strioc->ic_len > strmsgsz))
5925 return (EINVAL);
5926
5927 if ((bp = allocb_cred_wait(sizeof (union ioctypes), sigflag, &error,
5928 crp, curproc->p_pid)) == NULL)
5929 return (error);
5930
5931 bzero(bp->b_wptr, sizeof (union ioctypes));
5932
5933 iocbp = (struct iocblk *)bp->b_wptr;
5934 iocbp->ioc_count = strioc->ic_len;
5935 iocbp->ioc_cmd = strioc->ic_cmd;
5936 iocbp->ioc_flag = (fflags & FMODELS);
5937
5938 crhold(crp);
5939 iocbp->ioc_cr = crp;
5940 DB_TYPE(bp) = M_IOCTL;
5941 bp->b_wptr += sizeof (struct iocblk);
5942
5943 if (flag & STR_NOERROR)
5944 errs = STPLEX;
5945 else
5946 errs = STRHUP|STRDERR|STWRERR|STPLEX;
5947
5948 /*
5949 * If there is data to copy into ioctl block, do so.
5950 */
5951 if (iocbp->ioc_count > 0) {
5952 if (transparent)
5953 /*
5954 * Note: STR_NOERROR does not have an effect
5955 * in putiocd()
5956 */
5957 id = K_TO_K | sigflag;
5958 else
5959 id = flag;
5960 if ((error = putiocd(bp, strioc->ic_dp, id, crp)) != 0) {
5961 freemsg(bp);
5962 crfree(crp);
5963 return (error);
5964 }
5965
5966 /*
5967 * We could have slept copying in user pages.
5968 * Recheck the stream head state (the other end
5969 * of a pipe could have gone away).
5970 */
5971 if (stp->sd_flag & errs) {
5972 mutex_enter(&stp->sd_lock);
5973 error = strgeterr(stp, errs, 0);
5974 mutex_exit(&stp->sd_lock);
5975 if (error != 0) {
5976 freemsg(bp);
5977 crfree(crp);
5978 return (error);
5979 }
5980 }
5981 }
5982 if (transparent)
5983 iocbp->ioc_count = TRANSPARENT;
5984
5985 /*
5986 * Block for up to STRTIMOUT milliseconds if there is an outstanding
5987 * ioctl for this stream already running. All processes
5988 * sleeping here will be awakened as a result of an ACK
5989 * or NAK being received for the outstanding ioctl, or
5990 * as a result of the timer expiring on the outstanding
5991 * ioctl (a failure), or as a result of any waiting
5992 * process's timer expiring (also a failure).
5993 */
5994
5995 error = 0;
5996 mutex_enter(&stp->sd_lock);
5997 while ((stp->sd_flag & IOCWAIT) ||
5998 (!set_iocwaitne && (stp->sd_flag & IOCWAITNE))) {
5999 clock_t cv_rval;
6000
6001 TRACE_0(TR_FAC_STREAMS_FR,
6002 TR_STRDOIOCTL_WAIT,
6003 "strdoioctl sleeps - IOCWAIT");
6004 cv_rval = str_cv_wait(&stp->sd_iocmonitor, &stp->sd_lock,
6005 STRTIMOUT, sigflag);
6006 if (cv_rval <= 0) {
6007 if (cv_rval == 0) {
6008 error = EINTR;
6009 } else {
6010 if (flag & STR_NOERROR) {
6011 /*
6012 * Terminating current ioctl in
6013 * progress -- assume it got lost and
6014 * wake up the other thread so that the
6015 * operation completes.
6016 */
6017 if (!(stp->sd_flag & IOCWAITNE)) {
6018 set_iocwaitne = B_TRUE;
6019 stp->sd_flag |= IOCWAITNE;
6020 cv_broadcast(&stp->sd_monitor);
6021 }
6022 /*
6023 * Otherwise, there's a running
6024 * STR_NOERROR -- we have no choice
6025 * here but to wait forever (or until
6026 * interrupted).
6027 */
6028 } else {
6029 /*
6030 * pending ioctl has caused
6031 * us to time out
6032 */
6033 error = ETIME;
6034 }
6035 }
6036 } else if ((stp->sd_flag & errs)) {
6037 error = strgeterr(stp, errs, 0);
6038 }
6039 if (error) {
6040 mutex_exit(&stp->sd_lock);
6041 freemsg(bp);
6042 crfree(crp);
6043 return (error);
6044 }
6045 }
6046
6047 /*
6048 * Have control of ioctl mechanism.
6049 * Send down ioctl packet and wait for response.
6050 */
6051 if (stp->sd_iocblk != (mblk_t *)-1) {
6052 freemsg(stp->sd_iocblk);
6053 }
6054 stp->sd_iocblk = NULL;
6055
6056 /*
6057 * If this is marked with 'noerror' (internal; mostly
6058 * I_{P,}{UN,}LINK), then make sure nobody else is able to get
6059 * in here by setting IOCWAITNE.
6060 */
6061 waitflags = IOCWAIT;
6062 if (flag & STR_NOERROR)
6063 waitflags |= IOCWAITNE;
6064
6065 stp->sd_flag |= waitflags;
6066
6067 /*
6068 * Assign sequence number.
6069 */
6070 iocbp->ioc_id = stp->sd_iocid = getiocseqno();
6071
6072 mutex_exit(&stp->sd_lock);
6073
6074 TRACE_1(TR_FAC_STREAMS_FR,
6075 TR_STRDOIOCTL_PUT, "strdoioctl put: stp %p", stp);
6076 stream_willservice(stp);
6077 putnext(stp->sd_wrq, bp);
6078 stream_runservice(stp);
6079
6080 /*
6081 * Timed wait for acknowledgment. The wait time is limited by the
6082 * timeout value, which must be a positive integer (number of
6083 * milliseconds) to wait, or 0 (use default value of STRTIMOUT
6084 * milliseconds), or -1 (wait forever). This will be awakened
6085 * either by an ACK/NAK message arriving, the timer expiring, or
6086 * the timer expiring on another ioctl waiting for control of the
6087 * mechanism.
6088 */
6089 waitioc:
6090 mutex_enter(&stp->sd_lock);
6091
6092
6093 /*
6094 * If the reply has already arrived, don't sleep. If awakened from
6095 * the sleep, fail only if the reply has not arrived by then.
6096 * Otherwise, process the reply.
6097 */
6098 while (!stp->sd_iocblk) {
6099 clock_t cv_rval;
6100
6101 if (stp->sd_flag & errs) {
6102 error = strgeterr(stp, errs, 0);
6103 if (error != 0) {
6104 stp->sd_flag &= ~waitflags;
6105 cv_broadcast(&stp->sd_iocmonitor);
6106 mutex_exit(&stp->sd_lock);
6107 crfree(crp);
6108 return (error);
6109 }
6110 }
6111
6112 TRACE_0(TR_FAC_STREAMS_FR,
6113 TR_STRDOIOCTL_WAIT2,
6114 "strdoioctl sleeps awaiting reply");
6115 ASSERT(error == 0);
6116
6117 cv_rval = str_cv_wait(&stp->sd_monitor, &stp->sd_lock,
6118 (strioc->ic_timout ?
6119 strioc->ic_timout * 1000 : STRTIMOUT), sigflag);
6120
6121 /*
6122 * There are four possible cases here: interrupt, timeout,
6123 * wakeup by IOCWAITNE (above), or wakeup by strrput_nondata (a
6124 * valid M_IOCTL reply).
6125 *
6126 * If we've been awakened by a STR_NOERROR ioctl on some other
6127 * thread, then sd_iocblk will still be NULL, and IOCWAITNE
6128 * will be set. Pretend as if we just timed out. Note that
6129 * this other thread waited at least STRTIMOUT before trying to
6130 * awaken our thread, so this is indistinguishable (even for
6131 * INFTIM) from the case where we failed with ETIME waiting on
6132 * IOCWAIT in the prior loop.
6133 */
6134 if (cv_rval > 0 && !(flag & STR_NOERROR) &&
6135 stp->sd_iocblk == NULL && (stp->sd_flag & IOCWAITNE)) {
6136 cv_rval = -1;
6137 }
6138
6139 /*
6140 * note: STR_NOERROR does not protect
6141 * us here.. use ic_timout < 0
6142 */
6143 if (cv_rval <= 0) {
6144 if (cv_rval == 0) {
6145 error = EINTR;
6146 } else {
6147 error = ETIME;
6148 }
6149 /*
6150 * A message could have come in after we were scheduled
6151 * but before we were actually run.
6152 */
6153 bp = stp->sd_iocblk;
6154 stp->sd_iocblk = NULL;
6155 if (bp != NULL) {
6156 if ((bp->b_datap->db_type == M_COPYIN) ||
6157 (bp->b_datap->db_type == M_COPYOUT)) {
6158 mutex_exit(&stp->sd_lock);
6159 if (bp->b_cont) {
6160 freemsg(bp->b_cont);
6161 bp->b_cont = NULL;
6162 }
6163 bp->b_datap->db_type = M_IOCDATA;
6164 bp->b_wptr = bp->b_rptr +
6165 sizeof (struct copyresp);
6166 resp = (struct copyresp *)bp->b_rptr;
6167 resp->cp_rval =
6168 (caddr_t)1; /* failure */
6169 stream_willservice(stp);
6170 putnext(stp->sd_wrq, bp);
6171 stream_runservice(stp);
6172 mutex_enter(&stp->sd_lock);
6173 } else {
6174 freemsg(bp);
6175 }
6176 }
6177 stp->sd_flag &= ~waitflags;
6178 cv_broadcast(&stp->sd_iocmonitor);
6179 mutex_exit(&stp->sd_lock);
6180 crfree(crp);
6181 return (error);
6182 }
6183 }
6184 bp = stp->sd_iocblk;
6185 /*
6186 * Note: it is strictly impossible to get here with sd_iocblk set to
6187 * -1. This is because the initial loop above doesn't allow any new
6188 * ioctls into the fray until all others have passed this point.
6189 */
6190 ASSERT(bp != NULL && bp != (mblk_t *)-1);
6191 TRACE_1(TR_FAC_STREAMS_FR,
6192 TR_STRDOIOCTL_ACK, "strdoioctl got reply: bp %p", bp);
6193 if ((bp->b_datap->db_type == M_IOCACK) ||
6194 (bp->b_datap->db_type == M_IOCNAK)) {
6195 /* for detection of duplicate ioctl replies */
6196 stp->sd_iocblk = (mblk_t *)-1;
6197 stp->sd_flag &= ~waitflags;
6198 cv_broadcast(&stp->sd_iocmonitor);
6199 mutex_exit(&stp->sd_lock);
6200 } else {
6201 /*
6202 * flags not cleared here because we're still doing
6203 * copy in/out for ioctl.
6204 */
6205 stp->sd_iocblk = NULL;
6206 mutex_exit(&stp->sd_lock);
6207 }
6208
6209
6210 /*
6211 * Have received acknowledgment.
6212 */
6213
6214 switch (bp->b_datap->db_type) {
6215 case M_IOCACK:
6216 /*
6217 * Positive ack.
6218 */
6219 iocbp = (struct iocblk *)bp->b_rptr;
6220
6221 /*
6222 * Set error if indicated.
6223 */
6224 if (iocbp->ioc_error) {
6225 error = iocbp->ioc_error;
6226 break;
6227 }
6228
6229 /*
6230 * Set return value.
6231 */
6232 *rvalp = iocbp->ioc_rval;
6233
6234 /*
6235 * Data may have been returned in ACK message (ioc_count > 0).
6236 * If so, copy it out to the user's buffer.
6237 */
6238 if (iocbp->ioc_count && !transparent) {
6239 if (error = getiocd(bp, strioc->ic_dp, copyflag))
6240 break;
6241 }
6242 if (!transparent) {
6243 if (len) /* an M_COPYOUT was used with I_STR */
6244 strioc->ic_len = len;
6245 else
6246 strioc->ic_len = (int)iocbp->ioc_count;
6247 }
6248 break;
6249
6250 case M_IOCNAK:
6251 /*
6252 * Negative ack.
6253 *
6254 * The only thing to do is set error as specified
6255 * in neg ack packet.
6256 */
6257 iocbp = (struct iocblk *)bp->b_rptr;
6258
6259 error = (iocbp->ioc_error ? iocbp->ioc_error : EINVAL);
6260 break;
6261
6262 case M_COPYIN:
6263 /*
6264 * Driver or module has requested user ioctl data.
6265 */
6266 reqp = (struct copyreq *)bp->b_rptr;
6267
6268 /*
6269 * M_COPYIN should *never* have a message attached, though
6270 * it's harmless if it does -- thus, panic on a DEBUG
6271 * kernel and just free it on a non-DEBUG build.
6272 */
6273 ASSERT(bp->b_cont == NULL);
6274 if (bp->b_cont != NULL) {
6275 freemsg(bp->b_cont);
6276 bp->b_cont = NULL;
6277 }
6278
6279 error = putiocd(bp, reqp->cq_addr, flag, crp);
6280 if (error && bp->b_cont) {
6281 freemsg(bp->b_cont);
6282 bp->b_cont = NULL;
6283 }
6284
6285 bp->b_wptr = bp->b_rptr + sizeof (struct copyresp);
6286 bp->b_datap->db_type = M_IOCDATA;
6287
6288 mblk_setcred(bp, crp, curproc->p_pid);
6289 resp = (struct copyresp *)bp->b_rptr;
6290 resp->cp_rval = (caddr_t)(uintptr_t)error;
6291 resp->cp_flag = (fflags & FMODELS);
6292
6293 stream_willservice(stp);
6294 putnext(stp->sd_wrq, bp);
6295 stream_runservice(stp);
6296
6297 if (error) {
6298 mutex_enter(&stp->sd_lock);
6299 stp->sd_flag &= ~waitflags;
6300 cv_broadcast(&stp->sd_iocmonitor);
6301 mutex_exit(&stp->sd_lock);
6302 crfree(crp);
6303 return (error);
6304 }
6305
6306 goto waitioc;
6307
6308 case M_COPYOUT:
6309 /*
6310 * Driver or module has ioctl data for a user.
6311 */
6312 reqp = (struct copyreq *)bp->b_rptr;
6313 ASSERT(bp->b_cont != NULL);
6314
6315 /*
6316 * Always (transparent or non-transparent )
6317 * use the address specified in the request
6318 */
6319 taddr = reqp->cq_addr;
6320 if (!transparent)
6321 len = (int)reqp->cq_size;
6322
6323 /* copyout data to the provided address */
6324 error = getiocd(bp, taddr, copyflag);
6325
6326 freemsg(bp->b_cont);
6327 bp->b_cont = NULL;
6328
6329 bp->b_wptr = bp->b_rptr + sizeof (struct copyresp);
6330 bp->b_datap->db_type = M_IOCDATA;
6331
6332 mblk_setcred(bp, crp, curproc->p_pid);
6333 resp = (struct copyresp *)bp->b_rptr;
6334 resp->cp_rval = (caddr_t)(uintptr_t)error;
6335 resp->cp_flag = (fflags & FMODELS);
6336
6337 stream_willservice(stp);
6338 putnext(stp->sd_wrq, bp);
6339 stream_runservice(stp);
6340
6341 if (error) {
6342 mutex_enter(&stp->sd_lock);
6343 stp->sd_flag &= ~waitflags;
6344 cv_broadcast(&stp->sd_iocmonitor);
6345 mutex_exit(&stp->sd_lock);
6346 crfree(crp);
6347 return (error);
6348 }
6349 goto waitioc;
6350
6351 default:
6352 ASSERT(0);
6353 mutex_enter(&stp->sd_lock);
6354 stp->sd_flag &= ~waitflags;
6355 cv_broadcast(&stp->sd_iocmonitor);
6356 mutex_exit(&stp->sd_lock);
6357 break;
6358 }
6359
6360 freemsg(bp);
6361 crfree(crp);
6362 return (error);
6363 }
6364
6365 /*
6366 * Send an M_CMD message downstream and wait for a reply. This is a ptools
6367 * special used to retrieve information from modules/drivers a stream without
6368 * being subjected to flow control or interfering with pending messages on the
6369 * stream (e.g. an ioctl in flight).
6370 */
6371 int
6372 strdocmd(struct stdata *stp, struct strcmd *scp, cred_t *crp)
6373 {
6374 mblk_t *mp;
6375 struct cmdblk *cmdp;
6376 int error = 0;
6377 int errs = STRHUP|STRDERR|STWRERR|STPLEX;
6378 clock_t rval, timeout = STRTIMOUT;
6379
6380 if (scp->sc_len < 0 || scp->sc_len > sizeof (scp->sc_buf) ||
6381 scp->sc_timeout < -1)
6382 return (EINVAL);
6383
6384 if (scp->sc_timeout > 0)
6385 timeout = scp->sc_timeout * MILLISEC;
6386
6387 if ((mp = allocb_cred(sizeof (struct cmdblk), crp,
6388 curproc->p_pid)) == NULL)
6389 return (ENOMEM);
6390
6391 crhold(crp);
6392
6393 cmdp = (struct cmdblk *)mp->b_wptr;
6394 cmdp->cb_cr = crp;
6395 cmdp->cb_cmd = scp->sc_cmd;
6396 cmdp->cb_len = scp->sc_len;
6397 cmdp->cb_error = 0;
6398 mp->b_wptr += sizeof (struct cmdblk);
6399
6400 DB_TYPE(mp) = M_CMD;
6401 DB_CPID(mp) = curproc->p_pid;
6402
6403 /*
6404 * Copy in the payload.
6405 */
6406 if (cmdp->cb_len > 0) {
6407 mp->b_cont = allocb_cred(sizeof (scp->sc_buf), crp,
6408 curproc->p_pid);
6409 if (mp->b_cont == NULL) {
6410 error = ENOMEM;
6411 goto out;
6412 }
6413
6414 /* cb_len comes from sc_len, which has already been checked */
6415 ASSERT(cmdp->cb_len <= sizeof (scp->sc_buf));
6416 (void) bcopy(scp->sc_buf, mp->b_cont->b_wptr, cmdp->cb_len);
6417 mp->b_cont->b_wptr += cmdp->cb_len;
6418 DB_CPID(mp->b_cont) = curproc->p_pid;
6419 }
6420
6421 /*
6422 * Since this mechanism is strictly for ptools, and since only one
6423 * process can be grabbed at a time, we simply fail if there's
6424 * currently an operation pending.
6425 */
6426 mutex_enter(&stp->sd_lock);
6427 if (stp->sd_flag & STRCMDWAIT) {
6428 mutex_exit(&stp->sd_lock);
6429 error = EBUSY;
6430 goto out;
6431 }
6432 stp->sd_flag |= STRCMDWAIT;
6433 ASSERT(stp->sd_cmdblk == NULL);
6434 mutex_exit(&stp->sd_lock);
6435
6436 putnext(stp->sd_wrq, mp);
6437 mp = NULL;
6438
6439 /*
6440 * Timed wait for acknowledgment. If the reply has already arrived,
6441 * don't sleep. If awakened from the sleep, fail only if the reply
6442 * has not arrived by then. Otherwise, process the reply.
6443 */
6444 mutex_enter(&stp->sd_lock);
6445 while (stp->sd_cmdblk == NULL) {
6446 if (stp->sd_flag & errs) {
6447 if ((error = strgeterr(stp, errs, 0)) != 0)
6448 goto waitout;
6449 }
6450
6451 rval = str_cv_wait(&stp->sd_monitor, &stp->sd_lock, timeout, 0);
6452 if (stp->sd_cmdblk != NULL)
6453 break;
6454
6455 if (rval <= 0) {
6456 error = (rval == 0) ? EINTR : ETIME;
6457 goto waitout;
6458 }
6459 }
6460
6461 /*
6462 * We received a reply.
6463 */
6464 mp = stp->sd_cmdblk;
6465 stp->sd_cmdblk = NULL;
6466 ASSERT(mp != NULL && DB_TYPE(mp) == M_CMD);
6467 ASSERT(stp->sd_flag & STRCMDWAIT);
6468 stp->sd_flag &= ~STRCMDWAIT;
6469 mutex_exit(&stp->sd_lock);
6470
6471 cmdp = (struct cmdblk *)mp->b_rptr;
6472 if ((error = cmdp->cb_error) != 0)
6473 goto out;
6474
6475 /*
6476 * Data may have been returned in the reply (cb_len > 0).
6477 * If so, copy it out to the user's buffer.
6478 */
6479 if (cmdp->cb_len > 0) {
6480 if (mp->b_cont == NULL || MBLKL(mp->b_cont) < cmdp->cb_len) {
6481 error = EPROTO;
6482 goto out;
6483 }
6484
6485 cmdp->cb_len = MIN(cmdp->cb_len, sizeof (scp->sc_buf));
6486 (void) bcopy(mp->b_cont->b_rptr, scp->sc_buf, cmdp->cb_len);
6487 }
6488 scp->sc_len = cmdp->cb_len;
6489 out:
6490 freemsg(mp);
6491 crfree(crp);
6492 return (error);
6493 waitout:
6494 ASSERT(stp->sd_cmdblk == NULL);
6495 stp->sd_flag &= ~STRCMDWAIT;
6496 mutex_exit(&stp->sd_lock);
6497 crfree(crp);
6498 return (error);
6499 }
6500
6501 /*
6502 * For the SunOS keyboard driver.
6503 * Return the next available "ioctl" sequence number.
6504 * Exported, so that streams modules can send "ioctl" messages
6505 * downstream from their open routine.
6506 */
6507 int
6508 getiocseqno(void)
6509 {
6510 int i;
6511
6512 mutex_enter(&strresources);
6513 i = ++ioc_id;
6514 mutex_exit(&strresources);
6515 return (i);
6516 }
6517
6518 /*
6519 * Get the next message from the read queue. If the message is
6520 * priority, STRPRI will have been set by strrput(). This flag
 * should be reset only when the entire message at the front of the
 * queue has been consumed.
6523 *
6524 * NOTE: strgetmsg and kstrgetmsg have much of the logic in common.
6525 */
6526 int
6527 strgetmsg(
6528 struct vnode *vp,
6529 struct strbuf *mctl,
6530 struct strbuf *mdata,
6531 unsigned char *prip,
6532 int *flagsp,
6533 int fmode,
6534 rval_t *rvp)
6535 {
6536 struct stdata *stp;
6537 mblk_t *bp, *nbp;
6538 mblk_t *savemp = NULL;
6539 mblk_t *savemptail = NULL;
6540 uint_t old_sd_flag;
6541 int flg;
6542 int more = 0;
6543 int error = 0;
6544 char first = 1;
6545 uint_t mark; /* Contains MSG*MARK and _LASTMARK */
6546 #define _LASTMARK 0x8000 /* Distinct from MSG*MARK */
6547 unsigned char pri = 0;
6548 queue_t *q;
6549 int pr = 0; /* Partial read successful */
6550 struct uio uios;
6551 struct uio *uiop = &uios;
6552 struct iovec iovs;
6553 unsigned char type;
6554
6555 TRACE_1(TR_FAC_STREAMS_FR, TR_STRGETMSG_ENTER,
6556 "strgetmsg:%p", vp);
6557
6558 ASSERT(vp->v_stream);
6559 stp = vp->v_stream;
6560 rvp->r_val1 = 0;
6561
6562 mutex_enter(&stp->sd_lock);
6563
6564 if ((error = i_straccess(stp, JCREAD)) != 0) {
6565 mutex_exit(&stp->sd_lock);
6566 return (error);
6567 }
6568
6569 if (stp->sd_flag & (STRDERR|STPLEX)) {
6570 error = strgeterr(stp, STRDERR|STPLEX, 0);
6571 if (error != 0) {
6572 mutex_exit(&stp->sd_lock);
6573 return (error);
6574 }
6575 }
6576 mutex_exit(&stp->sd_lock);
6577
6578 switch (*flagsp) {
6579 case MSG_HIPRI:
6580 if (*prip != 0)
6581 return (EINVAL);
6582 break;
6583
6584 case MSG_ANY:
6585 case MSG_BAND:
6586 break;
6587
6588 default:
6589 return (EINVAL);
6590 }
6591 /*
6592 * Setup uio and iov for data part
6593 */
6594 iovs.iov_base = mdata->buf;
6595 iovs.iov_len = mdata->maxlen;
6596 uios.uio_iov = &iovs;
6597 uios.uio_iovcnt = 1;
6598 uios.uio_loffset = 0;
6599 uios.uio_segflg = UIO_USERSPACE;
6600 uios.uio_fmode = 0;
6601 uios.uio_extflg = UIO_COPY_CACHED;
6602 uios.uio_resid = mdata->maxlen;
6603 uios.uio_offset = 0;
6604
6605 q = _RD(stp->sd_wrq);
6606 mutex_enter(&stp->sd_lock);
6607 old_sd_flag = stp->sd_flag;
6608 mark = 0;
6609 for (;;) {
6610 int done = 0;
6611 mblk_t *q_first = q->q_first;
6612
6613 /*
6614 * Get the next message of appropriate priority
6615 * from the stream head. If the caller is interested
6616 * in band or hipri messages, then they should already
6617 * be enqueued at the stream head. On the other hand
6618 * if the caller wants normal (band 0) messages, they
6619 * might be deferred in a synchronous stream and they
6620 * will need to be pulled up.
6621 *
6622 * After we have dequeued a message, we might find that
6623 * it was a deferred M_SIG that was enqueued at the
6624 * stream head. It must now be posted as part of the
6625 * read by calling strsignal_nolock().
6626 *
6627 * Also note that strrput does not enqueue an M_PCSIG,
6628 * and there cannot be more than one hipri message,
6629 * so there was no need to have the M_PCSIG case.
6630 *
6631 * At some time it might be nice to try and wrap the
6632 * functionality of kstrgetmsg() and strgetmsg() into
6633 * a common routine so to reduce the amount of replicated
6634 * code (since they are extremely similar).
6635 */
6636 if (!(*flagsp & (MSG_HIPRI|MSG_BAND))) {
6637 /* Asking for normal, band0 data */
6638 bp = strget(stp, q, uiop, first, &error);
6639 ASSERT(MUTEX_HELD(&stp->sd_lock));
6640 if (bp != NULL) {
6641 if (DB_TYPE(bp) == M_SIG) {
6642 strsignal_nolock(stp, *bp->b_rptr,
6643 bp->b_band);
6644 freemsg(bp);
6645 continue;
6646 } else {
6647 break;
6648 }
6649 }
6650 if (error != 0)
6651 goto getmout;
6652
6653 /*
6654 * We can't depend on the value of STRPRI here because
6655 * the stream head may be in transit. Therefore, we
6656 * must look at the type of the first message to
6657 * determine if a high priority messages is waiting
6658 */
6659 } else if ((*flagsp & MSG_HIPRI) && q_first != NULL &&
6660 DB_TYPE(q_first) >= QPCTL &&
6661 (bp = getq_noenab(q, 0)) != NULL) {
6662 /* Asked for HIPRI and got one */
6663 ASSERT(DB_TYPE(bp) >= QPCTL);
6664 break;
6665 } else if ((*flagsp & MSG_BAND) && q_first != NULL &&
6666 ((q_first->b_band >= *prip) || DB_TYPE(q_first) >= QPCTL) &&
6667 (bp = getq_noenab(q, 0)) != NULL) {
6668 /*
6669 * Asked for at least band "prip" and got either at
6670 * least that band or a hipri message.
6671 */
6672 ASSERT(bp->b_band >= *prip || DB_TYPE(bp) >= QPCTL);
6673 if (DB_TYPE(bp) == M_SIG) {
6674 strsignal_nolock(stp, *bp->b_rptr, bp->b_band);
6675 freemsg(bp);
6676 continue;
6677 } else {
6678 break;
6679 }
6680 }
6681
6682 /* No data. Time to sleep? */
6683 qbackenable(q, 0);
6684
6685 /*
6686 * If STRHUP or STREOF, return 0 length control and data.
6687 * If resid is 0, then a read(fd,buf,0) was done. Do not
6688 * sleep to satisfy this request because by default we have
6689 * zero bytes to return.
6690 */
6691 if ((stp->sd_flag & (STRHUP|STREOF)) || (mctl->maxlen == 0 &&
6692 mdata->maxlen == 0)) {
6693 mctl->len = mdata->len = 0;
6694 *flagsp = 0;
6695 mutex_exit(&stp->sd_lock);
6696 return (0);
6697 }
6698 TRACE_2(TR_FAC_STREAMS_FR, TR_STRGETMSG_WAIT,
6699 "strgetmsg calls strwaitq:%p, %p",
6700 vp, uiop);
6701 if (((error = strwaitq(stp, GETWAIT, (ssize_t)0, fmode, -1,
6702 &done)) != 0) || done) {
6703 TRACE_2(TR_FAC_STREAMS_FR, TR_STRGETMSG_DONE,
6704 "strgetmsg error or done:%p, %p",
6705 vp, uiop);
6706 mutex_exit(&stp->sd_lock);
6707 return (error);
6708 }
6709 TRACE_2(TR_FAC_STREAMS_FR, TR_STRGETMSG_AWAKE,
6710 "strgetmsg awakes:%p, %p", vp, uiop);
6711 if ((error = i_straccess(stp, JCREAD)) != 0) {
6712 mutex_exit(&stp->sd_lock);
6713 return (error);
6714 }
6715 first = 0;
6716 }
6717 ASSERT(bp != NULL);
6718 /*
6719 * Extract any mark information. If the message is not completely
6720 * consumed this information will be put in the mblk
6721 * that is putback.
6722 * If MSGMARKNEXT is set and the message is completely consumed
6723 * the STRATMARK flag will be set below. Likewise, if
6724 * MSGNOTMARKNEXT is set and the message is
6725 * completely consumed STRNOTATMARK will be set.
6726 */
6727 mark = bp->b_flag & (MSGMARK | MSGMARKNEXT | MSGNOTMARKNEXT);
6728 ASSERT((mark & (MSGMARKNEXT|MSGNOTMARKNEXT)) !=
6729 (MSGMARKNEXT|MSGNOTMARKNEXT));
6730 if (mark != 0 && bp == stp->sd_mark) {
6731 mark |= _LASTMARK;
6732 stp->sd_mark = NULL;
6733 }
6734 /*
6735 * keep track of the original message type and priority
6736 */
6737 pri = bp->b_band;
6738 type = bp->b_datap->db_type;
6739 if (type == M_PASSFP) {
6740 if ((mark & _LASTMARK) && (stp->sd_mark == NULL))
6741 stp->sd_mark = bp;
6742 bp->b_flag |= mark & ~_LASTMARK;
6743 putback(stp, q, bp, pri);
6744 qbackenable(q, pri);
6745 mutex_exit(&stp->sd_lock);
6746 return (EBADMSG);
6747 }
6748 ASSERT(type != M_SIG);
6749
6750 /*
6751 * Set this flag so strrput will not generate signals. Need to
6752 * make sure this flag is cleared before leaving this routine
6753 * else signals will stop being sent.
6754 */
6755 stp->sd_flag |= STRGETINPROG;
6756 mutex_exit(&stp->sd_lock);
6757
6758 if (STREAM_NEEDSERVICE(stp))
6759 stream_runservice(stp);
6760
6761 /*
6762 * Set HIPRI flag if message is priority.
6763 */
6764 if (type >= QPCTL)
6765 flg = MSG_HIPRI;
6766 else
6767 flg = MSG_BAND;
6768
6769 /*
6770 * First process PROTO or PCPROTO blocks, if any.
6771 */
6772 if (mctl->maxlen >= 0 && type != M_DATA) {
6773 size_t n, bcnt;
6774 char *ubuf;
6775
6776 bcnt = mctl->maxlen;
6777 ubuf = mctl->buf;
6778 while (bp != NULL && bp->b_datap->db_type != M_DATA) {
6779 if ((n = MIN(bcnt, bp->b_wptr - bp->b_rptr)) != 0 &&
6780 copyout(bp->b_rptr, ubuf, n)) {
6781 error = EFAULT;
6782 mutex_enter(&stp->sd_lock);
6783 /*
6784 * clear stream head pri flag based on
6785 * first message type
6786 */
6787 if (type >= QPCTL) {
6788 ASSERT(type == M_PCPROTO);
6789 stp->sd_flag &= ~STRPRI;
6790 }
6791 more = 0;
6792 freemsg(bp);
6793 goto getmout;
6794 }
6795 ubuf += n;
6796 bp->b_rptr += n;
6797 if (bp->b_rptr >= bp->b_wptr) {
6798 nbp = bp;
6799 bp = bp->b_cont;
6800 freeb(nbp);
6801 }
6802 ASSERT(n <= bcnt);
6803 bcnt -= n;
6804 if (bcnt == 0)
6805 break;
6806 }
6807 mctl->len = mctl->maxlen - bcnt;
6808 } else
6809 mctl->len = -1;
6810
6811 if (bp && bp->b_datap->db_type != M_DATA) {
6812 /*
6813 * More PROTO blocks in msg.
6814 */
6815 more |= MORECTL;
6816 savemp = bp;
6817 while (bp && bp->b_datap->db_type != M_DATA) {
6818 savemptail = bp;
6819 bp = bp->b_cont;
6820 }
6821 savemptail->b_cont = NULL;
6822 }
6823
6824 /*
6825 * Now process DATA blocks, if any.
6826 */
6827 if (mdata->maxlen >= 0 && bp) {
6828 /*
6829 * struiocopyout will consume a potential zero-length
6830 * M_DATA even if uio_resid is zero.
6831 */
6832 size_t oldresid = uiop->uio_resid;
6833
6834 bp = struiocopyout(bp, uiop, &error);
6835 if (error != 0) {
6836 mutex_enter(&stp->sd_lock);
6837 /*
6838 * clear stream head hi pri flag based on
6839 * first message
6840 */
6841 if (type >= QPCTL) {
6842 ASSERT(type == M_PCPROTO);
6843 stp->sd_flag &= ~STRPRI;
6844 }
6845 more = 0;
6846 freemsg(savemp);
6847 goto getmout;
6848 }
6849 /*
6850 * (pr == 1) indicates a partial read.
6851 */
6852 if (oldresid > uiop->uio_resid)
6853 pr = 1;
6854 mdata->len = mdata->maxlen - uiop->uio_resid;
6855 } else
6856 mdata->len = -1;
6857
6858 if (bp) { /* more data blocks in msg */
6859 more |= MOREDATA;
6860 if (savemp)
6861 savemptail->b_cont = bp;
6862 else
6863 savemp = bp;
6864 }
6865
6866 mutex_enter(&stp->sd_lock);
6867 if (savemp) {
6868 if (pr && (savemp->b_datap->db_type == M_DATA) &&
6869 msgnodata(savemp)) {
6870 /*
6871 * Avoid queuing a zero-length tail part of
6872 * a message. pr=1 indicates that we read some of
6873 * the message.
6874 */
6875 freemsg(savemp);
6876 more &= ~MOREDATA;
6877 /*
6878 * clear stream head hi pri flag based on
6879 * first message
6880 */
6881 if (type >= QPCTL) {
6882 ASSERT(type == M_PCPROTO);
6883 stp->sd_flag &= ~STRPRI;
6884 }
6885 } else {
6886 savemp->b_band = pri;
6887 /*
6888 * If the first message was HIPRI and the one we're
6889 * putting back isn't, then clear STRPRI, otherwise
6890 * set STRPRI again. Note that we must set STRPRI
6891 * again since the flush logic in strrput_nondata()
6892 * may have cleared it while we had sd_lock dropped.
6893 */
6894 if (type >= QPCTL) {
6895 ASSERT(type == M_PCPROTO);
6896 if (queclass(savemp) < QPCTL)
6897 stp->sd_flag &= ~STRPRI;
6898 else
6899 stp->sd_flag |= STRPRI;
6900 } else if (queclass(savemp) >= QPCTL) {
6901 /*
6902 * The first message was not a HIPRI message,
6903 * but the one we are about to putback is.
6904 * For simplicitly, we do not allow for HIPRI
6905 * messages to be embedded in the message
6906 * body, so just force it to same type as
6907 * first message.
6908 */
6909 ASSERT(type == M_DATA || type == M_PROTO);
6910 ASSERT(savemp->b_datap->db_type == M_PCPROTO);
6911 savemp->b_datap->db_type = type;
6912 }
6913 if (mark != 0) {
6914 savemp->b_flag |= mark & ~_LASTMARK;
6915 if ((mark & _LASTMARK) &&
6916 (stp->sd_mark == NULL)) {
6917 /*
6918 * If another marked message arrived
6919 * while sd_lock was not held sd_mark
6920 * would be non-NULL.
6921 */
6922 stp->sd_mark = savemp;
6923 }
6924 }
6925 putback(stp, q, savemp, pri);
6926 }
6927 } else {
6928 /*
6929 * The complete message was consumed.
6930 *
6931 * If another M_PCPROTO arrived while sd_lock was not held
6932 * it would have been discarded since STRPRI was still set.
6933 *
6934 * Move the MSG*MARKNEXT information
6935 * to the stream head just in case
6936 * the read queue becomes empty.
6937 * clear stream head hi pri flag based on
6938 * first message
6939 *
6940 * If the stream head was at the mark
6941 * (STRATMARK) before we dropped sd_lock above
6942 * and some data was consumed then we have
6943 * moved past the mark thus STRATMARK is
6944 * cleared. However, if a message arrived in
6945 * strrput during the copyout above causing
6946 * STRATMARK to be set we can not clear that
6947 * flag.
6948 */
6949 if (type >= QPCTL) {
6950 ASSERT(type == M_PCPROTO);
6951 stp->sd_flag &= ~STRPRI;
6952 }
6953 if (mark & (MSGMARKNEXT|MSGNOTMARKNEXT|MSGMARK)) {
6954 if (mark & MSGMARKNEXT) {
6955 stp->sd_flag &= ~STRNOTATMARK;
6956 stp->sd_flag |= STRATMARK;
6957 } else if (mark & MSGNOTMARKNEXT) {
6958 stp->sd_flag &= ~STRATMARK;
6959 stp->sd_flag |= STRNOTATMARK;
6960 } else {
6961 stp->sd_flag &= ~(STRATMARK|STRNOTATMARK);
6962 }
6963 } else if (pr && (old_sd_flag & STRATMARK)) {
6964 stp->sd_flag &= ~STRATMARK;
6965 }
6966 }
6967
6968 *flagsp = flg;
6969 *prip = pri;
6970
6971 /*
6972 * Getmsg cleanup processing - if the state of the queue has changed
6973 * some signals may need to be sent and/or poll awakened.
6974 */
6975 getmout:
6976 qbackenable(q, pri);
6977
6978 /*
6979 * We dropped the stream head lock above. Send all M_SIG messages
6980 * before processing stream head for SIGPOLL messages.
6981 */
6982 ASSERT(MUTEX_HELD(&stp->sd_lock));
6983 while ((bp = q->q_first) != NULL &&
6984 (bp->b_datap->db_type == M_SIG)) {
6985 /*
6986 * sd_lock is held so the content of the read queue can not
6987 * change.
6988 */
6989 bp = getq(q);
6990 ASSERT(bp != NULL && bp->b_datap->db_type == M_SIG);
6991
6992 strsignal_nolock(stp, *bp->b_rptr, bp->b_band);
6993 mutex_exit(&stp->sd_lock);
6994 freemsg(bp);
6995 if (STREAM_NEEDSERVICE(stp))
6996 stream_runservice(stp);
6997 mutex_enter(&stp->sd_lock);
6998 }
6999
7000 /*
7001 * stream head cannot change while we make the determination
7002 * whether or not to send a signal. Drop the flag to allow strrput
7003 * to send firstmsgsigs again.
7004 */
7005 stp->sd_flag &= ~STRGETINPROG;
7006
7007 /*
7008 * If the type of message at the front of the queue changed
7009 * due to the receive the appropriate signals and pollwakeup events
7010 * are generated. The type of changes are:
7011 * Processed a hipri message, q_first is not hipri.
7012 * Processed a band X message, and q_first is band Y.
7013 * The generated signals and pollwakeups are identical to what
7014 * strrput() generates should the message that is now on q_first
7015 * arrive to an empty read queue.
7016 *
7017 * Note: only strrput will send a signal for a hipri message.
7018 */
7019 if ((bp = q->q_first) != NULL && !(stp->sd_flag & STRPRI)) {
7020 strsigset_t signals = 0;
7021 strpollset_t pollwakeups = 0;
7022
7023 if (flg & MSG_HIPRI) {
7024 /*
7025 * Removed a hipri message. Regular data at
7026 * the front of the queue.
7027 */
7028 if (bp->b_band == 0) {
7029 signals = S_INPUT | S_RDNORM;
7030 pollwakeups = POLLIN | POLLRDNORM;
7031 } else {
7032 signals = S_INPUT | S_RDBAND;
7033 pollwakeups = POLLIN | POLLRDBAND;
7034 }
7035 } else if (pri != bp->b_band) {
7036 /*
7037 * The band is different for the new q_first.
7038 */
7039 if (bp->b_band == 0) {
7040 signals = S_RDNORM;
7041 pollwakeups = POLLIN | POLLRDNORM;
7042 } else {
7043 signals = S_RDBAND;
7044 pollwakeups = POLLIN | POLLRDBAND;
7045 }
7046 }
7047
7048 if (pollwakeups != 0) {
7049 if (pollwakeups == (POLLIN | POLLRDNORM)) {
7050 if (!(stp->sd_rput_opt & SR_POLLIN))
7051 goto no_pollwake;
7052 stp->sd_rput_opt &= ~SR_POLLIN;
7053 }
7054 mutex_exit(&stp->sd_lock);
7055 pollwakeup(&stp->sd_pollist, pollwakeups);
7056 mutex_enter(&stp->sd_lock);
7057 }
7058 no_pollwake:
7059
7060 if (stp->sd_sigflags & signals)
7061 strsendsig(stp->sd_siglist, signals, bp->b_band, 0);
7062 }
7063 mutex_exit(&stp->sd_lock);
7064
7065 rvp->r_val1 = more;
7066 return (error);
7067 #undef _LASTMARK
7068 }
7069
7070 /*
7071 * Get the next message from the read queue. If the message is
7072 * priority, STRPRI will have been set by strrput(). This flag
7073 * should be reset only when the entire message at the front of the
7074 * queue as been consumed.
7075 *
7076 * If uiop is NULL all data is returned in mctlp.
7077 * Note that a NULL uiop implies that FNDELAY and FNONBLOCK are assumed
7078 * not enabled.
7079 * The timeout parameter is in milliseconds; -1 for infinity.
7080 * This routine handles the consolidation private flags:
7081 * MSG_IGNERROR Ignore any stream head error except STPLEX.
7082 * MSG_DELAYERROR Defer the error check until the queue is empty.
7083 * MSG_HOLDSIG Hold signals while waiting for data.
7084 * MSG_IPEEK Only peek at messages.
7085 * MSG_DISCARDTAIL Discard the tail M_DATA part of the message
7086 * that doesn't fit.
7087 * MSG_NOMARK If the message is marked leave it on the queue.
7088 *
7089 * NOTE: strgetmsg and kstrgetmsg have much of the logic in common.
7090 */
7091 int
7092 kstrgetmsg(
7093 struct vnode *vp,
7094 mblk_t **mctlp,
7095 struct uio *uiop,
7096 unsigned char *prip,
7097 int *flagsp,
7098 clock_t timout,
7099 rval_t *rvp)
7100 {
7101 struct stdata *stp;
7102 mblk_t *bp, *nbp;
7103 mblk_t *savemp = NULL;
7104 mblk_t *savemptail = NULL;
7105 int flags;
7106 uint_t old_sd_flag;
7107 int flg;
7108 int more = 0;
7109 int error = 0;
7110 char first = 1;
7111 uint_t mark; /* Contains MSG*MARK and _LASTMARK */
7112 #define _LASTMARK 0x8000 /* Distinct from MSG*MARK */
7113 unsigned char pri = 0;
7114 queue_t *q;
7115 int pr = 0; /* Partial read successful */
7116 unsigned char type;
7117
7118 TRACE_1(TR_FAC_STREAMS_FR, TR_KSTRGETMSG_ENTER,
7119 "kstrgetmsg:%p", vp);
7120
7121 ASSERT(vp->v_stream);
7122 stp = vp->v_stream;
7123 rvp->r_val1 = 0;
7124
7125 mutex_enter(&stp->sd_lock);
7126
7127 if ((error = i_straccess(stp, JCREAD)) != 0) {
7128 mutex_exit(&stp->sd_lock);
7129 return (error);
7130 }
7131
7132 flags = *flagsp;
7133 if (stp->sd_flag & (STRDERR|STPLEX)) {
7134 if ((stp->sd_flag & STPLEX) ||
7135 (flags & (MSG_IGNERROR|MSG_DELAYERROR)) == 0) {
7136 error = strgeterr(stp, STRDERR|STPLEX,
7137 (flags & MSG_IPEEK));
7138 if (error != 0) {
7139 mutex_exit(&stp->sd_lock);
7140 return (error);
7141 }
7142 }
7143 }
7144 mutex_exit(&stp->sd_lock);
7145
7146 switch (flags & (MSG_HIPRI|MSG_ANY|MSG_BAND)) {
7147 case MSG_HIPRI:
7148 if (*prip != 0)
7149 return (EINVAL);
7150 break;
7151
7152 case MSG_ANY:
7153 case MSG_BAND:
7154 break;
7155
7156 default:
7157 return (EINVAL);
7158 }
7159
7160 retry:
7161 q = _RD(stp->sd_wrq);
7162 mutex_enter(&stp->sd_lock);
7163 old_sd_flag = stp->sd_flag;
7164 mark = 0;
7165 for (;;) {
7166 int done = 0;
7167 int waitflag;
7168 int fmode;
7169 mblk_t *q_first = q->q_first;
7170
7171 /*
7172 * This section of the code operates just like the code
7173 * in strgetmsg(). There is a comment there about what
7174 * is going on here.
7175 */
7176 if (!(flags & (MSG_HIPRI|MSG_BAND))) {
7177 /* Asking for normal, band0 data */
7178 bp = strget(stp, q, uiop, first, &error);
7179 ASSERT(MUTEX_HELD(&stp->sd_lock));
7180 if (bp != NULL) {
7181 if (DB_TYPE(bp) == M_SIG) {
7182 strsignal_nolock(stp, *bp->b_rptr,
7183 bp->b_band);
7184 freemsg(bp);
7185 continue;
7186 } else {
7187 break;
7188 }
7189 }
7190 if (error != 0) {
7191 goto getmout;
7192 }
7193 /*
7194 * We can't depend on the value of STRPRI here because
7195 * the stream head may be in transit. Therefore, we
7196 * must look at the type of the first message to
7197 * determine if a high priority messages is waiting
7198 */
7199 } else if ((flags & MSG_HIPRI) && q_first != NULL &&
7200 DB_TYPE(q_first) >= QPCTL &&
7201 (bp = getq_noenab(q, 0)) != NULL) {
7202 ASSERT(DB_TYPE(bp) >= QPCTL);
7203 break;
7204 } else if ((flags & MSG_BAND) && q_first != NULL &&
7205 ((q_first->b_band >= *prip) || DB_TYPE(q_first) >= QPCTL) &&
7206 (bp = getq_noenab(q, 0)) != NULL) {
7207 /*
7208 * Asked for at least band "prip" and got either at
7209 * least that band or a hipri message.
7210 */
7211 ASSERT(bp->b_band >= *prip || DB_TYPE(bp) >= QPCTL);
7212 if (DB_TYPE(bp) == M_SIG) {
7213 strsignal_nolock(stp, *bp->b_rptr, bp->b_band);
7214 freemsg(bp);
7215 continue;
7216 } else {
7217 break;
7218 }
7219 }
7220
7221 /* No data. Time to sleep? */
7222 qbackenable(q, 0);
7223
7224 /*
7225 * Delayed error notification?
7226 */
7227 if ((stp->sd_flag & (STRDERR|STPLEX)) &&
7228 (flags & (MSG_IGNERROR|MSG_DELAYERROR)) == MSG_DELAYERROR) {
7229 error = strgeterr(stp, STRDERR|STPLEX,
7230 (flags & MSG_IPEEK));
7231 if (error != 0) {
7232 mutex_exit(&stp->sd_lock);
7233 return (error);
7234 }
7235 }
7236
7237 /*
7238 * If STRHUP or STREOF, return 0 length control and data.
7239 * If a read(fd,buf,0) has been done, do not sleep, just
7240 * return.
7241 *
7242 * If mctlp == NULL and uiop == NULL, then the code will
7243 * do the strwaitq. This is an understood way of saying
7244 * sleep "polling" until a message is received.
7245 */
7246 if ((stp->sd_flag & (STRHUP|STREOF)) ||
7247 (uiop != NULL && uiop->uio_resid == 0)) {
7248 if (mctlp != NULL)
7249 *mctlp = NULL;
7250 *flagsp = 0;
7251 mutex_exit(&stp->sd_lock);
7252 return (0);
7253 }
7254
7255 waitflag = GETWAIT;
7256 if (flags &
7257 (MSG_HOLDSIG|MSG_IGNERROR|MSG_IPEEK|MSG_DELAYERROR)) {
7258 if (flags & MSG_HOLDSIG)
7259 waitflag |= STR_NOSIG;
7260 if (flags & MSG_IGNERROR)
7261 waitflag |= STR_NOERROR;
7262 if (flags & MSG_IPEEK)
7263 waitflag |= STR_PEEK;
7264 if (flags & MSG_DELAYERROR)
7265 waitflag |= STR_DELAYERR;
7266 }
7267 if (uiop != NULL)
7268 fmode = uiop->uio_fmode;
7269 else
7270 fmode = 0;
7271
7272 TRACE_2(TR_FAC_STREAMS_FR, TR_KSTRGETMSG_WAIT,
7273 "kstrgetmsg calls strwaitq:%p, %p",
7274 vp, uiop);
7275 if (((error = strwaitq(stp, waitflag, (ssize_t)0,
7276 fmode, timout, &done))) != 0 || done) {
7277 TRACE_2(TR_FAC_STREAMS_FR, TR_KSTRGETMSG_DONE,
7278 "kstrgetmsg error or done:%p, %p",
7279 vp, uiop);
7280 mutex_exit(&stp->sd_lock);
7281 return (error);
7282 }
7283 TRACE_2(TR_FAC_STREAMS_FR, TR_KSTRGETMSG_AWAKE,
7284 "kstrgetmsg awakes:%p, %p", vp, uiop);
7285 if ((error = i_straccess(stp, JCREAD)) != 0) {
7286 mutex_exit(&stp->sd_lock);
7287 return (error);
7288 }
7289 first = 0;
7290 }
7291 ASSERT(bp != NULL);
7292 /*
7293 * Extract any mark information. If the message is not completely
7294 * consumed this information will be put in the mblk
7295 * that is putback.
7296 * If MSGMARKNEXT is set and the message is completely consumed
7297 * the STRATMARK flag will be set below. Likewise, if
7298 * MSGNOTMARKNEXT is set and the message is
7299 * completely consumed STRNOTATMARK will be set.
7300 */
7301 mark = bp->b_flag & (MSGMARK | MSGMARKNEXT | MSGNOTMARKNEXT);
7302 ASSERT((mark & (MSGMARKNEXT|MSGNOTMARKNEXT)) !=
7303 (MSGMARKNEXT|MSGNOTMARKNEXT));
7304 pri = bp->b_band;
7305 if (mark != 0) {
7306 /*
7307 * If the caller doesn't want the mark return.
7308 * Used to implement MSG_WAITALL in sockets.
7309 */
7310 if (flags & MSG_NOMARK) {
7311 putback(stp, q, bp, pri);
7312 qbackenable(q, pri);
7313 mutex_exit(&stp->sd_lock);
7314 return (EWOULDBLOCK);
7315 }
7316 if (bp == stp->sd_mark) {
7317 mark |= _LASTMARK;
7318 stp->sd_mark = NULL;
7319 }
7320 }
7321
7322 /*
7323 * keep track of the first message type
7324 */
7325 type = bp->b_datap->db_type;
7326
7327 if (bp->b_datap->db_type == M_PASSFP) {
7328 if ((mark & _LASTMARK) && (stp->sd_mark == NULL))
7329 stp->sd_mark = bp;
7330 bp->b_flag |= mark & ~_LASTMARK;
7331 putback(stp, q, bp, pri);
7332 qbackenable(q, pri);
7333 mutex_exit(&stp->sd_lock);
7334 return (EBADMSG);
7335 }
7336 ASSERT(type != M_SIG);
7337
7338 if (flags & MSG_IPEEK) {
7339 /*
7340 * Clear any struioflag - we do the uiomove over again
7341 * when peeking since it simplifies the code.
7342 *
7343 * Dup the message and put the original back on the queue.
7344 * If dupmsg() fails, try again with copymsg() to see if
7345 * there is indeed a shortage of memory. dupmsg() may fail
7346 * if db_ref in any of the messages reaches its limit.
7347 */
7348
7349 if ((nbp = dupmsg(bp)) == NULL && (nbp = copymsg(bp)) == NULL) {
7350 /*
7351 * Restore the state of the stream head since we
7352 * need to drop sd_lock (strwaitbuf is sleeping).
7353 */
7354 size_t size = msgdsize(bp);
7355
7356 if ((mark & _LASTMARK) && (stp->sd_mark == NULL))
7357 stp->sd_mark = bp;
7358 bp->b_flag |= mark & ~_LASTMARK;
7359 putback(stp, q, bp, pri);
7360 mutex_exit(&stp->sd_lock);
7361 error = strwaitbuf(size, BPRI_HI);
7362 if (error) {
7363 /*
7364 * There is no net change to the queue thus
7365 * no need to qbackenable.
7366 */
7367 return (error);
7368 }
7369 goto retry;
7370 }
7371
7372 if ((mark & _LASTMARK) && (stp->sd_mark == NULL))
7373 stp->sd_mark = bp;
7374 bp->b_flag |= mark & ~_LASTMARK;
7375 putback(stp, q, bp, pri);
7376 bp = nbp;
7377 }
7378
7379 /*
7380 * Set this flag so strrput will not generate signals. Need to
7381 * make sure this flag is cleared before leaving this routine
7382 * else signals will stop being sent.
7383 */
7384 stp->sd_flag |= STRGETINPROG;
7385 mutex_exit(&stp->sd_lock);
7386
7387 if ((stp->sd_rputdatafunc != NULL) && (DB_TYPE(bp) == M_DATA)) {
7388 mblk_t *tmp, *prevmp;
7389
7390 /*
7391 * Put first non-data mblk back to stream head and
7392 * cut the mblk chain so sd_rputdatafunc only sees
7393 * M_DATA mblks. We can skip the first mblk since it
7394 * is M_DATA according to the condition above.
7395 */
7396 for (prevmp = bp, tmp = bp->b_cont; tmp != NULL;
7397 prevmp = tmp, tmp = tmp->b_cont) {
7398 if (DB_TYPE(tmp) != M_DATA) {
7399 prevmp->b_cont = NULL;
7400 mutex_enter(&stp->sd_lock);
7401 putback(stp, q, tmp, tmp->b_band);
7402 mutex_exit(&stp->sd_lock);
7403 break;
7404 }
7405 }
7406
7407 bp = (stp->sd_rputdatafunc)(stp->sd_vnode, bp,
7408 NULL, NULL, NULL, NULL);
7409
7410 if (bp == NULL)
7411 goto retry;
7412 }
7413
7414 if (STREAM_NEEDSERVICE(stp))
7415 stream_runservice(stp);
7416
7417 /*
7418 * Set HIPRI flag if message is priority.
7419 */
7420 if (type >= QPCTL)
7421 flg = MSG_HIPRI;
7422 else
7423 flg = MSG_BAND;
7424
7425 /*
7426 * First process PROTO or PCPROTO blocks, if any.
7427 */
7428 if (mctlp != NULL && type != M_DATA) {
7429 mblk_t *nbp;
7430
7431 *mctlp = bp;
7432 while (bp->b_cont && bp->b_cont->b_datap->db_type != M_DATA)
7433 bp = bp->b_cont;
7434 nbp = bp->b_cont;
7435 bp->b_cont = NULL;
7436 bp = nbp;
7437 }
7438
7439 if (bp && bp->b_datap->db_type != M_DATA) {
7440 /*
7441 * More PROTO blocks in msg. Will only happen if mctlp is NULL.
7442 */
7443 more |= MORECTL;
7444 savemp = bp;
7445 while (bp && bp->b_datap->db_type != M_DATA) {
7446 savemptail = bp;
7447 bp = bp->b_cont;
7448 }
7449 savemptail->b_cont = NULL;
7450 }
7451
7452 /*
7453 * Now process DATA blocks, if any.
7454 */
7455 if (uiop == NULL) {
7456 /* Append data to tail of mctlp */
7457
7458 if (mctlp != NULL) {
7459 mblk_t **mpp = mctlp;
7460
7461 while (*mpp != NULL)
7462 mpp = &((*mpp)->b_cont);
7463 *mpp = bp;
7464 bp = NULL;
7465 }
7466 } else if (uiop->uio_resid >= 0 && bp) {
7467 size_t oldresid = uiop->uio_resid;
7468
7469 /*
7470 * If a streams message is likely to consist
7471 * of many small mblks, it is pulled up into
7472 * one continuous chunk of memory.
7473 * The size of the first mblk may be bogus because
7474 * successive read() calls on the socket reduce
7475 * the size of this mblk until it is exhausted
7476 * and then the code walks on to the next. Thus
7477 * the size of the mblk may not be the original size
7478 * that was passed up, it's simply a remainder
7479 * and hence can be very small without any
7480 * implication that the packet is badly fragmented.
7481 * So the size of the possible second mblk is
7482 * used to spot a badly fragmented packet.
7483 * see longer comment at top of page
7484 * by mblk_pull_len declaration.
7485 */
7486
7487 if (bp->b_cont != NULL && MBLKL(bp->b_cont) < mblk_pull_len) {
7488 (void) pullupmsg(bp, -1);
7489 }
7490
7491 bp = struiocopyout(bp, uiop, &error);
7492 if (error != 0) {
7493 if (mctlp != NULL) {
7494 freemsg(*mctlp);
7495 *mctlp = NULL;
7496 } else
7497 freemsg(savemp);
7498 mutex_enter(&stp->sd_lock);
7499 /*
7500 * clear stream head hi pri flag based on
7501 * first message
7502 */
7503 if (!(flags & MSG_IPEEK) && (type >= QPCTL)) {
7504 ASSERT(type == M_PCPROTO);
7505 stp->sd_flag &= ~STRPRI;
7506 }
7507 more = 0;
7508 goto getmout;
7509 }
7510 /*
7511 * (pr == 1) indicates a partial read.
7512 */
7513 if (oldresid > uiop->uio_resid)
7514 pr = 1;
7515 }
7516
7517 if (bp) { /* more data blocks in msg */
7518 more |= MOREDATA;
7519 if (savemp)
7520 savemptail->b_cont = bp;
7521 else
7522 savemp = bp;
7523 }
7524
7525 mutex_enter(&stp->sd_lock);
7526 if (savemp) {
7527 if (flags & (MSG_IPEEK|MSG_DISCARDTAIL)) {
7528 /*
7529 * When MSG_DISCARDTAIL is set or
7530 * when peeking discard any tail. When peeking this
7531 * is the tail of the dup that was copied out - the
7532 * message has already been putback on the queue.
7533 * Return MOREDATA to the caller even though the data
7534 * is discarded. This is used by sockets (to
7535 * set MSG_TRUNC).
7536 */
7537 freemsg(savemp);
7538 if (!(flags & MSG_IPEEK) && (type >= QPCTL)) {
7539 ASSERT(type == M_PCPROTO);
7540 stp->sd_flag &= ~STRPRI;
7541 }
7542 } else if (pr && (savemp->b_datap->db_type == M_DATA) &&
7543 msgnodata(savemp)) {
7544 /*
7545 * Avoid queuing a zero-length tail part of
7546 * a message. pr=1 indicates that we read some of
7547 * the message.
7548 */
7549 freemsg(savemp);
7550 more &= ~MOREDATA;
7551 if (type >= QPCTL) {
7552 ASSERT(type == M_PCPROTO);
7553 stp->sd_flag &= ~STRPRI;
7554 }
7555 } else {
7556 savemp->b_band = pri;
7557 /*
7558 * If the first message was HIPRI and the one we're
7559 * putting back isn't, then clear STRPRI, otherwise
7560 * set STRPRI again. Note that we must set STRPRI
7561 * again since the flush logic in strrput_nondata()
7562 * may have cleared it while we had sd_lock dropped.
7563 */
7564
7565 if (type >= QPCTL) {
7566 ASSERT(type == M_PCPROTO);
7567 if (queclass(savemp) < QPCTL)
7568 stp->sd_flag &= ~STRPRI;
7569 else
7570 stp->sd_flag |= STRPRI;
7571 } else if (queclass(savemp) >= QPCTL) {
7572 /*
7573 * The first message was not a HIPRI message,
7574 * but the one we are about to putback is.
7575 * For simplicitly, we do not allow for HIPRI
7576 * messages to be embedded in the message
7577 * body, so just force it to same type as
7578 * first message.
7579 */
7580 ASSERT(type == M_DATA || type == M_PROTO);
7581 ASSERT(savemp->b_datap->db_type == M_PCPROTO);
7582 savemp->b_datap->db_type = type;
7583 }
7584 if (mark != 0) {
7585 if ((mark & _LASTMARK) &&
7586 (stp->sd_mark == NULL)) {
7587 /*
7588 * If another marked message arrived
7589 * while sd_lock was not held sd_mark
7590 * would be non-NULL.
7591 */
7592 stp->sd_mark = savemp;
7593 }
7594 savemp->b_flag |= mark & ~_LASTMARK;
7595 }
7596 putback(stp, q, savemp, pri);
7597 }
7598 } else if (!(flags & MSG_IPEEK)) {
7599 /*
7600 * The complete message was consumed.
7601 *
7602 * If another M_PCPROTO arrived while sd_lock was not held
7603 * it would have been discarded since STRPRI was still set.
7604 *
7605 * Move the MSG*MARKNEXT information
7606 * to the stream head just in case
7607 * the read queue becomes empty.
7608 * clear stream head hi pri flag based on
7609 * first message
7610 *
7611 * If the stream head was at the mark
7612 * (STRATMARK) before we dropped sd_lock above
7613 * and some data was consumed then we have
7614 * moved past the mark thus STRATMARK is
7615 * cleared. However, if a message arrived in
7616 * strrput during the copyout above causing
7617 * STRATMARK to be set we can not clear that
7618 * flag.
7619 * XXX A "perimeter" would help by single-threading strrput,
7620 * strread, strgetmsg and kstrgetmsg.
7621 */
7622 if (type >= QPCTL) {
7623 ASSERT(type == M_PCPROTO);
7624 stp->sd_flag &= ~STRPRI;
7625 }
7626 if (mark & (MSGMARKNEXT|MSGNOTMARKNEXT|MSGMARK)) {
7627 if (mark & MSGMARKNEXT) {
7628 stp->sd_flag &= ~STRNOTATMARK;
7629 stp->sd_flag |= STRATMARK;
7630 } else if (mark & MSGNOTMARKNEXT) {
7631 stp->sd_flag &= ~STRATMARK;
7632 stp->sd_flag |= STRNOTATMARK;
7633 } else {
7634 stp->sd_flag &= ~(STRATMARK|STRNOTATMARK);
7635 }
7636 } else if (pr && (old_sd_flag & STRATMARK)) {
7637 stp->sd_flag &= ~STRATMARK;
7638 }
7639 }
7640
7641 *flagsp = flg;
7642 *prip = pri;
7643
7644 /*
7645 * Getmsg cleanup processing - if the state of the queue has changed
7646 * some signals may need to be sent and/or poll awakened.
7647 */
7648 getmout:
7649 qbackenable(q, pri);
7650
7651 /*
7652 * We dropped the stream head lock above. Send all M_SIG messages
7653 * before processing stream head for SIGPOLL messages.
7654 */
7655 ASSERT(MUTEX_HELD(&stp->sd_lock));
7656 while ((bp = q->q_first) != NULL &&
7657 (bp->b_datap->db_type == M_SIG)) {
7658 /*
7659 * sd_lock is held so the content of the read queue can not
7660 * change.
7661 */
7662 bp = getq(q);
7663 ASSERT(bp != NULL && bp->b_datap->db_type == M_SIG);
7664
7665 strsignal_nolock(stp, *bp->b_rptr, bp->b_band);
7666 mutex_exit(&stp->sd_lock);
7667 freemsg(bp);
7668 if (STREAM_NEEDSERVICE(stp))
7669 stream_runservice(stp);
7670 mutex_enter(&stp->sd_lock);
7671 }
7672
7673 /*
7674 * stream head cannot change while we make the determination
7675 * whether or not to send a signal. Drop the flag to allow strrput
7676 * to send firstmsgsigs again.
7677 */
7678 stp->sd_flag &= ~STRGETINPROG;
7679
7680 /*
7681 * If the type of message at the front of the queue changed
7682 * due to the receive the appropriate signals and pollwakeup events
7683 * are generated. The type of changes are:
7684 * Processed a hipri message, q_first is not hipri.
7685 * Processed a band X message, and q_first is band Y.
7686 * The generated signals and pollwakeups are identical to what
7687 * strrput() generates should the message that is now on q_first
7688 * arrive to an empty read queue.
7689 *
7690 * Note: only strrput will send a signal for a hipri message.
7691 */
7692 if ((bp = q->q_first) != NULL && !(stp->sd_flag & STRPRI)) {
7693 strsigset_t signals = 0;
7694 strpollset_t pollwakeups = 0;
7695
7696 if (flg & MSG_HIPRI) {
7697 /*
7698 * Removed a hipri message. Regular data at
7699 * the front of the queue.
7700 */
7701 if (bp->b_band == 0) {
7702 signals = S_INPUT | S_RDNORM;
7703 pollwakeups = POLLIN | POLLRDNORM;
7704 } else {
7705 signals = S_INPUT | S_RDBAND;
7706 pollwakeups = POLLIN | POLLRDBAND;
7707 }
7708 } else if (pri != bp->b_band) {
7709 /*
7710 * The band is different for the new q_first.
7711 */
7712 if (bp->b_band == 0) {
7713 signals = S_RDNORM;
7714 pollwakeups = POLLIN | POLLRDNORM;
7715 } else {
7716 signals = S_RDBAND;
7717 pollwakeups = POLLIN | POLLRDBAND;
7718 }
7719 }
7720
7721 if (pollwakeups != 0) {
7722 if (pollwakeups == (POLLIN | POLLRDNORM)) {
7723 if (!(stp->sd_rput_opt & SR_POLLIN))
7724 goto no_pollwake;
7725 stp->sd_rput_opt &= ~SR_POLLIN;
7726 }
7727 mutex_exit(&stp->sd_lock);
7728 pollwakeup(&stp->sd_pollist, pollwakeups);
7729 mutex_enter(&stp->sd_lock);
7730 }
7731 no_pollwake:
7732
7733 if (stp->sd_sigflags & signals)
7734 strsendsig(stp->sd_siglist, signals, bp->b_band, 0);
7735 }
7736 mutex_exit(&stp->sd_lock);
7737
7738 rvp->r_val1 = more;
7739 return (error);
7740 #undef _LASTMARK
7741 }
7742
7743 /*
7744 * Put a message downstream.
7745 *
7746 * NOTE: strputmsg and kstrputmsg have much of the logic in common.
7747 */
7748 int
7749 strputmsg(
7750 struct vnode *vp,
7751 struct strbuf *mctl,
7752 struct strbuf *mdata,
7753 unsigned char pri,
7754 int flag,
7755 int fmode)
7756 {
7757 struct stdata *stp;
7758 queue_t *wqp;
7759 mblk_t *mp;
7760 ssize_t msgsize;
7761 ssize_t rmin, rmax;
7762 int error;
7763 struct uio uios;
7764 struct uio *uiop = &uios;
7765 struct iovec iovs;
7766 int xpg4 = 0;
7767
7768 ASSERT(vp->v_stream);
7769 stp = vp->v_stream;
7770 wqp = stp->sd_wrq;
7771
7772 /*
7773 * If it is an XPG4 application, we need to send
7774 * SIGPIPE below
7775 */
7776
7777 xpg4 = (flag & MSG_XPG4) ? 1 : 0;
7778 flag &= ~MSG_XPG4;
7779
7780 if (AU_AUDITING())
7781 audit_strputmsg(vp, mctl, mdata, pri, flag, fmode);
7782
7783 mutex_enter(&stp->sd_lock);
7784
7785 if ((error = i_straccess(stp, JCWRITE)) != 0) {
7786 mutex_exit(&stp->sd_lock);
7787 return (error);
7788 }
7789
7790 if (stp->sd_flag & (STWRERR|STRHUP|STPLEX)) {
7791 error = strwriteable(stp, B_FALSE, xpg4);
7792 if (error != 0) {
7793 mutex_exit(&stp->sd_lock);
7794 return (error);
7795 }
7796 }
7797
7798 mutex_exit(&stp->sd_lock);
7799
7800 /*
7801 * Check for legal flag value.
7802 */
7803 switch (flag) {
7804 case MSG_HIPRI:
7805 if ((mctl->len < 0) || (pri != 0))
7806 return (EINVAL);
7807 break;
7808 case MSG_BAND:
7809 break;
7810
7811 default:
7812 return (EINVAL);
7813 }
7814
7815 TRACE_1(TR_FAC_STREAMS_FR, TR_STRPUTMSG_IN,
7816 "strputmsg in:stp %p", stp);
7817
7818 /* get these values from those cached in the stream head */
7819 rmin = stp->sd_qn_minpsz;
7820 rmax = stp->sd_qn_maxpsz;
7821
7822 /*
7823 * Make sure ctl and data sizes together fall within the
7824 * limits of the max and min receive packet sizes and do
7825 * not exceed system limit.
7826 */
7827 ASSERT((rmax >= 0) || (rmax == INFPSZ));
7828 if (rmax == 0) {
7829 return (ERANGE);
7830 }
7831 /*
7832 * Use the MAXIMUM of sd_maxblk and q_maxpsz.
7833 * Needed to prevent partial failures in the strmakedata loop.
7834 */
7835 if (stp->sd_maxblk != INFPSZ && rmax != INFPSZ && rmax < stp->sd_maxblk)
7836 rmax = stp->sd_maxblk;
7837
7838 if ((msgsize = mdata->len) < 0) {
7839 msgsize = 0;
7840 rmin = 0; /* no range check for NULL data part */
7841 }
7842 if ((msgsize < rmin) ||
7843 ((msgsize > rmax) && (rmax != INFPSZ)) ||
7844 (mctl->len > strctlsz)) {
7845 return (ERANGE);
7846 }
7847
7848 /*
7849 * Setup uio and iov for data part
7850 */
7851 iovs.iov_base = mdata->buf;
7852 iovs.iov_len = msgsize;
7853 uios.uio_iov = &iovs;
7854 uios.uio_iovcnt = 1;
7855 uios.uio_loffset = 0;
7856 uios.uio_segflg = UIO_USERSPACE;
7857 uios.uio_fmode = fmode;
7858 uios.uio_extflg = UIO_COPY_DEFAULT;
7859 uios.uio_resid = msgsize;
7860 uios.uio_offset = 0;
7861
7862 /* Ignore flow control in strput for HIPRI */
7863 if (flag & MSG_HIPRI)
7864 flag |= MSG_IGNFLOW;
7865
7866 for (;;) {
7867 int done = 0;
7868
7869 /*
7870 * strput will always free the ctl mblk - even when strput
7871 * fails.
7872 */
7873 if ((error = strmakectl(mctl, flag, fmode, &mp)) != 0) {
7874 TRACE_3(TR_FAC_STREAMS_FR, TR_STRPUTMSG_OUT,
7875 "strputmsg out:stp %p out %d error %d",
7876 stp, 1, error);
7877 return (error);
7878 }
7879 /*
7880 * Verify that the whole message can be transferred by
7881 * strput.
7882 */
7883 ASSERT(stp->sd_maxblk == INFPSZ ||
7884 stp->sd_maxblk >= mdata->len);
7885
7886 msgsize = mdata->len;
7887 error = strput(stp, mp, uiop, &msgsize, 0, pri, flag);
7888 mdata->len = msgsize;
7889
7890 if (error == 0)
7891 break;
7892
7893 if (error != EWOULDBLOCK)
7894 goto out;
7895
7896 mutex_enter(&stp->sd_lock);
7897 /*
7898 * Check for a missed wakeup.
7899 * Needed since strput did not hold sd_lock across
7900 * the canputnext.
7901 */
7902 if (bcanputnext(wqp, pri)) {
7903 /* Try again */
7904 mutex_exit(&stp->sd_lock);
7905 continue;
7906 }
7907 TRACE_2(TR_FAC_STREAMS_FR, TR_STRPUTMSG_WAIT,
7908 "strputmsg wait:stp %p waits pri %d", stp, pri);
7909 if (((error = strwaitq(stp, WRITEWAIT, (ssize_t)0, fmode, -1,
7910 &done)) != 0) || done) {
7911 mutex_exit(&stp->sd_lock);
7912 TRACE_3(TR_FAC_STREAMS_FR, TR_STRPUTMSG_OUT,
7913 "strputmsg out:q %p out %d error %d",
7914 stp, 0, error);
7915 return (error);
7916 }
7917 TRACE_1(TR_FAC_STREAMS_FR, TR_STRPUTMSG_WAKE,
7918 "strputmsg wake:stp %p wakes", stp);
7919 if ((error = i_straccess(stp, JCWRITE)) != 0) {
7920 mutex_exit(&stp->sd_lock);
7921 return (error);
7922 }
7923 mutex_exit(&stp->sd_lock);
7924 }
7925 out:
7926 /*
7927 * For historic reasons, applications expect EAGAIN
7928 * when data mblk could not be allocated. so change
7929 * ENOMEM back to EAGAIN
7930 */
7931 if (error == ENOMEM)
7932 error = EAGAIN;
7933 TRACE_3(TR_FAC_STREAMS_FR, TR_STRPUTMSG_OUT,
7934 "strputmsg out:stp %p out %d error %d", stp, 2, error);
7935 return (error);
7936 }
7937
7938 /*
7939 * Put a message downstream.
7940 * Can send only an M_PROTO/M_PCPROTO by passing in a NULL uiop.
7941 * The fmode flag (NDELAY, NONBLOCK) is the or of the flags in the uio
7942 * and the fmode parameter.
7943 *
7944 * This routine handles the consolidation private flags:
7945 * MSG_IGNERROR Ignore any stream head error except STPLEX.
7946 * MSG_HOLDSIG Hold signals while waiting for data.
7947 * MSG_IGNFLOW Don't check streams flow control.
7948 *
7949 * NOTE: strputmsg and kstrputmsg have much of the logic in common.
7950 */
7951 int
7952 kstrputmsg(
7953 struct vnode *vp,
7954 mblk_t *mctl,
7955 struct uio *uiop,
7956 ssize_t msgsize,
7957 unsigned char pri,
7958 int flag,
7959 int fmode)
7960 {
7961 struct stdata *stp;
7962 queue_t *wqp;
7963 ssize_t rmin, rmax;
7964 int error;
7965
7966 ASSERT(vp->v_stream);
7967 stp = vp->v_stream;
7968 wqp = stp->sd_wrq;
7969 if (AU_AUDITING())
7970 audit_strputmsg(vp, NULL, NULL, pri, flag, fmode);
7971 if (mctl == NULL)
7972 return (EINVAL);
7973
7974 mutex_enter(&stp->sd_lock);
7975
7976 if ((error = i_straccess(stp, JCWRITE)) != 0) {
7977 mutex_exit(&stp->sd_lock);
7978 freemsg(mctl);
7979 return (error);
7980 }
7981
7982 if ((stp->sd_flag & STPLEX) || !(flag & MSG_IGNERROR)) {
7983 if (stp->sd_flag & (STWRERR|STRHUP|STPLEX)) {
7984 error = strwriteable(stp, B_FALSE, B_TRUE);
7985 if (error != 0) {
7986 mutex_exit(&stp->sd_lock);
7987 freemsg(mctl);
7988 return (error);
7989 }
7990 }
7991 }
7992
7993 mutex_exit(&stp->sd_lock);
7994
7995 /*
7996 * Check for legal flag value.
7997 */
7998 switch (flag & (MSG_HIPRI|MSG_BAND|MSG_ANY)) {
7999 case MSG_HIPRI:
8000 if (pri != 0) {
8001 freemsg(mctl);
8002 return (EINVAL);
8003 }
8004 break;
8005 case MSG_BAND:
8006 break;
8007 default:
8008 freemsg(mctl);
8009 return (EINVAL);
8010 }
8011
8012 TRACE_1(TR_FAC_STREAMS_FR, TR_KSTRPUTMSG_IN,
8013 "kstrputmsg in:stp %p", stp);
8014
8015 /* get these values from those cached in the stream head */
8016 rmin = stp->sd_qn_minpsz;
8017 rmax = stp->sd_qn_maxpsz;
8018
8019 /*
8020 * Make sure ctl and data sizes together fall within the
8021 * limits of the max and min receive packet sizes and do
8022 * not exceed system limit.
8023 */
8024 ASSERT((rmax >= 0) || (rmax == INFPSZ));
8025 if (rmax == 0) {
8026 freemsg(mctl);
8027 return (ERANGE);
8028 }
8029 /*
8030 * Use the MAXIMUM of sd_maxblk and q_maxpsz.
8031 * Needed to prevent partial failures in the strmakedata loop.
8032 */
8033 if (stp->sd_maxblk != INFPSZ && rmax != INFPSZ && rmax < stp->sd_maxblk)
8034 rmax = stp->sd_maxblk;
8035
8036 if (uiop == NULL) {
8037 msgsize = -1;
8038 rmin = -1; /* no range check for NULL data part */
8039 } else {
8040 /* Use uio flags as well as the fmode parameter flags */
8041 fmode |= uiop->uio_fmode;
8042
8043 if ((msgsize < rmin) ||
8044 ((msgsize > rmax) && (rmax != INFPSZ))) {
8045 freemsg(mctl);
8046 return (ERANGE);
8047 }
8048 }
8049
8050 /* Ignore flow control in strput for HIPRI */
8051 if (flag & MSG_HIPRI)
8052 flag |= MSG_IGNFLOW;
8053
8054 for (;;) {
8055 int done = 0;
8056 int waitflag;
8057 mblk_t *mp;
8058
8059 /*
8060 * strput will always free the ctl mblk - even when strput
8061 * fails. If MSG_IGNFLOW is set then any error returned
8062 * will cause us to break the loop, so we don't need a copy
8063 * of the message. If MSG_IGNFLOW is not set, then we can
8064 * get hit by flow control and be forced to try again. In
8065 * this case we need to have a copy of the message. We
8066 * do this using copymsg since the message may get modified
8067 * by something below us.
8068 *
8069 * We've observed that many TPI providers do not check db_ref
8070 * on the control messages but blindly reuse them for the
8071 * T_OK_ACK/T_ERROR_ACK. Thus using copymsg is more
8072 * friendly to such providers than using dupmsg. Also, note
8073 * that sockfs uses MSG_IGNFLOW for all TPI control messages.
8074 * Only data messages are subject to flow control, hence
8075 * subject to this copymsg.
8076 */
8077 if (flag & MSG_IGNFLOW) {
8078 mp = mctl;
8079 mctl = NULL;
8080 } else {
8081 do {
8082 /*
8083 * If a message has a free pointer, the message
8084 * must be dupmsg to maintain this pointer.
8085 * Code using this facility must be sure
8086 * that modules below will not change the
8087 * contents of the dblk without checking db_ref
8088 * first. If db_ref is > 1, then the module
8089 * needs to do a copymsg first. Otherwise,
8090 * the contents of the dblk may become
8091 * inconsistent because the freesmg/freeb below
8092 * may end up calling atomic_add_32_nv.
8093 * The atomic_add_32_nv in freeb (accessing
8094 * all of db_ref, db_type, db_flags, and
8095 * db_struioflag) does not prevent other threads
8096 * from concurrently trying to modify e.g.
8097 * db_type.
8098 */
8099 if (mctl->b_datap->db_frtnp != NULL)
8100 mp = dupmsg(mctl);
8101 else
8102 mp = copymsg(mctl);
8103
8104 if (mp != NULL)
8105 break;
8106
8107 error = strwaitbuf(msgdsize(mctl), BPRI_MED);
8108 if (error) {
8109 freemsg(mctl);
8110 return (error);
8111 }
8112 } while (mp == NULL);
8113 }
8114 /*
8115 * Verify that all of msgsize can be transferred by
8116 * strput.
8117 */
8118 ASSERT(stp->sd_maxblk == INFPSZ || stp->sd_maxblk >= msgsize);
8119 error = strput(stp, mp, uiop, &msgsize, 0, pri, flag);
8120 if (error == 0)
8121 break;
8122
8123 if (error != EWOULDBLOCK)
8124 goto out;
8125
/*
 * If MSG_IGNFLOW is set we should have broken out of the loop
 * above.
 */
8130 ASSERT(!(flag & MSG_IGNFLOW));
8131 mutex_enter(&stp->sd_lock);
8132 /*
8133 * Check for a missed wakeup.
8134 * Needed since strput did not hold sd_lock across
8135 * the canputnext.
8136 */
8137 if (bcanputnext(wqp, pri)) {
8138 /* Try again */
8139 mutex_exit(&stp->sd_lock);
8140 continue;
8141 }
8142 TRACE_2(TR_FAC_STREAMS_FR, TR_KSTRPUTMSG_WAIT,
8143 "kstrputmsg wait:stp %p waits pri %d", stp, pri);
8144
8145 waitflag = WRITEWAIT;
8146 if (flag & (MSG_HOLDSIG|MSG_IGNERROR)) {
8147 if (flag & MSG_HOLDSIG)
8148 waitflag |= STR_NOSIG;
8149 if (flag & MSG_IGNERROR)
8150 waitflag |= STR_NOERROR;
8151 }
8152 if (((error = strwaitq(stp, waitflag,
8153 (ssize_t)0, fmode, -1, &done)) != 0) || done) {
8154 mutex_exit(&stp->sd_lock);
8155 TRACE_3(TR_FAC_STREAMS_FR, TR_KSTRPUTMSG_OUT,
8156 "kstrputmsg out:stp %p out %d error %d",
8157 stp, 0, error);
8158 freemsg(mctl);
8159 return (error);
8160 }
8161 TRACE_1(TR_FAC_STREAMS_FR, TR_KSTRPUTMSG_WAKE,
8162 "kstrputmsg wake:stp %p wakes", stp);
8163 if ((error = i_straccess(stp, JCWRITE)) != 0) {
8164 mutex_exit(&stp->sd_lock);
8165 freemsg(mctl);
8166 return (error);
8167 }
8168 mutex_exit(&stp->sd_lock);
8169 }
8170 out:
8171 freemsg(mctl);
/*
 * For historic reasons, applications expect EAGAIN
 * when a data mblk could not be allocated, so change
 * ENOMEM back to EAGAIN.
 */
8177 if (error == ENOMEM)
8178 error = EAGAIN;
8179 TRACE_3(TR_FAC_STREAMS_FR, TR_KSTRPUTMSG_OUT,
8180 "kstrputmsg out:stp %p out %d error %d", stp, 2, error);
8181 return (error);
8182 }
8183
/*
 * Determines whether the necessary conditions are set on a stream
 * for it to be readable, writeable, or have exceptions.
 *
 * strpoll handles the consolidation private events:
 *	POLLNOERR	Do not return POLLERR even if there are stream
 *			head errors.
 *			Used by sockfs.
 *	POLLRDDATA	Do not return POLLIN unless at least one message on
 *			the queue contains one or more M_DATA mblks. Thus
 *			when this flag is set a queue with only
 *			M_PROTO/M_PCPROTO mblks does not return POLLIN.
 *			Used by sockfs to ignore T_EXDATA_IND messages.
 *
 * Note: POLLRDDATA assumes that synch streams only return messages with
 * an M_DATA attached (i.e. not messages consisting of only
 * an M_PROTO/M_PCPROTO part).
 *
 * Arguments:
 *	stp		stream head being polled.
 *	events_arg	events the caller is interested in.
 *	anyyet		nonzero when poll() has already found events; in
 *			that case no pollhead is handed back and SR_POLLIN
 *			is not set.
 *	reventsp	out: the events found, or POLLNVAL / POLLERR on the
 *			early-exit paths below.
 *	phpp		out: set to &stp->sd_pollist when the caller has to
 *			wait (only written when anyyet is zero).
 *
 * Returns EINVAL (with *reventsp set to POLLNVAL) when the stream is
 * linked under a multiplexor (STPLEX); every other path returns 0.
 *
 * Locking: the function holds at most one of QLOCK(tq), QLOCK(sqp) and
 * sd_lock at a time, each obtained through polllock(), and none of them
 * is held on return.  A nonzero return from polllock() is treated as
 * "lock not obtained": the code reports POLLNVAL and returns without a
 * matching mutex_exit().
 */
int
strpoll(
	struct stdata *stp,
	short events_arg,
	int anyyet,
	short *reventsp,
	struct pollhead **phpp)
{
	/*
	 * Widen through ushort_t so that a set high bit in the short is
	 * not sign-extended into the int.
	 */
	int events = (ushort_t)events_arg;
	int retevents = 0;
	mblk_t *mp;
	qband_t *qbp;
	/* Snapshot of sd_flag taken once at entry, before any lock. */
	long sd_flags = stp->sd_flag;
	/* Nonzero once sd_lock has been taken and must be dropped on exit. */
	int headlocked = 0;

	/*
	 * For performance, a single 'if' tests for most possible edge
	 * conditions in one shot
	 */
	if (sd_flags & (STPLEX | STRDERR | STWRERR)) {
		if (sd_flags & STPLEX) {
			*reventsp = POLLNVAL;
			return (EINVAL);
		}
		/*
		 * A read (write) error only matters if the caller asked
		 * for a read (write) event, and POLLNOERR suppresses the
		 * POLLERR report entirely.
		 */
		if (((events & (POLLIN | POLLRDNORM | POLLRDBAND | POLLPRI)) &&
		    (sd_flags & STRDERR)) ||
		    ((events & (POLLOUT | POLLWRNORM | POLLWRBAND)) &&
		    (sd_flags & STWRERR))) {
			if (!(events & POLLNOERR)) {
				*reventsp = POLLERR;
				return (0);
			}
		}
	}
	/*
	 * Write side.  A hung-up stream reports POLLHUP and skips the
	 * write-side flow control checks altogether.
	 */
	if (sd_flags & STRHUP) {
		retevents |= POLLHUP;
	} else if (events & (POLLWRNORM | POLLWRBAND)) {
		queue_t *tq;
		queue_t *qp = stp->sd_wrq;

		/* Paired with releasestr(qp) on every exit from this branch. */
		claimstr(qp);
		/* Find next module forward that has a service procedure */
		tq = qp->q_next->q_nfsrv;
		ASSERT(tq != NULL);

		if (polllock(&stp->sd_pollist, QLOCK(tq)) != 0) {
			releasestr(qp);
			*reventsp = POLLNVAL;
			return (0);
		}
		if (events & POLLWRNORM) {
			queue_t *sqp;

			if (tq->q_flag & QFULL)
				/* ensure backq svc procedure runs */
				tq->q_flag |= QWANTW;
			else if ((sqp = stp->sd_struiowrq) != NULL) {
				/* Check sync stream barrier write q */
				/*
				 * Swap QLOCK(tq) for QLOCK(sqp); the two are
				 * never held together.
				 */
				mutex_exit(QLOCK(tq));
				if (polllock(&stp->sd_pollist,
				    QLOCK(sqp)) != 0) {
					releasestr(qp);
					*reventsp = POLLNVAL;
					return (0);
				}
				if (sqp->q_flag & QFULL)
					/* ensure pollwakeup() is done */
					sqp->q_flag |= QWANTWSYNC;
				else
					retevents |= POLLOUT;
				/* More write events to process ??? */
				if (! (events & POLLWRBAND)) {
					/*
					 * No band check wanted: drop the
					 * lock and the claim and go straight
					 * to the read side.
					 */
					mutex_exit(QLOCK(sqp));
					releasestr(qp);
					goto chkrd;
				}
				/*
				 * Retake QLOCK(tq) for the band scan below,
				 * which walks tq->q_bandp.
				 */
				mutex_exit(QLOCK(sqp));
				if (polllock(&stp->sd_pollist,
				    QLOCK(tq)) != 0) {
					releasestr(qp);
					*reventsp = POLLNVAL;
					return (0);
				}
			} else
				retevents |= POLLOUT;
		}
		if (events & POLLWRBAND) {
			/*
			 * Report POLLWRBAND if any band is not full (or no
			 * band exists); every full band is marked
			 * QB_WANTW.
			 */
			qbp = tq->q_bandp;
			if (qbp) {
				while (qbp) {
					if (qbp->qb_flag & QB_FULL)
						qbp->qb_flag |= QB_WANTW;
					else
						retevents |= POLLWRBAND;
					qbp = qbp->qb_next;
				}
			} else {
				retevents |= POLLWRBAND;
			}
		}
		mutex_exit(QLOCK(tq));
		releasestr(qp);
	}
chkrd:
	/*
	 * Read side.  With STRPRI set only POLLPRI (if requested) is
	 * reported and the read queue is not scanned; sd_lock is not
	 * taken on that path.
	 */
	if (sd_flags & STRPRI) {
		retevents |= (events & POLLPRI);
	} else if (events & (POLLRDNORM | POLLRDBAND | POLLIN)) {
		queue_t *qp = _RD(stp->sd_wrq);
		/* The subset of "normal data" events the caller asked for. */
		int normevents = (events & (POLLIN | POLLRDNORM));

		/*
		 * Note: Need to do polllock() here since ps_lock may be
		 * held. See bug 4191544.
		 */
		if (polllock(&stp->sd_pollist, &stp->sd_lock) != 0) {
			*reventsp = POLLNVAL;
			return (0);
		}
		headlocked = 1;
		mp = qp->q_first;
		while (mp) {
			/*
			 * For POLLRDDATA we scan b_cont and b_next until we
			 * find an M_DATA.
			 */
			if ((events & POLLRDDATA) &&
			    mp->b_datap->db_type != M_DATA) {
				mblk_t *nmp = mp->b_cont;

				while (nmp != NULL &&
				    nmp->b_datap->db_type != M_DATA)
					nmp = nmp->b_cont;
				if (nmp == NULL) {
					/* No M_DATA here; try next message. */
					mp = mp->b_next;
					continue;
				}
			}
			/*
			 * The first qualifying message decides the result:
			 * band 0 yields the normal events, any other band
			 * yields POLLIN/POLLRDBAND as requested.
			 */
			if (mp->b_band == 0)
				retevents |= normevents;
			else
				retevents |= (events & (POLLIN | POLLRDBAND));
			break;
		}
		if (! (retevents & normevents) &&
		    (stp->sd_wakeq & RSLEEP)) {
			/*
			 * Sync stream barrier read queue has data.
			 */
			retevents |= normevents;
		}
		/* Treat eof as normal data */
		if (sd_flags & STREOF)
			retevents |= normevents;
	}

	/*
	 * Hand the result back.  If something was found we are done,
	 * unless POLLET was requested, in which case we fall through to
	 * the pollhead setup below as well.
	 */
	*reventsp = (short)retevents;
	if (retevents && !(events & POLLET)) {
		if (headlocked)
			mutex_exit(&stp->sd_lock);
		return (0);
	}

	/*
	 * If poll() has not found any events yet, set up event cell
	 * to wake up the poll if a requested event occurs on this
	 * stream. Check for collisions with outstanding poll requests.
	 */
	if (!anyyet) {
		*phpp = &stp->sd_pollist;
		/* SR_POLLIN is only modified with sd_lock held. */
		if (headlocked == 0) {
			if (polllock(&stp->sd_pollist, &stp->sd_lock) != 0) {
				*reventsp = POLLNVAL;
				return (0);
			}
			headlocked = 1;
		}
		stp->sd_rput_opt |= SR_POLLIN;
	}
	if (headlocked)
		mutex_exit(&stp->sd_lock);
	return (0);
}
8384
8385 /*
8386 * The purpose of putback() is to assure sleeping polls/reads
8387 * are awakened when there are no new messages arriving at the,
8388 * stream head, and a message is placed back on the read queue.
8389 *
8390 * sd_lock must be held when messages are placed back on stream
8391 * head. (getq() holds sd_lock when it removes messages from
8392 * the queue)
8393 */
8394
8395 static void
8396 putback(struct stdata *stp, queue_t *q, mblk_t *bp, int band)
8397 {
8398 mblk_t *qfirst;
8399 ASSERT(MUTEX_HELD(&stp->sd_lock));
8400
8401 /*
8402 * As a result of lock-step ordering around q_lock and sd_lock,
8403 * it's possible for function calls like putnext() and
8404 * canputnext() to get an inaccurate picture of how much
8405 * data is really being processed at the stream head.
8406 * We only consolidate with existing messages on the queue
8407 * if the length of the message we want to put back is smaller
8408 * than the queue hiwater mark.
8409 */
8410 if ((stp->sd_rput_opt & SR_CONSOL_DATA) &&
8411 (DB_TYPE(bp) == M_DATA) && ((qfirst = q->q_first) != NULL) &&
8412 (DB_TYPE(qfirst) == M_DATA) &&
8413 ((qfirst->b_flag & (MSGMARK|MSGDELIM)) == 0) &&
8414 ((bp->b_flag & (MSGMARK|MSGDELIM|MSGMARKNEXT)) == 0) &&
8415 (mp_cont_len(bp, NULL) < q->q_hiwat)) {
8416 /*
8417 * We use the same logic as defined in strrput()
8418 * but in reverse as we are putting back onto the
8419 * queue and want to retain byte ordering.
8420 * Consolidate M_DATA messages with M_DATA ONLY.
8421 * strrput() allows the consolidation of M_DATA onto
8422 * M_PROTO | M_PCPROTO but not the other way round.
8423 *
8424 * The consolidation does not take place if the message
8425 * we are returning to the queue is marked with either
8426 * of the marks or the delim flag or if q_first
8427 * is marked with MSGMARK. The MSGMARK check is needed to
8428 * handle the odd semantics of MSGMARK where essentially
8429 * the whole message is to be treated as marked.
8430 * Carry any MSGMARKNEXT and MSGNOTMARKNEXT from q_first
8431 * to the front of the b_cont chain.
8432 */
8433 rmvq_noenab(q, qfirst);
8434
8435 /*
8436 * The first message in the b_cont list
8437 * tracks MSGMARKNEXT and MSGNOTMARKNEXT.
8438 * We need to handle the case where we
8439 * are appending:
8440 *
8441 * 1) a MSGMARKNEXT to a MSGNOTMARKNEXT.
8442 * 2) a MSGMARKNEXT to a plain message.
8443 * 3) a MSGNOTMARKNEXT to a plain message
8444 * 4) a MSGNOTMARKNEXT to a MSGNOTMARKNEXT
8445 * message.
8446 *
8447 * Thus we never append a MSGMARKNEXT or
8448 * MSGNOTMARKNEXT to a MSGMARKNEXT message.
8449 */
8450 if (qfirst->b_flag & MSGMARKNEXT) {
8451 bp->b_flag |= MSGMARKNEXT;
8452 bp->b_flag &= ~MSGNOTMARKNEXT;
8453 qfirst->b_flag &= ~MSGMARKNEXT;
8454 } else if (qfirst->b_flag & MSGNOTMARKNEXT) {
8455 bp->b_flag |= MSGNOTMARKNEXT;
8456 qfirst->b_flag &= ~MSGNOTMARKNEXT;
8457 }
8458
8459 linkb(bp, qfirst);
8460 }
8461 (void) putbq(q, bp);
8462
8463 /*
8464 * A message may have come in when the sd_lock was dropped in the
8465 * calling routine. If this is the case and STR*ATMARK info was
8466 * received, need to move that from the stream head to the q_last
8467 * so that SIOCATMARK can return the proper value.
8468 */
8469 if (stp->sd_flag & (STRATMARK | STRNOTATMARK)) {
8470 unsigned short *flagp = &q->q_last->b_flag;
8471 uint_t b_flag = (uint_t)*flagp;
8472
8473 if (stp->sd_flag & STRATMARK) {
8474 b_flag &= ~MSGNOTMARKNEXT;
8475 b_flag |= MSGMARKNEXT;
8476 stp->sd_flag &= ~STRATMARK;
8477 } else {
8478 b_flag &= ~MSGMARKNEXT;
8479 b_flag |= MSGNOTMARKNEXT;
8480 stp->sd_flag &= ~STRNOTATMARK;
8481 }
8482 *flagp = (unsigned short) b_flag;
8483 }
8484
8485 #ifdef DEBUG
8486 /*
8487 * Make sure that the flags are not messed up.
8488 */
8489 {
8490 mblk_t *mp;
8491 mp = q->q_last;
8492 while (mp != NULL) {
8493 ASSERT((mp->b_flag & (MSGMARKNEXT|MSGNOTMARKNEXT)) !=
8494 (MSGMARKNEXT|MSGNOTMARKNEXT));
8495 mp = mp->b_cont;
8496 }
8497 }
8498 #endif
8499 if (q->q_first == bp) {
8500 short pollevents;
8501
8502 if (stp->sd_flag & RSLEEP) {
8503 stp->sd_flag &= ~RSLEEP;
8504 cv_broadcast(&q->q_wait);
8505 }
8506 if (stp->sd_flag & STRPRI) {
8507 pollevents = POLLPRI;
8508 } else {
8509 if (band == 0) {
8510 if (!(stp->sd_rput_opt & SR_POLLIN))
8511 return;
8512 stp->sd_rput_opt &= ~SR_POLLIN;
8513 pollevents = POLLIN | POLLRDNORM;
8514 } else {
8515 pollevents = POLLIN | POLLRDBAND;
8516 }
8517 }
8518 mutex_exit(&stp->sd_lock);
8519 pollwakeup(&stp->sd_pollist, pollevents);
8520 mutex_enter(&stp->sd_lock);
8521 }
8522 }
8523
8524 /*
8525 * Return the held vnode attached to the stream head of a
8526 * given queue
8527 * It is the responsibility of the calling routine to ensure
8528 * that the queue does not go away (e.g. pop).
8529 */
8530 vnode_t *
8531 strq2vp(queue_t *qp)
8532 {
8533 vnode_t *vp;
8534 vp = STREAM(qp)->sd_vnode;
8535 ASSERT(vp != NULL);
8536 VN_HOLD(vp);
8537 return (vp);
8538 }
8539
8540 /*
8541 * return the stream head write queue for the given vp
8542 * It is the responsibility of the calling routine to ensure
8543 * that the stream or vnode do not close.
8544 */
8545 queue_t *
8546 strvp2wq(vnode_t *vp)
8547 {
8548 ASSERT(vp->v_stream != NULL);
8549 return (vp->v_stream->sd_wrq);
8550 }
8551
8552 /*
8553 * pollwakeup stream head
8554 * It is the responsibility of the calling routine to ensure
8555 * that the stream or vnode do not close.
8556 */
8557 void
8558 strpollwakeup(vnode_t *vp, short event)
8559 {
8560 ASSERT(vp->v_stream);
8561 pollwakeup(&vp->v_stream->sd_pollist, event);
8562 }
8563
8564 /*
8565 * Mate the stream heads of two vnodes together. If the two vnodes are the
8566 * same, we just make the write-side point at the read-side -- otherwise,
8567 * we do a full mate. Only works on vnodes associated with streams that are
8568 * still being built and thus have only a stream head.
8569 */
8570 void
8571 strmate(vnode_t *vp1, vnode_t *vp2)
8572 {
8573 queue_t *wrq1 = strvp2wq(vp1);
8574 queue_t *wrq2 = strvp2wq(vp2);
8575
8576 /*
8577 * Verify that there are no modules on the stream yet. We also
8578 * rely on the stream head always having a service procedure to
8579 * avoid tweaking q_nfsrv.
8580 */
8581 ASSERT(wrq1->q_next == NULL && wrq2->q_next == NULL);
8582 ASSERT(wrq1->q_qinfo->qi_srvp != NULL);
8583 ASSERT(wrq2->q_qinfo->qi_srvp != NULL);
8584
8585 /*
8586 * If the queues are the same, just twist; otherwise do a full mate.
8587 */
8588 if (wrq1 == wrq2) {
8589 wrq1->q_next = _RD(wrq1);
8590 } else {
8591 wrq1->q_next = _RD(wrq2);
8592 wrq2->q_next = _RD(wrq1);
8593 STREAM(wrq1)->sd_mate = STREAM(wrq2);
8594 STREAM(wrq1)->sd_flag |= STRMATE;
8595 STREAM(wrq2)->sd_mate = STREAM(wrq1);
8596 STREAM(wrq2)->sd_flag |= STRMATE;
8597 }
8598 }
8599
8600 /*
8601 * XXX will go away when console is correctly fixed.
8602 * Clean up the console PIDS, from previous I_SETSIG,
8603 * called only for cnopen which never calls strclean().
8604 */
8605 void
8606 str_cn_clean(struct vnode *vp)
8607 {
8608 strsig_t *ssp, *pssp, *tssp;
8609 struct stdata *stp;
8610 struct pid *pidp;
8611 int update = 0;
8612
8613 ASSERT(vp->v_stream);
8614 stp = vp->v_stream;
8615 pssp = NULL;
8616 mutex_enter(&stp->sd_lock);
8617 ssp = stp->sd_siglist;
8618 while (ssp) {
8619 mutex_enter(&pidlock);
8620 pidp = ssp->ss_pidp;
8621 /*
8622 * Get rid of PID if the proc is gone.
8623 */
8624 if (pidp->pid_prinactive) {
8625 tssp = ssp->ss_next;
8626 if (pssp)
8627 pssp->ss_next = tssp;
8628 else
8629 stp->sd_siglist = tssp;
8630 ASSERT(pidp->pid_ref <= 1);
8631 PID_RELE(ssp->ss_pidp);
8632 mutex_exit(&pidlock);
8633 kmem_free(ssp, sizeof (strsig_t));
8634 update = 1;
8635 ssp = tssp;
8636 continue;
8637 } else
8638 mutex_exit(&pidlock);
8639 pssp = ssp;
8640 ssp = ssp->ss_next;
8641 }
8642 if (update) {
8643 stp->sd_sigflags = 0;
8644 for (ssp = stp->sd_siglist; ssp; ssp = ssp->ss_next)
8645 stp->sd_sigflags |= ssp->ss_events;
8646 }
8647 mutex_exit(&stp->sd_lock);
8648 }
8649
8650 /*
8651 * Return B_TRUE if there is data in the message, B_FALSE otherwise.
8652 */
8653 static boolean_t
8654 msghasdata(mblk_t *bp)
8655 {
8656 for (; bp; bp = bp->b_cont)
8657 if (bp->b_datap->db_type == M_DATA) {
8658 ASSERT(bp->b_wptr >= bp->b_rptr);
8659 if (bp->b_wptr > bp->b_rptr)
8660 return (B_TRUE);
8661 }
8662 return (B_FALSE);
8663 }
8664
8665 /*
8666 * Check whether a stream is an XTI stream or not.
8667 */
8668 static boolean_t
8669 is_xti_str(const struct stdata *stp)
8670 {
8671 struct devnames *dnp;
8672 vnode_t *vn;
8673 major_t major;
8674 if ((vn = stp->sd_vnode) != NULL && vn->v_type == VCHR &&
8675 vn->v_rdev != 0) {
8676 major = getmajor(vn->v_rdev);
8677 dnp = (major != DDI_MAJOR_T_NONE && major >= 0 &&
8678 major < devcnt) ? &devnamesp[major] : NULL;
8679 if (dnp != NULL && dnp->dn_name != NULL &&
8680 (strcmp(dnp->dn_name, "ip") == 0 ||
8681 strcmp(dnp->dn_name, "tcp") == 0 ||
8682 strcmp(dnp->dn_name, "udp") == 0 ||
8683 strcmp(dnp->dn_name, "icmp") == 0 ||
8684 strcmp(dnp->dn_name, "tl") == 0 ||
8685 strcmp(dnp->dn_name, "ip6") == 0 ||
8686 strcmp(dnp->dn_name, "tcp6") == 0 ||
8687 strcmp(dnp->dn_name, "udp6") == 0 ||
8688 strcmp(dnp->dn_name, "icmp6") == 0)) {
8689 return (B_TRUE);
8690 }
8691 }
8692 return (B_FALSE);
8693 }