61 #include <sys/vtrace.h>
62 #include <sys/debug.h>
63 #include <sys/strredir.h>
64 #include <sys/fs/fifonode.h>
65 #include <sys/fs/snode.h>
66 #include <sys/strlog.h>
67 #include <sys/strsun.h>
68 #include <sys/project.h>
69 #include <sys/kbio.h>
70 #include <sys/msio.h>
71 #include <sys/tty.h>
72 #include <sys/ptyvar.h>
73 #include <sys/vuid_event.h>
74 #include <sys/modctl.h>
75 #include <sys/sunddi.h>
76 #include <sys/sunldi_impl.h>
77 #include <sys/autoconf.h>
78 #include <sys/policy.h>
79 #include <sys/dld.h>
80 #include <sys/zone.h>
81 #include <c2/audit.h>
82
83 /*
84 * This define helps improve the readability of streams code while
85 * still maintaining a very old streams performance enhancement. The
86 * performance enhancement basically involved having all callers
87 * of straccess() perform the first check that straccess() will do
88  * locally before actually calling straccess(). (Thereby reducing
89 * the number of unnecessary calls to straccess().)
90 */
91 #define i_straccess(x, y) ((stp->sd_sidp == NULL) ? 0 : \
92 (stp->sd_vnode->v_type == VFIFO) ? 0 : \
93 straccess((x), (y)))
94
95 /*
96 * what is mblk_pull_len?
97 *
98 * If a streams message consists of many short messages,
99 * a performance degradation occurs from copyout overhead.
100 * To decrease the per mblk overhead, messages that are
970 strget(struct stdata *stp, queue_t *q, struct uio *uiop, int first,
971 int *errorp)
972 {
973 mblk_t *bp;
974 int error;
975 ssize_t rbytes = 0;
976
977 /* Holding sd_lock prevents the read queue from changing */
978 ASSERT(MUTEX_HELD(&stp->sd_lock));
979
980 if (uiop != NULL && stp->sd_struiordq != NULL &&
981 q->q_first == NULL &&
982 (!first || (stp->sd_wakeq & RSLEEP))) {
983 /*
984 * Stream supports rwnext() for the read side.
985 * If this is the first time we're called by e.g. strread
986 * only do the downcall if there is a deferred wakeup
987 * (registered in sd_wakeq).
988 */
989 struiod_t uiod;
990
991 if (first)
992 stp->sd_wakeq &= ~RSLEEP;
993
994 (void) uiodup(uiop, &uiod.d_uio, uiod.d_iov,
995 sizeof (uiod.d_iov) / sizeof (*uiod.d_iov));
996 uiod.d_mp = 0;
997 /*
998 * Mark that a thread is in rwnext on the read side
999 * to prevent strrput from nacking ioctls immediately.
1000 * When the last concurrent rwnext returns
1001 * the ioctls are nack'ed.
1002 */
1003 ASSERT(MUTEX_HELD(&stp->sd_lock));
1004 stp->sd_struiodnak++;
1005 /*
1006 * Note: rwnext will drop sd_lock.
1007 */
1008 error = rwnext(q, &uiod);
1009 ASSERT(MUTEX_NOT_HELD(&stp->sd_lock));
1010 mutex_enter(&stp->sd_lock);
1011 stp->sd_struiodnak--;
1012 while (stp->sd_struiodnak == 0 &&
1013 ((bp = stp->sd_struionak) != NULL)) {
1014 stp->sd_struionak = bp->b_next;
1015 bp->b_next = NULL;
1016 bp->b_datap->db_type = M_IOCNAK;
1017 /*
1018 * Protect against the driver passing up
1019 * messages after it has done a qprocsoff.
1020 */
1021 if (_OTHERQ(q)->q_next == NULL)
1022 freemsg(bp);
1023 else {
1024 mutex_exit(&stp->sd_lock);
1025 qreply(q, bp);
1026 mutex_enter(&stp->sd_lock);
1027 }
1028 }
1029 ASSERT(MUTEX_HELD(&stp->sd_lock));
1030 if (error == 0 || error == EWOULDBLOCK) {
1031 if ((bp = uiod.d_mp) != NULL) {
1032 *errorp = 0;
1033 ASSERT(MUTEX_HELD(&stp->sd_lock));
1034 return (bp);
1035 }
1036 error = 0;
1037 } else if (error == EINVAL) {
1038 /*
1039 * The stream plumbing must have
1040 * changed while we were away, so
1041 * just turn off rwnext()s.
1042 */
1043 error = 0;
1044 } else if (error == EBUSY) {
1045 /*
1046 * The module might have data in transit using putnext
1047 * Fall back on waiting + getq.
1048 */
1049 error = 0;
1050 } else {
1051 *errorp = error;
1052 ASSERT(MUTEX_HELD(&stp->sd_lock));
1053 return (NULL);
1054 }
1055 /*
1056 * Try a getq in case a rwnext() generated mblk
1057 * has bubbled up via strrput().
1058 */
1059 }
1060 *errorp = 0;
1061 ASSERT(MUTEX_HELD(&stp->sd_lock));
1062
1063 /*
1064 * If we have a valid uio, try and use this as a guide for how
1065 * many bytes to retrieve from the queue via getq_noenab().
1066 	 * Doing this can avoid unnecessary counting of overlong
1067 * messages in putback(). We currently only do this for sockets
1068 * and only if there is no sd_rputdatafunc hook.
1069 *
1070 * The sd_rputdatafunc hook transforms the entire message
1071 * before any bytes in it can be given to a client. So, rbytes
1072 * must be 0 if there is a hook.
1073 */
1074 if ((uiop != NULL) && (stp->sd_vnode->v_type == VSOCK) &&
2529 *
2530 * Caller should *not* hold sd_lock.
2531 * When EWOULDBLOCK is returned the caller has to redo the canputnext
2532 * under sd_lock in order to avoid missing a backenabling wakeup.
2533 *
2534 * Use iosize = -1 to not send any M_DATA. iosize = 0 sends zero-length M_DATA.
2535 *
2536 * Set MSG_IGNFLOW in flags to ignore flow control for hipri messages.
2537 * For sync streams we can only ignore flow control by reverting to using
2538 * putnext.
2539 *
2540 * If sd_maxblk is less than *iosize this routine might return without
2541 * transferring all of *iosize. In all cases, on return *iosize will contain
2542 * the amount of data that was transferred.
2543 */
2544 static int
2545 strput(struct stdata *stp, mblk_t *mctl, struct uio *uiop, ssize_t *iosize,
2546 int b_flag, int pri, int flags)
2547 {
2548 struiod_t uiod;
2549 mblk_t *mp;
2550 queue_t *wqp = stp->sd_wrq;
2551 int error = 0;
2552 ssize_t count = *iosize;
2553
2554 ASSERT(MUTEX_NOT_HELD(&stp->sd_lock));
2555
2556 if (uiop != NULL && count >= 0)
2557 flags |= stp->sd_struiowrq ? STRUIO_POSTPONE : 0;
2558
2559 if (!(flags & STRUIO_POSTPONE)) {
2560 /*
2561 * Use regular canputnext, strmakedata, putnext sequence.
2562 */
2563 if (pri == 0) {
2564 if (!canputnext(wqp) && !(flags & MSG_IGNFLOW)) {
2565 freemsg(mctl);
2566 return (EWOULDBLOCK);
2567 }
2568 } else {
2620 if ((error = strmakedata(iosize, uiop, stp, flags, &mp)) != 0) {
2621 freemsg(mctl);
2622 /*
2623 * map EAGAIN to ENOMEM since EAGAIN means "flow controlled".
2624 */
2625 return (error == EAGAIN ? ENOMEM : error);
2626 }
2627 if (mctl != NULL) {
2628 if (mctl->b_cont == NULL)
2629 mctl->b_cont = mp;
2630 else if (mp != NULL)
2631 linkb(mctl, mp);
2632 mp = mctl;
2633 } else if (mp == NULL) {
2634 return (0);
2635 }
2636
2637 mp->b_flag |= b_flag;
2638 mp->b_band = (uchar_t)pri;
2639
2640 (void) uiodup(uiop, &uiod.d_uio, uiod.d_iov,
2641 sizeof (uiod.d_iov) / sizeof (*uiod.d_iov));
2642 uiod.d_uio.uio_offset = 0;
2643 uiod.d_mp = mp;
2644 error = rwnext(wqp, &uiod);
2645 if (! uiod.d_mp) {
2646 uioskip(uiop, *iosize);
2647 return (error);
2648 }
2649 ASSERT(mp == uiod.d_mp);
2650 if (error == EINVAL) {
2651 /*
2652 * The stream plumbing must have changed while
2653 * we were away, so just turn off rwnext()s.
2654 */
2655 error = 0;
2656 } else if (error == EBUSY || error == EWOULDBLOCK) {
2657 /*
2658 * Couldn't enter a perimeter or took a page fault,
2659 * so fall-back to putnext().
2660 */
2661 error = 0;
2662 } else {
2663 freemsg(mp);
2664 return (error);
2665 }
2666 /* Have to check canput before consuming data from the uio */
2667 if (pri == 0) {
2668 if (!canputnext(wqp) && !(flags & MSG_IGNFLOW)) {
2669 freemsg(mp);
2670 return (EWOULDBLOCK);
2671 }
2672 } else {
2673 if (!bcanputnext(wqp, pri) && !(flags & MSG_IGNFLOW)) {
2674 freemsg(mp);
2675 return (EWOULDBLOCK);
2676 }
2677 }
2678 ASSERT(mp == uiod.d_mp);
2679 /* Copyin data from the uio */
2680 if ((error = struioget(wqp, mp, &uiod, 0)) != 0) {
2681 freemsg(mp);
2682 return (error);
2683 }
2684 uioskip(uiop, *iosize);
2685 if (flags & MSG_IGNFLOW) {
2686 /*
2687 * XXX Hack: Don't get stuck running service procedures.
2688 * This is needed for sockfs when sending the unbind message
2689 * out of the rput procedure - we don't want a put procedure
2690 * to run service procedures.
2691 */
2692 putnext(wqp, mp);
2693 } else {
2694 stream_willservice(stp);
2695 putnext(wqp, mp);
2696 stream_runservice(stp);
2697 }
2698 return (0);
2699 }
2700
2701 /*
2702 * Write attempts to break the write request into messages conforming
2703 * with the minimum and maximum packet sizes set downstream.
2704 *
2705 * Write will not block if downstream queue is full and
2706 * O_NDELAY is set, otherwise it will block waiting for the queue to get room.
2707 *
2708 * A write of zero bytes gets packaged into a zero length message and sent
2709 * downstream like any other message.
2710 *
2711 * If buffers of the requested sizes are not available, the write will
2712 * sleep until the buffers become available.
2713 *
2714 * Write (if specified) will supply a write offset in a message if it
2715 * makes sense. This can be specified by downstream modules as part of
2716 * a M_SETOPTS message. Write will not supply the write offset if it
2717 * cannot supply any data in a buffer. In other words, write will never
|
61 #include <sys/vtrace.h>
62 #include <sys/debug.h>
63 #include <sys/strredir.h>
64 #include <sys/fs/fifonode.h>
65 #include <sys/fs/snode.h>
66 #include <sys/strlog.h>
67 #include <sys/strsun.h>
68 #include <sys/project.h>
69 #include <sys/kbio.h>
70 #include <sys/msio.h>
71 #include <sys/tty.h>
72 #include <sys/ptyvar.h>
73 #include <sys/vuid_event.h>
74 #include <sys/modctl.h>
75 #include <sys/sunddi.h>
76 #include <sys/sunldi_impl.h>
77 #include <sys/autoconf.h>
78 #include <sys/policy.h>
79 #include <sys/dld.h>
80 #include <sys/zone.h>
81 #include <sys/limits.h>
82 #include <c2/audit.h>
83
84 /*
85 * This define helps improve the readability of streams code while
86 * still maintaining a very old streams performance enhancement. The
87 * performance enhancement basically involved having all callers
88 * of straccess() perform the first check that straccess() will do
89  * locally before actually calling straccess(). (Thereby reducing
90 * the number of unnecessary calls to straccess().)
91 */
92 #define i_straccess(x, y) ((stp->sd_sidp == NULL) ? 0 : \
93 (stp->sd_vnode->v_type == VFIFO) ? 0 : \
94 straccess((x), (y)))
95
96 /*
97 * what is mblk_pull_len?
98 *
99 * If a streams message consists of many short messages,
100 * a performance degradation occurs from copyout overhead.
101 * To decrease the per mblk overhead, messages that are
971 strget(struct stdata *stp, queue_t *q, struct uio *uiop, int first,
972 int *errorp)
973 {
974 mblk_t *bp;
975 int error;
976 ssize_t rbytes = 0;
977
978 /* Holding sd_lock prevents the read queue from changing */
979 ASSERT(MUTEX_HELD(&stp->sd_lock));
980
981 if (uiop != NULL && stp->sd_struiordq != NULL &&
982 q->q_first == NULL &&
983 (!first || (stp->sd_wakeq & RSLEEP))) {
984 /*
985 * Stream supports rwnext() for the read side.
986 * If this is the first time we're called by e.g. strread
987 * only do the downcall if there is a deferred wakeup
988 * (registered in sd_wakeq).
989 */
990 struiod_t uiod;
991 struct iovec buf[IOV_MAX_STACK];
992 int iovlen = 0;
993
994 if (first)
995 stp->sd_wakeq &= ~RSLEEP;
996
997 if (uiop->uio_iovcnt > IOV_MAX_STACK) {
998 iovlen = uiop->uio_iovcnt * sizeof (iovec_t);
999 uiod.d_iov = kmem_alloc(iovlen, KM_SLEEP);
1000 } else {
1001 uiod.d_iov = buf;
1002 }
1003
1004 (void) uiodup(uiop, &uiod.d_uio, uiod.d_iov, uiop->uio_iovcnt);
1005 uiod.d_mp = 0;
1006 /*
1007 * Mark that a thread is in rwnext on the read side
1008 * to prevent strrput from nacking ioctls immediately.
1009 * When the last concurrent rwnext returns
1010 * the ioctls are nack'ed.
1011 */
1012 ASSERT(MUTEX_HELD(&stp->sd_lock));
1013 stp->sd_struiodnak++;
1014 /*
1015 * Note: rwnext will drop sd_lock.
1016 */
1017 error = rwnext(q, &uiod);
1018 ASSERT(MUTEX_NOT_HELD(&stp->sd_lock));
1019 mutex_enter(&stp->sd_lock);
1020 stp->sd_struiodnak--;
1021 while (stp->sd_struiodnak == 0 &&
1022 ((bp = stp->sd_struionak) != NULL)) {
1023 stp->sd_struionak = bp->b_next;
1024 bp->b_next = NULL;
1025 bp->b_datap->db_type = M_IOCNAK;
1026 /*
1027 * Protect against the driver passing up
1028 * messages after it has done a qprocsoff.
1029 */
1030 if (_OTHERQ(q)->q_next == NULL)
1031 freemsg(bp);
1032 else {
1033 mutex_exit(&stp->sd_lock);
1034 qreply(q, bp);
1035 mutex_enter(&stp->sd_lock);
1036 }
1037 }
1038 ASSERT(MUTEX_HELD(&stp->sd_lock));
1039 if (error == 0 || error == EWOULDBLOCK) {
1040 if ((bp = uiod.d_mp) != NULL) {
1041 *errorp = 0;
1042 ASSERT(MUTEX_HELD(&stp->sd_lock));
1043 if (iovlen != 0)
1044 kmem_free(uiod.d_iov, iovlen);
1045 return (bp);
1046 }
1047 error = 0;
1048 } else if (error == EINVAL) {
1049 /*
1050 * The stream plumbing must have
1051 * changed while we were away, so
1052 * just turn off rwnext()s.
1053 */
1054 error = 0;
1055 } else if (error == EBUSY) {
1056 /*
1057 * The module might have data in transit using putnext
1058 * Fall back on waiting + getq.
1059 */
1060 error = 0;
1061 } else {
1062 *errorp = error;
1063 ASSERT(MUTEX_HELD(&stp->sd_lock));
1064 if (iovlen != 0)
1065 kmem_free(uiod.d_iov, iovlen);
1066 return (NULL);
1067 }
1068
1069 if (iovlen != 0)
1070 kmem_free(uiod.d_iov, iovlen);
1071
1072 /*
1073 * Try a getq in case a rwnext() generated mblk
1074 * has bubbled up via strrput().
1075 */
1076 }
1077 *errorp = 0;
1078 ASSERT(MUTEX_HELD(&stp->sd_lock));
1079
1080 /*
1081 * If we have a valid uio, try and use this as a guide for how
1082 * many bytes to retrieve from the queue via getq_noenab().
1083 	 * Doing this can avoid unnecessary counting of overlong
1084 * messages in putback(). We currently only do this for sockets
1085 * and only if there is no sd_rputdatafunc hook.
1086 *
1087 * The sd_rputdatafunc hook transforms the entire message
1088 * before any bytes in it can be given to a client. So, rbytes
1089 * must be 0 if there is a hook.
1090 */
1091 if ((uiop != NULL) && (stp->sd_vnode->v_type == VSOCK) &&
2546 *
2547 * Caller should *not* hold sd_lock.
2548 * When EWOULDBLOCK is returned the caller has to redo the canputnext
2549 * under sd_lock in order to avoid missing a backenabling wakeup.
2550 *
2551 * Use iosize = -1 to not send any M_DATA. iosize = 0 sends zero-length M_DATA.
2552 *
2553 * Set MSG_IGNFLOW in flags to ignore flow control for hipri messages.
2554 * For sync streams we can only ignore flow control by reverting to using
2555 * putnext.
2556 *
2557 * If sd_maxblk is less than *iosize this routine might return without
2558 * transferring all of *iosize. In all cases, on return *iosize will contain
2559 * the amount of data that was transferred.
2560 */
2561 static int
2562 strput(struct stdata *stp, mblk_t *mctl, struct uio *uiop, ssize_t *iosize,
2563 int b_flag, int pri, int flags)
2564 {
2565 struiod_t uiod;
2566 struct iovec buf[IOV_MAX_STACK];
2567 int iovlen = 0;
2568 mblk_t *mp;
2569 queue_t *wqp = stp->sd_wrq;
2570 int error = 0;
2571 ssize_t count = *iosize;
2572
2573 ASSERT(MUTEX_NOT_HELD(&stp->sd_lock));
2574
2575 if (uiop != NULL && count >= 0)
2576 flags |= stp->sd_struiowrq ? STRUIO_POSTPONE : 0;
2577
2578 if (!(flags & STRUIO_POSTPONE)) {
2579 /*
2580 * Use regular canputnext, strmakedata, putnext sequence.
2581 */
2582 if (pri == 0) {
2583 if (!canputnext(wqp) && !(flags & MSG_IGNFLOW)) {
2584 freemsg(mctl);
2585 return (EWOULDBLOCK);
2586 }
2587 } else {
2639 if ((error = strmakedata(iosize, uiop, stp, flags, &mp)) != 0) {
2640 freemsg(mctl);
2641 /*
2642 * map EAGAIN to ENOMEM since EAGAIN means "flow controlled".
2643 */
2644 return (error == EAGAIN ? ENOMEM : error);
2645 }
2646 if (mctl != NULL) {
2647 if (mctl->b_cont == NULL)
2648 mctl->b_cont = mp;
2649 else if (mp != NULL)
2650 linkb(mctl, mp);
2651 mp = mctl;
2652 } else if (mp == NULL) {
2653 return (0);
2654 }
2655
2656 mp->b_flag |= b_flag;
2657 mp->b_band = (uchar_t)pri;
2658
2659 if (uiop->uio_iovcnt > IOV_MAX_STACK) {
2660 iovlen = uiop->uio_iovcnt * sizeof (iovec_t);
2661 uiod.d_iov = (struct iovec *)kmem_alloc(iovlen, KM_SLEEP);
2662 } else {
2663 uiod.d_iov = buf;
2664 }
2665
2666 (void) uiodup(uiop, &uiod.d_uio, uiod.d_iov, uiop->uio_iovcnt);
2667 uiod.d_uio.uio_offset = 0;
2668 uiod.d_mp = mp;
2669 error = rwnext(wqp, &uiod);
2670 if (! uiod.d_mp) {
2671 uioskip(uiop, *iosize);
2672 if (iovlen != 0)
2673 kmem_free(uiod.d_iov, iovlen);
2674 return (error);
2675 }
2676 ASSERT(mp == uiod.d_mp);
2677 if (error == EINVAL) {
2678 /*
2679 * The stream plumbing must have changed while
2680 * we were away, so just turn off rwnext()s.
2681 */
2682 error = 0;
2683 } else if (error == EBUSY || error == EWOULDBLOCK) {
2684 /*
2685 * Couldn't enter a perimeter or took a page fault,
2686 * so fall-back to putnext().
2687 */
2688 error = 0;
2689 } else {
2690 freemsg(mp);
2691 if (iovlen != 0)
2692 kmem_free(uiod.d_iov, iovlen);
2693 return (error);
2694 }
2695 /* Have to check canput before consuming data from the uio */
2696 if (pri == 0) {
2697 if (!canputnext(wqp) && !(flags & MSG_IGNFLOW)) {
2698 freemsg(mp);
2699 if (iovlen != 0)
2700 kmem_free(uiod.d_iov, iovlen);
2701 return (EWOULDBLOCK);
2702 }
2703 } else {
2704 if (!bcanputnext(wqp, pri) && !(flags & MSG_IGNFLOW)) {
2705 freemsg(mp);
2706 if (iovlen != 0)
2707 kmem_free(uiod.d_iov, iovlen);
2708 return (EWOULDBLOCK);
2709 }
2710 }
2711 ASSERT(mp == uiod.d_mp);
2712 /* Copyin data from the uio */
2713 if ((error = struioget(wqp, mp, &uiod, 0)) != 0) {
2714 freemsg(mp);
2715 if (iovlen != 0)
2716 kmem_free(uiod.d_iov, iovlen);
2717 return (error);
2718 }
2719 uioskip(uiop, *iosize);
2720 if (flags & MSG_IGNFLOW) {
2721 /*
2722 * XXX Hack: Don't get stuck running service procedures.
2723 * This is needed for sockfs when sending the unbind message
2724 * out of the rput procedure - we don't want a put procedure
2725 * to run service procedures.
2726 */
2727 putnext(wqp, mp);
2728 } else {
2729 stream_willservice(stp);
2730 putnext(wqp, mp);
2731 stream_runservice(stp);
2732 }
2733 if (iovlen != 0)
2734 kmem_free(uiod.d_iov, iovlen);
2735 return (0);
2736 }
2737
2738 /*
2739 * Write attempts to break the write request into messages conforming
2740 * with the minimum and maximum packet sizes set downstream.
2741 *
2742 * Write will not block if downstream queue is full and
2743 * O_NDELAY is set, otherwise it will block waiting for the queue to get room.
2744 *
2745 * A write of zero bytes gets packaged into a zero length message and sent
2746 * downstream like any other message.
2747 *
2748 * If buffers of the requested sizes are not available, the write will
2749 * sleep until the buffers become available.
2750 *
2751 * Write (if specified) will supply a write offset in a message if it
2752 * makes sense. This can be specified by downstream modules as part of
2753 * a M_SETOPTS message. Write will not supply the write offset if it
2754 * cannot supply any data in a buffer. In other words, write will never
|