Print this page
5880 Increase IOV_MAX to at least 1024
Portions contributed by: Jerry Jelinek <jerry.jelinek@joyent.com>

Split Close
Expand all
Collapse all
          --- old/usr/src/uts/common/os/streamio.c
          +++ new/usr/src/uts/common/os/streamio.c
↓ open down ↓ 70 lines elided ↑ open up ↑
  71   71  #include <sys/tty.h>
  72   72  #include <sys/ptyvar.h>
  73   73  #include <sys/vuid_event.h>
  74   74  #include <sys/modctl.h>
  75   75  #include <sys/sunddi.h>
  76   76  #include <sys/sunldi_impl.h>
  77   77  #include <sys/autoconf.h>
  78   78  #include <sys/policy.h>
  79   79  #include <sys/dld.h>
  80   80  #include <sys/zone.h>
       81 +#include <sys/limits.h>
  81   82  #include <c2/audit.h>
  82   83  
  83   84  /*
  84   85   * This define helps improve the readability of streams code while
  85   86   * still maintaining a very old streams performance enhancement.  The
  86   87   * performance enhancement basically involved having all callers
  87   88   * of straccess() perform the first check that straccess() will do
  88   89   * locally before actually calling straccess().  (Thereby reducing
  89   90   * the number of unnecessary calls to straccess().)
  90   91   */
↓ open down ↓ 889 lines elided ↑ open up ↑
 980  981          if (uiop != NULL && stp->sd_struiordq != NULL &&
 981  982              q->q_first == NULL &&
 982  983              (!first || (stp->sd_wakeq & RSLEEP))) {
 983  984                  /*
 984  985                   * Stream supports rwnext() for the read side.
 985  986                   * If this is the first time we're called by e.g. strread,
 986  987                   * only do the downcall if there is a deferred wakeup
 987  988                   * (registered in sd_wakeq).
 988  989                   */
 989  990                  struiod_t uiod;
      991 +                struct iovec buf[IOV_MAX_STACK];
      992 +                int iovlen = 0;
 990  993  
 991  994                  if (first)
 992  995                          stp->sd_wakeq &= ~RSLEEP;
 993  996  
 994      -                (void) uiodup(uiop, &uiod.d_uio, uiod.d_iov,
 995      -                    sizeof (uiod.d_iov) / sizeof (*uiod.d_iov));
      997 +                if (uiop->uio_iovcnt > IOV_MAX_STACK) {
      998 +                        iovlen = uiop->uio_iovcnt * sizeof (iovec_t);
      999 +                        uiod.d_iov = kmem_alloc(iovlen, KM_SLEEP);
     1000 +                } else {
     1001 +                        uiod.d_iov = buf;
     1002 +                }
     1003 +
     1004 +                (void) uiodup(uiop, &uiod.d_uio, uiod.d_iov, uiop->uio_iovcnt);
 996 1005                  uiod.d_mp = 0;
 997 1006                  /*
 998 1007                   * Mark that a thread is in rwnext on the read side
 999 1008                   * to prevent strrput from nacking ioctls immediately.
1000 1009                   * When the last concurrent rwnext returns
1001 1010                   * the ioctls are nack'ed.
1002 1011                   */
1003 1012                  ASSERT(MUTEX_HELD(&stp->sd_lock));
1004 1013                  stp->sd_struiodnak++;
1005 1014                  /*
↓ open down ↓ 18 lines elided ↑ open up ↑
1024 1033                                  mutex_exit(&stp->sd_lock);
1025 1034                                  qreply(q, bp);
1026 1035                                  mutex_enter(&stp->sd_lock);
1027 1036                          }
1028 1037                  }
1029 1038                  ASSERT(MUTEX_HELD(&stp->sd_lock));
1030 1039                  if (error == 0 || error == EWOULDBLOCK) {
1031 1040                          if ((bp = uiod.d_mp) != NULL) {
1032 1041                                  *errorp = 0;
1033 1042                                  ASSERT(MUTEX_HELD(&stp->sd_lock));
     1043 +                                if (iovlen != 0)
     1044 +                                        kmem_free(uiod.d_iov, iovlen);
1034 1045                                  return (bp);
1035 1046                          }
1036 1047                          error = 0;
1037 1048                  } else if (error == EINVAL) {
1038 1049                          /*
1039 1050                           * The stream plumbing must have
1040 1051                           * changed while we were away, so
1041 1052                           * just turn off rwnext()s.
1042 1053                           */
1043 1054                          error = 0;
1044 1055                  } else if (error == EBUSY) {
1045 1056                          /*
1046 1057                           * The module might have data in transit using putnext.
1047 1058                           * Fall back on waiting + getq.
1048 1059                           */
1049 1060                          error = 0;
1050 1061                  } else {
1051 1062                          *errorp = error;
1052 1063                          ASSERT(MUTEX_HELD(&stp->sd_lock));
     1064 +                        if (iovlen != 0)
     1065 +                                kmem_free(uiod.d_iov, iovlen);
1053 1066                          return (NULL);
1054 1067                  }
     1068 +
     1069 +                if (iovlen != 0)
     1070 +                        kmem_free(uiod.d_iov, iovlen);
     1071 +
1055 1072                  /*
1056 1073                   * Try a getq in case a rwnext() generated mblk
1057 1074                   * has bubbled up via strrput().
1058 1075                   */
1059 1076          }
1060 1077          *errorp = 0;
1061 1078          ASSERT(MUTEX_HELD(&stp->sd_lock));
1062 1079  
1063 1080          /*
1064 1081           * If we have a valid uio, try and use this as a guide for how
↓ open down ↓ 1474 lines elided ↑ open up ↑
2539 2556   *
2540 2557   * If sd_maxblk is less than *iosize this routine might return without
2541 2558   * transferring all of *iosize. In all cases, on return *iosize will contain
2542 2559   * the amount of data that was transferred.
2543 2560   */
2544 2561  static int
2545 2562  strput(struct stdata *stp, mblk_t *mctl, struct uio *uiop, ssize_t *iosize,
2546 2563      int b_flag, int pri, int flags)
2547 2564  {
2548 2565          struiod_t uiod;
     2566 +        struct iovec buf[IOV_MAX_STACK];
     2567 +        int iovlen = 0;
2549 2568          mblk_t *mp;
2550 2569          queue_t *wqp = stp->sd_wrq;
2551 2570          int error = 0;
2552 2571          ssize_t count = *iosize;
2553 2572  
2554 2573          ASSERT(MUTEX_NOT_HELD(&stp->sd_lock));
2555 2574  
2556 2575          if (uiop != NULL && count >= 0)
2557 2576                  flags |= stp->sd_struiowrq ? STRUIO_POSTPONE : 0;
2558 2577  
↓ open down ↓ 71 lines elided ↑ open up ↑
2630 2649                  else if (mp != NULL)
2631 2650                          linkb(mctl, mp);
2632 2651                  mp = mctl;
2633 2652          } else if (mp == NULL) {
2634 2653                  return (0);
2635 2654          }
2636 2655  
2637 2656          mp->b_flag |= b_flag;
2638 2657          mp->b_band = (uchar_t)pri;
2639 2658  
2640      -        (void) uiodup(uiop, &uiod.d_uio, uiod.d_iov,
2641      -            sizeof (uiod.d_iov) / sizeof (*uiod.d_iov));
     2659 +        if (uiop->uio_iovcnt > IOV_MAX_STACK) {
     2660 +                iovlen = uiop->uio_iovcnt * sizeof (iovec_t);
     2661 +                uiod.d_iov = (struct iovec *)kmem_alloc(iovlen, KM_SLEEP);
     2662 +        } else {
     2663 +                uiod.d_iov = buf;
     2664 +        }
     2665 +
     2666 +        (void) uiodup(uiop, &uiod.d_uio, uiod.d_iov, uiop->uio_iovcnt);
2642 2667          uiod.d_uio.uio_offset = 0;
2643 2668          uiod.d_mp = mp;
2644 2669          error = rwnext(wqp, &uiod);
2645 2670          if (! uiod.d_mp) {
2646 2671                  uioskip(uiop, *iosize);
     2672 +                if (iovlen != 0)
     2673 +                        kmem_free(uiod.d_iov, iovlen);
2647 2674                  return (error);
2648 2675          }
2649 2676          ASSERT(mp == uiod.d_mp);
2650 2677          if (error == EINVAL) {
2651 2678                  /*
2652 2679                   * The stream plumbing must have changed while
2653 2680                   * we were away, so just turn off rwnext()s.
2654 2681                   */
2655 2682                  error = 0;
2656 2683          } else if (error == EBUSY || error == EWOULDBLOCK) {
2657 2684                  /*
2658 2685                   * Couldn't enter a perimeter or took a page fault,
2659 2686                   * so fall-back to putnext().
2660 2687                   */
2661 2688                  error = 0;
2662 2689          } else {
2663 2690                  freemsg(mp);
     2691 +                if (iovlen != 0)
     2692 +                        kmem_free(uiod.d_iov, iovlen);
2664 2693                  return (error);
2665 2694          }
2666 2695          /* Have to check canput before consuming data from the uio */
2667 2696          if (pri == 0) {
2668 2697                  if (!canputnext(wqp) && !(flags & MSG_IGNFLOW)) {
2669 2698                          freemsg(mp);
     2699 +                        if (iovlen != 0)
     2700 +                                kmem_free(uiod.d_iov, iovlen);
2670 2701                          return (EWOULDBLOCK);
2671 2702                  }
2672 2703          } else {
2673 2704                  if (!bcanputnext(wqp, pri) && !(flags & MSG_IGNFLOW)) {
2674 2705                          freemsg(mp);
     2706 +                        if (iovlen != 0)
     2707 +                                kmem_free(uiod.d_iov, iovlen);
2675 2708                          return (EWOULDBLOCK);
2676 2709                  }
2677 2710          }
2678 2711          ASSERT(mp == uiod.d_mp);
2679 2712          /* Copyin data from the uio */
2680 2713          if ((error = struioget(wqp, mp, &uiod, 0)) != 0) {
2681 2714                  freemsg(mp);
     2715 +                if (iovlen != 0)
     2716 +                        kmem_free(uiod.d_iov, iovlen);
2682 2717                  return (error);
2683 2718          }
2684 2719          uioskip(uiop, *iosize);
2685 2720          if (flags & MSG_IGNFLOW) {
2686 2721                  /*
2687 2722                   * XXX Hack: Don't get stuck running service procedures.
2688 2723                   * This is needed for sockfs when sending the unbind message
2689 2724                   * out of the rput procedure - we don't want a put procedure
2690 2725                   * to run service procedures.
2691 2726                   */
2692 2727                  putnext(wqp, mp);
2693 2728          } else {
2694 2729                  stream_willservice(stp);
2695 2730                  putnext(wqp, mp);
2696 2731                  stream_runservice(stp);
2697 2732          }
     2733 +        if (iovlen != 0)
     2734 +                kmem_free(uiod.d_iov, iovlen);
2698 2735          return (0);
2699 2736  }
2700 2737  
2701 2738  /*
2702 2739   * Write attempts to break the write request into messages conforming
2703 2740   * with the minimum and maximum packet sizes set downstream.
2704 2741   *
2705 2742   * Write will not block if downstream queue is full and
2706 2743   * O_NDELAY is set, otherwise it will block waiting for the queue to get room.
2707 2744   *
↓ open down ↓ 5957 lines elided ↑ open up ↑
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX