Print this page
5880 Increase IOV_MAX to at least 1024
Portions contributed by: Jerry Jelinek <jerry.jelinek@joyent.com>

@@ -19,14 +19,12 @@
  * CDDL HEADER END
  */
 
 /*
  * Copyright (c) 1995, 2010, Oracle and/or its affiliates. All rights reserved.
- */
-
-/* Copyright (c) 2013, OmniTI Computer Consulting, Inc. All rights reserved. */
-/*
+ * Copyright 2015, Joyent, Inc.  All rights reserved.
+ * Copyright (c) 2013, OmniTI Computer Consulting, Inc. All rights reserved.
  * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
  */
 
 #include <sys/types.h>
 #include <sys/t_lock.h>

@@ -52,10 +50,11 @@
 #include <sys/flock.h>
 #include <sys/modctl.h>
 #include <sys/cmn_err.h>
 #include <sys/vmsystm.h>
 #include <sys/policy.h>
+#include <sys/limits.h>
 
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 
 #include <sys/isa_defs.h>

@@ -84,16 +83,10 @@
 
 extern void     nl7c_init(void);
 extern int      sockfs_defer_nl7c_init;
 
 /*
- * Note: DEF_IOV_MAX is defined and used as it is in "fs/vncalls.c"
- *       as there isn't a formal definition of IOV_MAX ???
- */
-#define MSG_MAXIOVLEN   16
-
-/*
  * Kernel component of socket creation.
  *
  * The socket library determines which version number to use.
  * First the library calls this with a NULL devpath. If this fails
  * to find a transport (using solookup) the library will look in /etc/netconfig

@@ -1019,13 +1012,14 @@
 {
         STRUCT_DECL(nmsghdr, u_lmsg);
         STRUCT_HANDLE(nmsghdr, umsgptr);
         struct nmsghdr lmsg;
         struct uio auio;
-        struct iovec aiov[MSG_MAXIOVLEN];
+        struct iovec buf[IOV_MAX_STACK], *aiov = buf;
+        ssize_t iovsize = 0;
         int iovcnt;
-        ssize_t len;
+        ssize_t len, rval;
         int i;
         int *flagsp;
         model_t model;
 
         dprint(1, ("recvmsg(%d, %p, %d)\n",

@@ -1064,49 +1058,80 @@
         lmsg.msg_controllen = STRUCT_FGET(u_lmsg, msg_controllen);
         lmsg.msg_flags = STRUCT_FGET(u_lmsg, msg_flags);
 
         iovcnt = lmsg.msg_iovlen;
 
-        if (iovcnt <= 0 || iovcnt > MSG_MAXIOVLEN) {
+        if (iovcnt <= 0 || iovcnt > IOV_MAX) {
                 return (set_errno(EMSGSIZE));
         }
 
+        if (iovcnt > IOV_MAX_STACK) {
+                iovsize = iovcnt * sizeof (struct iovec);
+                aiov = kmem_alloc(iovsize, KM_SLEEP);
+        }
+
 #ifdef _SYSCALL32_IMPL
         /*
          * 32-bit callers need to have their iovec expanded, while ensuring
          * that they can't move more than 2Gbytes of data in a single call.
          */
         if (model == DATAMODEL_ILP32) {
-                struct iovec32 aiov32[MSG_MAXIOVLEN];
+                struct iovec32 buf32[IOV_MAX_STACK], *aiov32 = buf32;
+                ssize_t iov32size;
                 ssize32_t count32;
 
-                if (copyin((struct iovec32 *)lmsg.msg_iov, aiov32,
-                    iovcnt * sizeof (struct iovec32)))
+                iov32size = iovcnt * sizeof (struct iovec32);
+                if (iovsize != 0)
+                        aiov32 = kmem_alloc(iov32size, KM_SLEEP);
+
+                if (copyin((struct iovec32 *)lmsg.msg_iov, aiov32, iov32size)) {
+                        if (iovsize != 0) {
+                                kmem_free(aiov32, iov32size);
+                                kmem_free(aiov, iovsize);
+                        }
+
                         return (set_errno(EFAULT));
+                }
 
                 count32 = 0;
                 for (i = 0; i < iovcnt; i++) {
                         ssize32_t iovlen32;
 
                         iovlen32 = aiov32[i].iov_len;
                         count32 += iovlen32;
-                        if (iovlen32 < 0 || count32 < 0)
+                        if (iovlen32 < 0 || count32 < 0) {
+                                if (iovsize != 0) {
+                                        kmem_free(aiov32, iov32size);
+                                        kmem_free(aiov, iovsize);
+                                }
+
                                 return (set_errno(EINVAL));
+                        }
+
                         aiov[i].iov_len = iovlen32;
                         aiov[i].iov_base =
                             (caddr_t)(uintptr_t)aiov32[i].iov_base;
                 }
+
+                if (iovsize != 0)
+                        kmem_free(aiov32, iov32size);
         } else
 #endif /* _SYSCALL32_IMPL */
         if (copyin(lmsg.msg_iov, aiov, iovcnt * sizeof (struct iovec))) {
+                if (iovsize != 0)
+                        kmem_free(aiov, iovsize);
+
                 return (set_errno(EFAULT));
         }
         len = 0;
         for (i = 0; i < iovcnt; i++) {
                 ssize_t iovlen = aiov[i].iov_len;
                 len += iovlen;
                 if (iovlen < 0 || len < 0) {
+                        if (iovsize != 0)
+                                kmem_free(aiov, iovsize);
+
                         return (set_errno(EINVAL));
                 }
         }
         auio.uio_loffset = 0;
         auio.uio_iov = aiov;

@@ -1117,16 +1142,24 @@
 
         if (lmsg.msg_control != NULL &&
             (do_useracc == 0 ||
             useracc(lmsg.msg_control, lmsg.msg_controllen,
             B_WRITE) != 0)) {
+                if (iovsize != 0)
+                        kmem_free(aiov, iovsize);
+
                 return (set_errno(EFAULT));
         }
 
-        return (recvit(sock, &lmsg, &auio, flags,
+        rval = recvit(sock, &lmsg, &auio, flags,
             STRUCT_FADDR(umsgptr, msg_namelen),
-            STRUCT_FADDR(umsgptr, msg_controllen), flagsp));
+            STRUCT_FADDR(umsgptr, msg_controllen), flagsp);
+
+        if (iovsize != 0)
+                kmem_free(aiov, iovsize);
+
+        return (rval);
 }
 
 /*
  * Common send function.
  */

@@ -1260,13 +1293,14 @@
 sendmsg(int sock, struct nmsghdr *msg, int flags)
 {
         struct nmsghdr lmsg;
         STRUCT_DECL(nmsghdr, u_lmsg);
         struct uio auio;
-        struct iovec aiov[MSG_MAXIOVLEN];
+        struct iovec buf[IOV_MAX_STACK], *aiov = buf;
+        ssize_t iovsize = 0;
         int iovcnt;
-        ssize_t len;
+        ssize_t len, rval;
         int i;
         model_t model;
 
         dprint(1, ("sendmsg(%d, %p, %d)\n", sock, (void *)msg, flags));
 

@@ -1305,57 +1339,88 @@
         lmsg.msg_controllen = STRUCT_FGET(u_lmsg, msg_controllen);
         lmsg.msg_flags = STRUCT_FGET(u_lmsg, msg_flags);
 
         iovcnt = lmsg.msg_iovlen;
 
-        if (iovcnt <= 0 || iovcnt > MSG_MAXIOVLEN) {
+        if (iovcnt <= 0 || iovcnt > IOV_MAX) {
                 /*
                  * Unless this is XPG 4.2 we allow iovcnt == 0 to
                  * be compatible with SunOS 4.X and 4.4BSD.
                  */
                 if (iovcnt != 0 || (flags & MSG_XPG4_2))
                         return (set_errno(EMSGSIZE));
         }
 
+        if (iovcnt > IOV_MAX_STACK) {
+                iovsize = iovcnt * sizeof (struct iovec);
+                aiov = kmem_alloc(iovsize, KM_SLEEP);
+        }
+
 #ifdef _SYSCALL32_IMPL
         /*
          * 32-bit callers need to have their iovec expanded, while ensuring
          * that they can't move more than 2Gbytes of data in a single call.
          */
         if (model == DATAMODEL_ILP32) {
-                struct iovec32 aiov32[MSG_MAXIOVLEN];
+                struct iovec32 buf32[IOV_MAX_STACK], *aiov32 = buf32;
+                ssize_t iov32size;
                 ssize32_t count32;
 
+                iov32size = iovcnt * sizeof (struct iovec32);
+                if (iovsize != 0)
+                        aiov32 = kmem_alloc(iov32size, KM_SLEEP);
+
                 if (iovcnt != 0 &&
-                    copyin((struct iovec32 *)lmsg.msg_iov, aiov32,
-                    iovcnt * sizeof (struct iovec32)))
+                    copyin((struct iovec32 *)lmsg.msg_iov, aiov32, iov32size)) {
+                        if (iovsize != 0) {
+                                kmem_free(aiov32, iov32size);
+                                kmem_free(aiov, iovsize);
+                        }
+
                         return (set_errno(EFAULT));
+                }
 
                 count32 = 0;
                 for (i = 0; i < iovcnt; i++) {
                         ssize32_t iovlen32;
 
                         iovlen32 = aiov32[i].iov_len;
                         count32 += iovlen32;
-                        if (iovlen32 < 0 || count32 < 0)
+                        if (iovlen32 < 0 || count32 < 0) {
+                                if (iovsize != 0) {
+                                        kmem_free(aiov32, iov32size);
+                                        kmem_free(aiov, iovsize);
+                                }
+
                                 return (set_errno(EINVAL));
+                        }
+
                         aiov[i].iov_len = iovlen32;
                         aiov[i].iov_base =
                             (caddr_t)(uintptr_t)aiov32[i].iov_base;
                 }
+
+                if (iovsize != 0)
+                        kmem_free(aiov32, iov32size);
         } else
 #endif /* _SYSCALL32_IMPL */
         if (iovcnt != 0 &&
             copyin(lmsg.msg_iov, aiov,
             (unsigned)iovcnt * sizeof (struct iovec))) {
+                if (iovsize != 0)
+                        kmem_free(aiov, iovsize);
+
                 return (set_errno(EFAULT));
         }
         len = 0;
         for (i = 0; i < iovcnt; i++) {
                 ssize_t iovlen = aiov[i].iov_len;
                 len += iovlen;
                 if (iovlen < 0 || len < 0) {
+                        if (iovsize != 0)
+                                kmem_free(aiov, iovsize);
+
                         return (set_errno(EINVAL));
                 }
         }
         auio.uio_loffset = 0;
         auio.uio_iov = aiov;

@@ -1362,11 +1427,16 @@
         auio.uio_iovcnt = iovcnt;
         auio.uio_resid = len;
         auio.uio_segflg = UIO_USERSPACE;
         auio.uio_limit = 0;
 
-        return (sendit(sock, &lmsg, &auio, flags));
+        rval = sendit(sock, &lmsg, &auio, flags);
+
+        if (iovsize != 0)
+                kmem_free(aiov, iovsize);
+
+        return (rval);
 }
 
 ssize_t
 sendto(int sock, void *buffer, size_t len, int flags,
     struct sockaddr *name, socklen_t namelen)