Print this page
5880 Increase IOV_MAX to at least 1024
Portions contributed by: Jerry Jelinek <jerry.jelinek@joyent.com>

@@ -20,11 +20,11 @@
  */
 
 /*
  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
- * Copyright (c) 2015, Joyent, Inc.  All rights reserved.
+ * Copyright 2015, Joyent, Inc.  All rights reserved.
  */
 
 /*      Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T     */
 /*        All Rights Reserved   */
 

@@ -48,10 +48,11 @@
 #include <sys/cpuvar.h>
 #include <sys/uio.h>
 #include <sys/debug.h>
 #include <sys/rctl.h>
 #include <sys/nbmlock.h>
+#include <sys/limits.h>
 
 #define COPYOUT_MAX_CACHE       (1<<17)         /* 128K */
 
 size_t copyout_max_cached = COPYOUT_MAX_CACHE;  /* global so it's patchable */
 

@@ -605,23 +606,16 @@
         if (error)
                 return (set_errno(error));
         return (bcount);
 }
 
-/*
- * XXX -- The SVID refers to IOV_MAX, but doesn't define it.  Grrrr....
- * XXX -- However, SVVS expects readv() and writev() to fail if
- * XXX -- iovcnt > 16 (yes, it's hard-coded in the SVVS source),
- * XXX -- so I guess that's the "interface".
- */
-#define DEF_IOV_MAX     16
-
 ssize_t
 readv(int fdes, struct iovec *iovp, int iovcnt)
 {
         struct uio auio;
-        struct iovec aiov[DEF_IOV_MAX];
+        struct iovec buf[IOV_MAX_STACK], *aiov = buf;
+        int aiovlen = 0;
         file_t *fp;
         register vnode_t *vp;
         struct cpu *cp;
         int fflag, ioflag, rwflag;
         ssize_t count, bcount;

@@ -628,50 +622,82 @@
         int error = 0;
         int i;
         u_offset_t fileoff;
         int in_crit = 0;
 
-        if (iovcnt <= 0 || iovcnt > DEF_IOV_MAX)
+        if (iovcnt <= 0 || iovcnt > IOV_MAX)
                 return (set_errno(EINVAL));
 
+        if (iovcnt > IOV_MAX_STACK) {
+                aiovlen = iovcnt * sizeof (iovec_t);
+                aiov = kmem_alloc(aiovlen, KM_SLEEP);
+        }
+
 #ifdef _SYSCALL32_IMPL
         /*
          * 32-bit callers need to have their iovec expanded,
          * while ensuring that they can't move more than 2Gbytes
          * of data in a single call.
          */
         if (get_udatamodel() == DATAMODEL_ILP32) {
-                struct iovec32 aiov32[DEF_IOV_MAX];
+                struct iovec32 buf32[IOV_MAX_STACK], *aiov32 = buf32;
+                int aiov32len;
                 ssize32_t count32;
 
-                if (copyin(iovp, aiov32, iovcnt * sizeof (struct iovec32)))
+                aiov32len = iovcnt * sizeof (iovec32_t);
+                if (aiovlen != 0)
+                        aiov32 = kmem_alloc(aiov32len, KM_SLEEP);
+
+                if (copyin(iovp, aiov32, aiov32len)) {
+                        if (aiovlen != 0) {
+                                kmem_free(aiov32, aiov32len);
+                                kmem_free(aiov, aiovlen);
+                        }
                         return (set_errno(EFAULT));
+                }
 
                 count32 = 0;
                 for (i = 0; i < iovcnt; i++) {
                         ssize32_t iovlen32 = aiov32[i].iov_len;
                         count32 += iovlen32;
-                        if (iovlen32 < 0 || count32 < 0)
+                        if (iovlen32 < 0 || count32 < 0) {
+                                if (aiovlen != 0) {
+                                        kmem_free(aiov32, aiov32len);
+                                        kmem_free(aiov, aiovlen);
+                                }
                                 return (set_errno(EINVAL));
+                        }
                         aiov[i].iov_len = iovlen32;
                         aiov[i].iov_base =
                             (caddr_t)(uintptr_t)aiov32[i].iov_base;
                 }
+
+                if (aiovlen != 0)
+                        kmem_free(aiov32, aiov32len);
         } else
 #endif
-        if (copyin(iovp, aiov, iovcnt * sizeof (struct iovec)))
+        if (copyin(iovp, aiov, iovcnt * sizeof (iovec_t))) {
+                if (aiovlen != 0)
+                        kmem_free(aiov, aiovlen);
                 return (set_errno(EFAULT));
+        }
 
         count = 0;
         for (i = 0; i < iovcnt; i++) {
                 ssize_t iovlen = aiov[i].iov_len;
                 count += iovlen;
-                if (iovlen < 0 || count < 0)
+                if (iovlen < 0 || count < 0) {
+                        if (aiovlen != 0)
+                                kmem_free(aiov, aiovlen);
                         return (set_errno(EINVAL));
         }
-        if ((fp = getf(fdes)) == NULL)
+        }
+        if ((fp = getf(fdes)) == NULL) {
+                if (aiovlen != 0)
+                        kmem_free(aiov, aiovlen);
                 return (set_errno(EBADF));
+        }
         if (((fflag = fp->f_flag) & FREAD) == 0) {
                 error = EBADF;
                 goto out;
         }
         vp = fp->f_vnode;

@@ -766,20 +792,23 @@
                 error = 0;
 out:
         if (in_crit)
                 nbl_end_crit(vp);
         releasef(fdes);
+        if (aiovlen != 0)
+                kmem_free(aiov, aiovlen);
         if (error)
                 return (set_errno(error));
         return (count);
 }
 
 ssize_t
 writev(int fdes, struct iovec *iovp, int iovcnt)
 {
         struct uio auio;
-        struct iovec aiov[DEF_IOV_MAX];
+        struct iovec buf[IOV_MAX_STACK], *aiov = buf;
+        int aiovlen = 0;
         file_t *fp;
         register vnode_t *vp;
         struct cpu *cp;
         int fflag, ioflag, rwflag;
         ssize_t count, bcount;

@@ -786,50 +815,81 @@
         int error = 0;
         int i;
         u_offset_t fileoff;
         int in_crit = 0;
 
-        if (iovcnt <= 0 || iovcnt > DEF_IOV_MAX)
+        if (iovcnt <= 0 || iovcnt > IOV_MAX)
                 return (set_errno(EINVAL));
 
+        if (iovcnt > IOV_MAX_STACK) {
+                aiovlen = iovcnt * sizeof (iovec_t);
+                aiov = kmem_alloc(aiovlen, KM_SLEEP);
+        }
+
 #ifdef _SYSCALL32_IMPL
         /*
          * 32-bit callers need to have their iovec expanded,
          * while ensuring that they can't move more than 2Gbytes
          * of data in a single call.
          */
         if (get_udatamodel() == DATAMODEL_ILP32) {
-                struct iovec32 aiov32[DEF_IOV_MAX];
+                struct iovec32 buf32[IOV_MAX_STACK], *aiov32 = buf32;
+                int aiov32len;
                 ssize32_t count32;
 
-                if (copyin(iovp, aiov32, iovcnt * sizeof (struct iovec32)))
+                aiov32len = iovcnt * sizeof (iovec32_t);
+                if (aiovlen != 0)
+                        aiov32 = kmem_alloc(aiov32len, KM_SLEEP);
+
+                if (copyin(iovp, aiov32, aiov32len)) {
+                        if (aiovlen != 0) {
+                                kmem_free(aiov32, aiov32len);
+                                kmem_free(aiov, aiovlen);
+                        }
                         return (set_errno(EFAULT));
+                }
 
                 count32 = 0;
                 for (i = 0; i < iovcnt; i++) {
                         ssize32_t iovlen = aiov32[i].iov_len;
                         count32 += iovlen;
-                        if (iovlen < 0 || count32 < 0)
+                        if (iovlen < 0 || count32 < 0) {
+                                if (aiovlen != 0) {
+                                        kmem_free(aiov32, aiov32len);
+                                        kmem_free(aiov, aiovlen);
+                                }
                                 return (set_errno(EINVAL));
+                        }
                         aiov[i].iov_len = iovlen;
                         aiov[i].iov_base =
                             (caddr_t)(uintptr_t)aiov32[i].iov_base;
                 }
+                if (aiovlen != 0)
+                        kmem_free(aiov32, aiov32len);
         } else
 #endif
-        if (copyin(iovp, aiov, iovcnt * sizeof (struct iovec)))
+        if (copyin(iovp, aiov, iovcnt * sizeof (iovec_t))) {
+                if (aiovlen != 0)
+                        kmem_free(aiov, aiovlen);
                 return (set_errno(EFAULT));
+        }
 
         count = 0;
         for (i = 0; i < iovcnt; i++) {
                 ssize_t iovlen = aiov[i].iov_len;
                 count += iovlen;
-                if (iovlen < 0 || count < 0)
+                if (iovlen < 0 || count < 0) {
+                        if (aiovlen != 0)
+                                kmem_free(aiov, aiovlen);
                         return (set_errno(EINVAL));
         }
-        if ((fp = getf(fdes)) == NULL)
+        }
+        if ((fp = getf(fdes)) == NULL) {
+                if (aiovlen != 0)
+                        kmem_free(aiov, aiovlen);
                 return (set_errno(EBADF));
+        }
         if (((fflag = fp->f_flag) & FWRITE) == 0) {
                 error = EBADF;
                 goto out;
         }
         vp = fp->f_vnode;

@@ -915,10 +975,12 @@
                 error = 0;
 out:
         if (in_crit)
                 nbl_end_crit(vp);
         releasef(fdes);
+        if (aiovlen != 0)
+                kmem_free(aiov, aiovlen);
         if (error)
                 return (set_errno(error));
         return (count);
 }
 

@@ -925,11 +987,12 @@
 ssize_t
 preadv(int fdes, struct iovec *iovp, int iovcnt, off_t offset,
     off_t extended_offset)
 {
         struct uio auio;
-        struct iovec aiov[DEF_IOV_MAX];
+        struct iovec buf[IOV_MAX_STACK], *aiov = buf;
+        int aiovlen = 0;
         file_t *fp;
         register vnode_t *vp;
         struct cpu *cp;
         int fflag, ioflag, rwflag;
         ssize_t count, bcount;

@@ -950,53 +1013,87 @@
         const u_offset_t maxoff = MAXOFF32_T;
 #endif /* _SYSCALL32_IMPL */
 
         int in_crit = 0;
 
-        if (iovcnt <= 0 || iovcnt > DEF_IOV_MAX)
+        if (iovcnt <= 0 || iovcnt > IOV_MAX)
                 return (set_errno(EINVAL));
 
+        if (iovcnt > IOV_MAX_STACK) {
+                aiovlen = iovcnt * sizeof (iovec_t);
+                aiov = kmem_alloc(aiovlen, KM_SLEEP);
+        }
+
 #ifdef _SYSCALL32_IMPL
         /*
          * 32-bit callers need to have their iovec expanded,
          * while ensuring that they can't move more than 2Gbytes
          * of data in a single call.
          */
         if (get_udatamodel() == DATAMODEL_ILP32) {
-                struct iovec32 aiov32[DEF_IOV_MAX];
+                struct iovec32 buf32[IOV_MAX_STACK], *aiov32 = buf32;
+                int aiov32len;
                 ssize32_t count32;
 
-                if (copyin(iovp, aiov32, iovcnt * sizeof (struct iovec32)))
+                aiov32len = iovcnt * sizeof (iovec32_t);
+                if (aiovlen != 0)
+                        aiov32 = kmem_alloc(aiov32len, KM_SLEEP);
+
+                if (copyin(iovp, aiov32, aiov32len)) {
+                        if (aiovlen != 0) {
+                                kmem_free(aiov32, aiov32len);
+                                kmem_free(aiov, aiovlen);
+                        }
                         return (set_errno(EFAULT));
+                }
 
                 count32 = 0;
                 for (i = 0; i < iovcnt; i++) {
                         ssize32_t iovlen32 = aiov32[i].iov_len;
                         count32 += iovlen32;
-                        if (iovlen32 < 0 || count32 < 0)
+                        if (iovlen32 < 0 || count32 < 0) {
+                                if (aiovlen != 0) {
+                                        kmem_free(aiov32, aiov32len);
+                                        kmem_free(aiov, aiovlen);
+                                }
                                 return (set_errno(EINVAL));
+                        }
                         aiov[i].iov_len = iovlen32;
                         aiov[i].iov_base =
                             (caddr_t)(uintptr_t)aiov32[i].iov_base;
                 }
+                if (aiovlen != 0)
+                        kmem_free(aiov32, aiov32len);
         } else
 #endif /* _SYSCALL32_IMPL */
-                if (copyin(iovp, aiov, iovcnt * sizeof (struct iovec)))
+                if (copyin(iovp, aiov, iovcnt * sizeof (iovec_t))) {
+                        if (aiovlen != 0)
+                                kmem_free(aiov, aiovlen);
                         return (set_errno(EFAULT));
+                }
 
         count = 0;
         for (i = 0; i < iovcnt; i++) {
                 ssize_t iovlen = aiov[i].iov_len;
                 count += iovlen;
-                if (iovlen < 0 || count < 0)
+                if (iovlen < 0 || count < 0) {
+                        if (aiovlen != 0)
+                                kmem_free(aiov, aiovlen);
                         return (set_errno(EINVAL));
         }
+        }
 
-        if ((bcount = (ssize_t)count) < 0)
+        if ((bcount = (ssize_t)count) < 0) {
+                if (aiovlen != 0)
+                        kmem_free(aiov, aiovlen);
                 return (set_errno(EINVAL));
-        if ((fp = getf(fdes)) == NULL)
+        }
+        if ((fp = getf(fdes)) == NULL) {
+                if (aiovlen != 0)
+                        kmem_free(aiov, aiovlen);
                 return (set_errno(EBADF));
+        }
         if (((fflag = fp->f_flag) & FREAD) == 0) {
                 error = EBADF;
                 goto out;
         }
         vp = fp->f_vnode;

@@ -1097,10 +1194,12 @@
                 error = 0;
 out:
         if (in_crit)
                 nbl_end_crit(vp);
         releasef(fdes);
+        if (aiovlen != 0)
+                kmem_free(aiov, aiovlen);
         if (error)
                 return (set_errno(error));
         return (count);
 }
 

@@ -1107,11 +1206,12 @@
 ssize_t
 pwritev(int fdes, struct iovec *iovp, int iovcnt, off_t offset,
     off_t extended_offset)
 {
         struct uio auio;
-        struct iovec aiov[DEF_IOV_MAX];
+        struct iovec buf[IOV_MAX_STACK], *aiov = buf;
+        int aiovlen = 0;
         file_t *fp;
         register vnode_t *vp;
         struct cpu *cp;
         int fflag, ioflag, rwflag;
         ssize_t count, bcount;

@@ -1132,53 +1232,87 @@
         const u_offset_t maxoff = MAXOFF32_T;
 #endif /* _SYSCALL32_IMPL */
 
         int in_crit = 0;
 
-        if (iovcnt <= 0 || iovcnt > DEF_IOV_MAX)
+        if (iovcnt <= 0 || iovcnt > IOV_MAX)
                 return (set_errno(EINVAL));
 
+        if (iovcnt > IOV_MAX_STACK) {
+                aiovlen = iovcnt * sizeof (iovec_t);
+                aiov = kmem_alloc(aiovlen, KM_SLEEP);
+        }
+
 #ifdef _SYSCALL32_IMPL
         /*
          * 32-bit callers need to have their iovec expanded,
          * while ensuring that they can't move more than 2Gbytes
          * of data in a single call.
          */
         if (get_udatamodel() == DATAMODEL_ILP32) {
-                struct iovec32 aiov32[DEF_IOV_MAX];
+                struct iovec32 buf32[IOV_MAX_STACK], *aiov32 = buf32;
+                int aiov32len;
                 ssize32_t count32;
 
-                if (copyin(iovp, aiov32, iovcnt * sizeof (struct iovec32)))
+                aiov32len = iovcnt * sizeof (iovec32_t);
+                if (aiovlen != 0)
+                        aiov32 = kmem_alloc(aiov32len, KM_SLEEP);
+
+                if (copyin(iovp, aiov32, aiov32len)) {
+                        if (aiovlen != 0) {
+                                kmem_free(aiov32, aiov32len);
+                                kmem_free(aiov, aiovlen);
+                        }
                         return (set_errno(EFAULT));
+                }
 
                 count32 = 0;
                 for (i = 0; i < iovcnt; i++) {
                         ssize32_t iovlen32 = aiov32[i].iov_len;
                         count32 += iovlen32;
-                        if (iovlen32 < 0 || count32 < 0)
+                        if (iovlen32 < 0 || count32 < 0) {
+                                if (aiovlen != 0) {
+                                        kmem_free(aiov32, aiov32len);
+                                        kmem_free(aiov, aiovlen);
+                                }
                                 return (set_errno(EINVAL));
+                        }
                         aiov[i].iov_len = iovlen32;
                         aiov[i].iov_base =
                             (caddr_t)(uintptr_t)aiov32[i].iov_base;
                 }
+                if (aiovlen != 0)
+                        kmem_free(aiov32, aiov32len);
         } else
 #endif /* _SYSCALL32_IMPL */
-                if (copyin(iovp, aiov, iovcnt * sizeof (struct iovec)))
+                if (copyin(iovp, aiov, iovcnt * sizeof (iovec_t))) {
+                        if (aiovlen != 0)
+                                kmem_free(aiov, aiovlen);
                         return (set_errno(EFAULT));
+                }
 
         count = 0;
         for (i = 0; i < iovcnt; i++) {
                 ssize_t iovlen = aiov[i].iov_len;
                 count += iovlen;
-                if (iovlen < 0 || count < 0)
+                if (iovlen < 0 || count < 0) {
+                        if (aiovlen != 0)
+                                kmem_free(aiov, aiovlen);
                         return (set_errno(EINVAL));
         }
+        }
 
-        if ((bcount = (ssize_t)count) < 0)
+        if ((bcount = (ssize_t)count) < 0) {
+                if (aiovlen != 0)
+                        kmem_free(aiov, aiovlen);
                 return (set_errno(EINVAL));
-        if ((fp = getf(fdes)) == NULL)
+        }
+        if ((fp = getf(fdes)) == NULL) {
+                if (aiovlen != 0)
+                        kmem_free(aiov, aiovlen);
                 return (set_errno(EBADF));
+        }
         if (((fflag = fp->f_flag) & FWRITE) == 0) {
                 error = EBADF;
                 goto out;
         }
         vp = fp->f_vnode;

@@ -1306,10 +1440,12 @@
                 error = 0;
 out:
         if (in_crit)
                 nbl_end_crit(vp);
         releasef(fdes);
+        if (aiovlen != 0)
+                kmem_free(aiov, aiovlen);
         if (error)
                 return (set_errno(error));
         return (count);
 }