Print this page
5880 Increase IOV_MAX to at least 1024
Portions contributed by: Jerry Jelinek <jerry.jelinek@joyent.com>


   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  * Copyright (c) 2015, Joyent, Inc.  All rights reserved.
  26  */
  27 
  28 /*      Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
  29 /*        All Rights Reserved   */
  30 
  31 /*
  32  * Portions of this source code were derived from Berkeley 4.3 BSD
  33  * under license from the Regents of the University of California.
  34  */
  35 
  36 #include <sys/param.h>
  37 #include <sys/isa_defs.h>
  38 #include <sys/types.h>
  39 #include <sys/inttypes.h>
  40 #include <sys/sysmacros.h>
  41 #include <sys/cred.h>
  42 #include <sys/user.h>
  43 #include <sys/systm.h>
  44 #include <sys/errno.h>
  45 #include <sys/vnode.h>
  46 #include <sys/file.h>
  47 #include <sys/proc.h>
  48 #include <sys/cpuvar.h>
  49 #include <sys/uio.h>
  50 #include <sys/debug.h>
  51 #include <sys/rctl.h>
  52 #include <sys/nbmlock.h>

  53 
  54 #define COPYOUT_MAX_CACHE       (1<<17)           /* 128K */
  55 
  56 size_t copyout_max_cached = COPYOUT_MAX_CACHE;  /* global so it's patchable */
  57 
  58 /*
  59  * read, write, pread, pwrite, readv, writev, preadv, and pwritev syscalls.
  60  *
  61  * 64-bit open: all opens are large file opens.
  62  * Large Files: the behaviour of read depends on whether the fd
  63  *              corresponds to large open or not.
  64  * 32-bit open: FOFFMAX flag not set.
  65  *              read until MAXOFF32_T - 1 and read at MAXOFF32_T returns
  66  *              EOVERFLOW if count is non-zero and if size of file
  67  *              is > MAXOFF32_T. If size of file is <= MAXOFF32_T read
  68  *              at >= MAXOFF32_T returns EOF.
  69  */
  70 
  71 /*
  72  * Native system call


 590         bcount -= auio.uio_resid;
 591         CPU_STATS_ENTER_K();
 592         cp = CPU;
 593         CPU_STATS_ADDQ(cp, sys, syswrite, 1);
 594         CPU_STATS_ADDQ(cp, sys, writech, (ulong_t)bcount);
 595         CPU_STATS_EXIT_K();
 596         ttolwp(curthread)->lwp_ru.ioch += (ulong_t)bcount;
 597         VOP_RWUNLOCK(vp, rwflag, NULL);
 598 
 599         if (error == EINTR && bcount != 0)
 600                 error = 0;
 601 out:
 602         if (in_crit)
 603                 nbl_end_crit(vp);
 604         releasef(fdes);
 605         if (error)
 606                 return (set_errno(error));
 607         return (bcount);
 608 }
 609 
 610 /*
 611  * XXX -- The SVID refers to IOV_MAX, but doesn't define it.  Grrrr....
 612  * XXX -- However, SVVS expects readv() and writev() to fail if
 613  * XXX -- iovcnt > 16 (yes, it's hard-coded in the SVVS source),
 614  * XXX -- so I guess that's the "interface".
 615  */
 616 #define DEF_IOV_MAX     16
 617 
 618 ssize_t
 619 readv(int fdes, struct iovec *iovp, int iovcnt)
 620 {
 621         struct uio auio;
 622         struct iovec aiov[DEF_IOV_MAX];

 623         file_t *fp;
 624         register vnode_t *vp;
 625         struct cpu *cp;
 626         int fflag, ioflag, rwflag;
 627         ssize_t count, bcount;
 628         int error = 0;
 629         int i;
 630         u_offset_t fileoff;
 631         int in_crit = 0;
 632 
 633         if (iovcnt <= 0 || iovcnt > DEF_IOV_MAX)
 634                 return (set_errno(EINVAL));
 635 





 636 #ifdef _SYSCALL32_IMPL
 637         /*
 638          * 32-bit callers need to have their iovec expanded,
 639          * while ensuring that they can't move more than 2Gbytes
 640          * of data in a single call.
 641          */
 642         if (get_udatamodel() == DATAMODEL_ILP32) {
 643                 struct iovec32 aiov32[DEF_IOV_MAX];

 644                 ssize32_t count32;
 645 
 646                 if (copyin(iovp, aiov32, iovcnt * sizeof (struct iovec32)))








 647                         return (set_errno(EFAULT));

 648 
 649                 count32 = 0;
 650                 for (i = 0; i < iovcnt; i++) {
 651                         ssize32_t iovlen32 = aiov32[i].iov_len;
 652                         count32 += iovlen32;
 653                         if (iovlen32 < 0 || count32 < 0)




 654                                 return (set_errno(EINVAL));

 655                         aiov[i].iov_len = iovlen32;
 656                         aiov[i].iov_base =
 657                             (caddr_t)(uintptr_t)aiov32[i].iov_base;
 658                 }



 659         } else
 660 #endif
 661         if (copyin(iovp, aiov, iovcnt * sizeof (struct iovec)))


 662                 return (set_errno(EFAULT));

 663 
 664         count = 0;
 665         for (i = 0; i < iovcnt; i++) {
 666                 ssize_t iovlen = aiov[i].iov_len;
 667                 count += iovlen;
 668                 if (iovlen < 0 || count < 0)


 669                         return (set_errno(EINVAL));
 670         }
 671         if ((fp = getf(fdes)) == NULL)



 672                 return (set_errno(EBADF));

 673         if (((fflag = fp->f_flag) & FREAD) == 0) {
 674                 error = EBADF;
 675                 goto out;
 676         }
 677         vp = fp->f_vnode;
 678         if (vp->v_type == VREG && count == 0) {
 679                 goto out;
 680         }
 681 
 682         rwflag = 0;
 683 
 684         /*
 685          * We have to enter the critical region before calling VOP_RWLOCK
 686          * to avoid a deadlock with ufs.
 687          */
 688         if (nbl_need_check(vp)) {
 689                 int svmand;
 690 
 691                 nbl_start_crit(vp, RW_READER);
 692                 in_crit = 1;


 751         cp = CPU;
 752         CPU_STATS_ADDQ(cp, sys, sysread, 1);
 753         CPU_STATS_ADDQ(cp, sys, readch, (ulong_t)count);
 754         CPU_STATS_EXIT_K();
 755         ttolwp(curthread)->lwp_ru.ioch += (ulong_t)count;
 756 
 757         if (vp->v_type == VFIFO)     /* Backward compatibility */
 758                 fp->f_offset = count;
 759         else if (((fp->f_flag & FAPPEND) == 0) ||
 760             (vp->v_type != VREG) || (bcount != 0))   /* POSIX */
 761                 fp->f_offset = auio.uio_loffset;
 762 
 763         VOP_RWUNLOCK(vp, rwflag, NULL);
 764 
 765         if (error == EINTR && count != 0)
 766                 error = 0;
 767 out:
 768         if (in_crit)
 769                 nbl_end_crit(vp);
 770         releasef(fdes);


 771         if (error)
 772                 return (set_errno(error));
 773         return (count);
 774 }
 775 
 776 ssize_t
 777 writev(int fdes, struct iovec *iovp, int iovcnt)
 778 {
 779         struct uio auio;
 780         struct iovec aiov[DEF_IOV_MAX];

 781         file_t *fp;
 782         register vnode_t *vp;
 783         struct cpu *cp;
 784         int fflag, ioflag, rwflag;
 785         ssize_t count, bcount;
 786         int error = 0;
 787         int i;
 788         u_offset_t fileoff;
 789         int in_crit = 0;
 790 
 791         if (iovcnt <= 0 || iovcnt > DEF_IOV_MAX)
 792                 return (set_errno(EINVAL));
 793 





 794 #ifdef _SYSCALL32_IMPL
 795         /*
 796          * 32-bit callers need to have their iovec expanded,
 797          * while ensuring that they can't move more than 2Gbytes
 798          * of data in a single call.
 799          */
 800         if (get_udatamodel() == DATAMODEL_ILP32) {
 801                 struct iovec32 aiov32[DEF_IOV_MAX];

 802                 ssize32_t count32;
 803 
 804                 if (copyin(iovp, aiov32, iovcnt * sizeof (struct iovec32)))








 805                         return (set_errno(EFAULT));

 806 
 807                 count32 = 0;
 808                 for (i = 0; i < iovcnt; i++) {
 809                         ssize32_t iovlen = aiov32[i].iov_len;
 810                         count32 += iovlen;
 811                         if (iovlen < 0 || count32 < 0)




 812                                 return (set_errno(EINVAL));

 813                         aiov[i].iov_len = iovlen;
 814                         aiov[i].iov_base =
 815                             (caddr_t)(uintptr_t)aiov32[i].iov_base;
 816                 }


 817         } else
 818 #endif
 819         if (copyin(iovp, aiov, iovcnt * sizeof (struct iovec)))


 820                 return (set_errno(EFAULT));

 821 
 822         count = 0;
 823         for (i = 0; i < iovcnt; i++) {
 824                 ssize_t iovlen = aiov[i].iov_len;
 825                 count += iovlen;
 826                 if (iovlen < 0 || count < 0)


 827                         return (set_errno(EINVAL));
 828         }
 829         if ((fp = getf(fdes)) == NULL)



 830                 return (set_errno(EBADF));

 831         if (((fflag = fp->f_flag) & FWRITE) == 0) {
 832                 error = EBADF;
 833                 goto out;
 834         }
 835         vp = fp->f_vnode;
 836         if (vp->v_type == VREG && count == 0) {
 837                 goto out;
 838         }
 839 
 840         rwflag = 1;
 841 
 842         /*
 843          * We have to enter the critical region before calling VOP_RWLOCK
 844          * to avoid a deadlock with ufs.
 845          */
 846         if (nbl_need_check(vp)) {
 847                 int svmand;
 848 
 849                 nbl_start_crit(vp, RW_READER);
 850                 in_crit = 1;


 900         CPU_STATS_ENTER_K();
 901         cp = CPU;
 902         CPU_STATS_ADDQ(cp, sys, syswrite, 1);
 903         CPU_STATS_ADDQ(cp, sys, writech, (ulong_t)count);
 904         CPU_STATS_EXIT_K();
 905         ttolwp(curthread)->lwp_ru.ioch += (ulong_t)count;
 906 
 907         if (vp->v_type == VFIFO)     /* Backward compatibility */
 908                 fp->f_offset = count;
 909         else if (((fp->f_flag & FAPPEND) == 0) ||
 910             (vp->v_type != VREG) || (bcount != 0))   /* POSIX */
 911                 fp->f_offset = auio.uio_loffset;
 912         VOP_RWUNLOCK(vp, rwflag, NULL);
 913 
 914         if (error == EINTR && count != 0)
 915                 error = 0;
 916 out:
 917         if (in_crit)
 918                 nbl_end_crit(vp);
 919         releasef(fdes);


 920         if (error)
 921                 return (set_errno(error));
 922         return (count);
 923 }
 924 
 925 ssize_t
 926 preadv(int fdes, struct iovec *iovp, int iovcnt, off_t offset,
 927     off_t extended_offset)
 928 {
 929         struct uio auio;
 930         struct iovec aiov[DEF_IOV_MAX];

 931         file_t *fp;
 932         register vnode_t *vp;
 933         struct cpu *cp;
 934         int fflag, ioflag, rwflag;
 935         ssize_t count, bcount;
 936         int error = 0;
 937         int i;
 938 
 939 #if defined(_SYSCALL32_IMPL) || defined(_ILP32)
 940         u_offset_t fileoff = ((u_offset_t)extended_offset << 32) |
 941             (u_offset_t)offset;
 942 #else /* _SYSCALL32_IMPL || _ILP32 */
 943         u_offset_t fileoff = (u_offset_t)(ulong_t)offset;
 944 #endif /* _SYSCALL32_IMPL || _ILP32 */
 945 #ifdef _SYSCALL32_IMPL
 946         const u_offset_t maxoff = get_udatamodel() == DATAMODEL_ILP32 &&
 947             extended_offset == 0?
 948             MAXOFF32_T : MAXOFFSET_T;
 949 #else /* _SYSCALL32_IMPL */
 950         const u_offset_t maxoff = MAXOFF32_T;
 951 #endif /* _SYSCALL32_IMPL */
 952 
 953         int in_crit = 0;
 954 
 955         if (iovcnt <= 0 || iovcnt > DEF_IOV_MAX)
 956                 return (set_errno(EINVAL));
 957 





 958 #ifdef _SYSCALL32_IMPL
 959         /*
 960          * 32-bit callers need to have their iovec expanded,
 961          * while ensuring that they can't move more than 2Gbytes
 962          * of data in a single call.
 963          */
 964         if (get_udatamodel() == DATAMODEL_ILP32) {
 965                 struct iovec32 aiov32[DEF_IOV_MAX];

 966                 ssize32_t count32;
 967 
 968                 if (copyin(iovp, aiov32, iovcnt * sizeof (struct iovec32)))








 969                         return (set_errno(EFAULT));

 970 
 971                 count32 = 0;
 972                 for (i = 0; i < iovcnt; i++) {
 973                         ssize32_t iovlen32 = aiov32[i].iov_len;
 974                         count32 += iovlen32;
 975                         if (iovlen32 < 0 || count32 < 0)




 976                                 return (set_errno(EINVAL));

 977                         aiov[i].iov_len = iovlen32;
 978                         aiov[i].iov_base =
 979                             (caddr_t)(uintptr_t)aiov32[i].iov_base;
 980                 }


 981         } else
 982 #endif /* _SYSCALL32_IMPL */
 983                 if (copyin(iovp, aiov, iovcnt * sizeof (struct iovec)))


 984                         return (set_errno(EFAULT));

 985 
 986         count = 0;
 987         for (i = 0; i < iovcnt; i++) {
 988                 ssize_t iovlen = aiov[i].iov_len;
 989                 count += iovlen;
 990                 if (iovlen < 0 || count < 0)


 991                         return (set_errno(EINVAL));
 992         }

 993 
 994         if ((bcount = (ssize_t)count) < 0)


 995                 return (set_errno(EINVAL));
 996         if ((fp = getf(fdes)) == NULL)



 997                 return (set_errno(EBADF));

 998         if (((fflag = fp->f_flag) & FREAD) == 0) {
 999                 error = EBADF;
1000                 goto out;
1001         }
1002         vp = fp->f_vnode;
1003         rwflag = 0;
1004         if (vp->v_type == VREG) {
1005 
1006                 if (bcount == 0)
1007                         goto out;
1008 
1009                 /*
1010                  * return EINVAL for offsets that cannot be
1011                  * represented in an off_t.
1012                  */
1013                 if (fileoff > maxoff) {
1014                         error = EINVAL;
1015                         goto out;
1016                 }
1017 


1082                 auio.uio_extflg = UIO_COPY_DEFAULT;
1083 
1084         ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);
1085         error = VOP_READ(vp, &auio, ioflag, fp->f_cred, NULL);
1086         count -= auio.uio_resid;
1087         CPU_STATS_ENTER_K();
1088         cp = CPU;
1089         CPU_STATS_ADDQ(cp, sys, sysread, 1);
1090         CPU_STATS_ADDQ(cp, sys, readch, (ulong_t)count);
1091         CPU_STATS_EXIT_K();
1092         ttolwp(curthread)->lwp_ru.ioch += (ulong_t)count;
1093 
1094         VOP_RWUNLOCK(vp, rwflag, NULL);
1095 
1096         if (error == EINTR && count != 0)
1097                 error = 0;
1098 out:
1099         if (in_crit)
1100                 nbl_end_crit(vp);
1101         releasef(fdes);


1102         if (error)
1103                 return (set_errno(error));
1104         return (count);
1105 }
1106 
1107 ssize_t
1108 pwritev(int fdes, struct iovec *iovp, int iovcnt, off_t offset,
1109     off_t extended_offset)
1110 {
1111         struct uio auio;
1112         struct iovec aiov[DEF_IOV_MAX];

1113         file_t *fp;
1114         register vnode_t *vp;
1115         struct cpu *cp;
1116         int fflag, ioflag, rwflag;
1117         ssize_t count, bcount;
1118         int error = 0;
1119         int i;
1120 
1121 #if defined(_SYSCALL32_IMPL) || defined(_ILP32)
1122         u_offset_t fileoff = ((u_offset_t)extended_offset << 32) |
1123             (u_offset_t)offset;
1124 #else /* _SYSCALL32_IMPL || _ILP32 */
1125         u_offset_t fileoff = (u_offset_t)(ulong_t)offset;
1126 #endif /* _SYSCALL32_IMPL || _ILP32 */
1127 #ifdef _SYSCALL32_IMPL
1128         const u_offset_t maxoff = get_udatamodel() == DATAMODEL_ILP32 &&
1129             extended_offset == 0?
1130             MAXOFF32_T : MAXOFFSET_T;
1131 #else /* _SYSCALL32_IMPL */
1132         const u_offset_t maxoff = MAXOFF32_T;
1133 #endif /* _SYSCALL32_IMPL */
1134 
1135         int in_crit = 0;
1136 
1137         if (iovcnt <= 0 || iovcnt > DEF_IOV_MAX)
1138                 return (set_errno(EINVAL));
1139 





1140 #ifdef _SYSCALL32_IMPL
1141         /*
1142          * 32-bit callers need to have their iovec expanded,
1143          * while ensuring that they can't move more than 2Gbytes
1144          * of data in a single call.
1145          */
1146         if (get_udatamodel() == DATAMODEL_ILP32) {
1147                 struct iovec32 aiov32[DEF_IOV_MAX];

1148                 ssize32_t count32;
1149 
1150                 if (copyin(iovp, aiov32, iovcnt * sizeof (struct iovec32)))








1151                         return (set_errno(EFAULT));

1152 
1153                 count32 = 0;
1154                 for (i = 0; i < iovcnt; i++) {
1155                         ssize32_t iovlen32 = aiov32[i].iov_len;
1156                         count32 += iovlen32;
1157                         if (iovlen32 < 0 || count32 < 0)




1158                                 return (set_errno(EINVAL));

1159                         aiov[i].iov_len = iovlen32;
1160                         aiov[i].iov_base =
1161                             (caddr_t)(uintptr_t)aiov32[i].iov_base;
1162                 }


1163         } else
1164 #endif /* _SYSCALL32_IMPL */
1165                 if (copyin(iovp, aiov, iovcnt * sizeof (struct iovec)))


1166                         return (set_errno(EFAULT));

1167 
1168         count = 0;
1169         for (i = 0; i < iovcnt; i++) {
1170                 ssize_t iovlen = aiov[i].iov_len;
1171                 count += iovlen;
1172                 if (iovlen < 0 || count < 0)


1173                         return (set_errno(EINVAL));
1174         }

1175 
1176         if ((bcount = (ssize_t)count) < 0)


1177                 return (set_errno(EINVAL));
1178         if ((fp = getf(fdes)) == NULL)



1179                 return (set_errno(EBADF));

1180         if (((fflag = fp->f_flag) & FWRITE) == 0) {
1181                 error = EBADF;
1182                 goto out;
1183         }
1184         vp = fp->f_vnode;
1185         rwflag = 1;
1186         if (vp->v_type == VREG) {
1187 
1188                 if (bcount == 0)
1189                         goto out;
1190 
1191                 /*
1192                  * return EINVAL for offsets that cannot be
1193                  * represented in an off_t.
1194                  */
1195                 if (fileoff > maxoff) {
1196                         error = EINVAL;
1197                         goto out;
1198                 }
1199                 /*


1291         auio.uio_fmode = fflag;
1292         auio.uio_extflg = UIO_COPY_CACHED;
1293         ioflag = auio.uio_fmode & (FSYNC|FDSYNC|FRSYNC);
1294         error = VOP_WRITE(vp, &auio, ioflag, fp->f_cred, NULL);
1295         count -= auio.uio_resid;
1296         CPU_STATS_ENTER_K();
1297         cp = CPU;
1298         CPU_STATS_ADDQ(cp, sys, syswrite, 1);
1299         CPU_STATS_ADDQ(cp, sys, writech, (ulong_t)count);
1300         CPU_STATS_EXIT_K();
1301         ttolwp(curthread)->lwp_ru.ioch += (ulong_t)count;
1302 
1303         VOP_RWUNLOCK(vp, rwflag, NULL);
1304 
1305         if (error == EINTR && count != 0)
1306                 error = 0;
1307 out:
1308         if (in_crit)
1309                 nbl_end_crit(vp);
1310         releasef(fdes);


1311         if (error)
1312                 return (set_errno(error));
1313         return (count);
1314 }
1315 
1316 #if defined(_SYSCALL32_IMPL) || defined(_ILP32)
1317 
1318 /*
1319  * This syscall supplies 64-bit file offsets to 32-bit applications only.
1320  */
1321 ssize32_t
1322 pread64(int fdes, void *cbuf, size32_t count, uint32_t offset_1,
1323     uint32_t offset_2)
1324 {
1325         struct uio auio;
1326         struct iovec aiov;
1327         file_t *fp;
1328         register vnode_t *vp;
1329         struct cpu *cp;
1330         int fflag, ioflag, rwflag;




   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  * Copyright 2015, Joyent, Inc.  All rights reserved.
  26  */
  27 
  28 /*      Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
  29 /*        All Rights Reserved   */
  30 
  31 /*
  32  * Portions of this source code were derived from Berkeley 4.3 BSD
  33  * under license from the Regents of the University of California.
  34  */
  35 
  36 #include <sys/param.h>
  37 #include <sys/isa_defs.h>
  38 #include <sys/types.h>
  39 #include <sys/inttypes.h>
  40 #include <sys/sysmacros.h>
  41 #include <sys/cred.h>
  42 #include <sys/user.h>
  43 #include <sys/systm.h>
  44 #include <sys/errno.h>
  45 #include <sys/vnode.h>
  46 #include <sys/file.h>
  47 #include <sys/proc.h>
  48 #include <sys/cpuvar.h>
  49 #include <sys/uio.h>
  50 #include <sys/debug.h>
  51 #include <sys/rctl.h>
  52 #include <sys/nbmlock.h>
  53 #include <sys/limits.h>
  54 
  55 #define COPYOUT_MAX_CACHE       (1<<17)           /* 128K */
  56 
  57 size_t copyout_max_cached = COPYOUT_MAX_CACHE;  /* global so it's patchable */
  58 
  59 /*
  60  * read, write, pread, pwrite, readv, writev, preadv, and pwritev syscalls.
  61  *
  62  * 64-bit open: all opens are large file opens.
  63  * Large Files: the behaviour of read depends on whether the fd
  64  *              corresponds to large open or not.
  65  * 32-bit open: FOFFMAX flag not set.
  66  *              read until MAXOFF32_T - 1 and read at MAXOFF32_T returns
  67  *              EOVERFLOW if count is non-zero and if size of file
  68  *              is > MAXOFF32_T. If size of file is <= MAXOFF32_T read
  69  *              at >= MAXOFF32_T returns EOF.
  70  */
  71 
  72 /*
  73  * Native system call


 591         bcount -= auio.uio_resid;
 592         CPU_STATS_ENTER_K();
 593         cp = CPU;
 594         CPU_STATS_ADDQ(cp, sys, syswrite, 1);
 595         CPU_STATS_ADDQ(cp, sys, writech, (ulong_t)bcount);
 596         CPU_STATS_EXIT_K();
 597         ttolwp(curthread)->lwp_ru.ioch += (ulong_t)bcount;
 598         VOP_RWUNLOCK(vp, rwflag, NULL);
 599 
 600         if (error == EINTR && bcount != 0)
 601                 error = 0;
 602 out:
 603         if (in_crit)
 604                 nbl_end_crit(vp);
 605         releasef(fdes);
 606         if (error)
 607                 return (set_errno(error));
 608         return (bcount);
 609 }
 610 








 611 ssize_t
 612 readv(int fdes, struct iovec *iovp, int iovcnt)
 613 {
 614         struct uio auio;
 615         struct iovec buf[IOV_MAX_STACK], *aiov = buf;
 616         int aiovlen = 0;
 617         file_t *fp;
 618         register vnode_t *vp;
 619         struct cpu *cp;
 620         int fflag, ioflag, rwflag;
 621         ssize_t count, bcount;
 622         int error = 0;
 623         int i;
 624         u_offset_t fileoff;
 625         int in_crit = 0;
 626 
 627         if (iovcnt <= 0 || iovcnt > IOV_MAX)
 628                 return (set_errno(EINVAL));
 629 
 630         if (iovcnt > IOV_MAX_STACK) {
 631                 aiovlen = iovcnt * sizeof (iovec_t);
 632                 aiov = kmem_alloc(aiovlen, KM_SLEEP);
 633         }
 634 
 635 #ifdef _SYSCALL32_IMPL
 636         /*
 637          * 32-bit callers need to have their iovec expanded,
 638          * while ensuring that they can't move more than 2Gbytes
 639          * of data in a single call.
 640          */
 641         if (get_udatamodel() == DATAMODEL_ILP32) {
 642                 struct iovec32 buf32[IOV_MAX_STACK], *aiov32 = buf32;
 643                 int aiov32len;
 644                 ssize32_t count32;
 645 
 646                 aiov32len = iovcnt * sizeof (iovec32_t);
 647                 if (aiovlen != 0)
 648                         aiov32 = kmem_alloc(aiov32len, KM_SLEEP);
 649 
 650                 if (copyin(iovp, aiov32, aiov32len)) {
 651                         if (aiovlen != 0) {
 652                                 kmem_free(aiov32, aiov32len);
 653                                 kmem_free(aiov, aiovlen);
 654                         }
 655                         return (set_errno(EFAULT));
 656                 }
 657 
 658                 count32 = 0;
 659                 for (i = 0; i < iovcnt; i++) {
 660                         ssize32_t iovlen32 = aiov32[i].iov_len;
 661                         count32 += iovlen32;
 662                         if (iovlen32 < 0 || count32 < 0) {
 663                                 if (aiovlen != 0) {
 664                                         kmem_free(aiov32, aiov32len);
 665                                         kmem_free(aiov, aiovlen);
 666                                 }
 667                                 return (set_errno(EINVAL));
 668                         }
 669                         aiov[i].iov_len = iovlen32;
 670                         aiov[i].iov_base =
 671                             (caddr_t)(uintptr_t)aiov32[i].iov_base;
 672                 }
 673 
 674                 if (aiovlen != 0)
 675                         kmem_free(aiov32, aiov32len);
 676         } else
 677 #endif
 678         if (copyin(iovp, aiov, iovcnt * sizeof (iovec_t))) {
 679                 if (aiovlen != 0)
 680                         kmem_free(aiov, aiovlen);
 681                 return (set_errno(EFAULT));
 682         }
 683 
 684         count = 0;
 685         for (i = 0; i < iovcnt; i++) {
 686                 ssize_t iovlen = aiov[i].iov_len;
 687                 count += iovlen;
 688                 if (iovlen < 0 || count < 0) {
 689                         if (aiovlen != 0)
 690                                 kmem_free(aiov, aiovlen);
 691                         return (set_errno(EINVAL));
 692                 }
 693         }
 694         if ((fp = getf(fdes)) == NULL) {
 695                 if (aiovlen != 0)
 696                         kmem_free(aiov, aiovlen);
 697                 return (set_errno(EBADF));
 698         }
 699         if (((fflag = fp->f_flag) & FREAD) == 0) {
 700                 error = EBADF;
 701                 goto out;
 702         }
 703         vp = fp->f_vnode;
 704         if (vp->v_type == VREG && count == 0) {
 705                 goto out;
 706         }
 707 
 708         rwflag = 0;
 709 
 710         /*
 711          * We have to enter the critical region before calling VOP_RWLOCK
 712          * to avoid a deadlock with ufs.
 713          */
 714         if (nbl_need_check(vp)) {
 715                 int svmand;
 716 
 717                 nbl_start_crit(vp, RW_READER);
 718                 in_crit = 1;


 777         cp = CPU;
 778         CPU_STATS_ADDQ(cp, sys, sysread, 1);
 779         CPU_STATS_ADDQ(cp, sys, readch, (ulong_t)count);
 780         CPU_STATS_EXIT_K();
 781         ttolwp(curthread)->lwp_ru.ioch += (ulong_t)count;
 782 
 783         if (vp->v_type == VFIFO)     /* Backward compatibility */
 784                 fp->f_offset = count;
 785         else if (((fp->f_flag & FAPPEND) == 0) ||
 786             (vp->v_type != VREG) || (bcount != 0))   /* POSIX */
 787                 fp->f_offset = auio.uio_loffset;
 788 
 789         VOP_RWUNLOCK(vp, rwflag, NULL);
 790 
 791         if (error == EINTR && count != 0)
 792                 error = 0;
 793 out:
 794         if (in_crit)
 795                 nbl_end_crit(vp);
 796         releasef(fdes);
 797         if (aiovlen != 0)
 798                 kmem_free(aiov, aiovlen);
 799         if (error)
 800                 return (set_errno(error));
 801         return (count);
 802 }
 803 
 804 ssize_t
 805 writev(int fdes, struct iovec *iovp, int iovcnt)
 806 {
 807         struct uio auio;
 808         struct iovec buf[IOV_MAX_STACK], *aiov = buf;
 809         int aiovlen = 0;
 810         file_t *fp;
 811         register vnode_t *vp;
 812         struct cpu *cp;
 813         int fflag, ioflag, rwflag;
 814         ssize_t count, bcount;
 815         int error = 0;
 816         int i;
 817         u_offset_t fileoff;
 818         int in_crit = 0;
 819 
 820         if (iovcnt <= 0 || iovcnt > IOV_MAX)
 821                 return (set_errno(EINVAL));
 822 
 823         if (iovcnt > IOV_MAX_STACK) {
 824                 aiovlen = iovcnt * sizeof (iovec_t);
 825                 aiov = kmem_alloc(aiovlen, KM_SLEEP);
 826         }
 827 
 828 #ifdef _SYSCALL32_IMPL
 829         /*
 830          * 32-bit callers need to have their iovec expanded,
 831          * while ensuring that they can't move more than 2Gbytes
 832          * of data in a single call.
 833          */
 834         if (get_udatamodel() == DATAMODEL_ILP32) {
 835                 struct iovec32 buf32[IOV_MAX_STACK], *aiov32 = buf32;
 836                 int aiov32len;
 837                 ssize32_t count32;
 838 
 839                 aiov32len = iovcnt * sizeof (iovec32_t);
 840                 if (aiovlen != 0)
 841                         aiov32 = kmem_alloc(aiov32len, KM_SLEEP);
 842 
 843                 if (copyin(iovp, aiov32, aiov32len)) {
 844                         if (aiovlen != 0) {
 845                                 kmem_free(aiov32, aiov32len);
 846                                 kmem_free(aiov, aiovlen);
 847                         }
 848                         return (set_errno(EFAULT));
 849                 }
 850 
 851                 count32 = 0;
 852                 for (i = 0; i < iovcnt; i++) {
 853                         ssize32_t iovlen = aiov32[i].iov_len;
 854                         count32 += iovlen;
 855                         if (iovlen < 0 || count32 < 0) {
 856                                 if (aiovlen != 0) {
 857                                         kmem_free(aiov32, aiov32len);
 858                                         kmem_free(aiov, aiovlen);
 859                                 }
 860                                 return (set_errno(EINVAL));
 861                         }
 862                         aiov[i].iov_len = iovlen;
 863                         aiov[i].iov_base =
 864                             (caddr_t)(uintptr_t)aiov32[i].iov_base;
 865                 }
 866                 if (aiovlen != 0)
 867                         kmem_free(aiov32, aiov32len);
 868         } else
 869 #endif
 870         if (copyin(iovp, aiov, iovcnt * sizeof (iovec_t))) {
 871                 if (aiovlen != 0)
 872                         kmem_free(aiov, aiovlen);
 873                 return (set_errno(EFAULT));
 874         }
 875 
 876         count = 0;
 877         for (i = 0; i < iovcnt; i++) {
 878                 ssize_t iovlen = aiov[i].iov_len;
 879                 count += iovlen;
 880                 if (iovlen < 0 || count < 0) {
 881                         if (aiovlen != 0)
 882                                 kmem_free(aiov, aiovlen);
 883                         return (set_errno(EINVAL));
 884                 }
 885         }
 886         if ((fp = getf(fdes)) == NULL) {
 887                 if (aiovlen != 0)
 888                         kmem_free(aiov, aiovlen);
 889                 return (set_errno(EBADF));
 890         }
 891         if (((fflag = fp->f_flag) & FWRITE) == 0) {
 892                 error = EBADF;
 893                 goto out;
 894         }
 895         vp = fp->f_vnode;
 896         if (vp->v_type == VREG && count == 0) {
 897                 goto out;
 898         }
 899 
 900         rwflag = 1;
 901 
 902         /*
 903          * We have to enter the critical region before calling VOP_RWLOCK
 904          * to avoid a deadlock with ufs.
 905          */
 906         if (nbl_need_check(vp)) {
 907                 int svmand;
 908 
 909                 nbl_start_crit(vp, RW_READER);
 910                 in_crit = 1;


 960         CPU_STATS_ENTER_K();
 961         cp = CPU;
 962         CPU_STATS_ADDQ(cp, sys, syswrite, 1);
 963         CPU_STATS_ADDQ(cp, sys, writech, (ulong_t)count);
 964         CPU_STATS_EXIT_K();
 965         ttolwp(curthread)->lwp_ru.ioch += (ulong_t)count;
 966 
 967         if (vp->v_type == VFIFO)     /* Backward compatibility */
 968                 fp->f_offset = count;
 969         else if (((fp->f_flag & FAPPEND) == 0) ||
 970             (vp->v_type != VREG) || (bcount != 0))   /* POSIX */
 971                 fp->f_offset = auio.uio_loffset;
 972         VOP_RWUNLOCK(vp, rwflag, NULL);
 973 
 974         if (error == EINTR && count != 0)
 975                 error = 0;
 976 out:
 977         if (in_crit)
 978                 nbl_end_crit(vp);
 979         releasef(fdes);
 980         if (aiovlen != 0)
 981                 kmem_free(aiov, aiovlen);
 982         if (error)
 983                 return (set_errno(error));
 984         return (count);
 985 }
 986 
 987 ssize_t
 988 preadv(int fdes, struct iovec *iovp, int iovcnt, off_t offset,
 989     off_t extended_offset)
 990 {
             /*
              * Positioned scatter read: copies in and validates the caller's
              * iovec array, then issues VOP_READ() on the vnode behind fdes.
              *
              * fdes            - descriptor to read from; must have FREAD set.
              * iovp            - user address of the iovec array.
              * iovcnt          - entry count; must satisfy 0 < iovcnt <= IOV_MAX.
              * offset          - file offset; only the low half when
              *                   _SYSCALL32_IMPL or _ILP32 is defined.
              * extended_offset - high 32 bits of the offset in those builds.
              *
              * Returns count (the summed iovec lengths less auio.uio_resid)
              * on success, otherwise set_errno() of EINVAL, EFAULT, EBADF or
              * whatever else ended up in `error`.
              */
 991         struct uio auio;
 992         struct iovec buf[IOV_MAX_STACK], *aiov = buf;
             /*
              * Nonzero only when aiov was kmem_alloc'ed instead of pointing
              * at buf; it is also the size handed back to kmem_free().
              */
 993         int aiovlen = 0;
 994         file_t *fp;
 995         register vnode_t *vp;
 996         struct cpu *cp;
 997         int fflag, ioflag, rwflag;
 998         ssize_t count, bcount;
 999         int error = 0;
1000         int i;
1001 
             /*
              * With a 32-bit syscall interface the offset arrives in two
              * halves and is reassembled here; otherwise offset is used alone.
              */
1002 #if defined(_SYSCALL32_IMPL) || defined(_ILP32)
1003         u_offset_t fileoff = ((u_offset_t)extended_offset << 32) |
1004             (u_offset_t)offset;
1005 #else /* _SYSCALL32_IMPL || _ILP32 */
1006         u_offset_t fileoff = (u_offset_t)(ulong_t)offset;
1007 #endif /* _SYSCALL32_IMPL || _ILP32 */
             /*
              * Largest acceptable fileoff: MAXOFF32_T for an ILP32 caller
              * that passed extended_offset == 0 (and for kernels built
              * without _SYSCALL32_IMPL), MAXOFFSET_T otherwise.
              */
1008 #ifdef _SYSCALL32_IMPL
1009         const u_offset_t maxoff = get_udatamodel() == DATAMODEL_ILP32 &&
1010             extended_offset == 0?
1011             MAXOFF32_T : MAXOFFSET_T;
1012 #else /* _SYSCALL32_IMPL */
1013         const u_offset_t maxoff = MAXOFF32_T;
1014 #endif /* _SYSCALL32_IMPL */
1015 
1016         int in_crit = 0;
1017 
1018         if (iovcnt <= 0 || iovcnt > IOV_MAX)
1019                 return (set_errno(EINVAL));
1020 
             /* Arrays too large for the on-stack buf come from kmem. */
1021         if (iovcnt > IOV_MAX_STACK) {
1022                 aiovlen = iovcnt * sizeof (iovec_t);
1023                 aiov = kmem_alloc(aiovlen, KM_SLEEP);
1024         }
1025 
1026 #ifdef _SYSCALL32_IMPL
1027         /*
1028          * 32-bit callers need to have their iovec expanded,
1029          * while ensuring that they can't move more than 2Gbytes
1030          * of data in a single call.
1031          */
1032         if (get_udatamodel() == DATAMODEL_ILP32) {
1033                 struct iovec32 buf32[IOV_MAX_STACK], *aiov32 = buf32;
1034                 int aiov32len;
1035                 ssize32_t count32;
1036 
1037                 aiov32len = iovcnt * sizeof (iovec32_t);
                     /*
                      * aiov32 is heap-allocated under exactly the same
                      * condition as aiov, so aiovlen != 0 guards both frees.
                      */
1038                 if (aiovlen != 0)
1039                         aiov32 = kmem_alloc(aiov32len, KM_SLEEP);
1040 
1041                 if (copyin(iovp, aiov32, aiov32len)) {
1042                         if (aiovlen != 0) {
1043                                 kmem_free(aiov32, aiov32len);
1044                                 kmem_free(aiov, aiovlen);
1045                         }
1046                         return (set_errno(EFAULT));
1047                 }
1048 
                     /*
                      * Reject a negative length or a running 32-bit total
                      * that goes negative, widening each accepted entry
                      * into the native aiov array.
                      */
1049                 count32 = 0;
1050                 for (i = 0; i < iovcnt; i++) {
1051                         ssize32_t iovlen32 = aiov32[i].iov_len;
1052                         count32 += iovlen32;
1053                         if (iovlen32 < 0 || count32 < 0) {
1054                                 if (aiovlen != 0) {
1055                                         kmem_free(aiov32, aiov32len);
1056                                         kmem_free(aiov, aiovlen);
1057                                 }
1058                                 return (set_errno(EINVAL));
1059                         }
1060                         aiov[i].iov_len = iovlen32;
1061                         aiov[i].iov_base =
1062                             (caddr_t)(uintptr_t)aiov32[i].iov_base;
1063                 }
                     /* The 32-bit copy is no longer needed once widened. */
1064                 if (aiovlen != 0)
1065                         kmem_free(aiov32, aiov32len);
1066         } else
1067 #endif /* _SYSCALL32_IMPL */
                     /* Native callers: the user array is copied in as-is. */
1068                 if (copyin(iovp, aiov, iovcnt * sizeof (iovec_t))) {
1069                         if (aiovlen != 0)
1070                                 kmem_free(aiov, aiovlen);
1071                         return (set_errno(EFAULT));
1072                 }
1073 
             /*
              * Total the request size; a negative entry or a total that
              * goes negative is rejected with EINVAL.
              */
1074         count = 0;
1075         for (i = 0; i < iovcnt; i++) {
1076                 ssize_t iovlen = aiov[i].iov_len;
1077                 count += iovlen;
1078                 if (iovlen < 0 || count < 0) {
1079                         if (aiovlen != 0)
1080                                 kmem_free(aiov, aiovlen);
1081                         return (set_errno(EINVAL));
1082                 }
1083         }
1084 
             /*
              * NOTE(review): the loop above already returns whenever count
              * is negative, so this test looks redundant; its real effect
              * is to initialize bcount.
              */
1085         if ((bcount = (ssize_t)count) < 0) {
1086                 if (aiovlen != 0)
1087                         kmem_free(aiov, aiovlen);
1088                 return (set_errno(EINVAL));
1089         }
1090         if ((fp = getf(fdes)) == NULL) {
1091                 if (aiovlen != 0)
1092                         kmem_free(aiov, aiovlen);
1093                 return (set_errno(EBADF));
1094         }
             /* From here on every exit goes through out: to drop the getf(). */
1095         if (((fflag = fp->f_flag) & FREAD) == 0) {
1096                 error = EBADF;
1097                 goto out;
1098         }
1099         vp = fp->f_vnode;
1100         rwflag = 0;
1101         if (vp->v_type == VREG) {
1102 
                     /* Zero-length read of a regular file: return 0 now. */
1103                 if (bcount == 0)
1104                         goto out;
1105 
1106                 /*
1107                  * return EINVAL for offsets that cannot be
1108                  * represented in an off_t.
1109                  */
1110                 if (fileoff > maxoff) {
1111                         error = EINVAL;
1112                         goto out;
1113                 }
1114 


1179                 auio.uio_extflg = UIO_COPY_DEFAULT;
1180 
1181         ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);
1182         error = VOP_READ(vp, &auio, ioflag, fp->f_cred, NULL);
             /*
              * Subtract what the uio still has outstanding, presumably
              * leaving the number of bytes actually transferred.
              */
1183         count -= auio.uio_resid;
             /* Account the call and the byte count in the CPU and lwp stats. */
1184         CPU_STATS_ENTER_K();
1185         cp = CPU;
1186         CPU_STATS_ADDQ(cp, sys, sysread, 1);
1187         CPU_STATS_ADDQ(cp, sys, readch, (ulong_t)count);
1188         CPU_STATS_EXIT_K();
1189         ttolwp(curthread)->lwp_ru.ioch += (ulong_t)count;
1190 
1191         VOP_RWUNLOCK(vp, rwflag, NULL);
1192 
             /*
              * An EINTR with a nonzero count is reported as success with
              * that count rather than as an error.
              */
1193         if (error == EINTR && count != 0)
1194                 error = 0;
1195 out:
             /*
              * Common exit: vp is only touched when in_crit was set, then
              * the file reference and any heap-allocated iovec are released.
              */
1196         if (in_crit)
1197                 nbl_end_crit(vp);
1198         releasef(fdes);
1199         if (aiovlen != 0)
1200                 kmem_free(aiov, aiovlen);
1201         if (error)
1202                 return (set_errno(error));
1203         return (count);
1204 }
1205 
1206 ssize_t
1207 pwritev(int fdes, struct iovec *iovp, int iovcnt, off_t offset,
1208     off_t extended_offset)
1209 {
             /*
              * Positioned gather write: copies in and validates the caller's
              * iovec array, then issues VOP_WRITE() on the vnode behind fdes.
              *
              * fdes            - descriptor to write to; must have FWRITE set.
              * iovp            - user address of the iovec array.
              * iovcnt          - entry count; must satisfy 0 < iovcnt <= IOV_MAX.
              * offset          - file offset; only the low half when
              *                   _SYSCALL32_IMPL or _ILP32 is defined.
              * extended_offset - high 32 bits of the offset in those builds.
              *
              * Returns count (the summed iovec lengths less auio.uio_resid)
              * on success, otherwise set_errno() of EINVAL, EFAULT, EBADF or
              * whatever else ended up in `error`.  An EINTR with a nonzero
              * count is reported as success with that count.
              */
1210         struct uio auio;
1211         struct iovec buf[IOV_MAX_STACK], *aiov = buf;
             /*
              * Nonzero only when aiov was kmem_alloc'ed instead of pointing
              * at buf; it is also the size handed back to kmem_free().
              */
1212         int aiovlen = 0;
1213         file_t *fp;
1214         register vnode_t *vp;
1215         struct cpu *cp;
1216         int fflag, ioflag, rwflag;
1217         ssize_t count, bcount;
1218         int error = 0;
1219         int i;
1220 
             /*
              * With a 32-bit syscall interface the offset arrives in two
              * halves and is reassembled here; otherwise offset is used alone.
              */
1221 #if defined(_SYSCALL32_IMPL) || defined(_ILP32)
1222         u_offset_t fileoff = ((u_offset_t)extended_offset << 32) |
1223             (u_offset_t)offset;
1224 #else /* _SYSCALL32_IMPL || _ILP32 */
1225         u_offset_t fileoff = (u_offset_t)(ulong_t)offset;
1226 #endif /* _SYSCALL32_IMPL || _ILP32 */
             /*
              * Largest acceptable fileoff: MAXOFF32_T for an ILP32 caller
              * that passed extended_offset == 0 (and for kernels built
              * without _SYSCALL32_IMPL), MAXOFFSET_T otherwise.
              */
1227 #ifdef _SYSCALL32_IMPL
1228         const u_offset_t maxoff = get_udatamodel() == DATAMODEL_ILP32 &&
1229             extended_offset == 0?
1230             MAXOFF32_T : MAXOFFSET_T;
1231 #else /* _SYSCALL32_IMPL */
1232         const u_offset_t maxoff = MAXOFF32_T;
1233 #endif /* _SYSCALL32_IMPL */
1234 
1235         int in_crit = 0;
1236 
1237         if (iovcnt <= 0 || iovcnt > IOV_MAX)
1238                 return (set_errno(EINVAL));
1239 
             /* Arrays too large for the on-stack buf come from kmem. */
1240         if (iovcnt > IOV_MAX_STACK) {
1241                 aiovlen = iovcnt * sizeof (iovec_t);
1242                 aiov = kmem_alloc(aiovlen, KM_SLEEP);
1243         }
1244 
1245 #ifdef _SYSCALL32_IMPL
1246         /*
1247          * 32-bit callers need to have their iovec expanded,
1248          * while ensuring that they can't move more than 2Gbytes
1249          * of data in a single call.
1250          */
1251         if (get_udatamodel() == DATAMODEL_ILP32) {
1252                 struct iovec32 buf32[IOV_MAX_STACK], *aiov32 = buf32;
1253                 int aiov32len;
1254                 ssize32_t count32;
1255 
1256                 aiov32len = iovcnt * sizeof (iovec32_t);
                     /*
                      * aiov32 is heap-allocated under exactly the same
                      * condition as aiov, so aiovlen != 0 guards both frees.
                      */
1257                 if (aiovlen != 0)
1258                         aiov32 = kmem_alloc(aiov32len, KM_SLEEP);
1259 
1260                 if (copyin(iovp, aiov32, aiov32len)) {
1261                         if (aiovlen != 0) {
1262                                 kmem_free(aiov32, aiov32len);
1263                                 kmem_free(aiov, aiovlen);
1264                         }
1265                         return (set_errno(EFAULT));
1266                 }
1267 
                     /*
                      * Reject a negative length or a running 32-bit total
                      * that goes negative, widening each accepted entry
                      * into the native aiov array.
                      */
1268                 count32 = 0;
1269                 for (i = 0; i < iovcnt; i++) {
1270                         ssize32_t iovlen32 = aiov32[i].iov_len;
1271                         count32 += iovlen32;
1272                         if (iovlen32 < 0 || count32 < 0) {
1273                                 if (aiovlen != 0) {
1274                                         kmem_free(aiov32, aiov32len);
1275                                         kmem_free(aiov, aiovlen);
1276                                 }
1277                                 return (set_errno(EINVAL));
1278                         }
1279                         aiov[i].iov_len = iovlen32;
1280                         aiov[i].iov_base =
1281                             (caddr_t)(uintptr_t)aiov32[i].iov_base;
1282                 }
                     /* The 32-bit copy is no longer needed once widened. */
1283                 if (aiovlen != 0)
1284                         kmem_free(aiov32, aiov32len);
1285         } else
1286 #endif /* _SYSCALL32_IMPL */
                     /* Native callers: the user array is copied in as-is. */
1287                 if (copyin(iovp, aiov, iovcnt * sizeof (iovec_t))) {
1288                         if (aiovlen != 0)
1289                                 kmem_free(aiov, aiovlen);
1290                         return (set_errno(EFAULT));
1291                 }
1292 
             /*
              * Total the request size; a negative entry or a total that
              * goes negative is rejected with EINVAL.
              */
1293         count = 0;
1294         for (i = 0; i < iovcnt; i++) {
1295                 ssize_t iovlen = aiov[i].iov_len;
1296                 count += iovlen;
1297                 if (iovlen < 0 || count < 0) {
1298                         if (aiovlen != 0)
1299                                 kmem_free(aiov, aiovlen);
1300                         return (set_errno(EINVAL));
1301                 }
1302         }
1303 
             /*
              * NOTE(review): the loop above already returns whenever count
              * is negative, so this test looks redundant; its real effect
              * is to initialize bcount.
              */
1304         if ((bcount = (ssize_t)count) < 0) {
1305                 if (aiovlen != 0)
1306                         kmem_free(aiov, aiovlen);
1307                 return (set_errno(EINVAL));
1308         }
1309         if ((fp = getf(fdes)) == NULL) {
1310                 if (aiovlen != 0)
1311                         kmem_free(aiov, aiovlen);
1312                 return (set_errno(EBADF));
1313         }
             /* From here on every exit goes through out: to drop the getf(). */
1314         if (((fflag = fp->f_flag) & FWRITE) == 0) {
1315                 error = EBADF;
1316                 goto out;
1317         }
1318         vp = fp->f_vnode;
1319         rwflag = 1;
1320         if (vp->v_type == VREG) {
1321 
                     /* Zero-length write to a regular file: return 0 now. */
1322                 if (bcount == 0)
1323                         goto out;
1324 
1325                 /*
1326                  * return EINVAL for offsets that cannot be
1327                  * represented in an off_t.
1328                  */
1329                 if (fileoff > maxoff) {
1330                         error = EINVAL;
1331                         goto out;
1332                 }
1333                 /*


1425         auio.uio_fmode = fflag;
1426         auio.uio_extflg = UIO_COPY_CACHED;
1427         ioflag = auio.uio_fmode & (FSYNC|FDSYNC|FRSYNC);
1428         error = VOP_WRITE(vp, &auio, ioflag, fp->f_cred, NULL);
1429         count -= auio.uio_resid;
1430         CPU_STATS_ENTER_K();
1431         cp = CPU;
1432         CPU_STATS_ADDQ(cp, sys, syswrite, 1);
1433         CPU_STATS_ADDQ(cp, sys, writech, (ulong_t)count);
1434         CPU_STATS_EXIT_K();
1435         ttolwp(curthread)->lwp_ru.ioch += (ulong_t)count;
1436 
1437         VOP_RWUNLOCK(vp, rwflag, NULL);
1438 
1439         if (error == EINTR && count != 0)
1440                 error = 0;
1441 out:
1442         if (in_crit)
1443                 nbl_end_crit(vp);
1444         releasef(fdes);
1445         if (aiovlen != 0)
1446                 kmem_free(aiov, aiovlen);
1447         if (error)
1448                 return (set_errno(error));
1449         return (count);
1450 }
1451 
1452 #if defined(_SYSCALL32_IMPL) || defined(_ILP32)
1453 
1454 /*
1455  * This syscall supplies 64-bit file offsets to 32-bit applications only.
1456  */
1457 ssize32_t
1458 pread64(int fdes, void *cbuf, size32_t count, uint32_t offset_1,
1459     uint32_t offset_2)
1460 {
1461         struct uio auio;
1462         struct iovec aiov;
1463         file_t *fp;
1464         register vnode_t *vp;
1465         struct cpu *cp;
1466         int fflag, ioflag, rwflag;