Print this page
12365 pwritev64 can't write at offsets between [2 GiB, 4 GiB)
Portions contributed by: John Levon <john.levon@joyent.com>
Reviewed by: Patrick Mooney <patrick.mooney@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>


   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  * Copyright 2015, Joyent, Inc.  All rights reserved.
  26  */
  27 
  28 /*      Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
  29 /*        All Rights Reserved   */
  30 
  31 /*
  32  * Portions of this source code were derived from Berkeley 4.3 BSD
  33  * under license from the Regents of the University of California.
  34  */
  35 
  36 #include <sys/param.h>
  37 #include <sys/isa_defs.h>
  38 #include <sys/types.h>
  39 #include <sys/inttypes.h>
  40 #include <sys/sysmacros.h>
  41 #include <sys/cred.h>
  42 #include <sys/user.h>
  43 #include <sys/systm.h>
  44 #include <sys/errno.h>
  45 #include <sys/vnode.h>


 982         if (error)
 983                 return (set_errno(error));
 984         return (count);
 985 }
 986 
 987 ssize_t
 988 preadv(int fdes, struct iovec *iovp, int iovcnt, off_t offset,
 989     off_t extended_offset)
 990 {
 991         struct uio auio;
 992         struct iovec buf[IOV_MAX_STACK], *aiov = buf;
 993         int aiovlen = 0;
 994         file_t *fp;
 995         register vnode_t *vp;
 996         struct cpu *cp;
 997         int fflag, ioflag, rwflag;
 998         ssize_t count, bcount;
 999         int error = 0;
1000         int i;
1001 












1002 #if defined(_SYSCALL32_IMPL) || defined(_ILP32)
1003         u_offset_t fileoff = ((u_offset_t)extended_offset << 32) |
1004             (u_offset_t)offset;
1005 #else /* _SYSCALL32_IMPL || _ILP32 */
1006         u_offset_t fileoff = (u_offset_t)(ulong_t)offset;
1007 #endif /* _SYSCALL32_IMPL || _ILP32 */
1008 #ifdef _SYSCALL32_IMPL
1009         const u_offset_t maxoff = get_udatamodel() == DATAMODEL_ILP32 &&
1010             extended_offset == 0?
1011             MAXOFF32_T : MAXOFFSET_T;
1012 #else /* _SYSCALL32_IMPL */
1013         const u_offset_t maxoff = MAXOFF32_T;
1014 #endif /* _SYSCALL32_IMPL */
1015 
1016         int in_crit = 0;
1017 
1018         if (iovcnt <= 0 || iovcnt > IOV_MAX)
1019                 return (set_errno(EINVAL));
1020 
1021         if (iovcnt > IOV_MAX_STACK) {
1022                 aiovlen = iovcnt * sizeof (iovec_t);
1023                 aiov = kmem_alloc(aiovlen, KM_SLEEP);
1024         }
1025 
1026 #ifdef _SYSCALL32_IMPL
1027         /*
1028          * 32-bit callers need to have their iovec expanded,
1029          * while ensuring that they can't move more than 2Gbytes
1030          * of data in a single call.
1031          */
1032         if (get_udatamodel() == DATAMODEL_ILP32) {
1033                 struct iovec32 buf32[IOV_MAX_STACK], *aiov32 = buf32;
1034                 int aiov32len;


1065                         kmem_free(aiov32, aiov32len);
1066         } else
1067 #endif /* _SYSCALL32_IMPL */
1068                 if (copyin(iovp, aiov, iovcnt * sizeof (iovec_t))) {
1069                         if (aiovlen != 0)
1070                                 kmem_free(aiov, aiovlen);
1071                         return (set_errno(EFAULT));
1072                 }
1073 
1074         count = 0;
1075         for (i = 0; i < iovcnt; i++) {
1076                 ssize_t iovlen = aiov[i].iov_len;
1077                 count += iovlen;
1078                 if (iovlen < 0 || count < 0) {
1079                         if (aiovlen != 0)
1080                                 kmem_free(aiov, aiovlen);
1081                         return (set_errno(EINVAL));
1082                 }
1083         }
1084 
1085         if ((bcount = (ssize_t)count) < 0) {
1086                 if (aiovlen != 0)
1087                         kmem_free(aiov, aiovlen);
1088                 return (set_errno(EINVAL));
1089         }
1090         if ((fp = getf(fdes)) == NULL) {
1091                 if (aiovlen != 0)
1092                         kmem_free(aiov, aiovlen);
1093                 return (set_errno(EBADF));
1094         }
1095         if (((fflag = fp->f_flag) & FREAD) == 0) {
1096                 error = EBADF;
1097                 goto out;
1098         }
1099         vp = fp->f_vnode;
1100         rwflag = 0;
1101         if (vp->v_type == VREG) {
1102 




1103                 if (bcount == 0)
1104                         goto out;
1105 
1106                 /*
1107                  * return EINVAL for offsets that cannot be
1108                  * represented in an off_t.
1109                  */
1110                 if (fileoff > maxoff) {
1111                         error = EINVAL;







1112                         goto out;
1113                 }
1114 
1115                 if (fileoff + bcount > maxoff)
1116                         bcount = (ssize_t)((u_offset_t)maxoff - fileoff);




1117         } else if (vp->v_type == VFIFO) {
1118                 error = ESPIPE;
1119                 goto out;
1120         }
1121         /*
1122          * We have to enter the critical region before calling VOP_RWLOCK
1123          * to avoid a deadlock with ufs.
1124          */
1125         if (nbl_need_check(vp)) {
1126                 int svmand;
1127 
1128                 nbl_start_crit(vp, RW_READER);
1129                 in_crit = 1;
1130                 error = nbl_svmand(vp, fp->f_cred, &svmand);
1131                 if (error != 0)
1132                         goto out;
1133                 if (nbl_conflict(vp, NBL_WRITE, fileoff, count, svmand,
1134                     NULL)) {
1135                         error = EACCES;
1136                         goto out;
1137                 }
1138         }
1139 
1140         (void) VOP_RWLOCK(vp, rwflag, NULL);
1141 
1142         /*
1143          * Behaviour is same as read(2). Please see comments in
1144          * read(2).
1145          */
1146 
1147         if ((vp->v_type == VREG) && (fileoff >= OFFSET_MAX(fp))) {
1148                 struct vattr va;
1149                 va.va_mask = AT_SIZE;
1150                 if ((error =
1151                     VOP_GETATTR(vp, &va, 0, fp->f_cred, NULL)))  {
1152                         VOP_RWUNLOCK(vp, rwflag, NULL);
1153                         goto out;
1154                 }
1155                 if (fileoff >= va.va_size) {
1156                         VOP_RWUNLOCK(vp, rwflag, NULL);
1157                         count = 0;
1158                         goto out;
1159                 } else {
1160                         VOP_RWUNLOCK(vp, rwflag, NULL);
1161                         error = EOVERFLOW;
1162                         goto out;
1163                 }
1164         }
1165         if ((vp->v_type == VREG) &&
1166             (fileoff + count > OFFSET_MAX(fp))) {
1167                 count = (ssize_t)(OFFSET_MAX(fp) - fileoff);
1168         }
1169         auio.uio_loffset = fileoff;
1170         auio.uio_iov = aiov;
1171         auio.uio_iovcnt = iovcnt;
1172         auio.uio_resid = bcount = count;
1173         auio.uio_segflg = UIO_USERSPACE;
1174         auio.uio_llimit = MAXOFFSET_T;
1175         auio.uio_fmode = fflag;
1176         if (bcount <= copyout_max_cached)
1177                 auio.uio_extflg = UIO_COPY_CACHED;
1178         else
1179                 auio.uio_extflg = UIO_COPY_DEFAULT;
1180 
1181         ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);
1182         error = VOP_READ(vp, &auio, ioflag, fp->f_cred, NULL);
1183         count -= auio.uio_resid;
1184         CPU_STATS_ENTER_K();
1185         cp = CPU;
1186         CPU_STATS_ADDQ(cp, sys, sysread, 1);
1187         CPU_STATS_ADDQ(cp, sys, readch, (ulong_t)count);
1188         CPU_STATS_EXIT_K();


1201         if (error)
1202                 return (set_errno(error));
1203         return (count);
1204 }
1205 
1206 ssize_t
1207 pwritev(int fdes, struct iovec *iovp, int iovcnt, off_t offset,
1208     off_t extended_offset)
1209 {
1210         struct uio auio;
1211         struct iovec buf[IOV_MAX_STACK], *aiov = buf;
1212         int aiovlen = 0;
1213         file_t *fp;
1214         register vnode_t *vp;
1215         struct cpu *cp;
1216         int fflag, ioflag, rwflag;
1217         ssize_t count, bcount;
1218         int error = 0;
1219         int i;
1220 



1221 #if defined(_SYSCALL32_IMPL) || defined(_ILP32)
1222         u_offset_t fileoff = ((u_offset_t)extended_offset << 32) |
1223             (u_offset_t)offset;
1224 #else /* _SYSCALL32_IMPL || _ILP32 */
1225         u_offset_t fileoff = (u_offset_t)(ulong_t)offset;
1226 #endif /* _SYSCALL32_IMPL || _ILP32 */
1227 #ifdef _SYSCALL32_IMPL
1228         const u_offset_t maxoff = get_udatamodel() == DATAMODEL_ILP32 &&
1229             extended_offset == 0?
1230             MAXOFF32_T : MAXOFFSET_T;
1231 #else /* _SYSCALL32_IMPL */
1232         const u_offset_t maxoff = MAXOFF32_T;
1233 #endif /* _SYSCALL32_IMPL */
1234 
1235         int in_crit = 0;
1236 
1237         if (iovcnt <= 0 || iovcnt > IOV_MAX)
1238                 return (set_errno(EINVAL));
1239 
1240         if (iovcnt > IOV_MAX_STACK) {
1241                 aiovlen = iovcnt * sizeof (iovec_t);
1242                 aiov = kmem_alloc(aiovlen, KM_SLEEP);
1243         }
1244 
1245 #ifdef _SYSCALL32_IMPL
1246         /*
1247          * 32-bit callers need to have their iovec expanded,
1248          * while ensuring that they can't move more than 2Gbytes
1249          * of data in a single call.
1250          */
1251         if (get_udatamodel() == DATAMODEL_ILP32) {
1252                 struct iovec32 buf32[IOV_MAX_STACK], *aiov32 = buf32;
1253                 int aiov32len;


1284                         kmem_free(aiov32, aiov32len);
1285         } else
1286 #endif /* _SYSCALL32_IMPL */
1287                 if (copyin(iovp, aiov, iovcnt * sizeof (iovec_t))) {
1288                         if (aiovlen != 0)
1289                                 kmem_free(aiov, aiovlen);
1290                         return (set_errno(EFAULT));
1291                 }
1292 
1293         count = 0;
1294         for (i = 0; i < iovcnt; i++) {
1295                 ssize_t iovlen = aiov[i].iov_len;
1296                 count += iovlen;
1297                 if (iovlen < 0 || count < 0) {
1298                         if (aiovlen != 0)
1299                                 kmem_free(aiov, aiovlen);
1300                         return (set_errno(EINVAL));
1301                 }
1302         }
1303 
1304         if ((bcount = (ssize_t)count) < 0) {
1305                 if (aiovlen != 0)
1306                         kmem_free(aiov, aiovlen);
1307                 return (set_errno(EINVAL));
1308         }
1309         if ((fp = getf(fdes)) == NULL) {
1310                 if (aiovlen != 0)
1311                         kmem_free(aiov, aiovlen);
1312                 return (set_errno(EBADF));
1313         }
1314         if (((fflag = fp->f_flag) & FWRITE) == 0) {
1315                 error = EBADF;
1316                 goto out;
1317         }
1318         vp = fp->f_vnode;
1319         rwflag = 1;
1320         if (vp->v_type == VREG) {
1321 





1322                 if (bcount == 0)
1323                         goto out;
1324 
1325                 /*
1326                  * return EINVAL for offsets that cannot be
1327                  * represented in an off_t.
1328                  */
1329                 if (fileoff > maxoff) {
1330                         error = EINVAL;
1331                         goto out;
1332                 }

1333                 /*
1334                  * Take appropriate action if we are trying
1335                  * to write above the resource limit.
1336                  */
1337                 if (fileoff >= curproc->p_fsz_ctl) {
1338                         mutex_enter(&curproc->p_lock);
1339                         /*
1340                          * Return value ignored because it lists
1341                          * actions taken, but we are in an error case.
1342                          * We don't have any actions that depend on
1343                          * what could happen in this call, so we ignore
1344                          * the return value.
1345                          */
1346                         (void) rctl_action(
1347                             rctlproc_legacy[RLIMIT_FSIZE],
1348                             curproc->p_rctls, curproc,
1349                             RCA_UNSAFE_SIGINFO);
1350                         mutex_exit(&curproc->p_lock);
1351 
1352                         error = EFBIG;
1353                         goto out;
1354                 }
1355                 /*
1356                  * Don't allow pwritev to cause file sizes to exceed
1357                  * maxoff.
1358                  */
1359                 if (fileoff == maxoff) {
1360                         error = EFBIG;
1361                         goto out;
1362                 }
1363 
1364                 if (fileoff + bcount > maxoff)
1365                         bcount = (ssize_t)((u_offset_t)maxoff - fileoff);




1366         } else if (vp->v_type == VFIFO) {
1367                 error = ESPIPE;
1368                 goto out;
1369         }
1370         /*
1371          * We have to enter the critical region before calling VOP_RWLOCK
1372          * to avoid a deadlock with ufs.
1373          */
1374         if (nbl_need_check(vp)) {
1375                 int svmand;
1376 
1377                 nbl_start_crit(vp, RW_READER);
1378                 in_crit = 1;
1379                 error = nbl_svmand(vp, fp->f_cred, &svmand);
1380                 if (error != 0)
1381                         goto out;
1382                 if (nbl_conflict(vp, NBL_WRITE, fileoff, count, svmand,
1383                     NULL)) {
1384                         error = EACCES;
1385                         goto out;
1386                 }
1387         }
1388 
1389         (void) VOP_RWLOCK(vp, rwflag, NULL);
1390 
1391 
1392         /*
1393          * Behaviour is same as write(2). Please see comments for
1394          * write(2).
1395          */
1396 
1397         if (vp->v_type == VREG) {
1398                 if (fileoff >= curproc->p_fsz_ctl) {
1399                         VOP_RWUNLOCK(vp, rwflag, NULL);
1400                         mutex_enter(&curproc->p_lock);
1401                         /* see above rctl_action comment */
1402                         (void) rctl_action(
1403                             rctlproc_legacy[RLIMIT_FSIZE],
1404                             curproc->p_rctls,
1405                             curproc, RCA_UNSAFE_SIGINFO);
1406                         mutex_exit(&curproc->p_lock);
1407                         error = EFBIG;
1408                         goto out;
1409                 }
1410                 if (fileoff >= OFFSET_MAX(fp)) {
1411                         VOP_RWUNLOCK(vp, rwflag, NULL);
1412                         error = EFBIG;
1413                         goto out;
1414                 }
1415                 if (fileoff + count > OFFSET_MAX(fp))
1416                         count = (ssize_t)(OFFSET_MAX(fp) - fileoff);
1417         }
1418 
1419         auio.uio_loffset = fileoff;
1420         auio.uio_iov = aiov;
1421         auio.uio_iovcnt = iovcnt;
1422         auio.uio_resid = bcount = count;
1423         auio.uio_segflg = UIO_USERSPACE;
1424         auio.uio_llimit = curproc->p_fsz_ctl;
1425         auio.uio_fmode = fflag;
1426         auio.uio_extflg = UIO_COPY_CACHED;
1427         ioflag = auio.uio_fmode & (FSYNC|FDSYNC|FRSYNC);
1428         error = VOP_WRITE(vp, &auio, ioflag, fp->f_cred, NULL);
1429         count -= auio.uio_resid;
1430         CPU_STATS_ENTER_K();
1431         cp = CPU;
1432         CPU_STATS_ADDQ(cp, sys, syswrite, 1);
1433         CPU_STATS_ADDQ(cp, sys, writech, (ulong_t)count);
1434         CPU_STATS_EXIT_K();
1435         ttolwp(curthread)->lwp_ru.ioch += (ulong_t)count;
1436 
1437         VOP_RWUNLOCK(vp, rwflag, NULL);
1438 




   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  * Copyright 2017, Joyent, Inc.
  26  */
  27 
  28 /*      Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
  29 /*        All Rights Reserved   */
  30 
  31 /*
  32  * Portions of this source code were derived from Berkeley 4.3 BSD
  33  * under license from the Regents of the University of California.
  34  */
  35 
  36 #include <sys/param.h>
  37 #include <sys/isa_defs.h>
  38 #include <sys/types.h>
  39 #include <sys/inttypes.h>
  40 #include <sys/sysmacros.h>
  41 #include <sys/cred.h>
  42 #include <sys/user.h>
  43 #include <sys/systm.h>
  44 #include <sys/errno.h>
  45 #include <sys/vnode.h>


 982         if (error)
 983                 return (set_errno(error));
 984         return (count);
 985 }
 986 
 987 ssize_t
 988 preadv(int fdes, struct iovec *iovp, int iovcnt, off_t offset,
 989     off_t extended_offset)
 990 {
 991         struct uio auio;
 992         struct iovec buf[IOV_MAX_STACK], *aiov = buf;
 993         int aiovlen = 0;
 994         file_t *fp;
 995         register vnode_t *vp;
 996         struct cpu *cp;
 997         int fflag, ioflag, rwflag;
 998         ssize_t count, bcount;
 999         int error = 0;
1000         int i;
1001 
1002         /*
1003          * In a 64-bit kernel, this interface supports native 64-bit
1004          * applications as well as 32-bit applications using both standard and
1005          * large-file access. For 32-bit large-file aware applications, the
1006          * offset is passed as two parameters which are joined into the actual
1007          * offset used. The 64-bit libc always passes 0 for the extended_offset.
1008          * Note that off_t is a signed value, but the preadv/pwritev API treats
1009          * the offset as a position in the file for the operation, so passing
1010          * a negative value will likely fail the maximum offset checks below
1011          * because we convert it to an unsigned value which will be larger than
1012          * the maximum valid offset.
1013          */
1014 #if defined(_SYSCALL32_IMPL) || defined(_ILP32)
1015         u_offset_t fileoff = ((u_offset_t)extended_offset << 32) |
1016             (u_offset_t)offset;
1017 #else /* _SYSCALL32_IMPL || _ILP32 */
1018         u_offset_t fileoff = (u_offset_t)(ulong_t)offset;
1019 #endif /* _SYSCALL32_IMPL || _ILP32 */







1020 
1021         int in_crit = 0;
1022 
1023         if (iovcnt <= 0 || iovcnt > IOV_MAX)
1024                 return (set_errno(EINVAL));
1025 
1026         if (iovcnt > IOV_MAX_STACK) {
1027                 aiovlen = iovcnt * sizeof (iovec_t);
1028                 aiov = kmem_alloc(aiovlen, KM_SLEEP);
1029         }
1030 
1031 #ifdef _SYSCALL32_IMPL
1032         /*
1033          * 32-bit callers need to have their iovec expanded,
1034          * while ensuring that they can't move more than 2Gbytes
1035          * of data in a single call.
1036          */
1037         if (get_udatamodel() == DATAMODEL_ILP32) {
1038                 struct iovec32 buf32[IOV_MAX_STACK], *aiov32 = buf32;
1039                 int aiov32len;


1070                         kmem_free(aiov32, aiov32len);
1071         } else
1072 #endif /* _SYSCALL32_IMPL */
1073                 if (copyin(iovp, aiov, iovcnt * sizeof (iovec_t))) {
1074                         if (aiovlen != 0)
1075                                 kmem_free(aiov, aiovlen);
1076                         return (set_errno(EFAULT));
1077                 }
1078 
1079         count = 0;
1080         for (i = 0; i < iovcnt; i++) {
1081                 ssize_t iovlen = aiov[i].iov_len;
1082                 count += iovlen;
1083                 if (iovlen < 0 || count < 0) {
1084                         if (aiovlen != 0)
1085                                 kmem_free(aiov, aiovlen);
1086                         return (set_errno(EINVAL));
1087                 }
1088         }
1089 
1090         if ((bcount = count) < 0) {
1091                 if (aiovlen != 0)
1092                         kmem_free(aiov, aiovlen);
1093                 return (set_errno(EINVAL));
1094         }
1095         if ((fp = getf(fdes)) == NULL) {
1096                 if (aiovlen != 0)
1097                         kmem_free(aiov, aiovlen);
1098                 return (set_errno(EBADF));
1099         }
1100         if (((fflag = fp->f_flag) & FREAD) == 0) {
1101                 error = EBADF;
1102                 goto out;
1103         }
1104         vp = fp->f_vnode;
1105         rwflag = 0;

1106 
1107         /*
1108          * Behaviour is same as read(2). Please see comments in read above.
1109          */
1110         if (vp->v_type == VREG) {
1111                 if (bcount == 0)
1112                         goto out;
1113 
1114                 /* Handle offset past maximum offset allowed for file. */
1115                 if (fileoff >= OFFSET_MAX(fp)) {
1116                         struct vattr va;
1117                         va.va_mask = AT_SIZE;
1118 
1119                         error = VOP_GETATTR(vp, &va, 0, fp->f_cred, NULL);
1120                         if (error == 0)  {
1121                                 if (fileoff >= va.va_size) {
1122                                         count = 0;
1123                                 } else {
1124                                         error = EOVERFLOW;
1125                                 }
1126                         }
1127                         goto out;
1128                 }
1129 
1130                 ASSERT(bcount == count);
1131 
1132                 /* Note: modified count used in nbl_conflict() call below. */
1133                 if ((fileoff + count) > OFFSET_MAX(fp))
1134                         count = (ssize_t)(OFFSET_MAX(fp) - fileoff);
1135 
1136         } else if (vp->v_type == VFIFO) {
1137                 error = ESPIPE;
1138                 goto out;
1139         }
1140         /*
1141          * We have to enter the critical region before calling VOP_RWLOCK
1142          * to avoid a deadlock with ufs.
1143          */
1144         if (nbl_need_check(vp)) {
1145                 int svmand;
1146 
1147                 nbl_start_crit(vp, RW_READER);
1148                 in_crit = 1;
1149                 error = nbl_svmand(vp, fp->f_cred, &svmand);
1150                 if (error != 0)
1151                         goto out;
1152                 if (nbl_conflict(vp, NBL_WRITE, fileoff, count, svmand, NULL)) {

1153                         error = EACCES;
1154                         goto out;
1155                 }
1156         }
1157 
1158         (void) VOP_RWLOCK(vp, rwflag, NULL);
1159 



























1160         auio.uio_loffset = fileoff;
1161         auio.uio_iov = aiov;
1162         auio.uio_iovcnt = iovcnt;
1163         auio.uio_resid = bcount = count;
1164         auio.uio_segflg = UIO_USERSPACE;
1165         auio.uio_llimit = MAXOFFSET_T;
1166         auio.uio_fmode = fflag;
1167         if (bcount <= copyout_max_cached)
1168                 auio.uio_extflg = UIO_COPY_CACHED;
1169         else
1170                 auio.uio_extflg = UIO_COPY_DEFAULT;
1171 
1172         ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);
1173         error = VOP_READ(vp, &auio, ioflag, fp->f_cred, NULL);
1174         count -= auio.uio_resid;
1175         CPU_STATS_ENTER_K();
1176         cp = CPU;
1177         CPU_STATS_ADDQ(cp, sys, sysread, 1);
1178         CPU_STATS_ADDQ(cp, sys, readch, (ulong_t)count);
1179         CPU_STATS_EXIT_K();


1192         if (error)
1193                 return (set_errno(error));
1194         return (count);
1195 }
1196 
1197 ssize_t
1198 pwritev(int fdes, struct iovec *iovp, int iovcnt, off_t offset,
1199     off_t extended_offset)
1200 {
1201         struct uio auio;
1202         struct iovec buf[IOV_MAX_STACK], *aiov = buf;
1203         int aiovlen = 0;
1204         file_t *fp;
1205         register vnode_t *vp;
1206         struct cpu *cp;
1207         int fflag, ioflag, rwflag;
1208         ssize_t count, bcount;
1209         int error = 0;
1210         int i;
1211 
1212         /*
1213          * See the comment in preadv for how the offset is handled.
1214          */
1215 #if defined(_SYSCALL32_IMPL) || defined(_ILP32)
1216         u_offset_t fileoff = ((u_offset_t)extended_offset << 32) |
1217             (u_offset_t)offset;
1218 #else /* _SYSCALL32_IMPL || _ILP32 */
1219         u_offset_t fileoff = (u_offset_t)(ulong_t)offset;
1220 #endif /* _SYSCALL32_IMPL || _ILP32 */







1221 
1222         int in_crit = 0;
1223 
1224         if (iovcnt <= 0 || iovcnt > IOV_MAX)
1225                 return (set_errno(EINVAL));
1226 
1227         if (iovcnt > IOV_MAX_STACK) {
1228                 aiovlen = iovcnt * sizeof (iovec_t);
1229                 aiov = kmem_alloc(aiovlen, KM_SLEEP);
1230         }
1231 
1232 #ifdef _SYSCALL32_IMPL
1233         /*
1234          * 32-bit callers need to have their iovec expanded,
1235          * while ensuring that they can't move more than 2Gbytes
1236          * of data in a single call.
1237          */
1238         if (get_udatamodel() == DATAMODEL_ILP32) {
1239                 struct iovec32 buf32[IOV_MAX_STACK], *aiov32 = buf32;
1240                 int aiov32len;


1271                         kmem_free(aiov32, aiov32len);
1272         } else
1273 #endif /* _SYSCALL32_IMPL */
1274                 if (copyin(iovp, aiov, iovcnt * sizeof (iovec_t))) {
1275                         if (aiovlen != 0)
1276                                 kmem_free(aiov, aiovlen);
1277                         return (set_errno(EFAULT));
1278                 }
1279 
1280         count = 0;
1281         for (i = 0; i < iovcnt; i++) {
1282                 ssize_t iovlen = aiov[i].iov_len;
1283                 count += iovlen;
1284                 if (iovlen < 0 || count < 0) {
1285                         if (aiovlen != 0)
1286                                 kmem_free(aiov, aiovlen);
1287                         return (set_errno(EINVAL));
1288                 }
1289         }
1290 
1291         if ((bcount = count) < 0) {
1292                 if (aiovlen != 0)
1293                         kmem_free(aiov, aiovlen);
1294                 return (set_errno(EINVAL));
1295         }
1296         if ((fp = getf(fdes)) == NULL) {
1297                 if (aiovlen != 0)
1298                         kmem_free(aiov, aiovlen);
1299                 return (set_errno(EBADF));
1300         }
1301         if (((fflag = fp->f_flag) & FWRITE) == 0) {
1302                 error = EBADF;
1303                 goto out;
1304         }
1305         vp = fp->f_vnode;
1306         rwflag = 1;

1307 
1308         /*
1309          * The kernel's write(2) code checks OFFSET_MAX and the rctl, and
1310          * returns EFBIG when fileoff exceeds either limit. We do the same.
1311          */
1312         if (vp->v_type == VREG) {
1313                 if (bcount == 0)
1314                         goto out;
1315 
1316                 /*
1317                  * Don't allow pwritev to cause file size to exceed the proper
1318                  * offset limit.
1319                  */
1320                 if (fileoff >= OFFSET_MAX(fp)) {
1321                         error = EFBIG;
1322                         goto out;
1323                 }
1324 
1325                 /*
1326                  * Take appropriate action if we are trying
1327                  * to write above the resource limit.
1328                  */
1329                 if (fileoff >= curproc->p_fsz_ctl) {
1330                         mutex_enter(&curproc->p_lock);
1331                         /*
1332                          * Return value ignored because it lists
1333                          * actions taken, but we are in an error case.
1334                          * We don't have any actions that depend on
1335                          * what could happen in this call, so we ignore
1336                          * the return value.
1337                          */
1338                         (void) rctl_action(
1339                             rctlproc_legacy[RLIMIT_FSIZE],
1340                             curproc->p_rctls, curproc,
1341                             RCA_UNSAFE_SIGINFO);
1342                         mutex_exit(&curproc->p_lock);
1343 
1344                         error = EFBIG;
1345                         goto out;
1346                 }








1347 
1348                 ASSERT(bcount == count);
1349 
1350                 /* Note: modified count used in nbl_conflict() call below. */
1351                 if ((fileoff + count) > OFFSET_MAX(fp))
1352                         count = (ssize_t)(OFFSET_MAX(fp) - fileoff);
1353 
1354         } else if (vp->v_type == VFIFO) {
1355                 error = ESPIPE;
1356                 goto out;
1357         }
1358         /*
1359          * We have to enter the critical region before calling VOP_RWLOCK
1360          * to avoid a deadlock with ufs.
1361          */
1362         if (nbl_need_check(vp)) {
1363                 int svmand;
1364 
1365                 nbl_start_crit(vp, RW_READER);
1366                 in_crit = 1;
1367                 error = nbl_svmand(vp, fp->f_cred, &svmand);
1368                 if (error != 0)
1369                         goto out;
1370                 if (nbl_conflict(vp, NBL_WRITE, fileoff, count, svmand, NULL)) {

1371                         error = EACCES;
1372                         goto out;
1373                 }
1374         }
1375 
1376         (void) VOP_RWLOCK(vp, rwflag, NULL);
1377 




























1378         auio.uio_loffset = fileoff;
1379         auio.uio_iov = aiov;
1380         auio.uio_iovcnt = iovcnt;
1381         auio.uio_resid = bcount = count;
1382         auio.uio_segflg = UIO_USERSPACE;
1383         auio.uio_llimit = curproc->p_fsz_ctl;
1384         auio.uio_fmode = fflag;
1385         auio.uio_extflg = UIO_COPY_CACHED;
1386         ioflag = auio.uio_fmode & (FSYNC|FDSYNC|FRSYNC);
1387         error = VOP_WRITE(vp, &auio, ioflag, fp->f_cred, NULL);
1388         count -= auio.uio_resid;
1389         CPU_STATS_ENTER_K();
1390         cp = CPU;
1391         CPU_STATS_ADDQ(cp, sys, syswrite, 1);
1392         CPU_STATS_ADDQ(cp, sys, writech, (ulong_t)count);
1393         CPU_STATS_EXIT_K();
1394         ttolwp(curthread)->lwp_ru.ioch += (ulong_t)count;
1395 
1396         VOP_RWUNLOCK(vp, rwflag, NULL);
1397