Print this page
12365 pwritev64 can't write at offsets between [2 GiB, 4 GiB)
Portions contributed by: John Levon <john.levon@joyent.com>
Reviewed by: Patrick Mooney <patrick.mooney@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>

Split Close
Expand all
Collapse all
          --- old/usr/src/uts/common/syscall/rw.c
          +++ new/usr/src/uts/common/syscall/rw.c
↓ open down ↓ 14 lines elided ↑ open up ↑
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  
  22   22  /*
  23   23   * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  24   24   * Use is subject to license terms.
  25      - * Copyright 2015, Joyent, Inc.  All rights reserved.
       25 + * Copyright 2017, Joyent, Inc.
  26   26   */
  27   27  
  28   28  /*      Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T     */
  29      -/*        All Rights Reserved   */
       29 +/*        All Rights Reserved   */
  30   30  
  31   31  /*
  32   32   * Portions of this source code were derived from Berkeley 4.3 BSD
  33   33   * under license from the Regents of the University of California.
  34   34   */
  35   35  
  36   36  #include <sys/param.h>
  37   37  #include <sys/isa_defs.h>
  38   38  #include <sys/types.h>
  39   39  #include <sys/inttypes.h>
↓ open down ↓ 952 lines elided ↑ open up ↑
 992  992          struct iovec buf[IOV_MAX_STACK], *aiov = buf;
 993  993          int aiovlen = 0;
 994  994          file_t *fp;
 995  995          register vnode_t *vp;
 996  996          struct cpu *cp;
 997  997          int fflag, ioflag, rwflag;
 998  998          ssize_t count, bcount;
 999  999          int error = 0;
1000 1000          int i;
1001 1001  
     1002 +        /*
     1003 +         * In a 64-bit kernel, this interface supports native 64-bit
     1004 +         * applications as well as 32-bit applications using both standard and
     1005 +         * large-file access. For 32-bit large-file aware applications, the
     1006 +         * offset is passed as two parameters which are joined into the actual
     1007 +         * offset used. The 64-bit libc always passes 0 for the extended_offset.
     1008 +         * Note that off_t is a signed value, but the preadv/pwritev API treats
     1009 +         * the offset as a position in the file for the operation, so passing
     1010 +         * a negative value will likely fail the maximum offset checks below
     1011 +         * because we convert it to an unsigned value which will be larger than
     1012 +         * the maximum valid offset.
     1013 +         */
1002 1014  #if defined(_SYSCALL32_IMPL) || defined(_ILP32)
1003 1015          u_offset_t fileoff = ((u_offset_t)extended_offset << 32) |
1004 1016              (u_offset_t)offset;
1005 1017  #else /* _SYSCALL32_IMPL || _ILP32 */
1006 1018          u_offset_t fileoff = (u_offset_t)(ulong_t)offset;
1007 1019  #endif /* _SYSCALL32_IMPL || _ILP32 */
1008      -#ifdef _SYSCALL32_IMPL
1009      -        const u_offset_t maxoff = get_udatamodel() == DATAMODEL_ILP32 &&
1010      -            extended_offset == 0?
1011      -            MAXOFF32_T : MAXOFFSET_T;
1012      -#else /* _SYSCALL32_IMPL */
1013      -        const u_offset_t maxoff = MAXOFF32_T;
1014      -#endif /* _SYSCALL32_IMPL */
1015 1020  
1016 1021          int in_crit = 0;
1017 1022  
1018 1023          if (iovcnt <= 0 || iovcnt > IOV_MAX)
1019 1024                  return (set_errno(EINVAL));
1020 1025  
1021 1026          if (iovcnt > IOV_MAX_STACK) {
1022 1027                  aiovlen = iovcnt * sizeof (iovec_t);
1023 1028                  aiov = kmem_alloc(aiovlen, KM_SLEEP);
1024 1029          }
↓ open down ↓ 50 lines elided ↑ open up ↑
1075 1080          for (i = 0; i < iovcnt; i++) {
1076 1081                  ssize_t iovlen = aiov[i].iov_len;
1077 1082                  count += iovlen;
1078 1083                  if (iovlen < 0 || count < 0) {
1079 1084                          if (aiovlen != 0)
1080 1085                                  kmem_free(aiov, aiovlen);
1081 1086                          return (set_errno(EINVAL));
1082 1087                  }
1083 1088          }
1084 1089  
1085      -        if ((bcount = (ssize_t)count) < 0) {
     1090 +        if ((bcount = count) < 0) {
1086 1091                  if (aiovlen != 0)
1087 1092                          kmem_free(aiov, aiovlen);
1088 1093                  return (set_errno(EINVAL));
1089 1094          }
1090 1095          if ((fp = getf(fdes)) == NULL) {
1091 1096                  if (aiovlen != 0)
1092 1097                          kmem_free(aiov, aiovlen);
1093 1098                  return (set_errno(EBADF));
1094 1099          }
1095 1100          if (((fflag = fp->f_flag) & FREAD) == 0) {
1096 1101                  error = EBADF;
1097 1102                  goto out;
1098 1103          }
1099 1104          vp = fp->f_vnode;
1100 1105          rwflag = 0;
1101      -        if (vp->v_type == VREG) {
1102 1106  
     1107 +        /*
     1108 +         * Behaviour is same as read(2). Please see comments in read above.
     1109 +         */
     1110 +        if (vp->v_type == VREG) {
1103 1111                  if (bcount == 0)
1104 1112                          goto out;
1105 1113  
1106      -                /*
1107      -                 * return EINVAL for offsets that cannot be
1108      -                 * represented in an off_t.
1109      -                 */
1110      -                if (fileoff > maxoff) {
1111      -                        error = EINVAL;
     1114 +                /* Handle offset past maximum offset allowed for file. */
     1115 +                if (fileoff >= OFFSET_MAX(fp)) {
     1116 +                        struct vattr va;
     1117 +                        va.va_mask = AT_SIZE;
     1118 +
     1119 +                        error = VOP_GETATTR(vp, &va, 0, fp->f_cred, NULL);
     1120 +                        if (error == 0)  {
     1121 +                                if (fileoff >= va.va_size) {
     1122 +                                        count = 0;
     1123 +                                } else {
     1124 +                                        error = EOVERFLOW;
     1125 +                                }
     1126 +                        }
1112 1127                          goto out;
1113 1128                  }
1114 1129  
1115      -                if (fileoff + bcount > maxoff)
1116      -                        bcount = (ssize_t)((u_offset_t)maxoff - fileoff);
     1130 +                ASSERT(bcount == count);
     1131 +
     1132 +                /* Note: modified count used in nbl_conflict() call below. */
     1133 +                if ((fileoff + count) > OFFSET_MAX(fp))
     1134 +                        count = (ssize_t)(OFFSET_MAX(fp) - fileoff);
     1135 +
1117 1136          } else if (vp->v_type == VFIFO) {
1118 1137                  error = ESPIPE;
1119 1138                  goto out;
1120 1139          }
1121 1140          /*
1122 1141           * We have to enter the critical region before calling VOP_RWLOCK
1123 1142           * to avoid a deadlock with ufs.
1124 1143           */
1125 1144          if (nbl_need_check(vp)) {
1126 1145                  int svmand;
1127 1146  
1128 1147                  nbl_start_crit(vp, RW_READER);
1129 1148                  in_crit = 1;
1130 1149                  error = nbl_svmand(vp, fp->f_cred, &svmand);
1131 1150                  if (error != 0)
1132 1151                          goto out;
1133      -                if (nbl_conflict(vp, NBL_WRITE, fileoff, count, svmand,
1134      -                    NULL)) {
     1152 +                if (nbl_conflict(vp, NBL_WRITE, fileoff, count, svmand, NULL)) {
1135 1153                          error = EACCES;
1136 1154                          goto out;
1137 1155                  }
1138 1156          }
1139 1157  
1140 1158          (void) VOP_RWLOCK(vp, rwflag, NULL);
1141 1159  
1142      -        /*
1143      -         * Behaviour is same as read(2). Please see comments in
1144      -         * read(2).
1145      -         */
1146      -
1147      -        if ((vp->v_type == VREG) && (fileoff >= OFFSET_MAX(fp))) {
1148      -                struct vattr va;
1149      -                va.va_mask = AT_SIZE;
1150      -                if ((error =
1151      -                    VOP_GETATTR(vp, &va, 0, fp->f_cred, NULL)))  {
1152      -                        VOP_RWUNLOCK(vp, rwflag, NULL);
1153      -                        goto out;
1154      -                }
1155      -                if (fileoff >= va.va_size) {
1156      -                        VOP_RWUNLOCK(vp, rwflag, NULL);
1157      -                        count = 0;
1158      -                        goto out;
1159      -                } else {
1160      -                        VOP_RWUNLOCK(vp, rwflag, NULL);
1161      -                        error = EOVERFLOW;
1162      -                        goto out;
1163      -                }
1164      -        }
1165      -        if ((vp->v_type == VREG) &&
1166      -            (fileoff + count > OFFSET_MAX(fp))) {
1167      -                count = (ssize_t)(OFFSET_MAX(fp) - fileoff);
1168      -        }
1169 1160          auio.uio_loffset = fileoff;
1170 1161          auio.uio_iov = aiov;
1171 1162          auio.uio_iovcnt = iovcnt;
1172 1163          auio.uio_resid = bcount = count;
1173 1164          auio.uio_segflg = UIO_USERSPACE;
1174 1165          auio.uio_llimit = MAXOFFSET_T;
1175 1166          auio.uio_fmode = fflag;
1176 1167          if (bcount <= copyout_max_cached)
1177 1168                  auio.uio_extflg = UIO_COPY_CACHED;
1178 1169          else
↓ open down ↓ 32 lines elided ↑ open up ↑
1211 1202          struct iovec buf[IOV_MAX_STACK], *aiov = buf;
1212 1203          int aiovlen = 0;
1213 1204          file_t *fp;
1214 1205          register vnode_t *vp;
1215 1206          struct cpu *cp;
1216 1207          int fflag, ioflag, rwflag;
1217 1208          ssize_t count, bcount;
1218 1209          int error = 0;
1219 1210          int i;
1220 1211  
     1212 +        /*
     1213 +         * See the comment in preadv for how the offset is handled.
     1214 +         */
1221 1215  #if defined(_SYSCALL32_IMPL) || defined(_ILP32)
1222 1216          u_offset_t fileoff = ((u_offset_t)extended_offset << 32) |
1223 1217              (u_offset_t)offset;
1224 1218  #else /* _SYSCALL32_IMPL || _ILP32 */
1225 1219          u_offset_t fileoff = (u_offset_t)(ulong_t)offset;
1226 1220  #endif /* _SYSCALL32_IMPL || _ILP32 */
1227      -#ifdef _SYSCALL32_IMPL
1228      -        const u_offset_t maxoff = get_udatamodel() == DATAMODEL_ILP32 &&
1229      -            extended_offset == 0?
1230      -            MAXOFF32_T : MAXOFFSET_T;
1231      -#else /* _SYSCALL32_IMPL */
1232      -        const u_offset_t maxoff = MAXOFF32_T;
1233      -#endif /* _SYSCALL32_IMPL */
1234 1221  
1235 1222          int in_crit = 0;
1236 1223  
1237 1224          if (iovcnt <= 0 || iovcnt > IOV_MAX)
1238 1225                  return (set_errno(EINVAL));
1239 1226  
1240 1227          if (iovcnt > IOV_MAX_STACK) {
1241 1228                  aiovlen = iovcnt * sizeof (iovec_t);
1242 1229                  aiov = kmem_alloc(aiovlen, KM_SLEEP);
1243 1230          }
↓ open down ↓ 50 lines elided ↑ open up ↑
1294 1281          for (i = 0; i < iovcnt; i++) {
1295 1282                  ssize_t iovlen = aiov[i].iov_len;
1296 1283                  count += iovlen;
1297 1284                  if (iovlen < 0 || count < 0) {
1298 1285                          if (aiovlen != 0)
1299 1286                                  kmem_free(aiov, aiovlen);
1300 1287                          return (set_errno(EINVAL));
1301 1288                  }
1302 1289          }
1303 1290  
1304      -        if ((bcount = (ssize_t)count) < 0) {
     1291 +        if ((bcount = count) < 0) {
1305 1292                  if (aiovlen != 0)
1306 1293                          kmem_free(aiov, aiovlen);
1307 1294                  return (set_errno(EINVAL));
1308 1295          }
1309 1296          if ((fp = getf(fdes)) == NULL) {
1310 1297                  if (aiovlen != 0)
1311 1298                          kmem_free(aiov, aiovlen);
1312 1299                  return (set_errno(EBADF));
1313 1300          }
1314 1301          if (((fflag = fp->f_flag) & FWRITE) == 0) {
1315 1302                  error = EBADF;
1316 1303                  goto out;
1317 1304          }
1318 1305          vp = fp->f_vnode;
1319 1306          rwflag = 1;
1320      -        if (vp->v_type == VREG) {
1321 1307  
     1308 +        /*
     1309 +         * The kernel's write(2) code checks OFFSET_MAX and the rctl, and
     1310 +         * returns EFBIG when fileoff exceeds either limit. We do the same.
     1311 +         */
     1312 +        if (vp->v_type == VREG) {
1322 1313                  if (bcount == 0)
1323 1314                          goto out;
1324 1315  
1325 1316                  /*
1326      -                 * return EINVAL for offsets that cannot be
1327      -                 * represented in an off_t.
     1317 +                 * Don't allow pwritev to cause file size to exceed the proper
     1318 +                 * offset limit.
1328 1319                   */
1329      -                if (fileoff > maxoff) {
1330      -                        error = EINVAL;
     1320 +                if (fileoff >= OFFSET_MAX(fp)) {
     1321 +                        error = EFBIG;
1331 1322                          goto out;
1332 1323                  }
     1324 +
1333 1325                  /*
1334 1326                   * Take appropriate action if we are trying
1335 1327                   * to write above the resource limit.
1336 1328                   */
1337 1329                  if (fileoff >= curproc->p_fsz_ctl) {
1338 1330                          mutex_enter(&curproc->p_lock);
1339 1331                          /*
1340 1332                           * Return value ignored because it lists
1341 1333                           * actions taken, but we are in an error case.
1342 1334                           * We don't have any actions that depend on
↓ open down ↓ 2 lines elided ↑ open up ↑
1345 1337                           */
1346 1338                          (void) rctl_action(
1347 1339                              rctlproc_legacy[RLIMIT_FSIZE],
1348 1340                              curproc->p_rctls, curproc,
1349 1341                              RCA_UNSAFE_SIGINFO);
1350 1342                          mutex_exit(&curproc->p_lock);
1351 1343  
1352 1344                          error = EFBIG;
1353 1345                          goto out;
1354 1346                  }
1355      -                /*
1356      -                 * Don't allow pwritev to cause file sizes to exceed
1357      -                 * maxoff.
1358      -                 */
1359      -                if (fileoff == maxoff) {
1360      -                        error = EFBIG;
1361      -                        goto out;
1362      -                }
1363 1347  
1364      -                if (fileoff + bcount > maxoff)
1365      -                        bcount = (ssize_t)((u_offset_t)maxoff - fileoff);
     1348 +                ASSERT(bcount == count);
     1349 +
     1350 +                /* Note: modified count used in nbl_conflict() call below. */
     1351 +                if ((fileoff + count) > OFFSET_MAX(fp))
     1352 +                        count = (ssize_t)(OFFSET_MAX(fp) - fileoff);
     1353 +
1366 1354          } else if (vp->v_type == VFIFO) {
1367 1355                  error = ESPIPE;
1368 1356                  goto out;
1369 1357          }
1370 1358          /*
1371 1359           * We have to enter the critical region before calling VOP_RWLOCK
1372 1360           * to avoid a deadlock with ufs.
1373 1361           */
1374 1362          if (nbl_need_check(vp)) {
1375 1363                  int svmand;
1376 1364  
1377 1365                  nbl_start_crit(vp, RW_READER);
1378 1366                  in_crit = 1;
1379 1367                  error = nbl_svmand(vp, fp->f_cred, &svmand);
1380 1368                  if (error != 0)
1381 1369                          goto out;
1382      -                if (nbl_conflict(vp, NBL_WRITE, fileoff, count, svmand,
1383      -                    NULL)) {
     1370 +                if (nbl_conflict(vp, NBL_WRITE, fileoff, count, svmand, NULL)) {
1384 1371                          error = EACCES;
1385 1372                          goto out;
1386 1373                  }
1387 1374          }
1388 1375  
1389 1376          (void) VOP_RWLOCK(vp, rwflag, NULL);
1390 1377  
1391      -
1392      -        /*
1393      -         * Behaviour is same as write(2). Please see comments for
1394      -         * write(2).
1395      -         */
1396      -
1397      -        if (vp->v_type == VREG) {
1398      -                if (fileoff >= curproc->p_fsz_ctl) {
1399      -                        VOP_RWUNLOCK(vp, rwflag, NULL);
1400      -                        mutex_enter(&curproc->p_lock);
1401      -                        /* see above rctl_action comment */
1402      -                        (void) rctl_action(
1403      -                            rctlproc_legacy[RLIMIT_FSIZE],
1404      -                            curproc->p_rctls,
1405      -                            curproc, RCA_UNSAFE_SIGINFO);
1406      -                        mutex_exit(&curproc->p_lock);
1407      -                        error = EFBIG;
1408      -                        goto out;
1409      -                }
1410      -                if (fileoff >= OFFSET_MAX(fp)) {
1411      -                        VOP_RWUNLOCK(vp, rwflag, NULL);
1412      -                        error = EFBIG;
1413      -                        goto out;
1414      -                }
1415      -                if (fileoff + count > OFFSET_MAX(fp))
1416      -                        count = (ssize_t)(OFFSET_MAX(fp) - fileoff);
1417      -        }
1418      -
1419 1378          auio.uio_loffset = fileoff;
1420 1379          auio.uio_iov = aiov;
1421 1380          auio.uio_iovcnt = iovcnt;
1422 1381          auio.uio_resid = bcount = count;
1423 1382          auio.uio_segflg = UIO_USERSPACE;
1424 1383          auio.uio_llimit = curproc->p_fsz_ctl;
1425 1384          auio.uio_fmode = fflag;
1426 1385          auio.uio_extflg = UIO_COPY_CACHED;
1427 1386          ioflag = auio.uio_fmode & (FSYNC|FDSYNC|FRSYNC);
1428 1387          error = VOP_WRITE(vp, &auio, ioflag, fp->f_cred, NULL);
↓ open down ↓ 341 lines elided ↑ open up ↑
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX