1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 * Copyright 2017, Joyent, Inc. 26 */ 27 28 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ 29 /* All Rights Reserved */ 30 31 /* 32 * Portions of this source code were derived from Berkeley 4.3 BSD 33 * under license from the Regents of the University of California. 34 */ 35 36 #include <sys/param.h> 37 #include <sys/isa_defs.h> 38 #include <sys/types.h> 39 #include <sys/inttypes.h> 40 #include <sys/sysmacros.h> 41 #include <sys/cred.h> 42 #include <sys/user.h> 43 #include <sys/systm.h> 44 #include <sys/errno.h> 45 #include <sys/vnode.h> 46 #include <sys/file.h> 47 #include <sys/proc.h> 48 #include <sys/cpuvar.h> 49 #include <sys/uio.h> 50 #include <sys/debug.h> 51 #include <sys/rctl.h> 52 #include <sys/nbmlock.h> 53 #include <sys/limits.h> 54 55 #define COPYOUT_MAX_CACHE (1<<17) /* 128K */ 56 57 size_t copyout_max_cached = COPYOUT_MAX_CACHE; /* global so it's patchable */ 58 59 /* 60 * read, write, pread, pwrite, readv, and writev syscalls. 61 * 62 * 64-bit open: all open's are large file opens. 63 * Large Files: the behaviour of read depends on whether the fd 64 * corresponds to large open or not. 65 * 32-bit open: FOFFMAX flag not set. 66 * read until MAXOFF32_T - 1 and read at MAXOFF32_T returns 67 * EOVERFLOW if count is non-zero and if size of file 68 * is > MAXOFF32_T. If size of file is <= MAXOFF32_T read 69 * at >= MAXOFF32_T returns EOF. 70 */ 71 72 /* 73 * Native system call 74 */ 75 ssize_t 76 read(int fdes, void *cbuf, size_t count) 77 { 78 struct uio auio; 79 struct iovec aiov; 80 file_t *fp; 81 register vnode_t *vp; 82 struct cpu *cp; 83 int fflag, ioflag, rwflag; 84 ssize_t cnt, bcount; 85 int error = 0; 86 u_offset_t fileoff; 87 int in_crit = 0; 88 89 if ((cnt = (ssize_t)count) < 0) 90 return (set_errno(EINVAL)); 91 if ((fp = getf(fdes)) == NULL) 92 return (set_errno(EBADF)); 93 if (((fflag = fp->f_flag) & FREAD) == 0) { 94 error = EBADF; 95 goto out; 96 } 97 vp = fp->f_vnode; 98 99 if (vp->v_type == VREG && cnt == 0) { 100 goto out; 101 } 102 103 rwflag = 0; 104 aiov.iov_base = cbuf; 105 aiov.iov_len = cnt; 106 107 /* 108 * We have to enter the critical region before calling VOP_RWLOCK 109 * to avoid a deadlock with write() calls. 110 */ 111 if (nbl_need_check(vp)) { 112 int svmand; 113 114 nbl_start_crit(vp, RW_READER); 115 in_crit = 1; 116 error = nbl_svmand(vp, fp->f_cred, &svmand); 117 if (error != 0) 118 goto out; 119 if (nbl_conflict(vp, NBL_READ, fp->f_offset, cnt, svmand, 120 NULL)) { 121 error = EACCES; 122 goto out; 123 } 124 } 125 126 (void) VOP_RWLOCK(vp, rwflag, NULL); 127 128 /* 129 * We do the following checks inside VOP_RWLOCK so as to 130 * prevent file size from changing while these checks are 131 * being done. Also, we load fp's offset to the local 132 * variable fileoff because we can have a parallel lseek 133 * going on (f_offset is not protected by any lock) which 134 * could change f_offset. We need to see the value only 135 * once here and take a decision. Seeing it more than once 136 * can lead to incorrect functionality. 137 */ 138 139 fileoff = (u_offset_t)fp->f_offset; 140 if (fileoff >= OFFSET_MAX(fp) && (vp->v_type == VREG)) { 141 struct vattr va; 142 va.va_mask = AT_SIZE; 143 if ((error = VOP_GETATTR(vp, &va, 0, fp->f_cred, NULL))) { 144 VOP_RWUNLOCK(vp, rwflag, NULL); 145 goto out; 146 } 147 if (fileoff >= va.va_size) { 148 cnt = 0; 149 VOP_RWUNLOCK(vp, rwflag, NULL); 150 goto out; 151 } else { 152 error = EOVERFLOW; 153 VOP_RWUNLOCK(vp, rwflag, NULL); 154 goto out; 155 } 156 } 157 if ((vp->v_type == VREG) && 158 (fileoff + cnt > OFFSET_MAX(fp))) { 159 cnt = (ssize_t)(OFFSET_MAX(fp) - fileoff); 160 } 161 auio.uio_loffset = fileoff; 162 auio.uio_iov = &aiov; 163 auio.uio_iovcnt = 1; 164 auio.uio_resid = bcount = cnt; 165 auio.uio_segflg = UIO_USERSPACE; 166 auio.uio_llimit = MAXOFFSET_T; 167 auio.uio_fmode = fflag; 168 /* 169 * Only use bypass caches when the count is large enough 170 */ 171 if (bcount <= copyout_max_cached) 172 auio.uio_extflg = UIO_COPY_CACHED; 173 else 174 auio.uio_extflg = UIO_COPY_DEFAULT; 175 176 ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC); 177 178 /* If read sync is not asked for, filter sync flags */ 179 if ((ioflag & FRSYNC) == 0) 180 ioflag &= ~(FSYNC|FDSYNC); 181 error = VOP_READ(vp, &auio, ioflag, fp->f_cred, NULL); 182 cnt -= auio.uio_resid; 183 CPU_STATS_ENTER_K(); 184 cp = CPU; 185 CPU_STATS_ADDQ(cp, sys, sysread, 1); 186 CPU_STATS_ADDQ(cp, sys, readch, (ulong_t)cnt); 187 CPU_STATS_EXIT_K(); 188 ttolwp(curthread)->lwp_ru.ioch += (ulong_t)cnt; 189 190 if (vp->v_type == VFIFO) /* Backward compatibility */ 191 fp->f_offset = cnt; 192 else if (((fp->f_flag & FAPPEND) == 0) || 193 (vp->v_type != VREG) || (bcount != 0)) /* POSIX */ 194 fp->f_offset = auio.uio_loffset; 195 VOP_RWUNLOCK(vp, rwflag, NULL); 196 197 if (error == EINTR && cnt != 0) 198 error = 0; 199 out: 200 if (in_crit) 201 nbl_end_crit(vp); 202 releasef(fdes); 203 if (error) 204 return (set_errno(error)); 205 return (cnt); 206 } 207 208 /* 209 * Native system call 210 */ 211 ssize_t 212 write(int fdes, void *cbuf, size_t count) 213 { 214 struct uio auio; 215 struct iovec aiov; 216 file_t *fp; 217 register vnode_t *vp; 218 struct cpu *cp; 219 int fflag, ioflag, rwflag; 220 ssize_t cnt, bcount; 221 int error = 0; 222 u_offset_t fileoff; 223 int in_crit = 0; 224 225 if ((cnt = (ssize_t)count) < 0) 226 return (set_errno(EINVAL)); 227 if ((fp = getf(fdes)) == NULL) 228 return (set_errno(EBADF)); 229 if (((fflag = fp->f_flag) & FWRITE) == 0) { 230 error = EBADF; 231 goto out; 232 } 233 vp = fp->f_vnode; 234 235 if (vp->v_type == VREG && cnt == 0) { 236 goto out; 237 } 238 239 rwflag = 1; 240 aiov.iov_base = cbuf; 241 aiov.iov_len = cnt; 242 243 /* 244 * We have to enter the critical region before calling VOP_RWLOCK 245 * to avoid a deadlock with ufs. 246 */ 247 if (nbl_need_check(vp)) { 248 int svmand; 249 250 nbl_start_crit(vp, RW_READER); 251 in_crit = 1; 252 error = nbl_svmand(vp, fp->f_cred, &svmand); 253 if (error != 0) 254 goto out; 255 if (nbl_conflict(vp, NBL_WRITE, fp->f_offset, cnt, svmand, 256 NULL)) { 257 error = EACCES; 258 goto out; 259 } 260 } 261 262 (void) VOP_RWLOCK(vp, rwflag, NULL); 263 264 fileoff = fp->f_offset; 265 if (vp->v_type == VREG) { 266 267 /* 268 * We raise psignal if write for >0 bytes causes 269 * it to exceed the ulimit. 270 */ 271 if (fileoff >= curproc->p_fsz_ctl) { 272 VOP_RWUNLOCK(vp, rwflag, NULL); 273 274 mutex_enter(&curproc->p_lock); 275 (void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE], 276 curproc->p_rctls, curproc, RCA_UNSAFE_SIGINFO); 277 mutex_exit(&curproc->p_lock); 278 279 error = EFBIG; 280 goto out; 281 } 282 /* 283 * We return EFBIG if write is done at an offset 284 * greater than the offset maximum for this file structure. 285 */ 286 287 if (fileoff >= OFFSET_MAX(fp)) { 288 VOP_RWUNLOCK(vp, rwflag, NULL); 289 error = EFBIG; 290 goto out; 291 } 292 /* 293 * Limit the bytes to be written upto offset maximum for 294 * this open file structure. 295 */ 296 if (fileoff + cnt > OFFSET_MAX(fp)) 297 cnt = (ssize_t)(OFFSET_MAX(fp) - fileoff); 298 } 299 auio.uio_loffset = fileoff; 300 auio.uio_iov = &aiov; 301 auio.uio_iovcnt = 1; 302 auio.uio_resid = bcount = cnt; 303 auio.uio_segflg = UIO_USERSPACE; 304 auio.uio_llimit = curproc->p_fsz_ctl; 305 auio.uio_fmode = fflag; 306 auio.uio_extflg = UIO_COPY_DEFAULT; 307 308 ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC); 309 310 error = VOP_WRITE(vp, &auio, ioflag, fp->f_cred, NULL); 311 cnt -= auio.uio_resid; 312 CPU_STATS_ENTER_K(); 313 cp = CPU; 314 CPU_STATS_ADDQ(cp, sys, syswrite, 1); 315 CPU_STATS_ADDQ(cp, sys, writech, (ulong_t)cnt); 316 CPU_STATS_EXIT_K(); 317 ttolwp(curthread)->lwp_ru.ioch += (ulong_t)cnt; 318 319 if (vp->v_type == VFIFO) /* Backward compatibility */ 320 fp->f_offset = cnt; 321 else if (((fp->f_flag & FAPPEND) == 0) || 322 (vp->v_type != VREG) || (bcount != 0)) /* POSIX */ 323 fp->f_offset = auio.uio_loffset; 324 VOP_RWUNLOCK(vp, rwflag, NULL); 325 326 if (error == EINTR && cnt != 0) 327 error = 0; 328 out: 329 if (in_crit) 330 nbl_end_crit(vp); 331 releasef(fdes); 332 if (error) 333 return (set_errno(error)); 334 return (cnt); 335 } 336 337 ssize_t 338 pread(int fdes, void *cbuf, size_t count, off_t offset) 339 { 340 struct uio auio; 341 struct iovec aiov; 342 file_t *fp; 343 register vnode_t *vp; 344 struct cpu *cp; 345 int fflag, ioflag, rwflag; 346 ssize_t bcount; 347 int error = 0; 348 u_offset_t fileoff = (u_offset_t)(ulong_t)offset; 349 #ifdef _SYSCALL32_IMPL 350 u_offset_t maxoff = get_udatamodel() == DATAMODEL_ILP32 ? 351 MAXOFF32_T : MAXOFFSET_T; 352 #else 353 const u_offset_t maxoff = MAXOFF32_T; 354 #endif 355 int in_crit = 0; 356 357 if ((bcount = (ssize_t)count) < 0) 358 return (set_errno(EINVAL)); 359 360 if ((fp = getf(fdes)) == NULL) 361 return (set_errno(EBADF)); 362 if (((fflag = fp->f_flag) & (FREAD)) == 0) { 363 error = EBADF; 364 goto out; 365 } 366 367 rwflag = 0; 368 vp = fp->f_vnode; 369 370 if (vp->v_type == VREG) { 371 372 if (bcount == 0) 373 goto out; 374 375 /* 376 * Return EINVAL if an invalid offset comes to pread. 377 * Negative offset from user will cause this error. 378 */ 379 380 if (fileoff > maxoff) { 381 error = EINVAL; 382 goto out; 383 } 384 /* 385 * Limit offset such that we don't read or write 386 * a file beyond the maximum offset representable in 387 * an off_t structure. 388 */ 389 if (fileoff + bcount > maxoff) 390 bcount = (ssize_t)((offset_t)maxoff - fileoff); 391 } else if (vp->v_type == VFIFO) { 392 error = ESPIPE; 393 goto out; 394 } 395 396 /* 397 * We have to enter the critical region before calling VOP_RWLOCK 398 * to avoid a deadlock with ufs. 399 */ 400 if (nbl_need_check(vp)) { 401 int svmand; 402 403 nbl_start_crit(vp, RW_READER); 404 in_crit = 1; 405 error = nbl_svmand(vp, fp->f_cred, &svmand); 406 if (error != 0) 407 goto out; 408 if (nbl_conflict(vp, NBL_READ, fileoff, bcount, svmand, 409 NULL)) { 410 error = EACCES; 411 goto out; 412 } 413 } 414 415 aiov.iov_base = cbuf; 416 aiov.iov_len = bcount; 417 (void) VOP_RWLOCK(vp, rwflag, NULL); 418 if (vp->v_type == VREG && fileoff == (u_offset_t)maxoff) { 419 struct vattr va; 420 va.va_mask = AT_SIZE; 421 if ((error = VOP_GETATTR(vp, &va, 0, fp->f_cred, NULL))) { 422 VOP_RWUNLOCK(vp, rwflag, NULL); 423 goto out; 424 } 425 VOP_RWUNLOCK(vp, rwflag, NULL); 426 427 /* 428 * We have to return EOF if fileoff is >= file size. 429 */ 430 if (fileoff >= va.va_size) { 431 bcount = 0; 432 goto out; 433 } 434 435 /* 436 * File is greater than or equal to maxoff and therefore 437 * we return EOVERFLOW. 438 */ 439 error = EOVERFLOW; 440 goto out; 441 } 442 auio.uio_loffset = fileoff; 443 auio.uio_iov = &aiov; 444 auio.uio_iovcnt = 1; 445 auio.uio_resid = bcount; 446 auio.uio_segflg = UIO_USERSPACE; 447 auio.uio_llimit = MAXOFFSET_T; 448 auio.uio_fmode = fflag; 449 auio.uio_extflg = UIO_COPY_CACHED; 450 451 ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC); 452 453 /* If read sync is not asked for, filter sync flags */ 454 if ((ioflag & FRSYNC) == 0) 455 ioflag &= ~(FSYNC|FDSYNC); 456 error = VOP_READ(vp, &auio, ioflag, fp->f_cred, NULL); 457 bcount -= auio.uio_resid; 458 CPU_STATS_ENTER_K(); 459 cp = CPU; 460 CPU_STATS_ADDQ(cp, sys, sysread, 1); 461 CPU_STATS_ADDQ(cp, sys, readch, (ulong_t)bcount); 462 CPU_STATS_EXIT_K(); 463 ttolwp(curthread)->lwp_ru.ioch += (ulong_t)bcount; 464 VOP_RWUNLOCK(vp, rwflag, NULL); 465 466 if (error == EINTR && bcount != 0) 467 error = 0; 468 out: 469 if (in_crit) 470 nbl_end_crit(vp); 471 releasef(fdes); 472 if (error) 473 return (set_errno(error)); 474 return (bcount); 475 } 476 477 ssize_t 478 pwrite(int fdes, void *cbuf, size_t count, off_t offset) 479 { 480 struct uio auio; 481 struct iovec aiov; 482 file_t *fp; 483 register vnode_t *vp; 484 struct cpu *cp; 485 int fflag, ioflag, rwflag; 486 ssize_t bcount; 487 int error = 0; 488 u_offset_t fileoff = (u_offset_t)(ulong_t)offset; 489 #ifdef _SYSCALL32_IMPL 490 u_offset_t maxoff = get_udatamodel() == DATAMODEL_ILP32 ? 491 MAXOFF32_T : MAXOFFSET_T; 492 #else 493 const u_offset_t maxoff = MAXOFF32_T; 494 #endif 495 int in_crit = 0; 496 497 if ((bcount = (ssize_t)count) < 0) 498 return (set_errno(EINVAL)); 499 if ((fp = getf(fdes)) == NULL) 500 return (set_errno(EBADF)); 501 if (((fflag = fp->f_flag) & (FWRITE)) == 0) { 502 error = EBADF; 503 goto out; 504 } 505 506 rwflag = 1; 507 vp = fp->f_vnode; 508 509 if (vp->v_type == VREG) { 510 511 if (bcount == 0) 512 goto out; 513 514 /* 515 * return EINVAL for offsets that cannot be 516 * represented in an off_t. 517 */ 518 if (fileoff > maxoff) { 519 error = EINVAL; 520 goto out; 521 } 522 /* 523 * Take appropriate action if we are trying to write above the 524 * resource limit. 525 */ 526 if (fileoff >= curproc->p_fsz_ctl) { 527 mutex_enter(&curproc->p_lock); 528 (void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE], 529 curproc->p_rctls, curproc, RCA_UNSAFE_SIGINFO); 530 mutex_exit(&curproc->p_lock); 531 532 error = EFBIG; 533 goto out; 534 } 535 /* 536 * Don't allow pwrite to cause file sizes to exceed 537 * maxoff. 538 */ 539 if (fileoff == maxoff) { 540 error = EFBIG; 541 goto out; 542 } 543 if (fileoff + count > maxoff) 544 bcount = (ssize_t)((u_offset_t)maxoff - fileoff); 545 } else if (vp->v_type == VFIFO) { 546 error = ESPIPE; 547 goto out; 548 } 549 550 /* 551 * We have to enter the critical region before calling VOP_RWLOCK 552 * to avoid a deadlock with ufs. 553 */ 554 if (nbl_need_check(vp)) { 555 int svmand; 556 557 nbl_start_crit(vp, RW_READER); 558 in_crit = 1; 559 error = nbl_svmand(vp, fp->f_cred, &svmand); 560 if (error != 0) 561 goto out; 562 if (nbl_conflict(vp, NBL_WRITE, fileoff, bcount, svmand, 563 NULL)) { 564 error = EACCES; 565 goto out; 566 } 567 } 568 569 aiov.iov_base = cbuf; 570 aiov.iov_len = bcount; 571 (void) VOP_RWLOCK(vp, rwflag, NULL); 572 auio.uio_loffset = fileoff; 573 auio.uio_iov = &aiov; 574 auio.uio_iovcnt = 1; 575 auio.uio_resid = bcount; 576 auio.uio_segflg = UIO_USERSPACE; 577 auio.uio_llimit = curproc->p_fsz_ctl; 578 auio.uio_fmode = fflag; 579 auio.uio_extflg = UIO_COPY_CACHED; 580 581 /* 582 * The SUSv4 POSIX specification states: 583 * The pwrite() function shall be equivalent to write(), except 584 * that it writes into a given position and does not change 585 * the file offset (regardless of whether O_APPEND is set). 586 * To make this be true, we omit the FAPPEND flag from ioflag. 587 */ 588 ioflag = auio.uio_fmode & (FSYNC|FDSYNC|FRSYNC); 589 590 error = VOP_WRITE(vp, &auio, ioflag, fp->f_cred, NULL); 591 bcount -= auio.uio_resid; 592 CPU_STATS_ENTER_K(); 593 cp = CPU; 594 CPU_STATS_ADDQ(cp, sys, syswrite, 1); 595 CPU_STATS_ADDQ(cp, sys, writech, (ulong_t)bcount); 596 CPU_STATS_EXIT_K(); 597 ttolwp(curthread)->lwp_ru.ioch += (ulong_t)bcount; 598 VOP_RWUNLOCK(vp, rwflag, NULL); 599 600 if (error == EINTR && bcount != 0) 601 error = 0; 602 out: 603 if (in_crit) 604 nbl_end_crit(vp); 605 releasef(fdes); 606 if (error) 607 return (set_errno(error)); 608 return (bcount); 609 } 610 611 ssize_t 612 readv(int fdes, struct iovec *iovp, int iovcnt) 613 { 614 struct uio auio; 615 struct iovec buf[IOV_MAX_STACK], *aiov = buf; 616 int aiovlen = 0; 617 file_t *fp; 618 register vnode_t *vp; 619 struct cpu *cp; 620 int fflag, ioflag, rwflag; 621 ssize_t count, bcount; 622 int error = 0; 623 int i; 624 u_offset_t fileoff; 625 int in_crit = 0; 626 627 if (iovcnt <= 0 || iovcnt > IOV_MAX) 628 return (set_errno(EINVAL)); 629 630 if (iovcnt > IOV_MAX_STACK) { 631 aiovlen = iovcnt * sizeof (iovec_t); 632 aiov = kmem_alloc(aiovlen, KM_SLEEP); 633 } 634 635 #ifdef _SYSCALL32_IMPL 636 /* 637 * 32-bit callers need to have their iovec expanded, 638 * while ensuring that they can't move more than 2Gbytes 639 * of data in a single call. 640 */ 641 if (get_udatamodel() == DATAMODEL_ILP32) { 642 struct iovec32 buf32[IOV_MAX_STACK], *aiov32 = buf32; 643 int aiov32len; 644 ssize32_t count32; 645 646 aiov32len = iovcnt * sizeof (iovec32_t); 647 if (aiovlen != 0) 648 aiov32 = kmem_alloc(aiov32len, KM_SLEEP); 649 650 if (copyin(iovp, aiov32, aiov32len)) { 651 if (aiovlen != 0) { 652 kmem_free(aiov32, aiov32len); 653 kmem_free(aiov, aiovlen); 654 } 655 return (set_errno(EFAULT)); 656 } 657 658 count32 = 0; 659 for (i = 0; i < iovcnt; i++) { 660 ssize32_t iovlen32 = aiov32[i].iov_len; 661 count32 += iovlen32; 662 if (iovlen32 < 0 || count32 < 0) { 663 if (aiovlen != 0) { 664 kmem_free(aiov32, aiov32len); 665 kmem_free(aiov, aiovlen); 666 } 667 return (set_errno(EINVAL)); 668 } 669 aiov[i].iov_len = iovlen32; 670 aiov[i].iov_base = 671 (caddr_t)(uintptr_t)aiov32[i].iov_base; 672 } 673 674 if (aiovlen != 0) 675 kmem_free(aiov32, aiov32len); 676 } else 677 #endif 678 if (copyin(iovp, aiov, iovcnt * sizeof (iovec_t))) { 679 if (aiovlen != 0) 680 kmem_free(aiov, aiovlen); 681 return (set_errno(EFAULT)); 682 } 683 684 count = 0; 685 for (i = 0; i < iovcnt; i++) { 686 ssize_t iovlen = aiov[i].iov_len; 687 count += iovlen; 688 if (iovlen < 0 || count < 0) { 689 if (aiovlen != 0) 690 kmem_free(aiov, aiovlen); 691 return (set_errno(EINVAL)); 692 } 693 } 694 if ((fp = getf(fdes)) == NULL) { 695 if (aiovlen != 0) 696 kmem_free(aiov, aiovlen); 697 return (set_errno(EBADF)); 698 } 699 if (((fflag = fp->f_flag) & FREAD) == 0) { 700 error = EBADF; 701 goto out; 702 } 703 vp = fp->f_vnode; 704 if (vp->v_type == VREG && count == 0) { 705 goto out; 706 } 707 708 rwflag = 0; 709 710 /* 711 * We have to enter the critical region before calling VOP_RWLOCK 712 * to avoid a deadlock with ufs. 713 */ 714 if (nbl_need_check(vp)) { 715 int svmand; 716 717 nbl_start_crit(vp, RW_READER); 718 in_crit = 1; 719 error = nbl_svmand(vp, fp->f_cred, &svmand); 720 if (error != 0) 721 goto out; 722 if (nbl_conflict(vp, NBL_READ, fp->f_offset, count, svmand, 723 NULL)) { 724 error = EACCES; 725 goto out; 726 } 727 } 728 729 (void) VOP_RWLOCK(vp, rwflag, NULL); 730 fileoff = fp->f_offset; 731 732 /* 733 * Behaviour is same as read. Please see comments in read. 734 */ 735 736 if ((vp->v_type == VREG) && (fileoff >= OFFSET_MAX(fp))) { 737 struct vattr va; 738 va.va_mask = AT_SIZE; 739 if ((error = VOP_GETATTR(vp, &va, 0, fp->f_cred, NULL))) { 740 VOP_RWUNLOCK(vp, rwflag, NULL); 741 goto out; 742 } 743 if (fileoff >= va.va_size) { 744 VOP_RWUNLOCK(vp, rwflag, NULL); 745 count = 0; 746 goto out; 747 } else { 748 VOP_RWUNLOCK(vp, rwflag, NULL); 749 error = EOVERFLOW; 750 goto out; 751 } 752 } 753 if ((vp->v_type == VREG) && (fileoff + count > OFFSET_MAX(fp))) { 754 count = (ssize_t)(OFFSET_MAX(fp) - fileoff); 755 } 756 auio.uio_loffset = fileoff; 757 auio.uio_iov = aiov; 758 auio.uio_iovcnt = iovcnt; 759 auio.uio_resid = bcount = count; 760 auio.uio_segflg = UIO_USERSPACE; 761 auio.uio_llimit = MAXOFFSET_T; 762 auio.uio_fmode = fflag; 763 if (bcount <= copyout_max_cached) 764 auio.uio_extflg = UIO_COPY_CACHED; 765 else 766 auio.uio_extflg = UIO_COPY_DEFAULT; 767 768 769 ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC); 770 771 /* If read sync is not asked for, filter sync flags */ 772 if ((ioflag & FRSYNC) == 0) 773 ioflag &= ~(FSYNC|FDSYNC); 774 error = VOP_READ(vp, &auio, ioflag, fp->f_cred, NULL); 775 count -= auio.uio_resid; 776 CPU_STATS_ENTER_K(); 777 cp = CPU; 778 CPU_STATS_ADDQ(cp, sys, sysread, 1); 779 CPU_STATS_ADDQ(cp, sys, readch, (ulong_t)count); 780 CPU_STATS_EXIT_K(); 781 ttolwp(curthread)->lwp_ru.ioch += (ulong_t)count; 782 783 if (vp->v_type == VFIFO) /* Backward compatibility */ 784 fp->f_offset = count; 785 else if (((fp->f_flag & FAPPEND) == 0) || 786 (vp->v_type != VREG) || (bcount != 0)) /* POSIX */ 787 fp->f_offset = auio.uio_loffset; 788 789 VOP_RWUNLOCK(vp, rwflag, NULL); 790 791 if (error == EINTR && count != 0) 792 error = 0; 793 out: 794 if (in_crit) 795 nbl_end_crit(vp); 796 releasef(fdes); 797 if (aiovlen != 0) 798 kmem_free(aiov, aiovlen); 799 if (error) 800 return (set_errno(error)); 801 return (count); 802 } 803 804 ssize_t 805 writev(int fdes, struct iovec *iovp, int iovcnt) 806 { 807 struct uio auio; 808 struct iovec buf[IOV_MAX_STACK], *aiov = buf; 809 int aiovlen = 0; 810 file_t *fp; 811 register vnode_t *vp; 812 struct cpu *cp; 813 int fflag, ioflag, rwflag; 814 ssize_t count, bcount; 815 int error = 0; 816 int i; 817 u_offset_t fileoff; 818 int in_crit = 0; 819 820 if (iovcnt <= 0 || iovcnt > IOV_MAX) 821 return (set_errno(EINVAL)); 822 823 if (iovcnt > IOV_MAX_STACK) { 824 aiovlen = iovcnt * sizeof (iovec_t); 825 aiov = kmem_alloc(aiovlen, KM_SLEEP); 826 } 827 828 #ifdef _SYSCALL32_IMPL 829 /* 830 * 32-bit callers need to have their iovec expanded, 831 * while ensuring that they can't move more than 2Gbytes 832 * of data in a single call. 833 */ 834 if (get_udatamodel() == DATAMODEL_ILP32) { 835 struct iovec32 buf32[IOV_MAX_STACK], *aiov32 = buf32; 836 int aiov32len; 837 ssize32_t count32; 838 839 aiov32len = iovcnt * sizeof (iovec32_t); 840 if (aiovlen != 0) 841 aiov32 = kmem_alloc(aiov32len, KM_SLEEP); 842 843 if (copyin(iovp, aiov32, aiov32len)) { 844 if (aiovlen != 0) { 845 kmem_free(aiov32, aiov32len); 846 kmem_free(aiov, aiovlen); 847 } 848 return (set_errno(EFAULT)); 849 } 850 851 count32 = 0; 852 for (i = 0; i < iovcnt; i++) { 853 ssize32_t iovlen = aiov32[i].iov_len; 854 count32 += iovlen; 855 if (iovlen < 0 || count32 < 0) { 856 if (aiovlen != 0) { 857 kmem_free(aiov32, aiov32len); 858 kmem_free(aiov, aiovlen); 859 } 860 return (set_errno(EINVAL)); 861 } 862 aiov[i].iov_len = iovlen; 863 aiov[i].iov_base = 864 (caddr_t)(uintptr_t)aiov32[i].iov_base; 865 } 866 if (aiovlen != 0) 867 kmem_free(aiov32, aiov32len); 868 } else 869 #endif 870 if (copyin(iovp, aiov, iovcnt * sizeof (iovec_t))) { 871 if (aiovlen != 0) 872 kmem_free(aiov, aiovlen); 873 return (set_errno(EFAULT)); 874 } 875 876 count = 0; 877 for (i = 0; i < iovcnt; i++) { 878 ssize_t iovlen = aiov[i].iov_len; 879 count += iovlen; 880 if (iovlen < 0 || count < 0) { 881 if (aiovlen != 0) 882 kmem_free(aiov, aiovlen); 883 return (set_errno(EINVAL)); 884 } 885 } 886 if ((fp = getf(fdes)) == NULL) { 887 if (aiovlen != 0) 888 kmem_free(aiov, aiovlen); 889 return (set_errno(EBADF)); 890 } 891 if (((fflag = fp->f_flag) & FWRITE) == 0) { 892 error = EBADF; 893 goto out; 894 } 895 vp = fp->f_vnode; 896 if (vp->v_type == VREG && count == 0) { 897 goto out; 898 } 899 900 rwflag = 1; 901 902 /* 903 * We have to enter the critical region before calling VOP_RWLOCK 904 * to avoid a deadlock with ufs. 905 */ 906 if (nbl_need_check(vp)) { 907 int svmand; 908 909 nbl_start_crit(vp, RW_READER); 910 in_crit = 1; 911 error = nbl_svmand(vp, fp->f_cred, &svmand); 912 if (error != 0) 913 goto out; 914 if (nbl_conflict(vp, NBL_WRITE, fp->f_offset, count, svmand, 915 NULL)) { 916 error = EACCES; 917 goto out; 918 } 919 } 920 921 (void) VOP_RWLOCK(vp, rwflag, NULL); 922 923 fileoff = fp->f_offset; 924 925 /* 926 * Behaviour is same as write. Please see comments for write. 927 */ 928 929 if (vp->v_type == VREG) { 930 if (fileoff >= curproc->p_fsz_ctl) { 931 VOP_RWUNLOCK(vp, rwflag, NULL); 932 mutex_enter(&curproc->p_lock); 933 (void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE], 934 curproc->p_rctls, curproc, RCA_UNSAFE_SIGINFO); 935 mutex_exit(&curproc->p_lock); 936 error = EFBIG; 937 goto out; 938 } 939 if (fileoff >= OFFSET_MAX(fp)) { 940 VOP_RWUNLOCK(vp, rwflag, NULL); 941 error = EFBIG; 942 goto out; 943 } 944 if (fileoff + count > OFFSET_MAX(fp)) 945 count = (ssize_t)(OFFSET_MAX(fp) - fileoff); 946 } 947 auio.uio_loffset = fileoff; 948 auio.uio_iov = aiov; 949 auio.uio_iovcnt = iovcnt; 950 auio.uio_resid = bcount = count; 951 auio.uio_segflg = UIO_USERSPACE; 952 auio.uio_llimit = curproc->p_fsz_ctl; 953 auio.uio_fmode = fflag; 954 auio.uio_extflg = UIO_COPY_DEFAULT; 955 956 ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC); 957 958 error = VOP_WRITE(vp, &auio, ioflag, fp->f_cred, NULL); 959 count -= auio.uio_resid; 960 CPU_STATS_ENTER_K(); 961 cp = CPU; 962 CPU_STATS_ADDQ(cp, sys, syswrite, 1); 963 CPU_STATS_ADDQ(cp, sys, writech, (ulong_t)count); 964 CPU_STATS_EXIT_K(); 965 ttolwp(curthread)->lwp_ru.ioch += (ulong_t)count; 966 967 if (vp->v_type == VFIFO) /* Backward compatibility */ 968 fp->f_offset = count; 969 else if (((fp->f_flag & FAPPEND) == 0) || 970 (vp->v_type != VREG) || (bcount != 0)) /* POSIX */ 971 fp->f_offset = auio.uio_loffset; 972 VOP_RWUNLOCK(vp, rwflag, NULL); 973 974 if (error == EINTR && count != 0) 975 error = 0; 976 out: 977 if (in_crit) 978 nbl_end_crit(vp); 979 releasef(fdes); 980 if (aiovlen != 0) 981 kmem_free(aiov, aiovlen); 982 if (error) 983 return (set_errno(error)); 984 return (count); 985 } 986 987 ssize_t 988 preadv(int fdes, struct iovec *iovp, int iovcnt, off_t offset, 989 off_t extended_offset) 990 { 991 struct uio auio; 992 struct iovec buf[IOV_MAX_STACK], *aiov = buf; 993 int aiovlen = 0; 994 file_t *fp; 995 register vnode_t *vp; 996 struct cpu *cp; 997 int fflag, ioflag, rwflag; 998 ssize_t count, bcount; 999 int error = 0; 1000 int i; 1001 1002 /* 1003 * In a 64-bit kernel, this interface supports native 64-bit 1004 * applications as well as 32-bit applications using both standard and 1005 * large-file access. For 32-bit large-file aware applications, the 1006 * offset is passed as two parameters which are joined into the actual 1007 * offset used. The 64-bit libc always passes 0 for the extended_offset. 1008 * Note that off_t is a signed value, but the preadv/pwritev API treats 1009 * the offset as a position in the file for the operation, so passing 1010 * a negative value will likely fail the maximum offset checks below 1011 * because we convert it to an unsigned value which will be larger than 1012 * the maximum valid offset. 1013 */ 1014 #if defined(_SYSCALL32_IMPL) || defined(_ILP32) 1015 u_offset_t fileoff = ((u_offset_t)extended_offset << 32) | 1016 (u_offset_t)offset; 1017 #else /* _SYSCALL32_IMPL || _ILP32 */ 1018 u_offset_t fileoff = (u_offset_t)(ulong_t)offset; 1019 #endif /* _SYSCALL32_IMPR || _ILP32 */ 1020 1021 int in_crit = 0; 1022 1023 if (iovcnt <= 0 || iovcnt > IOV_MAX) 1024 return (set_errno(EINVAL)); 1025 1026 if (iovcnt > IOV_MAX_STACK) { 1027 aiovlen = iovcnt * sizeof (iovec_t); 1028 aiov = kmem_alloc(aiovlen, KM_SLEEP); 1029 } 1030 1031 #ifdef _SYSCALL32_IMPL 1032 /* 1033 * 32-bit callers need to have their iovec expanded, 1034 * while ensuring that they can't move more than 2Gbytes 1035 * of data in a single call. 1036 */ 1037 if (get_udatamodel() == DATAMODEL_ILP32) { 1038 struct iovec32 buf32[IOV_MAX_STACK], *aiov32 = buf32; 1039 int aiov32len; 1040 ssize32_t count32; 1041 1042 aiov32len = iovcnt * sizeof (iovec32_t); 1043 if (aiovlen != 0) 1044 aiov32 = kmem_alloc(aiov32len, KM_SLEEP); 1045 1046 if (copyin(iovp, aiov32, aiov32len)) { 1047 if (aiovlen != 0) { 1048 kmem_free(aiov32, aiov32len); 1049 kmem_free(aiov, aiovlen); 1050 } 1051 return (set_errno(EFAULT)); 1052 } 1053 1054 count32 = 0; 1055 for (i = 0; i < iovcnt; i++) { 1056 ssize32_t iovlen32 = aiov32[i].iov_len; 1057 count32 += iovlen32; 1058 if (iovlen32 < 0 || count32 < 0) { 1059 if (aiovlen != 0) { 1060 kmem_free(aiov32, aiov32len); 1061 kmem_free(aiov, aiovlen); 1062 } 1063 return (set_errno(EINVAL)); 1064 } 1065 aiov[i].iov_len = iovlen32; 1066 aiov[i].iov_base = 1067 (caddr_t)(uintptr_t)aiov32[i].iov_base; 1068 } 1069 if (aiovlen != 0) 1070 kmem_free(aiov32, aiov32len); 1071 } else 1072 #endif /* _SYSCALL32_IMPL */ 1073 if (copyin(iovp, aiov, iovcnt * sizeof (iovec_t))) { 1074 if (aiovlen != 0) 1075 kmem_free(aiov, aiovlen); 1076 return (set_errno(EFAULT)); 1077 } 1078 1079 count = 0; 1080 for (i = 0; i < iovcnt; i++) { 1081 ssize_t iovlen = aiov[i].iov_len; 1082 count += iovlen; 1083 if (iovlen < 0 || count < 0) { 1084 if (aiovlen != 0) 1085 kmem_free(aiov, aiovlen); 1086 return (set_errno(EINVAL)); 1087 } 1088 } 1089 1090 if ((bcount = count) < 0) { 1091 if (aiovlen != 0) 1092 kmem_free(aiov, aiovlen); 1093 return (set_errno(EINVAL)); 1094 } 1095 if ((fp = getf(fdes)) == NULL) { 1096 if (aiovlen != 0) 1097 kmem_free(aiov, aiovlen); 1098 return (set_errno(EBADF)); 1099 } 1100 if (((fflag = fp->f_flag) & FREAD) == 0) { 1101 error = EBADF; 1102 goto out; 1103 } 1104 vp = fp->f_vnode; 1105 rwflag = 0; 1106 1107 /* 1108 * Behaviour is same as read(2). Please see comments in read above. 1109 */ 1110 if (vp->v_type == VREG) { 1111 if (bcount == 0) 1112 goto out; 1113 1114 /* Handle offset past maximum offset allowed for file. */ 1115 if (fileoff >= OFFSET_MAX(fp)) { 1116 struct vattr va; 1117 va.va_mask = AT_SIZE; 1118 1119 error = VOP_GETATTR(vp, &va, 0, fp->f_cred, NULL); 1120 if (error == 0) { 1121 if (fileoff >= va.va_size) { 1122 count = 0; 1123 } else { 1124 error = EOVERFLOW; 1125 } 1126 } 1127 goto out; 1128 } 1129 1130 ASSERT(bcount == count); 1131 1132 /* Note: modified count used in nbl_conflict() call below. */ 1133 if ((fileoff + count) > OFFSET_MAX(fp)) 1134 count = (ssize_t)(OFFSET_MAX(fp) - fileoff); 1135 1136 } else if (vp->v_type == VFIFO) { 1137 error = ESPIPE; 1138 goto out; 1139 } 1140 /* 1141 * We have to enter the critical region before calling VOP_RWLOCK 1142 * to avoid a deadlock with ufs. 1143 */ 1144 if (nbl_need_check(vp)) { 1145 int svmand; 1146 1147 nbl_start_crit(vp, RW_READER); 1148 in_crit = 1; 1149 error = nbl_svmand(vp, fp->f_cred, &svmand); 1150 if (error != 0) 1151 goto out; 1152 if (nbl_conflict(vp, NBL_WRITE, fileoff, count, svmand, NULL)) { 1153 error = EACCES; 1154 goto out; 1155 } 1156 } 1157 1158 (void) VOP_RWLOCK(vp, rwflag, NULL); 1159 1160 auio.uio_loffset = fileoff; 1161 auio.uio_iov = aiov; 1162 auio.uio_iovcnt = iovcnt; 1163 auio.uio_resid = bcount = count; 1164 auio.uio_segflg = UIO_USERSPACE; 1165 auio.uio_llimit = MAXOFFSET_T; 1166 auio.uio_fmode = fflag; 1167 if (bcount <= copyout_max_cached) 1168 auio.uio_extflg = UIO_COPY_CACHED; 1169 else 1170 auio.uio_extflg = UIO_COPY_DEFAULT; 1171 1172 ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC); 1173 error = VOP_READ(vp, &auio, ioflag, fp->f_cred, NULL); 1174 count -= auio.uio_resid; 1175 CPU_STATS_ENTER_K(); 1176 cp = CPU; 1177 CPU_STATS_ADDQ(cp, sys, sysread, 1); 1178 CPU_STATS_ADDQ(cp, sys, readch, (ulong_t)count); 1179 CPU_STATS_EXIT_K(); 1180 ttolwp(curthread)->lwp_ru.ioch += (ulong_t)count; 1181 1182 VOP_RWUNLOCK(vp, rwflag, NULL); 1183 1184 if (error == EINTR && count != 0) 1185 error = 0; 1186 out: 1187 if (in_crit) 1188 nbl_end_crit(vp); 1189 releasef(fdes); 1190 if (aiovlen != 0) 1191 kmem_free(aiov, aiovlen); 1192 if (error) 1193 return (set_errno(error)); 1194 return (count); 1195 } 1196 1197 ssize_t 1198 pwritev(int fdes, struct iovec *iovp, int iovcnt, off_t offset, 1199 off_t extended_offset) 1200 { 1201 struct uio auio; 1202 struct iovec buf[IOV_MAX_STACK], *aiov = buf; 1203 int aiovlen = 0; 1204 file_t *fp; 1205 register vnode_t *vp; 1206 struct cpu *cp; 1207 int fflag, ioflag, rwflag; 1208 ssize_t count, bcount; 1209 int error = 0; 1210 int i; 1211 1212 /* 1213 * See the comment in preadv for how the offset is handled. 1214 */ 1215 #if defined(_SYSCALL32_IMPL) || defined(_ILP32) 1216 u_offset_t fileoff = ((u_offset_t)extended_offset << 32) | 1217 (u_offset_t)offset; 1218 #else /* _SYSCALL32_IMPL || _ILP32 */ 1219 u_offset_t fileoff = (u_offset_t)(ulong_t)offset; 1220 #endif /* _SYSCALL32_IMPR || _ILP32 */ 1221 1222 int in_crit = 0; 1223 1224 if (iovcnt <= 0 || iovcnt > IOV_MAX) 1225 return (set_errno(EINVAL)); 1226 1227 if (iovcnt > IOV_MAX_STACK) { 1228 aiovlen = iovcnt * sizeof (iovec_t); 1229 aiov = kmem_alloc(aiovlen, KM_SLEEP); 1230 } 1231 1232 #ifdef _SYSCALL32_IMPL 1233 /* 1234 * 32-bit callers need to have their iovec expanded, 1235 * while ensuring that they can't move more than 2Gbytes 1236 * of data in a single call. 1237 */ 1238 if (get_udatamodel() == DATAMODEL_ILP32) { 1239 struct iovec32 buf32[IOV_MAX_STACK], *aiov32 = buf32; 1240 int aiov32len; 1241 ssize32_t count32; 1242 1243 aiov32len = iovcnt * sizeof (iovec32_t); 1244 if (aiovlen != 0) 1245 aiov32 = kmem_alloc(aiov32len, KM_SLEEP); 1246 1247 if (copyin(iovp, aiov32, aiov32len)) { 1248 if (aiovlen != 0) { 1249 kmem_free(aiov32, aiov32len); 1250 kmem_free(aiov, aiovlen); 1251 } 1252 return (set_errno(EFAULT)); 1253 } 1254 1255 count32 = 0; 1256 for (i = 0; i < iovcnt; i++) { 1257 ssize32_t iovlen32 = aiov32[i].iov_len; 1258 count32 += iovlen32; 1259 if (iovlen32 < 0 || count32 < 0) { 1260 if (aiovlen != 0) { 1261 kmem_free(aiov32, aiov32len); 1262 kmem_free(aiov, aiovlen); 1263 } 1264 return (set_errno(EINVAL)); 1265 } 1266 aiov[i].iov_len = iovlen32; 1267 aiov[i].iov_base = 1268 (caddr_t)(uintptr_t)aiov32[i].iov_base; 1269 } 1270 if (aiovlen != 0) 1271 kmem_free(aiov32, aiov32len); 1272 } else 1273 #endif /* _SYSCALL32_IMPL */ 1274 if (copyin(iovp, aiov, iovcnt * sizeof (iovec_t))) { 1275 if (aiovlen != 0) 1276 kmem_free(aiov, aiovlen); 1277 return (set_errno(EFAULT)); 1278 } 1279 1280 count = 0; 1281 for (i = 0; i < iovcnt; i++) { 1282 ssize_t iovlen = aiov[i].iov_len; 1283 count += iovlen; 1284 if (iovlen < 0 || count < 0) { 1285 if (aiovlen != 0) 1286 kmem_free(aiov, aiovlen); 1287 return (set_errno(EINVAL)); 1288 } 1289 } 1290 1291 if ((bcount = count) < 0) { 1292 if (aiovlen != 0) 1293 kmem_free(aiov, aiovlen); 1294 return (set_errno(EINVAL)); 1295 } 1296 if ((fp = getf(fdes)) == NULL) { 1297 if (aiovlen != 0) 1298 kmem_free(aiov, aiovlen); 1299 return (set_errno(EBADF)); 1300 } 1301 if (((fflag = fp->f_flag) & FWRITE) == 0) { 1302 error = EBADF; 1303 goto out; 1304 } 1305 vp = fp->f_vnode; 1306 rwflag = 1; 1307 1308 /* 1309 * The kernel's write(2) code checks OFFSET_MAX and the rctl, and 1310 * returns EFBIG when fileoff exceeds either limit. We do the same. 1311 */ 1312 if (vp->v_type == VREG) { 1313 if (bcount == 0) 1314 goto out; 1315 1316 /* 1317 * Don't allow pwritev to cause file size to exceed the proper 1318 * offset limit. 1319 */ 1320 if (fileoff >= OFFSET_MAX(fp)) { 1321 error = EFBIG; 1322 goto out; 1323 } 1324 1325 /* 1326 * Take appropriate action if we are trying 1327 * to write above the resource limit. 1328 */ 1329 if (fileoff >= curproc->p_fsz_ctl) { 1330 mutex_enter(&curproc->p_lock); 1331 /* 1332 * Return value ignored because it lists 1333 * actions taken, but we are in an error case. 1334 * We don't have any actions that depend on 1335 * what could happen in this call, so we ignore 1336 * the return value. 1337 */ 1338 (void) rctl_action( 1339 rctlproc_legacy[RLIMIT_FSIZE], 1340 curproc->p_rctls, curproc, 1341 RCA_UNSAFE_SIGINFO); 1342 mutex_exit(&curproc->p_lock); 1343 1344 error = EFBIG; 1345 goto out; 1346 } 1347 1348 ASSERT(bcount == count); 1349 1350 /* Note: modified count used in nbl_conflict() call below. */ 1351 if ((fileoff + count) > OFFSET_MAX(fp)) 1352 count = (ssize_t)(OFFSET_MAX(fp) - fileoff); 1353 1354 } else if (vp->v_type == VFIFO) { 1355 error = ESPIPE; 1356 goto out; 1357 } 1358 /* 1359 * We have to enter the critical region before calling VOP_RWLOCK 1360 * to avoid a deadlock with ufs. 1361 */ 1362 if (nbl_need_check(vp)) { 1363 int svmand; 1364 1365 nbl_start_crit(vp, RW_READER); 1366 in_crit = 1; 1367 error = nbl_svmand(vp, fp->f_cred, &svmand); 1368 if (error != 0) 1369 goto out; 1370 if (nbl_conflict(vp, NBL_WRITE, fileoff, count, svmand, NULL)) { 1371 error = EACCES; 1372 goto out; 1373 } 1374 } 1375 1376 (void) VOP_RWLOCK(vp, rwflag, NULL); 1377 1378 auio.uio_loffset = fileoff; 1379 auio.uio_iov = aiov; 1380 auio.uio_iovcnt = iovcnt; 1381 auio.uio_resid = bcount = count; 1382 auio.uio_segflg = UIO_USERSPACE; 1383 auio.uio_llimit = curproc->p_fsz_ctl; 1384 auio.uio_fmode = fflag; 1385 auio.uio_extflg = UIO_COPY_CACHED; 1386 ioflag = auio.uio_fmode & (FSYNC|FDSYNC|FRSYNC); 1387 error = VOP_WRITE(vp, &auio, ioflag, fp->f_cred, NULL); 1388 count -= auio.uio_resid; 1389 CPU_STATS_ENTER_K(); 1390 cp = CPU; 1391 CPU_STATS_ADDQ(cp, sys, syswrite, 1); 1392 CPU_STATS_ADDQ(cp, sys, writech, (ulong_t)count); 1393 CPU_STATS_EXIT_K(); 1394 ttolwp(curthread)->lwp_ru.ioch += (ulong_t)count; 1395 1396 VOP_RWUNLOCK(vp, rwflag, NULL); 1397 1398 if (error == EINTR && count != 0) 1399 error = 0; 1400 out: 1401 if (in_crit) 1402 nbl_end_crit(vp); 1403 releasef(fdes); 1404 if (aiovlen != 0) 1405 kmem_free(aiov, aiovlen); 1406 if (error) 1407 return (set_errno(error)); 1408 return (count); 1409 } 1410 1411 #if defined(_SYSCALL32_IMPL) || defined(_ILP32) 1412 1413 /* 1414 * This syscall supplies 64-bit file offsets to 32-bit applications only. 1415 */ 1416 ssize32_t 1417 pread64(int fdes, void *cbuf, size32_t count, uint32_t offset_1, 1418 uint32_t offset_2) 1419 { 1420 struct uio auio; 1421 struct iovec aiov; 1422 file_t *fp; 1423 register vnode_t *vp; 1424 struct cpu *cp; 1425 int fflag, ioflag, rwflag; 1426 ssize_t bcount; 1427 int error = 0; 1428 u_offset_t fileoff; 1429 int in_crit = 0; 1430 1431 #if defined(_LITTLE_ENDIAN) 1432 fileoff = ((u_offset_t)offset_2 << 32) | (u_offset_t)offset_1; 1433 #else 1434 fileoff = ((u_offset_t)offset_1 << 32) | (u_offset_t)offset_2; 1435 #endif 1436 1437 if ((bcount = (ssize_t)count) < 0 || bcount > INT32_MAX) 1438 return (set_errno(EINVAL)); 1439 1440 if ((fp = getf(fdes)) == NULL) 1441 return (set_errno(EBADF)); 1442 if (((fflag = fp->f_flag) & (FREAD)) == 0) { 1443 error = EBADF; 1444 goto out; 1445 } 1446 1447 rwflag = 0; 1448 vp = fp->f_vnode; 1449 1450 if (vp->v_type == VREG) { 1451 1452 if (bcount == 0) 1453 goto out; 1454 1455 /* 1456 * Same as pread. See comments in pread. 1457 */ 1458 1459 if (fileoff > MAXOFFSET_T) { 1460 error = EINVAL; 1461 goto out; 1462 } 1463 if (fileoff + bcount > MAXOFFSET_T) 1464 bcount = (ssize_t)(MAXOFFSET_T - fileoff); 1465 } else if (vp->v_type == VFIFO) { 1466 error = ESPIPE; 1467 goto out; 1468 } 1469 1470 /* 1471 * We have to enter the critical region before calling VOP_RWLOCK 1472 * to avoid a deadlock with ufs. 1473 */ 1474 if (nbl_need_check(vp)) { 1475 int svmand; 1476 1477 nbl_start_crit(vp, RW_READER); 1478 in_crit = 1; 1479 error = nbl_svmand(vp, fp->f_cred, &svmand); 1480 if (error != 0) 1481 goto out; 1482 if (nbl_conflict(vp, NBL_READ, fileoff, bcount, svmand, 1483 NULL)) { 1484 error = EACCES; 1485 goto out; 1486 } 1487 } 1488 1489 aiov.iov_base = cbuf; 1490 aiov.iov_len = bcount; 1491 (void) VOP_RWLOCK(vp, rwflag, NULL); 1492 auio.uio_loffset = fileoff; 1493 1494 /* 1495 * Note: File size can never be greater than MAXOFFSET_T. 1496 * If ever we start supporting 128 bit files the code 1497 * similar to the one in pread at this place should be here. 1498 * Here we avoid the unnecessary VOP_GETATTR() when we 1499 * know that fileoff == MAXOFFSET_T implies that it is always 1500 * greater than or equal to file size. 1501 */ 1502 auio.uio_iov = &aiov; 1503 auio.uio_iovcnt = 1; 1504 auio.uio_resid = bcount; 1505 auio.uio_segflg = UIO_USERSPACE; 1506 auio.uio_llimit = MAXOFFSET_T; 1507 auio.uio_fmode = fflag; 1508 auio.uio_extflg = UIO_COPY_CACHED; 1509 1510 ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC); 1511 1512 /* If read sync is not asked for, filter sync flags */ 1513 if ((ioflag & FRSYNC) == 0) 1514 ioflag &= ~(FSYNC|FDSYNC); 1515 error = VOP_READ(vp, &auio, ioflag, fp->f_cred, NULL); 1516 bcount -= auio.uio_resid; 1517 CPU_STATS_ENTER_K(); 1518 cp = CPU; 1519 CPU_STATS_ADDQ(cp, sys, sysread, 1); 1520 CPU_STATS_ADDQ(cp, sys, readch, (ulong_t)bcount); 1521 CPU_STATS_EXIT_K(); 1522 ttolwp(curthread)->lwp_ru.ioch += (ulong_t)bcount; 1523 VOP_RWUNLOCK(vp, rwflag, NULL); 1524 1525 if (error == EINTR && bcount != 0) 1526 error = 0; 1527 out: 1528 if (in_crit) 1529 nbl_end_crit(vp); 1530 releasef(fdes); 1531 if (error) 1532 return (set_errno(error)); 1533 return (bcount); 1534 } 1535 1536 /* 1537 * This syscall supplies 64-bit file offsets to 32-bit applications only. 1538 */ 1539 ssize32_t 1540 pwrite64(int fdes, void *cbuf, size32_t count, uint32_t offset_1, 1541 uint32_t offset_2) 1542 { 1543 struct uio auio; 1544 struct iovec aiov; 1545 file_t *fp; 1546 register vnode_t *vp; 1547 struct cpu *cp; 1548 int fflag, ioflag, rwflag; 1549 ssize_t bcount; 1550 int error = 0; 1551 u_offset_t fileoff; 1552 int in_crit = 0; 1553 1554 #if defined(_LITTLE_ENDIAN) 1555 fileoff = ((u_offset_t)offset_2 << 32) | (u_offset_t)offset_1; 1556 #else 1557 fileoff = ((u_offset_t)offset_1 << 32) | (u_offset_t)offset_2; 1558 #endif 1559 1560 if ((bcount = (ssize_t)count) < 0 || bcount > INT32_MAX) 1561 return (set_errno(EINVAL)); 1562 if ((fp = getf(fdes)) == NULL) 1563 return (set_errno(EBADF)); 1564 if (((fflag = fp->f_flag) & (FWRITE)) == 0) { 1565 error = EBADF; 1566 goto out; 1567 } 1568 1569 rwflag = 1; 1570 vp = fp->f_vnode; 1571 1572 if (vp->v_type == VREG) { 1573 1574 if (bcount == 0) 1575 goto out; 1576 1577 /* 1578 * See comments in pwrite. 1579 */ 1580 if (fileoff > MAXOFFSET_T) { 1581 error = EINVAL; 1582 goto out; 1583 } 1584 if (fileoff >= curproc->p_fsz_ctl) { 1585 mutex_enter(&curproc->p_lock); 1586 (void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE], 1587 curproc->p_rctls, curproc, RCA_SAFE); 1588 mutex_exit(&curproc->p_lock); 1589 error = EFBIG; 1590 goto out; 1591 } 1592 if (fileoff == MAXOFFSET_T) { 1593 error = EFBIG; 1594 goto out; 1595 } 1596 if (fileoff + bcount > MAXOFFSET_T) 1597 bcount = (ssize_t)((u_offset_t)MAXOFFSET_T - fileoff); 1598 } else if (vp->v_type == VFIFO) { 1599 error = ESPIPE; 1600 goto out; 1601 } 1602 1603 /* 1604 * We have to enter the critical region before calling VOP_RWLOCK 1605 * to avoid a deadlock with ufs. 1606 */ 1607 if (nbl_need_check(vp)) { 1608 int svmand; 1609 1610 nbl_start_crit(vp, RW_READER); 1611 in_crit = 1; 1612 error = nbl_svmand(vp, fp->f_cred, &svmand); 1613 if (error != 0) 1614 goto out; 1615 if (nbl_conflict(vp, NBL_WRITE, fileoff, bcount, svmand, 1616 NULL)) { 1617 error = EACCES; 1618 goto out; 1619 } 1620 } 1621 1622 aiov.iov_base = cbuf; 1623 aiov.iov_len = bcount; 1624 (void) VOP_RWLOCK(vp, rwflag, NULL); 1625 auio.uio_loffset = fileoff; 1626 auio.uio_iov = &aiov; 1627 auio.uio_iovcnt = 1; 1628 auio.uio_resid = bcount; 1629 auio.uio_segflg = UIO_USERSPACE; 1630 auio.uio_llimit = curproc->p_fsz_ctl; 1631 auio.uio_fmode = fflag; 1632 auio.uio_extflg = UIO_COPY_CACHED; 1633 1634 /* 1635 * The SUSv4 POSIX specification states: 1636 * The pwrite() function shall be equivalent to write(), except 1637 * that it writes into a given position and does not change 1638 * the file offset (regardless of whether O_APPEND is set). 1639 * To make this be true, we omit the FAPPEND flag from ioflag. 1640 */ 1641 ioflag = auio.uio_fmode & (FSYNC|FDSYNC|FRSYNC); 1642 1643 error = VOP_WRITE(vp, &auio, ioflag, fp->f_cred, NULL); 1644 bcount -= auio.uio_resid; 1645 CPU_STATS_ENTER_K(); 1646 cp = CPU; 1647 CPU_STATS_ADDQ(cp, sys, syswrite, 1); 1648 CPU_STATS_ADDQ(cp, sys, writech, (ulong_t)bcount); 1649 CPU_STATS_EXIT_K(); 1650 ttolwp(curthread)->lwp_ru.ioch += (ulong_t)bcount; 1651 VOP_RWUNLOCK(vp, rwflag, NULL); 1652 1653 if (error == EINTR && bcount != 0) 1654 error = 0; 1655 out: 1656 if (in_crit) 1657 nbl_end_crit(vp); 1658 releasef(fdes); 1659 if (error) 1660 return (set_errno(error)); 1661 return (bcount); 1662 } 1663 1664 #endif /* _SYSCALL32_IMPL || _ILP32 */ 1665 1666 #ifdef _SYSCALL32_IMPL 1667 /* 1668 * Tail-call elimination of xxx32() down to xxx() 1669 * 1670 * A number of xxx32 system calls take a len (or count) argument and 1671 * return a number in the range [0,len] or -1 on error. 1672 * Given an ssize32_t input len, the downcall xxx() will return 1673 * a 64-bit value that is -1 or in the range [0,len] which actually 1674 * is a proper return value for the xxx32 call. So even if the xxx32 1675 * calls can be considered as returning a ssize32_t, they are currently 1676 * declared as returning a ssize_t as this enables tail-call elimination. 1677 * 1678 * The cast of len (or count) to ssize32_t is needed to ensure we pass 1679 * down negative input values as such and let the downcall handle error 1680 * reporting. Functions covered by this comments are: 1681 * 1682 * rw.c: read32, write32, pread32, pwrite32, readv32, writev32. 1683 * socksyscall.c: recv32, recvfrom32, send32, sendto32. 1684 * readlink.c: readlink32. 1685 */ 1686 1687 ssize_t 1688 read32(int32_t fdes, caddr32_t cbuf, size32_t count) 1689 { 1690 return (read(fdes, 1691 (void *)(uintptr_t)cbuf, (ssize32_t)count)); 1692 } 1693 1694 ssize_t 1695 write32(int32_t fdes, caddr32_t cbuf, size32_t count) 1696 { 1697 return (write(fdes, 1698 (void *)(uintptr_t)cbuf, (ssize32_t)count)); 1699 } 1700 1701 ssize_t 1702 pread32(int32_t fdes, caddr32_t cbuf, size32_t count, off32_t offset) 1703 { 1704 return (pread(fdes, 1705 (void *)(uintptr_t)cbuf, (ssize32_t)count, 1706 (off_t)(uint32_t)offset)); 1707 } 1708 1709 ssize_t 1710 pwrite32(int32_t fdes, caddr32_t cbuf, size32_t count, off32_t offset) 1711 { 1712 return (pwrite(fdes, 1713 (void *)(uintptr_t)cbuf, (ssize32_t)count, 1714 (off_t)(uint32_t)offset)); 1715 } 1716 1717 ssize_t 1718 readv32(int32_t fdes, caddr32_t iovp, int32_t iovcnt) 1719 { 1720 return (readv(fdes, (void *)(uintptr_t)iovp, iovcnt)); 1721 } 1722 1723 ssize_t 1724 writev32(int32_t fdes, caddr32_t iovp, int32_t iovcnt) 1725 { 1726 return (writev(fdes, (void *)(uintptr_t)iovp, iovcnt)); 1727 } 1728 #endif /* _SYSCALL32_IMPL */