1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 * Copyright 2015, Joyent, Inc. All rights reserved.
26 */
27
28 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
29 /* All Rights Reserved */
30
31 /*
32 * Portions of this source code were derived from Berkeley 4.3 BSD
33 * under license from the Regents of the University of California.
34 */
35
36 #include <sys/param.h>
37 #include <sys/isa_defs.h>
38 #include <sys/types.h>
39 #include <sys/inttypes.h>
40 #include <sys/sysmacros.h>
41 #include <sys/cred.h>
42 #include <sys/user.h>
43 #include <sys/systm.h>
44 #include <sys/errno.h>
45 #include <sys/vnode.h>
46 #include <sys/file.h>
47 #include <sys/proc.h>
48 #include <sys/cpuvar.h>
49 #include <sys/uio.h>
50 #include <sys/debug.h>
51 #include <sys/rctl.h>
52 #include <sys/nbmlock.h>
53 #include <sys/limits.h>
54
#define	COPYOUT_MAX_CACHE	(1<<17)		/* 128K */

/*
 * Reads of at most this many bytes use cached copyout (UIO_COPY_CACHED);
 * larger reads fall back to UIO_COPY_DEFAULT to avoid polluting the CPU
 * cache (see read() and readv() below).
 */
size_t copyout_max_cached = COPYOUT_MAX_CACHE;	/* global so it's patchable */
58
59 /*
60 * read, write, pread, pwrite, readv, and writev syscalls.
61 *
62 * 64-bit open: all open's are large file opens.
63 * Large Files: the behaviour of read depends on whether the fd
64 * corresponds to large open or not.
65 * 32-bit open: FOFFMAX flag not set.
66 * read until MAXOFF32_T - 1 and read at MAXOFF32_T returns
67 * EOVERFLOW if count is non-zero and if size of file
68 * is > MAXOFF32_T. If size of file is <= MAXOFF32_T read
69 * at >= MAXOFF32_T returns EOF.
70 */
71
72 /*
73 * Native system call
74 */
/*
 * read(2): read up to 'count' bytes from the current file offset of
 * 'fdes' into the user buffer 'cbuf'.  Returns the number of bytes
 * transferred, or -1 with errno set on failure.
 */
ssize_t
read(int fdes, void *cbuf, size_t count)
{
	struct uio auio;
	struct iovec aiov;
	file_t *fp;
	register vnode_t *vp;
	struct cpu *cp;
	int fflag, ioflag, rwflag;
	ssize_t cnt, bcount;
	int error = 0;
	u_offset_t fileoff;
	int in_crit = 0;

	/* Counts that do not fit in a ssize_t are invalid. */
	if ((cnt = (ssize_t)count) < 0)
		return (set_errno(EINVAL));
	if ((fp = getf(fdes)) == NULL)
		return (set_errno(EBADF));
	if (((fflag = fp->f_flag) & FREAD) == 0) {
		error = EBADF;
		goto out;
	}
	vp = fp->f_vnode;

	/* A zero-length read of a regular file succeeds immediately. */
	if (vp->v_type == VREG && cnt == 0) {
		goto out;
	}

	rwflag = 0;		/* take VOP_RWLOCK as reader */
	aiov.iov_base = cbuf;
	aiov.iov_len = cnt;

	/*
	 * We have to enter the critical region before calling VOP_RWLOCK
	 * to avoid a deadlock with write() calls.
	 */
	if (nbl_need_check(vp)) {
		int svmand;

		/* Non-blocking mandatory locking: check for conflicts. */
		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		error = nbl_svmand(vp, fp->f_cred, &svmand);
		if (error != 0)
			goto out;
		if (nbl_conflict(vp, NBL_READ, fp->f_offset, cnt, svmand,
		    NULL)) {
			error = EACCES;
			goto out;
		}
	}

	(void) VOP_RWLOCK(vp, rwflag, NULL);

	/*
	 * We do the following checks inside VOP_RWLOCK so as to
	 * prevent file size from changing while these checks are
	 * being done. Also, we load fp's offset to the local
	 * variable fileoff because we can have a parallel lseek
	 * going on (f_offset is not protected by any lock) which
	 * could change f_offset. We need to see the value only
	 * once here and take a decision. Seeing it more than once
	 * can lead to incorrect functionality.
	 */

	fileoff = (u_offset_t)fp->f_offset;
	if (fileoff >= OFFSET_MAX(fp) && (vp->v_type == VREG)) {
		struct vattr va;
		va.va_mask = AT_SIZE;
		if ((error = VOP_GETATTR(vp, &va, 0, fp->f_cred, NULL))) {
			VOP_RWUNLOCK(vp, rwflag, NULL);
			goto out;
		}
		if (fileoff >= va.va_size) {
			/* At or beyond EOF: return 0. */
			cnt = 0;
			VOP_RWUNLOCK(vp, rwflag, NULL);
			goto out;
		} else {
			/*
			 * Data exists past the largest offset this open
			 * can represent (32-bit open of a large file).
			 */
			error = EOVERFLOW;
			VOP_RWUNLOCK(vp, rwflag, NULL);
			goto out;
		}
	}
	/* Clip the transfer so it ends at this open's offset maximum. */
	if ((vp->v_type == VREG) &&
	    (fileoff + cnt > OFFSET_MAX(fp))) {
		cnt = (ssize_t)(OFFSET_MAX(fp) - fileoff);
	}
	auio.uio_loffset = fileoff;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = bcount = cnt;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_llimit = MAXOFFSET_T;
	auio.uio_fmode = fflag;
	/*
	 * Only use bypass caches when the count is large enough
	 */
	if (bcount <= copyout_max_cached)
		auio.uio_extflg = UIO_COPY_CACHED;
	else
		auio.uio_extflg = UIO_COPY_DEFAULT;

	ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);

	/* If read sync is not asked for, filter sync flags */
	if ((ioflag & FRSYNC) == 0)
		ioflag &= ~(FSYNC|FDSYNC);
	error = VOP_READ(vp, &auio, ioflag, fp->f_cred, NULL);
	cnt -= auio.uio_resid;		/* bytes actually transferred */

	/* Account the transfer in CPU and lwp statistics. */
	CPU_STATS_ENTER_K();
	cp = CPU;
	CPU_STATS_ADDQ(cp, sys, sysread, 1);
	CPU_STATS_ADDQ(cp, sys, readch, (ulong_t)cnt);
	CPU_STATS_EXIT_K();
	ttolwp(curthread)->lwp_ru.ioch += (ulong_t)cnt;

	if (vp->v_type == VFIFO)	/* Backward compatibility */
		fp->f_offset = cnt;
	else if (((fp->f_flag & FAPPEND) == 0) ||
	    (vp->v_type != VREG) || (bcount != 0))	/* POSIX */
		fp->f_offset = auio.uio_loffset;
	VOP_RWUNLOCK(vp, rwflag, NULL);

	/* A partial transfer interrupted by a signal is a success. */
	if (error == EINTR && cnt != 0)
		error = 0;
out:
	if (in_crit)
		nbl_end_crit(vp);
	releasef(fdes);
	if (error)
		return (set_errno(error));
	return (cnt);
}
207
208 /*
209 * Native system call
210 */
/*
 * write(2): write up to 'count' bytes from the user buffer 'cbuf' to
 * 'fdes' at its current file offset.  FAPPEND (O_APPEND) is passed
 * through to the filesystem in ioflag.  Returns the number of bytes
 * written, or -1 with errno set.
 */
ssize_t
write(int fdes, void *cbuf, size_t count)
{
	struct uio auio;
	struct iovec aiov;
	file_t *fp;
	register vnode_t *vp;
	struct cpu *cp;
	int fflag, ioflag, rwflag;
	ssize_t cnt, bcount;
	int error = 0;
	u_offset_t fileoff;
	int in_crit = 0;

	/* Counts that do not fit in a ssize_t are invalid. */
	if ((cnt = (ssize_t)count) < 0)
		return (set_errno(EINVAL));
	if ((fp = getf(fdes)) == NULL)
		return (set_errno(EBADF));
	if (((fflag = fp->f_flag) & FWRITE) == 0) {
		error = EBADF;
		goto out;
	}
	vp = fp->f_vnode;

	/* A zero-length write to a regular file succeeds immediately. */
	if (vp->v_type == VREG && cnt == 0) {
		goto out;
	}

	rwflag = 1;		/* take VOP_RWLOCK as writer */
	aiov.iov_base = cbuf;
	aiov.iov_len = cnt;

	/*
	 * We have to enter the critical region before calling VOP_RWLOCK
	 * to avoid a deadlock with ufs.
	 */
	if (nbl_need_check(vp)) {
		int svmand;

		/* Non-blocking mandatory locking: check for conflicts. */
		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		error = nbl_svmand(vp, fp->f_cred, &svmand);
		if (error != 0)
			goto out;
		if (nbl_conflict(vp, NBL_WRITE, fp->f_offset, cnt, svmand,
		    NULL)) {
			error = EACCES;
			goto out;
		}
	}

	(void) VOP_RWLOCK(vp, rwflag, NULL);

	/* Sample f_offset once; a parallel lseek() may race (see read()). */
	fileoff = fp->f_offset;
	if (vp->v_type == VREG) {

		/*
		 * We raise psignal if write for >0 bytes causes
		 * it to exceed the ulimit.
		 */
		if (fileoff >= curproc->p_fsz_ctl) {
			VOP_RWUNLOCK(vp, rwflag, NULL);

			/* Trip the RLIMIT_FSIZE rctl (signal delivery). */
			mutex_enter(&curproc->p_lock);
			(void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE],
			    curproc->p_rctls, curproc, RCA_UNSAFE_SIGINFO);
			mutex_exit(&curproc->p_lock);

			error = EFBIG;
			goto out;
		}
		/*
		 * We return EFBIG if write is done at an offset
		 * greater than the offset maximum for this file structure.
		 */

		if (fileoff >= OFFSET_MAX(fp)) {
			VOP_RWUNLOCK(vp, rwflag, NULL);
			error = EFBIG;
			goto out;
		}
		/*
		 * Limit the bytes to be written upto offset maximum for
		 * this open file structure.
		 */
		if (fileoff + cnt > OFFSET_MAX(fp))
			cnt = (ssize_t)(OFFSET_MAX(fp) - fileoff);
	}
	auio.uio_loffset = fileoff;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = bcount = cnt;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_llimit = curproc->p_fsz_ctl;
	auio.uio_fmode = fflag;
	auio.uio_extflg = UIO_COPY_DEFAULT;

	ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);

	error = VOP_WRITE(vp, &auio, ioflag, fp->f_cred, NULL);
	cnt -= auio.uio_resid;		/* bytes actually written */

	/* Account the transfer in CPU and lwp statistics. */
	CPU_STATS_ENTER_K();
	cp = CPU;
	CPU_STATS_ADDQ(cp, sys, syswrite, 1);
	CPU_STATS_ADDQ(cp, sys, writech, (ulong_t)cnt);
	CPU_STATS_EXIT_K();
	ttolwp(curthread)->lwp_ru.ioch += (ulong_t)cnt;

	if (vp->v_type == VFIFO)	/* Backward compatibility */
		fp->f_offset = cnt;
	else if (((fp->f_flag & FAPPEND) == 0) ||
	    (vp->v_type != VREG) || (bcount != 0))	/* POSIX */
		fp->f_offset = auio.uio_loffset;
	VOP_RWUNLOCK(vp, rwflag, NULL);

	/* A partial transfer interrupted by a signal is a success. */
	if (error == EINTR && cnt != 0)
		error = 0;
out:
	if (in_crit)
		nbl_end_crit(vp);
	releasef(fdes);
	if (error)
		return (set_errno(error));
	return (cnt);
}
336
/*
 * pread(2): read from 'fdes' at the explicit 'offset', without using
 * or updating the file's current offset.  FIFOs are not seekable and
 * return ESPIPE.
 */
ssize_t
pread(int fdes, void *cbuf, size_t count, off_t offset)
{
	struct uio auio;
	struct iovec aiov;
	file_t *fp;
	register vnode_t *vp;
	struct cpu *cp;
	int fflag, ioflag, rwflag;
	ssize_t bcount;
	int error = 0;
	u_offset_t fileoff = (u_offset_t)(ulong_t)offset;
#ifdef _SYSCALL32_IMPL
	/* ILP32 callers are limited to MAXOFF32_T; LP64 get MAXOFFSET_T. */
	u_offset_t maxoff = get_udatamodel() == DATAMODEL_ILP32 ?
	    MAXOFF32_T : MAXOFFSET_T;
#else
	const u_offset_t maxoff = MAXOFF32_T;
#endif
	int in_crit = 0;

	/* Counts that do not fit in a ssize_t are invalid. */
	if ((bcount = (ssize_t)count) < 0)
		return (set_errno(EINVAL));

	if ((fp = getf(fdes)) == NULL)
		return (set_errno(EBADF));
	if (((fflag = fp->f_flag) & (FREAD)) == 0) {
		error = EBADF;
		goto out;
	}

	rwflag = 0;		/* take VOP_RWLOCK as reader */
	vp = fp->f_vnode;

	if (vp->v_type == VREG) {

		if (bcount == 0)
			goto out;

		/*
		 * Return EINVAL if an invalid offset comes to pread.
		 * Negative offset from user will cause this error.
		 */

		if (fileoff > maxoff) {
			error = EINVAL;
			goto out;
		}
		/*
		 * Limit offset such that we don't read or write
		 * a file beyond the maximum offset representable in
		 * an off_t structure.
		 */
		if (fileoff + bcount > maxoff)
			bcount = (ssize_t)((offset_t)maxoff - fileoff);
	} else if (vp->v_type == VFIFO) {
		error = ESPIPE;
		goto out;
	}

	/*
	 * We have to enter the critical region before calling VOP_RWLOCK
	 * to avoid a deadlock with ufs.
	 */
	if (nbl_need_check(vp)) {
		int svmand;

		/* Non-blocking mandatory locking: check for conflicts. */
		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		error = nbl_svmand(vp, fp->f_cred, &svmand);
		if (error != 0)
			goto out;
		if (nbl_conflict(vp, NBL_READ, fileoff, bcount, svmand,
		    NULL)) {
			error = EACCES;
			goto out;
		}
	}

	aiov.iov_base = cbuf;
	aiov.iov_len = bcount;
	(void) VOP_RWLOCK(vp, rwflag, NULL);
	if (vp->v_type == VREG && fileoff == (u_offset_t)maxoff) {
		struct vattr va;
		va.va_mask = AT_SIZE;
		if ((error = VOP_GETATTR(vp, &va, 0, fp->f_cred, NULL))) {
			VOP_RWUNLOCK(vp, rwflag, NULL);
			goto out;
		}
		VOP_RWUNLOCK(vp, rwflag, NULL);

		/*
		 * We have to return EOF if fileoff is >= file size.
		 */
		if (fileoff >= va.va_size) {
			bcount = 0;
			goto out;
		}

		/*
		 * File is greater than or equal to maxoff and therefore
		 * we return EOVERFLOW.
		 */
		error = EOVERFLOW;
		goto out;
	}
	auio.uio_loffset = fileoff;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = bcount;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_llimit = MAXOFFSET_T;
	auio.uio_fmode = fflag;
	auio.uio_extflg = UIO_COPY_CACHED;

	ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);

	/* If read sync is not asked for, filter sync flags */
	if ((ioflag & FRSYNC) == 0)
		ioflag &= ~(FSYNC|FDSYNC);
	error = VOP_READ(vp, &auio, ioflag, fp->f_cred, NULL);
	bcount -= auio.uio_resid;	/* bytes actually transferred */

	/* Account the transfer in CPU and lwp statistics. */
	CPU_STATS_ENTER_K();
	cp = CPU;
	CPU_STATS_ADDQ(cp, sys, sysread, 1);
	CPU_STATS_ADDQ(cp, sys, readch, (ulong_t)bcount);
	CPU_STATS_EXIT_K();
	ttolwp(curthread)->lwp_ru.ioch += (ulong_t)bcount;
	VOP_RWUNLOCK(vp, rwflag, NULL);

	/* A partial transfer interrupted by a signal is a success. */
	if (error == EINTR && bcount != 0)
		error = 0;
out:
	if (in_crit)
		nbl_end_crit(vp);
	releasef(fdes);
	if (error)
		return (set_errno(error));
	return (bcount);
}
476
/*
 * pwrite(2): write to 'fdes' at the explicit 'offset', without using
 * or updating the file's current offset.  Per SUSv4 the file offset
 * is never moved and O_APPEND is ignored (FAPPEND is filtered out of
 * ioflag below).  FIFOs return ESPIPE.
 */
ssize_t
pwrite(int fdes, void *cbuf, size_t count, off_t offset)
{
	struct uio auio;
	struct iovec aiov;
	file_t *fp;
	register vnode_t *vp;
	struct cpu *cp;
	int fflag, ioflag, rwflag;
	ssize_t bcount;
	int error = 0;
	u_offset_t fileoff = (u_offset_t)(ulong_t)offset;
#ifdef _SYSCALL32_IMPL
	/* ILP32 callers are limited to MAXOFF32_T; LP64 get MAXOFFSET_T. */
	u_offset_t maxoff = get_udatamodel() == DATAMODEL_ILP32 ?
	    MAXOFF32_T : MAXOFFSET_T;
#else
	const u_offset_t maxoff = MAXOFF32_T;
#endif
	int in_crit = 0;

	/* Counts that do not fit in a ssize_t are invalid. */
	if ((bcount = (ssize_t)count) < 0)
		return (set_errno(EINVAL));
	if ((fp = getf(fdes)) == NULL)
		return (set_errno(EBADF));
	if (((fflag = fp->f_flag) & (FWRITE)) == 0) {
		error = EBADF;
		goto out;
	}

	rwflag = 1;		/* take VOP_RWLOCK as writer */
	vp = fp->f_vnode;

	if (vp->v_type == VREG) {

		if (bcount == 0)
			goto out;

		/*
		 * return EINVAL for offsets that cannot be
		 * represented in an off_t.
		 */
		if (fileoff > maxoff) {
			error = EINVAL;
			goto out;
		}
		/*
		 * Take appropriate action if we are trying to write above the
		 * resource limit.
		 */
		if (fileoff >= curproc->p_fsz_ctl) {
			/* Trip the RLIMIT_FSIZE rctl (signal delivery). */
			mutex_enter(&curproc->p_lock);
			(void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE],
			    curproc->p_rctls, curproc, RCA_UNSAFE_SIGINFO);
			mutex_exit(&curproc->p_lock);

			error = EFBIG;
			goto out;
		}
		/*
		 * Don't allow pwrite to cause file sizes to exceed
		 * maxoff.
		 */
		if (fileoff == maxoff) {
			error = EFBIG;
			goto out;
		}
		if (fileoff + count > maxoff)
			bcount = (ssize_t)((u_offset_t)maxoff - fileoff);
	} else if (vp->v_type == VFIFO) {
		error = ESPIPE;
		goto out;
	}

	/*
	 * We have to enter the critical region before calling VOP_RWLOCK
	 * to avoid a deadlock with ufs.
	 */
	if (nbl_need_check(vp)) {
		int svmand;

		/* Non-blocking mandatory locking: check for conflicts. */
		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		error = nbl_svmand(vp, fp->f_cred, &svmand);
		if (error != 0)
			goto out;
		if (nbl_conflict(vp, NBL_WRITE, fileoff, bcount, svmand,
		    NULL)) {
			error = EACCES;
			goto out;
		}
	}

	aiov.iov_base = cbuf;
	aiov.iov_len = bcount;
	(void) VOP_RWLOCK(vp, rwflag, NULL);
	auio.uio_loffset = fileoff;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = bcount;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_llimit = curproc->p_fsz_ctl;
	auio.uio_fmode = fflag;
	auio.uio_extflg = UIO_COPY_CACHED;

	/*
	 * The SUSv4 POSIX specification states:
	 * The pwrite() function shall be equivalent to write(), except
	 * that it writes into a given position and does not change
	 * the file offset (regardless of whether O_APPEND is set).
	 * To make this be true, we omit the FAPPEND flag from ioflag.
	 */
	ioflag = auio.uio_fmode & (FSYNC|FDSYNC|FRSYNC);

	error = VOP_WRITE(vp, &auio, ioflag, fp->f_cred, NULL);
	bcount -= auio.uio_resid;	/* bytes actually written */

	/* Account the transfer in CPU and lwp statistics. */
	CPU_STATS_ENTER_K();
	cp = CPU;
	CPU_STATS_ADDQ(cp, sys, syswrite, 1);
	CPU_STATS_ADDQ(cp, sys, writech, (ulong_t)bcount);
	CPU_STATS_EXIT_K();
	ttolwp(curthread)->lwp_ru.ioch += (ulong_t)bcount;
	VOP_RWUNLOCK(vp, rwflag, NULL);

	/* A partial transfer interrupted by a signal is a success. */
	if (error == EINTR && bcount != 0)
		error = 0;
out:
	if (in_crit)
		nbl_end_crit(vp);
	releasef(fdes);
	if (error)
		return (set_errno(error));
	return (bcount);
}
610
/*
 * readv(2): scatter read from the current file offset of 'fdes' into
 * the 'iovcnt' user buffers described by 'iovp'.  Returns the total
 * number of bytes transferred, or -1 with errno set.
 */
ssize_t
readv(int fdes, struct iovec *iovp, int iovcnt)
{
	struct uio auio;
	/* Small iovec arrays live on the stack; large ones are kmem_alloc'd. */
	struct iovec buf[IOV_MAX_STACK], *aiov = buf;
	int aiovlen = 0;
	file_t *fp;
	register vnode_t *vp;
	struct cpu *cp;
	int fflag, ioflag, rwflag;
	ssize_t count, bcount;
	int error = 0;
	int i;
	u_offset_t fileoff;
	int in_crit = 0;

	if (iovcnt <= 0 || iovcnt > IOV_MAX)
		return (set_errno(EINVAL));

	if (iovcnt > IOV_MAX_STACK) {
		aiovlen = iovcnt * sizeof (iovec_t);
		aiov = kmem_alloc(aiovlen, KM_SLEEP);
	}

#ifdef _SYSCALL32_IMPL
	/*
	 * 32-bit callers need to have their iovec expanded,
	 * while ensuring that they can't move more than 2Gbytes
	 * of data in a single call.
	 */
	if (get_udatamodel() == DATAMODEL_ILP32) {
		struct iovec32 buf32[IOV_MAX_STACK], *aiov32 = buf32;
		int aiov32len;
		ssize32_t count32;

		aiov32len = iovcnt * sizeof (iovec32_t);
		if (aiovlen != 0)
			aiov32 = kmem_alloc(aiov32len, KM_SLEEP);

		if (copyin(iovp, aiov32, aiov32len)) {
			if (aiovlen != 0) {
				kmem_free(aiov32, aiov32len);
				kmem_free(aiov, aiovlen);
			}
			return (set_errno(EFAULT));
		}

		/* Reject negative lengths and a total that overflows 2G. */
		count32 = 0;
		for (i = 0; i < iovcnt; i++) {
			ssize32_t iovlen32 = aiov32[i].iov_len;
			count32 += iovlen32;
			if (iovlen32 < 0 || count32 < 0) {
				if (aiovlen != 0) {
					kmem_free(aiov32, aiov32len);
					kmem_free(aiov, aiovlen);
				}
				return (set_errno(EINVAL));
			}
			aiov[i].iov_len = iovlen32;
			aiov[i].iov_base =
			    (caddr_t)(uintptr_t)aiov32[i].iov_base;
		}

		if (aiovlen != 0)
			kmem_free(aiov32, aiov32len);
	} else
#endif
		if (copyin(iovp, aiov, iovcnt * sizeof (iovec_t))) {
			if (aiovlen != 0)
				kmem_free(aiov, aiovlen);
			return (set_errno(EFAULT));
		}

	/* Reject negative lengths and a total that overflows a ssize_t. */
	count = 0;
	for (i = 0; i < iovcnt; i++) {
		ssize_t iovlen = aiov[i].iov_len;
		count += iovlen;
		if (iovlen < 0 || count < 0) {
			if (aiovlen != 0)
				kmem_free(aiov, aiovlen);
			return (set_errno(EINVAL));
		}
	}
	if ((fp = getf(fdes)) == NULL) {
		if (aiovlen != 0)
			kmem_free(aiov, aiovlen);
		return (set_errno(EBADF));
	}
	if (((fflag = fp->f_flag) & FREAD) == 0) {
		error = EBADF;
		goto out;
	}
	vp = fp->f_vnode;
	/* A zero-length read of a regular file succeeds immediately. */
	if (vp->v_type == VREG && count == 0) {
		goto out;
	}

	rwflag = 0;		/* take VOP_RWLOCK as reader */

	/*
	 * We have to enter the critical region before calling VOP_RWLOCK
	 * to avoid a deadlock with ufs.
	 */
	if (nbl_need_check(vp)) {
		int svmand;

		/* Non-blocking mandatory locking: check for conflicts. */
		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		error = nbl_svmand(vp, fp->f_cred, &svmand);
		if (error != 0)
			goto out;
		if (nbl_conflict(vp, NBL_READ, fp->f_offset, count, svmand,
		    NULL)) {
			error = EACCES;
			goto out;
		}
	}

	(void) VOP_RWLOCK(vp, rwflag, NULL);
	fileoff = fp->f_offset;

	/*
	 * Behaviour is same as read. Please see comments in read.
	 */

	if ((vp->v_type == VREG) && (fileoff >= OFFSET_MAX(fp))) {
		struct vattr va;
		va.va_mask = AT_SIZE;
		if ((error = VOP_GETATTR(vp, &va, 0, fp->f_cred, NULL))) {
			VOP_RWUNLOCK(vp, rwflag, NULL);
			goto out;
		}
		if (fileoff >= va.va_size) {
			VOP_RWUNLOCK(vp, rwflag, NULL);
			count = 0;
			goto out;
		} else {
			VOP_RWUNLOCK(vp, rwflag, NULL);
			error = EOVERFLOW;
			goto out;
		}
	}
	/* Clip the transfer so it ends at this open's offset maximum. */
	if ((vp->v_type == VREG) && (fileoff + count > OFFSET_MAX(fp))) {
		count = (ssize_t)(OFFSET_MAX(fp) - fileoff);
	}
	auio.uio_loffset = fileoff;
	auio.uio_iov = aiov;
	auio.uio_iovcnt = iovcnt;
	auio.uio_resid = bcount = count;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_llimit = MAXOFFSET_T;
	auio.uio_fmode = fflag;
	/* Only use bypass caches when the count is large enough. */
	if (bcount <= copyout_max_cached)
		auio.uio_extflg = UIO_COPY_CACHED;
	else
		auio.uio_extflg = UIO_COPY_DEFAULT;


	ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);

	/* If read sync is not asked for, filter sync flags */
	if ((ioflag & FRSYNC) == 0)
		ioflag &= ~(FSYNC|FDSYNC);
	error = VOP_READ(vp, &auio, ioflag, fp->f_cred, NULL);
	count -= auio.uio_resid;	/* bytes actually transferred */

	/* Account the transfer in CPU and lwp statistics. */
	CPU_STATS_ENTER_K();
	cp = CPU;
	CPU_STATS_ADDQ(cp, sys, sysread, 1);
	CPU_STATS_ADDQ(cp, sys, readch, (ulong_t)count);
	CPU_STATS_EXIT_K();
	ttolwp(curthread)->lwp_ru.ioch += (ulong_t)count;

	if (vp->v_type == VFIFO)	/* Backward compatibility */
		fp->f_offset = count;
	else if (((fp->f_flag & FAPPEND) == 0) ||
	    (vp->v_type != VREG) || (bcount != 0))	/* POSIX */
		fp->f_offset = auio.uio_loffset;

	VOP_RWUNLOCK(vp, rwflag, NULL);

	/* A partial transfer interrupted by a signal is a success. */
	if (error == EINTR && count != 0)
		error = 0;
out:
	if (in_crit)
		nbl_end_crit(vp);
	releasef(fdes);
	if (aiovlen != 0)
		kmem_free(aiov, aiovlen);
	if (error)
		return (set_errno(error));
	return (count);
}
803
/*
 * writev(2): gather write to the current file offset of 'fdes' from
 * the 'iovcnt' user buffers described by 'iovp'.  Returns the total
 * number of bytes written, or -1 with errno set.
 */
ssize_t
writev(int fdes, struct iovec *iovp, int iovcnt)
{
	struct uio auio;
	/* Small iovec arrays live on the stack; large ones are kmem_alloc'd. */
	struct iovec buf[IOV_MAX_STACK], *aiov = buf;
	int aiovlen = 0;
	file_t *fp;
	register vnode_t *vp;
	struct cpu *cp;
	int fflag, ioflag, rwflag;
	ssize_t count, bcount;
	int error = 0;
	int i;
	u_offset_t fileoff;
	int in_crit = 0;

	if (iovcnt <= 0 || iovcnt > IOV_MAX)
		return (set_errno(EINVAL));

	if (iovcnt > IOV_MAX_STACK) {
		aiovlen = iovcnt * sizeof (iovec_t);
		aiov = kmem_alloc(aiovlen, KM_SLEEP);
	}

#ifdef _SYSCALL32_IMPL
	/*
	 * 32-bit callers need to have their iovec expanded,
	 * while ensuring that they can't move more than 2Gbytes
	 * of data in a single call.
	 */
	if (get_udatamodel() == DATAMODEL_ILP32) {
		struct iovec32 buf32[IOV_MAX_STACK], *aiov32 = buf32;
		int aiov32len;
		ssize32_t count32;

		aiov32len = iovcnt * sizeof (iovec32_t);
		if (aiovlen != 0)
			aiov32 = kmem_alloc(aiov32len, KM_SLEEP);

		if (copyin(iovp, aiov32, aiov32len)) {
			if (aiovlen != 0) {
				kmem_free(aiov32, aiov32len);
				kmem_free(aiov, aiovlen);
			}
			return (set_errno(EFAULT));
		}

		/* Reject negative lengths and a total that overflows 2G. */
		count32 = 0;
		for (i = 0; i < iovcnt; i++) {
			ssize32_t iovlen = aiov32[i].iov_len;
			count32 += iovlen;
			if (iovlen < 0 || count32 < 0) {
				if (aiovlen != 0) {
					kmem_free(aiov32, aiov32len);
					kmem_free(aiov, aiovlen);
				}
				return (set_errno(EINVAL));
			}
			aiov[i].iov_len = iovlen;
			aiov[i].iov_base =
			    (caddr_t)(uintptr_t)aiov32[i].iov_base;
		}
		if (aiovlen != 0)
			kmem_free(aiov32, aiov32len);
	} else
#endif
		if (copyin(iovp, aiov, iovcnt * sizeof (iovec_t))) {
			if (aiovlen != 0)
				kmem_free(aiov, aiovlen);
			return (set_errno(EFAULT));
		}

	/* Reject negative lengths and a total that overflows a ssize_t. */
	count = 0;
	for (i = 0; i < iovcnt; i++) {
		ssize_t iovlen = aiov[i].iov_len;
		count += iovlen;
		if (iovlen < 0 || count < 0) {
			if (aiovlen != 0)
				kmem_free(aiov, aiovlen);
			return (set_errno(EINVAL));
		}
	}
	if ((fp = getf(fdes)) == NULL) {
		if (aiovlen != 0)
			kmem_free(aiov, aiovlen);
		return (set_errno(EBADF));
	}
	if (((fflag = fp->f_flag) & FWRITE) == 0) {
		error = EBADF;
		goto out;
	}
	vp = fp->f_vnode;
	/* A zero-length write to a regular file succeeds immediately. */
	if (vp->v_type == VREG && count == 0) {
		goto out;
	}

	rwflag = 1;		/* take VOP_RWLOCK as writer */

	/*
	 * We have to enter the critical region before calling VOP_RWLOCK
	 * to avoid a deadlock with ufs.
	 */
	if (nbl_need_check(vp)) {
		int svmand;

		/* Non-blocking mandatory locking: check for conflicts. */
		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		error = nbl_svmand(vp, fp->f_cred, &svmand);
		if (error != 0)
			goto out;
		if (nbl_conflict(vp, NBL_WRITE, fp->f_offset, count, svmand,
		    NULL)) {
			error = EACCES;
			goto out;
		}
	}

	(void) VOP_RWLOCK(vp, rwflag, NULL);

	fileoff = fp->f_offset;

	/*
	 * Behaviour is same as write. Please see comments for write.
	 */

	if (vp->v_type == VREG) {
		if (fileoff >= curproc->p_fsz_ctl) {
			VOP_RWUNLOCK(vp, rwflag, NULL);
			/* Trip the RLIMIT_FSIZE rctl (signal delivery). */
			mutex_enter(&curproc->p_lock);
			(void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE],
			    curproc->p_rctls, curproc, RCA_UNSAFE_SIGINFO);
			mutex_exit(&curproc->p_lock);
			error = EFBIG;
			goto out;
		}
		if (fileoff >= OFFSET_MAX(fp)) {
			VOP_RWUNLOCK(vp, rwflag, NULL);
			error = EFBIG;
			goto out;
		}
		/* Clip the transfer at this open's offset maximum. */
		if (fileoff + count > OFFSET_MAX(fp))
			count = (ssize_t)(OFFSET_MAX(fp) - fileoff);
	}
	auio.uio_loffset = fileoff;
	auio.uio_iov = aiov;
	auio.uio_iovcnt = iovcnt;
	auio.uio_resid = bcount = count;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_llimit = curproc->p_fsz_ctl;
	auio.uio_fmode = fflag;
	auio.uio_extflg = UIO_COPY_DEFAULT;

	ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);

	error = VOP_WRITE(vp, &auio, ioflag, fp->f_cred, NULL);
	count -= auio.uio_resid;	/* bytes actually written */

	/* Account the transfer in CPU and lwp statistics. */
	CPU_STATS_ENTER_K();
	cp = CPU;
	CPU_STATS_ADDQ(cp, sys, syswrite, 1);
	CPU_STATS_ADDQ(cp, sys, writech, (ulong_t)count);
	CPU_STATS_EXIT_K();
	ttolwp(curthread)->lwp_ru.ioch += (ulong_t)count;

	if (vp->v_type == VFIFO)	/* Backward compatibility */
		fp->f_offset = count;
	else if (((fp->f_flag & FAPPEND) == 0) ||
	    (vp->v_type != VREG) || (bcount != 0))	/* POSIX */
		fp->f_offset = auio.uio_loffset;
	VOP_RWUNLOCK(vp, rwflag, NULL);

	/* A partial transfer interrupted by a signal is a success. */
	if (error == EINTR && count != 0)
		error = 0;
out:
	if (in_crit)
		nbl_end_crit(vp);
	releasef(fdes);
	if (aiovlen != 0)
		kmem_free(aiov, aiovlen);
	if (error)
		return (set_errno(error));
	return (count);
}
986
987 ssize_t
988 preadv(int fdes, struct iovec *iovp, int iovcnt, off_t offset,
989 off_t extended_offset)
990 {
991 struct uio auio;
992 struct iovec buf[IOV_MAX_STACK], *aiov = buf;
993 int aiovlen = 0;
994 file_t *fp;
995 register vnode_t *vp;
996 struct cpu *cp;
997 int fflag, ioflag, rwflag;
998 ssize_t count, bcount;
999 int error = 0;
1000 int i;
1001
1002 #if defined(_SYSCALL32_IMPL) || defined(_ILP32)
1003 u_offset_t fileoff = ((u_offset_t)extended_offset << 32) |
1004 (u_offset_t)offset;
1005 #else /* _SYSCALL32_IMPL || _ILP32 */
1006 u_offset_t fileoff = (u_offset_t)(ulong_t)offset;
1007 #endif /* _SYSCALL32_IMPR || _ILP32 */
1008 #ifdef _SYSCALL32_IMPL
1009 const u_offset_t maxoff = get_udatamodel() == DATAMODEL_ILP32 &&
1010 extended_offset == 0?
1011 MAXOFF32_T : MAXOFFSET_T;
1012 #else /* _SYSCALL32_IMPL */
1013 const u_offset_t maxoff = MAXOFF32_T;
1014 #endif /* _SYSCALL32_IMPL */
1015
1016 int in_crit = 0;
1017
1018 if (iovcnt <= 0 || iovcnt > IOV_MAX)
1019 return (set_errno(EINVAL));
1020
1021 if (iovcnt > IOV_MAX_STACK) {
1022 aiovlen = iovcnt * sizeof (iovec_t);
1023 aiov = kmem_alloc(aiovlen, KM_SLEEP);
1024 }
1025
1026 #ifdef _SYSCALL32_IMPL
1027 /*
1028 * 32-bit callers need to have their iovec expanded,
1029 * while ensuring that they can't move more than 2Gbytes
1030 * of data in a single call.
1031 */
1032 if (get_udatamodel() == DATAMODEL_ILP32) {
1033 struct iovec32 buf32[IOV_MAX_STACK], *aiov32 = buf32;
1034 int aiov32len;
1035 ssize32_t count32;
1036
1037 aiov32len = iovcnt * sizeof (iovec32_t);
1038 if (aiovlen != 0)
1039 aiov32 = kmem_alloc(aiov32len, KM_SLEEP);
1040
1041 if (copyin(iovp, aiov32, aiov32len)) {
1042 if (aiovlen != 0) {
1043 kmem_free(aiov32, aiov32len);
1044 kmem_free(aiov, aiovlen);
1045 }
1046 return (set_errno(EFAULT));
1047 }
1048
1049 count32 = 0;
1050 for (i = 0; i < iovcnt; i++) {
1051 ssize32_t iovlen32 = aiov32[i].iov_len;
1052 count32 += iovlen32;
1053 if (iovlen32 < 0 || count32 < 0) {
1054 if (aiovlen != 0) {
1055 kmem_free(aiov32, aiov32len);
1056 kmem_free(aiov, aiovlen);
1057 }
1058 return (set_errno(EINVAL));
1059 }
1060 aiov[i].iov_len = iovlen32;
1061 aiov[i].iov_base =
1062 (caddr_t)(uintptr_t)aiov32[i].iov_base;
1063 }
1064 if (aiovlen != 0)
1065 kmem_free(aiov32, aiov32len);
1066 } else
1067 #endif /* _SYSCALL32_IMPL */
1068 if (copyin(iovp, aiov, iovcnt * sizeof (iovec_t))) {
1069 if (aiovlen != 0)
1070 kmem_free(aiov, aiovlen);
1071 return (set_errno(EFAULT));
1072 }
1073
1074 count = 0;
1075 for (i = 0; i < iovcnt; i++) {
1076 ssize_t iovlen = aiov[i].iov_len;
1077 count += iovlen;
1078 if (iovlen < 0 || count < 0) {
1079 if (aiovlen != 0)
1080 kmem_free(aiov, aiovlen);
1081 return (set_errno(EINVAL));
1082 }
1083 }
1084
1085 if ((bcount = (ssize_t)count) < 0) {
1086 if (aiovlen != 0)
1087 kmem_free(aiov, aiovlen);
1088 return (set_errno(EINVAL));
1089 }
1090 if ((fp = getf(fdes)) == NULL) {
1091 if (aiovlen != 0)
1092 kmem_free(aiov, aiovlen);
1093 return (set_errno(EBADF));
1094 }
1095 if (((fflag = fp->f_flag) & FREAD) == 0) {
1096 error = EBADF;
1097 goto out;
1098 }
1099 vp = fp->f_vnode;
1100 rwflag = 0;
1101 if (vp->v_type == VREG) {
1102
1103 if (bcount == 0)
1104 goto out;
1105
1106 /*
1107 * return EINVAL for offsets that cannot be
1108 * represented in an off_t.
1109 */
1110 if (fileoff > maxoff) {
1111 error = EINVAL;
1112 goto out;
1113 }
1114
1115 if (fileoff + bcount > maxoff)
1116 bcount = (ssize_t)((u_offset_t)maxoff - fileoff);
1117 } else if (vp->v_type == VFIFO) {
1118 error = ESPIPE;
1119 goto out;
1120 }
1121 /*
1122 * We have to enter the critical region before calling VOP_RWLOCK
1123 * to avoid a deadlock with ufs.
1124 */
1125 if (nbl_need_check(vp)) {
1126 int svmand;
1127
1128 nbl_start_crit(vp, RW_READER);
1129 in_crit = 1;
1130 error = nbl_svmand(vp, fp->f_cred, &svmand);
1131 if (error != 0)
1132 goto out;
1133 if (nbl_conflict(vp, NBL_WRITE, fileoff, count, svmand,
1134 NULL)) {
1135 error = EACCES;
1136 goto out;
1137 }
1138 }
1139
1140 (void) VOP_RWLOCK(vp, rwflag, NULL);
1141
1142 /*
1143 * Behaviour is same as read(2). Please see comments in
1144 * read(2).
1145 */
1146
1147 if ((vp->v_type == VREG) && (fileoff >= OFFSET_MAX(fp))) {
1148 struct vattr va;
1149 va.va_mask = AT_SIZE;
1150 if ((error =
1151 VOP_GETATTR(vp, &va, 0, fp->f_cred, NULL))) {
1152 VOP_RWUNLOCK(vp, rwflag, NULL);
1153 goto out;
1154 }
1155 if (fileoff >= va.va_size) {
1156 VOP_RWUNLOCK(vp, rwflag, NULL);
1157 count = 0;
1158 goto out;
1159 } else {
1160 VOP_RWUNLOCK(vp, rwflag, NULL);
1161 error = EOVERFLOW;
1162 goto out;
1163 }
1164 }
1165 if ((vp->v_type == VREG) &&
1166 (fileoff + count > OFFSET_MAX(fp))) {
1167 count = (ssize_t)(OFFSET_MAX(fp) - fileoff);
1168 }
1169 auio.uio_loffset = fileoff;
1170 auio.uio_iov = aiov;
1171 auio.uio_iovcnt = iovcnt;
1172 auio.uio_resid = bcount = count;
1173 auio.uio_segflg = UIO_USERSPACE;
1174 auio.uio_llimit = MAXOFFSET_T;
1175 auio.uio_fmode = fflag;
1176 if (bcount <= copyout_max_cached)
1177 auio.uio_extflg = UIO_COPY_CACHED;
1178 else
1179 auio.uio_extflg = UIO_COPY_DEFAULT;
1180
1181 ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);
1182 error = VOP_READ(vp, &auio, ioflag, fp->f_cred, NULL);
1183 count -= auio.uio_resid;
1184 CPU_STATS_ENTER_K();
1185 cp = CPU;
1186 CPU_STATS_ADDQ(cp, sys, sysread, 1);
1187 CPU_STATS_ADDQ(cp, sys, readch, (ulong_t)count);
1188 CPU_STATS_EXIT_K();
1189 ttolwp(curthread)->lwp_ru.ioch += (ulong_t)count;
1190
1191 VOP_RWUNLOCK(vp, rwflag, NULL);
1192
1193 if (error == EINTR && count != 0)
1194 error = 0;
1195 out:
1196 if (in_crit)
1197 nbl_end_crit(vp);
1198 releasef(fdes);
1199 if (aiovlen != 0)
1200 kmem_free(aiov, aiovlen);
1201 if (error)
1202 return (set_errno(error));
1203 return (count);
1204 }
1205
1206 ssize_t
1207 pwritev(int fdes, struct iovec *iovp, int iovcnt, off_t offset,
1208 off_t extended_offset)
1209 {
1210 struct uio auio;
1211 struct iovec buf[IOV_MAX_STACK], *aiov = buf;
1212 int aiovlen = 0;
1213 file_t *fp;
1214 register vnode_t *vp;
1215 struct cpu *cp;
1216 int fflag, ioflag, rwflag;
1217 ssize_t count, bcount;
1218 int error = 0;
1219 int i;
1220
1221 #if defined(_SYSCALL32_IMPL) || defined(_ILP32)
1222 u_offset_t fileoff = ((u_offset_t)extended_offset << 32) |
1223 (u_offset_t)offset;
1224 #else /* _SYSCALL32_IMPL || _ILP32 */
1225 u_offset_t fileoff = (u_offset_t)(ulong_t)offset;
#endif /* _SYSCALL32_IMPL || _ILP32 */
1227 #ifdef _SYSCALL32_IMPL
1228 const u_offset_t maxoff = get_udatamodel() == DATAMODEL_ILP32 &&
1229 extended_offset == 0?
1230 MAXOFF32_T : MAXOFFSET_T;
1231 #else /* _SYSCALL32_IMPL */
1232 const u_offset_t maxoff = MAXOFF32_T;
1233 #endif /* _SYSCALL32_IMPL */
1234
1235 int in_crit = 0;
1236
1237 if (iovcnt <= 0 || iovcnt > IOV_MAX)
1238 return (set_errno(EINVAL));
1239
1240 if (iovcnt > IOV_MAX_STACK) {
1241 aiovlen = iovcnt * sizeof (iovec_t);
1242 aiov = kmem_alloc(aiovlen, KM_SLEEP);
1243 }
1244
1245 #ifdef _SYSCALL32_IMPL
1246 /*
1247 * 32-bit callers need to have their iovec expanded,
1248 * while ensuring that they can't move more than 2Gbytes
1249 * of data in a single call.
1250 */
1251 if (get_udatamodel() == DATAMODEL_ILP32) {
1252 struct iovec32 buf32[IOV_MAX_STACK], *aiov32 = buf32;
1253 int aiov32len;
1254 ssize32_t count32;
1255
1256 aiov32len = iovcnt * sizeof (iovec32_t);
1257 if (aiovlen != 0)
1258 aiov32 = kmem_alloc(aiov32len, KM_SLEEP);
1259
1260 if (copyin(iovp, aiov32, aiov32len)) {
1261 if (aiovlen != 0) {
1262 kmem_free(aiov32, aiov32len);
1263 kmem_free(aiov, aiovlen);
1264 }
1265 return (set_errno(EFAULT));
1266 }
1267
1268 count32 = 0;
1269 for (i = 0; i < iovcnt; i++) {
1270 ssize32_t iovlen32 = aiov32[i].iov_len;
1271 count32 += iovlen32;
1272 if (iovlen32 < 0 || count32 < 0) {
1273 if (aiovlen != 0) {
1274 kmem_free(aiov32, aiov32len);
1275 kmem_free(aiov, aiovlen);
1276 }
1277 return (set_errno(EINVAL));
1278 }
1279 aiov[i].iov_len = iovlen32;
1280 aiov[i].iov_base =
1281 (caddr_t)(uintptr_t)aiov32[i].iov_base;
1282 }
1283 if (aiovlen != 0)
1284 kmem_free(aiov32, aiov32len);
1285 } else
1286 #endif /* _SYSCALL32_IMPL */
1287 if (copyin(iovp, aiov, iovcnt * sizeof (iovec_t))) {
1288 if (aiovlen != 0)
1289 kmem_free(aiov, aiovlen);
1290 return (set_errno(EFAULT));
1291 }
1292
1293 count = 0;
1294 for (i = 0; i < iovcnt; i++) {
1295 ssize_t iovlen = aiov[i].iov_len;
1296 count += iovlen;
1297 if (iovlen < 0 || count < 0) {
1298 if (aiovlen != 0)
1299 kmem_free(aiov, aiovlen);
1300 return (set_errno(EINVAL));
1301 }
1302 }
1303
1304 if ((bcount = (ssize_t)count) < 0) {
1305 if (aiovlen != 0)
1306 kmem_free(aiov, aiovlen);
1307 return (set_errno(EINVAL));
1308 }
1309 if ((fp = getf(fdes)) == NULL) {
1310 if (aiovlen != 0)
1311 kmem_free(aiov, aiovlen);
1312 return (set_errno(EBADF));
1313 }
1314 if (((fflag = fp->f_flag) & FWRITE) == 0) {
1315 error = EBADF;
1316 goto out;
1317 }
1318 vp = fp->f_vnode;
1319 rwflag = 1;
1320 if (vp->v_type == VREG) {
1321
1322 if (bcount == 0)
1323 goto out;
1324
1325 /*
1326 * return EINVAL for offsets that cannot be
1327 * represented in an off_t.
1328 */
1329 if (fileoff > maxoff) {
1330 error = EINVAL;
1331 goto out;
1332 }
1333 /*
1334 * Take appropriate action if we are trying
1335 * to write above the resource limit.
1336 */
1337 if (fileoff >= curproc->p_fsz_ctl) {
1338 mutex_enter(&curproc->p_lock);
1339 /*
1340 * Return value ignored because it lists
1341 * actions taken, but we are in an error case.
1342 * We don't have any actions that depend on
1343 * what could happen in this call, so we ignore
1344 * the return value.
1345 */
1346 (void) rctl_action(
1347 rctlproc_legacy[RLIMIT_FSIZE],
1348 curproc->p_rctls, curproc,
1349 RCA_UNSAFE_SIGINFO);
1350 mutex_exit(&curproc->p_lock);
1351
1352 error = EFBIG;
1353 goto out;
1354 }
1355 /*
1356 * Don't allow pwritev to cause file sizes to exceed
1357 * maxoff.
1358 */
1359 if (fileoff == maxoff) {
1360 error = EFBIG;
1361 goto out;
1362 }
1363
1364 if (fileoff + bcount > maxoff)
1365 bcount = (ssize_t)((u_offset_t)maxoff - fileoff);
1366 } else if (vp->v_type == VFIFO) {
1367 error = ESPIPE;
1368 goto out;
1369 }
1370 /*
1371 * We have to enter the critical region before calling VOP_RWLOCK
1372 * to avoid a deadlock with ufs.
1373 */
1374 if (nbl_need_check(vp)) {
1375 int svmand;
1376
1377 nbl_start_crit(vp, RW_READER);
1378 in_crit = 1;
1379 error = nbl_svmand(vp, fp->f_cred, &svmand);
1380 if (error != 0)
1381 goto out;
1382 if (nbl_conflict(vp, NBL_WRITE, fileoff, count, svmand,
1383 NULL)) {
1384 error = EACCES;
1385 goto out;
1386 }
1387 }
1388
1389 (void) VOP_RWLOCK(vp, rwflag, NULL);
1390
1391
1392 /*
1393 * Behaviour is same as write(2). Please see comments for
1394 * write(2).
1395 */
1396
1397 if (vp->v_type == VREG) {
1398 if (fileoff >= curproc->p_fsz_ctl) {
1399 VOP_RWUNLOCK(vp, rwflag, NULL);
1400 mutex_enter(&curproc->p_lock);
1401 /* see above rctl_action comment */
1402 (void) rctl_action(
1403 rctlproc_legacy[RLIMIT_FSIZE],
1404 curproc->p_rctls,
1405 curproc, RCA_UNSAFE_SIGINFO);
1406 mutex_exit(&curproc->p_lock);
1407 error = EFBIG;
1408 goto out;
1409 }
1410 if (fileoff >= OFFSET_MAX(fp)) {
1411 VOP_RWUNLOCK(vp, rwflag, NULL);
1412 error = EFBIG;
1413 goto out;
1414 }
1415 if (fileoff + count > OFFSET_MAX(fp))
1416 count = (ssize_t)(OFFSET_MAX(fp) - fileoff);
1417 }
1418
1419 auio.uio_loffset = fileoff;
1420 auio.uio_iov = aiov;
1421 auio.uio_iovcnt = iovcnt;
1422 auio.uio_resid = bcount = count;
1423 auio.uio_segflg = UIO_USERSPACE;
1424 auio.uio_llimit = curproc->p_fsz_ctl;
1425 auio.uio_fmode = fflag;
1426 auio.uio_extflg = UIO_COPY_CACHED;
1427 ioflag = auio.uio_fmode & (FSYNC|FDSYNC|FRSYNC);
1428 error = VOP_WRITE(vp, &auio, ioflag, fp->f_cred, NULL);
1429 count -= auio.uio_resid;
1430 CPU_STATS_ENTER_K();
1431 cp = CPU;
1432 CPU_STATS_ADDQ(cp, sys, syswrite, 1);
1433 CPU_STATS_ADDQ(cp, sys, writech, (ulong_t)count);
1434 CPU_STATS_EXIT_K();
1435 ttolwp(curthread)->lwp_ru.ioch += (ulong_t)count;
1436
1437 VOP_RWUNLOCK(vp, rwflag, NULL);
1438
1439 if (error == EINTR && count != 0)
1440 error = 0;
1441 out:
1442 if (in_crit)
1443 nbl_end_crit(vp);
1444 releasef(fdes);
1445 if (aiovlen != 0)
1446 kmem_free(aiov, aiovlen);
1447 if (error)
1448 return (set_errno(error));
1449 return (count);
1450 }
1451
1452 #if defined(_SYSCALL32_IMPL) || defined(_ILP32)
1453
1454 /*
1455 * This syscall supplies 64-bit file offsets to 32-bit applications only.
1456 */
ssize32_t
pread64(int fdes, void *cbuf, size32_t count, uint32_t offset_1,
    uint32_t offset_2)
{
	struct uio auio;
	struct iovec aiov;
	file_t *fp;
	register vnode_t *vp;
	struct cpu *cp;
	int fflag, ioflag, rwflag;
	ssize_t bcount;
	int error = 0;
	u_offset_t fileoff;
	int in_crit = 0;	/* nonzero while inside nbmand critical region */

	/*
	 * The 64-bit offset arrives as two 32-bit halves; which argument
	 * carries the high half depends on the 32-bit ABI's endianness.
	 */
#if defined(_LITTLE_ENDIAN)
	fileoff = ((u_offset_t)offset_2 << 32) | (u_offset_t)offset_1;
#else
	fileoff = ((u_offset_t)offset_1 << 32) | (u_offset_t)offset_2;
#endif

	/* Reject counts that do not fit in a non-negative ssize32_t. */
	if ((bcount = (ssize_t)count) < 0 || bcount > INT32_MAX)
		return (set_errno(EINVAL));

	if ((fp = getf(fdes)) == NULL)
		return (set_errno(EBADF));
	/* The descriptor must have been opened for reading. */
	if (((fflag = fp->f_flag) & (FREAD)) == 0) {
		error = EBADF;
		goto out;
	}

	rwflag = 0;
	vp = fp->f_vnode;

	if (vp->v_type == VREG) {

		if (bcount == 0)
			goto out;

		/*
		 * Same as pread. See comments in pread.
		 */

		if (fileoff > MAXOFFSET_T) {
			error = EINVAL;
			goto out;
		}
		/* Clamp the read so it cannot extend past MAXOFFSET_T. */
		if (fileoff + bcount > MAXOFFSET_T)
			bcount = (ssize_t)(MAXOFFSET_T - fileoff);
	} else if (vp->v_type == VFIFO) {
		error = ESPIPE;
		goto out;
	}

	/*
	 * We have to enter the critical region before calling VOP_RWLOCK
	 * to avoid a deadlock with ufs.
	 */
	if (nbl_need_check(vp)) {
		int svmand;

		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		error = nbl_svmand(vp, fp->f_cred, &svmand);
		if (error != 0)
			goto out;
		if (nbl_conflict(vp, NBL_READ, fileoff, bcount, svmand,
		    NULL)) {
			error = EACCES;
			goto out;
		}
	}

	aiov.iov_base = cbuf;
	aiov.iov_len = bcount;
	(void) VOP_RWLOCK(vp, rwflag, NULL);
	auio.uio_loffset = fileoff;

	/*
	 * Note: File size can never be greater than MAXOFFSET_T.
	 * If ever we start supporting 128 bit files the code
	 * similar to the one in pread at this place should be here.
	 * Here we avoid the unnecessary VOP_GETATTR() when we
	 * know that fileoff == MAXOFFSET_T implies that it is always
	 * greater than or equal to file size.
	 */
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = bcount;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_llimit = MAXOFFSET_T;
	auio.uio_fmode = fflag;
	auio.uio_extflg = UIO_COPY_CACHED;

	ioflag = auio.uio_fmode & (FAPPEND|FSYNC|FDSYNC|FRSYNC);

	/* If read sync is not asked for, filter sync flags */
	if ((ioflag & FRSYNC) == 0)
		ioflag &= ~(FSYNC|FDSYNC);
	error = VOP_READ(vp, &auio, ioflag, fp->f_cred, NULL);
	/* bcount becomes the number of bytes actually transferred. */
	bcount -= auio.uio_resid;
	CPU_STATS_ENTER_K();
	cp = CPU;
	CPU_STATS_ADDQ(cp, sys, sysread, 1);
	CPU_STATS_ADDQ(cp, sys, readch, (ulong_t)bcount);
	CPU_STATS_EXIT_K();
	ttolwp(curthread)->lwp_ru.ioch += (ulong_t)bcount;
	VOP_RWUNLOCK(vp, rwflag, NULL);

	/* A partially completed transfer is reported as success. */
	if (error == EINTR && bcount != 0)
		error = 0;
out:
	if (in_crit)
		nbl_end_crit(vp);
	releasef(fdes);
	if (error)
		return (set_errno(error));
	return (bcount);
}
1576
1577 /*
1578 * This syscall supplies 64-bit file offsets to 32-bit applications only.
1579 */
ssize32_t
pwrite64(int fdes, void *cbuf, size32_t count, uint32_t offset_1,
    uint32_t offset_2)
{
	struct uio auio;
	struct iovec aiov;
	file_t *fp;
	register vnode_t *vp;
	struct cpu *cp;
	int fflag, ioflag, rwflag;
	ssize_t bcount;
	int error = 0;
	u_offset_t fileoff;
	int in_crit = 0;	/* nonzero while inside nbmand critical region */

	/*
	 * The 64-bit offset arrives as two 32-bit halves; which argument
	 * carries the high half depends on the 32-bit ABI's endianness.
	 */
#if defined(_LITTLE_ENDIAN)
	fileoff = ((u_offset_t)offset_2 << 32) | (u_offset_t)offset_1;
#else
	fileoff = ((u_offset_t)offset_1 << 32) | (u_offset_t)offset_2;
#endif

	/* Reject counts that do not fit in a non-negative ssize32_t. */
	if ((bcount = (ssize_t)count) < 0 || bcount > INT32_MAX)
		return (set_errno(EINVAL));
	if ((fp = getf(fdes)) == NULL)
		return (set_errno(EBADF));
	/* The descriptor must have been opened for writing. */
	if (((fflag = fp->f_flag) & (FWRITE)) == 0) {
		error = EBADF;
		goto out;
	}

	rwflag = 1;
	vp = fp->f_vnode;

	if (vp->v_type == VREG) {

		if (bcount == 0)
			goto out;

		/*
		 * See comments in pwrite.
		 */
		if (fileoff > MAXOFFSET_T) {
			error = EINVAL;
			goto out;
		}
		/*
		 * Writing at or beyond the file-size resource limit:
		 * post the RLIMIT_FSIZE rctl action and fail with EFBIG.
		 */
		if (fileoff >= curproc->p_fsz_ctl) {
			mutex_enter(&curproc->p_lock);
			(void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE],
			    curproc->p_rctls, curproc, RCA_SAFE);
			mutex_exit(&curproc->p_lock);
			error = EFBIG;
			goto out;
		}
		if (fileoff == MAXOFFSET_T) {
			error = EFBIG;
			goto out;
		}
		/* Clamp the write so it cannot extend past MAXOFFSET_T. */
		if (fileoff + bcount > MAXOFFSET_T)
			bcount = (ssize_t)((u_offset_t)MAXOFFSET_T - fileoff);
	} else if (vp->v_type == VFIFO) {
		error = ESPIPE;
		goto out;
	}

	/*
	 * We have to enter the critical region before calling VOP_RWLOCK
	 * to avoid a deadlock with ufs.
	 */
	if (nbl_need_check(vp)) {
		int svmand;

		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		error = nbl_svmand(vp, fp->f_cred, &svmand);
		if (error != 0)
			goto out;
		if (nbl_conflict(vp, NBL_WRITE, fileoff, bcount, svmand,
		    NULL)) {
			error = EACCES;
			goto out;
		}
	}

	aiov.iov_base = cbuf;
	aiov.iov_len = bcount;
	(void) VOP_RWLOCK(vp, rwflag, NULL);
	auio.uio_loffset = fileoff;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = bcount;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_llimit = curproc->p_fsz_ctl;
	auio.uio_fmode = fflag;
	auio.uio_extflg = UIO_COPY_CACHED;

	/*
	 * The SUSv4 POSIX specification states:
	 * The pwrite() function shall be equivalent to write(), except
	 * that it writes into a given position and does not change
	 * the file offset (regardless of whether O_APPEND is set).
	 * To make this be true, we omit the FAPPEND flag from ioflag.
	 */
	ioflag = auio.uio_fmode & (FSYNC|FDSYNC|FRSYNC);

	error = VOP_WRITE(vp, &auio, ioflag, fp->f_cred, NULL);
	/* bcount becomes the number of bytes actually transferred. */
	bcount -= auio.uio_resid;
	CPU_STATS_ENTER_K();
	cp = CPU;
	CPU_STATS_ADDQ(cp, sys, syswrite, 1);
	CPU_STATS_ADDQ(cp, sys, writech, (ulong_t)bcount);
	CPU_STATS_EXIT_K();
	ttolwp(curthread)->lwp_ru.ioch += (ulong_t)bcount;
	VOP_RWUNLOCK(vp, rwflag, NULL);

	/* A partially completed transfer is reported as success. */
	if (error == EINTR && bcount != 0)
		error = 0;
out:
	if (in_crit)
		nbl_end_crit(vp);
	releasef(fdes);
	if (error)
		return (set_errno(error));
	return (bcount);
}
1704
1705 #endif /* _SYSCALL32_IMPL || _ILP32 */
1706
1707 #ifdef _SYSCALL32_IMPL
1708 /*
1709 * Tail-call elimination of xxx32() down to xxx()
1710 *
1711 * A number of xxx32 system calls take a len (or count) argument and
1712 * return a number in the range [0,len] or -1 on error.
1713 * Given an ssize32_t input len, the downcall xxx() will return
1714 * a 64-bit value that is -1 or in the range [0,len] which actually
1715 * is a proper return value for the xxx32 call. So even if the xxx32
1716 * calls can be considered as returning a ssize32_t, they are currently
1717 * declared as returning a ssize_t as this enables tail-call elimination.
1718 *
1719 * The cast of len (or count) to ssize32_t is needed to ensure we pass
1720 * down negative input values as such and let the downcall handle error
 * reporting. Functions covered by this comment are:
1722 *
1723 * rw.c: read32, write32, pread32, pwrite32, readv32, writev32.
1724 * socksyscall.c: recv32, recvfrom32, send32, sendto32.
1725 * readlink.c: readlink32.
1726 */
1727
/*
 * ILP32 entry point for read(2).  Kept as a bare forwarding call so the
 * tail-call elimination described in the comment above applies.
 */
ssize_t
read32(int32_t fdes, caddr32_t cbuf, size32_t count)
{
	return (read(fdes,
	    (void *)(uintptr_t)cbuf, (ssize32_t)count));
}
1734
/*
 * ILP32 entry point for write(2).  Kept as a bare forwarding call so the
 * tail-call elimination described in the comment above applies.
 */
ssize_t
write32(int32_t fdes, caddr32_t cbuf, size32_t count)
{
	return (write(fdes,
	    (void *)(uintptr_t)cbuf, (ssize32_t)count));
}
1741
/*
 * ILP32 entry point for pread(2).  The (uint32_t) cast zero-extends the
 * 32-bit offset before widening to off_t; the call itself is a bare
 * forward so tail-call elimination (see comment above) applies.
 */
ssize_t
pread32(int32_t fdes, caddr32_t cbuf, size32_t count, off32_t offset)
{
	return (pread(fdes,
	    (void *)(uintptr_t)cbuf, (ssize32_t)count,
	    (off_t)(uint32_t)offset));
}
1749
/*
 * ILP32 entry point for pwrite(2).  The (uint32_t) cast zero-extends the
 * 32-bit offset before widening to off_t; the call itself is a bare
 * forward so tail-call elimination (see comment above) applies.
 */
ssize_t
pwrite32(int32_t fdes, caddr32_t cbuf, size32_t count, off32_t offset)
{
	return (pwrite(fdes,
	    (void *)(uintptr_t)cbuf, (ssize32_t)count,
	    (off_t)(uint32_t)offset));
}
1757
/*
 * ILP32 entry point for readv(2).  Bare forwarding call; readv() itself
 * performs the iovec32 expansion for ILP32 callers.
 */
ssize_t
readv32(int32_t fdes, caddr32_t iovp, int32_t iovcnt)
{
	return (readv(fdes, (void *)(uintptr_t)iovp, iovcnt));
}
1763
/*
 * ILP32 entry point for writev(2).  Bare forwarding call; writev() itself
 * performs the iovec32 expansion for ILP32 callers.
 */
ssize_t
writev32(int32_t fdes, caddr32_t iovp, int32_t iovcnt)
{
	return (writev(fdes, (void *)(uintptr_t)iovp, iovcnt));
}
1769 #endif /* _SYSCALL32_IMPL */