1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright (c) 2013, OmniTI Computer Consulting, Inc. All rights reserved.
25 * Copyright 2015, Joyent, Inc.
26 */
27
28 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
29 /* All Rights Reserved */
30
31 /*
32 * Portions of this source code were derived from Berkeley 4.3 BSD
33 * under license from the Regents of the University of California.
34 */
35
36
37 #include <sys/param.h>
38 #include <sys/isa_defs.h>
39 #include <sys/types.h>
40 #include <sys/sysmacros.h>
41 #include <sys/systm.h>
42 #include <sys/errno.h>
43 #include <sys/fcntl.h>
44 #include <sys/flock.h>
45 #include <sys/vnode.h>
46 #include <sys/file.h>
47 #include <sys/mode.h>
48 #include <sys/proc.h>
49 #include <sys/filio.h>
50 #include <sys/share.h>
51 #include <sys/debug.h>
52 #include <sys/rctl.h>
53 #include <sys/nbmlock.h>
54
55 #include <sys/cmn_err.h>
56
57 static int flock_check(vnode_t *, flock64_t *, offset_t, offset_t);
58 static int flock_get_start(vnode_t *, flock64_t *, offset_t, u_offset_t *);
59 static void fd_too_big(proc_t *);
60
61 /*
62 * File control.
63 */
64 int
65 fcntl(int fdes, int cmd, intptr_t arg)
66 {
67 int iarg;
68 int error = 0;
69 int retval;
70 proc_t *p;
71 file_t *fp;
72 vnode_t *vp;
73 u_offset_t offset;
74 u_offset_t start;
75 struct vattr vattr;
76 int in_crit;
77 int flag;
78 struct flock sbf;
79 struct flock64 bf;
80 struct o_flock obf;
81 struct flock64_32 bf64_32;
82 struct fshare fsh;
83 struct shrlock shr;
84 struct shr_locowner shr_own;
85 offset_t maxoffset;
86 model_t datamodel;
87 int fdres;
88
89 #if defined(_ILP32) && !defined(lint) && defined(_SYSCALL32)
90 ASSERT(sizeof (struct flock) == sizeof (struct flock32));
91 ASSERT(sizeof (struct flock64) == sizeof (struct flock64_32));
92 #endif
93 #if defined(_LP64) && !defined(lint) && defined(_SYSCALL32)
94 ASSERT(sizeof (struct flock) == sizeof (struct flock64_64));
95 ASSERT(sizeof (struct flock64) == sizeof (struct flock64_64));
96 #endif
97
98 /*
99 * First, for speed, deal with the subset of cases
100 * that do not require getf() / releasef().
101 */
102 switch (cmd) {
103 case F_GETFD:
104 if ((error = f_getfd_error(fdes, &flag)) == 0)
105 retval = flag;
106 goto out;
107
108 case F_SETFD:
109 error = f_setfd_error(fdes, (int)arg);
110 retval = 0;
111 goto out;
112
113 case F_GETFL:
114 if ((error = f_getfl(fdes, &flag)) == 0) {
115 retval = (flag & (FMASK | FASYNC));
116 if ((flag & (FSEARCH | FEXEC)) == 0)
117 retval += FOPEN;
118 else
119 retval |= (flag & (FSEARCH | FEXEC));
120 }
121 goto out;
122
123 case F_GETXFL:
124 if ((error = f_getfl(fdes, &flag)) == 0) {
125 retval = flag;
126 if ((flag & (FSEARCH | FEXEC)) == 0)
127 retval += FOPEN;
128 }
129 goto out;
130
131 case F_BADFD:
132 if ((error = f_badfd(fdes, &fdres, (int)arg)) == 0)
133 retval = fdres;
134 goto out;
135 }
136
137 /*
138 * Second, for speed, deal with the subset of cases that
139 * require getf() / releasef() but do not require copyin.
140 */
141 if ((fp = getf(fdes)) == NULL) {
142 error = EBADF;
143 goto out;
144 }
145 iarg = (int)arg;
146
147 switch (cmd) {
148 case F_DUPFD:
149 case F_DUPFD_CLOEXEC:
150 p = curproc;
151 if ((uint_t)iarg >= p->p_fno_ctl) {
152 if (iarg >= 0)
153 fd_too_big(p);
154 error = EINVAL;
155 goto done;
156 }
157 /*
158 * We need to increment the f_count reference counter
159 * before allocating a new file descriptor.
160 * Doing it other way round opens a window for race condition
161 * with closeandsetf() on the target file descriptor which can
162 * close the file still referenced by the original
163 * file descriptor.
164 */
165 mutex_enter(&fp->f_tlock);
166 fp->f_count++;
167 mutex_exit(&fp->f_tlock);
168 if ((retval = ufalloc_file(iarg, fp)) == -1) {
169 /*
170 * New file descriptor can't be allocated.
171 * Revert the reference count.
172 */
173 mutex_enter(&fp->f_tlock);
174 fp->f_count--;
175 mutex_exit(&fp->f_tlock);
176 error = EMFILE;
177 } else {
178 if (cmd == F_DUPFD_CLOEXEC) {
179 f_setfd(retval, FD_CLOEXEC);
180 }
181 }
182
183 if (error == 0 && fp->f_vnode != NULL) {
184 (void) VOP_IOCTL(fp->f_vnode, F_ASSOCI_PID,
185 (intptr_t)p->p_pidp->pid_id, FKIOCTL, kcred,
186 NULL, NULL);
187 }
188
189 goto done;
190
191 case F_DUP2FD_CLOEXEC:
192 if (fdes == iarg) {
193 error = EINVAL;
194 goto done;
195 }
196
197 /*FALLTHROUGH*/
198
199 case F_DUP2FD:
200 p = curproc;
201 if (fdes == iarg) {
202 retval = iarg;
203 } else if ((uint_t)iarg >= p->p_fno_ctl) {
204 if (iarg >= 0)
205 fd_too_big(p);
206 error = EBADF;
207 } else {
208 /*
209 * We can't hold our getf(fdes) across the call to
210 * closeandsetf() because it creates a window for
211 * deadlock: if one thread is doing dup2(a, b) while
212 * another is doing dup2(b, a), each one will block
213 * waiting for the other to call releasef(). The
214 * solution is to increment the file reference count
215 * (which we have to do anyway), then releasef(fdes),
216 * then closeandsetf(). Incrementing f_count ensures
217 * that fp won't disappear after we call releasef().
218 * When closeandsetf() fails, we try avoid calling
219 * closef() because of all the side effects.
220 */
221 mutex_enter(&fp->f_tlock);
222 fp->f_count++;
223 mutex_exit(&fp->f_tlock);
224 releasef(fdes);
225
226 /*
227 * Assume we succeed to duplicate the file descriptor
228 * and associate the pid to the vnode.
229 */
230 if (fp->f_vnode != NULL) {
231 (void) VOP_IOCTL(fp->f_vnode, F_ASSOCI_PID,
232 (intptr_t)p->p_pidp->pid_id, FKIOCTL,
233 kcred, NULL, NULL);
234 }
235
236 if ((error = closeandsetf(iarg, fp)) == 0) {
237 if (cmd == F_DUP2FD_CLOEXEC) {
238 f_setfd(iarg, FD_CLOEXEC);
239 }
240 retval = iarg;
241 } else {
242 mutex_enter(&fp->f_tlock);
243 if (fp->f_count > 1) {
244 fp->f_count--;
245 mutex_exit(&fp->f_tlock);
246 /*
247 * Failed to duplicate fdes,
248 * disassociate the pid from the vnode.
249 */
250 if (fp->f_vnode != NULL) {
251 (void) VOP_IOCTL(fp->f_vnode,
252 F_DASSOC_PID,
253 (intptr_t)p->p_pidp->pid_id,
254 FKIOCTL, kcred, NULL, NULL);
255 }
256
257 } else {
258 mutex_exit(&fp->f_tlock);
259 (void) closef(fp);
260 }
261 }
262 goto out;
263 }
264 goto done;
265
266 case F_SETFL:
267 vp = fp->f_vnode;
268 flag = fp->f_flag;
269 if ((iarg & (FNONBLOCK|FNDELAY)) == (FNONBLOCK|FNDELAY))
270 iarg &= ~FNDELAY;
271 if ((error = VOP_SETFL(vp, flag, iarg, fp->f_cred, NULL)) ==
272 0) {
273 iarg &= FMASK;
274 mutex_enter(&fp->f_tlock);
275 fp->f_flag &= ~FMASK | (FREAD|FWRITE);
276 fp->f_flag |= (iarg - FOPEN) & ~(FREAD|FWRITE);
277 mutex_exit(&fp->f_tlock);
278 }
279 retval = 0;
280 goto done;
281 }
282
283 /*
284 * Finally, deal with the expensive cases.
285 */
286 retval = 0;
287 in_crit = 0;
288 maxoffset = MAXOFF_T;
289 datamodel = DATAMODEL_NATIVE;
290 #if defined(_SYSCALL32_IMPL)
291 if ((datamodel = get_udatamodel()) == DATAMODEL_ILP32)
292 maxoffset = MAXOFF32_T;
293 #endif
294
295 vp = fp->f_vnode;
296 flag = fp->f_flag;
297 offset = fp->f_offset;
298
299 switch (cmd) {
300 /*
301 * The file system and vnode layers understand and implement
302 * locking with flock64 structures. So here once we pass through
303 * the test for compatibility as defined by LFS API, (for F_SETLK,
304 * F_SETLKW, F_GETLK, F_GETLKW, F_OFD_GETLK, F_OFD_SETLK, F_OFD_SETLKW,
305 * F_FREESP) we transform the flock structure to a flock64 structure
306 * and send it to the lower layers. Similarly in case of GETLK and
307 * OFD_GETLK the returned flock64 structure is transformed to a flock
308 * structure if everything fits in nicely, otherwise we return
309 * EOVERFLOW.
310 */
311
312 case F_GETLK:
313 case F_O_GETLK:
314 case F_SETLK:
315 case F_SETLKW:
316 case F_SETLK_NBMAND:
317 case F_OFD_GETLK:
318 case F_OFD_SETLK:
319 case F_OFD_SETLKW:
320 case F_FLOCK:
321 case F_FLOCKW:
322
323 /*
324 * Copy in input fields only.
325 */
326
327 if (cmd == F_O_GETLK) {
328 if (datamodel != DATAMODEL_ILP32) {
329 error = EINVAL;
330 break;
331 }
332
333 if (copyin((void *)arg, &obf, sizeof (obf))) {
334 error = EFAULT;
335 break;
336 }
337 bf.l_type = obf.l_type;
338 bf.l_whence = obf.l_whence;
339 bf.l_start = (off64_t)obf.l_start;
340 bf.l_len = (off64_t)obf.l_len;
341 bf.l_sysid = (int)obf.l_sysid;
342 bf.l_pid = obf.l_pid;
343 } else if (datamodel == DATAMODEL_NATIVE) {
344 if (copyin((void *)arg, &sbf, sizeof (sbf))) {
345 error = EFAULT;
346 break;
347 }
348 /*
349 * XXX In an LP64 kernel with an LP64 application
350 * there's no need to do a structure copy here
351 * struct flock == struct flock64. However,
352 * we did it this way to avoid more conditional
353 * compilation.
354 */
355 bf.l_type = sbf.l_type;
356 bf.l_whence = sbf.l_whence;
357 bf.l_start = (off64_t)sbf.l_start;
358 bf.l_len = (off64_t)sbf.l_len;
359 bf.l_sysid = sbf.l_sysid;
360 bf.l_pid = sbf.l_pid;
361 }
362 #if defined(_SYSCALL32_IMPL)
363 else {
364 struct flock32 sbf32;
365 if (copyin((void *)arg, &sbf32, sizeof (sbf32))) {
366 error = EFAULT;
367 break;
368 }
369 bf.l_type = sbf32.l_type;
370 bf.l_whence = sbf32.l_whence;
371 bf.l_start = (off64_t)sbf32.l_start;
372 bf.l_len = (off64_t)sbf32.l_len;
373 bf.l_sysid = sbf32.l_sysid;
374 bf.l_pid = sbf32.l_pid;
375 }
376 #endif /* _SYSCALL32_IMPL */
377
378 /*
379 * 64-bit support: check for overflow for 32-bit lock ops
380 */
381 if ((error = flock_check(vp, &bf, offset, maxoffset)) != 0)
382 break;
383
384 if (cmd == F_FLOCK || cmd == F_FLOCKW) {
385 /* FLOCK* locking is always over the entire file. */
386 if (bf.l_whence != 0 || bf.l_start != 0 ||
387 bf.l_len != 0) {
388 error = EINVAL;
389 break;
390 }
391 if (bf.l_type < F_RDLCK || bf.l_type > F_UNLCK) {
392 error = EINVAL;
393 break;
394 }
395 }
396
397 if (cmd == F_OFD_SETLK || cmd == F_OFD_SETLKW) {
398 /*
399 * TBD OFD-style locking is currently limited to
400 * covering the entire file.
401 */
402 if (bf.l_whence != 0 || bf.l_start != 0 ||
403 bf.l_len != 0) {
404 error = EINVAL;
405 break;
406 }
407 }
408
409 /*
410 * Not all of the filesystems understand F_O_GETLK, and
411 * there's no need for them to know. Map it to F_GETLK.
412 *
413 * The *_frlock functions in the various file systems basically
414 * do some validation and then funnel everything through the
415 * fs_frlock function. For OFD-style locks fs_frlock will do
416 * nothing so that once control returns here we can call the
417 * ofdlock function with the correct fp. For OFD-style locks
418 * the unsupported remote file systems, such as NFS, detect and
419 * reject the OFD-style cmd argument.
420 */
421 if ((error = VOP_FRLOCK(vp, (cmd == F_O_GETLK) ? F_GETLK : cmd,
422 &bf, flag, offset, NULL, fp->f_cred, NULL)) != 0)
423 break;
424
425 if (cmd == F_FLOCK || cmd == F_FLOCKW || cmd == F_OFD_GETLK ||
426 cmd == F_OFD_SETLK || cmd == F_OFD_SETLKW) {
427 /*
428 * This is an OFD-style lock so we need to handle it
429 * here. Because OFD-style locks are associated with
430 * the file_t we didn't have enough info down the
431 * VOP_FRLOCK path immediately above.
432 */
433 if ((error = ofdlock(fp, cmd, &bf, flag, offset)) != 0)
434 break;
435 }
436
437 /*
438 * If command is GETLK and no lock is found, only
439 * the type field is changed.
440 */
441 if ((cmd == F_O_GETLK || cmd == F_GETLK ||
442 cmd == F_OFD_GETLK) && bf.l_type == F_UNLCK) {
443 /* l_type always first entry, always a short */
444 if (copyout(&bf.l_type, &((struct flock *)arg)->l_type,
445 sizeof (bf.l_type)))
446 error = EFAULT;
447 break;
448 }
449
450 if (cmd == F_O_GETLK) {
451 /*
452 * Return an SVR3 flock structure to the user.
453 */
454 obf.l_type = (int16_t)bf.l_type;
455 obf.l_whence = (int16_t)bf.l_whence;
456 obf.l_start = (int32_t)bf.l_start;
457 obf.l_len = (int32_t)bf.l_len;
458 if (bf.l_sysid > SHRT_MAX || bf.l_pid > SHRT_MAX) {
459 /*
460 * One or both values for the above fields
461 * is too large to store in an SVR3 flock
462 * structure.
463 */
464 error = EOVERFLOW;
465 break;
466 }
467 obf.l_sysid = (int16_t)bf.l_sysid;
468 obf.l_pid = (int16_t)bf.l_pid;
469 if (copyout(&obf, (void *)arg, sizeof (obf)))
470 error = EFAULT;
471 } else if (cmd == F_GETLK || cmd == F_OFD_GETLK) {
472 /*
473 * Copy out SVR4 flock.
474 */
475 int i;
476
477 if (bf.l_start > maxoffset || bf.l_len > maxoffset) {
478 error = EOVERFLOW;
479 break;
480 }
481
482 if (datamodel == DATAMODEL_NATIVE) {
483 for (i = 0; i < 4; i++)
484 sbf.l_pad[i] = 0;
485 /*
486 * XXX In an LP64 kernel with an LP64
487 * application there's no need to do a
488 * structure copy here as currently
489 * struct flock == struct flock64.
490 * We did it this way to avoid more
491 * conditional compilation.
492 */
493 sbf.l_type = bf.l_type;
494 sbf.l_whence = bf.l_whence;
495 sbf.l_start = (off_t)bf.l_start;
496 sbf.l_len = (off_t)bf.l_len;
497 sbf.l_sysid = bf.l_sysid;
498 sbf.l_pid = bf.l_pid;
499 if (copyout(&sbf, (void *)arg, sizeof (sbf)))
500 error = EFAULT;
501 }
502 #if defined(_SYSCALL32_IMPL)
503 else {
504 struct flock32 sbf32;
505 if (bf.l_start > MAXOFF32_T ||
506 bf.l_len > MAXOFF32_T) {
507 error = EOVERFLOW;
508 break;
509 }
510 for (i = 0; i < 4; i++)
511 sbf32.l_pad[i] = 0;
512 sbf32.l_type = (int16_t)bf.l_type;
513 sbf32.l_whence = (int16_t)bf.l_whence;
514 sbf32.l_start = (off32_t)bf.l_start;
515 sbf32.l_len = (off32_t)bf.l_len;
516 sbf32.l_sysid = (int32_t)bf.l_sysid;
517 sbf32.l_pid = (pid32_t)bf.l_pid;
518 if (copyout(&sbf32,
519 (void *)arg, sizeof (sbf32)))
520 error = EFAULT;
521 }
522 #endif
523 }
524 break;
525
526 case F_CHKFL:
527 /*
528 * This is for internal use only, to allow the vnode layer
529 * to validate a flags setting before applying it. User
530 * programs can't issue it.
531 */
532 error = EINVAL;
533 break;
534
535 case F_ALLOCSP:
536 case F_FREESP:
537 case F_ALLOCSP64:
538 case F_FREESP64:
539 /*
540 * Test for not-a-regular-file (and returning EINVAL)
541 * before testing for open-for-writing (and returning EBADF).
542 * This is relied upon by posix_fallocate() in libc.
543 */
544 if (vp->v_type != VREG) {
545 error = EINVAL;
546 break;
547 }
548
549 if ((flag & FWRITE) == 0) {
550 error = EBADF;
551 break;
552 }
553
554 if (datamodel != DATAMODEL_ILP32 &&
555 (cmd == F_ALLOCSP64 || cmd == F_FREESP64)) {
556 error = EINVAL;
557 break;
558 }
559
560 #if defined(_ILP32) || defined(_SYSCALL32_IMPL)
561 if (datamodel == DATAMODEL_ILP32 &&
562 (cmd == F_ALLOCSP || cmd == F_FREESP)) {
563 struct flock32 sbf32;
564 /*
565 * For compatibility we overlay an SVR3 flock on an SVR4
566 * flock. This works because the input field offsets
567 * in "struct flock" were preserved.
568 */
569 if (copyin((void *)arg, &sbf32, sizeof (sbf32))) {
570 error = EFAULT;
571 break;
572 } else {
573 bf.l_type = sbf32.l_type;
574 bf.l_whence = sbf32.l_whence;
575 bf.l_start = (off64_t)sbf32.l_start;
576 bf.l_len = (off64_t)sbf32.l_len;
577 bf.l_sysid = sbf32.l_sysid;
578 bf.l_pid = sbf32.l_pid;
579 }
580 }
581 #endif /* _ILP32 || _SYSCALL32_IMPL */
582
583 #if defined(_LP64)
584 if (datamodel == DATAMODEL_LP64 &&
585 (cmd == F_ALLOCSP || cmd == F_FREESP)) {
586 if (copyin((void *)arg, &bf, sizeof (bf))) {
587 error = EFAULT;
588 break;
589 }
590 }
591 #endif /* defined(_LP64) */
592
593 #if !defined(_LP64) || defined(_SYSCALL32_IMPL)
594 if (datamodel == DATAMODEL_ILP32 &&
595 (cmd == F_ALLOCSP64 || cmd == F_FREESP64)) {
596 if (copyin((void *)arg, &bf64_32, sizeof (bf64_32))) {
597 error = EFAULT;
598 break;
599 } else {
600 /*
601 * Note that the size of flock64 is different in
602 * the ILP32 and LP64 models, due to the l_pad
603 * field. We do not want to assume that the
604 * flock64 structure is laid out the same in
605 * ILP32 and LP64 environments, so we will
606 * copy in the ILP32 version of flock64
607 * explicitly and copy it to the native
608 * flock64 structure.
609 */
610 bf.l_type = (short)bf64_32.l_type;
611 bf.l_whence = (short)bf64_32.l_whence;
612 bf.l_start = bf64_32.l_start;
613 bf.l_len = bf64_32.l_len;
614 bf.l_sysid = (int)bf64_32.l_sysid;
615 bf.l_pid = (pid_t)bf64_32.l_pid;
616 }
617 }
618 #endif /* !defined(_LP64) || defined(_SYSCALL32_IMPL) */
619
620 if (cmd == F_ALLOCSP || cmd == F_FREESP)
621 error = flock_check(vp, &bf, offset, maxoffset);
622 else if (cmd == F_ALLOCSP64 || cmd == F_FREESP64)
623 error = flock_check(vp, &bf, offset, MAXOFFSET_T);
624 if (error)
625 break;
626
627 if (vp->v_type == VREG && bf.l_len == 0 &&
628 bf.l_start > OFFSET_MAX(fp)) {
629 error = EFBIG;
630 break;
631 }
632
633 /*
634 * Make sure that there are no conflicting non-blocking
635 * mandatory locks in the region being manipulated. If
636 * there are such locks then return EACCES.
637 */
638 if ((error = flock_get_start(vp, &bf, offset, &start)) != 0)
639 break;
640
641 if (nbl_need_check(vp)) {
642 u_offset_t begin;
643 ssize_t length;
644
645 nbl_start_crit(vp, RW_READER);
646 in_crit = 1;
647 vattr.va_mask = AT_SIZE;
648 if ((error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL))
649 != 0)
650 break;
651 begin = start > vattr.va_size ? vattr.va_size : start;
652 length = vattr.va_size > start ? vattr.va_size - start :
653 start - vattr.va_size;
654 if (nbl_conflict(vp, NBL_WRITE, begin, length, 0,
655 NULL)) {
656 error = EACCES;
657 break;
658 }
659 }
660
661 if (cmd == F_ALLOCSP64)
662 cmd = F_ALLOCSP;
663 else if (cmd == F_FREESP64)
664 cmd = F_FREESP;
665
666 error = VOP_SPACE(vp, cmd, &bf, flag, offset, fp->f_cred, NULL);
667
668 break;
669
670 #if !defined(_LP64) || defined(_SYSCALL32_IMPL)
671 case F_GETLK64:
672 case F_SETLK64:
673 case F_SETLKW64:
674 case F_SETLK64_NBMAND:
675 case F_OFD_GETLK64:
676 case F_OFD_SETLK64:
677 case F_OFD_SETLKW64:
678 case F_FLOCK64:
679 case F_FLOCKW64:
680 /*
681 * Large Files: Here we set cmd as *LK and send it to
682 * lower layers. *LK64 is only for the user land.
683 * Most of the comments described above for F_SETLK
684 * applies here too.
685 * Large File support is only needed for ILP32 apps!
686 */
687 if (datamodel != DATAMODEL_ILP32) {
688 error = EINVAL;
689 break;
690 }
691
692 if (cmd == F_GETLK64)
693 cmd = F_GETLK;
694 else if (cmd == F_SETLK64)
695 cmd = F_SETLK;
696 else if (cmd == F_SETLKW64)
697 cmd = F_SETLKW;
698 else if (cmd == F_SETLK64_NBMAND)
699 cmd = F_SETLK_NBMAND;
700 else if (cmd == F_OFD_GETLK64)
701 cmd = F_OFD_GETLK;
702 else if (cmd == F_OFD_SETLK64)
703 cmd = F_OFD_SETLK;
704 else if (cmd == F_OFD_SETLKW64)
705 cmd = F_OFD_SETLKW;
706 else if (cmd == F_FLOCK64)
707 cmd = F_FLOCK;
708 else if (cmd == F_FLOCKW64)
709 cmd = F_FLOCKW;
710
711 /*
712 * Note that the size of flock64 is different in the ILP32
713 * and LP64 models, due to the sucking l_pad field.
714 * We do not want to assume that the flock64 structure is
715 * laid out in the same in ILP32 and LP64 environments, so
716 * we will copy in the ILP32 version of flock64 explicitly
717 * and copy it to the native flock64 structure.
718 */
719
720 if (copyin((void *)arg, &bf64_32, sizeof (bf64_32))) {
721 error = EFAULT;
722 break;
723 }
724
725 bf.l_type = (short)bf64_32.l_type;
726 bf.l_whence = (short)bf64_32.l_whence;
727 bf.l_start = bf64_32.l_start;
728 bf.l_len = bf64_32.l_len;
729 bf.l_sysid = (int)bf64_32.l_sysid;
730 bf.l_pid = (pid_t)bf64_32.l_pid;
731
732 if ((error = flock_check(vp, &bf, offset, MAXOFFSET_T)) != 0)
733 break;
734
735 if (cmd == F_FLOCK || cmd == F_FLOCKW) {
736 /* FLOCK* locking is always over the entire file. */
737 if (bf.l_whence != 0 || bf.l_start != 0 ||
738 bf.l_len != 0) {
739 error = EINVAL;
740 break;
741 }
742 if (bf.l_type < F_RDLCK || bf.l_type > F_UNLCK) {
743 error = EINVAL;
744 break;
745 }
746 }
747
748 if (cmd == F_OFD_SETLK || cmd == F_OFD_SETLKW) {
749 /*
750 * TBD OFD-style locking is currently limited to
751 * covering the entire file.
752 */
753 if (bf.l_whence != 0 || bf.l_start != 0 ||
754 bf.l_len != 0) {
755 error = EINVAL;
756 break;
757 }
758 }
759
760 /*
761 * The *_frlock functions in the various file systems basically
762 * do some validation and then funnel everything through the
763 * fs_frlock function. For OFD-style locks fs_frlock will do
764 * nothing so that once control returns here we can call the
765 * ofdlock function with the correct fp. For OFD-style locks
766 * the unsupported remote file systems, such as NFS, detect and
767 * reject the OFD-style cmd argument.
768 */
769 if ((error = VOP_FRLOCK(vp, cmd, &bf, flag, offset,
770 NULL, fp->f_cred, NULL)) != 0)
771 break;
772
773 if (cmd == F_FLOCK || cmd == F_FLOCKW || cmd == F_OFD_GETLK ||
774 cmd == F_OFD_SETLK || cmd == F_OFD_SETLKW) {
775 /*
776 * This is an OFD-style lock so we need to handle it
777 * here. Because OFD-style locks are associated with
778 * the file_t we didn't have enough info down the
779 * VOP_FRLOCK path immediately above.
780 */
781 if ((error = ofdlock(fp, cmd, &bf, flag, offset)) != 0)
782 break;
783 }
784
785 if ((cmd == F_GETLK || cmd == F_OFD_GETLK) &&
786 bf.l_type == F_UNLCK) {
787 if (copyout(&bf.l_type, &((struct flock *)arg)->l_type,
788 sizeof (bf.l_type)))
789 error = EFAULT;
790 break;
791 }
792
793 if (cmd == F_GETLK || cmd == F_OFD_GETLK) {
794 int i;
795
796 /*
797 * We do not want to assume that the flock64 structure
798 * is laid out in the same in ILP32 and LP64
799 * environments, so we will copy out the ILP32 version
800 * of flock64 explicitly after copying the native
801 * flock64 structure to it.
802 */
803 for (i = 0; i < 4; i++)
804 bf64_32.l_pad[i] = 0;
805 bf64_32.l_type = (int16_t)bf.l_type;
806 bf64_32.l_whence = (int16_t)bf.l_whence;
807 bf64_32.l_start = bf.l_start;
808 bf64_32.l_len = bf.l_len;
809 bf64_32.l_sysid = (int32_t)bf.l_sysid;
810 bf64_32.l_pid = (pid32_t)bf.l_pid;
811 if (copyout(&bf64_32, (void *)arg, sizeof (bf64_32)))
812 error = EFAULT;
813 }
814 break;
815 #endif /* !defined(_LP64) || defined(_SYSCALL32_IMPL) */
816
817 case F_SHARE:
818 case F_SHARE_NBMAND:
819 case F_UNSHARE:
820
821 /*
822 * Copy in input fields only.
823 */
824 if (copyin((void *)arg, &fsh, sizeof (fsh))) {
825 error = EFAULT;
826 break;
827 }
828
829 /*
830 * Local share reservations always have this simple form
831 */
832 shr.s_access = fsh.f_access;
833 shr.s_deny = fsh.f_deny;
834 shr.s_sysid = 0;
835 shr.s_pid = ttoproc(curthread)->p_pid;
836 shr_own.sl_pid = shr.s_pid;
837 shr_own.sl_id = fsh.f_id;
838 shr.s_own_len = sizeof (shr_own);
839 shr.s_owner = (caddr_t)&shr_own;
840 error = VOP_SHRLOCK(vp, cmd, &shr, flag, fp->f_cred, NULL);
841 break;
842
843 default:
844 error = EINVAL;
845 break;
846 }
847
848 if (in_crit)
849 nbl_end_crit(vp);
850
851 done:
852 releasef(fdes);
853 out:
854 if (error)
855 return (set_errno(error));
856 return (retval);
857 }
858
/*
 * Validate the byte range described by a lock/space request (*flp)
 * against the limit `max` (MAXOFF_T, MAXOFF32_T or MAXOFFSET_T,
 * depending on the caller's data model), and normalize it.
 *
 * `offset` is the file's current offset (used for SEEK_CUR); for
 * SEEK_END the file size is fetched with VOP_GETATTR.
 *
 * Returns 0 if the resolved [start, end] range is representable and
 * well-formed, EINVAL for out-of-range or inverted requests, EOVERFLOW
 * when the arithmetic would exceed `max`, or the VOP_GETATTR error.
 * As a side effect an F_UNLCK request with positive length reaching
 * exactly `max` is canonicalized to l_len == 0 ("to end of file").
 */
int
flock_check(vnode_t *vp, flock64_t *flp, offset_t offset, offset_t max)
{
	struct vattr	vattr;
	int	error;
	u_offset_t start, end;

	/*
	 * Determine the starting point of the request
	 */
	switch (flp->l_whence) {
	case 0:		/* SEEK_SET */
		start = (u_offset_t)flp->l_start;
		if (start > max)
			return (EINVAL);
		break;
	case 1:		/* SEEK_CUR */
		/* Pre-check so the signed addition below cannot overflow. */
		if (flp->l_start > (max - offset))
			return (EOVERFLOW);
		start = (u_offset_t)(flp->l_start + offset);
		if (start > max)
			return (EINVAL);
		break;
	case 2:		/* SEEK_END */
		vattr.va_mask = AT_SIZE;
		if (error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL))
			return (error);
		if (flp->l_start > (max - (offset_t)vattr.va_size))
			return (EOVERFLOW);
		start = (u_offset_t)(flp->l_start + (offset_t)vattr.va_size);
		if (start > max)
			return (EINVAL);
		break;
	default:
		return (EINVAL);
	}

	/*
	 * Determine the range covered by the request.
	 */
	if (flp->l_len == 0)
		end = MAXEND;
	else if ((offset_t)flp->l_len > 0) {
		if (flp->l_len > (max - start + 1))
			return (EOVERFLOW);
		end = (u_offset_t)(start + (flp->l_len - 1));
		ASSERT(end <= max);
	} else {
		/*
		 * Negative length; why do we even allow this ?
		 * Because this allows easy specification of
		 * the last n bytes of the file.
		 */
		/* The range is [start + l_len + 1, start]. */
		end = start;
		start += (u_offset_t)flp->l_len;
		(start)++;
		if (start > max)
			return (EINVAL);
		ASSERT(end <= max);
	}
	ASSERT(start <= max);
	if (flp->l_type == F_UNLCK && flp->l_len > 0 &&
	    end == (offset_t)max) {
		flp->l_len = 0;
	}
	if (start > end)
		return (EINVAL);
	return (0);
}
928
929 static int
930 flock_get_start(vnode_t *vp, flock64_t *flp, offset_t offset, u_offset_t *start)
931 {
932 struct vattr vattr;
933 int error;
934
935 /*
936 * Determine the starting point of the request. Assume that it is
937 * a valid starting point.
938 */
939 switch (flp->l_whence) {
940 case 0: /* SEEK_SET */
941 *start = (u_offset_t)flp->l_start;
942 break;
943 case 1: /* SEEK_CUR */
944 *start = (u_offset_t)(flp->l_start + offset);
945 break;
946 case 2: /* SEEK_END */
947 vattr.va_mask = AT_SIZE;
948 if (error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL))
949 return (error);
950 *start = (u_offset_t)(flp->l_start + (offset_t)vattr.va_size);
951 break;
952 default:
953 return (EINVAL);
954 }
955
956 return (0);
957 }
958
959 /*
960 * Take rctl action when the requested file descriptor is too big.
961 */
962 static void
963 fd_too_big(proc_t *p)
964 {
965 mutex_enter(&p->p_lock);
966 (void) rctl_action(rctlproc_legacy[RLIMIT_NOFILE],
967 p->p_rctls, p, RCA_SAFE);
968 mutex_exit(&p->p_lock);
969 }