1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright (c) 2013, OmniTI Computer Consulting, Inc. All rights reserved.
25 * Copyright 2015, Joyent, Inc.
26 */
27
28 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
29 /* All Rights Reserved */
30
31 /*
32 * Portions of this source code were derived from Berkeley 4.3 BSD
33 * under license from the Regents of the University of California.
34 */
35
36
37 #include <sys/param.h>
38 #include <sys/isa_defs.h>
39 #include <sys/types.h>
40 #include <sys/sysmacros.h>
41 #include <sys/systm.h>
42 #include <sys/errno.h>
43 #include <sys/fcntl.h>
44 #include <sys/flock.h>
45 #include <sys/vnode.h>
46 #include <sys/file.h>
47 #include <sys/mode.h>
48 #include <sys/proc.h>
49 #include <sys/filio.h>
50 #include <sys/share.h>
51 #include <sys/debug.h>
52 #include <sys/rctl.h>
53 #include <sys/nbmlock.h>
54
55 #include <sys/cmn_err.h>
56
57 static int flock_check(vnode_t *, flock64_t *, offset_t, offset_t);
58 static int flock_get_start(vnode_t *, flock64_t *, offset_t, u_offset_t *);
59 static void fd_too_big(proc_t *);
60
61 /*
62 * File control.
63 */
/*
 * fcntl(2) syscall handler.
 *
 * fdes - file descriptor to operate on
 * cmd  - F_* command (see sys/fcntl.h)
 * arg  - command-specific argument: an int for the dup/flag commands,
 *        or a user-space pointer to a flock/flock64/fshare structure
 *        for the locking and share-reservation commands.
 *
 * Returns the command-specific result, or -1 with errno set on failure
 * (via set_errno()).  The function is organized as three tiers, fastest
 * first: commands needing no getf(), commands needing getf() but no
 * copyin, and finally the copyin/copyout-heavy locking commands.
 */
int
fcntl(int fdes, int cmd, intptr_t arg)
{
	int iarg;
	int error = 0;
	int retval;
	proc_t *p;
	file_t *fp;
	vnode_t *vp;
	u_offset_t offset;
	u_offset_t start;
	struct vattr vattr;
	int in_crit;		/* nonzero while inside an nbmand critical region */
	int flag;
	struct flock sbf;	/* native-model flock for copyin/copyout */
	struct flock64 bf;	/* canonical form handed to the VFS layer */
	struct o_flock obf;	/* SVR3 compatibility flock (F_O_GETLK) */
	struct flock64_32 bf64_32;	/* ILP32 layout of flock64 */
	struct fshare fsh;
	struct shrlock shr;
	struct shr_locowner shr_own;
	offset_t maxoffset;	/* MAXOFF_T, or MAXOFF32_T for ILP32 callers */
	model_t datamodel;
	int fdres;

	/*
	 * The copy-free struct transformations below rely on these
	 * layout identities between the native and _SYSCALL32 models.
	 */
#if defined(_ILP32) && !defined(lint) && defined(_SYSCALL32)
	ASSERT(sizeof (struct flock) == sizeof (struct flock32));
	ASSERT(sizeof (struct flock64) == sizeof (struct flock64_32));
#endif
#if defined(_LP64) && !defined(lint) && defined(_SYSCALL32)
	ASSERT(sizeof (struct flock) == sizeof (struct flock64_64));
	ASSERT(sizeof (struct flock64) == sizeof (struct flock64_64));
#endif

	/*
	 * First, for speed, deal with the subset of cases
	 * that do not require getf() / releasef().
	 */
	switch (cmd) {
	case F_GETFD:
		if ((error = f_getfd_error(fdes, &flag)) == 0)
			retval = flag;
		goto out;

	case F_SETFD:
		error = f_setfd_error(fdes, (int)arg);
		retval = 0;
		goto out;

	case F_GETFL:
		/*
		 * Returned open flags are biased by FOPEN for ordinary
		 * opens; FSEARCH/FEXEC opens report those bits instead
		 * (such fds carry no FREAD/FWRITE).
		 */
		if ((error = f_getfl(fdes, &flag)) == 0) {
			retval = (flag & (FMASK | FASYNC));
			if ((flag & (FSEARCH | FEXEC)) == 0)
				retval += FOPEN;
			else
				retval |= (flag & (FSEARCH | FEXEC));
		}
		goto out;

	case F_GETXFL:
		/* Like F_GETFL but returns the full flag word unmasked. */
		if ((error = f_getfl(fdes, &flag)) == 0) {
			retval = flag;
			if ((flag & (FSEARCH | FEXEC)) == 0)
				retval += FOPEN;
		}
		goto out;

	case F_BADFD:
		if ((error = f_badfd(fdes, &fdres, (int)arg)) == 0)
			retval = fdres;
		goto out;
	}

	/*
	 * Second, for speed, deal with the subset of cases that
	 * require getf() / releasef() but do not require copyin.
	 */
	if ((fp = getf(fdes)) == NULL) {
		error = EBADF;
		goto out;
	}
	iarg = (int)arg;

	switch (cmd) {
	case F_DUPFD:
	case F_DUPFD_CLOEXEC:
		p = curproc;
		if ((uint_t)iarg >= p->p_fno_ctl) {
			/* Negative iarg is EINVAL without rctl action. */
			if (iarg >= 0)
				fd_too_big(p);
			error = EINVAL;
			goto done;
		}
		/*
		 * We need to increment the f_count reference counter
		 * before allocating a new file descriptor.
		 * Doing it other way round opens a window for race condition
		 * with closeandsetf() on the target file descriptor which can
		 * close the file still referenced by the original
		 * file descriptor.
		 */
		mutex_enter(&fp->f_tlock);
		fp->f_count++;
		mutex_exit(&fp->f_tlock);
		if ((retval = ufalloc_file(iarg, fp)) == -1) {
			/*
			 * New file descriptor can't be allocated.
			 * Revert the reference count.
			 */
			mutex_enter(&fp->f_tlock);
			fp->f_count--;
			mutex_exit(&fp->f_tlock);
			error = EMFILE;
		} else {
			if (cmd == F_DUPFD_CLOEXEC) {
				f_setfd(retval, FD_CLOEXEC);
			}
		}

		/*
		 * Notify the filesystem that this process now also
		 * references the vnode (best-effort; errors ignored).
		 * NOTE(review): F_ASSOCI_PID appears to be pid-tracking
		 * plumbing for interested filesystems — confirm against
		 * the consumers of this ioctl.
		 */
		if (error == 0 && fp->f_vnode != NULL) {
			(void) VOP_IOCTL(fp->f_vnode, F_ASSOCI_PID,
			    (intptr_t)p->p_pidp->pid_id, FKIOCTL, kcred,
			    NULL, NULL);
		}

		goto done;

	case F_DUP2FD_CLOEXEC:
		/*
		 * Unlike F_DUP2FD (which returns fdes unchanged when the
		 * descriptors are equal), the CLOEXEC variant rejects
		 * fdes == iarg outright.
		 */
		if (fdes == iarg) {
			error = EINVAL;
			goto done;
		}

		/*FALLTHROUGH*/

	case F_DUP2FD:
		p = curproc;
		if (fdes == iarg) {
			retval = iarg;
		} else if ((uint_t)iarg >= p->p_fno_ctl) {
			if (iarg >= 0)
				fd_too_big(p);
			error = EBADF;
		} else {
			/*
			 * We can't hold our getf(fdes) across the call to
			 * closeandsetf() because it creates a window for
			 * deadlock: if one thread is doing dup2(a, b) while
			 * another is doing dup2(b, a), each one will block
			 * waiting for the other to call releasef().  The
			 * solution is to increment the file reference count
			 * (which we have to do anyway), then releasef(fdes),
			 * then closeandsetf().  Incrementing f_count ensures
			 * that fp won't disappear after we call releasef().
			 * When closeandsetf() fails, we try avoid calling
			 * closef() because of all the side effects.
			 */
			mutex_enter(&fp->f_tlock);
			fp->f_count++;
			mutex_exit(&fp->f_tlock);
			releasef(fdes);

			/* assume we have forked successfully */
			if (fp->f_vnode != NULL) {
				(void) VOP_IOCTL(fp->f_vnode, F_ASSOCI_PID,
				    (intptr_t)p->p_pidp->pid_id, FKIOCTL,
				    kcred, NULL, NULL);
			}

			if ((error = closeandsetf(iarg, fp)) == 0) {
				if (cmd == F_DUP2FD_CLOEXEC) {
					f_setfd(iarg, FD_CLOEXEC);
				}
				retval = iarg;
			} else {
				mutex_enter(&fp->f_tlock);
				if (fp->f_count > 1) {
					/*
					 * Other references remain: just drop
					 * ours and undo the pid association.
					 */
					fp->f_count--;
					mutex_exit(&fp->f_tlock);
					if (fp->f_vnode != NULL) {
						(void) VOP_IOCTL(fp->f_vnode,
						    F_DASSOC_PID,
						    (intptr_t)p->p_pidp->pid_id,
						    FKIOCTL, kcred, NULL, NULL);
					}

				} else {
					/* Last reference: full close. */
					mutex_exit(&fp->f_tlock);
					(void) closef(fp);
				}
			}
			/* fdes was already released above; skip 'done'. */
			goto out;
		}
		goto done;

	case F_SETFL:
		vp = fp->f_vnode;
		flag = fp->f_flag;
		/* If both are set, FNONBLOCK semantics win over FNDELAY. */
		if ((iarg & (FNONBLOCK|FNDELAY)) == (FNONBLOCK|FNDELAY))
			iarg &= ~FNDELAY;
		if ((error = VOP_SETFL(vp, flag, iarg, fp->f_cred, NULL)) ==
		    0) {
			iarg &= FMASK;
			mutex_enter(&fp->f_tlock);
			/*
			 * Preserve FREAD/FWRITE (not settable here);
			 * replace the remaining FMASK bits, removing the
			 * FOPEN bias from the user-supplied value.
			 */
			fp->f_flag &= ~FMASK | (FREAD|FWRITE);
			fp->f_flag |= (iarg - FOPEN) & ~(FREAD|FWRITE);
			mutex_exit(&fp->f_tlock);
		}
		retval = 0;
		goto done;
	}

	/*
	 * Finally, deal with the expensive cases.
	 */
	retval = 0;
	in_crit = 0;
	maxoffset = MAXOFF_T;
	datamodel = DATAMODEL_NATIVE;
#if defined(_SYSCALL32_IMPL)
	if ((datamodel = get_udatamodel()) == DATAMODEL_ILP32)
		maxoffset = MAXOFF32_T;
#endif

	vp = fp->f_vnode;
	flag = fp->f_flag;
	offset = fp->f_offset;

	switch (cmd) {
	/*
	 * The file system and vnode layers understand and implement
	 * locking with flock64 structures. So here once we pass through
	 * the test for compatibility as defined by LFS API, (for F_SETLK,
	 * F_SETLKW, F_GETLK, F_GETLKW, F_OFD_GETLK, F_OFD_SETLK, F_OFD_SETLKW,
	 * F_FREESP) we transform the flock structure to a flock64 structure
	 * and send it to the lower layers. Similarly in case of GETLK and
	 * OFD_GETLK the returned flock64 structure is transformed to a flock
	 * structure if everything fits in nicely, otherwise we return
	 * EOVERFLOW.
	 */

	case F_GETLK:
	case F_O_GETLK:
	case F_SETLK:
	case F_SETLKW:
	case F_SETLK_NBMAND:
	case F_OFD_GETLK:
	case F_OFD_SETLK:
	case F_OFD_SETLKW:
	case F_FLOCK:
	case F_FLOCKW:

		/*
		 * Copy in input fields only.
		 */

		if (cmd == F_O_GETLK) {
			/* SVR3 compat command is only valid for ILP32 apps. */
			if (datamodel != DATAMODEL_ILP32) {
				error = EINVAL;
				break;
			}

			if (copyin((void *)arg, &obf, sizeof (obf))) {
				error = EFAULT;
				break;
			}
			bf.l_type = obf.l_type;
			bf.l_whence = obf.l_whence;
			bf.l_start = (off64_t)obf.l_start;
			bf.l_len = (off64_t)obf.l_len;
			bf.l_sysid = (int)obf.l_sysid;
			bf.l_pid = obf.l_pid;
		} else if (datamodel == DATAMODEL_NATIVE) {
			if (copyin((void *)arg, &sbf, sizeof (sbf))) {
				error = EFAULT;
				break;
			}
			/*
			 * XXX	In an LP64 kernel with an LP64 application
			 *	there's no need to do a structure copy here
			 *	struct flock == struct flock64. However,
			 *	we did it this way to avoid more conditional
			 *	compilation.
			 */
			bf.l_type = sbf.l_type;
			bf.l_whence = sbf.l_whence;
			bf.l_start = (off64_t)sbf.l_start;
			bf.l_len = (off64_t)sbf.l_len;
			bf.l_sysid = sbf.l_sysid;
			bf.l_pid = sbf.l_pid;
		}
#if defined(_SYSCALL32_IMPL)
		else {
			struct flock32 sbf32;
			if (copyin((void *)arg, &sbf32, sizeof (sbf32))) {
				error = EFAULT;
				break;
			}
			bf.l_type = sbf32.l_type;
			bf.l_whence = sbf32.l_whence;
			bf.l_start = (off64_t)sbf32.l_start;
			bf.l_len = (off64_t)sbf32.l_len;
			bf.l_sysid = sbf32.l_sysid;
			bf.l_pid = sbf32.l_pid;
		}
#endif /* _SYSCALL32_IMPL */

		/*
		 * 64-bit support: check for overflow for 32-bit lock ops
		 */
		if ((error = flock_check(vp, &bf, offset, maxoffset)) != 0)
			break;

		if (cmd == F_FLOCK || cmd == F_FLOCKW) {
			/* FLOCK* locking is always over the entire file. */
			if (bf.l_whence != 0 || bf.l_start != 0 ||
			    bf.l_len != 0) {
				error = EINVAL;
				break;
			}
			if (bf.l_type < F_RDLCK || bf.l_type > F_UNLCK) {
				error = EINVAL;
				break;
			}
		}

		if (cmd == F_OFD_SETLK || cmd == F_OFD_SETLKW) {
			/*
			 * TBD OFD-style locking is currently limited to
			 * covering the entire file.
			 */
			if (bf.l_whence != 0 || bf.l_start != 0 ||
			    bf.l_len != 0) {
				error = EINVAL;
				break;
			}
		}

		/*
		 * Not all of the filesystems understand F_O_GETLK, and
		 * there's no need for them to know.  Map it to F_GETLK.
		 *
		 * The *_frlock functions in the various file systems basically
		 * do some validation and then funnel everything through the
		 * fs_frlock function. For OFD-style locks fs_frlock will do
		 * nothing so that once control returns here we can call the
		 * ofdlock function with the correct fp. For OFD-style locks
		 * the unsupported remote file systems, such as NFS, detect and
		 * reject the OFD-style cmd argument.
		 */
		if ((error = VOP_FRLOCK(vp, (cmd == F_O_GETLK) ? F_GETLK : cmd,
		    &bf, flag, offset, NULL, fp->f_cred, NULL)) != 0)
			break;

		if (cmd == F_FLOCK || cmd == F_FLOCKW || cmd == F_OFD_GETLK ||
		    cmd == F_OFD_SETLK || cmd == F_OFD_SETLKW) {
			/*
			 * This is an OFD-style lock so we need to handle it
			 * here. Because OFD-style locks are associated with
			 * the file_t we didn't have enough info down the
			 * VOP_FRLOCK path immediately above.
			 */
			if ((error = ofdlock(fp, cmd, &bf, flag, offset)) != 0)
				break;
		}

		/*
		 * If command is GETLK and no lock is found, only
		 * the type field is changed.
		 */
		if ((cmd == F_O_GETLK || cmd == F_GETLK ||
		    cmd == F_OFD_GETLK) && bf.l_type == F_UNLCK) {
			/* l_type always first entry, always a short */
			if (copyout(&bf.l_type, &((struct flock *)arg)->l_type,
			    sizeof (bf.l_type)))
				error = EFAULT;
			break;
		}

		if (cmd == F_O_GETLK) {
			/*
			 * Return an SVR3 flock structure to the user.
			 */
			obf.l_type = (int16_t)bf.l_type;
			obf.l_whence = (int16_t)bf.l_whence;
			obf.l_start = (int32_t)bf.l_start;
			obf.l_len = (int32_t)bf.l_len;
			if (bf.l_sysid > SHRT_MAX || bf.l_pid > SHRT_MAX) {
				/*
				 * One or both values for the above fields
				 * is too large to store in an SVR3 flock
				 * structure.
				 */
				error = EOVERFLOW;
				break;
			}
			obf.l_sysid = (int16_t)bf.l_sysid;
			obf.l_pid = (int16_t)bf.l_pid;
			if (copyout(&obf, (void *)arg, sizeof (obf)))
				error = EFAULT;
		} else if (cmd == F_GETLK || cmd == F_OFD_GETLK) {
			/*
			 * Copy out SVR4 flock.
			 */
			int i;

			if (bf.l_start > maxoffset || bf.l_len > maxoffset) {
				error = EOVERFLOW;
				break;
			}

			if (datamodel == DATAMODEL_NATIVE) {
				for (i = 0; i < 4; i++)
					sbf.l_pad[i] = 0;
				/*
				 * XXX	In an LP64 kernel with an LP64
				 *	application there's no need to do a
				 *	structure copy here as currently
				 *	struct flock == struct flock64.
				 *	We did it this way to avoid more
				 *	conditional compilation.
				 */
				sbf.l_type = bf.l_type;
				sbf.l_whence = bf.l_whence;
				sbf.l_start = (off_t)bf.l_start;
				sbf.l_len = (off_t)bf.l_len;
				sbf.l_sysid = bf.l_sysid;
				sbf.l_pid = bf.l_pid;
				if (copyout(&sbf, (void *)arg, sizeof (sbf)))
					error = EFAULT;
			}
#if defined(_SYSCALL32_IMPL)
			else {
				struct flock32 sbf32;
				if (bf.l_start > MAXOFF32_T ||
				    bf.l_len > MAXOFF32_T) {
					error = EOVERFLOW;
					break;
				}
				for (i = 0; i < 4; i++)
					sbf32.l_pad[i] = 0;
				sbf32.l_type = (int16_t)bf.l_type;
				sbf32.l_whence = (int16_t)bf.l_whence;
				sbf32.l_start = (off32_t)bf.l_start;
				sbf32.l_len = (off32_t)bf.l_len;
				sbf32.l_sysid = (int32_t)bf.l_sysid;
				sbf32.l_pid = (pid32_t)bf.l_pid;
				if (copyout(&sbf32,
				    (void *)arg, sizeof (sbf32)))
					error = EFAULT;
			}
#endif
		}
		break;

	case F_CHKFL:
		/*
		 * This is for internal use only, to allow the vnode layer
		 * to validate a flags setting before applying it.  User
		 * programs can't issue it.
		 */
		error = EINVAL;
		break;

	case F_ALLOCSP:
	case F_FREESP:
	case F_ALLOCSP64:
	case F_FREESP64:
		/*
		 * Test for not-a-regular-file (and returning EINVAL)
		 * before testing for open-for-writing (and returning EBADF).
		 * This is relied upon by posix_fallocate() in libc.
		 */
		if (vp->v_type != VREG) {
			error = EINVAL;
			break;
		}

		if ((flag & FWRITE) == 0) {
			error = EBADF;
			break;
		}

		/* The *64 commands exist solely for ILP32 applications. */
		if (datamodel != DATAMODEL_ILP32 &&
		    (cmd == F_ALLOCSP64 || cmd == F_FREESP64)) {
			error = EINVAL;
			break;
		}

#if defined(_ILP32) || defined(_SYSCALL32_IMPL)
		if (datamodel == DATAMODEL_ILP32 &&
		    (cmd == F_ALLOCSP || cmd == F_FREESP)) {
			struct flock32 sbf32;
			/*
			 * For compatibility we overlay an SVR3 flock on an SVR4
			 * flock.  This works because the input field offsets
			 * in "struct flock" were preserved.
			 */
			if (copyin((void *)arg, &sbf32, sizeof (sbf32))) {
				error = EFAULT;
				break;
			} else {
				bf.l_type = sbf32.l_type;
				bf.l_whence = sbf32.l_whence;
				bf.l_start = (off64_t)sbf32.l_start;
				bf.l_len = (off64_t)sbf32.l_len;
				bf.l_sysid = sbf32.l_sysid;
				bf.l_pid = sbf32.l_pid;
			}
		}
#endif /* _ILP32 || _SYSCALL32_IMPL */

#if defined(_LP64)
		if (datamodel == DATAMODEL_LP64 &&
		    (cmd == F_ALLOCSP || cmd == F_FREESP)) {
			if (copyin((void *)arg, &bf, sizeof (bf))) {
				error = EFAULT;
				break;
			}
		}
#endif /* defined(_LP64) */

#if !defined(_LP64) || defined(_SYSCALL32_IMPL)
		if (datamodel == DATAMODEL_ILP32 &&
		    (cmd == F_ALLOCSP64 || cmd == F_FREESP64)) {
			if (copyin((void *)arg, &bf64_32, sizeof (bf64_32))) {
				error = EFAULT;
				break;
			} else {
				/*
				 * Note that the size of flock64 is different in
				 * the ILP32 and LP64 models, due to the l_pad
				 * field. We do not want to assume that the
				 * flock64 structure is laid out the same in
				 * ILP32 and LP64 environments, so we will
				 * copy in the ILP32 version of flock64
				 * explicitly and copy it to the native
				 * flock64 structure.
				 */
				bf.l_type = (short)bf64_32.l_type;
				bf.l_whence = (short)bf64_32.l_whence;
				bf.l_start = bf64_32.l_start;
				bf.l_len = bf64_32.l_len;
				bf.l_sysid = (int)bf64_32.l_sysid;
				bf.l_pid = (pid_t)bf64_32.l_pid;
			}
		}
#endif /* !defined(_LP64) || defined(_SYSCALL32_IMPL) */

		if (cmd == F_ALLOCSP || cmd == F_FREESP)
			error = flock_check(vp, &bf, offset, maxoffset);
		else if (cmd == F_ALLOCSP64 || cmd == F_FREESP64)
			error = flock_check(vp, &bf, offset, MAXOFFSET_T);
		if (error)
			break;

		/*
		 * Truncating/extending to a point past the fd's offset
		 * maximum (no O_LARGEFILE) fails with EFBIG.
		 */
		if (vp->v_type == VREG && bf.l_len == 0 &&
		    bf.l_start > OFFSET_MAX(fp)) {
			error = EFBIG;
			break;
		}

		/*
		 * Make sure that there are no conflicting non-blocking
		 * mandatory locks in the region being manipulated. If
		 * there are such locks then return EACCES.
		 */
		if ((error = flock_get_start(vp, &bf, offset, &start)) != 0)
			break;

		if (nbl_need_check(vp)) {
			u_offset_t	begin;
			ssize_t		length;

			/* in_crit makes the epilogue call nbl_end_crit(). */
			nbl_start_crit(vp, RW_READER);
			in_crit = 1;
			vattr.va_mask = AT_SIZE;
			if ((error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL))
			    != 0)
				break;
			/*
			 * Check the span between the change point and the
			 * current EOF, whichever direction that is.
			 */
			begin = start > vattr.va_size ? vattr.va_size : start;
			length = vattr.va_size > start ? vattr.va_size - start :
			    start - vattr.va_size;
			if (nbl_conflict(vp, NBL_WRITE, begin, length, 0,
			    NULL)) {
				error = EACCES;
				break;
			}
		}

		/* The VFS layer only knows the non-64 forms. */
		if (cmd == F_ALLOCSP64)
			cmd = F_ALLOCSP;
		else if (cmd == F_FREESP64)
			cmd = F_FREESP;

		error = VOP_SPACE(vp, cmd, &bf, flag, offset, fp->f_cred, NULL);

		break;

#if !defined(_LP64) || defined(_SYSCALL32_IMPL)
	case F_GETLK64:
	case F_SETLK64:
	case F_SETLKW64:
	case F_SETLK64_NBMAND:
	case F_OFD_GETLK64:
	case F_OFD_SETLK64:
	case F_OFD_SETLKW64:
	case F_FLOCK64:
	case F_FLOCKW64:
		/*
		 * Large Files: Here we set cmd as *LK and send it to
		 * lower layers. *LK64 is only for the user land.
		 * Most of the comments described above for F_SETLK
		 * applies here too.
		 * Large File support is only needed for ILP32 apps!
		 */
		if (datamodel != DATAMODEL_ILP32) {
			error = EINVAL;
			break;
		}

		if (cmd == F_GETLK64)
			cmd = F_GETLK;
		else if (cmd == F_SETLK64)
			cmd = F_SETLK;
		else if (cmd == F_SETLKW64)
			cmd = F_SETLKW;
		else if (cmd == F_SETLK64_NBMAND)
			cmd = F_SETLK_NBMAND;
		else if (cmd == F_OFD_GETLK64)
			cmd = F_OFD_GETLK;
		else if (cmd == F_OFD_SETLK64)
			cmd = F_OFD_SETLK;
		else if (cmd == F_OFD_SETLKW64)
			cmd = F_OFD_SETLKW;
		else if (cmd == F_FLOCK64)
			cmd = F_FLOCK;
		else if (cmd == F_FLOCKW64)
			cmd = F_FLOCKW;

		/*
		 * Note that the size of flock64 is different in the ILP32
		 * and LP64 models, due to the l_pad field.
		 * We do not want to assume that the flock64 structure is
		 * laid out the same way in ILP32 and LP64 environments, so
		 * we will copy in the ILP32 version of flock64 explicitly
		 * and copy it to the native flock64 structure.
		 */

		if (copyin((void *)arg, &bf64_32, sizeof (bf64_32))) {
			error = EFAULT;
			break;
		}

		bf.l_type = (short)bf64_32.l_type;
		bf.l_whence = (short)bf64_32.l_whence;
		bf.l_start = bf64_32.l_start;
		bf.l_len = bf64_32.l_len;
		bf.l_sysid = (int)bf64_32.l_sysid;
		bf.l_pid = (pid_t)bf64_32.l_pid;

		if ((error = flock_check(vp, &bf, offset, MAXOFFSET_T)) != 0)
			break;

		if (cmd == F_FLOCK || cmd == F_FLOCKW) {
			/* FLOCK* locking is always over the entire file. */
			if (bf.l_whence != 0 || bf.l_start != 0 ||
			    bf.l_len != 0) {
				error = EINVAL;
				break;
			}
			if (bf.l_type < F_RDLCK || bf.l_type > F_UNLCK) {
				error = EINVAL;
				break;
			}
		}

		if (cmd == F_OFD_SETLK || cmd == F_OFD_SETLKW) {
			/*
			 * TBD OFD-style locking is currently limited to
			 * covering the entire file.
			 */
			if (bf.l_whence != 0 || bf.l_start != 0 ||
			    bf.l_len != 0) {
				error = EINVAL;
				break;
			}
		}

		/*
		 * The *_frlock functions in the various file systems basically
		 * do some validation and then funnel everything through the
		 * fs_frlock function. For OFD-style locks fs_frlock will do
		 * nothing so that once control returns here we can call the
		 * ofdlock function with the correct fp. For OFD-style locks
		 * the unsupported remote file systems, such as NFS, detect and
		 * reject the OFD-style cmd argument.
		 */
		if ((error = VOP_FRLOCK(vp, cmd, &bf, flag, offset,
		    NULL, fp->f_cred, NULL)) != 0)
			break;

		if (cmd == F_FLOCK || cmd == F_FLOCKW || cmd == F_OFD_GETLK ||
		    cmd == F_OFD_SETLK || cmd == F_OFD_SETLKW) {
			/*
			 * This is an OFD-style lock so we need to handle it
			 * here. Because OFD-style locks are associated with
			 * the file_t we didn't have enough info down the
			 * VOP_FRLOCK path immediately above.
			 */
			if ((error = ofdlock(fp, cmd, &bf, flag, offset)) != 0)
				break;
		}

		/* As above: no lock found -> only l_type is copied out. */
		if ((cmd == F_GETLK || cmd == F_OFD_GETLK) &&
		    bf.l_type == F_UNLCK) {
			if (copyout(&bf.l_type, &((struct flock *)arg)->l_type,
			    sizeof (bf.l_type)))
				error = EFAULT;
			break;
		}

		if (cmd == F_GETLK || cmd == F_OFD_GETLK) {
			int i;

			/*
			 * We do not want to assume that the flock64 structure
			 * is laid out the same way in ILP32 and LP64
			 * environments, so we will copy out the ILP32 version
			 * of flock64 explicitly after copying the native
			 * flock64 structure to it.
			 */
			for (i = 0; i < 4; i++)
				bf64_32.l_pad[i] = 0;
			bf64_32.l_type = (int16_t)bf.l_type;
			bf64_32.l_whence = (int16_t)bf.l_whence;
			bf64_32.l_start = bf.l_start;
			bf64_32.l_len = bf.l_len;
			bf64_32.l_sysid = (int32_t)bf.l_sysid;
			bf64_32.l_pid = (pid32_t)bf.l_pid;
			if (copyout(&bf64_32, (void *)arg, sizeof (bf64_32)))
				error = EFAULT;
		}
		break;
#endif /* !defined(_LP64) || defined(_SYSCALL32_IMPL) */

	case F_SHARE:
	case F_SHARE_NBMAND:
	case F_UNSHARE:

		/*
		 * Copy in input fields only.
		 */
		if (copyin((void *)arg, &fsh, sizeof (fsh))) {
			error = EFAULT;
			break;
		}

		/*
		 * Local share reservations always have this simple form
		 */
		shr.s_access = fsh.f_access;
		shr.s_deny = fsh.f_deny;
		shr.s_sysid = 0;
		shr.s_pid = ttoproc(curthread)->p_pid;
		shr_own.sl_pid = shr.s_pid;
		shr_own.sl_id = fsh.f_id;
		shr.s_own_len = sizeof (shr_own);
		shr.s_owner = (caddr_t)&shr_own;
		error = VOP_SHRLOCK(vp, cmd, &shr, flag, fp->f_cred, NULL);
		break;

	default:
		error = EINVAL;
		break;
	}

	/* Exit any nbmand critical region entered above. */
	if (in_crit)
		nbl_end_crit(vp);

done:
	releasef(fdes);
out:
	if (error)
		return (set_errno(error));
	return (retval);
}
851
/*
 * Validate an flock64 request against a vnode and the caller's offset
 * limit, and canonicalize it.
 *
 * vp     - vnode the lock applies to (consulted for SEEK_END requests)
 * flp    - lock request; l_len may be rewritten (see below)
 * offset - current file offset, used when l_whence is SEEK_CUR
 * max    - largest representable offset for the caller's data model
 *          (MAXOFF32_T, MAXOFF_T or MAXOFFSET_T)
 *
 * Returns 0 if the [start, end] range is well-formed and within [0, max];
 * EINVAL for a malformed request, EOVERFLOW when the computed range would
 * exceed 'max', or a VOP_GETATTR() error for SEEK_END requests.
 *
 * Side effect: an F_UNLCK request with positive l_len that reaches exactly
 * 'max' is normalized to l_len == 0 ("to end of file").
 */
int
flock_check(vnode_t *vp, flock64_t *flp, offset_t offset, offset_t max)
{
	struct vattr	vattr;
	int	error;
	u_offset_t start, end;

	/*
	 * Determine the starting point of the request
	 */
	switch (flp->l_whence) {
	case 0:		/* SEEK_SET */
		start = (u_offset_t)flp->l_start;
		if (start > max)
			return (EINVAL);
		break;
	case 1:		/* SEEK_CUR */
		/* Reject before adding so the sum cannot exceed 'max'. */
		if (flp->l_start > (max - offset))
			return (EOVERFLOW);
		start = (u_offset_t)(flp->l_start + offset);
		if (start > max)
			return (EINVAL);
		break;
	case 2:		/* SEEK_END */
		vattr.va_mask = AT_SIZE;
		if (error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL))
			return (error);
		if (flp->l_start > (max - (offset_t)vattr.va_size))
			return (EOVERFLOW);
		start = (u_offset_t)(flp->l_start + (offset_t)vattr.va_size);
		if (start > max)
			return (EINVAL);
		break;
	default:
		return (EINVAL);
	}

	/*
	 * Determine the range covered by the request.
	 */
	if (flp->l_len == 0)
		end = MAXEND;	/* zero length means "to end of file" */
	else if ((offset_t)flp->l_len > 0) {
		if (flp->l_len > (max - start + 1))
			return (EOVERFLOW);
		/* end is inclusive, hence the -1 */
		end = (u_offset_t)(start + (flp->l_len - 1));
		ASSERT(end <= max);
	} else {
		/*
		 * Negative length; why do we even allow this ?
		 * Because this allows easy specification of
		 * the last n bytes of the file.
		 */
		/* Range becomes [start + l_len + 1, start]. */
		end = start;
		start += (u_offset_t)flp->l_len;
		(start)++;
		if (start > max)
			return (EINVAL);
		ASSERT(end <= max);
	}
	ASSERT(start <= max);
	/*
	 * Canonicalize: an unlock whose explicit range runs exactly to
	 * 'max' is equivalent to "unlock to end of file".
	 */
	if (flp->l_type == F_UNLCK && flp->l_len > 0 &&
	    end == (offset_t)max) {
		flp->l_len = 0;
	}
	if (start > end)
		return (EINVAL);
	return (0);
}
921
922 static int
923 flock_get_start(vnode_t *vp, flock64_t *flp, offset_t offset, u_offset_t *start)
924 {
925 struct vattr vattr;
926 int error;
927
928 /*
929 * Determine the starting point of the request. Assume that it is
930 * a valid starting point.
931 */
932 switch (flp->l_whence) {
933 case 0: /* SEEK_SET */
934 *start = (u_offset_t)flp->l_start;
935 break;
936 case 1: /* SEEK_CUR */
937 *start = (u_offset_t)(flp->l_start + offset);
938 break;
939 case 2: /* SEEK_END */
940 vattr.va_mask = AT_SIZE;
941 if (error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL))
942 return (error);
943 *start = (u_offset_t)(flp->l_start + (offset_t)vattr.va_size);
944 break;
945 default:
946 return (EINVAL);
947 }
948
949 return (0);
950 }
951
952 /*
953 * Take rctl action when the requested file descriptor is too big.
954 */
/*
 * Take rctl action when the requested file descriptor is too big:
 * fire the RLIMIT_NOFILE resource control (which may log and/or signal
 * the process, per its rctl configuration).  p_lock must not be held
 * on entry; it is taken here as rctl_action() requires it.
 */
static void
fd_too_big(proc_t *p)
{
	mutex_enter(&p->p_lock);
	(void) rctl_action(rctlproc_legacy[RLIMIT_NOFILE],
	    p->p_rctls, p, RCA_SAFE);
	mutex_exit(&p->p_lock);
}