/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2013, OmniTI Computer Consulting, Inc. All rights reserved.
 * Copyright 2015, Joyent, Inc.
 */

/*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/

/*
 * Portions of this source code were derived from Berkeley 4.3 BSD
 * under license from the Regents of the University of California.
 */


#include <sys/param.h>
#include <sys/isa_defs.h>
#include <sys/types.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <sys/fcntl.h>
#include <sys/flock.h>
#include <sys/vnode.h>
#include <sys/file.h>
#include <sys/mode.h>
#include <sys/proc.h>
#include <sys/filio.h>
#include <sys/share.h>
#include <sys/debug.h>
#include <sys/rctl.h>
#include <sys/nbmlock.h>

#include <sys/cmn_err.h>

static int flock_check(vnode_t *, flock64_t *, offset_t, offset_t);
static int flock_get_start(vnode_t *, flock64_t *, offset_t, u_offset_t *);
static void fd_too_big(proc_t *);

/*
 * File control.
 *
 * fdes is the file descriptor being operated on, cmd is one of the F_*
 * commands handled by the switch statements below, and arg is the
 * command-specific argument: it is either used as an integer (truncated
 * to int) or as a user address that is accessed with copyin()/copyout().
 *
 * The commands are processed in three stages, cheapest first:
 *   1. commands that need neither getf() nor copyin();
 *   2. commands that need getf()/releasef() but no copyin();
 *   3. everything else (record locks, space allocation, share
 *      reservations), which copy a structure in from the caller.
 *
 * On success the command-specific value in retval is returned.  On failure
 * the value of set_errno(error) is returned.
 *
 * Exit paths: "done" drops the hold obtained with getf(fdes) and then falls
 * into "out"; "out" is used directly by the paths that never called getf()
 * or that have already called releasef(fdes) themselves.
 */
int
fcntl(int fdes, int cmd, intptr_t arg)
{
	int iarg;
	int error = 0;
	int retval;
	proc_t *p;
	file_t *fp;
	vnode_t *vp;
	u_offset_t offset;
	u_offset_t start;
	struct vattr vattr;
	int in_crit;
	int flag;
	struct flock sbf;
	struct flock64 bf;
	struct o_flock obf;
	struct flock64_32 bf64_32;
	struct fshare fsh;
	struct shrlock shr;
	struct shr_locowner shr_own;
	offset_t maxoffset;
	model_t datamodel;
	int fdres;

	/*
	 * Sanity checks on the structure sizes that the copy-in/copy-out
	 * code below relies upon.
	 */
#if defined(_ILP32) && !defined(lint) && defined(_SYSCALL32)
	ASSERT(sizeof (struct flock) == sizeof (struct flock32));
	ASSERT(sizeof (struct flock64) == sizeof (struct flock64_32));
#endif
#if defined(_LP64) && !defined(lint) && defined(_SYSCALL32)
	ASSERT(sizeof (struct flock) == sizeof (struct flock64_64));
	ASSERT(sizeof (struct flock64) == sizeof (struct flock64_64));
#endif

	/*
	 * First, for speed, deal with the subset of cases
	 * that do not require getf() / releasef().
	 */
	switch (cmd) {
	case F_GETFD:
		if ((error = f_getfd_error(fdes, &flag)) == 0)
			retval = flag;
		goto out;

	case F_SETFD:
		error = f_setfd_error(fdes, (int)arg);
		retval = 0;
		goto out;

	case F_GETFL:
		/*
		 * NOTE(review): FOPEN is added here and subtracted again in
		 * F_SETFL below when translating between the value seen by
		 * the caller and the flags kept in f_flag; its definition is
		 * not visible in this file.
		 */
		if ((error = f_getfl(fdes, &flag)) == 0) {
			retval = (flag & (FMASK | FASYNC));
			if ((flag & (FSEARCH | FEXEC)) == 0)
				retval += FOPEN;
			else
				retval |= (flag & (FSEARCH | FEXEC));
		}
		goto out;

	case F_GETXFL:
		/* Like F_GETFL, but the flags are not masked. */
		if ((error = f_getfl(fdes, &flag)) == 0) {
			retval = flag;
			if ((flag & (FSEARCH | FEXEC)) == 0)
				retval += FOPEN;
		}
		goto out;

	case F_BADFD:
		if ((error = f_badfd(fdes, &fdres, (int)arg)) == 0)
			retval = fdres;
		goto out;
	}

	/*
	 * Second, for speed, deal with the subset of cases that
	 * require getf() / releasef() but do not require copyin.
	 */
	if ((fp = getf(fdes)) == NULL) {
		error = EBADF;
		goto out;
	}
	iarg = (int)arg;

	switch (cmd) {
	case F_DUPFD:
	case F_DUPFD_CLOEXEC:
		p = curproc;
		/*
		 * The unsigned comparison also rejects a negative iarg; the
		 * resource-control action is only taken for a non-negative
		 * value that is too large.
		 */
		if ((uint_t)iarg >= p->p_fno_ctl) {
			if (iarg >= 0)
				fd_too_big(p);
			error = EINVAL;
			goto done;
		}
		/*
		 * We need to increment the f_count reference counter
		 * before allocating a new file descriptor.
		 * Doing it the other way round opens a window for a race
		 * condition with closeandsetf() on the target file descriptor
		 * which can close the file still referenced by the original
		 * file descriptor.
		 */
		mutex_enter(&fp->f_tlock);
		fp->f_count++;
		mutex_exit(&fp->f_tlock);
		if ((retval = ufalloc_file(iarg, fp)) == -1) {
			/*
			 * New file descriptor can't be allocated.
			 * Revert the reference count.
			 */
			mutex_enter(&fp->f_tlock);
			fp->f_count--;
			mutex_exit(&fp->f_tlock);
			error = EMFILE;
		} else {
			if (cmd == F_DUPFD_CLOEXEC) {
				f_setfd(retval, FD_CLOEXEC);
			}
		}

		/*
		 * On success, tell the vnode about the association with
		 * this process; the result of the ioctl is ignored.
		 */
		if (error == 0 && fp->f_vnode != NULL) {
			(void) VOP_IOCTL(fp->f_vnode, F_ASSOCI_PID,
			    (intptr_t)p->p_pidp->pid_id, FKIOCTL, kcred,
			    NULL, NULL);
		}

		goto done;

	case F_DUP2FD_CLOEXEC:
		/*
		 * Unlike plain F_DUP2FD, duplicating a descriptor onto
		 * itself is an error here.
		 */
		if (fdes == iarg) {
			error = EINVAL;
			goto done;
		}

		/*FALLTHROUGH*/

	case F_DUP2FD:
		p = curproc;
		if (fdes == iarg) {
			/* Nothing to do: source and target are the same. */
			retval = iarg;
		} else if ((uint_t)iarg >= p->p_fno_ctl) {
			if (iarg >= 0)
				fd_too_big(p);
			error = EBADF;
		} else {
			/*
			 * We can't hold our getf(fdes) across the call to
			 * closeandsetf() because it creates a window for
			 * deadlock: if one thread is doing dup2(a, b) while
			 * another is doing dup2(b, a), each one will block
			 * waiting for the other to call releasef().  The
			 * solution is to increment the file reference count
			 * (which we have to do anyway), then releasef(fdes),
			 * then closeandsetf().  Incrementing f_count ensures
			 * that fp won't disappear after we call releasef().
			 * When closeandsetf() fails, we try to avoid calling
			 * closef() because of all the side effects.
			 */
			mutex_enter(&fp->f_tlock);
			fp->f_count++;
			mutex_exit(&fp->f_tlock);
			releasef(fdes);

			/*
			 * Assume we will succeed in duplicating the file
			 * descriptor and associate the pid with the vnode.
			 */
			if (fp->f_vnode != NULL) {
				(void) VOP_IOCTL(fp->f_vnode, F_ASSOCI_PID,
				    (intptr_t)p->p_pidp->pid_id, FKIOCTL,
				    kcred, NULL, NULL);
			}

			if ((error = closeandsetf(iarg, fp)) == 0) {
				if (cmd == F_DUP2FD_CLOEXEC) {
					f_setfd(iarg, FD_CLOEXEC);
				}
				retval = iarg;
			} else {
				mutex_enter(&fp->f_tlock);
				if (fp->f_count > 1) {
					/*
					 * Other references remain, so simply
					 * drop the one taken above.
					 */
					fp->f_count--;
					mutex_exit(&fp->f_tlock);
					/*
					 * Failed to duplicate fdes,
					 * disassociate the pid from the vnode.
					 */
					if (fp->f_vnode != NULL) {
						(void) VOP_IOCTL(fp->f_vnode,
						    F_DASSOC_PID,
						    (intptr_t)p->p_pidp->pid_id,
						    FKIOCTL, kcred, NULL, NULL);
					}

				} else {
					/*
					 * Ours is the only remaining
					 * reference; release it via closef().
					 */
					mutex_exit(&fp->f_tlock);
					(void) closef(fp);
				}
			}
			/*
			 * releasef(fdes) was already called above, so bypass
			 * the "done" label.
			 */
			goto out;
		}
		goto done;

	case F_SETFL:
		vp = fp->f_vnode;
		flag = fp->f_flag;
		/* If both FNONBLOCK and FNDELAY are requested, drop FNDELAY. */
		if ((iarg & (FNONBLOCK|FNDELAY)) == (FNONBLOCK|FNDELAY))
			iarg &= ~FNDELAY;
		if ((error = VOP_SETFL(vp, flag, iarg, fp->f_cred, NULL)) ==
		    0) {
			/*
			 * Replace the FMASK bits of f_flag with the new
			 * settings while leaving FREAD and FWRITE untouched.
			 */
			iarg &= FMASK;
			mutex_enter(&fp->f_tlock);
			fp->f_flag &= ~FMASK | (FREAD|FWRITE);
			fp->f_flag |= (iarg - FOPEN) & ~(FREAD|FWRITE);
			mutex_exit(&fp->f_tlock);
		}
		retval = 0;
		goto done;
	}

	/*
	 * Finally, deal with the expensive cases.
	 */
	retval = 0;
	in_crit = 0;
	maxoffset = MAXOFF_T;
	datamodel = DATAMODEL_NATIVE;
#if defined(_SYSCALL32_IMPL)
	if ((datamodel = get_udatamodel()) == DATAMODEL_ILP32)
		maxoffset = MAXOFF32_T;
#endif

	vp = fp->f_vnode;
	flag = fp->f_flag;
	offset = fp->f_offset;

	switch (cmd) {
	/*
	 * The file system and vnode layers understand and implement
	 * locking with flock64 structures. So here once we pass through
	 * the test for compatibility as defined by LFS API, (for F_SETLK,
	 * F_SETLKW, F_GETLK, F_GETLKW, F_OFD_GETLK, F_OFD_SETLK, F_OFD_SETLKW,
	 * F_FREESP) we transform the flock structure to a flock64 structure
	 * and send it to the lower layers. Similarly in case of GETLK and
	 * OFD_GETLK the returned flock64 structure is transformed to a flock
	 * structure if everything fits in nicely, otherwise we return
	 * EOVERFLOW.
	 */

	case F_GETLK:
	case F_O_GETLK:
	case F_SETLK:
	case F_SETLKW:
	case F_SETLK_NBMAND:
	case F_OFD_GETLK:
	case F_OFD_SETLK:
	case F_OFD_SETLKW:
	case F_FLOCK:
	case F_FLOCKW:

		/*
		 * Copy in input fields only.
		 */

		if (cmd == F_O_GETLK) {
			/* F_O_GETLK is only accepted from ILP32 callers. */
			if (datamodel != DATAMODEL_ILP32) {
				error = EINVAL;
				break;
			}

			if (copyin((void *)arg, &obf, sizeof (obf))) {
				error = EFAULT;
				break;
			}
			bf.l_type = obf.l_type;
			bf.l_whence = obf.l_whence;
			bf.l_start = (off64_t)obf.l_start;
			bf.l_len = (off64_t)obf.l_len;
			bf.l_sysid = (int)obf.l_sysid;
			bf.l_pid = obf.l_pid;
		} else if (datamodel == DATAMODEL_NATIVE) {
			if (copyin((void *)arg, &sbf, sizeof (sbf))) {
				error = EFAULT;
				break;
			}
			/*
			 * XXX	In an LP64 kernel with an LP64 application
			 *	there's no need to do a structure copy here
			 *	struct flock == struct flock64. However,
			 *	we did it this way to avoid more conditional
			 *	compilation.
			 */
			bf.l_type = sbf.l_type;
			bf.l_whence = sbf.l_whence;
			bf.l_start = (off64_t)sbf.l_start;
			bf.l_len = (off64_t)sbf.l_len;
			bf.l_sysid = sbf.l_sysid;
			bf.l_pid = sbf.l_pid;
		}
#if defined(_SYSCALL32_IMPL)
		else {
			struct flock32 sbf32;
			if (copyin((void *)arg, &sbf32, sizeof (sbf32))) {
				error = EFAULT;
				break;
			}
			bf.l_type = sbf32.l_type;
			bf.l_whence = sbf32.l_whence;
			bf.l_start = (off64_t)sbf32.l_start;
			bf.l_len = (off64_t)sbf32.l_len;
			bf.l_sysid = sbf32.l_sysid;
			bf.l_pid = sbf32.l_pid;
		}
#endif /* _SYSCALL32_IMPL */

		/*
		 * 64-bit support: check for overflow for 32-bit lock ops
		 */
		if ((error = flock_check(vp, &bf, offset, maxoffset)) != 0)
			break;

		if (cmd == F_FLOCK || cmd == F_FLOCKW) {
			/* FLOCK* locking is always over the entire file. */
			if (bf.l_whence != 0 || bf.l_start != 0 ||
			    bf.l_len != 0) {
				error = EINVAL;
				break;
			}
			if (bf.l_type < F_RDLCK || bf.l_type > F_UNLCK) {
				error = EINVAL;
				break;
			}
		}

		if (cmd == F_OFD_SETLK || cmd == F_OFD_SETLKW) {
			/*
			 * TBD OFD-style locking is currently limited to
			 * covering the entire file.
			 */
			if (bf.l_whence != 0 || bf.l_start != 0 ||
			    bf.l_len != 0) {
				error = EINVAL;
				break;
			}
		}

		/*
		 * Not all of the filesystems understand F_O_GETLK, and
		 * there's no need for them to know.  Map it to F_GETLK.
		 *
		 * The *_frlock functions in the various file systems basically
		 * do some validation and then funnel everything through the
		 * fs_frlock function. For OFD-style locks fs_frlock will do
		 * nothing so that once control returns here we can call the
		 * ofdlock function with the correct fp. For OFD-style locks
		 * the unsupported remote file systems, such as NFS, detect and
		 * reject the OFD-style cmd argument.
		 */
		if ((error = VOP_FRLOCK(vp, (cmd == F_O_GETLK) ? F_GETLK : cmd,
		    &bf, flag, offset, NULL, fp->f_cred, NULL)) != 0)
			break;

		if (cmd == F_FLOCK || cmd == F_FLOCKW || cmd == F_OFD_GETLK ||
		    cmd == F_OFD_SETLK || cmd == F_OFD_SETLKW) {
			/*
			 * This is an OFD-style lock so we need to handle it
			 * here. Because OFD-style locks are associated with
			 * the file_t we didn't have enough info down the
			 * VOP_FRLOCK path immediately above.
			 */
			if ((error = ofdlock(fp, cmd, &bf, flag, offset)) != 0)
				break;
		}

		/*
		 * If command is GETLK and no lock is found, only
		 * the type field is changed.
		 */
		if ((cmd == F_O_GETLK || cmd == F_GETLK ||
		    cmd == F_OFD_GETLK) && bf.l_type == F_UNLCK) {
			/* l_type always first entry, always a short */
			if (copyout(&bf.l_type, &((struct flock *)arg)->l_type,
			    sizeof (bf.l_type)))
				error = EFAULT;
			break;
		}

		if (cmd == F_O_GETLK) {
			/*
			 * Return an SVR3 flock structure to the user.
			 */
			obf.l_type = (int16_t)bf.l_type;
			obf.l_whence = (int16_t)bf.l_whence;
			obf.l_start = (int32_t)bf.l_start;
			obf.l_len = (int32_t)bf.l_len;
			if (bf.l_sysid > SHRT_MAX || bf.l_pid > SHRT_MAX) {
				/*
				 * One or both values for the above fields
				 * is too large to store in an SVR3 flock
				 * structure.
				 */
				error = EOVERFLOW;
				break;
			}
			obf.l_sysid = (int16_t)bf.l_sysid;
			obf.l_pid = (int16_t)bf.l_pid;
			if (copyout(&obf, (void *)arg, sizeof (obf)))
				error = EFAULT;
		} else if (cmd == F_GETLK || cmd == F_OFD_GETLK) {
			/*
			 * Copy out SVR4 flock.
			 */
			int i;

			if (bf.l_start > maxoffset || bf.l_len > maxoffset) {
				error = EOVERFLOW;
				break;
			}

			if (datamodel == DATAMODEL_NATIVE) {
				/* Clear the pad words before copying out. */
				for (i = 0; i < 4; i++)
					sbf.l_pad[i] = 0;
				/*
				 * XXX	In an LP64 kernel with an LP64
				 *	application there's no need to do a
				 *	structure copy here as currently
				 *	struct flock == struct flock64.
				 *	We did it this way to avoid more
				 *	conditional compilation.
				 */
				sbf.l_type = bf.l_type;
				sbf.l_whence = bf.l_whence;
				sbf.l_start = (off_t)bf.l_start;
				sbf.l_len = (off_t)bf.l_len;
				sbf.l_sysid = bf.l_sysid;
				sbf.l_pid = bf.l_pid;
				if (copyout(&sbf, (void *)arg, sizeof (sbf)))
					error = EFAULT;
			}
#if defined(_SYSCALL32_IMPL)
			else {
				struct flock32 sbf32;
				if (bf.l_start > MAXOFF32_T ||
				    bf.l_len > MAXOFF32_T) {
					error = EOVERFLOW;
					break;
				}
				/* Clear the pad words before copying out. */
				for (i = 0; i < 4; i++)
					sbf32.l_pad[i] = 0;
				sbf32.l_type = (int16_t)bf.l_type;
				sbf32.l_whence = (int16_t)bf.l_whence;
				sbf32.l_start = (off32_t)bf.l_start;
				sbf32.l_len = (off32_t)bf.l_len;
				sbf32.l_sysid = (int32_t)bf.l_sysid;
				sbf32.l_pid = (pid32_t)bf.l_pid;
				if (copyout(&sbf32,
				    (void *)arg, sizeof (sbf32)))
					error = EFAULT;
			}
#endif
		}
		break;

	case F_CHKFL:
		/*
		 * This is for internal use only, to allow the vnode layer
		 * to validate a flags setting before applying it.  User
		 * programs can't issue it.
		 */
		error = EINVAL;
		break;

	case F_ALLOCSP:
	case F_FREESP:
	case F_ALLOCSP64:
	case F_FREESP64:
		/*
		 * Test for not-a-regular-file (and returning EINVAL)
		 * before testing for open-for-writing (and returning EBADF).
		 * This is relied upon by posix_fallocate() in libc.
		 */
		if (vp->v_type != VREG) {
			error = EINVAL;
			break;
		}

		if ((flag & FWRITE) == 0) {
			error = EBADF;
			break;
		}

		/* The *64 variants are only accepted from ILP32 callers. */
		if (datamodel != DATAMODEL_ILP32 &&
		    (cmd == F_ALLOCSP64 || cmd == F_FREESP64)) {
			error = EINVAL;
			break;
		}

#if defined(_ILP32) || defined(_SYSCALL32_IMPL)
		if (datamodel == DATAMODEL_ILP32 &&
		    (cmd == F_ALLOCSP || cmd == F_FREESP)) {
			struct flock32 sbf32;
			/*
			 * For compatibility we overlay an SVR3 flock on an SVR4
			 * flock.  This works because the input field offsets
			 * in "struct flock" were preserved.
			 */
			if (copyin((void *)arg, &sbf32, sizeof (sbf32))) {
				error = EFAULT;
				break;
			} else {
				bf.l_type = sbf32.l_type;
				bf.l_whence = sbf32.l_whence;
				bf.l_start = (off64_t)sbf32.l_start;
				bf.l_len = (off64_t)sbf32.l_len;
				bf.l_sysid = sbf32.l_sysid;
				bf.l_pid = sbf32.l_pid;
			}
		}
#endif /* _ILP32 || _SYSCALL32_IMPL */

#if defined(_LP64)
		if (datamodel == DATAMODEL_LP64 &&
		    (cmd == F_ALLOCSP || cmd == F_FREESP)) {
			/* Native LP64 caller: copy straight into bf. */
			if (copyin((void *)arg, &bf, sizeof (bf))) {
				error = EFAULT;
				break;
			}
		}
#endif /* defined(_LP64) */

#if !defined(_LP64) || defined(_SYSCALL32_IMPL)
		if (datamodel == DATAMODEL_ILP32 &&
		    (cmd == F_ALLOCSP64 || cmd == F_FREESP64)) {
			if (copyin((void *)arg, &bf64_32, sizeof (bf64_32))) {
				error = EFAULT;
				break;
			} else {
				/*
				 * Note that the size of flock64 is different in
				 * the ILP32 and LP64 models, due to the l_pad
				 * field. We do not want to assume that the
				 * flock64 structure is laid out the same in
				 * ILP32 and LP64 environments, so we will
				 * copy in the ILP32 version of flock64
				 * explicitly and copy it to the native
				 * flock64 structure.
				 */
				bf.l_type = (short)bf64_32.l_type;
				bf.l_whence = (short)bf64_32.l_whence;
				bf.l_start = bf64_32.l_start;
				bf.l_len = bf64_32.l_len;
				bf.l_sysid = (int)bf64_32.l_sysid;
				bf.l_pid = (pid_t)bf64_32.l_pid;
			}
		}
#endif /* !defined(_LP64) || defined(_SYSCALL32_IMPL) */

		/*
		 * Validate the request; the *64 commands are checked against
		 * MAXOFFSET_T rather than the data-model dependent maxoffset.
		 */
		if (cmd == F_ALLOCSP || cmd == F_FREESP)
			error = flock_check(vp, &bf, offset, maxoffset);
		else if (cmd == F_ALLOCSP64 || cmd == F_FREESP64)
			error = flock_check(vp, &bf, offset, MAXOFFSET_T);
		if (error)
			break;

		if (vp->v_type == VREG && bf.l_len == 0 &&
		    bf.l_start > OFFSET_MAX(fp)) {
			error = EFBIG;
			break;
		}

		/*
		 * Make sure that there are no conflicting non-blocking
		 * mandatory locks in the region being manipulated. If
		 * there are such locks then return EACCES.
		 */
		if ((error = flock_get_start(vp, &bf, offset, &start)) != 0)
			break;

		if (nbl_need_check(vp)) {
			u_offset_t	begin;
			ssize_t		length;

			/*
			 * Once in_crit is set, every exit from this case is
			 * via "break" so that nbl_end_crit() is called after
			 * the switch.
			 */
			nbl_start_crit(vp, RW_READER);
			in_crit = 1;
			vattr.va_mask = AT_SIZE;
			if ((error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL))
			    != 0)
				break;
			/*
			 * The region checked lies between the requested
			 * start and the current file size, whichever order
			 * they are in.
			 */
			begin = start > vattr.va_size ? vattr.va_size : start;
			length = vattr.va_size > start ? vattr.va_size - start :
			    start - vattr.va_size;
			if (nbl_conflict(vp, NBL_WRITE, begin, length, 0,
			    NULL)) {
				error = EACCES;
				break;
			}
		}

		/* The lower layers are only given the non-64 commands. */
		if (cmd == F_ALLOCSP64)
			cmd = F_ALLOCSP;
		else if (cmd == F_FREESP64)
			cmd = F_FREESP;

		error = VOP_SPACE(vp, cmd, &bf, flag, offset, fp->f_cred, NULL);

		break;

#if !defined(_LP64) || defined(_SYSCALL32_IMPL)
	case F_GETLK64:
	case F_SETLK64:
	case F_SETLKW64:
	case F_SETLK64_NBMAND:
	case F_OFD_GETLK64:
	case F_OFD_SETLK64:
	case F_OFD_SETLKW64:
	case F_FLOCK64:
	case F_FLOCKW64:
		/*
		 * Large Files: Here we set cmd as *LK and send it to
		 * lower layers. *LK64 is only for the user land.
		 * Most of the comments described above for F_SETLK
		 * apply here too.
		 * Large File support is only needed for ILP32 apps!
		 */
		if (datamodel != DATAMODEL_ILP32) {
			error = EINVAL;
			break;
		}

		if (cmd == F_GETLK64)
			cmd = F_GETLK;
		else if (cmd == F_SETLK64)
			cmd = F_SETLK;
		else if (cmd == F_SETLKW64)
			cmd = F_SETLKW;
		else if (cmd == F_SETLK64_NBMAND)
			cmd = F_SETLK_NBMAND;
		else if (cmd == F_OFD_GETLK64)
			cmd = F_OFD_GETLK;
		else if (cmd == F_OFD_SETLK64)
			cmd = F_OFD_SETLK;
		else if (cmd == F_OFD_SETLKW64)
			cmd = F_OFD_SETLKW;
		else if (cmd == F_FLOCK64)
			cmd = F_FLOCK;
		else if (cmd == F_FLOCKW64)
			cmd = F_FLOCKW;

		/*
		 * Note that the size of flock64 is different in the ILP32
		 * and LP64 models, due to the l_pad field.
		 * We do not want to assume that the flock64 structure is
		 * laid out the same in ILP32 and LP64 environments, so
		 * we will copy in the ILP32 version of flock64 explicitly
		 * and copy it to the native flock64 structure.
		 */

		if (copyin((void *)arg, &bf64_32, sizeof (bf64_32))) {
			error = EFAULT;
			break;
		}

		bf.l_type = (short)bf64_32.l_type;
		bf.l_whence = (short)bf64_32.l_whence;
		bf.l_start = bf64_32.l_start;
		bf.l_len = bf64_32.l_len;
		bf.l_sysid = (int)bf64_32.l_sysid;
		bf.l_pid = (pid_t)bf64_32.l_pid;

		if ((error = flock_check(vp, &bf, offset, MAXOFFSET_T)) != 0)
			break;

		if (cmd == F_FLOCK || cmd == F_FLOCKW) {
			/* FLOCK* locking is always over the entire file. */
			if (bf.l_whence != 0 || bf.l_start != 0 ||
			    bf.l_len != 0) {
				error = EINVAL;
				break;
			}
			if (bf.l_type < F_RDLCK || bf.l_type > F_UNLCK) {
				error = EINVAL;
				break;
			}
		}

		if (cmd == F_OFD_SETLK || cmd == F_OFD_SETLKW) {
			/*
			 * TBD OFD-style locking is currently limited to
			 * covering the entire file.
			 */
			if (bf.l_whence != 0 || bf.l_start != 0 ||
			    bf.l_len != 0) {
				error = EINVAL;
				break;
			}
		}

		/*
		 * The *_frlock functions in the various file systems basically
		 * do some validation and then funnel everything through the
		 * fs_frlock function. For OFD-style locks fs_frlock will do
		 * nothing so that once control returns here we can call the
		 * ofdlock function with the correct fp. For OFD-style locks
		 * the unsupported remote file systems, such as NFS, detect and
		 * reject the OFD-style cmd argument.
		 */
		if ((error = VOP_FRLOCK(vp, cmd, &bf, flag, offset,
		    NULL, fp->f_cred, NULL)) != 0)
			break;

		if (cmd == F_FLOCK || cmd == F_FLOCKW || cmd == F_OFD_GETLK ||
		    cmd == F_OFD_SETLK || cmd == F_OFD_SETLKW) {
			/*
			 * This is an OFD-style lock so we need to handle it
			 * here. Because OFD-style locks are associated with
			 * the file_t we didn't have enough info down the
			 * VOP_FRLOCK path immediately above.
			 */
			if ((error = ofdlock(fp, cmd, &bf, flag, offset)) != 0)
				break;
		}

		/*
		 * As above: when no lock is found for a GETLK request,
		 * only the type field is copied out.
		 */
		if ((cmd == F_GETLK || cmd == F_OFD_GETLK) &&
		    bf.l_type == F_UNLCK) {
			if (copyout(&bf.l_type, &((struct flock *)arg)->l_type,
			    sizeof (bf.l_type)))
				error = EFAULT;
			break;
		}

		if (cmd == F_GETLK || cmd == F_OFD_GETLK) {
			int i;

			/*
			 * We do not want to assume that the flock64 structure
			 * is laid out the same in ILP32 and LP64
			 * environments, so we will copy out the ILP32 version
			 * of flock64 explicitly after copying the native
			 * flock64 structure to it.
			 */
			for (i = 0; i < 4; i++)
				bf64_32.l_pad[i] = 0;
			bf64_32.l_type = (int16_t)bf.l_type;
			bf64_32.l_whence = (int16_t)bf.l_whence;
			bf64_32.l_start = bf.l_start;
			bf64_32.l_len = bf.l_len;
			bf64_32.l_sysid = (int32_t)bf.l_sysid;
			bf64_32.l_pid = (pid32_t)bf.l_pid;
			if (copyout(&bf64_32, (void *)arg, sizeof (bf64_32)))
				error = EFAULT;
		}
		break;
#endif /* !defined(_LP64) || defined(_SYSCALL32_IMPL) */

	case F_SHARE:
	case F_SHARE_NBMAND:
	case F_UNSHARE:

		/*
		 * Copy in input fields only.
		 */
		if (copyin((void *)arg, &fsh, sizeof (fsh))) {
			error = EFAULT;
			break;
		}

		/*
		 * Local share reservations always have this simple form
		 */
		shr.s_access = fsh.f_access;
		shr.s_deny = fsh.f_deny;
		shr.s_sysid = 0;
		shr.s_pid = ttoproc(curthread)->p_pid;
		shr_own.sl_pid = shr.s_pid;
		shr_own.sl_id = fsh.f_id;
		shr.s_own_len = sizeof (shr_own);
		shr.s_owner = (caddr_t)&shr_own;
		error = VOP_SHRLOCK(vp, cmd, &shr, flag, fp->f_cred, NULL);
		break;

	default:
		error = EINVAL;
		break;
	}

	/* Leave the critical region entered by F_ALLOCSP/F_FREESP, if any. */
	if (in_crit)
		nbl_end_crit(vp);

done:
	releasef(fdes);
out:
	if (error)
		return (set_errno(error));
	return (retval);
}

/*
 * Validate the region described by flp (l_whence, l_start, l_len) against
 * the largest permitted offset, max.  offset is the current file offset and
 * is used for SEEK_CUR requests; for SEEK_END requests the file size is
 * obtained from vp with VOP_GETATTR().
 *
 * Returns 0 if the request is acceptable, EINVAL for an unknown l_whence or
 * a region that starts beyond max or ends before it starts, EOVERFLOW if the
 * start or length cannot be represented within max, or the error returned
 * by VOP_GETATTR().
 *
 * Side effect: for an F_UNLCK request with a positive length that ends
 * exactly at max, l_len is rewritten to 0.
 *
 * The forward declaration at the top of the file declares this function
 * static.
 */
int
flock_check(vnode_t *vp, flock64_t *flp, offset_t offset, offset_t max)
{
	struct vattr	vattr;
	int	error;
	u_offset_t start, end;

	/*
	 * Determine the starting point of the request
	 */
	switch (flp->l_whence) {
	case 0:		/* SEEK_SET */
		start = (u_offset_t)flp->l_start;
		if (start > max)
			return (EINVAL);
		break;
	case 1:		/* SEEK_CUR */
		if (flp->l_start > (max - offset))
			return (EOVERFLOW);
		start = (u_offset_t)(flp->l_start + offset);
		if (start > max)
			return (EINVAL);
		break;
	case 2:		/* SEEK_END */
		vattr.va_mask = AT_SIZE;
		if (error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL))
			return (error);
		if (flp->l_start > (max - (offset_t)vattr.va_size))
			return (EOVERFLOW);
		start = (u_offset_t)(flp->l_start + (offset_t)vattr.va_size);
		if (start > max)
			return (EINVAL);
		break;
	default:
		return (EINVAL);
	}

	/*
	 * Determine the range covered by the request.
	 */
	if (flp->l_len == 0)
		end = MAXEND;
	else if ((offset_t)flp->l_len > 0) {
		if (flp->l_len > (max - start + 1))
			return (EOVERFLOW);
		end = (u_offset_t)(start + (flp->l_len - 1));
		ASSERT(end <= max);
	} else {
		/*
		 * Negative length; why do we even allow this ?
		 * Because this allows easy specification of
		 * the last n bytes of the file.
		 *
		 * The region then ends at the computed starting point and
		 * begins at start + l_len + 1.
		 */
		end = start;
		start += (u_offset_t)flp->l_len;
		(start)++;
		if (start > max)
			return (EINVAL);
		ASSERT(end <= max);
	}
	ASSERT(start <= max);
	if (flp->l_type == F_UNLCK && flp->l_len > 0 &&
	    end == (offset_t)max) {
		flp->l_len = 0;
	}
	if (start > end)
		return (EINVAL);
	return (0);
}

/*
 * Compute the absolute starting offset of the request described by flp and
 * store it in *start.  offset is the current file offset (used for
 * SEEK_CUR); for SEEK_END the file size is obtained with VOP_GETATTR().
 *
 * No range checking is done here.  Returns 0 on success, EINVAL for an
 * unknown l_whence, or the error returned by VOP_GETATTR().
 */
static int
flock_get_start(vnode_t *vp, flock64_t *flp, offset_t offset, u_offset_t *start)
{
	struct vattr	vattr;
	int	error;

	/*
	 * Determine the starting point of the request. Assume that it is
	 * a valid starting point.
	 */
	switch (flp->l_whence) {
	case 0:		/* SEEK_SET */
		*start = (u_offset_t)flp->l_start;
		break;
	case 1:		/* SEEK_CUR */
		*start = (u_offset_t)(flp->l_start + offset);
		break;
	case 2:		/* SEEK_END */
		vattr.va_mask = AT_SIZE;
		if (error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL))
			return (error);
		*start = (u_offset_t)(flp->l_start + (offset_t)vattr.va_size);
		break;
	default:
		return (EINVAL);
	}

	return (0);
}

/*
 * Take rctl action when the requested file descriptor is too big.
 *
 * The RLIMIT_NOFILE legacy resource control action is invoked for process p
 * while holding p->p_lock; the return value of rctl_action() is ignored.
 */
static void
fd_too_big(proc_t *p)
{
	mutex_enter(&p->p_lock);
	(void) rctl_action(rctlproc_legacy[RLIMIT_NOFILE],
	    p->p_rctls, p, RCA_SAFE);
	mutex_exit(&p->p_lock);
}