1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
  24  * Copyright (c) 2013, OmniTI Computer Consulting, Inc. All rights reserved.
  25  * Copyright 2015, Joyent, Inc.
  26  */
  27 
  28 /*      Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
  29 /*        All Rights Reserved   */
  30 
  31 /*
  32  * Portions of this source code were derived from Berkeley 4.3 BSD
  33  * under license from the Regents of the University of California.
  34  */
  35 
  36 
  37 #include <sys/param.h>
  38 #include <sys/isa_defs.h>
  39 #include <sys/types.h>
  40 #include <sys/sysmacros.h>
  41 #include <sys/systm.h>
  42 #include <sys/errno.h>
  43 #include <sys/fcntl.h>
  44 #include <sys/flock.h>
  45 #include <sys/vnode.h>
  46 #include <sys/file.h>
  47 #include <sys/mode.h>
  48 #include <sys/proc.h>
  49 #include <sys/filio.h>
  50 #include <sys/share.h>
  51 #include <sys/debug.h>
  52 #include <sys/rctl.h>
  53 #include <sys/nbmlock.h>
  54 
  55 #include <sys/cmn_err.h>
  56 
  57 static int flock_check(vnode_t *, flock64_t *, offset_t, offset_t);
  58 static int flock_get_start(vnode_t *, flock64_t *, offset_t, u_offset_t *);
  59 static void fd_too_big(proc_t *);
  60 
  61 /*
  62  * File control.
  63  */
  64 int
  65 fcntl(int fdes, int cmd, intptr_t arg)
  66 {
  67         int iarg;
  68         int error = 0;
  69         int retval;
  70         proc_t *p;
  71         file_t *fp;
  72         vnode_t *vp;
  73         u_offset_t offset;
  74         u_offset_t start;
  75         struct vattr vattr;
  76         int in_crit;
  77         int flag;
  78         struct flock sbf;
  79         struct flock64 bf;
  80         struct o_flock obf;
  81         struct flock64_32 bf64_32;
  82         struct fshare fsh;
  83         struct shrlock shr;
  84         struct shr_locowner shr_own;
  85         offset_t maxoffset;
  86         model_t datamodel;
  87         int fdres;
  88 
  89 #if defined(_ILP32) && !defined(lint) && defined(_SYSCALL32)
  90         ASSERT(sizeof (struct flock) == sizeof (struct flock32));
  91         ASSERT(sizeof (struct flock64) == sizeof (struct flock64_32));
  92 #endif
  93 #if defined(_LP64) && !defined(lint) && defined(_SYSCALL32)
  94         ASSERT(sizeof (struct flock) == sizeof (struct flock64_64));
  95         ASSERT(sizeof (struct flock64) == sizeof (struct flock64_64));
  96 #endif
  97 
  98         /*
  99          * First, for speed, deal with the subset of cases
 100          * that do not require getf() / releasef().
 101          */
 102         switch (cmd) {
 103         case F_GETFD:
 104                 if ((error = f_getfd_error(fdes, &flag)) == 0)
 105                         retval = flag;
 106                 goto out;
 107 
 108         case F_SETFD:
 109                 error = f_setfd_error(fdes, (int)arg);
 110                 retval = 0;
 111                 goto out;
 112 
 113         case F_GETFL:
 114                 if ((error = f_getfl(fdes, &flag)) == 0) {
 115                         retval = (flag & (FMASK | FASYNC));
 116                         if ((flag & (FSEARCH | FEXEC)) == 0)
 117                                 retval += FOPEN;
 118                         else
 119                                 retval |= (flag & (FSEARCH | FEXEC));
 120                 }
 121                 goto out;
 122 
 123         case F_GETXFL:
 124                 if ((error = f_getfl(fdes, &flag)) == 0) {
 125                         retval = flag;
 126                         if ((flag & (FSEARCH | FEXEC)) == 0)
 127                                 retval += FOPEN;
 128                 }
 129                 goto out;
 130 
 131         case F_BADFD:
 132                 if ((error = f_badfd(fdes, &fdres, (int)arg)) == 0)
 133                         retval = fdres;
 134                 goto out;
 135         }
 136 
 137         /*
 138          * Second, for speed, deal with the subset of cases that
 139          * require getf() / releasef() but do not require copyin.
 140          */
 141         if ((fp = getf(fdes)) == NULL) {
 142                 error = EBADF;
 143                 goto out;
 144         }
 145         iarg = (int)arg;
 146 
 147         switch (cmd) {
 148         case F_DUPFD:
 149         case F_DUPFD_CLOEXEC:
 150                 p = curproc;
 151                 if ((uint_t)iarg >= p->p_fno_ctl) {
 152                         if (iarg >= 0)
 153                                 fd_too_big(p);
 154                         error = EINVAL;
 155                         goto done;
 156                 }
 157                 /*
 158                  * We need to increment the f_count reference counter
 159                  * before allocating a new file descriptor.
  160                  * Doing it the other way round opens a window for race condition
 161                  * with closeandsetf() on the target file descriptor which can
 162                  * close the file still referenced by the original
 163                  * file descriptor.
 164                  */
 165                 mutex_enter(&fp->f_tlock);
 166                 fp->f_count++;
 167                 mutex_exit(&fp->f_tlock);
 168                 if ((retval = ufalloc_file(iarg, fp)) == -1) {
 169                         /*
 170                          * New file descriptor can't be allocated.
 171                          * Revert the reference count.
 172                          */
 173                         mutex_enter(&fp->f_tlock);
 174                         fp->f_count--;
 175                         mutex_exit(&fp->f_tlock);
 176                         error = EMFILE;
 177                 } else {
 178                         if (cmd == F_DUPFD_CLOEXEC) {
 179                                 f_setfd(retval, FD_CLOEXEC);
 180                         }
 181                 }
 182                 goto done;
 183 
 184         case F_DUP2FD_CLOEXEC:
 185                 if (fdes == iarg) {
 186                         error = EINVAL;
 187                         goto done;
 188                 }
 189 
 190                 /*FALLTHROUGH*/
 191 
 192         case F_DUP2FD:
 193                 p = curproc;
 194                 if (fdes == iarg) {
 195                         retval = iarg;
 196                 } else if ((uint_t)iarg >= p->p_fno_ctl) {
 197                         if (iarg >= 0)
 198                                 fd_too_big(p);
 199                         error = EBADF;
 200                 } else {
 201                         /*
 202                          * We can't hold our getf(fdes) across the call to
 203                          * closeandsetf() because it creates a window for
 204                          * deadlock: if one thread is doing dup2(a, b) while
 205                          * another is doing dup2(b, a), each one will block
 206                          * waiting for the other to call releasef().  The
 207                          * solution is to increment the file reference count
 208                          * (which we have to do anyway), then releasef(fdes),
 209                          * then closeandsetf().  Incrementing f_count ensures
 210                          * that fp won't disappear after we call releasef().
  211                          * When closeandsetf() fails, we try to avoid calling
 212                          * closef() because of all the side effects.
 213                          */
 214                         mutex_enter(&fp->f_tlock);
 215                         fp->f_count++;
 216                         mutex_exit(&fp->f_tlock);
 217                         releasef(fdes);
 218                         if ((error = closeandsetf(iarg, fp)) == 0) {
 219                                 if (cmd == F_DUP2FD_CLOEXEC) {
 220                                         f_setfd(iarg, FD_CLOEXEC);
 221                                 }
 222                                 retval = iarg;
 223                         } else {
 224                                 mutex_enter(&fp->f_tlock);
 225                                 if (fp->f_count > 1) {
 226                                         fp->f_count--;
 227                                         mutex_exit(&fp->f_tlock);
 228                                 } else {
 229                                         mutex_exit(&fp->f_tlock);
 230                                         (void) closef(fp);
 231                                 }
 232                         }
 233                         goto out;
 234                 }
 235                 goto done;
 236 
 237         case F_SETFL:
 238                 vp = fp->f_vnode;
 239                 flag = fp->f_flag;
 240                 if ((iarg & (FNONBLOCK|FNDELAY)) == (FNONBLOCK|FNDELAY))
 241                         iarg &= ~FNDELAY;
 242                 if ((error = VOP_SETFL(vp, flag, iarg, fp->f_cred, NULL)) ==
 243                     0) {
 244                         iarg &= FMASK;
 245                         mutex_enter(&fp->f_tlock);
 246                         fp->f_flag &= ~FMASK | (FREAD|FWRITE);
 247                         fp->f_flag |= (iarg - FOPEN) & ~(FREAD|FWRITE);
 248                         mutex_exit(&fp->f_tlock);
 249                 }
 250                 retval = 0;
 251                 goto done;
 252         }
 253 
 254         /*
 255          * Finally, deal with the expensive cases.
 256          */
 257         retval = 0;
 258         in_crit = 0;
 259         maxoffset = MAXOFF_T;
 260         datamodel = DATAMODEL_NATIVE;
 261 #if defined(_SYSCALL32_IMPL)
 262         if ((datamodel = get_udatamodel()) == DATAMODEL_ILP32)
 263                 maxoffset = MAXOFF32_T;
 264 #endif
 265 
 266         vp = fp->f_vnode;
 267         flag = fp->f_flag;
 268         offset = fp->f_offset;
 269 
 270         switch (cmd) {
 271         /*
 272          * The file system and vnode layers understand and implement
 273          * locking with flock64 structures. So here once we pass through
 274          * the test for compatibility as defined by LFS API, (for F_SETLK,
 275          * F_SETLKW, F_GETLK, F_GETLKW, F_OFD_GETLK, F_OFD_SETLK, F_OFD_SETLKW,
 276          * F_FREESP) we transform the flock structure to a flock64 structure
 277          * and send it to the lower layers. Similarly in case of GETLK and
 278          * OFD_GETLK the returned flock64 structure is transformed to a flock
 279          * structure if everything fits in nicely, otherwise we return
 280          * EOVERFLOW.
 281          */
 282 
 283         case F_GETLK:
 284         case F_O_GETLK:
 285         case F_SETLK:
 286         case F_SETLKW:
 287         case F_SETLK_NBMAND:
 288         case F_OFD_GETLK:
 289         case F_OFD_SETLK:
 290         case F_OFD_SETLKW:
 291         case F_FLOCK:
 292         case F_FLOCKW:
 293 
 294                 /*
 295                  * Copy in input fields only.
 296                  */
 297 
 298                 if (cmd == F_O_GETLK) {
 299                         if (datamodel != DATAMODEL_ILP32) {
 300                                 error = EINVAL;
 301                                 break;
 302                         }
 303 
 304                         if (copyin((void *)arg, &obf, sizeof (obf))) {
 305                                 error = EFAULT;
 306                                 break;
 307                         }
 308                         bf.l_type = obf.l_type;
 309                         bf.l_whence = obf.l_whence;
 310                         bf.l_start = (off64_t)obf.l_start;
 311                         bf.l_len = (off64_t)obf.l_len;
 312                         bf.l_sysid = (int)obf.l_sysid;
 313                         bf.l_pid = obf.l_pid;
 314                 } else if (datamodel == DATAMODEL_NATIVE) {
 315                         if (copyin((void *)arg, &sbf, sizeof (sbf))) {
 316                                 error = EFAULT;
 317                                 break;
 318                         }
 319                         /*
 320                          * XXX  In an LP64 kernel with an LP64 application
 321                          *      there's no need to do a structure copy here
 322                          *      struct flock == struct flock64. However,
 323                          *      we did it this way to avoid more conditional
 324                          *      compilation.
 325                          */
 326                         bf.l_type = sbf.l_type;
 327                         bf.l_whence = sbf.l_whence;
 328                         bf.l_start = (off64_t)sbf.l_start;
 329                         bf.l_len = (off64_t)sbf.l_len;
 330                         bf.l_sysid = sbf.l_sysid;
 331                         bf.l_pid = sbf.l_pid;
 332                 }
 333 #if defined(_SYSCALL32_IMPL)
 334                 else {
 335                         struct flock32 sbf32;
 336                         if (copyin((void *)arg, &sbf32, sizeof (sbf32))) {
 337                                 error = EFAULT;
 338                                 break;
 339                         }
 340                         bf.l_type = sbf32.l_type;
 341                         bf.l_whence = sbf32.l_whence;
 342                         bf.l_start = (off64_t)sbf32.l_start;
 343                         bf.l_len = (off64_t)sbf32.l_len;
 344                         bf.l_sysid = sbf32.l_sysid;
 345                         bf.l_pid = sbf32.l_pid;
 346                 }
 347 #endif /* _SYSCALL32_IMPL */
 348 
 349                 /*
 350                  * 64-bit support: check for overflow for 32-bit lock ops
 351                  */
 352                 if ((error = flock_check(vp, &bf, offset, maxoffset)) != 0)
 353                         break;
 354 
 355                 if (cmd == F_FLOCK || cmd == F_FLOCKW) {
 356                         /* FLOCK* locking is always over the entire file. */
 357                         if (bf.l_whence != 0 || bf.l_start != 0 ||
 358                             bf.l_len != 0) {
 359                                 error = EINVAL;
 360                                 break;
 361                         }
 362                         if (bf.l_type < F_RDLCK || bf.l_type > F_UNLCK) {
 363                                 error = EINVAL;
 364                                 break;
 365                         }
 366                 }
 367 
 368                 if (cmd == F_OFD_SETLK || cmd == F_OFD_SETLKW) {
 369                         /*
 370                          * TBD OFD-style locking is currently limited to
 371                          * covering the entire file.
 372                          */
 373                         if (bf.l_whence != 0 || bf.l_start != 0 ||
 374                             bf.l_len != 0) {
 375                                 error = EINVAL;
 376                                 break;
 377                         }
 378                 }
 379 
 380                 /*
 381                  * Not all of the filesystems understand F_O_GETLK, and
 382                  * there's no need for them to know.  Map it to F_GETLK.
 383                  *
 384                  * The *_frlock functions in the various file systems basically
 385                  * do some validation and then funnel everything through the
 386                  * fs_frlock function. For OFD-style locks fs_frlock will do
 387                  * nothing so that once control returns here we can call the
 388                  * ofdlock function with the correct fp. For OFD-style locks
 389                  * the unsupported remote file systems, such as NFS, detect and
 390                  * reject the OFD-style cmd argument.
 391                  */
 392                 if ((error = VOP_FRLOCK(vp, (cmd == F_O_GETLK) ? F_GETLK : cmd,
 393                     &bf, flag, offset, NULL, fp->f_cred, NULL)) != 0)
 394                         break;
 395 
 396                 if (cmd == F_FLOCK || cmd == F_FLOCKW || cmd == F_OFD_GETLK ||
 397                     cmd == F_OFD_SETLK || cmd == F_OFD_SETLKW) {
 398                         /*
 399                          * This is an OFD-style lock so we need to handle it
 400                          * here. Because OFD-style locks are associated with
 401                          * the file_t we didn't have enough info down the
 402                          * VOP_FRLOCK path immediately above.
 403                          */
 404                         if ((error = ofdlock(fp, cmd, &bf, flag, offset)) != 0)
 405                                 break;
 406                 }
 407 
 408                 /*
 409                  * If command is GETLK and no lock is found, only
 410                  * the type field is changed.
 411                  */
 412                 if ((cmd == F_O_GETLK || cmd == F_GETLK ||
 413                     cmd == F_OFD_GETLK) && bf.l_type == F_UNLCK) {
 414                         /* l_type always first entry, always a short */
 415                         if (copyout(&bf.l_type, &((struct flock *)arg)->l_type,
 416                             sizeof (bf.l_type)))
 417                                 error = EFAULT;
 418                         break;
 419                 }
 420 
 421                 if (cmd == F_O_GETLK) {
 422                         /*
 423                          * Return an SVR3 flock structure to the user.
 424                          */
 425                         obf.l_type = (int16_t)bf.l_type;
 426                         obf.l_whence = (int16_t)bf.l_whence;
 427                         obf.l_start = (int32_t)bf.l_start;
 428                         obf.l_len = (int32_t)bf.l_len;
 429                         if (bf.l_sysid > SHRT_MAX || bf.l_pid > SHRT_MAX) {
 430                                 /*
 431                                  * One or both values for the above fields
 432                                  * is too large to store in an SVR3 flock
 433                                  * structure.
 434                                  */
 435                                 error = EOVERFLOW;
 436                                 break;
 437                         }
 438                         obf.l_sysid = (int16_t)bf.l_sysid;
 439                         obf.l_pid = (int16_t)bf.l_pid;
 440                         if (copyout(&obf, (void *)arg, sizeof (obf)))
 441                                 error = EFAULT;
 442                 } else if (cmd == F_GETLK || cmd == F_OFD_GETLK) {
 443                         /*
 444                          * Copy out SVR4 flock.
 445                          */
 446                         int i;
 447 
 448                         if (bf.l_start > maxoffset || bf.l_len > maxoffset) {
 449                                 error = EOVERFLOW;
 450                                 break;
 451                         }
 452 
 453                         if (datamodel == DATAMODEL_NATIVE) {
 454                                 for (i = 0; i < 4; i++)
 455                                         sbf.l_pad[i] = 0;
 456                                 /*
 457                                  * XXX  In an LP64 kernel with an LP64
 458                                  *      application there's no need to do a
 459                                  *      structure copy here as currently
 460                                  *      struct flock == struct flock64.
 461                                  *      We did it this way to avoid more
 462                                  *      conditional compilation.
 463                                  */
 464                                 sbf.l_type = bf.l_type;
 465                                 sbf.l_whence = bf.l_whence;
 466                                 sbf.l_start = (off_t)bf.l_start;
 467                                 sbf.l_len = (off_t)bf.l_len;
 468                                 sbf.l_sysid = bf.l_sysid;
 469                                 sbf.l_pid = bf.l_pid;
 470                                 if (copyout(&sbf, (void *)arg, sizeof (sbf)))
 471                                         error = EFAULT;
 472                         }
 473 #if defined(_SYSCALL32_IMPL)
 474                         else {
 475                                 struct flock32 sbf32;
 476                                 if (bf.l_start > MAXOFF32_T ||
 477                                     bf.l_len > MAXOFF32_T) {
 478                                         error = EOVERFLOW;
 479                                         break;
 480                                 }
 481                                 for (i = 0; i < 4; i++)
 482                                         sbf32.l_pad[i] = 0;
 483                                 sbf32.l_type = (int16_t)bf.l_type;
 484                                 sbf32.l_whence = (int16_t)bf.l_whence;
 485                                 sbf32.l_start = (off32_t)bf.l_start;
 486                                 sbf32.l_len = (off32_t)bf.l_len;
 487                                 sbf32.l_sysid = (int32_t)bf.l_sysid;
 488                                 sbf32.l_pid = (pid32_t)bf.l_pid;
 489                                 if (copyout(&sbf32,
 490                                     (void *)arg, sizeof (sbf32)))
 491                                         error = EFAULT;
 492                         }
 493 #endif
 494                 }
 495                 break;
 496 
 497         case F_CHKFL:
 498                 /*
 499                  * This is for internal use only, to allow the vnode layer
 500                  * to validate a flags setting before applying it.  User
 501                  * programs can't issue it.
 502                  */
 503                 error = EINVAL;
 504                 break;
 505 
 506         case F_ALLOCSP:
 507         case F_FREESP:
 508         case F_ALLOCSP64:
 509         case F_FREESP64:
 510                 /*
 511                  * Test for not-a-regular-file (and returning EINVAL)
 512                  * before testing for open-for-writing (and returning EBADF).
 513                  * This is relied upon by posix_fallocate() in libc.
 514                  */
 515                 if (vp->v_type != VREG) {
 516                         error = EINVAL;
 517                         break;
 518                 }
 519 
 520                 if ((flag & FWRITE) == 0) {
 521                         error = EBADF;
 522                         break;
 523                 }
 524 
 525                 if (datamodel != DATAMODEL_ILP32 &&
 526                     (cmd == F_ALLOCSP64 || cmd == F_FREESP64)) {
 527                         error = EINVAL;
 528                         break;
 529                 }
 530 
 531 #if defined(_ILP32) || defined(_SYSCALL32_IMPL)
 532                 if (datamodel == DATAMODEL_ILP32 &&
 533                     (cmd == F_ALLOCSP || cmd == F_FREESP)) {
 534                         struct flock32 sbf32;
 535                         /*
 536                          * For compatibility we overlay an SVR3 flock on an SVR4
 537                          * flock.  This works because the input field offsets
 538                          * in "struct flock" were preserved.
 539                          */
 540                         if (copyin((void *)arg, &sbf32, sizeof (sbf32))) {
 541                                 error = EFAULT;
 542                                 break;
 543                         } else {
 544                                 bf.l_type = sbf32.l_type;
 545                                 bf.l_whence = sbf32.l_whence;
 546                                 bf.l_start = (off64_t)sbf32.l_start;
 547                                 bf.l_len = (off64_t)sbf32.l_len;
 548                                 bf.l_sysid = sbf32.l_sysid;
 549                                 bf.l_pid = sbf32.l_pid;
 550                         }
 551                 }
 552 #endif /* _ILP32 || _SYSCALL32_IMPL */
 553 
 554 #if defined(_LP64)
 555                 if (datamodel == DATAMODEL_LP64 &&
 556                     (cmd == F_ALLOCSP || cmd == F_FREESP)) {
 557                         if (copyin((void *)arg, &bf, sizeof (bf))) {
 558                                 error = EFAULT;
 559                                 break;
 560                         }
 561                 }
 562 #endif /* defined(_LP64) */
 563 
 564 #if !defined(_LP64) || defined(_SYSCALL32_IMPL)
 565                 if (datamodel == DATAMODEL_ILP32 &&
 566                     (cmd == F_ALLOCSP64 || cmd == F_FREESP64)) {
 567                         if (copyin((void *)arg, &bf64_32, sizeof (bf64_32))) {
 568                                 error = EFAULT;
 569                                 break;
 570                         } else {
 571                                 /*
 572                                  * Note that the size of flock64 is different in
 573                                  * the ILP32 and LP64 models, due to the l_pad
 574                                  * field. We do not want to assume that the
 575                                  * flock64 structure is laid out the same in
 576                                  * ILP32 and LP64 environments, so we will
 577                                  * copy in the ILP32 version of flock64
 578                                  * explicitly and copy it to the native
 579                                  * flock64 structure.
 580                                  */
 581                                 bf.l_type = (short)bf64_32.l_type;
 582                                 bf.l_whence = (short)bf64_32.l_whence;
 583                                 bf.l_start = bf64_32.l_start;
 584                                 bf.l_len = bf64_32.l_len;
 585                                 bf.l_sysid = (int)bf64_32.l_sysid;
 586                                 bf.l_pid = (pid_t)bf64_32.l_pid;
 587                         }
 588                 }
 589 #endif /* !defined(_LP64) || defined(_SYSCALL32_IMPL) */
 590 
 591                 if (cmd == F_ALLOCSP || cmd == F_FREESP)
 592                         error = flock_check(vp, &bf, offset, maxoffset);
 593                 else if (cmd == F_ALLOCSP64 || cmd == F_FREESP64)
 594                         error = flock_check(vp, &bf, offset, MAXOFFSET_T);
 595                 if (error)
 596                         break;
 597 
 598                 if (vp->v_type == VREG && bf.l_len == 0 &&
 599                     bf.l_start > OFFSET_MAX(fp)) {
 600                         error = EFBIG;
 601                         break;
 602                 }
 603 
 604                 /*
 605                  * Make sure that there are no conflicting non-blocking
 606                  * mandatory locks in the region being manipulated. If
 607                  * there are such locks then return EACCES.
 608                  */
 609                 if ((error = flock_get_start(vp, &bf, offset, &start)) != 0)
 610                         break;
 611 
 612                 if (nbl_need_check(vp)) {
 613                         u_offset_t      begin;
 614                         ssize_t         length;
 615 
 616                         nbl_start_crit(vp, RW_READER);
 617                         in_crit = 1;
 618                         vattr.va_mask = AT_SIZE;
 619                         if ((error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL))
 620                             != 0)
 621                                 break;
 622                         begin = start > vattr.va_size ? vattr.va_size : start;
 623                         length = vattr.va_size > start ? vattr.va_size - start :
 624                             start - vattr.va_size;
 625                         if (nbl_conflict(vp, NBL_WRITE, begin, length, 0,
 626                             NULL)) {
 627                                 error = EACCES;
 628                                 break;
 629                         }
 630                 }
 631 
 632                 if (cmd == F_ALLOCSP64)
 633                         cmd = F_ALLOCSP;
 634                 else if (cmd == F_FREESP64)
 635                         cmd = F_FREESP;
 636 
 637                 error = VOP_SPACE(vp, cmd, &bf, flag, offset, fp->f_cred, NULL);
 638 
 639                 break;
 640 
 641 #if !defined(_LP64) || defined(_SYSCALL32_IMPL)
 642         case F_GETLK64:
 643         case F_SETLK64:
 644         case F_SETLKW64:
 645         case F_SETLK64_NBMAND:
 646         case F_OFD_GETLK64:
 647         case F_OFD_SETLK64:
 648         case F_OFD_SETLKW64:
 649         case F_FLOCK64:
 650         case F_FLOCKW64:
 651                 /*
 652                  * Large Files: Here we set cmd as *LK and send it to
 653                  * lower layers. *LK64 is only for the user land.
 654                  * Most of the comments described above for F_SETLK
 655                  * applies here too.
 656                  * Large File support is only needed for ILP32 apps!
 657                  */
 658                 if (datamodel != DATAMODEL_ILP32) {
 659                         error = EINVAL;
 660                         break;
 661                 }
 662 
 663                 if (cmd == F_GETLK64)
 664                         cmd = F_GETLK;
 665                 else if (cmd == F_SETLK64)
 666                         cmd = F_SETLK;
 667                 else if (cmd == F_SETLKW64)
 668                         cmd = F_SETLKW;
 669                 else if (cmd == F_SETLK64_NBMAND)
 670                         cmd = F_SETLK_NBMAND;
 671                 else if (cmd == F_OFD_GETLK64)
 672                         cmd = F_OFD_GETLK;
 673                 else if (cmd == F_OFD_SETLK64)
 674                         cmd = F_OFD_SETLK;
 675                 else if (cmd == F_OFD_SETLKW64)
 676                         cmd = F_OFD_SETLKW;
 677                 else if (cmd == F_FLOCK64)
 678                         cmd = F_FLOCK;
 679                 else if (cmd == F_FLOCKW64)
 680                         cmd = F_FLOCKW;
 681 
 682                 /*
 683                  * Note that the size of flock64 is different in the ILP32
 684                  * and LP64 models, due to the sucking l_pad field.
 685                  * We do not want to assume that the flock64 structure is
  686                  * laid out the same in ILP32 and LP64 environments, so
 687                  * we will copy in the ILP32 version of flock64 explicitly
 688                  * and copy it to the native flock64 structure.
 689                  */
 690 
 691                 if (copyin((void *)arg, &bf64_32, sizeof (bf64_32))) {
 692                         error = EFAULT;
 693                         break;
 694                 }
 695 
 696                 bf.l_type = (short)bf64_32.l_type;
 697                 bf.l_whence = (short)bf64_32.l_whence;
 698                 bf.l_start = bf64_32.l_start;
 699                 bf.l_len = bf64_32.l_len;
 700                 bf.l_sysid = (int)bf64_32.l_sysid;
 701                 bf.l_pid = (pid_t)bf64_32.l_pid;
 702 
 703                 if ((error = flock_check(vp, &bf, offset, MAXOFFSET_T)) != 0)
 704                         break;
 705 
 706                 if (cmd == F_FLOCK || cmd == F_FLOCKW) {
 707                         /* FLOCK* locking is always over the entire file. */
 708                         if (bf.l_whence != 0 || bf.l_start != 0 ||
 709                             bf.l_len != 0) {
 710                                 error = EINVAL;
 711                                 break;
 712                         }
 713                         if (bf.l_type < F_RDLCK || bf.l_type > F_UNLCK) {
 714                                 error = EINVAL;
 715                                 break;
 716                         }
 717                 }
 718 
 719                 if (cmd == F_OFD_SETLK || cmd == F_OFD_SETLKW) {
 720                         /*
 721                          * TBD OFD-style locking is currently limited to
 722                          * covering the entire file.
 723                          */
 724                         if (bf.l_whence != 0 || bf.l_start != 0 ||
 725                             bf.l_len != 0) {
 726                                 error = EINVAL;
 727                                 break;
 728                         }
 729                 }
 730 
 731                 /*
 732                  * The *_frlock functions in the various file systems basically
 733                  * do some validation and then funnel everything through the
 734                  * fs_frlock function. For OFD-style locks fs_frlock will do
 735                  * nothing so that once control returns here we can call the
 736                  * ofdlock function with the correct fp. For OFD-style locks
 737                  * the unsupported remote file systems, such as NFS, detect and
 738                  * reject the OFD-style cmd argument.
 739                  */
 740                 if ((error = VOP_FRLOCK(vp, cmd, &bf, flag, offset,
 741                     NULL, fp->f_cred, NULL)) != 0)
 742                         break;
 743 
 744                 if (cmd == F_FLOCK || cmd == F_FLOCKW || cmd == F_OFD_GETLK ||
 745                     cmd == F_OFD_SETLK || cmd == F_OFD_SETLKW) {
 746                         /*
 747                          * This is an OFD-style lock so we need to handle it
 748                          * here. Because OFD-style locks are associated with
 749                          * the file_t we didn't have enough info down the
 750                          * VOP_FRLOCK path immediately above.
 751                          */
 752                         if ((error = ofdlock(fp, cmd, &bf, flag, offset)) != 0)
 753                                 break;
 754                 }
 755 
 756                 if ((cmd == F_GETLK || cmd == F_OFD_GETLK) &&
 757                     bf.l_type == F_UNLCK) {
 758                         if (copyout(&bf.l_type, &((struct flock *)arg)->l_type,
 759                             sizeof (bf.l_type)))
 760                                 error = EFAULT;
 761                         break;
 762                 }
 763 
 764                 if (cmd == F_GETLK || cmd == F_OFD_GETLK) {
 765                         int i;
 766 
 767                         /*
 768                          * We do not want to assume that the flock64 structure
 769                          * is laid out in the same in ILP32 and LP64
 770                          * environments, so we will copy out the ILP32 version
 771                          * of flock64 explicitly after copying the native
 772                          * flock64 structure to it.
 773                          */
 774                         for (i = 0; i < 4; i++)
 775                                 bf64_32.l_pad[i] = 0;
 776                         bf64_32.l_type = (int16_t)bf.l_type;
 777                         bf64_32.l_whence = (int16_t)bf.l_whence;
 778                         bf64_32.l_start = bf.l_start;
 779                         bf64_32.l_len = bf.l_len;
 780                         bf64_32.l_sysid = (int32_t)bf.l_sysid;
 781                         bf64_32.l_pid = (pid32_t)bf.l_pid;
 782                         if (copyout(&bf64_32, (void *)arg, sizeof (bf64_32)))
 783                                 error = EFAULT;
 784                 }
 785                 break;
 786 #endif /* !defined(_LP64) || defined(_SYSCALL32_IMPL) */
 787 
 788         case F_SHARE:
 789         case F_SHARE_NBMAND:
 790         case F_UNSHARE:
 791 
 792                 /*
 793                  * Copy in input fields only.
 794                  */
 795                 if (copyin((void *)arg, &fsh, sizeof (fsh))) {
 796                         error = EFAULT;
 797                         break;
 798                 }
 799 
 800                 /*
 801                  * Local share reservations always have this simple form
 802                  */
 803                 shr.s_access = fsh.f_access;
 804                 shr.s_deny = fsh.f_deny;
 805                 shr.s_sysid = 0;
 806                 shr.s_pid = ttoproc(curthread)->p_pid;
 807                 shr_own.sl_pid = shr.s_pid;
 808                 shr_own.sl_id = fsh.f_id;
 809                 shr.s_own_len = sizeof (shr_own);
 810                 shr.s_owner = (caddr_t)&shr_own;
 811                 error = VOP_SHRLOCK(vp, cmd, &shr, flag, fp->f_cred, NULL);
 812                 break;
 813 
 814         default:
 815                 error = EINVAL;
 816                 break;
 817         }
 818 
 819         if (in_crit)
 820                 nbl_end_crit(vp);
 821 
 822 done:
 823         releasef(fdes);
 824 out:
 825         if (error)
 826                 return (set_errno(error));
 827         return (retval);
 828 }
 829 
/*
 * Validate and normalize a lock request against the byte range [0, max].
 * On input, flp describes a range relative to l_whence (SEEK_SET/CUR/END);
 * this routine verifies that the resulting absolute range lies entirely
 * within [0, max], returning EINVAL for a malformed or out-of-range
 * request and EOVERFLOW when the arithmetic would exceed max.
 *
 * 'offset' is the file pointer used for SEEK_CUR; the file size is
 * fetched via VOP_GETATTR for SEEK_END.  flp may be modified in place
 * (see the F_UNLCK normalization at the bottom).
 */
int
flock_check(vnode_t *vp, flock64_t *flp, offset_t offset, offset_t max)
{
	struct vattr	vattr;
	int	error;
	u_offset_t start, end;

	/*
	 * Determine the starting point of the request
	 */
	switch (flp->l_whence) {
	case 0:		/* SEEK_SET */
		start = (u_offset_t)flp->l_start;
		if (start > max)
			return (EINVAL);
		break;
	case 1:		/* SEEK_CUR */
		/*
		 * Check before adding so that l_start + offset cannot
		 * exceed max; a negative l_start that would wrap the
		 * unsigned cast is caught by the start > max test below.
		 */
		if (flp->l_start > (max - offset))
			return (EOVERFLOW);
		start = (u_offset_t)(flp->l_start + offset);
		if (start > max)
			return (EINVAL);
		break;
	case 2:		/* SEEK_END */
		/* Base the range on the current file size. */
		vattr.va_mask = AT_SIZE;
		if (error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL))
			return (error);
		if (flp->l_start > (max - (offset_t)vattr.va_size))
			return (EOVERFLOW);
		start = (u_offset_t)(flp->l_start + (offset_t)vattr.va_size);
		if (start > max)
			return (EINVAL);
		break;
	default:
		return (EINVAL);
	}

	/*
	 * Determine the range covered by the request.
	 */
	if (flp->l_len == 0)
		end = MAXEND;	/* zero length means "to end of file" */
	else if ((offset_t)flp->l_len > 0) {
		if (flp->l_len > (max - start + 1))
			return (EOVERFLOW);
		end = (u_offset_t)(start + (flp->l_len - 1));
		ASSERT(end <= max);
	} else {
		/*
		 * Negative length; why do we even allow this ?
		 * Because this allows easy specification of
		 * the last n bytes of the file.
		 */
		end = start;
		start += (u_offset_t)flp->l_len;
		(start)++;
		if (start > max)
			return (EINVAL);
		ASSERT(end <= max);
	}
	ASSERT(start <= max);
	/*
	 * A positive-length unlock that ends exactly at max is rewritten
	 * as an unbounded unlock (l_len = 0), i.e. "unlock to EOF", so it
	 * also releases any lock extending beyond max.
	 */
	if (flp->l_type == F_UNLCK && flp->l_len > 0 &&
	    end == (offset_t)max) {
		flp->l_len = 0;
	}
	if (start  > end)
		return (EINVAL);
	return (0);
}
 899 
 900 static int
 901 flock_get_start(vnode_t *vp, flock64_t *flp, offset_t offset, u_offset_t *start)
 902 {
 903         struct vattr    vattr;
 904         int     error;
 905 
 906         /*
 907          * Determine the starting point of the request. Assume that it is
 908          * a valid starting point.
 909          */
 910         switch (flp->l_whence) {
 911         case 0:         /* SEEK_SET */
 912                 *start = (u_offset_t)flp->l_start;
 913                 break;
 914         case 1:         /* SEEK_CUR */
 915                 *start = (u_offset_t)(flp->l_start + offset);
 916                 break;
 917         case 2:         /* SEEK_END */
 918                 vattr.va_mask = AT_SIZE;
 919                 if (error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL))
 920                         return (error);
 921                 *start = (u_offset_t)(flp->l_start + (offset_t)vattr.va_size);
 922                 break;
 923         default:
 924                 return (EINVAL);
 925         }
 926 
 927         return (0);
 928 }
 929 
 930 /*
 931  * Take rctl action when the requested file descriptor is too big.
 932  */
 933 static void
 934 fd_too_big(proc_t *p)
 935 {
 936         mutex_enter(&p->p_lock);
 937         (void) rctl_action(rctlproc_legacy[RLIMIT_NOFILE],
 938             p->p_rctls, p, RCA_SAFE);
 939         mutex_exit(&p->p_lock);
 940 }