1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
  24  * Copyright (c) 2013, OmniTI Computer Consulting, Inc. All rights reserved.
  25  * Copyright 2015, Joyent, Inc.
  26  */
  27 
  28 /*      Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
  29 /*        All Rights Reserved   */
  30 
  31 /*
  32  * Portions of this source code were derived from Berkeley 4.3 BSD
  33  * under license from the Regents of the University of California.
  34  */
  35 
  36 
  37 #include <sys/param.h>
  38 #include <sys/isa_defs.h>
  39 #include <sys/types.h>
  40 #include <sys/sysmacros.h>
  41 #include <sys/systm.h>
  42 #include <sys/errno.h>
  43 #include <sys/fcntl.h>
  44 #include <sys/flock.h>
  45 #include <sys/vnode.h>
  46 #include <sys/file.h>
  47 #include <sys/mode.h>
  48 #include <sys/proc.h>
  49 #include <sys/filio.h>
  50 #include <sys/share.h>
  51 #include <sys/debug.h>
  52 #include <sys/rctl.h>
  53 #include <sys/nbmlock.h>
  54 
  55 #include <sys/cmn_err.h>
  56 
  57 static int flock_check(vnode_t *, flock64_t *, offset_t, offset_t);
  58 static int flock_get_start(vnode_t *, flock64_t *, offset_t, u_offset_t *);
  59 static void fd_too_big(proc_t *);
  60 
  61 /*
  62  * File control.
  63  */
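     /*
      * Illustrative userland sketch (not part of this file): a typical
      * caller reaches this handler through the fcntl(2) wrapper in libc,
      * e.g. to toggle non-blocking I/O on a descriptor:
      *
      *        #include <fcntl.h>
      *
      *        int
      *        set_nonblock(int fd)
      *        {
      *                int flags = fcntl(fd, F_GETFL, 0);
      *
      *                if (flags == -1)
      *                        return (-1);
      *                return (fcntl(fd, F_SETFL, flags | O_NONBLOCK));
      *        }
      *
      * The F_GETFL and F_SETFL cases below service both calls.
      */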
  64 int
  65 fcntl(int fdes, int cmd, intptr_t arg)
  66 {
  67         int iarg;
  68         int error = 0;
  69         int retval;
  70         proc_t *p;
  71         file_t *fp;
  72         vnode_t *vp;
  73         u_offset_t offset;
  74         u_offset_t start;
  75         struct vattr vattr;
  76         int in_crit;
  77         int flag;
  78         struct flock sbf;
  79         struct flock64 bf;
  80         struct o_flock obf;
  81         struct flock64_32 bf64_32;
  82         struct fshare fsh;
  83         struct shrlock shr;
  84         struct shr_locowner shr_own;
  85         offset_t maxoffset;
  86         model_t datamodel;
  87         int fdres;
  88 
  89 #if defined(_ILP32) && !defined(lint) && defined(_SYSCALL32)
  90         ASSERT(sizeof (struct flock) == sizeof (struct flock32));
  91         ASSERT(sizeof (struct flock64) == sizeof (struct flock64_32));
  92 #endif
  93 #if defined(_LP64) && !defined(lint) && defined(_SYSCALL32)
  94         ASSERT(sizeof (struct flock) == sizeof (struct flock64_64));
  95         ASSERT(sizeof (struct flock64) == sizeof (struct flock64_64));
  96 #endif
  97 
  98         /*
  99          * First, for speed, deal with the subset of cases
 100          * that do not require getf() / releasef().
 101          */
 102         switch (cmd) {
 103         case F_GETFD:
 104                 if ((error = f_getfd_error(fdes, &flag)) == 0)
 105                         retval = flag;
 106                 goto out;
 107 
 108         case F_SETFD:
 109                 error = f_setfd_error(fdes, (int)arg);
 110                 retval = 0;
 111                 goto out;
 112 
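             /*
              * For F_GETFL the access mode stored in f_flag uses the kernel
              * FREAD/FWRITE encoding (the user's open mode plus one), so the
              * "+ FOPEN" below (assuming the historical FOPEN == -1 from
              * sys/file.h) converts it back to the O_RDONLY/O_WRONLY/O_RDWR
              * value the caller expects; FSEARCH/FEXEC descriptors are
              * reported as-is.
              */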
 113         case F_GETFL:
 114                 if ((error = f_getfl(fdes, &flag)) == 0) {
 115                         retval = (flag & (FMASK | FASYNC));
 116                         if ((flag & (FSEARCH | FEXEC)) == 0)
 117                                 retval += FOPEN;
 118                         else
 119                                 retval |= (flag & (FSEARCH | FEXEC));
 120                 }
 121                 goto out;
 122 
 123         case F_GETXFL:
 124                 if ((error = f_getfl(fdes, &flag)) == 0) {
 125                         retval = flag;
 126                         if ((flag & (FSEARCH | FEXEC)) == 0)
 127                                 retval += FOPEN;
 128                 }
 129                 goto out;
 130 
 131         case F_BADFD:
 132                 if ((error = f_badfd(fdes, &fdres, (int)arg)) == 0)
 133                         retval = fdres;
 134                 goto out;
 135         }
 136 
 137         /*
 138          * Second, for speed, deal with the subset of cases that
 139          * require getf() / releasef() but do not require copyin.
 140          */
 141         if ((fp = getf(fdes)) == NULL) {
 142                 error = EBADF;
 143                 goto out;
 144         }
 145         iarg = (int)arg;
 146 
 147         switch (cmd) {
 148         case F_DUPFD:
 149         case F_DUPFD_CLOEXEC:
 150                 p = curproc;
 151                 if ((uint_t)iarg >= p->p_fno_ctl) {
 152                         if (iarg >= 0)
 153                                 fd_too_big(p);
 154                         error = EINVAL;
 155                         goto done;
 156                 }
 157                 /*
 158                  * We need to increment the f_count reference counter
 159                  * before allocating a new file descriptor.
 160                  * Doing it the other way around opens a window for a race
 161                  * with closeandsetf() on the target file descriptor, which
 162                  * could close the file while it is still referenced by the
 163                  * original file descriptor.
 164                  */
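                     /*
                      * E.g., if ufalloc_file() ran before the f_count increment
                      * below, a concurrent close() or closeandsetf() on the new
                      * descriptor could drop what it sees as the last reference
                      * and free the file_t that fdes still references.
                      */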
 165                 mutex_enter(&fp->f_tlock);
 166                 fp->f_count++;
 167                 mutex_exit(&fp->f_tlock);
 168                 if ((retval = ufalloc_file(iarg, fp)) == -1) {
 169                         /*
 170                          * New file descriptor can't be allocated.
 171                          * Revert the reference count.
 172                          */
 173                         mutex_enter(&fp->f_tlock);
 174                         fp->f_count--;
 175                         mutex_exit(&fp->f_tlock);
 176                         error = EMFILE;
 177                 } else {
 178                         if (cmd == F_DUPFD_CLOEXEC) {
 179                                 f_setfd(retval, FD_CLOEXEC);
 180                         }
 181                 }
 182 
 183                 if (error == 0 && fp->f_vnode != NULL) {
 184                         (void) VOP_IOCTL(fp->f_vnode, F_ASSOCI_PID,
 185                             (intptr_t)p->p_pidp->pid_id, FKIOCTL, kcred,
 186                             NULL, NULL);
 187                 }
 188 
 189                 goto done;
 190 
 191         case F_DUP2FD_CLOEXEC:
 192                 if (fdes == iarg) {
 193                         error = EINVAL;
 194                         goto done;
 195                 }
 196 
 197                 /*FALLTHROUGH*/
 198 
 199         case F_DUP2FD:
 200                 p = curproc;
 201                 if (fdes == iarg) {
 202                         retval = iarg;
 203                 } else if ((uint_t)iarg >= p->p_fno_ctl) {
 204                         if (iarg >= 0)
 205                                 fd_too_big(p);
 206                         error = EBADF;
 207                 } else {
 208                         /*
 209                          * We can't hold our getf(fdes) across the call to
 210                          * closeandsetf() because it creates a window for
 211                          * deadlock: if one thread is doing dup2(a, b) while
 212                          * another is doing dup2(b, a), each one will block
 213                          * waiting for the other to call releasef().  The
 214                          * solution is to increment the file reference count
 215                          * (which we have to do anyway), then releasef(fdes),
 216                          * then closeandsetf().  Incrementing f_count ensures
 217                          * that fp won't disappear after we call releasef().
 218                          * When closeandsetf() fails, we try to avoid calling
 219                          * closef() because of all its side effects.
 220                          */
 221                         mutex_enter(&fp->f_tlock);
 222                         fp->f_count++;
 223                         mutex_exit(&fp->f_tlock);
 224                         releasef(fdes);
 225 
 226                         /* associate the pid now; undone below on failure */
 227                         if (fp->f_vnode != NULL) {
 228                                 (void) VOP_IOCTL(fp->f_vnode, F_ASSOCI_PID,
 229                                     (intptr_t)p->p_pidp->pid_id, FKIOCTL,
 230                                     kcred, NULL, NULL);
 231                         }
 232 
 233                         if ((error = closeandsetf(iarg, fp)) == 0) {
 234                                 if (cmd == F_DUP2FD_CLOEXEC) {
 235                                         f_setfd(iarg, FD_CLOEXEC);
 236                                 }
 237                                 retval = iarg;
 238                         } else {
 239                                 mutex_enter(&fp->f_tlock);
 240                                 if (fp->f_count > 1) {
 241                                         fp->f_count--;
 242                                         mutex_exit(&fp->f_tlock);
 243                                         if (fp->f_vnode != NULL) {
 244                                                 (void) VOP_IOCTL(fp->f_vnode,
 245                                                     F_DASSOC_PID,
 246                                                     (intptr_t)p->p_pidp->pid_id,
 247                                                     FKIOCTL, kcred, NULL, NULL);
 248                                         }
 249 
 250                                 } else {
 251                                         mutex_exit(&fp->f_tlock);
 252                                         (void) closef(fp);
 253                                 }
 254                         }
 255                         goto out;
 256                 }
 257                 goto done;
 258 
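             /*
              * F_SETFL may change only the status flags within FMASK; the
              * FREAD/FWRITE access mode bits are preserved, and a request
              * that sets both FNONBLOCK and FNDELAY is normalized to
              * FNONBLOCK alone before VOP_SETFL() is called.
              */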
 259         case F_SETFL:
 260                 vp = fp->f_vnode;
 261                 flag = fp->f_flag;
 262                 if ((iarg & (FNONBLOCK|FNDELAY)) == (FNONBLOCK|FNDELAY))
 263                         iarg &= ~FNDELAY;
 264                 if ((error = VOP_SETFL(vp, flag, iarg, fp->f_cred, NULL)) ==
 265                     0) {
 266                         iarg &= FMASK;
 267                         mutex_enter(&fp->f_tlock);
 268                         fp->f_flag &= ~FMASK | (FREAD|FWRITE);
 269                         fp->f_flag |= (iarg - FOPEN) & ~(FREAD|FWRITE);
 270                         mutex_exit(&fp->f_tlock);
 271                 }
 272                 retval = 0;
 273                 goto done;
 274         }
 275 
 276         /*
 277          * Finally, deal with the expensive cases.
 278          */
 279         retval = 0;
 280         in_crit = 0;
 281         maxoffset = MAXOFF_T;
 282         datamodel = DATAMODEL_NATIVE;
 283 #if defined(_SYSCALL32_IMPL)
 284         if ((datamodel = get_udatamodel()) == DATAMODEL_ILP32)
 285                 maxoffset = MAXOFF32_T;
 286 #endif
 287 
 288         vp = fp->f_vnode;
 289         flag = fp->f_flag;
 290         offset = fp->f_offset;
 291 
 292         switch (cmd) {
 293         /*
 294          * The file system and vnode layers understand and implement
 295          * locking with flock64 structures. So here, once we pass the
 296          * compatibility test defined by the LFS API (for F_SETLK,
 297          * F_SETLKW, F_GETLK, F_OFD_GETLK, F_OFD_SETLK, F_OFD_SETLKW and
 298          * F_FREESP), we transform the flock structure to a flock64
 299          * structure and send it to the lower layers. Similarly, for
 300          * F_GETLK and F_OFD_GETLK the returned flock64 structure is
 301          * transformed back to a flock structure if everything fits
 302          * nicely; otherwise we return EOVERFLOW.
 303          */
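         /*
          * Illustrative userland sketch (not part of this file): a process
          * typically takes a whole-file write lock with
          *
          *        struct flock fl;
          *
          *        fl.l_type = F_WRLCK;
          *        fl.l_whence = SEEK_SET;
          *        fl.l_start = 0;
          *        fl.l_len = 0;
          *        (void) fcntl(fd, F_SETLKW, &fl);
          *
          * where an l_len of zero means "through end of file"; that call
          * arrives here as cmd == F_SETLKW with arg pointing at the
          * caller's structure.
          */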
 304 
 305         case F_GETLK:
 306         case F_O_GETLK:
 307         case F_SETLK:
 308         case F_SETLKW:
 309         case F_SETLK_NBMAND:
 310         case F_OFD_GETLK:
 311         case F_OFD_SETLK:
 312         case F_OFD_SETLKW:
 313         case F_FLOCK:
 314         case F_FLOCKW:
 315 
 316                 /*
 317                  * Copy in input fields only.
 318                  */
 319 
 320                 if (cmd == F_O_GETLK) {
 321                         if (datamodel != DATAMODEL_ILP32) {
 322                                 error = EINVAL;
 323                                 break;
 324                         }
 325 
 326                         if (copyin((void *)arg, &obf, sizeof (obf))) {
 327                                 error = EFAULT;
 328                                 break;
 329                         }
 330                         bf.l_type = obf.l_type;
 331                         bf.l_whence = obf.l_whence;
 332                         bf.l_start = (off64_t)obf.l_start;
 333                         bf.l_len = (off64_t)obf.l_len;
 334                         bf.l_sysid = (int)obf.l_sysid;
 335                         bf.l_pid = obf.l_pid;
 336                 } else if (datamodel == DATAMODEL_NATIVE) {
 337                         if (copyin((void *)arg, &sbf, sizeof (sbf))) {
 338                                 error = EFAULT;
 339                                 break;
 340                         }
 341                         /*
 342                          * XXX  In an LP64 kernel with an LP64 application
 343                          *      there's no need to do a structure copy here
 344                          *      as struct flock == struct flock64. However,
 345                          *      we did it this way to avoid more conditional
 346                          *      compilation.
 347                          */
 348                         bf.l_type = sbf.l_type;
 349                         bf.l_whence = sbf.l_whence;
 350                         bf.l_start = (off64_t)sbf.l_start;
 351                         bf.l_len = (off64_t)sbf.l_len;
 352                         bf.l_sysid = sbf.l_sysid;
 353                         bf.l_pid = sbf.l_pid;
 354                 }
 355 #if defined(_SYSCALL32_IMPL)
 356                 else {
 357                         struct flock32 sbf32;
 358                         if (copyin((void *)arg, &sbf32, sizeof (sbf32))) {
 359                                 error = EFAULT;
 360                                 break;
 361                         }
 362                         bf.l_type = sbf32.l_type;
 363                         bf.l_whence = sbf32.l_whence;
 364                         bf.l_start = (off64_t)sbf32.l_start;
 365                         bf.l_len = (off64_t)sbf32.l_len;
 366                         bf.l_sysid = sbf32.l_sysid;
 367                         bf.l_pid = sbf32.l_pid;
 368                 }
 369 #endif /* _SYSCALL32_IMPL */
 370 
 371                 /*
 372                  * 64-bit support: check for overflow for 32-bit lock ops
 373                  */
 374                 if ((error = flock_check(vp, &bf, offset, maxoffset)) != 0)
 375                         break;
 376 
 377                 if (cmd == F_FLOCK || cmd == F_FLOCKW) {
 378                         /* FLOCK* locking is always over the entire file. */
 379                         if (bf.l_whence != 0 || bf.l_start != 0 ||
 380                             bf.l_len != 0) {
 381                                 error = EINVAL;
 382                                 break;
 383                         }
 384                         if (bf.l_type < F_RDLCK || bf.l_type > F_UNLCK) {
 385                                 error = EINVAL;
 386                                 break;
 387                         }
 388                 }
 389 
 390                 if (cmd == F_OFD_SETLK || cmd == F_OFD_SETLKW) {
 391                         /*
 392                          * TBD OFD-style locking is currently limited to
 393                          * covering the entire file.
 394                          */
 395                         if (bf.l_whence != 0 || bf.l_start != 0 ||
 396                             bf.l_len != 0) {
 397                                 error = EINVAL;
 398                                 break;
 399                         }
 400                 }
 401 
 402                 /*
 403                  * Not all of the filesystems understand F_O_GETLK, and
 404                  * there's no need for them to know.  Map it to F_GETLK.
 405                  *
 406                  * The *_frlock functions in the various file systems basically
 407                  * do some validation and then funnel everything through the
 408                  * fs_frlock function. For OFD-style locks fs_frlock will do
 409                  * nothing so that once control returns here we can call the
 410                  * ofdlock function with the correct fp. Remote file systems
 411                  * that do not support OFD-style locks, such as NFS, detect
 412                  * and reject the OFD-style cmd argument.
 413                  */
 414                 if ((error = VOP_FRLOCK(vp, (cmd == F_O_GETLK) ? F_GETLK : cmd,
 415                     &bf, flag, offset, NULL, fp->f_cred, NULL)) != 0)
 416                         break;
 417 
 418                 if (cmd == F_FLOCK || cmd == F_FLOCKW || cmd == F_OFD_GETLK ||
 419                     cmd == F_OFD_SETLK || cmd == F_OFD_SETLKW) {
 420                         /*
 421                          * This is an OFD-style lock so we need to handle it
 422                          * here. Because OFD-style locks are associated with
 423                          * the file_t we didn't have enough info down the
 424                          * VOP_FRLOCK path immediately above.
 425                          */
 426                         if ((error = ofdlock(fp, cmd, &bf, flag, offset)) != 0)
 427                                 break;
 428                 }
 429 
 430                 /*
 431                  * If the command is a GETLK variant and no lock is found,
 432                  * only the l_type field is changed.
 433                  */
 434                 if ((cmd == F_O_GETLK || cmd == F_GETLK ||
 435                     cmd == F_OFD_GETLK) && bf.l_type == F_UNLCK) {
 436                         /* l_type always first entry, always a short */
 437                         if (copyout(&bf.l_type, &((struct flock *)arg)->l_type,
 438                             sizeof (bf.l_type)))
 439                                 error = EFAULT;
 440                         break;
 441                 }
 442 
 443                 if (cmd == F_O_GETLK) {
 444                         /*
 445                          * Return an SVR3 flock structure to the user.
 446                          */
 447                         obf.l_type = (int16_t)bf.l_type;
 448                         obf.l_whence = (int16_t)bf.l_whence;
 449                         obf.l_start = (int32_t)bf.l_start;
 450                         obf.l_len = (int32_t)bf.l_len;
 451                         if (bf.l_sysid > SHRT_MAX || bf.l_pid > SHRT_MAX) {
 452                                 /*
 453                                  * One or both of the values for the above
 454                                  * fields are too large to store in an SVR3
 455                                  * flock structure.
 456                                  */
 457                                 error = EOVERFLOW;
 458                                 break;
 459                         }
 460                         obf.l_sysid = (int16_t)bf.l_sysid;
 461                         obf.l_pid = (int16_t)bf.l_pid;
 462                         if (copyout(&obf, (void *)arg, sizeof (obf)))
 463                                 error = EFAULT;
 464                 } else if (cmd == F_GETLK || cmd == F_OFD_GETLK) {
 465                         /*
 466                          * Copy out SVR4 flock.
 467                          */
 468                         int i;
 469 
 470                         if (bf.l_start > maxoffset || bf.l_len > maxoffset) {
 471                                 error = EOVERFLOW;
 472                                 break;
 473                         }
 474 
 475                         if (datamodel == DATAMODEL_NATIVE) {
 476                                 for (i = 0; i < 4; i++)
 477                                         sbf.l_pad[i] = 0;
 478                                 /*
 479                                  * XXX  In an LP64 kernel with an LP64
 480                                  *      application there's no need to do a
 481                                  *      structure copy here as currently
 482                                  *      struct flock == struct flock64.
 483                                  *      We did it this way to avoid more
 484                                  *      conditional compilation.
 485                                  */
 486                                 sbf.l_type = bf.l_type;
 487                                 sbf.l_whence = bf.l_whence;
 488                                 sbf.l_start = (off_t)bf.l_start;
 489                                 sbf.l_len = (off_t)bf.l_len;
 490                                 sbf.l_sysid = bf.l_sysid;
 491                                 sbf.l_pid = bf.l_pid;
 492                                 if (copyout(&sbf, (void *)arg, sizeof (sbf)))
 493                                         error = EFAULT;
 494                         }
 495 #if defined(_SYSCALL32_IMPL)
 496                         else {
 497                                 struct flock32 sbf32;
 498                                 if (bf.l_start > MAXOFF32_T ||
 499                                     bf.l_len > MAXOFF32_T) {
 500                                         error = EOVERFLOW;
 501                                         break;
 502                                 }
 503                                 for (i = 0; i < 4; i++)
 504                                         sbf32.l_pad[i] = 0;
 505                                 sbf32.l_type = (int16_t)bf.l_type;
 506                                 sbf32.l_whence = (int16_t)bf.l_whence;
 507                                 sbf32.l_start = (off32_t)bf.l_start;
 508                                 sbf32.l_len = (off32_t)bf.l_len;
 509                                 sbf32.l_sysid = (int32_t)bf.l_sysid;
 510                                 sbf32.l_pid = (pid32_t)bf.l_pid;
 511                                 if (copyout(&sbf32,
 512                                     (void *)arg, sizeof (sbf32)))
 513                                         error = EFAULT;
 514                         }
 515 #endif
 516                 }
 517                 break;
 518 
 519         case F_CHKFL:
 520                 /*
 521                  * This is for internal use only, to allow the vnode layer
 522                  * to validate a flags setting before applying it.  User
 523                  * programs can't issue it.
 524                  */
 525                 error = EINVAL;
 526                 break;
 527 
 528         case F_ALLOCSP:
 529         case F_FREESP:
 530         case F_ALLOCSP64:
 531         case F_FREESP64:
 532                 /*
 533                  * Test for not-a-regular-file (and returning EINVAL)
 534                  * before testing for open-for-writing (and returning EBADF).
 535                  * This is relied upon by posix_fallocate() in libc.
 536                  */
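                     /*
                      * A hypothetical caller releasing storage from a given
                      * offset through end-of-file would, e.g., pass F_FREESP
                      * with l_whence == SEEK_SET, l_start == offset and
                      * l_len == 0; posix_fallocate() in libc reaches this
                      * code with F_ALLOCSP.
                      */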
 537                 if (vp->v_type != VREG) {
 538                         error = EINVAL;
 539                         break;
 540                 }
 541 
 542                 if ((flag & FWRITE) == 0) {
 543                         error = EBADF;
 544                         break;
 545                 }
 546 
 547                 if (datamodel != DATAMODEL_ILP32 &&
 548                     (cmd == F_ALLOCSP64 || cmd == F_FREESP64)) {
 549                         error = EINVAL;
 550                         break;
 551                 }
 552 
 553 #if defined(_ILP32) || defined(_SYSCALL32_IMPL)
 554                 if (datamodel == DATAMODEL_ILP32 &&
 555                     (cmd == F_ALLOCSP || cmd == F_FREESP)) {
 556                         struct flock32 sbf32;
 557                         /*
 558                          * For compatibility we overlay an SVR3 flock on an SVR4
 559                          * flock.  This works because the input field offsets
 560                          * in "struct flock" were preserved.
 561                          */
 562                         if (copyin((void *)arg, &sbf32, sizeof (sbf32))) {
 563                                 error = EFAULT;
 564                                 break;
 565                         } else {
 566                                 bf.l_type = sbf32.l_type;
 567                                 bf.l_whence = sbf32.l_whence;
 568                                 bf.l_start = (off64_t)sbf32.l_start;
 569                                 bf.l_len = (off64_t)sbf32.l_len;
 570                                 bf.l_sysid = sbf32.l_sysid;
 571                                 bf.l_pid = sbf32.l_pid;
 572                         }
 573                 }
 574 #endif /* _ILP32 || _SYSCALL32_IMPL */
 575 
 576 #if defined(_LP64)
 577                 if (datamodel == DATAMODEL_LP64 &&
 578                     (cmd == F_ALLOCSP || cmd == F_FREESP)) {
 579                         if (copyin((void *)arg, &bf, sizeof (bf))) {
 580                                 error = EFAULT;
 581                                 break;
 582                         }
 583                 }
 584 #endif /* defined(_LP64) */
 585 
 586 #if !defined(_LP64) || defined(_SYSCALL32_IMPL)
 587                 if (datamodel == DATAMODEL_ILP32 &&
 588                     (cmd == F_ALLOCSP64 || cmd == F_FREESP64)) {
 589                         if (copyin((void *)arg, &bf64_32, sizeof (bf64_32))) {
 590                                 error = EFAULT;
 591                                 break;
 592                         } else {
 593                                 /*
 594                                  * Note that the size of flock64 is different in
 595                                  * the ILP32 and LP64 models, due to the l_pad
 596                                  * field. We do not want to assume that the
 597                                  * flock64 structure is laid out the same in
 598                                  * ILP32 and LP64 environments, so we will
 599                                  * copy in the ILP32 version of flock64
 600                                  * explicitly and copy it to the native
 601                                  * flock64 structure.
 602                                  */
 603                                 bf.l_type = (short)bf64_32.l_type;
 604                                 bf.l_whence = (short)bf64_32.l_whence;
 605                                 bf.l_start = bf64_32.l_start;
 606                                 bf.l_len = bf64_32.l_len;
 607                                 bf.l_sysid = (int)bf64_32.l_sysid;
 608                                 bf.l_pid = (pid_t)bf64_32.l_pid;
 609                         }
 610                 }
 611 #endif /* !defined(_LP64) || defined(_SYSCALL32_IMPL) */
 612 
 613                 if (cmd == F_ALLOCSP || cmd == F_FREESP)
 614                         error = flock_check(vp, &bf, offset, maxoffset);
 615                 else if (cmd == F_ALLOCSP64 || cmd == F_FREESP64)
 616                         error = flock_check(vp, &bf, offset, MAXOFFSET_T);
 617                 if (error)
 618                         break;
 619 
 620                 if (vp->v_type == VREG && bf.l_len == 0 &&
 621                     bf.l_start > OFFSET_MAX(fp)) {
 622                         error = EFBIG;
 623                         break;
 624                 }
 625 
 626                 /*
 627                  * Make sure that there are no conflicting non-blocking
 628                  * mandatory locks in the region being manipulated. If
 629                  * there are such locks then return EACCES.
 630                  */
 631                 if ((error = flock_get_start(vp, &bf, offset, &start)) != 0)
 632                         break;
 633 
 634                 if (nbl_need_check(vp)) {
 635                         u_offset_t      begin;
 636                         ssize_t         length;
 637 
 638                         nbl_start_crit(vp, RW_READER);
 639                         in_crit = 1;
 640                         vattr.va_mask = AT_SIZE;
 641                         if ((error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL))
 642                             != 0)
 643                                 break;
 644                         begin = start > vattr.va_size ? vattr.va_size : start;
 645                         length = vattr.va_size > start ? vattr.va_size - start :
 646                             start - vattr.va_size;
 647                         if (nbl_conflict(vp, NBL_WRITE, begin, length, 0,
 648                             NULL)) {
 649                                 error = EACCES;
 650                                 break;
 651                         }
 652                 }
 653 
 654                 if (cmd == F_ALLOCSP64)
 655                         cmd = F_ALLOCSP;
 656                 else if (cmd == F_FREESP64)
 657                         cmd = F_FREESP;
 658 
 659                 error = VOP_SPACE(vp, cmd, &bf, flag, offset, fp->f_cred, NULL);
 660 
 661                 break;
 662 
 663 #if !defined(_LP64) || defined(_SYSCALL32_IMPL)
 664         case F_GETLK64:
 665         case F_SETLK64:
 666         case F_SETLKW64:
 667         case F_SETLK64_NBMAND:
 668         case F_OFD_GETLK64:
 669         case F_OFD_SETLK64:
 670         case F_OFD_SETLKW64:
 671         case F_FLOCK64:
 672         case F_FLOCKW64:
 673                 /*
 674                  * Large Files: Here we map each *LK64 cmd to its *LK
 675                  * equivalent and send it to the lower layers; *LK64 is
 676                  * only for userland. Most of the comments above for
 677                  * F_SETLK apply here too.
 678                  * Large File support is only needed for ILP32 apps!
 679                  */
 680                 if (datamodel != DATAMODEL_ILP32) {
 681                         error = EINVAL;
 682                         break;
 683                 }
 684 
 685                 if (cmd == F_GETLK64)
 686                         cmd = F_GETLK;
 687                 else if (cmd == F_SETLK64)
 688                         cmd = F_SETLK;
 689                 else if (cmd == F_SETLKW64)
 690                         cmd = F_SETLKW;
 691                 else if (cmd == F_SETLK64_NBMAND)
 692                         cmd = F_SETLK_NBMAND;
 693                 else if (cmd == F_OFD_GETLK64)
 694                         cmd = F_OFD_GETLK;
 695                 else if (cmd == F_OFD_SETLK64)
 696                         cmd = F_OFD_SETLK;
 697                 else if (cmd == F_OFD_SETLKW64)
 698                         cmd = F_OFD_SETLKW;
 699                 else if (cmd == F_FLOCK64)
 700                         cmd = F_FLOCK;
 701                 else if (cmd == F_FLOCKW64)
 702                         cmd = F_FLOCKW;
 703 
 704                 /*
 705                  * Note that the size of flock64 is different in the ILP32
 706                  * and LP64 models, due to the l_pad field.
 707                  * We do not want to assume that the flock64 structure is
 708                  * laid out the same in ILP32 and LP64 environments, so
 709                  * we will copy in the ILP32 version of flock64 explicitly
 710                  * and copy it to the native flock64 structure.
 711                  */
 712 
 713                 if (copyin((void *)arg, &bf64_32, sizeof (bf64_32))) {
 714                         error = EFAULT;
 715                         break;
 716                 }
 717 
 718                 bf.l_type = (short)bf64_32.l_type;
 719                 bf.l_whence = (short)bf64_32.l_whence;
 720                 bf.l_start = bf64_32.l_start;
 721                 bf.l_len = bf64_32.l_len;
 722                 bf.l_sysid = (int)bf64_32.l_sysid;
 723                 bf.l_pid = (pid_t)bf64_32.l_pid;
 724 
 725                 if ((error = flock_check(vp, &bf, offset, MAXOFFSET_T)) != 0)
 726                         break;
 727 
 728                 if (cmd == F_FLOCK || cmd == F_FLOCKW) {
 729                         /* FLOCK* locking is always over the entire file. */
 730                         if (bf.l_whence != 0 || bf.l_start != 0 ||
 731                             bf.l_len != 0) {
 732                                 error = EINVAL;
 733                                 break;
 734                         }
 735                         if (bf.l_type < F_RDLCK || bf.l_type > F_UNLCK) {
 736                                 error = EINVAL;
 737                                 break;
 738                         }
 739                 }
 740 
 741                 if (cmd == F_OFD_SETLK || cmd == F_OFD_SETLKW) {
 742                         /*
 743                          * TBD OFD-style locking is currently limited to
 744                          * covering the entire file.
 745                          */
 746                         if (bf.l_whence != 0 || bf.l_start != 0 ||
 747                             bf.l_len != 0) {
 748                                 error = EINVAL;
 749                                 break;
 750                         }
 751                 }
 752 
 753                 /*
 754                  * The *_frlock functions in the various file systems basically
 755                  * do some validation and then funnel everything through the
 756                  * fs_frlock function. For OFD-style locks fs_frlock will do
 757                  * nothing so that once control returns here we can call the
 758                  * ofdlock function with the correct fp. Remote file systems
 759                  * that do not support OFD-style locks, such as NFS, detect
 760                  * and reject the OFD-style cmd argument.
 761                  */
 762                 if ((error = VOP_FRLOCK(vp, cmd, &bf, flag, offset,
 763                     NULL, fp->f_cred, NULL)) != 0)
 764                         break;
 765 
 766                 if (cmd == F_FLOCK || cmd == F_FLOCKW || cmd == F_OFD_GETLK ||
 767                     cmd == F_OFD_SETLK || cmd == F_OFD_SETLKW) {
 768                         /*
 769                          * This is an OFD-style lock so we need to handle it
 770                          * here. Because OFD-style locks are associated with
 771                          * the file_t we didn't have enough info down the
 772                          * VOP_FRLOCK path immediately above.
 773                          */
 774                         if ((error = ofdlock(fp, cmd, &bf, flag, offset)) != 0)
 775                                 break;
 776                 }
 777 
 778                 if ((cmd == F_GETLK || cmd == F_OFD_GETLK) &&
 779                     bf.l_type == F_UNLCK) {
 780                         if (copyout(&bf.l_type, &((struct flock *)arg)->l_type,
 781                             sizeof (bf.l_type)))
 782                                 error = EFAULT;
 783                         break;
 784                 }
 785 
 786                 if (cmd == F_GETLK || cmd == F_OFD_GETLK) {
 787                         int i;
 788 
 789                         /*
 790                          * We do not want to assume that the flock64 structure
 791                          * is laid out the same in ILP32 and LP64
 792                          * environments, so we will copy out the ILP32 version
 793                          * of flock64 explicitly after copying the native
 794                          * flock64 structure to it.
 795                          */
 796                         for (i = 0; i < 4; i++)
 797                                 bf64_32.l_pad[i] = 0;
 798                         bf64_32.l_type = (int16_t)bf.l_type;
 799                         bf64_32.l_whence = (int16_t)bf.l_whence;
 800                         bf64_32.l_start = bf.l_start;
 801                         bf64_32.l_len = bf.l_len;
 802                         bf64_32.l_sysid = (int32_t)bf.l_sysid;
 803                         bf64_32.l_pid = (pid32_t)bf.l_pid;
 804                         if (copyout(&bf64_32, (void *)arg, sizeof (bf64_32)))
 805                                 error = EFAULT;
 806                 }
 807                 break;
 808 #endif /* !defined(_LP64) || defined(_SYSCALL32_IMPL) */
 809 
 810         case F_SHARE:
 811         case F_SHARE_NBMAND:
 812         case F_UNSHARE:
 813 
 814                 /*
 815                  * Copy in input fields only.
 816                  */
 817                 if (copyin((void *)arg, &fsh, sizeof (fsh))) {
 818                         error = EFAULT;
 819                         break;
 820                 }
 821 
 822                 /*
 823                  * Local share reservations always have this simple form
 824                  */
 825                 shr.s_access = fsh.f_access;
 826                 shr.s_deny = fsh.f_deny;
 827                 shr.s_sysid = 0;
 828                 shr.s_pid = ttoproc(curthread)->p_pid;
 829                 shr_own.sl_pid = shr.s_pid;
 830                 shr_own.sl_id = fsh.f_id;
 831                 shr.s_own_len = sizeof (shr_own);
 832                 shr.s_owner = (caddr_t)&shr_own;
 833                 error = VOP_SHRLOCK(vp, cmd, &shr, flag, fp->f_cred, NULL);
 834                 break;
 835 
 836         default:
 837                 error = EINVAL;
 838                 break;
 839         }
 840 
 841         if (in_crit)
 842                 nbl_end_crit(vp);
 843 
 844 done:
 845         releasef(fdes);
 846 out:
 847         if (error)
 848                 return (set_errno(error));
 849         return (retval);
 850 }
 851 
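     /*
      * Validate a caller-supplied flock64 against the current file offset and
      * the given maximum offset: resolve l_whence/l_start/l_len into an
      * absolute, inclusive [start, end] range, returning EINVAL for a
      * malformed range, EOVERFLOW when it cannot be represented below "max",
      * or any error from VOP_GETATTR() when l_whence is SEEK_END.  As a side
      * effect, an unlock request with a positive length ending exactly at
      * "max" has its l_len normalized to zero (meaning "to EOF").
      */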
 852 int
 853 flock_check(vnode_t *vp, flock64_t *flp, offset_t offset, offset_t max)
 854 {
 855         struct vattr    vattr;
 856         int     error;
 857         u_offset_t start, end;
 858 
 859         /*
 860          * Determine the starting point of the request
 861          */
 862         switch (flp->l_whence) {
 863         case 0:         /* SEEK_SET */
 864                 start = (u_offset_t)flp->l_start;
 865                 if (start > max)
 866                         return (EINVAL);
 867                 break;
 868         case 1:         /* SEEK_CUR */
 869                 if (flp->l_start > (max - offset))
 870                         return (EOVERFLOW);
 871                 start = (u_offset_t)(flp->l_start + offset);
 872                 if (start > max)
 873                         return (EINVAL);
 874                 break;
 875         case 2:         /* SEEK_END */
 876                 vattr.va_mask = AT_SIZE;
 877                 if (error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL))
 878                         return (error);
 879                 if (flp->l_start > (max - (offset_t)vattr.va_size))
 880                         return (EOVERFLOW);
 881                 start = (u_offset_t)(flp->l_start + (offset_t)vattr.va_size);
 882                 if (start > max)
 883                         return (EINVAL);
 884                 break;
 885         default:
 886                 return (EINVAL);
 887         }
 888 
 889         /*
 890          * Determine the range covered by the request.
 891          */
 892         if (flp->l_len == 0)
 893                 end = MAXEND;
 894         else if ((offset_t)flp->l_len > 0) {
 895                 if (flp->l_len > (max - start + 1))
 896                         return (EOVERFLOW);
 897                 end = (u_offset_t)(start + (flp->l_len - 1));
 898                 ASSERT(end <= max);
 899         } else {
 900                 /*
 901                  * Negative length; why do we even allow this?
 902                  * Because this allows easy specification of
 903                  * the last n bytes of the file.
 904                  */
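                     /*
                      * E.g., a resolved start of 100 with l_len == -10 yields
                      * the inclusive range [91, 100].
                      */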
 905                 end = start;
 906                 start += (u_offset_t)flp->l_len;
 907                 (start)++;
 908                 if (start > max)
 909                         return (EINVAL);
 910                 ASSERT(end <= max);
 911         }
 912         ASSERT(start <= max);
 913         if (flp->l_type == F_UNLCK && flp->l_len > 0 &&
 914             end == (offset_t)max) {
 915                 flp->l_len = 0;
 916         }
 917         if (start > end)
 918                 return (EINVAL);
 919         return (0);
 920 }
 921 
 922 static int
 923 flock_get_start(vnode_t *vp, flock64_t *flp, offset_t offset, u_offset_t *start)
 924 {
 925         struct vattr    vattr;
 926         int     error;
 927 
 928         /*
 929          * Determine the starting point of the request. Assume that it is
 930          * a valid starting point.
 931          */
 932         switch (flp->l_whence) {
 933         case 0:         /* SEEK_SET */
 934                 *start = (u_offset_t)flp->l_start;
 935                 break;
 936         case 1:         /* SEEK_CUR */
 937                 *start = (u_offset_t)(flp->l_start + offset);
 938                 break;
 939         case 2:         /* SEEK_END */
 940                 vattr.va_mask = AT_SIZE;
 941                 if (error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL))
 942                         return (error);
 943                 *start = (u_offset_t)(flp->l_start + (offset_t)vattr.va_size);
 944                 break;
 945         default:
 946                 return (EINVAL);
 947         }
 948 
 949         return (0);
 950 }
 951 
 952 /*
 953  * Take rctl action when the requested file descriptor is too big.
 954  */
 955 static void
 956 fd_too_big(proc_t *p)
 957 {
 958         mutex_enter(&p->p_lock);
 959         (void) rctl_action(rctlproc_legacy[RLIMIT_NOFILE],
 960             p->p_rctls, p, RCA_SAFE);
 961         mutex_exit(&p->p_lock);
 962 }