1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright (c) 2013, OmniTI Computer Consulting, Inc. All rights reserved.
25 * Copyright 2015, Joyent, Inc.
26 */
27
28 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
29 /* All Rights Reserved */
30
31 /*
32 * Portions of this source code were derived from Berkeley 4.3 BSD
33 * under license from the Regents of the University of California.
34 */
35
36
37 #include <sys/param.h>
38 #include <sys/isa_defs.h>
39 #include <sys/types.h>
40 #include <sys/sysmacros.h>
41 #include <sys/systm.h>
42 #include <sys/errno.h>
43 #include <sys/fcntl.h>
44 #include <sys/flock.h>
45 #include <sys/vnode.h>
46 #include <sys/file.h>
47 #include <sys/mode.h>
48 #include <sys/proc.h>
49 #include <sys/filio.h>
50 #include <sys/share.h>
51 #include <sys/debug.h>
52 #include <sys/rctl.h>
53 #include <sys/nbmlock.h>
54
55 #include <sys/cmn_err.h>
56
57 static int flock_check(vnode_t *, flock64_t *, offset_t, offset_t);
58 static int flock_get_start(vnode_t *, flock64_t *, offset_t, u_offset_t *);
59 static void fd_too_big(proc_t *);
60
61 /*
62 * File control.
63 */
64 int
65 fcntl(int fdes, int cmd, intptr_t arg)
66 {
67 int iarg;
68 int error = 0;
69 int retval;
70 proc_t *p;
71 file_t *fp;
72 vnode_t *vp;
73 u_offset_t offset;
74 u_offset_t start;
75 struct vattr vattr;
76 int in_crit;
77 int flag;
78 struct flock sbf;
79 struct flock64 bf;
80 struct o_flock obf;
81 struct flock64_32 bf64_32;
82 struct fshare fsh;
83 struct shrlock shr;
84 struct shr_locowner shr_own;
85 offset_t maxoffset;
86 model_t datamodel;
87 int fdres;
88
89 #if defined(_ILP32) && !defined(lint) && defined(_SYSCALL32)
90 ASSERT(sizeof (struct flock) == sizeof (struct flock32));
91 ASSERT(sizeof (struct flock64) == sizeof (struct flock64_32));
92 #endif
93 #if defined(_LP64) && !defined(lint) && defined(_SYSCALL32)
94 ASSERT(sizeof (struct flock) == sizeof (struct flock64_64));
95 ASSERT(sizeof (struct flock64) == sizeof (struct flock64_64));
96 #endif
97
98 /*
99 * First, for speed, deal with the subset of cases
100 * that do not require getf() / releasef().
101 */
102 switch (cmd) {
103 case F_GETFD:
104 if ((error = f_getfd_error(fdes, &flag)) == 0)
105 retval = flag;
106 goto out;
107
108 case F_SETFD:
109 error = f_setfd_error(fdes, (int)arg);
110 retval = 0;
111 goto out;
112
113 case F_GETFL:
114 if ((error = f_getfl(fdes, &flag)) == 0) {
115 retval = (flag & (FMASK | FASYNC));
116 if ((flag & (FSEARCH | FEXEC)) == 0)
117 retval += FOPEN;
118 else
119 retval |= (flag & (FSEARCH | FEXEC));
120 }
121 goto out;
122
123 case F_GETXFL:
124 if ((error = f_getfl(fdes, &flag)) == 0) {
125 retval = flag;
126 if ((flag & (FSEARCH | FEXEC)) == 0)
127 retval += FOPEN;
128 }
129 goto out;
130
131 case F_BADFD:
132 if ((error = f_badfd(fdes, &fdres, (int)arg)) == 0)
133 retval = fdres;
134 goto out;
135 }
136
137 /*
138 * Second, for speed, deal with the subset of cases that
139 * require getf() / releasef() but do not require copyin.
140 */
141 if ((fp = getf(fdes)) == NULL) {
142 error = EBADF;
143 goto out;
144 }
145 iarg = (int)arg;
146
147 switch (cmd) {
148 case F_DUPFD:
149 case F_DUPFD_CLOEXEC:
150 p = curproc;
151 if ((uint_t)iarg >= p->p_fno_ctl) {
152 if (iarg >= 0)
153 fd_too_big(p);
154 error = EINVAL;
155 goto done;
156 }
157 /*
158 * We need to increment the f_count reference counter
159 * before allocating a new file descriptor.
160 * Doing it other way round opens a window for race condition
161 * with closeandsetf() on the target file descriptor which can
162 * close the file still referenced by the original
163 * file descriptor.
164 */
165 mutex_enter(&fp->f_tlock);
166 fp->f_count++;
167 mutex_exit(&fp->f_tlock);
168 if ((retval = ufalloc_file(iarg, fp)) == -1) {
169 /*
170 * New file descriptor can't be allocated.
171 * Revert the reference count.
172 */
173 mutex_enter(&fp->f_tlock);
174 fp->f_count--;
175 mutex_exit(&fp->f_tlock);
176 error = EMFILE;
177 } else {
178 if (cmd == F_DUPFD_CLOEXEC) {
179 f_setfd(retval, FD_CLOEXEC);
180 }
181 }
182
183 if (error == 0 && fp->f_vnode != NULL) {
184 (void) VOP_IOCTL(fp->f_vnode, F_ASSOCI_PID,
185 (intptr_t)p->p_pidp->pid_id, FKIOCTL, kcred,
186 NULL, NULL);
187 }
188
189 goto done;
190
191 case F_DUP2FD_CLOEXEC:
192 if (fdes == iarg) {
193 error = EINVAL;
194 goto done;
195 }
196
197 /*FALLTHROUGH*/
198
199 case F_DUP2FD:
200 p = curproc;
201 if (fdes == iarg) {
202 retval = iarg;
203 } else if ((uint_t)iarg >= p->p_fno_ctl) {
204 if (iarg >= 0)
205 fd_too_big(p);
206 error = EBADF;
207 } else {
208 /*
209 * We can't hold our getf(fdes) across the call to
210 * closeandsetf() because it creates a window for
211 * deadlock: if one thread is doing dup2(a, b) while
212 * another is doing dup2(b, a), each one will block
213 * waiting for the other to call releasef(). The
214 * solution is to increment the file reference count
215 * (which we have to do anyway), then releasef(fdes),
216 * then closeandsetf(). Incrementing f_count ensures
217 * that fp won't disappear after we call releasef().
218 * When closeandsetf() fails, we try avoid calling
219 * closef() because of all the side effects.
220 */
221 mutex_enter(&fp->f_tlock);
222 fp->f_count++;
223 mutex_exit(&fp->f_tlock);
224 releasef(fdes);
225
226 /*
227 * Assume we succeed to duplicate the file descriptor
228 * and associate the pid to the vnode.
229 */
230 if (fp->f_vnode != NULL) {
231 (void) VOP_IOCTL(fp->f_vnode, F_ASSOCI_PID,
232 (intptr_t)p->p_pidp->pid_id, FKIOCTL,
233 kcred, NULL, NULL);
234 }
235
236 if ((error = closeandsetf(iarg, fp)) == 0) {
237 if (cmd == F_DUP2FD_CLOEXEC) {
238 f_setfd(iarg, FD_CLOEXEC);
239 }
240 retval = iarg;
241 } else {
242 mutex_enter(&fp->f_tlock);
243 if (fp->f_count > 1) {
244 fp->f_count--;
245 mutex_exit(&fp->f_tlock);
246 /*
247 * Failed to duplicate fdes,
248 * disassociate the pid from the vnode.
249 */
250 if (fp->f_vnode != NULL) {
251 (void) VOP_IOCTL(fp->f_vnode,
252 F_DASSOC_PID,
253 (intptr_t)p->p_pidp->pid_id,
254 FKIOCTL, kcred, NULL, NULL);
255 }
256
257 } else {
258 mutex_exit(&fp->f_tlock);
259 (void) closef(fp);
260 }
261 }
262 goto out;
263 }
264 goto done;
265
266 case F_SETFL:
267 vp = fp->f_vnode;
268 flag = fp->f_flag;
269 if ((iarg & (FNONBLOCK|FNDELAY)) == (FNONBLOCK|FNDELAY))
270 iarg &= ~FNDELAY;
271 if ((error = VOP_SETFL(vp, flag, iarg, fp->f_cred, NULL)) ==
272 0) {
273 iarg &= FMASK;
274 mutex_enter(&fp->f_tlock);
275 fp->f_flag &= ~FMASK | (FREAD|FWRITE);
276 fp->f_flag |= (iarg - FOPEN) & ~(FREAD|FWRITE);
277 mutex_exit(&fp->f_tlock);
278 }
279 retval = 0;
280 goto done;
281 }
282
283 /*
284 * Finally, deal with the expensive cases.
285 */
286 retval = 0;
287 in_crit = 0;
288 maxoffset = MAXOFF_T;
289 datamodel = DATAMODEL_NATIVE;
290 #if defined(_SYSCALL32_IMPL)
291 if ((datamodel = get_udatamodel()) == DATAMODEL_ILP32)
292 maxoffset = MAXOFF32_T;
293 #endif
294
295 vp = fp->f_vnode;
296 flag = fp->f_flag;
297 offset = fp->f_offset;
298
299 switch (cmd) {
300 /*
301 * The file system and vnode layers understand and implement
302 * locking with flock64 structures. So here once we pass through
303 * the test for compatibility as defined by LFS API, (for F_SETLK,
304 * F_SETLKW, F_GETLK, F_GETLKW, F_OFD_GETLK, F_OFD_SETLK, F_OFD_SETLKW,
305 * F_FREESP) we transform the flock structure to a flock64 structure
306 * and send it to the lower layers. Similarly in case of GETLK and
307 * OFD_GETLK the returned flock64 structure is transformed to a flock
308 * structure if everything fits in nicely, otherwise we return
309 * EOVERFLOW.
310 */
311
312 case F_GETLK:
313 case F_O_GETLK:
314 case F_SETLK:
315 case F_SETLKW:
316 case F_SETLK_NBMAND:
317 case F_OFD_GETLK:
318 case F_OFD_SETLK:
319 case F_OFD_SETLKW:
320 case F_FLOCK:
321 case F_FLOCKW:
322
323 /*
324 * Copy in input fields only.
325 */
326
327 if (cmd == F_O_GETLK) {
328 if (datamodel != DATAMODEL_ILP32) {
329 error = EINVAL;
330 break;
331 }
332
333 if (copyin((void *)arg, &obf, sizeof (obf))) {
334 error = EFAULT;
335 break;
336 }
337 bf.l_type = obf.l_type;
338 bf.l_whence = obf.l_whence;
339 bf.l_start = (off64_t)obf.l_start;
340 bf.l_len = (off64_t)obf.l_len;
341 bf.l_sysid = (int)obf.l_sysid;
342 bf.l_pid = obf.l_pid;
343 } else if (datamodel == DATAMODEL_NATIVE) {
344 if (copyin((void *)arg, &sbf, sizeof (sbf))) {
345 error = EFAULT;
346 break;
347 }
348 /*
349 * XXX In an LP64 kernel with an LP64 application
350 * there's no need to do a structure copy here
351 * struct flock == struct flock64. However,
352 * we did it this way to avoid more conditional
353 * compilation.
354 */
355 bf.l_type = sbf.l_type;
356 bf.l_whence = sbf.l_whence;
357 bf.l_start = (off64_t)sbf.l_start;
358 bf.l_len = (off64_t)sbf.l_len;
359 bf.l_sysid = sbf.l_sysid;
360 bf.l_pid = sbf.l_pid;
361 }
362 #if defined(_SYSCALL32_IMPL)
363 else {
364 struct flock32 sbf32;
365 if (copyin((void *)arg, &sbf32, sizeof (sbf32))) {
366 error = EFAULT;
367 break;
368 }
369 bf.l_type = sbf32.l_type;
370 bf.l_whence = sbf32.l_whence;
371 bf.l_start = (off64_t)sbf32.l_start;
372 bf.l_len = (off64_t)sbf32.l_len;
373 bf.l_sysid = sbf32.l_sysid;
374 bf.l_pid = sbf32.l_pid;
375 }
376 #endif /* _SYSCALL32_IMPL */
377
378 /*
379 * 64-bit support: check for overflow for 32-bit lock ops
380 */
381 if ((error = flock_check(vp, &bf, offset, maxoffset)) != 0)
382 break;
383
384 if (cmd == F_FLOCK || cmd == F_FLOCKW) {
385 /* FLOCK* locking is always over the entire file. */
386 if (bf.l_whence != 0 || bf.l_start != 0 ||
387 bf.l_len != 0) {
388 error = EINVAL;
389 break;
390 }
391 if (bf.l_type < F_RDLCK || bf.l_type > F_UNLCK) {
392 error = EINVAL;
393 break;
394 }
395 }
396
397 if (cmd == F_OFD_SETLK || cmd == F_OFD_SETLKW) {
398 /*
399 * TBD OFD-style locking is currently limited to
400 * covering the entire file.
401 */
402 if (bf.l_whence != 0 || bf.l_start != 0 ||
403 bf.l_len != 0) {
404 error = EINVAL;
405 break;
406 }
407 }
408
409 /*
410 * Not all of the filesystems understand F_O_GETLK, and
411 * there's no need for them to know. Map it to F_GETLK.
412 *
413 * The *_frlock functions in the various file systems basically
414 * do some validation and then funnel everything through the
415 * fs_frlock function. For OFD-style locks fs_frlock will do
416 * nothing so that once control returns here we can call the
417 * ofdlock function with the correct fp. For OFD-style locks
418 * the unsupported remote file systems, such as NFS, detect and
419 * reject the OFD-style cmd argument.
420 */
421 if ((error = VOP_FRLOCK(vp, (cmd == F_O_GETLK) ? F_GETLK : cmd,
422 &bf, flag, offset, NULL, fp->f_cred, NULL)) != 0)
423 break;
424
425 if (cmd == F_FLOCK || cmd == F_FLOCKW || cmd == F_OFD_GETLK ||
426 cmd == F_OFD_SETLK || cmd == F_OFD_SETLKW) {
427 /*
428 * This is an OFD-style lock so we need to handle it
429 * here. Because OFD-style locks are associated with
430 * the file_t we didn't have enough info down the
431 * VOP_FRLOCK path immediately above.
432 */
433 if ((error = ofdlock(fp, cmd, &bf, flag, offset)) != 0)
434 break;
435 }
436
437 /*
438 * If command is GETLK and no lock is found, only
439 * the type field is changed.
440 */
441 if ((cmd == F_O_GETLK || cmd == F_GETLK ||
442 cmd == F_OFD_GETLK) && bf.l_type == F_UNLCK) {
443 /* l_type always first entry, always a short */
444 if (copyout(&bf.l_type, &((struct flock *)arg)->l_type,
445 sizeof (bf.l_type)))
446 error = EFAULT;
447 break;
448 }
449
450 if (cmd == F_O_GETLK) {
451 /*
452 * Return an SVR3 flock structure to the user.
453 */
454 obf.l_type = (int16_t)bf.l_type;
455 obf.l_whence = (int16_t)bf.l_whence;
456 obf.l_start = (int32_t)bf.l_start;
457 obf.l_len = (int32_t)bf.l_len;
458 if (bf.l_sysid > SHRT_MAX || bf.l_pid > SHRT_MAX) {
459 /*
460 * One or both values for the above fields
461 * is too large to store in an SVR3 flock
462 * structure.
463 */
464 error = EOVERFLOW;
465 break;
466 }
467 obf.l_sysid = (int16_t)bf.l_sysid;
468 obf.l_pid = (int16_t)bf.l_pid;
469 if (copyout(&obf, (void *)arg, sizeof (obf)))
470 error = EFAULT;
471 } else if (cmd == F_GETLK || cmd == F_OFD_GETLK) {
472 /*
473 * Copy out SVR4 flock.
474 */
475 int i;
476
477 if (bf.l_start > maxoffset || bf.l_len > maxoffset) {
478 error = EOVERFLOW;
479 break;
480 }
481
482 if (datamodel == DATAMODEL_NATIVE) {
483 for (i = 0; i < 4; i++)
484 sbf.l_pad[i] = 0;
485 /*
486 * XXX In an LP64 kernel with an LP64
487 * application there's no need to do a
488 * structure copy here as currently
489 * struct flock == struct flock64.
490 * We did it this way to avoid more
491 * conditional compilation.
492 */
493 sbf.l_type = bf.l_type;
494 sbf.l_whence = bf.l_whence;
495 sbf.l_start = (off_t)bf.l_start;
496 sbf.l_len = (off_t)bf.l_len;
497 sbf.l_sysid = bf.l_sysid;
498 sbf.l_pid = bf.l_pid;
499 if (copyout(&sbf, (void *)arg, sizeof (sbf)))
500 error = EFAULT;
501 }
502 #if defined(_SYSCALL32_IMPL)
503 else {
504 struct flock32 sbf32;
505 if (bf.l_start > MAXOFF32_T ||
506 bf.l_len > MAXOFF32_T) {
507 error = EOVERFLOW;
508 break;
509 }
510 for (i = 0; i < 4; i++)
511 sbf32.l_pad[i] = 0;
512 sbf32.l_type = (int16_t)bf.l_type;
513 sbf32.l_whence = (int16_t)bf.l_whence;
514 sbf32.l_start = (off32_t)bf.l_start;
515 sbf32.l_len = (off32_t)bf.l_len;
516 sbf32.l_sysid = (int32_t)bf.l_sysid;
517 sbf32.l_pid = (pid32_t)bf.l_pid;
518 if (copyout(&sbf32,
519 (void *)arg, sizeof (sbf32)))
520 error = EFAULT;
521 }
522 #endif
523 }
524 break;
525
526 case F_CHKFL:
527 /*
528 * This is for internal use only, to allow the vnode layer
529 * to validate a flags setting before applying it. User
530 * programs can't issue it.
531 */
532 error = EINVAL;
533 break;
534
535 case F_ALLOCSP:
536 case F_FREESP:
537 case F_ALLOCSP64:
538 case F_FREESP64:
539 /*
540 * Test for not-a-regular-file (and returning EINVAL)
541 * before testing for open-for-writing (and returning EBADF).
542 * This is relied upon by posix_fallocate() in libc.
543 */
544 if (vp->v_type != VREG) {
545 error = EINVAL;
546 break;
547 }
548
549 if ((flag & FWRITE) == 0) {
550 error = EBADF;
551 break;
552 }
553
554 if (datamodel != DATAMODEL_ILP32 &&
555 (cmd == F_ALLOCSP64 || cmd == F_FREESP64)) {
556 error = EINVAL;
557 break;
558 }
559
560 #if defined(_ILP32) || defined(_SYSCALL32_IMPL)
561 if (datamodel == DATAMODEL_ILP32 &&
562 (cmd == F_ALLOCSP || cmd == F_FREESP)) {
563 struct flock32 sbf32;
564 /*
565 * For compatibility we overlay an SVR3 flock on an SVR4
566 * flock. This works because the input field offsets
567 * in "struct flock" were preserved.
568 */
569 if (copyin((void *)arg, &sbf32, sizeof (sbf32))) {
570 error = EFAULT;
571 break;
572 } else {
573 bf.l_type = sbf32.l_type;
574 bf.l_whence = sbf32.l_whence;
575 bf.l_start = (off64_t)sbf32.l_start;
576 bf.l_len = (off64_t)sbf32.l_len;
577 bf.l_sysid = sbf32.l_sysid;
578 bf.l_pid = sbf32.l_pid;
579 }
580 }
581 #endif /* _ILP32 || _SYSCALL32_IMPL */
582
583 #if defined(_LP64)
584 if (datamodel == DATAMODEL_LP64 &&
585 (cmd == F_ALLOCSP || cmd == F_FREESP)) {
586 if (copyin((void *)arg, &bf, sizeof (bf))) {
587 error = EFAULT;
588 break;
589 }
590 }
591 #endif /* defined(_LP64) */
592
593 #if !defined(_LP64) || defined(_SYSCALL32_IMPL)
594 if (datamodel == DATAMODEL_ILP32 &&
595 (cmd == F_ALLOCSP64 || cmd == F_FREESP64)) {
596 if (copyin((void *)arg, &bf64_32, sizeof (bf64_32))) {
597 error = EFAULT;
598 break;
599 } else {
600 /*
601 * Note that the size of flock64 is different in
602 * the ILP32 and LP64 models, due to the l_pad
603 * field. We do not want to assume that the
604 * flock64 structure is laid out the same in
605 * ILP32 and LP64 environments, so we will
606 * copy in the ILP32 version of flock64
607 * explicitly and copy it to the native
608 * flock64 structure.
609 */
610 bf.l_type = (short)bf64_32.l_type;
611 bf.l_whence = (short)bf64_32.l_whence;
612 bf.l_start = bf64_32.l_start;
613 bf.l_len = bf64_32.l_len;
614 bf.l_sysid = (int)bf64_32.l_sysid;
615 bf.l_pid = (pid_t)bf64_32.l_pid;
616 }
617 }
618 #endif /* !defined(_LP64) || defined(_SYSCALL32_IMPL) */
619
620 if (cmd == F_ALLOCSP || cmd == F_FREESP)
621 error = flock_check(vp, &bf, offset, maxoffset);
622 else if (cmd == F_ALLOCSP64 || cmd == F_FREESP64)
623 error = flock_check(vp, &bf, offset, MAXOFFSET_T);
624 if (error)
625 break;
626
627 if (vp->v_type == VREG && bf.l_len == 0 &&
628 bf.l_start > OFFSET_MAX(fp)) {
629 error = EFBIG;
630 break;
631 }
632
633 /*
634 * Make sure that there are no conflicting non-blocking
635 * mandatory locks in the region being manipulated. If
636 * there are such locks then return EACCES.
637 */
638 if ((error = flock_get_start(vp, &bf, offset, &start)) != 0)
639 break;
640
641 if (nbl_need_check(vp)) {
642 u_offset_t begin;
643 ssize_t length;
644
645 nbl_start_crit(vp, RW_READER);
646 in_crit = 1;
647 vattr.va_mask = AT_SIZE;
648 if ((error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL))
649 != 0)
650 break;
651 begin = start > vattr.va_size ? vattr.va_size : start;
652 length = vattr.va_size > start ? vattr.va_size - start :
653 start - vattr.va_size;
654 if (nbl_conflict(vp, NBL_WRITE, begin, length, 0,
655 NULL)) {
656 error = EACCES;
657 break;
658 }
659 }
660
661 if (cmd == F_ALLOCSP64)
662 cmd = F_ALLOCSP;
663 else if (cmd == F_FREESP64)
664 cmd = F_FREESP;
665
666 error = VOP_SPACE(vp, cmd, &bf, flag, offset, fp->f_cred, NULL);
667
668 break;
669
670 #if !defined(_LP64) || defined(_SYSCALL32_IMPL)
671 case F_GETLK64:
672 case F_SETLK64:
673 case F_SETLKW64:
674 case F_SETLK64_NBMAND:
675 case F_OFD_GETLK64:
676 case F_OFD_SETLK64:
677 case F_OFD_SETLKW64:
678 case F_FLOCK64:
679 case F_FLOCKW64:
680 /*
681 * Large Files: Here we set cmd as *LK and send it to
682 * lower layers. *LK64 is only for the user land.
683 * Most of the comments described above for F_SETLK
684 * applies here too.
685 * Large File support is only needed for ILP32 apps!
686 */
687 if (datamodel != DATAMODEL_ILP32) {
688 error = EINVAL;
689 break;
690 }
691
692 if (cmd == F_GETLK64)
693 cmd = F_GETLK;
694 else if (cmd == F_SETLK64)
695 cmd = F_SETLK;
696 else if (cmd == F_SETLKW64)
697 cmd = F_SETLKW;
698 else if (cmd == F_SETLK64_NBMAND)
699 cmd = F_SETLK_NBMAND;
700 else if (cmd == F_OFD_GETLK64)
701 cmd = F_OFD_GETLK;
702 else if (cmd == F_OFD_SETLK64)
703 cmd = F_OFD_SETLK;
704 else if (cmd == F_OFD_SETLKW64)
705 cmd = F_OFD_SETLKW;
706 else if (cmd == F_FLOCK64)
707 cmd = F_FLOCK;
708 else if (cmd == F_FLOCKW64)
709 cmd = F_FLOCKW;
710
711 /*
712 * Note that the size of flock64 is different in the ILP32
713 * and LP64 models, due to the sucking l_pad field.
714 * We do not want to assume that the flock64 structure is
715 * laid out in the same in ILP32 and LP64 environments, so
716 * we will copy in the ILP32 version of flock64 explicitly
717 * and copy it to the native flock64 structure.
718 */
719
720 if (copyin((void *)arg, &bf64_32, sizeof (bf64_32))) {
721 error = EFAULT;
722 break;
723 }
724
725 bf.l_type = (short)bf64_32.l_type;
726 bf.l_whence = (short)bf64_32.l_whence;
727 bf.l_start = bf64_32.l_start;
728 bf.l_len = bf64_32.l_len;
729 bf.l_sysid = (int)bf64_32.l_sysid;
730 bf.l_pid = (pid_t)bf64_32.l_pid;
731
732 if ((error = flock_check(vp, &bf, offset, MAXOFFSET_T)) != 0)
733 break;
734
735 if (cmd == F_FLOCK || cmd == F_FLOCKW) {
736 /* FLOCK* locking is always over the entire file. */
737 if (bf.l_whence != 0 || bf.l_start != 0 ||
738 bf.l_len != 0) {
739 error = EINVAL;
740 break;
741 }
742 if (bf.l_type < F_RDLCK || bf.l_type > F_UNLCK) {
743 error = EINVAL;
744 break;
745 }
746 }
747
748 if (cmd == F_OFD_SETLK || cmd == F_OFD_SETLKW) {
749 /*
750 * TBD OFD-style locking is currently limited to
751 * covering the entire file.
752 */
753 if (bf.l_whence != 0 || bf.l_start != 0 ||
754 bf.l_len != 0) {
755 error = EINVAL;
756 break;
757 }
758 }
759
760 /*
761 * The *_frlock functions in the various file systems basically
762 * do some validation and then funnel everything through the
763 * fs_frlock function. For OFD-style locks fs_frlock will do
764 * nothing so that once control returns here we can call the
765 * ofdlock function with the correct fp. For OFD-style locks
766 * the unsupported remote file systems, such as NFS, detect and
767 * reject the OFD-style cmd argument.
768 */
769 if ((error = VOP_FRLOCK(vp, cmd, &bf, flag, offset,
770 NULL, fp->f_cred, NULL)) != 0)
771 break;
772
773 if (cmd == F_FLOCK || cmd == F_FLOCKW || cmd == F_OFD_GETLK ||
774 cmd == F_OFD_SETLK || cmd == F_OFD_SETLKW) {
775 /*
776 * This is an OFD-style lock so we need to handle it
777 * here. Because OFD-style locks are associated with
778 * the file_t we didn't have enough info down the
779 * VOP_FRLOCK path immediately above.
780 */
781 if ((error = ofdlock(fp, cmd, &bf, flag, offset)) != 0)
782 break;
783 }
784
785 if ((cmd == F_GETLK || cmd == F_OFD_GETLK) &&
786 bf.l_type == F_UNLCK) {
787 if (copyout(&bf.l_type, &((struct flock *)arg)->l_type,
788 sizeof (bf.l_type)))
789 error = EFAULT;
790 break;
791 }
792
793 if (cmd == F_GETLK || cmd == F_OFD_GETLK) {
794 int i;
795
796 /*
797 * We do not want to assume that the flock64 structure
798 * is laid out in the same in ILP32 and LP64
799 * environments, so we will copy out the ILP32 version
800 * of flock64 explicitly after copying the native
801 * flock64 structure to it.
802 */
803 for (i = 0; i < 4; i++)
804 bf64_32.l_pad[i] = 0;
805 bf64_32.l_type = (int16_t)bf.l_type;
806 bf64_32.l_whence = (int16_t)bf.l_whence;
807 bf64_32.l_start = bf.l_start;
808 bf64_32.l_len = bf.l_len;
809 bf64_32.l_sysid = (int32_t)bf.l_sysid;
810 bf64_32.l_pid = (pid32_t)bf.l_pid;
811 if (copyout(&bf64_32, (void *)arg, sizeof (bf64_32)))
812 error = EFAULT;
813 }
814 break;
815 #endif /* !defined(_LP64) || defined(_SYSCALL32_IMPL) */
816
817 case F_SHARE:
818 case F_SHARE_NBMAND:
819 case F_UNSHARE:
820
821 /*
822 * Copy in input fields only.
823 */
824 if (copyin((void *)arg, &fsh, sizeof (fsh))) {
825 error = EFAULT;
826 break;
827 }
828
829 /*
830 * Local share reservations always have this simple form
831 */
832 shr.s_access = fsh.f_access;
833 shr.s_deny = fsh.f_deny;
834 shr.s_sysid = 0;
835 shr.s_pid = ttoproc(curthread)->p_pid;
836 shr_own.sl_pid = shr.s_pid;
837 shr_own.sl_id = fsh.f_id;
838 shr.s_own_len = sizeof (shr_own);
839 shr.s_owner = (caddr_t)&shr_own;
840 error = VOP_SHRLOCK(vp, cmd, &shr, flag, fp->f_cred, NULL);
841 break;
842
843 default:
844 error = EINVAL;
845 break;
846 }
847
848 if (in_crit)
849 nbl_end_crit(vp);
850
851 done:
852 releasef(fdes);
853 out:
854 if (error)
855 return (set_errno(error));
856 return (retval);
857 }
858
/*
 * Validate the byte range described by a lock/space request (*flp)
 * against the limit `max` (MAXOFF_T, MAXOFF32_T or MAXOFFSET_T,
 * depending on the caller's data model), and normalize it.
 *
 * `offset` is the file's current offset (used for SEEK_CUR); for
 * SEEK_END the file size is fetched with VOP_GETATTR.
 *
 * Returns 0 if the resolved [start, end] range is representable and
 * well-formed, EINVAL for out-of-range or inverted requests, EOVERFLOW
 * when the arithmetic would exceed `max`, or the VOP_GETATTR error.
 * As a side effect an F_UNLCK request with positive length reaching
 * exactly `max` is canonicalized to l_len == 0 ("to end of file").
 */
int
flock_check(vnode_t *vp, flock64_t *flp, offset_t offset, offset_t max)
{
	struct vattr	vattr;
	int	error;
	u_offset_t start, end;

	/*
	 * Determine the starting point of the request
	 */
	switch (flp->l_whence) {
	case 0:		/* SEEK_SET */
		start = (u_offset_t)flp->l_start;
		if (start > max)
			return (EINVAL);
		break;
	case 1:		/* SEEK_CUR */
		/* Pre-check so the signed addition below cannot overflow. */
		if (flp->l_start > (max - offset))
			return (EOVERFLOW);
		start = (u_offset_t)(flp->l_start + offset);
		if (start > max)
			return (EINVAL);
		break;
	case 2:		/* SEEK_END */
		vattr.va_mask = AT_SIZE;
		if (error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL))
			return (error);
		if (flp->l_start > (max - (offset_t)vattr.va_size))
			return (EOVERFLOW);
		start = (u_offset_t)(flp->l_start + (offset_t)vattr.va_size);
		if (start > max)
			return (EINVAL);
		break;
	default:
		return (EINVAL);
	}

	/*
	 * Determine the range covered by the request.
	 */
	if (flp->l_len == 0)
		end = MAXEND;
	else if ((offset_t)flp->l_len > 0) {
		if (flp->l_len > (max - start + 1))
			return (EOVERFLOW);
		end = (u_offset_t)(start + (flp->l_len - 1));
		ASSERT(end <= max);
	} else {
		/*
		 * Negative length; why do we even allow this ?
		 * Because this allows easy specification of
		 * the last n bytes of the file.
		 */
		/* The range is [start + l_len + 1, start]. */
		end = start;
		start += (u_offset_t)flp->l_len;
		(start)++;
		if (start > max)
			return (EINVAL);
		ASSERT(end <= max);
	}
	ASSERT(start <= max);
	if (flp->l_type == F_UNLCK && flp->l_len > 0 &&
	    end == (offset_t)max) {
		flp->l_len = 0;
	}
	if (start > end)
		return (EINVAL);
	return (0);
}
928
929 static int
930 flock_get_start(vnode_t *vp, flock64_t *flp, offset_t offset, u_offset_t *start)
931 {
932 struct vattr vattr;
933 int error;
934
935 /*
936 * Determine the starting point of the request. Assume that it is
937 * a valid starting point.
938 */
939 switch (flp->l_whence) {
940 case 0: /* SEEK_SET */
941 *start = (u_offset_t)flp->l_start;
942 break;
943 case 1: /* SEEK_CUR */
944 *start = (u_offset_t)(flp->l_start + offset);
945 break;
946 case 2: /* SEEK_END */
947 vattr.va_mask = AT_SIZE;
948 if (error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL))
949 return (error);
950 *start = (u_offset_t)(flp->l_start + (offset_t)vattr.va_size);
951 break;
952 default:
953 return (EINVAL);
954 }
955
956 return (0);
957 }
958
959 /*
960 * Take rctl action when the requested file descriptor is too big.
961 */
962 static void
963 fd_too_big(proc_t *p)
964 {
965 mutex_enter(&p->p_lock);
966 (void) rctl_action(rctlproc_legacy[RLIMIT_NOFILE],
967 p->p_rctls, p, RCA_SAFE);
968 mutex_exit(&p->p_lock);
969 }