3742 zfs comments need cleaner, more consistent style
Submitted by: Will Andrews <willa@spectralogic.com>
Submitted by: Alan Somers <alans@spectralogic.com>
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Eric Schrock <eric.schrock@delphix.com>
--- old/usr/src/uts/common/fs/zfs/zfs_vnops.c
+++ new/usr/src/uts/common/fs/zfs/zfs_vnops.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 23 * Copyright (c) 2013 by Delphix. All rights reserved.
24 24 */
25 25
26 26 /* Portions Copyright 2007 Jeremy Teo */
27 27 /* Portions Copyright 2010 Robert Milkowski */
28 28
29 29 #include <sys/types.h>
30 30 #include <sys/param.h>
31 31 #include <sys/time.h>
32 32 #include <sys/systm.h>
33 33 #include <sys/sysmacros.h>
34 34 #include <sys/resource.h>
35 35 #include <sys/vfs.h>
36 36 #include <sys/vfs_opreg.h>
37 37 #include <sys/vnode.h>
38 38 #include <sys/file.h>
39 39 #include <sys/stat.h>
40 40 #include <sys/kmem.h>
41 41 #include <sys/taskq.h>
42 42 #include <sys/uio.h>
43 43 #include <sys/vmsystm.h>
44 44 #include <sys/atomic.h>
45 45 #include <sys/vm.h>
46 46 #include <vm/seg_vn.h>
47 47 #include <vm/pvn.h>
48 48 #include <vm/as.h>
49 49 #include <vm/kpm.h>
50 50 #include <vm/seg_kpm.h>
51 51 #include <sys/mman.h>
52 52 #include <sys/pathname.h>
53 53 #include <sys/cmn_err.h>
54 54 #include <sys/errno.h>
55 55 #include <sys/unistd.h>
56 56 #include <sys/zfs_dir.h>
57 57 #include <sys/zfs_acl.h>
58 58 #include <sys/zfs_ioctl.h>
59 59 #include <sys/fs/zfs.h>
60 60 #include <sys/dmu.h>
61 61 #include <sys/dmu_objset.h>
62 62 #include <sys/spa.h>
63 63 #include <sys/txg.h>
64 64 #include <sys/dbuf.h>
65 65 #include <sys/zap.h>
66 66 #include <sys/sa.h>
67 67 #include <sys/dirent.h>
68 68 #include <sys/policy.h>
69 69 #include <sys/sunddi.h>
70 70 #include <sys/filio.h>
71 71 #include <sys/sid.h>
72 72 #include "fs/fs_subr.h"
73 73 #include <sys/zfs_ctldir.h>
74 74 #include <sys/zfs_fuid.h>
75 75 #include <sys/zfs_sa.h>
76 76 #include <sys/dnlc.h>
77 77 #include <sys/zfs_rlock.h>
78 78 #include <sys/extdirent.h>
79 79 #include <sys/kidmap.h>
80 80 #include <sys/cred.h>
81 81 #include <sys/attr.h>
82 82
83 83 /*
84 84 * Programming rules.
85 85 *
86 86 * Each vnode op performs some logical unit of work. To do this, the ZPL must
87 87 * properly lock its in-core state, create a DMU transaction, do the work,
88 88 * record this work in the intent log (ZIL), commit the DMU transaction,
89 89 * and wait for the intent log to commit if it is a synchronous operation.
90 90 * Moreover, the vnode ops must work in both normal and log replay context.
91 91 * The ordering of events is important to avoid deadlocks and references
92 92 * to freed memory. The example below illustrates the following Big Rules:
93 93 *
94 - * (1) A check must be made in each zfs thread for a mounted file system.
94 + * (1) A check must be made in each zfs thread for a mounted file system.
95 95 * This is done avoiding races using ZFS_ENTER(zfsvfs).
96 - * A ZFS_EXIT(zfsvfs) is needed before all returns. Any znodes
97 - * must be checked with ZFS_VERIFY_ZP(zp). Both of these macros
98 - * can return EIO from the calling function.
96 + * A ZFS_EXIT(zfsvfs) is needed before all returns. Any znodes
97 + * must be checked with ZFS_VERIFY_ZP(zp). Both of these macros
98 + * can return EIO from the calling function.
99 99 *
100 100 * (2) VN_RELE() should always be the last thing except for zil_commit()
101 101 * (if necessary) and ZFS_EXIT(). This is for 3 reasons:
102 102 * First, if it's the last reference, the vnode/znode
103 103 * can be freed, so the zp may point to freed memory. Second, the last
104 104 * reference will call zfs_zinactive(), which may induce a lot of work --
105 105 * pushing cached pages (which acquires range locks) and syncing out
106 106 * cached atime changes. Third, zfs_zinactive() may require a new tx,
107 107 * which could deadlock the system if you were already holding one.
108 108 * If you must call VN_RELE() within a tx then use VN_RELE_ASYNC().
109 109 *
110 110 * (3) All range locks must be grabbed before calling dmu_tx_assign(),
111 111 * as they can span dmu_tx_assign() calls.
112 112 *
113 113 * (4) Always pass TXG_NOWAIT as the second argument to dmu_tx_assign().
114 114 * This is critical because we don't want to block while holding locks.
115 115 * Note, in particular, that if a lock is sometimes acquired before
116 116 * the tx assigns, and sometimes after (e.g. z_lock), then failing to
117 117 * use a non-blocking assign can deadlock the system. The scenario:
118 118 *
119 119 * Thread A has grabbed a lock before calling dmu_tx_assign().
120 120 * Thread B is in an already-assigned tx, and blocks for this lock.
121 121 * Thread A calls dmu_tx_assign(TXG_WAIT) and blocks in txg_wait_open()
122 122 * forever, because the previous txg can't quiesce until B's tx commits.
123 123 *
124 124 * If dmu_tx_assign() returns ERESTART and zfsvfs->z_assign is TXG_NOWAIT,
125 125 * then drop all locks, call dmu_tx_wait(), and try again.
126 126 *
127 127 * (5) If the operation succeeded, generate the intent log entry for it
128 128 * before dropping locks. This ensures that the ordering of events
129 129 * in the intent log matches the order in which they actually occurred.
130 - * During ZIL replay the zfs_log_* functions will update the sequence
130 + * During ZIL replay the zfs_log_* functions will update the sequence
131 131 * number to indicate the zil transaction has replayed.
132 132 *
133 133 * (6) At the end of each vnode op, the DMU tx must always commit,
134 134 * regardless of whether there were any errors.
135 135 *
136 136 * (7) After dropping all locks, invoke zil_commit(zilog, foid)
137 137 * to ensure that synchronous semantics are provided when necessary.
138 138 *
139 139 * In general, this is how things should be ordered in each vnode op:
140 140 *
141 141 * ZFS_ENTER(zfsvfs); // exit if unmounted
142 142 * top:
143 143 * zfs_dirent_lock(&dl, ...) // lock directory entry (may VN_HOLD())
144 144 * rw_enter(...); // grab any other locks you need
145 145 * tx = dmu_tx_create(...); // get DMU tx
146 146 * dmu_tx_hold_*(); // hold each object you might modify
147 147 * error = dmu_tx_assign(tx, TXG_NOWAIT); // try to assign
148 148 * if (error) {
149 149 * rw_exit(...); // drop locks
150 150 * zfs_dirent_unlock(dl); // unlock directory entry
151 151 * VN_RELE(...); // release held vnodes
152 152 * if (error == ERESTART) {
153 153 * dmu_tx_wait(tx);
154 154 * dmu_tx_abort(tx);
155 155 * goto top;
156 156 * }
157 157 * dmu_tx_abort(tx); // abort DMU tx
158 158 * ZFS_EXIT(zfsvfs); // finished in zfs
159 159 * return (error); // really out of space
160 160 * }
161 161 * error = do_real_work(); // do whatever this VOP does
162 162 * if (error == 0)
163 163 * zfs_log_*(...); // on success, make ZIL entry
164 164 * dmu_tx_commit(tx); // commit DMU tx -- error or not
165 165 * rw_exit(...); // drop locks
166 166 * zfs_dirent_unlock(dl); // unlock directory entry
167 167 * VN_RELE(...); // release held vnodes
168 168 * zil_commit(zilog, foid); // synchronous when necessary
169 169 * ZFS_EXIT(zfsvfs); // finished in zfs
170 170 * return (error); // done, report error
171 171 */
172 172
173 173 /* ARGSUSED */
174 174 static int
175 175 zfs_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct)
176 176 {
177 177 znode_t *zp = VTOZ(*vpp);
178 178 zfsvfs_t *zfsvfs = zp->z_zfsvfs;
179 179
180 180 ZFS_ENTER(zfsvfs);
181 181 ZFS_VERIFY_ZP(zp);
182 182
183 183 if ((flag & FWRITE) && (zp->z_pflags & ZFS_APPENDONLY) &&
184 184 ((flag & FAPPEND) == 0)) {
185 185 ZFS_EXIT(zfsvfs);
186 186 return (SET_ERROR(EPERM));
187 187 }
188 188
189 189 if (!zfs_has_ctldir(zp) && zp->z_zfsvfs->z_vscan &&
190 190 ZTOV(zp)->v_type == VREG &&
191 191 !(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0) {
192 192 if (fs_vscan(*vpp, cr, 0) != 0) {
193 193 ZFS_EXIT(zfsvfs);
194 194 return (SET_ERROR(EACCES));
195 195 }
196 196 }
197 197
198 198 /* Keep a count of the synchronous opens in the znode */
199 199 if (flag & (FSYNC | FDSYNC))
200 200 atomic_inc_32(&zp->z_sync_cnt);
201 201
202 202 ZFS_EXIT(zfsvfs);
203 203 return (0);
204 204 }
205 205
206 206 /* ARGSUSED */
207 207 static int
208 208 zfs_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr,
209 209 caller_context_t *ct)
210 210 {
211 211 znode_t *zp = VTOZ(vp);
212 212 zfsvfs_t *zfsvfs = zp->z_zfsvfs;
213 213
214 214 /*
215 215 * Clean up any locks held by this process on the vp.
216 216 */
217 217 cleanlocks(vp, ddi_get_pid(), 0);
218 218 cleanshares(vp, ddi_get_pid());
219 219
220 220 ZFS_ENTER(zfsvfs);
221 221 ZFS_VERIFY_ZP(zp);
222 222
223 223 /* Decrement the synchronous opens in the znode */
224 224 if ((flag & (FSYNC | FDSYNC)) && (count == 1))
225 225 atomic_dec_32(&zp->z_sync_cnt);
226 226
227 227 if (!zfs_has_ctldir(zp) && zp->z_zfsvfs->z_vscan &&
228 228 ZTOV(zp)->v_type == VREG &&
229 229 !(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0)
230 230 VERIFY(fs_vscan(vp, cr, 1) == 0);
231 231
232 232 ZFS_EXIT(zfsvfs);
233 233 return (0);
234 234 }
235 235
236 236 /*
237 237 * Lseek support for finding holes (cmd == _FIO_SEEK_HOLE) and
238 238 * data (cmd == _FIO_SEEK_DATA). "off" is an in/out parameter.
239 239 */
240 240 static int
241 241 zfs_holey(vnode_t *vp, int cmd, offset_t *off)
242 242 {
243 243 znode_t *zp = VTOZ(vp);
244 244 uint64_t noff = (uint64_t)*off; /* new offset */
245 245 uint64_t file_sz;
246 246 int error;
247 247 boolean_t hole;
248 248
249 249 file_sz = zp->z_size;
250 250 if (noff >= file_sz) {
251 251 return (SET_ERROR(ENXIO));
252 252 }
253 253
254 254 if (cmd == _FIO_SEEK_HOLE)
255 255 hole = B_TRUE;
256 256 else
257 257 hole = B_FALSE;
258 258
259 259 error = dmu_offset_next(zp->z_zfsvfs->z_os, zp->z_id, hole, &noff);
260 260
261 261 /* end of file? */
262 262 if ((error == ESRCH) || (noff > file_sz)) {
263 263 /*
264 264 * Handle the virtual hole at the end of file.
265 265 */
266 266 if (hole) {
267 267 *off = file_sz;
268 268 return (0);
269 269 }
270 270 return (SET_ERROR(ENXIO));
271 271 }
272 272
273 273 if (noff < *off)
274 274 return (error);
275 275 *off = noff;
276 276 return (error);
277 277 }
278 278
279 279 /* ARGSUSED */
280 280 static int
281 281 zfs_ioctl(vnode_t *vp, int com, intptr_t data, int flag, cred_t *cred,
282 282 int *rvalp, caller_context_t *ct)
283 283 {
284 284 offset_t off;
285 285 int error;
286 286 zfsvfs_t *zfsvfs;
287 287 znode_t *zp;
288 288
289 289 switch (com) {
290 290 case _FIOFFS:
291 291 return (zfs_sync(vp->v_vfsp, 0, cred));
292 292
293 293 /*
294 294 * The following two ioctls are used by bfu. Faking out,
295 295 * necessary to avoid bfu errors.
296 296 */
297 297 case _FIOGDIO:
298 298 case _FIOSDIO:
299 299 return (0);
300 300
301 301 case _FIO_SEEK_DATA:
302 302 case _FIO_SEEK_HOLE:
303 303 if (ddi_copyin((void *)data, &off, sizeof (off), flag))
304 304 return (SET_ERROR(EFAULT));
305 305
306 306 zp = VTOZ(vp);
307 307 zfsvfs = zp->z_zfsvfs;
308 308 ZFS_ENTER(zfsvfs);
309 309 ZFS_VERIFY_ZP(zp);
310 310
311 311 /* offset parameter is in/out */
312 312 error = zfs_holey(vp, com, &off);
313 313 ZFS_EXIT(zfsvfs);
314 314 if (error)
315 315 return (error);
316 316 if (ddi_copyout(&off, (void *)data, sizeof (off), flag))
317 317 return (SET_ERROR(EFAULT));
318 318 return (0);
319 319 }
320 320 return (SET_ERROR(ENOTTY));
321 321 }
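
For readers tracing the hole-seeking path: on illumos, lseek(2) with SEEK_DATA/SEEK_HOLE is what ultimately reaches zfs_holey() through the _FIO_SEEK_DATA/_FIO_SEEK_HOLE cases above, with "off" treated as an in/out parameter and a virtual hole reported at end of file. The userland sketch below is editor-added illustration, not part of this webrev; SEEK_DATA/SEEK_HOLE header requirements vary by platform, and error handling is minimal.

/*
 * Editor-added sketch (not part of this change): enumerate the data
 * regions of an open file using SEEK_DATA/SEEK_HOLE from userland.
 */
#include <sys/types.h>
#include <unistd.h>
#include <errno.h>
#include <stdio.h>

static void
walk_data_regions(int fd)
{
	off_t data = 0;

	for (;;) {
		data = lseek(fd, data, SEEK_DATA);	/* next data region */
		if (data < 0) {
			if (errno != ENXIO)	/* ENXIO: no data before EOF */
				perror("SEEK_DATA");
			break;
		}
		off_t hole = lseek(fd, data, SEEK_HOLE); /* end of that region */
		if (hole < 0) {
			perror("SEEK_HOLE");
			break;
		}
		(void) printf("data: [%lld, %lld)\n",
		    (long long)data, (long long)hole);
		data = hole;
	}
}
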
322 322
323 323 /*
324 324 * Utility functions to map and unmap a single physical page. These
325 325 * are used to manage the mappable copies of ZFS file data, and therefore
326 326 * do not update ref/mod bits.
327 327 */
328 328 caddr_t
329 329 zfs_map_page(page_t *pp, enum seg_rw rw)
330 330 {
331 331 if (kpm_enable)
332 332 return (hat_kpm_mapin(pp, 0));
333 333 ASSERT(rw == S_READ || rw == S_WRITE);
334 334 return (ppmapin(pp, PROT_READ | ((rw == S_WRITE) ? PROT_WRITE : 0),
335 335 (caddr_t)-1));
336 336 }
337 337
338 338 void
339 339 zfs_unmap_page(page_t *pp, caddr_t addr)
340 340 {
341 341 if (kpm_enable) {
342 342 hat_kpm_mapout(pp, 0, addr);
343 343 } else {
344 344 ppmapout(addr);
345 345 }
346 346 }
347 347
348 348 /*
349 349 * When a file is memory mapped, we must keep the IO data synchronized
350 350 * between the DMU cache and the memory mapped pages. What this means:
351 351 *
352 352 * On Write: If we find a memory mapped page, we write to *both*
353 353 * the page and the dmu buffer.
354 354 */
355 355 static void
356 356 update_pages(vnode_t *vp, int64_t start, int len, objset_t *os, uint64_t oid)
357 357 {
358 358 int64_t off;
359 359
360 360 off = start & PAGEOFFSET;
361 361 for (start &= PAGEMASK; len > 0; start += PAGESIZE) {
362 362 page_t *pp;
363 363 uint64_t nbytes = MIN(PAGESIZE - off, len);
364 364
365 365 if (pp = page_lookup(vp, start, SE_SHARED)) {
366 366 caddr_t va;
367 367
368 368 va = zfs_map_page(pp, S_WRITE);
369 369 (void) dmu_read(os, oid, start+off, nbytes, va+off,
370 370 DMU_READ_PREFETCH);
371 371 zfs_unmap_page(pp, va);
372 372 page_unlock(pp);
373 373 }
374 374 len -= nbytes;
375 375 off = 0;
376 376 }
377 377 }
378 378
379 379 /*
380 380 * When a file is memory mapped, we must keep the IO data synchronized
381 381 * between the DMU cache and the memory mapped pages. What this means:
382 382 *
383 383 * On Read: We "read" preferentially from memory mapped pages,
384 384 * else we default from the dmu buffer.
385 385 *
386 386 * NOTE: We will always "break up" the IO into PAGESIZE uiomoves when
387 - * the file is memory mapped.
387 + * the file is memory mapped.
388 388 */
389 389 static int
390 390 mappedread(vnode_t *vp, int nbytes, uio_t *uio)
391 391 {
392 392 znode_t *zp = VTOZ(vp);
393 393 objset_t *os = zp->z_zfsvfs->z_os;
394 394 int64_t start, off;
395 395 int len = nbytes;
396 396 int error = 0;
397 397
398 398 start = uio->uio_loffset;
399 399 off = start & PAGEOFFSET;
400 400 for (start &= PAGEMASK; len > 0; start += PAGESIZE) {
401 401 page_t *pp;
402 402 uint64_t bytes = MIN(PAGESIZE - off, len);
403 403
404 404 if (pp = page_lookup(vp, start, SE_SHARED)) {
405 405 caddr_t va;
406 406
407 407 va = zfs_map_page(pp, S_READ);
408 408 error = uiomove(va + off, bytes, UIO_READ, uio);
409 409 zfs_unmap_page(pp, va);
410 410 page_unlock(pp);
411 411 } else {
412 412 error = dmu_read_uio(os, zp->z_id, uio, bytes);
413 413 }
414 414 len -= bytes;
415 415 off = 0;
416 416 if (error)
417 417 break;
418 418 }
419 419 return (error);
420 420 }
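
update_pages() and mappedread() above are easier to appreciate from the application's side: a process with the file mapped MAP_SHARED expects bytes stored through write(2) to be visible through the mapping, and bytes stored through the mapping to be visible through read(2), even though ZFS file data is not kept coherent with the page cache automatically. The sketch below is editor-added illustration only, with error handling elided and 4096 standing in for the page size.

#include <sys/types.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <unistd.h>
#include <assert.h>

/*
 * Editor-added sketch (not part of this change): the coherency that
 * update_pages()/mappedread() provide, as seen by an application.
 */
static void
check_map_coherency(const char *path)
{
	int fd = open(path, O_RDWR);
	char *map;

	(void) ftruncate(fd, 4096);	/* size the file to exactly one page */
	map = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);

	(void) pwrite(fd, "A", 1, 0);	/* store via write(2)... */
	assert(map[0] == 'A');		/* ...seen through the mapping */

	map[1] = 'B';			/* store via the mapping... */
	char c;
	(void) pread(fd, &c, 1, 1);	/* ...seen through read(2) */
	assert(c == 'B');

	(void) munmap(map, 4096);
	(void) close(fd);
}
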
421 421
422 422 offset_t zfs_read_chunk_size = 1024 * 1024; /* Tunable */
423 423
424 424 /*
425 425 * Read bytes from specified file into supplied buffer.
426 426 *
427 427 * IN: vp - vnode of file to be read from.
428 428 * uio - structure supplying read location, range info,
429 429 * and return buffer.
430 430 * ioflag - SYNC flags; used to provide FRSYNC semantics.
431 431 * cr - credentials of caller.
432 432 * ct - caller context
433 433 *
434 434 * OUT: uio - updated offset and range, buffer filled.
435 435 *
436 - * RETURN: 0 if success
437 - * error code if failure
436 + * RETURN: 0 on success, error code on failure.
438 437 *
439 438 * Side Effects:
440 439 * vp - atime updated if byte count > 0
441 440 */
442 441 /* ARGSUSED */
443 442 static int
444 443 zfs_read(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct)
445 444 {
446 445 znode_t *zp = VTOZ(vp);
447 446 zfsvfs_t *zfsvfs = zp->z_zfsvfs;
448 447 objset_t *os;
449 448 ssize_t n, nbytes;
450 449 int error = 0;
451 450 rl_t *rl;
452 451 xuio_t *xuio = NULL;
453 452
454 453 ZFS_ENTER(zfsvfs);
455 454 ZFS_VERIFY_ZP(zp);
456 455 os = zfsvfs->z_os;
457 456
458 457 if (zp->z_pflags & ZFS_AV_QUARANTINED) {
459 458 ZFS_EXIT(zfsvfs);
460 459 return (SET_ERROR(EACCES));
461 460 }
462 461
463 462 /*
464 463 * Validate file offset
465 464 */
466 465 if (uio->uio_loffset < (offset_t)0) {
467 466 ZFS_EXIT(zfsvfs);
468 467 return (SET_ERROR(EINVAL));
469 468 }
470 469
471 470 /*
472 471 * Fasttrack empty reads
473 472 */
474 473 if (uio->uio_resid == 0) {
475 474 ZFS_EXIT(zfsvfs);
476 475 return (0);
477 476 }
478 477
479 478 /*
480 479 * Check for mandatory locks
481 480 */
482 481 if (MANDMODE(zp->z_mode)) {
483 482 if (error = chklock(vp, FREAD,
484 483 uio->uio_loffset, uio->uio_resid, uio->uio_fmode, ct)) {
485 484 ZFS_EXIT(zfsvfs);
486 485 return (error);
487 486 }
488 487 }
489 488
490 489 /*
491 490 * If we're in FRSYNC mode, sync out this znode before reading it.
492 491 */
493 492 if (ioflag & FRSYNC || zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
494 493 zil_commit(zfsvfs->z_log, zp->z_id);
495 494
496 495 /*
497 496 * Lock the range against changes.
498 497 */
499 498 rl = zfs_range_lock(zp, uio->uio_loffset, uio->uio_resid, RL_READER);
500 499
501 500 /*
502 501 * If we are reading past end-of-file we can skip
503 502 * to the end; but we might still need to set atime.
504 503 */
505 504 if (uio->uio_loffset >= zp->z_size) {
506 505 error = 0;
507 506 goto out;
508 507 }
509 508
510 509 ASSERT(uio->uio_loffset < zp->z_size);
511 510 n = MIN(uio->uio_resid, zp->z_size - uio->uio_loffset);
512 511
513 512 if ((uio->uio_extflg == UIO_XUIO) &&
514 513 (((xuio_t *)uio)->xu_type == UIOTYPE_ZEROCOPY)) {
515 514 int nblk;
516 515 int blksz = zp->z_blksz;
517 516 uint64_t offset = uio->uio_loffset;
518 517
519 518 xuio = (xuio_t *)uio;
520 519 if ((ISP2(blksz))) {
521 520 nblk = (P2ROUNDUP(offset + n, blksz) - P2ALIGN(offset,
522 521 blksz)) / blksz;
523 522 } else {
524 523 ASSERT(offset + n <= blksz);
525 524 nblk = 1;
526 525 }
527 526 (void) dmu_xuio_init(xuio, nblk);
528 527
529 528 if (vn_has_cached_data(vp)) {
530 529 /*
531 530 * For simplicity, we always allocate a full buffer
532 531 * even if we only expect to read a portion of a block.
533 532 */
534 533 while (--nblk >= 0) {
535 534 (void) dmu_xuio_add(xuio,
536 535 dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl),
537 536 blksz), 0, blksz);
538 537 }
539 538 }
540 539 }
541 540
542 541 while (n > 0) {
543 542 nbytes = MIN(n, zfs_read_chunk_size -
544 543 P2PHASE(uio->uio_loffset, zfs_read_chunk_size));
545 544
546 545 if (vn_has_cached_data(vp))
547 546 error = mappedread(vp, nbytes, uio);
548 547 else
549 548 error = dmu_read_uio(os, zp->z_id, uio, nbytes);
550 549 if (error) {
551 550 /* convert checksum errors into IO errors */
552 551 if (error == ECKSUM)
553 552 error = SET_ERROR(EIO);
554 553 break;
555 554 }
556 555
557 556 n -= nbytes;
558 557 }
559 558 out:
560 559 zfs_range_unlock(rl);
561 560
562 561 ZFS_ACCESSTIME_STAMP(zfsvfs, zp);
563 562 ZFS_EXIT(zfsvfs);
564 563 return (error);
565 564 }
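
A small worked example of the chunking arithmetic in the read loop above: P2PHASE(x, align) is x & (align - 1) for a power-of-two align, so each pass stops at the next zfs_read_chunk_size boundary. With the default 1 MB chunk size, a 3 MB read starting at offset 1.5 MB is issued as 512 KB, 1 MB, 1 MB, and 512 KB pieces. The standalone sketch below reproduces just that math and is not part of the webrev.

#include <stdio.h>

#define	CHUNK		(1024LL * 1024)		/* zfs_read_chunk_size default */
#define	P2PHASE(x, a)	((x) & ((a) - 1))	/* offset within a power-of-2 chunk */
#define	MIN(a, b)	((a) < (b) ? (a) : (b))

int
main(void)
{
	long long off = 1536LL * 1024;		/* start at 1.5 MB */
	long long n = 3LL * 1024 * 1024;	/* read 3 MB */

	while (n > 0) {
		long long nbytes = MIN(n, CHUNK - P2PHASE(off, CHUNK));

		(void) printf("chunk of %lld bytes at offset %lld\n",
		    nbytes, off);
		off += nbytes;
		n -= nbytes;
	}
	/* prints 524288, 1048576, 1048576, 524288 */
	return (0);
}
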
566 565
567 566 /*
568 567 * Write the bytes to a file.
569 568 *
570 569 * IN: vp - vnode of file to be written to.
571 570 * uio - structure supplying write location, range info,
572 571 * and data buffer.
573 - * ioflag - FAPPEND flag set if in append mode.
572 + * ioflag - FAPPEND, FSYNC, and/or FDSYNC. FAPPEND is
573 + * set if in append mode.
574 574 * cr - credentials of caller.
575 575 * ct - caller context (NFS/CIFS fem monitor only)
576 576 *
577 577 * OUT: uio - updated offset and range.
578 578 *
579 - * RETURN: 0 if success
580 - * error code if failure
579 + * RETURN: 0 on success, error code on failure.
581 580 *
582 581 * Timestamps:
583 582 * vp - ctime|mtime updated if byte count > 0
584 583 */
585 584
586 585 /* ARGSUSED */
587 586 static int
588 587 zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct)
589 588 {
590 589 znode_t *zp = VTOZ(vp);
591 590 rlim64_t limit = uio->uio_llimit;
592 591 ssize_t start_resid = uio->uio_resid;
593 592 ssize_t tx_bytes;
594 593 uint64_t end_size;
595 594 dmu_tx_t *tx;
596 595 zfsvfs_t *zfsvfs = zp->z_zfsvfs;
597 596 zilog_t *zilog;
598 597 offset_t woff;
599 598 ssize_t n, nbytes;
600 599 rl_t *rl;
601 600 int max_blksz = zfsvfs->z_max_blksz;
602 601 int error = 0;
603 602 arc_buf_t *abuf;
604 603 iovec_t *aiov = NULL;
605 604 xuio_t *xuio = NULL;
606 605 int i_iov = 0;
607 606 int iovcnt = uio->uio_iovcnt;
608 607 iovec_t *iovp = uio->uio_iov;
609 608 int write_eof;
610 609 int count = 0;
611 610 sa_bulk_attr_t bulk[4];
612 611 uint64_t mtime[2], ctime[2];
613 612
614 613 /*
615 614 * Fasttrack empty write
616 615 */
617 616 n = start_resid;
618 617 if (n == 0)
619 618 return (0);
620 619
621 620 if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T)
622 621 limit = MAXOFFSET_T;
623 622
624 623 ZFS_ENTER(zfsvfs);
625 624 ZFS_VERIFY_ZP(zp);
626 625
627 626 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16);
628 627 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16);
629 628 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL,
630 629 &zp->z_size, 8);
631 630 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
632 631 &zp->z_pflags, 8);
633 632
634 633 /*
635 634 * If immutable or not appending then return EPERM
636 635 */
637 636 if ((zp->z_pflags & (ZFS_IMMUTABLE | ZFS_READONLY)) ||
638 637 ((zp->z_pflags & ZFS_APPENDONLY) && !(ioflag & FAPPEND) &&
639 638 (uio->uio_loffset < zp->z_size))) {
640 639 ZFS_EXIT(zfsvfs);
641 640 return (SET_ERROR(EPERM));
642 641 }
643 642
644 643 zilog = zfsvfs->z_log;
645 644
646 645 /*
647 646 * Validate file offset
648 647 */
649 648 woff = ioflag & FAPPEND ? zp->z_size : uio->uio_loffset;
650 649 if (woff < 0) {
651 650 ZFS_EXIT(zfsvfs);
652 651 return (SET_ERROR(EINVAL));
653 652 }
654 653
655 654 /*
656 655 * Check for mandatory locks before calling zfs_range_lock()
657 656 * in order to prevent a deadlock with locks set via fcntl().
658 657 */
659 658 if (MANDMODE((mode_t)zp->z_mode) &&
660 659 (error = chklock(vp, FWRITE, woff, n, uio->uio_fmode, ct)) != 0) {
661 660 ZFS_EXIT(zfsvfs);
662 661 return (error);
663 662 }
664 663
665 664 /*
666 665 * Pre-fault the pages to ensure slow (eg NFS) pages
667 666 * don't hold up txg.
668 667 * Skip this if uio contains loaned arc_buf.
669 668 */
670 669 if ((uio->uio_extflg == UIO_XUIO) &&
671 670 (((xuio_t *)uio)->xu_type == UIOTYPE_ZEROCOPY))
672 671 xuio = (xuio_t *)uio;
673 672 else
674 673 uio_prefaultpages(MIN(n, max_blksz), uio);
675 674
676 675 /*
677 676 * If in append mode, set the io offset pointer to eof.
678 677 */
679 678 if (ioflag & FAPPEND) {
680 679 /*
681 680 * Obtain an appending range lock to guarantee file append
682 681 * semantics. We reset the write offset once we have the lock.
683 682 */
684 683 rl = zfs_range_lock(zp, 0, n, RL_APPEND);
685 684 woff = rl->r_off;
686 685 if (rl->r_len == UINT64_MAX) {
687 686 /*
688 687 * We overlocked the file because this write will cause
689 688 * the file block size to increase.
690 689 * Note that zp_size cannot change with this lock held.
691 690 */
692 691 woff = zp->z_size;
693 692 }
694 693 uio->uio_loffset = woff;
695 694 } else {
696 695 /*
697 696 * Note that if the file block size will change as a result of
698 697 * this write, then this range lock will lock the entire file
699 698 * so that we can re-write the block safely.
700 699 */
701 700 rl = zfs_range_lock(zp, woff, n, RL_WRITER);
702 701 }
703 702
704 703 if (woff >= limit) {
705 704 zfs_range_unlock(rl);
706 705 ZFS_EXIT(zfsvfs);
707 706 return (SET_ERROR(EFBIG));
708 707 }
709 708
710 709 if ((woff + n) > limit || woff > (limit - n))
711 710 n = limit - woff;
712 711
713 712 /* Will this write extend the file length? */
714 713 write_eof = (woff + n > zp->z_size);
715 714
716 715 end_size = MAX(zp->z_size, woff + n);
717 716
718 717 /*
719 718 * Write the file in reasonable size chunks. Each chunk is written
720 719 * in a separate transaction; this keeps the intent log records small
721 720 * and allows us to do more fine-grained space accounting.
722 721 */
723 722 while (n > 0) {
724 723 abuf = NULL;
725 724 woff = uio->uio_loffset;
726 725 again:
727 726 if (zfs_owner_overquota(zfsvfs, zp, B_FALSE) ||
728 727 zfs_owner_overquota(zfsvfs, zp, B_TRUE)) {
729 728 if (abuf != NULL)
730 729 dmu_return_arcbuf(abuf);
731 730 error = SET_ERROR(EDQUOT);
732 731 break;
733 732 }
734 733
735 734 if (xuio && abuf == NULL) {
736 735 ASSERT(i_iov < iovcnt);
737 736 aiov = &iovp[i_iov];
738 737 abuf = dmu_xuio_arcbuf(xuio, i_iov);
739 738 dmu_xuio_clear(xuio, i_iov);
740 739 DTRACE_PROBE3(zfs_cp_write, int, i_iov,
741 740 iovec_t *, aiov, arc_buf_t *, abuf);
742 741 ASSERT((aiov->iov_base == abuf->b_data) ||
743 742 ((char *)aiov->iov_base - (char *)abuf->b_data +
744 743 aiov->iov_len == arc_buf_size(abuf)));
745 744 i_iov++;
746 745 } else if (abuf == NULL && n >= max_blksz &&
747 746 woff >= zp->z_size &&
748 747 P2PHASE(woff, max_blksz) == 0 &&
749 748 zp->z_blksz == max_blksz) {
750 749 /*
751 750 * This write covers a full block. "Borrow" a buffer
752 751 * from the dmu so that we can fill it before we enter
753 752 * a transaction. This avoids the possibility of
754 753 * holding up the transaction if the data copy hangs
755 754 * up on a pagefault (e.g., from an NFS server mapping).
756 755 */
757 756 size_t cbytes;
758 757
759 758 abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl),
760 759 max_blksz);
761 760 ASSERT(abuf != NULL);
762 761 ASSERT(arc_buf_size(abuf) == max_blksz);
763 762 if (error = uiocopy(abuf->b_data, max_blksz,
764 763 UIO_WRITE, uio, &cbytes)) {
765 764 dmu_return_arcbuf(abuf);
766 765 break;
767 766 }
768 767 ASSERT(cbytes == max_blksz);
769 768 }
770 769
771 770 /*
772 771 * Start a transaction.
773 772 */
774 773 tx = dmu_tx_create(zfsvfs->z_os);
775 774 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
776 775 dmu_tx_hold_write(tx, zp->z_id, woff, MIN(n, max_blksz));
777 776 zfs_sa_upgrade_txholds(tx, zp);
778 777 error = dmu_tx_assign(tx, TXG_NOWAIT);
779 778 if (error) {
780 779 if (error == ERESTART) {
781 780 dmu_tx_wait(tx);
782 781 dmu_tx_abort(tx);
783 782 goto again;
784 783 }
785 784 dmu_tx_abort(tx);
786 785 if (abuf != NULL)
787 786 dmu_return_arcbuf(abuf);
788 787 break;
789 788 }
790 789
791 790 /*
792 791 * If zfs_range_lock() over-locked we grow the blocksize
793 792 * and then reduce the lock range. This will only happen
794 793 * on the first iteration since zfs_range_reduce() will
795 794 * shrink down r_len to the appropriate size.
796 795 */
797 796 if (rl->r_len == UINT64_MAX) {
798 797 uint64_t new_blksz;
799 798
800 799 if (zp->z_blksz > max_blksz) {
801 800 ASSERT(!ISP2(zp->z_blksz));
802 801 new_blksz = MIN(end_size, SPA_MAXBLOCKSIZE);
803 802 } else {
804 803 new_blksz = MIN(end_size, max_blksz);
805 804 }
806 805 zfs_grow_blocksize(zp, new_blksz, tx);
807 806 zfs_range_reduce(rl, woff, n);
808 807 }
809 808
810 809 /*
811 810 * XXX - should we really limit each write to z_max_blksz?
812 811 * Perhaps we should use SPA_MAXBLOCKSIZE chunks?
813 812 */
814 813 nbytes = MIN(n, max_blksz - P2PHASE(woff, max_blksz));
815 814
816 815 if (abuf == NULL) {
817 816 tx_bytes = uio->uio_resid;
818 817 error = dmu_write_uio_dbuf(sa_get_db(zp->z_sa_hdl),
819 818 uio, nbytes, tx);
820 819 tx_bytes -= uio->uio_resid;
821 820 } else {
822 821 tx_bytes = nbytes;
823 822 ASSERT(xuio == NULL || tx_bytes == aiov->iov_len);
824 823 /*
825 824 * If this is not a full block write, but we are
826 825 * extending the file past EOF and this data starts
827 826 * block-aligned, use assign_arcbuf(). Otherwise,
828 827 * write via dmu_write().
829 828 */
830 829 if (tx_bytes < max_blksz && (!write_eof ||
831 830 aiov->iov_base != abuf->b_data)) {
832 831 ASSERT(xuio);
833 832 dmu_write(zfsvfs->z_os, zp->z_id, woff,
834 833 aiov->iov_len, aiov->iov_base, tx);
835 834 dmu_return_arcbuf(abuf);
836 835 xuio_stat_wbuf_copied();
837 836 } else {
838 837 ASSERT(xuio || tx_bytes == max_blksz);
839 838 dmu_assign_arcbuf(sa_get_db(zp->z_sa_hdl),
840 839 woff, abuf, tx);
841 840 }
842 841 ASSERT(tx_bytes <= uio->uio_resid);
843 842 uioskip(uio, tx_bytes);
844 843 }
845 844 if (tx_bytes && vn_has_cached_data(vp)) {
846 845 update_pages(vp, woff,
847 846 tx_bytes, zfsvfs->z_os, zp->z_id);
848 847 }
849 848
850 849 /*
851 850 * If we made no progress, we're done. If we made even
852 851 * partial progress, update the znode and ZIL accordingly.
853 852 */
854 853 if (tx_bytes == 0) {
855 854 (void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs),
856 855 (void *)&zp->z_size, sizeof (uint64_t), tx);
857 856 dmu_tx_commit(tx);
858 857 ASSERT(error != 0);
859 858 break;
860 859 }
861 860
862 861 /*
863 862 * Clear Set-UID/Set-GID bits on successful write if not
 864 863 	 * privileged and at least one of the execute bits is set.
865 864 *
 866 865 	 * It would be nice to do this after all writes have
867 866 * been done, but that would still expose the ISUID/ISGID
868 867 * to another app after the partial write is committed.
869 868 *
870 869 * Note: we don't call zfs_fuid_map_id() here because
871 870 * user 0 is not an ephemeral uid.
872 871 */
873 872 mutex_enter(&zp->z_acl_lock);
874 873 if ((zp->z_mode & (S_IXUSR | (S_IXUSR >> 3) |
875 874 (S_IXUSR >> 6))) != 0 &&
876 875 (zp->z_mode & (S_ISUID | S_ISGID)) != 0 &&
877 876 secpolicy_vnode_setid_retain(cr,
878 877 (zp->z_mode & S_ISUID) != 0 && zp->z_uid == 0) != 0) {
879 878 uint64_t newmode;
880 879 zp->z_mode &= ~(S_ISUID | S_ISGID);
881 880 newmode = zp->z_mode;
882 881 (void) sa_update(zp->z_sa_hdl, SA_ZPL_MODE(zfsvfs),
883 882 (void *)&newmode, sizeof (uint64_t), tx);
884 883 }
885 884 mutex_exit(&zp->z_acl_lock);
886 885
887 886 zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime,
888 887 B_TRUE);
889 888
890 889 /*
891 890 * Update the file size (zp_size) if it has changed;
892 891 * account for possible concurrent updates.
893 892 */
894 893 while ((end_size = zp->z_size) < uio->uio_loffset) {
895 894 (void) atomic_cas_64(&zp->z_size, end_size,
896 895 uio->uio_loffset);
897 896 ASSERT(error == 0);
898 897 }
899 898 /*
900 899 * If we are replaying and eof is non zero then force
901 900 * the file size to the specified eof. Note, there's no
902 901 * concurrency during replay.
903 902 */
904 903 if (zfsvfs->z_replay && zfsvfs->z_replay_eof != 0)
905 904 zp->z_size = zfsvfs->z_replay_eof;
906 905
907 906 error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
908 907
909 908 zfs_log_write(zilog, tx, TX_WRITE, zp, woff, tx_bytes, ioflag);
910 909 dmu_tx_commit(tx);
911 910
912 911 if (error != 0)
913 912 break;
914 913 ASSERT(tx_bytes == nbytes);
915 914 n -= nbytes;
916 915
917 916 if (!xuio && n > 0)
918 917 uio_prefaultpages(MIN(n, max_blksz), uio);
919 918 }
920 919
921 920 zfs_range_unlock(rl);
922 921
923 922 /*
924 923 * If we're in replay mode, or we made no progress, return error.
925 924 * Otherwise, it's at least a partial write, so it's successful.
926 925 */
927 926 if (zfsvfs->z_replay || uio->uio_resid == start_resid) {
928 927 ZFS_EXIT(zfsvfs);
929 928 return (error);
930 929 }
931 930
932 931 if (ioflag & (FSYNC | FDSYNC) ||
933 932 zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
934 933 zil_commit(zilog, zp->z_id);
935 934
936 935 ZFS_EXIT(zfsvfs);
937 936 return (0);
938 937 }
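
One consequence of the write path that is easy to miss from userland: per the comment above about clearing Set-UID/Set-GID bits, an unprivileged write to an executable carrying those bits drops them as soon as any bytes are committed (the privilege check is secpolicy_vnode_setid_retain()). The sketch below is editor-added illustration of how that looks to an application, with error handling elided.

#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdio.h>

/*
 * Editor-added sketch (not part of this change): write one byte to 'path'
 * as an unprivileged user and report whether the setuid/setgid bits
 * survived.
 */
static void
show_setid_clearing(const char *path)
{
	struct stat st;
	int fd = open(path, O_WRONLY);

	(void) fstat(fd, &st);
	(void) printf("before: mode %04o\n", (unsigned int)(st.st_mode & 07777));

	(void) write(fd, "x", 1);	/* zfs_write() drops S_ISUID/S_ISGID here */

	(void) fstat(fd, &st);
	(void) printf("after:  mode %04o\n", (unsigned int)(st.st_mode & 07777));
	(void) close(fd);
}
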
939 938
940 939 void
941 940 zfs_get_done(zgd_t *zgd, int error)
942 941 {
943 942 znode_t *zp = zgd->zgd_private;
944 943 objset_t *os = zp->z_zfsvfs->z_os;
945 944
946 945 if (zgd->zgd_db)
947 946 dmu_buf_rele(zgd->zgd_db, zgd);
948 947
949 948 zfs_range_unlock(zgd->zgd_rl);
950 949
951 950 /*
952 951 * Release the vnode asynchronously as we currently have the
953 952 * txg stopped from syncing.
954 953 */
955 954 VN_RELE_ASYNC(ZTOV(zp), dsl_pool_vnrele_taskq(dmu_objset_pool(os)));
956 955
957 956 if (error == 0 && zgd->zgd_bp)
958 957 zil_add_block(zgd->zgd_zilog, zgd->zgd_bp);
959 958
960 959 kmem_free(zgd, sizeof (zgd_t));
961 960 }
962 961
963 962 #ifdef DEBUG
964 963 static int zil_fault_io = 0;
965 964 #endif
966 965
967 966 /*
968 967 * Get data to generate a TX_WRITE intent log record.
969 968 */
970 969 int
971 970 zfs_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio)
972 971 {
973 972 zfsvfs_t *zfsvfs = arg;
974 973 objset_t *os = zfsvfs->z_os;
975 974 znode_t *zp;
976 975 uint64_t object = lr->lr_foid;
977 976 uint64_t offset = lr->lr_offset;
978 977 uint64_t size = lr->lr_length;
979 978 blkptr_t *bp = &lr->lr_blkptr;
980 979 dmu_buf_t *db;
981 980 zgd_t *zgd;
982 981 int error = 0;
983 982
984 983 ASSERT(zio != NULL);
985 984 ASSERT(size != 0);
986 985
987 986 /*
988 987 * Nothing to do if the file has been removed
989 988 */
990 989 if (zfs_zget(zfsvfs, object, &zp) != 0)
991 990 return (SET_ERROR(ENOENT));
992 991 if (zp->z_unlinked) {
993 992 /*
994 993 * Release the vnode asynchronously as we currently have the
995 994 * txg stopped from syncing.
996 995 */
997 996 VN_RELE_ASYNC(ZTOV(zp),
998 997 dsl_pool_vnrele_taskq(dmu_objset_pool(os)));
999 998 return (SET_ERROR(ENOENT));
1000 999 }
1001 1000
1002 1001 zgd = (zgd_t *)kmem_zalloc(sizeof (zgd_t), KM_SLEEP);
1003 1002 zgd->zgd_zilog = zfsvfs->z_log;
1004 1003 zgd->zgd_private = zp;
1005 1004
1006 1005 /*
1007 1006 * Write records come in two flavors: immediate and indirect.
1008 1007 * For small writes it's cheaper to store the data with the
1009 1008 * log record (immediate); for large writes it's cheaper to
1010 1009 * sync the data and get a pointer to it (indirect) so that
1011 1010 * we don't have to write the data twice.
1012 1011 */
1013 1012 if (buf != NULL) { /* immediate write */
1014 1013 zgd->zgd_rl = zfs_range_lock(zp, offset, size, RL_READER);
1015 1014 /* test for truncation needs to be done while range locked */
1016 1015 if (offset >= zp->z_size) {
1017 1016 error = SET_ERROR(ENOENT);
1018 1017 } else {
1019 1018 error = dmu_read(os, object, offset, size, buf,
1020 1019 DMU_READ_NO_PREFETCH);
1021 1020 }
1022 1021 ASSERT(error == 0 || error == ENOENT);
1023 1022 } else { /* indirect write */
1024 1023 /*
1025 1024 * Have to lock the whole block to ensure when it's
 1026 1025 	 * written out and its checksum is being calculated
1027 1026 * that no one can change the data. We need to re-check
1028 1027 * blocksize after we get the lock in case it's changed!
1029 1028 */
1030 1029 for (;;) {
1031 1030 uint64_t blkoff;
1032 1031 size = zp->z_blksz;
1033 1032 blkoff = ISP2(size) ? P2PHASE(offset, size) : offset;
1034 1033 offset -= blkoff;
1035 1034 zgd->zgd_rl = zfs_range_lock(zp, offset, size,
1036 1035 RL_READER);
1037 1036 if (zp->z_blksz == size)
1038 1037 break;
1039 1038 offset += blkoff;
1040 1039 zfs_range_unlock(zgd->zgd_rl);
1041 1040 }
1042 1041 /* test for truncation needs to be done while range locked */
1043 1042 if (lr->lr_offset >= zp->z_size)
1044 1043 error = SET_ERROR(ENOENT);
1045 1044 #ifdef DEBUG
1046 1045 if (zil_fault_io) {
1047 1046 error = SET_ERROR(EIO);
1048 1047 zil_fault_io = 0;
1049 1048 }
1050 1049 #endif
1051 1050 if (error == 0)
1052 1051 error = dmu_buf_hold(os, object, offset, zgd, &db,
1053 1052 DMU_READ_NO_PREFETCH);
1054 1053
1055 1054 if (error == 0) {
1056 1055 blkptr_t *obp = dmu_buf_get_blkptr(db);
1057 1056 if (obp) {
1058 1057 ASSERT(BP_IS_HOLE(bp));
1059 1058 *bp = *obp;
1060 1059 }
1061 1060
1062 1061 zgd->zgd_db = db;
1063 1062 zgd->zgd_bp = bp;
1064 1063
1065 1064 ASSERT(db->db_offset == offset);
1066 1065 ASSERT(db->db_size == size);
1067 1066
1068 1067 error = dmu_sync(zio, lr->lr_common.lrc_txg,
1069 1068 zfs_get_done, zgd);
1070 1069 ASSERT(error || lr->lr_length <= zp->z_blksz);
1071 1070
1072 1071 /*
1073 1072 * On success, we need to wait for the write I/O
1074 1073 * initiated by dmu_sync() to complete before we can
1075 1074 * release this dbuf. We will finish everything up
1076 1075 * in the zfs_get_done() callback.
1077 1076 */
1078 1077 if (error == 0)
1079 1078 return (0);
1080 1079
1081 1080 if (error == EALREADY) {
1082 1081 lr->lr_common.lrc_txtype = TX_WRITE2;
1083 1082 error = 0;
1084 1083 }
1085 1084 }
1086 1085 }
1087 1086
1088 1087 zfs_get_done(zgd, error);
1089 1088
1090 1089 return (error);
1091 1090 }
1092 1091
1093 1092 /*ARGSUSED*/
1094 1093 static int
1095 1094 zfs_access(vnode_t *vp, int mode, int flag, cred_t *cr,
1096 1095 caller_context_t *ct)
1097 1096 {
1098 1097 znode_t *zp = VTOZ(vp);
1099 1098 zfsvfs_t *zfsvfs = zp->z_zfsvfs;
1100 1099 int error;
1101 1100
1102 1101 ZFS_ENTER(zfsvfs);
1103 1102 ZFS_VERIFY_ZP(zp);
1104 1103
1105 1104 if (flag & V_ACE_MASK)
1106 1105 error = zfs_zaccess(zp, mode, flag, B_FALSE, cr);
1107 1106 else
1108 1107 error = zfs_zaccess_rwx(zp, mode, flag, cr);
1109 1108
1110 1109 ZFS_EXIT(zfsvfs);
1111 1110 return (error);
1112 1111 }
1113 1112
1114 1113 /*
1115 1114 * If vnode is for a device return a specfs vnode instead.
1116 1115 */
1117 1116 static int
1118 1117 specvp_check(vnode_t **vpp, cred_t *cr)
1119 1118 {
1120 1119 int error = 0;
1121 1120
1122 1121 if (IS_DEVVP(*vpp)) {
1123 1122 struct vnode *svp;
1124 1123
1125 1124 svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr);
1126 1125 VN_RELE(*vpp);
1127 1126 if (svp == NULL)
1128 1127 error = SET_ERROR(ENOSYS);
1129 1128 *vpp = svp;
1130 1129 }
1131 1130 return (error);
1132 1131 }
1133 1132
1134 1133
1135 1134 /*
1136 1135 * Lookup an entry in a directory, or an extended attribute directory.
1137 1136 * If it exists, return a held vnode reference for it.
1138 1137 *
1139 1138 * IN: dvp - vnode of directory to search.
1140 1139 * nm - name of entry to lookup.
1141 1140 * pnp - full pathname to lookup [UNUSED].
1142 1141 * flags - LOOKUP_XATTR set if looking for an attribute.
1143 1142 * rdir - root directory vnode [UNUSED].
1144 1143 * cr - credentials of caller.
1145 1144 * ct - caller context
1146 1145 * direntflags - directory lookup flags
1147 1146 * realpnp - returned pathname.
1148 1147 *
1149 1148 * OUT: vpp - vnode of located entry, NULL if not found.
1150 1149 *
1151 - * RETURN: 0 if success
1152 - * error code if failure
1150 + * RETURN: 0 on success, error code on failure.
1153 1151 *
1154 1152 * Timestamps:
1155 1153 * NA
1156 1154 */
1157 1155 /* ARGSUSED */
1158 1156 static int
1159 1157 zfs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, struct pathname *pnp,
1160 1158 int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct,
1161 1159 int *direntflags, pathname_t *realpnp)
1162 1160 {
1163 1161 znode_t *zdp = VTOZ(dvp);
1164 1162 zfsvfs_t *zfsvfs = zdp->z_zfsvfs;
1165 1163 int error = 0;
1166 1164
1167 1165 /* fast path */
1168 1166 if (!(flags & (LOOKUP_XATTR | FIGNORECASE))) {
1169 1167
1170 1168 if (dvp->v_type != VDIR) {
1171 1169 return (SET_ERROR(ENOTDIR));
1172 1170 } else if (zdp->z_sa_hdl == NULL) {
1173 1171 return (SET_ERROR(EIO));
1174 1172 }
1175 1173
1176 1174 if (nm[0] == 0 || (nm[0] == '.' && nm[1] == '\0')) {
1177 1175 error = zfs_fastaccesschk_execute(zdp, cr);
1178 1176 if (!error) {
1179 1177 *vpp = dvp;
1180 1178 VN_HOLD(*vpp);
1181 1179 return (0);
1182 1180 }
1183 1181 return (error);
1184 1182 } else {
1185 1183 vnode_t *tvp = dnlc_lookup(dvp, nm);
1186 1184
1187 1185 if (tvp) {
1188 1186 error = zfs_fastaccesschk_execute(zdp, cr);
1189 1187 if (error) {
1190 1188 VN_RELE(tvp);
1191 1189 return (error);
1192 1190 }
1193 1191 if (tvp == DNLC_NO_VNODE) {
1194 1192 VN_RELE(tvp);
1195 1193 return (SET_ERROR(ENOENT));
1196 1194 } else {
1197 1195 *vpp = tvp;
1198 1196 return (specvp_check(vpp, cr));
1199 1197 }
1200 1198 }
1201 1199 }
1202 1200 }
1203 1201
1204 1202 DTRACE_PROBE2(zfs__fastpath__lookup__miss, vnode_t *, dvp, char *, nm);
1205 1203
1206 1204 ZFS_ENTER(zfsvfs);
1207 1205 ZFS_VERIFY_ZP(zdp);
1208 1206
1209 1207 *vpp = NULL;
1210 1208
1211 1209 if (flags & LOOKUP_XATTR) {
1212 1210 /*
1213 1211 * If the xattr property is off, refuse the lookup request.
1214 1212 */
1215 1213 if (!(zfsvfs->z_vfs->vfs_flag & VFS_XATTR)) {
1216 1214 ZFS_EXIT(zfsvfs);
1217 1215 return (SET_ERROR(EINVAL));
1218 1216 }
1219 1217
1220 1218 /*
1221 1219 * We don't allow recursive attributes..
1222 1220 * Maybe someday we will.
1223 1221 */
1224 1222 if (zdp->z_pflags & ZFS_XATTR) {
1225 1223 ZFS_EXIT(zfsvfs);
1226 1224 return (SET_ERROR(EINVAL));
1227 1225 }
1228 1226
1229 1227 if (error = zfs_get_xattrdir(VTOZ(dvp), vpp, cr, flags)) {
1230 1228 ZFS_EXIT(zfsvfs);
1231 1229 return (error);
1232 1230 }
1233 1231
1234 1232 /*
1235 1233 * Do we have permission to get into attribute directory?
1236 1234 */
1237 1235
1238 1236 if (error = zfs_zaccess(VTOZ(*vpp), ACE_EXECUTE, 0,
1239 1237 B_FALSE, cr)) {
1240 1238 VN_RELE(*vpp);
1241 1239 *vpp = NULL;
1242 1240 }
1243 1241
1244 1242 ZFS_EXIT(zfsvfs);
1245 1243 return (error);
1246 1244 }
1247 1245
1248 1246 if (dvp->v_type != VDIR) {
1249 1247 ZFS_EXIT(zfsvfs);
1250 1248 return (SET_ERROR(ENOTDIR));
1251 1249 }
1252 1250
1253 1251 /*
1254 1252 * Check accessibility of directory.
1255 1253 */
1256 1254
1257 1255 if (error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr)) {
1258 1256 ZFS_EXIT(zfsvfs);
1259 1257 return (error);
1260 1258 }
1261 1259
1262 1260 if (zfsvfs->z_utf8 && u8_validate(nm, strlen(nm),
1263 1261 NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
1264 1262 ZFS_EXIT(zfsvfs);
1265 1263 return (SET_ERROR(EILSEQ));
1266 1264 }
1267 1265
1268 1266 error = zfs_dirlook(zdp, nm, vpp, flags, direntflags, realpnp);
1269 1267 if (error == 0)
1270 1268 error = specvp_check(vpp, cr);
1271 1269
1272 1270 ZFS_EXIT(zfsvfs);
1273 1271 return (error);
1274 1272 }
1275 1273
1276 1274 /*
1277 1275 * Attempt to create a new entry in a directory. If the entry
1278 1276 * already exists, truncate the file if permissible, else return
1279 1277 * an error. Return the vp of the created or trunc'd file.
1280 1278 *
1281 1279 * IN: dvp - vnode of directory to put new file entry in.
1282 1280 * name - name of new file entry.
1283 1281 * vap - attributes of new file.
1284 1282 * excl - flag indicating exclusive or non-exclusive mode.
1285 1283 * mode - mode to open file with.
1286 1284 * cr - credentials of caller.
1287 1285 * flag - large file flag [UNUSED].
1288 1286 * ct - caller context
1289 1287 * vsecp - ACL to be set
1290 1288 *
1291 1289 * OUT: vpp - vnode of created or trunc'd entry.
1292 1290 *
1293 - * RETURN: 0 if success
1294 - * error code if failure
1291 + * RETURN: 0 on success, error code on failure.
1295 1292 *
1296 1293 * Timestamps:
1297 1294 * dvp - ctime|mtime updated if new entry created
1298 1295 * vp - ctime|mtime always, atime if new
1299 1296 */
1300 1297
1301 1298 /* ARGSUSED */
1302 1299 static int
1303 1300 zfs_create(vnode_t *dvp, char *name, vattr_t *vap, vcexcl_t excl,
1304 1301 int mode, vnode_t **vpp, cred_t *cr, int flag, caller_context_t *ct,
1305 1302 vsecattr_t *vsecp)
1306 1303 {
1307 1304 znode_t *zp, *dzp = VTOZ(dvp);
1308 1305 zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
1309 1306 zilog_t *zilog;
1310 1307 objset_t *os;
1311 1308 zfs_dirlock_t *dl;
1312 1309 dmu_tx_t *tx;
1313 1310 int error;
1314 1311 ksid_t *ksid;
1315 1312 uid_t uid;
1316 1313 gid_t gid = crgetgid(cr);
1317 1314 zfs_acl_ids_t acl_ids;
1318 1315 boolean_t fuid_dirtied;
1319 1316 boolean_t have_acl = B_FALSE;
1320 1317
1321 1318 /*
1322 1319 * If we have an ephemeral id, ACL, or XVATTR then
1323 1320 * make sure file system is at proper version
1324 1321 */
1325 1322
1326 1323 ksid = crgetsid(cr, KSID_OWNER);
1327 1324 if (ksid)
1328 1325 uid = ksid_getid(ksid);
1329 1326 else
1330 1327 uid = crgetuid(cr);
1331 1328
1332 1329 if (zfsvfs->z_use_fuids == B_FALSE &&
1333 1330 (vsecp || (vap->va_mask & AT_XVATTR) ||
1334 1331 IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid)))
1335 1332 return (SET_ERROR(EINVAL));
1336 1333
1337 1334 ZFS_ENTER(zfsvfs);
1338 1335 ZFS_VERIFY_ZP(dzp);
1339 1336 os = zfsvfs->z_os;
1340 1337 zilog = zfsvfs->z_log;
1341 1338
1342 1339 if (zfsvfs->z_utf8 && u8_validate(name, strlen(name),
1343 1340 NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
1344 1341 ZFS_EXIT(zfsvfs);
1345 1342 return (SET_ERROR(EILSEQ));
1346 1343 }
1347 1344
1348 1345 if (vap->va_mask & AT_XVATTR) {
1349 1346 if ((error = secpolicy_xvattr((xvattr_t *)vap,
1350 1347 crgetuid(cr), cr, vap->va_type)) != 0) {
1351 1348 ZFS_EXIT(zfsvfs);
1352 1349 return (error);
1353 1350 }
1354 1351 }
1355 1352 top:
1356 1353 *vpp = NULL;
1357 1354
1358 1355 if ((vap->va_mode & VSVTX) && secpolicy_vnode_stky_modify(cr))
1359 1356 vap->va_mode &= ~VSVTX;
1360 1357
1361 1358 if (*name == '\0') {
1362 1359 /*
1363 1360 * Null component name refers to the directory itself.
1364 1361 */
1365 1362 VN_HOLD(dvp);
1366 1363 zp = dzp;
1367 1364 dl = NULL;
1368 1365 error = 0;
1369 1366 } else {
1370 1367 /* possible VN_HOLD(zp) */
1371 1368 int zflg = 0;
1372 1369
1373 1370 if (flag & FIGNORECASE)
1374 1371 zflg |= ZCILOOK;
1375 1372
1376 1373 error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg,
1377 1374 NULL, NULL);
1378 1375 if (error) {
1379 1376 if (have_acl)
1380 1377 zfs_acl_ids_free(&acl_ids);
1381 1378 if (strcmp(name, "..") == 0)
1382 1379 error = SET_ERROR(EISDIR);
1383 1380 ZFS_EXIT(zfsvfs);
1384 1381 return (error);
1385 1382 }
1386 1383 }
1387 1384
1388 1385 if (zp == NULL) {
1389 1386 uint64_t txtype;
1390 1387
1391 1388 /*
1392 1389 * Create a new file object and update the directory
1393 1390 * to reference it.
1394 1391 */
1395 1392 if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) {
1396 1393 if (have_acl)
1397 1394 zfs_acl_ids_free(&acl_ids);
1398 1395 goto out;
1399 1396 }
1400 1397
1401 1398 /*
1402 1399 * We only support the creation of regular files in
1403 1400 * extended attribute directories.
1404 1401 */
1405 1402
1406 1403 if ((dzp->z_pflags & ZFS_XATTR) &&
1407 1404 (vap->va_type != VREG)) {
1408 1405 if (have_acl)
1409 1406 zfs_acl_ids_free(&acl_ids);
1410 1407 error = SET_ERROR(EINVAL);
1411 1408 goto out;
1412 1409 }
1413 1410
1414 1411 if (!have_acl && (error = zfs_acl_ids_create(dzp, 0, vap,
1415 1412 cr, vsecp, &acl_ids)) != 0)
1416 1413 goto out;
1417 1414 have_acl = B_TRUE;
1418 1415
1419 1416 if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) {
1420 1417 zfs_acl_ids_free(&acl_ids);
1421 1418 error = SET_ERROR(EDQUOT);
1422 1419 goto out;
1423 1420 }
1424 1421
1425 1422 tx = dmu_tx_create(os);
1426 1423
1427 1424 dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
1428 1425 ZFS_SA_BASE_ATTR_SIZE);
1429 1426
1430 1427 fuid_dirtied = zfsvfs->z_fuid_dirty;
1431 1428 if (fuid_dirtied)
1432 1429 zfs_fuid_txhold(zfsvfs, tx);
1433 1430 dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name);
1434 1431 dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE);
1435 1432 if (!zfsvfs->z_use_sa &&
1436 1433 acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
1437 1434 dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
1438 1435 0, acl_ids.z_aclp->z_acl_bytes);
1439 1436 }
1440 1437 error = dmu_tx_assign(tx, TXG_NOWAIT);
1441 1438 if (error) {
1442 1439 zfs_dirent_unlock(dl);
1443 1440 if (error == ERESTART) {
1444 1441 dmu_tx_wait(tx);
1445 1442 dmu_tx_abort(tx);
1446 1443 goto top;
1447 1444 }
1448 1445 zfs_acl_ids_free(&acl_ids);
1449 1446 dmu_tx_abort(tx);
1450 1447 ZFS_EXIT(zfsvfs);
1451 1448 return (error);
1452 1449 }
1453 1450 zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);
1454 1451
1455 1452 if (fuid_dirtied)
1456 1453 zfs_fuid_sync(zfsvfs, tx);
1457 1454
1458 1455 (void) zfs_link_create(dl, zp, tx, ZNEW);
1459 1456 txtype = zfs_log_create_txtype(Z_FILE, vsecp, vap);
1460 1457 if (flag & FIGNORECASE)
1461 1458 txtype |= TX_CI;
1462 1459 zfs_log_create(zilog, tx, txtype, dzp, zp, name,
1463 1460 vsecp, acl_ids.z_fuidp, vap);
1464 1461 zfs_acl_ids_free(&acl_ids);
1465 1462 dmu_tx_commit(tx);
1466 1463 } else {
1467 1464 int aflags = (flag & FAPPEND) ? V_APPEND : 0;
1468 1465
1469 1466 if (have_acl)
1470 1467 zfs_acl_ids_free(&acl_ids);
1471 1468 have_acl = B_FALSE;
1472 1469
1473 1470 /*
1474 1471 * A directory entry already exists for this name.
1475 1472 */
1476 1473 /*
1477 1474 * Can't truncate an existing file if in exclusive mode.
1478 1475 */
1479 1476 if (excl == EXCL) {
1480 1477 error = SET_ERROR(EEXIST);
1481 1478 goto out;
1482 1479 }
1483 1480 /*
1484 1481 * Can't open a directory for writing.
1485 1482 */
1486 1483 if ((ZTOV(zp)->v_type == VDIR) && (mode & S_IWRITE)) {
1487 1484 error = SET_ERROR(EISDIR);
1488 1485 goto out;
1489 1486 }
1490 1487 /*
1491 1488 * Verify requested access to file.
1492 1489 */
1493 1490 if (mode && (error = zfs_zaccess_rwx(zp, mode, aflags, cr))) {
1494 1491 goto out;
1495 1492 }
1496 1493
1497 1494 mutex_enter(&dzp->z_lock);
1498 1495 dzp->z_seq++;
1499 1496 mutex_exit(&dzp->z_lock);
1500 1497
1501 1498 /*
1502 1499 * Truncate regular files if requested.
1503 1500 */
1504 1501 if ((ZTOV(zp)->v_type == VREG) &&
1505 1502 (vap->va_mask & AT_SIZE) && (vap->va_size == 0)) {
1506 1503 /* we can't hold any locks when calling zfs_freesp() */
1507 1504 zfs_dirent_unlock(dl);
1508 1505 dl = NULL;
1509 1506 error = zfs_freesp(zp, 0, 0, mode, TRUE);
1510 1507 if (error == 0) {
1511 1508 vnevent_create(ZTOV(zp), ct);
1512 1509 }
1513 1510 }
1514 1511 }
1515 1512 out:
1516 1513
1517 1514 if (dl)
1518 1515 zfs_dirent_unlock(dl);
1519 1516
1520 1517 if (error) {
1521 1518 if (zp)
1522 1519 VN_RELE(ZTOV(zp));
1523 1520 } else {
1524 1521 *vpp = ZTOV(zp);
1525 1522 error = specvp_check(vpp, cr);
1526 1523 }
1527 1524
1528 1525 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
1529 1526 zil_commit(zilog, 0);
1530 1527
1531 1528 ZFS_EXIT(zfsvfs);
1532 1529 return (error);
1533 1530 }
1534 1531
1535 1532 /*
1536 1533 * Remove an entry from a directory.
1537 1534 *
1538 1535 * IN: dvp - vnode of directory to remove entry from.
1539 1536 * name - name of entry to remove.
1540 1537 * cr - credentials of caller.
1541 1538 * ct - caller context
1542 1539 * flags - case flags
1543 1540 *
1544 - * RETURN: 0 if success
1545 - * error code if failure
1541 + * RETURN: 0 on success, error code on failure.
1546 1542 *
1547 1543 * Timestamps:
1548 1544 * dvp - ctime|mtime
1549 1545 * vp - ctime (if nlink > 0)
1550 1546 */
1551 1547
1552 1548 uint64_t null_xattr = 0;
1553 1549
1554 1550 /*ARGSUSED*/
1555 1551 static int
1556 1552 zfs_remove(vnode_t *dvp, char *name, cred_t *cr, caller_context_t *ct,
1557 1553 int flags)
1558 1554 {
1559 1555 znode_t *zp, *dzp = VTOZ(dvp);
1560 1556 znode_t *xzp;
1561 1557 vnode_t *vp;
1562 1558 zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
1563 1559 zilog_t *zilog;
1564 1560 uint64_t acl_obj, xattr_obj;
1565 1561 uint64_t xattr_obj_unlinked = 0;
1566 1562 uint64_t obj = 0;
1567 1563 zfs_dirlock_t *dl;
1568 1564 dmu_tx_t *tx;
1569 1565 boolean_t may_delete_now, delete_now = FALSE;
1570 1566 boolean_t unlinked, toobig = FALSE;
1571 1567 uint64_t txtype;
1572 1568 pathname_t *realnmp = NULL;
1573 1569 pathname_t realnm;
1574 1570 int error;
1575 1571 int zflg = ZEXISTS;
1576 1572
1577 1573 ZFS_ENTER(zfsvfs);
1578 1574 ZFS_VERIFY_ZP(dzp);
1579 1575 zilog = zfsvfs->z_log;
1580 1576
1581 1577 if (flags & FIGNORECASE) {
1582 1578 zflg |= ZCILOOK;
1583 1579 pn_alloc(&realnm);
1584 1580 realnmp = &realnm;
1585 1581 }
1586 1582
1587 1583 top:
1588 1584 xattr_obj = 0;
1589 1585 xzp = NULL;
1590 1586 /*
1591 1587 * Attempt to lock directory; fail if entry doesn't exist.
1592 1588 */
1593 1589 if (error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg,
1594 1590 NULL, realnmp)) {
1595 1591 if (realnmp)
1596 1592 pn_free(realnmp);
1597 1593 ZFS_EXIT(zfsvfs);
1598 1594 return (error);
1599 1595 }
1600 1596
1601 1597 vp = ZTOV(zp);
1602 1598
1603 1599 if (error = zfs_zaccess_delete(dzp, zp, cr)) {
1604 1600 goto out;
1605 1601 }
1606 1602
1607 1603 /*
1608 1604 * Need to use rmdir for removing directories.
1609 1605 */
1610 1606 if (vp->v_type == VDIR) {
1611 1607 error = SET_ERROR(EPERM);
1612 1608 goto out;
1613 1609 }
1614 1610
1615 1611 vnevent_remove(vp, dvp, name, ct);
1616 1612
1617 1613 if (realnmp)
1618 1614 dnlc_remove(dvp, realnmp->pn_buf);
1619 1615 else
1620 1616 dnlc_remove(dvp, name);
1621 1617
1622 1618 mutex_enter(&vp->v_lock);
1623 1619 may_delete_now = vp->v_count == 1 && !vn_has_cached_data(vp);
1624 1620 mutex_exit(&vp->v_lock);
1625 1621
1626 1622 /*
1627 1623 * We may delete the znode now, or we may put it in the unlinked set;
1628 1624 * it depends on whether we're the last link, and on whether there are
1629 1625 * other holds on the vnode. So we dmu_tx_hold() the right things to
1630 1626 * allow for either case.
1631 1627 */
1632 1628 obj = zp->z_id;
1633 1629 tx = dmu_tx_create(zfsvfs->z_os);
1634 1630 dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name);
1635 1631 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
1636 1632 zfs_sa_upgrade_txholds(tx, zp);
1637 1633 zfs_sa_upgrade_txholds(tx, dzp);
1638 1634 if (may_delete_now) {
1639 1635 toobig =
1640 1636 zp->z_size > zp->z_blksz * DMU_MAX_DELETEBLKCNT;
1641 1637 /* if the file is too big, only hold_free a token amount */
1642 1638 dmu_tx_hold_free(tx, zp->z_id, 0,
1643 1639 (toobig ? DMU_MAX_ACCESS : DMU_OBJECT_END));
1644 1640 }
1645 1641
1646 1642 /* are there any extended attributes? */
1647 1643 error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs),
1648 1644 &xattr_obj, sizeof (xattr_obj));
1649 1645 if (error == 0 && xattr_obj) {
1650 1646 error = zfs_zget(zfsvfs, xattr_obj, &xzp);
1651 1647 ASSERT0(error);
1652 1648 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
1653 1649 dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE);
1654 1650 }
1655 1651
1656 1652 mutex_enter(&zp->z_lock);
1657 1653 if ((acl_obj = zfs_external_acl(zp)) != 0 && may_delete_now)
1658 1654 dmu_tx_hold_free(tx, acl_obj, 0, DMU_OBJECT_END);
1659 1655 mutex_exit(&zp->z_lock);
1660 1656
1661 1657 /* charge as an update -- would be nice not to charge at all */
1662 1658 dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
1663 1659
1664 1660 error = dmu_tx_assign(tx, TXG_NOWAIT);
1665 1661 if (error) {
1666 1662 zfs_dirent_unlock(dl);
1667 1663 VN_RELE(vp);
1668 1664 if (xzp)
1669 1665 VN_RELE(ZTOV(xzp));
1670 1666 if (error == ERESTART) {
1671 1667 dmu_tx_wait(tx);
1672 1668 dmu_tx_abort(tx);
1673 1669 goto top;
1674 1670 }
1675 1671 if (realnmp)
1676 1672 pn_free(realnmp);
1677 1673 dmu_tx_abort(tx);
1678 1674 ZFS_EXIT(zfsvfs);
1679 1675 return (error);
1680 1676 }
1681 1677
1682 1678 /*
1683 1679 * Remove the directory entry.
1684 1680 */
1685 1681 error = zfs_link_destroy(dl, zp, tx, zflg, &unlinked);
1686 1682
1687 1683 if (error) {
1688 1684 dmu_tx_commit(tx);
1689 1685 goto out;
1690 1686 }
1691 1687
1692 1688 if (unlinked) {
1693 1689
1694 1690 /*
1695 1691 * Hold z_lock so that we can make sure that the ACL obj
1696 1692 * hasn't changed. Could have been deleted due to
1697 1693 * zfs_sa_upgrade().
1698 1694 */
1699 1695 mutex_enter(&zp->z_lock);
1700 1696 mutex_enter(&vp->v_lock);
1701 1697 (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs),
1702 1698 &xattr_obj_unlinked, sizeof (xattr_obj_unlinked));
1703 1699 delete_now = may_delete_now && !toobig &&
1704 1700 vp->v_count == 1 && !vn_has_cached_data(vp) &&
1705 1701 xattr_obj == xattr_obj_unlinked && zfs_external_acl(zp) ==
1706 1702 acl_obj;
1707 1703 mutex_exit(&vp->v_lock);
1708 1704 }
1709 1705
1710 1706 if (delete_now) {
1711 1707 if (xattr_obj_unlinked) {
1712 1708 ASSERT3U(xzp->z_links, ==, 2);
1713 1709 mutex_enter(&xzp->z_lock);
1714 1710 xzp->z_unlinked = 1;
1715 1711 xzp->z_links = 0;
1716 1712 error = sa_update(xzp->z_sa_hdl, SA_ZPL_LINKS(zfsvfs),
1717 1713 &xzp->z_links, sizeof (xzp->z_links), tx);
1718 1714 ASSERT3U(error, ==, 0);
1719 1715 mutex_exit(&xzp->z_lock);
1720 1716 zfs_unlinked_add(xzp, tx);
1721 1717
1722 1718 if (zp->z_is_sa)
1723 1719 error = sa_remove(zp->z_sa_hdl,
1724 1720 SA_ZPL_XATTR(zfsvfs), tx);
1725 1721 else
1726 1722 error = sa_update(zp->z_sa_hdl,
1727 1723 SA_ZPL_XATTR(zfsvfs), &null_xattr,
1728 1724 sizeof (uint64_t), tx);
1729 1725 ASSERT0(error);
1730 1726 }
1731 1727 mutex_enter(&vp->v_lock);
1732 1728 vp->v_count--;
1733 1729 ASSERT0(vp->v_count);
1734 1730 mutex_exit(&vp->v_lock);
1735 1731 mutex_exit(&zp->z_lock);
1736 1732 zfs_znode_delete(zp, tx);
1737 1733 } else if (unlinked) {
1738 1734 mutex_exit(&zp->z_lock);
1739 1735 zfs_unlinked_add(zp, tx);
1740 1736 }
1741 1737
1742 1738 txtype = TX_REMOVE;
1743 1739 if (flags & FIGNORECASE)
1744 1740 txtype |= TX_CI;
1745 1741 zfs_log_remove(zilog, tx, txtype, dzp, name, obj);
1746 1742
1747 1743 dmu_tx_commit(tx);
1748 1744 out:
1749 1745 if (realnmp)
1750 1746 pn_free(realnmp);
1751 1747
1752 1748 zfs_dirent_unlock(dl);
1753 1749
1754 1750 if (!delete_now)
1755 1751 VN_RELE(vp);
1756 1752 if (xzp)
1757 1753 VN_RELE(ZTOV(xzp));
1758 1754
1759 1755 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
1760 1756 zil_commit(zilog, 0);
1761 1757
1762 1758 ZFS_EXIT(zfsvfs);
1763 1759 return (error);
1764 1760 }
1765 1761
1766 1762 /*
1767 1763 * Create a new directory and insert it into dvp using the name
1768 1764 * provided. Return a pointer to the inserted directory.
1769 1765 *
1770 1766 * IN: dvp - vnode of directory to add subdir to.
1771 1767 * dirname - name of new directory.
1772 1768 * vap - attributes of new directory.
1773 1769 * cr - credentials of caller.
1774 1770 * ct - caller context
1771 + * flags - case flags
1775 1772 * vsecp - ACL to be set
1776 1773 *
1777 1774 * OUT: vpp - vnode of created directory.
1778 1775 *
1779 - * RETURN: 0 if success
1780 - * error code if failure
1776 + * RETURN: 0 on success, error code on failure.
1781 1777 *
1782 1778 * Timestamps:
1783 1779 * dvp - ctime|mtime updated
1784 1780 * vp - ctime|mtime|atime updated
1785 1781 */
1786 1782 /*ARGSUSED*/
1787 1783 static int
1788 1784 zfs_mkdir(vnode_t *dvp, char *dirname, vattr_t *vap, vnode_t **vpp, cred_t *cr,
1789 1785 caller_context_t *ct, int flags, vsecattr_t *vsecp)
1790 1786 {
1791 1787 znode_t *zp, *dzp = VTOZ(dvp);
1792 1788 zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
1793 1789 zilog_t *zilog;
1794 1790 zfs_dirlock_t *dl;
1795 1791 uint64_t txtype;
1796 1792 dmu_tx_t *tx;
1797 1793 int error;
1798 1794 int zf = ZNEW;
1799 1795 ksid_t *ksid;
1800 1796 uid_t uid;
1801 1797 gid_t gid = crgetgid(cr);
1802 1798 zfs_acl_ids_t acl_ids;
1803 1799 boolean_t fuid_dirtied;
1804 1800
1805 1801 ASSERT(vap->va_type == VDIR);
1806 1802
1807 1803 /*
1808 1804 	 * If we have an ephemeral id, ACL, or XVATTR, then
1809 1805 	 * make sure the file system is at the proper version.
1810 1806 */
1811 1807
1812 1808 ksid = crgetsid(cr, KSID_OWNER);
1813 1809 if (ksid)
1814 1810 uid = ksid_getid(ksid);
1815 1811 else
1816 1812 uid = crgetuid(cr);
1817 1813 if (zfsvfs->z_use_fuids == B_FALSE &&
1818 1814 (vsecp || (vap->va_mask & AT_XVATTR) ||
1819 1815 IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid)))
1820 1816 return (SET_ERROR(EINVAL));
1821 1817
1822 1818 ZFS_ENTER(zfsvfs);
1823 1819 ZFS_VERIFY_ZP(dzp);
1824 1820 zilog = zfsvfs->z_log;
1825 1821
1826 1822 if (dzp->z_pflags & ZFS_XATTR) {
1827 1823 ZFS_EXIT(zfsvfs);
1828 1824 return (SET_ERROR(EINVAL));
1829 1825 }
1830 1826
1831 1827 if (zfsvfs->z_utf8 && u8_validate(dirname,
1832 1828 strlen(dirname), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
1833 1829 ZFS_EXIT(zfsvfs);
1834 1830 return (SET_ERROR(EILSEQ));
1835 1831 }
1836 1832 if (flags & FIGNORECASE)
1837 1833 zf |= ZCILOOK;
1838 1834
1839 1835 if (vap->va_mask & AT_XVATTR) {
1840 1836 if ((error = secpolicy_xvattr((xvattr_t *)vap,
1841 1837 crgetuid(cr), cr, vap->va_type)) != 0) {
1842 1838 ZFS_EXIT(zfsvfs);
1843 1839 return (error);
1844 1840 }
1845 1841 }
1846 1842
1847 1843 if ((error = zfs_acl_ids_create(dzp, 0, vap, cr,
1848 1844 vsecp, &acl_ids)) != 0) {
1849 1845 ZFS_EXIT(zfsvfs);
1850 1846 return (error);
1851 1847 }
1852 1848 /*
1853 1849 * First make sure the new directory doesn't exist.
1854 1850 *
1855 1851 * Existence is checked first to make sure we don't return
1856 1852 * EACCES instead of EEXIST which can cause some applications
1857 1853 * to fail.
1858 1854 */
1859 1855 top:
1860 1856 *vpp = NULL;
1861 1857
1862 1858 if (error = zfs_dirent_lock(&dl, dzp, dirname, &zp, zf,
1863 1859 NULL, NULL)) {
1864 1860 zfs_acl_ids_free(&acl_ids);
1865 1861 ZFS_EXIT(zfsvfs);
1866 1862 return (error);
1867 1863 }
1868 1864
1869 1865 if (error = zfs_zaccess(dzp, ACE_ADD_SUBDIRECTORY, 0, B_FALSE, cr)) {
1870 1866 zfs_acl_ids_free(&acl_ids);
1871 1867 zfs_dirent_unlock(dl);
1872 1868 ZFS_EXIT(zfsvfs);
1873 1869 return (error);
1874 1870 }
1875 1871
1876 1872 if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) {
1877 1873 zfs_acl_ids_free(&acl_ids);
1878 1874 zfs_dirent_unlock(dl);
1879 1875 ZFS_EXIT(zfsvfs);
1880 1876 return (SET_ERROR(EDQUOT));
1881 1877 }
1882 1878
1883 1879 /*
1884 1880 * Add a new entry to the directory.
1885 1881 */
1886 1882 tx = dmu_tx_create(zfsvfs->z_os);
1887 1883 dmu_tx_hold_zap(tx, dzp->z_id, TRUE, dirname);
1888 1884 dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
1889 1885 fuid_dirtied = zfsvfs->z_fuid_dirty;
1890 1886 if (fuid_dirtied)
1891 1887 zfs_fuid_txhold(zfsvfs, tx);
1892 1888 if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
1893 1889 dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
1894 1890 acl_ids.z_aclp->z_acl_bytes);
1895 1891 }
1896 1892
1897 1893 dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
1898 1894 ZFS_SA_BASE_ATTR_SIZE);
1899 1895
1900 1896 error = dmu_tx_assign(tx, TXG_NOWAIT);
1901 1897 if (error) {
1902 1898 zfs_dirent_unlock(dl);
1903 1899 if (error == ERESTART) {
1904 1900 dmu_tx_wait(tx);
1905 1901 dmu_tx_abort(tx);
1906 1902 goto top;
1907 1903 }
1908 1904 zfs_acl_ids_free(&acl_ids);
1909 1905 dmu_tx_abort(tx);
1910 1906 ZFS_EXIT(zfsvfs);
1911 1907 return (error);
1912 1908 }
1913 1909
1914 1910 /*
1915 1911 * Create new node.
1916 1912 */
1917 1913 zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);
1918 1914
1919 1915 if (fuid_dirtied)
1920 1916 zfs_fuid_sync(zfsvfs, tx);
1921 1917
1922 1918 /*
1923 1919 * Now put new name in parent dir.
1924 1920 */
1925 1921 (void) zfs_link_create(dl, zp, tx, ZNEW);
1926 1922
1927 1923 *vpp = ZTOV(zp);
1928 1924
1929 1925 txtype = zfs_log_create_txtype(Z_DIR, vsecp, vap);
1930 1926 if (flags & FIGNORECASE)
1931 1927 txtype |= TX_CI;
1932 1928 zfs_log_create(zilog, tx, txtype, dzp, zp, dirname, vsecp,
1933 1929 acl_ids.z_fuidp, vap);
1934 1930
1935 1931 zfs_acl_ids_free(&acl_ids);
1936 1932
1937 1933 dmu_tx_commit(tx);
1938 1934
1939 1935 zfs_dirent_unlock(dl);
1940 1936
1941 1937 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
1942 1938 zil_commit(zilog, 0);
1943 1939
1944 1940 ZFS_EXIT(zfsvfs);
1945 1941 return (0);
1946 1942 }
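/*
 * A minimal userland sketch (not from the original source): on illumos,
 * mkdir(2) reaches zfs_mkdir() above through VOP_MKDIR().  The path used
 * here is a hypothetical ZFS-backed location.  Because existence is
 * checked before access (see the comment above the "top:" label), an
 * existing name yields EEXIST rather than EACCES.
 */
#include <sys/types.h>
#include <sys/stat.h>
#include <stdio.h>

int
main(void)
{
	if (mkdir("/zpool/example/newdir", 0755) == -1) {
		perror("mkdir");	/* e.g. "File exists" if the name is taken */
		return (1);
	}
	return (0);
}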
1947 1943
1948 1944 /*
1949 1945 * Remove a directory subdir entry. If the current working
1950 1946 * directory is the same as the subdir to be removed, the
1951 1947 * remove will fail.
1952 1948 *
1953 1949 * IN: dvp - vnode of directory to remove from.
1954 1950 * name - name of directory to be removed.
1955 1951 * cwd - vnode of current working directory.
1956 1952 * cr - credentials of caller.
1957 1953 * ct - caller context
1958 1954 * flags - case flags
1959 1955 *
1960 - * RETURN: 0 if success
1961 - * error code if failure
1956 + * RETURN: 0 on success, error code on failure.
1962 1957 *
1963 1958 * Timestamps:
1964 1959 * dvp - ctime|mtime updated
1965 1960 */
1966 1961 /*ARGSUSED*/
1967 1962 static int
1968 1963 zfs_rmdir(vnode_t *dvp, char *name, vnode_t *cwd, cred_t *cr,
1969 1964 caller_context_t *ct, int flags)
1970 1965 {
1971 1966 znode_t *dzp = VTOZ(dvp);
1972 1967 znode_t *zp;
1973 1968 vnode_t *vp;
1974 1969 zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
1975 1970 zilog_t *zilog;
1976 1971 zfs_dirlock_t *dl;
1977 1972 dmu_tx_t *tx;
1978 1973 int error;
1979 1974 int zflg = ZEXISTS;
1980 1975
1981 1976 ZFS_ENTER(zfsvfs);
1982 1977 ZFS_VERIFY_ZP(dzp);
1983 1978 zilog = zfsvfs->z_log;
1984 1979
1985 1980 if (flags & FIGNORECASE)
1986 1981 zflg |= ZCILOOK;
1987 1982 top:
1988 1983 zp = NULL;
1989 1984
1990 1985 /*
1991 1986 * Attempt to lock directory; fail if entry doesn't exist.
1992 1987 */
1993 1988 if (error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg,
1994 1989 NULL, NULL)) {
1995 1990 ZFS_EXIT(zfsvfs);
1996 1991 return (error);
1997 1992 }
1998 1993
1999 1994 vp = ZTOV(zp);
2000 1995
2001 1996 if (error = zfs_zaccess_delete(dzp, zp, cr)) {
2002 1997 goto out;
2003 1998 }
2004 1999
2005 2000 if (vp->v_type != VDIR) {
2006 2001 error = SET_ERROR(ENOTDIR);
2007 2002 goto out;
2008 2003 }
2009 2004
2010 2005 if (vp == cwd) {
2011 2006 error = SET_ERROR(EINVAL);
2012 2007 goto out;
2013 2008 }
2014 2009
2015 2010 vnevent_rmdir(vp, dvp, name, ct);
2016 2011
2017 2012 /*
2018 2013 	 * Grab a lock on the directory to make sure that no one is
2019 2014 	 * trying to add (or look up) entries while we are removing it.
2020 2015 */
2021 2016 rw_enter(&zp->z_name_lock, RW_WRITER);
2022 2017
2023 2018 /*
2024 2019 * Grab a lock on the parent pointer to make sure we play well
2025 2020 * with the treewalk and directory rename code.
2026 2021 */
2027 2022 rw_enter(&zp->z_parent_lock, RW_WRITER);
2028 2023
2029 2024 tx = dmu_tx_create(zfsvfs->z_os);
2030 2025 dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name);
2031 2026 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
2032 2027 dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
2033 2028 zfs_sa_upgrade_txholds(tx, zp);
2034 2029 zfs_sa_upgrade_txholds(tx, dzp);
2035 2030 error = dmu_tx_assign(tx, TXG_NOWAIT);
2036 2031 if (error) {
2037 2032 rw_exit(&zp->z_parent_lock);
2038 2033 rw_exit(&zp->z_name_lock);
2039 2034 zfs_dirent_unlock(dl);
2040 2035 VN_RELE(vp);
2041 2036 if (error == ERESTART) {
2042 2037 dmu_tx_wait(tx);
2043 2038 dmu_tx_abort(tx);
2044 2039 goto top;
2045 2040 }
2046 2041 dmu_tx_abort(tx);
2047 2042 ZFS_EXIT(zfsvfs);
2048 2043 return (error);
2049 2044 }
2050 2045
2051 2046 error = zfs_link_destroy(dl, zp, tx, zflg, NULL);
2052 2047
2053 2048 if (error == 0) {
2054 2049 uint64_t txtype = TX_RMDIR;
2055 2050 if (flags & FIGNORECASE)
2056 2051 txtype |= TX_CI;
2057 2052 zfs_log_remove(zilog, tx, txtype, dzp, name, ZFS_NO_OBJECT);
2058 2053 }
2059 2054
2060 2055 dmu_tx_commit(tx);
2061 2056
2062 2057 rw_exit(&zp->z_parent_lock);
2063 2058 rw_exit(&zp->z_name_lock);
2064 2059 out:
2065 2060 zfs_dirent_unlock(dl);
2066 2061
2067 2062 VN_RELE(vp);
2068 2063
2069 2064 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
2070 2065 zil_commit(zilog, 0);
2071 2066
2072 2067 ZFS_EXIT(zfsvfs);
2073 2068 return (error);
2074 2069 }
2075 2070
2076 2071 /*
2077 2072 * Read as many directory entries as will fit into the provided
2078 2073 * buffer from the given directory cursor position (specified in
2079 - * the uio structure.
2074 + * the uio structure).
2080 2075 *
2081 2076 * IN: vp - vnode of directory to read.
2082 2077 * uio - structure supplying read location, range info,
2083 2078 * and return buffer.
2084 2079 * cr - credentials of caller.
2085 2080 * ct - caller context
2086 2081 * flags - case flags
2087 2082 *
2088 2083 * OUT: uio - updated offset and range, buffer filled.
2089 2084 * eofp - set to true if end-of-file detected.
2090 2085 *
2091 - * RETURN: 0 if success
2092 - * error code if failure
2086 + * RETURN: 0 on success, error code on failure.
2093 2087 *
2094 2088 * Timestamps:
2095 2089 * vp - atime updated
2096 2090 *
2097 2091  * Note that the low 4 bits of the cookie returned by zap are always zero.
2098 2092 * This allows us to use the low range for "special" directory entries:
2099 2093 * We use 0 for '.', and 1 for '..'. If this is the root of the filesystem,
2100 2094 * we use the offset 2 for the '.zfs' directory.
2101 2095 */
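/*
 * A self-contained sketch (not from the original source) of the "special"
 * cursor offsets described above.  special_entry() is a hypothetical
 * helper; zfs_readdir() below makes the equivalent decisions inline, with
 * zfs_show_ctldir() controlling whether offset 2 names the '.zfs'
 * directory.
 */
#include <stdio.h>
#include <stddef.h>

static const char *
special_entry(unsigned long long offset, int show_ctldir)
{
	if (offset == 0)
		return (".");
	if (offset == 1)
		return ("..");
	if (offset == 2 && show_ctldir)
		return (".zfs");
	return (NULL);			/* past the special range: use the ZAP cursor */
}

int
main(void)
{
	for (unsigned long long off = 0; off < 4; off++) {
		const char *name = special_entry(off, 1);
		(void) printf("offset %llu -> %s\n", off,
		    name != NULL ? name : "(ZAP cursor)");
	}
	return (0);
}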
2102 2096 /* ARGSUSED */
2103 2097 static int
2104 2098 zfs_readdir(vnode_t *vp, uio_t *uio, cred_t *cr, int *eofp,
2105 2099 caller_context_t *ct, int flags)
2106 2100 {
2107 2101 znode_t *zp = VTOZ(vp);
2108 2102 iovec_t *iovp;
2109 2103 edirent_t *eodp;
2110 2104 dirent64_t *odp;
2111 2105 zfsvfs_t *zfsvfs = zp->z_zfsvfs;
2112 2106 objset_t *os;
2113 2107 caddr_t outbuf;
2114 2108 size_t bufsize;
2115 2109 zap_cursor_t zc;
2116 2110 zap_attribute_t zap;
2117 2111 uint_t bytes_wanted;
2118 2112 uint64_t offset; /* must be unsigned; checks for < 1 */
2119 2113 uint64_t parent;
2120 2114 int local_eof;
2121 2115 int outcount;
2122 2116 int error;
2123 2117 uint8_t prefetch;
2124 2118 boolean_t check_sysattrs;
2125 2119
2126 2120 ZFS_ENTER(zfsvfs);
2127 2121 ZFS_VERIFY_ZP(zp);
2128 2122
2129 2123 if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
2130 2124 &parent, sizeof (parent))) != 0) {
2131 2125 ZFS_EXIT(zfsvfs);
2132 2126 return (error);
2133 2127 }
2134 2128
2135 2129 /*
2136 2130 * If we are not given an eof variable,
2137 2131 * use a local one.
2138 2132 */
2139 2133 if (eofp == NULL)
2140 2134 eofp = &local_eof;
2141 2135
2142 2136 /*
2143 2137 * Check for valid iov_len.
2144 2138 */
2145 2139 if (uio->uio_iov->iov_len <= 0) {
2146 2140 ZFS_EXIT(zfsvfs);
2147 2141 return (SET_ERROR(EINVAL));
2148 2142 }
2149 2143
2150 2144 /*
2151 2145 * Quit if directory has been removed (posix)
2152 2146 */
2153 2147 if ((*eofp = zp->z_unlinked) != 0) {
2154 2148 ZFS_EXIT(zfsvfs);
2155 2149 return (0);
2156 2150 }
2157 2151
2158 2152 error = 0;
2159 2153 os = zfsvfs->z_os;
2160 2154 offset = uio->uio_loffset;
2161 2155 prefetch = zp->z_zn_prefetch;
2162 2156
2163 2157 /*
2164 2158 * Initialize the iterator cursor.
2165 2159 */
2166 2160 if (offset <= 3) {
2167 2161 /*
2168 2162 * Start iteration from the beginning of the directory.
2169 2163 */
2170 2164 zap_cursor_init(&zc, os, zp->z_id);
2171 2165 } else {
2172 2166 /*
2173 2167 * The offset is a serialized cursor.
2174 2168 */
2175 2169 zap_cursor_init_serialized(&zc, os, zp->z_id, offset);
2176 2170 }
2177 2171
2178 2172 /*
2179 2173 * Get space to change directory entries into fs independent format.
2180 2174 */
2181 2175 iovp = uio->uio_iov;
2182 2176 bytes_wanted = iovp->iov_len;
2183 2177 if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1) {
2184 2178 bufsize = bytes_wanted;
2185 2179 outbuf = kmem_alloc(bufsize, KM_SLEEP);
2186 2180 odp = (struct dirent64 *)outbuf;
2187 2181 } else {
2188 2182 bufsize = bytes_wanted;
2189 2183 outbuf = NULL;
2190 2184 odp = (struct dirent64 *)iovp->iov_base;
2191 2185 }
2192 2186 eodp = (struct edirent *)odp;
2193 2187
2194 2188 /*
2195 2189 * If this VFS supports the system attribute view interface; and
2196 2190 * we're looking at an extended attribute directory; and we care
2197 2191 * about normalization conflicts on this vfs; then we must check
2198 2192 * for normalization conflicts with the sysattr name space.
2199 2193 */
2200 2194 check_sysattrs = vfs_has_feature(vp->v_vfsp, VFSFT_SYSATTR_VIEWS) &&
2201 2195 (vp->v_flag & V_XATTRDIR) && zfsvfs->z_norm &&
2202 2196 (flags & V_RDDIR_ENTFLAGS);
2203 2197
2204 2198 /*
2205 2199 * Transform to file-system independent format
2206 2200 */
2207 2201 outcount = 0;
2208 2202 while (outcount < bytes_wanted) {
2209 2203 ino64_t objnum;
2210 2204 ushort_t reclen;
2211 2205 off64_t *next = NULL;
2212 2206
2213 2207 /*
2214 2208 * Special case `.', `..', and `.zfs'.
2215 2209 */
2216 2210 if (offset == 0) {
2217 2211 (void) strcpy(zap.za_name, ".");
2218 2212 zap.za_normalization_conflict = 0;
2219 2213 objnum = zp->z_id;
2220 2214 } else if (offset == 1) {
2221 2215 (void) strcpy(zap.za_name, "..");
2222 2216 zap.za_normalization_conflict = 0;
2223 2217 objnum = parent;
2224 2218 } else if (offset == 2 && zfs_show_ctldir(zp)) {
2225 2219 (void) strcpy(zap.za_name, ZFS_CTLDIR_NAME);
2226 2220 zap.za_normalization_conflict = 0;
2227 2221 objnum = ZFSCTL_INO_ROOT;
2228 2222 } else {
2229 2223 /*
2230 2224 * Grab next entry.
2231 2225 */
2232 2226 if (error = zap_cursor_retrieve(&zc, &zap)) {
2233 2227 if ((*eofp = (error == ENOENT)) != 0)
2234 2228 break;
2235 2229 else
2236 2230 goto update;
2237 2231 }
2238 2232
2239 2233 if (zap.za_integer_length != 8 ||
2240 2234 zap.za_num_integers != 1) {
2241 2235 cmn_err(CE_WARN, "zap_readdir: bad directory "
2242 2236 "entry, obj = %lld, offset = %lld\n",
2243 2237 (u_longlong_t)zp->z_id,
2244 2238 (u_longlong_t)offset);
2245 2239 error = SET_ERROR(ENXIO);
2246 2240 goto update;
2247 2241 }
2248 2242
2249 2243 objnum = ZFS_DIRENT_OBJ(zap.za_first_integer);
2250 2244 /*
2251 2245 			 * Mac OS X can extract the object type here, e.g.:
2252 2246 * uint8_t type = ZFS_DIRENT_TYPE(zap.za_first_integer);
2253 2247 */
2254 2248
2255 2249 if (check_sysattrs && !zap.za_normalization_conflict) {
2256 2250 zap.za_normalization_conflict =
2257 2251 xattr_sysattr_casechk(zap.za_name);
2258 2252 }
2259 2253 }
2260 2254
2261 2255 if (flags & V_RDDIR_ACCFILTER) {
2262 2256 /*
2263 2257 * If we have no access at all, don't include
2264 2258 * this entry in the returned information
2265 2259 */
2266 2260 znode_t *ezp;
2267 2261 if (zfs_zget(zp->z_zfsvfs, objnum, &ezp) != 0)
2268 2262 goto skip_entry;
2269 2263 if (!zfs_has_access(ezp, cr)) {
2270 2264 VN_RELE(ZTOV(ezp));
2271 2265 goto skip_entry;
2272 2266 }
2273 2267 VN_RELE(ZTOV(ezp));
2274 2268 }
2275 2269
2276 2270 if (flags & V_RDDIR_ENTFLAGS)
2277 2271 reclen = EDIRENT_RECLEN(strlen(zap.za_name));
2278 2272 else
2279 2273 reclen = DIRENT64_RECLEN(strlen(zap.za_name));
2280 2274
2281 2275 /*
2282 2276 * Will this entry fit in the buffer?
2283 2277 */
2284 2278 if (outcount + reclen > bufsize) {
2285 2279 /*
2286 2280 * Did we manage to fit anything in the buffer?
2287 2281 */
2288 2282 if (!outcount) {
2289 2283 error = SET_ERROR(EINVAL);
2290 2284 goto update;
2291 2285 }
2292 2286 break;
2293 2287 }
2294 2288 if (flags & V_RDDIR_ENTFLAGS) {
2295 2289 /*
2296 2290 * Add extended flag entry:
2297 2291 */
2298 2292 eodp->ed_ino = objnum;
2299 2293 eodp->ed_reclen = reclen;
2300 2294 /* NOTE: ed_off is the offset for the *next* entry */
2301 2295 next = &(eodp->ed_off);
2302 2296 eodp->ed_eflags = zap.za_normalization_conflict ?
2303 2297 ED_CASE_CONFLICT : 0;
2304 2298 (void) strncpy(eodp->ed_name, zap.za_name,
2305 2299 EDIRENT_NAMELEN(reclen));
2306 2300 eodp = (edirent_t *)((intptr_t)eodp + reclen);
2307 2301 } else {
2308 2302 /*
2309 2303 * Add normal entry:
2310 2304 */
2311 2305 odp->d_ino = objnum;
2312 2306 odp->d_reclen = reclen;
2313 2307 /* NOTE: d_off is the offset for the *next* entry */
2314 2308 next = &(odp->d_off);
2315 2309 (void) strncpy(odp->d_name, zap.za_name,
2316 2310 DIRENT64_NAMELEN(reclen));
2317 2311 odp = (dirent64_t *)((intptr_t)odp + reclen);
2318 2312 }
2319 2313 outcount += reclen;
2320 2314
2321 2315 ASSERT(outcount <= bufsize);
2322 2316
2323 2317 /* Prefetch znode */
2324 2318 if (prefetch)
2325 2319 dmu_prefetch(os, objnum, 0, 0);
2326 2320
2327 2321 skip_entry:
2328 2322 /*
2329 2323 * Move to the next entry, fill in the previous offset.
2330 2324 */
2331 2325 if (offset > 2 || (offset == 2 && !zfs_show_ctldir(zp))) {
2332 2326 zap_cursor_advance(&zc);
2333 2327 offset = zap_cursor_serialize(&zc);
2334 2328 } else {
2335 2329 offset += 1;
2336 2330 }
2337 2331 if (next)
2338 2332 *next = offset;
2339 2333 }
2340 2334 zp->z_zn_prefetch = B_FALSE; /* a lookup will re-enable pre-fetching */
2341 2335
2342 2336 if (uio->uio_segflg == UIO_SYSSPACE && uio->uio_iovcnt == 1) {
2343 2337 iovp->iov_base += outcount;
2344 2338 iovp->iov_len -= outcount;
2345 2339 uio->uio_resid -= outcount;
2346 2340 } else if (error = uiomove(outbuf, (long)outcount, UIO_READ, uio)) {
2347 2341 /*
2348 2342 * Reset the pointer.
2349 2343 */
2350 2344 offset = uio->uio_loffset;
2351 2345 }
2352 2346
2353 2347 update:
2354 2348 zap_cursor_fini(&zc);
2355 2349 if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1)
2356 2350 kmem_free(outbuf, bufsize);
2357 2351
2358 2352 if (error == ENOENT)
2359 2353 error = 0;
2360 2354
2361 2355 ZFS_ACCESSTIME_STAMP(zfsvfs, zp);
2362 2356
2363 2357 uio->uio_loffset = offset;
2364 2358 ZFS_EXIT(zfsvfs);
2365 2359 return (error);
2366 2360 }
2367 2361
2368 2362 ulong_t zfs_fsync_sync_cnt = 4;
2369 2363
2370 2364 static int
2371 2365 zfs_fsync(vnode_t *vp, int syncflag, cred_t *cr, caller_context_t *ct)
2372 2366 {
2373 2367 znode_t *zp = VTOZ(vp);
2374 2368 zfsvfs_t *zfsvfs = zp->z_zfsvfs;
2375 2369
2376 2370 /*
2377 2371 * Regardless of whether this is required for standards conformance,
2378 2372 * this is the logical behavior when fsync() is called on a file with
2379 2373 * dirty pages. We use B_ASYNC since the ZIL transactions are already
2380 2374 * going to be pushed out as part of the zil_commit().
2381 2375 */
2382 2376 if (vn_has_cached_data(vp) && !(syncflag & FNODSYNC) &&
2383 2377 (vp->v_type == VREG) && !(IS_SWAPVP(vp)))
2384 2378 (void) VOP_PUTPAGE(vp, (offset_t)0, (size_t)0, B_ASYNC, cr, ct);
2385 2379
2386 2380 (void) tsd_set(zfs_fsyncer_key, (void *)zfs_fsync_sync_cnt);
2387 2381
2388 2382 if (zfsvfs->z_os->os_sync != ZFS_SYNC_DISABLED) {
2389 2383 ZFS_ENTER(zfsvfs);
2390 2384 ZFS_VERIFY_ZP(zp);
2391 2385 zil_commit(zfsvfs->z_log, zp->z_id);
2392 2386 ZFS_EXIT(zfsvfs);
2393 2387 }
2394 2388 return (0);
2395 2389 }
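/*
 * A minimal userland sketch (not from the original source): fsync(2) on a
 * ZFS file is dispatched to zfs_fsync() above via VOP_FSYNC().  The path
 * is a hypothetical ZFS-backed file.  As the code above shows, when the
 * dataset's sync property is "disabled" the zil_commit() call is skipped,
 * so fsync() returns without forcing the intent log to stable storage.
 */
#include <fcntl.h>
#include <unistd.h>
#include <stdio.h>

int
main(void)
{
	int fd = open("/zpool/example/file", O_WRONLY | O_CREAT, 0644);

	if (fd == -1) {
		perror("open");
		return (1);
	}
	(void) write(fd, "data\n", 5);
	if (fsync(fd) == -1)
		perror("fsync");
	(void) close(fd);
	return (0);
}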
2396 2390
2397 2391
2398 2392 /*
2399 2393 * Get the requested file attributes and place them in the provided
2400 2394 * vattr structure.
2401 2395 *
2402 2396 * IN: vp - vnode of file.
2403 2397 * vap - va_mask identifies requested attributes.
2404 2398 * If AT_XVATTR set, then optional attrs are requested
2405 2399 * flags - ATTR_NOACLCHECK (CIFS server context)
2406 2400 * cr - credentials of caller.
2407 2401 * ct - caller context
2408 2402 *
2409 2403 * OUT: vap - attribute values.
2410 2404 *
2411 - * RETURN: 0 (always succeeds)
2405 + * RETURN: 0 (always succeeds).
2412 2406 */
2413 2407 /* ARGSUSED */
2414 2408 static int
2415 2409 zfs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
2416 2410 caller_context_t *ct)
2417 2411 {
2418 2412 znode_t *zp = VTOZ(vp);
2419 2413 zfsvfs_t *zfsvfs = zp->z_zfsvfs;
2420 2414 int error = 0;
2421 2415 uint64_t links;
2422 2416 uint64_t mtime[2], ctime[2];
2423 2417 xvattr_t *xvap = (xvattr_t *)vap; /* vap may be an xvattr_t * */
2424 2418 xoptattr_t *xoap = NULL;
2425 2419 boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE;
2426 2420 sa_bulk_attr_t bulk[2];
2427 2421 int count = 0;
2428 2422
2429 2423 ZFS_ENTER(zfsvfs);
2430 2424 ZFS_VERIFY_ZP(zp);
2431 2425
2432 2426 zfs_fuid_map_ids(zp, cr, &vap->va_uid, &vap->va_gid);
2433 2427
2434 2428 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16);
2435 2429 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16);
2436 2430
2437 2431 if ((error = sa_bulk_lookup(zp->z_sa_hdl, bulk, count)) != 0) {
2438 2432 ZFS_EXIT(zfsvfs);
2439 2433 return (error);
2440 2434 }
2441 2435
2442 2436 /*
2443 2437 * If ACL is trivial don't bother looking for ACE_READ_ATTRIBUTES.
2444 2438 	 * Also, if we are the owner, don't bother, since the owner should
2445 2439 	 * always be allowed to read the basic attributes of the file.
2446 2440 */
2447 2441 if (!(zp->z_pflags & ZFS_ACL_TRIVIAL) &&
2448 2442 (vap->va_uid != crgetuid(cr))) {
2449 2443 if (error = zfs_zaccess(zp, ACE_READ_ATTRIBUTES, 0,
2450 2444 skipaclchk, cr)) {
2451 2445 ZFS_EXIT(zfsvfs);
2452 2446 return (error);
2453 2447 }
2454 2448 }
2455 2449
2456 2450 /*
2457 2451 * Return all attributes. It's cheaper to provide the answer
2458 2452 * than to determine whether we were asked the question.
2459 2453 */
2460 2454
2461 2455 mutex_enter(&zp->z_lock);
2462 2456 vap->va_type = vp->v_type;
2463 2457 vap->va_mode = zp->z_mode & MODEMASK;
2464 2458 vap->va_fsid = zp->z_zfsvfs->z_vfs->vfs_dev;
2465 2459 vap->va_nodeid = zp->z_id;
2466 2460 if ((vp->v_flag & VROOT) && zfs_show_ctldir(zp))
2467 2461 links = zp->z_links + 1;
2468 2462 else
2469 2463 links = zp->z_links;
2470 2464 vap->va_nlink = MIN(links, UINT32_MAX); /* nlink_t limit! */
2471 2465 vap->va_size = zp->z_size;
2472 2466 vap->va_rdev = vp->v_rdev;
2473 2467 vap->va_seq = zp->z_seq;
2474 2468
2475 2469 /*
2476 2470 * Add in any requested optional attributes and the create time.
2477 2471 * Also set the corresponding bits in the returned attribute bitmap.
2478 2472 */
2479 2473 if ((xoap = xva_getxoptattr(xvap)) != NULL && zfsvfs->z_use_fuids) {
2480 2474 if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) {
2481 2475 xoap->xoa_archive =
2482 2476 ((zp->z_pflags & ZFS_ARCHIVE) != 0);
2483 2477 XVA_SET_RTN(xvap, XAT_ARCHIVE);
2484 2478 }
2485 2479
2486 2480 if (XVA_ISSET_REQ(xvap, XAT_READONLY)) {
2487 2481 xoap->xoa_readonly =
2488 2482 ((zp->z_pflags & ZFS_READONLY) != 0);
2489 2483 XVA_SET_RTN(xvap, XAT_READONLY);
2490 2484 }
2491 2485
2492 2486 if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) {
2493 2487 xoap->xoa_system =
2494 2488 ((zp->z_pflags & ZFS_SYSTEM) != 0);
2495 2489 XVA_SET_RTN(xvap, XAT_SYSTEM);
2496 2490 }
2497 2491
2498 2492 if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) {
2499 2493 xoap->xoa_hidden =
2500 2494 ((zp->z_pflags & ZFS_HIDDEN) != 0);
2501 2495 XVA_SET_RTN(xvap, XAT_HIDDEN);
2502 2496 }
2503 2497
2504 2498 if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) {
2505 2499 xoap->xoa_nounlink =
2506 2500 ((zp->z_pflags & ZFS_NOUNLINK) != 0);
2507 2501 XVA_SET_RTN(xvap, XAT_NOUNLINK);
2508 2502 }
2509 2503
2510 2504 if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) {
2511 2505 xoap->xoa_immutable =
2512 2506 ((zp->z_pflags & ZFS_IMMUTABLE) != 0);
2513 2507 XVA_SET_RTN(xvap, XAT_IMMUTABLE);
2514 2508 }
2515 2509
2516 2510 if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) {
2517 2511 xoap->xoa_appendonly =
2518 2512 ((zp->z_pflags & ZFS_APPENDONLY) != 0);
2519 2513 XVA_SET_RTN(xvap, XAT_APPENDONLY);
2520 2514 }
2521 2515
2522 2516 if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) {
2523 2517 xoap->xoa_nodump =
2524 2518 ((zp->z_pflags & ZFS_NODUMP) != 0);
2525 2519 XVA_SET_RTN(xvap, XAT_NODUMP);
2526 2520 }
2527 2521
2528 2522 if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) {
2529 2523 xoap->xoa_opaque =
2530 2524 ((zp->z_pflags & ZFS_OPAQUE) != 0);
2531 2525 XVA_SET_RTN(xvap, XAT_OPAQUE);
2532 2526 }
2533 2527
2534 2528 if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) {
2535 2529 xoap->xoa_av_quarantined =
2536 2530 ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0);
2537 2531 XVA_SET_RTN(xvap, XAT_AV_QUARANTINED);
2538 2532 }
2539 2533
2540 2534 if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) {
2541 2535 xoap->xoa_av_modified =
2542 2536 ((zp->z_pflags & ZFS_AV_MODIFIED) != 0);
2543 2537 XVA_SET_RTN(xvap, XAT_AV_MODIFIED);
2544 2538 }
2545 2539
2546 2540 if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) &&
2547 2541 vp->v_type == VREG) {
2548 2542 zfs_sa_get_scanstamp(zp, xvap);
2549 2543 }
2550 2544
2551 2545 if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) {
2552 2546 uint64_t times[2];
2553 2547
2554 2548 (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_CRTIME(zfsvfs),
2555 2549 times, sizeof (times));
2556 2550 ZFS_TIME_DECODE(&xoap->xoa_createtime, times);
2557 2551 XVA_SET_RTN(xvap, XAT_CREATETIME);
2558 2552 }
2559 2553
2560 2554 if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) {
2561 2555 xoap->xoa_reparse = ((zp->z_pflags & ZFS_REPARSE) != 0);
2562 2556 XVA_SET_RTN(xvap, XAT_REPARSE);
2563 2557 }
2564 2558 if (XVA_ISSET_REQ(xvap, XAT_GEN)) {
2565 2559 xoap->xoa_generation = zp->z_gen;
2566 2560 XVA_SET_RTN(xvap, XAT_GEN);
2567 2561 }
2568 2562
2569 2563 if (XVA_ISSET_REQ(xvap, XAT_OFFLINE)) {
2570 2564 xoap->xoa_offline =
2571 2565 ((zp->z_pflags & ZFS_OFFLINE) != 0);
2572 2566 XVA_SET_RTN(xvap, XAT_OFFLINE);
2573 2567 }
2574 2568
2575 2569 if (XVA_ISSET_REQ(xvap, XAT_SPARSE)) {
2576 2570 xoap->xoa_sparse =
2577 2571 ((zp->z_pflags & ZFS_SPARSE) != 0);
2578 2572 XVA_SET_RTN(xvap, XAT_SPARSE);
2579 2573 }
2580 2574 }
2581 2575
2582 2576 ZFS_TIME_DECODE(&vap->va_atime, zp->z_atime);
2583 2577 ZFS_TIME_DECODE(&vap->va_mtime, mtime);
2584 2578 ZFS_TIME_DECODE(&vap->va_ctime, ctime);
2585 2579
2586 2580 mutex_exit(&zp->z_lock);
2587 2581
2588 2582 sa_object_size(zp->z_sa_hdl, &vap->va_blksize, &vap->va_nblocks);
2589 2583
2590 2584 if (zp->z_blksz == 0) {
2591 2585 /*
2592 2586 * Block size hasn't been set; suggest maximal I/O transfers.
2593 2587 */
2594 2588 vap->va_blksize = zfsvfs->z_max_blksz;
2595 2589 }
2596 2590
2597 2591 ZFS_EXIT(zfsvfs);
2598 2592 return (0);
2599 2593 }
2600 2594
2601 2595 /*
2602 2596 * Set the file attributes to the values contained in the
2603 2597 * vattr structure.
2604 2598 *
2605 2599 * IN: vp - vnode of file to be modified.
2606 2600 * vap - new attribute values.
2607 2601 * If AT_XVATTR set, then optional attrs are being set
2608 2602 * flags - ATTR_UTIME set if non-default time values provided.
2609 2603 * - ATTR_NOACLCHECK (CIFS context only).
2610 2604 * cr - credentials of caller.
2611 2605 * ct - caller context
2612 2606 *
2613 - * RETURN: 0 if success
2614 - * error code if failure
2607 + * RETURN: 0 on success, error code on failure.
2615 2608 *
2616 2609 * Timestamps:
2617 2610 * vp - ctime updated, mtime updated if size changed.
2618 2611 */
2619 2612 /* ARGSUSED */
2620 2613 static int
2621 2614 zfs_setattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
2622 - caller_context_t *ct)
2615 + caller_context_t *ct)
2623 2616 {
2624 2617 znode_t *zp = VTOZ(vp);
2625 2618 zfsvfs_t *zfsvfs = zp->z_zfsvfs;
2626 2619 zilog_t *zilog;
2627 2620 dmu_tx_t *tx;
2628 2621 vattr_t oldva;
2629 2622 xvattr_t tmpxvattr;
2630 2623 uint_t mask = vap->va_mask;
2631 2624 uint_t saved_mask = 0;
2632 2625 int trim_mask = 0;
2633 2626 uint64_t new_mode;
2634 2627 uint64_t new_uid, new_gid;
2635 2628 uint64_t xattr_obj;
2636 2629 uint64_t mtime[2], ctime[2];
2637 2630 znode_t *attrzp;
2638 2631 int need_policy = FALSE;
2639 2632 int err, err2;
2640 2633 zfs_fuid_info_t *fuidp = NULL;
2641 2634 xvattr_t *xvap = (xvattr_t *)vap; /* vap may be an xvattr_t * */
2642 2635 xoptattr_t *xoap;
2643 2636 zfs_acl_t *aclp;
2644 2637 boolean_t skipaclchk = (flags & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE;
2645 2638 boolean_t fuid_dirtied = B_FALSE;
2646 2639 sa_bulk_attr_t bulk[7], xattr_bulk[7];
2647 2640 int count = 0, xattr_count = 0;
2648 2641
2649 2642 if (mask == 0)
2650 2643 return (0);
2651 2644
2652 2645 if (mask & AT_NOSET)
2653 2646 return (SET_ERROR(EINVAL));
2654 2647
2655 2648 ZFS_ENTER(zfsvfs);
2656 2649 ZFS_VERIFY_ZP(zp);
2657 2650
2658 2651 zilog = zfsvfs->z_log;
2659 2652
2660 2653 /*
2661 2654 	 * Make sure that if we have an ephemeral uid/gid or xvattr specified,
2662 2655 	 * the file system is at the proper version level.
2663 2656 */
2664 2657
2665 2658 if (zfsvfs->z_use_fuids == B_FALSE &&
2666 2659 (((mask & AT_UID) && IS_EPHEMERAL(vap->va_uid)) ||
2667 2660 ((mask & AT_GID) && IS_EPHEMERAL(vap->va_gid)) ||
2668 2661 (mask & AT_XVATTR))) {
2669 2662 ZFS_EXIT(zfsvfs);
2670 2663 return (SET_ERROR(EINVAL));
2671 2664 }
2672 2665
2673 2666 if (mask & AT_SIZE && vp->v_type == VDIR) {
2674 2667 ZFS_EXIT(zfsvfs);
2675 2668 return (SET_ERROR(EISDIR));
2676 2669 }
2677 2670
2678 2671 if (mask & AT_SIZE && vp->v_type != VREG && vp->v_type != VFIFO) {
2679 2672 ZFS_EXIT(zfsvfs);
2680 2673 return (SET_ERROR(EINVAL));
2681 2674 }
2682 2675
2683 2676 /*
2684 2677 * If this is an xvattr_t, then get a pointer to the structure of
2685 2678 * optional attributes. If this is NULL, then we have a vattr_t.
2686 2679 */
2687 2680 xoap = xva_getxoptattr(xvap);
2688 2681
2689 2682 xva_init(&tmpxvattr);
2690 2683
2691 2684 /*
2692 2685 	 * For immutable files, only the immutable bit and atime may be altered.
2693 2686 */
2694 2687 if ((zp->z_pflags & ZFS_IMMUTABLE) &&
2695 2688 ((mask & (AT_SIZE|AT_UID|AT_GID|AT_MTIME|AT_MODE)) ||
2696 2689 ((mask & AT_XVATTR) && XVA_ISSET_REQ(xvap, XAT_CREATETIME)))) {
2697 2690 ZFS_EXIT(zfsvfs);
2698 2691 return (SET_ERROR(EPERM));
2699 2692 }
2700 2693
2701 2694 if ((mask & AT_SIZE) && (zp->z_pflags & ZFS_READONLY)) {
2702 2695 ZFS_EXIT(zfsvfs);
2703 2696 return (SET_ERROR(EPERM));
2704 2697 }
2705 2698
2706 2699 /*
2707 2700 	 * Verify that the timestamps don't overflow 32 bits.
2708 2701 	 * ZFS can handle large timestamps, but 32-bit syscalls can't
2709 2702 * handle times greater than 2039. This check should be removed
2710 2703 * once large timestamps are fully supported.
2711 2704 */
2712 2705 if (mask & (AT_ATIME | AT_MTIME)) {
2713 2706 if (((mask & AT_ATIME) && TIMESPEC_OVERFLOW(&vap->va_atime)) ||
2714 2707 ((mask & AT_MTIME) && TIMESPEC_OVERFLOW(&vap->va_mtime))) {
2715 2708 ZFS_EXIT(zfsvfs);
2716 2709 return (SET_ERROR(EOVERFLOW));
2717 2710 }
2718 2711 }
2719 2712
2720 2713 top:
2721 2714 attrzp = NULL;
2722 2715 aclp = NULL;
2723 2716
2724 2717 /* Can this be moved to before the top label? */
2725 2718 if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) {
2726 2719 ZFS_EXIT(zfsvfs);
2727 2720 return (SET_ERROR(EROFS));
2728 2721 }
2729 2722
2730 2723 /*
2731 2724 * First validate permissions
2732 2725 */
2733 2726
2734 2727 if (mask & AT_SIZE) {
2735 2728 err = zfs_zaccess(zp, ACE_WRITE_DATA, 0, skipaclchk, cr);
2736 2729 if (err) {
2737 2730 ZFS_EXIT(zfsvfs);
2738 2731 return (err);
2739 2732 }
2740 2733 /*
2741 2734 * XXX - Note, we are not providing any open
2742 2735 * mode flags here (like FNDELAY), so we may
2743 2736 * block if there are locks present... this
2744 2737 * should be addressed in openat().
2745 2738 */
2746 2739 /* XXX - would it be OK to generate a log record here? */
2747 2740 err = zfs_freesp(zp, vap->va_size, 0, 0, FALSE);
2748 2741 if (err) {
2749 2742 ZFS_EXIT(zfsvfs);
2750 2743 return (err);
2751 2744 }
2752 2745 }
2753 2746
2754 2747 if (mask & (AT_ATIME|AT_MTIME) ||
2755 2748 ((mask & AT_XVATTR) && (XVA_ISSET_REQ(xvap, XAT_HIDDEN) ||
2756 2749 XVA_ISSET_REQ(xvap, XAT_READONLY) ||
2757 2750 XVA_ISSET_REQ(xvap, XAT_ARCHIVE) ||
2758 2751 XVA_ISSET_REQ(xvap, XAT_OFFLINE) ||
2759 2752 XVA_ISSET_REQ(xvap, XAT_SPARSE) ||
2760 2753 XVA_ISSET_REQ(xvap, XAT_CREATETIME) ||
2761 2754 XVA_ISSET_REQ(xvap, XAT_SYSTEM)))) {
2762 2755 need_policy = zfs_zaccess(zp, ACE_WRITE_ATTRIBUTES, 0,
2763 2756 skipaclchk, cr);
2764 2757 }
2765 2758
2766 2759 if (mask & (AT_UID|AT_GID)) {
2767 2760 int idmask = (mask & (AT_UID|AT_GID));
2768 2761 int take_owner;
2769 2762 int take_group;
2770 2763
2771 2764 /*
2772 2765 * NOTE: even if a new mode is being set,
2773 2766 * we may clear S_ISUID/S_ISGID bits.
2774 2767 */
2775 2768
2776 2769 if (!(mask & AT_MODE))
2777 2770 vap->va_mode = zp->z_mode;
2778 2771
2779 2772 /*
2780 2773 * Take ownership or chgrp to group we are a member of
2781 2774 */
2782 2775
2783 2776 take_owner = (mask & AT_UID) && (vap->va_uid == crgetuid(cr));
2784 2777 take_group = (mask & AT_GID) &&
2785 2778 zfs_groupmember(zfsvfs, vap->va_gid, cr);
2786 2779
2787 2780 /*
2788 2781 * If both AT_UID and AT_GID are set then take_owner and
2789 2782 * take_group must both be set in order to allow taking
2790 2783 * ownership.
2791 2784 *
2792 2785 * Otherwise, send the check through secpolicy_vnode_setattr()
2793 2786 *
2794 2787 */
2795 2788
2796 2789 if (((idmask == (AT_UID|AT_GID)) && take_owner && take_group) ||
2797 2790 ((idmask == AT_UID) && take_owner) ||
2798 2791 ((idmask == AT_GID) && take_group)) {
2799 2792 if (zfs_zaccess(zp, ACE_WRITE_OWNER, 0,
2800 2793 skipaclchk, cr) == 0) {
2801 2794 /*
2802 2795 * Remove setuid/setgid for non-privileged users
2803 2796 */
2804 2797 secpolicy_setid_clear(vap, cr);
2805 2798 trim_mask = (mask & (AT_UID|AT_GID));
2806 2799 } else {
2807 2800 need_policy = TRUE;
2808 2801 }
2809 2802 } else {
2810 2803 need_policy = TRUE;
2811 2804 }
2812 2805 }
2813 2806
2814 2807 mutex_enter(&zp->z_lock);
2815 2808 oldva.va_mode = zp->z_mode;
2816 2809 zfs_fuid_map_ids(zp, cr, &oldva.va_uid, &oldva.va_gid);
2817 2810 if (mask & AT_XVATTR) {
2818 2811 /*
2819 2812 * Update xvattr mask to include only those attributes
2820 2813 * that are actually changing.
2821 2814 *
2822 2815 * the bits will be restored prior to actually setting
2823 2816 * the attributes so the caller thinks they were set.
2824 2817 */
2825 2818 if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) {
2826 2819 if (xoap->xoa_appendonly !=
2827 2820 ((zp->z_pflags & ZFS_APPENDONLY) != 0)) {
2828 2821 need_policy = TRUE;
2829 2822 } else {
2830 2823 XVA_CLR_REQ(xvap, XAT_APPENDONLY);
2831 2824 XVA_SET_REQ(&tmpxvattr, XAT_APPENDONLY);
2832 2825 }
2833 2826 }
2834 2827
2835 2828 if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) {
2836 2829 if (xoap->xoa_nounlink !=
2837 2830 ((zp->z_pflags & ZFS_NOUNLINK) != 0)) {
2838 2831 need_policy = TRUE;
2839 2832 } else {
2840 2833 XVA_CLR_REQ(xvap, XAT_NOUNLINK);
2841 2834 XVA_SET_REQ(&tmpxvattr, XAT_NOUNLINK);
2842 2835 }
2843 2836 }
2844 2837
2845 2838 if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) {
2846 2839 if (xoap->xoa_immutable !=
2847 2840 ((zp->z_pflags & ZFS_IMMUTABLE) != 0)) {
2848 2841 need_policy = TRUE;
2849 2842 } else {
2850 2843 XVA_CLR_REQ(xvap, XAT_IMMUTABLE);
2851 2844 XVA_SET_REQ(&tmpxvattr, XAT_IMMUTABLE);
2852 2845 }
2853 2846 }
2854 2847
2855 2848 if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) {
2856 2849 if (xoap->xoa_nodump !=
2857 2850 ((zp->z_pflags & ZFS_NODUMP) != 0)) {
2858 2851 need_policy = TRUE;
2859 2852 } else {
2860 2853 XVA_CLR_REQ(xvap, XAT_NODUMP);
2861 2854 XVA_SET_REQ(&tmpxvattr, XAT_NODUMP);
2862 2855 }
2863 2856 }
2864 2857
2865 2858 if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) {
2866 2859 if (xoap->xoa_av_modified !=
2867 2860 ((zp->z_pflags & ZFS_AV_MODIFIED) != 0)) {
2868 2861 need_policy = TRUE;
2869 2862 } else {
2870 2863 XVA_CLR_REQ(xvap, XAT_AV_MODIFIED);
2871 2864 XVA_SET_REQ(&tmpxvattr, XAT_AV_MODIFIED);
2872 2865 }
2873 2866 }
2874 2867
2875 2868 if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) {
2876 2869 if ((vp->v_type != VREG &&
2877 2870 xoap->xoa_av_quarantined) ||
2878 2871 xoap->xoa_av_quarantined !=
2879 2872 ((zp->z_pflags & ZFS_AV_QUARANTINED) != 0)) {
2880 2873 need_policy = TRUE;
2881 2874 } else {
2882 2875 XVA_CLR_REQ(xvap, XAT_AV_QUARANTINED);
2883 2876 XVA_SET_REQ(&tmpxvattr, XAT_AV_QUARANTINED);
2884 2877 }
2885 2878 }
2886 2879
2887 2880 if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) {
2888 2881 mutex_exit(&zp->z_lock);
2889 2882 ZFS_EXIT(zfsvfs);
2890 2883 return (SET_ERROR(EPERM));
2891 2884 }
2892 2885
2893 2886 if (need_policy == FALSE &&
2894 2887 (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP) ||
2895 2888 XVA_ISSET_REQ(xvap, XAT_OPAQUE))) {
2896 2889 need_policy = TRUE;
2897 2890 }
2898 2891 }
2899 2892
2900 2893 mutex_exit(&zp->z_lock);
2901 2894
2902 2895 if (mask & AT_MODE) {
2903 2896 if (zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr) == 0) {
2904 2897 err = secpolicy_setid_setsticky_clear(vp, vap,
2905 2898 &oldva, cr);
2906 2899 if (err) {
2907 2900 ZFS_EXIT(zfsvfs);
2908 2901 return (err);
2909 2902 }
2910 2903 trim_mask |= AT_MODE;
2911 2904 } else {
2912 2905 need_policy = TRUE;
2913 2906 }
2914 2907 }
2915 2908
2916 2909 if (need_policy) {
2917 2910 /*
2918 2911 * If trim_mask is set then take ownership
2919 2912 * has been granted or write_acl is present and user
2920 2913 * has the ability to modify mode. In that case remove
2921 2914 		 * UID|GID and/or MODE from mask so that
2922 2915 * secpolicy_vnode_setattr() doesn't revoke it.
2923 2916 */
2924 2917
2925 2918 if (trim_mask) {
2926 2919 saved_mask = vap->va_mask;
2927 2920 vap->va_mask &= ~trim_mask;
2928 2921 }
2929 2922 err = secpolicy_vnode_setattr(cr, vp, vap, &oldva, flags,
2930 2923 (int (*)(void *, int, cred_t *))zfs_zaccess_unix, zp);
2931 2924 if (err) {
2932 2925 ZFS_EXIT(zfsvfs);
2933 2926 return (err);
2934 2927 }
2935 2928
2936 2929 if (trim_mask)
2937 2930 vap->va_mask |= saved_mask;
2938 2931 }
2939 2932
2940 2933 /*
2941 2934 	 * secpolicy_vnode_setattr(), or taking ownership, may have
2942 2935 	 * changed va_mask.
2943 2936 */
2944 2937 mask = vap->va_mask;
2945 2938
2946 2939 if ((mask & (AT_UID | AT_GID))) {
2947 2940 err = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs),
2948 2941 &xattr_obj, sizeof (xattr_obj));
2949 2942
2950 2943 if (err == 0 && xattr_obj) {
2951 2944 err = zfs_zget(zp->z_zfsvfs, xattr_obj, &attrzp);
2952 2945 if (err)
2953 2946 goto out2;
2954 2947 }
2955 2948 if (mask & AT_UID) {
2956 2949 new_uid = zfs_fuid_create(zfsvfs,
2957 2950 (uint64_t)vap->va_uid, cr, ZFS_OWNER, &fuidp);
2958 2951 if (new_uid != zp->z_uid &&
2959 2952 zfs_fuid_overquota(zfsvfs, B_FALSE, new_uid)) {
2960 2953 if (attrzp)
2961 2954 VN_RELE(ZTOV(attrzp));
2962 2955 err = SET_ERROR(EDQUOT);
2963 2956 goto out2;
2964 2957 }
2965 2958 }
2966 2959
2967 2960 if (mask & AT_GID) {
2968 2961 new_gid = zfs_fuid_create(zfsvfs, (uint64_t)vap->va_gid,
2969 2962 cr, ZFS_GROUP, &fuidp);
2970 2963 if (new_gid != zp->z_gid &&
2971 2964 zfs_fuid_overquota(zfsvfs, B_TRUE, new_gid)) {
2972 2965 if (attrzp)
2973 2966 VN_RELE(ZTOV(attrzp));
2974 2967 err = SET_ERROR(EDQUOT);
2975 2968 goto out2;
2976 2969 }
2977 2970 }
2978 2971 }
2979 2972 tx = dmu_tx_create(zfsvfs->z_os);
2980 2973
2981 2974 if (mask & AT_MODE) {
2982 2975 uint64_t pmode = zp->z_mode;
2983 2976 uint64_t acl_obj;
2984 2977 new_mode = (pmode & S_IFMT) | (vap->va_mode & ~S_IFMT);
2985 2978
2986 2979 if (zp->z_zfsvfs->z_acl_mode == ZFS_ACL_RESTRICTED &&
2987 2980 !(zp->z_pflags & ZFS_ACL_TRIVIAL)) {
2988 2981 err = SET_ERROR(EPERM);
2989 2982 goto out;
2990 2983 }
2991 2984
2992 2985 if (err = zfs_acl_chmod_setattr(zp, &aclp, new_mode))
2993 2986 goto out;
2994 2987
2995 2988 mutex_enter(&zp->z_lock);
2996 2989 if (!zp->z_is_sa && ((acl_obj = zfs_external_acl(zp)) != 0)) {
2997 2990 /*
2998 2991 * Are we upgrading ACL from old V0 format
2999 2992 * to V1 format?
3000 2993 */
3001 2994 if (zfsvfs->z_version >= ZPL_VERSION_FUID &&
3002 2995 zfs_znode_acl_version(zp) ==
3003 2996 ZFS_ACL_VERSION_INITIAL) {
3004 2997 dmu_tx_hold_free(tx, acl_obj, 0,
3005 2998 DMU_OBJECT_END);
3006 2999 dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
3007 3000 0, aclp->z_acl_bytes);
3008 3001 } else {
3009 3002 dmu_tx_hold_write(tx, acl_obj, 0,
3010 3003 aclp->z_acl_bytes);
3011 3004 }
3012 3005 } else if (!zp->z_is_sa && aclp->z_acl_bytes > ZFS_ACE_SPACE) {
3013 3006 dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
3014 3007 0, aclp->z_acl_bytes);
3015 3008 }
3016 3009 mutex_exit(&zp->z_lock);
3017 3010 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
3018 3011 } else {
3019 3012 if ((mask & AT_XVATTR) &&
3020 3013 XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP))
3021 3014 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
3022 3015 else
3023 3016 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
3024 3017 }
3025 3018
3026 3019 if (attrzp) {
3027 3020 dmu_tx_hold_sa(tx, attrzp->z_sa_hdl, B_FALSE);
3028 3021 }
3029 3022
3030 3023 fuid_dirtied = zfsvfs->z_fuid_dirty;
3031 3024 if (fuid_dirtied)
3032 3025 zfs_fuid_txhold(zfsvfs, tx);
3033 3026
3034 3027 zfs_sa_upgrade_txholds(tx, zp);
3035 3028
3036 3029 err = dmu_tx_assign(tx, TXG_NOWAIT);
3037 3030 if (err) {
3038 3031 if (err == ERESTART)
3039 3032 dmu_tx_wait(tx);
3040 3033 goto out;
3041 3034 }
3042 3035
3043 3036 count = 0;
3044 3037 /*
3045 3038 * Set each attribute requested.
3046 3039 * We group settings according to the locks they need to acquire.
3047 3040 *
3048 3041 * Note: you cannot set ctime directly, although it will be
3049 3042 * updated as a side-effect of calling this function.
3050 3043 */
3051 3044
3052 3045
3053 3046 if (mask & (AT_UID|AT_GID|AT_MODE))
3054 3047 mutex_enter(&zp->z_acl_lock);
3055 3048 mutex_enter(&zp->z_lock);
3056 3049
3057 3050 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
3058 3051 &zp->z_pflags, sizeof (zp->z_pflags));
3059 3052
3060 3053 if (attrzp) {
3061 3054 if (mask & (AT_UID|AT_GID|AT_MODE))
3062 3055 mutex_enter(&attrzp->z_acl_lock);
3063 3056 mutex_enter(&attrzp->z_lock);
3064 3057 SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
3065 3058 SA_ZPL_FLAGS(zfsvfs), NULL, &attrzp->z_pflags,
3066 3059 sizeof (attrzp->z_pflags));
3067 3060 }
3068 3061
3069 3062 if (mask & (AT_UID|AT_GID)) {
3070 3063
3071 3064 if (mask & AT_UID) {
3072 3065 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL,
3073 3066 &new_uid, sizeof (new_uid));
3074 3067 zp->z_uid = new_uid;
3075 3068 if (attrzp) {
3076 3069 SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
3077 3070 SA_ZPL_UID(zfsvfs), NULL, &new_uid,
3078 3071 sizeof (new_uid));
3079 3072 attrzp->z_uid = new_uid;
3080 3073 }
3081 3074 }
3082 3075
3083 3076 if (mask & AT_GID) {
3084 3077 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs),
3085 3078 NULL, &new_gid, sizeof (new_gid));
3086 3079 zp->z_gid = new_gid;
3087 3080 if (attrzp) {
3088 3081 SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
3089 3082 SA_ZPL_GID(zfsvfs), NULL, &new_gid,
3090 3083 sizeof (new_gid));
3091 3084 attrzp->z_gid = new_gid;
3092 3085 }
3093 3086 }
3094 3087 if (!(mask & AT_MODE)) {
3095 3088 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs),
3096 3089 NULL, &new_mode, sizeof (new_mode));
3097 3090 new_mode = zp->z_mode;
3098 3091 }
3099 3092 err = zfs_acl_chown_setattr(zp);
3100 3093 ASSERT(err == 0);
3101 3094 if (attrzp) {
3102 3095 err = zfs_acl_chown_setattr(attrzp);
3103 3096 ASSERT(err == 0);
3104 3097 }
3105 3098 }
3106 3099
3107 3100 if (mask & AT_MODE) {
3108 3101 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL,
3109 3102 &new_mode, sizeof (new_mode));
3110 3103 zp->z_mode = new_mode;
3111 3104 ASSERT3U((uintptr_t)aclp, !=, NULL);
3112 3105 err = zfs_aclset_common(zp, aclp, cr, tx);
3113 3106 ASSERT0(err);
3114 3107 if (zp->z_acl_cached)
3115 3108 zfs_acl_free(zp->z_acl_cached);
3116 3109 zp->z_acl_cached = aclp;
3117 3110 aclp = NULL;
3118 3111 }
3119 3112
3120 3113
3121 3114 if (mask & AT_ATIME) {
3122 3115 ZFS_TIME_ENCODE(&vap->va_atime, zp->z_atime);
3123 3116 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL,
3124 3117 &zp->z_atime, sizeof (zp->z_atime));
3125 3118 }
3126 3119
3127 3120 if (mask & AT_MTIME) {
3128 3121 ZFS_TIME_ENCODE(&vap->va_mtime, mtime);
3129 3122 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL,
3130 3123 mtime, sizeof (mtime));
3131 3124 }
3132 3125
3133 3126 /* XXX - shouldn't this be done *before* the ATIME/MTIME checks? */
3134 3127 if (mask & AT_SIZE && !(mask & AT_MTIME)) {
3135 3128 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs),
3136 3129 NULL, mtime, sizeof (mtime));
3137 3130 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
3138 3131 &ctime, sizeof (ctime));
3139 3132 zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime,
3140 3133 B_TRUE);
3141 3134 } else if (mask != 0) {
3142 3135 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
3143 3136 &ctime, sizeof (ctime));
3144 3137 zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime, ctime,
3145 3138 B_TRUE);
3146 3139 if (attrzp) {
3147 3140 SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
3148 3141 SA_ZPL_CTIME(zfsvfs), NULL,
3149 3142 &ctime, sizeof (ctime));
3150 3143 zfs_tstamp_update_setup(attrzp, STATE_CHANGED,
3151 3144 mtime, ctime, B_TRUE);
3152 3145 }
3153 3146 }
3154 3147 /*
3155 3148 	 * Do this after setting timestamps to prevent the timestamp
3156 3149 	 * update from toggling the bit.
3157 3150 */
3158 3151
3159 3152 if (xoap && (mask & AT_XVATTR)) {
3160 3153
3161 3154 /*
3162 3155 * restore trimmed off masks
3163 3156 * so that return masks can be set for caller.
3164 3157 */
3165 3158
3166 3159 if (XVA_ISSET_REQ(&tmpxvattr, XAT_APPENDONLY)) {
3167 3160 XVA_SET_REQ(xvap, XAT_APPENDONLY);
3168 3161 }
3169 3162 if (XVA_ISSET_REQ(&tmpxvattr, XAT_NOUNLINK)) {
3170 3163 XVA_SET_REQ(xvap, XAT_NOUNLINK);
3171 3164 }
3172 3165 if (XVA_ISSET_REQ(&tmpxvattr, XAT_IMMUTABLE)) {
3173 3166 XVA_SET_REQ(xvap, XAT_IMMUTABLE);
3174 3167 }
3175 3168 if (XVA_ISSET_REQ(&tmpxvattr, XAT_NODUMP)) {
3176 3169 XVA_SET_REQ(xvap, XAT_NODUMP);
3177 3170 }
3178 3171 if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_MODIFIED)) {
3179 3172 XVA_SET_REQ(xvap, XAT_AV_MODIFIED);
3180 3173 }
3181 3174 if (XVA_ISSET_REQ(&tmpxvattr, XAT_AV_QUARANTINED)) {
3182 3175 XVA_SET_REQ(xvap, XAT_AV_QUARANTINED);
3183 3176 }
3184 3177
3185 3178 if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP))
3186 3179 ASSERT(vp->v_type == VREG);
3187 3180
3188 3181 zfs_xvattr_set(zp, xvap, tx);
3189 3182 }
3190 3183
3191 3184 if (fuid_dirtied)
3192 3185 zfs_fuid_sync(zfsvfs, tx);
3193 3186
3194 3187 if (mask != 0)
3195 3188 zfs_log_setattr(zilog, tx, TX_SETATTR, zp, vap, mask, fuidp);
3196 3189
3197 3190 mutex_exit(&zp->z_lock);
3198 3191 if (mask & (AT_UID|AT_GID|AT_MODE))
3199 3192 mutex_exit(&zp->z_acl_lock);
3200 3193
3201 3194 if (attrzp) {
3202 3195 if (mask & (AT_UID|AT_GID|AT_MODE))
3203 3196 mutex_exit(&attrzp->z_acl_lock);
3204 3197 mutex_exit(&attrzp->z_lock);
3205 3198 }
3206 3199 out:
3207 3200 if (err == 0 && attrzp) {
3208 3201 err2 = sa_bulk_update(attrzp->z_sa_hdl, xattr_bulk,
3209 3202 xattr_count, tx);
3210 3203 ASSERT(err2 == 0);
3211 3204 }
3212 3205
3213 3206 if (attrzp)
3214 3207 VN_RELE(ZTOV(attrzp));
3208 +
3215 3209 if (aclp)
3216 3210 zfs_acl_free(aclp);
3217 3211
3218 3212 if (fuidp) {
3219 3213 zfs_fuid_info_free(fuidp);
3220 3214 fuidp = NULL;
3221 3215 }
3222 3216
3223 3217 if (err) {
3224 3218 dmu_tx_abort(tx);
3225 3219 if (err == ERESTART)
3226 3220 goto top;
3227 3221 } else {
3228 3222 err2 = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
3229 3223 dmu_tx_commit(tx);
3230 3224 }
3231 3225
3232 3226 out2:
3233 3227 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
3234 3228 zil_commit(zilog, 0);
3235 3229
3236 3230 ZFS_EXIT(zfsvfs);
3237 3231 return (err);
3238 3232 }
3239 3233
3240 3234 typedef struct zfs_zlock {
3241 3235 krwlock_t *zl_rwlock; /* lock we acquired */
3242 3236 znode_t *zl_znode; /* znode we held */
3243 3237 struct zfs_zlock *zl_next; /* next in list */
3244 3238 } zfs_zlock_t;
3245 3239
3246 3240 /*
3247 3241 * Drop locks and release vnodes that were held by zfs_rename_lock().
3248 3242 */
3249 3243 static void
3250 3244 zfs_rename_unlock(zfs_zlock_t **zlpp)
3251 3245 {
3252 3246 zfs_zlock_t *zl;
3253 3247
3254 3248 while ((zl = *zlpp) != NULL) {
3255 3249 if (zl->zl_znode != NULL)
3256 3250 VN_RELE(ZTOV(zl->zl_znode));
3257 3251 rw_exit(zl->zl_rwlock);
3258 3252 *zlpp = zl->zl_next;
3259 3253 kmem_free(zl, sizeof (*zl));
3260 3254 }
3261 3255 }
3262 3256
3263 3257 /*
3264 3258 * Search back through the directory tree, using the ".." entries.
3265 3259 * Lock each directory in the chain to prevent concurrent renames.
3266 3260 * Fail any attempt to move a directory into one of its own descendants.
3267 3261 * XXX - z_parent_lock can overlap with map or grow locks
3268 3262 */
3269 3263 static int
3270 3264 zfs_rename_lock(znode_t *szp, znode_t *tdzp, znode_t *sdzp, zfs_zlock_t **zlpp)
3271 3265 {
3272 3266 zfs_zlock_t *zl;
3273 3267 znode_t *zp = tdzp;
3274 3268 uint64_t rootid = zp->z_zfsvfs->z_root;
3275 3269 uint64_t oidp = zp->z_id;
3276 3270 krwlock_t *rwlp = &szp->z_parent_lock;
3277 3271 krw_t rw = RW_WRITER;
3278 3272
3279 3273 /*
3280 3274 * First pass write-locks szp and compares to zp->z_id.
3281 3275 * Later passes read-lock zp and compare to zp->z_parent.
3282 3276 */
3283 3277 do {
3284 3278 if (!rw_tryenter(rwlp, rw)) {
3285 3279 /*
3286 3280 * Another thread is renaming in this path.
3287 3281 * Note that if we are a WRITER, we don't have any
3288 3282 * parent_locks held yet.
3289 3283 */
3290 3284 if (rw == RW_READER && zp->z_id > szp->z_id) {
3291 3285 /*
3292 3286 * Drop our locks and restart
3293 3287 */
3294 3288 zfs_rename_unlock(&zl);
3295 3289 *zlpp = NULL;
3296 3290 zp = tdzp;
3297 3291 oidp = zp->z_id;
3298 3292 rwlp = &szp->z_parent_lock;
3299 3293 rw = RW_WRITER;
3300 3294 continue;
3301 3295 } else {
3302 3296 /*
3303 3297 * Wait for other thread to drop its locks
3304 3298 */
3305 3299 rw_enter(rwlp, rw);
3306 3300 }
3307 3301 }
3308 3302
3309 3303 zl = kmem_alloc(sizeof (*zl), KM_SLEEP);
3310 3304 zl->zl_rwlock = rwlp;
3311 3305 zl->zl_znode = NULL;
3312 3306 zl->zl_next = *zlpp;
3313 3307 *zlpp = zl;
3314 3308
3315 3309 if (oidp == szp->z_id) /* We're a descendant of szp */
3316 3310 return (SET_ERROR(EINVAL));
3317 3311
3318 3312 if (oidp == rootid) /* We've hit the top */
3319 3313 return (0);
3320 3314
3321 3315 if (rw == RW_READER) { /* i.e. not the first pass */
3322 3316 int error = zfs_zget(zp->z_zfsvfs, oidp, &zp);
3323 3317 if (error)
3324 3318 return (error);
3325 3319 zl->zl_znode = zp;
3326 3320 }
3327 3321 (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(zp->z_zfsvfs),
3328 3322 &oidp, sizeof (oidp));
3329 3323 rwlp = &zp->z_parent_lock;
3330 3324 rw = RW_READER;
3331 3325
3332 3326 } while (zp->z_id != sdzp->z_id);
3333 3327
3334 3328 return (0);
3335 3329 }
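/*
 * A self-contained sketch (not from the original source) of the descendant
 * check zfs_rename_lock() performs above: walk the ".." (parent) pointers
 * upward from the target directory; if the source directory is reached
 * before the root, the rename would move a directory into one of its own
 * descendants and fails with EINVAL.  parent_of() is a hypothetical
 * callback standing in for the SA_ZPL_PARENT lookup.
 */
typedef unsigned long long objid_t;	/* hypothetical object-id type */

static int
is_descendant(objid_t src_id, objid_t tgt_id, objid_t root_id,
    objid_t (*parent_of)(objid_t))
{
	objid_t id = tgt_id;

	for (;;) {
		if (id == src_id)
			return (1);	/* target lies under source: disallow */
		if (id == root_id)
			return (0);	/* reached the top: rename is safe */
		id = parent_of(id);	/* follow ".." one level up */
	}
}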
3336 3330
3337 3331 /*
3338 3332 * Move an entry from the provided source directory to the target
3339 3333 * directory. Change the entry name as indicated.
3340 3334 *
3341 3335 * IN: sdvp - Source directory containing the "old entry".
3342 3336 * snm - Old entry name.
3343 3337 * tdvp - Target directory to contain the "new entry".
3344 3338 * tnm - New entry name.
3345 3339 * cr - credentials of caller.
3346 3340 * ct - caller context
3347 3341 * flags - case flags
3348 3342 *
3349 - * RETURN: 0 if success
3350 - * error code if failure
3343 + * RETURN: 0 on success, error code on failure.
3351 3344 *
3352 3345 * Timestamps:
3353 3346 * sdvp,tdvp - ctime|mtime updated
3354 3347 */
3355 3348 /*ARGSUSED*/
3356 3349 static int
3357 3350 zfs_rename(vnode_t *sdvp, char *snm, vnode_t *tdvp, char *tnm, cred_t *cr,
3358 3351 caller_context_t *ct, int flags)
3359 3352 {
3360 3353 znode_t *tdzp, *szp, *tzp;
3361 3354 znode_t *sdzp = VTOZ(sdvp);
3362 3355 zfsvfs_t *zfsvfs = sdzp->z_zfsvfs;
3363 3356 zilog_t *zilog;
3364 3357 vnode_t *realvp;
3365 3358 zfs_dirlock_t *sdl, *tdl;
3366 3359 dmu_tx_t *tx;
3367 3360 zfs_zlock_t *zl;
3368 3361 int cmp, serr, terr;
3369 3362 int error = 0;
3370 3363 int zflg = 0;
3371 3364
3372 3365 ZFS_ENTER(zfsvfs);
3373 3366 ZFS_VERIFY_ZP(sdzp);
3374 3367 zilog = zfsvfs->z_log;
3375 3368
3376 3369 /*
3377 3370 * Make sure we have the real vp for the target directory.
3378 3371 */
3379 3372 if (VOP_REALVP(tdvp, &realvp, ct) == 0)
3380 3373 tdvp = realvp;
3381 3374
3382 3375 if (tdvp->v_vfsp != sdvp->v_vfsp || zfsctl_is_node(tdvp)) {
3383 3376 ZFS_EXIT(zfsvfs);
3384 3377 return (SET_ERROR(EXDEV));
3385 3378 }
3386 3379
3387 3380 tdzp = VTOZ(tdvp);
3388 3381 ZFS_VERIFY_ZP(tdzp);
3389 3382 if (zfsvfs->z_utf8 && u8_validate(tnm,
3390 3383 strlen(tnm), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
3391 3384 ZFS_EXIT(zfsvfs);
3392 3385 return (SET_ERROR(EILSEQ));
3393 3386 }
3394 3387
3395 3388 if (flags & FIGNORECASE)
3396 3389 zflg |= ZCILOOK;
3397 3390
3398 3391 top:
3399 3392 szp = NULL;
3400 3393 tzp = NULL;
3401 3394 zl = NULL;
3402 3395
3403 3396 /*
3404 3397 * This is to prevent the creation of links into attribute space
3405 3398 	 * by renaming a linked file into/out of an attribute directory.
3406 3399 * See the comment in zfs_link() for why this is considered bad.
3407 3400 */
3408 3401 if ((tdzp->z_pflags & ZFS_XATTR) != (sdzp->z_pflags & ZFS_XATTR)) {
3409 3402 ZFS_EXIT(zfsvfs);
3410 3403 return (SET_ERROR(EINVAL));
3411 3404 }
3412 3405
3413 3406 /*
3414 3407 * Lock source and target directory entries. To prevent deadlock,
3415 3408 * a lock ordering must be defined. We lock the directory with
3416 3409 * the smallest object id first, or if it's a tie, the one with
3417 3410 * the lexically first name.
3418 3411 */
3419 3412 if (sdzp->z_id < tdzp->z_id) {
3420 3413 cmp = -1;
3421 3414 } else if (sdzp->z_id > tdzp->z_id) {
3422 3415 cmp = 1;
3423 3416 } else {
3424 3417 /*
3425 3418 * First compare the two name arguments without
3426 3419 * considering any case folding.
3427 3420 */
3428 3421 int nofold = (zfsvfs->z_norm & ~U8_TEXTPREP_TOUPPER);
3429 3422
3430 3423 cmp = u8_strcmp(snm, tnm, 0, nofold, U8_UNICODE_LATEST, &error);
3431 3424 ASSERT(error == 0 || !zfsvfs->z_utf8);
3432 3425 if (cmp == 0) {
3433 3426 /*
3434 3427 * POSIX: "If the old argument and the new argument
3435 3428 * both refer to links to the same existing file,
3436 3429 * the rename() function shall return successfully
3437 3430 * and perform no other action."
3438 3431 */
3439 3432 ZFS_EXIT(zfsvfs);
3440 3433 return (0);
3441 3434 }
3442 3435 /*
3443 3436 * If the file system is case-folding, then we may
3444 3437 * have some more checking to do. A case-folding file
3445 3438 * system is either supporting mixed case sensitivity
3446 3439 * access or is completely case-insensitive. Note
3447 3440 * that the file system is always case preserving.
3448 3441 *
3449 3442 * In mixed sensitivity mode case sensitive behavior
3450 3443 * is the default. FIGNORECASE must be used to
3451 3444 * explicitly request case insensitive behavior.
3452 3445 *
3453 3446 * If the source and target names provided differ only
3454 3447 * by case (e.g., a request to rename 'tim' to 'Tim'),
3455 3448 * we will treat this as a special case in the
3456 3449 * case-insensitive mode: as long as the source name
3457 3450 * is an exact match, we will allow this to proceed as
3458 3451 * a name-change request.
3459 3452 */
3460 3453 if ((zfsvfs->z_case == ZFS_CASE_INSENSITIVE ||
3461 3454 (zfsvfs->z_case == ZFS_CASE_MIXED &&
3462 3455 flags & FIGNORECASE)) &&
3463 3456 u8_strcmp(snm, tnm, 0, zfsvfs->z_norm, U8_UNICODE_LATEST,
3464 3457 &error) == 0) {
3465 3458 /*
3466 3459 * case preserving rename request, require exact
3467 3460 * name matches
3468 3461 */
3469 3462 zflg |= ZCIEXACT;
3470 3463 zflg &= ~ZCILOOK;
3471 3464 }
3472 3465 }
3473 3466
3474 3467 /*
3475 3468 * If the source and destination directories are the same, we should
3476 3469 * grab the z_name_lock of that directory only once.
3477 3470 */
3478 3471 if (sdzp == tdzp) {
3479 3472 zflg |= ZHAVELOCK;
3480 3473 rw_enter(&sdzp->z_name_lock, RW_READER);
3481 3474 }
3482 3475
3483 3476 if (cmp < 0) {
3484 3477 serr = zfs_dirent_lock(&sdl, sdzp, snm, &szp,
3485 3478 ZEXISTS | zflg, NULL, NULL);
3486 3479 terr = zfs_dirent_lock(&tdl,
3487 3480 tdzp, tnm, &tzp, ZRENAMING | zflg, NULL, NULL);
3488 3481 } else {
3489 3482 terr = zfs_dirent_lock(&tdl,
3490 3483 tdzp, tnm, &tzp, zflg, NULL, NULL);
3491 3484 serr = zfs_dirent_lock(&sdl,
3492 3485 sdzp, snm, &szp, ZEXISTS | ZRENAMING | zflg,
3493 3486 NULL, NULL);
3494 3487 }
3495 3488
3496 3489 if (serr) {
3497 3490 /*
3498 3491 * Source entry invalid or not there.
3499 3492 */
3500 3493 if (!terr) {
3501 3494 zfs_dirent_unlock(tdl);
3502 3495 if (tzp)
3503 3496 VN_RELE(ZTOV(tzp));
3504 3497 }
3505 3498
3506 3499 if (sdzp == tdzp)
3507 3500 rw_exit(&sdzp->z_name_lock);
3508 3501
3509 3502 if (strcmp(snm, "..") == 0)
3510 3503 serr = SET_ERROR(EINVAL);
3511 3504 ZFS_EXIT(zfsvfs);
3512 3505 return (serr);
3513 3506 }
3514 3507 if (terr) {
3515 3508 zfs_dirent_unlock(sdl);
3516 3509 VN_RELE(ZTOV(szp));
3517 3510
3518 3511 if (sdzp == tdzp)
3519 3512 rw_exit(&sdzp->z_name_lock);
3520 3513
3521 3514 if (strcmp(tnm, "..") == 0)
3522 3515 terr = SET_ERROR(EINVAL);
3523 3516 ZFS_EXIT(zfsvfs);
3524 3517 return (terr);
3525 3518 }
3526 3519
3527 3520 /*
3528 3521 * Must have write access at the source to remove the old entry
3529 3522 * and write access at the target to create the new entry.
3530 3523 * Note that if target and source are the same, this can be
3531 3524 * done in a single check.
3532 3525 */
3533 3526
3534 3527 if (error = zfs_zaccess_rename(sdzp, szp, tdzp, tzp, cr))
3535 3528 goto out;
3536 3529
3537 3530 if (ZTOV(szp)->v_type == VDIR) {
3538 3531 /*
3539 3532 * Check to make sure rename is valid.
3540 3533 * Can't do a move like this: /usr/a/b to /usr/a/b/c/d
3541 3534 */
3542 3535 if (error = zfs_rename_lock(szp, tdzp, sdzp, &zl))
3543 3536 goto out;
3544 3537 }
3545 3538
3546 3539 /*
3547 3540 * Does target exist?
3548 3541 */
3549 3542 if (tzp) {
3550 3543 /*
3551 3544 * Source and target must be the same type.
3552 3545 */
3553 3546 if (ZTOV(szp)->v_type == VDIR) {
3554 3547 if (ZTOV(tzp)->v_type != VDIR) {
3555 3548 error = SET_ERROR(ENOTDIR);
3556 3549 goto out;
3557 3550 }
3558 3551 } else {
3559 3552 if (ZTOV(tzp)->v_type == VDIR) {
3560 3553 error = SET_ERROR(EISDIR);
3561 3554 goto out;
3562 3555 }
3563 3556 }
3564 3557 /*
3565 3558 * POSIX dictates that when the source and target
3566 3559 * entries refer to the same file object, rename
3567 3560 * must do nothing and exit without error.
3568 3561 */
3569 3562 if (szp->z_id == tzp->z_id) {
3570 3563 error = 0;
3571 3564 goto out;
3572 3565 }
3573 3566 }
3574 3567
3575 3568 vnevent_rename_src(ZTOV(szp), sdvp, snm, ct);
3576 3569 if (tzp)
3577 3570 vnevent_rename_dest(ZTOV(tzp), tdvp, tnm, ct);
3578 3571
3579 3572 /*
3580 3573 	 * Notify the target directory if it is not the same
3581 3574 	 * as the source directory.
3582 3575 */
3583 3576 if (tdvp != sdvp) {
3584 3577 vnevent_rename_dest_dir(tdvp, ct);
3585 3578 }
3586 3579
3587 3580 tx = dmu_tx_create(zfsvfs->z_os);
3588 3581 dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE);
3589 3582 dmu_tx_hold_sa(tx, sdzp->z_sa_hdl, B_FALSE);
3590 3583 dmu_tx_hold_zap(tx, sdzp->z_id, FALSE, snm);
3591 3584 dmu_tx_hold_zap(tx, tdzp->z_id, TRUE, tnm);
3592 3585 if (sdzp != tdzp) {
3593 3586 dmu_tx_hold_sa(tx, tdzp->z_sa_hdl, B_FALSE);
3594 3587 zfs_sa_upgrade_txholds(tx, tdzp);
3595 3588 }
3596 3589 if (tzp) {
3597 3590 dmu_tx_hold_sa(tx, tzp->z_sa_hdl, B_FALSE);
3598 3591 zfs_sa_upgrade_txholds(tx, tzp);
3599 3592 }
3600 3593
3601 3594 zfs_sa_upgrade_txholds(tx, szp);
3602 3595 dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
3603 3596 error = dmu_tx_assign(tx, TXG_NOWAIT);
3604 3597 if (error) {
3605 3598 if (zl != NULL)
3606 3599 zfs_rename_unlock(&zl);
3607 3600 zfs_dirent_unlock(sdl);
3608 3601 zfs_dirent_unlock(tdl);
3609 3602
3610 3603 if (sdzp == tdzp)
3611 3604 rw_exit(&sdzp->z_name_lock);
3612 3605
3613 3606 VN_RELE(ZTOV(szp));
3614 3607 if (tzp)
3615 3608 VN_RELE(ZTOV(tzp));
3616 3609 if (error == ERESTART) {
3617 3610 dmu_tx_wait(tx);
3618 3611 dmu_tx_abort(tx);
3619 3612 goto top;
3620 3613 }
3621 3614 dmu_tx_abort(tx);
3622 3615 ZFS_EXIT(zfsvfs);
3623 3616 return (error);
3624 3617 }
3625 3618
3626 3619 if (tzp) /* Attempt to remove the existing target */
3627 3620 error = zfs_link_destroy(tdl, tzp, tx, zflg, NULL);
3628 3621
3629 3622 if (error == 0) {
3630 3623 error = zfs_link_create(tdl, szp, tx, ZRENAMING);
3631 3624 if (error == 0) {
3632 3625 szp->z_pflags |= ZFS_AV_MODIFIED;
3633 3626
3634 3627 error = sa_update(szp->z_sa_hdl, SA_ZPL_FLAGS(zfsvfs),
3635 3628 (void *)&szp->z_pflags, sizeof (uint64_t), tx);
3636 3629 ASSERT0(error);
3637 3630
3638 3631 error = zfs_link_destroy(sdl, szp, tx, ZRENAMING, NULL);
3639 3632 if (error == 0) {
3640 3633 zfs_log_rename(zilog, tx, TX_RENAME |
3641 3634 (flags & FIGNORECASE ? TX_CI : 0), sdzp,
3642 3635 sdl->dl_name, tdzp, tdl->dl_name, szp);
3643 3636
3644 3637 /*
3645 3638 * Update path information for the target vnode
3646 3639 */
3647 3640 vn_renamepath(tdvp, ZTOV(szp), tnm,
3648 3641 strlen(tnm));
3649 3642 } else {
3650 3643 /*
3651 3644 * At this point, we have successfully created
3652 3645 * the target name, but have failed to remove
3653 3646 * the source name. Since the create was done
3654 3647 * with the ZRENAMING flag, there are
3655 3648 * complications; for one, the link count is
3656 3649 * wrong. The easiest way to deal with this
3657 3650 * is to remove the newly created target, and
3658 3651 * return the original error. This must
3659 3652 * succeed; fortunately, it is very unlikely to
3660 3653 * fail, since we just created it.
3661 3654 */
3662 3655 VERIFY3U(zfs_link_destroy(tdl, szp, tx,
3663 3656 ZRENAMING, NULL), ==, 0);
3664 3657 }
3665 3658 }
3666 3659 }
3667 3660
3668 3661 dmu_tx_commit(tx);
3669 3662 out:
3670 3663 if (zl != NULL)
3671 3664 zfs_rename_unlock(&zl);
3672 3665
3673 3666 zfs_dirent_unlock(sdl);
3674 3667 zfs_dirent_unlock(tdl);
3675 3668
3676 3669 if (sdzp == tdzp)
3677 3670 rw_exit(&sdzp->z_name_lock);
3678 3671
3679 3672
3680 3673 VN_RELE(ZTOV(szp));
3681 3674 if (tzp)
3682 3675 VN_RELE(ZTOV(tzp));
3683 3676
3684 3677 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
3685 3678 zil_commit(zilog, 0);
3686 3679
3687 3680 ZFS_EXIT(zfsvfs);
3688 3681 return (error);
3689 3682 }
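
The deadlock-avoidance rule above (lock the directory with the smaller object id first, falling back to a name comparison when both ids are equal) can be illustrated with a minimal userland sketch. The object ids and names are hypothetical, and strcmp() stands in for the kernel's u8_strcmp():

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/*
 * Returns < 0 if the source directory entry should be locked first,
 * > 0 if the target side should be locked first (mirrors 'cmp' above).
 */
static int
rename_lock_order(uint64_t sdid, const char *snm, uint64_t tdid,
    const char *tnm)
{
	if (sdid < tdid)
		return (-1);
	if (sdid > tdid)
		return (1);
	/* Same directory: fall back to comparing the two names. */
	return (strcmp(snm, tnm));
}

int
main(void)
{
	/* Hypothetical object ids and names, for illustration only. */
	printf("%d\n", rename_lock_order(12, "a", 57, "b") < 0);	/* 1 */
	printf("%d\n", rename_lock_order(57, "tim", 57, "Tim") > 0);	/* 1 */
	return (0);
}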
3690 3683
3691 3684 /*
3692 3685 * Insert the indicated symbolic reference entry into the directory.
3693 3686 *
3694 3687 * IN: dvp - Directory to contain new symbolic link.
3695 3688 * link - Name for new symlink entry.
3696 3689 * vap - Attributes of new entry.
3697 - * target - Target path of new symlink.
3698 3690 * cr - credentials of caller.
3699 3691 * ct - caller context
3700 3692 * flags - case flags
3701 3693 *
3702 - * RETURN: 0 if success
3703 - * error code if failure
3694 + * RETURN: 0 on success, error code on failure.
3704 3695 *
3705 3696 * Timestamps:
3706 3697 * dvp - ctime|mtime updated
3707 3698 */
3708 3699 /*ARGSUSED*/
3709 3700 static int
3710 3701 zfs_symlink(vnode_t *dvp, char *name, vattr_t *vap, char *link, cred_t *cr,
3711 3702 caller_context_t *ct, int flags)
3712 3703 {
3713 3704 znode_t *zp, *dzp = VTOZ(dvp);
3714 3705 zfs_dirlock_t *dl;
3715 3706 dmu_tx_t *tx;
3716 3707 zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
3717 3708 zilog_t *zilog;
3718 3709 uint64_t len = strlen(link);
3719 3710 int error;
3720 3711 int zflg = ZNEW;
3721 3712 zfs_acl_ids_t acl_ids;
3722 3713 boolean_t fuid_dirtied;
3723 3714 uint64_t txtype = TX_SYMLINK;
3724 3715
3725 3716 ASSERT(vap->va_type == VLNK);
3726 3717
3727 3718 ZFS_ENTER(zfsvfs);
3728 3719 ZFS_VERIFY_ZP(dzp);
3729 3720 zilog = zfsvfs->z_log;
3730 3721
3731 3722 if (zfsvfs->z_utf8 && u8_validate(name, strlen(name),
3732 3723 NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
3733 3724 ZFS_EXIT(zfsvfs);
3734 3725 return (SET_ERROR(EILSEQ));
3735 3726 }
3736 3727 if (flags & FIGNORECASE)
3737 3728 zflg |= ZCILOOK;
3738 3729
3739 3730 if (len > MAXPATHLEN) {
3740 3731 ZFS_EXIT(zfsvfs);
3741 3732 return (SET_ERROR(ENAMETOOLONG));
3742 3733 }
3743 3734
3744 3735 if ((error = zfs_acl_ids_create(dzp, 0,
3745 3736 vap, cr, NULL, &acl_ids)) != 0) {
3746 3737 ZFS_EXIT(zfsvfs);
3747 3738 return (error);
3748 3739 }
3749 3740 top:
3750 3741 /*
3751 3742 * Attempt to lock directory; fail if entry already exists.
3752 3743 */
3753 3744 error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, NULL, NULL);
3754 3745 if (error) {
3755 3746 zfs_acl_ids_free(&acl_ids);
3756 3747 ZFS_EXIT(zfsvfs);
3757 3748 return (error);
3758 3749 }
3759 3750
3760 3751 if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) {
3761 3752 zfs_acl_ids_free(&acl_ids);
3762 3753 zfs_dirent_unlock(dl);
3763 3754 ZFS_EXIT(zfsvfs);
3764 3755 return (error);
3765 3756 }
3766 3757
3767 3758 if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) {
3768 3759 zfs_acl_ids_free(&acl_ids);
3769 3760 zfs_dirent_unlock(dl);
3770 3761 ZFS_EXIT(zfsvfs);
3771 3762 return (SET_ERROR(EDQUOT));
3772 3763 }
3773 3764 tx = dmu_tx_create(zfsvfs->z_os);
3774 3765 fuid_dirtied = zfsvfs->z_fuid_dirty;
3775 3766 dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, MAX(1, len));
3776 3767 dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name);
3777 3768 dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
3778 3769 ZFS_SA_BASE_ATTR_SIZE + len);
3779 3770 dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE);
3780 3771 if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
3781 3772 dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
3782 3773 acl_ids.z_aclp->z_acl_bytes);
3783 3774 }
3784 3775 if (fuid_dirtied)
3785 3776 zfs_fuid_txhold(zfsvfs, tx);
3786 3777 error = dmu_tx_assign(tx, TXG_NOWAIT);
3787 3778 if (error) {
3788 3779 zfs_dirent_unlock(dl);
3789 3780 if (error == ERESTART) {
3790 3781 dmu_tx_wait(tx);
3791 3782 dmu_tx_abort(tx);
3792 3783 goto top;
3793 3784 }
3794 3785 zfs_acl_ids_free(&acl_ids);
3795 3786 dmu_tx_abort(tx);
3796 3787 ZFS_EXIT(zfsvfs);
3797 3788 return (error);
3798 3789 }
3799 3790
3800 3791 /*
3801 3792 * Create a new object for the symlink.
3802 3793 	 * For version 4 ZPL datasets the symlink will be an SA attribute.
3803 3794 */
3804 3795 zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);
3805 3796
3806 3797 if (fuid_dirtied)
3807 3798 zfs_fuid_sync(zfsvfs, tx);
3808 3799
3809 3800 mutex_enter(&zp->z_lock);
3810 3801 if (zp->z_is_sa)
3811 3802 error = sa_update(zp->z_sa_hdl, SA_ZPL_SYMLINK(zfsvfs),
3812 3803 link, len, tx);
3813 3804 else
3814 3805 zfs_sa_symlink(zp, link, len, tx);
3815 3806 mutex_exit(&zp->z_lock);
3816 3807
3817 3808 zp->z_size = len;
3818 3809 (void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs),
3819 3810 &zp->z_size, sizeof (zp->z_size), tx);
3820 3811 /*
3821 3812 * Insert the new object into the directory.
3822 3813 */
3823 3814 (void) zfs_link_create(dl, zp, tx, ZNEW);
3824 3815
3825 3816 if (flags & FIGNORECASE)
3826 3817 txtype |= TX_CI;
3827 3818 zfs_log_symlink(zilog, tx, txtype, dzp, zp, name, link);
3828 3819
3829 3820 zfs_acl_ids_free(&acl_ids);
3830 3821
3831 3822 dmu_tx_commit(tx);
3832 3823
3833 3824 zfs_dirent_unlock(dl);
3834 3825
3835 3826 VN_RELE(ZTOV(zp));
3836 3827
3837 3828 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
3838 3829 zil_commit(zilog, 0);
3839 3830
3840 3831 ZFS_EXIT(zfsvfs);
3841 3832 return (error);
3842 3833 }
3843 3834
3844 3835 /*
3845 3836 * Return, in the buffer contained in the provided uio structure,
3846 3837 * the symbolic path referred to by vp.
3847 3838 *
3848 3839 * IN: vp - vnode of symbolic link.
3849 - * uoip - structure to contain the link path.
3840 + * uio - structure to contain the link path.
3850 3841 * cr - credentials of caller.
3851 3842 * ct - caller context
3852 3843 *
3853 - * OUT: uio - structure to contain the link path.
3844 + * OUT: uio - structure containing the link path.
3854 3845 *
3855 - * RETURN: 0 if success
3856 - * error code if failure
3846 + * RETURN: 0 on success, error code on failure.
3857 3847 *
3858 3848 * Timestamps:
3859 3849 * vp - atime updated
3860 3850 */
3861 3851 /* ARGSUSED */
3862 3852 static int
3863 3853 zfs_readlink(vnode_t *vp, uio_t *uio, cred_t *cr, caller_context_t *ct)
3864 3854 {
3865 3855 znode_t *zp = VTOZ(vp);
3866 3856 zfsvfs_t *zfsvfs = zp->z_zfsvfs;
3867 3857 int error;
3868 3858
3869 3859 ZFS_ENTER(zfsvfs);
3870 3860 ZFS_VERIFY_ZP(zp);
3871 3861
3872 3862 mutex_enter(&zp->z_lock);
3873 3863 if (zp->z_is_sa)
3874 3864 error = sa_lookup_uio(zp->z_sa_hdl,
3875 3865 SA_ZPL_SYMLINK(zfsvfs), uio);
3876 3866 else
3877 3867 error = zfs_sa_readlink(zp, uio);
3878 3868 mutex_exit(&zp->z_lock);
3879 3869
3880 3870 ZFS_ACCESSTIME_STAMP(zfsvfs, zp);
3881 3871
3882 3872 ZFS_EXIT(zfsvfs);
3883 3873 return (error);
3884 3874 }
3885 3875
3886 3876 /*
3887 3877 * Insert a new entry into directory tdvp referencing svp.
3888 3878 *
3889 3879 * IN: tdvp - Directory to contain new entry.
3890 3880 * svp - vnode of new entry.
3891 3881 * name - name of new entry.
3892 3882 * cr - credentials of caller.
3893 3883 * ct - caller context
3894 3884 *
3895 - * RETURN: 0 if success
3896 - * error code if failure
3885 + * RETURN: 0 on success, error code on failure.
3897 3886 *
3898 3887 * Timestamps:
3899 3888 * tdvp - ctime|mtime updated
3900 3889 * svp - ctime updated
3901 3890 */
3902 3891 /* ARGSUSED */
3903 3892 static int
3904 3893 zfs_link(vnode_t *tdvp, vnode_t *svp, char *name, cred_t *cr,
3905 3894 caller_context_t *ct, int flags)
3906 3895 {
3907 3896 znode_t *dzp = VTOZ(tdvp);
3908 3897 znode_t *tzp, *szp;
3909 3898 zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
3910 3899 zilog_t *zilog;
3911 3900 zfs_dirlock_t *dl;
3912 3901 dmu_tx_t *tx;
3913 3902 vnode_t *realvp;
3914 3903 int error;
3915 3904 int zf = ZNEW;
3916 3905 uint64_t parent;
3917 3906 uid_t owner;
3918 3907
3919 3908 ASSERT(tdvp->v_type == VDIR);
3920 3909
3921 3910 ZFS_ENTER(zfsvfs);
3922 3911 ZFS_VERIFY_ZP(dzp);
3923 3912 zilog = zfsvfs->z_log;
3924 3913
3925 3914 if (VOP_REALVP(svp, &realvp, ct) == 0)
3926 3915 svp = realvp;
3927 3916
3928 3917 /*
3929 3918 * POSIX dictates that we return EPERM here.
3930 3919 * Better choices include ENOTSUP or EISDIR.
3931 3920 */
3932 3921 if (svp->v_type == VDIR) {
3933 3922 ZFS_EXIT(zfsvfs);
3934 3923 return (SET_ERROR(EPERM));
3935 3924 }
3936 3925
3937 3926 if (svp->v_vfsp != tdvp->v_vfsp || zfsctl_is_node(svp)) {
3938 3927 ZFS_EXIT(zfsvfs);
3939 3928 return (SET_ERROR(EXDEV));
3940 3929 }
3941 3930
3942 3931 szp = VTOZ(svp);
3943 3932 ZFS_VERIFY_ZP(szp);
3944 3933
3945 3934 /* Prevent links to .zfs/shares files */
3946 3935
3947 3936 if ((error = sa_lookup(szp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
3948 3937 &parent, sizeof (uint64_t))) != 0) {
3949 3938 ZFS_EXIT(zfsvfs);
3950 3939 return (error);
3951 3940 }
3952 3941 if (parent == zfsvfs->z_shares_dir) {
3953 3942 ZFS_EXIT(zfsvfs);
3954 3943 return (SET_ERROR(EPERM));
3955 3944 }
3956 3945
3957 3946 if (zfsvfs->z_utf8 && u8_validate(name,
3958 3947 strlen(name), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
3959 3948 ZFS_EXIT(zfsvfs);
3960 3949 return (SET_ERROR(EILSEQ));
3961 3950 }
3962 3951 if (flags & FIGNORECASE)
3963 3952 zf |= ZCILOOK;
3964 3953
3965 3954 /*
3966 3955 * We do not support links between attributes and non-attributes
3967 3956 * because of the potential security risk of creating links
3968 3957 * into "normal" file space in order to circumvent restrictions
3969 3958 * imposed in attribute space.
3970 3959 */
3971 3960 if ((szp->z_pflags & ZFS_XATTR) != (dzp->z_pflags & ZFS_XATTR)) {
3972 3961 ZFS_EXIT(zfsvfs);
3973 3962 return (SET_ERROR(EINVAL));
3974 3963 }
3975 3964
3976 3965
3977 3966 owner = zfs_fuid_map_id(zfsvfs, szp->z_uid, cr, ZFS_OWNER);
3978 3967 if (owner != crgetuid(cr) && secpolicy_basic_link(cr) != 0) {
3979 3968 ZFS_EXIT(zfsvfs);
3980 3969 return (SET_ERROR(EPERM));
3981 3970 }
3982 3971
3983 3972 if (error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr)) {
3984 3973 ZFS_EXIT(zfsvfs);
3985 3974 return (error);
3986 3975 }
3987 3976
3988 3977 top:
3989 3978 /*
3990 3979 * Attempt to lock directory; fail if entry already exists.
3991 3980 */
3992 3981 error = zfs_dirent_lock(&dl, dzp, name, &tzp, zf, NULL, NULL);
3993 3982 if (error) {
3994 3983 ZFS_EXIT(zfsvfs);
3995 3984 return (error);
3996 3985 }
3997 3986
3998 3987 tx = dmu_tx_create(zfsvfs->z_os);
3999 3988 dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE);
4000 3989 dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name);
4001 3990 zfs_sa_upgrade_txholds(tx, szp);
4002 3991 zfs_sa_upgrade_txholds(tx, dzp);
4003 3992 error = dmu_tx_assign(tx, TXG_NOWAIT);
4004 3993 if (error) {
4005 3994 zfs_dirent_unlock(dl);
4006 3995 if (error == ERESTART) {
4007 3996 dmu_tx_wait(tx);
4008 3997 dmu_tx_abort(tx);
4009 3998 goto top;
4010 3999 }
4011 4000 dmu_tx_abort(tx);
4012 4001 ZFS_EXIT(zfsvfs);
4013 4002 return (error);
4014 4003 }
4015 4004
4016 4005 error = zfs_link_create(dl, szp, tx, 0);
4017 4006
4018 4007 if (error == 0) {
4019 4008 uint64_t txtype = TX_LINK;
4020 4009 if (flags & FIGNORECASE)
4021 4010 txtype |= TX_CI;
4022 4011 zfs_log_link(zilog, tx, txtype, dzp, szp, name);
4023 4012 }
4024 4013
4025 4014 dmu_tx_commit(tx);
4026 4015
4027 4016 zfs_dirent_unlock(dl);
4028 4017
4029 4018 if (error == 0) {
4030 4019 vnevent_link(svp, ct);
4031 4020 }
4032 4021
4033 4022 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
4034 4023 zil_commit(zilog, 0);
4035 4024
4036 4025 ZFS_EXIT(zfsvfs);
4037 4026 return (error);
4038 4027 }
4039 4028
4040 4029 /*
4041 4030 * zfs_null_putapage() is used when the file system has been force
4042 4031 * unmounted. It just drops the pages.
4043 4032 */
4044 4033 /* ARGSUSED */
4045 4034 static int
4046 4035 zfs_null_putapage(vnode_t *vp, page_t *pp, u_offset_t *offp,
4047 4036 size_t *lenp, int flags, cred_t *cr)
4048 4037 {
4049 4038 pvn_write_done(pp, B_INVAL|B_FORCE|B_ERROR);
4050 4039 return (0);
4051 4040 }
4052 4041
4053 4042 /*
4054 4043 * Push a page out to disk, klustering if possible.
4055 4044 *
4056 4045 * IN: vp - file to push page to.
4057 4046 * pp - page to push.
4058 4047 * flags - additional flags.
4059 4048 * cr - credentials of caller.
4060 4049 *
4061 4050 * OUT: offp - start of range pushed.
4062 4051 * lenp - len of range pushed.
4063 4052 *
4064 - * RETURN: 0 if success
4065 - * error code if failure
4053 + * RETURN: 0 on success, error code on failure.
4066 4054 *
4067 4055 * NOTE: callers must have locked the page to be pushed. On
4068 4056 * exit, the page (and all other pages in the kluster) must be
4069 4057 * unlocked.
4070 4058 */
4071 4059 /* ARGSUSED */
4072 4060 static int
4073 4061 zfs_putapage(vnode_t *vp, page_t *pp, u_offset_t *offp,
4074 4062 size_t *lenp, int flags, cred_t *cr)
4075 4063 {
4076 4064 znode_t *zp = VTOZ(vp);
4077 4065 zfsvfs_t *zfsvfs = zp->z_zfsvfs;
4078 4066 dmu_tx_t *tx;
4079 4067 u_offset_t off, koff;
4080 4068 size_t len, klen;
4081 4069 int err;
4082 4070
4083 4071 off = pp->p_offset;
4084 4072 len = PAGESIZE;
4085 4073 /*
4086 4074 * If our blocksize is bigger than the page size, try to kluster
4087 4075 * multiple pages so that we write a full block (thus avoiding
4088 4076 * a read-modify-write).
4089 4077 */
4090 4078 if (off < zp->z_size && zp->z_blksz > PAGESIZE) {
4091 4079 klen = P2ROUNDUP((ulong_t)zp->z_blksz, PAGESIZE);
4092 4080 koff = ISP2(klen) ? P2ALIGN(off, (u_offset_t)klen) : 0;
4093 4081 ASSERT(koff <= zp->z_size);
4094 4082 if (koff + klen > zp->z_size)
4095 4083 klen = P2ROUNDUP(zp->z_size - koff, (uint64_t)PAGESIZE);
4096 4084 pp = pvn_write_kluster(vp, pp, &off, &len, koff, klen, flags);
4097 4085 }
4098 4086 ASSERT3U(btop(len), ==, btopr(len));
4099 4087
4100 4088 /*
4101 4089 * Can't push pages past end-of-file.
4102 4090 */
4103 4091 if (off >= zp->z_size) {
4104 4092 /* ignore all pages */
4105 4093 err = 0;
4106 4094 goto out;
4107 4095 } else if (off + len > zp->z_size) {
4108 4096 int npages = btopr(zp->z_size - off);
4109 4097 page_t *trunc;
4110 4098
4111 4099 page_list_break(&pp, &trunc, npages);
4112 4100 /* ignore pages past end of file */
4113 4101 if (trunc)
4114 4102 pvn_write_done(trunc, flags);
4115 4103 len = zp->z_size - off;
4116 4104 }
4117 4105
4118 4106 if (zfs_owner_overquota(zfsvfs, zp, B_FALSE) ||
4119 4107 zfs_owner_overquota(zfsvfs, zp, B_TRUE)) {
4120 4108 err = SET_ERROR(EDQUOT);
4121 4109 goto out;
4122 4110 }
4123 4111 top:
4124 4112 tx = dmu_tx_create(zfsvfs->z_os);
4125 4113 dmu_tx_hold_write(tx, zp->z_id, off, len);
4126 4114
4127 4115 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
4128 4116 zfs_sa_upgrade_txholds(tx, zp);
4129 4117 err = dmu_tx_assign(tx, TXG_NOWAIT);
4130 4118 if (err != 0) {
4131 4119 if (err == ERESTART) {
4132 4120 dmu_tx_wait(tx);
4133 4121 dmu_tx_abort(tx);
4134 4122 goto top;
4135 4123 }
4136 4124 dmu_tx_abort(tx);
4137 4125 goto out;
4138 4126 }
4139 4127
4140 4128 if (zp->z_blksz <= PAGESIZE) {
4141 4129 caddr_t va = zfs_map_page(pp, S_READ);
4142 4130 ASSERT3U(len, <=, PAGESIZE);
4143 4131 dmu_write(zfsvfs->z_os, zp->z_id, off, len, va, tx);
4144 4132 zfs_unmap_page(pp, va);
4145 4133 } else {
4146 4134 err = dmu_write_pages(zfsvfs->z_os, zp->z_id, off, len, pp, tx);
4147 4135 }
4148 4136
4149 4137 if (err == 0) {
4150 4138 uint64_t mtime[2], ctime[2];
4151 4139 sa_bulk_attr_t bulk[3];
4152 4140 int count = 0;
4153 4141
4154 4142 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL,
4155 4143 &mtime, 16);
4156 4144 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
4157 4145 &ctime, 16);
4158 4146 SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
4159 4147 &zp->z_pflags, 8);
4160 4148 zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime,
4161 4149 B_TRUE);
4162 4150 zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, off, len, 0);
4163 4151 }
4164 4152 dmu_tx_commit(tx);
4165 4153
4166 4154 out:
4167 4155 pvn_write_done(pp, (err ? B_ERROR : 0) | flags);
4168 4156 if (offp)
4169 4157 *offp = off;
4170 4158 if (lenp)
4171 4159 *lenp = len;
4172 4160
4173 4161 return (err);
4174 4162 }
4175 4163
4176 4164 /*
4177 4165 * Copy the portion of the file indicated from pages into the file.
4178 4166 	 * The pages are stored in a page list attached to the file's vnode.
4179 4167 *
4180 4168 * IN: vp - vnode of file to push page data to.
4181 4169 * off - position in file to put data.
4182 4170 * len - amount of data to write.
4183 4171 * flags - flags to control the operation.
4184 4172 * cr - credentials of caller.
4185 4173 * ct - caller context.
4186 4174 *
4187 - * RETURN: 0 if success
4188 - * error code if failure
4175 + * RETURN: 0 on success, error code on failure.
4189 4176 *
4190 4177 * Timestamps:
4191 4178 * vp - ctime|mtime updated
4192 4179 */
4193 4180 /*ARGSUSED*/
4194 4181 static int
4195 4182 zfs_putpage(vnode_t *vp, offset_t off, size_t len, int flags, cred_t *cr,
4196 4183 caller_context_t *ct)
4197 4184 {
4198 4185 znode_t *zp = VTOZ(vp);
4199 4186 zfsvfs_t *zfsvfs = zp->z_zfsvfs;
4200 4187 page_t *pp;
4201 4188 size_t io_len;
4202 4189 u_offset_t io_off;
4203 4190 uint_t blksz;
4204 4191 rl_t *rl;
4205 4192 int error = 0;
4206 4193
4207 4194 ZFS_ENTER(zfsvfs);
4208 4195 ZFS_VERIFY_ZP(zp);
4209 4196
4210 4197 /*
4211 4198 * There's nothing to do if no data is cached.
4212 4199 */
4213 4200 if (!vn_has_cached_data(vp)) {
4214 4201 ZFS_EXIT(zfsvfs);
4215 4202 return (0);
4216 4203 }
4217 4204
4218 4205 /*
4219 4206 * Align this request to the file block size in case we kluster.
4220 4207 	 * XXX - this can result in pretty aggressive locking, which can
4221 4208 	 * impact simultaneous read/write access.  One option might be
4222 4209 * to break up long requests (len == 0) into block-by-block
4223 4210 * operations to get narrower locking.
4224 4211 */
4225 4212 blksz = zp->z_blksz;
4226 4213 if (ISP2(blksz))
4227 4214 io_off = P2ALIGN_TYPED(off, blksz, u_offset_t);
4228 4215 else
4229 4216 io_off = 0;
4230 4217 if (len > 0 && ISP2(blksz))
4231 4218 io_len = P2ROUNDUP_TYPED(len + (off - io_off), blksz, size_t);
4232 4219 else
4233 4220 io_len = 0;
4234 4221
4235 4222 if (io_len == 0) {
4236 4223 /*
4237 4224 * Search the entire vp list for pages >= io_off.
4238 4225 */
4239 4226 rl = zfs_range_lock(zp, io_off, UINT64_MAX, RL_WRITER);
4240 4227 error = pvn_vplist_dirty(vp, io_off, zfs_putapage, flags, cr);
4241 4228 goto out;
4242 4229 }
4243 4230 rl = zfs_range_lock(zp, io_off, io_len, RL_WRITER);
4244 4231
4245 4232 if (off > zp->z_size) {
4246 4233 /* past end of file */
4247 4234 zfs_range_unlock(rl);
4248 4235 ZFS_EXIT(zfsvfs);
4249 4236 return (0);
4250 4237 }
4251 4238
4252 4239 len = MIN(io_len, P2ROUNDUP(zp->z_size, PAGESIZE) - io_off);
4253 4240
4254 4241 for (off = io_off; io_off < off + len; io_off += io_len) {
4255 4242 if ((flags & B_INVAL) || ((flags & B_ASYNC) == 0)) {
4256 4243 pp = page_lookup(vp, io_off,
4257 4244 (flags & (B_INVAL | B_FREE)) ? SE_EXCL : SE_SHARED);
4258 4245 } else {
4259 4246 pp = page_lookup_nowait(vp, io_off,
4260 4247 (flags & B_FREE) ? SE_EXCL : SE_SHARED);
4261 4248 }
4262 4249
4263 4250 if (pp != NULL && pvn_getdirty(pp, flags)) {
4264 4251 int err;
4265 4252
4266 4253 /*
4267 4254 * Found a dirty page to push
4268 4255 */
4269 4256 err = zfs_putapage(vp, pp, &io_off, &io_len, flags, cr);
4270 4257 if (err)
4271 4258 error = err;
4272 4259 } else {
4273 4260 io_len = PAGESIZE;
4274 4261 }
4275 4262 }
4276 4263 out:
4277 4264 zfs_range_unlock(rl);
4278 4265 if ((flags & B_ASYNC) == 0 || zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
4279 4266 zil_commit(zfsvfs->z_log, zp->z_id);
4280 4267 ZFS_EXIT(zfsvfs);
4281 4268 return (error);
4282 4269 }
4283 4270
4284 4271 /*ARGSUSED*/
4285 4272 void
4286 4273 zfs_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct)
4287 4274 {
4288 4275 znode_t *zp = VTOZ(vp);
4289 4276 zfsvfs_t *zfsvfs = zp->z_zfsvfs;
4290 4277 int error;
4291 4278
4292 4279 rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER);
4293 4280 if (zp->z_sa_hdl == NULL) {
4294 4281 /*
4295 4282 * The fs has been unmounted, or we did a
4296 4283 * suspend/resume and this file no longer exists.
4297 4284 */
4298 4285 if (vn_has_cached_data(vp)) {
4299 4286 (void) pvn_vplist_dirty(vp, 0, zfs_null_putapage,
4300 4287 B_INVAL, cr);
4301 4288 }
4302 4289
4303 4290 mutex_enter(&zp->z_lock);
4304 4291 mutex_enter(&vp->v_lock);
4305 4292 ASSERT(vp->v_count == 1);
4306 4293 vp->v_count = 0;
4307 4294 mutex_exit(&vp->v_lock);
4308 4295 mutex_exit(&zp->z_lock);
4309 4296 rw_exit(&zfsvfs->z_teardown_inactive_lock);
4310 4297 zfs_znode_free(zp);
4311 4298 return;
4312 4299 }
4313 4300
4314 4301 /*
4315 4302 * Attempt to push any data in the page cache. If this fails
4316 4303 * we will get kicked out later in zfs_zinactive().
4317 4304 */
4318 4305 if (vn_has_cached_data(vp)) {
4319 4306 (void) pvn_vplist_dirty(vp, 0, zfs_putapage, B_INVAL|B_ASYNC,
4320 4307 cr);
4321 4308 }
4322 4309
4323 4310 if (zp->z_atime_dirty && zp->z_unlinked == 0) {
4324 4311 dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os);
4325 4312
4326 4313 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
4327 4314 zfs_sa_upgrade_txholds(tx, zp);
4328 4315 error = dmu_tx_assign(tx, TXG_WAIT);
4329 4316 if (error) {
4330 4317 dmu_tx_abort(tx);
4331 4318 } else {
4332 4319 mutex_enter(&zp->z_lock);
4333 4320 (void) sa_update(zp->z_sa_hdl, SA_ZPL_ATIME(zfsvfs),
4334 4321 (void *)&zp->z_atime, sizeof (zp->z_atime), tx);
4335 4322 zp->z_atime_dirty = 0;
4336 4323 mutex_exit(&zp->z_lock);
4337 4324 dmu_tx_commit(tx);
4338 4325 }
4339 4326 }
4340 4327
4341 4328 zfs_zinactive(zp);
4342 4329 rw_exit(&zfsvfs->z_teardown_inactive_lock);
4343 4330 }
4344 4331
4345 4332 /*
4346 4333 * Bounds-check the seek operation.
4347 4334 *
4348 4335 * IN: vp - vnode seeking within
4349 4336 * ooff - old file offset
4350 4337 * noffp - pointer to new file offset
4351 4338 * ct - caller context
4352 4339 *
4353 - * RETURN: 0 if success
4354 - * EINVAL if new offset invalid
4340 + * RETURN: 0 on success, EINVAL if new offset invalid.
4355 4341 */
4356 4342 /* ARGSUSED */
4357 4343 static int
4358 4344 zfs_seek(vnode_t *vp, offset_t ooff, offset_t *noffp,
4359 4345 caller_context_t *ct)
4360 4346 {
4361 4347 if (vp->v_type == VDIR)
4362 4348 return (0);
4363 4349 return ((*noffp < 0 || *noffp > MAXOFFSET_T) ? EINVAL : 0);
4364 4350 }
4365 4351
4366 4352 /*
4367 4353 * Pre-filter the generic locking function to trap attempts to place
4368 4354 * a mandatory lock on a memory mapped file.
4369 4355 */
4370 4356 static int
4371 4357 zfs_frlock(vnode_t *vp, int cmd, flock64_t *bfp, int flag, offset_t offset,
4372 4358 flk_callback_t *flk_cbp, cred_t *cr, caller_context_t *ct)
4373 4359 {
4374 4360 znode_t *zp = VTOZ(vp);
4375 4361 zfsvfs_t *zfsvfs = zp->z_zfsvfs;
4376 4362
4377 4363 ZFS_ENTER(zfsvfs);
4378 4364 ZFS_VERIFY_ZP(zp);
4379 4365
4380 4366 /*
4381 4367 * We are following the UFS semantics with respect to mapcnt
4382 4368 * here: If we see that the file is mapped already, then we will
4383 4369 * return an error, but we don't worry about races between this
4384 4370 * function and zfs_map().
4385 4371 */
4386 4372 if (zp->z_mapcnt > 0 && MANDMODE(zp->z_mode)) {
4387 4373 ZFS_EXIT(zfsvfs);
4388 4374 return (SET_ERROR(EAGAIN));
4389 4375 }
4390 4376 ZFS_EXIT(zfsvfs);
4391 4377 return (fs_frlock(vp, cmd, bfp, flag, offset, flk_cbp, cr, ct));
4392 4378 }
4393 4379
4394 4380 /*
4395 4381 * If we can't find a page in the cache, we will create a new page
4396 4382 * and fill it with file data. For efficiency, we may try to fill
4397 4383 * multiple pages at once (klustering) to fill up the supplied page
4398 4384 * list. Note that the pages to be filled are held with an exclusive
4399 4385 * lock to prevent access by other threads while they are being filled.
4400 4386 */
4401 4387 static int
4402 4388 zfs_fillpage(vnode_t *vp, u_offset_t off, struct seg *seg,
4403 4389 caddr_t addr, page_t *pl[], size_t plsz, enum seg_rw rw)
4404 4390 {
4405 4391 znode_t *zp = VTOZ(vp);
4406 4392 page_t *pp, *cur_pp;
4407 4393 objset_t *os = zp->z_zfsvfs->z_os;
4408 4394 u_offset_t io_off, total;
4409 4395 size_t io_len;
4410 4396 int err;
4411 4397
4412 4398 if (plsz == PAGESIZE || zp->z_blksz <= PAGESIZE) {
4413 4399 /*
4414 4400 * We only have a single page, don't bother klustering
4415 4401 */
4416 4402 io_off = off;
4417 4403 io_len = PAGESIZE;
4418 4404 pp = page_create_va(vp, io_off, io_len,
4419 4405 PG_EXCL | PG_WAIT, seg, addr);
4420 4406 } else {
4421 4407 /*
4422 4408 * Try to find enough pages to fill the page list
4423 4409 */
4424 4410 pp = pvn_read_kluster(vp, off, seg, addr, &io_off,
4425 4411 &io_len, off, plsz, 0);
4426 4412 }
4427 4413 if (pp == NULL) {
4428 4414 /*
4429 4415 * The page already exists, nothing to do here.
4430 4416 */
4431 4417 *pl = NULL;
4432 4418 return (0);
4433 4419 }
4434 4420
4435 4421 /*
4436 4422 * Fill the pages in the kluster.
4437 4423 */
4438 4424 cur_pp = pp;
4439 4425 for (total = io_off + io_len; io_off < total; io_off += PAGESIZE) {
4440 4426 caddr_t va;
4441 4427
4442 4428 ASSERT3U(io_off, ==, cur_pp->p_offset);
4443 4429 va = zfs_map_page(cur_pp, S_WRITE);
4444 4430 err = dmu_read(os, zp->z_id, io_off, PAGESIZE, va,
4445 4431 DMU_READ_PREFETCH);
4446 4432 zfs_unmap_page(cur_pp, va);
4447 4433 if (err) {
4448 4434 /* On error, toss the entire kluster */
4449 4435 pvn_read_done(pp, B_ERROR);
4450 4436 /* convert checksum errors into IO errors */
4451 4437 if (err == ECKSUM)
4452 4438 err = SET_ERROR(EIO);
4453 4439 return (err);
4454 4440 }
4455 4441 cur_pp = cur_pp->p_next;
4456 4442 }
4457 4443
4458 4444 /*
4459 4445 * Fill in the page list array from the kluster starting
4460 4446 * from the desired offset `off'.
4461 4447 * NOTE: the page list will always be null terminated.
4462 4448 */
4463 4449 pvn_plist_init(pp, pl, plsz, off, io_len, rw);
4464 4450 ASSERT(pl == NULL || (*pl)->p_offset == off);
4465 4451
4466 4452 return (0);
4467 4453 }
4468 4454
4469 4455 /*
4470 4456 * Return pointers to the pages for the file region [off, off + len]
4471 4457 * in the pl array. If plsz is greater than len, this function may
4472 4458 * also return page pointers from after the specified region
4473 4459 * (i.e. the region [off, off + plsz]). These additional pages are
4474 4460 * only returned if they are already in the cache, or were created as
4475 4461 * part of a klustered read.
4476 4462 *
4477 4463 * IN: vp - vnode of file to get data from.
4478 4464 * off - position in file to get data from.
4479 4465 * len - amount of data to retrieve.
4480 4466 * plsz - length of provided page list.
4481 4467 * seg - segment to obtain pages for.
4482 4468 * addr - virtual address of fault.
4483 4469 * rw - mode of created pages.
4484 4470 * cr - credentials of caller.
4485 4471 * ct - caller context.
4486 4472 *
4487 4473 * OUT: protp - protection mode of created pages.
4488 4474 * pl - list of pages created.
4489 4475 *
4490 - * RETURN: 0 if success
4491 - * error code if failure
4476 + * RETURN: 0 on success, error code on failure.
4492 4477 *
4493 4478 * Timestamps:
4494 4479 * vp - atime updated
4495 4480 */
4496 4481 /* ARGSUSED */
4497 4482 static int
4498 4483 zfs_getpage(vnode_t *vp, offset_t off, size_t len, uint_t *protp,
4499 - page_t *pl[], size_t plsz, struct seg *seg, caddr_t addr,
4500 - enum seg_rw rw, cred_t *cr, caller_context_t *ct)
4484 + page_t *pl[], size_t plsz, struct seg *seg, caddr_t addr,
4485 + enum seg_rw rw, cred_t *cr, caller_context_t *ct)
4501 4486 {
4502 4487 znode_t *zp = VTOZ(vp);
4503 4488 zfsvfs_t *zfsvfs = zp->z_zfsvfs;
4504 4489 page_t **pl0 = pl;
4505 4490 int err = 0;
4506 4491
4507 4492 /* we do our own caching, faultahead is unnecessary */
4508 4493 if (pl == NULL)
4509 4494 return (0);
4510 4495 else if (len > plsz)
4511 4496 len = plsz;
4512 4497 else
4513 4498 len = P2ROUNDUP(len, PAGESIZE);
4514 4499 ASSERT(plsz >= len);
4515 4500
4516 4501 ZFS_ENTER(zfsvfs);
4517 4502 ZFS_VERIFY_ZP(zp);
4518 4503
4519 4504 if (protp)
4520 4505 *protp = PROT_ALL;
4521 4506
4522 4507 /*
4523 4508 * Loop through the requested range [off, off + len) looking
4524 4509 * for pages. If we don't find a page, we will need to create
4525 4510 * a new page and fill it with data from the file.
4526 4511 */
4527 4512 while (len > 0) {
4528 4513 if (*pl = page_lookup(vp, off, SE_SHARED))
4529 4514 *(pl+1) = NULL;
4530 4515 else if (err = zfs_fillpage(vp, off, seg, addr, pl, plsz, rw))
4531 4516 goto out;
4532 4517 while (*pl) {
4533 4518 ASSERT3U((*pl)->p_offset, ==, off);
4534 4519 off += PAGESIZE;
4535 4520 addr += PAGESIZE;
4536 4521 if (len > 0) {
4537 4522 ASSERT3U(len, >=, PAGESIZE);
4538 4523 len -= PAGESIZE;
4539 4524 }
4540 4525 ASSERT3U(plsz, >=, PAGESIZE);
4541 4526 plsz -= PAGESIZE;
4542 4527 pl++;
4543 4528 }
4544 4529 }
4545 4530
4546 4531 /*
4547 4532 * Fill out the page array with any pages already in the cache.
4548 4533 */
4549 4534 while (plsz > 0 &&
4550 4535 (*pl++ = page_lookup_nowait(vp, off, SE_SHARED))) {
4551 4536 off += PAGESIZE;
4552 4537 plsz -= PAGESIZE;
4553 4538 }
4554 4539 out:
4555 4540 if (err) {
4556 4541 /*
4557 4542 * Release any pages we have previously locked.
4558 4543 */
4559 4544 while (pl > pl0)
4560 4545 page_unlock(*--pl);
4561 4546 } else {
4562 4547 ZFS_ACCESSTIME_STAMP(zfsvfs, zp);
4563 4548 }
4564 4549
4565 4550 *pl = NULL;
4566 4551
4567 4552 ZFS_EXIT(zfsvfs);
4568 4553 return (err);
4569 4554 }
4570 4555
4571 4556 /*
4572 4557 * Request a memory map for a section of a file. This code interacts
4573 4558 * with common code and the VM system as follows:
4574 4559 *
4575 - * common code calls mmap(), which ends up in smmap_common()
4576 - *
4577 - * this calls VOP_MAP(), which takes you into (say) zfs
4578 - *
4579 - * zfs_map() calls as_map(), passing segvn_create() as the callback
4580 - *
4581 - * segvn_create() creates the new segment and calls VOP_ADDMAP()
4582 - *
4583 - * zfs_addmap() updates z_mapcnt
4560 + * - common code calls mmap(), which ends up in smmap_common()
4561 + * - this calls VOP_MAP(), which takes you into (say) zfs
4562 + * - zfs_map() calls as_map(), passing segvn_create() as the callback
4563 + * - segvn_create() creates the new segment and calls VOP_ADDMAP()
4564 + * - zfs_addmap() updates z_mapcnt
4584 4565 */
4585 4566 /*ARGSUSED*/
4586 4567 static int
4587 4568 zfs_map(vnode_t *vp, offset_t off, struct as *as, caddr_t *addrp,
4588 4569 size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, cred_t *cr,
4589 4570 caller_context_t *ct)
4590 4571 {
4591 4572 znode_t *zp = VTOZ(vp);
4592 4573 zfsvfs_t *zfsvfs = zp->z_zfsvfs;
4593 4574 segvn_crargs_t vn_a;
4594 4575 int error;
4595 4576
4596 4577 ZFS_ENTER(zfsvfs);
4597 4578 ZFS_VERIFY_ZP(zp);
4598 4579
4599 4580 if ((prot & PROT_WRITE) && (zp->z_pflags &
4600 4581 (ZFS_IMMUTABLE | ZFS_READONLY | ZFS_APPENDONLY))) {
4601 4582 ZFS_EXIT(zfsvfs);
4602 4583 return (SET_ERROR(EPERM));
4603 4584 }
4604 4585
4605 4586 if ((prot & (PROT_READ | PROT_EXEC)) &&
4606 4587 (zp->z_pflags & ZFS_AV_QUARANTINED)) {
4607 4588 ZFS_EXIT(zfsvfs);
4608 4589 return (SET_ERROR(EACCES));
4609 4590 }
4610 4591
4611 4592 if (vp->v_flag & VNOMAP) {
4612 4593 ZFS_EXIT(zfsvfs);
4613 4594 return (SET_ERROR(ENOSYS));
4614 4595 }
4615 4596
4616 4597 if (off < 0 || len > MAXOFFSET_T - off) {
4617 4598 ZFS_EXIT(zfsvfs);
4618 4599 return (SET_ERROR(ENXIO));
4619 4600 }
4620 4601
4621 4602 if (vp->v_type != VREG) {
4622 4603 ZFS_EXIT(zfsvfs);
4623 4604 return (SET_ERROR(ENODEV));
4624 4605 }
4625 4606
4626 4607 /*
4627 4608 * If file is locked, disallow mapping.
4628 4609 */
4629 4610 if (MANDMODE(zp->z_mode) && vn_has_flocks(vp)) {
4630 4611 ZFS_EXIT(zfsvfs);
4631 4612 return (SET_ERROR(EAGAIN));
4632 4613 }
4633 4614
4634 4615 as_rangelock(as);
4635 4616 error = choose_addr(as, addrp, len, off, ADDR_VACALIGN, flags);
4636 4617 if (error != 0) {
4637 4618 as_rangeunlock(as);
4638 4619 ZFS_EXIT(zfsvfs);
4639 4620 return (error);
4640 4621 }
4641 4622
4642 4623 vn_a.vp = vp;
4643 4624 vn_a.offset = (u_offset_t)off;
4644 4625 vn_a.type = flags & MAP_TYPE;
4645 4626 vn_a.prot = prot;
4646 4627 vn_a.maxprot = maxprot;
4647 4628 vn_a.cred = cr;
4648 4629 vn_a.amp = NULL;
4649 4630 vn_a.flags = flags & ~MAP_TYPE;
4650 4631 vn_a.szc = 0;
4651 4632 vn_a.lgrp_mem_policy_flags = 0;
4652 4633
4653 4634 error = as_map(as, *addrp, len, segvn_create, &vn_a);
4654 4635
4655 4636 as_rangeunlock(as);
4656 4637 ZFS_EXIT(zfsvfs);
4657 4638 return (error);
4658 4639 }
4659 4640
4660 4641 /* ARGSUSED */
4661 4642 static int
4662 4643 zfs_addmap(vnode_t *vp, offset_t off, struct as *as, caddr_t addr,
4663 4644 size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, cred_t *cr,
4664 4645 caller_context_t *ct)
4665 4646 {
4666 4647 uint64_t pages = btopr(len);
4667 4648
4668 4649 atomic_add_64(&VTOZ(vp)->z_mapcnt, pages);
4669 4650 return (0);
4670 4651 }
4671 4652
4672 4653 /*
4673 4654 * The reason we push dirty pages as part of zfs_delmap() is so that we get a
4674 4655 * more accurate mtime for the associated file. Since we don't have a way of
4675 4656 * detecting when the data was actually modified, we have to resort to
4676 4657 * heuristics. If an explicit msync() is done, then we mark the mtime when the
4677 4658 * last page is pushed. The problem occurs when the msync() call is omitted,
4678 4659 * which by far the most common case:
4679 4660 * which is by far the most common case:
4680 4661 * open()
4681 4662 * mmap()
4682 4663 * <modify memory>
4683 4664 * munmap()
4684 4665 * close()
4685 4666 * <time lapse>
4686 4667 * putpage() via fsflush
4687 4668 *
4688 4669 * If we wait until fsflush to come along, we can have a modification time that
4689 4670 * is some arbitrary point in the future. In order to prevent this in the
4690 4671 * common case, we flush pages whenever a (MAP_SHARED, PROT_WRITE) mapping is
4691 4672 * torn down.
4692 4673 */
4693 4674 /* ARGSUSED */
4694 4675 static int
4695 4676 zfs_delmap(vnode_t *vp, offset_t off, struct as *as, caddr_t addr,
4696 4677 size_t len, uint_t prot, uint_t maxprot, uint_t flags, cred_t *cr,
4697 4678 caller_context_t *ct)
4698 4679 {
4699 4680 uint64_t pages = btopr(len);
4700 4681
4701 4682 ASSERT3U(VTOZ(vp)->z_mapcnt, >=, pages);
4702 4683 atomic_add_64(&VTOZ(vp)->z_mapcnt, -pages);
4703 4684
4704 4685 if ((flags & MAP_SHARED) && (prot & PROT_WRITE) &&
4705 4686 vn_has_cached_data(vp))
4706 4687 (void) VOP_PUTPAGE(vp, off, len, B_ASYNC, cr, ct);
4707 4688
4708 4689 return (0);
4709 4690 }
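
A minimal userland sketch of the sequence described above; the file path is hypothetical, and the file is assumed to already exist and be at least one page long. The point is that no msync() is issued, so the page flush in zfs_delmap() is what keeps the mtime close to the time of modification:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

int
main(void)
{
	/* Hypothetical file on a ZFS dataset. */
	int fd = open("/tank/example", O_RDWR);
	if (fd == -1) {
		perror("open");
		return (1);
	}
	char *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
	    MAP_SHARED, fd, 0);
	if (p == MAP_FAILED) {
		perror("mmap");
		(void) close(fd);
		return (1);
	}
	memcpy(p, "modified", 8);	/* <modify memory> */
	/* No msync(): the dirty pages are pushed when the map is torn down. */
	(void) munmap(p, 4096);
	(void) close(fd);
	return (0);
}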
4710 4691
4711 4692 /*
4712 4693 * Free or allocate space in a file. Currently, this function only
4713 4694 * supports the `F_FREESP' command. However, this command is somewhat
4714 4695 * misnamed, as its functionality includes the ability to allocate as
4715 4696 * well as free space.
4716 4697 *
4717 4698 * IN: vp - vnode of file to free data in.
4718 4699 * cmd - action to take (only F_FREESP supported).
4719 4700 * bfp - section of file to free/alloc.
4720 4701 * flag - current file open mode flags.
4721 4702 * offset - current file offset.
4722 4703 * cr - credentials of caller [UNUSED].
4723 4704 * ct - caller context.
4724 4705 *
4725 - * RETURN: 0 if success
4726 - * error code if failure
4706 + * RETURN: 0 on success, error code on failure.
4727 4707 *
4728 4708 * Timestamps:
4729 4709 * vp - ctime|mtime updated
4730 4710 */
4731 4711 /* ARGSUSED */
4732 4712 static int
4733 4713 zfs_space(vnode_t *vp, int cmd, flock64_t *bfp, int flag,
4734 4714 offset_t offset, cred_t *cr, caller_context_t *ct)
4735 4715 {
4736 4716 znode_t *zp = VTOZ(vp);
4737 4717 zfsvfs_t *zfsvfs = zp->z_zfsvfs;
4738 4718 uint64_t off, len;
4739 4719 int error;
4740 4720
4741 4721 ZFS_ENTER(zfsvfs);
4742 4722 ZFS_VERIFY_ZP(zp);
4743 4723
4744 4724 if (cmd != F_FREESP) {
4745 4725 ZFS_EXIT(zfsvfs);
4746 4726 return (SET_ERROR(EINVAL));
4747 4727 }
4748 4728
4749 4729 if (error = convoff(vp, bfp, 0, offset)) {
4750 4730 ZFS_EXIT(zfsvfs);
4751 4731 return (error);
4752 4732 }
4753 4733
4754 4734 if (bfp->l_len < 0) {
4755 4735 ZFS_EXIT(zfsvfs);
4756 4736 return (SET_ERROR(EINVAL));
4757 4737 }
4758 4738
4759 4739 off = bfp->l_start;
4760 4740 len = bfp->l_len; /* 0 means from off to end of file */
4761 4741
4762 4742 error = zfs_freesp(zp, off, len, flag, TRUE);
4763 4743
4764 4744 ZFS_EXIT(zfsvfs);
4765 4745 return (error);
4766 4746 }
4767 4747
4768 4748 /*ARGSUSED*/
4769 4749 static int
4770 4750 zfs_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct)
4771 4751 {
4772 4752 znode_t *zp = VTOZ(vp);
4773 4753 zfsvfs_t *zfsvfs = zp->z_zfsvfs;
4774 4754 uint32_t gen;
4775 4755 uint64_t gen64;
4776 4756 uint64_t object = zp->z_id;
4777 4757 zfid_short_t *zfid;
4778 4758 int size, i, error;
4779 4759
4780 4760 ZFS_ENTER(zfsvfs);
4781 4761 ZFS_VERIFY_ZP(zp);
4782 4762
4783 4763 if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs),
4784 4764 &gen64, sizeof (uint64_t))) != 0) {
4785 4765 ZFS_EXIT(zfsvfs);
4786 4766 return (error);
4787 4767 }
4788 4768
4789 4769 gen = (uint32_t)gen64;
4790 4770
4791 4771 size = (zfsvfs->z_parent != zfsvfs) ? LONG_FID_LEN : SHORT_FID_LEN;
4792 4772 if (fidp->fid_len < size) {
4793 4773 fidp->fid_len = size;
4794 4774 ZFS_EXIT(zfsvfs);
4795 4775 return (SET_ERROR(ENOSPC));
4796 4776 }
4797 4777
4798 4778 zfid = (zfid_short_t *)fidp;
4799 4779
4800 4780 zfid->zf_len = size;
4801 4781
4802 4782 for (i = 0; i < sizeof (zfid->zf_object); i++)
4803 4783 zfid->zf_object[i] = (uint8_t)(object >> (8 * i));
4804 4784
4805 4785 /* Must have a non-zero generation number to distinguish from .zfs */
4806 4786 if (gen == 0)
4807 4787 gen = 1;
4808 4788 for (i = 0; i < sizeof (zfid->zf_gen); i++)
4809 4789 zfid->zf_gen[i] = (uint8_t)(gen >> (8 * i));
4810 4790
4811 4791 if (size == LONG_FID_LEN) {
4812 4792 uint64_t objsetid = dmu_objset_id(zfsvfs->z_os);
4813 4793 zfid_long_t *zlfid;
4814 4794
4815 4795 zlfid = (zfid_long_t *)fidp;
4816 4796
4817 4797 for (i = 0; i < sizeof (zlfid->zf_setid); i++)
4818 4798 zlfid->zf_setid[i] = (uint8_t)(objsetid >> (8 * i));
4819 4799
4820 4800 /* XXX - this should be the generation number for the objset */
4821 4801 for (i = 0; i < sizeof (zlfid->zf_setgen); i++)
4822 4802 zlfid->zf_setgen[i] = 0;
4823 4803 }
4824 4804
4825 4805 ZFS_EXIT(zfsvfs);
4826 4806 return (0);
4827 4807 }
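
The packing loops above store the object id and generation least-significant byte first, independent of host byte order. A minimal sketch of the same loop shape, using a hypothetical object id and the 6-byte width of zf_object in a short fid:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Pack the low 'n' bytes of 'val', least-significant byte first. */
static void
pack_le(uint8_t *buf, size_t n, uint64_t val)
{
	for (size_t i = 0; i < n; i++)
		buf[i] = (uint8_t)(val >> (8 * i));
}

int
main(void)
{
	uint8_t obj[6];		/* zf_object is 6 bytes in a short fid */

	pack_le(obj, sizeof (obj), 0x1234ULL);	/* hypothetical object id */
	for (size_t i = 0; i < sizeof (obj); i++)
		printf("%02x ", obj[i]);
	printf("\n");		/* prints: 34 12 00 00 00 00 */
	return (0);
}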
4828 4808
4829 4809 static int
4830 4810 zfs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr,
4831 4811 caller_context_t *ct)
4832 4812 {
4833 4813 znode_t *zp, *xzp;
4834 4814 zfsvfs_t *zfsvfs;
4835 4815 zfs_dirlock_t *dl;
4836 4816 int error;
4837 4817
4838 4818 switch (cmd) {
4839 4819 case _PC_LINK_MAX:
4840 4820 *valp = ULONG_MAX;
4841 4821 return (0);
4842 4822
4843 4823 case _PC_FILESIZEBITS:
4844 4824 *valp = 64;
4845 4825 return (0);
4846 4826
4847 4827 case _PC_XATTR_EXISTS:
4848 4828 zp = VTOZ(vp);
4849 4829 zfsvfs = zp->z_zfsvfs;
4850 4830 ZFS_ENTER(zfsvfs);
4851 4831 ZFS_VERIFY_ZP(zp);
4852 4832 *valp = 0;
4853 4833 error = zfs_dirent_lock(&dl, zp, "", &xzp,
4854 4834 ZXATTR | ZEXISTS | ZSHARED, NULL, NULL);
4855 4835 if (error == 0) {
4856 4836 zfs_dirent_unlock(dl);
4857 4837 if (!zfs_dirempty(xzp))
4858 4838 *valp = 1;
4859 4839 VN_RELE(ZTOV(xzp));
4860 4840 } else if (error == ENOENT) {
4861 4841 /*
4862 4842 * If there aren't extended attributes, it's the
4863 4843 * same as having zero of them.
4864 4844 */
4865 4845 error = 0;
4866 4846 }
4867 4847 ZFS_EXIT(zfsvfs);
4868 4848 return (error);
4869 4849
4870 4850 case _PC_SATTR_ENABLED:
4871 4851 case _PC_SATTR_EXISTS:
4872 4852 *valp = vfs_has_feature(vp->v_vfsp, VFSFT_SYSATTR_VIEWS) &&
4873 4853 (vp->v_type == VREG || vp->v_type == VDIR);
4874 4854 return (0);
4875 4855
4876 4856 case _PC_ACCESS_FILTERING:
4877 4857 *valp = vfs_has_feature(vp->v_vfsp, VFSFT_ACCESS_FILTER) &&
4878 4858 vp->v_type == VDIR;
4879 4859 return (0);
4880 4860
4881 4861 case _PC_ACL_ENABLED:
4882 4862 *valp = _ACL_ACE_ENABLED;
4883 4863 return (0);
4884 4864
4885 4865 case _PC_MIN_HOLE_SIZE:
4886 4866 *valp = (ulong_t)SPA_MINBLOCKSIZE;
4887 4867 return (0);
4888 4868
4889 4869 case _PC_TIMESTAMP_RESOLUTION:
4890 4870 /* nanosecond timestamp resolution */
4891 4871 *valp = 1L;
4892 4872 return (0);
4893 4873
4894 4874 default:
4895 4875 return (fs_pathconf(vp, cmd, valp, cr, ct));
4896 4876 }
4897 4877 }
4898 4878
4899 4879 /*ARGSUSED*/
4900 4880 static int
4901 4881 zfs_getsecattr(vnode_t *vp, vsecattr_t *vsecp, int flag, cred_t *cr,
4902 4882 caller_context_t *ct)
4903 4883 {
4904 4884 znode_t *zp = VTOZ(vp);
4905 4885 zfsvfs_t *zfsvfs = zp->z_zfsvfs;
4906 4886 int error;
4907 4887 boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE;
4908 4888
4909 4889 ZFS_ENTER(zfsvfs);
4910 4890 ZFS_VERIFY_ZP(zp);
4911 4891 error = zfs_getacl(zp, vsecp, skipaclchk, cr);
4912 4892 ZFS_EXIT(zfsvfs);
4913 4893
4914 4894 return (error);
4915 4895 }
4916 4896
4917 4897 /*ARGSUSED*/
4918 4898 static int
4919 4899 zfs_setsecattr(vnode_t *vp, vsecattr_t *vsecp, int flag, cred_t *cr,
4920 4900 caller_context_t *ct)
4921 4901 {
4922 4902 znode_t *zp = VTOZ(vp);
4923 4903 zfsvfs_t *zfsvfs = zp->z_zfsvfs;
4924 4904 int error;
4925 4905 boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE;
4926 4906 zilog_t *zilog = zfsvfs->z_log;
4927 4907
4928 4908 ZFS_ENTER(zfsvfs);
4929 4909 ZFS_VERIFY_ZP(zp);
4930 4910
4931 4911 error = zfs_setacl(zp, vsecp, skipaclchk, cr);
4932 4912
4933 4913 if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
4934 4914 zil_commit(zilog, 0);
4935 4915
4936 4916 ZFS_EXIT(zfsvfs);
4937 4917 return (error);
4938 4918 }
4939 4919
4940 4920 /*
4941 - * Tunable, both must be a power of 2.
4942 - *
4943 - * zcr_blksz_min: the smallest read we may consider to loan out an arcbuf
4944 - * zcr_blksz_max: if set to less than the file block size, allow loaning out of
4945 - * an arcbuf for a partial block read
4921 + * The smallest read we may consider to loan out an arcbuf.
4922 + * This must be a power of 2.
4946 4923 */
4947 4924 int zcr_blksz_min = (1 << 10); /* 1K */
4925 +/*
4926 + * If set to less than the file block size, allow loaning out of an
4927 + * arcbuf for a partial block read. This must be a power of 2.
4928 + */
4948 4929 int zcr_blksz_max = (1 << 17); /* 128K */
4949 4930
4950 4931 /*ARGSUSED*/
4951 4932 static int
4952 4933 zfs_reqzcbuf(vnode_t *vp, enum uio_rw ioflag, xuio_t *xuio, cred_t *cr,
4953 4934 caller_context_t *ct)
4954 4935 {
4955 4936 znode_t *zp = VTOZ(vp);
4956 4937 zfsvfs_t *zfsvfs = zp->z_zfsvfs;
4957 4938 int max_blksz = zfsvfs->z_max_blksz;
4958 4939 uio_t *uio = &xuio->xu_uio;
4959 4940 ssize_t size = uio->uio_resid;
4960 4941 offset_t offset = uio->uio_loffset;
4961 4942 int blksz;
4962 4943 int fullblk, i;
4963 4944 arc_buf_t *abuf;
4964 4945 ssize_t maxsize;
4965 4946 int preamble, postamble;
4966 4947
4967 4948 if (xuio->xu_type != UIOTYPE_ZEROCOPY)
4968 4949 return (SET_ERROR(EINVAL));
4969 4950
4970 4951 ZFS_ENTER(zfsvfs);
4971 4952 ZFS_VERIFY_ZP(zp);
4972 4953 switch (ioflag) {
4973 4954 case UIO_WRITE:
4974 4955 /*
4975 4956 * Loan out an arc_buf for write if write size is bigger than
4976 4957 * max_blksz, and the file's block size is also max_blksz.
4977 4958 */
4978 4959 blksz = max_blksz;
4979 4960 if (size < blksz || zp->z_blksz != blksz) {
4980 4961 ZFS_EXIT(zfsvfs);
4981 4962 return (SET_ERROR(EINVAL));
4982 4963 }
4983 4964 /*
4984 4965 * Caller requests buffers for write before knowing where the
4985 4966 * write offset might be (e.g. NFS TCP write).
4986 4967 */
4987 4968 if (offset == -1) {
4988 4969 preamble = 0;
4989 4970 } else {
4990 4971 preamble = P2PHASE(offset, blksz);
4991 4972 if (preamble) {
4992 4973 preamble = blksz - preamble;
4993 4974 size -= preamble;
4994 4975 }
4995 4976 }
4996 4977
4997 4978 postamble = P2PHASE(size, blksz);
4998 4979 size -= postamble;
4999 4980
5000 4981 fullblk = size / blksz;
5001 4982 (void) dmu_xuio_init(xuio,
5002 4983 (preamble != 0) + fullblk + (postamble != 0));
5003 4984 DTRACE_PROBE3(zfs_reqzcbuf_align, int, preamble,
5004 4985 int, postamble, int,
5005 4986 (preamble != 0) + fullblk + (postamble != 0));
5006 4987
5007 4988 /*
5008 4989 * Have to fix iov base/len for partial buffers. They
5009 4990 * currently represent full arc_buf's.
5010 4991 */
5011 4992 if (preamble) {
5012 4993 /* data begins in the middle of the arc_buf */
5013 4994 abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl),
5014 4995 blksz);
5015 4996 ASSERT(abuf);
5016 4997 (void) dmu_xuio_add(xuio, abuf,
5017 4998 blksz - preamble, preamble);
5018 4999 }
5019 5000
5020 5001 for (i = 0; i < fullblk; i++) {
5021 5002 abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl),
5022 5003 blksz);
5023 5004 ASSERT(abuf);
5024 5005 (void) dmu_xuio_add(xuio, abuf, 0, blksz);
5025 5006 }
5026 5007
5027 5008 if (postamble) {
5028 5009 /* data ends in the middle of the arc_buf */
5029 5010 abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl),
5030 5011 blksz);
5031 5012 ASSERT(abuf);
5032 5013 (void) dmu_xuio_add(xuio, abuf, 0, postamble);
5033 5014 }
5034 5015 break;
5035 5016 case UIO_READ:
5036 5017 /*
5037 5018 * Loan out an arc_buf for read if the read size is larger than
5038 5019 * the current file block size. Block alignment is not
5039 5020 * considered. Partial arc_buf will be loaned out for read.
5040 5021 */
5041 5022 blksz = zp->z_blksz;
5042 5023 if (blksz < zcr_blksz_min)
5043 5024 blksz = zcr_blksz_min;
5044 5025 if (blksz > zcr_blksz_max)
5045 5026 blksz = zcr_blksz_max;
5046 5027 /* avoid potential complexity of dealing with it */
5047 5028 if (blksz > max_blksz) {
5048 5029 ZFS_EXIT(zfsvfs);
5049 5030 return (SET_ERROR(EINVAL));
5050 5031 }
5051 5032
5052 5033 maxsize = zp->z_size - uio->uio_loffset;
5053 5034 if (size > maxsize)
5054 5035 size = maxsize;
5055 5036
5056 5037 if (size < blksz || vn_has_cached_data(vp)) {
5057 5038 ZFS_EXIT(zfsvfs);
5058 5039 return (SET_ERROR(EINVAL));
5059 5040 }
5060 5041 break;
5061 5042 default:
5062 5043 ZFS_EXIT(zfsvfs);
5063 5044 return (SET_ERROR(EINVAL));
5064 5045 }
5065 5046
5066 5047 uio->uio_extflg = UIO_XUIO;
5067 5048 XUIO_XUZC_RW(xuio) = ioflag;
5068 5049 ZFS_EXIT(zfsvfs);
5069 5050 return (0);
5070 5051 }
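
To make the UIO_WRITE bookkeeping above concrete: with a hypothetical 128K block size, a 400K write starting at offset 48K splits into an 80K preamble, two full blocks, and a 64K postamble, so four arc_bufs are loaned. A minimal userland sketch of the same arithmetic, with P2PHASE() simplified for a power-of-two block size:

#include <stdint.h>
#include <stdio.h>

/* Simplified userland stand-in, valid for a power-of-two block size. */
#define	P2PHASE(x, a)	((x) & ((a) - 1))

int
main(void)
{
	/* Hypothetical request: 128K blocks, 400K write at offset 48K. */
	uint64_t blksz = 131072;
	uint64_t offset = 49152;
	uint64_t size = 409600;

	uint64_t preamble = P2PHASE(offset, blksz);
	if (preamble != 0) {
		preamble = blksz - preamble;	/* bytes up to the next block */
		size -= preamble;
	}
	uint64_t postamble = P2PHASE(size, blksz);
	size -= postamble;
	uint64_t fullblk = size / blksz;

	/* preamble=81920 fullblk=2 postamble=65536 bufs=4 */
	printf("preamble=%llu fullblk=%llu postamble=%llu bufs=%llu\n",
	    (unsigned long long)preamble, (unsigned long long)fullblk,
	    (unsigned long long)postamble,
	    (unsigned long long)((preamble != 0) + fullblk +
	    (postamble != 0)));
	return (0);
}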
5071 5052
5072 5053 /*ARGSUSED*/
5073 5054 static int
5074 5055 zfs_retzcbuf(vnode_t *vp, xuio_t *xuio, cred_t *cr, caller_context_t *ct)
5075 5056 {
5076 5057 int i;
5077 5058 arc_buf_t *abuf;
5078 5059 int ioflag = XUIO_XUZC_RW(xuio);
5079 5060
5080 5061 ASSERT(xuio->xu_type == UIOTYPE_ZEROCOPY);
5081 5062
5082 5063 i = dmu_xuio_cnt(xuio);
5083 5064 while (i-- > 0) {
5084 5065 abuf = dmu_xuio_arcbuf(xuio, i);
5085 5066 /*
5086 5067 * if abuf == NULL, it must be a write buffer
5087 5068 * that has been returned in zfs_write().
5088 5069 */
5089 5070 if (abuf)
5090 5071 dmu_return_arcbuf(abuf);
5091 5072 ASSERT(abuf || ioflag == UIO_WRITE);
5092 5073 }
5093 5074
5094 5075 dmu_xuio_fini(xuio);
5095 5076 return (0);
5096 5077 }
5097 5078
5098 5079 /*
5099 5080 * Predeclare these here so that the compiler assumes that
5100 5081 * this is an "old style" function declaration that does
5101 5082 * not include arguments => we won't get type mismatch errors
5102 5083 * in the initializations that follow.
5103 5084 */
5104 5085 static int zfs_inval();
5105 5086 static int zfs_isdir();
5106 5087
5107 5088 static int
5108 5089 zfs_inval()
5109 5090 {
5110 5091 return (SET_ERROR(EINVAL));
5111 5092 }
5112 5093
5113 5094 static int
5114 5095 zfs_isdir()
5115 5096 {
5116 5097 return (SET_ERROR(EISDIR));
5117 5098 }
5118 5099 /*
5119 5100 * Directory vnode operations template
5120 5101 */
5121 5102 vnodeops_t *zfs_dvnodeops;
5122 5103 const fs_operation_def_t zfs_dvnodeops_template[] = {
5123 5104 VOPNAME_OPEN, { .vop_open = zfs_open },
5124 5105 VOPNAME_CLOSE, { .vop_close = zfs_close },
5125 5106 VOPNAME_READ, { .error = zfs_isdir },
5126 5107 VOPNAME_WRITE, { .error = zfs_isdir },
5127 5108 VOPNAME_IOCTL, { .vop_ioctl = zfs_ioctl },
5128 5109 VOPNAME_GETATTR, { .vop_getattr = zfs_getattr },
5129 5110 VOPNAME_SETATTR, { .vop_setattr = zfs_setattr },
5130 5111 VOPNAME_ACCESS, { .vop_access = zfs_access },
5131 5112 VOPNAME_LOOKUP, { .vop_lookup = zfs_lookup },
5132 5113 VOPNAME_CREATE, { .vop_create = zfs_create },
5133 5114 VOPNAME_REMOVE, { .vop_remove = zfs_remove },
5134 5115 VOPNAME_LINK, { .vop_link = zfs_link },
5135 5116 VOPNAME_RENAME, { .vop_rename = zfs_rename },
5136 5117 VOPNAME_MKDIR, { .vop_mkdir = zfs_mkdir },
5137 5118 VOPNAME_RMDIR, { .vop_rmdir = zfs_rmdir },
5138 5119 VOPNAME_READDIR, { .vop_readdir = zfs_readdir },
5139 5120 VOPNAME_SYMLINK, { .vop_symlink = zfs_symlink },
5140 5121 VOPNAME_FSYNC, { .vop_fsync = zfs_fsync },
5141 5122 VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive },
5142 5123 VOPNAME_FID, { .vop_fid = zfs_fid },
5143 5124 VOPNAME_SEEK, { .vop_seek = zfs_seek },
5144 5125 VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf },
5145 5126 VOPNAME_GETSECATTR, { .vop_getsecattr = zfs_getsecattr },
5146 5127 VOPNAME_SETSECATTR, { .vop_setsecattr = zfs_setsecattr },
5147 5128 VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support },
5148 5129 NULL, NULL
5149 5130 };
5150 5131
5151 5132 /*
5152 5133 * Regular file vnode operations template
5153 5134 */
5154 5135 vnodeops_t *zfs_fvnodeops;
5155 5136 const fs_operation_def_t zfs_fvnodeops_template[] = {
5156 5137 VOPNAME_OPEN, { .vop_open = zfs_open },
5157 5138 VOPNAME_CLOSE, { .vop_close = zfs_close },
5158 5139 VOPNAME_READ, { .vop_read = zfs_read },
5159 5140 VOPNAME_WRITE, { .vop_write = zfs_write },
5160 5141 VOPNAME_IOCTL, { .vop_ioctl = zfs_ioctl },
5161 5142 VOPNAME_GETATTR, { .vop_getattr = zfs_getattr },
5162 5143 VOPNAME_SETATTR, { .vop_setattr = zfs_setattr },
5163 5144 VOPNAME_ACCESS, { .vop_access = zfs_access },
5164 5145 VOPNAME_LOOKUP, { .vop_lookup = zfs_lookup },
5165 5146 VOPNAME_RENAME, { .vop_rename = zfs_rename },
5166 5147 VOPNAME_FSYNC, { .vop_fsync = zfs_fsync },
5167 5148 VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive },
5168 5149 VOPNAME_FID, { .vop_fid = zfs_fid },
5169 5150 VOPNAME_SEEK, { .vop_seek = zfs_seek },
5170 5151 VOPNAME_FRLOCK, { .vop_frlock = zfs_frlock },
5171 5152 VOPNAME_SPACE, { .vop_space = zfs_space },
5172 5153 VOPNAME_GETPAGE, { .vop_getpage = zfs_getpage },
5173 5154 VOPNAME_PUTPAGE, { .vop_putpage = zfs_putpage },
5174 5155 VOPNAME_MAP, { .vop_map = zfs_map },
5175 5156 VOPNAME_ADDMAP, { .vop_addmap = zfs_addmap },
5176 5157 VOPNAME_DELMAP, { .vop_delmap = zfs_delmap },
5177 5158 VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf },
5178 5159 VOPNAME_GETSECATTR, { .vop_getsecattr = zfs_getsecattr },
5179 5160 VOPNAME_SETSECATTR, { .vop_setsecattr = zfs_setsecattr },
5180 5161 VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support },
5181 5162 VOPNAME_REQZCBUF, { .vop_reqzcbuf = zfs_reqzcbuf },
5182 5163 VOPNAME_RETZCBUF, { .vop_retzcbuf = zfs_retzcbuf },
5183 5164 NULL, NULL
5184 5165 };
5185 5166
5186 5167 /*
5187 5168 * Symbolic link vnode operations template
5188 5169 */
5189 5170 vnodeops_t *zfs_symvnodeops;
5190 5171 const fs_operation_def_t zfs_symvnodeops_template[] = {
5191 5172 VOPNAME_GETATTR, { .vop_getattr = zfs_getattr },
5192 5173 VOPNAME_SETATTR, { .vop_setattr = zfs_setattr },
5193 5174 VOPNAME_ACCESS, { .vop_access = zfs_access },
5194 5175 VOPNAME_RENAME, { .vop_rename = zfs_rename },
5195 5176 VOPNAME_READLINK, { .vop_readlink = zfs_readlink },
5196 5177 VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive },
5197 5178 VOPNAME_FID, { .vop_fid = zfs_fid },
5198 5179 VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf },
5199 5180 VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support },
5200 5181 NULL, NULL
5201 5182 };
5202 5183
5203 5184 /*
5204 5185 * special share hidden files vnode operations template
5205 5186 */
5206 5187 vnodeops_t *zfs_sharevnodeops;
5207 5188 const fs_operation_def_t zfs_sharevnodeops_template[] = {
5208 5189 VOPNAME_GETATTR, { .vop_getattr = zfs_getattr },
5209 5190 VOPNAME_ACCESS, { .vop_access = zfs_access },
5210 5191 VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive },
5211 5192 VOPNAME_FID, { .vop_fid = zfs_fid },
5212 5193 VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf },
5213 5194 VOPNAME_GETSECATTR, { .vop_getsecattr = zfs_getsecattr },
5214 5195 VOPNAME_SETSECATTR, { .vop_setsecattr = zfs_setsecattr },
5215 5196 VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support },
5216 5197 NULL, NULL
5217 5198 };
5218 5199
5219 5200 /*
5220 5201 * Extended attribute directory vnode operations template
5221 - * This template is identical to the directory vnodes
5222 - * operation template except for restricted operations:
5223 - * VOP_MKDIR()
5224 - * VOP_SYMLINK()
5202 + *
5203 + * This template is identical to the directory vnodes
5204 + * operation template except for restricted operations:
5205 + * VOP_MKDIR()
5206 + * VOP_SYMLINK()
5207 + *
5225 5208 * Note that there are other restrictions embedded in:
5226 5209 * zfs_create() - restrict type to VREG
5227 5210 * zfs_link() - no links into/out of attribute space
5228 5211 * zfs_rename() - no moves into/out of attribute space
5229 5212 */
5230 5213 vnodeops_t *zfs_xdvnodeops;
5231 5214 const fs_operation_def_t zfs_xdvnodeops_template[] = {
5232 5215 VOPNAME_OPEN, { .vop_open = zfs_open },
5233 5216 VOPNAME_CLOSE, { .vop_close = zfs_close },
5234 5217 VOPNAME_IOCTL, { .vop_ioctl = zfs_ioctl },
5235 5218 VOPNAME_GETATTR, { .vop_getattr = zfs_getattr },
5236 5219 VOPNAME_SETATTR, { .vop_setattr = zfs_setattr },
5237 5220 VOPNAME_ACCESS, { .vop_access = zfs_access },
5238 5221 VOPNAME_LOOKUP, { .vop_lookup = zfs_lookup },
5239 5222 VOPNAME_CREATE, { .vop_create = zfs_create },
5240 5223 VOPNAME_REMOVE, { .vop_remove = zfs_remove },
5241 5224 VOPNAME_LINK, { .vop_link = zfs_link },
5242 5225 VOPNAME_RENAME, { .vop_rename = zfs_rename },
5243 5226 VOPNAME_MKDIR, { .error = zfs_inval },
5244 5227 VOPNAME_RMDIR, { .vop_rmdir = zfs_rmdir },
5245 5228 VOPNAME_READDIR, { .vop_readdir = zfs_readdir },
5246 5229 VOPNAME_SYMLINK, { .error = zfs_inval },
5247 5230 VOPNAME_FSYNC, { .vop_fsync = zfs_fsync },
5248 5231 VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive },
5249 5232 VOPNAME_FID, { .vop_fid = zfs_fid },
5250 5233 VOPNAME_SEEK, { .vop_seek = zfs_seek },
5251 5234 VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf },
5252 5235 VOPNAME_GETSECATTR, { .vop_getsecattr = zfs_getsecattr },
5253 5236 VOPNAME_SETSECATTR, { .vop_setsecattr = zfs_setsecattr },
5254 5237 VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support },
5255 5238 NULL, NULL
5256 5239 };
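To illustrate what the VOPNAME_MKDIR and VOPNAME_SYMLINK mappings to zfs_inval in the template above mean to a caller, here is a hedged userland sketch. It assumes an illumos/Solaris system where the O_XATTR open flag is available and a ZFS-backed file exists at the hypothetical path "somefile"; error checking is abbreviated.

#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>

int
main(void)
{
	int fd, xfd;

	fd = open("somefile", O_RDONLY);		/* hypothetical file on ZFS */
	xfd = openat(fd, ".", O_RDONLY | O_XATTR);	/* its extended attribute directory */

	/* Expected to fail; on ZFS the error comes from the zfs_inval() mapping. */
	if (mkdirat(xfd, "subdir", 0755) == -1)
		(void) printf("mkdir in xattr dir: %s\n", strerror(errno));

	return (0);
}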
5257 5240
5258 5241 /*
5259 5242 * Error vnode operations template
5260 5243 */
5261 5244 vnodeops_t *zfs_evnodeops;
5262 5245 const fs_operation_def_t zfs_evnodeops_template[] = {
5263 5246 VOPNAME_INACTIVE, { .vop_inactive = zfs_inactive },
5264 5247 VOPNAME_PATHCONF, { .vop_pathconf = zfs_pathconf },
5265 5248 NULL, NULL
5266 5249 };
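For context on how these template arrays are consumed: on illumos a template is registered with vn_make_ops(), which builds the corresponding vnodeops_t and stores the pointer (zfs_dvnodeops, zfs_fvnodeops, and so on) for use when vnodes are created. In ZFS this registration is believed to live in zfs_create_op_tables() in zfs_vfsops.c, which is not part of this webrev; the sketch below is a hedged approximation of that pattern, with a hypothetical function name, not the actual code.

/*
 * Hedged sketch (not part of this diff): registering the templates
 * above via vn_make_ops().  The real ZFS code may differ in detail.
 */
static int
example_create_op_tables(void)
{
	int error;

	error = vn_make_ops("zfs", zfs_dvnodeops_template, &zfs_dvnodeops);
	if (error)
		return (error);

	error = vn_make_ops("zfs", zfs_fvnodeops_template, &zfs_fvnodeops);
	if (error)
		return (error);

	/* ...and likewise for the symlink, share, xattr-dir, and error tables. */
	return (vn_make_ops("zfs", zfs_evnodeops_template, &zfs_evnodeops));
}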