Print this page
    
3006 VERIFY[S,U,P] and ASSERT[S,U,P] frequently check if first argument is zero
    
      
        | Split | 
	Close | 
      
      | Expand all | 
      | Collapse all | 
    
    
          --- old/usr/src/uts/common/fs/zfs/zfs_dir.c
          +++ new/usr/src/uts/common/fs/zfs/zfs_dir.c
   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  
    | 
      ↓ open down ↓ | 
    12 lines elided | 
    
      ↑ open up ↑ | 
  
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  /*
  22   22   * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
       23 + * Copyright (c) 2012 by Delphix. All rights reserved.
  23   24   */
  24   25  
  25   26  #include <sys/types.h>
  26   27  #include <sys/param.h>
  27   28  #include <sys/time.h>
  28   29  #include <sys/systm.h>
  29   30  #include <sys/sysmacros.h>
  30   31  #include <sys/resource.h>
  31   32  #include <sys/vfs.h>
  32   33  #include <sys/vnode.h>
  33   34  #include <sys/file.h>
  34   35  #include <sys/mode.h>
  35   36  #include <sys/kmem.h>
  36   37  #include <sys/uio.h>
  37   38  #include <sys/pathname.h>
  38   39  #include <sys/cmn_err.h>
  39   40  #include <sys/errno.h>
  40   41  #include <sys/stat.h>
  41   42  #include <sys/unistd.h>
  42   43  #include <sys/sunddi.h>
  43   44  #include <sys/random.h>
  44   45  #include <sys/policy.h>
  45   46  #include <sys/zfs_dir.h>
  46   47  #include <sys/zfs_acl.h>
  47   48  #include <sys/fs/zfs.h>
  48   49  #include "fs/fs_subr.h"
  49   50  #include <sys/zap.h>
  50   51  #include <sys/dmu.h>
  51   52  #include <sys/atomic.h>
  52   53  #include <sys/zfs_ctldir.h>
  53   54  #include <sys/zfs_fuid.h>
  54   55  #include <sys/sa.h>
  55   56  #include <sys/zfs_sa.h>
  56   57  #include <sys/dnlc.h>
  57   58  #include <sys/extdirent.h>
  58   59  
  59   60  /*
  60   61   * zfs_match_find() is used by zfs_dirent_lock() to peform zap lookups
  61   62   * of names after deciding which is the appropriate lookup interface.
  62   63   */
  63   64  static int
  64   65  zfs_match_find(zfsvfs_t *zfsvfs, znode_t *dzp, char *name, boolean_t exact,
  65   66      boolean_t update, int *deflags, pathname_t *rpnp, uint64_t *zoid)
  66   67  {
  67   68          int error;
  68   69  
  69   70          if (zfsvfs->z_norm) {
  70   71                  matchtype_t mt = MT_FIRST;
  71   72                  boolean_t conflict = B_FALSE;
  72   73                  size_t bufsz = 0;
  73   74                  char *buf = NULL;
  74   75  
  75   76                  if (rpnp) {
  76   77                          buf = rpnp->pn_buf;
  77   78                          bufsz = rpnp->pn_bufsize;
  78   79                  }
  79   80                  if (exact)
  80   81                          mt = MT_EXACT;
  81   82                  /*
  82   83                   * In the non-mixed case we only expect there would ever
  83   84                   * be one match, but we need to use the normalizing lookup.
  84   85                   */
  85   86                  error = zap_lookup_norm(zfsvfs->z_os, dzp->z_id, name, 8, 1,
  86   87                      zoid, mt, buf, bufsz, &conflict);
  87   88                  if (!error && deflags)
  88   89                          *deflags = conflict ? ED_CASE_CONFLICT : 0;
  89   90          } else {
  90   91                  error = zap_lookup(zfsvfs->z_os, dzp->z_id, name, 8, 1, zoid);
  91   92          }
  92   93          *zoid = ZFS_DIRENT_OBJ(*zoid);
  93   94  
  94   95          if (error == ENOENT && update)
  95   96                  dnlc_update(ZTOV(dzp), name, DNLC_NO_VNODE);
  96   97  
  97   98          return (error);
  98   99  }
  99  100  
 100  101  /*
 101  102   * Lock a directory entry.  A dirlock on <dzp, name> protects that name
 102  103   * in dzp's directory zap object.  As long as you hold a dirlock, you can
 103  104   * assume two things: (1) dzp cannot be reaped, and (2) no other thread
 104  105   * can change the zap entry for (i.e. link or unlink) this name.
 105  106   *
 106  107   * Input arguments:
 107  108   *      dzp     - znode for directory
 108  109   *      name    - name of entry to lock
 109  110   *      flag    - ZNEW: if the entry already exists, fail with EEXIST.
 110  111   *                ZEXISTS: if the entry does not exist, fail with ENOENT.
 111  112   *                ZSHARED: allow concurrent access with other ZSHARED callers.
 112  113   *                ZXATTR: we want dzp's xattr directory
 113  114   *                ZCILOOK: On a mixed sensitivity file system,
 114  115   *                         this lookup should be case-insensitive.
 115  116   *                ZCIEXACT: On a purely case-insensitive file system,
 116  117   *                          this lookup should be case-sensitive.
 117  118   *                ZRENAMING: we are locking for renaming, force narrow locks
 118  119   *                ZHAVELOCK: Don't grab the z_name_lock for this call. The
 119  120   *                           current thread already holds it.
 120  121   *
 121  122   * Output arguments:
 122  123   *      zpp     - pointer to the znode for the entry (NULL if there isn't one)
 123  124   *      dlpp    - pointer to the dirlock for this entry (NULL on error)
 124  125   *      direntflags - (case-insensitive lookup only)
 125  126   *              flags if multiple case-sensitive matches exist in directory
 126  127   *      realpnp     - (case-insensitive lookup only)
 127  128   *              actual name matched within the directory
 128  129   *
 129  130   * Return value: 0 on success or errno on failure.
 130  131   *
 131  132   * NOTE: Always checks for, and rejects, '.' and '..'.
 132  133   * NOTE: For case-insensitive file systems we take wide locks (see below),
 133  134   *       but return znode pointers to a single match.
 134  135   */
  135  136  int
  136  137  zfs_dirent_lock(zfs_dirlock_t **dlpp, znode_t *dzp, char *name, znode_t **zpp,
  137  138      int flag, int *direntflags, pathname_t *realpnp)
  138  139  {
  139  140          zfsvfs_t        *zfsvfs = dzp->z_zfsvfs;
  140  141          zfs_dirlock_t   *dl;
  141  142          boolean_t       update;
  142  143          boolean_t       exact;
  143  144          uint64_t        zoid;
  144  145          vnode_t         *vp = NULL;
  145  146          int             error = 0;
  146  147          int             cmpflags;
  147  148  
  148  149          *zpp = NULL;
  149  150          *dlpp = NULL;
  150  151  
  151  152          /*
  152  153           * Verify that we are not trying to lock '.', '..', or '.zfs'
  153  154           */
  154  155          if (name[0] == '.' &&
  155  156              (name[1] == '\0' || (name[1] == '.' && name[2] == '\0')) ||
  156  157              zfs_has_ctldir(dzp) && strcmp(name, ZFS_CTLDIR_NAME) == 0)
  157  158                  return (EEXIST);
  158  159  
  159  160          /*
  160  161           * Case sensitivity and normalization preferences are set when
  161  162           * the file system is created.  These are stored in the
  162  163           * zfsvfs->z_case and zfsvfs->z_norm fields.  These choices
  163  164           * affect what vnodes can be cached in the DNLC, how we
  164  165           * perform zap lookups, and the "width" of our dirlocks.
  165  166           *
  166  167           * A normal dirlock locks a single name.  Note that with
  167  168           * normalization a name can be composed multiple ways, but
  168  169           * when normalized, these names all compare equal.  A wide
  169  170           * dirlock locks multiple names.  We need these when the file
  170  171           * system is supporting mixed-mode access.  It is sometimes
  171  172           * necessary to lock all case permutations of file name at
  172  173           * once so that simultaneous case-insensitive/case-sensitive
  173  174           * behaves as rationally as possible.
  174  175           */
  175  176  
  176  177          /*
  177  178           * Decide if exact matches should be requested when performing
  178  179           * a zap lookup on file systems supporting case-insensitive
  179  180           * access.
  180  181           */
  181  182          exact =
  182  183              ((zfsvfs->z_case == ZFS_CASE_INSENSITIVE) && (flag & ZCIEXACT)) ||
  183  184              ((zfsvfs->z_case == ZFS_CASE_MIXED) && !(flag & ZCILOOK));
  184  185  
  185  186          /*
  186  187           * Only look in or update the DNLC if we are looking for the
  187  188           * name on a file system that does not require normalization
  188  189           * or case folding.  We can also look there if we happen to be
  189  190           * on a non-normalizing, mixed sensitivity file system IF we
  190  191           * are looking for the exact name.
  191  192           *
  192  193           * Maybe can add TO-UPPERed version of name to dnlc in ci-only
  193  194           * case for performance improvement?
  194  195           */
  195  196          update = !zfsvfs->z_norm ||
  196  197              ((zfsvfs->z_case == ZFS_CASE_MIXED) &&
  197  198              !(zfsvfs->z_norm & ~U8_TEXTPREP_TOUPPER) && !(flag & ZCILOOK));
  198  199  
  199  200          /*
  200  201           * ZRENAMING indicates we are in a situation where we should
  201  202           * take narrow locks regardless of the file system's
  202  203           * preferences for normalizing and case folding.  This will
  203  204           * prevent us deadlocking trying to grab the same wide lock
  204  205           * twice if the two names happen to be case-insensitive
  205  206           * matches.
  206  207           */
  207  208          if (flag & ZRENAMING)
  208  209                  cmpflags = 0;
  209  210          else
  210  211                  cmpflags = zfsvfs->z_norm;
  211  212  
  212  213          /*
  213  214           * Wait until there are no locks on this name.
  214  215           *
  215  216           * Don't grab the z_name_lock if the caller already holds it
  216  217           * (ZHAVELOCK).  ZSHARED and ZHAVELOCK cannot be used together.
  217  218           */
  218  219          ASSERT(!(flag & ZSHARED) || !(flag & ZHAVELOCK));
  219  220          if (!(flag & ZHAVELOCK))
  220  221                  rw_enter(&dzp->z_name_lock, RW_READER);
  221  222  
  222  223          mutex_enter(&dzp->z_lock);
  223  224          for (;;) {
  224  225                  if (dzp->z_unlinked) {
  225  226                          mutex_exit(&dzp->z_lock);
  226  227                          if (!(flag & ZHAVELOCK))
  227  228                                  rw_exit(&dzp->z_name_lock);
  228  229                          return (ENOENT);
  229  230                  }
  230  231                  for (dl = dzp->z_dirlocks; dl != NULL; dl = dl->dl_next) {
  231  232                          if ((u8_strcmp(name, dl->dl_name, 0, cmpflags,
  232  233                              U8_UNICODE_LATEST, &error) == 0) || error != 0)
  233  234                                  break;
  234  235                  }
  235  236                  if (error != 0) {       /* u8_strcmp() reported an error */
  236  237                          mutex_exit(&dzp->z_lock);
  237  238                          if (!(flag & ZHAVELOCK))
  238  239                                  rw_exit(&dzp->z_name_lock);
  239  240                          return (ENOENT);
  240  241                  }
  241  242                  if (dl == NULL) {
  242  243                          /*
  243  244                           * Allocate a new dirlock and add it to the list.
  244  245                           */
  245  246                          dl = kmem_alloc(sizeof (zfs_dirlock_t), KM_SLEEP);
  246  247                          cv_init(&dl->dl_cv, NULL, CV_DEFAULT, NULL);
  247  248                          dl->dl_name = name;
  248  249                          dl->dl_sharecnt = 0;
  249  250                          dl->dl_namelock = 0;
  250  251                          dl->dl_namesize = 0;
  251  252                          dl->dl_dzp = dzp;
  252  253                          dl->dl_next = dzp->z_dirlocks;
  253  254                          dzp->z_dirlocks = dl;
  254  255                          break;
  255  256                  }
  256  257                  if ((flag & ZSHARED) && dl->dl_sharecnt != 0)
  257  258                          break;
  258  259                  cv_wait(&dl->dl_cv, &dzp->z_lock);
  259  260          }
  260  261  
  261  262          /*
  262  263           * ZHAVELOCK: the caller already held z_name_lock, so it was not
  263  264           * taken above; record that so zfs_dirent_unlock() won't drop it.
  264  265           */
  265  266          if (flag & ZHAVELOCK)
  266  267                  dl->dl_namelock = 1;
  267  268  
  268  269          if ((flag & ZSHARED) && ++dl->dl_sharecnt > 1 && dl->dl_namesize == 0) {
  269  270                  /*
  270  271                   * We're the second shared reference to dl.  Make a copy of
  271  272                   * dl_name in case the first thread goes away before we do.
  272  273                   * Note that we initialize the new name before storing its
  273  274                   * pointer into dl_name, because the first thread may load
  274  275                   * dl->dl_name at any time.  He'll either see the old value,
  275  276                   * which is his, or the new shared copy; either is OK.
  276  277                   */
  277  278                  dl->dl_namesize = strlen(dl->dl_name) + 1;
  278  279                  name = kmem_alloc(dl->dl_namesize, KM_SLEEP);
  279  280                  bcopy(dl->dl_name, name, dl->dl_namesize);
  280  281                  dl->dl_name = name;
  281  282          }
  282  283  
  283  284          mutex_exit(&dzp->z_lock);
  284  285  
  285  286          /*
  286  287           * We have a dirlock on the name.  (Note that it is the dirlock,
  287  288           * not the dzp's z_lock, that protects the name in the zap object.)
  288  289           * See if there's an object by this name; if so, put a hold on it.
  289  290           */
  290  291          if (flag & ZXATTR) {
  291  292                  error = sa_lookup(dzp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), &zoid,
  292  293                      sizeof (zoid));
  293  294                  if (error == 0)
  294  295                          error = (zoid == 0 ? ENOENT : 0);
  295  296          } else {
  296  297                  if (update)
  297  298                          vp = dnlc_lookup(ZTOV(dzp), name);
  298  299                  if (vp == DNLC_NO_VNODE) {      /* DNLC says: no such name */
  299  300                          VN_RELE(vp);
  300  301                          error = ENOENT;
  301  302                  } else if (vp) {
  302  303                          if (flag & ZNEW) {
  303  304                                  zfs_dirent_unlock(dl);
  304  305                                  VN_RELE(vp);
  305  306                                  return (EEXIST);
  306  307                          }
  307  308                          *dlpp = dl;
  308  309                          *zpp = VTOZ(vp);
  309  310                          return (0);
  310  311                  } else {
  311  312                          error = zfs_match_find(zfsvfs, dzp, name, exact,
  312  313                              update, direntflags, realpnp, &zoid);
  313  314                  }
  314  315          }
  315  316          if (error) {
  316  317                  if (error != ENOENT || (flag & ZEXISTS)) {
  317  318                          zfs_dirent_unlock(dl);
  318  319                          return (error);
  319  320                  }
  320  321          } else {
  321  322                  if (flag & ZNEW) {
  322  323                          zfs_dirent_unlock(dl);
  323  324                          return (EEXIST);
  324  325                  }
  325  326                  error = zfs_zget(zfsvfs, zoid, zpp);
  326  327                  if (error) {
  327  328                          zfs_dirent_unlock(dl);
  328  329                          return (error);
  329  330                  }
  330  331                  if (!(flag & ZXATTR) && update)
  331  332                          dnlc_update(ZTOV(dzp), name, ZTOV(*zpp));
  332  333          }
  333  334  
  334  335          *dlpp = dl;
  335  336  
  336  337          return (0);
  337  338  }
 337  338  
 338  339  /*
 339  340   * Unlock this directory entry and wake anyone who was waiting for it.
 340  341   */
 341  342  void
 342  343  zfs_dirent_unlock(zfs_dirlock_t *dl)
 343  344  {
 344  345          znode_t *dzp = dl->dl_dzp;
 345  346          zfs_dirlock_t **prev_dl, *cur_dl;
 346  347  
 347  348          mutex_enter(&dzp->z_lock);
 348  349  
 349  350          if (!dl->dl_namelock)
 350  351                  rw_exit(&dzp->z_name_lock);
 351  352  
 352  353          if (dl->dl_sharecnt > 1) {
 353  354                  dl->dl_sharecnt--;
 354  355                  mutex_exit(&dzp->z_lock);
 355  356                  return;
 356  357          }
 357  358          prev_dl = &dzp->z_dirlocks;
 358  359          while ((cur_dl = *prev_dl) != dl)
 359  360                  prev_dl = &cur_dl->dl_next;
 360  361          *prev_dl = dl->dl_next;
 361  362          cv_broadcast(&dl->dl_cv);
 362  363          mutex_exit(&dzp->z_lock);
 363  364  
 364  365          if (dl->dl_namesize != 0)
 365  366                  kmem_free(dl->dl_name, dl->dl_namesize);
 366  367          cv_destroy(&dl->dl_cv);
 367  368          kmem_free(dl, sizeof (*dl));
 368  369  }
 369  370  
 370  371  /*
 371  372   * Look up an entry in a directory.
 372  373   *
 373  374   * NOTE: '.' and '..' are handled as special cases because
 374  375   *      no directory entries are actually stored for them.  If this is
 375  376   *      the root of a filesystem, then '.zfs' is also treated as a
 376  377   *      special pseudo-directory.
 377  378   */
 378  379  int
 379  380  zfs_dirlook(znode_t *dzp, char *name, vnode_t **vpp, int flags,
 380  381      int *deflg, pathname_t *rpnp)
 381  382  {
 382  383          zfs_dirlock_t *dl;
 383  384          znode_t *zp;
 384  385          int error = 0;
 385  386          uint64_t parent;
 386  387  
 387  388          if (name[0] == 0 || (name[0] == '.' && name[1] == 0)) {
 388  389                  *vpp = ZTOV(dzp);
 389  390                  VN_HOLD(*vpp);
 390  391          } else if (name[0] == '.' && name[1] == '.' && name[2] == 0) {
 391  392                  zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
 392  393  
 393  394                  /*
 394  395                   * If we are a snapshot mounted under .zfs, return
 395  396                   * the vp for the snapshot directory.
 396  397                   */
 397  398                  if ((error = sa_lookup(dzp->z_sa_hdl,
 398  399                      SA_ZPL_PARENT(zfsvfs), &parent, sizeof (parent))) != 0)
 399  400                          return (error);
 400  401                  if (parent == dzp->z_id && zfsvfs->z_parent != zfsvfs) {
 401  402                          error = zfsctl_root_lookup(zfsvfs->z_parent->z_ctldir,
 402  403                              "snapshot", vpp, NULL, 0, NULL, kcred,
 403  404                              NULL, NULL, NULL);
 404  405                          return (error);
 405  406                  }
 406  407                  rw_enter(&dzp->z_parent_lock, RW_READER);
 407  408                  error = zfs_zget(zfsvfs, parent, &zp);
 408  409                  if (error == 0)
 409  410                          *vpp = ZTOV(zp);
 410  411                  rw_exit(&dzp->z_parent_lock);
 411  412          } else if (zfs_has_ctldir(dzp) && strcmp(name, ZFS_CTLDIR_NAME) == 0) {
 412  413                  *vpp = zfsctl_root(dzp);
 413  414          } else {
 414  415                  int zf;
 415  416  
 416  417                  zf = ZEXISTS | ZSHARED;
 417  418                  if (flags & FIGNORECASE)
 418  419                          zf |= ZCILOOK;
 419  420  
 420  421                  error = zfs_dirent_lock(&dl, dzp, name, &zp, zf, deflg, rpnp);
 421  422                  if (error == 0) {
 422  423                          *vpp = ZTOV(zp);
 423  424                          zfs_dirent_unlock(dl);
 424  425                          dzp->z_zn_prefetch = B_TRUE; /* enable prefetching */
 425  426                  }
 426  427                  rpnp = NULL;
 427  428          }
 428  429  
 429  430          if ((flags & FIGNORECASE) && rpnp && !error)
 430  431                  (void) strlcpy(rpnp->pn_buf, name, rpnp->pn_bufsize);
 431  432  
 432  433          return (error);
 433  434  }
 434  435  
 435  436  /*
 436  437   * unlinked Set (formerly known as the "delete queue") Error Handling
 437  438   *
 438  439   * When dealing with the unlinked set, we dmu_tx_hold_zap(), but we
 439  440   * don't specify the name of the entry that we will be manipulating.  We
 440  441   * also fib and say that we won't be adding any new entries to the
 441  442   * unlinked set, even though we might (this is to lower the minimum file
 442  443   * size that can be deleted in a full filesystem).  So on the small
 443  444   * chance that the nlink list is using a fat zap (ie. has more than
 444  445   * 2000 entries), we *may* not pre-read a block that's needed.
 445  446   * Therefore it is remotely possible for some of the assertions
 446  447   * regarding the unlinked set below to fail due to i/o error.  On a
  
    | 
      ↓ open down ↓ | 
    414 lines elided | 
    
      ↑ open up ↑ | 
  
 447  448   * nondebug system, this will result in the space being leaked.
 448  449   */
 449  450  void
 450  451  zfs_unlinked_add(znode_t *zp, dmu_tx_t *tx)
 451  452  {
 452  453          zfsvfs_t *zfsvfs = zp->z_zfsvfs;
 453  454  
 454  455          ASSERT(zp->z_unlinked);
 455  456          ASSERT(zp->z_links == 0);
 456  457  
 457      -        VERIFY3U(0, ==,
 458      -            zap_add_int(zfsvfs->z_os, zfsvfs->z_unlinkedobj, zp->z_id, tx));
      458 +        VERIFY0(zap_add_int(zfsvfs->z_os, zfsvfs->z_unlinkedobj,
      459 +            zp->z_id, tx));
 459  460  }
 460  461  
 461  462  /*
 462  463   * Clean up any znodes that had no links when we either crashed or
 463  464   * (force) umounted the file system.
 464  465   */
 465  466  void
 466  467  zfs_unlinked_drain(zfsvfs_t *zfsvfs)
 467  468  {
 468  469          zap_cursor_t    zc;
 469  470          zap_attribute_t zap;
 470  471          dmu_object_info_t doi;
 471  472          znode_t         *zp;
 472  473          int             error;
 473  474  
 474  475          /*
 475  476           * Interate over the contents of the unlinked set.
 476  477           */
 477  478          for (zap_cursor_init(&zc, zfsvfs->z_os, zfsvfs->z_unlinkedobj);
 478  479              zap_cursor_retrieve(&zc, &zap) == 0;
 479  480              zap_cursor_advance(&zc)) {
 480  481  
 481  482                  /*
 482  483                   * See what kind of object we have in list
 483  484                   */
 484  485  
 485  486                  error = dmu_object_info(zfsvfs->z_os,
 486  487                      zap.za_first_integer, &doi);
 487  488                  if (error != 0)
 488  489                          continue;
 489  490  
 490  491                  ASSERT((doi.doi_type == DMU_OT_PLAIN_FILE_CONTENTS) ||
 491  492                      (doi.doi_type == DMU_OT_DIRECTORY_CONTENTS));
 492  493                  /*
 493  494                   * We need to re-mark these list entries for deletion,
 494  495                   * so we pull them back into core and set zp->z_unlinked.
 495  496                   */
 496  497                  error = zfs_zget(zfsvfs, zap.za_first_integer, &zp);
 497  498  
 498  499                  /*
 499  500                   * We may pick up znodes that are already marked for deletion.
 500  501                   * This could happen during the purge of an extended attribute
 501  502                   * directory.  All we need to do is skip over them, since they
 502  503                   * are already in the system marked z_unlinked.
 503  504                   */
 504  505                  if (error != 0)
 505  506                          continue;
 506  507  
 507  508                  zp->z_unlinked = B_TRUE;
 508  509                  VN_RELE(ZTOV(zp));
 509  510          }
 510  511          zap_cursor_fini(&zc);
 511  512  }
 512  513  
 513  514  /*
 514  515   * Delete the entire contents of a directory.  Return a count
 515  516   * of the number of entries that could not be deleted. If we encounter
 516  517   * an error, return a count of at least one so that the directory stays
 517  518   * in the unlinked set.
 518  519   *
 519  520   * NOTE: this function assumes that the directory is inactive,
 520  521   *      so there is no need to lock its entries before deletion.
 521  522   *      Also, it assumes the directory contents is *only* regular
 522  523   *      files.
 523  524   */
 524  525  static int
 525  526  zfs_purgedir(znode_t *dzp)
 526  527  {
 527  528          zap_cursor_t    zc;
 528  529          zap_attribute_t zap;
 529  530          znode_t         *xzp;
 530  531          dmu_tx_t        *tx;
 531  532          zfsvfs_t        *zfsvfs = dzp->z_zfsvfs;
 532  533          zfs_dirlock_t   dl;
 533  534          int skipped = 0;
 534  535          int error;
 535  536  
 536  537          for (zap_cursor_init(&zc, zfsvfs->z_os, dzp->z_id);
 537  538              (error = zap_cursor_retrieve(&zc, &zap)) == 0;
 538  539              zap_cursor_advance(&zc)) {
 539  540                  error = zfs_zget(zfsvfs,
 540  541                      ZFS_DIRENT_OBJ(zap.za_first_integer), &xzp);
 541  542                  if (error) {
 542  543                          skipped += 1;
 543  544                          continue;
 544  545                  }
 545  546  
 546  547                  ASSERT((ZTOV(xzp)->v_type == VREG) ||
 547  548                      (ZTOV(xzp)->v_type == VLNK));
 548  549  
 549  550                  tx = dmu_tx_create(zfsvfs->z_os);
 550  551                  dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE);
 551  552                  dmu_tx_hold_zap(tx, dzp->z_id, FALSE, zap.za_name);
 552  553                  dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE);
 553  554                  dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
 554  555                  /* Is this really needed ? */
 555  556                  zfs_sa_upgrade_txholds(tx, xzp);
 556  557                  error = dmu_tx_assign(tx, TXG_WAIT);
 557  558                  if (error) {
 558  559                          dmu_tx_abort(tx);
 559  560                          VN_RELE(ZTOV(xzp));
 560  561                          skipped += 1;
 561  562                          continue;
 562  563                  }
 563  564                  bzero(&dl, sizeof (dl));
 564  565                  dl.dl_dzp = dzp;
 565  566                  dl.dl_name = zap.za_name;
 566  567  
 567  568                  error = zfs_link_destroy(&dl, xzp, tx, 0, NULL);
 568  569                  if (error)
 569  570                          skipped += 1;
 570  571                  dmu_tx_commit(tx);
 571  572  
 572  573                  VN_RELE(ZTOV(xzp));
 573  574          }
 574  575          zap_cursor_fini(&zc);
 575  576          if (error != ENOENT)
 576  577                  skipped += 1;
 577  578          return (skipped);
 578  579  }
 579  580  
 580  581  void
 581  582  zfs_rmnode(znode_t *zp)
 582  583  {
 583  584          zfsvfs_t        *zfsvfs = zp->z_zfsvfs;
 584  585          objset_t        *os = zfsvfs->z_os;
 585  586          znode_t         *xzp = NULL;
 586  587          dmu_tx_t        *tx;
 587  588          uint64_t        acl_obj;
 588  589          uint64_t        xattr_obj;
 589  590          int             error;
 590  591  
 591  592          ASSERT(zp->z_links == 0);
 592  593          ASSERT(ZTOV(zp)->v_count == 0);
 593  594  
 594  595          /*
 595  596           * If this is an attribute directory, purge its contents.
 596  597           */
 597  598          if (ZTOV(zp)->v_type == VDIR && (zp->z_pflags & ZFS_XATTR)) {
 598  599                  if (zfs_purgedir(zp) != 0) {
 599  600                          /*
 600  601                           * Not enough space to delete some xattrs.
 601  602                           * Leave it in the unlinked set.
 602  603                           */
 603  604                          zfs_znode_dmu_fini(zp);
 604  605                          zfs_znode_free(zp);
 605  606                          return;
 606  607                  }
 607  608          }
 608  609  
 609  610          /*
 610  611           * Free up all the data in the file.
 611  612           */
 612  613          error = dmu_free_long_range(os, zp->z_id, 0, DMU_OBJECT_END);
 613  614          if (error) {
 614  615                  /*
 615  616                   * Not enough space.  Leave the file in the unlinked set.
 616  617                   */
 617  618                  zfs_znode_dmu_fini(zp);
 618  619                  zfs_znode_free(zp);
 619  620                  return;
 620  621          }
 621  622  
 622  623          /*
 623  624           * If the file has extended attributes, we're going to unlink
 624  625           * the xattr dir.
 625  626           */
 626  627          error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs),
 627  628              &xattr_obj, sizeof (xattr_obj));
 628  629          if (error == 0 && xattr_obj) {
 629  630                  error = zfs_zget(zfsvfs, xattr_obj, &xzp);
 630  631                  ASSERT(error == 0);
 631  632          }
 632  633  
 633  634          acl_obj = zfs_external_acl(zp);
 634  635  
 635  636          /*
 636  637           * Set up the final transaction.
 637  638           */
 638  639          tx = dmu_tx_create(os);
 639  640          dmu_tx_hold_free(tx, zp->z_id, 0, DMU_OBJECT_END);
 640  641          dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
 641  642          if (xzp) {
 642  643                  dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, TRUE, NULL);
 643  644                  dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE);
 644  645          }
 645  646          if (acl_obj)
 646  647                  dmu_tx_hold_free(tx, acl_obj, 0, DMU_OBJECT_END);
 647  648  
 648  649          zfs_sa_upgrade_txholds(tx, zp);
 649  650          error = dmu_tx_assign(tx, TXG_WAIT);
 650  651          if (error) {
 651  652                  /*
 652  653                   * Not enough space to delete the file.  Leave it in the
 653  654                   * unlinked set, leaking it until the fs is remounted (at
 654  655                   * which point we'll call zfs_unlinked_drain() to process it).
 655  656                   */
 656  657                  dmu_tx_abort(tx);
 657  658                  zfs_znode_dmu_fini(zp);
 658  659                  zfs_znode_free(zp);
 659  660                  goto out;
 660  661          }
 661  662  
 662  663          if (xzp) {
 663  664                  ASSERT(error == 0);
  
    | 
      ↓ open down ↓ | 
    195 lines elided | 
    
      ↑ open up ↑ | 
  
 664  665                  mutex_enter(&xzp->z_lock);
 665  666                  xzp->z_unlinked = B_TRUE;       /* mark xzp for deletion */
 666  667                  xzp->z_links = 0;       /* no more links to it */
 667  668                  VERIFY(0 == sa_update(xzp->z_sa_hdl, SA_ZPL_LINKS(zfsvfs),
 668  669                      &xzp->z_links, sizeof (xzp->z_links), tx));
 669  670                  mutex_exit(&xzp->z_lock);
 670  671                  zfs_unlinked_add(xzp, tx);
 671  672          }
 672  673  
 673  674          /* Remove this znode from the unlinked set */
 674      -        VERIFY3U(0, ==,
 675      -            zap_remove_int(zfsvfs->z_os, zfsvfs->z_unlinkedobj, zp->z_id, tx));
      675 +        VERIFY0(zap_remove_int(zfsvfs->z_os, zfsvfs->z_unlinkedobj,
      676 +            zp->z_id, tx));
 676  677  
 677  678          zfs_znode_delete(zp, tx);
 678  679  
 679  680          dmu_tx_commit(tx);
 680  681  out:
 681  682          if (xzp)
 682  683                  VN_RELE(ZTOV(xzp));
 683  684  }
 684  685  
 685  686  static uint64_t
 686  687  zfs_dirent(znode_t *zp, uint64_t mode)
 687  688  {
 688  689          uint64_t de = zp->z_id;
 689  690  
 690  691          if (zp->z_zfsvfs->z_version >= ZPL_VERSION_DIRENT_TYPE)
 691  692                  de |= IFTODT(mode) << 60;
 692  693          return (de);
 693  694  }
 694  695  
 695  696  /*
 696  697   * Link zp into dl.  Can only fail if zp has been unlinked.
 697  698   */
 698  699  int
 699  700  zfs_link_create(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag)
 700  701  {
 701  702          znode_t *dzp = dl->dl_dzp;
 702  703          zfsvfs_t *zfsvfs = zp->z_zfsvfs;
 703  704          vnode_t *vp = ZTOV(zp);
 704  705          uint64_t value;
 705  706          int zp_is_dir = (vp->v_type == VDIR);
 706  707          sa_bulk_attr_t bulk[5];
 707  708          uint64_t mtime[2], ctime[2];
 708  709          int count = 0;
 709  710          int error;
 710  711  
 711  712          mutex_enter(&zp->z_lock);
 712  713  
 713  714          if (!(flag & ZRENAMING)) {
 714  715                  if (zp->z_unlinked) {   /* no new links to unlinked zp */
 715  716                          ASSERT(!(flag & (ZNEW | ZEXISTS)));
 716  717                          mutex_exit(&zp->z_lock);
 717  718                          return (ENOENT);
 718  719                  }
 719  720                  zp->z_links++;
 720  721                  SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), NULL,
 721  722                      &zp->z_links, sizeof (zp->z_links));
 722  723  
 723  724          }
 724  725          SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PARENT(zfsvfs), NULL,
 725  726              &dzp->z_id, sizeof (dzp->z_id));
 726  727          SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
 727  728              &zp->z_pflags, sizeof (zp->z_pflags));
 728  729  
 729  730          if (!(flag & ZNEW)) {
 730  731                  SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
 731  732                      ctime, sizeof (ctime));
 732  733                  zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime,
 733  734                      ctime, B_TRUE);
 734  735          }
 735  736          error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
 736  737          ASSERT(error == 0);
 737  738  
 738  739          mutex_exit(&zp->z_lock);
 739  740  
 740  741          mutex_enter(&dzp->z_lock);
 741  742          dzp->z_size++;
 742  743          dzp->z_links += zp_is_dir;
 743  744          count = 0;
 744  745          SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL,
 745  746              &dzp->z_size, sizeof (dzp->z_size));
 746  747          SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), NULL,
 747  748              &dzp->z_links, sizeof (dzp->z_links));
 748  749          SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL,
 749  750              mtime, sizeof (mtime));
 750  751          SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
 751  752              ctime, sizeof (ctime));
 752  753          SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
 753  754              &dzp->z_pflags, sizeof (dzp->z_pflags));
 754  755          zfs_tstamp_update_setup(dzp, CONTENT_MODIFIED, mtime, ctime, B_TRUE);
 755  756          error = sa_bulk_update(dzp->z_sa_hdl, bulk, count, tx);
 756  757          ASSERT(error == 0);
 757  758          mutex_exit(&dzp->z_lock);
 758  759  
 759  760          value = zfs_dirent(zp, zp->z_mode);
 760  761          error = zap_add(zp->z_zfsvfs->z_os, dzp->z_id, dl->dl_name,
 761  762              8, 1, &value, tx);
 762  763          ASSERT(error == 0);
 763  764  
 764  765          dnlc_update(ZTOV(dzp), dl->dl_name, vp);
 765  766  
 766  767          return (0);
 767  768  }
 768  769  
 769  770  static int
 770  771  zfs_dropname(zfs_dirlock_t *dl, znode_t *zp, znode_t *dzp, dmu_tx_t *tx,
 771  772      int flag)
 772  773  {
 773  774          int error;
 774  775  
 775  776          if (zp->z_zfsvfs->z_norm) {
 776  777                  if (((zp->z_zfsvfs->z_case == ZFS_CASE_INSENSITIVE) &&
 777  778                      (flag & ZCIEXACT)) ||
 778  779                      ((zp->z_zfsvfs->z_case == ZFS_CASE_MIXED) &&
 779  780                      !(flag & ZCILOOK)))
 780  781                          error = zap_remove_norm(zp->z_zfsvfs->z_os,
 781  782                              dzp->z_id, dl->dl_name, MT_EXACT, tx);
 782  783                  else
 783  784                          error = zap_remove_norm(zp->z_zfsvfs->z_os,
 784  785                              dzp->z_id, dl->dl_name, MT_FIRST, tx);
 785  786          } else {
 786  787                  error = zap_remove(zp->z_zfsvfs->z_os,
 787  788                      dzp->z_id, dl->dl_name, tx);
 788  789          }
 789  790  
 790  791          return (error);
 791  792  }
 792  793  
 793  794  /*
 794  795   * Unlink zp from dl, and mark zp for deletion if this was the last link.
 795  796   * Can fail if zp is a mount point (EBUSY) or a non-empty directory (EEXIST).
 796  797   * If 'unlinkedp' is NULL, we put unlinked znodes on the unlinked list.
 797  798   * If it's non-NULL, we use it to indicate whether the znode needs deletion,
 798  799   * and it's the caller's job to do it.
 799  800   */
 800  801  int
 801  802  zfs_link_destroy(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag,
 802  803          boolean_t *unlinkedp)
 803  804  {
 804  805          znode_t *dzp = dl->dl_dzp;
 805  806          zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
 806  807          vnode_t *vp = ZTOV(zp);
 807  808          int zp_is_dir = (vp->v_type == VDIR);
 808  809          boolean_t unlinked = B_FALSE;
 809  810          sa_bulk_attr_t bulk[5];
 810  811          uint64_t mtime[2], ctime[2];
 811  812          int count = 0;
 812  813          int error;
 813  814  
 814  815          dnlc_remove(ZTOV(dzp), dl->dl_name);
 815  816  
 816  817          if (!(flag & ZRENAMING)) {
 817  818                  if (vn_vfswlock(vp))            /* prevent new mounts on zp */
 818  819                          return (EBUSY);
 819  820  
 820  821                  if (vn_ismntpt(vp)) {           /* don't remove mount point */
 821  822                          vn_vfsunlock(vp);
 822  823                          return (EBUSY);
 823  824                  }
 824  825  
 825  826                  mutex_enter(&zp->z_lock);
 826  827  
 827  828                  if (zp_is_dir && !zfs_dirempty(zp)) {
 828  829                          mutex_exit(&zp->z_lock);
 829  830                          vn_vfsunlock(vp);
 830  831                          return (EEXIST);
 831  832                  }
 832  833  
 833  834                  /*
 834  835                   * If we get here, we are going to try to remove the object.
 835  836                   * First try removing the name from the directory; if that
 836  837                   * fails, return the error.
 837  838                   */
 838  839                  error = zfs_dropname(dl, zp, dzp, tx, flag);
 839  840                  if (error != 0) {
 840  841                          mutex_exit(&zp->z_lock);
 841  842                          vn_vfsunlock(vp);
 842  843                          return (error);
 843  844                  }
 844  845  
 845  846                  if (zp->z_links <= zp_is_dir) {
 846  847                          zfs_panic_recover("zfs: link count on %s is %u, "
 847  848                              "should be at least %u",
 848  849                              zp->z_vnode->v_path ? zp->z_vnode->v_path :
 849  850                              "<unknown>", (int)zp->z_links,
 850  851                              zp_is_dir + 1);
 851  852                          zp->z_links = zp_is_dir + 1;
 852  853                  }
 853  854                  if (--zp->z_links == zp_is_dir) {
 854  855                          zp->z_unlinked = B_TRUE;
 855  856                          zp->z_links = 0;
 856  857                          unlinked = B_TRUE;
 857  858                  } else {
 858  859                          SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs),
 859  860                              NULL, &ctime, sizeof (ctime));
 860  861                          SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs),
 861  862                              NULL, &zp->z_pflags, sizeof (zp->z_pflags));
 862  863                          zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime, ctime,
 863  864                              B_TRUE);
 864  865                  }
 865  866                  SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs),
 866  867                      NULL, &zp->z_links, sizeof (zp->z_links));
 867  868                  error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
 868  869                  count = 0;
 869  870                  ASSERT(error == 0);
 870  871                  mutex_exit(&zp->z_lock);
 871  872                  vn_vfsunlock(vp);
 872  873          } else {
 873  874                  error = zfs_dropname(dl, zp, dzp, tx, flag);
 874  875                  if (error != 0)
 875  876                          return (error);
 876  877          }
 877  878  
 878  879          mutex_enter(&dzp->z_lock);
 879  880          dzp->z_size--;          /* one dirent removed */
 880  881          dzp->z_links -= zp_is_dir;      /* ".." link from zp */
 881  882          SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs),
 882  883              NULL, &dzp->z_links, sizeof (dzp->z_links));
 883  884          SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs),
 884  885              NULL, &dzp->z_size, sizeof (dzp->z_size));
 885  886          SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs),
 886  887              NULL, ctime, sizeof (ctime));
 887  888          SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs),
 888  889              NULL, mtime, sizeof (mtime));
 889  890          SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs),
 890  891              NULL, &dzp->z_pflags, sizeof (dzp->z_pflags));
 891  892          zfs_tstamp_update_setup(dzp, CONTENT_MODIFIED, mtime, ctime, B_TRUE);
 892  893          error = sa_bulk_update(dzp->z_sa_hdl, bulk, count, tx);
 893  894          ASSERT(error == 0);
 894  895          mutex_exit(&dzp->z_lock);
 895  896  
 896  897          if (unlinkedp != NULL)
 897  898                  *unlinkedp = unlinked;
 898  899          else if (unlinked)
 899  900                  zfs_unlinked_add(zp, tx);
 900  901  
 901  902          return (0);
 902  903  }
 903  904  
 904  905  /*
 905  906   * Indicate whether the directory is empty.  Works with or without z_lock
 906  907   * held, but can only be consider a hint in the latter case.  Returns true
 907  908   * if only "." and ".." remain and there's no work in progress.
 908  909   */
 909  910  boolean_t
 910  911  zfs_dirempty(znode_t *dzp)
 911  912  {
 912  913          return (dzp->z_size == 2 && dzp->z_dirlocks == 0);
 913  914  }
 914  915  
 915  916  int
 916  917  zfs_make_xattrdir(znode_t *zp, vattr_t *vap, vnode_t **xvpp, cred_t *cr)
 917  918  {
 918  919          zfsvfs_t *zfsvfs = zp->z_zfsvfs;
 919  920          znode_t *xzp;
 920  921          dmu_tx_t *tx;
 921  922          int error;
 922  923          zfs_acl_ids_t acl_ids;
 923  924          boolean_t fuid_dirtied;
 924  925          uint64_t parent;
 925  926  
 926  927          *xvpp = NULL;
 927  928  
 928  929          if (error = zfs_zaccess(zp, ACE_WRITE_NAMED_ATTRS, 0, B_FALSE, cr))
 929  930                  return (error);
 930  931  
 931  932          if ((error = zfs_acl_ids_create(zp, IS_XATTR, vap, cr, NULL,
 932  933              &acl_ids)) != 0)
 933  934                  return (error);
 934  935          if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) {
 935  936                  zfs_acl_ids_free(&acl_ids);
 936  937                  return (EDQUOT);
 937  938          }
 938  939  
 939  940  top:
 940  941          tx = dmu_tx_create(zfsvfs->z_os);
 941  942          dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
 942  943              ZFS_SA_BASE_ATTR_SIZE);
 943  944          dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
 944  945          dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
 945  946          fuid_dirtied = zfsvfs->z_fuid_dirty;
 946  947          if (fuid_dirtied)
 947  948                  zfs_fuid_txhold(zfsvfs, tx);
 948  949          error = dmu_tx_assign(tx, TXG_NOWAIT);
 949  950          if (error) {
 950  951                  if (error == ERESTART) {
 951  952                          dmu_tx_wait(tx);
 952  953                          dmu_tx_abort(tx);
 953  954                          goto top;
 954  955                  }
 955  956                  zfs_acl_ids_free(&acl_ids);
 956  957                  dmu_tx_abort(tx);
 957  958                  return (error);
 958  959          }
 959  960          zfs_mknode(zp, vap, tx, cr, IS_XATTR, &xzp, &acl_ids);
 960  961  
 961  962          if (fuid_dirtied)
 962  963                  zfs_fuid_sync(zfsvfs, tx);
 963  964  
 964  965  #ifdef DEBUG
 965  966          error = sa_lookup(xzp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
 966  967              &parent, sizeof (parent));
 967  968          ASSERT(error == 0 && parent == zp->z_id);
 968  969  #endif
 969  970  
 970  971          VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), &xzp->z_id,
 971  972              sizeof (xzp->z_id), tx));
 972  973  
 973  974          (void) zfs_log_create(zfsvfs->z_log, tx, TX_MKXATTR, zp,
 974  975              xzp, "", NULL, acl_ids.z_fuidp, vap);
 975  976  
 976  977          zfs_acl_ids_free(&acl_ids);
 977  978          dmu_tx_commit(tx);
 978  979  
 979  980          *xvpp = ZTOV(xzp);
 980  981  
 981  982          return (0);
 982  983  }
 983  984  
 984  985  /*
 985  986   * Return a znode for the extended attribute directory for zp.
 986  987   * ** If the directory does not already exist, it is created **
 987  988   *
 988  989   *      IN:     zp      - znode to obtain attribute directory from
 989  990   *              cr      - credentials of caller
 990  991   *              flags   - flags from the VOP_LOOKUP call
 991  992   *
 992  993   *      OUT:    xzpp    - pointer to extended attribute znode
 993  994   *
 994  995   *      RETURN: 0 on success
 995  996   *              error number on failure
 996  997   */
 997  998  int
 998  999  zfs_get_xattrdir(znode_t *zp, vnode_t **xvpp, cred_t *cr, int flags)
 999 1000  {
1000 1001          zfsvfs_t        *zfsvfs = zp->z_zfsvfs;
1001 1002          znode_t         *xzp;
1002 1003          zfs_dirlock_t   *dl;
1003 1004          vattr_t         va;
1004 1005          int             error;
1005 1006  top:
1006 1007          error = zfs_dirent_lock(&dl, zp, "", &xzp, ZXATTR, NULL, NULL);
1007 1008          if (error)
1008 1009                  return (error);
1009 1010  
1010 1011          if (xzp != NULL) {
1011 1012                  *xvpp = ZTOV(xzp);
1012 1013                  zfs_dirent_unlock(dl);
1013 1014                  return (0);
1014 1015          }
1015 1016  
1016 1017  
1017 1018          if (!(flags & CREATE_XATTR_DIR)) {
1018 1019                  zfs_dirent_unlock(dl);
1019 1020                  return (ENOENT);
1020 1021          }
1021 1022  
1022 1023          if (zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) {
1023 1024                  zfs_dirent_unlock(dl);
1024 1025                  return (EROFS);
1025 1026          }
1026 1027  
1027 1028          /*
1028 1029           * The ability to 'create' files in an attribute
1029 1030           * directory comes from the write_xattr permission on the base file.
1030 1031           *
1031 1032           * The ability to 'search' an attribute directory requires
1032 1033           * read_xattr permission on the base file.
1033 1034           *
1034 1035           * Once in a directory the ability to read/write attributes
1035 1036           * is controlled by the permissions on the attribute file.
1036 1037           */
1037 1038          va.va_mask = AT_TYPE | AT_MODE | AT_UID | AT_GID;
1038 1039          va.va_type = VDIR;
1039 1040          va.va_mode = S_IFDIR | S_ISVTX | 0777;
1040 1041          zfs_fuid_map_ids(zp, cr, &va.va_uid, &va.va_gid);
1041 1042  
1042 1043          error = zfs_make_xattrdir(zp, &va, xvpp, cr);
1043 1044          zfs_dirent_unlock(dl);
1044 1045  
1045 1046          if (error == ERESTART) {
1046 1047                  /* NB: we already did dmu_tx_wait() if necessary */
1047 1048                  goto top;
1048 1049          }
1049 1050  
1050 1051          return (error);
1051 1052  }
1052 1053  
1053 1054  /*
1054 1055   * Decide whether it is okay to remove within a sticky directory.
1055 1056   *
1056 1057   * In sticky directories, write access is not sufficient;
1057 1058   * you can remove entries from a directory only if:
1058 1059   *
1059 1060   *      you own the directory,
1060 1061   *      you own the entry,
1061 1062   *      the entry is a plain file and you have write access,
1062 1063   *      or you are privileged (checked in secpolicy...).
1063 1064   *
1064 1065   * The function returns 0 if remove access is granted.
1065 1066   */
1066 1067  int
1067 1068  zfs_sticky_remove_access(znode_t *zdp, znode_t *zp, cred_t *cr)
1068 1069  {
1069 1070          uid_t           uid;
1070 1071          uid_t           downer;
1071 1072          uid_t           fowner;
1072 1073          zfsvfs_t        *zfsvfs = zdp->z_zfsvfs;
1073 1074  
1074 1075          if (zdp->z_zfsvfs->z_replay)
1075 1076                  return (0);
1076 1077  
1077 1078          if ((zdp->z_mode & S_ISVTX) == 0)
1078 1079                  return (0);
1079 1080  
1080 1081          downer = zfs_fuid_map_id(zfsvfs, zdp->z_uid, cr, ZFS_OWNER);
1081 1082          fowner = zfs_fuid_map_id(zfsvfs, zp->z_uid, cr, ZFS_OWNER);
1082 1083  
1083 1084          if ((uid = crgetuid(cr)) == downer || uid == fowner ||
1084 1085              (ZTOV(zp)->v_type == VREG &&
1085 1086              zfs_zaccess(zp, ACE_WRITE_DATA, 0, B_FALSE, cr) == 0))
1086 1087                  return (0);
1087 1088          else
1088 1089                  return (secpolicy_vnode_remove(cr));
1089 1090  }
  
    | 
      ↓ open down ↓ | 
    404 lines elided | 
    
      ↑ open up ↑ | 
  
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX