3741 zfs needs better comments
Submitted by: Will Andrews <willa@spectralogic.com>
Submitted by: Justin Gibbs <justing@spectralogic.com>
Submitted by: Alan Somers <alans@spectralogic.com>
Reviewed by: Matthew Ahrens <mahrens@delphix.com>

--- old/usr/src/uts/common/fs/zfs/zfs_ctldir.c
+++ new/usr/src/uts/common/fs/zfs/zfs_ctldir.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 23 * Copyright (c) 2013 by Delphix. All rights reserved.
24 24 */
25 25
26 26 /*
27 27 * ZFS control directory (a.k.a. ".zfs")
28 28 *
29 29 * This directory provides a common location for all ZFS meta-objects.
30 30 * Currently, this is only the 'snapshot' directory, but this may expand in the
31 31 * future. The elements are built using the GFS primitives, as the hierarchy
32 32 * does not actually exist on disk.
33 33 *
34 34 * For 'snapshot', we don't want to have all snapshots always mounted, because
35 35 * this would take up a huge amount of space in /etc/mnttab. We have three
36 36 * types of objects:
37 37 *
38 38 * ctldir ------> snapshotdir -------> snapshot
39 39 * |
40 40 * |
41 41 * V
42 42 * mounted fs
43 43 *
44 44 * The 'snapshot' node contains just enough information to lookup '..' and act
45 45 * as a mountpoint for the snapshot. Whenever we lookup a specific snapshot, we
46 46 * perform an automount of the underlying filesystem and return the
47 47 * corresponding vnode.
48 48 *
49 49 * All mounts are handled automatically by the kernel, but unmounts are
50 50 * (currently) handled from user land. The main reason is that there is no
51 51 * reliable way to auto-unmount the filesystem when it's "no longer in use".
52 52 * When the user unmounts a filesystem, we call zfsctl_unmount(), which
53 53 * unmounts any snapshots within the snapshot directory.
54 54 *
55 55 * The '.zfs', '.zfs/snapshot', and all directories created under
56 56 * '.zfs/snapshot' (ie: '.zfs/snapshot/<snapname>') are all GFS nodes and
57 57 * share the same vfs_t as the head filesystem (what '.zfs' lives under).
58 58 *
59 59 * File systems mounted on top of the GFS nodes '.zfs/snapshot/<snapname>'
60 60 * (ie: snapshots) are ZFS nodes and have their own unique vfs_t.
61 61 * However, vnodes within these mounted on file systems have their v_vfsp
62 62 * fields set to the head filesystem to make NFS happy (see
63 63 * zfsctl_snapdir_lookup()). We VFS_HOLD the head filesystem's vfs_t
64 64 * so that it cannot be freed until all snapshots have been unmounted.
65 65 */
66 66
67 67 #include <fs/fs_subr.h>
68 68 #include <sys/zfs_ctldir.h>
69 69 #include <sys/zfs_ioctl.h>
70 70 #include <sys/zfs_vfsops.h>
71 71 #include <sys/vfs_opreg.h>
72 72 #include <sys/gfs.h>
73 73 #include <sys/stat.h>
74 74 #include <sys/dmu.h>
75 75 #include <sys/dsl_destroy.h>
76 76 #include <sys/dsl_deleg.h>
77 77 #include <sys/mount.h>
78 78 #include <sys/sunddi.h>
79 79
80 80 #include "zfs_namecheck.h"
81 81
/*
 * Private data for a GFS node under '.zfs' (the '.zfs' directory itself
 * and its 'snapshot'/'shares' children).
 */
typedef struct zfsctl_node {
	gfs_dir_t	zc_gfs_private;	/* GFS directory state */
	uint64_t	zc_id;		/* inode number of this node */
	timestruc_t	zc_cmtime;	/* ctime and mtime, always the same */
} zfsctl_node_t;

/*
 * Private data for the '.zfs/snapshot' directory: the common node state
 * plus a lock-protected AVL tree of currently mounted snapshots.
 */
typedef struct zfsctl_snapdir {
	zfsctl_node_t	sd_node;	/* common .zfs node state */
	kmutex_t	sd_lock;	/* protects sd_snaps */
	avl_tree_t	sd_snaps;	/* zfs_snapentry_t, keyed by name */
} zfsctl_snapdir_t;

/*
 * One mounted snapshot: its name and the GFS vnode it is mounted on.
 */
typedef struct {
	char		*se_name;	/* snapshot name (kmem-allocated) */
	vnode_t		*se_root;	/* GFS mountpoint vnode */
	avl_node_t	se_node;	/* linkage in sd_snaps */
} zfs_snapentry_t;
99 99
100 100 static int
101 101 snapentry_compare(const void *a, const void *b)
102 102 {
103 103 const zfs_snapentry_t *sa = a;
104 104 const zfs_snapentry_t *sb = b;
105 105 int ret = strcmp(sa->se_name, sb->se_name);
106 106
107 107 if (ret < 0)
108 108 return (-1);
109 109 else if (ret > 0)
110 110 return (1);
111 111 else
112 112 return (0);
113 113 }
114 114
/*
 * Vnode ops vectors for each kind of .zfs node; filled in by
 * zfsctl_init() via gfs_make_opsvec() from the template tables below.
 */
vnodeops_t *zfsctl_ops_root;
vnodeops_t *zfsctl_ops_snapdir;
vnodeops_t *zfsctl_ops_snapshot;
vnodeops_t *zfsctl_ops_shares;
vnodeops_t *zfsctl_ops_shares_dir;

static const fs_operation_def_t zfsctl_tops_root[];
static const fs_operation_def_t zfsctl_tops_snapdir[];
static const fs_operation_def_t zfsctl_tops_snapshot[];
static const fs_operation_def_t zfsctl_tops_shares[];

static vnode_t *zfsctl_mknode_snapdir(vnode_t *);
static vnode_t *zfsctl_mknode_shares(vnode_t *);
static vnode_t *zfsctl_snapshot_mknode(vnode_t *, uint64_t objset);
static int zfsctl_unmount_snap(zfs_snapentry_t *, int, cred_t *);

/*
 * Mapping of .zfs path names to ops templates and to the global
 * ops-vector pointers gfs_make_opsvec() fills in.
 */
static gfs_opsvec_t zfsctl_opsvec[] = {
	{ ".zfs", zfsctl_tops_root, &zfsctl_ops_root },
	{ ".zfs/snapshot", zfsctl_tops_snapdir, &zfsctl_ops_snapdir },
	{ ".zfs/snapshot/vnode", zfsctl_tops_snapshot, &zfsctl_ops_snapshot },
	{ ".zfs/shares", zfsctl_tops_shares, &zfsctl_ops_shares_dir },
	{ ".zfs/shares/vnode", zfsctl_tops_shares, &zfsctl_ops_shares },
	{ NULL }
};
139 139
140 140 /*
141 141 * Root directory elements. We only have two entries
142 142 * snapshot and shares.
143 143 */
144 144 static gfs_dirent_t zfsctl_root_entries[] = {
145 145 { "snapshot", zfsctl_mknode_snapdir, GFS_CACHE_VNODE },
146 146 { "shares", zfsctl_mknode_shares, GFS_CACHE_VNODE },
147 147 { NULL }
148 148 };
149 149
150 150 /* include . and .. in the calculation */
151 151 #define NROOT_ENTRIES ((sizeof (zfsctl_root_entries) / \
152 152 sizeof (gfs_dirent_t)) + 1)
153 153
154 154
/*
 * Initialize the various GFS pieces we'll need to create and manipulate .zfs
 * directories.  Called from the ZFS init routine; registers the vnode ops
 * vectors described by zfsctl_opsvec.
 */
void
zfsctl_init(void)
{
	VERIFY(gfs_make_opsvec(zfsctl_opsvec) == 0);
}
165 165
166 166 void
167 167 zfsctl_fini(void)
168 168 {
169 169 /*
170 170 * Remove vfsctl vnode ops
171 171 */
172 172 if (zfsctl_ops_root)
173 173 vn_freevnodeops(zfsctl_ops_root);
174 174 if (zfsctl_ops_snapdir)
175 175 vn_freevnodeops(zfsctl_ops_snapdir);
176 176 if (zfsctl_ops_snapshot)
177 177 vn_freevnodeops(zfsctl_ops_snapshot);
178 178 if (zfsctl_ops_shares)
179 179 vn_freevnodeops(zfsctl_ops_shares);
180 180 if (zfsctl_ops_shares_dir)
181 181 vn_freevnodeops(zfsctl_ops_shares_dir);
182 182
183 183 zfsctl_ops_root = NULL;
184 184 zfsctl_ops_snapdir = NULL;
185 185 zfsctl_ops_snapshot = NULL;
186 186 zfsctl_ops_shares = NULL;
187 187 zfsctl_ops_shares_dir = NULL;
188 188 }
189 189
190 190 boolean_t
191 191 zfsctl_is_node(vnode_t *vp)
192 192 {
193 193 return (vn_matchops(vp, zfsctl_ops_root) ||
194 194 vn_matchops(vp, zfsctl_ops_snapdir) ||
195 195 vn_matchops(vp, zfsctl_ops_snapshot) ||
196 196 vn_matchops(vp, zfsctl_ops_shares) ||
197 197 vn_matchops(vp, zfsctl_ops_shares_dir));
198 198
199 199 }
200 200
201 201 /*
202 202 * Return the inode number associated with the 'snapshot' or
203 203 * 'shares' directory.
204 204 */
205 205 /* ARGSUSED */
206 206 static ino64_t
207 207 zfsctl_root_inode_cb(vnode_t *vp, int index)
208 208 {
209 209 zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
210 210
211 211 ASSERT(index <= 2);
212 212
213 213 if (index == 0)
214 214 return (ZFSCTL_INO_SNAPDIR);
215 215
216 216 return (zfsvfs->z_shares_dir);
217 217 }
218 218
/*
 * Create the '.zfs' directory.  This directory is cached as part of the VFS
 * structure.  This results in a hold on the vfs_t.  The code in zfs_umount()
 * therefore checks against a vfs_count of 2 instead of 1.  This reference
 * is removed when the ctldir is destroyed in the unmount.
 */
void
zfsctl_create(zfsvfs_t *zfsvfs)
{
	vnode_t *vp, *rvp;
	zfsctl_node_t *zcp;
	uint64_t crtime[2];

	ASSERT(zfsvfs->z_ctldir == NULL);

	vp = gfs_root_create(sizeof (zfsctl_node_t), zfsvfs->z_vfs,
	    zfsctl_ops_root, ZFSCTL_INO_ROOT, zfsctl_root_entries,
	    zfsctl_root_inode_cb, MAXNAMELEN, NULL, NULL);
	zcp = vp->v_data;
	zcp->zc_id = ZFSCTL_INO_ROOT;

	/*
	 * The .zfs node reports the creation time of the head filesystem's
	 * root as its ctime/mtime.
	 */
	VERIFY(VFS_ROOT(zfsvfs->z_vfs, &rvp) == 0);
	VERIFY(0 == sa_lookup(VTOZ(rvp)->z_sa_hdl, SA_ZPL_CRTIME(zfsvfs),
	    &crtime, sizeof (crtime)));
	ZFS_TIME_DECODE(&zcp->zc_cmtime, crtime);
	VN_RELE(rvp);

	/*
	 * We're only faking the fact that we have a root of a filesystem for
	 * the sake of the GFS interfaces.  Undo the flag manipulation it did
	 * for us.
	 */
	vp->v_flag &= ~(VROOT | VNOCACHE | VNOMAP | VNOSWAP | VNOMOUNT);

	zfsvfs->z_ctldir = vp;
}
255 255
/*
 * Destroy the '.zfs' directory.  Only called when the filesystem is
 * unmounted; releases the vnode cached by zfsctl_create().  There might
 * still be more references if we were force unmounted, but only new
 * zfs_inactive() calls can occur and they don't reference .zfs.
 */
void
zfsctl_destroy(zfsvfs_t *zfsvfs)
{
	VN_RELE(zfsvfs->z_ctldir);
	zfsvfs->z_ctldir = NULL;
}
267 267
/*
 * Given a root znode, retrieve the associated .zfs directory.
 * Adds a hold to the vnode and returns it; the caller is responsible
 * for the matching VN_RELE().
 */
vnode_t *
zfsctl_root(znode_t *zp)
{
	ASSERT(zfs_has_ctldir(zp));
	VN_HOLD(zp->z_zfsvfs->z_ctldir);
	return (zp->z_zfsvfs->z_ctldir);
}
279 279
280 280 /*
281 281 * Common open routine. Disallow any write access.
282 282 */
283 283 /* ARGSUSED */
284 284 static int
285 285 zfsctl_common_open(vnode_t **vpp, int flags, cred_t *cr, caller_context_t *ct)
286 286 {
287 287 if (flags & FWRITE)
288 288 return (SET_ERROR(EACCES));
289 289
290 290 return (0);
291 291 }
292 292
/*
 * Common close routine.  Nothing to do here: .zfs control nodes keep
 * no per-open state.
 */
/* ARGSUSED */
static int
zfsctl_common_close(vnode_t *vpp, int flags, int count, offset_t off,
    cred_t *cr, caller_context_t *ct)
{
	return (0);
}
303 303
304 304 /*
305 305 * Common access routine. Disallow writes.
306 306 */
307 307 /* ARGSUSED */
308 308 static int
309 309 zfsctl_common_access(vnode_t *vp, int mode, int flags, cred_t *cr,
310 310 caller_context_t *ct)
311 311 {
312 312 if (flags & V_ACE_MASK) {
313 313 if (mode & ACE_ALL_WRITE_PERMS)
314 314 return (SET_ERROR(EACCES));
315 315 } else {
316 316 if (mode & VWRITE)
317 317 return (SET_ERROR(EACCES));
318 318 }
319 319
320 320 return (0);
321 321 }
322 322
/*
 * Common getattr function.  Fill in the attributes shared by all .zfs
 * control nodes: root-owned, world-readable/searchable virtual
 * directories with no on-disk presence.
 */
static void
zfsctl_common_getattr(vnode_t *vp, vattr_t *vap)
{
	timestruc_t now;

	vap->va_uid = 0;
	vap->va_gid = 0;
	vap->va_rdev = 0;
	/*
	 * We are a purely virtual object, so we have no
	 * blocksize or allocated blocks.
	 */
	vap->va_blksize = 0;
	vap->va_nblocks = 0;
	vap->va_seq = 0;
	vap->va_fsid = vp->v_vfsp->vfs_dev;
	vap->va_mode = S_IRUSR | S_IXUSR | S_IRGRP | S_IXGRP |
	    S_IROTH | S_IXOTH;
	vap->va_type = VDIR;
	/*
	 * We live in the now (for atime).
	 */
	gethrestime(&now);
	vap->va_atime = now;
}
351 351
/*
 * Common VOP_FID for .zfs control nodes: encode the node's object id
 * (zc_id) into a short-form ZFS file ID with a generation number of 0.
 * Returns ENOSPC (with the required length filled in) if the caller's
 * fid buffer is too small.
 */
/*ARGSUSED*/
static int
zfsctl_common_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct)
{
	zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
	zfsctl_node_t *zcp = vp->v_data;
	uint64_t object = zcp->zc_id;
	zfid_short_t *zfid;
	int i;

	ZFS_ENTER(zfsvfs);

	if (fidp->fid_len < SHORT_FID_LEN) {
		fidp->fid_len = SHORT_FID_LEN;
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(ENOSPC));
	}

	zfid = (zfid_short_t *)fidp;

	zfid->zf_len = SHORT_FID_LEN;

	/* pack the object id a byte at a time, little-endian */
	for (i = 0; i < sizeof (zfid->zf_object); i++)
		zfid->zf_object[i] = (uint8_t)(object >> (8 * i));

	/* .zfs znodes always have a generation number of 0 */
	for (i = 0; i < sizeof (zfid->zf_gen); i++)
		zfid->zf_gen[i] = 0;

	ZFS_EXIT(zfsvfs);
	return (0);
}
384 384
385 385
/*
 * VOP_FID for '.zfs/shares': forward the request to the real shares
 * directory znode.  Returns ENOTSUP if this filesystem has no shares
 * directory.
 */
/*ARGSUSED*/
static int
zfsctl_shares_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct)
{
	zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
	znode_t *dzp;
	int error;

	ZFS_ENTER(zfsvfs);

	if (zfsvfs->z_shares_dir == 0) {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(ENOTSUP));
	}

	if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &dzp)) == 0) {
		error = VOP_FID(ZTOV(dzp), fidp, ct);
		VN_RELE(ZTOV(dzp));
	}

	ZFS_EXIT(zfsvfs);
	return (error);
}
409 409 /*
410 410 * .zfs inode namespace
411 411 *
412 412 * We need to generate unique inode numbers for all files and directories
413 413 * within the .zfs pseudo-filesystem. We use the following scheme:
414 414 *
415 415 * ENTRY ZFSCTL_INODE
416 416 * .zfs 1
417 417 * .zfs/snapshot 2
418 418 * .zfs/snapshot/<snap> objectid(snap)
419 419 */
420 420
421 421 #define ZFSCTL_INO_SNAP(id) (id)
422 422
/*
 * Get root ('.zfs') directory attributes.  The link count and size are
 * the two cached entries plus '.' and '..' (NROOT_ENTRIES); ctime/mtime
 * were recorded from the head filesystem's root at zfsctl_create() time.
 */
/* ARGSUSED */
static int
zfsctl_root_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
    caller_context_t *ct)
{
	zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
	zfsctl_node_t *zcp = vp->v_data;

	ZFS_ENTER(zfsvfs);
	vap->va_nodeid = ZFSCTL_INO_ROOT;
	vap->va_nlink = vap->va_size = NROOT_ENTRIES;
	vap->va_mtime = vap->va_ctime = zcp->zc_cmtime;

	zfsctl_common_getattr(vp, vap);
	ZFS_EXIT(zfsvfs);

	return (0);
}
444 444
/*
 * Lookup in the '.zfs' directory.  Special case the handling of "..",
 * which resolves to the head filesystem's root; everything else is a
 * normal GFS lookup of the cached 'snapshot'/'shares' entries.
 */
/* ARGSUSED */
int
zfsctl_root_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, pathname_t *pnp,
    int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct,
    int *direntflags, pathname_t *realpnp)
{
	zfsvfs_t *zfsvfs = dvp->v_vfsp->vfs_data;
	int err;

	/*
	 * No extended attributes allowed under .zfs
	 */
	if (flags & LOOKUP_XATTR)
		return (SET_ERROR(EINVAL));

	ZFS_ENTER(zfsvfs);

	if (strcmp(nm, "..") == 0) {
		err = VFS_ROOT(dvp->v_vfsp, vpp);
	} else {
		err = gfs_vop_lookup(dvp, nm, vpp, pnp, flags, rdir,
		    cr, ct, direntflags, realpnp);
	}

	ZFS_EXIT(zfsvfs);

	return (err);
}
476 476
477 477 static int
478 478 zfsctl_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr,
479 479 caller_context_t *ct)
480 480 {
481 481 /*
482 482 * We only care about ACL_ENABLED so that libsec can
483 483 * display ACL correctly and not default to POSIX draft.
484 484 */
485 485 if (cmd == _PC_ACL_ENABLED) {
486 486 *valp = _ACL_ACE_ENABLED;
487 487 return (0);
488 488 }
489 489
490 490 return (fs_pathconf(vp, cmd, valp, cr, ct));
491 491 }
492 492
/*
 * Vnode operations template for the '.zfs' directory itself.
 */
static const fs_operation_def_t zfsctl_tops_root[] = {
	{ VOPNAME_OPEN,		{ .vop_open = zfsctl_common_open } },
	{ VOPNAME_CLOSE,	{ .vop_close = zfsctl_common_close } },
	{ VOPNAME_IOCTL,	{ .error = fs_inval } },
	{ VOPNAME_GETATTR,	{ .vop_getattr = zfsctl_root_getattr } },
	{ VOPNAME_ACCESS,	{ .vop_access = zfsctl_common_access } },
	{ VOPNAME_READDIR,	{ .vop_readdir = gfs_vop_readdir } },
	{ VOPNAME_LOOKUP,	{ .vop_lookup = zfsctl_root_lookup } },
	{ VOPNAME_SEEK,		{ .vop_seek = fs_seek } },
	{ VOPNAME_INACTIVE,	{ .vop_inactive = gfs_vop_inactive } },
	{ VOPNAME_PATHCONF,	{ .vop_pathconf = zfsctl_pathconf } },
	{ VOPNAME_FID,		{ .vop_fid = zfsctl_common_fid } },
	{ NULL }
};
507 507
/*
 * Gets the full dataset name that corresponds to the given snapshot name.
 * Example:
 *	zfsctl_snapshot_zname("snap1") -> "mypool/myfs@snap1"
 *
 * 'zname' must hold at least 'len' bytes.  Returns EILSEQ for an invalid
 * snapshot name and ENAMETOOLONG if the combined name would not fit.
 */
static int
zfsctl_snapshot_zname(vnode_t *vp, const char *name, int len, char *zname)
{
	objset_t *os = ((zfsvfs_t *)((vp)->v_vfsp->vfs_data))->z_os;

	if (snapshot_namecheck(name, NULL, NULL) != 0)
		return (SET_ERROR(EILSEQ));
	dmu_objset_name(os, zname);
	/* +1 accounts for the '@' separator */
	if (strlen(zname) + 1 + strlen(name) >= len)
		return (SET_ERROR(ENAMETOOLONG));
	(void) strcat(zname, "@");
	(void) strcat(zname, name);
	return (0);
}
522 527
/*
 * Unmount a mounted snapshot and free its snapshot-directory entry.
 * The caller holds sd_lock (see the comment below about why VN_RELE()
 * cannot be used here).  On success the vnode, the entry name, and the
 * entry itself are all freed; on failure the entry is left intact so
 * the caller can re-insert it into the AVL tree.
 */
static int
zfsctl_unmount_snap(zfs_snapentry_t *sep, int fflags, cred_t *cr)
{
	vnode_t *svp = sep->se_root;
	int error;

	ASSERT(vn_ismntpt(svp));

	/* this will be dropped by dounmount() */
	if ((error = vn_vfswlock(svp)) != 0)
		return (error);

	VN_HOLD(svp);
	error = dounmount(vn_mountedvfs(svp), fflags, cr);
	if (error) {
		VN_RELE(svp);
		return (error);
	}

	/*
	 * We can't use VN_RELE(), as that will try to invoke
	 * zfsctl_snapdir_inactive(), which would cause us to destroy
	 * the sd_lock mutex held by our caller.
	 */
	ASSERT(svp->v_count == 1);
	gfs_vop_inactive(svp, cr, NULL);

	kmem_free(sep->se_name, strlen(sep->se_name) + 1);
	kmem_free(sep, sizeof (zfs_snapentry_t));

	return (0);
}
555 560
/*
 * Update the bookkeeping for a mounted snapshot after the underlying
 * snapshot has been renamed: re-key the entry in the AVL tree and
 * rewrite the tails of the vfs mountpoint and resource strings.
 * Caller holds sd_lock.
 */
static void
zfsctl_rename_snap(zfsctl_snapdir_t *sdp, zfs_snapentry_t *sep, const char *nm)
{
	avl_index_t where;
	vfs_t *vfsp;
	refstr_t *pathref;
	char newpath[MAXNAMELEN];
	char *tail;

	ASSERT(MUTEX_HELD(&sdp->sd_lock));
	ASSERT(sep != NULL);

	vfsp = vn_mountedvfs(sep->se_root);
	ASSERT(vfsp != NULL);

	vfs_lock_wait(vfsp);

	/*
	 * Change the name in the AVL tree.
	 */
	avl_remove(&sdp->sd_snaps, sep);
	kmem_free(sep->se_name, strlen(sep->se_name) + 1);
	sep->se_name = kmem_alloc(strlen(nm) + 1, KM_SLEEP);
	(void) strcpy(sep->se_name, nm);
	VERIFY(avl_find(&sdp->sd_snaps, sep, &where) == NULL);
	avl_insert(&sdp->sd_snaps, sep, where);

	/*
	 * Change the current mountpoint info:
	 * 	- update the tail of the mntpoint path
	 *	- update the tail of the resource path
	 */
	pathref = vfs_getmntpoint(vfsp);
	(void) strncpy(newpath, refstr_value(pathref), sizeof (newpath));
	VERIFY((tail = strrchr(newpath, '/')) != NULL);
	*(tail+1) = '\0';
	ASSERT3U(strlen(newpath) + strlen(nm), <, sizeof (newpath));
	(void) strcat(newpath, nm);
	refstr_rele(pathref);
	vfs_setmntpoint(vfsp, newpath, 0);

	pathref = vfs_getresource(vfsp);
	(void) strncpy(newpath, refstr_value(pathref), sizeof (newpath));
	VERIFY((tail = strrchr(newpath, '@')) != NULL);
	*(tail+1) = '\0';
	ASSERT3U(strlen(newpath) + strlen(nm), <, sizeof (newpath));
	(void) strcat(newpath, nm);
	refstr_rele(pathref);
	vfs_setresource(vfsp, newpath, 0);

	vfs_unlock(vfsp);
}
608 613
/*
 * VOP_RENAME for '.zfs/snapshot': renaming a snapshot directory renames
 * the underlying snapshot.  Snapshots cannot be moved out of the
 * snapshot directory, so sdvp must equal tdvp.  The AVL entry for a
 * currently mounted snapshot is re-keyed via zfsctl_rename_snap().
 */
/*ARGSUSED*/
static int
zfsctl_snapdir_rename(vnode_t *sdvp, char *snm, vnode_t *tdvp, char *tnm,
    cred_t *cr, caller_context_t *ct, int flags)
{
	zfsctl_snapdir_t *sdp = sdvp->v_data;
	zfs_snapentry_t search, *sep;
	zfsvfs_t *zfsvfs;
	avl_index_t where;
	char from[MAXNAMELEN], to[MAXNAMELEN];
	char real[MAXNAMELEN], fsname[MAXNAMELEN];
	int err;

	zfsvfs = sdvp->v_vfsp->vfs_data;
	ZFS_ENTER(zfsvfs);

	/* resolve the caller-supplied name for case-insensitive datasets */
	if ((flags & FIGNORECASE) || zfsvfs->z_case == ZFS_CASE_INSENSITIVE) {
		err = dmu_snapshot_realname(zfsvfs->z_os, snm, real,
		    MAXNAMELEN, NULL);
		if (err == 0) {
			snm = real;
		} else if (err != ENOTSUP) {
			ZFS_EXIT(zfsvfs);
			return (err);
		}
	}

	ZFS_EXIT(zfsvfs);

	dmu_objset_name(zfsvfs->z_os, fsname);

	err = zfsctl_snapshot_zname(sdvp, snm, MAXNAMELEN, from);
	if (err == 0)
		err = zfsctl_snapshot_zname(tdvp, tnm, MAXNAMELEN, to);
	if (err == 0)
		err = zfs_secpolicy_rename_perms(from, to, cr);
	if (err != 0)
		return (err);

	/*
	 * Cannot move snapshots out of the snapdir.
	 */
	if (sdvp != tdvp)
		return (SET_ERROR(EINVAL));

	/* renaming a snapshot to itself is a no-op */
	if (strcmp(snm, tnm) == 0)
		return (0);

	mutex_enter(&sdp->sd_lock);

	search.se_name = (char *)snm;
	if ((sep = avl_find(&sdp->sd_snaps, &search, &where)) == NULL) {
		mutex_exit(&sdp->sd_lock);
		return (SET_ERROR(ENOENT));
	}

	err = dsl_dataset_rename_snapshot(fsname, snm, tnm, B_FALSE);
	if (err == 0)
		zfsctl_rename_snap(sdp, sep, tnm);

	mutex_exit(&sdp->sd_lock);

	return (err);
}
673 678
/*
 * VOP_RMDIR for '.zfs/snapshot': removing a snapshot directory destroys
 * the underlying snapshot.  A currently mounted snapshot is force-
 * unmounted first; if the unmount fails the AVL entry is re-inserted
 * and the error returned.
 */
/* ARGSUSED */
static int
zfsctl_snapdir_remove(vnode_t *dvp, char *name, vnode_t *cwd, cred_t *cr,
    caller_context_t *ct, int flags)
{
	zfsctl_snapdir_t *sdp = dvp->v_data;
	zfs_snapentry_t *sep;
	zfs_snapentry_t search;
	zfsvfs_t *zfsvfs;
	char snapname[MAXNAMELEN];
	char real[MAXNAMELEN];
	int err;

	zfsvfs = dvp->v_vfsp->vfs_data;
	ZFS_ENTER(zfsvfs);

	/* resolve the caller-supplied name for case-insensitive datasets */
	if ((flags & FIGNORECASE) || zfsvfs->z_case == ZFS_CASE_INSENSITIVE) {

		err = dmu_snapshot_realname(zfsvfs->z_os, name, real,
		    MAXNAMELEN, NULL);
		if (err == 0) {
			name = real;
		} else if (err != ENOTSUP) {
			ZFS_EXIT(zfsvfs);
			return (err);
		}
	}

	ZFS_EXIT(zfsvfs);

	err = zfsctl_snapshot_zname(dvp, name, MAXNAMELEN, snapname);
	if (err == 0)
		err = zfs_secpolicy_destroy_perms(snapname, cr);
	if (err != 0)
		return (err);

	mutex_enter(&sdp->sd_lock);

	search.se_name = name;
	sep = avl_find(&sdp->sd_snaps, &search, NULL);
	if (sep) {
		avl_remove(&sdp->sd_snaps, sep);
		err = zfsctl_unmount_snap(sep, MS_FORCE, cr);
		if (err != 0)
			avl_add(&sdp->sd_snaps, sep);
		else
			err = dsl_destroy_snapshot(snapname, B_FALSE);
	} else {
		err = SET_ERROR(ENOENT);
	}

	mutex_exit(&sdp->sd_lock);

	return (err);
}
729 734
730 735 /*
731 736 * This creates a snapshot under '.zfs/snapshot'.
732 737 */
733 738 /* ARGSUSED */
734 739 static int
735 740 zfsctl_snapdir_mkdir(vnode_t *dvp, char *dirname, vattr_t *vap, vnode_t **vpp,
736 741 cred_t *cr, caller_context_t *cc, int flags, vsecattr_t *vsecp)
737 742 {
738 743 zfsvfs_t *zfsvfs = dvp->v_vfsp->vfs_data;
739 744 char name[MAXNAMELEN];
740 745 int err;
741 746 static enum symfollow follow = NO_FOLLOW;
742 747 static enum uio_seg seg = UIO_SYSSPACE;
743 748
744 749 if (snapshot_namecheck(dirname, NULL, NULL) != 0)
745 750 return (SET_ERROR(EILSEQ));
746 751
747 752 dmu_objset_name(zfsvfs->z_os, name);
748 753
749 754 *vpp = NULL;
750 755
751 756 err = zfs_secpolicy_snapshot_perms(name, cr);
752 757 if (err != 0)
753 758 return (err);
754 759
755 760 if (err == 0) {
756 761 err = dmu_objset_snapshot_one(name, dirname);
757 762 if (err != 0)
758 763 return (err);
759 764 err = lookupnameat(dirname, seg, follow, NULL, vpp, dvp);
760 765 }
761 766
762 767 return (err);
763 768 }
764 769
/*
 * Lookup entry point for the 'snapshot' directory.  Try to open the
 * snapshot if it exists, creating the pseudo filesystem vnode as necessary.
 * Perform a mount of the associated dataset on top of the vnode.
 */
/* ARGSUSED */
static int
zfsctl_snapdir_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, pathname_t *pnp,
    int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct,
    int *direntflags, pathname_t *realpnp)
{
	zfsctl_snapdir_t *sdp = dvp->v_data;
	objset_t *snap;
	char snapname[MAXNAMELEN];
	char real[MAXNAMELEN];
	char *mountpoint;
	zfs_snapentry_t *sep, search;
	struct mounta margs;
	vfs_t *vfsp;
	size_t mountpoint_len;
	avl_index_t where;
	zfsvfs_t *zfsvfs = dvp->v_vfsp->vfs_data;
	int err;

	/*
	 * No extended attributes allowed under .zfs
	 */
	if (flags & LOOKUP_XATTR)
		return (SET_ERROR(EINVAL));

	ASSERT(dvp->v_type == VDIR);

	/*
	 * If we get a recursive call, that means we got called
	 * from the domount() code while it was trying to look up the
	 * spec (which looks like a local path for zfs). We need to
	 * add some flag to domount() to tell it not to do this lookup.
	 */
	if (MUTEX_HELD(&sdp->sd_lock))
		return (SET_ERROR(ENOENT));

	ZFS_ENTER(zfsvfs);

	if (gfs_lookup_dot(vpp, dvp, zfsvfs->z_ctldir, nm) == 0) {
		ZFS_EXIT(zfsvfs);
		return (0);
	}

	/* resolve the caller-supplied name for case-insensitive lookups */
	if (flags & FIGNORECASE) {
		boolean_t conflict = B_FALSE;

		err = dmu_snapshot_realname(zfsvfs->z_os, nm, real,
		    MAXNAMELEN, &conflict);
		if (err == 0) {
			nm = real;
		} else if (err != ENOTSUP) {
			ZFS_EXIT(zfsvfs);
			return (err);
		}
		if (realpnp)
			(void) strlcpy(realpnp->pn_buf, nm,
			    realpnp->pn_bufsize);
		if (conflict && direntflags)
			*direntflags = ED_CASE_CONFLICT;
	}

	mutex_enter(&sdp->sd_lock);
	search.se_name = (char *)nm;
	if ((sep = avl_find(&sdp->sd_snaps, &search, &where)) != NULL) {
		/*
		 * The snapshot already has an entry; traverse() walks us
		 * down to the root of whatever is mounted on top of it.
		 */
		*vpp = sep->se_root;
		VN_HOLD(*vpp);
		err = traverse(vpp);
		if (err != 0) {
			VN_RELE(*vpp);
			*vpp = NULL;
		} else if (*vpp == sep->se_root) {
			/*
			 * The snapshot was unmounted behind our backs,
			 * try to remount it.
			 */
			goto domount;
		} else {
			/*
			 * VROOT was set during the traverse call.  We need
			 * to clear it since we're pretending to be part
			 * of our parent's vfs.
			 */
			(*vpp)->v_flag &= ~VROOT;
		}
		mutex_exit(&sdp->sd_lock);
		ZFS_EXIT(zfsvfs);
		return (err);
	}

	/*
	 * The requested snapshot is not currently mounted, look it up.
	 */
	err = zfsctl_snapshot_zname(dvp, nm, MAXNAMELEN, snapname);
	if (err != 0) {
		mutex_exit(&sdp->sd_lock);
		ZFS_EXIT(zfsvfs);
		/*
		 * handle "ls *" or "?" in a graceful manner,
		 * forcing EILSEQ to ENOENT.
		 * Since shell ultimately passes "*" or "?" as name to lookup
		 */
		return (err == EILSEQ ? ENOENT : err);
	}
	if (dmu_objset_hold(snapname, FTAG, &snap) != 0) {
		mutex_exit(&sdp->sd_lock);
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(ENOENT));
	}

	/* create the entry and its GFS mountpoint vnode */
	sep = kmem_alloc(sizeof (zfs_snapentry_t), KM_SLEEP);
	sep->se_name = kmem_alloc(strlen(nm) + 1, KM_SLEEP);
	(void) strcpy(sep->se_name, nm);
	*vpp = sep->se_root = zfsctl_snapshot_mknode(dvp, dmu_objset_id(snap));
	avl_insert(&sdp->sd_snaps, sep, where);

	dmu_objset_rele(snap, FTAG);
domount:
	mountpoint_len = strlen(refstr_value(dvp->v_vfsp->vfs_mntpt)) +
	    strlen("/.zfs/snapshot/") + strlen(nm) + 1;
	mountpoint = kmem_alloc(mountpoint_len, KM_SLEEP);
	(void) snprintf(mountpoint, mountpoint_len, "%s/.zfs/snapshot/%s",
	    refstr_value(dvp->v_vfsp->vfs_mntpt), nm);

	margs.spec = snapname;
	margs.dir = mountpoint;
	margs.flags = MS_SYSSPACE | MS_NOMNTTAB;
	margs.fstype = "zfs";
	margs.dataptr = NULL;
	margs.datalen = 0;
	margs.optptr = NULL;
	margs.optlen = 0;

	err = domount("zfs", &margs, *vpp, kcred, &vfsp);
	kmem_free(mountpoint, mountpoint_len);

	if (err == 0) {
		/*
		 * Return the mounted root rather than the covered mount point.
		 * Takes the GFS vnode at .zfs/snapshot/<snapname> and returns
		 * the ZFS vnode mounted on top of the GFS node. This ZFS
		 * vnode is the root of the newly created vfsp.
		 */
		VFS_RELE(vfsp);
		err = traverse(vpp);
	}

	if (err == 0) {
		/*
		 * Fix up the root vnode mounted on .zfs/snapshot/<snapname>.
		 *
		 * This is where we lie about our v_vfsp in order to
		 * make .zfs/snapshot/<snapname> accessible over NFS
		 * without requiring manual mounts of <snapname>.
		 */
		ASSERT(VTOZ(*vpp)->z_zfsvfs != zfsvfs);
		VTOZ(*vpp)->z_zfsvfs->z_parent = zfsvfs;
		(*vpp)->v_vfsp = zfsvfs->z_vfs;
		(*vpp)->v_flag &= ~VROOT;
	}
	mutex_exit(&sdp->sd_lock);
	ZFS_EXIT(zfsvfs);

	/*
	 * If we had an error, drop our hold on the vnode and
	 * zfsctl_snapshot_inactive() will clean up.
	 */
	if (err != 0) {
		VN_RELE(*vpp);
		*vpp = NULL;
	}
	return (err);
}
942 947
943 948 /* ARGSUSED */
944 949 static int
945 950 zfsctl_shares_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, pathname_t *pnp,
946 951 int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct,
947 952 int *direntflags, pathname_t *realpnp)
948 953 {
949 954 zfsvfs_t *zfsvfs = dvp->v_vfsp->vfs_data;
950 955 znode_t *dzp;
951 956 int error;
952 957
953 958 ZFS_ENTER(zfsvfs);
954 959
955 960 if (gfs_lookup_dot(vpp, dvp, zfsvfs->z_ctldir, nm) == 0) {
956 961 ZFS_EXIT(zfsvfs);
957 962 return (0);
958 963 }
959 964
960 965 if (zfsvfs->z_shares_dir == 0) {
961 966 ZFS_EXIT(zfsvfs);
962 967 return (SET_ERROR(ENOTSUP));
963 968 }
964 969 if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &dzp)) == 0)
965 970 error = VOP_LOOKUP(ZTOV(dzp), nm, vpp, pnp,
966 971 flags, rdir, cr, ct, direntflags, realpnp);
967 972
968 973 VN_RELE(ZTOV(dzp));
969 974 ZFS_EXIT(zfsvfs);
970 975
971 976 return (error);
972 977 }
973 978
/*
 * GFS readdir callback for '.zfs/snapshot': emit the next snapshot name
 * and inode starting from cookie *offp, advancing *nextp.  Sets *eofp
 * when the snapshot list is exhausted.
 */
/* ARGSUSED */
static int
zfsctl_snapdir_readdir_cb(vnode_t *vp, void *dp, int *eofp,
    offset_t *offp, offset_t *nextp, void *data, int flags)
{
	zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
	char snapname[MAXNAMELEN];
	uint64_t id, cookie;
	boolean_t case_conflict;
	int error;

	ZFS_ENTER(zfsvfs);

	cookie = *offp;
	dsl_pool_config_enter(dmu_objset_pool(zfsvfs->z_os), FTAG);
	error = dmu_snapshot_list_next(zfsvfs->z_os, MAXNAMELEN, snapname, &id,
	    &cookie, &case_conflict);
	dsl_pool_config_exit(dmu_objset_pool(zfsvfs->z_os), FTAG);
	if (error) {
		ZFS_EXIT(zfsvfs);
		/* ENOENT means we ran off the end of the snapshot list */
		if (error == ENOENT) {
			*eofp = 1;
			return (0);
		}
		return (error);
	}

	if (flags & V_RDDIR_ENTFLAGS) {
		edirent_t *eodp = dp;

		(void) strcpy(eodp->ed_name, snapname);
		eodp->ed_ino = ZFSCTL_INO_SNAP(id);
		eodp->ed_eflags = case_conflict ? ED_CASE_CONFLICT : 0;
	} else {
		struct dirent64 *odp = dp;

		(void) strcpy(odp->d_name, snapname);
		odp->d_ino = ZFSCTL_INO_SNAP(id);
	}
	*nextp = cookie;

	ZFS_EXIT(zfsvfs);

	return (0);
}
1019 1024
1020 1025 /* ARGSUSED */
1021 1026 static int
1022 1027 zfsctl_shares_readdir(vnode_t *vp, uio_t *uiop, cred_t *cr, int *eofp,
1023 1028 caller_context_t *ct, int flags)
1024 1029 {
1025 1030 zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
1026 1031 znode_t *dzp;
1027 1032 int error;
1028 1033
1029 1034 ZFS_ENTER(zfsvfs);
1030 1035
1031 1036 if (zfsvfs->z_shares_dir == 0) {
1032 1037 ZFS_EXIT(zfsvfs);
1033 1038 return (SET_ERROR(ENOTSUP));
1034 1039 }
1035 1040 if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &dzp)) == 0) {
1036 1041 error = VOP_READDIR(ZTOV(dzp), uiop, cr, eofp, ct, flags);
1037 1042 VN_RELE(ZTOV(dzp));
1038 1043 } else {
1039 1044 *eofp = 1;
1040 1045 error = SET_ERROR(ENOENT);
1041 1046 }
1042 1047
1043 1048 ZFS_EXIT(zfsvfs);
1044 1049 return (error);
1045 1050 }
1046 1051
/*
 * pvp is the '.zfs' directory (zfsctl_node_t).
 * Creates vp, which is '.zfs/snapshot' (zfsctl_snapdir_t).
 *
 * This function is the callback to create a GFS vnode for '.zfs/snapshot'
 * when a lookup is performed on .zfs for "snapshot".
 *
 * In addition to the generic GFS directory state, this initializes the
 * snapdir-private state: sd_lock and the sd_snaps AVL tree of mounted
 * snapshot entries (both torn down in zfsctl_snapdir_inactive()).
 */
vnode_t *
zfsctl_mknode_snapdir(vnode_t *pvp)
{
	vnode_t *vp;
	zfsctl_snapdir_t *sdp;

	vp = gfs_dir_create(sizeof (zfsctl_snapdir_t), pvp,
	    zfsctl_ops_snapdir, NULL, NULL, MAXNAMELEN,
	    zfsctl_snapdir_readdir_cb, NULL);
	sdp = vp->v_data;
	sdp->sd_node.zc_id = ZFSCTL_INO_SNAPDIR;
	/* Inherit creation/modification time from the parent '.zfs' node. */
	sdp->sd_node.zc_cmtime = ((zfsctl_node_t *)pvp->v_data)->zc_cmtime;
	mutex_init(&sdp->sd_lock, NULL, MUTEX_DEFAULT, NULL);
	avl_create(&sdp->sd_snaps, snapentry_compare,
	    sizeof (zfs_snapentry_t), offsetof(zfs_snapentry_t, se_node));
	return (vp);
}
1071 1076
1072 1077 vnode_t *
1073 1078 zfsctl_mknode_shares(vnode_t *pvp)
1074 1079 {
1075 1080 vnode_t *vp;
1076 1081 zfsctl_node_t *sdp;
1077 1082
1078 1083 vp = gfs_dir_create(sizeof (zfsctl_node_t), pvp,
1079 1084 zfsctl_ops_shares, NULL, NULL, MAXNAMELEN,
1080 1085 NULL, NULL);
1081 1086 sdp = vp->v_data;
1082 1087 sdp->zc_cmtime = ((zfsctl_node_t *)pvp->v_data)->zc_cmtime;
1083 1088 return (vp);
1084 1089
1085 1090 }
1086 1091
1087 1092 /* ARGSUSED */
1088 1093 static int
1089 1094 zfsctl_shares_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
1090 1095 caller_context_t *ct)
1091 1096 {
1092 1097 zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
1093 1098 znode_t *dzp;
1094 1099 int error;
1095 1100
1096 1101 ZFS_ENTER(zfsvfs);
1097 1102 if (zfsvfs->z_shares_dir == 0) {
1098 1103 ZFS_EXIT(zfsvfs);
1099 1104 return (SET_ERROR(ENOTSUP));
1100 1105 }
1101 1106 if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &dzp)) == 0) {
1102 1107 error = VOP_GETATTR(ZTOV(dzp), vap, flags, cr, ct);
1103 1108 VN_RELE(ZTOV(dzp));
1104 1109 }
1105 1110 ZFS_EXIT(zfsvfs);
1106 1111 return (error);
1107 1112
1108 1113
1109 1114 }
1110 1115
/*
 * Getattr entry point for '.zfs/snapshot'.  Most attributes come from
 * the common zfsctl handler; link count and size reflect the current
 * number of snapshot entries, and the times track snapshot activity.
 */
/* ARGSUSED */
static int
zfsctl_snapdir_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
    caller_context_t *ct)
{
	zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
	zfsctl_snapdir_t *sdp = vp->v_data;

	ZFS_ENTER(zfsvfs);
	zfsctl_common_getattr(vp, vap);
	vap->va_nodeid = gfs_file_inode(vp);
	/* One link/entry per snapshot in the AVL tree, plus "." and "..". */
	vap->va_nlink = vap->va_size = avl_numnodes(&sdp->sd_snaps) + 2;
	vap->va_ctime = vap->va_mtime = dmu_objset_snap_cmtime(zfsvfs->z_os);
	ZFS_EXIT(zfsvfs);

	return (0);
}
1128 1133
/*
 * Inactive entry point for '.zfs/snapshot'.  gfs_dir_inactive() returns
 * the node's private data only when the vnode is truly going away;
 * a NULL return means the node is still live and nothing may be freed.
 */
/* ARGSUSED */
static void
zfsctl_snapdir_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct)
{
	zfsctl_snapdir_t *sdp = vp->v_data;
	void *private;

	private = gfs_dir_inactive(vp);
	if (private != NULL) {
		/* All snapshot entries must already have been removed. */
		ASSERT(avl_numnodes(&sdp->sd_snaps) == 0);
		mutex_destroy(&sdp->sd_lock);
		avl_destroy(&sdp->sd_snaps);
		kmem_free(private, sizeof (zfsctl_snapdir_t));
	}
}
1144 1149
/*
 * Vnode operations for the '.zfs/snapshot' directory.  RENAME, RMDIR
 * and MKDIR map to snapshot rename, destroy and create respectively;
 * the rest are common zfsctl/GFS handlers.
 */
static const fs_operation_def_t zfsctl_tops_snapdir[] = {
	{ VOPNAME_OPEN,		{ .vop_open = zfsctl_common_open } },
	{ VOPNAME_CLOSE,	{ .vop_close = zfsctl_common_close } },
	{ VOPNAME_IOCTL,	{ .error = fs_inval } },
	{ VOPNAME_GETATTR,	{ .vop_getattr = zfsctl_snapdir_getattr } },
	{ VOPNAME_ACCESS,	{ .vop_access = zfsctl_common_access } },
	{ VOPNAME_RENAME,	{ .vop_rename = zfsctl_snapdir_rename } },
	{ VOPNAME_RMDIR,	{ .vop_rmdir = zfsctl_snapdir_remove } },
	{ VOPNAME_MKDIR,	{ .vop_mkdir = zfsctl_snapdir_mkdir } },
	{ VOPNAME_READDIR,	{ .vop_readdir = gfs_vop_readdir } },
	{ VOPNAME_LOOKUP,	{ .vop_lookup = zfsctl_snapdir_lookup } },
	{ VOPNAME_SEEK,		{ .vop_seek = fs_seek } },
	{ VOPNAME_INACTIVE,	{ .vop_inactive = zfsctl_snapdir_inactive } },
	{ VOPNAME_FID,		{ .vop_fid = zfsctl_common_fid } },
	{ NULL }
};
1161 1166
/*
 * Vnode operations for the '.zfs' shares directory.  Lookup, readdir
 * and getattr forward to the dataset's real shares directory; the rest
 * are common zfsctl/GFS handlers.
 */
static const fs_operation_def_t zfsctl_tops_shares[] = {
	{ VOPNAME_OPEN,		{ .vop_open = zfsctl_common_open } },
	{ VOPNAME_CLOSE,	{ .vop_close = zfsctl_common_close } },
	{ VOPNAME_IOCTL,	{ .error = fs_inval } },
	{ VOPNAME_GETATTR,	{ .vop_getattr = zfsctl_shares_getattr } },
	{ VOPNAME_ACCESS,	{ .vop_access = zfsctl_common_access } },
	{ VOPNAME_READDIR,	{ .vop_readdir = zfsctl_shares_readdir } },
	{ VOPNAME_LOOKUP,	{ .vop_lookup = zfsctl_shares_lookup } },
	{ VOPNAME_SEEK,		{ .vop_seek = fs_seek } },
	{ VOPNAME_INACTIVE,	{ .vop_inactive = gfs_vop_inactive } },
	{ VOPNAME_FID,		{ .vop_fid = zfsctl_shares_fid } },
	{ NULL }
};
1175 1180
1176 1181 /*
1177 1182 * pvp is the GFS vnode '.zfs/snapshot'.
1178 1183 *
1179 1184 * This creates a GFS node under '.zfs/snapshot' representing each
1180 1185 * snapshot. This newly created GFS node is what we mount snapshot
1181 1186 * vfs_t's ontop of.
1182 1187 */
1183 1188 static vnode_t *
1184 1189 zfsctl_snapshot_mknode(vnode_t *pvp, uint64_t objset)
1185 1190 {
1186 1191 vnode_t *vp;
1187 1192 zfsctl_node_t *zcp;
1188 1193
1189 1194 vp = gfs_dir_create(sizeof (zfsctl_node_t), pvp,
1190 1195 zfsctl_ops_snapshot, NULL, NULL, MAXNAMELEN, NULL, NULL);
1191 1196 zcp = vp->v_data;
1192 1197 zcp->zc_id = objset;
1193 1198
1194 1199 return (vp);
1195 1200 }
1196 1201
/*
 * Inactive entry point for a snapshot's GFS mount-point vnode.
 *
 * Looks up the parent snapdir (via ".."), and, while holding sd_lock,
 * removes this vnode's entry from the sd_snaps AVL tree before letting
 * GFS dispose of the vnode itself.
 */
static void
zfsctl_snapshot_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct)
{
	zfsctl_snapdir_t *sdp;
	zfs_snapentry_t *sep, *next;
	vnode_t *dvp;

	/* ".." lookup returns the parent snapdir with a hold on dvp. */
	VERIFY(gfs_dir_lookup(vp, "..", &dvp, cr, 0, NULL, NULL) == 0);
	sdp = dvp->v_data;

	mutex_enter(&sdp->sd_lock);

	/*
	 * If someone re-referenced the vnode between our caller's check
	 * and our acquisition of sd_lock, it is live again: do nothing.
	 */
	if (vp->v_count > 1) {
		mutex_exit(&sdp->sd_lock);
		return;
	}
	ASSERT(!vn_ismntpt(vp));

	/* Find and free the AVL entry whose root vnode is 'vp'. */
	sep = avl_first(&sdp->sd_snaps);
	while (sep != NULL) {
		next = AVL_NEXT(&sdp->sd_snaps, sep);

		if (sep->se_root == vp) {
			avl_remove(&sdp->sd_snaps, sep);
			kmem_free(sep->se_name, strlen(sep->se_name) + 1);
			kmem_free(sep, sizeof (zfs_snapentry_t));
			break;
		}
		sep = next;
	}
	/* An inactive snapshot vnode must still have had an entry. */
	ASSERT(sep != NULL);

	mutex_exit(&sdp->sd_lock);
	VN_RELE(dvp);

	/*
	 * Dispose of the vnode for the snapshot mount point.
	 * This is safe to do because once this entry has been removed
	 * from the AVL tree, it can't be found again, so cannot become
	 * "active". If we lookup the same name again we will end up
	 * creating a new vnode.
	 */
	gfs_vop_inactive(vp, cr, ct);
}
1241 1246
1242 1247
/*
 * These VP's should never see the light of day. They should always
 * be covered.
 *
 * Because a snapshot's GFS vnode is always covered by the snapshot's
 * mounted vfs_t, INACTIVE is the only operation it ever needs: it runs
 * when the covering mount goes away and the vnode's last hold drops.
 */
static const fs_operation_def_t zfsctl_tops_snapshot[] = {
	VOPNAME_INACTIVE, { .vop_inactive =  zfsctl_snapshot_inactive },
	NULL, NULL
};
1251 1256
/*
 * Given a snapshot's objset id, find the zfsvfs_t of the vfs mounted
 * on that snapshot's GFS node under '.zfs/snapshot'.
 *
 * Returns 0 and sets *zfsvfsp on success; EINVAL if no entry with that
 * objset id exists or the snapshot is not actually mounted.
 */
int
zfsctl_lookup_objset(vfs_t *vfsp, uint64_t objsetid, zfsvfs_t **zfsvfsp)
{
	zfsvfs_t *zfsvfs = vfsp->vfs_data;
	vnode_t *dvp, *vp;
	zfsctl_snapdir_t *sdp;
	zfsctl_node_t *zcp;
	zfs_snapentry_t *sep;
	int error;

	ASSERT(zfsvfs->z_ctldir != NULL);
	/* Get a held vnode for '.zfs/snapshot'. */
	error = zfsctl_root_lookup(zfsvfs->z_ctldir, "snapshot", &dvp,
	    NULL, 0, NULL, kcred, NULL, NULL, NULL);
	if (error != 0)
		return (error);
	sdp = dvp->v_data;

	/* Scan the snapshot entries for a matching objset id. */
	mutex_enter(&sdp->sd_lock);
	sep = avl_first(&sdp->sd_snaps);
	while (sep != NULL) {
		vp = sep->se_root;
		zcp = vp->v_data;
		if (zcp->zc_id == objsetid)
			break;

		sep = AVL_NEXT(&sdp->sd_snaps, sep);
	}

	if (sep != NULL) {
		VN_HOLD(vp);
		/*
		 * Return the mounted root rather than the covered mount point.
		 * Takes the GFS vnode at .zfs/snapshot/<snapshot objsetid>
		 * and returns the ZFS vnode mounted on top of the GFS node.
		 * This ZFS vnode is the root of the vfs for objset 'objsetid'.
		 */
		error = traverse(&vp);
		if (error == 0) {
			/* Unchanged vp means nothing was mounted on it. */
			if (vp == sep->se_root)
				error = SET_ERROR(EINVAL);
			else
				*zfsvfsp = VTOZ(vp)->z_zfsvfs;
		}
		mutex_exit(&sdp->sd_lock);
		VN_RELE(vp);
	} else {
		error = SET_ERROR(EINVAL);
		mutex_exit(&sdp->sd_lock);
	}

	VN_RELE(dvp);

	return (error);
}
1306 1311
/*
 * Unmount any snapshots for the given filesystem. This is called from
 * zfs_umount() - if we have a ctldir, then go through and unmount all the
 * snapshots.
 *
 * Returns 0 if every mounted snapshot was unmounted, otherwise the
 * error from the first failed unmount (that entry is put back in the
 * AVL tree and the walk stops).
 */
int
zfsctl_umount_snapshots(vfs_t *vfsp, int fflags, cred_t *cr)
{
	zfsvfs_t *zfsvfs = vfsp->vfs_data;
	vnode_t *dvp;
	zfsctl_snapdir_t *sdp;
	zfs_snapentry_t *sep, *next;
	int error;

	ASSERT(zfsvfs->z_ctldir != NULL);
	error = zfsctl_root_lookup(zfsvfs->z_ctldir, "snapshot", &dvp,
	    NULL, 0, NULL, cr, NULL, NULL, NULL);
	if (error != 0)
		return (error);
	sdp = dvp->v_data;

	mutex_enter(&sdp->sd_lock);

	sep = avl_first(&sdp->sd_snaps);
	while (sep != NULL) {
		next = AVL_NEXT(&sdp->sd_snaps, sep);

		/*
		 * If this snapshot is not mounted, then it must
		 * have just been unmounted by somebody else, and
		 * will be cleaned up by zfsctl_snapshot_inactive(),
		 * which removes the entry from the AVL tree.
		 */
		if (vn_ismntpt(sep->se_root)) {
			avl_remove(&sdp->sd_snaps, sep);
			error = zfsctl_unmount_snap(sep, fflags, cr);
			if (error) {
				/* Unmount failed: keep the entry alive. */
				avl_add(&sdp->sd_snaps, sep);
				break;
			}
		}
		sep = next;
	}

	mutex_exit(&sdp->sd_lock);
	VN_RELE(dvp);

	return (error);
}
↓ open down ↓ |
837 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX