illumos-gate Wdiff usr/src/uts/common/fs/zfs/zfs_ctldir.c

Print this page

7127  remove -Wno-missing-braces from Makefile.uts

Split	Close
Expand all
Collapse all

          --- old/usr/src/uts/common/fs/zfs/zfs_ctldir.c
          +++ new/usr/src/uts/common/fs/zfs/zfs_ctldir.c

   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  /*
  22   22   * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  23   23   * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
  24   24   * Copyright 2015, OmniTI Computer Consulting, Inc. All rights reserved.
  25   25   */
  26   26  
  27   27  /*
  28   28   * ZFS control directory (a.k.a. ".zfs")
  29   29   *
  30   30   * This directory provides a common location for all ZFS meta-objects.
  31   31   * Currently, this is only the 'snapshot' directory, but this may expand in the
  32   32   * future.  The elements are built using the GFS primitives, as the hierarchy
  33   33   * does not actually exist on disk.
  34   34   *
  35   35   * For 'snapshot', we don't want to have all snapshots always mounted, because
  36   36   * this would take up a huge amount of space in /etc/mnttab.  We have three
  37   37   * types of objects:
  38   38   *
  39   39   *      ctldir ------> snapshotdir -------> snapshot
  40   40   *                                             |
  41   41   *                                             |
  42   42   *                                             V
  43   43   *                                         mounted fs
  44   44   *
  45   45   * The 'snapshot' node contains just enough information to lookup '..' and act
  46   46   * as a mountpoint for the snapshot.  Whenever we lookup a specific snapshot, we
  47   47   * perform an automount of the underlying filesystem and return the
  48   48   * corresponding vnode.
  49   49   *
  50   50   * All mounts are handled automatically by the kernel, but unmounts are
  51   51   * (currently) handled from user land.  The main reason is that there is no
  52   52   * reliable way to auto-unmount the filesystem when it's "no longer in use".
  53   53   * When the user unmounts a filesystem, we call zfsctl_unmount(), which
  54   54   * unmounts any snapshots within the snapshot directory.
  55   55   *
  56   56   * The '.zfs', '.zfs/snapshot', and all directories created under
  57   57   * '.zfs/snapshot' (ie: '.zfs/snapshot/<snapname>') are all GFS nodes and
  58   58   * share the same vfs_t as the head filesystem (what '.zfs' lives under).
  59   59   *
  60   60   * File systems mounted on top of the GFS nodes '.zfs/snapshot/<snapname>'
  61   61   * (ie: snapshots) are ZFS nodes and have their own unique vfs_t.
  62   62   * However, vnodes within these mounted on file systems have their v_vfsp
  63   63   * fields set to the head filesystem to make NFS happy (see
  64   64   * zfsctl_snapdir_lookup()). We VFS_HOLD the head filesystem's vfs_t
  65   65   * so that it cannot be freed until all snapshots have been unmounted.
  66   66   */
  67   67  
  68   68  #include <fs/fs_subr.h>
  69   69  #include <sys/zfs_ctldir.h>
  70   70  #include <sys/zfs_ioctl.h>
  71   71  #include <sys/zfs_vfsops.h>
  72   72  #include <sys/vfs_opreg.h>
  73   73  #include <sys/gfs.h>
  74   74  #include <sys/stat.h>
  75   75  #include <sys/dmu.h>
  76   76  #include <sys/dsl_destroy.h>
  77   77  #include <sys/dsl_deleg.h>
  78   78  #include <sys/mount.h>
  79   79  #include <sys/sunddi.h>
  80   80  
  81   81  #include "zfs_namecheck.h"
  82   82  
           /*
            * Private state attached to a .zfs control vnode (reached via v_data).
            */
  83   83  typedef struct zfsctl_node {
           /* GFS directory state.  NOTE(review): presumably must stay first - confirm */
  84   84          gfs_dir_t       zc_gfs_private;
           /* object id that zfsctl_common_fid() encodes into the node's FID */
  85   85          uint64_t        zc_id;
  86   86          timestruc_t     zc_cmtime;      /* ctime and mtime, always the same */
  87   87  } zfsctl_node_t;
  88   88  
           /*
            * Private state for the snapshot directory node: the common node data
            * plus the set of snapshot entries currently tracked under it.
            */
  89   89  typedef struct zfsctl_snapdir {
  90   90          zfsctl_node_t   sd_node;        /* common .zfs node state */
           /* held while sd_snaps is searched or modified */
  91   91          kmutex_t        sd_lock;
           /* zfs_snapentry_t records, found by name via avl_find() */
  92   92          avl_tree_t      sd_snaps;
  93   93  } zfsctl_snapdir_t;
  94   94  
           /*
            * One record in a snapdir's sd_snaps tree.
            */
  95   95  typedef struct {
           /* snapshot name; kmem-allocated with strlen + 1 bytes */
  96   96          char            *se_name;
           /* vnode the snapshot file system is mounted on */
  97   97          vnode_t         *se_root;
           /* tree linkage (presumably for sd_snaps - confirm against avl_create) */
  98   98          avl_node_t      se_node;
  99   99  } zfs_snapentry_t;
 100  100  
 101  101  static int
 102  102  snapentry_compare(const void *a, const void *b)
 103  103  {
 104  104          const zfs_snapentry_t *sa = a;
 105  105          const zfs_snapentry_t *sb = b;
 106  106          int ret = strcmp(sa->se_name, sb->se_name);
 107  107  
 108  108          if (ret < 0)
 109  109                  return (-1);
 110  110          else if (ret > 0)
 111  111                  return (1);
 112  112          else
 113  113                  return (0);
 114  114  }
 115  115  
           /*
            * Vnode operation vectors, one per kind of .zfs node.  zfsctl_opsvec
            * records the address of each one, and zfsctl_fini() frees and clears
            * them.
            */
 116  116  vnodeops_t *zfsctl_ops_root;
 117  117  vnodeops_t *zfsctl_ops_snapdir;
 118  118  vnodeops_t *zfsctl_ops_snapshot;
 119  119  vnodeops_t *zfsctl_ops_shares;
 120  120  vnodeops_t *zfsctl_ops_shares_dir;
 121  121  
           /* Operation templates referenced by zfsctl_opsvec ahead of their definitions. */
 122  122  static const fs_operation_def_t zfsctl_tops_root[];
 123  123  static const fs_operation_def_t zfsctl_tops_snapdir[];
 124  124  static const fs_operation_def_t zfsctl_tops_snapshot[];
 125  125  static const fs_operation_def_t zfsctl_tops_shares[];
 126  126  
           /* Forward declarations for helpers that are used before they are defined. */
 127  127  static vnode_t *zfsctl_mknode_snapdir(vnode_t *);
 128  128  static vnode_t *zfsctl_mknode_shares(vnode_t *);
 129  129  static vnode_t *zfsctl_snapshot_mknode(vnode_t *, uint64_t objset);
 130  130  static int zfsctl_unmount_snap(zfs_snapentry_t *, int, cred_t *);
 131  131  
           /*
            * Table handed to gfs_make_opsvec() by zfsctl_init().  Each row pairs a
            * node name with an operation template and the address of the global
            * vnodeops pointer for that kind of node.  Both "shares" rows use the
            * same template, zfsctl_tops_shares.  The table ends with a NULL row.
            */
 132  132  static gfs_opsvec_t zfsctl_opsvec[] = {
 133  133          { ".zfs", zfsctl_tops_root, &zfsctl_ops_root },
 134  134          { ".zfs/snapshot", zfsctl_tops_snapdir, &zfsctl_ops_snapdir },
 135  135          { ".zfs/snapshot/vnode", zfsctl_tops_snapshot, &zfsctl_ops_snapshot },
 136  136          { ".zfs/shares", zfsctl_tops_shares, &zfsctl_ops_shares_dir },
 137  137          { ".zfs/shares/vnode", zfsctl_tops_shares, &zfsctl_ops_shares },
 138  138          { NULL }
 139  139  };
 140  140  
 141  141  /*
 142  142   * Root directory elements.  There are only two entries:
 143  143   * snapshot and shares.
            *
            * Each row gives the entry name, the function that builds its vnode and
            * GFS_CACHE_VNODE; the table ends with a NULL row.  It is passed to
            * gfs_root_create() by zfsctl_create().
 144  144   */
 145  145  static gfs_dirent_t zfsctl_root_entries[] = {
 146  146          { "snapshot", zfsctl_mknode_snapdir, GFS_CACHE_VNODE },
 147  147          { "shares", zfsctl_mknode_shares, GFS_CACHE_VNODE },
 148  148          { NULL }
 149  149  };
 150  150  
 151  151  /*
            * Include . and .. in the calculation.  The array length already counts
            * the NULL terminator row, so adding one more yields the two real
            * entries plus "." and "..".  zfsctl_root_getattr() reports this value as
            * both the link count and the size of the .zfs directory.
            */
 152  152  #define NROOT_ENTRIES   ((sizeof (zfsctl_root_entries) / \
 153  153      sizeof (gfs_dirent_t)) + 1)
 154  154  
 155  155  
 156  156  /*
 157  157   * Initialize the various GFS pieces we'll need to create and manipulate .zfs
 158  158   * directories.  This is called from the ZFS init routine, and initializes the
 159  159   * vnode ops vectors that we'll be using.
            *
            * The vectors are described by zfsctl_opsvec; a non-zero return from
            * gfs_make_opsvec() trips the VERIFY.
 160  160   */
 161  161  void
 162  162  zfsctl_init(void)
 163  163  {
 164  164          VERIFY(gfs_make_opsvec(zfsctl_opsvec) == 0);
 165  165  }
 166  166  
 167  167  void
 168  168  zfsctl_fini(void)
 169  169  {
 170  170          /*
 171  171           * Remove vfsctl vnode ops
 172  172           */
 173  173          if (zfsctl_ops_root)
 174  174                  vn_freevnodeops(zfsctl_ops_root);
 175  175          if (zfsctl_ops_snapdir)
 176  176                  vn_freevnodeops(zfsctl_ops_snapdir);
 177  177          if (zfsctl_ops_snapshot)
 178  178                  vn_freevnodeops(zfsctl_ops_snapshot);
 179  179          if (zfsctl_ops_shares)
 180  180                  vn_freevnodeops(zfsctl_ops_shares);
 181  181          if (zfsctl_ops_shares_dir)
 182  182                  vn_freevnodeops(zfsctl_ops_shares_dir);
 183  183  
 184  184          zfsctl_ops_root = NULL;
 185  185          zfsctl_ops_snapdir = NULL;
 186  186          zfsctl_ops_snapshot = NULL;
 187  187          zfsctl_ops_shares = NULL;
 188  188          zfsctl_ops_shares_dir = NULL;
 189  189  }
 190  190  
 191  191  boolean_t
 192  192  zfsctl_is_node(vnode_t *vp)
 193  193  {
 194  194          return (vn_matchops(vp, zfsctl_ops_root) ||
 195  195              vn_matchops(vp, zfsctl_ops_snapdir) ||
 196  196              vn_matchops(vp, zfsctl_ops_snapshot) ||
 197  197              vn_matchops(vp, zfsctl_ops_shares) ||
 198  198              vn_matchops(vp, zfsctl_ops_shares_dir));
 199  199  
 200  200  }
 201  201  
 202  202  /*
 203  203   * Return the inode number associated with the 'snapshot' or
 204  204   * 'shares' directory.
 205  205   */
 206  206  /* ARGSUSED */
 207  207  static ino64_t
 208  208  zfsctl_root_inode_cb(vnode_t *vp, int index)
 209  209  {
 210  210          zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
 211  211  
 212  212          ASSERT(index <= 2);
 213  213  
 214  214          if (index == 0)
 215  215                  return (ZFSCTL_INO_SNAPDIR);
 216  216  
 217  217          return (zfsvfs->z_shares_dir);
 218  218  }
 219  219  
 220  220  /*
 221  221   * Create the '.zfs' directory.  This directory is cached as part of the VFS
 222  222   * structure.  This results in a hold on the vfs_t.  The code in zfs_umount()
 223  223   * therefore checks against a vfs_count of 2 instead of 1.  This reference
 224  224   * is removed when the ctldir is destroyed in the unmount.
 225  225   */
 226  226  void
 227  227  zfsctl_create(zfsvfs_t *zfsvfs)
 228  228  {
 229  229          vnode_t *vp, *rvp;
 230  230          zfsctl_node_t *zcp;
 231  231          uint64_t crtime[2];
 232  232  
 233  233          ASSERT(zfsvfs->z_ctldir == NULL);
 234  234  
 235  235          vp = gfs_root_create(sizeof (zfsctl_node_t), zfsvfs->z_vfs,
 236  236              zfsctl_ops_root, ZFSCTL_INO_ROOT, zfsctl_root_entries,
 237  237              zfsctl_root_inode_cb, MAXNAMELEN, NULL, NULL);
 238  238          zcp = vp->v_data;
 239  239          zcp->zc_id = ZFSCTL_INO_ROOT;
 240  240  
 241  241          VERIFY(VFS_ROOT(zfsvfs->z_vfs, &rvp) == 0);
 242  242          VERIFY(0 == sa_lookup(VTOZ(rvp)->z_sa_hdl, SA_ZPL_CRTIME(zfsvfs),
 243  243              &crtime, sizeof (crtime)));
 244  244          ZFS_TIME_DECODE(&zcp->zc_cmtime, crtime);
 245  245          VN_RELE(rvp);
 246  246  
 247  247          /*
 248  248           * We're only faking the fact that we have a root of a filesystem for
 249  249           * the sake of the GFS interfaces.  Undo the flag manipulation it did
 250  250           * for us.
 251  251           */
 252  252          vp->v_flag &= ~(VROOT | VNOCACHE | VNOMAP | VNOSWAP | VNOMOUNT);
 253  253  
 254  254          zfsvfs->z_ctldir = vp;
 255  255  }
 256  256  
 257  257  /*
 258  258   * Destroy the '.zfs' directory.  Only called when the filesystem is unmounted.
 259  259   * There might still be more references if we were force unmounted, but only
 260  260   * new zfs_inactive() calls can occur and they don't reference .zfs
 261  261   */
 262  262  void
 263  263  zfsctl_destroy(zfsvfs_t *zfsvfs)
 264  264  {
 265  265          VN_RELE(zfsvfs->z_ctldir);
 266  266          zfsvfs->z_ctldir = NULL;
 267  267  }
 268  268  
 269  269  /*
 270  270   * Given a root znode, retrieve the associated .zfs directory.
 271  271   * Add a hold to the vnode and return it.
 272  272   */
 273  273  vnode_t *
 274  274  zfsctl_root(znode_t *zp)
 275  275  {
 276  276          ASSERT(zfs_has_ctldir(zp));
 277  277          VN_HOLD(zp->z_zfsvfs->z_ctldir);
 278  278          return (zp->z_zfsvfs->z_ctldir);
 279  279  }
 280  280  
 281  281  /*
 282  282   * Common open routine.  Disallow any write access.
 283  283   */
 284  284  /* ARGSUSED */
 285  285  static int
 286  286  zfsctl_common_open(vnode_t **vpp, int flags, cred_t *cr, caller_context_t *ct)
 287  287  {
 288  288          if (flags & FWRITE)
 289  289                  return (SET_ERROR(EACCES));
 290  290  
 291  291          return (0);
 292  292  }
 293  293  
 294  294  /*
 295  295   * Common close routine.  Nothing to do here.
 296  296   */
 297  297  /* ARGSUSED */
 298  298  static int
 299  299  zfsctl_common_close(vnode_t *vpp, int flags, int count, offset_t off,
 300  300      cred_t *cr, caller_context_t *ct)
 301  301  {
 302  302          return (0);
 303  303  }
 304  304  
 305  305  /*
 306  306   * Common access routine.  Disallow writes.
 307  307   */
 308  308  /* ARGSUSED */
 309  309  static int
 310  310  zfsctl_common_access(vnode_t *vp, int mode, int flags, cred_t *cr,
 311  311      caller_context_t *ct)
 312  312  {
 313  313          if (flags & V_ACE_MASK) {
 314  314                  if (mode & ACE_ALL_WRITE_PERMS)
 315  315                          return (SET_ERROR(EACCES));
 316  316          } else {
 317  317                  if (mode & VWRITE)
 318  318                          return (SET_ERROR(EACCES));
 319  319          }
 320  320  
 321  321          return (0);
 322  322  }
 323  323  
 324  324  /*
 325  325   * Common getattr function.  Fill in basic information.
 326  326   */
 327  327  static void
 328  328  zfsctl_common_getattr(vnode_t *vp, vattr_t *vap)
 329  329  {
 330  330          timestruc_t     now;
 331  331  
 332  332          vap->va_uid = 0;
 333  333          vap->va_gid = 0;
 334  334          vap->va_rdev = 0;
 335  335          /*
 336  336           * We are a purely virtual object, so we have no
 337  337           * blocksize or allocated blocks.
 338  338           */
 339  339          vap->va_blksize = 0;
 340  340          vap->va_nblocks = 0;
 341  341          vap->va_seq = 0;
 342  342          vap->va_fsid = vp->v_vfsp->vfs_dev;
 343  343          vap->va_mode = S_IRUSR | S_IXUSR | S_IRGRP | S_IXGRP |
 344  344              S_IROTH | S_IXOTH;
 345  345          vap->va_type = VDIR;
 346  346          /*
 347  347           * We live in the now (for atime).
 348  348           */
 349  349          gethrestime(&now);
 350  350          vap->va_atime = now;
 351  351  }
 352  352  
 353  353  /*ARGSUSED*/
 354  354  static int
 355  355  zfsctl_common_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct)
 356  356  {
 357  357          zfsvfs_t        *zfsvfs = vp->v_vfsp->vfs_data;
 358  358          zfsctl_node_t   *zcp = vp->v_data;
 359  359          uint64_t        object = zcp->zc_id;
 360  360          zfid_short_t    *zfid;
 361  361          int             i;
 362  362  
 363  363          ZFS_ENTER(zfsvfs);
 364  364  
 365  365          if (fidp->fid_len < SHORT_FID_LEN) {
 366  366                  fidp->fid_len = SHORT_FID_LEN;
 367  367                  ZFS_EXIT(zfsvfs);
 368  368                  return (SET_ERROR(ENOSPC));
 369  369          }
 370  370  
 371  371          zfid = (zfid_short_t *)fidp;
 372  372  
 373  373          zfid->zf_len = SHORT_FID_LEN;
 374  374  
 375  375          for (i = 0; i < sizeof (zfid->zf_object); i++)
 376  376                  zfid->zf_object[i] = (uint8_t)(object >> (8 * i));
 377  377  
 378  378          /* .zfs znodes always have a generation number of 0 */
 379  379          for (i = 0; i < sizeof (zfid->zf_gen); i++)
 380  380                  zfid->zf_gen[i] = 0;
 381  381  
 382  382          ZFS_EXIT(zfsvfs);
 383  383          return (0);
 384  384  }
 385  385  
 386  386  
 387  387  /*ARGSUSED*/
 388  388  static int
 389  389  zfsctl_shares_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct)
 390  390  {
 391  391          zfsvfs_t        *zfsvfs = vp->v_vfsp->vfs_data;
 392  392          znode_t         *dzp;
 393  393          int             error;
 394  394  
 395  395          ZFS_ENTER(zfsvfs);
 396  396  
 397  397          if (zfsvfs->z_shares_dir == 0) {
 398  398                  ZFS_EXIT(zfsvfs);
 399  399                  return (SET_ERROR(ENOTSUP));
 400  400          }
 401  401  
 402  402          if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &dzp)) == 0) {
 403  403                  error = VOP_FID(ZTOV(dzp), fidp, ct);
 404  404                  VN_RELE(ZTOV(dzp));
 405  405          }
 406  406  
 407  407          ZFS_EXIT(zfsvfs);
 408  408          return (error);
 409  409  }
 410  410  /*
 411  411   * .zfs inode namespace
 412  412   *
 413  413   * We need to generate unique inode numbers for all files and directories
 414  414   * within the .zfs pseudo-filesystem.  We use the following scheme:
 415  415   *
 416  416   *      ENTRY                   ZFSCTL_INODE
 417  417   *      .zfs                    1
 418  418   *      .zfs/snapshot           2
 419  419   *      .zfs/snapshot/<snap>    objectid(snap)
            *      .zfs/shares             zfsvfs->z_shares_dir
            *                              (see zfsctl_root_inode_cb())
 420  420   */
 421  421  
           /* Identity mapping: the inode number is the id that is passed in. */
 422  422  #define ZFSCTL_INO_SNAP(id)     (id)
 423  423  
 424  424  /*
 425  425   * Get root directory attributes.
 426  426   */
 427  427  /* ARGSUSED */
 428  428  static int
 429  429  zfsctl_root_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
 430  430      caller_context_t *ct)
 431  431  {
 432  432          zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
 433  433          zfsctl_node_t *zcp = vp->v_data;
 434  434  
 435  435          ZFS_ENTER(zfsvfs);
 436  436          vap->va_nodeid = ZFSCTL_INO_ROOT;
 437  437          vap->va_nlink = vap->va_size = NROOT_ENTRIES;
 438  438          vap->va_mtime = vap->va_ctime = zcp->zc_cmtime;
 439  439  
 440  440          zfsctl_common_getattr(vp, vap);
 441  441          ZFS_EXIT(zfsvfs);
 442  442  
 443  443          return (0);
 444  444  }
 445  445  
 446  446  /*
 447  447   * Special case the handling of "..".
 448  448   */
 449  449  /* ARGSUSED */
 450  450  int
 451  451  zfsctl_root_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, pathname_t *pnp,
 452  452      int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct,
 453  453      int *direntflags, pathname_t *realpnp)
 454  454  {
 455  455          zfsvfs_t *zfsvfs = dvp->v_vfsp->vfs_data;
 456  456          int err;
 457  457  
 458  458          /*
 459  459           * No extended attributes allowed under .zfs
 460  460           */
 461  461          if (flags & LOOKUP_XATTR)
 462  462                  return (SET_ERROR(EINVAL));
 463  463  
 464  464          ZFS_ENTER(zfsvfs);
 465  465  
 466  466          if (strcmp(nm, "..") == 0) {
 467  467                  err = VFS_ROOT(dvp->v_vfsp, vpp);
 468  468          } else {
 469  469                  err = gfs_vop_lookup(dvp, nm, vpp, pnp, flags, rdir,
 470  470                      cr, ct, direntflags, realpnp);
 471  471          }
 472  472  
 473  473          ZFS_EXIT(zfsvfs);
 474  474  
 475  475          return (err);
 476  476  }
 477  477  
 478  478  static int
 479  479  zfsctl_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr,
 480  480      caller_context_t *ct)
 481  481  {
 482  482          /*
 483  483           * We only care about ACL_ENABLED so that libsec can
 484  484           * display ACL correctly and not default to POSIX draft.
 485  485           */
 486  486          if (cmd == _PC_ACL_ENABLED) {
 487  487                  *valp = _ACL_ACE_ENABLED;
 488  488                  return (0);
 489  489          }
 490  490  
 491  491          return (fs_pathconf(vp, cmd, valp, cr, ct));
 492  492  }
 493  493  
           /*
            * Operation template for the '.zfs' directory node; zfsctl_opsvec pairs it
            * with zfsctl_ops_root.  Open, close, access and fid use the handlers
            * shared by the .zfs nodes; getattr and lookup are specific to the root;
            * readdir and inactive go to GFS; ioctl is wired to fs_inval.
            */
 494  494  static const fs_operation_def_t zfsctl_tops_root[] = {
 495  495          { VOPNAME_OPEN,         { .vop_open = zfsctl_common_open }      },
 496  496          { VOPNAME_CLOSE,        { .vop_close = zfsctl_common_close }    },
 497  497          { VOPNAME_IOCTL,        { .error = fs_inval }                   },
 498  498          { VOPNAME_GETATTR,      { .vop_getattr = zfsctl_root_getattr }  },
 499  499          { VOPNAME_ACCESS,       { .vop_access = zfsctl_common_access }  },
 500  500          { VOPNAME_READDIR,      { .vop_readdir = gfs_vop_readdir }      },
 501  501          { VOPNAME_LOOKUP,       { .vop_lookup = zfsctl_root_lookup }    },
 502  502          { VOPNAME_SEEK,         { .vop_seek = fs_seek }                 },
 503  503          { VOPNAME_INACTIVE,     { .vop_inactive = gfs_vop_inactive }    },
 504  504          { VOPNAME_PATHCONF,     { .vop_pathconf = zfsctl_pathconf }     },
 505  505          { VOPNAME_FID,          { .vop_fid = zfsctl_common_fid  }       },
 506  506          { NULL }
 507  507  };
 508  508  
 509  509  /*
 510  510   * Gets the full dataset name that corresponds to the given snapshot name
 511  511   * Example:
 512  512   *      zfsctl_snapshot_zname("snap1") -> "mypool/myfs@snap1"
 513  513   */
 514  514  static int
 515  515  zfsctl_snapshot_zname(vnode_t *vp, const char *name, int len, char *zname)
 516  516  {
 517  517          objset_t *os = ((zfsvfs_t *)((vp)->v_vfsp->vfs_data))->z_os;
 518  518  
 519  519          if (zfs_component_namecheck(name, NULL, NULL) != 0)
 520  520                  return (SET_ERROR(EILSEQ));
 521  521          dmu_objset_name(os, zname);
 522  522          if (strlen(zname) + 1 + strlen(name) >= len)
 523  523                  return (SET_ERROR(ENAMETOOLONG));
 524  524          (void) strcat(zname, "@");
 525  525          (void) strcat(zname, name);
 526  526          return (0);
 527  527  }
 528  528  
           /*
            * Unmount the file system mounted on the snapshot node sep->se_root and,
            * if that succeeds, dispose of the node and free the entry together with
            * its name.
            *
            * Returns 0 on success.  On failure the error from vn_vfswlock() or
            * dounmount() is returned and 'sep' is left allocated.  This function
            * does not touch the sd_snaps tree; callers remove or re-add the entry
            * themselves.
            */
 529  529  static int
 530  530  zfsctl_unmount_snap(zfs_snapentry_t *sep, int fflags, cred_t *cr)
 531  531  {
 532  532          vnode_t *svp = sep->se_root;
 533  533          int error;
 534  534  
 535  535          ASSERT(vn_ismntpt(svp));
 536  536  
 537  537          /* this will be dropped by dounmount() */
 538  538          if ((error = vn_vfswlock(svp)) != 0)
 539  539                  return (error);
 540  540  
                   /* Extra hold across the unmount; given back below if it fails. */
 541  541          VN_HOLD(svp);
 542  542          error = dounmount(vn_mountedvfs(svp), fflags, cr);
 543  543          if (error) {
 544  544                  VN_RELE(svp);
 545  545                  return (error);
 546  546          }
 547  547  
 548  548          /*
 549  549           * We can't use VN_RELE(), as that will try to invoke
 550  550           * zfsctl_snapdir_inactive(), which would cause us to destroy
 551  551           * the sd_lock mutex held by our caller.
 552  552           */
 553  553          ASSERT(svp->v_count == 1);
 554  554          gfs_vop_inactive(svp, cr, NULL);
 555  555  
                   /* The name was allocated with strlen + 1 bytes; free the same size. */
 556  556          kmem_free(sep->se_name, strlen(sep->se_name) + 1);
 557  557          kmem_free(sep, sizeof (zfs_snapentry_t));
 558  558  
 559  559          return (0);
 560  560  }
 561  561  
 562  562  static void
 563  563  zfsctl_rename_snap(zfsctl_snapdir_t *sdp, zfs_snapentry_t *sep, const char *nm)
 564  564  {
 565  565          avl_index_t where;
 566  566          vfs_t *vfsp;
 567  567          refstr_t *pathref;
 568  568          char newpath[MAXNAMELEN];
 569  569          char *tail;
 570  570  
 571  571          ASSERT(MUTEX_HELD(&sdp->sd_lock));
 572  572          ASSERT(sep != NULL);
 573  573  
 574  574          vfsp = vn_mountedvfs(sep->se_root);
 575  575          ASSERT(vfsp != NULL);
 576  576  
 577  577          vfs_lock_wait(vfsp);
 578  578  
 579  579          /*
 580  580           * Change the name in the AVL tree.
 581  581           */
 582  582          avl_remove(&sdp->sd_snaps, sep);
 583  583          kmem_free(sep->se_name, strlen(sep->se_name) + 1);
 584  584          sep->se_name = kmem_alloc(strlen(nm) + 1, KM_SLEEP);
 585  585          (void) strcpy(sep->se_name, nm);
 586  586          VERIFY(avl_find(&sdp->sd_snaps, sep, &where) == NULL);
 587  587          avl_insert(&sdp->sd_snaps, sep, where);
 588  588  
 589  589          /*
 590  590           * Change the current mountpoint info:
 591  591           *      - update the tail of the mntpoint path
 592  592           *      - update the tail of the resource path
 593  593           */
 594  594          pathref = vfs_getmntpoint(vfsp);
 595  595          (void) strncpy(newpath, refstr_value(pathref), sizeof (newpath));
 596  596          VERIFY((tail = strrchr(newpath, '/')) != NULL);
 597  597          *(tail+1) = '\0';
 598  598          ASSERT3U(strlen(newpath) + strlen(nm), <, sizeof (newpath));
 599  599          (void) strcat(newpath, nm);
 600  600          refstr_rele(pathref);
 601  601          vfs_setmntpoint(vfsp, newpath, 0);
 602  602  
 603  603          pathref = vfs_getresource(vfsp);
 604  604          (void) strncpy(newpath, refstr_value(pathref), sizeof (newpath));
 605  605          VERIFY((tail = strrchr(newpath, '@')) != NULL);
 606  606          *(tail+1) = '\0';
 607  607          ASSERT3U(strlen(newpath) + strlen(nm), <, sizeof (newpath));
 608  608          (void) strcat(newpath, nm);
 609  609          refstr_rele(pathref);
 610  610          vfs_setresource(vfsp, newpath, 0);
 611  611  
 612  612          vfs_unlock(vfsp);
 613  613  }
 614  614  
           /*
            * Rename entry point for the snapshot directory: rename snapshot 'snm' to
            * 'tnm'.
            *
            * With FIGNORECASE or a case-insensitive file system, 'snm' is first
            * replaced by the name dmu_snapshot_realname() reports.  The caller needs
            * rename permission on the full snapshot names.  Source and target
            * directory must be the same vnode (EINVAL otherwise), and renaming a
            * snapshot to its own name is a successful no-op.
            *
            * ENOENT is returned when 'snm' has no entry in sd_snaps.  Otherwise the
            * result of dsl_dataset_rename_snapshot() is returned, and on success the
            * tracked entry and its mount information are updated to the new name.
            */
 615  615  /*ARGSUSED*/
 616  616  static int
 617  617  zfsctl_snapdir_rename(vnode_t *sdvp, char *snm, vnode_t *tdvp, char *tnm,
 618  618      cred_t *cr, caller_context_t *ct, int flags)
 619  619  {
 620  620          zfsctl_snapdir_t *sdp = sdvp->v_data;
 621  621          zfs_snapentry_t search, *sep;
 622  622          zfsvfs_t *zfsvfs;
 623  623          avl_index_t where;
 624  624          char from[ZFS_MAX_DATASET_NAME_LEN], to[ZFS_MAX_DATASET_NAME_LEN];
 625  625          char real[ZFS_MAX_DATASET_NAME_LEN], fsname[ZFS_MAX_DATASET_NAME_LEN];
 626  626          int err;
 627  627  
 628  628          zfsvfs = sdvp->v_vfsp->vfs_data;
 629  629          ZFS_ENTER(zfsvfs);
 630  630  
                   /* ENOTSUP from the realname lookup means: keep the name as given. */
 631  631          if ((flags & FIGNORECASE) || zfsvfs->z_case == ZFS_CASE_INSENSITIVE) {
 632  632                  err = dmu_snapshot_realname(zfsvfs->z_os, snm, real,
 633  633                      sizeof (real), NULL);
 634  634                  if (err == 0) {
 635  635                          snm = real;
 636  636                  } else if (err != ENOTSUP) {
 637  637                          ZFS_EXIT(zfsvfs);
 638  638                          return (err);
 639  639                  }
 640  640          }
 641  641  
 642  642          ZFS_EXIT(zfsvfs);
 643  643  
                   /* NOTE(review): z_os is read here after ZFS_EXIT - confirm this is safe. */
 644  644          dmu_objset_name(zfsvfs->z_os, fsname);
 645  645  
                   /* Build both full names; they are used for the permission check. */
 646  646          err = zfsctl_snapshot_zname(sdvp, snm, sizeof (from), from);
 647  647          if (err == 0)
 648  648                  err = zfsctl_snapshot_zname(tdvp, tnm, sizeof (to), to);
 649  649          if (err == 0)
 650  650                  err = zfs_secpolicy_rename_perms(from, to, cr);
 651  651          if (err != 0)
 652  652                  return (err);
 653  653  
 654  654          /*
 655  655           * Cannot move snapshots out of the snapdir.
 656  656           */
 657  657          if (sdvp != tdvp)
 658  658                  return (SET_ERROR(EINVAL));
 659  659  
 660  660          if (strcmp(snm, tnm) == 0)
 661  661                  return (0);
 662  662  
 663  663          mutex_enter(&sdp->sd_lock);
 664  664  
                   /* Only se_name of the stack 'search' entry is set; it is just a key. */
 665  665          search.se_name = (char *)snm;
 666  666          if ((sep = avl_find(&sdp->sd_snaps, &search, &where)) == NULL) {
 667  667                  mutex_exit(&sdp->sd_lock);
 668  668                  return (SET_ERROR(ENOENT));
 669  669          }
 670  670  
                   /* Rename the dataset first; the cached entry follows only on success. */
 671  671          err = dsl_dataset_rename_snapshot(fsname, snm, tnm, B_FALSE);
 672  672          if (err == 0)
 673  673                  zfsctl_rename_snap(sdp, sep, tnm);
 674  674  
 675  675          mutex_exit(&sdp->sd_lock);
 676  676  
 677  677          return (err);
 678  678  }
 679  679  
           /*
            * Remove entry point for the snapshot directory: unmount and destroy the
            * snapshot called 'name'.
            *
            * With FIGNORECASE or a case-insensitive file system, 'name' is first
            * replaced by the name dmu_snapshot_realname() reports.  The caller needs
            * destroy permission on the full snapshot name.
            *
            * ENOENT is returned when 'name' has no entry in sd_snaps.  If the
            * unmount fails, the entry is put back in the tree and that error is
            * returned; otherwise the result of dsl_destroy_snapshot() is returned.
            */
 680  680  /* ARGSUSED */
 681  681  static int
 682  682  zfsctl_snapdir_remove(vnode_t *dvp, char *name, vnode_t *cwd, cred_t *cr,
 683  683      caller_context_t *ct, int flags)
 684  684  {
 685  685          zfsctl_snapdir_t *sdp = dvp->v_data;
 686  686          zfs_snapentry_t *sep;
 687  687          zfs_snapentry_t search;
 688  688          zfsvfs_t *zfsvfs;
 689  689          char snapname[ZFS_MAX_DATASET_NAME_LEN];
 690  690          char real[ZFS_MAX_DATASET_NAME_LEN];
 691  691          int err;
 692  692  
 693  693          zfsvfs = dvp->v_vfsp->vfs_data;
 694  694          ZFS_ENTER(zfsvfs);
 695  695  
                   /* ENOTSUP from the realname lookup means: keep the name as given. */
 696  696          if ((flags & FIGNORECASE) || zfsvfs->z_case == ZFS_CASE_INSENSITIVE) {
 697  697  
 698  698                  err = dmu_snapshot_realname(zfsvfs->z_os, name, real,
 699  699                      sizeof (real), NULL);
 700  700                  if (err == 0) {
 701  701                          name = real;
 702  702                  } else if (err != ENOTSUP) {
 703  703                          ZFS_EXIT(zfsvfs);
 704  704                          return (err);
 705  705                  }
 706  706          }
 707  707  
 708  708          ZFS_EXIT(zfsvfs);
 709  709  
 710  710          err = zfsctl_snapshot_zname(dvp, name, sizeof (snapname), snapname);
 711  711          if (err == 0)
 712  712                  err = zfs_secpolicy_destroy_perms(snapname, cr);
 713  713          if (err != 0)
 714  714                  return (err);
 715  715  
 716  716          mutex_enter(&sdp->sd_lock);
 717  717  
                   /* Only se_name of the stack 'search' entry is set; it is just a key. */
 718  718          search.se_name = name;
 719  719          sep = avl_find(&sdp->sd_snaps, &search, NULL);
 720  720          if (sep) {
                           /*
                            * Take the entry out of the tree before unmounting.  On
                            * success zfsctl_unmount_snap() has freed it; on failure
                            * it is still valid and goes back into the tree.
                            */
 721  721                  avl_remove(&sdp->sd_snaps, sep);
 722  722                  err = zfsctl_unmount_snap(sep, MS_FORCE, cr);
 723  723                  if (err != 0)
 724  724                          avl_add(&sdp->sd_snaps, sep);
 725  725                  else
 726  726                          err = dsl_destroy_snapshot(snapname, B_FALSE);
 727  727          } else {
 728  728                  err = SET_ERROR(ENOENT);
 729  729          }
 730  730  
 731  731          mutex_exit(&sdp->sd_lock);
 732  732  
 733  733          return (err);
 734  734  }
 735  735  
 736  736  /*
 737  737   * This creates a snapshot under '.zfs/snapshot'.
 738  738   */
 739  739  /* ARGSUSED */
 740  740  static int
 741  741  zfsctl_snapdir_mkdir(vnode_t *dvp, char *dirname, vattr_t *vap, vnode_t  **vpp,
 742  742      cred_t *cr, caller_context_t *cc, int flags, vsecattr_t *vsecp)
 743  743  {
 744  744          zfsvfs_t *zfsvfs = dvp->v_vfsp->vfs_data;
 745  745          char name[ZFS_MAX_DATASET_NAME_LEN];
 746  746          int err;
 747  747          static enum symfollow follow = NO_FOLLOW;
 748  748          static enum uio_seg seg = UIO_SYSSPACE;
 749  749  
 750  750          if (zfs_component_namecheck(dirname, NULL, NULL) != 0)
 751  751                  return (SET_ERROR(EILSEQ));
 752  752  
 753  753          dmu_objset_name(zfsvfs->z_os, name);
 754  754  
 755  755          *vpp = NULL;
 756  756  
 757  757          err = zfs_secpolicy_snapshot_perms(name, cr);
 758  758          if (err != 0)
 759  759                  return (err);
 760  760  
 761  761          if (err == 0) {
 762  762                  err = dmu_objset_snapshot_one(name, dirname);
 763  763                  if (err != 0)
 764  764                          return (err);
 765  765                  err = lookupnameat(dirname, seg, follow, NULL, vpp, dvp);
 766  766          }
 767  767  
 768  768          return (err);
 769  769  }
 770  770  
 771  771  /*
 772  772   * Lookup entry point for the 'snapshot' directory.  Try to open the
 773  773   * snapshot if it exists, creating the pseudo filesystem vnode as necessary.
 774  774   * Perform a mount of the associated dataset on top of the vnode.
            *
            * Lookups with LOOKUP_XATTR set are rejected with EINVAL.  With
            * FIGNORECASE the name is first mapped through
            * dmu_snapshot_realname(), and realpnp / direntflags are filled in
            * when the caller supplied them.
            *
            * Entries for known snapshots are kept in the sd_snaps AVL tree of the
            * directory, protected by sd_lock.  If an entry already exists we
            * traverse() to whatever is mounted on it; otherwise a new entry and
            * GFS node are created and the snapshot is mounted with domount().
            * On success the vnode returned in *vpp has VROOT cleared.
 775  775   */
 776  776  /* ARGSUSED */
 777  777  static int
 778  778  zfsctl_snapdir_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, pathname_t *pnp,
 779  779      int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct,
 780  780      int *direntflags, pathname_t *realpnp)
 781  781  {
 782  782          zfsctl_snapdir_t *sdp = dvp->v_data;
 783  783          objset_t *snap;
 784  784          char snapname[ZFS_MAX_DATASET_NAME_LEN];
 785  785          char real[ZFS_MAX_DATASET_NAME_LEN];
 786  786          char *mountpoint;
 787  787          zfs_snapentry_t *sep, search;
 788  788          struct mounta margs;
 789  789          vfs_t *vfsp;
 790  790          size_t mountpoint_len;
 791  791          avl_index_t where;
 792  792          zfsvfs_t *zfsvfs = dvp->v_vfsp->vfs_data;
 793  793          int err;
 794  794  
 795  795          /*
 796  796           * No extended attributes allowed under .zfs
 797  797           */
 798  798          if (flags & LOOKUP_XATTR)
 799  799                  return (SET_ERROR(EINVAL));
 800  800  
 801  801          ASSERT(dvp->v_type == VDIR);
 802  802  
 803  803          /*
 804  804           * If we get a recursive call, that means we got called
 805  805           * from the domount() code while it was trying to look up the
 806  806           * spec (which looks like a local path for zfs).  We need to
 807  807           * add some flag to domount() to tell it not to do this lookup.
 808  808           */
 809  809          if (MUTEX_HELD(&sdp->sd_lock))
 810  810                  return (SET_ERROR(ENOENT));
 811  811  
 812  812          ZFS_ENTER(zfsvfs);
 813  813  
                   /* Nothing more to do if gfs_lookup_dot() resolved the name. */
 814  814          if (gfs_lookup_dot(vpp, dvp, zfsvfs->z_ctldir, nm) == 0) {
 815  815                  ZFS_EXIT(zfsvfs);
 816  816                  return (0);
 817  817          }
 818  818  
 819  819          if (flags & FIGNORECASE) {
 820  820                  boolean_t conflict = B_FALSE;
 821  821  
 822  822                  err = dmu_snapshot_realname(zfsvfs->z_os, nm, real,
 823  823                      sizeof (real), &conflict);
                           /*
                            * ENOTSUP is tolerated: in that case we simply carry on
                            * with the name exactly as the caller passed it.
                            */
 824  824                  if (err == 0) {
 825  825                          nm = real;
 826  826                  } else if (err != ENOTSUP) {
 827  827                          ZFS_EXIT(zfsvfs);
 828  828                          return (err);
 829  829                  }
 830  830                  if (realpnp)
 831  831                          (void) strlcpy(realpnp->pn_buf, nm,
 832  832                              realpnp->pn_bufsize);
 833  833                  if (conflict && direntflags)
 834  834                          *direntflags = ED_CASE_CONFLICT;
 835  835          }
 836  836  
                   /*
                    * See whether we already have an entry for this name.  'where'
                    * records the insertion point for use below if we do not.
                    */
 837  837          mutex_enter(&sdp->sd_lock);
 838  838          search.se_name = (char *)nm;
 839  839          if ((sep = avl_find(&sdp->sd_snaps, &search, &where)) != NULL) {
 840  840                  *vpp = sep->se_root;
 841  841                  VN_HOLD(*vpp);
 842  842                  err = traverse(vpp);
 843  843                  if (err != 0) {
 844  844                          VN_RELE(*vpp);
 845  845                          *vpp = NULL;
 846  846                  } else if (*vpp == sep->se_root) {
 847  847                          /*
 848  848                           * The snapshot was unmounted behind our backs,
 849  849                           * try to remount it.
                                    *
                                    * NOTE(review): on this path snapname[] has not
                                    * been filled in (zfsctl_snapshot_zname() is only
                                    * called further down), yet the code at domount:
                                    * passes it as margs.spec -- confirm and fix.
 850  850                           */
 851  851                          goto domount;
 852  852                  } else {
 853  853                          /*
 854  854                           * VROOT was set during the traverse call.  We need
 855  855                           * to clear it since we're pretending to be part
 856  856                           * of our parent's vfs.
 857  857                           */
 858  858                          (*vpp)->v_flag &= ~VROOT;
 859  859                  }
 860  860                  mutex_exit(&sdp->sd_lock);
 861  861                  ZFS_EXIT(zfsvfs);
 862  862                  return (err);
 863  863          }
 864  864  
 865  865          /*
 866  866           * The requested snapshot is not currently mounted, look it up.
 867  867           */
 868  868          err = zfsctl_snapshot_zname(dvp, nm, sizeof (snapname), snapname);
 869  869          if (err != 0) {
 870  870                  mutex_exit(&sdp->sd_lock);
 871  871                  ZFS_EXIT(zfsvfs);
 872  872                  /*
 873  873                   * handle "ls *" or "?" in a graceful manner,
 874  874                   * forcing EILSEQ to ENOENT.
 875  875                   * Since shell ultimately passes "*" or "?" as name to lookup
 876  876                   */
 877  877                  return (err == EILSEQ ? ENOENT : err);
 878  878          }
                   /* Any failure to hold the snapshot objset is reported as ENOENT. */
 879  879          if (dmu_objset_hold(snapname, FTAG, &snap) != 0) {
 880  880                  mutex_exit(&sdp->sd_lock);
 881  881                  ZFS_EXIT(zfsvfs);
 882  882                  return (SET_ERROR(ENOENT));
 883  883          }
 884  884  
                   /*
                    * Build a new entry (with its own copy of the name) plus the GFS
                    * node to mount on, and insert it where avl_find() told us to.
                    */
 885  885          sep = kmem_alloc(sizeof (zfs_snapentry_t), KM_SLEEP);
 886  886          sep->se_name = kmem_alloc(strlen(nm) + 1, KM_SLEEP);
 887  887          (void) strcpy(sep->se_name, nm);
 888  888          *vpp = sep->se_root = zfsctl_snapshot_mknode(dvp, dmu_objset_id(snap));
 889  889          avl_insert(&sdp->sd_snaps, sep, where);
 890  890  
 891  891          dmu_objset_rele(snap, FTAG);
 892  892  domount:
                   /* Mount point string is "<vfs mount point>/.zfs/snapshot/<nm>". */
 893  893          mountpoint_len = strlen(refstr_value(dvp->v_vfsp->vfs_mntpt)) +
 894  894              strlen("/.zfs/snapshot/") + strlen(nm) + 1;
 895  895          mountpoint = kmem_alloc(mountpoint_len, KM_SLEEP);
 896  896          (void) snprintf(mountpoint, mountpoint_len, "%s/.zfs/snapshot/%s",
 897  897              refstr_value(dvp->v_vfsp->vfs_mntpt), nm);
 898  898  
 899  899          margs.spec = snapname;
 900  900          margs.dir = mountpoint;
 901  901          margs.flags = MS_SYSSPACE | MS_NOMNTTAB;
 902  902          margs.fstype = "zfs";
 903  903          margs.dataptr = NULL;
 904  904          margs.datalen = 0;
 905  905          margs.optptr = NULL;
 906  906          margs.optlen = 0;
 907  907  
                   /* Note that the mount is done with kcred, not the caller's cr. */
 908  908          err = domount("zfs", &margs, *vpp, kcred, &vfsp);
 909  909          kmem_free(mountpoint, mountpoint_len);
 910  910  
 911  911          if (err == 0) {
 912  912                  /*
 913  913                   * Return the mounted root rather than the covered mount point.
 914  914                   * Takes the GFS vnode at .zfs/snapshot/<snapname> and returns
 915  915                   * the ZFS vnode mounted on top of the GFS node.  This ZFS
 916  916                   * vnode is the root of the newly created vfsp.
 917  917                   */
 918  918                  VFS_RELE(vfsp);
 919  919                  err = traverse(vpp);
 920  920          }
 921  921  
 922  922          if (err == 0) {
 923  923                  /*
 924  924                   * Fix up the root vnode mounted on .zfs/snapshot/<snapname>.
 925  925                   *
 926  926                   * This is where we lie about our v_vfsp in order to
 927  927                   * make .zfs/snapshot/<snapname> accessible over NFS
 928  928                   * without requiring manual mounts of <snapname>.
 929  929                   */
 930  930                  ASSERT(VTOZ(*vpp)->z_zfsvfs != zfsvfs);
 931  931                  VTOZ(*vpp)->z_zfsvfs->z_parent = zfsvfs;
 932  932                  (*vpp)->v_vfsp = zfsvfs->z_vfs;
 933  933                  (*vpp)->v_flag &= ~VROOT;
 934  934          }
 935  935          mutex_exit(&sdp->sd_lock);
 936  936          ZFS_EXIT(zfsvfs);
 937  937  
 938  938          /*
 939  939           * If we had an error, drop our hold on the vnode and
 940  940           * zfsctl_snapshot_inactive() will clean up.
 941  941           */
 942  942          if (err != 0) {
 943  943                  VN_RELE(*vpp);
 944  944                  *vpp = NULL;
 945  945          }
 946  946          return (err);
 947  947  }
 948  948  
 949  949  /* ARGSUSED */
           /*
            * Lookup entry point for the shares control directory node.
            *
            * If gfs_lookup_dot() resolves the name we are done.  Otherwise the
            * lookup is forwarded, with all arguments unchanged, to the znode
            * identified by zfsvfs->z_shares_dir.  Returns ENOTSUP when
            * z_shares_dir is 0, or the error from zfs_zget() / VOP_LOOKUP().
            */
 950  950  static int
 951  951  zfsctl_shares_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, pathname_t *pnp,
 952  952      int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct,
 953  953      int *direntflags, pathname_t *realpnp)
 954  954  {
 955  955          zfsvfs_t *zfsvfs = dvp->v_vfsp->vfs_data;
 956  956          znode_t *dzp;
 957  957          int error;
 958  958  
 959  959          ZFS_ENTER(zfsvfs);
 960  960  
 961  961          if (gfs_lookup_dot(vpp, dvp, zfsvfs->z_ctldir, nm) == 0) {
 962  962                  ZFS_EXIT(zfsvfs);
 963  963                  return (0);
 964  964          }
 965  965  
 966  966          if (zfsvfs->z_shares_dir == 0) {
 967  967                  ZFS_EXIT(zfsvfs);
 968  968                  return (SET_ERROR(ENOTSUP));
 969  969          }
 970  970          if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &dzp)) == 0) {
 971  971                  error = VOP_LOOKUP(ZTOV(dzp), nm, vpp, pnp,
 972  972                      flags, rdir, cr, ct, direntflags, realpnp);
                           /* Drop the reference on dzp's vnode now that we are done. */
 973  973                  VN_RELE(ZTOV(dzp));
 974  974          }
 975  975  
 976  976          ZFS_EXIT(zfsvfs);
 977  977  
 978  978          return (error);
 979  979  }
 980  980  
 981  981  /* ARGSUSED */
           /*
            * Readdir callback for the 'snapshot' directory; it is the callback
            * handed to gfs_dir_create() by zfsctl_mknode_snapdir().
            *
            * Uses *offp as the cookie for dmu_snapshot_list_next() and fills in
            * one entry at 'dp': an edirent_t when V_RDDIR_ENTFLAGS is set in
            * 'flags', otherwise a struct dirent64.  The advanced cookie is
            * stored in *nextp.  When the list is exhausted (ENOENT) *eofp is set
            * to 1 and 0 is returned; any other error is returned as is.
            */
 982  982  static int
 983  983  zfsctl_snapdir_readdir_cb(vnode_t *vp, void *dp, int *eofp,
 984  984      offset_t *offp, offset_t *nextp, void *data, int flags)
 985  985  {
 986  986          zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
 987  987          char snapname[ZFS_MAX_DATASET_NAME_LEN];
 988  988          uint64_t id, cookie;
 989  989          boolean_t case_conflict;
 990  990          int error;
 991  991  
 992  992          ZFS_ENTER(zfsvfs);
 993  993  
 994  994          cookie = *offp;
                   /* The pool config is held only around the list call itself. */
 995  995          dsl_pool_config_enter(dmu_objset_pool(zfsvfs->z_os), FTAG);
 996  996          error = dmu_snapshot_list_next(zfsvfs->z_os,
 997  997              sizeof (snapname), snapname, &id, &cookie, &case_conflict);
 998  998          dsl_pool_config_exit(dmu_objset_pool(zfsvfs->z_os), FTAG);
 999  999          if (error) {
1000 1000                  ZFS_EXIT(zfsvfs);
                           /* ENOENT means there are no more snapshots: report EOF. */
1001 1001                  if (error == ENOENT) {
1002 1002                          *eofp = 1;
1003 1003                          return (0);
1004 1004                  }
1005 1005                  return (error);
1006 1006          }
1007 1007  
1008 1008          if (flags & V_RDDIR_ENTFLAGS) {
1009 1009                  edirent_t *eodp = dp;
1010 1010  
1011 1011                  (void) strcpy(eodp->ed_name, snapname);
1012 1012                  eodp->ed_ino = ZFSCTL_INO_SNAP(id);
1013 1013                  eodp->ed_eflags = case_conflict ? ED_CASE_CONFLICT : 0;
1014 1014          } else {
1015 1015                  struct dirent64 *odp = dp;
1016 1016  
1017 1017                  (void) strcpy(odp->d_name, snapname);
1018 1018                  odp->d_ino = ZFSCTL_INO_SNAP(id);
1019 1019          }
1020 1020          *nextp = cookie;
1021 1021  
1022 1022          ZFS_EXIT(zfsvfs);
1023 1023  
1024 1024          return (0);
1025 1025  }
1026 1026  
1027 1027  /* ARGSUSED */
           /*
            * Readdir entry point for the shares control directory node.
            *
            * Forwards the request with VOP_READDIR() to the znode identified by
            * zfsvfs->z_shares_dir.  Returns ENOTSUP when z_shares_dir is 0.  If
            * zfs_zget() fails, *eofp is set to 1 and ENOENT is returned in place
            * of the zfs_zget() error.
            */
1028 1028  static int
1029 1029  zfsctl_shares_readdir(vnode_t *vp, uio_t *uiop, cred_t *cr, int *eofp,
1030 1030      caller_context_t *ct, int flags)
1031 1031  {
1032 1032          zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
1033 1033          znode_t *dzp;
1034 1034          int error;
1035 1035  
1036 1036          ZFS_ENTER(zfsvfs);
1037 1037  
1038 1038          if (zfsvfs->z_shares_dir == 0) {
1039 1039                  ZFS_EXIT(zfsvfs);
1040 1040                  return (SET_ERROR(ENOTSUP));
1041 1041          }
1042 1042          if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &dzp)) == 0) {
1043 1043                  error = VOP_READDIR(ZTOV(dzp), uiop, cr, eofp, ct, flags);
1044 1044                  VN_RELE(ZTOV(dzp));
1045 1045          } else {
                           /* Could not get the znode: report an empty, missing dir. */
1046 1046                  *eofp = 1;
1047 1047                  error = SET_ERROR(ENOENT);
1048 1048          }
1049 1049  
1050 1050          ZFS_EXIT(zfsvfs);
1051 1051          return (error);
1052 1052  }
1053 1053  
1054 1054  /*
1055 1055   * pvp is the '.zfs' directory (zfsctl_node_t).
1056 1056   *
1057 1057   * Creates vp, which is '.zfs/snapshot' (zfsctl_snapdir_t).
1058 1058   *
1059 1059   * This function is the callback to create a GFS vnode for '.zfs/snapshot'
1060 1060   * when a lookup is performed on .zfs for "snapshot".
            *
            * Besides creating the GFS directory, this initializes the per-directory
            * state: the node id, the timestamp copied from the parent, the sd_lock
            * mutex and the (initially empty) sd_snaps AVL tree.
1061 1061   */
1062 1062  vnode_t *
1063 1063  zfsctl_mknode_snapdir(vnode_t *pvp)
1064 1064  {
1065 1065          vnode_t *vp;
1066 1066          zfsctl_snapdir_t *sdp;
1067 1067  
                   /* Directory contents are produced by zfsctl_snapdir_readdir_cb(). */
1068 1068          vp = gfs_dir_create(sizeof (zfsctl_snapdir_t), pvp,
1069 1069              zfsctl_ops_snapdir, NULL, NULL, MAXNAMELEN,
1070 1070              zfsctl_snapdir_readdir_cb, NULL);
1071 1071          sdp = vp->v_data;
1072 1072          sdp->sd_node.zc_id = ZFSCTL_INO_SNAPDIR;
                   /* Inherit the timestamp recorded on the parent node. */
1073 1073          sdp->sd_node.zc_cmtime = ((zfsctl_node_t *)pvp->v_data)->zc_cmtime;
1074 1074          mutex_init(&sdp->sd_lock, NULL, MUTEX_DEFAULT, NULL);
                   /* sd_snaps holds zfs_snapentry_t's ordered by snapentry_compare(). */
1075 1075          avl_create(&sdp->sd_snaps, snapentry_compare,
1076 1076              sizeof (zfs_snapentry_t), offsetof(zfs_snapentry_t, se_node));
1077 1077          return (vp);
1078 1078  }
1079 1079  
1080 1080  vnode_t *
1081 1081  zfsctl_mknode_shares(vnode_t *pvp)
1082 1082  {
                   /*
                    * Create the GFS directory vnode for the shares node under pvp,
                    * using the zfsctl_ops_shares operations and no readdir callback.
                    * The new node's zc_cmtime is copied from the parent's
                    * zfsctl_node_t.  Returns the new vnode.
                    */
1083 1083          vnode_t *vp;
1084 1084          zfsctl_node_t *sdp;
1085 1085  
1086 1086          vp = gfs_dir_create(sizeof (zfsctl_node_t), pvp,
1087 1087              zfsctl_ops_shares, NULL, NULL, MAXNAMELEN,
1088 1088              NULL, NULL);
1089 1089          sdp = vp->v_data;
1090 1090          sdp->zc_cmtime = ((zfsctl_node_t *)pvp->v_data)->zc_cmtime;
1091 1091          return (vp);
1092 1092  
1093 1093  }
1094 1094  
1095 1095  /* ARGSUSED */
           /*
            * Getattr entry point for the shares control directory node.
            *
            * The attributes reported are those of the znode identified by
            * zfsvfs->z_shares_dir, obtained with VOP_GETATTR().  Returns ENOTSUP
            * when z_shares_dir is 0, or the error from zfs_zget() / VOP_GETATTR().
            */
1096 1096  static int
1097 1097  zfsctl_shares_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
1098 1098      caller_context_t *ct)
1099 1099  {
1100 1100          zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
1101 1101          znode_t *dzp;
1102 1102          int error;
1103 1103  
1104 1104          ZFS_ENTER(zfsvfs);
1105 1105          if (zfsvfs->z_shares_dir == 0) {
1106 1106                  ZFS_EXIT(zfsvfs);
1107 1107                  return (SET_ERROR(ENOTSUP));
1108 1108          }
1109 1109          if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &dzp)) == 0) {
1110 1110                  error = VOP_GETATTR(ZTOV(dzp), vap, flags, cr, ct);
1111 1111                  VN_RELE(ZTOV(dzp));
1112 1112          }
1113 1113          ZFS_EXIT(zfsvfs);
1114 1114          return (error);
1115 1115  
1116 1116  
1117 1117  }
1118 1118  
1119 1119  /* ARGSUSED */
           /*
            * Getattr entry point for the 'snapshot' directory.
            *
            * Starts from zfsctl_common_getattr() and then overrides the node id,
            * the link count / size and the ctime / mtime.  Always returns 0.
            */
1120 1120  static int
1121 1121  zfsctl_snapdir_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
1122 1122      caller_context_t *ct)
1123 1123  {
1124 1124          zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
1125 1125          zfsctl_snapdir_t *sdp = vp->v_data;
1126 1126  
1127 1127          ZFS_ENTER(zfsvfs);
1128 1128          zfsctl_common_getattr(vp, vap);
1129 1129          vap->va_nodeid = gfs_file_inode(vp);
                   /*
                    * Number of entries currently in the sd_snaps tree, plus 2
                    * (presumably for "." and ".." -- confirm).  sd_lock is not taken
                    * for this read.
                    */
1130 1130          vap->va_nlink = vap->va_size = avl_numnodes(&sdp->sd_snaps) + 2;
1131 1131          vap->va_ctime = vap->va_mtime = dmu_objset_snap_cmtime(zfsvfs->z_os);
1132 1132          ZFS_EXIT(zfsvfs);
1133 1133  
1134 1134          return (0);
1135 1135  }
1136 1136  
1137 1137  /* ARGSUSED */
           /*
            * Inactive entry point for the 'snapshot' directory.
            *
            * gfs_dir_inactive() decides whether the node is really going away; a
            * non-NULL return is the private data that we must tear down and free.
            * By then the sd_snaps tree is expected to be empty (see the ASSERT).
            */
1138 1138  static void
1139 1139  zfsctl_snapdir_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct)
1140 1140  {
1141 1141          zfsctl_snapdir_t *sdp = vp->v_data;
1142 1142          void *private;
1143 1143  
1144 1144          private = gfs_dir_inactive(vp);
1145 1145          if (private != NULL) {
                           /*
                            * NOTE(review): sdp was read from vp->v_data before the
                            * call above and is presumably the same object as
                            * 'private' -- confirm.
                            */
1146 1146                  ASSERT(avl_numnodes(&sdp->sd_snaps) == 0);
1147 1147                  mutex_destroy(&sdp->sd_lock);
1148 1148                  avl_destroy(&sdp->sd_snaps);
1149 1149                  kmem_free(private, sizeof (zfsctl_snapdir_t));
1150 1150          }
1151 1151  }
1152 1152  
1153 1153  static const fs_operation_def_t zfsctl_tops_snapdir[] = {
                   /*
                    * Vnode operation table for the 'snapshot' directory: one
                    * { name, handler } pair per operation, terminated by a NULL
                    * name.  Presumably the template from which zfsctl_ops_snapdir is
                    * built -- confirm.  Note that rename, rmdir and mkdir are
                    * supported here, unlike in the shares table.
                    */
1154 1154          { VOPNAME_OPEN,         { .vop_open = zfsctl_common_open }      },
1155 1155          { VOPNAME_CLOSE,        { .vop_close = zfsctl_common_close }    },
1156 1156          { VOPNAME_IOCTL,        { .error = fs_inval }                   },
1157 1157          { VOPNAME_GETATTR,      { .vop_getattr = zfsctl_snapdir_getattr } },
1158 1158          { VOPNAME_ACCESS,       { .vop_access = zfsctl_common_access }  },
1159 1159          { VOPNAME_RENAME,       { .vop_rename = zfsctl_snapdir_rename } },
1160 1160          { VOPNAME_RMDIR,        { .vop_rmdir = zfsctl_snapdir_remove }  },
1161 1161          { VOPNAME_MKDIR,        { .vop_mkdir = zfsctl_snapdir_mkdir }   },
1162 1162          { VOPNAME_READDIR,      { .vop_readdir = gfs_vop_readdir }      },
1163 1163          { VOPNAME_LOOKUP,       { .vop_lookup = zfsctl_snapdir_lookup } },
1164 1164          { VOPNAME_SEEK,         { .vop_seek = fs_seek }                 },
1165 1165          { VOPNAME_INACTIVE,     { .vop_inactive = zfsctl_snapdir_inactive } },
1166 1166          { VOPNAME_FID,          { .vop_fid = zfsctl_common_fid }        },
1167 1167          { NULL }
1168 1168  };
1169 1169  
1170 1170  static const fs_operation_def_t zfsctl_tops_shares[] = {
                   /*
                    * Vnode operation table for the shares node: one
                    * { name, handler } pair per operation, terminated by a NULL
                    * name.  Presumably the template from which zfsctl_ops_shares is
                    * built -- confirm.  getattr, readdir and lookup use the
                    * zfsctl_shares_*() handlers; inactive uses the generic
                    * gfs_vop_inactive().
                    */
1171 1171          { VOPNAME_OPEN,         { .vop_open = zfsctl_common_open }      },
1172 1172          { VOPNAME_CLOSE,        { .vop_close = zfsctl_common_close }    },
1173 1173          { VOPNAME_IOCTL,        { .error = fs_inval }                   },
1174 1174          { VOPNAME_GETATTR,      { .vop_getattr = zfsctl_shares_getattr } },
1175 1175          { VOPNAME_ACCESS,       { .vop_access = zfsctl_common_access }  },
1176 1176          { VOPNAME_READDIR,      { .vop_readdir = zfsctl_shares_readdir } },
1177 1177          { VOPNAME_LOOKUP,       { .vop_lookup = zfsctl_shares_lookup }  },
1178 1178          { VOPNAME_SEEK,         { .vop_seek = fs_seek }                 },
1179 1179          { VOPNAME_INACTIVE,     { .vop_inactive = gfs_vop_inactive } },
1180 1180          { VOPNAME_FID,          { .vop_fid = zfsctl_shares_fid } },
1181 1181          { NULL }
1182 1182  };
1183 1183  
1184 1184  /*
1185 1185   * pvp is the GFS vnode '.zfs/snapshot'.
1186 1186   *
1187 1187   * This creates a GFS node under '.zfs/snapshot' representing each
1188 1188   * snapshot.  This newly created GFS node is what we mount snapshot
1189 1189   * vfs_t's on top of.
            *
            * 'objset' is stored as the node's zc_id; zfsctl_lookup_objset() later
            * matches entries on that value.  Returns the new vnode.
1190 1190   */
1191 1191  static vnode_t *
1192 1192  zfsctl_snapshot_mknode(vnode_t *pvp, uint64_t objset)
1193 1193  {
1194 1194          vnode_t *vp;
1195 1195          zfsctl_node_t *zcp;
1196 1196  
1197 1197          vp = gfs_dir_create(sizeof (zfsctl_node_t), pvp,
1198 1198              zfsctl_ops_snapshot, NULL, NULL, MAXNAMELEN, NULL, NULL);
1199 1199          zcp = vp->v_data;
1200 1200          zcp->zc_id = objset;
1201 1201  
1202 1202          return (vp);
1203 1203  }
1204 1204  
1205 1205  static void
1206 1206  zfsctl_snapshot_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct)
1207 1207  {
                   /*
                    * Inactive entry point for a per-snapshot GFS node (the mount
                    * point created by zfsctl_snapshot_mknode()).
                    *
                    * With the parent's sd_lock held: if other holds remain on vp
                    * (v_count > 1) only our hold is dropped.  Otherwise the
                    * zfs_snapentry_t whose se_root is vp is removed from the
                    * parent's sd_snaps tree and freed, and the vnode itself is
                    * handed to gfs_vop_inactive().
                    */
1208 1208          zfsctl_snapdir_t *sdp;
1209 1209          zfs_snapentry_t *sep, *next;
1210 1210          vnode_t *dvp;
1211 1211  
                   /* Find the parent directory, which owns the lock and the tree. */
1212 1212          VERIFY(gfs_dir_lookup(vp, "..", &dvp, cr, 0, NULL, NULL) == 0);
1213 1213          sdp = dvp->v_data;
1214 1214  
1215 1215          mutex_enter(&sdp->sd_lock);
1216 1216  
1217 1217          mutex_enter(&vp->v_lock);
1218 1218          if (vp->v_count > 1) {
1219 1219                  vp->v_count--;
1220 1220                  mutex_exit(&vp->v_lock);
1221 1221                  mutex_exit(&sdp->sd_lock);
1222 1222                  VN_RELE(dvp);
1223 1223                  return;
1224 1224          }
1225 1225          mutex_exit(&vp->v_lock);
1226 1226          ASSERT(!vn_ismntpt(vp));
1227 1227  
                   /* Linear walk: entries are keyed by name, not by vnode. */
1228 1228          sep = avl_first(&sdp->sd_snaps);
1229 1229          while (sep != NULL) {
                           /* Fetch the successor first; sep may be freed below. */
1230 1230                  next = AVL_NEXT(&sdp->sd_snaps, sep);
1231 1231  
1232 1232                  if (sep->se_root == vp) {
1233 1233                          avl_remove(&sdp->sd_snaps, sep);
1234 1234                          kmem_free(sep->se_name, strlen(sep->se_name) + 1);
1235 1235                          kmem_free(sep, sizeof (zfs_snapentry_t));
1236 1236                          break;
1237 1237                  }
1238 1238                  sep = next;
1239 1239          }
                   /* sep is only compared here, never dereferenced after the free. */
1240 1240          ASSERT(sep != NULL);
1241 1241  
1242 1242          mutex_exit(&sdp->sd_lock);
1243 1243          VN_RELE(dvp);
1244 1244  
1245 1245          /*
1246 1246           * Dispose of the vnode for the snapshot mount point.
1247 1247           * This is safe to do because once this entry has been removed
1248 1248           * from the AVL tree, it can't be found again, so cannot become
1249 1249           * "active".  If we lookup the same name again we will end up
1250 1250           * creating a new vnode.

↓ open down ↓

1250 lines elided

↑ open up ↑

1251 1251           */
1252 1252          gfs_vop_inactive(vp, cr, ct);
1253 1253  }
1254 1254  
1255 1255  
1256 1256  /*
1257 1257   * These VP's should never see the light of day.  They should always
1258 1258   * be covered.
            *
            * Consequently the only operation supplied is VOPNAME_INACTIVE, which
            * is handled by zfsctl_snapshot_inactive().
1259 1259   */
1260 1260  static const fs_operation_def_t zfsctl_tops_snapshot[] = {
1261      -        VOPNAME_INACTIVE, { .vop_inactive =  zfsctl_snapshot_inactive },
1262      -        NULL, NULL
     1261 +        { VOPNAME_INACTIVE, { .vop_inactive =  zfsctl_snapshot_inactive } },
     1262 +        { NULL, { NULL } }
1263 1263  };
1264 1264  
1265 1265  int
1266 1266  zfsctl_lookup_objset(vfs_t *vfsp, uint64_t objsetid, zfsvfs_t **zfsvfsp)
1267 1267  {
                   /*
                    * Find the zfsvfs_t of the snapshot with objset id 'objsetid'
                    * among the entries of vfsp's 'snapshot' control directory.
                    *
                    * On a match we traverse() from the entry's GFS node to the
                    * filesystem mounted on it and store that filesystem's zfsvfs_t
                    * in *zfsvfsp.  Returns 0 on success; EINVAL if no entry matches
                    * or nothing is mounted on the matching entry; otherwise the
                    * error from zfsctl_root_lookup() or traverse().  *zfsvfsp is
                    * written only on success.
                    */
1268 1268          zfsvfs_t *zfsvfs = vfsp->vfs_data;
1269 1269          vnode_t *dvp, *vp;
1270 1270          zfsctl_snapdir_t *sdp;
1271 1271          zfsctl_node_t *zcp;
1272 1272          zfs_snapentry_t *sep;

1273 1273          int error;
1274 1274  
1275 1275          ASSERT(zfsvfs->z_ctldir != NULL);
1276 1276          error = zfsctl_root_lookup(zfsvfs->z_ctldir, "snapshot", &dvp,
1277 1277              NULL, 0, NULL, kcred, NULL, NULL, NULL);
1278 1278          if (error != 0)
1279 1279                  return (error);
1280 1280          sdp = dvp->v_data;
1281 1281  
1282 1282          mutex_enter(&sdp->sd_lock);
                   /* Linear walk: the tree is keyed by name, not by objset id. */
1283 1283          sep = avl_first(&sdp->sd_snaps);
1284 1284          while (sep != NULL) {
1285 1285                  vp = sep->se_root;
1286 1286                  zcp = vp->v_data;
1287 1287                  if (zcp->zc_id == objsetid)
1288 1288                          break;
1289 1289  
1290 1290                  sep = AVL_NEXT(&sdp->sd_snaps, sep);
1291 1291          }
1292 1292  
1293 1293          if (sep != NULL) {
1294 1294                  VN_HOLD(vp);
1295 1295                  /*
1296 1296                   * Return the mounted root rather than the covered mount point.
1297 1297                   * Takes the GFS vnode at .zfs/snapshot/<snapshot objsetid>
1298 1298                   * and returns the ZFS vnode mounted on top of the GFS node.
1299 1299                   * This ZFS vnode is the root of the vfs for objset 'objsetid'.
1300 1300                   */
1301 1301                  error = traverse(&vp);
1302 1302                  if (error == 0) {
                                   /* vp unchanged means nothing is mounted here. */
1303 1303                          if (vp == sep->se_root)
1304 1304                                  error = SET_ERROR(EINVAL);
1305 1305                          else
1306 1306                                  *zfsvfsp = VTOZ(vp)->z_zfsvfs;
1307 1307                  }
1308 1308                  mutex_exit(&sdp->sd_lock);
1309 1309                  VN_RELE(vp);
1310 1310          } else {
1311 1311                  error = SET_ERROR(EINVAL);
1312 1312                  mutex_exit(&sdp->sd_lock);
1313 1313          }
1314 1314  
1315 1315          VN_RELE(dvp);
1316 1316  
1317 1317          return (error);
1318 1318  }
1319 1319  
1320 1320  /*
1321 1321   * Unmount any snapshots for the given filesystem.  This is called from
1322 1322   * zfs_umount() - if we have a ctldir, then go through and unmount all the
1323 1323   * snapshots.
            *
            * 'fflags' and 'cr' are passed through to zfsctl_unmount_snap().  The
            * walk stops at the first snapshot that fails to unmount; that entry
            * is put back into the tree and its error is returned.  Returns 0 if
            * every mounted snapshot was unmounted.
1324 1324   */
1325 1325  int
1326 1326  zfsctl_umount_snapshots(vfs_t *vfsp, int fflags, cred_t *cr)
1327 1327  {
1328 1328          zfsvfs_t *zfsvfs = vfsp->vfs_data;
1329 1329          vnode_t *dvp;
1330 1330          zfsctl_snapdir_t *sdp;
1331 1331          zfs_snapentry_t *sep, *next;
1332 1332          int error;
1333 1333  
1334 1334          ASSERT(zfsvfs->z_ctldir != NULL);
1335 1335          error = zfsctl_root_lookup(zfsvfs->z_ctldir, "snapshot", &dvp,
1336 1336              NULL, 0, NULL, cr, NULL, NULL, NULL);
1337 1337          if (error != 0)
1338 1338                  return (error);
1339 1339          sdp = dvp->v_data;
1340 1340  
1341 1341          mutex_enter(&sdp->sd_lock);
1342 1342  
1343 1343          sep = avl_first(&sdp->sd_snaps);
1344 1344          while (sep != NULL) {
                           /* Fetch the successor first; sep may leave the tree below. */
1345 1345                  next = AVL_NEXT(&sdp->sd_snaps, sep);
1346 1346  
1347 1347                  /*
1348 1348                   * If this snapshot is not mounted, then it must
1349 1349                   * have just been unmounted by somebody else, and
1350 1350                   * will be cleaned up by zfsctl_snapdir_inactive().
                            *
                            * NOTE(review): the per-entry cleanup visible in this
                            * file is done by zfsctl_snapshot_inactive(); the
                            * function named above may be a stale reference.
1351 1351                   */
1352 1352                  if (vn_ismntpt(sep->se_root)) {
1353 1353                          avl_remove(&sdp->sd_snaps, sep);
1354 1354                          error = zfsctl_unmount_snap(sep, fflags, cr);
1355 1355                          if (error) {
                                           /* Unmount failed: restore the entry and stop. */
1356 1356                                  avl_add(&sdp->sd_snaps, sep);
1357 1357                                  break;
1358 1358                          }
1359 1359                  }
1360 1360                  sep = next;
1361 1361          }
1362 1362  
1363 1363          mutex_exit(&sdp->sd_lock);
1364 1364          VN_RELE(dvp);
1365 1365  
1366 1366          return (error);
1367 1367  }

↓ open down ↓

95 lines elided

↑ open up ↑

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX